Skip to content

Commit b0b526d

Browse files
Handle the screenshot functionality for the Gemma 3n E4B-it model (gemma-3n-e4b-it): capture screen information only, without requesting a MediaProjection screenshot.
1 parent 8a11629 commit b0b526d

File tree

5 files changed

+109
-136
lines changed

5 files changed

+109
-136
lines changed

app/src/main/kotlin/com/google/ai/sample/GenerativeAiViewModelFactory.kt

Lines changed: 10 additions & 102 deletions
Original file line numberDiff line numberDiff line change
@@ -21,38 +21,6 @@ enum class ModelOption(val displayName: String, val modelName: String) {
2121
}
2222

2323
val GenerativeViewModelFactory = object : ViewModelProvider.Factory {
24-
// Current selected model name
25-
private var currentModelName = ModelOption.GEMINI_FLASH_PREVIEW.modelName
26-
27-
/**
28-
* Set the model to high reasoning capability (gemini-2.5-pro-preview-03-25)
29-
*/
30-
fun highReasoningModel() {
31-
currentModelName = ModelOption.GEMINI_PRO.modelName
32-
}
33-
34-
/**
35-
* Set the model to low reasoning capability (gemini-2.0-flash-lite)
36-
*/
37-
fun lowReasoningModel() {
38-
currentModelName = ModelOption.GEMINI_FLASH_LITE.modelName
39-
}
40-
41-
/**
42-
* Set the model to a specific model option
43-
*/
44-
fun setModel(modelOption: ModelOption) {
45-
currentModelName = modelOption.modelName
46-
}
47-
48-
/**
49-
* Get the current model option
50-
*/
51-
fun getCurrentModel(): ModelOption {
52-
return ModelOption.values().find { it.modelName == currentModelName }
53-
?: ModelOption.GEMINI_FLASH_LITE
54-
}
55-
5624
override fun <T : ViewModel> create(
5725
viewModelClass: Class<T>,
5826
extras: CreationExtras
@@ -63,106 +31,46 @@ val GenerativeViewModelFactory = object : ViewModelProvider.Factory {
6331

6432
// Get the application context from extras
6533
val application = checkNotNull(extras[ViewModelProvider.AndroidViewModelFactory.APPLICATION_KEY])
66-
34+
6735
// Get the API key from MainActivity
6836
val mainActivity = MainActivity.getInstance()
6937
val apiKey = mainActivity?.getCurrentApiKey() ?: ""
70-
38+
7139
if (apiKey.isEmpty()) {
7240
throw IllegalStateException("API key is not available. Please set an API key.")
7341
}
7442

7543
return with(viewModelClass) {
7644
when {
77-
7845
isAssignableFrom(PhotoReasoningViewModel::class.java) -> {
46+
val currentModel = GenerativeAiViewModelFactory.getCurrentModel()
7947
// Initialize a GenerativeModel with the currently selected model
8048
// for multimodal text generation
8149
val generativeModel = GenerativeModel(
82-
modelName = currentModelName,
50+
modelName = currentModel.modelName,
8351
apiKey = apiKey,
8452
generationConfig = config
8553
)
8654
// Pass the ApiKeyManager to the ViewModel for key rotation
8755
val apiKeyManager = ApiKeyManager.getInstance(application)
88-
PhotoReasoningViewModel(generativeModel, apiKeyManager)
56+
PhotoReasoningViewModel(generativeModel, currentModel.modelName, apiKeyManager)
8957
}
9058

91-
9259
else ->
9360
throw IllegalArgumentException("Unknown ViewModel class: ${viewModelClass.name}")
9461
}
9562
} as T
9663
}
9764
}
9865

99-
// Add companion object with static methods for easier access
10066
object GenerativeAiViewModelFactory {
101-
// Current selected model name - duplicated from GenerativeViewModelFactory
102-
private var currentModelName = ModelOption.GEMINI_FLASH_PREVIEW.modelName
103-
104-
/**
105-
* Set the model to high reasoning capability (gemini-2.5-pro-preview-03-25)
106-
*/
107-
fun highReasoningModel() {
108-
currentModelName = ModelOption.GEMINI_PRO.modelName
109-
// Also update the original factory to keep them in sync
110-
(GenerativeViewModelFactory as ViewModelProvider.Factory).apply {
111-
if (this is ViewModelProvider.Factory) {
112-
try {
113-
val field = this.javaClass.getDeclaredField("currentModelName")
114-
field.isAccessible = true
115-
field.set(this, currentModelName)
116-
} catch (e: Exception) {
117-
// Fallback if reflection fails
118-
}
119-
}
120-
}
121-
}
122-
123-
/**
124-
* Set the model to low reasoning capability (gemini-2.0-flash-lite)
125-
*/
126-
fun lowReasoningModel() {
127-
currentModelName = ModelOption.GEMINI_FLASH_LITE.modelName
128-
// Also update the original factory to keep them in sync
129-
(GenerativeViewModelFactory as ViewModelProvider.Factory).apply {
130-
if (this is ViewModelProvider.Factory) {
131-
try {
132-
val field = this.javaClass.getDeclaredField("currentModelName")
133-
field.isAccessible = true
134-
field.set(this, currentModelName)
135-
} catch (e: Exception) {
136-
// Fallback if reflection fails
137-
}
138-
}
139-
}
140-
}
141-
142-
/**
143-
* Set the model to a specific model option
144-
*/
67+
private var currentModel: ModelOption = ModelOption.GEMINI_FLASH_PREVIEW
68+
14569
fun setModel(modelOption: ModelOption) {
146-
currentModelName = modelOption.modelName
147-
// Also update the original factory to keep them in sync
148-
(GenerativeViewModelFactory as ViewModelProvider.Factory).apply {
149-
if (this is ViewModelProvider.Factory) {
150-
try {
151-
val field = this.javaClass.getDeclaredField("currentModelName")
152-
field.isAccessible = true
153-
field.set(this, currentModelName)
154-
} catch (e: Exception) {
155-
// Fallback if reflection fails
156-
}
157-
}
158-
}
70+
currentModel = modelOption
15971
}
160-
161-
/**
162-
* Get the current model option
163-
*/
72+
16473
fun getCurrentModel(): ModelOption {
165-
return ModelOption.values().find { it.modelName == currentModelName }
166-
?: ModelOption.GEMINI_FLASH_LITE
74+
return currentModel
16775
}
16876
}

app/src/main/kotlin/com/google/ai/sample/MainActivity.kt

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,7 @@ class MainActivity : ComponentActivity() {
122122

123123
private lateinit var navController: NavHostController
124124
private var isProcessingExplicitScreenshotRequest: Boolean = false
125+
private var onMediaProjectionPermissionGranted: (() -> Unit)? = null
125126

126127
private val screenshotRequestHandler = object : BroadcastReceiver() {
127128
override fun onReceive(context: Context?, intent: Intent?) {
@@ -168,8 +169,9 @@ class MainActivity : ComponentActivity() {
168169
private lateinit var requestNotificationPermissionLauncher: ActivityResultLauncher<String>
169170
// private val requestPermissionLauncher = registerForActivityResult(...) // Deleted
170171

171-
private fun requestMediaProjectionPermission() {
172+
fun requestMediaProjectionPermission(onGranted: (() -> Unit)? = null) {
172173
Log.d(TAG, "Requesting MediaProjection permission")
174+
onMediaProjectionPermissionGranted = onGranted
173175
// Ensure mediaProjectionManager is initialized before using it.
174176
// This should be guaranteed by its placement in onCreate.
175177
if (!::mediaProjectionManager.isInitialized) {
@@ -222,6 +224,11 @@ class MainActivity : ComponentActivity() {
222224
val isAccessibilityServiceEnabledFlow: StateFlow<Boolean> = _isAccessibilityServiceEnabled.asStateFlow()
223225
// END: Added for Accessibility Service Status
224226

227+
// START: Added for MediaProjection Permission Status
228+
private val _isMediaProjectionPermissionGranted = MutableStateFlow(false)
229+
val isMediaProjectionPermissionGrantedFlow: StateFlow<Boolean> = _isMediaProjectionPermissionGranted.asStateFlow()
230+
// END: Added for MediaProjection Permission Status
231+
225232
// SharedPreferences for first launch info
226233
private lateinit var prefs: SharedPreferences
227234
private var showFirstLaunchInfoDialog by mutableStateOf(false)
@@ -444,6 +451,9 @@ class MainActivity : ComponentActivity() {
444451
Log.d(TAG, "Resetting isProcessingExplicitScreenshotRequest flag after successful explicit grant.")
445452
this@MainActivity.isProcessingExplicitScreenshotRequest = false
446453
}
454+
_isMediaProjectionPermissionGranted.value = true
455+
onMediaProjectionPermissionGranted?.invoke()
456+
onMediaProjectionPermissionGranted = null
447457
} else {
448458
Log.w(TAG, "MediaProjection permission denied or cancelled by user.")
449459
Toast.makeText(this, "Screen capture permission denied", Toast.LENGTH_SHORT).show()
@@ -452,6 +462,7 @@ class MainActivity : ComponentActivity() {
452462
Log.d(TAG, "Resetting isProcessingExplicitScreenshotRequest flag after explicit denial.")
453463
this@MainActivity.isProcessingExplicitScreenshotRequest = false
454464
}
465+
_isMediaProjectionPermissionGranted.value = false
455466
}
456467
}
457468

@@ -589,11 +600,6 @@ class MainActivity : ComponentActivity() {
589600
Log.w(TAG, "photoReasoningViewModel is null at the end of onCreate. Notification flow collection might be delayed or not start if VM is set much later or never.")
590601
}
591602

592-
Log.d(TAG, "onCreate: Scheduling MediaProjection permission request")
593-
Handler(Looper.getMainLooper()).postDelayed({
594-
Log.d(TAG, "onCreate: Calling requestMediaProjectionPermission")
595-
requestMediaProjectionPermission()
596-
}, 1000) // 1 second delay to ensure everything is initialized
597603
}
598604

599605
fun showStopOperationNotification() {

app/src/main/kotlin/com/google/ai/sample/ScreenOperatorAccessibilityService.kt

Lines changed: 28 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -214,23 +214,37 @@ class ScreenOperatorAccessibilityService : AccessibilityService() {
214214
true // Asynchronous
215215
}
216216
is Command.TakeScreenshot -> {
217-
Log.d(TAG, "Command.TakeScreenshot: Capturing screen info and sending request broadcast to MainActivity.")
218-
this.showToast("Preparing screenshot...", false) // Updated toast message
217+
val modelName = GenerativeAiViewModelFactory.getCurrentModel().modelName
218+
if (modelName == "gemma-3n-e4b-it") {
219+
Log.d(TAG, "Command.TakeScreenshot: Model is gemma-3n-e4b-it, capturing screen info only.")
220+
this.showToast("Capturing screen info...", false)
221+
val screenInfo = captureScreenInformation()
222+
val mainActivity = MainActivity.getInstance()
223+
mainActivity?.getPhotoReasoningViewModel()?.addScreenshotToConversation(
224+
Uri.EMPTY,
225+
applicationContext,
226+
screenInfo
227+
)
228+
false
229+
} else {
230+
Log.d(TAG, "Command.TakeScreenshot: Capturing screen info and sending request broadcast to MainActivity.")
231+
this.showToast("Preparing screenshot...", false) // Updated toast message
219232

220-
val screenInfo = captureScreenInformation() // Capture fresh screen info
233+
val screenInfo = captureScreenInformation() // Capture fresh screen info
221234

222-
val intent = Intent(MainActivity.ACTION_REQUEST_MEDIAPROJECTION_SCREENSHOT).apply {
223-
putExtra(MainActivity.EXTRA_SCREEN_INFO, screenInfo)
224-
// Set package to ensure only our app's receiver gets it
225-
`package` = applicationContext.packageName
226-
}
227-
applicationContext.sendBroadcast(intent)
228-
Log.d(TAG, "Sent broadcast ACTION_REQUEST_MEDIAPROJECTION_SCREENSHOT to MainActivity with screenInfo.")
235+
val intent = Intent(MainActivity.ACTION_REQUEST_MEDIAPROJECTION_SCREENSHOT).apply {
236+
putExtra(MainActivity.EXTRA_SCREEN_INFO, screenInfo)
237+
// Set package to ensure only our app's receiver gets it
238+
`package` = applicationContext.packageName
239+
}
240+
applicationContext.sendBroadcast(intent)
241+
Log.d(TAG, "Sent broadcast ACTION_REQUEST_MEDIAPROJECTION_SCREENSHOT to MainActivity with screenInfo.")
229242

230-
// The command is considered "handled" once the broadcast is sent.
231-
// MainActivity and ScreenCaptureService will handle the rest asynchronously.
232-
// Return false to allow the command queue to proceed immediately.
233-
false
243+
// The command is considered "handled" once the broadcast is sent.
244+
// MainActivity and ScreenCaptureService will handle the rest asynchronously.
245+
// Return false to allow the command queue to proceed immediately.
246+
false
247+
}
234248
}
235249
is Command.PressHomeButton -> {
236250
Log.d(TAG, "Pressing home button")

app/src/main/kotlin/com/google/ai/sample/feature/multimodal/PhotoReasoningScreen.kt

Lines changed: 31 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,7 @@ internal fun PhotoReasoningRoute(
144144
val systemMessage by viewModel.systemMessage.collectAsState()
145145
val chatMessages by viewModel.chatMessagesFlow.collectAsState()
146146
val isInitialized by viewModel.isInitialized.collectAsState()
147+
val modelName by viewModel.modelNameState.collectAsState()
147148

148149
// Hoisted: var showNotificationRationaleDialog by rememberSaveable { mutableStateOf(false) }
149150
// This state will now be managed in PhotoReasoningRoute and passed down.
@@ -157,6 +158,7 @@ internal fun PhotoReasoningRoute(
157158
val mainActivity = context as? MainActivity
158159

159160
val isAccessibilityServiceEffectivelyEnabled by mainActivity?.isAccessibilityServiceEnabledFlow?.collectAsState() ?: mutableStateOf(false)
161+
val isMediaProjectionPermissionGranted by mainActivity?.isMediaProjectionPermissionGrantedFlow?.collectAsState() ?: mutableStateOf(false)
160162
val isKeyboardOpen by mainActivity?.isKeyboardOpen?.collectAsState() ?: mutableStateOf(false)
161163

162164
val accessibilitySettingsLauncher = rememberLauncherForActivityResult(
@@ -199,6 +201,7 @@ internal fun PhotoReasoningRoute(
199201
}
200202
},
201203
isAccessibilityServiceEnabled = isAccessibilityServiceEffectivelyEnabled,
204+
isMediaProjectionPermissionGranted = isMediaProjectionPermissionGranted,
202205
onEnableAccessibilityService = {
203206
val intent = Intent(Settings.ACTION_ACCESSIBILITY_SETTINGS)
204207
try {
@@ -214,7 +217,8 @@ internal fun PhotoReasoningRoute(
214217
onStopClicked = { viewModel.onStopClicked() },
215218
// showNotificationRationaleDialog = showNotificationRationaleDialogStateInRoute, // Removed
216219
// onShowNotificationRationaleDialogChange = { showNotificationRationaleDialogStateInRoute = it }, // Removed
217-
isInitialized = isInitialized // Pass the collected state
220+
isInitialized = isInitialized, // Pass the collected state
221+
modelName = modelName
218222
)
219223
}
220224

@@ -228,13 +232,15 @@ fun PhotoReasoningScreen(
228232
onSystemMessageChanged: (String) -> Unit = {},
229233
onReasonClicked: (String, List<Uri>) -> Unit = { _, _ -> },
230234
isAccessibilityServiceEnabled: Boolean = false,
235+
isMediaProjectionPermissionGranted: Boolean = false,
231236
onEnableAccessibilityService: () -> Unit = {},
232237
onClearChatHistory: () -> Unit = {},
233238
isKeyboardOpen: Boolean,
234239
onStopClicked: () -> Unit = {},
235240
// showNotificationRationaleDialog: Boolean, // Removed
236241
// onShowNotificationRationaleDialogChange: (Boolean) -> Unit, // Removed
237-
isInitialized: Boolean = true // Added parameter with default for preview
242+
isInitialized: Boolean = true, // Added parameter with default for preview
243+
modelName: String = ""
238244
) {
239245
var userQuestion by rememberSaveable { mutableStateOf("") }
240246
val imageUris = rememberSaveable(saver = UriSaver()) { mutableStateListOf() }
@@ -433,25 +439,37 @@ fun PhotoReasoningScreen(
433439
)
434440
IconButton(
435441
onClick = {
436-
if (isAccessibilityServiceEnabled) {
437-
if (userQuestion.isNotBlank()) {
438-
onReasonClicked(userQuestion, imageUris.toList())
439-
userQuestion = ""
440-
}
441-
// If accessibility is ON but userQuestion is BLANK, no action is needed here as the button's enabled state and tint convey this.
442-
} else {
443-
// Accessibility is OFF
442+
val mainActivity = context as? MainActivity
443+
if (!isAccessibilityServiceEnabled) {
444444
onEnableAccessibilityService()
445445
Toast.makeText(context, "Enable the Accessibility service for Screen Operator", Toast.LENGTH_LONG).show()
446-
} // Closes the else block
446+
return@IconButton
447+
}
448+
449+
if (!isMediaProjectionPermissionGranted && modelName != "gemma-3n-e4b-it") {
450+
mainActivity?.requestMediaProjectionPermission {
451+
// This block will be executed after permission is granted
452+
if (userQuestion.isNotBlank()) {
453+
onReasonClicked(userQuestion, imageUris.toList())
454+
userQuestion = ""
455+
}
456+
}
457+
Toast.makeText(context, "Requesting screen capture permission...", Toast.LENGTH_SHORT).show()
458+
return@IconButton
459+
}
460+
461+
if (userQuestion.isNotBlank()) {
462+
onReasonClicked(userQuestion, imageUris.toList())
463+
userQuestion = ""
464+
}
447465
},
448-
enabled = isInitialized && ((isAccessibilityServiceEnabled && userQuestion.isNotBlank()) || !isAccessibilityServiceEnabled),
466+
enabled = isInitialized && userQuestion.isNotBlank(),
449467
modifier = Modifier.padding(all = 4.dp).align(Alignment.CenterVertically)
450468
) {
451469
Icon(
452470
Icons.Default.Send,
453471
stringResource(R.string.action_go),
454-
tint = if ((isAccessibilityServiceEnabled && userQuestion.isNotBlank()) || !isAccessibilityServiceEnabled) MaterialTheme.colorScheme.primary else Color.Gray,
472+
tint = if (isInitialized && userQuestion.isNotBlank()) MaterialTheme.colorScheme.primary else Color.Gray,
455473
)
456474
}
457475
} // Closes Row

0 commit comments

Comments
 (0)