diff --git a/src/label/prompts/default.txt b/src/label/prompts/default.txt index 4fc7786..666ae21 100644 --- a/src/label/prompts/default.txt +++ b/src/label/prompts/default.txt @@ -42,15 +42,23 @@ Here are the user input events: - Identify folders, files, UI elements, spreadsheet cells (with values and labels), browser fields, etc. - Include **visible labels or contents** of buttons, menu items, folders, etc. -5. **Ignore technical rendering details** +5. **Quote exact text for copy/cut/paste/select/delete/find-replace actions** + + - Always include the exact text content in quotes and the precise location (filename, line number, cell, field name, URL). + +6. **Name the application and location** + + - Always name the app (e.g. VS Code, Chrome, Terminal). Include filename + line number for editors, site name for browsers, working directory for terminals. + +7. **Ignore technical rendering details** - Do not mention coordinates, cursor paths, or raw keycodes. -6. **Favor screenshot over input events** +8. **Favor screenshot over input events** - In cases where input logs and screenshots conflict, or logs are harder to understand, prioritize the **visual evidence** from screenshots. -6. IMPORTANT: **Merge repeated identical actions** +9. IMPORTANT: **Merge repeated identical actions** - If the same action is done repeatedly with no change or intermediate action, **merge them into one action** with a wider start–end interval. For example, instead of multiple "Ran the command \"ls\" in the terminal," generate it ONLY once. - If the user repeatedly clicks / switches between applications without performing any intermediate action, merge them into a single combined action. @@ -68,6 +76,10 @@ Generated captions must be in past tense, and at the level of detail as the exam - Ran "cd /home/user/projects/gs-utils" in the terminal. - Deleted the text "hyundai i30" from cell I2. - Clicked the "Downloads" folder in the sidebar. 
+- Copied "export default App" from line 24 of App.tsx in VS Code. +- Pasted "border-radius: 8px;" into the .card class at line 31 of styles.css in VS Code. +- Selected "return None" on line 15 of utils.py in VS Code. +- Replaced "http" with "https" using Find and Replace in config.yaml in VS Code. You MUST quote specific things from the screen so it's easy to reproduce your steps. diff --git a/src/label/prompts/screenshots_only.txt b/src/label/prompts/screenshots_only.txt index e568291..d3dc8e8 100644 --- a/src/label/prompts/screenshots_only.txt +++ b/src/label/prompts/screenshots_only.txt @@ -29,15 +29,23 @@ Your job is to generate **fully detailed captions** describing **exactly what th - Identify folders, files, UI elements, spreadsheet cells (with values and labels), browser fields, etc. - Include **visible labels or contents** of buttons, menu items, folders, etc. -5. **Ignore technical rendering details** +5. **Quote exact text for copy/cut/paste/select/delete/find-replace actions** + + - Always include the exact text content in quotes and the precise location (filename, line number, cell, field name, URL). + +6. **Name the application and location** + + - Always name the app (e.g. VS Code, Chrome, Terminal). Include filename + line number for editors, site name for browsers, working directory for terminals. + +7. **Ignore technical rendering details** - Do not mention coordinates, cursor paths, or raw keycodes. -6. **Favor screenshot over input events** +8. **Favor screenshot over input events** - In cases where input logs and screenshots conflict, or logs are harder to understand, prioritize the **visual evidence** from screenshots. -6. IMPORTANT: **Merge repeated identical actions** +9. IMPORTANT: **Merge repeated identical actions** - If the same action is done repeatedly with no change or intermediate action, **merge them into one action** with a wider start–end interval. For example, instead of multiple "Ran the command \"ls\" in the terminal," generate it ONLY once. 
- If the user repeatedly clicks / switches between applications without performing any intermediate action, merge them into a single combined action. @@ -57,6 +65,10 @@ Generated captions must be in past tense, and at the level of detail as the exam - Ran "cd /home/user/projects/gs-utils" in the terminal. - Deleted the text "hyundai i30" from cell I2. - Clicked the "Downloads" folder in the sidebar. +- Copied "export default App" from line 24 of App.tsx in VS Code. +- Pasted "border-radius: 8px;" into the .card class at line 31 of styles.css in VS Code. +- Selected "return None" on line 15 of utils.py in VS Code. +- Replaced "http" with "https" using Find and Replace in config.yaml in VS Code. You MUST quote specific things from the screen so it's easy to reproduce your steps. diff --git a/src/record/handlers/input_event.py b/src/record/handlers/input_event.py index 5f70faa..5db5655 100644 --- a/src/record/handlers/input_event.py +++ b/src/record/handlers/input_event.py @@ -33,6 +33,8 @@ def __init__( """ self.event_queue = event_queue self._monitors = list(get_monitors()) + self._monitors_last_refresh = time.time() + self._monitors_refresh_interval = 5.0 self.accessibility_enabled = accessibility self.accessibility_handler = None @@ -68,6 +70,14 @@ def _get_monitor(self, x: int, y: int) -> int: Returns: Monitor index (0-based) """ + now = time.time() + if now - self._monitors_last_refresh > self._monitors_refresh_interval: + try: + self._monitors = list(get_monitors()) + except Exception: + pass + self._monitors_last_refresh = now + def to_monitor_dict(monitor): return { "left": monitor.x, "top": monitor.y, "width": monitor.width, "height": monitor.height diff --git a/src/record/models/event_queue.py b/src/record/models/event_queue.py index 39edd2d..0189795 100644 --- a/src/record/models/event_queue.py +++ b/src/record/models/event_queue.py @@ -96,7 +96,7 @@ def enqueue(self, event: InputEvent) -> None: queue = self.aggregations[agg_type] config = self.configs[agg_type] - screenshots = 
self._collect_screenshots(event.timestamp) + screenshots = self._collect_screenshots(event.timestamp, event.monitor_index) last_event, last_screenshots = queue[-1] if queue else (None, None) first_event, first_screenshots = queue[0] if queue else (None, None) @@ -171,7 +171,7 @@ def _end_burst(self, agg_type: str, event: InputEvent, screenshot: Any) -> None: current_burst_id = self._get_burst_id_for_type(agg_type) # Get screenshot with padding after - end_screenshot = self._collect_end_screenshot(event.timestamp) + end_screenshot = self._collect_end_screenshot(event.timestamp, event.monitor_index) request = self._create_request( event=event, @@ -255,21 +255,33 @@ def _get_burst_id_for_type(self, agg_type: str) -> int: self.next_burst_id ) - def _collect_screenshots(self, timestamp: float) -> Any: - """Get screenshot before timestamp.""" + def _collect_screenshots(self, timestamp: float, monitor_index: int = None) -> Any: + """Get screenshot before timestamp, preferring the given monitor.""" constants = constants_manager.get() start_candidates = self.image_queue.get_entries_before( timestamp, milliseconds=constants.PADDING_BEFORE ) - return start_candidates[-1] if start_candidates else None - - def _collect_end_screenshot(self, timestamp: float) -> Any: - """Get screenshot after timestamp with padding.""" + if not start_candidates: + return None + if monitor_index is not None: + matching = [s for s in start_candidates if s.monitor_index == monitor_index] + if matching: + return matching[-1] + return start_candidates[-1] + + def _collect_end_screenshot(self, timestamp: float, monitor_index: int = None) -> Any: + """Get screenshot after timestamp with padding, preferring the given monitor.""" constants = constants_manager.get() exact_candidates = self.image_queue.get_entries_after( timestamp, milliseconds=constants.PADDING_AFTER ) - return exact_candidates[-1] if exact_candidates else None + if not exact_candidates: + return None + if monitor_index is not None: + 
matching = [s for s in exact_candidates if s.monitor_index == monitor_index] + if matching: + return matching[-1] + return exact_candidates[-1] def _save_event_to_jsonl(self, event: InputEvent) -> None: if self.session_dir: