diff --git a/.beads/issues.jsonl b/.beads/issues.jsonl index cd9115a..d752ab9 100644 --- a/.beads/issues.jsonl +++ b/.beads/issues.jsonl @@ -129,7 +129,7 @@ {"id":"skills-bo8","title":"Gemini skills access: ReadFile path restrictions block .claude/skills/","description":"Gemini agent couldn't read skill files from .claude/skills/orch/SKILL.md due to path restrictions. ReadFile tool restricts paths to workspace directories, so .claude/skills/ (symlinked from home-manager) is blocked. Agent had to fall back to shell cat command. Breaks skills portability across agents. Potential fixes: copy skills into repo, configure allowed paths, use MCP, or document workaround.","status":"closed","priority":3,"issue_type":"bug","created_at":"2026-01-09T10:58:04.037329419-08:00","created_by":"dan","updated_at":"2026-01-09T19:35:28.068433744-08:00","closed_at":"2026-01-09T19:35:28.068433744-08:00","close_reason":"Fix found: Gemini includeDirectories setting"} {"id":"skills-buh","title":"Document SQLite compile flags in config.nims","description":"[EVOLVE] LOW - SQLite compile flags (SQLITE_THREADSAFE, SQLITE_ENABLE_JSON1, SQLITE_OMIT_LOAD_EXTENSION) are hardcoded. Add comments explaining purpose.","status":"open","priority":4,"issue_type":"task","created_at":"2026-01-10T18:50:54.19875394-08:00","created_by":"dan","updated_at":"2026-01-10T18:50:54.19875394-08:00"} {"id":"skills-bvz","title":"spec-review: Add Definition of Ready checklists for each phase","description":"'Ready for /speckit.plan' and similar are underspecified.\n\nAdd concrete checklists:\n- Spec ready for planning: problem statement, goals, constraints, acceptance criteria, etc.\n- Plan ready for tasks: milestones, risks, dependencies, test strategy, etc.\n- Tasks ready for bd: each task has acceptance criteria, dependencies explicit, etc.","status":"closed","priority":2,"issue_type":"task","created_at":"2025-12-15T00:23:24.877531852-08:00","updated_at":"2025-12-15T14:05:26.880419097-08:00","closed_at":"2025-12-15T14:05:26.880419097-08:00"} -{"id":"skills-bww","title":"Benchmark AT-SPI overhead and coverage","description":"## Goal\nMeasure AT-SPI's runtime overhead and coverage across apps.\n\n## Prerequisites\n- Enable `services.gnome.at-spi2-core.enable = true` in NixOS\n- Set `QT_LINUX_ACCESSIBILITY_ALWAYS_ON=1` for Qt apps\n- Rebuild and re-login\n\n## Overhead benchmarks\n1. **Startup time**: App launch with/without AT-SPI\n2. **Memory**: RSS delta with AT-SPI enabled\n3. **CPU**: Idle CPU with AT-SPI bus running\n4. **UI latency**: Input-to-paint latency (if measurable)\n\n## Coverage audit\nFor each app, document:\n- Does it expose accessibility tree?\n- How complete is the tree? (all elements vs partial)\n- Are coordinates accurate?\n- Are element types/roles correct?\n\n### Apps to test\n- [ ] Firefox\n- [ ] Ghostty terminal\n- [ ] Nautilus/file manager\n- [ ] VS Code / Electron app\n- [ ] A Qt app (if any installed)\n\n## Query benchmarks\n- Time to enumerate all elements in a window\n- Time to find element by role/name\n- Memory overhead of pyatspi queries\n\n## Depends on\n- skills-pdg (Enable AT-SPI for UI tree access)","status":"open","priority":2,"issue_type":"task","created_at":"2025-12-17T14:13:21.599259773-08:00","updated_at":"2025-12-17T14:13:21.599259773-08:00","dependencies":[{"issue_id":"skills-bww","depends_on_id":"skills-pdg","type":"blocks","created_at":"2025-12-17T14:13:41.633210539-08:00","created_by":"daemon","metadata":"{}"}]} +{"id":"skills-bww","title":"Benchmark AT-SPI overhead and coverage","description":"## Goal\nMeasure AT-SPI's runtime overhead and coverage across apps.\n\n## Prerequisites\n- Enable `services.gnome.at-spi2-core.enable = true` in NixOS\n- Set `QT_LINUX_ACCESSIBILITY_ALWAYS_ON=1` for Qt apps\n- Rebuild and re-login\n\n## Overhead benchmarks\n1. **Startup time**: App launch with/without AT-SPI\n2. **Memory**: RSS delta with AT-SPI enabled\n3. **CPU**: Idle CPU with AT-SPI bus running\n4. **UI latency**: Input-to-paint latency (if measurable)\n\n## Coverage audit\nFor each app, document:\n- Does it expose accessibility tree?\n- How complete is the tree? (all elements vs partial)\n- Are coordinates accurate?\n- Are element types/roles correct?\n\n### Apps to test\n- [ ] Firefox\n- [ ] Ghostty terminal\n- [ ] Nautilus/file manager\n- [ ] VS Code / Electron app\n- [ ] A Qt app (if any installed)\n\n## Query benchmarks\n- Time to enumerate all elements in a window\n- Time to find element by role/name\n- Memory overhead of pyatspi queries\n\n## Depends on\n- skills-pdg (Enable AT-SPI for UI tree access)","status":"in_progress","priority":2,"issue_type":"task","created_at":"2025-12-17T14:13:21.599259773-08:00","updated_at":"2026-01-15T19:07:42.733336455-08:00","dependencies":[{"issue_id":"skills-bww","depends_on_id":"skills-pdg","type":"blocks","created_at":"2025-12-17T14:13:41.633210539-08:00","created_by":"daemon","metadata":"{}"}]} {"id":"skills-byq","title":"Integrate: review-gate with worker primitives","description":"Connect existing review-gate CLI with new worker system.\n\n## Current state\nreview-gate CLI exists with:\n- check/enable/approve/reject\n- Circuit breaker (3 strikes)\n- Stop hook integration (for Claude)\n\n## Integration needed\n- worker spawn enables review-gate automatically\n- worker status shows review state\n- worker approve/reject wraps review-gate\n- Evidence artifacts feed into review-gate\n\n## File coordination\n.worker-state/X.json includes:\n - review_session_id (links to .review-state/)\n - needs_review: true/false\n - review_status: pending/approved/rejected","notes":"MVP Tier 1: Wire review-gate to worker state machine","status":"closed","priority":1,"issue_type":"task","created_at":"2026-01-10T12:15:04.625083755-08:00","created_by":"dan","updated_at":"2026-01-10T23:24:21.172713875-08:00","closed_at":"2026-01-10T23:24:21.172713875-08:00","close_reason":"Integrated review-gate with worker: spawn enables review, status/show display review state, approve/reject update review-gate, cancel/merge clean up review state","dependencies":[{"issue_id":"skills-byq","depends_on_id":"skills-s6y","type":"blocks","created_at":"2026-01-10T12:15:10.376067847-08:00","created_by":"dan"}]} {"id":"skills-cc0","title":"spec-review: Add anti-hallucination constraints to prompts","description":"Models may paraphrase and present as quotes, or invent requirements/risks not in the doc.\n\nAdd:\n- 'Quotes must be verbatim'\n- 'Do not assume technologies/constraints not stated'\n- 'If missing info, list as open questions rather than speculating'","status":"closed","priority":3,"issue_type":"task","created_at":"2025-12-15T00:23:26.045478292-08:00","updated_at":"2025-12-15T14:07:19.556888057-08:00","closed_at":"2025-12-15T14:07:19.556888057-08:00"} {"id":"skills-cg7c","title":"Design worker system prompt template","description":"Create the system prompt/context that spawned workers receive.\n\nContents:\n- Role definition (you are a worker agent)\n- Task context (from bd issue or description)\n- Available tools (worker start/done/heartbeat, bd comments)\n- Completion criteria\n- How to signal blockers/questions\n- How to hand off for review\n\nOutput: skills/hq/templates/worker-system.md","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-11T21:06:34.943983399-08:00","created_by":"dan","updated_at":"2026-01-12T10:41:56.919305275-08:00","closed_at":"2026-01-12T10:41:56.919305275-08:00","close_reason":"Completed - skills/hq/templates/worker-system.md created with role definition, available commands, communication protocol, and completion criteria"} diff --git a/skills/ui-query/docs/benchmark-results.md b/skills/ui-query/docs/benchmark-results.md new file mode 100644 index 0000000..f4ac4b8 --- /dev/null +++ b/skills/ui-query/docs/benchmark-results.md @@ -0,0 +1,88 @@ +# AT-SPI Benchmark Results + +Date: 2026-01-15 + +## Environment + +- NixOS with `services.gnome.at-spi2-core.enable = true` +- AT-SPI bus launcher and registryd running +- QT_LINUX_ACCESSIBILITY_ALWAYS_ON not tested (no Qt apps active) + +## Coverage Audit + +| App | Windows | Elements | Enum Time | Coordinates | Text | Actions | +|-----|---------|----------|-----------|-------------|------|---------| +| Ghostty ("Unnamed") | 17 | 494 | 3198ms | ✓ | - | ✓ | +| vicinae | 2 | 265 | 1339ms | - | - | ✓ | +| waybar | 1 | 33 | 49ms | ✓ | ✓ | ✓ | +| xdg-desktop-portal-gtk | 0 | 0 | 0.5ms | - | - | - | +| **TOTAL** | 20 | 792 | 4586ms | | | | + +### App-Specific Notes + +**Ghostty Terminal** +- Registers as "Unnamed" app (accessibility name not set) +- Window titles exposed (terminal titles like "codex", "btop") +- No text interface (expected - TUI apps don't expose terminal buffer) +- Coordinates available but all show (0,0) origin +- Actions exposed on buttons/controls + +**waybar (GTK)** +- Full AT-SPI coverage: coordinates, text, actions +- Fast enumeration (49ms for 33 elements) +- Good example of GTK accessibility working well + +**Firefox** +- NOT visible to AT-SPI despite running +- Needs `MOZ_USE_XINPUT2=1` and/or restart with accessibility enabled +- Firefox requires explicit enablement in about:config (`accessibility.force_disabled = 0`) + +**vicinae (Launcher)** +- Elements exposed but coordinates show (0,0) +- May be due to offscreen/hidden state + +## Query Performance + +### Full Enumeration +- **Total time**: 4586ms for 792 elements +- **Rate**: ~173 elements/second +- Ghostty enumeration is slow (3.2s for 494 elements) + +### Find by Role (button) +- **Average**: 1704ms +- **Min**: 1407ms +- **Max**: 1854ms +- 5 iterations across all apps + +## Roles Discovered + +10 unique roles found: +- button, filler, frame, grouping, label +- layered pane, panel, progress bar, scroll bar, text + +## Key Findings + +1. **Coverage is partial**: Only GTK apps fully expose AT-SPI. Firefox needs explicit config. + +2. **Performance is moderate**: Full enumeration takes seconds, not milliseconds. Caching recommended for repeated queries. + +3. **Coordinates unreliable**: Some apps report (0,0) for all elements. May be Wayland/compositor issue. + +4. **Terminal text inaccessible**: Ghostty and other terminals don't expose buffer contents via Text interface (expected limitation). + +5. **App identification**: Some apps (Ghostty) don't set accessibility application name, appearing as "Unnamed". + +## Recommendations + +1. **Cache query results** when doing repeated lookups +2. **Filter by window first** before deep element searches +3. **Don't rely on coordinates** for click automation - use actions instead +4. **Enable Firefox accessibility** via about:config if needed +5. **Use visual capture (niri-window-capture)** as complement for apps with poor AT-SPI support + +## Next Steps + +- Test with Firefox accessibility enabled +- Test Electron apps (VS Code) when available +- Measure memory overhead of pyatspi queries +- Consider async/parallel enumeration for large element counts diff --git a/skills/ui-query/scripts/benchmark.py b/skills/ui-query/scripts/benchmark.py new file mode 100644 index 0000000..b0ab1b0 --- /dev/null +++ b/skills/ui-query/scripts/benchmark.py @@ -0,0 +1,262 @@ +#!/usr/bin/env python3 +"""Benchmark AT-SPI overhead and coverage. + +Usage: + benchmark.py [--app APP] [--json] + +Options: + --app APP Benchmark specific app only + --json Output as JSON +""" + +import argparse +import json +import sys +import time +from dataclasses import dataclass, field, asdict + +import pyatspi + +from common import set_debug, log_debug + + +@dataclass +class AppCoverage: + """Coverage data for an application.""" + app_name: str + window_count: int = 0 + element_count: int = 0 + roles_found: set = field(default_factory=set) + has_coordinates: bool = False + has_text_elements: bool = False + has_actions: bool = False + enumeration_time_ms: float = 0.0 + errors: list = field(default_factory=list) + + def to_dict(self): + d = asdict(self) + d['roles_found'] = sorted(list(self.roles_found)) + return d + + +def count_elements(accessible, stats, depth=0, max_depth=20): + """Recursively count elements and gather stats.""" + if depth > max_depth: + return + + try: + stats.element_count += 1 + stats.roles_found.add(accessible.getRoleName()) + + # Check for coordinates + try: + component = accessible.queryComponent() + if component: + rect = component.getExtents(pyatspi.DESKTOP_COORDS) + if rect.width > 0 and rect.height > 0: + stats.has_coordinates = True + except Exception: + pass + + # Check for text + try: + text_iface = accessible.queryText() + if text_iface and text_iface.characterCount > 0: + stats.has_text_elements = True + except Exception: + pass + + # Check for actions + try: + action_iface = accessible.queryAction() + if action_iface and action_iface.nActions > 0: + stats.has_actions = True + except Exception: + pass + + # Recurse to children + for i in range(accessible.childCount): + try: + child = accessible.getChildAtIndex(i) + if child: + count_elements(child, stats, depth + 1, max_depth) + except Exception as e: + log_debug(f"Error accessing child {i}: {e}") + + except Exception as e: + stats.errors.append(str(e)) + log_debug(f"Error counting element: {e}") + + +def benchmark_app(app): + """Benchmark a single application.""" + stats = AppCoverage(app_name=app.name or "(unnamed)") + + # Count windows + stats.window_count = app.childCount + + # Time enumeration + start = time.perf_counter() + for i in range(app.childCount): + try: + window = app.getChildAtIndex(i) + if window: + count_elements(window, stats) + except Exception as e: + stats.errors.append(f"Window {i}: {e}") + elapsed = time.perf_counter() - start + stats.enumeration_time_ms = round(elapsed * 1000, 2) + + return stats + + +def benchmark_find(desktop, role="button", iterations=5): + """Benchmark element finding.""" + times = [] + + for _ in range(iterations): + start = time.perf_counter() + count = 0 + + for i in range(desktop.childCount): + app = desktop.getChildAtIndex(i) + if not app: + continue + for j in range(app.childCount): + window = app.getChildAtIndex(j) + if not window: + continue + # Simple search + count += _count_role(window, role, 0, 15) + + elapsed = time.perf_counter() - start + times.append(elapsed * 1000) + + return { + "role": role, + "iterations": iterations, + "avg_ms": round(sum(times) / len(times), 2), + "min_ms": round(min(times), 2), + "max_ms": round(max(times), 2), + } + + +def _count_role(accessible, role, depth, max_depth): + """Count elements matching role.""" + if depth > max_depth: + return 0 + + count = 0 + try: + if role.lower() in accessible.getRoleName().lower(): + count = 1 + + for i in range(accessible.childCount): + try: + child = accessible.getChildAtIndex(i) + if child: + count += _count_role(child, role, depth + 1, max_depth) + except Exception: + pass + except Exception: + pass + + return count + + +def print_coverage_report(results, find_benchmark): + """Print human-readable coverage report.""" + print("=" * 60) + print("AT-SPI Coverage Audit") + print("=" * 60) + + # Summary table + print(f"\n{'App':<25} {'Win':>4} {'Elem':>6} {'Time':>8} {'Coord':>6} {'Text':>5} {'Act':>4}") + print("-" * 60) + + total_elements = 0 + total_time = 0 + + for r in sorted(results, key=lambda x: x.element_count, reverse=True): + coord = "✓" if r.has_coordinates else "-" + text = "✓" if r.has_text_elements else "-" + act = "✓" if r.has_actions else "-" + print(f"{r.app_name[:24]:<25} {r.window_count:>4} {r.element_count:>6} " + f"{r.enumeration_time_ms:>7.1f}ms {coord:>6} {text:>5} {act:>4}") + total_elements += r.element_count + total_time += r.enumeration_time_ms + + print("-" * 60) + print(f"{'TOTAL':<25} {'':<4} {total_elements:>6} {total_time:>7.1f}ms") + + # Roles summary + all_roles = set() + for r in results: + all_roles.update(r.roles_found) + + print(f"\nUnique roles found: {len(all_roles)}") + print(f"Roles: {', '.join(sorted(all_roles)[:20])}") + if len(all_roles) > 20: + print(f" ... and {len(all_roles) - 20} more") + + # Find benchmark + if find_benchmark: + print(f"\nFind Benchmark (role='{find_benchmark['role']}'):") + print(f" Avg: {find_benchmark['avg_ms']:.1f}ms " + f"(min: {find_benchmark['min_ms']:.1f}ms, max: {find_benchmark['max_ms']:.1f}ms)") + + # Apps with issues + apps_with_errors = [r for r in results if r.errors] + if apps_with_errors: + print(f"\nApps with AT-SPI errors: {len(apps_with_errors)}") + for r in apps_with_errors[:5]: + print(f" {r.app_name}: {len(r.errors)} errors") + + +def main(): + parser = argparse.ArgumentParser(description="Benchmark AT-SPI coverage") + parser.add_argument("--app", "-a", help="Benchmark specific app only") + parser.add_argument("--json", action="store_true", help="Output as JSON") + parser.add_argument("--debug", action="store_true", help="Show debug messages") + parser.add_argument("--skip-find", action="store_true", help="Skip find benchmark") + args = parser.parse_args() + + if args.debug: + set_debug(True) + + try: + desktop = pyatspi.Registry.getDesktop(0) + except Exception as e: + print(f"Error accessing AT-SPI: {e}", file=sys.stderr) + sys.exit(1) + + results = [] + + for i in range(desktop.childCount): + app = desktop.getChildAtIndex(i) + if not app: + continue + + if args.app and args.app.lower() not in (app.name or "").lower(): + continue + + stats = benchmark_app(app) + results.append(stats) + + # Run find benchmark + find_benchmark = None + if not args.skip_find: + find_benchmark = benchmark_find(desktop) + + if args.json: + output = { + "coverage": [r.to_dict() for r in results], + "find_benchmark": find_benchmark, + } + print(json.dumps(output, indent=2)) + else: + print_coverage_report(results, find_benchmark) + + +if __name__ == "__main__": + main()