feat(ui-query): add AT-SPI benchmark script and results
- benchmark.py: measures coverage and query performance - Documents coverage across Ghostty, waybar, vicinae - Key finding: Firefox not visible to AT-SPI without explicit config - Query performance: ~1.7s to find buttons across all apps Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
parent
b48212e973
commit
ca98eec7a6
|
|
@ -129,7 +129,7 @@
|
|||
{"id":"skills-bo8","title":"Gemini skills access: ReadFile path restrictions block .claude/skills/","description":"Gemini agent couldn't read skill files from .claude/skills/orch/SKILL.md due to path restrictions. ReadFile tool restricts paths to workspace directories, so .claude/skills/ (symlinked from home-manager) is blocked. Agent had to fall back to shell cat command. Breaks skills portability across agents. Potential fixes: copy skills into repo, configure allowed paths, use MCP, or document workaround.","status":"closed","priority":3,"issue_type":"bug","created_at":"2026-01-09T10:58:04.037329419-08:00","created_by":"dan","updated_at":"2026-01-09T19:35:28.068433744-08:00","closed_at":"2026-01-09T19:35:28.068433744-08:00","close_reason":"Fix found: Gemini includeDirectories setting"}
|
||||
{"id":"skills-buh","title":"Document SQLite compile flags in config.nims","description":"[EVOLVE] LOW - SQLite compile flags (SQLITE_THREADSAFE, SQLITE_ENABLE_JSON1, SQLITE_OMIT_LOAD_EXTENSION) are hardcoded. Add comments explaining purpose.","status":"open","priority":4,"issue_type":"task","created_at":"2026-01-10T18:50:54.19875394-08:00","created_by":"dan","updated_at":"2026-01-10T18:50:54.19875394-08:00"}
|
||||
{"id":"skills-bvz","title":"spec-review: Add Definition of Ready checklists for each phase","description":"'Ready for /speckit.plan' and similar are underspecified.\n\nAdd concrete checklists:\n- Spec ready for planning: problem statement, goals, constraints, acceptance criteria, etc.\n- Plan ready for tasks: milestones, risks, dependencies, test strategy, etc.\n- Tasks ready for bd: each task has acceptance criteria, dependencies explicit, etc.","status":"closed","priority":2,"issue_type":"task","created_at":"2025-12-15T00:23:24.877531852-08:00","updated_at":"2025-12-15T14:05:26.880419097-08:00","closed_at":"2025-12-15T14:05:26.880419097-08:00"}
|
||||
{"id":"skills-bww","title":"Benchmark AT-SPI overhead and coverage","description":"## Goal\nMeasure AT-SPI's runtime overhead and coverage across apps.\n\n## Prerequisites\n- Enable `services.gnome.at-spi2-core.enable = true` in NixOS\n- Set `QT_LINUX_ACCESSIBILITY_ALWAYS_ON=1` for Qt apps\n- Rebuild and re-login\n\n## Overhead benchmarks\n1. **Startup time**: App launch with/without AT-SPI\n2. **Memory**: RSS delta with AT-SPI enabled\n3. **CPU**: Idle CPU with AT-SPI bus running\n4. **UI latency**: Input-to-paint latency (if measurable)\n\n## Coverage audit\nFor each app, document:\n- Does it expose accessibility tree?\n- How complete is the tree? (all elements vs partial)\n- Are coordinates accurate?\n- Are element types/roles correct?\n\n### Apps to test\n- [ ] Firefox\n- [ ] Ghostty terminal\n- [ ] Nautilus/file manager\n- [ ] VS Code / Electron app\n- [ ] A Qt app (if any installed)\n\n## Query benchmarks\n- Time to enumerate all elements in a window\n- Time to find element by role/name\n- Memory overhead of pyatspi queries\n\n## Depends on\n- skills-pdg (Enable AT-SPI for UI tree access)","status":"open","priority":2,"issue_type":"task","created_at":"2025-12-17T14:13:21.599259773-08:00","updated_at":"2025-12-17T14:13:21.599259773-08:00","dependencies":[{"issue_id":"skills-bww","depends_on_id":"skills-pdg","type":"blocks","created_at":"2025-12-17T14:13:41.633210539-08:00","created_by":"daemon","metadata":"{}"}]}
|
||||
{"id":"skills-bww","title":"Benchmark AT-SPI overhead and coverage","description":"## Goal\nMeasure AT-SPI's runtime overhead and coverage across apps.\n\n## Prerequisites\n- Enable `services.gnome.at-spi2-core.enable = true` in NixOS\n- Set `QT_LINUX_ACCESSIBILITY_ALWAYS_ON=1` for Qt apps\n- Rebuild and re-login\n\n## Overhead benchmarks\n1. **Startup time**: App launch with/without AT-SPI\n2. **Memory**: RSS delta with AT-SPI enabled\n3. **CPU**: Idle CPU with AT-SPI bus running\n4. **UI latency**: Input-to-paint latency (if measurable)\n\n## Coverage audit\nFor each app, document:\n- Does it expose accessibility tree?\n- How complete is the tree? (all elements vs partial)\n- Are coordinates accurate?\n- Are element types/roles correct?\n\n### Apps to test\n- [ ] Firefox\n- [ ] Ghostty terminal\n- [ ] Nautilus/file manager\n- [ ] VS Code / Electron app\n- [ ] A Qt app (if any installed)\n\n## Query benchmarks\n- Time to enumerate all elements in a window\n- Time to find element by role/name\n- Memory overhead of pyatspi queries\n\n## Depends on\n- skills-pdg (Enable AT-SPI for UI tree access)","status":"in_progress","priority":2,"issue_type":"task","created_at":"2025-12-17T14:13:21.599259773-08:00","updated_at":"2026-01-15T19:07:42.733336455-08:00","dependencies":[{"issue_id":"skills-bww","depends_on_id":"skills-pdg","type":"blocks","created_at":"2025-12-17T14:13:41.633210539-08:00","created_by":"daemon","metadata":"{}"}]}
|
||||
{"id":"skills-byq","title":"Integrate: review-gate with worker primitives","description":"Connect existing review-gate CLI with new worker system.\n\n## Current state\nreview-gate CLI exists with:\n- check/enable/approve/reject\n- Circuit breaker (3 strikes)\n- Stop hook integration (for Claude)\n\n## Integration needed\n- worker spawn enables review-gate automatically\n- worker status shows review state\n- worker approve/reject wraps review-gate\n- Evidence artifacts feed into review-gate\n\n## File coordination\n.worker-state/X.json includes:\n - review_session_id (links to .review-state/)\n - needs_review: true/false\n - review_status: pending/approved/rejected","notes":"MVP Tier 1: Wire review-gate to worker state machine","status":"closed","priority":1,"issue_type":"task","created_at":"2026-01-10T12:15:04.625083755-08:00","created_by":"dan","updated_at":"2026-01-10T23:24:21.172713875-08:00","closed_at":"2026-01-10T23:24:21.172713875-08:00","close_reason":"Integrated review-gate with worker: spawn enables review, status/show display review state, approve/reject update review-gate, cancel/merge clean up review state","dependencies":[{"issue_id":"skills-byq","depends_on_id":"skills-s6y","type":"blocks","created_at":"2026-01-10T12:15:10.376067847-08:00","created_by":"dan"}]}
|
||||
{"id":"skills-cc0","title":"spec-review: Add anti-hallucination constraints to prompts","description":"Models may paraphrase and present as quotes, or invent requirements/risks not in the doc.\n\nAdd:\n- 'Quotes must be verbatim'\n- 'Do not assume technologies/constraints not stated'\n- 'If missing info, list as open questions rather than speculating'","status":"closed","priority":3,"issue_type":"task","created_at":"2025-12-15T00:23:26.045478292-08:00","updated_at":"2025-12-15T14:07:19.556888057-08:00","closed_at":"2025-12-15T14:07:19.556888057-08:00"}
|
||||
{"id":"skills-cg7c","title":"Design worker system prompt template","description":"Create the system prompt/context that spawned workers receive.\n\nContents:\n- Role definition (you are a worker agent)\n- Task context (from bd issue or description)\n- Available tools (worker start/done/heartbeat, bd comments)\n- Completion criteria\n- How to signal blockers/questions\n- How to hand off for review\n\nOutput: skills/hq/templates/worker-system.md","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-11T21:06:34.943983399-08:00","created_by":"dan","updated_at":"2026-01-12T10:41:56.919305275-08:00","closed_at":"2026-01-12T10:41:56.919305275-08:00","close_reason":"Completed - skills/hq/templates/worker-system.md created with role definition, available commands, communication protocol, and completion criteria"}
|
||||
|
|
|
|||
88
skills/ui-query/docs/benchmark-results.md
Normal file
88
skills/ui-query/docs/benchmark-results.md
Normal file
|
|
@ -0,0 +1,88 @@
|
|||
# AT-SPI Benchmark Results
|
||||
|
||||
Date: 2026-01-15
|
||||
|
||||
## Environment
|
||||
|
||||
- NixOS with `services.gnome.at-spi2-core.enable = true`
|
||||
- AT-SPI bus launcher and registryd running
|
||||
- QT_LINUX_ACCESSIBILITY_ALWAYS_ON not tested (no Qt apps active)
|
||||
|
||||
## Coverage Audit
|
||||
|
||||
| App | Windows | Elements | Enum Time | Coordinates | Text | Actions |
|
||||
|-----|---------|----------|-----------|-------------|------|---------|
|
||||
| Ghostty ("Unnamed") | 17 | 494 | 3198ms | ✓ | - | ✓ |
|
||||
| vicinae | 2 | 265 | 1339ms | - | - | ✓ |
|
||||
| waybar | 1 | 33 | 49ms | ✓ | ✓ | ✓ |
|
||||
| xdg-desktop-portal-gtk | 0 | 0 | 0.5ms | - | - | - |
|
||||
| **TOTAL** | 20 | 792 | 4586ms | | | |
|
||||
|
||||
### App-Specific Notes
|
||||
|
||||
**Ghostty Terminal**
|
||||
- Registers as "Unnamed" app (accessibility name not set)
|
||||
- Window titles exposed (terminal titles like "codex", "btop")
|
||||
- No text interface (expected - TUI apps don't expose terminal buffer)
|
||||
- Extents report a non-zero size (hence the ✓ under Coordinates), but every element's position is reported as (0,0)
|
||||
- Actions exposed on buttons/controls
|
||||
|
||||
**waybar (GTK)**
|
||||
- Full AT-SPI coverage: coordinates, text, actions
|
||||
- Fast enumeration (49ms for 33 elements)
|
||||
- Good example of GTK accessibility working well
|
||||
|
||||
**Firefox**
|
||||
- NOT visible to AT-SPI despite running
|
||||
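- Needs `GNOME_ACCESSIBILITY=1` in Firefox's environment and/or a restart with accessibility enabled (`MOZ_USE_XINPUT2=1` only changes input handling; it does not enable accessibility)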
|
||||
- Firefox requires explicit enablement in about:config (`accessibility.force_disabled = 0`)
|
||||
|
||||
**vicinae (Launcher)**
|
||||
- Elements exposed but coordinates show (0,0)
|
||||
- May be due to offscreen/hidden state
|
||||
|
||||
## Query Performance
|
||||
|
||||
### Full Enumeration
|
||||
- **Total time**: 4586ms for 792 elements
|
||||
- **Rate**: ~173 elements/second
|
||||
- Ghostty enumeration is slow (3.2s for 494 elements)
|
||||
|
||||
### Find by Role (button)
|
||||
- **Average**: 1704ms
|
||||
- **Min**: 1407ms
|
||||
- **Max**: 1854ms
|
||||
- 5 iterations across all apps
|
||||
|
||||
## Roles Discovered
|
||||
|
||||
10 unique roles found:
|
||||
- button, filler, frame, grouping, label
|
||||
- layered pane, panel, progress bar, scroll bar, text
|
||||
|
||||
## Key Findings
|
||||
|
||||
1. **Coverage is partial**: Only GTK apps fully expose AT-SPI. Firefox needs explicit config.
|
||||
|
||||
2. **Performance is moderate**: Full enumeration takes seconds, not milliseconds. Caching recommended for repeated queries.
|
||||
|
||||
3. **Coordinates unreliable**: Some apps report (0,0) for all elements. May be Wayland/compositor issue.
|
||||
|
||||
4. **Terminal text inaccessible**: Ghostty and other terminals don't expose buffer contents via Text interface (expected limitation).
|
||||
|
||||
5. **App identification**: Some apps (Ghostty) don't set accessibility application name, appearing as "Unnamed".
|
||||
|
||||
## Recommendations
|
||||
|
||||
1. **Cache query results** when doing repeated lookups
|
||||
2. **Filter by window first** before deep element searches
|
||||
3. **Don't rely on coordinates** for click automation - use actions instead
|
||||
4. **Enable Firefox accessibility** via about:config if needed
|
||||
5. **Use visual capture (niri-window-capture)** as complement for apps with poor AT-SPI support
|
||||
|
||||
## Next Steps
|
||||
|
||||
- Test with Firefox accessibility enabled
|
||||
- Test Electron apps (VS Code) when available
|
||||
- Measure memory overhead of pyatspi queries
|
||||
- Consider async/parallel enumeration for large element counts
|
||||
262
skills/ui-query/scripts/benchmark.py
Normal file
262
skills/ui-query/scripts/benchmark.py
Normal file
|
|
@ -0,0 +1,262 @@
|
|||
#!/usr/bin/env python3
|
||||
"""Benchmark AT-SPI overhead and coverage.
|
||||
|
||||
Usage:
|
||||
benchmark.py [--app APP] [--json] [--debug] [--skip-find]
|
||||
|
||||
Options:
|
||||
--app APP Benchmark specific app only
|
||||
--json Output as JSON
--debug Show debug messages
--skip-find Skip find benchmark
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import sys
|
||||
import time
|
||||
from dataclasses import dataclass, field, asdict
|
||||
|
||||
import pyatspi
|
||||
|
||||
from common import set_debug, log_debug
|
||||
|
||||
|
||||
@dataclass
class AppCoverage:
    """Coverage data collected for a single AT-SPI application."""

    # Application name, or a placeholder when the app reports none.
    app_name: str
    # Number of direct children of the application node.
    window_count: int = 0
    # Accessible nodes visited while walking the application's windows.
    element_count: int = 0
    # Distinct role names seen; kept as a set, converted by to_dict() for JSON.
    roles_found: set = field(default_factory=set)
    # True once any element reports extents with non-zero width and height.
    has_coordinates: bool = False
    # True once any element exposes a Text interface with at least one character.
    has_text_elements: bool = False
    # True once any element exposes at least one action.
    has_actions: bool = False
    # Wall-clock time spent walking the application's windows, in milliseconds.
    enumeration_time_ms: float = 0.0
    # Messages of the exceptions raised while walking the tree.
    errors: list = field(default_factory=list)

    def to_dict(self) -> dict:
        """Return the fields as a JSON-serializable dict.

        ``roles_found`` is emitted as a sorted list: ``json`` cannot encode
        sets, and a stable order keeps the output comparable between runs.
        """
        d = asdict(self)
        # sorted() accepts any iterable, so no intermediate list is needed.
        d['roles_found'] = sorted(self.roles_found)
        return d
|
||||
|
||||
|
||||
def count_elements(accessible, stats, depth=0, max_depth=20):
    """Walk an accessible subtree and accumulate coverage data into ``stats``.

    Each visited node increments ``stats.element_count`` and adds its role
    name to ``stats.roles_found``.  The Component, Text and Action interfaces
    are probed on every node and the matching ``has_*`` flag is set on a hit.
    Nodes deeper than ``max_depth`` are not visited.  ``stats`` is updated in
    place; nothing is returned.
    """
    if depth > max_depth:
        return

    try:
        stats.element_count += 1
        stats.roles_found.add(accessible.getRoleName())

        # Component interface: only extents with a real size count.
        try:
            comp = accessible.queryComponent()
            if comp:
                box = comp.getExtents(pyatspi.DESKTOP_COORDS)
                has_size = box.width > 0 and box.height > 0
                if has_size:
                    stats.has_coordinates = True
        except Exception:
            # Best-effort probe: a failing query just leaves the flag alone.
            pass

        # Text interface: must hold at least one character.
        try:
            text = accessible.queryText()
            if text:
                if text.characterCount > 0:
                    stats.has_text_elements = True
        except Exception:
            pass

        # Action interface: must offer at least one action.
        try:
            actions = accessible.queryAction()
            if actions:
                if actions.nActions > 0:
                    stats.has_actions = True
        except Exception:
            pass

        # Descend; a failing child is logged and its siblings still get visited.
        for index in range(accessible.childCount):
            try:
                node = accessible.getChildAtIndex(index)
                if node:
                    count_elements(node, stats, depth=depth + 1, max_depth=max_depth)
            except Exception as exc:
                log_debug(f"Error accessing child {index}: {exc}")

    except Exception as exc:
        stats.errors.append(str(exc))
        log_debug(f"Error counting element: {exc}")
|
||||
|
||||
|
||||
def benchmark_app(app):
    """Collect coverage data for one application and time the tree walk.

    Returns an ``AppCoverage`` whose ``window_count`` is the application's
    child count and whose ``enumeration_time_ms`` is the wall-clock time
    spent walking all of its windows, rounded to two decimals.
    """
    coverage = AppCoverage(app_name=app.name or "(unnamed)")
    coverage.window_count = app.childCount

    began = time.perf_counter()
    for index in range(app.childCount):
        try:
            top_level = app.getChildAtIndex(index)
            if top_level:
                count_elements(top_level, coverage)
        except Exception as exc:
            # Record the failure and keep going with the remaining windows.
            coverage.errors.append(f"Window {index}: {exc}")
    coverage.enumeration_time_ms = round((time.perf_counter() - began) * 1000, 2)

    return coverage
|
||||
|
||||
|
||||
def benchmark_find(desktop, role="button", iterations=5):
    """Time a full-desktop search for elements whose role matches ``role``.

    Each iteration walks every window of every application under
    ``desktop`` (to a depth of 15 below the window) and counts the matches.

    Args:
        desktop: AT-SPI desktop accessible whose children are applications.
        role: Role name to look for; matching is done by ``_count_role``.
        iterations: Number of timed passes; must be at least 1.

    Returns:
        Dict with ``role``, ``iterations``, the ``avg_ms`` / ``min_ms`` /
        ``max_ms`` timings rounded to two decimals, and ``match_count``, the
        number of matching elements found in the last pass.

    Raises:
        ValueError: If ``iterations`` is smaller than 1 (there would be no
            samples to average).
    """
    if iterations < 1:
        raise ValueError(f"iterations must be >= 1, got {iterations}")

    times = []
    match_count = 0

    for _ in range(iterations):
        start = time.perf_counter()
        match_count = 0

        for i in range(desktop.childCount):
            # One stale or unresponsive app must not abort the whole
            # benchmark, so guard app/window access per application.
            try:
                app = desktop.getChildAtIndex(i)
                if not app:
                    continue
                for j in range(app.childCount):
                    window = app.getChildAtIndex(j)
                    if not window:
                        continue
                    match_count += _count_role(window, role, 0, 15)
            except Exception as e:
                log_debug(f"Skipping app {i} during find benchmark: {e}")

        elapsed = time.perf_counter() - start
        times.append(elapsed * 1000)

    return {
        "role": role,
        "iterations": iterations,
        "avg_ms": round(sum(times) / len(times), 2),
        "min_ms": round(min(times), 2),
        "max_ms": round(max(times), 2),
        "match_count": match_count,
    }
|
||||
|
||||
|
||||
def _count_role(accessible, role, depth, max_depth):
|
||||
"""Count elements matching role."""
|
||||
if depth > max_depth:
|
||||
return 0
|
||||
|
||||
count = 0
|
||||
try:
|
||||
if role.lower() in accessible.getRoleName().lower():
|
||||
count = 1
|
||||
|
||||
for i in range(accessible.childCount):
|
||||
try:
|
||||
child = accessible.getChildAtIndex(i)
|
||||
if child:
|
||||
count += _count_role(child, role, depth + 1, max_depth)
|
||||
except Exception:
|
||||
pass
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
return count
|
||||
|
||||
|
||||
def print_coverage_report(results, find_benchmark):
    """Print the human-readable coverage report to stdout.

    Args:
        results: Sequence of per-app coverage records exposing ``app_name``,
            ``window_count``, ``element_count``, ``enumeration_time_ms``,
            ``has_coordinates``, ``has_text_elements``, ``has_actions``,
            ``roles_found`` and ``errors``.
        find_benchmark: Dict with ``role``, ``avg_ms``, ``min_ms`` and
            ``max_ms`` keys, or a falsy value to omit that section.
    """
    # The Time column is 9 wide because values are rendered as ">7.1f" plus
    # the "ms" suffix; the header must match or later columns shift by one.
    header = (f"{'App':<25} {'Win':>4} {'Elem':>6} {'Time':>9} "
              f"{'Coord':>6} {'Text':>5} {'Act':>4}")
    width = len(header)  # rules follow the table instead of a fixed 60

    print("=" * width)
    print("AT-SPI Coverage Audit")
    print("=" * width)

    # Summary table, largest apps first
    print(f"\n{header}")
    print("-" * width)

    for r in sorted(results, key=lambda x: x.element_count, reverse=True):
        coord = "✓" if r.has_coordinates else "-"
        text = "✓" if r.has_text_elements else "-"
        act = "✓" if r.has_actions else "-"
        print(f"{r.app_name[:24]:<25} {r.window_count:>4} {r.element_count:>6} "
              f"{r.enumeration_time_ms:>7.1f}ms {coord:>6} {text:>5} {act:>4}")

    total_windows = sum(r.window_count for r in results)
    total_elements = sum(r.element_count for r in results)
    total_time = sum(r.enumeration_time_ms for r in results)

    print("-" * width)
    print(f"{'TOTAL':<25} {total_windows:>4} {total_elements:>6} {total_time:>7.1f}ms")

    # Roles summary (at most 20 names are listed)
    all_roles = set()
    for r in results:
        all_roles.update(r.roles_found)
    role_names = sorted(all_roles)

    print(f"\nUnique roles found: {len(role_names)}")
    print(f"Roles: {', '.join(role_names[:20])}")
    if len(role_names) > 20:
        print(f"  ... and {len(role_names) - 20} more")

    # Find benchmark
    if find_benchmark:
        print(f"\nFind Benchmark (role='{find_benchmark['role']}'):")
        print(f"  Avg: {find_benchmark['avg_ms']:.1f}ms "
              f"(min: {find_benchmark['min_ms']:.1f}ms, max: {find_benchmark['max_ms']:.1f}ms)")

    # Apps with issues (at most 5 are listed; say so when more exist)
    apps_with_errors = [r for r in results if r.errors]
    if apps_with_errors:
        print(f"\nApps with AT-SPI errors: {len(apps_with_errors)}")
        for r in apps_with_errors[:5]:
            print(f"  {r.app_name}: {len(r.errors)} errors")
        if len(apps_with_errors) > 5:
            print(f"  ... and {len(apps_with_errors) - 5} more")
|
||||
|
||||
|
||||
def main():
    """Command-line entry point.

    Parses the options, benchmarks every application on the AT-SPI desktop
    (optionally only those whose name contains ``--app``), runs the find
    benchmark unless ``--skip-find`` is given, and prints the outcome either
    as JSON or as the text report.  Exits with status 1 when the desktop
    cannot be obtained.
    """
    cli = argparse.ArgumentParser(description="Benchmark AT-SPI coverage")
    cli.add_argument("--app", "-a", help="Benchmark specific app only")
    cli.add_argument("--json", action="store_true", help="Output as JSON")
    cli.add_argument("--debug", action="store_true", help="Show debug messages")
    cli.add_argument("--skip-find", action="store_true", help="Skip find benchmark")
    args = cli.parse_args()

    if args.debug:
        set_debug(True)

    try:
        desktop = pyatspi.Registry.getDesktop(0)
    except Exception as exc:
        print(f"Error accessing AT-SPI: {exc}", file=sys.stderr)
        sys.exit(1)

    results = []
    for index in range(desktop.childCount):
        app = desktop.getChildAtIndex(index)
        if not app:
            continue

        # Case-insensitive substring match; without --app every app is selected.
        selected = not args.app or args.app.lower() in (app.name or "").lower()
        if selected:
            results.append(benchmark_app(app))

    # The find benchmark always scans the whole desktop.
    find_benchmark = None if args.skip_find else benchmark_find(desktop)

    if not args.json:
        print_coverage_report(results, find_benchmark)
        return

    payload = {
        "coverage": [entry.to_dict() for entry in results],
        "find_benchmark": find_benchmark,
    }
    print(json.dumps(payload, indent=2))


if __name__ == "__main__":
    main()
|
||||
Loading…
Reference in a new issue