From 6eee2be66e4ba2c240c22c36daba29530d1f2b21 Mon Sep 17 00:00:00 2001 From: dan Date: Thu, 15 Jan 2026 11:57:03 -0800 Subject: [PATCH] feat(skills): add ui-query skill with list-windows Initial AT-SPI integration for semantic UI access: - list-windows.py: enumerate windows via accessibility tree - Wrapper script handles nix dependencies (pyatspi, pygobject3) - Outputs table or JSON with window geometry and states Co-Authored-By: Claude Opus 4.5 --- .beads/issues.jsonl | 2 +- skills/ui-query/SKILL.md | 63 +++++++++++ skills/ui-query/scripts/list-windows.py | 142 ++++++++++++++++++++++++ skills/ui-query/scripts/ui-query | 28 +++++ 4 files changed, 234 insertions(+), 1 deletion(-) create mode 100644 skills/ui-query/SKILL.md create mode 100755 skills/ui-query/scripts/list-windows.py create mode 100755 skills/ui-query/scripts/ui-query diff --git a/.beads/issues.jsonl b/.beads/issues.jsonl index fbad4cc..8bfd3e9 100644 --- a/.beads/issues.jsonl +++ b/.beads/issues.jsonl @@ -214,7 +214,7 @@ {"id":"skills-ojpq","title":"TEST: Feature Add - Mean Function","status":"open","priority":2,"issue_type":"task","owner":"dan@delpad","created_at":"2026-01-12T22:09:32.968817022-08:00","created_by":"dan","updated_at":"2026-01-12T22:09:32.968817022-08:00"} {"id":"skills-p2o","title":"Refactor update-agent-context.sh: array+loop for agents","description":"File: .specify/scripts/bash/update-agent-context.sh (772 lines)\n\nIssues:\n- 12 nearly-identical if-blocks in update_all_existing_agents() (lines 632-701)\n- Should be refactored into loop with array of agent configurations\n- Current pattern repeats: if [[ -f \"$CLAUDE_FILE\" ]]; then update_agent_file...\n\nFix:\n- Create AGENTS array with (file, name, format) tuples\n- Replace 12 if-blocks with single for loop\n- Estimated reduction: 60 lines\n\nSeverity: 
HIGH","status":"closed","priority":2,"issue_type":"task","created_at":"2025-12-24T02:50:57.385820971-05:00","updated_at":"2025-12-25T01:44:58.370191619-05:00","closed_at":"2025-12-25T01:44:58.370191619-05:00","close_reason":"update-agent-context.sh is .specify upstream code, not maintained here"} {"id":"skills-p3v","title":"Cross-language FFI wormholes via LSP","description":"Bridge FFI boundaries where standard LSPs go blind:\n- Rust extern C → clangd lookup\n- Go CGO → match C symbols\n- Python FFI → trace bindings\n\nGenerate synthetic go-to-definition maps. When hovering over C call in Rust, intercept hover request, query C LSP, inject C definition into Rust tooltip.\n\nEnables seamless polyglot navigation.","status":"closed","priority":4,"issue_type":"feature","created_at":"2025-12-24T02:29:57.597602745-05:00","updated_at":"2025-12-29T14:37:35.354771695-05:00","closed_at":"2025-12-29T14:37:35.354771695-05:00","close_reason":"Parked: waiting on gastown (Steve Yegge's orchestration layer for beads). 
Revisit when gastown lands."} -{"id":"skills-pdg","title":"Enable AT-SPI for UI tree access","description":"## Findings\n\nAT-SPI (Assistive Technology Service Provider Interface) provides semantic UI tree access - buttons, labels, text fields, their states and coordinates.\n\n### Current state\n- AT-SPI is **disabled** on this NixOS system\n- Environment has `NO_AT_BRIDGE=1` and `GTK_A11Y=none`\n- No apps are exposing accessibility info\n\n### To enable\n```nix\nservices.gnome.at-spi2-core.enable = true;\n```\n\nThen rebuild and re-login (apps must start fresh to register with bus).\n\n### App support\n- **GTK apps**: Should work automatically\n- **Qt apps**: Need `QT_LINUX_ACCESSIBILITY_ALWAYS_ON=1` env var\n- **Electron**: Varies by app, often poor support\n\n### Trade-offs\n- Adds runtime overhead to all GTK/Qt apps\n- May want as boot-time option rather than always-on\n- Only useful for automation/accessibility use cases\n\n### Tools once enabled\n- `python3-pyatspi` / `dogtail` for querying UI tree\n- `accerciser` for visual inspection of accessibility tree\n\n### Next steps\n**Blocked by dotfiles-0l3** - NixOS config change filed in dotfiles repo.\n\nAfter dotfiles change deployed:\n1. Test with common apps (Firefox, terminals, etc.)\n2. 
Build skill to query UI elements\n\n## Related\nParent epic: skills-kg7 (Desktop automation for Wayland/niri)","status":"open","priority":2,"issue_type":"task","created_at":"2025-12-17T13:59:55.799402507-08:00","updated_at":"2025-12-29T15:05:00.794702992-05:00"} +{"id":"skills-pdg","title":"Enable AT-SPI for UI tree access","description":"## Findings\n\nAT-SPI (Assistive Technology Service Provider Interface) provides semantic UI tree access - buttons, labels, text fields, their states and coordinates.\n\n### Current state\n- AT-SPI is **disabled** on this NixOS system\n- Environment has `NO_AT_BRIDGE=1` and `GTK_A11Y=none`\n- No apps are exposing accessibility info\n\n### To enable\n```nix\nservices.gnome.at-spi2-core.enable = true;\n```\n\nThen rebuild and re-login (apps must start fresh to register with bus).\n\n### App support\n- **GTK apps**: Should work automatically\n- **Qt apps**: Need `QT_LINUX_ACCESSIBILITY_ALWAYS_ON=1` env var\n- **Electron**: Varies by app, often poor support\n\n### Trade-offs\n- Adds runtime overhead to all GTK/Qt apps\n- May want as boot-time option rather than always-on\n- Only useful for automation/accessibility use cases\n\n### Tools once enabled\n- `python3-pyatspi` / `dogtail` for querying UI tree\n- `accerciser` for visual inspection of accessibility tree\n\n### Next steps\n**Blocked by dotfiles-0l3** - NixOS config change filed in dotfiles repo.\n\nAfter dotfiles change deployed:\n1. Test with common apps (Firefox, terminals, etc.)\n2. Build skill to query UI elements\n\n## Related\nParent epic: skills-kg7 (Desktop automation for Wayland/niri)","status":"closed","priority":2,"issue_type":"task","created_at":"2025-12-17T13:59:55.799402507-08:00","updated_at":"2026-01-15T11:51:14.328638901-08:00","closed_at":"2026-01-15T11:51:14.328638901-08:00","close_reason":"AT-SPI enabled and bus responding. 
Blocker dotfiles-0l3 resolved."} {"id":"skills-peoo","title":"Investigate inconsistencies in skills repo","status":"closed","priority":2,"issue_type":"task","owner":"dan@delpad","created_at":"2026-01-14T11:48:41.765229512-08:00","created_by":"dan","updated_at":"2026-01-15T11:36:36.572224184-08:00","closed_at":"2026-01-15T11:36:36.572224184-08:00","close_reason":"No description or context to act on"} {"id":"skills-prt","title":"worklog: remove inline section list, reference template","description":"SKILL.md lists 11 sections that duplicate worklog-template.org. Will drift. Replace with directive to parse sections from template dynamically. Found by bloat lens review.","status":"closed","priority":3,"issue_type":"task","created_at":"2025-12-25T02:03:16.811093872-05:00","updated_at":"2025-12-27T10:05:51.513685966-05:00","closed_at":"2025-12-27T10:05:51.513685966-05:00","close_reason":"Closed"} {"id":"skills-pu4","title":"Clean up stale beads.left.jsonl merge artifact","description":"bd doctor flagged multiple JSONL files. beads.left.jsonl is empty merge artifact that should be removed: git rm .beads/beads.left.jsonl","status":"closed","priority":2,"issue_type":"task","created_at":"2025-11-30T11:58:33.292221449-08:00","updated_at":"2025-11-30T12:37:49.916795223-08:00","closed_at":"2025-11-30T12:37:49.916795223-08:00"} diff --git a/skills/ui-query/SKILL.md b/skills/ui-query/SKILL.md new file mode 100644 index 0000000..8f53967 --- /dev/null +++ b/skills/ui-query/SKILL.md @@ -0,0 +1,63 @@ +--- +name: ui-query +description: Query UI elements via AT-SPI accessibility tree. Get window lists, extract text, find elements by role/name without OCR. +tools: scripts/ui-query +--- + +# UI Query Skill + +Query desktop UI elements via AT-SPI (Assistive Technology Service Provider Interface). This provides semantic access to UI data - buttons, labels, text fields, their states and positions - without OCR. 
+ +## Prerequisites + +AT-SPI must be enabled on the system: +- NixOS: `services.gnome.at-spi2-core.enable = true` +- Qt apps need: `QT_LINUX_ACCESSIBILITY_ALWAYS_ON=1` + +Apps must be started after AT-SPI is enabled to register with the bus. + +## Commands + +### list-windows + +List all windows visible to AT-SPI: + +```bash +scripts/ui-query list-windows # Table format +scripts/ui-query list-windows --json # JSON output +scripts/ui-query list-windows -v # Include element counts (slower) +``` + +Output includes: +- Application name +- Window name and role +- Window geometry (position, size) +- States (active, focused, visible, etc.) + +## Supported Applications + +| Type | Support | +|------|---------| +| GTK apps | Good - automatic AT-SPI support | +| Qt apps | Good - with QT_LINUX_ACCESSIBILITY_ALWAYS_ON | +| Electron | Varies - often poor support | +| Terminals | Basic - window frames only | + +## Debugging + +Check AT-SPI bus: +```bash +dbus-send --session --dest=org.a11y.Bus --print-reply /org/a11y/bus org.freedesktop.DBus.Peer.Ping +``` + +Explore accessibility tree visually: +```bash +accerciser +``` + +## Complementary Skills + +- **niri-window-capture**: Visual screenshots (what it looks like) +- **ui-query**: Semantic data (what it contains) + +Use together for comprehensive UI understanding. diff --git a/skills/ui-query/scripts/list-windows.py b/skills/ui-query/scripts/list-windows.py new file mode 100755 index 0000000..b202266 --- /dev/null +++ b/skills/ui-query/scripts/list-windows.py @@ -0,0 +1,142 @@ +#!/usr/bin/env python3 +"""List windows visible to AT-SPI accessibility tree. 
+
+Usage:
+    list-windows.py [--json] [--verbose]
+
+Options:
+    --json      Output as JSON
+    --verbose   Include child element counts
+"""
+
+import argparse
+import json
+import sys
+
+import pyatspi
+
+
+def _count_descendants(root):
+    """Count every accessible below root; unreadable nodes count as zero."""
+    total = 0
+    pending = [root]  # explicit stack: deep trees cannot exhaust recursion
+    while pending:
+        node = pending.pop()
+        try:
+            n_children = node.childCount  # read once per node
+            children = [node.getChildAtIndex(i) for i in range(n_children)]
+        except Exception:
+            continue
+        total += n_children
+        pending.extend(child for child in children if child)
+    return total
+
+
+def get_window_info(window, verbose=False):
+    """Describe one window accessible as a JSON-serializable dict.
+
+    Always sets "name", "role" and "states"; adds "geometry" when the
+    extents can be read and "child_count" (slow) when verbose is true.
+    """
+    info = {"name": window.name or "(unnamed)", "role": window.getRoleName()}
+
+    state = window.getState()
+    names = pyatspi.STATE_VALUE_TO_NAME
+    info["states"] = [names[s] for s in names if state.contains(s)]
+
+    # Geometry is best-effort: any failure simply leaves the key out.
+    try:
+        component = window.queryComponent()
+        if component:
+            rect = component.getExtents(pyatspi.DESKTOP_COORDS)
+            info["geometry"] = {
+                "x": rect.x,
+                "y": rect.y,
+                "width": rect.width,
+                "height": rect.height,
+            }
+    except Exception:
+        pass
+
+    if verbose:
+        info["child_count"] = _count_descendants(window)
+    return info
+
+
+def list_windows(verbose=False):
+    """Return one {"name", "windows"} dict per application that has windows.
+
+    verbose is passed through to get_window_info(). An application that
+    fails while being queried is reported on stderr and skipped.
+    """
+    desktop = pyatspi.Registry.getDesktop(0)
+    apps = []
+    for i in range(desktop.childCount):
+        # One hung or defunct application must not abort the whole listing.
+        try:
+            app = desktop.getChildAtIndex(i)
+            if not app:
+                continue
+            name = app.name or "(unnamed)"
+            children = map(app.getChildAtIndex, range(app.childCount))
+            windows = [get_window_info(w, verbose) for w in children if w]
+        except Exception as e:
+            print(f"Skipping application #{i}: {e}", file=sys.stderr)
+            continue
+
+        if windows:  # Only include apps with windows
+            apps.append({"name": name, "windows": windows})
+
+    return apps
+
+
+def print_table(apps):
+    """Print apps and their windows as an indented, human-readable list."""
+    if not apps:
+        print("No windows found via AT-SPI.")
+        print("\nTips:")
+        print(" - Apps started before AT-SPI enablement won't register")
+        print(" - GTK/Qt apps should work; Electron varies")
+        print(" - Try: accerciser (GUI) to explore the tree")
+        return
+
+    for app in apps:
+        print(f"\n{app['name']}")
+        print("-" * len(app["name"]))
+        for win in app["windows"]:
+            geo = win.get("geometry", {})
+            states_str = ", ".join(win.get("states", [])[:3])  # First 3 states
+            line = f" [{win['role']}] {win['name']}"
+            if geo:
+                line += f" ({geo.get('width', '?')}x{geo.get('height', '?')})"
+            if states_str:
+                line += f" [{states_str}]"
+            if "child_count" in win:
+                line += f" ({win['child_count']} elements)"
+            print(line)
+
+
+def main():
+    """Parse CLI flags, list windows and print them as a table or JSON."""
+    parser = argparse.ArgumentParser(description="List AT-SPI accessible windows")
+    parser.add_argument("--json", action="store_true", help="Output as JSON")
+    parser.add_argument("--verbose", "-v", action="store_true",
+                        help="Include child element counts (slower)")
+    args = parser.parse_args()
+
+    try:
+        apps = list_windows(verbose=args.verbose)
+    except Exception as e:
+        print(f"Error accessing AT-SPI: {e}", file=sys.stderr)
+        print("\nIs AT-SPI enabled?", file=sys.stderr)
+        print(" Check: dbus-send --session --dest=org.a11y.Bus --print-reply /org/a11y/bus org.freedesktop.DBus.Peer.Ping", file=sys.stderr)
+        sys.exit(1)
+
+    if args.json:
+        print(json.dumps(apps, indent=2))
+    else:
+        print_table(apps)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/skills/ui-query/scripts/ui-query b/skills/ui-query/scripts/ui-query
new file mode 100755
index 0000000..0433252
--- /dev/null
+++ b/skills/ui-query/scripts/ui-query
@@ -0,0 +1,41 @@
+#!/usr/bin/env bash
+# Wrapper for ui-query scripts with proper nix dependencies
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+
+# Print the command summary; the caller decides between stdout and stderr.
+usage() {
+    echo "Usage: ui-query <command> [options]"
+    echo ""
+    echo "Commands:"
+    echo " list-windows List all AT-SPI accessible windows"
+    echo ""
+    echo "Options are passed through to the underlying script."
+}
+
+# Detect which script to run
+CMD="${1:-list-windows}"
+shift || true
+
+case "$CMD" in
+    list-windows|list|ls)
+        SCRIPT="list-windows.py"
+        ;;
+    -h|--help|help)
+        usage
+        exit 0
+        ;;
+    *)
+        usage >&2
+        exit 1
+        ;;
+esac
+
+# nix-shell --run takes a single shell string, so quote every word with %q;
+# plain $* would re-split arguments and let metacharacters reach the shell.
+printf -v RUN_CMD '%q ' python3 "$SCRIPT_DIR/$SCRIPT" "$@"
+
+exec nix-shell -p "python3.withPackages (ps: [ ps.pyatspi ps.pygobject3 ])" \
+    at-spi2-core gobject-introspection \
+    --run "$RUN_CMD"