#!/usr/bin/env python3
"""Extract text content from a window via AT-SPI.

Usage:
    get-text.py <pattern>
    get-text.py --app <name>
    get-text.py --all

Options:
    <pattern>       Match window by name (substring, case-insensitive)
    --app <name>    Match by application name
    --all           Extract from all windows
    --json          Output as JSON
    --max-depth N   Max tree depth to traverse (default: 10)
"""

import argparse
import json
import sys
from dataclasses import dataclass, field

try:
    import pyatspi
except ImportError:
    # Defer the failure to find_windows() so that a missing binding is
    # reported through main()'s normal error path instead of a traceback,
    # and so the pure helpers below stay importable without AT-SPI.
    pyatspi = None


@dataclass
class TextNode:
    """A text-bearing element in the UI tree."""
    role: str
    name: str
    text: str = ""
    children: list = field(default_factory=list)

    def to_dict(self) -> dict:
        """Return a JSON-serializable dict; empty text/children are omitted."""
        d = {"role": self.role, "name": self.name}
        if self.text:
            d["text"] = self.text
        if self.children:
            d["children"] = [c.to_dict() for c in self.children]
        return d


def get_text_content(accessible) -> str:
    """Return the accessible's full text via its Text interface.

    Best-effort by design: any failure while querying the interface (for
    example the object not offering one) yields an empty string rather than
    an exception, so a single element cannot abort a tree walk.
    """
    try:
        text_iface = accessible.queryText()
        if not text_iface:
            return ""
        char_count = text_iface.characterCount
        if char_count <= 0:
            return ""
        return text_iface.getText(0, char_count) or ""
    except Exception:
        return ""


def extract_text_tree(accessible, max_depth=10, depth=0):
    """Recursively extract text from the accessibility tree.

    Args:
        accessible: Root object exposing ``getRoleName()``, ``name``,
            ``childCount`` and ``getChildAtIndex(i)``.
        max_depth: Deepest level (root is 0) that may appear in the result.
        depth: Current recursion depth; callers normally leave this at 0.

    Returns:
        A ``TextNode`` for ``accessible``, or ``None`` when the node is past
        ``max_depth``, cannot be read, or carries no name, text or surviving
        children.
    """
    if depth > max_depth:
        return None

    # Only the remote property reads are guarded; errors in our own logic
    # below are allowed to surface instead of being silently swallowed.
    try:
        role = accessible.getRoleName()
        name = accessible.name or ""
        child_count = accessible.childCount
    except Exception:
        return None

    text = get_text_content(accessible)

    # Skip empty non-container elements
    if not name and not text and child_count == 0:
        return None

    node = TextNode(role=role, name=name, text=text)

    # Children of a node sitting at max_depth would all be rejected by the
    # depth check above, so do not fetch them at all.
    if depth < max_depth:
        for i in range(child_count):
            try:
                child = accessible.getChildAtIndex(i)
            except Exception:
                continue
            if not child:
                continue
            child_node = extract_text_tree(child, max_depth, depth + 1)
            if child_node is not None:
                node.children.append(child_node)

    # Prune containers whose whole subtree turned out to be empty
    if not (node.name or node.text or node.children):
        return None

    return node


def flatten_text(node, indent=0) -> list:
    """Flatten a text tree into a list of indented, readable lines.

    Each line has the form ``[role] name | text``; the text part is omitted
    when it merely repeats the name. Containers without their own content
    are shown as a bare ``[role]`` so the hierarchy stays visible.
    """
    lines = []
    prefix = " " * indent

    content_parts = []
    if node.name:
        content_parts.append(node.name)
    if node.text and node.text != node.name:
        content_parts.append(node.text)

    if content_parts:
        content = " | ".join(content_parts)
        lines.append(f"{prefix}[{node.role}] {content}")
    elif node.children:
        # Container with children but no direct text
        lines.append(f"{prefix}[{node.role}]")

    for child in node.children:
        lines.extend(flatten_text(child, indent + 1))

    return lines


def find_windows(pattern=None, app_name=None, all_windows=False, desktop=None):
    """Find windows matching the given criteria.

    Args:
        pattern: Case-insensitive substring the window name must contain.
        app_name: Case-insensitive substring the application name must
            contain.
        all_windows: Select every window (still narrowed by ``pattern`` /
            ``app_name`` when those are given).
        desktop: Desktop accessible to enumerate. Defaults to
            ``pyatspi.Registry.getDesktop(0)``.

    Returns:
        A list of dicts with keys ``"app"``, ``"window"`` and
        ``"accessible"``. Empty when no criterion is supplied.

    Raises:
        RuntimeError: If no ``desktop`` is given and pyatspi is unavailable.
        Exception: Whatever the desktop lookup itself raises.
    """
    if desktop is None:
        if pyatspi is None:
            raise RuntimeError("pyatspi is not installed")
        desktop = pyatspi.Registry.getDesktop(0)

    if not (all_windows or pattern or app_name):
        return []

    # Lower-case the needles once instead of per window.
    app_needle = app_name.lower() if app_name else None
    window_needle = pattern.lower() if pattern else None

    matches = []
    for i in range(desktop.childCount):
        # Guard each application separately, mirroring the per-child
        # handling in extract_text_tree(): one accessible that raises must
        # not abort the enumeration of every other application.
        try:
            app = desktop.getChildAtIndex(i)
            if not app:
                continue
            app_label = app.name or ""
            if app_needle and app_needle not in app_label.lower():
                continue
            window_count = app.childCount
        except Exception:
            continue

        for j in range(window_count):
            try:
                window = app.getChildAtIndex(j)
                if not window:
                    continue
                window_name = window.name or ""
            except Exception:
                continue

            if window_needle and window_needle not in window_name.lower():
                continue

            matches.append({
                "app": app_label or "(unnamed)",
                "window": window_name,
                "accessible": window,
            })

    return matches


def main(argv=None):
    """Command-line entry point.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.

    Exits with status 1 when no selector is given, AT-SPI cannot be
    accessed, or no window matches; with status 2 on invalid arguments.
    """
    parser = argparse.ArgumentParser(description="Extract text from AT-SPI windows")
    parser.add_argument("pattern", nargs="?", help="Window name pattern to match")
    parser.add_argument("--app", help="Filter by application name")
    parser.add_argument("--all", action="store_true", help="Extract from all windows")
    parser.add_argument("--json", action="store_true", help="Output as JSON")
    parser.add_argument("--max-depth", type=int, default=10, help="Max tree depth")
    args = parser.parse_args(argv)

    if not args.pattern and not args.app and not args.all:
        parser.print_help()
        print("\nExamples:")
        print(" get-text.py 'Firefox' # Window containing 'Firefox'")
        print(" get-text.py --app waybar # All windows from waybar")
        print(" get-text.py --all # Everything (verbose!)")
        sys.exit(1)

    if args.max_depth < 0:
        # A negative depth would silently reject every window's root node.
        parser.error("--max-depth must be >= 0")

    try:
        windows = find_windows(args.pattern, args.app, args.all)
    except Exception as e:
        print(f"Error accessing AT-SPI: {e}", file=sys.stderr)
        sys.exit(1)

    if not windows:
        print("No matching windows found.", file=sys.stderr)
        sys.exit(1)

    # Keep windows whose tree came back empty (content=None) so the text
    # output can say so explicitly instead of dropping them without a trace.
    results = [
        {
            "app": win["app"],
            "window": win["window"],
            "content": extract_text_tree(win["accessible"], max_depth=args.max_depth),
        }
        for win in windows
    ]

    if args.json:
        # JSON lists only windows that yielded content, so every entry's
        # "content" is always an object.
        output = [{"app": r["app"], "window": r["window"],
                   "content": r["content"].to_dict()}
                  for r in results if r["content"] is not None]
        print(json.dumps(output, indent=2))
    else:
        for r in results:
            print(f"\n=== {r['app']} / {r['window']} ===\n")
            lines = flatten_text(r["content"]) if r["content"] is not None else []
            print("\n".join(lines) if lines else "(no text content)")


if __name__ == "__main__":
    main()
b/skills/ui-query/scripts/ui-query index 0433252..a086415 100755 --- a/skills/ui-query/scripts/ui-query +++ b/skills/ui-query/scripts/ui-query @@ -12,11 +12,15 @@ case "$CMD" in list-windows|list|ls) SCRIPT="list-windows.py" ;; + get-text|text) + SCRIPT="get-text.py" + ;; *) echo "Usage: ui-query [options]" echo "" echo "Commands:" echo " list-windows List all AT-SPI accessible windows" + echo " get-text Extract text content from a window" echo "" echo "Options are passed through to the underlying script." exit 1