feat(ui-query): add get-text.py for text extraction

Extracts text content from windows via AT-SPI Text interface:
- Matches windows by name pattern or app name
- Traverses accessibility tree to find text elements
- Outputs hierarchical text structure or JSON

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
dan 2026-01-15 14:15:46 -08:00
parent 0b0b6f49f9
commit c876394460
2 changed files with 209 additions and 0 deletions

View file

@ -0,0 +1,205 @@
#!/usr/bin/env python3
"""Extract text content from a window via AT-SPI.
Usage:
get-text.py <window-pattern>
get-text.py --app <app-name>
get-text.py --all
Options:
<window-pattern> Match window by name (substring, case-insensitive)
--app <name> Match by application name
--all Extract from all windows
--json Output as JSON
--max-depth N Max tree depth to traverse (default: 10)
"""
import argparse
import json
import sys
from dataclasses import dataclass, field
import pyatspi
@dataclass
class TextNode:
    """One element of the extracted UI tree together with its text.

    Fields:
        role: Role name of the element.
        name: Name of the element (may be empty).
        text: Text content of the element (may be empty).
        children: Nested ``TextNode`` objects, in tree order.
    """

    role: str
    name: str
    text: str = ""
    children: list = field(default_factory=list)

    def to_dict(self):
        """Return a plain-dict form of this node and its descendants.

        ``role`` and ``name`` are always present; ``text`` and
        ``children`` are only included when they are non-empty.
        """
        payload = {"role": self.role, "name": self.name}
        if self.text:
            payload["text"] = self.text
        nested = [child.to_dict() for child in self.children]
        if nested:
            payload["children"] = nested
        return payload
def get_text_content(accessible):
    """Read the full text of *accessible* through its Text interface.

    Best effort: if querying the interface or reading from it raises,
    the error is swallowed and an empty string is returned. An empty
    string is also returned when the interface is falsy or reports no
    characters.
    """
    try:
        iface = accessible.queryText()
        if not iface:
            return ""
        length = iface.characterCount
        if length > 0:
            # Fetch the whole range [0, length) in one call.
            return iface.getText(0, length)
    except Exception:
        # Deliberately ignored: any failure here just means "no text".
        pass
    return ""
def extract_text_tree(accessible, max_depth=10, depth=0):
    """Build a ``TextNode`` tree for *accessible* and its descendants.

    Args:
        accessible: Element to start from.
        max_depth: Deepest level (inclusive) that is still visited.
        depth: Level of *accessible*; callers normally leave this at 0.

    Returns:
        A ``TextNode``, or ``None`` when the depth limit is exceeded,
        when the element and all of its descendants carry no name or
        text, or when reading the element raises.
    """
    if depth > max_depth:
        return None

    try:
        role = accessible.getRoleName()
        name = accessible.name or ""
        text = get_text_content(accessible)
        has_label = bool(name or text)

        # A leaf with neither name nor text contributes nothing.
        if not has_label and accessible.childCount == 0:
            return None

        node = TextNode(role=role, name=name, text=text)

        for index in range(accessible.childCount):
            try:
                child = accessible.getChildAtIndex(index)
                subtree = (
                    extract_text_tree(child, max_depth, depth + 1)
                    if child
                    else None
                )
                if subtree:
                    node.children.append(subtree)
            except Exception:
                # A failing child is skipped; its siblings are still visited.
                continue

        # Keep the node only if it or something below it has content.
        if has_label or node.children:
            return node
        return None
    except Exception:
        return None
def flatten_text(node, indent=0):
    """Render *node* and its descendants as a list of indented lines.

    Each emitted line has the form ``[role] name | text``; the text part
    is left out when it is empty or equal to the name. A node with no
    name or text but with children is shown as ``[role]`` alone, and a
    node with neither produces no line of its own. Children are rendered
    one indent level deeper.
    """
    pad = " " * indent

    parts = [node.name] if node.name else []
    if node.text and node.text != node.name:
        parts.append(node.text)

    if parts:
        rendered = [f"{pad}[{node.role}] " + " | ".join(parts)]
    elif node.children:
        # Pure container: show the role so the nesting stays readable.
        rendered = [f"{pad}[{node.role}]"]
    else:
        rendered = []

    for child in node.children:
        rendered += flatten_text(child, indent + 1)
    return rendered
def find_windows(pattern=None, app_name=None, all_windows=False):
    """Find windows on desktop 0 that match the given criteria.

    Args:
        pattern: Case-insensitive substring that the window name must
            contain; falsy means no window-name filtering.
        app_name: Case-insensitive substring that the application name
            must contain; falsy means no application filtering.
        all_windows: Collect every window that passes the filters.

    Returns:
        A list of dicts with the keys ``"app"`` (application name, or
        ``"(unnamed)"``), ``"window"`` (window name, possibly empty) and
        ``"accessible"`` (the window object). The list is empty when
        none of the three arguments is truthy.

    Raises:
        Whatever obtaining the desktop or its child count raises.
        Failures on an individual application or window are skipped.
    """
    desktop = pyatspi.Registry.getDesktop(0)

    # Normalise the filters once instead of once per application/window.
    app_filter = app_name.lower() if app_name else None
    window_filter = pattern.lower() if pattern else None
    # Without any selector nothing is ever collected.
    selecting = bool(all_windows or pattern or app_name)

    matches = []
    for i in range(desktop.childCount):
        # One application failing to answer must not abort the whole
        # search, so errors raised while reading it just skip that app.
        try:
            app = desktop.getChildAtIndex(i)
            if not app:
                continue
            app_label = app.name or ""
            if app_filter and app_filter not in app_label.lower():
                continue
            window_total = app.childCount
        except Exception:
            continue

        for j in range(window_total):
            # Same tolerance per window: skip the ones that cannot be read.
            try:
                window = app.getChildAtIndex(j)
                if not window:
                    continue
                window_name = window.name or ""
            except Exception:
                continue

            if window_filter and window_filter not in window_name.lower():
                continue

            if selecting:
                matches.append({
                    "app": app_label or "(unnamed)",
                    "window": window_name,
                    "accessible": window,
                })
    return matches
def main():
    """Command-line entry point.

    Parses the arguments, looks up the matching windows, extracts their
    text trees and prints them either as JSON or as indented text.
    Exits with status 1 when no selector is given, when looking up the
    windows raises, or when no window matches.
    """
    cli = argparse.ArgumentParser(description="Extract text from AT-SPI windows")
    cli.add_argument("pattern", nargs="?", help="Window name pattern to match")
    cli.add_argument("--app", help="Filter by application name")
    for flag, help_text in (
        ("--all", "Extract from all windows"),
        ("--json", "Output as JSON"),
    ):
        cli.add_argument(flag, action="store_true", help=help_text)
    cli.add_argument("--max-depth", type=int, default=10, help="Max tree depth")
    opts = cli.parse_args()

    # At least one selector is required.
    if not (opts.pattern or opts.app or opts.all):
        cli.print_help()
        for line in (
            "\nExamples:",
            " get-text.py 'Firefox' # Window containing 'Firefox'",
            " get-text.py --app waybar # All windows from waybar",
            " get-text.py --all # Everything (verbose!)",
        ):
            print(line)
        sys.exit(1)

    try:
        windows = find_windows(opts.pattern, opts.app, opts.all)
    except Exception as e:
        print(f"Error accessing AT-SPI: {e}", file=sys.stderr)
        sys.exit(1)

    if not windows:
        print("No matching windows found.", file=sys.stderr)
        sys.exit(1)

    # Keep only the windows that yielded a tree.
    extracted = []
    for entry in windows:
        tree = extract_text_tree(entry["accessible"], max_depth=opts.max_depth)
        if tree:
            extracted.append((entry["app"], entry["window"], tree))

    if opts.json:
        payload = [
            {"app": app_label, "window": title, "content": tree.to_dict()}
            for app_label, title, tree in extracted
        ]
        print(json.dumps(payload, indent=2))
        return

    for app_label, title, tree in extracted:
        print(f"\n=== {app_label} / {title} ===\n")
        lines = flatten_text(tree)
        if lines:
            print("\n".join(lines))
        else:
            print("(no text content)")


if __name__ == "__main__":
    main()

View file

@ -12,11 +12,15 @@ case "$CMD" in
list-windows|list|ls)
SCRIPT="list-windows.py"
;;
get-text|text)
SCRIPT="get-text.py"
;;
*)
echo "Usage: ui-query <command> [options]"
echo ""
echo "Commands:"
echo " list-windows List all AT-SPI accessible windows"
echo " get-text Extract text content from a window"
echo ""
echo "Options are passed through to the underlying script."
exit 1