feat(ui-query): add get-text.py for text extraction
Extracts text content from windows via the AT-SPI Text interface:
- Matches windows by name pattern or app name
- Traverses the accessibility tree to find text elements
- Outputs a hierarchical text structure or JSON

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
parent
0b0b6f49f9
commit
c876394460
205
skills/ui-query/scripts/get-text.py
Executable file
205
skills/ui-query/scripts/get-text.py
Executable file
|
|
@ -0,0 +1,205 @@
|
|||
#!/usr/bin/env python3
|
||||
"""Extract text content from a window via AT-SPI.
|
||||
|
||||
Usage:
|
||||
get-text.py <window-pattern>
|
||||
get-text.py --app <app-name>
|
||||
get-text.py --all
|
||||
|
||||
Options:
|
||||
<window-pattern> Match window by name (substring, case-insensitive)
|
||||
--app <name> Match by application name
|
||||
--all Extract from all windows
|
||||
--json Output as JSON
|
||||
--max-depth N Max tree depth to traverse (default: 10)
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import sys
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
import pyatspi
|
||||
|
||||
|
||||
@dataclass
class TextNode:
    """One element of the UI tree together with the text found on it.

    ``role`` and ``name`` are always present; ``text`` defaults to an empty
    string and ``children`` to a fresh empty list per instance.
    """

    role: str
    name: str
    text: str = ""
    children: list = field(default_factory=list)

    def to_dict(self):
        """Return a plain-dict form of this node and its descendants.

        ``role`` and ``name`` are always emitted; ``text`` and ``children``
        are only included when they are non-empty.
        """
        payload = {"role": self.role, "name": self.name}
        if self.text:
            payload["text"] = self.text
        if self.children:
            payload["children"] = [kid.to_dict() for kid in self.children]
        return payload
|
||||
|
||||
|
||||
def get_text_content(accessible):
    """Read the full text of an accessible through its Text interface.

    Returns the text from offset 0 up to the reported character count, or
    an empty string when the interface is missing, reports no characters,
    or anything goes wrong while querying it.
    """
    try:
        iface = accessible.queryText()
        if not iface:
            return ""
        length = iface.characterCount
        if length > 0:
            return iface.getText(0, length)
    except Exception:
        # Best-effort lookup: any failure is treated as "no text".
        pass
    return ""
|
||||
|
||||
|
||||
def extract_text_tree(accessible, max_depth=10, depth=0):
    """Build a TextNode tree from an accessible and its descendants.

    Recursion stops once ``depth`` exceeds ``max_depth``. Returns ``None``
    for subtrees that end up with no name, no text and no surviving
    children, and also whenever reading the accessible raises.
    """
    if depth > max_depth:
        return None

    try:
        role = accessible.getRoleName()
        name = accessible.name or ""
        text = get_text_content(accessible)

        # A childless element with neither name nor text has nothing to show.
        if not (name or text) and accessible.childCount == 0:
            return None

        node = TextNode(role=role, name=name, text=text)

        for index in range(accessible.childCount):
            try:
                child = accessible.getChildAtIndex(index)
                subtree = (
                    extract_text_tree(child, max_depth, depth + 1)
                    if child
                    else None
                )
                if subtree:
                    node.children.append(subtree)
            except Exception:
                # One failing child must not discard its siblings.
                continue

        # Keep the node only if it, or something below it, has content.
        if node.name or node.text or node.children:
            return node
        return None

    except Exception:
        return None
|
||||
|
||||
|
||||
def flatten_text(node, indent=0):
    """Render a text tree as a list of indented, human-readable lines.

    Each node contributes ``[role] name | text`` (text is omitted when it
    is empty or merely repeats the name). A node with children but no
    content of its own contributes a bare ``[role]`` line. Children follow
    with the indent increased by one.
    """
    pad = " " * indent

    parts = [node.name] if node.name else []
    if node.text and node.text != node.name:
        parts.append(node.text)

    if parts:
        joined = " | ".join(parts)
        rendered = [f"{pad}[{node.role}] {joined}"]
    elif node.children:
        # Container with children but no direct text
        rendered = [f"{pad}[{node.role}]"]
    else:
        rendered = []

    for kid in node.children:
        rendered += flatten_text(kid, indent + 1)

    return rendered
|
||||
|
||||
|
||||
def find_windows(pattern=None, app_name=None, all_windows=False):
    """Find windows on the AT-SPI desktop that match the given criteria.

    Args:
        pattern: Case-insensitive substring a window's name must contain.
            ``None`` or an empty string disables this filter.
        app_name: Case-insensitive substring the owning application's name
            must contain. ``None`` or an empty string disables this filter.
        all_windows: Select every window that passes the filters above.

    Returns:
        A list of dicts with the keys ``"app"`` (application name, or
        ``"(unnamed)"``), ``"window"`` (window name, possibly empty) and
        ``"accessible"`` (the window object itself). The list is empty when
        no criterion at all is supplied.

    Raises:
        Whatever obtaining the desktop or reading its child count raises;
        errors from an individual application or window are skipped.
    """
    # Nothing can be selected without a criterion, so skip the desktop walk.
    if not (all_windows or pattern or app_name):
        return []

    desktop = pyatspi.Registry.getDesktop(0)

    # Lower-case the needles once instead of on every comparison.
    pattern_lc = pattern.lower() if pattern else None
    app_name_lc = app_name.lower() if app_name else None

    matches = []

    for i in range(desktop.childCount):
        # A single application that fails to answer must not abort the
        # whole scan; this mirrors the per-child tolerance used when the
        # window trees are traversed.
        try:
            app = desktop.getChildAtIndex(i)
            if not app:
                continue

            app_label = app.name or ""

            # Filter by app name
            if app_name_lc and app_name_lc not in app_label.lower():
                continue

            window_count = app.childCount
        except Exception:
            continue

        for j in range(window_count):
            try:
                window = app.getChildAtIndex(j)
                if not window:
                    continue
                window_name = window.name or ""
            except Exception:
                continue

            # Filter by window pattern
            if pattern_lc and pattern_lc not in window_name.lower():
                continue

            matches.append({
                "app": app_label or "(unnamed)",
                "window": window_name,
                "accessible": window,
            })

    return matches
|
||||
|
||||
|
||||
def main():
    """Command-line entry point: select windows and print their text.

    Exits with status 1 when no selector is given, when looking up the
    windows raises, or when nothing matches. Otherwise prints either a
    JSON document or an indented text rendering per window.
    """
    cli = argparse.ArgumentParser(description="Extract text from AT-SPI windows")
    cli.add_argument("pattern", nargs="?", help="Window name pattern to match")
    cli.add_argument("--app", help="Filter by application name")
    cli.add_argument("--all", action="store_true", help="Extract from all windows")
    cli.add_argument("--json", action="store_true", help="Output as JSON")
    cli.add_argument("--max-depth", type=int, default=10, help="Max tree depth")
    opts = cli.parse_args()

    # At least one selector is required; otherwise show usage and fail.
    if not (opts.pattern or opts.app or opts.all):
        cli.print_help()
        print("\nExamples:")
        print(" get-text.py 'Firefox' # Window containing 'Firefox'")
        print(" get-text.py --app waybar # All windows from waybar")
        print(" get-text.py --all # Everything (verbose!)")
        sys.exit(1)

    try:
        found = find_windows(opts.pattern, opts.app, opts.all)
    except Exception as e:
        print(f"Error accessing AT-SPI: {e}", file=sys.stderr)
        sys.exit(1)

    if not found:
        print("No matching windows found.", file=sys.stderr)
        sys.exit(1)

    # Keep only the windows whose tree produced some content.
    extracted = []
    for entry in found:
        tree = extract_text_tree(entry["accessible"], max_depth=opts.max_depth)
        if tree:
            extracted.append((entry["app"], entry["window"], tree))

    if opts.json:
        payload = [
            {"app": app, "window": title, "content": tree.to_dict()}
            for app, title, tree in extracted
        ]
        print(json.dumps(payload, indent=2))
        return

    for app, title, tree in extracted:
        print(f"\n=== {app} / {title} ===\n")
        lines = flatten_text(tree)
        print("\n".join(lines) if lines else "(no text content)")


# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
||||
|
|
@ -12,11 +12,15 @@ case "$CMD" in
|
|||
list-windows|list|ls)
|
||||
SCRIPT="list-windows.py"
|
||||
;;
|
||||
get-text|text)
|
||||
SCRIPT="get-text.py"
|
||||
;;
|
||||
*)
|
||||
echo "Usage: ui-query <command> [options]"
|
||||
echo ""
|
||||
echo "Commands:"
|
||||
echo " list-windows List all AT-SPI accessible windows"
|
||||
echo " get-text Extract text content from a window"
|
||||
echo ""
|
||||
echo "Options are passed through to the underlying script."
|
||||
exit 1
|
||||
|
|
|
|||
Loading…
Reference in a new issue