feat(ui-query): add find-element.py for element search

Search AT-SPI tree for elements by role and/or name:
- Filter by role (button, label, text, menu-item, etc.)
- Filter by name substring (case-insensitive)
- Scope to specific window or app
- Returns position, states, path context

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
dan 2026-01-15 14:17:37 -08:00
parent d3b24e91c7
commit 6ed762ad05
2 changed files with 251 additions and 2 deletions

View file

@ -0,0 +1,245 @@
#!/usr/bin/env python3
"""Find UI elements by role or name via AT-SPI.
Usage:
find-element.py --role button
find-element.py --name "Save"
find-element.py --role button --name "OK"
find-element.py --window "Firefox" --role link
Options:
--role ROLE Filter by role (button, label, text, menu-item, etc.)
--name PATTERN Filter by name (substring, case-insensitive)
--window PATTERN Limit search to matching window
--app APP Limit search to application
--json Output as JSON
--limit N Max results (default: 20)
"""
import argparse
import json
import sys
import pyatspi
def get_element_info(element):
    """Describe an accessible element as a plain dictionary.

    The result always carries ``role``, ``name`` and ``states``. The keys
    ``description``, ``geometry``, ``text`` and ``path`` are added only when
    the corresponding data could be read and is non-empty.
    """
    details = {
        "role": element.getRoleName(),
        "name": element.name or "",
    }

    description = element.description
    if description:
        details["description"] = description

    # Names of every known state that is currently set on the element.
    state_set = element.getState()
    state_names = pyatspi.STATE_VALUE_TO_NAME
    details["states"] = [
        state_names[value] for value in state_names if state_set.contains(value)
    ]

    # Extents in desktop coordinates; best-effort, skipped on any failure.
    try:
        component = element.queryComponent()
        if component:
            extents = component.getExtents(pyatspi.DESKTOP_COORDS)
            details["geometry"] = dict(
                x=extents.x,
                y=extents.y,
                width=extents.width,
                height=extents.height,
            )
    except Exception:
        pass

    # Preview of at most the first 100 characters of text; also best-effort.
    try:
        text_iface = element.queryText()
        if text_iface:
            char_count = text_iface.characterCount
            if char_count > 0:
                snippet = text_iface.getText(0, min(100, char_count))
                if snippet:
                    details["text"] = snippet
    except Exception:
        pass

    # Walk up to five ancestors, nearest first, then join them outermost
    # first. Unnamed ancestors show up as "[role]", except the application
    # and desktop frame, which are left out.
    ancestors = []
    node = element
    for _ in range(5):
        node = node.parent
        if not node:
            break
        label = node.name
        if label:
            ancestors.append(label)
            continue
        role_name = node.getRoleName()
        if role_name not in ("application", "desktop frame"):
            ancestors.append(f"[{role_name}]")
    if ancestors:
        details["path"] = " > ".join(reversed(ancestors))

    return details
def _normalize_role(text):
    """Lower-case a role string and turn spaces/underscores into hyphens."""
    return text.lower().replace("_", "-").replace(" ", "-")


def find_elements(accessible, role=None, name=None, results=None, max_depth=15, depth=0, limit=20):
    """Recursively collect descriptions of elements matching role and/or name.

    Args:
        accessible: Root accessible of the subtree to search.
        role: Optional role filter, matched as a case-insensitive substring.
            Spaces, underscores and hyphens are treated alike, so
            "push button", "push_button" and "push-button" are equivalent.
        name: Optional name filter, matched as a case-insensitive substring.
        results: List to append to; a new list is created when omitted.
        max_depth: Deepest tree level (relative to the first call) to visit.
        depth: Current recursion depth; callers normally leave this at 0.
        limit: Stop once ``results`` holds this many entries.

    Returns:
        The ``results`` list, holding one ``get_element_info`` dictionary per
        match. Nothing is collected when neither ``role`` nor ``name`` is set.
    """
    if results is None:
        results = []
    if len(results) >= limit or depth > max_depth:
        return results

    # Only collect when at least one filter is active.
    if role or name:
        try:
            matches = True
            if role:
                # Normalise both sides so the query may use whichever
                # separator the user prefers.
                matches = _normalize_role(role) in _normalize_role(accessible.getRoleName())
            if matches and name:
                matches = name.lower() in (accessible.name or "").lower()
            if matches:
                results.append(get_element_info(accessible))
        except Exception:
            # Best-effort: an element that cannot be inspected is skipped,
            # but its children are still searched below.
            pass

    try:
        child_count = accessible.childCount
    except Exception:
        return results

    for index in range(child_count):
        if len(results) >= limit:
            break
        try:
            child = accessible.getChildAtIndex(index)
        except Exception:
            continue
        if child:
            find_elements(child, role, name, results, max_depth, depth + 1, limit)
    return results
def find_windows(pattern=None, app_name=None):
    """List top-level windows, optionally filtered by window and app name.

    Args:
        pattern: Optional case-insensitive substring the window name must
            contain.
        app_name: Optional case-insensitive substring the application name
            must contain.

    Returns:
        A list of dictionaries with the keys ``app`` (application name or
        "(unnamed)"), ``window`` (window name, possibly empty) and
        ``accessible`` (the window's accessible object).

    Raises:
        Whatever obtaining the desktop or its child count raises; failures
        inside a single application or window are skipped instead.
    """
    desktop = pyatspi.Registry.getDesktop(0)
    wanted_window = pattern.lower() if pattern else None
    wanted_app = app_name.lower() if app_name else None

    matches = []
    for app_index in range(desktop.childCount):
        # One unresponsive application must not abort the whole listing.
        try:
            app = desktop.getChildAtIndex(app_index)
            if not app:
                continue
            app_label = app.name or ""
            if wanted_app and wanted_app not in app_label.lower():
                continue
            window_count = app.childCount
        except Exception:
            continue

        for window_index in range(window_count):
            try:
                window = app.getChildAtIndex(window_index)
                if not window:
                    continue
                window_name = window.name or ""
            except Exception:
                continue
            if wanted_window and wanted_window not in window_name.lower():
                continue
            matches.append({
                "app": app_label or "(unnamed)",
                "window": window_name,
                "accessible": window,
            })
    return matches
def print_results(results, as_json=False):
    """Write search results to stdout.

    With ``as_json`` set, the list is dumped as indented JSON. Otherwise each
    entry is printed as a numbered block with its role and name, followed by
    a text preview (first 60 characters), position, path and selected states
    when those are present.
    """
    if as_json:
        print(json.dumps(results, indent=2))
        return
    if not results:
        print("No matching elements found.")
        return

    # States worth showing in the compact listing.
    # NOTE(review): "disabled" may never occur among AT-SPI state names
    # (there seem to be only "enabled"/"sensitive") - confirm.
    highlighted = ("focused", "selected", "checked", "expanded", "disabled")

    for number, entry in enumerate(results, 1):
        box = entry.get("geometry", {})
        if box:
            position = f"({box['x']},{box['y']} {box['width']}x{box['height']})"
        else:
            position = ""

        label = entry['name'] or '(unnamed)'
        print(f"\n{number}. [{entry['role']}] {label}")

        text = entry.get("text")
        if text:
            ellipsis = '...' if len(text) > 60 else ''
            print(f" Text: {text[:60]}{ellipsis}")
        if position:
            print(f" Position: {position}")
        if entry.get("path"):
            print(f" Path: {entry['path']}")

        shown_states = [s for s in entry.get("states", []) if s in highlighted]
        if shown_states:
            print(f" States: {', '.join(shown_states)}")
def main():
    """Command-line entry point: parse options, search, print the matches.

    Exits with status 1 when neither --role nor --name is given (after
    printing help and examples) or when the window list cannot be obtained.
    Otherwise searches each matching window in turn until --limit results
    have been collected and prints them via ``print_results``.
    """
    parser = argparse.ArgumentParser(description="Find UI elements via AT-SPI")
    parser.add_argument("--role", "-r", help="Filter by role (button, label, text, etc.)")
    parser.add_argument("--name", "-n", help="Filter by name (substring match)")
    parser.add_argument("--window", "-w", help="Limit to matching window")
    parser.add_argument("--app", "-a", help="Limit to application")
    parser.add_argument("--json", action="store_true", help="Output as JSON")
    parser.add_argument("--limit", type=int, default=20, help="Max results")
    args = parser.parse_args()

    # At least one element filter is required.
    if not args.role and not args.name:
        parser.print_help()
        print("\nExamples:")
        print(" find-element.py --role button # All buttons")
        print(" find-element.py --name 'Save' # Elements named 'Save'")
        print(" find-element.py --role button --name OK # OK buttons")
        print(" find-element.py -w Firefox -r link # Links in Firefox")
        sys.exit(1)

    try:
        windows = find_windows(args.window, args.app)
    except Exception as e:
        print(f"Error accessing AT-SPI: {e}", file=sys.stderr)
        sys.exit(1)

    # With no --window/--app filter, find_windows already returned every
    # window, so an empty list needs no second enumeration. The empty result
    # is still printed below so output stays consistent.
    if not windows and (args.window or args.app):
        print("No matching windows found.", file=sys.stderr)

    all_results = []
    for win in windows:
        remaining = args.limit - len(all_results)
        if remaining <= 0:
            break
        all_results.extend(
            find_elements(
                win["accessible"],
                role=args.role,
                name=args.name,
                limit=remaining,
            )
        )

    print_results(all_results, args.json)
# Run the command-line interface only when executed as a script.
if __name__ == "__main__":
    main()

View file

@ -15,12 +15,16 @@ case "$CMD" in
get-text|text)
SCRIPT="get-text.py"
;;
find-element|find|search)
SCRIPT="find-element.py"
;;
*)
echo "Usage: ui-query <command> [options]"
echo ""
echo "Commands:"
echo " list-windows List all AT-SPI accessible windows"
echo " get-text Extract text content from a window"
echo " list-windows List all AT-SPI accessible windows"
echo " get-text Extract text content from a window"
echo " find-element Find elements by role or name"
echo ""
echo "Options are passed through to the underlying script."
exit 1