From 91f324949cac34821b67b24760aacbbca4da1c7f Mon Sep 17 00:00:00 2001 From: dan Date: Sat, 10 Jan 2026 15:56:53 -0800 Subject: [PATCH] bd sync: 2026-01-10 15:56:52 --- .beads/issues.jsonl | 2 +- .beads/last-touched | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.beads/issues.jsonl b/.beads/issues.jsonl index 8080a04..87261c4 100644 --- a/.beads/issues.jsonl +++ b/.beads/issues.jsonl @@ -147,7 +147,7 @@ {"id":"skills-s6y","title":"Multi-agent orchestration: Lego brick architecture","description":"Simple, composable primitives for multi-agent coordination. Inspired by Gastown but dramatically simpler - Lego bricks not pirate ships.\n\n## Architecture\n- Human-attended orchestrator (any agent)\n- Background workers (any agent, own git branch)\n- Local message passing (.worker-state/messages/*.jsonl)\n- Review gates (review-gate CLI)\n- File-based state (.worker-state/)\n\n## Core Primitives\n1. worker spawn - start background agent on own branch\n2. worker status - check all workers via local files\n3. worker merge - merge completed worker branch\n4. review-gate - quality gates (pass/fail)\n5. worker stuck - detect non-progress (Rubber Duck)\n6. worker veto - block without doing (for reviewers)\n\n## Phased Approach\nPhase 1 (Now): Simple JSONL, basic primitives, pass/fail review\nPhase 2 (When Needed): Threading, SQLite cache, richer review scoring\nPhase 3 (Future): Talu or service layer for rich querying\n\n## Key Principles\n- Everything is local filesystem (no network for coordination)\n- Any agent that can read/write files can participate\n- Git branches for code isolation\n- Start simple, grow as needed\n- 80-90% automation realistic (human for the rest)\n\n## Research Sources\n- OpenHands: Worker → critique → feedback loop, git-based coordination\n- JWZ (emes): JSONL + SQLite, topic-based messaging\n- Gastown: What NOT to do (too complex, Kubernetes-level)\n- LangGraph: State serialization breakpoints\n- MetaGPT: Role-based agents","status":"open","priority":1,"issue_type":"epic","created_at":"2026-01-10T12:14:16.141746066-08:00","created_by":"dan","updated_at":"2026-01-10T13:34:00.126490996-08:00"} {"id":"skills-s92","title":"Add tests for config injection (deploy-skill.sh)","description":"File: bin/deploy-skill.sh (lines 112-137)\n\nCritical logic with NO test coverage:\n- Idempotency (running twice should be safe)\n- Correct brace matching in Nix\n- Syntax validity of injected config\n- Rollback on failure\n\nRisk: MEDIUM-HIGH - can break dotfiles Nix config\n\nFix:\n- Test idempotent injection\n- Validate Nix syntax after injection\n- Test with malformed input\n\nSeverity: MEDIUM","status":"closed","priority":3,"issue_type":"task","created_at":"2025-12-24T02:51:01.314513824-05:00","updated_at":"2026-01-06T16:29:18.728097676-08:00","closed_at":"2026-01-06T16:29:18.728097676-08:00","close_reason":"21 tests added covering idempotency, brace preservation, inject_home_file wrapper, edge cases"} {"id":"skills-sh6","title":"Research: OpenHands iterative refinement pattern","description":"Document OpenHands SDK patterns for our architecture.\n\n## Iterative Refinement Loop\n1. Worker agent does work\n2. Critique agent evaluates (correctness, quality, completeness)\n3. If not good → worker tries again with feedback\n4. Repeat until standard met\n\n## Parallel Agent Orchestration\n- Git-based coordination (not direct communication)\n- Each agent works on own branch\n- PRs to intermediate 'rolling branch'\n- Human reviews and merges\n- Agents pull latest, handle conflicts\n\n## Key Quote\n'Don't expect 100% automation—tasks are 80-90% automatable.\nYou need a human who understands full context.'\n\n## Mapping to Our Architecture\n- Worker = their refactoring agent\n- Reviewer = their critique agent\n- review-gate = their quality threshold\n- Human orchestrator = their human on rolling branch\n\n## Sources\n- https://openhands.dev/blog/automating-massive-refactors-with-parallel-agents\n- https://arxiv.org/abs/2511.03690\n- https://docs.openhands.dev/sdk","status":"open","priority":3,"issue_type":"task","created_at":"2026-01-10T12:24:02.368542878-08:00","created_by":"dan","updated_at":"2026-01-10T12:24:02.368542878-08:00","dependencies":[{"issue_id":"skills-sh6","depends_on_id":"skills-s6y","type":"blocks","created_at":"2026-01-10T12:24:07.013388857-08:00","created_by":"dan"}]} -{"id":"skills-sse","title":"Design: worker spawn/status primitives","description":"Basic worker lifecycle management.\n\n## Commands\n- worker spawn \"task\" --id X - runs agent in background\n- worker status - shows all worker states\n- worker kill X - terminate worker\n\n## State\n.worker-state/X.json:\n - id, status, task, started_at\n - output (truncated)\n - exit_code (when done)\n\n## Implementation\n~50 lines bash wrapper that:\n- Spawns claude/gemini/codex in background\n- Redirects output to log file\n- Writes status JSON\n- Updates on completion","notes":"MVP Tier 1: Core CLI commands for worker spawn/status/assign/merge","status":"open","priority":1,"issue_type":"task","created_at":"2026-01-10T12:14:33.115131833-08:00","created_by":"dan","updated_at":"2026-01-10T15:44:17.939255017-08:00","dependencies":[{"issue_id":"skills-sse","depends_on_id":"skills-s6y","type":"blocks","created_at":"2026-01-10T12:15:10.014285119-08:00","created_by":"dan"}]} +{"id":"skills-sse","title":"Design: worker spawn/status primitives","description":"Basic worker lifecycle management.\n\n## Commands\n- worker spawn \"task\" --id X - runs agent in background\n- worker status - shows all worker states\n- worker kill X - terminate worker\n\n## State\n.worker-state/X.json:\n - id, status, task, started_at\n - output (truncated)\n - exit_code (when done)\n\n## Implementation\n~50 lines bash wrapper that:\n- Spawns claude/gemini/codex in background\n- Redirects output to log file\n- Writes status JSON\n- Updates on completion","design":"docs/design/worker-cli-primitives.md","notes":"Design complete. Consensus from 4 models (gemini, gpt, qwen, sonar): (1) spawn prepares workspace only, doesn't start agent, (2) Python CLI, (3) all commands idempotent, (4) Worker ID = Task ID, (5) SQLite as state truth. Commands: spawn/status/merge (human), start/done/heartbeat (agent). Local .worker-ctx.json for context discovery. Hybrid approach for heartbeats.","status":"open","priority":1,"issue_type":"task","created_at":"2026-01-10T12:14:33.115131833-08:00","created_by":"dan","updated_at":"2026-01-10T15:56:48.549069038-08:00","dependencies":[{"issue_id":"skills-sse","depends_on_id":"skills-s6y","type":"blocks","created_at":"2026-01-10T12:15:10.014285119-08:00","created_by":"dan"}]} {"id":"skills-thk","title":"Design: Hybrid hook + gate architecture","description":"Design enforcement that uses hooks where available, orchestrator gates elsewhere.\n\n## Hook-Capable Agents (Claude, Gemini)\n- Stop hook checks beads for review status\n- Mechanical enforcement - agent can't bypass\n\n## Non-Hook Agents (OpenCode, Codex) \n- Orchestrator pattern enforces gate\n- Orchestrator checks beads before declaring done\n- Worker can't bypass because doesn't control session\n\n## Shared Components\n- beads: persistent state (issues, review status)\n- jwz: transient state (session messages, async handoffs)\n- review-gate CLI: checks state, returns exit code\n\n## Deliverable\nArchitecture doc showing:\n1. Hook configuration for Claude/Gemini\n2. Orchestrator flow for OpenCode/Codex\n3. State schema in beads\n4. review-gate CLI design","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-09T19:01:24.270855877-08:00","created_by":"dan","updated_at":"2026-01-09T19:33:36.705975116-08:00","closed_at":"2026-01-09T19:33:36.705975116-08:00","close_reason":"Consolidated into skills-8sj"} {"id":"skills-tta","title":"Design: Circuit breaker patterns","description":"Design circuit breakers to prevent agent infinite loops.\n\n## Patterns to Implement\n\n### Semantic Drift Detection\n- Embed last N agent thoughts\n- If \u003e95% similar, inject \"try different approach\"\n- Use cheap embedding model\n\n### Three-Strike Tool Rule \n- Track tool call signatures (tool + args + error)\n- 3 identical failures → force strategy shift\n- Implement in PostToolUse hook\n\n### Budget-Based Interrupts\n- Allocate token budget per sub-task\n- Pause if \u003e50% budget used with \u003c30% progress\n- Request plan refinement\n\n### Time-Based Breaker\n- Timeout per task type\n- Escalate to review if exceeded\n\n## Implementation Options\n- Hook-based (Claude/Gemini)\n- Wrapper-based (all agents)\n- Orchestrator-enforced (all agents)\n\n## Deliverable\n- Circuit breaker design doc\n- Prototype implementation for one pattern","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-09T19:01:44.536499408-08:00","created_by":"dan","updated_at":"2026-01-09T19:59:37.700476328-08:00","closed_at":"2026-01-09T19:59:37.700476328-08:00","close_reason":"Covered in architecture design doc (docs/design/cross-agent-enforcement-architecture.md)"} {"id":"skills-ty7","title":"Define trace levels (audit vs debug)","description":"Two trace levels to manage noise vs utility:\n\n1. Audit trace (minimal, safe, always on):\n - skill id/ref, start/end\n - high-level checkpoints\n - artifact hashes/paths\n - exit status\n\n2. Debug trace (opt-in, verbose):\n - tool calls with args\n - stdout/stderr snippets\n - expanded inputs\n - timing details\n\nConsider OpenTelemetry span model as reference.\nGPT proposed this; Gemini focused on rotation/caps instead.","status":"closed","priority":3,"issue_type":"task","created_at":"2025-12-23T19:49:48.514684945-05:00","updated_at":"2025-12-29T13:55:35.838961236-05:00","closed_at":"2025-12-29T13:55:35.838961236-05:00","close_reason":"Parked with ADR-001: skills-molecules integration deferred. Current simpler approach (skills as standalone) works well. Revisit when complex orchestration needed."} diff --git a/.beads/last-touched b/.beads/last-touched index cdfa50d..33c0661 100644 --- a/.beads/last-touched +++ b/.beads/last-touched @@ -1 +1 @@ -skills-byq +skills-sse