From abe2adfead41645e546c29f13e22294df627c437 Mon Sep 17 00:00:00 2001 From: Dan Date: Tue, 23 Dec 2025 01:12:27 -0500 Subject: [PATCH] refactor: standardize agent instruction files MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - AGENTS.md: Consolidated source of truth (159 lines) - CLAUDE.md: Thin wrapper (@AGENTS.md import) - GEMINI.md: New, with skills pointer for Gemini CLI Previously CLAUDE.md had 477 lines duplicating/extending AGENTS.md content. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- .beads/issues.jsonl | 51 ++--- AGENTS.md | 173 +++++++++++++--- CLAUDE.md | 478 +------------------------------------------- GEMINI.md | 9 + 4 files changed, 179 insertions(+), 532 deletions(-) create mode 100644 GEMINI.md diff --git a/.beads/issues.jsonl b/.beads/issues.jsonl index 8276628..d457c80 100644 --- a/.beads/issues.jsonl +++ b/.beads/issues.jsonl @@ -1,39 +1,40 @@ {"id":"ops-jrz1-00e","title":"Upgrade NixOS from 24.05 to 24.11","description":"Running NixOS 24.05.20241230 (Uakari). Current stable is 24.11. May be missing security patches. Low priority as no known critical CVEs, but should plan upgrade.","status":"open","priority":3,"issue_type":"task","created_at":"2025-12-04T21:03:22.760228514-08:00","updated_at":"2025-12-04T21:04:35.805980055-08:00","comments":[{"id":1,"issue_id":"ops-jrz1-00e","author":"dan","text":"Analysis Findings:\n1. Version Mismatch: Local flake.nix is pinned to 'nixos-24.05', but the dev environment reports '25.11' (Unstable), indicating state divergence.\n2. Upstream Bugs: Blocking issues in mautrix-slack (ops-jrz1-blh) and maubot (sync failure) are present in the current unstable revision (2025-12-02).\n3. 
Recommendation: Upgrade platform to NixOS 24.11 (Stable) to align environment, ensure stability, and pull fresh upstream fixes.","created_at":"2025-12-08T23:54:57Z"}]} {"id":"ops-jrz1-03o","title":"Upgrade mautrix-slack to v25.11","description":"Upgrade is just flake update + deploy. Current deployed: v0.2.3+dev.unknown (Oct 13). Flake lock: v25.10 (Oct 22). Latest nixpkgs-unstable: v25.11. Run: nix flake update nixpkgs-unstable \u0026\u0026 deploy. May fix edit panic (ops-jrz1-qxr).","status":"closed","priority":2,"issue_type":"task","created_at":"2025-12-05T18:24:18.332067067-08:00","updated_at":"2025-12-05T19:07:09.156981447-08:00","closed_at":"2025-12-05T19:07:09.156981447-08:00"} -{"id":"ops-jrz1-3ca","title":"Persist opencode state/cache across restarts","description":"opencode may store index/cache in ~/.cache or other dirs not covered by current bind mounts. AI context could be lost on container restart. Verify and add mounts.","status":"open","priority":3,"issue_type":"task","created_at":"2025-12-05T15:32:30.90315778-08:00","updated_at":"2025-12-05T15:32:30.90315778-08:00","dependencies":[{"issue_id":"ops-jrz1-3ca","depends_on_id":"ops-jrz1-3so","type":"parent-child","created_at":"2025-12-05T17:05:47.247361009-08:00","created_by":"daemon"}]} -{"id":"ops-jrz1-3fd","title":"Deploy and test single-user instance (Phase 1)","description":"Deploy one container for testing. Validate: WebSocket, extensions, terminal, opencode, memory usage. 
Access via SSH tunnel initially.","status":"open","priority":2,"issue_type":"task","created_at":"2025-12-05T17:16:54.783260036-08:00","updated_at":"2025-12-05T17:16:54.783260036-08:00","dependencies":[{"issue_id":"ops-jrz1-3fd","depends_on_id":"ops-jrz1-3so","type":"parent-child","created_at":"2025-12-05T17:17:36.400677984-08:00","created_by":"daemon"},{"issue_id":"ops-jrz1-3fd","depends_on_id":"ops-jrz1-5oe","type":"blocks","created_at":"2025-12-05T17:17:38.708397909-08:00","created_by":"daemon"},{"issue_id":"ops-jrz1-3fd","depends_on_id":"ops-jrz1-av0","type":"blocks","created_at":"2025-12-05T17:17:38.721665448-08:00","created_by":"daemon"},{"issue_id":"ops-jrz1-3fd","depends_on_id":"ops-jrz1-9gd","type":"blocks","created_at":"2025-12-05T17:17:38.737824478-08:00","created_by":"daemon"}]} +{"id":"ops-jrz1-3ca","title":"Persist opencode state/cache across restarts","description":"opencode may store index/cache in ~/.cache or other dirs not covered by current bind mounts. AI context could be lost on container restart. Verify and add mounts.","status":"open","priority":3,"issue_type":"task","created_at":"2025-12-05T15:32:30.90315778-08:00","updated_at":"2025-12-05T15:32:30.90315778-08:00","dependencies":[{"issue_id":"ops-jrz1-3ca","depends_on_id":"ops-jrz1-3so","type":"parent-child","created_at":"2025-12-05T17:05:47.247361009-08:00","created_by":"daemon","metadata":"{}"}]} +{"id":"ops-jrz1-3fd","title":"Deploy and test single-user instance (Phase 1)","description":"Deploy one container for testing. Validate: WebSocket, extensions, terminal, opencode, memory usage. 
Access via SSH tunnel initially.","status":"open","priority":2,"issue_type":"task","created_at":"2025-12-05T17:16:54.783260036-08:00","updated_at":"2025-12-05T17:16:54.783260036-08:00","dependencies":[{"issue_id":"ops-jrz1-3fd","depends_on_id":"ops-jrz1-3so","type":"parent-child","created_at":"2025-12-05T17:17:36.400677984-08:00","created_by":"daemon","metadata":"{}"},{"issue_id":"ops-jrz1-3fd","depends_on_id":"ops-jrz1-5oe","type":"blocks","created_at":"2025-12-05T17:17:38.708397909-08:00","created_by":"daemon","metadata":"{}"},{"issue_id":"ops-jrz1-3fd","depends_on_id":"ops-jrz1-av0","type":"blocks","created_at":"2025-12-05T17:17:38.721665448-08:00","created_by":"daemon","metadata":"{}"},{"issue_id":"ops-jrz1-3fd","depends_on_id":"ops-jrz1-9gd","type":"blocks","created_at":"2025-12-05T17:17:38.737824478-08:00","created_by":"daemon","metadata":"{}"}]} {"id":"ops-jrz1-3so","title":"Browser-based dev environment with opencode","description":"Epic: Provide VS Code in browser via code-server with opencode AI integration.\n\nKey decisions:\n- code-server in Podman containers (rootless)\n- opencode CLI + VS Code extension pre-installed\n- Subdomain routing (dan.code.clarun.xyz)\n- Custom container image\n- Target users: non-programmers, testers, learners\n\nDesign doc: specs/004-browser-dev-environment/design.md\n\nMigrated from ops-jrz1-ndl","status":"open","priority":1,"issue_type":"epic","created_at":"2025-12-05T17:04:36.709352529-08:00","updated_at":"2025-12-05T17:04:36.709352529-08:00"} -{"id":"ops-jrz1-3x4","title":"Add maubot SDK and deploy script to container image","description":"Container image needs:\n- Python 3.11 + maubot SDK\n- deploy.sh script (zip → .mbp → curl to maubot API)\n- maubot API reachable from container (host network or port forward)\n\nPart of learner onboarding for bot 
development.","status":"open","priority":2,"issue_type":"task","created_at":"2025-12-06T12:18:06.841708662-08:00","updated_at":"2025-12-06T12:18:06.841708662-08:00","dependencies":[{"issue_id":"ops-jrz1-3x4","depends_on_id":"ops-jrz1-3so","type":"parent-child","created_at":"2025-12-06T12:18:16.085519885-08:00","created_by":"daemon"},{"issue_id":"ops-jrz1-3x4","depends_on_id":"ops-jrz1-d58","type":"blocks","created_at":"2025-12-06T12:18:16.110944935-08:00","created_by":"daemon"}]} +{"id":"ops-jrz1-3x4","title":"Add maubot SDK and deploy script to container image","description":"Container image needs:\n- Python 3.11 + maubot SDK\n- deploy.sh script (zip → .mbp → curl to maubot API)\n- maubot API reachable from container (host network or port forward)\n\nPart of learner onboarding for bot development.","status":"open","priority":2,"issue_type":"task","created_at":"2025-12-06T12:18:06.841708662-08:00","updated_at":"2025-12-06T12:18:06.841708662-08:00","dependencies":[{"issue_id":"ops-jrz1-3x4","depends_on_id":"ops-jrz1-3so","type":"parent-child","created_at":"2025-12-06T12:18:16.085519885-08:00","created_by":"daemon","metadata":"{}"},{"issue_id":"ops-jrz1-3x4","depends_on_id":"ops-jrz1-d58","type":"blocks","created_at":"2025-12-06T12:18:16.110944935-08:00","created_by":"daemon","metadata":"{}"}]} {"id":"ops-jrz1-45v","title":"Matrix/Slack identity mismatch: dan vs vlad","description":"Matrix user @dan:clarun.xyz is linked to Slack user 'vlad'. Messages appear as vlad in Slack but dan in Element. Cosmetic confusion. Options: rename Matrix display name, or re-login bridge with different Slack account.","status":"open","priority":3,"issue_type":"task","created_at":"2025-12-05T19:38:19.899555475-08:00","updated_at":"2025-12-05T19:38:19.899555475-08:00"} -{"id":"ops-jrz1-46y","title":"Write onboarding documentation","description":"Critical for non-programmers. Cover: login, opencode usage, Git setup (PAT workflow), resource limits, security hygiene. 
Keep concise.","status":"open","priority":2,"issue_type":"task","created_at":"2025-12-05T15:32:41.586544583-08:00","updated_at":"2025-12-05T15:32:41.586544583-08:00","dependencies":[{"issue_id":"ops-jrz1-46y","depends_on_id":"ops-jrz1-7j4","type":"blocks","created_at":"2025-12-05T15:33:25.328712413-08:00","created_by":"daemon"},{"issue_id":"ops-jrz1-46y","depends_on_id":"ops-jrz1-wj2","type":"blocks","created_at":"2025-12-05T15:33:25.351559821-08:00","created_by":"daemon"},{"issue_id":"ops-jrz1-46y","depends_on_id":"ops-jrz1-3so","type":"parent-child","created_at":"2025-12-05T17:05:47.401868669-08:00","created_by":"daemon"}]} +{"id":"ops-jrz1-46y","title":"Write onboarding documentation","description":"Critical for non-programmers. Cover: login, opencode usage, Git setup (PAT workflow), resource limits, security hygiene. Keep concise.","status":"open","priority":2,"issue_type":"task","created_at":"2025-12-05T15:32:41.586544583-08:00","updated_at":"2025-12-05T15:32:41.586544583-08:00","dependencies":[{"issue_id":"ops-jrz1-46y","depends_on_id":"ops-jrz1-7j4","type":"blocks","created_at":"2025-12-05T15:33:25.328712413-08:00","created_by":"daemon","metadata":"{}"},{"issue_id":"ops-jrz1-46y","depends_on_id":"ops-jrz1-wj2","type":"blocks","created_at":"2025-12-05T15:33:25.351559821-08:00","created_by":"daemon","metadata":"{}"},{"issue_id":"ops-jrz1-46y","depends_on_id":"ops-jrz1-3so","type":"parent-child","created_at":"2025-12-05T17:05:47.401868669-08:00","created_by":"daemon","metadata":"{}"}]} {"id":"ops-jrz1-4jm","title":"Smoke test Matrix server (conduwuit)","description":"Verify Matrix homeserver is healthy: check /_matrix/client/versions endpoint, test registration, verify federation status (disabled). 
Quick health check after deployments.","status":"closed","priority":2,"issue_type":"task","created_at":"2025-12-05T18:09:47.220765063-08:00","updated_at":"2025-12-05T18:19:33.059734881-08:00","closed_at":"2025-12-05T18:19:33.059734881-08:00"} {"id":"ops-jrz1-5fk","title":"Smoke test Maubot service","description":"Verify Maubot is healthy: check management UI accessible via SSH tunnel, verify bot instances running, test plugin functionality. Quick health check after deployments.","status":"closed","priority":2,"issue_type":"task","created_at":"2025-12-05T18:09:47.33773092-08:00","updated_at":"2025-12-05T18:19:33.061388913-08:00","closed_at":"2025-12-05T18:19:33.061388913-08:00"} -{"id":"ops-jrz1-5ki","title":"Set up programmatic QA test user for bridge testing","description":"","status":"open","priority":3,"issue_type":"task","created_at":"2025-12-05T20:17:04.312571398-08:00","updated_at":"2025-12-05T20:17:04.312571398-08:00"} -{"id":"ops-jrz1-5oe","title":"Create NixOS module for code-server containers","description":"Module to manage per-user Podman containers, nginx routing, secrets. Use virtualisation.oci-containers. Generate systemd units.","status":"open","priority":2,"issue_type":"task","created_at":"2025-12-05T17:16:54.656121092-08:00","updated_at":"2025-12-05T17:16:54.656121092-08:00","dependencies":[{"issue_id":"ops-jrz1-5oe","depends_on_id":"ops-jrz1-3so","type":"parent-child","created_at":"2025-12-05T17:17:36.386278268-08:00","created_by":"daemon"},{"issue_id":"ops-jrz1-5oe","depends_on_id":"ops-jrz1-d58","type":"blocks","created_at":"2025-12-05T17:17:38.694752468-08:00","created_by":"daemon"}]} -{"id":"ops-jrz1-6of","title":"AI cost/rate limiting per user","description":"One user could drain API credits with runaway script. Need rate limiting per user, either via proxy middleware or opencode config. 
Track usage.","status":"closed","priority":2,"issue_type":"task","created_at":"2025-12-05T15:32:30.772304538-08:00","updated_at":"2025-12-05T17:42:42.773613559-08:00","closed_at":"2025-12-05T17:42:42.773613559-08:00","dependencies":[{"issue_id":"ops-jrz1-6of","depends_on_id":"ops-jrz1-3so","type":"parent-child","created_at":"2025-12-05T17:05:47.206816868-08:00","created_by":"daemon"},{"issue_id":"ops-jrz1-6of","depends_on_id":"ops-jrz1-wj2","type":"blocks","created_at":"2025-12-05T17:17:38.658742196-08:00","created_by":"daemon"}]} -{"id":"ops-jrz1-7j4","title":"Git credential strategy for non-programmers","description":"Non-programmers can't manage SSH keys. Pre-configure git-credential-store or provide simple PAT workflow with docs. Store in persistent home with 600 perms.","status":"closed","priority":2,"issue_type":"task","created_at":"2025-12-05T15:32:19.673999683-08:00","updated_at":"2025-12-05T17:38:54.788694408-08:00","closed_at":"2025-12-05T17:38:54.788694408-08:00","dependencies":[{"issue_id":"ops-jrz1-7j4","depends_on_id":"ops-jrz1-3so","type":"parent-child","created_at":"2025-12-05T17:05:47.139749437-08:00","created_by":"daemon"}]} +{"id":"ops-jrz1-5ki","title":"Set up programmatic QA test user for bridge testing","status":"open","priority":3,"issue_type":"task","created_at":"2025-12-05T20:17:04.312571398-08:00","updated_at":"2025-12-05T20:17:04.312571398-08:00"} +{"id":"ops-jrz1-5oe","title":"Create NixOS module for code-server containers","description":"Module to manage per-user Podman containers, nginx routing, secrets. Use virtualisation.oci-containers. 
Generate systemd units.","status":"open","priority":2,"issue_type":"task","created_at":"2025-12-05T17:16:54.656121092-08:00","updated_at":"2025-12-05T17:16:54.656121092-08:00","dependencies":[{"issue_id":"ops-jrz1-5oe","depends_on_id":"ops-jrz1-3so","type":"parent-child","created_at":"2025-12-05T17:17:36.386278268-08:00","created_by":"daemon","metadata":"{}"},{"issue_id":"ops-jrz1-5oe","depends_on_id":"ops-jrz1-d58","type":"blocks","created_at":"2025-12-05T17:17:38.694752468-08:00","created_by":"daemon","metadata":"{}"}]} +{"id":"ops-jrz1-6of","title":"AI cost/rate limiting per user","description":"One user could drain API credits with runaway script. Need rate limiting per user, either via proxy middleware or opencode config. Track usage.","status":"closed","priority":2,"issue_type":"task","created_at":"2025-12-05T15:32:30.772304538-08:00","updated_at":"2025-12-05T17:42:42.773613559-08:00","closed_at":"2025-12-05T17:42:42.773613559-08:00","dependencies":[{"issue_id":"ops-jrz1-6of","depends_on_id":"ops-jrz1-3so","type":"parent-child","created_at":"2025-12-05T17:05:47.206816868-08:00","created_by":"daemon","metadata":"{}"},{"issue_id":"ops-jrz1-6of","depends_on_id":"ops-jrz1-wj2","type":"blocks","created_at":"2025-12-05T17:17:38.658742196-08:00","created_by":"daemon","metadata":"{}"}]} +{"id":"ops-jrz1-7j4","title":"Git credential strategy for non-programmers","description":"Non-programmers can't manage SSH keys. Pre-configure git-credential-store or provide simple PAT workflow with docs. 
Store in persistent home with 600 perms.","status":"closed","priority":2,"issue_type":"task","created_at":"2025-12-05T15:32:19.673999683-08:00","updated_at":"2025-12-05T17:38:54.788694408-08:00","closed_at":"2025-12-05T17:38:54.788694408-08:00","dependencies":[{"issue_id":"ops-jrz1-7j4","depends_on_id":"ops-jrz1-3so","type":"parent-child","created_at":"2025-12-05T17:05:47.139749437-08:00","created_by":"daemon","metadata":"{}"}]} {"id":"ops-jrz1-88o","title":"Implement backup strategy for VPS","description":"No backups configured. Critical data: Matrix DB (622M), PostgreSQL (161M), Forgejo (2.5M), maubot (320K). No recovery path if disk fails. Need automated backups with off-site storage.","status":"closed","priority":1,"issue_type":"task","created_at":"2025-12-04T22:55:25.546850172-08:00","updated_at":"2025-12-05T00:56:27.720623612-08:00","closed_at":"2025-12-05T00:56:27.720623612-08:00"} -{"id":"ops-jrz1-9gd","title":"Upgrade VPS RAM for dev environments","description":"Current: 2GB. Need 4-8GB for multiple code-server containers. Coordinate with Vultr, plan maintenance window.","status":"open","priority":2,"issue_type":"task","created_at":"2025-12-05T17:16:54.267689439-08:00","updated_at":"2025-12-05T17:16:54.267689439-08:00","dependencies":[{"issue_id":"ops-jrz1-9gd","depends_on_id":"ops-jrz1-3so","type":"parent-child","created_at":"2025-12-05T17:17:36.331146543-08:00","created_by":"daemon"}]} -{"id":"ops-jrz1-av0","title":"Configure wildcard DNS and ACME cert","description":"Set up *.code.clarun.xyz DNS record and wildcard SSL cert via ACME. 
Depends on subdomain routing decision (kg0).","status":"open","priority":2,"issue_type":"task","created_at":"2025-12-05T17:16:54.387356964-08:00","updated_at":"2025-12-05T17:16:54.387356964-08:00","dependencies":[{"issue_id":"ops-jrz1-av0","depends_on_id":"ops-jrz1-3so","type":"parent-child","created_at":"2025-12-05T17:17:36.34918436-08:00","created_by":"daemon"},{"issue_id":"ops-jrz1-av0","depends_on_id":"ops-jrz1-kg0","type":"blocks","created_at":"2025-12-05T17:17:38.676800677-08:00","created_by":"daemon"}]} -{"id":"ops-jrz1-bhk","title":"Add disk quotas for user workspaces","description":"User could fill host disk via /var/lib/vscode/\u003cuser\u003e/. Add per-directory quotas or monitoring/alerting on disk usage.","status":"open","priority":3,"issue_type":"task","created_at":"2025-12-05T15:32:41.199417226-08:00","updated_at":"2025-12-05T15:32:41.199417226-08:00","dependencies":[{"issue_id":"ops-jrz1-bhk","depends_on_id":"ops-jrz1-3so","type":"parent-child","created_at":"2025-12-05T17:05:47.309592029-08:00","created_by":"daemon"}]} +{"id":"ops-jrz1-9gd","title":"Upgrade VPS RAM for dev environments","description":"Current: 2GB. Need 4-8GB for multiple code-server containers. Coordinate with Vultr, plan maintenance window.","status":"open","priority":2,"issue_type":"task","created_at":"2025-12-05T17:16:54.267689439-08:00","updated_at":"2025-12-05T17:16:54.267689439-08:00","dependencies":[{"issue_id":"ops-jrz1-9gd","depends_on_id":"ops-jrz1-3so","type":"parent-child","created_at":"2025-12-05T17:17:36.331146543-08:00","created_by":"daemon","metadata":"{}"}]} +{"id":"ops-jrz1-av0","title":"Configure wildcard DNS and ACME cert","description":"Set up *.code.clarun.xyz DNS record and wildcard SSL cert via ACME. 
Depends on subdomain routing decision (kg0).","status":"open","priority":2,"issue_type":"task","created_at":"2025-12-05T17:16:54.387356964-08:00","updated_at":"2025-12-05T17:16:54.387356964-08:00","dependencies":[{"issue_id":"ops-jrz1-av0","depends_on_id":"ops-jrz1-3so","type":"parent-child","created_at":"2025-12-05T17:17:36.34918436-08:00","created_by":"daemon","metadata":"{}"},{"issue_id":"ops-jrz1-av0","depends_on_id":"ops-jrz1-kg0","type":"blocks","created_at":"2025-12-05T17:17:38.676800677-08:00","created_by":"daemon","metadata":"{}"}]} +{"id":"ops-jrz1-ayl","title":"Rename sna-instagram-bot to something memorable","status":"open","priority":3,"issue_type":"task","created_at":"2025-12-08T16:54:31.223265094-08:00","updated_at":"2025-12-08T16:54:31.223265094-08:00"} +{"id":"ops-jrz1-bhk","title":"Add disk quotas for user workspaces","description":"User could fill host disk via /var/lib/vscode/\u003cuser\u003e/. Add per-directory quotas or monitoring/alerting on disk usage.","status":"open","priority":3,"issue_type":"task","created_at":"2025-12-05T15:32:41.199417226-08:00","updated_at":"2025-12-05T15:32:41.199417226-08:00","dependencies":[{"issue_id":"ops-jrz1-bhk","depends_on_id":"ops-jrz1-3so","type":"parent-child","created_at":"2025-12-05T17:05:47.309592029-08:00","created_by":"daemon","metadata":"{}"}]} {"id":"ops-jrz1-blh","title":"mautrix-slack edit panic persists in v25.11","description":"mautrix-slack panic on rapid message edits (race condition)\n\n**Root cause**: Edit event arrives before original message is stored in DB. 
ConvertEdit accesses nil metadata.\n\n**Location**: handleslack.go:575 - has TODO comment: 'this can panic?'\n\n**Reproduction**: Edit a Slack message within ~1 second of sending\n\n**Upstream status**: \n- v25.11 is latest (we're on it)\n- Known to devs (TODO in code)\n- No open issue filed yet\n\n**Stack trace**:\ngo.mau.fi/mautrix-slack/pkg/connector.(*SlackMessage).ConvertEdit\n handleslack.go:575\nmaunium.net/go/mautrix/bridgev2.(*Portal).handleRemoteEdit\n portal.go:2838","status":"open","priority":2,"issue_type":"bug","created_at":"2025-12-05T19:40:33.255395189-08:00","updated_at":"2025-12-05T23:05:05.344825241-08:00","comments":[{"id":2,"issue_id":"ops-jrz1-blh","author":"dan","text":"Confirmed panic exists in nixpkgs-unstable from 2025-12-02. Fix will be addressed via platform upgrade (see ops-jrz1-00e).","created_at":"2025-12-08T23:54:57Z"}]} -{"id":"ops-jrz1-d58","title":"Build custom code-server container image","description":"Dockerfile with: code-server, opencode CLI, opencode VS Code extension (Open VSX), Python, Node, Git. Push to registry or build locally.","status":"open","priority":2,"issue_type":"task","created_at":"2025-12-05T17:16:54.507577308-08:00","updated_at":"2025-12-05T17:16:54.507577308-08:00","dependencies":[{"issue_id":"ops-jrz1-d58","depends_on_id":"ops-jrz1-3so","type":"parent-child","created_at":"2025-12-05T17:17:36.369590207-08:00","created_by":"daemon"}]} -{"id":"ops-jrz1-dhj","title":"Port forwarding strategy for user apps","description":"When user runs app on localhost:3000, how do they view it? code-server has /proxy/\u003cport\u003e but URL is confusing for learners. 
Need clear UX or docs.","status":"closed","priority":2,"issue_type":"task","created_at":"2025-12-05T15:32:30.649292743-08:00","updated_at":"2025-12-05T17:41:01.486505687-08:00","closed_at":"2025-12-05T17:41:01.486505687-08:00","dependencies":[{"issue_id":"ops-jrz1-dhj","depends_on_id":"ops-jrz1-3so","type":"parent-child","created_at":"2025-12-05T17:05:47.175857247-08:00","created_by":"daemon"}]} -{"id":"ops-jrz1-dt9","title":"Increase container RAM limits (2GB too tight)","description":"2GB hard limit will OOM with code-server + opencode + LSP + user app. Gemini/GPT recommend 3-4GB per container or add swap. Need to size server appropriately.","status":"closed","priority":2,"issue_type":"task","created_at":"2025-12-05T15:32:19.400171408-08:00","updated_at":"2025-12-05T17:38:54.770433169-08:00","closed_at":"2025-12-05T17:38:54.770433169-08:00","dependencies":[{"issue_id":"ops-jrz1-dt9","depends_on_id":"ops-jrz1-3so","type":"parent-child","created_at":"2025-12-05T17:05:47.066130377-08:00","created_by":"daemon"}]} -{"id":"ops-jrz1-dux","title":"Container isolation: maubot API access only","description":"Security design for learner containers:\n\n**Container CAN access**:\n- maubot API (:29316) for plugin deploy\n- Matrix rooms via bot (through maubot)\n- Slack via bridge (through Matrix)\n\n**Container CANNOT access**:\n- Host filesystem\n- Other containers\n- PostgreSQL directly\n- Matrix homeserver directly\n- sops secrets\n\nImplementation: Podman network config, no --privileged, limited port exposure.","status":"open","priority":2,"issue_type":"task","created_at":"2025-12-06T12:18:16.212646624-08:00","updated_at":"2025-12-06T12:18:16.212646624-08:00","dependencies":[{"issue_id":"ops-jrz1-dux","depends_on_id":"ops-jrz1-3so","type":"parent-child","created_at":"2025-12-06T12:18:21.627621772-08:00","created_by":"daemon"}]} -{"id":"ops-jrz1-ezf","title":"Maubot plugin dev workflow for learners","description":"Design frictionless dev workflow for Python/Go learners 
building maubot plugins.\n\n**Requirements**:\n- No SSH tunnel setup for learners\n- Fast feedback loop (edit → see bot respond)\n- Circuit breakers (allowed_rooms, rate limits)\n- Test channel: #vlads-pad (Slack) ↔ Matrix\n\n**Options being considered**:\n1. Git-push deploy: push to repo → CI builds .mbp → deploys to maubot\n2. Code-server containers: browser IDE on VPS, deploy script talks to maubot locally\n3. Hybrid: code-server + git workflow\n\n**Related**: ops-jrz1-3so (browser-dev-environment epic)","status":"open","priority":2,"issue_type":"feature","created_at":"2025-12-06T01:36:26.529372206-08:00","updated_at":"2025-12-06T01:36:26.529372206-08:00","dependencies":[{"issue_id":"ops-jrz1-ezf","depends_on_id":"ops-jrz1-3so","type":"parent-child","created_at":"2025-12-06T12:18:06.743837766-08:00","created_by":"daemon"}]} -{"id":"ops-jrz1-gci","title":"Enable fail2ban for SSH brute force protection","description":"SSH brute force attempts generate log noise but don't pose security risk (key-only auth). fail2ban would help but is low priority. Deferred pending RFC on SSH log management strategy.","status":"open","priority":2,"issue_type":"task","created_at":"2025-12-04T21:03:22.651495544-08:00","updated_at":"2025-12-04T22:55:13.805471391-08:00","dependencies":[{"issue_id":"ops-jrz1-gci","depends_on_id":"ops-jrz1-nir","type":"blocks","created_at":"2025-12-04T22:56:14.777377818-08:00","created_by":"daemon"}]} -{"id":"ops-jrz1-glk","title":"VS Code extension policy (security)","description":"Extensions can run arbitrary code. Decide: allow arbitrary installs, or curate/restrict? 
For non-programmers, pre-install safe set and optionally disable marketplace.","status":"open","priority":3,"issue_type":"task","created_at":"2025-12-05T15:32:41.463030936-08:00","updated_at":"2025-12-05T15:32:41.463030936-08:00","dependencies":[{"issue_id":"ops-jrz1-glk","depends_on_id":"ops-jrz1-3so","type":"parent-child","created_at":"2025-12-05T17:05:47.372120465-08:00","created_by":"daemon"}]} -{"id":"ops-jrz1-i8i","title":"Enable mautrix-slack relay mode for bot bridging","description":"","status":"closed","priority":2,"issue_type":"task","created_at":"2025-12-06T19:09:42.087506995-08:00","updated_at":"2025-12-06T19:09:47.612545472-08:00","closed_at":"2025-12-06T19:09:47.612545472-08:00"} +{"id":"ops-jrz1-d58","title":"Build custom code-server container image","description":"Dockerfile with: code-server, opencode CLI, opencode VS Code extension (Open VSX), Python, Node, Git. Push to registry or build locally.","status":"open","priority":2,"issue_type":"task","created_at":"2025-12-05T17:16:54.507577308-08:00","updated_at":"2025-12-05T17:16:54.507577308-08:00","dependencies":[{"issue_id":"ops-jrz1-d58","depends_on_id":"ops-jrz1-3so","type":"parent-child","created_at":"2025-12-05T17:17:36.369590207-08:00","created_by":"daemon","metadata":"{}"}]} +{"id":"ops-jrz1-dhj","title":"Port forwarding strategy for user apps","description":"When user runs app on localhost:3000, how do they view it? code-server has /proxy/\u003cport\u003e but URL is confusing for learners. 
Need clear UX or docs.","status":"closed","priority":2,"issue_type":"task","created_at":"2025-12-05T15:32:30.649292743-08:00","updated_at":"2025-12-05T17:41:01.486505687-08:00","closed_at":"2025-12-05T17:41:01.486505687-08:00","dependencies":[{"issue_id":"ops-jrz1-dhj","depends_on_id":"ops-jrz1-3so","type":"parent-child","created_at":"2025-12-05T17:05:47.175857247-08:00","created_by":"daemon","metadata":"{}"}]} +{"id":"ops-jrz1-dt9","title":"Increase container RAM limits (2GB too tight)","description":"2GB hard limit will OOM with code-server + opencode + LSP + user app. Gemini/GPT recommend 3-4GB per container or add swap. Need to size server appropriately.","status":"closed","priority":2,"issue_type":"task","created_at":"2025-12-05T15:32:19.400171408-08:00","updated_at":"2025-12-05T17:38:54.770433169-08:00","closed_at":"2025-12-05T17:38:54.770433169-08:00","dependencies":[{"issue_id":"ops-jrz1-dt9","depends_on_id":"ops-jrz1-3so","type":"parent-child","created_at":"2025-12-05T17:05:47.066130377-08:00","created_by":"daemon","metadata":"{}"}]} +{"id":"ops-jrz1-dux","title":"Container isolation: maubot API access only","description":"Security design for learner containers:\n\n**Container CAN access**:\n- maubot API (:29316) for plugin deploy\n- Matrix rooms via bot (through maubot)\n- Slack via bridge (through Matrix)\n\n**Container CANNOT access**:\n- Host filesystem\n- Other containers\n- PostgreSQL directly\n- Matrix homeserver directly\n- sops secrets\n\nImplementation: Podman network config, no --privileged, limited port exposure.","status":"open","priority":2,"issue_type":"task","created_at":"2025-12-06T12:18:16.212646624-08:00","updated_at":"2025-12-06T12:18:16.212646624-08:00","dependencies":[{"issue_id":"ops-jrz1-dux","depends_on_id":"ops-jrz1-3so","type":"parent-child","created_at":"2025-12-06T12:18:21.627621772-08:00","created_by":"daemon","metadata":"{}"}]} +{"id":"ops-jrz1-ezf","title":"Maubot plugin dev workflow for learners","description":"Design 
frictionless dev workflow for Python/Go learners building maubot plugins.\n\n**Requirements**:\n- No SSH tunnel setup for learners\n- Fast feedback loop (edit → see bot respond)\n- Circuit breakers (allowed_rooms, rate limits)\n- Test channel: #vlads-pad (Slack) ↔ Matrix\n\n**Options being considered**:\n1. Git-push deploy: push to repo → CI builds .mbp → deploys to maubot\n2. Code-server containers: browser IDE on VPS, deploy script talks to maubot locally\n3. Hybrid: code-server + git workflow\n\n**Related**: ops-jrz1-3so (browser-dev-environment epic)","status":"open","priority":2,"issue_type":"feature","created_at":"2025-12-06T01:36:26.529372206-08:00","updated_at":"2025-12-06T01:36:26.529372206-08:00","dependencies":[{"issue_id":"ops-jrz1-ezf","depends_on_id":"ops-jrz1-3so","type":"parent-child","created_at":"2025-12-06T12:18:06.743837766-08:00","created_by":"daemon","metadata":"{}"}]} +{"id":"ops-jrz1-gci","title":"Enable fail2ban for SSH brute force protection","description":"SSH brute force attempts generate log noise but don't pose security risk (key-only auth). fail2ban would help but is low priority. Deferred pending RFC on SSH log management strategy.","status":"open","priority":2,"issue_type":"task","created_at":"2025-12-04T21:03:22.651495544-08:00","updated_at":"2025-12-04T22:55:13.805471391-08:00","dependencies":[{"issue_id":"ops-jrz1-gci","depends_on_id":"ops-jrz1-nir","type":"blocks","created_at":"2025-12-04T22:56:14.777377818-08:00","created_by":"daemon","metadata":"{}"}]} +{"id":"ops-jrz1-glk","title":"VS Code extension policy (security)","description":"Extensions can run arbitrary code. Decide: allow arbitrary installs, or curate/restrict? 
For non-programmers, pre-install safe set and optionally disable marketplace.","status":"open","priority":3,"issue_type":"task","created_at":"2025-12-05T15:32:41.463030936-08:00","updated_at":"2025-12-05T15:32:41.463030936-08:00","dependencies":[{"issue_id":"ops-jrz1-glk","depends_on_id":"ops-jrz1-3so","type":"parent-child","created_at":"2025-12-05T17:05:47.372120465-08:00","created_by":"daemon","metadata":"{}"}]} +{"id":"ops-jrz1-i8i","title":"Enable mautrix-slack relay mode for bot bridging","status":"closed","priority":2,"issue_type":"task","created_at":"2025-12-06T19:09:42.087506995-08:00","updated_at":"2025-12-06T19:09:47.612545472-08:00","closed_at":"2025-12-06T19:09:47.612545472-08:00"} {"id":"ops-jrz1-iok","title":"Instagram bot missing base-config.yaml","description":"Plugin was missing base-config.yaml required by maubot Config class. Fixed in commit 4b9481d.","status":"closed","priority":2,"issue_type":"bug","created_at":"2025-12-06T13:02:10.103730128-08:00","updated_at":"2025-12-06T13:02:15.055396318-08:00","closed_at":"2025-12-06T13:02:15.055396318-08:00"} -{"id":"ops-jrz1-jit","title":"Logging and monitoring for dev environments","description":"No observability plan. Need: container CPU/mem metrics, nginx logs, disk usage monitoring, alert on repeated 401s or resource exhaustion.","status":"open","priority":3,"issue_type":"task","created_at":"2025-12-05T15:32:41.318448038-08:00","updated_at":"2025-12-05T15:32:41.318448038-08:00","dependencies":[{"issue_id":"ops-jrz1-jit","depends_on_id":"ops-jrz1-3so","type":"parent-child","created_at":"2025-12-05T17:05:47.343610481-08:00","created_by":"daemon"}]} -{"id":"ops-jrz1-kg0","title":"Switch to subdomain routing (dan.code.clarun.xyz)","description":"Path-based routing (/code/dan/) is fragile. Extensions assume root path, cookies scope incorrectly, PWA breaks. 
Switch to wildcard subdomains for cleaner isolation.","status":"closed","priority":1,"issue_type":"task","created_at":"2025-12-05T15:32:19.283887085-08:00","updated_at":"2025-12-05T17:23:11.983564455-08:00","closed_at":"2025-12-05T17:23:11.983564455-08:00","dependencies":[{"issue_id":"ops-jrz1-kg0","depends_on_id":"ops-jrz1-3so","type":"parent-child","created_at":"2025-12-05T17:05:47.043217984-08:00","created_by":"daemon"}]} -{"id":"ops-jrz1-kia","title":"Container reset mechanism (keep workspace)","description":"If user breaks their environment, need simple way to wipe container and restore default image while preserving /workspace. Script or admin command.","status":"open","priority":3,"issue_type":"task","created_at":"2025-12-05T15:32:31.045592689-08:00","updated_at":"2025-12-05T15:32:31.045592689-08:00","dependencies":[{"issue_id":"ops-jrz1-kia","depends_on_id":"ops-jrz1-3so","type":"parent-child","created_at":"2025-12-05T17:05:47.275530016-08:00","created_by":"daemon"}]} +{"id":"ops-jrz1-jit","title":"Logging and monitoring for dev environments","description":"No observability plan. Need: container CPU/mem metrics, nginx logs, disk usage monitoring, alert on repeated 401s or resource exhaustion.","status":"open","priority":3,"issue_type":"task","created_at":"2025-12-05T15:32:41.318448038-08:00","updated_at":"2025-12-05T15:32:41.318448038-08:00","dependencies":[{"issue_id":"ops-jrz1-jit","depends_on_id":"ops-jrz1-3so","type":"parent-child","created_at":"2025-12-05T17:05:47.343610481-08:00","created_by":"daemon","metadata":"{}"}]} +{"id":"ops-jrz1-kg0","title":"Switch to subdomain routing (dan.code.clarun.xyz)","description":"Path-based routing (/code/dan/) is fragile. Extensions assume root path, cookies scope incorrectly, PWA breaks. 
Switch to wildcard subdomains for cleaner isolation.","status":"closed","priority":1,"issue_type":"task","created_at":"2025-12-05T15:32:19.283887085-08:00","updated_at":"2025-12-05T17:23:11.983564455-08:00","closed_at":"2025-12-05T17:23:11.983564455-08:00","dependencies":[{"issue_id":"ops-jrz1-kg0","depends_on_id":"ops-jrz1-3so","type":"parent-child","created_at":"2025-12-05T17:05:47.043217984-08:00","created_by":"daemon","metadata":"{}"}]} +{"id":"ops-jrz1-kia","title":"Container reset mechanism (keep workspace)","description":"If user breaks their environment, need simple way to wipe container and restore default image while preserving /workspace. Script or admin command.","status":"open","priority":3,"issue_type":"task","created_at":"2025-12-05T15:32:31.045592689-08:00","updated_at":"2025-12-05T15:32:31.045592689-08:00","dependencies":[{"issue_id":"ops-jrz1-kia","depends_on_id":"ops-jrz1-3so","type":"parent-child","created_at":"2025-12-05T17:05:47.275530016-08:00","created_by":"daemon","metadata":"{}"}]} {"id":"ops-jrz1-ndl","title":"Browser-based dev environment (code-server)","description":"Explore setting up browser-based development:\n\nOptions:\n- code-server / openvscode-server - VS Code in browser\n- ttyd / wetty - terminal in browser \n- PWA install to home screen for native app feel\n\nCould combine with Tailscale for secure access without exposing ports.\n\nRef: ops-dev thin client brainstorm session","notes":"Design doc created: specs/004-browser-dev-environment/design.md - covers architecture, tech choices, resource planning, security model, rollout phases","status":"closed","priority":2,"issue_type":"feature","created_at":"2025-12-04T15:08:02.406274744-08:00","updated_at":"2025-12-05T17:05:52.872944892-08:00","closed_at":"2025-12-05T17:05:52.872944892-08:00"} {"id":"ops-jrz1-nir","title":"RFC: SSH log noise reduction strategy","description":"Research showed 99.8% of SSH logs are scanner noise (9000 failed attempts/day). 
Options: (1) Change SSH port - simple, ~99% reduction (2) journald filter - surgical but complex (3) LogLevel ERROR - loses successful login audit trail (4) fail2ban - bans IPs, partial reduction. Orch consensus: Gemini opposed LogLevel ERROR due to losing audit trail, GPT supported. Need RFC to decide approach. See posture review from Dec 2025 session.","status":"open","priority":3,"issue_type":"task","created_at":"2025-12-04T22:55:13.990334935-08:00","updated_at":"2025-12-04T22:55:13.990334935-08:00"} {"id":"ops-jrz1-nvx","title":"Slack bot architecture: Matrix-first approach","description":"**Decision**: Use Matrix as primary platform for Slack bot development.\n\n**Architecture**: Bots run as maubot plugins (or Matrix bots), communicate to Slack via mautrix-slack bridge.\n\n**Rationale**:\n- Existing infrastructure (maubot deployed, bridge working)\n- Single platform to manage\n- Bots work with Matrix users too\n- Avoid Socket Mode contention (only one xapp- connection allowed)\n\n**Trade-offs accepted**:\n- Bridge dependency (edit panic bug exists)\n- Extra latency through bridge hop\n- Limited to bridged channels\n\n**Alternative considered (Option B - direct Slack API)**:\n- Could use xoxb- token for outbound-only (REST)\n- Would need new Slack app for full Socket Mode independence\n- Deferred for now\n\n**Credentials available**:\n- slack-oauth-token (xoxb-) - shareable for REST calls if needed\n- slack-app-token (xapp-) - reserved for bridge Socket Mode\n\n**Status**: DECIDED - staying with Matrix-first","status":"closed","priority":3,"issue_type":"task","created_at":"2025-12-05T23:12:22.011872713-08:00","updated_at":"2025-12-05T23:12:28.329467732-08:00","closed_at":"2025-12-05T23:12:28.329467732-08:00"} -{"id":"ops-jrz1-qxr","title":"mautrix-slack message edit panic (upstream bug)","description":"Bridge upgraded to v25.11. Need to verify if edit panic is fixed by testing a Slack message edit. 
Watch logs: journalctl -u mautrix-slack -f | grep -E 'ERR|panic|edit'","status":"closed","priority":2,"issue_type":"bug","created_at":"2025-12-05T18:22:38.18203834-08:00","updated_at":"2025-12-05T19:36:00.556011621-08:00","closed_at":"2025-12-05T19:36:00.556011621-08:00","dependencies":[{"issue_id":"ops-jrz1-qxr","depends_on_id":"ops-jrz1-03o","type":"blocks","created_at":"2025-12-05T18:24:23.259399275-08:00","created_by":"daemon"}]} -{"id":"ops-jrz1-u0w","title":"Security review of running server","description":"","status":"closed","priority":2,"issue_type":"task","created_at":"2025-12-04T21:03:22.420507724-08:00","updated_at":"2025-12-04T21:04:31.989886731-08:00","closed_at":"2025-12-04T21:04:31.989886731-08:00"} -{"id":"ops-jrz1-wj2","title":"Design API key provisioning strategy","description":"opencode needs API keys (OpenAI, Anthropic). Options: 1) Shared key with proxy + rate limiting, 2) Per-user keys in sops-nix. Need to prevent key exposure and enable usage tracking.","status":"closed","priority":1,"issue_type":"task","created_at":"2025-12-05T15:32:19.526073243-08:00","updated_at":"2025-12-05T17:25:10.534718515-08:00","closed_at":"2025-12-05T17:25:10.534718515-08:00","dependencies":[{"issue_id":"ops-jrz1-wj2","depends_on_id":"ops-jrz1-3so","type":"parent-child","created_at":"2025-12-05T17:05:47.103332379-08:00","created_by":"daemon"}]} +{"id":"ops-jrz1-qxr","title":"mautrix-slack message edit panic (upstream bug)","description":"Bridge upgraded to v25.11. Need to verify if edit panic is fixed by testing a Slack message edit. 
Watch logs: journalctl -u mautrix-slack -f | grep -E 'ERR|panic|edit'","status":"closed","priority":2,"issue_type":"bug","created_at":"2025-12-05T18:22:38.18203834-08:00","updated_at":"2025-12-05T19:36:00.556011621-08:00","closed_at":"2025-12-05T19:36:00.556011621-08:00","dependencies":[{"issue_id":"ops-jrz1-qxr","depends_on_id":"ops-jrz1-03o","type":"blocks","created_at":"2025-12-05T18:24:23.259399275-08:00","created_by":"daemon","metadata":"{}"}]} +{"id":"ops-jrz1-u0w","title":"Security review of running server","status":"closed","priority":2,"issue_type":"task","created_at":"2025-12-04T21:03:22.420507724-08:00","updated_at":"2025-12-04T21:04:31.989886731-08:00","closed_at":"2025-12-04T21:04:31.989886731-08:00"} +{"id":"ops-jrz1-wj2","title":"Design API key provisioning strategy","description":"opencode needs API keys (OpenAI, Anthropic). Options: 1) Shared key with proxy + rate limiting, 2) Per-user keys in sops-nix. Need to prevent key exposure and enable usage tracking.","status":"closed","priority":1,"issue_type":"task","created_at":"2025-12-05T15:32:19.526073243-08:00","updated_at":"2025-12-05T17:25:10.534718515-08:00","closed_at":"2025-12-05T17:25:10.534718515-08:00","dependencies":[{"issue_id":"ops-jrz1-wj2","depends_on_id":"ops-jrz1-3so","type":"parent-child","created_at":"2025-12-05T17:05:47.103332379-08:00","created_by":"daemon","metadata":"{}"}]} {"id":"ops-jrz1-xz1","title":"Fix maubot admin UI exposed to internet (port 29316)","description":"Maubot admin UI on port 29316 is publicly accessible (returns 401 but API surface exposed). Firewall explicitly allows this port. Risk: brute force on admin password, direct exploit of any maubot vulnerabilities. 
Fix: bind to 127.0.0.1 only, remove from firewall, access via SSH tunnel.","status":"closed","priority":1,"issue_type":"bug","created_at":"2025-12-04T21:03:22.531676543-08:00","updated_at":"2025-12-04T22:35:24.162735368-08:00","closed_at":"2025-12-04T22:35:24.162735368-08:00"} {"id":"ops-jrz1-zvh","title":"Fix maubot health check (failing every 5 min)","description":"Health check at /_matrix/maubot/v1/version returns 401 (auth required). Check script doesn't provide auth token. Spamming error logs every 5 minutes.","status":"closed","priority":2,"issue_type":"bug","created_at":"2025-12-04T22:55:25.755541054-08:00","updated_at":"2025-12-05T02:00:19.284410671-08:00","closed_at":"2025-12-05T02:00:19.284410671-08:00"} diff --git a/AGENTS.md b/AGENTS.md index d64d8ba..a2a0197 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -1,8 +1,12 @@ -# Beads Issue Tracking +# AGENTS.md + +Repository guidelines for AI coding agents. + +## Issue Tracking (Beads) **Session start**: Run `bd ready` to see available work. -## Commands +**Commands:** - `bd ready` - Issues with no blockers - `bd show ` - Issue details - `bd update --status=in_progress` - Claim work @@ -10,39 +14,146 @@ - `bd create --title="..." --type=task|bug|feature` - New issue - `bd dep add ` - Add dependency -## Session End -Before finishing: `git status`, `git add`, `git commit`. This is an ephemeral branch - merge to main locally. +**Session end**: `git status`, `git add`, `git commit`. Ephemeral branch - merge to main locally. -# Repository Guidelines +## Overview -## Project Structure & Module Organization -- `configuration.nix` holds shared system defaults; adjust service toggles in host overlays instead of editing it directly. -- `hosts/ops-jrz1.nix` and `hosts/ops-jrz1-vm.nix` override environment-specific networking, secrets, and hardware details; mirror changes across both when possible. 
-- `modules/` contains composable NixOS modules (`matrix-continuwuity.nix`, `mautrix-*.nix`, `security/*`); keep new modules kebab-cased and expose options via `lib.mkOption`. -- `scripts/` provides sanitization utilities. Stage external imports under `staging/`, run `./scripts/sanitize-files.sh SRC staging/modules`, then promote files into `modules/` once validation passes. -- `specs/` and `docs/` capture design intent and runbooks; update the relevant spec when changing feature scope. +NixOS-based Matrix homeserver (conduwuit) with mautrix-slack bridge for Slack ↔ Matrix messaging. -## Build, Test, and Development Commands -- `nix flake check` validates module wiring, options, and formatting before review. -- `nix build .#nixosConfigurations.ops-jrz1` produces the deployable system closure; use this to catch evaluation regressions. -- `nixos-rebuild switch --flake .#ops-jrz1 --target-host root@ops-jrz1` deploys to the VPS; replace the target host when testing elsewhere. -- `./scripts/validate-sanitization.sh modules/` ensures redacted content before commit; rerun after manual edits to sanitized files. +**Technologies**: Nix 2.x, NixOS 24.05+, conduwuit, mautrix-slack, PostgreSQL 15, sops-nix -## Coding Style & Naming Conventions -- Prefer two-space indentation in Nix files; align attribute sets and option blocks for readability. -- Use `lowerCamelCase` for option names, kebab-case for file names, and leave explanatory comments above non-obvious logic paths only. -- Format Nix with `nix fmt` (nixpkgs-fmt) or equivalent before committing to keep diffs minimal. +## Project Structure -## Testing Guidelines -- Treat `nix flake check` as the minimum gate; add targeted VM tests in `hosts/ops-jrz1-vm.nix` when introducing new services. -- Name ad-hoc verification scripts under `scripts/local-*` and avoid committing transient debug helpers. -- Capture manual verification steps in `docs/worklogs/` immediately after deploys for traceability. +``` +. 
+├── hosts/ # NixOS host configurations +│ └── ops-jrz1.nix # VPS configuration +├── modules/ # NixOS modules +│ ├── dev-services.nix # PostgreSQL, Forgejo, bridge coordination +│ ├── mautrix-slack.nix # Slack bridge module +│ └── matrix-continuwuity.nix # Matrix homeserver +├── secrets/ # sops-encrypted secrets +│ └── secrets.yaml # Encrypted credentials (age) +├── specs/ # Feature specifications +│ ├── 001-extract-matrix-platform/ +│ └── 002-slack-bridge-integration/ +├── docs/ # Documentation +│ ├── platform-vision.md # North star document +│ └── worklogs/ # Deployment logs +└── scripts/ # Utility scripts +``` -## Commit & Pull Request Guidelines -- Follow the existing Git log style: single-line, capitalized summaries in ~70 characters (e.g., `Tighten bridge secret validation`). -- Reference related specs or worklogs in the body, and list `nix flake check` (and any VM smoke tests) under a short "Validation" block. -- PRs should link the tracked task, summarize scope, highlight sanitization steps, and mention any secrets or infra touchpoints reviewers must provision. +## Commands -## Security & Secrets Handling -- Never commit decrypted material; use `sops secrets/secrets.yaml` for edits and confirm `git status` shows only encrypted blobs. -- Replace real domains, IPs, and tokens with repository-safe placeholders. When importing upstream configs, run the sanitize and validate scripts before staging changes. 
+### Deployment +```bash +# Deploy to VPS +nixos-rebuild switch --flake .#ops-jrz1 --target-host root@ops-jrz1 --build-host localhost + +# Validate before deploy +nix flake check +nix build .#nixosConfigurations.ops-jrz1 +``` + +### Bridge Management +```bash +# Check bridge status +ssh root@ops-jrz1 'systemctl status mautrix-slack' + +# View bridge logs +ssh root@ops-jrz1 'journalctl -u mautrix-slack -f' + +# Check for errors +ssh root@ops-jrz1 'journalctl -u mautrix-slack --since "1 hour ago" | grep -E "ERR|WRN|FTL"' +``` + +### Secrets Management +```bash +# Edit encrypted secrets +sops secrets/secrets.yaml + +# View decrypted (never commit output) +sops -d secrets/secrets.yaml +``` + +### SSH Tunnels +```bash +# Maubot web UI +ssh -L 29316:localhost:29316 root@ops-jrz1 +# Access: http://localhost:29316 + +# Matrix homeserver (debugging) +ssh -L 8008:localhost:8008 root@ops-jrz1 +``` + +## Coding Conventions + +- Two-space indentation in Nix files +- Use `lowerCamelCase` for options, kebab-case for filenames +- Format with `nix fmt` before committing +- NixOS modules: Use nixpkgs pattern (options, config, mkIf) +- Never hardcode secrets, use sops-nix + +## Git Workflow + +**Trunk-Based Development:** +- `main`: Single long-lived branch, always deployable +- Feature branches: Short-lived, naming `###-feature-name` +- Tag releases after merging: `v0.MINOR.PATCH` + +**Commits:** +- Clear, concise messages (~70 chars) +- No emojis or marketing language +- Reference specs/worklogs in body + +## Development Patterns + +### Slack Bridge +- **Authentication**: Interactive login via Matrix chat (`login app` command) +- **Socket Mode**: WebSocket connection, no public endpoint needed +- **Portal Creation**: Automatic based on activity +- **Tokens**: Bot token (xoxb-) + app-level token (xapp-) + +### Secrets Flow +- Encryption: Age via SSH host key +- Storage: secrets/secrets.yaml (encrypted, safe to commit) +- Runtime: Decrypted to /run/secrets/ (tmpfs) + +### Deployment 
Workflow +1. Make configuration changes locally +2. Commit to git +3. Deploy via nixos-rebuild +4. Verify service status and logs +5. Document in docs/worklogs/ + +## Architecture + +``` +┌─────────────────────────────────────────────────────┐ +│ clarun.xyz VPS │ +│ │ +│ nginx :443 (HTTPS) │ +│ ├─→ conduwuit :8008 (Matrix homeserver) │ +│ └─→ Forgejo :3000 │ +│ │ +│ mautrix-slack :29319 │ +│ └─→ PostgreSQL (unix socket) │ +│ │ +└─────────────────────────────────────────────────────┘ + │ + └─→ Slack API (Socket Mode WebSocket) +``` + +**Critical**: All internal services bind IPv4-only (127.0.0.1). Use 127.0.0.1, NOT "localhost", which resolves to IPv6 ([::1]). + +## Known Issues + +- olm-3.2.16 marked insecure (permitted via `nixpkgs.config.permittedInsecurePackages`) +- Fresh database required after conduwuit version upgrades + +## Testing + +- `nix flake check` - minimum gate +- Add VM tests in `hosts/ops-jrz1-vm.nix` for new services +- Capture verification steps in `docs/worklogs/` +- Test message latency: should be <5 seconds diff --git a/CLAUDE.md b/CLAUDE.md index dbd92fd..f6aa6c0 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -1,477 +1,3 @@ -# ops-jrz1 Development Guidelines +# CLAUDE.md -Auto-generated from all feature plans. Last updated: 2025-10-22 - -## Active Technologies -- Nix 2.x, NixOS 24.05+, Bash 5.x (for scripts) (001-extract-matrix-platform) -- mautrix-slack (Python 3.11), PostgreSQL 15.10, sops-nix (002-slack-bridge-integration) -- Matrix homeserver: conduwuit (clarun.xyz) -- Secrets management: sops-nix with age encryption - -## Project Structure -``` -.
-├── hosts/ # NixOS host configurations -│ └── ops-jrz1.nix # VPS configuration (45.77.205.49) -├── modules/ # NixOS modules -│ ├── dev-services.nix # PostgreSQL, Forgejo, bridge coordination -│ ├── mautrix-slack.nix # Slack bridge module -│ └── matrix-continuwuity.nix # Matrix homeserver -├── secrets/ # sops-encrypted secrets -│ └── secrets.yaml # Encrypted credentials (age) -├── specs/ # Feature specifications -│ ├── 001-extract-matrix-platform/ -│ └── 002-slack-bridge-integration/ -│ ├── spec.md # Feature specification -│ ├── plan.md # Implementation plan -│ ├── research.md # Technical research findings -│ ├── data-model.md # Data model & state machines -│ ├── quickstart.md # Deployment runbook -│ └── contracts/ # Configuration schemas -├── docs/ # Documentation -│ ├── platform-vision.md # North star document -│ └── worklogs/ # Deployment logs -└── .specify/ # Spec-kit framework files -``` - -## Commands - -### Deployment -```bash -# Deploy configuration to VPS -nixos-rebuild switch --flake .#ops-jrz1 \ - --target-host root@45.77.205.49 \ - --build-host localhost - -# Deploy to staging -nixos-rebuild switch --flake .#ops-jrz1-staging \ - --target-host root@45.77.205.49 \ - --build-host localhost -``` - -### Bridge Management -```bash -# Check bridge status -ssh root@45.77.205.49 'systemctl status mautrix-slack' - -# View bridge logs -ssh root@45.77.205.49 'journalctl -u mautrix-slack -f' - -# Check Socket Mode connection -ssh root@45.77.205.49 'journalctl -u mautrix-slack -n 20 | grep -i socket' - -# Query bridge database -ssh root@45.77.205.49 'sudo -u mautrix_slack psql mautrix_slack -c "SELECT * FROM portal;"' -``` - -### Secrets Management -```bash -# Edit encrypted secrets -sops secrets/secrets.yaml - -# View decrypted secrets (never commit output) -sops -d secrets/secrets.yaml - -# Add new secret -sops secrets/secrets.yaml -# (Edit in your $EDITOR, auto-encrypts on save) -``` - -### Matrix Server -```bash -# Check Matrix homeserver -ssh root@45.77.205.49 
'systemctl status matrix-continuwuity' - -# Test federation -ssh root@45.77.205.49 'curl -s http://localhost:8008/_matrix/client/versions | jq .' -``` - -### Database -```bash -# List databases -ssh root@45.77.205.49 'sudo -u postgres psql -l' - -# Check bridge database -ssh root@45.77.205.49 'sudo -u postgres psql mautrix_slack -c "\dt"' - -# Backup bridge database -ssh root@45.77.205.49 'sudo -u postgres pg_dump mautrix_slack' > backup.sql -``` - -### SSH Tunnels -```bash -# Maubot web UI (admin interface for managing bot instances) -ssh -L 29316:localhost:29316 root@45.77.205.49 -# Then access: http://localhost:29316 -# Login: admin / (password from secrets/secrets.yaml) - -# Matrix homeserver (for debugging) -ssh -L 8008:localhost:8008 root@45.77.205.49 -# Then access: http://localhost:8008 - -# Keep tunnel open in background -ssh -fN -L 29316:localhost:29316 root@45.77.205.49 -``` - -## Code Style -- Nix 2.x, NixOS 24.05+, Bash 5.x: Follow standard conventions -- NixOS modules: Use nixpkgs module pattern (options, config, mkIf) -- Configuration: Declarative over imperative -- Secrets: Never hardcode, use sops-nix or interactive login -- Logging: Use appropriate levels (debug for troubleshooting, info for production) - -## Development Patterns - -### Slack Bridge (002-slack-bridge-integration) -- **Authentication**: Interactive login via Matrix chat (`login app` command) -- **Socket Mode**: WebSocket connection, no public endpoint needed -- **Portal Creation**: Automatic based on activity (no manual channel mapping) -- **Secrets**: Stored in bridge database after authentication (not in NixOS config) -- **Token Requirements**: Bot token (xoxb-) + app-level token (xapp-) - -### Secrets Management -- **Encryption**: Age encryption via SSH host key (/etc/ssh/ssh_host_ed25519_key) -- **Storage**: secrets/secrets.yaml (encrypted, safe to commit) -- **Runtime**: Decrypted to /run/secrets/ (tmpfs, cleared on reboot) -- **Permissions**: 0440 for service-specific 
secrets, owned by service user - -### Deployment Workflow -1. Make configuration changes locally -2. Commit to git -3. Deploy via nixos-rebuild -4. Verify service status and logs -5. Document in worklogs/ -6. Test functionality -7. Monitor for stability - -## Git Workflow - -This project uses **Trunk-Based Development** for simplified collaboration and deployment. - -### Branch Strategy -- **main**: Single long-lived branch, always deployable -- **Feature branches**: Short-lived (hours to days), naming: `###-feature-name` -- **No long-lived branches**: Feature branches merge or delete quickly - -### Feature Development Workflow -```bash -# 1. Start feature from latest main -git checkout main -git pull origin main -git checkout -b 003-feature-name - -# 2. Develop with frequent commits -# Make changes, commit often with clear messages - -# 3. Keep main in sync (if feature takes >1 day) -git checkout main -git pull origin main -git checkout 003-feature-name -git rebase main - -# 4. When feature complete, merge to main -git checkout main -git merge 003-feature-name # Fast-forward merge preferred - -# 5. Tag release if deploying -git tag -a v0.3.0 -m "Release notes..." -git push origin main --tags - -# 6. 
Delete feature branch -git branch -d 003-feature-name -``` - -### Release Tagging -- **Version scheme**: v0.MINOR.PATCH (semver-like) -- **When to tag**: After completing and merging a feature -- **Tag format**: Annotated tags with comprehensive release notes -- **Example**: - ```bash - git tag -a v0.3.0 -m "Release v0.3.0: Feature Description - - - Key changes - - Architecture updates - - Known issues - " - ``` - -### Branch Naming Convention -- Format: `###-short-description` -- Examples: `002-slack-bridge-integration`, `003-monitoring-setup` -- Number matches spec directory in `specs/###-feature-name/` - -### Commit Guidelines -- Clear, concise commit messages -- No emojis or marketing language -- Focus on "what" and "why" not "how" -- Group related changes in single commit -- Example: "Fix bridge homeserver URL to use IPv4 (127.0.0.1) instead of localhost" - -### Main Branch Protection -- Always keep main deployable -- Test before merging to main -- Document breaking changes in commit message -- Tag releases for deployment milestones - -## Recent Changes -- 003-maubot-integration: Added [if applicable, e.g., PostgreSQL, CoreData, files or N/A] -- 001-extract-matrix-platform: Added Nix 2.x, NixOS 24.05+, Bash 5.x (for scripts) -- 002-slack-bridge-integration: Deployed mautrix-slack bridge with Socket Mode (2025-10-26) - - Phase 0-1: Research and design complete - - Phase 2: Infrastructure deployed and operational - - Status: Bidirectional message flow working (Slack ↔ Matrix) - - ~50 Slack channels synced to Matrix rooms - -## Known Issues -- olm-3.2.16 marked insecure (permitted via nixpkgs.config.permittedInsecurePackages) -- Fresh database required after conduwuit version upgrades (wipe /var/lib/matrix-continuwuity/db/) - -## Resolved Issues -- ✅ conduwuit debug logging (reverted to "info" 2025-10-26) -- ✅ Manual sender_localpart fix (automated in mautrix-slack.nix 2025-10-26) - -## Testing Guidelines -- Test message latency: Should be <5 seconds (FR-001, 
FR-002) -- Test reactions, edits, file attachments -- Monitor health indicators: connection_status, last_successful_message, error_count -- Stability target: 99% uptime over 7-day period - - - -## Configuration Notes - -### mautrix-slack Registration File Fix (RESOLVED) - -**Issue:** The bridge's registration generator (`-g` flag) creates a random `sender_localpart` instead of using the configured `bot.username` value. - -**Root Cause:** mautrix-slack generates registration independently of `config.yaml` settings. - -**Solution:** ✅ Automated fix implemented in `modules/mautrix-slack.nix` (lines 339-341) - -The module now automatically patches the sender_localpart during registration generation: -```nix -# In ExecStartPre, after registration generation: -${pkgs.gnused}/bin/sed -i "s/^sender_localpart: .*/sender_localpart: ${cfg.appservice.senderLocalpart}/" "$REG_PATH" -``` - -**Status:** No manual intervention required on fresh deploys. The fix is applied automatically during service startup. - -**Verification:** Tested 2025-10-26 - registration file correctly generated with `sender_localpart: slackbot` matching configuration. 
- ---- - -## QA Testing Checklist - -### Core Features (✅ Tested & Working) -- [x] Bidirectional text messaging (Slack ↔ Matrix) -- [x] Channel discovery and room creation (~50 channels synced) -- [x] Socket Mode WebSocket connection -- [x] Bot authentication with Matrix homeserver -- [x] Bridge startup and recovery after restart - -### Features Requiring QA Testing (⚠️ Untested) -- [ ] **File Attachments** - - Upload file in Slack → verify appears in Matrix - - Upload file in Matrix → verify appears in Slack - - Test various file types (images, PDFs, archives) - - Test large files (>10MB) - -- [ ] **Emoji Reactions** - - Add reaction in Slack → verify appears in Matrix - - Add reaction in Matrix → verify appears in Slack - - Remove reaction → verify syncs - -- [ ] **Message Edits** - - Edit message in Slack → verify updates in Matrix - - Edit message in Matrix → verify updates in Slack - -- [ ] **Message Deletion** - - Delete message in Slack → verify removes from Matrix - - Delete message in Matrix → verify removes from Slack - -- [ ] **Thread Replies** - - Reply in Slack thread → verify threading in Matrix - - Reply in Matrix thread → verify threading in Slack - -- [ ] **User Profile Sync** - - Change Slack display name → verify updates Matrix puppet - - Change Slack avatar → verify updates Matrix puppet - -- [ ] **Error Handling** - - Network interruption recovery - - Matrix homeserver restart handling - - Slack WebSocket reconnection - - Invalid token handling - -- [ ] **Performance** - - High-volume channel (>100 messages/hour) - - Large file transfer times - - Message latency under load - -### Test Commands -```bash -# Monitor bridge during testing -ssh root@45.77.205.49 'journalctl -u mautrix-slack -f' - -# Check for errors -ssh root@45.77.205.49 'journalctl -u mautrix-slack --since "1 hour ago" | grep -E "ERR|WRN|FTL"' - -# Verify message flow -# Test in #vlads-pad or similar channel -# Send from Slack, verify in Matrix room -# Send from Matrix room, 
verify in Slack -``` - ---- - -## Future Infrastructure Needs - -### Monitoring & Alerting (Not Implemented) - -**Health Checks Needed:** -- Bridge WebSocket connection status -- Matrix homeserver availability -- Message processing latency -- Database connection health -- Error rate thresholds - -**Potential Solutions:** -```bash -# Option 1: Simple systemd monitoring -systemctl status mautrix-slack | grep -q "active (running)" || alert - -# Option 2: Prometheus + Alertmanager -# - Export bridge metrics (if available) -# - Alert on service down, high error rate, message lag - -# Option 3: Uptime monitoring -# - External ping to Matrix homeserver -# - Check /_matrix/client/versions endpoint -# - Alert on HTTP errors or timeout -``` - -**Metrics to Track:** -- Bridge uptime percentage -- Messages processed (Slack → Matrix, Matrix → Slack) -- WebSocket reconnection events -- Database query performance -- Error counts by type - -**Alert Conditions:** -- Bridge down for >5 minutes -- No messages processed in >15 minutes (if active channels exist) -- Error rate >5% of total messages -- Database connection failures -- Disk space <10% free - -### Backup Strategy (Not Implemented) - -**Critical Data:** -- Matrix RocksDB: `/var/lib/matrix-continuwuity/db/` (66M) -- Bridge PostgreSQL: `mautrix_slack` database (172K) -- Registration files: `/var/lib/matrix-appservices/*.yaml` -- Secrets: sops-encrypted `secrets/secrets.yaml` (in git) - -**Backup Approach:** -```bash -# Daily database backups -ssh root@45.77.205.49 'tar czf /root/backups/matrix-$(date +%Y%m%d).tar.gz /var/lib/matrix-continuwuity/db/' -ssh root@45.77.205.49 'sudo -u postgres pg_dump mautrix_slack > /root/backups/bridge-$(date +%Y%m%d).sql' - -# Retention: 7 daily, 4 weekly, 12 monthly -# Store off-VPS (rsync to backup server or cloud storage) -``` - -**Recovery Procedure:** -1. Deploy NixOS configuration -2. Restore database backups -3. Restore registration files -4. 
Re-authenticate with Slack (new tokens via `login app`) -5. Verify message flow - -**Note:** Matrix database can be wiped and rebuilt from Slack if needed (current architecture treats Matrix as ephemeral view layer). - ---- - -## Current Architecture State (2025-10-26) - -### Deployed Services -``` -┌─────────────────────────────────────────────────────┐ -│ clarun.xyz (45.77.205.49) │ -│ │ -│ ┌─────────────────────────────────────────────┐ │ -│ │ nginx :443 (HTTPS) │ │ -│ │ - Matrix Client-Server API │ │ -│ │ - Forgejo (git.clarun.xyz) │ │ -│ └────────────┬────────────────────────────────┘ │ -│ │ │ -│ ├─→ conduwuit :8008 (127.0.0.1) │ -│ │ - Matrix homeserver │ -│ │ - RocksDB schema v18 │ -│ │ - 66M database │ -│ │ │ -│ └─→ Forgejo :3000 (127.0.0.1) │ -│ │ -│ ┌─────────────────────────────────────────────┐ │ -│ │ mautrix-slack :29319 (127.0.0.1) │ │ -│ │ - Socket Mode WebSocket to Slack │ │ -│ │ - PostgreSQL backend (172K) │ │ -│ │ - ~50 portal rooms │ │ -│ └────────────┬────────────────────────────────┘ │ -│ │ │ -│ └─→ PostgreSQL :5432 (unix socket) │ -│ │ -└─────────────────────────────────────────────────────┘ - │ - └─→ Slack API (Socket Mode WebSocket) - - Workspace: chochacho - - Bot token: xoxb-... - - App token: xapp-... -``` - -### Critical Networking Details -- **All internal services use IPv4 (127.0.0.1)** - NOT "localhost" -- Reason: `localhost` resolves to IPv6 `[::1]` but services bind IPv4-only -- Fixed in: nginx proxy_pass, bridge homeserverUrl configuration - -### Service Dependencies -``` -postgresql.service - └─→ mautrix-slack.service - └─→ matrix-continuwuity.service - └─→ nginx.service -``` - -### Data Flow -1. **Slack → Matrix:** - - Slack pushes event via Socket Mode WebSocket - - Bridge receives, transforms to Matrix event - - Bridge POSTs to conduwuit appservice endpoint - - conduwuit distributes to Matrix rooms - - Element clients receive via /sync - -2. 
**Matrix → Slack:** - - Element client sends message via conduwuit - - conduwuit forwards to bridge appservice endpoint - - Bridge transforms to Slack API call - - Bridge POSTs to Slack API (bot token) - - Appears in Slack channel - -### Security Model -- **Secrets:** Managed via sops-nix, deployed to `/run/secrets/` -- **Bridge tokens:** - - `as_token`: Bridge authenticates to Matrix - - `hs_token`: Matrix authenticates to bridge -- **Slack tokens:** - - `xoxb-`: Bot API calls - - `xapp-`: Socket Mode connection -- **No public bridge endpoint:** Socket Mode eliminates webhook requirement - -### Operational Notes -- Matrix database disposable (can rebuild from Slack) -- Bridge config fully declarative except sender_localpart fix -- Fresh database recommended after conduwuit version upgrades -- Debug logging currently enabled on conduwuit - - +@AGENTS.md diff --git a/GEMINI.md b/GEMINI.md new file mode 100644 index 0000000..54e415d --- /dev/null +++ b/GEMINI.md @@ -0,0 +1,9 @@ +# GEMINI.md + +@AGENTS.md + +## Skills + +For specialized tasks, read skill docs at `~/.claude/skills/*/SKILL.md`: + +- **worklog**: `~/.claude/skills/worklog/SKILL.md` - Create session worklogs