diff --git a/.beads/issues.jsonl b/.beads/issues.jsonl index 1fa1d4f..61a746c 100644 --- a/.beads/issues.jsonl +++ b/.beads/issues.jsonl @@ -15,16 +15,20 @@ {"id":"ops-jrz1-45v","title":"Matrix/Slack identity mismatch: dan vs vlad","description":"Matrix user @dan:clarun.xyz is linked to Slack user 'vlad'. Messages appear as vlad in Slack but dan in Element. Cosmetic confusion. Options: rename Matrix display name, or re-login bridge with different Slack account.","status":"open","priority":3,"issue_type":"task","created_at":"2025-12-05T19:38:19.899555475-08:00","updated_at":"2025-12-05T19:38:19.899555475-08:00"} {"id":"ops-jrz1-46y","title":"Write onboarding documentation","description":"Critical for non-programmers. Cover: login, opencode usage, Git setup (PAT workflow), resource limits, security hygiene. Keep concise.","status":"closed","priority":2,"issue_type":"task","created_at":"2025-12-05T15:32:41.586544583-08:00","updated_at":"2025-12-28T00:08:06.743739166-05:00","closed_at":"2025-12-28T00:08:06.743739166-05:00","close_reason":"Browser-based dev environment cancelled","dependencies":[{"issue_id":"ops-jrz1-46y","depends_on_id":"ops-jrz1-7j4","type":"blocks","created_at":"2025-12-05T15:33:25.328712413-08:00","created_by":"daemon","metadata":"{}"},{"issue_id":"ops-jrz1-46y","depends_on_id":"ops-jrz1-wj2","type":"blocks","created_at":"2025-12-05T15:33:25.351559821-08:00","created_by":"daemon","metadata":"{}"},{"issue_id":"ops-jrz1-46y","depends_on_id":"ops-jrz1-3so","type":"parent-child","created_at":"2025-12-05T17:05:47.401868669-08:00","created_by":"daemon","metadata":"{}"}]} {"id":"ops-jrz1-4jm","title":"Smoke test Matrix server (conduwuit)","description":"Verify Matrix homeserver is healthy: check /_matrix/client/versions endpoint, test registration, verify federation status (disabled). Quick health check after deployments.","status":"closed","priority":2,"issue_type":"task","created_at":"2025-12-05T18:09:47.220765063-08:00","updated_at":"2025-12-05T18:19:33.059734881-08:00","closed_at":"2025-12-05T18:19:33.059734881-08:00"} +{"id":"ops-jrz1-5ef","title":"Add local-scripts to systemPackages","description":"Add the local-scripts derivation to environment.systemPackages so scripts are available system-wide.","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-03T08:39:53.979819917-08:00","created_by":"dan","updated_at":"2026-01-03T08:45:47.753131986-08:00","closed_at":"2026-01-03T08:45:47.753131986-08:00","close_reason":"Merged into ops-jrz1-o9c (admin-scripts package includes systemPackages addition)","dependencies":[{"issue_id":"ops-jrz1-5ef","depends_on_id":"ops-jrz1-vw4","type":"blocks","created_at":"2026-01-03T08:40:02.788186493-08:00","created_by":"dan"}]} {"id":"ops-jrz1-5fk","title":"Smoke test Maubot service","description":"Verify Maubot is healthy: check management UI accessible via SSH tunnel, verify bot instances running, test plugin functionality. Quick health check after deployments.","status":"closed","priority":2,"issue_type":"task","created_at":"2025-12-05T18:09:47.33773092-08:00","updated_at":"2025-12-05T18:19:33.061388913-08:00","closed_at":"2025-12-05T18:19:33.061388913-08:00"} {"id":"ops-jrz1-5ki","title":"Set up programmatic QA test user for bridge testing","status":"open","priority":3,"issue_type":"task","created_at":"2025-12-05T20:17:04.312571398-08:00","updated_at":"2025-12-05T20:17:04.312571398-08:00"} {"id":"ops-jrz1-5oe","title":"Create NixOS module for code-server containers","description":"Module to manage per-user Podman containers, nginx routing, secrets. Use virtualisation.oci-containers. Generate systemd units.","status":"closed","priority":2,"issue_type":"task","created_at":"2025-12-05T17:16:54.656121092-08:00","updated_at":"2025-12-28T00:05:44.743524099-05:00","closed_at":"2025-12-28T00:05:44.743524099-05:00","close_reason":"Parent epic cancelled - browser-based dev approach abandoned","dependencies":[{"issue_id":"ops-jrz1-5oe","depends_on_id":"ops-jrz1-3so","type":"parent-child","created_at":"2025-12-05T17:17:36.386278268-08:00","created_by":"daemon","metadata":"{}"},{"issue_id":"ops-jrz1-5oe","depends_on_id":"ops-jrz1-d58","type":"blocks","created_at":"2025-12-05T17:17:38.694752468-08:00","created_by":"daemon","metadata":"{}"}]} {"id":"ops-jrz1-6of","title":"AI cost/rate limiting per user","description":"One user could drain API credits with runaway script. Need rate limiting per user, either via proxy middleware or opencode config. Track usage.","status":"closed","priority":2,"issue_type":"task","created_at":"2025-12-05T15:32:30.772304538-08:00","updated_at":"2025-12-05T17:42:42.773613559-08:00","closed_at":"2025-12-05T17:42:42.773613559-08:00","dependencies":[{"issue_id":"ops-jrz1-6of","depends_on_id":"ops-jrz1-3so","type":"parent-child","created_at":"2025-12-05T17:05:47.206816868-08:00","created_by":"daemon","metadata":"{}"},{"issue_id":"ops-jrz1-6of","depends_on_id":"ops-jrz1-wj2","type":"blocks","created_at":"2025-12-05T17:17:38.658742196-08:00","created_by":"daemon","metadata":"{}"}]} {"id":"ops-jrz1-7j4","title":"Git credential strategy for non-programmers","description":"Non-programmers can't manage SSH keys. Pre-configure git-credential-store or provide simple PAT workflow with docs. Store in persistent home with 600 perms.","status":"closed","priority":2,"issue_type":"task","created_at":"2025-12-05T15:32:19.673999683-08:00","updated_at":"2025-12-05T17:38:54.788694408-08:00","closed_at":"2025-12-05T17:38:54.788694408-08:00","dependencies":[{"issue_id":"ops-jrz1-7j4","depends_on_id":"ops-jrz1-3so","type":"parent-child","created_at":"2025-12-05T17:05:47.139749437-08:00","created_by":"daemon","metadata":"{}"}]} +{"id":"ops-jrz1-86g","title":"Add per-user resource limits (not just slice-wide)","description":"Currently user.slice has TasksMax=500, MemoryMax=80%, but individual user-XXXX.slice has infinity. One user can starve others. Add per-user limits via systemd drop-ins or user-XXXX.slice config.","status":"open","priority":2,"issue_type":"task","created_at":"2026-01-03T08:40:25.937465595-08:00","created_by":"dan","updated_at":"2026-01-03T08:40:25.937465595-08:00"} {"id":"ops-jrz1-88o","title":"Implement backup strategy for VPS","description":"No backups configured. Critical data: Matrix DB (622M), PostgreSQL (161M), Forgejo (2.5M), maubot (320K). No recovery path if disk fails. Need automated backups with off-site storage.","status":"closed","priority":1,"issue_type":"task","created_at":"2025-12-04T22:55:25.546850172-08:00","updated_at":"2025-12-05T00:56:27.720623612-08:00","closed_at":"2025-12-05T00:56:27.720623612-08:00"} {"id":"ops-jrz1-8m7","title":"Add cgroups limits for user slices","description":"Add soft resource limits to prevent one user/agent from crashing server.\n\n## Config\n```nix\nsystemd.slices.\"user\".sliceConfig = {\n MemoryMax = \"80%\";\n TasksMax = 500;\n CPUWeight = 100; # Fair sharing, no hard quota\n};\n```\n\n## Behavior\n- Memory: Users collectively can't exceed 80% RAM\n- Tasks: Max 500 processes per user (prevents fork bombs)\n- CPU: Fair sharing when contended, bursts allowed\n\n## Testing\n- Verify with `systemctl show user-1001.slice`\n- Test fork bomb doesn't crash server","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-02T20:16:22.600133044-08:00","created_by":"dan","updated_at":"2026-01-02T21:02:35.455928291-08:00","closed_at":"2026-01-02T21:02:35.455928291-08:00","close_reason":"Closed"} +{"id":"ops-jrz1-8mc","title":"configuration.nix: Document UID range 1000:65534 rationale","description":"UID range 1000:65534 excludes root but includes nobody (65534). Add comment explaining rationale. configuration.nix:70","status":"open","priority":4,"issue_type":"task","created_at":"2026-01-03T08:17:35.893969961-08:00","created_by":"dan","updated_at":"2026-01-03T08:26:28.782329252-08:00"} {"id":"ops-jrz1-9gd","title":"Upgrade VPS RAM for dev environments","description":"Current: 2GB. Need 4-8GB for multiple code-server containers. Coordinate with Vultr, plan maintenance window.","status":"closed","priority":2,"issue_type":"task","created_at":"2025-12-05T17:16:54.267689439-08:00","updated_at":"2025-12-28T00:08:06.748175273-05:00","closed_at":"2025-12-28T00:08:06.748175273-05:00","close_reason":"Browser-based dev environment cancelled","dependencies":[{"issue_id":"ops-jrz1-9gd","depends_on_id":"ops-jrz1-3so","type":"parent-child","created_at":"2025-12-05T17:17:36.331146543-08:00","created_by":"daemon","metadata":"{}"}]} {"id":"ops-jrz1-9pe","title":"Research: System packages for learner accounts","description":"How do dev users get access to toolchains (Go, Node, Rust, etc.)?\n\n## Findings\n\n**Users CAN self-install packages:**\n```bash\nnix profile install nixpkgs#go\nnix profile install nixpkgs#nodejs\nnix profile install nixpkgs#rustc\n```\n\nPackages go to `~/.nix-profile/bin`, already in PATH. Works today.\n\n**Devshells work too:**\n```bash\n# In project with flake.nix\nnix develop\n```\n\n## Options\n\n| Option | Pros | Cons |\n|--------|------|------|\n| **Self-service only** | Minimal config, user learns nix | Cold start friction |\n| **Global defaults** | Zero friction for common tools | Bloats system, version conflicts |\n| **Starter script** | One command setup, customizable | Another thing to maintain |\n| **direnv + devshells** | Per-project envs, reproducible | Needs direnv installed globally |\n\n## Current State\n- `nix profile install` works for users ✅\n- `nix develop` works ✅\n- direnv NOT installed globally\n- Only python3, uv in system packages\n\n## Recommendation\n1. Add `direnv` to global packages (enables per-project devshells)\n2. Document `nix profile install` for quick one-offs\n3. Provide example flake.nix templates for Go, Node, Rust projects\n4. Keep system packages minimal (python3, uv, direnv, git, vim)\n\n## Test Results\n```\n$ nix profile install nixpkgs#go\n$ go version\ngo version go1.22.8 linux/amd64\n```","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-02T12:27:32.894163417-08:00","created_by":"dan","updated_at":"2026-01-02T12:32:32.502649201-08:00","closed_at":"2026-01-02T12:32:32.502649201-08:00","close_reason":"Users can self-install via nix profile. Added direnv globally for devshells."} {"id":"ops-jrz1-9x8","title":"Claude CLI update mechanism","description":"Claude Code CLI is manually installed to /usr/local/bin/claude.\n\n## Current state\n- Installed via: curl -fsSL https://claude.ai/install.sh | bash\n- Copied to /usr/local/bin/claude\n- No automatic updates\n\n## Options\n1. Periodic manual update (run install script again)\n2. Systemd timer to check for updates\n3. Package via nix (would need custom derivation)\n\n## Acceptance criteria\nDocument the update process at minimum.","status":"open","priority":3,"issue_type":"task","created_at":"2026-01-02T16:46:03.908575951-08:00","created_by":"dan","updated_at":"2026-01-02T16:46:03.908575951-08:00"} +{"id":"ops-jrz1-a3w","title":"egress-watchdog: Document GNU grep -P dependency","description":"grep -oP requires GNU grep with PCRE. NixOS always has it, but add comment for portability awareness. scripts/egress-watchdog:25","status":"open","priority":4,"issue_type":"task","created_at":"2026-01-03T08:17:35.642729617-08:00","created_by":"dan","updated_at":"2026-01-03T08:26:28.546281769-08:00"} {"id":"ops-jrz1-av0","title":"Configure wildcard DNS and ACME cert","description":"Set up *.code.clarun.xyz DNS record and wildcard SSL cert via ACME. Depends on subdomain routing decision (kg0).","status":"open","priority":2,"issue_type":"task","created_at":"2025-12-05T17:16:54.387356964-08:00","updated_at":"2025-12-05T17:16:54.387356964-08:00","dependencies":[{"issue_id":"ops-jrz1-av0","depends_on_id":"ops-jrz1-3so","type":"parent-child","created_at":"2025-12-05T17:17:36.34918436-08:00","created_by":"daemon","metadata":"{}"},{"issue_id":"ops-jrz1-av0","depends_on_id":"ops-jrz1-kg0","type":"blocks","created_at":"2025-12-05T17:17:38.676800677-08:00","created_by":"daemon","metadata":"{}"}]} {"id":"ops-jrz1-ayl","title":"Rename sna-instagram-bot to something memorable","status":"open","priority":3,"issue_type":"task","created_at":"2025-12-08T16:54:31.223265094-08:00","updated_at":"2025-12-08T16:54:31.223265094-08:00"} {"id":"ops-jrz1-b09","title":"Research: Forgejo backup strategy","description":"Consider backup strategy for the Forgejo instance.\n\n## Questions\n- What data needs backing up? (repos, issues, users, config)\n- Where to back up to? (off-site, object storage, etc.)\n- Frequency?\n- Restore procedure?\n\n## Context\nForgejo runs on ops-jrz1 at :3000, stores git repos and metadata.","status":"open","priority":2,"issue_type":"task","created_at":"2026-01-02T19:01:55.854723242-08:00","created_by":"dan","updated_at":"2026-01-02T19:01:55.854723242-08:00"} @@ -33,6 +37,7 @@ {"id":"ops-jrz1-bhk","title":"Add disk quotas for user workspaces","description":"User could fill host disk via /var/lib/vscode/\u003cuser\u003e/. Add per-directory quotas or monitoring/alerting on disk usage.","status":"closed","priority":3,"issue_type":"task","created_at":"2025-12-05T15:32:41.199417226-08:00","updated_at":"2025-12-28T00:05:44.7635372-05:00","closed_at":"2025-12-28T00:05:44.7635372-05:00","close_reason":"Parent epic cancelled - browser-based dev approach abandoned","dependencies":[{"issue_id":"ops-jrz1-bhk","depends_on_id":"ops-jrz1-3so","type":"parent-child","created_at":"2025-12-05T17:05:47.309592029-08:00","created_by":"daemon","metadata":"{}"}]} {"id":"ops-jrz1-blh","title":"mautrix-slack edit panic persists in v25.11","description":"mautrix-slack panic on rapid message edits (race condition)\n\n**Root cause**: Edit event arrives before original message is stored in DB. ConvertEdit accesses nil metadata.\n\n**Location**: handleslack.go:575 - has TODO comment: 'this can panic?'\n\n**Reproduction**: Edit a Slack message within ~1 second of sending\n\n**Upstream status**: \n- v25.11 is latest (we're on it)\n- Known to devs (TODO in code)\n- No open issue filed yet\n\n**Stack trace**:\ngo.mau.fi/mautrix-slack/pkg/connector.(*SlackMessage).ConvertEdit\n handleslack.go:575\nmaunium.net/go/mautrix/bridgev2.(*Portal).handleRemoteEdit\n portal.go:2838","status":"closed","priority":2,"issue_type":"bug","created_at":"2025-12-05T19:40:33.255395189-08:00","updated_at":"2025-12-28T00:06:14.637057055-05:00","closed_at":"2025-12-28T00:06:14.637057055-05:00","close_reason":"Duplicate of ops-jrz1-f15 which has fix ready","comments":[{"id":2,"issue_id":"ops-jrz1-blh","author":"dan","text":"Confirmed panic exists in nixpkgs-unstable from 2025-12-02. Fix will be addressed via platform upgrade (see ops-jrz1-00e).","created_at":"2025-12-08T23:54:57Z"}]} {"id":"ops-jrz1-cmv","title":"Add egress rate limiting (iptables)","description":"Hard limit outbound connections per user to prevent mass exfil/scanning.\n\n## Config\n```nix\nnetworking.firewall.extraCommands = ''\n # Rate limit new outbound connections for regular users (uid 1000+)\n iptables -A OUTPUT -m state --state NEW -m owner --uid-owner 1000:65534 \\\n -m limit --limit 30/min --limit-burst 60 -j ACCEPT\n iptables -A OUTPUT -m state --state NEW -m owner --uid-owner 1000:65534 \\\n -j LOG --log-prefix \"EGRESS-LIMIT: \"\n iptables -A OUTPUT -m state --state NEW -m owner --uid-owner 1000:65534 \\\n -j REJECT\n'';\n```\n\n## Behavior\n- 30 new connections/min sustained, burst of 60\n- Over limit: logged and rejected\n- Doesn't affect established connections\n\n## Testing\n- `for i in {1..100}; do curl -s ifconfig.me \u0026 done`\n- Should see EGRESS-LIMIT in journal after ~60","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-02T20:16:32.276607792-08:00","created_by":"dan","updated_at":"2026-01-02T21:12:35.5888406-08:00","closed_at":"2026-01-02T21:12:35.5888406-08:00","close_reason":"Closed"} +{"id":"ops-jrz1-cpm","title":"Add admin alerting when killswitch fires","description":"Killswitch only logs to journald. Consider external notification (email, Matrix message, webhook) when users are terminated.","status":"open","priority":4,"issue_type":"task","created_at":"2026-01-03T08:40:26.416998752-08:00","created_by":"dan","updated_at":"2026-01-03T08:40:26.416998752-08:00"} {"id":"ops-jrz1-d38","title":"Add tmux to system packages","description":"Add tmux for session persistence. Users can run bots in tmux, disconnect, reconnect.","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-02T15:13:58.514256583-08:00","created_by":"dan","updated_at":"2026-01-02T17:25:59.102158299-08:00","closed_at":"2026-01-02T17:25:59.102158299-08:00","close_reason":"Closed"} {"id":"ops-jrz1-d58","title":"Build custom code-server container image","description":"Dockerfile with: code-server, opencode CLI, opencode VS Code extension (Open VSX), Python, Node, Git. Push to registry or build locally.","status":"closed","priority":2,"issue_type":"task","created_at":"2025-12-05T17:16:54.507577308-08:00","updated_at":"2025-12-28T00:05:44.736614157-05:00","closed_at":"2025-12-28T00:05:44.736614157-05:00","close_reason":"Parent epic cancelled - browser-based dev approach abandoned","dependencies":[{"issue_id":"ops-jrz1-d58","depends_on_id":"ops-jrz1-3so","type":"parent-child","created_at":"2025-12-05T17:17:36.369590207-08:00","created_by":"daemon","metadata":"{}"}]} {"id":"ops-jrz1-dg9","title":"Document pattern for adding dev tools to system","description":"Create documentation for the standard pattern of adding dev tools.\n\n## Pattern\n1. Add flake input (if not in nixpkgs)\n2. Add to environment.systemPackages\n3. Run nixos-rebuild switch\n4. Config stays per-user/per-repo\n\n## Document should cover\n- How to add a tool from nixpkgs\n- How to add a tool from external flake\n- How to package a tool not yet packaged\n- How to update a tool (flake lock update)\n\n## Location\ndocs/adding-dev-tools.md or similar","status":"open","priority":3,"issue_type":"task","created_at":"2026-01-02T16:36:04.613581812-08:00","created_by":"dan","updated_at":"2026-01-02T16:36:04.613581812-08:00"} @@ -42,29 +47,40 @@ {"id":"ops-jrz1-ecw","title":"Support multiple agentic coders: opencode, gemini, codex","description":"Ensure dev users can use their choice of agentic coding tool, not just Claude.\n\n## Tools Supported\n\n| Tool | Status | Install Method |\n|------|--------|----------------|\n| Claude Code | ✅ /usr/local/bin/claude | Manual install |\n| opencode | ✅ System package | Flake input (github:sst/opencode) |\n| gemini-cli | ✅ User install | `npm install -g @google/gemini-cli` |\n| codex | ✅ User install | `npm install -g @openai/codex` |\n\n## System Packages Added\n- `opencode` - AI coding agent (v1.0.224)\n- `nodejs_22` - For npm-based tool installation\n\n## User Install Instructions\nFor gemini-cli or codex:\n```bash\nnpm install -g @google/gemini-cli\nnpm install -g @openai/codex\n```\n\n## Auth Storage\nSee ops-jrz1-xz7 for multi-user auth research.\n\n## Remaining\n- gastown (gt) blocked - no release binaries yet (ops-jrz1-30e)","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-02T16:25:11.578992492-08:00","created_by":"dan","updated_at":"2026-01-02T17:41:40.793211973-08:00","closed_at":"2026-01-02T17:41:40.793211973-08:00","close_reason":"Closed"} {"id":"ops-jrz1-ezf","title":"Maubot plugin dev workflow for learners","description":"Design frictionless dev workflow for Python/Go learners building maubot plugins.\n\n**Requirements**:\n- No SSH tunnel setup for learners\n- Fast feedback loop (edit → see bot respond)\n- Circuit breakers (allowed_rooms, rate limits)\n- Test channel: #vlads-pad (Slack) ↔ Matrix\n\n**Options being considered**:\n1. Git-push deploy: push to repo → CI builds .mbp → deploys to maubot\n2. Code-server containers: browser IDE on VPS, deploy script talks to maubot locally\n3. Hybrid: code-server + git workflow\n\n**Related**: ops-jrz1-3so (browser-dev-environment epic)","status":"closed","priority":2,"issue_type":"feature","created_at":"2025-12-06T01:36:26.529372206-08:00","updated_at":"2025-12-28T10:12:31.096280407-05:00","closed_at":"2025-12-28T10:12:31.096280407-05:00","close_reason":"Depends on cancelled browser-dev epic (ops-jrz1-3so)","dependencies":[{"issue_id":"ops-jrz1-ezf","depends_on_id":"ops-jrz1-3so","type":"parent-child","created_at":"2025-12-06T12:18:06.743837766-08:00","created_by":"daemon","metadata":"{}"}]} {"id":"ops-jrz1-f15","title":"mautrix-slack ConvertEdit panic on message edits","description":"mautrix-slack v25.11 panics with nil pointer dereference at handleslack.go:575 when processing Slack message_changed events. \n\nFIX READY at /tmp/mautrix-slack-fix (branch: fix-convert-edit-nil-panic)\n\nThree changes needed in pkg/connector/handleslack.go:\n1. Line 181: Add 'evt.SubMessage != nil \u0026\u0026' before evt.SubMessage.SubType\n2. Lines 573-578: Add nil checks for SubMessage and SubMessage.Edited at start of ConvertEdit()\n3. Line 609: Add 's.Data.SubMessage != nil \u0026\u0026' before accessing Timestamp\n\nTo submit PR: fork mautrix/slack, push branch, open PR against main.","status":"closed","priority":2,"issue_type":"bug","created_at":"2025-12-27T10:03:28.857940916-05:00","created_by":"dan","updated_at":"2025-12-28T10:02:58.660206297-05:00","closed_at":"2025-12-28T10:02:58.660206297-05:00","close_reason":"Won't fix for now - fix ready in /tmp/mautrix-slack-fix if needed later"} +{"id":"ops-jrz1-f6i","title":"Update docs for declarative script workflow","description":"Update AGENTS.md and any onboarding docs to remove manual scp deployment steps. Scripts now deploy automatically with nixos-rebuild.","status":"closed","priority":3,"issue_type":"task","created_at":"2026-01-03T08:39:54.759066801-08:00","created_by":"dan","updated_at":"2026-01-03T09:21:09.793836001-08:00","closed_at":"2026-01-03T09:21:09.793836001-08:00","close_reason":"Updated AGENTS.md with declarative script deployment info","dependencies":[{"issue_id":"ops-jrz1-f6i","depends_on_id":"ops-jrz1-sdz","type":"blocks","created_at":"2026-01-03T08:40:02.875606451-08:00","created_by":"dan"}]} {"id":"ops-jrz1-gci","title":"Enable fail2ban for SSH brute force protection","description":"SSH brute force attempts generate log noise but don't pose security risk (key-only auth). fail2ban would help but is low priority. Deferred pending RFC on SSH log management strategy.","status":"open","priority":2,"issue_type":"task","created_at":"2025-12-04T21:03:22.651495544-08:00","updated_at":"2025-12-04T22:55:13.805471391-08:00","dependencies":[{"issue_id":"ops-jrz1-gci","depends_on_id":"ops-jrz1-nir","type":"blocks","created_at":"2025-12-04T22:56:14.777377818-08:00","created_by":"daemon","metadata":"{}"}]} {"id":"ops-jrz1-ghd","title":"Research: User isolation and sandboxing options","description":"Can users see each other's code? What isolation do we need?\n\n## Current state\n- Standard Unix permissions\n- No containers or VMs\n- Users in same `users` group\n\n## Options to evaluate\n1. **Unix perms only** - `chmod 700 ~` (current)\n2. **Separate groups** - Per-user primary group\n3. **Containers** - systemd-nspawn, podman\n4. **VMs** - microVMs per user (heavy)\n5. **Firejail/bubblewrap** - Process sandboxing\n\n## Questions\n- What's the threat model? (curious learners vs malicious actors)\n- How much isolation is worth the complexity?\n- Does NixOS offer anything special here?","status":"open","priority":2,"issue_type":"task","created_at":"2026-01-02T12:27:33.291275677-08:00","created_by":"dan","updated_at":"2026-01-02T12:27:33.291275677-08:00"} {"id":"ops-jrz1-glk","title":"VS Code extension policy (security)","description":"Extensions can run arbitrary code. Decide: allow arbitrary installs, or curate/restrict? For non-programmers, pre-install safe set and optionally disable marketplace.","status":"closed","priority":3,"issue_type":"task","created_at":"2025-12-05T15:32:41.463030936-08:00","updated_at":"2025-12-28T00:08:06.752037252-05:00","closed_at":"2025-12-28T00:08:06.752037252-05:00","close_reason":"Browser-based dev environment cancelled","dependencies":[{"issue_id":"ops-jrz1-glk","depends_on_id":"ops-jrz1-3so","type":"parent-child","created_at":"2025-12-05T17:05:47.372120465-08:00","created_by":"daemon","metadata":"{}"}]} +{"id":"ops-jrz1-gwk","title":"Declarative script deployment via NixOS","description":"Scripts in scripts/ are manually deployed via scp to /usr/local/bin/. Convert to NixOS declarative deployment using writeShellApplication.\n\nTwo packages (per orch consensus):\n1. watchdog-scripts - killswitch, cpu-watchdog, egress-watchdog\n → Referenced directly by systemd, NOT in PATH\n2. admin-scripts - learner-add.sh, learner-remove.sh\n → Added to systemPackages for interactive use\n\nSubtasks:\n- ops-jrz1-vw4: Create watchdog-scripts package\n- ops-jrz1-o9c: Create admin-scripts package (parallel)\n- ops-jrz1-ujw: Update systemd services to use store paths\n- ops-jrz1-sdz: Remove manual /usr/local/bin scripts\n- ops-jrz1-f6i: Update docs","status":"closed","priority":2,"issue_type":"epic","created_at":"2026-01-03T08:39:41.242421474-08:00","created_by":"dan","updated_at":"2026-01-03T09:21:09.83027789-08:00","closed_at":"2026-01-03T09:21:09.83027789-08:00","close_reason":"Epic complete: All scripts now deployed declaratively via writeShellApplication"} {"id":"ops-jrz1-i8i","title":"Enable mautrix-slack relay mode for bot bridging","status":"closed","priority":2,"issue_type":"task","created_at":"2025-12-06T19:09:42.087506995-08:00","updated_at":"2025-12-06T19:09:47.612545472-08:00","closed_at":"2025-12-06T19:09:47.612545472-08:00"} {"id":"ops-jrz1-iok","title":"Instagram bot missing base-config.yaml","description":"Plugin was missing base-config.yaml required by maubot Config class. Fixed in commit 4b9481d.","status":"closed","priority":2,"issue_type":"bug","created_at":"2025-12-06T13:02:10.103730128-08:00","updated_at":"2025-12-06T13:02:15.055396318-08:00","closed_at":"2025-12-06T13:02:15.055396318-08:00"} {"id":"ops-jrz1-jit","title":"Logging and monitoring for dev environments","description":"No observability plan. Need: container CPU/mem metrics, nginx logs, disk usage monitoring, alert on repeated 401s or resource exhaustion.","status":"closed","priority":3,"issue_type":"task","created_at":"2025-12-05T15:32:41.318448038-08:00","updated_at":"2025-12-28T00:08:06.750234292-05:00","closed_at":"2025-12-28T00:08:06.750234292-05:00","close_reason":"Browser-based dev environment cancelled","dependencies":[{"issue_id":"ops-jrz1-jit","depends_on_id":"ops-jrz1-3so","type":"parent-child","created_at":"2025-12-05T17:05:47.343610481-08:00","created_by":"daemon","metadata":"{}"}]} {"id":"ops-jrz1-jvt","title":"Add beads (bd) to system packages via flake input","description":"Add beads CLI as a system-wide package using the flake input pattern.\n\n## Implementation\n```nix\n# flake.nix inputs\nbeads = {\n url = \"github:steveyegge/beads\";\n inputs.nixpkgs.follows = \"nixpkgs-unstable\";\n};\n\n# configuration.nix or module\nenvironment.systemPackages = [\n inputs.beads.packages.${system}.default\n];\n```\n\n## Why this approach\nPer orch consensus:\n- Flake inputs + systemPackages is NixOS best practice\n- Reproducible, version-pinned via flake.lock\n- All users get same version\n- Config stays per-repo (.beads/)\n\n## Acceptance criteria\n- `bd --version` works for all users\n- No manual install needed\n- Pinned in flake.lock","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-02T16:36:04.106750429-08:00","created_by":"dan","updated_at":"2026-01-02T17:25:59.114217153-08:00","closed_at":"2026-01-02T17:25:59.114217153-08:00","close_reason":"Closed"} +{"id":"ops-jrz1-k1z","title":"killswitch: Rename USER variable to avoid shadowing","description":"USER=\"$1\" shadows shell builtin $USER. Rename to TARGET_USER for clarity. scripts/killswitch:15","status":"open","priority":3,"issue_type":"task","created_at":"2026-01-03T08:17:35.258067292-08:00","created_by":"dan","updated_at":"2026-01-03T08:17:50.295351625-08:00"} {"id":"ops-jrz1-k2a","title":"Security posture and threat analysis for dev server","description":"Perform security analysis for the shared dev server environment.\n\n## Scope\n- Multi-user SSH access\n- Agentic coders (Claude, etc.) running with user privileges\n- Shared Slack tokens\n- Network egress\n- Nix store access\n\n## Questions to answer\n- What's the threat model? (curious devs vs malicious actors)\n- What can a compromised user account access?\n- What can an agentic coder do that a human couldn't?\n- Are there secrets at risk?\n- What isolation is needed?\n\n## Deliverables\n- Threat model document\n- Risk assessment\n- Recommendations for mitigations","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-02T16:34:32.681248615-08:00","created_by":"dan","updated_at":"2026-01-02T19:14:53.774623716-08:00","closed_at":"2026-01-02T19:14:53.774623716-08:00","close_reason":"Closed"} {"id":"ops-jrz1-kg0","title":"Switch to subdomain routing (dan.code.clarun.xyz)","description":"Path-based routing (/code/dan/) is fragile. Extensions assume root path, cookies scope incorrectly, PWA breaks. Switch to wildcard subdomains for cleaner isolation.","status":"closed","priority":1,"issue_type":"task","created_at":"2025-12-05T15:32:19.283887085-08:00","updated_at":"2025-12-05T17:23:11.983564455-08:00","closed_at":"2025-12-05T17:23:11.983564455-08:00","dependencies":[{"issue_id":"ops-jrz1-kg0","depends_on_id":"ops-jrz1-3so","type":"parent-child","created_at":"2025-12-05T17:05:47.043217984-08:00","created_by":"daemon","metadata":"{}"}]} {"id":"ops-jrz1-kia","title":"Container reset mechanism (keep workspace)","description":"If user breaks their environment, need simple way to wipe container and restore default image while preserving /workspace. Script or admin command.","status":"closed","priority":3,"issue_type":"task","created_at":"2025-12-05T15:32:31.045592689-08:00","updated_at":"2025-12-28T00:05:44.757842852-05:00","closed_at":"2025-12-28T00:05:44.757842852-05:00","close_reason":"Parent epic cancelled - browser-based dev approach abandoned","dependencies":[{"issue_id":"ops-jrz1-kia","depends_on_id":"ops-jrz1-3so","type":"parent-child","created_at":"2025-12-05T17:05:47.275530016-08:00","created_by":"daemon","metadata":"{}"}]} +{"id":"ops-jrz1-lae","title":"egress-watchdog: Fix subshell gotcha in while-read pipeline","description":"while-read in pipeline runs in subshell - variables don't persist outside loop. Use process substitution: while read ...; done \u003c \u003c(echo \"$hits\" | grep ...). scripts/egress-watchdog:25","status":"open","priority":2,"issue_type":"task","created_at":"2026-01-03T08:17:35.401495377-08:00","created_by":"dan","updated_at":"2026-01-03T08:26:28.256622008-08:00"} +{"id":"ops-jrz1-meh","title":"cpu-watchdog: Add flock for atomic strike counter updates","description":"Read-modify-write of strike counter not atomic. Systemd timer serializes runs so low risk now, but add flock if parallelism added later. scripts/cpu-watchdog:29-31","status":"open","priority":3,"issue_type":"task","created_at":"2026-01-03T08:17:35.759126212-08:00","created_by":"dan","updated_at":"2026-01-03T08:26:28.66076716-08:00"} {"id":"ops-jrz1-mh2","title":"Research: Forgejo integration for shared projects","description":"How does beads/bd integrate with our Forgejo git server (git.clarun.xyz)?\n\n## Questions\n- Can bd sync to Forgejo repos?\n- How do dev users on the server collaborate on shared projects?\n- Is there a git workflow that makes sense (forks? shared repo? branches?)\n- Does bd need any special config for Forgejo vs GitHub?\n\n## Context\n- Forgejo running at git.clarun.xyz\n- Dev users have SSH access to server\n- May want shared project tracking via beads","status":"open","priority":2,"issue_type":"task","created_at":"2026-01-02T16:24:01.771168961-08:00","created_by":"dan","updated_at":"2026-01-02T16:24:01.771168961-08:00"} {"id":"ops-jrz1-ndl","title":"Browser-based dev environment (code-server)","description":"Explore setting up browser-based development:\n\nOptions:\n- code-server / openvscode-server - VS Code in browser\n- ttyd / wetty - terminal in browser \n- PWA install to home screen for native app feel\n\nCould combine with Tailscale for secure access without exposing ports.\n\nRef: ops-dev thin client brainstorm session","notes":"Design doc created: specs/004-browser-dev-environment/design.md - covers architecture, tech choices, resource planning, security model, rollout phases","status":"closed","priority":2,"issue_type":"feature","created_at":"2025-12-04T15:08:02.406274744-08:00","updated_at":"2025-12-05T17:05:52.872944892-08:00","closed_at":"2025-12-05T17:05:52.872944892-08:00"} {"id":"ops-jrz1-nir","title":"RFC: SSH log noise reduction strategy","description":"Research showed 99.8% of SSH logs are scanner noise (9000 failed attempts/day). Options: (1) Change SSH port - simple, ~99% reduction (2) journald filter - surgical but complex (3) LogLevel ERROR - loses successful login audit trail (4) fail2ban - bans IPs, partial reduction. Orch consensus: Gemini opposed LogLevel ERROR due to losing audit trail, GPT supported. Need RFC to decide approach. See posture review from Dec 2025 session.","status":"open","priority":3,"issue_type":"task","created_at":"2025-12-04T22:55:13.990334935-08:00","updated_at":"2025-12-04T22:55:13.990334935-08:00"} {"id":"ops-jrz1-nvx","title":"Slack bot architecture: Matrix-first approach","description":"**Decision**: Use Matrix as primary platform for Slack bot development.\n\n**Architecture**: Bots run as maubot plugins (or Matrix bots), communicate to Slack via mautrix-slack bridge.\n\n**Rationale**:\n- Existing infrastructure (maubot deployed, bridge working)\n- Single platform to manage\n- Bots work with Matrix users too\n- Avoid Socket Mode contention (only one xapp- connection allowed)\n\n**Trade-offs accepted**:\n- Bridge dependency (edit panic bug exists)\n- Extra latency through bridge hop\n- Limited to bridged channels\n\n**Alternative considered (Option B - direct Slack API)**:\n- Could use xoxb- token for outbound-only (REST)\n- Would need new Slack app for full Socket Mode independence\n- Deferred for now\n\n**Credentials available**:\n- slack-oauth-token (xoxb-) - shareable for REST calls if needed\n- slack-app-token (xapp-) - reserved for bridge Socket Mode\n\n**Status**: DECIDED - staying with Matrix-first","status":"closed","priority":3,"issue_type":"task","created_at":"2025-12-05T23:12:22.011872713-08:00","updated_at":"2025-12-05T23:12:28.329467732-08:00","closed_at":"2025-12-05T23:12:28.329467732-08:00"} {"id":"ops-jrz1-nwv","title":"Package graphite CLI (gt) for NixOS","description":"Graphite CLI (gt) is not in nixpkgs. Need to package it.\n\n## Research needed\n- How is gt distributed? (npm, binary, go?)\n- Is there an existing nix package or flake?\n- If not, create minimal derivation\n\n## Options\n1. Find existing flake/overlay\n2. Use buildNpmPackage if it's npm-based\n3. Fetch pre-built binary\n\n## Once packaged\nAdd to system packages via flake input pattern (same as beads).","status":"closed","priority":3,"issue_type":"task","created_at":"2026-01-02T16:36:04.374192123-08:00","created_by":"dan","updated_at":"2026-01-02T16:37:46.981193033-08:00","closed_at":"2026-01-02T16:37:46.981193033-08:00","close_reason":"Wrong tool - gt is gastown, not graphite"} +{"id":"ops-jrz1-o9c","title":"Create admin-scripts package for systemPackages","description":"Package learner-add.sh, learner-remove.sh using writeShellApplication. Add to environment.systemPackages so they're available in PATH for interactive admin use.","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-03T08:45:35.623169977-08:00","created_by":"dan","updated_at":"2026-01-03T09:20:08.655105165-08:00","closed_at":"2026-01-03T09:20:08.655105165-08:00","close_reason":"Implemented admin-scripts (learner-add, learner-remove) using writeShellApplication, added to systemPackages"} +{"id":"ops-jrz1-oxx","title":"Add disk quota or watchdog for /home","description":"No disk limits for users. Could fill /home. Options: ext4 quotas, btrfs subvolume limits, or simple watchdog.","status":"open","priority":3,"issue_type":"task","created_at":"2026-01-03T08:40:26.188569342-08:00","created_by":"dan","updated_at":"2026-01-03T08:40:26.188569342-08:00"} {"id":"ops-jrz1-p2d","title":"Add egress connection logging","description":"Log all new outbound connections for forensics.\n\n## Config\n```nix\nnetworking.firewall.extraCommands = ''\n # Log all new outbound from regular users\n iptables -A OUTPUT -m state --state NEW -m owner --uid-owner 1000:65534 \\\n -j LOG --log-prefix \"EGRESS: \" --log-level info\n'';\n```\n\n## Usage\n```bash\n# View egress logs\njournalctl -k | grep EGRESS\n\n# Watch live\njournalctl -kf | grep EGRESS\n```\n\n## Notes\n- Logs before rate limit rules (if both implemented)\n- Includes source UID, dest IP, dest port","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-02T20:17:39.566590459-08:00","created_by":"dan","updated_at":"2026-01-02T21:12:35.575052381-08:00","closed_at":"2026-01-02T21:12:35.575052381-08:00","close_reason":"Closed"} {"id":"ops-jrz1-qxr","title":"mautrix-slack message edit panic (upstream bug)","description":"Bridge upgraded to v25.11. Need to verify if edit panic is fixed by testing a Slack message edit. Watch logs: journalctl -u mautrix-slack -f | grep -E 'ERR|panic|edit'","status":"closed","priority":2,"issue_type":"bug","created_at":"2025-12-05T18:22:38.18203834-08:00","updated_at":"2025-12-05T19:36:00.556011621-08:00","closed_at":"2025-12-05T19:36:00.556011621-08:00","dependencies":[{"issue_id":"ops-jrz1-qxr","depends_on_id":"ops-jrz1-03o","type":"blocks","created_at":"2025-12-05T18:24:23.259399275-08:00","created_by":"daemon","metadata":"{}"}]} {"id":"ops-jrz1-rkp","title":"Add egress abuse watchdog","description":"Monitor for users hitting egress rate limits, kill if sustained.\n\n## Script: /usr/local/bin/egress-watchdog\n```bash\n#\\!/usr/bin/env bash\n# Kill users who keep hitting egress limits\nTHRESHOLD=10 # EGRESS-LIMIT hits per minute\nCOUNTFILE=\"/var/lib/egress-watchdog\"\nmkdir -p \"$COUNTFILE\"\n\n# Count recent limit hits per UID\njournalctl -k --since \"1 minute ago\" 2\u003e/dev/null | grep \"EGRESS-LIMIT\" | \\\n grep -oP 'UID=\\K[0-9]+' | sort | uniq -c | while read count uid; do\n \n user=$(getent passwd \"$uid\" | cut -d: -f1)\n [ -z \"$user\" ] \u0026\u0026 continue\n \n if [ \"$count\" -gt \"$THRESHOLD\" ]; then\n strikes=$(cat \"$COUNTFILE/$user\" 2\u003e/dev/null || echo 0)\n strikes=$((strikes + 1))\n echo \"$strikes\" \u003e \"$COUNTFILE/$user\"\n logger -t egress-watchdog \"User $user hit egress limit $count times (strike $strikes/3)\"\n \n if [ \"$strikes\" -ge 3 ]; then\n /usr/local/bin/killswitch \"$user\" \"egress abuse ($count hits)\"\n rm -f \"$COUNTFILE/$user\"\n fi\n else\n rm -f \"$COUNTFILE/$user\"\n fi\ndone\n```\n\n## Behavior\n- Runs every minute (same timer as CPU watchdog, or separate)\n- 3 consecutive minutes of \u003e10 blocked connections = kill\n- Works with egress rate limiting (ops-jrz1-cmv)\n\n## Dependencies\n- Requires ops-jrz1-cmv (egress rate limiting)\n- Requires ops-jrz1-396 (killswitch script)","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-02T20:21:09.516724064-08:00","created_by":"dan","updated_at":"2026-01-03T06:02:02.132992356-08:00","closed_at":"2026-01-03T06:02:02.132992356-08:00","close_reason":"Egress watchdog deployed and tested. Script monitors EGRESS-LIMIT kernel log entries, tracks strikes per user, kills after 3 strikes.","dependencies":[{"issue_id":"ops-jrz1-rkp","depends_on_id":"ops-jrz1-396","type":"blocks","created_at":"2026-01-02T20:21:14.314011866-08:00","created_by":"dan"},{"issue_id":"ops-jrz1-rkp","depends_on_id":"ops-jrz1-cmv","type":"blocks","created_at":"2026-01-02T20:21:14.352411765-08:00","created_by":"dan"}]} +{"id":"ops-jrz1-sdz","title":"Remove /usr/local/bin scripts from server","description":"After declarative deployment works, clean up manually deployed scripts from /usr/local/bin on the server.","status":"closed","priority":3,"issue_type":"task","created_at":"2026-01-03T08:39:54.483032394-08:00","created_by":"dan","updated_at":"2026-01-03T09:20:34.591216526-08:00","closed_at":"2026-01-03T09:20:34.591216526-08:00","close_reason":"Removed all manual scripts from /usr/local/bin/","dependencies":[{"issue_id":"ops-jrz1-sdz","depends_on_id":"ops-jrz1-ujw","type":"blocks","created_at":"2026-01-03T08:40:02.851476398-08:00","created_by":"dan"},{"issue_id":"ops-jrz1-sdz","depends_on_id":"ops-jrz1-o9c","type":"blocks","created_at":"2026-01-03T08:45:48.023849189-08:00","created_by":"dan"}]} {"id":"ops-jrz1-t73","title":"Rename learner to dev in scripts and docs","description":"Rename terminology from \"learner\" to \"dev\" or \"user\" across:\n\n- scripts/learner-add.sh → dev-add.sh\n- scripts/learner-remove.sh → dev-remove.sh\n- /etc/slack-learner.env → /etc/slack-dev.env\n- learners group → devs group\n- docs/learner-*.md\n- tests/test-learner-env.sh\n\nLow priority cleanup.","status":"open","priority":3,"issue_type":"task","created_at":"2026-01-02T12:32:40.340984626-08:00","created_by":"dan","updated_at":"2026-01-02T12:32:40.340984626-08:00"} {"id":"ops-jrz1-u0w","title":"Security review of running server","status":"closed","priority":2,"issue_type":"task","created_at":"2025-12-04T21:03:22.420507724-08:00","updated_at":"2025-12-04T21:04:31.989886731-08:00","closed_at":"2025-12-04T21:04:31.989886731-08:00"} +{"id":"ops-jrz1-ujw","title":"Update systemd services to use nix store paths","description":"Change ExecStart from /usr/local/bin/cpu-watchdog to use the derivation path. Either reference package directly or use pkgs.writeShellApplication.","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-03T08:39:54.227335183-08:00","created_by":"dan","updated_at":"2026-01-03T09:20:08.685831615-08:00","closed_at":"2026-01-03T09:20:08.685831615-08:00","close_reason":"Systemd services now reference Nix store paths via ${pkg}/bin/script","dependencies":[{"issue_id":"ops-jrz1-ujw","depends_on_id":"ops-jrz1-5ef","type":"blocks","created_at":"2026-01-03T08:40:02.815677839-08:00","created_by":"dan"}]} {"id":"ops-jrz1-vix","title":"Evaluate home-manager for per-user config","description":"Evaluate whether home-manager adds value for our setup.\n\n## What home-manager could manage\n- Shell config (.bashrc, .zshrc)\n- Git config (.gitconfig)\n- Tool configs (~/.config/*)\n- direnv integration\n\n## Questions\n- Do we need declarative per-user dotfiles?\n- Is the complexity worth it for a small team?\n- Can we start without it and add later?\n\n## Recommendation from consensus\n\"Optional but recommended\" - good for pushing default configs to all devs.\nStart without it, add if pain point emerges.","status":"open","priority":3,"issue_type":"task","created_at":"2026-01-02T16:36:04.849881753-08:00","created_by":"dan","updated_at":"2026-01-02T16:36:04.849881753-08:00"} +{"id":"ops-jrz1-vw4","title":"Create watchdog-scripts package (writeShellApplication)","description":"Package killswitch, cpu-watchdog, egress-watchdog using writeShellApplication. Include runtimeInputs for procps, gawk, systemd, etc. These scripts are NOT added to PATH - only referenced directly by systemd services.","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-03T08:39:53.745385806-08:00","created_by":"dan","updated_at":"2026-01-03T09:20:08.62999227-08:00","closed_at":"2026-01-03T09:20:08.62999227-08:00","close_reason":"Implemented watchdog-scripts (killswitch, cpu-watchdog, egress-watchdog) using writeShellApplication with proper runtimeInputs"} {"id":"ops-jrz1-w68","title":"Remote dev environment security and setup research","description":"Research and test security/setup questions for learner remote dev environments.\n\n## Context\nTwo dev paths identified:\n1. **Server-first**: SSH in, run agentic coders on server\n2. **Local VS Code**: Remote-SSH extension, code on server\n\nBoth have open questions around sandboxing, system packages, deployment access, and security boundaries.\n\n## Scope\n- System package management for learners\n- User isolation and sandboxing options\n- Agentic coder security (what can Claude do?)\n- Deployment pipeline security\n- VS Code extension behavior testing\n- Resource limits and quotas\n\n## Deliverables\n- Answers to open questions (documented)\n- Security recommendations\n- Implementation plan for chosen approach","status":"open","priority":2,"issue_type":"epic","created_at":"2026-01-02T12:26:48.104374079-08:00","created_by":"dan","updated_at":"2026-01-02T12:26:48.104374079-08:00","dependencies":[{"issue_id":"ops-jrz1-w68","depends_on_id":"ops-jrz1-9pe","type":"blocks","created_at":"2026-01-02T12:27:59.277687811-08:00","created_by":"dan"},{"issue_id":"ops-jrz1-w68","depends_on_id":"ops-jrz1-ghd","type":"blocks","created_at":"2026-01-02T12:27:59.32841874-08:00","created_by":"dan"},{"issue_id":"ops-jrz1-w68","depends_on_id":"ops-jrz1-3b1","type":"blocks","created_at":"2026-01-02T12:27:59.375858081-08:00","created_by":"dan"},{"issue_id":"ops-jrz1-w68","depends_on_id":"ops-jrz1-3au","type":"blocks","created_at":"2026-01-02T12:27:59.428509997-08:00","created_by":"dan"},{"issue_id":"ops-jrz1-w68","depends_on_id":"ops-jrz1-3jo","type":"blocks","created_at":"2026-01-02T12:27:59.473581774-08:00","created_by":"dan"},{"issue_id":"ops-jrz1-w68","depends_on_id":"ops-jrz1-bbn","type":"blocks","created_at":"2026-01-02T12:27:59.523975339-08:00","created_by":"dan"}]} {"id":"ops-jrz1-wj2","title":"Design API key provisioning strategy","description":"opencode needs API keys (OpenAI, Anthropic). Options: 1) Shared key with proxy + rate limiting, 2) Per-user keys in sops-nix. Need to prevent key exposure and enable usage tracking.","status":"closed","priority":1,"issue_type":"task","created_at":"2025-12-05T15:32:19.526073243-08:00","updated_at":"2025-12-05T17:25:10.534718515-08:00","closed_at":"2025-12-05T17:25:10.534718515-08:00","dependencies":[{"issue_id":"ops-jrz1-wj2","depends_on_id":"ops-jrz1-3so","type":"parent-child","created_at":"2025-12-05T17:05:47.103332379-08:00","created_by":"daemon","metadata":"{}"}]} {"id":"ops-jrz1-xz1","title":"Fix maubot admin UI exposed to internet (port 29316)","description":"Maubot admin UI on port 29316 is publicly accessible (returns 401 but API surface exposed). Firewall explicitly allows this port. Risk: brute force on admin password, direct exploit of any maubot vulnerabilities. Fix: bind to 127.0.0.1 only, remove from firewall, access via SSH tunnel.","status":"closed","priority":1,"issue_type":"bug","created_at":"2025-12-04T21:03:22.531676543-08:00","updated_at":"2025-12-04T22:35:24.162735368-08:00","closed_at":"2025-12-04T22:35:24.162735368-08:00"} {"id":"ops-jrz1-xz7","title":"Research: Multi-user auth storage for agentic coders","description":"Investigate where auth credentials are stored for each agentic coder when multiple users authenticate:\n\n## Questions\n- Claude Code: Where is OAuth token stored? ~/.claude? Conflicts between users?\n- opencode: Auth storage location?\n- gemini-cli: Auth storage?\n- codex: Auth storage?\n\n## Goal\nUnderstand if there are isolation issues when multiple users auth on same server.","status":"open","priority":2,"issue_type":"task","created_at":"2026-01-02T17:30:15.028994987-08:00","created_by":"dan","updated_at":"2026-01-02T17:30:15.028994987-08:00"} +{"id":"ops-jrz1-yhu","title":"configuration.nix: Consider custom iptables chain for egress rules","description":"Same iptables match pattern repeated 8 times. Could create custom chain for cleaner rule management. Optional - readability tradeoff. configuration.nix:68-79","status":"open","priority":3,"issue_type":"task","created_at":"2026-01-03T08:17:35.532609792-08:00","created_by":"dan","updated_at":"2026-01-03T08:26:28.411759428-08:00"} {"id":"ops-jrz1-zvh","title":"Fix maubot health check (failing every 5 min)","description":"Health check at /_matrix/maubot/v1/version returns 401 (auth required). Check script doesn't provide auth token. Spamming error logs every 5 minutes.","status":"closed","priority":2,"issue_type":"bug","created_at":"2025-12-04T22:55:25.755541054-08:00","updated_at":"2025-12-05T02:00:19.284410671-08:00","closed_at":"2025-12-05T02:00:19.284410671-08:00"} diff --git a/AGENTS.md b/AGENTS.md index a2a0197..5db13ab 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -40,7 +40,12 @@ NixOS-based Matrix homeserver (conduwuit) with mautrix-slack bridge for Slack ├── docs/ # Documentation │ ├── platform-vision.md # North star document │ └── worklogs/ # Deployment logs -└── scripts/ # Utility scripts +└── scripts/ # Utility scripts (packaged via writeShellApplication) + ├── killswitch # Emergency user termination + ├── cpu-watchdog # CPU abuse detection + ├── egress-watchdog # Egress rate limit abuse detection + ├── learner-add.sh # Add dev user account + └── learner-remove.sh # Remove dev user account ``` ## Commands @@ -121,10 +126,11 @@ ssh -L 8008:localhost:8008 root@ops-jrz1 ### Deployment Workflow 1. Make configuration changes locally -2. Commit to git -3. Deploy via nixos-rebuild -4. Verify service status and logs -5. Document in worklogs/ +2. Run `nix flake check` to validate +3. Commit to git +4. Deploy via nixos-rebuild (scripts deploy automatically) +5. Verify service status and logs +6. Document in worklogs/ ## Architecture diff --git a/configuration.nix b/configuration.nix index 0d8ef77..16c6ca5 100644 --- a/configuration.nix +++ b/configuration.nix @@ -1,5 +1,53 @@ { config, pkgs, beads, opencode, ... }: +let + # ========================================================================== + # Watchdog Scripts - Referenced directly by systemd, NOT in PATH + # ========================================================================== + + killswitch = pkgs.writeShellApplication { + name = "killswitch"; + runtimeInputs = with pkgs; [ procps systemd util-linux coreutils ]; + text = builtins.readFile ./scripts/killswitch; + }; + + cpu-watchdog = pkgs.writeShellApplication { + name = "cpu-watchdog"; + runtimeInputs = with pkgs; [ procps gawk coreutils util-linux killswitch ]; + text = builtins.replaceStrings + [ "/usr/local/bin/killswitch" ] + [ "killswitch" ] + (builtins.readFile ./scripts/cpu-watchdog); + }; + + egress-watchdog = pkgs.writeShellApplication { + name = "egress-watchdog"; + # Note: -e removed intentionally - grep returns 1 when no matches + bashOptions = [ "nounset" "pipefail" ]; + runtimeInputs = with pkgs; [ systemd gnugrep coreutils util-linux killswitch gawk ]; + text = builtins.replaceStrings + [ "/usr/local/bin/killswitch" ] + [ "killswitch" ] + (builtins.readFile ./scripts/egress-watchdog); + }; + + # ========================================================================== + # Admin Scripts - Added to systemPackages for interactive use + # ========================================================================== + + learner-add = pkgs.writeShellApplication { + name = "learner-add"; + runtimeInputs = with pkgs; [ shadow coreutils iproute2 gnugrep gawk ]; + text = builtins.readFile ./scripts/learner-add.sh; + }; + + learner-remove = pkgs.writeShellApplication { + name = "learner-remove"; + runtimeInputs = with pkgs; [ shadow coreutils gnutar procps findutils ]; + text = builtins.readFile ./scripts/learner-remove.sh; + }; + +in { # Main NixOS configuration for ops-jrz1 server # Imports host-specific configuration from hosts/ops-jrz1.nix @@ -34,6 +82,9 @@ opencode # AI coding agent (opencode CLI) # For npm-based AI tools (gemini-cli, codex): users run npm install nodejs_22 + # Admin scripts (declarative deployment) + learner-add + learner-remove ]; # Add ~/.local/bin and /usr/local/bin to PATH for manually installed tools @@ -117,7 +168,7 @@ description = "CPU abuse watchdog"; serviceConfig = { Type = "oneshot"; - ExecStart = "/usr/local/bin/cpu-watchdog"; + ExecStart = "${cpu-watchdog}/bin/cpu-watchdog"; }; }; @@ -135,7 +186,7 @@ description = "Egress abuse watchdog"; serviceConfig = { Type = "oneshot"; - ExecStart = "/usr/local/bin/egress-watchdog"; + ExecStart = "${egress-watchdog}/bin/egress-watchdog"; }; }; diff --git a/scripts/egress-watchdog b/scripts/egress-watchdog index 005fa42..dcc2d1d 100755 --- a/scripts/egress-watchdog +++ b/scripts/egress-watchdog @@ -22,7 +22,7 @@ if [[ -z "$hits" ]]; then exit 0 fi -echo "$hits" | grep -oP 'UID=\K[0-9]+' | sort | uniq -c | while read count uid; do +echo "$hits" | grep -oP 'UID=\K[0-9]+' | sort | uniq -c | while read -r count uid; do # Skip if count or uid is empty [[ -z "$count" || -z "$uid" ]] && continue diff --git a/scripts/learner-add.sh b/scripts/learner-add.sh index 063dd0f..bdd71ff 100755 --- a/scripts/learner-add.sh +++ b/scripts/learner-add.sh @@ -78,9 +78,11 @@ create_user() { chown -R "$username:users" "$ssh_dir" # Add Slack env vars to bashrc - echo '' >> "/home/$username/.bashrc" - echo '# Slack bot development tokens' >> "/home/$username/.bashrc" - echo 'source /etc/slack-learner.env' >> "/home/$username/.bashrc" + { + echo '' + echo '# Slack bot development tokens' + echo 'source /etc/slack-learner.env' + } >> "/home/$username/.bashrc" log_info "User created with SSH access" } diff --git a/scripts/learner-remove.sh b/scripts/learner-remove.sh index be74c56..302f160 100755 --- a/scripts/learner-remove.sh +++ b/scripts/learner-remove.sh @@ -106,7 +106,8 @@ archive_home() { log_info "Archiving home directory..." mkdir -p "$ARCHIVE_DIR" - local archive_name="${username}_$(date +%Y%m%d_%H%M%S).tar.gz" + local archive_name + archive_name="${username}_$(date +%Y%m%d_%H%M%S).tar.gz" local archive_path="$ARCHIVE_DIR/$archive_name" tar -czf "$archive_path" -C /home "$username" @@ -128,7 +129,7 @@ remove_user() { userdel -r "$username" 2>/dev/null || { # If userdel -r fails, try without -r and manually remove home userdel "$username" 2>/dev/null || true - rm -rf "/home/$username" + rm -rf "/home/${username:?}" } log_info "User removed" @@ -174,7 +175,7 @@ main() { echo "Home directory will be PERMANENTLY DELETED" fi echo "" - read -p "Are you sure? (yes/no): " confirm + read -rp "Are you sure? (yes/no): " confirm if [[ "$confirm" != "yes" ]]; then log_info "Aborted" exit 0