---
# Sanitization Rules Contract
# Defines all patterns to find and replace when extracting modules from ops-base
#
# Conventions used throughout this file:
#   - `pattern` values are regular expressions; `replacement` may use \1-style
#     backreferences to capture groups of `pattern`.
#   - Double-quoted YAML strings process escapes, so a regex backslash is
#     written `\\`. Single-quoted strings are literal, so it is written `\`.
#   - `replacement: null` means there is nothing to substitute (see the inline
#     comment on each such rule).
#   - `expected_matches: 0` means `validation_command` must find nothing.

version: "1.0"
description: "Comprehensive sanitization rules for creating nixos-matrix-platform-template from ops-base"

# Critical rules - MUST be applied, validation failures block publication
critical_rules:
  - id: 1
    name: "Replace primary domain clarun.xyz"
    pattern_type: domain
    pattern: "clarun\\.xyz"
    replacement: "example.com"
    applies_to: [code, docs, comments, configs]
    validation_method: grep
    validation_command: "rg 'clarun\\.xyz' --type nix --type md"
    expected_matches: 0

  - id: 2
    name: "Replace secondary domain talu.uno"
    pattern_type: domain
    pattern: "talu\\.uno"
    replacement: "matrix.example.org"
    applies_to: [code, docs, comments, configs]
    validation_method: grep
    validation_command: "rg 'talu\\.uno' --type nix --type md"
    expected_matches: 0

  - id: 3
    name: "Replace private IP range 192.168.1.x"
    pattern_type: ip_address
    pattern: "192\\.168\\.1\\.(\\d+)"
    # \1 keeps the original host octet, so distinct hosts stay distinct.
    replacement: "10.0.0.\\1"
    applies_to: [code, configs]
    validation_method: regex
    validation_command: "rg '192\\.168\\.1\\.' --type nix"
    expected_matches: 0

  - id: 4
    name: "Replace public VPS IP"
    pattern_type: ip_address
    pattern: "45\\.77\\.205\\.49"
    replacement: "203.0.113.10"  # TEST-NET-3
    applies_to: [code, docs, comments, configs]
    validation_method: grep
    validation_command: "rg '45\\.77\\.205\\.49'"
    expected_matches: 0

  - id: 5
    name: "Replace personal home path"
    pattern_type: path
    # This rule runs before the high priority rules, so any later rule whose
    # pattern contains /home/dan must also accept /home/user (see rules 13, 14).
    pattern: "/home/dan"
    replacement: "/home/user"
    applies_to: [code, docs, comments]
    validation_method: grep
    validation_command: "rg '/home/dan'"
    expected_matches: 0

  - id: 6
    name: "Replace hostname jrz1"
    pattern_type: hostname
    pattern: "\\bjrz1\\b"
    replacement: "matrix"
    applies_to: [code, docs, comments, configs]
    validation_method: regex
    validation_command: "rg '\\bjrz1\\b' --type nix --type md"
    expected_matches: 0

  - id: 7
    name: "Replace Matrix admin user"
    pattern_type: username
    pattern: "@admin:clarun\\.xyz"
    replacement: "@admin:example.com"
    applies_to: [code, docs, configs]
    validation_method: grep
    validation_command: "rg '@admin:clarun'"
    expected_matches: 0

  - id: 8
    name: "Detect Matrix access tokens"
    pattern_type: secret_pattern
    pattern: "syt_[a-zA-Z0-9_-]{20,}"
    replacement: null  # Should not exist
    applies_to: [all]
    validation_method: gitleaks
    # NOTE(review): confirm the gitleaks configuration in use actually detects
    # this pattern; no custom gitleaks config is referenced from this file.
    validation_command: "gitleaks detect --no-git --source ."
    expected_matches: 0

  - id: 9
    name: "Detect Slack tokens"
    pattern_type: secret_pattern
    pattern: "xox[baprs]-[0-9]{10,13}-[0-9]{10,13}-[a-zA-Z0-9]{24,}"
    replacement: null  # Should not exist
    applies_to: [all]
    validation_method: gitleaks
    validation_command: "gitleaks detect --no-git --source ."
    expected_matches: 0

  - id: 10
    name: "Detect age keys"
    pattern_type: secret_pattern
    pattern: "AGE-SECRET-KEY-[A-Z0-9]{59}"
    replacement: null  # Should not exist
    applies_to: [all]
    validation_method: gitleaks
    validation_command: "gitleaks detect --no-git --source ."
    expected_matches: 0

# High priority rules - SHOULD be applied, warnings if validation fails
high_priority_rules:
  - id: 11
    name: "Replace workspace name"
    pattern_type: identifier
    pattern: "my-workspace"
    replacement: "your-workspace"
    applies_to: [code, configs]
    validation_method: grep
    validation_command: "rg 'my-workspace' --type nix"
    expected_matches: 0

  - id: 12
    name: "Replace personal email"
    pattern_type: email
    pattern: "dlei@duck\\.com"
    replacement: "admin@example.com"
    applies_to: [code, configs]
    validation_method: grep
    validation_command: "rg 'dlei@duck\\.com'"
    expected_matches: 0

  - id: 13
    name: "Replace project-specific paths"
    pattern_type: path
    # Accepts /home/user as well: critical rule 5 has already rewritten
    # /home/dan by the time this rule runs, so the original-only pattern
    # could never match.
    pattern: "/home/(dan|user)/proj/ops-base"
    replacement: "/path/to/ops-base"
    applies_to: [docs, comments]
    validation_method: grep
    validation_command: "rg '/home/dan/proj|/home/user/proj/ops-base'"
    expected_matches: 0

  - id: 14
    name: "Replace continuwuity local path"
    pattern_type: path
    # Accepts /home/user for the same ordering reason as rule 13.
    pattern: "git\\+file:///home/(dan|user)/proj/continuwuity"
    replacement: "github:girlbossceo/conduwuit"
    applies_to: [code]
    validation_method: grep
    validation_command: "rg 'git\\+file://'"
    expected_matches: 0

  - id: 15
    name: "Sanitize registration tokens (example values)"
    pattern_type: secret_pattern
    pattern: "9a3ad59ee136e5a9dc1612cc179c9b7ff8da78c537682aad82c8084e5ae6b5c3"
    replacement: "GENERATE_WITH_openssl_rand_hex_32"
    applies_to: [docs]
    validation_method: grep
    validation_command: "rg '9a3ad59ee136e5a9dc1612cc179c9b7ff8da78c537682aad82c8084e5ae6b5c3'"
    expected_matches: 0

# Medium priority rules - COULD be applied, informational only
medium_priority_rules:
  - id: 16
    name: "Add REPLACE_ME comments to domain fields"
    pattern_type: comment_addition
    pattern: 'serverName = "([^"]+)";'
    # Single-quoted YAML does not process escapes, so the backreference is
    # written with ONE backslash here (equivalent to "\\1" in double quotes).
    replacement: 'serverName = "\1"; # REPLACE: Your Matrix server domain'
    applies_to: [code]
    validation_method: manual
    note: "Add helpful comments to guide users"

  - id: 17
    name: "Add REPLACE_ME comments to workspace fields"
    pattern_type: comment_addition
    pattern: 'workspace = "([^"]+)";'
    # Single backslash for the same reason as rule 16.
    replacement: 'workspace = "\1"; # REPLACE: Your Slack workspace name'
    applies_to: [code]
    validation_method: manual
    note: "Add helpful comments to guide users"

  - id: 18
    name: "Sanitize temporary paths"
    pattern_type: path
    pattern: "/tmp/[a-zA-Z0-9_-]+"
    replacement: "/tmp/example-path"
    applies_to: [docs]
    validation_method: grep
    # The placeholder itself matches `pattern`, so the check must exclude it
    # or it could never reach 0 matches. -P enables PCRE2 lookaheads in rg.
    validation_command: "rg -P '/tmp/(?!example-path(?![a-zA-Z0-9_-]))[a-zA-Z0-9_-]+' docs/"
    expected_matches: 0

# Special rules for worklogs → documentation extraction
worklog_sanitization:
  - id: 19
    name: "Remove time-stamped session markers"
    pattern_type: metadata
    pattern: "^\\* \\[\\d{4}-\\d{2}-\\d{2}.*\\].*$"
    replacement: null  # Delete these lines
    applies_to: [worklogs]
    validation_method: manual
    note: "Remove org-mode timestamps when extracting to markdown"

  - id: 20
    name: "Sanitize error messages with IPs"
    pattern_type: error_context
    pattern: "connection to (192\\.168\\.1\\.\\d+|45\\.77\\.205\\.49)"
    # A placeholder keeps the message readable; replacing the host with
    # nothing left a dangling "connection to ".
    replacement: "connection to <server-ip>"
    applies_to: [worklogs, docs]
    validation_method: grep
    validation_command: "rg 'connection to (192\\.168|45\\.77)' docs/"
    expected_matches: 0

  - id: 21
    name: "Sanitize SSH commands with real hosts"
    pattern_type: command_sanitization
    pattern: "ssh root@(45\\.77\\.205\\.49|192\\.168\\.1\\.\\d+)"
    # A placeholder keeps the command readable; replacing the host with
    # nothing left a dangling "ssh root@".
    replacement: "ssh root@<server-ip>"
    applies_to: [docs]
    validation_method: grep
    validation_command: "rg 'ssh root@(45\\.77|192\\.168)' docs/"
    expected_matches: 0

  - id: 22
    name: "Sanitize curl commands with real domains"
    pattern_type: command_sanitization
    pattern: "curl https?://(clarun\\.xyz|talu\\.uno)"
    replacement: "curl https://example.com"
    applies_to: [docs]
    validation_method: grep
    validation_command: "rg 'curl.*clarun|curl.*talu' docs/"
    expected_matches: 0

# Validation steps (executed in order)
validation_pipeline:
  - step: 1
    name: "Automated pattern replacement"
    script: "scripts/sanitize-files.sh"
    input: "staging/"
    output: "sanitized/"

  - step: 2
    name: "Grep validation for critical patterns"
    command: |
      rg 'clarun\.xyz|talu\.uno|192\.168\.1\.|45\.77\.205\.49|/home/dan|jrz1' \
        --type nix --type md sanitized/
    expected_exit_code: 1  # No matches

  - step: 3
    name: "gitleaks secret scanning"
    command: "gitleaks detect --no-git --source sanitized/"
    expected_exit_code: 0  # No secrets found

  - step: 4
    name: "Manual review checklist"
    checklist:
      - "Review all comments for personal context"
      - "Check git commit messages (if any preserved)"
      - "Scan for personal workspace names"
      - "Verify all secret placeholders have REPLACE_ME or generation instructions"
      - "Check documentation for personal debugging sessions"
      - "Verify example configurations use only generic values"

  - step: 5
    name: "Nix build validation"
    # Relative flake paths need the leading "./"; without it Nix parses the
    # reference as URL-like. The commands are chained with && so that a
    # failure in any of them decides the step's exit code.
    command: |
      nix flake check ./sanitized &&
      nix build ./sanitized#nixosConfigurations.example-vps.config.system.build.toplevel &&
      nix build ./sanitized#nixosConfigurations.example-dev.config.system.build.toplevel
    expected_exit_code: 0  # All builds succeed

# Post-sanitization verification
verification:
  required_placeholders:
    - pattern: "example\\.com"
      min_occurrences: 10
      reason: "Domain must be replaced throughout"
    - pattern: "matrix\\.example\\.org"
      min_occurrences: 3
      reason: "Secondary domain must be replaced"
    - pattern: "10\\.0\\.0\\."
      min_occurrences: 5
      reason: "Private IPs must use RFC 1918"
    - pattern: "REPLACE|GENERATE_WITH"
      min_occurrences: 5
      reason: "User guidance comments required"

  forbidden_patterns:
    - pattern: "clarun\\.xyz"
      max_occurrences: 0
      severity: critical
    - pattern: "talu\\.uno"
      max_occurrences: 0
      severity: critical
    - pattern: "192\\.168\\.1\\."
      max_occurrences: 0
      severity: critical
    - pattern: "45\\.77\\.205\\.49"
      max_occurrences: 0
      severity: critical
    - pattern: "/home/dan"
      max_occurrences: 0
      severity: critical
    # Hostname from critical rule 6; also part of the step 2 grep.
    - pattern: "\\bjrz1\\b"
      max_occurrences: 0
      severity: critical
    - pattern: "syt_|xox[baprs]-|AGE-SECRET-KEY"
      max_occurrences: 0
      severity: critical

# Sanitization script integration
script_usage: |
  # scripts/sanitize-files.sh
  usage: ./scripts/sanitize-files.sh <source-dir> <staging-dir>

  Example:
    ./scripts/sanitize-files.sh ~/proj/ops-base/modules staging/modules

  The script will:
  1. Copy files from source to staging
  2. Apply all critical_rules in order
  3. Apply all high_priority_rules
  4. Run validation pipeline
  5. Report any failures or warnings
  6. Exit 0 if all critical validations pass

# Manual review guide
manual_review_guide: |
  After automated sanitization, perform manual review:

  1. Read each .nix file:
     - Check comments for personal references
     - Verify all domains are generic (example.com, matrix.example.org)
     - Ensure all IPs use RFC 1918 or TEST-NET ranges
     - Look for hardcoded workspace/project names

  2. Read all documentation:
     - Check for personal debugging notes
     - Verify command examples use generic hosts
     - Ensure error messages don't expose real infrastructure
     - Check screenshots for sensitive data (if any)

  3. Review git history (if preserved):
     - Scan commit messages for personal context
     - Check for accidentally committed secrets
     - Verify no ops-base commits included

  4. Final validation:
     - Run gitleaks on full repository
     - Build all example configurations
     - Test deployment guide on clean VPS (Phase 3)

# Success criteria
# Items are quoted because an unquoted "…: 0" would be parsed as a mapping
# entry instead of a string.
success_criteria:
  - "All critical_rules validation commands return expected_matches: 0"
  - "All high_priority_rules validation commands return expected_matches: 0"
  - "gitleaks returns 0 findings"
  - "nix flake check succeeds for all configurations"
  - "Manual review checklist 100% complete"
  - "No personal domains/IPs/paths in published repository"
  - "Fresh git history (no ops-base commits)"