ops-jrz1/specs/001-extract-matrix-platform/contracts/sanitization-rules.yaml
Dan 894e7241f1 Initialize ops-jrz1 repository with Matrix platform extraction foundation
- Add speckit workflow infrastructure (.claude, .specify)
- Create NixOS configuration skeleton (flake.nix, configuration.nix, hosts/ops-jrz1.nix)
- Add sanitization scripts with 22 rules for personal info removal
- Add validation scripts with gitleaks integration
- Configure git hooks (pre-commit, pre-push) for security validation
- Add project documentation (README, LICENSE)
- Add comprehensive .gitignore for Nix, secrets, staging

Phase 1 and Phase 2 complete. Foundation ready for module extraction from ops-base.
2025-10-13 13:37:17 -07:00

366 lines
11 KiB
YAML

# Sanitization Rules Contract
# Defines all patterns to find and replace when extracting modules from ops-base.
# Rules are grouped by priority (critical, high, medium) plus a worklog-specific
# group; the remaining sections describe the validation pipeline, post-run
# verification, script usage, manual review, and success criteria.
version: "1.0"
description: "Comprehensive sanitization rules for creating nixos-matrix-platform-template from ops-base"
# Critical rules: mandatory for every extraction. Any validation failure in
# this group blocks publication.
#
# Regex patterns are single-quoted so backslashes are taken literally by YAML;
# validation commands stay double-quoted because they embed single quotes.
critical_rules:
  # Personal domains -> reserved example domains.
  - id: 1
    name: 'Replace primary domain clarun.xyz'
    pattern_type: domain
    pattern: 'clarun\.xyz'
    replacement: 'example.com'
    applies_to: [code, docs, comments, configs]
    validation_method: grep
    validation_command: "rg 'clarun\\.xyz' --type nix --type md"
    expected_matches: 0

  - id: 2
    name: 'Replace secondary domain talu.uno'
    pattern_type: domain
    pattern: 'talu\.uno'
    replacement: 'matrix.example.org'
    applies_to: [code, docs, comments, configs]
    validation_method: grep
    validation_command: "rg 'talu\\.uno' --type nix --type md"
    expected_matches: 0

  # Network addresses. Rule 3 keeps the final octet via the \1 backreference.
  - id: 3
    name: 'Replace private IP range 192.168.1.x'
    pattern_type: ip_address
    pattern: '192\.168\.1\.(\d+)'
    replacement: '10.0.0.\1'
    applies_to: [code, configs]
    validation_method: regex
    validation_command: "rg '192\\.168\\.1\\.' --type nix"
    expected_matches: 0

  - id: 4
    name: 'Replace public VPS IP'
    pattern_type: ip_address
    pattern: '45\.77\.205\.49'
    replacement: '203.0.113.10'  # TEST-NET-3
    applies_to: [code, docs, comments, configs]
    validation_method: grep
    validation_command: "rg '45\\.77\\.205\\.49'"
    expected_matches: 0

  # Host-identifying paths and names.
  - id: 5
    name: 'Replace personal home path'
    pattern_type: path
    pattern: '/home/dan'
    replacement: '/home/user'
    applies_to: [code, docs, comments]
    validation_method: grep
    validation_command: "rg '/home/dan'"
    expected_matches: 0

  - id: 6
    name: 'Replace hostname jrz1'
    pattern_type: hostname
    pattern: '\bjrz1\b'
    replacement: 'matrix'
    applies_to: [code, docs, comments, configs]
    validation_method: regex
    validation_command: "rg '\\bjrz1\\b' --type nix --type md"
    expected_matches: 0

  # The domain part of this identifier is also covered by rule 1; this entry
  # names the admin account explicitly and validates on the '@admin:' prefix.
  - id: 7
    name: 'Replace Matrix admin user'
    pattern_type: username
    pattern: '@admin:clarun\.xyz'
    replacement: '@admin:example.com'
    applies_to: [code, docs, configs]
    validation_method: grep
    validation_command: "rg '@admin:clarun'"
    expected_matches: 0

  # Secret detectors: there is nothing to substitute (replacement is null);
  # a hit means a secret is present and must be removed by hand.
  - id: 8
    name: 'Detect Matrix access tokens'
    pattern_type: secret_pattern
    pattern: 'syt_[a-zA-Z0-9_-]{20,}'
    replacement: null  # Should not exist
    applies_to: [all]
    validation_method: gitleaks
    validation_command: "gitleaks detect --no-git --source ."
    expected_matches: 0

  - id: 9
    name: 'Detect Slack tokens'
    pattern_type: secret_pattern
    pattern: 'xox[baprs]-[0-9]{10,13}-[0-9]{10,13}-[a-zA-Z0-9]{24,}'
    replacement: null  # Should not exist
    applies_to: [all]
    validation_method: gitleaks
    validation_command: "gitleaks detect --no-git --source ."
    expected_matches: 0

  - id: 10
    name: 'Detect age keys'
    pattern_type: secret_pattern
    pattern: 'AGE-SECRET-KEY-[A-Z0-9]{59}'
    replacement: null  # Should not exist
    applies_to: [all]
    validation_method: gitleaks
    validation_command: "gitleaks detect --no-git --source ."
    expected_matches: 0
# High priority rules - SHOULD be applied, warnings if validation fails
#
# Ordering note: script_usage states that critical_rules are applied before
# this group. Critical rule 5 therefore has already rewritten "/home/dan" to
# "/home/user" by the time rules 13 and 14 run, so their patterns accept either
# prefix; matching only "/home/dan" would never fire and would leave
# "/home/user/proj/..." and "git+file://" references behind.
high_priority_rules:
  - id: 11
    name: "Replace workspace name"
    pattern_type: identifier
    pattern: "my-workspace"
    replacement: "your-workspace"
    applies_to: [code, configs]
    validation_method: grep
    validation_command: "rg 'my-workspace' --type nix"
    expected_matches: 0

  - id: 12
    name: "Replace personal email"
    pattern_type: email
    pattern: "dlei@duck\\.com"
    replacement: "admin@example.com"
    # The validation command scans every file type, so the replacement must
    # cover docs and comments as well or validation can fail on untouched text.
    applies_to: [code, docs, comments, configs]
    validation_method: grep
    validation_command: "rg 'dlei@duck\\.com'"
    expected_matches: 0

  - id: 13
    name: "Replace project-specific paths"
    pattern_type: path
    # Accepts the original prefix and the one produced by critical rule 5.
    pattern: "/home/(dan|user)/proj/ops-base"
    replacement: "/path/to/ops-base"
    applies_to: [docs, comments]
    validation_method: grep
    validation_command: "rg '/home/dan/proj|/home/user/proj/ops-base'"
    expected_matches: 0

  - id: 14
    name: "Replace continuwuity local path"
    pattern_type: path
    # Accepts the original prefix and the one produced by critical rule 5.
    pattern: "git\\+file:///home/(dan|user)/proj/continuwuity"
    replacement: "github:girlbossceo/conduwuit"
    applies_to: [code]
    validation_method: grep
    validation_command: "rg 'git\\+file://'"
    expected_matches: 0

  - id: 15
    name: "Sanitize registration tokens (example values)"
    pattern_type: secret_pattern
    pattern: "9a3ad59ee136e5a9dc1612cc179c9b7ff8da78c537682aad82c8084e5ae6b5c3"
    replacement: "GENERATE_WITH_openssl_rand_hex_32"
    applies_to: [docs]
    validation_method: grep
    validation_command: "rg '9a3ad59ee136e5a9dc1612cc179c9b7ff8da78c537682aad82c8084e5ae6b5c3'"
    expected_matches: 0
# Medium priority rules - COULD be applied, informational only
medium_priority_rules:
  # Rules 16 and 17 re-emit the captured value and append a guidance comment.
  # YAML single-quoted scalars do not process backslash escapes, so the
  # backreference is written as a single "\1" here (the double-quoted rule 3
  # spells the same thing as "\\1"). Writing '\\1' in single quotes would hand
  # the regex engine two backslashes, i.e. a literal backslash followed by "1".
  - id: 16
    name: "Add REPLACE_ME comments to domain fields"
    pattern_type: comment_addition
    pattern: 'serverName = "([^"]+)";'
    replacement: 'serverName = "\1"; # REPLACE: Your Matrix server domain'
    applies_to: [code]
    validation_method: manual
    note: "Add helpful comments to guide users"

  - id: 17
    name: "Add REPLACE_ME comments to workspace fields"
    pattern_type: comment_addition
    pattern: 'workspace = "([^"]+)";'
    replacement: 'workspace = "\1"; # REPLACE: Your Slack workspace name'
    applies_to: [code]
    validation_method: manual
    note: "Add helpful comments to guide users"

  - id: 18
    name: "Sanitize temporary paths"
    pattern_type: path
    pattern: "/tmp/[a-zA-Z0-9_-]+"
    replacement: "/tmp/example-path"
    applies_to: [docs]
    validation_method: grep
    # The placeholder "/tmp/example-path" itself matches the rule's pattern, so
    # a plain search could never reach zero matches after sanitization. Print
    # only the matched paths (-o) and drop the ones that are exactly the
    # placeholder; anything left is an unsanitized path.
    validation_command: "rg -o '/tmp/[a-zA-Z0-9_-]+' docs/ | rg -v '/tmp/example-path$'"
    expected_matches: 0
# Worklog-specific rules, used when worklogs are turned into documentation.
#
# Regex patterns are single-quoted so backslashes are taken literally by YAML;
# validation commands stay double-quoted because they embed single quotes.
worklog_sanitization:
  # Drops whole lines that match; replacement is null because nothing is
  # substituted in their place.
  - id: 19
    name: 'Remove time-stamped session markers'
    pattern_type: metadata
    pattern: '^\* \[\d{4}-\d{2}-\d{2}.*\].*$'
    replacement: null  # Delete these lines
    applies_to: [worklogs]
    validation_method: manual
    note: 'Remove org-mode timestamps when extracting to markdown'

  # The remaining rules scrub real hosts out of quoted errors and commands.
  - id: 20
    name: 'Sanitize error messages with IPs'
    pattern_type: error_context
    pattern: 'connection to (192\.168\.1\.\d+|45\.77\.205\.49)'
    replacement: 'connection to <host>'
    applies_to: [worklogs, docs]
    validation_method: grep
    validation_command: "rg 'connection to (192\\.168|45\\.77)' docs/"
    expected_matches: 0

  - id: 21
    name: 'Sanitize SSH commands with real hosts'
    pattern_type: command_sanitization
    pattern: 'ssh root@(45\.77\.205\.49|192\.168\.1\.\d+)'
    replacement: 'ssh root@<vps-ip>'
    applies_to: [docs]
    validation_method: grep
    validation_command: "rg 'ssh root@(45\\.77|192\\.168)' docs/"
    expected_matches: 0

  - id: 22
    name: 'Sanitize curl commands with real domains'
    pattern_type: command_sanitization
    pattern: 'curl https?://(clarun\.xyz|talu\.uno)'
    replacement: 'curl https://example.com'
    applies_to: [docs]
    validation_method: grep
    validation_command: "rg 'curl.*clarun|curl.*talu' docs/"
    expected_matches: 0
# Validation steps (executed in order)
validation_pipeline:
  - step: 1
    name: "Automated pattern replacement"
    script: "scripts/sanitize-files.sh"
    input: "staging/"
    output: "sanitized/"

  - step: 2
    name: "Grep validation for critical patterns"
    # ripgrep exits with status 1 when it finds nothing, so 1 is the passing
    # result for this step.
    command: |
      rg 'clarun\.xyz|talu\.uno|192\.168\.1\.|45\.77\.205\.49|/home/dan|jrz1' \
        --type nix --type md sanitized/
    expected_exit_code: 1 # No matches

  - step: 3
    name: "gitleaks secret scanning"
    command: "gitleaks detect --no-git --source sanitized/"
    expected_exit_code: 0 # No secrets found

  - step: 4
    name: "Manual review checklist"
    checklist:
      - "Review all comments for personal context"
      - "Check git commit messages (if any preserved)"
      - "Scan for personal workspace names"
      - "Verify all secret placeholders have REPLACE_ME or generation instructions"
      - "Check documentation for personal debugging sessions"
      - "Verify example configurations use only generic values"

  - step: 5
    name: "Nix build validation"
    # Relative flake references must start with "./"; a bare "sanitized/..."
    # is read as a URL-like/registry reference and fails to resolve.
    # The commands are chained with && so the step's exit code reflects the
    # first failure instead of only the last command's status.
    command: |
      nix flake check ./sanitized && \
        nix build ./sanitized#nixosConfigurations.example-vps.config.system.build.toplevel && \
        nix build ./sanitized#nixosConfigurations.example-dev.config.system.build.toplevel
    expected_exit_code: 0 # All builds succeed
# Post-sanitization verification
verification:
  # Placeholders that must appear in the output; too few occurrences suggests
  # a replacement rule did not run.
  required_placeholders:
    - pattern: "example\\.com"
      min_occurrences: 10
      reason: "Domain must be replaced throughout"
    - pattern: "matrix\\.example\\.org"
      min_occurrences: 3
      reason: "Secondary domain must be replaced"
    - pattern: "10\\.0\\.0\\."
      min_occurrences: 5
      reason: "Private IPs must use RFC 1918"
    - pattern: "REPLACE|GENERATE_WITH"
      min_occurrences: 5
      reason: "User guidance comments required"

  # Patterns that must never appear in the output. This list mirrors the
  # critical_rules group: domains, IPs, home path, hostname, and secret
  # prefixes.
  forbidden_patterns:
    - pattern: "clarun\\.xyz"
      max_occurrences: 0
      severity: critical
    - pattern: "talu\\.uno"
      max_occurrences: 0
      severity: critical
    - pattern: "192\\.168\\.1\\."
      max_occurrences: 0
      severity: critical
    - pattern: "45\\.77\\.205\\.49"
      max_occurrences: 0
      severity: critical
    - pattern: "/home/dan"
      max_occurrences: 0
      severity: critical
    # Hostname from critical rule 6; it is also part of the pipeline's step 2
    # grep, so it belongs in the forbidden list as well.
    - pattern: "\\bjrz1\\b"
      max_occurrences: 0
      severity: critical
    - pattern: "syt_|xox[baprs]-|AGE-SECRET-KEY"
      max_occurrences: 0
      severity: critical
# Sanitization script integration
# Free-form usage text for scripts/sanitize-files.sh, stored as a literal block
# scalar: the "#" line inside the block is part of the text, not a YAML comment.
# NOTE(review): the example below writes to staging/, while validation_pipeline
# step 1 lists staging/ as input and sanitized/ as output - confirm which
# directory layout is intended.
script_usage: |
  # scripts/sanitize-files.sh usage:
  ./scripts/sanitize-files.sh <source-dir> <output-dir>
  Example:
  ./scripts/sanitize-files.sh ~/proj/ops-base/modules staging/modules
  The script will:
  1. Copy files from source to staging
  2. Apply all critical_rules in order
  3. Apply all high_priority_rules
  4. Run validation pipeline
  5. Report any failures or warnings
  6. Exit 0 if all critical validations pass
# Manual review guide
# Human-readable checklist text (literal block scalar) to follow after the
# automated rules have run.
# NOTE(review): the guide asks for "RFC 1918 or TEST-NET" addresses, but the
# 192.168.1.x range removed by critical rule 3 is itself RFC 1918; presumably
# only the 10.0.0.x placeholders are acceptable - confirm and reword if so.
manual_review_guide: |
  After automated sanitization, perform manual review:
  1. Read each .nix file:
  - Check comments for personal references
  - Verify all domains are generic (example.com, matrix.example.org)
  - Ensure all IPs use RFC 1918 or TEST-NET ranges
  - Look for hardcoded workspace/project names
  2. Read all documentation:
  - Check for personal debugging notes
  - Verify command examples use generic hosts
  - Ensure error messages don't expose real infrastructure
  - Check screenshots for sensitive data (if any)
  3. Review git history (if preserved):
  - Scan commit messages for personal context
  - Check for accidentally committed secrets
  - Verify no ops-base commits included
  4. Final validation:
  - Run gitleaks on full repository
  - Build all example configurations
  - Test deployment guide on clean VPS (Phase 3)
# Success criteria
# Every entry is a plain sentence. All items are quoted because the first two
# contain ": ", which YAML would otherwise parse as a single-key mapping
# ({"... expected_matches": 0}) instead of a string.
success_criteria:
  - "All critical_rules validation commands return expected_matches: 0"
  - "All high_priority_rules validation commands return expected_matches: 0"
  - "gitleaks returns 0 findings"
  - "nix flake check succeeds for all configurations"
  - "Manual review checklist 100% complete"
  - "No personal domains/IPs/paths in published repository"
  - "Fresh git history (no ops-base commits)"