- Add speckit workflow infrastructure (.claude, .specify) - Create NixOS configuration skeleton (flake.nix, configuration.nix, hosts/ops-jrz1.nix) - Add sanitization scripts with 22 rules for personal info removal - Add validation scripts with gitleaks integration - Configure git hooks (pre-commit, pre-push) for security validation - Add project documentation (README, LICENSE) - Add comprehensive .gitignore for Nix, secrets, staging Phase 1 and Phase 2 complete. Foundation ready for module extraction from ops-base.
366 lines
11 KiB
YAML
366 lines
11 KiB
YAML
# Sanitization rules contract.
# Lists every pattern that is searched for and replaced while modules are
# extracted from ops-base.

version: '1.0'
description: 'Comprehensive sanitization rules for creating nixos-matrix-platform-template from ops-base'
|
|
|
|
# Critical rules: these MUST be applied. A failing validation here blocks
# publication.
#
# Regex literals are single-quoted so backslashes are written once; the
# validation commands stay double-quoted because they embed single quotes.
critical_rules:
  - id: 1
    name: 'Replace primary domain clarun.xyz'
    pattern_type: domain
    pattern: 'clarun\.xyz'
    replacement: 'example.com'
    applies_to: [code, docs, comments, configs]
    validation_method: grep
    validation_command: "rg 'clarun\\.xyz' --type nix --type md"
    expected_matches: 0

  - id: 2
    name: 'Replace secondary domain talu.uno'
    pattern_type: domain
    pattern: 'talu\.uno'
    replacement: 'matrix.example.org'
    applies_to: [code, docs, comments, configs]
    validation_method: grep
    validation_command: "rg 'talu\\.uno' --type nix --type md"
    expected_matches: 0

  - id: 3
    name: 'Replace private IP range 192.168.1.x'
    pattern_type: ip_address
    # The captured last octet is carried over into the replacement.
    pattern: '192\.168\.1\.(\d+)'
    replacement: '10.0.0.\1'
    applies_to: [code, configs]
    validation_method: regex
    validation_command: "rg '192\\.168\\.1\\.' --type nix"
    expected_matches: 0

  - id: 4
    name: 'Replace public VPS IP'
    pattern_type: ip_address
    pattern: '45\.77\.205\.49'
    replacement: '203.0.113.10'  # TEST-NET-3
    applies_to: [code, docs, comments, configs]
    validation_method: grep
    validation_command: "rg '45\\.77\\.205\\.49'"
    expected_matches: 0

  - id: 5
    name: 'Replace personal home path'
    pattern_type: path
    pattern: '/home/dan'
    replacement: '/home/user'
    applies_to: [code, docs, comments]
    validation_method: grep
    validation_command: "rg '/home/dan'"
    expected_matches: 0

  - id: 6
    name: 'Replace hostname jrz1'
    pattern_type: hostname
    # Word boundaries keep the match to the standalone hostname token.
    pattern: '\bjrz1\b'
    replacement: 'matrix'
    applies_to: [code, docs, comments, configs]
    validation_method: regex
    validation_command: "rg '\\bjrz1\\b' --type nix --type md"
    expected_matches: 0

  - id: 7
    name: 'Replace Matrix admin user'
    pattern_type: username
    pattern: '@admin:clarun\.xyz'
    replacement: '@admin:example.com'
    applies_to: [code, docs, configs]
    validation_method: grep
    validation_command: "rg '@admin:clarun'"
    expected_matches: 0

  # Rules 8-10 are detection-only: they carry no replacement and are
  # validated with gitleaks.
  - id: 8
    name: 'Detect Matrix access tokens'
    pattern_type: secret_pattern
    pattern: 'syt_[a-zA-Z0-9_-]{20,}'
    replacement: null  # Should not exist
    applies_to: [all]
    validation_method: gitleaks
    validation_command: 'gitleaks detect --no-git --source .'
    expected_matches: 0

  - id: 9
    name: 'Detect Slack tokens'
    pattern_type: secret_pattern
    pattern: 'xox[baprs]-[0-9]{10,13}-[0-9]{10,13}-[a-zA-Z0-9]{24,}'
    replacement: null  # Should not exist
    applies_to: [all]
    validation_method: gitleaks
    validation_command: 'gitleaks detect --no-git --source .'
    expected_matches: 0

  - id: 10
    name: 'Detect age keys'
    pattern_type: secret_pattern
    pattern: 'AGE-SECRET-KEY-[A-Z0-9]{59}'
    replacement: null  # Should not exist
    applies_to: [all]
    validation_method: gitleaks
    validation_command: 'gitleaks detect --no-git --source .'
    expected_matches: 0
|
|
|
|
# High priority rules - SHOULD be applied, warnings if validation fails.
#
# Ordering note: script_usage states that all critical_rules are applied
# before these rules. Critical rule 5 rewrites "/home/dan" to "/home/user",
# so any rule here that matches a path under /home/dan must also accept the
# already-rewritten "/home/user" spelling, otherwise it can never match.
high_priority_rules:
  - id: 11
    name: "Replace workspace name"
    pattern_type: identifier
    pattern: "my-workspace"
    replacement: "your-workspace"
    applies_to: [code, configs]
    validation_method: grep
    validation_command: "rg 'my-workspace' --type nix"
    expected_matches: 0

  - id: 12
    name: "Replace personal email"
    pattern_type: email
    pattern: "dlei@duck\\.com"
    replacement: "admin@example.com"
    applies_to: [code, configs]
    validation_method: grep
    validation_command: "rg 'dlei@duck\\.com'"
    expected_matches: 0

  - id: 13
    name: "Replace project-specific paths"
    pattern_type: path
    # Accept both the raw path and the form left behind by critical rule 5.
    pattern: "/home/(dan|user)/proj/ops-base"
    replacement: "/path/to/ops-base"
    applies_to: [docs, comments]
    validation_method: grep
    # Checks both spellings; the original '/home/dan/proj' check passed
    # vacuously once rule 5 had run.
    validation_command: "rg '/home/(dan|user)/proj'"
    expected_matches: 0

  - id: 14
    name: "Replace continuwuity local path"
    pattern_type: path
    # Accept both the raw path and the form left behind by critical rule 5,
    # so the local git+file flake URL is actually replaced.
    pattern: "git\\+file:///home/(dan|user)/proj/continuwuity"
    replacement: "github:girlbossceo/conduwuit"
    applies_to: [code]
    validation_method: grep
    validation_command: "rg 'git\\+file://'"
    expected_matches: 0

  - id: 15
    name: "Sanitize registration tokens (example values)"
    pattern_type: secret_pattern
    pattern: "9a3ad59ee136e5a9dc1612cc179c9b7ff8da78c537682aad82c8084e5ae6b5c3"
    replacement: "GENERATE_WITH_openssl_rand_hex_32"
    applies_to: [docs]
    validation_method: grep
    validation_command: "rg '9a3ad59ee136e5a9dc1612cc179c9b7ff8da78c537682aad82c8084e5ae6b5c3'"
    expected_matches: 0
|
|
|
|
# Medium priority rules - COULD be applied, informational only.
medium_priority_rules:
  - id: 16
    name: "Add REPLACE_ME comments to domain fields"
    pattern_type: comment_addition
    pattern: 'serverName = "([^"]+)";'
    # Single-quoted YAML scalars do no backslash processing, so the
    # backreference is written with ONE backslash. ('\\1' here would be two
    # literal backslashes, unlike the double-quoted "\\1" used by rule 3.)
    replacement: 'serverName = "\1"; # REPLACE: Your Matrix server domain'
    applies_to: [code]
    validation_method: manual
    note: "Add helpful comments to guide users"

  - id: 17
    name: "Add REPLACE_ME comments to workspace fields"
    pattern_type: comment_addition
    pattern: 'workspace = "([^"]+)";'
    # Same single-quote escaping rule as rule 16: one backslash.
    replacement: 'workspace = "\1"; # REPLACE: Your Slack workspace name'
    applies_to: [code]
    validation_method: manual
    note: "Add helpful comments to guide users"

  - id: 18
    name: "Sanitize temporary paths"
    pattern_type: path
    pattern: "/tmp/[a-zA-Z0-9_-]+"
    replacement: "/tmp/example-path"
    applies_to: [docs]
    validation_method: grep
    # The placeholder "/tmp/example-path" matches the pattern itself, so
    # list each match on its own line (-o) and drop exact placeholder hits;
    # only unsanitized /tmp paths are left to count.
    validation_command: "rg -o '/tmp/[a-zA-Z0-9_-]+' docs/ | rg -v '/tmp/example-path$'"
    expected_matches: 0
|
|
|
|
# Special rules used when worklogs are turned into documentation.
#
# Regex literals are single-quoted so backslashes are written once; the
# validation commands stay double-quoted because they embed single quotes.
worklog_sanitization:
  - id: 19
    name: 'Remove time-stamped session markers'
    pattern_type: metadata
    pattern: '^\* \[\d{4}-\d{2}-\d{2}.*\].*$'
    replacement: null  # Delete these lines
    applies_to: [worklogs]
    validation_method: manual
    note: 'Remove org-mode timestamps when extracting to markdown'

  - id: 20
    name: 'Sanitize error messages with IPs'
    pattern_type: error_context
    pattern: 'connection to (192\.168\.1\.\d+|45\.77\.205\.49)'
    replacement: 'connection to <host>'
    applies_to: [worklogs, docs]
    validation_method: grep
    validation_command: "rg 'connection to (192\\.168|45\\.77)' docs/"
    expected_matches: 0

  - id: 21
    name: 'Sanitize SSH commands with real hosts'
    pattern_type: command_sanitization
    pattern: 'ssh root@(45\.77\.205\.49|192\.168\.1\.\d+)'
    replacement: 'ssh root@<vps-ip>'
    applies_to: [docs]
    validation_method: grep
    validation_command: "rg 'ssh root@(45\\.77|192\\.168)' docs/"
    expected_matches: 0

  - id: 22
    name: 'Sanitize curl commands with real domains'
    pattern_type: command_sanitization
    # Matches both http and https; the replacement always uses https.
    pattern: 'curl https?://(clarun\.xyz|talu\.uno)'
    replacement: 'curl https://example.com'
    applies_to: [docs]
    validation_method: grep
    validation_command: "rg 'curl.*clarun|curl.*talu' docs/"
    expected_matches: 0
|
|
|
|
# Validation steps (executed in order).
validation_pipeline:
  - step: 1
    name: "Automated pattern replacement"
    script: "scripts/sanitize-files.sh"
    input: "staging/"
    output: "sanitized/"

  - step: 2
    name: "Grep validation for critical patterns"
    command: |
      rg 'clarun\.xyz|talu\.uno|192\.168\.1\.|45\.77\.205\.49|/home/dan|jrz1' \
      --type nix --type md sanitized/
    expected_exit_code: 1  # ripgrep exits 1 when there are no matches

  - step: 3
    name: "gitleaks secret scanning"
    command: "gitleaks detect --no-git --source sanitized/"
    expected_exit_code: 0  # No secrets found

  - step: 4
    name: "Manual review checklist"
    checklist:
      - "Review all comments for personal context"
      - "Check git commit messages (if any preserved)"
      - "Scan for personal workspace names"
      - "Verify all secret placeholders have REPLACE_ME or generation instructions"
      - "Check documentation for personal debugging sessions"
      - "Verify example configurations use only generic values"

  - step: 5
    name: "Nix build validation"
    # The commands are chained with && so the first failure decides the exit
    # code; as separate lines only the last command's status was reported,
    # which could hide a failed flake check or build.
    # Relative flake references are written with a leading "./", which the
    # Nix manual requires for path-like flake references.
    command: |
      nix flake check ./sanitized && \
      nix build ./sanitized#nixosConfigurations.example-vps.config.system.build.toplevel && \
      nix build ./sanitized#nixosConfigurations.example-dev.config.system.build.toplevel
    expected_exit_code: 0  # All builds succeed
|
|
|
|
# Post-sanitization verification.
verification:
  # Placeholder patterns that must be present in the sanitized output.
  # NOTE(review): min_occurrences is presumably a lower bound on the match
  # count - confirm against the consuming script.
  required_placeholders:
    - pattern: "example\\.com"
      min_occurrences: 10
      reason: "Domain must be replaced throughout"

    - pattern: "matrix\\.example\\.org"
      min_occurrences: 3
      reason: "Secondary domain must be replaced"

    - pattern: "10\\.0\\.0\\."
      min_occurrences: 5
      reason: "Private IPs must use RFC 1918"

    - pattern: "REPLACE|GENERATE_WITH"
      min_occurrences: 5
      reason: "User guidance comments required"

  # Patterns that must not survive sanitization. This list mirrors the
  # critical_rules: both domains, both IP patterns, the home path, the
  # hostname and the secret prefixes.
  forbidden_patterns:
    - pattern: "clarun\\.xyz"
      max_occurrences: 0
      severity: critical

    - pattern: "talu\\.uno"
      max_occurrences: 0
      severity: critical

    - pattern: "192\\.168\\.1\\."
      max_occurrences: 0
      severity: critical

    - pattern: "45\\.77\\.205\\.49"
      max_occurrences: 0
      severity: critical

    - pattern: "/home/dan"
      max_occurrences: 0
      severity: critical

    # Hostname from critical rule 6; it was missing from this list although
    # validation_pipeline step 2 already greps for it.
    - pattern: "\\bjrz1\\b"
      max_occurrences: 0
      severity: critical

    - pattern: "syt_|xox[baprs]-|AGE-SECRET-KEY"
      max_occurrences: 0
      severity: critical
|
|
|
|
# Sanitization script integration
# Free-form usage notes for scripts/sanitize-files.sh, kept as a literal
# block scalar (the "#" line inside the scalar is part of the text, not a
# YAML comment).
# NOTE(review): the example writes to staging/modules, while
# validation_pipeline step 1 lists input "staging/" and output "sanitized/" -
# confirm which directory <output-dir> is meant to be.
# NOTE(review): the step list mentions critical_rules and
# high_priority_rules only; confirm whether medium_priority_rules and
# worklog_sanitization are applied by the script.
script_usage: |
  # scripts/sanitize-files.sh usage:

  ./scripts/sanitize-files.sh <source-dir> <output-dir>

  Example:
  ./scripts/sanitize-files.sh ~/proj/ops-base/modules staging/modules

  The script will:
  1. Copy files from source to staging
  2. Apply all critical_rules in order
  3. Apply all high_priority_rules
  4. Run validation pipeline
  5. Report any failures or warnings
  6. Exit 0 if all critical validations pass
|
|
|
|
# Manual review guide
# Human-readable checklist, kept as a literal block scalar, covering four
# areas: .nix files, documentation, git history and final validation.
manual_review_guide: |
  After automated sanitization, perform manual review:

  1. Read each .nix file:
  - Check comments for personal references
  - Verify all domains are generic (example.com, matrix.example.org)
  - Ensure all IPs use RFC 1918 or TEST-NET ranges
  - Look for hardcoded workspace/project names

  2. Read all documentation:
  - Check for personal debugging notes
  - Verify command examples use generic hosts
  - Ensure error messages don't expose real infrastructure
  - Check screenshots for sensitive data (if any)

  3. Review git history (if preserved):
  - Scan commit messages for personal context
  - Check for accidentally committed secrets
  - Verify no ops-base commits included

  4. Final validation:
  - Run gitleaks on full repository
  - Build all example configurations
  - Test deployment guide on clean VPS (Phase 3)
|
|
|
|
# Success criteria
# Every item is a quoted string. Left unquoted, the two items containing
# "expected_matches: 0" would be parsed as single-key mappings (because of
# the ": ") and not as list strings.
success_criteria:
  - "All critical_rules validation commands return expected_matches: 0"
  - "All high_priority_rules validation commands return expected_matches: 0"
  - "gitleaks returns 0 findings"
  - "nix flake check succeeds for all configurations"
  - "Manual review checklist 100% complete"
  - "No personal domains/IPs/paths in published repository"
  - "Fresh git history (no ops-base commits)"
|