ops-jrz1/scripts/sanitize-files.sh
Dan 894e7241f1 Initialize ops-jrz1 repository with Matrix platform extraction foundation
- Add speckit workflow infrastructure (.claude, .specify)
- Create NixOS configuration skeleton (flake.nix, configuration.nix, hosts/ops-jrz1.nix)
- Add sanitization scripts with 22 rules for personal info removal
- Add validation scripts with gitleaks integration
- Configure git hooks (pre-commit, pre-push) for security validation
- Add project documentation (README, LICENSE)
- Add comprehensive .gitignore for Nix, secrets, staging

Phase 1 and Phase 2 complete. Foundation ready for module extraction from ops-base.
2025-10-13 13:37:17 -07:00

110 lines
4.6 KiB
Bash
Executable file

#!/usr/bin/env bash
#
# Sanitization script for extracting Matrix platform modules from ops-base
# Based on contracts/sanitization-rules.yaml
#
# Usage: ./scripts/sanitize-files.sh <source-dir> <output-dir>
#
# This script applies all sanitization rules to remove personal information
# and prepare modules for publication in ops-jrz1 repository.
# Strict mode: abort on a failing command, on use of an unset variable, and
# when any stage of a pipeline fails.
set -euo pipefail

# ANSI colour escape sequences for status output (expanded by `echo -e`).
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m' # No Color

# Absolute path of the directory holding this script, and of its parent.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"

# Exactly two positional arguments are required; usage is a diagnostic, so it
# goes to stderr.
if [ $# -ne 2 ]; then
  echo "Usage: $0 <source-dir> <output-dir>" >&2
  echo "Example: $0 ~/proj/ops-base/modules staging/modules" >&2
  exit 1
fi

SOURCE_DIR="$1"
OUTPUT_DIR="$2"

# Fail early, before creating anything, when there is nothing to copy.
if [[ ! -d "$SOURCE_DIR" ]]; then
  echo -e "${RED}Error: source directory not found: $SOURCE_DIR${NC}" >&2
  exit 1
fi

# The sanitization rules edit files under OUTPUT_DIR in place (sed -i). If
# OUTPUT_DIR is the same directory as SOURCE_DIR the unsanitized originals
# would be overwritten, so refuse to continue.
if [[ "$SOURCE_DIR" -ef "$OUTPUT_DIR" ]]; then
  echo -e "${RED}Error: output directory must differ from source directory${NC}" >&2
  exit 1
fi

echo "==> Sanitizing files from $SOURCE_DIR to $OUTPUT_DIR"

# Create the output directory (and any missing parents).
mkdir -p -- "$OUTPUT_DIR"

# Copy the source tree into the output directory, leaving out any .git entry.
# The trailing slashes make rsync copy the directory contents rather than the
# directory itself.
echo "==> Copying files..."
rsync -av --exclude='.git' -- "$SOURCE_DIR/" "$OUTPUT_DIR/"
#######################################
# Apply one sed substitution in place to matching files under a directory.
# Requires GNU sed (-i, and the \+ \| \b extensions used by the rules).
# Arguments:
#   $1 - directory to search recursively
#   $2 - progress label (printed after " - ")
#   $3 - sed script (a single s command)
#   $4.. - one or more file-name globs; a file matching any of them is edited
# Returns:
#   non-zero if find or any sed invocation fails
#######################################
_sanitize_apply() {
  local dir=$1 label=$2 expr=$3
  shift 3
  local -a name_tests=(-name "$1")
  shift
  local glob
  for glob in "$@"; do
    name_tests+=(-o -name "$glob")
  done
  echo " - $label"
  # `-exec ... +` batches files into few sed runs and, unlike `\;`, makes
  # find exit non-zero when sed fails, so a broken rule is not silently skipped.
  find "$dir" -type f \( "${name_tests[@]}" \) -exec sed -i -e "$expr" -- {} +
}

#######################################
# Apply every sanitization rule to the *.nix / *.md files under a directory.
#
# Rules run from most specific to most general. A general rule rewrites text
# that a specific rule needs to see (e.g. "/home/dan" is a prefix of
# "/home/dan/proj/ops-base"), so running the general one first would leave
# the specific one with nothing to match.
# Arguments:
#   $1 - directory whose files are edited in place
# Returns:
#   1 if $1 is not a directory; otherwise the status of the last rule
#######################################
apply_sanitization_rules() {
  local dir=$1
  if [[ ! -d "$dir" ]]; then
    echo "Error: not a directory: $dir" >&2
    return 1
  fi

  echo "==> Applying sanitization rules..."

  # --- Context-specific rules (must precede the generic replacements) ---

  # Critical Rule 7: Replace Matrix admin user
  _sanitize_apply "$dir" "Replacing @admin:clarun.xyz → @admin:example.com" \
    's/@admin:clarun\.xyz/@admin:example.com/g' '*.nix' '*.md'

  # High Priority Rule 13: Replace project paths (before Rule 5)
  _sanitize_apply "$dir" "Replacing /home/dan/proj/ops-base → /path/to/ops-base" \
    's|/home/dan/proj/ops-base|/path/to/ops-base|g' '*.nix' '*.md'

  # High Priority Rule 14: Replace continuwuity local path (before Rule 5)
  _sanitize_apply "$dir" "Replacing git+file:///home/dan/proj/continuwuity → github:girlbossceo/conduwuit" \
    's|git+file:///home/dan/proj/continuwuity|github:girlbossceo/conduwuit|g' '*.nix'

  # Worklog Rule 20: Sanitize error messages with IPs (before Rule 4)
  _sanitize_apply "$dir" "Sanitizing error messages with IPs" \
    's/connection to \(192\.168\.1\.[0-9]\+\|45\.77\.205\.49\)/connection to <host>/g' '*.md'

  # Worklog Rule 21: Sanitize SSH commands (before Rule 4)
  _sanitize_apply "$dir" "Sanitizing SSH commands" \
    's/ssh root@\(45\.77\.205\.49\|192\.168\.1\.[0-9]\+\)/ssh root@<vps-ip>/g' '*.md'

  # Worklog Rule 22: Sanitize curl commands (before Rules 1 and 2).
  # The delimiter is '#', not '|': sed treats an escaped delimiter as a
  # literal character, so with '|' the \| alternation would never work.
  _sanitize_apply "$dir" "Sanitizing curl commands" \
    's#curl https\?://\(clarun\.xyz\|talu\.uno\)#curl https://example.com#g' '*.md'

  # --- Generic replacements ---

  # Critical Rule 1: Replace clarun.xyz domain
  _sanitize_apply "$dir" "Replacing clarun.xyz → example.com" \
    's/clarun\.xyz/example.com/g' '*.nix' '*.md'

  # Critical Rule 2: Replace talu.uno domain
  _sanitize_apply "$dir" "Replacing talu.uno → matrix.example.org" \
    's/talu\.uno/matrix.example.org/g' '*.nix' '*.md'

  # Critical Rule 3: Replace private IP 192.168.1.x → 10.0.0.x
  _sanitize_apply "$dir" "Replacing 192.168.1.x → 10.0.0.x" \
    's/192\.168\.1\.\([0-9]\+\)/10.0.0.\1/g' '*.nix'

  # Critical Rule 4: Replace public VPS IP
  _sanitize_apply "$dir" "Replacing 45.77.205.49 → 203.0.113.10" \
    's/45\.77\.205\.49/203.0.113.10/g' '*.nix' '*.md'

  # Critical Rule 5: Replace personal home path
  _sanitize_apply "$dir" "Replacing /home/dan → /home/user" \
    's|/home/dan|/home/user|g' '*.nix' '*.md'

  # Critical Rule 6: Replace hostname jrz1 → matrix
  _sanitize_apply "$dir" "Replacing hostname jrz1 → matrix" \
    's/\bjrz1\b/matrix/g' '*.nix' '*.md'

  # High Priority Rule 11: Replace workspace name
  _sanitize_apply "$dir" "Replacing my-workspace → your-workspace" \
    's/my-workspace/your-workspace/g' '*.nix'

  # High Priority Rule 12: Replace personal email
  _sanitize_apply "$dir" "Replacing dlei@duck.com → admin@example.com" \
    's/dlei@duck\.com/admin@example.com/g' '*.nix' '*.md'

  # High Priority Rule 15: Sanitize example registration token
  _sanitize_apply "$dir" "Replacing example registration token → GENERATE_WITH_openssl_rand_hex_32" \
    's/9a3ad59ee136e5a9dc1612cc179c9b7ff8da78c537682aad82c8084e5ae6b5c3/GENERATE_WITH_openssl_rand_hex_32/g' '*.nix' '*.md'
}

apply_sanitization_rules "$OUTPUT_DIR"
# Report success in green, then print the follow-up steps for the operator
# (a blank line, a heading, and three numbered items).
printf '%b\n' "${GREEN}✓ Sanitization complete${NC}"
cat <<EOF

Next steps:
1. Run manual review: Check for personal references in comments
2. Run validation: ./scripts/validate-sanitization.sh $OUTPUT_DIR
3. Review changes: git diff $OUTPUT_DIR
EOF