ops-jrz1/scripts/egress-watchdog

54 lines
1.6 KiB
Bash
Executable file

#!/run/current-system/sw/bin/bash
# egress-watchdog - Kill users who keep hitting egress rate limits
# Runs every minute via systemd timer
#
# Each run counts the kernel-log lines containing "EGRESS-LIMIT" from the last
# minute, grouped by the UID=<n> field. A user with more than THRESHOLD such
# lines gets a strike, stored as a plain number in $COUNTDIR/<username>. When a
# user reaches MAX_STRIKES the external `killswitch` command is invoked for
# them. Any user who did not exceed the threshold in this run - including one
# with no hits at all - has their strike file removed, so strikes only
# accumulate across consecutive offending runs.
#
# Exit status: 0 on a completed run, 1 if the state directory cannot be created
# or the kernel log cannot be read.
set -uo pipefail

# NixOS paths
PATH="/run/current-system/sw/bin:$PATH"

readonly THRESHOLD=10   # A strike is recorded when hits in one run exceed this
readonly MAX_STRIKES=3  # Strikes before kill
readonly COUNTDIR="/var/lib/egress-watchdog"

#######################################
# Turn kernel log text into per-UID hit counts.
# Inputs:  log lines on stdin
# Outputs: one "<count> <uid>" line per UID (uniq -c format) on stdout
# Note: grep -oP requires GNU grep with PCRE (provided by runtimeInputs in Nix).
#       grep exits 1 when nothing matches; callers ignore the status.
#######################################
tally_hits() {
  grep -F -- "EGRESS-LIMIT" \
    | grep -oP 'UID=\K[0-9]+' \
    | sort \
    | uniq -c
}

#######################################
# Print the strike count stored in a strike file.
# Arguments: $1 - path to the strike file
# Outputs:   the stored count; 0 if the file is missing, empty or not a
#            plain decimal number
#######################################
read_strikes() {
  local file=$1 raw=""
  if [[ -f $file && -r $file ]]; then
    # read fails on an empty file or a missing final newline; raw is still set.
    IFS= read -r raw < "$file" || true
  fi
  if [[ $raw =~ ^[0-9]+$ ]]; then
    # 10# forces base 10 so a value like "08" is not parsed as octal.
    printf '%d\n' "$((10#$raw))"
  else
    # Never feed unvalidated file content to arithmetic evaluation.
    printf '0\n'
  fi
}

#######################################
# Apply the strike policy to one run's tallies.
# Globals:   THRESHOLD, MAX_STRIKES (read)
# Arguments: $1 - directory holding one strike file per username
# Inputs:    "<count> <uid>" lines on stdin
# Effects:   updates/removes strike files, logs via logger, may call killswitch
#######################################
process_tallies() {
  local state_dir=$1
  local count uid user file strikes name
  local -A offenders=()

  while read -r count uid; do
    # Skip blank or malformed tally lines.
    [[ $count =~ ^[0-9]+$ && $uid =~ ^[0-9]+$ ]] || continue

    # Get username from UID; UIDs without a passwd entry are ignored.
    user=$(getent passwd "$uid" 2>/dev/null | cut -d: -f1)
    [[ -n $user ]] || continue

    (( 10#$count > THRESHOLD )) || continue

    offenders[$user]=1
    file="$state_dir/$user"
    strikes=$(( $(read_strikes "$file") + 1 ))
    printf '%s\n' "$strikes" > "$file" \
      || logger -t egress-watchdog "Cannot write strike file $file"
    logger -t egress-watchdog "User $user hit egress limit $count times (strike $strikes/$MAX_STRIKES)"

    if (( strikes >= MAX_STRIKES )); then
      # stdin is redirected so killswitch cannot swallow the remaining tallies.
      if killswitch "$user" "egress abuse ($count blocked connections)" </dev/null; then
        rm -f -- "$file"
      else
        # Keep the strike file so the kill is retried on the next offending run.
        logger -t egress-watchdog "killswitch failed for $user; strike count kept for retry"
      fi
    fi
  done

  # Reset counters for everyone who stayed at or below the threshold this run,
  # including users who produced no hits at all.
  for file in "$state_dir"/*; do
    [[ -f $file ]] || continue   # also covers the unmatched-glob case
    name=${file##*/}
    [[ -n ${offenders[$name]:-} ]] || rm -f -- "$file"
  done
  return 0
}

#######################################
# Entry point: read the last minute of kernel log and apply the policy.
# Globals: COUNTDIR (read)
# Returns: 0 on success, 1 if the state dir or the kernel log is unavailable
#######################################
main() {
  local journal

  if ! mkdir -p -- "$COUNTDIR"; then
    logger -t egress-watchdog "Cannot create state directory $COUNTDIR"
    return 1
  fi

  # A failed journal read must not look like "no hits", or every strike
  # counter would be wiped by the reset pass.
  if ! journal=$(journalctl -k --since "1 minute ago" 2>/dev/null); then
    logger -t egress-watchdog "Cannot read kernel log; strike counters left untouched"
    return 1
  fi

  # Process substitution keeps the loop in the current shell (no pipeline subshell).
  process_tallies "$COUNTDIR" < <(tally_hits <<< "$journal")
  return 0
}

main "$@"