diff --git a/modules/backup-b2.nix b/modules/backup-b2.nix
index e887cfb..fb9f6ce 100644
--- a/modules/backup-b2.nix
+++ b/modules/backup-b2.nix
@@ -65,11 +65,26 @@ in
     };
   };
 
+  # Backup failure notification service
+  systemd.services.backup-b2-failed = {
+    description = "Handle backup failure notification";
+    serviceConfig = {
+      Type = "oneshot";
+      User = "root";
+    };
+    script = ''
+      echo "BACKUP FAILED at $(date)" | tee -a /var/log/backup-failures.log
+      echo "Check: journalctl -u backup-b2 -n 50"
+      # TODO: Add Matrix notification or healthchecks.io ping
+    '';
+  };
+
   # Backup service
   systemd.services.backup-b2 = {
     description = "Restic backup to Backblaze B2";
     after = [ "network-online.target" "postgresql.service" ];
     wants = [ "network-online.target" ];
+    onFailure = [ "backup-b2-failed.service" ];
 
     # Don't require postgres - backup should still run even if DB is down
     # (will just skip the dump files if they don't exist)
@@ -79,6 +94,8 @@ in
       # Low priority
       IOSchedulingClass = "idle";
       Nice = 19;
+      # Timeout after 2 hours to prevent hung backups
+      TimeoutStartSec = "2h";
     };
 
     path = [ pkgs.restic ];
@@ -126,12 +143,17 @@ in
   # Weekly integrity check service
   systemd.services.backup-b2-check = {
     description = "Verify B2 backup integrity";
+    after = [ "network-online.target" ];
+    wants = [ "network-online.target" ];
+    onFailure = [ "backup-b2-failed.service" ];
 
     serviceConfig = {
       Type = "oneshot";
       User = "root";
       IOSchedulingClass = "idle";
       Nice = 19;
+      # Timeout after 1 hour for integrity check
+      TimeoutStartSec = "1h";
     };
 
     path = [ pkgs.restic ];