ops-jrz1/scripts/smoke-test.sh
Dan 3d33a45cc9 Add learner dev environment, testing infrastructure, and skills
Learner account management:
- learner-add.sh: create accounts with SSH, plugin skeleton
- learner-remove.sh: remove accounts with optional archive
- plugin-skeleton template: starter maubot plugin

Testing:
- flake.nix: add checks output for pre-deploy validation
- smoke-test.sh: post-deploy service verification

Documentation:
- learner-onboarding.md: VS Code Remote-SSH setup guide
- learner-admin.md: account management procedures

Skills:
- code-review.md: multi-lens code review skill
- orch, worklog: symlinks to shared skills
2025-12-28 22:23:06 -05:00

200 lines
4.6 KiB
Bash
Executable file

#!/usr/bin/env bash
# smoke-test.sh - Post-deploy smoke tests for ops-jrz1
#
# Usage:
# ./smoke-test.sh # Run against production VPS
# ./smoke-test.sh --local # Run locally (for VM testing)
# SSH_HOST=user@host ./smoke-test.sh # Custom SSH target
#
# Exit codes:
# 0 - All checks passed
# 1 - One or more checks failed
# Strict mode: stop on a failing command, on use of an unset variable, and
# when any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

# Target selection: SSH_HOST keeps a non-empty value supplied through the
# environment and otherwise falls back to the default below. LOCAL_MODE
# starts out disabled.
: "${SSH_HOST:=root@45.77.205.49}"
LOCAL_MODE=false

# Colour escape sequences, stored as literal backslash text and expanded
# only when printed. NC resets the colour.
NC='\033[0m'
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'

# Result tallies, updated by the pass/fail/warn helpers.
PASSED=0 FAILED=0 WARNINGS=0
# Parse command-line arguments.
#   --local   run every check on this machine instead of over SSH
# Any other argument is rejected: the diagnostic is written to stderr (so it
# never mixes with the test report on stdout) and the script exits with
# status 1.
while [[ $# -gt 0 ]]; do
  case "$1" in
    --local)
      LOCAL_MODE=true
      shift
      ;;
    *)
      printf 'Unknown option: %s\n' "$1" >&2
      exit 1
      ;;
  esac
done
# Execute a shell command string on the system under test.
#
# Arguments:
#   $1 - command line, passed as a single string
# Globals:
#   LOCAL_MODE (read), SSH_HOST (read)
# Returns:
#   the exit status of the local shell or of ssh
run() {
  local cmdline="$1"
  case "$LOCAL_MODE" in
    true)
      # Local mode: hand the string to a fresh bash.
      bash -c "$cmdline"
      ;;
    *)
      # Remote mode: run it on SSH_HOST with a 10 second connect timeout.
      ssh -o ConnectTimeout=10 "$SSH_HOST" "$cmdline"
      ;;
  esac
}
# Test helpers

# Record a passing check.
#
# Arguments:
#   $1 - description of the check
# Globals:
#   GREEN, NC (read), PASSED (incremented)
# Outputs:
#   one line on stdout: a green check mark followed by the description
pass() {
  # %b expands the escape sequences stored in the colour variables, while %s
  # prints the description verbatim (backslashes in it are not interpreted).
  # The explicit marker keeps results readable when colours are not rendered.
  printf '%b✓%b %s\n' "$GREEN" "$NC" "$1"
  PASSED=$((PASSED + 1))
}
# Record a failed check.
#
# Arguments:
#   $1 - description of the check
# Globals:
#   RED, NC (read), FAILED (incremented)
# Outputs:
#   one line on stdout: a red cross followed by the description
fail() {
  # %b expands the escape sequences stored in the colour variables, while %s
  # prints the description verbatim (backslashes in it are not interpreted).
  # The explicit marker keeps results readable when colours are not rendered.
  printf '%b✗%b %s\n' "$RED" "$NC" "$1"
  FAILED=$((FAILED + 1))
}
# Record a warning (a problem that does not fail the run).
#
# Arguments:
#   $1 - description of the check
# Globals:
#   YELLOW, NC (read), WARNINGS (incremented)
# Outputs:
#   one line on stdout: a yellow warning sign followed by the description
warn() {
  # %b expands the escape sequences stored in the colour variables, while %s
  # prints the description verbatim (backslashes in it are not interpreted).
  # The explicit marker keeps results readable when colours are not rendered.
  printf '%b⚠%b %s\n' "$YELLOW" "$NC" "$1"
  WARNINGS=$((WARNINGS + 1))
}
# Report whether a systemd unit is active on the target.
#
# Arguments:
#   $1 - unit name handed to `systemctl is-active --quiet`
#   $2 - label for the report line (defaults to the unit name)
# Outputs:
#   one pass or fail line
# Returns:
#   always 0, so a stopped service is counted but never aborts the script
check_service() {
  local unit="$1"
  local label="${2:-$unit}"
  # stderr of the probe is discarded; only its exit status matters.
  if ! run "systemctl is-active --quiet $unit" 2>/dev/null; then
    fail "$label is not running"
    return 0
  fi
  pass "$label is running"
  return 0
}
# Check that an HTTP endpoint answers with the expected status code.
#
# Arguments:
#   $1 - URL to request (fetched with curl through run)
#   $2 - description used in the report line
#   $3 - expected HTTP status code (default: 200)
# Outputs:
#   exactly one pass or fail line
check_http() {
  local url="$1"
  local desc="$2"
  local expected_code="${3:-200}"
  local code
  # A failing probe (curl or the transport) and an empty reply are both
  # mapped to "000", which is reported below as a connection failure.
  code=$(run "curl -s -o /dev/null -w '%{http_code}' --connect-timeout 5 '$url'" 2>/dev/null) || code="000"
  code="${code:-000}"
  if [[ "$code" == "$expected_code" ]]; then
    # An expected 401 gets its own wording so the report shows that the
    # "unauthorized" answer is the healthy outcome. This case must be handled
    # inside the match branch; as a later elif it could never be reached.
    if [[ "$code" == "401" ]]; then
      pass "$desc (HTTP 401 - auth required, expected)"
    else
      pass "$desc (HTTP $code)"
    fi
  elif [[ "$code" == "000" ]]; then
    fail "$desc (connection failed)"
  else
    fail "$desc (HTTP $code, expected $expected_code)"
  fi
}
# Report whether a TCP port shows up in the target's listening sockets.
#
# Arguments:
#   $1 - port number
#   $2 - label for the report line
# Outputs:
#   one pass or fail line
check_port() {
  local port_no="$1"
  local label="$2"
  # Search the `ss -tlnp` listing for ":<port> "; the trailing space keeps a
  # port from matching as the prefix of a longer number.
  local probe="ss -tlnp | grep -q ':${port_no} '"
  if ! run "$probe" 2>/dev/null; then
    fail "$label not listening on port $port_no"
    return
  fi
  pass "$label listening on port $port_no"
}
# Main tests
# Print the report banner, then one line naming what is being tested:
# "Mode: Local" in local mode, otherwise the SSH target.
printf '%s\n' \
  "================================" \
  " Smoke Tests for ops-jrz1" \
  "================================" \
  ""
case "$LOCAL_MODE" in
  true) echo "Mode: Local" ;;
  *) echo "Target: $SSH_HOST" ;;
esac
echo ""
# Connectivity gate: every later check goes through SSH unless local mode is
# on, so an unreachable host aborts the run immediately with status 1.
echo "── Connectivity ──"
if [[ "$LOCAL_MODE" == true ]]; then
  pass "Local mode - no SSH needed"
elif ssh -o ConnectTimeout=5 "$SSH_HOST" "echo ok" &>/dev/null; then
  # A trivial remote command succeeded within the 5 second connect timeout.
  pass "SSH connection"
else
  fail "SSH connection failed"
  echo ""
  echo "Cannot connect to $SSH_HOST - aborting"
  exit 1
fi
echo ""
# Systemd services
# Each entry is "unit|description"; it is split on the first "|".
echo "── Core Services ──"
service_checks=(
  "nginx|nginx (reverse proxy)"
  "postgresql|PostgreSQL"
  "matrix-continuwuity|Matrix homeserver (continuwuity)"
  "mautrix-slack|Slack bridge"
  "maubot|Maubot"
)
for check_entry in "${service_checks[@]}"; do
  check_service "${check_entry%%|*}" "${check_entry#*|}"
done
echo ""
# Ports
# Each entry is "port|description".
echo "── Ports ──"
port_checks=(
  "443|HTTPS"
  "8008|Matrix homeserver"
  "29316|Maubot admin"
  "29319|Slack bridge"
)
for check_entry in "${port_checks[@]}"; do
  check_port "${check_entry%%|*}" "${check_entry#*|}"
done
echo ""
# HTTP endpoints
# Each entry is "url|description|expected status". The Maubot login endpoint
# is expected to answer 401.
echo "── HTTP Endpoints ──"
http_checks=(
  "https://clarun.xyz/.well-known/matrix/server|Matrix server discovery|200"
  "https://clarun.xyz/.well-known/matrix/client|Matrix client discovery|200"
  "http://127.0.0.1:8008/_matrix/client/versions|Matrix API|200"
  "http://127.0.0.1:29316/_matrix/maubot/v1/login|Maubot API|401"
)
for check_entry in "${http_checks[@]}"; do
  IFS='|' read -r check_url check_label check_status <<<"$check_entry"
  check_http "$check_url" "$check_label" "$check_status"
done
echo ""
# Database connectivity
# Two probes run `SELECT 1` through psql as the postgres user: one against
# the default database (a failure counts as a failed check) and one against
# the mautrix_slack database (a failure only raises a warning).
echo "── Database ──"
psql_as_postgres="sudo -u postgres psql"
if ! run "$psql_as_postgres -c 'SELECT 1' >/dev/null 2>&1"; then
  fail "PostgreSQL not responding"
else
  pass "PostgreSQL responds to queries"
fi
if ! run "$psql_as_postgres -d mautrix_slack -c 'SELECT 1' >/dev/null 2>&1"; then
  warn "mautrix_slack database not accessible"
else
  pass "mautrix_slack database exists"
fi
echo ""
# Summary
# Print the three tallies, then turn them into the exit status: any failed
# check exits 1, otherwise 0. Warnings never affect the status.
printf '%s\n' \
  "================================" \
  " Results" \
  "================================"
echo -e " ${GREEN}Passed:${NC} $PASSED"
echo -e " ${RED}Failed:${NC} $FAILED"
echo -e " ${YELLOW}Warnings:${NC} $WARNINGS"
echo ""
if [[ $FAILED -le 0 ]]; then
  echo -e "${GREEN}SMOKE TEST PASSED${NC}"
  exit 0
fi
echo -e "${RED}SMOKE TEST FAILED${NC}"
exit 1