#!/usr/bin/env bash
# ============================================================
# infra-health.sh — Infrastructure health checks
# Checks: disk, memory, apache, key services, SSL certs,
#         website reachability
# Usage:  ./scripts/infra-health.sh [--json]
# Env:    SSL_TIMEOUT  seconds allowed per TLS fetch (default 10;
#                      only enforced when `timeout` is installed)
# Author: Luna (@luna) — QA
#
# The reporting functions used below (section, pass, fail,
# http_get, test_summary) and JSON_MODE are not defined in this
# file; they are presumably provided by shared/lib/test_helpers.sh
# — confirm against that helper.
# ============================================================

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"

helpers_file="$REPO_ROOT/shared/lib/test_helpers.sh"
# Check readability up front instead of testing the status of `source`:
# `source` returns the status of the helper's last command, which may be
# non-zero even after a successful load.
if [[ ! -r "$helpers_file" ]]; then
  printf 'infra-health: cannot read helper library: %s\n' "$helpers_file" >&2
  exit 1
fi
# Sourced without arguments, so the helper sees this script's own
# positional parameters (e.g. --json).
# shellcheck source=/dev/null
source "$helpers_file"

SSL_TIMEOUT="${SSL_TIMEOUT:-10}"

# ─── Local helpers ────────────────────────────────────────────

# Succeeds when $1 is a non-empty string of decimal digits.
_is_uint() {
  [[ "${1:-}" =~ ^[0-9]+$ ]]
}

# Succeeds when certificate common name $1 covers host $2.
# Comparison is case-insensitive. Accepts an exact match or a
# single-label wildcard ("*.example.com" covers "a.example.com" but
# neither "example.com" nor "a.b.example.com").
# Only the CN is examined here; subjectAltName entries are not consulted.
_cn_matches_domain() {
  local cn domain suffix label
  cn=$(printf '%s' "${1:-}" | tr '[:upper:]' '[:lower:]')
  domain=$(printf '%s' "${2:-}" | tr '[:upper:]' '[:lower:]')

  [[ -n "$cn" && -n "$domain" ]] || return 1
  [[ "$cn" == "$domain" ]] && return 0

  if [[ "$cn" == \*.* ]]; then
    suffix=${cn#\*}          # "*.example.com" -> ".example.com"
    label=${domain%%.*}      # left-most label of the host
    [[ -n "$label" && "$domain" == "${label}${suffix}" ]] && return 0
  fi
  return 1
}

# Prints the "notAfter=" and "subject=" lines of the certificate served
# by $1 on port 443, using a single TLS handshake. Prints nothing when
# the certificate cannot be retrieved.
_fetch_cert_info() {
  local domain=$1
  local -a fetch_cmd=(openssl s_client -servername "$domain" -connect "$domain:443")

  # Bound the handshake so one unreachable host cannot stall the whole run.
  if command -v timeout >/dev/null 2>&1; then
    fetch_cmd=(timeout "$SSL_TIMEOUT" "${fetch_cmd[@]}")
  fi

  # stdin from /dev/null makes s_client quit once the handshake is done.
  "${fetch_cmd[@]}" </dev/null 2>/dev/null \
    | openssl x509 -noout -enddate -subject 2>/dev/null
}

# ─── Banner ───────────────────────────────────────────────────
# Compare the flag as a string rather than executing its contents as a
# command; an unset JSON_MODE now means "human-readable output".
if [[ "${JSON_MODE:-false}" != "true" ]]; then
  echo "============================================"
  echo "  INFRASTRUCTURE HEALTH CHECK"
  echo "  $(date -u '+%Y-%m-%d %H:%M:%S UTC')"
  echo "  Host: $(hostname)"
  echo "============================================"
fi

# ─── Disk ─────────────────────────────────────────────────────
section "DISK USAGE"

# Columns: source size used avail pcent target. `mount` is last so that
# it absorbs any spaces in the mount point.
while read -r _ _ _ avail pct mount; do
  pct_num=${pct%\%}
  # df prints "-" for filesystems without usage data; skip those rows
  # rather than feeding a non-number to the comparisons below.
  _is_uint "$pct_num" || continue

  if (( pct_num < 80 )); then
    pass "$mount: ${pct} used ($avail available)"
  elif (( pct_num < 90 )); then
    fail "$mount: ${pct} used — WARNING" "$avail remaining"
  else
    fail "$mount: ${pct} used — CRITICAL" "$avail remaining"
  fi
done < <(df -h --output=source,size,used,avail,pcent,target -x tmpfs -x devtmpfs 2>/dev/null | tail -n +2)

# ─── Memory ───────────────────────────────────────────────────
section "MEMORY"

# One snapshot so the RAM and swap figures come from the same instant.
mem_report=$(free -m)

read -r total used _ _ _ avail < <(awk '/^Mem:/ {print $2,$3,$4,$5,$6,$7}' <<<"$mem_report")
if _is_uint "$total" && _is_uint "$used" && (( total > 0 )); then
  pct_used=$(( used * 100 / total ))
  if (( pct_used < 80 )); then
    pass "RAM: ${used}M / ${total}M (${pct_used}%) — ${avail}M available"
  else
    fail "RAM: ${used}M / ${total}M (${pct_used}%)" "only ${avail}M available"
  fi
else
  # Avoids a division by zero / arithmetic syntax error when `free`
  # produced no usable Mem: line.
  fail "RAM" "could not read memory statistics"
fi

read -r stotal sused _ < <(awk '/^Swap:/ {print $2,$3,$4}' <<<"$mem_report")
# Hosts without swap report a total of 0; nothing to check there.
if _is_uint "$stotal" && _is_uint "$sused" && (( stotal > 0 )); then
  spct=$(( sused * 100 / stotal ))
  if (( spct < 50 )); then
    pass "Swap: ${sused}M / ${stotal}M (${spct}%)"
  else
    fail "Swap: ${sused}M / ${stotal}M (${spct}%)" "high swap usage"
  fi
fi

# ─── Key services ─────────────────────────────────────────────
section "SERVICES"

for svc in apache2 mysql mariadb; do
  if systemctl is-active --quiet "$svc" 2>/dev/null; then
    pass "$svc is running"
  # Only report a failure for units that are actually installed; a host
  # normally has just one of mysql/mariadb.
  elif systemctl list-unit-files "$svc.service" 2>/dev/null | grep -qF -- "$svc"; then
    fail "$svc is NOT running"
  fi
done

# ─── SSL cert checks ──────────────────────────────────────────
section "SSL CERTIFICATES"

DOMAINS=("payfrit.com" "dev.payfrit.com" "biz.payfrit.com" "grubflip.com" "dev.grubflip.com")
for domain in "${DOMAINS[@]}"; do
  cert_info=$(_fetch_cert_info "$domain")
  expiry=$(sed -n 's/^notAfter=//p' <<<"$cert_info")
  # Stop at the first "," or "/" so RDNs that follow the CN are not
  # swallowed into the name.
  cn=$(grep '^subject' <<<"$cert_info" | grep -oP 'CN\s*=\s*\K[^,/]+')

  if [[ -z "$expiry" ]]; then
    fail "$domain SSL" "could not retrieve certificate"
    continue
  fi

  expiry_epoch=$(date -d "$expiry" +%s 2>/dev/null)
  if ! _is_uint "$expiry_epoch"; then
    # Without this guard an unparsable date would be reported as EXPIRED.
    fail "$domain SSL" "could not parse expiry date: $expiry"
    continue
  fi
  now_epoch=$(date +%s)
  days_left=$(( (expiry_epoch - now_epoch) / 86400 ))

  if _cn_matches_domain "$cn" "$domain"; then
    cn_ok="CN matches"
  else
    cn_ok="CN MISMATCH: $cn"
  fi

  # Decide "expired" on the raw timestamps: integer division rounds a
  # certificate with less than 24h left down to 0 days.
  if (( expiry_epoch <= now_epoch )); then
    fail "$domain — EXPIRED" "$cn_ok"
  elif (( days_left > 30 )); then
    pass "$domain — ${days_left}d until expiry, $cn_ok"
  else
    fail "$domain — EXPIRING in ${days_left}d" "$cn_ok"
  fi
done

# ─── Website reachability ─────────────────────────────────────
section "WEBSITE REACHABILITY"

SITES=("https://payfrit.com" "https://dev.payfrit.com" "https://biz.payfrit.com" "https://grubflip.com" "https://dev.grubflip.com")
for url in "${SITES[@]}"; do
  result=$(http_get "$url")
  # NOTE(review): assumes http_get prints "code|body|ms" on one line —
  # confirm against the helper. `body` is read only to reach `ms`.
  IFS='|' read -r code body ms <<< "$result"
  if [[ "$code" == "200" ]]; then
    pass "$url — HTTP $code (${ms}ms)"
  elif [[ "$code" == "000" ]]; then
    fail "$url" "unreachable"
  else
    fail "$url" "HTTP $code (${ms}ms)"
  fi
done

test_summary