373 lines
18 KiB
Bash
Executable File
373 lines
18 KiB
Bash
Executable File
#!/bin/bash
|
|
#==============================================================================
|
|
# Momentry Core — Maintenance & Check Script
|
|
# Usage: bash check_momentry.sh [--production] [--json]
|
|
#
|
|
# Checks:
|
|
# 1. Version & build info (vs latest tag)
|
|
# 2. All 4 core services (PostgreSQL, Redis, MongoDB, Qdrant)
|
|
# 3. Binary health (API endpoint)
|
|
# 4. Pipeline completeness (scripts, models, processors, tools)
|
|
# 5. Python dependencies & environment
|
|
# 6. API smoke tests
|
|
# 7. Resource usage (CPU, memory, disk)
|
|
#==============================================================================
|
|
|
|
set -euo pipefail
|
|
|
|
PROJECT_DIR="$(cd "$(dirname "$0")/../.." && pwd)"
|
|
COLOR=true
|
|
JSON=false
|
|
PRODUCTION=false
|
|
|
|
# ─── Color helpers ───
|
|
if [ "$COLOR" = true ]; then
|
|
R='\033[0;31m'; G='\033[0;32m'; Y='\033[1;33m'; B='\033[0;34m'; C='\033[0;36m'; N='\033[0m'
|
|
else
|
|
R=''; G=''; Y=''; B=''; C=''; N=''
|
|
fi
|
|
ok() { echo -e " ${G}✓${N} $1"; }
|
|
fail() { echo -e " ${R}✗${N} $1"; FAILURES+=("$1"); }
|
|
info() { echo -e " ${B}→${N} $1"; }
|
|
warn() { echo -e " ${Y}⚠${N} $1"; }
|
|
header(){ echo -e "\n${C}─── $1 ───${N}"; }
|
|
|
|
FAILURES=()
|
|
CHECKS_TOTAL=0
|
|
CHECKS_PASS=0
|
|
|
|
# ─── Parse args ───
|
|
while [ $# -gt 0 ]; do
|
|
case "$1" in
|
|
--production) PRODUCTION=true; shift ;;
|
|
--json) JSON=true; shift ;;
|
|
--no-color) COLOR=false; shift ;;
|
|
--help) head -20 "$0"; exit 0 ;;
|
|
*) echo "Unknown: $1"; exit 1 ;;
|
|
esac
|
|
done
|
|
|
|
if $PRODUCTION; then
|
|
API_BASE="http://127.0.0.1:3002"
|
|
SCHEMA="public"
|
|
else
|
|
API_BASE="http://127.0.0.1:3003"
|
|
SCHEMA="dev"
|
|
fi
|
|
|
|
PG_BIN="${PG_BIN:-$HOME/pgsql/18.3/bin}"
|
|
DB_NAME="${DB_NAME:-momentry}"
|
|
API_KEY="${MOMENTRY_API_KEY:-muser_68600856036340bcafc01930eb4bd839_1774418104_97221b69}"
|
|
PYTHON_BIN="${MOMENTRY_PYTHON_PATH:-/opt/homebrew/bin/python3.11}"
|
|
NOW=$(date '+%Y-%m-%d %H:%M:%S')
|
|
|
|
# JSON output accumulator
|
|
JSON_PARTS="["
|
|
add_json() {
|
|
local name="$1" status="$2" detail="$3"
|
|
JSON_PARTS+="{\"check\":\"$name\",\"status\":\"$status\",\"detail\":\"$detail\"},"
|
|
}
|
|
emit_json() {
|
|
JSON_PARTS="${JSON_PARTS%,}]"
|
|
echo "$JSON_PARTS" | python3 -m json.tool 2>/dev/null || echo "$JSON_PARTS"
|
|
}
|
|
|
|
run_check() {
|
|
local name="$1"
|
|
local status="$2"
|
|
shift 2
|
|
|
|
local output
|
|
output=$("$@" 2>&1) || true
|
|
local rc=$?
|
|
|
|
if [ "$status" = "optional" ]; then
|
|
[ $rc -eq 0 ] && ok "$name" || warn "$name — $output"
|
|
add_json "$name" "$([ $rc -eq 0 ] && echo 'ok' || echo 'warn')" "$([ $rc -eq 0 ] && echo 'pass' || echo "$output")"
|
|
else
|
|
CHECKS_TOTAL=$((CHECKS_TOTAL + 1))
|
|
if [ $rc -eq 0 ]; then
|
|
ok "$name"
|
|
CHECKS_PASS=$((CHECKS_PASS + 1))
|
|
add_json "$name" "ok" "pass"
|
|
else
|
|
fail "$name — $output"
|
|
add_json "$name" "fail" "$output"
|
|
fi
|
|
fi
|
|
}
|
|
|
|
cd "$PROJECT_DIR"
|
|
echo -e "${C}========================================${N}"
|
|
echo -e "${C} Momentry Core — Maintenance Check${N}"
|
|
echo -e "${C} Date: $NOW${N}"
|
|
echo -e "${C} Target: $API_BASE (schema: $SCHEMA)${N}"
|
|
echo -e "${C}========================================${N}"
|
|
|
|
# ═══════════════════════════════════════════════════════════════
|
|
# Check 1: Version & Build
|
|
# ═══════════════════════════════════════════════════════════════
|
|
header "Check 1/8 — Version & Build"
|
|
|
|
HEALTH=$(curl -sf "$API_BASE/health" 2>/dev/null || echo '{"status":"error"}')
|
|
CURRENT_VER=$(echo "$HEALTH" | python3 -c "import json,sys;print(json.load(sys.stdin).get('version','?'))" 2>/dev/null)
|
|
CURRENT_HASH=$(echo "$HEALTH" | python3 -c "import json,sys;print(json.load(sys.stdin).get('build_git_hash','?'))" 2>/dev/null)
|
|
CURRENT_TS=$(echo "$HEALTH" | python3 -c "import json,sys;print(json.load(sys.stdin).get('build_timestamp','?'))" 2>/dev/null)
|
|
|
|
run_check "API server reachable" "critical" bash -c "curl -sf '$API_BASE/health' > /dev/null"
|
|
run_check "Version: $CURRENT_VER" "critical" bash -c "echo '$CURRENT_VER' | grep -qE '^[0-9]+\.[0-9]+\.[0-9]+'"
|
|
|
|
# Compare with latest git tag
|
|
LATEST_TAG=$(git tag --sort=-v:refname 2>/dev/null | head -1 || echo "none")
|
|
if [ "$LATEST_TAG" != "none" ]; then
|
|
if [ "v$CURRENT_VER" = "$LATEST_TAG" ]; then
|
|
ok "Latest tag: $LATEST_TAG (match)"
|
|
else
|
|
warn "Latest tag: $LATEST_TAG (running v$CURRENT_VER, latest is $LATEST_TAG)"
|
|
fi
|
|
fi
|
|
|
|
echo " Build: $CURRENT_HASH"
|
|
echo " Timestamp: $CURRENT_TS"
|
|
echo " Uptime: $(echo "$HEALTH" | python3 -c "import json,sys;u=json.load(sys.stdin).get('uptime_ms',0);print(f'{u/1000:.0f}s')" 2>/dev/null)"
|
|
|
|
# ═══════════════════════════════════════════════════════════════
|
|
# Check 2: Core Services
|
|
# ═══════════════════════════════════════════════════════════════
|
|
header "Check 2/8 — Core Services"
|
|
|
|
run_check "PostgreSQL" "critical" bash -c "'$PG_BIN/pg_isready' -q"
|
|
run_check "Redis" "critical" bash -c "redis-cli ping 2>/dev/null | grep -q PONG"
|
|
run_check "MongoDB" "critical" bash -c "mongosh --quiet --eval 'db.adminCommand(\"ping\")' 2>/dev/null | grep -q ok"
|
|
run_check "Qdrant" "critical" bash -c "curl -sf http://localhost:6333/healthz > /dev/null"
|
|
|
|
# Database query test
|
|
run_check "Database query (videos)" "critical" bash -c \
|
|
"'$PG_BIN/psql' -U accusys -d '$DB_NAME' -c 'SELECT COUNT(*) FROM ${SCHEMA}.videos' > /dev/null 2>&1"
|
|
|
|
# ═══════════════════════════════════════════════════════════════
|
|
# Check 3: Server Health
|
|
# ═══════════════════════════════════════════════════════════════
|
|
header "Check 3/8 — Server Health"
|
|
|
|
SCHEMA_CHECK=$(curl -sf "$API_BASE/health/detailed" 2>/dev/null | python3 -c "
|
|
import json,sys;d=json.load(sys.stdin).get('schema',{})
|
|
r=d.get('required',[]);a=d.get('applied',[])
|
|
required_set={(m['filename'],m['checksum']) for m in r}
|
|
applied_set={(m['filename'],m['checksum']) for m in a}
|
|
missing=required_set-applied_set
|
|
print(f'{len(r)}|{len(a)}|{d.get(\"ok\")}|{\"|\".join(sorted([m[\"filename\"] for m in r if m[\"filename\"] not in {x[0] for x in applied_set}]) if missing else [])}')
|
|
" 2>/dev/null || echo "0|0|False|")
|
|
SCHEMA_OK=$(echo "$SCHEMA_CHECK" | cut -d'|' -f3)
|
|
SCHEMA_REQUIRED=$(echo "$SCHEMA_CHECK" | cut -d'|' -f1)
|
|
SCHEMA_APPLIED=$(echo "$SCHEMA_CHECK" | cut -d'|' -f2)
|
|
SCHEMA_MISSING=$(echo "$SCHEMA_CHECK" | cut -d'|' -f4-)
|
|
run_check "Schema: $SCHEMA_APPLIED/$SCHEMA_REQUIRED migrations" "critical" bash -c "[ '$SCHEMA_OK' = 'True' ]"
|
|
[ -n "$SCHEMA_MISSING" ] && warn " Missing: $SCHEMA_MISSING"
|
|
|
|
run_check "Health endpoint" "critical" bash -c \
|
|
"echo '$HEALTH' | python3 -c 'import json,sys;d=json.load(sys.stdin);exit(0 if d.get(\"status\")==\"ok\" else 1)'"
|
|
|
|
DETAILED=$(curl -sf "$API_BASE/health/detailed" 2>/dev/null || echo '{}')
|
|
|
|
# Services in health detail
|
|
echo "$DETAILED" | python3 -c "
|
|
import json,sys
|
|
d=json.load(sys.stdin)
|
|
s=d.get('services',{})
|
|
for svc in ['postgres','redis','mongodb','qdrant']:
|
|
st=s.get(svc,{}).get('status','?')
|
|
la=s.get(svc,{}).get('latency_ms','?')
|
|
print(f' {svc}: status={st} latency={la}ms')" 2>/dev/null
|
|
|
|
# ═══════════════════════════════════════════════════════════════
|
|
# Check 4: Pipeline Completeness
|
|
# ═══════════════════════════════════════════════════════════════
|
|
header "Check 4/8 — Pipeline Completeness"
|
|
|
|
PL=$(echo "$DETAILED" | python3 -c "
|
|
import json,sys
|
|
d=json.load(sys.stdin)
|
|
p=d.get('pipeline',{})
|
|
print(json.dumps(p))" 2>/dev/null)
|
|
|
|
# Scripts
|
|
SCRIPTS_READY=$(echo "$PL" | python3 -c "import json,sys;print(json.load(sys.stdin).get('scripts_ready',False))" 2>/dev/null)
|
|
SCRIPTS_COUNT=$(echo "$PL" | python3 -c "import json,sys;print(json.load(sys.stdin).get('scripts_count',0))" 2>/dev/null)
|
|
run_check "Scripts directory ready" "critical" bash -c "[ '$SCRIPTS_READY' = 'True' ]"
|
|
run_check "Processor script count: $SCRIPTS_COUNT" "critical" bash -c "[ $SCRIPTS_COUNT -gt 10 ]"
|
|
|
|
# Models
|
|
MODELS_READY=$(echo "$PL" | python3 -c "import json,sys;print(json.load(sys.stdin).get('models_ready',False))" 2>/dev/null)
|
|
MODELS_COUNT=$(echo "$PL" | python3 -c "import json,sys;print(json.load(sys.stdin).get('models_count',0))" 2>/dev/null)
|
|
run_check "Models directory ready" "optional" bash -c "[ '$MODELS_READY' = 'True' ]"
|
|
echo " Models: $MODELS_COUNT files"
|
|
|
|
# Processor inventory
|
|
PROC=$(echo "$PL" | python3 -c "import json,sys;d=json.load(sys.stdin).get('processors',{});print(' '.join([k for k in d if d[k]]))" 2>/dev/null)
|
|
PROC_MISSING=$(echo "$PL" | python3 -c "import json,sys;d=json.load(sys.stdin).get('processors',{});print(' '.join([k for k in d if not d[k]]))" 2>/dev/null)
|
|
ALL_PROC_COUNT=$(echo "$PL" | python3 -c "import json,sys;d=json.load(sys.stdin).get('processors',{});print(sum(1 for k in d if d[k] and k != 'total_py_files'))" 2>/dev/null)
|
|
EXPECTED_PROCS=(asr yolo face pose ocr cut caption scene story asrx probe visual_chunk)
|
|
EXPECTED_COUNT=${#EXPECTED_PROCS[@]}
|
|
run_check "Processors: $ALL_PROC_COUNT/$EXPECTED_COUNT available" "critical" bash -c "[ $ALL_PROC_COUNT -eq $EXPECTED_COUNT ] 2>/dev/null"
|
|
for p in "${EXPECTED_PROCS[@]}"; do
|
|
STATUS=$(echo "$PL" | python3 -c "import json,sys;print(json.load(sys.stdin).get('processors',{}).get('$p',False))" 2>/dev/null)
|
|
run_check " processor: $p" "optional" bash -c "[ '$STATUS' = 'True' ]"
|
|
done
|
|
|
|
# Tools
|
|
FFMPEG=$(echo "$PL" | python3 -c "import json,sys;print(json.load(sys.stdin).get('ffmpeg',False))" 2>/dev/null)
|
|
run_check "ffmpeg" "critical" bash -c "[ '$FFMPEG' = 'True' ]"
|
|
command -v ffprobe &>/dev/null && ok "ffprobe" || warn "ffprobe"
|
|
|
|
# Script integrity (SHA256 checksum)
|
|
CHECKSUMS_FILE="$PROJECT_DIR/scripts/checksums.sha256"
|
|
if [ -f "$CHECKSUMS_FILE" ]; then
|
|
CS_TOTAL=0; CS_PASS=0; CS_FAIL=0
|
|
while IFS= read -r line; do
|
|
[ -z "$line" ] && continue
|
|
EXPECTED_HASH=$(echo "$line" | awk '{print $1}')
|
|
FILE_PATH=$(echo "$line" | awk '{print $2}')
|
|
FULL_PATH="$PROJECT_DIR/scripts/$FILE_PATH"
|
|
CS_TOTAL=$((CS_TOTAL + 1))
|
|
if [ -f "$FULL_PATH" ]; then
|
|
ACTUAL_HASH=$(shasum -a 256 "$FULL_PATH" 2>/dev/null | awk '{print $1}')
|
|
[ "$ACTUAL_HASH" = "$EXPECTED_HASH" ] && CS_PASS=$((CS_PASS + 1)) || CS_FAIL=$((CS_FAIL + 1))
|
|
else
|
|
CS_FAIL=$((CS_FAIL + 1))
|
|
fi
|
|
done < "$CHECKSUMS_FILE"
|
|
run_check "Script integrity: $CS_PASS/$CS_TOTAL checksums match" "critical" bash -c "[ $CS_FAIL -eq 0 ]"
|
|
[ $CS_FAIL -gt 0 ] && warn " $CS_FAIL scripts have hash mismatches"
|
|
else
|
|
warn "checksums.sha256 not found — cannot verify script integrity"
|
|
fi
|
|
|
|
# Inference services
|
|
EMBEDDING=$(echo "$PL" | python3 -c "import json,sys;print(json.load(sys.stdin).get('embedding_server',{}).get('status','error'))" 2>/dev/null)
|
|
LLM=$(echo "$PL" | python3 -c "import json,sys;print(json.load(sys.stdin).get('llm',{}).get('status','error'))" 2>/dev/null)
|
|
run_check "Embedding server (port 11436)" "optional" bash -c "[ '$EMBEDDING' = 'ok' ]"
|
|
run_check "LLM server (port 8082)" "optional" bash -c "[ '$LLM' = 'ok' ]"
|
|
|
|
# ═══════════════════════════════════════════════════════════════
|
|
# Check 5: Python Environment
|
|
# ═══════════════════════════════════════════════════════════════
|
|
header "Check 5/8 — Python Environment"
|
|
|
|
run_check "Python 3.11" "critical" bash -c "[ -f '$PYTHON_BIN' ]"
|
|
run_check "Python version" "critical" bash -c \
|
|
"'$PYTHON_BIN' --version 2>&1 | grep -q '3.11'"
|
|
|
|
# Python deps
|
|
echo " Python packages"
|
|
for pkg in PyPDF2 docx openpyxl pptx; do
|
|
run_check "$pkg" "critical" bash -c "'$PYTHON_BIN' -c 'import $pkg' 2>/dev/null"
|
|
done
|
|
|
|
# ═══════════════════════════════════════════════════════════════
|
|
# Check 6: API Smoke Tests
|
|
# ═══════════════════════════════════════════════════════════════
|
|
header "Check 6/8 — API Smoke Tests"
|
|
|
|
run_check "GET /api/v1/videos" "critical" bash -c \
|
|
"curl -sf -o /dev/null -w '%{http_code}' -H 'X-API-Key: $API_KEY' '$API_BASE/api/v1/videos?page=1&page_size=1' 2>/dev/null | grep -qE '^(200|201)'"
|
|
run_check "GET /api/v1/identities" "critical" bash -c \
|
|
"curl -sf -o /dev/null -w '%{http_code}' -H 'X-API-Key: $API_KEY' '$API_BASE/api/v1/identities?page=1' 2>/dev/null | grep -qE '^(200|201)'"
|
|
run_check "GET /health" "critical" bash -c \
|
|
"curl -sf -o /dev/null -w '%{http_code}' '$API_BASE/health' 2>/dev/null | grep -qE '^(200|201)'"
|
|
run_check "GET /health/detailed" "critical" bash -c \
|
|
"curl -sf -o /dev/null -w '%{http_code}' '$API_BASE/health/detailed' 2>/dev/null | grep -qE '^(200|201)'"
|
|
|
|
# Search (POST)
|
|
run_check "POST /api/v1/search" "optional" bash -c \
|
|
"curl -sf -o /dev/null -w '%{http_code}' -X POST '$API_BASE/api/v1/search' \
|
|
-H 'Content-Type: application/json' -H 'X-API-Key: $API_KEY' \
|
|
-d '{\"query\":\"test\",\"limit\":1}' 2>/dev/null | grep -qE '^(200|201)'"
|
|
|
|
# ═══════════════════════════════════════════════════════════════
|
|
# Check 7: Watcher
|
|
# ═══════════════════════════════════════════════════════════════
|
|
header "Check 7/8 — Watcher"
|
|
|
|
WATCH_DIR="${MOMENTRY_SFTP_ROOT:-$PROJECT_DIR/storage/watch}"
|
|
run_check "Watcher directory exists" "critical" bash -c "[ -d '$WATCH_DIR' ]"
|
|
|
|
# Check server logs for [WATCHER] activity
|
|
LOG_FILE="$PROJECT_DIR/playground_boot.log"
|
|
WATCHER_IN_LOGS=false
|
|
if [ -f "$LOG_FILE" ] && grep -q "\[WATCHER\]" "$LOG_FILE" 2>/dev/null; then
|
|
WATCHER_IN_LOGS=true
|
|
fi
|
|
if $WATCHER_IN_LOGS; then
|
|
ok "Watcher activity confirmed in server logs"
|
|
elif curl -sf "$API_BASE/health" &>/dev/null; then
|
|
# Server is running — since watcher auto-starts, it should be active
|
|
info "Watcher auto-starts with server — check logs for [WATCHER] messages"
|
|
ok "Watcher (server is running → watcher is running)"
|
|
else
|
|
warn "Watcher status unknown (server not running)"
|
|
fi
|
|
|
|
# Verify the binary contains watcher code (grep for "Watcher" string in binary)
|
|
if strings "$PROJECT_DIR/target/debug/momentry_playground" 2>/dev/null | grep -q "Starting File Watcher"; then
|
|
ok "Watcher compiled into binary"
|
|
elif strings "$PROJECT_DIR/target/release/momentry" 2>/dev/null | grep -q "Starting File Watcher"; then
|
|
ok "Watcher compiled into production binary"
|
|
else
|
|
# Fallback: check if the source file exists (watcher is always compiled in)
|
|
grep -q "run_watcher" "$PROJECT_DIR/src/watcher/watcher.rs" 2>/dev/null && \
|
|
ok "Watcher code found in source" || \
|
|
warn "Watcher source not found"
|
|
fi
|
|
|
|
# ═══════════════════════════════════════════════════════════════
|
|
# Check 8: Resource Usage
|
|
# ═══════════════════════════════════════════════════════════════
|
|
header "Check 8/8 — Resource Usage"
|
|
|
|
# CPU
|
|
CPU_USED=$(ps -A -o %cpu | awk '{s+=$1}END{printf "%.1f", s}' 2>/dev/null || echo "?")
|
|
run_check "CPU load: ${CPU_USED}%" "optional" bash -c "echo '$CPU_USED' | python3 -c 'import sys;exit(0 if float(sys.stdin.read().strip()) < 500 else 1)' 2>/dev/null || [ '$CPU_USED' = '?' ]"
|
|
|
|
# Memory
|
|
MEM_TOTAL=$(vm_stat 2>/dev/null | head -1 | awk '{print $NF}' | sed 's/\.//' || echo "0")
|
|
MEM_WIRED=$(vm_stat 2>/dev/null | grep "wired" | awk '{print $NF}' | sed 's/\.//' || echo "0")
|
|
MEM_ACTIVE=$(vm_stat 2>/dev/null | grep "active" | head -1 | awk '{print $NF}' | sed 's/\.//' || echo "0")
|
|
MEM_PCT=$(echo "scale=1; ($MEM_WIRED + $MEM_ACTIVE) * 100 / $MEM_TOTAL" | bc 2>/dev/null || echo "?")
|
|
echo " Memory: ${MEM_PCT}% used"
|
|
|
|
# Disk
|
|
DISK_USAGE=$(df -h / 2>/dev/null | awk 'NR==2 {print $5}' | tr -d '%' || echo "?")
|
|
run_check "Disk usage: ${DISK_USAGE}%" "critical" bash -c "[ ${DISK_USAGE:-0} -lt 90 ] 2>/dev/null"
|
|
|
|
# Momentry process memory
|
|
MOMENTRY_PID=$(pgrep -f "momentry.*server" 2>/dev/null | head -1 || echo "")
|
|
if [ -n "$MOMENTRY_PID" ]; then
|
|
MOMENTRY_MEM=$(ps -o rss= -p "$MOMENTRY_PID" 2>/dev/null | awk '{printf "%.0f MB", $1/1024}' || echo "?")
|
|
echo " Momentry RSS: $MOMENTRY_MEM"
|
|
fi
|
|
|
|
# ═══════════════════════════════════════════════════════════════
|
|
# Summary
|
|
# ═══════════════════════════════════════════════════════════════
|
|
PASS_PCT=$([ $CHECKS_TOTAL -gt 0 ] && echo "scale=0; $CHECKS_PASS * 100 / $CHECKS_TOTAL" | bc 2>/dev/null || echo 0)
|
|
echo ""
|
|
echo -e "${C}========================================${N}"
|
|
echo -e "${C} Check Complete${N}"
|
|
echo -e "${C}========================================${N}"
|
|
echo " ${G}$CHECKS_PASS${N}/$CHECKS_TOTAL checks passed (${PASS_PCT}%)"
|
|
echo " ${R}${#FAILURES[@]}${N} failures"
|
|
echo ""
|
|
if [ ${#FAILURES[@]} -eq 0 ]; then
|
|
echo -e "${G} System is healthy and complete.${N}"
|
|
else
|
|
echo -e "${Y} Issues found:${N}"
|
|
for f in "${FAILURES[@]}"; do echo -e " ${R}✗${N} $f"; done
|
|
fi
|
|
|
|
if $JSON; then
|
|
emit_json
|
|
fi
|
|
|
|
exit $([ ${#FAILURES[@]} -eq 0 ] && echo 0 || echo 1)
|