Files
momentry_core/scripts/setup/check_momentry.sh

373 lines
18 KiB
Bash
Executable File

#!/bin/bash
#==============================================================================
# Momentry Core — Maintenance & Check Script
# Usage: bash check_momentry.sh [--production] [--json] [--no-color] [--help]
#
# Checks:
# 1. Version & build info (vs latest tag)
# 2. All 4 core services (PostgreSQL, Redis, MongoDB, Qdrant)
# 3. Binary health (API endpoint)
# 4. Pipeline completeness (scripts, models, processors, tools)
# 5. Python dependencies & environment
# 6. API smoke tests
# 7. File watcher (watch directory, log activity, compiled-in marker)
# 8. Resource usage (CPU, memory, disk)
#==============================================================================
# Strict mode: stop on command errors, on unset variables, and on a failure
# in any stage of a pipeline.
set -o errexit -o nounset -o pipefail

# Project root, resolved two directory levels above this script.
PROJECT_DIR="$(
  cd "$(dirname "$0")/../.." && pwd
)"

# Option defaults.
JSON=false
PRODUCTION=false
COLOR=true

# ─── Color helpers ───
# Start with every code empty, then fill in ANSI escapes when colour is on.
# The codes are stored unexpanded; callers print them with `echo -e`.
R=''; G=''; Y=''; B=''; C=''; N=''
case "$COLOR" in
  true)
    R='\033[0;31m'   # red
    G='\033[0;32m'   # green
    Y='\033[1;33m'   # bold yellow
    B='\033[0;34m'   # blue
    C='\033[0;36m'   # cyan
    N='\033[0m'      # reset
    ;;
esac
# Result bookkeeping shared by the reporting helpers and the summary.
FAILURES=()
CHECKS_TOTAL=0
CHECKS_PASS=0

# _status_line COLOR TEXT — common layout for the one-line status helpers:
# a space, the colour code immediately followed by the reset code, a space,
# then TEXT. Printed with `echo -e`, so backslash escapes are interpreted.
_status_line() {
  echo -e " ${1}${N} $2"
}

# ok TEXT — report a passing item (green).
ok() {
  _status_line "${G}" "$1"
}

# fail TEXT — report a failing item (red) and append TEXT to FAILURES.
fail() {
  _status_line "${R}" "$1"
  FAILURES+=("$1")
}

# info TEXT — informational line (blue).
info() {
  _status_line "${B}" "$1"
}

# warn TEXT — warning line (yellow).
warn() {
  _status_line "${Y}" "$1"
}

# header TITLE — blank line followed by a cyan section rule around TITLE.
header() {
  local title="$1"
  echo -e "\n${C}─── ${title} ───${N}"
}
# ─── Parse args ───
# parse_args ARG...
#   Applies command-line flags to the PRODUCTION / JSON / COLOR globals.
#     --production  target the production API and schema
#     --json        also emit the JSON report at the end
#     --no-color    disable ANSI colours
#     --help        print the first 20 lines of this script and exit 0
#   Any other argument prints "Unknown: <arg>" and exits 1.
parse_args() {
  while [ $# -gt 0 ]; do
    case "$1" in
      --production) PRODUCTION=true ;;
      --json) JSON=true ;;
      --no-color)
        COLOR=false
        # The colour variables are filled in before arguments are parsed,
        # so flipping COLOR alone changes nothing; blank the codes here.
        R=''; G=''; Y=''; B=''; C=''; N=''
        ;;
      --help) head -20 "$0"; exit 0 ;;
      *) echo "Unknown: $1"; exit 1 ;;
    esac
    shift
  done
}
parse_args "$@"
# Target selection: the dev instance (port 3003, schema "dev") unless
# --production was given, in which case port 3002 and schema "public".
if ! $PRODUCTION; then
  SCHEMA="dev"
  API_BASE="http://127.0.0.1:3003"
else
  SCHEMA="public"
  API_BASE="http://127.0.0.1:3002"
fi

# Environment overrides with built-in fallbacks.
: "${PG_BIN:=$HOME/pgsql/18.3/bin}"
: "${DB_NAME:=momentry}"
# NOTE(review): the fallback below is a credential committed to the script.
# Consider requiring MOMENTRY_API_KEY to be set instead — confirm with owners.
API_KEY="${MOMENTRY_API_KEY:-muser_68600856036340bcafc01930eb4bd839_1774418104_97221b69}"
PYTHON_BIN="${MOMENTRY_PYTHON_PATH:-/opt/homebrew/bin/python3.11}"

# Timestamp shown in the report banner.
NOW="$(date '+%Y-%m-%d %H:%M:%S')"
# JSON output accumulator: an opening bracket followed by comma-terminated
# objects. emit_json strips the final comma and closes the array.
JSON_PARTS="["

# _json_escape STRING
#   Prints STRING made safe for use inside a JSON string literal: backslash,
#   double quote, newline, carriage return and tab are escaped. Other control
#   characters are passed through unchanged.
_json_escape() {
  local text="$1"
  text=${text//\\/\\\\}      # must run first so later escapes aren't doubled
  text=${text//\"/\\\"}
  text=${text//$'\n'/\\n}
  text=${text//$'\r'/\\r}
  text=${text//$'\t'/\\t}
  printf '%s' "$text"
}

# add_json NAME STATUS DETAIL
#   Appends {"check":NAME,"status":STATUS,"detail":DETAIL}, to JSON_PARTS.
#   All three values are escaped, so captured command output containing
#   quotes or line breaks no longer corrupts the document.
add_json() {
  local name status detail
  name=$(_json_escape "$1")
  status=$(_json_escape "$2")
  detail=$(_json_escape "$3")
  JSON_PARTS+="{\"check\":\"$name\",\"status\":\"$status\",\"detail\":\"$detail\"},"
}
# emit_json
#   Finalises JSON_PARTS in place (drops one trailing comma, appends "]") and
#   prints it. The text is pretty-printed through `python3 -m json.tool`; if
#   that fails for any reason the raw string is printed instead.
emit_json() {
  JSON_PARTS="${JSON_PARTS%,}]"
  if ! python3 -m json.tool 2>/dev/null <<<"$JSON_PARTS"; then
    echo "$JSON_PARTS"
  fi
}
# run_check NAME LEVEL COMMAND [ARG...]
#   Runs COMMAND with its stdout and stderr captured, then reports the result
#   through ok / warn / fail and records it with add_json.
#     LEVEL "optional": a failure is only a warning and is not counted.
#     any other LEVEL:  the check is counted in CHECKS_TOTAL; a success bumps
#                       CHECKS_PASS, a failure is recorded via fail.
#   Always returns 0 so a failed check never trips `set -e` in the caller.
run_check() {
  local name="$1"
  local status="$2"
  shift 2
  local output rc=0
  # Record the exit code inside the `||` branch. Reading `$?` after
  # `cmd || true` always yields 0, which would mark every check as passed.
  output=$("$@" 2>&1) || rc=$?
  # Label plus captured output, separated so the two remain readable.
  local label="$name${output:+: $output}"
  if [ "$status" = "optional" ]; then
    if [ "$rc" -eq 0 ]; then
      ok "$name"
      add_json "$name" "ok" "pass"
    else
      warn "$label"
      add_json "$name" "warn" "$output"
    fi
    return 0
  fi
  CHECKS_TOTAL=$((CHECKS_TOTAL + 1))
  if [ "$rc" -eq 0 ]; then
    ok "$name"
    CHECKS_PASS=$((CHECKS_PASS + 1))
    add_json "$name" "ok" "pass"
  else
    fail "$label"
    add_json "$name" "fail" "$output"
  fi
  return 0
}
# Work from the project root, then print the report banner in cyan.
cd "$PROJECT_DIR"
banner_rule="========================================"
for banner_line in \
  "$banner_rule" \
  " Momentry Core — Maintenance Check" \
  " Date: $NOW" \
  " Target: $API_BASE (schema: $SCHEMA)" \
  "$banner_rule"; do
  echo -e "${C}${banner_line}${N}"
done
# ═══════════════════════════════════════════════════════════════
# Check 1: Version & Build
# ═══════════════════════════════════════════════════════════════
header "Check 1/8 — Version & Build"

# /health response, kept in HEALTH for later sections. When the request
# fails a stub document is substituted so the field lookups still run.
HEALTH=$(curl -sf "$API_BASE/health" 2>/dev/null || echo '{"status":"error"}')

# health_field KEY
#   Prints the value of KEY from the JSON in HEALTH, or "?" when the key is
#   absent or the document cannot be parsed. The fallback keeps a malformed
#   response from aborting the script under `set -e`.
health_field() {
  printf '%s' "$HEALTH" \
    | python3 -c 'import json,sys;print(json.load(sys.stdin).get(sys.argv[1],"?"))' "$1" 2>/dev/null \
    || echo '?'
}
CURRENT_VER=$(health_field version)
CURRENT_HASH=$(health_field build_git_hash)
CURRENT_TS=$(health_field build_timestamp)

# Commands are passed to run_check as argument vectors, not as strings
# handed to `bash -c`, so values never get re-parsed as shell source.
run_check "API server reachable" "critical" curl -sf -o /dev/null "$API_BASE/health"
run_check "Version: $CURRENT_VER" "critical" \
  grep -qE '^[0-9]+\.[0-9]+\.[0-9]+' <<<"$CURRENT_VER"

# Compare with latest git tag
# `awk 'NR==1'` reads all of git's output, unlike `head -1`, so git cannot be
# killed by SIGPIPE and make the pipeline fail under pipefail. An empty result
# (no tags, or not a git checkout) skips the comparison.
LATEST_TAG=$(git tag --sort=-v:refname 2>/dev/null | awk 'NR==1' || true)
if [ -n "$LATEST_TAG" ]; then
  if [ "v$CURRENT_VER" = "$LATEST_TAG" ]; then
    ok "Latest tag: $LATEST_TAG (match)"
  else
    warn "Latest tag: $LATEST_TAG (running v$CURRENT_VER, latest is $LATEST_TAG)"
  fi
fi
echo " Build: $CURRENT_HASH"
echo " Timestamp: $CURRENT_TS"
echo " Uptime: $(echo "$HEALTH" | python3 -c "import json,sys;u=json.load(sys.stdin).get('uptime_ms',0);print(f'{u/1000:.0f}s')" 2>/dev/null)"
# ═══════════════════════════════════════════════════════════════
# Check 2: Core Services
# ═══════════════════════════════════════════════════════════════
header "Check 2/8 — Core Services"

# Single commands are passed to run_check directly. Pipelines still need a
# shell, but their text is a fixed literal and any variable data is handed
# over as positional parameters instead of being pasted into the string.
run_check "PostgreSQL" "critical" "$PG_BIN/pg_isready" -q
run_check "Redis" "critical" bash -c 'redis-cli ping 2>/dev/null | grep -q PONG'
run_check "MongoDB" "critical" bash -c \
  'mongosh --quiet --eval "db.adminCommand(\"ping\")" 2>/dev/null | grep -q ok'
run_check "Qdrant" "critical" curl -sf -o /dev/null http://localhost:6333/healthz

# Database query test
# The role defaults to the previously hard-coded "accusys" and can be
# overridden with MOMENTRY_DB_USER.
DB_USER="${MOMENTRY_DB_USER:-accusys}"
run_check "Database query (videos)" "critical" bash -c \
  '"$1/psql" -U "$2" -d "$3" -c "SELECT COUNT(*) FROM $4.videos" >/dev/null 2>&1' \
  _ "$PG_BIN" "$DB_USER" "$DB_NAME" "$SCHEMA"
# ═══════════════════════════════════════════════════════════════
# Check 3: Server Health
# ═══════════════════════════════════════════════════════════════
header "Check 3/8 — Server Health"

# /health/detailed is fetched once and kept in DETAILED, which the pipeline
# section reads as well. An empty object stands in when the request fails.
DETAILED=$(curl -sf "$API_BASE/health/detailed" 2>/dev/null || echo '{}')

# Summarise the "schema" object as  required|applied|ok|missing1|missing2...
# A migration counts as applied when both filename and checksum match.
# The missing list holds required filenames with no applied entry of the
# same name, and is only filled when at least one pair is unmatched.
SCHEMA_CHECK=$(printf '%s' "$DETAILED" | python3 -c '
import json, sys
schema = json.load(sys.stdin).get("schema", {})
required = schema.get("required", [])
applied = schema.get("applied", [])
required_set = {(m["filename"], m["checksum"]) for m in required}
applied_set = {(m["filename"], m["checksum"]) for m in applied}
applied_names = {name for name, _ in applied_set}
missing = []
if required_set - applied_set:
    missing = sorted(m["filename"] for m in required if m["filename"] not in applied_names)
print("|".join([str(len(required)), str(len(applied)), str(schema.get("ok")), "|".join(missing)]))
' 2>/dev/null || echo "0|0|False|")

# Split on "|"; the last variable receives everything after the third bar.
IFS='|' read -r SCHEMA_REQUIRED SCHEMA_APPLIED SCHEMA_OK SCHEMA_MISSING <<<"$SCHEMA_CHECK" || true
run_check "Schema: $SCHEMA_APPLIED/$SCHEMA_REQUIRED migrations" "critical" test "$SCHEMA_OK" = "True"
if [ -n "$SCHEMA_MISSING" ]; then
  warn " Missing: $SCHEMA_MISSING"
fi

# HEALTH is supplied on stdin rather than pasted into a shell string, so a
# quote character in the response cannot break or alter the command.
run_check "Health endpoint" "critical" python3 -c '
import json, sys
sys.exit(0 if json.load(sys.stdin).get("status") == "ok" else 1)' <<<"$HEALTH"

# Services in health detail
# Informational only; a parse failure is reported as a warning instead of
# terminating the run.
printf '%s' "$DETAILED" | python3 -c '
import json, sys
services = json.load(sys.stdin).get("services", {})
for svc in ("postgres", "redis", "mongodb", "qdrant"):
    entry = services.get(svc, {})
    print(" {}: status={} latency={}ms".format(svc, entry.get("status", "?"), entry.get("latency_ms", "?")))
' 2>/dev/null || warn "Could not read service details from /health/detailed"
# ═══════════════════════════════════════════════════════════════
# Check 4: Pipeline Completeness
# ═══════════════════════════════════════════════════════════════
header "Check 4/8 — Pipeline Completeness"

# PL holds the "pipeline" object of DETAILED as JSON text. Anything that is
# missing, not an object, or unparseable becomes "{}", so the lookups below
# fall back to their defaults instead of aborting the script under `set -e`.
PL=$(printf '%s' "$DETAILED" | python3 -c '
import json, sys
pipeline = json.load(sys.stdin).get("pipeline", {})
print(json.dumps(pipeline if isinstance(pipeline, dict) else {}))' 2>/dev/null || echo '{}')

# pipeline_field KEY DEFAULT
#   Prints PL[KEY], or DEFAULT when the key is absent or PL cannot be parsed.
pipeline_field() {
  printf '%s' "$PL" \
    | python3 -c 'import json,sys;print(json.load(sys.stdin).get(sys.argv[1],sys.argv[2]))' "$1" "$2" 2>/dev/null \
    || printf '%s\n' "$2"
}

# Scripts
SCRIPTS_READY=$(pipeline_field scripts_ready False)
SCRIPTS_COUNT=$(pipeline_field scripts_count 0)
run_check "Scripts directory ready" "critical" test "$SCRIPTS_READY" = "True"
run_check "Processor script count: $SCRIPTS_COUNT" "critical" test "$SCRIPTS_COUNT" -gt 10

# Models
MODELS_READY=$(pipeline_field models_ready False)
MODELS_COUNT=$(pipeline_field models_count 0)
run_check "Models directory ready" "optional" test "$MODELS_READY" = "True"
echo " Models: $MODELS_COUNT files"
# Processor inventory
# One python call reads PL["processors"] and prints four lines:
#   1. keys with a truthy value                        -> PROC
#   2. keys with a falsy value                         -> PROC_MISSING
#   3. number of truthy keys except "total_py_files"   -> ALL_PROC_COUNT
#   4. keys whose value prints as "True"               -> PROC_ENABLED
# If parsing fails the reads hit end-of-input and leave the variables empty;
# `|| true` keeps that from aborting the script under `set -e`.
{
  IFS= read -r PROC
  IFS= read -r PROC_MISSING
  IFS= read -r ALL_PROC_COUNT
  IFS= read -r PROC_ENABLED
} < <(printf '%s' "$PL" | python3 -c '
import json, sys
procs = json.load(sys.stdin).get("processors", {})
print(" ".join(k for k in procs if procs[k]))
print(" ".join(k for k in procs if not procs[k]))
print(sum(1 for k in procs if procs[k] and k != "total_py_files"))
print(" ".join(k for k in procs if str(procs[k]) == "True"))
' 2>/dev/null) || true
ALL_PROC_COUNT=${ALL_PROC_COUNT:-0}

EXPECTED_PROCS=(asr yolo face pose ocr cut caption scene story asrx probe visual_chunk)
EXPECTED_COUNT=${#EXPECTED_PROCS[@]}
run_check "Processors: $ALL_PROC_COUNT/$EXPECTED_COUNT available" "critical" \
  test "$ALL_PROC_COUNT" -eq "$EXPECTED_COUNT"

# Per-processor status is a membership test against PROC_ENABLED, so no
# further python processes are started inside the loop.
for p in "${EXPECTED_PROCS[@]}"; do
  case " $PROC_ENABLED " in
    *" $p "*) STATUS=True ;;
    *) STATUS=False ;;
  esac
  run_check " processor: $p" "optional" test "$STATUS" = "True"
done
# Tools
# ffmpeg availability comes from the server's pipeline report (PL). "False"
# is used when the key is absent or PL cannot be parsed, so a bad document
# fails this check instead of aborting the script under `set -e`.
FFMPEG=$(printf '%s' "$PL" \
  | python3 -c 'import json,sys;print(json.load(sys.stdin).get("ffmpeg",False))' 2>/dev/null \
  || echo False)
run_check "ffmpeg" "critical" test "$FFMPEG" = "True"

# ffprobe is looked up on the local PATH and only ever warns.
if command -v ffprobe &>/dev/null; then
  ok "ffprobe"
else
  warn "ffprobe"
fi
# Script integrity (SHA256 checksum)
CHECKSUMS_FILE="$PROJECT_DIR/scripts/checksums.sha256"
if [ -f "$CHECKSUMS_FILE" ]; then
CS_TOTAL=0; CS_PASS=0; CS_FAIL=0
while IFS= read -r line; do
[ -z "$line" ] && continue
EXPECTED_HASH=$(echo "$line" | awk '{print $1}')
FILE_PATH=$(echo "$line" | awk '{print $2}')
FULL_PATH="$PROJECT_DIR/scripts/$FILE_PATH"
CS_TOTAL=$((CS_TOTAL + 1))
if [ -f "$FULL_PATH" ]; then
ACTUAL_HASH=$(shasum -a 256 "$FULL_PATH" 2>/dev/null | awk '{print $1}')
[ "$ACTUAL_HASH" = "$EXPECTED_HASH" ] && CS_PASS=$((CS_PASS + 1)) || CS_FAIL=$((CS_FAIL + 1))
else
CS_FAIL=$((CS_FAIL + 1))
fi
done < "$CHECKSUMS_FILE"
run_check "Script integrity: $CS_PASS/$CS_TOTAL checksums match" "critical" bash -c "[ $CS_FAIL -eq 0 ]"
[ $CS_FAIL -gt 0 ] && warn " $CS_FAIL scripts have hash mismatches"
else
warn "checksums.sha256 not found — cannot verify script integrity"
fi
# Inference services
# pipeline_service_status KEY
#   Prints PL[KEY]["status"], or "error" when the key or status is absent or
#   PL cannot be parsed. The fallback keeps a malformed document from
#   aborting the script under `set -e`.
pipeline_service_status() {
  printf '%s' "$PL" \
    | python3 -c 'import json,sys;print(json.load(sys.stdin).get(sys.argv[1],{}).get("status","error"))' "$1" 2>/dev/null \
    || echo error
}
EMBEDDING=$(pipeline_service_status embedding_server)
LLM=$(pipeline_service_status llm)
run_check "Embedding server (port 11436)" "optional" test "$EMBEDDING" = "ok"
run_check "LLM server (port 8082)" "optional" test "$LLM" = "ok"
# ═══════════════════════════════════════════════════════════════
# Check 5: Python Environment
# ═══════════════════════════════════════════════════════════════
header "Check 5/8 — Python Environment"

# The interpreter path is passed as an argument (directly, or as a positional
# parameter of a fixed `bash -c` script) rather than pasted into shell source,
# so unusual characters in the path cannot change the command.
run_check "Python 3.11" "critical" test -f "$PYTHON_BIN"
# -F matches "3.11" literally; as a regex the dot would match any character.
run_check "Python version" "critical" bash -c \
  '"$1" --version 2>&1 | grep -qF "3.11"' _ "$PYTHON_BIN"

# Python deps
echo " Python packages"
for pkg in PyPDF2 docx openpyxl pptx; do
  # Import errors are discarded; only the exit status decides the result.
  run_check "$pkg" "critical" bash -c '"$1" -c "import $2" 2>/dev/null' _ "$PYTHON_BIN" "$pkg"
done
# ═══════════════════════════════════════════════════════════════
# Check 6: API Smoke Tests
# ═══════════════════════════════════════════════════════════════
header "Check 6/8 — API Smoke Tests"

# http_ok CURL_ARG...
#   Issues one request with `curl -sf`, discarding the body, and succeeds only
#   when the reported HTTP status is 200 or 201. Arguments go to curl as-is,
#   so headers, URLs and bodies are never re-parsed as shell source.
http_ok() {
  local code
  # curl exits non-zero on connection or HTTP errors; only the code matters.
  code=$(curl -sf -o /dev/null -w '%{http_code}' "$@" 2>/dev/null) || true
  case "$code" in
    200|201) return 0 ;;
    *) return 1 ;;
  esac
}

run_check "GET /api/v1/videos" "critical" \
  http_ok -H "X-API-Key: $API_KEY" "$API_BASE/api/v1/videos?page=1&page_size=1"
run_check "GET /api/v1/identities" "critical" \
  http_ok -H "X-API-Key: $API_KEY" "$API_BASE/api/v1/identities?page=1"
run_check "GET /health" "critical" http_ok "$API_BASE/health"
run_check "GET /health/detailed" "critical" http_ok "$API_BASE/health/detailed"

# Search (POST)
run_check "POST /api/v1/search" "optional" \
  http_ok -X POST "$API_BASE/api/v1/search" \
  -H 'Content-Type: application/json' -H "X-API-Key: $API_KEY" \
  -d '{"query":"test","limit":1}'
# ═══════════════════════════════════════════════════════════════
# Check 7: Watcher
# ═══════════════════════════════════════════════════════════════
header "Check 7/8 — Watcher"
WATCH_DIR="${MOMENTRY_SFTP_ROOT:-$PROJECT_DIR/storage/watch}"
run_check "Watcher directory exists" "critical" test -d "$WATCH_DIR"

# file_has_text FILE TEXT
#   Succeeds when FILE exists and contains the literal TEXT. grep reads the
#   file itself (-a treats binary data as text, -F disables regex), so no
#   `strings | grep -q` pipeline is needed. Under pipefail that pipeline can
#   report failure on a match, because grep exits early and the producer is
#   terminated by SIGPIPE.
file_has_text() {
  [ -f "$1" ] && LC_ALL=C grep -qaF -- "$2" "$1" 2>/dev/null
}

# Check server logs for [WATCHER] activity
LOG_FILE="$PROJECT_DIR/playground_boot.log"
WATCHER_IN_LOGS=false
if file_has_text "$LOG_FILE" "[WATCHER]"; then
  WATCHER_IN_LOGS=true
fi
if [ "$WATCHER_IN_LOGS" = true ]; then
  ok "Watcher activity confirmed in server logs"
elif curl -sf "$API_BASE/health" &>/dev/null; then
  # Server is running — since watcher auto-starts, it should be active.
  # NOTE(review): this is inferred from the server being up, not observed.
  info "Watcher auto-starts with server — check logs for [WATCHER] messages"
  ok "Watcher (server is running → watcher is running)"
else
  warn "Watcher status unknown (server not running)"
fi

# Verify the binary contains watcher code: look for the start-up message in
# the debug binary, then the release binary, then fall back to the source.
WATCHER_MARKER="Starting File Watcher"
if file_has_text "$PROJECT_DIR/target/debug/momentry_playground" "$WATCHER_MARKER"; then
  ok "Watcher compiled into binary"
elif file_has_text "$PROJECT_DIR/target/release/momentry" "$WATCHER_MARKER"; then
  ok "Watcher compiled into production binary"
elif file_has_text "$PROJECT_DIR/src/watcher/watcher.rs" "run_watcher"; then
  ok "Watcher code found in source"
else
  warn "Watcher source not found"
fi
# ═══════════════════════════════════════════════════════════════
# Check 8: Resource Usage
# ═══════════════════════════════════════════════════════════════
header "Check 8/8 — Resource Usage"

# CPU
# Sum of the %cpu column over all processes. The fallback is applied to the
# whole assignment so a failing `ps` yields exactly "?".
CPU_USED=$(ps -A -o %cpu 2>/dev/null | awk '{s+=$1}END{printf "%.1f", s}') || CPU_USED="?"
# Passes when the value is unknown ("?") or a number below 500.
run_check "CPU load: ${CPU_USED}%" "optional" awk -v v="$CPU_USED" \
  'BEGIN { if (v == "?") exit 0; exit !(v ~ /^[0-9]+(\.[0-9]+)?$/ && v + 0 < 500) }'

# Memory
# vm_stat_used_pct TOTAL_BYTES   (reads `vm_stat` output on stdin)
#   Prints (wired + active pages) * page size as a percentage of TOTAL_BYTES
#   with one decimal, or "?" when the page size or total is unavailable.
#   The page size is the number on the first line of the input; the counters
#   are the "Pages wired down:" and "Pages active:" lines, whose values end
#   with a dot that is removed before use.
vm_stat_used_pct() {
  awk -v total="$1" '
    NR == 1 { for (i = 1; i <= NF; i++) if ($i ~ /^[0-9]+$/) page = $i }
    /^Pages wired down:/ { gsub(/\./, "", $NF); wired = $NF }
    /^Pages active:/ { gsub(/\./, "", $NF); active = $NF }
    END {
      if (page > 0 && total > 0) printf "%.1f", (wired + active) * page * 100 / total
      else printf "?"
    }'
}
MEM_PCT="?"
if command -v vm_stat >/dev/null 2>&1; then
  # Physical memory in bytes; assumes the macOS `hw.memsize` sysctl.
  MEM_TOTAL=$(sysctl -n hw.memsize 2>/dev/null || echo 0)
  MEM_PCT=$(vm_stat 2>/dev/null | vm_stat_used_pct "$MEM_TOTAL") || MEM_PCT="?"
fi
echo " Memory: ${MEM_PCT}% used"

# Disk
# -P forces the one-line-per-filesystem layout; field 5 is the capacity.
DISK_USAGE=$(df -P / 2>/dev/null | awk 'NR==2 {print $5}' | tr -d '%') || DISK_USAGE="?"
# An empty reading is treated as 0; anything non-numeric fails the check.
run_check "Disk usage: ${DISK_USAGE}%" "critical" awk -v v="${DISK_USAGE:-0}" \
  'BEGIN { exit !(v ~ /^[0-9]+$/ && v + 0 < 90) }'

# Momentry process memory
# First PID whose command line matches; empty when nothing matches.
MOMENTRY_PID=$(pgrep -f "momentry.*server" 2>/dev/null | awk 'NR==1' || true)
if [ -n "$MOMENTRY_PID" ]; then
  MOMENTRY_MEM=$(ps -o rss= -p "$MOMENTRY_PID" 2>/dev/null | awk '{printf "%.0f MB", $1/1024}') || MOMENTRY_MEM="?"
  echo " Momentry RSS: $MOMENTRY_MEM"
fi
# ═══════════════════════════════════════════════════════════════
# Summary
# ═══════════════════════════════════════════════════════════════
# Whole-number pass percentage (truncated); 0 when no counted checks ran.
if [ "$CHECKS_TOTAL" -gt 0 ]; then
  PASS_PCT=$((CHECKS_PASS * 100 / CHECKS_TOTAL))
else
  PASS_PCT=0
fi
echo ""
echo -e "${C}========================================${N}"
echo -e "${C} Check Complete${N}"
echo -e "${C}========================================${N}"
# These lines embed colour codes, so they need `echo -e` like the rest;
# a plain `echo` prints the escape sequences literally.
echo -e " ${G}$CHECKS_PASS${N}/$CHECKS_TOTAL checks passed (${PASS_PCT}%)"
echo -e " ${R}${#FAILURES[@]}${N} failures"
echo ""
if [ ${#FAILURES[@]} -eq 0 ]; then
  echo -e "${G} System is healthy and complete.${N}"
else
  echo -e "${Y} Issues found:${N}"
  for f in "${FAILURES[@]}"; do
    echo -e " ${R}${N} $f"
  done
fi
if [ "$JSON" = true ]; then
  emit_json
fi
# Exit status: 0 when nothing was recorded in FAILURES, otherwise 1.
if [ ${#FAILURES[@]} -eq 0 ]; then
  exit 0
fi
exit 1