Files
momentry_core/scripts/setup/install_momentry.sh

442 lines
20 KiB
Bash
Executable File

#!/bin/bash
#==============================================================================
# Momentry Core — Fresh Install Script
# Usage: bash install_momentry.sh
#
# Phases:
# 1. 環境 (Environment) — system prereqs, service dependencies, config
# 2. Core — build the core binary and start the API server
# 3. Worker — verify the worker mode of the same binary (pipeline processor)
# 4. Watcher — file detection embedded in the server binary
# 5. Agents/Processors — processor scripts, Python deps, verification
#==============================================================================
# Strict mode: stop on a failing command, on use of an unset variable, and on
# a failure in any stage of a pipeline.
set -o errexit -o nounset -o pipefail
# Project root = two directories above the directory that holds this script.
PROJECT_DIR="$(dirname "$0")/../.."
PROJECT_DIR="$(cd "$PROJECT_DIR" && pwd)"
# ─── Color helpers ───
# ANSI colour escapes used by the logging helpers below. They are all empty
# when COLOR is not "true", so the output stays plain text.
COLOR=true
if [ "$COLOR" = true ]; then
  R='\033[0;31m'; G='\033[0;32m'; Y='\033[1;33m'; B='\033[0;34m'; C='\033[0;36m'; N='\033[0m'
else
  R=''; G=''; Y=''; B=''; C=''; N=''
fi

# Messages passed to fail(); the end-of-run summary lists them and derives the
# exit status from the count.
FAILURES=()

# Each helper prints one status line. The glyph carries the status on its own,
# so lines stay distinguishable where colours are not rendered (log files,
# COLOR=false).

# ok MSG — a check passed.
ok() { echo -e " ${G}✓${N} $1"; }
# fail MSG — a check failed; also records MSG in FAILURES.
fail() { echo -e " ${R}✗${N} $1"; FAILURES+=("$1"); }
# info MSG — neutral progress information.
info() { echo -e " ${B}ℹ${N} $1"; }
# warn MSG — a non-fatal problem; not recorded in FAILURES.
warn() { echo -e " ${Y}⚠${N} $1"; }
# header TITLE — phase banner, preceded by a blank line.
header(){ echo -e "\n${C}─── $1 ───${N}"; }
# sub TITLE — sub-section title, preceded by a blank line.
sub() { echo -e "\n ${B}$1${N}"; }
# Work from the project root and print the start-up banner.
cd "$PROJECT_DIR"
BANNER_RULE="========================================"
for banner_line in \
  "$BANNER_RULE" \
  " Momentry Core — Fresh Install" \
  " Project: $PROJECT_DIR" \
  " Date: $(date '+%Y-%m-%d %H:%M:%S')" \
  "$BANNER_RULE"; do
  echo -e "${C}${banner_line}${N}"
done
# ═══════════════════════════════════════════════════════════════
# Phase 1: 環境 (Environment)
# ═══════════════════════════════════════════════════════════════
header "Phase 1/5 — 環境 (Environment)"
# ── 1a. System prerequisites ──
sub "System prerequisites"

# Xcode Command Line Tools
if xcode-select -p &>/dev/null; then
  ok "Xcode CLI tools"
else
  info "Installing Xcode Command Line Tools..."
  xcode-select --install || true
  echo " Press any key after installation completes, then re-run."
  # `|| true`: with no interactive stdin, read hits EOF and would otherwise
  # abort the script under errexit.
  read -rn1 || true
  if xcode-select -p &>/dev/null; then ok "Xcode CLI tools"; else fail "Xcode CLI tools"; fi
fi

# Homebrew
if command -v brew &>/dev/null; then
  ok "Homebrew $(brew --version | head -1)"
else
  info "Installing Homebrew..."
  /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"
  # Homebrew installs under /opt/homebrew on Apple Silicon and /usr/local on
  # Intel; load the environment from whichever one exists.
  for brew_bin in /opt/homebrew/bin/brew /usr/local/bin/brew; do
    if [ -x "$brew_bin" ]; then
      eval "$("$brew_bin" shellenv)"
      break
    fi
  done
  if command -v brew &>/dev/null; then ok "Homebrew"; else fail "Homebrew"; fi
fi

# Basic command-line tools: install what is missing, then re-check so the
# report reflects what is really available.
MISSING_TOOLS=()
for tool in git curl jq wget tree cmake pkg-config; do
  command -v "$tool" &>/dev/null || brew install "$tool" &>/dev/null || true
  command -v "$tool" &>/dev/null || MISSING_TOOLS+=("$tool")
done
if [ ${#MISSING_TOOLS[@]} -eq 0 ]; then
  ok "Basic tools (git curl jq wget tree cmake pkg-config)"
else
  fail "Basic tools missing: ${MISSING_TOOLS[*]}"
fi

# Rust toolchain
if command -v rustc &>/dev/null; then
  ok "Rust $(rustc --version)"
else
  info "Installing Rust via rustup..."
  curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
  # Put the freshly installed toolchain on PATH for the rest of this run.
  source "$HOME/.cargo/env"
  if rustc --version &>/dev/null; then ok "Rust"; else fail "Rust"; fi
fi

# Python 3.11 — MOMENTRY_PYTHON_PATH overrides the default interpreter path.
PYTHON_BIN="${MOMENTRY_PYTHON_PATH:-/opt/homebrew/bin/python3.11}"
if [ ! -x "$PYTHON_BIN" ]; then
  info "Installing Python 3.11 via Homebrew..."
  brew install python@3.11 || true
  # Ask Homebrew for its prefix instead of assuming /opt/homebrew.
  PYTHON_BIN="$(brew --prefix)/bin/python3.11"
fi
if [ -x "$PYTHON_BIN" ]; then
  ok "Python 3.11 ($PYTHON_BIN)"
else
  fail "Python 3.11 ($PYTHON_BIN)"
fi

# PG build deps
MISSING_DEPS=()
for dep in readline zlib icu4c openssl e2fsprogs; do
  brew list "$dep" &>/dev/null || brew install "$dep" &>/dev/null || true
  brew list "$dep" &>/dev/null || MISSING_DEPS+=("$dep")
done
if [ ${#MISSING_DEPS[@]} -eq 0 ]; then
  ok "Build deps (readline zlib icu4c openssl e2fsprogs)"
else
  fail "Build deps missing: ${MISSING_DEPS[*]}"
fi
# ── 1b. Service dependencies ──
sub "Service dependencies"

# PostgreSQL — 01_postgresql.sh is run when the server binary is absent.
PG_BIN="${PG_BIN:-$HOME/pgsql/18.3/bin}"
PG_DATA="${PG_DATA:-$HOME/pgsql/data}"
if [ ! -f "$PG_BIN/postgres" ]; then
  info "Building PostgreSQL 18.3 from source..."
  bash "$PROJECT_DIR/scripts/setup/01_postgresql.sh"
fi
# Start the server only when it is not already accepting connections.
"$PG_BIN/pg_isready" -q 2>/dev/null || "$PG_BIN/pg_ctl" -D "$PG_DATA" -l "$HOME/pgsql/pg.log" start 2>/dev/null || true
sleep 2
if "$PG_BIN/pg_isready" -q 2>/dev/null; then ok "PostgreSQL"; else fail "PostgreSQL"; fi

# Application database: create it when pg_database has no row for it, then
# make sure the vector extension exists.
DB_NAME="${DB_NAME:-momentry}"
"$PG_BIN/psql" -U accusys -d postgres -tc "SELECT 1 FROM pg_database WHERE datname='$DB_NAME'" | grep -q 1 || \
  "$PG_BIN/createdb" -U accusys "$DB_NAME" 2>/dev/null || true
"$PG_BIN/psql" -U accusys -d "$DB_NAME" -c "CREATE EXTENSION IF NOT EXISTS vector" &>/dev/null || true
if "$PG_BIN/psql" -U accusys -d "$DB_NAME" -c "SELECT 1" &>/dev/null; then
  ok "Database '$DB_NAME'"
else
  fail "Database '$DB_NAME'"
fi

# Redis
if ! redis-cli ping 2>/dev/null | grep -q PONG; then
  { brew install redis &>/dev/null && brew services start redis &>/dev/null; } || true
  # Give the freshly started service a moment before probing it.
  sleep 2
fi
if redis-cli ping 2>/dev/null | grep -q PONG; then ok "Redis"; else fail "Redis"; fi

# MongoDB
if ! command -v mongosh &>/dev/null || ! mongosh --quiet --eval "db.adminCommand('ping')" &>/dev/null; then
  # `|| true`: an unguarded tap failure would abort the script under errexit
  # with its output hidden by the redirect.
  brew tap mongodb/brew &>/dev/null || true
  { brew install mongodb-community &>/dev/null && brew services start mongodb-community &>/dev/null; } || true
  sleep 3
fi
if mongosh --quiet --eval "db.adminCommand('ping')" &>/dev/null; then ok "MongoDB"; else fail "MongoDB"; fi

# Qdrant — built from the vendored source tree when no release binary exists.
QDRANT_BIN="$PROJECT_DIR/services/qdrant/target/release/qdrant"
if [ ! -f "$QDRANT_BIN" ] && [ -d "$PROJECT_DIR/services/qdrant" ]; then
  info "Building Qdrant..."
  # The subshell keeps the directory change local, so a failed build cannot
  # leave later steps running inside services/qdrant.
  (cd "$PROJECT_DIR/services/qdrant" && cargo build --release --bin qdrant 2>&1 | tail -3) || true
fi
if curl -sf http://localhost:6333/healthz &>/dev/null; then
  ok "Qdrant"
elif [ -f "$QDRANT_BIN" ]; then
  nohup "$QDRANT_BIN" > "$HOME/qdrant.log" 2>&1 &
  # Poll the health endpoint for up to 30 seconds (15 attempts, 2 s apart).
  for ((attempt = 1; attempt <= 15; attempt++)); do
    sleep 2
    if curl -sf http://localhost:6333/healthz &>/dev/null; then break; fi
  done
  if curl -sf http://localhost:6333/healthz &>/dev/null; then ok "Qdrant"; else fail "Qdrant"; fi
else
  warn "Qdrant source not found — skip (will need manual setup)"
fi
# ── 1c. External tools ──
sub "External tools"
# Install what is missing, then re-check so the report reflects reality.
MISSING_EXT=()
for tool in ffmpeg ffprobe rsync yt-dlp; do
  if ! command -v "$tool" &>/dev/null; then
    # ffprobe is not a formula of its own; it is installed by the ffmpeg formula.
    case "$tool" in
      ffprobe) formula=ffmpeg ;;
      *) formula=$tool ;;
    esac
    brew install "$formula" &>/dev/null || true
  fi
  command -v "$tool" &>/dev/null || MISSING_EXT+=("$tool")
done
if [ ${#MISSING_EXT[@]} -eq 0 ]; then
  ok "ffmpeg ffprobe rsync yt-dlp"
else
  fail "External tools missing: ${MISSING_EXT[*]}"
fi

# LibreOffice is reported with warn (not fail) when it is missing.
command -v soffice &>/dev/null || brew install --cask libreoffice &>/dev/null || true
if command -v soffice &>/dev/null; then ok "LibreOffice"; else warn "LibreOffice"; fi
# ── 1d. Configuration ──
sub "Configuration"

# .env: keep an existing file, otherwise seed it from the template. A missing
# template is reported instead of letting cp abort the script under errexit.
if [ -f "$PROJECT_DIR/.env" ]; then
  ok ".env exists"
elif [ -f "$PROJECT_DIR/.env.example" ]; then
  cp "$PROJECT_DIR/.env.example" "$PROJECT_DIR/.env"
  ok ".env created from template"
  info " Edit $PROJECT_DIR/.env to customize settings"
else
  fail ".env missing and no .env.example template found"
fi

# Working directories; MOMENTRY_SFTP_ROOT overrides the watch directory.
WATCH_DIR="${MOMENTRY_SFTP_ROOT:-$PROJECT_DIR/storage/watch}"
for d in output output_dev thumbnails storage data; do mkdir -p "$PROJECT_DIR/$d"; done
mkdir -p "$WATCH_DIR"
ok "Directories (output output_dev thumbnails storage data watch)"

# Database schemas. Each psql call is guarded: its output is discarded, so an
# unguarded failure would end the install under errexit with no message.
SCHEMAS_OK=true
for schema in dev public; do
  "$PG_BIN/psql" -U accusys -d "$DB_NAME" -c "CREATE SCHEMA IF NOT EXISTS $schema" &>/dev/null || SCHEMAS_OK=false
done
if [ "$SCHEMAS_OK" = true ]; then
  ok "Database schemas (dev, public)"
else
  fail "Database schemas (dev, public)"
fi

# Git repo (build.rs needs git hash)
if [ ! -d "$PROJECT_DIR/.git" ]; then
  # -C pins git to the project root regardless of the current directory.
  git -C "$PROJECT_DIR" init && git -C "$PROJECT_DIR" add -A \
    && git -C "$PROJECT_DIR" commit -m "init" 2>/dev/null || true
fi
if git -C "$PROJECT_DIR" rev-parse --verify HEAD &>/dev/null; then
  ok "Git repository (for build hash)"
else
  warn "Git repository (for build hash)"
fi
# ── 1e. Startup check ──
sub "Startup check"
cd "$PROJECT_DIR"
# Remember how many failures existed before this check, so the closing line
# only claims success when none of the probes below added one.
FAILS_BEFORE=${#FAILURES[@]}
if "$PG_BIN/pg_isready" -q 2>/dev/null; then ok "PostgreSQL"; else fail "PostgreSQL"; fi
if redis-cli ping 2>/dev/null | grep -q PONG; then ok "Redis"; else fail "Redis"; fi
if mongosh --quiet --eval "db.adminCommand('ping')" &>/dev/null; then ok "MongoDB"; else fail "MongoDB"; fi
# Qdrant is reported with warn, so it does not count as a failure here.
if curl -sf http://localhost:6333/healthz &>/dev/null; then ok "Qdrant"; else warn "Qdrant"; fi
if [ ${#FAILURES[@]} -eq "$FAILS_BEFORE" ]; then
  ok "All core services verified"
else
  warn "Some core services failed verification (see above)"
fi
# ═══════════════════════════════════════════════════════════════
# Phase 2: Core (API server binary)
# ═══════════════════════════════════════════════════════════════
header "Phase 2/5 — Core (API Server)"
cd "$PROJECT_DIR"

#######################################
# Print the current time as integer milliseconds since the Unix epoch.
# `date +%s%N` gives nanoseconds where %N is supported. Where it is not, the
# output contains non-digit characters and would break shell arithmetic, so
# fall back to whole seconds.
# Outputs: the timestamp on stdout
#######################################
now_ms() {
  local ns
  ns=$(date +%s%N 2>/dev/null || true)
  case "$ns" in
    '' | *[!0-9]*) echo "$(( $(date +%s) * 1000 ))" ;;
    *) echo "$(( ns / 1000000 ))" ;;
  esac
}

# Migrations — apply all release/migrate_*.sql in glob (lexical) order.
# NOTE(review): psql -f exits 0 on SQL errors unless ON_ERROR_STOP is set, so a
# partially failed migration is still recorded as applied — confirm whether
# the migration files rely on that before tightening it.
MIG_FAILED=0
for mig in "$PROJECT_DIR"/release/migrate_*.sql; do
  # An unmatched glob stays literal; skip it.
  [ -f "$mig" ] || continue
  MIG_NAME=$(basename "$mig")
  MIG_HASH=$(shasum -a 256 "$mig" | awk '{print $1}')
  # Check if already applied; a failing query counts as "not applied".
  ALREADY=$("$PG_BIN/psql" -U accusys -d "$DB_NAME" -t -A -c \
    "SELECT COUNT(*) FROM schema_migrations WHERE filename='$MIG_NAME'" 2>/dev/null || echo "0")
  if [ "$ALREADY" -gt 0 ]; then
    ok "Migration $MIG_NAME (already applied)"
    continue
  fi
  # Apply migration
  T0=$(now_ms)
  if "$PG_BIN/psql" -U accusys -d "$DB_NAME" -f "$mig" &>/dev/null; then
    DURATION_MS=$(( $(now_ms) - T0 ))
    # Record in schema_migrations (best effort).
    "$PG_BIN/psql" -U accusys -d "$DB_NAME" -c \
      "INSERT INTO schema_migrations (filename, checksum, duration_ms) VALUES ('$MIG_NAME', '$MIG_HASH', $DURATION_MS) ON CONFLICT (filename) DO UPDATE SET checksum=EXCLUDED.checksum" &>/dev/null || true
    ok "Migration $MIG_NAME (${DURATION_MS}ms)"
  else
    fail "Migration $MIG_NAME FAILED"
    MIG_FAILED=$((MIG_FAILED + 1))
  fi
done
# Each failed migration is already in FAILURES, so the roll-up uses warn.
if [ "$MIG_FAILED" -eq 0 ]; then
  ok "Database migrations applied"
else
  warn "Database migrations: $MIG_FAILED failed"
fi
# Build core binary
info "Building momentry_playground (API + worker binary)..."
# The build pipeline is tested inside `if`: as a bare statement, a failing
# build would abort the script under errexit + pipefail before the fail
# branch could report it. Requiring the pipeline to succeed also stops a stale
# binary from an earlier build counting as a pass.
if cargo build --bin momentry_playground 2>&1 | tail -3 \
  && [ -f "$PROJECT_DIR/target/debug/momentry_playground" ]; then
  ok "momentry_playground binary ($(ls -lh target/debug/momentry_playground | awk '{print $5}'))"
else
  fail "momentry_playground build"
fi
# Start API server unless one already answers on port 3003.
api_server_up() { curl -sf http://127.0.0.1:3003/health &>/dev/null; }

if ! api_server_up; then
  DATABASE_SCHEMA=dev nohup target/debug/momentry_playground server --port 3003 > "$PROJECT_DIR/playground_boot.log" 2>&1 &
  # Probe every 2 seconds, at most 10 times, stopping at the first success.
  tries=0
  while [ "$tries" -lt 10 ]; do
    tries=$((tries + 1))
    sleep 2
    if api_server_up; then break; fi
  done
  if api_server_up; then
    ok "API server started (port 3003)"
  else
    fail "API server start"
  fi
else
  ok "API server already running"
fi
# Health check
# If the endpoint cannot be reached, use a JSON document with an error status.
HEALTH=$(curl -sf http://127.0.0.1:3003/health 2>/dev/null || echo '{"status":"error"}')
# Print the build details. The parser is guarded: as a bare statement, a
# python3 failure would abort the script under errexit + pipefail with its
# error output discarded.
python3 -c '
import json, sys
d = json.load(sys.stdin)
for label, key in (("Version", "version"), ("Build", "build_git_hash"),
                   ("Timestamp", "build_timestamp"), ("Status", "status")):
    print(" {}: {}".format(label, d.get(key, "?")))
' <<<"$HEALTH" 2>/dev/null || warn "Could not parse /health response"
# Only a literal status of "ok" counts as healthy.
if python3 -c 'import json, sys; sys.exit(0 if json.load(sys.stdin).get("status") == "ok" else 1)' \
  <<<"$HEALTH" 2>/dev/null; then
  ok "Health: ok"
else
  warn "Health: degraded"
fi
# ═══════════════════════════════════════════════════════════════
# Phase 3: Worker (pipeline processing binary)
# ═══════════════════════════════════════════════════════════════
header "Phase 3/5 — Worker (Pipeline Processor)"
# Worker is the same binary (`momentry_playground worker`), already built above.
# This phase checks that it can be launched.
info "Worker binary: target/debug/momentry_playground"
info "Worker command: DATABASE_SCHEMA=dev ./target/debug/momentry_playground worker --max-concurrent 2 --poll-interval 5"

# Create Qdrant collection for dev. The GET succeeds only when the collection
# exists (curl -f fails on an HTTP error status), so no JSON parsing is needed.
QDRANT_COLLECTION="${QDRANT_COLLECTION:-momentry_dev_rule1_v2}"
if ! curl -sf "http://localhost:6333/collections/$QDRANT_COLLECTION" &>/dev/null; then
  curl -sf -X PUT "http://localhost:6333/collections/$QDRANT_COLLECTION" \
    -H "Content-Type: application/json" \
    -d '{"vectors":{"size":768,"distance":"Cosine"}}' &>/dev/null || true
fi
if curl -sf "http://localhost:6333/collections/$QDRANT_COLLECTION" &>/dev/null; then
  ok "Qdrant collection '$QDRANT_COLLECTION'"
else
  warn "Qdrant collection"
fi

# Worker dry-run: launch the worker under a 5-second timeout and inspect how
# it ended, instead of reporting success unconditionally.
info "Verifying worker can start (dry-run)..."
WORKER_BIN="$PROJECT_DIR/target/debug/momentry_playground"
# This script does not install `timeout`; accept the g-prefixed name as well.
TIMEOUT_CMD=$(command -v timeout || command -v gtimeout || true)
if [ ! -x "$WORKER_BIN" ]; then
  fail "Worker binary not found: $WORKER_BIN"
elif [ -z "$TIMEOUT_CMD" ]; then
  warn "Worker dry-run skipped (no timeout/gtimeout command found)"
else
  WORKER_RC=0
  WORKER_OUT=$(DATABASE_SCHEMA=dev "$TIMEOUT_CMD" 5 "$WORKER_BIN" worker \
    --max-concurrent 1 --poll-interval 10 2>&1) || WORKER_RC=$?
  # Here-string rather than a pipe: no SIGPIPE to trip pipefail.
  head -5 <<<"$WORKER_OUT"
  # 124 is timeout's "still running when the limit expired" status — treated
  # here as the worker having started.
  if [ "$WORKER_RC" -eq 0 ] || [ "$WORKER_RC" -eq 124 ]; then
    ok "Worker binary verified"
  else
    fail "Worker exited with status $WORKER_RC during dry-run"
  fi
fi
# ═══════════════════════════════════════════════════════════════
# Phase 4: Watcher (File Detection)
# ═══════════════════════════════════════════════════════════════
header "Phase 4/5 — Watcher (File Detection)"
# Per the notes in this script, the watcher is embedded in the server binary
# and starts with `momentry_playground server`: it polls the watch directory
# every 60s and only detects new files. Its directory is configured through
# the MOMENTRY_SFTP_ROOT environment variable.
WATCH_DIR="${MOMENTRY_SFTP_ROOT:-$PROJECT_DIR/storage/watch}"
mkdir -p "$WATCH_DIR"

# Evidence, strongest first: a [WATCHER] line in the boot log, then a
# responding server (the watcher is assumed to be running inside it).
BOOT_LOG="$PROJECT_DIR/playground_boot.log"
if [ -f "$BOOT_LOG" ] && grep -q "\[WATCHER\]" "$BOOT_LOG" 2>/dev/null; then
  ok "Watcher started (check server logs for [WATCHER] messages)"
elif curl -sf http://127.0.0.1:3003/health &>/dev/null; then
  ok "Watcher should be running (auto-started with server)"
  info " Watch dir: $WATCH_DIR"
  info " Poll interval: 60s"
else
  warn "Watcher status unknown (server not running)"
fi

# Create and remove a marker file. This only proves the watch directory is
# writable; it does not check that the watcher notices the file.
TEST_MARKER="$WATCH_DIR/.watcher_test_$(date +%s)"
if touch "$TEST_MARKER" 2>/dev/null; then
  ok "Watcher directory writable"
else
  warn "Watcher directory not writable"
fi
rm -f "$TEST_MARKER" 2>/dev/null || true
# ═══════════════════════════════════════════════════════════════
# Phase 5: Agents/Processors (Python scripts + ML models)
# ═══════════════════════════════════════════════════════════════
header "Phase 5/5 — Agents/Processors"
# ── 5a. Python dependencies ──
sub "Python packages"

#######################################
# Print the importable module name for a pip requirement.
# The distribution name and the module name differ for some packages
# (python-docx -> docx, python-pptx -> pptx); `import python-docx` is a syntax
# error, so probing with the distribution name always fails.
# Arguments: $1 - pip requirement, optionally with a version ("name==1.2")
# Outputs:   module name on stdout
#######################################
pip_import_name() {
  local dist=${1%%=*}
  case "$dist" in
    python-docx) echo "docx" ;;
    python-pptx) echo "pptx" ;;
    *) echo "$dist" ;;
  esac
}

# Install a package only when its module cannot already be imported.
for pkg in PyPDF2 python-docx openpyxl python-pptx; do
  if "$PYTHON_BIN" -c "import $(pip_import_name "$pkg")" &>/dev/null; then
    ok "$pkg"
  elif "$PYTHON_BIN" -m pip install "$pkg" --quiet; then
    ok "$pkg"
  else
    warn "$pkg"
  fi
done
# ── 5b. Processor script inventory ──
sub "Processor script inventory"
cd "$PROJECT_DIR"

#######################################
# Print the path of the first scripts/**/<name>.py file, or nothing.
# Uses find's -quit instead of piping into `head -1`: head closing the pipe
# early can kill find with SIGPIPE, which fails the assignment under pipefail
# and aborts the script under errexit.
# Arguments: $1 - processor name without the .py suffix
#######################################
find_processor() {
  find scripts -name "$1.py" -type f -print -quit 2>/dev/null || true
}

# Total number of Python files below scripts/ (find errors are tolerated).
SCRIPT_COUNT=$({ find scripts -name '*.py' -type f 2>/dev/null || true; } | wc -l | tr -d ' ')
ok "Total .py files: $SCRIPT_COUNT"

# Check core processor scripts (required)
PROCESSORS=(asr_processor yolo_processor face_processor pose_processor \
  ocr_processor cut_processor caption_processor scene_classifier \
  story_processor asrx_processor probe_file visual_chunk_processor)
MISSING=0
for p in "${PROCESSORS[@]}"; do
  FILE=$(find_processor "$p")
  if [ -n "$FILE" ]; then
    ok "Processor: $p"
  else
    warn "Processor: $p — not found"
    MISSING=$((MISSING + 1))
  fi
done
# Report the tally, which was previously computed but never shown.
if [ "$MISSING" -gt 0 ]; then
  warn "$MISSING of ${#PROCESSORS[@]} required processor scripts missing"
fi
# ── 5c. Script integrity (SHA256 check) ──
sub "Script integrity (SHA256)"

#######################################
# Split one checksum line into EXPECTED_HASH and FILE_PATH.
# The first whitespace-delimited field is the hash; everything after it is the
# path, so paths containing spaces stay intact. A leading "*" (binary-mode
# marker) is removed from the path.
# Globals:   EXPECTED_HASH, FILE_PATH (written)
# Arguments: $1 - one line of the checksums file
#######################################
parse_checksum_line() {
  read -r EXPECTED_HASH FILE_PATH <<<"$1" || true
  FILE_PATH=${FILE_PATH#\*}
}

CHECKSUMS_FILE="$PROJECT_DIR/scripts/checksums.sha256"
CS_TOTAL=0; CS_PASS=0; CS_FAIL=0
if [ -f "$CHECKSUMS_FILE" ]; then
  # `|| [ -n "$line" ]` keeps a final line that has no trailing newline.
  while IFS= read -r line || [ -n "$line" ]; do
    # Skip blank lines and comment lines.
    case "$line" in
      '' | '#'*) continue ;;
    esac
    parse_checksum_line "$line"
    FULL_PATH="$PROJECT_DIR/scripts/$FILE_PATH"
    CS_TOTAL=$((CS_TOTAL + 1))
    if [ -f "$FULL_PATH" ]; then
      # `|| true`: an unreadable file gives an empty hash (a mismatch) instead
      # of aborting the script under errexit + pipefail.
      ACTUAL_HASH=$(shasum -a 256 "$FULL_PATH" 2>/dev/null | awk '{print $1}') || true
      if [ "$ACTUAL_HASH" = "$EXPECTED_HASH" ]; then
        CS_PASS=$((CS_PASS + 1))
      else
        CS_FAIL=$((CS_FAIL + 1))
        # Only the first five problems are listed.
        if [ "$CS_FAIL" -le 5 ]; then warn "$FILE_PATH — hash mismatch"; fi
      fi
    else
      CS_FAIL=$((CS_FAIL + 1))
      if [ "$CS_FAIL" -le 5 ]; then warn "$FILE_PATH — not found"; fi
    fi
  done < "$CHECKSUMS_FILE"
  ok "$CS_PASS/$CS_TOTAL scripts match checksums"
  if [ "$CS_FAIL" -gt 0 ]; then fail "Script integrity: $CS_FAIL mismatches"; fi
else
  warn "checksums.sha256 not found — skipping integrity check"
fi
# ── 5d. ML models ──
sub "ML models"

#######################################
# Print the number of regular files below a directory (0 if it is missing).
# find's exit status is ignored on purpose: `find` on a missing or partly
# unreadable directory exits non-zero, which under pipefail fails the whole
# assignment and under errexit ends the script without a message.
# Arguments: $1 - directory to scan
#######################################
count_files() {
  { find "$1" -type f 2>/dev/null || true; } | wc -l | tr -d '[:space:]'
  echo
}

MODEL_COUNT=$(count_files models)
ok "Model files: $MODEL_COUNT"
# ── 5e. Processor verification via API ──
sub "Processor verification"
# If the endpoint cannot be reached, use an empty JSON object.
DETAILED=$(curl -sf http://127.0.0.1:3003/health/detailed 2>/dev/null || echo '{}')
# Summarise the pipeline section of the response. The parser is guarded: as a
# bare statement, any python3 failure (unexpected JSON shape, encoding error
# while printing) would abort the script under errexit + pipefail with its
# error output discarded. The embedding-server status is printed once; it
# used to appear twice under two labels.
python3 -c '
import json, sys
d = json.load(sys.stdin)
p = d.get("pipeline") or {}
proc = p.get("processors") or {}

def status(name):
    return (p.get(name) or {}).get("status", "?")

print(" Scripts dir: {}".format(p.get("scripts_ready", "?")))
print(" Script count: {}".format(p.get("scripts_count", "?")))
print(" Models dir: {}".format(p.get("models_ready", "?")))
print(" Model count: {}".format(p.get("models_count", "?")))
print(" ffmpeg: {}".format(p.get("ffmpeg", "?")))
print(" Embedding: {}".format(status("embedding_server")))
print(" LLM: {}".format(status("llm")))
print(" rsync: {}".format(status("rsync")))
for k in ["asr", "yolo", "face", "pose", "ocr", "cut", "caption", "scene", "story", "asrx", "probe", "visual_chunk"]:
    print(" {}: {}".format(k, "✓" if proc.get(k) else "✗"))
' <<<"$DETAILED" 2>/dev/null || warn "Could not parse /health/detailed response"
# ── 5f. API smoke test ──
sub "API smoke test"
# NOTE(review): the fallback below is a credential committed to the
# repository — prefer supplying MOMENTRY_API_KEY and rotating this value.
API_KEY="${MOMENTRY_API_KEY:-muser_68600856036340bcafc01930eb4bd839_1774418104_97221b69}"
# No -f and no `|| echo "000"`: -w prints the status code even when curl exits
# non-zero (000 when no response arrived), so the old fallback appended a
# second code and produced values such as "401000" or "000000".
STATUS=$(curl -s -o /dev/null -w "%{http_code}" \
  -H "X-API-Key: $API_KEY" \
  "http://127.0.0.1:3003/api/v1/videos?page=1&page_size=1" 2>/dev/null || true)
STATUS=${STATUS:-000}
# Only a 2xx response counts as a pass.
case "$STATUS" in
  2??) ok "GET /api/v1/videos → HTTP $STATUS" ;;
  *) warn "GET /api/v1/videos → HTTP $STATUS" ;;
esac
# ═══════════════════════════════════════════════════════════════
# Summary
# ═══════════════════════════════════════════════════════════════
# Final report: result banner, recorded failures, then a quick-reference card.
failure_total=${#FAILURES[@]}

echo ""
echo -e "${C}========================================${N}"
if (( failure_total > 0 )); then
  echo -e "${Y} Install Complete — ${failure_total} Warnings${N}"
  for failed_check in "${FAILURES[@]}"; do
    echo -e " ${R}${N} $failed_check"
  done
else
  echo -e "${G} Install Complete — All Checks Passed${N}"
fi
echo -e "${C}========================================${N}"

cat <<EOF

 API: http://127.0.0.1:3003
 Project: $PROJECT_DIR
 Config: $PROJECT_DIR/.env
 Scripts: $SCRIPT_COUNT .py files
 Watch dir: $WATCH_DIR

 Commands:
 Start server: DATABASE_SCHEMA=dev ./target/debug/momentry_playground server --port 3003
 Start worker: DATABASE_SCHEMA=dev ./target/debug/momentry_playground worker --max-concurrent 2
 Quick check: bash scripts/setup/check_momentry.sh
 Upgrade: bash scripts/setup/upgrade_momentry.sh <delivery_dir>

 Components installed:
 ✓ 環境 (Environment) — services, tools, config
 ✓ Core — API server (port 3003)
 ✓ Worker — pipeline processor
 ✓ Watcher — file detection (auto-started with server)
 ✓ Agents/Processors — $SCRIPT_COUNT .py scripts, 12 processors

EOF

# Exit status: 0 when nothing was recorded in FAILURES, 1 otherwise.
if (( failure_total > 0 )); then
  exit 1
fi
exit 0