#!/bin/bash
# Momentry service health check (Layer 2)
# Path: /Users/accusys/momentry_core_0.1/monitor/service/health_check.sh
#
# Probes every backing service once, prints a coloured one-line verdict per
# service, stores each result in the `monitor_services` table (best effort)
# and appends a summary line to the monitor log.
#
# Variables read by the checks (presumably provided by
# common/load_credentials.sh - confirm against that file):
#   PG_USER, PG_PASSWORD, REDIS_PASSWORD, MARIADB_USER, MARIADB_PASSWORD,
#   SFTPGO_USER, SFTPGO_PASSWORD
#
# `set -euo pipefail` is deliberately NOT enabled: every check is expected to
# return non-zero for an unhealthy service, and credentials may be unset.
# The script stays compatible with bash 3.2 (no mapfile / namerefs).

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
MONITOR_DIR="$(dirname "$SCRIPT_DIR")"

# Load the credential configuration, if present.
if [ -f "$MONITOR_DIR/common/load_credentials.sh" ]; then
  # shellcheck source=/dev/null
  source "$MONITOR_DIR/common/load_credentials.sh"
else
  printf 'warning: %s not found; credential variables may be unset\n' \
    "$MONITOR_DIR/common/load_credentials.sh" >&2
fi

LOG_DIR="/Users/accusys/momentry/log/monitor"
mkdir -p "$LOG_DIR"
LOG_FILE="$LOG_DIR/service_check.log"

# Colours (expanded with printf %b by the say_* helpers).
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m'

#######################################
# Print a timestamped message and append it to $LOG_FILE.
# Arguments: $1 - message
#######################################
log() {
  printf '[%s] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$1" | tee -a "$LOG_FILE"
}

# Status-line printers: coloured marker followed by the message ($1).
say_ok() { printf '%b %s\n' "${GREEN}✓${NC}" "$1"; }
say_fail() { printf '%b %s\n' "${RED}✗${NC}" "$1"; }
say_warn() { printf '%b %s\n' "${YELLOW}⚠${NC}" "$1"; }

#######################################
# Print the current time in milliseconds since the epoch.
# `date +%s%N` is not supported by every date implementation (some print a
# literal "N" or nothing for %N), which would break the arithmetic on the
# result. The output is therefore validated before use, with fallbacks to
# perl's Time::HiRes and finally to whole seconds.
# Outputs: integer milliseconds on stdout
#######################################
now_ms() {
  local ns
  ns=$(date +%s%N 2>/dev/null)
  # A genuine nanosecond timestamp is all digits and far longer than the
  # 10-digit seconds value.
  if [[ "$ns" =~ ^[0-9]+$ ]] && ((${#ns} >= 16)); then
    printf '%s\n' "$((ns / 1000000))"
  elif command -v perl >/dev/null 2>&1; then
    perl -MTime::HiRes=time -e 'printf "%d\n", time() * 1000'
  else
    printf '%s\n' "$(($(date +%s) * 1000))"
  fi
}

#######################################
# Store one check result in the monitor_services table.
# Values are handed to psql as variables and interpolated with :'name', so
# quotes inside an error message can neither break nor alter the statement.
# Best effort: psql errors are discarded so that an unreachable database
# never aborts the health check itself.
# Globals:   PG_USER, PG_PASSWORD (read)
# Arguments: $1 - service name
#            $2 - status (up / down / degraded)
#            $3 - response time in ms (non-integers are stored as 0)
#            $4 - error message or detail text (may be empty)
#######################################
record_service() {
  local service=$1
  local status=$2
  local response_time=$3
  local error_msg=$4

  # response_time is interpolated unquoted as a number; force it to digits.
  [[ "$response_time" =~ ^[0-9]+$ ]] || response_time=0

  PGPASSWORD="$PG_PASSWORD" psql -U "$PG_USER" -h localhost -d momentry \
    -v svc_name="$service" \
    -v svc_status="$status" \
    -v resp_ms="$response_time" \
    -v err_msg="$error_msg" <<'EOF' 2>/dev/null
INSERT INTO monitor_services (service_name, service_type, status, response_time_ms, error_message, checked_at)
VALUES (:'svc_name', 'service', :'svc_status', :resp_ms, :'err_msg', NOW());
EOF
}

#######################################
# Request a URL with curl (5 s limit) and report status code and latency.
# Arguments: $1 - URL
# Outputs:   "<http_code> <elapsed_ms>" on stdout; the code is 000 when curl
#            produced no status at all
#######################################
http_probe() {
  local url=$1
  local start end code
  start=$(now_ms)
  code=$(curl -s -o /dev/null -w "%{http_code}" "$url" --max-time 5)
  end=$(now_ms)
  printf '%s %s\n' "${code:-000}" "$((end - start))"
}

#######################################
# Generic HTTP health check shared by the simple web services.
# Arguments: $1 - service name recorded in the database
#            $2 - label shown on screen, e.g. "Gitea (3000)"
#            $3 - URL to request
#            $4... - HTTP status codes that count as healthy
# Returns:   0 when the response code is one of the accepted codes, else 1
#######################################
check_http() {
  local service=$1
  local label=$2
  local url=$3
  shift 3

  local http_code ms accepted
  read -r http_code ms < <(http_probe "$url")

  for accepted in "$@"; do
    if [[ "$http_code" == "$accepted" ]]; then
      say_ok "$label - ${ms}ms"
      record_service "$service" "up" "$ms" ""
      return 0
    fi
  done

  say_fail "$label - HTTP $http_code"
  record_service "$service" "down" "0" "HTTP $http_code"
  return 1
}

#######################################
# Count the LISTEN entries lsof reports for a TCP port.
# `grep -c` already prints 0 (and exits 1) when nothing matches, so no
# `|| echo 0` fallback is appended; that would yield a two-line "0\n0".
# Arguments: $1 - port number
# Outputs:   the count on stdout (0 when lsof is unavailable)
#######################################
count_listeners() {
  local port=$1
  local count
  count=$(lsof -i ":$port" 2>/dev/null | grep -c LISTEN)
  printf '%s\n' "${count:-0}"
}

# Check PostgreSQL
check_postgresql() {
  local start ms
  start=$(now_ms)
  if PGPASSWORD="$PG_PASSWORD" pg_isready -h localhost -p 5432 -U "$PG_USER" >/dev/null 2>&1; then
    ms=$(($(now_ms) - start))
    say_ok "PostgreSQL (5432) - ${ms}ms"
    record_service "postgresql" "up" "$ms" ""
    return 0
  fi
  say_fail "PostgreSQL (5432) - Down"
  record_service "postgresql" "down" "0" "Connection failed"
  return 1
}

# Check Redis
# NOTE(review): `-a` exposes the password in the process list; the
# REDISCLI_AUTH environment variable may be an alternative - confirm that the
# installed redis-cli supports it before switching.
check_redis() {
  local start ms
  start=$(now_ms)
  if redis-cli -a "$REDIS_PASSWORD" ping 2>/dev/null | grep -q "PONG"; then
    ms=$(($(now_ms) - start))
    say_ok "Redis (6379) - ${ms}ms"
    record_service "redis" "up" "$ms" ""
    return 0
  fi
  say_fail "Redis (6379) - Down"
  record_service "redis" "down" "0" "Connection failed"
  return 1
}

# Check MariaDB
# NOTE(review): -p"..." also puts the password on the command line.
check_mariadb() {
  local start ms
  start=$(now_ms)
  if mysql -u "$MARIADB_USER" -p"$MARIADB_PASSWORD" -e "SELECT 1" >/dev/null 2>&1; then
    ms=$(($(now_ms) - start))
    say_ok "MariaDB (3306) - ${ms}ms"
    record_service "mariadb" "up" "$ms" ""
    return 0
  fi
  say_fail "MariaDB (3306) - Down"
  record_service "mariadb" "down" "0" "Connection failed"
  return 1
}

# Check n8n (a 302 redirect also counts as healthy)
check_n8n() {
  check_http "n8n" "n8n (8085)" "http://localhost:8085/" 200 302
}

# Check Caddy (admin API)
check_caddy() {
  check_http "caddy" "Caddy (2019)" "http://localhost:2019/config/" 200
}

# Check Gitea
check_gitea() {
  check_http "gitea" "Gitea (3000)" "http://localhost:3000/" 200
}

#######################################
# Check SFTPGo: web endpoint plus SFTP/WebDAV listeners and the number of
# PostgreSQL backends connected to the sftpgo database. Only the HTTP code
# decides up/down; the other figures are informational.
# Returns: 0 when the web endpoint answers 200/301/302, else 1
#######################################
check_sftpgo() {
  local http_code ms
  read -r http_code ms < <(http_probe "http://localhost:8080")

  # Check the SFTP and WebDAV ports
  local sftp_port webdav_port
  sftp_port=$(count_listeners 2022)
  webdav_port=$(count_listeners 8090)

  # Check the PostgreSQL connections. xargs (used to trim whitespace) exits 0
  # even on empty input, so the default is applied explicitly afterwards.
  local db_conn
  db_conn=$(PGPASSWORD="$PG_PASSWORD" psql -U "$PG_USER" -h localhost -d postgres -t -c "SELECT numbackends FROM pg_stat_database WHERE datname='sftpgo';" 2>/dev/null | xargs)
  db_conn=${db_conn:-0}

  case "$http_code" in
    200 | 301 | 302)
      say_ok "SFTPGo (8080) - ${ms}ms | SFTP:$sftp_port | WebDAV:$webdav_port | DB:$db_conn"
      record_service "sftpgo" "up" "$ms" "SFTP:$sftp_port WebDAV:$webdav_port DB:$db_conn"
      return 0
      ;;
  esac

  say_fail "SFTPGo (8080) - HTTP $http_code"
  record_service "sftpgo" "down" "0" "HTTP $http_code"
  return 1
}

#######################################
# SFTPGo detailed monitoring: prints a six-part report (process, listeners,
# database activity, account counts, database size, storage usage).
# Not invoked by main; available to callers that source this file.
#######################################
check_sftpgo_detailed() {
  echo ""
  echo "=== SFTPGo 詳細監控 ==="

  # 1. Service status. The [s] bracket keeps awk's own command line from
  #    matching, replacing the former `grep | grep -v grep` pair.
  echo "1. 服務狀態:"
  ps aux | awk '/[s]ftpgo/ {print " PID: "$2" CMD: "$11" "$12}'

  # 2. Listening ports
  echo "2. 端口監聽:"
  echo " - HTTP (8080): $(count_listeners 8080)"
  echo " - SFTP (2022): $(count_listeners 2022)"
  echo " - WebDAV (8090): $(count_listeners 8090)"

  # 3. PostgreSQL connections (grep exits non-zero when nothing is left
  #    after filtering, which triggers the placeholder text)
  echo "3. PostgreSQL 連接:"
  PGPASSWORD="$PG_PASSWORD" psql -U "$PG_USER" -h localhost -d postgres -c "SELECT numbackends, xact_commit, xact_rollback FROM pg_stat_database WHERE datname='sftpgo';" 2>/dev/null \
    | grep -v "numbackends\|^$\|row)" || echo " 無數據"

  # 4. User statistics
  echo "4. 用戶統計:"
  PGPASSWORD="$SFTPGO_PASSWORD" psql -U "$SFTPGO_USER" -h localhost -d sftpgo -c "SELECT 'users' as type, COUNT(*) as count FROM users UNION ALL SELECT 'admins', COUNT(*) FROM admins UNION ALL SELECT 'api_keys', COUNT(*) FROM api_keys;" 2>/dev/null \
    | grep -v "^$\|type\|^(\|row)" || echo " 無數據"

  # 5. Database size. The emptiness test replaces `| xargs || echo`, whose
  #    fallback could never fire because xargs exits 0 on empty input.
  echo "5. 數據庫大小:"
  local db_size
  db_size=$(PGPASSWORD="$PG_PASSWORD" psql -U "$PG_USER" -h localhost -d postgres -t -c "SELECT pg_size_pretty(pg_database_size('sftpgo'));" 2>/dev/null | xargs)
  if [[ -n "$db_size" ]]; then
    echo "$db_size"
  else
    echo " 無法獲取"
  fi

  # 6. Disk usage of the file store
  echo "6. 文件存儲使用:"
  du -sh /Users/accusys/momentry/var/sftpgo/data/ 2>/dev/null | awk '{print " "$2": "$1}'
}

#######################################
# SFTPGo authentication-failure monitoring.
# NOTE(review): the original comment said "past 1 hour", but the whole log
# file is scanned; no time filter is applied because the log's timestamp
# format is not known here.
# Arguments: $1 - failure threshold (default 5; non-numeric values fall back
#                 to 5)
# Returns:   1 when the failure count exceeds the threshold, else 0 (also 0
#            when the log file does not exist)
#######################################
check_sftpgo_auth_failures() {
  local log_file="/Users/accusys/momentry/log/sftpgo.log"
  local threshold=${1:-5} # default: 5 failures
  [[ "$threshold" =~ ^[0-9]+$ ]] || threshold=5

  if [ ! -f "$log_file" ]; then
    return 0
  fi

  # grep -c yields a bare integer (wc -l pads with spaces on some systems).
  local failures
  failures=$(grep -ci "authentication error\|invalid credentials\|login failed\|auth error" "$log_file" 2>/dev/null)
  failures=${failures:-0}

  if [ "$failures" -gt "$threshold" ]; then
    echo "⚠️ SFTPGo 認證失敗過多: $failures 次"
    return 1
  fi
  echo "✓ SFTPGo 認證失敗: $failures 次 (閾值: $threshold)"
  return 0
}

#######################################
# SFTPGo transfer statistics: active transfer count and the five most
# frequent remote IPs among the last 1000 access-log lines.
# Not invoked by main; available to callers that source this file.
#######################################
check_sftpgo_transfers() {
  echo ""
  echo "=== SFTPGo 傳輸統計 ==="

  # Active transfers (explicit default; see check_sftpgo for the reason)
  local active_transfers
  active_transfers=$(PGPASSWORD="$SFTPGO_PASSWORD" psql -U "$SFTPGO_USER" -h localhost -d sftpgo -t -c "SELECT COUNT(*) FROM active_transfers;" 2>/dev/null | xargs)
  echo "活動傳輸: ${active_transfers:-0}"

  # Source IPs. The heading says "today", but the sample is simply the last
  # 1000 lines of the access log.
  echo "今日訪問來源:"
  tail -1000 /Users/accusys/momentry/log/sftpgo_access.log 2>/dev/null \
    | grep -o '"remote_ip":"[^"]*"' \
    | cut -d'"' -f4 \
    | sort \
    | uniq -c \
    | sort -rn \
    | head -5 \
    | awk '{print " "$2": "$1" 次"}'
}

# Check Ollama
check_ollama() {
  check_http "ollama" "Ollama (11434)" "http://localhost:11434/api/tags" 200
}

# Check Qdrant (401 is accepted: the server is up, the request is just
# unauthenticated)
check_qdrant() {
  check_http "qdrant" "Qdrant (6333)" "http://localhost:6333/collections" 200 401
}

# Check MongoDB
check_mongodb() {
  local start ms
  start=$(now_ms)
  if mongosh --quiet --eval "db.adminCommand('ping')" >/dev/null 2>&1; then
    ms=$(($(now_ms) - start))
    say_ok "MongoDB (27017) - ${ms}ms"
    record_service "mongodb" "up" "$ms" ""
    return 0
  fi
  say_fail "MongoDB (27017) - Down"
  record_service "mongodb" "down" "0" "Connection failed"
  return 1
}

# Check PHP-FPM (process presence only; response time is recorded as 1)
check_php() {
  if pgrep -f "php-fpm" >/dev/null 2>&1; then
    say_ok "PHP-FPM - Running"
    record_service "php" "up" "1" ""
    return 0
  fi
  say_fail "PHP-FPM - Not running"
  record_service "php" "down" "0" "Process not found"
  return 1
}

#######################################
# Check RustDesk: both the hbbs (21116) and hbbr (21117) ports must accept
# connections.
# NOTE(review): when both ports are closed the result is still reported as
# "Partial"/"degraded"; kept as-is because consumers of monitor_services may
# rely on that status.
#######################################
check_rustdesk() {
  local hbbs_ok=false
  local hbbr_ok=false

  if nc -z localhost 21116 >/dev/null 2>&1; then
    hbbs_ok=true
  fi
  if nc -z localhost 21117 >/dev/null 2>&1; then
    hbbr_ok=true
  fi

  if [[ "$hbbs_ok" == true && "$hbbr_ok" == true ]]; then
    say_ok "RustDesk (21116/21117) - Running"
    record_service "rustdesk" "up" "1" ""
    return 0
  fi
  say_warn "RustDesk - Partial (hbbs: $hbbs_ok, hbbr: $hbbr_ok)"
  record_service "rustdesk" "degraded" "0" "hbbs:$hbbs_ok hbbr:$hbbr_ok"
  return 1
}

#######################################
# Check the Node.js version: every process matching "n8n" must run a node
# binary whose major version equals the locked one.
# Returns: 0 when all inspected binaries match, 1 when n8n is not running or
#          a mismatch is found
#######################################
check_node() {
  local LOCKED_NODE_VERSION="22"
  local version_issues=0
  local node_pids pid node_path node_version node_major

  node_pids=$(pgrep -f "n8n" 2>/dev/null)
  if [ -z "$node_pids" ]; then
    say_warn "Node.js - n8n not running"
    record_service "node" "degraded" "1" "n8n not running"
    return 1
  fi

  # Intentionally unquoted: pgrep prints one PID per line.
  for pid in $node_pids; do
    # First mapped "txt" entry mentioning node that is not a dylib. The dylib
    # filter runs before taking the first hit, so a library listed ahead of
    # the executable no longer hides it.
    node_path=$(lsof -p "$pid" 2>/dev/null \
      | awk '/txt/ && /node/ && $NF !~ /dylib/ {print $NF; exit}')
    if [ -n "$node_path" ] && [ -f "$node_path" ]; then
      node_version=$("$node_path" --version 2>/dev/null)
      node_version=${node_version#v}
      node_major=${node_version%%.*}
      if [ "$node_major" != "$LOCKED_NODE_VERSION" ]; then
        version_issues=$((version_issues + 1))
      fi
    fi
  done

  if [ "$version_issues" -gt 0 ]; then
    say_fail "Node.js - Version issues detected"
    record_service "node" "degraded" "1" "$version_issues version issues"
    return 1
  fi
  say_ok "Node.js (${LOCKED_NODE_VERSION}.x) - Running"
  record_service "node" "up" "1" ""
  return 0
}

#######################################
# Check the Python version: the shebang of each listed script must mention
# python3.11. Scripts that do not exist are skipped.
#######################################
check_python() {
  local LOCKED_PYTHON_VERSION="3.11.14"
  local script_issues=0
  local script shebang

  local scripts=(
    "/Users/accusys/momentry_core_0.1/scripts/asr_processor.py"
    "/Users/accusys/momentry_core_0.1/scripts/thumbnail_extractor.py"
  )

  for script in "${scripts[@]}"; do
    if [ -f "$script" ]; then
      shebang=$(head -1 "$script")
      if [[ "$shebang" != *"python3.11"* ]]; then
        script_issues=$((script_issues + 1))
      fi
    fi
  done

  if [ "$script_issues" -gt 0 ]; then
    say_fail "Python - Script version issues"
    record_service "python" "degraded" "1" "$script_issues script issues"
    return 1
  fi
  say_ok "Python (${LOCKED_PYTHON_VERSION}) - Configured"
  record_service "python" "up" "1" ""
  return 0
}

# Checks executed by main, in order.
SERVICE_CHECKS=(
  check_postgresql
  check_redis
  check_mariadb
  check_n8n
  check_caddy
  check_gitea
  check_sftpgo
  check_ollama
  check_qdrant
  check_mongodb
  check_php
  check_rustdesk
  check_node
  check_python
)

#######################################
# Main program: run every check and print/log the tally.
# `total` and `passed` are left global on purpose so that a script sourcing
# this file can still read them, as it could before.
#######################################
main() {
  echo "========================================"
  echo "Layer 2: Service Health Check"
  echo "Time: $(date)"
  echo "========================================"
  echo ""

  total=0
  passed=0

  local check
  for check in "${SERVICE_CHECKS[@]}"; do
    total=$((total + 1))
    if "$check"; then
      passed=$((passed + 1))
    fi
  done

  echo ""
  echo "========================================"
  echo "Result: $passed / $total services healthy"
  echo "========================================"

  log "Service check completed: $passed/$total healthy"
}

main "$@"