## v0.9.20260325_144654 ### Features - API Key Authentication System - Job Worker System - V2 Backup Versioning ### Bug Fixes - get_processor_results_by_job column mapping Co-authored-by: OpenCode
451 lines
14 KiB
Bash
Executable File
451 lines
14 KiB
Bash
Executable File
#!/bin/bash
#
# Momentry service health check (Layer 2)
# Path: /Users/accusys/momentry_core_0.1/monitor/service/health_check.sh

# Directory holding this script, and its parent (the monitor root).
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
MONITOR_DIR="$(dirname "$SCRIPT_DIR")"

# Load the credential settings when the shared loader is present.
# NOTE(review): the checks below read PG_USER, PG_PASSWORD, REDIS_PASSWORD,
# MARIADB_USER, ... - presumably this file defines them; confirm.
if [[ -f "$MONITOR_DIR/common/load_credentials.sh" ]]; then
    . "$MONITOR_DIR/common/load_credentials.sh"
fi

# Log location; the directory is created on every run if missing.
LOG_DIR="/Users/accusys/momentry/log/monitor"
LOG_FILE="$LOG_DIR/service_check.log"
mkdir -p "$LOG_DIR"
|
|
|
|
# Print a timestamped message on stdout and append the same line to $LOG_FILE.
#   $1 - message text
log() {
    printf '[%s] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$1" | tee -a "$LOG_FILE"
}
|
|
|
|
# Colors: ANSI escape sequences, stored with a literal backslash and
# interpreted by `echo -e` at the call sites.
RED='\033[0;31m'       # failure marker
GREEN='\033[0;32m'     # success marker
YELLOW='\033[1;33m'    # warning marker
NC='\033[0m'           # reset
|
|
|
|
# Record one check result in the monitor_services table of the "momentry"
# database.
#   $1 - service name
#   $2 - status string (callers in this file pass "up", "down" or "degraded")
#   $3 - response time in milliseconds; anything that is not a plain
#        non-negative integer is stored as 0
#   $4 - error / detail message (may be empty)
# Reads PG_USER and PG_PASSWORD. psql's stderr is discarded, so a failed
# insert is silent (best effort); the function returns psql's exit status.
record_service() {
    local service=$1
    local status=$2
    local response_time=$3
    local error_msg=$4

    # This value is interpolated unquoted into the statement, so it must be
    # a bare integer; an empty or garbage value would make the SQL invalid.
    [[ "$response_time" =~ ^[0-9]+$ ]] || response_time=0

    # Text values travel as psql variables and are expanded with :'name',
    # which lets psql quote them as SQL literals - a "'" inside a message can
    # no longer break (or inject into) the statement. The heredoc delimiter
    # is quoted so the shell leaves the placeholders untouched.
    PGPASSWORD="$PG_PASSWORD" psql -U "$PG_USER" -h localhost -d momentry \
        -v svc_name="$service" \
        -v svc_status="$status" \
        -v resp_ms="$response_time" \
        -v err_msg="$error_msg" << 'EOF' 2>/dev/null
INSERT INTO monitor_services (service_name, service_type, status, response_time_ms, error_message, checked_at)
VALUES (:'svc_name', 'service', :'svc_status', :resp_ms, :'err_msg', NOW());
EOF
}
|
|
|
|
# Check PostgreSQL: run pg_isready against localhost:5432 and record the
# outcome through record_service.
# Reads PG_USER and PG_PASSWORD.
# Returns 0 when pg_isready succeeds, 1 otherwise.
check_postgresql() {
    local start end ms

    start=$(date +%s%N)
    if PGPASSWORD="$PG_PASSWORD" pg_isready -h localhost -p 5432 -U "$PG_USER" > /dev/null 2>&1; then
        end=$(date +%s%N)
        # date(1) builds without %N support (e.g. older BSD/macOS) print the
        # format letter literally; fall back to whole seconds padded to
        # nanoseconds so the arithmetic below cannot fail.
        [[ "$start" =~ ^[0-9]+$ ]] || start="${start%%[!0-9]*}000000000"
        [[ "$end" =~ ^[0-9]+$ ]] || end="${end%%[!0-9]*}000000000"
        ms=$(( (end - start) / 1000000 ))
        echo -e "${GREEN}✓${NC} PostgreSQL (5432) - ${ms}ms"
        record_service "postgresql" "up" "$ms" ""
        return 0
    else
        echo -e "${RED}✗${NC} PostgreSQL (5432) - Down"
        record_service "postgresql" "down" "0" "Connection failed"
        return 1
    fi
}
|
|
|
|
# Check Redis: send PING through redis-cli (default host/port) and record
# the outcome through record_service.
# Reads REDIS_PASSWORD.
# Returns 0 when the reply contains "PONG", 1 otherwise.
check_redis() {
    local start end ms

    start=$(date +%s%N)
    # REDISCLI_AUTH hands the password to redis-cli through the environment,
    # keeping it out of the process argument list that `-a` would expose.
    if REDISCLI_AUTH="$REDIS_PASSWORD" redis-cli ping 2>/dev/null | grep -q "PONG"; then
        end=$(date +%s%N)
        # date(1) builds without %N support (e.g. older BSD/macOS) print the
        # format letter literally; fall back to whole seconds padded to
        # nanoseconds so the arithmetic below cannot fail.
        [[ "$start" =~ ^[0-9]+$ ]] || start="${start%%[!0-9]*}000000000"
        [[ "$end" =~ ^[0-9]+$ ]] || end="${end%%[!0-9]*}000000000"
        ms=$(( (end - start) / 1000000 ))
        echo -e "${GREEN}✓${NC} Redis (6379) - ${ms}ms"
        record_service "redis" "up" "$ms" ""
        return 0
    else
        echo -e "${RED}✗${NC} Redis (6379) - Down"
        record_service "redis" "down" "0" "Connection failed"
        return 1
    fi
}
|
|
|
|
# Check MariaDB: run "SELECT 1" through the mysql client and record the
# outcome through record_service.
# Reads MARIADB_USER and MARIADB_PASSWORD.
# Returns 0 when the query succeeds, 1 otherwise.
check_mariadb() {
    local start end ms

    start=$(date +%s%N)
    if mysql -u "$MARIADB_USER" -p"$MARIADB_PASSWORD" -e "SELECT 1" > /dev/null 2>&1; then
        end=$(date +%s%N)
        # date(1) builds without %N support (e.g. older BSD/macOS) print the
        # format letter literally; fall back to whole seconds padded to
        # nanoseconds so the arithmetic below cannot fail.
        [[ "$start" =~ ^[0-9]+$ ]] || start="${start%%[!0-9]*}000000000"
        [[ "$end" =~ ^[0-9]+$ ]] || end="${end%%[!0-9]*}000000000"
        ms=$(( (end - start) / 1000000 ))
        echo -e "${GREEN}✓${NC} MariaDB (3306) - ${ms}ms"
        record_service "mariadb" "up" "$ms" ""
        return 0
    else
        echo -e "${RED}✗${NC} MariaDB (3306) - Down"
        record_service "mariadb" "down" "0" "Connection failed"
        return 1
    fi
}
|
|
|
|
# Check n8n: GET http://localhost:8085/ (5 s timeout) and record the outcome
# through record_service.
# Returns 0 when the HTTP status is 200 or 302, 1 otherwise.
check_n8n() {
    local start end ms http_code

    start=$(date +%s%N)
    http_code=$(curl -s -o /dev/null -w "%{http_code}" --max-time 5 http://localhost:8085/)
    end=$(date +%s%N)
    # date(1) builds without %N support (e.g. older BSD/macOS) print the
    # format letter literally; fall back to whole seconds padded to
    # nanoseconds so the arithmetic below cannot fail.
    [[ "$start" =~ ^[0-9]+$ ]] || start="${start%%[!0-9]*}000000000"
    [[ "$end" =~ ^[0-9]+$ ]] || end="${end%%[!0-9]*}000000000"
    ms=$(( (end - start) / 1000000 ))

    if [[ "$http_code" == "200" || "$http_code" == "302" ]]; then
        echo -e "${GREEN}✓${NC} n8n (8085) - ${ms}ms"
        record_service "n8n" "up" "$ms" ""
        return 0
    else
        echo -e "${RED}✗${NC} n8n (8085) - HTTP $http_code"
        record_service "n8n" "down" "0" "HTTP $http_code"
        return 1
    fi
}
|
|
|
|
# Check Caddy: GET http://localhost:2019/config/ (5 s timeout) and record the
# outcome through record_service.
# Returns 0 when the HTTP status is 200, 1 otherwise.
check_caddy() {
    local start end ms http_code

    start=$(date +%s%N)
    http_code=$(curl -s -o /dev/null -w "%{http_code}" --max-time 5 http://localhost:2019/config/)
    end=$(date +%s%N)
    # date(1) builds without %N support (e.g. older BSD/macOS) print the
    # format letter literally; fall back to whole seconds padded to
    # nanoseconds so the arithmetic below cannot fail.
    [[ "$start" =~ ^[0-9]+$ ]] || start="${start%%[!0-9]*}000000000"
    [[ "$end" =~ ^[0-9]+$ ]] || end="${end%%[!0-9]*}000000000"
    ms=$(( (end - start) / 1000000 ))

    if [[ "$http_code" == "200" ]]; then
        echo -e "${GREEN}✓${NC} Caddy (2019) - ${ms}ms"
        record_service "caddy" "up" "$ms" ""
        return 0
    else
        echo -e "${RED}✗${NC} Caddy (2019) - HTTP $http_code"
        record_service "caddy" "down" "0" "HTTP $http_code"
        return 1
    fi
}
|
|
|
|
# Check Gitea: GET http://localhost:3000/ (5 s timeout) and record the
# outcome through record_service.
# Returns 0 when the HTTP status is 200, 1 otherwise.
check_gitea() {
    local start end ms http_code

    start=$(date +%s%N)
    http_code=$(curl -s -o /dev/null -w "%{http_code}" --max-time 5 http://localhost:3000/)
    end=$(date +%s%N)
    # date(1) builds without %N support (e.g. older BSD/macOS) print the
    # format letter literally; fall back to whole seconds padded to
    # nanoseconds so the arithmetic below cannot fail.
    [[ "$start" =~ ^[0-9]+$ ]] || start="${start%%[!0-9]*}000000000"
    [[ "$end" =~ ^[0-9]+$ ]] || end="${end%%[!0-9]*}000000000"
    ms=$(( (end - start) / 1000000 ))

    if [[ "$http_code" == "200" ]]; then
        echo -e "${GREEN}✓${NC} Gitea (3000) - ${ms}ms"
        record_service "gitea" "up" "$ms" ""
        return 0
    else
        echo -e "${RED}✗${NC} Gitea (3000) - HTTP $http_code"
        record_service "gitea" "down" "0" "HTTP $http_code"
        return 1
    fi
}
|
|
|
|
# Check SFTPGo: GET http://localhost:8080 (5 s timeout), gather listener
# counts for ports 2022 and 8090 plus the backend count of the "sftpgo"
# database, and record the outcome through record_service.
# Reads PG_USER and PG_PASSWORD.
# Returns 0 when the HTTP status is 200, 301 or 302, 1 otherwise. The port
# and database figures are informational only and do not affect the result.
check_sftpgo() {
    local start end ms http_code sftp_port webdav_port db_conn

    start=$(date +%s%N)
    http_code=$(curl -s -o /dev/null -w "%{http_code}" --max-time 5 http://localhost:8080)
    end=$(date +%s%N)
    # date(1) builds without %N support (e.g. older BSD/macOS) print the
    # format letter literally; fall back to whole seconds padded to
    # nanoseconds so the arithmetic below cannot fail.
    [[ "$start" =~ ^[0-9]+$ ]] || start="${start%%[!0-9]*}000000000"
    [[ "$end" =~ ^[0-9]+$ ]] || end="${end%%[!0-9]*}000000000"
    ms=$(( (end - start) / 1000000 ))

    # Count LISTEN entries on the SFTP and WebDAV ports. grep -c already
    # prints 0 when nothing matches (while exiting non-zero), so an
    # "|| echo 0" fallback would produce the two-line value "0<newline>0".
    sftp_port=$(lsof -i :2022 2>/dev/null | grep -c LISTEN)
    webdav_port=$(lsof -i :8090 2>/dev/null | grep -c LISTEN)
    sftp_port=${sftp_port:-0}
    webdav_port=${webdav_port:-0}

    # Number of backends connected to the sftpgo database. The pipeline's
    # status is that of xargs, so a psql failure surfaces as empty output
    # rather than an error code; default that case to 0.
    db_conn=$(PGPASSWORD="$PG_PASSWORD" psql -U "$PG_USER" -h localhost -d postgres -t -c "SELECT numbackends FROM pg_stat_database WHERE datname='sftpgo';" 2>/dev/null | xargs)
    db_conn=${db_conn:-0}

    if [[ "$http_code" == "200" || "$http_code" == "301" || "$http_code" == "302" ]]; then
        echo -e "${GREEN}✓${NC} SFTPGo (8080) - ${ms}ms | SFTP:$sftp_port | WebDAV:$webdav_port | DB:$db_conn"
        record_service "sftpgo" "up" "$ms" "SFTP:$sftp_port WebDAV:$webdav_port DB:$db_conn"
        return 0
    else
        echo -e "${RED}✗${NC} SFTPGo (8080) - HTTP $http_code"
        record_service "sftpgo" "down" "0" "HTTP $http_code"
        return 1
    fi
}
|
|
|
|
# Detailed SFTPGo report: prints six sections (processes, listening ports,
# PostgreSQL statistics, user counts, database size, storage usage) to
# stdout. Nothing is written to the monitoring database.
# Reads PG_USER / PG_PASSWORD and SFTPGO_USER / SFTPGO_PASSWORD.
check_sftpgo_detailed() {
    local db_size

    echo ""
    echo "=== SFTPGo 詳細監控 ==="

    # 1. Service status: PID and the first two command words of every
    #    process whose ps line mentions sftpgo.
    echo "1. 服務狀態:"
    ps aux | grep sftpgo | grep -v grep | awk '{print " PID: "$2" CMD: "$11" "$12}'

    # 2. Listening ports. grep -c prints 0 by itself when nothing matches, so
    #    no "|| echo 0" fallback (it would print a second "0" line).
    echo "2. 端口監聽:"
    echo " - HTTP (8080): $(lsof -i :8080 2>/dev/null | grep -c LISTEN)"
    echo " - SFTP (2022): $(lsof -i :2022 2>/dev/null | grep -c LISTEN)"
    echo " - WebDAV (8090): $(lsof -i :8090 2>/dev/null | grep -c LISTEN)"

    # 3. PostgreSQL connections: data rows only; the header, blank lines and
    #    the "(n row)" footer are filtered out. grep exits non-zero when
    #    nothing is left, which triggers the "no data" fallback.
    echo "3. PostgreSQL 連接:"
    PGPASSWORD="$PG_PASSWORD" psql -U "$PG_USER" -h localhost -d postgres -c "SELECT numbackends, xact_commit, xact_rollback FROM pg_stat_database WHERE datname='sftpgo';" 2>/dev/null | grep -v "numbackends\|^$\|row)" || echo " 無數據"

    # 4. User statistics: row counts of the users, admins and api_keys tables.
    echo "4. 用戶統計:"
    PGPASSWORD="$SFTPGO_PASSWORD" psql -U "$SFTPGO_USER" -h localhost -d sftpgo -c "SELECT 'users' as type, COUNT(*) as count FROM users UNION ALL SELECT 'admins', COUNT(*) FROM admins UNION ALL SELECT 'api_keys', COUNT(*) FROM api_keys;" 2>/dev/null | grep -v "^$\|type\|^(\|row)" || echo " 無數據"

    # 5. Database size. The pipeline's status is that of xargs, so a psql
    #    failure shows up as empty output; test for that explicitly instead
    #    of relying on "||", which would never fire.
    echo "5. 數據庫大小:"
    db_size=$(PGPASSWORD="$PG_PASSWORD" psql -U "$PG_USER" -h localhost -d postgres -t -c "SELECT pg_size_pretty(pg_database_size('sftpgo'));" 2>/dev/null | xargs)
    echo "${db_size:- 無法獲取}"

    # 6. Disk usage of the SFTPGo data directory.
    echo "6. 文件存儲使用:"
    du -sh /Users/accusys/momentry/var/sftpgo/data/ 2>/dev/null | awk '{print " "$2": "$1}'
}
|
|
|
|
# SFTPGo authentication-failure monitor: counts log lines that mention an
# authentication failure and compares the count with a threshold.
#   $1 - threshold (default 5); a value that is not a plain non-negative
#        integer falls back to 5
#   $2 - log file to scan (default /Users/accusys/momentry/log/sftpgo.log)
# Prints a one-line verdict. Returns 1 when the count exceeds the threshold,
# 0 otherwise - including when the log file does not exist (no output then).
check_sftpgo_auth_failures() {
    local threshold=${1:-5} # default: 5 failures
    local log_file=${2:-/Users/accusys/momentry/log/sftpgo.log}
    local failures

    # A non-numeric threshold would make the comparison below error out and
    # silently report "OK"; use the documented default instead.
    [[ "$threshold" =~ ^[0-9]+$ ]] || threshold=5

    if [[ ! -f "$log_file" ]]; then
        return 0
    fi

    # NOTE(review): the original comment described this as "failures in the
    # past hour", but no time filter is applied - every matching line in the
    # file is counted. Confirm which behavior is intended.
    failures=$(grep -ci \
        -e "authentication error" \
        -e "invalid credentials" \
        -e "login failed" \
        -e "auth error" \
        -- "$log_file" 2>/dev/null)
    failures=${failures:-0}

    if [ "$failures" -gt "$threshold" ]; then
        echo "⚠️ SFTPGo 認證失敗過多: $failures 次"
        return 1
    else
        echo "✓ SFTPGo 認證失敗: $failures 次 (閾值: $threshold)"
        return 0
    fi
}
|
|
|
|
# SFTPGo transfer statistics: prints the number of rows in the
# active_transfers table and the five most frequent remote IPs found in the
# tail of the access log.
# Reads SFTPGO_USER and SFTPGO_PASSWORD.
check_sftpgo_transfers() {
    local active_transfers

    echo ""
    echo "=== SFTPGo 傳輸統計 ==="

    # Active transfers. The pipeline's status is that of xargs, so a psql
    # failure surfaces as empty output; default that case to 0.
    active_transfers=$(PGPASSWORD="$SFTPGO_PASSWORD" psql -U "$SFTPGO_USER" -h localhost -d sftpgo -t -c "SELECT COUNT(*) FROM active_transfers;" 2>/dev/null | xargs)
    echo "活動傳輸: ${active_transfers:-0}"

    # Access sources.
    # NOTE(review): the heading says "today", but the code looks at the last
    # 1000 log lines regardless of their date.
    echo "今日訪問來源:"
    tail -n 1000 /Users/accusys/momentry/log/sftpgo_access.log 2>/dev/null \
        | grep -o '"remote_ip":"[^"]*"' \
        | cut -d'"' -f4 \
        | sort \
        | uniq -c \
        | sort -rn \
        | head -5 \
        | awk '{print " "$2": "$1" 次"}'
}
|
|
|
|
# Check Ollama: GET http://localhost:11434/api/tags (5 s timeout) and record
# the outcome through record_service.
# Returns 0 when the HTTP status is 200, 1 otherwise.
check_ollama() {
    local start end ms http_code

    start=$(date +%s%N)
    http_code=$(curl -s -o /dev/null -w "%{http_code}" --max-time 5 http://localhost:11434/api/tags)
    end=$(date +%s%N)
    # date(1) builds without %N support (e.g. older BSD/macOS) print the
    # format letter literally; fall back to whole seconds padded to
    # nanoseconds so the arithmetic below cannot fail.
    [[ "$start" =~ ^[0-9]+$ ]] || start="${start%%[!0-9]*}000000000"
    [[ "$end" =~ ^[0-9]+$ ]] || end="${end%%[!0-9]*}000000000"
    ms=$(( (end - start) / 1000000 ))

    if [[ "$http_code" == "200" ]]; then
        echo -e "${GREEN}✓${NC} Ollama (11434) - ${ms}ms"
        record_service "ollama" "up" "$ms" ""
        return 0
    else
        echo -e "${RED}✗${NC} Ollama (11434) - HTTP $http_code"
        record_service "ollama" "down" "0" "HTTP $http_code"
        return 1
    fi
}
|
|
|
|
# Check Qdrant: GET http://localhost:6333/collections (5 s timeout) and
# record the outcome through record_service.
# Returns 0 when the HTTP status is 200 or 401, 1 otherwise. 401 counts as
# "up" because the server did answer (presumably it only lacks an API key
# on this request - confirm).
check_qdrant() {
    local start end ms http_code

    start=$(date +%s%N)
    http_code=$(curl -s -o /dev/null -w "%{http_code}" --max-time 5 http://localhost:6333/collections)
    end=$(date +%s%N)
    # date(1) builds without %N support (e.g. older BSD/macOS) print the
    # format letter literally; fall back to whole seconds padded to
    # nanoseconds so the arithmetic below cannot fail.
    [[ "$start" =~ ^[0-9]+$ ]] || start="${start%%[!0-9]*}000000000"
    [[ "$end" =~ ^[0-9]+$ ]] || end="${end%%[!0-9]*}000000000"
    ms=$(( (end - start) / 1000000 ))

    if [[ "$http_code" == "200" || "$http_code" == "401" ]]; then
        echo -e "${GREEN}✓${NC} Qdrant (6333) - ${ms}ms"
        record_service "qdrant" "up" "$ms" ""
        return 0
    else
        echo -e "${RED}✗${NC} Qdrant (6333) - HTTP $http_code"
        record_service "qdrant" "down" "0" "HTTP $http_code"
        return 1
    fi
}
|
|
|
|
# Check MongoDB: run db.adminCommand('ping') through mongosh (default
# connection) and record the outcome through record_service.
# Returns 0 when mongosh succeeds, 1 otherwise.
check_mongodb() {
    local start end ms

    start=$(date +%s%N)
    if mongosh --quiet --eval "db.adminCommand('ping')" > /dev/null 2>&1; then
        end=$(date +%s%N)
        # date(1) builds without %N support (e.g. older BSD/macOS) print the
        # format letter literally; fall back to whole seconds padded to
        # nanoseconds so the arithmetic below cannot fail.
        [[ "$start" =~ ^[0-9]+$ ]] || start="${start%%[!0-9]*}000000000"
        [[ "$end" =~ ^[0-9]+$ ]] || end="${end%%[!0-9]*}000000000"
        ms=$(( (end - start) / 1000000 ))
        echo -e "${GREEN}✓${NC} MongoDB (27017) - ${ms}ms"
        record_service "mongodb" "up" "$ms" ""
        return 0
    else
        echo -e "${RED}✗${NC} MongoDB (27017) - Down"
        record_service "mongodb" "down" "0" "Connection failed"
        return 1
    fi
}
|
|
|
|
# Check PHP-FPM: look for any process whose command line matches "php-fpm"
# and record the outcome through record_service (response time is a fixed
# placeholder: 1 when found, 0 when not).
# Returns 0 when such a process exists, 1 otherwise.
check_php() {
    if ! pgrep -f "php-fpm" > /dev/null 2>&1; then
        echo -e "${RED}✗${NC} PHP-FPM - Not running"
        record_service "php" "down" "0" "Process not found"
        return 1
    fi

    echo -e "${GREEN}✓${NC} PHP-FPM - Running"
    record_service "php" "up" "1" ""
    return 0
}
|
|
|
|
# Check RustDesk: probe TCP ports 21116 and 21117 on localhost with nc and
# record the outcome through record_service. The result labels call the two
# ports "hbbs" and "hbbr" respectively.
# Returns 0 when both ports accept connections, 1 otherwise. One open port
# is reported as "degraded"; no open port at all is reported as "down"
# rather than "Partial", which would misdescribe a complete outage.
check_rustdesk() {
    local hbbs_ok=false
    local hbbr_ok=false

    nc -z localhost 21116 > /dev/null 2>&1 && hbbs_ok=true
    nc -z localhost 21117 > /dev/null 2>&1 && hbbr_ok=true

    if [[ "$hbbs_ok" == true && "$hbbr_ok" == true ]]; then
        echo -e "${GREEN}✓${NC} RustDesk (21116/21117) - Running"
        record_service "rustdesk" "up" "1" ""
        return 0
    elif [[ "$hbbs_ok" == false && "$hbbr_ok" == false ]]; then
        echo -e "${RED}✗${NC} RustDesk (21116/21117) - Down"
        record_service "rustdesk" "down" "0" "hbbs:$hbbs_ok hbbr:$hbbr_ok"
        return 1
    else
        echo -e "${YELLOW}⚠${NC} RustDesk - Partial (hbbs: $hbbs_ok, hbbr: $hbbr_ok)"
        record_service "rustdesk" "degraded" "0" "hbbs:$hbbs_ok hbbr:$hbbr_ok"
        return 1
    fi
}
|
|
|
|
# Check the Node.js version used by n8n: for every process matching "n8n",
# find the node executable it has mapped (via lsof), run it with --version
# and compare the major version with the locked one.
# Records the outcome through record_service.
# Returns 0 when no mismatch was found, 1 when n8n is not running or at
# least one process runs a different major version. Processes whose node
# binary cannot be determined are skipped, not counted as issues.
check_node() {
    local LOCKED_NODE_VERSION="22"
    local version_issues=0
    local node_pids pid node_path node_version node_major

    node_pids=$(pgrep -f "n8n" 2>/dev/null)

    if [[ -z "$node_pids" ]]; then
        echo -e "${YELLOW}⚠${NC} Node.js - n8n not running"
        record_service "node" "degraded" "1" "n8n not running"
        return 1
    fi

    # Deliberately unquoted: pgrep prints one PID per line and word
    # splitting turns that into the loop's item list.
    for pid in $node_pids; do
        # First "txt" entry mentioning node; the last column is taken as the
        # file path and shared libraries (dylib) are discarded.
        node_path=$(lsof -p "$pid" 2>/dev/null | grep "txt" | grep "node" | head -1 | awk '{print $NF}' | grep -v "dylib")

        if [[ -n "$node_path" && -f "$node_path" ]]; then
            node_version=$("$node_path" --version 2>/dev/null)
            node_version=${node_version#v}   # "v22.4.1" -> "22.4.1"
            node_major=${node_version%%.*}   # "22.4.1"  -> "22"

            if [[ "$node_major" != "$LOCKED_NODE_VERSION" ]]; then
                version_issues=$((version_issues + 1))
            fi
        fi
    done

    if [[ "$version_issues" -gt 0 ]]; then
        echo -e "${RED}✗${NC} Node.js - Version issues detected"
        record_service "node" "degraded" "1" "$version_issues version issues"
        return 1
    else
        echo -e "${GREEN}✓${NC} Node.js (${LOCKED_NODE_VERSION}.x) - Running"
        record_service "node" "up" "1" ""
        return 0
    fi
}
|
|
|
|
# Check the Python configuration: every listed script that exists must have
# "python3.11" somewhere in its first line (the shebang). Scripts that are
# missing are skipped. Records the outcome through record_service.
# Returns 0 when no script fails the check, 1 otherwise.
# Note: LOCKED_PYTHON_VERSION is only used in the success message; the test
# itself looks at the "python3.11" substring.
check_python() {
    local LOCKED_PYTHON_VERSION="3.11.14"
    local script_issues=0
    local first_line

    local scripts=(
        "/Users/accusys/momentry_core_0.1/scripts/asr_processor.py"
        "/Users/accusys/momentry_core_0.1/scripts/thumbnail_extractor.py"
    )

    for script in "${scripts[@]}"; do
        [ -f "$script" ] || continue

        # First line of the file, without its trailing newline.
        first_line=""
        IFS= read -r first_line < "$script"

        case "$first_line" in
            *"python3.11"*) ;;
            *) script_issues=$((script_issues + 1)) ;;
        esac
    done

    if [ "$script_issues" -le 0 ]; then
        echo -e "${GREEN}✓${NC} Python (${LOCKED_PYTHON_VERSION}) - Configured"
        record_service "python" "up" "1" ""
        return 0
    fi

    echo -e "${RED}✗${NC} Python - Script version issues"
    record_service "python" "degraded" "1" "$script_issues script issues"
    return 1
}
|
|
|
|
# Main program: print a banner, run every service check once, then print
# and log how many of them passed.
echo "========================================"
echo "Layer 2: Service Health Check"
echo "Time: $(date)"
echo "========================================"
echo ""

total=0
passed=0

# Run one check function ($1): always count it in $total, and in $passed
# when it returns success.
run_check() {
    total=$((total + 1))
    "$1" && passed=$((passed + 1))
}

# One top-level command per check, in the order the report lists them.
run_check check_postgresql
run_check check_redis
run_check check_mariadb
run_check check_n8n
run_check check_caddy
run_check check_gitea
run_check check_sftpgo
run_check check_ollama
run_check check_qdrant
run_check check_mongodb
run_check check_php
run_check check_rustdesk
run_check check_node
run_check check_python

echo ""
echo "========================================"
echo "Result: $passed / $total services healthy"
echo "========================================"

log "Service check completed: $passed/$total healthy"
|