feat: ASRX hybrid pipeline, identity history, worker fixes, checkpoint system

This commit is contained in:
Accusys
2026-06-02 07:13:23 +08:00
parent e3066c3f49
commit e1572907ae
198 changed files with 43705 additions and 8910 deletions

View File

@@ -0,0 +1,225 @@
#!/opt/homebrew/bin/python3.11
"""Build HTML documentation from module source files."""
import os, markdown, re, glob, shutil
MODULES_DIR = os.path.join(os.path.dirname(__file__), "..", "docs_v1.0", "API_WORKSPACE", "modules")
DOC_DIR = os.path.join(os.path.dirname(__file__), "..", "docs_v1.0", "doc")
DOC_DEV_DIR = os.path.join(os.path.dirname(__file__), "..", "docs_v1.0", "doc_developer")
# User-facing modules (no developer content)
USER_MODULES = {
"01_auth", "02_health", "03_register", "04_lookup", "05_process",
"06_search", "07_identity", "08_identity_agent", "08_media",
"09_tmdb", "10_pipeline", "12_agent",
}
def md_to_html(md_text: str) -> str:
"""Convert Markdown to HTML."""
html = markdown.markdown(md_text, extensions=['fenced_code', 'tables', 'codehilite'])
# Wrap tables
html = re.sub(r'<table>', '<table class="table">', html)
return html
def build_index(files, dev=False):
"""Build index.html."""
links = []
for fname in sorted(files):
name = os.path.splitext(fname)[0]
label = MODULE_LABELS.get(name, name.replace("_", " ").title())
if "" in label:
cn, en = label.split("", 1)
else:
cn, en = label, ""
html_name = fname.replace(".md", ".html")
links.append(f'<tr onclick="window.location=\'{html_name}\'" style="cursor:pointer"><td class="cn">{cn}</td><td class="en">{en}</td></tr>')
title = "Momentry API 開發者文件" if dev else "Momentry API 文件"
subtitle = "開發者專用" if dev else "API 參考手冊 — 登入後可瀏覽各模組文件"
return f"""<!DOCTYPE html>
<html lang="zh-TW">
<head>
<meta charset="UTF-8">
<title>{title}</title>
<style>
* {{ margin: 0; padding: 0; box-sizing: border-box; }}
body {{ font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif; background: #f5f5f5; color: #333; padding: 40px; }}
.container {{ max-width: 900px; margin: 0 auto; background: white; border-radius: 12px; box-shadow: 0 2px 12px rgba(0,0,0,0.08); padding: 40px; }}
h1 {{ font-size: 28px; margin-bottom: 8px; }}
p.subtitle {{ color: #666; margin-bottom: 24px; }}
table {{ width: 100%; border-collapse: collapse; }}
tr {{ border-bottom: 1px solid #eee; }}
tr:last-child {{ border: none; }}
td {{ padding: 10px 0; }}
td.cn {{ width: 140px; font-weight: 600; color: #333; }}
td.en {{ color: #666; font-size: 14px; }}
a {{ color: #0066cc; text-decoration: none; display: block; }}
a:hover td {{ background: #f8f8f8; border-radius: 4px; }}
</style>
</head>
<body>
<div class="container">
<h1>{title}</h1>
<p class="subtitle">{subtitle}</p>
<table>{"".join(links)}</table>
</div>
</body>
</html>"""
MODULE_LABELS = {
"01_auth": "安全認證Authentication",
"02_health": "健康檢查Health",
"03_register": "檔案註冊File Registration",
"04_lookup": "檔案屬性查詢File Lookup",
"05_process": "處理流程Processing",
"06_search": "搜尋功能Search",
"07_identity": "身份識別Identity",
"08_identity_agent": "智能身份綁定Smart Identity Binding",
"08_media": "串流與截圖Streaming & Thumbnails",
"09_tmdb": "TMDb 整合TMDb Integration",
"10_pipeline": "生產線Pipeline",
"11_error_codes": "錯誤碼Error Codes",
"12_agent": "智慧代理AI Agents",
}
def build_html(md_text: str, title: str) -> str:
"""Wrap MD content in HTML page."""
content = md_to_html(md_text)
return f"""<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>{title} - Momentry API Docs</title>
<style>
* {{ margin: 0; padding: 0; box-sizing: border-box; }}
body {{ font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif; background: #f5f5f5; color: #333; padding: 40px; }}
.container {{ max-width: 960px; margin: 0 auto; background: white; border-radius: 12px; box-shadow: 0 2px 12px rgba(0,0,0,0.08); padding: 40px; }}
h1 {{ font-size: 24px; margin: 24px 0 12px; }}
h2 {{ font-size: 20px; margin: 20px 0 10px; color: #222; }}
h3 {{ font-size: 16px; margin: 16px 0 8px; color: #444; }}
p {{ line-height: 1.6; margin: 8px 0; }}
table {{ border-collapse: collapse; width: 100%; margin: 12px 0; font-size: 14px; }}
th, td {{ border: 1px solid #ddd; padding: 8px 12px; text-align: left; }}
th {{ background: #f0f0f0; font-weight: 600; }}
code {{ background: #f0f0f0; padding: 2px 6px; border-radius: 3px; font-size: 13px; }}
pre {{ background: #f8f8f8; border: 1px solid #ddd; border-radius: 6px; padding: 12px; overflow-x: auto; margin: 12px 0; }}
pre code {{ background: none; padding: 0; }}
a {{ color: #0066cc; }}
.back {{ display: inline-block; margin-bottom: 20px; color: #666; }}
.back:hover {{ color: #333; }}
</style>
</head>
<body>
<div class="container">
<a class="back" href="index.html">&larr; Back to index</a>
{content}
</div>
</body>
</html>"""
def login_page() -> str:
return """<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>Login - Momentry Docs</title>
<style>
* { margin: 0; padding: 0; box-sizing: border-box; }
body { font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif; background: #f5f5f5; display: flex; justify-content: center; align-items: center; height: 100vh; }
.card { background: white; border-radius: 12px; box-shadow: 0 2px 12px rgba(0,0,0,0.08); padding: 40px; width: 360px; }
h1 { font-size: 24px; margin-bottom: 24px; text-align: center; }
input { width: 100%; padding: 10px 12px; margin-bottom: 12px; border: 1px solid #ddd; border-radius: 6px; font-size: 14px; }
button { width: 100%; padding: 10px; background: #0066cc; color: white; border: none; border-radius: 6px; font-size: 16px; cursor: pointer; }
button:hover { background: #0052a3; }
.error { color: #cc0000; font-size: 13px; margin-bottom: 12px; display: none; }
</style>
</head>
<body>
<div class="card">
<h1>Momentry Docs</h1>
<form id="loginForm">
<input type="text" id="username" placeholder="Username" value="demo" required>
<input type="password" id="password" placeholder="Password" value="demo" required>
<div class="error" id="error">Invalid credentials</div>
<button type="submit">Login</button>
</form>
</div>
<script>
document.getElementById('loginForm').onsubmit = async function(e) {
e.preventDefault();
const resp = await fetch('/api/v1/auth/login', {
method: 'POST',
headers: {'Content-Type': 'application/json'},
body: JSON.stringify({
username: document.getElementById('username').value,
password: document.getElementById('password').value
})
});
if (resp.ok) {
window.location.href = '/doc/index.html';
} else {
document.getElementById('error').style.display = 'block';
}
};
</script>
</body>
</html>"""
def main():
# Clean and recreate doc dirs
for d in [DOC_DIR, DOC_DEV_DIR]:
if os.path.exists(d):
shutil.rmtree(d)
os.makedirs(d)
md_files = sorted(glob.glob(os.path.join(MODULES_DIR, "*.md")))
if not md_files:
print(f"No MD files found in {MODULES_DIR}")
return
user_html = []
dev_html = []
for md_path in md_files:
with open(md_path) as f:
md_text = f.read()
fname = os.path.basename(md_path)
stem = os.path.splitext(fname)[0]
# Skip template
if stem == "_template":
continue
# Skip error codes (developer-only)
if stem == "11_error_codes":
dev_only = True
else:
dev_only = stem not in USER_MODULES
title = stem.replace("_", " ").title()
html = build_html(md_text, title)
if dev_only:
out_path = os.path.join(DOC_DEV_DIR, fname.replace(".md", ".html"))
with open(out_path, "w") as f:
f.write(html)
dev_html.append(fname)
print(f" [dev] {fname}")
else:
out_path = os.path.join(DOC_DIR, fname.replace(".md", ".html"))
with open(out_path, "w") as f:
f.write(html)
user_html.append(fname)
print(f" [doc] {fname}")
# Build indexes + login page
for d, files, label in [(DOC_DIR, user_html, "User"), (DOC_DEV_DIR, dev_html, "Dev")]:
index = build_index(files)
with open(os.path.join(d, "index.html"), "w") as f:
f.write(index)
with open(os.path.join(d, "login.html"), "w") as f:
f.write(login_page())
print(f" {label}: {len(files)} pages -> {d}")
if __name__ == "__main__":
main()

View File

@@ -0,0 +1,148 @@
#!/bin/bash
# sync_dev_to_public.sh — 比對 dev/public schema同步 pipeline 資料
# Usage: ./sync_dev_to_public.sh [check|sync] [file_uuid]
PSQL="/opt/homebrew/opt/libpq/bin/psql"
set -euo pipefail
SCHEMA="${MOMENTRY_DB_SCHEMA:-dev}"
DB_URL="${DATABASE_URL:-postgres://accusys@localhost:5432/momentry}"
MODE="${1:-check}"
FILE_UUID="${2:-}"
TABLES=("videos" "chunk" "face_detections" "processor_results" "monitor_jobs"
"identities" "identity_bindings" "tkg_nodes" "tkg_edges")
TARGET="public"
if [ -z "$FILE_UUID" ]; then
echo "Usage: $0 [check|sync] <file_uuid>"
echo ""
echo "Examples:"
echo " $0 check bd80fec92b0b6963d177a2c55bf713e2"
echo " $0 sync bd80fec92b0b6963d177a2c55bf713e2"
exit 1
fi
echo "=== Schema Sync: $SCHEMA$TARGET ==="
echo "File UUID: $FILE_UUID"
echo "Mode: $MODE"
echo ""
check_table() {
local table=$1
local col=$2
local src_count dev_count pub_count
dev_count=$($PSQL -At "$DB_URL" -c "SELECT COUNT(*) FROM ${SCHEMA}.${table} WHERE ${col} = '${FILE_UUID}';" 2>/dev/null || echo "ERROR")
pub_count=$($PSQL -At "$DB_URL" -c "SELECT COUNT(*) FROM ${TARGET}.${table} WHERE ${col} = '${FILE_UUID}';" 2>/dev/null || echo "ERROR")
if [ "$dev_count" = "ERROR" ] || [ "$pub_count" = "ERROR" ]; then
echo " ⚠️ $table — query error (table may not exist in $TARGET)"
return 1
fi
if [ "$dev_count" -eq "$pub_count" ]; then
echo "$table$dev_count rows (match)"
return 0
else
echo "$table — dev=$dev_count pub=$pub_count (MISMATCH)"
return 1
fi
}
sync_table() {
local table=$1
local col=$2
local src_count dev_count pub_count
dev_count=$($PSQL -At "$DB_URL" -c "SELECT COUNT(*) FROM ${SCHEMA}.${table} WHERE ${col} = '${FILE_UUID}';" 2>/dev/null || echo "0")
pub_count=$($PSQL -At "$DB_URL" -c "SELECT COUNT(*) FROM ${TARGET}.${table} WHERE ${col} = '${FILE_UUID}';" 2>/dev/null || echo "0")
if [ "$dev_count" = "0" ]; then
echo " ⏭️ $table — dev has 0 rows, skipping"
return
fi
if [ "$dev_count" -eq "$pub_count" ]; then
echo "$table — already synced ($dev_count rows)"
return
fi
echo " 🔄 Syncing $table: dev=$dev_count → pub=$pub_count ..."
# Delete existing public rows, insert from dev
$PSQL "$DB_URL" -q -c "DELETE FROM ${TARGET}.${table} WHERE ${col} = '${FILE_UUID}';" 2>/dev/null || true
# Get columns list (excluding id for SERIAL)
COLS=$($PSQL -At "$DB_URL" -c "
SELECT string_agg(column_name, ', ' ORDER BY ordinal_position)
FROM information_schema.columns
WHERE table_schema='${SCHEMA}' AND table_name='${table}'
AND column_name != 'id'
AND is_updatable='YES';
")
$PSQL "$DB_URL" -q -c "
INSERT INTO ${TARGET}.${table} (${COLS})
SELECT ${COLS}
FROM ${SCHEMA}.${table}
WHERE ${col} = '${FILE_UUID}';
" 2>/dev/null && echo "$table synced" || echo "$table sync FAILED"
}
echo "=== Checking Tables ==="
echo ""
MISMATCH=0
for table in "${TABLES[@]}"; do
# Determine the UUID column name for each table
case "$table" in
videos) col="file_uuid" ;;
chunk) col="file_uuid" ;;
face_detections) col="file_uuid" ;;
processor_results) col="file_uuid" ;;
monitor_jobs) col="uuid" ;;
identities) col="uuid" ;; # identities.uuid is UUID type
identity_bindings) col="uuid" ;;
tkg_nodes) col="file_uuid" ;;
tkg_edges) col="file_uuid" ;;
*) col="file_uuid" ;;
esac
if ! check_table "$table" "$col"; then
MISMATCH=$((MISMATCH + 1))
fi
done
echo ""
if [ "$MISMATCH" -eq 0 ]; then
echo "✅ All tables in sync"
exit 0
fi
if [ "$MODE" != "sync" ]; then
echo "⚠️ $MISMATCH table(s) have mismatches. Run '$0 sync $FILE_UUID' to fix."
exit 1
fi
echo "=== Syncing Tables ==="
echo ""
for table in "${TABLES[@]}"; do
case "$table" in
videos) col="file_uuid" ;;
chunk) col="file_uuid" ;;
face_detections) col="file_uuid" ;;
processor_results) col="file_uuid" ;;
monitor_jobs) col="uuid" ;;
identities) col="uuid" ;;
identity_bindings) col="uuid" ;;
tkg_nodes) col="file_uuid" ;;
tkg_edges) col="file_uuid" ;;
*) col="file_uuid" ;;
esac
sync_table "$table" "$col"
done
echo ""
echo "✅ Sync complete"

View File

@@ -0,0 +1,174 @@
#!/usr/bin/env python3
"""批量更新 Qdrant collection 中的 file_uuid (舊→新)"""
import json
import subprocess
import sys
QDRANT_URL = "http://localhost:6333"
# UUID mapping: 舊 → 新
UUID_MAP = {
"aeed71342a899fe4b4c57b7d41bcb692": [
"bd80fec92b0b6963d177a2c55bf713e2",
],
}
# Collections to process
COLLECTIONS = [
"momentry_dev_v1",
"momentry_dev_stories",
"momentry_dev_voice",
"momentry_dev_rule1_v2",
"momentry_dev_faces",
"sentence_story",
"sentence_summary",
]
def qdrant_get(path: str) -> dict:
res = subprocess.run(
["curl", "-s", "-X", "GET", f"{QDRANT_URL}{path}"],
capture_output=True, text=True
)
return json.loads(res.stdout) if res.stdout.strip() else {}
def qdrant_post(path: str, body: dict) -> dict:
tmp = "/tmp/qdrant_post.json"
with open(tmp, "w") as f:
json.dump(body, f)
res = subprocess.run(
["curl", "-s", "-X", "POST", f"{QDRANT_URL}{path}",
"-H", "Content-Type: application/json", "-d", f"@{tmp}"],
capture_output=True, text=True
)
return json.loads(res.stdout) if res.stdout.strip() else {}
def qdrant_put(path: str, body: dict) -> dict:
tmp = "/tmp/qdrant_update.json"
with open(tmp, "w") as f:
json.dump(body, f)
res = subprocess.run(
["curl", "-s", "-X", "PUT", f"{QDRANT_URL}{path}",
"-H", "Content-Type: application/json", "-d", f"@{tmp}"],
capture_output=True, text=True
)
return json.loads(res.stdout) if res.stdout.strip() else {}
def scroll_all(collection: str, filter_old: dict) -> list:
"""Scroll all matching points from a collection"""
points = []
offset = None
while True:
body = {
"limit": 1000,
"with_payload": True,
"with_vector": True,
"filter": filter_old,
}
if offset:
body["offset"] = offset
result = qdrant_post(f"/collections/{collection}/points/scroll", body)
batch = result.get("result", {}).get("points", [])
points.extend(batch)
next_offset = result.get("result", {}).get("next_page_offset")
if next_offset is None:
break
offset = next_offset
return points
def update_points(collection: str, points: list, old_uuid: str, new_uuid: str):
"""Update file_uuid in payload for the given points"""
if not points:
return 0
updated = []
for p in points:
pl = p.get("payload", {})
# Check both 'uuid' and 'file_uuid' fields
changed = False
if pl.get("uuid") == old_uuid:
pl["uuid"] = new_uuid
changed = True
if pl.get("file_uuid") == old_uuid:
pl["file_uuid"] = new_uuid
changed = True
if changed:
updated.append({
"id": p["id"],
"vector": p["vector"],
"payload": pl,
})
if not updated:
return 0
# Update in batches of 500
total = len(updated)
for i in range(0, total, 500):
batch = updated[i:i+500]
result = qdrant_put(
f"/collections/{collection}/points?wait=true",
{"points": batch}
)
if result.get("status") != "ok":
print(f" Error at {i}: {result}")
return i
return total
def main():
for collection in COLLECTIONS:
# Check if collection exists
info = qdrant_get(f"/collections/{collection}")
if "result" not in info:
continue
for old_uuid, new_uuids in UUID_MAP.items():
for new_uuid in new_uuids:
# Scroll all points with this old UUID
filter_body = {
"must": [
{"should": [
{"key": "uuid", "match": {"value": old_uuid}},
{"key": "file_uuid", "match": {"value": old_uuid}},
]}
]
}
points = scroll_all(collection, filter_body)
if not points:
continue
print(f"{collection}: {len(points)} points with UUID {old_uuid[:8]}...")
updated = update_points(collection, points, old_uuid, new_uuid)
print(f"{updated} points updated to {new_uuid[:8]}...")
# Verify
print("\n=== Verification ===")
for collection in COLLECTIONS:
for old_uuid, new_uuids in UUID_MAP.items():
for what, uuid in [("old", old_uuid), ("new", new_uuids[0])]:
filter_body = {
"must": [
{"should": [
{"key": "uuid", "match": {"value": uuid}},
{"key": "file_uuid", "match": {"value": uuid}},
]}
]
}
result = qdrant_post(
f"/collections/{collection}/points/count",
{"filter": filter_body}
)
cnt = result.get("result", {}).get("count", 0)
if cnt > 0:
print(f" {collection}: {cnt} points with {what} UUID")
print("✅ Done")
if __name__ == "__main__":
main()