fix: pipeline timeline log, chunk lookup, face processor no fallback, Qdrant UUID script, delete safety rules

2026-05-18 00:36:14 +08:00
parent a880c80556
commit 088aefdac7
7 changed files with 503 additions and 29 deletions
--- a/scripts/face_processor_v1.py
+++ b/scripts/face_processor_v1.py
@@ -64,6 +64,27 @@ def process_face(

    app = None
    coreml_embedder = None
+
+    # Load CoreML FaceNet (required, no fallback)
+    try:
+        import coremltools as ct
+        coreml_path = os.path.join(
+            os.path.dirname(os.path.abspath(__file__)),
+            "../models/facenet512.mlpackage"
+        )
+        if not os.path.exists(coreml_path):
+            raise FileNotFoundError(f"CoreML model not found at {coreml_path}")
+        coreml_embedder = ct.models.MLModel(coreml_path)
+        framework.publish_info("COREML_FACENET_LOADED")
+    except Exception as e:
+        error_msg = f"CoreML FaceNet512 load failed: {e}"
+        print(f"[FACE] {error_msg}")
+        framework.publish_error(error_msg)
+        result = {"metadata": {"status": "error", "error": error_msg}, "frames": {}}
+        with open(output_path, "w") as f:
+            json.dump(result, f, indent=2)
+        return result
+
    try:
        framework.publish_info("LOADING_INSIGHTFACE")
        app = insightface.app.FaceAnalysis(
@@ -72,21 +93,6 @@ def process_face(
        app.prepare(ctx_id=0, det_size=(320, 320))
        framework.publish_info("INSIGHTFACE_LOADED")

-        # 嘗試載入 CoreML FaceNet 模型（MIT license，可用 ANE）
-        try:
-            import coremltools as ct
-            coreml_path = os.path.join(
-                os.path.dirname(os.path.abspath(__file__)),
-                "../models/facenet512.mlpackage"
-            )
-            if os.path.exists(coreml_path):
-                coreml_embedder = ct.models.MLModel(coreml_path)
-                framework.publish_info("COREML_FACENET_LOADED")
-            else:
-                print(f"[FACE] CoreML model not found at {coreml_path}, using InsightFace embedding")
-        except Exception as e:
-            print(f"[FACE] CoreML load failed: {e}, using InsightFace embedding")
-
    except Exception as e:
        print(f"[FACE] InsightFace failed to load (REQUIRED): {e}")
        error_msg = f"InsightFace failed to load (REQUIRED): {e}"
@@ -219,8 +225,7 @@ def process_face(
                            embedding = coreml_out[emb_key].flatten().tolist()
                    except Exception as e:
                        print(f"[FACE] CoreML embedding error for face at ({x1},{y1}): {e}")
-                if embedding is None and hasattr(face, "embedding"):
-                    embedding = face.embedding.tolist()
+

                landmarks = None
                if hasattr(face, "kps"):
--- a/scripts/sync_dev_to_public.sh
+++ b/scripts/sync_dev_to_public.sh
@@ -0,0 +1,148 @@
+#!/bin/bash
+# sync_dev_to_public.sh — compare the dev/public schemas and sync pipeline data
+# Usage: ./sync_dev_to_public.sh [check|sync] <file_uuid>   (file_uuid is required)
+
+PSQL="/opt/homebrew/opt/libpq/bin/psql"
+set -euo pipefail
+
+SCHEMA="${MOMENTRY_DB_SCHEMA:-dev}"
+DB_URL="${DATABASE_URL:-postgres://accusys@localhost:5432/momentry}"
+MODE="${1:-check}"
+FILE_UUID="${2:-}"
+TARGET="public"
+
+TABLES=("videos" "chunk" "face_detections" "processor_results" "monitor_jobs"
+        "identities" "identity_bindings" "tkg_nodes" "tkg_edges")
+
+if [ -z "$FILE_UUID" ]; then
+    echo "Usage: $0 [check|sync] <file_uuid>"
+    echo ""
+    echo "Examples:"
+    echo "  $0 check bd80fec92b0b6963d177a2c55bf713e2"
+    echo "  $0 sync  bd80fec92b0b6963d177a2c55bf713e2"
+    exit 1
+fi
+# FILE_UUID is interpolated into SQL text by the functions below; allow only hex digits and dashes.
+[[ "$FILE_UUID" =~ ^[0-9a-fA-F-]+$ ]] || { echo "Invalid file_uuid: $FILE_UUID" >&2; exit 1; }
+
+echo "=== Schema Sync: $SCHEMA → $TARGET ==="
+echo "File UUID: $FILE_UUID"
+echo "Mode: $MODE"
+echo ""
+
+check_table() {
+    # Compare the row counts of table $1 (column $2 = FILE_UUID) in ${SCHEMA} and ${TARGET}; 0 only on a match.
+    local table=$1 col=$2 dev_count pub_count
+    local where="WHERE ${col} = '${FILE_UUID}'"
+
+    dev_count=$($PSQL -At "$DB_URL" -c "SELECT COUNT(*) FROM ${SCHEMA}.${table} ${where};" 2>/dev/null || echo "ERROR")
+    pub_count=$($PSQL -At "$DB_URL" -c "SELECT COUNT(*) FROM ${TARGET}.${table} ${where};" 2>/dev/null || echo "ERROR")
+
+    # A failed query on either side is reported as a problem rather than as a count.
+    if [ "$dev_count" = "ERROR" ] || [ "$pub_count" = "ERROR" ]; then
+        echo "  ⚠️  $table — query error (table may not exist in $TARGET)"
+        return 1
+    fi
+
+    if ! [ "$dev_count" -eq "$pub_count" ]; then
+        echo "  ❌ $table — dev=$dev_count  pub=$pub_count (MISMATCH)"
+        return 1
+    fi
+    echo "  ✅ $table — $dev_count rows (match)"
+    return 0
+}
+
+sync_table() {
+    # Replace the ${TARGET} rows of table $1 whose column $2 equals FILE_UUID with the ${SCHEMA} rows.
+    local table=$1 col=$2
+    local dev_count pub_count cols
+
+    dev_count=$($PSQL -At "$DB_URL" -c "SELECT COUNT(*) FROM ${SCHEMA}.${table} WHERE ${col} = '${FILE_UUID}';" 2>/dev/null || echo "0")
+    pub_count=$($PSQL -At "$DB_URL" -c "SELECT COUNT(*) FROM ${TARGET}.${table} WHERE ${col} = '${FILE_UUID}';" 2>/dev/null || echo "0")
+
+    if [ "$dev_count" = "0" ]; then
+        echo "  ⏭️  $table — dev has 0 rows, skipping"
+        return
+    fi
+
+    if [ "$dev_count" -eq "$pub_count" ]; then
+        echo "  ✅ $table — already synced ($dev_count rows)"
+        return
+    fi
+
+    echo "  🔄 Syncing $table: dev=$dev_count → pub=$pub_count ..."
+
+    # Column list, excluding the id column. Resolved BEFORE anything is deleted so
+    # that a failed lookup cannot leave the ${TARGET} rows removed.
+    cols=$($PSQL -At "$DB_URL" -c "
+        SELECT string_agg(column_name, ', ' ORDER BY ordinal_position)
+        FROM information_schema.columns
+        WHERE table_schema='${SCHEMA}' AND table_name='${table}'
+          AND column_name != 'id'
+          AND is_updatable='YES';
+    " || true)
+    if [ -z "$cols" ]; then echo "  ❌ $table sync FAILED"; return; fi
+
+    # psql runs a multi-statement -c string as one transaction, so a failed INSERT
+    # also rolls back the DELETE instead of leaving the target rows missing.
+    $PSQL "$DB_URL" -q -v ON_ERROR_STOP=1 -c "
+        DELETE FROM ${TARGET}.${table} WHERE ${col} = '${FILE_UUID}';
+        INSERT INTO ${TARGET}.${table} (${cols})
+        SELECT ${cols} FROM ${SCHEMA}.${table} WHERE ${col} = '${FILE_UUID}';
+    " 2>/dev/null && echo "  ✅ $table synced" || echo "  ❌ $table sync FAILED"
+}
+
+echo "=== Checking Tables ==="
+echo ""
+MISMATCH=0
+for table in "${TABLES[@]}"; do
+    # Choose the column compared against FILE_UUID for this table.
+    case "$table" in
+        # These three tables are matched on "uuid"
+        # (original note: identities.uuid is UUID type).
+        monitor_jobs | identities | identity_bindings)
+            col="uuid"
+            ;;
+        # videos, chunk, face_detections, processor_results, tkg_nodes, tkg_edges
+        # and any table not named above are matched on "file_uuid".
+        *)
+            col="file_uuid"
+            ;;
+    esac
+
+    # A non-zero return (count mismatch or query error) counts as one mismatch.
+    check_table "$table" "$col" || MISMATCH=$((MISMATCH + 1))
+done
+
+echo ""
+if [ "$MISMATCH" -eq 0 ]; then
+    echo "✅ All tables in sync"
+    exit 0
+fi
+
+# Any mode other than "sync" only reports, and exits non-zero because mismatches exist.
+if [ "$MODE" != "sync" ]; then
+    echo "⚠️  $MISMATCH table(s) have mismatches. Run '$0 sync $FILE_UUID' to fix."
+    exit 1
+fi
+
+echo "=== Syncing Tables ==="
+echo ""
+for table in "${TABLES[@]}"; do
+    # monitor_jobs, identities and identity_bindings are matched on "uuid";
+    # every other table is matched on "file_uuid".
+    case "$table" in
+        monitor_jobs | identities | identity_bindings)
+            col="uuid"
+            ;;
+        *)
+            col="file_uuid"
+            ;;
+    esac
+
+    # sync_table reports each table's outcome itself; the final line below is printed unconditionally.
+    sync_table "$table" "$col"
+done
+
+echo ""
+echo "✅ Sync complete"
--- a/scripts/update_qdrant_uuid.py
+++ b/scripts/update_qdrant_uuid.py
@@ -0,0 +1,174 @@
+#!/usr/bin/env python3
+"""Bulk-update file_uuid in Qdrant collections (old → new)."""
+
+import json
+import subprocess
+import sys
+
+QDRANT_URL = "http://localhost:6333"  # base URL; request paths are appended to it
+
+# UUID mapping: old file UUID → list of new file UUIDs
+UUID_MAP = {
+    "aeed71342a899fe4b4c57b7d41bcb692": [
+        "bd80fec92b0b6963d177a2c55bf713e2",
+    ],
+}
+
+# Collections to process; main() skips any whose GET response has no "result" key
+COLLECTIONS = [
+    "momentry_dev_v1",
+    "momentry_dev_stories",
+    "momentry_dev_voice",
+    "momentry_dev_rule1_v2",
+    "momentry_dev_faces",
+    "sentence_story",
+    "sentence_summary",
+]
+
+
+def qdrant_get(path: str) -> dict:
+    """GET `path` from Qdrant via curl; return the decoded JSON body, or {} when stdout is blank."""
+    cmd = ["curl", "-s", "-X", "GET", f"{QDRANT_URL}{path}"]
+    raw = subprocess.run(cmd, capture_output=True, text=True).stdout
+    # Blank stdout maps to an empty dict instead of a JSON decode error.
+    return json.loads(raw) if raw.strip() else {}
+
+
+def qdrant_post(path: str, body: dict) -> dict:
+    """POST `body` as JSON to Qdrant at `path`; return the parsed reply, or {} when stdout is blank."""
+    # Stream the payload over stdin (`@-`) rather than through a fixed, shared /tmp
+    # file, so concurrent runs cannot overwrite each other's request body.
+    res = subprocess.run(
+        ["curl", "-s", "-X", "POST", f"{QDRANT_URL}{path}",
+         "-H", "Content-Type: application/json", "--data-binary", "@-"],
+        input=json.dumps(body), capture_output=True, text=True
+    )
+    return json.loads(res.stdout) if res.stdout.strip() else {}
+
+
+def qdrant_put(path: str, body: dict) -> dict:
+    """PUT `body` as JSON to Qdrant at `path`; return the parsed reply, or {} when stdout is blank."""
+    # Stream the payload over stdin (`@-`) rather than through a fixed, shared /tmp
+    # file, so concurrent runs cannot overwrite each other's request body.
+    res = subprocess.run(
+        ["curl", "-s", "-X", "PUT", f"{QDRANT_URL}{path}",
+         "-H", "Content-Type: application/json", "--data-binary", "@-"],
+        input=json.dumps(body), capture_output=True, text=True
+    )
+    return json.loads(res.stdout) if res.stdout.strip() else {}
+
+
+def scroll_all(collection: str, filter_old: dict) -> list:
+    """Return every point in `collection` matching `filter_old`, with payload and vector."""
+    points = []
+    offset = None
+    while True:
+        body = {
+            "limit": 1000,
+            "with_payload": True,
+            "with_vector": True,
+            "filter": filter_old,
+        }
+        if offset is not None:  # a falsy but valid offset (e.g. 0) must still be sent
+            body["offset"] = offset
+        result = qdrant_post(f"/collections/{collection}/points/scroll", body).get("result")
+        if not isinstance(result, dict):  # missing or null "result": stop with what we have
+            break
+        points.extend(result.get("points", []))
+        offset = result.get("next_page_offset")
+        if offset is None:  # no next_page_offset in the reply: this was the last page
+            break
+    return points
+
+
+def update_points(collection: str, points: list, old_uuid: str, new_uuid: str):
+    """Rewrite `old_uuid` to `new_uuid` in point payloads and upsert the changed points.
+
+    Only the payload keys 'uuid' and 'file_uuid' are compared. Matching payload
+    dicts are modified in place. Returns the number of points written; if a
+    batch is not acknowledged with status "ok", returns the count sent before it.
+    """
+    if not points:
+        return 0
+
+    rewritten = []
+    for point in points:
+        payload = point.get("payload", {})
+        # Decide which keys match before touching the payload.
+        hits = [key for key in ("uuid", "file_uuid") if payload.get(key) == old_uuid]
+        if not hits:
+            continue
+        for key in hits:
+            payload[key] = new_uuid
+        # The PUT below carries id, vector and payload together for each point.
+        rewritten.append({
+            "id": point["id"],
+            "vector": point["vector"],
+            "payload": payload,
+        })
+
+    # Send in batches of 500; with nothing to send the loop is skipped and 0 is returned.
+    total = len(rewritten)
+    for start in range(0, total, 500):
+        result = qdrant_put(
+            f"/collections/{collection}/points?wait=true",
+            {"points": rewritten[start:start + 500]}
+        )
+        if result.get("status") != "ok":
+            print(f"    Error at {start}: {result}")
+            # Batches before this one were already acknowledged.
+            return start
+    return total
+
+
+def _uuid_filter(uuid: str) -> dict:
+    """Build a Qdrant filter matching points whose 'uuid' or 'file_uuid' equals `uuid`."""
+    return {
+        "must": [
+            {"should": [
+                {"key": "uuid", "match": {"value": uuid}},
+                {"key": "file_uuid", "match": {"value": uuid}},
+            ]}
+        ]
+    }
+
+
+def main():
+    """Rewrite old file UUIDs to new ones across COLLECTIONS, then print a count check.
+
+    Collections whose GET response has no "result" key are skipped. The check
+    prints, per collection, how many points still carry the old UUID and how
+    many carry each new UUID.
+    """
+    for collection in COLLECTIONS:
+        if "result" not in qdrant_get(f"/collections/{collection}"):
+            continue
+
+        for old_uuid, new_uuids in UUID_MAP.items():
+            for new_uuid in new_uuids:
+                points = scroll_all(collection, _uuid_filter(old_uuid))
+                if not points:
+                    continue
+
+                print(f"{collection}: {len(points)} points with UUID {old_uuid[:8]}...")
+                updated = update_points(collection, points, old_uuid, new_uuid)
+                print(f"  → {updated} points updated to {new_uuid[:8]}...")
+
+    # Verify every new UUID (not just the first), so an empty list cannot raise IndexError.
+    print("\n=== Verification ===")
+    for collection in COLLECTIONS:
+        for old_uuid, new_uuids in UUID_MAP.items():
+            for what, uuid in [("old", old_uuid), *(("new", u) for u in new_uuids)]:
+                result = qdrant_post(
+                    f"/collections/{collection}/points/count",
+                    {"filter": _uuid_filter(uuid)}
+                )
+                # `or {}` covers replies where "result" is missing or null.
+                cnt = (result.get("result") or {}).get("count", 0)
+                if cnt > 0:
+                    print(f"  {collection}: {cnt} points with {what} UUID")
+    print("✅ Done")
+
+
+if __name__ == "__main__":  # run the migration only when executed as a script
+    main()