M4 handover: coordinate fixes, detector registry, deploy v2, YOLOv8s, identity lifecycle

- Fix swift_pose/swift_ocr Y-flip bugs (BUG-003~006)
- Add heuristic_scene module + post-processing trigger (replaces Places365)
- YOLOv5nu → YOLOv8s CoreML (+33% detections, +390% scene indicators)
- Per-table SQL export (split 4.7GB single file → 478MB max per table)
- Version/build check in deploy.sh (compare /health vs file_info.json)
- Add file_uuid column to identities table + backfill
- Identity pre-clean step in deploy (avoids UNIQUE conflicts on re-deploy)
- Stranger_xxx naming fix with UUID context
- Add DETECTOR_REGISTRY.md (25 detectors), DETECTOR_SELECTION_SOP.md
- Update SPATIAL_COORDINATE_REGISTRY.md (P layer, 6-layer architecture)
- New IDENTITY_LIFECYCLE.md
- M4 response docs for deploy_script_fix and 111614 test report
2026-05-13 20:00:47 +08:00
parent d34bcae145
commit ffc30d7377
25 changed files with 2219 additions and 118 deletions
--- a/scripts/deploy_package.sh
+++ b/scripts/deploy_package.sh
@@ -15,10 +15,36 @@ echo "=== Momentry Package Deploy ==="
 echo "UUID: $UUID"
 echo "Time: $(date '+%Y-%m-%d %H:%M:%S')"
 echo ""
+
+# 0. Version & build compatibility check
+echo "[0/8] Checking system version and build..."
+# file_info.json path is passed as argv so quotes in $DIR cannot break the Python snippet
+PKG_VER=$(python3 -c "import json,sys; f=json.load(open(sys.argv[1])); print(f.get('momentry_version','?'))" "$DIR/file_info.json")
+PKG_BUILD=$(python3 -c "import json,sys; f=json.load(open(sys.argv[1])); print(f.get('momentry_build','?'))" "$DIR/file_info.json")
+SRV=$(curl -sf http://localhost:3003/health | python3 -c "
+import json,sys
+d=json.load(sys.stdin)
+print(d.get('version','unknown'), d.get('build_git_hash','unknown'))
+" 2>/dev/null || echo "down down")
+SRV_VER=$(echo "$SRV" | cut -d' ' -f1)
+SRV_BUILD=$(echo "$SRV" | cut -d' ' -f2)
+if [ "$SRV_VER" = "down" ]; then
+    echo "  ⚠️  Cannot reach server at localhost:3003, skipping version check"
+elif [ "$SRV_VER" != "$PKG_VER" ] || [ "$SRV_BUILD" != "$PKG_BUILD" ]; then
+    echo "  ❌ Mismatch:"
+    echo "                 Package      Server"
+    echo "    Version:    $PKG_VER      $SRV_VER"
+    echo "    Build:      $PKG_BUILD    $SRV_BUILD"
+    echo ""
+    echo "    Please obtain the matching system upgrade package."
+    exit 1
+else
+    echo "  ✅ Server v$SRV_VER (build $SRV_BUILD) matches package"
+fi

 # 1. Verify package integrity
-echo "[1/5] Verifying package..."
+echo "[1/8] Verifying package..."
 REQUIRED_FILES=("data.sql" "file_info.json")
 MISSING=0
 for f in "${REQUIRED_FILES[@]}"; do
    if [ ! -f "$DIR/$f" ]; then
@@ -32,28 +60,38 @@ if [ $MISSING -eq 1 ]; then
 fi
 echo "  ✅ Package verified"

-# 2. Import data.sql
-echo "[2/5] Importing DB data..."
-"$PG_BIN/psql" -U "$DB_USER" -d "$DB_NAME" -f "$DIR/data.sql" 2>&1 | tail -3
+# 2. Pre-clean: remove existing identities for this file (avoids UNIQUE(name) conflicts on COPY)
+echo "[2/8] Pre-cleaning existing identities for this file..."
+"$PG_BIN/psql" -U "$DB_USER" -d "$DB_NAME" -c "DELETE FROM dev.identities WHERE file_uuid = '$UUID'" > /dev/null 2>&1
+echo "  ✅ Cleared identities for $UUID"
+
+# 3. Import data.sql (uses \i to load per-table files from sql/)
+echo "[3/8] Importing DB data..."
+(cd "$DIR" && "$PG_BIN/psql" -U "$DB_USER" -d "$DB_NAME" -f data.sql 2>&1) | tail -5
 echo "  ✅ Data imported"

-# 3. Copy video to demo dir
+# 4. Copy video to demo dir (only this package's video, not scanning others)
 VIDEO_FILE=$(ls "$DIR"/*.mp4 "$DIR"/*.mov "$DIR"/*.avi "$DIR"/*.mkv 2>/dev/null | head -1)
 if [ -n "$VIDEO_FILE" ]; then
    VIDEO_NAME=$(basename "$VIDEO_FILE")
    DEST="$DEMO_DIR/$VIDEO_NAME"
    if [ ! -f "$DEST" ]; then
        cp "$VIDEO_FILE" "$DEST"
-        echo "[3/5] Video copied: $VIDEO_NAME → $DEMO_DIR"
+        echo "[4/8] Video copied: $VIDEO_NAME → $DEMO_DIR"
    else
-        echo "[3/5] Video already in demo dir, skipping"
+        echo "[4/8] Video already in demo dir, skipping"
    fi
 else
-    echo "[3/5] No video file in package, skipping"
+    echo "[4/8] No video file in package, skipping"
 fi

-# 4. Copy output files
-echo "[4/5] Copying output files..."
+# 5. Set video status to completed (package is fully processed)
+echo "[5/8] Setting deployment status..."
+"$PG_BIN/psql" -U "$DB_USER" -d "$DB_NAME" -c "UPDATE dev.videos SET status = 'completed' WHERE file_uuid = '$UUID'" > /dev/null 2>&1
+echo "  ✅ Status set to 'completed'"
+
+# 6. Copy output files
+echo "[6/8] Copying output files..."
 COPIED=0
 for f in "$DIR"/*.json "$DIR"/*.sqlite "$DIR"/*.sqlite; do
    if [ -f "$f" ]; then
@@ -66,20 +104,25 @@ for f in "$DIR"/*.json "$DIR"/*.sqlite "$DIR"/*.sqlite; do
 done
 echo "  ✅ $COPIED files copied to $OUTPUT_DIR"

-# 5. Verify deployment
-echo "[5/5] Verifying deployment..."
-CHUNKS=$("$PG_BIN/psql" -U "$DB_USER" -d "$DB_NAME" -t -A -c "SELECT COUNT(*) FROM dev.chunk WHERE file_uuid='$UUID' AND chunk_type='sentence'" 2>/dev/null || echo "?")
+# 7. Verify deployment
+echo "[7/8] Verifying deployment..."
+CHUNKS=$("$PG_BIN/psql" -U "$DB_USER" -d "$DB_NAME" -t -A -c "SELECT COUNT(*) FROM dev.chunk WHERE file_uuid='$UUID'" 2>/dev/null || echo "?")
 FACES=$("$PG_BIN/psql" -U "$DB_USER" -d "$DB_NAME" -t -A -c "SELECT COUNT(*) FROM dev.face_detections WHERE file_uuid='$UUID'" 2>/dev/null || echo "?")
+IDENTS=$("$PG_BIN/psql" -U "$DB_USER" -d "$DB_NAME" -t -A -c "SELECT COUNT(*) FROM dev.identities WHERE file_uuid='$UUID'" 2>/dev/null || echo "?")
+TKG_NODES=$("$PG_BIN/psql" -U "$DB_USER" -d "$DB_NAME" -t -A -c "SELECT COUNT(*) FROM dev.tkg_nodes WHERE file_uuid='$UUID'" 2>/dev/null || echo "?")
+TKG_EDGES=$("$PG_BIN/psql" -U "$DB_USER" -d "$DB_NAME" -t -A -c "SELECT COUNT(*) FROM dev.tkg_edges WHERE file_uuid='$UUID'" 2>/dev/null || echo "?")

 echo ""
 echo "=== Deploy Complete ==="
-echo "  UUID:    $UUID"
-echo "  Chunks:  $CHUNKS"
-echo "  Faces:   $FACES"
-echo "  Output:  $OUTPUT_DIR/"
+echo "  UUID:      $UUID"
+echo "  Chunks:    $CHUNKS"
+echo "  Faces:     $FACES"
+echo "  Identities: $IDENTS"
+echo "  TKG nodes: $TKG_NODES"
+echo "  TKG edges: $TKG_EDGES"
+echo "  Output:    $OUTPUT_DIR/"
 echo ""
-echo "Next: trigger pipeline processing"
-echo "  curl -X POST http://localhost:3003/api/v1/file/$UUID/process"
+echo "Package is self-contained — no further processing needed."
 echo ""
-echo "Or open the offline report:"
-echo "  python3 render_offline_report.py $OUTPUT_DIR/$UUID.sqlite"
+echo "Offline report:"
+echo "  python3 scripts/render_offline_report.py $OUTPUT_DIR/$UUID.sqlite"
--- a/scripts/export_file_package.py
+++ b/scripts/export_file_package.py
@@ -13,6 +13,8 @@ TABLES = [
    ("dev.chunk", "file_uuid"),
    ("dev.chunk_vectors", "uuid"),
    ("dev.face_detections", "file_uuid"),
+    ("dev.tkg_nodes", "file_uuid"),
+    ("dev.tkg_edges", "file_uuid"),
 ]

 def main():
@@ -47,8 +49,9 @@ def main():
                    f.write("\n")
                f.write("\\.\n\n")

-        # Export identities referenced by this file's face_detections
-        f.write(f"-- dev.identities (referenced by face_detections WHERE file_uuid='{uuid}')\n")
+        # Export identities for this file (by file_uuid column) plus global identities
+        # Global: tmdb + merged + user_defined (exclude inactive auto)
+        f.write(f"-- dev.identities (WHERE file_uuid='{uuid}' OR global tmdb/merged/user_defined)\n")
        r = subprocess.run(
            [f"{PG_BIN}/psql", "-U", "accusys", "-d", "momentry", "-t", "-A",
             "-c", "SELECT string_agg(column_name, ', ' ORDER BY ordinal_position) FROM information_schema.columns WHERE table_schema='dev' AND table_name='identities' AND is_updatable='YES'"],
@@ -56,7 +59,7 @@ def main():
        cols = r.stdout.strip()
        r = subprocess.run(
            [f"{PG_BIN}/psql", "-U", "accusys", "-d", "momentry", "-c",
-             f"COPY (SELECT DISTINCT i.* FROM dev.identities i INNER JOIN dev.face_detections fd ON fd.identity_id = i.id WHERE fd.file_uuid = '{uuid}') TO STDOUT WITH CSV HEADER"],
+             f"COPY (SELECT * FROM dev.identities WHERE file_uuid = '{uuid}' OR (file_uuid IS NULL AND source IN ('tmdb', 'merged', 'user_defined'))) TO STDOUT WITH CSV HEADER"],
            capture_output=True, text=True, timeout=60)
        if r.stdout.strip():
            f.write(f"COPY dev.identities ({cols}) FROM STDIN WITH CSV HEADER;\n")
@@ -74,7 +77,7 @@ def main():
        cols = r.stdout.strip()
        r = subprocess.run(
            [f"{PG_BIN}/psql", "-U", "accusys", "-d", "momentry", "-c",
-             f"COPY (SELECT DISTINCT ib.* FROM dev.identity_bindings ib INNER JOIN dev.face_detections fd ON fd.identity_id = ib.identity_id WHERE fd.file_uuid = '{uuid}') TO STDOUT WITH CSV HEADER"],
+             f"COPY (SELECT ib.* FROM dev.identity_bindings ib WHERE ib.identity_value IN (SELECT trace_id::text FROM dev.face_detections WHERE file_uuid = '{uuid}' AND trace_id IS NOT NULL) AND EXISTS (SELECT 1 FROM dev.face_detections fd WHERE fd.identity_id = ib.identity_id AND fd.file_uuid = '{uuid}' AND fd.trace_id IS NOT NULL)) TO STDOUT WITH CSV HEADER"],
            capture_output=True, text=True, timeout=60)
        if r.stdout.strip():
            f.write(f"COPY dev.identity_bindings ({cols}) FROM STDIN WITH CSV HEADER;\n")
@@ -111,6 +114,9 @@ def main():
        capture_output=True, text=True, timeout=15)
    if r.stdout.strip():
        info = json.loads(r.stdout.strip())
+        info["momentry_version"] = "1.0.0"  # keep in sync with Cargo.toml version
+        info["momentry_build"] = subprocess.run(["git", "rev-parse", "--short", "HEAD"],
+            capture_output=True, text=True, timeout=5).stdout.strip()
        with open(os.path.join(outdir, "file_info.json"), "w") as f:
            json.dump(info, f, indent=2)
        print(f"  file_info.json")
--- a/scripts/export_sqlite.py
+++ b/scripts/export_sqlite.py
@@ -87,7 +87,7 @@ pg_to_sqlite(

 # chunk
 pg_to_sqlite(
-    "SELECT file_uuid, chunk_id, chunk_type, start_time, end_time, fps, start_frame, end_frame, text_content, metadata->>'speaker_id' as speaker_id FROM dev.chunk WHERE file_uuid=%s AND chunk_type='sentence' ORDER BY chunk_id",
+    "SELECT file_uuid, chunk_id, chunk_type, start_time, end_time, fps, start_frame, end_frame, text_content, metadata->>'speaker_id' as speaker_id FROM dev.chunk WHERE file_uuid=%s ORDER BY chunk_id",
    "chunk",
    """CREATE TABLE IF NOT EXISTS chunk (
        file_uuid TEXT, chunk_id TEXT, chunk_type TEXT,
--- a/scripts/identity_bind.py
+++ b/scripts/identity_bind.py
@@ -77,11 +77,11 @@ for cluster_id in sorted(set(labels)):
    # Create new identity
    identity_uuid = None
    cur.execute("""
-        INSERT INTO dev.identities (name, identity_type, source, status, created_at)
-        VALUES (%s, 'face', 'auto', 'active', NOW())
-        ON CONFLICT (name) DO UPDATE SET status = 'active'
+        INSERT INTO dev.identities (name, identity_type, source, status, created_at, file_uuid)
+        VALUES (%s, 'face', 'auto', 'active', NOW(), %s)
+        ON CONFLICT (name) DO UPDATE SET status = 'active', file_uuid = COALESCE(dev.identities.file_uuid, %s)
        RETURNING id
-    """, (f"PERSON_{UUID[:8]}_{cluster_id}",))
+    """, (f"PERSON_{UUID[:8]}_{cluster_id}", UUID, UUID))
    identity_id = cur.fetchone()[0]
    cluster_to_identity[cluster_id] = identity_id
    print(f"  Cluster {cluster_id}: new identity {identity_id} (PERSON_{cluster_id})")
--- a/scripts/match_identities_to_tmdb.py
+++ b/scripts/match_identities_to_tmdb.py
@@ -0,0 +1,157 @@
+#!/opt/homebrew/bin/python3.11
+"""
+Match auto-generated identities to TMDB identities via centroid embedding similarity.
+
+For each auto identity linked to the file's face detections, the centroid of its
+face embeddings is compared against every TMDB identity that has a face_embedding.
+When the best cosine similarity reaches THRESHOLD, the file's face_detections and
+the auto identity's identity_bindings are re-pointed to the TMDB identity, and the
+auto identity row is marked source='merged' with the matched tmdb_id.
+
+Usage: python3 match_identities_to_tmdb.py <file_uuid>
+"""
+import sys
+import psycopg2
+import psycopg2.extras
+import numpy as np
+
+DB = "dbname=momentry user=accusys host=localhost"
+THRESHOLD = 0.55
+
+
+def _parse_embedding(value, dtype=np.float32):
+    """Convert a stored embedding into a 1-D numpy array, or None if it is empty.
+
+    Handles both the pgvector text form ("[0.1,0.2,...]") and an already decoded
+    numeric sequence, so the same code works whether or not the driver adapts the
+    column type.
+    """
+    if value is None:
+        return None
+    if isinstance(value, str):
+        body = value.strip().strip("[]")
+        if not body:
+            return None
+        return np.array([float(x) for x in body.split(",")], dtype=dtype)
+    return np.asarray(value, dtype=dtype)
+
+
+def cosine_similarity(a, b):
+    """Return the cosine similarity of two vectors as a float (0.0 if either is all-zero)."""
+    na = np.linalg.norm(a)
+    nb = np.linalg.norm(b)
+    if na == 0 or nb == 0:
+        return 0.0
+    return float(np.dot(a, b) / (na * nb))
+
+
+def main():
+    """Merge this file's auto identities into matching TMDB identities."""
+    # NOTE(review): the fallback UUID is kept for backward compatibility; it looks
+    # like a development default and means a bare invocation modifies that file.
+    uuid = sys.argv[1] if len(sys.argv) > 1 else "aeed71342a899fe4b4c57b7d41bcb692"
+    conn = psycopg2.connect(DB)
+    try:
+        cur = conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor)
+
+        # Load TMDB identities with face_embedding (read as text, parsed below)
+        cur.execute("""
+            SELECT id, name, tmdb_id, face_embedding::text as emb_text
+            FROM dev.identities
+            WHERE source = 'tmdb' AND face_embedding IS NOT NULL
+        """)
+        tmdb_identities = []
+        for row in cur.fetchall():
+            emb = _parse_embedding(row["emb_text"], dtype=np.float64)
+            if emb is None:
+                continue
+            tmdb_identities.append({
+                "id": row["id"],
+                "name": row["name"],
+                "tmdb_id": row["tmdb_id"],
+                "embedding": emb,
+            })
+        print(f"Loaded {len(tmdb_identities)} TMDB identities with embeddings")
+
+        if not tmdb_identities:
+            print("No TMDB identities found. Run tmdb_embed_extractor.py first.")
+            return
+
+        # Get auto identities linked to this file
+        cur.execute("""
+            SELECT DISTINCT i.id, i.name
+            FROM dev.identities i
+            INNER JOIN dev.face_detections fd ON fd.identity_id = i.id
+            WHERE fd.file_uuid = %s AND i.source = 'auto'
+        """, (uuid,))
+        auto_rows = cur.fetchall()
+        print(f"Auto identities for {uuid[:8]}...: {len(auto_rows)}")
+
+        matched = 0
+        for row in auto_rows:
+            auto_id = row["id"]
+            auto_name = row["name"]
+
+            # Get face embeddings from face_detections for this identity
+            cur.execute("""
+                SELECT embedding
+                FROM dev.face_detections
+                WHERE file_uuid = %s AND identity_id = %s AND embedding IS NOT NULL
+                LIMIT 500
+            """, (uuid, auto_id))
+            all_embs = [_parse_embedding(r["embedding"]) for r in cur.fetchall()]
+            all_embs = [e for e in all_embs if e is not None]
+            if not all_embs:
+                continue
+
+            # Compute centroid
+            centroid = np.mean(all_embs, axis=0)
+
+            # Match against TMDB identities
+            best_sim = 0.0
+            best_tmdb = None
+            for tmdb in tmdb_identities:
+                sim = cosine_similarity(centroid, tmdb["embedding"])
+                if sim > best_sim:
+                    best_sim = sim
+                    best_tmdb = tmdb
+
+            if best_tmdb and best_sim >= THRESHOLD:
+                fm = best_tmdb["name"]
+                tmdb_identity_id = best_tmdb["id"]
+                print(f"  {auto_name} → {fm} (sim={best_sim:.3f})")
+
+                # Update face_detections to point to TMDB identity
+                cur.execute("""
+                    UPDATE dev.face_detections
+                    SET identity_id = %s
+                    WHERE file_uuid = %s AND identity_id = %s
+                """, (tmdb_identity_id, uuid, auto_id))
+
+                # Update identity_bindings to point to TMDB identity
+                cur.execute("""
+                    UPDATE dev.identity_bindings
+                    SET identity_id = %s
+                    WHERE identity_id = %s
+                """, (tmdb_identity_id, auto_id))
+
+                # Mark auto identity as merged (or we could delete it)
+                cur.execute("""
+                    UPDATE dev.identities
+                    SET source = 'merged', tmdb_id = %s
+                    WHERE id = %s
+                """, (best_tmdb["tmdb_id"], auto_id))
+
+                matched += 1
+
+        conn.commit()
+        print(f"\nMatched {matched}/{len(auto_rows)} auto identities to TMDB")
+        print(f"Threshold: {THRESHOLD}")
+    finally:
+        # Closing without commit discards pending changes, so an exception above
+        # leaves the database untouched and never leaks the connection.
+        conn.close()
+
+
+if __name__ == "__main__":
+    main()
--- a/scripts/swift_processors/swift_ocr.swift
+++ b/scripts/swift_processors/swift_ocr.swift
@@ -126,7 +126,7 @@ struct SwiftOCR: ParsableCommand {
                let item: [String: Any] = [
                    "text": candidate.string,
                    "x": Int(bb.origin.x * CGFloat(cgW)),
-                    "y": Int(bb.origin.y * CGFloat(cgH)),
+                    "y": Int((1.0 - bb.origin.y - bb.size.height) * CGFloat(cgH)),
                    "width": Int(bb.size.width * CGFloat(cgW)),
                    "height": Int(bb.size.height * CGFloat(cgH)),
                    "confidence": conf
@@ -183,16 +183,19 @@ struct SwiftOCR: ParsableCommand {
        guard (try? handler.perform([request])) != nil,
              let results = request.results else { return texts }

+        let cgW = CGFloat(CVPixelBufferGetWidth(pixelBuffer))
+        let cgH = CGFloat(CVPixelBufferGetHeight(pixelBuffer))
+
        for obs in results {
            guard let candidate = obs.topCandidates(1).first,
                  candidate.confidence > 0.2 else { continue }
            let bb = obs.boundingBox
            texts.append([
                "text": candidate.string,
-                "x": Int(bb.origin.x * 640),
-                "y": Int(bb.origin.y * 360),
-                "width": Int(bb.size.width * 640),
-                "height": Int(bb.size.height * 360),
+                "x": Int(bb.origin.x * cgW),
+                "y": Int((1.0 - bb.origin.y - bb.size.height) * cgH),
+                "width": Int(bb.size.width * cgW),
+                "height": Int(bb.size.height * cgH),
                "confidence": candidate.confidence
            ])
        }
--- a/scripts/swift_processors/swift_pose.swift
+++ b/scripts/swift_processors/swift_pose.swift
@@ -151,17 +151,19 @@ struct SwiftPose: ParsableCommand {
                        if let mapped = nameMap[rawName] {
                            rawName = mapped
                        }
+                        let px = point.location.x * CGFloat(w)
+                        let py = CGFloat(h) - point.location.y * CGFloat(h)
                        keypoints.append([
                            "name": rawName.isEmpty ? "\(joint)" : rawName,
-                            "x": point.location.x * CGFloat(w),
-                            "y": point.location.y * CGFloat(h),
+                            "x": px,
+                            "y": py,
                            "confidence": point.confidence,
                        ])
                        if point.confidence > 0.1 {
-                            minX = min(minX, point.location.x)
-                            minY = min(minY, point.location.y)
-                            maxX = max(maxX, point.location.x)
-                            maxY = max(maxY, point.location.y)
+                            minX = min(minX, px)
+                            minY = min(minY, py)
+                            maxX = max(maxX, px)
+                            maxY = max(maxY, py)
                        }
                    }
                }
@@ -171,10 +173,10 @@ struct SwiftPose: ParsableCommand {
                ]
                if maxX > minX {
                    bbox = [
-                        "x": Int(minX * CGFloat(w)),
-                        "y": Int(minY * CGFloat(h)),
-                        "width": Int((maxX - minX) * CGFloat(w)),
-                        "height": Int((maxY - minY) * CGFloat(h)),
+                        "x": Int(minX),
+                        "y": Int(minY),
+                        "width": Int(maxX - minX),
+                        "height": Int(maxY - minY),
                    ]
                }