feat: trace-level matching, health watcher/worker status, timezone config

2026-05-21 01:08:30 +08:00
parent 8ede4be159
commit bebaa743ed
60 changed files with 6110 additions and 1586 deletions
--- a/.env.development
+++ b/.env.development
@@ -29,7 +29,7 @@ REDIS_PASSWORD=accusys
 # Qdrant Vector Database - Collection isolation
 QDRANT_URL=http://localhost:6333
 QDRANT_API_KEY=Test3200Test3200Test3200
-QDRANT_COLLECTION=momentry_dev_v1
+QDRANT_COLLECTION=momentry_dev_rule1_v2

 # Paths
 MOMENTRY_OUTPUT_DIR=/Users/accusys/momentry/output_dev
--- a/.env.example
+++ b/.env.example
@@ -22,6 +22,9 @@ QDRANT_COLLECTION=momentry_rule1
 # === API Keys ===
 MOMENTRY_API_KEY=muser_your_key_here
 MOMENTRY_DEMO_API_KEY=muser_your_demo_key_here
+JWT_SECRET=your_jwt_secret_here_change_in_production
+SFTPGO_BASE_URL=http://127.0.0.1:8080
+
 TMDB_API_KEY=your_tmdb_api_key_here

 # === LLM ===
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -178,6 +178,18 @@ dependencies = [
 "password-hash",
 ]

+[[package]]
+name = "async-compression"
+version = "0.4.42"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e79b3f8a79cccc2898f31920fc69f304859b3bd567490f75ebf51ae1c792a9ac"
+dependencies = [
+ "compression-codecs",
+ "compression-core",
+ "pin-project-lite",
+ "tokio",
+]
+
 [[package]]
 name = "async-lock"
 version = "3.4.2"
@@ -615,6 +627,23 @@ dependencies = [
 "static_assertions",
 ]

+[[package]]
+name = "compression-codecs"
+version = "0.4.38"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ce2548391e9c1929c21bf6aa2680af86fe4c1b33e6cea9ac1cfeec0bd11218cf"
+dependencies = [
+ "compression-core",
+ "flate2",
+ "memchr",
+]
+
+[[package]]
+name = "compression-core"
+version = "0.4.32"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cc14f565cf027a105f7a44ccf9e5b424348421a1d8952a8fc9d499d313107789"
+
 [[package]]
 name = "concurrent-queue"
 version = "2.5.0"
@@ -4861,13 +4890,18 @@ version = "0.6.8"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "d4e6559d53cc268e5031cd8429d05415bc4cb4aefc4aa5d6cc35fbf5b924a1f8"
 dependencies = [
+ "async-compression",
 "bitflags 2.11.1",
 "bytes",
+ "futures-core",
 "futures-util",
 "http",
 "http-body",
+ "http-body-util",
 "iri-string",
 "pin-project-lite",
+ "tokio",
+ "tokio-util",
 "tower 0.5.3",
 "tower-layer",
 "tower-service",
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -55,7 +55,7 @@ sqlx = { version = "0.8", features = ["runtime-tokio", "postgres", "sqlite", "js
 mongodb = { version = "2", features = ["tokio-runtime"] }
 bson = { version = "2", features = ["chrono-0_4"] }
 qdrant-client = "1.7"
-reqwest = { version = "0.12", features = ["json"] }
+reqwest = { version = "0.12", features = ["json", "gzip"] }
 pgvector = { version = "0.3", features = ["sqlx"] }

 # HTTP Server
--- a/build.rs
+++ b/build.rs
@@ -60,7 +60,8 @@ fn sha256_hex(data: &[u8]) -> String {
    use std::io::Write;
    use std::process::{Command, Stdio};
    if let Ok(mut child) = Command::new("shasum")
-        .arg("-a").arg("256")
+        .arg("-a")
+        .arg("256")
        .stdin(Stdio::piped())
        .stdout(Stdio::piped())
        .spawn()
--- a/scripts/pycache/redis_publisher.cpython-311.pyc
+++ b/scripts/pycache/redis_publisher.cpython-311.pyc
--- a/scripts/checksums.sha256
+++ b/scripts/checksums.sha256
@@ -103,7 +103,7 @@ f4d1b4334a49357b74b80e390ad5a3d16263e51cbe5cab661af92bd2e9721f02  ./face_process
 802015c73dfce0866f2a0bc94c645aa35ba30a6de78244af23090bb1f1828c6e  ./face_processor_mps.py
 96ffdbde3f4d87e9942f9e1f4c93cbd999dc404b43e00d4cdcbb22de3c0f16b7  ./face_processor_optimized.py
 17e7d0bd142bddfead94b1dd959c1f41c0dad7063ffc677dff1a99d62aab6cf8  ./face_processor_v1.py
-15877adf5c160d861da688a25b93fd2edc189f326f9646ffb4de063e554f773a  ./face_processor.py
+d6ddad29a5e53b43b887554072d7965f0535e47fb62dad1a8b87e44fa1be6015  ./face_processor.py
 8edab61189ad1a8fa60c203077e814e82d46c5bae67054fa2ab1958e199c05f9  ./face_recognition_processor.py
 9ea19f357b3fcec6c8b3875c538e53cb46e407ab188cd544963e0123e535fa03  ./face_registration.py
 72648816de611fd9b84d2b98c177b8b4f24374024b69184e8151c06cf44d633b  ./face_statistics_report.py
@@ -174,15 +174,15 @@ fd39b779a0337f521940f3f7b159931f1f207f200eefd610183781fdcf3dfafd  ./object_searc
 42d2952fc78b57302b0d12bc3d45790a2c2c46d4ffa3c713a82686134bd63f13  ./ocr_benchmark_runner.py
 7b3ccb5c4ddd4c62c5ad04d0e3aafaecc2c1441012b6a98613cdcf055e2e50e8  ./ocr_processor_contract_v1.py
 271023eec42d6be4a1ce6ae2ce3f29e825210a57e6bb37554a6f7fdf54616f9a  ./ocr_processor_mps.py
-e666bc8488bb93cc45bcd6a70a4ef38a74af6631d7b87a789381bfbdab4569f5  ./ocr_processor.py
+2e73c41285e52ef013594fcd4d20df9f5781bfc26bcf62e54dd2c04ec44200c3  ./ocr_processor.py
 62196108cb3337b5f9a873d70d2981ac8f49152369afbcc8a12b3a13de579e80  ./opencv_stamp_search.py
 b2e8d552c272fd173c77693e9453a85fe16dfc12f7c2cd304d299c6188c14077  ./paligemma_vs_gdino.py
-2c6767e763cf69917af832b8383528f754c65db5a3f02cb4d63e3f896d5920b6  ./parent_chunk_5w1h.py
+1534d5b7617dbae77f7a37a2c33a89b90f965247a6828f00b73ea6b720f6f4fc  ./parent_chunk_5w1h.py
 5208c738d4b615282813d351daf09872ce516121bb604caa64968ef5e52c53d3  ./pipeline_checklist.py
 8f80c3a2be5c330e2d1853d9250a171c75db84598dbf3304280c42237ed4fb1f  ./pipeline_status.py
 94db44c0f49115a677d117d4901a1b7991c1517905300eaa495dd62b8ac1c79c  ./pose_processor_contract_v1.py
 167dee5e42c6bd46674bcffcfd92f368fc0b48a1f42c459c806853b281bc6482  ./pose_processor_mps.py
-a1cdb1efd992d229829ae156d8aa439347c51d664e2a606c14d2274a11c93a66  ./pose_processor.py
+a6ef3a785ef5c6dc47fa38dbed80d76bc7d4bf48cbaf0f7edb3d26df98d7262c  ./pose_processor.py
 45e6798dc5900f2f7c8776a2d260c122aae5068a075256b8a5c02e8d0be6c131  ./probe_file.py
 139a68b5915680ec697d4bb5420adbd20b89637de2c16a15d68aca4fc22da02b  ./qa/executor.py
 4a59b36c29e1ee6e2b169db3b0201d2f7088c6ccbfdf642a3b522aeb182bbeea  ./qa/judges/facenet.py
@@ -197,7 +197,7 @@ c4e4424aad1847d822e9cf7dc98a1b2e903735a61e8ec056c6a9be75f79486bd  ./qa/pipeline.
 01c7b3c30c1531224f9605f0ee633285fe8489ab2d0a3c9c6a41f2b2b60d6626  ./quick_stamp_search.py
 e3143673a2bff6139e05c82446fd8770c4b7e59a854a42c3b29662f5ac75efe2  ./rebuild_parents.py
 4aa98981632d4f8a11039c510e86aa296ae1cd4b399fc871ed664ac11e445bd9  ./rebuild_story_content.py
-45c437b412d34c7c6d5758e94b7205a2956b32b6fe170c3f56db7231ec6f5a15  ./redis_publisher.py
+205cfc47b603b5ab94d97dae8c25486b342b7c2858afe6d6dae27615ca0b2aeb  ./redis_publisher.py
 750f778946b56bc57c47d9d2295332bb0f8cec2c1aa03c6b882d39ef4432673d  ./refine_search.py
 0f8a6a6866a5797e964d3b17e2b7ef146fe7a798f09fcea982fcda6f629b4d06  ./regenerate_parent_5w1h.py
 3ee192b623f290136b36bd63abd018aad6e6639a9543970c3415734628b33bd6  ./register_sample_faces.py
@@ -303,7 +303,7 @@ d0ec8f4a67c1a1eb1356ad6e9b2f466575691bd336621cdbbfd31dd10159f2dc  ./utils/test_m
 ff98864f1b11795cc3bb64f30ccb6f8609771ddc7a5df2c003ba7c2233d16fc2  ./vectorize_chunks.py
 5880c128400e6e36c8eb7dffd009dbbc99dd13f8575b0037bdc854e25ddc41fb  ./video_comparison_statistics.py
 0a1501ffdc027236cdf88706b3d61229e2998ab268fd57fb60e399ccb734b6a1  ./vision_agent.py
-6831281de868d24ecd84151965909b57f895d534114d24300a81c396492c19f8  ./visual_chunk_processor.py
+eac8f90fbbb655614abcefc4b887e346bf94db5f015d33d37bc9514fb030489d  ./visual_chunk_processor.py
 c165dfc5fc981dc731b25ef414184ee58e56b73b148d41a32fdce985c701efd5  ./visualize_stamp.py
 6c65a82fdd1d585e20bee4fcb2d1bdec2e6220bda71d6ef9cd00d6a3cf74c4d7  ./voice_embedding_extractor.py
 2b3a7b357db4ddd07ca30bf200c6600724e33441d8def0a4d9a39673e2cfb1c0  ./weather_sound_detector.py
@@ -343,3 +343,4 @@ b2ee4f8a445a7e83f7b99ae5d4139fd525d9e3e58a360bfef054d441aa21d901  ./swift_proces
 fbca5ba0783153c4e21c174b0cbf75b582514f6ef0f92750a82d3178bc170f48  ./test_search_modes.sh
 f8c1647cdb4db8adef1829e41fbecd97f6b3b2e62927f195cd8e68127876069d  ./troubleshoot.sh
 992296b5218f3ef97ce53325be12f71848f3c3aeb3ee81d764bfe4bd61e1de05  ./verify_package.sh
+b6f95fa070cc0258bc5d005f10d13025ba8b08d3ee1598bcdad405ff1d3332ed  ./tmdb_agent.py
--- a/scripts/extract_face_embedding.py
+++ b/scripts/extract_face_embedding.py
@@ -0,0 +1,105 @@
+#!/opt/homebrew/bin/python3.11
+"""
+Extract face embedding from an image using InsightFace + CoreML FaceNet.
+
+Usage:
+    python3 scripts/extract_face_embedding.py <image_path>
+
+Output: JSON with "embedding" key (512 floats) or "error" key.
+Exit code: 0 on success, 1 on failure.
+"""
+import json
+import os
+import sys
+
+# Prefer venv if it exists (has insightface + coremltools installed)
+VENV_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "venv")
+VENV_SITE = os.path.join(VENV_PATH, "lib", "python3.11", "site-packages")
+if os.path.isdir(VENV_SITE):
+    sys.path.insert(0, VENV_SITE)
+
+import cv2
+import numpy as np
+
+MODELS_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "models")
+FACENET_PATH = os.path.join(MODELS_DIR, "facenet512.mlpackage")
+
+
+def _fail(message: str):
+    """Print a JSON error object on stdout and exit with status 1."""
+    print(json.dumps({"error": message}))
+    sys.exit(1)
+
+
+def extract_embedding(image_path: str):
+    """Print the embedding of the largest face in image_path as JSON.
+
+    Detects faces with InsightFace, crops the largest bounding box, resizes it
+    to 160x160, scales pixels to [-1, 1] and runs the CoreML FaceNet model.
+    On success prints {"embedding": [...]}; on a handled failure prints
+    {"error": "..."} and exits with status 1.
+    """
+    import io
+    import warnings
+    warnings.filterwarnings("ignore")
+
+    # Suppress InsightFace verbose stdout during model loading
+    old_stdout = sys.stdout
+    sys.stdout = io.StringIO()
+    try:
+        from insightface.app import FaceAnalysis
+        import coremltools as ct
+
+        app = FaceAnalysis(name="buffalo_l", providers=["CPUExecutionProvider"])
+        app.prepare(ctx_id=0, det_thresh=0.5)
+        coreml_model = ct.models.MLModel(FACENET_PATH)
+    finally:
+        sys.stdout = old_stdout
+
+    # Read through a context manager so the handle is closed, and report an
+    # unreadable path as JSON instead of an uncaught traceback.
+    try:
+        with open(image_path, "rb") as fh:
+            img_bytes = fh.read()
+    except OSError as e:
+        _fail(f"Failed to read image: {e}")
+    if not img_bytes:
+        # An empty buffer cannot be decoded; fail before calling into OpenCV
+        _fail("Failed to decode image")
+    nparr = np.frombuffer(img_bytes, np.uint8)
+    img = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
+    if img is None:
+        _fail("Failed to decode image")
+
+    # Detect faces
+    faces = app.get(img)
+    if not faces:
+        _fail("No face detected")
+
+    # Keep the face with the largest bounding-box area, clamped to the image
+    largest = max(faces, key=lambda f: (f.bbox[2] - f.bbox[0]) * (f.bbox[3] - f.bbox[1]))
+    x1, y1, x2, y2 = [int(v) for v in largest.bbox]
+    x1, y1 = max(0, x1), max(0, y1)
+    x2, y2 = min(img.shape[1], x2), min(img.shape[0], y2)
+    if x2 <= x1 or y2 <= y1:
+        _fail("Invalid face bbox")
+
+    face_img = img[y1:y2, x1:x2]
+    face_img = cv2.resize(face_img, (160, 160))
+    normalized = (face_img.astype(np.float32) / 127.5) - 1.0
+    normalized = np.transpose(normalized, (2, 0, 1))
+    input_array = np.expand_dims(normalized, axis=0)
+
+    result = coreml_model.predict({"input": input_array})
+    # Take the first output whose name starts with "var_"; fail cleanly if absent
+    emb_key = next((k for k in result if k.startswith("var_")), None)
+    if emb_key is None:
+        _fail("Model returned no embedding output")
+    embedding = result[emb_key].flatten().tolist()
+    print(json.dumps({"embedding": embedding}))
+
+
+if __name__ == "__main__":
+    if len(sys.argv) < 2:
+        _fail("Usage: extract_face_embedding.py <image_path>")
+    extract_embedding(sys.argv[1])
--- a/scripts/face_landmark_qc.py
+++ b/scripts/face_landmark_qc.py
@@ -2,23 +2,30 @@
 """
 Face landmark QC: verify eyes/nose are within face bounding box.
 Flags faces in DB where landmarks don't match the bbox.
-Usage: python3 face_landmark_qc.py <file_uuid> [--threshold 0.5] [--fix]
+Usage: python3 face_landmark_qc.py <file_uuid> [--threshold 0.5] [--apply]
 """
-import sys, json, psycopg2, argparse
+import sys, json, psycopg2, argparse, os

 parser = argparse.ArgumentParser()
 parser.add_argument("uuid")
 parser.add_argument("--threshold", "-t", type=float, default=0.5,
    help="Fraction of landmark points that must be inside bbox (default: 0.5)")
-parser.add_argument("--fix", action="store_true", help="Update face_detections QC flag in DB")
+parser.add_argument("--apply", action="store_true",
+    help="Write qc_ok to face_detections.metadata in DB")
+parser.add_argument("--schema", default="dev",
+    help="DB schema (default: dev)")
 args = parser.parse_args()

 UUID = args.uuid
 THRESHOLD = args.threshold
-FACE_PATH = f"/Users/accusys/momentry/output_dev/{UUID}.face.json"
+SCHEMA = args.schema
+OUTPUT_DIR = os.environ.get("MOMENTRY_OUTPUT_DIR", f"/Users/accusys/momentry/output_dev")
+FACE_PATH = f"{OUTPUT_DIR}/{UUID}.face.json"

 print(f"=== Face Landmark QC ===")
 print(f"UUID: {UUID}")
+print(f"Schema: {SCHEMA}")
+print(f"Face file: {FACE_PATH}")
 print(f"Threshold: {THRESHOLD * 100:.0f}% points must be inside bbox")

 # Load face.json
@@ -29,8 +36,7 @@ total_faces = 0
 faces_with_lm = 0
 good_faces = 0
 bad_faces = 0
-bad_frame_ids = set()
-bad_face_details = []
+qc_results = []  # list of (frame, face_idx, qc_ok, x, y, w, h)

 # Build frame lookup for fast access
 frame_map = {}
@@ -42,13 +48,22 @@ for frame_num, frm in frame_map.items():
        total_faces += 1
        lm = face.get('landmarks')
        if not lm:
+            bbox = face.get('bbox', {})
+            qc_results.append((frame_num, fi, False, bbox.get('x'), bbox.get('y'),
+                               bbox.get('width'), bbox.get('height')))
+            bad_faces += 1
            continue
        faces_with_lm += 1

-        x, y, w, h = face['x'], face['y'], face['width'], face['height']
+        bbox = face.get('bbox', {})
+        x, y, w, h = bbox.get('x'), bbox.get('y'), bbox.get('width'), bbox.get('height')
+        if None in (x, y, w, h):
+            qc_results.append((frame_num, fi, False, x, y, w, h))
+            bad_faces += 1
+            continue
        inside_pts = 0
        total_pts = 0
-        eye_nose_inside = 0  # at least one point from each eye+nose inside
+        eye_nose_inside = 0

        for lm_type in ['left_eye', 'right_eye', 'nose']:
            points = lm.get(lm_type, [])
@@ -63,53 +78,39 @@ for frame_num, frm in frame_map.items():
                eye_nose_inside += 1

        ratio = inside_pts / max(1, total_pts)
+        qc_ok = (ratio >= THRESHOLD and eye_nose_inside >= 2)

-        if ratio >= THRESHOLD and eye_nose_inside >= 2:
+        qc_results.append((frame_num, fi, qc_ok, x, y, w, h))
+        if qc_ok:
            good_faces += 1
        else:
            bad_faces += 1
-            bad_frame_ids.add(frame_num)
-            bad_face_details.append({
-                'frame': frame_num,
-                'face_idx': fi,
-                'bbox': [x, y, w, h],
-                'inside_pts': inside_pts,
-                'total_pts': total_pts,
-                'ratio': ratio,
-                'eye_nose_ok': eye_nose_inside,
-            })

 print(f"\nTotal faces: {total_faces:,}")
 print(f"Faces with landmarks: {faces_with_lm:,}")
 print(f"✅ Good (≥{THRESHOLD*100:.0f}% inside + ≥2 features): {good_faces:,}")
-print(f"❌ Bad: {bad_faces:,}")
+print(f"❌ Bad (no eyes or insufficient landmarks): {bad_faces:,}")
 print(f"Quality pass rate: {100 * good_faces / max(1, faces_with_lm):.1f}%")

-print(f"\nBad faces in {len(bad_frame_ids)} unique frames")
-
-# Show sample bad faces
-print(f"\nSample bad faces:")
-for bf in sorted(bad_face_details, key=lambda b: b['ratio'])[:5]:
-    print(f"  frame={bf['frame']}, bbox={bf['bbox']}, {bf['inside_pts']}/{bf['total_pts']} inside ({bf['ratio']*100:.0f}%), eye/nose={bf['eye_nose_ok']}/3")
-
-# Show sample good faces
-print(f"\nSample good faces:")
-good_details = []
-for frame_num, frm in frame_map.items():
-    for face in frm.get('faces', []):
-        lm = face.get('landmarks')
-        if not lm:
-            continue
-        x, y, w, h = face['x'], face['y'], face['width'], face['height']
-        inside = sum(1 for pts in lm.values() for pt in pts
-            if (x <= pt[0] <= x + w) and (y <= pt[1] <= y + h))
-        total = sum(len(pts) for pts in lm.values())
-        if inside / max(1, total) >= THRESHOLD:
-            good_details.append((frame_num, x, y, w, h, inside, total))
-            if len(good_details) >= 5:
-                break
-    if len(good_details) >= 5:
-        break
-
-for g in good_details:
-    print(f"  frame={g[0]}, bbox=[{g[1]},{g[2]},{g[3]},{g[4]}], {g[5]}/{g[6]} inside ({100*g[5]/max(1,g[6]):.0f}%)")
+# Apply mode: write qc_ok to face_detections.metadata
+if args.apply:
+    print(f"\n=== Applying QC results to {SCHEMA}.face_detections ===")
+    db_url = os.environ.get("DATABASE_URL", "postgres://accusys@localhost:5432/momentry")
+    conn = psycopg2.connect(db_url)
+    cur = conn.cursor()
+    updated = 0
+    for frame_num, fi, qc_ok, x, y, w, h in qc_results:
+        qc_str = "true" if qc_ok else "false"
+        cur.execute(
+            f"UPDATE {SCHEMA}.face_detections "
+            f"SET metadata = jsonb_set(COALESCE(metadata, '{{}}'::jsonb), '{{qc_ok}}', '\"{qc_str}\"'::jsonb) "
+            f"WHERE file_uuid = %s AND frame_number = %s AND x = %s AND y = %s AND width = %s AND height = %s",
+            (UUID, frame_num, x, y, w, h)
+        )
+        if cur.rowcount > 0:
+            updated += 1
+    conn.commit()
+    cur.close()
+    conn.close()
+    print(f"Updated {updated} rows in {SCHEMA}.face_detections")
+    print(f"Skipped {len(qc_results) - updated} rows (no matching face_detections row)")
--- a/scripts/face_processor.py
+++ b/scripts/face_processor.py
@@ -13,6 +13,7 @@ Detection cost: near-zero CPU (Vision ANE)
 Embedding cost: near-zero CPU (CoreML ANE)
 """

+import re
 import sys
 import os
 import json
@@ -29,6 +30,7 @@ from pathlib import Path
 import coremltools as ct

 sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
+from redis_publisher import RedisPublisher

 SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
 SWIFT_BIN = os.path.join(SCRIPT_DIR, "swift_processors", ".build", "debug", "swift_face")
@@ -49,11 +51,12 @@ def classify_pose(roll: float, yaw: float) -> str:

 class FaceProcessorVision:
    def __init__(self, video_path: str, output_path: str, uuid: str = "",
-                 sample_interval: int = 3):
+                 sample_interval: int = 3, publisher: RedisPublisher = None):
        self.video_path = video_path
        self.output_path = output_path
        self.uuid = uuid
        self.sample_interval = sample_interval
+        self.publisher = publisher

        # Load CoreML FaceNet
        self.coreml_model = None
@@ -127,7 +130,33 @@ class FaceProcessorVision:

        print(f"[FACE_V2] Running: {' '.join(cmd)}")
        t0 = time.time()
-        subprocess.run(cmd, check=True)
+        log_path = swift_out + ".log"
+        last_pct = -1
+        # stderr is merged into the log file below, so proc.stderr is always None.
+        with open(log_path, "w") as log_f:
+            proc = subprocess.Popen(cmd, stdout=log_f, stderr=subprocess.STDOUT, text=True)
+            while proc.poll() is None:
+                time.sleep(10)
+                try:
+                    with open(log_path) as lf:
+                        for line in lf:
+                            m = re.search(r'(\d+)% complete', line)
+                            if m and int(m.group(1)) > last_pct:
+                                last_pct = int(m.group(1))
+                                if self.publisher:
+                                    self.publisher.progress("face", last_pct, 100, f"swift detect {last_pct}%")
+                except Exception:
+                    pass  # progress reporting is best-effort; never fail detection on it
+        if proc.returncode != 0:
+            try:
+                with open(log_path) as lf:
+                    log_output = lf.read().strip()
+            except OSError:
+                log_output = ""
+            if log_output:
+                print(log_output, file=sys.stderr)
+            raise RuntimeError(f"swift_face exited with code {proc.returncode}")
+
        elapsed = time.time() - t0
        print(f"[FACE_V2] Detection done in {elapsed:.1f}s")

@@ -156,6 +185,8 @@ class FaceProcessorVision:

        t0 = time.time()
        embed_count = 0
+        total_face_count = 0
+        last_pct = -1

        for frame_info in frames:
            frame_num = frame_info["frame"]
@@ -220,6 +251,12 @@ class FaceProcessorVision:
            if len(face_data["frames"]) % 100 == 0:
                elapsed = time.time() - t0
                print(f"[FACE_V2] {len(face_data['frames'])} frames, {embed_count} embeddings, {elapsed:.0f}s")
+                if self.publisher:
+                    pct = int(len(face_data["frames"]) * 100 / max(len(frames), 1))
+                    if pct > last_pct:
+                        last_pct = pct
+                        self.publisher.progress("face", len(face_data["frames"]), len(frames),
+                            f"{embed_count} faces", embed_count, "faces")

        self.video.release()

@@ -259,19 +296,36 @@ def main():
    parser.add_argument("--force", action="store_true")
    args = parser.parse_args()

+    publisher = RedisPublisher(args.uuid) if args.uuid else None
+    if publisher:
+        publisher.info("face", "FACE_START")
+
    if args.force and os.path.exists(args.output_path):
        os.remove(args.output_path)

    processor = FaceProcessorVision(
        args.video_path, args.output_path,
-        args.uuid, args.sample_interval
+        args.uuid, args.sample_interval, publisher
    )

    # Step 1: Vision detection (bbox + pose via ANE)
+    try:
        detection = processor.process_with_swift()
+    except Exception as e:
+        if publisher:
+            publisher.error("face", f"Detection failed: {e}")
+        raise

    # Step 2: CoreML embedding + save
+    try:
        processor.embed_and_save(detection)
+    except Exception as e:
+        if publisher:
+            publisher.error("face", f"Embedding failed: {e}")
+        raise
+
+    if publisher:
+        publisher.complete("face", f"{len(detection.get('frames',[]))} frames")

    # Clean up temp detection file
    swift_out = args.output_path.replace(".json", "_detect.json")
--- a/scripts/identity_bind.py
+++ b/scripts/identity_bind.py
@@ -81,10 +81,10 @@ for cluster_id in sorted(set(labels)):
        VALUES (%s, 'face', 'auto', 'active', NOW(), %s)
        ON CONFLICT (name) DO UPDATE SET status = 'active', file_uuid = COALESCE(dev.identities.file_uuid, %s)
        RETURNING id
-    """, (f"PERSON_{UUID[:8]}_{cluster_id}", UUID, UUID))
+    """, (f"stranger_{UUID}_{cluster_id}", UUID, UUID))
    identity_id = cur.fetchone()[0]
    cluster_to_identity[cluster_id] = identity_id
-    print(f"  Cluster {cluster_id}: new identity {identity_id} (PERSON_{cluster_id})")
+    print(f"  Cluster {cluster_id}: new identity {identity_id} (stranger_{UUID}_{cluster_id})")

 # Step 4: Create identity bindings
 print("Creating identity bindings...")
--- a/scripts/migrate_identity_files.py
+++ b/scripts/migrate_identity_files.py
@@ -0,0 +1,140 @@
+#!/opt/homebrew/bin/python3.11
+"""
+Migrate Identity Files — one-time: DB identities → filesystem identity.json
+
+Reads all identities from PostgreSQL, queries file bindings,
+and writes identity.json + _index.json to {OUTPUT_DIR}/identities/{uuid}/
+
+Usage:
+    python3 scripts/migrate_identity_files.py
+    python3 scripts/migrate_identity_files.py --db "dbname=momentry user=accusys"
+    python3 scripts/migrate_identity_files.py --output /path/to/output
+"""
+import argparse
+import json
+import os
+from datetime import datetime, timezone
+from pathlib import Path
+
+import psycopg2
+import psycopg2.extras
+
+
+def _fmt_time(value):
+    """Return value.isoformat(), or the current UTC time if value is empty."""
+    return value.isoformat() if value else datetime.now(timezone.utc).isoformat()
+
+
+def _load_file_bindings(conn, identity_id):
+    """Return one {file_uuid, trace_ids, face_count} dict per bound file."""
+    with conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cur:
+        cur.execute("""
+            SELECT fd.file_uuid,
+                   COALESCE(array_agg(DISTINCT fd.trace_id) FILTER (WHERE fd.trace_id IS NOT NULL), '{}') AS trace_ids,
+                   COUNT(*)::bigint AS face_count
+            FROM face_detections fd
+            WHERE fd.identity_id = %s
+            GROUP BY fd.file_uuid
+            ORDER BY fd.file_uuid
+        """, (identity_id,))
+        binding_rows = cur.fetchall()
+
+    file_bindings = []
+    for b in binding_rows:
+        trace_ids = b["trace_ids"]
+        if isinstance(trace_ids, list):
+            trace_ids = [int(t) for t in trace_ids if t is not None]
+        file_bindings.append({
+            "file_uuid": b["file_uuid"],
+            "trace_ids": trace_ids,
+            "face_count": int(b["face_count"]),
+        })
+    return file_bindings
+
+
+def main():
+    """Export every identity row to identity.json and write _index.json.
+
+    The connection is closed on every exit path, including the early return
+    when no identities exist and any exception raised while exporting.
+    """
+    parser = argparse.ArgumentParser(description="Migrate identities to filesystem")
+    parser.add_argument("--db", default=os.getenv("DATABASE_URL", "dbname=momentry user=accusys host=localhost"))
+    parser.add_argument("--output", default=os.getenv("MOMENTRY_OUTPUT_DIR", "/Users/accusys/momentry/output"))
+    args = parser.parse_args()
+
+    identities_root = Path(args.output) / "identities"
+    index = {}
+    migrated = 0
+
+    conn = psycopg2.connect(args.db)
+    try:
+        identities_root.mkdir(parents=True, exist_ok=True)
+
+        with conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cur:
+            cur.execute("""
+                SELECT id, uuid::text, name, identity_type, source, status,
+                       tmdb_id, tmdb_profile, metadata::text, created_at, updated_at
+                FROM identities
+                WHERE uuid IS NOT NULL
+                ORDER BY id
+            """)
+            rows = cur.fetchall()
+
+        if not rows:
+            print("No identities found in DB.")
+            return
+
+        for row in rows:
+            uuid_raw = row["uuid"]
+            uuid_clean = uuid_raw.replace("-", "")
+            name = row["name"] or ""
+
+            dir_path = identities_root / uuid_clean
+            dir_path.mkdir(parents=True, exist_ok=True)
+
+            # metadata is selected as text, so decode it back into an object
+            metadata = row.get("metadata")
+            if isinstance(metadata, str):
+                metadata = json.loads(metadata) if metadata else {}
+            elif metadata is None:
+                metadata = {}
+
+            identity_file = {
+                "version": 1,
+                "identity_uuid": uuid_clean,
+                "name": name,
+                "identity_type": row.get("identity_type"),
+                "source": row.get("source"),
+                "status": row.get("status"),
+                "tmdb_id": row.get("tmdb_id"),
+                "tmdb_profile": row.get("tmdb_profile"),
+                "metadata": metadata,
+                "file_bindings": _load_file_bindings(conn, row["id"]),
+                "created_at": _fmt_time(row.get("created_at")),
+                "updated_at": _fmt_time(row.get("updated_at")),
+            }
+
+            with open(dir_path / "identity.json", "w", encoding="utf-8") as f:
+                json.dump(identity_file, f, indent=2, ensure_ascii=False)
+
+            index[uuid_clean] = name
+            migrated += 1
+            print(f"  [{migrated:5d}] {name} ({uuid_clean})")
+    finally:
+        conn.close()
+
+    # Write _index.json
+    index_file = {
+        "version": 1,
+        "updated_at": datetime.now(timezone.utc).isoformat(),
+        "entries": index,
+    }
+    with open(identities_root / "_index.json", "w", encoding="utf-8") as f:
+        json.dump(index_file, f, indent=2, ensure_ascii=False)
+
+    print(f"\nDone: {migrated} identities migrated")
+    print(f"Index: {identities_root / '_index.json'} ({len(index)} entries)")
+
+
+if __name__ == "__main__":
+    main()
--- a/scripts/ocr_processor.py
+++ b/scripts/ocr_processor.py
@@ -4,6 +4,7 @@ OCR Processor Wrapper
 Calls Swift Vision Framework OCR (swift_ocr) with fallback to PaddleOCR.
 """

+import re
 import sys
 import json
 import os
@@ -11,6 +12,10 @@ import subprocess
 import argparse


+sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
+from redis_publisher import RedisPublisher
+
+
 SWIFT_OCR_PATH = os.path.join(
    os.path.dirname(os.path.abspath(__file__)),
    "swift_processors/.build/debug/swift_ocr"
@@ -19,6 +24,7 @@ SWIFT_OCR_ALT = os.path.join(
    os.path.dirname(os.path.abspath(__file__)),
    "swift_processors/.build/arm64-apple-macosx/debug/swift_ocr"
 )
+SWIFT_PROGRESS_RE = re.compile(r"\[SwiftOCR\] Progress:\s*(\d+)%")


 def process_ocr(
@@ -27,6 +33,7 @@ def process_ocr(
    uuid: str = "",
    sample_interval: int = 30,
    recognition_level: str = "accurate",
+    publisher: RedisPublisher = None,
 ) -> dict:
    swift_bin = SWIFT_OCR_PATH
    if not os.path.exists(swift_bin):
@@ -42,15 +49,34 @@ def process_ocr(
           "--uuid", uuid]

    print(f"[OCR] Running Swift OCR", file=sys.stderr)
-    result = subprocess.run(cmd, capture_output=True, text=True, timeout=7200)
+    # Merge stderr into stdout: draining two pipes one after the other can
+    # deadlock once the child fills the pipe that is not being read.
+    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True)

-    if result.stdout:
-        print(result.stdout.strip(), file=sys.stderr)
-    if result.stderr:
-        print(result.stderr.strip(), file=sys.stderr)
+    last_pct = -1
+    stdout_lines = []
+    for line in proc.stdout:
+        line = line.strip()
+        stdout_lines.append(line)
+        m = SWIFT_PROGRESS_RE.search(line)
+        if m:
+            pct = int(m.group(1))
+            if pct > last_pct:
+                last_pct = pct
+                print(f"[OCR] Progress: {pct}%", file=sys.stderr)
+                if publisher:
+                    publisher.progress("ocr", pct, 100, f"{pct}%")
+        elif line:
+            print(line, file=sys.stderr)

-    if result.returncode != 0 or not os.path.exists(output_path):
-        print(f"[OCR] Swift OCR failed, falling back to PaddleOCR", file=sys.stderr)
+    # Output is fully drained at this point; close the pipe and reap the child.
+    proc.stdout.close()
+    proc.wait()
+
+    if proc.returncode != 0 or not os.path.exists(output_path):
+        print(f"[OCR] Swift OCR failed (exit={proc.returncode}), falling back to PaddleOCR", file=sys.stderr)
+        if publisher:
+            publisher.error("ocr", f"Swift OCR failed, using fallback")
        return _fallback(video_path, output_path, uuid, sample_interval)

    with open(output_path) as f:
@@ -81,9 +107,16 @@ if __name__ == "__main__":
    parser.add_argument("--recognition-level", choices=["fast", "accurate"], default="accurate")
    args = parser.parse_args()

+    publisher = RedisPublisher(args.uuid) if args.uuid else None
+    if publisher:
+        publisher.info("ocr", "OCR_START")
+
    result = process_ocr(args.video_path, args.output_path, args.uuid,
-                         args.sample_interval, args.recognition_level)
+                         args.sample_interval, args.recognition_level,
+                         publisher)

    with open(args.output_path, "w") as f:
        json.dump(result, f, indent=2)
    print(f"OCR: {len(result.get('frames', []))} frames with text")
+    if publisher:
+        publisher.complete("ocr", f"{len(result.get('frames',[]))} frames")
--- a/scripts/parent_chunk_5w1h.py
+++ b/scripts/parent_chunk_5w1h.py
@@ -28,7 +28,7 @@ sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
 DB_URL = os.getenv("DATABASE_URL", "postgresql://accusys@localhost:5432/momentry")
 SCHEMA = os.getenv("DATABASE_SCHEMA", "dev")
 OUTPUT_DIR = os.getenv("MOMENTRY_OUTPUT_DIR", "/Users/accusys/momentry/output_dev")
-OLLAMA_URL = "http://localhost:11434/api"
+EMBEDDING_URL = os.getenv("EMBEDDING_URL", "http://localhost:11436/v1/embeddings")

 def load_speaker_map(file_uuid: str) -> dict:
    """Load speaker→identity mapping from DB (generalized, not hardcoded)"""
@@ -64,7 +64,7 @@ CURRENT_VERSIONS = {
    "embedding_agent": "nomic-embed-768d/v1",
 }

-LLM_URL = os.getenv("MOMENTRY_LLM_SUMMARY_URL", "http://127.0.0.1:8081/v1/chat/completions")
+LLM_URL = os.getenv("MOMENTRY_LLM_URL", os.getenv("MOMENTRY_LLM_SUMMARY_URL", "http://127.0.0.1:8082/v1/chat/completions"))
 LLM_MODEL = os.getenv("MOMENTRY_LLM_SUMMARY_MODEL", "gemma4")


@@ -97,7 +97,7 @@ def build_child_chunks(data: dict, file_uuid: str) -> List[dict]:
        s, e = cs["start_time"], cs["end_time"]

        children = []
-        for seg in asr_segs:
+        for seg_idx, seg in enumerate(asr_segs):
            st, en = seg.get("start", 0), seg.get("end", 0)
            text = seg.get("text", "").strip()
            if st < s or en > e or not text: continue
@@ -117,11 +117,11 @@ def build_child_chunks(data: dict, file_uuid: str) -> List[dict]:
                "start": st, "end": en, "text": text,
                "speaker_id": spk_id, "speaker_name": character,
                "speaker_confidence": spk_conf,
-                "chunk_id": f"{file_uuid}_{st:.0f}_{en:.0f}",
+                "chunk_id": f"{file_uuid}_{seg_idx}",
            })

        # Boundary overlap: even empty scenes get partial children
-        for seg in asr_segs:
+        for seg_idx, seg in enumerate(asr_segs):
            st, en = seg.get("start", 0), seg.get("end", 0)
            text = seg.get("text", "").strip()
            if not text: continue
@@ -141,7 +141,7 @@ def build_child_chunks(data: dict, file_uuid: str) -> List[dict]:
                "start": st, "end": en, "text": text,
                "speaker_id": spk_id, "speaker_name": character,
                "speaker_confidence": spk_conf,
-                "chunk_id": f"{file_uuid}_{st:.0f}_{en:.0f}",
+                "chunk_id": f"{file_uuid}_{seg_idx}",
                "overlap_type": "partial",
            })

@@ -215,14 +215,17 @@ def generate_llm_child_summary(child: dict, parent_summary: str) -> Optional[str
 # ===== Embedding (Ollama nomic-embed) =====

 def embed_text(text: str, max_retries: int = 3) -> Optional[List[float]]:
-    """Get embedding via Ollama nomic-embed-text"""
+    """Get embedding via EmbeddingGemma server"""
    for attempt in range(max_retries):
        try:
-            resp = requests.post(f"{OLLAMA_URL}/embeddings", json={
-                "model": "nomic-embed-text-v2-moe", "prompt": text,
+            resp = requests.post(EMBEDDING_URL, json={
+                "input": [text],
            }, timeout=30)
            if resp.status_code == 200:
-                return resp.json()["embedding"]
+                data = resp.json()
+                items = data.get("data", [])
+                if items:
+                    return items[0]["embedding"]
        except Exception as e:
            if attempt == max_retries - 1:
                print(f"  ⚠️ Embedding failed: {e}")
@@ -244,7 +247,7 @@ def store_chunks(file_uuid: str, scenes: List[dict], mode: str, do_embed: bool,

    # Get base chunk_index
    cur.execute(
-        f"SELECT COALESCE(MAX(chunk_index), 0) FROM {SCHEMA}.chunks WHERE file_uuid = %s",
+        f"SELECT COALESCE(MAX(chunk_index), 0) FROM {SCHEMA}.chunk WHERE file_uuid = %s",
        (file_uuid,),
    )
    next_index = (cur.fetchone()[0] or 0) + 1
@@ -255,9 +258,27 @@ def store_chunks(file_uuid: str, scenes: List[dict], mode: str, do_embed: bool,

        parent_id = f"{mode}_parent_{file_uuid}_{scene['start_time']:.0f}_{scene['end_time']:.0f}"

+        parent_embedding = embed_text(parent_text) if do_embed else None
+        if do_embed and parent_embedding:
            cur.execute(
                f"""
-            INSERT INTO {SCHEMA}.chunks (chunk_id, old_chunk_id, file_uuid, chunk_type, chunk_index,
+                INSERT INTO {SCHEMA}.chunk (chunk_id, old_chunk_id, file_uuid, chunk_type, chunk_index,
+                                             start_time, end_time, content, text_content, parent_chunk_id, embedding)
+                VALUES (%s, %s, %s, %s, %s, %s, %s, %s::jsonb, %s, %s, %s::vector)
+                ON CONFLICT (file_uuid, old_chunk_id) DO UPDATE
+                    SET content = EXCLUDED.content, text_content = EXCLUDED.text_content,
+                        embedding = EXCLUDED.embedding
+                """,
+                (parent_id, parent_id, file_uuid, parent_type, next_index,
+                 scene["start_time"], scene["end_time"],
+                 json.dumps({"summary": parent_text, "mode": mode, "type": "parent",
+                             "source_versions": CURRENT_VERSIONS}),
+                 parent_text, None, parent_embedding),
+            )
+        else:
+            cur.execute(
+                f"""
+                INSERT INTO {SCHEMA}.chunk (chunk_id, old_chunk_id, file_uuid, chunk_type, chunk_index,
                                             start_time, end_time, content, text_content, parent_chunk_id)
                VALUES (%s, %s, %s, %s, %s, %s, %s, %s::jsonb, %s, %s)
                ON CONFLICT (file_uuid, old_chunk_id) DO UPDATE
@@ -276,9 +297,29 @@ def store_chunks(file_uuid: str, scenes: List[dict], mode: str, do_embed: bool,
            child_id = child["chunk_id"]
            child_text = generate_story_child_summary(child, parent_text) if mode == "story" else generate_llm_child_summary(child, parent_text)

+            child_embedding = embed_text(child_text) if do_embed else None
+            if do_embed and child_embedding:
                cur.execute(
                    f"""
-                INSERT INTO {SCHEMA}.chunks (chunk_id, old_chunk_id, file_uuid, chunk_type, chunk_index,
+                    INSERT INTO {SCHEMA}.chunk (chunk_id, old_chunk_id, file_uuid, chunk_type, chunk_index,
+                                                 start_time, end_time, content, text_content, parent_chunk_id, embedding)
+                    VALUES (%s, %s, %s, %s, %s, %s, %s, %s::jsonb, %s, %s, %s::vector)
+                    ON CONFLICT (file_uuid, old_chunk_id) DO UPDATE
+                        SET content = EXCLUDED.content, text_content = EXCLUDED.text_content,
+                            parent_chunk_id = EXCLUDED.parent_chunk_id,
+                            embedding = EXCLUDED.embedding
+                    """,
+                    (child_id, child_id, file_uuid, child_type, next_index,
+                     child["start"], child["end"],
+                     json.dumps({"speaker": child["speaker_name"], "text": child["text"], "mode": mode,
+                                 "speaker_confidence": child.get("speaker_confidence", 0),
+                                 "source_versions": CURRENT_VERSIONS}),
+                     child_text, parent_id, child_embedding),
+                )
+            else:
+                cur.execute(
+                    f"""
+                    INSERT INTO {SCHEMA}.chunk (chunk_id, old_chunk_id, file_uuid, chunk_type, chunk_index,
                                                 start_time, end_time, content, text_content, parent_chunk_id)
                    VALUES (%s, %s, %s, %s, %s, %s, %s, %s::jsonb, %s, %s)
                    ON CONFLICT (file_uuid, old_chunk_id) DO UPDATE
@@ -304,7 +345,7 @@ def main():
    parser = argparse.ArgumentParser(description="Story Processor V2.0")
    parser.add_argument("--file-uuid", required=True)
    parser.add_argument("--mode", choices=["story", "llm"], default="story")
-    parser.add_argument("--max-scenes", type=int, default=300)
+    parser.add_argument("--max-scenes", type=int, default=99999)
    parser.add_argument("--embed", action="store_true", help="Generate embeddings (Ollama)")
    parser.add_argument("--no-db", action="store_true", help="Skip DB storage")
    args = parser.parse_args()
--- a/scripts/pose_processor.py
+++ b/scripts/pose_processor.py
@@ -5,12 +5,16 @@ Calls Swift Vision Framework pose (swift_pose) with fallback to YOLOv8 Pose.
 Uses VNDetectHumanBodyPoseRequest with ANE acceleration.
 """

+import re
 import sys
 import json
 import os
 import subprocess
 import argparse

+sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
+from redis_publisher import RedisPublisher
+
 SWIFT_POSE_PATH = os.path.join(
    os.path.dirname(os.path.abspath(__file__)),
    "swift_processors/.build/debug/swift_pose"
@@ -21,11 +25,14 @@ SWIFT_POSE_ALT = os.path.join(
 )


+SWIFT_POSE_PROGRESS_RE = re.compile(r"\[SwiftPose\] Progress:\s*(\d+)%")
+
 def process_pose(
    video_path: str,
    output_path: str,
    uuid: str = "",
    sample_interval: int = 30,
+    publisher: RedisPublisher = None,
 ) -> dict:
    swift_bin = SWIFT_POSE_PATH
    if not os.path.exists(swift_bin):
@@ -33,6 +40,8 @@ def process_pose(

    if not os.path.exists(swift_bin):
        print("[Pose] Swift binary not found, using YOLOv8 fallback", file=sys.stderr)
+        if publisher:
+            publisher.error("pose", "Swift binary not found, using fallback")
        return _fallback(video_path, output_path, uuid, sample_interval)

    cmd = [swift_bin, video_path, output_path,
@@ -40,17 +49,32 @@ def process_pose(
           "--uuid", uuid]

    print(f"[Pose] Running Swift Pose (Vision Framework)", file=sys.stderr)
-    result = subprocess.run(cmd, capture_output=True, text=True, timeout=7200)
+    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)

-    if result.stdout:
-        for line in result.stdout.strip().split("\n"):
-            print(f"  {line}", file=sys.stderr)
-    if result.stderr:
-        for line in result.stderr.strip().split("\n"):
+    last_pct = -1
+    for line in proc.stdout:
+        line = line.strip()
+        m = SWIFT_POSE_PROGRESS_RE.search(line)
+        if m:
+            pct = int(m.group(1))
+            if pct > last_pct:
+                last_pct = pct
+                print(f"[Pose] Progress: {pct}%", file=sys.stderr)
+                if publisher:
+                    publisher.progress("pose", pct, 100, f"{pct}%")
+        elif line:
            print(f"  {line}", file=sys.stderr)

-    if result.returncode != 0 or not os.path.exists(output_path):
-        print(f"[Pose] Swift Pose failed, falling back to YOLOv8", file=sys.stderr)
+    stderr_output = proc.stderr.read()
+    if stderr_output:
+        print(stderr_output.strip(), file=sys.stderr)
+
+    proc.wait()
+
+    if proc.returncode != 0 or not os.path.exists(output_path):
+        print(f"[Pose] Swift Pose failed (exit={proc.returncode}), falling back to YOLOv8", file=sys.stderr)
+        if publisher:
+            publisher.error("pose", f"Swift Pose failed, using fallback")
        return _fallback(video_path, output_path, uuid, sample_interval)

    with open(output_path) as f:
@@ -113,7 +137,14 @@ if __name__ == "__main__":
    parser.add_argument("--sample-interval", type=int, default=30)
    args = parser.parse_args()

-    result = process_pose(args.video_path, args.output_path, args.uuid, args.sample_interval)
+    publisher = RedisPublisher(args.uuid) if args.uuid else None
+    if publisher:
+        publisher.info("pose", "POSE_START")
+
+    result = process_pose(args.video_path, args.output_path, args.uuid,
+                          args.sample_interval, publisher)
    with open(args.output_path, "w") as f:
        json.dump(result, f, indent=2)
    print(f"Pose: {len(result.get('frames', []))} frames with poses")
+    if publisher:
+        publisher.complete("pose", f"{len(result.get('frames',[]))} frames")
--- a/scripts/redis_publisher.py
+++ b/scripts/redis_publisher.py
@@ -34,6 +34,8 @@ class ProgressData:
    message: Optional[str] = None
    current: Optional[int] = None
    total: Optional[int] = None
+    output_count: Optional[int] = None
+    output_type: Optional[str] = None
    extra: Optional[Dict[str, Any]] = None


@@ -49,7 +51,8 @@ class StructuredMessage:
 class RedisPublisher:
    def __init__(self, uuid: str):
        self.uuid = uuid
-        self.channel = f"momentry:progress:{uuid}"
+        prefix = os.environ.get("MOMENTRY_REDIS_PREFIX", "momentry:")
+        self.channel = f"{prefix}progress:{uuid}"
        self._enabled = False
        self._client = None
        self._connect()
@@ -107,6 +110,8 @@ class RedisPublisher:
        message: Optional[str] = None,
        current: Optional[int] = None,
        total: Optional[int] = None,
+        output_count: Optional[int] = None,
+        output_type: Optional[str] = None,
        extra: Optional[Dict[str, Any]] = None,
    ) -> bool:
        if not self._enabled:
@@ -121,6 +126,8 @@ class RedisPublisher:
                message=message,
                current=current,
                total=total,
+                output_count=output_count,
+                output_type=output_type,
                extra=extra,
            ),
        )
@@ -136,6 +143,8 @@ class RedisPublisher:
        current: int,
        total: int,
        message: str = "",
+        output_count: Optional[int] = None,
+        output_type: Optional[str] = None,
    ) -> bool:
        return self.publish(
            MessageType.PROGRESS,
@@ -143,6 +152,8 @@ class RedisPublisher:
            message=message,
            current=current,
            total=total,
+            output_count=output_count,
+            output_type=output_type,
        )

    def complete(self, processor: str, message: str = "") -> bool:
--- a/scripts/sync_users_from_sftpgo.py
+++ b/scripts/sync_users_from_sftpgo.py
@@ -0,0 +1,117 @@
+#!/opt/homebrew/bin/python3.11
+"""
+Sync users from SFTPGo to Momentry users table.
+
+Usage:
+    python3 scripts/sync_users_from_sftpgo.py
+    python3 scripts/sync_users_from_sftpgo.py --sftpgo-url http://localhost:8080
+    python3 scripts/sync_users_from_sftpgo.py --db "dbname=momentry user=accusys"
+
+Environment:
+    SFTPGO_BASE_URL   Default: http://localhost:8080
+    DATABASE_URL      Default: dbname=momentry user=accusys host=localhost
+
+This script does NOT copy passwords. It creates user records with placeholder
+password hashes. The real password will be captured on the user's first
+login through Momentry (which verifies against SFTPGo and caches the hash).
+"""
+import argparse
+import json
+import os
+import sys
+from typing import Any
+
+import psycopg2
+import psycopg2.extras
+import requests
+
+
+def get_sftpgo_users(sftpgo_url: str, admin_user: str, admin_pass: str) -> list[dict[str, Any]]:
+    """Get all users from SFTPGo."""
+    # Get admin token (SFTPGo uses GET, not POST)
+    token_url = f"{sftpgo_url}/api/v2/token"
+    resp = requests.get(token_url, auth=(admin_user, admin_pass), timeout=10)
+    resp.raise_for_status()
+    token = resp.json().get("access_token")
+    if not token:
+        print("ERROR: Failed to get SFTPGo admin token", file=sys.stderr)
+        sys.exit(1)
+
+    # List users
+    users_url = f"{sftpgo_url}/api/v2/users"
+    headers = {"Authorization": f"Bearer {token}"}
+    resp = requests.get(users_url, headers=headers, timeout=10)
+    resp.raise_for_status()
+    return resp.json()
+
+
+def main():
+    parser = argparse.ArgumentParser(description="Sync SFTPGo users to Momentry")
+    parser.add_argument("--sftpgo-url", default=os.getenv("SFTPGO_BASE_URL", "http://localhost:8080"))
+    parser.add_argument("--db", default=os.getenv("DATABASE_URL", "dbname=momentry user=accusys host=localhost"))
+    parser.add_argument("--admin-user", default="admin")
+    parser.add_argument("--admin-pass", default=os.getenv("SFTPGO_ADMIN_PASSWORD", "Test3200Test3200"))
+    parser.add_argument("--dry-run", action="store_true", help="Print what would be done without executing")
+    args = parser.parse_args()
+
+    # Fetch users from SFTPGo
+    print(f"[SFTPGo] Connecting to {args.sftpgo_url}...")
+    try:
+        sftpgo_users = get_sftpgo_users(args.sftpgo_url, args.admin_user, args.admin_pass)
+    except Exception as e:
+        print(f"ERROR: Failed to fetch SFTPGo users: {e}", file=sys.stderr)
+        sys.exit(1)
+
+    print(f"[SFTPGo] Found {len(sftpgo_users)} users")
+
+    # Connect to Momentry DB and set schema
+    conn = psycopg2.connect(args.db)
+    cur = conn.cursor()
+    cur.execute("SET search_path TO dev")
+
+    synced = 0
+    skipped = 0
+
+    for user in sftpgo_users:
+        username = user.get("username")
+        status = user.get("status", 0)
+
+        if not username or status != 1:
+            skipped += 1
+            continue
+
+        role = "admin" if username == "admin" else "user"
+        # Placeholder hash — will be updated on first login via SFTPGo fallback
+        placeholder_hash = "$placeholder$synced_from_sftpgo"
+
+        if args.dry_run:
+            print(f"  Would insert: {username} (role={role})")
+            synced += 1
+            continue
+
+        try:
+            cur.execute(
+                "INSERT INTO users (username, password_hash, role) VALUES (%s, %s, %s) "
+                "ON CONFLICT (username) DO NOTHING",
+                (username, placeholder_hash, role),
+            )
+            if cur.rowcount > 0:
+                print(f"  ✅ {username} (role={role})")
+                synced += 1
+            else:
+                print(f"  ⏭️  {username} already exists, skipped")
+                skipped += 1
+        except Exception as e:
+            print(f"  ❌ {username}: {e}", file=sys.stderr)
+            skipped += 1
+
+    conn.commit()
+    cur.close()
+    conn.close()
+
+    print(f"\nDone: {synced} synced, {skipped} skipped/errors")
+    print("Note: Password hashes are placeholders. First login via Momentry will cache the real hash.")
+
+
+if __name__ == "__main__":
+    main()
--- a/scripts/tmdb_agent.py
+++ b/scripts/tmdb_agent.py
@@ -0,0 +1,285 @@
+#!/opt/homebrew/bin/python3.11
+"""
+TMDb Agent — pre-fetch TMDb data and write directly to identity files.
+
+Usage:
+    python3 scripts/tmdb_agent.py --file-uuid <uuid>
+    python3 scripts/tmdb_agent.py --file-uuid <uuid> --db "dbname=momentry user=accusys"
+
+Environment:
+    TMDB_API_KEY          Required. TMDb API key.
+    MOMENTRY_OUTPUT_DIR   Default: /Users/accusys/momentry/output
+    DATABASE_URL          Default: dbname=momentry user=accusys host=localhost
+
+Flow:
+    1. Query videos table for file_name
+    2. Extract movie name from filename
+    3. TMDB /search/movie → find best match
+    4. TMDB /movie/{id}/credits → fetch cast
+    5. TMDB /person/{id} → fetch person details
+    6. Write {OUTPUT}/identities/{uuid}/identity.json + profile.jpg for each cast member
+    7. Write {OUTPUT}/{uuid}.tmdb.json cache (movie info + identity uuid list)
+"""
+import argparse
+import hashlib
+import json
+import os
+import re
+import sys
+from datetime import datetime, timezone
+from pathlib import Path
+
+import requests
+import psycopg2
+import psycopg2.extras
+
+
+TMDB_BASE = "https://api.themoviedb.org/3"
+TMDB_API_KEY = os.getenv("TMDB_API_KEY")
+
+
+def extract_movie_name(filename: str) -> str | None:
+    """Extract movie name from filename (e.g. 'Charade_1963.mp4' → 'Charade 1963')"""
+    name = Path(filename).stem
+    cleaned = re.sub(r'[._]', ' ', name).strip()
+    # Strip text after separators like |, (, [, {
+    for sep in ('|', '(', '[', '{', '\u2502'):
+        idx = cleaned.find(sep)
+        if idx > 0:
+            cleaned = cleaned[:idx].strip()
+    # Strip common suffixes (quality, format, source, etc.)
+    suffixes = (
+        r'\d{3,4}p', r'\d{3,4}x\d{3,4}', r'\d+fps', r'bluray', r'web[ -]?dl',
+        r'webrip', r'hdrip', r'dvdrip', r'dvd', r'brrip', r'hdtv', r'xvid',
+        r'x264', r'h264', r'x265', r'h265', r'hevc', r'aac', r'mp3', r'ac3',
+        r'dts', r'5\.1', r'7\.1', r'dual[ -]?audio', r'multi[ -]?sub',
+        r'proper', r'repack', r'extended', r'unrated', r'directors[ -]?cut',
+        r'theatrical', r'internal', r'limited', r'complete', r'full[ -]?movie',
+        r'english', r'french', r'spanish', r'german', r'chinese',
+        r'youtube', r'yify', r'ettv', r'rarbg', r'tgx', r'axxo', r'ctrlhd',
+    )
+    pattern = r'\b(?:' + '|'.join(suffixes) + r')\b'
+    cleaned = re.sub(pattern, '', cleaned, flags=re.IGNORECASE).strip()
+    # Collapse multiple spaces
+    cleaned = re.sub(r'\s+', ' ', cleaned).strip()
+    return cleaned if len(cleaned) >= 3 else None
+
+
+def search_movie(query: str) -> dict | None:
+    """Search TMDB for a movie by name. Returns first result."""
+    url = f"{TMDB_BASE}/search/movie"
+    params = {"query": query, "api_key": TMDB_API_KEY, "language": "en-US", "page": 1}
+    try:
+        resp = requests.get(url, params=params, timeout=15)
+        resp.raise_for_status()
+        results = resp.json().get("results", [])
+        return results[0] if results else None
+    except Exception as e:
+        print(f"TMDB search failed: {e}", file=sys.stderr)
+        return None
+
+
+def get_credits(movie_id: int) -> list[dict]:
+    """Get cast credits for a movie from TMDB."""
+    url = f"{TMDB_BASE}/movie/{movie_id}/credits"
+    params = {"api_key": TMDB_API_KEY, "language": "en-US"}
+    try:
+        resp = requests.get(url, params=params, timeout=15)
+        resp.raise_for_status()
+        return resp.json().get("cast", [])
+    except Exception as e:
+        print(f"TMDB credits failed: {e}", file=sys.stderr)
+        return []
+
+
+def get_person_details(person_id: int) -> dict:
+    """Fetch person details from TMDB /person/{id}."""
+    url = f"{TMDB_BASE}/person/{person_id}"
+    params = {"api_key": TMDB_API_KEY, "language": "en-US"}
+    try:
+        resp = requests.get(url, params=params, timeout=15)
+        resp.raise_for_status()
+        data = resp.json()
+        return {
+            "biography": data.get("biography"),
+            "birthday": data.get("birthday"),
+            "place_of_birth": data.get("place_of_birth"),
+            "also_known_as": data.get("also_known_as", []),
+            "imdb_id": data.get("imdb_id"),
+            "known_for_department": data.get("known_for_department"),
+            "popularity": data.get("popularity"),
+            "deathday": data.get("deathday"),
+            "gender": data.get("gender"),
+            "homepage": data.get("homepage"),
+        }
+    except Exception as e:
+        print(f"TMDB person details failed for {person_id}: {e}", file=sys.stderr)
+        return {}
+
+
+def main():
+    """Look up the file's movie on TMDb, then write per-cast identity files, _index.json and the {uuid}.tmdb.json cache."""
+    parser = argparse.ArgumentParser(description="TMDb Agent — pre-fetch cache")
+    parser.add_argument("--file-uuid", required=True, help="File UUID to enrich")
+    parser.add_argument("--db", default=os.getenv("DATABASE_URL", "dbname=momentry user=accusys host=localhost"))
+    parser.add_argument("--output", default=os.getenv("MOMENTRY_OUTPUT_DIR", "/Users/accusys/momentry/output"))
+    args = parser.parse_args()
+    if not TMDB_API_KEY:
+        print("ERROR: TMDB_API_KEY not set.", file=sys.stderr)
+        sys.exit(1)
+
+    # 1. Query DB for file_name
+    schema = os.getenv("DATABASE_SCHEMA", "").strip()
+    table = f"{schema}.videos" if schema else "videos"
+    conn = psycopg2.connect(args.db)
+    cur = conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor)
+    cur.execute(f"SELECT file_name FROM {table} WHERE file_uuid = %s", (args.file_uuid,))
+    row = cur.fetchone()
+    cur.close()
+    conn.close()
+
+    if not row:
+        print(f"ERROR: File not found: {args.file_uuid}", file=sys.stderr)
+        sys.exit(1)
+
+    file_name = row["file_name"]
+    print(f"[TKG-AGENT] File: {file_name} ({args.file_uuid})")
+
+    # 2. Extract movie name
+    movie_name = extract_movie_name(file_name)
+    if not movie_name:
+        print(f"ERROR: Cannot extract movie name from: {file_name}", file=sys.stderr)
+        sys.exit(1)
+    print(f"[TKG-AGENT] Extracted movie name: '{movie_name}'")
+
+    # 3. Search TMDB
+    movie = search_movie(movie_name)
+    if not movie:
+        print(f"ERROR: No TMDB movie found for: {movie_name}", file=sys.stderr)
+        sys.exit(1)
+    print(f"[TKG-AGENT] Matched: {movie['title']} (TMDB id={movie['id']})")
+
+    # 4. Fetch credits
+    cast = get_credits(movie["id"])
+    if not cast:
+        print(f"WARN: No cast data found for movie {movie['id']}", file=sys.stderr)
+
+    # 5. Enrich each cast member with person details and write identity files
+    output = Path(args.output)
+    identities_root = output / "identities"
+    identities_root.mkdir(parents=True, exist_ok=True)
+
+    now = datetime.now(timezone.utc).isoformat()
+    created_identities = []
+
+    for i, m in enumerate(cast):
+        person_id = m["id"]
+        person = get_person_details(person_id)
+
+        # Deterministic id: first 32 hex chars of SHA256("tmdb-{movie_id}-{person_id}-{name}")
+        uuid_raw = hashlib.sha256(f"tmdb-{movie['id']}-{person_id}-{m['name']}".encode()).hexdigest()[:32]
+        profile_url = (
+            f"https://image.tmdb.org/t/p/w185{m['profile_path']}"
+            if m.get("profile_path") else None
+        )
+
+        # Build identity.json; None-valued metadata is dropped below, tmdb_aliases is always kept
+        metadata = {
+            "tmdb_character": m.get("character", ""),
+            "tmdb_cast_order": i,
+            "tmdb_movie_id": movie["id"],
+            "tmdb_movie_title": movie["title"],
+            "tmdb_biography": person.get("biography"),
+            "tmdb_birthday": person.get("birthday"),
+            "tmdb_place_of_birth": person.get("place_of_birth"),
+            "tmdb_aliases": person.get("also_known_as", []),
+            "tmdb_imdb_id": person.get("imdb_id"),
+            "tmdb_department": person.get("known_for_department"),
+            "tmdb_popularity": person.get("popularity"),
+            "tmdb_deathday": person.get("deathday"),
+            "tmdb_gender": person.get("gender"),
+            "tmdb_homepage": person.get("homepage"),
+        }
+
+        identity = {
+            "version": 1,
+            "identity_uuid": uuid_raw,
+            "name": m["name"],
+            "identity_type": "people",
+            "source": "tmdb",
+            "status": "confirmed",
+            "tmdb_id": person_id,
+            "tmdb_profile": profile_url,
+            "metadata": {k: v for k, v in metadata.items() if v is not None or k == "tmdb_aliases"},
+            "file_bindings": [],
+            "created_at": now,
+            "updated_at": now,
+        }
+
+        # Write identity.json
+        identity_dir = identities_root / uuid_raw
+        identity_dir.mkdir(parents=True, exist_ok=True)
+        identity_path = identity_dir / "identity.json"
+        with open(identity_path, "w", encoding="utf-8") as f:
+            json.dump(identity, f, indent=2, ensure_ascii=False)
+
+        # Download profile.jpg unless it already exists; a non-200 reply is skipped without a message
+        if profile_url:
+            img_path = identity_dir / "profile.jpg"
+            if not img_path.exists():
+                try:
+                    resp = requests.get(profile_url, timeout=15)
+                    if resp.status_code == 200:
+                        img_path.write_bytes(resp.content)
+                except Exception as e:
+                    print(f"  [WARN] Failed to download profile for {m['name']}: {e}", file=sys.stderr)
+
+        created_identities.append({
+            "identity_uuid": uuid_raw,
+            "name": m["name"],
+            "tmdb_id": person_id,
+            "character": m.get("character", ""),
+            "order": i,
+        })
+
+        if (i + 1) % 5 == 0:
+            print(f"[TKG-AGENT] Wrote {i+1}/{len(cast)} identity files")
+
+    # Merge into _index.json (identity_uuid → name), keeping entries already present
+    index_path = identities_root / "_index.json"
+    index = {}
+    if index_path.exists():
+        with open(index_path) as f:
+            index = json.load(f)
+    for ci in created_identities:
+        index[ci["identity_uuid"]] = ci["name"]
+    with open(index_path, "w", encoding="utf-8") as f:
+        json.dump(index, f, indent=2, ensure_ascii=False)
+
+    # Write movie cache ({uuid}.tmdb.json) — simplified, no per-person data
+    cache = {
+        "file_uuid": args.file_uuid,
+        "fetched_at": now,
+        "source": "agent",
+        "movie": {
+            "tmdb_id": movie["id"],
+            "title": movie["title"],
+            "release_date": movie.get("release_date"),
+            "overview": movie.get("overview"),
+            "poster_path": movie.get("poster_path"),
+        },
+        "cast_count": len(cast),
+        "identities_created": len(created_identities),
+        "identities": created_identities,
+    }
+
+    cache_path = output / f"{args.file_uuid}.tmdb.json"
+    with open(cache_path, "w", encoding="utf-8") as f:
+        json.dump(cache, f, indent=2, ensure_ascii=False)
+
+    print(f"[TKG-AGENT] Cache written: {cache_path}")
+    print(f"[TKG-AGENT] Identity files: {len(created_identities)} cast members → {identities_root}/")
+
+
+if __name__ == "__main__":
+    main()
--- a/scripts/visual_chunk_processor.py
+++ b/scripts/visual_chunk_processor.py
@@ -384,6 +384,7 @@ def main():
    parser.add_argument("video_path", help="視頻文件路徑")
    parser.add_argument("output_path", help="輸出文件路徑")
    parser.add_argument("--yolo-result", help="YOLO 結果文件路徑（可選）")
+    parser.add_argument("--uuid", help="檔案 UUID（由 executor 傳入）")
    parser.add_argument(
        "--strategy", choices=["fixed", "similarity"], default="fixed", help="分片策略"
    )
--- a/src/api/agent_api.rs
+++ b/src/api/agent_api.rs
@@ -57,12 +57,7 @@ async fn translate_text(
        "temperature": 0.1
    });

-    let response = client
-        .post(llm_url)
-        .json(&body)
-        .send()
-        .await
-        .map_err(|e| {
+    let response = client.post(llm_url).json(&body).send().await.map_err(|e| {
        (
            StatusCode::INTERNAL_SERVER_ERROR,
            format!("Failed to call LLM: {}", e),
--- a/src/api/five_w1h_agent_api.rs
+++ b/src/api/five_w1h_agent_api.rs
@@ -97,17 +97,25 @@ struct SceneSummaryResult {

 fn llm_base_url() -> String {
    let v = std::env::var("MOMENTRY_LLM_URL");
-    if v.is_ok() { return v.unwrap(); }
+    if v.is_ok() {
+        return v.unwrap();
+    }
    let v = std::env::var("MOMENTRY_LLM_SUMMARY_URL");
-    if v.is_ok() { return v.unwrap(); }
+    if v.is_ok() {
+        return v.unwrap();
+    }
    "http://localhost:8082/v1/chat/completions".to_string()
 }

 fn llm_model() -> String {
    let v = std::env::var("MOMENTRY_LLM_MODEL");
-    if v.is_ok() { return v.unwrap(); }
+    if v.is_ok() {
+        return v.unwrap();
+    }
    let v = std::env::var("MOMENTRY_LLM_SUMMARY_MODEL");
-    if v.is_ok() { return v.unwrap(); }
+    if v.is_ok() {
+        return v.unwrap();
+    }
    "google_gemma-4-26B-A4B-it-Q5_K_M.gguf".to_string()
 }

@@ -115,7 +123,7 @@ fn llm_model() -> String {

 async fn fetch_cut_scenes(db: &PostgresDb, file_uuid: &str) -> anyhow::Result<Vec<CutScene>> {
    let table = schema::table_name("chunk");
-    sqlx::query_as::<_, (String, i64, i64, f64, f64, f64, serde_json::Value, serde_json::Value, Option<String>)>(&format!(
+    sqlx::query_as::<_, (String, i64, i64, f64, Option<f64>, Option<f64>, serde_json::Value, Option<serde_json::Value>, Option<String>)>(&format!(
        r#"SELECT chunk_id, start_frame, end_frame, fps, start_time, end_time, content, metadata, summary_text
           FROM {} WHERE file_uuid = $1 AND chunk_type = 'cut' ORDER BY start_frame"#, table
    ))
@@ -123,7 +131,8 @@ async fn fetch_cut_scenes(db: &PostgresDb, file_uuid: &str) -> anyhow::Result<Ve
    .fetch_all(db.pool()).await?
    .into_iter().map(|r| Ok(CutScene {
        chunk_id: r.0, start_frame: r.1, end_frame: r.2,
-        fps: r.3, start_time: r.4, end_time: r.5, content: r.6, metadata: r.7, summary_text: r.8,
+        fps: r.3, start_time: r.4.unwrap_or(0.0), end_time: r.5.unwrap_or(0.0),
+        content: r.6, metadata: r.7.unwrap_or(serde_json::json!({})), summary_text: r.8,
    })).collect()
 }

@@ -133,7 +142,7 @@ async fn fetch_sentences_in_scene(
    cut: &CutScene,
 ) -> anyhow::Result<Vec<SentenceChunk>> {
    let table = schema::table_name("chunk");
-    sqlx::query_as::<_, (String, String, f64, f64, i64, i64, serde_json::Value)>(&format!(
+    sqlx::query_as::<_, (String, String, Option<f64>, Option<f64>, i64, i64, serde_json::Value)>(&format!(
        r#"SELECT chunk_id, COALESCE(text_content,''), start_time, end_time, start_frame, end_frame, content
           FROM {} WHERE file_uuid = $1 AND chunk_type = 'sentence'
           AND start_time >= $2 AND end_time <= $3 ORDER BY start_time"#, table
@@ -141,7 +150,7 @@ async fn fetch_sentences_in_scene(
    .bind(file_uuid).bind(cut.start_time).bind(cut.end_time)
    .fetch_all(db.pool()).await?
    .into_iter().map(|r| Ok(SentenceChunk {
-        chunk_id: r.0, text: r.1, start_time: r.2, end_time: r.3,
+        chunk_id: r.0, text: r.1, start_time: r.2.unwrap_or(0.0), end_time: r.3.unwrap_or(0.0),
        start_frame: r.4, end_frame: r.5, content: r.6,
    })).collect()
 }
@@ -540,10 +549,7 @@ async fn analyze_5w1h(
        if let Some(ref t) = cut.summary_text {
            if t.len() > 20 {
                processed += 1;
-                prev_context.push(format!(
-                    "Scene (t={:.0}s): {}",
-                    cut.start_time, t
-                ));
+                prev_context.push(format!("Scene (t={:.0}s): {}", cut.start_time, t));
                continue;
            }
        }
@@ -621,10 +627,7 @@ async fn batch_analyze_5w1h(
            if let Some(ref t) = cut.summary_text {
                if t.len() > 20 {
                    processed += 1;
-                    prev_context.push(format!(
-                        "Scene (t={:.0}s): {}",
-                        cut.start_time, t
-                    ));
+                    prev_context.push(format!("Scene (t={:.0}s): {}", cut.start_time, t));
                    continue;
                }
            }
@@ -713,10 +716,7 @@ pub async fn run_5w1h_agent(db: &PostgresDb, file_uuid: &str) -> anyhow::Result<
        if let Some(ref t) = cut.summary_text {
            if t.len() > 20 {
                processed += 1;
-                prev_context.push(format!(
-                    "Scene (t={:.0}s): {}",
-                    cut.start_time, t
-                ));
+                prev_context.push(format!("Scene (t={:.0}s): {}", cut.start_time, t));
                continue;
            }
        }
@@ -764,38 +764,44 @@ pub async fn run_5w1h_agent(db: &PostgresDb, file_uuid: &str) -> anyhow::Result<
    qdrant.init_collection(768).await?;

    let chunk_table = schema::table_name("chunk");
-    let rows = sqlx::query_as::<_, (String, String, String, f64, f64)>(
-        &format!("SELECT chunk_id, chunk_type, text_content, start_time, end_time \
+    let rows = sqlx::query_as::<_, (String, String, String, i64, i64, f64, f64)>(&format!(
+        "SELECT chunk_id, chunk_type, text_content, start_frame, end_frame, start_time, end_time \
           FROM {} WHERE file_uuid = $1 AND chunk_type = 'sentence' AND embedding IS NULL \
-           AND (text_content IS NOT NULL AND text_content != '') ORDER BY id", chunk_table),
-    )
+           AND (text_content IS NOT NULL AND text_content != '') ORDER BY id",
+        chunk_table
+    ))
    .bind(file_uuid)
    .fetch_all(db.pool())
    .await?;

    let total_vec = rows.len();
    let mut stored = 0usize;
-    for (chunk_id, _ctype, text, start_time, end_time) in &rows {
+    for (chunk_id, _ctype, text, start_frame, end_frame, start_time, end_time) in &rows {
        let text = text.trim();
        if text.is_empty() || text.len() < 5 {
            continue;
        }
        match embedder.embed_document(text).await {
            Ok(vector) => {
-                if let Err(e) = sqlx::query(
-                    &format!("UPDATE {} SET embedding = $1::vector WHERE chunk_id = $2 AND file_uuid = $3", chunk_table)
-                )
+                if let Err(e) = sqlx::query(&format!(
+                    "UPDATE {} SET embedding = $1::vector WHERE chunk_id = $2 AND file_uuid = $3",
+                    chunk_table
+                ))
                .bind(&vector as &[f32])
                .bind(chunk_id)
                .bind(file_uuid)
-                .execute(db.pool()).await {
+                .execute(db.pool())
+                .await
+                {
                    tracing::error!("[Vectorize] PG failed for {}: {}", chunk_id, e);
                    continue;
                }
                let payload = VectorPayload {
-                    uuid: file_uuid.to_string(),
+                    file_uuid: file_uuid.to_string(),
                    chunk_id: chunk_id.clone(),
                    chunk_type: "sentence".to_string(),
+                    start_frame: *start_frame,
+                    end_frame: *end_frame,
                    start_time: *start_time,
                    end_time: *end_time,
                    text: Some(text.to_string()),
--- a/src/api/identities.rs
+++ b/src/api/identities.rs
@@ -93,16 +93,15 @@ async fn create_identity(
    })?;

    let id_table = crate::core::db::schema::table_name("identities");
-    let name_col = if id_table.starts_with("dev.") { "name" } else { "real_name" };
    let query = format!(
        "SELECT uuid, reference_data->'total_references' as total,
               reference_data->'angles_covered' as angles,
               reference_data->'quality_avg' as quality
        FROM {}
-        WHERE {} = $1
+        WHERE name = $1
        ORDER BY created_at DESC
        LIMIT 1",
-        id_table, name_col
+        id_table
    );

    let row: Option<(String, Option<i32>, Option<Vec<String>>, Option<f64>)> =
@@ -168,11 +167,19 @@ async fn list_identities(
    let id_table = crate::core::db::schema::table_name("identities");

    let total: i64 = sqlx::query_scalar(&format!("SELECT COUNT(*) FROM {}", id_table))
-        .fetch_one(db.pool()).await
-        .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("Count error: {}", e)))?;
+        .fetch_one(db.pool())
+        .await
+        .map_err(|e| {
+            (
+                StatusCode::INTERNAL_SERVER_ERROR,
+                format!("Count error: {}", e),
+            )
+        })?;

-    let name_col = if id_table.starts_with("dev.") { "name" } else { "real_name" };
-    let sql = format!("SELECT id::int, uuid, {} AS name, metadata FROM {} ORDER BY id DESC LIMIT $1 OFFSET $2", name_col, id_table);
+    let sql = format!(
+        "SELECT id::int, uuid, name, metadata FROM {} ORDER BY id DESC LIMIT $1 OFFSET $2",
+        id_table
+    );

    let rows: Vec<(i32, uuid::Uuid, String, Option<serde_json::Value>)> = match sqlx::query_as(&sql)
        .bind(page_size as i64)
@@ -200,12 +207,25 @@ async fn list_identities(
        .collect();

    let identities_table = crate::core::db::schema::table_name("identities");
-    let total_identities: i64 = sqlx::query_scalar(&format!("SELECT COUNT(*) FROM {}", identities_table))
-        .fetch_one(db.pool()).await.unwrap_or(0);
-    let tmdb_identities: i64 = sqlx::query_scalar(&format!("SELECT COUNT(*) FROM {} WHERE source = 'tmdb'", identities_table))
-        .fetch_one(db.pool()).await.unwrap_or(0);
-    let auto_identities: i64 = sqlx::query_scalar(&format!("SELECT COUNT(*) FROM {} WHERE source = 'auto'", identities_table))
-        .fetch_one(db.pool()).await.unwrap_or(0);
+    let total_identities: i64 =
+        sqlx::query_scalar(&format!("SELECT COUNT(*) FROM {}", identities_table))
+            .fetch_one(db.pool())
+            .await
+            .unwrap_or(0);
+    let tmdb_identities: i64 = sqlx::query_scalar(&format!(
+        "SELECT COUNT(*) FROM {} WHERE source = 'tmdb'",
+        identities_table
+    ))
+    .fetch_one(db.pool())
+    .await
+    .unwrap_or(0);
+    let auto_identities: i64 = sqlx::query_scalar(&format!(
+        "SELECT COUNT(*) FROM {} WHERE source = 'auto'",
+        identities_table
+    ))
+    .fetch_one(db.pool())
+    .await
+    .unwrap_or(0);

    Ok(Json(IdentityListResponse {
        identities,
--- a/src/api/identity_agent_api.rs
+++ b/src/api/identity_agent_api.rs
@@ -15,8 +15,14 @@ use crate::core::db::PostgresDb;

 pub fn identity_agent_routes() -> Router<AppState> {
    Router::new()
-        .route("/api/v1/agents/identity/match-from-photo", post(match_from_photo))
-        .route("/api/v1/agents/identity/match-from-trace", post(match_from_trace))
+        .route(
+            "/api/v1/agents/identity/match-from-photo",
+            post(match_from_photo),
+        )
+        .route(
+            "/api/v1/agents/identity/match-from-trace",
+            post(match_from_trace),
+        )
 }

 #[derive(Debug, Serialize)]
@@ -73,13 +79,21 @@ async fn match_from_photo(

    let uuid_clean = identity_uuid.replace('-', "");
    if uuid_clean.is_empty() || file_uuid.is_empty() {
-        return Err((StatusCode::BAD_REQUEST, Json(serde_json::json!({
+        return Err((
+            StatusCode::BAD_REQUEST,
+            Json(serde_json::json!({
                "success": false, "message": "identity_uuid and file_uuid are required"
-        }))));
+            })),
+        ));
    }
-    let data = image_data.ok_or_else(|| (StatusCode::BAD_REQUEST, Json(serde_json::json!({
+    let data = image_data.ok_or_else(|| {
+        (
+            StatusCode::BAD_REQUEST,
+            Json(serde_json::json!({
                "success": false, "message": "No image field found. Use field name 'image'."
-    }))))?;
+            })),
+        )
+    })?;

    // 1. Save uploaded image to temp
    let scripts_dir = std::env::var("MOMENTRY_SCRIPTS_DIR")
@@ -88,11 +102,17 @@ async fn match_from_photo(
        .unwrap_or_else(|_| "/opt/homebrew/bin/python3.11".to_string());
    let temp_dir = std::env::temp_dir().join("momentry_match_face");
    std::fs::create_dir_all(&temp_dir).map_err(|e| {
-        (StatusCode::INTERNAL_SERVER_ERROR, Json(serde_json::json!({"message": format!("Failed to create temp dir: {}", e)})))
+        (
+            StatusCode::INTERNAL_SERVER_ERROR,
+            Json(serde_json::json!({"message": format!("Failed to create temp dir: {}", e)})),
+        )
    })?;
    let temp_img = temp_dir.join(format!("{}.jpg", uuid_clean));
    std::fs::write(&temp_img, &data).map_err(|e| {
-        (StatusCode::INTERNAL_SERVER_ERROR, Json(serde_json::json!({"message": format!("Failed to save temp image: {}", e)})))
+        (
+            StatusCode::INTERNAL_SERVER_ERROR,
+            Json(serde_json::json!({"message": format!("Failed to save temp image: {}", e)})),
+        )
    })?;

    // 2. Extract face embedding via Python script
@@ -103,56 +123,83 @@ async fn match_from_photo(
        .output()
        .await
        .map_err(|e| {
-            (StatusCode::INTERNAL_SERVER_ERROR, Json(serde_json::json!({"message": format!("Failed to run extractor: {}", e)})))
+            (
+                StatusCode::INTERNAL_SERVER_ERROR,
+                Json(serde_json::json!({"message": format!("Failed to run extractor: {}", e)})),
+            )
        })?;

    let _ = std::fs::remove_file(&temp_img);

    if !output.status.success() {
        let stderr = String::from_utf8_lossy(&output.stderr);
-        return Err((StatusCode::BAD_REQUEST, Json(serde_json::json!({
+        return Err((
+            StatusCode::BAD_REQUEST,
+            Json(serde_json::json!({
                "success": false, "message": format!("Face extraction failed: {}", stderr)
-        }))));
+            })),
+        ));
    }

    let stdout = String::from_utf8_lossy(&output.stdout);
    let extract_result: serde_json::Value = serde_json::from_str(&stdout).map_err(|_| {
-        (StatusCode::INTERNAL_SERVER_ERROR, Json(serde_json::json!({"message": "Failed to parse extractor output"})))
+        (
+            StatusCode::INTERNAL_SERVER_ERROR,
+            Json(serde_json::json!({"message": "Failed to parse extractor output"})),
+        )
    })?;

    let embedding: Vec<f64> = serde_json::from_value(
-        extract_result.get("embedding")
-            .ok_or_else(|| (StatusCode::BAD_REQUEST, Json(serde_json::json!({"message": "No embedding in extractor output"}))))?
-            .clone()
-    ).map_err(|_| {
-        (StatusCode::INTERNAL_SERVER_ERROR, Json(serde_json::json!({"message": "Invalid embedding format"})))
+        extract_result
+            .get("embedding")
+            .ok_or_else(|| {
+                (
+                    StatusCode::BAD_REQUEST,
+                    Json(serde_json::json!({"message": "No embedding in extractor output"})),
+                )
+            })?
+            .clone(),
+    )
+    .map_err(|_| {
+        (
+            StatusCode::INTERNAL_SERVER_ERROR,
+            Json(serde_json::json!({"message": "Invalid embedding format"})),
+        )
    })?;

    let embedding_f32: Vec<f32> = embedding.into_iter().map(|v| v as f32).collect();

    // 3. Look up identity internal ID
    let id_table = schema::table_name("identities");
-    let identity_id_row: Option<(i32,)> = sqlx::query_as(
-        &format!("SELECT id FROM {} WHERE REPLACE(uuid::text, '-', '') = $1", id_table)
-    )
+    let identity_id_row: Option<(i32,)> = sqlx::query_as(&format!(
+        "SELECT id FROM {} WHERE REPLACE(uuid::text, '-', '') = $1",
+        id_table
+    ))
    .bind(&uuid_clean)
    .fetch_optional(state.db.pool())
    .await
    .map_err(|e| {
-        (StatusCode::INTERNAL_SERVER_ERROR, Json(serde_json::json!({"message": format!("DB error: {}", e)})))
+        (
+            StatusCode::INTERNAL_SERVER_ERROR,
+            Json(serde_json::json!({"message": format!("DB error: {}", e)})),
+        )
    })?;

    let identity_id = match identity_id_row {
        Some((id,)) => id,
-        None => return Err((StatusCode::NOT_FOUND, Json(serde_json::json!({
+        None => {
+            return Err((
+                StatusCode::NOT_FOUND,
+                Json(serde_json::json!({
                    "success": false, "message": "Identity not found"
-        })))),
+                })),
+            ))
+        }
    };

    // 4. Find best matching trace (highest similarity, no threshold)
    let fd_table = schema::table_name("face_detections");
-    let best_match: Option<(i32, i32, f64)> = sqlx::query_as(
-        &format!(
+    let best_match: Option<(i32, i32, f64)> = sqlx::query_as(&format!(
        r#"SELECT id, trace_id,
                   1 - (embedding::vector <=> $1::vector) as similarity
               FROM {}
@@ -160,22 +207,25 @@ async fn match_from_photo(
               ORDER BY embedding::vector <=> $1::vector
               LIMIT 1"#,
        fd_table
-        )
-    )
+    ))
    .bind(&embedding_f32)
    .bind(&file_uuid)
    .fetch_optional(state.db.pool())
    .await
    .map_err(|e| {
-        (StatusCode::INTERNAL_SERVER_ERROR, Json(serde_json::json!({"message": format!("Search failed: {}", e)})))
+        (
+            StatusCode::INTERNAL_SERVER_ERROR,
+            Json(serde_json::json!({"message": format!("Search failed: {}", e)})),
+        )
    })?;

    // 5. Update best match face_detection
    let mut traces_matched: Vec<i32> = Vec::new();
    if let Some((fb_id, fb_trace, fb_sim)) = best_match {
-        let _ = sqlx::query(
-            &format!("UPDATE {} SET identity_id = $1 WHERE id = $2", fd_table)
-        )
+        let _ = sqlx::query(&format!(
+            "UPDATE {} SET identity_id = $1 WHERE id = $2",
+            fd_table
+        ))
        .bind(identity_id)
        .bind(fb_id)
        .execute(state.db.pool())
@@ -191,7 +241,10 @@ async fn match_from_photo(
            file_uuid,
            matches: 1,
            traces_matched,
-            message: format!("Best trace: trace_id={}, similarity={:.4}", fb_trace, fb_sim),
+            message: format!(
+                "Best trace: trace_id={}, similarity={:.4}",
+                fb_trace, fb_sim
+            ),
        }))
    } else {
        Ok(Json(MatchFromPhotoResponse {
@@ -221,26 +274,30 @@ async fn match_from_trace(
    // 1. Get 3 best face embeddings from this trace at different angles
    //    Divide trace frame range into 3 segments, pick best face from each
    let fd_table = schema::table_name("face_detections");
-    let all_faces: Vec<(Vec<f32>, i64)> = sqlx::query_as::<_, (Vec<f32>, i64)>(
-        &format!(
+    let all_faces: Vec<(Vec<f32>, i64)> = sqlx::query_as::<_, (Vec<f32>, i64)>(&format!(
        "SELECT embedding, frame_number FROM {} \
             WHERE file_uuid = $1 AND trace_id = $2 AND embedding IS NOT NULL \
             ORDER BY frame_number ASC",
        fd_table
-        )
-    )
+    ))
    .bind(&req.file_uuid)
    .bind(req.trace_id)
    .fetch_all(state.db.pool())
    .await
    .map_err(|e| {
-        (StatusCode::INTERNAL_SERVER_ERROR, Json(serde_json::json!({"message": format!("DB error: {}", e)})))
+        (
+            StatusCode::INTERNAL_SERVER_ERROR,
+            Json(serde_json::json!({"message": format!("DB error: {}", e)})),
+        )
    })?;

    if all_faces.is_empty() {
-        return Err((StatusCode::NOT_FOUND, Json(serde_json::json!({
+        return Err((
+            StatusCode::NOT_FOUND,
+            Json(serde_json::json!({
                "success": false, "message": "No embedding found for this trace"
-        }))));
+            })),
+        ));
    }

    // Pick 3 samples: divide frame range into 3 segments, use face with largest area per segment
@@ -254,14 +311,12 @@ async fn match_from_trace(
    let mut query_embeddings: Vec<Vec<f32>> = Vec::new();

    // Get width*height info if available (not all pipelines store it)
-    let face_sizes: Vec<(i64, i32)> = sqlx::query_as::<_, (i64, i32)>(
-        &format!(
+    let face_sizes: Vec<(i64, i32)> = sqlx::query_as::<_, (i64, i32)>(&format!(
        "SELECT frame_number, COALESCE(width, 0) * COALESCE(height, 0) AS area \
             FROM {} WHERE file_uuid = $1 AND trace_id = $2 AND embedding IS NOT NULL \
             ORDER BY frame_number ASC",
        fd_table
-        )
-    )
+    ))
    .bind(&req.file_uuid)
    .bind(req.trace_id)
    .fetch_all(state.db.pool())
@@ -296,8 +351,7 @@ async fn match_from_trace(
    let mut seen_trace_ids = std::collections::HashSet::new();

    for qemb in &query_embeddings {
-        let top = sqlx::query_as::<_, (i32, i32, f64)>(
-            &format!(
+        let top = sqlx::query_as::<_, (i32, i32, f64)>(&format!(
            r#"SELECT id, trace_id,
                       1 - (embedding::vector <=> $1::vector) as similarity
                   FROM {}
@@ -307,15 +361,17 @@ async fn match_from_trace(
                   ORDER BY embedding::vector <=> $1::vector
                   LIMIT 1"#,
            fd_table
-            )
-        )
+        ))
        .bind(qemb)
        .bind(&req.file_uuid)
        .bind(req.trace_id)
        .fetch_optional(state.db.pool())
        .await
        .map_err(|e| {
-            (StatusCode::INTERNAL_SERVER_ERROR, Json(serde_json::json!({"message": format!("Search failed: {}", e)})))
+            (
+                StatusCode::INTERNAL_SERVER_ERROR,
+                Json(serde_json::json!({"message": format!("Search failed: {}", e)})),
+            )
        })?;

        if let Some((cface_id, c_trace_id, c_sim)) = top {
@@ -327,35 +383,49 @@ async fn match_from_trace(

    // 3. Look up identity internal ID
    let id_table = schema::table_name("identities");
-    let identity_id_row: Option<(i32,)> = sqlx::query_as(
-        &format!("SELECT id FROM {} WHERE REPLACE(uuid::text, '-', '') = $1", id_table)
-    )
+    let identity_id_row: Option<(i32,)> = sqlx::query_as(&format!(
+        "SELECT id FROM {} WHERE REPLACE(uuid::text, '-', '') = $1",
+        id_table
+    ))
    .bind(&uuid_clean)
    .fetch_optional(state.db.pool())
    .await
    .map_err(|e| {
-        (StatusCode::INTERNAL_SERVER_ERROR, Json(serde_json::json!({"message": format!("DB error: {}", e)})))
+        (
+            StatusCode::INTERNAL_SERVER_ERROR,
+            Json(serde_json::json!({"message": format!("DB error: {}", e)})),
+        )
    })?;

    let identity_id = match identity_id_row {
        Some((id,)) => id,
-        None => return Err((StatusCode::NOT_FOUND, Json(serde_json::json!({
+        None => {
+            return Err((
+                StatusCode::NOT_FOUND,
+                Json(serde_json::json!({
                    "success": false, "message": "Identity not found"
-        })))),
+                })),
+            ))
+        }
    };

    // 4. Update matched face_detections
    let mut traces_matched: Vec<i32> = Vec::new();
    for (id, trace_id, _similarity) in &validated {
-        if let Err(e) = sqlx::query(
-            &format!("UPDATE {} SET identity_id = $1 WHERE id = $2", fd_table)
-        )
+        if let Err(e) = sqlx::query(&format!(
+            "UPDATE {} SET identity_id = $1 WHERE id = $2",
+            fd_table
+        ))
        .bind(identity_id)
        .bind(id)
        .execute(state.db.pool())
        .await
        {
-            tracing::warn!("[match-from-trace] Failed to update face_detection {}: {}", id, e);
+            tracing::warn!(
+                "[match-from-trace] Failed to update face_detection {}: {}",
+                id,
+                e
+            );
        } else {
            if !traces_matched.contains(trace_id) {
                traces_matched.push(*trace_id);
@@ -364,9 +434,10 @@ async fn match_from_trace(
    }

    // 5. Also bind the source trace itself
-    let _ = sqlx::query(
-        &format!("UPDATE {} SET identity_id = $1 WHERE file_uuid = $2 AND trace_id = $3", fd_table)
-    )
+    let _ = sqlx::query(&format!(
+        "UPDATE {} SET identity_id = $1 WHERE file_uuid = $2 AND trace_id = $3",
+        fd_table
+    ))
    .bind(identity_id)
    .bind(&req.file_uuid)
    .bind(req.trace_id)
@@ -388,7 +459,10 @@ async fn match_from_trace(
        file_uuid: req.file_uuid,
        matches: match_count,
        traces_matched,
-        message: format!("Matched {} faces ({} unique traces)", match_count, trace_count),
+        message: format!(
+            "Matched {} faces ({} unique traces)",
+            match_count, trace_count
+        ),
    }))
 }

@@ -461,7 +535,10 @@ fn analyze_person_speaker_overlap(
            }

            // Check if persons co-occur in time (frame proximity)
-            let overlap = person.frames.iter().any(|f| other_person.frames.contains(f));
+            let overlap = person
+                .frames
+                .iter()
+                .any(|f| other_person.frames.contains(f));
            if overlap {
                matched_persons.push(other_person.person_id.clone());
                visited_persons.insert(other_person.person_id.clone());
@@ -474,9 +551,10 @@ fn analyze_person_speaker_overlap(
            person.frames.iter().max().copied().unwrap_or(0) as f64,
        );
        for speaker in speakers {
-            let has_overlap = speaker.segments.iter().any(|(start, end)| {
-                *start <= person_time_range.1 && *end >= person_time_range.0
-            });
+            let has_overlap = speaker
+                .segments
+                .iter()
+                .any(|(start, end)| *start <= person_time_range.1 && *end >= person_time_range.0);
            if has_overlap {
                if !matched_speakers.contains(&speaker.speaker_id) {
                    matched_speakers.push(speaker.speaker_id.clone());
@@ -563,11 +641,12 @@ async fn match_faces_iterative(pool: &sqlx::PgPool, file_uuid: &str) -> anyhow::

    // Step 2: 載入所有 face_detections（含 frame_number），按 trace_id 分組
    let fd_table = schema::table_name("face_detections");
-    let fd_rows = sqlx::query_as::<_, (i32, i32, Vec<f32>)>(
-        &format!("SELECT trace_id, frame_number, embedding FROM {} \
+    let fd_rows = sqlx::query_as::<_, (i32, i32, Vec<f32>)>(&format!(
+        "SELECT trace_id, frame_number, embedding FROM {} \
         WHERE file_uuid=$1 AND trace_id IS NOT NULL AND embedding IS NOT NULL \
-         ORDER BY trace_id, frame_number", fd_table),
-    )
+         ORDER BY trace_id, frame_number",
+        fd_table
+    ))
    .bind(file_uuid)
    .fetch_all(pool)
    .await?;
@@ -647,16 +726,18 @@ async fn match_faces_iterative(pool: &sqlx::PgPool, file_uuid: &str) -> anyhow::
    let fd_table = schema::table_name("face_detections");
    let mut updated = 0usize;
    for (tid, name) in &matched {
-        let id_opt = sqlx::query_scalar::<_, Option<i32>>(
-            &format!("SELECT id FROM {} WHERE name=$1 AND source='tmdb'", identities_table),
-        )
+        let id_opt = sqlx::query_scalar::<_, Option<i32>>(&format!(
+            "SELECT id FROM {} WHERE name=$1 AND source='tmdb'",
+            identities_table
+        ))
        .bind(name)
        .fetch_optional(pool)
        .await?;
        if let Some(identity_id) = id_opt {
-            let _ = sqlx::query(
-                &format!("UPDATE {} SET identity_id=$1 WHERE file_uuid=$2 AND trace_id=$3", fd_table),
-            )
+            let _ = sqlx::query(&format!(
+                "UPDATE {} SET identity_id=$1 WHERE file_uuid=$2 AND trace_id=$3",
+                fd_table
+            ))
            .bind(identity_id)
            .bind(file_uuid)
            .bind(tid)
@@ -726,32 +807,32 @@ async fn match_faces_iterative(pool: &sqlx::PgPool, file_uuid: &str) -> anyhow::

    // Step 6: 未匹配的 trace 設 stranger_id = trace_id
    // trace_id 在同一個 file 內是 sequential integer，直接複用為 stranger_id
-    let stranger_update = sqlx::query(
-        &format!(
+    let stranger_update = sqlx::query(&format!(
        "UPDATE {} SET stranger_id = trace_id \
             WHERE file_uuid = $1 AND trace_id IS NOT NULL AND identity_id IS NULL \
               AND (stranger_id IS NULL OR stranger_id != trace_id)",
        fd_table
-        )
-    )
+    ))
    .bind(file_uuid)
    .execute(pool)
    .await?;
    let stranger_count = stranger_update.rows_affected();

    // Step 7: Save identity files for all affected identities
-    let affected = sqlx::query_scalar::<_, uuid::Uuid>(
-        &format!("SELECT DISTINCT i.uuid FROM {} i \
+    let affected = sqlx::query_scalar::<_, uuid::Uuid>(&format!(
+        "SELECT DISTINCT i.uuid FROM {} i \
         JOIN {} fd ON fd.identity_id = i.id \
-         WHERE fd.file_uuid=$1 AND fd.identity_id IS NOT NULL", identities_table, fd_table)
-    )
+         WHERE fd.file_uuid=$1 AND fd.identity_id IS NOT NULL",
+        identities_table, fd_table
+    ))
    .bind(file_uuid)
    .fetch_all(pool)
    .await
    .unwrap_or_default();
    for uuid in &affected {
        let us = uuid.to_string().replace('-', "");
-        if let Err(e) = crate::core::identity::storage::save_identity_file_by_pool(pool, &us).await {
+        if let Err(e) = crate::core::identity::storage::save_identity_file_by_pool(pool, &us).await
+        {
            tracing::warn!("[FaceMatch] Failed to save identity file {}: {}", us, e);
        }
    }
@@ -773,13 +854,15 @@ async fn match_faces_iterative(pool: &sqlx::PgPool, file_uuid: &str) -> anyhow::
 pub async fn bind_speakers(pool: &sqlx::PgPool, file_uuid: &str) -> anyhow::Result<usize> {
    // Load face traces with identity_id and frame numbers
    let fd_table = schema::table_name("face_detections");
-    let traces = sqlx::query_as::<_, (i32, Vec<i32>)>(
-        &format!("SELECT trace_id, array_agg(frame_number ORDER BY frame_number) \
+    let traces = sqlx::query_as::<_, (i32, Vec<i32>)>(&format!(
+        "SELECT trace_id, array_agg(frame_number ORDER BY frame_number) \
         FROM {} WHERE file_uuid=$1 AND trace_id IS NOT NULL AND identity_id IS NOT NULL \
-         GROUP BY trace_id", fd_table)
-    )
+         GROUP BY trace_id",
+        fd_table
+    ))
    .bind(file_uuid)
-    .fetch_all(pool).await?;
+    .fetch_all(pool)
+    .await?;

    if traces.is_empty() {
        tracing::info!("[SpeakerBind] No face traces with identities");
@@ -945,9 +1028,8 @@ pub async fn run_identity_agent(db: &PostgresDb, file_uuid: &str) -> anyhow::Res
        let speakers = extract_speakers_from_asrx_data(&asrx_data);
        let identities = analyze_person_speaker_overlap(&persons, &speakers);

-        let uuid_short = &file_uuid[..8.min(file_uuid.len())];
        for (idx, id_result) in identities.iter().enumerate() {
-            let identity_name = format!("stranger_{}_{}", uuid_short, idx);
+            let identity_name = format!("stranger_{}", idx);
            let metadata = serde_json::json!({
                "source": "identity_agent",
                "trace_ids": id_result.person_ids,
--- a/src/api/identity_api.rs
+++ b/src/api/identity_api.rs
@@ -38,8 +38,18 @@ pub fn identity_routes() -> Router<crate::api::server::AppState> {
        .route("/api/v1/resource/heartbeat", post(heartbeat_resource))
        .route("/api/v1/resources", get(list_resources))
        .route("/api/v1/identity/upload", post(upload_identity))
-        .route("/api/v1/identity/:identity_uuid/profile-image", post(upload_profile_image).get(get_profile_image))
-        .route("/api/v1/identity/:identity_uuid/json", get(get_identity_json))
+        .route(
+            "/api/v1/identity/:identity_uuid/profile-image",
+            post(upload_profile_image).get(get_profile_image),
+        )
+        .route(
+            "/api/v1/identity/:identity_uuid/status",
+            get(get_identity_status),
+        )
+        .route(
+            "/api/v1/identity/:identity_uuid/json",
+            get(get_identity_json),
+        )
        // Experiment: identity text search (non-polluting, separate endpoint)
        .route("/api/v1/search/identity_text", get(search_identity_text))
        .route("/api/v1/identities/search", get(search_identities_by_text))
@@ -98,7 +108,8 @@ async fn list_files(
        .await
        .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?;

-    let data = records.0
+    let data = records
+        .0
        .into_iter()
        .map(|r| FileItem {
            file_uuid: r.file_uuid,
@@ -163,7 +174,9 @@ async fn get_file_detail(
            file_name: f.file_name,
            file_path: f.file_path,
            metadata: f.probe_json,
-            created_at: chrono::DateTime::parse_from_rfc3339(&f.created_at).ok().map(|d| d.into()),
+            created_at: chrono::DateTime::parse_from_rfc3339(&f.created_at)
+                .ok()
+                .map(|d| d.into()),
        })),
        None => Err((
            StatusCode::NOT_FOUND,
@@ -214,13 +227,42 @@ async fn get_file_identities(
        .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?;

    let fps = 25.0;
-    let data: Vec<FileIdentityItem> = Vec::new();
+    let data: Vec<FileIdentityItem> = records
+        .into_iter()
+        .map(|r| FileIdentityItem {
+            identity_id: r.identity_id,
+            identity_uuid: r.identity_uuid,
+            name: r.name,
+            metadata: r.metadata,
+            face_count: r.face_count,
+            speaker_count: r.speaker_count,
+            start_frame: r.start_frame,
+            end_frame: r.end_frame,
+            start_time: r.start_time,
+            end_time: r.end_time,
+            confidence: r.confidence,
+        })
+        .collect();
+
+    let total = match sqlx::query_scalar::<_, i64>(
+        &format!(
+            "SELECT COUNT(DISTINCT fd.identity_id) FROM {} fd WHERE fd.file_uuid = $1 AND fd.identity_id IS NOT NULL",
+            crate::core::db::schema::table_name("face_detections")
+        )
+    )
+    .bind(&file_uuid)
+    .fetch_one(state.db.pool())
+    .await
+    {
+        Ok(c) => c,
+        Err(_) => data.len() as i64,
+    };

    Ok(Json(FileIdentitiesResponse {
        success: true,
        file_uuid: file_uuid,
        fps,
-        total: data.len() as i64,
+        total,
        page,
        page_size,
        data,
@@ -243,6 +285,16 @@ pub struct IdentityDetailResponse {
    pub updated_at: Option<chrono::DateTime<chrono::Utc>>,
 }

+#[derive(Debug, Serialize)]
+pub struct IdentityStatusResponse { // JSON body produced by `get_identity_status`: which on-disk artifacts exist for one identity
+    pub success: bool, // set to `true` by `get_identity_status` when the identity was found
+    pub identity_uuid: String, // UUID string copied from the looked-up identity record
+    pub name: String, // name copied from the looked-up identity record
+    pub has_json: bool, // whether an `identity.json` file was found in the identity's storage directory
+    pub has_jpg: bool, // whether a `profile.jpg` file was found in the identity's storage directory
+    pub error: Option<String>, // optional error text; `get_identity_status` always sends `None`
+}
+
 /// Formats `u` with `to_string()` and removes every `-` character from the result.
 fn strip_uuid(u: &uuid::Uuid) -> String {
     u.to_string().replace('-', "")
 }
@@ -270,7 +322,11 @@ async fn get_identity_detail(
            metadata: i.metadata,
            reference_data: i.reference_data,
            tmdb_id: i.tmdb_id,
-            tmdb_profile: Some(format!("{}/identities/{}/profile.jpg", crate::core::config::OUTPUT_DIR.as_str(), i.uuid.replace('-', ""))),
+            tmdb_profile: Some(format!(
+                "{}/identities/{}/profile.jpg",
+                crate::core::config::OUTPUT_DIR.as_str(),
+                i.uuid.replace('-', "")
+            )),
            created_at: i.created_at,
            updated_at: i.updated_at,
        })),
@@ -281,6 +337,44 @@ async fn get_identity_detail(
    }
 }

+/// Reports whether the on-disk artifacts of an identity exist.
+///
+/// The identity is looked up by its hyphen-stripped UUID. For a known identity
+/// the handler checks for `identity.json` and `profile.jpg` inside the identity
+/// directory, probing the directory derived from the hyphen-stripped UUID first
+/// and the one derived from the UUID stored on the record second.
+///
+/// # Errors
+///
+/// * `500 Internal Server Error` carrying the database error text when the
+///   lookup itself fails.
+/// * `404 Not Found` when no identity matches the UUID.
+async fn get_identity_status(
+    State(state): State<crate::api::server::AppState>,
+    Path(identity_uuid): Path<String>,
+) -> Result<Json<IdentityStatusResponse>, (StatusCode, String)> {
+    let uuid_clean = identity_uuid.replace('-', "");
+
+    let identity = state
+        .db
+        .get_identity_by_uuid(&uuid_clean)
+        .await
+        .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?;
+
+    match identity {
+        Some(i) => {
+            // Check both UUID formats (with and without hyphens); the
+            // hyphen-stripped directory is probed first for each artifact.
+            let candidate_dirs = [
+                crate::core::identity::storage::identity_dir(&uuid_clean),
+                crate::core::identity::storage::identity_dir(&i.uuid),
+            ];
+            let exists_in_any = |file_name: &str| {
+                candidate_dirs
+                    .iter()
+                    .any(|dir| dir.join(file_name).exists())
+            };
+            let has_json = exists_in_any("identity.json");
+            let has_jpg = exists_in_any("profile.jpg");
+
+            // `i` is consumed here, so its fields are moved instead of cloned.
+            Ok(Json(IdentityStatusResponse {
+                success: true,
+                identity_uuid: i.uuid,
+                name: i.name,
+                has_json,
+                has_jpg,
+                error: None,
+            }))
+        }
+        None => Err((
+            StatusCode::NOT_FOUND,
+            format!("Identity not found: {}", uuid_clean),
+        )),
+    }
+}
+
 #[derive(Debug, Serialize)]
 pub struct IdentityFilesResponse {
    pub success: bool,
@@ -375,10 +469,25 @@ async fn get_identity_files(
        })
        .collect();

+    let total = match sqlx::query_scalar::<_, i64>(
+        &format!(
+            "SELECT COUNT(DISTINCT fd.file_uuid) FROM {} fd WHERE fd.identity_id = (SELECT id FROM {} WHERE REPLACE(uuid::text, '-', '') = $1)",
+            crate::core::db::schema::table_name("face_detections"),
+            crate::core::db::schema::table_name("identities"),
+        )
+    )
+    .bind(&uuid)
+    .fetch_one(state.db.pool())
+    .await
+    {
+        Ok(c) => c,
+        Err(_) => data.len() as i64,
+    };
+
    Ok(Json(IdentityFilesResponse {
        success: true,
        identity_uuid: uuid.to_string().replace('-', ""),
-        total: data.len() as i64,
+        total,
        page,
        page_size,
        data,
@@ -449,10 +558,25 @@ async fn get_identity_faces(
        })
        .collect();

+    let total = match sqlx::query_scalar::<_, i64>(
+        &format!(
+            "SELECT COUNT(*) FROM {} fd WHERE fd.identity_id = (SELECT id FROM {} WHERE REPLACE(uuid::text, '-', '') = $1)",
+            crate::core::db::schema::table_name("face_detections"),
+            crate::core::db::schema::table_name("identities"),
+        )
+    )
+    .bind(&uuid)
+    .fetch_one(state.db.pool())
+    .await
+    {
+        Ok(c) => c,
+        Err(_) => data.len() as i64,
+    };
+
    Ok(Json(IdentityFacesResponse {
        success: true,
        identity_uuid: uuid.to_string().replace('-', ""),
-        total: data.len() as i64,
+        total,
        page,
        page_size,
        data,
@@ -721,12 +845,24 @@ async fn upload_profile_image(
    let uuid_clean = identity_uuid.replace('-', "");

    // Verify identity exists
-    if state.db.get_identity_by_uuid(&uuid_clean).await.map_err(|_| {
-        (StatusCode::INTERNAL_SERVER_ERROR, Json(serde_json::json!({"success": false, "message": "DB error"})))
-    })?.is_none() {
-        return Err((StatusCode::NOT_FOUND, Json(serde_json::json!({
+    if state
+        .db
+        .get_identity_by_uuid(&uuid_clean)
+        .await
+        .map_err(|_| {
+            (
+                StatusCode::INTERNAL_SERVER_ERROR,
+                Json(serde_json::json!({"success": false, "message": "DB error"})),
+            )
+        })?
+        .is_none()
+    {
+        return Err((
+            StatusCode::NOT_FOUND,
+            Json(serde_json::json!({
                "success": false, "message": "Identity not found"
-        }))));
+            })),
+        ));
    }

    // Process multipart upload
@@ -740,9 +876,14 @@ async fn upload_profile_image(
            ext = match content_type.as_str() {
                "image/png" => "png",
                "image/jpeg" | "image/jpg" => "jpg",
-                _ => return Err((StatusCode::BAD_REQUEST, Json(serde_json::json!({
+                _ => {
+                    return Err((
+                        StatusCode::BAD_REQUEST,
+                        Json(serde_json::json!({
                            "success": false, "message": "Unsupported image type. Use JPEG or PNG."
-                })))),
+                        })),
+                    ))
+                }
            };
            image_data = Some(field.bytes().await.map_err(|_| {
                (StatusCode::BAD_REQUEST, Json(serde_json::json!({"success": false, "message": "Failed to read image data"})))
@@ -750,9 +891,14 @@ async fn upload_profile_image(
        }
    }

-    let data = image_data.ok_or_else(|| (StatusCode::BAD_REQUEST, Json(serde_json::json!({
+    let data = image_data.ok_or_else(|| {
+        (
+            StatusCode::BAD_REQUEST,
+            Json(serde_json::json!({
                "success": false, "message": "No image field found. Use field name 'image'."
-    }))))?;
+            })),
+        )
+    })?;

    // Write image file
    let dir = crate::core::identity::storage::identity_dir(&uuid_clean);
@@ -789,8 +935,16 @@ async fn get_profile_image(
        let path = dir.join(format!("profile.{}", ext));
        if path.exists() {
            let data = std::fs::read(&path).map_err(|_| StatusCode::NOT_FOUND)?;
-            let content_type = if *ext == "png" { "image/png" } else { "image/jpeg" };
-            return Ok((StatusCode::OK, [("content-type".to_string(), content_type.to_string())], data));
+            let content_type = if *ext == "png" {
+                "image/png"
+            } else {
+                "image/jpeg"
+            };
+            return Ok((
+                StatusCode::OK,
+                [("content-type".to_string(), content_type.to_string())],
+                data,
+            ));
        }
    }
    Err(StatusCode::NOT_FOUND)
@@ -802,7 +956,14 @@ async fn get_identity_json(
 ) -> Result<(StatusCode, [(String, String); 1], Vec<u8>), StatusCode> {
    let clean = identity_uuid.replace('-', "");
    let with_hyphens = if clean.len() == 32 {
-        format!("{}-{}-{}-{}-{}", &clean[0..8], &clean[8..12], &clean[12..16], &clean[16..20], &clean[20..32])
+        format!(
+            "{}-{}-{}-{}-{}",
+            &clean[0..8],
+            &clean[8..12],
+            &clean[12..16],
+            &clean[16..20],
+            &clean[20..32]
+        )
    } else {
        identity_uuid.clone()
    };
@@ -821,7 +982,9 @@ async fn get_identity_json(
    }

    // 2. Lazy Sync: If file missing, generate from DB and save
-    if let Err(e) = crate::core::identity::storage::save_identity_file_by_pool(state.db.pool(), &clean).await {
+    if let Err(e) =
+        crate::core::identity::storage::save_identity_file_by_pool(state.db.pool(), &clean).await
+    {
        tracing::warn!("[identity-json] Lazy sync failed for {}: {}", clean, e);
        return Err(StatusCode::NOT_FOUND);
    }
@@ -858,7 +1021,7 @@ struct IdentityTextHit {
    chunk_id: String,
    start_time: f64,
    end_time: f64,
-    text_content: String,
+    text_content: Option<String>,
    identity_id: Option<i32>,
    identity_name: Option<String>,
    identity_source: Option<String>,
@@ -889,7 +1052,7 @@ async fn search_identity_text(

    let query = format!(
        r#"SELECT c.file_uuid, c.chunk_id, c.start_time, c.end_time, c.text_content,
-                  fd.identity_id, CASE WHEN id_table LIKE 'dev.%' THEN i.name ELSE i.real_name END AS identity_name, i.source AS identity_source,
+                  fd.identity_id, i.name AS identity_name, i.source AS identity_source,
                  fd.trace_id
           FROM {} c
           LEFT JOIN {} fd ON fd.file_uuid = c.file_uuid
@@ -902,18 +1065,42 @@ async fn search_identity_text(
        chunk_table, fd_table, id_table
    );

-    let rows = sqlx::query_as::<_, (String, String, f64, f64, String, Option<i32>, Option<String>, Option<String>, Option<i32>)>(&query)
-        .bind(&params.uuid).bind(&like_q).bind(limit)
+    let rows = sqlx::query_as::<
+        _,
+        (
+            String,
+            String,
+            f64,
+            f64,
+            Option<String>,
+            Option<i32>,
+            Option<String>,
+            Option<String>,
+            Option<i32>,
+        ),
+    >(&query)
+    .bind(&params.uuid)
+    .bind(&like_q)
+    .bind(limit)
    .fetch_all(state.db.pool())
    .await
    .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;

    let results: Vec<IdentityTextHit> = rows
        .into_iter()
-        .map(|(fu, cid, st, et, txt, iid, iname, isrc, tid)| IdentityTextHit {
-            file_uuid: fu, chunk_id: cid, start_time: st, end_time: et, text_content: txt,
-            identity_id: iid, identity_name: iname, identity_source: isrc, trace_id: tid,
-        })
+        .map(
+            |(fu, cid, st, et, txt, iid, iname, isrc, tid)| IdentityTextHit {
+                file_uuid: fu,
+                chunk_id: cid,
+                start_time: st,
+                end_time: et,
+                text_content: txt,
+                identity_id: iid,
+                identity_name: iname,
+                identity_source: isrc,
+                trace_id: tid,
+            },
+        )
        .collect();

    let total = results.len() as i64;
@@ -922,7 +1109,14 @@ async fn search_identity_text(
    let start = (page - 1) * page_size;
    let paged: Vec<IdentityTextHit> = results.into_iter().skip(start).take(page_size).collect();
    let limit = params.limit.unwrap_or(50) as usize;
-    Ok(Json(IdentityTextResponse { success: true, total, page, page_size, limit, results: paged }))
+    Ok(Json(IdentityTextResponse {
+        success: true,
+        total,
+        page,
+        page_size,
+        limit,
+        results: paged,
+    }))
 }

 #[derive(Debug, Deserialize)]
@@ -942,7 +1136,7 @@ struct IdentitySearchHit {
    trace_id: Option<i32>,
    chunk_id: String,
    start_time: f64,
-    text_content: String,
+    text_content: Option<String>,
 }

 #[derive(Debug, Serialize)]
@@ -965,7 +1159,7 @@ async fn search_identities_by_text(
    let limit = params.limit.unwrap_or(50).min(100);

    let query = format!(
-        r#"SELECT i.id::int, COALESCE(i.real_name, i.actor_name, i.name) AS name, i.source, i.tmdb_id,
+        r#"SELECT i.id::int, i.name, i.source, i.tmdb_id,
                  fd.file_uuid, fd.trace_id,
                  c.chunk_id, c.start_time, c.text_content
           FROM {} i
@@ -973,15 +1167,30 @@ async fn search_identities_by_text(
           JOIN {} c ON c.file_uuid = fd.file_uuid
               AND c.start_time <= fd.frame_number / COALESCE(c.fps, 25.0)
               AND c.end_time >= fd.frame_number / COALESCE(c.fps, 25.0)
-           WHERE COALESCE(i.real_name, i.actor_name, i.name) ILIKE $1
+           WHERE i.name ILIKE $1
             AND ($2::text IS NULL OR fd.file_uuid = $2)
-           ORDER BY COALESCE(i.real_name, i.actor_name, i.name), c.start_time
+           ORDER BY i.name, c.start_time
           LIMIT $3"#,
        id_table, fd_table, chunk_table
    );

-    let rows = sqlx::query_as::<_, (i32, String, Option<String>, Option<i32>, String, Option<i32>, String, f64, String)>(&query)
-        .bind(&like_q).bind(&params.uuid).bind(limit)
+    let rows = sqlx::query_as::<
+        _,
+        (
+            i32,
+            String,
+            Option<String>,
+            Option<i32>,
+            String,
+            Option<i32>,
+            String,
+            f64,
+            Option<String>,
+        ),
+    >(&query)
+    .bind(&like_q)
+    .bind(&params.uuid)
+    .bind(limit)
    .fetch_all(state.db.pool())
    .await
    .map_err(|e| {
@@ -991,12 +1200,25 @@ async fn search_identities_by_text(

    let results: Vec<IdentitySearchHit> = rows
        .into_iter()
-        .map(|(iid, name, src, tid, fu, trace_id, cid, st, txt)| IdentitySearchHit {
-            identity_id: iid, name, source: src, tmdb_id: tid,
-            file_uuid: fu, trace_id, chunk_id: cid, start_time: st, text_content: txt,
-        })
+        .map(
+            |(iid, name, src, tid, fu, trace_id, cid, st, txt)| IdentitySearchHit {
+                identity_id: iid,
+                name,
+                source: src,
+                tmdb_id: tid,
+                file_uuid: fu,
+                trace_id,
+                chunk_id: cid,
+                start_time: st,
+                text_content: txt,
+            },
+        )
        .collect();

    let total = results.len() as i64;
-    Ok(Json(IdentitySearchResponse { success: true, total, results }))
+    Ok(Json(IdentitySearchResponse {
+        success: true,
+        total,
+        results,
+    }))
 }
--- a/src/api/identity_binding.rs
+++ b/src/api/identity_binding.rs
@@ -1,5 +1,5 @@
 use axum::{
-    extract::{Path, Query},
+    extract::{Path, Query, State},
    http::StatusCode,
    response::Json,
    routing::{get, post},
@@ -77,7 +77,7 @@ pub async fn bind_identity(

    // Get identity_id from identity_uuid
    let identity_row: Option<(i64, String)> = sqlx::query_as(&format!(
-        "SELECT id, COALESCE(real_name, actor_name) AS name FROM {} WHERE uuid = $1::uuid",
+        "SELECT id, name FROM {} WHERE uuid = $1::uuid",
        id_table
    ))
    .bind(&identity_uuid)
@@ -116,8 +116,14 @@ pub async fn bind_identity(

    let uuid_clean = identity_uuid.replace('-', "");
    // Sync identity JSON file
-    if let Err(e) = crate::core::identity::storage::save_identity_file_by_pool(&db, &uuid_clean).await {
-        tracing::warn!("[bind] Failed to sync identity file for {}: {}", uuid_clean, e);
+    if let Err(e) =
+        crate::core::identity::storage::save_identity_file_by_pool(&db, &uuid_clean).await
+    {
+        tracing::warn!(
+            "[bind] Failed to sync identity file for {}: {}",
+            uuid_clean,
+            e
+        );
    }

    Ok(Json(ApiResponse {
@@ -189,8 +195,15 @@ pub async fn unbind_identity(
        .ok()
        .flatten();
        if let Some(identity_uuid) = uuid {
-            if let Err(e) = crate::core::identity::storage::save_identity_file_by_pool(&db, &identity_uuid).await {
-                tracing::warn!("[unbind] Failed to sync identity file for {}: {}", identity_uuid, e);
+            if let Err(e) =
+                crate::core::identity::storage::save_identity_file_by_pool(&db, &identity_uuid)
+                    .await
+            {
+                tracing::warn!(
+                    "[unbind] Failed to sync identity file for {}: {}",
+                    identity_uuid,
+                    e
+                );
            }
        }
    }
@@ -221,7 +234,7 @@ pub async fn merge_identities(

    // Get IDs for both identities
    let from_row: Option<(i64, String)> = sqlx::query_as(&format!(
-        "SELECT id, COALESCE(real_name, actor_name) AS name FROM {} WHERE uuid = $1::uuid",
+        "SELECT id, name FROM {} WHERE uuid = $1::uuid",
        id_table
    ))
    .bind(&identity_uuid)
@@ -239,7 +252,7 @@ pub async fn merge_identities(
    ))?;

    let into_row: Option<(i64, String)> = sqlx::query_as(&format!(
-        "SELECT id, COALESCE(real_name, actor_name) AS name FROM {} WHERE uuid = $1::uuid",
+        "SELECT id, name FROM {} WHERE uuid = $1::uuid",
        id_table
    ))
    .bind(&req.into_uuid)
@@ -299,8 +312,14 @@ pub async fn merge_identities(

    // Sync target identity JSON
    let into_uuid_clean = req.into_uuid.replace('-', "");
-    if let Err(e) = crate::core::identity::storage::save_identity_file_by_pool(&db, &into_uuid_clean).await {
-        tracing::warn!("[merge] Failed to sync target identity file for {}: {}", into_uuid_clean, e);
+    if let Err(e) =
+        crate::core::identity::storage::save_identity_file_by_pool(&db, &into_uuid_clean).await
+    {
+        tracing::warn!(
+            "[merge] Failed to sync target identity file for {}: {}",
+            into_uuid_clean,
+            e
+        );
    }

    // Delete source identity JSON if not keeping history
@@ -339,6 +358,106 @@ pub struct ListIdentitiesParams {
    pub offset: Option<i32>,
 }

+#[derive(Debug, Serialize)]
+pub struct IdentityTraceInfo { // one (file_uuid, trace_id) group built by `get_identity_traces`
+    pub file_uuid: String, // `fd.file_uuid` rendered as text
+    pub trace_id: i32, // `fd.trace_id` of the group
+    pub frame_count: i64, // COUNT(*) of face detections in the group
+    pub first_frame: i32, // MIN(frame_number) in the group
+    pub last_frame: i32, // MAX(frame_number) in the group
+    pub first_sec: f64, // first_frame / 25.0, rounded to 1 decimal in SQL
+    pub last_sec: f64, // last_frame / 25.0, rounded to 1 decimal in SQL
+    pub avg_confidence: f64, // AVG(confidence) in the group, rounded to 4 decimals in SQL
+}
+
+#[derive(Debug, Serialize)]
+pub struct IdentityTracesResponse { // JSON body returned by `get_identity_traces`
+    pub success: bool, // set to `true` by the handler on the success path
+    pub identity_uuid: String, // the UUID path parameter, echoed back as received
+    pub name: String, // `name` column of the matched identity row
+    pub total_traces: usize, // number of entries in `traces`
+    pub total_faces: i64, // sum of `frame_count` over all entries in `traces`
+    pub traces: Vec<IdentityTraceInfo>, // ordered by file_uuid, then trace_id
+}
+
+/// Lists every face trace attributed to an identity, across all files.
+///
+/// Resolves `identity_uuid` to its identity row, then aggregates that
+/// identity's face detections per `(file_uuid, trace_id)`: detection count,
+/// first/last frame, first/last second and mean confidence. Results are
+/// ordered by file, then trace.
+///
+/// Detections that carry no `trace_id` are excluded, so `total_faces` counts
+/// only detections that belong to a trace.
+///
+/// NOTE(review): `first_sec` / `last_sec` are derived with a fixed 25 fps.
+/// Other handlers read the frame rate from the database; files with a
+/// different frame rate will get skewed timestamps here — confirm and switch
+/// to the stored fps if that matters to callers.
+///
+/// # Errors
+///
+/// * `500 Internal Server Error` with the database error text when either
+///   query fails (this includes an `identity_uuid` Postgres cannot cast to
+///   `uuid`).
+/// * `404 Not Found` when no identity has the given UUID.
+pub async fn get_identity_traces(
+    State(state): State<crate::api::server::AppState>,
+    Path(identity_uuid): Path<String>,
+) -> Result<Json<IdentityTracesResponse>, (StatusCode, String)> {
+    let id_table = crate::core::db::schema::table_name("identities");
+    let fd_table = crate::core::db::schema::table_name("face_detections");
+
+    // Get identity id and name. The id is cast to bigint and decoded as i64,
+    // matching the other handlers in this module; sqlx rejects a column whose
+    // integer width differs from the Rust type, and the cast makes the decode
+    // independent of the column's declared width.
+    let identity: Option<(i64, String)> = sqlx::query_as(&format!(
+        "SELECT id::bigint, name FROM {} WHERE uuid = $1::uuid",
+        id_table
+    ))
+    .bind(&identity_uuid)
+    .fetch_optional(state.db.pool())
+    .await
+    .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?;
+
+    let (identity_id, name) =
+        identity.ok_or((StatusCode::NOT_FOUND, "Identity not found".to_string()))?;
+
+    // Get all traces for this identity across all files. Rows without a
+    // trace_id are filtered out: they would form a NULL group that cannot be
+    // decoded into the non-optional `trace_id` and would fail the whole request.
+    let rows: Vec<(String, i32, i64, i32, i32, f64, f64, f64)> = sqlx::query_as(&format!(
+        r#"SELECT fd.file_uuid::text, fd.trace_id,
+                      COUNT(*)::bigint AS frame_count,
+                      MIN(fd.frame_number)::int AS first_frame,
+                      MAX(fd.frame_number)::int AS last_frame,
+                      ROUND(MIN(fd.frame_number)::numeric / 25.0, 1)::float8 AS first_sec,
+                      ROUND(MAX(fd.frame_number)::numeric / 25.0, 1)::float8 AS last_sec,
+                      ROUND(AVG(fd.confidence)::numeric, 4)::float8 AS avg_confidence
+               FROM {} fd
+               WHERE fd.identity_id = $1
+                 AND fd.trace_id IS NOT NULL
+               GROUP BY fd.file_uuid, fd.trace_id
+               ORDER BY fd.file_uuid, fd.trace_id"#,
+        fd_table
+    ))
+    .bind(identity_id)
+    .fetch_all(state.db.pool())
+    .await
+    .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?;
+
+    let total_traces = rows.len();
+    // Tuple index 2 is `frame_count`, i.e. the number of detections per trace.
+    let total_faces: i64 = rows.iter().map(|r| r.2).sum();
+
+    let traces: Vec<IdentityTraceInfo> = rows
+        .into_iter()
+        .map(
+            |(
+                file_uuid,
+                trace_id,
+                frame_count,
+                first_frame,
+                last_frame,
+                first_sec,
+                last_sec,
+                avg_confidence,
+            )| IdentityTraceInfo {
+                file_uuid,
+                trace_id,
+                frame_count,
+                first_frame,
+                last_frame,
+                first_sec,
+                last_sec,
+                avg_confidence,
+            },
+        )
+        .collect();
+
+    Ok(Json(IdentityTracesResponse {
+        success: true,
+        identity_uuid,
+        name,
+        total_traces,
+        total_faces,
+        traces,
+    }))
+}
+
 pub fn identity_binding_routes() -> Router<crate::api::server::AppState> {
    Router::new()
        .route("/api/v1/identity/:identity_uuid/bind", post(bind_identity))
@@ -350,4 +469,8 @@ pub fn identity_binding_routes() -> Router<crate::api::server::AppState> {
            "/api/v1/identity/:identity_uuid/mergeinto",
            post(merge_identities),
        )
+        .route(
+            "/api/v1/identity/:identity_uuid/traces",
+            get(get_identity_traces),
+        )
 }
--- a/src/api/media_api.rs
+++ b/src/api/media_api.rs
@@ -14,8 +14,16 @@ use crate::core::db::{schema, PostgresDb};

 /// Shared video query params: mode=normal|debug, audio=on|off
 fn parse_video_params(params: &std::collections::HashMap<String, String>) -> (String, String) {
-    let mode = params.get("mode").map(|s| s.as_str()).unwrap_or("normal").to_string();
-    let audio = params.get("audio").map(|s| s.as_str()).unwrap_or("on").to_string();
+    let mode = params
+        .get("mode")
+        .map(|s| s.as_str())
+        .unwrap_or("normal")
+        .to_string();
+    let audio = params
+        .get("audio")
+        .map(|s| s.as_str())
+        .unwrap_or("on")
+        .to_string();
    (mode, audio)
 }

@@ -142,9 +150,12 @@ struct BboxParams {
 /// Priority: start_frame/end_frame > start/end > start_time/end_time.
 /// If only time is given, convert via fps.
 fn resolve_frame_range(
-    start_frame: Option<i32>, end_frame: Option<i32>,
-    start: Option<i32>, end: Option<i32>,
-    start_time: Option<f64>, end_time: Option<f64>,
+    start_frame: Option<i32>,
+    end_frame: Option<i32>,
+    start: Option<i32>,
+    end: Option<i32>,
+    start_time: Option<f64>,
+    end_time: Option<f64>,
    fps: f64,
 ) -> (i32, i32) {
    if let (Some(sf), Some(ef)) = (start_frame.or(start), end_frame.or(end)) {
@@ -186,7 +197,15 @@ async fn bbox_overlay_video(
    .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?
    .unwrap_or(24.0);

-    let (start_f, end_f) = resolve_frame_range(p.start_frame, p.end_frame, p.start, p.end, p.start_time, p.end_time, fps);
+    let (start_f, end_f) = resolve_frame_range(
+        p.start_frame,
+        p.end_frame,
+        p.start,
+        p.end,
+        p.start_time,
+        p.end_time,
+        fps,
+    );

    let start_sec = start_f as f64 / fps;

@@ -228,13 +247,26 @@ async fn bbox_overlay_video(
    let dur = duration.to_string();
    let mut bbox_args = vec!["-ss", &ss, "-i", &video_path, "-t", &dur];
    if vf != "null" {
-        bbox_args.extend_from_slice(&["-vf", &vf, "-c:v", "libx264", "-preset", "ultrafast", "-crf", "28"]);
+        bbox_args.extend_from_slice(&[
+            "-vf",
+            &vf,
+            "-c:v",
+            "libx264",
+            "-preset",
+            "ultrafast",
+            "-crf",
+            "28",
+        ]);
    } else {
        bbox_args.extend_from_slice(&["-c", "copy"]);
    }
-    if bbox_audio == "off" { bbox_args.push("-an"); }
+    if bbox_audio == "off" {
+        bbox_args.push("-an");
+    }
    bbox_args.extend_from_slice(&["-movflags", "+faststart", "-y", &tmp_str]);
-    let status = ffmpeg_cmd().args(&bbox_args).status()
+    let status = ffmpeg_cmd()
+        .args(&bbox_args)
+        .status()
        .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
    if !status.success() {
        let _ = std::fs::remove_file(&tmp);
@@ -315,14 +347,20 @@ async fn trace_video(
        let sk = seek.to_string();
        let du = duration.to_string();
        let mut cmd_args = vec!["-ss", &sk, "-i", &video_path, "-t", &du, "-c", "copy"];
-        if audio == "off" { cmd_args.push("-an"); }
+        if audio == "off" {
+            cmd_args.push("-an");
+        }
        cmd_args.extend_from_slice(&["-y", &tmp_str]);
-        let result = ffmpeg_cmd().args(&cmd_args).output()
+        let result = ffmpeg_cmd()
+            .args(&cmd_args)
+            .output()
            .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
        if !result.status.success() {
            return Err(StatusCode::INTERNAL_SERVER_ERROR);
        }
-        let data = tokio::fs::read(&tmp).await.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
+        let data = tokio::fs::read(&tmp)
+            .await
+            .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
        let _ = std::fs::remove_file(&tmp);
        return Ok(Response::builder()
            .header(header::CONTENT_TYPE, "video/mp4")
@@ -345,8 +383,11 @@ async fn trace_video(
         ORDER BY fd.trace_id, fd.frame_number",
        face_table, identities_table
    ))
-    .bind(&file_uuid).bind(start_fn).bind(end_fn)
-    .fetch_all(state.db.pool()).await
+    .bind(&file_uuid)
+    .bind(start_fn)
+    .bind(end_fn)
+    .fetch_all(state.db.pool())
+    .await
    .unwrap_or_default();

    // Group frames by trace_id, compute start_frame per trace; collect bbox per frame
@@ -359,7 +400,9 @@ async fn trace_video(
        if let Some(name) = name_opt {
            trace_identity.entry(*tid).or_insert_with(|| name.clone());
        } else {
-            trace_identity.entry(*tid).or_insert_with(|| format!("Stranger_{:03}", tid));
+            trace_identity
+                .entry(*tid)
+                .or_insert_with(|| format!("Stranger_{:03}", tid));
        }
    }

@@ -374,7 +417,8 @@ async fn trace_video(
    .unwrap_or_else(|| "-".to_string());

    // Sort traces for consistent ordering
-    let mut sorted_traces: Vec<(i32, &Vec<i32>)> = trace_frames.iter().map(|(k, v)| (*k, v)).collect();
+    let mut sorted_traces: Vec<(i32, &Vec<i32>)> =
+        trace_frames.iter().map(|(k, v)| (*k, v)).collect();
    sorted_traces.sort_by_key(|(tid, _)| *tid);

    let frame_offset = first_frame as i64 - (padding * fps) as i64;
@@ -389,10 +433,12 @@ async fn trace_video(
        "drawtext=text='Frame %{{n}}  %{{pts}}':fontsize=28:fontcolor=white:box=1:boxcolor=black@0.6:x=10:y=12"
    ));
    parts.push(format!(
-        "drawtext=text='Cut\\: {}':fontsize=28:fontcolor=white:box=1:boxcolor=black@0.6:x=10:y=56", cut_label
+        "drawtext=text='Cut\\: {}':fontsize=28:fontcolor=white:box=1:boxcolor=black@0.6:x=10:y=56",
+        cut_label
    ));
    parts.push(format!(
-        "drawtext=text='{}':fontsize=28:fontcolor=white:box=1:boxcolor=black@0.6:x=10:y=100", file_uuid
+        "drawtext=text='{}':fontsize=28:fontcolor=white:box=1:boxcolor=black@0.6:x=10:y=100",
+        file_uuid
    ));

    // Per-trace entries: show trace_id, start_frame, identity name
@@ -400,11 +446,18 @@ async fn trace_video(
    let mut y_pos = 144;
    for (tid, frames) in &sorted_traces {
        let start = frames.iter().min().unwrap_or(&first_frame);
-        let identity = trace_identity.get(tid).map(|s| s.as_str()).unwrap_or("unknown");
+        let identity = trace_identity
+            .get(tid)
+            .map(|s| s.as_str())
+            .unwrap_or("unknown");
        let label = format!("Trace {}\\: start={}  {}", tid, start, identity);

        // Continuous range (interpolated): visible from first to last frame
-        let enable = format!("between(n,{},{})", frames[0] as i64 - frame_offset, frames[frames.len() - 1] as i64 - frame_offset);
+        let enable = format!(
+            "between(n,{},{})",
+            frames[0] as i64 - frame_offset,
+            frames[frames.len() - 1] as i64 - frame_offset
+        );

        parts.push(format!(
            "drawtext=text='{}':fontsize=24:fontcolor=white:box=1:boxcolor=black@0.6:x=10:y={}:enable='{}'",
@@ -415,7 +468,11 @@ async fn trace_video(

    // Bounding boxes: interpolated (thickness=1) + actual (thickness=4) with trace_id label
    for (tid, frames) in &sorted_traces {
-        let range_enable = format!("between(n,{},{})", frames[0] as i64 - frame_offset, frames[frames.len() - 1] as i64 - frame_offset);
+        let range_enable = format!(
+            "between(n,{},{})",
+            frames[0] as i64 - frame_offset,
+            frames[frames.len() - 1] as i64 - frame_offset
+        );
        // Interpolated bbox at first known position across the whole trace range
        if let Some((x, y, w, h)) = bbox_per_frame.get(&(*tid, frames[0])) {
            parts.push(format!(
@@ -448,23 +505,45 @@ async fn trace_video(
    let tmp_str = tmp.to_str().unwrap_or("").to_string();
    let sk = seek.to_string();
    let du = duration.to_string();
-    let mut debug_args = vec!["-ss", &sk, "-i", &video_path, "-t", &du,
-                              "-/filter_complex", &filter_path,
-                              "-c:v", "libx264", "-preset", "ultrafast", "-crf", "28"];
-    if audio == "on" { debug_args.extend_from_slice(&["-c:a", "aac"]); }
+    let mut debug_args = vec![
+        "-ss",
+        &sk,
+        "-i",
+        &video_path,
+        "-t",
+        &du,
+        "-/filter_complex",
+        &filter_path,
+        "-c:v",
+        "libx264",
+        "-preset",
+        "ultrafast",
+        "-crf",
+        "28",
+    ];
+    if audio == "on" {
+        debug_args.extend_from_slice(&["-c:a", "aac"]);
+    }
    debug_args.extend_from_slice(&["-movflags", "+faststart", "-y", &tmp_str]);
-    let result = ffmpeg_cmd().args(&debug_args).output()
+    let result = ffmpeg_cmd()
+        .args(&debug_args)
+        .output()
        .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
    if !result.status.success() {
        let stderr = String::from_utf8_lossy(&result.stderr);
        let _ = std::fs::write("/tmp/ffmpeg_last_error.txt", stderr.as_bytes());
-        tracing::error!("ffmpeg failed ({} bytes), see /tmp/ffmpeg_last_error.txt", stderr.len());
+        tracing::error!(
+            "ffmpeg failed ({} bytes), see /tmp/ffmpeg_last_error.txt",
+            stderr.len()
+        );
        let _ = std::fs::remove_file(&filter_file);
        let _ = std::fs::remove_file(&tmp);
        return Err(StatusCode::INTERNAL_SERVER_ERROR);
    }

-    let data = tokio::fs::read(&tmp).await.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
+    let data = tokio::fs::read(&tmp)
+        .await
+        .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
    let _ = std::fs::remove_file(&filter_file);
    let _ = std::fs::remove_file(&tmp);
    Ok(Response::builder()
@@ -503,19 +582,27 @@ async fn stream_video(
    // Chunk extraction with dual time/frame params
    let start_time_param = params.get("start_time").and_then(|v| v.parse::<f64>().ok());
    let end_time_param = params.get("end_time").and_then(|v| v.parse::<f64>().ok());
-    let start_frame_param = params.get("start_frame").and_then(|v| v.parse::<f64>().ok());
+    let start_frame_param = params
+        .get("start_frame")
+        .and_then(|v| v.parse::<f64>().ok());
    let end_frame_param = params.get("end_frame").and_then(|v| v.parse::<f64>().ok());
    let start_legacy = params.get("start").and_then(|v| v.parse::<f64>().ok());
    let end_legacy = params.get("end").and_then(|v| v.parse::<f64>().ok());

-    let has_range = start_frame_param.is_some() || start_time_param.is_some() || start_legacy.is_some();
+    let has_range =
+        start_frame_param.is_some() || start_time_param.is_some() || start_legacy.is_some();

    if has_range {
        let (start_sec, dur) = if let (Some(sf), Some(ef)) = (start_frame_param, end_frame_param) {
            let _fps: f64 = sqlx::query_scalar(&format!(
-                "SELECT COALESCE(fps, 24.0) FROM {} WHERE file_uuid = $1", videos_table
-            )).bind(&file_uuid).fetch_optional(state.db.pool()).await
-                .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?.unwrap_or(24.0);
+                "SELECT COALESCE(fps, 24.0) FROM {} WHERE file_uuid = $1",
+                videos_table
+            ))
+            .bind(&file_uuid)
+            .fetch_optional(state.db.pool())
+            .await
+            .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?
+            .unwrap_or(24.0);
            (sf / _fps, (ef - sf) / _fps)
        } else if let (Some(st), Some(et)) = (start_time_param, end_time_param) {
            (st, et - st)
@@ -533,15 +620,21 @@ async fn stream_video(
        let ss = start_sec.to_string();
        let d = dur.to_string();
        let mut chunk_args = vec!["-ss", &ss, "-i", &file_path, "-t", &d, "-c", "copy"];
-        if audio == "off" { chunk_args.push("-an"); }
+        if audio == "off" {
+            chunk_args.push("-an");
+        }
        chunk_args.extend_from_slice(&["-movflags", "+faststart", "-y", &tmp_str]);
-        let status = ffmpeg_cmd().args(&chunk_args).status()
+        let status = ffmpeg_cmd()
+            .args(&chunk_args)
+            .status()
            .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
        if !status.success() {
            let _ = std::fs::remove_file(&tmp);
            return Err(StatusCode::INTERNAL_SERVER_ERROR);
        }
-        let data = tokio::fs::read(&tmp).await.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
+        let data = tokio::fs::read(&tmp)
+            .await
+            .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
        let _ = std::fs::remove_file(&tmp);
        return Ok(Response::builder()
            .header(header::CONTENT_TYPE, "video/mp4")
@@ -704,7 +797,7 @@ async fn video_clip(
        let frame_count = ((e - s) * fps) as i64;
        cmd.args(["-vframes", &frame_count.to_string()]);
    } else {
-        cmd.args(["-to", &e.to_string()]);
+        cmd.args(["-t", &(e - s).to_string()]);
    }
    if mode == "debug" {
        let debug_text = if let (Some(sf), Some(ef)) = (q.start_frame, q.end_frame) {
@@ -717,8 +810,20 @@ async fn video_clip(
    if audio == "off" {
        cmd.args(["-an"]);
    }
-    cmd.args(["-c:v", "libx264", "-c:a", "aac", "-f", "mpegts", "-"]);
-    let output = cmd.output().map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
+    cmd.args([
+        "-c:v",
+        "libx264",
+        "-c:a",
+        "aac",
+        "-movflags",
+        "frag_keyframe+empty_moov",
+        "-f",
+        "mp4",
+        "-",
+    ]);
+    let output = cmd
+        .output()
+        .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
    if !output.status.success() {
        return Err(StatusCode::INTERNAL_SERVER_ERROR);
    }
--- a/src/api/middleware.rs
+++ b/src/api/middleware.rs
@@ -88,16 +88,10 @@ fn hex_val(c: u8) -> Option<u8> {
 }

 fn extract_api_key(headers: &HeaderMap, uri: &axum::http::Uri) -> Result<String, StatusCode> {
-    if let Some(key) = headers
-        .get("X-API-Key")
-        .and_then(|v| v.to_str().ok())
-    {
+    if let Some(key) = headers.get("X-API-Key").and_then(|v| v.to_str().ok()) {
        return Ok(key.to_string());
    }
-    if let Some(auth) = headers
-        .get("Authorization")
-        .and_then(|v| v.to_str().ok())
-    {
+    if let Some(auth) = headers.get("Authorization").and_then(|v| v.to_str().ok()) {
        // Check if it's a JWT (starts with eyJ)
        let trimmed = auth.strip_prefix("Bearer ").unwrap_or(auth);
        if !jwt::is_jwt(trimmed) {
@@ -129,7 +123,11 @@ pub async fn unified_auth(

    // Priority 1: Cookie session (Portal)
    let cookies = extract_cookies(headers);
-    if let Some(sid) = cookies.iter().find(|(k, _)| k == "session_id").map(|(_, v)| v.clone()) {
+    if let Some(sid) = cookies
+        .iter()
+        .find(|(k, _)| k == "session_id")
+        .map(|(_, v)| v.clone())
+    {
        match state.db.get_session_by_id(&sid).await {
            Ok(Some((_id, user_id, api_key_id, _expires_at))) => {
                let key_hash = hash_key(&api_key_id);
@@ -162,15 +160,17 @@ pub async fn unified_auth(
    }

    // Priority 2: JWT (Authorization: Bearer <eyJ...>)
-    if let Some(auth_header) = headers
-        .get("Authorization")
-        .and_then(|v| v.to_str().ok())
-    {
+    if let Some(auth_header) = headers.get("Authorization").and_then(|v| v.to_str().ok()) {
        if let Some(token) = auth_header.strip_prefix("Bearer ") {
            if jwt::is_jwt(token) {
                match jwt::verify_jwt(token) {
                    Ok(claims) => {
-                        if !state.db.is_jwt_blacklisted(&claims.jti).await.unwrap_or(false) {
+                        if !state
+                            .db
+                            .is_jwt_blacklisted(&claims.jti)
+                            .await
+                            .unwrap_or(false)
+                        {
                            let exp = chrono::DateTime::from_timestamp(claims.exp as i64, 0);
                            let user_id: i32 = claims.sub.parse().unwrap_or(0);
                            let auth = UserAuth {
--- a/src/api/server.rs
+++ b/src/api/server.rs
--- a/src/api/tmdb_api.rs
+++ b/src/api/tmdb_api.rs
@@ -9,7 +9,7 @@ use serde::{Deserialize, Serialize};

 use crate::api::server::AppState;
 use crate::core::config;
-use crate::core::db::PostgresDb;
+use crate::core::db::{PostgresDb, QdrantDb};
 use crate::core::tmdb;

 #[derive(Debug, Serialize)]
@@ -64,10 +64,44 @@ struct FileUuidParam {
    file_uuid: String,
 }

+/// JSON request body accepted by the `tmdb_fetch` handler.
+#[derive(Debug, Deserialize)]
+struct TmdbFetchRequest {
+    /// UUID of the video whose TMDb cast data should be fetched; it is looked up
+    /// in the videos table by `file_uuid`.
+    file_uuid: String,
+}
+
+/// Per-cast-member outcome reported by the `tmdb_fetch` handler.
+#[derive(Debug, Serialize)]
+struct TmdbFetchMemberResult {
+    /// Cast member name as stored in the TMDb cache.
+    name: String,
+    /// Character played; `None` when the cached character string is empty.
+    character: Option<String>,
+    /// Alternative names copied from the cache entry's `also_known_as` list.
+    aliases: Vec<String>,
+    /// JSON object holding the cached TMDb fields for this member.
+    metadata: serde_json::Value,
+    /// One of `"success"`, `"partial"`, `"skipped"` (no matching identity row)
+    /// or `"error"` (identity lookup failed).
+    status: String,
+    /// Whether the identity file was saved successfully.
+    has_json: bool,
+    /// Whether `profile.jpg` already existed or was downloaded.
+    has_jpg: bool,
+    /// Failure description for this member, when there is one.
+    error: Option<String>,
+}
+
+/// JSON response returned by the `tmdb_fetch` handler on success.
+#[derive(Debug, Serialize)]
+struct TmdbFetchResponse {
+    /// Set to `true` by the handler whenever the probe found a movie.
+    success: bool,
+    /// Title taken from the probe result.
+    movie_title: Option<String>,
+    /// TMDb id taken from the probe result.
+    tmdb_id: Option<u64>,
+    /// One entry per cast member found in the TMDb cache.
+    results: Vec<TmdbFetchMemberResult>,
+    /// Aggregate counters with keys `total`, `success`, `with_json`, `with_jpg`.
+    summary: serde_json::Value,
+}
+
+/// Builds the router for the TMDb endpoints: prefetch, per-file probe, fetch,
+/// per-file match, and the resource status / check routes.
 pub fn tmdb_routes() -> Router<AppState> {
     Router::new()
         .route("/api/v1/agents/tmdb/prefetch", post(tmdb_prefetch))
-        .route("/api/v1/file/:file_uuid/tmdb-probe", post(tmdb_probe_handler))
+        .route(
+            "/api/v1/file/:file_uuid/tmdb-probe",
+            post(tmdb_probe_handler),
+        )
+        .route("/api/v1/tmdb/fetch", post(tmdb_fetch))
+        .route(
+            "/api/v1/agents/tmdb/match/:file_uuid",
+            post(tmdb_match_handler),
+        )
         .route("/api/v1/resource/tmdb", get(tmdb_resource_status))
         .route("/api/v1/resource/tmdb/check", post(tmdb_resource_check))
  }
@@ -79,9 +113,10 @@ async fn tmdb_prefetch(
    let file_uuid = req.file_uuid;

    // Verify file exists in DB
-    let file_exists: bool = sqlx::query_scalar(
-        &format!("SELECT COUNT(*) > 0 FROM {} WHERE file_uuid = $1", crate::core::db::schema::table_name("videos"))
-    )
+    let file_exists: bool = sqlx::query_scalar(&format!(
+        "SELECT COUNT(*) > 0 FROM {} WHERE file_uuid = $1",
+        crate::core::db::schema::table_name("videos")
+    ))
    .bind(&file_uuid)
    .fetch_one(state.db.pool())
    .await
@@ -182,18 +217,22 @@ async fn tmdb_probe_handler(
    let file_uuid = params.file_uuid;

    // Verify file exists
-    let file_exists: bool = sqlx::query_scalar(
-        &format!("SELECT COUNT(*) > 0 FROM {} WHERE file_uuid = $1", crate::core::db::schema::table_name("videos"))
-    )
+    let file_exists: bool = sqlx::query_scalar(&format!(
+        "SELECT COUNT(*) > 0 FROM {} WHERE file_uuid = $1",
+        crate::core::db::schema::table_name("videos")
+    ))
    .bind(&file_uuid)
    .fetch_one(state.db.pool())
    .await
    .unwrap_or(false);

    if !file_exists {
-        return Err((StatusCode::NOT_FOUND, Json(serde_json::json!({
+        return Err((
+            StatusCode::NOT_FOUND,
+            Json(serde_json::json!({
                "error": "Video not found", "file_uuid": file_uuid
-        }))));
+            })),
+        ));
    }

    match tmdb::probe::probe_from_cache(&state.db, &file_uuid).await {
@@ -214,7 +253,10 @@ async fn tmdb_probe_handler(
                    .await
                {
                    for uuid in rows {
-                        let _ = crate::core::identity::storage::save_identity_file_by_pool(&pool, &uuid).await;
+                        let _ = crate::core::identity::storage::save_identity_file_by_pool(
+                            &pool, &uuid,
+                        )
+                        .await;
                    }
                }
            });
@@ -245,22 +287,24 @@ async fn tmdb_probe_handler(
                    message: "No TMDb cache found. Run tmdb-prefetch first.".to_string(),
                }))
            } else {
-                Err((StatusCode::INTERNAL_SERVER_ERROR, Json(serde_json::json!({
+                Err((
+                    StatusCode::INTERNAL_SERVER_ERROR,
+                    Json(serde_json::json!({
                        "error": msg, "file_uuid": file_uuid
-                }))))
+                    })),
+                ))
            }
        }
    }
 }

-async fn tmdb_resource_status(
-    State(state): State<AppState>,
-) -> Json<TmdbResourceResponse> {
+async fn tmdb_resource_status(State(state): State<AppState>) -> Json<TmdbResourceResponse> {
    let status = tmdb::status::quick_status();
    let identities_seeded = tmdb::status::count_tmdb_identities(state.db.pool())
        .await
        .unwrap_or(0);
-    let identities_with_embedding = tmdb::status::count_tmdb_identities_with_embedding(state.db.pool())
+    let identities_with_embedding =
+        tmdb::status::count_tmdb_identities_with_embedding(state.db.pool())
            .await
            .unwrap_or(0);
    let cache_files = tmdb::status::count_cache_files();
@@ -303,3 +347,383 @@ async fn tmdb_resource_check() -> Json<TmdbCheckResponse> {
        status,
    })
 }
+
+/// Ensures the profile image for an identity exists at `jpg_path`.
+///
+/// Returns `true` when the file is already present, or when it was downloaded
+/// from `url` and written successfully. Returns `false` on any network,
+/// HTTP-status or write failure.
+async fn ensure_profile_jpg(url: &str, jpg_path: &std::path::Path) -> bool {
+    if tokio::fs::metadata(jpg_path).await.is_ok() {
+        return true;
+    }
+    // Reject non-2xx responses: otherwise an error page would be stored as
+    // `profile.jpg` and then be treated as a valid cached image on later calls.
+    let resp = match reqwest::get(url).await.and_then(|r| r.error_for_status()) {
+        Ok(resp) => resp,
+        Err(_) => return false,
+    };
+    match resp.bytes().await {
+        Ok(bytes) => tokio::fs::write(jpg_path, &bytes).await.is_ok(),
+        Err(_) => false,
+    }
+}
+
+/// `POST /api/v1/tmdb/fetch` handler.
+///
+/// Looks up the file name for `req.file_uuid`, runs the TMDb probe for it and
+/// then, for every cast member in the TMDb cache that has a matching
+/// `source = 'tmdb'` identity row (matched by name):
+/// - saves the identity file,
+/// - makes sure `profile.jpg` exists in the identity directory,
+/// - pushes the stored face embedding (if any) to the `<prefix>_faces` Qdrant
+///   collection (best effort; failures are ignored).
+///
+/// # Errors
+/// - `404` when the file row (or its file name) is missing, or the probe finds
+///   no movie.
+/// - `500` when the file lookup or the probe itself fails.
+async fn tmdb_fetch(
+    State(state): State<AppState>,
+    Json(req): Json<TmdbFetchRequest>,
+) -> Result<Json<TmdbFetchResponse>, (StatusCode, Json<serde_json::Value>)> {
+    let file_uuid = req.file_uuid;
+
+    let filename: Option<String> = sqlx::query_scalar(&format!(
+        "SELECT file_name FROM {} WHERE file_uuid = $1",
+        crate::core::db::schema::table_name("videos")
+    ))
+    .bind(&file_uuid)
+    .fetch_optional(state.db.pool())
+    .await
+    .map_err(|e| {
+        (
+            StatusCode::INTERNAL_SERVER_ERROR,
+            Json(serde_json::json!({"error": e.to_string()})),
+        )
+    })?
+    .flatten();
+
+    let filename = filename.ok_or_else(|| {
+        (
+            StatusCode::NOT_FOUND,
+            Json(serde_json::json!({"error": "File not found"})),
+        )
+    })?;
+
+    // Run probe to create identities
+    match tmdb::probe::probe_movie(&state.db, &filename, &file_uuid).await {
+        Ok(Some(probe_result)) => {
+            let mut member_results = Vec::new();
+
+            // Loop-invariant: the collection name does not depend on the member.
+            let face_collection = format!(
+                "{}_faces",
+                crate::core::config::REDIS_KEY_PREFIX
+                    .as_str()
+                    .trim_end_matches(':')
+            );
+            // Created (and its collection ensured) only once, when the first
+            // member with a stored embedding is encountered.
+            let mut qdrant = None;
+
+            // Read the cache to get cast list with names and profile URLs
+            match tmdb::cache::read_tmdb_cache(&file_uuid) {
+                Ok(cache) => {
+                    for member in &cache.cast {
+                        let name = member.name.clone();
+                        let character =
+                            (!member.character.is_empty()).then(|| member.character.clone());
+                        let aliases = member.also_known_as.clone();
+                        let profile_url = member
+                            .profile_path
+                            .as_ref()
+                            .map(|p| format!("https://image.tmdb.org/t/p/w185{}", p));
+
+                        let metadata = serde_json::json!({
+                            "tmdb_id": member.id,
+                            "name": member.name,
+                            "character": member.character,
+                            "aliases": member.also_known_as,
+                            "profile_path": member.profile_path,
+                            "order": member.order,
+                            "biography": member.biography,
+                            "birthday": member.birthday,
+                            "place_of_birth": member.place_of_birth,
+                            "imdb_id": member.imdb_id,
+                            "known_for_department": member.known_for_department,
+                            "popularity": member.popularity,
+                            "deathday": member.deathday,
+                            "gender": member.gender,
+                            "homepage": member.homepage,
+                        });
+
+                        let identity_row = sqlx::query_as::<_, (i32, uuid::Uuid)>(&format!(
+                            "SELECT id, uuid FROM {} WHERE name = $1 AND source = 'tmdb' LIMIT 1",
+                            crate::core::db::schema::table_name("identities")
+                        ))
+                        .bind(&name)
+                        .fetch_optional(state.db.pool())
+                        .await;
+
+                        // Each arm yields (status, has_json, has_jpg, error) so the
+                        // result record is assembled in exactly one place below.
+                        let (status, has_json, has_jpg, error) = match identity_row {
+                            Ok(Some((identity_id, identity_uuid))) => {
+                                let clean = identity_uuid.to_string().replace('-', "");
+                                let dir = crate::core::identity::storage::identity_dir(&clean);
+                                tokio::fs::create_dir_all(&dir).await.ok();
+
+                                // Keep the failure itself (not `Some(failure)`) for the report.
+                                let json_error =
+                                    crate::core::identity::storage::save_identity_file(
+                                        &state.db, &clean,
+                                    )
+                                    .await
+                                    .err()
+                                    .map(|e| format!("{:?}", e));
+                                let has_json = json_error.is_none();
+
+                                let has_jpg = match &profile_url {
+                                    Some(url) => {
+                                        ensure_profile_jpg(url, &dir.join("profile.jpg")).await
+                                    }
+                                    None => false,
+                                };
+
+                                // Push face_embedding to Qdrant if available
+                                let emb_row: Option<(Vec<f32>,)> = sqlx::query_as(
+                                    &format!(
+                                        "SELECT face_embedding::real[] FROM {} WHERE uuid = $1 AND face_embedding IS NOT NULL",
+                                        crate::core::db::schema::table_name("identities")
+                                    )
+                                )
+                                .bind(&identity_uuid)
+                                .fetch_optional(state.db.pool())
+                                .await
+                                .unwrap_or(None);
+
+                                // A negative id cannot be represented as a Qdrant point id;
+                                // skip the push instead of letting the cast wrap around.
+                                if let (Some((embedding,)), Ok(point_id)) =
+                                    (emb_row, u64::try_from(identity_id))
+                                {
+                                    if qdrant.is_none() {
+                                        let client = QdrantDb::new();
+                                        client.ensure_collection(&face_collection, 512).await.ok();
+                                        qdrant = Some(client);
+                                    }
+                                    if let Some(client) = &qdrant {
+                                        let _ = client
+                                            .upsert_vector_to_collection(
+                                                &face_collection,
+                                                point_id,
+                                                &embedding,
+                                                Some(serde_json::json!({
+                                                    "identity_id": identity_id,
+                                                    "name": name,
+                                                    "source": "tmdb",
+                                                })),
+                                            )
+                                            .await;
+                                    }
+                                }
+
+                                let status = if has_json && has_jpg {
+                                    "success"
+                                } else {
+                                    "partial"
+                                };
+                                let error = json_error.or_else(|| {
+                                    (!has_jpg).then(|| "profile download failed".to_string())
+                                });
+
+                                (status, has_json, has_jpg, error)
+                            }
+                            Ok(None) => ("skipped", false, false, None),
+                            Err(e) => ("error", false, false, Some(format!("DB error: {}", e))),
+                        };
+
+                        member_results.push(TmdbFetchMemberResult {
+                            name,
+                            character,
+                            aliases,
+                            metadata,
+                            status: status.to_string(),
+                            has_json,
+                            has_jpg,
+                            error,
+                        });
+                    }
+                }
+                Err(_) => {
+                    // The response still succeeds with an empty member list; leave a
+                    // trace so the empty result can be diagnosed.
+                    tracing::warn!(
+                        "[TMDB-FETCH] TMDb cache unreadable for {}, no cast members processed",
+                        file_uuid
+                    );
+                }
+            }
+
+            let total = member_results.len();
+            let success_count = member_results
+                .iter()
+                .filter(|r| r.status == "success")
+                .count();
+            let json_count = member_results.iter().filter(|r| r.has_json).count();
+            let jpg_count = member_results.iter().filter(|r| r.has_jpg).count();
+
+            Ok(Json(TmdbFetchResponse {
+                success: true,
+                movie_title: Some(probe_result.title),
+                tmdb_id: Some(probe_result.tmdb_id),
+                results: member_results,
+                summary: serde_json::json!({
+                    "total": total,
+                    "success": success_count,
+                    "with_json": json_count,
+                    "with_jpg": jpg_count,
+                }),
+            }))
+        }
+        Ok(None) => Err((
+            StatusCode::NOT_FOUND,
+            Json(serde_json::json!({
+                "error": "No movie found for this filename"
+            })),
+        )),
+        Err(e) => Err((
+            StatusCode::INTERNAL_SERVER_ERROR,
+            Json(serde_json::json!({
+                "error": e.to_string()
+            })),
+        )),
+    }
+}
+
+/// JSON response returned by `tmdb_match_handler`.
+#[derive(Debug, Serialize)]
+struct TmdbMatchResponse {
+    /// Set to `true` by the handler on every non-error return.
+    success: bool,
+    /// UUID of the video that was matched.
+    file_uuid: String,
+    /// Number of face-detection rows whose `identity_id` was updated
+    /// (sum of the UPDATE statements' affected-row counts).
+    bindings_created: usize,
+    /// Number of TMDb identities that have a stored face embedding.
+    tmdb_identities_available: usize,
+    /// Human-readable summary of the run.
+    message: String,
+}
+
+/// `POST /api/v1/agents/tmdb/match/:file_uuid` handler.
+///
+/// Searches the `<prefix>_traces` Qdrant collection with the face embedding of
+/// every TMDb identity and binds matching traces of this file to identities by
+/// updating `identity_id` on the file's face-detection rows.
+///
+/// A trace is bound to at most one identity: when several TMDb identities match
+/// the same trace with a score of at least `0.50`, the highest score wins (the
+/// earlier identity wins a tie). Each trace is therefore updated, and counted
+/// in `bindings_created`, exactly once.
+///
+/// # Errors
+/// - `404` when no video row exists for `file_uuid`.
+/// - `500` when the existence check or the identity query fails.
+///
+/// Qdrant failures and failed UPDATE statements are logged and skipped; they do
+/// not fail the request.
+async fn tmdb_match_handler(
+    Path(params): Path<FileUuidParam>,
+    State(state): State<AppState>,
+) -> Result<Json<TmdbMatchResponse>, (StatusCode, Json<serde_json::Value>)> {
+    let file_uuid = params.file_uuid;
+
+    // Verify file exists. A database failure is reported as 500 rather than
+    // being mistaken for "video not found".
+    let file_exists: bool = sqlx::query_scalar(&format!(
+        "SELECT COUNT(*) > 0 FROM {} WHERE file_uuid = $1",
+        crate::core::db::schema::table_name("videos")
+    ))
+    .bind(&file_uuid)
+    .fetch_one(state.db.pool())
+    .await
+    .map_err(|e| {
+        (
+            StatusCode::INTERNAL_SERVER_ERROR,
+            Json(serde_json::json!({"error": e.to_string()})),
+        )
+    })?;
+
+    if !file_exists {
+        return Err((
+            StatusCode::NOT_FOUND,
+            Json(serde_json::json!({
+                "error": "Video not found", "file_uuid": file_uuid
+            })),
+        ));
+    }
+
+    // Get all TMDb identities with face_embedding
+    let tmdb_rows = sqlx::query_as::<_, (i32, String, Vec<f32>)>(
+        &format!(
+            "SELECT id, name, face_embedding::real[] FROM {} WHERE source='tmdb' AND face_embedding IS NOT NULL",
+            crate::core::db::schema::table_name("identities")
+        )
+    )
+    .fetch_all(state.db.pool())
+    .await
+    .map_err(|e| {
+        (StatusCode::INTERNAL_SERVER_ERROR, Json(serde_json::json!({"error": e.to_string()})))
+    })?;
+
+    if tmdb_rows.is_empty() {
+        return Ok(Json(TmdbMatchResponse {
+            success: true,
+            file_uuid,
+            bindings_created: 0,
+            tmdb_identities_available: 0,
+            message: "No TMDb identities with face embeddings".to_string(),
+        }));
+    }
+
+    let face_collection = format!(
+        "{}_faces",
+        crate::core::config::REDIS_KEY_PREFIX
+            .as_str()
+            .trim_end_matches(':')
+    );
+
+    let qdrant = QdrantDb::new();
+    let _ = qdrant.ensure_collection(&face_collection, 512).await;
+
+    let trace_collection = format!(
+        "{}_traces",
+        crate::core::config::REDIS_KEY_PREFIX
+            .as_str()
+            .trim_end_matches(':')
+    );
+    let _ = qdrant.ensure_collection(&trace_collection, 512).await;
+
+    // Sync trace embeddings (idempotent)
+    if let Err(e) = crate::core::db::qdrant_db::sync_trace_embeddings(&file_uuid).await {
+        tracing::error!("[TKG-MATCH] Trace sync failed: {}", e);
+    }
+
+    // trace_id -> (best score seen so far, index of that identity in `tmdb_rows`).
+    // Collecting the winners first keeps a later, weaker match from overwriting
+    // a stronger one and keeps each trace from being counted more than once.
+    let mut best_by_trace: std::collections::BTreeMap<i64, (f64, usize)> =
+        std::collections::BTreeMap::new();
+
+    for (idx, (_tmdb_id, tmdb_name, tmdb_embedding)) in tmdb_rows.iter().enumerate() {
+        // Search Qdrant trace collection with this TMDb embedding
+        // NOTE(review): the exact meaning of the "source"/"tmdb" arguments is
+        // defined by `search_face_collection`, which is not visible here.
+        let results = match qdrant
+            .search_face_collection(
+                &trace_collection,
+                tmdb_embedding,
+                100,
+                "source",
+                "tmdb",
+                Some(&file_uuid),
+            )
+            .await
+        {
+            Ok(r) => r,
+            Err(e) => {
+                tracing::warn!("[TKG-MATCH] Qdrant search failed for {}: {}", tmdb_name, e);
+                continue;
+            }
+        };
+
+        // Filter results by threshold and file_uuid
+        let hits = results.into_iter().filter(|(score, payload)| {
+            *score >= 0.50
+                && payload.get("file_uuid").and_then(|v| v.as_str()) == Some(file_uuid.as_str())
+        });
+
+        for (score, payload) in hits {
+            let score = f64::from(score);
+            if let Some(tid) = payload.get("trace_id").and_then(|v| v.as_i64()) {
+                // Strict comparison: on a tie the earlier identity keeps the trace.
+                let is_better = best_by_trace
+                    .get(&tid)
+                    .map_or(true, |&(prev, _)| score > prev);
+                if is_better {
+                    best_by_trace.insert(tid, (score, idx));
+                }
+            }
+        }
+    }
+
+    // Bind each matched trace to its best identity
+    let update_sql = format!(
+        "UPDATE {} SET identity_id=$1 WHERE file_uuid=$2 AND trace_id=$3",
+        crate::core::db::schema::table_name("face_detections")
+    );
+    let mut bound_counts = vec![0usize; tmdb_rows.len()];
+
+    for (tid, (_score, idx)) in &best_by_trace {
+        // The UPDATE binds trace_id as i32; skip ids that do not fit instead of
+        // truncating them onto an unrelated trace.
+        let trace_id = match i32::try_from(*tid) {
+            Ok(t) => t,
+            Err(_) => {
+                tracing::warn!("[TKG-MATCH] trace_id {} out of range, skipped", tid);
+                continue;
+            }
+        };
+        let tmdb_id = tmdb_rows[*idx].0;
+
+        match sqlx::query(&update_sql)
+            .bind(tmdb_id)
+            .bind(&file_uuid)
+            .bind(trace_id)
+            .execute(state.db.pool())
+            .await
+        {
+            Ok(result) => {
+                bound_counts[*idx] += result.rows_affected() as usize;
+            }
+            Err(e) => {
+                tracing::warn!("[TKG-MATCH] Failed to bind trace {}: {}", trace_id, e);
+            }
+        }
+    }
+
+    let mut total_bindings = 0usize;
+    for ((tmdb_id, tmdb_name, _), bound_count) in tmdb_rows.iter().zip(&bound_counts) {
+        if *bound_count > 0 {
+            tracing::info!(
+                "[TKG-MATCH] {}: bound {} traces to TMDb identity {}",
+                tmdb_name,
+                bound_count,
+                tmdb_id
+            );
+        }
+        total_bindings += *bound_count;
+    }
+
+    Ok(Json(TmdbMatchResponse {
+        success: true,
+        file_uuid,
+        bindings_created: total_bindings,
+        tmdb_identities_available: tmdb_rows.len(),
+        message: format!("{} traces matched to TMDb identities", total_bindings),
+    }))
+}
--- a/src/api/trace_agent_api.rs
+++ b/src/api/trace_agent_api.rs
@@ -11,10 +11,7 @@ use crate::core::db::PostgresDb;

 pub fn trace_agent_routes() -> Router<crate::api::server::AppState> {
    Router::new()
-        .route(
-            "/api/v1/file/:file_uuid/traces",
-            post(list_traces_sorted),
-        )
+        .route("/api/v1/file/:file_uuid/traces", post(list_traces_sorted))
        .route(
            "/api/v1/file/:file_uuid/trace/:trace_id/faces",
            get(list_trace_faces),
@@ -78,9 +75,10 @@ async fn list_traces_sorted(
        _ => "start_frame ASC",
    };

-    let fps: f64 =
-        sqlx::query_scalar(&format!("SELECT COALESCE(fps, 24.0) FROM {} WHERE file_uuid = $1",
-            crate::core::db::schema::table_name("videos")))
+    let fps: f64 = sqlx::query_scalar(&format!(
+        "SELECT COALESCE(fps, 24.0) FROM {} WHERE file_uuid = $1",
+        crate::core::db::schema::table_name("videos")
+    ))
    .bind(&file_uuid)
    .fetch_optional(state.db.pool())
    .await
@@ -113,8 +111,7 @@ async fn list_traces_sorted(
        crate::core::db::schema::table_name("face_detections"),
    );

-    let rows: Vec<(i32, i64, i32, i32, f64, f64, Option<i32>)> =
-        sqlx::query_as(&query)
+    let rows: Vec<(i32, i64, i32, i32, f64, f64, Option<i32>)> = sqlx::query_as(&query)
        .bind(&file_uuid)
        .bind(min_faces)
        .bind(effective_limit)
@@ -220,19 +217,20 @@ async fn list_trace_faces(
    };
    let interpolate = q.interpolate.unwrap_or(false);

-    let fps: f64 =
-        sqlx::query_scalar(&format!("SELECT COALESCE(fps, 24.0) FROM {} WHERE file_uuid = $1",
-            crate::core::db::schema::table_name("videos")))
+    let fps: f64 = sqlx::query_scalar(&format!(
+        "SELECT COALESCE(fps, 24.0) FROM {} WHERE file_uuid = $1",
+        crate::core::db::schema::table_name("videos")
+    ))
    .bind(&file_uuid)
    .fetch_optional(state.db.pool())
    .await
    .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?
    .unwrap_or(24.0);

-    let total_detected: i64 = sqlx::query_scalar(
-        &format!("SELECT COUNT(*) FROM {} WHERE file_uuid = $1 AND trace_id = $2",
-            crate::core::db::schema::table_name("face_detections"))
-    )
+    let total_detected: i64 = sqlx::query_scalar(&format!(
+        "SELECT COUNT(*) FROM {} WHERE file_uuid = $1 AND trace_id = $2",
+        crate::core::db::schema::table_name("face_detections")
+    ))
    .bind(&file_uuid)
    .bind(trace_id)
    .fetch_one(state.db.pool())
@@ -247,12 +245,12 @@ async fn list_trace_faces(
        Option<i32>,
        Option<i32>,
        f32,
-    )> = sqlx::query_as(
-        &format!("SELECT id, frame_number::int, x, y, width, height, confidence::float4 \
+    )> = sqlx::query_as(&format!(
+        "SELECT id, frame_number::int, x, y, width, height, confidence::float4 \
                   FROM {} WHERE file_uuid = $1 AND trace_id = $2 \
                   ORDER BY frame_number ASC LIMIT $3 OFFSET $4",
-            crate::core::db::schema::table_name("face_detections"))
-    )
+        crate::core::db::schema::table_name("face_detections")
+    ))
    .bind(&file_uuid)
    .bind(trace_id)
    .bind(limit)
--- a/src/api/universal_search.rs
+++ b/src/api/universal_search.rs
@@ -88,9 +88,9 @@ pub enum SearchResult {
    },
    #[serde(rename = "person")]
    Person {
-        person_id: String,
+        identity_id: i32,
+        identity_uuid: String,
        name: Option<String>,
-        speaker_id: Option<String>,
        appearance_count: i32,
        score: f64,
        first_appearance_time: Option<f64>,
@@ -168,7 +168,7 @@ pub async fn universal_search(
        results.retain(|r| match r {
            SearchResult::Chunk { chunk_id, .. } => seen_chunks.insert(chunk_id.clone()),
            SearchResult::Frame { frame_number, .. } => seen_frames.insert(*frame_number),
-            SearchResult::Person { person_id, .. } => seen_persons.insert(person_id.clone()),
+            SearchResult::Person { identity_id, .. } => seen_persons.insert(*identity_id),
        });
    }

@@ -251,9 +251,9 @@ pub async fn search_persons(
    let limit = query.limit.unwrap_or(20);
    let persons = search_persons_by_query(
        &db,
+        &query.file_uuid,
        &query.query,
        query.min_appearances,
-        query.max_age,
        limit,
    )
    .await
@@ -305,7 +305,6 @@ pub struct PersonSearchQuery {
    pub file_uuid: String,
    pub query: Option<String>,
    pub min_appearances: Option<i32>,
-    pub max_age: Option<i32>, // New filter for "children"
    pub limit: Option<usize>,
 }

@@ -317,13 +316,9 @@ pub struct PersonSearchResponse {

 #[derive(Debug, Serialize)]
 pub struct PersonResult {
-    pub person_id: String,
+    pub identity_id: i32,
+    pub identity_uuid: String,
    pub name: Option<String>,
-    pub character_name: Option<String>,
-    pub aliases: Option<Vec<String>>,
-    pub age: Option<i32>,
-    pub gender: Option<String>,
-    pub speaker_id: Option<String>,
    pub appearance_count: i32,
    pub first_appearance_time: Option<f64>,
    pub last_appearance_time: Option<f64>,
@@ -594,43 +589,37 @@ async fn search_persons_internal(
    db: &PostgresDb,
    req: &UniversalSearchRequest,
 ) -> Result<Vec<SearchResult>, anyhow::Error> {
-    let table = "person_identities";
+    let uuid = match &req.file_uuid {
+        Some(u) => u.replace('\'', "''"),
+        None => return Err(anyhow::anyhow!("file_uuid is required for person search")),
+    };
+
+    let id_table = schema::table_name("identities");
+    let fd_table = schema::table_name("face_detections");
    let mut sql = format!(
-        "SELECT person_id, name, speaker_id, appearance_count, first_appearance_time, last_appearance_time FROM {} WHERE 1=1",
-        table
+        "SELECT i.id, i.uuid::text, i.name, COUNT(fd.id) AS appearance_count, \
+         MIN(fd.timestamp_secs) AS first_time, MAX(fd.timestamp_secs) AS last_time \
+         FROM {} i JOIN {} fd ON fd.identity_id = i.id \
+         WHERE fd.file_uuid = '{}'",
+        id_table, fd_table, uuid
    );

    if !req.query.is_empty() {
-        sql.push_str(&format!(
-            " AND (name ILIKE '%{}%' OR person_id ILIKE '%{}%' OR speaker_id ILIKE '%{}%')",
-            req.query, req.query, req.query
-        ));
-    }
-    if let Some(ref filters) = req.filters {
-        if let Some(ref speaker_id) = filters.speaker_id {
-            sql.push_str(&format!(" AND speaker_id = '{}'", speaker_id));
-        }
-        if let Some(ref person_id) = filters.person_id {
-            sql.push_str(&format!(" AND person_id = '{}'", person_id));
-        }
+        let q = req.query.replace('\'', "''");
+        sql.push_str(&format!(" AND i.name ILIKE '%{}%'", q));
    }

+    sql.push_str(" GROUP BY i.id, i.uuid, i.name");
    sql.push_str(" ORDER BY appearance_count DESC");
    sql.push_str(&format!(" LIMIT {}", req.page_size.unwrap_or(20)));

-    let rows: Vec<(
-        String,
-        Option<String>,
-        Option<String>,
-        i32,
-        Option<f64>,
-        Option<f64>,
-    )> = sqlx::query_as(&sql).fetch_all(db.pool()).await?;
+    let rows: Vec<(i32, String, Option<String>, i64, Option<f64>, Option<f64>)> =
+        sqlx::query_as(&sql).fetch_all(db.pool()).await?;

    let results: Vec<SearchResult> = rows
        .into_iter()
        .map(
-            |(person_id, name, speaker_id, appearance_count, first_time, last_time)| {
+            |(identity_id, identity_uuid, name, appearance_count, first_time, last_time)| {
                let score = if !req.query.is_empty()
                    && name.as_ref().map_or(false, |n| {
                        n.to_lowercase().contains(&req.query.to_lowercase())
@@ -641,10 +630,10 @@ async fn search_persons_internal(
                };

                SearchResult::Person {
-                    person_id,
+                    identity_id,
+                    identity_uuid,
                    name,
-                    speaker_id,
-                    appearance_count,
+                    appearance_count: appearance_count as i32,
                    score,
                    first_appearance_time: first_time,
                    last_appearance_time: last_time,
@@ -739,82 +728,49 @@ async fn search_frames_internal_v2(

 async fn search_persons_by_query(
    db: &PostgresDb,
+    file_uuid: &str,
    query: &Option<String>,
    min_appearances: Option<i32>,
-    max_age: Option<i32>,
    limit: usize,
 ) -> Result<Vec<PersonResult>, anyhow::Error> {
-    let table = "person_identities";
+    let id_table = schema::table_name("identities");
+    let fd_table = schema::table_name("face_detections");
    let mut sql = format!(
-        "SELECT person_id, name, character_name, aliases, age, gender, speaker_id, appearance_count, first_appearance_time, last_appearance_time FROM {} WHERE 1=1",
-        table
+        "SELECT i.id, i.uuid::text, i.name, COUNT(fd.id) AS appearance_count, \
+         MIN(fd.timestamp_secs) AS first_time, MAX(fd.timestamp_secs) AS last_time \
+         FROM {} i JOIN {} fd ON fd.identity_id = i.id \
+         WHERE fd.file_uuid = '{}'",
+        id_table,
+        fd_table,
+        file_uuid.replace('\'', "''")
    );

-    if let Some(ref q) = query {
-        // Search name, character_name, aliases (cast to text), person_id, speaker_id
-        sql.push_str(&format!(
-            " AND (name ILIKE '%{}%' OR character_name ILIKE '%{}%' OR aliases::text ILIKE '%{}%' OR person_id ILIKE '%{}%' OR speaker_id ILIKE '%{}%')",
-            q, q, q, q, q
-        ));
+    if let Some(q) = query {
+        let safe = q.replace('\'', "''");
+        sql.push_str(&format!(" AND i.name ILIKE '%{}%'", safe));
    }

+    sql.push_str(" GROUP BY i.id, i.uuid, i.name");
+
    if let Some(min) = min_appearances {
-        sql.push_str(&format!(" AND appearance_count >= {}", min));
-    }
-    if let Some(max_a) = max_age {
-        // Strictly filter for age <= max_age.
-        // Note: This excludes entries with NULL age.
-        sql.push_str(&format!(" AND age <= {}", max_a));
+        sql.push_str(&format!(" HAVING COUNT(fd.id) >= {}", min));
    }

    sql.push_str(" ORDER BY appearance_count DESC");
    sql.push_str(&format!(" LIMIT {}", limit));

-    let rows: Vec<(
-        String,
-        Option<String>,
-        Option<String>,
-        Option<serde_json::Value>,
-        Option<i32>,
-        Option<String>,
-        Option<String>,
-        i32,
-        Option<f64>,
-        Option<f64>,
-    )> = sqlx::query_as(&sql).fetch_all(db.pool()).await?;
+    let rows: Vec<(i32, String, Option<String>, i64, Option<f64>, Option<f64>)> =
+        sqlx::query_as(&sql).fetch_all(db.pool()).await?;

    let results: Vec<PersonResult> = rows
        .into_iter()
        .map(
-            |(
-                person_id,
-                name,
-                character_name,
-                aliases_json,
-                age,
-                gender,
-                speaker_id,
-                appearance_count,
-                first_time,
-                last_time,
-            )| {
-                let aliases = aliases_json.and_then(|v| {
-                    v.as_array().map(|arr| {
-                        arr.iter()
-                            .filter_map(|val| val.as_str().map(String::from))
-                            .collect()
-                    })
-                });
-
+            |(identity_id, identity_uuid, name, appearance_count, first_time, last_time)| {
                PersonResult {
-                    person_id,
+                    identity_id,
+                    identity_uuid,
                    name,
-                    character_name,
-                    aliases,
-                    age,
-                    gender,
-                    speaker_id,
-                    appearance_count,
+                    appearance_count: appearance_count as i32,
                    first_appearance_time: first_time,
                    last_appearance_time: last_time,
                }
--- a/src/api/visual_chunk_search.rs
+++ b/src/api/visual_chunk_search.rs
@@ -392,8 +392,14 @@ pub async fn get_visual_chunk_statistics(
        uuid.replace('\'', "''")
    );

-    let row: (i64, Option<f64>, Option<f64>, Option<f64>, Option<i64>, Option<f64>) =
-        sqlx::query_as(&sql).fetch_one(db.pool()).await?;
+    let row: (
+        i64,
+        Option<f64>,
+        Option<f64>,
+        Option<f64>,
+        Option<i64>,
+        Option<f64>,
+    ) = sqlx::query_as(&sql).fetch_one(db.pool()).await?;

    let mut stats = HashMap::new();
    stats.insert("total_chunks".to_string(), Value::from(row.0));
--- a/src/bin/release.rs
+++ b/src/bin/release.rs
@@ -13,7 +13,14 @@ use std::path::{Path, PathBuf};
 use std::process::Command;

 fn dir_size(path: &Path) -> u64 {
-    path.read_dir().map(|d| d.filter_map(|e| e.ok()).filter_map(|e| e.metadata().ok()).map(|m| m.len()).sum()).unwrap_or(0)
+    // Total metadata length of the directory's direct entries; unreadable ones are skipped.
+    match path.read_dir() {
+        Ok(entries) => entries
+            .filter_map(|entry| entry.ok()?.metadata().ok())
+            .map(|meta| meta.len())
+            .sum(),
+        Err(_) => 0,
+    }
 }

 const DEMO_DIR: &str = "/Users/accusys/momentry/var/sftpgo/data/demo";
@@ -22,7 +29,10 @@ const RELEASE_DIR: &str = "/Users/accusys/momentry_core_0.1/release/files";
 const PG_BIN: &str = "/Users/accusys/pgsql/18.3/bin";

 #[derive(Parser)]
-#[command(name = "release", about = "Release Manager — deploy/undeploy video packages")]
+#[command(
+    name = "release",
+    about = "Release Manager — deploy/undeploy video packages"
+)]
 struct Cli {
    #[command(subcommand)]
    command: Commands,
@@ -107,7 +117,12 @@ fn extract_tarball(tarball: &Path) -> Result<PathBuf> {
    fs::create_dir_all(&tmpdir)?;

    let status = Command::new("tar")
-        .args(["-xzf", tarball.to_str().unwrap(), "-C", tmpdir.to_str().unwrap()])
+        .args([
+            "-xzf",
+            tarball.to_str().unwrap(),
+            "-C",
+            tmpdir.to_str().unwrap(),
+        ])
        .status()
        .context("tar extraction failed")?;
    if !status.success() {
@@ -127,8 +142,8 @@ fn extract_tarball(tarball: &Path) -> Result<PathBuf> {
 /// Get file_info.json from package directory
 fn read_file_info(pkg_dir: &Path) -> Result<serde_json::Value> {
     let info_path = pkg_dir.join("file_info.json");
-    let content = fs::read_to_string(&info_path)
-        .with_context(|| format!("Cannot read {:?}", info_path))?;
+    let raw = fs::read_to_string(&info_path);
+    let content = raw.with_context(|| format!("Cannot read {:?}", info_path))?;
     serde_json::from_str(&content).context("Invalid file_info.json")
 }

@@ -140,7 +155,10 @@ async fn cmd_deploy(db: &PostgresDb, tarball: &str) -> Result<()> {
        anyhow::bail!("File not found: {}", tarball);
    }

-    println!("=== Deploy: {} ===", tarball_path.file_name().unwrap().to_str().unwrap());
+    println!(
+        "=== Deploy: {} ===",
+        tarball_path.file_name().unwrap().to_str().unwrap()
+    );

    // Extract
    let pkg_dir = extract_tarball(tarball_path)?;
@@ -148,7 +166,9 @@ async fn cmd_deploy(db: &PostgresDb, tarball: &str) -> Result<()> {

    // Read file_info
    let info = read_file_info(&pkg_dir)?;
-    let uuid = info["file_uuid"].as_str().context("Missing file_uuid in file_info.json")?;
+    let uuid = info["file_uuid"]
+        .as_str()
+        .context("Missing file_uuid in file_info.json")?;
    let file_name = info["file_name"].as_str().unwrap_or("?");
    println!("UUID: {}\nVideo: {}", uuid, file_name);

@@ -168,7 +188,8 @@ async fn cmd_deploy(db: &PostgresDb, tarball: &str) -> Result<()> {
        let entry = entry?;
        let fname = entry.file_name();
        let fname_str = fname.to_str().unwrap_or("");
-        if fname_str.ends_with(".mp4") || fname_str.ends_with(".mov") || fname_str.ends_with(".avi") {
+        if fname_str.ends_with(".mp4") || fname_str.ends_with(".mov") || fname_str.ends_with(".avi")
+        {
            let dest = Path::new(DEMO_DIR).join(&fname);
            if !dest.exists() {
                fs::copy(entry.path(), &dest)?;
@@ -192,12 +213,15 @@ async fn cmd_deploy(db: &PostgresDb, tarball: &str) -> Result<()> {
    println!("Output files copied to {}", OUTPUT_DIR);

    // Verify
-    let chunk_count: (i64,) = sqlx::query_as(
-        "SELECT COUNT(*) FROM dev.chunk WHERE file_uuid = $1"
-    ).bind(uuid).fetch_one(db.pool()).await?;
-    let face_count: (i64,) = sqlx::query_as(
-        "SELECT COUNT(*) FROM dev.face_detections WHERE file_uuid = $1"
-    ).bind(uuid).fetch_one(db.pool()).await?;
+    let chunk_count: (i64,) = sqlx::query_as("SELECT COUNT(*) FROM dev.chunk WHERE file_uuid = $1")
+        .bind(uuid)
+        .fetch_one(db.pool())
+        .await?;
+    let face_count: (i64,) =
+        sqlx::query_as("SELECT COUNT(*) FROM dev.face_detections WHERE file_uuid = $1")
+            .bind(uuid)
+            .fetch_one(db.pool())
+            .await?;

    // Cleanup
    fs::remove_dir_all(&pkg_dir.parent().unwrap_or(&pkg_dir))?;
@@ -213,9 +237,11 @@ async fn cmd_deploy(db: &PostgresDb, tarball: &str) -> Result<()> {

 async fn cmd_undeploy(db: &PostgresDb, uuid: &str, skip_confirm: bool) -> Result<()> {
    // Get video info
-    let rows: Vec<(String, String)> = sqlx::query_as(
-        "SELECT file_name, file_path FROM dev.videos WHERE file_uuid = $1"
-    ).bind(uuid).fetch_all(db.pool()).await?;
+    let rows: Vec<(String, String)> =
+        sqlx::query_as("SELECT file_name, file_path FROM dev.videos WHERE file_uuid = $1")
+            .bind(uuid)
+            .fetch_all(db.pool())
+            .await?;

    if rows.is_empty() {
        anyhow::bail!("UUID {} not found in DB", uuid);
@@ -252,7 +278,9 @@ async fn cmd_undeploy(db: &PostgresDb, uuid: &str, skip_confirm: bool) -> Result
        println!("  {}: {} rows deleted", tbl, result.rows_affected());
    }
    sqlx::query("DELETE FROM dev.videos WHERE file_uuid = $1")
-        .bind(uuid).execute(db.pool()).await?;
+        .bind(uuid)
+        .execute(db.pool())
+        .await?;
    println!("  dev.videos: removed");

    // Delete output files
@@ -270,7 +298,10 @@ async fn cmd_undeploy(db: &PostgresDb, uuid: &str, skip_confirm: bool) -> Result
        let vp = Path::new(file_path);
        if vp.exists() {
            fs::remove_file(vp)?;
-            println!("  Video file: removed ({})", vp.file_name().unwrap().to_str().unwrap_or("?"));
+            println!(
+                "  Video file: removed ({})",
+                vp.file_name().unwrap().to_str().unwrap_or("?")
+            );
        }
    }

@@ -292,11 +323,15 @@ async fn cmd_list(db: &PostgresDb) -> Result<()> {
        "SELECT file_uuid, file_name, duration, status,
                (SELECT COUNT(*) FROM dev.chunk WHERE file_uuid = v.file_uuid) as chunks,
                (SELECT COUNT(*) FROM dev.face_detections WHERE file_uuid = v.file_uuid) as faces
-         FROM dev.videos v ORDER BY id DESC"
-    ).fetch_all(db.pool()).await?;
+         FROM dev.videos v ORDER BY id DESC",
+    )
+    .fetch_all(db.pool())
+    .await?;

-    println!("{:<36} {:<44} {:>8} {:>10} {:>6} {:>6}",
-        "UUID", "Name", "Duration", "Status", "Chunks", "Faces");
+    println!(
+        "{:<36} {:<44} {:>8} {:>10} {:>6} {:>6}",
+        "UUID", "Name", "Duration", "Status", "Chunks", "Faces"
+    );
    println!("{}", "-".repeat(116));

    for row in &rows {
@@ -318,10 +353,15 @@ async fn cmd_list(db: &PostgresDb) -> Result<()> {
            name.clone()
        };

-        println!("{:<36} {:<44} {:>8} {:>10} {:>6} {:>6}",
-            uuid, short_name, dur_str,
+        println!(
+            "{:<36} {:<44} {:>8} {:>10} {:>6} {:>6}",
+            uuid,
+            short_name,
+            dur_str,
            status.as_deref().unwrap_or("?"),
-            chunks.unwrap_or(0), faces.unwrap_or(0));
+            chunks.unwrap_or(0),
+            faces.unwrap_or(0)
+        );
    }
    Ok(())
 }
@@ -336,9 +376,23 @@ async fn cmd_package(db: &PostgresDb, uuid: &str) -> Result<()> {
        "SELECT file_uuid, file_name, file_path, duration, fps, width, height FROM dev.videos WHERE file_uuid = $1"
    ).bind(uuid).fetch_optional(db.pool()).await?;
    let (_, file_name, file_path, duration, fps, width, height): (
-        String, String, String, Option<f64>, Option<f64>, Option<i32>, Option<i32>
+        String,
+        String,
+        String,
+        Option<f64>,
+        Option<f64>,
+        Option<i32>,
+        Option<i32>,
    ) = match row {
-        Some(r) => (r.get(0), r.get(1), r.get(2), r.get(3), r.get(4), r.get(5), r.get(6)),
+        Some(r) => (
+            r.get(0),
+            r.get(1),
+            r.get(2),
+            r.get(3),
+            r.get(4),
+            r.get(5),
+            r.get(6),
+        ),
        None => anyhow::bail!("UUID {} not found", uuid),
    };

@@ -360,7 +414,10 @@ async fn cmd_package(db: &PostgresDb, uuid: &str) -> Result<()> {
        "momentry_version": env!("CARGO_PKG_VERSION"),
        "momentry_build": env!("BUILD_GIT_HASH"),
    });
-    fs::write(outdir.join("file_info.json"), serde_json::to_string_pretty(&info)?)?;
+    fs::write(
+        outdir.join("file_info.json"),
+        serde_json::to_string_pretty(&info)?,
+    )?;

    // Export per-table .sql files (avoid single 4.7GB psql load)
    let sql_dir = outdir.join("sql");
@@ -376,7 +433,13 @@ async fn cmd_package(db: &PostgresDb, uuid: &str) -> Result<()> {

    let mut import_order = vec!["master.sql"];

-    fn write_table_sql(outdir: &Path, tbl: &str, col: &str, uuid: &str, psql_exec: &dyn Fn(&str) -> Result<String>) -> Result<()> {
+    fn write_table_sql(
+        outdir: &Path,
+        tbl: &str,
+        col: &str,
+        uuid: &str,
+        psql_exec: &dyn Fn(&str) -> Result<String>,
+    ) -> Result<()> {
        let safe_name = tbl.replace('.', "_");
        let path = outdir.join(format!("{}.sql", safe_name));
        let parts: Vec<&str> = tbl.split('.').collect();
@@ -419,8 +482,16 @@ async fn cmd_package(db: &PostgresDb, uuid: &str) -> Result<()> {
        let data = psql_exec(&idents_query)?;
        if !data.is_empty() {
            let mut f = fs::File::create(&idents_path)?;
-            writeln!(f, "-- dev.identities WHERE file_uuid = '{}' OR global (tmdb/merged/user_defined)", uuid)?;
-            writeln!(f, "COPY dev.identities ({}) FROM STDIN WITH CSV HEADER;", cols)?;
+            writeln!(
+                f,
+                "-- dev.identities WHERE file_uuid = '{}' OR global (tmdb/merged/user_defined)",
+                uuid
+            )?;
+            writeln!(
+                f,
+                "COPY dev.identities ({}) FROM STDIN WITH CSV HEADER;",
+                cols
+            )?;
            writeln!(f, "{}", data)?;
            writeln!(f, "\\.")?;
        }
@@ -440,7 +511,11 @@ async fn cmd_package(db: &PostgresDb, uuid: &str) -> Result<()> {
        if !data.is_empty() {
            let mut f = fs::File::create(&binds_path)?;
            writeln!(f, "-- dev.identity_bindings (from face_detections JOIN)")?;
-            writeln!(f, "COPY dev.identity_bindings ({}) FROM STDIN WITH CSV HEADER;", cols)?;
+            writeln!(
+                f,
+                "COPY dev.identity_bindings ({}) FROM STDIN WITH CSV HEADER;",
+                cols
+            )?;
            writeln!(f, "{}", data)?;
            writeln!(f, "\\.")?;
        }
@@ -469,7 +544,11 @@ async fn cmd_package(db: &PostgresDb, uuid: &str) -> Result<()> {
    let sql_path = outdir.join("data.sql");
    {
        let mut f = fs::File::create(&sql_path)?;
-        writeln!(f, "-- Release package: {} — see sql/ for per-table files", uuid)?;
+        writeln!(
+            f,
+            "-- Release package: {} — see sql/ for per-table files",
+            uuid
+        )?;
        writeln!(f, "BEGIN;")?;
        writeln!(f, "\\i sql/dev_videos.sql")?;
        writeln!(f, "\\i sql/dev_chunk.sql")?;
@@ -492,7 +571,11 @@ async fn cmd_package(db: &PostgresDb, uuid: &str) -> Result<()> {
            let dest = outdir.join(vp.file_name().unwrap());
            fs::copy(vp, &dest)?;
            let vsize = fs::metadata(&dest)?.len();
-            println!("  {} ({} MB)", vp.file_name().unwrap().to_str().unwrap_or("?"), vsize / 1024 / 1024);
+            println!(
+                "  {} ({} MB)",
+                vp.file_name().unwrap().to_str().unwrap_or("?"),
+                vsize / 1024 / 1024
+            );
        }
    }

@@ -541,11 +624,18 @@ async fn cmd_package(db: &PostgresDb, uuid: &str) -> Result<()> {
    let vec0_src = "/Users/accusys/momentry_core_0.1/scripts/vec0.dylib";
    if Path::new(vec0_src).exists() {
        fs::copy(vec0_src, outdir.join("vec0.dylib"))?;
-        println!("  vec0.dylib ({} KB)", fs::metadata(outdir.join("vec0.dylib"))?.len() / 1024);
+        println!(
+            "  vec0.dylib ({} KB)",
+            fs::metadata(outdir.join("vec0.dylib"))?.len() / 1024
+        );
    }

    // Create tar.gz
-    let tarball = Path::new(RELEASE_DIR).join(format!("{}_v{}.tar.gz", uuid, Utc::now().format("%Y%m%d_%H%M%S")));
+    let tarball = Path::new(RELEASE_DIR).join(format!(
+        "{}_v{}.tar.gz",
+        uuid,
+        Utc::now().format("%Y%m%d_%H%M%S")
+    ));
    let status = Command::new("tar")
        .args(["-czf", tarball.to_str().unwrap(), "-C", RELEASE_DIR, uuid])
        .status()?;
@@ -553,7 +643,11 @@ async fn cmd_package(db: &PostgresDb, uuid: &str) -> Result<()> {
        anyhow::bail!("tar creation failed");
    }
    let tsize = fs::metadata(&tarball)?.len();
-    println!("\n  Package: {} ({} MB)", tarball.display(), tsize / 1024 / 1024);
+    println!(
+        "\n  Package: {} ({} MB)",
+        tarball.display(),
+        tsize / 1024 / 1024
+    );

    // Sanity check: warn if any sql file is suspiciously large
    println!("  Checking sql/ file sizes...");
@@ -564,33 +658,55 @@ async fn cmd_package(db: &PostgresDb, uuid: &str) -> Result<()> {
            let sz = fs::metadata(&path)?.len() as f64 / 1024.0 / 1024.0;
            let name = path.file_stem().and_then(|s| s.to_str()).unwrap_or("?");
            match name {
-                "dev_videos" | "master" if sz > 1.0 =>
-                    println!("  ⚠️  {} is {} MB, expected < 1 MB", name, sz as u64),
-                "dev_chunk" if sz > 2.0 =>
-                    println!("  ⚠️  {} is {} MB, expected < 2 MB for ~2.4K chunks", name, sz as u64),
-                "dev_identities" if sz > 1.0 =>
-                    println!("  ⚠️  {} is {} MB, expected < 1 MB for ~428 identities", name, sz as u64),
-                "dev_identity_bindings" if sz > 5.0 =>
-                    println!("  ⚠️  {} is {} MB, expected < 5 MB for ~7.6K bindings", name, sz as u64),
-                "dev_tkg_nodes" if sz > 10.0 =>
-                    println!("  ⚠️  {} is {} MB, expected < 10 MB for ~6.4K nodes", name, sz as u64),
-                "dev_tkg_edges" if sz > 20.0 =>
-                    println!("  ⚠️  {} is {} MB, expected < 20 MB for ~21K edges", name, sz as u64),
-                "dev_face_detections" if sz > 1000.0 =>
-                    println!("  ⚠️  {} is {} MB, expected < 1000 MB for ~70K faces (512D emb)", name, sz as u64),
-                "dev_chunk_vectors" if sz > 200.0 =>
-                    println!("  ⚠️  {} is {} MB, expected < 200 MB for ~2.4K chunks (768D emb)", name, sz as u64),
+                "dev_videos" | "master" if sz > 1.0 => {
+                    println!("  ⚠️  {} is {} MB, expected < 1 MB", name, sz as u64)
+                }
+                "dev_chunk" if sz > 2.0 => println!(
+                    "  ⚠️  {} is {} MB, expected < 2 MB for ~2.4K chunks",
+                    name, sz as u64
+                ),
+                "dev_identities" if sz > 1.0 => println!(
+                    "  ⚠️  {} is {} MB, expected < 1 MB for ~428 identities",
+                    name, sz as u64
+                ),
+                "dev_identity_bindings" if sz > 5.0 => println!(
+                    "  ⚠️  {} is {} MB, expected < 5 MB for ~7.6K bindings",
+                    name, sz as u64
+                ),
+                "dev_tkg_nodes" if sz > 10.0 => println!(
+                    "  ⚠️  {} is {} MB, expected < 10 MB for ~6.4K nodes",
+                    name, sz as u64
+                ),
+                "dev_tkg_edges" if sz > 20.0 => println!(
+                    "  ⚠️  {} is {} MB, expected < 20 MB for ~21K edges",
+                    name, sz as u64
+                ),
+                "dev_face_detections" if sz > 1000.0 => println!(
+                    "  ⚠️  {} is {} MB, expected < 1000 MB for ~70K faces (512D emb)",
+                    name, sz as u64
+                ),
+                "dev_chunk_vectors" if sz > 200.0 => println!(
+                    "  ⚠️  {} is {} MB, expected < 200 MB for ~2.4K chunks (768D emb)",
+                    name, sz as u64
+                ),
                _ => {}
            }
            if sz > 2000.0 {
-                println!("  ⚠️  {} is {:.0} MB — unusually large, verify query", name, sz);
+                println!(
+                    "  ⚠️  {} is {:.0} MB — unusually large, verify query",
+                    name, sz
+                );
            }
        }
    }
    Ok(())
 }

-fn cmd_visualize_offline(sqlite_path: &str, output: Option<&str>, identity: Option<i64>) -> Result<()> {
+fn cmd_visualize_offline(
+    sqlite_path: &str,
+    output: Option<&str>,
+    identity: Option<i64>,
+) -> Result<()> {
    let outpath = match output {
        Some(p) => p.to_string(),
        None => sqlite_path.replace(".sqlite", "_report.html"),
@@ -606,7 +722,10 @@ fn cmd_visualize_offline(sqlite_path: &str, output: Option<&str>, identity: Opti
        .output()
        .context("Offline report script failed")?;
    if !output.status.success() {
-        anyhow::bail!("Offline report: {}", String::from_utf8_lossy(&output.stderr));
+        anyhow::bail!(
+            "Offline report: {}",
+            String::from_utf8_lossy(&output.stderr)
+        );
    }
    println!("{}", String::from_utf8_lossy(&output.stdout));
    println!("\n  Open: {}", outpath);
@@ -624,7 +743,10 @@ fn cmd_visualize(uuid: &str, typ: &str, output: Option<&str>, identity: Option<i
    match typ {
        "heatmap" | "density" => generate_face_heatmap(uuid, &outpath, identity)?,
        "timeline" => generate_face_timeline(uuid, &outpath, identity)?,
-        _ => anyhow::bail!("Unknown visualization type: {}. Try: heatmap, density, timeline", typ),
+        _ => anyhow::bail!(
+            "Unknown visualization type: {}. Try: heatmap, density, timeline",
+            typ
+        ),
    }
    Ok(())
 }
@@ -698,16 +820,28 @@ fn cmd_stats() -> Result<()> {

        for line in listing.lines() {
            let trimmed = line.trim();
-            if trimmed.is_empty() || trimmed.ends_with('/') { continue; }
+            if trimmed.is_empty() || trimmed.ends_with('/') {
+                continue;
+            }

            // tar -tvzf format: perms link owner group size date_month date_day time path...
            // Fields are space-separated; size is 5th field, path starts at 8th field
            let parts: Vec<&str> = trimmed.split_whitespace().collect();
-            if parts.len() < 8 { continue; }
+            if parts.len() < 8 {
+                continue;
+            }
            let fsize = parts[4].parse::<u64>().unwrap_or(0);
            let fpath = parts[8..].join(" ");
-            let fname = Path::new(&fpath).file_name().unwrap_or_default().to_str().unwrap_or("?");
-            let ext = Path::new(&fpath).extension().unwrap_or_default().to_str().unwrap_or("");
+            let fname = Path::new(&fpath)
+                .file_name()
+                .unwrap_or_default()
+                .to_str()
+                .unwrap_or("?");
+            let ext = Path::new(&fpath)
+                .extension()
+                .unwrap_or_default()
+                .to_str()
+                .unwrap_or("");

            match ext {
                "sql" => {
@@ -732,10 +866,26 @@ fn cmd_stats() -> Result<()> {
        }

        println!("  ─────────────────────────────");
-        println!("  SQL: {} files, {:.0} MB", sql_count, total_sql as f64 / 1048576.0);
-        println!("  Video: {} files, {:.0} MB", video_count, total_video as f64 / 1048576.0);
-        println!("  JSON: {} files, {:.0} MB", json_count, total_json as f64 / 1048576.0);
-        println!("  Total: {:.0} MB (compressed: {:.0} MB)", (total_sql + total_video + total_json) as f64 / 1048576.0, pkg_size as f64 / 1048576.0);
+        println!(
+            "  SQL: {} files, {:.0} MB",
+            sql_count,
+            total_sql as f64 / 1048576.0
+        );
+        println!(
+            "  Video: {} files, {:.0} MB",
+            video_count,
+            total_video as f64 / 1048576.0
+        );
+        println!(
+            "  JSON: {} files, {:.0} MB",
+            json_count,
+            total_json as f64 / 1048576.0
+        );
+        println!(
+            "  Total: {:.0} MB (compressed: {:.0} MB)",
+            (total_sql + total_video + total_json) as f64 / 1048576.0,
+            pkg_size as f64 / 1048576.0
+        );
        println!();
    }

@@ -758,8 +908,17 @@ async fn main() -> Result<()> {
        Commands::List => cmd_list(&db).await?,
        Commands::Package { uuid } => cmd_package(&db, &uuid).await?,
        Commands::Stats => cmd_stats()?,
-        Commands::Visualize { uuid, typ, output, identity } => cmd_visualize(&uuid, &typ, output.as_deref(), identity)?,
-        Commands::VisualizeOffline { sqlite_path, output, identity } => cmd_visualize_offline(&sqlite_path, output.as_deref(), identity)?,
+        Commands::Visualize {
+            uuid,
+            typ,
+            output,
+            identity,
+        } => cmd_visualize(&uuid, &typ, output.as_deref(), identity)?,
+        Commands::VisualizeOffline {
+            sqlite_path,
+            output,
+            identity,
+        } => cmd_visualize_offline(&sqlite_path, output.as_deref(), identity)?,
    }
    Ok(())
 }
--- a/src/bin/service.rs
+++ b/src/bin/service.rs
@@ -16,7 +16,10 @@ const LOG_DIR: &str = "/Users/accusys/service_logs";
 const LAUNCH_DIR: &str = "/Users/accusys/Library/LaunchAgents";

 #[derive(Parser)]
-#[command(name = "service", about = "Service Lifecycle Manager — source → build → install → config → launch → env")]
+#[command(
+    name = "service",
+    about = "Service Lifecycle Manager — source → build → install → config → launch → env"
+)]
 struct Cli {
    #[command(subcommand)]
    command: Commands,
@@ -111,22 +114,54 @@ fn cmd_source_list() -> Result<()> {
        ("pyenv", "pyenv/", "git repo"),
        ("cmake", "cmake-4.2.0-macos-universal.tar.gz", "binary"),
        ("llama.cpp", "llama.cpp/", "git repo"),
-        ("libreoffice (src)", "libreoffice-26.2.3.2.tar.xz", "source tarball"),
-        ("libreoffice (dmg)", "LibreOffice_26.2.3_MacOS_aarch64.dmg", "binary (TDF)"),
-        ("mermaid-cli", "mermaid-js-mermaid-cli-11.14.0.tgz", "npm package"),
+        (
+            "libreoffice (src)",
+            "libreoffice-26.2.3.2.tar.xz",
+            "source tarball",
+        ),
+        (
+            "libreoffice (dmg)",
+            "LibreOffice_26.2.3_MacOS_aarch64.dmg",
+            "binary (TDF)",
+        ),
+        (
+            "mermaid-cli",
+            "mermaid-js-mermaid-cli-11.14.0.tgz",
+            "npm package",
+        ),
        ("librsvg", "librsvg/", "Rust source"),
-        ("GroundingDINO", "GroundingDINO/", "git repo (IDEA-Research)"),
+        (
+            "GroundingDINO",
+            "GroundingDINO/",
+            "git repo (IDEA-Research)",
+        ),
        ("PaliGemma", "paligemma/", "HuggingFace reference"),
        ("Odoo 19 CE", "odoo/", "git repo (LGPL-3.0)"),
        ("ERPNext v15", "erpnext/", "git repo (GPL-3.0)"),
        ("Frappe Framework", "frappe/", "git repo (MIT)"),
        ("Gitea v1.25", "gitea/", "git repo (MIT, Go)"),
        ("Go v1.26", "go/", "git repo (BSD)"),
-        ("Rust/Cargo", "rustc-1.92.0-src.tar.xz", "source tarball (Apache 2.0 / MIT)"),
-        ("rustup", "rustup-1.28.1.tar.gz", "source tarball (Apache 2.0)"),
-        ("Swift v6.3", "swift-6.3.1-RELEASE.tar.gz", "source tarball (Apache 2.0)"),
+        (
+            "Rust/Cargo",
+            "rustc-1.92.0-src.tar.xz",
+            "source tarball (Apache 2.0 / MIT)",
+        ),
+        (
+            "rustup",
+            "rustup-1.28.1.tar.gz",
+            "source tarball (Apache 2.0)",
+        ),
+        (
+            "Swift v6.3",
+            "swift-6.3.1-RELEASE.tar.gz",
+            "source tarball (Apache 2.0)",
+        ),
        ("yt-dlp", "yt-dlp/", "git repo (Unlicense)"),
-        ("SQLite", "sqlite-amalgamation-3490100.zip", "amalgamation (Public Domain)"),
+        (
+            "SQLite",
+            "sqlite-amalgamation-3490100.zip",
+            "amalgamation (Public Domain)",
+        ),
        ("sqlite-vec", "sqlite-vec/", "git repo (MIT)"),
    ];

@@ -164,7 +199,11 @@ fn cmd_source_verify() -> Result<()> {
        ("cmake", "cmake-4.2.0-macos-universal.tar.gz", false),
        ("llama.cpp", "llama.cpp/", true),
        ("libreoffice (src)", "libreoffice-26.2.3.2.tar.xz", false),
-        ("libreoffice (dmg)", "LibreOffice_26.2.3_MacOS_aarch64.dmg", false),
+        (
+            "libreoffice (dmg)",
+            "LibreOffice_26.2.3_MacOS_aarch64.dmg",
+            false,
+        ),
        ("mermaid-cli", "mermaid-js-mermaid-cli-11.14.0.tgz", false),
        ("librsvg", "librsvg/", true),
        ("GroundingDINO", "GroundingDINO/", true),
@@ -186,7 +225,11 @@ fn cmd_source_verify() -> Result<()> {
    let mut missing = 0;
    for (name, path, is_dir) in &checks {
        let full = src_dir.join(path);
-        let exists = if *is_dir { full.is_dir() } else { full.is_file() };
+        let exists = if *is_dir {
+            full.is_dir()
+        } else {
+            full.is_file()
+        };
        if exists {
            println!("  ✅ {}", name);
            ok += 1;
@@ -202,7 +245,10 @@ fn cmd_source_verify() -> Result<()> {
 // ---- Build ----

 fn cmd_build(service: &str) -> Result<()> {
-    let install_sh = Path::new(SERVICE_SRC).parent().unwrap().join("install_services.sh");
+    let install_sh = Path::new(SERVICE_SRC)
+        .parent()
+        .unwrap()
+        .join("install_services.sh");

    if service == "all" {
        // Run the full install script
@@ -224,8 +270,14 @@ fn cmd_build(service: &str) -> Result<()> {
        "ffmpeg" => {
            println!("Building ffmpeg (requires x264 + freetype)...");
            // Simplified: run the install script which handles incremental builds
-            let status = Command::new("bash").arg(&install_sh).env("PREFIX", PREFIX).env("SRC_DIR", SERVICE_SRC).status()?;
-            if !status.success() { anyhow::bail!("Build failed"); }
+            let status = Command::new("bash")
+                .arg(&install_sh)
+                .env("PREFIX", PREFIX)
+                .env("SRC_DIR", SERVICE_SRC)
+                .status()?;
+            if !status.success() {
+                anyhow::bail!("Build failed");
+            }
        }
        "redis" => {
            let src = format!("{}/redis-7.4.3.tar.gz", SERVICE_SRC);
@@ -236,37 +288,67 @@ fn cmd_build(service: &str) -> Result<()> {
            run_build("postgresql", &src, &format!("cd /tmp && tar xzf {} && cd postgresql-18.3 && ./configure --prefix={}/pgsql/18.3 && make -j$(sysctl -n hw.ncpu) && make install", src, PREFIX))?;
        }
        "llama" => {
-            println!("Building llama.cpp from {}...", format!("{}/llama.cpp", SERVICE_SRC));
+            println!(
+                "Building llama.cpp from {}...",
+                format!("{}/llama.cpp", SERVICE_SRC)
+            );
            let status = Command::new("cmake")
                .args(["-B", "build", "-DCMAKE_INSTALL_PREFIX=/tmp/llama_install"])
                .current_dir(format!("{}/llama.cpp", SERVICE_SRC))
                .status()?;
-            if !status.success() { anyhow::bail!("cmake failed"); }
-            let status = Command::new("cmake").args(["--build", "build", "--config", "Release", "-j"]).current_dir(format!("{}/llama.cpp", SERVICE_SRC)).status()?;
-            if !status.success() { anyhow::bail!("build failed"); }
+            if !status.success() {
+                anyhow::bail!("cmake failed");
+            }
+            let status = Command::new("cmake")
+                .args(["--build", "build", "--config", "Release", "-j"])
+                .current_dir(format!("{}/llama.cpp", SERVICE_SRC))
+                .status()?;
+            if !status.success() {
+                anyhow::bail!("build failed");
+            }
        }
        "libreoffice" => {
            let dmg = format!("{}/LibreOffice_26.2.3_MacOS_aarch64.dmg", SERVICE_SRC);
            let mount = "/tmp/lo_mount";
            println!("Extracting LibreOffice from DMG...");
            // Mount
-            let status = Command::new("hdiutil").args(["attach", &dmg, "-nobrowse", "-quiet", "-mountpoint", mount]).status()?;
-            if !status.success() { anyhow::bail!("DMG mount failed"); }
+            let status = Command::new("hdiutil")
+                .args(["attach", &dmg, "-nobrowse", "-quiet", "-mountpoint", mount])
+                .status()?;
+            if !status.success() {
+                anyhow::bail!("DMG mount failed");
+            }
            // Copy app
            let lo_dir = format!("{}/libreoffice", PREFIX);
            let _ = std::fs::remove_dir_all(format!("{}/LibreOffice.app", lo_dir));
            std::fs::create_dir_all(&lo_dir)?;
-            let status = Command::new("cp").args(["-R", &format!("{}/LibreOffice.app", mount), &format!("{}/LibreOffice.app", lo_dir)]).status()?;
-            if !status.success() { anyhow::bail!("Copy failed"); }
+            let status = Command::new("cp")
+                .args([
+                    "-R",
+                    &format!("{}/LibreOffice.app", mount),
+                    &format!("{}/LibreOffice.app", lo_dir),
+                ])
+                .status()?;
+            if !status.success() {
+                anyhow::bail!("Copy failed");
+            }
            // Create symlink
            std::fs::create_dir_all(format!("{}/bin", lo_dir))?;
            let _ = std::fs::remove_file(format!("{}/bin/soffice", lo_dir));
-            std::os::unix::fs::symlink("../LibreOffice.app/Contents/MacOS/soffice", format!("{}/bin/soffice", lo_dir))?;
+            std::os::unix::fs::symlink(
+                "../LibreOffice.app/Contents/MacOS/soffice",
+                format!("{}/bin/soffice", lo_dir),
+            )?;
            // Unmount
-            let _ = Command::new("hdiutil").args(["detach", mount, "-quiet"]).status();
+            let _ = Command::new("hdiutil")
+                .args(["detach", mount, "-quiet"])
+                .status();
            println!("  libreoffice installed to {}/bin/soffice", lo_dir);
        }
-        _ => anyhow::bail!("Unknown service: {}. Try: all, ffmpeg, redis, postgres, llama, libreoffice, python", service),
+        _ => anyhow::bail!(
+            "Unknown service: {}. Try: all, ffmpeg, redis, postgres, llama, libreoffice, python",
+            service
+        ),
    }
    Ok(())
 }
@@ -274,7 +356,9 @@ fn cmd_build(service: &str) -> Result<()> {
 fn run_build(name: &str, src: &str, cmd: &str) -> Result<()> {
    println!("Building {} from {}...", name, src);
    let status = Command::new("bash").arg("-c").arg(cmd).status()?;
-    if !status.success() { anyhow::bail!("{} build failed", name); }
+    if !status.success() {
+        anyhow::bail!("{} build failed", name);
+    }
    println!("  {} build complete", name);
    Ok(())
 }
@@ -292,7 +376,10 @@ fn cmd_install(service: &str) -> Result<()> {
    let rsvg_src = format!("{}/librsvg/bin/rsvg-convert", PREFIX);
    let gitea_src = format!("{}/gitea/bin/gitea", PREFIX);
    let go_src = format!("{}/go/bin/go", PREFIX);
-    let rustc_src = format!("{}/.rustup/toolchains/stable-aarch64-apple-darwin/bin/rustc", PREFIX);
+    let rustc_src = format!(
+        "{}/.rustup/toolchains/stable-aarch64-apple-darwin/bin/rustc",
+        PREFIX
+    );
    let swift_src = "/usr/bin/swift".to_string();
    let ytdlp_src = "/opt/homebrew/bin/yt-dlp".to_string();

@@ -313,7 +400,9 @@ fn cmd_install(service: &str) -> Result<()> {
    ];

    for (name, src) in &installs {
-        if service != "all" && service != *name { continue; }
+        if service != "all" && service != *name {
+            continue;
+        }
        if Path::new(src).exists() {
            println!("  ✅ {} installed: {}", name, src);
        } else {
@@ -370,12 +459,18 @@ fn cmd_config(service: &str) -> Result<()> {
        println!("MOMENTRY_LLM_SUMMARY_URL=http://localhost:8082/v1/chat/completions");
        println!("MOMENTRY_OUTPUT_DIR={}/momentry/output_dev", PREFIX);
        println!("MOMENTRY_SCRIPTS_DIR={}/momentry_core_0.1/scripts", PREFIX);
-        println!("MOMENTRY_PYTHON_PATH={}/.pyenv/versions/3.11.15/bin/python3.11", PREFIX);
+        println!(
+            "MOMENTRY_PYTHON_PATH={}/.pyenv/versions/3.11.15/bin/python3.11",
+            PREFIX
+        );
    }

    if service == "all" || service == "embedding" {
        println!("\n--- Embedding Server config ---");
-        println!("# Start: {} embeddinggemma_server.py --port 11436", format!("{}/momentry_core_0.1/scripts", PREFIX));
+        println!(
+            "# Start: {} embeddinggemma_server.py --port 11436",
+            format!("{}/momentry_core_0.1/scripts", PREFIX)
+        );
        println!("MODEL=google/embeddinggemma-300m");
        println!("PORT=11436");
        println!("DEVICE=mps");
@@ -393,25 +488,58 @@ fn cmd_launch_generate() -> Result<()> {
    let pg_args = format!("-D {}/pgsql/18.3/data", PREFIX);
    let redis_bin = format!("{}/redis/bin/redis-server", PREFIX);
    let redis_args = format!("{}/redis/redis.conf", PREFIX);
-    let qdrant_bin = format!("{}/momentry_core_0.1/services/qdrant/target/release/qdrant", PREFIX);
+    let qdrant_bin = format!(
+        "{}/momentry_core_0.1/services/qdrant/target/release/qdrant",
+        PREFIX
+    );
    let embed_bin = format!("{}/.pyenv/versions/3.11.15/bin/python3.11", PREFIX);
-    let embed_args = format!("{}/momentry_core_0.1/scripts/embeddinggemma_server.py --port 11436", PREFIX);
+    let embed_args = format!(
+        "{}/momentry_core_0.1/scripts/embeddinggemma_server.py --port 11436",
+        PREFIX
+    );
    let llama_bin = format!("{}/llama/bin/llama-server", PREFIX);
-    let llama_args = format!("-m {}/models/google_gemma-4-26B-A4B-it-Q5_K_M.gguf --port 8082 -ngl 99 -c 16384", PREFIX);
-    let play_bin = format!("{}/momentry_core_0.1/target/debug/momentry_playground", PREFIX);
+    let llama_args = format!(
+        "-m {}/models/google_gemma-4-26B-A4B-it-Q5_K_M.gguf --port 8082 -ngl 99 -c 16384",
+        PREFIX
+    );
+    let play_bin = format!(
+        "{}/momentry_core_0.1/target/debug/momentry_playground",
+        PREFIX
+    );

    let services: Vec<(&str, &str, &str, &str)> = vec![
        ("com.momentry.postgres", &pg_bin, &pg_args, "PostgreSQL"),
        ("com.momentry.redis", &redis_bin, &redis_args, "Redis"),
        ("com.momentry.qdrant", &qdrant_bin, "", "Qdrant"),
-        ("com.momentry.embedding",   &embed_bin, &embed_args, "EmbeddingGemma"),
-        ("com.momentry.llama",       &llama_bin, &llama_args, "LLM (llama.cpp)"),
-        ("com.momentry.playground",  &play_bin, "server --port 3003", "Momentry Playground"),
-        ("com.momentry.worker",      &play_bin, "worker --max-concurrent 2 --poll-interval 5", "Momentry Worker"),
+        (
+            "com.momentry.embedding",
+            &embed_bin,
+            &embed_args,
+            "EmbeddingGemma",
+        ),
+        (
+            "com.momentry.llama",
+            &llama_bin,
+            &llama_args,
+            "LLM (llama.cpp)",
+        ),
+        (
+            "com.momentry.playground",
+            &play_bin,
+            "server --port 3003",
+            "Momentry Playground",
+        ),
+        (
+            "com.momentry.worker",
+            &play_bin,
+            "worker --max-concurrent 2 --poll-interval 5",
+            "Momentry Worker",
+        ),
    ];

    for (label, bin, args, _desc) in &services {
-        let plist = format!(r#"<?xml version="1.0" encoding="UTF-8"?>
+        let plist = format!(
+            r#"<?xml version="1.0" encoding="UTF-8"?>
 <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
 <plist version="1.0">
 <dict>
@@ -451,7 +579,11 @@ fn cmd_launch_generate() -> Result<()> {
        fs::write(&plist_path, plist)?;
        println!("  📝 {} → {:?}", label, plist_path.file_name().unwrap());
    }
-    println!("\n  Generated {} plist files in {}", services.len(), LAUNCH_DIR);
+    println!(
+        "\n  Generated {} plist files in {}",
+        services.len(),
+        LAUNCH_DIR
+    );
    Ok(())
 }

@@ -461,7 +593,9 @@ fn cmd_launch_load() -> Result<()> {
        let path = entry.path();
        if path.extension().map_or(false, |e| e == "plist") {
            let name = path.file_stem().unwrap().to_str().unwrap_or("?");
-            let status = Command::new("launchctl").args(["load", "-w", path.to_str().unwrap()]).status();
+            let status = Command::new("launchctl")
+                .args(["load", "-w", path.to_str().unwrap()])
+                .status();
            match status {
                Ok(s) if s.success() => println!("  ✅ loaded: {}", name),
                Ok(_) => println!("  ⚠️  load failed: {}", name),
@@ -478,7 +612,9 @@ fn cmd_launch_unload() -> Result<()> {
        let path = entry.path();
        if path.extension().map_or(false, |e| e == "plist") {
            let name = path.file_stem().unwrap().to_str().unwrap_or("?");
-            let status = Command::new("launchctl").args(["unload", path.to_str().unwrap()]).status();
+            let status = Command::new("launchctl")
+                .args(["unload", path.to_str().unwrap()])
+                .status();
            match status {
                Ok(s) if s.success() => println!("  ✅ unloaded: {}", name),
                Ok(_) => println!("  ⚠️  unload failed: {}", name),
@@ -504,7 +640,11 @@ fn cmd_launch_status() -> Result<()> {
            Ok(o) if o.status.success() => {
                let stdout = String::from_utf8_lossy(&o.stdout);
                if stdout.contains("PID") || stdout.lines().count() > 1 {
-                    let pid = stdout.lines().nth(1).and_then(|l| l.split_whitespace().next()).unwrap_or("-");
+                    let pid = stdout
+                        .lines()
+                        .nth(1)
+                        .and_then(|l| l.split_whitespace().next())
+                        .unwrap_or("-");
                    println!("  🟢 {} (PID: {})", label, pid);
                } else {
                    println!("  ⚪ {} (not running)", label);
@@ -519,7 +659,8 @@ fn cmd_launch_status() -> Result<()> {
 // ---- Env ----

 fn cmd_env(output: &Option<String>) -> Result<()> {
-    let env_content = format!(r#"# Momentry Core — Environment Configuration
+    let env_content = format!(
+        r#"# Momentry Core — Environment Configuration
 # Generated: {}
 # Service: {} env

@@ -601,8 +742,14 @@ fn cmd_test() -> Result<()> {
    let rsvg_bin = format!("{}/librsvg/bin/rsvg-convert", PREFIX);
    let gitea_bin = format!("{}/gitea/bin/gitea", PREFIX);
    let go_bin = format!("{}/go/bin/go", PREFIX);
-    let rustc_bin = format!("{}/.rustup/toolchains/stable-aarch64-apple-darwin/bin/rustc", PREFIX);
-    let cargo_bin = format!("{}/.rustup/toolchains/stable-aarch64-apple-darwin/bin/cargo", PREFIX);
+    let rustc_bin = format!(
+        "{}/.rustup/toolchains/stable-aarch64-apple-darwin/bin/rustc",
+        PREFIX
+    );
+    let cargo_bin = format!(
+        "{}/.rustup/toolchains/stable-aarch64-apple-darwin/bin/cargo",
+        PREFIX
+    );
    let swift_bin = "/usr/bin/swift".to_string();
    let ytdlp_bin = "/opt/homebrew/bin/yt-dlp".to_string();

@@ -641,7 +788,11 @@ fn cmd_test() -> Result<()> {
        let output = Command::new(bin).args(args).output();
        match output {
            Ok(o) if o.status.success() => {
-                let ver = String::from_utf8_lossy(&o.stdout).lines().next().unwrap_or("?").to_string();
+                let ver = String::from_utf8_lossy(&o.stdout)
+                    .lines()
+                    .next()
+                    .unwrap_or("?")
+                    .to_string();
                println!("✅ {}", ver.chars().take(70).collect::<String>());
                pass += 1;
            }
@@ -666,14 +817,87 @@ fn cmd_test() -> Result<()> {
    // Functional tests
    println!("\n--- Functional Tests ---");
    // Create test docx for libreoffice test
-    let _ = std::fs::write("/tmp/svc_test_func.docx", "Service test document for LibreOffice conversion");
+    let _ = std::fs::write(
+        "/tmp/svc_test_func.docx",
+        "Service test document for LibreOffice conversion",
+    );
    let func_tests = [
-        ("ffprobe probe", "ffprobe", vec!["-v", "error", "-show_entries", "format=duration", "-of", "csv=p=0", "/Users/accusys/momentry/var/sftpgo/data/demo/Charade_YouTube_24fps.mp4"]),
-        ("ffmpeg audio extract", "ffmpeg", vec!["-y", "-v", "quiet", "-i", "/Users/accusys/momentry/var/sftpgo/data/demo/Charade_YouTube_24fps.mp4", "-t", "2", "-ar", "16000", "-ac", "1", "/tmp/svc_test_audio.wav"]),
-        ("ffmpeg frame extract", "ffmpeg", vec!["-y", "-v", "quiet", "-i", "/Users/accusys/momentry/var/sftpgo/data/demo/Charade_YouTube_24fps.mp4", "-ss", "100", "-vframes", "1", "/tmp/svc_test_frame.jpg"]),
-        ("libreoffice doc→txt", "libreoffice", vec!["--headless", "--convert-to", "txt", "/tmp/svc_test_func.docx", "--outdir", "/tmp/"]),
-        ("rsvg-convert svg→png", "rsvg-convert", vec!["-o", "/tmp/svc_test_rsvg.png", "/tmp/test_rsvg.svg"]),
-        ("mmdc mermaid→png", "mermaid-cli", vec!["-i", "/tmp/test_mermaid.mmd", "-o", "/tmp/svc_test_mmd.png", "-w", "200"]),
+        (
+            "ffprobe probe",
+            "ffprobe",
+            vec![
+                "-v",
+                "error",
+                "-show_entries",
+                "format=duration",
+                "-of",
+                "csv=p=0",
+                "/Users/accusys/momentry/var/sftpgo/data/demo/Charade_YouTube_24fps.mp4",
+            ],
+        ),
+        (
+            "ffmpeg audio extract",
+            "ffmpeg",
+            vec![
+                "-y",
+                "-v",
+                "quiet",
+                "-i",
+                "/Users/accusys/momentry/var/sftpgo/data/demo/Charade_YouTube_24fps.mp4",
+                "-t",
+                "2",
+                "-ar",
+                "16000",
+                "-ac",
+                "1",
+                "/tmp/svc_test_audio.wav",
+            ],
+        ),
+        (
+            "ffmpeg frame extract",
+            "ffmpeg",
+            vec![
+                "-y",
+                "-v",
+                "quiet",
+                "-i",
+                "/Users/accusys/momentry/var/sftpgo/data/demo/Charade_YouTube_24fps.mp4",
+                "-ss",
+                "100",
+                "-vframes",
+                "1",
+                "/tmp/svc_test_frame.jpg",
+            ],
+        ),
+        (
+            "libreoffice doc→txt",
+            "libreoffice",
+            vec![
+                "--headless",
+                "--convert-to",
+                "txt",
+                "/tmp/svc_test_func.docx",
+                "--outdir",
+                "/tmp/",
+            ],
+        ),
+        (
+            "rsvg-convert svg→png",
+            "rsvg-convert",
+            vec!["-o", "/tmp/svc_test_rsvg.png", "/tmp/test_rsvg.svg"],
+        ),
+        (
+            "mmdc mermaid→png",
+            "mermaid-cli",
+            vec![
+                "-i",
+                "/tmp/test_mermaid.mmd",
+                "-o",
+                "/tmp/svc_test_mmd.png",
+                "-w",
+                "200",
+            ],
+        ),
    ];

    for (desc, bin_name, args) in &func_tests {
@@ -689,8 +913,14 @@ fn cmd_test() -> Result<()> {
        };
        let output = Command::new(bin).args(args).output();
        match output {
-            Ok(o) if o.status.success() => { println!("✅"); pass += 1; }
-            _ => { println!("❌"); fail += 1; }
+            Ok(o) if o.status.success() => {
+                println!("✅");
+                pass += 1;
+            }
+            _ => {
+                println!("❌");
+                fail += 1;
+            }
        }
    }

@@ -706,7 +936,10 @@ fn cmd_test() -> Result<()> {

 fn cmd_report() -> Result<()> {
    println!("=== Momentry Service Report ===");
-    println!("Generated: {}", chrono::Local::now().format("%Y-%m-%d %H:%M:%S"));
+    println!(
+        "Generated: {}",
+        chrono::Local::now().format("%Y-%m-%d %H:%M:%S")
+    );
    println!();

    // 1. Source status
@@ -730,13 +963,25 @@ fn cmd_report() -> Result<()> {
    println!("\n## 2. Binaries");
    let binaries = [
        ("cmake", &format!("{}/bin/cmake", PREFIX)),
-        ("python3.11", &format!("{}/.pyenv/versions/3.11.15/bin/python3.11", PREFIX)),
+        (
+            "python3.11",
+            &format!("{}/.pyenv/versions/3.11.15/bin/python3.11", PREFIX),
+        ),
        ("ffmpeg", &format!("{}/ffmpeg_build/bin/ffmpeg", PREFIX)),
        ("ffprobe", &format!("{}/ffmpeg_build/bin/ffprobe", PREFIX)),
-        ("redis-server", &format!("{}/redis/bin/redis-server", PREFIX)),
+        (
+            "redis-server",
+            &format!("{}/redis/bin/redis-server", PREFIX),
+        ),
        ("postgres", &format!("{}/pgsql/18.3/bin/postgres", PREFIX)),
-        ("llama-server", &format!("{}/llama/bin/llama-server", PREFIX)),
-        ("libreoffice", &format!("{}/libreoffice/bin/soffice", PREFIX)),
+        (
+            "llama-server",
+            &format!("{}/llama/bin/llama-server", PREFIX),
+        ),
+        (
+            "libreoffice",
+            &format!("{}/libreoffice/bin/soffice", PREFIX),
+        ),
    ];
    for (name, path) in &binaries {
        let status = if Path::new(path).exists() {
@@ -772,9 +1017,18 @@ fn cmd_report() -> Result<()> {

    // 4. Ports
    println!("\n## 4. Port Status");
-    let ports = [(3003, "Playground"), (5432, "PostgreSQL"), (6379, "Redis"), (6333, "Qdrant"), (8082, "LLM"), (11436, "Embedding")];
+    let ports = [
+        (3003, "Playground"),
+        (5432, "PostgreSQL"),
+        (6379, "Redis"),
+        (6333, "Qdrant"),
+        (8082, "LLM"),
+        (11436, "Embedding"),
+    ];
    for (port, name) in &ports {
-        let output = Command::new("lsof").args(["-i", &format!(":{}", port)]).output();
+        let output = Command::new("lsof")
+            .args(["-i", &format!(":{}", port)])
+            .output();
        match output {
            Ok(o) if o.status.success() => println!("  🟢 :{} ({})", port, name),
            _ => println!("  ⚪ :{} ({})", port, name),
@@ -797,14 +1051,21 @@ fn cmd_report() -> Result<()> {
 }

 fn format_bytes(bytes: u64) -> String {
-    if bytes > 1024 * 1024 * 1024 { format!("{:.1}GB", bytes as f64 / 1_073_741_824.0) }
-    else if bytes > 1024 * 1024 { format!("{:.0}MB", bytes as f64 / 1_048_576.0) }
-    else if bytes > 1024 { format!("{:.0}KB", bytes as f64 / 1024.0) }
-    else { format!("{}B", bytes) }
+    if bytes > 1024 * 1024 * 1024 {
+        format!("{:.1}GB", bytes as f64 / 1_073_741_824.0)
+    } else if bytes > 1024 * 1024 {
+        format!("{:.0}MB", bytes as f64 / 1_048_576.0)
+    } else if bytes > 1024 {
+        format!("{:.0}KB", bytes as f64 / 1024.0)
+    } else {
+        format!("{}B", bytes)
+    }
 }

 fn format_dir_size(path: &Path) -> String {
-    let output = Command::new("du").args(["-sh", path.to_str().unwrap()]).output();
+    let output = Command::new("du")
+        .args(["-sh", path.to_str().unwrap()])
+        .output();
    match output {
        Ok(o) if o.status.success() => {
            let s = String::from_utf8_lossy(&o.stdout);
@@ -824,7 +1085,10 @@ async fn main() -> Result<()> {
            SourceAction::List => cmd_source_list()?,
            SourceAction::Verify => cmd_source_verify()?,
            SourceAction::Download { name } => {
-                println!("Downloading: {} (use install_services.sh for full download)", name);
+                println!(
+                    "Downloading: {} (use install_services.sh for full download)",
+                    name
+                );
                println!("Source URLs:");
                println!("  ffmpeg:   https://ffmpeg.org/releases/ffmpeg-7.1.1.tar.xz");
                println!("  redis:    https://download.redis.io/releases/redis-7.4.3.tar.gz");
--- a/src/core/chunk/rule3_ingest.rs
+++ b/src/core/chunk/rule3_ingest.rs
@@ -75,15 +75,13 @@ pub async fn ingest_rule3(pool: &PgPool, file_uuid: &str) -> Result<usize> {

        // Query chunks table for Rule 1 sentence chunks
        let chunk_table = schema::table_name("chunk");
-        let rule1_rows: Vec<(String,)> = sqlx::query_as(
-            &format!(
+        let rule1_rows: Vec<(String,)> = sqlx::query_as(&format!(
            "SELECT chunk_id FROM {} \
                 WHERE file_uuid = $1 AND chunk_type = 'sentence' \
                 AND start_frame >= $2 \
                 AND end_frame <= $3",
            chunk_table
-            ),
-        )
+        ))
        .bind(file_uuid)
        .bind(scene.start_frame as i64)
        .bind(scene.end_frame as i64)
@@ -101,16 +99,14 @@ pub async fn ingest_rule3(pool: &PgPool, file_uuid: &str) -> Result<usize> {
            // Let's re-query text directly.
        }

-        let texts: Vec<String> = sqlx::query_scalar(
-            &format!(
+        let texts: Vec<String> = sqlx::query_scalar(&format!(
            "SELECT text_content FROM {} \
                 WHERE file_uuid = $1 AND chunk_type = 'sentence' \
                 AND start_frame >= $2 \
                 AND end_frame <= $3 \
                 ORDER BY start_frame ASC",
            chunk_table
-            ),
-        )
+        ))
        .bind(file_uuid)
        .bind(scene.start_frame as i64)
        .bind(scene.end_frame as i64)
@@ -154,16 +150,14 @@ pub async fn ingest_rule3(pool: &PgPool, file_uuid: &str) -> Result<usize> {
            "scene_number": scene.scene_number
        });

-        sqlx::query(
-            &format!(
+        sqlx::query(&format!(
            "INSERT INTO {} (file_uuid, chunk_id, chunk_type, \
                 start_time, end_time, fps, start_frame, end_frame, \
                 content, text_content, summary_text, metadata, child_chunk_ids) \
                 VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13) \
                 ON CONFLICT (file_uuid, chunk_id) DO NOTHING",
            chunk_table
-            ),
-        )
+        ))
        .bind(file_uuid)
        .bind(&chunk_id)
        .bind(scene.scene_number as i32)
--- a/src/core/config.rs
+++ b/src/core/config.rs
@@ -20,8 +20,7 @@ pub fn set_cache_enabled(enabled: bool) {
 }

 // Switch 1: watcher detects new file → auto-register
-pub static RUNTIME_WATCHER_AUTO_REGISTER: Lazy<RwLock<bool>> =
-    Lazy::new(|| RwLock::new(false));
+pub static RUNTIME_WATCHER_AUTO_REGISTER: Lazy<RwLock<bool>> = Lazy::new(|| RwLock::new(false));

 pub fn get_watcher_auto_register() -> bool {
    *RUNTIME_WATCHER_AUTO_REGISTER.read().unwrap()
@@ -33,8 +32,7 @@ pub fn set_watcher_auto_register(enabled: bool) {
 }

 // Switch 2: register → auto-trigger processing pipeline
-pub static RUNTIME_AUTO_PIPELINE_ENABLED: Lazy<RwLock<bool>> =
-    Lazy::new(|| RwLock::new(false));
+pub static RUNTIME_AUTO_PIPELINE_ENABLED: Lazy<RwLock<bool>> = Lazy::new(|| RwLock::new(false));

 pub fn get_auto_pipeline_enabled() -> bool {
    *RUNTIME_AUTO_PIPELINE_ENABLED.read().unwrap()
@@ -107,6 +105,30 @@ pub static REDIS_KEY_PREFIX: Lazy<String> =
 pub static DATABASE_SCHEMA: Lazy<String> =
    Lazy::new(|| env::var("DATABASE_SCHEMA").unwrap_or_else(|_| "public".to_string()));

+/// IANA timezone name, resolved lazily: `MOMENTRY_TIMEZONE`, then `TZ`, then the
+/// `/etc/localtime` symlink target; "Asia/Taipei" when none of those yields a name.
+pub static SYSTEM_TIMEZONE: Lazy<String> = Lazy::new(|| {
+    // Explicit configuration wins; an empty value is treated as unset.
+    for key in &["MOMENTRY_TIMEZONE", "TZ"] {
+        if let Some(tz) = env::var(key).ok().filter(|tz| !tz.is_empty()) {
+            return tz;
+        }
+    }
+    // macOS: /etc/localtime → /var/db/timezone/zoneinfo/Asia/Taipei
+    // Linux:  /etc/localtime → /usr/share/zoneinfo/Asia/Taipei
+    // The link may also be relative (../usr/share/zoneinfo/Asia/Taipei), so match
+    // on the "/zoneinfo/" component instead of a fixed absolute prefix.
+    if let Ok(path) = std::fs::read_link("/etc/localtime") {
+        let target = path.to_string_lossy();
+        if let Some((_, tz)) = target.split_once("/zoneinfo/") {
+            if !tz.is_empty() {
+                return tz.to_string();
+            }
+        }
+    }
+    "Asia/Taipei".to_string()
+});
+
 pub static MONGODB_DATABASE: Lazy<String> =
    Lazy::new(|| env::var("MONGODB_DATABASE").unwrap_or_else(|_| "momentry".to_string()));

--- a/src/core/db/postgres_db.rs
+++ b/src/core/db/postgres_db.rs
--- a/src/core/db/qdrant_db.rs
+++ b/src/core/db/qdrant_db.rs
@@ -15,9 +15,11 @@ pub struct QdrantDb {

 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct VectorPayload {
-    pub uuid: String,
+    pub file_uuid: String,
    pub chunk_id: String,
    pub chunk_type: String,
+    pub start_frame: i64,
+    pub end_frame: i64,
    pub start_time: f64,
    pub end_time: f64,
    pub text: Option<String>,
@@ -189,6 +191,49 @@ impl QdrantDb {
        Ok(())
    }

+    /// Upserts several points into `collection` with a single PUT request.
+    ///
+    /// Each entry of `points` is `(point id, vector, optional payload)`. The request
+    /// is sent with `wait=true`; a non-success HTTP status is returned as an error.
+    pub async fn upsert_vectors_batch(
+        &self,
+        collection: &str,
+        points: &[(u64, &[f32], Option<serde_json::Value>)],
+    ) -> Result<()> {
+        // Nothing to write: skip the round trip instead of sending an empty update.
+        if points.is_empty() {
+            return Ok(());
+        }
+
+        let url = format!(
+            "{}/collections/{}/points?wait=true",
+            self.base_url, collection
+        );
+        let qdrant_points: Vec<serde_json::Value> = points
+            .iter()
+            .map(|(id, vector, payload)| match payload {
+                Some(pl) => serde_json::json!({ "id": id, "vector": vector, "payload": pl }),
+                None => serde_json::json!({ "id": id, "vector": vector }),
+            })
+            .collect();
+
+        let response = self
+            .client
+            .put(&url)
+            .header("api-key", &self.api_key)
+            .json(&serde_json::json!({ "points": qdrant_points }))
+            .send()
+            .await
+            .context("Failed to send batch upsert request to Qdrant")?;
+
+        let status = response.status();
+        if !status.is_success() {
+            let response_text = response.text().await.unwrap_or_default();
+            anyhow::bail!("Qdrant batch upsert failed: {} - {}", status, response_text);
+        }
+        Ok(())
+    }
+
    pub async fn upsert_vector(
        &self,
        chunk_id: &str,
@@ -207,12 +252,23 @@ impl QdrantDb {
        );

        let mut payload_map = HashMap::new();
-        payload_map.insert("uuid".to_string(), serde_json::json!(payload.uuid));
+        payload_map.insert(
+            "file_uuid".to_string(),
+            serde_json::json!(payload.file_uuid),
+        );
        payload_map.insert("chunk_id".to_string(), serde_json::json!(payload.chunk_id));
        payload_map.insert(
            "chunk_type".to_string(),
            serde_json::json!(payload.chunk_type),
        );
+        payload_map.insert(
+            "start_frame".to_string(),
+            serde_json::json!(payload.start_frame),
+        );
+        payload_map.insert(
+            "end_frame".to_string(),
+            serde_json::json!(payload.end_frame),
+        );
        payload_map.insert(
            "start_time".to_string(),
            serde_json::json!(payload.start_time),
@@ -224,7 +280,7 @@ impl QdrantDb {

        // Generate consistent point ID from uuid and chunk_id
        // Qdrant requires integer or UUID point IDs. We'll use a simple integer hash.
-        let point_id_str = format!("{}_{}", payload.uuid, chunk_id);
+        let point_id_str = format!("{}_{}", payload.file_uuid, chunk_id);
        use std::collections::hash_map::DefaultHasher;
        use std::hash::{Hash, Hasher};
        let mut hasher = DefaultHasher::new();
@@ -240,9 +296,9 @@ impl QdrantDb {
        });

        tracing::debug!(
-            "Upserting vector to Qdrant: chunk_id={}, uuid={}, vector_len={}",
+            "Upserting vector to Qdrant: chunk_id={}, file_uuid={}, vector_len={}",
            chunk_id,
-            payload.uuid,
+            payload.file_uuid,
            vector.len()
        );

@@ -337,7 +393,7 @@ impl QdrantDb {
            .map(|r| {
                let uuid = r
                    .payload
-                    .get("uuid")
+                    .get("file_uuid")
                    .and_then(|v| v.as_str())
                    .unwrap_or("unknown")
                    .to_string();
@@ -409,7 +465,7 @@ impl QdrantDb {
                                .map(|r| {
                                    let uuid = r
                                        .payload
-                                        .get("uuid")
+                                        .get("file_uuid")
                                        .and_then(|v| v.as_str())
                                        .unwrap_or("unknown")
                                        .to_string();
@@ -471,7 +527,7 @@ impl QdrantDb {
            "filter": {
                "must": [
                    {
-                        "key": "uuid",
+                        "key": "file_uuid",
                        "match": {
                            "value": uuid
                        }
@@ -532,7 +588,7 @@ impl QdrantDb {
            .map(|r| {
                let uuid = r
                    .payload
-                    .get("uuid")
+                    .get("file_uuid")
                    .and_then(|v| v.as_str())
                    .unwrap_or("unknown")
                    .to_string();
@@ -553,6 +609,89 @@ impl QdrantDb {
        Ok(search_results)
    }

+    /// Runs a vector search against `collection` and returns `(score, payload)` pairs.
+    ///
+    /// Points whose `exclude_payload_key` equals `exclude_payload_value` are filtered
+    /// out; when `include_file_uuid` is set, hits are restricted to that `file_uuid`.
+    pub async fn search_face_collection(
+        &self,
+        collection: &str,
+        query_vector: &[f32],
+        limit: usize,
+        exclude_payload_key: &str,
+        exclude_payload_value: &str,
+        include_file_uuid: Option<&str>,
+    ) -> Result<Vec<(f64, HashMap<String, serde_json::Value>)>> {
+        #[derive(Deserialize)]
+        struct QdrantPoint {
+            score: f64,
+            payload: HashMap<String, serde_json::Value>,
+        }
+        #[derive(Deserialize)]
+        struct QdrantSearchResult {
+            result: Vec<QdrantPoint>,
+        }
+
+        let endpoint = format!("{}/collections/{}/points/search", self.base_url, collection);
+
+        let mut conditions = serde_json::json!({
+            "must_not": [
+                {
+                    "key": exclude_payload_key,
+                    "match": { "value": exclude_payload_value }
+                }
+            ]
+        });
+        if let Some(file_uuid) = include_file_uuid {
+            conditions["must"] = serde_json::json!([
+                {
+                    "key": "file_uuid",
+                    "match": { "value": file_uuid }
+                }
+            ]);
+        }
+
+        let request_body = serde_json::json!({
+            "vector": query_vector,
+            "limit": limit,
+            "with_payload": true,
+            "filter": conditions,
+        });
+
+        let response = self
+            .client
+            .post(&endpoint)
+            .header("api-key", &self.api_key)
+            .header("Content-Type", "application/json")
+            .json(&request_body)
+            .send()
+            .await
+            .context("Failed to search Qdrant face collection")?;
+
+        // Read the body before checking the status so it can be quoted in the error.
+        let status = response.status();
+        let raw = response
+            .text()
+            .await
+            .unwrap_or_else(|_| "Failed to read response".to_string());
+
+        if !status.is_success() {
+            return Err(anyhow::anyhow!(
+                "Qdrant search_face_collection failed: {} - {}",
+                status,
+                raw
+            ));
+        }
+
+        let parsed: QdrantSearchResult = serde_json::from_str(&raw)
+            .map_err(|e| anyhow::anyhow!("Failed to parse Qdrant response: {}", e))?;
+        Ok(parsed
+            .result
+            .into_iter()
+            .map(|hit| (hit.score, hit.payload))
+            .collect())
+    }
+
    pub async fn delete_by_uuid(&self, uuid: &str) -> Result<()> {
        let url = format!(
            "{}/collections/{}/points/delete",
@@ -563,7 +702,7 @@ impl QdrantDb {
            "filter": {
                "must": [
                    {
-                        "key": "uuid",
+                        "key": "file_uuid",
                        "match": {
                            "value": uuid
                        }
@@ -711,9 +850,11 @@ impl Database for QdrantDb {
 impl VectorStore for QdrantDb {
    async fn store_vector(&self, chunk_id: &str, vector: &[f32]) -> Result<()> {
        let payload = VectorPayload {
-            uuid: String::new(),
+            file_uuid: String::new(),
            chunk_id: chunk_id.to_string(),
            chunk_type: String::new(),
+            start_frame: 0,
+            end_frame: 0,
            start_time: 0.0,
            end_time: 0.0,
            text: None,
@@ -737,7 +878,9 @@ pub async fn sync_face_embeddings(file_uuid: &str) -> Result<()> {
    let qdrant: QdrantDb = QdrantDb::new();

    let query = format!(
-        "SELECT id, trace_id, frame_number, embedding FROM {} WHERE file_uuid = $1 AND embedding IS NOT NULL",
+        "SELECT id, trace_id, frame_number, embedding FROM {} \
+         WHERE file_uuid = $1 AND embedding IS NOT NULL \
+         AND ((metadata->>'qc_ok')::boolean IS NULL OR (metadata->>'qc_ok')::boolean = true)",
        table
    );
    let rows = sqlx::query(&query).bind(file_uuid).fetch_all(&pool).await?;
@@ -767,3 +910,103 @@ pub async fn sync_face_embeddings(file_uuid: &str) -> Result<()> {
    );
    Ok(())
 }
+
+/// Averages the face embeddings of each trace in `file_uuid` and upserts one vector
+/// per trace into the `{REDIS_KEY_PREFIX}_traces` Qdrant collection.
+///
+/// The query drops rows whose `metadata->>'qc_ok'` is false. Empty embeddings, and
+/// embeddings whose length differs from the first one seen for their trace, are
+/// skipped so a malformed row cannot panic the sync or skew the mean.
+///
+/// # Errors
+/// Fails if the Postgres connection, the query, or any Qdrant request fails.
+pub async fn sync_trace_embeddings(file_uuid: &str) -> Result<()> {
+    use crate::core::config::DATABASE_URL;
+    use sqlx::Row;
+
+    let pool = sqlx::PgPool::connect(&DATABASE_URL).await?;
+    let table = crate::core::db::schema::table_name("face_detections");
+    let qdrant = QdrantDb::new();
+
+    let collection = format!(
+        "{}_traces",
+        crate::core::config::REDIS_KEY_PREFIX
+            .as_str()
+            .trim_end_matches(':')
+    );
+    qdrant.ensure_collection(&collection, 512).await?;
+
+    let rows = sqlx::query(&format!(
+        "SELECT trace_id, embedding FROM {} \
+         WHERE file_uuid = $1 AND embedding IS NOT NULL AND trace_id IS NOT NULL \
+         AND ((metadata->>'qc_ok')::boolean IS NULL OR (metadata->>'qc_ok')::boolean = true)",
+        table
+    ))
+    .bind(file_uuid)
+    .fetch_all(&pool)
+    .await?;
+
+    // Per trace: running component-wise sum and the number of faces folded into it.
+    let mut sums: std::collections::HashMap<i32, (Vec<f32>, i64)> =
+        std::collections::HashMap::new();
+
+    for row in &rows {
+        let tid: Option<i32> = row.get(0);
+        let emb: Option<Vec<f32>> = row.get(1);
+        // NULLs are already excluded by the query; the guard also drops empty vectors.
+        let (tid, emb) = match (tid, emb) {
+            (Some(tid), Some(emb)) if !emb.is_empty() => (tid, emb),
+            _ => continue,
+        };
+        let (sum, count) = sums.entry(tid).or_insert_with(|| (vec![0.0; emb.len()], 0));
+        if sum.len() != emb.len() {
+            tracing::warn!("Skipping trace {} embedding: length mismatch", tid);
+            continue;
+        }
+        for (acc, v) in sum.iter_mut().zip(&emb) {
+            *acc += v;
+        }
+        *count += 1;
+    }
+
+    // Turn each sum into a mean; `count` is at least 1 for every entry.
+    let trace_avgs: Vec<(i32, Vec<f32>, i64)> = sums
+        .into_iter()
+        .map(|(tid, (mut sum, count))| {
+            let n = count as f32;
+            sum.iter_mut().for_each(|v| *v /= n);
+            (tid, sum, count)
+        })
+        .collect();
+
+    // NOTE(review): the point id is the bare trace_id. If trace ids are only unique
+    // within a file, syncing another file into this shared collection overwrites
+    // these points — confirm, and derive the id from (file_uuid, trace_id) if so.
+    // `as u64` would also wrap a negative trace_id into a very large id.
+    // Upsert in batches of at most 500 points per request.
+    for chunk in trace_avgs.chunks(500) {
+        let batch: Vec<(u64, &[f32], Option<serde_json::Value>)> = chunk
+            .iter()
+            .map(|(tid, avg, frame_count)| {
+                (
+                    *tid as u64,
+                    avg.as_slice(),
+                    Some(serde_json::json!({
+                        "trace_id": tid,
+                        "file_uuid": file_uuid,
+                        "frame_count": frame_count,
+                        "source": "trace",
+                    })),
+                )
+            })
+            .collect();
+        qdrant.upsert_vectors_batch(&collection, &batch).await?;
+    }
+
+    tracing::info!(
+        "Synced {} trace embeddings to Qdrant for {}",
+        trace_avgs.len(),
+        file_uuid
+    );
+    Ok(())
+}
--- a/src/core/db/sync_db.rs
+++ b/src/core/db/sync_db.rs
@@ -45,9 +45,11 @@ impl SyncDb {
        }

        let payload = VectorPayload {
-            uuid: uuid.clone(),
+            file_uuid: uuid.clone(),
            chunk_id: chunk_id.clone(),
            chunk_type,
+            start_frame: chunk.start_frame,
+            end_frame: chunk.end_frame,
            start_time,
            end_time,
            text: Some(text.to_string()),
--- a/src/core/health_agent.rs
+++ b/src/core/health_agent.rs
@@ -33,26 +33,38 @@ pub async fn run_consistency_checks(db: &PostgresDb) -> ConsistencyReport {

    // Check 1: stale_processing — status=processing but job_id is null
    let c1 = check_stale_processing(db).await;
-    if c1.count > 0 { any_issue = true; }
+    if c1.count > 0 {
+        any_issue = true;
+    }
    checks.push(c1);

    // Check 2: orphaned_processing — status=processing but no active monitor_job
    let c2 = check_orphaned_processing(db).await;
-    if c2.count > 0 { any_issue = true; }
+    if c2.count > 0 {
+        any_issue = true;
+    }
    checks.push(c2);

    // Check 3: unregistered_with_uuid — DB rows left behind by migration
    let c3 = check_unregistered_with_uuid(db).await;
-    if c3.count > 0 { any_issue = true; }
+    if c3.count > 0 {
+        any_issue = true;
+    }
    checks.push(c3);

    // Check 4: processing_job_done — status=processing but job already completed
    let c4 = check_processing_job_done(db).await;
-    if c4.count > 0 { any_issue = true; }
+    if c4.count > 0 {
+        any_issue = true;
+    }
    checks.push(c4);

    ConsistencyReport {
-        status: if any_issue { "degraded".to_string() } else { "ok".to_string() },
+        status: if any_issue {
+            "degraded".to_string()
+        } else {
+            "ok".to_string()
+        },
        checked_at,
        checks,
    }
@@ -68,9 +80,17 @@ async fn check_stale_processing(db: &PostgresDb) -> ConsistencyCheck {
    .await
    .unwrap_or_default();

-    let files: Vec<ConsistencyFile> = rows.into_iter().map(|(file_uuid, file_name, status): (String, String, String)| ConsistencyFile {
-        file_uuid, file_name, status, detail: "job_id is null".to_string(),
-    }).collect();
+    let files: Vec<ConsistencyFile> = rows
+        .into_iter()
+        .map(
+            |(file_uuid, file_name, status): (String, String, String)| ConsistencyFile {
+                file_uuid,
+                file_name,
+                status,
+                detail: "job_id is null".to_string(),
+            },
+        )
+        .collect();

    ConsistencyCheck {
        check: "stale_processing".to_string(),
@@ -83,7 +103,8 @@ async fn check_stale_processing(db: &PostgresDb) -> ConsistencyCheck {
 async fn check_orphaned_processing(db: &PostgresDb) -> ConsistencyCheck {
    let vt = schema::table_name("videos");
    let mj = schema::table_name("monitor_jobs");
-    let rows: Vec<(String, String, String)> = sqlx::query_as::<_, (String, String, String)>(&format!(
+    let rows: Vec<(String, String, String)> =
+        sqlx::query_as::<_, (String, String, String)>(&format!(
            "SELECT v.file_uuid, v.file_name, v.status \
         FROM {} v LEFT JOIN {} m ON v.file_uuid = m.uuid AND m.status IN ('pending','running') \
         WHERE v.status = 'processing' AND m.id IS NULL",
@@ -93,9 +114,17 @@ async fn check_orphaned_processing(db: &PostgresDb) -> ConsistencyCheck {
        .await
        .unwrap_or_default();

-    let files: Vec<ConsistencyFile> = rows.into_iter().map(|(file_uuid, file_name, status): (String, String, String)| ConsistencyFile {
-        file_uuid, file_name, status, detail: "no active monitor_job".to_string(),
-    }).collect();
+    let files: Vec<ConsistencyFile> = rows
+        .into_iter()
+        .map(
+            |(file_uuid, file_name, status): (String, String, String)| ConsistencyFile {
+                file_uuid,
+                file_name,
+                status,
+                detail: "no active monitor_job".to_string(),
+            },
+        )
+        .collect();

    ConsistencyCheck {
        check: "orphaned_processing".to_string(),
@@ -107,7 +136,8 @@ async fn check_orphaned_processing(db: &PostgresDb) -> ConsistencyCheck {

 async fn check_unregistered_with_uuid(db: &PostgresDb) -> ConsistencyCheck {
    let vt = schema::table_name("videos");
-    let rows: Vec<(String, String, String)> = sqlx::query_as::<_, (String, String, String)>(&format!(
+    let rows: Vec<(String, String, String)> =
+        sqlx::query_as::<_, (String, String, String)>(&format!(
            "SELECT file_uuid, file_name, status FROM {} WHERE status = 'unregistered'",
            vt
        ))
@@ -115,9 +145,17 @@ async fn check_unregistered_with_uuid(db: &PostgresDb) -> ConsistencyCheck {
        .await
        .unwrap_or_default();

-    let files: Vec<ConsistencyFile> = rows.into_iter().map(|(file_uuid, file_name, status): (String, String, String)| ConsistencyFile {
-        file_uuid, file_name, status, detail: "migration residue".to_string(),
-    }).collect();
+    let files: Vec<ConsistencyFile> = rows
+        .into_iter()
+        .map(
+            |(file_uuid, file_name, status): (String, String, String)| ConsistencyFile {
+                file_uuid,
+                file_name,
+                status,
+                detail: "migration residue".to_string(),
+            },
+        )
+        .collect();

    ConsistencyCheck {
        check: "unregistered_with_uuid".to_string(),
@@ -130,7 +168,8 @@ async fn check_unregistered_with_uuid(db: &PostgresDb) -> ConsistencyCheck {
 async fn check_processing_job_done(db: &PostgresDb) -> ConsistencyCheck {
    let vt = schema::table_name("videos");
    let mj = schema::table_name("monitor_jobs");
-    let rows: Vec<(String, String, String)> = sqlx::query_as::<_, (String, String, String)>(&format!(
+    let rows: Vec<(String, String, String)> =
+        sqlx::query_as::<_, (String, String, String)>(&format!(
            "SELECT v.file_uuid, v.file_name, v.status \
         FROM {} v JOIN {} m ON v.file_uuid = m.uuid \
         WHERE v.status = 'processing' AND m.status = 'completed'",
@@ -140,9 +179,17 @@ async fn check_processing_job_done(db: &PostgresDb) -> ConsistencyCheck {
        .await
        .unwrap_or_default();

-    let files: Vec<ConsistencyFile> = rows.into_iter().map(|(file_uuid, file_name, status): (String, String, String)| ConsistencyFile {
-        file_uuid, file_name, status, detail: "monitor_job already completed".to_string(),
-    }).collect();
+    let files: Vec<ConsistencyFile> = rows
+        .into_iter()
+        .map(
+            |(file_uuid, file_name, status): (String, String, String)| ConsistencyFile {
+                file_uuid,
+                file_name,
+                status,
+                detail: "monitor_job already completed".to_string(),
+            },
+        )
+        .collect();

    ConsistencyCheck {
        check: "processing_job_done".to_string(),
--- a/src/core/identity/storage.rs
+++ b/src/core/identity/storage.rs
@@ -54,8 +54,7 @@ pub fn read_identity_file(uuid: &str) -> Result<IdentityFile> {
    let path = identity_file_path(uuid);
    let content = std::fs::read_to_string(&path)
        .with_context(|| format!("Identity file not found: {} ({})", uuid, path.display()))?;
-    serde_json::from_str(&content)
-        .with_context(|| format!("Invalid identity.json: {}", uuid))
+    serde_json::from_str(&content).with_context(|| format!("Invalid identity.json: {}", uuid))
 }

 pub fn write_identity_file(file: &IdentityFile) -> Result<()> {
@@ -167,7 +166,10 @@ pub fn rebuild_index() -> Result<usize> {
                entries.insert(uuid.clone(), file.name);
            }
            Err(e) => {
-                warn!("[identity-storage] Skipping {} in index rebuild: {}", uuid, e);
+                warn!(
+                    "[identity-storage] Skipping {} in index rebuild: {}",
+                    uuid, e
+                );
            }
        }
    }
@@ -187,18 +189,16 @@ pub async fn save_identity_file_by_pool(pool: &sqlx::PgPool, uuid: &str) -> Resu
    let identity_table = crate::core::db::schema::table_name("identities");
    let fd_table = crate::core::db::schema::table_name("face_detections");

-    // Schema-aware column selection: dev uses 'name', public uses 'real_name'
-    let name_col = if identity_table.starts_with("dev.") { "name" } else { "real_name" };
-
    let clean = uuid.replace('-', "");
+
    let record = sqlx::query_as::<_, crate::core::db::IdentityDetailRecord>(
        &format!(
-            "SELECT id, uuid::text, {} AS name, identity_type, source, status, metadata, reference_data, \
+            "SELECT id, uuid::text, name, identity_type, source, status, metadata, reference_data, \
              NULL::real[] as voice_embedding, NULL::real[] as identity_embedding, \
              face_embedding::real[] as face_embedding, \
              tmdb_id, tmdb_profile, created_at::timestamptz as created_at, NULL::timestamptz as updated_at \
              FROM {} WHERE REPLACE(uuid::text, '-', '') = $1",
-            name_col, identity_table
+            identity_table
        )
    )
    .bind(&clean)
@@ -322,8 +322,13 @@ pub fn update_index_at(base: &std::path::Path, uuid: &str, name: &str) -> Result
    let mut entries: HashMap<String, String> = if index_path.exists() {
        let content = std::fs::read_to_string(&index_path)?;
        let v: serde_json::Value = serde_json::from_str(&content).unwrap_or_default();
-        v["entries"].as_object()
-            .map(|obj| obj.iter().map(|(k, v)| (k.clone(), v.as_str().unwrap_or("").to_string())).collect())
+        v["entries"]
+            .as_object()
+            .map(|obj| {
+                obj.iter()
+                    .map(|(k, v)| (k.clone(), v.as_str().unwrap_or("").to_string()))
+                    .collect()
+            })
            .unwrap_or_default()
    } else {
        HashMap::new()
@@ -338,7 +343,9 @@ pub fn update_index_at(base: &std::path::Path, uuid: &str, name: &str) -> Result
 }

 pub async fn save_identity_file(db: &PostgresDb, uuid: &str) -> Result<()> {
-    let record = db.get_identity_by_uuid(uuid).await?
+    let record = db
+        .get_identity_by_uuid(uuid)
+        .await?
        .with_context(|| format!("Identity not found in DB: {}", uuid))?;

    let identity_uuid = record.uuid.clone();
@@ -415,6 +422,7 @@ mod tests {
            status: Some("confirmed".to_string()),
            tmdb_id: Some(112),
            tmdb_profile: Some("https://image.tmdb.org/t/p/w185/test.jpg".to_string()),
+            local_profile: None,
            metadata: serde_json::json!({"tmdb_character": "Test Role"}),
            file_bindings: vec![FileBinding {
                file_uuid: "ffffffffffffffffffffffffffffffff".to_string(),
@@ -442,7 +450,9 @@ mod tests {
    fn test_identity_dir_path() {
        let uuid = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
        let p = identity_dir(uuid);
-        assert!(p.to_string_lossy().ends_with(&format!("identities/{}", uuid)));
+        assert!(p
+            .to_string_lossy()
+            .ends_with(&format!("identities/{}", uuid)));
    }

    #[test]
@@ -463,7 +473,10 @@ mod tests {
        let base = Path::new("/tmp/test_base");
        let uuid = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb";
        let p = identity_dir_at(base, uuid);
-        assert_eq!(p, Path::new("/tmp/test_base/identities/bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"));
+        assert_eq!(
+            p,
+            Path::new("/tmp/test_base/identities/bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb")
+        );
    }

    #[test]
@@ -490,7 +503,10 @@ mod tests {
        assert_eq!(read.name, file.name);
        assert_eq!(read.source, file.source);
        assert_eq!(read.tmdb_id, file.tmdb_id);
-        assert_eq!(read.file_bindings[0].face_count, file.file_bindings[0].face_count);
+        assert_eq!(
+            read.file_bindings[0].face_count,
+            file.file_bindings[0].face_count
+        );

        let _ = std::fs::remove_dir_all(&tmp);
    }
@@ -521,9 +537,21 @@ mod tests {
        let _ = std::fs::remove_dir_all(&tmp);
        let base = &tmp;

-        std::fs::create_dir_all(base.join("identities").join("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")).unwrap();
-        std::fs::create_dir_all(base.join("identities").join("bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb")).unwrap();
-        std::fs::create_dir_all(base.join("identities").join("cccccccccccccccccccccccccccccccc")).unwrap();
+        std::fs::create_dir_all(
+            base.join("identities")
+                .join("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
+        )
+        .unwrap();
+        std::fs::create_dir_all(
+            base.join("identities")
+                .join("bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"),
+        )
+        .unwrap();
+        std::fs::create_dir_all(
+            base.join("identities")
+                .join("cccccccccccccccccccccccccccccccc"),
+        )
+        .unwrap();
        std::fs::create_dir_all(base.join("identities").join("not_a_uuid")).unwrap();
        std::fs::create_dir_all(base.join("identities").join("short")).unwrap();

--- a/src/core/ingestion.rs
+++ b/src/core/ingestion.rs
@@ -56,19 +56,25 @@ impl IngestionService {
            .to_string();

        // 1. Compute SHA256 for dedup
-        let content_hash = crate::core::storage::content_hash::compute_sha256(&canonical_path).ok().unwrap_or_default();
+        let content_hash = crate::core::storage::content_hash::compute_sha256(&canonical_path)
+            .ok()
+            .unwrap_or_default();

        // 2. Hash check — same content = already registered
        let videos_table = schema::table_name("videos");
        if !content_hash.is_empty() {
-            if let Ok(Some(existing_uuid)) = sqlx::query_scalar::<_, String>(
-                &format!("SELECT file_uuid FROM {} WHERE content_hash = $1 LIMIT 1", videos_table)
-            )
+            if let Ok(Some(existing_uuid)) = sqlx::query_scalar::<_, String>(&format!(
+                "SELECT file_uuid FROM {} WHERE content_hash = $1 LIMIT 1",
+                videos_table
+            ))
            .bind(&content_hash)
            .fetch_optional(self.db.pool())
            .await
            {
-                info!("Content already registered: {} ({})", filename, existing_uuid);
+                info!(
+                    "Content already registered: {} ({})",
+                    filename, existing_uuid
+                );
                return Ok(Some(existing_uuid));
            }
        }
@@ -108,7 +114,8 @@ impl IngestionService {
        let probe_result = probe::probe_video(file_path).ok();
        let file_meta = std::fs::metadata(&canonical_path).ok();

-        let duration = probe_result.as_ref()
+        let duration = probe_result
+            .as_ref()
            .and_then(|r| r.format.duration.as_ref())
            .and_then(|s| s.parse::<f64>().ok())
            .unwrap_or(0.0);
@@ -148,7 +155,11 @@ impl IngestionService {
        }

        let total_frames = {
-            let video_stream = probe_result.as_ref().and_then(|pr| pr.streams.iter().find(|s| s.codec_type.as_deref() == Some("video")));
+            let video_stream = probe_result.as_ref().and_then(|pr| {
+                pr.streams
+                    .iter()
+                    .find(|s| s.codec_type.as_deref() == Some("video"))
+            });

            if let Some(stream) = video_stream {
                if let Some(nb_frames_str) = &stream.nb_frames {
@@ -223,7 +234,10 @@ impl IngestionService {
        // Store content_hash for dedup
        if !content_hash.is_empty() {
            let vt = schema::table_name("videos");
-            let _ = sqlx::query(&format!("UPDATE {} SET content_hash = $1 WHERE file_uuid = $2", vt))
+            let _ = sqlx::query(&format!(
+                "UPDATE {} SET content_hash = $1 WHERE file_uuid = $2",
+                vt
+            ))
            .bind(&content_hash)
            .bind(&uuid)
            .execute(self.db.pool())
@@ -243,5 +257,3 @@ impl IngestionService {
        Ok(Some(uuid))
    }
 }
-
-
--- a/src/core/probe/unified.rs
+++ b/src/core/probe/unified.rs
@@ -17,42 +17,84 @@ mod tests {
    #[test]
    fn test_detect_category_image() {
        assert_eq!(detect_category(Path::new("photo.jpg")), FileCategory::Image);
-        assert_eq!(detect_category(Path::new("photo.jpeg")), FileCategory::Image);
+        assert_eq!(
+            detect_category(Path::new("photo.jpeg")),
+            FileCategory::Image
+        );
        assert_eq!(detect_category(Path::new("photo.png")), FileCategory::Image);
        assert_eq!(detect_category(Path::new("photo.svg")), FileCategory::Image);
-        assert_eq!(detect_category(Path::new("photo.webp")), FileCategory::Image);
+        assert_eq!(
+            detect_category(Path::new("photo.webp")),
+            FileCategory::Image
+        );
    }

    #[test]
    fn test_detect_category_document() {
-        assert_eq!(detect_category(Path::new("doc.pdf")), FileCategory::Document);
-        assert_eq!(detect_category(Path::new("doc.docx")), FileCategory::Document);
-        assert_eq!(detect_category(Path::new("doc.pages")), FileCategory::Document);
-        assert_eq!(detect_category(Path::new("doc.txt")), FileCategory::Document);
+        assert_eq!(
+            detect_category(Path::new("doc.pdf")),
+            FileCategory::Document
+        );
+        assert_eq!(
+            detect_category(Path::new("doc.docx")),
+            FileCategory::Document
+        );
+        assert_eq!(
+            detect_category(Path::new("doc.pages")),
+            FileCategory::Document
+        );
+        assert_eq!(
+            detect_category(Path::new("doc.txt")),
+            FileCategory::Document
+        );
    }

    #[test]
    fn test_detect_category_spreadsheet() {
-        assert_eq!(detect_category(Path::new("data.xlsx")), FileCategory::Spreadsheet);
-        assert_eq!(detect_category(Path::new("data.csv")), FileCategory::Spreadsheet);
-        assert_eq!(detect_category(Path::new("data.numbers")), FileCategory::Spreadsheet);
+        assert_eq!(
+            detect_category(Path::new("data.xlsx")),
+            FileCategory::Spreadsheet
+        );
+        assert_eq!(
+            detect_category(Path::new("data.csv")),
+            FileCategory::Spreadsheet
+        );
+        assert_eq!(
+            detect_category(Path::new("data.numbers")),
+            FileCategory::Spreadsheet
+        );
    }

    #[test]
    fn test_detect_category_presentation() {
-        assert_eq!(detect_category(Path::new("deck.pptx")), FileCategory::Presentation);
-        assert_eq!(detect_category(Path::new("deck.key")), FileCategory::Presentation);
+        assert_eq!(
+            detect_category(Path::new("deck.pptx")),
+            FileCategory::Presentation
+        );
+        assert_eq!(
+            detect_category(Path::new("deck.key")),
+            FileCategory::Presentation
+        );
    }

    #[test]
    fn test_detect_category_archive() {
-        assert_eq!(detect_category(Path::new("files.zip")), FileCategory::Archive);
-        assert_eq!(detect_category(Path::new("files.tar.gz")), FileCategory::Archive);
+        assert_eq!(
+            detect_category(Path::new("files.zip")),
+            FileCategory::Archive
+        );
+        assert_eq!(
+            detect_category(Path::new("files.tar.gz")),
+            FileCategory::Archive
+        );
    }

    #[test]
    fn test_detect_category_unknown() {
-        assert_eq!(detect_category(Path::new("file.xyz")), FileCategory::Unknown);
+        assert_eq!(
+            detect_category(Path::new("file.xyz")),
+            FileCategory::Unknown
+        );
        assert_eq!(detect_category(Path::new("file")), FileCategory::Unknown);
    }

@@ -84,13 +126,18 @@ pub enum FileCategory {

 /// Detect file category from path extension
 pub fn detect_category(path: &Path) -> FileCategory {
-    let ext = path.extension()
+    let ext = path
+        .extension()
        .and_then(|e| e.to_str())
        .map(|e| e.to_lowercase());
    match ext.as_deref() {
        Some("mp4" | "mov" | "mkv" | "avi" | "webm" | "m4v" | "mpeg") => FileCategory::Video,
-        Some("jpg" | "jpeg" | "png" | "gif" | "bmp" | "webp" | "svg" | "heic" | "tiff") => FileCategory::Image,
-        Some("pdf" | "doc" | "docx" | "odt" | "pages" | "rtf" | "txt" | "md" | "rst") => FileCategory::Document,
+        Some("jpg" | "jpeg" | "png" | "gif" | "bmp" | "webp" | "svg" | "heic" | "tiff") => {
+            FileCategory::Image
+        }
+        Some("pdf" | "doc" | "docx" | "odt" | "pages" | "rtf" | "txt" | "md" | "rst") => {
+            FileCategory::Document
+        }
        Some("xls" | "xlsx" | "csv" | "ods" | "numbers") => FileCategory::Spreadsheet,
        Some("ppt" | "pptx" | "odp" | "key") => FileCategory::Presentation,
        Some("zip" | "tar" | "gz" | "tgz" | "7z" | "rar") => FileCategory::Archive,
@@ -102,16 +149,20 @@ pub fn detect_category(path: &Path) -> FileCategory {
 pub fn base_format_info(path: &Path) -> serde_json::Value {
    let meta = std::fs::metadata(path).ok();
    let size = meta.as_ref().map(|m| m.len()).unwrap_or(0);
-    let mtime = meta.as_ref()
+    let mtime = meta
+        .as_ref()
        .and_then(|m| m.modified().ok())
        .and_then(|t| {
            let secs = t.duration_since(SystemTime::UNIX_EPOCH).ok()?.as_secs() as i64;
-            chrono::DateTime::from_timestamp(secs, 0)
-                .map(|dt| dt.to_rfc3339())
+            chrono::DateTime::from_timestamp(secs, 0).map(|dt| dt.to_rfc3339())
        })
        .unwrap_or_default();
    let fname = path.to_string_lossy().to_string();
-    let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("").to_lowercase();
+    let ext = path
+        .extension()
+        .and_then(|e| e.to_str())
+        .unwrap_or("")
+        .to_lowercase();
    let cat = detect_category(path);
    let file_type = match cat {
        FileCategory::Video => "video",
@@ -150,7 +201,13 @@ fn ffprobe_probe(path: &Path, format_base: serde_json::Value) -> serde_json::Val
 }

 /// Run Python probe for document/spreadsheet/presentation files
-fn python_probe(path: &Path, category: &FileCategory, scripts_dir: &str, python_path: &str, format_base: serde_json::Value) -> serde_json::Value {
+fn python_probe(
+    path: &Path,
+    category: &FileCategory,
+    scripts_dir: &str,
+    python_path: &str,
+    format_base: serde_json::Value,
+) -> serde_json::Value {
    let script = format!("{}/probe_file.py", scripts_dir);
    if !std::path::Path::new(&script).exists() {
        return minimal_probe(format_base);
@@ -184,18 +241,12 @@ fn minimal_probe(format_base: serde_json::Value) -> serde_json::Value {

 /// Unified probe: dispatches to the right probe based on file type
 /// Returns a probe_json-compatible Value
-pub async fn unified_probe(
-    path: &Path,
-    scripts_dir: &str,
-    python_path: &str,
-) -> serde_json::Value {
+pub async fn unified_probe(path: &Path, scripts_dir: &str, python_path: &str) -> serde_json::Value {
    let cat = detect_category(path);
    let format_base = base_format_info(path);

    match cat {
-        FileCategory::Video | FileCategory::Image => {
-            ffprobe_probe(path, format_base)
-        }
+        FileCategory::Video | FileCategory::Image => ffprobe_probe(path, format_base),
        FileCategory::Document | FileCategory::Spreadsheet | FileCategory::Presentation => {
            python_probe(path, &cat, scripts_dir, python_path, format_base)
        }
--- a/src/core/processor/cut.rs
+++ b/src/core/processor/cut.rs
@@ -1,5 +1,6 @@
 use anyhow::{Context, Result};
 use serde::{Deserialize, Serialize};
+use std::process::Command;
 use std::time::Duration;

 use super::executor::PythonExecutor;
@@ -27,13 +28,21 @@ pub async fn process_cut(
    output_path: &str,
    uuid: Option<&str>,
 ) -> Result<CutResult> {
+    // Try native ffmpeg-based scene detection first
+    let result = try_native_cut(video_path);
+    if let Ok(r) = result {
+        let json = serde_json::to_string_pretty(&r)?;
+        std::fs::write(output_path, &json)
+            .with_context(|| format!("Failed to write {:?}", output_path))?;
+        return Ok(r);
+    }
+
+    // Fallback: Python scenedetect
+    tracing::warn!("[CUT] Native impl failed, falling back to Python");
    let executor = PythonExecutor::new()?;
    let script_path = executor.script_path("cut_processor.py");

-    tracing::info!("[CUT] Starting scene detection: {}", video_path);
-
    if !script_path.exists() {
-        tracing::warn!("[CUT] Script not found, returning empty result");
        return Ok(CutResult {
            frame_count: 0,
            fps: 0.0,
@@ -53,19 +62,179 @@ pub async fn process_cut(
        .with_context(|| format!("Failed to run {:?}", script_path))?;

    let json_str = std::fs::read_to_string(output_path).context("Failed to read CUT output")?;
-
    let result: CutResult =
        serde_json::from_str(&json_str).context("Failed to parse CUT output")?;

-    tracing::info!("[CUT] Result: {} scenes detected", result.scenes.len());
-
    Ok(result)
 }

+// ── Native ffmpeg scene detection ─────────────────────────────────
+
+/// Detects scene boundaries with ffprobe only, without the Python scenedetect script.
+///
+/// ffprobe is invoked twice: first to read the frame rate, frame count and duration of the
+/// first video stream, then with a lavfi `movie`/`select` graph that keeps only frames whose
+/// scene-change score exceeds 0.3. The timestamps of the kept frames are the cut points, and
+/// the cut points are expanded into contiguous, consecutively numbered scenes.
+///
+/// # Errors
+/// Returns an error when ffprobe cannot be spawned, exits with a failure status, or prints
+/// metadata that is not valid JSON. `process_cut` treats any error as the signal to fall
+/// back to the Python implementation.
+fn try_native_cut(video_path: &str) -> Result<CutResult> {
+    // Step 1: Get video info (fps, frame count, duration)
+    let probe = Command::new("ffprobe")
+        .args([
+            "-v",
+            "quiet",
+            "-print_format",
+            "json",
+            "-show_format",
+            "-show_streams",
+            video_path,
+        ])
+        .output()
+        .context("Failed to run ffprobe")?;
+    // A failed probe must surface as an error so the caller can fall back, instead of being
+    // reported as a valid result with zero frames and no scenes.
+    if !probe.status.success() {
+        anyhow::bail!("ffprobe exited with {} for {:?}", probe.status, video_path);
+    }
+
+    let probe_info: serde_json::Value =
+        serde_json::from_slice(&probe.stdout).context("Failed to parse ffprobe output")?;
+
+    let video_stream = probe_info["streams"]
+        .as_array()
+        .and_then(|streams| streams.iter().find(|s| s["codec_type"] == "video"));
+
+    // fps is used as a divisor below, so zero, negative and non-finite rates fall back to 30.
+    let fps = video_stream
+        .and_then(|s| s["r_frame_rate"].as_str())
+        .and_then(parse_fraction)
+        .filter(|rate| rate.is_finite() && *rate > 0.0)
+        .unwrap_or(30.0);
+
+    let duration: f64 = probe_info["format"]["duration"]
+        .as_str()
+        .and_then(|s| s.parse().ok())
+        .unwrap_or(0.0);
+
+    // nb_frames may be absent or unparsable; when a video stream exists, estimate the count
+    // from duration * fps so the final scene and frame_count are still populated.
+    let total_frames: u64 = video_stream
+        .and_then(|s| s["nb_frames"].as_str())
+        .and_then(|s| s.parse::<u64>().ok())
+        .filter(|&n| n > 0)
+        .unwrap_or_else(|| {
+            if video_stream.is_some() {
+                (duration * fps).round() as u64
+            } else {
+                0
+            }
+        });
+
+    // Step 2: Use the ffmpeg scene detection filter through ffprobe's lavfi input.
+    // The `scene` score is the difference between consecutive frames; `select` keeps frames
+    // whose score exceeds the threshold (0.3 = medium sensitivity).
+    // NOTE(review): video_path is interpolated into the filtergraph unescaped; a path that
+    // contains filtergraph metacharacters will presumably make this step fail and trigger
+    // the Python fallback — confirm whether escaping is needed.
+    let scene_output = Command::new("ffprobe")
+        .args([
+            "-v",
+            "quiet",
+            "-show_entries",
+            "frame=pts_time",
+            "-of",
+            "compact=p=0:nk=1",
+            "-f",
+            "lavfi",
+            &format!("movie={},select='gt(scene\\,0.3)'", video_path),
+            "-show_frames",
+        ])
+        .output()
+        .context("Failed to run ffprobe scene detection")?;
+    if !scene_output.status.success() {
+        anyhow::bail!(
+            "ffprobe scene detection exited with {} for {:?}",
+            scene_output.status,
+            video_path
+        );
+    }
+
+    // With `-v quiet` nothing is logged to stderr; the selected frames' pts_time values are
+    // printed to stdout, one frame per line, with `|` separating any additional fields.
+    let scene_stdout = String::from_utf8_lossy(&scene_output.stdout);
+    let scene_times: Vec<f64> = scene_stdout
+        .lines()
+        .filter_map(|line| line.split('|').next()?.trim().parse::<f64>().ok())
+        .filter(|t| t.is_finite() && *t >= 0.0)
+        .collect();
+
+    // Step 3: Build scenes from cut points
+    let frame_time = 1.0 / fps;
+    let mut scenes: Vec<CutScene> = Vec::with_capacity(scene_times.len() + 1);
+    let mut prev_time = 0.0;
+    let mut prev_frame: u64 = 0;
+
+    for &cut_time in &scene_times {
+        let end_frame = (cut_time * fps).round() as u64;
+
+        // Skip cut points that do not advance past the previous boundary; numbering is
+        // derived from the number of scenes actually emitted so it stays consecutive.
+        if end_frame > prev_frame {
+            scenes.push(CutScene {
+                scene_number: (scenes.len() + 1) as u32,
+                start_frame: prev_frame,
+                end_frame: end_frame - 1,
+                start_time: prev_time,
+                end_time: cut_time - frame_time,
+            });
+            prev_time = cut_time;
+            prev_frame = end_frame;
+        }
+    }
+
+    // Final scene (last cut point → end of video). When no cut was found this is the single
+    // scene covering the whole video.
+    if total_frames > 0 && prev_frame < total_frames {
+        scenes.push(CutScene {
+            scene_number: (scenes.len() + 1) as u32,
+            start_frame: prev_frame,
+            end_frame: total_frames - 1,
+            start_time: prev_time,
+            end_time: duration,
+        });
+    }
+
+    Ok(CutResult {
+        frame_count: total_frames,
+        fps,
+        scenes,
+    })
+}
+
+/// Converts a rate string into an `f64`.
+///
+/// Accepts either a `numerator/denominator` pair such as "30000/1001" or a plain number
+/// such as "30". Returns `None` when a component is not numeric or when the denominator
+/// is not strictly positive.
+fn parse_fraction(s: &str) -> Option<f64> {
+    match s.split_once('/') {
+        Some((numerator, denominator)) => {
+            let num: f64 = numerator.parse().ok()?;
+            let den: f64 = denominator.parse().ok()?;
+            if den > 0.0 {
+                Some(num / den)
+            } else {
+                // A string containing '/' never parses as a plain float, so this is None.
+                s.parse::<f64>().ok()
+            }
+        }
+        None => s.parse::<f64>().ok(),
+    }
+}
+
+// ── Tests ─────────────────────────────────────────────────────────
+
 #[cfg(test)]
 mod tests {
    use super::*;

+    #[test]
+    fn test_parse_fraction() {
+        let r = parse_fraction("30000/1001").unwrap();
+        assert!((r - 29.97).abs() < 0.01);
+    }
+
+    #[test]
+    fn test_parse_fraction_int() {
+        let r = parse_fraction("30").unwrap();
+        assert!((r - 30.0).abs() < 0.01);
+    }
+
+    #[test]
+    fn test_parse_fraction_invalid() {
+        assert!(parse_fraction("not/a/num").is_none());
+    }
+
+    #[test]
+    fn test_parse_fraction_zero_den() {
+        assert!(parse_fraction("1/0").is_none());
+    }
+
    #[test]
    fn test_cut_result_serialization() {
        let result = CutResult {
@@ -81,8 +250,9 @@ mod tests {
        };

        let json = serde_json::to_string(&result).unwrap();
+        assert!(json.contains("frame_count"));
        assert!(json.contains("scene_number"));
-        assert!(json.contains("1"));
+        assert!(json.contains("fps"));
    }

    #[test]
@@ -90,20 +260,23 @@ mod tests {
        let json = r#"{
            "frame_count": 100,
            "fps": 30.0,
-            "scenes": [
-                {"scene_number": 1, "start_frame": 0, "end_frame": 30, "start_time": 0.0, "end_time": 1.0},
-                {"scene_number": 2, "start_frame": 31, "end_frame": 60, "start_time": 1.033, "end_time": 2.0}
-            ]
+            "scenes": [{
+                "scene_number": 1,
+                "start_frame": 0,
+                "end_frame": 30,
+                "start_time": 0.0,
+                "end_time": 1.0
+            }]
        }"#;

        let result: CutResult = serde_json::from_str(json).unwrap();
-        assert_eq!(result.frame_count, 100);
-        assert_eq!(result.scenes.len(), 2);
-        assert_eq!(result.scenes[1].scene_number, 2);
+        assert_eq!(result.scenes.len(), 1);
+        assert_eq!(result.scenes[0].scene_number, 1);
+        assert_eq!(result.scenes[0].start_frame, 0);
    }

    #[test]
-    fn test_cut_result_empty_scenes() {
+    fn test_empty_scenes() {
        let result = CutResult {
            frame_count: 0,
            fps: 0.0,
@@ -111,17 +284,4 @@ mod tests {
        };
        assert!(result.scenes.is_empty());
    }
-
-    #[test]
-    fn test_cut_scene_times() {
-        let scene = CutScene {
-            scene_number: 1,
-            start_frame: 0,
-            end_frame: 30,
-            start_time: 0.0,
-            end_time: 1.0,
-        };
-        assert!(scene.end_time > scene.start_time);
-        assert_eq!(scene.scene_number, 1);
-    }
 }
--- a/src/core/processor/executor.rs
+++ b/src/core/processor/executor.rs
@@ -109,8 +109,7 @@ pub fn validate_python_env() -> Result<()> {
        tracing::warn!("Expected Python 3.11, got: {}", version.trim());
    }

-    let scripts_dir = std::env::var("MOMENTRY_SCRIPTS_DIR")
-        .unwrap_or_else(|_| {
+    let scripts_dir = std::env::var("MOMENTRY_SCRIPTS_DIR").unwrap_or_else(|_| {
        let manifest = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
        manifest.join("scripts").to_string_lossy().to_string()
    });
@@ -133,8 +132,7 @@ impl PythonExecutor {
    pub fn new() -> Result<Self> {
        let python_path = std::env::var("MOMENTRY_PYTHON_PATH")
            .unwrap_or_else(|_| "/opt/homebrew/bin/python3.11".to_string());
-        let scripts_dir = std::env::var("MOMENTRY_SCRIPTS_DIR")
-            .unwrap_or_else(|_| {
+        let scripts_dir = std::env::var("MOMENTRY_SCRIPTS_DIR").unwrap_or_else(|_| {
            let manifest = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
            manifest.join("scripts").to_string_lossy().to_string()
        });
@@ -173,7 +171,8 @@ impl PythonExecutor {

        if let Some(expected_hash) = self.checksums.get(&rel_path) {
            let output = std::process::Command::new("shasum")
-                .arg("-a").arg("256")
+                .arg("-a")
+                .arg("256")
                .arg(&script_path)
                .output()
                .context("Failed to run shasum for integrity check")?;
@@ -235,8 +234,9 @@ impl PythonExecutor {
        }

        // Verify script integrity via SHA256 checksum before execution
-        self.verify_script_integrity(script_name)
-            .context("Pre-execution integrity check failed — possible version mismatch or corruption")?;
+        self.verify_script_integrity(script_name).context(
+            "Pre-execution integrity check failed — possible version mismatch or corruption",
+        )?;

        // 標記輸出檔為處理中（add .tmp suffix）
        let output_path = args.get(1).map(|p| std::path::PathBuf::from(p));
--- a/src/core/processor/heuristic_scene.rs
+++ b/src/core/processor/heuristic_scene.rs
@@ -44,22 +44,59 @@ pub enum CrowdSize {

 /// Indoor-indicative YOLO classes (COCO labels)
 const INDOOR_CLASSES: &[&str] = &[
-    "chair", "couch", "bed", "dining table", "toilet", "tv", "laptop",
-    "microwave", "oven", "refrigerator", "sink", "book", "clock",
-    "vase", "potted plant",
+    "chair",
+    "couch",
+    "bed",
+    "dining table",
+    "toilet",
+    "tv",
+    "laptop",
+    "microwave",
+    "oven",
+    "refrigerator",
+    "sink",
+    "book",
+    "clock",
+    "vase",
+    "potted plant",
 ];

 /// Vehicle-indicative classes (person + vehicle = transport scene)
+///
+/// Both the current COCO spellings ("airplane", "motorcycle") and the legacy ones
+/// ("aeroplane", "motorbike") are listed so the match does not depend on which label set
+/// the detector emits.
 const VEHICLE_CLASSES: &[&str] = &[
-    "car", "truck", "bus", "train", "boat", "aeroplane", "bicycle", "motorbike",
+    "car",
+    "truck",
+    "bus",
+    "train",
+    "boat",
+    "airplane",
+    "aeroplane",
+    "bicycle",
+    "motorcycle",
+    "motorbike",
 ];

 /// Outdoor-indicative YOLO classes
 const OUTDOOR_CLASSES: &[&str] = &[
-    "car", "truck", "bus", "train", "boat", "airplane",
-    "traffic light", "fire hydrant", "stop sign", "parking meter",
-    "bench", "bird", "cat", "dog", "horse", "sheep", "cow", "elephant",
-    "bear", "zebra", "giraffe", "tree",
+    "car",
+    "truck",
+    "bus",
+    "train",
+    "boat",
+    "airplane",
+    "traffic light",
+    "fire hydrant",
+    "stop sign",
+    "parking meter",
+    "bench",
+    "bird",
+    "cat",
+    "dog",
+    "horse",
+    "sheep",
+    "cow",
+    "elephant",
+    "bear",
+    "zebra",
+    "giraffe",
+    "tree",
 ];

 /// Build heuristic scene metadata from disk files (yolo.json + DB face data).
@@ -113,13 +150,14 @@ pub async fn build_heuristic_scene_meta(

    // Get face counts grouped by frame
    let fd_table = schema::table_name("face_detections");
-    let face_rows: Vec<(i64, i64)> = sqlx::query_as(
-        &format!("SELECT frame_number, COUNT(*) as fc \
+    let face_rows: Vec<(i64, i64)> = sqlx::query_as(&format!(
+        "SELECT frame_number, COUNT(*) as fc \
         FROM {} \
         WHERE file_uuid = $1 AND frame_number IS NOT NULL \
         GROUP BY frame_number \
-         ORDER BY frame_number", fd_table),
-    )
+         ORDER BY frame_number",
+        fd_table
+    ))
    .bind(file_uuid)
    .fetch_all(pool)
    .await
@@ -166,7 +204,10 @@ pub async fn build_heuristic_scene_meta(
        let outdoor_ratio = outdoor_objects as f64 / frame_count.max(1) as f64;
        let total_indicator = indoor_ratio + outdoor_ratio;
        let (indoor_score, outdoor_score) = if total_indicator > 0.0 {
-            (indoor_ratio / total_indicator, outdoor_ratio / total_indicator)
+            (
+                indoor_ratio / total_indicator,
+                outdoor_ratio / total_indicator,
+            )
        } else {
            (0.5, 0.5)
        };
@@ -187,17 +228,13 @@ pub async fn build_heuristic_scene_meta(
            .map(|c| class_frame_presence.get(*c).copied().unwrap_or(0))
            .sum();
        let person_ratio = person_frames as f64 / frame_count.max(1) as f64;
-        let likely_vehicle = person_ratio > 0.5 && vehicle_frames > 0
-            && outdoor_score > 0.3;
+        let likely_vehicle = person_ratio > 0.5 && vehicle_frames > 0 && outdoor_score > 0.3;

        // Dominant objects: rank by frame presence (not total count)
        let mut sorted: Vec<_> = class_frame_presence.into_iter().collect();
        sorted.sort_by(|a, b| b.1.cmp(&a.1));
-        let dominant_objects: Vec<String> = sorted
-            .iter()
-            .take(3)
-            .map(|(cls, _)| cls.clone())
-            .collect();
+        let dominant_objects: Vec<String> =
+            sorted.iter().take(3).map(|(cls, _)| cls.clone()).collect();

        segments.push(SceneSegmentMeta {
            segment_index: idx as u32 + 1,
@@ -229,12 +266,15 @@ pub async fn build_heuristic_scene_meta(

 /// Full pipeline entry point: reads CUT data, generates heuristic metadata, writes JSON.
 /// Called from job_worker post-processing trigger.
-pub async fn generate_scene_meta(db: &crate::core::db::PostgresDb, file_uuid: &str) -> Result<usize> {
+pub async fn generate_scene_meta(
+    db: &crate::core::db::PostgresDb,
+    file_uuid: &str,
+) -> Result<usize> {
    let pool = db.pool();

    // Read CUT segment boundaries from cut.json
-    let cut_path = Path::new(crate::core::config::OUTPUT_DIR.as_str())
-        .join(format!("{}.cut.json", file_uuid));
+    let cut_path =
+        Path::new(crate::core::config::OUTPUT_DIR.as_str()).join(format!("{}.cut.json", file_uuid));
    let segments: Vec<(i64, i64, f64, f64)> = if cut_path.exists() {
        let cut_str = tokio::fs::read_to_string(&cut_path)
            .await
@@ -250,8 +290,7 @@ pub async fn generate_scene_meta(db: &crate::core::db::PostgresDb, file_uuid: &s
            start_time: f64,
            end_time: f64,
        }
-        let cut: CutJson = serde_json::from_str(&cut_str)
-            .context("Failed to parse cut.json")?;
+        let cut: CutJson = serde_json::from_str(&cut_str).context("Failed to parse cut.json")?;
        cut.scenes
            .into_iter()
            .map(|s| (s.start_frame, s.end_frame, s.start_time, s.end_time))
@@ -259,9 +298,10 @@ pub async fn generate_scene_meta(db: &crate::core::db::PostgresDb, file_uuid: &s
    } else {
        // Fallback: query DB for video duration, make one segment
        let videos_table = schema::table_name("videos");
-        let (total_frames, duration): (Option<i64>, Option<f64>) = sqlx::query_as(
-            &format!("SELECT total_frames, duration FROM {} WHERE file_uuid = $1", videos_table),
-        )
+        let (total_frames, duration): (Option<i64>, Option<f64>) = sqlx::query_as(&format!(
+            "SELECT total_frames, duration FROM {} WHERE file_uuid = $1",
+            videos_table
+        ))
        .bind(file_uuid)
        .fetch_optional(pool)
        .await
--- a/src/core/processor/mod.rs
+++ b/src/core/processor/mod.rs
@@ -10,6 +10,7 @@ pub mod ocr;
 pub mod pose;
 pub mod scene_classification;
 pub mod story;
+pub mod tkg;
 pub mod visual_chunk;
 pub mod yolo;

@@ -25,7 +26,8 @@ pub use face_recognition::{
    RecognizedFaceDetection,
 };
 pub use heuristic_scene::{
-    build_heuristic_scene_meta, generate_scene_meta, CrowdSize, HeuristicSceneMeta, SceneSegmentMeta,
+    build_heuristic_scene_meta, generate_scene_meta, CrowdSize, HeuristicSceneMeta,
+    SceneSegmentMeta,
 };
 pub use ocr::{process_ocr, OcrFrame, OcrResult, OcrText};
 pub use pose::{process_pose, Bbox, Keypoint, PersonPose, PoseFrame, PoseResult};
@@ -34,5 +36,6 @@ pub use scene_classification::{
    SceneSegment,
 };
 pub use story::{process_story, StoryChildChunk, StoryParentChunk, StoryResult, StoryStats};
+pub use tkg::{build_tkg, TkgResult};
 pub use visual_chunk::{process_visual_chunk, process_visual_chunk_advanced, VisualChunkResult};
 pub use yolo::{process_yolo, YoloFrame, YoloObject, YoloResult};
--- a/src/core/processor/story.rs
+++ b/src/core/processor/story.rs
@@ -106,7 +106,10 @@ pub async fn process_story(
    }

    // Fallback: Python script
-    tracing::warn!("[STORY] Native impl failed, falling back to Python: {:?}", result.err());
+    tracing::warn!(
+        "[STORY] Native impl failed, falling back to Python: {:?}",
+        result.err()
+    );
    let executor = PythonExecutor::new()?;
    let script_path = executor.script_path("story_processor.py");

@@ -145,7 +148,11 @@ pub async fn process_story(

 // ── Native implementation ─────────────────────────────────────────

-fn try_native_story(_video_path: &str, output_path: &str, _uuid: Option<&str>) -> Result<StoryResult> {
+fn try_native_story(
+    _video_path: &str,
+    output_path: &str,
+    _uuid: Option<&str>,
+) -> Result<StoryResult> {
    let output_dir = Path::new(output_path).parent().unwrap_or(Path::new("."));
    let basename = Path::new(output_path)
        .file_stem()
@@ -160,8 +167,7 @@ fn try_native_story(_video_path: &str, output_path: &str, _uuid: Option<&str>) -
    let asr_data: AsrData = if asr_path.exists() {
        let content = std::fs::read_to_string(&asr_path)
            .with_context(|| format!("Failed to read {:?}", asr_path))?;
-        serde_json::from_str(&content)
-            .with_context(|| format!("Failed to parse {:?}", asr_path))?
+        serde_json::from_str(&content).with_context(|| format!("Failed to parse {:?}", asr_path))?
    } else {
        AsrData { segments: vec![] }
    };
@@ -169,8 +175,7 @@ fn try_native_story(_video_path: &str, output_path: &str, _uuid: Option<&str>) -
    let cut_data: CutData = if cut_path.exists() {
        let content = std::fs::read_to_string(&cut_path)
            .with_context(|| format!("Failed to read {:?}", cut_path))?;
-        serde_json::from_str(&content)
-            .with_context(|| format!("Failed to parse {:?}", cut_path))?
+        serde_json::from_str(&content).with_context(|| format!("Failed to parse {:?}", cut_path))?
    } else {
        CutData { scenes: vec![] }
    };
@@ -376,22 +381,39 @@ fn generate_narrative(texts: &[String], objects: &[String], start: f64, end: f64
        let mut unique: Vec<&String> = objects.iter().collect();
        unique.sort();
        unique.dedup();
-        let objs = unique.iter().take(5).map(|s| (*s).as_str()).collect::<Vec<_>>().join(", ");
+        let objs = unique
+            .iter()
+            .take(5)
+            .map(|s| (*s).as_str())
+            .collect::<Vec<_>>()
+            .join(", ");
        parts.push(format!("Visuals: {}", objs));
    }

    format!("[{:.0}s-{:.0}s] {}", start, end, parts.join(" | "))
 }

-fn generate_scene_narrative(objects: &[String], start: f64, end: f64, scene_count: usize) -> String {
+fn generate_scene_narrative(
+    objects: &[String],
+    start: f64,
+    end: f64,
+    scene_count: usize,
+) -> String {
    let mut unique: Vec<&String> = objects.iter().collect();
    unique.sort();
    unique.dedup();
    let top5: Vec<&String> = unique.iter().take(5).cloned().collect();

    if !top5.is_empty() {
-        let obj_str = top5.iter().map(|s| s.as_str()).collect::<Vec<_>>().join(", ");
-        format!("[{:.0}s-{:.0}s] {} scenes. Visuals: {}.", start, end, scene_count, obj_str)
+        let obj_str = top5
+            .iter()
+            .map(|s| s.as_str())
+            .collect::<Vec<_>>()
+            .join(", ");
+        format!(
+            "[{:.0}s-{:.0}s] {} scenes. Visuals: {}.",
+            start, end, scene_count, obj_str
+        )
    } else {
        format!("[{:.0}s-{:.0}s] {} video scenes.", start, end, scene_count)
    }
@@ -408,7 +430,8 @@ mod tests {
        let text = generate_narrative(
            &["Hello world".to_string()],
            &["person".to_string()],
-            0.0, 5.0,
+            0.0,
+            5.0,
        );
        assert!(text.contains("[0s-5s]"));
        assert!(text.contains("Speech:"));
@@ -576,7 +599,10 @@ mod tests {
        };

        assert_eq!(result.parent_chunks[0].child_chunk_ids.len(), 2);
-        assert!(result.child_chunks.iter().all(|c| c.parent_chunk_id.is_some()));
+        assert!(result
+            .child_chunks
+            .iter()
+            .all(|c| c.parent_chunk_id.is_some()));
        assert!(result.parent_chunks[0].parent_chunk_id.is_none());
    }

@@ -594,11 +620,7 @@ mod tests {
        std::fs::write(&asr_path, r#"{"segments":[]}"#).unwrap();
        std::fs::write(&cut_path, r#"{"scenes":[]}"#).unwrap();

-        let result = try_native_story(
-            "/dummy.mp4",
-            out_path.to_str().unwrap(),
-            None,
-        ).unwrap();
+        let result = try_native_story("/dummy.mp4", out_path.to_str().unwrap(), None).unwrap();

        assert_eq!(result.stats.total_child_chunks, 0);
        assert_eq!(result.stats.total_parent_chunks, 0);
@@ -616,13 +638,17 @@ mod tests {
        let cut_path = dir.join(format!("{}.cut.json", basename));
        let out_path = dir.join(format!("{}.story.json", basename));

-        std::fs::write(&asr_path, r#"{
+        std::fs::write(
+            &asr_path,
+            r#"{
            "segments": [
                {"start": 0.0, "end": 2.5, "text": "Hello", "confidence": 0.95},
                {"start": 2.5, "end": 5.0, "text": "World", "confidence": 0.92},
                {"start": 5.0, "end": 7.5, "text": "Foo", "confidence": 0.90}
            ]
-        }"#).unwrap();
+        }"#,
+        )
+        .unwrap();

        std::fs::write(&cut_path, r#"{
            "scenes": [
@@ -631,11 +657,7 @@ mod tests {
            ]
        }"#).unwrap();

-        let result = try_native_story(
-            "/dummy.mp4",
-            out_path.to_str().unwrap(),
-            None,
-        ).unwrap();
+        let result = try_native_story("/dummy.mp4", out_path.to_str().unwrap(), None).unwrap();

        assert_eq!(result.stats.asr_children, 3);
        assert_eq!(result.stats.cut_children, 2);
@@ -649,7 +671,11 @@ mod tests {
        for child in &result.child_chunks {
            if child.source == "asr" {
                assert!(child.parent_chunk_id.is_some());
-                assert!(child.parent_chunk_id.as_ref().unwrap().starts_with("story_asr_"));
+                assert!(child
+                    .parent_chunk_id
+                    .as_ref()
+                    .unwrap()
+                    .starts_with("story_asr_"));
            }
        }

--- a/src/core/processor/tkg.rs
+++ b/src/core/processor/tkg.rs
@@ -0,0 +1,703 @@
+use anyhow::{Context, Result};
+use serde::Deserialize;
+use sqlx::PgPool;
+use std::collections::HashMap;
+use std::path::Path;
+
+use crate::core::db::postgres_db::PostgresDb;
+
+/// Resolves a bare table name to the name used in SQL statements.
+///
+/// The schema is read from the `DATABASE_SCHEMA` environment variable on every call and
+/// defaults to "dev" when the variable cannot be read. For the "public" schema the name is
+/// returned unchanged; otherwise it is prefixed as `schema.name`.
+fn t(name: &str) -> String {
+    match std::env::var("DATABASE_SCHEMA") {
+        Ok(schema) if schema == "public" => name.to_string(),
+        Ok(schema) => format!("{}.{}", schema, name),
+        Err(_) => format!("{}.{}", "dev", name),
+    }
+}
+
+// ── Input data structs ────────────────────────────────────────────
+
+/// Contents of `<file_uuid>.yolo.json` as consumed by this module.
+#[derive(Debug, Deserialize)]
+struct YoloJson {
+    /// Per-frame detections. Elsewhere in this module entries are looked up by the frame
+    /// number formatted as a decimal string. Empty when the key is absent from the JSON.
+    #[serde(default)]
+    frames: HashMap<String, YoloFrameEntry>,
+}
+
+/// Detections recorded for a single frame of the YOLO output.
+///
+/// Two alternative keys are accepted; readers in this module use `detections` when it is
+/// non-empty and fall back to `objects` otherwise.
+#[derive(Debug, Deserialize)]
+struct YoloFrameEntry {
+    /// Preferred list of detections; empty when the key is absent.
+    #[serde(default)]
+    detections: Vec<YoloDetEntry>,
+    /// Fallback list, consulted only when `detections` is empty.
+    #[serde(default)]
+    objects: Vec<YoloDetEntry>,
+}
+
+/// A single detected object within a frame.
+#[derive(Debug, Deserialize)]
+struct YoloDetEntry {
+    /// Class label; used as the external id and label of `object` nodes. Deserializes to
+    /// an empty string when the key is absent.
+    #[serde(default)]
+    class_name: String,
+    /// Detection confidence; defaults to 0.0 when absent.
+    /// NOTE(review): not read by the code visible in this part of the file — confirm usage.
+    #[serde(default)]
+    confidence: f64,
+}
+
+/// Contents of `<file_uuid>.asrx.json` as consumed by this module.
+#[derive(Debug, Deserialize)]
+struct AsrxJson {
+    /// Speaker-attributed segments; empty when the key is absent.
+    #[serde(default)]
+    segments: Vec<AsrxSegmentEntry>,
+    /// Per-speaker statistics keyed by speaker id; each key becomes one `speaker` node.
+    /// `None` when the key is absent or null.
+    #[serde(default)]
+    speaker_stats: Option<HashMap<String, AsrxSpeakerStat>>,
+}
+
+/// One entry of the `segments` array in the ASRX JSON.
+///
+/// Every field is optional on input and falls back to its type's default, so a segment
+/// that omits a key does not make the whole file fail to deserialize.
+#[derive(Debug, Deserialize)]
+struct AsrxSegmentEntry {
+    /// Speaker identifier for the segment; empty string when absent.
+    #[serde(default)]
+    speaker_id: String,
+    /// Segment start; 0.0 when absent.
+    #[serde(default)]
+    start_time: f64,
+    /// Segment end; 0.0 when absent.
+    #[serde(default)]
+    end_time: f64,
+    // Parsed for completeness but never read, so it must not be a required key.
+    #[serde(default)]
+    #[allow(dead_code)]
+    start_frame: i64,
+    // Parsed for completeness but never read, so it must not be a required key.
+    #[serde(default)]
+    #[allow(dead_code)]
+    end_frame: i64,
+}
+
+/// Statistics for one speaker taken from the `speaker_stats` map of the ASRX JSON.
+#[derive(Debug, Deserialize)]
+struct AsrxSpeakerStat {
+    /// Stored on the speaker node as the `segment_count` property; 0 when absent.
+    #[serde(default)]
+    count: i64,
+}
+
+// ── Face detection trace ──────────────────────────────────────────
+
+/// One aggregated row per `trace_id`, produced by the GROUP BY query in
+/// `build_face_trace_nodes`.
+#[derive(Debug, sqlx::FromRow)]
+struct FaceTraceRow {
+    /// Trace identifier the detections were grouped by.
+    trace_id: i64,
+    /// Number of detection rows in the trace (`COUNT(*)`).
+    frame_count: i64,
+    /// Smallest frame number in the trace.
+    start_f: i64,
+    /// Largest frame number in the trace.
+    end_f: i64,
+    /// Average of the `x` column over the trace; `None` when the average is NULL.
+    avg_x: Option<f64>,
+    /// Average of the `y` column over the trace; `None` when the average is NULL.
+    avg_y: Option<f64>,
+    /// Average of the `width` column over the trace; `None` when the average is NULL.
+    avg_w: Option<f64>,
+    /// Average of the `height` column over the trace; `None` when the average is NULL.
+    avg_h: Option<f64>,
+}
+
+/// One face detection row that carries a trace id, as selected by
+/// `build_co_occurrence_edges`.
+#[derive(Debug, sqlx::FromRow)]
+struct FaceDetectionRow {
+    /// Trace the detection belongs to; maps to the `trace_<id>` face-trace node.
+    trace_id: i64,
+    /// Frame number of the detection; used to find the matching YOLO frame entry.
+    frame_number: i64,
+    // The bounding-box columns are selected by the query but not read afterwards.
+    #[allow(dead_code)]
+    x: Option<f64>,
+    #[allow(dead_code)]
+    y: Option<f64>,
+    #[allow(dead_code)]
+    width: Option<f64>,
+    #[allow(dead_code)]
+    height: Option<f64>,
+}
+
+// ── Public API ────────────────────────────────────────────────────
+
+/// Summary returned by [`build_tkg`]: one counter per node or edge builder that ran.
+#[derive(Debug, Clone, Default, PartialEq, Eq)]
+pub struct TkgResult {
+    /// Number of `face_trace` nodes upserted (one per face trace id).
+    pub face_trace_nodes: usize,
+    /// Number of `object` nodes upserted (one per distinct YOLO class name).
+    pub object_nodes: usize,
+    /// Number of `speaker` nodes upserted (one per speaker id in the ASRX stats).
+    pub speaker_nodes: usize,
+    /// Count reported by the face/object co-occurrence edge builder.
+    pub co_occurrence_edges: usize,
+    /// Count reported by the speaker/face edge builder.
+    pub speaker_face_edges: usize,
+    /// Count reported by the face/face edge builder.
+    pub face_face_edges: usize,
+}
+
+/// Builds the TKG nodes and edges for one file and returns the per-builder counts.
+///
+/// The three node builders run first, followed by the three edge builders, strictly in
+/// sequence; the co-occurrence edge builder looks up node ids, so nodes must already exist.
+/// `output_dir` is the directory holding the `<file_uuid>.yolo.json` and
+/// `<file_uuid>.asrx.json` files.
+///
+/// # Errors
+/// Stops at, and returns, the first error reported by any builder.
+pub async fn build_tkg(db: &PostgresDb, file_uuid: &str, output_dir: &str) -> Result<TkgResult> {
+    let pool = db.pool();
+
+    // Struct fields are evaluated in the order written, which keeps nodes ahead of edges.
+    Ok(TkgResult {
+        face_trace_nodes: build_face_trace_nodes(pool, file_uuid).await?,
+        object_nodes: build_yolo_object_nodes(pool, file_uuid, output_dir).await?,
+        speaker_nodes: build_speaker_nodes(pool, file_uuid, output_dir).await?,
+        co_occurrence_edges: build_co_occurrence_edges(pool, file_uuid, output_dir).await?,
+        speaker_face_edges: build_speaker_face_edges(pool, file_uuid, output_dir).await?,
+        face_face_edges: build_face_face_edges(pool, file_uuid).await?,
+    })
+}
+
+// ── Node builders ─────────────────────────────────────────────────
+
+/// Upserts one `face_trace` node per distinct trace id found for `file_uuid`.
+///
+/// Detections are aggregated per trace (row count, first/last frame, average bounding box)
+/// and stored as the node's JSON properties; the averaged box values are rounded to
+/// integers, with a NULL average treated as 0. Returns the number of traces processed.
+///
+/// # Errors
+/// Propagates any database or JSON serialization error.
+async fn build_face_trace_nodes(pool: &PgPool, file_uuid: &str) -> Result<usize> {
+    let select_sql = format!(
+        r#"
+        SELECT trace_id,
+               COUNT(*)::bigint as frame_count,
+               MIN(frame_number) as start_f,
+               MAX(frame_number) as end_f,
+               AVG(x::float8) as avg_x,
+               AVG(y::float8) as avg_y,
+               AVG(width::float8) as avg_w,
+               AVG(height::float8) as avg_h
+        FROM {}
+        WHERE file_uuid = $1 AND trace_id IS NOT NULL
+        GROUP BY trace_id
+        ORDER BY trace_id
+        "#,
+        t("face_detections")
+    );
+    // The upsert statement is the same for every trace, so it is formatted once.
+    let upsert_sql = {
+        let nodes_table = t("tkg_nodes");
+        format!(
+            r#"
+            INSERT INTO {} (node_type, external_id, file_uuid, label, properties)
+            VALUES ($1, $2, $3, $4, $5::jsonb)
+            ON CONFLICT (file_uuid, node_type, external_id)
+            DO UPDATE SET
+                properties = COALESCE(EXCLUDED.properties, tkg_nodes.properties),
+                label = COALESCE(NULLIF(EXCLUDED.label, ''), tkg_nodes.label)
+            "#,
+            nodes_table
+        )
+    };
+
+    let traces = sqlx::query_as::<_, FaceTraceRow>(&select_sql)
+        .bind(file_uuid)
+        .fetch_all(pool)
+        .await?;
+
+    // Rounds an optional average to the nearest integer, treating NULL as 0.
+    let rounded = |avg: Option<f64>| avg.unwrap_or(0.0).round() as i64;
+
+    for trace in &traces {
+        let external_id = format!("trace_{}", trace.trace_id);
+        let label = format!("Face Trace {}", trace.trace_id);
+        let props = serde_json::json!({
+            "frame_count": trace.frame_count,
+            "start_frame": trace.start_f,
+            "end_frame": trace.end_f,
+            "avg_bbox": {
+                "x": rounded(trace.avg_x),
+                "y": rounded(trace.avg_y),
+                "width": rounded(trace.avg_w),
+                "height": rounded(trace.avg_h),
+            }
+        });
+
+        sqlx::query(&upsert_sql)
+            .bind("face_trace")
+            .bind(&external_id)
+            .bind(file_uuid)
+            .bind(&label)
+            .bind(serde_json::to_string(&props)?)
+            .execute(pool)
+            .await?;
+    }
+
+    // Every row was upserted, otherwise an error would have returned early.
+    Ok(traces.len())
+}
+
+/// Upserts one `object` node per distinct class name found in `<file_uuid>.yolo.json`.
+///
+/// Detections are tallied across all frames (using `detections`, or `objects` when
+/// `detections` is empty) and the tally is stored as the `total_detections` property.
+/// Returns 0 without touching the database when the YOLO file does not exist; otherwise
+/// returns the number of distinct classes.
+///
+/// # Errors
+/// Fails when the file cannot be read or parsed, or when a database statement fails.
+async fn build_yolo_object_nodes(
+    pool: &PgPool,
+    file_uuid: &str,
+    output_dir: &str,
+) -> Result<usize> {
+    let yolo_path = Path::new(output_dir).join(format!("{}.yolo.json", file_uuid));
+    if !yolo_path.exists() {
+        return Ok(0);
+    }
+
+    let raw = std::fs::read_to_string(&yolo_path)
+        .with_context(|| format!("Failed to read {:?}", yolo_path))?;
+    let parsed: YoloJson = serde_json::from_str(&raw)
+        .with_context(|| format!("Failed to parse {:?}", yolo_path))?;
+
+    // Tally detections per class over every frame.
+    let mut totals: HashMap<String, i64> = HashMap::new();
+    let all_detections = parsed.frames.values().flat_map(|frame| {
+        if frame.detections.is_empty() {
+            &frame.objects
+        } else {
+            &frame.detections
+        }
+    });
+    for det in all_detections {
+        *totals.entry(det.class_name.clone()).or_insert(0) += 1;
+    }
+
+    // The upsert statement is the same for every class, so it is formatted once.
+    let upsert_sql = {
+        let nodes_table = t("tkg_nodes");
+        format!(
+            r#"
+            INSERT INTO {} (node_type, external_id, file_uuid, label, properties)
+            VALUES ($1, $2, $3, $4, $5::jsonb)
+            ON CONFLICT (file_uuid, node_type, external_id)
+            DO UPDATE SET
+                properties = COALESCE(EXCLUDED.properties, tkg_nodes.properties)
+            "#,
+            nodes_table
+        )
+    };
+
+    for (class_name, total) in &totals {
+        let props = serde_json::json!({ "total_detections": total });
+
+        // The class name serves as both the external id and the label.
+        sqlx::query(&upsert_sql)
+            .bind("object")
+            .bind(class_name)
+            .bind(file_uuid)
+            .bind(class_name)
+            .bind(serde_json::to_string(&props)?)
+            .execute(pool)
+            .await?;
+    }
+
+    Ok(totals.len())
+}
+
+/// Upserts one `speaker` node per entry in the `speaker_stats` map of
+/// `<file_uuid>.asrx.json`.
+///
+/// Each speaker's `count` is stored as the `segment_count` property. Returns 0 without
+/// touching the database when the ASRX file does not exist, and 0 when the file carries no
+/// speaker statistics; otherwise returns the number of speakers.
+///
+/// # Errors
+/// Fails when the file cannot be read or parsed, or when a database statement fails.
+async fn build_speaker_nodes(pool: &PgPool, file_uuid: &str, output_dir: &str) -> Result<usize> {
+    let asrx_path = Path::new(output_dir).join(format!("{}.asrx.json", file_uuid));
+    if !asrx_path.exists() {
+        return Ok(0);
+    }
+
+    let raw = std::fs::read_to_string(&asrx_path)
+        .with_context(|| format!("Failed to read {:?}", asrx_path))?;
+    let parsed: AsrxJson = serde_json::from_str(&raw)
+        .with_context(|| format!("Failed to parse {:?}", asrx_path))?;
+
+    let speakers = parsed.speaker_stats.unwrap_or_default();
+
+    // The upsert statement is the same for every speaker, so it is formatted once.
+    let upsert_sql = {
+        let nodes_table = t("tkg_nodes");
+        format!(
+            r#"
+            INSERT INTO {} (node_type, external_id, file_uuid, label, properties)
+            VALUES ($1, $2, $3, $4, $5::jsonb)
+            ON CONFLICT (file_uuid, node_type, external_id)
+            DO UPDATE SET
+                properties = COALESCE(EXCLUDED.properties, tkg_nodes.properties)
+            "#,
+            nodes_table
+        )
+    };
+
+    for (speaker_id, stat) in &speakers {
+        let props = serde_json::json!({ "segment_count": stat.count });
+
+        // The speaker id serves as both the external id and the label.
+        sqlx::query(&upsert_sql)
+            .bind("speaker")
+            .bind(speaker_id)
+            .bind(file_uuid)
+            .bind(speaker_id)
+            .bind(serde_json::to_string(&props)?)
+            .execute(pool)
+            .await?;
+    }
+
+    Ok(speakers.len())
+}
+
+// ── Edge builders ─────────────────────────────────────────────────
+
+/// Links face-trace nodes to the object nodes detected in the same frame.
+///
+/// For every stored face detection with a non-NULL `trace_id`, the frame with
+/// the same frame number is looked up in `<output_dir>/<file_uuid>.yolo.json`
+/// and a `CO_OCCURS_WITH` edge (face_trace -> object) is upserted for each
+/// detection of that frame. A frame's `detections` list is preferred; its
+/// `objects` list is used when `detections` is empty.
+///
+/// Returns the number of successful edge upserts. The same (trace, class)
+/// pair is upserted once per co-occurring detection, so the count can exceed
+/// the number of distinct edges, and an edge's properties reflect the most
+/// recent upsert. Returns `Ok(0)` when the YOLO file does not exist.
+///
+/// # Errors
+/// Fails when the YOLO file cannot be read or parsed, or when a face/node
+/// query fails. A failing edge insert is only logged and skipped.
+async fn build_co_occurrence_edges(
+    pool: &PgPool,
+    file_uuid: &str,
+    output_dir: &str,
+) -> Result<usize> {
+    let yolo_path = Path::new(output_dir).join(format!("{}.yolo.json", file_uuid));
+    if !yolo_path.exists() {
+        return Ok(0);
+    }
+
+    let content = std::fs::read_to_string(&yolo_path)
+        .with_context(|| format!("Failed to read {:?}", yolo_path))?;
+    let yolo: YoloJson = serde_json::from_str(&content)
+        .with_context(|| format!("Failed to parse {:?}", yolo_path))?;
+
+    let face_table = t("face_detections");
+    let nodes_table = t("tkg_nodes");
+    let edges_table = t("tkg_edges");
+
+    let face_rows = sqlx::query_as::<_, FaceDetectionRow>(&format!(
+        r#"SELECT trace_id, frame_number, x, y, width, height
+           FROM {} WHERE file_uuid = $1 AND trace_id IS NOT NULL
+           ORDER BY frame_number"#,
+        face_table
+    ))
+    .bind(file_uuid)
+    .fetch_all(pool)
+    .await?;
+
+    // Statement texts are loop-invariant; render them once.
+    let face_node_sql = format!(
+        "SELECT id FROM {} WHERE file_uuid=$1 AND node_type='face_trace' AND external_id=$2",
+        nodes_table
+    );
+    let object_node_sql = format!(
+        "SELECT id FROM {} WHERE file_uuid=$1 AND node_type='object' AND external_id=$2",
+        nodes_table
+    );
+    let edge_upsert_sql = format!(
+        r#"
+                INSERT INTO {} (edge_type, source_node_id, target_node_id, file_uuid, properties)
+                VALUES ($1, $2, $3, $4, $5::jsonb)
+                ON CONFLICT (file_uuid, edge_type, source_node_id, target_node_id)
+                DO UPDATE SET properties = COALESCE(EXCLUDED.properties, tkg_edges.properties)
+                "#,
+        edges_table
+    );
+
+    // This function only writes edges, so node ids cannot change while it
+    // runs. Remember each lookup (including misses) instead of re-querying
+    // the same trace / class for every frame and detection.
+    let mut face_node_ids: HashMap<String, Option<i64>> = HashMap::new();
+    let mut object_node_ids: HashMap<&str, Option<i64>> = HashMap::new();
+
+    let mut edge_count = 0;
+    for face in &face_rows {
+        let frame_str = face.frame_number.to_string();
+        let yolo_frame = match yolo.frames.get(&frame_str) {
+            Some(f) => f,
+            None => continue,
+        };
+
+        let dets = if !yolo_frame.detections.is_empty() {
+            &yolo_frame.detections
+        } else {
+            &yolo_frame.objects
+        };
+
+        if dets.is_empty() {
+            continue;
+        }
+
+        let external_id = format!("trace_{}", face.trace_id);
+        let cached_face = face_node_ids.get(&external_id).copied();
+        let face_node = match cached_face {
+            Some(hit) => hit,
+            None => {
+                let row: Option<(i64,)> = sqlx::query_as(&face_node_sql)
+                    .bind(file_uuid)
+                    .bind(&external_id)
+                    .fetch_optional(pool)
+                    .await?;
+                let id = row.map(|(id,)| id);
+                face_node_ids.insert(external_id, id);
+                id
+            }
+        };
+
+        let face_node_id = match face_node {
+            Some(id) => id,
+            None => continue,
+        };
+
+        for det in dets {
+            let class_key = det.class_name.as_str();
+            let cached_obj = object_node_ids.get(class_key).copied();
+            let obj_node = match cached_obj {
+                Some(hit) => hit,
+                None => {
+                    let row: Option<(i64,)> = sqlx::query_as(&object_node_sql)
+                        .bind(file_uuid)
+                        .bind(&det.class_name)
+                        .fetch_optional(pool)
+                        .await?;
+                    let id = row.map(|(id,)| id);
+                    object_node_ids.insert(class_key, id);
+                    id
+                }
+            };
+
+            let obj_node_id = match obj_node {
+                Some(id) => id,
+                None => continue,
+            };
+
+            let edge_props = serde_json::json!({
+                "frame": face.frame_number,
+                "object_confidence": det.confidence,
+            });
+
+            // Best effort: one failing edge must not abort the whole build.
+            if let Err(e) = sqlx::query(&edge_upsert_sql)
+                .bind("CO_OCCURS_WITH")
+                .bind(face_node_id)
+                .bind(obj_node_id)
+                .bind(file_uuid)
+                .bind(serde_json::to_string(&edge_props)?)
+                .execute(pool)
+                .await
+            {
+                tracing::warn!(
+                    "[TKG] Edge insert failed (trace={}, obj={}): {}",
+                    face.trace_id,
+                    det.class_name,
+                    e
+                );
+                continue;
+            }
+
+            edge_count += 1;
+        }
+    }
+
+    Ok(edge_count)
+}
+
+/// Links face traces to the speakers whose segments overlap them in time.
+///
+/// Each face trace's frame span (min..max `frame_number`) is converted to
+/// seconds and compared with every segment of
+/// `<output_dir>/<file_uuid>.asrx.json`. When the overlap covers at least 30%
+/// of the trace's duration, a `SPEAKS_AS` edge (face_trace -> speaker) is
+/// upserted with the overlap ratio, overlap duration and both time ranges as
+/// properties.
+///
+/// The frame rate is estimated from the last segment (`end_frame / end_time`)
+/// and falls back to 30.0 whenever that estimate is not a finite positive
+/// number.
+///
+/// Returns the number of edge upserts executed. Returns `Ok(0)` when the ASRX
+/// file does not exist or contains no segments.
+///
+/// # Errors
+/// Fails when the file cannot be read or parsed, or when any query fails.
+async fn build_speaker_face_edges(
+    pool: &PgPool,
+    file_uuid: &str,
+    output_dir: &str,
+) -> Result<usize> {
+    let asrx_path = Path::new(output_dir).join(format!("{}.asrx.json", file_uuid));
+    if !asrx_path.exists() {
+        return Ok(0);
+    }
+
+    let content = std::fs::read_to_string(&asrx_path)
+        .with_context(|| format!("Failed to read {:?}", asrx_path))?;
+    let asrx: AsrxJson = serde_json::from_str(&content)
+        .with_context(|| format!("Failed to parse {:?}", asrx_path))?;
+
+    let last = match asrx.segments.last() {
+        Some(seg) => seg,
+        None => return Ok(0),
+    };
+
+    // Estimate fps from the last segment. A zero/negative/non-finite estimate
+    // (e.g. `end_frame == 0` or `end_time <= 0`) would turn every time below
+    // into inf/NaN, and NaN slips through all `<` / `<=` guards, so fall back
+    // to a default instead.
+    let fps = {
+        let derived = last.end_frame as f64 / last.end_time;
+        if last.end_time > 0.0 && derived.is_finite() && derived > 0.0 {
+            derived
+        } else {
+            30.0
+        }
+    };
+
+    let face_table = t("face_detections");
+    let nodes_table = t("tkg_nodes");
+    let edges_table = t("tkg_edges");
+
+    // NOTE(review): other code reads these columns as i32; the explicit
+    // BIGINT casts make the i64 tuple decode valid for either column width.
+    let traces = sqlx::query_as::<_, (i64, i64, i64)>(&format!(
+        r#"SELECT trace_id::bigint,
+                  MIN(frame_number)::bigint AS start_f,
+                  MAX(frame_number)::bigint AS end_f
+           FROM {} WHERE file_uuid = $1 AND trace_id IS NOT NULL
+           GROUP BY trace_id"#,
+        face_table
+    ))
+    .bind(file_uuid)
+    .fetch_all(pool)
+    .await?;
+
+    // Statement texts are loop-invariant; render them once.
+    let face_node_sql = format!(
+        "SELECT id FROM {} WHERE file_uuid=$1 AND node_type='face_trace' AND external_id=$2",
+        nodes_table
+    );
+    let speaker_node_sql = format!(
+        "SELECT id FROM {} WHERE file_uuid=$1 AND node_type='speaker' AND external_id=$2",
+        nodes_table
+    );
+    let edge_upsert_sql = format!(
+        r#"
+                INSERT INTO {} (edge_type, source_node_id, target_node_id, file_uuid, properties)
+                VALUES ($1, $2, $3, $4, $5::jsonb)
+                ON CONFLICT (file_uuid, edge_type, source_node_id, target_node_id)
+                DO UPDATE SET properties = COALESCE(EXCLUDED.properties, tkg_edges.properties)
+                "#,
+        edges_table
+    );
+
+    // Only edges are written here, so speaker node ids are stable; cache each
+    // lookup (including misses) rather than re-querying per segment.
+    let mut speaker_node_ids: HashMap<&str, Option<i64>> = HashMap::new();
+
+    let mut edge_count = 0;
+
+    for (tid, sf, ef) in &traces {
+        let face_start_sec = *sf as f64 / fps;
+        let face_end_sec = *ef as f64 / fps;
+        let face_dur = face_end_sec - face_start_sec;
+        // A trace seen in a single frame has no duration to overlap with.
+        if face_dur <= 0.0 {
+            continue;
+        }
+
+        let face_ext_id = format!("trace_{}", tid);
+        let face_node: Option<(i64,)> = sqlx::query_as(&face_node_sql)
+            .bind(file_uuid)
+            .bind(&face_ext_id)
+            .fetch_optional(pool)
+            .await?;
+
+        let face_node_id = match face_node {
+            Some((id,)) => id,
+            None => continue,
+        };
+
+        for seg in &asrx.segments {
+            let seg_start = seg.start_time;
+            let seg_end = seg.end_time;
+            let overlap_start = face_start_sec.max(seg_start);
+            let overlap_end = face_end_sec.min(seg_end);
+
+            if overlap_start >= overlap_end {
+                continue;
+            }
+
+            let overlap_dur = overlap_end - overlap_start;
+            let overlap_ratio = overlap_dur / face_dur;
+
+            if overlap_ratio < 0.3 {
+                continue;
+            }
+
+            let speaker_key = seg.speaker_id.as_str();
+            let cached = speaker_node_ids.get(speaker_key).copied();
+            let speaker_node = match cached {
+                Some(hit) => hit,
+                None => {
+                    let row: Option<(i64,)> = sqlx::query_as(&speaker_node_sql)
+                        .bind(file_uuid)
+                        .bind(&seg.speaker_id)
+                        .fetch_optional(pool)
+                        .await?;
+                    let id = row.map(|(id,)| id);
+                    speaker_node_ids.insert(speaker_key, id);
+                    id
+                }
+            };
+
+            let speaker_node_id = match speaker_node {
+                Some(id) => id,
+                None => continue,
+            };
+
+            // Ratio is rounded to 3 decimals, duration to 1 decimal.
+            let edge_props = serde_json::json!({
+                "overlap_ratio": (overlap_ratio * 1000.0).round() / 1000.0,
+                "overlap_duration_s": (overlap_dur * 10.0).round() / 10.0,
+                "face_time_range": format!("{:.1}-{:.1}s", face_start_sec, face_end_sec),
+                "speaker_time_range": format!("{:.1}-{:.1}s", seg_start, seg_end),
+            });
+
+            sqlx::query(&edge_upsert_sql)
+                .bind("SPEAKS_AS")
+                .bind(face_node_id)
+                .bind(speaker_node_id)
+                .bind(file_uuid)
+                .bind(serde_json::to_string(&edge_props)?)
+                .execute(pool)
+                .await?;
+
+            edge_count += 1;
+        }
+    }
+
+    Ok(edge_count)
+}
+
+/// Links pairs of face traces that appear in the same frame.
+///
+/// The face-detection table is self-joined on `file_uuid` and `frame_number`
+/// with `a.trace_id < b.trace_id`, so every unordered pair is produced once.
+/// For each pair whose two `face_trace` nodes both exist, a `CO_OCCURS_WITH`
+/// edge is upserted with the earliest shared frame (`first_frame`) and the
+/// number of joined detection rows (`frame_count`) as properties.
+///
+/// Returns the number of edges upserted, `Ok(0)` when no pair co-occurs.
+///
+/// # Errors
+/// Fails when any query or upsert fails.
+async fn build_face_face_edges(pool: &PgPool, file_uuid: &str) -> Result<usize> {
+    let face_table = t("face_detections");
+    let nodes_table = t("tkg_nodes");
+    let edges_table = t("tkg_edges");
+
+    // Aggregate per pair in the database instead of shipping every joined row
+    // to the client just to take the first frame and a count.
+    // NOTE(review): other code reads trace_id / frame_number as i32; the
+    // BIGINT casts keep the i64 tuple decode valid for either column width.
+    let pairs: Vec<(i64, i64, i64, i64)> = sqlx::query_as(&format!(
+        r#"
+        SELECT a.trace_id::bigint AS tid_a,
+               b.trace_id::bigint AS tid_b,
+               MIN(a.frame_number)::bigint AS first_frame,
+               COUNT(*) AS frame_count
+        FROM {} a
+        JOIN {} b
+          ON a.file_uuid = b.file_uuid
+         AND a.frame_number = b.frame_number
+         AND a.trace_id < b.trace_id
+        WHERE a.file_uuid = $1
+          AND a.trace_id IS NOT NULL
+          AND b.trace_id IS NOT NULL
+        GROUP BY a.trace_id, b.trace_id
+        ORDER BY a.trace_id, b.trace_id
+        "#,
+        face_table, face_table
+    ))
+    .bind(file_uuid)
+    .fetch_all(pool)
+    .await?;
+
+    if pairs.is_empty() {
+        return Ok(0);
+    }
+
+    // Statement texts are loop-invariant; render them once.
+    let face_node_sql = format!(
+        "SELECT id FROM {} WHERE file_uuid=$1 AND node_type='face_trace' AND external_id=$2",
+        nodes_table
+    );
+    let edge_upsert_sql = format!(
+        r#"
+            INSERT INTO {} (edge_type, source_node_id, target_node_id, file_uuid, properties)
+            VALUES ($1, $2, $3, $4, $5::jsonb)
+            ON CONFLICT (file_uuid, edge_type, source_node_id, target_node_id)
+            DO UPDATE SET properties = COALESCE(EXCLUDED.properties, tkg_edges.properties)
+            "#,
+        edges_table
+    );
+
+    let mut edge_count = 0;
+    for (tid_a, tid_b, first_frame, frame_count) in &pairs {
+        let ext_a = format!("trace_{}", tid_a);
+        let ext_b = format!("trace_{}", tid_b);
+
+        let n_a: Option<(i64,)> = sqlx::query_as(&face_node_sql)
+            .bind(file_uuid)
+            .bind(&ext_a)
+            .fetch_optional(pool)
+            .await?;
+
+        let n_b: Option<(i64,)> = sqlx::query_as(&face_node_sql)
+            .bind(file_uuid)
+            .bind(&ext_b)
+            .fetch_optional(pool)
+            .await?;
+
+        // Skip pairs for which either trace has no node.
+        let (n_a_id, n_b_id) = match (n_a, n_b) {
+            (Some((a,)), Some((b,))) => (a, b),
+            _ => continue,
+        };
+
+        let edge_props = serde_json::json!({
+            "first_frame": first_frame,
+            "frame_count": frame_count,
+        });
+
+        sqlx::query(&edge_upsert_sql)
+            .bind("CO_OCCURS_WITH")
+            .bind(n_a_id)
+            .bind(n_b_id)
+            .bind(file_uuid)
+            .bind(serde_json::to_string(&edge_props)?)
+            .execute(pool)
+            .await?;
+
+        edge_count += 1;
+    }
+
+    Ok(edge_count)
+}
+
+// ── Tests ─────────────────────────────────────────────────────────
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// Frames keyed by frame number, each with a populated `detections` list.
+    #[test]
+    fn test_yolo_json_deserialize() {
+        let payload = r#"{
+            "frames": {
+                "1": {"time_seconds": 0.0, "detections": [{"class_name": "person", "confidence": 0.9}]},
+                "2": {"time_seconds": 1.0, "detections": [{"class_name": "chair", "confidence": 0.8}]}
+            }
+        }"#;
+        let parsed: YoloJson = serde_json::from_str(payload).unwrap();
+        assert_eq!(parsed.frames.len(), 2);
+
+        let first_frame = &parsed.frames["1"];
+        assert_eq!(first_frame.detections[0].class_name, "person");
+    }
+
+    /// An empty `frames` object is accepted.
+    #[test]
+    fn test_yolo_json_empty_frames() {
+        let parsed: YoloJson = serde_json::from_str(r#"{"frames": {}}"#).unwrap();
+        assert!(parsed.frames.is_empty());
+    }
+
+    /// A single segment plus matching `speaker_stats` entry.
+    #[test]
+    fn test_asrx_json_deserialize() {
+        let payload = r#"{
+            "segments": [
+                {"speaker_id": "SPEAKER_01", "start_time": 0.0, "end_time": 2.0, "start_frame": 0, "end_frame": 60}
+            ],
+            "speaker_stats": {"SPEAKER_01": {"count": 1}}
+        }"#;
+        let parsed: AsrxJson = serde_json::from_str(payload).unwrap();
+        assert_eq!(parsed.segments.len(), 1);
+
+        let only_segment = &parsed.segments[0];
+        assert_eq!(only_segment.speaker_id, "SPEAKER_01");
+    }
+
+    /// `speaker_stats` may be omitted and then deserializes to `None`.
+    #[test]
+    fn test_asrx_json_no_stats() {
+        let parsed: AsrxJson = serde_json::from_str(r#"{"segments": []}"#).unwrap();
+        assert!(parsed.speaker_stats.is_none());
+    }
+
+    /// A frame that only carries `objects` leaves `detections` empty.
+    #[test]
+    fn test_yolo_objects_fallback() {
+        let payload = r#"{
+            "frames": {
+                "1": {"objects": [{"class_name": "person"}]}
+            }
+        }"#;
+        let parsed: YoloJson = serde_json::from_str(payload).unwrap();
+
+        let frame = &parsed.frames["1"];
+        assert_eq!(frame.objects[0].class_name, "person");
+        assert!(frame.detections.is_empty());
+    }
+
+    /// `TkgResult` keeps the node counters it was built with.
+    #[test]
+    fn test_tkg_result() {
+        let result = TkgResult {
+            face_trace_nodes: 5,
+            object_nodes: 10,
+            speaker_nodes: 3,
+            co_occurrence_edges: 20,
+            speaker_face_edges: 8,
+            face_face_edges: 4,
+        };
+        let TkgResult {
+            face_trace_nodes,
+            object_nodes,
+            speaker_nodes,
+            ..
+        } = result;
+        assert_eq!(face_trace_nodes, 5);
+        assert_eq!(object_nodes, 10);
+        assert_eq!(speaker_nodes, 3);
+    }
+}
--- a/src/core/storage/content_hash.rs
+++ b/src/core/storage/content_hash.rs
@@ -1,7 +1,7 @@
+use anyhow::Result;
 use sha2::{Digest, Sha256};
 use std::io::Read;
 use std::path::Path;
-use anyhow::Result;

 /// Compute SHA256 of the entire file content
 pub fn compute_sha256(path: &Path) -> Result<String> {
@@ -10,7 +10,9 @@ pub fn compute_sha256(path: &Path) -> Result<String> {
    let mut buf = [0u8; 65536];
    loop {
        let n = file.read(&mut buf)?;
-        if n == 0 { break; }
+        if n == 0 {
+            break;
+        }
        hasher.update(&buf[..n]);
    }
    let hash = format!("{:x}", hasher.finalize());
--- a/src/core/tmdb/cache.rs
+++ b/src/core/tmdb/cache.rs
@@ -65,7 +65,11 @@ pub fn tmdb_cache_path(file_uuid: &str) -> PathBuf {
 pub fn read_tmdb_cache(file_uuid: &str) -> Result<TmdbCache> {
    let path = tmdb_cache_path(file_uuid);
    if !path.exists() {
-        anyhow::bail!("TMDb cache not found: {} (expected: {})", file_uuid, path.display());
+        anyhow::bail!(
+            "TMDb cache not found: {} (expected: {})",
+            file_uuid,
+            path.display()
+        );
    }
    let content = std::fs::read_to_string(&path)
        .with_context(|| format!("Failed to read TMDb cache: {}", path.display()))?;
@@ -96,9 +100,7 @@ pub fn count_cache_files() -> usize {
    match std::fs::read_dir(&dir) {
        Ok(entries) => entries
            .filter_map(|e| e.ok())
-            .filter(|e| {
-                e.file_name().to_string_lossy().ends_with(".tmdb.json")
-            })
+            .filter(|e| e.file_name().to_string_lossy().ends_with(".tmdb.json"))
            .count(),
        Err(_) => 0,
    }
--- a/src/core/tmdb/face_agent.rs
+++ b/src/core/tmdb/face_agent.rs
@@ -46,11 +46,12 @@ pub async fn match_faces_against_tmdb(db: &PostgresDb, file_uuid: &str) -> Resul

    // Step 2: Load face_detections grouped by trace_id
    let fd_table = schema::table_name("face_detections");
-    let fd_rows = sqlx::query_as::<_, (i32, Vec<f32>)>(
-        &format!("SELECT trace_id, embedding FROM {} \
+    let fd_rows = sqlx::query_as::<_, (i32, Vec<f32>)>(&format!(
+        "SELECT trace_id, embedding FROM {} \
         WHERE file_uuid=$1 AND trace_id IS NOT NULL AND embedding IS NOT NULL \
-         ORDER BY trace_id", fd_table),
-    )
+         ORDER BY trace_id",
+        fd_table
+    ))
    .bind(file_uuid)
    .fetch_all(pool)
    .await?;
@@ -156,9 +157,10 @@ pub async fn match_faces_against_tmdb(db: &PostgresDb, file_uuid: &str) -> Resul
    let fd_table = schema::table_name("face_detections");
    let mut after_qc = HashMap::new();
    for (&tid, &(id, ref name)) in &matched {
-        let cnt: i64 = sqlx::query_scalar(
-            &format!("SELECT COUNT(*) FROM {} WHERE file_uuid=$1 AND trace_id=$2", fd_table),
-        )
+        let cnt: i64 = sqlx::query_scalar(&format!(
+            "SELECT COUNT(*) FROM {} WHERE file_uuid=$1 AND trace_id=$2",
+            fd_table
+        ))
        .bind(file_uuid)
        .bind(tid)
        .fetch_one(pool)
@@ -194,9 +196,10 @@ pub async fn match_faces_against_tmdb(db: &PostgresDb, file_uuid: &str) -> Resul
    // Step 5: Update DB
    let mut updated = 0usize;
    for (&tid, &(id, _)) in &matched {
-        let r = sqlx::query(
-            &format!("UPDATE {} SET identity_id=$1 WHERE file_uuid=$2 AND trace_id=$3", fd_table),
-        )
+        let r = sqlx::query(&format!(
+            "UPDATE {} SET identity_id=$1 WHERE file_uuid=$2 AND trace_id=$3",
+            fd_table
+        ))
        .bind(id)
        .bind(file_uuid)
        .bind(tid)
@@ -223,8 +226,7 @@ pub async fn match_faces_against_tmdb(db: &PostgresDb, file_uuid: &str) -> Resul
 async fn quality_check_temporal_collisions(pool: &sqlx::PgPool, file_uuid: &str) -> Result<usize> {
    let fd_table = schema::table_name("face_detections");
    // Find all collision pairs: same identity, same frame, different trace
-    let collisions = sqlx::query_as::<_, (i32, i32, i32, i32)>(
-        &format!(
+    let collisions = sqlx::query_as::<_, (i32, i32, i32, i32)>(&format!(
        "SELECT a.identity_id, a.trace_id, b.trace_id, a.frame_number \
             FROM {} a \
             JOIN {} b \
@@ -236,8 +238,7 @@ async fn quality_check_temporal_collisions(pool: &sqlx::PgPool, file_uuid: &str)
               AND a.identity_id = b.identity_id \
             ORDER BY a.identity_id, a.frame_number",
        fd_table, fd_table
-        ),
-    )
+    ))
    .bind(file_uuid)
    .fetch_all(pool)
    .await?;
@@ -256,25 +257,36 @@ async fn quality_check_temporal_collisions(pool: &sqlx::PgPool, file_uuid: &str)
    let mut unbound = 0usize;
    for ((id, ta, tb), overlap_frames) in &collision_groups {
        // Get face detection count for each trace
-        let cnt_a: i64 = sqlx::query_scalar(
-            &format!("SELECT COUNT(*) FROM {} WHERE file_uuid=$1 AND trace_id=$2 AND identity_id=$3", fd_table)
-        )
-        .bind(file_uuid).bind(ta).bind(id)
-        .fetch_one(pool).await.unwrap_or(0);
+        let cnt_a: i64 = sqlx::query_scalar(&format!(
+            "SELECT COUNT(*) FROM {} WHERE file_uuid=$1 AND trace_id=$2 AND identity_id=$3",
+            fd_table
+        ))
+        .bind(file_uuid)
+        .bind(ta)
+        .bind(id)
+        .fetch_one(pool)
+        .await
+        .unwrap_or(0);

-        let cnt_b: i64 = sqlx::query_scalar(
-            &format!("SELECT COUNT(*) FROM {} WHERE file_uuid=$1 AND trace_id=$2 AND identity_id=$3", fd_table)
-        )
-        .bind(file_uuid).bind(tb).bind(id)
-        .fetch_one(pool).await.unwrap_or(0);
+        let cnt_b: i64 = sqlx::query_scalar(&format!(
+            "SELECT COUNT(*) FROM {} WHERE file_uuid=$1 AND trace_id=$2 AND identity_id=$3",
+            fd_table
+        ))
+        .bind(file_uuid)
+        .bind(tb)
+        .bind(id)
+        .fetch_one(pool)
+        .await
+        .unwrap_or(0);

        // Unbind the trace with fewer detections (likely the false positive)
        let victim = if cnt_a <= cnt_b { *ta } else { *tb };
        let victim_cnt = if cnt_a <= cnt_b { cnt_a } else { cnt_b };

-        sqlx::query(
-            &format!("UPDATE {} SET identity_id=NULL WHERE file_uuid=$1 AND trace_id=$2", fd_table),
-        )
+        sqlx::query(&format!(
+            "UPDATE {} SET identity_id=NULL WHERE file_uuid=$1 AND trace_id=$2",
+            fd_table
+        ))
        .bind(file_uuid)
        .bind(victim)
        .execute(pool)
--- a/src/core/tmdb/probe.rs
+++ b/src/core/tmdb/probe.rs
@@ -45,7 +45,14 @@ fn extract_movie_name(filename: &str) -> Option<String> {
        .file_stem()
        .and_then(|s| s.to_str())?;

-    let cleaned = name.replace(['.', '_'], " ").trim().to_string();
+    // Take only the part before year patterns or separators
+    let cleaned = name
+        .replace(['.', '_'], " ")
+        .split(|c: char| c == '(' || c == '[' || c == '│' || c == '|')
+        .next()
+        .unwrap_or(&name)
+        .trim()
+        .to_string();

    if cleaned.is_empty() || cleaned.len() < 3 {
        return None;
@@ -53,10 +60,7 @@ fn extract_movie_name(filename: &str) -> Option<String> {
    Some(cleaned)
 }

-pub async fn probe_from_cache(
-    db: &PostgresDb,
-    file_uuid: &str,
-) -> Result<TmdbProbeResult> {
+pub async fn probe_from_cache(db: &PostgresDb, file_uuid: &str) -> Result<TmdbProbeResult> {
    let cache = crate::core::tmdb::cache::read_tmdb_cache(file_uuid)?;
    if cache.identities.is_empty() && !cache.cast.is_empty() {
        return create_identities_from_data(db, file_uuid, &cache.movie, &cache.cast).await;
@@ -83,7 +87,8 @@ async fn upsert_identities_from_disk(
        }
        match std::fs::read_to_string(&path) {
            Ok(content) => {
-                match serde_json::from_str::<crate::core::identity::storage::IdentityFile>(&content) {
+                match serde_json::from_str::<crate::core::identity::storage::IdentityFile>(&content)
+                {
                    Ok(identity_file) => {
                        let identities_table = crate::core::db::schema::table_name("identities");
                        let result = sqlx::query(&format!(
@@ -106,21 +111,35 @@ async fn upsert_identities_from_disk(

                        match result {
                            Ok(_) => {
-                                info!("[TMDB] Upserted identity: {} (uuid={})", identity_file.name, identity_file.identity_uuid);
+                                info!(
+                                    "[TMDB] Upserted identity: {} (uuid={})",
+                                    identity_file.name, identity_file.identity_uuid
+                                );
                                identities_created += 1;
                            }
                            Err(e) => {
-                                warn!("[TMDB] Failed to upsert identity '{}': {}", identity_file.name, e);
+                                warn!(
+                                    "[TMDB] Failed to upsert identity '{}': {}",
+                                    identity_file.name, e
+                                );
                            }
                        }
                    }
                    Err(e) => {
-                        warn!("[TMDB] Failed to parse identity file {}: {}", path.display(), e);
+                        warn!(
+                            "[TMDB] Failed to parse identity file {}: {}",
+                            path.display(),
+                            e
+                        );
                    }
                }
            }
            Err(e) => {
-                warn!("[TMDB] Failed to read identity file {}: {}", path.display(), e);
+                warn!(
+                    "[TMDB] Failed to read identity file {}: {}",
+                    path.display(),
+                    e
+                );
            }
        }
    }
@@ -181,7 +200,9 @@ pub async fn create_identities_from_data(
            continue;
        }

-        let profile_url = member.profile_path.as_ref()
+        let profile_url = member
+            .profile_path
+            .as_ref()
            .map(|p| format!("https://image.tmdb.org/t/p/w185{}", p));

        let metadata = serde_json::json!({
@@ -226,8 +247,13 @@ pub async fn create_identities_from_data(
                    member.name, member.character, uuid_str
                );
                identities_created += 1;
-                if let Err(e) = crate::core::identity::storage::save_identity_file(db, &uuid_str).await {
-                    warn!("[TMDB] Failed to save identity file for {}: {}", member.name, e);
+                if let Err(e) =
+                    crate::core::identity::storage::save_identity_file(db, &uuid_str).await
+                {
+                    warn!(
+                        "[TMDB] Failed to save identity file for {}: {}",
+                        member.name, e
+                    );
                }
                // Download and save TMDb profile image locally
                if let Some(url) = &profile_url {
@@ -393,8 +419,10 @@ pub async fn probe_movie(
        overview: movie.overview.clone(),
        poster_path: movie.poster_path.clone(),
    };
-    let cache_cast: Vec<cache::TmdbCastMember> = credits.cast.iter().map(|m| {
-        cache::TmdbCastMember {
+    let cache_cast: Vec<cache::TmdbCastMember> = credits
+        .cast
+        .iter()
+        .map(|m| cache::TmdbCastMember {
            id: m.id,
            name: m.name.clone(),
            character: m.character.clone(),
@@ -410,8 +438,8 @@ pub async fn probe_movie(
            deathday: None,
            gender: None,
            homepage: None,
-        }
-    }).collect();
+        })
+        .collect();

    // Write TMDb cache so probe_from_cache can be used next time
    let cache_obj = cache::TmdbCache {
--- a/src/core/tmdb/status.rs
+++ b/src/core/tmdb/status.rs
@@ -60,7 +60,11 @@ pub async fn check_tmdb_api() -> TmdbResourceStatus {
                enabled: *config::tmdb::PROBE_ENABLED,
                api_reachable: Some(reachable),
                api_latency_ms: Some(latency),
-                api_error: if reachable { None } else { Some(format!("HTTP {}", resp.status())) },
+                api_error: if reachable {
+                    None
+                } else {
+                    Some(format!("HTTP {}", resp.status()))
+                },
                last_check_at: Some(chrono::Utc::now().to_rfc3339()),
            }
        }
@@ -84,9 +88,10 @@ pub fn count_cache_files() -> usize {

 pub async fn count_tmdb_identities(pool: &sqlx::PgPool) -> Result<i64> {
    let identities_table = crate::core::db::schema::table_name("identities");
-    let count: i64 = sqlx::query_scalar(
-        &format!("SELECT COUNT(*) FROM {} WHERE source = 'tmdb'", identities_table)
-    )
+    let count: i64 = sqlx::query_scalar(&format!(
+        "SELECT COUNT(*) FROM {} WHERE source = 'tmdb'",
+        identities_table
+    ))
    .fetch_one(pool)
    .await?;
    Ok(count)
@@ -94,9 +99,10 @@ pub async fn count_tmdb_identities(pool: &sqlx::PgPool) -> Result<i64> {

 pub async fn count_tmdb_identities_with_embedding(pool: &sqlx::PgPool) -> Result<i64> {
    let identities_table = crate::core::db::schema::table_name("identities");
-    let count: i64 = sqlx::query_scalar(
-        &format!("SELECT COUNT(*) FROM {} WHERE source = 'tmdb' AND face_embedding IS NOT NULL", identities_table)
-    )
+    let count: i64 = sqlx::query_scalar(&format!(
+        "SELECT COUNT(*) FROM {} WHERE source = 'tmdb' AND face_embedding IS NOT NULL",
+        identities_table
+    ))
    .fetch_one(pool)
    .await?;
    Ok(count)
--- a/src/player/chunk_selector.rs
+++ b/src/player/chunk_selector.rs
@@ -147,7 +147,7 @@ impl ChunkSelector {

                        // Try to match UUID - either exact match or partial match
                        let _uuid = payload
-                            .and_then(|p| p.get("uuid"))
+                            .and_then(|p| p.get("file_uuid"))
                            .and_then(|v| v.as_str())
                            .unwrap_or("");

--- a/src/playground.rs
+++ b/src/playground.rs
@@ -8,10 +8,10 @@ use tracing::{info, warn};

 use momentry_core::core::api_key::{ApiKeyService, ApiKeyType};
 use momentry_core::core::chunk::types::{Chunk, ChunkRule, ChunkType};
+use momentry_core::core::db::schema;
 use momentry_core::core::db::Database;
 use momentry_core::core::time::FrameTime;
 use momentry_core::ui::progress::{ProcessorType, ProgressState, ProgressUi};
-use momentry_core::core::db::schema;
 use momentry_core::{
    Embedder, OutputDir, PostgresDb, QdrantDb, RedisClient, VectorPayload, VideoRecord, VideoStatus,
 };
@@ -1985,7 +1985,8 @@ async fn main() -> Result<()> {
                    chunk_id: None,
                    created_at: String::new(),
                };
-                db.store_pre_chunk(&uuid, "asr", serde_json::to_value(&pre_chunk)?).await?;
+                db.store_pre_chunk(&uuid, "asr", serde_json::to_value(&pre_chunk)?)
+                    .await?;
                asr_pre_chunk_ids.push(i as i64);
            }

@@ -2009,7 +2010,8 @@ async fn main() -> Result<()> {
                    chunk_id: None,
                    created_at: String::new(),
                };
-                db.store_pre_chunk(&uuid, "cut", serde_json::to_value(&pre_chunk)?).await?;
+                db.store_pre_chunk(&uuid, "cut", serde_json::to_value(&pre_chunk)?)
+                    .await?;
                cut_pre_chunk_ids.push(i as i64);
            }

@@ -2037,7 +2039,8 @@ async fn main() -> Result<()> {
                    chunk_id: None,
                    created_at: String::new(),
                };
-                db.store_pre_chunk(&uuid, "time", serde_json::to_value(&pre_chunk)?).await?;
+                db.store_pre_chunk(&uuid, "time", serde_json::to_value(&pre_chunk)?)
+                    .await?;
                time_pre_chunk_ids.push(time_pre_chunk_ids.len() as i64);
                time_start = time_end;
            }
@@ -2117,7 +2120,8 @@ async fn main() -> Result<()> {
                    frame_path: None,
                    created_at: String::new(),
                };
-                db.store_frame(&uuid, *frame_num as i64, serde_json::to_value(&frame)?).await?;
+                db.store_frame(&uuid, *frame_num as i64, serde_json::to_value(&frame)?)
+                    .await?;
            }

            println!("Stored {} frames", all_frames.len());
@@ -2357,8 +2361,7 @@ async fn main() -> Result<()> {
                for frame in &context_frames {
                    if let Some(objects) = frame["yolo_objects"].as_array() {
                        for obj in objects {
-                            if let Some(class_name) =
-                                obj.get("class_name").and_then(|v| v.as_str())
+                            if let Some(class_name) = obj.get("class_name").and_then(|v| v.as_str())
                            {
                                *all_objects.entry(class_name.to_string()).or_insert(0) += 1;
                            }
@@ -2494,9 +2497,11 @@ async fn main() -> Result<()> {
                            }

                            let qdrant_payload = VectorPayload {
-                                uuid: chunk.uuid.clone(),
+                                file_uuid: chunk.uuid.clone(),
                                chunk_id: chunk.chunk_id.clone(),
                                chunk_type: "sentence".to_string(),
+                                start_frame: chunk.start_frame,
+                                end_frame: chunk.end_frame,
                                start_time: chunk.start_time().seconds(),
                                end_time: chunk.end_time().seconds(),
                                text: Some(text.to_string()),
--- a/src/verification/verifier.rs
+++ b/src/verification/verifier.rs
@@ -79,12 +79,8 @@ pub fn verify_output(processor: &ProcessorType, file_uuid: &str) -> Verification
                None => VerificationResult::ok(proc_name, file_uuid),
            }
        }
-        ProcessorType::Yolo => {
-            VerificationResult::ok(proc_name, file_uuid)
-        }
-        ProcessorType::Face => {
-            VerificationResult::ok(proc_name, file_uuid)
-        }
+        ProcessorType::Yolo => VerificationResult::ok(proc_name, file_uuid),
+        ProcessorType::Face => VerificationResult::ok(proc_name, file_uuid),
        ProcessorType::Ocr => {
            let frames = value.get("frames").and_then(|v| v.as_array());
            match frames {
@@ -114,7 +110,9 @@ pub fn verify_output(processor: &ProcessorType, file_uuid: &str) -> Verification
        ProcessorType::FiveW1H => {
            let scenes = value.get("scenes").and_then(|v| v.as_array());
            match scenes {
-                Some(s) if s.is_empty() => VerificationResult::fail(proc_name, file_uuid, "0 scenes"),
+                Some(s) if s.is_empty() => {
+                    VerificationResult::fail(proc_name, file_uuid, "0 scenes")
+                }
                Some(_) => VerificationResult::ok(proc_name, file_uuid),
                None => VerificationResult::ok(proc_name, file_uuid),
            }
--- a/src/watcher/watcher.rs
+++ b/src/watcher/watcher.rs
@@ -37,7 +37,8 @@ pub async fn run_watcher() -> Result<()> {
    info!("Watch directories: {:?}", dirs);

    tokio::spawn(async move {
-        let mut interval = time::interval(std::time::Duration::from_millis(config.poll_interval_ms));
+        let mut interval =
+            time::interval(std::time::Duration::from_millis(config.poll_interval_ms));
        let mut known = std::collections::HashSet::new();
        loop {
            interval.tick().await;
@@ -109,15 +110,43 @@ async fn auto_register_file(file_path: &str) {
        }
    };

-    let file_name = pre.get("file_name").and_then(|v| v.as_str()).unwrap_or("unknown").to_string();
+    let file_name = pre
+        .get("file_name")
+        .and_then(|v| v.as_str())
+        .unwrap_or("unknown")
+        .to_string();
    let probe = pre.get("probe_json").cloned().unwrap_or_default();
-    let file_type = pre.get("file_type").and_then(|v| v.as_str()).unwrap_or("unknown").to_string();
-    let canonical_path = pre.get("file_path").and_then(|v| v.as_str()).unwrap_or(file_path).to_string();
+    let file_type = pre
+        .get("file_type")
+        .and_then(|v| v.as_str())
+        .unwrap_or("unknown")
+        .to_string();
+    let canonical_path = pre
+        .get("file_path")
+        .and_then(|v| v.as_str())
+        .unwrap_or(file_path)
+        .to_string();

-    let duration = probe.get("format").and_then(|f| f.get("duration")).and_then(|v| v.as_f64()).unwrap_or(0.0);
-    let width = probe.get("format").and_then(|f| f.get("width")).and_then(|v| v.as_u64()).unwrap_or(0) as u32;
-    let height = probe.get("format").and_then(|f| f.get("height")).and_then(|v| v.as_u64()).unwrap_or(0) as u32;
-    let fps_val = probe.get("format").and_then(|f| f.get("fps")).and_then(|v| v.as_f64()).unwrap_or(0.0);
+    let duration = probe
+        .get("format")
+        .and_then(|f| f.get("duration"))
+        .and_then(|v| v.as_f64())
+        .unwrap_or(0.0);
+    let width = probe
+        .get("format")
+        .and_then(|f| f.get("width"))
+        .and_then(|v| v.as_u64())
+        .unwrap_or(0) as u32;
+    let height = probe
+        .get("format")
+        .and_then(|f| f.get("height"))
+        .and_then(|v| v.as_u64())
+        .unwrap_or(0) as u32;
+    let fps_val = probe
+        .get("format")
+        .and_then(|f| f.get("fps"))
+        .and_then(|v| v.as_f64())
+        .unwrap_or(0.0);

    let record = VideoRecord {
        id: 0,
@@ -158,7 +187,10 @@ async fn auto_register_file(file_path: &str) {

    match db.register_video(&record).await {
        Ok(id) => info!("[WATCHER] Auto-registered {} (id={})", record.file_uuid, id),
-        Err(e) => warn!("[WATCHER] Auto-register failed for {}: {}", record.file_uuid, e),
+        Err(e) => warn!(
+            "[WATCHER] Auto-register failed for {}: {}",
+            record.file_uuid, e
+        ),
    }
 }

@@ -175,10 +207,14 @@ pub async fn pre_process_file(file_path: &str) -> Option<String> {
    let output_dir = std::env::var("MOMENTRY_OUTPUT_DIR")
        .unwrap_or_else(|_| "/Users/accusys/momentry/output_dev".to_string());

-    let birthday = std::fs::metadata(&path).ok()
+    let birthday = std::fs::metadata(&path)
+        .ok()
        .and_then(|m| m.modified().ok())
        .map(|t| {
-            let secs = t.duration_since(std::time::UNIX_EPOCH).unwrap_or_default().as_secs();
+            let secs = t
+                .duration_since(std::time::UNIX_EPOCH)
+                .unwrap_or_default()
+                .as_secs();
            chrono::DateTime::from_timestamp(secs as i64, 0)
                .map(|dt| dt.to_rfc3339())
                .unwrap_or_else(|| chrono::Utc::now().to_rfc3339())
@@ -186,9 +222,8 @@ pub async fn pre_process_file(file_path: &str) -> Option<String> {
        .unwrap_or_else(|| chrono::Utc::now().to_rfc3339());

    let mac = crate::core::storage::uuid::get_mac_address();
-    let file_uuid = crate::core::storage::uuid::compute_birth_uuid(
-        &mac, &birthday, &canonical_str, &filename,
-    );
+    let file_uuid =
+        crate::core::storage::uuid::compute_birth_uuid(&mac, &birthday, &canonical_str, &filename);

    let pre_path = std::path::PathBuf::from(&output_dir).join(format!("{}.pre.json", file_uuid));
    if pre_path.exists() {
@@ -198,15 +233,22 @@ pub async fn pre_process_file(file_path: &str) -> Option<String> {

    info!("[PRE-PROCESS] Pre-processing: {} → {}", filename, file_uuid);

-    let content_hash = crate::core::storage::content_hash::compute_sha256(&path).unwrap_or_default();
+    let content_hash =
+        crate::core::storage::content_hash::compute_sha256(&path).unwrap_or_default();

    let scripts_dir = std::env::var("MOMENTRY_SCRIPTS_DIR")
        .unwrap_or_else(|_| "/Users/accusys/momentry_core_0.1/scripts".to_string());
    let python_path = std::env::var("MOMENTRY_PYTHON_PATH")
        .unwrap_or_else(|_| "/opt/homebrew/bin/python3.11".to_string());
-    let probe_json = crate::core::probe::unified::unified_probe(&path, &scripts_dir, &python_path).await;
+    let probe_json =
+        crate::core::probe::unified::unified_probe(&path, &scripts_dir, &python_path).await;

-    let file_type = probe_json.get("format").and_then(|f| f.get("file_type")).and_then(|v| v.as_str()).unwrap_or("unknown").to_string();
+    let file_type = probe_json
+        .get("format")
+        .and_then(|f| f.get("file_type"))
+        .and_then(|v| v.as_str())
+        .unwrap_or("unknown")
+        .to_string();

    let pre_data = serde_json::json!({
        "file_name": filename,
--- a/src/worker/job_worker.rs
+++ b/src/worker/job_worker.rs
@@ -12,12 +12,13 @@ use crate::core::chunk::{rule1_ingest, rule3_ingest};
 use crate::core::config::OUTPUT_DIR;
 use crate::core::db::qdrant_db::QdrantDb;
 use crate::core::db::{
-    schema, MonitorJobStatus, PostgresDb, ProcessorJobStatus, RedisClient, VectorPayload, VideoStatus,
+    schema, MonitorJobStatus, PostgresDb, ProcessorJobStatus, RedisClient, VectorPayload,
+    VideoStatus,
 };
 use crate::core::embedding::Embedder;
+use crate::core::processor::heuristic_scene::generate_scene_meta;
 use crate::worker::config::WorkerConfig;
 use crate::worker::processor::{ProcessorPool, ProcessorTask};
-use crate::core::processor::heuristic_scene::generate_scene_meta;
 use crate::worker::resources::SystemResources;
 use sqlx::PgPool;

@@ -70,14 +71,15 @@ impl JobWorker {
        // Reset stale running jobs: jobs stuck in 'running' with no active processor results
        let monitor_jobs_table = schema::table_name("monitor_jobs");
        let processor_results_table = schema::table_name("processor_results");
-        if let Err(e) = sqlx::query(
-            &format!("UPDATE {} SET status = 'pending', updated_at = NOW()
+        if let Err(e) = sqlx::query(&format!(
+            "UPDATE {} SET status = 'pending', updated_at = NOW()
             WHERE status = 'running'
             AND id NOT IN (
                 SELECT DISTINCT job_id FROM {}
                 WHERE status IN ('pending', 'running')
-             )", monitor_jobs_table, processor_results_table),
-        )
+             )",
+            monitor_jobs_table, processor_results_table
+        ))
        .execute(self.db.pool())
        .await
        {
@@ -608,12 +610,23 @@ impl JobWorker {
        }

        let fu = uuid;
-        let rule1 = check!(&format!("SELECT 1 FROM {chunk_t} WHERE file_uuid = '{fu}' AND chunk_type = 'sentence' LIMIT 1"));
+        let rule1 = check!(&format!(
+            "SELECT 1 FROM {chunk_t} WHERE file_uuid = '{fu}' AND chunk_type = 'sentence' LIMIT 1"
+        ));
        let vector = check!(&format!("SELECT 1 FROM {chunk_t} WHERE file_uuid = '{fu}' AND chunk_type = 'sentence' AND embedding IS NOT NULL LIMIT 1"));
-        let rule3 = check!(&format!("SELECT 1 FROM {chunk_t} WHERE file_uuid = '{fu}' AND chunk_type = 'cut' LIMIT 1"));
+        let rule3 = check!(&format!(
+            "SELECT 1 FROM {chunk_t} WHERE file_uuid = '{fu}' AND chunk_type = 'cut' LIMIT 1"
+        ));
        let trace = check!(&format!("SELECT COUNT(DISTINCT trace_id) FROM {fd_t} WHERE file_uuid = '{fu}' AND trace_id IS NOT NULL"));
-        let tkg = check!(&format!("SELECT 1 FROM {} WHERE file_uuid = '{fu}' LIMIT 1", schema::table_name("tkg_nodes")));
-        let scene_meta = std::path::Path::new(&format!("{}/{fu}.scene_meta.json", crate::core::config::OUTPUT_DIR.as_str())).exists();
+        let tkg = check!(&format!(
+            "SELECT 1 FROM {} WHERE file_uuid = '{fu}' LIMIT 1",
+            schema::table_name("tkg_nodes")
+        ));
+        let scene_meta = std::path::Path::new(&format!(
+            "{}/{fu}.scene_meta.json",
+            crate::core::config::OUTPUT_DIR.as_str()
+        ))
+        .exists();
        let five_w1h = check!(&format!("SELECT 1 FROM {chunk_t} WHERE file_uuid = '{fu}' AND chunk_type = 'cut' AND summary_text IS NOT NULL AND summary_text != '' LIMIT 1"));

        let all_ok = rule1 && vector && rule3 && trace && tkg && scene_meta && five_w1h;
@@ -847,26 +860,23 @@ impl JobWorker {
                                Err(e) => error!("❌ Trace chunk ingestion failed: {}", e),
                            }

-                            // Build Temporal Knowledge Graph (TKG)
-                            info!("📝 Building TKG graph...");
-                            let executor = match crate::core::processor::PythonExecutor::new() {
-                                Ok(ex) => ex,
-                                Err(e) => {
-                                    error!("Failed to create PythonExecutor for TKG: {}", e);
-                                    return;
-                                }
-                            };
-                            match executor
-                                .run(
-                                    "tkg_builder.py",
-                                    &["--file-uuid", &uuid_clone],
-                                    Some(&uuid_clone),
-                                    "TKG_BUILDER",
-                                    Some(std::time::Duration::from_secs(300)),
+                            // Build Temporal Knowledge Graph (TKG) — native Rust
+                            info!("📝 Building TKG graph (Rust)...");
+                            let output_dir = std::env::var("MOMENTRY_OUTPUT_DIR")
+                                .unwrap_or_else(|_| ".".to_string());
+                            match crate::core::processor::tkg::build_tkg(
+                                db_clone.as_ref(),
+                                &uuid_clone,
+                                &output_dir,
                            )
                            .await
                            {
-                                Ok(()) => info!("✅ TKG built for {}", uuid_clone),
+                                Ok(r) => info!(
+                                    "✅ TKG built for {}: {} face, {} obj, {} spk, {} co, {} sf, {} ff edges",
+                                    uuid_clone,
+                                    r.face_trace_nodes, r.object_nodes, r.speaker_nodes,
+                                    r.co_occurrence_edges, r.speaker_face_edges, r.face_face_edges,
+                                ),
                                Err(e) => error!("❌ TKG build failed for {}: {}", uuid_clone, e),
                            }
                        }
@@ -898,7 +908,7 @@ impl JobWorker {
                            let ids = sqlx::query_scalar::<_, uuid::Uuid>(
                                "SELECT DISTINCT i.uuid FROM identities i \
                                 JOIN face_detections fd ON fd.identity_id = i.id \
-                                 WHERE fd.file_uuid = $1 AND fd.identity_id IS NOT NULL"
+                                 WHERE fd.file_uuid = $1 AND fd.identity_id IS NOT NULL",
                            )
                            .bind(&uuid_clone)
                            .fetch_all(db_clone.pool())
@@ -907,12 +917,18 @@ impl JobWorker {
                            for id_uuid in &ids {
                                let us = id_uuid.to_string().replace('-', "");
                                if let Err(e) = crate::core::identity::storage::save_identity_file(
-                                    &db_clone, &us
-                                ).await {
+                                    &db_clone, &us,
+                                )
+                                .await
+                                {
                                    warn!("[P2.5] Failed to save identity file {}: {}", us, e);
                                }
                            }
-                            info!("[P2.5] {} identity files saved for {}", ids.len(), uuid_clone);
+                            info!(
+                                "[P2.5] {} identity files saved for {}",
+                                ids.len(),
+                                uuid_clone
+                            );
                        }
                        Err(e) => error!("❌ TMDb face matching failed for {}: {}", uuid_clone, e),
                    }
@@ -1088,8 +1104,8 @@ impl JobWorker {
        let pool = db.pool();

        let chunk_table = schema::table_name("chunk");
-        let rows = sqlx::query_as::<_, (String, String, String, f64, f64, String)>(
-            &format!("SELECT chunk_id, chunk_type, text_content, start_time, end_time, content::text FROM {} WHERE file_uuid = $1 AND chunk_type = 'sentence' AND embedding IS NULL AND (text_content IS NOT NULL AND text_content != '') ORDER BY id", chunk_table),
+        let rows = sqlx::query_as::<_, (String, String, String, i64, i64, f64, f64, String)>(
+            &format!("SELECT chunk_id, chunk_type, text_content, start_frame, end_frame, start_time, end_time, content::text FROM {} WHERE file_uuid = $1 AND chunk_type = 'sentence' AND embedding IS NULL AND (text_content IS NOT NULL AND text_content != '') ORDER BY id", chunk_table),
        )
        .bind(uuid)
        .fetch_all(pool)
@@ -1107,7 +1123,17 @@ impl JobWorker {
        );

        let mut stored = 0usize;
-        for (chunk_id, _chunk_type, text, start_time, end_time, _content_str) in &rows {
+        for (
+            chunk_id,
+            _chunk_type,
+            text,
+            start_frame,
+            end_frame,
+            start_time,
+            end_time,
+            _content_str,
+        ) in &rows
+        {
            if text.is_empty() {
                continue;
            }
@@ -1119,9 +1145,11 @@ impl JobWorker {
                        continue;
                    }
                    let payload = VectorPayload {
-                        uuid: uuid.to_string(),
+                        file_uuid: uuid.to_string(),
                        chunk_id: chunk_id.clone(),
                        chunk_type: "sentence".to_string(),
+                        start_frame: *start_frame,
+                        end_frame: *end_frame,
                        start_time: *start_time,
                        end_time: *end_time,
                        text: Some(text.clone()),
--- a/src/worker/processor.rs
+++ b/src/worker/processor.rs
@@ -237,11 +237,19 @@ impl ProcessorPool {
                let key = format!("{}job:{}:processor:{}", prefix, &job.uuid, &processor_name);
                let now = chrono::Utc::now().to_rfc3339();
                let _: Option<String> = redis::cmd("HSET")
-                    .arg(&key).arg("started_at").arg(&now)
-                    .query_async(&mut conn).await.ok();
+                    .arg(&key)
+                    .arg("started_at")
+                    .arg(&now)
+                    .query_async(&mut conn)
+                    .await
+                    .ok();
                let _: Option<String> = redis::cmd("HSET")
-                    .arg(&key).arg("embedding_started_at").arg(&now)
-                    .query_async(&mut conn).await.ok();
+                    .arg(&key)
+                    .arg("embedding_started_at")
+                    .arg(&now)
+                    .query_async(&mut conn)
+                    .await
+                    .ok();
            }

            // Subscribe to Redis progress pub/sub and update processor hash in real-time
@@ -254,10 +262,12 @@ impl ProcessorPool {
                let cb_processor = sub_processor.clone();
                if let Err(e) = sub_redis
                    .subscribe_and_callback(&sub_uuid, move |msg| {
-                        tracing::info!("[Subscriber] Got msg for={} cur={} tot={}", 
+                        tracing::info!(
+                            "[Subscriber] Got msg for={} cur={} tot={}",
                            msg.processor,
                            msg.data.current.unwrap_or(0),
-                            msg.data.total.unwrap_or(0));
+                            msg.data.total.unwrap_or(0)
+                        );
                        if msg.processor == cb_processor {
                            let cur = msg.data.current.unwrap_or(0);
                            let tot = msg.data.total.unwrap_or(0);
@@ -266,11 +276,18 @@ impl ProcessorPool {
                            let u = cb_uuid.clone();
                            let p = cb_processor.clone();
                            tokio::spawn(async move {
-                                match r.update_worker_processor_status(
-                                    &u, &p, "running", None,
-                                    cur, oc, tot, 0, 0,
-                                ).await {
-                                    Ok(_) => tracing::info!("[Subscriber] Updated {}: cur={} tot={}", p, cur, tot),
+                                match r
+                                    .update_worker_processor_status(
+                                        &u, &p, "running", None, cur, oc, tot, 0, 0,
+                                    )
+                                    .await
+                                {
+                                    Ok(_) => tracing::info!(
+                                        "[Subscriber] Updated {}: cur={} tot={}",
+                                        p,
+                                        cur,
+                                        tot
+                                    ),
                                    Err(e) => tracing::error!("[Subscriber] FAILED {}: {}", p, e),
                                }
                            });
@@ -756,9 +773,11 @@ impl ProcessorPool {
            .enumerate()
            .map(|(i, segment)| {
                // Prefer ASR output frames, fallback to time-based conversion
-                let start_frame = segment.start_frame
+                let start_frame = segment
+                    .start_frame
                    .unwrap_or_else(|| (segment.start_time * fps).round() as i64);
-                let end_frame = segment.end_frame
+                let end_frame = segment
+                    .end_frame
                    .unwrap_or_else(|| (segment.end_time * fps).round() as i64);
                let data = serde_json::json!({
                    "text": segment.text,
@@ -892,7 +911,11 @@ impl ProcessorPool {
        tracing::info!(
            "Storing {} Face pre-chunks + {} detections for video {}",
            frames_count,
-            face_result.frames.iter().map(|f| f.faces.len()).sum::<usize>(),
+            face_result
+                .frames
+                .iter()
+                .map(|f| f.faces.len())
+                .sum::<usize>(),
            uuid
        );

@@ -911,7 +934,10 @@ impl ProcessorPool {
                detections_to_store.push((
                    frame.frame as i64,
                    frame.timestamp,
-                    face.x, face.y, face.width, face.height,
+                    face.x,
+                    face.y,
+                    face.width,
+                    face.height,
                    face.confidence,
                ));
            }
@@ -1170,9 +1196,10 @@ impl ProcessorPool {
                "top_5": scene.top_5,
            });
            let chunk_table = crate::core::db::schema::table_name("chunk");
-            let _ = sqlx::query(
-                &format!("UPDATE {} SET metadata = metadata || $1::jsonb WHERE file_uuid=$2 AND chunk_id=$3", chunk_table)
-            )
+            let _ = sqlx::query(&format!(
+                "UPDATE {} SET metadata = metadata || $1::jsonb WHERE file_uuid=$2 AND chunk_id=$3",
+                chunk_table
+            ))
            .bind(&meta)
            .bind(uuid)
            .bind(&chk_id)