feat: service inventory, ERP reports, sqlite-vec integration, visualize tool

- Add SERVICE_INVENTORY_V1.0.0.md (25 source-verified tools, 3.7GB) - Add ERP_SELECTION_REPORT.md (Odoo CE vs ERPNext comparison) - Add SFTPGO_ODOO_REPLACEMENT.md (SFTPGo migration plan) - Add SERVICE_GO_GITEA_BUILD.md (Go compiler + Gitea build report) - Add release visualize command (face trace heatmap + identity filter) - Add sqlite-vec integration (160MB SQLite with vec0 vector tables) - Add export_identities.py, export_sqlite.py, render_face_heatmap.py - Add Go, Gitea, Rust/Cargo, Swift, yt-dlp, SQLite, sqlite-vec to service CLI - Fix package to include identities and identity_bindings in data.sql - Update release list to show all deployed video stats - Add V1.0.0 YAML frontmatter to all docs (DOCS_STANDARD compliant)
2026-05-13 02:37:45 +08:00
parent cac60c6093
commit 2992a0e650
25 changed files with 6076 additions and 3 deletions
--- a/scripts/embed_faces.py
+++ b/scripts/embed_faces.py
@@ -0,0 +1,161 @@
+#!/opt/homebrew/bin/python3.11
+"""
+Process Swift face detection output + add CoreML FaceNet embeddings.
+Replaces face_processor.py Step 2 when Swift already ran.
+"""
+import sys, os, json, argparse, time
+import cv2
+import numpy as np
+import coremltools as ct
+from pathlib import Path
+
+SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
+FACENET_PATH = os.path.join(SCRIPT_DIR, "..", "models", "facenet512.mlpackage")
+
+def classify_pose(roll, yaw):
+    abs_yaw = abs(yaw)
+    abs_roll = abs(roll)
+    if abs_yaw < 15 and abs_roll < 15:
+        return "frontal"
+    elif abs_yaw > 30:
+        return "profile_right" if yaw > 0 else "profile_left"
+    else:
+        return "three_quarter"
+
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--swift-json", required=True, help="Swift detection output")
+    parser.add_argument("--video", required=True, help="Video file path")
+    parser.add_argument("--output", required=True, help="Output face.json path")
+    parser.add_argument("--fps", type=float, default=24.0)
+    args = parser.parse_args()
+
+    print(f"[EMBED] Loading Swift output: {args.swift_json}")
+    with open(args.swift_json) as f:
+        swift = json.load(f)
+
+    swift_frames = swift.get("frames", [])
+    print(f"[EMBED] Swift frames: {len(swift_frames)}")
+
+    # Load CoreML FaceNet
+    facenet = os.path.normpath(FACENET_PATH)
+    coreml_model = None
+    if os.path.exists(facenet):
+        coreml_model = ct.models.MLModel(facenet)
+        print(f"[EMBED] FaceNet loaded")
+    else:
+        print(f"[EMBED] WARNING: FaceNet not found at {facenet}")
+
+    # Open video
+    video = cv2.VideoCapture(args.video)
+    if not video.isOpened():
+        raise RuntimeError(f"Cannot open {args.video}")
+    v_fps = video.get(cv2.CAP_PROP_FPS)
+    v_total = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
+    v_width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
+    v_height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
+    print(f"[EMBED] Video: {v_width}x{v_height}, {v_fps:.1f}fps")
+
+    # Sequential read optimization: build lookup set
+    needed_frames = set()
+    frame_data_map = {}
+    for sf in swift_frames:
+        fn = int(sf.get("frame", sf.get("frame_number", 0)))
+        needed_frames.add(fn)
+        frame_data_map[fn] = sf
+
+    output_frames = []
+    embed_count = 0
+    t0 = time.time()
+    current_frame = 0
+
+    while True:
+        ret, frame = video.read()
+        if not ret:
+            break
+
+        if current_frame not in needed_frames:
+            current_frame += 1
+            continue
+
+        sf = frame_data_map[current_frame]
+        timestamp = sf.get("timestamp", current_frame / v_fps)
+        faces_in = sf.get("faces", [])
+
+        processed_faces = []
+        for face in faces_in:
+            bb = face.get("bbox", {})
+            x, y, w, h = bb.get("x", 0), bb.get("y", 0), bb.get("width", 0), bb.get("height", 0)
+
+            if w <= 10 or h <= 10:
+                continue
+
+            x1, y1 = max(0, x), max(0, y)
+            x2, y2 = min(v_width, x + w), min(v_height, y + h)
+            if x2 <= x1 or y2 <= y1:
+                continue
+            face_img = frame[y1:y2, x1:x2]
+            if face_img.size == 0:
+                continue
+
+            emb = None
+            if coreml_model is not None and face_img.shape[0] > 0 and face_img.shape[1] > 0:
+                try:
+                    resized = cv2.resize(face_img, (160, 160))
+                    rgb = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB).astype(np.float32)
+                    normalized = rgb / 127.5 - 1.0
+                    input_data = np.expand_dims(np.transpose(normalized, (2, 0, 1)), axis=0)
+                    result = coreml_model.predict({"input": input_data})
+                    emb = list(result.values())[0].flatten().tolist()
+                    embed_count += 1
+                except Exception as e:
+                    pass
+
+            # Pose
+            pose_info = face.get("pose", {})
+            pose_angle = classify_pose(pose_info.get("roll", 0), pose_info.get("yaw", 0))
+
+            processed_faces.append({
+                "x": x, "y": y, "width": w, "height": h,
+                "confidence": face.get("confidence", 0.5),
+                "embedding": emb,
+                "pose_angle": {
+                    "angle": pose_angle,
+                    "roll": pose_info.get("roll", 0),
+                    "yaw": pose_info.get("yaw", 0),
+                    "pitch": pose_info.get("pitch", 0),
+                },
+                "lips": face.get("lips"),
+                "landmarks": face.get("landmarks"),
+                "attributes": None,
+            })
+
+        if processed_faces:
+            output_frames.append({
+                "frame": current_frame,
+                "timestamp": timestamp,
+                "faces": processed_faces,
+            })
+
+        current_frame += 1
+
+        if len(output_frames) % 500 == 0:
+            print(f"[EMBED] {len(output_frames)}/{len(needed_frames)} frames, {embed_count} embeddings, {time.time()-t0:.0f}s")
+
+    video.release()
+
+    output = {
+        "frame_count": len(output_frames),
+        "fps": v_fps,
+        "frames": output_frames,
+    }
+
+    os.makedirs(os.path.dirname(args.output), exist_ok=True)
+    with open(args.output, "w") as f:
+        json.dump(output, f, indent=2, ensure_ascii=False)
+
+    elapsed = time.time() - t0
+    print(f"[EMBED] Done: {len(output_frames)} frames, {embed_count} embeddings, {elapsed:.0f}s → {args.output}")
+
+if __name__ == "__main__":
+    main()
--- a/scripts/export_file_package.py
+++ b/scripts/export_file_package.py
@@ -0,0 +1,131 @@
+#!/opt/homebrew/bin/python3.11
+"""
+Export a single file's data to SQL file (COPY format).
+Usage: python3 export_file_package.py <file_uuid> <output_dir>
+"""
+import json, os, sys, subprocess
+
+PG_BIN = "/Users/accusys/pgsql/18.3/bin"
+DB_URL = "postgresql://accusys@localhost:5432/momentry"
+
+TABLES = [
+    ("dev.videos", "file_uuid"),
+    ("dev.chunk", "file_uuid"),
+    ("dev.chunk_vectors", "uuid"),
+    ("dev.face_detections", "file_uuid"),
+]
+
+def main():
+    uuid = sys.argv[1] if len(sys.argv) > 1 else "aeed71342a899fe4b4c57b7d41bcb692"
+    outdir = sys.argv[2] if len(sys.argv) > 2 else "/tmp/file_pkg"
+    os.makedirs(outdir, exist_ok=True)
+    sql_path = os.path.join(outdir, "data.sql")
+
+    print(f"Exporting {uuid} → {sql_path}")
+    with open(sql_path, "w") as f:
+        f.write(f"-- File package: {uuid}\nBEGIN;\n\n")
+
+        for tbl, col in TABLES:
+            f.write(f"-- {tbl} WHERE {col} = '{uuid}'\n")
+
+            # Get column list
+            schema, table = tbl.split(".")
+            r = subprocess.run(
+                [f"{PG_BIN}/psql", "-U", "accusys", "-d", "momentry", "-t", "-A",
+                 "-c", f"SELECT string_agg(column_name, ', ' ORDER BY ordinal_position) FROM information_schema.columns WHERE table_schema='{schema}' AND table_name='{table}' AND is_updatable='YES'"],
+                capture_output=True, text=True, timeout=15)
+            cols = r.stdout.strip()
+
+            r = subprocess.run(
+                [f"{PG_BIN}/psql", "-U", "accusys", "-d", "momentry", "-c",
+                 f"COPY (SELECT * FROM {tbl} WHERE {col} = '{uuid}') TO STDOUT WITH CSV HEADER"],
+                capture_output=True, text=True, timeout=60)
+            if r.stdout.strip():
+                f.write(f"COPY {tbl} ({cols}) FROM STDIN WITH CSV HEADER;\n")
+                f.write(r.stdout)
+                if not r.stdout.endswith("\n"):
+                    f.write("\n")
+                f.write("\\.\n\n")
+
+        # Export identities referenced by this file's face_detections
+        f.write(f"-- dev.identities (referenced by face_detections WHERE file_uuid='{uuid}')\n")
+        r = subprocess.run(
+            [f"{PG_BIN}/psql", "-U", "accusys", "-d", "momentry", "-t", "-A",
+             "-c", "SELECT string_agg(column_name, ', ' ORDER BY ordinal_position) FROM information_schema.columns WHERE table_schema='dev' AND table_name='identities' AND is_updatable='YES'"],
+            capture_output=True, text=True, timeout=15)
+        cols = r.stdout.strip()
+        r = subprocess.run(
+            [f"{PG_BIN}/psql", "-U", "accusys", "-d", "momentry", "-c",
+             f"COPY (SELECT DISTINCT i.* FROM dev.identities i INNER JOIN dev.face_detections fd ON fd.identity_id = i.id WHERE fd.file_uuid = '{uuid}') TO STDOUT WITH CSV HEADER"],
+            capture_output=True, text=True, timeout=60)
+        if r.stdout.strip():
+            f.write(f"COPY dev.identities ({cols}) FROM STDIN WITH CSV HEADER;\n")
+            f.write(r.stdout)
+            if not r.stdout.endswith("\n"):
+                f.write("\n")
+            f.write("\\.\n\n")
+
+        # Export identity_bindings for identities referenced by this file
+        f.write(f"-- dev.identity_bindings (for identities in face_detections WHERE file_uuid='{uuid}')\n")
+        r = subprocess.run(
+            [f"{PG_BIN}/psql", "-U", "accusys", "-d", "momentry", "-t", "-A",
+             "-c", "SELECT string_agg(column_name, ', ' ORDER BY ordinal_position) FROM information_schema.columns WHERE table_schema='dev' AND table_name='identity_bindings' AND is_updatable='YES'"],
+            capture_output=True, text=True, timeout=15)
+        cols = r.stdout.strip()
+        r = subprocess.run(
+            [f"{PG_BIN}/psql", "-U", "accusys", "-d", "momentry", "-c",
+             f"COPY (SELECT DISTINCT ib.* FROM dev.identity_bindings ib INNER JOIN dev.face_detections fd ON fd.identity_id = ib.identity_id WHERE fd.file_uuid = '{uuid}') TO STDOUT WITH CSV HEADER"],
+            capture_output=True, text=True, timeout=60)
+        if r.stdout.strip():
+            f.write(f"COPY dev.identity_bindings ({cols}) FROM STDIN WITH CSV HEADER;\n")
+            f.write(r.stdout)
+            if not r.stdout.endswith("\n"):
+                f.write("\n")
+            f.write("\\.\n\n")
+
+        f.write("COMMIT;\n")
+
+    size = os.path.getsize(sql_path)
+    print(f"  {sql_path} ({size/1024/1024:.1f} MB)")
+
+    # Copy video file to package
+    r = subprocess.run(
+        [f"{PG_BIN}/psql", "-U", "accusys", "-d", "momentry", "-t", "-A",
+         "-c", f"SELECT file_path FROM dev.videos WHERE file_uuid='{uuid}'"],
+        capture_output=True, text=True, timeout=15)
+    video_path = r.stdout.strip()
+    if video_path and os.path.exists(video_path):
+        video_name = os.path.basename(video_path)
+        dest = os.path.join(outdir, video_name)
+        import shutil
+        shutil.copy2(video_path, dest)
+        vsize = os.path.getsize(dest)
+        print(f"  {video_name} ({vsize/1024/1024:.0f} MB)")
+    else:
+        print(f"  WARNING: video file not found at {video_path}")
+
+    # file_info.json
+    r = subprocess.run(
+        [f"{PG_BIN}/psql", "-U", "accusys", "-d", "momentry", "-t", "-A",
+         "-c", f"SELECT json_build_object('file_uuid', file_uuid, 'file_name', file_name, 'duration', duration, 'fps', fps, 'width', width, 'height', height, 'total_frames', total_frames, 'status', status) FROM dev.videos WHERE file_uuid='{uuid}'"],
+        capture_output=True, text=True, timeout=15)
+    if r.stdout.strip():
+        info = json.loads(r.stdout.strip())
+        with open(os.path.join(outdir, "file_info.json"), "w") as f:
+            json.dump(info, f, indent=2)
+        print(f"  file_info.json")
+
+    # Export identities.json (for offline analysis)
+    id_path = os.path.join(outdir, f"{uuid}.identities.json")
+    r = subprocess.run(
+        [f"{PG_BIN}/psql", "-U", "accusys", "-d", "momentry", "-t", "-A",
+         "-c", f"SELECT json_build_object('file_uuid', file_uuid) FROM dev.videos WHERE file_uuid='{uuid}'"],
+        capture_output=True, text=True, timeout=15)
+    subprocess.run(
+        ["/opt/homebrew/bin/python3.11", os.path.join(os.path.dirname(os.path.abspath(__file__)), "export_identities.py"), uuid, id_path],
+        check=False, timeout=60)
+    if os.path.exists(id_path):
+        print(f"  {uuid}.identities.json ({os.path.getsize(id_path)/1024:.0f}KB)")
+
+if __name__ == "__main__":
+    main()
--- a/scripts/export_identities.py
+++ b/scripts/export_identities.py
@@ -0,0 +1,74 @@
+#!/opt/homebrew/bin/python3.11
+"""
+Export identity data for a video UUID as JSON (for offline analysis).
+Usage: python3 export_identities.py <file_uuid> [output.json]
+"""
+import sys, json, psycopg2
+
+UUID = sys.argv[1] if len(sys.argv) > 1 else "aeed71342a899fe4b4c57b7d41bcb692"
+OUT = sys.argv[2] if len(sys.argv) > 2 else f"/Users/accusys/momentry/output_dev/{UUID}.identities.json"
+
+conn = psycopg2.connect("dbname=momentry user=accusys")
+cur = conn.cursor()
+
+# Get identities referenced by this file's face_detections
+cur.execute("""
+    SELECT DISTINCT i.id, i.name, i.uuid, i.identity_type, i.source, i.status,
+           i.face_embedding, i.voice_embedding, i.reference_data, i.tmdb_id, i.tmdb_profile
+    FROM dev.identities i
+    INNER JOIN dev.face_detections fd ON fd.identity_id = i.id
+    WHERE fd.file_uuid = %s
+    ORDER BY i.id
+""", (UUID,))
+rows = cur.fetchall()
+
+identities = []
+for r in rows:
+    identities.append({
+        "id": r[0],
+        "name": r[1],
+        "uuid": str(r[2]) if r[2] else None,
+        "identity_type": r[3],
+        "source": r[4],
+        "status": r[5],
+        "tmdb_id": r[9],
+        "tmdb_profile": r[10],
+    })
+
+# Get identity_bindings for these identities' traces
+cur.execute("""
+    SELECT DISTINCT ib.identity_id, ib.identity_type, ib.identity_value, ib.confidence
+    FROM dev.identity_bindings ib
+    WHERE ib.identity_id IN (
+        SELECT DISTINCT fd.identity_id FROM dev.face_detections fd WHERE fd.file_uuid = %s
+    )
+""", (UUID,))
+bindings = [{"identity_id": r[0], "identity_type": r[1], "identity_value": r[2], "confidence": float(r[3])} for r in cur.fetchall()]
+
+# Get trace-to-identity mapping from face_detections
+cur.execute("""
+    SELECT DISTINCT trace_id, identity_id, COUNT(*) as face_count
+    FROM dev.face_detections
+    WHERE file_uuid = %s AND identity_id IS NOT NULL AND trace_id IS NOT NULL
+    GROUP BY trace_id, identity_id ORDER BY trace_id
+""", (UUID,))
+trace_map = [{"trace_id": r[0], "identity_id": r[1], "face_count": r[2]} for r in cur.fetchall()]
+
+cur.close(); conn.close()
+
+output = {
+    "file_uuid": UUID,
+    "identity_count": len(identities),
+    "binding_count": len(bindings),
+    "trace_mapping_count": len(trace_map),
+    "identities": identities,
+    "bindings": bindings,
+    "trace_to_identity": trace_map,
+}
+
+with open(OUT, 'w') as f:
+    json.dump(output, f, indent=2, ensure_ascii=False)
+
+size_kb = len(json.dumps(output)) / 1024
+print(f"Exported {len(identities)} identities, {len(bindings)} bindings, {len(trace_map)} trace mappings")
+print(f"  → {OUT} ({size_kb:.0f}KB)")
--- a/scripts/export_sqlite.py
+++ b/scripts/export_sqlite.py
@@ -0,0 +1,238 @@
+#!/opt/homebrew/bin/python3.11
+"""
+Export a video's data to a self-contained SQLite database for offline app use.
+Uses sqlite-vec extension for native vector storage.
+The vec0.dylib must be in the script directory or /tmp/.
+Usage: python3 export_sqlite.py <file_uuid> [output.sqlite]
+"""
+import sys, json, sqlite3, psycopg2, os
+
+UUID = sys.argv[1] if len(sys.argv) > 1 else "aeed71342a899fe4b4c57b7d41bcb692"
+OUT = sys.argv[2] if len(sys.argv) > 2 else f"/Users/accusys/momentry/output_dev/{UUID}.sqlite"
+SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
+
+# Find vec0.dylib
+VEC_DYLIB = None
+for path in [
+    os.path.join(SCRIPT_DIR, "vec0.dylib"),
+    "/tmp/vec0.dylib",
+    os.path.join(SCRIPT_DIR, "sqlite-vec", "vec0.dylib"),
+]:
+    if os.path.exists(path):
+        VEC_DYLIB = path
+        break
+
+print(f"Exporting {UUID} → {OUT}")
+if VEC_DYLIB:
+    print(f"  sqlite-vec: {VEC_DYLIB}")
+
+# Connect to PostgreSQL
+pg = psycopg2.connect("dbname=momentry user=accusys")
+pg_cur = pg.cursor()
+
+# Connect to SQLite
+if os.path.exists(OUT):
+    os.remove(OUT)
+lite = sqlite3.connect(OUT)
+
+# Load sqlite-vec extension if available
+if VEC_DYLIB:
+    lite.enable_load_extension(True)
+    try:
+        lite.load_extension(VEC_DYLIB)
+        print("  sqlite-vec extension loaded")
+    except Exception as e:
+        print(f"  WARNING: Could not load sqlite-vec: {e}")
+    lite.enable_load_extension(False)
+
+lite_cur = lite.cursor()
+
+# ---- Helper ----
+def pg_to_sqlite(pg_query, lite_table, lite_schema, params=None, transform=None):
+    """Copy PostgreSQL query result to SQLite table."""
+    lite_cur.execute(lite_schema)
+    pg_cur.execute(pg_query, params or [])
+    rows = pg_cur.fetchall()
+    if not rows:
+        return 0
+    cols = [d[0] for d in pg_cur.description]
+    placeholders = ",".join(["?" for _ in cols])
+
+    count = 0
+    for row in rows:
+        d = dict(zip(cols, row))
+        if transform:
+            d = transform(d)
+        vals = []
+        for c in cols:
+            v = d.get(c)
+            vals.append(None if v is None else v)
+        try:
+            lite_cur.execute(f"INSERT INTO {lite_table} VALUES ({placeholders})", vals)
+            count += 1
+        except Exception:
+            pass
+    lite.commit()
+    return count
+
+# Create tables (skip WAL — Python sqlite3 may not support PRAGMA with extensions loaded)
+print("Creating tables...")
+
+# videos
+pg_to_sqlite(
+    "SELECT file_uuid, file_name, file_path, duration, fps, width, height, probe_json::text, status FROM dev.videos WHERE file_uuid=%s",
+    "videos",
+    "CREATE TABLE IF NOT EXISTS videos (file_uuid TEXT PRIMARY KEY, file_name TEXT, file_path TEXT, duration REAL, fps REAL, width INTEGER, height INTEGER, probe_json TEXT, status TEXT)",
+    [UUID])
+
+# chunk
+pg_to_sqlite(
+    "SELECT file_uuid, chunk_id, chunk_type, start_time, end_time, fps, start_frame, end_frame, text_content, metadata->>'speaker_id' as speaker_id FROM dev.chunk WHERE file_uuid=%s AND chunk_type='sentence' ORDER BY chunk_id",
+    "chunk",
+    """CREATE TABLE IF NOT EXISTS chunk (
+        file_uuid TEXT, chunk_id TEXT, chunk_type TEXT,
+        start_time REAL, end_time REAL, fps REAL,
+        start_frame INTEGER, end_frame INTEGER, text_content TEXT, speaker_id TEXT,
+        PRIMARY KEY(file_uuid, chunk_id))""",
+    [UUID])
+
+def parse_pg_array(text):
+    """Parse PostgreSQL array format {0.1,0.2,...} to Python list."""
+    if not text or text == 'null':
+        return None
+    try:
+        text = text.strip('{}')
+        return [float(x) for x in text.split(',') if x.strip()]
+    except:
+        return None
+
+# chunk vectors → vec0 virtual table
+print("  Creating vec0 table: chunk_embeddings (768D)...")
+lite_cur.execute("""
+    CREATE VIRTUAL TABLE IF NOT EXISTS chunk_embeddings USING vec0(
+        embedding float[768]
+    )
+""")
+pg_cur.execute("SELECT chunk_id, COALESCE(embedding::text, 'null'), uuid FROM dev.chunk_vectors WHERE uuid=%s", [UUID])
+chunk_vecs = pg_cur.fetchall()
+if chunk_vecs:
+    for chunk_id, emb_text, _ in chunk_vecs:
+        # chunk_vectors uses JSONB format, not PG array format
+        emb = None
+        try:
+            emb = json.loads(emb_text) if emb_text else None
+        except:
+            pass
+        if not emb:
+            emb = parse_pg_array(emb_text)  # fallback
+        if emb and len(emb) == 768:
+            lite_cur.execute(
+                "INSERT INTO chunk_embeddings (rowid, embedding) VALUES (?, ?)",
+                [int(chunk_id) if chunk_id.isdigit() else hash(chunk_id) & 0x7fffffff,
+                 json.dumps(emb)])
+    lite.commit()
+    print(f"  chunk_embeddings: {len(chunk_vecs)} vectors")
+
+# face detections
+def transform_face(row):
+    return row  # embedding moved to vec0 table
+
+pg_to_sqlite(
+    """SELECT file_uuid, face_id, frame_number, x, y, width, height, confidence,
+              identity_id, trace_id,
+              COALESCE(timestamp_secs, frame_number / 25.0) as timestamp_secs
+       FROM dev.face_detections WHERE file_uuid=%s ORDER BY frame_number""",
+    "face_detections",
+    """CREATE TABLE IF NOT EXISTS face_detections (
+        file_uuid TEXT, face_id TEXT, frame_number INTEGER,
+        x INTEGER, y INTEGER, width INTEGER, height INTEGER,
+        confidence REAL, identity_id INTEGER, trace_id INTEGER,
+        timestamp_secs REAL)""",
+    [UUID], transform_face)
+
+# face embeddings → vec0 virtual table (512D)
+print("  Creating vec0 table: face_embeddings (512D)...")
+lite_cur.execute("""
+    CREATE VIRTUAL TABLE IF NOT EXISTS face_embeddings USING vec0(
+        embedding float[512]
+    )
+""")
+pg_cur.execute("SELECT id, COALESCE(embedding::text, 'null') FROM dev.face_detections WHERE file_uuid=%s", [UUID])
+face_vecs = pg_cur.fetchall()
+if face_vecs:
+    batch = []
+    for db_id, emb_text in face_vecs:
+        emb = parse_pg_array(emb_text)
+        if emb and len(emb) == 512:
+            batch.append((db_id, json.dumps(emb)))
+        if len(batch) >= 500:
+            lite_cur.executemany("INSERT INTO face_embeddings VALUES (?, ?)", batch)
+            batch = []
+    if batch:
+        lite_cur.executemany("INSERT INTO face_embeddings VALUES (?, ?)", batch)
+    lite.commit()
+    print(f"  face_embeddings: {len(face_vecs)} vectors")
+
+# identities
+def transform_identity(row):
+    return row
+
+pg_to_sqlite(
+    """SELECT DISTINCT i.id, i.name, i.uuid, i.identity_type, i.source, i.status,
+              i.tmdb_id, i.tmdb_profile, i.tmdb_poster
+       FROM dev.identities i
+       INNER JOIN dev.face_detections fd ON fd.identity_id = i.id
+       WHERE fd.file_uuid=%s""",
+    "identities",
+    """CREATE TABLE IF NOT EXISTS identities (
+        id INTEGER PRIMARY KEY, name TEXT, uuid TEXT, identity_type TEXT,
+        source TEXT, status TEXT, tmdb_id INTEGER,
+        tmdb_profile TEXT, tmdb_poster TEXT)""",
+    [UUID], transform_identity)
+
+# identity_bindings
+pg_to_sqlite(
+    """SELECT DISTINCT ib.identity_id, ib.identity_type, ib.identity_value, ib.confidence
+       FROM dev.identity_bindings ib
+       INNER JOIN dev.face_detections fd ON fd.identity_id = ib.identity_id
+       WHERE fd.file_uuid=%s""",
+    "identity_bindings",
+    "CREATE TABLE IF NOT EXISTS identity_bindings (identity_id INTEGER, identity_type TEXT, identity_value TEXT, confidence REAL)",
+    [UUID])
+
+# ---- Create indexes ----
+print("Creating indexes...")
+lite_cur.execute("CREATE INDEX IF NOT EXISTS idx_fd_trace ON face_detections(trace_id)")
+lite_cur.execute("CREATE INDEX IF NOT EXISTS idx_fd_identity ON face_detections(identity_id)")
+lite_cur.execute("CREATE INDEX IF NOT EXISTS idx_fd_frame ON face_detections(frame_number)")
+lite_cur.execute("CREATE INDEX IF NOT EXISTS idx_fd_time ON face_detections(timestamp_secs)")
+lite_cur.execute("CREATE INDEX IF NOT EXISTS idx_chunk_chunkid ON chunk(chunk_id)")
+lite.commit()
+
+# ---- Stats ----
+pg_cur.close(); pg.close()
+lite_cur.close(); lite.close()
+
+size_mb = os.path.getsize(OUT) / 1024 / 1024
+print(f"\n  {OUT} ({size_mb:.0f}MB)")
+
+# Verify
+lite = sqlite3.connect(OUT)
+if VEC_DYLIB:
+    lite.enable_load_extension(True)
+    lite.load_extension(VEC_DYLIB)
+    lite.enable_load_extension(False)
+c = lite.cursor()
+for tbl in ['videos', 'chunk', 'face_detections', 'identities', 'identity_bindings']:
+    c.execute(f"SELECT COUNT(*) FROM {tbl}")
+    print(f"  {tbl}: {c.fetchone()[0]} rows")
+# Check vec tables
+try:
+    c.execute("SELECT COUNT(*) FROM chunk_embeddings")
+    print(f"  chunk_embeddings (vec0, 768D): {c.fetchone()[0]} vectors")
+except: print("  chunk_embeddings: N/A")
+try:
+    c.execute("SELECT COUNT(*) FROM face_embeddings")
+    print(f"  face_embeddings (vec0, 512D): {c.fetchone()[0]} vectors")
+except: print("  face_embeddings: N/A")
+c.close(); lite.close()
--- a/scripts/face_processor.py
+++ b/scripts/face_processor.py
@@ -49,7 +49,7 @@ def classify_pose(roll: float, yaw: float) -> str:

 class FaceProcessorVision:
    def __init__(self, video_path: str, output_path: str, uuid: str = "",
-                 sample_interval: int = 30):
+                 sample_interval: int = 3):
        self.video_path = video_path
        self.output_path = output_path
        self.uuid = uuid
@@ -205,7 +205,7 @@ class FaceProcessorVision:
                        "pitch": pose_info.get("pitch", 0),
                    },
                    "lips": face.get("lips"),
-                    "landmarks": None,
+                    "landmarks": face.get("landmarks"),
                    "attributes": None,
                })

@@ -255,7 +255,7 @@ def main():
    parser.add_argument("video_path", help="Video file path")
    parser.add_argument("output_path", help="Output JSON path")
    parser.add_argument("--uuid", "-u", default="")
-    parser.add_argument("--sample-interval", type=int, default=30)
+    parser.add_argument("--sample-interval", type=int, default=3)
    parser.add_argument("--force", action="store_true")
    args = parser.parse_args()

--- a/scripts/identity_bind.py
+++ b/scripts/identity_bind.py
@@ -0,0 +1,129 @@
+#!/opt/homebrew/bin/python3.11
+"""
+Identity Binding: cluster face traces → identity bindings.
+Uses face embeddings from face_detections, clusters per trace, creates identities.
+"""
+import json, sys, time
+import psycopg2
+import numpy as np
+from sklearn.cluster import AgglomerativeClustering
+
+UUID = sys.argv[1] if len(sys.argv) > 1 else "23b1c872379d4ec06479e5ed39eef4c5"
+DB = "dbname=momentry user=accusys"
+DISTANCE_THRESHOLD = 0.55  # Cosine distance threshold for clustering
+
+print(f"=== Identity Binding for {UUID} ===")
+
+conn = psycopg2.connect(DB)
+cur = conn.cursor()
+
+# Step 1: Get trace embeddings from face_detections
+print("Loading face trace data...")
+cur.execute("""
+    SELECT trace_id, embedding
+    FROM dev.face_detections
+    WHERE file_uuid = %s AND trace_id IS NOT NULL AND embedding IS NOT NULL
+    ORDER BY trace_id, id
+""", (UUID,))
+rows = cur.fetchall()
+print(f"Face detections with embeddings: {len(rows)}")
+
+# Group by trace_id and compute average embedding
+trace_embs = {}
+for trace_id, emb in rows:
+    if trace_id not in trace_embs:
+        trace_embs[trace_id] = []
+    trace_embs[trace_id].append(emb)
+
+print(f"Unique traces: {len(trace_embs)}")
+
+# Compute mean embeddings per trace
+trace_ids = []
+trace_vectors = []
+for tid, embs in sorted(trace_embs.items()):
+    mean_emb = np.mean(embs, axis=0)
+    mean_emb = mean_emb / (np.linalg.norm(mean_emb) + 1e-10)
+    trace_ids.append(tid)
+    trace_vectors.append(mean_emb)
+
+X = np.array(trace_vectors)
+print(f"Trace vectors shape: {X.shape}")
+
+# Step 2: Cluster traces
+print("Clustering traces...")
+if len(X) > 1:
+    clustering = AgglomerativeClustering(
+        n_clusters=None,
+        distance_threshold=DISTANCE_THRESHOLD,
+        metric='cosine',
+        linkage='average'
+    )
+    labels = clustering.fit_predict(X)
+else:
+    labels = [0]
+
+n_clusters = len(set(labels))
+print(f"Clusters/identities: {n_clusters}")
+
+# Step 3: Get or create identity records
+print("Creating identity records...")
+# Get existing identities
+cur.execute("SELECT id, uuid FROM dev.identities")
+existing = {row[0]: row[1] for row in cur.fetchall()}
+
+# Map cluster -> identity_id
+cluster_to_identity = {}
+for cluster_id in sorted(set(labels)):
+    # Create new identity
+    identity_uuid = None
+    cur.execute("""
+        INSERT INTO dev.identities (name, identity_type, source, status, created_at)
+        VALUES (%s, 'face', 'auto', 'active', NOW())
+        ON CONFLICT (name) DO UPDATE SET status = 'active'
+        RETURNING id
+    """, (f"PERSON_{UUID[:8]}_{cluster_id}",))
+    identity_id = cur.fetchone()[0]
+    cluster_to_identity[cluster_id] = identity_id
+    print(f"  Cluster {cluster_id}: new identity {identity_id} (PERSON_{cluster_id})")
+
+# Step 4: Create identity bindings
+print("Creating identity bindings...")
+bindings = 0
+for tid, label in zip(trace_ids, labels):
+    identity_id = cluster_to_identity[label]
+    # Get a representative face_id for this trace
+    cur.execute("""
+        SELECT face_id FROM dev.face_detections
+        WHERE file_uuid = %s AND trace_id = %s
+        LIMIT 1
+    """, (UUID, tid))
+    row = cur.fetchone()
+    if row:
+        face_id = row[0]
+        # Create binding
+        cur.execute("""
+            INSERT INTO dev.identity_bindings (identity_id, identity_type, identity_value, confidence, created_at)
+            VALUES (%s, 'trace', %s, 0.8, NOW())
+            ON CONFLICT DO NOTHING
+        """, (identity_id, str(tid)))
+        bindings += 1
+
+        # Also update face_detection with identity_id
+        cur.execute("""
+            UPDATE dev.face_detections SET identity_id = %s
+            WHERE file_uuid = %s AND trace_id = %s
+        """, (identity_id, UUID, tid))
+
+conn.commit()
+print(f"Created {bindings} identity bindings for {n_clusters} identities")
+
+# Summary
+print(f"\n=== Summary ===")
+cur.execute("SELECT COUNT(*) FROM dev.identities WHERE source = 'auto'")
+print(f"Total auto-generated identities: {cur.fetchone()[0]}")
+cur.execute("SELECT COUNT(*) FROM dev.identity_bindings")
+print(f"Total identity bindings: {cur.fetchone()[0]}")
+
+cur.close()
+conn.close()
+print("=== Done ===")
--- a/scripts/insert_chunks.py
+++ b/scripts/insert_chunks.py
@@ -0,0 +1,48 @@
+#!/opt/homebrew/bin/python3.11
+"""Insert sentence chunks from transcribe.py output into dev.chunk table."""
+import json, sys
+import psycopg2
+
+DB = "dbname=momentry user=accusys"
+UUID = sys.argv[1] if len(sys.argv) > 1 else "23b1c872379d4ec06479e5ed39eef4c5"
+ASR_PATH = f"/Users/accusys/momentry/output_dev/{UUID}.asr.json"
+FPS = 23.976023976023978
+
+with open(ASR_PATH) as f:
+    asr = json.load(f)
+
+segments = asr.get("segments", [])
+print(f"Inserting {len(segments)} sentence chunks for {UUID}...")
+
+conn = psycopg2.connect(DB)
+cur = conn.cursor()
+
+inserted = 0
+for seg in segments:
+    chunk_id = seg["chunk_id"]
+    start_time = seg["start_time"]
+    end_time = seg["end_time"]
+    start_frame = int(start_time * FPS)
+    end_frame = int(end_time * FPS)
+    text = seg.get("text", "")
+    speaker_change = seg.get("speaker_change", False)
+
+    content = json.dumps({
+        "source": "transcribe",
+        "speaker_change": speaker_change,
+        "pass1_index": seg.get("pass1_index", 0),
+    })
+
+    cur.execute("""
+        INSERT INTO dev.chunk (file_uuid, chunk_id, chunk_type, start_time, end_time,
+            start_frame, end_frame, fps, text_content, content, created_at)
+        VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s::jsonb, NOW())
+        ON CONFLICT (file_uuid, chunk_id) DO NOTHING
+    """, (UUID, chunk_id, "sentence", start_time, end_time,
+          start_frame, end_frame, FPS, text, content))
+    inserted += 1
+
+conn.commit()
+cur.close()
+conn.close()
+print(f"Done: {inserted} chunks inserted")
--- a/scripts/release_manager.py
+++ b/scripts/release_manager.py
@@ -0,0 +1,344 @@
+#!/opt/homebrew/bin/python3.11
+"""
+Release Manager - Deploy / Undeploy video release packages.
+
+Usage:
+  python3 release_manager.py deploy <package.tar.gz>
+  python3 release_manager.py undeploy <file_uuid>
+  python3 release_manager.py list
+  python3 release_manager.py package <file_uuid>        # Create new release package
+"""
+
+import json, os, sys, shutil, subprocess, tarfile, tempfile, argparse, time
+import psycopg2
+from urllib.request import Request, urlopen
+
+PG_BIN = "/Users/accusys/pgsql/18.3/bin"
+DB = "dbname=momentry user=accusys"
+QDRANT = "http://localhost:6333"
+DEMO_DIR = "/Users/accusys/momentry/var/sftpgo/data/demo"
+OUTPUT_DIR = "/Users/accusys/momentry/output_dev"
+RELEASE_DIR = "/Users/accusys/momentry_core_0.1/release/files"
+
+# ---- Helpers ----
+
+def psql_cmd(sql, db=DB):
+    """Run a SQL command via psql."""
+    r = subprocess.run(
+        [f"{PG_BIN}/psql", "-U", "accusys", "-d", "momentry", "-t", "-A", "-c", sql],
+        capture_output=True, text=True, timeout=30)
+    return r.stdout.strip()
+
+def pg_execute(sql, params=None):
+    """Execute SQL via psycopg2."""
+    conn = psycopg2.connect(DB)
+    cur = conn.cursor()
+    if params:
+        cur.execute(sql, params)
+    else:
+        cur.execute(sql)
+    conn.commit()
+    cur.close()
+    conn.close()
+
+def pg_query(sql, params=None):
+    """Query via psycopg2."""
+    conn = psycopg2.connect(DB)
+    cur = conn.cursor()
+    if params:
+        cur.execute(sql, params)
+    else:
+        cur.execute(sql)
+    rows = cur.fetchall()
+    cur.close()
+    conn.close()
+    return rows
+
+def qdrant_delete_points(uuid, collection):
+    """Delete points from Qdrant collection by payload filter."""
+    try:
+        req = Request(f"{QDRANT}/collections/{collection}/points/delete",
+            data=json.dumps({
+                "filter": {"must": [{"key": "file_uuid", "match": {"value": uuid}}]}
+            }).encode(),
+            headers={"Content-Type": "application/json"}, method="POST")
+        urlopen(req)
+        return True
+    except:
+        return False
+
+# ---- Deploy ----
+
+def cmd_deploy(tarball_path):
+    """Deploy a release package."""
+    if not os.path.exists(tarball_path):
+        print(f"ERROR: {tarball_path} not found")
+        return 1
+
+    t0 = time.time()
+    print(f"=== Deploy: {os.path.basename(tarball_path)} ===")
+
+    # 1. Extract
+    tmpdir = tempfile.mkdtemp(prefix="release_deploy_")
+    print(f"Extracting to {tmpdir}...")
+    with tarfile.open(tarball_path) as tar:
+        tar.extractall(tmpdir)
+
+    # Find UUID from directory name or file_info.json
+    uuid = None
+    for item in os.listdir(tmpdir):
+        info_path = os.path.join(tmpdir, item, "file_info.json")
+        if os.path.exists(info_path):
+            with open(info_path) as f:
+                info = json.load(f)
+            uuid = info.get("file_uuid", "")
+            break
+
+    if not uuid:
+        print("ERROR: Could not find file_info.json with UUID")
+        return 1
+
+    pkg_dir = os.path.join(tmpdir, uuid)
+    print(f"UUID: {uuid}")
+
+    # 2. Import data.sql
+    sql_path = os.path.join(pkg_dir, "data.sql")
+    if os.path.exists(sql_path):
+        print(f"Importing data.sql ({os.path.getsize(sql_path)/1024/1024:.0f} MB)...")
+        r = subprocess.run([f"{PG_BIN}/psql", "-U", "accusys", "-d", "momentry", "-f", sql_path],
+            capture_output=True, text=True, timeout=300)
+        if r.returncode != 0:
+            print(f"WARNING: SQL import may have issues")
+            print(r.stderr[-500:] if r.stderr else "")
+    else:
+        print("WARNING: data.sql not found in package")
+
+    # 3. Copy video to demo dir
+    for fname in os.listdir(pkg_dir):
+        fpath = os.path.join(pkg_dir, fname)
+        if fname.endswith(('.mp4', '.mov', '.avi', '.mkv')):
+            dest = os.path.join(DEMO_DIR, fname)
+            if not os.path.exists(dest):
+                shutil.copy2(fpath, dest)
+                print(f"Video: {fname} → {DEMO_DIR}/")
+            else:
+                print(f"Video: {fname} already exists in demo dir")
+
+    # 4. Copy JSON outputs
+    for fname in os.listdir(pkg_dir):
+        if fname.endswith('.json'):
+            src = os.path.join(pkg_dir, fname)
+            dest = os.path.join(OUTPUT_DIR, fname)
+            shutil.copy2(src, dest)
+
+    print(f"Output files copied to {OUTPUT_DIR}/")
+
+    # 5. Verify deployment
+    rows = pg_query("SELECT COUNT(*) FROM dev.chunk WHERE file_uuid = %s", (uuid,))
+    chunks = rows[0][0] if rows else 0
+    rows = pg_query("SELECT COUNT(*) FROM dev.face_detections WHERE file_uuid = %s", (uuid,))
+    faces = rows[0][0] if rows else 0
+    rows = pg_query("SELECT file_name, duration FROM dev.videos WHERE file_uuid = %s", (uuid,))
+    video_info = rows[0] if rows else ("?", "?")
+
+    elapsed = time.time() - t0
+    print(f"\n=== Deploy Complete ({elapsed:.0f}s) ===")
+    print(f"  Video: {video_info[0]} ({float(video_info[1]):.0f}s)")
+    print(f"  Chunks: {chunks}")
+    print(f"  Face detections: {faces}")
+
+    shutil.rmtree(tmpdir, ignore_errors=True)
+    return 0
+
+# ---- Undeploy ----
+
+def cmd_undeploy(uuid):
+    """Undeploy: remove all trace of a UUID from the system."""
+    print(f"=== Undeploy: {uuid} ===")
+
+    # Confirm
+    rows = pg_query("SELECT file_name FROM dev.videos WHERE file_uuid = %s", (uuid,))
+    if not rows:
+        print(f"ERROR: UUID {uuid} not found in DB")
+        return 1
+    filename = rows[0][0]
+    print(f"Video: {filename}")
+    print("This will DELETE all data for this video. Are you sure? (y/N): ", end="")
+    confirm = sys.stdin.readline().strip().lower()
+    if confirm != 'y':
+        print("Cancelled")
+        return 0
+
+    t0 = time.time()
+
+    # Get video path before deleting
+    rows = pg_query("SELECT file_path FROM dev.videos WHERE file_uuid = %s", (uuid,))
+    video_path = rows[0][0] if rows else ""
+
+    # 1. Delete DB data
+    tables = [
+        ("dev.chunk", "file_uuid"),
+        ("dev.chunk_vectors", "uuid"),
+        ("dev.face_detections", "file_uuid"),
+        ("dev.processor_results", "file_uuid"),
+        ("dev.monitor_jobs", "uuid"),
+        ("dev.pre_chunks", "file_uuid"),
+    ]
+    for tbl, col in tables:
+        pg_execute(f"DELETE FROM {tbl} WHERE {col} = %s", (uuid,))
+        print(f"  {tbl}: cleared")
+    pg_execute("DELETE FROM dev.videos WHERE file_uuid = %s", (uuid,))
+    print(f"  dev.videos: removed")
+
+    # Clean orphaned identity bindings
+    pg_execute("DELETE FROM dev.identity_bindings WHERE identity_value NOT IN (SELECT face_id FROM dev.face_detections)")
+
+    # 2. Delete output files
+    for f in os.listdir(OUTPUT_DIR):
+        if f.startswith(uuid):
+            os.remove(os.path.join(OUTPUT_DIR, f))
+    print(f"  Output files: removed")
+
+    # 3. Delete video from demo dir
+    if video_path and os.path.exists(video_path):
+        os.remove(video_path)
+        print(f"  Video file: removed ({os.path.basename(video_path)})")
+
+    # 4. Clean Qdrant (skip - Qdrant points don't have easy UUID filter)
+    # Instead rely on upsert behavior
+
+    # 5. Delete release package
+    pkg_path = os.path.join(RELEASE_DIR, uuid)
+    if os.path.exists(pkg_path):
+        shutil.rmtree(pkg_path)
+        print(f"  Release dir: removed")
+    for f in os.listdir(RELEASE_DIR):
+        if f.startswith(uuid):
+            os.remove(os.path.join(RELEASE_DIR, f))
+            print(f"  Release file: {f} removed")
+
+    elapsed = time.time() - t0
+    print(f"\n=== Undeploy Complete ({elapsed:.0f}s) ===")
+    return 0
+
+# ---- List ----
+
+def cmd_list():
+    """List deployed videos."""
+    rows = pg_query("""
+        SELECT file_uuid, file_name, 
+               TO_CHAR((duration/60)::int, 'FM999"min"') as dur,
+               status,
+               (SELECT COUNT(*) FROM dev.chunk WHERE file_uuid = v.file_uuid) as chunks,
+               (SELECT COUNT(*) FROM dev.face_detections WHERE file_uuid = v.file_uuid) as faces
+        FROM dev.videos v ORDER BY id DESC
+    """)
+    print(f"{'UUID':36s} {'Name':40s} {'Duration':8s} {'Status':10s} {'Chunks':>6s} {'Faces':>6s}")
+    print("-" * 120)
+    for r in rows:
+        uuid, name, dur, status, chunks, faces = r
+        short_name = (name or "")[:38] + ".." if len(name or "") > 40 else (name or "")
+        print(f"{uuid:36s} {short_name:40s} {dur or '?':8s} {status or '?':10s} {chunks or 0:>6d} {faces or 0:>6d}")
+
+# ---- Package ----
+
+def cmd_package(uuid):
+    """Create a release package for a deployed video."""
+    print(f"=== Package: {uuid} ===")
+
+    # Check video exists
+    rows = pg_query("SELECT file_uuid, file_name, file_path FROM dev.videos WHERE file_uuid = %s", (uuid,))
+    if not rows:
+        print(f"ERROR: UUID {uuid} not found")
+        return 1
+
+    outdir = os.path.join(RELEASE_DIR, uuid)
+    shutil.rmtree(outdir, ignore_errors=True)
+    os.makedirs(outdir, exist_ok=True)
+
+    # Export data.sql
+    r = subprocess.run([f"{PG_BIN}/psql", "-U", "accusys", "-d", "momentry", "-t", "-A",
+        "-c", f"SELECT json_build_object('file_uuid', file_uuid, 'file_name', file_name, 'duration', duration, 'fps', fps, 'width', width, 'height', height, 'total_frames', total_frames, 'status', status) FROM dev.videos WHERE file_uuid='{uuid}'"],
+        capture_output=True, text=True, timeout=15)
+    if r.stdout.strip():
+        info = json.loads(r.stdout.strip())
+        with open(os.path.join(outdir, "file_info.json"), "w") as f:
+            json.dump(info, f, indent=2)
+
+    # Export SQL
+    sql_path = os.path.join(outdir, "data.sql")
+    with open(sql_path, "w") as f:
+        f.write(f"-- Release package: {uuid}\nBEGIN;\n\n")
+        for tbl, col in [("dev.videos", "file_uuid"), ("dev.chunk", "file_uuid"),
+                         ("dev.chunk_vectors", "uuid"), ("dev.face_detections", "file_uuid")]:
+            r = subprocess.run([f"{PG_BIN}/psql", "-U", "accusys", "-d", "momentry", "-c",
+                f"COPY (SELECT * FROM {tbl} WHERE {col} = '{uuid}') TO STDOUT WITH CSV HEADER"],
+                capture_output=True, text=True, timeout=60)
+            if r.stdout.strip():
+                # Get column names
+                schema, table = tbl.split(".")
+                r2 = subprocess.run([f"{PG_BIN}/psql", "-U", "accusys", "-d", "momentry", "-t", "-A",
+                    "-c", f"SELECT string_agg(column_name, ', ' ORDER BY ordinal_position) FROM information_schema.columns WHERE table_schema='{schema}' AND table_name='{table}' AND is_updatable='YES'"],
+                    capture_output=True, text=True, timeout=15)
+                cols = r2.stdout.strip()
+                f.write(f"COPY {tbl} ({cols}) FROM STDIN WITH CSV HEADER;\n")
+                f.write(r.stdout)
+                if not r.stdout.endswith("\n"):
+                    f.write("\n")
+                f.write("\\.\n\n")
+        f.write("COMMIT;\n")
+
+    size = os.path.getsize(sql_path)
+    print(f"  data.sql ({size/1024/1024:.0f} MB)")
+
+    # Copy video
+    video_path = rows[0][2]
+    if video_path and os.path.exists(video_path):
+        dest = os.path.join(outdir, os.path.basename(video_path))
+        shutil.copy2(video_path, dest)
+        print(f"  {os.path.basename(video_path)} ({os.path.getsize(dest)/1024/1024:.0f} MB)")
+
+    # Copy output JSONs
+    for fname in os.listdir(OUTPUT_DIR):
+        if fname.startswith(uuid) and fname.endswith('.json'):
+            shutil.copy2(os.path.join(OUTPUT_DIR, fname), os.path.join(outdir, fname))
+
+    # tar.gz
+    tarball = os.path.join(RELEASE_DIR, f"{uuid}_v{int(time.time())}.tar.gz")
+    subprocess.run(["tar", "-czf", tarball, "-C", RELEASE_DIR, uuid], check=True, timeout=300)
+    tsize = os.path.getsize(tarball)
+    print(f"  Package: {tarball} ({tsize/1024/1024:.0f} MB)")
+    return 0
+
+# ---- Main ----
+
+def main():
+    parser = argparse.ArgumentParser(description="Release Manager — deploy/undeploy/list video packages")
+    sub = parser.add_subparsers(dest="cmd")
+
+    p_deploy = sub.add_parser("deploy", help="Deploy a release package")
+    p_deploy.add_argument("tarball", help="Path to .tar.gz package")
+
+    p_undeploy = sub.add_parser("undeploy", help="Undeploy (remove all data for a UUID)")
+    p_undeploy.add_argument("uuid", help="File UUID")
+
+    p_list = sub.add_parser("list", help="List deployed videos")
+
+    p_package = sub.add_parser("package", help="Create release package for deployed video")
+    p_package.add_argument("uuid", help="File UUID")
+
+    args = parser.parse_args()
+
+    if args.cmd == "deploy":
+        sys.exit(cmd_deploy(args.tarball))
+    elif args.cmd == "undeploy":
+        sys.exit(cmd_undeploy(args.uuid))
+    elif args.cmd == "list":
+        cmd_list()
+    elif args.cmd == "package":
+        sys.exit(cmd_package(args.uuid))
+    else:
+        parser.print_help()
+
+if __name__ == "__main__":
+    main()
--- a/scripts/render_face_heatmap.py
+++ b/scripts/render_face_heatmap.py
@@ -0,0 +1,222 @@
+#!/opt/homebrew/bin/python3.11
+"""Face Trace Heatmap + Timeline Visualization for Momentry.
+Usage:
+  python3 render_face_heatmap.py <uuid> [output.html] [--identity ID]
+"""
+import sys, psycopg2, argparse
+from collections import defaultdict
+
+parser = argparse.ArgumentParser()
+parser.add_argument("uuid")
+parser.add_argument("output", nargs="?", default=None)
+parser.add_argument("--identity", "-i", type=int, default=None, help="Filter by identity_id")
+args = parser.parse_args()
+
+UUID = args.uuid
+OUT = args.output or f"/tmp/face_report_{UUID[:8]}.html"
+IDENTITY = args.identity
+
+conn = psycopg2.connect("dbname=momentry user=accusys")
+cur = conn.cursor()
+
+cur.execute("SELECT duration, file_name, COALESCE(fps, 25.0) FROM dev.videos WHERE file_uuid=%s", (UUID,))
+row = cur.fetchone()
+if not row:
+    print("UUID not found")
+    sys.exit(1)
+duration, video_name, fps = float(row[0] or 6785), row[1] or UUID, float(row[2] or 25.0)
+
+# Get sample interval from face.json metadata (or default 3 = 8Hz)
+sample_interval = 3
+hz = fps / sample_interval
+
+# Build identity filter
+identity_filter = ""
+identity_params = [UUID]
+identity_label = ""
+identity_info = None  # full identity record when filtered
+top_identities = []   # top identities summary (all view)
+
+if IDENTITY is not None:
+    identity_filter = " AND identity_id = %s"
+    identity_params.append(IDENTITY)
+    cur.execute("SELECT id, name, identity_type, source, status FROM dev.identities WHERE id=%s", (IDENTITY,))
+    id_row = cur.fetchone()
+    if id_row:
+        identity_info = {"id": id_row[0], "name": id_row[1], "type": id_row[2], "source": id_row[3], "status": id_row[4]}
+        identity_label = f" (identity: {id_row[1]})"
+    else:
+        identity_label = f" (identity #{IDENTITY})"
+    identity_params = [UUID, IDENTITY]
+
+# Query trace spans
+cur.execute(f"""
+    SELECT trace_id, MIN(frame_number), MAX(frame_number),
+           COALESCE(MIN(timestamp_secs), MIN(frame_number) / {fps}) as first_t,
+           COALESCE(MAX(timestamp_secs), MAX(frame_number) / {fps}) as last_t,
+           COUNT(*)
+    FROM dev.face_detections
+    WHERE file_uuid=%s AND trace_id IS NOT NULL{identity_filter}
+    GROUP BY trace_id ORDER BY first_t
+""", identity_params)
+trace_spans = cur.fetchall()
+
+# Query density per time bucket (5s)
+cur.execute(f"""
+    SELECT FLOOR(COALESCE(timestamp_secs, frame_number / {fps}) / 5)::int as bkt, COUNT(*) as cnt
+    FROM dev.face_detections
+    WHERE file_uuid=%s AND trace_id IS NOT NULL{identity_filter}
+    GROUP BY bkt ORDER BY bkt
+""", identity_params)
+density = {b: c for b, c in cur.fetchall()}
+
+# Count total detections
+cur.execute(f"SELECT COUNT(*) FROM dev.face_detections WHERE file_uuid=%s{identity_filter}", identity_params)
+total_detections = cur.fetchone()[0]
+
+# Get top identities (for all view) and trace↔identity mapping
+if IDENTITY is None:
+    cur.execute("""
+        SELECT fd.identity_id, i.name, COUNT(*) as faces, COUNT(DISTINCT fd.trace_id) as traces
+        FROM dev.face_detections fd
+        LEFT JOIN dev.identities i ON i.id = fd.identity_id
+        WHERE fd.file_uuid=%s AND fd.identity_id IS NOT NULL
+        GROUP BY fd.identity_id, i.name ORDER BY faces DESC LIMIT 10
+    """, (UUID,))
+    top_identities = cur.fetchall()
+else:
+    # Get trace→identity mapping for tooltip enrichment
+    cur.execute("""
+        SELECT DISTINCT fd.trace_id, i.name
+        FROM dev.face_detections fd
+        LEFT JOIN dev.identities i ON i.id = fd.identity_id
+        WHERE fd.file_uuid=%s AND fd.identity_id IS NOT NULL
+    """, (UUID,))
+    trace_to_identity = {r[0]: r[1] for r in cur.fetchall()}
+
+cur.close(); conn.close()
+
+BUCKET = 5
+num_buckets = int(duration / BUCKET) + 1
+max_density = max(density.values()) if density else 1
+
+def build_html():
+    h = []
+    h.append('<!DOCTYPE html><html><head><meta charset="utf-8"><title>Face Trace Report</title>')
+    h.append('<style>')
+    h.append('body{font-family:-apple-system,BlinkMacSystemFont,sans-serif;margin:20px;background:#0d1117;color:#c9d1d9}')
+    h.append('h1,h2{color:#e94560}')
+    h.append('.stats{display:flex;gap:12px;margin:8px 0;flex-wrap:wrap}')
+    h.append('.stat{background:#161b22;padding:6px 14px;border-radius:6px}')
+    h.append('.stat .num{font-size:20px;font-weight:bold;color:#e94560}')
+    h.append('.stat .label{font-size:10px;color:#8b949e}')
+    h.append('.viz{position:relative;background:#0d1117;border:1px solid #30363d;margin:8px 0;overflow:hidden}')
+    h.append('.bar{display:block;position:absolute;height:3px;background:#e94560;opacity:0.7;border-radius:1px}')
+    h.append('.bar:hover{height:8px;opacity:1}')
+    h.append('</style></head><body>')
+    sub = identity_label if identity_label else ""
+    h.append('<h1>Face Trace Report — ' + video_name[:60] + sub + '</h1>')
+
+    # Identity card (when filtering by identity)
+    if identity_info:
+        h.append('<div style="background:#161b22;border:1px solid #30363d;border-radius:8px;padding:16px;margin:12px 0;">')
+        h.append('<h3 style="margin:0;color:#e94560">Identity Details</h3>')
+        h.append(f'<table style="width:100%;margin-top:8px;color:#c9d1d9;border-collapse:collapse">')
+        h.append(f'<tr><td style="padding:4px 12px 4px 0;color:#8b949e;width:80px">ID</td><td>{identity_info["id"]}</td></tr>')
+        h.append(f'<tr><td style="padding:4px 12px 4px 0;color:#8b949e">Name</td><td style="font-weight:bold">{identity_info["name"]}</td></tr>')
+        h.append(f'<tr><td style="padding:4px 12px 4px 0;color:#8b949e">Type</td><td>{identity_info["type"]}</td></tr>')
+        h.append(f'<tr><td style="padding:4px 12px 4px 0;color:#8b949e">Source</td><td>{identity_info["source"]}</td></tr>')
+        h.append(f'<tr><td style="padding:4px 12px 4px 0;color:#8b949e">Status</td><td>{identity_info["status"]}</td></tr>')
+        h.append('</table></div>')
+
+    # Top identities table (all view)
+    if top_identities:
+        h.append('<h2>Top Identities</h2>')
+        h.append('<div style="overflow-x:auto">')
+        h.append('<table style="width:100%;color:#c9d1d9;border-collapse:collapse;font-size:13px">')
+        h.append('<tr style="background:#161b22;text-align:left"><th style="padding:6px 10px">Identity</th><th style="padding:6px 10px">Name</th><th style="padding:6px 10px;text-align:right">Faces</th><th style="padding:6px 10px;text-align:right">Traces</th></tr>')
+        for iid, name, fc, tc in top_identities:
+            short_name = (name or f"#{iid}")[:60]
+            h.append(f'<tr style="border-bottom:1px solid #21262d"><td style="padding:4px 10px;color:#8b949e">{iid}</td><td style="padding:4px 10px">{short_name}</td><td style="padding:4px 10px;text-align:right">{fc:,}</td><td style="padding:4px 10px;text-align:right">{tc}</td></tr>')
+        h.append('</table></div>')
+
+    # Stats row
+    h.append('<div class="stats">')
+    h.append(f'<div class="stat"><div class="num">{len(trace_spans):,}</div><div class="label">traces</div></div>')
+    h.append(f'<div class="stat"><div class="num">{total_detections:,}</div><div class="label">detections</div></div>')
+    h.append(f'<div class="stat"><div class="num">{duration:.0f}s</div><div class="label">duration</div></div>')
+    h.append(f'<div class="stat"><div class="num">{max_density}</div><div class="label">max per {BUCKET}s</div></div>')
+    h.append(f'<div class="stat"><div class="num">{fps:.0f}fps</div><div class="label">video fps</div></div>')
+    h.append(f'<div class="stat"><div class="num">{hz:.0f}Hz</div><div class="label">sample rate (every {sample_interval}frames)</div></div>')
+    h.append(f'<div class="stat"><div class="num">{num_buckets}</div><div class="label">{BUCKET}s buckets</div></div>')
+    h.append('</div>')
+
+    # 1. Density histogram
+    h.append('<h2>Face Density Over Time</h2>')
+    h.append('<div style="color:#666;font-size:12px;margin-bottom:4px">Number of face detections per 5-second interval</div>')
+    w_px = num_buckets * 2 + 20
+    h.append(f'<div class="viz" style="width:{w_px}px;height:80px">')
+    for b in range(num_buckets):
+        v = density.get(b, 0)
+        h_px = max(2, int(60 * v / max(1, max_density * 0.6))) if v > 0 else 0
+        if v == 0:
+            color = "#0d1117"
+        else:
+            i = min(v / (max(1, max_density * 0.5)), 1.0)
+            r = int(233 * i + 13 * (1 - i))
+            g = int(69 * i + 13 * (1 - i))
+            bv = int(96 * i + 23 * (1 - i))
+            color = f"rgb({r},{g},{bv})"
+        title = f"{b * BUCKET:.0f}s: {v} faces"
+        h.append(f'<span style="position:absolute;left:{b*2+10}px;bottom:0;width:2px;height:{h_px}px;background:{color}" title="{title}"></span>')
+    h.append('</div>')
+    h.append(f'<div style="font-size:10px;color:#444;width:{w_px}px;display:flex;justify-content:space-between"><span>0s</span><span>{duration:.0f}s</span></div>')
+
+    # 2. Trace timeline (Gantt)
+    h.append('<h2>Trace Timeline</h2>')
+    h.append('<div style="color:#666;font-size:12px;margin-bottom:4px">First → last appearance for each trace. Hover for details.</div>')
+    show_traces = min(len(trace_spans), 2000)
+    bar_h = 2
+    chart_height = show_traces * (bar_h + 1) + 10
+    h.append(f'<div class="viz" style="width:{w_px}px;height:{chart_height}px">')
+    for i, (tid, fn0, fn1, t0, t1, cnt) in enumerate(trace_spans[:show_traces]):
+        left = int(t0 / duration * (w_px - 20)) + 10
+        width = max(3, int((t1 - t0) / duration * (w_px - 20)))
+        top = i * (bar_h + 1) + 5
+        opacity = 1.0 if cnt > 5 else 0.3
+        identity_note = ""
+        if IDENTITY is not None and tid in trace_to_identity:
+            identity_note = f", identity: {trace_to_identity[tid]}"
+        title = f"T{tid}: {t0:.0f}s–{t1:.0f}s, {cnt} faces, f{fn0}–f{fn1}{identity_note}"
+        h.append(f'<span class="bar" style="left:{left}px;top:{top}px;width:{width}px;height:{bar_h}px;opacity:{opacity}" title="{title}"></span>')
+    h.append('</div>')
+    h.append(f'<div style="font-size:10px;color:#444;width:{w_px}px;display:flex;justify-content:space-between"><span>0s (showing {show_traces}/{len(trace_spans)} traces)</span><span>{duration:.0f}s</span></div>')
+
+    # 3. Per-trace heatmap
+    h.append('<h2>Per-Trace Heatmap (top 500, every 10th trace)</h2>')
+    h.append(f'<div style="overflow-x:scroll;max-width:100%">')
+    step = max(1, num_buckets // 120)
+    for i, (tid, fn0, fn1, t0, t1, cnt) in enumerate(trace_spans[:500]):
+        if i % 10 != 0:
+            continue
+        start_bkt = int(t0 / BUCKET)
+        end_bkt = int(t1 / BUCKET) + 1
+        row = f'<div style="white-space:nowrap;line-height:6px"><span style="display:inline-block;width:45px;font-size:6px;color:#555">T{tid}</span>'
+        for b in range(0, num_buckets, step):
+            active = start_bkt <= b <= end_bkt
+            color = "#e94560" if active else "#161b22"
+            row += f'<span style="display:inline-block;width:2px;height:4px;background:{color};margin:0"></span>'
+        row += '</div>'
+        h.append(row)
+    h.append('</div>')
+
+    h.append('</body></html>')
+    return '\n'.join(h)
+
+html = build_html()
+with open(OUT, 'w') as f:
+    f.write(html)
+
+print(f"Saved: {OUT}")
+print(f"Traces: {len(trace_spans)}, Detections: {total_detections}, Density max: {max_density}, Duration: {duration:.0f}s, Sample: {hz:.0f}Hz")
+print(f"Size: {len(html) / 1024:.0f}KB")
--- a/scripts/speaker_assign.py
+++ b/scripts/speaker_assign.py
@@ -0,0 +1,164 @@
+#!/opt/homebrew/bin/python3.11
+"""
+Speaker Assignment: cluster voice vectors from Qdrant, assign speaker IDs to DB chunks.
+"""
+import json, sys, time
+import psycopg2
+import numpy as np
+from urllib.request import Request, urlopen
+from sklearn.cluster import AgglomerativeClustering
+from sklearn.metrics.pairwise import cosine_similarity
+
+UUID = sys.argv[1] if len(sys.argv) > 1 else "23b1c872379d4ec06479e5ed39eef4c5"
+QDRANT = "http://localhost:6333"
+DB = "dbname=momentry user=accusys"
+COLLECTION = "momentry_dev_voice"
+
+print(f"=== Speaker Assignment for {UUID} ===")
+
+# Step 1: Read voice vectors from Qdrant
+print("Reading voice vectors from Qdrant...")
+vectors = []
+chunk_ids = []
+# We need to scroll through all points
+offset = None
+while True:
+    data = {"limit": 100, "with_payload": True, "with_vector": True}
+    if offset is not None:
+        data["offset"] = offset
+    req = Request(f"{QDRANT}/collections/{COLLECTION}/points/scroll",
+        data=json.dumps(data).encode(),
+        headers={"Content-Type": "application/json"}, method="POST")
+    resp = json.loads(urlopen(req).read())
+    result = resp["result"]
+    points = result.get("points", [])
+    if not points:
+        break
+    for pt in points:
+        payload = pt.get("payload", {})
+        cid = payload.get("chunk_id", "")
+        # Only get vectors for THIS UUID's chunks
+        # Filter by checking DB later, or rely on Qdrant payload
+        vectors.append(pt["vector"])
+        chunk_ids.append(cid)
+    offset = result.get("next_page_offset")
+    if offset is None:
+        break
+    print(f"  Read {len(vectors)} vectors...")
+
+print(f"Total vectors: {len(vectors)}")
+
+# Step 2: Filter to only our UUID's chunks (from DB)
+conn = psycopg2.connect(DB)
+cur = conn.cursor()
+cur.execute("SELECT chunk_id FROM dev.chunk WHERE file_uuid = %s AND chunk_type = 'sentence' ORDER BY id", (UUID,))
+db_chunk_ids = set(row[0] for row in cur.fetchall())
+print(f"DB chunk_ids: {len(db_chunk_ids)}")
+
+# Filter vectors to match DB chunks
+filtered_vectors = []
+filtered_chunk_ids = []
+for v, cid in zip(vectors, chunk_ids):
+    if cid in db_chunk_ids:
+        filtered_vectors.append(v)
+        filtered_chunk_ids.append(cid)
+
+vectors = filtered_vectors
+chunk_ids = filtered_chunk_ids
+print(f"Matched vectors: {len(vectors)}")
+
+# Sort by chunk_id (which is numeric string)
+indices = sorted(range(len(chunk_ids)), key=lambda i: int(chunk_ids[i]) if chunk_ids[i].isdigit() else 0)
+vectors = [vectors[i] for i in indices]
+chunk_ids = [chunk_ids[i] for i in indices]
+
+# Step 3: Read speaker_change from asr.json
+asr_path = f"/Users/accusys/momentry/output_dev/{UUID}.asr.json"
+with open(asr_path) as f:
+    asr_data = json.load(f)
+segments = asr_data.get("segments", [])
+speaker_changes = {}
+for seg in segments:
+    speaker_changes[seg["chunk_id"]] = seg.get("speaker_change", False)
+
+# Step 4: Cluster embeddings
+print("Clustering...")
+X = np.array(vectors)
+
+# Compute cosine distance matrix
+# Cosine distance = 1 - cosine_similarity
+cos_sim = cosine_similarity(X)
+cos_dist = 1 - cos_sim
+
+# Use AgglomerativeClustering with cosine distance
+# Determine optimal n_clusters by looking at speaker_change boundaries
+# First pass: use speaker_change as hard boundaries to get initial clusters
+# Then refine
+
+# Simpler: use a distance threshold
+n = len(vectors)
+labels = np.full(n, -1, dtype=int)
+current_speaker = 0
+
+# Start with first chunk as speaker 0
+labels[0] = current_speaker
+centroids = [np.array(vectors[0])]  # per-cluster centroid
+
+for i in range(1, n):
+    has_change = speaker_changes.get(chunk_ids[i], False)
+    vec = np.array(vectors[i])
+
+    if has_change:
+        # Speaker change: check if this is a NEW speaker or returning to a previous one
+        # Compare with centroid of current speaker vs others
+        similarities = [float(np.dot(vec, c) / (np.linalg.norm(vec) * np.linalg.norm(c) + 1e-10)) for c in centroids]
+        best_sim = max(similarities) if similarities else 0
+        best_cluster = similarities.index(best_sim) if similarities else 0
+
+        if best_sim > 0.65 and best_cluster != current_speaker:
+            # Returning to a previous speaker
+            labels[i] = best_cluster
+        elif best_sim < 0.55:
+            # New speaker
+            current_speaker = len(centroids)
+            labels[i] = current_speaker
+            centroids.append(vec)
+        else:
+            # Stay with current speaker (false change detection)
+            labels[i] = current_speaker
+            centroids[current_speaker] = (centroids[current_speaker] + vec) / 2
+    else:
+        # No speaker change: same speaker as previous
+        labels[i] = current_speaker
+        centroids[current_speaker] = (centroids[current_speaker] + vec) / 2
+
+n_speakers = len(set(labels))
+print(f"Identified {n_speakers} unique speakers")
+
+# Step 5: Update DB chunks with speaker assignment
+print("Updating DB chunks...")
+# Map: chunk_id -> speaker_id
+speaker_map = {}
+for cid, label in zip(chunk_ids, labels):
+    speaker_map[cid] = f"SPEAKER_{label}"
+
+updated = 0
+for cid, spk_id in speaker_map.items():
+    cur.execute("""
+        UPDATE dev.chunk SET metadata = COALESCE(metadata, '{}'::jsonb) || %s::jsonb
+        WHERE file_uuid = %s AND chunk_id = %s AND chunk_type = 'sentence'
+    """, (json.dumps({"speaker_id": spk_id}), UUID, cid))
+    updated += 1
+
+conn.commit()
+print(f"Updated {updated} chunks with speaker IDs")
+
+# Step 6: Save speaker map
+speaker_map_path = f"/Users/accusys/momentry/output_dev/{UUID}.speaker_map.json"
+with open(speaker_map_path, "w") as f:
+    json.dump({"speakers": n_speakers, "assignments": speaker_map}, f, indent=2)
+print(f"Speaker map saved: {speaker_map_path}")
+
+cur.close()
+conn.close()
+print("=== Done ===")
--- a/scripts/transcribe.py
+++ b/scripts/transcribe.py
@@ -0,0 +1,284 @@
+#!/opt/homebrew/bin/python3.11
+"""
+One-pass ASR + Speaker Change Detection + Split → asr.json
+"""
+import json, os, sys, time, argparse, subprocess, tempfile, shutil
+import numpy as np
+from pathlib import Path
+
+sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
+sys.path.insert(0, os.path.join(os.path.dirname(os.path.abspath(__file__)), "asrx_self"))
+from speaker_encoder import load_speaker_encoder, extract_speaker_embedding, normalize_embeddings
+import torchaudio
+from faster_whisper import WhisperModel
+
+SUB_WIN = 0.5
+SUB_STRIDE = 0.25
+MIN_DUR = 0.3
+SIM_THRESHOLD = 0.45
+CHANGE_CONFIRM = 2
+
+def extract_audio(video_path, tmp_dir, sr=16000):
+    wav_path = os.path.join(tmp_dir, "audio.wav")
+    subprocess.run(["ffmpeg", "-y", "-v", "quiet", "-i", video_path,
+        "-ar", str(sr), "-ac", "1", "-sample_fmt", "s16", wav_path],
+        check=True, capture_output=True, timeout=300)
+    wav_data, sr_actual = torchaudio.load(wav_path)
+    if wav_data.shape[0] > 1:
+        wav_data = wav_data.mean(dim=0, keepdim=True)
+    return wav_data, sr_actual
+
+def transcribe_pass1(model, wav_path, vad_params=None):
+    print("  [faster-whisper] Transcribing...")
+    if vad_params is None:
+        vad_params = {"min_silence_duration_ms": 500, "speech_pad_ms": 200}
+    segments, info = model.transcribe(wav_path, beam_size=5,
+        vad_filter=True, word_timestamps=True, vad_parameters=vad_params)
+    pass1 = []
+    for i, seg in enumerate(segments):
+        words = []
+        if seg.words:
+            for w in seg.words:
+                words.append({"word": w.word.strip(), "start": round(w.start,3), "end": round(w.end,3)})
+        pass1.append({
+            "index": i,
+            "start": round(seg.start, 3),
+            "end": round(seg.end, 3),
+            "text": seg.text.strip(),
+            "words": words,
+        })
+    print(f"  Pass1 segments: {len(pass1)}")
+    return pass1
+
+def detect_speaker_changes(wav_data, sr, pass1_segs, encoder, progress_step=100):
+    print("  [Speaker Detection] Scanning...")
+    ws = int(SUB_WIN * sr)
+    sw = int(SUB_STRIDE * sr)
+    change_points = []  # List[List[float]] → change times per pass1 segment
+    t0 = time.time()
+
+    for si, seg in enumerate(pass1_segs):
+        st = int(seg["start"] * sr)
+        et = int(seg["end"] * sr)
+        dur = seg["end"] - seg["start"]
+
+        if dur < 1.0:
+            change_points.append([])
+            continue
+
+        sub_embs = []
+        sub_times = []
+        for wpos in range(st, et - ws + 1, sw):
+            chunk = wav_data[:, wpos:wpos+ws]
+            emb = extract_speaker_embedding(encoder, chunk.numpy(), sr)
+            emb = emb / (np.linalg.norm(emb) + 1e-10)
+            sub_embs.append(emb)
+            sub_times.append(wpos / sr)
+
+        if len(sub_embs) < 3:
+            change_points.append([])
+            continue
+
+        sub_embs = normalize_embeddings(np.array(sub_embs))
+        cps = []
+        # Require CHANGE_CONFIRM consecutive low-similarity windows before registering a change
+        low_run = 0
+        for i in range(1, len(sub_embs)):
+            sim = float(np.dot(sub_embs[i-1], sub_embs[i]))
+            if sim < SIM_THRESHOLD:
+                low_run += 1
+                if low_run >= CHANGE_CONFIRM:
+                    # Change point at the START of the low-sim run
+                    cps.append(round(sub_times[i - low_run + 1], 2))
+                    low_run = 0
+            else:
+                low_run = 0
+        change_points.append(cps)
+
+        if (si + 1) % progress_step == 0:
+            pct = (si + 1) * 100 // len(pass1_segs)
+            print(f"    {si+1}/{len(pass1_segs)} ({pct}%) [{time.time()-t0:.0f}s]")
+
+    total_changes = sum(len(cps) for cps in change_points)
+    print(f"  Speaker changes detected: {total_changes} in {len(pass1_segs)} segments ({time.time()-t0:.0f}s)")
+    return change_points
+
+def build_segments(pass1_segs, change_points, wav_data, sr, asr_model, tmp_dir, fps=24.0):
+    print("  [Split] Building final segments...")
+    final = []
+    chunk_idx = 0
+
+    for si, seg in enumerate(pass1_segs):
+        cps = change_points[si]
+        if not cps:
+            final.append({
+                "chunk_id": str(chunk_idx),
+                "pass1_index": si,
+                "start_time": seg["start"],
+                "end_time": seg["end"],
+                "start_frame": int(seg["start"] * fps),
+                "end_frame": int(seg["end"] * fps),
+                "text": seg["text"],
+            })
+            chunk_idx += 1
+            continue
+
+        seg["split"] = True
+        boundaries = [seg["start"]] + cps + [seg["end"]]
+        for pi in range(len(boundaries) - 1):
+            ps, pe = boundaries[pi], boundaries[pi+1]
+            if pe - ps < MIN_DUR:
+                continue
+
+            # Try word_timestamp mapping first (wider tolerance)
+            sub_words = [w["word"] for w in seg["words"] if w["start"] >= ps - 0.3 and w["end"] <= pe + 0.3]
+            text = " ".join(sub_words).strip() if sub_words else ""
+
+            # Fallback: call faster-whisper on the sub-audio chunk
+            if not text:
+                import soundfile as sf
+                chunk_path = os.path.join(tmp_dir, f"sub_{chunk_idx}.wav")
+                a_chunk = wav_data[:, int(ps*sr):int(pe*sr)].numpy()[0]
+                if len(a_chunk) > sr * 0.3:  # skip if < 0.3s
+                    sf.write(chunk_path, a_chunk, sr)
+                    try:
+                        sub_segs, _ = asr_model.transcribe(chunk_path, beam_size=5,
+                            vad_filter=True, vad_parameters={"min_silence_duration_ms": 100})
+                        text = " ".join(s.text.strip() for s in sub_segs)
+                    except:
+                        pass
+                    os.remove(chunk_path)
+                if not text:
+                    text = " ".join([w["word"] for w in seg["words"]
+                        if w["start"] >= ps - 0.5 and w["end"] <= pe + 0.5]).strip()
+                if not text:
+                    text = seg["text"][:60]
+
+            final.append({
+                "chunk_id": str(chunk_idx),
+                "pass1_index": si,
+                "start_time": round(ps, 3),
+                "end_time": round(pe, 3),
+                "start_frame": int(ps * fps),
+                "end_frame": int(pe * fps),
+                "text": text,
+                "speaker_change": True,
+            })
+            chunk_idx += 1
+
+    print(f"  Final segments: {len(final)}")
+    return final
+
+def voice_vectors_to_qdrant(wav_data, sr, final_segs, encoder, qdrant_url="http://localhost:6333"):
+    print("  [Voice Vectors] Extracting 192D embeddings...")
+    embeddings = []
+    t0 = time.time()
+    for si, seg in enumerate(final_segs):
+        st = int(seg["start_time"] * sr)
+        et = int(seg["end_time"] * sr)
+        a_chunk = wav_data[:, st:et]
+        emb = extract_speaker_embedding(encoder, a_chunk.numpy(), sr)
+        emb = emb / (np.linalg.norm(emb) + 1e-10)
+        embeddings.append({"chunk_id": seg["chunk_id"], "embedding": emb.tolist()})
+        if (si + 1) % 500 == 0:
+            print(f"    {si+1}/{len(final_segs)} [{time.time()-t0:.0f}s]")
+
+    print(f"  Writing to Qdrant...")
+    from urllib.request import Request, urlopen
+    batch = []
+    for i, e in enumerate(embeddings):
+        batch.append({"id": i + 1, "vector": e["embedding"],
+            "payload": {"chunk_id": e["chunk_id"], "chunk_type": "sentence"}})
+        if len(batch) >= 100:
+            req = Request(f"{qdrant_url}/collections/momentry_dev_voice/points?wait=true",
+                data=json.dumps({"points": batch}).encode(),
+                headers={"Content-Type": "application/json"}, method="PUT")
+            try: urlopen(req)
+            except: pass
+            batch = []
+    # Flush remaining
+    if batch:
+        req = Request(f"{qdrant_url}/collections/momentry_dev_voice/points?wait=true",
+            data=json.dumps({"points": batch}).encode(),
+            headers={"Content-Type": "application/json"}, method="PUT")
+        try: urlopen(req)
+        except: pass
+
+    print(f"  Voice vectors: {len(embeddings)} pts → Qdrant [{time.time()-t0:.0f}s]")
+    return embeddings
+
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--video", default="/Users/accusys/momentry/var/sftpgo/data/demo/Charade (1963) Cary Grant & Audrey Hepburn ｜ Comedy Mystery Romance Thriller ｜ Full Movie.mp4")
+    parser.add_argument("--output", help="Output path for asr.json", default="/Users/accusys/momentry/output_dev/aeed71342a899fe4b4c57b7d41bcb692.asr.json")
+    parser.add_argument("--sample", type=int, help="Process only first N pass1 segments (for testing)")
+    parser.add_argument("--no-qdrant", action="store_true", help="Skip Qdrant upload")
+    args = parser.parse_args()
+
+    t0 = time.time()
+
+    # Load models
+    print("=== Loading Models ===")
+    asr_model = WhisperModel("small", device="cpu", compute_type="int8")
+    print("  faster-whisper small loaded")
+    encoder = load_speaker_encoder()
+    print("  ECAPA-TDNN loaded")
+    print()
+
+    # Extract audio
+    print("=== Audio Extraction ===")
+    tmp_dir = tempfile.mkdtemp(prefix="transcribe_")
+    wav_data, sr = extract_audio(args.video, tmp_dir)
+    print(f"  Audio: {wav_data.shape[1]/sr:.0f}s, {sr}Hz")
+    wav_path = os.path.join(tmp_dir, "audio.wav")
+    print()
+
+    # Step 1: faster-whisper pass1
+    print("=== Step 1: Pass1 Transcription ===")
+    pass1_segs = transcribe_pass1(asr_model, wav_path)
+    if args.sample:
+        pass1_segs = pass1_segs[:args.sample]
+        print(f"  SAMPLE MODE: limiting to {args.sample} segments")
+    print()
+
+    # Step 2: Speaker change detection
+    print("=== Step 2: Speaker Change Detection ===")
+    change_points = detect_speaker_changes(wav_data, sr, pass1_segs, encoder)
+    print()
+
+    # Step 3: Build final segments
+    print("=== Step 3: Build Final Segments ===")
+    final_segs = build_segments(pass1_segs, change_points, wav_data, sr, asr_model, tmp_dir)
+    print()
+
+    # Step 4: Voice vectors → Qdrant
+    if not args.no_qdrant:
+        print("=== Step 4: Voice Vectors → Qdrant ===")
+        voice_vectors_to_qdrant(wav_data, sr, final_segs, encoder)
+        print()
+
+    # Step 5: Write asr.json
+    print("=== Step 5: Write asr.json ===")
+    uuid = os.path.basename(args.output).replace(".asr.json", "")
+    output = {
+        "file_uuid": uuid,
+        "pass1": pass1_segs,
+        "segments": final_segs,
+    }
+    with open(args.output, "w") as f:
+        json.dump(output, f, indent=2, ensure_ascii=False)
+    sz = os.path.getsize(args.output)
+    print(f"  {args.output} ({sz/1024:.0f} KB)")
+
+    # Cleanup
+    shutil.rmtree(tmp_dir, ignore_errors=True)
+
+    elapsed = time.time() - t0
+    print(f"\n=== Done ({elapsed:.0f}s) ===")
+    print(f"  Pass1 segments: {len(pass1_segs)}")
+    print(f"  Final segments: {len(final_segs)}")
+    fp = args.output
+    print(f"  Output: {fp}")
+
+if __name__ == "__main__":
+    main()
--- a/scripts/vec0.dylib
+++ b/scripts/vec0.dylib
--- a/scripts/vectorize_chunks.py
+++ b/scripts/vectorize_chunks.py
@@ -0,0 +1,69 @@
+#!/opt/homebrew/bin/python3.11
+"""Vectorize sentence chunks via Ollama mxbai-embed-large and store in DB + Qdrant."""
+import json, sys, time
+import psycopg2
+from urllib.request import Request, urlopen
+
+DB = "dbname=momentry user=accusys"
+UUID = sys.argv[1] if len(sys.argv) > 1 else "23b1c872379d4ec06479e5ed39eef4c5"
+OLLAMA = "http://localhost:11434/api/embeddings"
+QDRANT = "http://localhost:6333"
+
+conn = psycopg2.connect(DB)
+cur = conn.cursor()
+
+cur.execute("""
+    SELECT chunk_id, text_content FROM dev.chunk
+    WHERE file_uuid = %s AND chunk_type = 'sentence'
+    AND (text_content IS NOT NULL AND text_content != '')
+    ORDER BY id
+""", (UUID,))
+rows = cur.fetchall()
+print(f"Vectorizing {len(rows)} chunks for {UUID}...")
+
+stored = 0
+batch = []
+for chunk_id, text in rows:
+    req = Request(OLLAMA, data=json.dumps({
+        "model": "nomic-embed-text-v2-moe:latest",
+        "prompt": text
+    }).encode(), headers={"Content-Type": "application/json"})
+    resp = json.loads(urlopen(req).read())
+    embedding = resp["embedding"]
+    
+    # Store in PostgreSQL chunk_vectors
+    cur.execute("""
+        INSERT INTO dev.chunk_vectors (chunk_id, uuid, chunk_type, embedding)
+        VALUES (%s, %s, 'sentence', %s::jsonb)
+        ON CONFLICT (chunk_id, uuid) DO UPDATE SET embedding = EXCLUDED.embedding
+    """, (chunk_id, UUID, json.dumps(embedding)))
+    
+    # Batch for Qdrant
+    batch.append({
+        "id": int(chunk_id) + 1 if chunk_id.isdigit() else len(batch) + 10000,
+        "vector": embedding,
+        "payload": {"chunk_id": chunk_id, "chunk_type": "sentence"}
+    })
+    
+    if len(batch) >= 100:
+        req = Request(f"{QDRANT}/collections/momentry_dev_rule1_v2/points?wait=true",
+            data=json.dumps({"points": batch}).encode(),
+            headers={"Content-Type": "application/json"}, method="PUT")
+        urlopen(req)
+        batch = []
+    
+    stored += 1
+    if stored % 50 == 0:
+        print(f"  {stored}/{len(rows)}")
+        conn.commit()
+
+if batch:
+    req = Request(f"{QDRANT}/collections/momentry_dev_rule1_v2/points?wait=true",
+        data=json.dumps({"points": batch}).encode(),
+        headers={"Content-Type": "application/json"}, method="PUT")
+    urlopen(req)
+
+conn.commit()
+cur.close()
+conn.close()
+print(f"Done: {stored} vectors stored")