feat: ASRX hybrid pipeline, identity history, worker fixes, checkpoint system

2026-06-02 07:13:23 +08:00
parent e3066c3f49
commit e1572907ae
198 changed files with 43705 additions and 8910 deletions
--- a/src/api/agent_search.rs
+++ b/src/api/agent_search.rs
@@ -1,10 +1,4 @@
-use axum::{
-    extract::State,
-    http::StatusCode,
-    response::Json,
-    routing::post,
-    Router,
-};
+use axum::{extract::State, http::StatusCode, response::Json, routing::post, Router};
 use once_cell::sync::Lazy;
 use serde::{Deserialize, Serialize};
 use std::collections::HashMap;
@@ -13,7 +7,10 @@ use std::time::Instant;

 use crate::api::types::AppState;
 use crate::core::db::schema;
-use crate::core::llm::function_calling::{self, ChatMessage, LlmResponse, ToolCall, ToolDef};
+use crate::core::llm::function_calling::{
+    self, call_llm_vision, ChatMessage, LlmResponse, ToolCall, ToolDef,
+};
+use base64::{engine::general_purpose::STANDARD as BASE64, Engine};

 // ── Conversation Manager ─────────────────────────────────────────

@@ -43,11 +40,14 @@ fn get_or_create_conv(conv_id: Option<&str>) -> (String, Vec<ChatMessage>) {
        }
    }
    let id = uuid::Uuid::new_v4().to_string().replace('-', "")[..16].to_string();
-    map.insert(id.clone(), Conversation {
-        messages: Vec::new(),
-        created_at: Instant::now(),
-        last_active: Instant::now(),
-    });
+    map.insert(
+        id.clone(),
+        Conversation {
+            messages: Vec::new(),
+            created_at: Instant::now(),
+            last_active: Instant::now(),
+        },
+    );
    (id, Vec::new())
 }

@@ -85,8 +85,13 @@ const SYSTEM_PROMPT: &str = r#"你是 Momentry 影片分析助手。回答用戶
 ## 工具使用規則
 1. 先確認用戶在問哪部影片 — 使用 find_file 或 list_files
 2. 人物問題優先使用 tkg_query
-3. 語意/內容問題使用 smart_search 或 universal_search
-4. 可以同時呼叫多個工具
+3. 人物台詞/發言問題使用 identities_search（輸入人名→回傳台詞片段）
+4. 人物對話互動（誰跟誰說話）使用 tkg_query 的 speaker_interaction
+5. 人物台詞內容使用 tkg_query 的 speaker_dialogue
+6. 用文字反查人物使用 identity_text（輸入關鍵字→找出誰說/提到這段話）
+7. 語意/內容問題使用 smart_search 或 universal_search
+8. 畫面分析使用 analyze_frame — 可以分析影片中的任何畫面內容（場景、人物表情、動作、物件等）
+9. 可以同時呼叫多個工具

 ## 引導規則
 - 如果用戶沒說片名 → 用 find_file 搜尋，如果名稱不明確就反問
@@ -120,16 +125,16 @@ fn make_tools(pool: &sqlx::PgPool) -> Vec<ToolDef> {
        ),
        function_calling::make_tool(
            "tkg_query",
-            "查詢影片的人物互動、配對、同框資料。query_type 包括：top_identities（人物排名）、first_cooccurrence（第一次同框）、identity_details（人物詳細）、mutual_gaze（互看）、interaction_network（互動網絡）、identity_traces（出場片段）、file_info（影片資訊）。",
+            "查詢影片的人物互動、配對、同框、台詞資料。query_type 包括：top_identities（人物排名）、first_cooccurrence（第一次同框）、identity_details（人物詳細）、mutual_gaze（互看）、interaction_network（互動網絡）、identity_traces（出場片段）、file_info（影片資訊）、speaker_dialogue（人物台詞）、speaker_interaction（兩人對話互動）。",
            serde_json::json!({
                "file_uuid": {"type": "string", "description": "影片 UUID"},
                "query_type": {
                    "type": "string",
-                    "enum": ["top_identities", "first_cooccurrence", "identity_details", "mutual_gaze", "interaction_network", "identity_traces", "file_info"],
+                    "enum": ["top_identities", "first_cooccurrence", "identity_details", "mutual_gaze", "interaction_network", "identity_traces", "file_info", "speaker_dialogue", "speaker_interaction"],
                    "description": "查詢類型"
                },
-                "identity_name": {"type": "string", "description": "人物名稱（配合 identity_details / identity_traces）"},
-                "identity_b": {"type": "string", "description": "第二人物名稱（配合 first_cooccurrence / mutual_gaze）"},
+                "identity_name": {"type": "string", "description": "人物名稱（配合 identity_details / identity_traces / speaker_dialogue / speaker_interaction）"},
+                "identity_b": {"type": "string", "description": "第二人物名稱（配合 first_cooccurrence / mutual_gaze / speaker_interaction）"},
                "limit": {"type": "integer", "default": 5}
            }),
            vec!["file_uuid", "query_type"],
@@ -144,6 +149,26 @@ fn make_tools(pool: &sqlx::PgPool) -> Vec<ToolDef> {
            }),
            vec!["query"],
        ),
+        function_calling::make_tool(
+            "identity_text",
+            "搜尋文字關鍵字，找出有提及該內容的影片人物。適合回答「誰說了OOO」、「誰跟OOO有關」。不是查詢人物的台詞，而是用文字反查人物。",
+            serde_json::json!({
+                "q": {"type": "string", "description": "搜尋關鍵字（台詞片段、主題等）"},
+                "file_uuid": {"type": "string", "description": "限制搜尋範圍（可選）"},
+                "limit": {"type": "integer", "default": 10}
+            }),
+            vec!["q"],
+        ),
+        function_calling::make_tool(
+            "identities_search",
+            "查詢特定人物的台詞/發言內容。輸入人物名稱，回傳該人物在影片中說過的話。適合回答「某某人說了什麼」、「某某人的台詞」。",
+            serde_json::json!({
+                "q": {"type": "string", "description": "人物名稱關鍵字（姓名、角色名、別名）"},
+                "file_uuid": {"type": "string", "description": "限制搜尋範圍（可選）"},
+                "limit": {"type": "integer", "default": 10}
+            }),
+            vec!["q"],
+        ),
        function_calling::make_tool(
            "get_identity_detail",
            "查詢單一身份的詳細資料（名字、角色、TMDb 資訊）。",
@@ -168,6 +193,16 @@ fn make_tools(pool: &sqlx::PgPool) -> Vec<ToolDef> {
            }),
            vec!["file_uuid"],
        ),
+        function_calling::make_tool(
+            "analyze_frame",
+            "分析影片中指定畫面的視覺內容（場景、人物表情、動作、物件等）。若不指定 frame_number，會使用代表性畫面。問題會傳給視覺 LLM 分析。",
+            serde_json::json!({
+                "file_uuid": {"type": "string", "description": "影片 UUID"},
+                "question": {"type": "string", "description": "關於畫面的問題，例如「這個場景發生什麼事？」"},
+                "frame_number": {"type": "integer", "description": "指定的 frame 編號（可選）"}
+            }),
+            vec!["file_uuid"],
+        ),
    ]
 }

@@ -193,9 +228,10 @@ async fn exec_find_file(pool: &sqlx::PgPool, args: &serde_json::Value) -> Result
    if rows.is_empty() {
        return Ok(serde_json::json!({"found": false, "message": "No files match the query. Try different keywords."}).to_string());
    }
-    let files: Vec<serde_json::Value> = rows.into_iter().map(|(u, n, hd)| {
-        serde_json::json!({"file_uuid": u, "file_name": n, "has_data": hd})
-    }).collect();
+    let files: Vec<serde_json::Value> = rows
+        .into_iter()
+        .map(|(u, n, hd)| serde_json::json!({"file_uuid": u, "file_name": n, "has_data": hd}))
+        .collect();
    Ok(serde_json::json!({"found": true, "files": files}).to_string())
 }

@@ -214,15 +250,19 @@ async fn exec_list_files(pool: &sqlx::PgPool, args: &serde_json::Value) -> Resul
    .await
    .map_err(|e| e.to_string())?;

-    let files: Vec<serde_json::Value> = rows.into_iter().map(|(u, n, hd)| {
-        serde_json::json!({"file_uuid": u, "file_name": n, "has_data": hd})
-    }).collect();
+    let files: Vec<serde_json::Value> = rows
+        .into_iter()
+        .map(|(u, n, hd)| serde_json::json!({"file_uuid": u, "file_name": n, "has_data": hd}))
+        .collect();
    Ok(serde_json::json!({"files": files}).to_string())
 }

 async fn exec_tkg_query(pool: &sqlx::PgPool, args: &serde_json::Value) -> Result<String, String> {
    let file_uuid = args.get("file_uuid").and_then(|v| v.as_str()).unwrap_or("");
-    let query_type = args.get("query_type").and_then(|v| v.as_str()).unwrap_or("");
+    let query_type = args
+        .get("query_type")
+        .and_then(|v| v.as_str())
+        .unwrap_or("");
    let identity_name = args.get("identity_name").and_then(|v| v.as_str());
    let identity_b = args.get("identity_b").and_then(|v| v.as_str());
    let limit = args.get("limit").and_then(|v| v.as_i64()).unwrap_or(5);
@@ -242,9 +282,11 @@ async fn exec_tkg_query(pool: &sqlx::PgPool, args: &serde_json::Value) -> Result
                 GROUP BY i.uuid, i.name ORDER BY face_count DESC LIMIT $2",
                fd_table, id_table
            ))
-            .bind(file_uuid).bind(limit)
+            .bind(file_uuid)
+            .bind(limit)
            .fetch_all(pool)
-            .await.map_err(|e| e.to_string())?;
+            .await
+            .map_err(|e| e.to_string())?;
            Ok(serde_json::json!({"identities": rows}).to_string())
        }
        "first_cooccurrence" => {
@@ -325,8 +367,9 @@ async fn exec_tkg_query(pool: &sqlx::PgPool, args: &serde_json::Value) -> Result
        }
        "identity_traces" => {
            let name = identity_name.unwrap_or("");
-            let rows: Vec<(i32, i64, i32, i32)> = sqlx::query_as(&format!(
-                "SELECT fd.trace_id, COUNT(*)::bigint, MIN(fd.frame_number)::int, MAX(fd.frame_number)::int \
+            // MIN/MAX(frame_number) are cast to bigint and decoded as i64 (previously int / i32).
+            let rows: Vec<(i32, i64, i64, i64)> = sqlx::query_as(&format!(
+                "SELECT fd.trace_id, COUNT(*)::bigint, MIN(fd.frame_number)::bigint, MAX(fd.frame_number)::bigint \
                 FROM {} fd JOIN {} i ON i.id = fd.identity_id \
                 WHERE fd.file_uuid = $1 AND i.name ILIKE $2 \
                 GROUP BY fd.trace_id ORDER BY COUNT(*) DESC LIMIT $3",
@@ -344,14 +387,133 @@ async fn exec_tkg_query(pool: &sqlx::PgPool, args: &serde_json::Value) -> Result
            ))
            .bind(file_uuid)
            .fetch_optional(pool)
-            .await.map_err(|e| e.to_string())?;
+            .await
+            .map_err(|e| e.to_string())?;
            Ok(serde_json::json!({"file_info": row.map(|(n, d, w, h, f)| serde_json::json!({"file_name": n, "duration_sec": d, "width": w, "height": h, "fps": f}))}).to_string())
        }
-        _ => Ok(serde_json::json!({"error": format!("Unknown query_type: {}", query_type)}).to_string()),
+        "speaker_dialogue" => {
+            let name = identity_name.unwrap_or("");
+            let rows: Vec<(String, Option<String>)> = sqlx::query_as(&format!(
+                "SELECT DISTINCT sn.external_id, sn.properties->>'full_text' AS full_text \
+                 FROM {} i \
+                 JOIN {} fd ON fd.identity_id = i.id AND ($2::text IS NULL OR fd.file_uuid = $2) \
+                 JOIN {} fn ON fn.file_uuid = fd.file_uuid \
+                     AND fn.node_type = 'face_trace' \
+                     AND fn.external_id = CONCAT('trace_', fd.trace_id) \
+                 JOIN {} e ON e.source_node_id = fn.id \
+                     AND e.edge_type = 'SPEAKS_AS' \
+                     AND ($2::text IS NULL OR e.file_uuid = $2) \
+                 JOIN {} sn ON sn.id = e.target_node_id \
+                 WHERE i.name ILIKE $1 \
+                 LIMIT $3",
+                id_table, fd_table, nodes, edges, nodes
+            ))
+            .bind(name)
+            .bind(file_uuid)
+            .bind(limit)
+            .fetch_all(pool)
+            .await
+            .map_err(|e| e.to_string())?;
+
+            Ok(
+                serde_json::json!({"speakers": rows.iter().map(|(sid, text)| {
+                serde_json::json!({"speaker_id": sid, "dialogue": text})
+            }).collect::<Vec<_>>()})
+                .to_string(),
+            )
+        }
+        "speaker_interaction" => {
+            let name_a = identity_name.unwrap_or("");
+            let name_b = identity_b.unwrap_or("");
+            if name_a.is_empty() || name_b.is_empty() {
+                return Ok(
+                    serde_json::json!({"error": "identity_name and identity_b are required"})
+                        .to_string(),
+                );
+            }
+
+            // Get both speakers' segments from TKG
+            let rows: Vec<(String, String, serde_json::Value)> = sqlx::query_as(&format!(
+                "SELECT sn.external_id, sn.properties->>'full_text' AS full_text, sn.properties->'segments' AS segments \
+                 FROM {} i \
+                 JOIN {} fd ON fd.identity_id = i.id AND ($3::text IS NULL OR fd.file_uuid = $3) \
+                 JOIN {} fn ON fn.file_uuid = fd.file_uuid \
+                     AND fn.node_type = 'face_trace' \
+                     AND fn.external_id = CONCAT('trace_', fd.trace_id) \
+                 JOIN {} e ON e.source_node_id = fn.id \
+                     AND e.edge_type = 'SPEAKS_AS' \
+                     AND ($3::text IS NULL OR e.file_uuid = $3) \
+                 JOIN {} sn ON sn.id = e.target_node_id \
+                 WHERE (i.name ILIKE $1 OR i.name ILIKE $2) \
+                 ORDER BY sn.external_id",
+                id_table, fd_table, nodes, edges, nodes
+            ))
+            .bind(name_a)
+            .bind(name_b)
+            .bind(file_uuid)
+            .fetch_all(pool)
+            .await
+            .map_err(|e| e.to_string())?;
+
+            let mut interactions = Vec::new();
+            for i in 0..rows.len() {
+                for j in i + 1..rows.len() {
+                    let (sid_a, text_a, segs_a_val) = &rows[i];
+                    let (sid_b, text_b, segs_b_val) = &rows[j];
+                    let segs_a = segs_a_val.as_array();
+                    let segs_b = segs_b_val.as_array();
+                    if let (Some(a_list), Some(b_list)) = (segs_a, segs_b) {
+                        for sa in a_list {
+                            let sa_start = sa.get("start").and_then(|v| v.as_f64()).unwrap_or(0.0);
+                            let sa_end = sa.get("end").and_then(|v| v.as_f64()).unwrap_or(0.0);
+                            let sa_text = sa.get("text").and_then(|v| v.as_str()).unwrap_or("");
+                            if sa_text.is_empty() {
+                                continue;
+                            }
+                            for sb in b_list {
+                                let sb_start =
+                                    sb.get("start").and_then(|v| v.as_f64()).unwrap_or(0.0);
+                                let sb_end = sb.get("end").and_then(|v| v.as_f64()).unwrap_or(0.0);
+                                let sb_text = sb.get("text").and_then(|v| v.as_str()).unwrap_or("");
+                                if sb_text.is_empty() {
+                                    continue;
+                                }
+                                // Check temporal overlap
+                                let overlap_start = sa_start.max(sb_start);
+                                let overlap_end = sa_end.min(sb_end);
+                                if overlap_start < overlap_end {
+                                    interactions.push(serde_json::json!({
+                                        "speaker_a": sid_a,
+                                        "speaker_b": sid_b,
+                                        "time_range_s": [overlap_start, overlap_end],
+                                        "dialogue_a": sa_text,
+                                        "dialogue_b": sb_text,
+                                    }));
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+            interactions.sort_by(|a, b| {
+                let a_start = a["time_range_s"][0].as_f64().unwrap_or(0.0);
+                let b_start = b["time_range_s"][0].as_f64().unwrap_or(0.0);
+                a_start.partial_cmp(&b_start).unwrap()
+            });
+            interactions.truncate(limit as usize);
+
+            Ok(serde_json::json!({"interactions": interactions, "speaker_a_text": rows.first().map(|r| r.1.clone()), "speaker_b_text": rows.get(1).map(|r| r.1.clone())}).to_string())
+        }
+        _ => Ok(
+            serde_json::json!({"error": format!("Unknown query_type: {}", query_type)}).to_string(),
+        ),
    }
 }

-async fn exec_smart_search(_pool: &sqlx::PgPool, args: &serde_json::Value) -> Result<String, String> {
+async fn exec_smart_search(
+    _pool: &sqlx::PgPool,
+    args: &serde_json::Value,
+) -> Result<String, String> {
    let query = args.get("query").and_then(|v| v.as_str()).unwrap_or("");
    let file_uuid = args.get("file_uuid").and_then(|v| v.as_str());
    let limit = args.get("limit").and_then(|v| v.as_i64()).unwrap_or(5);
@@ -359,7 +521,8 @@ async fn exec_smart_search(_pool: &sqlx::PgPool, args: &serde_json::Value) -> Re
    let chunk_table = schema::table_name("chunk");
    let mut sql = format!(
        "SELECT chunk_id, text_content, start_frame, end_frame, chunk_type \
-         FROM {} WHERE text_content ILIKE $1", chunk_table
+         FROM {} WHERE text_content ILIKE $1",
+        chunk_table
    );
    if file_uuid.is_some() {
        sql.push_str(" AND file_uuid = $2");
@@ -369,21 +532,147 @@ async fn exec_smart_search(_pool: &sqlx::PgPool, args: &serde_json::Value) -> Re
    if let Some(fuid) = file_uuid {
        let like = format!("%{}%", query);
        let rows: Vec<(String, Option<String>, i64, i64, String)> = sqlx::query_as(&sql)
-            .bind(&like).bind(fuid)
+            .bind(&like)
+            .bind(fuid)
            .fetch_all(_pool)
-            .await.map_err(|e| e.to_string())?;
+            .await
+            .map_err(|e| e.to_string())?;
        Ok(serde_json::json!({"results": rows}).to_string())
    } else {
        let like = format!("%{}%", query);
        let rows: Vec<(String, Option<String>, i64, i64, String)> = sqlx::query_as(&sql)
            .bind(&like)
            .fetch_all(_pool)
-            .await.map_err(|e| e.to_string())?;
+            .await
+            .map_err(|e| e.to_string())?;
        Ok(serde_json::json!({"results": rows}).to_string())
    }
 }

-async fn exec_get_identity_detail(pool: &sqlx::PgPool, args: &serde_json::Value) -> Result<String, String> {
+/// Reverse lookup: finds identities whose face detections fall inside matching chunks.
+///
+/// `args` carries `q` (keyword matched against chunk text), an optional `file_uuid` scope
+/// and `limit` (default 10, clamped to 1..=50). Returns `{"results": [...]}` as JSON text;
+/// database errors become `Err` carrying the error text.
+async fn exec_identity_text(
+    pool: &sqlx::PgPool,
+    args: &serde_json::Value,
+) -> Result<String, String> {
+    let q = args.get("q").and_then(|v| v.as_str()).unwrap_or("");
+    let file_uuid = args.get("file_uuid").and_then(|v| v.as_str());
+    // Clamp both ends: PostgreSQL rejects a negative LIMIT.
+    let limit = args.get("limit").and_then(|v| v.as_i64()).unwrap_or(10).clamp(1, 50);
+
+    let chunk_table = schema::table_name("chunk");
+    let fd_table = schema::table_name("face_detections");
+    let id_table = schema::table_name("identities");
+    // Backslash is LIKE's default escape character; doubling `%` leaves it a wildcard.
+    let like_q = format!("%{}%", q.replace('\\', "\\\\").replace('%', "\\%").replace('_', "\\_"));
+
+    // DISTINCT collapses the per-frame fan-out of the face_detections join, which would
+    // otherwise fill LIMIT with copies of the same (chunk, identity, trace) row.
+    let sql = format!(
+        "SELECT DISTINCT c.chunk_id, c.start_time, c.end_time, c.text_content, \
+                i.name AS identity_name, fd.trace_id, i.source AS identity_source \
+         FROM {} c \
+         JOIN {} fd ON fd.file_uuid = c.file_uuid \
+             AND fd.frame_number BETWEEN c.start_frame AND c.end_frame \
+             AND fd.identity_id IS NOT NULL \
+         JOIN {} i ON i.id = fd.identity_id \
+         WHERE ($1::text IS NULL OR c.file_uuid = $1) \
+           AND (LOWER(c.text_content) LIKE LOWER($2) OR LOWER(c.content::text) LIKE LOWER($2)) \
+         ORDER BY c.start_time \
+         LIMIT $3",
+        chunk_table, fd_table, id_table
+    );
+
+    // `source` is decoded as nullable, matching how get_identity_detail reads the same column.
+    type Row = (String, f64, f64, Option<String>, String, Option<i32>, Option<String>);
+    let rows: Vec<Row> = sqlx::query_as(&sql)
+        .bind(file_uuid)
+        .bind(&like_q)
+        .bind(limit)
+        .fetch_all(pool)
+        .await
+        .map_err(|e| e.to_string())?;
+
+    let results: Vec<serde_json::Value> = rows
+        .iter()
+        .map(|(chunk_id, st, et, txt, name, tid, src)| {
+            serde_json::json!({
+                "chunk_id": chunk_id,
+                "start_time": st,
+                "end_time": et,
+                "text": txt,
+                "identity_name": name,
+                "trace_id": tid,
+                "source": src
+            })
+        })
+        .collect();
+    Ok(serde_json::json!({"results": results}).to_string())
+}
+
+/// Looks up identities by name or alias and returns the chunks that overlap their face
+/// detections in time, as `{"results": [...]}` JSON text. `limit` is clamped to 1..=50.
+async fn exec_identities_search(
+    pool: &sqlx::PgPool,
+    args: &serde_json::Value,
+) -> Result<String, String> {
+    let q = args.get("q").and_then(|v| v.as_str()).unwrap_or("");
+    let file_uuid = args.get("file_uuid").and_then(|v| v.as_str());
+    let limit = args.get("limit").and_then(|v| v.as_i64()).unwrap_or(10).clamp(1, 50);
+
+    let id_table = schema::table_name("identities");
+    let fd_table = schema::table_name("face_detections");
+    let chunk_table = schema::table_name("chunk");
+    // Backslash is LIKE's default escape character; doubling `%` leaves it a wildcard.
+    let like_q = format!("%{}%", q.replace('\\', "\\\\").replace('%', "\\%").replace('_', "\\_"));
+
+    let sql = format!(
+        "SELECT DISTINCT ON (i.name, c.chunk_id) \
+                i.name, c.chunk_id, c.start_time, c.end_time, c.text_content, fd.trace_id \
+         FROM {} i \
+         JOIN {} fd ON fd.identity_id = i.id \
+         JOIN {} c ON c.file_uuid = fd.file_uuid \
+             AND c.start_time <= fd.frame_number / COALESCE(c.fps, 25.0) \
+             AND c.end_time >= fd.frame_number / COALESCE(c.fps, 25.0) \
+         WHERE (i.name ILIKE $1 \
+             OR EXISTS (SELECT 1 FROM jsonb_array_elements(i.metadata->'aliases') AS a WHERE a->>'name' ILIKE $1)) \
+           AND ($2::text IS NULL OR fd.file_uuid = $2) \
+         ORDER BY i.name, c.chunk_id, c.start_time \
+         LIMIT $3",
+        id_table, fd_table, chunk_table
+    );
+
+    let rows: Vec<(String, String, f64, f64, Option<String>, Option<i32>)> = sqlx::query_as(&sql)
+        .bind(&like_q)
+        .bind(file_uuid)
+        .bind(limit)
+        .fetch_all(pool)
+        .await
+        .map_err(|e| e.to_string())?;
+
+    let results: Vec<serde_json::Value> = rows
+        .iter()
+        .map(|(name, chunk_id, st, et, txt, tid)| {
+            serde_json::json!({
+                "identity_name": name,
+                "chunk_id": chunk_id,
+                "start_time": st,
+                "end_time": et,
+                "text": txt,
+                "trace_id": tid,
+            })
+        })
+        .collect();
+    Ok(serde_json::json!({"results": results}).to_string())
+}
+
+async fn exec_get_identity_detail(
+    pool: &sqlx::PgPool,
+    args: &serde_json::Value,
+) -> Result<String, String> {
    let name = args.get("name").and_then(|v| v.as_str()).unwrap_or("");
    let id_table = schema::table_name("identities");
    let row: Option<(String, String, Option<String>, Option<i32>, Option<String>)> = sqlx::query_as(&format!(
@@ -396,7 +685,10 @@ async fn exec_get_identity_detail(pool: &sqlx::PgPool, args: &serde_json::Value)
    Ok(serde_json::json!({"identity": row.map(|(u, n, s, t, c)| serde_json::json!({"uuid": u, "name": n, "source": s, "tmdb_id": t, "character": c}))}).to_string())
 }

-async fn exec_get_file_info(pool: &sqlx::PgPool, args: &serde_json::Value) -> Result<String, String> {
+async fn exec_get_file_info(
+    pool: &sqlx::PgPool,
+    args: &serde_json::Value,
+) -> Result<String, String> {
    let file_uuid = args.get("file_uuid").and_then(|v| v.as_str()).unwrap_or("");
    let videos = schema::table_name("videos");
    let row: Option<(String, f64, i32, i32, f64)> = sqlx::query_as(&format!(
@@ -405,11 +697,15 @@ async fn exec_get_file_info(pool: &sqlx::PgPool, args: &serde_json::Value) -> Re
    ))
    .bind(file_uuid)
    .fetch_optional(pool)
-    .await.map_err(|e| e.to_string())?;
+    .await
+    .map_err(|e| e.to_string())?;
    Ok(serde_json::json!({"file_info": row.map(|(n, d, w, h, f)| serde_json::json!({"file_name": n, "duration_sec": d, "width": w, "height": h, "fps": f}))}).to_string())
 }

-async fn exec_get_representative_frame(pool: &sqlx::PgPool, args: &serde_json::Value) -> Result<String, String> {
+async fn exec_get_representative_frame(
+    pool: &sqlx::PgPool,
+    args: &serde_json::Value,
+) -> Result<String, String> {
    let file_uuid = args.get("file_uuid").and_then(|v| v.as_str()).unwrap_or("");
    match crate::core::processor::tkg::query_auto_representative_frame(pool, file_uuid).await {
        Ok(r) => Ok(serde_json::json!({
@@ -417,24 +713,131 @@ async fn exec_get_representative_frame(pool: &sqlx::PgPool, args: &serde_json::V
            "face_quality": r.face_quality,
            "main_identities": r.main_identities,
            "traces": r.traces,
-        }).to_string()),
+        })
+        .to_string()),
        Err(e) => Ok(serde_json::json!({"error": e.to_string()}).to_string()),
    }
 }

+/// Extracts one frame of a video with ffmpeg and asks the vision LLM `question` about it.
+///
+/// `args` carries `file_uuid` (required), `question` (optional; a default prompt is used) and
+/// `frame_number` (optional). Without it the representative frame is used; if that lookup
+/// fails, the midpoint of the video (frame 0 when the duration is unknown). Returns JSON
+/// text with `frame_number`, `timestamp_secs` and `analysis`. A missing `file_uuid` or a
+/// failed extraction is reported as `Ok({"error": ...})`; a missing video, database error,
+/// ffmpeg spawn failure or vision-call failure as `Err`.
+async fn exec_analyze_frame(
+    pool: &sqlx::PgPool,
+    args: &serde_json::Value,
+) -> Result<String, String> {
+    let file_uuid = args.get("file_uuid").and_then(|v| v.as_str()).unwrap_or("");
+    let question = args
+        .get("question")
+        .and_then(|v| v.as_str())
+        .unwrap_or("請描述這個畫面中的內容");
+
+    if file_uuid.is_empty() {
+        return Ok(serde_json::json!({"error": "file_uuid is required"}).to_string());
+    }
+
+    // Duration is fetched up front so the midpoint fallback needs no second query.
+    let videos = schema::table_name("videos");
+    let (video_path, fps, duration): (String, f64, f64) = sqlx::query_as(&format!(
+        "SELECT file_path, COALESCE(fps, 25.0), COALESCE(duration, 0) \
+         FROM {} WHERE file_uuid = $1",
+        videos
+    ))
+    .bind(file_uuid)
+    .fetch_optional(pool)
+    .await
+    .map_err(|e| e.to_string())?
+    .ok_or_else(|| "Video not found".to_string())?;
+    // A stored fps of 0 would turn the timestamp below into inf/NaN.
+    let fps = if fps.is_finite() && fps > 0.0 { fps } else { 25.0 };
+
+    let frame_number = match args.get("frame_number").and_then(|v| v.as_i64()) {
+        Some(f) => f,
+        None => {
+            match crate::core::processor::tkg::query_auto_representative_frame(pool, file_uuid)
+                .await
+            {
+                Ok(r) => r.frame_number,
+                // No representative frame: fall back to the midpoint (0 if duration unknown).
+                Err(_) if duration > 0.0 => ((duration / 2.0) * fps) as i64,
+                Err(_) => 0,
+            }
+        }
+    };
+    // A negative frame number from the caller would produce a negative seek position.
+    let frame_number = frame_number.max(0);
+
+    let timestamp_secs = frame_number as f64 / fps;
+
+    let ffmpeg_path = std::env::var("MOMENTRY_FFMPEG").unwrap_or_else(|_| {
+        let full = "/opt/homebrew/opt/ffmpeg-full/bin/ffmpeg";
+        if std::path::Path::new(full).exists() {
+            full.to_string()
+        } else {
+            "ffmpeg".to_string()
+        }
+    });
+
+    // `-ss` before `-i` seeks on the input; the single frame goes to stdout as MJPEG.
+    let seek = format!("{:.3}", timestamp_secs);
+    let output = tokio::process::Command::new(&ffmpeg_path)
+        .args(["-hide_banner", "-loglevel", "error"])
+        .args(["-ss", seek.as_str(), "-i", video_path.as_str(), "-vframes", "1"])
+        .args(["-f", "image2pipe", "-vcodec", "mjpeg", "-"])
+        .output()
+        .await
+        .map_err(|e| format!("ffmpeg execution error: {}", e))?;
+
+    if !output.status.success() {
+        let stderr = String::from_utf8_lossy(&output.stderr);
+        return Ok(serde_json::json!({"error": format!("ffmpeg failed: {}", stderr)}).to_string());
+    }
+    // ffmpeg can exit successfully yet write no image (e.g. seek past the end); stop here.
+    if output.stdout.is_empty() {
+        return Ok(serde_json::json!({
+            "error": format!("ffmpeg produced no image for frame {}", frame_number)
+        })
+        .to_string());
+    }
+
+    let base64_img = BASE64.encode(&output.stdout);
+
+    let system_prompt =
+        "你是一個專業的影片畫面分析助手。請根據提供的畫面以及用戶的問題，詳細描述畫面中的內容，包括場景、人物、動作、表情、物件等。請用繁體中文回答。";
+    let vision_result = call_llm_vision(system_prompt, question, vec![base64_img], 1024, 120)
+        .await
+        .map_err(|e| e.to_string())?;
+
+    Ok(serde_json::json!({
+        "frame_number": frame_number,
+        "timestamp_secs": timestamp_secs,
+        "analysis": vision_result,
+    })
+    .to_string())
+}
+
 // ── Tool Router ───────────────────────────────────────────────────

 async fn execute_tool(pool: &sqlx::PgPool, tool_call: &ToolCall) -> (String, String, String) {
    let name = tool_call.function.name.clone();
-    let args: serde_json::Value = serde_json::from_str(&tool_call.function.arguments).unwrap_or_default();
+    let args: serde_json::Value =
+        serde_json::from_str(&tool_call.function.arguments).unwrap_or_default();
    let result = match name.as_str() {
        "find_file" => exec_find_file(pool, &args).await,
        "list_files" => exec_list_files(pool, &args).await,
        "tkg_query" => exec_tkg_query(pool, &args).await,
        "smart_search" => exec_smart_search(pool, &args).await,
+        "identity_text" => exec_identity_text(pool, &args).await,
+        "identities_search" => exec_identities_search(pool, &args).await,
        "get_identity_detail" => exec_get_identity_detail(pool, &args).await,
        "get_file_info" => exec_get_file_info(pool, &args).await,
        "get_representative_frame" => exec_get_representative_frame(pool, &args).await,
+        "analyze_frame" => exec_analyze_frame(pool, &args).await,
        _ => Err(format!("Unknown tool: {}", name)),
    };
    let content = match result {
@@ -476,7 +879,11 @@ async fn run_tool_loop(
                for call in &calls {
                    let (tool_call_id, name, content) = execute_tool(pool, call).await;
                    sources.push(serde_json::json!({"tool": name, "result": content}));
-                    messages.push(function_calling::make_tool_result(&tool_call_id, &name, &content));
+                    messages.push(function_calling::make_tool_result(
+                        &tool_call_id,
+                        &name,
+                        &content,
+                    ));
                }
            }
            Err(e) => {
@@ -484,7 +891,10 @@ async fn run_tool_loop(
            }
        }
    }
-    ("已達到最大查詢次數，請縮小問題範圍後重新詢問。".to_string(), sources)
+    (
+        "已達到最大查詢次數，請縮小問題範圍後重新詢問。".to_string(),
+        sources,
+    )
 }

 // ── Handler ───────────────────────────────────────────────────────
@@ -495,13 +905,8 @@ async fn agent_search(
 ) -> Result<Json<AgentSearchResponse>, (StatusCode, Json<serde_json::Value>)> {
    let (conv_id, history) = get_or_create_conv(req.conversation_id.as_deref());

-    let (answer, sources) = run_tool_loop(
-        state.db.pool(),
-        SYSTEM_PROMPT,
-        &req.query,
-        history,
-    )
-    .await;
+    let (answer, sources) =
+        run_tool_loop(state.db.pool(), SYSTEM_PROMPT, &req.query, history).await;

    // Save updated messages for conversation continuation
    let new_msgs = function_calling::build_conversation(SYSTEM_PROMPT, &req.query, vec![]);
@@ -509,7 +914,11 @@ async fn agent_search(

    let needs_input = answer.contains('？') || answer.contains('?');
    let suggestions = if needs_input {
-        Some(vec!["演員名".to_string(), "電影片名".to_string(), "年份".to_string()])
+        Some(vec![
+            "演員名".to_string(),
+            "電影片名".to_string(),
+            "年份".to_string(),
+        ])
    } else {
        None
    };
@@ -526,6 +935,5 @@ async fn agent_search(
 // ── Routes ─────────────────────────────────────────────────────────

 pub fn agent_search_routes() -> Router<AppState> {
-    Router::new()
-        .route("/api/v1/agents/search", post(agent_search))
+    Router::new().route("/api/v1/agents/search", post(agent_search))
 }
--- a/src/api/docs.rs
+++ b/src/api/docs.rs
@@ -8,8 +8,7 @@ async fn doc_redirect() -> axum::response::Redirect {

 async fn wasm_doc_handler() -> Result<impl axum::response::IntoResponse, (StatusCode, &'static str)>
 {
-    let path =
-        std::path::Path::new("/Users/accusys/momentry_core/docs_v1.0/doc_wasm/index.html");
+    let path = std::path::Path::new("/Users/accusys/momentry_core/docs_v1.0/doc_wasm/index.html");
    match tokio::fs::read_to_string(path).await {
        Ok(html) => Ok(([("content-type", "text/html; charset=utf-8")], html)),
        Err(_) => Err((StatusCode::NOT_FOUND, "Doc not found")),
--- a/src/api/files.rs
+++ b/src/api/files.rs
@@ -12,7 +12,7 @@ use std::collections::HashMap;
 use super::types::AppState;
 use crate::core::config;
 use crate::core::db::schema;
-use crate::core::db::{Database, PostgresDb};
+use crate::core::db::{Database, PostgresDb, QdrantDb, RedisClient};
 use crate::core::storage::content_hash;
 use crate::FileManager;

@@ -767,17 +767,7 @@ async fn register_file(
            if let Some(ref vp) = video_path {
                if let Ok(job) = auto_state.db.create_monitor_job(&auto_uuid, Some(vp)).await {
                    tracing::info!("[AUTO-PIPELINE] Job {} created for {}", job.id, auto_uuid);
-                    let all_procs: Vec<&str> = vec![
-                        "asr",
-                        "cut",
-                        "yolo",
-                        "ocr",
-                        "face",
-                        "pose",
-                        "asrx",
-                        "visual_chunk",
-                        "5w1h",
-                    ];
+                    let all_procs: Vec<&str> = vec!["cut", "yolo", "ocr", "face", "pose", "asrx"];
                    let total = sqlx::query_scalar::<_, i64>(&format!(
                        "SELECT COALESCE(total_frames, 0) FROM {} WHERE file_uuid = $1",
                        schema::table_name("videos")
@@ -986,6 +976,10 @@ struct UnregisterResponse {
    deleted_face_detections: u64,
    deleted_processor_results: u64,
    deleted_chunks: u64,
+    deleted_tkg_nodes: u64,
+    deleted_qdrant_vectors: Option<u64>,
+    deleted_redis_keys: Option<u64>,
+    deleted_output_files: u64,
 }

 #[derive(Debug, Deserialize)]
@@ -994,18 +988,48 @@ struct UnregisterRequest {
    file_path: Option<String>,
 }

-fn delete_output_files(uuid: &str) {
-    let output_dir = config::OUTPUT_DIR.to_string();
-    if let Ok(entries) = std::fs::read_dir(&output_dir) {
-        for entry in entries.flatten() {
-            let path = entry.path();
-            if let Some(name) = path.file_name().and_then(|n| n.to_str()) {
-                if name.starts_with(uuid) {
-                    let _ = std::fs::remove_file(&path);
+/// Deletes the JSON output files whose names start with `uuid`.
+///
+/// Scans `config::OUTPUT_DIR` and two fixed output directories; directories that
+/// cannot be read are skipped. Returns the number of files actually removed.
+fn delete_output_files(uuid: &str) -> u64 {
+    // An empty prefix matches every file name, which would delete all JSON outputs.
+    if uuid.is_empty() {
+        tracing::warn!("[UNREGISTER] Empty uuid, skipping output file cleanup");
+        return 0;
+    }
+
+    let mut deleted_count = 0u64;
+    // NOTE(review): the two absolute paths are machine-specific; consider moving them to config.
+    let output_dirs = [
+        config::OUTPUT_DIR.to_string(),
+        "/Users/accusys/momentry/output_dev".to_string(),
+        "/Users/accusys/momentry/output".to_string(),
+    ];
+
+    for output_dir in &output_dirs {
+        if let Ok(entries) = std::fs::read_dir(output_dir) {
+            for entry in entries.flatten() {
+                let path = entry.path();
+                if let Some(name) = path.file_name().and_then(|n| n.to_str()) {
+                    if name.starts_with(uuid) && name.ends_with(".json") {
+                        match std::fs::remove_file(&path) {
+                            Ok(()) => {
+                                deleted_count += 1;
+                                tracing::info!("[UNREGISTER] Deleted output file: {}", name);
+                            }
+                            Err(e) => tracing::warn!(
+                                "[UNREGISTER] Failed to delete output file {}: {}",
+                                name,
+                                e
+                            ),
+                        }
+                    }
                }
            }
        }
    }
+    deleted_count
 }

 async fn unregister(
@@ -1024,65 +1030,54 @@ async fn unregister(
    let processor_table = schema::table_name("processor_results");
    let chunks_table = schema::table_name("chunk");
    let parent_chunks_table = schema::table_name("parent_chunks");
-
-    let deleted_faces: i64 =
-        sqlx::query(&format!("DELETE FROM {} WHERE file_uuid = $1", face_table))
-            .bind(&uuid)
-            .execute(state.db.pool())
-            .await
-            .map_err(|e| {
-                tracing::error!("[unregister] Failed to delete faces: {}", e);
-                StatusCode::INTERNAL_SERVER_ERROR
-            })?
-            .rows_affected() as i64;
-
-    let deleted_processors: i64 = sqlx::query(&format!(
-        "DELETE FROM {} WHERE file_uuid = $1",
-        processor_table
-    ))
-    .bind(&uuid)
-    .execute(state.db.pool())
-    .await
-    .map_err(|e| {
-        tracing::error!("[unregister] Failed to delete processors: {}", e);
-        StatusCode::INTERNAL_SERVER_ERROR
-    })?
-    .rows_affected() as i64;
-
-    let deleted_parent_chunks: i64 = sqlx::query(&format!(
-        "DELETE FROM {} WHERE uuid = $1",
-        parent_chunks_table
-    ))
-    .bind(&uuid)
-    .execute(state.db.pool())
-    .await
-    .map_err(|e| {
-        tracing::error!("[unregister] Failed to delete parent chunks: {}", e);
-        StatusCode::INTERNAL_SERVER_ERROR
-    })?
-    .rows_affected() as i64;
-
-    let deleted_chunks: i64 = sqlx::query(&format!("DELETE FROM {} WHERE file_uuid = $1", chunks_table))
-        .bind(&uuid)
-        .execute(state.db.pool())
-        .await
-        .map_err(|e| {
-            tracing::error!("[unregister] Failed to delete chunks: {}", e);
-            StatusCode::INTERNAL_SERVER_ERROR
-        })?
-        .rows_affected() as i64;
-
-    // Delete pre_chunks
    let pre_chunks_table = schema::table_name("pre_chunks");
-    let deleted_pre_chunks: i64 = sqlx::query(&format!(
-        "DELETE FROM {} WHERE file_uuid = $1",
-        pre_chunks_table
+    let tkg_nodes_table = schema::table_name("tkg_nodes");
+    let cuts_table = schema::table_name("cuts");
+    let strangers_table = schema::table_name("strangers");
+    let chunk_vectors_table = schema::table_name("chunk_vectors");
+    let monitor_jobs_table = schema::table_name("monitor_jobs");
+    let frames_table = schema::table_name("frames");
+
+    let mut tx = state.db.pool().begin().await.map_err(|e| {
+        tracing::error!("[unregister] Failed to start transaction: {}", e);
+        StatusCode::INTERNAL_SERVER_ERROR
+    })?;
+
+    macro_rules! delete_safe {
+        ($table:expr, $where:expr, $bind:expr, $label:expr) => {{
+            sqlx::query(&format!("DELETE FROM {} WHERE {}", $table, $where))
+                .bind($bind)
+                .execute(&mut *tx)
+                .await
+                .map_err(|e| {
+                    tracing::error!("[unregister] Failed to delete {}: {}", $label, e);
+                    StatusCode::INTERNAL_SERVER_ERROR
+                })?
+                .rows_affected() as i64
+        }};
+    }
+
+    let deleted_faces = delete_safe!(face_table, "file_uuid = $1", &uuid, "faces");
+    let deleted_processors = delete_safe!(processor_table, "file_uuid = $1", &uuid, "processors");
+    let deleted_parent_chunks =
+        delete_safe!(parent_chunks_table, "uuid = $1", &uuid, "parent chunks");
+    let deleted_chunks = delete_safe!(chunks_table, "file_uuid = $1", &uuid, "chunks");
+    let deleted_pre_chunks = delete_safe!(pre_chunks_table, "file_uuid = $1", &uuid, "pre_chunks");
+    let deleted_tkg_nodes = delete_safe!(tkg_nodes_table, "file_uuid = $1", &uuid, "TKG nodes");
+    let deleted_cuts = delete_safe!(cuts_table, "file_uuid = $1", &uuid, "cuts");
+    let deleted_strangers = delete_safe!(strangers_table, "file_uuid = $1", &uuid, "strangers");
+    let deleted_chunk_vectors =
+        delete_safe!(chunk_vectors_table, "uuid = $1", &uuid, "chunk vectors");
+    let deleted_monitor_jobs = delete_safe!(monitor_jobs_table, "uuid = $1", &uuid, "monitor jobs");
+    let deleted_frames: i64 = sqlx::query(&format!(
+        "DELETE FROM {} WHERE file_id = (SELECT id FROM {} WHERE file_uuid = $1)",
+        frames_table, videos_table
    ))
    .bind(&uuid)
-    .execute(state.db.pool())
+    .execute(&mut *tx)
    .await
    .map_err(|e| {
-        tracing::error!("[unregister] Failed to delete pre_chunks: {}", e);
+        tracing::error!("[unregister] Failed to delete frames: {}", e);
        StatusCode::INTERNAL_SERVER_ERROR
    })?
    .rows_affected() as i64;
@@ -1092,14 +1087,59 @@ async fn unregister(
        videos_table
    ))
    .bind(&uuid)
-    .execute(state.db.pool())
+    .execute(&mut *tx)
    .await
    .map_err(|e| {
        tracing::error!("[unregister] Failed: {}", e);
        StatusCode::INTERNAL_SERVER_ERROR
    })?;

-    delete_output_files(&uuid);
+    tx.commit().await.map_err(|e| {
+        tracing::error!("[unregister] Failed to commit transaction: {}", e);
+        StatusCode::INTERNAL_SERVER_ERROR
+    })?;
+
+    tracing::info!(
+        "[UNREGISTER] Deleted: {} faces, {} processors, {} parent_chunks, {} chunks, {} pre_chunks, {} tkg_nodes, {} cuts, {} strangers, {} chunk_vectors, {} monitor_jobs, {} frames",
+        deleted_faces, deleted_processors, deleted_parent_chunks, deleted_chunks,
+        deleted_pre_chunks, deleted_tkg_nodes, deleted_cuts, deleted_strangers,
+        deleted_chunk_vectors, deleted_monitor_jobs, deleted_frames
+    );
+
+    let deleted_output_files = delete_output_files(&uuid);
+
+    let deleted_qdrant_vectors = {
+        let qdrant = QdrantDb::new();
+        match qdrant.delete_by_uuid(&uuid).await {
+            Ok(_) => {
+                tracing::info!("[UNREGISTER] Deleted Qdrant vectors for {}", uuid);
+                Some(1)
+            }
+            Err(e) => {
+                tracing::warn!("[UNREGISTER] Failed to delete Qdrant vectors: {}", e);
+                None
+            }
+        }
+    };
+
+    let deleted_redis_keys = {
+        match RedisClient::new() {
+            Ok(redis) => match redis.delete_worker_job(&uuid).await {
+                Ok(_) => {
+                    tracing::info!("[UNREGISTER] Deleted Redis keys for {}", uuid);
+                    Some(1)
+                }
+                Err(e) => {
+                    tracing::warn!("[UNREGISTER] Failed to delete Redis keys: {}", e);
+                    None
+                }
+            },
+            Err(e) => {
+                tracing::warn!("[UNREGISTER] Failed to create Redis client: {}", e);
+                None
+            }
+        }
+    };

    Ok(Json(UnregisterResponse {
        success: true,
@@ -1107,7 +1147,11 @@ async fn unregister(
        file_uuid: uuid,
        deleted_face_detections: deleted_faces as u64,
        deleted_processor_results: deleted_processors as u64,
-        deleted_chunks: (deleted_chunks + deleted_parent_chunks) as u64,
+        deleted_chunks: (deleted_chunks + deleted_parent_chunks + deleted_pre_chunks) as u64,
+        deleted_tkg_nodes: deleted_tkg_nodes as u64,
+        deleted_qdrant_vectors,
+        deleted_redis_keys,
+        deleted_output_files,
    }))
 }

--- a/src/api/five_w1h_agent_api.rs
+++ b/src/api/five_w1h_agent_api.rs
@@ -471,7 +471,7 @@ async fn store_parent_summary(
        "sentence_count": sentences.len(),
    });
    sqlx::query(&format!(
-        r#"UPDATE {} SET summary_text = $1, metadata = metadata || $2::jsonb
+        r#"UPDATE {} SET summary_text = $1, metadata = jsonb_deep_merge(COALESCE(metadata, '{{}}'::jsonb), $2::jsonb)
           WHERE chunk_id = $3 AND file_uuid = $4"#,
        table
    ))
@@ -743,7 +743,7 @@ pub async fn run_5w1h_agent(db: &PostgresDb, file_uuid: &str) -> anyhow::Result<

    // Auto-vectorize sentences with EmbeddingGemma (768D)
    tracing::info!("[5W1H] Starting vectorize for sentence chunks...");
-    let embedder = Embedder::new("embeddinggemma-300M-Q8_0.gguf".to_string());
+    let embedder = Embedder::new("embeddinggemma-300m".to_string());
    let qdrant = QdrantDb::new();
    qdrant.init_collection(768).await?;

--- a/src/api/health.rs
+++ b/src/api/health.rs
@@ -388,10 +388,18 @@ async fn health_detailed(State(state): State<AppState>) -> Json<DetailedHealthRe
            let directory_exists = identities_root.is_dir();
            let files_count = crate::core::identity::storage::count_identity_files();
            let index_ok = crate::core::identity::storage::read_index().is_ok();
-            let db_count: i64 = sqlx::query_scalar("SELECT COUNT(*) FROM identities")
+            let id_cnt: i64 = sqlx::query_scalar("SELECT COUNT(*) FROM identities")
                .fetch_one(state.db.pool())
                .await
                .unwrap_or(0);
+            let st_cnt: i64 = sqlx::query_scalar(&format!(
+                "SELECT COUNT(*) FROM {} WHERE file_uuid IS NOT NULL",
+                crate::core::db::schema::table_name("strangers")
+            ))
+            .fetch_one(state.db.pool())
+            .await
+            .unwrap_or(0);
+            let db_count = id_cnt + st_cnt;
            IdentityHealth {
                directory_exists,
                files_count,
--- a/src/api/identities.rs
+++ b/src/api/identities.rs
@@ -220,8 +220,8 @@ async fn list_identities(
    .await
    .unwrap_or(0);
    let auto_identities: i64 = sqlx::query_scalar(&format!(
-        "SELECT COUNT(*) FROM {} WHERE source = 'auto'",
-        identities_table
+        "SELECT COUNT(*) FROM {} WHERE file_uuid IS NOT NULL",
+        crate::core::db::schema::table_name("strangers")
    ))
    .fetch_one(db.pool())
    .await
@@ -258,7 +258,7 @@ pub struct FaceCandidate {
    pub id: i32,
    pub face_id: Option<String>,
    pub file_uuid: String,
-    pub frame_number: i32,
+    pub frame_number: i64,
    pub confidence: f32,
    pub bbox: Option<serde_json::Value>,
    pub attributes: Option<serde_json::Value>,
@@ -352,7 +352,7 @@ async fn list_face_candidates(

    let rows = if let Some(file_uuid) = &query.file_uuid {
        let sql = format!(
-            "SELECT id, face_id, file_uuid, frame_number::int, confidence::float4, 
+            "SELECT id, face_id, file_uuid, frame_number::bigint, confidence::float4, 
                    jsonb_build_object('x', x, 'y', y, 'width', width, 'height', height) as bbox,
                    NULL::jsonb as attributes 
             FROM {} 
@@ -367,7 +367,7 @@ async fn list_face_candidates(
                i32,
                Option<String>,
                String,
-                i32,
+                i64,
                f32,
                Option<serde_json::Value>,
                Option<serde_json::Value>,
@@ -390,7 +390,7 @@ async fn list_face_candidates(
        }
    } else {
        let sql = format!(
-            "SELECT id, face_id, file_uuid, frame_number::int, confidence::float4, 
+            "SELECT id, face_id, file_uuid, frame_number::bigint, confidence::float4, 
                    jsonb_build_object('x', x, 'y', y, 'width', width, 'height', height) as bbox,
                    NULL::jsonb as attributes 
             FROM {} 
@@ -405,7 +405,7 @@ async fn list_face_candidates(
                i32,
                Option<String>,
                String,
-                i32,
+                i64,
                f32,
                Option<serde_json::Value>,
                Option<serde_json::Value>,
--- a/src/api/identity_agent_api.rs
+++ b/src/api/identity_agent_api.rs
@@ -640,8 +640,9 @@ async fn match_faces_iterative(pool: &sqlx::PgPool, file_uuid: &str) -> anyhow::
    );

    // Step 2: 載入所有 face_detections（含 frame_number），按 trace_id 分組
+    // frame_number is BIGINT (i64) in database
    let fd_table = schema::table_name("face_detections");
-    let fd_rows = sqlx::query_as::<_, (i32, i32, Vec<f32>)>(&format!(
+    let fd_rows = sqlx::query_as::<_, (i32, i64, Vec<f32>)>(&format!(
        "SELECT trace_id, frame_number, embedding FROM {} \
         WHERE file_uuid=$1 AND trace_id IS NOT NULL AND embedding IS NOT NULL \
         ORDER BY trace_id, frame_number",
@@ -658,7 +659,7 @@ async fn match_faces_iterative(pool: &sqlx::PgPool, file_uuid: &str) -> anyhow::

    // 分組：trace_id → (frame_number, embedding)
    use std::collections::HashMap;
-    let mut trace_faces_raw: HashMap<i32, Vec<(i32, Vec<f32>)>> = HashMap::new();
+    let mut trace_faces_raw: HashMap<i32, Vec<(i64, Vec<f32>)>> = HashMap::new();
    for (tid, frame, emb) in &fd_rows {
        trace_faces_raw
            .entry(*tid)
@@ -723,6 +724,7 @@ async fn match_faces_iterative(pool: &sqlx::PgPool, file_uuid: &str) -> anyhow::

    // Step 5: 寫入 DB — Round 1 結果先存
    let identities_table = schema::table_name("identities");
+    let strangers_table = schema::table_name("strangers");
    let fd_table = schema::table_name("face_detections");
    let mut updated = 0usize;
    for (tid, name) in &matched {
@@ -805,13 +807,28 @@ async fn match_faces_iterative(pool: &sqlx::PgPool, file_uuid: &str) -> anyhow::
        }
    }

-    // Step 6: 未匹配的 trace 設 stranger_id = trace_id
-    // trace_id 在同一個 file 內是 sequential integer，直接複用為 stranger_id
+    // Step 6: for unmatched traces, set stranger_id = strangers.id (FK)
+    // First: ensure strangers records exist
+    let _ = sqlx::query(&format!(
+        "INSERT INTO {} (file_uuid, trace_id) \
+         SELECT $1, fd.trace_id FROM {} fd \
+         WHERE fd.file_uuid = $1 AND fd.trace_id IS NOT NULL \
+           AND fd.identity_id IS NULL \
+         ON CONFLICT (file_uuid, trace_id) DO NOTHING",
+        strangers_table, fd_table
+    ))
+    .bind(file_uuid)
+    .execute(pool)
+    .await?;
+
+    // Then: update face_detections.stranger_id = strangers.id
    let stranger_update = sqlx::query(&format!(
-        "UPDATE {} SET stranger_id = trace_id \
-             WHERE file_uuid = $1 AND trace_id IS NOT NULL AND identity_id IS NULL \
-               AND (stranger_id IS NULL OR stranger_id != trace_id)",
-        fd_table
+        "UPDATE {} fd SET stranger_id = s.id \
+         FROM {} s \
+         WHERE s.file_uuid = fd.file_uuid AND s.trace_id = fd.trace_id \
+           AND fd.file_uuid = $1 AND fd.identity_id IS NULL \
+           AND fd.trace_id IS NOT NULL AND fd.stranger_id IS NULL",
+        fd_table, strangers_table
    ))
    .bind(file_uuid)
    .execute(pool)
@@ -971,16 +988,30 @@ pub async fn bind_speakers(pool: &sqlx::PgPool, file_uuid: &str) -> anyhow::Resu

            let ib_table = schema::table_name("identity_bindings");
            let _ = sqlx::query(
-                &format!("INSERT INTO {} (identity_id, identity_type, identity_value, confidence, metadata) \
-                 VALUES ($1, 'speaker', $2, $3, $4::jsonb) \
-                 ON CONFLICT (identity_id, identity_type, identity_value) DO UPDATE SET confidence = EXCLUDED.confidence, metadata = EXCLUDED.metadata", ib_table)
+                &format!("INSERT INTO {} (identity_id, identity_type, identity_value, file_uuid, confidence, metadata) \
+                 VALUES ($1, 'speaker', $2, $3, $4, $5::jsonb) \
+                 ON CONFLICT (identity_id, identity_type, identity_value, file_uuid) \
+                 DO UPDATE SET confidence = EXCLUDED.confidence, metadata = EXCLUDED.metadata", ib_table)
            )
            .bind(identity_id)
            .bind(&best_speaker)
+            .bind(file_uuid)
            .bind(overlap_ratio)
            .bind(&metadata)
            .execute(pool).await;

+            // Also update speaker_detections with the identity_id
+            let sd_table = schema::table_name("speaker_detections");
+            let _ = sqlx::query(
+                &format!("UPDATE {} SET identity_id = $1, confidence = $2 \
+                 WHERE file_uuid = $3 AND speaker_id = $4 AND identity_id IS NULL", sd_table)
+            )
+            .bind(identity_id)
+            .bind(overlap_ratio)
+            .bind(file_uuid)
+            .bind(&best_speaker)
+            .execute(pool).await;
+
            bindings += 1;
        }
    }
@@ -1028,31 +1059,31 @@ pub async fn run_identity_agent(db: &PostgresDb, file_uuid: &str) -> anyhow::Res
        let speakers = extract_speakers_from_asrx_data(&asrx_data);
        let identities = analyze_person_speaker_overlap(&persons, &speakers);

-        for (idx, id_result) in identities.iter().enumerate() {
-            let identity_name = format!("stranger_{}", idx);
+        // Stores only the first result. NOTE(review): NULL trace_id may defeat ON CONFLICT dedup.
+        if let Some(first) = identities.first() {
            let metadata = serde_json::json!({
                "source": "identity_agent",
-                "trace_ids": id_result.person_ids,
-                "speaker_ids": id_result.speaker_ids,
-                "confidence": id_result.confidence,
+                "speaker_ids": first.speaker_ids,
+                "confidence": first.confidence,
                "evidence": {
-                    "speaker_overlap": id_result.evidence.speaker_overlap,
-                    "frame_ratio": id_result.evidence.frame_ratio,
+                    "speaker_overlap": first.evidence.speaker_overlap,
+                    "frame_ratio": first.evidence.frame_ratio,
                },
-                "reasoning": id_result.reasoning,
+                "reasoning": first.reasoning,
            });
-            let _ = sqlx::query(
-                &format!("INSERT INTO {} (name, identity_type, source, metadata, status) VALUES ($1, 'people', 'auto', $2::jsonb, 'pending') ON CONFLICT DO NOTHING", schema::table_name("identities"))
-            )
-            .bind(&identity_name)
+            let _ = sqlx::query(&format!(
+                "INSERT INTO {} (file_uuid, trace_id, metadata) \
+                     VALUES ($1, NULL, $2::jsonb) ON CONFLICT DO NOTHING",
+                schema::table_name("strangers")
+            ))
+            .bind(file_uuid)
            .bind(&metadata)
            .execute(pool)
            .await;
        }
-        let _created = identities.len();
        tracing::info!(
-            "[IdentityAgent] Created {} auto identities from face_clustered for {}",
-            _created,
+            "[IdentityAgent] Analyzed {} face clusters from face_clustered for {}",
+            identities.len(),
            file_uuid
        );
    } else {
--- a/src/api/identity_api.rs
+++ b/src/api/identity_api.rs
--- a/src/api/identity_binding.rs
+++ b/src/api/identity_binding.rs
--- a/src/api/media_api.rs
+++ b/src/api/media_api.rs
@@ -57,6 +57,10 @@ pub fn bbox_routes() -> Router<crate::api::types::AppState> {
            "/api/v1/file/:file_uuid/trace/:trace_id/video",
            get(trace_video),
        )
+        .route(
+            "/api/v1/file/:file_uuid/stranger/:stranger_id/video",
+            get(stranger_video),
+        )
        .route("/api/v1/file/:file_uuid/video", get(stream_video))
        .route("/api/v1/file/:file_uuid/thumbnail", get(face_thumbnail))
        .route("/api/v1/file/:file_uuid/clip", get(video_clip))
@@ -210,8 +214,9 @@ async fn bbox_overlay_video(
    let start_sec = start_f as f64 / fps;

    // Get face bboxes
+    // frame_number is BIGINT (i64) in database
    let face_table = schema::table_name("face_detections");
-    let rows: Vec<(i32, i32, i32, i32, i32, Option<i32>, Option<String>)> = sqlx::query_as(
+    let rows: Vec<(i64, i32, i32, i32, i32, Option<i32>, Option<String>)> = sqlx::query_as(
        &format!("SELECT frame_number, x, y, width, height, trace_id, face_id FROM {} WHERE file_uuid = $1 AND frame_number BETWEEN $2 AND $3 ORDER BY frame_number", face_table)
    )
    .bind(face_fuid).bind(start_f).bind(end_f)
@@ -222,7 +227,7 @@ async fn bbox_overlay_video(
    let mut parts: Vec<String> = Vec::new();
    for (frame, x, y, w, h, trace_id, _) in &rows {
        let text = format!("t{}", trace_id.unwrap_or(0));
-        let offset = frame - start_f;
+        let offset = (*frame as i32) - start_f;
        parts.push(format!(
            "drawbox=x={}:y={}:w={}:h={}:color=red@0.8:thickness=4:enable='eq(n,{})'",
            x, y, w, h, offset
@@ -300,6 +305,15 @@ async fn trace_video(
    State(state): State<crate::api::types::AppState>,
    Path((file_uuid, trace_id)): Path<(String, i32)>,
    Query(params): Query<std::collections::HashMap<String, String>>,
+) -> Result<impl IntoResponse, StatusCode> {
+    trace_video_inner(&state, &file_uuid, trace_id, &params).await
+}
+
+async fn trace_video_inner(
+    state: &crate::api::types::AppState,
+    file_uuid: &str,
+    trace_id: i32,
+    params: &std::collections::HashMap<String, String>,
 ) -> Result<impl IntoResponse, StatusCode> {
    use axum::http::header;

@@ -317,8 +331,9 @@ async fn trace_video(
    let (video_path, fps, _width, _height) = row.ok_or(StatusCode::NOT_FOUND)?;

    // Query face detections to find frame range for target trace
+    // frame_number is BIGINT (i64) in database
    let face_table = schema::table_name("face_detections");
-    let rows: Vec<(i32, i32, i32, i32, i32)> = sqlx::query_as(&format!(
+    let rows: Vec<(i64, i32, i32, i32, i32)> = sqlx::query_as(&format!(
        "SELECT frame_number, x, y, width, height FROM {} WHERE file_uuid = $1 AND trace_id = $2 ORDER BY frame_number",
        face_table
    ))
@@ -371,11 +386,12 @@ async fn trace_video(

    // === DEBUG MODE: text overlay, list all traces in frame range ===
    let start_fn = (start_sec * fps) as i32;
-    let end_fn = ((start_sec + duration) * fps) as i32;
+    let end_fn = ((start_sec + duration) * fps) as i64;

    // Query all traces with identity names and bbox positions in the visible frame range
+    // frame_number is BIGINT (i64) in database
    let identities_table = schema::table_name("identities");
-    let all_rows: Vec<(i32, i32, i32, i32, i32, i32, Option<String>)> = sqlx::query_as(&format!(
+    let all_rows: Vec<(i32, i64, i32, i32, i32, i32, Option<String>)> = sqlx::query_as(&format!(
        "SELECT fd.trace_id, fd.frame_number, fd.x, fd.y, fd.width, fd.height, i.name \
         FROM {} fd \
         LEFT JOIN {} i ON fd.identity_id = i.id \
@@ -391,9 +407,10 @@ async fn trace_video(
    .unwrap_or_default();

    // Group frames by trace_id, compute start_frame per trace; collect bbox per frame
-    let mut trace_frames: HashMap<i32, Vec<i32>> = HashMap::new();
+    // frame_number is i64 (BIGINT), so HashMaps need i64 for frame values
+    let mut trace_frames: HashMap<i32, Vec<i64>> = HashMap::new();
    let mut trace_identity: HashMap<i32, String> = HashMap::new();
-    let mut bbox_per_frame: HashMap<(i32, i32), (i32, i32, i32, i32)> = HashMap::new(); // (tid, fn) -> (x, y, w, h)
+    let mut bbox_per_frame: HashMap<(i32, i64), (i32, i32, i32, i32)> = HashMap::new(); // (tid, fn) -> (x, y, w, h)
    for (tid, fn_, x, y, w, h, name_opt) in &all_rows {
        trace_frames.entry(*tid).or_default().push(*fn_);
        bbox_per_frame.insert((*tid, *fn_), (*x, *y, *w, *h));
@@ -417,7 +434,7 @@ async fn trace_video(
    .unwrap_or_else(|| "-".to_string());

    // Sort traces for consistent ordering
-    let mut sorted_traces: Vec<(i32, &Vec<i32>)> =
+    let mut sorted_traces: Vec<(i32, &Vec<i64>)> =
        trace_frames.iter().map(|(k, v)| (*k, v)).collect();
    sorted_traces.sort_by_key(|(tid, _)| *tid);

@@ -695,6 +712,7 @@ struct ThumbQuery {
    y: Option<i32>,
    w: Option<i32>,
    h: Option<i32>,
+    trace_id: Option<i32>,
 }

 async fn face_thumbnail(
@@ -717,15 +735,70 @@ async fn face_thumbnail(
        }
    };

-    let row: Option<(String,)> = sqlx::query_as(&format!(
-        "SELECT file_path FROM {} WHERE file_uuid = $1",
+    // Step 1: Serve the pre-stored face crop when trace_id is provided.
+    // A missing, unreadable, or invalid cache entry is logged and falls through to the
+    // ffmpeg extraction below instead of failing the request.
+    if let Some(trace_id) = q.trace_id {
+        let output_dir = crate::core::config::OUTPUT_DIR.as_str();
+        let cached_path = std::path::PathBuf::from(output_dir)
+            .join(".faces")
+            .join(&file_uuid)
+            .join(trace_id.to_string())
+            .join(format!("{}.jpg", frame));
+
+        // Read directly rather than calling the blocking `exists()` first; this also
+        // removes the window between the existence check and the read.
+        match tokio::fs::read(&cached_path).await {
+            Ok(bytes) => match crate::core::thumbnail::validator::validate_jpeg(&bytes) {
+                Ok(_) => {
+                    tracing::debug!(
+                        "[thumbnail] Using cached face crop: {}",
+                        cached_path.display()
+                    );
+                    return Ok(Response::builder()
+                        .status(StatusCode::OK)
+                        .header(header::CONTENT_TYPE, "image/jpeg")
+                        .header(header::CACHE_CONTROL, "public, max-age=86400")
+                        .body(Body::from(bytes))
+                        .unwrap());
+                }
+                Err(e) => tracing::warn!("[thumbnail] Cached JPEG validation failed: {}", e),
+            },
+            Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
+                tracing::debug!("[thumbnail] Cached file not found, falling back to ffmpeg");
+            }
+            Err(e) => tracing::warn!("[thumbnail] Failed to read cached file: {}", e),
+        }
+    }
+
+    // Step 2: Fallback to ffmpeg on-demand extraction
+    let row: Option<(String, Option<i64>, Option<i32>, Option<i32>)> = sqlx::query_as(&format!(
+        "SELECT file_path, total_frames, width, height FROM {} WHERE file_uuid = $1",
        videos_table
    ))
    .bind(&file_uuid)
    .fetch_optional(state.db.pool())
    .await
    .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
-    let (file_path,) = row.ok_or(StatusCode::NOT_FOUND)?;
+    let (file_path, total_frames, video_width, video_height) = row.ok_or(StatusCode::NOT_FOUND)?;
+
+    if let Some(total) = total_frames {
+        if total > 0 {
+            crate::core::thumbnail::validator::validate_frame(frame, total).map_err(|e| {
+                tracing::warn!("[thumbnail] Frame validation failed: {}", e);
+                StatusCode::BAD_REQUEST
+            })?;
+        }
+    }
+
+    if let (Some(x), Some(y), Some(w), Some(h)) = (q.x, q.y, q.w, q.h) {
+        if let (Some(vw), Some(vh)) = (video_width, video_height) {
+            crate::core::thumbnail::validator::validate_crop(x, y, w, h, vw, vh).map_err(|e| {
+                tracing::warn!("[thumbnail] Crop validation failed: {}", e);
+                StatusCode::BAD_REQUEST
+            })?;
+        }
+    }

    let select = format!("select=eq(n\\,{})", frame);
    let vf = if let (Some(x), Some(y), Some(w), Some(h)) = (q.x, q.y, q.w, q.h) {
@@ -755,6 +828,11 @@ async fn face_thumbnail(
        return Err(StatusCode::INTERNAL_SERVER_ERROR);
    }

+    crate::core::thumbnail::validator::validate_jpeg(&output.stdout).map_err(|e| {
+        tracing::warn!("[thumbnail] JPEG validation failed: {}", e);
+        StatusCode::INTERNAL_SERVER_ERROR
+    })?;
+
    Ok(Response::builder()
        .status(StatusCode::OK)
        .header(header::CONTENT_TYPE, "image/jpeg")
@@ -849,3 +927,127 @@ async fn video_clip(
        .body(Body::from(output.stdout))
        .unwrap())
 }
+
+/// Axum handler: unpacks the extractors and delegates to [`stranger_video_inner`].
+async fn stranger_video(
+    State(state): State<crate::api::types::AppState>,
+    Path((file_uuid, stranger_id)): Path<(String, i32)>,
+    Query(params): Query<std::collections::HashMap<String, String>>,
+) -> Result<impl IntoResponse, StatusCode> {
+    stranger_video_inner(&state, &file_uuid, stranger_id, &params).await
+}
+
+/// Builds an MP4 clip spanning every detection of one stranger in a video.
+///
+/// Stream-copies the range from the stranger's first to last detected frame, widened by
+/// `padding` seconds per side (query parameter, default 2.0); audio is dropped when the
+/// parsed `audio` option is `"off"`. Fails with `NOT_FOUND` when the video row or the
+/// detections are missing, and `INTERNAL_SERVER_ERROR` when a query, ffmpeg or I/O fails.
+async fn stranger_video_inner(
+    state: &crate::api::types::AppState,
+    file_uuid: &str,
+    stranger_id: i32,
+    params: &std::collections::HashMap<String, String>,
+) -> Result<impl IntoResponse, StatusCode> {
+    use axum::http::header;
+    use uuid::Uuid;
+
+    tracing::info!("[stranger_video] Starting for file={}, stranger={}", file_uuid, stranger_id);
+
+    // Only normal mode is supported for stranger clips, so the parsed mode is ignored.
+    let (_mode, audio) = parse_video_params(params);
+
+    let videos_table = schema::table_name("videos");
+    tracing::debug!("[stranger_video] videos_table: {}", videos_table);
+    let row: Option<(String, f64, i32, i32)> = sqlx::query_as(&format!(
+        "SELECT file_path, COALESCE(fps, 24.0), COALESCE(width, 0), COALESCE(height, 0) FROM {} WHERE file_uuid = $1",
+        videos_table
+    ))
+    .bind(file_uuid)
+    .fetch_optional(state.db.pool())
+    .await
+    .map_err(|e| {
+        tracing::error!("[stranger_video] Video query error: {}", e);
+        StatusCode::INTERNAL_SERVER_ERROR
+    })?;
+    let (video_path, stored_fps, _width, _height) = row.ok_or_else(|| {
+        tracing::error!("[stranger_video] Video not found for uuid={}", file_uuid);
+        StatusCode::NOT_FOUND
+    })?;
+    // COALESCE only covers NULL; a zero or non-finite fps would make the time math inf/NaN.
+    let fps = if stored_fps.is_finite() && stored_fps > 0.0 { stored_fps } else { 24.0 };
+    tracing::info!("[stranger_video] Found video: path={}, fps={}", video_path, fps);
+
+    // frame_number is BIGINT (i64) in database
+    let face_table = schema::table_name("face_detections");
+    tracing::debug!("[stranger_video] face_table: {}", face_table);
+    let rows: Vec<(i64, i32, i32, i32, i32)> = sqlx::query_as(&format!(
+        "SELECT frame_number, x, y, width, height FROM {} WHERE file_uuid = $1 AND stranger_id = $2 ORDER BY frame_number",
+        face_table
+    ))
+    .bind(file_uuid)
+    .bind(stranger_id)
+    .fetch_all(state.db.pool())
+    .await
+    .map_err(|e| {
+        // A failed query is a server fault and must not be reported as "no faces" (404).
+        tracing::error!("[stranger_video] Face query error: {}", e);
+        StatusCode::INTERNAL_SERVER_ERROR
+    })?;
+    tracing::info!("[stranger_video] Found {} faces", rows.len());
+
+    // Rows are ordered by frame_number, so the first and last rows bound the appearance.
+    let (first_frame, last_frame) = match (rows.first(), rows.last()) {
+        (Some(first), Some(last)) => (first.0, last.0),
+        _ => {
+            tracing::error!("[stranger_video] No faces found for stranger_id={}", stranger_id);
+            return Err(StatusCode::NOT_FOUND);
+        }
+    };
+
+    // Negative, NaN or infinite padding falls back to the default instead of reaching ffmpeg.
+    let requested = params.get("padding").and_then(|s| s.parse::<f64>().ok());
+    let padding = requested.filter(|p| p.is_finite() && *p >= 0.0).unwrap_or(2.0);
+    // Measure the duration from the clamped seek so a clip near 0s does not overshoot its end.
+    let seek = (first_frame as f64 / fps - padding).max(0.0);
+    let duration = last_frame as f64 / fps + padding - seek;
+    tracing::info!("[stranger_video] Frame range: {} - {}, time: {:.2}s - {:.2}s", first_frame, last_frame, seek, seek + duration);
+
+    let tmp = std::env::temp_dir().join(format!("stranger_{}.mp4", Uuid::new_v4()));
+    let mut cmd_args: Vec<String> = vec![
+        "-ss".into(), seek.to_string(), "-i".into(), video_path,
+        "-t".into(), duration.to_string(), "-c".into(), "copy".into(),
+    ];
+    if audio == "off" {
+        cmd_args.push("-an".into());
+    }
+    cmd_args.push("-y".into());
+    tracing::debug!("[stranger_video] ffmpeg args: {:?} {}", cmd_args, tmp.display());
+    // `output()` blocks until ffmpeg exits: run it on the blocking pool (path passed as OsStr).
+    let out_path = tmp.clone();
+    let job = move || ffmpeg_cmd().args(&cmd_args).arg(&out_path).output();
+    let spawned = tokio::task::spawn_blocking(job).await;
+    let ran: Result<(), String> = match spawned {
+        Ok(Ok(out)) if out.status.success() => Ok(()),
+        Ok(Ok(out)) => Err(format!("ffmpeg failed: {}", String::from_utf8_lossy(&out.stderr))),
+        Ok(Err(e)) => Err(format!("ffmpeg spawn error: {}", e)),
+        Err(e) => Err(format!("ffmpeg task error: {}", e)),
+    };
+    // Read the clip only when ffmpeg succeeded, then remove the temp file on every path.
+    let read = match ran {
+        Ok(()) => tokio::fs::read(&tmp).await.map_err(|e| format!("Read output error: {}", e)),
+        Err(msg) => Err(msg),
+    };
+    let _ = tokio::fs::remove_file(&tmp).await;
+    let data = read.map_err(|msg| {
+        tracing::error!("[stranger_video] {}", msg);
+        StatusCode::INTERNAL_SERVER_ERROR
+    })?;
+    tracing::info!("[stranger_video] Returning video, size: {} bytes", data.len());
+
+    Ok(Response::builder()
+        .header(header::CONTENT_TYPE, "video/mp4")
+        .header(header::CONTENT_LENGTH, data.len())
+        .body(Body::from(data))
+        .expect("static header names and a numeric length always form a valid response"))
+}
--- a/src/api/mod.rs
+++ b/src/api/mod.rs
@@ -4,7 +4,6 @@ pub mod auth;
 pub mod docs;
 pub mod files;
 pub mod five_w1h_agent_api;
-pub mod processing;
 pub mod health;
 pub mod identities;
 pub mod identity_agent_api;
@@ -12,6 +11,7 @@ pub mod identity_api;
 pub mod identity_binding;
 pub mod media_api;
 pub mod middleware;
+pub mod processing;
 pub mod scan;
 pub mod search;
 pub mod server;
@@ -19,7 +19,5 @@ pub mod tmdb_api;
 pub mod trace_agent_api;
 pub mod types;
 pub mod universal_search;
-pub mod visual_chunk_search;
-pub mod visual_search;

 pub use server::start_server;
--- a/src/api/processing.rs
+++ b/src/api/processing.rs
@@ -233,50 +233,54 @@ async fn trigger_processing(
        .await
        .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;

-    let processors_to_run: Vec<&str> = if let Some(procs) = &req.processors {
-        // 檢查 job 是否存在，不存在則 INSERT（state machine entry）
-        let existing_id: Option<i32> = sqlx::query_scalar(&format!(
-            "SELECT id FROM {monitor_jobs_table} WHERE uuid = $1"
-        ))
-        .bind(&file_uuid)
-        .fetch_optional(state.db.pool())
-        .await
-        .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
-
-        if existing_id.is_none() {
-            state
-                .db
-                .create_monitor_job(&file_uuid, Some(&file_path))
-                .await
-                .map_err(|e| {
-                    tracing::error!(
-                        "[TRIGGER] Failed to create monitor job for {}: {}",
-                        file_uuid,
-                        e
-                    );
-                    StatusCode::INTERNAL_SERVER_ERROR
-                })?;
-        }
-
-        // UPDATE processors + reset 狀態讓 worker 可 pickup
-        let procs_db: Vec<String> = procs.iter().map(|s| s.to_string()).collect();
-        sqlx::query(&format!(
-            "UPDATE {monitor_jobs_table} SET processors = $1::text[], status = 'pending' WHERE uuid = $2"
-        ))
-        .bind(&procs_db)
-        .bind(&file_uuid)
-        .execute(state.db.pool())
-        .await
-        .map_err(|e| {
-            tracing::error!("[TRIGGER] Failed to update monitor job for {}: {}", file_uuid, e);
-            StatusCode::INTERNAL_SERVER_ERROR
-        })?;
-
-        procs.iter().map(|s| s.as_str()).collect()
+    let processors_to_run: Vec<String> = if let Some(procs) = &req.processors {
+        procs.iter().map(|s| s.to_string()).collect()
    } else {
-        vec![]
+        crate::core::db::ProcessorType::all()
+            .iter()
+            .map(|p| p.as_str().to_string())
+            .collect()
    };

+    // Ensure the monitor_job row exists
+    let existing_id: Option<i32> = sqlx::query_scalar(&format!(
+        "SELECT id FROM {monitor_jobs_table} WHERE uuid = $1"
+    ))
+    .bind(&file_uuid)
+    .fetch_optional(state.db.pool())
+    .await
+    .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
+
+    if existing_id.is_none() {
+        state
+            .db
+            .create_monitor_job(&file_uuid, Some(&file_path))
+            .await
+            .map_err(|e| {
+                tracing::error!(
+                    "[TRIGGER] Failed to create monitor job for {}: {}",
+                    file_uuid,
+                    e
+                );
+                StatusCode::INTERNAL_SERVER_ERROR
+            })?;
+    }
+
+    // UPDATE processors and reset the status so a worker can pick the job up
+    sqlx::query(&format!(
+        "UPDATE {monitor_jobs_table} SET processors = $1::text[], status = 'pending' WHERE uuid = $2"
+    ))
+    .bind(&processors_to_run)
+    .bind(&file_uuid)
+    .execute(state.db.pool())
+    .await
+    .map_err(|e| {
+        tracing::error!("[TRIGGER] Failed to update monitor job for {}: {}", file_uuid, e);
+        StatusCode::INTERNAL_SERVER_ERROR
+    })?;
+
+    let processors_to_run_refs: Vec<&str> = processors_to_run.iter().map(|s| s.as_str()).collect();
+
    let notification = serde_json::json!({
        "action": "process",
        "file_uuid": file_uuid,
@@ -285,7 +289,7 @@ async fn trigger_processing(
        "file_type": file_type,
        "content_hash": content_hash,
        "output_dir": output_dir,
-        "processors": processors_to_run,
+        "processors": processors_to_run_refs,
    });

    let notification_key = format!("{}notifications", REDIS_KEY_PREFIX.as_str());
--- a/src/api/scan.rs
+++ b/src/api/scan.rs
@@ -414,8 +414,6 @@ async fn get_ingestion_status(
        "SELECT COUNT(*) FROM {} WHERE file_uuid = '{file_uuid}'",
        schema::table_name("tkg_edges")
    ));
-    let scene_5w1h = count_sql!(&format!("SELECT COUNT(*) FROM {chunk} WHERE file_uuid = '{file_uuid}' AND chunk_type = 'cut' AND summary_text IS NOT NULL AND summary_text != ''"));
-
    let related_identities: Vec<IdentityRef> =
        match sqlx::query_as::<_, (String, String)>(&format!(
            "SELECT DISTINCT i.uuid::text, i.name FROM {identities} i \
@@ -491,11 +489,6 @@ async fn get_ingestion_status(
            Some(format!("{identity_count} identities matched"))
        ),
        step!("scene_metadata", scene_meta_ok, None),
-        step!(
-            "5w1h",
-            scene_5w1h > 0,
-            Some(format!("{scene_5w1h} scenes with 5W1H"))
-        ),
    ];

    Ok(Json(IngestionStatusResponse {
--- a/src/api/server.rs
+++ b/src/api/server.rs
@@ -5,7 +5,7 @@ use tokio::time::timeout;
 use tower_http::cors::{Any, CorsLayer};

 use crate::core::cache::{MongoCache, RedisCache};
-use crate::core::db::{Database, PostgresDb};
+use crate::core::db::{Database, PostgresDb, QdrantDb};
 use crate::Embedder;

 use super::agent_api;
@@ -14,7 +14,6 @@ use super::auth;
 use super::docs;
 use super::files;
 use super::five_w1h_agent_api;
-use super::processing;
 use super::health;
 use super::identities;
 use super::identity_agent_api;
@@ -22,18 +21,18 @@ use super::identity_api;
 use super::identity_binding;
 use super::media_api;
 use super::middleware::unified_auth;
+use super::processing;
 use super::scan;
 use super::search::search_routes;
 use super::tmdb_api;
 use super::trace_agent_api;
 use super::types::AppState;
 use super::universal_search::universal_search_routes;
-use super::visual_search;

 pub async fn start_server(host: &str, port: u16) -> anyhow::Result<()> {
    health::init_server_state(host, port);

-    let embedder = std::sync::Arc::new(Embedder::new("nomic-embed-text-v2-moe:latest".to_string()));
+    let embedder = std::sync::Arc::new(Embedder::new("embeddinggemma-300m".to_string()));

    // ── ⚠️ WARNING: DO NOT move MongoCache::init() back to critical path ──
    //
@@ -57,6 +56,9 @@ pub async fn start_server(host: &str, port: u16) -> anyhow::Result<()> {
    let redis_cache = RedisCache::new()?;
    let db = PostgresDb::init().await?;

+    // Run migrations (create identity_history table if not exists)
+    PostgresDb::run_migrations(db.pool()).await?;
+
    let schema_health = health::check_schema_migrations(db.pool()).await;
    if schema_health.ok {
        tracing::info!(
@@ -89,8 +91,10 @@ pub async fn start_server(host: &str, port: u16) -> anyhow::Result<()> {
    let db = std::sync::Arc::new(db);
    let api_state = super::middleware::ApiState { db: db.clone() };

+    let qdrant = std::sync::Arc::new(QdrantDb::new());
    let state = AppState {
        db,
+        qdrant,
        embedder,
        embedder_model: "nomic-embed-text-v2-moe:latest".to_string(),
        mongo_cache,
@@ -129,7 +133,6 @@ pub async fn start_server(host: &str, port: u16) -> anyhow::Result<()> {
        .merge(auth::auth_routes())
        .merge(health::health_routes())
        .merge(docs::doc_routes())
-        .merge(visual_search::visual_search_routes())
        .merge(protected_routes)
        .layer(cors)
        .with_state(state);
--- a/src/api/trace_agent_api.rs
+++ b/src/api/trace_agent_api.rs
@@ -25,14 +25,19 @@ pub fn trace_agent_routes() -> Router<crate::api::types::AppState> {
            "/api/v1/file/:file_uuid/trace/:trace_id/thumbnail",
            get(get_trace_thumbnail),
        )
+        .route(
+            "/api/v1/file/:file_uuid/stranger/:stranger_id/representative-face",
+            get(get_stranger_representative_face),
+        )
+        .route(
+            "/api/v1/file/:file_uuid/stranger/:stranger_id/thumbnail",
+            get(get_stranger_thumbnail),
+        )
        .route(
            "/api/v1/file/:file_uuid/identities/:identity_uuid_a/co-occur-with/:identity_uuid_b",
            get(get_cooccurrence),
        )
-        .route(
-            "/api/v1/file/:file_uuid/tkg/rebuild",
-            post(rebuild_tkg),
-        )
+        .route("/api/v1/file/:file_uuid/tkg/rebuild", post(rebuild_tkg))
        .route(
            "/api/v1/file/:file_uuid/representative-frame",
            get(get_representative_frame),
@@ -54,8 +59,8 @@ struct TracesRequest {
 struct TraceInfo {
    trace_id: i32,
    face_count: i64,
-    start_frame: i32,
-    end_frame: i32,
+    start_frame: i64,
+    end_frame: i64,
    start_time: f64,
    end_time: f64,
    duration_sec: f64,
@@ -110,8 +115,8 @@ async fn list_traces_sorted(
        "SELECT tt.*, fd.id AS sample_face_id FROM (
            SELECT trace_id::int AS trace_id,
                   COUNT(*) AS face_count,
-                   MIN(frame_number)::int AS start_frame,
-                   MAX(frame_number)::int AS end_frame,
+                   MIN(frame_number)::bigint AS start_frame,
+                   MAX(frame_number)::bigint AS end_frame,
                   (MAX(frame_number) - MIN(frame_number))::float8 AS duration_sec,
                   AVG(confidence)::float8 AS avg_confidence
            FROM {}
@@ -132,7 +137,7 @@ async fn list_traces_sorted(
        crate::core::db::schema::table_name("face_detections"),
    );

-    let rows: Vec<(i32, i64, i32, i32, f64, f64, Option<i32>)> = sqlx::query_as(&query)
+    let rows: Vec<(i32, i64, i64, i64, f64, f64, Option<i32>)> = sqlx::query_as(&query)
        .bind(&file_uuid)
        .bind(min_faces)
        .bind(effective_limit)
@@ -193,8 +198,8 @@ struct TraceFacesQuery {
 #[derive(Debug, Serialize)]
 struct TraceFaceItem {
    id: i32,
-    start_frame: i32,
-    end_frame: i32,
+    start_frame: i64,
+    end_frame: i64,
    start_time: f64,
    end_time: f64,
    x: Option<i32>,
@@ -260,14 +265,14 @@ async fn list_trace_faces(

    let rows: Vec<(
        i32,
-        i32,
+        i64,
        Option<i32>,
        Option<i32>,
        Option<i32>,
        Option<i32>,
        f32,
    )> = sqlx::query_as(&format!(
-        "SELECT id, frame_number::int, x, y, width, height, confidence::float4 \
+        "SELECT id, frame_number, x, y, width, height, confidence::float4 \
                   FROM {} WHERE file_uuid = $1 AND trace_id = $2 \
                   ORDER BY frame_number ASC LIMIT $3 OFFSET $4",
        crate::core::db::schema::table_name("face_detections")
@@ -405,7 +410,8 @@ where
    let video_table = schema::table_name("videos");

    let fps: f64 = sqlx::query_scalar(&format!(
-        "SELECT COALESCE(fps, 25.0) FROM {} WHERE file_uuid = $1", video_table
+        "SELECT COALESCE(fps, 25.0) FROM {} WHERE file_uuid = $1",
+        video_table
    ))
    .bind(file_uuid)
    .fetch_optional(pool)
@@ -414,7 +420,8 @@ where
    .unwrap_or(25.0);

    let face_count: (i64,) = sqlx::query_as(&format!(
-        "SELECT COUNT(*) FROM {} WHERE file_uuid = $1 AND trace_id = $2", fd_table
+        "SELECT COUNT(*) FROM {} WHERE file_uuid = $1 AND trace_id = $2",
+        fd_table
    ))
    .bind(file_uuid)
    .bind(trace_id)
@@ -422,7 +429,15 @@ where
    .await
    .map_err(|e| err_fn(anyhow::anyhow!("{}", e)))?;

-    struct Candidate { frame: i64, x: i32, y: i32, w: i32, h: i32, conf: f64, score: f64 }
+    struct Candidate {
+        frame: i64,
+        x: i32,
+        y: i32,
+        w: i32,
+        h: i32,
+        conf: f64,
+        score: f64,
+    }

    let rows = sqlx::query_as::<_, (i64, i32, i32, i32, i32, f64)>(&format!(
        "SELECT frame_number::bigint, x, y, width, height, confidence::float8 \
@@ -431,7 +446,8 @@ where
         ORDER BY (width::float8 * height::float8) * confidence::float8 DESC LIMIT 10",
        fd_table
    ))
-    .bind(file_uuid).bind(trace_id)
+    .bind(file_uuid)
+    .bind(trace_id)
    .fetch_all(pool)
    .await
    .map_err(|e| err_fn(anyhow::anyhow!("{}", e)))?;
@@ -440,15 +456,25 @@ where
        return Err(err_fn(anyhow::anyhow!("No suitable face found")));
    }

-    let candidates: Vec<Candidate> = rows.into_iter()
+    let candidates: Vec<Candidate> = rows
+        .into_iter()
        .map(|(frame, x, y, w, h, conf)| {
            let score = (w as f64 * h as f64) * conf;
-            Candidate { frame, x, y, w, h, conf, score }
+            Candidate {
+                frame,
+                x,
+                y,
+                w,
+                h,
+                conf,
+                score,
+            }
        })
        .collect();

    let video_path: String = sqlx::query_scalar(&format!(
-        "SELECT file_path FROM {} WHERE file_uuid = $1", video_table
+        "SELECT file_path FROM {} WHERE file_uuid = $1",
+        video_table
    ))
    .bind(file_uuid)
    .fetch_optional(pool)
@@ -463,16 +489,31 @@ where
    for (i, c) in candidates.iter().enumerate() {
        let seek = c.frame as f64 / fps;
        if let Ok(output) = tokio::process::Command::new("ffmpeg")
-            .args(["-ss", &format!("{:.2}", seek), "-i", &video_path,
-                   "-vframes", "1", "-vf", &format!("crop={}:{}:{}:{},blurdetect", c.w, c.h, c.x, c.y),
-                   "-f", "null", "-"])
-            .output().await
+            .args([
+                "-ss",
+                &format!("{:.2}", seek),
+                "-i",
+                &video_path,
+                "-vframes",
+                "1",
+                "-vf",
+                &format!("crop={}:{}:{}:{},blurdetect", c.w, c.h, c.x, c.y),
+                "-f",
+                "null",
+                "-",
+            ])
+            .output()
+            .await
        {
            let stderr = String::from_utf8_lossy(&output.stderr);
            for line in stderr.lines() {
                if let Some(blur_str) = line.split("blur mean: ").nth(1) {
                    if let Ok(blur) = blur_str.trim().parse::<f64>() {
-                        if blur < best_blur { best_blur = blur; best = c.frame; best_idx = i; }
+                        if blur < best_blur {
+                            best_blur = blur;
+                            best = c.frame;
+                            best_idx = i;
+                        }
                    }
                }
            }
@@ -481,9 +522,17 @@ where

    let chosen = &candidates[best_idx];
    Ok(RepFaceSelection {
-        frame: chosen.frame, x: chosen.x, y: chosen.y, w: chosen.w, h: chosen.h,
-        conf: chosen.conf, blur: best_blur, score: chosen.score,
-        video_path, fps, face_count: face_count.0,
+        frame: chosen.frame,
+        x: chosen.x,
+        y: chosen.y,
+        w: chosen.w,
+        h: chosen.h,
+        conf: chosen.conf,
+        blur: best_blur,
+        score: chosen.score,
+        video_path,
+        fps,
+        face_count: face_count.0,
    })
 }

@@ -491,19 +540,36 @@ async fn get_representative_face(
    State(state): State<crate::api::types::AppState>,
    Path((file_uuid, trace_id)): Path<(String, i32)>,
 ) -> Result<Json<RepFaceResponse>, (StatusCode, Json<serde_json::Value>)> {
-    let sel = select_rep_face(state.db.pool(), &file_uuid, trace_id, |e| {
-        (StatusCode::INTERNAL_SERVER_ERROR, Json(serde_json::json!({"error": e.to_string()})))
-    }).await?;
+    get_representative_face_inner(&state, &file_uuid, trace_id).await
+}
+
+async fn get_representative_face_inner(
+    state: &crate::api::types::AppState,
+    file_uuid: &str,
+    trace_id: i32,
+) -> Result<Json<RepFaceResponse>, (StatusCode, Json<serde_json::Value>)> {
+    let sel = select_rep_face(state.db.pool(), file_uuid, trace_id, |e| {
+        (
+            StatusCode::INTERNAL_SERVER_ERROR,
+            Json(serde_json::json!({"error": e.to_string()})),
+        )
+    })
+    .await?;

    Ok(Json(RepFaceResponse {
        success: true,
-        file_uuid,
+        file_uuid: file_uuid.to_string(),
        trace_id,
        face_count: sel.face_count,
        representative: RepFaceResult {
            frame_number: sel.frame,
            timestamp_secs: sel.frame as f64 / sel.fps,
-            bbox: RepFaceBbox { x: sel.x, y: sel.y, width: sel.w, height: sel.h },
+            bbox: RepFaceBbox {
+                x: sel.x,
+                y: sel.y,
+                width: sel.w,
+                height: sel.h,
+            },
            confidence: sel.conf,
            quality_score: sel.score,
            blur_score: sel.blur,
@@ -515,34 +581,118 @@ async fn get_trace_thumbnail(
    State(state): State<crate::api::types::AppState>,
    Path((file_uuid, trace_id)): Path<(String, i32)>,
 ) -> Result<Response, (StatusCode, Json<serde_json::Value>)> {
+    get_trace_thumbnail_inner(&state, &file_uuid, trace_id).await
+}
+
+async fn get_trace_thumbnail_inner(
+    state: &crate::api::types::AppState,
+    file_uuid: &str,
+    trace_id: i32,
+) -> Result<Response, (StatusCode, Json<serde_json::Value>)> {
+    // Step 1: Check for pre-stored face crops in .faces/{file_uuid}/{trace_id}/
+    // For trace_id=0 (untracked/stranger), check unbound directory instead
+    let output_dir = crate::core::config::OUTPUT_DIR.as_str();
+    let trace_id_str = trace_id.to_string();
+    let trace_dir_name = if trace_id == 0 { "unbound" } else { &trace_id_str };
+    let trace_dir = std::path::PathBuf::from(output_dir)
+        .join(".faces")
+        .join(&file_uuid)
+        .join(trace_dir_name);
+
+    if trace_dir.exists() {
+        // Find any cached face crop in this trace directory
+        if let Ok(mut entries) = std::fs::read_dir(&trace_dir) {
+            while let Some(Ok(entry)) = entries.next() {
+                let path = entry.path();
+                if path.extension().map_or(false, |e| e == "jpg") {
+                    tracing::info!("[trace_thumbnail] Using cached face crop: {}", path.display());
+                    let bytes = tokio::fs::read(&path)
+                        .await
+                        .map_err(|e| {
+                            (
+                                StatusCode::INTERNAL_SERVER_ERROR,
+                                Json(serde_json::json!({"error": e.to_string()})),
+                            )
+                        })?;
+
+                    // Validate cached JPEG
+                    crate::core::thumbnail::validator::validate_jpeg(&bytes).map_err(|e| {
+                        tracing::warn!("[trace_thumbnail] Cached JPEG validation failed: {}", e);
+                        (
+                            StatusCode::INTERNAL_SERVER_ERROR,
+                            Json(serde_json::json!({"error": "Invalid cached JPEG"})),
+                        )
+                    })?;
+
+                    return Ok(Response::builder()
+                        .status(StatusCode::OK)
+                        .header(header::CONTENT_TYPE, "image/jpeg")
+                        .header(header::CACHE_CONTROL, "public, max-age=86400")
+                        .body(Body::from(bytes))
+                        .unwrap());
+                }
+            }
+        }
+    }
+
+    // Step 2: Fallback to ffmpeg on-demand extraction
    let sel = select_rep_face(state.db.pool(), &file_uuid, trace_id, |e| {
-        (StatusCode::INTERNAL_SERVER_ERROR, Json(serde_json::json!({"error": e.to_string()})))
-    }).await?;
+        (
+            StatusCode::INTERNAL_SERVER_ERROR,
+            Json(serde_json::json!({"error": e.to_string()})),
+        )
+    })
+    .await?;

    let seek = sel.frame as f64 / sel.fps;
    let tmp = std::env::temp_dir().join(format!("trace_{}_{}.jpg", file_uuid, trace_id));

+    tracing::debug!("[trace_thumbnail] Fallback to ffmpeg for trace {} frame {}", trace_id, sel.frame);
+
    let status = tokio::process::Command::new("ffmpeg")
        .args([
-            "-ss", &format!("{:.2}", seek),
-            "-i", &sel.video_path,
-            "-vframes", "1",
-            "-vf", &format!("crop={}:{}:{}:{},scale=320:320", sel.w, sel.h, sel.x, sel.y),
-            "-q:v", "2",
-            "-y", &tmp.to_string_lossy().to_string(),
+            "-ss",
+            &format!("{:.2}", seek),
+            "-i",
+            &sel.video_path,
+            "-vframes",
+            "1",
+            "-vf",
+            &format!("crop={}:{}:{}:{},scale=320:320", sel.w, sel.h, sel.x, sel.y),
+            "-q:v",
+            "2",
+            "-y",
+            &tmp.to_string_lossy().to_string(),
        ])
        .output()
        .await
        .map_err(|e| {
-            (StatusCode::INTERNAL_SERVER_ERROR, Json(serde_json::json!({"error": e.to_string()})))
+            (
+                StatusCode::INTERNAL_SERVER_ERROR,
+                Json(serde_json::json!({"error": e.to_string()})),
+            )
        })?;

    if !status.status.success() {
-        return Err((StatusCode::INTERNAL_SERVER_ERROR, Json(serde_json::json!({"error": "FFmpeg failed"}))));
+        return Err((
+            StatusCode::INTERNAL_SERVER_ERROR,
+            Json(serde_json::json!({"error": "FFmpeg failed"})),
+        ));
    }

    let bytes = tokio::fs::read(&tmp).await.map_err(|e| {
-        (StatusCode::INTERNAL_SERVER_ERROR, Json(serde_json::json!({"error": e.to_string()})))
+        (
+            StatusCode::INTERNAL_SERVER_ERROR,
+            Json(serde_json::json!({"error": e.to_string()})),
+        )
+    })?;
+
+    crate::core::thumbnail::validator::validate_jpeg(&bytes).map_err(|e| {
+        tracing::warn!("[trace_thumbnail] JPEG validation failed: {}", e);
+        (
+            StatusCode::INTERNAL_SERVER_ERROR,
+            Json(serde_json::json!({"error": "Invalid JPEG output"})),
+        )
    })?;

    let _ = tokio::fs::remove_file(&tmp).await;
@@ -605,10 +755,16 @@ async fn get_cooccurrence(
    .fetch_optional(state.db.pool())
    .await
    .map_err(|e| {
-        (StatusCode::INTERNAL_SERVER_ERROR, Json(serde_json::json!({"error": e.to_string()})))
+        (
+            StatusCode::INTERNAL_SERVER_ERROR,
+            Json(serde_json::json!({"error": e.to_string()})),
+        )
    })?
    .ok_or_else(|| {
-        (StatusCode::NOT_FOUND, Json(serde_json::json!({"error": "Identity A not found"})))
+        (
+            StatusCode::NOT_FOUND,
+            Json(serde_json::json!({"error": "Identity A not found"})),
+        )
    })?;

    let id_b = sqlx::query_as::<_, (i32, String)>(&format!(
@@ -619,31 +775,38 @@ async fn get_cooccurrence(
    .fetch_optional(state.db.pool())
    .await
    .map_err(|e| {
-        (StatusCode::INTERNAL_SERVER_ERROR, Json(serde_json::json!({"error": e.to_string()})))
+        (
+            StatusCode::INTERNAL_SERVER_ERROR,
+            Json(serde_json::json!({"error": e.to_string()})),
+        )
    })?
    .ok_or_else(|| {
-        (StatusCode::NOT_FOUND, Json(serde_json::json!({"error": "Identity B not found"})))
+        (
+            StatusCode::NOT_FOUND,
+            Json(serde_json::json!({"error": "Identity B not found"})),
+        )
    })?;

    // Stage 2: Find first frame where both identity_ids appear
-    let cooccur: Option<(i64,)> = sqlx::query_as(
-        &format!(
-            "SELECT MIN(fd.frame_number)::bigint FROM {} fd \
+    let cooccur: Option<(i64,)> = sqlx::query_as(&format!(
+        "SELECT MIN(fd.frame_number)::bigint FROM {} fd \
             WHERE fd.file_uuid = $1 AND fd.identity_id = $2 \
             AND fd.frame_number IN ( \
               SELECT frame_number FROM {} \
               WHERE file_uuid = $1 AND identity_id = $3 \
             )",
-            fd_table, fd_table
-        )
-    )
+        fd_table, fd_table
+    ))
    .bind(&file_uuid)
    .bind(id_a.0)
    .bind(id_b.0)
    .fetch_optional(state.db.pool())
    .await
    .map_err(|e| {
-        (StatusCode::INTERNAL_SERVER_ERROR, Json(serde_json::json!({"error": e.to_string()})))
+        (
+            StatusCode::INTERNAL_SERVER_ERROR,
+            Json(serde_json::json!({"error": e.to_string()})),
+        )
    })?;

    let (first_frame,) = cooccur.ok_or_else(|| {
@@ -653,13 +816,17 @@ async fn get_cooccurrence(
    // Get fps for timestamp
    let video_table = schema::table_name("videos");
    let fps: f64 = sqlx::query_scalar(&format!(
-        "SELECT COALESCE(fps, 25.0) FROM {} WHERE file_uuid = $1", video_table
+        "SELECT COALESCE(fps, 25.0) FROM {} WHERE file_uuid = $1",
+        video_table
    ))
    .bind(&file_uuid)
    .fetch_optional(state.db.pool())
    .await
    .map_err(|e| {
-        (StatusCode::INTERNAL_SERVER_ERROR, Json(serde_json::json!({"error": e.to_string()})))
+        (
+            StatusCode::INTERNAL_SERVER_ERROR,
+            Json(serde_json::json!({"error": e.to_string()})),
+        )
    })?
    .unwrap_or(25.0);

@@ -685,40 +852,67 @@ async fn get_cooccurrence(
    // Stage 4: Get representative faces for both traces (reusing select_rep_face)
    let rep_a = if let Some((tid,)) = trace_a {
        select_rep_face(state.db.pool(), &file_uuid, tid, |e| {
-            (StatusCode::INTERNAL_SERVER_ERROR, Json(serde_json::json!({"error": e.to_string()})))
-        }).await.ok().map(|sel| CoOccurRepFace {
+            (
+                StatusCode::INTERNAL_SERVER_ERROR,
+                Json(serde_json::json!({"error": e.to_string()})),
+            )
+        })
+        .await
+        .ok()
+        .map(|sel| CoOccurRepFace {
            frame_number: sel.frame,
-            bbox: RepFaceBbox { x: sel.x, y: sel.y, width: sel.w, height: sel.h },
+            bbox: RepFaceBbox {
+                x: sel.x,
+                y: sel.y,
+                width: sel.w,
+                height: sel.h,
+            },
            confidence: sel.conf,
            thumbnail_url: format!("/api/v1/file/{}/trace/{}/thumbnail", file_uuid, tid),
        })
-    } else { None };
+    } else {
+        None
+    };

    let rep_b = if let Some((tid,)) = trace_b {
        select_rep_face(state.db.pool(), &file_uuid, tid, |e| {
-            (StatusCode::INTERNAL_SERVER_ERROR, Json(serde_json::json!({"error": e.to_string()})))
-        }).await.ok().map(|sel| CoOccurRepFace {
+            (
+                StatusCode::INTERNAL_SERVER_ERROR,
+                Json(serde_json::json!({"error": e.to_string()})),
+            )
+        })
+        .await
+        .ok()
+        .map(|sel| CoOccurRepFace {
            frame_number: sel.frame,
-            bbox: RepFaceBbox { x: sel.x, y: sel.y, width: sel.w, height: sel.h },
+            bbox: RepFaceBbox {
+                x: sel.x,
+                y: sel.y,
+                width: sel.w,
+                height: sel.h,
+            },
            confidence: sel.conf,
            thumbnail_url: format!("/api/v1/file/{}/trace/{}/thumbnail", file_uuid, tid),
        })
-    } else { None };
+    } else {
+        None
+    };

    // Total co-occurrence frames (from TKG if available, otherwise from face_detections)
-    let total_cooccurrence_frames: i64 = sqlx::query_scalar(
-        &format!(
-            "SELECT COUNT(DISTINCT fd.frame_number)::bigint FROM {} fd \
+    let total_cooccurrence_frames: i64 = sqlx::query_scalar(&format!(
+        "SELECT COUNT(DISTINCT fd.frame_number)::bigint FROM {} fd \
             WHERE fd.file_uuid = $1 AND fd.identity_id = $2 \
             AND fd.frame_number IN ( \
               SELECT frame_number FROM {} \
               WHERE file_uuid = $1 AND identity_id = $3 \
             )",
-            fd_table, fd_table
-        )
-    )
-    .bind(&file_uuid).bind(id_a.0).bind(id_b.0)
-    .fetch_one(state.db.pool()).await
+        fd_table, fd_table
+    ))
+    .bind(&file_uuid)
+    .bind(id_a.0)
+    .bind(id_b.0)
+    .fetch_one(state.db.pool())
+    .await
    .unwrap_or(0);

    Ok(Json(CoOccurResponse {
@@ -758,12 +952,7 @@ async fn rebuild_tkg(
    State(state): State<crate::api::types::AppState>,
    Path(file_uuid): Path<String>,
 ) -> Json<TkgRebuildResponse> {
-    let result = crate::core::processor::tkg::build_tkg(
-        &state.db,
-        &file_uuid,
-        &OUTPUT_DIR,
-    )
-    .await;
+    let result = crate::core::processor::tkg::build_tkg(&state.db, &file_uuid, &OUTPUT_DIR).await;

    match result {
        Ok(r) => Json(TkgRebuildResponse {
@@ -807,14 +996,14 @@ async fn get_representative_frame(
    State(state): State<crate::api::types::AppState>,
    Path(file_uuid): Path<String>,
 ) -> Result<Json<RepFrameResponse>, (StatusCode, Json<serde_json::Value>)> {
-    let result = tkg::query_auto_representative_frame(
-        state.db.pool(),
-        &file_uuid,
-    )
-    .await
-    .map_err(|e| {
-        (StatusCode::NOT_FOUND, Json(serde_json::json!({"error": e.to_string()})))
-    })?;
+    let result = tkg::query_auto_representative_frame(state.db.pool(), &file_uuid)
+        .await
+        .map_err(|e| {
+            (
+                StatusCode::NOT_FOUND,
+                Json(serde_json::json!({"error": e.to_string()})),
+            )
+        })?;

    let fps = query_fps(state.db.pool(), &file_uuid).await;

@@ -843,3 +1032,72 @@ async fn query_fps(pool: &sqlx::PgPool, file_uuid: &str) -> f64 {
    .flatten()
    .unwrap_or(25.0)
 }
+
+/// Resolves the trace used to represent a stranger within one file.
+///
+/// Selects the lowest non-NULL `trace_id` among the `face_detections` rows that
+/// match `file_uuid` and `stranger_id`. The explicit `ORDER BY` makes the choice
+/// deterministic, so the representative-face and thumbnail handlers always
+/// resolve the same trace; with a bare `LIMIT 1` the database may return any
+/// matching row.
+///
+/// # Errors
+///
+/// * `500 Internal Server Error` carrying the database message if the query fails.
+/// * `404 Not Found` (`"Stranger not found"`) if no row with a trace matches.
+async fn resolve_stranger_trace_id(
+    pool: &sqlx::PgPool,
+    file_uuid: &str,
+    stranger_id: i32,
+) -> Result<i32, (StatusCode, Json<serde_json::Value>)> {
+    let faces_table = crate::core::db::schema::table_name("face_detections");
+    let sql = format!(
+        "SELECT trace_id FROM {} \
+         WHERE file_uuid = $1 AND stranger_id = $2 AND trace_id IS NOT NULL \
+         ORDER BY trace_id LIMIT 1",
+        faces_table
+    );
+
+    let trace_id: Option<i32> = sqlx::query_scalar(&sql)
+        .bind(file_uuid)
+        .bind(stranger_id)
+        .fetch_optional(pool)
+        .await
+        .map_err(|e| {
+            (
+                StatusCode::INTERNAL_SERVER_ERROR,
+                Json(serde_json::json!({"error": e.to_string()})),
+            )
+        })?;
+
+    trace_id.ok_or_else(|| {
+        (
+            StatusCode::NOT_FOUND,
+            Json(serde_json::json!({"error": "Stranger not found"})),
+        )
+    })
+}
+
+/// Handler: representative face for a stranger, addressed by `(file_uuid, stranger_id)`.
+///
+/// Resolves the stranger to a trace and delegates to
+/// `get_representative_face_inner`; lookup failures are returned unchanged.
+async fn get_stranger_representative_face(
+    State(state): State<crate::api::types::AppState>,
+    Path((file_uuid, stranger_id)): Path<(String, i32)>,
+) -> Result<Json<RepFaceResponse>, (StatusCode, Json<serde_json::Value>)> {
+    let trace_id = resolve_stranger_trace_id(state.db.pool(), &file_uuid, stranger_id).await?;
+    get_representative_face_inner(&state, &file_uuid, trace_id).await
+}
+
+/// Handler: thumbnail for a stranger, addressed by `(file_uuid, stranger_id)`.
+///
+/// Resolves the stranger to a trace and delegates to
+/// `get_trace_thumbnail_inner`; lookup failures are returned unchanged.
+async fn get_stranger_thumbnail(
+    State(state): State<crate::api::types::AppState>,
+    Path((file_uuid, stranger_id)): Path<(String, i32)>,
+) -> Result<Response, (StatusCode, Json<serde_json::Value>)> {
+    let trace_id = resolve_stranger_trace_id(state.db.pool(), &file_uuid, stranger_id).await?;
+    get_trace_thumbnail_inner(&state, &file_uuid, trace_id).await
+}
--- a/src/api/types.rs
+++ b/src/api/types.rs
@@ -1,6 +1,7 @@
 #[derive(Clone)]
 pub struct AppState {
    pub db: std::sync::Arc<crate::core::db::PostgresDb>,
+    pub qdrant: std::sync::Arc<crate::core::db::QdrantDb>,
    pub embedder: std::sync::Arc<crate::Embedder>,
    pub embedder_model: String,
    pub mongo_cache: crate::core::cache::MongoCache,
--- a/src/api/universal_search.rs
+++ b/src/api/universal_search.rs
@@ -60,13 +60,12 @@ pub struct UniversalSearchResponse {
 pub enum SearchResult {
    #[serde(rename = "chunk")]
    Chunk {
+        file_uuid: String,
        chunk_id: String,
        chunk_type: String,
-        // Primary: frame-accurate position
        start_frame: i64,
        end_frame: i64,
        fps: f64,
-        // Reference: time derived from frames (subject to FPS variation)
        start_time: f64,
        end_time: f64,
        score: f64,
@@ -76,9 +75,8 @@ pub enum SearchResult {
    },
    #[serde(rename = "frame")]
    Frame {
-        // Primary: exact frame number
+        file_uuid: String,
        frame_number: i64,
-        // Reference: time derived from frame (subject to FPS variation)
        timestamp: f64,
        score: f64,
        objects: Option<Vec<serde_json::Value>>,
@@ -88,6 +86,7 @@ pub enum SearchResult {
    },
    #[serde(rename = "person")]
    Person {
+        file_uuid: Option<String>,
        identity_id: i32,
        identity_uuid: String,
        name: Option<String>,
@@ -328,17 +327,15 @@ async fn search_chunks(
    db: &PostgresDb,
    req: &UniversalSearchRequest,
 ) -> Result<Vec<SearchResult>, anyhow::Error> {
-    // uuid is required for chunk search - chunk_id is only unique within a video
-    let uuid = match &req.file_uuid {
-        Some(u) => u.replace('\'', "''"),
-        None => return Err(anyhow::anyhow!("file_uuid is required for chunk search")),
-    };
-
    let chunk_table = schema::table_name("chunk");
    let mut sql = format!(
-        "SELECT chunk_id, chunk_type, start_time, end_time, (start_time * fps)::bigint as start_frame, (end_time * fps)::bigint as end_frame, fps, text_content, content FROM {} WHERE file_uuid = '{}'",
-        chunk_table, uuid
+        "SELECT file_uuid, chunk_id, chunk_type, start_time, end_time, (start_time * fps)::bigint as start_frame, (end_time * fps)::bigint as end_frame, fps, text_content, content FROM {} WHERE 1=1",
+        chunk_table
    );
+
+    if let Some(uuid) = &req.file_uuid {
+        sql.push_str(&format!(" AND file_uuid = '{}'", uuid.replace('\'', "''")));
+    }
    if let Some(tr) = &req.time_range {
        sql.push_str(&format!(
            " AND start_time >= {} AND end_time <= {}",
@@ -422,6 +419,7 @@ async fn search_chunks(
    sql.push_str(&format!(" LIMIT {}", req.page_size.unwrap_or(20)));

    let rows: Vec<(
+        String,
        String,
        String,
        f64,
@@ -437,6 +435,7 @@ async fn search_chunks(
        .into_iter()
        .map(
            |(
+                file_uuid,
                chunk_id,
                chunk_type,
                start_time,
@@ -457,7 +456,6 @@ async fn search_chunks(
                        .and_then(|v| v.as_str())
                        .map(String::from)
                });
-                // Simple scoring: if query matches, score 0.8
                let score = if !req.query.is_empty()
                    && text.as_ref().map_or(false, |t| {
                        t.to_lowercase().contains(&req.query.to_lowercase())
@@ -468,6 +466,7 @@ async fn search_chunks(
                };

                SearchResult::Chunk {
+                    file_uuid,
                    chunk_id,
                    chunk_type,
                    start_time,
@@ -549,7 +548,7 @@ async fn search_frames_internal(

    let results: Vec<SearchResult> = rows
        .into_iter()
-        .map(|(frame_number, timestamp, yolo, ocr, face, _uuid)| {
+        .map(|(frame_number, timestamp, yolo, ocr, face, file_uuid)| {
            let objects = yolo.as_ref().and_then(|v| {
                v.get("objects")
                    .map(|o| o.as_array().cloned().unwrap_or_default())
@@ -571,6 +570,7 @@ async fn search_frames_internal(
            });

            SearchResult::Frame {
+                file_uuid,
                frame_number,
                timestamp,
                score: 0.7,
@@ -589,37 +589,54 @@ async fn search_persons_internal(
    db: &PostgresDb,
    req: &UniversalSearchRequest,
 ) -> Result<Vec<SearchResult>, anyhow::Error> {
-    let uuid = match &req.file_uuid {
-        Some(u) => u.replace('\'', "''"),
-        None => return Err(anyhow::anyhow!("file_uuid is required for person search")),
-    };
-
    let id_table = schema::table_name("identities");
    let fd_table = schema::table_name("face_detections");
    let mut sql = format!(
        "SELECT i.id, i.uuid::text, i.name, COUNT(fd.id) AS appearance_count, \
-         MIN(fd.timestamp_secs) AS first_time, MAX(fd.timestamp_secs) AS last_time \
-         FROM {} i JOIN {} fd ON fd.identity_id = i.id \
-         WHERE fd.file_uuid = '{}'",
-        id_table, fd_table, uuid
+         MIN(fd.timestamp_secs) AS first_time, MAX(fd.timestamp_secs) AS last_time, \
+         fd.file_uuid \
+         FROM {} i JOIN {} fd ON fd.identity_id = i.id WHERE 1=1",
+        id_table, fd_table
    );

+    if let Some(uuid) = &req.file_uuid {
+        sql.push_str(&format!(
+            " AND fd.file_uuid = '{}'",
+            uuid.replace('\'', "''")
+        ));
+    }
+
    if !req.query.is_empty() {
        let q = req.query.replace('\'', "''");
        sql.push_str(&format!(" AND i.name ILIKE '%{}%'", q));
    }

-    sql.push_str(" GROUP BY i.id, i.uuid, i.name");
+    sql.push_str(" GROUP BY i.id, i.uuid, i.name, fd.file_uuid");
    sql.push_str(" ORDER BY appearance_count DESC");
    sql.push_str(&format!(" LIMIT {}", req.page_size.unwrap_or(20)));

-    let rows: Vec<(i32, String, Option<String>, i64, Option<f64>, Option<f64>)> =
-        sqlx::query_as(&sql).fetch_all(db.pool()).await?;
+    let rows: Vec<(
+        i32,
+        String,
+        Option<String>,
+        i64,
+        Option<f64>,
+        Option<f64>,
+        String,
+    )> = sqlx::query_as(&sql).fetch_all(db.pool()).await?;

    let results: Vec<SearchResult> = rows
        .into_iter()
        .map(
-            |(identity_id, identity_uuid, name, appearance_count, first_time, last_time)| {
+            |(
+                identity_id,
+                identity_uuid,
+                name,
+                appearance_count,
+                first_time,
+                last_time,
+                file_uuid,
+            )| {
                let score = if !req.query.is_empty()
                    && name.as_ref().map_or(false, |n| {
                        n.to_lowercase().contains(&req.query.to_lowercase())
@@ -630,6 +647,7 @@ async fn search_persons_internal(
                };

                SearchResult::Person {
+                    file_uuid: Some(file_uuid),
                    identity_id,
                    identity_uuid,
                    name,
--- a/src/api/visual_chunk_search.rs
+++ b/src/api/visual_chunk_search.rs
@@ -1,513 +0,0 @@
-//! Visual chunk search functionality.
-//!
-//! This module provides search capabilities for visual chunks based on:
-//! - Object classes (e.g., "person", "car", "envelope")
-//! - Confidence thresholds
-//! - Object counts
-//! - Spatial density
-//! - Object relationships
-
-use crate::core::chunk::types::{Chunk, ChunkRule, ChunkType};
-use crate::core::db::{schema, PostgresDb};
-use anyhow::Result;
-use serde_json::Value;
-use std::collections::HashMap;
-
-/// Criteria for searching visual chunks
-#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
-pub struct VisualChunkSearchCriteria {
-    /// Minimum average confidence across frames
-    pub min_avg_confidence: Option<f32>,
-    /// Minimum number of frames with objects
-    pub min_frames_with_objects: Option<u32>,
-    /// Minimum number of unique object classes
-    pub min_unique_classes: Option<u32>,
-    /// Specific object classes to include (empty means all)
-    #[serde(default)]
-    pub required_classes: Vec<String>,
-    /// Object class counts to filter by
-    #[serde(default)]
-    pub class_counts: HashMap<String, (u32, u32)>,
-    /// Time range (optional)
-    pub time_range: Option<(f64, f64)>,
-}
-
-impl Default for VisualChunkSearchCriteria {
-    fn default() -> Self {
-        Self {
-            min_avg_confidence: None,
-            min_frames_with_objects: None,
-            min_unique_classes: None,
-            required_classes: Vec::new(),
-            class_counts: HashMap::new(),
-            time_range: None,
-        }
-    }
-}
-
-/// Search visual chunks based on criteria
-pub async fn search_visual_chunks(
-    db: &PostgresDb,
-    uuid: &str,
-    criteria: &VisualChunkSearchCriteria,
-) -> Result<Vec<Chunk>> {
-    // First, get all visual chunks for this video
-    let all_chunks = get_visual_chunks_by_uuid(db, uuid).await?;
-
-    // Apply filters
-    let filtered_chunks: Vec<Chunk> = all_chunks
-        .into_iter()
-        .filter(|chunk| {
-            // Check min avg confidence
-            if let Some(min_avg_confidence) = criteria.min_avg_confidence {
-                if let Some(content) = &chunk.content.as_object() {
-                    if let Some(metadata) = content.get("metadata") {
-                        if let Some(avg_confidence) = metadata.get("avg_confidence") {
-                            if let Some(conf) = avg_confidence.as_f64() {
-                                if conf < min_avg_confidence as f64 {
-                                    return false;
-                                }
-                            }
-                        }
-                    }
-                }
-            }
-
-            // Check min frames with objects
-            if let Some(min_frames) = criteria.min_frames_with_objects {
-                if let Some(stats) = &chunk.visual_stats {
-                    if let Some(frames_with_objects) = stats.get("frames_with_objects") {
-                        if let Some(count) = frames_with_objects.as_u64() {
-                            if count < min_frames as u64 {
-                                return false;
-                            }
-                        }
-                    }
-                }
-            }
-
-            // Check min unique classes
-            if let Some(min_unique_classes) = criteria.min_unique_classes {
-                if let Some(content) = &chunk.content.as_object() {
-                    if let Some(metadata) = content.get("metadata") {
-                        if let Some(unique_classes) = metadata.get("unique_classes") {
-                            if let Some(classes) = unique_classes.as_array() {
-                                if (classes.len() as u32) < min_unique_classes {
-                                    return false;
-                                }
-                            }
-                        }
-                    }
-                }
-            }
-
-            // Check required classes
-            if !criteria.required_classes.is_empty() {
-                if let Some(content) = &chunk.content.as_object() {
-                    if let Some(keyframe_objects) = content.get("keyframe_objects") {
-                        if let Some(objects) = keyframe_objects.as_array() {
-                            let mut found_all = true;
-                            for required_class in &criteria.required_classes {
-                                let mut found = false;
-                                for obj in objects {
-                                    if let Some(class_name) = obj.get("class_name") {
-                                        if let Some(class_str) = class_name.as_str() {
-                                            if class_str == required_class {
-                                                found = true;
-                                                break;
-                                            }
-                                        }
-                                    }
-                                }
-                                if !found {
-                                    found_all = false;
-                                    break;
-                                }
-                            }
-                            if !found_all {
-                                return false;
-                            }
-                        }
-                    }
-                }
-            }
-
-            // Check class counts
-            if !criteria.class_counts.is_empty() {
-                if let Some(content) = &chunk.content.as_object() {
-                    if let Some(metadata) = content.get("metadata") {
-                        if let Some(object_counts) = metadata.get("object_counts") {
-                            for (class, (min, max)) in &criteria.class_counts {
-                                if let Some(count_value) = object_counts.get(class) {
-                                    if let Some(count) = count_value.as_u64() {
-                                        if *min > 0 && count < *min as u64 {
-                                            return false;
-                                        }
-                                        if *max < u32::MAX && count > *max as u64 {
-                                            return false;
-                                        }
-                                    }
-                                } else if *min > 0 {
-                                    return false;
-                                }
-                            }
-                        } else if criteria.class_counts.values().any(|(min, _)| *min > 0) {
-                            return false;
-                        }
-                    }
-                }
-            }
-
-            // Check time range
-            if let Some((start_time, end_time)) = criteria.time_range {
-                // Calculate chunk time from frames
-                let chunk_start_time = chunk.start_frame as f64 / chunk.fps;
-                let chunk_end_time = chunk.end_frame as f64 / chunk.fps;
-
-                if chunk_start_time < start_time || chunk_end_time > end_time {
-                    return false;
-                }
-            }
-
-            true
-        })
-        .collect();
-
-    Ok(filtered_chunks)
-}
-
-/// Get all visual chunks for a video UUID
-async fn get_visual_chunks_by_uuid(db: &PostgresDb, uuid: &str) -> Result<Vec<Chunk>> {
-    let chunk_table = schema::table_name("chunk");
-    let sql = format!(
-        "SELECT file_id, file_uuid, chunk_id, chunk_type, fps, start_frame, end_frame, text_content, content, metadata, vector_id, visual_stats FROM {} WHERE file_uuid = '{}' AND chunk_type = 'visual' ORDER BY start_frame ASC",
-        chunk_table, uuid.replace('\'', "''")
-    );
-
-    let rows: Vec<(
-        i32,            // file_id
-        String,         // uuid
-        String,         // chunk_id
-        String,         // chunk_type
-        f64,            // fps
-        i64,            // start_frame
-        i64,            // end_frame
-        Option<String>, // text_content
-        Value,          // content
-        Option<Value>,  // metadata
-        Option<String>, // vector_id
-        Option<Value>,  // visual_stats
-    )> = sqlx::query_as(&sql).fetch_all(db.pool()).await?;
-
-    let mut chunks = Vec::new();
-    for row in rows {
-        let chunk_type = match row.3.as_str() {
-            "visual" => ChunkType::Visual,
-            "sentence" => ChunkType::Sentence,
-            "time_based" => ChunkType::TimeBased,
-            "cut" => ChunkType::Cut,
-            "trace" => ChunkType::Trace,
-            "story" => ChunkType::Story,
-            _ => ChunkType::TimeBased,
-        };
-
-        // Calculate frame_count
-        let frame_count = (row.6 - row.5) as i32;
-
-        chunks.push(Chunk {
-            file_id: row.0,
-            uuid: row.1,
-            chunk_id: row.2,
-            chunk_type,
-            rule: ChunkRule::Rule2, // Visual chunks use Rule2
-            fps: row.4,
-            start_frame: row.5,
-            end_frame: row.6,
-            text_content: row.7,
-            content: row.8,
-            metadata: row.9,
-            vector_id: row.10,
-            frame_count,
-            pre_chunk_ids: Vec::new(),
-            parent_chunk_id: None,
-            child_chunk_ids: Vec::new(),
-            visual_stats: row.11,
-        });
-    }
-
-    Ok(chunks)
-}
-
-/// Search visual chunks by object class
-pub async fn search_visual_chunks_by_class(
-    db: &PostgresDb,
-    uuid: &str,
-    object_class: &str,
-    min_count: Option<u32>,
-    max_count: Option<u32>,
-) -> Result<Vec<Chunk>> {
-    let all_chunks = get_visual_chunks_by_uuid(db, uuid).await?;
-
-    let filtered_chunks: Vec<Chunk> = all_chunks
-        .into_iter()
-        .filter(|chunk| {
-            // Check if chunk contains the object class
-            let mut contains_class = false;
-            if let Some(content) = &chunk.content.as_object() {
-                if let Some(keyframe_objects) = content.get("keyframe_objects") {
-                    if let Some(objects) = keyframe_objects.as_array() {
-                        for obj in objects {
-                            if let Some(class_name) = obj.get("class_name") {
-                                if let Some(class_str) = class_name.as_str() {
-                                    if class_str == object_class {
-                                        contains_class = true;
-                                        break;
-                                    }
-                                }
-                            }
-                        }
-                    }
-                }
-            }
-
-            if !contains_class {
-                return false;
-            }
-
-            // Check count in visual_stats
-            if let Some(stats) = &chunk.visual_stats {
-                if let Some(count) = stats.get(object_class) {
-                    if let Some(c) = count.as_u64() {
-                        if let Some(min) = min_count {
-                            if c < min as u64 {
-                                return false;
-                            }
-                        }
-                        if let Some(max) = max_count {
-                            if c > max as u64 {
-                                return false;
-                            }
-                        }
-                    }
-                }
-            }
-
-            true
-        })
-        .collect();
-
-    Ok(filtered_chunks)
-}
-
-/// Search visual chunks by spatial density
-pub async fn search_visual_chunks_by_density(
-    db: &PostgresDb,
-    uuid: &str,
-    min_density: f32,
-    max_density: Option<f32>,
-) -> Result<Vec<Chunk>> {
-    let all_chunks = get_visual_chunks_by_uuid(db, uuid).await?;
-
-    let filtered_chunks: Vec<Chunk> = all_chunks
-        .into_iter()
-        .filter(|chunk| {
-            if let Some(content) = &chunk.content.as_object() {
-                if let Some(metadata) = content.get("metadata") {
-                    if let Some(density_value) = metadata.get("spatial_density") {
-                        if let Some(density) = density_value.as_f64() {
-                            if density < min_density as f64 {
-                                return false;
-                            }
-                            if let Some(max_dens) = max_density {
-                                if density > max_dens as f64 {
-                                    return false;
-                                }
-                            }
-                            return true;
-                        }
-                    }
-                }
-            }
-            false
-        })
-        .collect();
-
-    Ok(filtered_chunks)
-}
-
-/// Find chunks containing specific object combinations
-pub async fn search_visual_chunks_by_combination(
-    db: &PostgresDb,
-    uuid: &str,
-    combination: &[(&str, u32)],
-) -> Result<Vec<Chunk>> {
-    let all_chunks = get_visual_chunks_by_uuid(db, uuid).await?;
-
-    let filtered_chunks: Vec<Chunk> = all_chunks
-        .into_iter()
-        .filter(|chunk| {
-            // Check if all required combinations are present
-            for (object_class, min_count) in combination {
-                let mut found = false;
-                if let Some(stats) = &chunk.visual_stats {
-                    if let Some(object_counts) = stats.get("object_counts") {
-                        if let Some(count_value) = object_counts.get(*object_class) {
-                            if let Some(count) = count_value.as_u64() {
-                                if count >= *min_count as u64 {
-                                    found = true;
-                                }
-                            }
-                        }
-                    }
-                }
-                if !found {
-                    return false;
-                }
-            }
-            true
-        })
-        .collect();
-
-    Ok(filtered_chunks)
-}
-
-/// Get visual chunk statistics
-pub async fn get_visual_chunk_statistics(
-    db: &PostgresDb,
-    uuid: &str,
-) -> Result<HashMap<String, Value>> {
-    let chunk_table = schema::table_name("chunk");
-    let sql = format!(
-        "SELECT 
-            COUNT(*) as total_chunks,
-            AVG((content->'metadata'->>'avg_confidence')::float) as avg_confidence,
-            MIN((content->'metadata'->>'avg_confidence')::float) as min_confidence,
-            MAX((content->'metadata'->>'avg_confidence')::float) as max_confidence,
-            SUM((content->'metadata'->>'object_count')::int) as total_objects,
-            AVG((content->'metadata'->>'spatial_density')::float) as avg_density
-        FROM {} 
-        WHERE file_uuid = '{}' 
-        AND chunk_type = 'visual'",
-        chunk_table,
-        uuid.replace('\'', "''")
-    );
-
-    let row: (
-        i64,
-        Option<f64>,
-        Option<f64>,
-        Option<f64>,
-        Option<i64>,
-        Option<f64>,
-    ) = sqlx::query_as(&sql).fetch_one(db.pool()).await?;
-
-    let mut stats = HashMap::new();
-    stats.insert("total_chunks".to_string(), Value::from(row.0));
-    stats.insert(
-        "avg_confidence".to_string(),
-        Value::from(row.1.unwrap_or(0.0)),
-    );
-    stats.insert(
-        "min_confidence".to_string(),
-        Value::from(row.2.unwrap_or(0.0)),
-    );
-    stats.insert(
-        "max_confidence".to_string(),
-        Value::from(row.3.unwrap_or(0.0)),
-    );
-    stats.insert("total_objects".to_string(), Value::from(row.4.unwrap_or(0)));
-    stats.insert("avg_density".to_string(), Value::from(row.5.unwrap_or(0.0)));
-
-    Ok(stats)
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn test_visual_chunk_search_criteria_default() {
-        let criteria = VisualChunkSearchCriteria::default();
-
-        assert_eq!(criteria.min_avg_confidence, None);
-        assert_eq!(criteria.min_frames_with_objects, None);
-        assert_eq!(criteria.min_unique_classes, None);
-        assert!(criteria.required_classes.is_empty());
-        assert!(criteria.class_counts.is_empty());
-        assert_eq!(criteria.time_range, None);
-    }
-
-    #[test]
-    fn test_visual_chunk_search_criteria_with_values() {
-        let mut criteria = VisualChunkSearchCriteria::default();
-        criteria.min_avg_confidence = Some(0.8);
-        criteria.min_frames_with_objects = Some(10);
-        criteria.min_unique_classes = Some(3);
-        criteria.required_classes = vec!["person".to_string(), "car".to_string()];
-        criteria.time_range = Some((0.0, 60.0));
-
-        assert_eq!(criteria.min_avg_confidence, Some(0.8));
-        assert_eq!(criteria.min_frames_with_objects, Some(10));
-        assert_eq!(criteria.min_unique_classes, Some(3));
-        assert_eq!(criteria.required_classes.len(), 2);
-        assert_eq!(criteria.time_range, Some((0.0, 60.0)));
-    }
-
-    #[test]
-    fn test_visual_chunk_search_criteria_serialization() {
-        let criteria = VisualChunkSearchCriteria {
-            min_avg_confidence: Some(0.85),
-            min_frames_with_objects: Some(5),
-            min_unique_classes: Some(2),
-            required_classes: vec!["person".to_string()],
-            class_counts: HashMap::new(),
-            time_range: Some((10.0, 30.0)),
-        };
-
-        let json = serde_json::to_string(&criteria).unwrap();
-        assert!(json.contains("min_avg_confidence"));
-        assert!(json.contains("required_classes"));
-
-        let deserialized: VisualChunkSearchCriteria = serde_json::from_str(&json).unwrap();
-        assert_eq!(deserialized.min_avg_confidence, Some(0.85));
-        assert_eq!(deserialized.required_classes.len(), 1);
-    }
-
-    #[test]
-    fn test_visual_chunk_search_criteria_with_class_counts() {
-        let mut criteria = VisualChunkSearchCriteria::default();
-        criteria.class_counts.insert("person".to_string(), (5, 20));
-        criteria.class_counts.insert("car".to_string(), (1, 10));
-
-        assert_eq!(criteria.class_counts.len(), 2);
-        assert_eq!(criteria.class_counts.get("person"), Some(&(5, 20)));
-        assert_eq!(criteria.class_counts.get("car"), Some(&(1, 10)));
-    }
-
-    #[test]
-    fn test_chunk_type_conversion() {
-        // Test chunk type string to enum conversion logic
-        let test_cases = vec![
-            ("visual", ChunkType::Visual),
-            ("sentence", ChunkType::Sentence),
-            ("time_based", ChunkType::TimeBased),
-            ("cut", ChunkType::Cut),
-            ("trace", ChunkType::Trace),
-            ("story", ChunkType::Story),
-            ("unknown", ChunkType::TimeBased), // Default fallback
-        ];
-
-        for (input, expected) in test_cases {
-            let chunk_type = match input {
-                "visual" => ChunkType::Visual,
-                "sentence" => ChunkType::Sentence,
-                "time_based" => ChunkType::TimeBased,
-                "cut" => ChunkType::Cut,
-                "trace" => ChunkType::Trace,
-                "story" => ChunkType::Story,
-                _ => ChunkType::TimeBased,
-            };
-            assert_eq!(chunk_type, expected);
-        }
-    }
-}
--- a/src/api/visual_search.rs
+++ b/src/api/visual_search.rs
@@ -1,217 +0,0 @@
-use axum::{extract::State, http::StatusCode, response::Json, routing::post, Router};
-use serde::{Deserialize, Serialize};
-use sha2::{Digest, Sha256};
-
-use super::types::AppState;
-use super::visual_chunk_search;
-use crate::core::cache::keys;
-use crate::core::chunk::types::Chunk;
-use crate::core::db::{Database, PostgresDb};
-
-fn generate_visual_search_hash(
-    uuid: &str,
-    criteria: &visual_chunk_search::VisualChunkSearchCriteria,
-) -> String {
-    let data = serde_json::json!({
-        "uuid": uuid,
-        "criteria": criteria,
-    });
-    let mut hasher = Sha256::new();
-    hasher.update(data.to_string().as_bytes());
-    format!("{:x}", hasher.finalize())[..16].to_string()
-}
-
-#[derive(Debug, Deserialize)]
-struct VisualChunkSearchRequest {
-    file_uuid: String,
-    criteria: visual_chunk_search::VisualChunkSearchCriteria,
-}
-
-#[derive(Debug, Serialize)]
-struct VisualChunkSearchResponse {
-    chunks: Vec<Chunk>,
-    total: usize,
-}
-
-async fn search_visual_chunks(
-    State(state): State<AppState>,
-    Json(req): Json<VisualChunkSearchRequest>,
-) -> Result<Json<VisualChunkSearchResponse>, StatusCode> {
-    let criteria_hash = generate_visual_search_hash(&req.file_uuid, &req.criteria);
-    let cache_key = keys::visual_search(&req.file_uuid, &criteria_hash);
-    let ttl = state.mongo_cache.ttl_visual_search();
-
-    let chunks = state
-        .mongo_cache
-        .get_or_fetch(&cache_key, ttl, keys::CATEGORY_VISUAL_SEARCH, || async {
-            let db = PostgresDb::init()
-                .await
-                .map_err(|e| anyhow::anyhow!("PG init failed: {}", e))?;
-
-            visual_chunk_search::search_visual_chunks(&db, &req.file_uuid, &req.criteria)
-                .await
-                .map_err(|e| anyhow::anyhow!("Visual search failed: {}", e))
-        })
-        .await
-        .map_err(|e| {
-            tracing::error!("Visual chunk search failed: {}", e);
-            StatusCode::INTERNAL_SERVER_ERROR
-        })?;
-
-    Ok(Json(VisualChunkSearchResponse {
-        total: chunks.len(),
-        chunks,
-    }))
-}
-
-#[derive(Debug, Deserialize)]
-struct VisualChunkSearchByClassRequest {
-    uuid: String,
-    object_class: String,
-    min_count: Option<u32>,
-    max_count: Option<u32>,
-}
-
-#[derive(Debug, Deserialize)]
-struct VisualChunkSearchByDensityRequest {
-    uuid: String,
-    min_density: f32,
-    max_density: Option<f32>,
-}
-
-#[derive(Debug, Deserialize)]
-struct VisualChunkStatsRequest {
-    uuid: String,
-}
-
-async fn search_visual_chunks_by_class(
-    State(state): State<AppState>,
-    Json(req): Json<VisualChunkSearchByClassRequest>,
-) -> Result<Json<VisualChunkSearchResponse>, StatusCode> {
-    let db = PostgresDb::init()
-        .await
-        .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
-
-    let chunks = visual_chunk_search::search_visual_chunks_by_class(
-        &db,
-        &req.uuid,
-        &req.object_class,
-        req.min_count,
-        req.max_count,
-    )
-    .await
-    .map_err(|e| {
-        tracing::error!("Visual chunk search by class failed: {}", e);
-        StatusCode::INTERNAL_SERVER_ERROR
-    })?;
-
-    Ok(Json(VisualChunkSearchResponse {
-        total: chunks.len(),
-        chunks,
-    }))
-}
-
-async fn search_visual_chunks_by_density(
-    State(state): State<AppState>,
-    Json(req): Json<VisualChunkSearchByDensityRequest>,
-) -> Result<Json<VisualChunkSearchResponse>, StatusCode> {
-    let db = PostgresDb::init()
-        .await
-        .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
-
-    let chunks = visual_chunk_search::search_visual_chunks_by_density(
-        &db,
-        &req.uuid,
-        req.min_density,
-        req.max_density,
-    )
-    .await
-    .map_err(|e| {
-        tracing::error!("Visual chunk search by density failed: {}", e);
-        StatusCode::INTERNAL_SERVER_ERROR
-    })?;
-
-    Ok(Json(VisualChunkSearchResponse {
-        total: chunks.len(),
-        chunks,
-    }))
-}
-
-#[derive(Debug, Serialize)]
-struct VisualChunkStatsResponse {
-    uuid: String,
-    stats: std::collections::HashMap<String, serde_json::Value>,
-}
-
-async fn get_visual_chunk_stats(
-    State(state): State<AppState>,
-    Json(req): Json<VisualChunkStatsRequest>,
-) -> Result<Json<VisualChunkStatsResponse>, StatusCode> {
-    let db = PostgresDb::init()
-        .await
-        .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
-
-    let stats = visual_chunk_search::get_visual_chunk_statistics(&db, &req.uuid)
-        .await
-        .map_err(|e| {
-            tracing::error!("Get visual chunk stats failed: {}", e);
-            StatusCode::INTERNAL_SERVER_ERROR
-        })?;
-
-    Ok(Json(VisualChunkStatsResponse {
-        uuid: req.uuid,
-        stats,
-    }))
-}
-
-#[derive(Debug, Deserialize)]
-struct VisualChunkSearchByCombinationRequest {
-    uuid: String,
-    combination: Vec<(String, u32)>,
-}
-
-async fn search_visual_chunks_by_combination(
-    State(state): State<AppState>,
-    Json(req): Json<VisualChunkSearchByCombinationRequest>,
-) -> Result<Json<VisualChunkSearchResponse>, StatusCode> {
-    let db = PostgresDb::init()
-        .await
-        .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
-
-    let combination: Vec<(&str, u32)> = req
-        .combination
-        .iter()
-        .map(|(c, n)| (c.as_str(), *n))
-        .collect();
-
-    let chunks =
-        visual_chunk_search::search_visual_chunks_by_combination(&db, &req.uuid, &combination)
-            .await
-            .map_err(|e| {
-                tracing::error!("Visual chunk search by combination failed: {}", e);
-                StatusCode::INTERNAL_SERVER_ERROR
-            })?;
-
-    Ok(Json(VisualChunkSearchResponse {
-        total: chunks.len(),
-        chunks,
-    }))
-}
-
-pub fn visual_search_routes() -> Router<AppState> {
-    Router::new()
-        .route("/api/v1/search/visual", post(search_visual_chunks))
-        .route(
-            "/api/v1/search/visual/class",
-            post(search_visual_chunks_by_class),
-        )
-        .route(
-            "/api/v1/search/visual/density",
-            post(search_visual_chunks_by_density),
-        )
-        .route("/api/v1/search/visual/stats", post(get_visual_chunk_stats))
-        .route(
-            "/api/v1/search/visual/combination",
-            post(search_visual_chunks_by_combination),
-        )
-}
--- a/src/bin/check_db_schema.rs
+++ b/src/bin/check_db_schema.rs
@@ -0,0 +1,42 @@
+use sqlx::postgres::PgPoolOptions;
+use sqlx::Row;
+
+/// Schema diagnostic: lists the columns of `public.videos` and `public.chunks_rule1`,
+/// then reports whether a `public.jobs` table exists.
+#[tokio::main]
+async fn main() -> Result<(), Box<dyn std::error::Error>> {
+    // DATABASE_URL takes precedence; otherwise use the local development database.
+    let database_url = match std::env::var("DATABASE_URL") {
+        Ok(value) => value,
+        Err(_) => String::from("postgres://accusys@localhost:5432/momentry"),
+    };
+    let pool = PgPoolOptions::new().max_connections(1).connect(&database_url).await?;
+
+    // Columns of public.videos, in ordinal order.
+    let video_columns = sqlx::query(
+        "SELECT column_name, data_type FROM information_schema.columns WHERE table_schema='public' AND table_name='videos' ORDER BY ordinal_position"
+    ).fetch_all(&pool).await?;
+    println!("=== public.videos columns ===");
+    for row in video_columns.iter() {
+        let (name, data_type): (String, String) = (row.get("column_name"), row.get("data_type"));
+        println!("  {}: {}", name, data_type);
+    }
+
+    // Columns of public.chunks_rule1, in ordinal order.
+    let chunk_columns = sqlx::query(
+        "SELECT column_name, data_type FROM information_schema.columns WHERE table_schema='public' AND table_name='chunks_rule1' ORDER BY ordinal_position"
+    ).fetch_all(&pool).await?;
+    println!("=== public.chunks_rule1 columns ===");
+    for row in chunk_columns.iter() {
+        let (name, data_type): (String, String) = (row.get("column_name"), row.get("data_type"));
+        println!("  {}: {}", name, data_type);
+    }
+
+    // Existence probe for public.jobs.
+    let jobs_present: bool = sqlx::query_scalar(
+        "SELECT EXISTS (SELECT 1 FROM information_schema.tables WHERE table_schema='public' AND table_name='jobs')"
+    ).fetch_one(&pool).await?;
+    println!("=== public.jobs exists: {} ===", jobs_present);
+
+    Ok(())
+}
--- a/src/bin/sync_qdrant_from_pg.rs
+++ b/src/bin/sync_qdrant_from_pg.rs
@@ -0,0 +1,95 @@
+use anyhow::{Context, Result};
+use momentry_core::{Database, PostgresDb, QdrantDb, VectorPayload};
+
+/// Backfills Qdrant from PostgreSQL: for each hard-coded file UUID, upserts every sentence
+/// chunk that already has an embedding and non-empty text, counting per-chunk upsert errors.
+#[tokio::main]
+async fn main() -> Result<()> {
+    // NOTE(review): machine-specific absolute path; `.ok()` discards any load error.
+    dotenv::from_filename("/Users/accusys/momentry_core_0.1/.env.development").ok();
+    tracing_subscriber::fmt::init();
+    let pg = PostgresDb::init().await.context("Failed to init PostgreSQL")?;
+    let qdrant = QdrantDb::new();
+    let chunk_table = momentry_core::core::db::schema::table_name("chunk");
+
+    let uuids = [
+        "63acd3bb02b5b9dfbb9d6db499fcc864",
+        "a6fb22eebefaef17e62af874997c5944",
+        "d81e01261391b45c1a14ddd5f082733e",
+    ];
+
+    for uuid in &uuids {
+        // Select only the columns used below; `embedding::text` is parsed as a JSON array.
+        let rows = sqlx::query_as::<_, (String, String, i64, i64, f64, f64, String)>(
+            &format!(
+                "SELECT chunk_id, text_content, start_frame, end_frame, \
+                 start_time, end_time, embedding::text \
+                 FROM {} \
+                 WHERE file_uuid = $1 \
+                   AND chunk_type = 'sentence' \
+                   AND embedding IS NOT NULL \
+                   AND (text_content IS NOT NULL AND text_content != '') \
+                 ORDER BY id",
+                chunk_table
+            ),
+        )
+        .bind(uuid)
+        .fetch_all(pg.pool())
+        .await?;
+        let total = rows.len();
+        println!("[{}] Found {} sentence chunks with embeddings to sync to Qdrant", uuid, total);
+        if total == 0 {
+            continue;
+        }
+
+        let start = std::time::Instant::now();
+        let mut stored = 0usize;
+        let mut errors = 0usize;
+
+        for (chunk_id, text, start_frame, end_frame, start_time, end_time, vector_text) in &rows {
+            // A vector that fails to parse aborts the whole run via `?`.
+            let vector: Vec<f32> = serde_json::from_str(vector_text)
+                .map_err(|e| anyhow::anyhow!("Failed to parse vector for {}: {}", chunk_id, e))?;
+            let payload = VectorPayload {
+                file_uuid: uuid.to_string(),
+                chunk_id: chunk_id.clone(),
+                chunk_type: "sentence".to_string(),
+                start_frame: *start_frame,
+                end_frame: *end_frame,
+                start_time: *start_time,
+                end_time: *end_time,
+                text: Some(text.clone()),
+            };
+            // Upsert failures are logged and counted, then the loop continues.
+            if let Err(e) = qdrant.upsert_vector(chunk_id, &vector, payload).await {
+                eprintln!("[ERROR] Qdrant upsert failed for {}: {}", chunk_id, e);
+                errors += 1;
+                continue;
+            }
+            stored += 1;
+            if stored % 5000 == 0 || stored == total {
+                let elapsed = start.elapsed();
+                let rate = stored as f64 / elapsed.as_secs_f64();
+                println!(
+                    "  [{}] {}/{} ({:.1}%) | {:.0} vec/s | {} errors",
+                    uuid.get(..8).unwrap_or(uuid),
+                    stored, total,
+                    100.0 * stored as f64 / total as f64,
+                    rate, errors,
+                );
+            }
+        }
+
+        let elapsed = start.elapsed();
+        println!(
+            "[{}] Done! {}/{} vectors synced ({} errors) in {:.1}s ({:.0} vec/s avg)",
+            uuid.get(..8).unwrap_or(uuid),
+            stored, total, errors,
+            elapsed.as_secs_f64(),
+            stored as f64 / elapsed.as_secs_f64(),
+        );
+    }
+
+    println!("\nAll files synced to Qdrant!");
+    Ok(())
+}
--- a/src/bin/test_bson_deserialize.rs
+++ b/src/bin/test_bson_deserialize.rs
@@ -0,0 +1,45 @@
+use bson::bson;
+use chrono::{DateTime, Utc};
+use serde::{Deserialize, Serialize};
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+struct TestIdentitySnapshot { // local struct that main() tries to deserialize a sample BSON document into
+    id: i32,
+    uuid: String,
+    name: String,
+    identity_type: Option<String>,
+    source: Option<String>,
+    status: String,
+    tmdb_id: Option<i32>,
+    tmdb_profile: Option<String>, // main() supplies BSON Null for this field
+    metadata: serde_json::Value, // free-form JSON; main() supplies a nested object
+    #[serde(
+        with = "bson::serde_helpers::chrono_datetime_as_bson_datetime_optional",
+        default // a document without `created_at` deserializes to None
+    )]
+    created_at: Option<DateTime<Utc>>,
+    face_count: i64,
+}
+
+/// Deserializes one hand-built BSON document into `TestIdentitySnapshot` and prints the outcome.
+fn main() {
+    // Simulated MongoDB document
+    let snapshot_doc = bson!({
+        "id": bson::Bson::Int32(21),
+        "uuid": "1524f6a1537f48a187526d44a236584f",
+        "name": "Albert Daumergue",
+        "identity_type": "people",
+        "source": "tmdb",
+        "status": "confirmed",
+        "tmdb_id": bson::Bson::Int32(1100817),
+        "tmdb_profile": bson::Bson::Null,
+        "metadata": {"role": "", "notes": "", "aliases": [], "starred": false},
+        "created_at": bson::Bson::DateTime(bson::DateTime::from_millis(1714641951963)),
+        "face_count": bson::Bson::Int64(0)
+    });
+
+    match bson::from_bson::<TestIdentitySnapshot>(snapshot_doc) {
+        Ok(snapshot) => println!("Success! {:?}", snapshot),
+        Err(err) => println!("Error: {}", err),
+    }
+}
--- a/src/bin/test_empty_array.rs
+++ b/src/bin/test_empty_array.rs
@@ -0,0 +1,25 @@
+use bson::bson;
+use serde::{Deserialize, Serialize};
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+struct FacesTransferred { // target type for the empty-array deserialization probe in main()
+    file_uuid: String,
+    face_ids: Vec<String>, // main() supplies an empty BSON array
+    trace_ids: Vec<i32>, // main() supplies an empty BSON array and prints the resulting length
+    count: i64,
+}
+
+/// Checks that a document with empty BSON arrays deserializes into `FacesTransferred`.
+fn main() {
+    let empty_arrays_doc = bson!({
+        "file_uuid": "",
+        "face_ids": [],
+        "trace_ids": [],
+        "count": bson::Bson::Int64(0)
+    });
+
+    match bson::from_bson::<FacesTransferred>(empty_arrays_doc) {
+        Ok(faces) => println!("Success! trace_ids len: {}", faces.trace_ids.len()),
+        Err(err) => println!("Error: {}", err),
+    }
+}
--- a/src/bin/test_full_deserialize.rs
+++ b/src/bin/test_full_deserialize.rs
@@ -0,0 +1,131 @@
+use bson::bson;
+use chrono::{DateTime, Utc};
+use serde::{Deserialize, Serialize};
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+struct IdentitySnapshot { // type of `IdentityMergeHistory::source_identity`
+    id: i32,
+    uuid: String,
+    name: String,
+    identity_type: Option<String>,
+    source: Option<String>,
+    status: String,
+    tmdb_id: Option<i32>,
+    tmdb_profile: Option<String>, // main() supplies BSON Null for this field
+    metadata: serde_json::Value, // free-form JSON; main() supplies an empty object
+    #[serde(
+        with = "bson::serde_helpers::chrono_datetime_as_bson_datetime_optional",
+        default // a document without `created_at` deserializes to None
+    )]
+    created_at: Option<DateTime<Utc>>,
+    face_count: i64,
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+struct TargetIdentitySnapshot { // type of `IdentityMergeHistory::target_identity`
+    id: i32,
+    uuid: String,
+    name: String,
+    metadata_before: serde_json::Value, // main() supplies an empty object
+    metadata_after: Option<serde_json::Value>, // main() supplies BSON Null
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+struct AliasEntry { // element type of `aliases_added_to_target`; main() only supplies an empty list
+    name: String,
+    locale: String,
+    source: Option<String>,
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+struct FacesTransferred { // type of `IdentityMergeHistory::faces_transferred`
+    file_uuid: String,
+    face_ids: Vec<String>, // main() supplies an empty BSON array
+    trace_ids: Vec<i32>, // main() supplies an empty BSON array
+    count: i64,
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+struct MergeParams { // type of `IdentityMergeHistory::merge_params`
+    keep_history: bool,
+    cleared_stranger_id: bool,
+    performed_by_user: Option<String>, // main() supplies the string "0"
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+struct IdentityMergeHistory { // top-level document that main() deserializes from a hand-built BSON value
+    #[serde(rename = "_id", skip_serializing_if = "Option::is_none")]
+    id: Option<bson::oid::ObjectId>, // plain Option: an absent `_id` deserializes to None
+    merge_id: String,
+    source_identity: IdentitySnapshot,
+    target_identity: TargetIdentitySnapshot,
+    aliases_added_to_target: Vec<AliasEntry>,
+    metadata_fields_added: Vec<String>,
+    faces_transferred: FacesTransferred,
+    merge_params: MergeParams,
+    #[serde(with = "bson::serde_helpers::chrono_datetime_as_bson_datetime")]
+    merged_at: DateTime<Utc>,
+    #[serde(with = "bson::serde_helpers::chrono_datetime_as_bson_datetime")]
+    undo_deadline: DateTime<Utc>,
+    undone: bool,
+    #[serde(
+        with = "bson::serde_helpers::chrono_datetime_as_bson_datetime_optional",
+        default, skip_serializing_if = "Option::is_none" // `default`: a skipped field must read back as None
+    )]
+    undone_at: Option<DateTime<Utc>>, // with `with`, serde does not default a missing Option on its own
+    undone_by: Option<String>,
+    undone_snapshot: Option<serde_json::Value>,
+    undo_expired: bool,
+}
+
+/// Deserializes a hand-built merge-history BSON document into `IdentityMergeHistory`.
+fn main() {
+    let history_doc = bson!({
+        "merge_id": "test-id",
+        "source_identity": {
+            "id": bson::Bson::Int32(21),
+            "uuid": "test-uuid",
+            "name": "Test",
+            "identity_type": "people",
+            "source": "tmdb",
+            "status": "confirmed",
+            "tmdb_id": bson::Bson::Int32(123),
+            "tmdb_profile": bson::Bson::Null,
+            "metadata": {},
+            "created_at": bson::Bson::DateTime(bson::DateTime::from_millis(1714641951963)),
+            "face_count": bson::Bson::Int64(0)
+        },
+        "target_identity": {
+            "id": bson::Bson::Int32(22),
+            "uuid": "target-uuid",
+            "name": "Target",
+            "metadata_before": {},
+            "metadata_after": bson::Bson::Null
+        },
+        "aliases_added_to_target": [],
+        "metadata_fields_added": [],
+        "faces_transferred": {
+            "file_uuid": "",
+            "face_ids": [],
+            "trace_ids": [],
+            "count": bson::Bson::Int64(0)
+        },
+        "merge_params": {
+            "keep_history": false,
+            "cleared_stranger_id": true,
+            "performed_by_user": "0"
+        },
+        "merged_at": bson::Bson::DateTime(bson::DateTime::from_millis(1714641951963)),
+        "undo_deadline": bson::Bson::DateTime(bson::DateTime::from_millis(1714641951963)),
+        "undone": false,
+        "undone_at": bson::Bson::Null,
+        "undone_by": bson::Bson::Null,
+        "undone_snapshot": bson::Bson::Null,
+        "undo_expired": false
+    });
+
+    match bson::from_bson::<IdentityMergeHistory>(history_doc) {
+        Ok(history) => println!("Success! {:?}", history.merge_id),
+        Err(err) => println!("Error: {}", err),
+    }
+}
--- a/src/bin/test_number_types.rs
+++ b/src/bin/test_number_types.rs
@@ -0,0 +1,31 @@
+use bson::bson;
+use serde::{Deserialize, Serialize};
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+struct TestId { // minimal struct whose integer fields are the target of the number-type probe in main()
+    id: i32,
+    tmdb_id: Option<i32>,
+}
+
+/// Probes whether `TestId` deserializes from BSON Int32 values and from BSON Double values.
+fn main() {
+    // Case 1: ids stored as Int32
+    let doc_int32 = bson!({
+        "id": bson::Bson::Int32(21),
+        "tmdb_id": bson::Bson::Int32(1100817)
+    });
+    let result1: Result<TestId, _> = bson::from_bson(doc_int32);
+    println!("Int32 test: {}", result1.is_ok());
+
+    // Case 2: ids stored as Double (a JavaScript Number may be written this way)
+    let doc_double = bson!({
+        "id": bson::Bson::Double(21.0),
+        "tmdb_id": bson::Bson::Double(1100817.0)
+    });
+
+    let result2: Result<TestId, _> = bson::from_bson(doc_double);
+    println!("Double test: {}", result2.is_ok());
+    if let Err(e) = &result2 {
+        println!("Double error: {}", e);
+    }
+}
--- a/src/bin/vectorize_missing.rs
+++ b/src/bin/vectorize_missing.rs
@@ -0,0 +1,117 @@
+use anyhow::{Context, Result};
+use momentry_core::{
+    Database, Embedder, PostgresDb, QdrantDb, VectorPayload,
+};
+use std::time::{Duration, Instant};
+
+/// Embeds the sentence chunks of one hard-coded file that have no embedding yet, storing each
+/// vector in PostgreSQL and then upserting it into Qdrant; failures are logged and counted.
+#[tokio::main]
+async fn main() -> Result<()> {
+    // NOTE(review): machine-specific absolute path; `.ok()` discards any load error.
+    dotenv::from_filename("/Users/accusys/momentry_core_0.1/.env.development").ok();
+    tracing_subscriber::fmt::init();
+    let pg = PostgresDb::init().await.context("Failed to init PostgreSQL")?;
+    let qdrant = QdrantDb::new();
+    let embedder = Embedder::new("embeddinggemma-300m".to_string());
+
+    let uuid = "63acd3bb02b5b9dfbb9d6db499fcc864";
+    let chunk_table = momentry_core::core::db::schema::table_name("chunk");
+    // Only the columns consumed by the loop below are selected.
+    let rows = sqlx::query_as::<_, (String, String, i64, i64, f64, f64)>(
+        &format!(
+            "SELECT chunk_id, text_content, start_frame, end_frame, \
+             start_time, end_time \
+             FROM {} \
+             WHERE file_uuid = $1 \
+               AND chunk_type = 'sentence' \
+               AND embedding IS NULL \
+               AND (text_content IS NOT NULL AND text_content != '') \
+             ORDER BY id",
+            chunk_table
+        ),
+    )
+    .bind(uuid)
+    .fetch_all(pg.pool())
+    .await?;
+    let total = rows.len();
+    println!("Found {} sentence chunks without embedding for {}", total, uuid);
+    if total == 0 {
+        println!("Nothing to vectorize. Exiting.");
+        return Ok(());
+    }
+
+    let start = Instant::now();
+    let mut stored = 0usize;
+    let mut errors = 0usize;
+
+    for (chunk_id, text, start_frame, end_frame, start_time, end_time) in &rows {
+        if text.is_empty() {
+            continue;
+        }
+
+        match embedder.embed_document(text).await {
+            Ok(vector) => {
+                if let Err(e) = pg.store_vector(chunk_id, &vector, uuid).await {
+                    eprintln!("[ERROR] PG store failed for {}: {}", chunk_id, e);
+                    errors += 1;
+                    continue;
+                }
+
+                let payload = VectorPayload {
+                    file_uuid: uuid.to_string(),
+                    chunk_id: chunk_id.clone(),
+                    chunk_type: "sentence".to_string(),
+                    start_frame: *start_frame,
+                    end_frame: *end_frame,
+                    start_time: *start_time,
+                    end_time: *end_time,
+                    text: Some(text.clone()),
+                };
+                // NOTE(review): a failure here leaves the vector in PG only; if store_vector fills `embedding`, a rerun will not reselect this chunk — confirm.
+                if let Err(e) = qdrant.upsert_vector(chunk_id, &vector, payload).await {
+                    eprintln!("[ERROR] Qdrant upsert failed for {}: {}", chunk_id, e);
+                    errors += 1;
+                    continue;
+                }
+
+                stored += 1;
+                if stored % 500 == 0 || stored == total {
+                    let elapsed = start.elapsed();
+                    let rate = stored as f64 / elapsed.as_secs_f64();
+                    let eta = if stored < total {
+                        let remaining = Duration::from_secs_f64((total - stored) as f64 / rate);
+                        format!(" (ETA: {}s)", remaining.as_secs())
+                    } else {
+                        String::new()
+                    };
+                    println!(
+                        "  [{}/{}] {:.1}% done | {:.0} vec/s | {} errors{}",
+                        stored,
+                        total,
+                        100.0 * stored as f64 / total as f64,
+                        rate,
+                        errors,
+                        eta,
+                    );
+                }
+            }
+            Err(e) => {
+                eprintln!("[ERROR] Embedding failed for {}: {}", chunk_id, e);
+                errors += 1;
+            }
+        }
+    }
+
+    let elapsed = start.elapsed();
+    println!(
+        "\nDone! {}/{} vectors stored ({} errors) in {:.1}s ({:.0} vec/s avg)",
+        stored,
+        total,
+        errors,
+        elapsed.as_secs_f64(),
+        stored as f64 / elapsed.as_secs_f64(),
+    );
+
+    Ok(())
+}
--- a/src/core/chunk/rule3_ingest.rs
+++ b/src/core/chunk/rule3_ingest.rs
@@ -1,6 +1,5 @@
 use crate::core::config::OUTPUT_DIR;
 use crate::core::db::schema;
-use crate::core::llm::client::generate_5w1h_summary;
 use anyhow::{Context, Result};
 use serde::Deserialize;
 use sqlx::PgPool;
@@ -115,19 +114,6 @@ pub async fn ingest_rule3(pool: &PgPool, file_uuid: &str) -> Result<usize> {

        let aggregated_text = texts.join(" ");

-        // 3. Call LLM for Summary
-        let summary = if !aggregated_text.is_empty() {
-            match generate_5w1h_summary(&aggregated_text).await {
-                Ok(s) => s,
-                Err(e) => {
-                    warn!("LLM Summary failed for scene {}: {}", scene.scene_number, e);
-                    "LLM Error".to_string()
-                }
-            }
-        } else {
-            "No Audio".to_string()
-        };
-
        info!(
            "Scene {}: {} -> {} ({} sentences)",
            scene.scene_number,
@@ -168,7 +154,7 @@ pub async fn ingest_rule3(pool: &PgPool, file_uuid: &str) -> Result<usize> {
        .bind(scene.end_frame as i64)
        .bind(&metadata)
        .bind(&aggregated_text)
-        .bind(&summary)
+        .bind(&String::new())
        .bind(&metadata)
        .bind(&child_ids)
        .execute(&mut *tx)
--- a/src/core/chunk/types.rs
+++ b/src/core/chunk/types.rs
@@ -1,7 +1,6 @@
 use crate::core::time::FrameTime;
 use serde::{Deserialize, Serialize};

-// ==================== ChunkType ====================
 #[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq)]
 #[serde(rename_all = "snake_case")]
 pub enum ChunkType {
@@ -10,7 +9,6 @@ pub enum ChunkType {
    Cut,
    Trace,
    Story,
-    Visual, // 視覺分片 (Phase 2.1)
 }

 impl ChunkType {
@@ -21,17 +19,15 @@ impl ChunkType {
            ChunkType::Cut => "cut",
            ChunkType::Trace => "trace",
            ChunkType::Story => "story",
-            ChunkType::Visual => "visual",
        }
    }
 }

-// ==================== ChunkRule ====================
 #[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq)]
 #[serde(rename_all = "snake_case")]
 pub enum ChunkRule {
-    Rule1, // 直接轉換
-    Rule2, // 集合內容
+    Rule1, // direct conversion
+    Rule2, // aggregated content
 }

 impl ChunkRule {
@@ -43,73 +39,6 @@ impl ChunkRule {
    }
 }

-// ==================== 視覺分片相關結構 (Phase 2.1) ====================
-/// 邊界框
-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct BoundingBox {
-    pub x: i32,
-    pub y: i32,
-    pub width: i32,
-    pub height: i32,
-}
-
-/// 檢測到的物件
-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct DetectedObject {
-    /// 物件類別名稱
-    pub class_name: String,
-    /// 物件類別 ID
-    pub class_id: u32,
-    /// 信心值 (0.0-1.0)
-    pub confidence: f32,
-    /// 邊界框
-    pub bbox: Option<BoundingBox>,
-    /// 出現次數 (在分片內)
-    pub occurrence: u32,
-}
-
-/// 關鍵幀的物件列表
-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct KeyframeObjects {
-    /// 關鍵幀時間 (秒) - 僅供參考，主要使用 frame_number
-    pub timestamp: f64,
-    /// 關鍵幀幀號 - 主要時間標示
-    pub frame_number: u64,
-    /// 檢測到的物件
-    pub objects: Vec<DetectedObject>,
-}
-
-/// 視覺元數據
-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct VisualMetadata {
-    /// 總物件數量
-    pub object_count: u32,
-    /// 唯一物件類別列表
-    pub unique_classes: Vec<String>,
-    /// 最高信心值
-    pub max_confidence: f32,
-    /// 平均信心值
-    pub avg_confidence: f32,
-    /// 空間密度（每幀平均物件數）
-    pub spatial_density: f32,
-}
-
-/// 視覺分片內容
-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct VisualChunkContent {
-    /// 關鍵幀物件列表，每個關鍵幀包含 frame_number
-    pub keyframe_objects: Vec<KeyframeObjects>,
-    /// 主要物件標籤（出現在大多數幀中的物件）
-    pub dominant_objects: Vec<String>,
-    /// 物件關係 (object1, relationship, object2) - 可選
-    pub object_relationships: Vec<(String, String, String)>,
-    /// 場景描述 - 可選
-    pub scene_description: Option<String>,
-    /// 視覺元數據
-    pub metadata: VisualMetadata,
-}
-
-// ==================== Chunk 主結構 ====================
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct Chunk {
    pub file_id: i32,
@@ -117,11 +46,8 @@ pub struct Chunk {
    pub chunk_id: String,
    pub chunk_type: ChunkType,
    pub rule: ChunkRule,
-    /// Frames per second (can be fractional, e.g., 29.97, 23.976)
    pub fps: f64,
-    /// Start frame (0-based) - 主要時間標示
    pub start_frame: i64,
-    /// End frame (exclusive) - 主要時間標示
    pub end_frame: i64,
    pub text_content: Option<String>,
    pub content: serde_json::Value,
@@ -129,13 +55,11 @@ pub struct Chunk {
    pub vector_id: Option<String>,
    pub frame_count: i32,
    pub pre_chunk_ids: Vec<i32>,
-    pub parent_chunk_id: Option<String>, // For parent-child chunk hierarchy
-    pub child_chunk_ids: Vec<String>,    // Child chunk IDs (for parent chunks)
-    pub visual_stats: Option<serde_json::Value>,
+    pub parent_chunk_id: Option<String>,
+    pub child_chunk_ids: Vec<String>,
 }

 impl Chunk {
-    /// 創建新分片
    pub fn new(
        file_id: i32,
        uuid: String,
@@ -166,167 +90,17 @@ impl Chunk {
            pre_chunk_ids: vec![],
            parent_chunk_id: None,
            child_chunk_ids: vec![],
-            visual_stats: None,
        }
    }

-    /// 創建視覺分片 (Phase 2.1)
-    pub fn new_visual(
-        file_id: i32,
-        uuid: String,
-        chunk_id: String,
-        start_frame: i64,
-        end_frame: i64,
-        fps: f64,
-        visual_content: VisualChunkContent,
-    ) -> Self {
-        let content = serde_json::to_value(&visual_content)
-            .unwrap_or_else(|_| serde_json::json!({"error": "Failed to serialize visual content"}));
-
-        Self::new(
-            file_id,
-            uuid,
-            chunk_id,
-            ChunkType::Visual,
-            ChunkRule::Rule2,
-            start_frame,
-            end_frame,
-            fps,
-            content,
-        )
-    }
-
-    /// 從 YOLO 幀創建視覺分片 (Phase 2.1)
-    pub fn from_yolo_frames(
-        file_id: i32,
-        uuid: String,
-        chunk_id: String,
-        start_frame: i64,
-        end_frame: i64,
-        fps: f64,
-        yolo_frames: Vec<crate::core::processor::yolo::YoloFrame>,
-    ) -> Self {
-        // 將 YOLO 幀轉換為關鍵幀物件
-        let keyframe_objects: Vec<KeyframeObjects> = yolo_frames
-            .iter()
-            .map(|frame| {
-                let objects: Vec<DetectedObject> = frame
-                    .objects
-                    .iter()
-                    .map(|obj| DetectedObject {
-                        class_name: obj.class_name.clone(),
-                        class_id: obj.class_id,
-                        confidence: obj.confidence,
-                        bbox: Some(BoundingBox {
-                            x: obj.x,
-                            y: obj.y,
-                            width: obj.width,
-                            height: obj.height,
-                        }),
-                        occurrence: 1,
-                    })
-                    .collect();
-
-                KeyframeObjects {
-                    timestamp: frame.timestamp,
-                    frame_number: frame.frame,
-                    objects,
-                }
-            })
-            .collect();
-
-        // 計算物件統計
-        let total_objects: u32 = yolo_frames.iter().map(|f| f.objects.len() as u32).sum();
-
-        // 收集所有物件類別
-        let all_classes: Vec<String> = yolo_frames
-            .iter()
-            .flat_map(|f| f.objects.iter().map(|o| o.class_name.clone()))
-            .collect();
-
-        // 獲取唯一類別
-        let unique_classes: Vec<String> = all_classes
-            .iter()
-            .cloned()
-            .collect::<std::collections::HashSet<_>>()
-            .into_iter()
-            .collect();
-
-        // 計算信心值統計
-        let confidences: Vec<f32> = yolo_frames
-            .iter()
-            .flat_map(|f| f.objects.iter().map(|o| o.confidence))
-            .collect();
-
-        let max_confidence = confidences.iter().copied().fold(0.0f32, f32::max);
-        let avg_confidence = if !confidences.is_empty() {
-            confidences.iter().sum::<f32>() / confidences.len() as f32
-        } else {
-            0.0
-        };
-
-        // 計算主要物件（出現在大多數幀中的物件）
-        let mut object_counts = std::collections::HashMap::new();
-        for frame in &yolo_frames {
-            let frame_classes: std::collections::HashSet<_> =
-                frame.objects.iter().map(|o| o.class_name.clone()).collect();
-            for class in frame_classes {
-                *object_counts.entry(class).or_insert(0) += 1;
-            }
-        }
-
-        let mut dominant_objects: Vec<String> = object_counts
-            .into_iter()
-            .filter(|(_, count)| *count as f32 / yolo_frames.len() as f32 > 0.5)
-            .map(|(class, _)| class)
-            .collect();
-        dominant_objects.sort();
-
-        // 創建視覺內容
-        let visual_content = VisualChunkContent {
-            keyframe_objects,
-            dominant_objects,
-            object_relationships: vec![], // 可選：後期添加關係檢測
-            scene_description: None,      // 可選：後期添加 LLM 生成的場景描述
-            metadata: VisualMetadata {
-                object_count: total_objects,
-                unique_classes,
-                max_confidence,
-                avg_confidence,
-                spatial_density: if yolo_frames.len() > 0 {
-                    total_objects as f32 / yolo_frames.len() as f32
-                } else {
-                    0.0
-                },
-            },
-        };
-
-        Self::new_visual(
-            file_id,
-            uuid,
-            chunk_id,
-            start_frame,
-            end_frame,
-            fps,
-            visual_content,
-        )
-    }
-
-    /// 將分片轉換為幀時間
    pub fn to_frame_time(&self) -> FrameTime {
-        // 使用第一個幀作為參考點
        FrameTime::from_frames(self.start_frame, self.fps)
    }

-    /// 檢查是否是父分片
    pub fn is_parent(&self) -> bool {
        self.parent_chunk_id.is_some()
    }

-    /// 從秒數創建新分片（舊版轉換）
-    ///
-    /// 這對於從存儲時間為秒的舊系統遷移很有用。
-    /// 幀數通過舍入 `seconds * fps` 計算。
    #[allow(clippy::too_many_arguments)]
    pub fn from_seconds(
        file_id: i32,
@@ -354,197 +128,82 @@ impl Chunk {
        )
    }

-    /// 返回開始時間為 `FrameTime`
    pub fn start_time(&self) -> FrameTime {
        FrameTime::from_frames(self.start_frame, self.fps)
    }

-    /// 返回結束時間為 `FrameTime`
    pub fn end_time(&self) -> FrameTime {
        FrameTime::from_frames(self.end_frame, self.fps)
    }

-    /// 返回持續時間的幀數
    pub fn duration_frames(&self) -> i64 {
        self.end_frame - self.start_frame
    }

-    /// 返回持續時間的秒數
    pub fn duration_seconds(&self) -> f64 {
        self.duration_frames() as f64 / self.fps
    }

-    /// 將開始時間格式化為 "seconds.frame" (例如："123.04")
    pub fn format_start_sec_frame(&self) -> String {
        self.start_time().format_sec_frame()
    }

-    /// 將結束時間格式化為 "seconds.frame" (例如："456.15")
    pub fn format_end_sec_frame(&self) -> String {
        self.end_time().format_sec_frame()
    }

-    /// 將開始時間格式化為 "HH:MM:SS"
    pub fn format_start_hms(&self) -> String {
        self.start_time().format_hms()
    }

-    /// 將結束時間格式化為 "HH:MM:SS"
    pub fn format_end_hms(&self) -> String {
        self.end_time().format_hms()
    }

-    /// 將開始時間格式化為 "HH:MM:SS.FF"
    pub fn format_start_hms_frame(&self) -> String {
        self.start_time().format_hms_frame()
    }

-    /// 將結束時間格式化為 "HH:MM:SS.FF"
    pub fn format_end_hms_frame(&self) -> String {
        self.end_time().format_hms_frame()
    }

-    /// 返回 (start_seconds, end_seconds) 元組用於兼容性
-    ///
-    /// 這在遷移期間提供向後兼容性。
-    /// 建議使用 `start_time()` 和 `end_time()` 方法。
    pub fn time_range_seconds(&self) -> (f64, f64) {
        (self.start_time().seconds(), self.end_time().seconds())
    }

-    /// 添加元數據
    pub fn with_metadata(mut self, metadata: serde_json::Value) -> Self {
        self.metadata = Some(metadata);
        self
    }

-    /// 添加向量 ID
    pub fn with_vector_id(mut self, vector_id: String) -> Self {
        self.vector_id = Some(vector_id);
        self
    }

-    /// 添加文本內容
    pub fn with_text_content(mut self, text: String) -> Self {
        self.text_content = Some(text);
        self
    }

-    /// 設置幀數
    pub fn with_frame_count(mut self, count: i32) -> Self {
        self.frame_count = count;
        self
    }

-    /// 設置前一個分片 ID
    pub fn with_pre_chunk_ids(mut self, ids: Vec<i32>) -> Self {
        self.pre_chunk_ids = ids;
        self
    }

-    /// 設置父分片 ID
    pub fn with_parent_chunk_id(mut self, parent_id: String) -> Self {
        self.parent_chunk_id = Some(parent_id);
        self
    }

-    /// 設置子分片 ID
    pub fn with_child_chunk_ids(mut self, child_ids: Vec<String>) -> Self {
        self.child_chunk_ids = child_ids;
        self
    }
 }
-
-// ==================== VisualChunkContent 輔助方法 ====================
-impl VisualChunkContent {
-    /// 計算兩個 YOLO 幀之間的相似度（基於物件組成）
-    pub fn frame_similarity(
-        frame1: &crate::core::processor::yolo::YoloFrame,
-        frame2: &crate::core::processor::yolo::YoloFrame,
-    ) -> f32 {
-        if frame1.objects.is_empty() && frame2.objects.is_empty() {
-            return 1.0; // 兩個空幀完全相似
-        }
-
-        if frame1.objects.is_empty() || frame2.objects.is_empty() {
-            return 0.0; // 一個空一個非空，不相似
-        }
-
-        // 創建物件類別名稱集合
-        let set1: std::collections::HashSet<String> = frame1
-            .objects
-            .iter()
-            .map(|o| o.class_name.clone())
-            .collect();
-        let set2: std::collections::HashSet<String> = frame2
-            .objects
-            .iter()
-            .map(|o| o.class_name.clone())
-            .collect();
-
-        // 計算 Jaccard 相似度
-        let intersection: Vec<_> = set1.intersection(&set2).collect();
-        let union: Vec<_> = set1.union(&set2).collect();
-
-        if union.is_empty() {
-            0.0
-        } else {
-            intersection.len() as f32 / union.len() as f32
-        }
-    }
-
-    /// 獲取視覺分片的摘要（使用關鍵幀的 frame_number）
-    pub fn summary(&self, fps: f64) -> String {
-        if self.keyframe_objects.is_empty() {
-            return "Empty visual chunk".to_string();
-        }
-
-        let first_frame = self.keyframe_objects.first().unwrap().frame_number;
-        let last_frame = self.keyframe_objects.last().unwrap().frame_number;
-
-        // 計算時間（僅供參考）
-        let start_time = if fps > 0.0 {
-            first_frame as f64 / fps
-        } else {
-            0.0
-        };
-        let end_time = if fps > 0.0 {
-            last_frame as f64 / fps
-        } else {
-            0.0
-        };
-        let duration = end_time - start_time;
-        let frame_count = self.keyframe_objects.len();
-
-        format!(
-            "Visual chunk: frames {} to {} (duration: {:.1}s, {} frames). Objects: {} total, {} unique. Dominant: {}",
-            first_frame,
-            last_frame,
-            duration,
-            frame_count,
-            self.metadata.object_count,
-            self.metadata.unique_classes.len(),
-            if self.dominant_objects.is_empty() {
-                "none".to_string()
-            } else {
-                self.dominant_objects.join(", ")
-            }
-        )
-    }
-
-    /// 檢查是否包含特定物件類別
-    pub fn contains_object(&self, class_name: &str) -> bool {
-        self.keyframe_objects
-            .iter()
-            .any(|ko| ko.objects.iter().any(|obj| obj.class_name == class_name))
-    }
-
-    /// 獲取信心值高於閾值的所有物件
-    pub fn high_confidence_objects(&self, threshold: f32) -> Vec<&DetectedObject> {
-        self.keyframe_objects
-            .iter()
-            .flat_map(|ko| ko.objects.iter())
-            .filter(|obj| obj.confidence >= threshold)
-            .collect()
-    }
-}
--- a/src/core/config.rs
+++ b/src/core/config.rs
@@ -56,7 +56,7 @@ pub static REDIS_URL: Lazy<String> = Lazy::new(|| {
    env::var("REDIS_URL").unwrap_or_else(|_| {
        let password = env::var("REDIS_PASSWORD").unwrap_or_else(|_| "accusys".to_string());
        // Format: redis://[:password]@host:port (use default user)
-        format!("redis://:{}@localhost:6379", password)
+        format!("redis://default:{}@localhost:6379", password)
    })
 });

@@ -277,12 +277,14 @@ pub mod llm {
 }

 /// Ollama embedding endpoint (vector embeddings for text sync).
-pub static OLLAMA_URL: Lazy<String> =
-    Lazy::new(|| env::var("MOMENTRY_OLLAMA_URL").unwrap_or_else(|_| "http://127.0.0.1:11434".to_string()));
+pub static OLLAMA_URL: Lazy<String> = Lazy::new(|| {
+    // `MOMENTRY_OLLAMA_URL` overrides the loopback default on port 11434.
+    env::var("MOMENTRY_OLLAMA_URL").unwrap_or_else(|_| "http://127.0.0.1:11434".to_string())
+});

 /// Text embedding server (comic-embed or alternative).
-pub static EMBED_URL: Lazy<String> =
-    Lazy::new(|| env::var("MOMENTRY_EMBED_URL").unwrap_or_else(|_| "http://127.0.0.1:11436".to_string()));
+pub static EMBED_URL: Lazy<String> = Lazy::new(|| {
+    // `MOMENTRY_EMBED_URL` overrides the loopback default on port 11436.
+    env::var("MOMENTRY_EMBED_URL").unwrap_or_else(|_| "http://127.0.0.1:11436".to_string())
+});

 /// LLM health endpoint.
 pub static LLM_HEALTH_URL: Lazy<String> = Lazy::new(|| {
--- a/src/core/db/identity_merge_history.rs
+++ b/src/core/db/identity_merge_history.rs
@@ -0,0 +1,604 @@
+use anyhow::{Context, Result};
+use bson::{doc, oid::ObjectId, DateTime as BsonDateTime, Document};
+use chrono::{DateTime, Utc};
+use mongodb::{Client, Collection, Database, IndexModel};
+use serde::{Deserialize, Serialize};
+use serde_json::Value as JsonValue;
+use uuid::Uuid;
+
+const COLLECTION_NAME: &str = "identity_merge_history";
+
+/// Converts a BSON document into relaxed Extended JSON.
+///
+/// Yields `JsonValue::Null` when the document cannot be converted to BSON.
+// NOTE(review): no caller in this file; confirm it is still needed.
+fn bson_doc_to_json(doc: &Document) -> JsonValue {
+    bson::to_bson(doc).map_or(JsonValue::Null, bson::Bson::into_relaxed_extjson)
+}
+
+/// Converts a JSON value into a BSON document.
+///
+/// Yields an empty document when the conversion fails.
+// NOTE(review): no caller in this file; confirm it is still needed.
+fn json_value_to_bson_doc(value: &JsonValue) -> Document {
+    match bson::to_document(value) {
+        Ok(document) => document,
+        Err(_) => Document::new(),
+    }
+}
+
+/// Reads `key` from `doc` as relaxed Extended JSON.
+///
+/// Yields `JsonValue::Null` when the key is absent.
+fn doc_field_to_json(doc: &Document, key: &str) -> JsonValue {
+    match doc.get(key) {
+        Some(field) => field.clone().into_relaxed_extjson(),
+        None => JsonValue::Null,
+    }
+}
+
+/// Converts a JSON value into its BSON form.
+///
+/// Yields `Bson::Null` when the conversion fails.
+fn json_to_bson(value: &JsonValue) -> bson::Bson {
+    match bson::to_bson(value) {
+        Ok(converted) => converted,
+        Err(_) => bson::Bson::Null,
+    }
+}
+
+/// One identity-merge record as persisted in the `identity_merge_history`
+/// collection.
+///
+/// Converted to and from BSON by hand through `to_document` and
+/// `from_document` rather than through serde derives.
+#[derive(Debug, Clone)]
+pub struct IdentityMergeHistory {
+    /// MongoDB `_id`; when `None`, `to_document` leaves the key out.
+    pub id: Option<ObjectId>,
+    /// Merge identifier; the store keeps a unique index on this field.
+    pub merge_id: String,
+    /// Snapshot stored under the `source_identity` key.
+    pub source_identity: IdentitySnapshot,
+    /// Snapshot stored under the `target_identity` key.
+    pub target_identity: TargetIdentitySnapshot,
+    /// Alias entries stored under `aliases_added_to_target`
+    /// (presumably the aliases the merge added to the target; confirm with the merge caller).
+    pub aliases_added_to_target: Vec<AliasEntry>,
+    /// Field names stored under `metadata_fields_added`
+    /// (presumably the metadata keys the merge added to the target; confirm with the merge caller).
+    pub metadata_fields_added: Vec<String>,
+    /// Face bookkeeping stored under the `faces_transferred` key.
+    pub faces_transferred: FacesTransferred,
+    /// Options stored under the `merge_params` key.
+    pub merge_params: MergeParams,
+    /// Merge timestamp; the store indexes it in descending order.
+    pub merged_at: DateTime<Utc>,
+    /// `check_undo_deadline` reports `false` once the current time is past
+    /// this instant; `mark_as_redone` resets it to 24 hours after the redo.
+    pub undo_deadline: DateTime<Utc>,
+    /// Set to `true` by `mark_as_undone` and back to `false` by `mark_as_redone`.
+    pub undone: bool,
+    /// Set to the current time by `mark_as_undone`; nulled by `mark_as_redone`.
+    pub undone_at: Option<DateTime<Utc>>,
+    /// Written from the `undone_by` argument of `mark_as_undone`;
+    /// `mark_as_redone` overwrites it with its `redone_by` argument.
+    pub undone_by: Option<String>,
+    /// Written by `mark_as_undone`; nulled by `mark_as_redone`.
+    pub undone_snapshot: Option<UndoneSnapshot>,
+    /// Set by `mark_expired_merges` once `undo_deadline` has passed on a
+    /// record that is not undone.
+    pub undo_expired: bool,
+}
+
+/// Snapshot of an identity, stored as the `source_identity` sub-document of a
+/// merge-history record.
+///
+/// `to_document` writes every `Option` field as an explicit value (null when
+/// `None`); `from_document` reads a missing or mistyped optional field as `None`.
+#[derive(Debug, Clone)]
+pub struct IdentitySnapshot {
+    /// Read as `0` when the stored value is missing or not an int64.
+    pub id: i64,
+    pub uuid: String,
+    pub name: String,
+    pub identity_type: Option<String>,
+    pub source: Option<String>,
+    pub status: String,
+    pub tmdb_id: Option<i64>,
+    pub tmdb_profile: Option<String>,
+    /// Arbitrary JSON; written through `json_to_bson` and read back as
+    /// relaxed Extended JSON.
+    pub metadata: JsonValue,
+    /// `None` when the stored value is missing or not a BSON datetime.
+    pub created_at: Option<DateTime<Utc>>,
+    /// Read as `0` when the stored value is missing or not an int64.
+    pub face_count: i64,
+}
+
+/// Snapshot of the merge target, stored as the `target_identity` sub-document
+/// of a merge-history record.
+#[derive(Debug, Clone)]
+pub struct TargetIdentitySnapshot {
+    pub id: i64,
+    pub uuid: String,
+    pub name: String,
+    /// Stored under `metadata_before`; read as JSON null when the key is absent.
+    pub metadata_before: JsonValue,
+    /// Stored under `metadata_after`; `to_document` writes a BSON null when
+    /// this is `None`.
+    pub metadata_after: Option<JsonValue>,
+}
+
+/// One alias entry of a merge-history record.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct AliasEntry {
+    pub name: String,
+    /// `IdentityMergeHistory::from_document` falls back to `"en"` when the
+    /// stored entry has no string `locale`.
+    pub locale: String,
+    /// Left out of serde output when `None`.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub source: Option<String>,
+}
+
+/// Face bookkeeping stored as the `faces_transferred` sub-document of a
+/// merge-history record.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct FacesTransferred {
+    pub file_uuid: String,
+    /// Non-string array entries are skipped when read back from BSON.
+    pub face_ids: Vec<String>,
+    /// Non-int32 array entries are skipped when read back from BSON.
+    pub trace_ids: Vec<i32>,
+    /// Stored separately from the id lists; `store_merge_history` logs it and
+    /// `query_merge_history` reports it as the transferred-face count.
+    pub count: i64,
+}
+
+/// Data written under `undone_snapshot` by
+/// `IdentityMergeHistoryStore::mark_as_undone`.
+#[derive(Debug, Clone)]
+pub struct UndoneSnapshot {
+    pub source_identity_id: i64,
+    pub source_uuid: String,
+    pub source_name: String,
+    /// Arbitrary JSON, converted with `json_to_bson` when stored.
+    pub target_metadata_at_undo: JsonValue,
+}
+
+/// Options stored as the `merge_params` sub-document of a merge-history record.
+///
+/// `IdentityMergeHistory::from_document` defaults both flags to `true` when
+/// the stored value is missing or not a boolean.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct MergeParams {
+    pub keep_history: bool,
+    pub cleared_stranger_id: bool,
+    pub performed_by_user: Option<String>,
+}
+
+/// Optional filters for `IdentityMergeHistoryStore::query_merge_history`.
+///
+/// Each `Some` field adds an equality condition to the MongoDB filter; `None`
+/// fields are ignored.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct MergeHistoryQuery {
+    /// Matched against `source_identity.uuid`.
+    pub source_uuid: Option<String>,
+    /// Matched against `target_identity.uuid`.
+    pub target_uuid: Option<String>,
+    /// Matched against `merge_id`.
+    pub merge_id: Option<String>,
+    /// Matched against `undone`.
+    pub undone: Option<bool>,
+}
+
+/// Summary row returned by `IdentityMergeHistoryStore::query_merge_history`.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct MergeHistoryEntry {
+    pub merge_id: String,
+    /// Taken from `source_identity.name`; empty when unavailable.
+    pub source_name: String,
+    /// Taken from `target_identity.name`; empty when unavailable.
+    pub target_name: String,
+    /// Taken from `faces_transferred.count`; `0` when unavailable.
+    pub faces_transferred: i64,
+    pub merged_at: DateTime<Utc>,
+    pub undo_deadline: DateTime<Utc>,
+    pub undone: bool,
+    pub undo_expired: bool,
+}
+
+impl IdentityMergeHistory {
+    /// Rebuilds a merge-history record from its stored BSON document.
+    ///
+    /// `source_identity`, `target_identity` and `faces_transferred` must be
+    /// present as sub-documents. Every other field is read leniently: a
+    /// missing or mistyped value falls back to a default (empty string, `0`,
+    /// the default timestamp, an empty list, `None`, `false`, or `true` for
+    /// the two `merge_params` flags).
+    ///
+    /// A BSON null under `target_identity.metadata_after` is read back as
+    /// `None`, so a record written by `to_document` without a
+    /// `metadata_after` value round-trips unchanged.
+    ///
+    /// # Errors
+    /// Fails when one of the three required sub-documents is missing.
+    pub fn from_document(doc: &Document) -> Result<Self> {
+        let source = doc
+            .get_document("source_identity")
+            .context("Missing source_identity")?;
+        let target = doc
+            .get_document("target_identity")
+            .context("Missing target_identity")?;
+        let faces = doc
+            .get_document("faces_transferred")
+            .context("Missing faces_transferred")?;
+        // Borrowed when present; an absent sub-document falls through to the
+        // per-field defaults below.
+        let merge_params = doc.get_document("merge_params").ok();
+
+        // Entries that are not documents are skipped.
+        let parsed_aliases: Vec<AliasEntry> = doc
+            .get_array("aliases_added_to_target")
+            .map(|aliases| {
+                aliases
+                    .iter()
+                    .filter_map(|alias| alias.as_document())
+                    .map(|d| AliasEntry {
+                        name: d.get_str("name").unwrap_or("").to_string(),
+                        locale: d.get_str("locale").unwrap_or("en").to_string(),
+                        source: d.get_str("source").ok().map(|s| s.to_string()),
+                    })
+                    .collect()
+            })
+            .unwrap_or_default();
+
+        // Entries that are not strings are skipped.
+        let parsed_fields: Vec<String> = doc
+            .get_array("metadata_fields_added")
+            .map(|fields| {
+                fields
+                    .iter()
+                    .filter_map(|field| field.as_str().map(|s| s.to_string()))
+                    .collect()
+            })
+            .unwrap_or_default();
+
+        // A null or missing `undone_snapshot` is not a document, so it maps to `None`.
+        let undone_snapshot = doc
+            .get_document("undone_snapshot")
+            .ok()
+            .map(|d| UndoneSnapshot {
+                source_identity_id: d.get_i64("source_identity_id").unwrap_or(0),
+                source_uuid: d.get_str("source_uuid").unwrap_or("").to_string(),
+                source_name: d.get_str("source_name").unwrap_or("").to_string(),
+                target_metadata_at_undo: doc_field_to_json(d, "target_metadata_at_undo"),
+            });
+
+        Ok(IdentityMergeHistory {
+            id: doc.get_object_id("_id").ok(),
+            merge_id: doc.get_str("merge_id").unwrap_or("").to_string(),
+            source_identity: IdentitySnapshot {
+                id: source.get_i64("id").unwrap_or(0),
+                uuid: source.get_str("uuid").unwrap_or("").to_string(),
+                name: source.get_str("name").unwrap_or("").to_string(),
+                identity_type: source.get_str("identity_type").ok().map(|s| s.to_string()),
+                source: source.get_str("source").ok().map(|s| s.to_string()),
+                status: source.get_str("status").unwrap_or("").to_string(),
+                tmdb_id: source.get_i64("tmdb_id").ok(),
+                tmdb_profile: source.get_str("tmdb_profile").ok().map(|s| s.to_string()),
+                metadata: doc_field_to_json(source, "metadata"),
+                created_at: source
+                    .get_datetime("created_at")
+                    .map(|d| d.to_chrono())
+                    .ok(),
+                face_count: source.get_i64("face_count").unwrap_or(0),
+            },
+            target_identity: TargetIdentitySnapshot {
+                id: target.get_i64("id").unwrap_or(0),
+                uuid: target.get_str("uuid").unwrap_or("").to_string(),
+                name: target.get_str("name").unwrap_or("").to_string(),
+                metadata_before: doc_field_to_json(target, "metadata_before"),
+                // `to_document` always writes this key (null for `None`), so a
+                // stored null must not come back as `Some(Null)`.
+                metadata_after: target
+                    .get("metadata_after")
+                    .filter(|b| !matches!(b, bson::Bson::Null))
+                    .map(|b| b.clone().into_relaxed_extjson()),
+            },
+            aliases_added_to_target: parsed_aliases,
+            metadata_fields_added: parsed_fields,
+            faces_transferred: FacesTransferred {
+                file_uuid: faces.get_str("file_uuid").unwrap_or("").to_string(),
+                face_ids: faces
+                    .get_array("face_ids")
+                    .map(|arr| {
+                        arr.iter()
+                            .filter_map(|b| b.as_str().map(|s| s.to_string()))
+                            .collect()
+                    })
+                    .unwrap_or_default(),
+                trace_ids: faces
+                    .get_array("trace_ids")
+                    .map(|arr| arr.iter().filter_map(|b| b.as_i32()).collect())
+                    .unwrap_or_default(),
+                count: faces.get_i64("count").unwrap_or(0),
+            },
+            merge_params: MergeParams {
+                keep_history: merge_params
+                    .and_then(|d| d.get_bool("keep_history").ok())
+                    .unwrap_or(true),
+                cleared_stranger_id: merge_params
+                    .and_then(|d| d.get_bool("cleared_stranger_id").ok())
+                    .unwrap_or(true),
+                performed_by_user: merge_params
+                    .and_then(|d| d.get_str("performed_by_user").ok())
+                    .map(|s| s.to_string()),
+            },
+            merged_at: doc
+                .get_datetime("merged_at")
+                .map(|d| d.to_chrono())
+                .unwrap_or_default(),
+            undo_deadline: doc
+                .get_datetime("undo_deadline")
+                .map(|d| d.to_chrono())
+                .unwrap_or_default(),
+            undone: doc.get_bool("undone").unwrap_or(false),
+            undone_at: doc.get_datetime("undone_at").map(|d| d.to_chrono()).ok(),
+            undone_by: doc.get_str("undone_by").ok().map(|s| s.to_string()),
+            undone_snapshot,
+            undo_expired: doc.get_bool("undo_expired").unwrap_or(false),
+        })
+    }
+
+    /// Serializes this record into the BSON layout that
+    /// `IdentityMergeHistory::from_document` reads back.
+    ///
+    /// `Option` fields are written as explicit values (BSON null for `None`).
+    /// `_id` is appended only when `self.id` is set.
+    pub fn to_document(&self) -> Document {
+        let source = &self.source_identity;
+        let source_doc = doc! {
+            "id": source.id,
+            "uuid": &source.uuid,
+            "name": &source.name,
+            "identity_type": source.identity_type.as_deref(),
+            "source": source.source.as_deref(),
+            "status": &source.status,
+            "tmdb_id": source.tmdb_id,
+            "tmdb_profile": source.tmdb_profile.as_deref(),
+            "metadata": json_to_bson(&source.metadata),
+            "created_at": source.created_at.map(BsonDateTime::from_chrono),
+            "face_count": source.face_count,
+        };
+
+        let target = &self.target_identity;
+        let target_doc = doc! {
+            "id": target.id,
+            "uuid": &target.uuid,
+            "name": &target.name,
+            "metadata_before": json_to_bson(&target.metadata_before),
+            "metadata_after": target.metadata_after.as_ref().map(json_to_bson),
+        };
+
+        let alias_docs: Vec<Document> = self
+            .aliases_added_to_target
+            .iter()
+            .map(|alias| {
+                doc! {
+                    "name": &alias.name,
+                    "locale": &alias.locale,
+                    "source": alias.source.as_deref(),
+                }
+            })
+            .collect();
+
+        let faces = &self.faces_transferred;
+        let faces_doc = doc! {
+            "file_uuid": &faces.file_uuid,
+            "face_ids": &faces.face_ids,
+            "trace_ids": &faces.trace_ids,
+            "count": faces.count,
+        };
+
+        let params = &self.merge_params;
+        let params_doc = doc! {
+            "keep_history": params.keep_history,
+            "cleared_stranger_id": params.cleared_stranger_id,
+            "performed_by_user": params.performed_by_user.as_deref(),
+        };
+
+        let undone_snapshot_doc = self.undone_snapshot.as_ref().map(|snapshot| {
+            doc! {
+                "source_identity_id": snapshot.source_identity_id,
+                "source_uuid": &snapshot.source_uuid,
+                "source_name": &snapshot.source_name,
+                "target_metadata_at_undo": json_to_bson(&snapshot.target_metadata_at_undo),
+            }
+        });
+
+        // Key order matches the stored layout; BSON documents are ordered.
+        let mut document = doc! {
+            "merge_id": &self.merge_id,
+            "source_identity": source_doc,
+            "target_identity": target_doc,
+            "aliases_added_to_target": alias_docs,
+            "metadata_fields_added": &self.metadata_fields_added,
+            "faces_transferred": faces_doc,
+            "merge_params": params_doc,
+            "merged_at": BsonDateTime::from_chrono(self.merged_at),
+            "undo_deadline": BsonDateTime::from_chrono(self.undo_deadline),
+            "undone": self.undone,
+            "undone_at": self.undone_at.map(BsonDateTime::from_chrono),
+            "undone_by": self.undone_by.as_deref(),
+            "undone_snapshot": undone_snapshot_doc,
+            "undo_expired": self.undo_expired,
+        };
+
+        if let Some(oid) = self.id {
+            document.insert("_id", oid);
+        }
+
+        document
+    }
+}
+
+/// MongoDB-backed store for identity-merge history records.
+///
+/// Built by `init`, which connects, opens the `identity_merge_history`
+/// collection and ensures its indexes.
+#[derive(Clone)]
+pub struct IdentityMergeHistoryStore {
+    // NOTE(review): `client` and `db` are stored by `init` but not read
+    // anywhere in this file; confirm they are needed.
+    client: Client,
+    db: Database,
+    /// Untyped handle to the history collection; all reads and writes go through it.
+    collection: Collection<Document>,
+}
+
+impl IdentityMergeHistoryStore {
+    /// Connects to MongoDB, opens the history collection and ensures its indexes.
+    ///
+    /// The connection string and database name come from
+    /// `crate::core::config::MONGODB_URL` and `MONGODB_DATABASE`.
+    ///
+    /// # Errors
+    /// Fails when the connection cannot be established or index creation fails.
+    pub async fn init() -> Result<Self> {
+        let client = Client::with_uri_str(crate::core::config::MONGODB_URL.as_str())
+            .await
+            .context("Failed to connect to MongoDB")?;
+        let db = client.database(crate::core::config::MONGODB_DATABASE.as_str());
+        let collection = db.collection::<Document>(COLLECTION_NAME);
+
+        let store = Self {
+            client,
+            db,
+            collection,
+        };
+        store.ensure_indexes().await?;
+
+        Ok(store)
+    }
+
+    /// Creates the collection's indexes: a unique index on `merge_id`, a
+    /// descending index on `merged_at`, and ascending indexes on
+    /// `source_identity.uuid` and `target_identity.uuid`.
+    async fn ensure_indexes(&self) -> Result<()> {
+        let unique = mongodb::options::IndexOptions::builder()
+            .unique(true)
+            .build();
+
+        let indexes = vec![
+            IndexModel::builder()
+                .keys(doc! { "merge_id": 1 })
+                .options(unique)
+                .build(),
+            IndexModel::builder().keys(doc! { "merged_at": -1 }).build(),
+            IndexModel::builder()
+                .keys(doc! { "source_identity.uuid": 1 })
+                .build(),
+            IndexModel::builder()
+                .keys(doc! { "target_identity.uuid": 1 })
+                .build(),
+        ];
+
+        self.collection
+            .create_indexes(indexes, None)
+            .await
+            .context("Failed to create identity_merge_history indexes")?;
+
+        tracing::info!("MongoDB identity_merge_history indexes ensured");
+        Ok(())
+    }
+
+    /// Returns a fresh merge identifier: a random (v4) UUID in its default
+    /// string form.
+    pub fn generate_merge_id() -> String {
+        let id = Uuid::new_v4();
+        id.to_string()
+    }
+
+    /// Inserts `history` as a new document and logs a one-line summary.
+    ///
+    /// # Errors
+    /// Fails when the insert is rejected by MongoDB.
+    pub async fn store_merge_history(&self, history: &IdentityMergeHistory) -> Result<()> {
+        self.collection
+            .insert_one(history.to_document(), None)
+            .await
+            .context("Failed to store merge history in MongoDB")?;
+
+        let source_name = &history.source_identity.name;
+        let target_name = &history.target_identity.name;
+        let face_count = history.faces_transferred.count;
+        tracing::info!(
+            "Stored merge history: merge_id={}, source={}, target={}, faces={}",
+            history.merge_id,
+            source_name,
+            target_name,
+            face_count
+        );
+
+        Ok(())
+    }
+
+    /// Looks up one record by `merge_id`.
+    ///
+    /// Returns `Ok(None)` when no document matches.
+    ///
+    /// # Errors
+    /// Fails when the query fails or the stored document cannot be parsed.
+    pub async fn get_merge_history(&self, merge_id: &str) -> Result<Option<IdentityMergeHistory>> {
+        self.collection
+            .find_one(doc! { "merge_id": merge_id }, None)
+            .await
+            .context("Failed to get merge history from MongoDB")?
+            .map(|stored| {
+                IdentityMergeHistory::from_document(&stored)
+                    .context("Failed to parse merge history from MongoDB")
+            })
+            .transpose()
+    }
+
+    /// Returns one page of merge-history summaries plus the total number of
+    /// records matching `query`.
+    ///
+    /// Each `Some` field of `query` becomes an equality condition. Results are
+    /// ordered newest first (`merged_at` descending, `_id` as tie-breaker) so
+    /// that consecutive pages are stable. Sorting, skipping and limiting are
+    /// done by MongoDB.
+    ///
+    /// `page` is 1-based; `0` is treated like `1`. A `page_size` of `0` yields
+    /// an empty page together with the total.
+    ///
+    /// # Errors
+    /// Fails when the count, the query, or reading the cursor fails.
+    pub async fn query_merge_history(
+        &self,
+        query: MergeHistoryQuery,
+        page: u32,
+        page_size: u32,
+    ) -> Result<(Vec<MergeHistoryEntry>, u64)> {
+        let mut filter = doc! {};
+
+        if let Some(source_uuid) = query.source_uuid {
+            filter.insert("source_identity.uuid", source_uuid);
+        }
+        if let Some(target_uuid) = query.target_uuid {
+            filter.insert("target_identity.uuid", target_uuid);
+        }
+        if let Some(merge_id) = query.merge_id {
+            filter.insert("merge_id", merge_id);
+        }
+        if let Some(undone) = query.undone {
+            filter.insert("undone", undone);
+        }
+
+        let total = self
+            .collection
+            .count_documents(filter.clone(), None)
+            .await
+            .context("Failed to count merge history")?;
+
+        // MongoDB treats a limit of 0 as "no limit", so an empty page must be
+        // answered without issuing the find.
+        if page_size == 0 {
+            return Ok((Vec::new(), total));
+        }
+
+        // Widen before multiplying so neither `page == 0` nor a large page
+        // number can underflow or overflow.
+        let skip = u64::from(page.saturating_sub(1)) * u64::from(page_size);
+        let find_options = mongodb::options::FindOptions::builder()
+            .sort(doc! { "merged_at": -1, "_id": -1 })
+            .skip(skip)
+            .limit(i64::from(page_size))
+            .build();
+
+        let mut cursor = self
+            .collection
+            .find(filter, find_options)
+            .await
+            .context("Failed to query merge history")?;
+
+        let mut results: Vec<MergeHistoryEntry> = Vec::new();
+        while cursor.advance().await.context("Failed to advance cursor")? {
+            let doc: Document = cursor
+                .deserialize_current()
+                .context("Failed to deserialize")?;
+            results.push(Self::entry_from_document(&doc));
+        }
+
+        Ok((results, total))
+    }
+
+    /// Builds a summary row from a stored document, defaulting every missing
+    /// or mistyped field (empty string, `0`, default timestamp, `false`).
+    fn entry_from_document(doc: &Document) -> MergeHistoryEntry {
+        let nested_name = |key: &str| {
+            doc.get_document(key)
+                .map(|d| d.get_str("name").unwrap_or("").to_string())
+                .unwrap_or_default()
+        };
+
+        MergeHistoryEntry {
+            merge_id: doc.get_str("merge_id").unwrap_or("").to_string(),
+            source_name: nested_name("source_identity"),
+            target_name: nested_name("target_identity"),
+            faces_transferred: doc
+                .get_document("faces_transferred")
+                .map(|d| d.get_i64("count").unwrap_or(0))
+                .unwrap_or(0),
+            merged_at: doc
+                .get_datetime("merged_at")
+                .map(|d| d.to_chrono())
+                .unwrap_or_default(),
+            undo_deadline: doc
+                .get_datetime("undo_deadline")
+                .map(|d| d.to_chrono())
+                .unwrap_or_default(),
+            undone: doc.get_bool("undone").unwrap_or(false),
+            undo_expired: doc.get_bool("undo_expired").unwrap_or(false),
+        }
+    }
+
+    /// Flags the merge `merge_id` as undone.
+    ///
+    /// Sets `undone` to `true`, `undone_at` to the current time, `undone_by`
+    /// to the given user (null when `None`) and stores `undone_snapshot`.
+    ///
+    /// # Errors
+    /// Fails when the update cannot be executed, or when no record with this
+    /// `merge_id` exists. The latter was previously reported as success.
+    pub async fn mark_as_undone(
+        &self,
+        merge_id: &str,
+        undone_by: Option<&str>,
+        undone_snapshot: UndoneSnapshot,
+    ) -> Result<()> {
+        let filter = doc! { "merge_id": merge_id };
+        let snapshot_doc = doc! {
+            "source_identity_id": undone_snapshot.source_identity_id,
+            "source_uuid": &undone_snapshot.source_uuid,
+            "source_name": &undone_snapshot.source_name,
+            "target_metadata_at_undo": json_to_bson(&undone_snapshot.target_metadata_at_undo),
+        };
+        let update = doc! {
+            "$set": {
+                "undone": true,
+                "undone_at": BsonDateTime::from_chrono(Utc::now()),
+                "undone_by": undone_by,
+                "undone_snapshot": snapshot_doc,
+            }
+        };
+
+        let outcome = self
+            .collection
+            .update_one(filter, update, None)
+            .await
+            .context("Failed to mark merge as undone")?;
+
+        // Without this check an unknown merge_id would log success while
+        // nothing was written.
+        anyhow::ensure!(
+            outcome.matched_count > 0,
+            "Merge history not found: {}",
+            merge_id
+        );
+
+        tracing::info!("Marked merge {} as undone", merge_id);
+        Ok(())
+    }
+
+    /// Reverts an undo on the merge `merge_id`.
+    ///
+    /// Clears `undone`, `undone_at`, `undone_snapshot` and `undo_expired`, and
+    /// opens a fresh 24-hour undo window starting now.
+    ///
+    /// # Errors
+    /// Fails when the update cannot be executed, or when no record with this
+    /// `merge_id` exists. The latter was previously reported as success.
+    pub async fn mark_as_redone(&self, merge_id: &str, redone_by: Option<&str>) -> Result<()> {
+        /// Length of the undo window granted after a redo.
+        const UNDO_WINDOW_HOURS: i64 = 24;
+
+        let now = Utc::now();
+        let new_deadline = now + chrono::Duration::hours(UNDO_WINDOW_HOURS);
+        let filter = doc! { "merge_id": merge_id };
+        let update = doc! {
+            "$set": {
+                "undone": false,
+                "undone_at": bson::Bson::Null,
+                // NOTE(review): the redoing user is written into `undone_by`
+                // while `undone` becomes false; confirm this is intended
+                // rather than a dedicated `redone_by` field.
+                "undone_by": redone_by,
+                "undone_snapshot": bson::Bson::Null,
+                "undo_deadline": BsonDateTime::from_chrono(new_deadline),
+                "undo_expired": false
+            }
+        };
+
+        let outcome = self
+            .collection
+            .update_one(filter, update, None)
+            .await
+            .context("Failed to mark merge as redone")?;
+
+        // Without this check an unknown merge_id would log success while
+        // nothing was written.
+        anyhow::ensure!(
+            outcome.matched_count > 0,
+            "Merge history not found: {}",
+            merge_id
+        );
+
+        tracing::info!(
+            "Marked merge {} as redone (new deadline: {})",
+            merge_id,
+            new_deadline
+        );
+        Ok(())
+    }
+
+    /// Reports whether the merge `merge_id` is still inside its undo window.
+    ///
+    /// Returns `true` while the current time is at or before the stored
+    /// `undo_deadline`, `false` once it has passed. Only the deadline is
+    /// checked; the `undone` and `undo_expired` flags are not consulted.
+    ///
+    /// # Errors
+    /// Fails when the lookup fails or no record with this `merge_id` exists.
+    pub async fn check_undo_deadline(&self, merge_id: &str) -> Result<bool> {
+        let record = self
+            .get_merge_history(merge_id)
+            .await?
+            .context("Merge history not found")?;
+
+        Ok(Utc::now() <= record.undo_deadline)
+    }
+
+    /// Sets `undo_expired` on every record whose `undo_deadline` is in the
+    /// past and that is neither undone nor already marked expired.
+    ///
+    /// Returns the number of records modified.
+    ///
+    /// # Errors
+    /// Fails when the bulk update cannot be executed.
+    pub async fn mark_expired_merges(&self) -> Result<u64> {
+        let cutoff = BsonDateTime::from_chrono(Utc::now());
+        let overdue = doc! {
+            "undo_deadline": { "$lt": cutoff },
+            "undone": false,
+            "undo_expired": false
+        };
+
+        let modified = self
+            .collection
+            .update_many(overdue, doc! { "$set": { "undo_expired": true } }, None)
+            .await
+            .context("Failed to mark expired merges")?
+            .modified_count;
+
+        if modified != 0 {
+            tracing::info!("Marked {} expired merges", modified);
+        }
+
+        Ok(modified)
+    }
+}
--- a/src/core/db/mod.rs
+++ b/src/core/db/mod.rs
@@ -32,17 +32,21 @@ pub trait VectorStore: Send + Sync {
    async fn search(&self, query_vector: &[f32], limit: usize) -> Result<Vec<SearchResult>>;
 }

+pub mod identity_merge_history;
 pub mod mongodb_db;
 pub mod postgres_db;
 pub mod qdrant_db;
 pub mod redis_client;
 pub mod redis_db;
-pub mod sync_db;
-
+pub use identity_merge_history::{
+    AliasEntry, FacesTransferred, IdentityMergeHistory, IdentityMergeHistoryStore,
+    IdentitySnapshot, MergeHistoryEntry, MergeHistoryQuery, MergeParams, TargetIdentitySnapshot,
+    UndoneSnapshot,
+};
 pub use mongodb_db::MongoDb;
 pub use postgres_db::{
-    Bm25Result, CandidateRecord, CreateApiKeyConfig, FileIdentityRecord, FileRecord,
-    HybridSearchResult, IdentityChunkRecord, IdentityDetailRecord, IdentityFaceRecord,
+    Bm25Result, CandidateRecord, CreateApiKeyConfig, FileFaceRecord, FileIdentityRecord,
+    FileRecord, HybridSearchResult, IdentityChunkRecord, IdentityDetailRecord, IdentityFaceRecord,
    IdentityFileRecord, MonitorJob, MonitorJobStats, MonitorJobStatus, PipelineType, PostgresDb,
    ProcessorJobStatus, ProcessorResult, ProcessorType, ResourceRecord, VideoRecord, VideoStatus,
 };
@@ -52,4 +56,3 @@ pub use redis_client::{
    ProgressMessage, RedisClient,
 };
 pub use redis_db::RedisDb;
-pub use sync_db::SyncDb;
--- a/src/core/db/mongodb_db.rs
+++ b/src/core/db/mongodb_db.rs
@@ -131,7 +131,6 @@ impl MongoDb {
                    pre_chunk_ids: vec![],
                    parent_chunk_id: doc.parent_chunk_id,
                    child_chunk_ids: doc.child_chunk_ids,
-                    visual_stats: None,
                }
            })
            .collect();
@@ -190,7 +189,6 @@ impl MongoDb {
                    pre_chunk_ids: vec![],
                    parent_chunk_id: doc.parent_chunk_id,
                    child_chunk_ids: doc.child_chunk_ids,
-                    visual_stats: None,
                }
            })
            .collect();
@@ -246,7 +244,6 @@ impl MongoDb {
                    pre_chunk_ids: vec![],
                    parent_chunk_id: doc.parent_chunk_id,
                    child_chunk_ids: doc.child_chunk_ids,
-                    visual_stats: None,
                }
            })
            .collect();
--- a/src/core/db/qdrant_db.rs
+++ b/src/core/db/qdrant_db.rs
@@ -70,7 +70,7 @@ impl QdrantDb {
            return Ok(());
        }

-        let create_url = format!("{}/collections", self.base_url);
+        let create_url = format!("{}/collections/{}", self.base_url, self.collection_name);
        let body = serde_json::json!({
            "vectors": {
                "size": vector_dim,
@@ -79,7 +79,7 @@ impl QdrantDb {
        });

        self.client
-            .post(&create_url)
+            .put(&create_url)
            .header("api-key", &self.api_key)
            .header("Content-Type", "application/json")
            .json(&body)
@@ -867,50 +867,6 @@ impl VectorStore for QdrantDb {
    }
 }

-/// Sync face embeddings from PostgreSQL to Qdrant for ANN search
-pub async fn sync_face_embeddings(file_uuid: &str) -> Result<()> {
-    use crate::core::config::DATABASE_URL;
-    use sqlx::Row;
-
-    let pool = sqlx::PgPool::connect(&DATABASE_URL).await?;
-    let table = crate::core::db::schema::table_name("face_detections");
-
-    let qdrant: QdrantDb = QdrantDb::new();
-
-    let query = format!(
-        "SELECT id, trace_id, frame_number, embedding FROM {} \
-         WHERE file_uuid = $1 AND embedding IS NOT NULL \
-         AND ((metadata->>'qc_ok')::boolean IS NULL OR (metadata->>'qc_ok')::boolean = true)",
-        table
-    );
-    let rows = sqlx::query(&query).bind(file_uuid).fetch_all(&pool).await?;
-
-    let mut count = 0u64;
-    for row in &rows {
-        let id: i32 = row.get(0);
-        let trace_id: Option<i32> = row.get(1);
-        let frame_number: i64 = row.get(2);
-        let embedding: Option<Vec<f32>> = row.get(3);
-
-        if let (Some(emb), Some(tid)) = (embedding, trace_id) {
-            if let Err(e) = qdrant
-                .upsert_face_embedding(id as u64, &emb, file_uuid, tid, frame_number)
-                .await
-            {
-                tracing::warn!("Qdrant upsert failed for face {}: {}", id, e);
-                continue;
-            }
-            count += 1;
-        }
-    }
-    tracing::info!(
-        "Synced {} face embeddings to Qdrant for {}",
-        count,
-        file_uuid
-    );
-    Ok(())
-}
-
 pub async fn sync_trace_embeddings(file_uuid: &str) -> Result<()> {
    use crate::core::config::DATABASE_URL;
    use sqlx::Row;
@@ -984,12 +940,22 @@ pub async fn sync_trace_embeddings(file_uuid: &str) -> Result<()> {
    }

    // Push to Qdrant in batches
+    // Point ID: hash(file_uuid + trace_id) for global uniqueness
    for chunk in trace_avgs.chunks(500) {
        let batch: Vec<(u64, &[f32], Option<serde_json::Value>)> = chunk
            .iter()
            .map(|t| {
+                let point_id = {
+                    use sha2::{Digest, Sha256};
+                    let mut hasher = Sha256::new();
+                    hasher.update(file_uuid.as_bytes());
+                    hasher.update(b"_");
+                    hasher.update(t.tid.to_string().as_bytes());
+                    let hash = hasher.finalize();
+                    u64::from_be_bytes(hash[0..8].try_into().unwrap())
+                };
                (
-                    t.tid as u64,
+                    point_id,
                    t.avg_emb.as_slice(),
                    Some(serde_json::json!({
                        "trace_id": t.tid,
--- a/src/core/db/redis_client.rs
+++ b/src/core/db/redis_client.rs
@@ -319,7 +319,9 @@ impl RedisClient {
            "timestamp": chrono::Utc::now().to_rfc3339(),
        });

-        let _: usize = conn.publish(&channel, serde_json::to_string(&alert_json)?).await?;
+        let _: usize = conn
+            .publish(&channel, serde_json::to_string(&alert_json)?)
+            .await?;

        tracing::warn!(
            "Processor alert: {} | {} | {} | {}",
--- a/src/core/db/sync_db.rs.bak
+++ b/src/core/db/sync_db.rs.bak
@@ -78,7 +78,10 @@ impl SyncDb {
    pub async fn embed_text(&self, text: &str) -> Result<Vec<f32>> {
        let client = reqwest::Client::new();
        let response = client
-            .post(&format!("{}/api/embeddings", crate::core::config::OLLAMA_URL.as_str()))
+            .post(&format!(
+                "{}/api/embeddings",
+                crate::core::config::OLLAMA_URL.as_str()
+            ))
            .json(&serde_json::json!({
                "model": "all-minilm",
                "prompt": text,
--- a/src/core/frame_cache.rs
+++ b/src/core/frame_cache.rs
@@ -78,12 +78,19 @@ impl FrameManager {
                .and_then(|s| s.strip_suffix(".jpg"))
            {
                if let Ok(frame_num) = num_str.parse::<u64>() {
-                    let timestamp = frame_num as f64 / fps;
-                    frames.push(CachedFrame {
-                        path: entry.path(),
-                        frame_number: frame_num,
-                        timestamp_secs: timestamp,
-                    });
+                    let frame_path = entry.path();
+                    if let Ok(data) = std::fs::read(&frame_path) {
+                        if crate::core::thumbnail::validator::is_valid_jpeg(&data) {
+                            let timestamp = frame_num as f64 / fps;
+                            frames.push(CachedFrame {
+                                path: frame_path,
+                                frame_number: frame_num,
+                                timestamp_secs: timestamp,
+                            });
+                        } else {
+                            info!("[FrameCache] Skipping invalid JPEG: {:?}", frame_path);
+                        }
+                    }
                }
            }
        }
--- a/src/core/identity/storage.rs
+++ b/src/core/identity/storage.rs
@@ -193,7 +193,7 @@ pub async fn save_identity_file_by_pool(pool: &sqlx::PgPool, uuid: &str) -> Resu

    let record = sqlx::query_as::<_, crate::core::db::IdentityDetailRecord>(
        &format!(
-            "SELECT id, uuid::text, name, identity_type, source, status, metadata, reference_data, \
+            "SELECT id::bigint, uuid::text, name, identity_type, source, status, metadata, COALESCE(reference_data, '{{}}'::jsonb) as reference_data, \
              NULL::real[] as voice_embedding, NULL::real[] as identity_embedding, \
              face_embedding::real[] as face_embedding, \
              tmdb_id, tmdb_profile, created_at::timestamptz as created_at, NULL::timestamptz as updated_at \
--- a/src/core/llm/function_calling.rs
+++ b/src/core/llm/function_calling.rs
@@ -97,6 +97,86 @@ pub fn llm_vision_model() -> String {
    config::llm::VISION_MODEL.clone()
 }

+/// Call the vision LLM with text + base64 images and return the generated text.
+///
+/// `base64_images` holds already-encoded payloads; each one is attached to the
+/// user message as a `data:image/jpeg;base64,...` URL after the text part.
+/// A `timeout_secs` of 0 falls back to `config::llm::CHAT_TIMEOUT_SECS` (the rule
+/// `call_llm` applies) rather than configuring a zero-length timeout.
+///
+/// # Errors
+/// Fails when the HTTP client cannot be built, the request fails, the endpoint
+/// answers with a non-success status, or the body does not deserialize. A reply
+/// with no choices or no content yields an empty string, not an error.
+pub async fn call_llm_vision(
+    system_prompt: &str,
+    user_text: &str,
+    base64_images: Vec<String>,
+    max_tokens: u32,
+    timeout_secs: u64,
+) -> anyhow::Result<String> {
+    // Text part first, then one image part per payload.
+    let mut content_parts: Vec<Value> = Vec::with_capacity(base64_images.len() + 1);
+    content_parts.push(json!({"type": "text", "text": user_text}));
+    content_parts.extend(base64_images.iter().map(|img| {
+        json!({
+            "type": "image_url",
+            "image_url": {"url": format!("data:image/jpeg;base64,{}", img)}
+        })
+    }));
+
+    let req = json!({
+        "model": llm_vision_model(),
+        "messages": [
+            {"role": "system", "content": system_prompt},
+            {"role": "user", "content": content_parts}
+        ],
+        "temperature": 0.1,
+        "max_tokens": max_tokens,
+        "stream": false,
+    });
+
+    // Same fallback as call_llm: 0 means "use the configured default".
+    let effective_timeout = if timeout_secs > 0 {
+        timeout_secs
+    } else {
+        *config::llm::CHAT_TIMEOUT_SECS
+    };
+    let client = reqwest::Client::builder()
+        .timeout(std::time::Duration::from_secs(effective_timeout))
+        .build()?;
+
+    let res = client.post(&llm_vision_url()).json(&req).send().await?;
+    // Capture the status before the body is consumed so it can be reported.
+    let status = res.status();
+    if !status.is_success() {
+        let text = res.text().await.unwrap_or_default();
+        anyhow::bail!("Vision LLM API error ({}): {}", status, text);
+    }
+
+    #[derive(Deserialize)]
+    struct VisionResponse {
+        choices: Vec<VisionChoice>,
+    }
+    #[derive(Deserialize)]
+    struct VisionChoice {
+        message: VisionMessage,
+    }
+    #[derive(Deserialize)]
+    struct VisionMessage {
+        content: Option<String>,
+    }
+
+    let vision_res: VisionResponse = res.json().await?;
+    let content = vision_res
+        .choices
+        .into_iter()
+        .next()
+        .and_then(|c| c.message.content)
+        .unwrap_or_default();
+    Ok(content.trim().to_string())
+}
+
 /// Build a tool definition JSON for function calling
 pub fn make_tool(name: &str, description: &str, properties: Value, required: Vec<&str>) -> ToolDef {
    ToolDef {
@@ -121,9 +183,11 @@ pub async fn call_llm(
    timeout_secs: u64,
 ) -> anyhow::Result<LlmResponse> {
    let client = reqwest::Client::builder()
-        .timeout(std::time::Duration::from_secs(
-            if timeout_secs > 0 { timeout_secs } else { *config::llm::CHAT_TIMEOUT_SECS },
-        ))
+        .timeout(std::time::Duration::from_secs(if timeout_secs > 0 {
+            timeout_secs
+        } else {
+            *config::llm::CHAT_TIMEOUT_SECS
+        }))
        .build()?;

    let req = ChatRequest {
@@ -135,11 +199,7 @@ pub async fn call_llm(
        tools,
    };

-    let res = client
-        .post(&llm_chat_url())
-        .json(&req)
-        .send()
-        .await?;
+    let res = client.post(&llm_chat_url()).json(&req).send().await?;

    if !res.status().is_success() {
        let text = res.text().await.unwrap_or_default();
@@ -147,13 +207,17 @@ pub async fn call_llm(
    }

    let chat_res: ChatResponse = res.json().await?;
-    let choice = chat_res.choices.into_iter().next()
+    let choice = chat_res
+        .choices
+        .into_iter()
+        .next()
        .ok_or_else(|| anyhow::anyhow!("Empty LLM response"))?;

    match choice.finish_reason.as_deref() {
        Some("tool_calls") => {
-            let calls = choice.message.tool_calls
-                .ok_or_else(|| anyhow::anyhow!("finish_reason=tool_calls but no tool_calls in message"))?;
+            let calls = choice.message.tool_calls.ok_or_else(|| {
+                anyhow::anyhow!("finish_reason=tool_calls but no tool_calls in message")
+            })?;
            Ok(LlmResponse::ToolCalls(calls))
        }
        _ => {
@@ -164,16 +228,18 @@ pub async fn call_llm(
 }

 /// Helper to build the system prompt + user messages
-pub fn build_conversation(system_prompt: &str, user_query: &str, history: Vec<ChatMessage>) -> Vec<ChatMessage> {
-    let mut messages = vec![
-        ChatMessage {
-            role: "system".to_string(),
-            content: Some(system_prompt.to_string()),
-            tool_calls: None,
-            tool_call_id: None,
-            name: None,
-        },
-    ];
+pub fn build_conversation(
+    system_prompt: &str,
+    user_query: &str,
+    history: Vec<ChatMessage>,
+) -> Vec<ChatMessage> {
+    let mut messages = vec![ChatMessage {
+        role: "system".to_string(),
+        content: Some(system_prompt.to_string()),
+        tool_calls: None,
+        tool_call_id: None,
+        name: None,
+    }];
    // Add history (user + assistant exchanges)
    messages.extend(history);
    // Add current user query
--- a/src/core/processor/asrx.rs
+++ b/src/core/processor/asrx.rs
@@ -18,12 +18,22 @@ pub struct AsrxResult {

 #[derive(Debug, Serialize, Deserialize)]
 pub struct AsrxSegment {
+    #[serde(alias = "start")]
    pub start_time: f64,
+    #[serde(alias = "end")]
    pub end_time: f64,
+    #[serde(default)]
    pub start_frame: u64,
+    #[serde(default)]
    pub end_frame: u64,
    pub text: String,
    pub speaker_id: Option<String>,
+    #[serde(default)]
+    pub language: Option<String>,
+    #[serde(default)]
+    pub lang_prob: Option<f64>,
+    #[serde(default)]
+    pub quality: Option<f64>,
 }

 pub async fn process_asrx(
@@ -32,24 +42,16 @@ pub async fn process_asrx(
    uuid: Option<&str>,
 ) -> Result<AsrxResult> {
    let executor = PythonExecutor::new()?;
-    let script_path = executor.script_path("asrx_processor_custom.py");
+    let script_path = executor.script_path("asrx_processor.py");

    tracing::info!(
-        "[ASRX] Starting speaker diarization (custom): {}",
+        "[ASRX] Starting hybrid speaker diarization: {}",
        video_path
    );

    if !script_path.exists() {
-        tracing::warn!("[ASRX] Custom script not found, falling back to original");
-        let fallback_path = executor.script_path("asrx_processor.py");
-        if !fallback_path.exists() {
-            tracing::warn!("[ASRX] No script found, returning empty result");
-            return Ok(AsrxResult {
-                language: None,
-                segments: vec![],
-                embeddings: None,
-            });
-        }
+        tracing::error!("[ASRX] Script not found: {:?}", script_path);
+        anyhow::bail!("asrx_processor.py not found");
    }

    tracing::info!(
@@ -65,6 +67,7 @@ pub async fn process_asrx(

    if let Some(u) = uuid {
        cmd.arg("--uuid").arg(u);
+        cmd.arg("--file-uuid").arg(u);
    }

    cmd.stdout(std::process::Stdio::piped())
@@ -126,6 +129,9 @@ mod tests {
                end_frame: 75,
                text: "Hello".to_string(),
                speaker_id: Some("SPEAKER_00".to_string()),
+                language: None,
+                lang_prob: None,
+                quality: None,
            }],
            embeddings: None,
        };
@@ -173,7 +179,27 @@ mod tests {
            end_frame: 150,
            text: "Test".to_string(),
            speaker_id: None,
+            language: None,
+            lang_prob: None,
+            quality: None,
        };
        assert!(segment.end_time > segment.start_time);
    }
+
+    #[test]
+    fn test_asrx_backward_compat_old_format() {
+        let json = r#"{
+            "language": "en",
+            "segments": [
+                {"start": 10.0, "end": 12.5, "text": "Hello", "speaker_id": "SPEAKER_00"}
+            ]
+        }"#;
+        let result: AsrxResult = serde_json::from_str(json).unwrap();
+        assert_eq!(result.segments.len(), 1);
+        assert_eq!(result.segments[0].start_time, 10.0);
+        assert_eq!(result.segments[0].end_time, 12.5);
+        assert_eq!(result.segments[0].text, "Hello");
+        assert_eq!(result.segments[0].start_frame, 0);
+        assert_eq!(result.segments[0].end_frame, 0);
+    }
 }
--- a/src/core/processor/cut.rs
+++ b/src/core/processor/cut.rs
@@ -43,11 +43,15 @@ pub async fn process_cut(
    let script_path = executor.script_path("cut_processor.py");

    if !script_path.exists() {
-        return Ok(CutResult {
+        let empty_result = CutResult {
            frame_count: 0,
            fps: 0.0,
            scenes: vec![],
-        });
+        };
+        let json = serde_json::to_string_pretty(&empty_result)?;
+        std::fs::write(output_path, &json)
+            .with_context(|| format!("Failed to write {:?}", output_path))?;
+        return Ok(empty_result);
    }

    executor
@@ -127,18 +131,22 @@ fn try_native_cut(video_path: &str) -> Result<CutResult> {
        .context("Failed to run ffmpeg scene detection")?;

    let stderr_output = String::from_utf8_lossy(&scene_output.stderr);
+    let stdout_output = String::from_utf8_lossy(&scene_output.stdout);
    let mut scene_times: Vec<f64> = Vec::new();

-    // Parse ffmpeg showinfo output for scene changes
-    // Format: [Parsed_showinfo...] pts:123.456 pts_time:123.456 ...
-    for line in stderr_output.lines() {
-        if line.contains("pts_time:") {
-            if let Some(pos) = line.find("pts_time:") {
-                let rest = &line[pos + 9..];
-                let time_str = rest.split_whitespace().next().unwrap_or("");
-                if let Ok(t) = time_str.parse::<f64>() {
-                    scene_times.push(t);
-                }
+    // Collect scene-change timestamps from both stderr and stdout.
+    // Accepted markers, tried in this order for every line:
+    //   pts_time=123.456  (key=value style, as ffprobe prints it)
+    //   pts_time:123.456  (showinfo filter style)
+    const PTS_MARKERS: [&str; 2] = ["pts_time=", "pts_time:"];
+    for line in stderr_output.lines().chain(stdout_output.lines()) {
+        let after_marker = PTS_MARKERS
+            .iter()
+            .find_map(|marker| line.find(*marker).map(|pos| &line[pos + marker.len()..]));
+        if let Some(rest) = after_marker {
+            let time_str = rest.split_whitespace().next().unwrap_or("");
+            if let Ok(t) = time_str.parse::<f64>() {
+                scene_times.push(t);
            }
        }
    }
--- a/src/core/processor/mod.rs
+++ b/src/core/processor/mod.rs
@@ -11,7 +11,6 @@ pub mod pose;
 pub mod scene_classification;
 pub mod story;
 pub mod tkg;
-pub mod visual_chunk;
 pub mod yolo;

 pub use asr::{process_asr, AsrResult, AsrSegment};
@@ -40,5 +39,4 @@ pub use tkg::{
    build_tkg, query_auto_representative_frame, FrameTraceInfo, MainIdentityInfo,
    RepresentativeFrameResult, TkgResult,
 };
-pub use visual_chunk::{process_visual_chunk, process_visual_chunk_advanced, VisualChunkResult};
 pub use yolo::{process_yolo, YoloFrame, YoloObject, YoloResult};
--- a/src/core/processor/tkg.rs
+++ b/src/core/processor/tkg.rs
@@ -38,7 +38,10 @@ fn load_face_pose_data(output_dir: &str, file_uuid: &str) -> Result<Vec<FacePose
    let mut poses = Vec::new();
    if let Some(frames) = json.get("frames").and_then(|v| v.as_array()) {
        for frame_entry in frames {
-            let frame_num = frame_entry.get("frame").and_then(|v| v.as_i64()).unwrap_or(0);
+            let frame_num = frame_entry
+                .get("frame")
+                .and_then(|v| v.as_i64())
+                .unwrap_or(0);
            if let Some(faces) = frame_entry.get("faces").and_then(|v| v.as_array()) {
                for face in faces {
                    let bbox = match face.get("bbox") {
@@ -68,7 +71,14 @@ fn load_face_pose_data(output_dir: &str, file_uuid: &str) -> Result<Vec<FacePose

 /// Match a face from face_detections (frame, x, y, w, h) to its pose in face.json
 /// Uses bbox center distance to find the best match when multiple faces per frame.
-fn get_pose_for_face(frame: i64, x: f64, y: f64, w: f64, h: f64, poses: &[FacePose]) -> Option<(f64, f64, f64)> {
+fn get_pose_for_face(
+    frame: i64,
+    x: f64,
+    y: f64,
+    w: f64,
+    h: f64,
+    poses: &[FacePose],
+) -> Option<(f64, f64, f64)> {
    let cx = x + w / 2.0;
    let cy = y + h / 2.0;
    let mut best_dist = f64::MAX;
@@ -86,8 +96,12 @@ fn get_pose_for_face(frame: i64, x: f64, y: f64, w: f64, h: f64, poses: &[FacePo
 }

 fn detect_mutual_gaze(
-    bbox_a_x: f64, bbox_a_w: f64, yaw_a: f64,
-    bbox_b_x: f64, bbox_b_w: f64, yaw_b: f64,
+    bbox_a_x: f64,
+    bbox_a_w: f64,
+    yaw_a: f64,
+    bbox_b_x: f64,
+    bbox_b_w: f64,
+    yaw_b: f64,
    threshold: f64,
 ) -> bool {
    let cx_a = bbox_a_x + bbox_a_w / 2.0;
@@ -138,12 +152,20 @@ struct AsrxSegmentEntry {
    #[serde(default)]
    speaker_id: String,
    #[serde(default)]
-    start_time: f64,
+    // Seconds. `start_time` is accepted too (asrx.rs reads both spellings);
+    // without the alias a mismatched key would silently default to 0.0.
+    #[serde(alias = "start_time")]
+    start: f64,
    #[serde(default)]
-    end_time: f64,
+    #[serde(alias = "end_time")]
+    end: f64,
+    #[serde(default)]
+    text: String,
    #[allow(dead_code)]
+    #[serde(default)]
    start_frame: i64,
    #[allow(dead_code)]
+    #[serde(default)]
    end_frame: i64,
 }

@@ -195,7 +213,10 @@ pub struct TkgResult {
 pub async fn build_tkg(db: &PostgresDb, file_uuid: &str, output_dir: &str) -> Result<TkgResult> {
    let pool = db.pool();
    let pose_data = load_face_pose_data(output_dir, file_uuid).unwrap_or_default();
-    tracing::info!("[TKG] Loaded {} pose entries from face.json", pose_data.len());
+    tracing::info!(
+        "[TKG] Loaded {} pose entries from face.json",
+        pose_data.len()
+    );

    let n_face = build_face_trace_nodes(pool, file_uuid, &pose_data).await?;
    let n_objects = build_yolo_object_nodes(pool, file_uuid, output_dir).await?;
@@ -217,7 +238,11 @@ pub async fn build_tkg(db: &PostgresDb, file_uuid: &str, output_dir: &str) -> Re

 // ── Node builders ─────────────────────────────────────────────────

-async fn build_face_trace_nodes(pool: &PgPool, file_uuid: &str, pose_data: &[FacePose]) -> Result<usize> {
+async fn build_face_trace_nodes(
+    pool: &PgPool,
+    file_uuid: &str,
+    pose_data: &[FacePose],
+) -> Result<usize> {
    let face_table = t("face_detections");
    let nodes_table = t("tkg_nodes");

@@ -257,7 +282,10 @@ async fn build_face_trace_nodes(pool: &PgPool, file_uuid: &str, pose_data: &[Fac
    // Group by trace_id: trace_id → Vec<(frame, x, y, w, h)>
    let mut trace_frames: HashMap<i64, Vec<(i64, f64, f64, f64, f64)>> = HashMap::new();
    for (tid, frame, x, y, w, h) in &frame_rows {
-        trace_frames.entry(*tid).or_default().push((*frame, *x, *y, *w, *h));
+        trace_frames
+            .entry(*tid)
+            .or_default()
+            .push((*frame, *x, *y, *w, *h));
    }

    let mut count = 0;
@@ -274,7 +302,9 @@ async fn build_face_trace_nodes(pool: &PgPool, file_uuid: &str, pose_data: &[Fac

        if let Some(frames) = trace_frames.get(&tid) {
            for (frame, x, y, w, h) in frames {
-                if let Some((yaw, pitch, roll)) = get_pose_for_face(*frame, *x, *y, *w, *h, pose_data) {
+                if let Some((yaw, pitch, roll)) =
+                    get_pose_for_face(*frame, *x, *y, *w, *h, pose_data)
+                {
                    yaw_sum += yaw;
                    pitch_sum += pitch;
                    roll_sum += roll;
@@ -284,7 +314,11 @@ async fn build_face_trace_nodes(pool: &PgPool, file_uuid: &str, pose_data: &[Fac
        }

        let (avg_yaw, avg_pitch, avg_roll) = if pose_count > 0 {
-            (yaw_sum / pose_count as f64, pitch_sum / pose_count as f64, roll_sum / pose_count as f64)
+            (
+                yaw_sum / pose_count as f64,
+                pitch_sum / pose_count as f64,
+                roll_sum / pose_count as f64,
+            )
        } else {
            (0.0, 0.0, 0.0)
        };
@@ -401,8 +435,44 @@ async fn build_speaker_nodes(pool: &PgPool, file_uuid: &str, output_dir: &str) -
    let nodes_table = t("tkg_nodes");
    let mut count = 0;

+    // Group segments by speaker_id
+    let mut speaker_segments: HashMap<String, Vec<&AsrxSegmentEntry>> = HashMap::new();
+    for seg in &asrx.segments {
+        speaker_segments
+            .entry(seg.speaker_id.clone())
+            .or_default()
+            .push(seg);
+    }
+
    for (sid, stat) in &stats {
-        let props = serde_json::json!({ "segment_count": stat.count });
+        let segs = speaker_segments.get(sid);
+        let (full_text, segments_json) = if let Some(seg_list) = segs {
+            let full: String = seg_list
+                .iter()
+                .map(|s| s.text.trim())
+                .filter(|t| !t.is_empty())
+                .collect::<Vec<_>>()
+                .join(" ");
+            let segments: Vec<serde_json::Value> = seg_list
+                .iter()
+                .map(|s| {
+                    serde_json::json!({
+                        "start": s.start,
+                        "end": s.end,
+                        "text": s.text,
+                    })
+                })
+                .collect();
+            (full, serde_json::Value::Array(segments))
+        } else {
+            (String::new(), serde_json::Value::Array(vec![]))
+        };
+
+        let props = serde_json::json!({
+            "segment_count": stat.count,
+            "segments": segments_json,
+            "full_text": full_text,
+        });

        sqlx::query(&format!(
            r#"
@@ -576,8 +646,10 @@ async fn build_speaker_face_edges(

    // Calculate fps from last segment
    let last = asrx.segments.last().unwrap();
-    let fps = if last.end_time > 0.0 {
-        last.end_frame as f64 / last.end_time
+    // end_frame deserializes to 0 when the ASRX file carries no frame numbers;
+    // fall back to 30 fps then, otherwise fps would be 0 and is divided by below.
+    let fps = if last.end > 0.0 && last.end_frame > 0 {
+        last.end_frame as f64 / last.end
    } else {
        30.0
    };
@@ -604,8 +674,8 @@ async fn build_speaker_face_edges(
        let face_end_sec = *ef as f64 / fps;

        for seg in &asrx.segments {
-            let seg_start = seg.start_time;
-            let seg_end = seg.end_time;
+            let seg_start = seg.start;
+            let seg_end = seg.end;
            let overlap_start = face_start_sec.max(seg_start);
            let overlap_end = face_end_sec.min(seg_end);

@@ -669,7 +739,11 @@ async fn build_speaker_face_edges(
    Ok(edge_count)
 }

-async fn build_face_face_edges(pool: &PgPool, file_uuid: &str, pose_data: &[FacePose]) -> Result<usize> {
+async fn build_face_face_edges(
+    pool: &PgPool,
+    file_uuid: &str,
+    pose_data: &[FacePose],
+) -> Result<usize> {
    let face_table = t("face_detections");
    let nodes_table = t("tkg_nodes");
    let edges_table = t("tkg_edges");
@@ -722,8 +796,9 @@ async fn build_face_face_edges(pool: &PgPool, file_uuid: &str, pose_data: &[Face
            (Some(&(xa, ya, wa, ha)), Some(&(xb, yb, wb, hb))) => {
                get_pose_for_face(*frame, xa, ya, wa, ha, pose_data)
                    .and_then(|(yaw_a, _, _)| {
-                        get_pose_for_face(*frame, xb, yb, wb, hb, pose_data)
-                            .map(|(yaw_b, _, _)| detect_mutual_gaze(xa, wa, yaw_a, xb, wb, yaw_b, 0.05))
+                        get_pose_for_face(*frame, xb, yb, wb, hb, pose_data).map(|(yaw_b, _, _)| {
+                            detect_mutual_gaze(xa, wa, yaw_a, xb, wb, yaw_b, 0.05)
+                        })
                    })
                    .unwrap_or(false)
            }
@@ -770,7 +845,11 @@ async fn build_face_face_edges(pool: &PgPool, file_uuid: &str, pose_data: &[Face
        };

        let frames: Vec<i64> = frame_data.iter().map(|(f, _)| *f).collect();
-        let gaze_frames: Vec<i64> = frame_data.iter().filter(|(_, g)| *g).map(|(f, _)| *f).collect();
+        let gaze_frames: Vec<i64> = frame_data
+            .iter()
+            .filter(|(_, g)| *g)
+            .map(|(f, _)| *f)
+            .collect();
        let gaze_count = gaze_frames.len() as i64;
        let has_gaze = gaze_count > 0;

@@ -793,8 +872,13 @@ async fn build_face_face_edges(pool: &PgPool, file_uuid: &str, pose_data: &[Face
                }
            }
            let (avg_ya, avg_yb) = if gaze_sample > 0 {
-                (yaw_a_sum / gaze_sample as f64, yaw_b_sum / gaze_sample as f64)
-            } else { (0.0, 0.0) };
+                (
+                    yaw_a_sum / gaze_sample as f64,
+                    yaw_b_sum / gaze_sample as f64,
+                )
+            } else {
+                (0.0, 0.0)
+            };

            serde_json::json!({
                "first_frame": frames[0],
@@ -902,9 +986,14 @@ pub async fn query_auto_representative_frame(
    .context("Failed to detect main identities")?;

    let main_ids: Vec<(i32, String, String, i64)> = mains;
-    let main_idents: Vec<MainIdentityInfo> = main_ids.iter().map(|(_, u, n, c)|
-        MainIdentityInfo { identity_uuid: u.clone(), name: n.clone(), face_count: *c }
-    ).collect();
+    let main_idents: Vec<MainIdentityInfo> = main_ids
+        .iter()
+        .map(|(_, u, n, c)| MainIdentityInfo {
+            identity_uuid: u.clone(),
+            name: n.clone(),
+            face_count: *c,
+        })
+        .collect();

    let frame_number: Option<i64> = if main_ids.len() >= 2 {
        let id_a = main_ids[0].0;
@@ -915,16 +1004,20 @@ pub async fn query_auto_representative_frame(
             AND trace_id IS NOT NULL GROUP BY trace_id ORDER BY COUNT(*) DESC LIMIT 1",
            fd_table
        ))
-        .bind(file_uuid).bind(id_a)
-        .fetch_optional(pool).await?;
+        .bind(file_uuid)
+        .bind(id_a)
+        .fetch_optional(pool)
+        .await?;

        let trace_b: Option<(i32,)> = sqlx::query_as(&format!(
            "SELECT trace_id FROM {} WHERE file_uuid = $1 AND identity_id = $2 \
             AND trace_id IS NOT NULL GROUP BY trace_id ORDER BY COUNT(*) DESC LIMIT 1",
            fd_table
        ))
-        .bind(file_uuid).bind(id_b)
-        .fetch_optional(pool).await?;
+        .bind(file_uuid)
+        .bind(id_b)
+        .fetch_optional(pool)
+        .await?;

        match (trace_a, trace_b) {
            (Some((ta,)), Some((tb,))) => {
@@ -940,11 +1033,18 @@ pub async fn query_auto_representative_frame(
                     LIMIT 1",
                    edges_table, nodes_table, nodes_table
                ))
-                .bind(file_uuid).bind(ta).bind(tb)
-                .fetch_optional(pool).await?;
+                .bind(file_uuid)
+                .bind(ta)
+                .bind(tb)
+                .fetch_optional(pool)
+                .await?;

                if let Some((f,)) = tkg_frame {
-                    if f <= half_frame { Some(f) } else { None }
+                    if f <= half_frame {
+                        Some(f)
+                    } else {
+                        None
+                    }
                } else {
                    sqlx::query_scalar::<_, i64>(&format!(
                        "SELECT MIN(fd_a.frame_number)::bigint \
@@ -954,8 +1054,12 @@ pub async fn query_auto_representative_frame(
                         AND fd_b.identity_id = $3 AND fd_a.frame_number <= $4",
                        fd_table, fd_table
                    ))
-                    .bind(file_uuid).bind(id_a).bind(id_b).bind(half_frame)
-                    .fetch_optional(pool).await?
+                    .bind(file_uuid)
+                    .bind(id_a)
+                    .bind(id_b)
+                    .bind(half_frame)
+                    .fetch_optional(pool)
+                    .await?
                }
            }
            _ => None,
@@ -976,8 +1080,11 @@ pub async fn query_auto_representative_frame(
                     LIMIT 1",
                    fd_table
                ))
-                .bind(file_uuid).bind(first_id).bind(half_frame)
-                .fetch_optional(pool).await?
+                .bind(file_uuid)
+                .bind(first_id)
+                .bind(half_frame)
+                .fetch_optional(pool)
+                .await?
            } else {
                None
            }
@@ -995,20 +1102,25 @@ pub async fn query_auto_representative_frame(
                 LIMIT 1",
                fd_table
            ))
-            .bind(file_uuid).bind(half_frame)
-            .fetch_optional(pool).await?
+            .bind(file_uuid)
+            .bind(half_frame)
+            .fetch_optional(pool)
+            .await?
        }
    };

-    let frame_number = frame_number.ok_or_else(|| anyhow::anyhow!("No faces found in this file"))?;
+    let frame_number =
+        frame_number.ok_or_else(|| anyhow::anyhow!("No faces found in this file"))?;

    let face_quality: f64 = sqlx::query_scalar::<_, f64>(&format!(
        "SELECT COALESCE(MAX((width::float8 * height::float8) * confidence::float8), 0) \
         FROM {} WHERE file_uuid = $1 AND frame_number = $2",
        fd_table
    ))
-    .bind(file_uuid).bind(frame_number)
-    .fetch_one(pool).await?;
+    .bind(file_uuid)
+    .bind(frame_number)
+    .fetch_one(pool)
+    .await?;

    let traces: Vec<FrameTraceInfo> = sqlx::query_as::<_, (i32, Option<String>, Option<String>, i32, i32, i32, i32, f64)>(&format!(
        "SELECT fd.trace_id, i.uuid::text, i.name, fd.x, fd.y, fd.width, fd.height, fd.confidence::float8 \
--- a/src/core/processor/visual_chunk.rs
+++ b/src/core/processor/visual_chunk.rs
@@ -1,594 +0,0 @@
-//! 視覺分片處理器 (Phase 2.2)
-//!
-//! 從 YOLO 結果生成視覺分片
-
-use anyhow::{Context, Result};
-use serde::{Deserialize, Serialize};
-use std::time::Duration;
-
-use super::executor::PythonExecutor;
-use super::yolo::{YoloFrame, YoloResult};
-
-const VISUAL_CHUNK_TIMEOUT: Duration = Duration::from_secs(3600);
-
-/// 視覺分片處理結果
-#[derive(Debug, Serialize, Deserialize, Clone, Default)]
-pub struct VisualChunkResult {
-    /// 生成的視覺分片數量
-    pub chunk_count: u32,
-    /// 處理的總幀數
-    pub total_frames: u32,
-    /// 檢測到的總物件數
-    pub total_objects: u32,
-    /// 唯一物件類別數
-    pub unique_classes: u32,
-    /// 生成的視覺分片
-    pub chunks: Vec<crate::core::chunk::Chunk>,
-}
-
-/// 從 YOLO 結果生成視覺分片
-pub async fn process_visual_chunk(
-    file_id: i32,
-    uuid: String,
-    video_path: &str,
-    yolo_result: &YoloResult,
-    chunk_index_offset: u32,
-    fps: f64,
-) -> Result<VisualChunkResult> {
-    tracing::info!(
-        "[VisualChunk] Starting visual chunk generation for video: {}, {} frames",
-        video_path,
-        yolo_result.frames.len()
-    );
-
-    if yolo_result.frames.is_empty() {
-        tracing::warn!("[VisualChunk] No YOLO frames to process");
-        return Ok(VisualChunkResult {
-            chunk_count: 0,
-            total_frames: 0,
-            total_objects: 0,
-            unique_classes: 0,
-            chunks: vec![],
-        });
-    }
-
-    // 策略 1: 固定幀數分片（每 N 幀一個分片）
-    let chunks = create_fixed_frame_chunks(file_id, &uuid, yolo_result, chunk_index_offset, fps);
-
-    // 統計信息
-    let total_objects: u32 = yolo_result
-        .frames
-        .iter()
-        .map(|f| f.objects.len() as u32)
-        .sum();
-    let all_classes: Vec<String> = yolo_result
-        .frames
-        .iter()
-        .flat_map(|f| f.objects.iter().map(|o| o.class_name.clone()))
-        .collect();
-    let unique_classes: u32 = all_classes
-        .iter()
-        .cloned()
-        .collect::<std::collections::HashSet<_>>()
-        .len() as u32;
-
-    tracing::info!(
-        "[VisualChunk] Generated {} visual chunks from {} frames, {} total objects, {} unique classes",
-        chunks.len(),
-        yolo_result.frames.len(),
-        total_objects,
-        unique_classes
-    );
-
-    Ok(VisualChunkResult {
-        chunk_count: chunks.len() as u32,
-        total_frames: yolo_result.frames.len() as u32,
-        total_objects,
-        unique_classes,
-        chunks,
-    })
-}
-
-/// 創建固定幀數分片（每 N 幀一個分片）
-fn create_fixed_frame_chunks(
-    file_id: i32,
-    uuid: &str,
-    yolo_result: &YoloResult,
-    chunk_index_offset: u32,
-    fps: f64,
-) -> Vec<crate::core::chunk::Chunk> {
-    let mut chunks = Vec::new();
-
-    // 配置：每 30 幀創建一個分片（約 1 秒，如果 fps=30）
-    let frames_per_chunk = 30;
-    let total_frames = yolo_result.frames.len();
-
-    if total_frames == 0 {
-        return chunks;
-    }
-
-    let mut chunk_index = chunk_index_offset;
-    let mut start_idx = 0;
-
-    while start_idx < total_frames {
-        let end_idx = std::cmp::min(start_idx + frames_per_chunk, total_frames);
-
-        // 獲取這個分片的幀
-        let chunk_frames: Vec<YoloFrame> = yolo_result.frames[start_idx..end_idx]
-            .iter()
-            .cloned()
-            .collect();
-
-        if chunk_frames.is_empty() {
-            break;
-        }
-
-        // 計算幀範圍
-        let start_frame = chunk_frames.first().unwrap().frame as i64;
-        let end_frame = chunk_frames.last().unwrap().frame as i64 + 1; // exclusive
-
-        // 創建視覺分片
-        let chunk = crate::core::chunk::Chunk::from_yolo_frames(
-            file_id,
-            uuid.to_string(),
-            format!("vis_{}", chunk_index),
-            start_frame,
-            end_frame,
-            fps,
-            chunk_frames,
-        );
-
-        chunks.push(chunk);
-
-        // 更新索引
-        start_idx = end_idx;
-        chunk_index += 1;
-    }
-
-    chunks
-}
-
-/// 基於物件相似度創建分片
-fn create_similarity_based_chunks(
-    file_id: i32,
-    uuid: &str,
-    yolo_result: &YoloResult,
-    chunk_index_offset: u32,
-    fps: f64,
-    similarity_threshold: f32,
-    min_frames_per_chunk: usize,
-) -> Vec<crate::core::chunk::Chunk> {
-    let mut chunks = Vec::new();
-
-    if yolo_result.frames.is_empty() {
-        return chunks;
-    }
-
-    let mut current_chunk_frames: Vec<YoloFrame> = Vec::new();
-    let mut chunk_index = chunk_index_offset;
-    let mut current_start_frame = 0;
-
-    for (i, frame) in yolo_result.frames.iter().enumerate() {
-        if current_chunk_frames.is_empty() {
-            current_chunk_frames.push(frame.clone());
-            current_start_frame = frame.frame as i64;
-            continue;
-        }
-
-        // 檢查相似度（簡化版本：檢查物件類別是否相同）
-        let last_frame = current_chunk_frames.last().unwrap();
-        let similarity = calculate_frame_similarity(last_frame, frame);
-
-        if similarity >= similarity_threshold {
-            // 相似度高，加入當前分片
-            current_chunk_frames.push(frame.clone());
-        } else {
-            // 相似度低，創建新分片
-            if current_chunk_frames.len() >= min_frames_per_chunk {
-                let end_frame = current_chunk_frames.last().unwrap().frame as i64 + 1;
-
-                let chunk = crate::core::chunk::Chunk::from_yolo_frames(
-                    file_id,
-                    uuid.to_string(),
-                    format!("vis_{}", chunk_index),
-                    current_start_frame,
-                    end_frame,
-                    fps,
-                    current_chunk_frames.clone(),
-                );
-
-                chunks.push(chunk);
-                chunk_index += 1;
-            }
-
-            // 開始新的分片
-            current_chunk_frames = vec![frame.clone()];
-            current_start_frame = frame.frame as i64;
-        }
-    }
-
-    // 處理最後一個分片
-    if current_chunk_frames.len() >= min_frames_per_chunk {
-        let end_frame = current_chunk_frames.last().unwrap().frame as i64 + 1;
-
-        let chunk = crate::core::chunk::Chunk::from_yolo_frames(
-            file_id,
-            uuid.to_string(),
-            format!("vis_{}", chunk_index),
-            current_start_frame,
-            end_frame,
-            fps,
-            current_chunk_frames,
-        );
-
-        chunks.push(chunk);
-    }
-
-    chunks
-}
-
-/// 計算兩個幀之間的相似度（基於物件類別）
-fn calculate_frame_similarity(frame1: &YoloFrame, frame2: &YoloFrame) -> f32 {
-    if frame1.objects.is_empty() && frame2.objects.is_empty() {
-        return 1.0;
-    }
-
-    if frame1.objects.is_empty() || frame2.objects.is_empty() {
-        return 0.0;
-    }
-
-    let set1: std::collections::HashSet<String> = frame1
-        .objects
-        .iter()
-        .map(|o| o.class_name.clone())
-        .collect();
-    let set2: std::collections::HashSet<String> = frame2
-        .objects
-        .iter()
-        .map(|o| o.class_name.clone())
-        .collect();
-
-    let intersection: Vec<_> = set1.intersection(&set2).collect();
-    let union: Vec<_> = set1.union(&set2).collect();
-
-    if union.is_empty() {
-        0.0
-    } else {
-        intersection.len() as f32 / union.len() as f32
-    }
-}
-
-/// 使用 Python 腳本生成視覺分片（進階版本）
-pub async fn process_visual_chunk_advanced(
-    video_path: &str,
-    output_path: &str,
-    uuid: Option<&str>,
-) -> Result<VisualChunkResult> {
-    let executor = PythonExecutor::new()?;
-    let script_path = executor.script_path("visual_chunk_processor.py");
-
-    tracing::info!(
-        "[VisualChunk] Starting advanced visual chunk generation: {}",
-        video_path
-    );
-
-    if !script_path.exists() {
-        tracing::warn!("[VisualChunk] Script not found, using basic generation");
-        // 這裡可以回退到基本生成方法
-        return Ok(VisualChunkResult {
-            chunk_count: 0,
-            total_frames: 0,
-            total_objects: 0,
-            unique_classes: 0,
-            chunks: vec![],
-        });
-    }
-
-    let yolo_path = uuid.map(|u| {
-        std::path::PathBuf::from(crate::core::config::OUTPUT_DIR.as_str())
-            .join(format!("{}.yolo.json", u))
-            .to_string_lossy()
-            .to_string()
-    });
-    let args: &[&str] = if let Some(ref yp) = yolo_path {
-        &[video_path, output_path, "--yolo-result", yp]
-    } else {
-        &[video_path, output_path]
-    };
-    let result = match executor
-        .run(
-            "visual_chunk_processor.py",
-            args,
-            uuid,
-            "VisualChunk",
-            Some(VISUAL_CHUNK_TIMEOUT),
-        )
-        .await
-    {
-        Ok(_) => match std::fs::read_to_string(output_path) {
-            Ok(json_str) => match serde_json::from_str::<VisualChunkResult>(&json_str) {
-                Ok(r) => r,
-                Err(e) => {
-                    tracing::warn!(
-                        "[VisualChunk] Failed to parse output ({}), returning empty",
-                        e
-                    );
-                    VisualChunkResult::default()
-                }
-            },
-            Err(e) => {
-                tracing::warn!(
-                    "[VisualChunk] Failed to read output ({}), returning empty",
-                    e
-                );
-                VisualChunkResult::default()
-            }
-        },
-        Err(e) => {
-            tracing::warn!(
-                "[VisualChunk] Failed to run script ({}), returning empty",
-                e
-            );
-            VisualChunkResult::default()
-        }
-    };
-
-    tracing::info!(
-        "[VisualChunk] Advanced generation result: {} chunks, {} frames",
-        result.chunk_count,
-        result.total_frames
-    );
-
-    Ok(result)
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn test_calculate_frame_similarity() {
-        use crate::core::processor::yolo::{YoloFrame, YoloObject};
-
-        let frame1 = YoloFrame {
-            frame: 0,
-            timestamp: 0.0,
-            objects: vec![
-                YoloObject {
-                    class_name: "person".to_string(),
-                    class_id: 0,
-                    x: 100,
-                    y: 200,
-                    width: 50,
-                    height: 100,
-                    confidence: 0.95,
-                },
-                YoloObject {
-                    class_name: "car".to_string(),
-                    class_id: 2,
-                    x: 300,
-                    y: 150,
-                    width: 80,
-                    height: 60,
-                    confidence: 0.87,
-                },
-            ],
-        };
-
-        let frame2 = YoloFrame {
-            frame: 1,
-            timestamp: 0.033,
-            objects: vec![
-                YoloObject {
-                    class_name: "person".to_string(),
-                    class_id: 0,
-                    x: 110,
-                    y: 210,
-                    width: 52,
-                    height: 102,
-                    confidence: 0.92,
-                },
-                YoloObject {
-                    class_name: "car".to_string(),
-                    class_id: 2,
-                    x: 310,
-                    y: 155,
-                    width: 82,
-                    height: 62,
-                    confidence: 0.85,
-                },
-            ],
-        };
-
-        let frame3 = YoloFrame {
-            frame: 2,
-            timestamp: 0.066,
-            objects: vec![YoloObject {
-                class_name: "dog".to_string(),
-                class_id: 16,
-                x: 150,
-                y: 250,
-                width: 40,
-                height: 60,
-                confidence: 0.78,
-            }],
-        };
-
-        // 相同物件的幀應該高度相似
-        let similarity_same = calculate_frame_similarity(&frame1, &frame2);
-        assert!((similarity_same - 1.0).abs() < 0.001);
-
-        // 不同物件的幀應該不相似
-        let similarity_diff = calculate_frame_similarity(&frame1, &frame3);
-        assert!((similarity_diff - 0.0).abs() < 0.001);
-
-        // 空幀應該完全相似
-        let empty_frame = YoloFrame {
-            frame: 3,
-            timestamp: 0.1,
-            objects: vec![],
-        };
-        let similarity_empty = calculate_frame_similarity(&empty_frame, &empty_frame);
-        assert!((similarity_empty - 1.0).abs() < 0.001);
-    }
-
-    #[tokio::test]
-    async fn test_create_fixed_frame_chunks() {
-        use crate::core::processor::yolo::{YoloFrame, YoloObject, YoloResult};
-
-        // 創建測試 YOLO 結果（60 幀，每幀都有物件）
-        let mut frames = Vec::new();
-        for i in 0..60 {
-            frames.push(YoloFrame {
-                frame: i as u64,
-                timestamp: i as f64 / 30.0, // 假設 fps=30
-                objects: vec![YoloObject {
-                    class_name: "person".to_string(),
-                    class_id: 0,
-                    x: 100,
-                    y: 200,
-                    width: 50,
-                    height: 100,
-                    confidence: 0.9,
-                }],
-            });
-        }
-
-        let yolo_result = YoloResult {
-            frame_count: 60,
-            fps: 30.0,
-            frames,
-        };
-
-        let chunks = create_fixed_frame_chunks(1, "test-uuid", &yolo_result, 0, 30.0);
-
-        // 60 幀，每 30 幀一個分片，應該有 2 個分片
-        assert_eq!(chunks.len(), 2);
-
-        // 檢查第一個分片
-        let first_chunk = &chunks[0];
-        assert_eq!(
-            first_chunk.chunk_type,
-            crate::core::chunk::ChunkType::Visual
-        );
-        assert_eq!(first_chunk.start_frame, 0);
-        assert_eq!(first_chunk.end_frame, 30); // exclusive
-        assert_eq!(first_chunk.frame_count, 30);
-
-        // 檢查第二個分片
-        let second_chunk = &chunks[1];
-        assert_eq!(
-            second_chunk.chunk_type,
-            crate::core::chunk::ChunkType::Visual
-        );
-        assert_eq!(second_chunk.start_frame, 30);
-        assert_eq!(second_chunk.end_frame, 60); // exclusive
-        assert_eq!(second_chunk.frame_count, 30);
-    }
-
-    #[test]
-    fn test_create_similarity_based_chunks() {
-        use crate::core::processor::yolo::{YoloFrame, YoloObject, YoloResult};
-
-        // 創建測試 YOLO 結果
-        let frames = vec![
-            YoloFrame {
-                // 幀 0-4: 都有 person 和 car
-                frame: 0,
-                timestamp: 0.0,
-                objects: vec![
-                    YoloObject {
-                        class_name: "person".to_string(),
-                        class_id: 0,
-                        x: 100,
-                        y: 200,
-                        width: 50,
-                        height: 100,
-                        confidence: 0.9,
-                    },
-                    YoloObject {
-                        class_name: "car".to_string(),
-                        class_id: 2,
-                        x: 300,
-                        y: 150,
-                        width: 80,
-                        height: 60,
-                        confidence: 0.8,
-                    },
-                ],
-            },
-            YoloFrame {
-                // 幀 1
-                frame: 1,
-                timestamp: 0.033,
-                objects: vec![
-                    YoloObject {
-                        class_name: "person".to_string(),
-                        class_id: 0,
-                        x: 110,
-                        y: 210,
-                        width: 52,
-                        height: 102,
-                        confidence: 0.88,
-                    },
-                    YoloObject {
-                        class_name: "car".to_string(),
-                        class_id: 2,
-                        x: 310,
-                        y: 155,
-                        width: 82,
-                        height: 62,
-                        confidence: 0.78,
-                    },
-                ],
-            },
-            YoloFrame {
-                // 幀 5-9: 只有 dog
-                frame: 5,
-                timestamp: 0.166,
-                objects: vec![YoloObject {
-                    class_name: "dog".to_string(),
-                    class_id: 16,
-                    x: 150,
-                    y: 250,
-                    width: 40,
-                    height: 60,
-                    confidence: 0.7,
-                }],
-            },
-            YoloFrame {
-                // 幀 6
-                frame: 6,
-                timestamp: 0.2,
-                objects: vec![YoloObject {
-                    class_name: "dog".to_string(),
-                    class_id: 16,
-                    x: 155,
-                    y: 255,
-                    width: 42,
-                    height: 62,
-                    confidence: 0.68,
-                }],
-            },
-        ];
-
-        let yolo_result = YoloResult {
-            frame_count: 7,
-            fps: 30.0,
-            frames,
-        };
-
-        let chunks = create_similarity_based_chunks(
-            1,
-            "test-uuid",
-            &yolo_result,
-            0,
-            30.0,
-            0.5, // similarity threshold
-            2,   // min frames per chunk
-        );
-
-        // 應該有 2 個分片：一個是 person+car，一個是 dog
-        assert_eq!(chunks.len(), 2);
-    }
-}
--- a/src/core/thumbnail/mod.rs
+++ b/src/core/thumbnail/mod.rs
@@ -1,3 +1,5 @@
+pub mod validator;
+
 use anyhow::{Context, Result};
 use serde::{Deserialize, Serialize};
 use std::path::{Path, PathBuf};
--- a/src/core/thumbnail/validator.rs
+++ b/src/core/thumbnail/validator.rs
@@ -0,0 +1,202 @@
+use anyhow::{bail, Result};
+
+pub const JPEG_MIN_SIZE: usize = 100; // shortest buffer `validate_jpeg` / `jpeg_size_ok` accept
+pub const JPEG_SOI_MARKER: [u8; 3] = [0xFF, 0xD8, 0xFF]; // bytes a buffer must start with
+pub const JPEG_EOI_MARKER: [u8; 2] = [0xFF, 0xD9]; // bytes a buffer must end with
+
+/// Checks that `data` looks like a complete JPEG: at least `JPEG_MIN_SIZE`
+/// bytes, opening with `JPEG_SOI_MARKER` and closing with `JPEG_EOI_MARKER`.
+///
+/// # Errors
+/// Reports the first of those three checks that fails.
+pub fn validate_jpeg(data: &[u8]) -> Result<()> {
+    let len = data.len();
+    if len < JPEG_MIN_SIZE {
+        bail!("JPEG too small: {} bytes (minimum {})", len, JPEG_MIN_SIZE);
+    }
+
+    // The length check above keeps both slices in bounds.
+    let (head, tail) = (&data[..3], &data[len - 2..]);
+    if head != JPEG_SOI_MARKER {
+        bail!(
+            "Invalid JPEG header: expected {:02X?}, got {:02X?}",
+            JPEG_SOI_MARKER,
+            head
+        );
+    }
+    if tail != JPEG_EOI_MARKER {
+        bail!("Incomplete JPEG: missing EOI marker, got {:02X?}", tail);
+    }
+
+    Ok(())
+}
+
+pub fn is_valid_jpeg(data: &[u8]) -> bool {
+    matches!(validate_jpeg(data), Ok(())) // boolean form; the error detail is dropped
+}
+
+pub fn jpeg_size_ok(data: &[u8]) -> bool {
+    JPEG_MIN_SIZE <= data.len() // same length floor that `validate_jpeg` enforces
+}
+
+pub fn jpeg_header_ok(data: &[u8]) -> bool {
+    data.starts_with(&JPEG_SOI_MARKER) // also false when shorter than the marker
+}
+
+pub fn jpeg_footer_ok(data: &[u8]) -> bool {
+    data.ends_with(&JPEG_EOI_MARKER) // also false when shorter than the marker
+}
+
+/// Accepts `frame` when `0 <= frame <= total_frames`; note that the upper
+/// bound is inclusive, so `frame == total_frames` passes.
+pub fn validate_frame(frame: i64, total_frames: i64) -> Result<()> {
+    match frame {
+        f if f < 0 => bail!("Frame number cannot be negative: {}", f),
+        f if f > total_frames => bail!("Frame {} exceeds total frames {}", f, total_frames),
+        _ => Ok(()),
+    }
+}
+
+/// Checks that the crop rectangle `(x, y, w, h)` fits in the video frame.
+///
+/// # Errors
+/// Fails when `x` or `y` is negative, when `w` or `h` is not strictly
+/// positive, or when `x + w` / `y + h` exceeds the video width / height.
+pub fn validate_crop(
+    x: i32,
+    y: i32,
+    w: i32,
+    h: i32,
+    video_width: i32,
+    video_height: i32,
+) -> Result<()> {
+    if x < 0 || y < 0 || w <= 0 || h <= 0 {
+        bail!(
+            "Invalid crop parameters: x={}, y={}, w={}, h={} (must be positive)",
+            x,
+            y,
+            w,
+            h
+        );
+    }
+    // Widen to i64 before adding: in i32 the sums overflow for values near
+    // i32::MAX (panic in debug builds, wrap-around that passes in release).
+    let (right, bottom) = (i64::from(x) + i64::from(w), i64::from(y) + i64::from(h));
+    if right > i64::from(video_width) {
+        bail!("Crop width exceeds video: x+w={right} > video_width={video_width}");
+    }
+    if bottom > i64::from(video_height) {
+        bail!("Crop height exceeds video: y+h={bottom} > video_height={video_height}");
+    }
+    Ok(())
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// Builds the 101-byte fixture shared by the tests: the SOI marker, the
+    /// filler bytes 0x00..=0x5F, then the EOI marker.
+    fn sample_jpeg() -> Vec<u8> {
+        let mut bytes = JPEG_SOI_MARKER.to_vec();
+        bytes.extend(0x00..=0x5F_u8);
+        bytes.extend_from_slice(&JPEG_EOI_MARKER);
+        bytes
+    }
+
+    /// Returns the message of the error `validate_jpeg` reports for `data`.
+    fn jpeg_error(data: &[u8]) -> String {
+        validate_jpeg(data)
+            .expect_err("data should be rejected")
+            .to_string()
+    }
+
+    #[test]
+    fn test_validate_jpeg_valid() {
+        assert!(validate_jpeg(&sample_jpeg()).is_ok());
+    }
+
+    #[test]
+    fn test_validate_jpeg_too_small() {
+        let small_data = vec![0xFF, 0xD8, 0xFF, 0xFF, 0xD9];
+        assert!(jpeg_error(&small_data).contains("too small"));
+    }
+
+    #[test]
+    fn test_validate_jpeg_invalid_header() {
+        // Keep the fixture full-size so the size check passes and only the
+        // corrupted SOI marker can be the reason for rejection.
+        let mut invalid_header = sample_jpeg();
+        invalid_header[..3].fill(0x00);
+        assert!(jpeg_error(&invalid_header).contains("Invalid JPEG header"));
+    }
+
+    #[test]
+    fn test_validate_jpeg_missing_footer() {
+        // Same idea: a full-size buffer whose only fault is the trailing marker.
+        let mut missing_footer = sample_jpeg();
+        let end = missing_footer.len();
+        missing_footer[end - 2..].fill(0x00);
+        assert!(jpeg_error(&missing_footer).contains("missing EOI marker"));
+    }
+
+    #[test]
+    fn test_validate_jpeg_size_boundary() {
+        // Exactly JPEG_MIN_SIZE bytes is accepted; one byte fewer is not.
+        let mut at_minimum = vec![0u8; JPEG_MIN_SIZE];
+        at_minimum[..3].copy_from_slice(&JPEG_SOI_MARKER);
+        at_minimum[JPEG_MIN_SIZE - 2..].copy_from_slice(&JPEG_EOI_MARKER);
+        assert!(validate_jpeg(&at_minimum).is_ok());
+        assert!(jpeg_error(&at_minimum[1..]).contains("too small"));
+    }
+
+    #[test]
+    fn test_validate_frame_valid() {
+        assert!(validate_frame(500, 1000).is_ok());
+        assert!(validate_frame(0, 1000).is_ok());
+        assert!(validate_frame(1000, 1000).is_ok());
+    }
+
+    #[test]
+    fn test_validate_frame_exceeds() {
+        assert!(validate_frame(1001, 1000).is_err());
+        assert!(validate_frame(-1, 1000).is_err());
+    }
+
+    #[test]
+    fn test_validate_crop_valid() {
+        assert!(validate_crop(100, 100, 200, 200, 1920, 1080).is_ok());
+        assert!(validate_crop(0, 0, 1920, 1080, 1920, 1080).is_ok());
+    }
+
+    #[test]
+    fn test_validate_crop_exceeds_width() {
+        assert!(validate_crop(1800, 100, 200, 200, 1920, 1080).is_err());
+    }
+
+    #[test]
+    fn test_validate_crop_exceeds_height() {
+        assert!(validate_crop(100, 900, 200, 200, 1920, 1080).is_err());
+    }
+
+    #[test]
+    fn test_validate_crop_negative() {
+        assert!(validate_crop(-1, 100, 200, 200, 1920, 1080).is_err());
+        assert!(validate_crop(100, -1, 200, 200, 1920, 1080).is_err());
+    }
+
+    #[test]
+    fn test_is_valid_jpeg() {
+        assert!(is_valid_jpeg(&sample_jpeg()));
+        assert!(!is_valid_jpeg(&[0xFF, 0xD8, 0xFF, 0xFF, 0xD9])); // too small
+    }
+
+    #[test]
+    fn test_jpeg_helpers() {
+        let valid_jpeg = sample_jpeg();
+        assert!(jpeg_size_ok(&valid_jpeg));
+        assert!(jpeg_header_ok(&valid_jpeg));
+        assert!(jpeg_footer_ok(&valid_jpeg));
+        assert!(!jpeg_header_ok(&valid_jpeg[..2])); // shorter than the marker
+    }
+}
--- a/src/core/tmdb/probe.rs
+++ b/src/core/tmdb/probe.rs
@@ -91,22 +91,21 @@ async fn upsert_identities_from_disk(
                {
                    Ok(identity_file) => {
                        let identities_table = crate::core::db::schema::table_name("identities");
+                        let uuid_clean = identity_file.identity_uuid.replace('-', "");
                        let result = sqlx::query(&format!(
                            "INSERT INTO {} (uuid, name, identity_type, source, status, tmdb_id, tmdb_profile, metadata) \
-                             VALUES ($1::uuid, $2, 'people', 'tmdb', 'confirmed', $3, $4, $5::jsonb) \
+                             VALUES (gen_random_uuid(), $1, 'people', 'tmdb', 'confirmed', $2, $3, $4::jsonb) \
                             ON CONFLICT (tmdb_id) WHERE tmdb_id IS NOT NULL DO UPDATE SET \
-                             uuid = COALESCE({}.uuid, $1::uuid), \
                             tmdb_profile = COALESCE(EXCLUDED.tmdb_profile, {}.tmdb_profile), \
-                             metadata = {}.metadata || $5::jsonb",
-                            identities_table, identities_table, identities_table, identities_table
-                        ))
-                        .bind(&identity_file.identity_uuid)
-                        .bind(&identity_file.name)
-                        .bind(identity_file.tmdb_id)
-                        .bind(&identity_file.tmdb_profile)
-                        .bind(&identity_file.metadata)
-                        .execute(db.pool())
-                        .await;
+                             metadata = jsonb_deep_merge({}.metadata, $4::jsonb)",
+                             identities_table, identities_table, identities_table
+                         ))
+                         .bind(&identity_file.name)
+                         .bind(identity_file.tmdb_id)
+                         .bind(&identity_file.tmdb_profile)
+                         .bind(&identity_file.metadata)
+                         .execute(db.pool())
+                         .await;

                        match result {
                            Ok(_) => {
@@ -226,7 +225,7 @@ pub async fn create_identities_from_data(
             VALUES ($1, 'people', 'tmdb', 'confirmed', $2, $3, $4::jsonb) \
             ON CONFLICT (tmdb_id) WHERE tmdb_id IS NOT NULL DO UPDATE SET \
             tmdb_profile = COALESCE(EXCLUDED.tmdb_profile, {}.tmdb_profile), \
-             metadata = {}.metadata || $4::jsonb \
+             metadata = jsonb_deep_merge({}.metadata, $4::jsonb) \
             RETURNING uuid",
             identities_table, identities_table, identities_table
        ))
--- a/src/playground.rs
+++ b/src/playground.rs
@@ -2426,7 +2426,7 @@ async fn main() -> Result<()> {
                .await
                .context("Failed to init PostgreSQL")?;
            let qdrant = QdrantDb::init().await.context("Failed to init Qdrant")?;
-            let embedder = Embedder::new("nomic-embed-text-v2-moe:latest".to_string());
+            let embedder = Embedder::new("embeddinggemma-300m".to_string());

            let target_uuid = if uuid == "all" {
                None
--- a/src/verification/verifier.rs
+++ b/src/verification/verifier.rs
@@ -41,7 +41,6 @@ pub fn verify_output(processor: &ProcessorType, file_uuid: &str) -> Verification
    let proc_name = processor.as_str();
    let filename = match processor {
        ProcessorType::Story => format!("{}.story_story.json", file_uuid),
-        ProcessorType::FiveW1H => format!("{}.story_llm.json", file_uuid),
        _ => format!("{}.{}.json", file_uuid, proc_name),
    };
    let output_path = PathBuf::from(OUTPUT_DIR.as_str()).join(&filename);
@@ -65,7 +64,7 @@ pub fn verify_output(processor: &ProcessorType, file_uuid: &str) -> Verification
    };

    match processor {
-        ProcessorType::Asr | ProcessorType::Asrx => {
+        ProcessorType::Asrx => {
            let segs = value.get("segments").and_then(|v| v.as_array());
            match segs {
                Some(_) => VerificationResult::ok(proc_name, file_uuid),
@@ -105,18 +104,8 @@ pub fn verify_output(processor: &ProcessorType, file_uuid: &str) -> Verification
                None => VerificationResult::ok(proc_name, file_uuid),
            }
        }
-        ProcessorType::VisualChunk => VerificationResult::ok(proc_name, file_uuid),
        ProcessorType::Story => VerificationResult::ok(proc_name, file_uuid),
-        ProcessorType::FiveW1H => {
-            let scenes = value.get("scenes").and_then(|v| v.as_array());
-            match scenes {
-                Some(s) if s.is_empty() => {
-                    VerificationResult::fail(proc_name, file_uuid, "0 scenes")
-                }
-                Some(_) => VerificationResult::ok(proc_name, file_uuid),
-                None => VerificationResult::ok(proc_name, file_uuid),
-            }
-        }
+        _ => VerificationResult::ok(proc_name, file_uuid),
    }
 }

--- a/src/worker/job_worker.rs
+++ b/src/worker/job_worker.rs
@@ -91,6 +91,7 @@ impl JobWorker {
        self.processor_pool.sweep_stale().await;

        // Reset stale running jobs: jobs stuck in 'running' with no active processor results
+        // Exclude jobs where all processor_results are completed (waiting for ingestion)
        let monitor_jobs_table = schema::table_name("monitor_jobs");
        let processor_results_table = schema::table_name("processor_results");
        if let Err(e) = sqlx::query(&format!(
@@ -99,8 +100,13 @@ impl JobWorker {
             AND id NOT IN (
                 SELECT DISTINCT job_id FROM {}
                 WHERE status IN ('pending', 'running')
+             )
+             AND id NOT IN (
+                 SELECT job_id FROM {}
+                 GROUP BY job_id
+                 HAVING bool_and(status = 'completed')
             )",
-            monitor_jobs_table, processor_results_table
+            monitor_jobs_table, processor_results_table, processor_results_table
        ))
        .execute(self.db.pool())
        .await
@@ -197,9 +203,9 @@ impl JobWorker {
                    job.processors.len()
                };
                let should_retry = self
-                    .check_and_complete_job(job.id, &job.uuid, expected_count)
+                    .check_and_complete_job(job.id, &job.uuid, &job.processors, expected_count)
                    .await
-                    .is_ok();
+                    .unwrap_or(false);
                if should_retry && self.processor_pool.can_start().await {
                    if let Err(e) = self.process_job(job.clone()).await {
                        error!("Failed to reprocess job {}: {}", job.uuid, e);
@@ -708,14 +714,14 @@ impl JobWorker {
        } else {
            job.processors.len()
        };
-        self.check_and_complete_job(job.id, &job.uuid, expected_count)
+        self.check_and_complete_job(job.id, &job.uuid, &job.processors, expected_count)
            .await?;

        Ok(())
    }

    /// 檢查所有入庫步驟是否已完成（與 ingestion-status endpoint 同步邏輯）
-    async fn ingestion_complete(pool: &PgPool, uuid: &str) -> bool {
+    async fn ingestion_complete(pool: &PgPool, uuid: &str, job_processors: &[String]) -> bool {
        let chunk_t = schema::table_name("chunk");
        let fd_t = schema::table_name("face_detections");

@@ -730,18 +736,30 @@ impl JobWorker {
        }

        let fu = uuid;
-        let rule1 = check!(&format!(
-            "SELECT 1 FROM {chunk_t} WHERE file_uuid = '{fu}' AND chunk_type = 'sentence' LIMIT 1"
-        ));
-        let vector = check!(&format!("SELECT 1 FROM {chunk_t} WHERE file_uuid = '{fu}' AND chunk_type = 'sentence' AND embedding IS NOT NULL LIMIT 1"));
-        let rule3 = check!(&format!(
-            "SELECT 1 FROM {chunk_t} WHERE file_uuid = '{fu}' AND chunk_type = 'cut' LIMIT 1"
-        ));
-        let trace = check!(&format!("SELECT COUNT(DISTINCT trace_id) FROM {fd_t} WHERE file_uuid = '{fu}' AND trace_id IS NOT NULL"));
+        // Only check conditions relevant to the job's processors
+        let has_asr_or_asrx = job_processors.is_empty()
+            || job_processors.iter().any(|p| p == "asrx" || p == "asr");
+        let has_cut = job_processors.is_empty()
+            || job_processors.iter().any(|p| p == "cut");
+        let has_face = job_processors.is_empty()
+            || job_processors.iter().any(|p| p == "face");
+
+        let rule1 = !has_asr_or_asrx
+            || check!(&format!(
+                "SELECT 1 FROM {chunk_t} WHERE file_uuid = '{fu}' AND chunk_type = 'sentence' LIMIT 1"
+            ));
+        let vector = !has_asr_or_asrx
+            || check!(&format!("SELECT 1 FROM {chunk_t} WHERE file_uuid = '{fu}' AND chunk_type = 'sentence' AND embedding IS NOT NULL LIMIT 1"));
+        let rule3 = !has_cut
+            || check!(&format!(
+                "SELECT 1 FROM {chunk_t} WHERE file_uuid = '{fu}' AND chunk_type = 'cut' LIMIT 1"
+            ));
+        let trace = !has_face
+            || check!(&format!("SELECT COUNT(DISTINCT trace_id) FROM {fd_t} WHERE file_uuid = '{fu}' AND trace_id IS NOT NULL"));
        let all_ok = rule1 && vector && rule3 && trace;
        if !all_ok {
            tracing::info!(
-                "[Ingestion] waiting: rule1={rule1} vector={vector} rule3={rule3} trace={trace}"
+                "[Ingestion] waiting (uuid={fu}): rule1={rule1} vector={vector} rule3={rule3} trace={trace}"
            );
        }
        all_ok
@@ -751,8 +769,9 @@ impl JobWorker {
        &self,
        job_id: i32,
        uuid: &str,
+        job_processors: &[String],
        expected_count: usize,
-    ) -> Result<()> {
+    ) -> Result<bool> {
        let results = self.db.get_processor_results_by_job(job_id).await?;

        info!(
@@ -831,10 +850,29 @@ impl JobWorker {
            .await?;

        if has_asrx {
-            info!("📝 Prerequisites met for Rule 1 Chunking. Starting ingestion...");
-            let db_clone = self.db.clone();
-            let uuid_clone = uuid.to_string();
-            tokio::spawn(async move {
+            // Guard: only spawn Rule 1 if sentence chunks don't exist yet.
+            // `SELECT 1` is INT4 in Postgres; sqlx refuses to decode it as i64, so use i32.
+            let chunk_t = schema::table_name("chunk");
+            let already_spawned: bool = sqlx::query_scalar::<_, i32>(
+                &format!(
+                    "SELECT 1 FROM {chunk_t} WHERE file_uuid = $1 AND chunk_type = 'sentence' LIMIT 1"
+                ),
+            )
+            .bind(uuid)
+            .fetch_optional(self.db.pool())
+            .await?
+            .is_some();
+
+            if already_spawned {
+                info!(
+                    "✅ Rule 1 already completed for {}, skipping spawn",
+                    uuid
+                );
+            } else {
+                info!("📝 Prerequisites met for Rule 1 Chunking. Starting ingestion...");
+                let db_clone = self.db.clone();
+                let uuid_clone = uuid.to_string();
+                tokio::spawn(async move {
                match db_clone.get_video_by_uuid(&uuid_clone).await {
                    Ok(Some(video)) => {
                        let fps = video.fps;
@@ -886,6 +924,7 @@ impl JobWorker {
                    Err(e) => error!("Failed to get video info for chunking: {}", e),
                }
            });
+            }
        }

        if all_completed {
@@ -1031,12 +1070,12 @@ impl JobWorker {
                });
            }

-            if !Self::ingestion_complete(self.db.pool(), uuid).await {
+            if !Self::ingestion_complete(self.db.pool(), uuid, job_processors).await {
                info!(
                    "Job {}: all processors done, waiting for ingestion...",
                    job_id
                );
-                return Ok(());
+                return Ok(false);
            }

            self.db
@@ -1114,7 +1153,7 @@ impl JobWorker {
                .await?;
        }

-        Ok(())
+        Ok(false)
    }

    pub async fn shutdown(&self) {
--- a/src/worker/processor.rs
+++ b/src/worker/processor.rs
@@ -7,8 +7,6 @@ use std::sync::Arc;
 use tokio::sync::{mpsc, RwLock};
 use tracing::{error, info, warn};

-
-
 /// Guard that ensures processor pool cleanup runs even if the task panics.
 struct ProcessorCleanupGuard {
    job_id: i32,
@@ -28,17 +26,23 @@ impl Drop for ProcessorCleanupGuard {
            warn!("[ProcessorCleanupGuard] running lock contended");
        }
        if let Ok(mut guard) = self.running_count.try_write() {
-            if *guard > 0 { *guard -= 1; }
+            if *guard > 0 {
+                *guard -= 1;
+            }
        }
        match self.pipeline {
            PipelineType::Frame => {
                if let Ok(mut guard) = self.frame_count.try_write() {
-                    if *guard > 0 { *guard -= 1; }
+                    if *guard > 0 {
+                        *guard -= 1;
+                    }
                }
            }
            PipelineType::Time => {
                if let Ok(mut guard) = self.time_count.try_write() {
-                    if *guard > 0 { *guard -= 1; }
+                    if *guard > 0 {
+                        *guard -= 1;
+                    }
                }
            }
            PipelineType::Cross => {} // cross pipeline not tracked in slot counts
@@ -66,7 +70,6 @@ use crate::core::processor::face::FaceResult;
 use crate::core::processor::ocr::OcrResult;
 use crate::core::processor::pose::PoseResult;
 use crate::core::processor::scene_classification::SceneClassificationResult;
-use crate::core::processor::visual_chunk::VisualChunkResult;
 use crate::core::processor::yolo::YoloResult;
 use crate::worker::resources::SystemResources;

@@ -518,32 +521,10 @@ impl ProcessorPool {
        let total_frames = video.as_ref().map(|v| v.total_frames as i32).unwrap_or(0);

        match processor_type {
-            ProcessorType::Asr => {
-                let result =
-                    processor::process_asr(video_path, output_path.to_str().unwrap(), uuid).await?;
-                let chunks_produced = result.segments.len() as i32;
-                tracing::info!(
-                    "ASR completed, storing {} segments for {}",
-                    chunks_produced,
-                    job.uuid
-                );
-                if let Err(e) = Self::store_asr_chunks(db, &job.uuid, &result).await {
-                    tracing::error!("Failed to store ASR chunks for {}: {}", job.uuid, e);
-                }
-                Ok(ProcessorOutput {
-                    data: serde_json::to_value(result)?,
-                    chunks_produced,
-                    frames_processed: total_frames,
-                    total_frames,
-                    retry_count: 0,
-                    pid: 0,
-                })
-            }
            ProcessorType::Cut => {
                let cut_path =
                    std::path::Path::new(&output_dir).join(format!("{}.cut.json", job.uuid));
                let result = if cut_path.exists() {
-                    // CUT 在 register 階段已完成，直接載入
                    let content =
                        std::fs::read_to_string(&cut_path).context("Failed to read cut.json")?;
                    serde_json::from_str(&content).context("Failed to parse cut.json")?
@@ -624,10 +605,6 @@ impl ProcessorPool {
                if let Err(e) = Self::store_face_chunks(db, &job.uuid, &result).await {
                    tracing::error!("Failed to store FACE chunks for {}: {}", job.uuid, e);
                }
-                // 將 face embedding 寫入 Qdrant
-                if let Err(e) = Self::store_face_embeddings_to_qdrant(&job.uuid, &result).await {
-                    tracing::error!("Failed to store face embeddings to Qdrant: {}", e);
-                }
                Ok(ProcessorOutput {
                    data: serde_json::to_value(result)?,
                    chunks_produced,
@@ -685,31 +662,6 @@ impl ProcessorPool {
                    pid: 0,
                })
            }
-            ProcessorType::VisualChunk => {
-                let result = processor::process_visual_chunk_advanced(
-                    video_path,
-                    output_path.to_str().unwrap(),
-                    uuid,
-                )
-                .await?;
-                let chunks_produced = result.chunk_count as i32;
-                tracing::info!(
-                    "VisualChunk completed, storing {} chunks for {}",
-                    chunks_produced,
-                    job.uuid
-                );
-                if let Err(e) = Self::store_visual_chunk_chunks(db, &job.uuid, &result).await {
-                    tracing::error!("Failed to store VisualChunk chunks for {}: {}", job.uuid, e);
-                }
-                Ok(ProcessorOutput {
-                    data: serde_json::to_value(result)?,
-                    chunks_produced,
-                    frames_processed: total_frames,
-                    total_frames,
-                    retry_count: 0,
-                    pid: 0,
-                })
-            }
            ProcessorType::Scene => {
                let scene_path =
                    std::path::Path::new(&output_dir).join(format!("{}.scene.json", job.uuid));
@@ -717,7 +669,6 @@ impl ProcessorPool {
                    std::path::Path::new(&output_dir).join(format!("{}.scene.err", job.uuid));
                let scene_tmp =
                    std::path::Path::new(&output_dir).join(format!("{}.scene.tmp", job.uuid));
-                // 優先順序：.err（跳過）→ .json（載入）→ .tmp（等待或重新執行）
                let result = if scene_err.exists() {
                    tracing::warn!("Scene previously failed for {}, skipping", job.uuid);
                    return Ok(ProcessorOutput {
@@ -1009,72 +960,6 @@ impl ProcessorPool {
        Ok(())
    }

-    /// 將 face embeddings 寫入 Qdrant momentry_dev_face collection
-    pub async fn store_face_embeddings_to_qdrant(
-        uuid: &str,
-        face_result: &FaceResult,
-    ) -> Result<()> {
-        let qdrant = QdrantDb::new();
-        let collection = format!(
-            "{}{}",
-            crate::core::config::REDIS_KEY_PREFIX
-                .as_str()
-                .trim_end_matches(':'),
-            "_face"
-        );
-
-        // 確保 collection 存在（dim=512 for FaceNet）
-        if let Err(e) = qdrant.ensure_collection(&collection, 512).await {
-            tracing::error!("Failed to ensure Qdrant face collection: {}", e);
-            return Ok(());
-        }
-
-        let mut count = 0;
-        for frame in &face_result.frames {
-            for face in &frame.faces {
-                if let Some(embedding) = &face.embedding {
-                    if embedding.len() != 512 {
-                        continue;
-                    }
-                    // 使用 hash 作為 Qdrant point ID（需要 unsigned integer）
-                    // 使用 frame number 作為 Qdrant point ID（u64）
-                    let point_id = frame.frame as u64;
-
-                    let payload = serde_json::json!({
-                        "file_uuid": uuid,
-                        "face_id": face.face_id,
-                        "frame": frame.frame,
-                        "timestamp": frame.timestamp,
-                        "x": face.x,
-                        "y": face.y,
-                        "width": face.width,
-                        "height": face.height,
-                        "confidence": face.confidence,
-                    });
-
-                    if let Err(e) = qdrant
-                        .upsert_vector_to_collection(
-                            &collection,
-                            point_id,
-                            embedding,
-                            Some(payload),
-                        )
-                        .await
-                    {
-                        tracing::error!("Failed to upsert face vector {}: {}", point_id, e);
-                    } else {
-                        count += 1;
-                    }
-                }
-            }
-        }
-
-        if count > 0 {
-            tracing::info!("Stored {} face embeddings to Qdrant for {}", count, uuid);
-        }
-        Ok(())
-    }
-
    /// 將 voice embeddings 寫入 Qdrant momentry_dev_voice collection
    pub async fn store_voice_embeddings_to_qdrant(
        uuid: &str,
@@ -1106,9 +991,22 @@ impl ProcessorPool {
                if emb.len() != 192 {
                    continue;
                }
+                // Point ID: hash(file_uuid + speaker_id + index) for global uniqueness
+                let point_id = {
+                    use sha2::{Digest, Sha256};
+                    let mut hasher = Sha256::new();
+                    hasher.update(uuid.as_bytes());
+                    hasher.update(b"_");
+                    hasher.update(segment.speaker_id.clone().unwrap_or_default().as_bytes());
+                    hasher.update(b"_");
+                    hasher.update(i.to_string().as_bytes());
+                    let hash = hasher.finalize();
+                    u64::from_be_bytes(hash[0..8].try_into().unwrap())
+                };
+
                let payload = serde_json::json!({
                    "file_uuid": uuid,
-                    "speaker_id": segment.speaker_id,
+                    "speaker_id": segment.speaker_id.clone().unwrap_or_default(),
                    "segment_index": i,
                    "start_frame": segment.start_frame,
                    "end_frame": segment.end_frame,
@@ -1117,7 +1015,7 @@ impl ProcessorPool {
                });

                if let Err(e) = qdrant
-                    .upsert_vector_to_collection(&collection, i as u64, emb, Some(payload))
+                    .upsert_vector_to_collection(&collection, point_id, emb, Some(payload))
                    .await
                {
                    tracing::error!("Failed to upsert voice vector {}: {}", i, e);
@@ -1174,6 +1072,7 @@ impl ProcessorPool {
        );

        let mut pre_chunks_to_store = Vec::new();
+        let mut speaker_detections = Vec::new();

        for (i, segment) in asrx_result.segments.iter().enumerate() {
            let data = serde_json::json!({
@@ -1184,28 +1083,23 @@ impl ProcessorPool {

            // ASRX is time-based, so we use segment index or start time as coordinate.
            pre_chunks_to_store.push((i as i64, Some(segment.start_time), data, None, None));
+
+            speaker_detections.push((
+                segment.speaker_id.clone().unwrap_or_default(),
+                segment.start_time,
+                segment.end_time,
+                segment.text.clone(),
+                None::<String>,     // chunk_id: unknown yet, filled later
+                0.0,                 // confidence: updated after binding
+            ));
        }

        db.store_raw_pre_chunks_batch(uuid, "asrx", &pre_chunks_to_store)
            .await?;
-        Ok(())
-    }
-
-    pub async fn store_visual_chunk_chunks(
-        db: &PostgresDb,
-        uuid: &str,
-        visual_chunk_result: &VisualChunkResult,
-    ) -> Result<()> {
-        for (i, chunk) in visual_chunk_result.chunks.iter().enumerate() {
-            match db.store_chunk(chunk).await {
-                Ok(_) => {
-                    tracing::info!("Stored VisualChunk chunk {} for video {}", i, uuid);
-                }
-                Err(e) => {
-                    tracing::error!("Failed to store VisualChunk chunk {}: {}", i, e);
-                }
-            }
-        }
+        db.store_raw_pre_chunks_batch(uuid, "asr", &pre_chunks_to_store)
+            .await?;
+        db.store_speaker_detections_batch(uuid, &speaker_detections)
+            .await?;
        Ok(())
    }

@@ -1256,7 +1150,7 @@ impl ProcessorPool {
            });
            let chunk_table = crate::core::db::schema::table_name("chunk");
            let _ = sqlx::query(&format!(
-                "UPDATE {} SET metadata = metadata || $1::jsonb WHERE file_uuid=$2 AND chunk_id=$3",
+                "UPDATE {} SET metadata = jsonb_deep_merge(COALESCE(metadata, '{{}}'::jsonb), $1::jsonb) WHERE file_uuid=$2 AND chunk_id=$3",
                chunk_table
            ))
            .bind(&meta)