feat: Phase 1 handover - schema migration, correction mechanism, API fixes

Schema changes: dev.chunks->dev.chunk, remove old_chunk_id/chunk_index
Correction: asr-1.json format, generate/apply scripts
API: 37/37 endpoints fixed and tested
Docs: HANDOVER_V2.0.md for M4
2026-05-11 07:03:22 +08:00
parent ef894a44ad
commit 39ba5ddf76
147 changed files with 19843 additions and 3053 deletions
--- a/src/api/five_w1h_agent_api.rs
+++ b/src/api/five_w1h_agent_api.rs
@@ -58,7 +58,6 @@ pub struct BatchJobStatus {
 #[derive(Debug, Clone)]
 struct CutScene {
    chunk_id: String,
-    chunk_index: i32,
    start_frame: i64,
    end_frame: i64,
    fps: f64,
@@ -66,6 +65,7 @@ struct CutScene {
    end_time: f64,
    content: serde_json::Value,
    metadata: serde_json::Value,
+    summary_text: Option<String>,
 }

 #[derive(Debug, Clone)]
@@ -108,21 +108,25 @@ fn llm_model() -> String {
 // ── Data Fetching ──

 async fn fetch_cut_scenes(db: &PostgresDb, file_uuid: &str) -> anyhow::Result<Vec<CutScene>> {
-    let table = schema::table_name("chunks");
-    sqlx::query_as::<_, (String, i32, i64, i64, f64, f64, f64, serde_json::Value, serde_json::Value)>(&format!(
-        r#"SELECT chunk_id, chunk_index, start_frame, end_frame, fps, start_time, end_time, content, metadata
+    let table = schema::table_name("chunk");
+    sqlx::query_as::<_, (String, i64, i64, f64, f64, f64, serde_json::Value, serde_json::Value, Option<String>)>(&format!(
+        r#"SELECT chunk_id, start_frame, end_frame, fps, start_time, end_time, content, metadata, summary_text
           FROM {} WHERE file_uuid = $1 AND chunk_type = 'cut' ORDER BY start_frame"#, table
    ))
    .bind(file_uuid)
    .fetch_all(db.pool()).await?
    .into_iter().map(|r| Ok(CutScene {
-        chunk_id: r.0, chunk_index: r.1, start_frame: r.2, end_frame: r.3,
-        fps: r.4, start_time: r.5, end_time: r.6, content: r.7, metadata: r.8,
+        chunk_id: r.0, start_frame: r.1, end_frame: r.2,
+        fps: r.3, start_time: r.4, end_time: r.5, content: r.6, metadata: r.7, summary_text: r.8,
    })).collect()
 }

-async fn fetch_sentences_in_scene(db: &PostgresDb, file_uuid: &str, cut: &CutScene) -> anyhow::Result<Vec<SentenceChunk>> {
-    let table = schema::table_name("chunks");
+async fn fetch_sentences_in_scene(
+    db: &PostgresDb,
+    file_uuid: &str,
+    cut: &CutScene,
+) -> anyhow::Result<Vec<SentenceChunk>> {
+    let table = schema::table_name("chunk");
    sqlx::query_as::<_, (String, String, f64, f64, i64, i64, serde_json::Value)>(&format!(
        r#"SELECT chunk_id, COALESCE(text_content,''), start_time, end_time, start_frame, end_frame, content
           FROM {} WHERE file_uuid = $1 AND chunk_type = 'sentence'
@@ -137,7 +141,11 @@ async fn fetch_sentences_in_scene(db: &PostgresDb, file_uuid: &str, cut: &CutSce
 }

 /// Fetch actor names present in this scene from face_detections + identity_bindings + identities
-async fn fetch_identity_names_for_scene(db: &PostgresDb, file_uuid: &str, cut: &CutScene) -> anyhow::Result<Vec<String>> {
+async fn fetch_identity_names_for_scene(
+    db: &PostgresDb,
+    file_uuid: &str,
+    cut: &CutScene,
+) -> anyhow::Result<Vec<String>> {
    let fd_table = schema::table_name("face_detections");
    let ib_table = schema::table_name("identity_bindings");
    let id_table = schema::table_name("identities");
@@ -148,43 +156,65 @@ async fn fetch_identity_names_for_scene(db: &PostgresDb, file_uuid: &str, cut: &
           JOIN {} i ON i.id = ib.identity_id
           WHERE fd.file_uuid = $1 AND fd.frame_number >= $2 AND fd.frame_number <= $3
           AND fd.trace_id IS NOT NULL
-           ORDER BY i.name"#, fd_table, ib_table, id_table
+           ORDER BY i.name"#,
+        fd_table, ib_table, id_table
    ))
-    .bind(file_uuid).bind(cut.start_frame).bind(cut.end_frame)
-    .fetch_all(db.pool()).await?;
+    .bind(file_uuid)
+    .bind(cut.start_frame)
+    .bind(cut.end_frame)
+    .fetch_all(db.pool())
+    .await?;
    Ok(rows)
 }

 /// Fetch YOLO object labels detected in this scene from pre_chunks
-async fn fetch_yolo_objects_for_scene(db: &PostgresDb, file_uuid: &str, cut: &CutScene) -> anyhow::Result<Vec<String>> {
+async fn fetch_yolo_objects_for_scene(
+    db: &PostgresDb,
+    file_uuid: &str,
+    cut: &CutScene,
+) -> anyhow::Result<Vec<String>> {
    let table = schema::table_name("pre_chunks");
    let rows = sqlx::query_scalar::<_, String>(&format!(
        r#"SELECT DISTINCT data->>'label'
           FROM {} WHERE file_uuid = $1 AND processor_type = 'yolo'
           AND frame_number >= $2 AND frame_number <= $3
           AND data->>'label' IS NOT NULL
-           ORDER BY data->>'label'"#, table
+           ORDER BY data->>'label'"#,
+        table
    ))
-    .bind(file_uuid).bind(cut.start_frame).bind(cut.end_frame)
-    .fetch_all(db.pool()).await?;
+    .bind(file_uuid)
+    .bind(cut.start_frame)
+    .bind(cut.end_frame)
+    .fetch_all(db.pool())
+    .await?;
    Ok(rows)
 }

 /// Fetch active speakers + their actor names for a scene's frame range
 /// Uses identity_bindings to map SPEAKER_X to actor names
-async fn fetch_speakers_for_scene(db: &PostgresDb, file_uuid: &str, cut: &CutScene) -> anyhow::Result<Vec<String>> {
+async fn fetch_speakers_for_scene(
+    db: &PostgresDb,
+    file_uuid: &str,
+    cut: &CutScene,
+) -> anyhow::Result<Vec<String>> {
    let pc_table = schema::table_name("pre_chunks");
    let speakers = sqlx::query_scalar::<_, String>(&format!(
        r#"SELECT DISTINCT data->>'speaker_id'
           FROM {} WHERE file_uuid = $1 AND processor_type = 'asrx'
           AND data->>'speaker_id' IS NOT NULL
           AND start_frame <= $3 AND end_frame >= $2
-           ORDER BY data->>'speaker_id'"#, pc_table
+           ORDER BY data->>'speaker_id'"#,
+        pc_table
    ))
-    .bind(file_uuid).bind(cut.start_frame).bind(cut.end_frame)
-    .fetch_all(db.pool()).await?;
+    .bind(file_uuid)
+    .bind(cut.start_frame)
+    .bind(cut.end_frame)
+    .fetch_all(db.pool())
+    .await?;

-    if speakers.is_empty() { return Ok(vec![]); }
+    if speakers.is_empty() {
+        return Ok(vec![]);
+    }

    // Map speaker_ids to actor names via identity_bindings
    let ib_table = schema::table_name("identity_bindings");
@@ -194,10 +224,12 @@ async fn fetch_speakers_for_scene(db: &PostgresDb, file_uuid: &str, cut: &CutSce
        let name: Option<String> = sqlx::query_scalar(&format!(
            r#"SELECT i.name FROM {} ib JOIN {} i ON i.id = ib.identity_id
               WHERE ib.identity_type = 'speaker' AND ib.identity_value = $1 AND i.name IS NOT NULL
-               LIMIT 1"#, ib_table, id_table
+               LIMIT 1"#,
+            ib_table, id_table
        ))
        .bind(spk)
-        .fetch_optional(db.pool()).await?;
+        .fetch_optional(db.pool())
+        .await?;
        match name {
            Some(n) => result.push(format!("{} ({})", spk, n)),
            None => result.push(spk.clone()),
@@ -207,7 +239,11 @@ async fn fetch_speakers_for_scene(db: &PostgresDb, file_uuid: &str, cut: &CutSce
 }

 /// Fetch trace IDs with identity names for a scene's frame range
-async fn fetch_trace_info(db: &PostgresDb, file_uuid: &str, cut: &CutScene) -> anyhow::Result<Vec<String>> {
+async fn fetch_trace_info(
+    db: &PostgresDb,
+    file_uuid: &str,
+    cut: &CutScene,
+) -> anyhow::Result<Vec<String>> {
    let fd_table = schema::table_name("face_detections");
    let ib_table = schema::table_name("identity_bindings");
    let id_table = schema::table_name("identities");
@@ -218,18 +254,25 @@ async fn fetch_trace_info(db: &PostgresDb, file_uuid: &str, cut: &CutScene) -> a
           LEFT JOIN {} i ON i.id = ib.identity_id
           WHERE fd.file_uuid = $1 AND fd.frame_number >= $2 AND fd.frame_number <= $3
           AND fd.trace_id IS NOT NULL
-           ORDER BY fd.trace_id"#, fd_table, ib_table, id_table
+           ORDER BY fd.trace_id"#,
+        fd_table, ib_table, id_table
    ))
-    .bind(file_uuid).bind(cut.start_frame).bind(cut.end_frame)
-    .fetch_all(db.pool()).await?;
+    .bind(file_uuid)
+    .bind(cut.start_frame)
+    .bind(cut.end_frame)
+    .fetch_all(db.pool())
+    .await?;

-    Ok(rows.iter().map(|(trace, name)| {
-        if let Some(n) = name {
-            format!("trace_{} ({})", trace, n)
-        } else {
-            format!("trace_{}", trace)
-        }
-    }).collect())
+    Ok(rows
+        .iter()
+        .map(|(trace, name)| {
+            if let Some(n) = name {
+                format!("trace_{} ({})", trace, n)
+            } else {
+                format!("trace_{}", trace)
+            }
+        })
+        .collect())
 }

 // ── LLM Prompt (Embedding-Optimized) ──
@@ -243,19 +286,31 @@ async fn summarize_one_scene(
 ) -> anyhow::Result<SceneSummaryResult> {
    if sentences.is_empty() {
        return Ok(SceneSummaryResult {
-            parent_summary: String::new(), five_w1h: serde_json::Value::Null, child_summaries: vec![],
+            parent_summary: String::new(),
+            five_w1h: serde_json::Value::Null,
+            child_summaries: vec![],
        });
    }

-    let faces = fetch_identity_names_for_scene(db, file_uuid, cut).await.unwrap_or_default();
-    let objects = fetch_yolo_objects_for_scene(db, file_uuid, cut).await.unwrap_or_default();
-    let traces = fetch_trace_info(db, file_uuid, cut).await.unwrap_or_default();
-    let speakers = fetch_speakers_for_scene(db, file_uuid, cut).await.unwrap_or_default();
+    let faces = fetch_identity_names_for_scene(db, file_uuid, cut)
+        .await
+        .unwrap_or_default();
+    let objects = fetch_yolo_objects_for_scene(db, file_uuid, cut)
+        .await
+        .unwrap_or_default();
+    let traces = fetch_trace_info(db, file_uuid, cut)
+        .await
+        .unwrap_or_default();
+    let speakers = fetch_speakers_for_scene(db, file_uuid, cut)
+        .await
+        .unwrap_or_default();

    let mut dialogue = String::new();
    for (i, s) in sentences.iter().enumerate() {
        let t = s.text.trim();
-        if !t.is_empty() { dialogue.push_str(&format!("[{}] {}\n", i + 1, t)); }
+        if !t.is_empty() {
+            dialogue.push_str(&format!("[{}] {}\n", i + 1, t));
+        }
    }

    let story_so_far = if prev_context.is_empty() {
@@ -306,7 +361,14 @@ Rules:
 - Each sentence.enhanced: self-contained for search, include actual spoken words.
 - Return ONLY valid JSON. No markdown.
 - A short scene with 1-2 lines should have a short summary."#,
-        cut.start_time, cut.end_time, dialogue, faces.join(", "), objects.join(", "), traces.join(", "), speakers.join(", "), story_so_far,
+        cut.start_time,
+        cut.end_time,
+        dialogue,
+        faces.join(", "),
+        objects.join(", "),
+        traces.join(", "),
+        speakers.join(", "),
+        story_so_far,
    );

    let body = serde_json::json!({
@@ -321,22 +383,32 @@ Rules:
    });

    let client = Client::new();
-    let resp = client.post(llm_base_url()).json(&body)
+    let resp = client
+        .post(llm_base_url())
+        .json(&body)
        .timeout(std::time::Duration::from_secs(180))
-        .send().await?
-        .json::<serde_json::Value>().await?;
+        .send()
+        .await?
+        .json::<serde_json::Value>()
+        .await?;

-    let content = resp["choices"][0]["message"]["content"].as_str().unwrap_or("{}");
+    let content = resp["choices"][0]["message"]["content"]
+        .as_str()
+        .unwrap_or("{}");
    // Strip markdown code fences if present
    let cleaned = content
        .trim_start_matches("```json")
        .trim_start_matches("```")
        .trim_end_matches("```")
        .trim();
-    let parsed: serde_json::Value = serde_json::from_str(cleaned).unwrap_or(serde_json::Value::Null);
+    let parsed: serde_json::Value =
+        serde_json::from_str(cleaned).unwrap_or(serde_json::Value::Null);

    let parent_summary = parsed["scene_summary"].as_str().unwrap_or("").to_string();
-    let five_w1h = parsed.get("5w1h").cloned().unwrap_or(serde_json::Value::Null);
+    let five_w1h = parsed
+        .get("5w1h")
+        .cloned()
+        .unwrap_or(serde_json::Value::Null);
    let mut child_summaries = Vec::new();

    if let Some(arr) = parsed["sentences"].as_array() {
@@ -376,16 +448,24 @@ Rules:
        }
    }

-    Ok(SceneSummaryResult { parent_summary, five_w1h, child_summaries })
+    Ok(SceneSummaryResult {
+        parent_summary,
+        five_w1h,
+        child_summaries,
+    })
 }

 // ── DB Storage ──

 async fn store_parent_summary(
-    db: &PostgresDb, cut_chunk_id: &str, file_uuid: &str,
-    summary: &str, five_w1h: &serde_json::Value, sentences: &[SentenceChunk],
+    db: &PostgresDb,
+    cut_chunk_id: &str,
+    file_uuid: &str,
+    summary: &str,
+    five_w1h: &serde_json::Value,
+    sentences: &[SentenceChunk],
 ) -> anyhow::Result<()> {
-    let table = schema::table_name("chunks");
+    let table = schema::table_name("chunk");
    let meta = serde_json::json!({
        "5w1h": five_w1h,
        "sentence_ids": sentences.iter().map(|s| s.chunk_id.clone()).collect::<Vec<_>>(),
@@ -393,28 +473,42 @@ async fn store_parent_summary(
    });
    sqlx::query(&format!(
        r#"UPDATE {} SET summary_text = $1, metadata = metadata || $2::jsonb
-           WHERE chunk_id = $3 AND file_uuid = $4"#, table
+           WHERE chunk_id = $3 AND file_uuid = $4"#,
+        table
    ))
-    .bind(summary).bind(&meta).bind(cut_chunk_id).bind(file_uuid)
-    .execute(db.pool()).await?;
+    .bind(summary)
+    .bind(&meta)
+    .bind(cut_chunk_id)
+    .bind(file_uuid)
+    .execute(db.pool())
+    .await?;
    Ok(())
 }

 async fn store_child_summaries(
-    db: &PostgresDb, file_uuid: &str, children: &[ChildSummary],
+    db: &PostgresDb,
+    file_uuid: &str,
+    children: &[ChildSummary],
 ) -> anyhow::Result<()> {
-    let table = schema::table_name("chunks");
+    let table = schema::table_name("chunk");
    for c in children {
        let text = c.enhanced.trim();
-        if text.is_empty() || text.len() < 10 { continue; }
+        if text.is_empty() || text.len() < 10 {
+            continue;
+        }
        // Update text_content (for embedding) + merge 5w1h into content
        let merge = serde_json::json!({ "5w1h": c.five_w1h });
        sqlx::query(&format!(
            r#"UPDATE {} SET text_content = $1, content = content || $2::jsonb, embedding = NULL
-               WHERE chunk_id = $3 AND file_uuid = $4"#, table
+               WHERE chunk_id = $3 AND file_uuid = $4"#,
+            table
        ))
-        .bind(text).bind(&merge).bind(&c.chunk_id).bind(file_uuid)
-        .execute(db.pool()).await?;
+        .bind(text)
+        .bind(&merge)
+        .bind(&c.chunk_id)
+        .bind(file_uuid)
+        .execute(db.pool())
+        .await?;
    }
    Ok(())
 }
@@ -427,7 +521,8 @@ async fn analyze_5w1h(
 ) -> Result<Json<Analyze5W1HResponse>, (StatusCode, String)> {
    let db = PostgresDb::from_pool(state.db.pool().clone());

-    let cuts = fetch_cut_scenes(&db, &req.file_uuid).await
+    let cuts = fetch_cut_scenes(&db, &req.file_uuid)
+        .await
        .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?;

    let total = cuts.len();
@@ -435,29 +530,71 @@ async fn analyze_5w1h(
    let mut prev_context: Vec<String> = Vec::new();

    for cut in &cuts {
-        let sentences = fetch_sentences_in_scene(&db, &req.file_uuid, cut).await
-            .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?;
-        if sentences.is_empty() { continue; }
+        // Skip already-summarized scenes but preserve context
+        if let Some(ref t) = cut.summary_text {
+            if t.len() > 20 {
+                processed += 1;
+                prev_context.push(format!(
+                    "Scene (t={:.0}s): {}",
+                    cut.start_time, t
+                ));
+                continue;
+            }
+        }
+
+        let sentences = match fetch_sentences_in_scene(&db, &req.file_uuid, cut).await {
+            Ok(s) => s,
+            Err(e) => {
+                tracing::error!("[5W1H] fetch sentences failed: {}", e);
+                continue;
+            }
+        };
+        if sentences.is_empty() {
+            continue;
+        }

        let context = prev_context.join("\n");
-        let result = summarize_one_scene(&db, &req.file_uuid, cut, &sentences, &context).await
-            .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?;
+        let result = match summarize_one_scene(&db, &req.file_uuid, cut, &sentences, &context).await
+        {
+            Ok(r) => r,
+            Err(e) => {
+                tracing::error!("[5W1H] scene {} failed: {}", cut.chunk_id, e);
+                processed += 1;
+                continue;
+            }
+        };

        if !result.parent_summary.is_empty() {
-            if let Err(e) = store_parent_summary(&db, &cut.chunk_id, &req.file_uuid, &result.parent_summary, &result.five_w1h, &sentences).await {
+            if let Err(e) = store_parent_summary(
+                &db,
+                &cut.chunk_id,
+                &req.file_uuid,
+                &result.parent_summary,
+                &result.five_w1h,
+                &sentences,
+            )
+            .await
+            {
                tracing::error!("[5W1H] parent: {}", e);
            }
-            if let Err(e) = store_child_summaries(&db, &req.file_uuid, &result.child_summaries).await {
+            if let Err(e) =
+                store_child_summaries(&db, &req.file_uuid, &result.child_summaries).await
+            {
                tracing::error!("[5W1H] child: {}", e);
            }
-            prev_context.push(format!("Scene {} (t={:.0}s): {}", cut.chunk_index, cut.start_time, result.parent_summary));
+            prev_context.push(format!(
+                "Scene (t={:.0}s): {}",
+                cut.start_time, result.parent_summary
+            ));
        }
        processed += 1;
    }

    Ok(Json(Analyze5W1HResponse {
-        success: true, file_uuid: req.file_uuid,
-        scenes_processed: processed, scenes_total: total,
+        success: true,
+        file_uuid: req.file_uuid,
+        scenes_processed: processed,
+        scenes_total: total,
    }))
 }

@@ -475,14 +612,39 @@ async fn batch_analyze_5w1h(
        let mut prev_context: Vec<String> = Vec::new();

        for cut in &cuts {
-            let sentences = fetch_sentences_in_scene(&db, uuid, cut).await.unwrap_or_default();
-            if sentences.is_empty() { continue; }
+            if let Some(ref t) = cut.summary_text {
+                if t.len() > 20 {
+                    processed += 1;
+                    prev_context.push(format!(
+                        "Scene (t={:.0}s): {}",
+                        cut.start_time, t
+                    ));
+                    continue;
+                }
+            }
+            let sentences = fetch_sentences_in_scene(&db, uuid, cut)
+                .await
+                .unwrap_or_default();
+            if sentences.is_empty() {
+                continue;
+            }
            let context = prev_context.join("\n");
            if let Ok(result) = summarize_one_scene(&db, uuid, cut, &sentences, &context).await {
                if !result.parent_summary.is_empty() {
-                    let _ = store_parent_summary(&db, &cut.chunk_id, uuid, &result.parent_summary, &result.five_w1h, &sentences).await;
+                    let _ = store_parent_summary(
+                        &db,
+                        &cut.chunk_id,
+                        uuid,
+                        &result.parent_summary,
+                        &result.five_w1h,
+                        &sentences,
+                    )
+                    .await;
                    let _ = store_child_summaries(&db, uuid, &result.child_summaries).await;
-                    prev_context.push(format!("Scene {} (t={:.0}s): {}", cut.chunk_index, cut.start_time, result.parent_summary));
+                    prev_context.push(format!(
+                        "Scene (t={:.0}s): {}",
+                        cut.start_time, result.parent_summary
+                    ));
                }
            }
            processed += 1;
@@ -490,12 +652,19 @@ async fn batch_analyze_5w1h(

        jobs.push(BatchJobStatus {
            file_uuid: uuid.clone(),
-            status: if processed > 0 { "completed".to_string() } else { "no_cut_scenes".to_string() },
+            status: if processed > 0 {
+                "completed".to_string()
+            } else {
+                "no_cut_scenes".to_string()
+            },
            message: format!("{}/{} scenes processed", processed, total),
        });
    }

-    Ok(Json(BatchAnalyze5W1HResponse { success: true, jobs }))
+    Ok(Json(BatchAnalyze5W1HResponse {
+        success: true,
+        jobs,
+    }))
 }

 async fn get_5w1h_status(
@@ -505,19 +674,26 @@ async fn get_5w1h_status(
    let rows = sqlx::query(&format!(
        r#"SELECT file_uuid, processing_status->'agents'->'five_w1h' as s
           FROM {} WHERE processing_status->'agents'->'five_w1h' IS NOT NULL
-           ORDER BY updated_at DESC LIMIT 50"#, table
+           ORDER BY updated_at DESC LIMIT 50"#,
+        table
    ))
-    .fetch_all(state.db.pool()).await
+    .fetch_all(state.db.pool())
+    .await
    .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?;

-    let videos: Vec<serde_json::Value> = rows.iter().map(|r| {
-        serde_json::json!({
-            "uuid": r.try_get::<String,_>("file_uuid").unwrap_or_default(),
-            "five_w1h_status": r.try_get::<Option<serde_json::Value>,_>("s").ok().flatten(),
+    let videos: Vec<serde_json::Value> = rows
+        .iter()
+        .map(|r| {
+            serde_json::json!({
+                "uuid": r.try_get::<String,_>("file_uuid").unwrap_or_default(),
+                "five_w1h_status": r.try_get::<Option<serde_json::Value>,_>("s").ok().flatten(),
+            })
        })
-    }).collect();
+        .collect();

-    Ok(Json(serde_json::json!({ "success": true, "videos": videos })))
+    Ok(Json(
+        serde_json::json!({ "success": true, "videos": videos }),
+    ))
 }

 /// Pipeline-triggered entry point: run 5W1H agent for a file.
@@ -528,24 +704,52 @@ pub async fn run_5w1h_agent(db: &PostgresDb, file_uuid: &str) -> anyhow::Result<
    let mut prev_context: Vec<String> = Vec::new();

    for cut in &cuts {
-        let sentences = fetch_sentences_in_scene(db, file_uuid, cut).await?;
-        if sentences.is_empty() { continue; }
-
-            let context = prev_context.join("\n");
-            match summarize_one_scene(db, file_uuid, cut, &sentences, &context).await {
-                Ok(result) => {
-                    if !result.parent_summary.is_empty() {
-                        let _ = store_parent_summary(db, &cut.chunk_id, file_uuid, &result.parent_summary, &result.five_w1h, &sentences).await;
-                        let _ = store_child_summaries(db, file_uuid, &result.child_summaries).await;
-                        prev_context.push(format!("Scene {} (t={:.0}s): {}", cut.chunk_index, cut.start_time, result.parent_summary));
-                    }
-                    processed += 1;
-                }
-                Err(e) => tracing::error!("[5W1H] Scene {} failed: {}", cut.chunk_id, e),
+        if let Some(ref t) = cut.summary_text {
+            if t.len() > 20 {
+                processed += 1;
+                prev_context.push(format!(
+                    "Scene (t={:.0}s): {}",
+                    cut.start_time, t
+                ));
+                continue;
            }
+        }
+        let sentences = fetch_sentences_in_scene(db, file_uuid, cut).await?;
+        if sentences.is_empty() {
+            continue;
+        }
+
+        let context = prev_context.join("\n");
+        match summarize_one_scene(db, file_uuid, cut, &sentences, &context).await {
+            Ok(result) => {
+                if !result.parent_summary.is_empty() {
+                    let _ = store_parent_summary(
+                        db,
+                        &cut.chunk_id,
+                        file_uuid,
+                        &result.parent_summary,
+                        &result.five_w1h,
+                        &sentences,
+                    )
+                    .await;
+                    let _ = store_child_summaries(db, file_uuid, &result.child_summaries).await;
+                    prev_context.push(format!(
+                        "Scene (t={:.0}s): {}",
+                        cut.start_time, result.parent_summary
+                    ));
+                }
+                processed += 1;
+            }
+            Err(e) => tracing::error!("[5W1H] Scene {} failed: {}", cut.chunk_id, e),
+        }
    }

-    tracing::info!("[5W1H] Done for {}: {}/{} scenes", file_uuid, processed, total);
+    tracing::info!(
+        "[5W1H] Done for {}: {}/{} scenes",
+        file_uuid,
+        processed,
+        total
+    );

    // Auto-vectorize sentences with EmbeddingGemma (768D)
    tracing::info!("[5W1H] Starting vectorize for sentence chunks...");
@@ -555,17 +759,20 @@ pub async fn run_5w1h_agent(db: &PostgresDb, file_uuid: &str) -> anyhow::Result<

    let rows = sqlx::query_as::<_, (String, String, String, f64, f64)>(
        r#"SELECT chunk_id, chunk_type, text_content, start_time, end_time
-           FROM dev.chunks WHERE file_uuid = $1 AND chunk_type = 'sentence' AND embedding IS NULL
-           AND (text_content IS NOT NULL AND text_content != '') ORDER BY chunk_index"#
+           FROM dev.chunk WHERE file_uuid = $1 AND chunk_type = 'sentence' AND embedding IS NULL
+           AND (text_content IS NOT NULL AND text_content != '') ORDER BY id"#,
    )
    .bind(file_uuid)
-    .fetch_all(db.pool()).await?;
+    .fetch_all(db.pool())
+    .await?;

    let total_vec = rows.len();
    let mut stored = 0usize;
    for (chunk_id, _ctype, text, start_time, end_time) in &rows {
        let text = text.trim();
-        if text.is_empty() || text.len() < 5 { continue; }
+        if text.is_empty() || text.len() < 5 {
+            continue;
+        }
        match embedder.embed_document(text).await {
            Ok(vector) => {
                if let Err(e) = sqlx::query(
--- a/src/api/identity_agent_api.rs
+++ b/src/api/identity_agent_api.rs
@@ -140,15 +140,37 @@ async fn analyze_identity(
    }

    let face_data: serde_json::Value = std::fs::read_to_string(&face_clustered_path)
-        .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("Failed to read face data: {}", e)))?
+        .map_err(|e| {
+            (
+                StatusCode::INTERNAL_SERVER_ERROR,
+                format!("Failed to read face data: {}", e),
+            )
+        })?
        .parse()
-        .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("Failed to parse face data: {}", e)))?;
+        .map_err(|e| {
+            (
+                StatusCode::INTERNAL_SERVER_ERROR,
+                format!("Failed to parse face data: {}", e),
+            )
+        })?;

    let asrx_data: Option<serde_json::Value> = if asrx_path.exists() {
-        Some(std::fs::read_to_string(&asrx_path)
-            .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("Failed to read asrx data: {}", e)))?
-            .parse()
-            .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("Failed to parse asrx data: {}", e)))?)
+        Some(
+            std::fs::read_to_string(&asrx_path)
+                .map_err(|e| {
+                    (
+                        StatusCode::INTERNAL_SERVER_ERROR,
+                        format!("Failed to read asrx data: {}", e),
+                    )
+                })?
+                .parse()
+                .map_err(|e| {
+                    (
+                        StatusCode::INTERNAL_SERVER_ERROR,
+                        format!("Failed to parse asrx data: {}", e),
+                    )
+                })?,
+        )
    } else {
        None
    };
@@ -161,7 +183,14 @@ async fn analyze_identity(
    // 將 identity 結果寫入 DB
    let pool = state.db.pool();
    for id_result in &identities {
-        let identity_name = format!("person_{}", id_result.person_ids.first().map(|s| &**s).unwrap_or("unknown"));
+        let identity_name = format!(
+            "person_{}",
+            id_result
+                .person_ids
+                .first()
+                .map(|s| &**s)
+                .unwrap_or("unknown")
+        );
        let metadata = serde_json::json!({
            "source": "identity_agent",
            "trace_ids": id_result.person_ids,
@@ -184,7 +213,9 @@ async fn analyze_identity(
    }

    // 迭代多角度 face embedding 比對（TMDb seed → 傳播）
-    let _ = match_faces_iterative(pool, &req.file_uuid).await.unwrap_or(0);
+    let _ = match_faces_iterative(pool, &req.file_uuid)
+        .await
+        .unwrap_or(0);

    // 將 ASRX speaker 綁定到已匹配 identity 的 trace
    let _ = bind_speakers(pool, &req.file_uuid).await.unwrap_or(0);
@@ -309,11 +340,21 @@ fn extract_speakers_from_asrx_data(asrx_data: &Option<serde_json::Value>) -> Vec
            let mut speaker_segments_map: std::collections::HashMap<String, Vec<(f64, f64)>> =
                std::collections::HashMap::new();
            for segment in segments {
-                let speaker_id = segment.get("speaker_id").and_then(|s| s.as_str())
+                let speaker_id = segment
+                    .get("speaker_id")
+                    .and_then(|s| s.as_str())
                    .or_else(|| segment.get("speaker").and_then(|s| s.as_str()));
                if let Some(speaker_id) = speaker_id {
-                    let start = segment.get("start").or_else(|| segment.get("start_time")).and_then(|s| s.as_f64()).unwrap_or(0.0);
-                    let end = segment.get("end").or_else(|| segment.get("end_time")).and_then(|e| e.as_f64()).unwrap_or(0.0);
+                    let start = segment
+                        .get("start")
+                        .or_else(|| segment.get("start_time"))
+                        .and_then(|s| s.as_f64())
+                        .unwrap_or(0.0);
+                    let end = segment
+                        .get("end")
+                        .or_else(|| segment.get("end_time"))
+                        .and_then(|e| e.as_f64())
+                        .unwrap_or(0.0);
                    speaker_segments_map
                        .entry(speaker_id.to_string())
                        .or_insert_with(Vec::new)
@@ -321,7 +362,10 @@ fn extract_speakers_from_asrx_data(asrx_data: &Option<serde_json::Value>) -> Vec
                }
            }
            for (speaker_id, segments) in speaker_segments_map {
-                speakers.push(SpeakerData { speaker_id, segments });
+                speakers.push(SpeakerData {
+                    speaker_id,
+                    segments,
+                });
            }
        }
    }
@@ -598,11 +642,17 @@ struct SpeakerData {
 }

 fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
-    if a.len() != b.len() || a.is_empty() { return 0.0; }
+    if a.len() != b.len() || a.is_empty() {
+        return 0.0;
+    }
    let dot: f32 = a.iter().zip(b).map(|(x, y)| x * y).sum();
    let na: f32 = a.iter().map(|x| x * x).sum::<f32>().sqrt();
    let nb: f32 = b.iter().map(|x| x * x).sum::<f32>().sqrt();
-    if na == 0.0 || nb == 0.0 { 0.0 } else { dot / (na * nb) }
+    if na == 0.0 || nb == 0.0 {
+        0.0
+    } else {
+        dot / (na * nb)
+    }
 }

 /// 迭代多角度 face embedding 比對 + 傳播
@@ -619,16 +669,20 @@ async fn match_faces_iterative(pool: &sqlx::PgPool, file_uuid: &str) -> anyhow::
        tracing::warn!("[FaceMatch] No TMDb identities with face embeddings");
        return Ok(0);
    }
-    tracing::info!("[FaceMatch] Loaded {} TMDb seed identities", tmdb_rows.len());
+    tracing::info!(
+        "[FaceMatch] Loaded {} TMDb seed identities",
+        tmdb_rows.len()
+    );

    // Step 2: 載入所有 face_detections，按 trace_id 分組
    let fd_rows = sqlx::query_as::<_, (i32, Vec<f32>)>(
        "SELECT trace_id, embedding FROM dev.face_detections \
         WHERE file_uuid=$1 AND trace_id IS NOT NULL AND embedding IS NOT NULL \
-         ORDER BY trace_id"
+         ORDER BY trace_id",
    )
    .bind(file_uuid)
-    .fetch_all(pool).await?;
+    .fetch_all(pool)
+    .await?;

    if fd_rows.is_empty() {
        tracing::warn!("[FaceMatch] No face detections with embeddings");
@@ -639,7 +693,10 @@ async fn match_faces_iterative(pool: &sqlx::PgPool, file_uuid: &str) -> anyhow::
    use std::collections::HashMap;
    let mut trace_faces: HashMap<i32, Vec<Vec<f32>>> = HashMap::new();
    for (tid, emb) in &fd_rows {
-        trace_faces.entry(*tid).or_insert_with(Vec::new).push(emb.clone());
+        trace_faces
+            .entry(*tid)
+            .or_insert_with(Vec::new)
+            .push(emb.clone());
    }

    // 去重：同一個 trace 內，embedding 太接近的只留一個
@@ -649,7 +706,11 @@ async fn match_faces_iterative(pool: &sqlx::PgPool, file_uuid: &str) -> anyhow::
    }

    let total_traces = trace_faces.len();
-    tracing::info!("[FaceMatch] Loaded {} traces with {} faces", total_traces, fd_rows.len());
+    tracing::info!(
+        "[FaceMatch] Loaded {} traces with {} faces",
+        total_traces,
+        fd_rows.len()
+    );

    // Step 3: 建立 TMDb 查找表
    let tmdb_seeds: Vec<(i32, String, Vec<f32>)> = tmdb_rows;
@@ -665,14 +726,21 @@ async fn match_faces_iterative(pool: &sqlx::PgPool, file_uuid: &str) -> anyhow::
        for (_, ref name, ref tmdb_emb) in &tmdb_seeds {
            for face_emb in faces {
                let s = cosine_similarity(face_emb, tmdb_emb);
-                if s > best_sim { best_sim = s; best_name = name.clone(); }
+                if s > best_sim {
+                    best_sim = s;
+                    best_name = name.clone();
+                }
            }
        }
        if best_sim >= TH {
            matched.insert(tid, best_name);
        }
    }
-    tracing::info!("[FaceMatch] Round 1: {} matched ({}%)", matched.len(), matched.len() * 100 / total_traces);
+    tracing::info!(
+        "[FaceMatch] Round 1: {} matched ({}%)",
+        matched.len(),
+        matched.len() * 100 / total_traces
+    );

    // Round 2+: 用已匹配的 face 作為 seed 傳播
    for round_n in 2..=10 {
@@ -681,21 +749,31 @@ async fn match_faces_iterative(pool: &sqlx::PgPool, file_uuid: &str) -> anyhow::
        let mut seed_pool: HashMap<String, Vec<&Vec<f32>>> = HashMap::new();
        for (&tid, name) in &matched {
            if let Some(faces) = trace_faces.get(&tid) {
-                seed_pool.entry(name.clone()).or_default().extend(faces.iter());
+                seed_pool
+                    .entry(name.clone())
+                    .or_default()
+                    .extend(faces.iter());
            }
        }

        let mut new_matches: Vec<(i32, String)> = Vec::new();
        for (&tid, faces) in &trace_faces {
-            if matched.contains_key(&tid) { continue; }
+            if matched.contains_key(&tid) {
+                continue;
+            }
            let mut best_name = String::new();
            let mut best_sim = 0.0f32;
-            if faces.is_empty() { continue; }
+            if faces.is_empty() {
+                continue;
+            }
            let ref_face = &faces[0];
            for (name, seed_faces) in &seed_pool {
                for seed in seed_faces {
                    let s = cosine_similarity(ref_face, seed);
-                    if s > best_sim { best_sim = s; best_name = name.clone(); }
+                    if s > best_sim {
+                        best_sim = s;
+                        best_name = name.clone();
+                    }
                }
            }
            if best_sim >= TH {
@@ -706,31 +784,46 @@ async fn match_faces_iterative(pool: &sqlx::PgPool, file_uuid: &str) -> anyhow::
            matched.insert(tid, name);
        }
        let new = matched.len() - prev;
-        tracing::info!("[FaceMatch] Round {}: +{} matched (total {}, {}%)", round_n, new, matched.len(), matched.len() * 100 / total_traces);
-        if new < 5 { break; }
+        tracing::info!(
+            "[FaceMatch] Round {}: +{} matched (total {}, {}%)",
+            round_n,
+            new,
+            matched.len(),
+            matched.len() * 100 / total_traces
+        );
+        if new < 5 {
+            break;
+        }
    }

    // Step 5: 寫入 DB
    let mut updated = 0usize;
    for (tid, name) in &matched {
        let id_opt = sqlx::query_scalar::<_, Option<i32>>(
-            "SELECT id FROM dev.identities WHERE name=$1 AND source='tmdb'"
+            "SELECT id FROM dev.identities WHERE name=$1 AND source='tmdb'",
        )
        .bind(name)
-        .fetch_optional(pool).await?;
+        .fetch_optional(pool)
+        .await?;
        if let Some(identity_id) = id_opt {
            let _ = sqlx::query(
-                "UPDATE dev.face_detections SET identity_id=$1 WHERE file_uuid=$2 AND trace_id=$3"
+                "UPDATE dev.face_detections SET identity_id=$1 WHERE file_uuid=$2 AND trace_id=$3",
            )
            .bind(identity_id)
            .bind(file_uuid)
            .bind(tid)
-            .execute(pool).await;
+            .execute(pool)
+            .await;
            updated += 1;
        }
    }

-    tracing::info!("[FaceMatch] Done: {}/{} traces matched ({}%)", matched.len(), total_traces, matched.len() * 100 / total_traces);
+    tracing::info!(
+        "[FaceMatch] Done: {}/{} traces matched ({}%)",
+        matched.len(),
+        total_traces,
+        matched.len() * 100 / total_traces
+    );
    Ok(updated)
 }

@@ -771,12 +864,25 @@ pub async fn bind_speakers(pool: &sqlx::PgPool, file_uuid: &str) -> anyhow::Resu
    let mut speakers: HashMap<String, Vec<(f64, f64)>> = HashMap::new();
    if let Some(segments) = asrx_data.get("segments").and_then(|s| s.as_array()) {
        for seg in segments {
-            let sid = seg.get("speaker_id").and_then(|s| s.as_str())
+            let sid = seg
+                .get("speaker_id")
+                .and_then(|s| s.as_str())
                .or_else(|| seg.get("speaker").and_then(|s| s.as_str()));
            if let Some(sid) = sid {
-                let start = seg.get("start_time").or_else(|| seg.get("start")).and_then(|v| v.as_f64()).unwrap_or(0.0);
-                let end = seg.get("end_time").or_else(|| seg.get("end")).and_then(|v| v.as_f64()).unwrap_or(0.0);
-                speakers.entry(sid.to_string()).or_default().push((start, end));
+                let start = seg
+                    .get("start_time")
+                    .or_else(|| seg.get("start"))
+                    .and_then(|v| v.as_f64())
+                    .unwrap_or(0.0);
+                let end = seg
+                    .get("end_time")
+                    .or_else(|| seg.get("end"))
+                    .and_then(|v| v.as_f64())
+                    .unwrap_or(0.0);
+                speakers
+                    .entry(sid.to_string())
+                    .or_default()
+                    .push((start, end));
            }
        }
    }
@@ -792,7 +898,9 @@ pub async fn bind_speakers(pool: &sqlx::PgPool, file_uuid: &str) -> anyhow::Resu
    // For each trace, compute overlap with each speaker
    let mut bindings = 0usize;
    for (trace_id, frames) in &traces {
-        if frames.is_empty() { continue; }
+        if frames.is_empty() {
+            continue;
+        }

        // Get identity_id for this trace
        let identity_id: Option<i32> = sqlx::query_scalar(
@@ -801,7 +909,9 @@ pub async fn bind_speakers(pool: &sqlx::PgPool, file_uuid: &str) -> anyhow::Resu
        .bind(file_uuid).bind(trace_id)
        .fetch_optional(pool).await?.flatten();

-        if identity_id.is_none() { continue; }
+        if identity_id.is_none() {
+            continue;
+        }
        let identity_id = identity_id.unwrap();

        // Compute overlap with each speaker
@@ -850,7 +960,11 @@ pub async fn bind_speakers(pool: &sqlx::PgPool, file_uuid: &str) -> anyhow::Resu
        }
    }

-    tracing::info!("[SpeakerBind] Created {}/{} speaker bindings", bindings, traces.len());
+    tracing::info!(
+        "[SpeakerBind] Created {}/{} speaker bindings",
+        bindings,
+        traces.len()
+    );
    Ok(bindings)
 }

@@ -870,7 +984,10 @@ pub async fn run_identity_agent(db: &PostgresDb, file_uuid: &str) -> anyhow::Res
    };

    if !face_clustered_path.exists() {
-        tracing::warn!("[IdentityAgent] face_clustered.json not found for {}", file_uuid);
+        tracing::warn!(
+            "[IdentityAgent] face_clustered.json not found for {}",
+            file_uuid
+        );
        return Ok(());
    }

@@ -888,7 +1005,14 @@ pub async fn run_identity_agent(db: &PostgresDb, file_uuid: &str) -> anyhow::Res

    let pool = db.pool();
    for id_result in &identities {
-        let identity_name = format!("person_{}", id_result.person_ids.first().map(|s| &**s).unwrap_or("unknown"));
+        let identity_name = format!(
+            "person_{}",
+            id_result
+                .person_ids
+                .first()
+                .map(|s| &**s)
+                .unwrap_or("unknown")
+        );
        let metadata = serde_json::json!({
            "source": "identity_agent",
            "trace_ids": id_result.person_ids,
@@ -914,7 +1038,10 @@ pub async fn run_identity_agent(db: &PostgresDb, file_uuid: &str) -> anyhow::Res

    tracing::info!(
        "[IdentityAgent] Done for {}: {} identities, {} face matches, {} speaker bindings",
-        file_uuid, identities.len(), matched, bound
+        file_uuid,
+        identities.len(),
+        matched,
+        bound
    );
    Ok(())
 }
--- a/src/api/identity_api.rs
+++ b/src/api/identity_api.rs
@@ -501,7 +501,7 @@ async fn get_identity_chunks(
    let data: Vec<IdentityChunkItem> = records
        .into_iter()
        .map(|r| IdentityChunkItem {
-            id: r.id,
+            id: r.id as i64,
            file_uuid: r.file_uuid,
            chunk_id: r.chunk_id,
            chunk_type: r.chunk_type,
--- a/src/api/media_api.rs
+++ b/src/api/media_api.rs
@@ -13,14 +13,20 @@ use crate::core::db::{schema, PostgresDb};
 static FFMPEG: Lazy<String> = Lazy::new(|| {
    std::env::var("MOMENTRY_FFMPEG").unwrap_or_else(|_| {
        let full = "/opt/homebrew/opt/ffmpeg-full/bin/ffmpeg";
-        if std::path::Path::new(full).exists() { full.to_string() } else { "ffmpeg".to_string() }
+        if std::path::Path::new(full).exists() {
+            full.to_string()
+        } else {
+            "ffmpeg".to_string()
+        }
    })
 });

 fn ffmpeg_cmd() -> std::process::Command {
    let mut cmd = std::process::Command::new(&*FFMPEG);
    let full_lib = "/opt/homebrew/opt/ffmpeg-full/lib";
-    if std::path::Path::new(full_lib).exists() { cmd.env("DYLD_LIBRARY_PATH", full_lib); }
+    if std::path::Path::new(full_lib).exists() {
+        cmd.env("DYLD_LIBRARY_PATH", full_lib);
+    }
    cmd
 }

@@ -293,20 +299,32 @@ async fn trace_video(
    let first_frame = rows[0].0;
    let last_frame = rows[rows.len() - 1].0;
    let start_sec = first_frame as f64 / fps;
-    let padding = params.get("padding").and_then(|s| s.parse().ok()).unwrap_or(2.0);
+    let padding = params
+        .get("padding")
+        .and_then(|s| s.parse().ok())
+        .unwrap_or(2.0);
    let duration = (last_frame - first_frame) as f64 / fps + padding * 2.0;
    let seek = (start_sec - padding).max(0.0);

    // Build filters: bbox+drawtext (1 filter + 1 drawtext per detection)
    let mut parts: Vec<String> = Vec::new();
    for (i, (frame, x, y, w, h)) in rows.iter().enumerate() {
-        let next_frame = if i + 1 < rows.len() { rows[i + 1].0 } else { last_frame + (padding * fps) as i32 };
+        let next_frame = if i + 1 < rows.len() {
+            rows[i + 1].0
+        } else {
+            last_frame + (padding * fps) as i32
+        };
        let start_offset = frame - first_frame + (padding * fps) as i32;
        let end_offset = next_frame - first_frame + (padding * fps) as i32;
        // Bbox
        parts.push(format!(
            "drawbox=x={}:y={}:w={}:h={}:color=red@0.8:thickness=8:enable='between(n,{},{})'",
-            x, y, w, h, start_offset, end_offset - 1
+            x,
+            y,
+            w,
+            h,
+            start_offset,
+            end_offset - 1
        ));
        // Text label (drawtext, 1 filter vs ~175 bitmap drawboxes)
        parts.push(format!(
@@ -325,14 +343,31 @@ async fn trace_video(
    let tmp_str = tmp.to_str().unwrap_or("").to_string();
    let result = ffmpeg_cmd()
        .args([
-            "-ss", &seek.to_string(), "-i", &video_path,
-            "-t", &duration.to_string(),
-            "-/filter_complex", &filter_path,
-            "-c:v", "libx264", "-preset", "ultrafast", "-crf", "28",
-            "-an", "-movflags", "+faststart", "-y", &tmp_str,
+            "-ss",
+            &seek.to_string(),
+            "-i",
+            &video_path,
+            "-t",
+            &duration.to_string(),
+            "-/filter_complex",
+            &filter_path,
+            "-c:v",
+            "libx264",
+            "-preset",
+            "ultrafast",
+            "-crf",
+            "28",
+            "-an",
+            "-movflags",
+            "+faststart",
+            "-y",
+            &tmp_str,
        ])
        .output()
-        .map_err(|e| { tracing::error!("ffmpeg spawn: {}", e); StatusCode::INTERNAL_SERVER_ERROR })?;
+        .map_err(|e| {
+            tracing::error!("ffmpeg spawn: {}", e);
+            StatusCode::INTERNAL_SERVER_ERROR
+        })?;
    if !result.status.success() {
        let stderr = String::from_utf8_lossy(&result.stderr);
        tracing::error!("ffmpeg failed: {}", &stderr[..stderr.len().min(300)]);
--- a/src/api/search.rs
+++ b/src/api/search.rs
@@ -13,6 +13,8 @@ use crate::core::embedding::Embedder;
 pub struct SmartSearchRequest {
    pub uuid: String,
    pub query: String,
+    pub page: Option<usize>,
+    pub page_size: Option<usize>,
    pub limit: Option<usize>,
 }

@@ -41,6 +43,8 @@ pub struct SearchResult {
 pub struct SmartSearchResponse {
    pub query: String,
    pub results: Vec<SearchResult>,
+    pub page: usize,
+    pub page_size: usize,
    pub strategy: String,
 }

@@ -51,7 +55,18 @@ pub async fn smart_search(
    Json(req): Json<SmartSearchRequest>,
 ) -> Result<Json<SmartSearchResponse>, (StatusCode, Json<serde_json::Value>)> {
    let db = &state.db;
-    let limit = req.limit.unwrap_or(5);
+    // Pages are 1-based; a missing or zero `page` means the first page.
+    let page = req.page.unwrap_or(1).max(1);
+    // Backward compat: if old `limit` sent without `page`/`page_size`, use limit as page_size
+    let page_size = match (req.page_size, req.page, req.limit) {
+        (Some(size), _, _) => size,
+        (None, None, Some(legacy_limit)) => legacy_limit,
+        _ => 5,
+    }
+    .max(1);
+    // An explicit `limit` remains a hard cap on top of the page size.
+    let hard_limit = req.limit.unwrap_or(usize::MAX);
+    let limit = hard_limit.min(page_size);

    // 1. Generate Embedding using EmbeddingGemma via MOMENTRY_EMBED_URL
    let embedder = Embedder::new("embeddinggemma-300m".to_string());
@@ -83,6 +98,8 @@ pub async fn smart_search(
        return Ok(Json(SmartSearchResponse {
            query: req.query,
            results: vec![],
+            page,
+            page_size,
            strategy: "semantic_vector_search".to_string(),
        }));
    }
@@ -145,13 +162,15 @@ pub async fn smart_search(
    });

    // 7. Limit the final results (optional, but good for API consistency)
-    let limit = req.limit.unwrap_or(5) * 5; // Allow more children per parent context
-    results.truncate(limit);
+    let truncate_limit = hard_limit.min(page_size.saturating_mul(5)); // up to 5 children per parent
+    results.truncate(truncate_limit);

    // 8. Format Response
    let response = SmartSearchResponse {
        query: req.query,
        results,
+        page,
+        page_size,
        strategy: "drill_down_semantic_search".to_string(),
    };

--- a/src/api/server.rs
+++ b/src/api/server.rs
@@ -2286,7 +2286,8 @@ async fn list_jobs(Query(params): Query<JobsQuery>) -> Result<Json<JobListRespon
        .into_iter()
        .map(|r| {
            let status_str: String = r.try_get("status").unwrap_or_default();
-            let status = MonitorJobStatus::from_db_str(&status_str).unwrap_or(MonitorJobStatus::Pending);
+            let status =
+                MonitorJobStatus::from_db_str(&status_str).unwrap_or(MonitorJobStatus::Pending);
            JobInfoResponse {
                id: r.try_get("id").unwrap_or(0),
                uuid: r.try_get("uuid").unwrap_or_default(),
@@ -2507,7 +2508,7 @@ pub async fn start_server(host: &str, port: u16) -> anyhow::Result<()> {
        .route("/api/v1/files/scan", get(scan_files))
        .route("/api/v1/file/:file_uuid/probe", get(probe_by_uuid))
        .route("/api/v1/file/:file_uuid/process", post(trigger_processing))
-        .route("/api/v1/file/:file_uuid/chunks", get(list_pre_chunks))
+
        .route("/api/v1/progress/:uuid", get(get_progress))
        .route("/api/v1/jobs", get(list_jobs))
        .route("/api/v1/config/cache", post(cache_toggle))
@@ -2585,7 +2586,7 @@ async fn get_ingest_stats(
    State(state): State<AppState>,
 ) -> Result<Json<IngestStatsResponse>, StatusCode> {
    let table_videos = schema::table_name("videos");
-    let table_chunks = schema::table_name("chunks");
+    let table_chunks = schema::table_name("chunk");

    let total_videos: (i64,) = sqlx::query_as(&format!("SELECT COUNT(*) FROM {}", table_videos))
        .fetch_one(state.db.pool())
@@ -3048,15 +3049,15 @@ async fn video_details(
    Query(query): Query<VideoDetailsQuery>,
    State(state): State<AppState>,
 ) -> Result<Json<VideoDetailsResponse>, StatusCode> {
-    let table = schema::table_name("chunks");
+    let table = schema::table_name("chunk");

    if let Some(chunk_id) = query.chunk_id {
        let row: Option<(
-            i32, String, String, i32, String, f64, i64, i64,
+            i32, String, String, String, f64, i64, i64,
            Option<String>, serde_json::Value, Option<serde_json::Value>,
            Option<String>, i32, Option<String>, Option<serde_json::Value>, Option<String>,
        )> = sqlx::query_as(&format!(
-            "SELECT file_id, uuid, chunk_id, chunk_index, chunk_type::text, fps, start_frame, end_frame, 
+            "SELECT file_id, uuid, chunk_id, chunk_type::text, fps, start_frame, end_frame, 
                    text_content, content, metadata, vector_id, frame_count, 
                    parent_chunk_id, visual_stats, summary_text
             FROM {} WHERE chunk_id = $1 AND uuid = $2",
@@ -3081,20 +3082,20 @@ async fn video_details(

        let row = row.ok_or(StatusCode::NOT_FOUND)?;

-        let fps = if row.5 > 0.0 { row.5 } else { 24.0 };
-        let start_frame = row.6;
-        let end_frame = row.7;
+        let fps = if row.4 > 0.0 { row.4 } else { 24.0 };
+        let start_frame = row.5;
+        let end_frame = row.6;
        let duration_frames = end_frame - start_frame;

        let start_time = start_frame as f64 / fps;
        let end_time = end_frame as f64 / fps;

-        let row_metadata = row.10.clone();
+        let row_metadata = row.9.clone();

-        let mut summary_text = row.15.clone();
+        let mut summary_text = row.14.clone();
        let mut metadata = None;

-        if let Some(ref pid_str) = row.13 {
+        if let Some(ref pid_str) = row.12 {
            if !pid_str.is_empty() {
                if let Ok(pid) = pid_str.parse::<i32>() {
                    let parent_table = schema::table_name("parent_chunks");
@@ -3168,7 +3169,7 @@ async fn video_details(
            uuid: row.1.clone(),
            details: VideoDetailsResult::Chunk(ChunkDetailResponse {
                chunk_id: row.2.clone(),
-                chunk_type: row.4.clone(),
+                chunk_type: row.3.clone(),
                frame_range: FrameRange {
                    start_frame,
                    end_frame,
@@ -3179,12 +3180,12 @@ async fn video_details(
                    start: start_time,
                    end: end_time,
                },
-                text_content: row.8.clone(),
-                content: Some(row.9.clone()),
-                parent_id: row.13.clone(),
+                text_content: row.7.clone(),
+                content: Some(row.8.clone()),
+                parent_id: row.12.clone(),
                summary_text,
                metadata,
-                visual_stats: row.14.clone(),
+                visual_stats: row.13.clone(),
                speaker_ids,
                person_ids,
            }),
@@ -3194,123 +3195,6 @@ async fn video_details(
    Err(StatusCode::BAD_REQUEST)
 }

-#[derive(Debug, Deserialize)]
-struct PreChunksQuery {
-    processor_type: Option<String>,
-    page: Option<usize>,
-    page_size: Option<usize>,
-}
-
-#[derive(Debug, Serialize)]
-struct PreChunksResponse {
-    pre_chunks: Vec<PreChunkItem>,
-    count: i64,
-    page: usize,
-    page_size: usize,
-}
-
-#[derive(Debug, Serialize)]
-struct PreChunkItem {
-    id: i64,
-    processor_type: String,
-    coordinate_type: String,
-    coordinate_index: i64,
-    start_frame: Option<i64>,
-    end_frame: Option<i64>,
-    start_time: Option<f64>,
-    end_time: Option<f64>,
-    fps: Option<f64>,
-    data: serde_json::Value,
-    identity_id: Option<String>,
-    confidence: Option<f64>,
-    created_at: String,
-}
-
-async fn list_pre_chunks(
-    Path(uuid): Path<String>,
-    Query(query): Query<PreChunksQuery>,
-    State(state): State<AppState>,
-) -> Result<Json<PreChunksResponse>, StatusCode> {
-    let table = schema::table_name("pre_chunks");
-    let page = query.page.unwrap_or(1);
-    let page_size = query.page_size.unwrap_or(20);
-    let offset = (page - 1) * page_size;
-
-    let processor_filter = if let Some(pt) = &query.processor_type {
-        format!("AND processor_type = '{}'", pt.to_lowercase())
-    } else {
-        "".to_string()
-    };
-
-    let count_query = format!(
-        "SELECT COUNT(*) FROM {} WHERE file_uuid = $1 {}",
-        table, processor_filter
-    );
-
-    let count: i64 = sqlx::query(&count_query)
-        .bind(&uuid)
-        .fetch_one(state.db.pool())
-        .await
-        .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?
-        .try_get(0)
-        .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
-
-    let data_query = format!(
-        "SELECT id, processor_type, coordinate_type, coordinate_index, 
-                start_frame, end_frame, start_time, end_time, fps,
-                data, created_at
-         FROM {} 
-         WHERE file_uuid = $1 {}
-         ORDER BY coordinate_index ASC
-         LIMIT {} OFFSET {}",
-        table, processor_filter, page_size, offset
-    );
-
-    let rows: Vec<(
-        i64,
-        String,
-        String,
-        i64,
-        Option<i64>,
-        Option<i64>,
-        Option<f64>,
-        Option<f64>,
-        Option<f64>,
-        serde_json::Value,
-        chrono::DateTime<chrono::Utc>,
-    )> = sqlx::query_as(&data_query)
-        .bind(&uuid)
-        .fetch_all(state.db.pool())
-        .await
-        .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
-
-    let pre_chunks = rows
-        .iter()
-        .map(|row| PreChunkItem {
-            id: row.0,
-            processor_type: row.1.clone(),
-            coordinate_type: row.2.clone(),
-            coordinate_index: row.3,
-            start_frame: row.4,
-            end_frame: row.5,
-            start_time: row.6,
-            end_time: row.7,
-            fps: row.8,
-            data: row.9.clone(),
-            identity_id: None,
-            confidence: None,
-            created_at: row.10.to_rfc3339(),
-        })
-        .collect();
-
-    Ok(Json(PreChunksResponse {
-        pre_chunks,
-        count,
-        page,
-        page_size,
-    }))
-}
-
 #[derive(Debug, Serialize)]
 struct DeleteVideoResponse {
    success: bool,
@@ -3404,7 +3288,7 @@ async fn delete_video(
    let videos_table = schema::table_name("videos");
    let face_table = schema::table_name("face_detections");
    let processor_table = schema::table_name("processor_results");
-    let chunks_table = schema::table_name("chunks");
+    let chunks_table = schema::table_name("chunk");
    let parent_chunks_table = schema::table_name("parent_chunks");

    // Check if video exists first
--- a/src/api/trace_agent_api.rs
+++ b/src/api/trace_agent_api.rs
@@ -25,6 +25,8 @@ pub fn trace_agent_routes() -> Router<crate::api::server::AppState> {
 struct TracesRequest {
    min_faces: Option<i64>,
    sort_by: Option<String>,
+    page: Option<i64>,
+    page_size: Option<i64>,
    limit: Option<i64>,
    min_confidence: Option<f64>,
    max_confidence: Option<f64>,
@@ -49,6 +51,8 @@ struct TracesResponse {
    file_uuid: String,
    total_traces: i64,
    total_faces: i64,
+    page: i64,
+    page_size: i64,
    traces: Vec<TraceInfo>,
 }

@@ -59,7 +63,11 @@ async fn list_traces_sorted(
 ) -> Result<Json<TracesResponse>, (StatusCode, String)> {
    let min_faces = req.min_faces.unwrap_or(1);
    let sort = req.sort_by.as_deref().unwrap_or("first_appearance");
-    let limit = req.limit.unwrap_or(500);
+    let page = req.page.unwrap_or(1).max(1);
+    let page_size = req.page_size.unwrap_or(50).clamp(1, 500);
+    let hard_limit = req.limit.unwrap_or(500).max(0); // a negative LIMIT is a Postgres error
+    let effective_limit = hard_limit.min(page_size);
+    let db_offset = (page - 1).saturating_mul(page_size); // a huge `page` must not overflow
    let min_confidence = req.min_confidence.unwrap_or(0.0);
    let max_confidence = req.max_confidence.unwrap_or(1.0);

@@ -92,11 +100,11 @@ async fn list_traces_sorted(
                      AVG(confidence) AS avg_confidence
               FROM dev.face_detections
               WHERE file_uuid = $1 AND trace_id IS NOT NULL
-                 AND confidence >= $4 AND confidence <= $5
+                 AND confidence >= $5 AND confidence <= $6
               GROUP BY trace_id
               HAVING COUNT(*) >= $2
               ORDER BY {}
-               LIMIT $3
+               LIMIT $3 OFFSET $4
           ) tt
            LEFT JOIN LATERAL (
                SELECT id FROM dev.face_detections
@@ -111,7 +119,8 @@ async fn list_traces_sorted(
        sqlx::query_as(&query)
            .bind(&file_uuid)
            .bind(min_faces)
-            .bind(limit)
+            .bind(effective_limit)
+            .bind(db_offset)
            .bind(min_confidence)
            .bind(max_confidence)
            .fetch_all(state.db.pool())
@@ -146,6 +155,8 @@ async fn list_traces_sorted(
        file_uuid,
        total_traces,
        total_faces,
+        page,
+        page_size,
        traces,
    }))
 }
@@ -154,6 +165,8 @@ async fn list_traces_sorted(

 #[derive(Debug, Deserialize)]
 struct TraceFacesQuery {
+    page: Option<i64>,
+    page_size: Option<i64>,
    limit: Option<i64>,
    offset: Option<i64>,
    interpolate: Option<bool>,
@@ -194,7 +207,14 @@ async fn list_trace_faces(
    Query(q): Query<TraceFacesQuery>,
 ) -> Result<Json<TraceFacesResponse>, (StatusCode, String)> {
-    let limit = q.limit.unwrap_or(200).min(1000);
-    let offset = q.offset.unwrap_or(0);
+    // Support both page/page_size and limit/offset; page/page_size takes precedence.
+    // In page mode page_size also caps the row count, otherwise consecutive pages overlap.
+    let (limit, offset) = if q.page.is_some() || q.page_size.is_some() {
+        let p = q.page.unwrap_or(1).max(1);
+        let ps = q.page_size.unwrap_or(200).clamp(1, 1000);
+        (q.limit.unwrap_or(ps).min(ps), (p - 1).saturating_mul(ps))
+    } else {
+        (q.limit.unwrap_or(200).min(1000), q.offset.unwrap_or(0))
+    };
    let interpolate = q.interpolate.unwrap_or(false);

    let fps: f64 =
@@ -206,7 +226,7 @@ async fn list_trace_faces(
            .unwrap_or(24.0);

    let total_detected: i64 = sqlx::query_scalar(
-        "SELECT COUNT(*) FROM dev.face_detections WHERE file_uuid = $1 AND trace_id = $2"
+        "SELECT COUNT(*) FROM dev.face_detections WHERE file_uuid = $1 AND trace_id = $2",
    )
    .bind(&file_uuid)
    .bind(trace_id)
@@ -214,21 +234,28 @@ async fn list_trace_faces(
    .await
    .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?;

-    let rows: Vec<(i32, i32, Option<i32>, Option<i32>, Option<i32>, Option<i32>, f32)> =
-        sqlx::query_as(
-            "SELECT id, frame_number, x, y, width, height, confidence
+    let rows: Vec<(
+        i32,
+        i32,
+        Option<i32>,
+        Option<i32>,
+        Option<i32>,
+        Option<i32>,
+        f32,
+    )> = sqlx::query_as(
+        "SELECT id, frame_number, x, y, width, height, confidence
             FROM dev.face_detections
             WHERE file_uuid = $1 AND trace_id = $2
             ORDER BY frame_number ASC
-             LIMIT $3 OFFSET $4"
-        )
-        .bind(&file_uuid)
-        .bind(trace_id)
-        .bind(limit)
-        .bind(offset)
-        .fetch_all(state.db.pool())
-        .await
-        .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?;
+             LIMIT $3 OFFSET $4",
+    )
+    .bind(&file_uuid)
+    .bind(trace_id)
+    .bind(limit)
+    .bind(offset)
+    .fetch_all(state.db.pool())
+    .await
+    .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?;

    let mut faces: Vec<TraceFaceItem> = Vec::new();

--- a/src/api/universal_search.rs
+++ b/src/api/universal_search.rs
@@ -327,7 +327,7 @@ async fn search_chunks(
    };

    let mut sql = format!(
-        "SELECT chunk_id, chunk_type, start_time, end_time, start_frame, end_frame, text_content, content FROM chunks WHERE file_uuid = '{}'",
+        "SELECT chunk_id, chunk_type, start_time, end_time, start_frame, end_frame, text_content, content FROM dev.chunk WHERE file_uuid = '{}'",
        uuid
    );
    if let Some(tr) = &req.time_range {
@@ -483,7 +483,7 @@ async fn search_frames_internal(
    let video_table = "videos";

    let mut sql = format!(
-        "SELECT f.frame_number, f.timestamp, f.yolo_objects, f.ocr_results, f.face_results, f.pose_results, v.file_uuid 
+        "SELECT f.frame_number, f.timestamp, f.yolo_objects, f.ocr_results, f.face_results, v.file_uuid 
         FROM {} f JOIN {} v ON f.file_id = v.id WHERE 1=1",
        table, video_table
    );
@@ -532,13 +532,12 @@ async fn search_frames_internal(
        Option<serde_json::Value>,
        Option<serde_json::Value>,
        Option<serde_json::Value>,
-        Option<serde_json::Value>,
        String,
    )> = sqlx::query_as(&sql).fetch_all(db.pool()).await?;

    let results: Vec<SearchResult> = rows
        .into_iter()
-        .map(|(frame_number, timestamp, yolo, ocr, face, pose, _uuid)| {
+        .map(|(frame_number, timestamp, yolo, ocr, face, _uuid)| {
            let objects = yolo.as_ref().and_then(|v| {
                v.get("objects")
                    .map(|o| o.as_array().cloned().unwrap_or_default())
@@ -558,10 +557,6 @@ async fn search_frames_internal(
                v.get("faces")
                    .map(|f| f.as_array().cloned().unwrap_or_default())
            });
-            let pose_persons = pose.as_ref().and_then(|v| {
-                v.get("persons")
-                    .map(|p| p.as_array().cloned().unwrap_or_default())
-            });

            SearchResult::Frame {
                frame_number,
@@ -570,7 +565,7 @@ async fn search_frames_internal(
                objects: objects.map(|arr| arr.iter().map(|v| v.clone()).collect()),
                ocr_texts,
                faces,
-                pose_persons,
+                pose_persons: None,
            }
        })
        .collect();
@@ -652,7 +647,7 @@ async fn search_frames_internal_v2(
    let video_table = "videos";

    let mut sql = format!(
-        "SELECT f.frame_number, f.timestamp, f.yolo_objects, f.ocr_results, f.face_results, f.pose_results, v.file_uuid 
+        "SELECT f.frame_number, f.timestamp, f.yolo_objects, f.ocr_results, f.face_results, v.file_uuid 
         FROM {} f JOIN {} v ON f.file_id = v.id WHERE 1=1",
        table, video_table
    );
@@ -685,13 +680,12 @@ async fn search_frames_internal_v2(
        Option<serde_json::Value>,
        Option<serde_json::Value>,
        Option<serde_json::Value>,
-        Option<serde_json::Value>,
        String,
    )> = sqlx::query_as(&sql).fetch_all(db.pool()).await?;

    let results: Vec<FrameResult> = rows
        .into_iter()
-        .map(|(frame_number, timestamp, yolo, ocr, face, pose, uuid)| {
+        .map(|(frame_number, timestamp, yolo, ocr, face, uuid)| {
            let objects = yolo.as_ref().and_then(|v| {
                v.get("objects")
                    .map(|o| o.as_array().cloned().unwrap_or_default())
@@ -711,11 +705,6 @@ async fn search_frames_internal_v2(
                v.get("faces")
                    .map(|f| f.as_array().cloned().unwrap_or_default())
            });
-            let pose_persons = pose.as_ref().and_then(|v| {
-                v.get("persons")
-                    .map(|p| p.as_array().cloned().unwrap_or_default())
-            });
-
            FrameResult {
                frame_number,
                timestamp,
@@ -723,7 +712,7 @@ async fn search_frames_internal_v2(
                objects: objects.map(|arr| arr.iter().map(|v| v.clone()).collect()),
                ocr_texts,
                faces,
-                pose_persons,
+                pose_persons: None,
            }
        })
        .collect();
--- a/src/api/visual_chunk_search.rs
+++ b/src/api/visual_chunk_search.rs
@@ -177,7 +177,7 @@ pub async fn search_visual_chunks(
 /// Get all visual chunks for a video UUID
 async fn get_visual_chunks_by_uuid(db: &PostgresDb, uuid: &str) -> Result<Vec<Chunk>> {
    let sql = format!(
-        "SELECT file_id, uuid, chunk_id, chunk_index, chunk_type, fps, start_frame, end_frame, text_content, content, metadata, vector_id, visual_stats FROM chunks WHERE uuid = '{}' AND chunk_type = 'visual' ORDER BY start_frame ASC",
+        "SELECT file_id, file_uuid, chunk_id, chunk_type, fps, start_frame, end_frame, text_content, content, metadata, vector_id, visual_stats FROM dev.chunk WHERE file_uuid = '{}' AND chunk_type = 'visual' ORDER BY start_frame ASC",
        uuid.replace('\'', "''")
    );

@@ -185,7 +185,6 @@ async fn get_visual_chunks_by_uuid(db: &PostgresDb, uuid: &str) -> Result<Vec<Ch
        i32,            // file_id
        String,         // uuid
        String,         // chunk_id
-        i32,            // chunk_index
        String,         // chunk_type
        f64,            // fps
        i64,            // start_frame
@@ -199,7 +198,7 @@ async fn get_visual_chunks_by_uuid(db: &PostgresDb, uuid: &str) -> Result<Vec<Ch

    let mut chunks = Vec::new();
    for row in rows {
-        let chunk_type = match row.4.as_str() {
+        let chunk_type = match row.3.as_str() {
            "visual" => ChunkType::Visual,
            "sentence" => ChunkType::Sentence,
            "time_based" => ChunkType::TimeBased,
@@ -210,27 +209,26 @@ async fn get_visual_chunks_by_uuid(db: &PostgresDb, uuid: &str) -> Result<Vec<Ch
        };

        // Calculate frame_count
-        let frame_count = (row.7 - row.6) as i32;
+        let frame_count = (row.6 - row.5) as i32;

        chunks.push(Chunk {
            file_id: row.0,
            uuid: row.1,
            chunk_id: row.2,
-            chunk_index: row.3 as u32,
            chunk_type,
            rule: ChunkRule::Rule2, // Visual chunks use Rule2
-            fps: row.5,
-            start_frame: row.6,
-            end_frame: row.7,
-            text_content: row.8,
-            content: row.9,
-            metadata: row.10,
-            vector_id: row.11,
+            fps: row.4,
+            start_frame: row.5,
+            end_frame: row.6,
+            text_content: row.7,
+            content: row.8,
+            metadata: row.9,
+            vector_id: row.10,
            frame_count,
            pre_chunk_ids: Vec::new(),
            parent_chunk_id: None,
            child_chunk_ids: Vec::new(),
-            visual_stats: row.12,
+            visual_stats: row.11,
        });
    }

@@ -383,13 +381,13 @@ pub async fn get_visual_chunk_statistics(
            MAX((content->'metadata'->>'avg_confidence')::float) as max_confidence,
            SUM((content->'metadata'->>'object_count')::int) as total_objects,
            AVG((content->'metadata'->>'spatial_density')::float) as avg_density
-        FROM chunks 
-        WHERE uuid = '{}' 
+        FROM dev.chunk 
+        WHERE file_uuid = '{}' 
        AND chunk_type = 'visual'",
        uuid.replace('\'', "''")
    );

-    let row: (i64, Option<f64>, Option<f64>, Option<f64>, i64, Option<f64>) =
+    let row: (i64, Option<f64>, Option<f64>, Option<f64>, Option<i64>, Option<f64>) =
        sqlx::query_as(&sql).fetch_one(db.pool()).await?;

    let mut stats = HashMap::new();
@@ -406,7 +404,7 @@ pub async fn get_visual_chunk_statistics(
        "max_confidence".to_string(),
        Value::from(row.3.unwrap_or(0.0)),
    );
-    stats.insert("total_objects".to_string(), Value::from(row.4));
+    stats.insert("total_objects".to_string(), Value::from(row.4.unwrap_or(0)));
    stats.insert("avg_density".to_string(), Value::from(row.5.unwrap_or(0.0)));

    Ok(stats)
--- a/src/core/chunk/mod.rs
+++ b/src/core/chunk/mod.rs
@@ -6,6 +6,6 @@ pub mod types;

 pub use rule1_ingest::execute_rule1;
 pub use rule3_ingest::ingest_rule3;
-pub use trace_ingest::ingest_traces;
 pub use splitter::{AsrSegment, ChunkSplitter};
+pub use trace_ingest::ingest_traces;
 pub use types::{Chunk, ChunkType};
--- a/src/core/chunk/rule1_ingest.rs
+++ b/src/core/chunk/rule1_ingest.rs
@@ -50,7 +50,7 @@ pub async fn execute_rule1(db: &PostgresDb, file_uuid: &str, fps: f64) -> Result
        let chunk = Chunk::from_seconds(
            file_id as i32,
            file_uuid.to_string(),
-            idx as u32,
+            format!("{}", idx),
            ChunkType::Sentence,
            ChunkRule::Rule1,
            seg.start_time,
--- a/src/core/chunk/rule3_ingest.rs
+++ b/src/core/chunk/rule3_ingest.rs
@@ -73,7 +73,7 @@ pub async fn ingest_rule3(pool: &PgPool, file_uuid: &str) -> Result<usize> {
        // Query chunks table for Rule 1 sentence chunks
        let rule1_rows: Vec<(String,)> = sqlx::query_as(
            r#"
-            SELECT chunk_id FROM chunks
+            SELECT chunk_id FROM dev.chunk
            WHERE file_uuid = $1 AND chunk_type = 'sentence'
            AND start_frame >= $2
            AND end_frame <= $3
@@ -98,7 +98,7 @@ pub async fn ingest_rule3(pool: &PgPool, file_uuid: &str) -> Result<usize> {

        let texts: Vec<String> = sqlx::query_scalar(
            r#"
-            SELECT text_content FROM chunks
+            SELECT text_content FROM dev.chunk
            WHERE file_uuid = $1 AND chunk_type = 'sentence'
            AND start_frame >= $2
            AND end_frame <= $3
@@ -135,10 +135,11 @@ pub async fn ingest_rule3(pool: &PgPool, file_uuid: &str) -> Result<usize> {
        );

        // 4. Insert into dev.chunks
-        let fps_query: Option<f64> = sqlx::query_scalar("SELECT fps FROM videos WHERE file_uuid = $1")
-            .bind(file_uuid)
-            .fetch_optional(&mut *tx)
-            .await?;
+        let fps_query: Option<f64> =
+            sqlx::query_scalar("SELECT fps FROM videos WHERE file_uuid = $1")
+                .bind(file_uuid)
+                .fetch_optional(&mut *tx)
+                .await?;
        let fps = fps_query.unwrap_or(29.97);

        // Prepare metadata JSON
@@ -149,12 +150,12 @@ pub async fn ingest_rule3(pool: &PgPool, file_uuid: &str) -> Result<usize> {

        sqlx::query(
            r#"
-            INSERT INTO chunks (
-                file_uuid, chunk_id, old_chunk_id, chunk_index, chunk_type,
+            INSERT INTO dev.chunk (
+                file_uuid, chunk_id, chunk_type,
                start_time, end_time, fps, start_frame, end_frame,
                content, text_content, summary_text, metadata, child_chunk_ids
-            ) VALUES ($1, $2, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14)
-            ON CONFLICT (file_uuid, old_chunk_id) DO NOTHING
+            ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13)
+            ON CONFLICT (file_uuid, chunk_id) DO NOTHING
            "#,
        )
        .bind(file_uuid)
--- a/src/core/chunk/splitter.rs
+++ b/src/core/chunk/splitter.rs
@@ -23,7 +23,7 @@ impl ChunkSplitter {
            chunks.push(Chunk::from_seconds(
                0, // file_id
                uuid.to_string(),
-                index,
+                format!("{}", index),
                ChunkType::TimeBased,
                ChunkRule::Rule1,
                current_time,
@@ -48,7 +48,7 @@ impl ChunkSplitter {
            chunks.push(Chunk::from_seconds(
                0, // file_id
                uuid.to_string(),
-                index as u32,
+                format!("{}", index),
                ChunkType::Sentence,
                ChunkRule::Rule1,
                segment.start,
--- a/src/core/chunk/trace_ingest.rs
+++ b/src/core/chunk/trace_ingest.rs
@@ -95,7 +95,7 @@ pub async fn ingest_traces(db: &PostgresDb, file_uuid: &str) -> Result<usize> {
        let chunk = Chunk::new(
            file_id,
            file_uuid.to_string(),
-            (count + 1) as u32,
+            format!("trace_{}", count + 1),
            ChunkType::Trace,
            ChunkRule::Rule1,
            trace.first_frame as i64,
@@ -110,17 +110,29 @@ pub async fn ingest_traces(db: &PostgresDb, file_uuid: &str) -> Result<usize> {
        if let Err(e) = db.store_chunk(&chunk).await {
            error!("Failed to store trace chunk {}: {}", trace.trace_id, e);
        } else {
-            let preview = chunk.text_content.as_deref().unwrap_or("").chars().take(60).collect::<String>();
-            let co = chunk.metadata.as_ref()
+            let preview = chunk
+                .text_content
+                .as_deref()
+                .unwrap_or("")
+                .chars()
+                .take(60)
+                .collect::<String>();
+            let co = chunk
+                .metadata
+                .as_ref()
                .and_then(|m| m.get("co_appearances"))
                .and_then(|c| c.as_array())
                .map(|a| a.len())
                .unwrap_or(0);
            info!(
                "Trace chunk {}: trace_id={} frames={}-{} faces={} co_appear={} text={}",
-                chunk.chunk_id, trace.trace_id,
-                trace.first_frame, trace.last_frame,
-                trace.face_count, co, preview,
+                chunk.chunk_id,
+                trace.trace_id,
+                trace.first_frame,
+                trace.last_frame,
+                trace.face_count,
+                co,
+                preview,
            );
            count += 1;
        }
@@ -209,14 +221,11 @@ impl<'r> sqlx::FromRow<'r, sqlx::postgres::PgRow> for AsrSegment {

 impl AsrSegment {
    fn text(&self) -> Option<&str> {
-        self.data
-            .get("text")
-            .and_then(|v| v.as_str())
-            .or_else(|| {
-                self.data
-                    .get("data")
-                    .and_then(|d| d.get("text"))
-                    .and_then(|v| v.as_str())
-            })
+        self.data.get("text").and_then(|v| v.as_str()).or_else(|| {
+            self.data
+                .get("data")
+                .and_then(|d| d.get("text"))
+                .and_then(|v| v.as_str())
+        })
    }
 }
--- a/src/core/chunk/types.rs
+++ b/src/core/chunk/types.rs
@@ -115,7 +115,6 @@ pub struct Chunk {
    pub file_id: i32,
    pub uuid: String,
    pub chunk_id: String,
-    pub chunk_index: u32,
    pub chunk_type: ChunkType,
    pub rule: ChunkRule,
    /// Frames per second (can be fractional, e.g., 29.97, 23.976)
@@ -140,7 +139,7 @@ impl Chunk {
    pub fn new(
        file_id: i32,
        uuid: String,
-        chunk_index: u32,
+        chunk_id: String,
        chunk_type: ChunkType,
        rule: ChunkRule,
        start_frame: i64,
@@ -149,13 +148,11 @@ impl Chunk {
        content: serde_json::Value,
    ) -> Self {
        let frame_count = (end_frame - start_frame) as i32;
-        let chunk_id = format!("{}_{}", uuid, chunk_index);

        Self {
            file_id,
            uuid,
            chunk_id,
-            chunk_index,
            chunk_type,
            rule,
            fps,
@@ -177,7 +174,7 @@ impl Chunk {
    pub fn new_visual(
        file_id: i32,
        uuid: String,
-        chunk_index: u32,
+        chunk_id: String,
        start_frame: i64,
        end_frame: i64,
        fps: f64,
@@ -189,7 +186,7 @@ impl Chunk {
        Self::new(
            file_id,
            uuid,
-            chunk_index,
+            chunk_id,
            ChunkType::Visual,
            ChunkRule::Rule2,
            start_frame,
@@ -203,7 +200,7 @@ impl Chunk {
    pub fn from_yolo_frames(
        file_id: i32,
        uuid: String,
-        chunk_index: u32,
+        chunk_id: String,
        start_frame: i64,
        end_frame: i64,
        fps: f64,
@@ -307,7 +304,7 @@ impl Chunk {
        Self::new_visual(
            file_id,
            uuid,
-            chunk_index,
+            chunk_id,
            start_frame,
            end_frame,
            fps,
@@ -334,7 +331,7 @@ impl Chunk {
    pub fn from_seconds(
        file_id: i32,
        uuid: String,
-        chunk_index: u32,
+        chunk_id: String,
        chunk_type: ChunkType,
        rule: ChunkRule,
        start_time: f64,
@@ -347,7 +344,7 @@ impl Chunk {
        Self::new(
            file_id,
            uuid,
-            chunk_index,
+            chunk_id,
            chunk_type,
            rule,
            start_frame,
--- a/src/core/chunk/types_fixed.rs
+++ b/src/core/chunk/types_fixed.rs
@@ -103,7 +103,6 @@ pub struct Chunk {
    pub file_id: i32,
    pub uuid: String,
    pub chunk_id: String,
-    pub chunk_index: u32,
    pub chunk_type: ChunkType,
    pub rule: ChunkRule,
    /// Frames per second (can be fractional, e.g., 29.97, 23.976)
@@ -128,7 +127,7 @@ impl Chunk {
    pub fn new_visual(
        file_id: i32,
        uuid: String,
-        chunk_index: u32,
+        chunk_id: String,
        start_frame: i64,
        end_frame: i64,
        fps: f64,
@@ -140,7 +139,7 @@ impl Chunk {
        Self::new(
            file_id,
            uuid,
-            chunk_index,
+            chunk_id,
            ChunkType::Visual,
            ChunkRule::Rule2,
            start_frame,
@@ -154,7 +153,7 @@ impl Chunk {
    pub fn from_yolo_result(
        file_id: i32,
        uuid: String,
-        chunk_index: u32,
+        chunk_id: String,
        start_frame: i64,
        end_frame: i64,
        fps: f64,
@@ -263,7 +262,7 @@ impl Chunk {
        Self::new_visual(
            file_id,
            uuid,
-            chunk_index,
+            chunk_id,
            start_frame,
            end_frame,
            fps,
@@ -275,7 +274,7 @@ impl Chunk {
    pub fn new(
        file_id: i32,
        uuid: String,
-        chunk_index: u32,
+        chunk_id: String,
        chunk_type: ChunkType,
        rule: ChunkRule,
        start_frame: i64,
@@ -284,13 +283,11 @@ impl Chunk {
        content: serde_json::Value,
    ) -> Self {
        let frame_count = (end_frame - start_frame) as i32;
-        let chunk_id = format!("{}_{}", uuid, chunk_index);

        Self {
            file_id,
            uuid,
            chunk_id,
-            chunk_index,
            chunk_type,
            rule,
            fps,
--- a/src/core/db/mongodb_db.rs
+++ b/src/core/db/mongodb_db.rs
@@ -13,7 +13,6 @@ pub struct MongoDb {
 pub struct ChunkDocument {
    pub uuid: String,
    pub chunk_id: String,
-    pub chunk_index: u32,
    pub chunk_type: String,
    pub start_time: f64,
    pub end_time: f64,
@@ -34,7 +33,6 @@ impl From<Chunk> for ChunkDocument {
        Self {
            uuid: chunk.uuid,
            chunk_id: chunk.chunk_id,
-            chunk_index: chunk.chunk_index,
            chunk_type: chunk.chunk_type.as_str().to_string(),
            start_time,
            end_time,
@@ -119,7 +117,7 @@ impl MongoDb {
                    file_id: 0,
                    uuid: doc.uuid,
                    chunk_id: doc.chunk_id,
-                    chunk_index: doc.chunk_index,
+
                    chunk_type,
                    rule: ChunkRule::Rule1,
                    fps: doc.fps,
@@ -178,7 +176,7 @@ impl MongoDb {
                    file_id: 0,
                    uuid: doc.uuid,
                    chunk_id: doc.chunk_id,
-                    chunk_index: doc.chunk_index,
+
                    chunk_type,
                    rule: ChunkRule::Rule1,
                    fps: doc.fps,
@@ -234,7 +232,7 @@ impl MongoDb {
                    file_id: 0,
                    uuid: doc.uuid,
                    chunk_id: doc.chunk_id,
-                    chunk_index: doc.chunk_index,
+
                    chunk_type,
                    rule: ChunkRule::Rule1,
                    fps: doc.fps,
--- a/src/core/db/postgres_db.rs
+++ b/src/core/db/postgres_db.rs
@@ -56,7 +56,7 @@ pub struct CandidateRecord {

 #[derive(Debug, Clone, Serialize, Deserialize, sqlx::FromRow)]
 pub struct FileIdentityRecord {
-    pub id: i64,
+    pub id: i32,
    pub file_uuid: String,
    pub identity_id: i32,
    pub name: String,
@@ -116,7 +116,7 @@ pub struct IdentityFaceRecord {

 #[derive(Debug, Clone, Serialize, Deserialize, sqlx::FromRow)]
 pub struct IdentityChunkRecord {
-    pub id: i64,
+    pub id: i32,
    pub file_uuid: String,
    pub chunk_id: String,
    pub chunk_type: String,
@@ -788,8 +788,8 @@ impl PostgresDb {
            .await?;

        // Chunks
-        sqlx::query("CREATE TABLE IF NOT EXISTS chunks (id SERIAL PRIMARY KEY, file_uuid VARCHAR(32) NOT NULL, chunk_id VARCHAR(64) NOT NULL, chunk_index INTEGER NOT NULL, chunk_type VARCHAR(32) NOT NULL, start_time DOUBLE PRECISION NOT NULL, end_time DOUBLE PRECISION NOT NULL, fps DOUBLE PRECISION DEFAULT 24.0, start_frame BIGINT DEFAULT 0, end_frame BIGINT DEFAULT 0, content JSONB NOT NULL, metadata JSONB, vector_id VARCHAR(64), created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, UNIQUE(file_uuid, chunk_id))").execute(pool).await?;
-        sqlx::query("CREATE INDEX IF NOT EXISTS idx_chunks_file ON chunks(file_uuid)")
+        sqlx::query("CREATE TABLE IF NOT EXISTS chunk (id SERIAL PRIMARY KEY, file_uuid VARCHAR(32) NOT NULL, chunk_id VARCHAR(64) NOT NULL, chunk_type VARCHAR(32) NOT NULL, start_time DOUBLE PRECISION NOT NULL, end_time DOUBLE PRECISION NOT NULL, fps DOUBLE PRECISION DEFAULT 24.0, start_frame BIGINT DEFAULT 0, end_frame BIGINT DEFAULT 0, content JSONB NOT NULL, metadata JSONB, vector_id VARCHAR(64), created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, UNIQUE(file_uuid, chunk_id))").execute(pool).await?;
+        sqlx::query("CREATE INDEX IF NOT EXISTS idx_chunk_file ON chunk(file_uuid)")
            .execute(pool)
            .await?;
        sqlx::query("CREATE INDEX IF NOT EXISTS idx_chunks_type ON chunks(chunk_type)")
@@ -845,7 +845,7 @@ impl PostgresDb {

        sqlx::query(
            "CREATE TRIGGER chunks_search_vector_trigger
-            BEFORE INSERT OR UPDATE ON chunks
+            BEFORE INSERT OR UPDATE ON chunk
            FOR EACH ROW EXECUTE FUNCTION update_search_vector()",
        )
        .execute(pool)
@@ -1232,7 +1232,7 @@ impl PostgresDb {
        let tx = self.pool.begin().await?;

        let chunk_vectors = schema::table_name("chunk_vectors");
-        let chunks = schema::table_name("chunks");
+        let chunks = "dev.chunk";
        let processor_results = schema::table_name("processor_results");
        let videos = schema::table_name("videos");

@@ -1254,6 +1254,11 @@ impl PostgresDb {
        .execute(&self.pool)
        .await?;

+        sqlx::query(&format!("DELETE FROM dev.pre_chunks WHERE file_uuid = $1"))
+            .bind(uuid)
+            .execute(&self.pool)
+            .await?;
+
        sqlx::query(&format!("DELETE FROM {} WHERE file_uuid = $1", videos))
            .bind(uuid)
            .execute(&self.pool)
@@ -1277,7 +1282,7 @@ impl PostgresDb {
    }

    pub async fn get_chunk_count(&self, uuid: &str) -> Result<(i64, i64)> {
-        let chunks = schema::table_name("chunks");
+        let chunks = "dev.chunk";
        let sentence_count: i64 = sqlx::query_scalar(&format!(
            "SELECT COUNT(*) FROM {} WHERE file_uuid = $1 AND chunk_type = 'sentence'",
            chunks
@@ -2417,8 +2422,10 @@ impl PostgresDb {
    pub async fn get_identity_by_uuid(&self, uuid: &Uuid) -> Result<Option<IdentityDetailRecord>> {
        let query = r#"
            SELECT id, uuid, name, identity_type, source, status, metadata, reference_data,
-                   voice_embedding, identity_embedding, face_embedding,
-                   tmdb_id, tmdb_profile, created_at, NULL::timestamptz as updated_at
+                   voice_embedding::real[] as voice_embedding,
+                   identity_embedding::real[] as identity_embedding,
+                   face_embedding::real[] as face_embedding,
+                   tmdb_id, tmdb_profile, created_at::timestamptz as created_at, NULL::timestamptz as updated_at
            FROM identities
            WHERE uuid = $1
        "#;
@@ -2497,7 +2504,7 @@ impl PostgresDb {
        let query = r#"
            SELECT c.id, c.file_uuid, c.chunk_id, c.chunk_type,
                   c.start_time, c.end_time, c.text_content, c.content
-            FROM chunks c
+            FROM dev.chunk c
            WHERE c.file_uuid IN (
                SELECT DISTINCT fd.file_uuid
                FROM face_detections fd
@@ -2538,7 +2545,7 @@ impl PostgresDb {
    }

    pub async fn store_chunk(&self, chunk: &Chunk) -> Result<()> {
-        let table = schema::table_name("chunks");
+        let table = "dev.chunk";
        let content_with_rule = serde_json::json!({
            "rule": chunk.rule.as_str(),
            "data": chunk.content
@@ -2567,9 +2574,9 @@ impl PostgresDb {

        sqlx::query(&format!(
            r#"
-            INSERT INTO {} (file_id, file_uuid, chunk_id, old_chunk_id, chunk_index, chunk_type, start_time, end_time, fps, start_frame, end_frame, text_content, content, metadata, vector_id, frame_count, pre_chunk_ids, parent_chunk_id, child_chunk_ids)
-            VALUES ($1, $2, $3, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12::jsonb, $13::jsonb, $14, $15, $16, $17, $18)
-            ON CONFLICT (file_uuid, old_chunk_id) DO UPDATE SET
+            INSERT INTO {} (file_id, file_uuid, chunk_id, chunk_type, start_time, end_time, fps, start_frame, end_frame, text_content, content, metadata, vector_id, frame_count, pre_chunk_ids, parent_chunk_id, child_chunk_ids)
+            VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11::jsonb, $12::jsonb, $13, $14, $15, $16, $17)
+            ON CONFLICT (file_uuid, chunk_id) DO UPDATE SET
                start_time = EXCLUDED.start_time,
                end_time = EXCLUDED.end_time,
                fps = EXCLUDED.fps,
@@ -2590,7 +2597,6 @@ impl PostgresDb {
        .bind(chunk.file_id)
        .bind(&chunk.uuid)
        .bind(&chunk.chunk_id)
-        .bind(chunk.chunk_index as i32)
        .bind(chunk.chunk_type.as_str())
        .bind(chunk.start_time().seconds())
        .bind(chunk.end_time().seconds())
@@ -2616,7 +2622,7 @@ impl PostgresDb {
        chunk: &Chunk,
        tx: &mut sqlx::Transaction<'_, sqlx::Postgres>,
    ) -> Result<()> {
-        let table = schema::table_name("chunks");
+        let table = "dev.chunk";
        let content_with_rule = serde_json::json!({
            "rule": chunk.rule.as_str(),
            "data": chunk.content
@@ -2642,9 +2648,9 @@ impl PostgresDb {

        sqlx::query(&format!(
            r#"
-            INSERT INTO {} (file_id, file_uuid, chunk_id, old_chunk_id, chunk_index, chunk_type, start_time, end_time, fps, start_frame, end_frame, text_content, content, metadata, vector_id, frame_count, pre_chunk_ids, parent_chunk_id, child_chunk_ids)
-            VALUES ($1, $2, $3, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12::jsonb, $13::jsonb, $14, $15, $16, $17, $18)
-            ON CONFLICT (file_uuid, old_chunk_id) DO UPDATE SET
+            INSERT INTO {} (file_id, file_uuid, chunk_id, chunk_type, start_time, end_time, fps, start_frame, end_frame, text_content, content, metadata, vector_id, frame_count, pre_chunk_ids, parent_chunk_id, child_chunk_ids)
+            VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11::jsonb, $12::jsonb, $13, $14, $15, $16, $17)
+            ON CONFLICT (file_uuid, chunk_id) DO UPDATE SET
                start_time = EXCLUDED.start_time,
                end_time = EXCLUDED.end_time,
                fps = EXCLUDED.fps,
@@ -2665,7 +2671,6 @@ impl PostgresDb {
        .bind(chunk.file_id)
        .bind(&chunk.uuid)
        .bind(&chunk.chunk_id)
-        .bind(chunk.chunk_index as i32)
        .bind(chunk.chunk_type.as_str())
        .bind(chunk.start_time().seconds())
        .bind(chunk.end_time().seconds())
@@ -2687,9 +2692,9 @@ impl PostgresDb {
    }

    pub async fn get_chunks_by_uuid(&self, uuid: &str) -> Result<Vec<Chunk>> {
-        let table = schema::table_name("chunks");
+        let table = "dev.chunk";
        let rows = sqlx::query(&format!(
-            "SELECT COALESCE(file_id, 0) as file_id, file_uuid as uuid, chunk_id, chunk_index, chunk_type, COALESCE(fps, 24.0) as fps, COALESCE(start_frame, 0) as start_frame, COALESCE(end_frame, 0) as end_frame, text_content, content, metadata, vector_id, COALESCE(frame_count, 0) as frame_count, pre_chunk_ids, parent_chunk_id::text as parent_chunk_id, child_chunk_ids, visual_stats FROM {} WHERE file_uuid = $1 ORDER BY chunk_index",
+            "SELECT COALESCE(file_id, 0) as file_id, file_uuid as uuid, chunk_id, chunk_type, COALESCE(fps, 24.0) as fps, COALESCE(start_frame, 0) as start_frame, COALESCE(end_frame, 0) as end_frame, text_content, content, metadata, vector_id, COALESCE(frame_count, 0) as frame_count, pre_chunk_ids, parent_chunk_id::text as parent_chunk_id, child_chunk_ids, visual_stats FROM {} WHERE file_uuid = $1 ORDER BY id",
            table
        ))
        .bind(uuid)
@@ -2699,8 +2704,7 @@ impl PostgresDb {
        let chunks: Vec<Chunk> = rows
            .into_iter()
            .map(|r| {
-                let chunk_type_str: String = r.get(4);
-                let chunk_index: i32 = r.get(3);
+                let chunk_type_str: String = r.get(3);
                let chunk_type = match chunk_type_str.as_str() {
                    "time" => ChunkType::TimeBased,
                    "sentence" => ChunkType::Sentence,
@@ -2740,7 +2744,7 @@ impl PostgresDb {
                    file_id,
                    uuid: r.get("uuid"),
                    chunk_id: r.get("chunk_id"),
-                    chunk_index: chunk_index as u32,
+
                    chunk_type,
                    rule,

@@ -2768,9 +2772,9 @@ impl PostgresDb {
        chunk_id: &str,
        uuid: &str,
    ) -> Result<Option<Chunk>> {
-        let table = schema::table_name("chunks");
+        let table = "dev.chunk";
        let row = sqlx::query(&format!(
-            "SELECT COALESCE(file_id, 0) as file_id, uuid, chunk_id, chunk_index, chunk_type, COALESCE(fps, 24.0) as fps, COALESCE(start_frame, 0) as start_frame, COALESCE(end_frame, 0) as end_frame, text_content, content, metadata, vector_id, COALESCE(frame_count, 0) as frame_count, pre_chunk_ids, parent_chunk_id, child_chunk_ids, visual_stats FROM {} WHERE chunk_id = $1 AND uuid = $2",
+            "SELECT COALESCE(file_id, 0) as file_id, uuid, chunk_id, chunk_type, COALESCE(fps, 24.0) as fps, COALESCE(start_frame, 0) as start_frame, COALESCE(end_frame, 0) as end_frame, text_content, content, metadata, vector_id, COALESCE(frame_count, 0) as frame_count, pre_chunk_ids, parent_chunk_id, child_chunk_ids, visual_stats FROM {} WHERE chunk_id = $1 AND uuid = $2",
            table
        ))
            .bind(chunk_id)
@@ -2779,25 +2783,24 @@ impl PostgresDb {
            .await?;

        if let Some(r) = row {
-            let chunk_type_str: String = r.get(4);
-            let chunk_index: i32 = r.get(3);
-            let chunk_type = match chunk_type_str.as_str() {
-                "time" => ChunkType::TimeBased,
-                "sentence" => ChunkType::Sentence,
-                "cut" => ChunkType::Cut,
-                "trace" => ChunkType::Trace,
-                "story" => ChunkType::Story,
-                _ => ChunkType::TimeBased,
-            };
+            let chunk_type_str: String = r.get(3);
+                let chunk_type = match chunk_type_str.as_str() {
+                    "time" => ChunkType::TimeBased,
+                    "sentence" => ChunkType::Sentence,
+                    "cut" => ChunkType::Cut,
+                    "trace" => ChunkType::Trace,
+                    "story" => ChunkType::Story,
+                    _ => ChunkType::TimeBased,
+                };

-            let content: serde_json::Value = r.get(9);
-            let metadata: Option<serde_json::Value> = r.get(10);
+                let content: serde_json::Value = r.get(8);
+                let metadata: Option<serde_json::Value> = r.get(9);

-            let pre_chunk_ids: Vec<i32> = r.try_get(13).unwrap_or_default();
-            let parent_chunk_id: Option<String> = r.try_get(14).ok().flatten();
-            let child_chunk_ids: Vec<String> = r.try_get(15).unwrap_or_default();
+                let pre_chunk_ids: Vec<i32> = r.try_get(12).unwrap_or_default();
+                let parent_chunk_id: Option<String> = r.try_get(13).ok().flatten();
+                let child_chunk_ids: Vec<String> = r.try_get(14).unwrap_or_default();

-            let (rule, content_data) = if content.get("rule").is_some() {
+                let (rule, content_data) = if content.get("rule").is_some() {
                let rule_str = content
                    .get("rule")
                    .and_then(|v| v.as_str())
@@ -2820,7 +2823,7 @@ impl PostgresDb {
                file_id,
                uuid: r.get("uuid"),
                chunk_id: r.get("chunk_id"),
-                chunk_index: chunk_index as u32,
+
                chunk_type,
                rule,
                fps: r.get("fps"),
@@ -2996,9 +2999,9 @@ impl PostgresDb {
        start_time: f64,
        end_time: f64,
    ) -> Result<Vec<Chunk>> {
-        let table = schema::table_name("chunks");
+        let table = "dev.chunk";
        let rows = sqlx::query(&format!(
-            "SELECT file_id, uuid, chunk_id, chunk_index, chunk_type, start_time, end_time, fps, start_frame, end_frame, text_content, content, metadata, vector_id, frame_count, pre_chunk_ids, parent_chunk_id::text as parent_chunk_id, child_chunk_ids 
+            "SELECT file_id, uuid, chunk_id, chunk_type, start_time, end_time, fps, start_frame, end_frame, text_content, content, metadata, vector_id, frame_count, pre_chunk_ids, parent_chunk_id::text as parent_chunk_id, child_chunk_ids 
             FROM {} 
             WHERE file_id = $1 AND start_time >= $2 AND end_time <= $3 
             ORDER BY start_time",
@@ -3013,8 +3016,7 @@ impl PostgresDb {
        let chunks: Vec<Chunk> = rows
            .into_iter()
            .map(|r| {
-                let chunk_type_str: String = r.get(4);
-                let chunk_index: i32 = r.get(3);
+                let chunk_type_str: String = r.get(3);
                let chunk_type = match chunk_type_str.as_str() {
                    "time" => ChunkType::TimeBased,
                    "sentence" => ChunkType::Sentence,
@@ -3024,12 +3026,12 @@ impl PostgresDb {
                    _ => ChunkType::TimeBased,
                };

-                let content: serde_json::Value = r.get(11);
-                let metadata: Option<serde_json::Value> = r.get(12);
+                let content: serde_json::Value = r.get(10);
+                let metadata: Option<serde_json::Value> = r.get(11);

-                let pre_chunk_ids: Vec<i32> = r.try_get(15).unwrap_or_default();
-                let parent_chunk_id: Option<String> = r.try_get(16).ok().flatten();
-                let child_chunk_ids: Vec<String> = r.try_get(17).unwrap_or_default();
+                let pre_chunk_ids: Vec<i32> = r.try_get(14).unwrap_or_default();
+                let parent_chunk_id: Option<String> = r.try_get(15).ok().flatten();
+                let child_chunk_ids: Vec<String> = r.try_get(16).unwrap_or_default();

                let (rule, content_data) = if content.get("rule").is_some() {
                    let rule_str = content
@@ -3054,7 +3056,7 @@ impl PostgresDb {
                    file_id,
                    uuid: r.get("uuid"),
                    chunk_id: r.get("chunk_id"),
-                    chunk_index: chunk_index as u32,
+
                    chunk_type,
                    rule,

@@ -3082,9 +3084,9 @@ impl PostgresDb {
            return Ok(vec![]);
        }

-        let table = schema::table_name("chunks");
+        let table = schema::table_name("chunk"); // resolve via schema helper, not a hard-coded "dev." prefix
        let rows = sqlx::query(&format!(
-            "SELECT file_id, uuid, chunk_id, chunk_index, chunk_type, fps, start_frame, end_frame, text_content, content, metadata, vector_id, frame_count, pre_chunk_ids, parent_chunk_id::text as parent_chunk_id, child_chunk_ids FROM {} WHERE chunk_id = ANY($1) ORDER BY chunk_index",
+            "SELECT file_id, uuid, chunk_id, chunk_type, fps, start_frame, end_frame, text_content, content, metadata, vector_id, frame_count, pre_chunk_ids, parent_chunk_id::text as parent_chunk_id, child_chunk_ids FROM {} WHERE chunk_id = ANY($1) ORDER BY id",
            table
        ))
        .bind(chunk_ids)
@@ -3094,8 +3096,7 @@ impl PostgresDb {
        let chunks: Vec<Chunk> = rows
            .into_iter()
            .map(|r| {
-                let chunk_type_str: String = r.get(4);
-                let chunk_index: i32 = r.get(3);
+                let chunk_type_str: String = r.get(3);
                let chunk_type = match chunk_type_str.as_str() {
                    "time" => ChunkType::TimeBased,
                    "sentence" => ChunkType::Sentence,
@@ -3135,7 +3136,7 @@ impl PostgresDb {
                    file_id,
                    uuid: r.get("uuid"),
                    chunk_id: r.get("chunk_id"),
-                    chunk_index: chunk_index as u32,
+
                    chunk_type,
                    rule,

@@ -3192,7 +3193,7 @@ impl PostgresDb {
    }

    pub async fn update_vector_id(&self, chunk_id: &str, vector_id: &str) -> Result<()> {
-        let table = schema::table_name("chunks");
+        let table = schema::table_name("chunk"); // resolve via schema helper, not a hard-coded "dev." prefix
        sqlx::query(&format!(
            "UPDATE {} SET vector_id = $1 WHERE chunk_id = $2",
            table
@@ -3214,12 +3215,12 @@ impl PostgresDb {
    }

    pub async fn search_text(&self, query: &str, chunk_type: Option<&str>) -> Result<Vec<Chunk>> {
-        let table = schema::table_name("chunks");
+        let table = schema::table_name("chunk"); // resolve via schema helper, not a hard-coded "dev." prefix
        let query_pattern = format!("%{}%", query);

        let sql = match chunk_type {
-            Some(_) => &format!("SELECT uuid, chunk_id, chunk_index, chunk_type, start_time, end_time, fps, start_frame, end_frame, content, metadata, vector_id, parent_chunk_id, child_chunk_ids FROM {} WHERE content->>'text' ILIKE $1 AND chunk_type = $2 ORDER BY chunk_index", table),
-            None => &format!("SELECT uuid, chunk_id, chunk_index, chunk_type, start_time, end_time, fps, start_frame, end_frame, content, metadata, vector_id, parent_chunk_id, child_chunk_ids FROM {} WHERE content->>'text' ILIKE $1 ORDER BY chunk_index", table),
+            Some(_) => &format!("SELECT uuid, chunk_id, chunk_type, start_time, end_time, fps, start_frame, end_frame, content, metadata, vector_id, parent_chunk_id, child_chunk_ids FROM {} WHERE content->>'text' ILIKE $1 AND chunk_type = $2 ORDER BY id", table),
+            None => &format!("SELECT uuid, chunk_id, chunk_type, start_time, end_time, fps, start_frame, end_frame, content, metadata, vector_id, parent_chunk_id, child_chunk_ids FROM {} WHERE content->>'text' ILIKE $1 ORDER BY id", table),
        };

        let chunks = if let Some(ct) = chunk_type {
@@ -3228,7 +3229,6 @@ impl PostgresDb {
                (
                    String,
                    String,
-                    i32,
                    String,
                    f64,
                    f64,
@@ -3252,7 +3252,6 @@ impl PostgresDb {
                (
                    String,
                    String,
-                    i32,
                    String,
                    f64,
                    f64,
@@ -3274,7 +3273,7 @@ impl PostgresDb {
        let results: Vec<Chunk> = chunks
            .into_iter()
            .map(|r| {
-                let chunk_type = match r.3.as_str() {
+                let chunk_type = match r.2.as_str() {
                    "time_based" => ChunkType::TimeBased,
                    "sentence" => ChunkType::Sentence,
                    "cut" => ChunkType::Cut,
@@ -3284,29 +3283,29 @@ impl PostgresDb {
                };

                let content: serde_json::Value =
-                    serde_json::from_str(&r.9).unwrap_or(serde_json::json!({}));
+                    serde_json::from_str(&r.8).unwrap_or(serde_json::json!({}));

                let metadata: Option<serde_json::Value> =
-                    r.10.and_then(|m| serde_json::from_str(&m).ok());
+                    r.9.and_then(|m| serde_json::from_str(&m).ok());

                Chunk {
                    file_id: 0,
                    uuid: r.0,
                    chunk_id: r.1,
-                    chunk_index: r.2 as u32,
+
                    chunk_type,
                    rule: ChunkRule::Rule1,
-                    fps: r.6,
-                    start_frame: r.7,
-                    end_frame: r.8,
-                    text_content: Some(r.9),
+                    fps: r.5,
+                    start_frame: r.6,
+                    end_frame: r.7,
+                    text_content: Some(r.8),
                    content,
                    metadata,
-                    vector_id: r.11,
+                    vector_id: r.10,
                    frame_count: 0,
                    pre_chunk_ids: vec![],
-                    parent_chunk_id: r.12,
-                    child_chunk_ids: r.13,
+                    parent_chunk_id: r.11,
+                    child_chunk_ids: r.12,
                    visual_stats: None,
                }
            })
@@ -3321,13 +3320,13 @@ impl PostgresDb {
        uuid: Option<&str>,
        limit: usize,
    ) -> Result<Vec<Bm25Result>> {
-        let table = schema::table_name("chunks");
+        let table = schema::table_name("chunk"); // resolve via schema helper, not a hard-coded "dev." prefix
        let tsquery = self.prepare_tsquery(query).await?;

        let sql = match uuid {
            Some(_) => &format!(
                r#"
-                SELECT c.chunk_id, c.file_uuid, c.chunk_index, c.chunk_type, c.start_frame, c.end_frame, c.fps, c.start_time, c.end_time, 
+                SELECT c.chunk_id, c.file_uuid, c.chunk_type, c.start_frame, c.end_frame, c.fps, c.start_time, c.end_time, 
                       c.text_content, GREATEST(ts_rank_cd(c.search_vector, to_tsquery('english', $1)), ts_rank_cd(pc.summary_tsvector, to_tsquery('english', $1))) as bm25_score,
                       c.visual_stats,
                       pc.metadata->'structured_summary' as scene_summary,
@@ -3342,7 +3341,7 @@ impl PostgresDb {
            ),
            None => &format!(
                r#"
-                SELECT c.chunk_id, c.file_uuid, c.chunk_index, c.chunk_type, c.start_frame, c.end_frame, c.fps, c.start_time, c.end_time, 
+                SELECT c.chunk_id, c.file_uuid, c.chunk_type, c.start_frame, c.end_frame, c.fps, c.start_time, c.end_time, 
                       c.text_content, GREATEST(ts_rank_cd(c.search_vector, to_tsquery('english', $1)), ts_rank_cd(pc.summary_tsvector, to_tsquery('english', $1))) as bm25_score,
                       c.visual_stats,
                       pc.metadata->'structured_summary' as scene_summary,
@@ -3406,7 +3405,7 @@ impl PostgresDb {
                Bm25Result {
                    chunk_id: r.0,
                    uuid: r.1,
-                    chunk_index: r.2 as u32,
+
                    chunk_type: r.3,
                    start_frame: r.4,
                    end_frame: r.5,
@@ -3472,7 +3471,7 @@ impl PostgresDb {
                HybridSearchResult {
                    chunk_id: r.chunk_id.clone(),
                    uuid: r.uuid.clone(),
-                    chunk_index: r.chunk_index,
+
                    chunk_type: r.chunk_type.clone(),
                    start_frame: r.start_frame,
                    end_frame: r.end_frame,
@@ -3526,7 +3525,7 @@ impl PostgresDb {
                    HybridSearchResult {
                        chunk_id: r.chunk_id.clone(),
                        uuid: r.uuid.clone(),
-                        chunk_index: chunk_data.map(|c| c.chunk_index).unwrap_or(0),
+
                        chunk_type: chunk_data
                            .map(|c| c.chunk_type.as_str().to_string())
                            .unwrap_or_default(),
@@ -3779,7 +3778,6 @@ pub struct SceneSummary {
 pub struct Bm25Result {
    pub chunk_id: String,
    pub uuid: String,
-    pub chunk_index: u32,
    pub chunk_type: String,
    pub start_frame: i64,
    pub end_frame: i64,
@@ -3797,7 +3795,6 @@ pub struct Bm25Result {
 pub struct HybridSearchResult {
    pub uuid: String,
    pub chunk_id: String,
-    pub chunk_index: u32,
    pub chunk_type: String,
    pub start_frame: i64,
    pub end_frame: i64,
@@ -4443,7 +4440,7 @@ impl PostgresDb {
        total_frames: u64,
    ) -> Result<()> {
        let table = schema::table_name("videos");
-        let chunks_table = schema::table_name("chunks");
+        let chunks_table = schema::table_name("chunk"); // same helper as the videos/pre_chunks tables around it
        let pre_chunks_table = schema::table_name("pre_chunks");

        // Query chunks count and frames
@@ -4622,7 +4619,7 @@ impl PostgresDb {
        let results = sqlx::query_as::<_, SemanticSearchResult>(
            r#"
            SELECT 
-                id, chunk_index as scene_order, start_time, end_time, 
+                id as scene_order, start_time, end_time, 
                COALESCE(summary_text, text_content, '') as summary,
                metadata,
                (1 - (embedding <=> $1::vector)) as similarity
@@ -4820,7 +4817,7 @@ mod tests {
            "file_id": 1,
            "uuid": "test",
            "chunk_id": "c1",
-            "chunk_index": 0,
+
            "chunk_type": "time_based",
            "rule": "rule1",
            "start_time": 0.0,
@@ -4960,7 +4957,7 @@ mod tests {
        let result = Bm25Result {
            chunk_id: "sentence_001".to_string(),
            uuid: "test-uuid".to_string(),
-            chunk_index: 1,
+
            chunk_type: "sentence".to_string(),
            start_frame: 0,
            end_frame: 150,
@@ -4985,7 +4982,7 @@ mod tests {
        let result = HybridSearchResult {
            chunk_id: "sentence_001".to_string(),
            uuid: "test-uuid".to_string(),
-            chunk_index: 1,
+
            chunk_type: "sentence".to_string(),
            start_frame: 0,
            end_frame: 150,
--- a/src/core/db/qdrant_db.rs
+++ b/src/core/db/qdrant_db.rs
@@ -120,9 +120,16 @@ impl QdrantDb {
            .json(&body)
            .send()
            .await
-            .context(format!("Failed to create Qdrant collection: {}", collection))?;
+            .context(format!(
+                "Failed to create Qdrant collection: {}",
+                collection
+            ))?;

-        tracing::info!("Created Qdrant collection: {} (dim={})", collection, vector_dim);
+        tracing::info!(
+            "Created Qdrant collection: {} (dim={})",
+            collection,
+            vector_dim
+        );
        Ok(())
    }

--- a/src/core/db/sync_db.rs
+++ b/src/core/db/sync_db.rs
@@ -129,7 +129,7 @@ impl SyncDb {
            let chunk = Chunk::from_seconds(
                0, // file_id - will be set later
                uuid.to_string(),
-                i as u32,
+                i.to_string(),
                ChunkType::Sentence,
                ChunkRule::Rule1,
                segment.start,
--- a/src/core/embedding/comic_embed.rs
+++ b/src/core/embedding/comic_embed.rs
@@ -43,8 +43,7 @@ impl Embedder {
    }

    fn default_url() -> String {
-        std::env::var("MOMENTRY_EMBED_URL")
-            .unwrap_or_else(|_| "http://localhost:11434".to_string())
+        std::env::var("MOMENTRY_EMBED_URL").unwrap_or_else(|_| "http://localhost:11434".to_string())
    }

    pub async fn embed_text(&self, text: &str) -> Result<Vec<f32>> {
@@ -91,7 +90,12 @@ impl Embedder {
                .await
                .context("Failed to parse embedding response")?;

-            Ok(result.data.into_iter().next().map(|d| d.embedding).unwrap_or_default())
+            Ok(result
+                .data
+                .into_iter()
+                .next()
+                .map(|d| d.embedding)
+                .unwrap_or_default())
        } else {
            let url = format!("{}/api/embeddings", self.base_url);
            let response = self
--- a/src/core/processor/asr.rs
+++ b/src/core/processor/asr.rs
@@ -1,11 +1,8 @@
 use anyhow::{Context, Result};
 use serde::{Deserialize, Serialize};
-use std::time::Duration;

 use super::executor::PythonExecutor;

-const ASR_TIMEOUT: Duration = Duration::from_secs(1800); // 30 minutes
-
 #[derive(Debug, Serialize, Deserialize)]
 pub struct AsrResult {
    pub language: Option<String>,
@@ -36,7 +33,7 @@ pub async fn process_asr(
            &[video_path, output_path],
            uuid,
            "ASR",
-            Some(ASR_TIMEOUT),
+            None,
        )
        .await
        .with_context(|| format!("Failed to run {:?}", script_path))?;
--- a/src/core/processor/executor.rs
+++ b/src/core/processor/executor.rs
@@ -247,7 +247,10 @@ impl PythonExecutor {
                            let mut partial_path = out.to_path_buf();
                            partial_path.set_extension("json.partial");
                            let _ = std::fs::rename(tmp, &partial_path);
-                            tracing::warn!("[Executor] Partial output preserved: {:?}", partial_path);
+                            tracing::warn!(
+                                "[Executor] Partial output preserved: {:?}",
+                                partial_path
+                            );
                        } else {
                            let mut err_path = out.to_path_buf();
                            err_path.set_extension("json.err");
--- a/src/core/processor/visual_chunk.rs
+++ b/src/core/processor/visual_chunk.rs
@@ -131,7 +131,7 @@ fn create_fixed_frame_chunks(
        let chunk = crate::core::chunk::Chunk::from_yolo_frames(
            file_id,
            uuid.to_string(),
-            chunk_index,
+            format!("vis_{}", chunk_index),
            start_frame,
            end_frame,
            fps,
@@ -190,7 +190,7 @@ fn create_similarity_based_chunks(
                let chunk = crate::core::chunk::Chunk::from_yolo_frames(
                    file_id,
                    uuid.to_string(),
-                    chunk_index,
+                    format!("vis_{}", chunk_index),
                    current_start_frame,
                    end_frame,
                    fps,
@@ -214,7 +214,7 @@ fn create_similarity_based_chunks(
        let chunk = crate::core::chunk::Chunk::from_yolo_frames(
            file_id,
            uuid.to_string(),
-            chunk_index,
+            format!("vis_{}", chunk_index),
            current_start_frame,
            end_frame,
            fps,
--- a/src/core/tmdb/face_agent.rs
+++ b/src/core/tmdb/face_agent.rs
@@ -13,11 +13,17 @@ struct TmdbIdentity {
 }

 fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
-    if a.len() != b.len() || a.is_empty() { return 0.0; }
+    if a.len() != b.len() || a.is_empty() {
+        return 0.0;
+    }
    let dot: f32 = a.iter().zip(b).map(|(x, y)| x * y).sum();
    let na: f32 = a.iter().map(|x| x * x).sum::<f32>().sqrt();
    let nb: f32 = b.iter().map(|x| x * x).sum::<f32>().sqrt();
-    if na == 0.0 || nb == 0.0 { 0.0 } else { dot / (na * nb) }
+    if na == 0.0 || nb == 0.0 {
+        0.0
+    } else {
+        dot / (na * nb)
+    }
 }

 /// Match face detections against TMDb identities using iterative multi-angle propagation.
@@ -42,10 +48,11 @@ pub async fn match_faces_against_tmdb(db: &PostgresDb, file_uuid: &str) -> Resul
    let fd_rows = sqlx::query_as::<_, (i32, Vec<f32>)>(
        "SELECT trace_id, embedding FROM dev.face_detections \
         WHERE file_uuid=$1 AND trace_id IS NOT NULL AND embedding IS NOT NULL \
-         ORDER BY trace_id"
+         ORDER BY trace_id",
    )
    .bind(file_uuid)
-    .fetch_all(pool).await?;
+    .fetch_all(pool)
+    .await?;

    if fd_rows.is_empty() {
        info!("[TKG-MATCH] No face detections for {}", file_uuid);
@@ -77,14 +84,23 @@ pub async fn match_faces_against_tmdb(db: &PostgresDb, file_uuid: &str) -> Resul
        for (id, name, tmdb_emb) in &tmdb_rows {
            for face in faces {
                let s = cosine_similarity(face, tmdb_emb);
-                if s > best_sim { best_sim = s; best_id = *id; best_name = name.clone(); }
+                if s > best_sim {
+                    best_sim = s;
+                    best_id = *id;
+                    best_name = name.clone();
+                }
            }
        }
        if best_sim >= TH {
            matched.insert(tid, (best_id, best_name));
        }
    }
-    info!("[TKG-MATCH] Round 1: {} ({}/{})", matched.len(), matched.len() * 100 / total, total);
+    info!(
+        "[TKG-MATCH] Round 1: {} ({}% of {})",
+        matched.len(),
+        matched.len() * 100 / total,
+        total
+    );

    // Round 2+: propagate
    for round_n in 2..=10 {
@@ -98,7 +114,9 @@ pub async fn match_faces_against_tmdb(db: &PostgresDb, file_uuid: &str) -> Resul

        let mut new_matches: Vec<(i32, i32, String)> = Vec::new();
        for (&tid, faces) in &trace_faces {
-            if matched.contains_key(&tid) || faces.is_empty() { continue; }
+            if matched.contains_key(&tid) || faces.is_empty() {
+                continue;
+            }
            let ref_face = &faces[0];
            let mut best_id = 0i32;
            let mut best_name = String::new();
@@ -106,13 +124,19 @@ pub async fn match_faces_against_tmdb(db: &PostgresDb, file_uuid: &str) -> Resul
            for (&id, seed_faces) in &seed_pool {
                for seed in seed_faces {
                    let s = cosine_similarity(ref_face, seed);
-                    if s > best_sim { best_sim = s; best_id = id; }
+                    if s > best_sim {
+                        best_sim = s;
+                        best_id = id;
+                    }
                }
            }
            if best_sim >= TH {
                // Look up name for this id
                for (id, name, _) in &tmdb_rows {
-                    if *id == best_id { best_name = name.clone(); break; }
+                    if *id == best_id {
+                        best_name = name.clone();
+                        break;
+                    }
                }
                new_matches.push((tid, best_id, best_name));
            }
@@ -121,7 +145,9 @@ pub async fn match_faces_against_tmdb(db: &PostgresDb, file_uuid: &str) -> Resul
            matched.insert(tid, (id, name));
        }
        let new = matched.len() - prev;
-        if new < 5 { break; }
+        if new < 5 {
+            break;
+        }
    }

    // Step 4: Quality control
@@ -129,41 +155,62 @@ pub async fn match_faces_against_tmdb(db: &PostgresDb, file_uuid: &str) -> Resul
    let mut after_qc = HashMap::new();
    for (&tid, &(id, ref name)) in &matched {
        let cnt: i64 = sqlx::query_scalar(
-            "SELECT COUNT(*) FROM dev.face_detections WHERE file_uuid=$1 AND trace_id=$2"
+            "SELECT COUNT(*) FROM dev.face_detections WHERE file_uuid=$1 AND trace_id=$2",
        )
-        .bind(file_uuid).bind(tid)
-        .fetch_one(pool).await.unwrap_or(0);
+        .bind(file_uuid)
+        .bind(tid)
+        .fetch_one(pool)
+        .await
+        .unwrap_or(0);
        if cnt >= 4 {
            after_qc.insert(tid, (id, name.clone()));
        } else {
-            info!("[TKG-QC] trace {} removed: only {} face(s), need >= 4", tid, cnt);
+            info!(
+                "[TKG-QC] trace {} removed: only {} face(s), need >= 4",
+                tid, cnt
+            );
        }
    }
    let matched = after_qc;
    let removed_low = total - matched.len();
    if removed_low > 0 {
-        info!("[TKG-QC] Removed {} low-confidence traces (< 4 faces)", removed_low);
+        info!(
+            "[TKG-QC] Removed {} low-confidence traces (< 4 faces)",
+            removed_low
+        );
    }

    // 4b: Temporal collision check
    let removed_collisions = quality_check_temporal_collisions(pool, file_uuid).await?;
    if removed_collisions > 0 {
-        info!("[TKG-QC] Resolved {} temporal collisions", removed_collisions);
+        info!(
+            "[TKG-QC] Resolved {} temporal collisions",
+            removed_collisions
+        );
    }

    // Step 5: Update DB
    let mut updated = 0usize;
    for (&tid, &(id, _)) in &matched {
        let r = sqlx::query(
-            "UPDATE dev.face_detections SET identity_id=$1 WHERE file_uuid=$2 AND trace_id=$3"
+            "UPDATE dev.face_detections SET identity_id=$1 WHERE file_uuid=$2 AND trace_id=$3",
        )
-        .bind(id).bind(file_uuid).bind(tid)
-        .execute(pool).await?;
-        if r.rows_affected() > 0 { updated += 1; }
+        .bind(id)
+        .bind(file_uuid)
+        .bind(tid)
+        .execute(pool)
+        .await?;
+        if r.rows_affected() > 0 {
+            updated += 1;
+        }
    }

-    info!("[TKG-MATCH] Done: {}/{} traces matched ({}%)",
-        matched.len(), total, matched.len() * 100 / total);
+    info!(
+        "[TKG-MATCH] Done: {}/{} traces matched ({}%)",
+        matched.len(),
+        total,
+        matched.len() * 100 / total
+    );
    Ok(updated)
 }

@@ -185,10 +232,11 @@ async fn quality_check_temporal_collisions(pool: &sqlx::PgPool, file_uuid: &str)
          AND a.identity_id IS NOT NULL
          AND a.identity_id = b.identity_id
        ORDER BY a.identity_id, a.frame_number
-        "#
+        "#,
    )
    .bind(file_uuid)
-    .fetch_all(pool).await?;
+    .fetch_all(pool)
+    .await?;

    if collisions.is_empty() {
        return Ok(0);
@@ -221,10 +269,12 @@ async fn quality_check_temporal_collisions(pool: &sqlx::PgPool, file_uuid: &str)
        let victim_cnt = if cnt_a <= cnt_b { cnt_a } else { cnt_b };

        sqlx::query(
-            "UPDATE dev.face_detections SET identity_id=NULL WHERE file_uuid=$1 AND trace_id=$2"
+            "UPDATE dev.face_detections SET identity_id=NULL WHERE file_uuid=$1 AND trace_id=$2",
        )
-        .bind(file_uuid).bind(victim)
-        .execute(pool).await?;
+        .bind(file_uuid)
+        .bind(victim)
+        .execute(pool)
+        .await?;

        unbound += 1;
        warn!("[TKG-QC] Collision identity={}: trace {} vs trace {} ({} overlap frames). Unbound trace {} ({} detections)",
--- a/src/playground.rs
+++ b/src/playground.rs
@@ -2147,7 +2147,7 @@ async fn main() -> Result<()> {
                let mut chunk = Chunk::from_seconds(
                    file_id as i32,
                    uuid.clone(),
-                    i as u32,
+                    i.to_string(),
                    ChunkType::Sentence,
                    ChunkRule::Rule1,
                    seg.start,
@@ -2193,7 +2193,7 @@ async fn main() -> Result<()> {
                let chunk = Chunk::from_seconds(
                    file_id as i32,
                    uuid.clone(),
-                    i as u32,
+                    format!("cut_{}", i),
                    ChunkType::Cut,
                    ChunkRule::Rule1,
                    scene.start_time,
@@ -2216,7 +2216,7 @@ async fn main() -> Result<()> {
                let chunk = Chunk::new(
                    file_id as i32,
                    uuid.clone(),
-                    i as u32,
+                    format!("time_{}", i),
                    ChunkType::TimeBased,
                    ChunkRule::Rule1,
                    tc.start_frame,
--- a/src/verification/verifier.rs
+++ b/src/verification/verifier.rs
@@ -48,19 +48,25 @@ pub fn verify_output(processor: &ProcessorType, file_uuid: &str) -> Verification

    let json_str = match std::fs::read_to_string(&output_path) {
        Ok(s) => s,
-        Err(e) => return VerificationResult::fail(proc_name, file_uuid, &format!("unreadable: {}", e)),
+        Err(e) => {
+            return VerificationResult::fail(proc_name, file_uuid, &format!("unreadable: {}", e))
+        }
    };

    let value: serde_json::Value = match serde_json::from_str(&json_str) {
        Ok(v) => v,
-        Err(e) => return VerificationResult::fail(proc_name, file_uuid, &format!("invalid JSON: {}", e)),
+        Err(e) => {
+            return VerificationResult::fail(proc_name, file_uuid, &format!("invalid JSON: {}", e))
+        }
    };

    match processor {
        ProcessorType::Asr | ProcessorType::Asrx => {
            let segs = value.get("segments").and_then(|v| v.as_array());
            match segs {
-                Some(s) if s.is_empty() => VerificationResult::fail(proc_name, file_uuid, "0 segments"),
+                Some(s) if s.is_empty() => {
+                    VerificationResult::fail(proc_name, file_uuid, "0 segments")
+                }
                Some(s) => VerificationResult::ok(proc_name, file_uuid),
                None => VerificationResult::fail(proc_name, file_uuid, "missing 'segments' field"),
            }
@@ -68,7 +74,9 @@ pub fn verify_output(processor: &ProcessorType, file_uuid: &str) -> Verification
        ProcessorType::Cut => {
            let scenes = value.get("scenes").and_then(|v| v.as_array());
            match scenes {
-                Some(s) if s.is_empty() => VerificationResult::fail(proc_name, file_uuid, "0 scenes"),
+                Some(s) if s.is_empty() => {
+                    VerificationResult::fail(proc_name, file_uuid, "0 scenes")
+                }
                Some(_) => VerificationResult::ok(proc_name, file_uuid),
                None => VerificationResult::fail(proc_name, file_uuid, "missing 'scenes' field"),
            }
@@ -76,15 +84,22 @@ pub fn verify_output(processor: &ProcessorType, file_uuid: &str) -> Verification
        ProcessorType::Yolo => {
            let frames = value.get("frames").and_then(|v| v.as_object());
            match frames {
-                Some(f) if f.is_empty() => VerificationResult::fail(proc_name, file_uuid, "0 frames"),
+                Some(f) if f.is_empty() => {
+                    VerificationResult::fail(proc_name, file_uuid, "0 frames")
+                }
                Some(_) => VerificationResult::ok(proc_name, file_uuid),
                None => VerificationResult::fail(proc_name, file_uuid, "missing 'frames' field"),
            }
        }
        ProcessorType::Face => {
-            let faces = value.get("faces").or_else(|| value.get("frames")).and_then(|v| v.as_array());
+            let faces = value
+                .get("faces")
+                .or_else(|| value.get("frames"))
+                .and_then(|v| v.as_array());
            match faces {
-                Some(f) if f.is_empty() => VerificationResult::fail(proc_name, file_uuid, "0 faces"),
+                Some(f) if f.is_empty() => {
+                    VerificationResult::fail(proc_name, file_uuid, "0 faces")
+                }
                Some(_) => VerificationResult::ok(proc_name, file_uuid),
                None => VerificationResult::fail(proc_name, file_uuid, "missing 'faces'/'frames'"),
            }
@@ -92,7 +107,9 @@ pub fn verify_output(processor: &ProcessorType, file_uuid: &str) -> Verification
        ProcessorType::Ocr => {
            let frames = value.get("frames").and_then(|v| v.as_array());
            match frames {
-                Some(f) if f.is_empty() => VerificationResult::fail(proc_name, file_uuid, "0 frames"),
+                Some(f) if f.is_empty() => {
+                    VerificationResult::fail(proc_name, file_uuid, "0 frames")
+                }
                Some(_) => VerificationResult::ok(proc_name, file_uuid),
                None => VerificationResult::fail(proc_name, file_uuid, "missing 'frames'"),
            }
@@ -100,7 +117,9 @@ pub fn verify_output(processor: &ProcessorType, file_uuid: &str) -> Verification
        ProcessorType::Pose => {
            let frames = value.get("frames").and_then(|v| v.as_array());
            match frames {
-                Some(f) if f.is_empty() => VerificationResult::fail(proc_name, file_uuid, "0 frames"),
+                Some(f) if f.is_empty() => {
+                    VerificationResult::fail(proc_name, file_uuid, "0 frames")
+                }
                Some(_) => VerificationResult::ok(proc_name, file_uuid),
                None => VerificationResult::fail(proc_name, file_uuid, "missing 'frames'"),
            }
@@ -108,7 +127,9 @@ pub fn verify_output(processor: &ProcessorType, file_uuid: &str) -> Verification
        ProcessorType::Scene => {
            let scenes = value.get("scenes").and_then(|v| v.as_array());
            match scenes {
-                Some(s) if s.is_empty() => VerificationResult::fail(proc_name, file_uuid, "0 scenes"),
+                Some(s) if s.is_empty() => {
+                    VerificationResult::fail(proc_name, file_uuid, "0 scenes")
+                }
                Some(_) => VerificationResult::ok(proc_name, file_uuid),
                None => VerificationResult::ok(proc_name, file_uuid),
            }
@@ -142,7 +163,10 @@ pub fn cleanup_temp_files(processor: &ProcessorType, file_uuid: &str) {
            }
        }
        if removed > 0 {
-            info!("Cleaned up {} temp files for {}.{}", removed, file_uuid, proc_name);
+            info!(
+                "Cleaned up {} temp files for {}.{}",
+                removed, file_uuid, proc_name
+            );
        }
    }
 }
--- a/src/worker/job_worker.rs
+++ b/src/worker/job_worker.rs
@@ -6,11 +6,11 @@ use std::time::Duration;
 use tokio::time::sleep;
 use tracing::{error, info, warn};

+use crate::api::five_w1h_agent_api::run_5w1h_agent;
+use crate::api::identity_agent_api::run_identity_agent;
 use crate::core::chunk::{rule1_ingest, rule3_ingest};
 use crate::core::config::OUTPUT_DIR;
 use crate::core::db::qdrant_db::QdrantDb;
-use crate::api::five_w1h_agent_api::run_5w1h_agent;
-use crate::api::identity_agent_api::run_identity_agent;
 use crate::core::db::{
    MonitorJobStatus, PostgresDb, ProcessorJobStatus, RedisClient, VectorPayload, VideoStatus,
 };
@@ -72,7 +72,7 @@ impl JobWorker {
             AND id NOT IN (
                 SELECT DISTINCT job_id FROM dev.processor_results
                 WHERE status IN ('pending', 'running')
-             )"
+             )",
        )
        .execute(self.db.pool())
        .await
@@ -168,7 +168,10 @@ impl JobWorker {
                } else {
                    job.processors.len()
                };
-                let should_retry = self.check_and_complete_job(job.id, &job.uuid, expected_count).await.is_ok();
+                let should_retry = self
+                    .check_and_complete_job(job.id, &job.uuid, expected_count)
+                    .await
+                    .is_ok();
                if should_retry && self.processor_pool.can_start().await {
                    if let Err(e) = self.process_job(job.clone()).await {
                        error!("Failed to reprocess job {}: {}", job.uuid, e);
@@ -329,8 +332,11 @@ impl JobWorker {
                .await?;

            // Check if output file already exists on disk (source of truth)
-            let output_path =
-                PathBuf::from(OUTPUT_DIR.as_str()).join(format!("{}.{}.json", job.uuid, processor_type.as_str()));
+            let output_path = PathBuf::from(OUTPUT_DIR.as_str()).join(format!(
+                "{}.{}.json",
+                job.uuid,
+                processor_type.as_str()
+            ));
            if output_path.exists() {
                info!(
                    "Processor {} output file exists, marking completed and skipping",
@@ -361,23 +367,26 @@ impl JobWorker {
                    .await?;
                started_count += 1;
                // 覆寫 result_map 讓相依性檢查能正確判斷
-                result_map.insert(*processor_type, crate::core::db::ProcessorResult {
-                    id: 0,
-                    job_id: job.id,
-                    processor_type: *processor_type,
-                    status: ProcessorJobStatus::Completed,
-                    started_at: None,
-                    completed_at: None,
-                    duration_secs: None,
-                    chunks_produced: 0,
-                    frames_processed: total_frames as i32,
-                    output_size_bytes: 0,
-                    error_message: None,
-                    output_data: None,
-                    retry_count: 0,
-                    created_at: String::new(),
-                    updated_at: String::new(),
-                });
+                result_map.insert(
+                    *processor_type,
+                    crate::core::db::ProcessorResult {
+                        id: 0,
+                        job_id: job.id,
+                        processor_type: *processor_type,
+                        status: ProcessorJobStatus::Completed,
+                        started_at: None,
+                        completed_at: None,
+                        duration_secs: None,
+                        chunks_produced: 0,
+                        frames_processed: total_frames as i32,
+                        output_size_bytes: 0,
+                        error_message: None,
+                        output_data: None,
+                        retry_count: 0,
+                        created_at: String::new(),
+                        updated_at: String::new(),
+                    },
+                );
                continue;
            }

@@ -524,7 +533,12 @@ impl JobWorker {
                        info!("Backup already exists: {}, skipping", bak_path.display());
                    } else {
                        match std::fs::copy(entry.path(), &bak_path) {
-                            Ok(bytes) => info!("Backed up {} -> {} ({} bytes)", name, bak_path.display(), bytes),
+                            Ok(bytes) => info!(
+                                "Backed up {} -> {} ({} bytes)",
+                                name,
+                                bak_path.display(),
+                                bytes
+                            ),
                            Err(e) => warn!("Failed to backup {}: {}", name, e),
                        }
                    }
@@ -568,12 +582,18 @@ impl JobWorker {
        } else {
            job.processors.len()
        };
-        self.check_and_complete_job(job.id, &job.uuid, expected_count).await?;
+        self.check_and_complete_job(job.id, &job.uuid, expected_count)
+            .await?;

        Ok(())
    }

-    async fn check_and_complete_job(&self, job_id: i32, uuid: &str, expected_count: usize) -> Result<()> {
+    async fn check_and_complete_job(
+        &self,
+        job_id: i32,
+        uuid: &str,
+        expected_count: usize,
+    ) -> Result<()> {
        let results = self.db.get_processor_results_by_job(job_id).await?;

        info!(
@@ -676,24 +696,41 @@ impl JobWorker {
                                info!("✅ Rule 1 Ingestion completed: {} chunks inserted.", count);
                                // Automatically vectorize new sentence chunks
                                if count > 0 {
-                                    info!("📝 Starting automatic vectorize for {} chunks...", count);
-                                    if let Err(e) = Self::vectorize_chunks(&db_clone, &uuid_clone).await {
-                                        error!("❌ Auto-vectorize failed for {}: {}", uuid_clone, e);
+                                    info!(
+                                        "📝 Starting automatic vectorize for {} chunks...",
+                                        count
+                                    );
+                                    if let Err(e) =
+                                        Self::vectorize_chunks(&db_clone, &uuid_clone).await
+                                    {
+                                        error!(
+                                            "❌ Auto-vectorize failed for {}: {}",
+                                            uuid_clone, e
+                                        );
                                    }
                                }
                                // Phase 1 release: sentence chunk embedding 交付
                                info!("📦 Phase 1 release packaging...");
                                let executor = match crate::core::processor::PythonExecutor::new() {
                                    Ok(ex) => ex,
-                                    Err(e) => { error!("Failed PythonExecutor for release pack: {}", e); return; }
+                                    Err(e) => {
+                                        error!("Failed PythonExecutor for release pack: {}", e);
+                                        return;
+                                    }
                                };
-                                match executor.run(
-                                    "release_pack.py",
-                                    &["--phase", "1", "--file-uuid", &uuid_clone],
-                                    None, "RELEASE_P1",
-                                    Some(std::time::Duration::from_secs(120)),
-                                ).await {
-                                    Ok(()) => info!("✅ Phase 1 release packaged for {}", uuid_clone),
+                                match executor
+                                    .run(
+                                        "release_pack.py",
+                                        &["--phase", "1", "--file-uuid", &uuid_clone],
+                                        None,
+                                        "RELEASE_P1",
+                                        Some(std::time::Duration::from_secs(120)),
+                                    )
+                                    .await
+                                {
+                                    Ok(()) => {
+                                        info!("✅ Phase 1 release packaged for {}", uuid_clone)
+                                    }
                                    Err(e) => error!("❌ Phase 1 release pack failed: {}", e),
                                }
                            }
@@ -851,14 +888,21 @@ impl JobWorker {
                            info!("📦 Phase 2 release packaging...");
                            let executor = match crate::core::processor::PythonExecutor::new() {
                                Ok(ex) => ex,
-                                Err(e) => { error!("Failed PythonExecutor for release pack: {}", e); return; }
+                                Err(e) => {
+                                    error!("Failed PythonExecutor for release pack: {}", e);
+                                    return;
+                                }
                            };
-                            match executor.run(
-                                "release_pack.py",
-                                &["--phase", "2", "--file-uuid", &uuid_clone],
-                                None, "RELEASE_P2",
-                                Some(std::time::Duration::from_secs(120)),
-                            ).await {
+                            match executor
+                                .run(
+                                    "release_pack.py",
+                                    &["--phase", "2", "--file-uuid", &uuid_clone],
+                                    None,
+                                    "RELEASE_P2",
+                                    Some(std::time::Duration::from_secs(120)),
+                                )
+                                .await
+                            {
                                Ok(()) => info!("✅ Phase 2 release packaged for {}", uuid_clone),
                                Err(e) => error!("❌ Phase 2 release pack failed: {}", e),
                            }
@@ -970,7 +1014,10 @@ impl JobWorker {
        }

        let total = rows.len();
-        info!("[Vectorize] Starting vectorize of {} chunks for {}", total, uuid);
+        info!(
+            "[Vectorize] Starting vectorize of {} chunks for {}",
+            total, uuid
+        );

        let mut stored = 0usize;
        for (chunk_id, _chunk_type, text, start_time, end_time, _content_str) in &rows {
@@ -998,7 +1045,10 @@ impl JobWorker {
                    }
                    stored += 1;
                    if stored % 50 == 0 {
-                        info!("[Vectorize] {}/{} vectors stored for {}", stored, total, uuid);
+                        info!(
+                            "[Vectorize] {}/{} vectors stored for {}",
+                            stored, total, uuid
+                        );
                    }
                }
                Err(e) => {
@@ -1007,7 +1057,10 @@ impl JobWorker {
            }
        }

-        info!("[Vectorize] Completed: {}/{} vectors stored for {}", stored, total, uuid);
+        info!(
+            "[Vectorize] Completed: {}/{} vectors stored for {}",
+            stored, total, uuid
+        );
        Ok(())
    }
 }
--- a/src/worker/processor.rs
+++ b/src/worker/processor.rs
@@ -142,15 +142,21 @@ impl ProcessorPool {
                .flatten();
            if let Some(pid) = old_pid {
                if pid > 0 {
-                    warn!("[PID] Killing existing process {} for {}/{}", pid, uuid, processor);
-                    unsafe { libc::kill(pid, libc::SIGKILL); }
+                    warn!(
+                        "[PID] Killing existing process {} for {}/{}",
+                        pid, uuid, processor
+                    );
+                    unsafe {
+                        libc::kill(pid, libc::SIGKILL);
+                    }
                }
            }
        }
    }

    pub async fn start_processor(&self, task: ProcessorTask) -> Result<()> {
-        Self::kill_existing_processor(&*self.redis, &task.job.uuid, task.processor_type.as_str()).await;
+        Self::kill_existing_processor(&*self.redis, &task.job.uuid, task.processor_type.as_str())
+            .await;

        let (cancel_tx, cancel_rx) = mpsc::channel(1);
        let job_id = task.job.id;
@@ -231,15 +237,16 @@ impl ProcessorPool {
            match result {
                Ok(output) => {
                    // 驗收 agent 檢查產出內容
-                    let verification = crate::verification::verifier::verify_output(
-                        &processor_type,
-                        &job.uuid,
-                    );
+                    let verification =
+                        crate::verification::verifier::verify_output(&processor_type, &job.uuid);

                    if verification.passed {
                        info!(
                            "Processor {} completed and verified for job {} ({} chunks, {} frames)",
-                            processor_name, job.uuid, output.chunks_produced, output.frames_processed
+                            processor_name,
+                            job.uuid,
+                            output.chunks_produced,
+                            output.frames_processed
                        );

                        // 清理暫存備份