feat: backup architecture docs, source code, and scripts

2026-04-25 17:15:45 +08:00
parent 59809dae1f
commit 1f84e5469f
368 changed files with 146329 additions and 261 deletions
--- a/src/core/chunk/rule1_ingest.rs
+++ b/src/core/chunk/rule1_ingest.rs
@@ -0,0 +1,94 @@
+use crate::core::config::OUTPUT_DIR;
+use anyhow::{Context, Result};
+use serde::Deserialize;
+use sqlx::PgPool;
+use std::fs;
+use std::path::Path;
+
+// --- 結構體定義 (對齊外部處理器產出格式) ---
+
+/// One transcript segment as deserialized from `<asset_uuid>.asr.json`.
+#[derive(Debug, Deserialize)]
+struct AsrSegment {
+    /// Segment start; multiplied by `fps` to get a frame number, so presumably seconds.
+    start: f64,
+    /// Segment end, in the same unit as `start`.
+    end: f64,
+    /// Transcript text; stored in the `content` column of `chunks_rule1`.
+    text: String,
+}
+
+/// One speaker segment as deserialized from `<asset_uuid>.asrx.json`.
+#[derive(Debug, Deserialize)]
+struct AsrxSegment {
+    /// Segment start; compared directly against `AsrSegment::start`/`end`.
+    start: f64,
+    /// Segment end, in the same unit as `start`.
+    end: f64,
+    /// Speaker label; stored in the `speaker_id` column of `chunks_rule1`.
+    speaker: String,
+}
+
+// --- 核心邏輯 ---
+
+/// Ingests Rule 1 (sentence-level) chunks for a single asset.
+///
+/// Reads `{OUTPUT_DIR}/{asset_uuid}.asr.json` and `{OUTPUT_DIR}/{asset_uuid}.asrx.json`,
+/// labels every ASR segment with the speaker whose ASRX segment overlaps it the
+/// longest, and inserts one row per ASR segment into `chunks_rule1`.
+///
+/// # Arguments
+/// * `pool` - PostgreSQL pool used for the insert transactions.
+/// * `asset_uuid` - Asset identifier; also the file-name stem of both JSON inputs.
+/// * `fps` - Frame rate used to convert segment times into frame numbers.
+///   Must be finite and greater than zero.
+///
+/// # Returns
+/// The number of rows inserted.
+///
+/// # Errors
+/// Fails if `fps` is not a positive finite number, if either input file cannot be
+/// read or parsed, or if any database operation fails.
+///
+/// Rows are committed in batches, so a failure part-way through leaves the
+/// already-committed batches in the table.
+pub async fn ingest_rule1(pool: &PgPool, asset_uuid: &str, fps: f64) -> Result<usize> {
+    // Number of inserted rows after which the running transaction is committed.
+    const COMMIT_BATCH_SIZE: usize = 500;
+    // Speaker label stored when no ASRX segment overlaps an ASR segment.
+    const UNKNOWN_SPEAKER: &str = "UNKNOWN";
+
+    // A zero, negative, NaN or infinite fps would silently produce garbage frame numbers.
+    anyhow::ensure!(
+        fps.is_finite() && fps > 0.0,
+        "Invalid fps {} for asset {}: expected a positive, finite value",
+        fps,
+        asset_uuid
+    );
+
+    // 1. Load both input files.
+    let asr_path = format!("{}/{}.asr.json", *OUTPUT_DIR, asset_uuid);
+    let asrx_path = format!("{}/{}.asrx.json", *OUTPUT_DIR, asset_uuid);
+
+    let asr_content = fs::read_to_string(&asr_path)
+        .with_context(|| format!("Failed to read ASR file: {}", asr_path))?;
+    let asrx_content = fs::read_to_string(&asrx_path)
+        .with_context(|| format!("Failed to read ASRX file: {}", asrx_path))?;
+
+    let asr_segments: Vec<AsrSegment> = serde_json::from_str(&asr_content)
+        .with_context(|| format!("Invalid ASR JSON: {}", asr_path))?;
+    let asrx_segments: Vec<AsrxSegment> = serde_json::from_str(&asrx_content)
+        .with_context(|| format!("Invalid ASRX JSON: {}", asrx_path))?;
+
+    let mut count: usize = 0;
+
+    // 2. Insert inside a transaction that is rotated every COMMIT_BATCH_SIZE rows.
+    let mut tx = pool.begin().await?;
+
+    for seg in &asr_segments {
+        // Convert times to frame numbers.
+        let start_frame = (seg.start * fps).round() as i64;
+        let end_frame = (seg.end * fps).round() as i64;
+
+        // 3. Pick the speaker with the largest strictly positive overlap.
+        //    On ties the earliest ASRX segment wins.
+        let mut best_speaker: Option<&str> = None;
+        let mut max_overlap = 0.0f64;
+
+        for spk in &asrx_segments {
+            let overlap = (seg.end.min(spk.end) - seg.start.max(spk.start)).max(0.0);
+            if overlap > max_overlap {
+                max_overlap = overlap;
+                best_speaker = Some(spk.speaker.as_str());
+            }
+        }
+
+        let speaker_id = best_speaker.unwrap_or(UNKNOWN_SPEAKER);
+
+        // 4. Write the row.
+        sqlx::query!(
+            r#"
+            INSERT INTO chunks_rule1 (
+                id, asset_uuid, start_frame, end_frame, content, speaker_id
+            ) VALUES (
+                gen_random_uuid(), $1, $2, $3, $4, $5
+            )
+            "#,
+            asset_uuid,
+            start_frame,
+            end_frame,
+            seg.text,
+            speaker_id
+        )
+        .execute(&mut *tx)
+        .await?;
+
+        count += 1;
+
+        // Commit every COMMIT_BATCH_SIZE rows to keep individual transactions small.
+        if count % COMMIT_BATCH_SIZE == 0 {
+            tx.commit().await?;
+            tx = pool.begin().await?;
+        }
+    }
+
+    // Commit the trailing partial batch (a no-op transaction if it is empty).
+    tx.commit().await?;
+    Ok(count)
+}
--- a/src/core/chunk/rule3_ingest.rs
+++ b/src/core/chunk/rule3_ingest.rs
@@ -0,0 +1,182 @@
+use crate::core::config::OUTPUT_DIR;
+use crate::core::llm::client::generate_5w1h_summary;
+use anyhow::{Context, Result};
+use serde::Deserialize;
+use sqlx::PgPool;
+use std::fs;
+use tracing::{info, warn};
+
+/// One scene entry from the `scenes` array of `<asset_uuid>.cut.json`.
+#[derive(Debug, Deserialize)]
+struct CutScene {
+    /// Scene number; used to build the `scene_<n>` chunk id and as `chunk_index`.
+    scene_number: u32,
+    /// First frame of the scene; lower bound when selecting Rule 1 rows.
+    start_frame: u64,
+    /// Last frame of the scene; upper bound when selecting Rule 1 rows.
+    end_frame: u64,
+    /// Scene start time, stored in `chunks.start_time` (presumably seconds — confirm).
+    start_time: f64,
+    /// Scene end time, stored in `chunks.end_time` (presumably seconds — confirm).
+    end_time: f64,
+}
+
+/// Top-level shape of `<asset_uuid>.cut.json`.
+#[derive(Debug, Deserialize)]
+struct CutResult {
+    /// Scenes to ingest, processed in file order.
+    scenes: Vec<CutScene>,
+}
+
+/// Executes Rule 3 ingestion: scene-based chunking with an LLM 5W1H+ summary.
+///
+/// For every scene listed in `{OUTPUT_DIR}/{asset_uuid}.cut.json` this function:
+/// 1. selects the Rule 1 rows in `chunks_rule1` whose frame range lies inside the scene,
+/// 2. joins their `content` (ordered by `start_frame`) into the scene text,
+/// 3. asks the LLM for a summary of that text, and
+/// 4. inserts one parent row into `chunks`, listing the Rule 1 ids as `child_chunk_ids`.
+///
+/// The stored summary is `"No Audio"` when the scene has no Rule 1 text and
+/// `"LLM Error"` when the LLM call fails (the failure is logged, not propagated).
+/// The frame rate comes from `videos.fps`, falling back to 29.97 when the asset has
+/// no `videos` row.
+///
+/// All reads and LLM calls run before the write transaction is opened, so no
+/// transaction is held open while waiting on the LLM. The inserts themselves are
+/// committed atomically.
+///
+/// # Returns
+/// The number of scenes processed. Because the insert uses
+/// `ON CONFLICT (uuid, chunk_id) DO NOTHING`, this can exceed the number of rows
+/// actually written when an asset is re-ingested.
+///
+/// # Errors
+/// Fails if the CUT file cannot be read or parsed, if a scene number or frame index
+/// does not fit the database integer types, or if any database operation fails.
+pub async fn ingest_rule3(pool: &PgPool, asset_uuid: &str) -> Result<usize> {
+    // Everything needed to insert one scene row, gathered before the write phase.
+    struct SceneRecord<'a> {
+        scene: &'a CutScene,
+        chunk_index: i32,
+        start_frame: i64,
+        end_frame: i64,
+        text: String,
+        summary: String,
+        child_ids: Vec<String>,
+    }
+
+    let cut_path = format!("{}/{}.cut.json", *OUTPUT_DIR, asset_uuid);
+
+    // 1. Load CUT data.
+    let cut_content = fs::read_to_string(&cut_path)
+        .with_context(|| format!("Failed to read CUT file: {}", cut_path))?;
+    let cut_result: CutResult = serde_json::from_str(&cut_content).context("Invalid CUT JSON")?;
+
+    // The frame rate is a property of the asset, so look it up once.
+    let fps_query: Option<f64> = sqlx::query_scalar("SELECT fps FROM videos WHERE uuid = $1")
+        .bind(asset_uuid)
+        .fetch_optional(pool)
+        .await?;
+    let fps = fps_query.unwrap_or(29.97);
+
+    // 2. Gather text, child ids and summary for each scene (no transaction held).
+    let mut records: Vec<SceneRecord<'_>> = Vec::with_capacity(cut_result.scenes.len());
+
+    for scene in &cut_result.scenes {
+        // Checked conversions: a wrapped value would silently corrupt the stored range.
+        let chunk_index = i32::try_from(scene.scene_number)
+            .with_context(|| format!("Scene number {} does not fit in i32", scene.scene_number))?;
+        let start_frame = i64::try_from(scene.start_frame).with_context(|| {
+            format!("Scene {}: start_frame does not fit in i64", scene.scene_number)
+        })?;
+        let end_frame = i64::try_from(scene.end_frame).with_context(|| {
+            format!("Scene {}: end_frame does not fit in i64", scene.scene_number)
+        })?;
+
+        // One query yields both the child ids and their text, in playback order.
+        let rule1_rows: Vec<(String, String)> = sqlx::query_as(
+            r#"
+            SELECT id::text, content FROM chunks_rule1
+            WHERE asset_uuid = $1
+            AND start_frame >= $2
+            AND end_frame <= $3
+            ORDER BY start_frame ASC
+            "#,
+        )
+        .bind(asset_uuid)
+        .bind(start_frame)
+        .bind(end_frame)
+        .fetch_all(pool)
+        .await?;
+
+        let (child_ids, texts): (Vec<String>, Vec<String>) = rule1_rows.into_iter().unzip();
+        let aggregated_text = texts.join(" ");
+
+        // 3. Call LLM for Summary
+        let summary = if !aggregated_text.is_empty() {
+            match generate_5w1h_summary(&aggregated_text).await {
+                Ok(s) => s,
+                Err(e) => {
+                    warn!("LLM Summary failed for scene {}: {}", scene.scene_number, e);
+                    "LLM Error".to_string()
+                }
+            }
+        } else {
+            "No Audio".to_string()
+        };
+
+        info!(
+            "Scene {}: {} -> {} ({} sentences)",
+            scene.scene_number,
+            scene.start_time,
+            scene.end_time,
+            texts.len()
+        );
+
+        records.push(SceneRecord {
+            scene,
+            chunk_index,
+            start_frame,
+            end_frame,
+            text: aggregated_text,
+            summary,
+            child_ids,
+        });
+    }
+
+    // 4. Insert all scene rows in a single short transaction.
+    let mut tx = pool.begin().await?;
+
+    for record in &records {
+        let chunk_id = format!("scene_{}", record.scene.scene_number);
+
+        // The same JSON document is stored in both `content` and `metadata`.
+        let metadata = serde_json::json!({
+            "type": "scene",
+            "scene_number": record.scene.scene_number
+        });
+
+        sqlx::query(
+            r#"
+            INSERT INTO chunks (
+                uuid, chunk_id, chunk_index, chunk_type,
+                start_time, end_time, fps, start_frame, end_frame,
+                content, text_content, summary_text, metadata, child_chunk_ids
+            ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14)
+            ON CONFLICT (uuid, chunk_id) DO NOTHING
+            "#,
+        )
+        .bind(asset_uuid)
+        .bind(&chunk_id)
+        .bind(record.chunk_index)
+        .bind("cut") // Chunk type
+        .bind(record.scene.start_time)
+        .bind(record.scene.end_time)
+        .bind(fps)
+        .bind(record.start_frame)
+        .bind(record.end_frame)
+        .bind(&metadata) // Content JSON
+        .bind(&record.text) // Text content
+        .bind(&record.summary) // Summary
+        .bind(&metadata) // Metadata
+        .bind(&record.child_ids) // Child IDs
+        .execute(&mut *tx)
+        .await?;
+    }
+
+    tx.commit().await?;
+    Ok(records.len())
+}
--- a/src/core/chunk/types.rs.bak
+++ b/src/core/chunk/types.rs.bak
@@ -0,0 +1,755 @@
+use crate::core::time::FrameTime;
+use serde::{Deserialize, Serialize};
+
+/// Kind of chunk. Serialized by serde using snake_case variant names.
+#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq)]
+#[serde(rename_all = "snake_case")]
+pub enum ChunkType {
+    TimeBased,
+    Sentence,
+    Cut,
+    Trace,
+    Story,  // Parent chunk from story analysis
+    Visual, // Visual object-based chunk from YOLO detection
+}
+
+impl ChunkType {
+    /// Returns the short, static label for this chunk type.
+    ///
+    /// These labels are independent of the serde representation: `TimeBased`
+    /// yields `"time"` here, while the enum's `snake_case` serde renaming
+    /// applies to the variant name itself.
+    pub fn as_str(&self) -> &'static str {
+        match *self {
+            Self::TimeBased => "time",
+            Self::Sentence => "sentence",
+            Self::Cut => "cut",
+            Self::Trace => "trace",
+            Self::Story => "story",
+            Self::Visual => "visual",
+        }
+    }
+}
+
+/// Rule that produced a chunk. Serialized by serde using snake_case variant names.
+#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq)]
+#[serde(rename_all = "snake_case")]
+pub enum ChunkRule {
+    Rule1, // Direct conversion
+    Rule2, // Aggregated content
+}
+
+/// List of objects detected in a single keyframe.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct KeyframeObjects {
+    /// Keyframe time (seconds)
+    pub timestamp: f64,
+    /// Keyframe frame number
+    pub frame_number: u64,
+    /// Detected objects
+    pub objects: Vec<DetectedObject>,
+}
+
+/// A detected object.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct DetectedObject {
+    /// Object class name
+    pub class_name: String,
+    /// Object class ID
+    pub class_id: u32,
+    /// Confidence (0.0-1.0)
+    pub confidence: f32,
+    /// Bounding box (x, y, width, height)
+    pub bbox: Option<BoundingBox>,
+    /// Occurrence count (within the chunk)
+    pub occurrence: u32,
+}
+
+/// Content payload of a visual chunk; `Chunk::new_visual` serializes it to JSON.
+///
+/// NOTE(review): the previous doc comment on this struct read "bounding box",
+/// which looks like a leftover from a `BoundingBox` definition that is referenced
+/// by `DetectedObject::bbox` but is not present in this part of the file — confirm.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct VisualChunkContent {
+    pub start_time: f64,
+    pub end_time: f64,
+    pub keyframe_objects: Vec<KeyframeObjects>,
+    pub dominant_objects: Vec<String>,
+    pub object_relationships: Vec<(String, String, String)>, // (object1, relationship, object2)
+    pub scene_description: Option<String>,
+    pub metadata: VisualMetadata,
+}
+
+/// Aggregate statistics stored in `VisualChunkContent::metadata`.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct VisualMetadata {
+    pub object_count: u32,
+    pub unique_classes: Vec<String>,
+    pub max_confidence: f32,
+    pub avg_confidence: f32,
+    pub spatial_density: f32, // objects per frame
+}
+
+impl ChunkRule {
+    /// Returns the static label for this rule (`"rule_1"` or `"rule_2"`).
+    pub fn as_str(&self) -> &'static str {
+        match *self {
+            Self::Rule1 => "rule_1",
+            Self::Rule2 => "rule_2",
+        }
+    }
+}
+
+/// A chunk of a media file, addressed by a frame range at a given frame rate.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct Chunk {
+    pub file_id: i32,
+    pub uuid: String,
+    pub chunk_id: String,
+    pub chunk_index: u32,
+    /// Kind of chunk (see `ChunkType`).
+    pub chunk_type: ChunkType,
+    /// Rule that produced the chunk (see `ChunkRule`).
+    pub rule: ChunkRule,
+    /// Frames per second (can be fractional, e.g., 29.97, 23.976)
+    pub fps: f64,
+    /// Start frame (0-based)
+    pub start_frame: i64,
+    /// End frame (exclusive)
+    pub end_frame: i64,
+    pub text_content: Option<String>,
+    /// Chunk payload as JSON; `new_visual` stores a serialized `VisualChunkContent` here.
+    pub content: serde_json::Value,
+    pub metadata: Option<serde_json::Value>,
+    pub vector_id: Option<String>,
+    pub frame_count: i32,
+    pub pre_chunk_ids: Vec<i32>,
+    pub parent_chunk_id: Option<String>, // For parent-child chunk hierarchy
+    pub child_chunk_ids: Vec<String>,    // Child chunk IDs (for parent chunks)
+    pub visual_stats: Option<serde_json::Value>,
+}
+
+
+
+
+        id: i64,
+        video_id: i64,
+        yolo_result: &crate::core::processor::yolo::YoloResult,
+        min_frames_per_chunk: usize,
+        similarity_threshold: f32,
+    ) -> Vec<Self> {
+        if yolo_result.frames.is_empty() {
+            return vec![];
+        }
+
+        let mut chunks = Vec::new();
+        let mut current_chunk_frames = Vec::new();
+        let mut current_id = id;
+
+        for (i, frame) in yolo_result.frames.iter().enumerate() {
+            if current_chunk_frames.is_empty() {
+                current_chunk_frames.push(frame);
+                continue;
+            }
+
+            // Check similarity with last frame in current chunk
+            let last_frame = current_chunk_frames.last().unwrap();
+            let similarity = VisualChunkContent::frame_similarity(last_frame, frame);
+
+            if similarity >= similarity_threshold && current_chunk_frames.len() < 100 {
+                // Similar enough, add to current chunk
+                current_chunk_frames.push(frame);
+            } else {
+                // Not similar enough or chunk too large, create new chunk
+                if current_chunk_frames.len() >= min_frames_per_chunk {
+                    if let Some(chunk) =
+                        Self::create_chunk_from_frames(current_id, video_id, &current_chunk_frames)
+                    {
+                        chunks.push(chunk);
+                        current_id += 1;
+                    }
+                }
+
+                #[cfg(test)]
+                mod tests {
+                    use super::*;
+                    use crate::core::processor::yolo::{YoloFrame, YoloObject, YoloResult};
+
+                    #[test]
+                    fn test_chunk_type_visual_serialization() {
+                        let chunk_type = ChunkType::Visual;
+                        let json = serde_json::to_string(&chunk_type).unwrap();
+                        assert_eq!(json, "\"visual\"");
+
+                        let deserialized: ChunkType = serde_json::from_str(&json).unwrap();
+                        assert_eq!(deserialized, ChunkType::Visual);
+                    }
+
+                    #[test]
+                    fn test_visual_chunk_creation() {
+                        // Create a mock YOLO result
+                        let yolo_result = YoloResult {
+                            frame_count: 2,
+                            fps: 30.0,
+                            frames: vec![
+                                YoloFrame {
+                                    frame: 0,
+                                    timestamp: 0.0,
+                                    objects: vec![
+                                        YoloObject {
+                                            class_name: "person".to_string(),
+                                            class_id: 0,
+                                            x: 100,
+                                            y: 200,
+                                            width: 50,
+                                            height: 100,
+                                            confidence: 0.95,
+                                        },
+                                        YoloObject {
+                                            class_name: "car".to_string(),
+                                            class_id: 2,
+                                            x: 300,
+                                            y: 150,
+                                            width: 80,
+                                            height: 60,
+                                            confidence: 0.87,
+                                        },
+                                    ],
+                                },
+                                YoloFrame {
+                                    frame: 1,
+                                    timestamp: 0.033, // 1/30 second
+                                    objects: vec![YoloObject {
+                                        class_name: "person".to_string(),
+                                        class_id: 0,
+                                        x: 110,
+                                        y: 210,
+                                        width: 52,
+                                        height: 102,
+                                        confidence: 0.92,
+                                    }],
+                                },
+                            ],
+                        };
+
+                        // Create visual chunk from YOLO result
+                        let chunk = Chunk::from_yolo_result(1, 100, &yolo_result, 0, 1).unwrap();
+
+                        // Verify chunk properties
+                        assert_eq!(chunk.id, 1);
+                        assert_eq!(chunk.video_id, 100);
+                        assert_eq!(chunk.chunk_type, ChunkType::Visual);
+                        assert_eq!(chunk.start_time, 0.0);
+                        assert_eq!(chunk.end_time, 0.033);
+
+                        // Verify visual content
+                        if let ChunkContent::Visual(content) = chunk.content {
+                            assert_eq!(content.metadata.object_count, 3);
+                            assert_eq!(content.metadata.unique_classes.len(), 2);
+                            assert!(content
+                                .metadata
+                                .unique_classes
+                                .contains(&"person".to_string()));
+                            assert!(content.metadata.unique_classes.contains(&"car".to_string()));
+                            assert_eq!(content.dominant_objects, vec!["person"]);
+                            assert_eq!(content.keyframe_objects.len(), 2);
+                        } else {
+                            panic!("Expected Visual content type");
+                        }
+                    }
+
+                    #[test]
+                    fn test_visual_chunk_content_methods() {
+                        let content = VisualChunkContent {
+                            start_time: 0.0,
+                            end_time: 5.0,
+                            keyframe_objects: vec![KeyframeObjects {
+                                frame: 0,
+                                timestamp: 0.0,
+                                objects: vec![
+                                    DetectedObject {
+                                        class_name: "person".to_string(),
+                                        class_id: 0,
+                                        bounding_box: BoundingBox {
+                                            x: 100,
+                                            y: 200,
+                                            width: 50,
+                                            height: 100,
+                                        },
+                                        confidence: 0.95,
+                                    },
+                                    DetectedObject {
+                                        class_name: "car".to_string(),
+                                        class_id: 2,
+                                        bounding_box: BoundingBox {
+                                            x: 300,
+                                            y: 150,
+                                            width: 80,
+                                            height: 60,
+                                        },
+                                        confidence: 0.87,
+                                    },
+                                ],
+                            }],
+                            dominant_objects: vec!["person".to_string()],
+                            object_relationships: vec![],
+                            scene_description: Some("A person near a car".to_string()),
+                            metadata: VisualMetadata {
+                                object_count: 2,
+                                unique_classes: vec!["person".to_string(), "car".to_string()],
+                                max_confidence: 0.95,
+                                avg_confidence: 0.91,
+                                spatial_density: 2.0,
+                            },
+                        };
+
+                        // Test summary method
+                        let summary = content.summary();
+                        assert!(summary.contains("Visual chunk from 0.0s to 5.0s"));
+                        assert!(summary.contains("person"));
+
+                        // Test contains_object method
+                        assert!(content.contains_object("person"));
+                        assert!(content.contains_object("car"));
+                        assert!(!content.contains_object("dog"));
+
+                        // Test high_confidence_objects method
+                        let high_conf_objects = content.high_confidence_objects(0.9);
+                        assert_eq!(high_conf_objects.len(), 1);
+                        assert_eq!(high_conf_objects[0].class_name, "person");
+                    }
+
+                    #[test]
+                    fn test_frame_similarity() {
+                        let frame1 = YoloFrame {
+                            frame: 0,
+                            timestamp: 0.0,
+                            objects: vec![
+                                YoloObject {
+                                    class_name: "person".to_string(),
+                                    class_id: 0,
+                                    x: 100,
+                                    y: 200,
+                                    width: 50,
+                                    height: 100,
+                                    confidence: 0.95,
+                                },
+                                YoloObject {
+                                    class_name: "car".to_string(),
+                                    class_id: 2,
+                                    x: 300,
+                                    y: 150,
+                                    width: 80,
+                                    height: 60,
+                                    confidence: 0.87,
+                                },
+                            ],
+                        };
+
+                        let frame2 = YoloFrame {
+                            frame: 1,
+                            timestamp: 0.033,
+                            objects: vec![
+                                YoloObject {
+                                    class_name: "person".to_string(),
+                                    class_id: 0,
+                                    x: 110,
+                                    y: 210,
+                                    width: 52,
+                                    height: 102,
+                                    confidence: 0.92,
+                                },
+                                YoloObject {
+                                    class_name: "car".to_string(),
+                                    class_id: 2,
+                                    x: 310,
+                                    y: 155,
+                                    width: 82,
+                                    height: 62,
+                                    confidence: 0.85,
+                                },
+                            ],
+                        };
+
+                        let frame3 = YoloFrame {
+                            frame: 2,
+                            timestamp: 0.066,
+                            objects: vec![YoloObject {
+                                class_name: "dog".to_string(),
+                                class_id: 16,
+                                x: 150,
+                                y: 250,
+                                width: 40,
+                                height: 60,
+                                confidence: 0.78,
+                            }],
+                        };
+
+                        // Test similar frames (same objects)
+                        let similarity_same =
+                            VisualChunkContent::frame_similarity(&frame1, &frame2);
+                        assert!((similarity_same - 1.0).abs() < 0.001);
+
+                        // Test dissimilar frames (different objects)
+                        let similarity_diff =
+                            VisualChunkContent::frame_similarity(&frame1, &frame3);
+                        assert!((similarity_diff - 0.0).abs() < 0.001);
+
+                        // Test empty frames
+                        let empty_frame = YoloFrame {
+                            frame: 3,
+                            timestamp: 0.1,
+                            objects: vec![],
+                        };
+                        let similarity_empty =
+                            VisualChunkContent::frame_similarity(&empty_frame, &empty_frame);
+                        assert!((similarity_empty - 1.0).abs() < 0.001);
+
+                        let similarity_mixed =
+                            VisualChunkContent::frame_similarity(&empty_frame, &frame1);
+                        assert!((similarity_mixed - 0.0).abs() < 0.001);
+                    }
+                }
+                current_chunk_frames = vec![frame];
+            }
+        }
+
+        // Handle last chunk
+        if current_chunk_frames.len() >= min_frames_per_chunk {
+            if let Some(chunk) =
+                Self::create_chunk_from_frames(current_id, video_id, &current_chunk_frames)
+            {
+                chunks.push(chunk);
+            }
+        }
+
+        chunks
+    }
+
+    /// Builds one chunk spanning `frames`, or returns `None` when `frames` is empty.
+    ///
+    /// The frames are cloned into a temporary `YoloResult` and handed to
+    /// `from_yolo_result` together with the first and last frame numbers.
+    ///
+    /// NOTE(review): this call passes five arguments and expects `Option<Self>`,
+    /// which does not match the `from_yolo_result` signature visible later in this
+    /// file (seven parameters, returns `Self`) — confirm which version is current.
+    fn create_chunk_from_frames(
+        id: i64,
+        video_id: i64,
+        frames: &[&crate::core::processor::yolo::YoloFrame],
+    ) -> Option<Self> {
+        // An empty slice has neither a first nor a last frame.
+        let (first, last) = match (frames.first(), frames.last()) {
+            (Some(first), Some(last)) => (first, last),
+            _ => return None,
+        };
+        let start_frame = first.frame;
+        let end_frame = last.frame;
+
+        // Simple conversion - could use the from_yolo_result method
+        let owned_frames = frames.iter().map(|f| (*f).clone()).collect();
+        let dummy_yolo_result = crate::core::processor::yolo::YoloResult {
+            frame_count: frames.len() as u64,
+            fps: 0.0, // Not used in this context
+            frames: owned_frames,
+        };
+
+        Self::from_yolo_result(id, video_id, &dummy_yolo_result, start_frame, end_frame)
+    }
+
+    /// Creates a chunk from a time range given in seconds (legacy conversion).
+    ///
+    /// Intended for data migrated from systems that store times as seconds.
+    /// Each boundary becomes a frame number by rounding `seconds * fps` to the
+    /// nearest integer; everything else is forwarded unchanged to `Self::new`.
+    #[allow(clippy::too_many_arguments)]
+    pub fn from_seconds(
+        file_id: i32,
+        uuid: String,
+        chunk_index: u32,
+        chunk_type: ChunkType,
+        rule: ChunkRule,
+        start_time: f64,
+        end_time: f64,
+        fps: f64,
+        content: serde_json::Value,
+    ) -> Self {
+        // Seconds -> nearest frame number at the given frame rate.
+        let to_frame = |seconds: f64| (seconds * fps).round() as i64;
+
+        Self::new(
+            file_id,
+            uuid,
+            chunk_index,
+            chunk_type,
+            rule,
+            to_frame(start_time),
+            to_frame(end_time),
+            fps,
+            content,
+        )
+    }
+
+    /// Returns the chunk's start as a `FrameTime` built from `start_frame` and `fps`.
+    pub fn start_time(&self) -> FrameTime {
+        let (frame, fps) = (self.start_frame, self.fps);
+        FrameTime::from_frames(frame, fps)
+    }
+
+    /// Returns the chunk's end as a `FrameTime` built from `end_frame` and `fps`.
+    pub fn end_time(&self) -> FrameTime {
+        let (frame, fps) = (self.end_frame, self.fps);
+        FrameTime::from_frames(frame, fps)
+    }
+
+    /// Returns the chunk length in frames (`end_frame - start_frame`).
+    pub fn duration_frames(&self) -> i64 {
+        let (first, past_last) = (self.start_frame, self.end_frame);
+        past_last - first
+    }
+
+    /// Returns the chunk length in seconds (`duration_frames / fps`).
+    ///
+    /// There is no guard for `fps == 0.0`; the plain floating-point division
+    /// then yields an infinite or NaN result.
+    pub fn duration_seconds(&self) -> f64 {
+        let frames = self.duration_frames() as f64;
+        frames / self.fps
+    }
+
+    /// Renders the start time in "seconds.frame" form (e.g., "123.04").
+    pub fn format_start_sec_frame(&self) -> String {
+        let start = self.start_time();
+        start.format_sec_frame()
+    }
+
+    /// Renders the end time in "seconds.frame" form (e.g., "456.15").
+    pub fn format_end_sec_frame(&self) -> String {
+        let end = self.end_time();
+        end.format_sec_frame()
+    }
+
+    /// Renders the start time in "HH:MM:SS" form.
+    pub fn format_start_hms(&self) -> String {
+        let start = self.start_time();
+        start.format_hms()
+    }
+
+    /// Renders the end time in "HH:MM:SS" form.
+    pub fn format_end_hms(&self) -> String {
+        let end = self.end_time();
+        end.format_hms()
+    }
+
+    /// Renders the start time in "HH:MM:SS.FF" form.
+    pub fn format_start_hms_frame(&self) -> String {
+        let start = self.start_time();
+        start.format_hms_frame()
+    }
+
+    /// Renders the end time in "HH:MM:SS.FF" form.
+    pub fn format_end_hms_frame(&self) -> String {
+        let end = self.end_time();
+        end.format_hms_frame()
+    }
+
+    /// Returns `(start_seconds, end_seconds)` for this chunk.
+    ///
+    /// Kept for backward compatibility during migration; new code should
+    /// prefer `start_time()` and `end_time()`.
+    pub fn time_range_seconds(&self) -> (f64, f64) {
+        let start_seconds = self.start_time().seconds();
+        let end_seconds = self.end_time().seconds();
+        (start_seconds, end_seconds)
+    }
+
+    /// Builder step: returns the chunk with `metadata` set to `Some(metadata)`.
+    pub fn with_metadata(self, metadata: serde_json::Value) -> Self {
+        Self {
+            metadata: Some(metadata),
+            ..self
+        }
+    }
+
+    /// Builder step: returns the chunk with `vector_id` set to `Some(vector_id)`.
+    pub fn with_vector_id(self, vector_id: String) -> Self {
+        Self {
+            vector_id: Some(vector_id),
+            ..self
+        }
+    }
+
+    /// Builder step: returns the chunk with `text_content` set to `Some(text)`.
+    pub fn with_text_content(self, text: String) -> Self {
+        Self {
+            text_content: Some(text),
+            ..self
+        }
+    }
+
+    /// Builder step: returns the chunk with `frame_count` replaced by `count`.
+    pub fn with_frame_count(self, count: i32) -> Self {
+        Self {
+            frame_count: count,
+            ..self
+        }
+    }
+
+    /// Builder step: returns the chunk with `pre_chunk_ids` replaced by `ids`.
+    pub fn with_pre_chunk_ids(self, ids: Vec<i32>) -> Self {
+        Self {
+            pre_chunk_ids: ids,
+            ..self
+        }
+    }
+
+    /// Builder step: returns the chunk with `parent_chunk_id` set to `Some(parent_id)`.
+    pub fn with_parent_chunk_id(self, parent_id: String) -> Self {
+        Self {
+            parent_chunk_id: Some(parent_id),
+            ..self
+        }
+    }
+
+    /// Builder step: returns the chunk with `child_chunk_ids` replaced by `child_ids`.
+    pub fn with_child_chunk_ids(self, child_ids: Vec<String>) -> Self {
+        Self {
+            child_chunk_ids: child_ids,
+            ..self
+        }
+    }
+
+    /// Returns `true` when this chunk lists at least one child chunk id.
+    pub fn is_parent_chunk(&self) -> bool {
+        !self.child_chunk_ids.is_empty()
+    }
+
+    /// Returns `true` when this chunk has a `parent_chunk_id` set.
+    pub fn is_child_chunk(&self) -> bool {
+        self.parent_chunk_id.is_some()
+    }
+
+    /// Creates a visual chunk (`ChunkType::Visual`, `ChunkRule::Rule2`).
+    ///
+    /// `visual_content` is serialized to JSON and stored as the chunk content.
+    /// If serialization fails, a JSON object carrying an `"error"` message is
+    /// stored instead, so this constructor never fails.
+    pub fn new_visual(
+        file_id: i32,
+        uuid: String,
+        chunk_index: u32,
+        start_frame: i64,
+        end_frame: i64,
+        fps: f64,
+        visual_content: VisualChunkContent,
+    ) -> Self {
+        let content = match serde_json::to_value(&visual_content) {
+            Ok(value) => value,
+            // Fall back to an error marker rather than propagating the failure.
+            Err(_) => serde_json::json!({"error": "Failed to serialize visual content"}),
+        };
+
+        Self::new(
+            file_id,
+            uuid,
+            chunk_index,
+            ChunkType::Visual,
+            ChunkRule::Rule2,
+            start_frame,
+            end_frame,
+            fps,
+            content,
+        )
+    }
+
+    /// Builds a visual chunk from per-frame YOLO detections.
+    ///
+    /// Every detection becomes a `DetectedObject` (its `occurrence` is always 1);
+    /// frames without any detection produce no keyframe entry. A class is listed
+    /// in `primary_objects` when it was detected at least three times in total,
+    /// and the label reads "no objects detected" when no class qualifies.
+    /// The chunk itself is created through [`Self::new_visual`].
+    pub fn from_yolo_result(
+        file_id: i32,
+        uuid: String,
+        chunk_index: u32,
+        start_frame: i64,
+        end_frame: i64,
+        fps: f64,
+        yolo_frames: Vec<crate::core::processor::yolo::YoloFrame>,
+    ) -> Self {
+        use std::collections::HashMap;
+
+        // Total number of detections per class name across all frames.
+        let mut object_counts: HashMap<String, i32> = HashMap::new();
+        let mut keyframe_objects = Vec::new();
+
+        for frame in &yolo_frames {
+            let mut frame_objects = Vec::with_capacity(frame.objects.len());
+
+            for obj in &frame.objects {
+                *object_counts.entry(obj.class_name.clone()).or_insert(0) += 1;
+
+                frame_objects.push(DetectedObject {
+                    class_name: obj.class_name.clone(),
+                    class_id: obj.class_id,
+                    confidence: obj.confidence,
+                    bbox: Some(BoundingBox {
+                        x: obj.x,
+                        y: obj.y,
+                        width: obj.width,
+                        height: obj.height,
+                    }),
+                    occurrence: 1,
+                });
+            }
+
+            // Only frames that actually contain detections are kept as keyframes.
+            if !frame_objects.is_empty() {
+                keyframe_objects.push(KeyframeObjects {
+                    timestamp: frame.timestamp,
+                    frame_number: frame.frame,
+                    objects: frame_objects,
+                });
+            }
+        }
+
+        // Classes detected at least three times. Sorted because HashMap iteration
+        // order is unspecified and the label should be stable between runs.
+        let mut primary_names: Vec<&str> = object_counts
+            .iter()
+            .filter(|(_, &count)| count >= 3)
+            .map(|(name, _)| name.as_str())
+            .collect();
+        primary_names.sort_unstable();
+        let primary_objects = primary_names.join(", ");
+
+        // Per-class detection counts as JSON; serialized once and shared by both
+        // `object_stats` and `object_frequency`.
+        let object_stats =
+            serde_json::to_value(&object_counts).unwrap_or_else(|_| serde_json::json!({}));
+
+        let visual_content = VisualChunkContent {
+            primary_objects: if primary_objects.is_empty() {
+                "no objects detected".to_string()
+            } else {
+                primary_objects
+            },
+            object_frequency: object_stats.clone(),
+            object_stats,
+            keyframe_objects,
+            visual_summary: None, // optional; an LLM-generated summary can be added later
+        };
+
+        Self::new_visual(
+            file_id,
+            uuid,
+            chunk_index,
+            start_frame,
+            end_frame,
+            fps,
+            visual_content,
+        )
+    }
+}
+
+impl VisualChunkContent {
+    /// Jaccard similarity of the sets of object class names seen in two YOLO
+    /// frames.
+    ///
+    /// Two frames without objects score 1.0; a frame without objects compared to
+    /// a frame with objects scores 0.0.
+    pub fn frame_similarity(
+        frame1: &crate::core::processor::yolo::YoloFrame,
+        frame2: &crate::core::processor::yolo::YoloFrame,
+    ) -> f32 {
+        use std::collections::HashSet;
+
+        match (frame1.objects.is_empty(), frame2.objects.is_empty()) {
+            (true, true) => return 1.0,
+            (true, false) | (false, true) => return 0.0,
+            (false, false) => {}
+        }
+
+        let classes_a: HashSet<&str> = frame1
+            .objects
+            .iter()
+            .map(|o| o.class_name.as_str())
+            .collect();
+        let classes_b: HashSet<&str> = frame2
+            .objects
+            .iter()
+            .map(|o| o.class_name.as_str())
+            .collect();
+
+        // Both sets are non-empty here, so the union size is at least 1.
+        let shared = classes_a.intersection(&classes_b).count();
+        let combined = classes_a.union(&classes_b).count();
+        shared as f32 / combined as f32
+    }
+
+    /// One-line, human-readable description of the chunk.
+    ///
+    /// NOTE(review): this reads `start_time`, `end_time`, `metadata` and
+    /// `dominant_objects`, while `from_yolo_result` in this file fills a
+    /// different set of fields — confirm against the struct definition.
+    pub fn summary(&self) -> String {
+        let dominant = if self.dominant_objects.is_empty() {
+            "none".to_string()
+        } else {
+            self.dominant_objects.join(", ")
+        };
+
+        format!(
+            "Visual chunk from {:.1}s to {:.1}s (duration: {:.1}s, {} frames). Objects: {} total, {} unique. Dominant objects: {}",
+            self.start_time,
+            self.end_time,
+            self.end_time - self.start_time,
+            self.keyframe_objects.len(),
+            self.metadata.object_count,
+            self.metadata.unique_classes.len(),
+            dominant
+        )
+    }
+
+    /// Returns `true` when any keyframe holds an object whose class name equals
+    /// `class_name`.
+    pub fn contains_object(&self, class_name: &str) -> bool {
+        self.keyframe_objects
+            .iter()
+            .flat_map(|keyframe| keyframe.objects.iter())
+            .any(|obj| obj.class_name == class_name)
+    }
+
+    /// Collects every detected object whose confidence is at least `threshold`,
+    /// in keyframe order.
+    pub fn high_confidence_objects(&self, threshold: f32) -> Vec<&DetectedObject> {
+        let mut confident = Vec::new();
+        for keyframe in &self.keyframe_objects {
+            confident.extend(
+                keyframe
+                    .objects
+                    .iter()
+                    .filter(|obj| obj.confidence >= threshold),
+            );
+        }
+        confident
+    }
+}
--- a/src/core/chunk/types_fixed.rs
+++ b/src/core/chunk/types_fixed.rs
@@ -0,0 +1,320 @@
+use crate::core::time::FrameTime;
+use serde::{Deserialize, Serialize};
+
+/// Category of a chunk.
+///
+/// Serde (de)serializes the variants with snake_case names.
+/// NOTE(review): the serde name of `TimeBased` is "time_based", whereas
+/// `as_str()` returns "time" — confirm which spelling consumers expect.
+#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq)]
+#[serde(rename_all = "snake_case")]
+pub enum ChunkType {
+    TimeBased,
+    Sentence,
+    Cut,
+    Trace,
+    Story,  // Parent chunk from story analysis
+    Visual, // Visual object-based chunk from YOLO detection (Phase 2.1)
+}
+
+impl ChunkType {
+    /// Short lowercase label for this chunk type.
+    pub fn as_str(&self) -> &'static str {
+        match *self {
+            Self::TimeBased => "time",
+            Self::Sentence => "sentence",
+            Self::Cut => "cut",
+            Self::Trace => "trace",
+            Self::Story => "story",
+            Self::Visual => "visual",
+        }
+    }
+}
+
+/// Rule that was used to produce a chunk.
+///
+/// Serde (de)serializes the variants with snake_case names.
+/// NOTE(review): the serde names appear to be "rule1"/"rule2", whereas
+/// `as_str()` returns "rule_1"/"rule_2" — confirm which spelling is expected.
+#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq)]
+#[serde(rename_all = "snake_case")]
+pub enum ChunkRule {
+    Rule1, // direct conversion
+    Rule2, // aggregated content
+}
+
+/// Objects detected in a single keyframe.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct KeyframeObjects {
+    /// Keyframe time (seconds)
+    pub timestamp: f64,
+    /// Keyframe frame number
+    pub frame_number: u64,
+    /// Objects detected in this keyframe
+    pub objects: Vec<DetectedObject>,
+}
+
+/// A single detected object.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct DetectedObject {
+    /// Object class name
+    pub class_name: String,
+    /// Object class ID
+    pub class_id: u32,
+    /// Confidence (0.0-1.0)
+    pub confidence: f32,
+    /// Bounding box (x, y, width, height)
+    pub bbox: Option<BoundingBox>,
+    /// Number of occurrences (within the chunk)
+    pub occurrence: u32,
+}
+
+/// Bounding box of a detected object.
+///
+/// NOTE(review): units and origin are not stated here — presumably pixels with
+/// `x`/`y` as a corner; confirm against the YOLO processor output.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct BoundingBox {
+    pub x: i32,
+    pub y: i32,
+    pub width: i32,
+    pub height: i32,
+}
+
+/// Visual chunk content (Phase 2.1)
+///
+/// This is the value serialized into `Chunk::content` by `Chunk::new_visual`.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct VisualChunkContent {
+    // Timestamp of the first frame, as set by `Chunk::from_yolo_result`.
+    pub start_time: f64,
+    // Timestamp of the last frame, as set by `Chunk::from_yolo_result`.
+    pub end_time: f64,
+    // Per-keyframe detections.
+    pub keyframe_objects: Vec<KeyframeObjects>,
+    // Class names considered dominant within the chunk.
+    pub dominant_objects: Vec<String>,
+    pub object_relationships: Vec<(String, String, String)>, // (object1, relationship, object2)
+    // Optional free-text description; `Chunk::from_yolo_result` leaves it `None`.
+    pub scene_description: Option<String>,
+    // Aggregate statistics over all detections in the chunk.
+    pub metadata: VisualMetadata,
+}
+
+/// Visual metadata (Phase 2.1): aggregate statistics over a chunk's detections.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct VisualMetadata {
+    // Total number of detections across all frames.
+    pub object_count: u32,
+    // Distinct class names seen in the chunk.
+    pub unique_classes: Vec<String>,
+    // Highest detection confidence (0.0 when there are no detections).
+    pub max_confidence: f32,
+    // Mean detection confidence (0.0 when there are no detections).
+    pub avg_confidence: f32,
+    pub spatial_density: f32, // objects per frame
+}
+
+impl ChunkRule {
+    /// String label for this rule ("rule_1" or "rule_2").
+    pub fn as_str(&self) -> &'static str {
+        match *self {
+            Self::Rule1 => "rule_1",
+            Self::Rule2 => "rule_2",
+        }
+    }
+}
+
+/// A chunk of an asset, addressed by a frame range.
+///
+/// `Chunk::new` fills the identifying and positional fields and leaves every
+/// optional field `None` and every list empty.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct Chunk {
+    pub file_id: i32,
+    pub uuid: String,
+    /// Built by `Chunk::new` as "{uuid}_{chunk_index}"
+    pub chunk_id: String,
+    pub chunk_index: u32,
+    pub chunk_type: ChunkType,
+    pub rule: ChunkRule,
+    /// Frames per second (can be fractional, e.g., 29.97, 23.976)
+    pub fps: f64,
+    /// Start frame (0-based)
+    pub start_frame: i64,
+    /// End frame (exclusive)
+    pub end_frame: i64,
+    pub text_content: Option<String>,
+    pub content: serde_json::Value,
+    pub metadata: Option<serde_json::Value>,
+    pub vector_id: Option<String>,
+    /// Set by `Chunk::new` to `end_frame - start_frame`
+    pub frame_count: i32,
+    pub pre_chunk_ids: Vec<i32>,
+    pub parent_chunk_id: Option<String>, // For parent-child chunk hierarchy
+    pub child_chunk_ids: Vec<String>,    // Child chunk IDs (for parent chunks)
+    pub visual_stats: Option<serde_json::Value>,
+}
+
+impl Chunk {
+    /// Creates a visual chunk (Phase 2.1).
+    ///
+    /// The chunk is typed `ChunkType::Visual` under `ChunkRule::Rule2`, and its
+    /// `content` is `visual_content` converted to JSON. If that conversion
+    /// fails, the content is a JSON object carrying a fixed error message
+    /// instead of an error being returned.
+    pub fn new_visual(
+        file_id: i32,
+        uuid: String,
+        chunk_index: u32,
+        start_frame: i64,
+        end_frame: i64,
+        fps: f64,
+        visual_content: VisualChunkContent,
+    ) -> Self {
+        let content = match serde_json::to_value(&visual_content) {
+            Ok(json) => json,
+            Err(_) => serde_json::json!({"error": "Failed to serialize visual content"}),
+        };
+
+        Self::new(
+            file_id,
+            uuid,
+            chunk_index,
+            ChunkType::Visual,
+            ChunkRule::Rule2,
+            start_frame,
+            end_frame,
+            fps,
+            content,
+        )
+    }
+
+    /// Builds a visual chunk (Phase 2.1) from per-frame YOLO detections.
+    ///
+    /// Each input frame becomes one `KeyframeObjects` entry; frames without
+    /// detections are kept with an empty object list. The content's
+    /// `start_time` / `end_time` are the timestamps of the first and last input
+    /// frame (0.0 when `yolo_frames` is empty), so the frames are expected to be
+    /// in chronological order.
+    ///
+    /// `dominant_objects` lists the classes that are present in more than half
+    /// of the frames. Both it and `metadata.unique_classes` are sorted, so the
+    /// serialized content is deterministic.
+    pub fn from_yolo_result(
+        file_id: i32,
+        uuid: String,
+        chunk_index: u32,
+        start_frame: i64,
+        end_frame: i64,
+        fps: f64,
+        yolo_frames: Vec<crate::core::processor::yolo::YoloFrame>,
+    ) -> Self {
+        use std::collections::{BTreeMap, BTreeSet};
+
+        let frame_total = yolo_frames.len();
+
+        let keyframe_objects: Vec<KeyframeObjects> = yolo_frames
+            .iter()
+            .map(|frame| KeyframeObjects {
+                timestamp: frame.timestamp,
+                frame_number: frame.frame,
+                objects: frame
+                    .objects
+                    .iter()
+                    .map(|obj| DetectedObject {
+                        class_name: obj.class_name.clone(),
+                        class_id: obj.class_id,
+                        confidence: obj.confidence,
+                        bbox: Some(BoundingBox {
+                            x: obj.x,
+                            y: obj.y,
+                            width: obj.width,
+                            height: obj.height,
+                        }),
+                        occurrence: 1,
+                    })
+                    .collect(),
+            })
+            .collect();
+
+        // Number of frames in which each class appears at least once. Counting
+        // frames instead of detections keeps a class that shows up several times
+        // in a single frame from being reported as dominant.
+        let mut frames_per_class: BTreeMap<&str, usize> = BTreeMap::new();
+        for frame in &yolo_frames {
+            let classes_in_frame: BTreeSet<&str> = frame
+                .objects
+                .iter()
+                .map(|obj| obj.class_name.as_str())
+                .collect();
+            for class_name in classes_in_frame {
+                *frames_per_class.entry(class_name).or_insert(0) += 1;
+            }
+        }
+
+        // The map keys are exactly the distinct class names, already sorted.
+        let unique_classes: Vec<String> = frames_per_class
+            .keys()
+            .map(|name| (*name).to_owned())
+            .collect();
+
+        // Dominant: present in strictly more than 50% of the frames.
+        let dominant_objects: Vec<String> = frames_per_class
+            .iter()
+            .filter(|(_, &frames_seen)| frames_seen * 2 > frame_total)
+            .map(|(name, _)| (*name).to_owned())
+            .collect();
+
+        let confidences: Vec<f32> = yolo_frames
+            .iter()
+            .flat_map(|frame| frame.objects.iter().map(|obj| obj.confidence))
+            .collect();
+        let total_objects = confidences.len() as u32;
+        let max_confidence = confidences.iter().copied().fold(0.0f32, f32::max);
+        let avg_confidence = if confidences.is_empty() {
+            0.0
+        } else {
+            confidences.iter().sum::<f32>() / confidences.len() as f32
+        };
+
+        let visual_content = VisualChunkContent {
+            start_time: yolo_frames.first().map_or(0.0, |frame| frame.timestamp),
+            end_time: yolo_frames.last().map_or(0.0, |frame| frame.timestamp),
+            keyframe_objects,
+            dominant_objects,
+            object_relationships: vec![], // optional: relationship detection can be added later
+            scene_description: None,      // optional: an LLM-generated description can be added later
+            metadata: VisualMetadata {
+                object_count: total_objects,
+                unique_classes,
+                max_confidence,
+                avg_confidence,
+                // Average number of detections per frame.
+                spatial_density: if frame_total > 0 {
+                    total_objects as f32 / frame_total as f32
+                } else {
+                    0.0
+                },
+            },
+        };
+
+        Self::new_visual(
+            file_id,
+            uuid,
+            chunk_index,
+            start_frame,
+            end_frame,
+            fps,
+            visual_content,
+        )
+    }
+
+    /// Creates a new chunk.
+    ///
+    /// `chunk_id` is derived as "{uuid}_{chunk_index}" and `frame_count` as
+    /// `end_frame - start_frame` (cast to `i32`). Text content, metadata, vector
+    /// ID, parent ID and visual stats start as `None`; the ID lists start empty.
+    pub fn new(
+        file_id: i32,
+        uuid: String,
+        chunk_index: u32,
+        chunk_type: ChunkType,
+        rule: ChunkRule,
+        start_frame: i64,
+        end_frame: i64,
+        fps: f64,
+        content: serde_json::Value,
+    ) -> Self {
+        Self {
+            // Derived fields come first: `chunk_id` borrows `uuid` before it is moved.
+            chunk_id: format!("{}_{}", uuid, chunk_index),
+            frame_count: (end_frame - start_frame) as i32,
+            file_id,
+            uuid,
+            chunk_index,
+            chunk_type,
+            rule,
+            fps,
+            start_frame,
+            end_frame,
+            content,
+            text_content: None,
+            metadata: None,
+            vector_id: None,
+            parent_chunk_id: None,
+            visual_stats: None,
+            pre_chunk_ids: Vec::new(),
+            child_chunk_ids: Vec::new(),
+        }
+    }
+
+    /// Converts the chunk's frame range into a `FrameTime`.
+    pub fn to_frame_time(&self) -> FrameTime {
+        // NOTE(review): `as u64` wraps for negative frame numbers — confirm that
+        // callers never store negative frames.
+        let first_frame = self.start_frame as u64;
+        let last_frame = self.end_frame as u64;
+        FrameTime::from_frames(first_frame, last_frame, self.fps)
+    }
+
+    /// Checks whether this chunk is a parent chunk, i.e. whether it lists any
+    /// child chunk IDs.
+    ///
+    /// A set `parent_chunk_id` marks a chunk as a *child*, so it must not be
+    /// used for this test.
+    pub fn is_parent(&self) -> bool {
+        !self.child_chunk_ids.is_empty()
+    }
+}
--- a/src/core/chunk/visual_test.rs
+++ b/src/core/chunk/visual_test.rs
@@ -0,0 +1,486 @@
+//! Visual chunk tests
+//!
+//! Tests the visual chunk data structures and their functionality.
+
+use serde::{Deserialize, Serialize};
+
+/// Chunk type.
+///
+/// Serde (de)serializes the variants with snake_case names
+/// (for example `Visual` as "visual").
+#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq)]
+#[serde(rename_all = "snake_case")]
+pub enum ChunkType {
+    TimeBased,
+    Sentence,
+    Cut,
+    Trace,
+    Story,
+    Visual,
+}
+
+/// A single detected object.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct DetectedObject {
+    /// Object class name
+    pub class_name: String,
+    /// Object class ID
+    pub class_id: u32,
+    /// Confidence (0.0-1.0)
+    pub confidence: f32,
+    /// Bounding box (x, y, width, height)
+    pub bbox: Option<(i32, i32, i32, i32)>,
+}
+
+/// Objects detected in a single keyframe.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct KeyframeObjects {
+    /// Keyframe time (seconds)
+    pub timestamp: f64,
+    /// Keyframe frame number
+    pub frame_number: u64,
+    /// Objects detected in this keyframe
+    pub objects: Vec<DetectedObject>,
+}
+
+/// Visual chunk content.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct VisualChunkContent {
+    // Timestamp of the first frame in the chunk (see `MockYoloResult::to_visual_chunk`).
+    pub start_time: f64,
+    // Timestamp of the last frame in the chunk.
+    pub end_time: f64,
+    // Per-keyframe detections.
+    pub keyframe_objects: Vec<KeyframeObjects>,
+    // Class names present in more than half of the frames.
+    pub dominant_objects: Vec<String>,
+    pub object_relationships: Vec<(String, String, String)>, // (object1, relationship, object2)
+    // Optional free-text description of the scene.
+    pub scene_description: Option<String>,
+    // Aggregate statistics over all detections in the chunk.
+    pub metadata: VisualMetadata,
+}
+
+/// Visual metadata: aggregate statistics over a chunk's detections.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct VisualMetadata {
+    // Total number of detections across all frames.
+    pub object_count: u32,
+    // Distinct class names seen in the chunk.
+    pub unique_classes: Vec<String>,
+    // Highest detection confidence (0.0 when there are no detections).
+    pub max_confidence: f32,
+    // Mean detection confidence (0.0 when there are no detections).
+    pub avg_confidence: f32,
+    pub spatial_density: f32, // objects per frame
+}
+
+impl VisualChunkContent {
+    /// Jaccard similarity of the sets of class names found in two frames.
+    ///
+    /// Two empty frames score 1.0; an empty frame compared to a non-empty one
+    /// scores 0.0.
+    pub fn frame_similarity(
+        frame1_objects: &[DetectedObject],
+        frame2_objects: &[DetectedObject],
+    ) -> f32 {
+        use std::collections::HashSet;
+
+        match (frame1_objects.is_empty(), frame2_objects.is_empty()) {
+            (true, true) => return 1.0,
+            (true, false) | (false, true) => return 0.0,
+            (false, false) => {}
+        }
+
+        let classes_a: HashSet<&str> = frame1_objects
+            .iter()
+            .map(|o| o.class_name.as_str())
+            .collect();
+        let classes_b: HashSet<&str> = frame2_objects
+            .iter()
+            .map(|o| o.class_name.as_str())
+            .collect();
+
+        // Both sets are non-empty here, so the union size is at least 1.
+        let shared = classes_a.intersection(&classes_b).count();
+        let combined = classes_a.union(&classes_b).count();
+        shared as f32 / combined as f32
+    }
+
+    /// One-line, human-readable summary of the visual chunk.
+    pub fn summary(&self) -> String {
+        let dominant = if self.dominant_objects.is_empty() {
+            "無".to_string()
+        } else {
+            self.dominant_objects.join(", ")
+        };
+
+        format!(
+            "視覺分片: {:.1}s 到 {:.1}s (持續時間: {:.1}s, {} 幀). 物件: {} 個總計, {} 個唯一. 主要物件: {}",
+            self.start_time,
+            self.end_time,
+            self.end_time - self.start_time,
+            self.keyframe_objects.len(),
+            self.metadata.object_count,
+            self.metadata.unique_classes.len(),
+            dominant
+        )
+    }
+
+    /// Returns `true` when any keyframe holds an object of class `class_name`.
+    pub fn contains_object(&self, class_name: &str) -> bool {
+        self.keyframe_objects
+            .iter()
+            .flat_map(|keyframe| keyframe.objects.iter())
+            .any(|obj| obj.class_name == class_name)
+    }
+
+    /// Collects every object whose confidence is at least `threshold`, in
+    /// keyframe order.
+    pub fn high_confidence_objects(&self, threshold: f32) -> Vec<&DetectedObject> {
+        let mut confident = Vec::new();
+        for keyframe in &self.keyframe_objects {
+            confident.extend(
+                keyframe
+                    .objects
+                    .iter()
+                    .filter(|obj| obj.confidence >= threshold),
+            );
+        }
+        confident
+    }
+}
+
+/// Mock YOLO result: a list of frames with their detections.
+#[derive(Debug, Clone)]
+pub struct MockYoloResult {
+    // Frames to draw from; `to_visual_chunk` selects them by frame number.
+    pub frames: Vec<MockYoloFrame>,
+}
+
+/// One mock YOLO frame.
+#[derive(Debug, Clone)]
+pub struct MockYoloFrame {
+    // Frame number; used for range selection in `to_visual_chunk`.
+    pub frame: u64,
+    // Frame timestamp; copied into `KeyframeObjects::timestamp` (seconds).
+    pub timestamp: f64,
+    // Detections in this frame.
+    pub objects: Vec<MockYoloObject>,
+}
+
+/// One mock YOLO detection.
+#[derive(Debug, Clone)]
+pub struct MockYoloObject {
+    pub class_name: String,
+    pub class_id: u32,
+    // `x`, `y`, `width`, `height` become the `(x, y, width, height)` bbox tuple
+    // of `DetectedObject`.
+    pub x: i32,
+    pub y: i32,
+    pub width: i32,
+    pub height: i32,
+    pub confidence: f32,
+}
+
+impl MockYoloResult {
+    /// Builds visual chunk content from the mock frames whose frame number lies
+    /// in `start_frame..=end_frame` (both ends inclusive).
+    ///
+    /// Returns `None` when no frame falls inside that range. `start_time` and
+    /// `end_time` are the timestamps of the first and last selected frame, so
+    /// `frames` is expected to be in chronological order. `dominant_objects`
+    /// and `metadata.unique_classes` are both sorted alphabetically.
+    pub fn to_visual_chunk(&self, start_frame: u64, end_frame: u64) -> Option<VisualChunkContent> {
+        use std::collections::{BTreeMap, BTreeSet};
+
+        let frames: Vec<&MockYoloFrame> = self
+            .frames
+            .iter()
+            .filter(|f| f.frame >= start_frame && f.frame <= end_frame)
+            .collect();
+
+        // `?` returns `None` when no frame was selected; past this point
+        // `frames` is known to be non-empty.
+        let start_time = frames.first()?.timestamp;
+        let end_time = frames.last()?.timestamp;
+        let frame_total = frames.len();
+
+        // Convert the frames into keyframe objects.
+        let keyframe_objects: Vec<KeyframeObjects> = frames
+            .iter()
+            .map(|frame| KeyframeObjects {
+                timestamp: frame.timestamp,
+                frame_number: frame.frame,
+                objects: frame
+                    .objects
+                    .iter()
+                    .map(|obj| DetectedObject {
+                        class_name: obj.class_name.clone(),
+                        class_id: obj.class_id,
+                        confidence: obj.confidence,
+                        bbox: Some((obj.x, obj.y, obj.width, obj.height)),
+                    })
+                    .collect(),
+            })
+            .collect();
+
+        // Number of frames in which each class appears at least once.
+        let mut frames_per_class: BTreeMap<&str, usize> = BTreeMap::new();
+        for frame in &frames {
+            let classes_in_frame: BTreeSet<&str> = frame
+                .objects
+                .iter()
+                .map(|obj| obj.class_name.as_str())
+                .collect();
+            for class_name in classes_in_frame {
+                *frames_per_class.entry(class_name).or_insert(0) += 1;
+            }
+        }
+
+        // The map keys are exactly the distinct class names, already sorted, so
+        // the result no longer depends on hash iteration order.
+        let unique_classes: Vec<String> = frames_per_class
+            .keys()
+            .map(|name| (*name).to_owned())
+            .collect();
+
+        // Dominant objects: classes present in more than 50% of the frames.
+        let dominant_objects: Vec<String> = frames_per_class
+            .iter()
+            .filter(|(_, &frames_seen)| frames_seen as f32 / frame_total as f32 > 0.5)
+            .map(|(name, _)| (*name).to_owned())
+            .collect();
+
+        // Confidence statistics over every detection in the selected frames.
+        let confidences: Vec<f32> = frames
+            .iter()
+            .flat_map(|f| f.objects.iter().map(|o| o.confidence))
+            .collect();
+        let total_objects: u32 = frames.iter().map(|f| f.objects.len() as u32).sum();
+        let max_confidence = confidences.iter().copied().fold(0.0f32, f32::max);
+        let avg_confidence = if confidences.is_empty() {
+            0.0
+        } else {
+            confidences.iter().sum::<f32>() / confidences.len() as f32
+        };
+
+        Some(VisualChunkContent {
+            start_time,
+            end_time,
+            keyframe_objects,
+            dominant_objects,
+            object_relationships: vec![], // needs relationship-detection logic
+            scene_description: None,      // can be generated later by an LLM
+            metadata: VisualMetadata {
+                object_count: total_objects,
+                unique_classes,
+                max_confidence,
+                avg_confidence,
+                // `frame_total` is at least 1 here, so the division is safe.
+                spatial_density: total_objects as f32 / frame_total as f32,
+            },
+        })
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// Builds a `DetectedObject` fixture; `bbox` is `(x, y, width, height)`.
+    fn detected(
+        class_name: &str,
+        class_id: u32,
+        confidence: f32,
+        bbox: (i32, i32, i32, i32),
+    ) -> DetectedObject {
+        DetectedObject {
+            class_name: class_name.to_string(),
+            class_id,
+            confidence,
+            bbox: Some(bbox),
+        }
+    }
+
+    /// Builds a `MockYoloObject` fixture; `bbox` is `(x, y, width, height)`.
+    fn yolo_object(
+        class_name: &str,
+        class_id: u32,
+        bbox: (i32, i32, i32, i32),
+        confidence: f32,
+    ) -> MockYoloObject {
+        let (x, y, width, height) = bbox;
+        MockYoloObject {
+            class_name: class_name.to_string(),
+            class_id,
+            x,
+            y,
+            width,
+            height,
+            confidence,
+        }
+    }
+
+    #[test]
+    fn test_chunk_type_visual() {
+        let chunk_type = ChunkType::Visual;
+        let json = serde_json::to_string(&chunk_type).unwrap();
+        assert_eq!(json, "\"visual\"");
+
+        let deserialized: ChunkType = serde_json::from_str(&json).unwrap();
+        assert_eq!(deserialized, ChunkType::Visual);
+    }
+
+    #[test]
+    fn test_visual_chunk_creation() {
+        // Mock YOLO result: "person" in both frames, "car" only in the first.
+        let yolo_result = MockYoloResult {
+            frames: vec![
+                MockYoloFrame {
+                    frame: 0,
+                    timestamp: 0.0,
+                    objects: vec![
+                        yolo_object("person", 0, (100, 200, 50, 100), 0.95),
+                        yolo_object("car", 2, (300, 150, 80, 60), 0.87),
+                    ],
+                },
+                MockYoloFrame {
+                    frame: 1,
+                    timestamp: 0.033, // 1/30 second
+                    objects: vec![yolo_object("person", 0, (110, 210, 52, 102), 0.92)],
+                },
+            ],
+        };
+
+        // Build the visual chunk from the YOLO result.
+        let chunk = yolo_result.to_visual_chunk(0, 1).unwrap();
+
+        // Verify the chunk attributes.
+        assert_eq!(chunk.start_time, 0.0);
+        assert_eq!(chunk.end_time, 0.033);
+        assert_eq!(chunk.metadata.object_count, 3);
+        assert_eq!(chunk.metadata.unique_classes.len(), 2);
+        assert!(chunk
+            .metadata
+            .unique_classes
+            .contains(&"person".to_string()));
+        assert!(chunk.metadata.unique_classes.contains(&"car".to_string()));
+        // "car" is in exactly 50% of the frames, which is not "more than half".
+        assert_eq!(chunk.dominant_objects, vec!["person"]);
+        assert_eq!(chunk.keyframe_objects.len(), 2);
+    }
+
+    #[test]
+    fn test_visual_chunk_content_methods() {
+        let content = VisualChunkContent {
+            start_time: 0.0,
+            end_time: 5.0,
+            keyframe_objects: vec![KeyframeObjects {
+                timestamp: 0.0,
+                frame_number: 0,
+                objects: vec![
+                    detected("person", 0, 0.95, (100, 200, 50, 100)),
+                    detected("car", 2, 0.87, (300, 150, 80, 60)),
+                ],
+            }],
+            dominant_objects: vec!["person".to_string()],
+            object_relationships: vec![],
+            scene_description: Some("一個人站在車旁".to_string()),
+            metadata: VisualMetadata {
+                object_count: 2,
+                unique_classes: vec!["person".to_string(), "car".to_string()],
+                max_confidence: 0.95,
+                avg_confidence: 0.91,
+                spatial_density: 2.0,
+            },
+        };
+
+        // summary()
+        let summary = content.summary();
+        assert!(summary.contains("視覺分片"));
+        assert!(summary.contains("person"));
+
+        // contains_object()
+        assert!(content.contains_object("person"));
+        assert!(content.contains_object("car"));
+        assert!(!content.contains_object("dog"));
+
+        // high_confidence_objects()
+        let high_conf_objects = content.high_confidence_objects(0.9);
+        assert_eq!(high_conf_objects.len(), 1);
+        assert_eq!(high_conf_objects[0].class_name, "person");
+    }
+
+    #[test]
+    fn test_frame_similarity() {
+        let frame1_objects = vec![
+            detected("person", 0, 0.95, (100, 200, 50, 100)),
+            detected("car", 2, 0.87, (300, 150, 80, 60)),
+        ];
+        let frame2_objects = vec![
+            detected("person", 0, 0.92, (110, 210, 52, 102)),
+            detected("car", 2, 0.85, (310, 155, 82, 62)),
+        ];
+        let frame3_objects = vec![detected("dog", 16, 0.78, (150, 250, 40, 60))];
+
+        // Similar frames (same classes).
+        let similarity_same =
+            VisualChunkContent::frame_similarity(&frame1_objects, &frame2_objects);
+        assert!((similarity_same - 1.0).abs() < 0.001);
+
+        // Dissimilar frames (disjoint classes).
+        let similarity_diff =
+            VisualChunkContent::frame_similarity(&frame1_objects, &frame3_objects);
+        assert!((similarity_diff - 0.0).abs() < 0.001);
+
+        // Empty frames.
+        let empty_objects: Vec<DetectedObject> = vec![];
+        let similarity_empty = VisualChunkContent::frame_similarity(&empty_objects, &empty_objects);
+        assert!((similarity_empty - 1.0).abs() < 0.001);
+
+        let similarity_mixed =
+            VisualChunkContent::frame_similarity(&empty_objects, &frame1_objects);
+        assert!((similarity_mixed - 0.0).abs() < 0.001);
+    }
+
+    #[test]
+    fn test_serialization_deserialization() {
+        let content = VisualChunkContent {
+            start_time: 0.0,
+            end_time: 5.0,
+            keyframe_objects: vec![KeyframeObjects {
+                timestamp: 0.0,
+                frame_number: 0,
+                objects: vec![detected("person", 0, 0.95, (100, 200, 50, 100))],
+            }],
+            dominant_objects: vec!["person".to_string()],
+            object_relationships: vec![],
+            scene_description: Some("場景描述".to_string()),
+            metadata: VisualMetadata {
+                object_count: 1,
+                unique_classes: vec!["person".to_string()],
+                max_confidence: 0.95,
+                avg_confidence: 0.95,
+                spatial_density: 1.0,
+            },
+        };
+
+        // Serialize. The JSON carries the struct's field names; it contains no
+        // "visual_chunk" key, so the assertions check real field names instead.
+        let json = serde_json::to_string(&content).unwrap();
+        assert!(json.contains("person"));
+        assert!(json.contains("\"keyframe_objects\""));
+        assert!(json.contains("\"dominant_objects\""));
+
+        // Deserialize.
+        let deserialized: VisualChunkContent = serde_json::from_str(&json).unwrap();
+        assert_eq!(deserialized.start_time, 0.0);
+        assert_eq!(deserialized.end_time, 5.0);
+        assert_eq!(deserialized.dominant_objects, vec!["person"]);
+    }
+}
--- a/src/core/db/postgres_db.rs
+++ b/src/core/db/postgres_db.rs
@@ -77,6 +77,8 @@ pub struct VideoRow {
    pub status: String,
    pub user_id: Option<i32>,
    pub job_id: Option<i32>,
+    pub created_at: Option<String>,
+    pub registration_time: Option<String>,
 }

 impl From<VideoRow> for VideoRecord {
@@ -103,7 +105,8 @@ impl From<VideoRow> for VideoRecord {
            status: VideoStatus::from_db_str(&row.status).unwrap_or(VideoStatus::Pending),
            user_id: row.user_id.map(|v| v as i64),
            job_id: row.job_id.map(|v| v as i64),
-            created_at: String::new(),
+            created_at: row.created_at.unwrap_or_default(),
+            registration_time: row.registration_time,
        }
    }
 }
@@ -124,6 +127,7 @@ pub struct VideoRecord {
    pub user_id: Option<i64>,
    pub job_id: Option<i64>,
    pub created_at: String,
+    pub registration_time: Option<String>,
 }

 #[derive(Debug, Clone, Serialize, Deserialize)]
@@ -701,7 +705,7 @@ impl PostgresDb {
        let table = schema::table_name("videos");
        let result = sqlx::query_as::<_, VideoRow>(
            &format!(
-                "SELECT id, uuid, file_path, file_name, duration, width, height, fps, probe_json, fs_video, fs_json, psql_chunk, pobject_chunk, mobject_chunk, pvector_chunk, qvector_chunk, status, user_id, job_id FROM {} WHERE uuid = $1",
+                "SELECT id, uuid, file_path, file_name, duration, width, height, fps, probe_json, fs_video, fs_json, psql_chunk, pobject_chunk, mobject_chunk, pvector_chunk, qvector_chunk, status, user_id, job_id, created_at::text, registration_time::text FROM {} WHERE uuid = $1",
                table
            )
        )
@@ -796,28 +800,90 @@ impl PostgresDb {
    }

+    /// Lists one page of videos that are not yet processed.
+    ///
+    /// Delegates to `search_videos` with no text query and `is_processed = Some(false)`,
+    /// so rows whose status is `'ready'` are excluded from both the page and the total.
    pub async fn list_videos(&self, limit: i32, offset: i64) -> Result<(Vec<VideoRecord>, i64)> {
+        // Default to unprocessed (status != 'ready')
+        self.search_videos(None, Some(false), limit, offset).await
+    }
+
+    /// Returns one page of videos plus the total number of rows matching the filters.
+    ///
+    /// * `query` - optional case-insensitive substring matched against `file_name`,
+    ///   `file_path` and `probe_json::text`. `%`, `_` and `\` in the query are escaped
+    ///   so they match literally instead of acting as `LIKE` wildcards.
+    /// * `is_processed` - `Some(true)` keeps `status = 'ready'`, `Some(false)` keeps
+    ///   `status != 'ready'`, `None` applies no status filter.
+    /// * `limit` / `offset` - paging window; rows are ordered by `id DESC`.
+    ///
+    /// # Errors
+    /// Propagates any error returned by the COUNT or SELECT query.
+    pub async fn search_videos(
+        &self,
+        query: Option<&str>,
+        is_processed: Option<bool>,
+        limit: i32,
+        offset: i64,
+    ) -> Result<(Vec<VideoRecord>, i64)> {
        let table = schema::table_name("videos");
+
+        // Status filter: the fragments are fixed literals, never user input.
+        let status_cond = match is_processed {
+            Some(true) => "AND status = 'ready'",
+            Some(false) => "AND status != 'ready'",
+            None => "",
+        };

-        // Count total
-        let count: Option<i64> = sqlx::query_scalar(&format!("SELECT COUNT(*) FROM {}", table))
-            .fetch_one(&self.pool)
-            .await?;
-        let total = count.unwrap_or(0);
+        // Build the LIKE pattern once. Backslash is PostgreSQL's default LIKE escape
+        // character, so escape it first, then the two wildcard characters.
+        let pattern: Option<String> = query.map(|q| {
+            let escaped = q
+                .to_lowercase()
+                .replace('\\', "\\\\")
+                .replace('%', "\\%")
+                .replace('_', "\\_");
+            format!("%{}%", escaped)
+        });
+
+        // The pattern is always passed as bind parameter $1, never interpolated.
+        let search_cond = if pattern.is_some() {
+            "AND (LOWER(file_name) LIKE $1 OR LOWER(file_path) LIKE $1 OR LOWER(probe_json::text) LIKE $1)"
+        } else {
+            ""
+        };

-        // Select paged
-        let rows = sqlx::query_as::<_, VideoRow>(
-            &format!(
-                "SELECT id, uuid, file_path, file_name, duration, width, height, fps, probe_json, fs_video, fs_json, psql_chunk, pobject_chunk, mobject_chunk, pvector_chunk, qvector_chunk, status, user_id, job_id FROM {} ORDER BY id DESC LIMIT $1 OFFSET $2",
-                table
-            )
-        )
-        .bind(limit)
-        .bind(offset)
-        .fetch_all(&self.pool)
-        .await?;
+        let where_clause = format!("WHERE 1=1 {} {}", status_cond, search_cond);
+
+        // 1. Count query. The number of binds must match the placeholders in the SQL,
+        //    so the pattern is bound only when the search condition is present.
+        let count_query = format!("SELECT COUNT(*) FROM {} {}", table, where_clause);
+        let total: i64 = if let Some(p) = &pattern {
+            sqlx::query_scalar(&count_query)
+                .bind(p)
+                .fetch_one(&self.pool)
+                .await?
+        } else {
+            sqlx::query_scalar(&count_query)
+                .fetch_one(&self.pool)
+                .await?
+        };
+
+        // 2. Select query. Timestamps are cast to text to fit `VideoRow`'s String fields.
+        let columns = "id, uuid, file_path, file_name, duration, width, height, fps, probe_json, fs_video, fs_json, psql_chunk, pobject_chunk, mobject_chunk, pvector_chunk, qvector_chunk, status, user_id, job_id, created_at::text, registration_time::text";
+
+        // With a search pattern occupying $1, LIMIT/OFFSET shift to $2/$3.
+        let (limit_ph, offset_ph) = if pattern.is_some() {
+            ("$2", "$3")
+        } else {
+            ("$1", "$2")
+        };
+        let select_query = format!(
+            "SELECT {} FROM {} {} ORDER BY id DESC LIMIT {} OFFSET {}",
+            columns, table, where_clause, limit_ph, offset_ph
+        );
+
+        let rows = if let Some(p) = &pattern {
+            sqlx::query_as::<_, VideoRow>(&select_query)
+                .bind(p)
+                .bind(limit)
+                .bind(offset)
+                .fetch_all(&self.pool)
+                .await?
+        } else {
+            sqlx::query_as::<_, VideoRow>(&select_query)
+                .bind(limit)
+                .bind(offset)
+                .fetch_all(&self.pool)
+                .await?
+        };

        let videos: Vec<VideoRecord> = rows.into_iter().map(|r| r.into()).collect();
-
        Ok((videos, total))
    }

@@ -850,6 +916,19 @@ impl PostgresDb {
        Ok(())
    }

+    /// Stamps `registration_time` (and `updated_at`) with the current timestamp for the
+    /// video identified by `uuid`.
+    ///
+    /// The `registration_time IS NULL` guard means an already-stamped row is left untouched.
+    ///
+    /// # Errors
+    /// Propagates any error returned while executing the UPDATE.
+    pub async fn set_registration_time(&self, uuid: &str) -> Result<()> {
+        let sql = format!(
+            "UPDATE {} SET registration_time = CURRENT_TIMESTAMP, updated_at = CURRENT_TIMESTAMP WHERE uuid = $1 AND registration_time IS NULL",
+            schema::table_name("videos")
+        );
+
+        sqlx::query(&sql).bind(uuid).execute(&self.pool).await?;
+        Ok(())
+    }
+
    pub async fn delete_video(&self, uuid: &str) -> Result<()> {
        tracing::info!("[PostgresDb] Deleting video: {}", uuid);

--- a/src/core/db/schema_ctx.rs
+++ b/src/core/db/schema_ctx.rs
@@ -0,0 +1,68 @@
+use anyhow::Result;
+use sqlx::PgPool;
+use std::sync::atomic::{AtomicU32, Ordering};
+
+/// Schema context for database operations.
+///
+/// Holds the prefix that is prepended to bare table names so that all queries
+/// address the configured schema.
+#[derive(Debug, Clone)]
+pub struct SchemaContext {
+    /// Either empty (for the `public` schema) or `"<schema>."`, including the trailing dot.
+    pub prefix: String,
+}
+
+/// Process-wide context, initialised lazily on first use of [`SchemaContext::global`].
+static SCHEMA_INSTANCE: std::sync::OnceLock<SchemaContext> = std::sync::OnceLock::new();
+/// Counter bumped by [`SchemaContext::reload`]; nothing in this block reads it.
+static SCHEMA_VERSION: AtomicU32 = AtomicU32::new(0);
+
+impl SchemaContext {
+    /// Builds a context from the `DATABASE_SCHEMA` environment variable.
+    ///
+    /// An unset, empty or whitespace-only value falls back to `"dev"`; an empty schema
+    /// name would otherwise produce the prefix `"."` and therefore invalid table names.
+    /// The schema `public` maps to an empty prefix.
+    pub fn init() -> Self {
+        let schema = std::env::var("DATABASE_SCHEMA")
+            .ok()
+            .map(|raw| raw.trim().to_string())
+            .filter(|name| !name.is_empty())
+            .unwrap_or_else(|| "dev".to_string());
+
+        let prefix = if schema == "public" {
+            String::new()
+        } else {
+            format!("{}.", schema)
+        };
+        Self { prefix }
+    }
+
+    /// Returns the global schema context, creating it via [`SchemaContext::init`] on first call.
+    pub fn global() -> &'static Self {
+        SCHEMA_INSTANCE.get_or_init(Self::init)
+    }
+
+    /// Returns `name` with the schema prefix prepended.
+    pub fn table(&self, name: &str) -> String {
+        format!("{}{}", self.prefix, name)
+    }
+
+    /// Reload schema context (for testing).
+    ///
+    /// This only increments an internal version counter: the `OnceLock` behind
+    /// [`SchemaContext::global`] cannot be reset, so the cached prefix is NOT re-read
+    /// from the environment. In production the schema does not change at runtime.
+    pub fn reload() {
+        SCHEMA_VERSION.fetch_add(1, Ordering::SeqCst);
+    }
+}
+
+/// Shorthand for prefixing `name` with the schema of the global [`SchemaContext`].
+pub fn t(name: &str) -> String {
+    let ctx = SchemaContext::global();
+    ctx.table(name)
+}
+
+/// Checks whether `table_name` exists in the schema of the global [`SchemaContext`].
+///
+/// The schema name is derived from the context prefix: an empty prefix means `public`,
+/// otherwise the trailing `.` is stripped. Both values are passed as bind parameters
+/// to a lookup against `information_schema.tables`.
+///
+/// # Errors
+/// Propagates any error returned while executing the query.
+pub async fn table_exists(pool: &PgPool, table_name: &str) -> Result<bool> {
+    const EXISTS_SQL: &str = "SELECT EXISTS(SELECT 1 FROM information_schema.tables WHERE table_schema = $1 AND table_name = $2)";
+
+    let schema = SchemaContext::global();
+    let schema_name = if schema.prefix.is_empty() {
+        "public"
+    } else {
+        schema.prefix.trim_end_matches('.')
+    };
+
+    let exists: bool = sqlx::query_scalar(EXISTS_SQL)
+        .bind(schema_name)
+        .bind(table_name)
+        .fetch_one(pool)
+        .await?;
+    Ok(exists)
+}
--- a/src/core/ingestion.rs
+++ b/src/core/ingestion.rs
@@ -0,0 +1,143 @@
+use anyhow::{Context, Result};
+use std::path::Path;
+use tracing::{info, warn};
+
+use crate::core::db::{Database, PostgresDb, VideoRecord, VideoStatus};
+use crate::core::probe;
+use crate::core::storage::FileManager;
+use crate::uuid as uuid_utils;
+
+/// Handles the automatic ingestion of video files.
+/// This service is responsible for:
+/// 1. Running `ffprobe` (Pre-processing)
+/// 2. Saving probe JSON
+/// 3. Registering the video in the database (making it visible in the API)
+pub struct IngestionService {
+    db: PostgresDb,
+}
+
+impl IngestionService {
+    /// Creates a service that registers videos through `db`.
+    pub fn new(db: PostgresDb) -> Self {
+        Self { db }
+    }
+
+    /// Registers a video file found in the watched directory.
+    ///
+    /// Returns `Ok(None)` when `file_path` does not have a recognised video extension
+    /// or when a video with the same UUID is already registered, and `Ok(Some(uuid))`
+    /// once the video has been probed and inserted.
+    ///
+    /// # Errors
+    /// Fails if probing, serialising the probe result, inserting the record or
+    /// setting the registration time fails. A failure to write the probe JSON file
+    /// is only logged.
+    pub async fn ingest(&self, file_path: &str) -> Result<Option<String>> {
+        let path = Path::new(file_path);
+
+        // 1. Validate extension
+        if !is_video_extension(path) {
+            return Ok(None);
+        }
+
+        // 2. Compute UUID
+        let uuid = uuid_utils::compute_uuid_from_path(file_path);
+        let file_name = path
+            .file_name()
+            .unwrap_or_default()
+            .to_string_lossy()
+            .to_string();
+
+        // 3. Check if already registered
+        match self.db.get_video_by_uuid(&uuid).await {
+            Ok(Some(_)) => {
+                info!("Video already registered: {} ({})", file_name, uuid);
+                return Ok(None);
+            }
+            Ok(None) => {}
+            Err(e) => {
+                // Keep going as before, but surface the lookup failure instead of
+                // silently treating it as "not registered".
+                warn!(
+                    "Could not check whether {} is already registered, continuing: {}",
+                    uuid, e
+                );
+            }
+        }
+
+        info!("Starting ingestion for: {} ({})", path.display(), uuid);
+
+        // 4. Run ffprobe
+        let probe_result = probe::probe_video(file_path)
+            .with_context(|| format!("Failed to probe video: {}", file_path))?;
+
+        // 5. Extract metadata
+        let duration = probe_result
+            .format
+            .duration
+            .as_ref()
+            .and_then(|s| s.parse::<f64>().ok())
+            .unwrap_or(0.0);
+
+        // Use the first video stream only, so a later video-typed stream (for example
+        // an attached cover image) cannot overwrite the main stream's geometry/fps.
+        let (width, height, fps) = probe_result
+            .streams
+            .iter()
+            .find(|stream| stream.codec_type.as_deref() == Some("video"))
+            .map(|stream| {
+                (
+                    stream.width.unwrap_or(0),
+                    stream.height.unwrap_or(0),
+                    stream
+                        .r_frame_rate
+                        .as_deref()
+                        .and_then(parse_frame_rate)
+                        .unwrap_or(0.0),
+                )
+            })
+            .unwrap_or((0, 0, 0.0));
+
+        // 6. Save Probe JSON (best effort: a failure here does not abort ingestion)
+        let file_manager = FileManager::new(std::path::PathBuf::from("."));
+        let probe_json_str = serde_json::to_string_pretty(&probe_result)?;
+
+        if let Err(e) = file_manager.save_json(&uuid, "probe", &probe_json_str) {
+            warn!("Failed to save probe JSON for {}: {}", uuid, e);
+        } else {
+            info!("Probe JSON saved for {}", uuid);
+        }
+
+        // 7. Create Record
+        // Use absolute path for safety; fall back to the path as given if it cannot
+        // be canonicalized.
+        let canonical_path = path.canonicalize().unwrap_or_else(|_| path.to_path_buf());
+
+        let record = VideoRecord {
+            id: 0,
+            uuid: uuid.clone(),
+            file_path: canonical_path.to_string_lossy().to_string(),
+            file_name,
+            duration,
+            width,
+            height,
+            fps,
+            probe_json: Some(probe_json_str),
+            storage: Default::default(),
+            status: VideoStatus::Pending, // Ready for processing
+            user_id: None,
+            job_id: None,
+            created_at: String::new(),
+            registration_time: None,
+        };
+
+        // 8. Insert DB
+        self.db
+            .register_video(&record)
+            .await
+            .context("Failed to register video in database")?;
+
+        self.db
+            .set_registration_time(&uuid)
+            .await
+            .context("Failed to set registration_time")?;
+
+        info!(
+            "Successfully registered video: {} (UUID: {})",
+            record.file_name, uuid
+        );
+        Ok(Some(uuid))
+    }
+}
+
+/// Parses an ffprobe `num/den` frame-rate string; `None` if malformed or `den <= 0`.
+fn parse_frame_rate(raw: &str) -> Option<f64> {
+    let (num, den) = raw.split_once('/')?;
+    let n: f64 = num.parse().ok()?;
+    let d: f64 = den.parse().ok()?;
+    if d > 0.0 {
+        Some(n / d)
+    } else {
+        None
+    }
+}
+
+/// Returns `true` when `path` has a UTF-8 extension that, lower-cased, is one of
+/// `mp4`, `mov`, `mkv`, `avi`, `webm` or `m4v`.
+fn is_video_extension(path: &Path) -> bool {
+    const VIDEO_EXTENSIONS: [&str; 6] = ["mp4", "mov", "mkv", "avi", "webm", "m4v"];
+
+    path.extension()
+        .and_then(|raw| raw.to_str())
+        .map(|raw| raw.to_lowercase())
+        .map_or(false, |lowered| VIDEO_EXTENSIONS.contains(&lowered.as_str()))
+}
--- a/src/core/llm/client.rs
+++ b/src/core/llm/client.rs
@@ -0,0 +1,104 @@
+use anyhow::Result;
+use reqwest::Client;
+use serde::{Deserialize, Serialize};
+use std::time::Duration;
+use tracing::{debug, error, warn};
+
+use crate::core::config;
+
+/// Request body that `generate_5w1h_summary` serializes to JSON and POSTs to the
+/// configured summary URL.
+#[derive(Debug, Serialize)]
+struct ChatRequest {
+    model: String,
+    messages: Vec<ChatMessage>,
+    temperature: f32,
+    max_tokens: u32,
+    stream: bool,
+}
+
+/// A single chat message: sent inside `ChatRequest::messages` and read back from
+/// `Choice::message`.
+#[derive(Debug, Serialize, Deserialize)]
+struct ChatMessage {
+    role: String,
+    content: String,
+}
+
+/// Response body of the chat endpoint; only the `choices` field is deserialized.
+#[derive(Debug, Deserialize)]
+struct ChatResponse {
+    choices: Vec<Choice>,
+}
+
+/// One entry of `ChatResponse::choices`, wrapping the returned message.
+#[derive(Debug, Deserialize)]
+struct Choice {
+    message: ChatMessage,
+}
+
+/// Generates a 5W1H+ summary for a given scene context.
+/// Context should include the combined text of all sentences in the scene.
+///
+/// Returns the trimmed content of the first choice in the LLM response. When
+/// summaries are disabled via config, returns the literal string `"LLM Disabled"`
+/// without making a request.
+///
+/// # Errors
+/// Fails if the HTTP client cannot be built, the request fails, the endpoint answers
+/// with a non-success status (the status and response body are included in the
+/// error), the response cannot be decoded, or it contains no choices.
+pub async fn generate_5w1h_summary(scene_text: &str) -> Result<String> {
+    if !*config::llm::SUMMARY_ENABLED {
+        warn!("LLM Summary is disabled via config");
+        return Ok("LLM Disabled".to_string());
+    }
+
+    let client = Client::builder()
+        .timeout(Duration::from_secs(*config::llm::SUMMARY_TIMEOUT_SECS))
+        .build()?;
+
+    let prompt = format!(
+        r#"Analyze the following video scene transcript and provide a concise 5W1H+ summary in JSON format.
+        Focus on: Who, What, Where, When, Why, How, and Key Objects/Actions.
+        
+        Transcript:
+        "{}"
+
+        Output format:
+        {{
+          "who": "...",
+          "what": "...",
+          "where": "...",
+          "when": "...",
+          "why": "...",
+          "how": "...",
+          "summary": "..."
+        }}"#,
+        scene_text
+    );
+
+    let req = ChatRequest {
+        model: (*config::llm::SUMMARY_MODEL).clone(),
+        messages: vec![
+            ChatMessage {
+                role: "system".to_string(),
+                content: "You are an expert video analyst assistant.".to_string(),
+            },
+            ChatMessage {
+                role: "user".to_string(),
+                content: prompt,
+            },
+        ],
+        temperature: 0.1,
+        max_tokens: 512,
+        stream: false,
+    };
+
+    debug!("Calling LLM for summary: {}", *config::llm::SUMMARY_URL);
+
+    let res = client
+        .post(&*config::llm::SUMMARY_URL)
+        .json(&req)
+        .send()
+        .await?;
+
+    // Capture the status before `text()` consumes the response, so the returned
+    // error carries it even when the body is empty.
+    let status = res.status();
+    if !status.is_success() {
+        let text = res.text().await.unwrap_or_default();
+        error!("LLM API error: {} - {}", status, text);
+        anyhow::bail!("LLM API error ({}): {}", status, text);
+    }
+
+    let chat_res: ChatResponse = res.json().await?;
+
+    // Only the first choice is used.
+    chat_res
+        .choices
+        .into_iter()
+        .next()
+        .map(|choice| choice.message.content.trim().to_string())
+        .ok_or_else(|| anyhow::anyhow!("Empty response from LLM"))
+}
--- a/src/core/llm/mod.rs
+++ b/src/core/llm/mod.rs
@@ -0,0 +1 @@
+pub mod client;
--- a/src/core/person_identity.rs
+++ b/src/core/person_identity.rs
@@ -0,0 +1,266 @@
+use chrono::{DateTime, Utc};
+use serde::{Deserialize, Serialize};
+use sqlx::FromRow;
+
+// ==========================================
+// Legacy structs (kept for backward compatibility)
+// ==========================================
+
+/// Person-identity record that can be loaded from a SQL row (`FromRow`) and
+/// converted into a `PersonIdentityResponse`.
+///
+/// NOTE(review): field meanings are presumably those suggested by their names
+/// (per-video link between a face identity and a speaker) — confirm against the
+/// table schema.
+#[derive(Debug, Clone, Serialize, Deserialize, FromRow)]
+pub struct PersonIdentity {
+    pub id: i32,
+    pub person_id: String,
+    pub face_identity_id: Option<i32>,
+    pub speaker_id: Option<String>,
+    pub video_uuid: String,
+    pub confidence: f64,
+    pub name: Option<String>,
+    pub metadata: serde_json::Value,
+    pub first_appearance_time: Option<f64>,
+    pub last_appearance_time: Option<f64>,
+    pub total_appearance_duration: f64,
+    pub appearance_count: i32,
+    pub created_at: DateTime<Utc>,
+    pub updated_at: DateTime<Utc>,
+    pub is_confirmed: bool,
+}
+
+// ==========================================
+// New structs (V5 identity binding system)
+// ==========================================
+
+/// Person identity — a unified record for actors, public figures, family, friends, etc.
+#[derive(Debug, Clone, Serialize, Deserialize, FromRow)]
+pub struct Identity {
+    pub id: i32,
+    pub name: String,
+    pub embedding: Option<String>, // Vector embedding stored as text/json
+    pub metadata: Option<serde_json::Value>,
+    pub created_at: DateTime<Utc>,
+}
+
+/// Identity binding record.
+/// Binds a machine-generated ID (face_x, speaker_y) to an `Identity`.
+///
+/// NOTE(review): `identity_id` is `i64` here while `Identity::id` is `i32` —
+/// confirm which width matches the database columns.
+#[derive(Debug, Clone, Serialize, Deserialize, FromRow)]
+pub struct IdentityBinding {
+    pub id: i64,
+    pub identity_id: i64,
+    pub binding_type: String,  // 'face', 'speaker'
+    pub binding_value: String, // e.g. "face_1", "speaker_3"
+    pub source: String,        // 'auto', 'manual'
+    pub confidence: f64,
+    pub is_active: bool,
+    pub created_at: DateTime<Utc>,
+}
+
+/// Bind request (used by the API).
+#[derive(Debug, Clone, Deserialize, Serialize)]
+pub struct BindIdentityRequest {
+    pub identity_id: Option<i64>,
+    pub name: Option<String>,   // if identity_id is not provided, a new Identity is created
+    pub binding_type: String,   // 'face' or 'speaker'
+    pub binding_value: String,  // e.g. "face_1"
+    pub source: Option<String>, // defaults to 'manual'
+}
+
+/// Unbind request: identifies the binding to remove by its type and value.
+#[derive(Debug, Clone, Deserialize, Serialize)]
+pub struct UnbindIdentityRequest {
+    pub binding_type: String,
+    pub binding_value: String,
+}
+
+/// Suggested binding (generated automatically by the system, confirmed manually).
+#[derive(Debug, Clone, Deserialize, Serialize)]
+pub struct SuggestedBinding {
+    pub binding_type: String,
+    pub binding_value: String,
+    pub suggested_identity_id: i64,
+    pub suggested_identity_name: String,
+    pub confidence: f64,
+    pub reason: String,
+}
+
+/// One appearance interval of a person in a video; loadable from a SQL row (`FromRow`).
+///
+/// NOTE(review): times are presumably seconds from the start of the video and
+/// `duration` presumably equals `end_time - start_time` — confirm with the writer.
+#[derive(Debug, Clone, Serialize, Deserialize, FromRow)]
+pub struct PersonAppearance {
+    pub id: i32,
+    pub person_id: String,
+    pub video_uuid: String,
+    pub start_time: f64,
+    pub end_time: f64,
+    pub duration: f64,
+    pub face_detection_id: Option<i32>,
+    pub asrx_segment_start: Option<f64>,
+    pub asrx_segment_end: Option<f64>,
+    pub confidence: f64,
+    pub metadata: serde_json::Value,
+    pub created_at: DateTime<Utc>,
+}
+
+/// A face-to-speaker pairing with a confidence and a match count; loadable from a
+/// SQL row (`FromRow`).
+#[derive(Debug, Clone, Serialize, Deserialize, FromRow)]
+pub struct PersonMatch {
+    pub face_id: String,
+    pub speaker_id: String,
+    pub confidence: f64,
+    pub match_count: i64,
+}
+
+/// One interval in a person's timeline; used as the element type of
+/// `PersonTimelineResponse::timeline`.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct PersonTimelineEntry {
+    pub start_time: f64,
+    pub end_time: f64,
+    pub duration: f64,
+    pub confidence: f64,
+}
+
+/// Aggregate appearance statistics for a person; embedded in `PersonTimelineResponse`.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct PersonStatistics {
+    pub total_appearances: i32,
+    pub total_duration: f64,
+    pub first_appearance: Option<f64>,
+    pub last_appearance: Option<f64>,
+    pub average_confidence: f64,
+}
+
+/// Request payload for creating a person identity; only `video_uuid` is mandatory.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct CreatePersonIdentityRequest {
+    pub video_uuid: String,
+    pub face_identity_id: Option<i32>,
+    pub speaker_id: Option<String>,
+    pub name: Option<String>,
+    pub metadata: Option<serde_json::Value>,
+}
+
+/// Request payload for updating a person identity; every field is optional.
+///
+/// NOTE(review): presumably a `None` field means "leave unchanged" — confirm in the handler.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct UpdatePersonIdentityRequest {
+    pub name: Option<String>,
+    pub metadata: Option<serde_json::Value>,
+    pub is_confirmed: Option<bool>,
+}
+
+/// Response view of a `PersonIdentity`: a subset of its fields, built via
+/// `From<PersonIdentity>`.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct PersonIdentityResponse {
+    pub person_id: String,
+    pub name: Option<String>,
+    pub face_identity_id: Option<i32>,
+    pub speaker_id: Option<String>,
+    pub confidence: f64,
+    pub appearance_count: i32,
+    pub total_appearance_duration: f64,
+    pub first_appearance_time: Option<f64>,
+    pub last_appearance_time: Option<f64>,
+    pub is_confirmed: bool,
+}
+
+impl From<PersonIdentity> for PersonIdentityResponse {
+    /// Moves the response-relevant fields out of `person`; the remaining fields
+    /// (`id`, `video_uuid`, `metadata`, `created_at`, `updated_at`) are dropped.
+    fn from(person: PersonIdentity) -> Self {
+        let PersonIdentity {
+            person_id,
+            name,
+            face_identity_id,
+            speaker_id,
+            confidence,
+            appearance_count,
+            total_appearance_duration,
+            first_appearance_time,
+            last_appearance_time,
+            is_confirmed,
+            ..
+        } = person;
+
+        Self {
+            person_id,
+            name,
+            face_identity_id,
+            speaker_id,
+            confidence,
+            appearance_count,
+            total_appearance_duration,
+            first_appearance_time,
+            last_appearance_time,
+            is_confirmed,
+        }
+    }
+}
+
+/// Response carrying a person's timeline entries together with aggregate statistics.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct PersonTimelineResponse {
+    pub person_id: String,
+    pub name: Option<String>,
+    pub timeline: Vec<PersonTimelineEntry>,
+    pub statistics: PersonStatistics,
+}
+
+/// Person information attached to a chunk.
+///
+/// NOTE(review): `overlap_duration` is presumably the time the person's appearance
+/// overlaps the chunk — confirm with the producer.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct ChunkPersonInfo {
+    pub person_id: String,
+    pub name: Option<String>,
+    pub confidence: f64,
+    pub overlap_duration: f64,
+}
+
+// Unit tests: JSON serialization smoke tests and plain field checks for the data types above.
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// Serializes a fully populated `PersonIdentity` and checks that key values
+    /// (including a non-ASCII name) appear in the JSON.
+    #[test]
+    fn test_person_identity_serialization() {
+        let person = PersonIdentity {
+            id: 1,
+            person_id: "person_001".to_string(),
+            face_identity_id: Some(123),
+            speaker_id: Some("SPEAKER_00".to_string()),
+            video_uuid: "video_abc".to_string(),
+            confidence: 0.85,
+            name: Some("张三".to_string()),
+            metadata: serde_json::json!({"role": "host"}),
+            first_appearance_time: Some(10.5),
+            last_appearance_time: Some(350.2),
+            total_appearance_duration: 120.5,
+            appearance_count: 15,
+            created_at: Utc::now(),
+            updated_at: Utc::now(),
+            is_confirmed: true,
+        };
+
+        let json = serde_json::to_string(&person).unwrap();
+        assert!(json.contains("person_001"));
+        assert!(json.contains("SPEAKER_00"));
+        assert!(json.contains("张三"));
+    }
+
+    /// Serializes a `PersonAppearance` and checks the id and duration appear in the JSON.
+    #[test]
+    fn test_person_appearance_serialization() {
+        let appearance = PersonAppearance {
+            id: 1,
+            person_id: "person_001".to_string(),
+            video_uuid: "video_abc".to_string(),
+            start_time: 10.5,
+            end_time: 25.3,
+            duration: 14.8,
+            face_detection_id: Some(456),
+            asrx_segment_start: Some(10.0),
+            asrx_segment_end: Some(26.0),
+            confidence: 0.92,
+            metadata: serde_json::json!({}),
+            created_at: Utc::now(),
+        };
+
+        let json = serde_json::to_string(&appearance).unwrap();
+        assert!(json.contains("person_001"));
+        assert!(json.contains("14.8"));
+    }
+
+    /// Constructs a `PersonMatch` and checks the fields hold the values given.
+    #[test]
+    fn test_person_match() {
+        let match_result = PersonMatch {
+            face_id: "face_123".to_string(),
+            speaker_id: "SPEAKER_00".to_string(),
+            confidence: 0.85,
+            match_count: 15,
+        };
+
+        assert_eq!(match_result.face_id, "face_123");
+        assert!(match_result.confidence >= 0.0 && match_result.confidence <= 1.0);
+    }
+
+    /// Constructs a `PersonStatistics` and checks the fields hold the values given.
+    #[test]
+    fn test_person_statistics() {
+        let stats = PersonStatistics {
+            total_appearances: 15,
+            total_duration: 120.5,
+            first_appearance: Some(10.5),
+            last_appearance: Some(350.2),
+            average_confidence: 0.88,
+        };
+
+        assert_eq!(stats.total_appearances, 15);
+        assert!(stats.total_duration > 0.0);
+    }
+}
--- a/src/core/processor/asr_legacy.rs
+++ b/src/core/processor/asr_legacy.rs
@@ -0,0 +1,124 @@
+use anyhow::{Context, Result};
+use serde::{Deserialize, Serialize};
+use std::time::Duration;
+
+use super::executor::PythonExecutor;
+use crate::core::config::processor;
+
+/// Parsed ASR output: `process_asr` deserializes the JSON file at `output_path`
+/// into this type. The language fields are optional; `segments` is required.
+#[derive(Debug, Serialize, Deserialize)]
+pub struct AsrResult {
+    pub language: Option<String>,
+    pub language_probability: Option<f64>,
+    pub segments: Vec<AsrSegment>,
+}
+
+/// One transcribed segment of an `AsrResult`.
+///
+/// NOTE(review): `start`/`end` are presumably seconds from the start of the
+/// media — confirm against the Python processor's output.
+#[derive(Debug, Serialize, Deserialize)]
+pub struct AsrSegment {
+    pub start: f64,
+    pub end: f64,
+    pub text: String,
+}
+
+/// Runs the `asr_processor.py` script on `video_path` and parses the JSON it
+/// writes to `output_path`.
+///
+/// `uuid` is forwarded to the executor unchanged. The script is run with the
+/// timeout configured in `processor::ASR_TIMEOUT_SECS`.
+///
+/// # Errors
+/// Fails if the executor cannot be created, the script run fails, or the output
+/// file cannot be read or parsed; read and parse errors name the output path.
+pub async fn process_asr(
+    video_path: &str,
+    output_path: &str,
+    uuid: Option<&str>,
+) -> Result<AsrResult> {
+    let executor = PythonExecutor::new()?;
+    // Resolved up front only so a failed run can report which script was used.
+    let script_path = executor.script_path("asr_processor.py");
+
+    tracing::info!("[ASR] Starting ASR processing: {}", video_path);
+
+    executor
+        .run(
+            "asr_processor.py",
+            &[video_path, output_path],
+            uuid,
+            "ASR",
+            Some(Duration::from_secs(*processor::ASR_TIMEOUT_SECS)),
+        )
+        .await
+        .with_context(|| format!("Failed to run {:?}", script_path))?;
+
+    // NOTE(review): this is a blocking read inside an async fn — consider an async
+    // file API or a blocking task if the output can be large.
+    let json_str = std::fs::read_to_string(output_path)
+        .with_context(|| format!("Failed to read ASR output: {}", output_path))?;
+
+    let result: AsrResult = serde_json::from_str(&json_str)
+        .with_context(|| format!("Failed to parse ASR output: {}", output_path))?;
+
+    tracing::info!(
+        "[ASR] Result: {} segments, language: {:?}",
+        result.segments.len(),
+        result.language
+    );
+
+    Ok(result)
+}
+
+// Unit tests for the ASR data types: JSON round-trips and basic construction.
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// Serializes an `AsrResult` with two segments and checks key values appear in the JSON.
+    #[test]
+    fn test_asr_result_serialization() {
+        let result = AsrResult {
+            language: Some("en".to_string()),
+            language_probability: Some(0.95),
+            segments: vec![
+                AsrSegment {
+                    start: 0.0,
+                    end: 2.5,
+                    text: "Hello world".to_string(),
+                },
+                AsrSegment {
+                    start: 2.5,
+                    end: 5.0,
+                    text: "Test speech".to_string(),
+                },
+            ],
+        };
+
+        let json = serde_json::to_string(&result).unwrap();
+        assert!(json.contains("Hello world"));
+        assert!(json.contains("en"));
+    }
+
+    /// Parses a JSON document with one non-ASCII segment and checks every field.
+    #[test]
+    fn test_asr_result_deserialization() {
+        let json = r#"{
+            "language": "zh",
+            "language_probability": 0.98,
+            "segments": [
+                {"start": 0.0, "end": 1.5, "text": "測試"}
+            ]
+        }"#;
+
+        let result: AsrResult = serde_json::from_str(json).unwrap();
+        assert_eq!(result.language, Some("zh".to_string()));
+        assert_eq!(result.language_probability, Some(0.98));
+        assert_eq!(result.segments.len(), 1);
+        assert_eq!(result.segments[0].text, "測試");
+    }
+
+    /// Constructs a segment with empty text and checks the fields hold the values given.
+    #[test]
+    fn test_asr_segment_default() {
+        let segment = AsrSegment {
+            start: 0.0,
+            end: 1.0,
+            text: String::new(),
+        };
+        assert_eq!(segment.start, 0.0);
+        assert_eq!(segment.end, 1.0);
+        assert!(segment.text.is_empty());
+    }
+
+    /// Constructs a result with no language and no segments.
+    #[test]
+    fn test_asr_result_empty_segments() {
+        let result = AsrResult {
+            language: None,
+            language_probability: None,
+            segments: vec![],
+        };
+        assert!(result.language.is_none());
+        assert!(result.segments.is_empty());
+    }
+}
--- a/src/core/processor/face_recognition.rs
+++ b/src/core/processor/face_recognition.rs
@@ -0,0 +1,345 @@
+use anyhow::{Context, Result};
+use serde::{Deserialize, Serialize};
+use std::time::Duration;
+
+use super::executor::PythonExecutor;
+
+/// Timeout handed to `PythonExecutor::run` for one `face_recognition_processor.py` invocation.
+const FACE_RECOGNITION_TIMEOUT: Duration = Duration::from_secs(10800); // 3 hours for recognition
+
+/// Top-level document parsed from the JSON file written by
+/// `face_recognition_processor.py` (see `process_face_recognition`).
+#[derive(Debug, Serialize, Deserialize, Clone)]
+pub struct FaceRecognitionResult {
+    /// Frame count as reported in the output document.
+    // NOTE(review): presumably the video's total frame count, which may differ
+    // from `frames.len()` — confirm against the script.
+    pub frame_count: u64,
+    /// Frame rate as reported in the output document.
+    pub fps: f64,
+    /// Per-frame detection records.
+    pub frames: Vec<FaceRecognitionFrame>,
+    /// Aggregated records, one per recognized face.
+    pub recognized_faces: Vec<RecognizedFace>,
+    /// Face clusters.
+    pub face_clusters: Vec<FaceCluster>,
+}
+
+/// Face detections belonging to a single video frame.
+#[derive(Debug, Serialize, Deserialize, Clone)]
+pub struct FaceRecognitionFrame {
+    /// Frame number.
+    pub frame: u64,
+    /// Timestamp of the frame.
+    // NOTE(review): presumably seconds from the start of the video — confirm.
+    pub timestamp: f64,
+    /// Faces detected in this frame.
+    pub faces: Vec<RecognizedFaceDetection>,
+}
+
+/// One face detected in one frame, with optional recognition data attached.
+#[derive(Debug, Serialize, Deserialize, Clone)]
+pub struct RecognizedFaceDetection {
+    /// Identifier of the face, when one was assigned.
+    pub face_id: Option<String>,
+    // Bounding box of the detection.
+    // NOTE(review): presumably pixel coordinates with (x, y) as the top-left corner — confirm.
+    pub x: i32,
+    pub y: i32,
+    pub width: i32,
+    pub height: i32,
+    /// Detection confidence.
+    pub confidence: f32,
+    /// Face embedding vector, when present.
+    pub embedding: Option<Vec<f32>>,
+    /// Estimated attributes of the face, when present.
+    pub attributes: Option<FaceAttributes>,
+    /// Matched identity, when present.
+    pub identity: Option<FaceIdentity>,
+}
+
+/// Optional descriptive attributes of a face; every field may be absent.
+#[derive(Debug, Serialize, Deserialize, Clone)]
+pub struct FaceAttributes {
+    /// Age value (0-255 by type).
+    // NOTE(review): presumably an estimate in years — confirm.
+    pub age: Option<u8>,
+    /// Gender label as a free-form string.
+    pub gender: Option<String>,
+    /// Emotion label as a free-form string.
+    pub emotion: Option<String>,
+    /// Glasses flag.
+    pub glasses: Option<bool>,
+    /// Mask flag.
+    pub mask: Option<bool>,
+    /// Head pose, when present.
+    pub pose: Option<FacePose>,
+}
+
+/// Head orientation expressed as three rotation angles.
+// NOTE(review): the angle unit (degrees vs. radians) is not visible here — confirm against the script.
+#[derive(Debug, Serialize, Deserialize, Clone)]
+pub struct FacePose {
+    pub yaw: f32,
+    pub pitch: f32,
+    pub roll: f32,
+}
+
+/// A candidate identity attached to a face.
+#[derive(Debug, Serialize, Deserialize, Clone)]
+pub struct FaceIdentity {
+    /// Display name, when present.
+    pub name: Option<String>,
+    /// Confidence of the identity match.
+    pub confidence: f32,
+    /// Identifier of the matching entry in the face database, when present.
+    pub database_id: Option<String>,
+    /// Arbitrary JSON metadata attached to the identity.
+    pub metadata: Option<serde_json::Value>,
+}
+
+/// Aggregated record for one face across the whole video.
+#[derive(Debug, Serialize, Deserialize, Clone)]
+pub struct RecognizedFace {
+    /// Identifier of the face.
+    pub face_id: String,
+    /// Embedding vector of the face.
+    pub embedding: Vec<f32>,
+    /// First time the face was seen.
+    // NOTE(review): presumably a timestamp in seconds, like `last_seen` — confirm.
+    pub first_seen: f64,
+    /// Last time the face was seen.
+    pub last_seen: f64,
+    /// Number of appearances.
+    pub total_appearances: u32,
+    /// Attributes of the face, when present.
+    pub attributes: Option<FaceAttributes>,
+    /// Candidate identities for the face.
+    pub identities: Vec<FaceIdentity>,
+    /// Identifier of the cluster the face belongs to, when present.
+    pub cluster_id: Option<String>,
+}
+
+/// A group of faces.
+#[derive(Debug, Serialize, Deserialize, Clone)]
+pub struct FaceCluster {
+    /// Identifier of the cluster.
+    pub cluster_id: String,
+    /// Identifiers of the faces in the cluster.
+    pub face_ids: Vec<String>,
+    /// Centroid vector of the cluster.
+    pub centroid: Vec<f32>,
+    /// Cluster size.
+    // NOTE(review): presumably equal to `face_ids.len()` — confirm.
+    pub size: u32,
+    /// Face chosen to represent the cluster, when present.
+    pub representative_face_id: Option<String>,
+    /// Arbitrary JSON metadata attached to the cluster.
+    pub metadata: Option<serde_json::Value>,
+}
+
+/// Runs `face_recognition_processor.py` on a video and loads the JSON it writes.
+///
+/// The script receives `video_path`, `output_path` and the three feature
+/// switches encoded as `"1"` / `"0"`, in that order. `uuid` is forwarded to the
+/// executor unchanged.
+///
+/// If the script file does not exist, a warning is logged and an empty result
+/// (zero frames, no faces, no clusters) is returned instead of an error.
+///
+/// # Errors
+///
+/// Fails when the executor cannot be created, the script run fails, or
+/// `output_path` cannot be read or parsed as a [`FaceRecognitionResult`].
+pub async fn process_face_recognition(
+    video_path: &str,
+    output_path: &str,
+    uuid: Option<&str>,
+    enable_recognition: bool,
+    enable_tracking: bool,
+    enable_clustering: bool,
+) -> Result<FaceRecognitionResult> {
+    /// Encodes a switch the way the script expects it on its command line.
+    fn flag(enabled: bool) -> &'static str {
+        if enabled {
+            "1"
+        } else {
+            "0"
+        }
+    }
+
+    const SCRIPT: &str = "face_recognition_processor.py";
+
+    let executor = PythonExecutor::new()?;
+    let script_path = executor.script_path(SCRIPT);
+
+    tracing::info!(
+        "[FACE_RECOGNITION] Starting face recognition: {}",
+        video_path
+    );
+
+    // A missing script is treated as "nothing recognized", not as a failure.
+    if !script_path.exists() {
+        tracing::warn!("[FACE_RECOGNITION] Script not found, returning empty result");
+        return Ok(FaceRecognitionResult {
+            frame_count: 0,
+            fps: 0.0,
+            frames: Vec::new(),
+            recognized_faces: Vec::new(),
+            face_clusters: Vec::new(),
+        });
+    }
+
+    let args = [
+        video_path,
+        output_path,
+        flag(enable_recognition),
+        flag(enable_tracking),
+        flag(enable_clustering),
+    ];
+
+    executor
+        .run(
+            SCRIPT,
+            &args,
+            uuid,
+            "FACE_RECOGNITION",
+            Some(FACE_RECOGNITION_TIMEOUT),
+        )
+        .await
+        .with_context(|| format!("Failed to run {:?}", script_path))?;
+
+    // The script communicates its result through the output file.
+    let raw =
+        std::fs::read_to_string(output_path).context("Failed to read FACE_RECOGNITION output")?;
+    let parsed: FaceRecognitionResult =
+        serde_json::from_str(&raw).context("Failed to parse FACE_RECOGNITION output")?;
+
+    tracing::info!(
+        "[FACE_RECOGNITION] Result: {} frames, {} recognized faces, {} clusters",
+        parsed.frames.len(),
+        parsed.recognized_faces.len(),
+        parsed.face_clusters.len()
+    );
+
+    Ok(parsed)
+}
+
+/// Registers a face from an image by running `face_registration.py`.
+///
+/// The script is invoked with the arguments
+/// `<image_path> <output> <name> --database <output> [--metadata <file>]`,
+/// where `<output>` is a uniquely named JSON file under `/tmp`. When `metadata`
+/// is given it is serialized to a second uniquely named file under `/tmp`,
+/// whose path is passed after `--metadata`. After the run, `<output>` is read
+/// and parsed as a [`FaceRegistrationResult`].
+///
+/// Both temporary files are removed before returning, on success and on
+/// failure alike (removal itself is best-effort).
+///
+/// The `success` flag of the parsed result is not inspected here; callers
+/// that care must check it themselves.
+///
+/// # Errors
+///
+/// Fails when the executor cannot be created, the script file does not exist,
+/// the metadata cannot be serialized or written, the script run fails, or the
+/// output file cannot be read or parsed.
+pub async fn register_face(
+    image_path: &str,
+    name: &str,
+    metadata: Option<serde_json::Value>,
+) -> Result<FaceRegistrationResult> {
+    let executor = PythonExecutor::new()?;
+    let script_path = executor.script_path("face_registration.py");
+
+    tracing::info!("[FACE_REGISTRATION] Registering face: {}", name);
+
+    if !script_path.exists() {
+        anyhow::bail!("Face registration script not found");
+    }
+
+    let output_path = format!("/tmp/face_registration_{}.json", uuid::Uuid::new_v4());
+    let meta_path = metadata
+        .as_ref()
+        .map(|_| format!("/tmp/face_metadata_{}.json", uuid::Uuid::new_v4()));
+
+    // Every fallible step runs inside this block so that the cleanup below is
+    // reached no matter where a failure happens.
+    let outcome = async {
+        if let (Some(meta), Some(path)) = (metadata.as_ref(), meta_path.as_deref()) {
+            // Propagate serialization and write failures instead of panicking.
+            let encoded =
+                serde_json::to_string(meta).context("Failed to serialize face metadata")?;
+            std::fs::write(path, encoded)
+                .with_context(|| format!("Failed to write face metadata file: {}", path))?;
+        }
+
+        // The output file doubles as the `--database` path for now, so the
+        // script writes its result there.
+        let mut args: Vec<&str> = vec![
+            image_path,
+            output_path.as_str(),
+            name,
+            "--database",
+            output_path.as_str(),
+        ];
+        if let Some(path) = meta_path.as_deref() {
+            args.push("--metadata");
+            args.push(path);
+        }
+
+        executor
+            .run(
+                "face_registration.py",
+                &args,
+                None,
+                "FACE_REGISTRATION",
+                Some(Duration::from_secs(300)),
+            )
+            .await
+            .with_context(|| format!("Failed to run {:?}", script_path))?;
+
+        let json_str = std::fs::read_to_string(&output_path)
+            .context("Failed to read registration output")?;
+
+        serde_json::from_str::<FaceRegistrationResult>(&json_str)
+            .context("Failed to parse registration output")
+    }
+    .await;
+
+    // Best-effort cleanup: a file may not exist if an earlier step failed.
+    let _ = std::fs::remove_file(&output_path);
+    if let Some(path) = &meta_path {
+        let _ = std::fs::remove_file(path);
+    }
+
+    let result = outcome?;
+
+    tracing::info!("[FACE_REGISTRATION] Registered face: {}", result.face_id);
+
+    Ok(result)
+}
+
+/// Document parsed from the output file of `face_registration.py`
+/// (see `register_face`).
+#[derive(Debug, Serialize, Deserialize, Clone)]
+pub struct FaceRegistrationResult {
+    /// Identifier of the registered face.
+    pub face_id: String,
+    /// Embedding vector of the registered face.
+    pub embedding: Vec<f32>,
+    /// Attributes of the face, when present.
+    pub attributes: Option<FaceAttributes>,
+    /// Success flag reported by the script; `register_face` does not check it.
+    pub success: bool,
+    /// Message reported by the script.
+    pub message: String,
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// Attribute set shared by the full-result fixture.
+    fn sample_attributes() -> FaceAttributes {
+        FaceAttributes {
+            age: Some(30),
+            gender: Some("male".to_string()),
+            emotion: Some("neutral".to_string()),
+            glasses: Some(false),
+            mask: Some(false),
+            pose: Some(FacePose {
+                yaw: 0.1,
+                pitch: 0.2,
+                roll: 0.3,
+            }),
+        }
+    }
+
+    /// Identity shared by the full-result fixture.
+    fn sample_identity() -> FaceIdentity {
+        FaceIdentity {
+            name: Some("John Doe".to_string()),
+            confidence: 0.85,
+            database_id: Some("user_123".to_string()),
+            metadata: Some(serde_json::json!({"role": "employee"})),
+        }
+    }
+
+    /// A fully populated result must serialize and keep its key identifiers.
+    #[test]
+    fn test_face_recognition_result_serialization() {
+        let detection = RecognizedFaceDetection {
+            face_id: Some("face_1".to_string()),
+            x: 100,
+            y: 100,
+            width: 50,
+            height: 60,
+            confidence: 0.95,
+            embedding: Some(vec![0.1, 0.2, 0.3]),
+            attributes: Some(sample_attributes()),
+            identity: Some(sample_identity()),
+        };
+
+        let face = RecognizedFace {
+            face_id: "face_1".to_string(),
+            embedding: vec![0.1, 0.2, 0.3],
+            first_seen: 0.0,
+            last_seen: 10.0,
+            total_appearances: 5,
+            attributes: Some(sample_attributes()),
+            identities: vec![sample_identity()],
+            cluster_id: Some("cluster_1".to_string()),
+        };
+
+        let cluster = FaceCluster {
+            cluster_id: "cluster_1".to_string(),
+            face_ids: vec!["face_1".to_string()],
+            centroid: vec![0.1, 0.2, 0.3],
+            size: 1,
+            representative_face_id: Some("face_1".to_string()),
+            metadata: Some(serde_json::json!({"description": "main person"})),
+        };
+
+        let result = FaceRecognitionResult {
+            frame_count: 100,
+            fps: 30.0,
+            frames: vec![FaceRecognitionFrame {
+                frame: 0,
+                timestamp: 0.0,
+                faces: vec![detection],
+            }],
+            recognized_faces: vec![face],
+            face_clusters: vec![cluster],
+        };
+
+        let encoded = serde_json::to_string(&result).unwrap();
+        for needle in ["face_1", "John Doe", "cluster_1"] {
+            assert!(encoded.contains(needle));
+        }
+    }
+
+    /// Attribute fields appear in the JSON under their Rust field names.
+    #[test]
+    fn test_face_attributes_serialization() {
+        let pose = FacePose {
+            yaw: -0.1,
+            pitch: 0.05,
+            roll: 0.02,
+        };
+        let attributes = FaceAttributes {
+            age: Some(25),
+            gender: Some("female".to_string()),
+            emotion: Some("happy".to_string()),
+            glasses: Some(true),
+            mask: Some(false),
+            pose: Some(pose),
+        };
+
+        let encoded = serde_json::to_string(&attributes).unwrap();
+        for needle in [
+            "\"age\":25",
+            "\"gender\":\"female\"",
+            "\"emotion\":\"happy\"",
+        ] {
+            assert!(encoded.contains(needle));
+        }
+    }
+
+    /// Identity fields, including nested metadata, survive serialization.
+    #[test]
+    fn test_face_identity_serialization() {
+        let metadata = serde_json::json!({
+            "department": "engineering",
+            "position": "senior developer"
+        });
+        let identity = FaceIdentity {
+            name: Some("Alice Smith".to_string()),
+            confidence: 0.92,
+            database_id: Some("employee_456".to_string()),
+            metadata: Some(metadata),
+        };
+
+        let encoded = serde_json::to_string(&identity).unwrap();
+        for needle in ["Alice Smith", "\"confidence\":0.92", "engineering"] {
+            assert!(encoded.contains(needle));
+        }
+    }
+}
--- a/src/core/processor/visual_chunk.rs
+++ b/src/core/processor/visual_chunk.rs
@@ -0,0 +1,562 @@
+//! 視覺分片處理器 (Phase 2.2)
+//!
+//! 從 YOLO 結果生成視覺分片
+
+use anyhow::{Context, Result};
+use serde::{Deserialize, Serialize};
+use std::time::Duration;
+
+use super::executor::PythonExecutor;
+use super::yolo::{YoloFrame, YoloResult};
+
+/// Timeout (3600 s) handed to `PythonExecutor::run` for one `visual_chunk_processor.py` invocation.
+const VISUAL_CHUNK_TIMEOUT: Duration = Duration::from_secs(3600);
+
+/// Result of visual chunk processing.
+#[derive(Debug, Serialize, Deserialize, Clone)]
+pub struct VisualChunkResult {
+    /// Number of visual chunks generated.
+    pub chunk_count: u32,
+    /// Total number of frames processed.
+    pub total_frames: u32,
+    /// Total number of detected objects.
+    pub total_objects: u32,
+    /// Number of distinct object classes.
+    pub unique_classes: u32,
+    /// The generated visual chunks.
+    pub chunks: Vec<crate::core::chunk::Chunk>,
+}
+
+/// Generates visual chunks from a YOLO detection result.
+///
+/// The frames in `yolo_result` are grouped by `create_fixed_frame_chunks`;
+/// chunk indices start at `chunk_index_offset`. Alongside the chunks, the
+/// result reports the number of frame entries, the total number of detected
+/// objects, and the number of distinct object class names.
+///
+/// `video_path` is used for logging only. The function is `async` but performs
+/// no awaits.
+///
+/// When `yolo_result.frames` is empty, a warning is logged and an all-zero
+/// result with no chunks is returned.
+///
+/// # Errors
+///
+/// The current implementation has no failing path and always returns `Ok`.
+pub async fn process_visual_chunk(
+    file_id: i32,
+    uuid: String,
+    video_path: &str,
+    yolo_result: &YoloResult,
+    chunk_index_offset: u32,
+    fps: f64,
+) -> Result<VisualChunkResult> {
+    let frames = &yolo_result.frames;
+
+    tracing::info!(
+        "[VisualChunk] Starting visual chunk generation for video: {}, {} frames",
+        video_path,
+        frames.len()
+    );
+
+    if frames.is_empty() {
+        tracing::warn!("[VisualChunk] No YOLO frames to process");
+        return Ok(VisualChunkResult {
+            chunk_count: 0,
+            total_frames: 0,
+            total_objects: 0,
+            unique_classes: 0,
+            chunks: vec![],
+        });
+    }
+
+    // Strategy 1: fixed-size grouping (one chunk per N frame entries).
+    let chunks = create_fixed_frame_chunks(file_id, &uuid, yolo_result, chunk_index_offset, fps);
+
+    // Statistics.
+    let total_objects: u32 = frames.iter().map(|f| f.objects.len() as u32).sum();
+
+    // Only the number of distinct names is needed, so borrow the names
+    // instead of cloning every one of them.
+    let unique_classes = frames
+        .iter()
+        .flat_map(|f| f.objects.iter().map(|o| o.class_name.as_str()))
+        .collect::<std::collections::HashSet<&str>>()
+        .len() as u32;
+
+    tracing::info!(
+        "[VisualChunk] Generated {} visual chunks from {} frames, {} total objects, {} unique classes",
+        chunks.len(),
+        frames.len(),
+        total_objects,
+        unique_classes
+    );
+
+    Ok(VisualChunkResult {
+        chunk_count: chunks.len() as u32,
+        total_frames: frames.len() as u32,
+        total_objects,
+        unique_classes,
+        chunks,
+    })
+}
+
+/// Splits the YOLO frame entries into consecutive groups of at most 30 and
+/// turns each group into one chunk.
+///
+/// Chunk indices start at `chunk_index_offset` and grow by one per chunk. The
+/// frame range of a chunk is `[first.frame, last.frame + 1)`, taken from the
+/// frame numbers stored in the group rather than from list positions. An empty
+/// frame list yields no chunks.
+fn create_fixed_frame_chunks(
+    file_id: i32,
+    uuid: &str,
+    yolo_result: &YoloResult,
+    chunk_index_offset: u32,
+    fps: f64,
+) -> Vec<crate::core::chunk::Chunk> {
+    // One chunk per 30 frame entries (about one second if fps = 30).
+    const FRAMES_PER_CHUNK: usize = 30;
+
+    let mut produced = Vec::new();
+    let mut next_index = chunk_index_offset;
+
+    // `chunks` never yields an empty slice, so indexing the ends is safe and
+    // an empty input simply skips the loop.
+    for group in yolo_result.frames.chunks(FRAMES_PER_CHUNK) {
+        let first_frame = group[0].frame as i64;
+        let past_last_frame = group[group.len() - 1].frame as i64 + 1; // exclusive
+
+        produced.push(crate::core::chunk::Chunk::from_yolo_frames(
+            file_id,
+            uuid.to_string(),
+            next_index,
+            first_frame,
+            past_last_frame,
+            fps,
+            group.to_vec(),
+        ));
+
+        next_index += 1;
+    }
+
+    produced
+}
+
+/// Groups consecutive frames into chunks based on object similarity.
+///
+/// Frames are walked in order. A frame joins the current run when its
+/// similarity to the run's last frame (see `calculate_frame_similarity`) is at
+/// least `similarity_threshold`; otherwise the run is closed and the frame
+/// starts a new one. A closed run becomes a chunk only if it holds at least
+/// `min_frames_per_chunk` frames; shorter runs are discarded and do not
+/// consume a chunk index.
+///
+/// Chunk indices start at `chunk_index_offset`. The frame range of a chunk is
+/// `[first.frame, last.frame + 1)`. An empty frame list yields no chunks.
+fn create_similarity_based_chunks(
+    file_id: i32,
+    uuid: &str,
+    yolo_result: &YoloResult,
+    chunk_index_offset: u32,
+    fps: f64,
+    similarity_threshold: f32,
+    min_frames_per_chunk: usize,
+) -> Vec<crate::core::chunk::Chunk> {
+    // Turns a finished run into a chunk, or `None` when the run is too short
+    // (or empty) to be kept.
+    let build_chunk = |index: u32, run: Vec<YoloFrame>| {
+        if run.len() < min_frames_per_chunk {
+            return None;
+        }
+        let start_frame = run.first()?.frame as i64;
+        let end_frame = run.last()?.frame as i64 + 1; // exclusive
+
+        Some(crate::core::chunk::Chunk::from_yolo_frames(
+            file_id,
+            uuid.to_string(),
+            index,
+            start_frame,
+            end_frame,
+            fps,
+            run,
+        ))
+    };
+
+    let mut chunks = Vec::new();
+    let mut chunk_index = chunk_index_offset;
+    let mut pending: Vec<YoloFrame> = Vec::new();
+
+    for frame in &yolo_result.frames {
+        // Simplified similarity check: compares the sets of object classes.
+        // The very first frame always opens a run.
+        let continues_run = match pending.last() {
+            Some(previous) => calculate_frame_similarity(previous, frame) >= similarity_threshold,
+            None => true,
+        };
+
+        if !continues_run {
+            // Hand the finished run over by value; `pending` is left empty
+            // and ready for the next run, with no copy of the frames made.
+            if let Some(chunk) = build_chunk(chunk_index, std::mem::take(&mut pending)) {
+                chunks.push(chunk);
+                chunk_index += 1;
+            }
+        }
+
+        pending.push(frame.clone());
+    }
+
+    // Flush the trailing run.
+    if let Some(chunk) = build_chunk(chunk_index, pending) {
+        chunks.push(chunk);
+    }
+
+    chunks
+}
+
+/// Computes the similarity of two frames from their object class names.
+///
+/// The result is the Jaccard index of the two sets of class names: the number
+/// of names present in both frames divided by the number of names present in
+/// either. Object positions, counts per class and confidences are ignored.
+///
+/// Special cases: two frames without objects are fully similar (`1.0`); if
+/// exactly one frame has no objects the similarity is `0.0`.
+fn calculate_frame_similarity(frame1: &YoloFrame, frame2: &YoloFrame) -> f32 {
+    match (frame1.objects.is_empty(), frame2.objects.is_empty()) {
+        (true, true) => return 1.0,
+        (true, false) | (false, true) => return 0.0,
+        (false, false) => {}
+    }
+
+    // Borrow the names; nothing here needs owned strings.
+    let set1: std::collections::HashSet<&str> = frame1
+        .objects
+        .iter()
+        .map(|o| o.class_name.as_str())
+        .collect();
+    let set2: std::collections::HashSet<&str> = frame2
+        .objects
+        .iter()
+        .map(|o| o.class_name.as_str())
+        .collect();
+
+    let shared = set1.intersection(&set2).count();
+    // |A ∪ B| = |A| + |B| - |A ∩ B|. Both sets are non-empty at this point,
+    // so the union is never zero.
+    let combined = set1.len() + set2.len() - shared;
+
+    shared as f32 / combined as f32
+}
+
+/// Generates visual chunks with the `visual_chunk_processor.py` script
+/// (advanced variant).
+///
+/// The script receives `video_path` and `output_path`; `uuid` is forwarded to
+/// the executor unchanged. After the run, `output_path` is read and parsed as
+/// a [`VisualChunkResult`].
+///
+/// If the script file does not exist, a warning is logged and an all-zero
+/// result with no chunks is returned. No fallback generation takes place: this
+/// function has no YOLO result to build chunks from.
+///
+/// # Errors
+///
+/// Fails when the executor cannot be created, the script run fails, or
+/// `output_path` cannot be read or parsed.
+pub async fn process_visual_chunk_advanced(
+    video_path: &str,
+    output_path: &str,
+    uuid: Option<&str>,
+) -> Result<VisualChunkResult> {
+    let executor = PythonExecutor::new()?;
+    let script_path = executor.script_path("visual_chunk_processor.py");
+
+    tracing::info!(
+        "[VisualChunk] Starting advanced visual chunk generation: {}",
+        video_path
+    );
+
+    if !script_path.exists() {
+        // The message states what actually happens: the previous wording
+        // announced a fallback to basic generation that was never performed.
+        tracing::warn!("[VisualChunk] Script not found, returning empty result");
+        // A fallback to the basic generator could be wired in here.
+        return Ok(VisualChunkResult {
+            chunk_count: 0,
+            total_frames: 0,
+            total_objects: 0,
+            unique_classes: 0,
+            chunks: vec![],
+        });
+    }
+
+    executor
+        .run(
+            "visual_chunk_processor.py",
+            &[video_path, output_path],
+            uuid,
+            "VisualChunk",
+            Some(VISUAL_CHUNK_TIMEOUT),
+        )
+        .await
+        .with_context(|| format!("Failed to run {:?}", script_path))?;
+
+    // The script communicates its result through the output file.
+    let json_str =
+        std::fs::read_to_string(output_path).context("Failed to read visual chunk output")?;
+
+    let result: VisualChunkResult =
+        serde_json::from_str(&json_str).context("Failed to parse visual chunk output")?;
+
+    tracing::info!(
+        "[VisualChunk] Advanced generation result: {} chunks, {} frames",
+        result.chunk_count,
+        result.total_frames
+    );
+
+    Ok(result)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// Frames with the same classes are fully similar, frames with disjoint
+    /// classes are not similar at all, and two empty frames are fully similar.
+    #[test]
+    fn test_calculate_frame_similarity() {
+        use crate::core::processor::yolo::{YoloFrame, YoloObject};
+
+        // Compact constructor: (class, id, x, y, width, height, confidence).
+        let obj = |name: &str, class_id, x, y, width, height, confidence| YoloObject {
+            class_name: name.to_string(),
+            class_id,
+            x,
+            y,
+            width,
+            height,
+            confidence,
+        };
+
+        let frame1 = YoloFrame {
+            frame: 0,
+            timestamp: 0.0,
+            objects: vec![
+                obj("person", 0, 100, 200, 50, 100, 0.95),
+                obj("car", 2, 300, 150, 80, 60, 0.87),
+            ],
+        };
+
+        let frame2 = YoloFrame {
+            frame: 1,
+            timestamp: 0.033,
+            objects: vec![
+                obj("person", 0, 110, 210, 52, 102, 0.92),
+                obj("car", 2, 310, 155, 82, 62, 0.85),
+            ],
+        };
+
+        let frame3 = YoloFrame {
+            frame: 2,
+            timestamp: 0.066,
+            objects: vec![obj("dog", 16, 150, 250, 40, 60, 0.78)],
+        };
+
+        // Frames holding the same classes are fully similar.
+        let similarity_same = calculate_frame_similarity(&frame1, &frame2);
+        assert!((similarity_same - 1.0).abs() < 0.001);
+
+        // Frames with disjoint classes are not similar.
+        let similarity_diff = calculate_frame_similarity(&frame1, &frame3);
+        assert!((similarity_diff - 0.0).abs() < 0.001);
+
+        // Two empty frames are fully similar.
+        let empty_frame = YoloFrame {
+            frame: 3,
+            timestamp: 0.1,
+            objects: vec![],
+        };
+        let similarity_empty = calculate_frame_similarity(&empty_frame, &empty_frame);
+        assert!((similarity_empty - 1.0).abs() < 0.001);
+    }
+
+    /// 60 frame entries at 30 per chunk must give two chunks of 30 frames.
+    // A plain `#[test]` is enough: nothing in here awaits.
+    #[test]
+    fn test_create_fixed_frame_chunks() {
+        use crate::core::processor::yolo::{YoloFrame, YoloObject, YoloResult};
+
+        // 60 frames, each with a single object.
+        let frames = (0..60)
+            .map(|i| YoloFrame {
+                frame: i as u64,
+                timestamp: i as f64 / 30.0, // assuming fps = 30
+                objects: vec![YoloObject {
+                    class_name: "person".to_string(),
+                    class_id: 0,
+                    x: 100,
+                    y: 200,
+                    width: 50,
+                    height: 100,
+                    confidence: 0.9,
+                }],
+            })
+            .collect();
+
+        let yolo_result = YoloResult {
+            frame_count: 60,
+            fps: 30.0,
+            frames,
+        };
+
+        let chunks = create_fixed_frame_chunks(1, "test-uuid", &yolo_result, 0, 30.0);
+
+        assert_eq!(chunks.len(), 2);
+
+        // First chunk covers frames [0, 30).
+        let first_chunk = &chunks[0];
+        assert_eq!(
+            first_chunk.chunk_type,
+            crate::core::chunk::ChunkType::Visual
+        );
+        assert_eq!(first_chunk.start_frame, 0);
+        assert_eq!(first_chunk.end_frame, 30); // exclusive
+        assert_eq!(first_chunk.frame_count, 30);
+
+        // Second chunk covers frames [30, 60).
+        let second_chunk = &chunks[1];
+        assert_eq!(
+            second_chunk.chunk_type,
+            crate::core::chunk::ChunkType::Visual
+        );
+        assert_eq!(second_chunk.start_frame, 30);
+        assert_eq!(second_chunk.end_frame, 60); // exclusive
+        assert_eq!(second_chunk.frame_count, 30);
+    }
+
+    /// A change of object classes must split the frames into separate chunks.
+    #[test]
+    fn test_create_similarity_based_chunks() {
+        use crate::core::processor::yolo::{YoloFrame, YoloObject, YoloResult};
+
+        // Compact constructor: (class, id, x, y, width, height, confidence).
+        let obj = |name: &str, class_id, x, y, width, height, confidence| YoloObject {
+            class_name: name.to_string(),
+            class_id,
+            x,
+            y,
+            width,
+            height,
+            confidence,
+        };
+
+        // Four frame entries: frames 0 and 1 show person + car, frames 5 and 6
+        // show only a dog. Frames 2-4 have no entry.
+        let frames = vec![
+            YoloFrame {
+                frame: 0,
+                timestamp: 0.0,
+                objects: vec![
+                    obj("person", 0, 100, 200, 50, 100, 0.9),
+                    obj("car", 2, 300, 150, 80, 60, 0.8),
+                ],
+            },
+            YoloFrame {
+                frame: 1,
+                timestamp: 0.033,
+                objects: vec![
+                    obj("person", 0, 110, 210, 52, 102, 0.88),
+                    obj("car", 2, 310, 155, 82, 62, 0.78),
+                ],
+            },
+            YoloFrame {
+                frame: 5,
+                timestamp: 0.166,
+                objects: vec![obj("dog", 16, 150, 250, 40, 60, 0.7)],
+            },
+            YoloFrame {
+                frame: 6,
+                timestamp: 0.2,
+                objects: vec![obj("dog", 16, 155, 255, 42, 62, 0.68)],
+            },
+        ];
+
+        let yolo_result = YoloResult {
+            frame_count: 7,
+            fps: 30.0,
+            frames,
+        };
+
+        let chunks = create_similarity_based_chunks(
+            1,
+            "test-uuid",
+            &yolo_result,
+            0,
+            30.0,
+            0.5, // similarity threshold
+            2,   // min frames per chunk
+        );
+
+        // Two chunks: person + car, then dog.
+        assert_eq!(chunks.len(), 2);
+
+        // Each chunk spans exactly the frames of its run (end is exclusive).
+        assert_eq!(chunks[0].start_frame, 0);
+        assert_eq!(chunks[0].end_frame, 2);
+        assert_eq!(chunks[1].start_frame, 5);
+        assert_eq!(chunks[1].end_frame, 7);
+    }
+}
--- a/src/core/text/mod.rs
+++ b/src/core/text/mod.rs
@@ -0,0 +1,9 @@
+pub mod online_synonym_expander;
+pub mod synonym;
+pub mod synonym_expander;
+pub mod tokenizer;
+
+pub use online_synonym_expander::{global_online_expander, OnlineSynonymExpander};
+pub use synonym::{normalize_chinese_query, simplified_to_traditional, traditional_to_simplified};
+pub use synonym_expander::{global_synonym_expander, SynonymExpander};
+pub use tokenizer::{contains_chinese, extract_and_tokenize_text, tokenize_chinese_text};
--- a/src/core/text/online_synonym_expander.rs
+++ b/src/core/text/online_synonym_expander.rs
@@ -0,0 +1,242 @@
+use anyhow::{Context, Result};
+use once_cell::sync::Lazy;
+use serde::Deserialize;
+use std::collections::HashMap;
+use std::env;
+use std::sync::Arc;
+use tokio::sync::Mutex;
+
+// Online Synonym Expander
+//
+// Fetches synonyms from LLM (llama.cpp server) on-demand and caches them in memory.
+//
+// Environment variables:
+// - `MOMENTRY_ONLINE_SYNONYM` - Enables online synonym expansion; the expander is disabled when unset
+// - `MOMENTRY_LLM_SYNONYM_URL` - LLM server URL (default: http://127.0.0.1:8081)
+// - `MOMENTRY_LLM_SYNONYM_MODEL` - Model name (default: gemma4)
+// - `MOMENTRY_LLM_SYNONYM_TIMEOUT` - Request timeout in seconds (default: 60)
+// - `MOMENTRY_SYNONYM_FILE` - Optional JSON file of local synonyms, consulted before the LLM
+
+/// Response body of the `/v1/chat/completions` request; only `choices` is deserialized.
+#[derive(Debug, Deserialize)]
+struct LlmResponse {
+    /// Returned completions; `fetch_from_llm` reads only the first entry.
+    choices: Vec<LlmChoice>,
+}
+
+/// One entry of `LlmResponse::choices`; only its `message` is deserialized.
+#[derive(Debug, Deserialize)]
+struct LlmChoice {
+    /// The assistant message carried by this choice.
+    message: LlmMessage,
+}
+
+/// Message payload of a choice; `content` is the raw text searched for a JSON array.
+#[derive(Debug, Deserialize)]
+struct LlmMessage {
+    /// Raw model output, expected to contain a JSON array of synonym strings.
+    content: String,
+}
+
+/// Synonym expander that looks a word up in a file-backed map, then in an in-memory
+/// cache, and finally asks an LLM server, caching whatever the LLM returns.
+#[derive(Debug)]
+pub struct OnlineSynonymExpander {
+    /// Local synonym cache (loaded from file)
+    local_map: HashMap<String, Vec<String>>,
+    /// Runtime cache for LLM-fetched synonyms
+    runtime_cache: Arc<Mutex<HashMap<String, Vec<String>>>>,
+    /// LLM server URL
+    api_url: String,
+    /// Model name
+    model: String,
+    /// Request timeout
+    timeout_secs: u64,
+}
+
+/// System message sent with every synonym request. It instructs the model to answer with
+/// a bare JSON array of same-language synonyms and nothing else.
+static SYSTEM_PROMPT: &str = r#"You are a synonym generation assistant. For each given word, provide 8-12 synonyms in the same language.
+Rules:
+1. Return ONLY a JSON array of strings, nothing else
+2. Synonyms should be contextually relevant for video content search
+3. Include common words, informal terms, and related concepts
+4. Do NOT include the input word in the output
+5. All synonyms must be in the SAME language as the input word
+6. No explanations, no markdown, just the JSON array
+
+Example input: "money"
+Example output: ["cash", "dollar", "currency", "funds", "bucks", "greenbacks", "coins", "wealth", "payment"]"#;
+
+impl OnlineSynonymExpander {
+    /// Builds an expander, optionally preloading synonyms from a local JSON file.
+    ///
+    /// A file that cannot be loaded is logged as a warning and treated as an empty map.
+    /// The server URL, model name and timeout come from the `MOMENTRY_LLM_SYNONYM_URL`,
+    /// `MOMENTRY_LLM_SYNONYM_MODEL` and `MOMENTRY_LLM_SYNONYM_TIMEOUT` environment
+    /// variables, falling back to `http://127.0.0.1:8081`, `gemma4` and 60 seconds.
+    pub fn new(local_file_path: Option<&str>) -> Self {
+        let local_map = local_file_path
+            .map(|path| {
+                Self::load_local_file(path).unwrap_or_else(|e| {
+                    tracing::warn!("Failed to load local synonym file {}: {}", path, e);
+                    HashMap::new()
+                })
+            })
+            .unwrap_or_default();
+
+        let api_url = match env::var("MOMENTRY_LLM_SYNONYM_URL") {
+            Ok(url) => url,
+            Err(_) => "http://127.0.0.1:8081".to_string(),
+        };
+        let model = match env::var("MOMENTRY_LLM_SYNONYM_MODEL") {
+            Ok(name) => name,
+            Err(_) => "gemma4".to_string(),
+        };
+        // An unset or unparsable timeout both fall back to 60 seconds.
+        let timeout_secs = match env::var("MOMENTRY_LLM_SYNONYM_TIMEOUT") {
+            Ok(raw) => raw.parse().unwrap_or(60),
+            Err(_) => 60,
+        };
+
+        Self {
+            local_map,
+            runtime_cache: Arc::new(Mutex::new(HashMap::new())),
+            api_url,
+            model,
+            timeout_secs,
+        }
+    }
+
+    /// Reads `path` and parses it as a JSON object mapping each word to its synonym list.
+    ///
+    /// # Errors
+    /// Returns an error when the file cannot be read or is not valid JSON of that shape.
+    fn load_local_file(path: &str) -> Result<HashMap<String, Vec<String>>> {
+        let raw = std::fs::read_to_string(path).context("Failed to read local synonym file")?;
+        serde_json::from_str(&raw).context("Failed to parse local synonym JSON")
+    }
+
+    /// Expands `word` into an OR group of the form `(word | syn1 | syn2 ...)`.
+    ///
+    /// Sources are tried in order: the local map, the runtime cache, then the LLM server.
+    /// Synonyms fetched from the LLM are stored in the runtime cache. When no source
+    /// yields a non-empty synonym list, `word` is returned unchanged. An LLM failure is
+    /// logged as a warning and also falls back to the unchanged word.
+    pub async fn expand_word(&self, word: &str) -> String {
+        // 1. Check local map
+        if let Some(syns) = self.local_map.get(word).filter(|s| !s.is_empty()) {
+            return Self::format_expansion(word, syns);
+        }
+
+        // 2. Check runtime cache. The guard is confined to this block so the lock is
+        //    released before the (slow) LLM request below.
+        {
+            let cache = self.runtime_cache.lock().await;
+            if let Some(syns) = cache.get(word).filter(|s| !s.is_empty()) {
+                return Self::format_expansion(word, syns);
+            }
+        }
+
+        // 3. Fetch from LLM
+        match self.fetch_from_llm(word).await {
+            Ok(synonyms) if !synonyms.is_empty() => {
+                let expanded = Self::format_expansion(word, &synonyms);
+                // Add to runtime cache; the list is moved in, no clone needed.
+                self.runtime_cache
+                    .lock()
+                    .await
+                    .insert(word.to_string(), synonyms);
+                expanded
+            }
+            // 4. Fallback: return original word
+            Ok(_) => word.to_string(),
+            Err(e) => {
+                tracing::warn!("LLM synonym lookup failed for '{}': {:#}", word, e);
+                word.to_string()
+            }
+        }
+    }
+
+    /// Formats `word` and its synonyms as `(word | syn1 | syn2 ...)`.
+    fn format_expansion(word: &str, synonyms: &[String]) -> String {
+        let mut parts: Vec<&str> = Vec::with_capacity(synonyms.len() + 1);
+        parts.push(word);
+        parts.extend(synonyms.iter().map(String::as_str));
+        format!("({})", parts.join(" | "))
+    }
+
+    /// Asks the LLM server for synonyms of `word` and returns the cleaned list.
+    ///
+    /// Sends a chat-completions request to `{api_url}/v1/chat/completions`, takes the
+    /// first choice, extracts the span from the first `[` to the last `]` of its content
+    /// and parses that as a JSON array of strings. Entries are trimmed and lowercased;
+    /// entries that are empty, equal to the input word, or contain whitespace or
+    /// `to_tsquery` operator characters are dropped.
+    ///
+    /// # Errors
+    /// Fails when the request fails or times out, the status is not a success, the body
+    /// cannot be parsed, no JSON array is present, or no usable synonym remains.
+    async fn fetch_from_llm(&self, word: &str) -> Result<Vec<String>> {
+        // One shared client so connections to the LLM server are pooled across calls
+        // instead of being re-established for every word.
+        static HTTP_CLIENT: Lazy<reqwest::Client> = Lazy::new(reqwest::Client::new);
+        // Characters with operator meaning in a PostgreSQL tsquery; a synonym containing
+        // any of them would corrupt the `(a | b | c)` group built by `expand_word`.
+        const TSQUERY_RESERVED: &[char] = &['&', '|', '!', '(', ')', ':', '\'', '<', '>', '*', '\\'];
+
+        let prompt = format!(
+            r#"Give synonyms for: "{}"
+Return ONLY a JSON array of strings, nothing else. Do NOT include the input word."#,
+            word
+        );
+
+        let payload = serde_json::json!({
+            "model": self.model,
+            "messages": [
+                {
+                    "role": "system",
+                    "content": SYSTEM_PROMPT
+                },
+                {
+                    "role": "user",
+                    "content": prompt
+                }
+            ],
+            "temperature": 0.3,
+            "stream": false,
+            "max_tokens": 256,
+        });
+
+        let response = HTTP_CLIENT
+            .post(format!("{}/v1/chat/completions", self.api_url))
+            .json(&payload)
+            .timeout(std::time::Duration::from_secs(self.timeout_secs))
+            .send()
+            .await
+            .context("LLM request failed")?;
+
+        let status = response.status();
+        if !status.is_success() {
+            anyhow::bail!("LLM request failed with status: {}", status);
+        }
+
+        let llm_resp: LlmResponse = response
+            .json()
+            .await
+            .context("Failed to parse LLM response")?;
+
+        let content = llm_resp
+            .choices
+            .first()
+            .context("No choices in LLM response")?
+            .message
+            .content
+            .as_str();
+
+        // Extract JSON from response (handle markdown code blocks). The `start < end`
+        // guard rejects output where `]` appears before `[`, which would otherwise make
+        // the slice below panic.
+        let json_str = match (content.find('['), content.rfind(']')) {
+            (Some(start), Some(end)) if start < end => &content[start..=end],
+            _ => anyhow::bail!("No JSON array found in LLM response"),
+        };
+
+        let synonyms: Vec<String> =
+            serde_json::from_str(json_str).context("Failed to parse LLM synonyms JSON")?;
+
+        // Filter and normalize: keep single-token synonyms that are safe to embed in a
+        // tsquery and that are not just the input word again.
+        let input_word = word.trim().to_lowercase();
+        let cleaned: Vec<String> = synonyms
+            .into_iter()
+            .map(|s| s.trim().to_lowercase())
+            .filter(|s| {
+                !s.is_empty()
+                    && *s != input_word
+                    && !s
+                        .chars()
+                        .any(|c| c.is_whitespace() || TSQUERY_RESERVED.contains(&c))
+            })
+            .collect();
+
+        if cleaned.is_empty() {
+            anyhow::bail!("No valid synonyms returned");
+        }
+
+        tracing::info!(
+            "LLM fetched {} synonyms for '{}': {:?}",
+            cleaned.len(),
+            word,
+            cleaned.iter().take(5).collect::<Vec<_>>()
+        );
+
+        Ok(cleaned)
+    }
+
+    /// Returns the number of words currently held in the runtime (LLM-fetched) cache.
+    /// Entries of the file-backed local map are not counted.
+    pub async fn cache_size(&self) -> usize {
+        self.runtime_cache.lock().await.len()
+    }
+}
+
+/// Global online synonym expander (lazy-loaded).
+///
+/// Holds `Some` only when `MOMENTRY_ONLINE_SYNONYM` is set to a value that is not an
+/// explicit "off" (empty, `0`, `false`, `no`, `off`; case-insensitive). Any other value
+/// enables the expander. The local synonym file, if any, is taken from
+/// `MOMENTRY_SYNONYM_FILE`.
+static ONLINE_EXPANDER: Lazy<Option<OnlineSynonymExpander>> = Lazy::new(|| {
+    // Checking only for presence would turn the feature ON for
+    // `MOMENTRY_ONLINE_SYNONYM=false`, so the value itself is inspected.
+    let enabled = env::var("MOMENTRY_ONLINE_SYNONYM")
+        .map(|raw| {
+            !matches!(
+                raw.trim().to_ascii_lowercase().as_str(),
+                "" | "0" | "false" | "no" | "off"
+            )
+        })
+        .unwrap_or(false);
+
+    if !enabled {
+        return None;
+    }
+
+    let local_file = env::var("MOMENTRY_SYNONYM_FILE").ok();
+    tracing::info!("Initializing online synonym expander");
+    Some(OnlineSynonymExpander::new(local_file.as_deref()))
+});
+
+/// Get the global online synonym expander (if enabled).
+///
+/// The first call initializes the expander; returns `None` when it was not enabled.
+pub fn global_online_expander() -> Option<&'static OnlineSynonymExpander> {
+    ONLINE_EXPANDER.as_ref()
+}
--- a/src/core/text/synonym.rs
+++ b/src/core/text/synonym.rs
@@ -0,0 +1,71 @@
+use ferrous_opencc::{config::BuiltinConfig, OpenCC};
+use once_cell::sync::Lazy;
+
+/// Lazily built OpenCC converter using the built-in `S2t` (Simplified to Traditional)
+/// configuration. Initialization failure panics on first use.
+static OPENCC_S2T: Lazy<OpenCC> = Lazy::new(|| {
+    OpenCC::from_config(BuiltinConfig::S2t)
+        .expect("Failed to initialize OpenCC Simplified to Traditional converter")
+});
+
+/// Lazily built OpenCC converter using the built-in `T2s` (Traditional to Simplified)
+/// configuration. Initialization failure panics on first use.
+static OPENCC_T2S: Lazy<OpenCC> = Lazy::new(|| {
+    OpenCC::from_config(BuiltinConfig::T2s)
+        .expect("Failed to initialize OpenCC Traditional to Simplified converter")
+});
+
+/// Convert Simplified Chinese text to Traditional Chinese.
+///
+/// Delegates to the shared `OPENCC_S2T` converter and returns a newly allocated string.
+pub fn simplified_to_traditional(text: &str) -> String {
+    OPENCC_S2T.convert(text)
+}
+
+/// Convert Traditional Chinese text to Simplified Chinese.
+///
+/// Delegates to the shared `OPENCC_T2S` converter and returns a newly allocated string.
+pub fn traditional_to_simplified(text: &str) -> String {
+    OPENCC_T2S.convert(text)
+}
+
+/// Normalize a Chinese query for search by converting Simplified Chinese to
+/// Traditional Chinese (the database is assumed to store Traditional text).
+///
+/// Currently a thin wrapper around [`simplified_to_traditional`]; no other
+/// normalization is applied.
+pub fn normalize_chinese_query(text: &str) -> String {
+    simplified_to_traditional(text)
+}
+
+// Conversion tests. They only check that output differs from (or equals) the input,
+// rather than pinning exact characters, so they do not depend on dictionary details.
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_simplified_to_traditional() {
+        // Example: Simplified "计算机" -> Traditional "計算機"
+        let simplified = "计算机";
+        let traditional = simplified_to_traditional(simplified);
+        // The conversion might produce "計算機" (depending on dictionary)
+        // We'll just verify it's not empty and different from input
+        assert!(!traditional.is_empty());
+        assert_ne!(traditional, simplified);
+
+        // Traditional input should remain unchanged (or nearly unchanged)
+        let traditional_input = "計算機";
+        let converted = simplified_to_traditional(traditional_input);
+        assert_eq!(converted, traditional_input);
+    }
+
+    #[test]
+    fn test_traditional_to_simplified() {
+        // Traditional input must come back non-empty and changed.
+        let traditional = "計算機";
+        let simplified = traditional_to_simplified(traditional);
+        assert!(!simplified.is_empty());
+        assert_ne!(simplified, traditional);
+    }
+
+    #[test]
+    fn test_normalize_chinese_query() {
+        let simplified = "计算机";
+        let normalized = normalize_chinese_query(simplified);
+        // Should be Traditional
+        assert_ne!(normalized, simplified);
+
+        let traditional = "計算機";
+        let normalized2 = normalize_chinese_query(traditional);
+        // Should remain Traditional
+        assert_eq!(normalized2, traditional);
+    }
+}
--- a/src/core/text/synonym_expander.rs
+++ b/src/core/text/synonym_expander.rs
@@ -0,0 +1,247 @@
+use anyhow::{Context, Result};
+use once_cell::sync::Lazy;
+use std::collections::HashMap;
+use std::env;
+use std::fs;
+use std::path::Path;
+
+/// Synonym expander.
+/// Holds a custom synonym mapping loaded from JSON files.
+#[derive(Debug, Clone, Default)]
+pub struct SynonymExpander {
+    /// Mapping from a word to its list of synonyms.
+    map: HashMap<String, Vec<String>>,
+}
+
+impl SynonymExpander {
+    /// Creates a synonym expander from a single JSON file.
+    ///
+    /// The file must contain a JSON object mapping each word to an array of synonyms.
+    ///
+    /// # Errors
+    /// Returns an error naming the offending path when the file cannot be read or
+    /// parsed, matching the messages produced by `from_files`.
+    pub fn from_file<P: AsRef<Path>>(path: P) -> Result<Self> {
+        let path = path.as_ref();
+        let content = fs::read_to_string(path)
+            .with_context(|| format!("Failed to read synonym file: {:?}", path))?;
+        let map: HashMap<String, Vec<String>> = serde_json::from_str(&content)
+            .with_context(|| format!("Failed to parse synonym JSON from {:?}", path))?;
+        Ok(Self { map })
+    }
+
+    /// Creates a synonym expander from several JSON files.
+    ///
+    /// Files are merged in the given order; when the same word appears in more than one
+    /// file, the entry from the later file replaces the earlier one.
+    ///
+    /// # Errors
+    /// Returns an error naming the path of the first file that cannot be read or parsed.
+    pub fn from_files<P: AsRef<Path>>(paths: &[P]) -> Result<Self> {
+        let mut merged: HashMap<String, Vec<String>> = HashMap::new();
+
+        for entry in paths {
+            let file: &Path = entry.as_ref();
+            let raw = fs::read_to_string(file)
+                .with_context(|| format!("Failed to read synonym file: {:?}", file))?;
+            let parsed: HashMap<String, Vec<String>> = serde_json::from_str(&raw)
+                .with_context(|| format!("Failed to parse synonym JSON from {:?}", file))?;
+
+            // `extend` overwrites existing keys, so later files win.
+            merged.extend(parsed);
+        }
+
+        Ok(Self { map: merged })
+    }
+
+    /// Creates an expander from the built-in defaults. This currently returns an empty
+    /// mapping; users add custom synonyms through configuration files.
+    pub fn from_default() -> Self {
+        Self::empty()
+    }
+
+    /// Returns the synonym list for `word`, or `None` when the word has no entry.
+    /// The lookup is an exact key match; an entry may hold an empty list.
+    pub fn get_synonyms(&self, word: &str) -> Option<&[String]> {
+        self.map.get(word).map(|v| v.as_slice())
+    }
+
+    /// Expands a query word into `(word | synonym1 | synonym2 ...)`.
+    /// Returns the word unchanged when it has no entry or its synonym list is empty.
+    pub fn expand_word(&self, word: &str) -> String {
+        let syns = match self.get_synonyms(word) {
+            Some(list) if !list.is_empty() => list,
+            _ => return word.to_string(),
+        };
+
+        let alternatives: Vec<&str> = std::iter::once(word)
+            .chain(syns.iter().map(String::as_str))
+            .collect();
+        format!("({})", alternatives.join(" | "))
+    }
+
+    /// Expands a whole query string of whitespace-separated words.
+    /// Each word is expanded with `expand_word` and the results are joined with ` & `.
+    pub fn expand_query(&self, query: &str) -> String {
+        let mut terms = Vec::new();
+        for token in query.split_whitespace() {
+            terms.push(self.expand_word(token));
+        }
+        terms.join(" & ")
+    }
+
+    /// Expands a Chinese query by greedily matching known synonym keys.
+    ///
+    /// A short query (at most 4 characters) that is itself a key is expanded directly.
+    /// Otherwise the query is scanned left to right: at each position the longest key
+    /// that prefixes the remaining text is expanded; when no key matches, one character
+    /// is emitted as its own term. Whitespace characters are skipped rather than emitted.
+    /// Terms are joined with ` & `.
+    ///
+    /// When no key matched anywhere, the original query is returned unchanged so the
+    /// caller can tokenize it later.
+    pub fn expand_chinese_query(&self, query: &str) -> String {
+        // Short query: try to match the whole query first. Going through `expand_word`
+        // keeps the "empty synonym list => plain word" rule consistent.
+        if !query.is_empty() && query.chars().count() <= 4 && self.map.contains_key(query) {
+            return self.expand_word(query);
+        }
+
+        // Sort keys by character count, longest first (longest match wins). Empty keys
+        // are dropped: they would match at every position without consuming any input,
+        // which would loop forever.
+        let mut keys: Vec<&str> = self
+            .map
+            .keys()
+            .map(String::as_str)
+            .filter(|k| !k.is_empty())
+            .collect();
+        keys.sort_by_key(|k| std::cmp::Reverse(k.chars().count()));
+
+        let mut expanded_parts = Vec::new();
+        let mut remaining_query = query;
+        let mut found_synonym = false;
+
+        // Greedy matching over the remaining text.
+        while let Some(first) = remaining_query.chars().next() {
+            let hit = keys
+                .iter()
+                .copied()
+                .find(|k| remaining_query.starts_with(*k));
+
+            match hit {
+                Some(key) => {
+                    expanded_parts.push(self.expand_word(key));
+                    remaining_query = &remaining_query[key.len()..];
+                    found_synonym = true;
+                }
+                None => {
+                    // No key matches here: consume exactly one character.
+                    let (head, tail) = remaining_query.split_at(first.len_utf8());
+                    // A whitespace term would yield an invalid `a &   & b` expression.
+                    if !first.is_whitespace() {
+                        expanded_parts.push(head.to_string());
+                    }
+                    remaining_query = tail;
+                }
+            }
+        }
+
+        if found_synonym {
+            // At least one synonym key matched: use the expanded query.
+            expanded_parts.join(" & ")
+        } else {
+            // Nothing matched: return the original query (tokenized later by the caller).
+            query.to_string()
+        }
+    }
+
+    /// Creates an empty synonym expander (no synonym mappings).
+    pub fn empty() -> Self {
+        Self {
+            map: HashMap::new(),
+        }
+    }
+}
+
+/// Global synonym expander (lazily initialized on first access).
+///
+/// Sources are tried in order: `MOMENTRY_SYNONYM_FILES` (comma-separated list of files),
+/// then `MOMENTRY_SYNONYM_FILE` (single file), then the built-in default.
+static SYNONYM_EXPANDER: Lazy<SynonymExpander> = Lazy::new(|| {
+    // Prefer MOMENTRY_SYNONYM_FILES (multiple comma-separated files)
+    if let Ok(files_var) = env::var("MOMENTRY_SYNONYM_FILES") {
+        let file_paths: Vec<&str> = files_var
+            .split(',')
+            .map(|s| s.trim())
+            .filter(|s| !s.is_empty())
+            .collect();
+
+        if !file_paths.is_empty() {
+            match SynonymExpander::from_files(&file_paths) {
+                Ok(expander) => {
+                    tracing::info!(
+                        "Loaded synonym expander from {} files: {:?}",
+                        file_paths.len(),
+                        file_paths
+                    );
+                    return expander;
+                }
+                Err(e) => {
+                    tracing::warn!(
+                        "Failed to load synonym expander from files {:?}: {}",
+                        file_paths,
+                        e
+                    );
+                    // Fall through to the single-file variable or the default
+                }
+            }
+        }
+    }
+
+    // Fall back to the single-file MOMENTRY_SYNONYM_FILE (backward compatibility)
+    if let Ok(file_path) = env::var("MOMENTRY_SYNONYM_FILE") {
+        match SynonymExpander::from_file(&file_path) {
+            Ok(expander) => {
+                tracing::info!("Loaded synonym expander from {}", file_path);
+                expander
+            }
+            Err(e) => {
+                tracing::warn!("Failed to load synonym expander from {}: {}", file_path, e);
+                SynonymExpander::empty()
+            }
+        }
+    } else {
+        // Use the built-in default (currently an empty mapping)
+        SynonymExpander::from_default()
+    }
+});
+
+/// Returns the global synonym expander instance, initializing it on first use.
+pub fn global_synonym_expander() -> &'static SynonymExpander {
+    &SYNONYM_EXPANDER
+}
+
+// Unit tests for word/query expansion and for merging an empty file list.
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_expand_word() {
+        // Two known words with two synonyms each.
+        let mut map = HashMap::new();
+        map.insert(
+            "電腦".to_string(),
+            vec!["計算機".to_string(), "微机".to_string()],
+        );
+        map.insert(
+            "工作".to_string(),
+            vec!["任務".to_string(), "作業".to_string()],
+        );
+        let expander = SynonymExpander { map };
+
+        assert_eq!(expander.expand_word("電腦"), "(電腦 | 計算機 | 微机)");
+        assert_eq!(expander.expand_word("工作"), "(工作 | 任務 | 作業)");
+        // A word without an entry is returned unchanged.
+        assert_eq!(expander.expand_word("未知"), "未知");
+    }
+
+    #[test]
+    fn test_expand_query() {
+        let mut map = HashMap::new();
+        map.insert(
+            "電腦".to_string(),
+            vec!["計算機".to_string(), "微机".to_string()],
+        );
+        map.insert(
+            "工作".to_string(),
+            vec!["任務".to_string(), "作業".to_string()],
+        );
+        let expander = SynonymExpander { map };
+
+        // Whitespace-separated words are expanded individually and AND-ed together.
+        assert_eq!(
+            expander.expand_query("電腦 工作"),
+            "(電腦 | 計算機 | 微机) & (工作 | 任務 | 作業)"
+        );
+        assert_eq!(expander.expand_query("單個詞"), "單個詞");
+        assert_eq!(expander.expand_query(""), "");
+    }
+
+    #[test]
+    fn test_from_files_empty() {
+        // An empty path list yields an empty mapping, not an error.
+        let paths: Vec<&str> = vec![];
+        let expander = SynonymExpander::from_files(&paths).unwrap();
+        assert!(expander.map.is_empty());
+    }
+}
--- a/src/core/text/tokenizer.rs
+++ b/src/core/text/tokenizer.rs
@@ -0,0 +1,121 @@
+use jieba_rs::Jieba;
+use once_cell::sync::Lazy;
+
+/// Shared Jieba segmenter, built lazily with `Jieba::new` on first use.
+static JIEBA: Lazy<Jieba> = Lazy::new(Jieba::new);
+
+/// Reports whether `text` contains at least one Chinese character.
+/// Covers CJK Unified Ideographs (U+4E00-U+9FFF) and Extension A (U+3400-U+4DBF).
+pub fn contains_chinese(text: &str) -> bool {
+    for ch in text.chars() {
+        if matches!(ch, '\u{3400}'..='\u{4dbf}' | '\u{4e00}'..='\u{9fff}') {
+            return true;
+        }
+    }
+    false
+}
+
+/// Segments text that contains Chinese characters and joins the tokens with spaces.
+/// Text without any Chinese character is returned unchanged.
+///
+/// # Examples
+/// ```
+/// use momentry_core::core::text::tokenizer::tokenize_chinese_text;
+///
+/// assert_eq!(tokenize_chinese_text("這是一個測試"), "這 是 一 個 測 試");
+/// assert_eq!(tokenize_chinese_text("Hello world"), "Hello world");
+/// assert_eq!(tokenize_chinese_text("中文English混合"), "中文 English 混合");
+/// ```
+pub fn tokenize_chinese_text(text: &str) -> String {
+    if contains_chinese(text) {
+        // The second argument of `cut` is jieba-rs's HMM flag, not a "precise mode"
+        // switch: `false` turns HMM-based recognition of out-of-dictionary words off.
+        let tokens = JIEBA.cut(text, false);
+        tokens.join(" ")
+    } else {
+        text.to_string()
+    }
+}
+
+/// Extracts the text field from a JSON value and tokenizes it.
+/// Two layouts are supported, tried in this order:
+/// 1. content->'data'->>'text' (Chinese video format)
+/// 2. content->'text' (English video format)
+///
+/// A missing or non-string field is treated as an empty string.
+pub fn extract_and_tokenize_text(content: &serde_json::Value) -> String {
+    let nested = content
+        .get("data")
+        .and_then(|data| data.get("text"))
+        .and_then(serde_json::Value::as_str);
+
+    let raw_text = match nested {
+        Some(text) => text,
+        None => content
+            .get("text")
+            .and_then(serde_json::Value::as_str)
+            .unwrap_or(""),
+    };
+
+    tokenize_chinese_text(raw_text)
+}
+
+// Unit tests for Chinese detection, segmentation and JSON text extraction.
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_contains_chinese() {
+        assert!(contains_chinese("中文"));
+        assert!(contains_chinese("這是一個測試"));
+        assert!(contains_chinese("混合文本 English 中文"));
+        assert!(!contains_chinese("English only"));
+        assert!(!contains_chinese("123"));
+        assert!(!contains_chinese(""));
+    }
+
+    #[test]
+    fn test_tokenize_chinese_text() {
+        // Chinese only
+        assert_eq!(tokenize_chinese_text("這是一個測試"), "這 是 一 個 測 試");
+
+        // English only
+        assert_eq!(tokenize_chinese_text("Hello world"), "Hello world");
+
+        // Mixed Chinese and English
+        assert_eq!(
+            tokenize_chinese_text("中文English混合"),
+            "中文 English 混合"
+        );
+
+        // Empty string
+        assert_eq!(tokenize_chinese_text(""), "");
+
+        // Digits and punctuation
+        assert_eq!(tokenize_chinese_text("測試123。"), "測 試 123 。");
+    }
+
+    #[test]
+    fn test_extract_and_tokenize_text() {
+        // Chinese layout: content->'data'->>'text'
+        let content1 = serde_json::json!({
+            "data": {
+                "text": "這是一個測試"
+            }
+        });
+        assert_eq!(extract_and_tokenize_text(&content1), "這 是 一 個 測 試");
+
+        // English layout: content->'text'
+        let content2 = serde_json::json!({
+            "text": "Hello world"
+        });
+        assert_eq!(extract_and_tokenize_text(&content2), "Hello world");
+
+        // Both present: data->text takes precedence
+        let content3 = serde_json::json!({
+            "data": {
+                "text": "中文測試"
+            },
+            "text": "English text"
+        });
+        assert_eq!(extract_and_tokenize_text(&content3), "中文 測 試");
+
+        // No text at all
+        let content4 = serde_json::json!({});
+        assert_eq!(extract_and_tokenize_text(&content4), "");
+
+        // Text field that is not a string
+        let content5 = serde_json::json!({
+            "data": {
+                "text": 123
+            }
+        });
+        assert_eq!(extract_and_tokenize_text(&content5), "");
+    }
+}
--- a/src/core/tmdb/ingest.rs
+++ b/src/core/tmdb/ingest.rs
@@ -0,0 +1,40 @@
+use anyhow::{Context, Result};
+use serde::Deserialize;
+use std::path::Path;
+use tracing::{info, warn};
+
+use crate::core::db::PostgresDb;
+
+/// One cast member as deserialized from the cast JSON file.
+#[derive(Debug, Deserialize)]
+pub struct CastEntry {
+    /// Name passed to `get_or_create_identity` during ingestion.
+    pub name: String,
+    /// Role string; `ingest_cast` only uses it in its log message.
+    pub role: String,
+    /// Optional image reference; not read by `ingest_cast`.
+    pub image: Option<String>,
+}
+
+/// Ingests TMDB cast data from the JSON file generated by `tmdb_cast_fetcher.py`.
+///
+/// The file must hold a JSON array of [`CastEntry`] objects. For each entry an identity
+/// is looked up or created by name; a failure for one entry is logged as a warning and
+/// does not stop the run.
+///
+/// Returns the number of entries ingested successfully.
+///
+/// # Errors
+/// Fails when the file does not exist, cannot be read, or is not valid cast JSON.
+pub async fn ingest_cast(db: &PostgresDb, json_path: &str) -> Result<usize> {
+    let path = Path::new(json_path);
+    if !path.exists() {
+        anyhow::bail!("Cast JSON file not found: {}", json_path);
+    }
+
+    let raw = std::fs::read_to_string(path)
+        .with_context(|| format!("Failed to read cast JSON: {}", json_path))?;
+    let cast_list: Vec<CastEntry> =
+        serde_json::from_str(&raw).context("Invalid cast JSON format")?;
+
+    let mut ingested = 0;
+    for CastEntry { name, role, .. } in &cast_list {
+        if let Err(e) = db.get_or_create_identity(name).await {
+            warn!("Failed to create talent '{}': {}", name, e);
+            continue;
+        }
+        info!("Ingested TMDB cast: {} as {}", name, role);
+        ingested += 1;
+    }
+
+    Ok(ingested)
+}
--- a/src/core/tmdb/mod.rs
+++ b/src/core/tmdb/mod.rs
@@ -0,0 +1 @@
+pub mod ingest;
--- a/src/core/worker/job_runner.rs
+++ b/src/core/worker/job_runner.rs
@@ -0,0 +1,144 @@
+use sqlx::PgPool;
+use tokio::time::{sleep, Duration};
+use tracing;
+use uuid::Uuid;
+
+use crate::core::chunk;
+
+/// Background worker that polls `dev.jobs` for queued jobs and processes them one by one.
+pub struct JobWorker {
+    /// Connection pool used for all job and video queries.
+    pool: PgPool,
+    /// How long to sleep when no queued job was found.
+    poll_interval: Duration,
+}
+
+impl JobWorker {
+    /// Creates a worker that uses `pool` and waits `poll_interval_secs` seconds between
+    /// polls when the queue is empty.
+    pub fn new(pool: PgPool, poll_interval_secs: u64) -> Self {
+        Self {
+            pool,
+            poll_interval: Duration::from_secs(poll_interval_secs),
+        }
+    }
+
+    /// Runs the polling loop forever.
+    ///
+    /// After a job was processed the next poll happens immediately; when the queue is
+    /// empty the worker sleeps for `poll_interval`; after an error it logs the error and
+    /// sleeps for 5 seconds.
+    pub async fn run(&self) {
+        tracing::info!(
+            "🤖 Job Worker started (Polling every {}s)",
+            self.poll_interval.as_secs()
+        );
+
+        loop {
+            let pause = match self.process_next_job().await {
+                // A job was processed: check for more right away.
+                Ok(true) => continue,
+                // No work found, wait before polling again
+                Ok(false) => self.poll_interval,
+                Err(e) => {
+                    tracing::error!("❌ Job Worker error: {}", e);
+                    Duration::from_secs(5)
+                }
+            };
+            sleep(pause).await;
+        }
+    }
+
+    /// Claims the oldest QUEUED job, runs it, and records the outcome.
+    ///
+    /// Returns `Ok(true)` when a job was claimed (whether it then succeeded or failed)
+    /// and `Ok(false)` when the queue was empty.
+    ///
+    /// # Errors
+    /// Returns an error when claiming the job or writing its final status fails. Errors
+    /// raised while executing the job itself are stored on the job row as `FAILED`.
+    async fn process_next_job(&self) -> anyhow::Result<bool> {
+        // 1. Fetch a QUEUED job
+        // The claim is one atomic UPDATE; `FOR UPDATE SKIP LOCKED` in the subquery keeps
+        // two workers from picking the same row.
+        let job_row: Option<(String, String, String, String, String, i64)> = sqlx::query_as(
+            r#"
+            UPDATE dev.jobs 
+            SET status = 'RUNNING', updated_at = NOW()
+            WHERE id = (
+                SELECT id FROM dev.jobs 
+                WHERE status = 'QUEUED' 
+                ORDER BY created_at ASC 
+                LIMIT 1 
+                FOR UPDATE SKIP LOCKED
+            )
+            RETURNING id::text, asset_uuid, rule, status, processor_list, total_frames
+            "#,
+        )
+        .fetch_optional(&self.pool)
+        .await?;
+
+        let (job_id, asset_uuid, rule, _status, _processors, _total_frames) = match job_row {
+            Some(row) => row,
+            None => return Ok(false), // No job found
+        };
+
+        let job_uuid =
+            Uuid::parse_str(&job_id).map_err(|e| anyhow::anyhow!("Invalid job UUID: {}", e))?;
+
+        tracing::info!(
+            "🚀 Processing Job {} for Asset {} (Rule: {})",
+            job_id,
+            asset_uuid,
+            rule
+        );
+
+        // 2. Execute Logic based on Rule
+        // The FPS lookup is part of the job: its error must land in `result` so the job
+        // is marked FAILED below instead of being left in RUNNING by an early return.
+        let result: anyhow::Result<usize> = match rule.as_str() {
+            "rule1" => match self.get_asset_fps(&asset_uuid).await {
+                Ok(fps) => chunk::rule1_ingest::ingest_rule1(&self.pool, &asset_uuid, fps).await,
+                Err(e) => Err(e),
+            },
+            _ => {
+                // NOTE(review): an unknown rule is still reported as completed with 0
+                // items — confirm whether it should be marked FAILED instead.
+                tracing::warn!("Unknown rule type: {}", rule);
+                Ok(0)
+            }
+        };
+
+        // 3. Update Job Status
+        match result {
+            Ok(chunk_count) => {
+                tracing::info!(
+                    "✅ Job {} completed. Processed {} items.",
+                    job_id,
+                    chunk_count
+                );
+
+                sqlx::query!(
+                    "UPDATE dev.jobs SET status = 'COMPLETED', processed_frames = total_frames, updated_at = NOW() WHERE id = $1",
+                    job_uuid
+                )
+                .execute(&self.pool)
+                .await?;
+
+                sqlx::query!(
+                    "UPDATE dev.videos SET processing_status = 'COMPLETED' WHERE uuid = $1",
+                    asset_uuid
+                )
+                .execute(&self.pool)
+                .await?;
+            }
+            Err(e) => {
+                // `{:#}` renders the whole anyhow context chain, not just the outer message.
+                let err_msg = format!("{:#}", e);
+                tracing::error!("❌ Job {} failed: {}", job_id, err_msg);
+
+                // Cap the stored message at 500 bytes, backing up to a char boundary:
+                // slicing through a multi-byte character would panic.
+                let mut cut = err_msg.len().min(500);
+                while !err_msg.is_char_boundary(cut) {
+                    cut -= 1;
+                }
+                let safe_msg = &err_msg[..cut];
+
+                sqlx::query!(
+                    "UPDATE dev.jobs SET status = 'FAILED', error_message = $2, updated_at = NOW() WHERE id = $1",
+                    job_uuid,
+                    safe_msg
+                )
+                .execute(&self.pool)
+                .await?;
+            }
+        }
+
+        Ok(true) // Processed a job
+    }
+
+    /// Looks up the frame rate stored in `metadata->>'fps'` for the given video.
+    ///
+    /// Falls back to 29.97 when the video row is missing, when the stored value is NULL
+    /// (no `fps` key in the metadata), or when it is not a positive finite number.
+    ///
+    /// # Errors
+    /// Returns an error when the query itself fails.
+    async fn get_asset_fps(&self, uuid: &str) -> anyhow::Result<f64> {
+        // Outer `Option`: was a row found? Inner `Option`: is the column non-NULL?
+        // Decoding a NULL column into a bare `f64` is an error, not `None`.
+        let fps: Option<Option<f64>> =
+            sqlx::query_scalar("SELECT (metadata->>'fps')::float FROM dev.videos WHERE uuid = $1")
+                .bind(uuid)
+                .fetch_optional(&self.pool)
+                .await?;
+
+        // Fallback to 29.97 if not found or unusable
+        Ok(fps
+            .flatten()
+            .filter(|v| v.is_finite() && *v > 0.0)
+            .unwrap_or(29.97))
+    }
+}
--- a/src/core/worker/mod.rs
+++ b/src/core/worker/mod.rs
@@ -0,0 +1,2 @@
+pub mod job_runner;
+pub use job_runner::JobWorker;