chore: backup before migration to new repo

2026-04-23 16:46:02 +08:00
parent 13dd3b30f3
commit 59809dae1f
40 changed files with 5566 additions and 1783 deletions
--- a/src/core/cache/keys.rs
+++ b/src/core/cache/keys.rs
@@ -10,6 +10,8 @@ pub const KEY_PREFIX_VIDEO: &str = "video:";
 pub const KEY_PREFIX_SEARCH: &str = "search:";
 pub const KEY_PREFIX_SEARCH_HYBRID: &str = "search:hybrid:";
 pub const KEY_PREFIX_SEARCH_N8N: &str = "search:n8n:";
+pub const KEY_PREFIX_SEARCH_BM25: &str = "search:bm25:";
+pub const KEY_PREFIX_SEARCH_N8N_BM25: &str = "search:n8n:bm25:";
 pub const KEY_HEALTH: &str = "health:basic";

 pub fn videos_list(page: usize, limit: usize) -> String {
@@ -32,6 +34,14 @@ pub fn n8n_search(query_hash: &str) -> String {
    format!("{}{}", KEY_PREFIX_SEARCH_N8N, query_hash)
 }

+pub fn bm25_search(query_hash: &str) -> String {
+    format!("{}{}", KEY_PREFIX_SEARCH_BM25, query_hash)
+}
+
+pub fn n8n_bm25_search(query_hash: &str) -> String {
+    format!("{}{}", KEY_PREFIX_SEARCH_N8N_BM25, query_hash)
+}
+
 pub fn health() -> String {
    KEY_HEALTH.to_string()
 }
@@ -48,6 +58,17 @@ pub fn search_prefix() -> String {
    format!("^{}", KEY_PREFIX_SEARCH)
 }

+pub const KEY_PREFIX_VISUAL_SEARCH: &str = "search:visual:";
+pub const CATEGORY_VISUAL_SEARCH: &str = "visual_search";
+
+pub fn visual_search(uuid: &str, criteria_hash: &str) -> String {
+    format!("{}{}:{}", KEY_PREFIX_VISUAL_SEARCH, uuid, criteria_hash)
+}
+
+pub fn visual_search_prefix() -> String {
+    format!("^{}", KEY_PREFIX_VISUAL_SEARCH)
+}
+
 #[cfg(test)]
 mod tests {
    use super::*;
@@ -78,8 +99,28 @@ mod tests {
        assert_eq!(n8n_search("hash123"), "search:n8n:hash123");
    }

+    #[test]
+    fn test_bm25_search() {
+        assert_eq!(bm25_search("hash123"), "search:bm25:hash123");
+    }
+
+    #[test]
+    fn test_n8n_bm25_search() {
+        assert_eq!(n8n_bm25_search("hash123"), "search:n8n:bm25:hash123");
+    }
+
    #[test]
    fn test_health() {
        assert_eq!(health(), "health:basic");
    }
+
+    #[test]
+    fn test_visual_search() {
+        assert_eq!(visual_search("abc123", "hash"), "search:visual:abc123:hash");
+    }
+
+    #[test]
+    fn test_visual_search_prefix() {
+        assert_eq!(visual_search_prefix(), "^search:visual:");
+    }
 }
--- a/src/core/cache/mongo_cache.rs
+++ b/src/core/cache/mongo_cache.rs
@@ -136,6 +136,10 @@ impl MongoCache {
        self.settings.ttl_video_meta
    }

+    pub fn ttl_visual_search(&self) -> u64 {
+        self.settings.ttl_search // Reuse search TTL
+    }
+
    pub async fn get<T: DeserializeOwned>(&self, key: &str) -> Result<Option<T>> {
        if !self.is_enabled() {
            return Ok(None);
--- a/src/core/chunk/mod.rs
+++ b/src/core/chunk/mod.rs
@@ -1,5 +1,9 @@
+pub mod rule1_ingest;
+pub mod rule3_ingest;
 pub mod splitter;
 pub mod types;

+pub use rule1_ingest::ingest_rule1;
+pub use rule3_ingest::ingest_rule3;
 pub use splitter::{AsrSegment, ChunkSplitter};
 pub use types::{Chunk, ChunkType};
--- a/src/core/chunk/types.rs
+++ b/src/core/chunk/types.rs
@@ -1,6 +1,7 @@
 use crate::core::time::FrameTime;
 use serde::{Deserialize, Serialize};

+// ==================== ChunkType ====================
 #[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq)]
 #[serde(rename_all = "snake_case")]
 pub enum ChunkType {
@@ -8,7 +9,8 @@ pub enum ChunkType {
    Sentence,
    Cut,
    Trace,
-    Story, // Parent chunk from story analysis
+    Story,
+    Visual, // 視覺分片 (Phase 2.1)
 }

 impl ChunkType {
@@ -19,10 +21,12 @@ impl ChunkType {
            ChunkType::Cut => "cut",
            ChunkType::Trace => "trace",
            ChunkType::Story => "story",
+            ChunkType::Visual => "visual",
        }
    }
 }

+// ==================== ChunkRule ====================
 #[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq)]
 #[serde(rename_all = "snake_case")]
 pub enum ChunkRule {
@@ -39,6 +43,73 @@ impl ChunkRule {
    }
 }

+// ==================== Visual chunk structures (Phase 2.1) ====================
+/// Bounding box of a detected object.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct BoundingBox {
+    pub x: i32,
+    pub y: i32,
+    pub width: i32,
+    pub height: i32,
+}
+
+/// A detected object.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct DetectedObject {
+    /// Object class name.
+    pub class_name: String,
+    /// Object class ID.
+    pub class_id: u32,
+    /// Confidence value (0.0-1.0).
+    pub confidence: f32,
+    /// Bounding box.
+    pub bbox: Option<BoundingBox>,
+    /// Number of occurrences (within the chunk).
+    pub occurrence: u32,
+}
+
+/// Object list of one keyframe.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct KeyframeObjects {
+    /// Keyframe time (seconds) - for reference only; `frame_number` is the primary marker.
+    pub timestamp: f64,
+    /// Keyframe frame number - the primary time marker.
+    pub frame_number: u64,
+    /// Detected objects.
+    pub objects: Vec<DetectedObject>,
+}
+
+/// Visual metadata.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct VisualMetadata {
+    /// Total number of objects.
+    pub object_count: u32,
+    /// List of unique object classes.
+    pub unique_classes: Vec<String>,
+    /// Highest confidence value.
+    pub max_confidence: f32,
+    /// Average confidence value.
+    pub avg_confidence: f32,
+    /// Spatial density (average number of objects per frame).
+    pub spatial_density: f32,
+}
+
+/// Content of a visual chunk.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct VisualChunkContent {
+    /// Per-keyframe object lists; each keyframe carries its `frame_number`.
+    pub keyframe_objects: Vec<KeyframeObjects>,
+    /// Dominant object labels (objects that appear in most frames).
+    pub dominant_objects: Vec<String>,
+    /// Object relationships (object1, relationship, object2) - optional.
+    pub object_relationships: Vec<(String, String, String)>,
+    /// Scene description - optional.
+    pub scene_description: Option<String>,
+    /// Visual metadata.
+    pub metadata: VisualMetadata,
+}
+
+// ==================== Chunk main struct ====================
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct Chunk {
    pub file_id: i32,
@@ -49,9 +120,9 @@ pub struct Chunk {
    pub rule: ChunkRule,
    /// Frames per second (can be fractional, e.g., 29.97, 23.976)
    pub fps: f64,
-    /// Start frame (0-based)
+    /// Start frame (0-based) - primary time marker
    pub start_frame: i64,
-    /// End frame (exclusive)
+    /// End frame (exclusive) - primary time marker
    pub end_frame: i64,
    pub text_content: Option<String>,
    pub content: serde_json::Value,
@@ -61,17 +132,11 @@ pub struct Chunk {
    pub pre_chunk_ids: Vec<i32>,
    pub parent_chunk_id: Option<String>, // For parent-child chunk hierarchy
    pub child_chunk_ids: Vec<String>,    // Child chunk IDs (for parent chunks)
+    pub visual_stats: Option<serde_json::Value>,
 }

 impl Chunk {
-    /// Creates a new chunk from frame counts.
-    ///
-    /// # Arguments
-    ///
-    /// * `start_frame` - Start frame (0-based)
-    /// * `end_frame` - End frame (exclusive)
-    /// * `fps` - Frames per second (can be fractional)
-    #[allow(clippy::too_many_arguments)]
+    /// 創建新分片
    pub fn new(
        file_id: i32,
        uuid: String,
@@ -83,11 +148,13 @@ impl Chunk {
        fps: f64,
        content: serde_json::Value,
    ) -> Self {
-        let chunk_id = format!("{}_{:04}", chunk_type.as_str(), chunk_index);
+        let frame_count = (end_frame - start_frame) as i32;
+        let chunk_id = format!("{}_{}", uuid, chunk_index);
+
        Self {
            file_id,
            uuid,
-            chunk_id: chunk_id.clone(),
+            chunk_id,
            chunk_index,
            chunk_type,
            rule,
@@ -98,17 +165,171 @@ impl Chunk {
            content,
            metadata: None,
            vector_id: None,
-            frame_count: 0,
+            frame_count,
            pre_chunk_ids: vec![],
            parent_chunk_id: None,
            child_chunk_ids: vec![],
+            visual_stats: None,
        }
    }

-    /// Creates a new chunk from seconds (legacy conversion).
+    /// Creates a `ChunkType::Visual` chunk from `visual_content` (Phase 2.1).
+    pub fn new_visual(
+        file_id: i32,
+        uuid: String,
+        chunk_index: u32,
+        start_frame: i64,
+        end_frame: i64,
+        fps: f64,
+        visual_content: VisualChunkContent,
+    ) -> Self {
+        let content = serde_json::to_value(&visual_content)
+            .unwrap_or_else(|_| serde_json::json!({"error": "Failed to serialize visual content"}));
+
+        Self::new(
+            file_id,
+            uuid,
+            chunk_index,
+            ChunkType::Visual,
+            ChunkRule::Rule2,
+            start_frame,
+            end_frame,
+            fps,
+            content,
+        )
+    }
+
+    /// Builds a visual chunk from YOLO frames (Phase 2.1).
+    ///
+    /// Each frame becomes one `KeyframeObjects` entry with `occurrence` set to 1 per detection.
+    /// Aggregates (object count, unique classes, max/avg confidence, spatial density) and the
+    /// dominant classes (present in more than half of the frames) are computed here.
+    /// `unique_classes` and `dominant_objects` are both returned in sorted order.
+    pub fn from_yolo_frames(
+        file_id: i32,
+        uuid: String,
+        chunk_index: u32,
+        start_frame: i64,
+        end_frame: i64,
+        fps: f64,
+        yolo_frames: Vec<crate::core::processor::yolo::YoloFrame>,
+    ) -> Self {
+        // Convert the YOLO frames into keyframe objects
+        let keyframe_objects: Vec<KeyframeObjects> = yolo_frames
+            .iter()
+            .map(|frame| {
+                let objects: Vec<DetectedObject> = frame
+                    .objects
+                    .iter()
+                    .map(|obj| DetectedObject {
+                        class_name: obj.class_name.clone(),
+                        class_id: obj.class_id,
+                        confidence: obj.confidence,
+                        bbox: Some(BoundingBox {
+                            x: obj.x,
+                            y: obj.y,
+                            width: obj.width,
+                            height: obj.height,
+                        }),
+                        occurrence: 1,
+                    })
+                    .collect();
+
+                KeyframeObjects {
+                    timestamp: frame.timestamp,
+                    frame_number: frame.frame,
+                    objects,
+                }
+            })
+            .collect();
+
+        // Object statistics
+        let total_objects: u32 = yolo_frames.iter().map(|f| f.objects.len() as u32).sum();
+
+        // Unique class names; a BTreeSet keeps them sorted so the output is deterministic
+        let unique_classes: Vec<String> = yolo_frames
+            .iter()
+            .flat_map(|f| f.objects.iter().map(|o| o.class_name.clone()))
+            .collect::<std::collections::BTreeSet<_>>()
+            .into_iter()
+            .collect();
+
+        // Confidence statistics
+        let confidences: Vec<f32> = yolo_frames
+            .iter()
+            .flat_map(|f| f.objects.iter().map(|o| o.confidence))
+            .collect();
+
+        let max_confidence = confidences.iter().copied().fold(0.0f32, f32::max);
+        let avg_confidence = if !confidences.is_empty() {
+            confidences.iter().sum::<f32>() / confidences.len() as f32
+        } else {
+            0.0
+        };
+
+        // Count, per class, the number of frames it appears in (once per frame)
+        let mut object_counts = std::collections::HashMap::new();
+        for frame in &yolo_frames {
+            let frame_classes: std::collections::HashSet<_> =
+                frame.objects.iter().map(|o| o.class_name.clone()).collect();
+            for class in frame_classes {
+                *object_counts.entry(class).or_insert(0) += 1;
+            }
+        }
+
+        // Strictly more than half of the frames; with no frames the map is empty, so no 0/0.
+        let mut dominant_objects: Vec<String> = object_counts
+            .into_iter()
+            .filter(|(_, count)| *count as f32 / yolo_frames.len() as f32 > 0.5)
+            .map(|(class, _)| class)
+            .collect();
+        dominant_objects.sort();
+
+        // Assemble the visual content
+        let visual_content = VisualChunkContent {
+            keyframe_objects,
+            dominant_objects,
+            object_relationships: vec![], // optional: relationship detection may be added later
+            scene_description: None,      // optional: LLM scene description may be added later
+            metadata: VisualMetadata {
+                object_count: total_objects,
+                unique_classes,
+                max_confidence,
+                avg_confidence,
+                spatial_density: if !yolo_frames.is_empty() {
+                    total_objects as f32 / yolo_frames.len() as f32
+                } else {
+                    0.0
+                },
+            },
+        };
+
+        Self::new_visual(
+            file_id,
+            uuid,
+            chunk_index,
+            start_frame,
+            end_frame,
+            fps,
+            visual_content,
+        )
+    }
+
+    /// Converts the chunk to a `FrameTime`.
+    pub fn to_frame_time(&self) -> FrameTime {
+        // Uses the start frame as the reference point
+        FrameTime::from_frames(self.start_frame, self.fps)
+    }
+
+    /// Returns `true` when this chunk is a parent, i.e. it has at least one child chunk ID.
+    pub fn is_parent(&self) -> bool {
+        !self.child_chunk_ids.is_empty()
+    }
+
+    /// 從秒數創建新分片（舊版轉換）
    ///
-    /// This is useful for migrating from older systems that store time as seconds.
-    /// The frame counts are calculated by rounding `seconds * fps`.
+    /// 這對於從存儲時間為秒的舊系統遷移很有用。
+    /// 幀數通過舍入 `seconds * fps` 計算。
    #[allow(clippy::too_many_arguments)]
    pub fn from_seconds(
        file_id: i32,
@@ -136,104 +357,197 @@ impl Chunk {
        )
    }

-    /// Returns the start time as a `FrameTime`.
+    /// 返回開始時間為 `FrameTime`
    pub fn start_time(&self) -> FrameTime {
        FrameTime::from_frames(self.start_frame, self.fps)
    }

-    /// Returns the end time as a `FrameTime`.
+    /// 返回結束時間為 `FrameTime`
    pub fn end_time(&self) -> FrameTime {
        FrameTime::from_frames(self.end_frame, self.fps)
    }

-    /// Returns the duration in frames.
+    /// 返回持續時間的幀數
    pub fn duration_frames(&self) -> i64 {
        self.end_frame - self.start_frame
    }

-    /// Returns the duration in seconds.
+    /// 返回持續時間的秒數
    pub fn duration_seconds(&self) -> f64 {
        self.duration_frames() as f64 / self.fps
    }

-    /// Formats the start time as "seconds.frame" (e.g., "123.04").
+    /// 將開始時間格式化為 "seconds.frame" (例如："123.04")
    pub fn format_start_sec_frame(&self) -> String {
        self.start_time().format_sec_frame()
    }

-    /// Formats the end time as "seconds.frame" (e.g., "456.15").
+    /// 將結束時間格式化為 "seconds.frame" (例如："456.15")
    pub fn format_end_sec_frame(&self) -> String {
        self.end_time().format_sec_frame()
    }

-    /// Formats the start time as "HH:MM:SS".
+    /// 將開始時間格式化為 "HH:MM:SS"
    pub fn format_start_hms(&self) -> String {
        self.start_time().format_hms()
    }

-    /// Formats the end time as "HH:MM:SS".
+    /// 將結束時間格式化為 "HH:MM:SS"
    pub fn format_end_hms(&self) -> String {
        self.end_time().format_hms()
    }

-    /// Formats the start time as "HH:MM:SS.FF".
+    /// 將開始時間格式化為 "HH:MM:SS.FF"
    pub fn format_start_hms_frame(&self) -> String {
        self.start_time().format_hms_frame()
    }

-    /// Formats the end time as "HH:MM:SS.FF".
+    /// 將結束時間格式化為 "HH:MM:SS.FF"
    pub fn format_end_hms_frame(&self) -> String {
        self.end_time().format_hms_frame()
    }

-    /// Returns a tuple of (start_seconds, end_seconds) for compatibility.
+    /// 返回 (start_seconds, end_seconds) 元組用於兼容性
    ///
-    /// This is provided for backward compatibility during migration.
-    /// Prefer using `start_time()` and `end_time()` methods.
+    /// 這在遷移期間提供向後兼容性。
+    /// 建議使用 `start_time()` 和 `end_time()` 方法。
    pub fn time_range_seconds(&self) -> (f64, f64) {
        (self.start_time().seconds(), self.end_time().seconds())
    }

+    /// Attaches metadata.
    pub fn with_metadata(mut self, metadata: serde_json::Value) -> Self {
        self.metadata = Some(metadata);
        self
    }

+    /// Attaches the vector ID.
    pub fn with_vector_id(mut self, vector_id: String) -> Self {
        self.vector_id = Some(vector_id);
        self
    }

+    /// Attaches the text content.
    pub fn with_text_content(mut self, text: String) -> Self {
        self.text_content = Some(text);
        self
    }

+    /// Sets the frame count.
    pub fn with_frame_count(mut self, count: i32) -> Self {
        self.frame_count = count;
        self
    }

+    /// Sets the IDs of the preceding chunks.
    pub fn with_pre_chunk_ids(mut self, ids: Vec<i32>) -> Self {
        self.pre_chunk_ids = ids;
        self
    }

+    /// Sets the parent chunk ID.
    pub fn with_parent_chunk_id(mut self, parent_id: String) -> Self {
        self.parent_chunk_id = Some(parent_id);
        self
    }

+    /// Sets the child chunk IDs.
    pub fn with_child_chunk_ids(mut self, child_ids: Vec<String>) -> Self {
        self.child_chunk_ids = child_ids;
        self
    }
+}

-    pub fn is_parent_chunk(&self) -> bool {
-        !self.child_chunk_ids.is_empty()
+// ==================== VisualChunkContent helper methods ====================
+impl VisualChunkContent {
+    /// Computes the similarity of two YOLO frames based on the object classes they contain.
+    pub fn frame_similarity(
+        frame1: &crate::core::processor::yolo::YoloFrame,
+        frame2: &crate::core::processor::yolo::YoloFrame,
+    ) -> f32 {
+        if frame1.objects.is_empty() && frame2.objects.is_empty() {
+            return 1.0; // two empty frames are fully similar
+        }
+
+        if frame1.objects.is_empty() || frame2.objects.is_empty() {
+            return 0.0; // one empty, one non-empty: not similar
+        }
+
+        // Build the sets of object class names
+        let set1: std::collections::HashSet<String> = frame1
+            .objects
+            .iter()
+            .map(|o| o.class_name.clone())
+            .collect();
+        let set2: std::collections::HashSet<String> = frame2
+            .objects
+            .iter()
+            .map(|o| o.class_name.clone())
+            .collect();
+
+        // Jaccard similarity: |intersection| / |union|
+        let intersection: Vec<_> = set1.intersection(&set2).collect();
+        let union: Vec<_> = set1.union(&set2).collect();
+
+        if union.is_empty() {
+            0.0
+        } else {
+            intersection.len() as f32 / union.len() as f32
+        }
    }

-    pub fn is_child_chunk(&self) -> bool {
-        self.parent_chunk_id.is_some()
+    /// Builds a one-line summary of the visual chunk (uses the keyframes' `frame_number`).
+    pub fn summary(&self, fps: f64) -> String {
+        if self.keyframe_objects.is_empty() {
+            return "Empty visual chunk".to_string();
+        }
+
+        let first_frame = self.keyframe_objects.first().unwrap().frame_number;
+        let last_frame = self.keyframe_objects.last().unwrap().frame_number;
+
+        // Compute times (for reference only); a non-positive fps yields 0.0
+        let start_time = if fps > 0.0 {
+            first_frame as f64 / fps
+        } else {
+            0.0
+        };
+        let end_time = if fps > 0.0 {
+            last_frame as f64 / fps
+        } else {
+            0.0
+        };
+        let duration = end_time - start_time;
+        let frame_count = self.keyframe_objects.len();
+
+        format!(
+            "Visual chunk: frames {} to {} (duration: {:.1}s, {} frames). Objects: {} total, {} unique. Dominant: {}",
+            first_frame,
+            last_frame,
+            duration,
+            frame_count,
+            self.metadata.object_count,
+            self.metadata.unique_classes.len(),
+            if self.dominant_objects.is_empty() {
+                "none".to_string()
+            } else {
+                self.dominant_objects.join(", ")
+            }
+        )
+    }
+
+    /// Returns whether any keyframe contains an object of the given class.
+    pub fn contains_object(&self, class_name: &str) -> bool {
+        self.keyframe_objects
+            .iter()
+            .any(|ko| ko.objects.iter().any(|obj| obj.class_name == class_name))
+    }
+
+    /// Returns every object, across all keyframes, whose confidence is at or above `threshold`.
+    pub fn high_confidence_objects(&self, threshold: f32) -> Vec<&DetectedObject> {
+        self.keyframe_objects
+            .iter()
+            .flat_map(|ko| ko.objects.iter())
+            .filter(|obj| obj.confidence >= threshold)
+            .collect()
    }
 }
--- a/src/core/config.rs
+++ b/src/core/config.rs
@@ -164,3 +164,29 @@ pub mod cache {
            .unwrap_or(3600)
    });
 }
+
+pub mod llm {
+    use super::*;
+    /// LLM summary endpoint: `MOMENTRY_LLM_SUMMARY_URL`, else a local chat-completions URL.
+    pub static SUMMARY_URL: Lazy<String> = Lazy::new(|| {
+        env::var("MOMENTRY_LLM_SUMMARY_URL")
+            .unwrap_or_else(|_| "http://127.0.0.1:8081/v1/chat/completions".to_string())
+    });
+    /// LLM summary model name: `MOMENTRY_LLM_SUMMARY_MODEL`, else `gemma4`.
+    pub static SUMMARY_MODEL: Lazy<String> = Lazy::new(|| {
+        env::var("MOMENTRY_LLM_SUMMARY_MODEL").unwrap_or_else(|_| "gemma4".to_string())
+    });
+    /// Timeout in seconds: `MOMENTRY_LLM_SUMMARY_TIMEOUT`, 120 when unset or unparsable.
+    pub static SUMMARY_TIMEOUT_SECS: Lazy<u64> = Lazy::new(|| {
+        env::var("MOMENTRY_LLM_SUMMARY_TIMEOUT")
+            .ok()
+            .and_then(|v| v.trim().parse().ok())
+            .unwrap_or(120)
+    });
+    /// On/off switch: `MOMENTRY_LLM_SUMMARY_ENABLED` is on for `true`/`1` (any case), default on.
+    pub static SUMMARY_ENABLED: Lazy<bool> = Lazy::new(|| {
+        env::var("MOMENTRY_LLM_SUMMARY_ENABLED")
+            .map(|v| matches!(v.trim().to_ascii_lowercase().as_str(), "true" | "1"))
+            .unwrap_or(true)
+    });
+}
--- a/src/core/db/mongodb_db.rs
+++ b/src/core/db/mongodb_db.rs
@@ -6,6 +6,7 @@ use crate::core::chunk::types::{Chunk, ChunkRule, ChunkType};

 pub struct MongoDb {
    base_url: String,
+    database: String,
 }

 #[derive(Debug, Clone, Serialize, Deserialize)]
@@ -53,7 +54,8 @@ impl MongoDb {
    pub fn new() -> Self {
        let base_url =
            std::env::var("MONGODB_URL").unwrap_or_else(|_| "http://localhost:27017".to_string());
-        Self { base_url }
+        let database = crate::core::config::MONGODB_DATABASE.clone();
+        Self { base_url, database }
    }
 }

@@ -68,7 +70,7 @@ impl MongoDb {
        let doc: ChunkDocument = chunk.clone().into();
        let client = reqwest::Client::new();

-        let url = format!("{}/momentry/chunks", self.base_url);
+        let url = format!("{}/{}/chunks", self.base_url, self.database);

        client
            .post(&url)
@@ -83,8 +85,8 @@ impl MongoDb {
    pub async fn get_chunks_by_uuid(&self, uuid: &str) -> Result<Vec<Chunk>> {
        let client = reqwest::Client::new();
        let url = format!(
-            "{}/momentry/chunks?filter={{\"uuid\":\"{}\"}}",
-            self.base_url, uuid
+            "{}/{}/chunks?filter={{\"uuid\":\"{}\"}}",
+            self.base_url, self.database, uuid
        );

        let response = client
@@ -131,6 +133,7 @@ impl MongoDb {
                    pre_chunk_ids: vec![],
                    parent_chunk_id: doc.parent_chunk_id,
                    child_chunk_ids: doc.child_chunk_ids,
+                    visual_stats: None,
                }
            })
            .collect();
@@ -141,8 +144,8 @@ impl MongoDb {
    pub async fn search_text(&self, query: &str) -> Result<Vec<Chunk>> {
        let client = reqwest::Client::new();
        let url = format!(
-            "{}/momentry/chunks?filter={{\"$text\":{{\"$search\":\"{}\"}}}}",
-            self.base_url, query
+            "{}/{}/chunks?filter={{\"$text\":{{\"$search\":\"{}\"}}}}",
+            self.base_url, self.database, query
        );

        let response = client
@@ -189,6 +192,7 @@ impl MongoDb {
                    pre_chunk_ids: vec![],
                    parent_chunk_id: doc.parent_chunk_id,
                    child_chunk_ids: doc.child_chunk_ids,
+                    visual_stats: None,
                }
            })
            .collect();
@@ -198,7 +202,7 @@ impl MongoDb {

    pub async fn get_all_chunks(&self) -> Result<Vec<Chunk>> {
        let client = reqwest::Client::new();
-        let url = format!("{}/momentry/chunks", self.base_url);
+        let url = format!("{}/{}/chunks", self.base_url, self.database);

        let response = client
            .get(&url)
@@ -244,6 +248,7 @@ impl MongoDb {
                    pre_chunk_ids: vec![],
                    parent_chunk_id: doc.parent_chunk_id,
                    child_chunk_ids: doc.child_chunk_ids,
+                    visual_stats: None,
                }
            })
            .collect();
--- a/src/core/db/postgres_db.rs
+++ b/src/core/db/postgres_db.rs
--- a/src/core/db/qdrant_db.rs
+++ b/src/core/db/qdrant_db.rs
@@ -128,7 +128,7 @@ impl QdrantDb {
        use std::hash::{Hash, Hasher};
        let mut hasher = DefaultHasher::new();
        point_id_str.hash(&mut hasher);
-        let point_id = hasher.finish() as u64;
+        let point_id = hasher.finish();

        let body = serde_json::json!({
            "points": [{
@@ -171,7 +171,7 @@ impl QdrantDb {
            ));
        }

-        tracing::debug!("Qdrant response: {}", response_text);
+        tracing::debug!("Qdrant upsert response status: {}", status);
        tracing::info!("Successfully upserted vector for chunk: {}", chunk_id);
        Ok(())
    }
@@ -257,6 +257,101 @@ impl QdrantDb {
        Ok(search_results)
    }

+    /// Searches each of `collections` concurrently and returns the top `limit` hits by score,
+    /// de-duplicated on (`chunk_id`, `uuid`); failed or unparsable collections contribute nothing.
+    pub async fn search_collections(
+        &self,
+        query_vector: &[f32],
+        collections: &[&str],
+        limit: usize,
+    ) -> Result<Vec<SearchResult>> {
+        let mut handles = Vec::new();
+        for &collection in collections {
+            let url = format!("{}/collections/{}/points/search", self.base_url, collection);
+            let client = self.client.clone();
+            let api_key = self.api_key.clone();
+            let query_vec = query_vector.to_vec();
+            let body = serde_json::json!({
+                "vector": query_vec,
+                "limit": limit * 2, // Fetch more from each to account for overlaps
+                "with_payload": true
+            });
+            handles.push(async move {
+                let response = client
+                    .post(&url)
+                    .header("api-key", &api_key)
+                    .header("Content-Type", "application/json")
+                    .json(&body)
+                    .send()
+                    .await;
+
+                match response {
+                    Ok(resp) if resp.status().is_success() => {
+                        let resp_text = resp
+                            .text()
+                            .await
+                            .unwrap_or_else(|_| "Failed to read response".to_string());
+                        #[derive(Deserialize)]
+                        struct QdrantSearchResult {
+                            result: Vec<QdrantPoint>,
+                        }
+                        #[derive(Deserialize)]
+                        struct QdrantPoint {
+                            #[allow(dead_code)]
+                            id: serde_json::Value,
+                            score: f64,
+                            payload: HashMap<String, serde_json::Value>,
+                        }
+                        if let Ok(result) = serde_json::from_str::<QdrantSearchResult>(&resp_text) {
+                            let results: Vec<SearchResult> = result
+                                .result
+                                .into_iter()
+                                .map(|r| {
+                                    let uuid = r
+                                        .payload
+                                        .get("uuid")
+                                        .and_then(|v| v.as_str())
+                                        .unwrap_or("unknown")
+                                        .to_string();
+                                    let chunk_id = r
+                                        .payload
+                                        .get("chunk_id")
+                                        .and_then(|v| v.as_str())
+                                        .unwrap_or("unknown")
+                                        .to_string();
+                                    SearchResult {
+                                        uuid,
+                                        chunk_id,
+                                        score: r.score as f32,
+                                    }
+                                })
+                                .collect();
+                            Ok::<Vec<SearchResult>, anyhow::Error>(results)
+                        } else {
+                            Ok::<Vec<SearchResult>, anyhow::Error>(Vec::new())
+                        }
+                    }
+                    _ => Ok::<Vec<SearchResult>, anyhow::Error>(Vec::new()),
+                }
+            });
+        }
+        let results = futures_util::future::join_all(handles).await;
+        let mut merged: Vec<SearchResult> = results
+            .into_iter()
+            .filter_map(Result::ok)
+            .flatten()
+            .collect();
+
+        // Best score first. `total_cmp` is a total order, so a NaN score cannot panic the sort.
+        merged.sort_by(|a, b| b.score.total_cmp(&a.score));
+        // Keep only the first (best-scored) hit per (chunk_id, uuid), adjacent or not.
+        let mut seen = std::collections::HashSet::new();
+        merged.retain(|r| seen.insert((r.chunk_id.clone(), r.uuid.clone())));
+        merged.truncate(limit);
+
+        Ok(merged)
+    }
+
    pub async fn search_in_uuid(
        &self,
        query_vector: &[f32],
--- a/src/core/mod.rs
+++ b/src/core/mod.rs
@@ -4,9 +4,15 @@ pub mod chunk;
 pub mod config;
 pub mod db;
 pub mod embedding;
+pub mod ingestion;
+pub mod llm;
 pub mod overlay;
+pub mod person_identity;
 pub mod probe;
 pub mod processor;
 pub mod storage;
+pub mod text;
 pub mod thumbnail;
 pub mod time;
+pub mod tmdb;
+pub mod worker;
--- a/src/core/processor/asrx.rs
+++ b/src/core/processor/asrx.rs
@@ -28,16 +28,23 @@ pub async fn process_asrx(
    uuid: Option<&str>,
 ) -> Result<AsrxResult> {
    let executor = PythonExecutor::new()?;
-    let script_path = executor.script_path("asrx_processor.py");
+    let mut script_path = executor.script_path("asrx_processor_custom.py");

-    tracing::info!("[ASRX] Starting speaker diarization: {}", video_path);
+    tracing::info!(
+        "[ASRX] Starting speaker diarization (custom): {}",
+        video_path
+    );

    if !script_path.exists() {
-        tracing::warn!("[ASRX] Script not found, returning empty result");
-        return Ok(AsrxResult {
-            language: None,
-            segments: vec![],
-        });
+        tracing::warn!("[ASRX] Custom script not found, falling back to original");
+        script_path = executor.script_path("asrx_processor.py"); // actually use the fallback
+        if !script_path.exists() {
+            tracing::warn!("[ASRX] No script found, returning empty result");
+            return Ok(AsrxResult {
+                language: None,
+                segments: vec![],
+            });
+        }
    }

    let mut cmd = Command::new(executor.python_path());
--- a/src/core/processor/mod.rs
+++ b/src/core/processor/mod.rs
@@ -9,6 +9,7 @@ pub mod ocr;
 pub mod pose;
 pub mod scene_classification;
 pub mod story;
+pub mod visual_chunk;
 pub mod yolo;

 pub use asr::{process_asr, AsrResult, AsrSegment};
@@ -28,4 +29,5 @@ pub use scene_classification::{
    process_scene_classification, SceneClassificationResult, ScenePrediction, SceneSegment,
 };
 pub use story::{process_story, StoryChildChunk, StoryParentChunk, StoryResult, StoryStats};
+pub use visual_chunk::{process_visual_chunk, process_visual_chunk_advanced, VisualChunkResult};
 pub use yolo::{process_yolo, YoloFrame, YoloObject, YoloResult};