feat: add migrations, test scripts, and utility tools

- Add database migrations (006-028) for face recognition, identity, file_uuid - Add test scripts for ASR, face, search, processing - Add portal frontend (Tauri) - Add config, benchmark, and monitoring utilities - Add model checkpoints and pretrained model references
2026-04-30 15:11:53 +08:00
parent 4d75b2e251
commit b54c2def30
192 changed files with 46721 additions and 0 deletions
--- a/tests/visual_chunk_concept.rs
+++ b/tests/visual_chunk_concept.rs
@@ -0,0 +1,451 @@
+//! 視覺分片概念驗證測試
+//!
+//! 此測試驗證視覺分片的數據結構和基本功能
+
+/// Chunk type; each variant has a short string label returned by `ChunkType::as_str`.
+#[derive(Debug, Clone, Copy, PartialEq)]
+pub enum ChunkType {
+    TimeBased,
+    Sentence,
+    Cut,
+    Trace,
+    Story,
+    Visual, // visual chunk (Phase 2.1)
+}
+
+impl ChunkType {
+    /// Returns the short lowercase label of this chunk type, e.g. `"visual"` for
+    /// `ChunkType::Visual` and `"time"` for `ChunkType::TimeBased`.
+    pub fn as_str(&self) -> &'static str {
+        // `ChunkType` is `Copy`, so matching on the dereferenced value is free.
+        match *self {
+            Self::TimeBased => "time",
+            Self::Sentence => "sentence",
+            Self::Cut => "cut",
+            Self::Trace => "trace",
+            Self::Story => "story",
+            Self::Visual => "visual",
+        }
+    }
+}
+
+/// A single detected object.
+#[derive(Debug, Clone)]
+pub struct DetectedObject {
+    /// Object class name
+    pub class_name: String,
+    /// Object class ID
+    pub class_id: u32,
+    /// Confidence score (0.0-1.0)
+    pub confidence: f32,
+    /// Bounding box as (x, y, width, height); `None` when no box is available
+    pub bbox: Option<(i32, i32, i32, i32)>,
+}
+
+/// The list of objects detected in one keyframe.
+#[derive(Debug, Clone)]
+pub struct KeyframeObjects {
+    /// Keyframe time (seconds)
+    pub timestamp: f64,
+    /// Keyframe frame number
+    pub frame_number: u64,
+    /// Objects detected in this keyframe
+    pub objects: Vec<DetectedObject>,
+}
+
+/// Content of a visual chunk: a time span plus the objects detected in its keyframes.
+#[derive(Debug, Clone)]
+pub struct VisualChunkContent {
+    pub start_time: f64, // chunk start; printed with an `s` (seconds) suffix by `summary`
+    pub end_time: f64, // chunk end, same unit as `start_time`
+    pub keyframe_objects: Vec<KeyframeObjects>, // per-keyframe detections belonging to this chunk
+    pub dominant_objects: Vec<String>, // class names regarded as dominant for this chunk
+    pub scene_description: Option<String>, // optional free-text description of the scene
+    pub metadata: VisualMetadata, // aggregate statistics over the chunk's detections
+}
+
+/// Visual metadata: aggregate statistics describing the detections of a visual chunk.
+#[derive(Debug, Clone)]
+pub struct VisualMetadata {
+    pub object_count: u32, // total number of detections
+    pub unique_classes: Vec<String>, // distinct class names
+    pub max_confidence: f32, // highest detection confidence
+    pub avg_confidence: f32, // mean detection confidence
+    pub spatial_density: f32, // objects per frame
+}
+
+impl VisualChunkContent {
+    /// Builds a one-line, human-readable summary of the chunk.
+    ///
+    /// The summary lists the start and end time, the duration, the number of keyframes,
+    /// the total and unique object counts taken from `metadata`, and the dominant object
+    /// names joined with `", "` (or `無` when there are none).
+    pub fn summary(&self) -> String {
+        let dominant = match self.dominant_objects.as_slice() {
+            [] => String::from("無"),
+            names => names.join(", "),
+        };
+        let unique_total = self.metadata.unique_classes.len();
+        let keyframe_total = self.keyframe_objects.len();
+        let span = self.end_time - self.start_time;
+
+        format!(
+            "視覺分片: {:.1}s 到 {:.1}s (持續時間: {:.1}s, {} 幀). 物件: {} 個總計, {} 個唯一. 主要物件: {}",
+            self.start_time,
+            self.end_time,
+            span,
+            keyframe_total,
+            self.metadata.object_count,
+            unique_total,
+            dominant
+        )
+    }
+
+    /// Returns `true` when at least one detection in any keyframe has exactly the given
+    /// class name (case-sensitive comparison).
+    pub fn contains_object(&self, class_name: &str) -> bool {
+        self.keyframe_objects
+            .iter()
+            .flat_map(|keyframe| keyframe.objects.iter())
+            .any(|detection| detection.class_name == class_name)
+    }
+}
+
+/// Mock YOLO result: a list of frames with their detections.
+#[derive(Debug, Clone)]
+pub struct MockYoloResult {
+    pub frames: Vec<MockYoloFrame>, // frames in stored order; `to_visual_chunk` selects from these by frame number
+}
+
+#[derive(Debug, Clone)]
+pub struct MockYoloFrame { // one frame of mock YOLO output
+    pub frame: u64, // frame number
+    pub timestamp: f64, // frame time; copied into `KeyframeObjects::timestamp` (seconds)
+    pub objects: Vec<MockYoloObject>, // detections in this frame
+}
+
+#[derive(Debug, Clone)]
+pub struct MockYoloObject { // one mock YOLO detection
+    pub class_name: String, // object class name
+    pub class_id: u32, // object class ID
+    pub x: i32, // bounding-box x; becomes the first element of `DetectedObject::bbox`
+    pub y: i32, // bounding-box y; becomes the second element of `DetectedObject::bbox`
+    pub width: i32, // bounding-box width
+    pub height: i32, // bounding-box height
+    pub confidence: f32, // detection confidence
+}
+
+impl MockYoloResult {
+    /// Builds a `VisualChunkContent` from the frames whose frame number lies in the
+    /// inclusive range `start_frame..=end_frame`.
+    ///
+    /// Returns `None` when no frame falls inside the range, which includes the case
+    /// `start_frame > end_frame`.
+    ///
+    /// The resulting chunk contains:
+    /// - `start_time` / `end_time`: timestamps of the first and last selected frame, in the
+    ///   order the frames are stored in `self.frames` (so the frames are expected to be
+    ///   stored chronologically);
+    /// - `keyframe_objects`: one entry per selected frame, with every detection copied over;
+    /// - `dominant_objects`: class names present in more than half of the selected frames,
+    ///   sorted lexicographically;
+    /// - `metadata.unique_classes`: distinct class names, sorted lexicographically so the
+    ///   result is deterministic;
+    /// - `metadata.max_confidence` / `metadata.avg_confidence`: `0.0` when the selected
+    ///   frames hold no detections;
+    /// - `metadata.spatial_density`: detections per selected frame;
+    /// - `scene_description`: always `None` (it can be generated later, e.g. by an LLM).
+    pub fn to_visual_chunk(&self, start_frame: u64, end_frame: u64) -> Option<VisualChunkContent> {
+        let frames: Vec<&MockYoloFrame> = self
+            .frames
+            .iter()
+            .filter(|f| (start_frame..=end_frame).contains(&f.frame))
+            .collect();
+
+        // `first()` doubles as the emptiness check: no selected frame means no chunk.
+        let start_time = frames.first()?.timestamp;
+        let end_time = frames.last()?.timestamp;
+        let frame_count = frames.len();
+
+        // Convert every selected frame into its keyframe representation.
+        let keyframe_objects: Vec<KeyframeObjects> = frames
+            .iter()
+            .map(|frame| KeyframeObjects {
+                timestamp: frame.timestamp,
+                frame_number: frame.frame,
+                objects: frame
+                    .objects
+                    .iter()
+                    .map(|obj| DetectedObject {
+                        class_name: obj.class_name.clone(),
+                        class_id: obj.class_id,
+                        confidence: obj.confidence,
+                        bbox: Some((obj.x, obj.y, obj.width, obj.height)),
+                    })
+                    .collect(),
+            })
+            .collect();
+
+        // Distinct class names; a `BTreeSet` yields them sorted, so the output does not
+        // depend on hash iteration order.
+        let unique_classes: Vec<String> = frames
+            .iter()
+            .flat_map(|f| f.objects.iter().map(|o| o.class_name.as_str()))
+            .collect::<std::collections::BTreeSet<&str>>()
+            .into_iter()
+            .map(String::from)
+            .collect();
+
+        // Confidence statistics over every detection in the selected frames.
+        let confidences: Vec<f32> = frames
+            .iter()
+            .flat_map(|f| f.objects.iter().map(|o| o.confidence))
+            .collect();
+        let detection_count = confidences.len();
+        let max_confidence = confidences.iter().copied().fold(0.0f32, f32::max);
+        let avg_confidence = if detection_count == 0 {
+            0.0
+        } else {
+            confidences.iter().sum::<f32>() / detection_count as f32
+        };
+
+        // Count, for each class, the number of frames it appears in (at most once per frame).
+        let mut frames_per_class: std::collections::HashMap<&str, usize> =
+            std::collections::HashMap::new();
+        for &frame in &frames {
+            let classes_in_frame: std::collections::HashSet<&str> =
+                frame.objects.iter().map(|o| o.class_name.as_str()).collect();
+            for class in classes_in_frame {
+                *frames_per_class.entry(class).or_insert(0) += 1;
+            }
+        }
+
+        // Dominant = present in strictly more than 50% of the frames. The comparison is
+        // done in integers so no precision is lost.
+        let mut dominant_objects: Vec<String> = frames_per_class
+            .into_iter()
+            .filter(|&(_, seen_in)| seen_in * 2 > frame_count)
+            .map(|(class, _)| String::from(class))
+            .collect();
+        dominant_objects.sort();
+
+        Some(VisualChunkContent {
+            start_time,
+            end_time,
+            keyframe_objects,
+            dominant_objects,
+            scene_description: None, // can be generated later by an LLM
+            metadata: VisualMetadata {
+                // Saturate instead of silently wrapping if the count ever exceeds `u32`.
+                object_count: detection_count.min(u32::MAX as usize) as u32,
+                unique_classes,
+                max_confidence,
+                avg_confidence,
+                // `frame_count` is at least 1 here because of the early return above.
+                spatial_density: detection_count as f32 / frame_count as f32,
+            },
+        })
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// Bounding box as `(x, y, width, height)`.
+    type BBox = (i32, i32, i32, i32);
+
+    /// Builds a mock YOLO detection with the given class, bounding box and confidence.
+    fn yolo_object(class_name: &str, class_id: u32, bbox: BBox, confidence: f32) -> MockYoloObject {
+        let (x, y, width, height) = bbox;
+        MockYoloObject {
+            class_name: class_name.to_string(),
+            class_id,
+            x,
+            y,
+            width,
+            height,
+            confidence,
+        }
+    }
+
+    /// Builds a mock YOLO frame holding the given detections.
+    fn yolo_frame(frame: u64, timestamp: f64, objects: Vec<MockYoloObject>) -> MockYoloFrame {
+        MockYoloFrame {
+            frame,
+            timestamp,
+            objects,
+        }
+    }
+
+    /// Builds a detected object with the given class, bounding box and confidence.
+    fn detected(class_name: &str, class_id: u32, bbox: BBox, confidence: f32) -> DetectedObject {
+        DetectedObject {
+            class_name: class_name.to_string(),
+            class_id,
+            confidence,
+            bbox: Some(bbox),
+        }
+    }
+
+    #[test]
+    fn test_chunk_type_visual() {
+        let chunk_type = ChunkType::Visual;
+        assert_eq!(chunk_type.as_str(), "visual");
+        assert_eq!(chunk_type, ChunkType::Visual);
+    }
+
+    #[test]
+    fn test_visual_chunk_creation() {
+        // Build a mock YOLO result: two frames, person in both, car only in the first.
+        let yolo_result = MockYoloResult {
+            frames: vec![
+                yolo_frame(
+                    0,
+                    0.0,
+                    vec![
+                        yolo_object("person", 0, (100, 200, 50, 100), 0.95),
+                        yolo_object("car", 2, (300, 150, 80, 60), 0.87),
+                    ],
+                ),
+                yolo_frame(
+                    1,
+                    0.033, // 1/30 second
+                    vec![yolo_object("person", 0, (110, 210, 52, 102), 0.92)],
+                ),
+            ],
+        };
+
+        // Create a visual chunk from the YOLO result.
+        let chunk = yolo_result.to_visual_chunk(0, 1).unwrap();
+
+        // Verify the chunk properties.
+        assert_eq!(chunk.start_time, 0.0);
+        assert_eq!(chunk.end_time, 0.033);
+        assert_eq!(chunk.metadata.object_count, 3);
+        assert_eq!(chunk.metadata.unique_classes.len(), 2);
+        assert!(chunk
+            .metadata
+            .unique_classes
+            .contains(&"person".to_string()));
+        assert!(chunk.metadata.unique_classes.contains(&"car".to_string()));
+        // car appears in exactly 1 of 2 frames (50%), which is not strictly more than half.
+        assert_eq!(chunk.dominant_objects, vec!["person"]);
+        assert_eq!(chunk.keyframe_objects.len(), 2);
+    }
+
+    #[test]
+    fn test_visual_chunk_content_methods() {
+        let content = VisualChunkContent {
+            start_time: 0.0,
+            end_time: 5.0,
+            keyframe_objects: vec![KeyframeObjects {
+                timestamp: 0.0,
+                frame_number: 0,
+                objects: vec![
+                    detected("person", 0, (100, 200, 50, 100), 0.95),
+                    detected("car", 2, (300, 150, 80, 60), 0.87),
+                ],
+            }],
+            dominant_objects: vec!["person".to_string()],
+            scene_description: Some("一個人站在車旁".to_string()),
+            metadata: VisualMetadata {
+                object_count: 2,
+                unique_classes: vec!["person".to_string(), "car".to_string()],
+                max_confidence: 0.95,
+                avg_confidence: 0.91,
+                spatial_density: 2.0,
+            },
+        };
+
+        // Test the summary method.
+        let summary = content.summary();
+        assert!(summary.contains("視覺分片"));
+        assert!(summary.contains("person"));
+
+        // `summary()` does not embed `scene_description`, so the description is checked on
+        // the field itself rather than searched for inside the summary text.
+        assert!(content
+            .scene_description
+            .as_deref()
+            .map_or(false, |description| description.contains("車")));
+
+        // Test the contains_object method.
+        assert!(content.contains_object("person"));
+        assert!(content.contains_object("car"));
+        assert!(!content.contains_object("dog"));
+    }
+
+    #[test]
+    fn test_frame_similarity_concept() {
+        // Concept check for frame similarity (Jaccard index over class names).
+        let frame1_objects = vec![
+            detected("person", 0, (100, 200, 50, 100), 0.95),
+            detected("car", 2, (300, 150, 80, 60), 0.87),
+        ];
+
+        let frame2_objects = vec![
+            detected("person", 0, (110, 210, 52, 102), 0.92),
+            detected("car", 2, (310, 155, 82, 62), 0.85),
+        ];
+
+        // Build the class-name sets.
+        let set1: std::collections::HashSet<String> = frame1_objects
+            .iter()
+            .map(|o| o.class_name.clone())
+            .collect();
+        let set2: std::collections::HashSet<String> = frame2_objects
+            .iter()
+            .map(|o| o.class_name.clone())
+            .collect();
+
+        // Compute the intersection and the union.
+        let intersection: Vec<_> = set1.intersection(&set2).collect();
+        let union: Vec<_> = set1.union(&set2).collect();
+
+        // Verify the similarity.
+        assert_eq!(intersection.len(), 2); // person, car
+        assert_eq!(union.len(), 2); // person, car
+        assert_eq!(intersection.len() as f32 / union.len() as f32, 1.0); // identical sets
+    }
+
+    #[test]
+    fn test_dominant_objects_detection() {
+        let yolo_result = MockYoloResult {
+            frames: vec![
+                yolo_frame(
+                    0,
+                    0.0,
+                    vec![yolo_object("person", 0, (100, 200, 50, 100), 0.95)],
+                ),
+                yolo_frame(
+                    1,
+                    0.033,
+                    vec![yolo_object("person", 0, (110, 210, 52, 102), 0.92)],
+                ),
+                yolo_frame(
+                    2,
+                    0.066,
+                    vec![yolo_object("car", 2, (300, 150, 80, 60), 0.87)],
+                ),
+            ],
+        };
+
+        let chunk = yolo_result.to_visual_chunk(0, 2).unwrap();
+
+        // person appears in 2 of 3 frames (67% > 50%); car appears in 1 of 3 (33% < 50%).
+        assert_eq!(chunk.dominant_objects, vec!["person"]);
+        assert!(!chunk.dominant_objects.contains(&"car".to_string()));
+    }
+}