//! Visual chunk proof-of-concept test.
//!
//! Validates the data structures and basic behaviour of visual chunks:
//! per-keyframe object detections are grouped over a frame range and
//! summarised with aggregate metadata.

/// Kind of chunk a piece of content was split into.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ChunkType {
    TimeBased,
    Sentence,
    Cut,
    Trace,
    Story,
    /// Visual chunk (Phase 2.1).
    Visual,
}

impl ChunkType {
    /// Returns the short string identifier for this chunk type.
    pub fn as_str(&self) -> &'static str {
        match self {
            ChunkType::TimeBased => "time",
            ChunkType::Sentence => "sentence",
            ChunkType::Cut => "cut",
            ChunkType::Trace => "trace",
            ChunkType::Story => "story",
            ChunkType::Visual => "visual",
        }
    }
}

/// A single detected object.
#[derive(Debug, Clone)]
pub struct DetectedObject {
    /// Object class name.
    pub class_name: String,
    /// Object class ID.
    pub class_id: u32,
    /// Confidence (0.0-1.0).
    pub confidence: f32,
    /// Bounding box (x, y, width, height).
    pub bbox: Option<(i32, i32, i32, i32)>,
}

/// The objects detected in one keyframe.
#[derive(Debug, Clone)]
pub struct KeyframeObjects {
    /// Keyframe time (seconds).
    pub timestamp: f64,
    /// Keyframe frame number.
    pub frame_number: u64,
    /// Detected objects.
    pub objects: Vec<DetectedObject>,
}

/// Content of a visual chunk.
#[derive(Debug, Clone)]
pub struct VisualChunkContent {
    /// Timestamp of the first keyframe in the chunk.
    pub start_time: f64,
    /// Timestamp of the last keyframe in the chunk.
    pub end_time: f64,
    /// Per-keyframe detections, in input order.
    pub keyframe_objects: Vec<KeyframeObjects>,
    /// Class names present in more than half of the keyframes, sorted.
    pub dominant_objects: Vec<String>,
    /// Optional free-text description; not rendered by [`VisualChunkContent::summary`].
    pub scene_description: Option<String>,
    /// Aggregate statistics over the chunk.
    pub metadata: VisualMetadata,
}

/// Aggregate metadata of a visual chunk.
#[derive(Debug, Clone)]
pub struct VisualMetadata {
    /// Total number of detections across all keyframes.
    pub object_count: u32,
    /// Distinct class names seen in the chunk.
    pub unique_classes: Vec<String>,
    /// Highest detection confidence (0.0 when there are no detections).
    pub max_confidence: f32,
    /// Mean detection confidence (0.0 when there are no detections).
    pub avg_confidence: f32,
    /// Objects per frame.
    pub spatial_density: f32,
}

impl VisualChunkContent {
    /// Returns a one-line summary of the chunk.
    ///
    /// The summary covers the time range, duration, keyframe count, total and
    /// unique object counts, and the dominant objects (or "無" when there are
    /// none). `scene_description` is not included.
    pub fn summary(&self) -> String {
        let duration = self.end_time - self.start_time;
        let frame_count = self.keyframe_objects.len();
        let dominant = if self.dominant_objects.is_empty() {
            "無".to_string()
        } else {
            self.dominant_objects.join(", ")
        };

        format!(
            "視覺分片: {:.1}s 到 {:.1}s (持續時間: {:.1}s, {} 幀). 物件: {} 個總計, {} 個唯一. 主要物件: {}",
            self.start_time,
            self.end_time,
            duration,
            frame_count,
            self.metadata.object_count,
            self.metadata.unique_classes.len(),
            dominant
        )
    }

    /// Returns `true` if any keyframe contains an object of class `class_name`.
    pub fn contains_object(&self, class_name: &str) -> bool {
        self.keyframe_objects
            .iter()
            .any(|ko| ko.objects.iter().any(|obj| obj.class_name == class_name))
    }
}

/// Mock YOLO detection result.
#[derive(Debug, Clone)]
pub struct MockYoloResult {
    pub frames: Vec<MockYoloFrame>,
}

/// One frame of a mock YOLO result.
#[derive(Debug, Clone)]
pub struct MockYoloFrame {
    pub frame: u64,
    pub timestamp: f64,
    pub objects: Vec<MockYoloObject>,
}

/// One detection inside a mock YOLO frame.
#[derive(Debug, Clone)]
pub struct MockYoloObject {
    pub class_name: String,
    pub class_id: u32,
    pub x: i32,
    pub y: i32,
    pub width: i32,
    pub height: i32,
    pub confidence: f32,
}

impl MockYoloResult {
    /// Builds a visual chunk from the frames whose number lies in
    /// `start_frame..=end_frame`.
    ///
    /// Returns `None` when no frame falls inside the range (including when
    /// `start_frame > end_frame`).
    ///
    /// `start_time` / `end_time` are taken from the first and last selected
    /// frame in input order, so frames are expected to be stored
    /// chronologically. `unique_classes` and `dominant_objects` are returned
    /// sorted so the result is deterministic.
    pub fn to_visual_chunk(&self, start_frame: u64, end_frame: u64) -> Option<VisualChunkContent> {
        use std::collections::{BTreeSet, HashMap, HashSet};

        let frames: Vec<&MockYoloFrame> = self
            .frames
            .iter()
            .filter(|f| (start_frame..=end_frame).contains(&f.frame))
            .collect();

        // `?` doubles as the "no frames selected" guard.
        let start_time = frames.first()?.timestamp;
        let end_time = frames.last()?.timestamp;
        let frame_count = frames.len();

        // Convert frames into keyframe object lists.
        let keyframe_objects: Vec<KeyframeObjects> = frames
            .iter()
            .map(|frame| KeyframeObjects {
                timestamp: frame.timestamp,
                frame_number: frame.frame,
                objects: frame
                    .objects
                    .iter()
                    .map(|obj| DetectedObject {
                        class_name: obj.class_name.clone(),
                        class_id: obj.class_id,
                        confidence: obj.confidence,
                        bbox: Some((obj.x, obj.y, obj.width, obj.height)),
                    })
                    .collect(),
            })
            .collect();

        // Aggregate metadata.
        let total_objects: usize = frames.iter().map(|f| f.objects.len()).sum();

        // A BTreeSet both de-duplicates and yields a stable, sorted order.
        let unique_classes: Vec<String> = frames
            .iter()
            .flat_map(|f| f.objects.iter().map(|o| o.class_name.as_str()))
            .collect::<BTreeSet<&str>>()
            .into_iter()
            .map(str::to_owned)
            .collect();

        let confidences: Vec<f32> = frames
            .iter()
            .flat_map(|f| f.objects.iter().map(|o| o.confidence))
            .collect();
        let max_confidence = confidences.iter().copied().fold(0.0f32, f32::max);
        let avg_confidence = if confidences.is_empty() {
            0.0
        } else {
            confidences.iter().sum::<f32>() / confidences.len() as f32
        };

        // Dominant objects: classes present in a strict majority of frames.
        // Each class is counted at most once per frame.
        let mut frames_containing: HashMap<&str, usize> = HashMap::new();
        for frame in &frames {
            let classes_in_frame: HashSet<&str> =
                frame.objects.iter().map(|o| o.class_name.as_str()).collect();
            for class in classes_in_frame {
                *frames_containing.entry(class).or_insert(0) += 1;
            }
        }
        let mut dominant_objects: Vec<String> = frames_containing
            .into_iter()
            // Exact integer form of `count / frame_count > 0.5`.
            .filter(|&(_, count)| count * 2 > frame_count)
            .map(|(class, _)| class.to_owned())
            .collect();
        dominant_objects.sort_unstable();

        Some(VisualChunkContent {
            start_time,
            end_time,
            keyframe_objects,
            dominant_objects,
            // May be generated later by an LLM.
            scene_description: None,
            metadata: VisualMetadata {
                object_count: u32::try_from(total_objects).unwrap_or(u32::MAX),
                unique_classes,
                max_confidence,
                avg_confidence,
                // `frame_count` is non-zero here: empty selections returned above.
                spatial_density: total_objects as f32 / frame_count as f32,
            },
        })
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_chunk_type_visual() {
        let chunk_type = ChunkType::Visual;
        assert_eq!(chunk_type.as_str(), "visual");
        assert_eq!(chunk_type, ChunkType::Visual);
    }

    #[test]
    fn test_visual_chunk_creation() {
        // Build a mock YOLO result.
        let yolo_result = MockYoloResult {
            frames: vec![
                MockYoloFrame {
                    frame: 0,
                    timestamp: 0.0,
                    objects: vec![
                        MockYoloObject {
                            class_name: "person".to_string(),
                            class_id: 0,
                            x: 100,
                            y: 200,
                            width: 50,
                            height: 100,
                            confidence: 0.95,
                        },
                        MockYoloObject {
                            class_name: "car".to_string(),
                            class_id: 2,
                            x: 300,
                            y: 150,
                            width: 80,
                            height: 60,
                            confidence: 0.87,
                        },
                    ],
                },
                MockYoloFrame {
                    frame: 1,
                    timestamp: 0.033, // 1/30 second
                    objects: vec![MockYoloObject {
                        class_name: "person".to_string(),
                        class_id: 0,
                        x: 110,
                        y: 210,
                        width: 52,
                        height: 102,
                        confidence: 0.92,
                    }],
                },
            ],
        };

        // Create a visual chunk from the YOLO result.
        let chunk = yolo_result.to_visual_chunk(0, 1).unwrap();

        // Verify chunk properties.
        assert_eq!(chunk.start_time, 0.0);
        assert_eq!(chunk.end_time, 0.033);
        assert_eq!(chunk.metadata.object_count, 3);
        assert_eq!(chunk.metadata.unique_classes.len(), 2);
        assert!(chunk
            .metadata
            .unique_classes
            .contains(&"person".to_string()));
        assert!(chunk.metadata.unique_classes.contains(&"car".to_string()));
        assert_eq!(chunk.dominant_objects, vec!["person"]);
        assert_eq!(chunk.keyframe_objects.len(), 2);
    }

    #[test]
    fn test_visual_chunk_content_methods() {
        let content = VisualChunkContent {
            start_time: 0.0,
            end_time: 5.0,
            keyframe_objects: vec![KeyframeObjects {
                timestamp: 0.0,
                frame_number: 0,
                objects: vec![
                    DetectedObject {
                        class_name: "person".to_string(),
                        class_id: 0,
                        confidence: 0.95,
                        bbox: Some((100, 200, 50, 100)),
                    },
                    DetectedObject {
                        class_name: "car".to_string(),
                        class_id: 2,
                        confidence: 0.87,
                        bbox: Some((300, 150, 80, 60)),
                    },
                ],
            }],
            dominant_objects: vec!["person".to_string()],
            scene_description: Some("一個人站在車旁".to_string()),
            metadata: VisualMetadata {
                object_count: 2,
                unique_classes: vec!["person".to_string(), "car".to_string()],
                max_confidence: 0.95,
                avg_confidence: 0.91,
                spatial_density: 2.0,
            },
        };

        // Exercise `summary`. It lists only dominant objects and never renders
        // `scene_description`, so assert on text it actually emits.
        let summary = content.summary();
        assert!(summary.contains("視覺分片"));
        assert!(summary.contains("person"));
        assert!(summary.contains("主要物件: person"));
        assert!(summary.contains("2 個總計"));

        // Exercise `contains_object`.
        assert!(content.contains_object("person"));
        assert!(content.contains_object("car"));
        assert!(!content.contains_object("dog"));
    }

    #[test]
    fn test_frame_similarity_concept() {
        // Concept check for frame similarity (Jaccard index over class names).
        let frame1_objects = vec![
            DetectedObject {
                class_name: "person".to_string(),
                class_id: 0,
                confidence: 0.95,
                bbox: Some((100, 200, 50, 100)),
            },
            DetectedObject {
                class_name: "car".to_string(),
                class_id: 2,
                confidence: 0.87,
                bbox: Some((300, 150, 80, 60)),
            },
        ];

        let frame2_objects = vec![
            DetectedObject {
                class_name: "person".to_string(),
                class_id: 0,
                confidence: 0.92,
                bbox: Some((110, 210, 52, 102)),
            },
            DetectedObject {
                class_name: "car".to_string(),
                class_id: 2,
                confidence: 0.85,
                bbox: Some((310, 155, 82, 62)),
            },
        ];

        // Build the class-name sets.
        let set1: std::collections::HashSet<String> = frame1_objects
            .iter()
            .map(|o| o.class_name.clone())
            .collect();
        let set2: std::collections::HashSet<String> = frame2_objects
            .iter()
            .map(|o| o.class_name.clone())
            .collect();

        // Compute intersection and union.
        let intersection: Vec<_> = set1.intersection(&set2).collect();
        let union: Vec<_> = set1.union(&set2).collect();

        // Verify similarity.
        assert_eq!(intersection.len(), 2); // person, car
        assert_eq!(union.len(), 2); // person, car
        assert_eq!(intersection.len() as f32 / union.len() as f32, 1.0); // identical
    }

    #[test]
    fn test_dominant_objects_detection() {
        let yolo_result = MockYoloResult {
            frames: vec![
                MockYoloFrame {
                    frame: 0,
                    timestamp: 0.0,
                    objects: vec![MockYoloObject {
                        class_name: "person".to_string(),
                        class_id: 0,
                        x: 100,
                        y: 200,
                        width: 50,
                        height: 100,
                        confidence: 0.95,
                    }],
                },
                MockYoloFrame {
                    frame: 1,
                    timestamp: 0.033,
                    objects: vec![MockYoloObject {
                        class_name: "person".to_string(),
                        class_id: 0,
                        x: 110,
                        y: 210,
                        width: 52,
                        height: 102,
                        confidence: 0.92,
                    }],
                },
                MockYoloFrame {
                    frame: 2,
                    timestamp: 0.066,
                    objects: vec![MockYoloObject {
                        class_name: "car".to_string(),
                        class_id: 2,
                        x: 300,
                        y: 150,
                        width: 80,
                        height: 60,
                        confidence: 0.87,
                    }],
                },
            ],
        };

        let chunk = yolo_result.to_visual_chunk(0, 2).unwrap();

        // person appears in 2/3 frames (67% > 50%); car in 1/3 (33% < 50%).
        assert_eq!(chunk.dominant_objects, vec!["person"]);
        assert!(!chunk.dominant_objects.contains(&"car".to_string()));
    }
}