feat: add migrations, test scripts, and utility tools

- Add database migrations (006-028) for face recognition, identity, file_uuid
- Add test scripts for ASR, face, search, processing
- Add portal frontend (Tauri)
- Add config, benchmark, and monitoring utilities
- Add model checkpoints and pretrained model references
This commit is contained in:
Warren
2026-04-30 15:11:53 +08:00
parent 4d75b2e251
commit b54c2def30
192 changed files with 46721 additions and 0 deletions

View File

@@ -0,0 +1,451 @@
//! 視覺分片概念驗證測試
//!
//! 此測試驗證視覺分片的數據結構和基本功能
/// Chunk types, including the visual chunk type.
///
/// `Eq` and `Hash` are derived alongside `PartialEq` so that variants can be
/// used directly as `HashMap` / `HashSet` keys.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ChunkType {
    TimeBased,
    Sentence,
    Cut,
    Trace,
    Story,
    /// Visual chunk (Phase 2.1).
    Visual,
}
impl ChunkType {
pub fn as_str(&self) -> &'static str {
match self {
ChunkType::TimeBased => "time",
ChunkType::Sentence => "sentence",
ChunkType::Cut => "cut",
ChunkType::Trace => "trace",
ChunkType::Story => "story",
ChunkType::Visual => "visual",
}
}
}
/// A detected object.
#[derive(Debug, Clone)]
pub struct DetectedObject {
/// Object class name.
pub class_name: String,
/// Object class ID.
pub class_id: u32,
/// Confidence value (0.0-1.0).
pub confidence: f32,
/// Bounding box as (x, y, width, height); `None` when no box is attached.
pub bbox: Option<(i32, i32, i32, i32)>,
}
/// List of objects for one keyframe.
#[derive(Debug, Clone)]
pub struct KeyframeObjects {
/// Keyframe time (seconds).
pub timestamp: f64,
/// Keyframe frame number.
pub frame_number: u64,
/// Objects detected in this keyframe.
pub objects: Vec<DetectedObject>,
}
/// Content of a visual chunk.
#[derive(Debug, Clone)]
pub struct VisualChunkContent {
/// Start time of the chunk (rendered with an `s` suffix by `summary`).
pub start_time: f64,
/// End time of the chunk, in the same unit as `start_time`.
pub end_time: f64,
/// Per-keyframe detected objects belonging to this chunk.
pub keyframe_objects: Vec<KeyframeObjects>,
/// Class names of the dominant objects of this chunk.
pub dominant_objects: Vec<String>,
/// Optional free-text description of the scene.
pub scene_description: Option<String>,
/// Aggregate statistics about the detections in this chunk.
pub metadata: VisualMetadata,
}
/// Visual metadata: aggregate statistics over the detections of a chunk.
#[derive(Debug, Clone)]
pub struct VisualMetadata {
/// Total number of detected objects.
pub object_count: u32,
/// Distinct object class names.
pub unique_classes: Vec<String>,
/// Highest confidence among the detections.
pub max_confidence: f32,
/// Mean confidence of the detections.
pub avg_confidence: f32,
/// Average number of objects per frame.
pub spatial_density: f32, // objects per frame
}
impl VisualChunkContent {
    /// Builds a one-line, human-readable summary of this visual chunk.
    ///
    /// The summary lists the start and end time, the duration
    /// (`end_time - start_time`), the number of keyframes, the total and
    /// distinct object counts taken from `metadata`, and the dominant object
    /// names joined with `", "`.
    pub fn summary(&self) -> String {
        let span = self.end_time - self.start_time;
        let keyframes = self.keyframe_objects.len();
        let total = self.metadata.object_count;
        let distinct = self.metadata.unique_classes.len();
        // An empty dominant list is rendered as an empty string.
        let dominant = match self.dominant_objects.as_slice() {
            [] => String::new(),
            names => names.join(", "),
        };

        format!(
            "視覺分片: {:.1}s 到 {:.1}s (持續時間: {:.1}s, {} 幀). 物件: {} 個總計, {} 個唯一. 主要物件: {}",
            self.start_time, self.end_time, span, keyframes, total, distinct, dominant
        )
    }

    /// Returns `true` if any keyframe of this chunk holds an object whose
    /// class name equals `class_name` exactly.
    pub fn contains_object(&self, class_name: &str) -> bool {
        self.keyframe_objects
            .iter()
            .flat_map(|keyframe| keyframe.objects.iter())
            .any(|object| object.class_name == class_name)
    }
}
/// Mock YOLO result: the list of frames with their detections.
#[derive(Debug, Clone)]
pub struct MockYoloResult {
/// Frames contained in this result.
pub frames: Vec<MockYoloFrame>,
}
/// One frame of a mock YOLO result.
#[derive(Debug, Clone)]
pub struct MockYoloFrame {
/// Frame number; `to_visual_chunk` selects frames by this value.
pub frame: u64,
/// Timestamp of the frame; copied into `KeyframeObjects::timestamp`.
pub timestamp: f64,
/// Objects detected in this frame.
pub objects: Vec<MockYoloObject>,
}
/// One detection inside a mock YOLO frame.
///
/// `x`, `y`, `width` and `height` are packed, in that order, into
/// `DetectedObject::bbox` by `MockYoloResult::to_visual_chunk`.
#[derive(Debug, Clone)]
pub struct MockYoloObject {
/// Object class name.
pub class_name: String,
/// Object class ID.
pub class_id: u32,
/// First bounding-box component (x).
pub x: i32,
/// Second bounding-box component (y).
pub y: i32,
/// Bounding-box width.
pub width: i32,
/// Bounding-box height.
pub height: i32,
/// Detection confidence.
pub confidence: f32,
}
impl MockYoloResult {
/// 從模擬 YOLO 結果創建視覺分片
pub fn to_visual_chunk(&self, start_frame: u64, end_frame: u64) -> Option<VisualChunkContent> {
let frames: Vec<_> = self
.frames
.iter()
.filter(|f| f.frame >= start_frame && f.frame <= end_frame)
.collect();
if frames.is_empty() {
return None;
}
// 轉換幀為關鍵幀物件
let keyframe_objects: Vec<KeyframeObjects> = frames
.iter()
.map(|frame| {
let objects: Vec<DetectedObject> = frame
.objects
.iter()
.map(|obj| DetectedObject {
class_name: obj.class_name.clone(),
class_id: obj.class_id,
confidence: obj.confidence,
bbox: Some((obj.x, obj.y, obj.width, obj.height)),
})
.collect();
KeyframeObjects {
timestamp: frame.timestamp,
frame_number: frame.frame,
objects,
}
})
.collect();
// 計算元數據
let total_objects: u32 = frames.iter().map(|f| f.objects.len() as u32).sum();
let all_classes: Vec<String> = frames
.iter()
.flat_map(|f| f.objects.iter().map(|o| o.class_name.clone()))
.collect();
let unique_classes: Vec<String> = all_classes
.iter()
.cloned()
.collect::<std::collections::HashSet<_>>()
.into_iter()
.collect();
let confidences: Vec<f32> = frames
.iter()
.flat_map(|f| f.objects.iter().map(|o| o.confidence))
.collect();
let max_confidence = confidences.iter().copied().fold(0.0f32, f32::max);
let avg_confidence = if !confidences.is_empty() {
confidences.iter().sum::<f32>() / confidences.len() as f32
} else {
0.0
};
let start_time = frames.first().map(|f| f.timestamp).unwrap_or(0.0);
let end_time = frames.last().map(|f| f.timestamp).unwrap_or(0.0);
// 查找主要物件(出現在大多數幀中的物件)
let mut object_counts = std::collections::HashMap::new();
for frame in &frames {
let frame_classes: std::collections::HashSet<_> =
frame.objects.iter().map(|o| o.class_name.clone()).collect();
for class in frame_classes {
*object_counts.entry(class).or_insert(0) += 1;
}
}
let mut dominant_objects: Vec<String> = object_counts
.into_iter()
.filter(|(_, count)| *count as f32 / frames.len() as f32 > 0.5) // 出現在 >50% 的幀中
.map(|(class, _)| class)
.collect();
dominant_objects.sort();
Some(VisualChunkContent {
start_time,
end_time,
keyframe_objects,
dominant_objects,
scene_description: None, // 可由 LLM 後期生成
metadata: VisualMetadata {
object_count: total_objects,
unique_classes,
max_confidence,
avg_confidence,
spatial_density: if frames.len() > 0 {
total_objects as f32 / frames.len() as f32
} else {
0.0
},
},
})
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Bounding box as `(x, y, width, height)`.
    type BBox = (i32, i32, i32, i32);

    /// Builds a mock YOLO detection from a class, a bounding box and a confidence.
    fn yolo_object(class_name: &str, class_id: u32, bbox: BBox, confidence: f32) -> MockYoloObject {
        let (x, y, width, height) = bbox;
        MockYoloObject {
            class_name: class_name.to_string(),
            class_id,
            x,
            y,
            width,
            height,
            confidence,
        }
    }

    /// Builds a `DetectedObject` that carries a bounding box.
    fn detected_object(
        class_name: &str,
        class_id: u32,
        bbox: BBox,
        confidence: f32,
    ) -> DetectedObject {
        DetectedObject {
            class_name: class_name.to_string(),
            class_id,
            confidence,
            bbox: Some(bbox),
        }
    }

    /// Collects the distinct class names of a list of detected objects.
    fn class_names(objects: &[DetectedObject]) -> std::collections::HashSet<String> {
        objects.iter().map(|o| o.class_name.clone()).collect()
    }

    #[test]
    fn test_chunk_type_visual() {
        let chunk_type = ChunkType::Visual;
        assert_eq!(chunk_type.as_str(), "visual");
        assert_eq!(chunk_type, ChunkType::Visual);
    }

    #[test]
    fn test_visual_chunk_creation() {
        // Mock YOLO result: two objects in frame 0, one object in frame 1.
        let yolo_result = MockYoloResult {
            frames: vec![
                MockYoloFrame {
                    frame: 0,
                    timestamp: 0.0,
                    objects: vec![
                        yolo_object("person", 0, (100, 200, 50, 100), 0.95),
                        yolo_object("car", 2, (300, 150, 80, 60), 0.87),
                    ],
                },
                MockYoloFrame {
                    frame: 1,
                    timestamp: 0.033, // 1/30 second
                    objects: vec![yolo_object("person", 0, (110, 210, 52, 102), 0.92)],
                },
            ],
        };

        // Build the visual chunk from the YOLO result.
        let chunk = yolo_result
            .to_visual_chunk(0, 1)
            .expect("frames 0 and 1 are inside the requested range");

        // Verify the chunk properties.
        assert_eq!(chunk.start_time, 0.0);
        assert_eq!(chunk.end_time, 0.033);
        assert_eq!(chunk.metadata.object_count, 3);
        assert_eq!(chunk.metadata.unique_classes.len(), 2);
        assert!(chunk
            .metadata
            .unique_classes
            .contains(&"person".to_string()));
        assert!(chunk.metadata.unique_classes.contains(&"car".to_string()));
        assert_eq!(chunk.metadata.max_confidence, 0.95);
        assert_eq!(chunk.metadata.spatial_density, 1.5);
        assert_eq!(chunk.dominant_objects, vec!["person"]);
        assert_eq!(chunk.keyframe_objects.len(), 2);
        assert_eq!(
            chunk.keyframe_objects[0].objects[0].bbox,
            Some((100, 200, 50, 100))
        );
    }

    #[test]
    fn test_visual_chunk_content_methods() {
        let content = VisualChunkContent {
            start_time: 0.0,
            end_time: 5.0,
            keyframe_objects: vec![KeyframeObjects {
                timestamp: 0.0,
                frame_number: 0,
                objects: vec![
                    detected_object("person", 0, (100, 200, 50, 100), 0.95),
                    detected_object("car", 2, (300, 150, 80, 60), 0.87),
                ],
            }],
            dominant_objects: vec!["person".to_string()],
            scene_description: Some("一個人站在車旁".to_string()),
            metadata: VisualMetadata {
                object_count: 2,
                unique_classes: vec!["person".to_string(), "car".to_string()],
                max_confidence: 0.95,
                avg_confidence: 0.91,
                spatial_density: 2.0,
            },
        };

        // Summary: check the label, the dominant object and the time values.
        // (An assertion on `contains("")` would always pass and prove nothing.)
        let summary = content.summary();
        assert!(summary.contains("視覺分片"));
        assert!(summary.contains("person"));
        assert!(summary.contains("0.0s"));
        assert!(summary.contains("5.0s"));

        // contains_object
        assert!(content.contains_object("person"));
        assert!(content.contains_object("car"));
        assert!(!content.contains_object("dog"));
    }

    #[test]
    fn test_frame_similarity_concept() {
        // Concept check for frame similarity: Jaccard index over class names.
        let frame1_objects = vec![
            detected_object("person", 0, (100, 200, 50, 100), 0.95),
            detected_object("car", 2, (300, 150, 80, 60), 0.87),
        ];
        let frame2_objects = vec![
            detected_object("person", 0, (110, 210, 52, 102), 0.92),
            detected_object("car", 2, (310, 155, 82, 62), 0.85),
        ];

        let set1 = class_names(&frame1_objects);
        let set2 = class_names(&frame2_objects);

        // Sizes of the intersection and the union.
        let intersection = set1.intersection(&set2).count();
        let union = set1.union(&set2).count();

        // Both frames hold exactly {person, car}, so the similarity is 1.
        assert_eq!(intersection, 2); // person, car
        assert_eq!(union, 2); // person, car
        assert_eq!(intersection as f32 / union as f32, 1.0);
    }

    #[test]
    fn test_dominant_objects_detection() {
        let yolo_result = MockYoloResult {
            frames: vec![
                MockYoloFrame {
                    frame: 0,
                    timestamp: 0.0,
                    objects: vec![yolo_object("person", 0, (100, 200, 50, 100), 0.95)],
                },
                MockYoloFrame {
                    frame: 1,
                    timestamp: 0.033,
                    objects: vec![yolo_object("person", 0, (110, 210, 52, 102), 0.92)],
                },
                MockYoloFrame {
                    frame: 2,
                    timestamp: 0.066,
                    objects: vec![yolo_object("car", 2, (300, 150, 80, 60), 0.87)],
                },
            ],
        };

        let chunk = yolo_result
            .to_visual_chunk(0, 2)
            .expect("frames 0 to 2 are inside the requested range");

        // "person" is in 2 of 3 frames (67% > 50%); "car" is in 1 of 3 (33% < 50%).
        assert_eq!(chunk.dominant_objects, vec!["person"]);
        assert!(!chunk.dominant_objects.contains(&"car".to_string()));
    }
}