feat: backup architecture docs, source code, and scripts
This commit is contained in:
124
src/core/processor/asr_legacy.rs
Normal file
124
src/core/processor/asr_legacy.rs
Normal file
@@ -0,0 +1,124 @@
|
||||
use anyhow::{Context, Result};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::time::Duration;
|
||||
|
||||
use super::executor::PythonExecutor;
|
||||
use crate::core::config::processor;
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub struct AsrResult {
|
||||
pub language: Option<String>,
|
||||
pub language_probability: Option<f64>,
|
||||
pub segments: Vec<AsrSegment>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub struct AsrSegment {
|
||||
pub start: f64,
|
||||
pub end: f64,
|
||||
pub text: String,
|
||||
}
|
||||
|
||||
pub async fn process_asr(
|
||||
video_path: &str,
|
||||
output_path: &str,
|
||||
uuid: Option<&str>,
|
||||
) -> Result<AsrResult> {
|
||||
let executor = PythonExecutor::new()?;
|
||||
let script_path = executor.script_path("asr_processor.py");
|
||||
|
||||
tracing::info!("[ASR] Starting ASR processing: {}", video_path);
|
||||
|
||||
executor
|
||||
.run(
|
||||
"asr_processor.py",
|
||||
&[video_path, output_path],
|
||||
uuid,
|
||||
"ASR",
|
||||
Some(Duration::from_secs(*processor::ASR_TIMEOUT_SECS)),
|
||||
)
|
||||
.await
|
||||
.with_context(|| format!("Failed to run {:?}", script_path))?;
|
||||
|
||||
let json_str = std::fs::read_to_string(output_path).context("Failed to read ASR output")?;
|
||||
|
||||
let result: AsrResult =
|
||||
serde_json::from_str(&json_str).context("Failed to parse ASR output")?;
|
||||
|
||||
tracing::info!(
|
||||
"[ASR] Result: {} segments, language: {:?}",
|
||||
result.segments.len(),
|
||||
result.language
|
||||
);
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_asr_result_serialization() {
|
||||
let result = AsrResult {
|
||||
language: Some("en".to_string()),
|
||||
language_probability: Some(0.95),
|
||||
segments: vec![
|
||||
AsrSegment {
|
||||
start: 0.0,
|
||||
end: 2.5,
|
||||
text: "Hello world".to_string(),
|
||||
},
|
||||
AsrSegment {
|
||||
start: 2.5,
|
||||
end: 5.0,
|
||||
text: "Test speech".to_string(),
|
||||
},
|
||||
],
|
||||
};
|
||||
|
||||
let json = serde_json::to_string(&result).unwrap();
|
||||
assert!(json.contains("Hello world"));
|
||||
assert!(json.contains("en"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_asr_result_deserialization() {
|
||||
let json = r#"{
|
||||
"language": "zh",
|
||||
"language_probability": 0.98,
|
||||
"segments": [
|
||||
{"start": 0.0, "end": 1.5, "text": "測試"}
|
||||
]
|
||||
}"#;
|
||||
|
||||
let result: AsrResult = serde_json::from_str(json).unwrap();
|
||||
assert_eq!(result.language, Some("zh".to_string()));
|
||||
assert_eq!(result.language_probability, Some(0.98));
|
||||
assert_eq!(result.segments.len(), 1);
|
||||
assert_eq!(result.segments[0].text, "測試");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_asr_segment_default() {
|
||||
let segment = AsrSegment {
|
||||
start: 0.0,
|
||||
end: 1.0,
|
||||
text: String::new(),
|
||||
};
|
||||
assert_eq!(segment.start, 0.0);
|
||||
assert_eq!(segment.end, 1.0);
|
||||
assert!(segment.text.is_empty());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_asr_result_empty_segments() {
|
||||
let result = AsrResult {
|
||||
language: None,
|
||||
language_probability: None,
|
||||
segments: vec![],
|
||||
};
|
||||
assert!(result.language.is_none());
|
||||
assert!(result.segments.is_empty());
|
||||
}
|
||||
}
|
||||
345
src/core/processor/face_recognition.rs
Normal file
345
src/core/processor/face_recognition.rs
Normal file
@@ -0,0 +1,345 @@
|
||||
use anyhow::{Context, Result};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::time::Duration;
|
||||
|
||||
use super::executor::PythonExecutor;
|
||||
|
||||
const FACE_RECOGNITION_TIMEOUT: Duration = Duration::from_secs(10800); // 3 hours for recognition
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
pub struct FaceRecognitionResult {
|
||||
pub frame_count: u64,
|
||||
pub fps: f64,
|
||||
pub frames: Vec<FaceRecognitionFrame>,
|
||||
pub recognized_faces: Vec<RecognizedFace>,
|
||||
pub face_clusters: Vec<FaceCluster>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
pub struct FaceRecognitionFrame {
|
||||
pub frame: u64,
|
||||
pub timestamp: f64,
|
||||
pub faces: Vec<RecognizedFaceDetection>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
pub struct RecognizedFaceDetection {
|
||||
pub face_id: Option<String>,
|
||||
pub x: i32,
|
||||
pub y: i32,
|
||||
pub width: i32,
|
||||
pub height: i32,
|
||||
pub confidence: f32,
|
||||
pub embedding: Option<Vec<f32>>,
|
||||
pub attributes: Option<FaceAttributes>,
|
||||
pub identity: Option<FaceIdentity>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
pub struct FaceAttributes {
|
||||
pub age: Option<u8>,
|
||||
pub gender: Option<String>,
|
||||
pub emotion: Option<String>,
|
||||
pub glasses: Option<bool>,
|
||||
pub mask: Option<bool>,
|
||||
pub pose: Option<FacePose>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
pub struct FacePose {
|
||||
pub yaw: f32,
|
||||
pub pitch: f32,
|
||||
pub roll: f32,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
pub struct FaceIdentity {
|
||||
pub name: Option<String>,
|
||||
pub confidence: f32,
|
||||
pub database_id: Option<String>,
|
||||
pub metadata: Option<serde_json::Value>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
pub struct RecognizedFace {
|
||||
pub face_id: String,
|
||||
pub embedding: Vec<f32>,
|
||||
pub first_seen: f64,
|
||||
pub last_seen: f64,
|
||||
pub total_appearances: u32,
|
||||
pub attributes: Option<FaceAttributes>,
|
||||
pub identities: Vec<FaceIdentity>,
|
||||
pub cluster_id: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
pub struct FaceCluster {
|
||||
pub cluster_id: String,
|
||||
pub face_ids: Vec<String>,
|
||||
pub centroid: Vec<f32>,
|
||||
pub size: u32,
|
||||
pub representative_face_id: Option<String>,
|
||||
pub metadata: Option<serde_json::Value>,
|
||||
}
|
||||
|
||||
pub async fn process_face_recognition(
|
||||
video_path: &str,
|
||||
output_path: &str,
|
||||
uuid: Option<&str>,
|
||||
enable_recognition: bool,
|
||||
enable_tracking: bool,
|
||||
enable_clustering: bool,
|
||||
) -> Result<FaceRecognitionResult> {
|
||||
let executor = PythonExecutor::new()?;
|
||||
let script_path = executor.script_path("face_recognition_processor.py");
|
||||
|
||||
tracing::info!(
|
||||
"[FACE_RECOGNITION] Starting face recognition: {}",
|
||||
video_path
|
||||
);
|
||||
|
||||
if !script_path.exists() {
|
||||
tracing::warn!("[FACE_RECOGNITION] Script not found, returning empty result");
|
||||
return Ok(FaceRecognitionResult {
|
||||
frame_count: 0,
|
||||
fps: 0.0,
|
||||
frames: vec![],
|
||||
recognized_faces: vec![],
|
||||
face_clusters: vec![],
|
||||
});
|
||||
}
|
||||
|
||||
let args = vec![
|
||||
video_path,
|
||||
output_path,
|
||||
if enable_recognition { "1" } else { "0" },
|
||||
if enable_tracking { "1" } else { "0" },
|
||||
if enable_clustering { "1" } else { "0" },
|
||||
];
|
||||
|
||||
executor
|
||||
.run(
|
||||
"face_recognition_processor.py",
|
||||
&args,
|
||||
uuid,
|
||||
"FACE_RECOGNITION",
|
||||
Some(FACE_RECOGNITION_TIMEOUT),
|
||||
)
|
||||
.await
|
||||
.with_context(|| format!("Failed to run {:?}", script_path))?;
|
||||
|
||||
let json_str =
|
||||
std::fs::read_to_string(output_path).context("Failed to read FACE_RECOGNITION output")?;
|
||||
|
||||
let result: FaceRecognitionResult =
|
||||
serde_json::from_str(&json_str).context("Failed to parse FACE_RECOGNITION output")?;
|
||||
|
||||
tracing::info!(
|
||||
"[FACE_RECOGNITION] Result: {} frames, {} recognized faces, {} clusters",
|
||||
result.frames.len(),
|
||||
result.recognized_faces.len(),
|
||||
result.face_clusters.len()
|
||||
);
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
pub async fn register_face(
|
||||
image_path: &str,
|
||||
name: &str,
|
||||
metadata: Option<serde_json::Value>,
|
||||
) -> Result<FaceRegistrationResult> {
|
||||
let executor = PythonExecutor::new()?;
|
||||
let script_path = executor.script_path("face_registration.py");
|
||||
|
||||
tracing::info!("[FACE_REGISTRATION] Registering face: {}", name);
|
||||
|
||||
if !script_path.exists() {
|
||||
anyhow::bail!("Face registration script not found");
|
||||
}
|
||||
|
||||
let output_path = format!("/tmp/face_registration_{}.json", uuid::Uuid::new_v4());
|
||||
|
||||
// Handle metadata separately to avoid lifetime issues
|
||||
let meta_temp_file = metadata.as_ref().map(|meta| {
|
||||
let meta_path = format!("/tmp/face_metadata_{}.json", uuid::Uuid::new_v4());
|
||||
std::fs::write(&meta_path, serde_json::to_string(meta).unwrap()).unwrap();
|
||||
meta_path
|
||||
});
|
||||
|
||||
// Build arguments - use output_path as database path so Python writes there
|
||||
let mut args = vec![
|
||||
image_path.to_string(),
|
||||
output_path.clone(),
|
||||
name.to_string(),
|
||||
];
|
||||
|
||||
// Add database parameter (point to same output for now)
|
||||
let database_path = output_path.clone();
|
||||
args.push("--database".to_string());
|
||||
args.push(database_path.clone());
|
||||
|
||||
if let Some(ref meta_path) = meta_temp_file {
|
||||
args.push("--metadata".to_string());
|
||||
args.push(meta_path.clone());
|
||||
}
|
||||
|
||||
let args_refs: Vec<&str> = args.iter().map(|s| s.as_str()).collect();
|
||||
executor
|
||||
.run(
|
||||
"face_registration.py",
|
||||
&args_refs,
|
||||
None,
|
||||
"FACE_REGISTRATION",
|
||||
Some(Duration::from_secs(300)),
|
||||
)
|
||||
.await
|
||||
.with_context(|| format!("Failed to run {:?}", script_path))?;
|
||||
|
||||
let json_str =
|
||||
std::fs::read_to_string(&output_path).context("Failed to read registration output")?;
|
||||
|
||||
let result: FaceRegistrationResult =
|
||||
serde_json::from_str(&json_str).context("Failed to parse registration output")?;
|
||||
|
||||
// Clean up temp files
|
||||
let _ = std::fs::remove_file(&output_path);
|
||||
if let Some(meta_path) = meta_temp_file {
|
||||
let _ = std::fs::remove_file(&meta_path);
|
||||
}
|
||||
|
||||
tracing::info!("[FACE_REGISTRATION] Registered face: {}", result.face_id);
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
pub struct FaceRegistrationResult {
|
||||
pub face_id: String,
|
||||
pub embedding: Vec<f32>,
|
||||
pub attributes: Option<FaceAttributes>,
|
||||
pub success: bool,
|
||||
pub message: String,
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_face_recognition_result_serialization() {
|
||||
let result = FaceRecognitionResult {
|
||||
frame_count: 100,
|
||||
fps: 30.0,
|
||||
frames: vec![FaceRecognitionFrame {
|
||||
frame: 0,
|
||||
timestamp: 0.0,
|
||||
faces: vec![RecognizedFaceDetection {
|
||||
face_id: Some("face_1".to_string()),
|
||||
x: 100,
|
||||
y: 100,
|
||||
width: 50,
|
||||
height: 60,
|
||||
confidence: 0.95,
|
||||
embedding: Some(vec![0.1, 0.2, 0.3]),
|
||||
attributes: Some(FaceAttributes {
|
||||
age: Some(30),
|
||||
gender: Some("male".to_string()),
|
||||
emotion: Some("neutral".to_string()),
|
||||
glasses: Some(false),
|
||||
mask: Some(false),
|
||||
pose: Some(FacePose {
|
||||
yaw: 0.1,
|
||||
pitch: 0.2,
|
||||
roll: 0.3,
|
||||
}),
|
||||
}),
|
||||
identity: Some(FaceIdentity {
|
||||
name: Some("John Doe".to_string()),
|
||||
confidence: 0.85,
|
||||
database_id: Some("user_123".to_string()),
|
||||
metadata: Some(serde_json::json!({"role": "employee"})),
|
||||
}),
|
||||
}],
|
||||
}],
|
||||
recognized_faces: vec![RecognizedFace {
|
||||
face_id: "face_1".to_string(),
|
||||
embedding: vec![0.1, 0.2, 0.3],
|
||||
first_seen: 0.0,
|
||||
last_seen: 10.0,
|
||||
total_appearances: 5,
|
||||
attributes: Some(FaceAttributes {
|
||||
age: Some(30),
|
||||
gender: Some("male".to_string()),
|
||||
emotion: Some("neutral".to_string()),
|
||||
glasses: Some(false),
|
||||
mask: Some(false),
|
||||
pose: Some(FacePose {
|
||||
yaw: 0.1,
|
||||
pitch: 0.2,
|
||||
roll: 0.3,
|
||||
}),
|
||||
}),
|
||||
identities: vec![FaceIdentity {
|
||||
name: Some("John Doe".to_string()),
|
||||
confidence: 0.85,
|
||||
database_id: Some("user_123".to_string()),
|
||||
metadata: Some(serde_json::json!({"role": "employee"})),
|
||||
}],
|
||||
cluster_id: Some("cluster_1".to_string()),
|
||||
}],
|
||||
face_clusters: vec![FaceCluster {
|
||||
cluster_id: "cluster_1".to_string(),
|
||||
face_ids: vec!["face_1".to_string()],
|
||||
centroid: vec![0.1, 0.2, 0.3],
|
||||
size: 1,
|
||||
representative_face_id: Some("face_1".to_string()),
|
||||
metadata: Some(serde_json::json!({"description": "main person"})),
|
||||
}],
|
||||
};
|
||||
|
||||
let json = serde_json::to_string(&result).unwrap();
|
||||
assert!(json.contains("face_1"));
|
||||
assert!(json.contains("John Doe"));
|
||||
assert!(json.contains("cluster_1"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_face_attributes_serialization() {
|
||||
let attributes = FaceAttributes {
|
||||
age: Some(25),
|
||||
gender: Some("female".to_string()),
|
||||
emotion: Some("happy".to_string()),
|
||||
glasses: Some(true),
|
||||
mask: Some(false),
|
||||
pose: Some(FacePose {
|
||||
yaw: -0.1,
|
||||
pitch: 0.05,
|
||||
roll: 0.02,
|
||||
}),
|
||||
};
|
||||
|
||||
let json = serde_json::to_string(&attributes).unwrap();
|
||||
assert!(json.contains("\"age\":25"));
|
||||
assert!(json.contains("\"gender\":\"female\""));
|
||||
assert!(json.contains("\"emotion\":\"happy\""));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_face_identity_serialization() {
|
||||
let identity = FaceIdentity {
|
||||
name: Some("Alice Smith".to_string()),
|
||||
confidence: 0.92,
|
||||
database_id: Some("employee_456".to_string()),
|
||||
metadata: Some(serde_json::json!({
|
||||
"department": "engineering",
|
||||
"position": "senior developer"
|
||||
})),
|
||||
};
|
||||
|
||||
let json = serde_json::to_string(&identity).unwrap();
|
||||
assert!(json.contains("Alice Smith"));
|
||||
assert!(json.contains("\"confidence\":0.92"));
|
||||
assert!(json.contains("engineering"));
|
||||
}
|
||||
}
|
||||
562
src/core/processor/visual_chunk.rs
Normal file
562
src/core/processor/visual_chunk.rs
Normal file
@@ -0,0 +1,562 @@
|
||||
//! 視覺分片處理器 (Phase 2.2)
|
||||
//!
|
||||
//! 從 YOLO 結果生成視覺分片
|
||||
|
||||
use anyhow::{Context, Result};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::time::Duration;
|
||||
|
||||
use super::executor::PythonExecutor;
|
||||
use super::yolo::{YoloFrame, YoloResult};
|
||||
|
||||
const VISUAL_CHUNK_TIMEOUT: Duration = Duration::from_secs(3600);
|
||||
|
||||
/// 視覺分片處理結果
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
pub struct VisualChunkResult {
|
||||
/// 生成的視覺分片數量
|
||||
pub chunk_count: u32,
|
||||
/// 處理的總幀數
|
||||
pub total_frames: u32,
|
||||
/// 檢測到的總物件數
|
||||
pub total_objects: u32,
|
||||
/// 唯一物件類別數
|
||||
pub unique_classes: u32,
|
||||
/// 生成的視覺分片
|
||||
pub chunks: Vec<crate::core::chunk::Chunk>,
|
||||
}
|
||||
|
||||
/// 從 YOLO 結果生成視覺分片
|
||||
pub async fn process_visual_chunk(
|
||||
file_id: i32,
|
||||
uuid: String,
|
||||
video_path: &str,
|
||||
yolo_result: &YoloResult,
|
||||
chunk_index_offset: u32,
|
||||
fps: f64,
|
||||
) -> Result<VisualChunkResult> {
|
||||
tracing::info!(
|
||||
"[VisualChunk] Starting visual chunk generation for video: {}, {} frames",
|
||||
video_path,
|
||||
yolo_result.frames.len()
|
||||
);
|
||||
|
||||
if yolo_result.frames.is_empty() {
|
||||
tracing::warn!("[VisualChunk] No YOLO frames to process");
|
||||
return Ok(VisualChunkResult {
|
||||
chunk_count: 0,
|
||||
total_frames: 0,
|
||||
total_objects: 0,
|
||||
unique_classes: 0,
|
||||
chunks: vec![],
|
||||
});
|
||||
}
|
||||
|
||||
// 策略 1: 固定幀數分片(每 N 幀一個分片)
|
||||
let chunks = create_fixed_frame_chunks(file_id, &uuid, yolo_result, chunk_index_offset, fps);
|
||||
|
||||
// 統計信息
|
||||
let total_objects: u32 = yolo_result
|
||||
.frames
|
||||
.iter()
|
||||
.map(|f| f.objects.len() as u32)
|
||||
.sum();
|
||||
let all_classes: Vec<String> = yolo_result
|
||||
.frames
|
||||
.iter()
|
||||
.flat_map(|f| f.objects.iter().map(|o| o.class_name.clone()))
|
||||
.collect();
|
||||
let unique_classes: u32 = all_classes
|
||||
.iter()
|
||||
.cloned()
|
||||
.collect::<std::collections::HashSet<_>>()
|
||||
.len() as u32;
|
||||
|
||||
tracing::info!(
|
||||
"[VisualChunk] Generated {} visual chunks from {} frames, {} total objects, {} unique classes",
|
||||
chunks.len(),
|
||||
yolo_result.frames.len(),
|
||||
total_objects,
|
||||
unique_classes
|
||||
);
|
||||
|
||||
Ok(VisualChunkResult {
|
||||
chunk_count: chunks.len() as u32,
|
||||
total_frames: yolo_result.frames.len() as u32,
|
||||
total_objects,
|
||||
unique_classes,
|
||||
chunks,
|
||||
})
|
||||
}
|
||||
|
||||
/// 創建固定幀數分片(每 N 幀一個分片)
|
||||
fn create_fixed_frame_chunks(
|
||||
file_id: i32,
|
||||
uuid: &str,
|
||||
yolo_result: &YoloResult,
|
||||
chunk_index_offset: u32,
|
||||
fps: f64,
|
||||
) -> Vec<crate::core::chunk::Chunk> {
|
||||
let mut chunks = Vec::new();
|
||||
|
||||
// 配置:每 30 幀創建一個分片(約 1 秒,如果 fps=30)
|
||||
let frames_per_chunk = 30;
|
||||
let total_frames = yolo_result.frames.len();
|
||||
|
||||
if total_frames == 0 {
|
||||
return chunks;
|
||||
}
|
||||
|
||||
let mut chunk_index = chunk_index_offset;
|
||||
let mut start_idx = 0;
|
||||
|
||||
while start_idx < total_frames {
|
||||
let end_idx = std::cmp::min(start_idx + frames_per_chunk, total_frames);
|
||||
|
||||
// 獲取這個分片的幀
|
||||
let chunk_frames: Vec<YoloFrame> = yolo_result.frames[start_idx..end_idx]
|
||||
.iter()
|
||||
.cloned()
|
||||
.collect();
|
||||
|
||||
if chunk_frames.is_empty() {
|
||||
break;
|
||||
}
|
||||
|
||||
// 計算幀範圍
|
||||
let start_frame = chunk_frames.first().unwrap().frame as i64;
|
||||
let end_frame = chunk_frames.last().unwrap().frame as i64 + 1; // exclusive
|
||||
|
||||
// 創建視覺分片
|
||||
let chunk = crate::core::chunk::Chunk::from_yolo_frames(
|
||||
file_id,
|
||||
uuid.to_string(),
|
||||
chunk_index,
|
||||
start_frame,
|
||||
end_frame,
|
||||
fps,
|
||||
chunk_frames,
|
||||
);
|
||||
|
||||
chunks.push(chunk);
|
||||
|
||||
// 更新索引
|
||||
start_idx = end_idx;
|
||||
chunk_index += 1;
|
||||
}
|
||||
|
||||
chunks
|
||||
}
|
||||
|
||||
/// 基於物件相似度創建分片
|
||||
fn create_similarity_based_chunks(
|
||||
file_id: i32,
|
||||
uuid: &str,
|
||||
yolo_result: &YoloResult,
|
||||
chunk_index_offset: u32,
|
||||
fps: f64,
|
||||
similarity_threshold: f32,
|
||||
min_frames_per_chunk: usize,
|
||||
) -> Vec<crate::core::chunk::Chunk> {
|
||||
let mut chunks = Vec::new();
|
||||
|
||||
if yolo_result.frames.is_empty() {
|
||||
return chunks;
|
||||
}
|
||||
|
||||
let mut current_chunk_frames: Vec<YoloFrame> = Vec::new();
|
||||
let mut chunk_index = chunk_index_offset;
|
||||
let mut current_start_frame = 0;
|
||||
|
||||
for (i, frame) in yolo_result.frames.iter().enumerate() {
|
||||
if current_chunk_frames.is_empty() {
|
||||
current_chunk_frames.push(frame.clone());
|
||||
current_start_frame = frame.frame as i64;
|
||||
continue;
|
||||
}
|
||||
|
||||
// 檢查相似度(簡化版本:檢查物件類別是否相同)
|
||||
let last_frame = current_chunk_frames.last().unwrap();
|
||||
let similarity = calculate_frame_similarity(last_frame, frame);
|
||||
|
||||
if similarity >= similarity_threshold {
|
||||
// 相似度高,加入當前分片
|
||||
current_chunk_frames.push(frame.clone());
|
||||
} else {
|
||||
// 相似度低,創建新分片
|
||||
if current_chunk_frames.len() >= min_frames_per_chunk {
|
||||
let end_frame = current_chunk_frames.last().unwrap().frame as i64 + 1;
|
||||
|
||||
let chunk = crate::core::chunk::Chunk::from_yolo_frames(
|
||||
file_id,
|
||||
uuid.to_string(),
|
||||
chunk_index,
|
||||
current_start_frame,
|
||||
end_frame,
|
||||
fps,
|
||||
current_chunk_frames.clone(),
|
||||
);
|
||||
|
||||
chunks.push(chunk);
|
||||
chunk_index += 1;
|
||||
}
|
||||
|
||||
// 開始新的分片
|
||||
current_chunk_frames = vec![frame.clone()];
|
||||
current_start_frame = frame.frame as i64;
|
||||
}
|
||||
}
|
||||
|
||||
// 處理最後一個分片
|
||||
if current_chunk_frames.len() >= min_frames_per_chunk {
|
||||
let end_frame = current_chunk_frames.last().unwrap().frame as i64 + 1;
|
||||
|
||||
let chunk = crate::core::chunk::Chunk::from_yolo_frames(
|
||||
file_id,
|
||||
uuid.to_string(),
|
||||
chunk_index,
|
||||
current_start_frame,
|
||||
end_frame,
|
||||
fps,
|
||||
current_chunk_frames,
|
||||
);
|
||||
|
||||
chunks.push(chunk);
|
||||
}
|
||||
|
||||
chunks
|
||||
}
|
||||
|
||||
/// 計算兩個幀之間的相似度(基於物件類別)
|
||||
fn calculate_frame_similarity(frame1: &YoloFrame, frame2: &YoloFrame) -> f32 {
|
||||
if frame1.objects.is_empty() && frame2.objects.is_empty() {
|
||||
return 1.0;
|
||||
}
|
||||
|
||||
if frame1.objects.is_empty() || frame2.objects.is_empty() {
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
let set1: std::collections::HashSet<String> = frame1
|
||||
.objects
|
||||
.iter()
|
||||
.map(|o| o.class_name.clone())
|
||||
.collect();
|
||||
let set2: std::collections::HashSet<String> = frame2
|
||||
.objects
|
||||
.iter()
|
||||
.map(|o| o.class_name.clone())
|
||||
.collect();
|
||||
|
||||
let intersection: Vec<_> = set1.intersection(&set2).collect();
|
||||
let union: Vec<_> = set1.union(&set2).collect();
|
||||
|
||||
if union.is_empty() {
|
||||
0.0
|
||||
} else {
|
||||
intersection.len() as f32 / union.len() as f32
|
||||
}
|
||||
}
|
||||
|
||||
/// 使用 Python 腳本生成視覺分片(進階版本)
|
||||
pub async fn process_visual_chunk_advanced(
|
||||
video_path: &str,
|
||||
output_path: &str,
|
||||
uuid: Option<&str>,
|
||||
) -> Result<VisualChunkResult> {
|
||||
let executor = PythonExecutor::new()?;
|
||||
let script_path = executor.script_path("visual_chunk_processor.py");
|
||||
|
||||
tracing::info!(
|
||||
"[VisualChunk] Starting advanced visual chunk generation: {}",
|
||||
video_path
|
||||
);
|
||||
|
||||
if !script_path.exists() {
|
||||
tracing::warn!("[VisualChunk] Script not found, using basic generation");
|
||||
// 這裡可以回退到基本生成方法
|
||||
return Ok(VisualChunkResult {
|
||||
chunk_count: 0,
|
||||
total_frames: 0,
|
||||
total_objects: 0,
|
||||
unique_classes: 0,
|
||||
chunks: vec![],
|
||||
});
|
||||
}
|
||||
|
||||
executor
|
||||
.run(
|
||||
"visual_chunk_processor.py",
|
||||
&[video_path, output_path],
|
||||
uuid,
|
||||
"VisualChunk",
|
||||
Some(VISUAL_CHUNK_TIMEOUT),
|
||||
)
|
||||
.await
|
||||
.with_context(|| format!("Failed to run {:?}", script_path))?;
|
||||
|
||||
let json_str =
|
||||
std::fs::read_to_string(output_path).context("Failed to read visual chunk output")?;
|
||||
|
||||
let result: VisualChunkResult =
|
||||
serde_json::from_str(&json_str).context("Failed to parse visual chunk output")?;
|
||||
|
||||
tracing::info!(
|
||||
"[VisualChunk] Advanced generation result: {} chunks, {} frames",
|
||||
result.chunk_count,
|
||||
result.total_frames
|
||||
);
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_calculate_frame_similarity() {
|
||||
use crate::core::processor::yolo::{YoloFrame, YoloObject};
|
||||
|
||||
let frame1 = YoloFrame {
|
||||
frame: 0,
|
||||
timestamp: 0.0,
|
||||
objects: vec![
|
||||
YoloObject {
|
||||
class_name: "person".to_string(),
|
||||
class_id: 0,
|
||||
x: 100,
|
||||
y: 200,
|
||||
width: 50,
|
||||
height: 100,
|
||||
confidence: 0.95,
|
||||
},
|
||||
YoloObject {
|
||||
class_name: "car".to_string(),
|
||||
class_id: 2,
|
||||
x: 300,
|
||||
y: 150,
|
||||
width: 80,
|
||||
height: 60,
|
||||
confidence: 0.87,
|
||||
},
|
||||
],
|
||||
};
|
||||
|
||||
let frame2 = YoloFrame {
|
||||
frame: 1,
|
||||
timestamp: 0.033,
|
||||
objects: vec![
|
||||
YoloObject {
|
||||
class_name: "person".to_string(),
|
||||
class_id: 0,
|
||||
x: 110,
|
||||
y: 210,
|
||||
width: 52,
|
||||
height: 102,
|
||||
confidence: 0.92,
|
||||
},
|
||||
YoloObject {
|
||||
class_name: "car".to_string(),
|
||||
class_id: 2,
|
||||
x: 310,
|
||||
y: 155,
|
||||
width: 82,
|
||||
height: 62,
|
||||
confidence: 0.85,
|
||||
},
|
||||
],
|
||||
};
|
||||
|
||||
let frame3 = YoloFrame {
|
||||
frame: 2,
|
||||
timestamp: 0.066,
|
||||
objects: vec![YoloObject {
|
||||
class_name: "dog".to_string(),
|
||||
class_id: 16,
|
||||
x: 150,
|
||||
y: 250,
|
||||
width: 40,
|
||||
height: 60,
|
||||
confidence: 0.78,
|
||||
}],
|
||||
};
|
||||
|
||||
// 相同物件的幀應該高度相似
|
||||
let similarity_same = calculate_frame_similarity(&frame1, &frame2);
|
||||
assert!((similarity_same - 1.0).abs() < 0.001);
|
||||
|
||||
// 不同物件的幀應該不相似
|
||||
let similarity_diff = calculate_frame_similarity(&frame1, &frame3);
|
||||
assert!((similarity_diff - 0.0).abs() < 0.001);
|
||||
|
||||
// 空幀應該完全相似
|
||||
let empty_frame = YoloFrame {
|
||||
frame: 3,
|
||||
timestamp: 0.1,
|
||||
objects: vec![],
|
||||
};
|
||||
let similarity_empty = calculate_frame_similarity(&empty_frame, &empty_frame);
|
||||
assert!((similarity_empty - 1.0).abs() < 0.001);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_create_fixed_frame_chunks() {
|
||||
use crate::core::processor::yolo::{YoloFrame, YoloObject, YoloResult};
|
||||
|
||||
// 創建測試 YOLO 結果(60 幀,每幀都有物件)
|
||||
let mut frames = Vec::new();
|
||||
for i in 0..60 {
|
||||
frames.push(YoloFrame {
|
||||
frame: i as u64,
|
||||
timestamp: i as f64 / 30.0, // 假設 fps=30
|
||||
objects: vec![YoloObject {
|
||||
class_name: "person".to_string(),
|
||||
class_id: 0,
|
||||
x: 100,
|
||||
y: 200,
|
||||
width: 50,
|
||||
height: 100,
|
||||
confidence: 0.9,
|
||||
}],
|
||||
});
|
||||
}
|
||||
|
||||
let yolo_result = YoloResult {
|
||||
frame_count: 60,
|
||||
fps: 30.0,
|
||||
frames,
|
||||
};
|
||||
|
||||
let chunks = create_fixed_frame_chunks(1, "test-uuid", &yolo_result, 0, 30.0);
|
||||
|
||||
// 60 幀,每 30 幀一個分片,應該有 2 個分片
|
||||
assert_eq!(chunks.len(), 2);
|
||||
|
||||
// 檢查第一個分片
|
||||
let first_chunk = &chunks[0];
|
||||
assert_eq!(
|
||||
first_chunk.chunk_type,
|
||||
crate::core::chunk::ChunkType::Visual
|
||||
);
|
||||
assert_eq!(first_chunk.start_frame, 0);
|
||||
assert_eq!(first_chunk.end_frame, 30); // exclusive
|
||||
assert_eq!(first_chunk.frame_count, 30);
|
||||
|
||||
// 檢查第二個分片
|
||||
let second_chunk = &chunks[1];
|
||||
assert_eq!(
|
||||
second_chunk.chunk_type,
|
||||
crate::core::chunk::ChunkType::Visual
|
||||
);
|
||||
assert_eq!(second_chunk.start_frame, 30);
|
||||
assert_eq!(second_chunk.end_frame, 60); // exclusive
|
||||
assert_eq!(second_chunk.frame_count, 30);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_create_similarity_based_chunks() {
|
||||
use crate::core::processor::yolo::{YoloFrame, YoloObject, YoloResult};
|
||||
|
||||
// 創建測試 YOLO 結果
|
||||
let frames = vec![
|
||||
YoloFrame {
|
||||
// 幀 0-4: 都有 person 和 car
|
||||
frame: 0,
|
||||
timestamp: 0.0,
|
||||
objects: vec![
|
||||
YoloObject {
|
||||
class_name: "person".to_string(),
|
||||
class_id: 0,
|
||||
x: 100,
|
||||
y: 200,
|
||||
width: 50,
|
||||
height: 100,
|
||||
confidence: 0.9,
|
||||
},
|
||||
YoloObject {
|
||||
class_name: "car".to_string(),
|
||||
class_id: 2,
|
||||
x: 300,
|
||||
y: 150,
|
||||
width: 80,
|
||||
height: 60,
|
||||
confidence: 0.8,
|
||||
},
|
||||
],
|
||||
},
|
||||
YoloFrame {
|
||||
// 幀 1
|
||||
frame: 1,
|
||||
timestamp: 0.033,
|
||||
objects: vec![
|
||||
YoloObject {
|
||||
class_name: "person".to_string(),
|
||||
class_id: 0,
|
||||
x: 110,
|
||||
y: 210,
|
||||
width: 52,
|
||||
height: 102,
|
||||
confidence: 0.88,
|
||||
},
|
||||
YoloObject {
|
||||
class_name: "car".to_string(),
|
||||
class_id: 2,
|
||||
x: 310,
|
||||
y: 155,
|
||||
width: 82,
|
||||
height: 62,
|
||||
confidence: 0.78,
|
||||
},
|
||||
],
|
||||
},
|
||||
YoloFrame {
|
||||
// 幀 5-9: 只有 dog
|
||||
frame: 5,
|
||||
timestamp: 0.166,
|
||||
objects: vec![YoloObject {
|
||||
class_name: "dog".to_string(),
|
||||
class_id: 16,
|
||||
x: 150,
|
||||
y: 250,
|
||||
width: 40,
|
||||
height: 60,
|
||||
confidence: 0.7,
|
||||
}],
|
||||
},
|
||||
YoloFrame {
|
||||
// 幀 6
|
||||
frame: 6,
|
||||
timestamp: 0.2,
|
||||
objects: vec![YoloObject {
|
||||
class_name: "dog".to_string(),
|
||||
class_id: 16,
|
||||
x: 155,
|
||||
y: 255,
|
||||
width: 42,
|
||||
height: 62,
|
||||
confidence: 0.68,
|
||||
}],
|
||||
},
|
||||
];
|
||||
|
||||
let yolo_result = YoloResult {
|
||||
frame_count: 7,
|
||||
fps: 30.0,
|
||||
frames,
|
||||
};
|
||||
|
||||
let chunks = create_similarity_based_chunks(
|
||||
1,
|
||||
"test-uuid",
|
||||
&yolo_result,
|
||||
0,
|
||||
30.0,
|
||||
0.5, // similarity threshold
|
||||
2, // min frames per chunk
|
||||
);
|
||||
|
||||
// 應該有 2 個分片:一個是 person+car,一個是 dog
|
||||
assert_eq!(chunks.len(), 2);
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user