use anyhow::{Context, Result}; use libc; use std::collections::HashMap; use std::fs; use std::path::PathBuf; use std::sync::Arc; use tokio::sync::{mpsc, RwLock}; use tracing::{error, info, warn}; /// Guard that ensures processor pool cleanup runs even if the task panics. struct ProcessorCleanupGuard { job_id: i32, running: Arc>>, running_count: Arc>, } impl Drop for ProcessorCleanupGuard { fn drop(&mut self) { use tokio::sync::TryLockError; // 嘗試同步清理;若 lock 被佔用則跳過(避免 deadlock) if let Ok(mut guard) = self.running.try_write() { guard.remove(&self.job_id); } else { warn!("[ProcessorCleanupGuard] running lock contended, skipping cleanup"); } if let Ok(mut guard) = self.running_count.try_write() { if *guard > 0 { *guard -= 1; } } else { warn!("[ProcessorCleanupGuard] running_count lock contended, skipping cleanup"); } } } use crate::core::config::{OUTPUT_DIR, PYTHON_PATH, SCRIPTS_DIR}; use crate::core::db::{ MonitorJob, PostgresDb, ProcessorJobStatus, ProcessorType, QdrantDb, RedisClient, }; use crate::core::processor; use crate::core::processor::asr::AsrResult; use crate::core::processor::asrx::AsrxResult; use crate::core::processor::cut::CutResult; use crate::core::processor::face::FaceResult; use crate::core::processor::ocr::OcrResult; use crate::core::processor::pose::PoseResult; use crate::core::processor::scene_classification::SceneClassificationResult; use crate::core::processor::visual_chunk::VisualChunkResult; use crate::core::processor::yolo::YoloResult; use crate::worker::resources::SystemResources; #[derive(Debug)] struct ProcessorOutput { data: serde_json::Value, chunks_produced: i32, frames_processed: i32, total_frames: i32, retry_count: i32, pid: i32, } #[derive(Debug, Clone)] pub struct ProcessorTask { pub job: MonitorJob, pub processor_type: ProcessorType, pub processor_result_id: i32, pub frame_dir: Option, } pub struct ProcessorPool { db: Arc, redis: Arc, config_max: usize, running: Arc>>, running_count: Arc>, } struct ProcessorHandle { #[allow(dead_code)] processor_type: ProcessorType, cancel_tx: mpsc::Sender<()>, child_pid: Arc>>, } impl ProcessorPool { pub fn new(db: Arc, redis: Arc, max_concurrent: usize) -> Self { Self { db, redis, config_max: max_concurrent, running: Arc::new(RwLock::new(HashMap::new())), running_count: Arc::new(RwLock::new(0)), } } /// 根據系統資源計算當前安全的並發上限 pub async fn current_max(&self) -> usize { let resources = SystemResources::check(); resources.safe_max_concurrent(self.config_max).max(1) } pub async fn can_start(&self) -> bool { let count = *self.running_count.read().await; let max = self.current_max().await; count < max } /// 清理 stale running state:若系統中實際運行的 processor 比記錄少,修正 count pub async fn sweep_stale(&self) { let handle_count = self.running.read().await.len(); let count = *self.running_count.read().await; if handle_count != count { warn!( "[ProcessorPool] Stale count detected: handles={}, count={}, fixing", handle_count, count ); let mut c = self.running_count.write().await; *c = handle_count; } if handle_count == 0 && count == 0 { if let Err(e) = self .db .reset_stale_processor_results(ProcessorJobStatus::Pending, "Worker restarted") .await { error!("Failed to reset stale processor results: {}", e); } } } async fn kill_existing_processor(redis: &RedisClient, uuid: &str, processor: &str) { let prefix = crate::core::config::REDIS_KEY_PREFIX.as_str(); let key = format!("{}job:{}:processor:{}", prefix, uuid, processor); if let Ok(mut conn) = redis.get_conn().await { let old_pid: Option = redis::cmd("HGET") .arg(&key) .arg("pid") .query_async(&mut conn) .await .ok() .flatten(); if let Some(pid) = old_pid { if pid > 0 { warn!( "[PID] Killing existing process {} for {}/{}", pid, uuid, processor ); unsafe { libc::kill(pid, libc::SIGKILL); } } } } } pub async fn start_processor(&self, task: ProcessorTask) -> Result<()> { Self::kill_existing_processor(&*self.redis, &task.job.uuid, task.processor_type.as_str()) .await; let (cancel_tx, cancel_rx) = mpsc::channel(1); let job_id = task.job.id; let processor_type = task.processor_type; let current_limit = self.current_max().await; { let mut count = self.running_count.write().await; if *count >= current_limit { anyhow::bail!( "Max concurrent processors reached (dynamic limit: {})", current_limit ); } *count += 1; } let running = self.running.clone(); let running_count = self.running_count.clone(); let child_pid: Arc>> = Arc::new(RwLock::new(None)); running.write().await.insert( job_id, ProcessorHandle { processor_type, cancel_tx, child_pid: child_pid.clone(), }, ); let db = self.db.clone(); let redis = self.redis.clone(); let job = task.job.clone(); let processor_result_id = task.processor_result_id; let processor_name = processor_type.as_str().to_string(); // 設置共享 frame 目錄環境變數(若有) if let Some(ref fd) = task.frame_dir { std::env::set_var("MOMENTRY_FRAME_DIR", fd); } else { std::env::remove_var("MOMENTRY_FRAME_DIR"); } tokio::spawn(async move { // Guard 的 Drop 確保 panic 時也清理 pool state let _guard = ProcessorCleanupGuard { job_id, running: running.clone(), running_count: running_count.clone(), }; info!("Starting processor {} for job {}", processor_name, job.uuid); let _ = db .update_processor_result( processor_result_id, ProcessorJobStatus::Running, None, None, ) .await; let _ = redis .update_worker_processor_status( &job.uuid, &processor_name, "running", None, 0, 0, 0, 0, 0, ) .await; // Set started_at once (subscriber's update_worker_processor_status won't touch it) if let Ok(mut conn) = redis.get_conn().await { let prefix = crate::core::config::REDIS_KEY_PREFIX.as_str(); let key = format!("{}job:{}:processor:{}", prefix, &job.uuid, &processor_name); let now = chrono::Utc::now().to_rfc3339(); let _: Option = redis::cmd("HSET") .arg(&key) .arg("started_at") .arg(&now) .query_async(&mut conn) .await .ok(); let _: Option = redis::cmd("HSET") .arg(&key) .arg("embedding_started_at") .arg(&now) .query_async(&mut conn) .await .ok(); } // Subscribe to Redis progress pub/sub and update processor hash in real-time let sub_redis = redis.clone(); let sub_uuid = job.uuid.clone(); let sub_processor = processor_name.clone(); let progress_handle = tokio::spawn(async move { let cb_redis = sub_redis.clone(); let cb_uuid = sub_uuid.clone(); let cb_processor = sub_processor.clone(); if let Err(e) = sub_redis .subscribe_and_callback(&sub_uuid, move |msg| { tracing::info!( "[Subscriber] Got msg for={} cur={} tot={}", msg.processor, msg.data.current.unwrap_or(0), msg.data.total.unwrap_or(0) ); if msg.processor == cb_processor { let cur = msg.data.current.unwrap_or(0); let tot = msg.data.total.unwrap_or(0); let oc = msg.data.output_count.unwrap_or(0); let r = cb_redis.clone(); let u = cb_uuid.clone(); let p = cb_processor.clone(); tokio::spawn(async move { match r .update_worker_processor_status( &u, &p, "running", None, cur, oc, tot, 0, 0, ) .await { Ok(_) => tracing::info!( "[Subscriber] Updated {}: cur={} tot={}", p, cur, tot ), Err(e) => tracing::error!("[Subscriber] FAILED {}: {}", p, e), } }); } }) .await { tracing::warn!("[ProgressSub] Subscriber ended: {}", e); } }); let result = Self::run_processor(&db, &redis, &job, processor_type, cancel_rx).await; progress_handle.abort(); match result { Ok(output) => { // 驗收 agent 檢查產出內容 let verification = crate::verification::verifier::verify_output(&processor_type, &job.uuid); if verification.passed { info!( "Processor {} completed and verified for job {} ({} chunks, {} frames)", processor_name, job.uuid, output.chunks_produced, output.frames_processed ); // 清理暫存備份 crate::verification::verifier::cleanup_temp_files( &processor_type, &job.uuid, ); if let Err(e) = db .update_processor_result_with_stats( processor_result_id, ProcessorJobStatus::Completed, None, Some(&output.data), output.chunks_produced, output.frames_processed, ) .await { error!("Failed to update processor result to completed: {}", e); } if let Err(e) = redis .update_worker_processor_status( &job.uuid, &processor_name, "completed", None, output.frames_processed, output.chunks_produced, output.total_frames, output.retry_count, output.pid, ) .await { error!("Failed to update Redis processor status: {}", e); } } else { error!( "Processor {} output failed verification for job {}: {:?}", processor_name, job.uuid, verification.details ); if let Err(db_err) = db .update_processor_result_with_stats( processor_result_id, ProcessorJobStatus::Failed, Some(&format!("verification failed: {:?}", verification.details)), None, 0, 0, ) .await { error!("Failed to update processor result to failed: {}", db_err); } if let Err(redis_err) = redis .update_worker_processor_status( &job.uuid, &processor_name, "failed", Some(&format!("verification failed: {:?}", verification.details)), 0, 0, 0, 0, 0, ) .await { error!("Failed to update Redis processor status: {}", redis_err); } } } Err(e) => { error!( "Processor {} failed for job {}: {}", processor_name, job.uuid, e ); if let Err(db_err) = db .update_processor_result_with_stats( processor_result_id, ProcessorJobStatus::Failed, Some(&e.to_string()), None, 0, 0, ) .await { error!("Failed to update processor result to failed: {}", db_err); } if let Err(redis_err) = redis .update_worker_processor_status( &job.uuid, &processor_name, "failed", Some(&e.to_string()), 0, 0, 0, 0, 0, ) .await { error!("Failed to update Redis processor status: {}", redis_err); } } } }); Ok(()) } async fn run_processor( db: &PostgresDb, _redis: &RedisClient, job: &MonitorJob, processor_type: ProcessorType, _cancel_rx: mpsc::Receiver<()>, ) -> Result { let video_path = job.video_path.as_ref().context("No video path in job")?; // Generate output path let output_dir = PathBuf::from(OUTPUT_DIR.as_str()); let suffix = match processor_type { ProcessorType::Story => format!("{}.story_story", job.uuid), _ => format!("{}.{}", job.uuid, processor_type.as_str()), }; let output_path = output_dir.join(format!("{}.json", suffix)); // Ensure output directory exists if let Some(parent) = output_path.parent() { tokio::fs::create_dir_all(parent).await?; } let uuid = Some(job.uuid.as_str()); let video = db.get_video_by_uuid(&job.uuid).await?; let total_frames = video.as_ref().map(|v| v.total_frames as i32).unwrap_or(0); match processor_type { ProcessorType::Asr => { let result = processor::process_asr(video_path, output_path.to_str().unwrap(), uuid).await?; let chunks_produced = result.segments.len() as i32; tracing::info!( "ASR completed, storing {} segments for {}", chunks_produced, job.uuid ); if let Err(e) = Self::store_asr_chunks(db, &job.uuid, &result).await { tracing::error!("Failed to store ASR chunks for {}: {}", job.uuid, e); } Ok(ProcessorOutput { data: serde_json::to_value(result)?, chunks_produced, frames_processed: total_frames, total_frames, retry_count: 0, pid: 0, }) } ProcessorType::Cut => { let cut_path = std::path::Path::new(&output_dir).join(format!("{}.cut.json", job.uuid)); let result = if cut_path.exists() { // CUT 在 register 階段已完成,直接載入 let content = std::fs::read_to_string(&cut_path).context("Failed to read cut.json")?; serde_json::from_str(&content).context("Failed to parse cut.json")? } else { processor::process_cut(video_path, output_path.to_str().unwrap(), uuid).await? }; let chunks_produced = result.scenes.len() as i32; tracing::info!( "CUT completed, storing {} scenes for {}", chunks_produced, job.uuid ); if let Err(e) = Self::store_cut_chunks(db, &job.uuid, &result).await { tracing::error!("Failed to store CUT chunks for {}: {}", job.uuid, e); } Ok(ProcessorOutput { data: serde_json::to_value(result)?, chunks_produced, frames_processed: total_frames, total_frames, retry_count: 0, pid: 0, }) } ProcessorType::Yolo => { let result = processor::process_yolo(video_path, output_path.to_str().unwrap(), uuid) .await?; let chunks_produced = result.frames.len() as i32; tracing::info!( "YOLO completed, storing {} frames for {}", chunks_produced, job.uuid ); if let Err(e) = Self::store_yolo_chunks(db, &job.uuid, &result).await { tracing::error!("Failed to store YOLO chunks for {}: {}", job.uuid, e); } Ok(ProcessorOutput { data: serde_json::to_value(result)?, chunks_produced, frames_processed: total_frames, total_frames, retry_count: 0, pid: 0, }) } ProcessorType::Ocr => { let result = processor::process_ocr(video_path, output_path.to_str().unwrap(), uuid).await?; let chunks_produced = result.frames.len() as i32; tracing::info!( "OCR completed, storing {} frames for {}", chunks_produced, job.uuid ); if let Err(e) = Self::store_ocr_chunks(db, &job.uuid, &result).await { tracing::error!("Failed to store OCR chunks for {}: {}", job.uuid, e); } Ok(ProcessorOutput { data: serde_json::to_value(result)?, chunks_produced, frames_processed: total_frames, total_frames, retry_count: 0, pid: 0, }) } ProcessorType::Face => { let result = processor::process_face(video_path, output_path.to_str().unwrap(), uuid) .await?; let chunks_produced = result.frames.len() as i32; tracing::info!( "FACE completed, storing {} frames for {}", chunks_produced, job.uuid ); if let Err(e) = Self::store_face_chunks(db, &job.uuid, &result).await { tracing::error!("Failed to store FACE chunks for {}: {}", job.uuid, e); } // 將 face embedding 寫入 Qdrant if let Err(e) = Self::store_face_embeddings_to_qdrant(&job.uuid, &result).await { tracing::error!("Failed to store face embeddings to Qdrant: {}", e); } Ok(ProcessorOutput { data: serde_json::to_value(result)?, chunks_produced, frames_processed: total_frames, total_frames, retry_count: 0, pid: 0, }) } ProcessorType::Pose => { let result = processor::process_pose(video_path, output_path.to_str().unwrap(), uuid) .await?; let chunks_produced = result.frames.len() as i32; tracing::info!( "POSE completed, storing {} frames for {}", chunks_produced, job.uuid ); if let Err(e) = Self::store_pose_chunks(db, &job.uuid, &result).await { tracing::error!("Failed to store POSE chunks for {}: {}", job.uuid, e); } Ok(ProcessorOutput { data: serde_json::to_value(result)?, chunks_produced, frames_processed: total_frames, total_frames, retry_count: 0, pid: 0, }) } ProcessorType::Asrx => { let result = processor::process_asrx(video_path, output_path.to_str().unwrap(), uuid) .await?; let chunks_produced = result.segments.len() as i32; tracing::info!( "ASRX completed, storing {} segments for {}", chunks_produced, job.uuid ); if let Err(e) = Self::store_asrx_chunks(db, &job.uuid, &result).await { tracing::error!("Failed to store ASRX chunks for {}: {}", job.uuid, e); } // 將 voice embeddings 寫入 Qdrant if let Err(e) = Self::store_voice_embeddings_to_qdrant(&job.uuid, &result).await { tracing::error!("Failed to store voice embeddings to Qdrant: {}", e); } Ok(ProcessorOutput { data: serde_json::to_value(result)?, chunks_produced, frames_processed: total_frames, total_frames, retry_count: 0, pid: 0, }) } ProcessorType::VisualChunk => { let result = processor::process_visual_chunk_advanced( video_path, output_path.to_str().unwrap(), uuid, ) .await?; let chunks_produced = result.chunk_count as i32; tracing::info!( "VisualChunk completed, storing {} chunks for {}", chunks_produced, job.uuid ); if let Err(e) = Self::store_visual_chunk_chunks(db, &job.uuid, &result).await { tracing::error!("Failed to store VisualChunk chunks for {}: {}", job.uuid, e); } Ok(ProcessorOutput { data: serde_json::to_value(result)?, chunks_produced, frames_processed: total_frames, total_frames, retry_count: 0, pid: 0, }) } ProcessorType::Scene => { let scene_path = std::path::Path::new(&output_dir).join(format!("{}.scene.json", job.uuid)); let scene_err = std::path::Path::new(&output_dir).join(format!("{}.scene.err", job.uuid)); let scene_tmp = std::path::Path::new(&output_dir).join(format!("{}.scene.tmp", job.uuid)); // 優先順序:.err(跳過)→ .json(載入)→ .tmp(等待或重新執行) let result = if scene_err.exists() { tracing::warn!("Scene previously failed for {}, skipping", job.uuid); return Ok(ProcessorOutput { data: serde_json::Value::Null, chunks_produced: 0, frames_processed: 0, total_frames, retry_count: 0, pid: 0, }); } else if scene_path.exists() { tracing::info!("Scene JSON exists for {}, loading from file", job.uuid); crate::core::processor::load_scene_from_file(scene_path.to_str().unwrap())? } else if scene_tmp.exists() { tracing::warn!("Scene tmp exists for {}, waiting for completion", job.uuid); // 生產環境應等待,此處直接跳過避免卡住 crate::core::processor::SceneClassificationResult::default() } else { processor::process_scene_classification( video_path, output_path.to_str().unwrap(), uuid, ) .await? }; let chunks_produced = result.scenes.len() as i32; tracing::info!( "Scene classification completed, storing {} scenes for {}", chunks_produced, job.uuid ); if let Err(e) = Self::store_scene_chunks(db, &job.uuid, &result).await { tracing::error!("Failed to store Scene chunks for {}: {}", job.uuid, e); } Ok(ProcessorOutput { data: serde_json::to_value(result)?, chunks_produced, frames_processed: total_frames, total_frames, retry_count: 0, pid: 0, }) } ProcessorType::Story => { let executor = crate::core::processor::PythonExecutor::new()?; let _ = executor .run( "parent_chunk_5w1h.py", &["--file-uuid", &job.uuid, "--embed"], uuid, "STORY", Some(std::time::Duration::from_secs(300)), ) .await; let narratives_path = output_dir.join(format!("{}.narratives.json", job.uuid)); let chunks_produced = if narratives_path.exists() { let content = std::fs::read_to_string(&narratives_path).unwrap_or_default(); let count: i32 = serde_json::from_str::>(&content) .map(|v| v.len() as i32) .unwrap_or(0); tracing::info!("Story generated {} narratives for {}", count, job.uuid); count } else { 0 }; Ok(ProcessorOutput { data: serde_json::Value::Null, chunks_produced, frames_processed: total_frames, total_frames, retry_count: 0, pid: 0, }) } ProcessorType::FiveW1H => { let executor = crate::core::processor::PythonExecutor::new()?; let _ = executor .run( "parent_chunk_5w1h.py", &["--file-uuid", &job.uuid, "--embed", "--mode", "llm"], uuid, "5W1H", Some(std::time::Duration::from_secs(300)), ) .await; Ok(ProcessorOutput { data: serde_json::Value::Null, chunks_produced: 0, frames_processed: total_frames, total_frames, retry_count: 0, pid: 0, }) } } } pub async fn store_asr_chunks( db: &PostgresDb, uuid: &str, asr_result: &AsrResult, ) -> Result<()> { let video = db .get_video_by_uuid(uuid) .await? .ok_or_else(|| anyhow::anyhow!("Video not found for uuid: {}", uuid))?; let fps = if video.fps > 0.0 { video.fps } else { 30.0 }; let segments: Vec<(i64, i64, i64, f64, f64, serde_json::Value)> = asr_result .segments .iter() .enumerate() .map(|(i, segment)| { // Prefer ASR output frames, fallback to time-based conversion let start_frame = segment .start_frame .unwrap_or_else(|| (segment.start_time * fps).round() as i64); let end_frame = segment .end_frame .unwrap_or_else(|| (segment.end_time * fps).round() as i64); let data = serde_json::json!({ "text": segment.text, "text_normalized": segment.text.to_lowercase(), "language": asr_result.language, "language_probability": asr_result.language_probability, }); ( i as i64, start_frame, end_frame, segment.start_time, segment.end_time, data, ) }) .collect(); db.store_asr_pre_chunks_batch(uuid, &segments).await?; tracing::info!( "Stored {} ASR pre-chunks for video {}", segments.len(), uuid ); Ok(()) } pub async fn store_cut_chunks( db: &PostgresDb, uuid: &str, cut_result: &CutResult, ) -> Result<()> { let video = db .get_video_by_uuid(uuid) .await? .ok_or_else(|| anyhow::anyhow!("Video not found for uuid: {}", uuid))?; let fps = if video.fps > 0.0 { video.fps } else { 30.0 }; let scenes: Vec<(i64, i64, i64, f64, f64, serde_json::Value)> = cut_result .scenes .iter() .enumerate() .map(|(i, scene)| { let data = serde_json::json!({ "scene_number": scene.scene_number, }); ( i as i64, scene.start_frame as i64, scene.end_frame as i64, scene.start_time, scene.end_time, data, ) }) .collect(); db.store_cut_pre_chunks_batch(uuid, &scenes).await?; tracing::info!("Stored {} CUT pre-chunks for video {}", scenes.len(), uuid); Ok(()) } pub async fn store_yolo_chunks( db: &PostgresDb, uuid: &str, yolo_result: &YoloResult, ) -> Result<()> { let frames_count = yolo_result.frames.len(); tracing::info!( "Storing {} YOLO pre-chunks for video {}", frames_count, uuid ); let mut pre_chunks_to_store = Vec::new(); for frame in yolo_result.frames.iter() { let data = serde_json::json!({ "objects": frame.objects, "timestamp": frame.timestamp, }); pre_chunks_to_store.push(( frame.frame as i64, // coordinate_index Some(frame.timestamp), // timestamp data, None, // identity_id None, // confidence )); } db.store_raw_pre_chunks_batch(uuid, "yolo", &pre_chunks_to_store) .await?; Ok(()) } pub async fn store_ocr_chunks( db: &PostgresDb, uuid: &str, ocr_result: &OcrResult, ) -> Result<()> { let frames_count = ocr_result.frames.len(); tracing::info!("Storing {} OCR pre-chunks for video {}", frames_count, uuid); let mut pre_chunks_to_store = Vec::new(); for frame in ocr_result.frames.iter() { let data = serde_json::json!({ "texts": frame.texts, "timestamp": frame.timestamp, }); pre_chunks_to_store.push((frame.frame as i64, Some(frame.timestamp), data, None, None)); } db.store_raw_pre_chunks_batch(uuid, "ocr", &pre_chunks_to_store) .await?; Ok(()) } pub async fn store_face_chunks( db: &PostgresDb, uuid: &str, face_result: &FaceResult, ) -> Result<()> { let frames_count = face_result.frames.len(); tracing::info!( "Storing {} Face pre-chunks + {} detections for video {}", frames_count, face_result .frames .iter() .map(|f| f.faces.len()) .sum::(), uuid ); let mut pre_chunks_to_store = Vec::new(); let mut detections_to_store = Vec::new(); for frame in face_result.frames.iter() { let data = serde_json::json!({ "faces": frame.faces, "timestamp": frame.timestamp, }); pre_chunks_to_store.push((frame.frame as i64, Some(frame.timestamp), data, None, None)); for face in frame.faces.iter() { detections_to_store.push(( frame.frame as i64, frame.timestamp, face.x, face.y, face.width, face.height, face.confidence, )); } } db.store_raw_pre_chunks_batch(uuid, "face", &pre_chunks_to_store) .await?; db.store_face_detections_batch(uuid, &detections_to_store) .await?; Ok(()) } /// 將 face embeddings 寫入 Qdrant momentry_dev_face collection pub async fn store_face_embeddings_to_qdrant( uuid: &str, face_result: &FaceResult, ) -> Result<()> { let qdrant = QdrantDb::new(); let collection = format!( "{}{}", crate::core::config::REDIS_KEY_PREFIX .as_str() .trim_end_matches(':'), "_face" ); // 確保 collection 存在(dim=512 for FaceNet) if let Err(e) = qdrant.ensure_collection(&collection, 512).await { tracing::error!("Failed to ensure Qdrant face collection: {}", e); return Ok(()); } let mut count = 0; for frame in &face_result.frames { for face in &frame.faces { if let Some(embedding) = &face.embedding { if embedding.len() != 512 { continue; } // 使用 hash 作為 Qdrant point ID(需要 unsigned integer) // 使用 frame number 作為 Qdrant point ID(u64) let point_id = frame.frame as u64; let payload = serde_json::json!({ "file_uuid": uuid, "face_id": face.face_id, "frame": frame.frame, "timestamp": frame.timestamp, "x": face.x, "y": face.y, "width": face.width, "height": face.height, "confidence": face.confidence, }); if let Err(e) = qdrant .upsert_vector_to_collection( &collection, point_id, embedding, Some(payload), ) .await { tracing::error!("Failed to upsert face vector {}: {}", point_id, e); } else { count += 1; } } } } if count > 0 { tracing::info!("Stored {} face embeddings to Qdrant for {}", count, uuid); } Ok(()) } /// 將 voice embeddings 寫入 Qdrant momentry_dev_voice collection pub async fn store_voice_embeddings_to_qdrant( uuid: &str, asrx_result: &AsrxResult, ) -> Result<()> { let qdrant = QdrantDb::new(); let collection = format!( "{}{}", crate::core::config::REDIS_KEY_PREFIX .as_str() .trim_end_matches(':'), "_voice" ); // 確保 collection 存在(dim=192 for ASRX voice) if let Err(e) = qdrant.ensure_collection(&collection, 192).await { tracing::error!("Failed to ensure Qdrant voice collection: {}", e); return Ok(()); } let embeddings = match &asrx_result.embeddings { Some(e) => e, None => return Ok(()), }; let mut count = 0; for (i, segment) in asrx_result.segments.iter().enumerate() { if let Some(emb) = embeddings.get(i) { if emb.len() != 192 { continue; } let payload = serde_json::json!({ "file_uuid": uuid, "speaker_id": segment.speaker_id, "segment_index": i, "start_frame": segment.start_frame, "end_frame": segment.end_frame, "start_time": segment.start_time, "end_time": segment.end_time, }); if let Err(e) = qdrant .upsert_vector_to_collection(&collection, i as u64, emb, Some(payload)) .await { tracing::error!("Failed to upsert voice vector {}: {}", i, e); } else { count += 1; } } } if count > 0 { tracing::info!("Stored {} voice embeddings to Qdrant for {}", count, uuid); } Ok(()) } pub async fn store_pose_chunks( db: &PostgresDb, uuid: &str, pose_result: &PoseResult, ) -> Result<()> { let frames_count = pose_result.frames.len(); tracing::info!( "Storing {} Pose pre-chunks for video {}", frames_count, uuid ); let mut pre_chunks_to_store = Vec::new(); for frame in pose_result.frames.iter() { let data = serde_json::json!({ "persons": frame.persons, "timestamp": frame.timestamp, }); pre_chunks_to_store.push((frame.frame as i64, Some(frame.timestamp), data, None, None)); } db.store_raw_pre_chunks_batch(uuid, "pose", &pre_chunks_to_store) .await?; Ok(()) } pub async fn store_asrx_chunks( db: &PostgresDb, uuid: &str, asrx_result: &AsrxResult, ) -> Result<()> { let segments_count = asrx_result.segments.len(); tracing::info!( "Storing {} ASRX pre-chunks for video {}", segments_count, uuid ); let mut pre_chunks_to_store = Vec::new(); for (i, segment) in asrx_result.segments.iter().enumerate() { let data = serde_json::json!({ "text": segment.text, "speaker_id": segment.speaker_id, "timestamp": segment.start_time, }); // ASRX is time-based, so we use segment index or start time as coordinate. pre_chunks_to_store.push((i as i64, Some(segment.start_time), data, None, None)); } db.store_raw_pre_chunks_batch(uuid, "asrx", &pre_chunks_to_store) .await?; Ok(()) } pub async fn store_visual_chunk_chunks( db: &PostgresDb, uuid: &str, visual_chunk_result: &VisualChunkResult, ) -> Result<()> { for (i, chunk) in visual_chunk_result.chunks.iter().enumerate() { match db.store_chunk(chunk).await { Ok(_) => { tracing::info!("Stored VisualChunk chunk {} for video {}", i, uuid); } Err(e) => { tracing::error!("Failed to store VisualChunk chunk {}: {}", i, e); } } } Ok(()) } pub async fn store_scene_chunks( db: &PostgresDb, uuid: &str, scene_result: &SceneClassificationResult, ) -> Result<()> { let video = db .get_video_by_uuid(uuid) .await? .ok_or_else(|| anyhow::anyhow!("Video not found for uuid: {}", uuid))?; let fps = if video.fps > 0.0 { video.fps } else { 30.0 }; let scenes: Vec<(i64, i64, i64, f64, f64, serde_json::Value)> = scene_result .scenes .iter() .enumerate() .map(|(i, scene)| { let start_frame = (scene.start_time * fps).round() as i64; let end_frame = (scene.end_time * fps).round() as i64; let data = serde_json::json!({ "scene_type": scene.scene_type, "scene_type_zh": scene.scene_type_zh, "confidence": scene.confidence, "top_5": scene.top_5, }); ( i as i64, start_frame, end_frame, scene.start_time, scene.end_time, data, ) }) .collect(); db.store_scene_pre_chunks_batch(uuid, &scenes).await?; for (i, scene) in scene_result.scenes.iter().enumerate() { let chk_id = format!("scene_{}", i + 1); let meta = serde_json::json!({ "scene_type": scene.scene_type, "scene_type_zh": scene.scene_type_zh, "confidence": scene.confidence, "top_5": scene.top_5, }); let chunk_table = crate::core::db::schema::table_name("chunk"); let _ = sqlx::query(&format!( "UPDATE {} SET metadata = metadata || $1::jsonb WHERE file_uuid=$2 AND chunk_id=$3", chunk_table )) .bind(&meta) .bind(uuid) .bind(&chk_id) .execute(db.pool()) .await; } tracing::info!( "Stored {} Scene pre-chunks for video {}", scenes.len(), uuid ); Ok(()) } pub async fn get_running_count(&self) -> usize { *self.running_count.read().await } pub async fn cancel_all(&self) { let mut running = self.running.write().await; for (_, handle) in running.drain() { let _ = handle.cancel_tx.send(()).await; } let mut count = self.running_count.write().await; *count = 0; } }