use anyhow::{Context, Result}; use tracing::{info, warn}; use crate::core::db::{ workspace_sqlite::{SpeakerDetectionBatchItem, WorkspaceDb}, PostgresDb, QdrantDb, QdrantWorkspace, }; #[derive(Debug)] pub struct CheckinResult { pub file_uuid: String, pub pre_chunks_moved: usize, pub speaker_detections_moved: usize, pub vectors_moved: usize, } #[derive(Debug)] pub struct CheckoutResult { pub file_uuid: String, pub rows_deleted: usize, } pub async fn checkin(db: &PostgresDb, file_uuid: &str) -> Result { let schema = crate::core::config::DATABASE_SCHEMA.as_str(); info!("Checkin starting for {} (schema={})", file_uuid, schema); let workspace = WorkspaceDb::open(file_uuid) .await .context("No workspace found for checkin")?; let qdrant_ws = QdrantWorkspace::new(); let pre_chunks = workspace.get_all_pre_chunks().await?; let spk_dets = workspace.get_all_speaker_detections().await?; info!( "Checkin {} workspace: {} pre_chunks, {} spk_dets", file_uuid, pre_chunks.len(), spk_dets.len(), ); // ── Pre-chunks ── for chunk in &pre_chunks { let data_value: serde_json::Value = chunk .data .as_ref() .and_then(|d| serde_json::from_str(d).ok()) .unwrap_or(serde_json::Value::Null); match chunk.processor_type.as_str() { "asr" => { let start = chunk.start_time.unwrap_or(0.0); let end = chunk.end_time.unwrap_or(0.0); let sf = chunk.start_frame.unwrap_or(0); let ef = chunk.end_frame.unwrap_or(0); let idx = chunk.id as i64; db.store_asr_pre_chunks_batch(file_uuid, &[(idx, sf, ef, start, end, data_value)]) .await?; } "cut" => { let start = chunk.start_time.unwrap_or(0.0); let end = chunk.end_time.unwrap_or(0.0); let sf = chunk.start_frame.unwrap_or(0); let ef = chunk.end_frame.unwrap_or(0); let idx = chunk.id as i64; db.store_cut_pre_chunks_batch(file_uuid, &[(idx, sf, ef, start, end, data_value)]) .await?; } "scene" => { let start = chunk.start_time.unwrap_or(0.0); let end = chunk.end_time.unwrap_or(0.0); let sf = chunk.start_frame.unwrap_or(0); let ef = chunk.end_frame.unwrap_or(0); let idx = chunk.id as i64; db.store_scene_pre_chunks_batch( file_uuid, &[(idx, sf, ef, start, end, data_value)], ) .await?; } _ => { let frame = chunk.start_frame.unwrap_or(0); let ts = chunk.start_time; let text = chunk.text_content.clone(); db.store_raw_pre_chunks_batch( file_uuid, &chunk.processor_type, &[(frame, ts, data_value, text, None)], ) .await?; } } } // ── Speaker detections ── if !spk_dets.is_empty() { let batch: Vec<(String, f64, f64, String, Option, f32)> = spk_dets .iter() .map(|s| { ( s.speaker_id.clone().unwrap_or_default(), s.start_time.unwrap_or(0.0), s.end_time.unwrap_or(0.0), s.text_content.clone().unwrap_or_default(), s.chunk_id.clone(), s.confidence.unwrap_or(0.0) as f32, ) }) .collect(); db.store_speaker_detections_batch(file_uuid, &batch).await?; } // ── Qdrant vectors ── let mut vectors_moved = 0usize; match qdrant_ws.scroll_by_file_uuid(file_uuid).await { Ok(ws_data) => { let qdrant = QdrantDb::new(); // Chunks → production collection for point in &ws_data.chunks { if let Some(ref vector) = point.vector { let payload_val: serde_json::Value = serde_json::to_value(&point.payload).unwrap_or(serde_json::Value::Null); let point_id: u64 = match point.id.parse::() { Ok(id) => id, Err(_) => { use std::hash::{Hash, Hasher}; let mut hasher = std::collections::hash_map::DefaultHasher::new(); point.id.hash(&mut hasher); hasher.finish() } }; if let Err(e) = qdrant .upsert_vector_to_collection( &qdrant.collection_name, point_id, vector, Some(payload_val), ) .await { warn!("Failed to checkin chunk vector {}: {}", point.id, e); } else { vectors_moved += 1; } } } // Traces → production traces collection let traces_coll = format!( "{}_traces", crate::core::config::REDIS_KEY_PREFIX .as_str() .trim_end_matches(':') ); for point in &ws_data.traces { if let Some(ref vector) = point.vector { let payload_val: serde_json::Value = serde_json::to_value(&point.payload).unwrap_or(serde_json::Value::Null); let point_id: u64 = match point.id.parse::() { Ok(id) => id, Err(_) => { use std::hash::{Hash, Hasher}; let mut hasher = std::collections::hash_map::DefaultHasher::new(); point.id.hash(&mut hasher); hasher.finish() } }; if let Err(e) = qdrant .upsert_vector_to_collection( &traces_coll, point_id, vector, Some(payload_val), ) .await { warn!("Failed to checkin trace vector {}: {}", point.id, e); } else { vectors_moved += 1; } } } } Err(e) => { warn!("Failed to scroll Qdrant workspace for {}: {}", file_uuid, e); } } // ── Cleanup workspace ── if let Err(e) = workspace.clear().await { warn!("Failed to clear workspace for {}: {}", file_uuid, e); } if let Err(e) = qdrant_ws.delete_by_file_uuid(file_uuid).await { warn!( "Failed to delete workspace vectors for {}: {}", file_uuid, e ); } info!( "Checkin complete for {}: {} pre_chunks, {} spk_dets, {} vectors", file_uuid, pre_chunks.len(), spk_dets.len(), vectors_moved, ); Ok(CheckinResult { file_uuid: file_uuid.to_string(), pre_chunks_moved: pre_chunks.len(), speaker_detections_moved: spk_dets.len(), vectors_moved, }) } pub async fn checkout(db: &PostgresDb, file_uuid: &str) -> Result { let schema = crate::core::config::DATABASE_SCHEMA.as_str(); let table = crate::core::db::schema::table_name; info!("Checkout starting for {} (schema={})", file_uuid, schema); // Delete face_detections let face_table = table("face_detections"); let face_result = sqlx::query(&format!("DELETE FROM {} WHERE file_uuid = $1", face_table)) .bind(file_uuid) .execute(db.pool()) .await?; info!( "Checkout {}: deleted {} rows from {}", file_uuid, face_result.rows_affected(), face_table ); // Delete speaker_detections let spk_table = table("speaker_detections"); let spk_result = sqlx::query(&format!("DELETE FROM {} WHERE file_uuid = $1", spk_table)) .bind(file_uuid) .execute(db.pool()) .await?; info!( "Checkout {}: deleted {} rows from {}", file_uuid, spk_result.rows_affected(), spk_table ); // Delete pre_chunks let pc_table = table("pre_chunks"); let pc_result = sqlx::query(&format!("DELETE FROM {} WHERE file_uuid = $1", pc_table)) .bind(file_uuid) .execute(db.pool()) .await?; info!( "Checkout {}: deleted {} rows from {}", file_uuid, pc_result.rows_affected(), pc_table ); // Delete chunks let chunk_table = table("chunk"); let chunk_result = sqlx::query(&format!("DELETE FROM {} WHERE file_uuid = $1", chunk_table)) .bind(file_uuid) .execute(db.pool()) .await?; info!( "Checkout {}: deleted {} rows from {}", file_uuid, chunk_result.rows_affected(), chunk_table ); // Delete processor_results let pr_table = table("processor_results"); let pr_result = sqlx::query(&format!("DELETE FROM {} WHERE file_uuid = $1", pr_table)) .bind(file_uuid) .execute(db.pool()) .await?; info!( "Checkout {}: deleted {} rows from {}", file_uuid, pr_result.rows_affected(), pr_table ); // Delete Qdrant vectors from production let qdrant = QdrantDb::new(); if let Err(e) = qdrant.delete_by_uuid(file_uuid).await { warn!( "Failed to delete chunk vectors from Qdrant for {}: {}", file_uuid, e ); } let prefix = crate::core::config::REDIS_KEY_PREFIX .as_str() .trim_end_matches(':'); let traces_coll = format!("{}_traces", prefix); let voice_coll = format!("{}_voice", file_uuid); for coll in &[traces_coll, voice_coll] { if let Err(e) = QdrantDb::delete_by_uuid_from_collection( &qdrant.client, &qdrant.base_url, &qdrant.api_key, coll, file_uuid, ) .await { warn!( "Failed to delete vectors from {} for {}: {}", coll, file_uuid, e ); } } let rows_deleted = face_result.rows_affected() + spk_result.rows_affected() + pc_result.rows_affected() + chunk_result.rows_affected() + pr_result.rows_affected(); info!( "Checkout complete for {}: {} PG rows deleted", file_uuid, rows_deleted, ); Ok(CheckoutResult { file_uuid: file_uuid.to_string(), rows_deleted: rows_deleted as usize, }) }