Phase 2.6.1: co_occurrence_edges migration - build_co_occurrence_edges_from_qdrant() - Qdrant embeddings → frame grouping → YOLO objects - Result: 6679 edges (vs 6701 PostgreSQL) Phase 2.6.2: face_face_edges migration - build_face_face_edges_from_qdrant() - Qdrant embeddings → frame grouping → face pairs - mutual_gaze detection preserved - Result: 6 edges (exact match) Phase 2.6.3: speaker_face_edges migration - build_speaker_face_edges_from_qdrant() - Qdrant embeddings → trace_id frame ranges - SPEAKS_AS edge creation Architecture: - All edges use Qdrant payload (no face_detections queries) - PostgreSQL fallback for empty Qdrant - Estimated 3.6x performance improvement Testing: - Playground (3003): ✓ All Phase 2.6 logs verified - Edge counts: ✓ Close match with PostgreSQL - Fallback: ✓ Working Docs: - docs_v1.0/DESIGN/TKG_PHASE2_6_EDGES_MIGRATION.md - docs_v1.0/M4_workspace/2026-06-21_phase2_6_test.md
336 lines
11 KiB
Rust
336 lines
11 KiB
Rust
use anyhow::{Context, Result};
|
|
use tracing::{info, warn};
|
|
|
|
use crate::core::db::{
|
|
workspace_sqlite::{SpeakerDetectionBatchItem, WorkspaceDb},
|
|
PostgresDb, QdrantDb, QdrantWorkspace,
|
|
};
|
|
|
|
/// Summary returned by [`checkin`] once a file's workspace has been promoted.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CheckinResult {
    /// UUID of the file whose workspace was checked in.
    pub file_uuid: String,
    /// Number of pre-chunk rows read from the workspace and written to PostgreSQL.
    pub pre_chunks_moved: usize,
    /// Number of speaker detections read from the workspace and written to PostgreSQL.
    pub speaker_detections_moved: usize,
    /// Number of Qdrant vectors (chunks plus traces) successfully upserted.
    pub vectors_moved: usize,
}
|
|
|
|
/// Summary returned by [`checkout`] once a file's production data has been removed.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CheckoutResult {
    /// UUID of the file that was checked out.
    pub file_uuid: String,
    /// Total number of PostgreSQL rows deleted across all purged tables.
    pub rows_deleted: usize,
}
|
|
|
|
pub async fn checkin(db: &PostgresDb, file_uuid: &str) -> Result<CheckinResult> {
|
|
let schema = crate::core::config::DATABASE_SCHEMA.as_str();
|
|
info!("Checkin starting for {} (schema={})", file_uuid, schema);
|
|
|
|
let workspace = WorkspaceDb::open(file_uuid)
|
|
.await
|
|
.context("No workspace found for checkin")?;
|
|
let qdrant_ws = QdrantWorkspace::new();
|
|
|
|
let pre_chunks = workspace.get_all_pre_chunks().await?;
|
|
let spk_dets = workspace.get_all_speaker_detections().await?;
|
|
|
|
info!(
|
|
"Checkin {} workspace: {} pre_chunks, {} spk_dets",
|
|
file_uuid,
|
|
pre_chunks.len(),
|
|
spk_dets.len(),
|
|
);
|
|
|
|
// ── Pre-chunks ──
|
|
for chunk in &pre_chunks {
|
|
let data_value: serde_json::Value = chunk
|
|
.data
|
|
.as_ref()
|
|
.and_then(|d| serde_json::from_str(d).ok())
|
|
.unwrap_or(serde_json::Value::Null);
|
|
|
|
match chunk.processor_type.as_str() {
|
|
"asr" => {
|
|
let start = chunk.start_time.unwrap_or(0.0);
|
|
let end = chunk.end_time.unwrap_or(0.0);
|
|
let sf = chunk.start_frame.unwrap_or(0);
|
|
let ef = chunk.end_frame.unwrap_or(0);
|
|
let idx = chunk.id as i64;
|
|
db.store_asr_pre_chunks_batch(file_uuid, &[(idx, sf, ef, start, end, data_value)])
|
|
.await?;
|
|
}
|
|
"cut" => {
|
|
let start = chunk.start_time.unwrap_or(0.0);
|
|
let end = chunk.end_time.unwrap_or(0.0);
|
|
let sf = chunk.start_frame.unwrap_or(0);
|
|
let ef = chunk.end_frame.unwrap_or(0);
|
|
let idx = chunk.id as i64;
|
|
db.store_cut_pre_chunks_batch(file_uuid, &[(idx, sf, ef, start, end, data_value)])
|
|
.await?;
|
|
}
|
|
"scene" => {
|
|
let start = chunk.start_time.unwrap_or(0.0);
|
|
let end = chunk.end_time.unwrap_or(0.0);
|
|
let sf = chunk.start_frame.unwrap_or(0);
|
|
let ef = chunk.end_frame.unwrap_or(0);
|
|
let idx = chunk.id as i64;
|
|
db.store_scene_pre_chunks_batch(
|
|
file_uuid,
|
|
&[(idx, sf, ef, start, end, data_value)],
|
|
)
|
|
.await?;
|
|
}
|
|
_ => {
|
|
let frame = chunk.start_frame.unwrap_or(0);
|
|
let ts = chunk.start_time;
|
|
let text = chunk.text_content.clone();
|
|
db.store_raw_pre_chunks_batch(
|
|
file_uuid,
|
|
&chunk.processor_type,
|
|
&[(frame, ts, data_value, text, None)],
|
|
)
|
|
.await?;
|
|
}
|
|
}
|
|
}
|
|
|
|
// ── Speaker detections ──
|
|
if !spk_dets.is_empty() {
|
|
let batch: Vec<(String, f64, f64, String, Option<String>, f32)> = spk_dets
|
|
.iter()
|
|
.map(|s| {
|
|
(
|
|
s.speaker_id.clone().unwrap_or_default(),
|
|
s.start_time.unwrap_or(0.0),
|
|
s.end_time.unwrap_or(0.0),
|
|
s.text_content.clone().unwrap_or_default(),
|
|
s.chunk_id.clone(),
|
|
s.confidence.unwrap_or(0.0) as f32,
|
|
)
|
|
})
|
|
.collect();
|
|
db.store_speaker_detections_batch(file_uuid, &batch).await?;
|
|
}
|
|
|
|
// ── Qdrant vectors ──
|
|
let mut vectors_moved = 0usize;
|
|
match qdrant_ws.scroll_by_file_uuid(file_uuid).await {
|
|
Ok(ws_data) => {
|
|
let qdrant = QdrantDb::new();
|
|
|
|
// Chunks → production collection
|
|
for point in &ws_data.chunks {
|
|
if let Some(ref vector) = point.vector {
|
|
let payload_val: serde_json::Value =
|
|
serde_json::to_value(&point.payload).unwrap_or(serde_json::Value::Null);
|
|
let point_id: u64 = match point.id.parse::<u64>() {
|
|
Ok(id) => id,
|
|
Err(_) => {
|
|
use std::hash::{Hash, Hasher};
|
|
let mut hasher = std::collections::hash_map::DefaultHasher::new();
|
|
point.id.hash(&mut hasher);
|
|
hasher.finish()
|
|
}
|
|
};
|
|
if let Err(e) = qdrant
|
|
.upsert_vector_to_collection(
|
|
&qdrant.collection_name,
|
|
point_id,
|
|
vector,
|
|
Some(payload_val),
|
|
)
|
|
.await
|
|
{
|
|
warn!("Failed to checkin chunk vector {}: {}", point.id, e);
|
|
} else {
|
|
vectors_moved += 1;
|
|
}
|
|
}
|
|
}
|
|
|
|
// Traces → production traces collection
|
|
let traces_coll = format!(
|
|
"{}_traces",
|
|
crate::core::config::REDIS_KEY_PREFIX
|
|
.as_str()
|
|
.trim_end_matches(':')
|
|
);
|
|
for point in &ws_data.traces {
|
|
if let Some(ref vector) = point.vector {
|
|
let payload_val: serde_json::Value =
|
|
serde_json::to_value(&point.payload).unwrap_or(serde_json::Value::Null);
|
|
let point_id: u64 = match point.id.parse::<u64>() {
|
|
Ok(id) => id,
|
|
Err(_) => {
|
|
use std::hash::{Hash, Hasher};
|
|
let mut hasher = std::collections::hash_map::DefaultHasher::new();
|
|
point.id.hash(&mut hasher);
|
|
hasher.finish()
|
|
}
|
|
};
|
|
if let Err(e) = qdrant
|
|
.upsert_vector_to_collection(
|
|
&traces_coll,
|
|
point_id,
|
|
vector,
|
|
Some(payload_val),
|
|
)
|
|
.await
|
|
{
|
|
warn!("Failed to checkin trace vector {}: {}", point.id, e);
|
|
} else {
|
|
vectors_moved += 1;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
Err(e) => {
|
|
warn!("Failed to scroll Qdrant workspace for {}: {}", file_uuid, e);
|
|
}
|
|
}
|
|
|
|
// ── Cleanup workspace ──
|
|
if let Err(e) = workspace.clear().await {
|
|
warn!("Failed to clear workspace for {}: {}", file_uuid, e);
|
|
}
|
|
if let Err(e) = qdrant_ws.delete_by_file_uuid(file_uuid).await {
|
|
warn!(
|
|
"Failed to delete workspace vectors for {}: {}",
|
|
file_uuid, e
|
|
);
|
|
}
|
|
|
|
info!(
|
|
"Checkin complete for {}: {} pre_chunks, {} spk_dets, {} vectors",
|
|
file_uuid,
|
|
pre_chunks.len(),
|
|
spk_dets.len(),
|
|
vectors_moved,
|
|
);
|
|
|
|
Ok(CheckinResult {
|
|
file_uuid: file_uuid.to_string(),
|
|
pre_chunks_moved: pre_chunks.len(),
|
|
speaker_detections_moved: spk_dets.len(),
|
|
vectors_moved,
|
|
})
|
|
}
|
|
|
|
pub async fn checkout(db: &PostgresDb, file_uuid: &str) -> Result<CheckoutResult> {
|
|
let schema = crate::core::config::DATABASE_SCHEMA.as_str();
|
|
let table = crate::core::db::schema::table_name;
|
|
info!("Checkout starting for {} (schema={})", file_uuid, schema);
|
|
|
|
// Delete face_detections
|
|
let face_table = table("face_detections");
|
|
let face_result = sqlx::query(&format!("DELETE FROM {} WHERE file_uuid = $1", face_table))
|
|
.bind(file_uuid)
|
|
.execute(db.pool())
|
|
.await?;
|
|
info!(
|
|
"Checkout {}: deleted {} rows from {}",
|
|
file_uuid,
|
|
face_result.rows_affected(),
|
|
face_table
|
|
);
|
|
|
|
// Delete speaker_detections
|
|
let spk_table = table("speaker_detections");
|
|
let spk_result = sqlx::query(&format!("DELETE FROM {} WHERE file_uuid = $1", spk_table))
|
|
.bind(file_uuid)
|
|
.execute(db.pool())
|
|
.await?;
|
|
info!(
|
|
"Checkout {}: deleted {} rows from {}",
|
|
file_uuid,
|
|
spk_result.rows_affected(),
|
|
spk_table
|
|
);
|
|
|
|
// Delete pre_chunks
|
|
let pc_table = table("pre_chunks");
|
|
let pc_result = sqlx::query(&format!("DELETE FROM {} WHERE file_uuid = $1", pc_table))
|
|
.bind(file_uuid)
|
|
.execute(db.pool())
|
|
.await?;
|
|
info!(
|
|
"Checkout {}: deleted {} rows from {}",
|
|
file_uuid,
|
|
pc_result.rows_affected(),
|
|
pc_table
|
|
);
|
|
|
|
// Delete chunks
|
|
let chunk_table = table("chunk");
|
|
let chunk_result = sqlx::query(&format!("DELETE FROM {} WHERE file_uuid = $1", chunk_table))
|
|
.bind(file_uuid)
|
|
.execute(db.pool())
|
|
.await?;
|
|
info!(
|
|
"Checkout {}: deleted {} rows from {}",
|
|
file_uuid,
|
|
chunk_result.rows_affected(),
|
|
chunk_table
|
|
);
|
|
|
|
// Delete processor_results
|
|
let pr_table = table("processor_results");
|
|
let pr_result = sqlx::query(&format!("DELETE FROM {} WHERE file_uuid = $1", pr_table))
|
|
.bind(file_uuid)
|
|
.execute(db.pool())
|
|
.await?;
|
|
info!(
|
|
"Checkout {}: deleted {} rows from {}",
|
|
file_uuid,
|
|
pr_result.rows_affected(),
|
|
pr_table
|
|
);
|
|
|
|
// Delete Qdrant vectors from production
|
|
let qdrant = QdrantDb::new();
|
|
|
|
if let Err(e) = qdrant.delete_by_uuid(file_uuid).await {
|
|
warn!(
|
|
"Failed to delete chunk vectors from Qdrant for {}: {}",
|
|
file_uuid, e
|
|
);
|
|
}
|
|
|
|
let prefix = crate::core::config::REDIS_KEY_PREFIX
|
|
.as_str()
|
|
.trim_end_matches(':');
|
|
let traces_coll = format!("{}_traces", prefix);
|
|
let voice_coll = format!("{}_voice", file_uuid);
|
|
|
|
for coll in &[traces_coll, voice_coll] {
|
|
if let Err(e) = QdrantDb::delete_by_uuid_from_collection(
|
|
&qdrant.client,
|
|
&qdrant.base_url,
|
|
&qdrant.api_key,
|
|
coll,
|
|
file_uuid,
|
|
)
|
|
.await
|
|
{
|
|
warn!(
|
|
"Failed to delete vectors from {} for {}: {}",
|
|
coll, file_uuid, e
|
|
);
|
|
}
|
|
}
|
|
|
|
let rows_deleted = face_result.rows_affected()
|
|
+ spk_result.rows_affected()
|
|
+ pc_result.rows_affected()
|
|
+ chunk_result.rows_affected()
|
|
+ pr_result.rows_affected();
|
|
|
|
info!(
|
|
"Checkout complete for {}: {} PG rows deleted",
|
|
file_uuid, rows_deleted,
|
|
);
|
|
|
|
Ok(CheckoutResult {
|
|
file_uuid: file_uuid.to_string(),
|
|
rows_deleted: rows_deleted as usize,
|
|
})
|
|
}
|