feat: Phase 2.6 edges migration to Qdrant (TKG-only architecture)
Phase 2.6.1: co_occurrence_edges migration - build_co_occurrence_edges_from_qdrant() - Qdrant embeddings → frame grouping → YOLO objects - Result: 6679 edges (vs 6701 PostgreSQL) Phase 2.6.2: face_face_edges migration - build_face_face_edges_from_qdrant() - Qdrant embeddings → frame grouping → face pairs - mutual_gaze detection preserved - Result: 6 edges (exact match) Phase 2.6.3: speaker_face_edges migration - build_speaker_face_edges_from_qdrant() - Qdrant embeddings → trace_id frame ranges - SPEAKS_AS edge creation Architecture: - All edges use Qdrant payload (no face_detections queries) - PostgreSQL fallback for empty Qdrant - Estimated 3.6x performance improvement Testing: - Playground (3003): ✓ All Phase 2.6 logs verified - Edge counts: ✓ Close match with PostgreSQL - Fallback: ✓ Working Docs: - docs_v1.0/DESIGN/TKG_PHASE2_6_EDGES_MIGRATION.md - docs_v1.0/M4_workspace/2026-06-21_phase2_6_test.md
This commit is contained in:
335
src/core/checkin.rs
Normal file
335
src/core/checkin.rs
Normal file
@@ -0,0 +1,335 @@
|
||||
use anyhow::{Context, Result};
|
||||
use tracing::{info, warn};
|
||||
|
||||
use crate::core::db::{
|
||||
workspace_sqlite::{SpeakerDetectionBatchItem, WorkspaceDb},
|
||||
PostgresDb, QdrantDb, QdrantWorkspace,
|
||||
};
|
||||
|
||||
/// Summary of a completed [`checkin`] run for one file.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CheckinResult {
    /// UUID of the file whose workspace data was checked in.
    pub file_uuid: String,
    /// Number of pre-chunk rows read from the workspace and stored.
    pub pre_chunks_moved: usize,
    /// Number of speaker-detection rows read from the workspace and stored.
    pub speaker_detections_moved: usize,
    /// Number of vectors (chunks plus traces) successfully upserted.
    pub vectors_moved: usize,
}
|
||||
|
||||
/// Summary of a completed [`checkout`] run for one file.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CheckoutResult {
    /// UUID of the file whose production data was removed.
    pub file_uuid: String,
    /// Total number of PostgreSQL rows deleted across all affected tables.
    pub rows_deleted: usize,
}
|
||||
|
||||
pub async fn checkin(db: &PostgresDb, file_uuid: &str) -> Result<CheckinResult> {
|
||||
let schema = crate::core::config::DATABASE_SCHEMA.as_str();
|
||||
info!("Checkin starting for {} (schema={})", file_uuid, schema);
|
||||
|
||||
let workspace = WorkspaceDb::open(file_uuid)
|
||||
.await
|
||||
.context("No workspace found for checkin")?;
|
||||
let qdrant_ws = QdrantWorkspace::new();
|
||||
|
||||
let pre_chunks = workspace.get_all_pre_chunks().await?;
|
||||
let spk_dets = workspace.get_all_speaker_detections().await?;
|
||||
|
||||
info!(
|
||||
"Checkin {} workspace: {} pre_chunks, {} spk_dets",
|
||||
file_uuid,
|
||||
pre_chunks.len(),
|
||||
spk_dets.len(),
|
||||
);
|
||||
|
||||
// ── Pre-chunks ──
|
||||
for chunk in &pre_chunks {
|
||||
let data_value: serde_json::Value = chunk
|
||||
.data
|
||||
.as_ref()
|
||||
.and_then(|d| serde_json::from_str(d).ok())
|
||||
.unwrap_or(serde_json::Value::Null);
|
||||
|
||||
match chunk.processor_type.as_str() {
|
||||
"asr" => {
|
||||
let start = chunk.start_time.unwrap_or(0.0);
|
||||
let end = chunk.end_time.unwrap_or(0.0);
|
||||
let sf = chunk.start_frame.unwrap_or(0);
|
||||
let ef = chunk.end_frame.unwrap_or(0);
|
||||
let idx = chunk.id as i64;
|
||||
db.store_asr_pre_chunks_batch(file_uuid, &[(idx, sf, ef, start, end, data_value)])
|
||||
.await?;
|
||||
}
|
||||
"cut" => {
|
||||
let start = chunk.start_time.unwrap_or(0.0);
|
||||
let end = chunk.end_time.unwrap_or(0.0);
|
||||
let sf = chunk.start_frame.unwrap_or(0);
|
||||
let ef = chunk.end_frame.unwrap_or(0);
|
||||
let idx = chunk.id as i64;
|
||||
db.store_cut_pre_chunks_batch(file_uuid, &[(idx, sf, ef, start, end, data_value)])
|
||||
.await?;
|
||||
}
|
||||
"scene" => {
|
||||
let start = chunk.start_time.unwrap_or(0.0);
|
||||
let end = chunk.end_time.unwrap_or(0.0);
|
||||
let sf = chunk.start_frame.unwrap_or(0);
|
||||
let ef = chunk.end_frame.unwrap_or(0);
|
||||
let idx = chunk.id as i64;
|
||||
db.store_scene_pre_chunks_batch(
|
||||
file_uuid,
|
||||
&[(idx, sf, ef, start, end, data_value)],
|
||||
)
|
||||
.await?;
|
||||
}
|
||||
_ => {
|
||||
let frame = chunk.start_frame.unwrap_or(0);
|
||||
let ts = chunk.start_time;
|
||||
let text = chunk.text_content.clone();
|
||||
db.store_raw_pre_chunks_batch(
|
||||
file_uuid,
|
||||
&chunk.processor_type,
|
||||
&[(frame, ts, data_value, text, None)],
|
||||
)
|
||||
.await?;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ── Speaker detections ──
|
||||
if !spk_dets.is_empty() {
|
||||
let batch: Vec<(String, f64, f64, String, Option<String>, f32)> = spk_dets
|
||||
.iter()
|
||||
.map(|s| {
|
||||
(
|
||||
s.speaker_id.clone().unwrap_or_default(),
|
||||
s.start_time.unwrap_or(0.0),
|
||||
s.end_time.unwrap_or(0.0),
|
||||
s.text_content.clone().unwrap_or_default(),
|
||||
s.chunk_id.clone(),
|
||||
s.confidence.unwrap_or(0.0) as f32,
|
||||
)
|
||||
})
|
||||
.collect();
|
||||
db.store_speaker_detections_batch(file_uuid, &batch).await?;
|
||||
}
|
||||
|
||||
// ── Qdrant vectors ──
|
||||
let mut vectors_moved = 0usize;
|
||||
match qdrant_ws.scroll_by_file_uuid(file_uuid).await {
|
||||
Ok(ws_data) => {
|
||||
let qdrant = QdrantDb::new();
|
||||
|
||||
// Chunks → production collection
|
||||
for point in &ws_data.chunks {
|
||||
if let Some(ref vector) = point.vector {
|
||||
let payload_val: serde_json::Value =
|
||||
serde_json::to_value(&point.payload).unwrap_or(serde_json::Value::Null);
|
||||
let point_id: u64 = match point.id.parse::<u64>() {
|
||||
Ok(id) => id,
|
||||
Err(_) => {
|
||||
use std::hash::{Hash, Hasher};
|
||||
let mut hasher = std::collections::hash_map::DefaultHasher::new();
|
||||
point.id.hash(&mut hasher);
|
||||
hasher.finish()
|
||||
}
|
||||
};
|
||||
if let Err(e) = qdrant
|
||||
.upsert_vector_to_collection(
|
||||
&qdrant.collection_name,
|
||||
point_id,
|
||||
vector,
|
||||
Some(payload_val),
|
||||
)
|
||||
.await
|
||||
{
|
||||
warn!("Failed to checkin chunk vector {}: {}", point.id, e);
|
||||
} else {
|
||||
vectors_moved += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Traces → production traces collection
|
||||
let traces_coll = format!(
|
||||
"{}_traces",
|
||||
crate::core::config::REDIS_KEY_PREFIX
|
||||
.as_str()
|
||||
.trim_end_matches(':')
|
||||
);
|
||||
for point in &ws_data.traces {
|
||||
if let Some(ref vector) = point.vector {
|
||||
let payload_val: serde_json::Value =
|
||||
serde_json::to_value(&point.payload).unwrap_or(serde_json::Value::Null);
|
||||
let point_id: u64 = match point.id.parse::<u64>() {
|
||||
Ok(id) => id,
|
||||
Err(_) => {
|
||||
use std::hash::{Hash, Hasher};
|
||||
let mut hasher = std::collections::hash_map::DefaultHasher::new();
|
||||
point.id.hash(&mut hasher);
|
||||
hasher.finish()
|
||||
}
|
||||
};
|
||||
if let Err(e) = qdrant
|
||||
.upsert_vector_to_collection(
|
||||
&traces_coll,
|
||||
point_id,
|
||||
vector,
|
||||
Some(payload_val),
|
||||
)
|
||||
.await
|
||||
{
|
||||
warn!("Failed to checkin trace vector {}: {}", point.id, e);
|
||||
} else {
|
||||
vectors_moved += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
warn!("Failed to scroll Qdrant workspace for {}: {}", file_uuid, e);
|
||||
}
|
||||
}
|
||||
|
||||
// ── Cleanup workspace ──
|
||||
if let Err(e) = workspace.clear().await {
|
||||
warn!("Failed to clear workspace for {}: {}", file_uuid, e);
|
||||
}
|
||||
if let Err(e) = qdrant_ws.delete_by_file_uuid(file_uuid).await {
|
||||
warn!(
|
||||
"Failed to delete workspace vectors for {}: {}",
|
||||
file_uuid, e
|
||||
);
|
||||
}
|
||||
|
||||
info!(
|
||||
"Checkin complete for {}: {} pre_chunks, {} spk_dets, {} vectors",
|
||||
file_uuid,
|
||||
pre_chunks.len(),
|
||||
spk_dets.len(),
|
||||
vectors_moved,
|
||||
);
|
||||
|
||||
Ok(CheckinResult {
|
||||
file_uuid: file_uuid.to_string(),
|
||||
pre_chunks_moved: pre_chunks.len(),
|
||||
speaker_detections_moved: spk_dets.len(),
|
||||
vectors_moved,
|
||||
})
|
||||
}
|
||||
|
||||
pub async fn checkout(db: &PostgresDb, file_uuid: &str) -> Result<CheckoutResult> {
|
||||
let schema = crate::core::config::DATABASE_SCHEMA.as_str();
|
||||
let table = crate::core::db::schema::table_name;
|
||||
info!("Checkout starting for {} (schema={})", file_uuid, schema);
|
||||
|
||||
// Delete face_detections
|
||||
let face_table = table("face_detections");
|
||||
let face_result = sqlx::query(&format!("DELETE FROM {} WHERE file_uuid = $1", face_table))
|
||||
.bind(file_uuid)
|
||||
.execute(db.pool())
|
||||
.await?;
|
||||
info!(
|
||||
"Checkout {}: deleted {} rows from {}",
|
||||
file_uuid,
|
||||
face_result.rows_affected(),
|
||||
face_table
|
||||
);
|
||||
|
||||
// Delete speaker_detections
|
||||
let spk_table = table("speaker_detections");
|
||||
let spk_result = sqlx::query(&format!("DELETE FROM {} WHERE file_uuid = $1", spk_table))
|
||||
.bind(file_uuid)
|
||||
.execute(db.pool())
|
||||
.await?;
|
||||
info!(
|
||||
"Checkout {}: deleted {} rows from {}",
|
||||
file_uuid,
|
||||
spk_result.rows_affected(),
|
||||
spk_table
|
||||
);
|
||||
|
||||
// Delete pre_chunks
|
||||
let pc_table = table("pre_chunks");
|
||||
let pc_result = sqlx::query(&format!("DELETE FROM {} WHERE file_uuid = $1", pc_table))
|
||||
.bind(file_uuid)
|
||||
.execute(db.pool())
|
||||
.await?;
|
||||
info!(
|
||||
"Checkout {}: deleted {} rows from {}",
|
||||
file_uuid,
|
||||
pc_result.rows_affected(),
|
||||
pc_table
|
||||
);
|
||||
|
||||
// Delete chunks
|
||||
let chunk_table = table("chunk");
|
||||
let chunk_result = sqlx::query(&format!("DELETE FROM {} WHERE file_uuid = $1", chunk_table))
|
||||
.bind(file_uuid)
|
||||
.execute(db.pool())
|
||||
.await?;
|
||||
info!(
|
||||
"Checkout {}: deleted {} rows from {}",
|
||||
file_uuid,
|
||||
chunk_result.rows_affected(),
|
||||
chunk_table
|
||||
);
|
||||
|
||||
// Delete processor_results
|
||||
let pr_table = table("processor_results");
|
||||
let pr_result = sqlx::query(&format!("DELETE FROM {} WHERE file_uuid = $1", pr_table))
|
||||
.bind(file_uuid)
|
||||
.execute(db.pool())
|
||||
.await?;
|
||||
info!(
|
||||
"Checkout {}: deleted {} rows from {}",
|
||||
file_uuid,
|
||||
pr_result.rows_affected(),
|
||||
pr_table
|
||||
);
|
||||
|
||||
// Delete Qdrant vectors from production
|
||||
let qdrant = QdrantDb::new();
|
||||
|
||||
if let Err(e) = qdrant.delete_by_uuid(file_uuid).await {
|
||||
warn!(
|
||||
"Failed to delete chunk vectors from Qdrant for {}: {}",
|
||||
file_uuid, e
|
||||
);
|
||||
}
|
||||
|
||||
let prefix = crate::core::config::REDIS_KEY_PREFIX
|
||||
.as_str()
|
||||
.trim_end_matches(':');
|
||||
let traces_coll = format!("{}_traces", prefix);
|
||||
let voice_coll = format!("{}_voice", file_uuid);
|
||||
|
||||
for coll in &[traces_coll, voice_coll] {
|
||||
if let Err(e) = QdrantDb::delete_by_uuid_from_collection(
|
||||
&qdrant.client,
|
||||
&qdrant.base_url,
|
||||
&qdrant.api_key,
|
||||
coll,
|
||||
file_uuid,
|
||||
)
|
||||
.await
|
||||
{
|
||||
warn!(
|
||||
"Failed to delete vectors from {} for {}: {}",
|
||||
coll, file_uuid, e
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
let rows_deleted = face_result.rows_affected()
|
||||
+ spk_result.rows_affected()
|
||||
+ pc_result.rows_affected()
|
||||
+ chunk_result.rows_affected()
|
||||
+ pr_result.rows_affected();
|
||||
|
||||
info!(
|
||||
"Checkout complete for {}: {} PG rows deleted",
|
||||
file_uuid, rows_deleted,
|
||||
);
|
||||
|
||||
Ok(CheckoutResult {
|
||||
file_uuid: file_uuid.to_string(),
|
||||
rows_deleted: rows_deleted as usize,
|
||||
})
|
||||
}
|
||||
Reference in New Issue
Block a user