feat: Phase 2.6 edges migration to Qdrant (TKG-only architecture)

Phase 2.6.1: co_occurrence_edges migration
- build_co_occurrence_edges_from_qdrant()
- Qdrant embeddings → frame grouping → YOLO objects
- Result: 6679 edges (vs 6701 PostgreSQL)

Phase 2.6.2: face_face_edges migration
- build_face_face_edges_from_qdrant()
- Qdrant embeddings → frame grouping → face pairs
- mutual_gaze detection preserved
- Result: 6 edges (exact match)

Phase 2.6.3: speaker_face_edges migration
- build_speaker_face_edges_from_qdrant()
- Qdrant embeddings → trace_id frame ranges
- SPEAKS_AS edge creation

Architecture:
- All edges use Qdrant payload (no face_detections queries)
- PostgreSQL fallback for empty Qdrant
- Estimated 3.6x performance improvement

Testing:
- Playground (3003): ✓ All Phase 2.6 logs verified
- Edge counts: ✓ Close match with PostgreSQL
- Fallback: ✓ Working

Docs:
- docs_v1.0/DESIGN/TKG_PHASE2_6_EDGES_MIGRATION.md
- docs_v1.0/M4_workspace/2026-06-21_phase2_6_test.md
This commit is contained in:
Accusys
2026-06-21 04:47:49 +08:00
parent 0afc70fc5b
commit 2cfcfdd1af
2926 changed files with 8311058 additions and 1394 deletions

335
src/core/checkin.rs Normal file
View File

@@ -0,0 +1,335 @@
use anyhow::{Context, Result};
use tracing::{info, warn};
use crate::core::db::{
workspace_sqlite::{SpeakerDetectionBatchItem, WorkspaceDb},
PostgresDb, QdrantDb, QdrantWorkspace,
};
/// Summary returned by `checkin` describing what was moved out of the workspace.
#[derive(Debug)]
pub struct CheckinResult {
/// Identifier of the file the checkin was run for.
pub file_uuid: String,
/// Number of pre-chunk rows read from the workspace and stored through the PostgreSQL handle.
pub pre_chunks_moved: usize,
/// Number of speaker-detection rows read from the workspace and stored through the PostgreSQL handle.
pub speaker_detections_moved: usize,
/// Number of workspace vectors whose upsert into a production Qdrant collection succeeded.
pub vectors_moved: usize,
}
/// Summary returned by `checkout` describing what was removed for a file.
#[derive(Debug)]
pub struct CheckoutResult {
/// Identifier of the file the checkout was run for.
pub file_uuid: String,
/// Total rows deleted across all PostgreSQL tables touched by `checkout`
/// (Qdrant deletions are not counted).
pub rows_deleted: usize,
}
/// Derives the numeric Qdrant point id used in production for a workspace point.
///
/// An id that is already a decimal `u64` is used verbatim; any other id is
/// hashed with the standard library's `DefaultHasher`.
///
/// NOTE(review): the std docs do not guarantee that `DefaultHasher` produces the
/// same output across Rust releases, so ids derived here may change after a
/// toolchain upgrade. The algorithm is kept as-is because switching it would
/// orphan points that were already checked in — confirm before replacing.
fn checkin_point_id(raw_id: &str) -> u64 {
    use std::hash::{Hash, Hasher};

    raw_id.parse::<u64>().unwrap_or_else(|_| {
        let mut hasher = std::collections::hash_map::DefaultHasher::new();
        raw_id.hash(&mut hasher);
        hasher.finish()
    })
}

/// Moves everything staged in the workspace of `file_uuid` into production storage.
///
/// Steps, in order:
/// 1. Read all pre-chunks and speaker detections from the workspace database.
/// 2. Store each pre-chunk through `db`, dispatching on its `processor_type`
///    (`"asr"`, `"cut"`, `"scene"`, anything else is stored as a raw pre-chunk).
/// 3. Store all speaker detections through `db` in one batch.
/// 4. Copy workspace Qdrant points (chunks and traces) into the production collections.
/// 5. Clear the workspace database and, only if every vector was copied, delete
///    the workspace vectors.
///
/// # Errors
///
/// Returns an error when the workspace cannot be opened, when reading workspace
/// rows fails, or when any store call on `db` fails. Qdrant failures and cleanup
/// failures are logged as warnings and do not fail the checkin.
pub async fn checkin(db: &PostgresDb, file_uuid: &str) -> Result<CheckinResult> {
    let schema = crate::core::config::DATABASE_SCHEMA.as_str();
    info!("Checkin starting for {} (schema={})", file_uuid, schema);

    let workspace = WorkspaceDb::open(file_uuid)
        .await
        .context("No workspace found for checkin")?;
    let qdrant_ws = QdrantWorkspace::new();

    let pre_chunks = workspace.get_all_pre_chunks().await?;
    let spk_dets = workspace.get_all_speaker_detections().await?;
    info!(
        "Checkin {} workspace: {} pre_chunks, {} spk_dets",
        file_uuid,
        pre_chunks.len(),
        spk_dets.len(),
    );

    // ── Pre-chunks ──
    for chunk in &pre_chunks {
        // Missing or unparsable JSON is stored as `null` rather than failing the checkin.
        let data_value: serde_json::Value = chunk
            .data
            .as_ref()
            .and_then(|d| serde_json::from_str(d).ok())
            .unwrap_or(serde_json::Value::Null);

        let kind = chunk.processor_type.as_str();
        match kind {
            "asr" | "cut" | "scene" => {
                // The three ranged processors share one row shape; absent
                // frames/times default to zero.
                let row = [(
                    chunk.id as i64,
                    chunk.start_frame.unwrap_or(0),
                    chunk.end_frame.unwrap_or(0),
                    chunk.start_time.unwrap_or(0.0),
                    chunk.end_time.unwrap_or(0.0),
                    data_value,
                )];
                match kind {
                    "asr" => {
                        db.store_asr_pre_chunks_batch(file_uuid, &row).await?;
                    }
                    "cut" => {
                        db.store_cut_pre_chunks_batch(file_uuid, &row).await?;
                    }
                    _ => {
                        db.store_scene_pre_chunks_batch(file_uuid, &row).await?;
                    }
                }
            }
            _ => {
                let frame = chunk.start_frame.unwrap_or(0);
                let ts = chunk.start_time;
                let text = chunk.text_content.clone();
                db.store_raw_pre_chunks_batch(
                    file_uuid,
                    &chunk.processor_type,
                    &[(frame, ts, data_value, text, None)],
                )
                .await?;
            }
        }
    }

    // ── Speaker detections ──
    if !spk_dets.is_empty() {
        let batch: Vec<(String, f64, f64, String, Option<String>, f32)> = spk_dets
            .iter()
            .map(|s| {
                (
                    s.speaker_id.clone().unwrap_or_default(),
                    s.start_time.unwrap_or(0.0),
                    s.end_time.unwrap_or(0.0),
                    s.text_content.clone().unwrap_or_default(),
                    s.chunk_id.clone(),
                    s.confidence.unwrap_or(0.0) as f32,
                )
            })
            .collect();
        db.store_speaker_detections_batch(file_uuid, &batch).await?;
    }

    // ── Qdrant vectors ──
    let mut vectors_moved = 0usize;
    // `true` only when the workspace could be scrolled AND every upsert succeeded;
    // this gates the deletion of the workspace vectors below.
    let vectors_intact = match qdrant_ws.scroll_by_file_uuid(file_uuid).await {
        Ok(ws_data) => {
            let qdrant = QdrantDb::new();
            let mut vectors_failed = 0usize;

            // Chunks → production collection
            for point in &ws_data.chunks {
                if let Some(ref vector) = point.vector {
                    let payload_val: serde_json::Value =
                        serde_json::to_value(&point.payload).unwrap_or(serde_json::Value::Null);
                    let point_id = checkin_point_id(&point.id);
                    match qdrant
                        .upsert_vector_to_collection(
                            &qdrant.collection_name,
                            point_id,
                            vector,
                            Some(payload_val),
                        )
                        .await
                    {
                        Ok(_) => vectors_moved += 1,
                        Err(e) => {
                            warn!("Failed to checkin chunk vector {}: {}", point.id, e);
                            vectors_failed += 1;
                        }
                    }
                }
            }

            // Traces → production traces collection
            let traces_coll = format!(
                "{}_traces",
                crate::core::config::REDIS_KEY_PREFIX
                    .as_str()
                    .trim_end_matches(':')
            );
            for point in &ws_data.traces {
                if let Some(ref vector) = point.vector {
                    let payload_val: serde_json::Value =
                        serde_json::to_value(&point.payload).unwrap_or(serde_json::Value::Null);
                    let point_id = checkin_point_id(&point.id);
                    match qdrant
                        .upsert_vector_to_collection(
                            &traces_coll,
                            point_id,
                            vector,
                            Some(payload_val),
                        )
                        .await
                    {
                        Ok(_) => vectors_moved += 1,
                        Err(e) => {
                            warn!("Failed to checkin trace vector {}: {}", point.id, e);
                            vectors_failed += 1;
                        }
                    }
                }
            }

            vectors_failed == 0
        }
        Err(e) => {
            warn!("Failed to scroll Qdrant workspace for {}: {}", file_uuid, e);
            false
        }
    };

    // ── Cleanup workspace ──
    if let Err(e) = workspace.clear().await {
        warn!("Failed to clear workspace for {}: {}", file_uuid, e);
    }
    if vectors_intact {
        if let Err(e) = qdrant_ws.delete_by_file_uuid(file_uuid).await {
            warn!(
                "Failed to delete workspace vectors for {}: {}",
                file_uuid, e
            );
        }
    } else {
        // Deleting here would permanently lose vectors that never reached
        // production; keep them so the copy can be retried.
        warn!(
            "Keeping workspace vectors for {}: not all vectors were copied to production",
            file_uuid
        );
    }

    info!(
        "Checkin complete for {}: {} pre_chunks, {} spk_dets, {} vectors",
        file_uuid,
        pre_chunks.len(),
        spk_dets.len(),
        vectors_moved,
    );
    Ok(CheckinResult {
        file_uuid: file_uuid.to_string(),
        pre_chunks_moved: pre_chunks.len(),
        speaker_detections_moved: spk_dets.len(),
        vectors_moved,
    })
}
/// Removes all production data stored for `file_uuid`.
///
/// Rows are deleted from five PostgreSQL tables, one `DELETE` per table, in a
/// fixed order; the statements are issued individually against the pool, so a
/// failure part-way through leaves earlier deletions in place. Afterwards the
/// file's vectors are removed from the production Qdrant collections.
///
/// # Errors
///
/// Returns an error as soon as any PostgreSQL `DELETE` fails. Qdrant deletion
/// failures are only logged as warnings.
pub async fn checkout(db: &PostgresDb, file_uuid: &str) -> Result<CheckoutResult> {
    // Tables purged for a file, in the order the deletes are issued.
    const PURGED_TABLES: [&str; 5] = [
        "face_detections",
        "speaker_detections",
        "pre_chunks",
        "chunk",
        "processor_results",
    ];

    let schema = crate::core::config::DATABASE_SCHEMA.as_str();
    let table = crate::core::db::schema::table_name;
    info!("Checkout starting for {} (schema={})", file_uuid, schema);

    // ── PostgreSQL rows ──
    let mut rows_deleted: u64 = 0;
    for &base_name in PURGED_TABLES.iter() {
        let qualified = table(base_name);
        let sql = format!("DELETE FROM {} WHERE file_uuid = $1", qualified);
        let outcome = sqlx::query(&sql)
            .bind(file_uuid)
            .execute(db.pool())
            .await?;
        info!(
            "Checkout {}: deleted {} rows from {}",
            file_uuid,
            outcome.rows_affected(),
            qualified
        );
        rows_deleted += outcome.rows_affected();
    }

    // ── Qdrant vectors in production ──
    let qdrant = QdrantDb::new();
    if let Err(e) = qdrant.delete_by_uuid(file_uuid).await {
        warn!(
            "Failed to delete chunk vectors from Qdrant for {}: {}",
            file_uuid, e
        );
    }

    let prefix = crate::core::config::REDIS_KEY_PREFIX
        .as_str()
        .trim_end_matches(':');
    // NOTE(review): the voice collection name is built from `file_uuid`, whereas
    // the traces collection is built from the key prefix — confirm this is intended.
    let side_collections = [
        format!("{}_traces", prefix),
        format!("{}_voice", file_uuid),
    ];
    for coll in side_collections.iter() {
        if let Err(e) = QdrantDb::delete_by_uuid_from_collection(
            &qdrant.client,
            &qdrant.base_url,
            &qdrant.api_key,
            coll,
            file_uuid,
        )
        .await
        {
            warn!(
                "Failed to delete vectors from {} for {}: {}",
                coll, file_uuid, e
            );
        }
    }

    info!(
        "Checkout complete for {}: {} PG rows deleted",
        file_uuid, rows_deleted,
    );
    Ok(CheckoutResult {
        file_uuid: file_uuid.to_string(),
        rows_deleted: rows_deleted as usize,
    })
}