feat: Phase 2.5 gaze_trace and lip_trace Qdrant migration + Charade Q&A test

Phase 2.5.1: gaze_trace_nodes from Qdrant
- build_gaze_trace_nodes_from_qdrant()
- Read trace_id, frame, bbox from Qdrant payload
- Compute gaze stats (yaw, pitch, roll, gaze direction, blink)
- No PostgreSQL face_detections dependency

Phase 2.5.2: lip_trace_nodes from Qdrant + face.json
- build_lip_trace_nodes_from_qdrant()
- Match trace_id using Qdrant embeddings + face.json bbox
- Compute lip stats (openness, variance, speaking frames)
- Fixed face.json bbox structure (x,y,width,height not bbox object)

Test results:
- 23 gaze_trace nodes from Qdrant
- 23 lip_trace nodes from Qdrant + face.json
- 51 lip_sync edges created
- Charade Q&A: 20 identities, 75 relationship chunks

Docs:
- TKG_PHASE2_NONFACE_MIGRATION_V1.0.md (migration plan)
- 2026-06-21_charade_qa_test.md (Q&A test report)
This commit is contained in:
Accusys
2026-06-21 02:17:08 +08:00
parent 23c440104b
commit c39805bb8e
5 changed files with 802 additions and 16 deletions

View File

@@ -1354,6 +1354,160 @@ async fn build_gaze_trace_nodes(
pool: &PgPool,
file_uuid: &str,
pose_data: &[FacePose],
) -> Result<usize> {
use crate::core::db::face_embedding_db::FaceEmbeddingDb;
// Phase 2.5.1: Try Qdrant first
let face_db = FaceEmbeddingDb::new();
let qdrant_embeddings = face_db.get_all_embeddings_for_file(file_uuid).await?;
if !qdrant_embeddings.is_empty() {
tracing::info!("[TKG-Phase2.5] Building gaze_trace nodes from Qdrant ({} embeddings)", qdrant_embeddings.len());
return build_gaze_trace_nodes_from_qdrant(pool, file_uuid, pose_data, qdrant_embeddings).await;
}
tracing::info!("[TKG-Phase2.5] No Qdrant embeddings, falling back to PostgreSQL");
build_gaze_trace_nodes_from_pg(pool, file_uuid, pose_data).await
}
async fn build_gaze_trace_nodes_from_qdrant(
pool: &PgPool,
file_uuid: &str,
pose_data: &[FacePose],
qdrant_embeddings: Vec<(String, Vec<f32>, crate::core::db::face_embedding_db::FaceEmbeddingPayload)>,
) -> Result<usize> {
use crate::core::db::face_embedding_db::FaceEmbeddingPayload;
let nodes_table = t("tkg_nodes");
// Group by trace_id
let mut trace_frames: HashMap<i64, Vec<(i64, f64, f64, f64, f64)>> = HashMap::new();
for (_, _, payload) in &qdrant_embeddings {
trace_frames
.entry(payload.trace_id as i64)
.or_default()
.push((
payload.frame,
payload.bbox_x,
payload.bbox_y,
payload.bbox_w,
payload.bbox_h,
));
}
if trace_frames.is_empty() {
tracing::warn!("[TKG-Phase2.5] No trace data in Qdrant");
return Ok(0);
}
let mut count = 0;
for (tid, frames) in &trace_frames {
let external_id = format!("gaze_{}", tid);
// Compute gaze stats for this trace
let mut frame_count = 0i64;
let mut first_frame = i64::MAX;
let mut last_frame = i64::MIN;
let mut yaw_sum = 0.0f64;
let mut pitch_sum = 0.0f64;
let mut roll_sum = 0.0f64;
let mut gaze_dir_counts: HashMap<&str, i64> = HashMap::new();
let mut blink_candidates = 0i64;
let mut prev_openness = 0.0f64;
for (frame, x, y, w, h) in frames {
if let Some((yaw, pitch, roll)) = get_pose_for_face(*frame, *x, *y, *w, *h, pose_data) {
frame_count += 1;
if *frame < first_frame {
first_frame = *frame;
}
if *frame > last_frame {
last_frame = *frame;
}
yaw_sum += yaw;
pitch_sum += pitch;
roll_sum += roll;
// Gaze direction
let gaze_dir = GazeDirection::from_yaw_pitch(yaw, pitch);
*gaze_dir_counts.entry(gaze_dir.as_str()).or_default() += 1;
// Blink detection (eye openness from pitch variance)
let openness = (pitch.abs() * 10.0).min(1.0);
if prev_openness > 0.5 && openness < 0.2 {
blink_candidates += 1;
}
prev_openness = openness;
}
}
if frame_count == 0 {
continue;
}
let avg_yaw = yaw_sum / frame_count as f64;
let avg_pitch = pitch_sum / frame_count as f64;
let avg_roll = roll_sum / frame_count as f64;
let dominant_gaze = gaze_dir_counts
.iter()
.max_by_key(|(_, &c)| c)
.map(|(&d, _)| d)
.unwrap_or("unknown");
// Compute eye openness and blink rate
let blink_rate = if frame_count > 1 {
blink_candidates as f64 / (frame_count as f64 / 30.0) // per second at 30fps
} else {
0.0
};
let (gaze_dx, gaze_dy) = compute_gaze_vector(avg_yaw, avg_pitch);
let props = serde_json::json!({
"trace_id": tid,
"frame_count": frame_count,
"start_frame": first_frame,
"end_frame": last_frame,
"avg_yaw": (avg_yaw * 1000.0).round() / 1000.0,
"avg_pitch": (avg_pitch * 1000.0).round() / 1000.0,
"avg_roll": (avg_roll * 1000.0).round() / 1000.0,
"head_direction": dominant_gaze,
"gaze_direction": GazeDirection::from_yaw_pitch(avg_yaw, avg_pitch).as_str(),
"gaze_vector": {"dx": (gaze_dx * 1000.0).round() / 1000.0, "dy": (gaze_dy * 1000.0).round() / 1000.0},
"eye_openness": (prev_openness * 100.0).round() / 100.0,
"blink_count": blink_candidates,
"blink_rate": (blink_rate * 100.0).round() / 100.0,
});
sqlx::query(&format!(
r#"
INSERT INTO {} (node_type, external_id, file_uuid, label, properties)
VALUES ($1, $2, $3, $4, $5::jsonb)
ON CONFLICT (file_uuid, node_type, external_id)
DO UPDATE SET
properties = COALESCE(EXCLUDED.properties, tkg_nodes.properties),
label = COALESCE(NULLIF(EXCLUDED.label, ''), tkg_nodes.label)
"#,
nodes_table
))
.bind("gaze_trace")
.bind(&external_id)
.bind(file_uuid)
.bind(&external_id)
.bind(serde_json::to_string(&props)?)
.execute(pool)
.await?;
count += 1;
}
tracing::info!("[TKG-Phase2.5] Built {} gaze_trace nodes from Qdrant", count);
Ok(count)
}
async fn build_gaze_trace_nodes_from_pg(
pool: &PgPool,
file_uuid: &str,
pose_data: &[FacePose],
) -> Result<usize> {
let face_table = t("face_detections");
let nodes_table = t("tkg_nodes");
@@ -1655,6 +1809,227 @@ async fn build_lip_trace_nodes(
file_uuid: &str,
output_dir: &str,
pose_data: &[FacePose],
) -> Result<usize> {
use crate::core::db::face_embedding_db::FaceEmbeddingDb;
// Phase 2.5.2: Try Qdrant first for trace_id mapping
let face_db = FaceEmbeddingDb::new();
let qdrant_embeddings = face_db.get_all_embeddings_for_file(file_uuid).await?;
if !qdrant_embeddings.is_empty() {
tracing::info!("[TKG-Phase2.5] Building lip_trace nodes from Qdrant + face.json");
return build_lip_trace_nodes_from_qdrant(pool, file_uuid, output_dir, pose_data, qdrant_embeddings).await;
}
tracing::info!("[TKG-Phase2.5] No Qdrant embeddings, falling back to PostgreSQL");
build_lip_trace_nodes_from_pg(pool, file_uuid, output_dir, pose_data).await
}
async fn build_lip_trace_nodes_from_qdrant(
pool: &PgPool,
file_uuid: &str,
output_dir: &str,
pose_data: &[FacePose],
qdrant_embeddings: Vec<(String, Vec<f32>, crate::core::db::face_embedding_db::FaceEmbeddingPayload)>,
) -> Result<usize> {
use crate::core::db::face_embedding_db::FaceEmbeddingPayload;
let nodes_table = t("tkg_nodes");
// Load lip data from face.json
let path = Path::new(output_dir).join(format!("{}.face.json", file_uuid));
if !path.exists() {
return Ok(0);
}
let content = std::fs::read_to_string(&path)
.with_context(|| format!("Failed to read face.json: {}", path.display()))?;
let json: serde_json::Value = serde_json::from_str(&content)?;
// Build trace_id mapping from Qdrant: frame → Vec<(trace_id, bbox)>
let mut frame_trace_map: HashMap<i64, Vec<(i64, f64, f64, f64, f64)>> = HashMap::new();
for (_, _, payload) in &qdrant_embeddings {
frame_trace_map
.entry(payload.frame)
.or_default()
.push((
payload.trace_id as i64,
payload.bbox_x,
payload.bbox_y,
payload.bbox_w,
payload.bbox_h,
));
}
// Helper function to match trace_id by bbox distance
let match_trace_id = |frame: i64, x: f64, y: f64, w: f64, h: f64| -> Option<i64> {
let traces = frame_trace_map.get(&frame)?;
if traces.is_empty() {
return None;
}
// Find closest by bbox center distance
let mut best: Option<(i64, f64)> = None;
for (tid, tx, ty, tw, th) in traces {
let cx = x + w / 2.0;
let cy = y + h / 2.0;
let tcx = tx + tw / 2.0;
let tcy = ty + th / 2.0;
let dist = ((cx - tcx).powi(2) + (cy - tcy).powi(2)).sqrt();
if best.is_none() || dist < best.unwrap().1 {
best = Some((*tid, dist));
}
}
best.map(|(tid, _)| tid)
};
// Group by trace_id: trace_id → Vec<(frame, inner_lips_area, outer_lips_area)>
let mut lip_data: HashMap<i64, Vec<(i64, f64, f64)>> = HashMap::new();
if let Some(frames) = json.get("frames").and_then(|v| v.as_array()) {
for frame_entry in frames {
let frame_num = frame_entry
.get("frame")
.and_then(|v| v.as_i64())
.unwrap_or(0);
if let Some(faces) = frame_entry.get("faces").and_then(|v| v.as_array()) {
for face in faces {
// face.json has x, y, width, height (not bbox object)
let x = face.get("x").and_then(|v| v.as_f64()).unwrap_or(0.0);
let y = face.get("y").and_then(|v| v.as_f64()).unwrap_or(0.0);
let w = face.get("width").and_then(|v| v.as_f64()).unwrap_or(0.0);
let h = face.get("height").and_then(|v| v.as_f64()).unwrap_or(0.0);
// Get trace_id from Qdrant mapping
let trace_id = match match_trace_id(frame_num, x, y, w, h) {
Some(tid) => tid,
None => continue,
};
// Extract lip landmarks
let lips = face.get("lips");
if let Some(lips_obj) = lips.and_then(|v| v.as_object()) {
let inner_area = compute_lip_area(lips_obj.get("inner_lips"));
let outer_area = compute_lip_area(lips_obj.get("outer_lips"));
if inner_area > 0.0 || outer_area > 0.0 {
lip_data
.entry(trace_id)
.or_default()
.push((frame_num, inner_area, outer_area));
}
}
}
}
}
}
if lip_data.is_empty() {
tracing::warn!("[TKG-Phase2.5] No lip data matched");
return Ok(0);
}
let mut count = 0;
for (tid, frames) in &lip_data {
let external_id = format!("lip_{}", tid);
let frame_count = frames.len() as i64;
let first_frame = frames.iter().map(|(f, _, _)| *f).min().unwrap_or(0);
let last_frame = frames.iter().map(|(f, _, _)| *f).max().unwrap_or(0);
let avg_inner = frames.iter().map(|(_, i, _)| *i).sum::<f64>() / frame_count as f64;
let avg_outer = frames.iter().map(|(_, _, o)| *o).sum::<f64>() / frame_count as f64;
let avg_openness = if avg_outer > 0.0 {
avg_inner / avg_outer
} else {
0.0
};
// Compute movement variance
let openness_values: Vec<f64> = frames
.iter()
.map(|(_, i, o)| if *o > 0.0 { i / o } else { 0.0 })
.collect();
let mean_openness = openness_values.iter().sum::<f64>() / openness_values.len() as f64;
let variance = openness_values
.iter()
.map(|&v| (v - mean_openness).powi(2))
.sum::<f64>()
/ openness_values.len() as f64;
// Count speaking frames (openness > threshold)
let speaking_threshold = avg_openness * 1.2;
let speaking_frames = frames
.iter()
.filter(|(_, i, o)| {
if *o > 0.0 {
i / o > speaking_threshold
} else {
false
}
})
.count() as i64;
// Get pose for this trace
let (avg_yaw, avg_pitch) = if let Some((y, p, _)) = frames
.iter()
.filter_map(|(f, _, _)| {
pose_data
.iter()
.find(|fp| fp.frame == *f)
.map(|fp| (fp.yaw, fp.pitch, fp.roll))
})
.next()
{
(y, p)
} else {
(0.0, 0.0)
};
let props = serde_json::json!({
"trace_id": tid,
"frame_count": frame_count,
"start_frame": first_frame,
"end_frame": last_frame,
"avg_openness": (avg_openness * 1000.0).round() / 1000.0,
"avg_inner_area": (avg_inner * 100.0).round() / 100.0,
"avg_outer_area": (avg_outer * 100.0).round() / 100.0,
"movement_variance": (variance * 1000.0).round() / 1000.0,
"speaking_frames": speaking_frames,
"silent_frames": frame_count - speaking_frames,
"avg_yaw": (avg_yaw * 1000.0).round() / 1000.0,
"avg_pitch": (avg_pitch * 1000.0).round() / 1000.0,
});
sqlx::query(&format!(
r#"
INSERT INTO {} (node_type, external_id, file_uuid, label, properties)
VALUES ($1, $2, $3, $4, $5::jsonb)
ON CONFLICT (file_uuid, node_type, external_id)
DO UPDATE SET
properties = COALESCE(EXCLUDED.properties, tkg_nodes.properties),
label = COALESCE(NULLIF(EXCLUDED.label, ''), tkg_nodes.label)
"#,
nodes_table
))
.bind("lip_trace")
.bind(&external_id)
.bind(file_uuid)
.bind(&format!("Lip Trace {}", tid))
.bind(serde_json::to_string(&props)?)
.execute(pool)
.await?;
count += 1;
}
tracing::info!("[TKG-Phase2.5] Built {} lip_trace nodes from Qdrant", count);
Ok(count)
}
async fn build_lip_trace_nodes_from_pg(
pool: &PgPool,
file_uuid: &str,
output_dir: &str,
pose_data: &[FacePose],
) -> Result<usize> {
let face_table = t("face_detections");
let nodes_table = t("tkg_nodes");
@@ -1680,14 +2055,11 @@ async fn build_lip_trace_nodes(
.unwrap_or(0);
if let Some(faces) = frame_entry.get("faces").and_then(|v| v.as_array()) {
for face in faces {
let bbox = match face.get("bbox") {
Some(b) => b,
None => continue,
};
let x = bbox.get("x").and_then(|v| v.as_f64()).unwrap_or(0.0);
let y = bbox.get("y").and_then(|v| v.as_f64()).unwrap_or(0.0);
let w = bbox.get("width").and_then(|v| v.as_f64()).unwrap_or(0.0);
let h = bbox.get("height").and_then(|v| v.as_f64()).unwrap_or(0.0);
// face.json has x, y, width, height (not bbox object)
let x = face.get("x").and_then(|v| v.as_f64()).unwrap_or(0.0);
let y = face.get("y").and_then(|v| v.as_f64()).unwrap_or(0.0);
let w = face.get("width").and_then(|v| v.as_f64()).unwrap_or(0.0);
let h = face.get("height").and_then(|v| v.as_f64()).unwrap_or(0.0);
// Get trace_id for this face
let trace_id =
@@ -2244,14 +2616,11 @@ async fn build_skin_tone_trace_nodes(
.unwrap_or(0);
if let Some(faces) = frame_entry.get("faces").and_then(|v| v.as_array()) {
for face in faces {
let bbox = match face.get("bbox") {
Some(b) => b,
None => continue,
};
let x = bbox.get("x").and_then(|v| v.as_f64()).unwrap_or(0.0);
let y = bbox.get("y").and_then(|v| v.as_f64()).unwrap_or(0.0);
let w = bbox.get("width").and_then(|v| v.as_f64()).unwrap_or(0.0);
let h = bbox.get("height").and_then(|v| v.as_f64()).unwrap_or(0.0);
// face.json has x, y, width, height (not bbox object)
let x = face.get("x").and_then(|v| v.as_f64()).unwrap_or(0.0);
let y = face.get("y").and_then(|v| v.as_f64()).unwrap_or(0.0);
let w = face.get("width").and_then(|v| v.as_f64()).unwrap_or(0.0);
let h = face.get("height").and_then(|v| v.as_f64()).unwrap_or(0.0);
let trace_id =
match get_trace_for_face(pool, file_uuid, frame_num, x, y, w, h).await {