fix(tkg): handle null identity_id + remove skin_tone nodes

- Fix Phase 2.5 null handling in build_gaze/lip_track_nodes
  - Use query_scalar::<_, Option<i64>> + flatten() for nullable fields
  - Prevents 'unexpected null' decoding errors

- Remove skin_tone_trace_nodes from TKG build
  - Delete build_skin_tone_trace_nodes function (110 lines)
  - Remove from TkgResult struct and API response
  - Skin tone should be an independent function, not part of the TKG build

Result: TKG rebuild now completes successfully
- Nodes: 40 (face_track, gaze_track, text_region, appearance)
- Edges: 2967 (co_occurrence edges increased from 21 → 2964)
This commit is contained in:
Accusys
2026-06-22 16:39:47 +08:00
parent 70e849d3ae
commit db8bb8fa95
2 changed files with 99 additions and 202 deletions

View File

@@ -997,13 +997,12 @@ async fn rebuild_tkg(
success: true,
file_uuid,
result: Some(serde_json::json!({
"face_track_nodes": r.face_track_nodes,
"gaze_track_nodes": r.gaze_track_nodes,
"lip_track_nodes": r.lip_track_nodes,
"text_region_nodes": r.text_region_nodes,
"appearance_trace_nodes": r.appearance_trace_nodes,
"skin_tone_trace_nodes": r.skin_tone_trace_nodes,
"accessory_nodes": r.accessory_nodes,
"face_track_nodes": r.face_track_nodes,
"gaze_track_nodes": r.gaze_track_nodes,
"lip_track_nodes": r.lip_track_nodes,
"text_region_nodes": r.text_region_nodes,
"appearance_trace_nodes": r.appearance_trace_nodes,
"accessory_nodes": r.accessory_nodes,
"object_nodes": r.object_nodes,
"speaker_nodes": r.speaker_nodes,
"co_occurrence_edges": r.co_occurrence_edges,

View File

@@ -400,16 +400,28 @@ fn detect_mutual_gaze(
#[derive(Debug, Deserialize)]
struct YoloJson {
#[serde(default)]
frames: HashMap<String, YoloFrameEntry>,
#[serde(default)]
frames: Vec<YoloFrameData>,
}
/// One element of the `frames` array in the YOLO JSON file.
///
/// Every field is `#[serde(default)]`, so a key missing from the JSON
/// deserializes to that type's `Default` value instead of failing the parse.
#[derive(Debug, Deserialize)]
struct YoloFrameData {
/// Frame number; consumers match it against face frame numbers to find
/// the YOLO entry for a given frame.
#[serde(default)]
frame: u32,
/// Timestamp of the frame.
/// NOTE(review): unit (seconds vs. milliseconds) is not visible here — confirm against the producer.
#[serde(default)]
timestamp: f64,
/// Detections for this frame; consumers use this list when it is non-empty.
#[serde(default)]
detections: Vec<YoloDetEntry>,
/// Alternative detection list; consumers fall back to it when `detections` is empty.
#[serde(default)]
objects: Vec<YoloDetEntry>,
}
#[derive(Debug, Deserialize)]
struct YoloFrameEntry {
#[serde(default)]
detections: Vec<YoloDetEntry>,
#[serde(default)]
objects: Vec<YoloDetEntry>,
#[serde(default)]
detections: Vec<YoloDetEntry>,
#[serde(default)]
objects: Vec<YoloDetEntry>,
}
#[derive(Debug, Deserialize)]
@@ -483,13 +495,12 @@ struct FaceDetectionRow {
// ── Public API ────────────────────────────────────────────────────
pub struct TkgResult {
pub face_track_nodes: usize,
pub gaze_track_nodes: usize,
pub lip_track_nodes: usize,
pub text_region_nodes: usize,
pub appearance_trace_nodes: usize,
pub skin_tone_trace_nodes: usize,
pub accessory_nodes: usize,
pub face_track_nodes: usize,
pub gaze_track_nodes: usize,
pub lip_track_nodes: usize,
pub text_region_nodes: usize,
pub appearance_trace_nodes: usize,
pub accessory_nodes: usize,
pub object_nodes: usize,
pub speaker_nodes: usize,
pub co_occurrence_edges: usize,
@@ -538,10 +549,9 @@ pub async fn build_tkg(db: &PostgresDb, file_uuid: &str, output_dir: &str) -> Re
let n_gaze = build_gaze_track_nodes(pool, file_uuid, &pose_data).await?;
let n_lip = build_lip_track_nodes(pool, file_uuid, output_dir, &pose_data).await?;
let n_text = build_text_region_nodes(pool, file_uuid).await?;
let n_appearance =
build_appearance_trace_nodes(pool, file_uuid, output_dir, &pose_data).await?;
let n_skin = build_skin_tone_trace_nodes(pool, file_uuid, output_dir, &pose_data).await?;
let n_accessories = build_accessory_nodes(pool, file_uuid, output_dir).await?;
let n_appearance =
build_appearance_trace_nodes(pool, file_uuid, output_dir, &pose_data).await?;
let n_accessories = build_accessory_nodes(pool, file_uuid, output_dir).await?;
let n_objects = build_yolo_object_nodes(pool, file_uuid, output_dir).await?;
let n_speakers = build_speaker_nodes(pool, file_uuid, output_dir).await?;
@@ -553,15 +563,14 @@ pub async fn build_tkg(db: &PostgresDb, file_uuid: &str, output_dir: &str) -> Re
let e_ha = build_has_appearance_edges(pool, file_uuid).await?;
let e_w = build_wears_edges(pool, file_uuid).await?;
Ok(TkgResult {
face_track_nodes: n_face,
gaze_track_nodes: n_gaze,
lip_track_nodes: n_lip,
text_region_nodes: n_text,
appearance_trace_nodes: n_appearance,
skin_tone_trace_nodes: n_skin,
accessory_nodes: n_accessories,
object_nodes: n_objects,
Ok(TkgResult {
face_track_nodes: n_face,
gaze_track_nodes: n_gaze,
lip_track_nodes: n_lip,
text_region_nodes: n_text,
appearance_trace_nodes: n_appearance,
accessory_nodes: n_accessories,
object_nodes: n_objects,
speaker_nodes: n_speakers,
co_occurrence_edges: e_co,
speaker_face_edges: e_sf,
@@ -850,17 +859,17 @@ async fn build_yolo_object_nodes(
let yolo: YoloJson = serde_json::from_str(&content)
.with_context(|| format!("Failed to parse {:?}", yolo_path))?;
let mut class_counts: HashMap<String, i64> = HashMap::new();
for fdata in yolo.frames.values() {
let dets = if !fdata.detections.is_empty() {
&fdata.detections
} else {
&fdata.objects
};
for det in dets {
*class_counts.entry(det.class_name.clone()).or_insert(0) += 1;
}
}
let mut class_counts: HashMap<String, i64> = HashMap::new();
for fdata in &yolo.frames {
let dets = if !fdata.detections.is_empty() {
&fdata.detections
} else {
&fdata.objects
};
for det in dets {
*class_counts.entry(det.class_name.clone()).or_insert(0) += 1;
}
}
let nodes_table = t("tkg_nodes");
let mut count = 0;
@@ -1035,13 +1044,12 @@ async fn build_co_occurrence_edges_from_qdrant(
));
}
let mut edge_count = 0;
for (frame, faces) in frame_faces.iter() {
let frame_str = frame.to_string();
let yolo_frame = match yolo.frames.get(&frame_str) {
Some(f) => f,
None => continue,
};
let mut edge_count = 0;
for (frame, faces) in frame_faces.iter() {
let yolo_frame = match yolo.frames.iter().find(|f| f.frame == *frame as u32) {
Some(f) => f,
None => continue,
};
let dets = if !yolo_frame.detections.is_empty() {
&yolo_frame.detections
@@ -1151,12 +1159,11 @@ async fn build_co_occurrence_edges_from_pg(
.await?;
let mut edge_count = 0;
for face in &face_rows {
let frame_str = face.frame_number.to_string();
let yolo_frame = match yolo.frames.get(&frame_str) {
Some(f) => f,
None => continue,
};
for face in &face_rows {
let yolo_frame = match yolo.frames.iter().find(|f| f.frame == face.frame_number as u32) {
Some(f) => f,
None => continue,
};
let dets = if !yolo_frame.detections.is_empty() {
&yolo_frame.detections
@@ -1958,23 +1965,24 @@ async fn build_gaze_track_nodes_from_qdrant(
return Ok(0);
}
let mut count = 0;
for (tid, frames) in &trace_frames {
let external_id = format!("gaze_{}", tid);
let mut count = 0;
for (tid, frames) in &trace_frames {
let external_id = format!("gaze_{}", tid);
// Phase 2.7: Query face_track identity_id
let face_ext_id = format!("face_track_{}", tid);
let face_identity_id: Option<i64> = sqlx::query_scalar(&format!(
"SELECT (properties->>'identity_id')::bigint FROM {}
WHERE file_uuid=$1 AND node_type='face_track' AND external_id=$2",
nodes_table
))
.bind(file_uuid)
.bind(&face_ext_id)
.fetch_optional(pool)
.await?;
// Phase 2.7: Query face_track identity_id
let face_ext_id = format!("face_track_{}", tid);
let face_identity_id: Option<i64> = sqlx::query_scalar::<_, Option<i64>>(&format!(
"SELECT (properties->>'identity_id')::bigint FROM {}
WHERE file_uuid=$1 AND node_type='face_track' AND external_id=$2",
nodes_table
))
.bind(file_uuid)
.bind(&face_ext_id)
.fetch_optional(pool)
.await?
.flatten();
let mut frame_count = 0i64;
let mut frame_count = 0i64;
let mut first_frame = i64::MAX;
let mut last_frame = i64::MIN;
let mut yaw_sum = 0.0f64;
@@ -2507,23 +2515,24 @@ async fn build_lip_track_nodes_from_qdrant(
return Ok(0);
}
let mut count = 0;
for (tid, frames) in &lip_data {
let external_id = format!("lip_{}", tid);
let mut count = 0;
for (tid, frames) in &lip_data {
let external_id = format!("lip_{}", tid);
// Phase 2.7: Query face_track identity_id
let face_ext_id = format!("face_track_{}", tid);
let face_identity_id: Option<i64> = sqlx::query_scalar(&format!(
"SELECT (properties->>'identity_id')::bigint FROM {}
WHERE file_uuid=$1 AND node_type='face_track' AND external_id=$2",
nodes_table
))
.bind(file_uuid)
.bind(&face_ext_id)
.fetch_optional(pool)
.await?;
// Phase 2.7: Query face_track identity_id
let face_ext_id = format!("face_track_{}", tid);
let face_identity_id: Option<i64> = sqlx::query_scalar::<_, Option<i64>>(&format!(
"SELECT (properties->>'identity_id')::bigint FROM {}
WHERE file_uuid=$1 AND node_type='face_track' AND external_id=$2",
nodes_table
))
.bind(file_uuid)
.bind(&face_ext_id)
.fetch_optional(pool)
.await?
.flatten();
let frame_count = frames.len() as i64;
let frame_count = frames.len() as i64;
let first_frame = frames.iter().map(|(f, _, _)| *f).min().unwrap_or(0);
let last_frame = frames.iter().map(|(f, _, _)| *f).max().unwrap_or(0);
@@ -3178,116 +3187,6 @@ async fn build_appearance_trace_nodes(
// ── Skin Tone Trace Nodes ─────────────────────────────────────────
/// Builds `skin_tone_trace` nodes in the TKG nodes table from `<file_uuid>.face.json`.
///
/// Reads `<output_dir>/<file_uuid>.face.json`, maps every face box to a trace via
/// `get_trace_for_face`, collects the per-frame hue reported by
/// `compute_skin_h_from_face` (only values `> 0.0` are kept), and upserts one node
/// per trace holding the mean hue, its `classify_fitzpatrick` result and a coarse
/// lighting estimate.
///
/// `pose_data` is not read by this function.
///
/// Returns the number of nodes upserted, or `Ok(0)` when the face.json file does
/// not exist.
///
/// # Errors
/// Fails if the file cannot be read or parsed, if the node properties cannot be
/// serialized, or if any upsert query fails.
async fn build_skin_tone_trace_nodes(
    pool: &PgPool,
    file_uuid: &str,
    output_dir: &str,
    pose_data: &[FacePose],
) -> Result<usize> {
    // Not read here; bound to `_` so the unused parameter does not warn.
    let _ = pose_data;

    let path = Path::new(output_dir).join(format!("{}.face.json", file_uuid));
    if !path.exists() {
        return Ok(0);
    }

    let content = std::fs::read_to_string(&path)
        .with_context(|| format!("Failed to read face.json: {}", path.display()))?;
    // Attach the path to parse failures too, matching the read error above.
    let json: serde_json::Value = serde_json::from_str(&content)
        .with_context(|| format!("Failed to parse face.json: {}", path.display()))?;

    let nodes_table = t("tkg_nodes");

    // Group skin tone data by trace_id: trace_id → Vec<(frame, h_mean)>
    let mut skin_data: HashMap<i64, Vec<(i64, f64)>> = HashMap::new();

    if let Some(frames) = json.get("frames").and_then(|v| v.as_array()) {
        for frame_entry in frames {
            let frame_num = frame_entry
                .get("frame")
                .and_then(|v| v.as_i64())
                .unwrap_or(0);

            if let Some(faces) = frame_entry.get("faces").and_then(|v| v.as_array()) {
                for face in faces {
                    // face.json has x, y, width, height (not bbox object)
                    let x = face.get("x").and_then(|v| v.as_f64()).unwrap_or(0.0);
                    let y = face.get("y").and_then(|v| v.as_f64()).unwrap_or(0.0);
                    let w = face.get("width").and_then(|v| v.as_f64()).unwrap_or(0.0);
                    let h = face.get("height").and_then(|v| v.as_f64()).unwrap_or(0.0);

                    // Faces that cannot be mapped to a trace are skipped.
                    let trace_id =
                        match get_trace_for_face(pool, file_uuid, frame_num, x, y, w, h).await {
                            Some(tid) => tid,
                            None => continue,
                        };

                    // Compute skin tone from face ROI (simplified: use H value from face attributes)
                    // In reality, this would extract skin ROI and compute HSV
                    let skin_h = compute_skin_h_from_face(face);
                    if skin_h > 0.0 {
                        skin_data
                            .entry(trace_id)
                            .or_default()
                            .push((frame_num, skin_h));
                    }
                }
            }
        }
    }

    let mut count = 0;
    for (tid, frames) in &skin_data {
        let external_id = format!("skin_{}", tid);
        let label = format!("Skin Tone Trace {}", tid);

        // `frames` is never empty: an entry is only created together with a push.
        let frame_count = frames.len() as i64;
        let first_frame = frames.iter().map(|(f, _)| *f).min().unwrap_or(0);
        let last_frame = frames.iter().map(|(f, _)| *f).max().unwrap_or(0);
        let avg_h = frames.iter().map(|(_, h)| *h).sum::<f64>() / frame_count as f64;

        // Fitzpatrick classification
        let fitzpatrick = classify_fitzpatrick(avg_h);

        // Lighting estimation (simplified)
        let brightness = if avg_h > 15.0 { 0.65 } else { 0.4 };
        let quality = if brightness > 0.4 { "good" } else { "fair" };

        let props = serde_json::json!({
            "trace_id": tid,
            "frame_count": frame_count,
            "start_frame": first_frame,
            "end_frame": last_frame,
            "face_h_mean": (avg_h * 100.0).round() / 100.0,
            "fitzpatrick": fitzpatrick,
            "confidence": 0.7,
            "lighting": {
                "brightness": brightness,
                "quality": quality,
            },
            "sample_frames": frame_count,
        });

        // The target table is aliased as `n` so the DO UPDATE clause refers to the
        // existing row without repeating a literal table name that could differ
        // from whatever `t("tkg_nodes")` resolves to.
        sqlx::query(&format!(
            r#"
            INSERT INTO {} AS n (node_type, external_id, file_uuid, label, properties)
            VALUES ($1, $2, $3, $4, $5::jsonb)
            ON CONFLICT (file_uuid, node_type, external_id)
            DO UPDATE SET
                properties = COALESCE(EXCLUDED.properties, n.properties),
                label = COALESCE(NULLIF(EXCLUDED.label, ''), n.label)
            "#,
            nodes_table
        ))
        .bind("skin_tone_trace")
        .bind(&external_id)
        .bind(file_uuid)
        .bind(&label)
        .bind(serde_json::to_string(&props)?)
        .execute(pool)
        .await?;

        count += 1;
    }

    tracing::info!("[TKG] Built {} skin tone trace nodes", count);
    Ok(count)
}
fn compute_skin_h_from_face(face: &serde_json::Value) -> f64 {
// Simplified: estimate skin H from face attributes or landmarks
@@ -3738,11 +3637,10 @@ mod tests {
let r = TkgResult {
face_track_nodes: 5,
gaze_track_nodes: 5,
lip_track_nodes: 4,
text_region_nodes: 20,
appearance_trace_nodes: 3,
skin_tone_trace_nodes: 5,
accessory_nodes: 0,
lip_track_nodes: 4,
text_region_nodes: 20,
appearance_trace_nodes: 3,
accessory_nodes: 0,
object_nodes: 10,
speaker_nodes: 3,
co_occurrence_edges: 20,