diff --git a/scripts/push_existing_embeddings.py b/scripts/push_existing_embeddings.py
new file mode 100755
index 0000000..5322c07
--- /dev/null
+++ b/scripts/push_existing_embeddings.py
@@ -0,0 +1,113 @@
+#!/opt/homebrew/bin/python3.11
+"""
+Push existing embeddings from face.json to Qdrant _faces collection.
+This is faster than recomputing embeddings with face_processor.py.
+
+Usage:
+    python scripts/push_existing_embeddings.py --file-uuid FILE_UUID
+    python scripts/push_existing_embeddings.py --all
+"""
+
+import os
+import sys
+import json
+import argparse
+from pathlib import Path
+
+# Put this script's directory on sys.path so the `utils` package next to it
+# can be imported when the script is run directly.
+sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
+# NOTE(review): ensure_faces_collection is imported but never called in this
+# script -- confirm push_face_embeddings_batch creates the collection itself.
+from utils.qdrant_faces import (
+    ensure_faces_collection,
+    push_face_embeddings_batch,
+)
+
+OUTPUT_DIR = os.environ.get("MOMENTRY_OUTPUT_DIR", "/Users/accusys/momentry/output")
+# Suffix that turns a file UUID into the name of its face-detection JSON file.
+FACE_JSON_SUFFIX = ".face.json"
+
+
+def push_embeddings_for_file(file_uuid: str) -> int:
+    """Push the embeddings stored in a file's face.json to Qdrant.
+
+    Reads ``OUTPUT_DIR/{file_uuid}.face.json``, collects every face entry that
+    has a non-empty ``embedding`` and passes the batch to
+    ``push_face_embeddings_batch``.
+
+    Returns whatever ``push_face_embeddings_batch`` returns, or 0 when the
+    file is missing, cannot be read or parsed, or contains no embeddings.
+    """
+    face_json_path = Path(OUTPUT_DIR) / f"{file_uuid}{FACE_JSON_SUFFIX}"
+    if not face_json_path.exists():
+        print(f"ERROR: {face_json_path} not found")
+        return 0
+
+    # One unreadable or corrupt file must not abort a whole --all run, so it
+    # is reported like a missing file and skipped.
+    try:
+        with open(face_json_path, encoding="utf-8") as f:
+            data = json.load(f)
+    except (OSError, ValueError) as exc:
+        print(f"ERROR: could not read {face_json_path}: {exc}")
+        return 0
+
+    faces = []
+    for frame_data in data.get("frames", []):
+        frame = frame_data.get("frame", 0)
+        for face in frame_data.get("faces", []):
+            embedding = face.get("embedding")
+            if not embedding:
+                # Nothing to push for faces without a stored embedding.
+                continue
+
+            faces.append({
+                "frame": frame,
+                "trace_id": face.get("trace_id"),
+                "bbox": {
+                    "x": face.get("x", 0),
+                    "y": face.get("y", 0),
+                    "width": face.get("width", 0),
+                    "height": face.get("height", 0),
+                },
+                "confidence": face.get("confidence", 0),
+                "embedding": embedding,
+            })
+
+    if faces:
+        count = push_face_embeddings_batch(file_uuid, faces)
+        print(f"Pushed {count} embeddings for {file_uuid}")
+        return count
+
+    return 0
+
+
+def main():
+    """Parse the command line and push embeddings for one file or for all.
+
+    ``--all`` processes every ``*.face.json`` in ``OUTPUT_DIR`` and prints a
+    grand total; ``--file-uuid`` processes just that file; with neither
+    option the help text is printed.
+    """
+    parser = argparse.ArgumentParser(description="Push existing embeddings to Qdrant")
+    parser.add_argument("--file-uuid", help="File UUID to process")
+    parser.add_argument("--all", action="store_true", help="Process all files in output dir")
+    args = parser.parse_args()
+
+    if args.all:
+        total = 0
+        for face_json in Path(OUTPUT_DIR).glob(f"*{FACE_JSON_SUFFIX}"):
+            # Strip only the trailing suffix; str.replace would also remove
+            # a ".face.json" that occurs earlier in the name.
+            file_uuid = face_json.name.removesuffix(FACE_JSON_SUFFIX)
+            total += push_embeddings_for_file(file_uuid)
+        print(f"\nTotal: {total} embeddings pushed")
+    elif args.file_uuid:
+        push_embeddings_for_file(args.file_uuid)
+    else:
+        parser.print_help()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/src/api/trace_agent_api.rs b/src/api/trace_agent_api.rs
index 950b814..60ba204 100644
--- a/src/api/trace_agent_api.rs
+++ b/src/api/trace_agent_api.rs
@@ -997,12 +997,13 @@ async fn rebuild_tkg(
             success: true,
             file_uuid,
             result: Some(serde_json::json!({
-"face_track_nodes": r.face_track_nodes,
-"gaze_track_nodes": r.gaze_track_nodes,
-"lip_track_nodes": r.lip_track_nodes,
-"text_region_nodes": r.text_region_nodes,
-"appearance_trace_nodes": r.appearance_trace_nodes,
-"accessory_nodes": r.accessory_nodes,
+                "face_track_nodes": r.face_track_nodes,
+                "gaze_track_nodes": r.gaze_track_nodes,
+                "lip_track_nodes": r.lip_track_nodes,
+                "text_region_nodes": r.text_region_nodes,
+                "appearance_trace_nodes": r.appearance_trace_nodes,
+                "skin_tone_trace_nodes": r.skin_tone_trace_nodes,
+                "accessory_nodes": r.accessory_nodes,
                 "object_nodes": r.object_nodes,
                 "hand_nodes": r.hand_nodes,
                 "speaker_nodes": r.speaker_nodes,
diff --git a/src/core/processor/tkg.rs b/src/core/processor/tkg.rs
index 16983f3..ad5a844 100644
--- a/src/core/processor/tkg.rs
+++ b/src/core/processor/tkg.rs
@@ -458,6 +458,7 @@ struct FaceDetectionRow {
 
 // ── Public API ────────────────────────────────────────────────────
 
+#[derive(Debug, Serialize)]
 pub struct TkgResult {
     pub face_track_nodes: usize,
     pub gaze_track_nodes: usize,
@@ -554,7 +555,7 @@ async fn build_face_track_nodes(
     file_uuid: &str,
     pose_data: &[FacePose],
 ) -> Result {
-    build_face_track_nodes_from_face_json(pool, file_uuid, pose_data).await
+    build_face_track_nodes_from_pg(pool, file_uuid, pose_data).await
 }
 
 async fn build_face_track_nodes_from_pg(
@@ -2228,8 +2229,8 @@ async fn build_text_region_nodes(pool: &PgPool, file_uuid: &str) -> Result 50 {
-            format!("Text: {}...", &text[..47])
+        let label = if text.chars().count() > 50 {
+            format!("Text: {}...", text.chars().take(47).collect::())
         } else {
             format!("Text: {}", text)
         };
@@ -2587,7 +2588,7 @@ async fn build_skin_tone_trace_nodes(
     let mut count = 0;
 
     let rows: Vec<(i64, i64)> = sqlx::query_as(&format!(
-        "SELECT trace_id, COUNT(*) \
+        "SELECT trace_id::bigint, COUNT(*)::bigint \
          FROM {} \
          WHERE file_uuid = $1 AND trace_id IS NOT NULL \
          GROUP BY trace_id",