#!/opt/homebrew/bin/python3.11
"""Update face_detections embeddings from face_traced.json"""

import argparse
import json
import os
import sys

import psycopg2
from psycopg2 import sql

DATABASE_URL = os.getenv("DATABASE_URL", "postgres://accusys@localhost:5432/momentry")

# Number of queued face rows that triggers a flush + commit.
BATCH_SIZE = 1000


def _apply_batch(cur, query, batch) -> int:
    """Run one UPDATE per queued row and return the number of rows changed.

    Each row runs inside its own savepoint: a failing statement would
    otherwise abort the whole transaction and silently discard every other
    update in the batch. Failures are reported on stderr and skipped, so the
    run stays best-effort per row.
    """
    changed = 0
    for params in batch:
        cur.execute("SAVEPOINT face_row")
        try:
            cur.execute(query, params)
        except psycopg2.Error as exc:
            cur.execute("ROLLBACK TO SAVEPOINT face_row")
            print(
                f"[UPDATE] Failed frame {params[3]} (trace_id={params[1]}): {exc}",
                file=sys.stderr,
            )
        else:
            # Read rowcount before RELEASE overwrites it.
            changed += cur.rowcount
        cur.execute("RELEASE SAVEPOINT face_row")
    return changed


def update_embeddings(file_uuid: str, traced_json_path: str, schema: str = "dev"):
    """Update embeddings in face_detections from face_traced.json.

    Reads the ``frames`` mapping from the JSON file and, for every face that
    carries a non-empty ``embedding``, updates the row in
    ``<schema>.face_detections`` that matches ``file_uuid``, the frame number
    and the face's ``x``/``y``/``width``/``height`` and whose embedding is
    still NULL. Both ``embedding`` and ``trace_id`` are written.

    Args:
        file_uuid: Value matched against the ``file_uuid`` column.
        traced_json_path: Path to the face_traced.json file.
        schema: Database schema holding ``face_detections``. It is quoted as
            an SQL identifier, so it must name the schema exactly.

    Returns:
        Total number of rows updated.
    """
    with open(traced_json_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # Compose the schema name as a quoted identifier rather than formatting
    # it into the SQL text.
    query = sql.SQL(
        """
        UPDATE {}.face_detections
        SET embedding = %s::real[], trace_id = %s
        WHERE file_uuid = %s AND frame_number = %s
          AND x = %s AND y = %s AND width = %s AND height = %s
          AND embedding IS NULL
        """
    ).format(sql.Identifier(schema))

    frames = data.get('frames', {})
    updated = 0
    batch = []

    conn = psycopg2.connect(DATABASE_URL)
    try:
        with conn.cursor() as cur:
            for frame_key, frame_data in frames.items():
                frame_num = int(frame_key)
                for face in frame_data.get('faces', []):
                    embedding = face.get('embedding')
                    if not embedding:
                        continue
                    # Pass the embedding as a Python list: psycopg2 adapts it
                    # to an SQL ARRAY. A '[...]' string is not a valid
                    # PostgreSQL array literal and fails the ::real[] cast.
                    batch.append((
                        list(embedding),
                        face.get('trace_id'),
                        file_uuid,
                        frame_num,
                        face.get('x', 0),
                        face.get('y', 0),
                        face.get('width', 0),
                        face.get('height', 0),
                    ))

                # Flush once enough rows are queued (checked per frame).
                if len(batch) >= BATCH_SIZE:
                    updated += _apply_batch(cur, query, batch)
                    conn.commit()
                    batch = []
                    print(f"[UPDATE] Processed {updated} so far...", file=sys.stderr)

            # Final, partially filled batch.
            updated += _apply_batch(cur, query, batch)
            conn.commit()
    finally:
        # Always release the connection, even when an error propagates.
        conn.close()

    print(f"[UPDATE] Total updated: {updated}")
    return updated


def main() -> None:
    """Parse command-line arguments and run the embedding update."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--file-uuid", required=True)
    parser.add_argument("--traced-json", help="Path to face_traced.json")
    parser.add_argument("--schema", default="dev")
    args = parser.parse_args()

    traced_json = args.traced_json or f"/Users/accusys/momentry/output_dev/{args.file_uuid}.face_traced.json"

    if not os.path.exists(traced_json):
        print(f"File not found: {traced_json}", file=sys.stderr)
        sys.exit(1)

    update_embeddings(args.file_uuid, traced_json, args.schema)


if __name__ == "__main__":
    main()