Files
momentry_core/scripts/update_embeddings.py

107 lines
3.5 KiB
Python

#!/opt/homebrew/bin/python3.11
"""Update face_detections embeddings from face_traced.json"""
import json
import psycopg2
import sys
import os
# PostgreSQL connection string. The DATABASE_URL environment variable takes
# precedence; the literal below is only the fallback used when it is unset.
DATABASE_URL = os.getenv("DATABASE_URL", "postgres://accusys@localhost:5432/momentry")
def _iter_face_params(frames, file_uuid):
    """Yield one UPDATE parameter tuple per face that carries an embedding."""
    for frame_key, frame_data in frames.items():
        frame_num = int(frame_key)
        for face in frame_data.get('faces', []):
            embedding = face.get('embedding')
            if not embedding:
                continue
            yield (
                # Passed as a Python list so psycopg2 adapts it to an
                # ARRAY[...] expression. A '[a,b,c]' string is not a valid
                # real[] literal (PostgreSQL array literals use braces).
                list(embedding),
                face.get('trace_id'),
                file_uuid,
                frame_num,
                face.get('x', 0),
                face.get('y', 0),
                face.get('width', 0),
                face.get('height', 0),
            )


def _apply_batch(conn, cur, query, batch):
    """Run the UPDATE for every queued row, commit, and return rows changed."""
    changed = 0
    for params in batch:
        # A failed statement aborts the whole transaction in PostgreSQL, so
        # each row is isolated behind a savepoint: one bad row is skipped
        # (and reported) without poisoning the rest of the batch.
        cur.execute("SAVEPOINT face_row")
        try:
            cur.execute(query, params)
        except psycopg2.Error as exc:
            cur.execute("ROLLBACK TO SAVEPOINT face_row")
            print(
                f"[UPDATE] Skipped face in frame {params[3]}: {exc}",
                file=sys.stderr,
            )
        else:
            changed += cur.rowcount
        cur.execute("RELEASE SAVEPOINT face_row")
    conn.commit()
    return changed


def update_embeddings(file_uuid: str, traced_json_path: str, schema: str = "dev",
                      *, batch_size: int = 1000):
    """Update embeddings in face_detections from face_traced.json.

    Reads the ``frames`` mapping from the JSON file and, for every face that
    has a non-empty ``embedding``, sets ``embedding`` and ``trace_id`` on the
    ``<schema>.face_detections`` row matching the file UUID, frame number and
    bounding box (x, y, width, height) whose embedding is still NULL.

    Args:
        file_uuid: Value matched against the ``file_uuid`` column.
        traced_json_path: Path of the face_traced.json file to read.
        schema: Database schema that holds ``face_detections``. It is quoted
            as a single identifier, so it must be a plain schema name.
        batch_size: Number of face rows sent between commits.

    Returns:
        Total number of rows reported as updated by the database.

    Raises:
        OSError, json.JSONDecodeError: if the JSON file cannot be read/parsed.
        psycopg2.Error: on connection-level failures. Per-row statement
            errors are reported on stderr and skipped instead.
    """
    # Local import: psycopg2 is already a dependency of this file; only the
    # sql helper submodule is additionally needed here.
    from psycopg2 import sql

    with open(traced_json_path, 'r', encoding='utf-8') as f:
        data = json.load(f)
    frames = data.get('frames', {})

    # The schema name is composed as a quoted identifier instead of being
    # pasted into the statement text, so it cannot inject SQL.
    query = sql.SQL(
        """
        UPDATE {}.face_detections
        SET embedding = %s::real[], trace_id = %s
        WHERE file_uuid = %s AND frame_number = %s
        AND x = %s AND y = %s AND width = %s AND height = %s
        AND embedding IS NULL
        """
    ).format(sql.Identifier(schema))

    updated = 0
    conn = psycopg2.connect(DATABASE_URL)
    try:
        with conn.cursor() as cur:
            batch = []
            for params in _iter_face_params(frames, file_uuid):
                batch.append(params)
                # Flush once enough face rows (not frames) are queued.
                if len(batch) >= batch_size:
                    updated += _apply_batch(conn, cur, query, batch)
                    batch = []
                    print(f"[UPDATE] Processed {updated} so far...", file=sys.stderr)
            # Final, possibly partial, batch.
            updated += _apply_batch(conn, cur, query, batch)
    finally:
        # Always release the connection, even when reading or updating fails.
        conn.close()

    print(f"[UPDATE] Total updated: {updated}")
    return updated
if __name__ == "__main__":
    import argparse

    # Command-line entry point: resolve the traced JSON path, then run the update.
    cli = argparse.ArgumentParser()
    cli.add_argument("--file-uuid", required=True)
    cli.add_argument("--traced-json", help="Path to face_traced.json")
    cli.add_argument("--schema", default="dev")
    opts = cli.parse_args()

    # Fall back to the conventional output location when no path is given.
    json_path = opts.traced_json
    if not json_path:
        json_path = f"/Users/accusys/momentry/output_dev/{opts.file_uuid}.face_traced.json"

    if os.path.exists(json_path):
        update_embeddings(opts.file_uuid, json_path, opts.schema)
    else:
        print(f"File not found: {json_path}", file=sys.stderr)
        sys.exit(1)