#!/opt/homebrew/bin/python3.11
"""Backfill face_id for existing face_detections rows using trace_id.

face_id is generated as 'face_{trace_id}' for each unique trace.
This covers past data where face_id was never written.
"""

import os
from contextlib import closing

import psycopg2

DB_URL = os.environ.get("DATABASE_URL", "postgresql://accusys@localhost:5432/momentry")
SCHEMA = os.environ.get("MOMENTRY_DB_SCHEMA", "dev")


def get_conn():
    """Open a new psycopg2 connection to DB_URL; the caller must close it."""
    return psycopg2.connect(DB_URL)


def backfill_by_trace(file_uuid: str, schema: str = SCHEMA) -> int:
    """Set face_id = 'face_{trace_id}' for all rows with NULL face_id and non-NULL trace_id.

    Only rows of ``{schema}.face_detections`` whose ``file_uuid`` matches are
    touched. The update runs on its own connection and is committed before
    returning.

    Args:
        file_uuid: Value matched against the ``file_uuid`` column.
        schema: Schema that holds ``face_detections``; defaults to SCHEMA.

    Returns:
        The cursor's ``rowcount`` after the UPDATE, i.e. the number of rows
        updated.

    Any exception raised while executing or committing propagates to the
    caller; the cursor and connection are closed first, so nothing leaks and
    the uncommitted update is discarded.
    """
    # NOTE(review): `schema` is interpolated into the SQL text (identifiers
    # cannot be bound as query parameters). It must come from trusted
    # configuration, never from user input.
    statement = f"""
        UPDATE {schema}.face_detections
        SET face_id = 'face_' || trace_id::text
        WHERE file_uuid = %s
          AND face_id IS NULL
          AND trace_id IS NOT NULL
        """
    # closing() guarantees close() on both objects even when execute() or
    # commit() raises.
    with closing(get_conn()) as conn:
        with closing(conn.cursor()) as cur:
            cur.execute(statement, (file_uuid,))
            updated = cur.rowcount
            conn.commit()
    return updated


def main():
    """Backfill every file that still has rows with a trace_id but no face_id.

    Counts the affected rows, collects the distinct ``file_uuid`` values,
    runs ``backfill_by_trace`` once per file, and prints per-file and total
    progress to stdout.
    """
    # Read-only discovery phase; the connection is released before any
    # update starts, even if one of the queries fails.
    with closing(get_conn()) as conn:
        with closing(conn.cursor()) as cur:
            # Count rows that need backfill
            cur.execute(
                f"""SELECT COUNT(*) FROM {SCHEMA}.face_detections
                WHERE face_id IS NULL AND trace_id IS NOT NULL"""
            )
            total_rows = cur.fetchone()[0]

            cur.execute(
                f"""SELECT DISTINCT file_uuid FROM {SCHEMA}.face_detections
                WHERE face_id IS NULL AND trace_id IS NOT NULL"""
            )
            uuids = [row[0] for row in cur.fetchall()]

    if not uuids:
        print("No rows need backfill (all face_id already set or no trace_id).")
        return

    print(f"Found {total_rows} rows across {len(uuids)} files to backfill")

    total_all = 0
    # `pending_uuid` rather than `uuid`, which is also a stdlib module name.
    for pending_uuid in uuids:
        count = backfill_by_trace(pending_uuid)
        total_all += count
        print(f" [{pending_uuid}] updated {count} rows")

    print(f"\nDone: {len(uuids)} files, {total_all} rows updated")


if __name__ == "__main__":
    main()