feat: ASRX hybrid pipeline, identity history, worker fixes, checkpoint system
This commit is contained in:
107
scripts/update_embeddings.py
Normal file
107
scripts/update_embeddings.py
Normal file
@@ -0,0 +1,107 @@
|
||||
#!/opt/homebrew/bin/python3.11
|
||||
"""Update face_detections embeddings from face_traced.json"""
|
||||
|
||||
import json
|
||||
import psycopg2
|
||||
import sys
|
||||
import os
|
||||
|
||||
# PostgreSQL connection string. Read from the DATABASE_URL environment
# variable; the literal below is only the fallback used when it is unset.
DATABASE_URL = os.getenv("DATABASE_URL", "postgres://accusys@localhost:5432/momentry")
|
||||
|
||||
# Number of attempted row updates between commits.
_COMMIT_EVERY = 1000

# Cap on per-row error details written to stderr, so a systematic failure
# (e.g. a missing table) does not flood the log.
_MAX_LOGGED_FAILURES = 5


def _iter_face_updates(frames):
    """Yield ``(frame_num, face)`` for each face that has a non-empty embedding."""
    for frame_key, frame_data in frames.items():
        frame_num = int(frame_key)
        for face in frame_data.get('faces') or []:
            if face.get('embedding'):
                yield frame_num, face


def update_embeddings(file_uuid: str, traced_json_path: str, schema: str = "dev") -> int:
    """Update embeddings in face_detections from face_traced.json.

    For every face in the JSON file that carries a non-empty ``embedding``,
    the row of ``<schema>.face_detections`` with the same ``file_uuid``,
    frame number and bounding box (``x``, ``y``, ``width``, ``height``) gets
    its ``embedding`` and ``trace_id`` set, provided its embedding is still
    NULL. Work is committed every ``_COMMIT_EVERY`` attempted rows.

    Args:
        file_uuid: Value matched against the ``file_uuid`` column.
        traced_json_path: Path to the JSON file. Its top-level ``frames``
            mapping is keyed by frame number; each value holds a ``faces``
            list.
        schema: Database schema that contains ``face_detections``. It is
            quoted as an identifier, so it must match the schema name
            exactly (including case).

    Returns:
        Total number of rows updated.

    Raises:
        OSError: If the JSON file cannot be opened.
        ValueError: If the file is not valid JSON or a frame key is not an
            integer.
        psycopg2.Error: If connecting, committing, or recovering from a
            failed statement fails. Failures of individual UPDATE statements
            are reported on stderr and skipped (best effort).
    """
    # Local import: the file only imports the top-level ``psycopg2`` package.
    from psycopg2 import sql

    with open(traced_json_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    frames = data.get('frames') or {}

    # Compose the table name as quoted identifiers instead of formatting the
    # caller-supplied schema straight into the SQL text.
    update_query = sql.SQL(
        """
        UPDATE {schema}.{table}
        SET embedding = %s::real[], trace_id = %s
        WHERE file_uuid = %s AND frame_number = %s
          AND x = %s AND y = %s AND width = %s AND height = %s
          AND embedding IS NULL
        """
    ).format(
        schema=sql.Identifier(schema),
        table=sql.Identifier("face_detections"),
    )

    updated = 0
    no_match = 0
    failed = 0
    pending = 0

    conn = psycopg2.connect(DATABASE_URL)
    try:
        with conn.cursor() as cur:
            for frame_num, face in _iter_face_updates(frames):
                params = (
                    # Passed as a list so psycopg2 renders a real ARRAY[...];
                    # a '[...]' string is not valid array-literal syntax.
                    list(face['embedding']),
                    face.get('trace_id'),
                    file_uuid,
                    frame_num,
                    face.get('x', 0),
                    face.get('y', 0),
                    face.get('width', 0),
                    face.get('height', 0),
                )

                # A failed statement aborts the whole transaction, so each
                # row runs in its own savepoint: one bad row can be skipped
                # without poisoning the rest of the batch.
                cur.execute("SAVEPOINT face_embedding_update")
                try:
                    cur.execute(update_query, params)
                except psycopg2.Error as exc:
                    cur.execute("ROLLBACK TO SAVEPOINT face_embedding_update")
                    failed += 1
                    if failed <= _MAX_LOGGED_FAILURES:
                        print(
                            f"[UPDATE] Failed for frame {frame_num}: {exc}",
                            file=sys.stderr,
                        )
                else:
                    # Read rowcount before RELEASE overwrites it.
                    affected = cur.rowcount
                    cur.execute("RELEASE SAVEPOINT face_embedding_update")
                    if affected > 0:
                        updated += affected
                    else:
                        no_match += 1

                pending += 1
                if pending >= _COMMIT_EVERY:
                    conn.commit()
                    pending = 0
                    print(f"[UPDATE] Processed {updated} so far...", file=sys.stderr)

            # Commit whatever is left from the last partial batch.
            conn.commit()
    finally:
        conn.close()

    if no_match or failed:
        print(
            f"[UPDATE] No matching row (or embedding already set): {no_match}, "
            f"failed: {failed}",
            file=sys.stderr,
        )

    print(f"[UPDATE] Total updated: {updated}")
    return updated
|
||||
|
||||
if __name__ == "__main__":
    # Command-line entry point: parse the options, resolve the JSON path,
    # and run the update only when that file exists.
    import argparse

    cli = argparse.ArgumentParser()
    cli.add_argument("--file-uuid", required=True)
    cli.add_argument("--traced-json", help="Path to face_traced.json")
    cli.add_argument("--schema", default="dev")
    opts = cli.parse_args()

    # An omitted (or empty) --traced-json falls back to the per-file default.
    if opts.traced_json:
        json_path = opts.traced_json
    else:
        json_path = f"/Users/accusys/momentry/output_dev/{opts.file_uuid}.face_traced.json"

    if os.path.exists(json_path):
        update_embeddings(opts.file_uuid, json_path, opts.schema)
    else:
        print(f"File not found: {json_path}", file=sys.stderr)
        sys.exit(1)
|
||||
Reference in New Issue
Block a user