feat: Phase 2.6 edges migration to Qdrant (TKG-only architecture)
Phase 2.6.1: co_occurrence_edges migration
- build_co_occurrence_edges_from_qdrant()
- Qdrant embeddings → frame grouping → YOLO objects
- Result: 6679 edges (vs 6701 PostgreSQL)

Phase 2.6.2: face_face_edges migration
- build_face_face_edges_from_qdrant()
- Qdrant embeddings → frame grouping → face pairs
- mutual_gaze detection preserved
- Result: 6 edges (exact match)

Phase 2.6.3: speaker_face_edges migration
- build_speaker_face_edges_from_qdrant()
- Qdrant embeddings → trace_id frame ranges
- SPEAKS_AS edge creation

Architecture:
- All edges use Qdrant payload (no face_detections queries)
- PostgreSQL fallback for empty Qdrant
- Estimated 3.6x performance improvement

Testing:
- Playground (3003): ✓ All Phase 2.6 logs verified
- Edge counts: ✓ Close match with PostgreSQL
- Fallback: ✓ Working

Docs:
- docs_v1.0/DESIGN/TKG_PHASE2_6_EDGES_MIGRATION.md
- docs_v1.0/M4_workspace/2026-06-21_phase2_6_test.md
This commit is contained in:
107
v1.1/scripts/update_embeddings_v1.11.py
Normal file
107
v1.1/scripts/update_embeddings_v1.11.py
Normal file
@@ -0,0 +1,107 @@
|
||||
#!/opt/homebrew/bin/python3.11
|
||||
"""Update face_detections embeddings from face_traced.json"""
|
||||
|
||||
import json
|
||||
import psycopg2
|
||||
import sys
|
||||
import os
|
||||
|
||||
# PostgreSQL connection string; the DATABASE_URL environment variable, when set,
# overrides the local default given here.
DATABASE_URL = os.getenv("DATABASE_URL", "postgres://accusys@localhost:5432/momentry")
|
||||
|
||||
def _face_update_params(frames, file_uuid):
    """Yield one UPDATE parameter tuple per face that carries an embedding.

    Tuples are ordered to match the placeholders of the UPDATE statement:
    (embedding, trace_id, file_uuid, frame_number, x, y, width, height).
    Faces whose 'embedding' is missing or empty are skipped.
    """
    for frame_key, frame_data in frames.items():
        frame_num = int(frame_key)
        for face in frame_data.get('faces', []):
            embedding = face.get('embedding')
            if not embedding:
                continue
            yield (
                # Passed as a Python list so psycopg2 renders ARRAY[...]; a
                # '[...]' string is not valid real[] literal syntax ('{...}' is).
                list(embedding),
                face.get('trace_id'),
                file_uuid,
                frame_num,
                face.get('x', 0),
                face.get('y', 0),
                face.get('width', 0),
                face.get('height', 0),
            )


def _apply_batch(cur, statement, batch):
    """Run `statement` once per parameter tuple; return the total rows updated.

    Each row runs inside its own savepoint: a failing row is reported on
    stderr and rolled back on its own, instead of aborting the transaction
    and silently failing every later row of the batch.
    """
    updated = 0
    for params in batch:
        cur.execute("SAVEPOINT face_update")
        try:
            cur.execute(statement, params)
        except psycopg2.Error as exc:
            cur.execute("ROLLBACK TO SAVEPOINT face_update")
            # params[3] is the frame number; never dump the embedding itself.
            print(f"[UPDATE] Failed for frame {params[3]}: {exc}", file=sys.stderr)
        else:
            updated += cur.rowcount
        cur.execute("RELEASE SAVEPOINT face_update")
    return updated


def update_embeddings(file_uuid: str, traced_json_path: str, schema: str = "dev",
                      batch_size: int = 1000) -> int:
    """Update embeddings in face_detections from face_traced.json.

    Reads the 'frames' mapping of the traced JSON file and, for every face
    that has a non-empty embedding, sets `embedding` and `trace_id` on the
    `{schema}.face_detections` row matching file_uuid, frame number and
    bounding box (x, y, width, height). Only rows whose embedding is still
    NULL are touched, so re-running the script does not overwrite data.

    Args:
        file_uuid: Value matched against the `file_uuid` column.
        traced_json_path: Path to the face_traced.json file to read.
        schema: Schema that holds the face_detections table.
        batch_size: Number of faces applied per commit.

    Returns:
        The number of rows updated.

    Raises:
        ValueError: If `schema` is not a plain SQL identifier.
        OSError: If the JSON file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
        psycopg2.Error: If connecting or committing fails. Failures of
            individual rows are logged to stderr and skipped.
    """
    import re

    # The schema name is interpolated into the statement text (identifiers
    # cannot be bound as parameters), so reject anything but a bare identifier.
    if not re.fullmatch(r"[A-Za-z_][A-Za-z0-9_$]*", schema):
        raise ValueError(f"Invalid schema name: {schema!r}")

    with open(traced_json_path, 'r', encoding='utf-8') as f:
        data = json.load(f)
    frames = data.get('frames', {})

    statement = f"""
        UPDATE {schema}.face_detections
        SET embedding = %s::real[], trace_id = %s
        WHERE file_uuid = %s AND frame_number = %s
          AND x = %s AND y = %s AND width = %s AND height = %s
          AND embedding IS NULL
    """

    flush_at = max(1, batch_size)
    updated = 0

    conn = psycopg2.connect(DATABASE_URL)
    try:
        cur = conn.cursor()
        try:
            batch = []
            for params in _face_update_params(frames, file_uuid):
                batch.append(params)
                if len(batch) >= flush_at:
                    updated += _apply_batch(cur, statement, batch)
                    conn.commit()
                    batch = []
                    print(f"[UPDATE] Processed {updated} so far...", file=sys.stderr)

            # Final, partially filled batch.
            if batch:
                updated += _apply_batch(cur, statement, batch)
            conn.commit()
        finally:
            cur.close()
    finally:
        # Always release the connection, including on error paths.
        conn.close()

    print(f"[UPDATE] Total updated: {updated}")
    return updated
|
||||
|
||||
if __name__ == "__main__":
    import argparse

    # Command line: --file-uuid is mandatory; --traced-json and --schema are
    # optional, with the JSON path derived from the UUID when not given.
    cli = argparse.ArgumentParser()
    cli.add_argument("--file-uuid", required=True)
    cli.add_argument("--traced-json", help="Path to face_traced.json")
    cli.add_argument("--schema", default="dev")
    opts = cli.parse_args()

    json_path = opts.traced_json
    if not json_path:
        json_path = f"/Users/accusys/momentry/output_dev/{opts.file_uuid}.face_traced.json"

    if os.path.exists(json_path):
        update_embeddings(opts.file_uuid, json_path, opts.schema)
    else:
        # Report the missing input on stderr and exit with a failure status.
        print(f"File not found: {json_path}", file=sys.stderr)
        sys.exit(1)
|
||||
Reference in New Issue
Block a user