Phase 2.6.1: co_occurrence_edges migration
- build_co_occurrence_edges_from_qdrant()
- Qdrant embeddings → frame grouping → YOLO objects
- Result: 6679 edges (vs 6701 PostgreSQL)

Phase 2.6.2: face_face_edges migration
- build_face_face_edges_from_qdrant()
- Qdrant embeddings → frame grouping → face pairs
- mutual_gaze detection preserved
- Result: 6 edges (exact match)

Phase 2.6.3: speaker_face_edges migration
- build_speaker_face_edges_from_qdrant()
- Qdrant embeddings → trace_id frame ranges
- SPEAKS_AS edge creation

Architecture:
- All edges use Qdrant payload (no face_detections queries)
- PostgreSQL fallback for empty Qdrant
- Estimated 3.6x performance improvement

Testing:
- Playground (3003): ✓ All Phase 2.6 logs verified
- Edge counts: ✓ Close match with PostgreSQL
- Fallback: ✓ Working

Docs:
- docs_v1.0/DESIGN/TKG_PHASE2_6_EDGES_MIGRATION.md
- docs_v1.0/M4_workspace/2026-06-21_phase2_6_test.md
103 lines
3.0 KiB
Python
103 lines
3.0 KiB
Python
#!/opt/homebrew/bin/python3.11
|
|
"""
|
|
Backfill missing Age & Gender for persons.
|
|
"""
|
|
|
|
import os
|
|
import cv2
|
|
import psycopg2
|
|
import insightface
|
|
|
|
# Keyword arguments unpacked into psycopg2.connect() by main().
DB_CONFIG = dict(host="localhost", user="accusys", dbname="momentry")

# Root directory searched for "<video_uuid>/<video_uuid>.mp4" files.
BASE_VIDEO_DIR = "output"
|
|
|
|
|
|
def _load_face_analyzer():
    """Build and prepare the InsightFace analyzer; return None if that fails.

    A failure is reported on stdout instead of being raised so that main()
    can stop cleanly.
    """
    print("Loading InsightFace model...")
    try:
        analyzer = insightface.app.FaceAnalysis(
            name="buffalo_l", providers=["CPUExecutionProvider"]
        )
        analyzer.prepare(ctx_id=0, det_size=(320, 320))
    except Exception as e:
        print(f"Error loading model: {e}")
        return None
    print("Model loaded.")
    return analyzer


def _grab_frame(video_path, timestamp_s):
    """Return the frame at *timestamp_s* seconds of *video_path*, or None.

    The reason for a failure is printed. The capture handle is always
    released, including when the file cannot be opened or reading raises.
    """
    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            print(" -> Could not open video.")
            return None
        # float(): the DB driver may hand back a Decimal for numeric columns,
        # while OpenCV expects a plain float property value.
        cap.set(cv2.CAP_PROP_POS_MSEC, float(timestamp_s) * 1000)
        ok, frame = cap.read()
    finally:
        cap.release()

    if not ok or frame is None:
        print(" -> Failed to read frame.")
        return None
    return frame


def _face_demographics(face):
    """Return ``(age, gender)`` for a detected face; either may be None.

    ``gender`` is "female" for a raw value of 0, "male" for 1, else None.
    """
    # Test the values, not attribute presence: InsightFace's Face object
    # appears to answer unknown attributes with None, so hasattr() cannot
    # tell "missing" apart and int(None) would raise TypeError.
    raw_age = getattr(face, "age", None)
    raw_gender = getattr(face, "gender", None)

    age = int(raw_age) if raw_age is not None else None
    if raw_gender is None:
        gender = None
    else:
        gender = "female" if raw_gender == 0 else ("male" if raw_gender == 1 else None)
    return age, gender


def main():
    """Backfill missing age and gender values in ``person_identities``.

    For each person whose ``age`` or ``gender`` is NULL, take the appearance
    with the lowest ``start_time``, read the video frame at that timestamp
    from ``{BASE_VIDEO_DIR}/{video_uuid}/{video_uuid}.mp4``, run InsightFace
    on it and store the age and gender of the first detected face. Each
    successful update is committed immediately, so an interrupted run keeps
    the work done so far.

    Progress and per-person outcomes are printed to stdout. The database
    connection is closed on every exit path.
    """
    print("=== Starting Missing Demographics Backfill ===")

    conn = psycopg2.connect(**DB_CONFIG)
    try:
        app = _load_face_analyzer()
        if app is None:
            return

        with conn.cursor() as cur:
            # Query persons missing data.
            # Join with appearances to find a valid timestamp.
            cur.execute("""
                SELECT DISTINCT ON (pi.person_id) pi.person_id, pa.video_uuid, pa.start_time
                FROM person_identities pi
                JOIN person_appearances pa ON pi.person_id = pa.person_id
                WHERE pi.age IS NULL OR pi.gender IS NULL
                ORDER BY pi.person_id, pa.start_time
            """)
            rows = cur.fetchall()
            total = len(rows)

            print(f"Found {total} entries to process.")

            for position, (person_id, video_uuid, start_time) in enumerate(rows, start=1):
                # Skip if time is null
                if start_time is None:
                    continue

                print(f"[{position}/{total}] Processing: {person_id} @ {start_time:.1f}s")

                video_path = f"{BASE_VIDEO_DIR}/{video_uuid}/{video_uuid}.mp4"
                if not os.path.exists(video_path):
                    print(f" -> Video not found at {video_path}")
                    continue

                frame = _grab_frame(video_path, start_time)
                if frame is None:
                    continue

                faces = app.get(frame)
                if not faces:
                    print(" -> No face found in frame.")
                    continue

                # NOTE(review): the first detection is assumed to be this
                # person; nothing here matches the face to person_id.
                age, gender = _face_demographics(faces[0])
                if age is None or gender is None:
                    print(f" -> Detection incomplete (Age:{age}, Gender:{gender})")
                    continue

                # NOTE(review): this overwrites both columns even when only
                # one of them was NULL - confirm that is intended.
                cur.execute(
                    """
                    UPDATE person_identities
                    SET age = %s, gender = %s
                    WHERE person_id = %s
                    """,
                    (age, gender, person_id),
                )
                conn.commit()
                print(f" -> Updated: Age {age}, Gender {gender}")

        print("=== Done ===")
    finally:
        conn.close()
|
|
|
|
|
|
# Run the backfill only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|