momentry_core/scripts/backfill_demographics.py

#!/opt/homebrew/bin/python3.11
"""
Backfill missing Age & Gender for persons.
"""

import os
import cv2
import psycopg2
import insightface

# Keyword arguments handed to psycopg2.connect() when the script starts.
DB_CONFIG = dict(
    host="localhost",
    user="accusys",
    dbname="momentry",
)

# Root directory (relative to the working directory) holding one folder per
# video, laid out as <BASE_VIDEO_DIR>/<video_uuid>/<video_uuid>.mp4.
BASE_VIDEO_DIR = "output"


def _read_frame_at(video_path, seconds):
    """Decode the frame located ``seconds`` into the video at ``video_path``.

    Returns a ``(frame, error)`` pair. On success ``frame`` is the decoded
    image and ``error`` is ``None``; on failure ``frame`` is ``None`` and
    ``error`` is the message to report. The capture handle is always
    released, including when the file cannot be opened.
    """
    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            return None, "Could not open video."
        # float() so a non-float numeric (e.g. Decimal from the database)
        # is converted before it is handed to OpenCV.
        cap.set(cv2.CAP_PROP_POS_MSEC, float(seconds) * 1000)
        ret, frame = cap.read()
    finally:
        cap.release()

    if not ret or frame is None:
        return None, "Failed to read frame."
    return frame, None


def _demographics_from_face(face):
    """Map a detected face to an ``(age, gender)`` pair.

    ``age`` is an ``int`` or ``None``. ``gender`` is ``"female"`` for a raw
    value of 0, ``"male"`` for 1, and ``None`` for anything else. An
    attribute that is missing *or* present-but-``None`` yields ``None``
    instead of raising.
    """
    # getattr + explicit None check: hasattr() alone is not enough because
    # the attribute may exist with a None value, and int(None) raises.
    raw_age = getattr(face, "age", None)
    raw_gender = getattr(face, "gender", None)

    age = int(raw_age) if raw_age is not None else None
    if raw_gender == 0:
        gender = "female"
    elif raw_gender == 1:
        gender = "male"
    else:
        gender = None
    return age, gender


def main():
    """Fill in age and gender for persons that lack either value.

    For every person in ``person_identities`` whose ``age`` or ``gender`` is
    NULL, the earliest appearance (lowest ``start_time``) is looked up, the
    frame at that timestamp is read from
    ``<BASE_VIDEO_DIR>/<video_uuid>/<video_uuid>.mp4``, and InsightFace is
    run on it. When both age and gender are obtained, the person's row is
    updated and committed immediately, so progress survives an interrupted
    run. Rows that cannot be processed are reported and skipped.

    The database connection is closed on every exit path, including a model
    load failure or an unexpected exception.
    """
    print("=== Starting Missing Demographics Backfill ===")

    conn = psycopg2.connect(**DB_CONFIG)
    try:
        cur = conn.cursor()

        # Load Model
        print("Loading InsightFace model...")
        try:
            app = insightface.app.FaceAnalysis(
                name="buffalo_l", providers=["CPUExecutionProvider"]
            )
            app.prepare(ctx_id=0, det_size=(320, 320))
            print("Model loaded.")
        except Exception as e:
            # Top-level boundary: report and stop; `finally` closes the DB.
            print(f"Error loading model: {e}")
            return

        # One row per person missing data; DISTINCT ON together with the
        # ORDER BY keeps the appearance with the smallest start_time.
        cur.execute("""
            SELECT DISTINCT ON (pi.person_id) pi.person_id, pa.video_uuid, pa.start_time
            FROM person_identities pi
            JOIN person_appearances pa ON pi.person_id = pa.person_id
            WHERE pi.age IS NULL OR pi.gender IS NULL
            ORDER BY pi.person_id, pa.start_time
        """)
        rows = cur.fetchall()
        total = len(rows)

        print(f"Found {total} entries to process.")

        for position, (person_id, video_uuid, start_time) in enumerate(rows, start=1):
            # Without a timestamp there is no frame to seek to.
            if start_time is None:
                continue

            print(f"[{position}/{total}] Processing: {person_id} @ {start_time:.1f}s")

            video_path = f"{BASE_VIDEO_DIR}/{video_uuid}/{video_uuid}.mp4"
            if not os.path.exists(video_path):
                print(f"  -> Video not found at {video_path}")
                continue

            frame, error = _read_frame_at(video_path, start_time)
            if error is not None:
                print(f"  -> {error}")
                continue

            faces = app.get(frame)
            if not faces:
                print("  -> No face found in frame.")
                continue

            # NOTE(review): the first detected face is used; if the frame
            # shows several people it may not belong to person_id -- confirm.
            age, gender = _demographics_from_face(faces[0])

            if age is None or gender is None:
                print(f"  -> Detection incomplete (Age:{age}, Gender:{gender})")
                continue

            # NOTE(review): this overwrites both columns even when only one
            # of them was NULL -- confirm that is intended.
            cur.execute(
                """
                UPDATE person_identities
                SET age = %s, gender = %s
                WHERE person_id = %s
                """,
                (age, gender, person_id),
            )
            conn.commit()
            print(f"  -> Updated: Age {age}, Gender {gender}")

        print("=== Done ===")
    finally:
        conn.close()


# Run the backfill only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()