# momentry_core/scripts/auto_identify_persons.py

#!/opt/homebrew/bin/python3.11
"""
Auto-Identify Persons: Bridge face_clustered.json + ASRX speaker data
Creates/updates person_identities with auto-generated names and speaker links.
"""

import json
import os
import sys
import psycopg2
from collections import defaultdict

# Video identifier: taken from the first command-line argument, falling back
# to a hard-coded sample value when the script is run without arguments.
UUID = sys.argv[1] if len(sys.argv) > 1 else "384b0ff44aaaa1f1"
# Per-video working directory, relative to the current working directory.
# Input JSON files are read from here and the result JSON is written here.
BASE_DIR = f"output/{UUID}"

# Keyword arguments handed to psycopg2.connect(); no password or port is
# given here, so those come from the driver/libpq defaults.
DB_CONFIG = {
    "host": "localhost",
    "user": "accusys",
    "dbname": "momentry",
}


def load_json(filepath):
    """Read a JSON document from *filepath* and return the parsed object.

    The file is decoded explicitly as UTF-8 (the encoding JSON is exchanged
    in) instead of the platform's locale encoding, so non-ASCII content
    loads the same way on every machine.

    Raises:
        OSError: if the file cannot be opened.
        json.JSONDecodeError: if the content is not valid JSON.
    """
    with open(filepath, "r", encoding="utf-8") as f:
        return json.load(f)


# Parameterized upsert: every value is bound by the driver (%s placeholders)
# instead of being interpolated into the SQL text, so quotes in person or
# speaker identifiers can neither break nor inject into the statement.
_UPSERT_PERSON_SQL = """
INSERT INTO dev.person_identities (person_id, video_uuid, name, speaker_id,
    first_appearance_time, last_appearance_time, appearance_count, metadata)
VALUES (%s, %s, %s, %s, %s, %s, %s, %s)
ON CONFLICT (person_id) DO UPDATE SET
    name = EXCLUDED.name,
    speaker_id = COALESCE(EXCLUDED.speaker_id, person_identities.speaker_id),
    first_appearance_time = EXCLUDED.first_appearance_time,
    last_appearance_time = EXCLUDED.last_appearance_time,
    appearance_count = EXCLUDED.appearance_count,
    updated_at = NOW()
"""


def _build_person_stats(frames):
    """Aggregate per-person appearance statistics from clustered frames.

    Returns a dict keyed by person_id holding the number of face detections,
    every timestamp the person was seen at, and the earliest/latest
    timestamp together with the frame value recorded for each.
    """
    person_stats = defaultdict(
        lambda: {
            "frame_count": 0,
            "timestamps": [],
            "first_frame": None,
            "last_frame": None,
            "first_time": None,
            "last_time": None,
        }
    )
    for frame in frames:
        ts = frame["timestamp"]
        for face in frame.get("faces", []):
            pid = face.get("person_id")
            if not pid:
                # Faces without a cluster label cannot be attributed.
                continue
            stats = person_stats[pid]
            stats["frame_count"] += 1
            stats["timestamps"].append(ts)
            if stats["first_time"] is None or ts < stats["first_time"]:
                stats["first_time"] = ts
                stats["first_frame"] = frame["frame"]
            if stats["last_time"] is None or ts > stats["last_time"]:
                stats["last_time"] = ts
                stats["last_frame"] = frame["frame"]
    return person_stats


def _vote_speakers(person_stats, segments):
    """Count, per person, how many face timestamps fall inside each speaker's segments.

    A timestamp votes for a segment's speaker when start <= ts <= end.
    Segments without a speaker_id are ignored.
    """
    votes = defaultdict(lambda: defaultdict(float))
    for segment in segments:
        speaker_id = segment.get("speaker_id")
        if not speaker_id:
            continue
        seg_start = segment["start"]
        seg_end = segment["end"]
        for pid, stats in person_stats.items():
            for ts in stats["timestamps"]:
                if seg_start <= ts <= seg_end:
                    votes[pid][speaker_id] += 1.0
    return votes


def _pick_dominant_speakers(person_speaker_votes):
    """Select the most-voted speaker for each person and its share of the votes."""
    dominant_by_person = {}
    for pid, votes in person_speaker_votes.items():
        if not votes:
            continue
        dominant = max(votes, key=votes.get)
        total = sum(votes.values())
        dominant_by_person[pid] = {
            "speaker_id": dominant,
            "votes": votes[dominant],
            "total_votes": total,
            "confidence": votes[dominant] / total,
        }
    return dominant_by_person


def _person_metadata(person):
    """Serialize the metadata JSON stored alongside a person row."""
    return json.dumps(
        {
            "auto_identified": True,
            "speaker_confidence": person["speaker_confidence"],
        }
    )


def _upsert_persons(persons):
    """Upsert every person into dev.person_identities and return the success count.

    Each row runs inside its own savepoint. Without one, the first failing
    statement aborts the surrounding PostgreSQL transaction, every later
    statement fails too, and the final commit persists nothing. Rolling
    back to the savepoint keeps the best-effort, row-by-row behaviour.
    The connection is always closed, even if an unexpected error escapes.
    """
    conn = psycopg2.connect(**DB_CONFIG)
    executed = 0
    try:
        with conn.cursor() as cur:
            for person in persons:
                params = (
                    # Identifiers are sent as text, matching the quoted
                    # literals the statement has always used.
                    str(person["person_id"]),
                    UUID,
                    str(person["person_id"]),
                    str(person["speaker_id"]) if person["speaker_id"] else None,
                    person["first_time"],
                    person["last_time"],
                    person["frame_count"],
                    _person_metadata(person),
                )
                cur.execute("SAVEPOINT person_upsert")
                try:
                    cur.execute(_UPSERT_PERSON_SQL, params)
                except psycopg2.Error as e:
                    print(f"Error: {e}")
                    cur.execute("ROLLBACK TO SAVEPOINT person_upsert")
                else:
                    cur.execute("RELEASE SAVEPOINT person_upsert")
                    executed += 1
        conn.commit()
    finally:
        conn.close()
    return executed


def _sql_literal(value):
    """Render *value* as a single-quoted SQL string literal for display.

    Embedded single quotes are doubled so the printed statement stays valid
    if it is copied into a SQL client.
    """
    return "'" + str(value).replace("'", "''") + "'"


def _print_sql_preview(persons, limit=10):
    """Print human-readable upsert statements for the first *limit* persons.

    These statements are only printed, never executed.
    NOTE(review): the preview targets ``person_identities`` without the
    ``dev.`` schema used by the executed statement - confirm this is intended.
    """
    for p in persons[:limit]:
        pid_val = _sql_literal(p["person_id"])
        speaker_val = _sql_literal(p["speaker_id"]) if p["speaker_id"] else "NULL"
        metadata_val = _sql_literal(_person_metadata(p))
        print(
            f"INSERT INTO person_identities (person_id, video_uuid, name, speaker_id, "
            f"first_appearance_time, last_appearance_time, appearance_count, metadata) "
            f"VALUES ({pid_val}, {_sql_literal(UUID)}, {pid_val}, {speaker_val}, "
            f"{p['first_time']}, {p['last_time']}, {p['frame_count']}, "
            f"{metadata_val}) "
            f"ON CONFLICT (person_id) DO UPDATE SET "
            f"name = EXCLUDED.name, "
            f"speaker_id = COALESCE(EXCLUDED.speaker_id, person_identities.speaker_id), "
            f"first_appearance_time = EXCLUDED.first_appearance_time, "
            f"last_appearance_time = EXCLUDED.last_appearance_time, "
            f"appearance_count = EXCLUDED.appearance_count, "
            f"updated_at = NOW();"
        )


def main():
    """Link face clusters to ASRX speakers for one video and persist the result.

    Steps:
      1. Load ``<UUID>.face_clustered.json`` from BASE_DIR (returns early
         with a message if the file is missing).
      2. Aggregate per-person appearance statistics.
      3. Load ``<UUID>.asrx.json`` if present and let every face timestamp
         that falls inside a speaker segment vote for that speaker.
      4. Print a report for the 20 most frequently seen persons.
      5. Write ``<UUID>.person_identification.json`` to BASE_DIR.
      6. Upsert every person into ``dev.person_identities``.
      7. Print equivalent SQL statements for the first 10 persons.

    Raises:
        psycopg2.Error: if the database connection or commit fails.
        OSError / json.JSONDecodeError / KeyError: on unreadable or
            unexpectedly shaped input files.
    """
    print(f"🔍 Auto-Identify Persons for {UUID}")
    print("=" * 60)

    # 1. Load face_clustered.json
    clustered_path = os.path.join(BASE_DIR, f"{UUID}.face_clustered.json")
    if not os.path.exists(clustered_path):
        print(f"❌ Not found: {clustered_path}")
        return

    clustered = load_json(clustered_path)
    print(f"📸 Loaded {len(clustered['frames'])} frames with face data")

    # 2. Build Person stats from face_clustered.json
    person_stats = _build_person_stats(clustered["frames"])
    print(f"👤 Found {len(person_stats)} unique persons from face clustering")

    # 3. Load ASRX data (optional: speaker matching is skipped without it)
    asrx_path = os.path.join(BASE_DIR, f"{UUID}.asrx.json")
    asrx_data = None
    if os.path.exists(asrx_path):
        asrx_data = load_json(asrx_path)
        print(f"🎤 Loaded ASRX: {len(asrx_data.get('segments', []))} segments")

    # 4. Match speakers to persons by time overlap
    segments = asrx_data.get("segments", []) if asrx_data else []
    person_speaker_votes = _vote_speakers(person_stats, segments)

    # 5. Determine dominant speaker per person
    person_dominant_speaker = _pick_dominant_speakers(person_speaker_votes)

    # 6. Generate report
    print(f"\n{'=' * 60}")
    print("📊 Person Identification Results")
    print(f"{'=' * 60}")

    # Most frequently seen persons first.
    sorted_persons = sorted(
        person_stats.items(), key=lambda x: x[1]["frame_count"], reverse=True
    )

    for pid, stats in sorted_persons[:20]:
        speaker_info = person_dominant_speaker.get(pid, {})
        speaker_id = speaker_info.get("speaker_id", "N/A")
        confidence = speaker_info.get("confidence", 0.0)
        print(
            f"  {pid:12s} | frames:{stats['frame_count']:5d} | "
            f"time:{stats['first_time']:.0f}s-{stats['last_time']:.0f}s | "
            f"speaker:{speaker_id} ({confidence:.0%})"
        )

    # 7. Output JSON for API consumption
    output = {"uuid": UUID, "persons": []}
    for pid, stats in sorted_persons:
        speaker_info = person_dominant_speaker.get(pid, {})
        output["persons"].append(
            {
                "person_id": pid,
                "frame_count": stats["frame_count"],
                "first_time": stats["first_time"],
                "last_time": stats["last_time"],
                "speaker_id": speaker_info.get("speaker_id"),
                "speaker_confidence": speaker_info.get("confidence", 0.0),
                "suggested_name": pid,  # Use cluster label as initial name
            }
        )

    output_path = os.path.join(BASE_DIR, f"{UUID}.person_identification.json")
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(output, f, indent=2)

    print(f"\n💾 Saved: {output_path}")
    print(f"📝 Total persons identified: {len(output['persons'])}")

    # 8. Execute the upserts
    print("\n--- Executing SQL ---")
    executed = _upsert_persons(output["persons"])
    print(f"✅ Executed {executed} SQL statements")

    # 9. Print SQL INSERT statements for person_identities (display only)
    print("\n--- SQL INSERT statements for person_identities ---")
    _print_sql_preview(output["persons"])


if __name__ == "__main__":
    main()