#!/opt/homebrew/bin/python3.11
"""
Auto-Identify Persons: Bridge face_clustered.json + ASRX speaker data
Creates/updates person_identities with auto-generated names and speaker links.
"""

import json
import os
import sys
from bisect import bisect_left, bisect_right
from collections import defaultdict

try:
    import psycopg2
except ImportError:
    # The analysis and JSON-report stages do not touch the database, so a
    # missing driver is only reported when the upsert step actually runs.
    psycopg2 = None

UUID = sys.argv[1] if len(sys.argv) > 1 else "384b0ff44aaaa1f1"
BASE_DIR = f"output/{UUID}"

DB_CONFIG = {
    "host": "localhost",
    "user": "accusys",
    "dbname": "momentry",
}

# Parameterized upsert: values are passed separately to cursor.execute() so
# that quotes in person/speaker ids or the UUID cannot alter the statement.
UPSERT_PERSON_SQL = """INSERT INTO dev.person_identities
    (person_id, video_uuid, name, speaker_id, first_appearance_time,
     last_appearance_time, appearance_count, metadata)
VALUES (%s, %s, %s, %s, %s, %s, %s, %s)
ON CONFLICT (person_id) DO UPDATE SET
    name = EXCLUDED.name,
    speaker_id = COALESCE(EXCLUDED.speaker_id, person_identities.speaker_id),
    first_appearance_time = EXCLUDED.first_appearance_time,
    last_appearance_time = EXCLUDED.last_appearance_time,
    appearance_count = EXCLUDED.appearance_count,
    updated_at = NOW()"""


def load_json(filepath):
    """Read *filepath* as UTF-8 JSON and return the decoded object."""
    with open(filepath, "r", encoding="utf-8") as f:
        return json.load(f)


def build_person_stats(frames):
    """Aggregate per-person appearance statistics from clustered frames.

    Each frame must carry ``timestamp`` and ``frame``; its optional ``faces``
    list holds dicts whose ``person_id`` (when truthy) identifies the person.

    Returns a dict mapping person_id to ``frame_count``, ``timestamps`` (in
    frame order), and the frame/time of the earliest and latest appearance.
    """
    person_stats = defaultdict(
        lambda: {
            "frame_count": 0,
            "timestamps": [],
            "first_frame": None,
            "last_frame": None,
            "first_time": None,
            "last_time": None,
        }
    )
    for frame in frames:
        ts = frame["timestamp"]
        for face in frame.get("faces", []):
            pid = face.get("person_id")
            if not pid:
                continue
            stats = person_stats[pid]
            stats["frame_count"] += 1
            stats["timestamps"].append(ts)
            if stats["first_time"] is None or ts < stats["first_time"]:
                stats["first_time"] = ts
                stats["first_frame"] = frame["frame"]
            if stats["last_time"] is None or ts > stats["last_time"]:
                stats["last_time"] = ts
                stats["last_frame"] = frame["frame"]
    # Plain dict so later lookups of unknown ids cannot create empty entries.
    return dict(person_stats)


def match_speakers(person_stats, segments):
    """Count, per person and speaker, face timestamps inside speaker segments.

    A timestamp ``ts`` votes for a segment's ``speaker_id`` when
    ``start <= ts <= end`` (both bounds inclusive). Segments without a truthy
    ``speaker_id`` are ignored.

    Returns ``{person_id: {speaker_id: votes}}`` with float vote totals; only
    person/speaker pairs with at least one vote are present.
    """
    # Sort each person's timestamps once so every segment is a bisect range
    # count instead of a full scan over all timestamps.
    sorted_times = {
        pid: sorted(stats["timestamps"]) for pid, stats in person_stats.items()
    }
    votes = defaultdict(lambda: defaultdict(float))
    for segment in segments:
        speaker_id = segment.get("speaker_id")
        if not speaker_id:
            continue
        seg_start = segment["start"]
        seg_end = segment["end"]
        for pid, times in sorted_times.items():
            hits = bisect_right(times, seg_end) - bisect_left(times, seg_start)
            if hits > 0:
                votes[pid][speaker_id] += float(hits)
    return votes


def pick_dominant_speakers(person_speaker_votes):
    """Pick the highest-voted speaker for each person.

    Returns ``{person_id: {"speaker_id", "votes", "total_votes",
    "confidence"}}`` where confidence is the winner's share of that person's
    votes. Ties go to the speaker that received a vote first.
    """
    dominant = {}
    for pid, votes in person_speaker_votes.items():
        if not votes:
            continue
        best = max(votes, key=votes.get)
        total = sum(votes.values())
        dominant[pid] = {
            "speaker_id": best,
            "votes": votes[best],
            "total_votes": total,
            "confidence": votes[best] / total,
        }
    return dominant


def build_output(video_uuid, sorted_persons, person_dominant_speaker):
    """Build the JSON-serializable report for *sorted_persons*.

    *sorted_persons* is an iterable of ``(person_id, stats)`` pairs; the
    output keeps their order.
    """
    persons = []
    for pid, stats in sorted_persons:
        speaker_info = person_dominant_speaker.get(pid, {})
        persons.append(
            {
                "person_id": pid,
                "frame_count": stats["frame_count"],
                "first_time": stats["first_time"],
                "last_time": stats["last_time"],
                "speaker_id": speaker_info.get("speaker_id"),
                "speaker_confidence": speaker_info.get("confidence", 0.0),
                "suggested_name": pid,  # Use cluster label as initial name
            }
        )
    return {"uuid": video_uuid, "persons": persons}


def person_upsert_params(person, video_uuid):
    """Return the parameter tuple for ``UPSERT_PERSON_SQL`` for one person."""
    speaker_id = person["speaker_id"]
    metadata = json.dumps(
        {
            "auto_identified": True,
            "speaker_confidence": person["speaker_confidence"],
        }
    )
    return (
        person["person_id"],
        video_uuid,
        person["person_id"],  # the cluster label doubles as the initial name
        # Sent as text (like the quoted literal the script used to build) so
        # the server coerces it to the column type; missing speaker -> NULL.
        str(speaker_id) if speaker_id else None,
        person["first_time"],
        person["last_time"],
        person["frame_count"],
        metadata,
    )


def upsert_persons(persons, video_uuid):
    """Upsert *persons* into dev.person_identities; return the success count.

    Every row runs under its own SAVEPOINT: a failed statement would
    otherwise leave the transaction aborted, making every later statement
    fail and the final commit persist nothing.

    Raises:
        RuntimeError: if psycopg2 is not installed.
    """
    if psycopg2 is None:
        raise RuntimeError("psycopg2 is required to write person_identities")

    conn = psycopg2.connect(**DB_CONFIG)
    executed = 0
    try:
        with conn.cursor() as cur:
            for person in persons:
                cur.execute("SAVEPOINT person_upsert")
                try:
                    cur.execute(
                        UPSERT_PERSON_SQL,
                        person_upsert_params(person, video_uuid),
                    )
                except psycopg2.Error as e:
                    print(f"Error: {e}")
                    cur.execute("ROLLBACK TO SAVEPOINT person_upsert")
                else:
                    cur.execute("RELEASE SAVEPOINT person_upsert")
                    executed += 1
        conn.commit()
    finally:
        # Close even when connect-time or commit-time errors propagate.
        conn.close()
    return executed


def sql_literal(value):
    """Render *value* as a single-quoted SQL string, doubling inner quotes."""
    return "'" + str(value).replace("'", "''") + "'"


def format_preview_sql(person, video_uuid):
    """Return a human-readable INSERT statement for one person.

    This text is only printed for inspection; the database write itself goes
    through the parameterized ``UPSERT_PERSON_SQL``.
    """
    pid = sql_literal(person["person_id"])
    speaker_val = (
        sql_literal(person["speaker_id"]) if person["speaker_id"] else "NULL"
    )
    confidence = person["speaker_confidence"]
    metadata = f'{{"auto_identified": true, "speaker_confidence": {confidence}}}'
    return (
        "INSERT INTO person_identities (person_id, video_uuid, name, speaker_id, "
        "first_appearance_time, last_appearance_time, appearance_count, metadata) "
        f"VALUES ({pid}, {sql_literal(video_uuid)}, {pid}, {speaker_val}, "
        f"{person['first_time']}, {person['last_time']}, {person['frame_count']}, "
        f"'{metadata}') "
        "ON CONFLICT (person_id) DO UPDATE SET "
        "name = EXCLUDED.name, "
        "speaker_id = COALESCE(EXCLUDED.speaker_id, person_identities.speaker_id), "
        "first_appearance_time = EXCLUDED.first_appearance_time, "
        "last_appearance_time = EXCLUDED.last_appearance_time, "
        "appearance_count = EXCLUDED.appearance_count, "
        "updated_at = NOW();"
    )


def main():
    """Run the identification pipeline for the module-level ``UUID``.

    Reads ``<UUID>.face_clustered.json`` (required) and ``<UUID>.asrx.json``
    (optional) from ``BASE_DIR``, writes ``<UUID>.person_identification.json``
    there, upserts every person into the database, and prints a preview of
    the first ten INSERT statements.
    """
    print(f"🔍 Auto-Identify Persons for {UUID}")
    print("=" * 60)

    # 1. Load face_clustered.json
    clustered_path = os.path.join(BASE_DIR, f"{UUID}.face_clustered.json")
    if not os.path.exists(clustered_path):
        print(f"❌ Not found: {clustered_path}")
        return

    clustered = load_json(clustered_path)
    print(f"📸 Loaded {len(clustered['frames'])} frames with face data")

    # 2. Build Person stats from face_clustered.json
    person_stats = build_person_stats(clustered["frames"])
    print(f"👤 Found {len(person_stats)} unique persons from face clustering")

    # 3. Load ASRX data (optional)
    asrx_path = os.path.join(BASE_DIR, f"{UUID}.asrx.json")
    asrx_data = None
    if os.path.exists(asrx_path):
        asrx_data = load_json(asrx_path)
        print(f"🎤 Loaded ASRX: {len(asrx_data.get('segments', []))} segments")

    # 4. Match speakers to persons by time overlap
    segments = asrx_data.get("segments", []) if asrx_data else []
    person_speaker_votes = match_speakers(person_stats, segments)

    # 5. Determine dominant speaker per person
    person_dominant_speaker = pick_dominant_speakers(person_speaker_votes)

    # 6. Generate report (top 20 persons by frame count)
    print(f"\n{'=' * 60}")
    print("📊 Person Identification Results")
    print(f"{'=' * 60}")

    sorted_persons = sorted(
        person_stats.items(), key=lambda x: x[1]["frame_count"], reverse=True
    )

    for pid, stats in sorted_persons[:20]:
        speaker_info = person_dominant_speaker.get(pid, {})
        speaker_id = speaker_info.get("speaker_id", "N/A")
        confidence = speaker_info.get("confidence", 0.0)
        print(
            f" {pid:12s} | frames:{stats['frame_count']:5d} | "
            f"time:{stats['first_time']:.0f}s-{stats['last_time']:.0f}s | "
            f"speaker:{speaker_id} ({confidence:.0%})"
        )

    # 7. Output JSON for API consumption
    output = build_output(UUID, sorted_persons, person_dominant_speaker)
    output_path = os.path.join(BASE_DIR, f"{UUID}.person_identification.json")
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(output, f, indent=2)

    print(f"\n💾 Saved: {output_path}")
    print(f"📝 Total persons identified: {len(output['persons'])}")

    # 8. Execute SQL INSERT statements
    print("\n--- Executing SQL ---")
    executed = upsert_persons(output["persons"], UUID)
    print(f"✅ Executed {executed} SQL statements")

    # 9. Print SQL INSERT statements for person_identities (first 10 only)
    print("\n--- SQL INSERT statements for person_identities ---")
    for p in output["persons"][:10]:
        print(format_preview_sql(p, UUID))


if __name__ == "__main__":
    main()