#!/opt/homebrew/bin/python3.11
"""
Match Speaker IDs from ASRX to Child Chunks

Reads the ASRX segment list for one recording and appends each segment's
``speaker_id`` to the ``speaker_ids`` array of every ``child_chunks`` row
whose time range overlaps that segment.
"""

import json

import psycopg2

UUID = "384b0ff44aaaa1f1"
ASRX_PATH = f"output/{UUID}/{UUID}.asrx.json"
DB_URL = "postgresql://accusys@localhost:5432/momentry"

# Append a speaker to every chunk of one recording that overlaps the segment
# and does not already list that speaker.
# Parameter order: speaker, uuid, segment end, segment start, speaker.
# COALESCE keeps the duplicate check from evaluating to NULL (and therefore
# silently skipping the row) when speaker_ids has never been set.
_APPEND_SPEAKER_SQL = """
    UPDATE child_chunks
    SET speaker_ids = array_append(speaker_ids, %s)
    WHERE uuid = %s
      AND start_time < %s
      AND end_time > %s
      AND NOT (COALESCE(speaker_ids, '{}') @> ARRAY[%s]::text[])
"""


def match_speakers(uuid=UUID, asrx_path=None, db_url=DB_URL):
    """Attach ASRX speaker IDs to the overlapping child chunks of a recording.

    Args:
        uuid: Value matched against ``child_chunks.uuid``. Defaults to the
            module-level ``UUID``.
        asrx_path: Path of the ASRX JSON file. When omitted it is derived as
            ``output/<uuid>/<uuid>.asrx.json`` (equal to ``ASRX_PATH`` for the
            default ``uuid``).
        db_url: psycopg2 connection string. Defaults to ``DB_URL``.

    Returns:
        The total of the row counts reported by the UPDATE statements.

    Raises:
        OSError: The ASRX file cannot be opened.
        json.JSONDecodeError: The ASRX file is not valid JSON.
        KeyError: A segment that has a speaker lacks ``start`` or ``end``.
        psycopg2.Error: Connecting or updating fails; nothing is committed.
    """
    if asrx_path is None:
        asrx_path = f"output/{uuid}/{uuid}.asrx.json"

    print(f"🚀 Matching Speakers for {uuid}...")

    with open(asrx_path, encoding="utf-8") as f:
        asrx = json.load(f)

    # The file is either a bare list of segments or an object wrapping one.
    segments = asrx if isinstance(asrx, list) else asrx.get("segments", [])
    print(f"📂 Loaded {len(segments)} ASRX segments.")

    count = 0
    conn = psycopg2.connect(db_url)
    try:
        with conn.cursor() as cur:
            for seg in segments:
                speaker = seg.get("speaker_id")
                if not speaker:
                    # Nothing to attach; skip before touching start/end so an
                    # unlabeled segment without timestamps cannot abort the run.
                    continue

                start = seg["start"]
                end = seg["end"]

                # Strict overlap: the chunk starts before the segment ends and
                # ends after the segment starts.
                cur.execute(
                    _APPEND_SPEAKER_SQL,
                    (speaker, uuid, end, start, speaker),
                )
                # rowcount can be -1 when the driver cannot determine it.
                if cur.rowcount > 0:
                    count += cur.rowcount

        # Single commit: either every segment is applied or none is.
        conn.commit()
    finally:
        # Closing without a commit discards the pending transaction, so a
        # failure above leaves the table untouched and never leaks the
        # connection.
        conn.close()

    print(f"✅ Updated {count} child chunks with Speaker IDs.")
    return count


if __name__ == "__main__":
    match_speakers()