Files
momentry_core/scripts/match_speakers_to_chunks.py
Warren 8f05a7c188 feat: update Python processors and add utility scripts
- Update ASR, face, OCR, pose processors
- Add release pre-flight check script
- Add synonym generation, chunk processing scripts
- Add face recognition, stamp search utilities
2026-04-30 15:07:49 +08:00

57 lines
1.3 KiB
Python

#!/opt/homebrew/bin/python3.11
"""
Match Speaker IDs from ASRX to Child Chunks
"""
import json
import psycopg2
# Identifier of the item to process: it is used to build ASRX_PATH and to
# select rows in `child_chunks` (matched against the `uuid` column).
UUID = "384b0ff44aaaa1f1"
# ASRX JSON file read by match_speakers(); a relative path, so it is resolved
# against the current working directory.
ASRX_PATH = f"output/{UUID}/{UUID}.asrx.json"
# Connection string handed to psycopg2.connect().
DB_URL = "postgresql://accusys@localhost:5432/momentry"
def match_speakers(
    uuid: str | None = None,
    asrx_path: str | None = None,
    db_url: str | None = None,
) -> None:
    """Tag child chunks with the speaker IDs of overlapping ASRX segments.

    Loads the ASRX JSON file, and for every segment that carries a
    ``speaker_id`` appends that ID to ``child_chunks.speaker_ids`` on each
    row of the same ``uuid`` whose ``(start_time, end_time)`` interval
    strictly overlaps the segment's ``(start, end)`` interval. A speaker is
    never appended twice to the same chunk.

    All updates run in a single transaction: it is committed once after the
    last segment, and rolled back if anything fails part-way.

    Args:
        uuid: Value matched against ``child_chunks.uuid``. Defaults to the
            module-level ``UUID``.
        asrx_path: Path of the ASRX JSON file. Defaults to ``ASRX_PATH`` for
            the default ``uuid``, otherwise to
            ``output/<uuid>/<uuid>.asrx.json``.
        db_url: Connection string for ``psycopg2.connect``. Defaults to the
            module-level ``DB_URL``.

    Raises:
        OSError: The ASRX file cannot be opened.
        json.JSONDecodeError: The ASRX file is not valid JSON.
        KeyError: A segment with a speaker lacks ``start`` or ``end``.
        psycopg2.Error: Connecting to or updating the database failed.
    """
    uuid = UUID if uuid is None else uuid
    if asrx_path is None:
        asrx_path = ASRX_PATH if uuid == UUID else f"output/{uuid}/{uuid}.asrx.json"
    db_url = DB_URL if db_url is None else db_url

    print(f"🚀 Matching Speakers for {uuid}...")

    # JSON is UTF-8 by specification; do not depend on the locale default.
    with open(asrx_path, encoding="utf-8") as f:
        asrx = json.load(f)

    # The file is either a bare list of segments or an object wrapping them
    # under the "segments" key.
    segments = asrx if isinstance(asrx, list) else asrx.get("segments", [])
    print(f"📂 Loaded {len(segments)} ASRX segments.")

    # COALESCE keeps the "not already tagged" test two-valued: on a NULL
    # array, `NOT (speaker_ids @> ...)` would be NULL and the row would be
    # skipped. NOTE(review): only matters if speaker_ids is nullable; it is
    # a no-op otherwise.
    update_sql = """
        UPDATE child_chunks
        SET speaker_ids = array_append(COALESCE(speaker_ids, '{}'::text[]), %s)
        WHERE uuid = %s
          AND start_time < %s
          AND end_time > %s
          AND NOT (COALESCE(speaker_ids, '{}'::text[]) @> ARRAY[%s]::text[])
    """

    count = 0
    conn = psycopg2.connect(db_url)
    try:
        # `with conn` commits on success and rolls back on an exception;
        # `with conn.cursor()` closes the cursor. Neither closes the
        # connection itself, hence the surrounding try/finally.
        with conn, conn.cursor() as cur:
            for seg in segments:
                speaker = seg.get("speaker_id")
                if not speaker:
                    # Unattributed segment: nothing to tag, and its
                    # timestamps are not needed.
                    continue
                start = seg["start"]
                end = seg["end"]
                # Overlap test: the chunk starts before the segment ends and
                # ends after the segment starts.
                cur.execute(update_sql, (speaker, uuid, end, start, speaker))
                # rowcount may be -1 when unknown; never let it reduce the
                # total.
                count += max(cur.rowcount, 0)
        # Counts row updates, so a chunk overlapping several speakers is
        # counted once per speaker added.
        print(f"✅ Updated {count} child chunks with Speaker IDs.")
    finally:
        conn.close()
# Script entry point: run the matching job only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    match_speakers()