- Update ASR, face, OCR, pose processors - Add release pre-flight check script - Add synonym generation, chunk processing scripts - Add face recognition, stamp search utilities
57 lines
1.3 KiB
Python
57 lines
1.3 KiB
Python
#!/opt/homebrew/bin/python3.11
|
|
"""
|
|
Match Speaker IDs from ASRX to Child Chunks
|
|
"""
|
|
|
|
import json
|
|
import psycopg2
|
|
|
|
# Identifier used both to locate the ASRX file and to select child_chunks rows.
UUID = "384b0ff44aaaa1f1"
# Path of the ASRX JSON for that identifier, relative to the working directory.
ASRX_PATH = "output/{0}/{0}.asrx.json".format(UUID)
# Connection string handed to psycopg2.connect().
DB_URL = "postgresql://accusys@localhost:5432/momentry"
|
|
|
|
|
|
def match_speakers():
    """Tag child chunks with the speaker IDs of overlapping ASRX segments.

    Loads the ASRX JSON at ``ASRX_PATH``, which may be either a bare list of
    segments or an object holding them under ``"segments"``. For every
    segment with a truthy ``"speaker_id"``, the ID is appended to
    ``speaker_ids`` on each ``child_chunks`` row for ``UUID`` whose
    ``start_time``/``end_time`` interval strictly overlaps the segment and
    that does not already list that speaker. All updates are committed
    together at the end.

    The printed total is the sum of per-statement row counts, so a chunk
    overlapped by several speakers is counted once per speaker added.

    Raises:
        OSError: if the ASRX file cannot be opened.
        json.JSONDecodeError: if the ASRX file is not valid JSON.
        KeyError: if a segment that has a speaker lacks "start" or "end".
        psycopg2.Error: on connection or query failure. The connection is
            closed without committing, so no partial update is persisted.
    """
    print(f"🚀 Matching Speakers for {UUID}...")

    # JSON is UTF-8 by specification; do not depend on the locale default.
    with open(ASRX_PATH, encoding="utf-8") as f:
        asrx = json.load(f)

    segments = asrx if isinstance(asrx, list) else asrx.get("segments", [])
    print(f"📂 Loaded {len(segments)} ASRX segments.")

    count = 0
    conn = psycopg2.connect(DB_URL)
    try:
        with conn.cursor() as cur:
            for seg in segments:
                speaker = seg.get("speaker_id")
                if not speaker:
                    # Nothing to attribute; skip before touching the times.
                    continue

                start = seg["start"]
                end = seg["end"]

                # Two intervals overlap when each starts before the other
                # ends. COALESCE keeps rows whose speaker_ids is NULL
                # eligible: without it `NOT (NULL @> ...)` is NULL and the
                # row would never be updated.
                cur.execute(
                    """
                    UPDATE child_chunks
                    SET speaker_ids = array_append(speaker_ids, %s)
                    WHERE uuid = %s
                    AND start_time < %s
                    AND end_time > %s
                    AND NOT (COALESCE(speaker_ids, '{}'::text[]) @> ARRAY[%s]::text[])
                    """,
                    (speaker, UUID, end, start, speaker),
                )

                # rowcount may be -1 when the driver cannot determine it.
                if cur.rowcount > 0:
                    count += cur.rowcount

        conn.commit()
    finally:
        # Always release the connection; closing without a commit discards
        # any uncommitted updates.
        conn.close()

    print(f"✅ Updated {count} child chunks with Speaker IDs.")
|
|
|
|
|
|
# Run the matching only when executed as a script, not when imported.
if __name__ == "__main__":
    match_speakers()
|