feat: update Python processors and add utility scripts
- Update ASR, face, OCR, pose processors
- Add release pre-flight check script
- Add synonym generation, chunk processing scripts
- Add face recognition, stamp search utilities
This commit is contained in:
56
scripts/match_speakers_to_chunks.py
Normal file
56
scripts/match_speakers_to_chunks.py
Normal file
@@ -0,0 +1,56 @@
|
||||
#!/opt/homebrew/bin/python3.11
|
||||
"""
|
||||
Match Speaker IDs from ASRX to Child Chunks
|
||||
"""
|
||||
|
||||
import json
|
||||
import psycopg2
|
||||
|
||||
# Identifier of the item to process: it is interpolated into ASRX_PATH and
# used as the `uuid` filter when updating child_chunks.
UUID = "384b0ff44aaaa1f1"
|
||||
# Path of the ASRX JSON file for UUID; relative, so it is resolved against
# the current working directory.
ASRX_PATH = f"output/{UUID}/{UUID}.asrx.json"
|
||||
# Connection string passed to psycopg2.connect().
DB_URL = "postgresql://accusys@localhost:5432/momentry"
|
||||
|
||||
|
||||
def match_speakers():
    """Attach ASRX speaker IDs to the child chunks they overlap in time.

    Loads the ASRX JSON at ``ASRX_PATH`` (either a bare list of segments or
    an object with a ``segments`` key). For every segment that carries a
    ``speaker_id``, the ID is appended to ``child_chunks.speaker_ids`` on
    each row of ``UUID`` whose time range overlaps the segment and that does
    not already list the speaker.

    All updates run in a single transaction: they are committed together,
    or rolled back if any statement fails. The connection is always closed.

    Raises:
        OSError: the ASRX file cannot be opened.
        json.JSONDecodeError: the ASRX file is not valid JSON.
        KeyError: a segment with a speaker lacks ``start`` or ``end``.
        psycopg2.Error: connecting to or updating the database fails.
    """
    print(f"🚀 Matching Speakers for {UUID}...")

    # JSON is UTF-8 by specification; do not depend on the locale default.
    with open(ASRX_PATH, encoding="utf-8") as f:
        asrx = json.load(f)

    segments = asrx if isinstance(asrx, list) else asrx.get("segments", [])
    print(f"📂 Loaded {len(segments)} ASRX segments.")

    conn = psycopg2.connect(DB_URL)
    try:
        count = 0
        # `with conn` commits on success and rolls back on an exception;
        # it does NOT close the connection, hence the outer try/finally.
        with conn, conn.cursor() as cur:
            for seg in segments:
                speaker = seg.get("speaker_id")
                if not speaker:
                    # Segments without a speaker need no timestamps either.
                    continue

                start = seg["start"]
                end = seg["end"]

                # Two ranges overlap when each starts before the other ends.
                # COALESCE guards rows whose speaker_ids is NULL: without it
                # `NOT (NULL @> ...)` is NULL and the row would be skipped.
                cur.execute(
                    """
                    UPDATE child_chunks
                    SET speaker_ids = array_append(speaker_ids, %s)
                    WHERE uuid = %s
                      AND start_time < %s
                      AND end_time > %s
                      AND NOT (
                          COALESCE(speaker_ids, '{}'::text[]) @> ARRAY[%s]::text[]
                      )
                    """,
                    (speaker, UUID, end, start, speaker),
                )

                if cur.rowcount > 0:
                    count += cur.rowcount

        print(f"✅ Updated {count} child chunks with Speaker IDs.")
    finally:
        conn.close()
|
||||
|
||||
|
||||
# Script entry point: run the matcher only when this file is executed
# directly, not when it is imported.
if __name__ == "__main__":
|
||||
match_speakers()
|
||||
Reference in New Issue
Block a user