- Remove session-ses_2f27.md (161KB raw session log) - Remove 49 ROOT_* duplicate files across REFERENCE/ - Remove 14 duplicate files between REFERENCE/ root and history/ - Remove asr_legacy.rs (dead code, replaced by asr.rs) - Remove src/core/worker/ (duplicate JobWorker) - Remove src/core/layers/ (empty directory) - Remove 4 .bak files in src/ - Remove 7 dead private methods in worker/processor.rs - Remove backup directory from git tracking
201 lines
7.4 KiB
Python
201 lines
7.4 KiB
Python
#!/opt/homebrew/bin/python3.11
|
|
"""
|
|
Auto-Identify Persons: Bridge face_clustered.json + ASRX speaker data
Creates/updates person_identities with auto-generated names and speaker links.
|
|
"""
|
|
|
|
import bisect
import json
import os
import sys
from collections import defaultdict

import psycopg2
|
|
|
|
# Video identifier to process: first CLI argument, else a built-in sample id.
UUID = sys.argv[1] if len(sys.argv) > 1 else "384b0ff44aaaa1f1"
# Directory holding this video's input/output JSON files (relative path).
BASE_DIR = f"output/{UUID}"

# Keyword arguments handed to psycopg2.connect() when persisting results.
DB_CONFIG = {
    "host": "localhost",
    "user": "accusys",
    "dbname": "momentry",
}
|
|
|
|
|
|
def load_json(filepath):
    """Read the file at *filepath* and return its parsed JSON content.

    The file is decoded explicitly as UTF-8 so that non-ASCII content is
    read the same way regardless of the platform's default locale encoding.

    Raises:
        OSError: if the file cannot be opened.
        json.JSONDecodeError: if the content is not valid JSON.
    """
    with open(filepath, "r", encoding="utf-8") as f:
        return json.load(f)
|
|
|
|
|
|
def _collect_person_stats(frames):
    """Aggregate per-person appearance statistics from clustered frames.

    Each frame must provide "timestamp" and "frame"; faces without a truthy
    "person_id" are ignored. Returns a mapping person_id -> stats dict with
    the keys frame_count, timestamps, first_frame, last_frame, first_time
    and last_time.
    """
    person_stats = defaultdict(
        lambda: {
            "frame_count": 0,
            "timestamps": [],
            "first_frame": None,
            "last_frame": None,
            "first_time": None,
            "last_time": None,
        }
    )
    for frame in frames:
        ts = frame["timestamp"]
        for face in frame.get("faces", []):
            pid = face.get("person_id")
            if not pid:
                continue
            stats = person_stats[pid]
            stats["frame_count"] += 1
            stats["timestamps"].append(ts)
            if stats["first_time"] is None or ts < stats["first_time"]:
                stats["first_time"] = ts
                stats["first_frame"] = frame["frame"]
            if stats["last_time"] is None or ts > stats["last_time"]:
                stats["last_time"] = ts
                stats["last_frame"] = frame["frame"]
    return person_stats


def _count_speaker_votes(person_stats, segments):
    """Count, per person and speaker, face timestamps inside speaker segments.

    A face timestamp ts votes for a segment's speaker when
    segment["start"] <= ts <= segment["end"]. Timestamps are sorted once per
    person so each segment is resolved with two binary searches instead of a
    scan over every timestamp.
    """
    votes = defaultdict(lambda: defaultdict(float))
    sorted_stamps = {
        pid: sorted(stats["timestamps"]) for pid, stats in person_stats.items()
    }
    for segment in segments:
        speaker_id = segment.get("speaker_id")
        # NOTE(review): a falsy id (e.g. integer 0) is skipped here, as in
        # the original logic - confirm speaker ids are non-empty strings.
        if not speaker_id:
            continue
        seg_start = segment["start"]
        seg_end = segment["end"]
        for pid, stamps in sorted_stamps.items():
            hits = bisect.bisect_right(stamps, seg_end) - bisect.bisect_left(
                stamps, seg_start
            )
            # Only touch the nested dict on a real hit so persons without
            # any overlap never get an (empty) vote entry.
            if hits > 0:
                votes[pid][speaker_id] += float(hits)
    return votes


def _pick_dominant_speakers(person_speaker_votes):
    """Return person_id -> info about the speaker with the most votes."""
    dominant_by_person = {}
    for pid, votes in person_speaker_votes.items():
        if not votes:
            continue
        dominant = max(votes, key=votes.get)
        total = sum(votes.values())
        dominant_by_person[pid] = {
            "speaker_id": dominant,
            "votes": votes[dominant],
            "total_votes": total,
            "confidence": votes[dominant] / total,
        }
    return dominant_by_person


def _metadata_json(person):
    """Serialize the metadata payload stored alongside a person row."""
    return json.dumps(
        {
            "auto_identified": True,
            "speaker_confidence": person["speaker_confidence"],
        }
    )


def _upsert_persons(persons):
    """Upsert *persons* into dev.person_identities; return the success count.

    Values are passed as bound parameters rather than interpolated into the
    SQL text. Each row runs under its own savepoint: after a failed
    statement PostgreSQL rejects further commands in the transaction until a
    rollback, so without the savepoint one bad row would discard all others.
    """
    upsert_sql = """INSERT INTO dev.person_identities (person_id, video_uuid, name, speaker_id,
            first_appearance_time, last_appearance_time, appearance_count, metadata)
        VALUES (%s, %s, %s, %s, %s, %s, %s, %s)
        ON CONFLICT (person_id) DO UPDATE SET
            name = EXCLUDED.name,
            speaker_id = COALESCE(EXCLUDED.speaker_id, person_identities.speaker_id),
            first_appearance_time = EXCLUDED.first_appearance_time,
            last_appearance_time = EXCLUDED.last_appearance_time,
            appearance_count = EXCLUDED.appearance_count,
            updated_at = NOW()"""

    executed = 0
    conn = psycopg2.connect(**DB_CONFIG)
    try:
        with conn.cursor() as cur:
            for p in persons:
                # str() mirrors the quoted literals the statement used before.
                person_id = str(p["person_id"])
                speaker_id = str(p["speaker_id"]) if p["speaker_id"] else None
                params = (
                    person_id,
                    UUID,
                    person_id,
                    speaker_id,
                    p["first_time"],
                    p["last_time"],
                    p["frame_count"],
                    _metadata_json(p),
                )
                cur.execute("SAVEPOINT person_upsert")
                try:
                    cur.execute(upsert_sql, params)
                except psycopg2.Error as e:
                    # Best effort: undo only this row and keep going.
                    cur.execute("ROLLBACK TO SAVEPOINT person_upsert")
                    print(f"Error: {e}")
                else:
                    cur.execute("RELEASE SAVEPOINT person_upsert")
                    executed += 1
        conn.commit()
    finally:
        # Closing without a commit discards any uncommitted work.
        conn.close()
    return executed


def _print_sql_preview(persons, limit=10):
    """Print human-readable INSERT statements for the first *limit* persons.

    The text is for display only: values are inlined without escaping, so it
    must not be executed against a database as-is.
    NOTE(review): the preview targets "person_identities" while the executed
    statement targets "dev.person_identities" - confirm which is intended.
    """
    for p in persons[:limit]:
        speaker_val = f"'{p['speaker_id']}'" if p["speaker_id"] else "NULL"
        metadata = _metadata_json(p)
        print(
            f"INSERT INTO person_identities (person_id, video_uuid, name, speaker_id, "
            f"first_appearance_time, last_appearance_time, appearance_count, metadata) "
            f"VALUES ('{p['person_id']}', '{UUID}', '{p['person_id']}', {speaker_val}, "
            f"{p['first_time']}, {p['last_time']}, {p['frame_count']}, "
            f"'{metadata}') "
            f"ON CONFLICT (person_id) DO UPDATE SET "
            f"name = EXCLUDED.name, "
            f"speaker_id = COALESCE(EXCLUDED.speaker_id, person_identities.speaker_id), "
            f"first_appearance_time = EXCLUDED.first_appearance_time, "
            f"last_appearance_time = EXCLUDED.last_appearance_time, "
            f"appearance_count = EXCLUDED.appearance_count, "
            f"updated_at = NOW();"
        )


def main():
    """Link clustered face identities to ASRX speakers for the video UUID.

    Steps:
      1. Load ``<UUID>.face_clustered.json`` from BASE_DIR (return early with
         a message if it does not exist).
      2. Build per-person appearance statistics.
      3. Load ``<UUID>.asrx.json`` if present.
      4. Vote speakers for persons by timestamp/segment overlap and pick the
         dominant speaker per person.
      5. Print a report of the 20 most frequent persons.
      6. Write ``<UUID>.person_identification.json`` to BASE_DIR.
      7. Upsert every person into dev.person_identities.
      8. Print a preview of the equivalent INSERT statements.
    """
    print(f"🔍 Auto-Identify Persons for {UUID}")
    print("=" * 60)

    # 1. Load face_clustered.json
    clustered_path = os.path.join(BASE_DIR, f"{UUID}.face_clustered.json")
    if not os.path.exists(clustered_path):
        print(f"❌ Not found: {clustered_path}")
        return

    clustered = load_json(clustered_path)
    print(f"📸 Loaded {len(clustered['frames'])} frames with face data")

    # 2. Build person stats from face_clustered.json
    person_stats = _collect_person_stats(clustered["frames"])
    print(f"👤 Found {len(person_stats)} unique persons from face clustering")

    # 3. Load ASRX data (optional input)
    asrx_path = os.path.join(BASE_DIR, f"{UUID}.asrx.json")
    asrx_data = None
    if os.path.exists(asrx_path):
        asrx_data = load_json(asrx_path)
        print(f"🎤 Loaded ASRX: {len(asrx_data.get('segments', []))} segments")

    # 4. Match speakers to persons by time overlap
    segments = asrx_data.get("segments", []) if asrx_data else []
    person_speaker_votes = _count_speaker_votes(person_stats, segments)

    # 5. Determine dominant speaker per person
    person_dominant_speaker = _pick_dominant_speakers(person_speaker_votes)

    # 6. Generate report, most frequently seen persons first
    print(f"\n{'=' * 60}")
    print("📊 Person Identification Results")
    print(f"{'=' * 60}")

    sorted_persons = sorted(
        person_stats.items(), key=lambda x: x[1]["frame_count"], reverse=True
    )

    for pid, stats in sorted_persons[:20]:
        speaker_info = person_dominant_speaker.get(pid, {})
        speaker_id = speaker_info.get("speaker_id", "N/A")
        confidence = speaker_info.get("confidence", 0.0)
        # "!s" keeps the column formatting working for non-string ids.
        print(
            f"  {pid!s:12s} | frames:{stats['frame_count']:5d} | "
            f"time:{stats['first_time']:.0f}s-{stats['last_time']:.0f}s | "
            f"speaker:{speaker_id} ({confidence:.0%})"
        )

    # 7. Output JSON for API consumption
    output = {"uuid": UUID, "persons": []}
    for pid, stats in sorted_persons:
        speaker_info = person_dominant_speaker.get(pid, {})
        output["persons"].append(
            {
                "person_id": pid,
                "frame_count": stats["frame_count"],
                "first_time": stats["first_time"],
                "last_time": stats["last_time"],
                "speaker_id": speaker_info.get("speaker_id"),
                "speaker_confidence": speaker_info.get("confidence", 0.0),
                "suggested_name": pid,  # Use cluster label as initial name
            }
        )

    output_path = os.path.join(BASE_DIR, f"{UUID}.person_identification.json")
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(output, f, indent=2)

    print(f"\n💾 Saved: {output_path}")
    print(f"📝 Total persons identified: {len(output['persons'])}")

    # 8. Execute SQL upserts
    print("\n--- Executing SQL ---")
    executed = _upsert_persons(output["persons"])
    print(f"✅ Executed {executed} SQL statements")

    # 9. Print SQL INSERT statements for person_identities (preview only)
    print("\n--- SQL INSERT statements for person_identities ---")
    _print_sql_preview(output["persons"])
|
|
|
|
|
|
if __name__ == "__main__":
    # Run the pipeline only when executed as a script, not when imported.
    main()
|