- ASRX handler no longer stores duplicate 'asr' pre_chunks - Pre_chunks storage made idempotent (delete-before-insert) - Rule 1 + trace_ingest changed to query 'asrx' not 'asr' - Trace chunks removed (dynamic from TKG/Qdrant) - TKG scroll_face_points fixed: trace_id >= 1 (not == 1) - TKG AsrxSegmentEntry: start/end -> start_time/end_time (match ASRX JSON) - Unregister error handling: log instead of silent discard - Add publish_pipeline_progress calls at each pipeline stage (processors, rule1, face_trace, identity_agent, TKG, rule2, completion)
295 lines
9.7 KiB
Python
295 lines
9.7 KiB
Python
#!/opt/homebrew/bin/python3.11
|
||
"""
Face Clustering Processor

Responsibility: aggregate short-lived Face IDs into persistent Person IDs,
and automatically bind Speakers to them.
"""
|
||
|
||
import cv2
|
||
import json
|
||
import numpy as np
|
||
import os
|
||
import sys
|
||
import psycopg2
|
||
from sklearn.cluster import AgglomerativeClustering
|
||
|
||
# Put this file's own directory at the front of the module search path.
_HERE = os.path.dirname(os.path.abspath(__file__))
sys.path.insert(0, _HERE)

# Use FaceNet embeddings from face.json instead of DeepFace
HAS_DEEPFACE = False
print("[FACE_CLUSTER] Using FaceNet embeddings from face.json (DeepFace not required)")

# Settings (each can be overridden through the environment)
UUID = os.environ.get("UUID", "quick_preview")
OUTPUT_DIR = os.environ.get("MOMENTRY_OUTPUT_DIR", "./output")

# Every per-file artifact lives under <OUTPUT_DIR>/<UUID>/ and is named after
# the UUID.
_JOB_DIR = os.path.join(OUTPUT_DIR, UUID)
VIDEO_PATH = os.path.join(_JOB_DIR, f"{UUID}.mp4")
FACE_JSON_PATH = os.path.join(_JOB_DIR, f"{UUID}.face.json")
OUTPUT_JSON_PATH = os.path.join(_JOB_DIR, f"{UUID}.face_clustered.json")
ASRX_JSON_PATH = os.path.join(_JOB_DIR, f"{UUID}.asrx.json")

DB_URL = os.environ.get(
    "DATABASE_URL", "postgresql://accusys@localhost:5432/momentry"
)
|
||
|
||
|
||
def optimized_clustering(
    embeddings,
    sample_size=5000,
    distance_threshold=0.4,
    batch_size=5000,
    random_state=None,
):
    """
    Optimized clustering for large datasets (e.g. 25k faces).

    Strategy: Sample -> Agglomerative -> Centroid Assignment. A random sample
    is clustered with average-linkage agglomerative clustering on cosine
    distance; the mean embedding of every sample cluster becomes a centroid,
    and every face (sampled or not) is then assigned to its nearest centroid.

    Args:
        embeddings: 2-D array-like, one embedding per row. Lists are accepted
            and converted to a NumPy array.
        sample_size: maximum number of faces used to build the cluster
            structure (default 5000, the previous hard-coded value).
        distance_threshold: cosine-distance cut passed to
            AgglomerativeClustering (default 0.4, as before).
        batch_size: number of faces assigned to centroids per batch, to bound
            the size of the distance matrix (default 5000, as before).
        random_state: optional seed for the sampling step. ``None`` keeps the
            previous behaviour of drawing from NumPy's global RNG.

    Returns:
        numpy.ndarray of int, one label per input row. Labels are indices
        into the centroid list, i.e. values in ``range(n_clusters)``.
    """
    embeddings = np.asarray(embeddings)
    n_faces = len(embeddings)
    print(f" 🚀 Starting optimized clustering for {n_faces} faces...")

    # AgglomerativeClustering rejects fewer than two samples, so zero or one
    # face is answered directly: nothing to label, or a single cluster 0.
    if n_faces < 2:
        return np.zeros(n_faces, dtype=int)

    # Imported only once real clustering is required.
    from sklearn.cluster import AgglomerativeClustering
    from sklearn.metrics.pairwise import cosine_distances

    # 1. Sampling (the sample must itself contain at least two faces)
    sample_size = max(2, min(int(sample_size), n_faces))
    if n_faces > sample_size:
        rng = np.random if random_state is None else np.random.RandomState(random_state)
        picked = rng.choice(n_faces, sample_size, replace=False)
        sample_embeddings = embeddings[picked]
    else:
        sample_embeddings = embeddings

    print(f" 📊 Sampling {len(sample_embeddings)} faces for clustering structure...")

    # 2. Agglomerative Clustering on Sample
    clustering = AgglomerativeClustering(
        n_clusters=None,
        distance_threshold=distance_threshold,
        metric="cosine",
        linkage="average",
    )
    sample_labels = clustering.fit_predict(sample_embeddings)

    # Sorted so that the centroid order (and therefore the returned label
    # values) does not depend on set iteration order.
    cluster_ids = np.unique(sample_labels)
    n_clusters = len(cluster_ids)
    print(f" 🔍 Found {n_clusters} unique clusters in sample.")

    # 3. Compute Centroids for each cluster (mean embedding of its members)
    centroids = np.array(
        [sample_embeddings[sample_labels == cid].mean(axis=0) for cid in cluster_ids]
    )  # Shape: (n_clusters, embedding_dim)

    # 4. Assign all faces to nearest centroid
    # Batch processing to save memory
    print(f" 🏃 Assigning {n_faces} faces to {n_clusters} clusters...")
    all_labels = np.zeros(n_faces, dtype=int)

    batch_size = max(1, int(batch_size))
    for start in range(0, n_faces, batch_size):
        end = min(start + batch_size, n_faces)
        dists = cosine_distances(embeddings[start:end], centroids)
        all_labels[start:end] = np.argmin(dists, axis=1)

    return all_labels
|
||
|
||
|
||
# Up to this many faces the exact (full) agglomerative clustering is used;
# this equals the most faces a single 10000-point Qdrant page could deliver.
# Larger sets go through optimized_clustering() to avoid the quadratic cost.
_FULL_CLUSTERING_MAX_FACES = 10000


def _load_qdrant_embeddings(
    file_uuid,
    qdrant_url="http://localhost:6333",
    collection="_faces",
    page_size=10000,
    timeout=(10, 300),
):
    """Return ``{face_id: vector}`` for every Qdrant point of ``file_uuid``.

    Follows ``next_page_offset`` until the scroll is exhausted, so files with
    more points than one page are loaded completely. Returns an empty dict if
    any page request does not answer with HTTP 200 (all-or-nothing, so a
    partial set is never clustered). Network/JSON errors propagate to the
    caller.
    """
    import requests

    scroll_url = f"{qdrant_url}/collections/{collection}/points/scroll"
    embedding_map = {}
    total_points = 0
    offset = None

    while True:
        body = {
            "filter": {
                "must": [
                    {"key": "file_uuid", "match": {"value": file_uuid}}
                ]
            },
            "limit": page_size,
            "with_vector": True,
        }
        if offset is not None:
            body["offset"] = offset

        response = requests.post(scroll_url, json=body, timeout=timeout)
        if response.status_code != 200:
            print(f"[FACE_CLUSTER] Qdrant query failed: {response.status_code}")
            return {}

        result = response.json().get("result", {})
        points = result.get("points", [])
        total_points += len(points)

        for point in points:
            face_id = point.get("payload", {}).get("face_id")
            vector = point.get("vector")
            # `is not None` rather than truthiness so an id of 0 is kept.
            if face_id is not None and vector:
                embedding_map[face_id] = vector

        offset = result.get("next_page_offset")
        if offset is None or not points:
            break

    print(f"[FACE_CLUSTER] Loaded {total_points} embeddings from Qdrant")
    return embedding_map


def _cluster_embeddings(embeddings):
    """Return one integer cluster label per row of ``embeddings``."""
    n_faces = len(embeddings)
    if n_faces == 1:
        # AgglomerativeClustering needs at least two samples; a lone face is
        # its own (only) person.
        return np.zeros(1, dtype=int)
    if n_faces > _FULL_CLUSTERING_MAX_FACES:
        return optimized_clustering(embeddings)

    clustering = AgglomerativeClustering(
        n_clusters=None, distance_threshold=0.4, metric="cosine", linkage="average"
    )
    return clustering.fit_predict(embeddings)


def main():
    """Cluster the faces of the current UUID into persons and bind speakers.

    Steps: read the face JSON, load the face embeddings from Qdrant, cluster
    them, write a ``person_id`` onto every face that had an embedding, save
    the result to OUTPUT_JSON_PATH and finally call auto_bind_speakers().
    Returns None; problems are reported on stdout and end the run early.
    """
    if not os.path.exists(FACE_JSON_PATH):
        print("❌ Face JSON not found.")
        return

    # The output is written as UTF-8 below, so read the input the same way.
    with open(FACE_JSON_PATH, encoding="utf-8") as f:
        face_data = json.load(f)

    frames_list = face_data.get("frames", [])
    if not frames_list:
        print("❌ No frames in JSON.")
        return

    # Get embeddings from Qdrant
    print(f"[FACE_CLUSTER] Loading embeddings from Qdrant for {UUID}...")
    try:
        embedding_map = _load_qdrant_embeddings(UUID)
    except Exception as e:
        # Best effort: any failure (missing `requests`, network, bad JSON)
        # is treated as "no embeddings available".
        print(f"[FACE_CLUSTER] Failed to load embeddings from Qdrant: {e}")
        embedding_map = {}

    # 1. Collect, in frame order, every face that has an embedding
    embeddings = []
    face_refs = []

    print(f"🔍 Collecting face embeddings for {UUID}...")

    for frame_idx, frame_obj in enumerate(frames_list):
        for face_idx, face in enumerate(frame_obj.get("faces") or []):
            face_id = face.get("face_id")
            if face_id is not None and face_id in embedding_map:
                embeddings.append(embedding_map[face_id])
                face_refs.append(
                    {"frame_idx": frame_idx, "face_idx": face_idx, "face_id": face_id}
                )

    if not embeddings:
        print("❌ No embeddings found in Qdrant.")
        return

    embeddings = np.array(embeddings)
    print(f"✅ Collected {len(embeddings)} face embeddings from Qdrant.")

    # 2. Clustering
    print(f"🧠 Clustering {len(embeddings)} faces...")
    labels = [int(label) for label in _cluster_embeddings(embeddings)]

    # Sorted so that Person_N numbering is deterministic.
    unique_labels = sorted(set(labels))
    label_to_person = {l: f"Person_{i}" for i, l in enumerate(unique_labels)}
    print(
        f"👥 Detected {len(unique_labels)} unique persons: {[label_to_person[l] for l in unique_labels]}"
    )

    # 3. Update JSON (indices come from the enumeration above, so they are
    # always in range)
    for ref, label in zip(face_refs, labels):
        face = frames_list[ref["frame_idx"]]["faces"][ref["face_idx"]]
        face["person_id"] = label_to_person[label]

    # Save
    with open(OUTPUT_JSON_PATH, "w", encoding="utf-8") as f:
        json.dump(face_data, f, indent=2, ensure_ascii=False)
    print(f"✅ Saved clustered data to {OUTPUT_JSON_PATH}")

    # 4. Automatically bind speakers
    auto_bind_speakers()
|
||
|
||
|
||
def auto_bind_speakers():
    """Bind each ASRX speaker to the person most often on screen while speaking.

    Reads OUTPUT_JSON_PATH (clustered faces) and ASRX_JSON_PATH (speech
    segments). For every segment with a speaker, the person seen most often
    in frames whose timestamp falls inside the segment gets one vote for that
    speaker. The top-voted person per speaker is looked up (or created) in
    ``talents`` and upserted into ``identity_bindings``.

    Returns None. Missing input files and database errors are reported on
    stdout and do not raise.
    """
    from collections import Counter, defaultdict

    if not os.path.exists(OUTPUT_JSON_PATH) or not os.path.exists(ASRX_JSON_PATH):
        print("⚠️ Missing data for speaker binding.")
        return

    # The clustered JSON is written as UTF-8 with non-ASCII characters
    # allowed, so read both inputs as UTF-8 instead of the locale default.
    with open(OUTPUT_JSON_PATH, encoding="utf-8") as f:
        face_clustered = json.load(f)
    with open(ASRX_JSON_PATH, encoding="utf-8") as f:
        asrx_data = json.load(f)

    print("🔗 Auto-binding Speakers to Persons...")

    # Build the list of (timestamp, person) sightings
    face_spans = []
    for frame_obj in face_clustered.get("frames", []):
        ts = frame_obj.get("timestamp")
        if ts is None:
            continue
        for face in frame_obj.get("faces") or []:
            person_id = face.get("person_id")
            if person_id:
                face_spans.append({"ts": ts, "person_id": person_id})

    speaker_person_counts = defaultdict(dict)

    # For every speech segment, find who is on screen
    for seg in asrx_data.get("segments", []):
        # ASRX JSON names the bounds start_time/end_time; the older
        # start/end keys are still accepted as a fallback.
        start = seg.get("start_time", seg.get("start"))
        end = seg.get("end_time", seg.get("end"))
        speaker = seg.get("speaker_id")
        # A segment without both bounds cannot be matched against timestamps
        # (comparing None with a number raises TypeError).
        if not speaker or start is None or end is None:
            continue

        # Time overlap -> per-segment vote
        person_counts = Counter(
            span["person_id"] for span in face_spans if start <= span["ts"] <= end
        )
        if not person_counts:
            continue

        best_person = max(person_counts, key=person_counts.get)
        votes = speaker_person_counts[speaker]
        votes[best_person] = votes.get(best_person, 0) + 1

    if not speaker_person_counts:
        print(" ℹ️ No speaker/person overlap found; nothing to bind.")
        return

    # Write to the database
    conn = None
    try:
        conn = psycopg2.connect(DB_URL)
        with conn.cursor() as cur:
            for speaker, persons in speaker_person_counts.items():
                best_person = max(persons, key=persons.get)
                print(
                    f" 🎤 {speaker} is likely {best_person} ({persons[best_person]} votes)"
                )

                # 1. Find or create the Talent
                cur.execute(
                    "SELECT id FROM talents WHERE real_name = %s", (best_person,)
                )
                row = cur.fetchone()

                if row:
                    talent_id = row[0]
                else:
                    cur.execute(
                        "INSERT INTO talents (real_name) VALUES (%s) RETURNING id",
                        (best_person,),
                    )
                    talent_id = cur.fetchone()[0]
                    print(f" ✨ Created Talent #{talent_id} ({best_person})")

                # 2. Bind the Speaker
                cur.execute(
                    """
                    INSERT INTO identity_bindings (talent_id, binding_type, binding_value, source, confidence)
                    VALUES (%s, 'speaker', %s, 'auto_cluster', 0.8)
                    ON CONFLICT (binding_type, binding_value) DO UPDATE SET talent_id = EXCLUDED.talent_id
                    """,
                    (talent_id, speaker),
                )
                print(f" ✅ Bound {speaker} -> {best_person}")

        conn.commit()
    except Exception as e:
        # Best effort: report and continue; closing the connection below
        # discards any uncommitted work.
        print(f" ❌ DB Error: {e}")
    finally:
        if conn is not None:
            conn.close()
|
||
|
||
|
||
# Script entry point: run the clustering pipeline only when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    main()
|