#!/opt/homebrew/bin/python3.11
"""
Face Clustering Processor

Responsibility: aggregate short-lived Face IDs into persistent Person IDs and
automatically bind Speakers to the resulting persons.
"""

import cv2
import json
import numpy as np
import os
import sys
import psycopg2
from sklearn.cluster import AgglomerativeClustering

sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))

# Use FaceNet embeddings from face.json instead of DeepFace
HAS_DEEPFACE = False
print("[FACE_CLUSTER] Using FaceNet embeddings from face.json (DeepFace not required)")

# Configuration
UUID = os.getenv("UUID", "quick_preview")
OUTPUT_DIR = os.getenv("MOMENTRY_OUTPUT_DIR", "./output")
VIDEO_PATH = os.path.join(OUTPUT_DIR, UUID, f"{UUID}.mp4")
FACE_JSON_PATH = os.path.join(OUTPUT_DIR, UUID, f"{UUID}.face.json")
OUTPUT_JSON_PATH = os.path.join(OUTPUT_DIR, UUID, f"{UUID}.face_clustered.json")
ASRX_JSON_PATH = os.path.join(OUTPUT_DIR, UUID, f"{UUID}.asrx.json")
DB_URL = os.getenv("DATABASE_URL", "postgresql://accusys@localhost:5432/momentry")

# Qdrant access used by main() to fetch the face embeddings.
QDRANT_URL = "http://localhost:6333"
QDRANT_COLLECTION = "_faces"
QDRANT_PAGE_SIZE = 10000
QDRANT_TIMEOUT_SECONDS = 60

# Clustering parameters shared by the exact and the sampled clustering paths.
CLUSTER_DISTANCE_THRESHOLD = 0.4
CLUSTER_SAMPLE_SIZE = 5000
CLUSTER_BATCH_SIZE = 5000


def _new_clusterer():
    """Return the agglomerative clusterer configuration used by this script."""
    return AgglomerativeClustering(
        n_clusters=None,
        distance_threshold=CLUSTER_DISTANCE_THRESHOLD,
        metric="cosine",
        linkage="average",
    )


def optimized_clustering(embeddings):
    """
    Optimized clustering for large datasets (e.g. 25k faces).

    Strategy: sample -> agglomerative clustering -> nearest-centroid assignment.

    Args:
        embeddings: 2-D array-like with one embedding per row.

    Returns:
        Integer numpy array with one cluster label per input row. Inputs with
        fewer than two rows are returned as a single cluster (all zeros)
        because agglomerative clustering needs at least two samples.
    """
    from sklearn.metrics.pairwise import cosine_distances

    # Accept plain lists as well as arrays; fancy indexing below needs an array.
    embeddings = np.asarray(embeddings)
    n_faces = len(embeddings)
    print(f" 🚀 Starting optimized clustering for {n_faces} faces...")

    if n_faces < 2:
        return np.zeros(n_faces, dtype=int)

    # 1. Sampling
    sample_size = min(CLUSTER_SAMPLE_SIZE, n_faces)
    if n_faces > sample_size:
        picked = np.random.choice(n_faces, sample_size, replace=False)
        sample_embeddings = embeddings[picked]
    else:
        sample_embeddings = embeddings
    print(f" 📊 Sampling {len(sample_embeddings)} faces for clustering structure...")

    # 2. Agglomerative clustering on the sample
    sample_labels = _new_clusterer().fit_predict(sample_embeddings)
    # Sorted unique labels give a deterministic centroid order, so a centroid
    # position can be mapped back to the cluster label it was built from.
    cluster_ids = np.unique(sample_labels)
    n_clusters = len(cluster_ids)
    print(f" 🔍 Found {n_clusters} unique clusters in sample.")

    # 3. Compute the mean embedding (centroid) of every cluster
    centroids = np.array(
        [sample_embeddings[sample_labels == cid].mean(axis=0) for cid in cluster_ids]
    )  # Shape: (n_clusters, embedding_dim)

    # 4. Assign all faces to the nearest centroid, in batches to save memory
    print(f" 🏃 Assigning {n_faces} faces to {n_clusters} clusters...")
    all_labels = np.zeros(n_faces, dtype=int)
    for start in range(0, n_faces, CLUSTER_BATCH_SIZE):
        end = min(start + CLUSTER_BATCH_SIZE, n_faces)
        dists = cosine_distances(embeddings[start:end], centroids)
        all_labels[start:end] = cluster_ids[np.argmin(dists, axis=1)]

    return all_labels


def _load_embedding_map(file_uuid):
    """
    Fetch every face embedding stored in Qdrant for ``file_uuid``.

    The scroll endpoint is paginated: each response carries a
    ``next_page_offset`` that is sent back as ``offset`` until it is null, so
    videos with more than one page of faces are loaded completely.

    Returns:
        Dict mapping face_id -> embedding vector. On any failure the pages
        loaded so far are returned (an empty dict if the first request fails);
        the error is printed, never raised.
    """
    embedding_map = {}
    total_points = 0
    try:
        import requests

        offset = None
        while True:
            body = {
                "filter": {
                    "must": [{"key": "file_uuid", "match": {"value": file_uuid}}]
                },
                "limit": QDRANT_PAGE_SIZE,
                "with_vector": True,
            }
            if offset is not None:
                body["offset"] = offset

            response = requests.post(
                f"{QDRANT_URL}/collections/{QDRANT_COLLECTION}/points/scroll",
                json=body,
                timeout=QDRANT_TIMEOUT_SECONDS,
            )
            if response.status_code != 200:
                print(f"[FACE_CLUSTER] Qdrant query failed: {response.status_code}")
                return embedding_map

            result = response.json().get("result") or {}
            points = result.get("points") or []
            total_points += len(points)

            # Build face_id -> embedding map
            # NOTE(review): a falsy face_id (e.g. 0) is skipped here, as in the
            # original code — confirm face ids can never be 0.
            for point in points:
                face_id = (point.get("payload") or {}).get("face_id")
                vector = point.get("vector")
                if face_id and vector:
                    embedding_map[face_id] = vector

            offset = result.get("next_page_offset")
            # An empty page guards against looping forever on a bad offset.
            if offset is None or not points:
                break

        print(f"[FACE_CLUSTER] Loaded {total_points} embeddings from Qdrant")
    except Exception as e:
        # Best effort: the caller reports "no embeddings" if nothing was loaded.
        print(f"[FACE_CLUSTER] Failed to load embeddings from Qdrant: {e}")
    return embedding_map


def _cluster_labels(embeddings):
    """Cluster ``embeddings`` exactly; a single face forms its own cluster."""
    if len(embeddings) < 2:
        # AgglomerativeClustering rejects inputs with fewer than two samples.
        return np.zeros(len(embeddings), dtype=int)
    return _new_clusterer().fit_predict(embeddings)


def main():
    """
    Cluster the faces of the configured video and write ``person_id`` labels.

    Reads the face JSON, loads the matching embeddings from Qdrant, clusters
    them, stores a ``person_id`` on every clustered face, writes the result to
    OUTPUT_JSON_PATH and finally runs the automatic speaker binding. Prints a
    message and returns early when an input is missing.
    """
    if not os.path.exists(FACE_JSON_PATH):
        print("❌ Face JSON not found.")
        return

    with open(FACE_JSON_PATH, encoding="utf-8") as f:
        face_data = json.load(f)

    frames_list = face_data.get("frames", [])
    if not frames_list:
        print("❌ No frames in JSON.")
        return

    # 1. Get embeddings from Qdrant
    print(f"[FACE_CLUSTER] Loading embeddings from Qdrant for {UUID}...")
    embedding_map = _load_embedding_map(UUID)

    embeddings = []
    face_refs = []
    print(f"🔍 Collecting face embeddings for {UUID}...")
    for frame_idx, frame_obj in enumerate(frames_list):
        for face_idx, face in enumerate(frame_obj.get("faces", [])):
            face_id = face.get("face_id")
            if face_id and face_id in embedding_map:
                embeddings.append(embedding_map[face_id])
                face_refs.append(
                    {"frame_idx": frame_idx, "face_idx": face_idx, "face_id": face_id}
                )

    if not embeddings:
        print("❌ No embeddings found in Qdrant.")
        return

    embeddings = np.array(embeddings)
    print(f"✅ Collected {len(embeddings)} face embeddings from Qdrant.")

    # 2. Clustering
    print(f"🧠 Clustering {len(embeddings)} faces...")
    labels = _cluster_labels(embeddings).tolist()

    # Sorted labels keep the Person_N numbering deterministic between runs.
    unique_labels = sorted(set(labels))
    label_to_person = {l: f"Person_{i}" for i, l in enumerate(unique_labels)}
    print(
        f"👥 Detected {len(unique_labels)} unique persons: {[label_to_person[l] for l in unique_labels]}"
    )

    # 3. Update JSON (face_refs index into frames_list, so lookups are in range)
    for ref, label in zip(face_refs, labels):
        face = frames_list[ref["frame_idx"]]["faces"][ref["face_idx"]]
        face["person_id"] = label_to_person[label]

    # Save
    with open(OUTPUT_JSON_PATH, "w", encoding="utf-8") as f:
        json.dump(face_data, f, indent=2, ensure_ascii=False)
    print(f"✅ Saved clustered data to {OUTPUT_JSON_PATH}")

    # 4. Automatically bind speakers
    auto_bind_speakers()


def _count_speaker_votes(face_spans, segments):
    """
    Count, per speaker, how many segments each person dominated on screen.

    Args:
        face_spans: list of ``{"ts": ..., "person_id": ...}`` face sightings.
        segments: ASR segments with ``start``, ``end`` and ``speaker_id``.

    Returns:
        Dict ``speaker -> {person_id: number of segments won}``.
    """
    speaker_person_counts = {}
    for seg in segments:
        start = seg.get("start")
        end = seg.get("end")
        speaker = seg.get("speaker_id")
        # A segment without a speaker or without both bounds cannot be matched.
        if not speaker or start is None or end is None:
            continue

        # Vote: every face sighting inside the segment counts for its person
        person_counts = {}
        for span in face_spans:
            if start <= span["ts"] <= end:
                pid = span["person_id"]
                person_counts[pid] = person_counts.get(pid, 0) + 1
        if not person_counts:
            continue

        best_person = max(person_counts, key=person_counts.get)
        votes = speaker_person_counts.setdefault(speaker, {})
        votes[best_person] = votes.get(best_person, 0) + 1
    return speaker_person_counts


def auto_bind_speakers():
    """
    Bind each ASR speaker to the person most often on screen while speaking.

    Reads the clustered face JSON and the ASRX JSON, lets every speech segment
    vote for the person seen most during that segment, and upserts the winner
    per speaker into ``talents`` / ``identity_bindings``. Database errors are
    printed, not raised, and the connection is always closed.
    """
    if not os.path.exists(OUTPUT_JSON_PATH) or not os.path.exists(ASRX_JSON_PATH):
        print("⚠️ Missing data for speaker binding.")
        return

    with open(OUTPUT_JSON_PATH, encoding="utf-8") as f:
        face_clustered = json.load(f)
    with open(ASRX_JSON_PATH, encoding="utf-8") as f:
        asrx_data = json.load(f)

    print("🔗 Auto-binding Speakers to Persons...")

    # Build the list of timestamped face sightings
    face_spans = []
    for frame_obj in face_clustered.get("frames", []):
        ts = frame_obj.get("timestamp")
        if ts is None:
            continue
        for face in frame_obj.get("faces", []):
            person_id = face.get("person_id")
            if person_id:
                face_spans.append({"ts": ts, "person_id": person_id})

    # For every speech segment, find out who is on screen
    speaker_person_counts = _count_speaker_votes(
        face_spans, asrx_data.get("segments", [])
    )

    # Write to the database
    conn = None
    try:
        conn = psycopg2.connect(DB_URL)
        with conn.cursor() as cur:
            for speaker, persons in speaker_person_counts.items():
                if not persons:
                    continue
                best_person = max(persons, key=persons.get)
                print(
                    f" 🎤 {speaker} is likely {best_person} ({persons[best_person]} votes)"
                )

                # 1. Find or create the Talent
                cur.execute(
                    "SELECT id FROM talents WHERE real_name = %s", (best_person,)
                )
                row = cur.fetchone()
                if row:
                    talent_id = row[0]
                else:
                    cur.execute(
                        "INSERT INTO talents (real_name) VALUES (%s) RETURNING id",
                        (best_person,),
                    )
                    talent_id = cur.fetchone()[0]
                    print(f" ✨ Created Talent #{talent_id} ({best_person})")

                # 2. Bind the Speaker
                cur.execute(
                    """
                    INSERT INTO identity_bindings (talent_id, binding_type, binding_value, source, confidence)
                    VALUES (%s, 'speaker', %s, 'auto_cluster', 0.8)
                    ON CONFLICT (binding_type, binding_value) DO UPDATE SET talent_id = EXCLUDED.talent_id
                    """,
                    (talent_id, speaker),
                )
                print(f" ✅ Bound {speaker} -> {best_person}")
        conn.commit()
    except Exception as e:
        print(f" ❌ DB Error: {e}")
    finally:
        # Closing without a commit discards any partial work of a failed run.
        if conn is not None:
            conn.close()


if __name__ == "__main__":
    main()