diff --git a/scripts/store_traced_faces.py b/scripts/store_traced_faces.py index cc1c354..c3a86f6 100644 --- a/scripts/store_traced_faces.py +++ b/scripts/store_traced_faces.py @@ -19,6 +19,7 @@ import sys import os import json import argparse +import numpy as np import psycopg2 import psycopg2.extras from datetime import datetime @@ -36,6 +37,129 @@ def get_conn(): return psycopg2.connect(DB_URL) +def merge_traces_within_cuts(face_data: dict, cut_scenes: list) -> dict: + """Merge traces within the same cut if they have similar embeddings (same person re-appeared).""" + frames = face_data.get("frames", {}) + if not frames: + return face_data + + # Map each frame to its scene/cut number + frame_to_scene = {} + for s in cut_scenes: + for f in range(s["start_frame"], s["end_frame"] + 1): + frame_to_scene[f] = s["scene_number"] + + # Collect per-trace data: scene numbers, embeddings, face positions + trace_frames = defaultdict(list) + trace_embeddings = defaultdict(list) + trace_poses = {} + + for fnum_str, frm_data in frames.items(): + fnum = int(fnum_str) + for face in frm_data.get("faces", []): + tid = face.get("trace_id") + if tid is None: + continue + trace_frames[tid].append(fnum) + emb = face.get("embedding") + if emb is not None: + trace_embeddings[tid].append(emb) + if tid not in trace_poses: + trace_poses[tid] = (face.get("x", 0), face.get("y", 0), + face.get("width", 0), face.get("height", 0)) + + if len(trace_embeddings) < 2: + return face_data + + # Compute centroid per trace + trace_centroids = {} + for tid, embs in trace_embeddings.items(): + centroid = np.mean(embs, axis=0) + norm = np.linalg.norm(centroid) + trace_centroids[tid] = centroid / norm if norm > 0 else centroid + + # Determine which scene each trace belongs to (majority of frames) + trace_scene = {} + for tid, fns in trace_frames.items(): + scene_votes = defaultdict(int) + for fn in fns: + scene = frame_to_scene.get(fn, -1) + scene_votes[scene] += 1 + trace_scene[tid] = max(scene_votes, key=scene_votes.get) if scene_votes else -1 + + # Within each scene, merge traces with similar centroids + scene_traces = defaultdict(list) + for tid, scene in trace_scene.items(): + if scene >= 0 and tid in trace_centroids: + scene_traces[scene].append(tid) + + merged = 0 + next_new_id = max(trace_frames.keys()) + 1 if trace_frames else 0 + SIMILARITY_THRESHOLD = 0.75 + + for scene, tids in scene_traces.items(): + if len(tids) < 2: + continue + used = set() + for i in range(len(tids)): + if tids[i] in used: + continue + keep_tid = tids[i] + for j in range(i + 1, len(tids)): + if tids[j] in used: + continue + sim = float(np.dot(trace_centroids[tids[i]], trace_centroids[tids[j]])) + if sim >= SIMILARITY_THRESHOLD: + # Merge tids[j] into keep_tid + for fnum_str, frm_data in frames.items(): + for face in frm_data.get("faces", []): + if face.get("trace_id") == tids[j]: + face["trace_id"] = keep_tid + used.add(tids[j]) + merged += 1 + + # If any merges happened, rebuild trace metadata + if merged > 0: + # Rebuild traces dict + new_traces = {} + new_trace_frames = defaultdict(list) + for fnum_str, frm_data in frames.items(): + fnum = int(fnum_str) + for face in frm_data.get("faces", []): + tid = face.get("trace_id") + if tid is not None: + new_trace_frames[tid].append({ + "frame": fnum, + "face_index": 0, + "bbox": {"x": face.get("x", 0), "y": face.get("y", 0), + "width": face.get("width", 0), "height": face.get("height", 0)}, + "confidence": face.get("confidence", 0.0), + }) + + for tid, path in new_trace_frames.items(): + if len(path) >= 1: + frames_sorted = sorted(set(p["frame"] for p in path)) + new_traces[str(tid)] = { + "trace_id": tid, + "start_frame": frames_sorted[0], + "end_frame": frames_sorted[-1], + "duration_frames": frames_sorted[-1] - frames_sorted[0] + 1, + "duration_seconds": (frames_sorted[-1] - frames_sorted[0]) / face_data.get("metadata", {}).get("fps", 25.0), + "total_appearances": len(path), + "path": path, + } + + face_data["traces"] = new_traces + face_data["metadata"]["trace_stats"] = { + "total_traces": len(new_traces), + "active_traces": len(new_traces), + "long_traces": len([t for t in new_traces.values() if t["duration_frames"] >= 2]), + } + print(f"[TRACE] Post-merge: {merged} traces merged, {len(new_traces)} total traces") + + return face_data + + def run_face_tracker(face_json_path: str, traced_json_path: str) -> str: """Run face_tracker.py on face.json, returns path to face_traced.json""" from face_tracker import track_faces @@ -115,14 +239,21 @@ def run_face_tracker(face_json_path: str, traced_json_path: str) -> str: # Load cut boundaries from cut.json (same directory as face.json) cut_boundaries = None + cut_scenes = None cuts_path = face_json_path.replace("_traced.json", ".cut.json").replace(".face.json", ".cut.json") if os.path.exists(cuts_path): with open(cuts_path) as f: cuts = json.load(f) - cut_boundaries = {s["start_frame"] for s in cuts.get("scenes", []) if s["start_frame"] > 0} + cut_scenes = cuts.get("scenes", []) + cut_boundaries = {s["start_frame"] for s in cut_scenes if s["start_frame"] > 0} print(f"[TRACE] Loaded {len(cut_boundaries)} cut boundaries") face_data = track_faces(face_data, use_embedding=True, cut_boundaries=cut_boundaries) + + # Merge traces within same cut (same person re-appearing after occlusion/pose change) + if cut_scenes and len(cut_scenes) > 0: + face_data = merge_traces_within_cuts(face_data, cut_scenes) + metadata = face_data.get("metadata", {}) metadata["tracking_method"] = "iou_embedding" metadata["tracked_at"] = datetime.now().isoformat()