From 74f00d3baa7db552a598ce11b348f3b494727d70 Mon Sep 17 00:00:00 2001 From: Accusys Date: Thu, 14 May 2026 00:21:17 +0800 Subject: [PATCH] =?UTF-8?q?fix:=20face=20traces=20split=20at=20scene=20cut?= =?UTF-8?q?s=20=E2=80=94=20even=20same=20person,=20different=20cut?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- scripts/store_traced_faces.py | 13 +++++++++++-- scripts/utils/face_tracker.py | 32 +++++++++++++++++++++++++++++--- 2 files changed, 40 insertions(+), 5 deletions(-) diff --git a/scripts/store_traced_faces.py b/scripts/store_traced_faces.py index dfdbb78..469a6e3 100644 --- a/scripts/store_traced_faces.py +++ b/scripts/store_traced_faces.py @@ -62,8 +62,17 @@ def run_face_tracker(face_json_path: str, traced_json_path: str) -> str: } print(f"[TRACE] Processing {len(face_data.get('frames', {}))} frames") - - face_data = track_faces(face_data, use_embedding=True) + + # Load cut boundaries from cut.json (same directory as face.json) + cut_boundaries = None + cuts_path = face_json_path.replace("_traced.json", ".cut.json").replace(".face.json", ".cut.json") + if os.path.exists(cuts_path): + with open(cuts_path) as f: + cuts = json.load(f) + cut_boundaries = {s["start_frame"] for s in cuts.get("scenes", []) if s["start_frame"] > 0} + print(f"[TRACE] Loaded {len(cut_boundaries)} cut boundaries") + + face_data = track_faces(face_data, use_embedding=True, cut_boundaries=cut_boundaries) metadata = face_data.get("metadata", {}) metadata["tracking_method"] = "iou_embedding" metadata["tracked_at"] = datetime.now().isoformat() diff --git a/scripts/utils/face_tracker.py b/scripts/utils/face_tracker.py index 13efe79..90cf246 100755 --- a/scripts/utils/face_tracker.py +++ b/scripts/utils/face_tracker.py @@ -28,7 +28,7 @@ Output: import json import argparse import numpy as np -from typing import Dict, List +from typing import Dict, List, Set from collections import defaultdict @@ -112,6 +112,9 @@ def match_faces( distance_threshold: float = 100.0, use_embedding: bool = True, frame_gap: int = 1, + cut_boundaries: Set[int] = None, + prev_frame: int = None, + curr_frame: int = None, ) -> Dict[int, int]: """ Match current frame faces to previous frame faces @@ -124,6 +127,9 @@ def match_faces( distance_threshold: Maximum bbox center distance for matching use_embedding: Whether to use embedding similarity frame_gap: Number of frames between current and previous (1=adjacent) + cut_boundaries: Set of frame numbers where scene cuts occur + prev_frame: Previous frame number (for cut detection) + curr_frame: Current frame number (for cut detection) Returns: Dict mapping current_face_index -> previous_face_index (or -1 if new) @@ -131,6 +137,12 @@ def match_faces( if not previous_faces: return {i: -1 for i in range(len(current_faces))} + # If a scene cut exists between prev and current frame, force all new traces + if cut_boundaries and prev_frame is not None and curr_frame is not None: + for cf in cut_boundaries: + if prev_frame < cf <= curr_frame: + return {i: -1 for i in range(len(current_faces))} + matches = {} used_prev = set() @@ -193,8 +205,6 @@ def match_faces( if prev_at_edge and not curr_at_edge and similarity < 0.8: continue - if iou > iou_threshold and similarity > similarity_threshold: - if iou > iou_threshold and similarity > similarity_threshold: score = iou + similarity elif iou > 0.5 and similarity > 0.65: @@ -226,6 +236,7 @@ def track_faces( similarity_threshold: float = 0.7, distance_threshold: float = 100.0, use_embedding: bool = True, + cut_boundaries: Set[int] = None, ) -> Dict: """ Track faces across all frames @@ -254,6 +265,7 @@ def track_faces( prev_faces = [] prev_trace_ids = [] prev_frame_num = None + prev_face_frame = None # last frame number that had actual faces print(f"\nTracking faces across {len(sorted_frames)} frames...") print(f"Parameters: iou={iou_threshold}, similarity={similarity_threshold}, distance={distance_threshold}") @@ -279,6 +291,9 @@ def track_faces( distance_threshold, use_embedding, frame_gap, + cut_boundaries, + prev_face_frame, + frame_num, ) trace_ids = [] @@ -307,6 +322,7 @@ def track_faces( prev_faces = faces prev_trace_ids = trace_ids + prev_face_frame = frame_num if frame_num % 100 == 0: print(f" Frame {frame_num}: {len(faces)} faces, {len(set(trace_ids))} active traces") @@ -455,9 +471,18 @@ def main(): parser.add_argument("--similarity-threshold", type=float, default=0.7, help="Embedding similarity threshold") parser.add_argument("--distance-threshold", type=float, default=100.0, help="Distance threshold") parser.add_argument("--no-embedding", action="store_true", help="Disable embedding matching") + parser.add_argument("--cuts-json", help="Path to cut.json for scene-cut-aware tracking") parser.add_argument("--analyze-only", action="store_true", help="Only analyze, don't output") args = parser.parse_args() + # Load cut boundaries if provided + cut_boundaries = None + if args.cuts_json: + with open(args.cuts_json) as f: + cuts = json.load(f) + cut_boundaries = {s["start_frame"] for s in cuts.get("scenes", []) if s["start_frame"] > 0} + print(f" Cut boundaries loaded: {len(cut_boundaries)} cuts") + print("=" * 60) print("Face Tracker") print("=" * 60) @@ -474,6 +499,7 @@ def main(): similarity_threshold=args.similarity_threshold, distance_threshold=args.distance_threshold, use_embedding=not args.no_embedding, + cut_boundaries=cut_boundaries, ) analyze_traces(face_data)