fix: RCA trace 39/45 collision - raise stage1b composite threshold 0.35→0.50, add stage1b_min_face_similarity floor (0.30), add temporal collision check. Verified: collision resolved

This commit is contained in:
Warren
2026-05-06 14:55:49 +08:00
parent 65a1f77e65
commit ca4f59d811
6 changed files with 2456 additions and 2287 deletions

View File

@@ -1,14 +1,17 @@
{
"id": "008",
"name": "Composite: TMDb vector + speaker frequency scoring",
"name": "Composite: TMDb vector + speaker frequency scoring + collision check (FIXED)",
"file_uuid": "417a7e93860d70c87aee6c4c1b715d70",
"min_frames": 3,
"enable_identity_match": true,
"stage1_face_threshold": 0.55,
"stage1_bind_ratio": 0.60,
"stage1b_composite_threshold": 0.50,
"stage1b_min_face_similarity": 0.30,
"stage2_threshold": 0.85,
"stage2_adaptive": true,
"enable_speaker_weight": true,
"speaker_weight_factor": 0.3,
"notes": "V2.0 embedding space。Speaker 出現次數(segment count)加權 × vector similarity 綜合評分。主角(SPEAKER_0/SPEAKER_1)加權較高。"
"enable_temporal_collision_check": true,
"notes": "V2.1 FIX: composite threshold 0.35→0.50, added min_face_similarity=0.30, added temporal collision check"
}

View File

@@ -1,15 +1,18 @@
{
"id": "008",
"name": "Composite: TMDb vector + speaker frequency scoring",
"name": "Composite: TMDb vector + speaker frequency scoring + collision check (FIXED)",
"file_uuid": "417a7e93860d70c87aee6c4c1b715d70",
"min_frames": 3,
"enable_identity_match": true,
"stage1_face_threshold": 0.55,
"stage1_bind_ratio": 0.6,
"stage1b_composite_threshold": 0.5,
"stage1b_min_face_similarity": 0.3,
"stage2_threshold": 0.85,
"stage2_adaptive": true,
"enable_speaker_weight": true,
"speaker_weight_factor": 0.3,
"notes": "V2.0 embedding space。Speaker 出現次數(segment count)加權 × vector similarity 綜合評分。主角(SPEAKER_0/SPEAKER_1)加權較高。",
"enable_temporal_collision_check": true,
"notes": "V2.1 FIX: composite threshold 0.35→0.50, added min_face_similarity=0.30, added temporal collision check",
"write_db": true
}

File diff suppressed because it is too large Load Diff

View File

@@ -1,10 +1,10 @@
{
"total_traces": 677,
"stage1_bound": 671,
"stage1_bound_traces": 671,
"stage2_clusters": 6,
"stage2_unbound_clustered": 6,
"stage1_bound": 657,
"stage1_bound_traces": 657,
"stage2_clusters": 20,
"stage2_unbound_clustered": 20,
"total_clusters": 677,
"execution_time_s": 11.841914176940918,
"execution_time_s": 15.544250011444092,
"coverage": 1.0
}

View File

@@ -291,11 +291,17 @@ def run_experiment(config: dict) -> dict:
avg_sim = np.mean(face_sims) if face_sims else 0
match_ratio = sum(1 for s in face_sims if s >= config.get("stage1_face_threshold", 0.55)) / len(face_sims)
# Absolute minimum: if avg similarity is too low, never bind
min_sim = config.get("stage1b_min_face_similarity", 0.30)
if avg_sim < min_sim:
continue
# Composite score: similarity + match ratio + speaker weight
spk_weight = 1.0 + 0.3 * speaker_counts.get(t["trace_id"], 0) / max(max(speaker_counts.values(), default=1), 1)
composite = avg_sim * spk_weight * (0.4 + 0.6 * match_ratio)
composite_threshold = config.get("stage1b_composite_threshold", 0.50)
if composite > best_score and composite > 0.35:
if composite > best_score and composite > composite_threshold:
best_score = composite
best_iid = iid
best_sim = avg_sim
@@ -339,6 +345,56 @@ def run_experiment(config: dict) -> dict:
# Speaker verification
all_labels = apply_speaker_verification(clusters, speaker_overlaps)
# --- Temporal Collision Check ---
# Split traces that have overlapping frames within the same identity
if config.get("enable_temporal_collision_check", True):
# Build trace timing map: trace_id → (min_frame, max_frame)
trace_timing = {}
for t in traces:
trace_timing[t["trace_id"]] = (t["start_frame"], t["end_frame"])
collision_splits = 0
for label in all_labels:
if label.get("trace_count", 0) < 2:
continue
tids = label["trace_ids"]
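# Check all pairs in this label
# NOTE(review): tids is the same list object as label["trace_ids"], and the
# .remove() call further down shrinks it while these index ranges were built
# from the earlier length - confirm this cannot raise IndexError or skip
# pairs when a label holds three or more traces.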
for i in range(len(tids)):
for j in range(i+1, len(tids)):
a, b = tids[i], tids[j]
ta = trace_timing.get(a)
tb = trace_timing.get(b)
if not ta or not tb: continue
# Overlap: max(start) < min(end)
if max(ta[0], tb[0]) < min(ta[1], tb[1]):
collision_splits += 1
print(f" COLLISION: trace {a} & {b} overlap (frames {max(ta[0],tb[0])}-{min(ta[1],tb[1])}), splitting...")
# Move the lower-confidence trace to a new label
# The trace dict does not store per-face confidence, so compare the average
# detection confidence recorded in the DB (face_detections) for each trace.
cur2 = conn.cursor()
cur2.execute(f"SELECT AVG(confidence) FROM {SCHEMA}.face_detections WHERE file_uuid=%s AND trace_id=%s", (file_uuid, a))
conf_a = cur2.fetchone()[0] or 0
cur2.execute(f"SELECT AVG(confidence) FROM {SCHEMA}.face_detections WHERE file_uuid=%s AND trace_id=%s", (file_uuid, b))
conf_b = cur2.fetchone()[0] or 0
cur2.close()
if conf_a < conf_b:
loser_tid = a
else:
loser_tid = b
# Remove loser from this label, create new label
label["trace_ids"].remove(loser_tid)
label["trace_count"] -= 1
all_labels.append({
"cluster_id": len(all_labels),
"trace_count": 1,
"trace_ids": [loser_tid],
"binding": None,
"binding_stage": "collision_split",
})
if collision_splits > 0:
print(f" Temporal collision: {collision_splits} traces split")
# Merge Stage 1 bound traces into labels
for t in bound:
all_labels.append({