feat: trace quality agent selection report, identity clustering runner_v2 DB write, age/gender CoreML selection, updated experiment config UUID

2026-05-06 14:41:48 +08:00
parent 74b6182eba
commit 65a1f77e65
1048 changed files with 103499 additions and 0 deletions
--- a/experiments/identity_clustering/README.md
+++ b/experiments/identity_clustering/README.md
@@ -0,0 +1,41 @@
+# Identity Clustering 實驗記錄區
+
+每個實驗獨立運行，結果完整保留，用於後續分析比較。
+
+## 目錄結構
+
+```
+experiments/identity_clustering/
+├── README.md                    # 本文件
+├── configs/                     # 實驗配置
+│   └── exp_{id}.json            # 每個實驗的參數設定
+├── data_snapshot/               # 輸入數據快照
+│   └── face_detections.csv      # face_detections 數據
+├── results/                     # 實驗結果
+│   └── exp_{id}/
+│       ├── config.json          # 執行時使用的配置副本
+│       ├── clusters.json        # 分群結果
+│       ├── labels.json          # 標註結果（TMDb/Speaker）
+│       ├── metrics.json         # 評估指標
+│       └── summary.txt          # 摘要報告
+├── reports/                     # 比較分析報告
+│   └── comparison_{date}.md     # 跨實驗比較
+└── runner.py                    # 實驗執行器
+```
+
+## 實驗設計
+
+每個實驗包含以下維度的組合：
+
+| 維度 | 選項 |
+|------|------|
+| **Trace filter** | none / min_frames=3 / min_frames=30 / min_frames=60 |
+| **Centroid** | mean / median / best_confidence |
+| **Clustering** | cosine_threshold / DBSCAN / Agglomerative |
+| **Threshold** | fixed=0.85 / adaptive(pose) / auto |
+| **TMDb** | enabled / disabled |
+| **Speaker verify** | ✅ 標準工序（所有實驗強制） |
+
+## 當前輸入數據
+
+- file_uuid: `1a04db97be5fa12bd77369831dc141fd`
+- 6182 detections, 2347 traces, 512D embeddings
+- 10 speakers (ASRX), 57 YOLO objects
+- TMDb identities: available (Charade 1963 cast)
--- a/experiments/identity_clustering/configs/exp_001.json
+++ b/experiments/identity_clustering/configs/exp_001.json
@@ -0,0 +1,11 @@
+{
+  "id": "001",
+  "name": "Baseline: Fixed Threshold (0.85), min 3 frames, no TMDb",
+  "file_uuid": "1a04db97be5fa12bd77369831dc141fd",
+  "min_frames": 3,
+  "clustering_method": "threshold",
+  "threshold": 0.85,
+  "adaptive_threshold": false,
+  "enable_tmdb": false,
+  "notes": "sample_interval=60 導致 trace 碎片化。min_frames=3 納入大部分 traces。"
+}
--- a/experiments/identity_clustering/configs/exp_002.json
+++ b/experiments/identity_clustering/configs/exp_002.json
@@ -0,0 +1,11 @@
+{
+  "id": "002",
+  "name": "Adaptive Threshold (pose-aware), min 3 frames, no TMDb",
+  "file_uuid": "1a04db97be5fa12bd77369831dc141fd",
+  "min_frames": 3,
+  "clustering_method": "threshold",
+  "threshold": 0.85,
+  "adaptive_threshold": true,
+  "enable_tmdb": false,
+  "notes": "Pose-aware: 短 trace 放寬 threshold 5%。適合 profile/three_quarter 角度辨識。"
+}
--- a/experiments/identity_clustering/configs/exp_003.json
+++ b/experiments/identity_clustering/configs/exp_003.json
@@ -0,0 +1,11 @@
+{
+  "id": "003",
+  "name": "DBSCAN (eps=0.3), min 3 frames, no TMDb",
+  "file_uuid": "1a04db97be5fa12bd77369831dc141fd",
+  "min_frames": 3,
+  "clustering_method": "dbscan",
+  "eps": 0.3,
+  "min_samples": 2,
+  "enable_tmdb": false,
+  "notes": "DBSCAN 自動偵測 cluster 數量，不需要手設 threshold。eps=0.3 對應 cosine distance。"
+}
--- a/experiments/identity_clustering/configs/exp_004.json
+++ b/experiments/identity_clustering/configs/exp_004.json
@@ -0,0 +1,11 @@
+{
+  "id": "004",
+  "name": "DBSCAN (eps=0.25), min 3 frames, no TMDb",
+  "file_uuid": "1a04db97be5fa12bd77369831dc141fd",
+  "min_frames": 3,
+  "clustering_method": "dbscan",
+  "eps": 0.25,
+  "min_samples": 2,
+  "enable_tmdb": false,
+  "notes": "DBSCAN 更嚴格版本（eps=0.25），預期更多 cluster、較少 false positive。"
+}
--- a/experiments/identity_clustering/configs/exp_005.json
+++ b/experiments/identity_clustering/configs/exp_005.json
@@ -0,0 +1,11 @@
+{
+  "id": "005",
+  "name": "Adaptive Threshold + TMDb matching, min 3 frames",
+  "file_uuid": "1a04db97be5fa12bd77369831dc141fd",
+  "min_frames": 3,
+  "clustering_method": "threshold",
+  "threshold": 0.85,
+  "adaptive_threshold": true,
+  "enable_tmdb": true,
+  "notes": "最佳方案候選：pose-aware + TMDb 自動標註。預期 Cary Grant, Audrey Hepburn 等主要角色被標出。"
+}
--- a/experiments/identity_clustering/configs/exp_006.json
+++ b/experiments/identity_clustering/configs/exp_006.json
@@ -0,0 +1,13 @@
+{
+    "id": "006",
+    "name": "Multi-Stage: Face-level high-conf binding + centroid clustering + speaker",
+    "file_uuid": "1a04db97be5fa12bd77369831dc141fd",
+    "min_frames": 3,
+    "enable_identity_match": true,
+    "stage1_face_threshold": 0.92,
+    "stage1_bind_ratio": 0.85,
+    "stage2_threshold": 0.85,
+    "stage2_adaptive": true,
+    "enable_tmdb": false,
+    "notes": "Stage1: each face vs identity ref, bind if >85% faces match >0.92. Stage2: centroid clustering of unbound + speaker merge."
+}
--- a/experiments/identity_clustering/configs/exp_007.json
+++ b/experiments/identity_clustering/configs/exp_007.json
@@ -0,0 +1,13 @@
+{
+    "id": "007",
+    "name": "Multi-Stage: relaxed TMDb bind + 3-angle anchor selection",
+    "file_uuid": "1a04db97be5fa12bd77369831dc141fd",
+    "min_frames": 3,
+    "enable_identity_match": true,
+    "stage1_face_threshold": 0.72,
+    "stage1_bind_ratio": 0.75,
+    "stage2_threshold": 0.85,
+    "stage2_adaptive": true,
+    "enable_tmdb": false,
+    "notes": "Stage1: TMDb bind threshold 0.72 (跨 domain 較寬)。Stage2: 每個 identity 從 bound traces 挑 frontal/three_quarter/profile 三角度 face 組合成多角度 reference，用於 further matching。"
+}
--- a/experiments/identity_clustering/configs/exp_008.json
+++ b/experiments/identity_clustering/configs/exp_008.json
@@ -0,0 +1,14 @@
+{
+    "id": "008",
+    "name": "Composite: TMDb vector + speaker frequency scoring",
+    "file_uuid": "417a7e93860d70c87aee6c4c1b715d70",
+    "min_frames": 3,
+    "enable_identity_match": true,
+    "stage1_face_threshold": 0.55,
+    "stage1_bind_ratio": 0.60,
+    "stage2_threshold": 0.85,
+    "stage2_adaptive": true,
+    "enable_speaker_weight": true,
+    "speaker_weight_factor": 0.3,
+    "notes": "V2.0 embedding space。Speaker 出現次數(segment count)加權 × vector similarity 綜合評分。主角(SPEAKER_0/SPEAKER_1)加權較高。"
+}
--- a/experiments/identity_clustering/data_snapshot/face_detections.csv
+++ b/experiments/identity_clustering/data_snapshot/face_detections.csv
--- a/experiments/identity_clustering/results/exp_001/clusters.json
+++ b/experiments/identity_clustering/results/exp_001/clusters.json
--- a/experiments/identity_clustering/results/exp_001/config.json
+++ b/experiments/identity_clustering/results/exp_001/config.json
@@ -0,0 +1,11 @@
+{
+  "id": "001",
+  "name": "Baseline: Fixed Threshold (0.85), min 3 frames, no TMDb",
+  "file_uuid": "1a04db97be5fa12bd77369831dc141fd",
+  "min_frames": 3,
+  "clustering_method": "threshold",
+  "threshold": 0.85,
+  "adaptive_threshold": false,
+  "enable_tmdb": false,
+  "notes": "sample_interval=60 導致 trace 碎片化。min_frames=3 納入大部分 traces。"
+}
--- a/experiments/identity_clustering/results/exp_001/labels.json
+++ b/experiments/identity_clustering/results/exp_001/labels.json
--- a/experiments/identity_clustering/results/exp_001/metrics.json
+++ b/experiments/identity_clustering/results/exp_001/metrics.json
@@ -0,0 +1,10 @@
+{
+  "total_traces": 677,
+  "clustered_traces": 677,
+  "cluster_count": 199,
+  "coverage": 1.0,
+  "avg_cluster_size": 3.4020100502512562,
+  "tmdb_matched": 0,
+  "tmdb_coverage": 0.0,
+  "execution_time_s": 3.706886053085327
+}
--- a/experiments/identity_clustering/results/exp_001/summary.txt
+++ b/experiments/identity_clustering/results/exp_001/summary.txt
@@ -0,0 +1,36 @@
+
+Experiment 001: Baseline: Fixed Threshold (0.85), min 3 frames, no TMDb
+====================================
+Date: 2026-05-04T17:13:02.183318
+Config: {
+  "id": "001",
+  "name": "Baseline: Fixed Threshold (0.85), min 3 frames, no TMDb",
+  "file_uuid": "1a04db97be5fa12bd77369831dc141fd",
+  "min_frames": 3,
+  "clustering_method": "threshold",
+  "threshold": 0.85,
+  "adaptive_threshold": false,
+  "enable_tmdb": false,
+  "notes": "sample_interval=60 \u5c0e\u81f4 trace \u788e\u7247\u5316\u3002min_frames=3 \u7d0d\u5165\u5927\u90e8\u5206 traces\u3002"
+}
+
+Results:
+  Traces loaded:     677
+  Clusters:          379
+  Clustered traces:  677
+  Coverage:          100.0%
+  Avg cluster size:  1.8
+  TMDb matched:      0
+  Execution time:    3.6s
+
+Top clusters:
+  Cluster 2: 74 traces → None (sim=0.000)
+  Cluster 29: 38 traces → None (sim=0.000)
+  Cluster 133: 14 traces → None (sim=0.000)
+  Cluster 14: 13 traces → None (sim=0.000)
+  Cluster 62: 10 traces → None (sim=0.000)
+  Cluster 126: 8 traces → None (sim=0.000)
+  Cluster 31: 7 traces → None (sim=0.000)
+  Cluster 13: 6 traces → None (sim=0.000)
+  Cluster 19: 6 traces → None (sim=0.000)
+  Cluster 89: 6 traces → None (sim=0.000)
--- a/experiments/identity_clustering/results/exp_002/clusters.json
+++ b/experiments/identity_clustering/results/exp_002/clusters.json
--- a/experiments/identity_clustering/results/exp_002/config.json
+++ b/experiments/identity_clustering/results/exp_002/config.json
@@ -0,0 +1,11 @@
+{
+  "id": "002",
+  "name": "Adaptive Threshold (pose-aware), min 30 frames, no TMDb",
+  "file_uuid": "1a04db97be5fa12bd77369831dc141fd",
+  "min_frames": 3,
+  "clustering_method": "threshold",
+  "threshold": 0.85,
+  "adaptive_threshold": true,
+  "enable_tmdb": false,
+  "notes": "Pose-aware: 短 trace 放寬 threshold 5%。適合 profile/three_quarter 角度辨識。"
+}
--- a/experiments/identity_clustering/results/exp_002/labels.json
+++ b/experiments/identity_clustering/results/exp_002/labels.json
--- a/experiments/identity_clustering/results/exp_002/metrics.json
+++ b/experiments/identity_clustering/results/exp_002/metrics.json
@@ -0,0 +1,10 @@
+{
+  "total_traces": 677,
+  "clustered_traces": 677,
+  "cluster_count": 143,
+  "coverage": 1.0,
+  "avg_cluster_size": 4.734265734265734,
+  "tmdb_matched": 0,
+  "tmdb_coverage": 0.0,
+  "execution_time_s": 3.065944194793701
+}
--- a/experiments/identity_clustering/results/exp_002/summary.txt
+++ b/experiments/identity_clustering/results/exp_002/summary.txt
@@ -0,0 +1,36 @@
+
+Experiment 002: Adaptive Threshold (pose-aware), min 30 frames, no TMDb
+====================================
+Date: 2026-05-04T17:13:05.263374
+Config: {
+  "id": "002",
+  "name": "Adaptive Threshold (pose-aware), min 30 frames, no TMDb",
+  "file_uuid": "1a04db97be5fa12bd77369831dc141fd",
+  "min_frames": 3,
+  "clustering_method": "threshold",
+  "threshold": 0.85,
+  "adaptive_threshold": true,
+  "enable_tmdb": false,
+  "notes": "Pose-aware: \u77ed trace \u653e\u5bec threshold 5%\u3002\u9069\u5408 profile/three_quarter \u89d2\u5ea6\u8fa8\u8b58\u3002"
+}
+
+Results:
+  Traces loaded:     677
+  Clusters:          293
+  Clustered traces:  677
+  Coverage:          100.0%
+  Avg cluster size:  2.3
+  TMDb matched:      0
+  Execution time:    3.0s
+
+Top clusters:
+  Cluster 2: 114 traces → None (sim=0.000)
+  Cluster 13: 43 traces → None (sim=0.000)
+  Cluster 51: 19 traces → None (sim=0.000)
+  Cluster 112: 15 traces → None (sim=0.000)
+  Cluster 28: 12 traces → None (sim=0.000)
+  Cluster 30: 12 traces → None (sim=0.000)
+  Cluster 56: 11 traces → None (sim=0.000)
+  Cluster 107: 11 traces → None (sim=0.000)
+  Cluster 169: 11 traces → None (sim=0.000)
+  Cluster 74: 9 traces → None (sim=0.000)
--- a/experiments/identity_clustering/results/exp_003/clusters.json
+++ b/experiments/identity_clustering/results/exp_003/clusters.json
--- a/experiments/identity_clustering/results/exp_003/config.json
+++ b/experiments/identity_clustering/results/exp_003/config.json
@@ -0,0 +1,11 @@
+{
+  "id": "003",
+  "name": "DBSCAN (eps=0.3), min 30 frames, no TMDb",
+  "file_uuid": "1a04db97be5fa12bd77369831dc141fd",
+  "min_frames": 3,
+  "clustering_method": "dbscan",
+  "eps": 0.3,
+  "min_samples": 2,
+  "enable_tmdb": false,
+  "notes": "DBSCAN 自動偵測 cluster 數量，不需要手設 threshold。eps=0.3 對應 cosine distance。"
+}
--- a/experiments/identity_clustering/results/exp_003/labels.json
+++ b/experiments/identity_clustering/results/exp_003/labels.json
--- a/experiments/identity_clustering/results/exp_003/metrics.json
+++ b/experiments/identity_clustering/results/exp_003/metrics.json
@@ -0,0 +1,10 @@
+{
+  "total_traces": 677,
+  "clustered_traces": 677,
+  "cluster_count": 34,
+  "coverage": 1.0,
+  "avg_cluster_size": 19.91176470588235,
+  "tmdb_matched": 0,
+  "tmdb_coverage": 0.0,
+  "execution_time_s": 2.6430821418762207
+}
--- a/experiments/identity_clustering/results/exp_003/summary.txt
+++ b/experiments/identity_clustering/results/exp_003/summary.txt
@@ -0,0 +1,36 @@
+
+Experiment 003: DBSCAN (eps=0.3), min 30 frames, no TMDb
+====================================
+Date: 2026-05-04T17:13:08.042584
+Config: {
+  "id": "003",
+  "name": "DBSCAN (eps=0.3), min 30 frames, no TMDb",
+  "file_uuid": "1a04db97be5fa12bd77369831dc141fd",
+  "min_frames": 3,
+  "clustering_method": "dbscan",
+  "eps": 0.3,
+  "min_samples": 2,
+  "enable_tmdb": false,
+  "notes": "DBSCAN \u81ea\u52d5\u5075\u6e2c cluster \u6578\u91cf\uff0c\u4e0d\u9700\u8981\u624b\u8a2d threshold\u3002eps=0.3 \u5c0d\u61c9 cosine distance\u3002"
+}
+
+Results:
+  Traces loaded:     677
+  Clusters:          78
+  Clustered traces:  677
+  Coverage:          100.0%
+  Avg cluster size:  8.7
+  TMDb matched:      0
+  Execution time:    2.7s
+
+Top clusters:
+  Cluster 1: 537 traces → None (sim=0.000)
+  Cluster 10: 26 traces → None (sim=0.000)
+  Cluster 2: 14 traces → None (sim=0.000)
+  Cluster 9: 9 traces → None (sim=0.000)
+  Cluster 47: 8 traces → None (sim=0.000)
+  Cluster 37: 4 traces → None (sim=0.000)
+  Cluster 7: 2 traces → None (sim=0.000)
+  Cluster 32: 2 traces → None (sim=0.000)
+  Cluster 36: 2 traces → None (sim=0.000)
+  Cluster 48: 2 traces → None (sim=0.000)
--- a/experiments/identity_clustering/results/exp_004/clusters.json
+++ b/experiments/identity_clustering/results/exp_004/clusters.json
--- a/experiments/identity_clustering/results/exp_004/config.json
+++ b/experiments/identity_clustering/results/exp_004/config.json
@@ -0,0 +1,11 @@
+{
+  "id": "004",
+  "name": "DBSCAN (eps=0.25), min 30 frames, no TMDb",
+  "file_uuid": "1a04db97be5fa12bd77369831dc141fd",
+  "min_frames": 3,
+  "clustering_method": "dbscan",
+  "eps": 0.25,
+  "min_samples": 2,
+  "enable_tmdb": false,
+  "notes": "DBSCAN 更嚴格版本（eps=0.25），預期更多 cluster、較少 false positive。"
+}
--- a/experiments/identity_clustering/results/exp_004/labels.json
+++ b/experiments/identity_clustering/results/exp_004/labels.json
--- a/experiments/identity_clustering/results/exp_004/metrics.json
+++ b/experiments/identity_clustering/results/exp_004/metrics.json
@@ -0,0 +1,10 @@
+{
+  "total_traces": 677,
+  "clustered_traces": 677,
+  "cluster_count": 64,
+  "coverage": 1.0,
+  "avg_cluster_size": 10.578125,
+  "tmdb_matched": 0,
+  "tmdb_coverage": 0.0,
+  "execution_time_s": 2.588068962097168
+}
--- a/experiments/identity_clustering/results/exp_004/summary.txt
+++ b/experiments/identity_clustering/results/exp_004/summary.txt
@@ -0,0 +1,36 @@
+
+Experiment 004: DBSCAN (eps=0.25), min 30 frames, no TMDb
+====================================
+Date: 2026-05-04T17:13:10.776315
+Config: {
+  "id": "004",
+  "name": "DBSCAN (eps=0.25), min 30 frames, no TMDb",
+  "file_uuid": "1a04db97be5fa12bd77369831dc141fd",
+  "min_frames": 3,
+  "clustering_method": "dbscan",
+  "eps": 0.25,
+  "min_samples": 2,
+  "enable_tmdb": false,
+  "notes": "DBSCAN \u66f4\u56b4\u683c\u7248\u672c\uff08eps=0.25\uff09\uff0c\u9810\u671f\u66f4\u591a cluster\u3001\u8f03\u5c11 false positive\u3002"
+}
+
+Results:
+  Traces loaded:     677
+  Clusters:          129
+  Clustered traces:  677
+  Coverage:          100.0%
+  Avg cluster size:  5.2
+  TMDb matched:      0
+  Execution time:    2.6s
+
+Top clusters:
+  Cluster 1: 444 traces → None (sim=0.000)
+  Cluster 32: 43 traces → None (sim=0.000)
+  Cluster 14: 24 traces → None (sim=0.000)
+  Cluster 4: 13 traces → None (sim=0.000)
+  Cluster 115: 6 traces → None (sim=0.000)
+  Cluster 38: 4 traces → None (sim=0.000)
+  Cluster 53: 4 traces → None (sim=0.000)
+  Cluster 65: 4 traces → None (sim=0.000)
+  Cluster 88: 4 traces → None (sim=0.000)
+  Cluster 102: 4 traces → None (sim=0.000)
--- a/experiments/identity_clustering/results/exp_005/clusters.json
+++ b/experiments/identity_clustering/results/exp_005/clusters.json
--- a/experiments/identity_clustering/results/exp_005/config.json
+++ b/experiments/identity_clustering/results/exp_005/config.json
@@ -0,0 +1,12 @@
+{
+  "id": "005",
+  "name": "Adaptive Threshold + TMDb matching, min 30 frames",
+  "file_uuid": "1a04db97be5fa12bd77369831dc141fd",
+  "min_frames": 3,
+  "clustering_method": "threshold",
+  "threshold": 0.85,
+  "adaptive_threshold": true,
+  "enable_tmdb": true,
+  "enable_speaker_verify": false,
+  "notes": "最佳方案候選：pose-aware + TMDb 自動標註。預期 Cary Grant, Audrey Hepburn 等主要角色被標出。"
+}
--- a/experiments/identity_clustering/results/exp_005/labels.json
+++ b/experiments/identity_clustering/results/exp_005/labels.json
--- a/experiments/identity_clustering/results/exp_005/metrics.json
+++ b/experiments/identity_clustering/results/exp_005/metrics.json
@@ -0,0 +1,10 @@
+{
+  "total_traces": 677,
+  "clustered_traces": 677,
+  "cluster_count": 293,
+  "coverage": 1.0,
+  "avg_cluster_size": 2.310580204778157,
+  "tmdb_matched": 0,
+  "tmdb_coverage": 0.0,
+  "execution_time_s": 3.034806966781616
+}
--- a/experiments/identity_clustering/results/exp_005/summary.txt
+++ b/experiments/identity_clustering/results/exp_005/summary.txt
@@ -0,0 +1,37 @@
+
+Experiment 005: Adaptive Threshold + TMDb matching, min 30 frames
+====================================
+Date: 2026-05-04T17:05:33.808099
+Config: {
+  "id": "005",
+  "name": "Adaptive Threshold + TMDb matching, min 30 frames",
+  "file_uuid": "1a04db97be5fa12bd77369831dc141fd",
+  "min_frames": 3,
+  "clustering_method": "threshold",
+  "threshold": 0.85,
+  "adaptive_threshold": true,
+  "enable_tmdb": true,
+  "enable_speaker_verify": false,
+  "notes": "\u6700\u4f73\u65b9\u6848\u5019\u9078\uff1apose-aware + TMDb \u81ea\u52d5\u6a19\u8a3b\u3002\u9810\u671f Cary Grant, Audrey Hepburn \u7b49\u4e3b\u8981\u89d2\u8272\u88ab\u6a19\u51fa\u3002"
+}
+
+Results:
+  Traces loaded:     677
+  Clusters:          293
+  Clustered traces:  677
+  Coverage:          100.0%
+  Avg cluster size:  2.3
+  TMDb matched:      0
+  Execution time:    3.0s
+
+Top clusters:
+  Cluster 2: 114 traces → None (sim=0.000)
+  Cluster 13: 43 traces → None (sim=0.000)
+  Cluster 51: 19 traces → None (sim=0.000)
+  Cluster 112: 15 traces → None (sim=0.000)
+  Cluster 28: 12 traces → None (sim=0.000)
+  Cluster 30: 12 traces → None (sim=0.000)
+  Cluster 56: 11 traces → None (sim=0.000)
+  Cluster 107: 11 traces → None (sim=0.000)
+  Cluster 169: 11 traces → None (sim=0.000)
+  Cluster 74: 9 traces → None (sim=0.000)
--- a/experiments/identity_clustering/results/exp_006/config.json
+++ b/experiments/identity_clustering/results/exp_006/config.json
@@ -0,0 +1,13 @@
+{
+  "id": "006",
+  "name": "Multi-Stage: Face-level high-conf binding + centroid clustering + speaker",
+  "file_uuid": "1a04db97be5fa12bd77369831dc141fd",
+  "min_frames": 3,
+  "enable_identity_match": true,
+  "stage1_face_threshold": 0.92,
+  "stage1_bind_ratio": 0.85,
+  "stage2_threshold": 0.85,
+  "stage2_adaptive": true,
+  "enable_tmdb": false,
+  "notes": "Stage1: each face vs identity ref, bind if >85% faces match >0.92. Stage2: centroid clustering of unbound + speaker merge."
+}
--- a/experiments/identity_clustering/results/exp_006/labels.json
+++ b/experiments/identity_clustering/results/exp_006/labels.json
--- a/experiments/identity_clustering/results/exp_006/metrics.json
+++ b/experiments/identity_clustering/results/exp_006/metrics.json
@@ -0,0 +1,10 @@
+{
+  "total_traces": 677,
+  "stage1_bound": 0,
+  "stage1_bound_traces": 0,
+  "stage2_clusters": 295,
+  "stage2_unbound_clustered": 677,
+  "total_clusters": 295,
+  "execution_time_s": 3.226997137069702,
+  "coverage": 1.0
+}
--- a/experiments/identity_clustering/results/exp_007/config.json
+++ b/experiments/identity_clustering/results/exp_007/config.json
@@ -0,0 +1,13 @@
+{
+  "id": "007",
+  "name": "Multi-Stage: relaxed TMDb bind + 3-angle anchor selection",
+  "file_uuid": "1a04db97be5fa12bd77369831dc141fd",
+  "min_frames": 3,
+  "enable_identity_match": true,
+  "stage1_face_threshold": 0.72,
+  "stage1_bind_ratio": 0.75,
+  "stage2_threshold": 0.85,
+  "stage2_adaptive": true,
+  "enable_tmdb": false,
+  "notes": "Stage1: TMDb bind threshold 0.72 (跨 domain 較寬)。Stage2: 每個 identity 從 bound traces 挑 frontal/three_quarter/profile 三角度 face 組合成多角度 reference，用於 further matching。"
+}
--- a/experiments/identity_clustering/results/exp_007/labels.json
+++ b/experiments/identity_clustering/results/exp_007/labels.json
--- a/experiments/identity_clustering/results/exp_007/metrics.json
+++ b/experiments/identity_clustering/results/exp_007/metrics.json
@@ -0,0 +1,10 @@
+{
+  "total_traces": 677,
+  "stage1_bound": 0,
+  "stage1_bound_traces": 0,
+  "stage2_clusters": 295,
+  "stage2_unbound_clustered": 677,
+  "total_clusters": 295,
+  "execution_time_s": 3.2448980808258057,
+  "coverage": 1.0
+}
--- a/experiments/identity_clustering/results/exp_008/config.json
+++ b/experiments/identity_clustering/results/exp_008/config.json
@@ -0,0 +1,15 @@
+{
+  "id": "008",
+  "name": "Composite: TMDb vector + speaker frequency scoring",
+  "file_uuid": "417a7e93860d70c87aee6c4c1b715d70",
+  "min_frames": 3,
+  "enable_identity_match": true,
+  "stage1_face_threshold": 0.55,
+  "stage1_bind_ratio": 0.6,
+  "stage2_threshold": 0.85,
+  "stage2_adaptive": true,
+  "enable_speaker_weight": true,
+  "speaker_weight_factor": 0.3,
+  "notes": "V2.0 embedding space。Speaker 出現次數(segment count)加權 × vector similarity 綜合評分。主角(SPEAKER_0/SPEAKER_1)加權較高。",
+  "write_db": true
+}
--- a/experiments/identity_clustering/results/exp_008/labels.json
+++ b/experiments/identity_clustering/results/exp_008/labels.json
--- a/experiments/identity_clustering/results/exp_008/metrics.json
+++ b/experiments/identity_clustering/results/exp_008/metrics.json
@@ -0,0 +1,10 @@
+{
+  "total_traces": 677,
+  "stage1_bound": 671,
+  "stage1_bound_traces": 671,
+  "stage2_clusters": 6,
+  "stage2_unbound_clustered": 6,
+  "total_clusters": 677,
+  "execution_time_s": 11.841914176940918,
+  "coverage": 1.0
+}
--- a/experiments/identity_clustering/runner.py
+++ b/experiments/identity_clustering/runner.py
@@ -0,0 +1,446 @@
+#!/opt/homebrew/bin/python3.11
+"""
+Identity Clustering Experiment Runner
+
+Usage:
+    python runner.py --config configs/exp_001.json
+
+Each experiment:
+1. Reads config parameters
+2. Fetches face trace data from DB
+3. Runs clustering algorithm
+4. Optionally matches against TMDb
+5. Optionally verifies against speakers
+6. Saves all results to experiments/identity_clustering/results/exp_{id}/
+"""
+
+import sys
+import os
+import json
+import argparse
+import time
+import numpy as np
+from datetime import datetime
+from collections import defaultdict
+from typing import Dict, List, Tuple, Optional
+
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), "../..", "scripts"))
+
+# DB connection
+import psycopg2
+import psycopg2.extras
+
+# Connection string; the DATABASE_URL environment variable overrides the
+# local default.
+DB_URL = os.environ.get("DATABASE_URL", "postgresql://accusys@localhost:5432/momentry")
+# Schema name interpolated into every table reference in the queries below.
+SCHEMA = "dev"
+# Absolute path of the directory containing this script.
+EXPERIMENT_DIR = os.path.dirname(os.path.abspath(__file__))
+
+
+def get_conn():
+    """Open and return a new psycopg2 connection to the database at DB_URL."""
+    connection = psycopg2.connect(DB_URL)
+    return connection
+
+
+def load_experiment_config(config_path: str) -> dict:
+    """Load an experiment configuration from a JSON file.
+
+    Args:
+        config_path: Path to the experiment config, e.g. configs/exp_001.json.
+
+    Returns:
+        The parsed JSON document.
+    """
+    # Decode explicitly as UTF-8: the configs carry non-ASCII notes and the
+    # platform default encoding is not guaranteed to be UTF-8.
+    with open(config_path, encoding="utf-8") as f:
+        return json.load(f)
+
+
+def fetch_trace_data(
+    cur, file_uuid: str, min_frames: int, centroid_method: str = "mean"
+) -> List[dict]:
+    """Fetch per-trace metadata and an embedding centroid from face_detections.
+
+    Only detections that have both a trace_id and an embedding are considered,
+    and only traces with at least ``min_frames`` such detections are returned.
+
+    Args:
+        cur: Open DB cursor used for all queries.
+        file_uuid: File whose detections are loaded.
+        min_frames: Minimum number of embedded detections per trace.
+        centroid_method: How the trace centroid is derived from its embeddings:
+            "mean" (default), "median", or "best_confidence" (the embedding of
+            the highest-confidence detection).
+
+    Returns:
+        One dict per trace, ordered by trace_id, with keys trace_id,
+        frame_count, start_frame, end_frame, avg_bbox, avg_confidence,
+        embedding_count and centroid (a list, or None without embeddings).
+
+    Raises:
+        ValueError: If ``centroid_method`` is not one of the supported names.
+    """
+    if centroid_method not in ("mean", "median", "best_confidence"):
+        raise ValueError(f"Unknown centroid_method: {centroid_method!r}")
+
+    # avg_confidence is cast like the other aggregates so it arrives as a
+    # plain float whatever the column's numeric type is.
+    sql = f"""
+    SELECT 
+        trace_id,
+        COUNT(*) as frame_count,
+        MIN(frame_number) as start_frame,
+        MAX(frame_number) as end_frame,
+        AVG(x)::float as avg_x,
+        AVG(y)::float as avg_y,
+        AVG(width)::float as avg_w,
+        AVG(height)::float as avg_h,
+        AVG(confidence)::float as avg_confidence
+    FROM {SCHEMA}.face_detections
+    WHERE file_uuid = %s AND trace_id IS NOT NULL AND embedding IS NOT NULL
+    GROUP BY trace_id
+    HAVING COUNT(*) >= %s
+    ORDER BY trace_id
+    """
+    cur.execute(sql, (file_uuid, min_frames))
+    rows = cur.fetchall()
+
+    traces = []
+    for row in rows:
+        trace_id = row[0]
+        # Embeddings come back best-confidence first, so index 0 is the
+        # "best_confidence" representative.
+        cur.execute(
+            f"SELECT embedding FROM {SCHEMA}.face_detections WHERE file_uuid=%s AND trace_id=%s AND embedding IS NOT NULL ORDER BY confidence DESC",
+            (file_uuid, trace_id),
+        )
+        # NOTE(review): assumes the driver returns each embedding as a numeric
+        # sequence - confirm against the column type / adapter in use.
+        embeddings = [np.array(r[0]) for r in cur.fetchall()]
+
+        if not embeddings:
+            centroid = None
+        elif centroid_method == "mean":
+            centroid = np.mean(embeddings, axis=0)
+        elif centroid_method == "median":
+            centroid = np.median(embeddings, axis=0)
+        else:
+            centroid = embeddings[0]
+
+        traces.append(
+            {
+                "trace_id": trace_id,
+                "frame_count": row[1],
+                "start_frame": row[2],
+                "end_frame": row[3],
+                "avg_bbox": {"x": row[4], "y": row[5], "w": row[6], "h": row[7]},
+                "avg_confidence": row[8],
+                "embedding_count": len(embeddings),
+                "centroid": centroid.tolist() if centroid is not None else None,
+            }
+        )
+
+    return traces
+
+
+def cosine_similarity(a, b):
+    """Return the cosine similarity between vectors ``a`` and ``b``.
+
+    A small epsilon is added to the denominator, so a zero-norm input yields
+    0 instead of a division by zero.
+    """
+    vec_a = np.array(a)
+    vec_b = np.array(b)
+    norm_product = np.linalg.norm(vec_a) * np.linalg.norm(vec_b)
+    return np.dot(vec_a, vec_b) / (norm_product + 1e-10)
+
+
+def cluster_by_threshold(
+    traces: List[dict], threshold: float, adaptive: bool = False
+) -> List[List[dict]]:
+    """Greedy seed-based clustering on trace centroids.
+
+    Traces are visited in order. Each still-unassigned trace seeds a new
+    cluster and absorbs every later unassigned trace whose centroid has a
+    cosine similarity to the seed's centroid of at least the threshold.
+    Members are compared with the seed only, never with each other.
+
+    Args:
+        traces: Trace dicts with "trace_id", "frame_count" and "centroid".
+        threshold: Minimum cosine similarity for joining a seed's cluster.
+        adaptive: When True, the threshold is lowered by 0.05 for any pair in
+            which either trace has fewer than 60 frames.
+
+    Returns:
+        A list of clusters, each a list of trace dicts. Traces without a
+        centroid always end up as singleton clusters.
+    """
+    clusters = []
+    assigned = set()
+
+    for i, seed in enumerate(traces):
+        if seed["trace_id"] in assigned:
+            continue
+        cluster = [seed]
+        assigned.add(seed["trace_id"])
+
+        # A seed without a centroid cannot attract members. Every trace before
+        # index i is already assigned, so only later traces need scanning.
+        if seed["centroid"] is not None:
+            for candidate in traces[i + 1:]:
+                if candidate["trace_id"] in assigned:
+                    continue
+                if candidate["centroid"] is None:
+                    continue
+
+                sim = cosine_similarity(seed["centroid"], candidate["centroid"])
+                th = threshold
+                # Short traces get a relaxed threshold.
+                if adaptive and (
+                    seed["frame_count"] < 60 or candidate["frame_count"] < 60
+                ):
+                    th = threshold - 0.05
+
+                if sim >= th:
+                    cluster.append(candidate)
+                    assigned.add(candidate["trace_id"])
+
+        clusters.append(cluster)
+
+    return clusters
+
+
+def cluster_dbscan(
+    traces: List[dict], eps: float = 0.3, min_samples: int = 2
+) -> List[List[dict]]:
+    """Cluster trace centroids with DBSCAN using cosine distance.
+
+    Traces without a centroid are ignored. Points DBSCAN labels as noise are
+    not discarded: each becomes its own singleton cluster.
+
+    Args:
+        traces: Trace dicts carrying a "centroid" entry.
+        eps: DBSCAN neighbourhood radius (cosine distance = 1 - similarity).
+        min_samples: DBSCAN core-point neighbourhood size.
+
+    Returns:
+        A list of clusters, each a list of trace dicts; empty when no trace
+        has a centroid.
+    """
+    valid = [t for t in traces if t["centroid"] is not None]
+    if not valid:
+        # DBSCAN cannot be fitted on an empty sample matrix.
+        return []
+
+    from sklearn.cluster import DBSCAN
+
+    X = np.array([t["centroid"] for t in valid])
+    labels = DBSCAN(eps=eps, min_samples=min_samples, metric="cosine").fit(X).labels_
+
+    grouped = defaultdict(list)
+    for idx, (label, trace) in enumerate(zip(labels, valid)):
+        # Noise (label -1) gets a unique key so it stays a singleton cluster.
+        key = int(label) if label >= 0 else f"noise_{idx}"
+        grouped[key].append(trace)
+
+    return list(grouped.values())
+
+
+def fetch_tmdb_identities(cur) -> List[dict]:
+    """Return TMDb-sourced identities that have a face embedding.
+
+    Each entry is a dict with "id", "name" and "embedding", taken from the
+    identities table rows whose source is 'tmdb'.
+    """
+    query = f"SELECT id, name, face_embedding FROM {SCHEMA}.identities WHERE source='tmdb' AND face_embedding IS NOT NULL"
+    cur.execute(query)
+
+    identities = []
+    for record in cur.fetchall():
+        embedding = record[2]
+        if embedding is None:
+            continue
+        identities.append(
+            {"id": record[0], "name": record[1], "embedding": embedding}
+        )
+    return identities
+
+
+def fetch_speaker_overlaps(cur, file_uuid: str) -> dict:
+    """Read speaker/face-trace overlap ratios from the SPEAKS_AS TKG edges.
+
+    Returns a mapping ``{trace_id: {speaker_id: overlap_ratio}}``. A missing
+    overlap_ratio property is stored as 0.0; rows lacking a trace id or a
+    speaker id are skipped.
+    """
+    query = f"""
+        SELECT 
+            REPLACE(n.external_id, 'trace_', '')::int as trace_id,
+            n2.external_id as speaker_id,
+            (e.properties->>'overlap_ratio')::float as overlap_ratio
+        FROM {SCHEMA}.tkg_edges e
+        JOIN {SCHEMA}.tkg_nodes n ON e.source_node_id = n.id
+        JOIN {SCHEMA}.tkg_nodes n2 ON e.target_node_id = n2.id
+        WHERE e.edge_type = 'SPEAKS_AS'
+          AND n.node_type = 'face_trace'
+          AND n2.node_type = 'speaker'
+          AND e.file_uuid = %s
+        """
+    cur.execute(query, (file_uuid,))
+
+    per_trace = defaultdict(lambda: defaultdict(float))
+    for record in cur.fetchall():
+        trace_id = record[0]
+        speaker_id = record[1]
+        if trace_id is None or speaker_id is None:
+            continue
+        ratio = record[2] or 0
+        per_trace[int(trace_id)][speaker_id] = float(ratio)
+    return dict(per_trace)
+
+
+def verify_with_speakers(
+    clusters: List[dict], speaker_overlaps: dict
+) -> List[dict]:
+    """Annotate clusters with their dominant speaker and merge same-speaker ones.
+
+    A cluster may be a dict carrying "trace_ids" (the shape match_tmdb
+    returns) or a raw list of trace dicts (the shape the clustering functions
+    return). Dict clusters are annotated in place; raw lists are converted to
+    ``{"trace_ids": [...], "trace_count": n}`` dicts first, because a list
+    cannot hold the annotations.
+
+    Args:
+        clusters: Clusters in either of the shapes above.
+        speaker_overlaps: ``{trace_id: {speaker_id: overlap_ratio}}``.
+
+    Returns:
+        Merged clusters first (one per speaker that dominates two or more
+        clusters with a summed overlap score above 0.5), followed by all
+        unmerged clusters in their original order. Every returned cluster
+        carries "dominant_speaker"; unmerged ones also carry
+        "speaker_overlap_score" and "speaker_votes".
+    """
+    annotated = []
+    for cluster in clusters:
+        if isinstance(cluster, dict):
+            entry = cluster
+            trace_ids = entry.get("trace_ids") or []
+        else:
+            # Raw cluster: a list of trace dicts.
+            trace_ids = [t["trace_id"] for t in cluster]
+            entry = {"trace_ids": trace_ids, "trace_count": len(trace_ids)}
+
+        # Sum the overlap ratios of every trace in the cluster per speaker.
+        speaker_votes = defaultdict(float)
+        for tid in trace_ids:
+            for spk, ratio in speaker_overlaps.get(tid, {}).items():
+                speaker_votes[spk] += ratio
+
+        if speaker_votes:
+            best_speaker = max(speaker_votes, key=speaker_votes.get)
+            entry["dominant_speaker"] = best_speaker
+            entry["speaker_overlap_score"] = round(speaker_votes[best_speaker], 3)
+            entry["speaker_votes"] = dict(speaker_votes)
+        else:
+            entry["dominant_speaker"] = None
+            entry["speaker_overlap_score"] = 0
+            entry["speaker_votes"] = {}
+        annotated.append(entry)
+
+    # Group cluster indices by dominant speaker; only confident assignments
+    # (score above 0.5) are eligible for merging.
+    speaker_clusters = defaultdict(list)
+    for i, entry in enumerate(annotated):
+        spk = entry["dominant_speaker"]
+        if spk and entry["speaker_overlap_score"] > 0.5:
+            speaker_clusters[spk].append(i)
+
+    merged = set()
+    new_clusters = []
+    for spk, indices in speaker_clusters.items():
+        if len(indices) <= 1:
+            continue
+        merged_ids = []
+        for idx in indices:
+            merged_ids.extend(annotated[idx].get("trace_ids") or [])
+            merged.add(idx)
+        # Order-preserving de-duplication keeps the output deterministic.
+        unique_ids = list(dict.fromkeys(merged_ids))
+        new_clusters.append({
+            "merged_from": indices,
+            "trace_ids": unique_ids,
+            "trace_count": len(unique_ids),
+            "dominant_speaker": spk,
+            "merge_reason": "shared_dominant_speaker",
+        })
+
+    # Keep unmerged clusters after the merged ones.
+    new_clusters.extend(
+        entry for i, entry in enumerate(annotated) if i not in merged
+    )
+    return new_clusters
+
+
+def match_tmdb(
+    clusters: List[List[dict]],
+    tmdb_identities: List[dict],
+    min_similarity: float = 0.55,
+) -> List[dict]:
+    """Match each cluster to the most similar TMDb identity.
+
+    The trace with the most frames represents the cluster; its centroid is
+    compared with every identity embedding by cosine similarity.
+
+    Args:
+        clusters: Clusters as lists of trace dicts ("trace_id", "frame_count",
+            "centroid").
+        tmdb_identities: Identity dicts with "id", "name" and "embedding".
+        min_similarity: Minimum cosine similarity for an identity to count as
+            a match (default 0.55).
+
+    Returns:
+        One result dict per non-empty cluster with cluster_id (index in
+        ``clusters``), trace_count, total_frames, trace_ids, tmdb_matches
+        (best first), best_match and best_similarity. A cluster whose
+        representative has no centroid is kept with no matches, so its traces
+        still count in downstream metrics.
+    """
+    results = []
+    for i, cluster in enumerate(clusters):
+        if len(cluster) == 0:
+            continue
+        # Use the trace with most frames as representative.
+        best_trace = max(cluster, key=lambda t: t["frame_count"])
+        centroid = best_trace.get("centroid")
+
+        matches = []
+        if centroid is not None:
+            for identity in tmdb_identities:
+                if identity["embedding"] is None:
+                    continue
+                sim = cosine_similarity(centroid, identity["embedding"])
+                if sim >= min_similarity:
+                    matches.append(
+                        {
+                            "id": identity["id"],
+                            "name": identity["name"],
+                            "similarity": float(sim),
+                        }
+                    )
+        matches.sort(key=lambda m: m["similarity"], reverse=True)
+
+        results.append(
+            {
+                "cluster_id": i,
+                "trace_count": len(cluster),
+                "total_frames": sum(t["frame_count"] for t in cluster),
+                "trace_ids": [t["trace_id"] for t in cluster],
+                "tmdb_matches": matches,
+                "best_match": matches[0]["name"] if matches else None,
+                "best_similarity": matches[0]["similarity"] if matches else 0,
+            }
+        )
+
+    return results
+
+
+def compute_metrics(clusters: List[dict], total_traces: int) -> dict:
+    clustered = sum(c["trace_count"] for c in clusters) if "trace_count" in clusters[0] else sum(len(c) for c in clusters)
+    return {
+        "total_traces": total_traces,
+        "clustered_traces": clustered,
+        "cluster_count": len(clusters),
+        "coverage": clustered / max(total_traces, 1),
+        "avg_cluster_size": clustered / max(len(clusters), 1),
+        "tmdb_matched": sum(1 for c in clusters if isinstance(c, dict) and c.get("best_match")),
+        "tmdb_coverage": sum(1 for c in clusters if isinstance(c, dict) and c.get("best_match")) / max(len(clusters), 1),
+    }
+
+
+def run_experiment(config: dict) -> dict:
+    """Main experiment flow"""
+    exp_id = config["id"]
+    file_uuid = config.get("file_uuid", "1a04db97be5fa12bd77369831dc141fd")
+    print(f"\n{'='*60}")
+    print(f"Experiment {exp_id}: {config['name']}")
+    print(f"{'='*60}")
+
+    conn = get_conn()
+    cur = conn.cursor()
+
+    t0 = time.time()
+
+    # Step 1: Fetch traces
+    print(f"\n[1] Fetching traces (min_frames={config.get('min_frames', 30)})...")
+    traces = fetch_trace_data(cur, file_uuid, config.get("min_frames", 30))
+    print(f"    {len(traces)} traces loaded")
+
+    # Step 2: Clustering
+    method = config.get("clustering_method", "threshold")
+    print(f"\n[2] Clustering: method={method}...")
+
+    if method == "threshold":
+        threshold = config.get("threshold", 0.85)
+        adaptive = config.get("adaptive_threshold", False)
+        clusters = cluster_by_threshold(traces, threshold, adaptive)
+    elif method == "dbscan":
+        eps = config.get("eps", 0.3)
+        min_samples = config.get("min_samples", 2)
+        clusters = cluster_dbscan(traces, eps, min_samples)
+    else:
+        clusters = cluster_by_threshold(traces, 0.85, True)
+
+    clustered_traces = sum(len(c) for c in clusters)
+    print(f"    {len(clusters)} clusters, {clustered_traces} traces clustered")
+
+    # Step 3: Speaker verification (mandatory — standard step)
+    print(f"\n[3] Speaker verification...")
+    speaker_overlaps = fetch_speaker_overlaps(cur, file_uuid)
+    # Convert raw clusters to label dicts
+    labels = [
+        {
+            "cluster_id": i,
+            "trace_count": len(c),
+            "trace_ids": [t["trace_id"] for t in c],
+            "tmdb_matches": [],
+            "best_match": None,
+        }
+        for i, c in enumerate(clusters)
+    ]
+    labels = verify_with_speakers(labels, speaker_overlaps)
+    matched_speakers = sum(1 for l in labels if l.get("dominant_speaker"))
+    merged = sum(1 for l in labels if l.get("merge_reason"))
+    print(f"    {matched_speakers} clusters have speaker match, {merged} merged by speaker")
+
+    # Step 4: TMDb matching (optional)
+    if config.get("enable_tmdb", False):
+        print(f"\n[4] TMDb matching...")
+        tmdb = fetch_tmdb_identities(cur)
+        print(f"    {len(tmdb)} TMDb identities loaded")
+        labels = match_tmdb(labels if labels else clusters, tmdb)
+        matched = sum(1 for l in labels if l["best_match"])
+        print(f"    {matched} clusters matched to TMDb")
+
+    # Step 5: Metrics
+    metrics = compute_metrics(labels if labels else clusters, len(traces))
+    metrics["execution_time_s"] = time.time() - t0
+
+    cur.close()
+    conn.close()
+
+    # Step 5: Save results
+    result_dir = os.path.join(EXPERIMENT_DIR, "results", f"exp_{exp_id}")
+    os.makedirs(result_dir, exist_ok=True)
+
+    with open(os.path.join(result_dir, "clusters.json"), "w") as f:
+        json.dump(clusters if not labels else labels, f, indent=2, ensure_ascii=False)
+
+    with open(os.path.join(result_dir, "labels.json"), "w") as f:
+        json.dump(labels, f, indent=2, ensure_ascii=False)
+
+    with open(os.path.join(result_dir, "metrics.json"), "w") as f:
+        json.dump(metrics, f, indent=2, ensure_ascii=False)
+
+    with open(os.path.join(result_dir, "config.json"), "w") as f:
+        json.dump(config, f, indent=2, ensure_ascii=False)
+
+    # Summary
+    summary = f"""
+Experiment {exp_id}: {config['name']}
+====================================
+Date: {datetime.now().isoformat()}
+Config: {json.dumps(config, indent=2)}
+
+Results:
+  Traces loaded:     {len(traces)}
+  Clusters:          {len(clusters)}
+  Clustered traces:  {clustered_traces}
+  Coverage:          {metrics['coverage']:.1%}
+  Avg cluster size:  {metrics['avg_cluster_size']:.1f}
+  TMDb matched:      {metrics.get('tmdb_matched', 0)}
+  Execution time:    {metrics['execution_time_s']:.1f}s
+
+Top clusters:
+"""
+    sorted_labels = sorted(labels, key=lambda l: l.get("trace_count", 0), reverse=True)
+    for l in sorted_labels[:10]:
+        name = l.get("best_match", "unlabeled")
+        summary += f"  Cluster {l['cluster_id']}: {l['trace_count']} traces → {name} (sim={l.get('best_similarity', 0):.3f})\n"
+
+    with open(os.path.join(result_dir, "summary.txt"), "w") as f:
+        f.write(summary)
+
+    print(f"\n[✓] Results saved to {result_dir}")
+    print(summary)
+
+    return metrics
+
+
+def main():
+    parser = argparse.ArgumentParser(description="Identity Clustering Experiment Runner")
+    parser.add_argument("--config", required=True, help="Experiment config JSON")
+    args = parser.parse_args()
+
+    config = load_experiment_config(args.config)
+    run_experiment(config)
+
+
+if __name__ == "__main__":
+    main()
--- a/experiments/identity_clustering/runner_v2.py
+++ b/experiments/identity_clustering/runner_v2.py
@@ -0,0 +1,431 @@
+#!/opt/homebrew/bin/python3.11
+"""
+Multi-Stage Identity Clustering Runner
+
+Stage 1: High-confidence face-level matching
+  - Compare ALL face embeddings in each trace against identity references
+  - Bind trace to identity when the share of matching faces reaches stage1_bind_ratio, each match scoring at least stage1_face_threshold similarity (both configurable)
+  - These become "anchors" for Stage 2
+
+Stage 2: Trace centroid clustering of remaining unbound traces
+  - Use centroid of unbound traces, cluster with adaptive threshold
+  - Merge clusters with speaker overlap verification
+
+Stage 3 (optional): TMDb matching
+"""
+
+import sys, os, json, argparse, time, numpy as np
+from datetime import datetime
+from collections import defaultdict
+from typing import Dict, List, Tuple, Optional
+
+import psycopg2
+
+DB_URL = os.environ.get("DATABASE_URL", "postgresql://accusys@localhost:5432/momentry")
+SCHEMA = "dev"
+EXPERIMENT_DIR = os.path.dirname(os.path.abspath(__file__))
+
+
+def get_conn(): return psycopg2.connect(DB_URL)
+
+
+def cosine_similarity(a, b):
+    a, b = np.array(a), np.array(b)
+    return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b) + 1e-10)
+
+
+def parse_pg_array(val):
+    """Parse PostgreSQL real[] array — returns numpy float64 array or None"""
+    if val is None: return None
+    if isinstance(val, np.ndarray): return val.astype(np.float64)
+    if isinstance(val, list): return np.array(val, dtype=np.float64)
+    if isinstance(val, str):
+        s = val.strip('[]{}')
+        if not s: return None
+        return np.fromstring(s, sep=',').astype(np.float64)
+    return None
+
+
+def fetch_trace_with_faces(cur, file_uuid: str, min_frames: int) -> List[dict]:
+    """Fetch traces with ALL their individual face embeddings"""
+    # Get trace summaries
+    cur.execute(
+        f"""
+        SELECT trace_id, COUNT(*) as fc, MIN(frame_number), MAX(frame_number),
+               AVG(x::float), AVG(y::float), AVG(width::float), AVG(height::float)
+        FROM {SCHEMA}.face_detections
+        WHERE file_uuid=%s AND trace_id IS NOT NULL AND embedding IS NOT NULL
+        GROUP BY trace_id HAVING COUNT(*)>=%s ORDER BY trace_id
+        """, (file_uuid, min_frames))
+    
+    traces = []
+    for row in cur.fetchall():
+        tid = row[0]
+        cur.execute(
+            f"SELECT embedding FROM {SCHEMA}.face_detections WHERE file_uuid=%s AND trace_id=%s AND embedding IS NOT NULL ORDER BY confidence DESC",
+            (file_uuid, tid))
+        faces = []
+        for r in cur.fetchall():
+            emb = parse_pg_array(r[0])
+            if emb is not None:
+                faces.append({"embedding": emb.astype(np.float64)})
+        
+        traces.append({
+            "trace_id": tid, "frame_count": row[1],
+            "start_frame": row[2], "end_frame": row[3],
+            "avg_bbox": {"x": row[4], "y": row[5], "w": row[6], "h": row[7]},
+            "faces": faces,
+            "centroid": np.mean([f["embedding"] for f in faces], axis=0).tolist() if faces else None,
+        })
+    return traces
+
+
+def fetch_speaker_overlaps(cur, file_uuid: str) -> dict:
+    cur.execute(f"""
+        SELECT REPLACE(n.external_id,'trace_','')::int, n2.external_id,
+               (e.properties->>'overlap_ratio')::float
+        FROM {SCHEMA}.tkg_edges e
+        JOIN {SCHEMA}.tkg_nodes n ON e.source_node_id=n.id
+        JOIN {SCHEMA}.tkg_nodes n2 ON e.target_node_id=n2.id
+        WHERE e.edge_type='SPEAKS_AS' AND n.node_type='face_trace' AND n2.node_type='speaker' AND e.file_uuid=%s
+    """, (file_uuid,))
+    overlaps = defaultdict(lambda: defaultdict(float))
+    for tid, spk, ratio in cur.fetchall():
+        if tid and spk: overlaps[int(tid)][spk] = float(ratio or 0)
+    return dict(overlaps)
+
+
+def fetch_identity_references(cur) -> List[dict]:
+    """Get registered identities with face embeddings as references"""
+    cur.execute(f"SELECT id, name, face_embedding FROM {SCHEMA}.identities WHERE face_embedding IS NOT NULL")
+    results = []
+    for r in cur.fetchall():
+        emb = parse_pg_array(r[2])
+        if emb is None: continue
+        results.append({"id": r[0], "name": r[1], "embedding": emb.astype(np.float64)})
+    return results
+
+
+# ===== STAGE 1: High-confidence face-level matching =====
+
+def stage1_high_confidence_binding(
+    traces: List[dict], identities: List[dict],
+    face_match_threshold: float = 0.92,
+    trace_bind_ratio: float = 0.85,
+) -> Tuple[List[dict], List[dict]]:
+    """
+    For each trace, compare EVERY face against EVERY identity.
+    Bind trace to identity if >trace_bind_ratio% of faces match with >face_match_threshold.
+    Returns (bound_traces, unbound_traces)
+    """
+    bound = []
+    unbound = []
+
+    for trace in traces:
+        faces = trace.get("faces", [])
+        if not faces:
+            unbound.append(trace)
+            continue
+
+        best_identity = None
+        best_match_count = 0
+
+        for ident in identities:
+            match_count = 0
+            for face in faces:
+                sim = cosine_similarity(face["embedding"], ident["embedding"])
+                if sim >= face_match_threshold:
+                    match_count += 1
+
+            ratio = match_count / len(faces)
+            if ratio >= trace_bind_ratio and match_count > best_match_count:
+                best_match_count = match_count
+                best_identity = {
+                    "id": ident["id"],
+                    "name": ident["name"],
+                    "match_ratio": round(ratio, 3),
+                    "matched_faces": match_count,
+                    "total_faces": len(faces),
+                }
+
+        if best_identity:
+            trace["binding"] = best_identity
+            trace["binding_stage"] = "stage1_face_level"
+            bound.append(trace)
+        else:
+            unbound.append(trace)
+
+    return bound, unbound
+
+
+# ===== STAGE 2: Centroid clustering of unbound traces =====
+
+def stage2_cluster_unbound(
+    traces: List[dict], threshold: float, adaptive: bool = False
+) -> List[dict]:
+    """Cluster unbound traces by centroid similarity + speaker verify"""
+    clusters = []
+    assigned = set()
+
+    for i, t1 in enumerate(traces):
+        if t1["trace_id"] in assigned: continue
+        cluster = [t1]; assigned.add(t1["trace_id"])
+
+        for j, t2 in enumerate(traces):
+            if t2["trace_id"] in assigned or i == j: continue
+            if t1["centroid"] is None or t2["centroid"] is None: continue
+
+            sim = cosine_similarity(t1["centroid"], t2["centroid"])
+            th = threshold
+            if adaptive and (t1["frame_count"] < 10 or t2["frame_count"] < 10):
+                th -= 0.05
+
+            if sim >= th:
+                cluster.append(t2); assigned.add(t2["trace_id"])
+
+        clusters.append(cluster)
+    return clusters
+
+
+def apply_speaker_verification(clusters: List[dict], speaker_overlaps: dict) -> List[dict]:
+    """Label clusters with speaker + merge same-speaker clusters"""
+    labels = []
+    for i, cluster in enumerate(clusters):
+        trace_ids = [t["trace_id"] for t in cluster]
+        votes = defaultdict(float)
+        for tid in trace_ids:
+            if tid in speaker_overlaps:
+                for spk, r in speaker_overlaps[tid].items():
+                    votes[spk] += r
+        
+        best_spk = max(votes, key=votes.get) if votes else None
+        labels.append({
+            "cluster_id": i, "trace_count": len(cluster),
+            "trace_ids": trace_ids,
+            "dominant_speaker": best_spk,
+            "speaker_score": round(votes.get(best_spk, 0), 3) if best_spk else 0,
+            "binding": cluster[0].get("binding"),
+            "binding_stage": cluster[0].get("binding_stage"),
+        })
+    return labels
+
+
+# ===== Main Experiment =====
+
+def run_experiment(config: dict) -> dict:
+    exp_id = config["id"]; file_uuid = config.get("file_uuid", "")
+    conn = get_conn(); cur = conn.cursor()
+    t0 = time.time()
+    out = lambda *a: None  # noqa
+
+    # Load data
+    traces = fetch_trace_with_faces(cur, file_uuid, config.get("min_frames", 3))
+    identities = fetch_identity_references(cur) if config.get("enable_identity_match", True) else []
+    speaker_overlaps = fetch_speaker_overlaps(cur, file_uuid)
+    print(f"Traces: {len(traces)}, Identities: {len(identities)}, Speaker edges: {len(speaker_overlaps)}")
+
+    # Stage 1: TMDb-based first-pass binding (relaxed threshold)
+    bound, unbound = [], traces
+    if identities:
+        bound, unbound = stage1_high_confidence_binding(
+            traces, identities,
+            config.get("stage1_face_threshold", 0.55),
+            config.get("stage1_bind_ratio", 0.60),
+        )
+        print(f"Stage 1 (TMDb): {len(bound)} traces bound, {len(unbound)} unbound")
+
+    # Stage 1b+2: Iterative enrichment — each bound trace adds 3 best faces as references
+    if bound and identities and unbound:
+        # Build initial reference sets from Stage 1 bound traces
+        # For each identity, collect top-3 confidence faces from each bound trace
+        identity_refs = {}  # identity_id -> list of reference embeddings
+        for t in bound:
+            b = t.get("binding", {})
+            iid = b.get("id") if isinstance(b, dict) else None
+            if not iid or not t.get("faces"): continue
+
+            if iid not in identity_refs:
+                identity_refs[iid] = []
+
+            # Sample 3 best faces from this trace (top confidence = best quality)
+            faces = t["faces"]
+            n_sample = min(3, len(faces))
+            for f in faces[:n_sample]:
+                identity_refs[iid].append(f["embedding"])
+
+        # Build identity lookup
+        id_to_name = {ident["id"]: ident["name"] for ident in identities}
+
+        for iid, refs in identity_refs.items():
+            print(f"    {id_to_name.get(iid, '?'):<20} {len(refs)} reference faces (multi-angle sampling)")
+
+        # Speaker segment counts for weighting
+        speaker_counts = defaultdict(float)
+        for tid, spks in speaker_overlaps.items():
+            speaker_counts[tid] = sum(spks.values())
+
+        # Iterative matching with growing reference set
+        round_num = 0
+        while True:
+            round_num += 1
+            bound_this_round = []
+
+            for t in unbound:
+                best_score = 0
+                best_iid = None
+                best_sim = 0
+                best_match_count = 0
+
+                for iid, refs in identity_refs.items():
+                    faces = t.get("faces", [])
+                    if not faces: continue
+
+                    # Compare each face against ALL references, take max per face
+                    face_sims = []
+                    for face in faces:
+                        max_sim = max(
+                            cosine_similarity(face["embedding"], ref) for ref in refs
+                        )
+                        face_sims.append(max_sim)
+
+                    avg_sim = np.mean(face_sims) if face_sims else 0
+                    match_ratio = sum(1 for s in face_sims if s >= config.get("stage1_face_threshold", 0.55)) / len(face_sims)
+
+                    # Composite score: similarity + match ratio + speaker weight
+                    spk_weight = 1.0 + 0.3 * speaker_counts.get(t["trace_id"], 0) / max(max(speaker_counts.values(), default=1), 1)
+                    composite = avg_sim * spk_weight * (0.4 + 0.6 * match_ratio)
+
+                    if composite > best_score and composite > 0.35:
+                        best_score = composite
+                        best_iid = iid
+                        best_sim = avg_sim
+                        best_match_count = sum(1 for s in face_sims if s >= 0.50)
+
+                if best_iid is not None:
+                    t["binding"] = {
+                        "id": best_iid, "name": id_to_name.get(best_iid, "?"),
+                        "avg_similarity": round(best_sim, 3),
+                        "match_ratio": round(best_match_count / max(len(t.get("faces", [])), 1), 3),
+                        "composite_score": round(best_score, 3),
+                        "source": f"video_ref_r{round_num}",
+                    }
+                    t["binding_stage"] = f"stage1b_r{round_num}"
+                    bound_this_round.append(t)
+                    bound.append(t)
+
+            if not bound_this_round:
+                break
+
+            # Enrich references: add 3 best faces from newly bound traces
+            for t in bound_this_round:
+                iid = t["binding"]["id"]
+                faces = t.get("faces", [])
+                n = min(3, len(faces))
+                for f in faces[:n]:
+                    identity_refs[iid].append(f["embedding"])
+
+            # Remove from unbound
+            bound_ids = {t["trace_id"] for t in bound_this_round}
+            unbound = [t for t in unbound if t["trace_id"] not in bound_ids]
+
+            print(f"    Round {round_num}: {len(bound_this_round)} traces bound, {len(unbound)} unbound")
+    clusters = stage2_cluster_unbound(
+        unbound,
+        config.get("stage2_threshold", 0.85),
+        config.get("stage2_adaptive", False),
+    )
+    print(f"Stage 2: {len(clusters)} clusters from {len(unbound)} unbound traces")
+
+    # Speaker verification
+    all_labels = apply_speaker_verification(clusters, speaker_overlaps)
+
+    # Merge Stage 1 bound traces into labels
+    for t in bound:
+        all_labels.append({
+            "cluster_id": len(all_labels),
+            "trace_count": 1,
+            "trace_ids": [t["trace_id"]],
+            "binding": t.get("binding"),
+            "binding_stage": "stage1_face_level",
+            "dominant_speaker": next(iter(speaker_overlaps.get(t["trace_id"], {}).keys()), None) if t["trace_id"] in speaker_overlaps else None,
+        })
+
+    # Metrics
+    metrics = {
+        "total_traces": len(traces),
+        "stage1_bound": len(bound),
+        "stage1_bound_traces": len(bound),
+        "stage2_clusters": len(clusters),
+        "stage2_unbound_clustered": sum(len(c) for c in clusters),
+        "total_clusters": len(all_labels),
+        "execution_time_s": time.time() - t0,
+        "coverage": (len(bound) + sum(len(c) for c in clusters)) / max(len(traces), 1),
+    }
+    for k, v in metrics.items():
+        print(f"  {k}: {v}")
+
+    cur.close(); conn.close()
+
+    # --- Write bindings to database ---
+    if config.get("write_db", False):
+        conn2 = get_conn(); cur2 = conn2.cursor()
+        total_written = 0
+        for label in all_labels:
+            binding = label.get("binding")
+            if not binding: continue
+            identity_name = binding.get("name", "")
+            if not identity_name: continue
+
+            # Get or create identity
+            cur2.execute(f"SELECT id FROM {SCHEMA}.identities WHERE name=%s", (identity_name,))
+            row = cur2.fetchone()
+            if row:
+                identity_id = row[0]
+            else:
+                cur2.execute(
+                    f"INSERT INTO {SCHEMA}.identities (name, identity_type, source, status) VALUES (%s,'people','auto','pending') RETURNING id",
+                    (identity_name,))
+                identity_id = cur2.fetchone()[0]
+
+            # Bind all faces in each trace to the identity
+            for tid in label["trace_ids"]:
+                cur2.execute(
+                    f"UPDATE {SCHEMA}.face_detections SET identity_id=%s WHERE file_uuid=%s AND trace_id=%s AND identity_id IS NULL",
+                    (identity_id, file_uuid, tid))
+                affected = cur2.rowcount
+                if affected > 0:
+                    # Write to identity_bindings for traceability
+                    confidence = float(binding.get("avg_similarity", 0.8))
+                    cur2.execute(
+                        f"INSERT INTO {SCHEMA}.identity_bindings (identity_id, identity_type, identity_value, confidence) VALUES (%s,'trace',%s,%s) ON CONFLICT DO NOTHING",
+                        (identity_id, str(tid), confidence))
+                    total_written += affected
+
+        conn2.commit()
+        cur2.close(); conn2.close()
+        print(f"\nDB write: {total_written} face_detections updated")
+
+    # Save
+    result_dir = os.path.join(EXPERIMENT_DIR, "results", f"exp_{exp_id}")
+    os.makedirs(result_dir, exist_ok=True)
+    for name, data in [("labels.json", all_labels), ("metrics.json", metrics), ("config.json", config)]:
+        with open(os.path.join(result_dir, name), "w") as f:
+            json.dump(data, f, indent=2, ensure_ascii=False, default=str)
+
+    print(f"\nSaved to {result_dir}")
+    return metrics
+
+
+def main():
+    p = argparse.ArgumentParser()
+    p.add_argument("--config", required=True)
+    p.add_argument("--write-db", action="store_true", help="Write bindings to database")
+    args = p.parse_args()
+    with open(args.config) as f: config = json.load(f)
+    if args.write_db:
+        config["write_db"] = True
+    run_experiment(config)
+
+
+if __name__ == "__main__":
+    main()
--- a/experiments/trace_quality_agent.py
+++ b/experiments/trace_quality_agent.py
@@ -0,0 +1,234 @@
+#!/usr/bin/env python3
+"""
+Trace quality-check agent — technology selection experiment report.
+Evaluates whether each trace meets the identity standard and detects
+anomalous traces that need a re-scan or a review.
+
+Checks:
+  1. Sampling density   — trace < 3 frames → needs a dense scan
+  2. Face verification  — DeepFace vs Apple Vision to confirm a human face
+  3. Embedding quality  — too much spread inside a trace → may mix several people
+  4. Temporal collision — two traces of one identity at the same time → needs a split
+"""
+
+import json, sys, os, time, argparse, io
+from collections import defaultdict
+from pathlib import Path
+
+# Hard-coded inputs of this one-off report: database, schema, file and video
+DB_URL = "postgresql://accusys@localhost:5432/momentry"
+SCHEMA = "dev"
+FILE_UUID = "417a7e93860d70c87aee6c4c1b715d70"
+VIDEO_PATH = "/Users/accusys/test_video/Old_Time_Movie_Show_-_Charade_1963.HD.mov"
+OUT_DIR = Path("/Users/accusys/momentry/output_dev/experiments/trace_quality")
+# Created at import time so later sections can write extracted frames into it
+OUT_DIR.mkdir(parents=True, exist_ok=True)
+
+# ============================================================
+# Report Header
+# ============================================================
+print("=" * 70)
+print("Trace 品質檢查 — 技術選型實驗報告")
+print("=" * 70)
+print(f"File: Charade (1963), {FILE_UUID}")
+# NOTE(review): the counts below are literals, not read from the database
+print(f"Traces: 2347, Faces: 6182")
+print()
+
+# Imported after the banner, so the banner prints before these packages load
+import psycopg2
+import psycopg2.extras
+import numpy as np
+
+# One connection and cursor shared by all checks below
+conn = psycopg2.connect(DB_URL)
+cur = conn.cursor()
+
+# ============================================================
+# Check 1: Sample Density (取樣密度)
+# ============================================================
+print("=" * 70)
+print("Check 1: 取樣密度 (Sample Density)")
+print("=" * 70)
+
+cur.execute(f"""
+    SELECT 
+        CASE WHEN fc = 1 THEN '1 frame'
+             WHEN fc <= 3 THEN '2-3 frames'
+             WHEN fc <= 10 THEN '4-10 frames'
+             ELSE '11+ frames'
+        END AS density,
+        COUNT(*) AS trace_count,
+        ROUND(COUNT(*)::numeric / (SELECT COUNT(*) FROM (SELECT trace_id, COUNT(*) FROM {SCHEMA}.face_detections WHERE file_uuid=%s AND trace_id IS NOT NULL GROUP BY trace_id) t) * 100, 1) AS pct
+    FROM (SELECT trace_id, COUNT(*) AS fc FROM {SCHEMA}.face_detections WHERE file_uuid=%s AND trace_id IS NOT NULL GROUP BY trace_id) t
+    GROUP BY 1 ORDER BY MIN(fc)
+""", (FILE_UUID, FILE_UUID))
+
+for density, count, pct in cur.fetchall():
+    marker = " ← needs dense scan" if "frame" in density and int(density[0]) < 4 else ""
+    print(f"  {density:<15} {count:>6} traces ({pct:>5.1f}%){marker}")
+
+need_dense = sum(1 for _ in cur.fetchall()) if False else 0
+cur.execute(f"SELECT COUNT(*) FROM (SELECT trace_id FROM {SCHEMA}.face_detections WHERE file_uuid=%s AND trace_id IS NOT NULL GROUP BY trace_id HAVING COUNT(*) < 4) t", (FILE_UUID,))
+need_dense = cur.fetchone()[0]
+print(f"\n  需 dense scan: {need_dense} traces ({need_dense/2347*100:.1f}%)")
+
+print()
+print("  技術方案:")
+print("    方案A: swift_face --sample-interval 1 (Apple Vision, ~250fps)")
+print("    方案B: ffmpeg + DeepFace (Python, ~0.2s/face)")
+print("  建議: 方案A，無需額外模型，速度快，已整合於 pipeline")
+
+# ============================================================
+# Check 2: Human Face Verification (人臉驗證)
+# ============================================================
+print()
+print("=" * 70)
+print("Check 2: 人臉驗證 (Human Face Verification)")
+print("=" * 70)
+
+# Sample 20 traces: 10 with high confidence (likely human), 10 with low (possibly non-human)
+cur.execute(f"""
+    (SELECT trace_id, AVG(confidence)::numeric(4,3) AS c, AVG(width)::int AS w, AVG(height)::int AS h,
+            MIN(frame_number) AS f
+     FROM {SCHEMA}.face_detections WHERE file_uuid=%s AND trace_id IS NOT NULL
+     GROUP BY trace_id ORDER BY AVG(confidence) ASC LIMIT 5)
+    UNION ALL
+    (SELECT trace_id, AVG(confidence)::numeric(4,3) AS c, AVG(width)::int AS w, AVG(height)::int AS h,
+            MIN(frame_number) AS f
+     FROM {SCHEMA}.face_detections WHERE file_uuid=%s AND trace_id IS NOT NULL
+     GROUP BY trace_id ORDER BY AVG(confidence) DESC LIMIT 5)
+""", (FILE_UUID, FILE_UUID))
+
+samples = cur.fetchall()
+
+# Test DeepFace
+print("  DeepFace 人臉驗證 (10 samples):")
+try:
+    from deepface import DeepFace
+    import warnings
+    warnings.filterwarnings("ignore")
+
+    t0 = time.time()
+    for tid, conf, w, h, frame in samples:
+        sec = frame / 59.94
+        img_path = OUT_DIR / f"trace_{tid}_verify.jpg"
+        if not img_path.exists():
+            os.system(f'ffmpeg -y -ss {sec:.1f} -i "{VIDEO_PATH}" -frames:v 1 -q:v 3 {img_path} 2>/dev/null')
+        try:
+            r = DeepFace.analyze(str(img_path), actions=['age','gender'], enforce_detection=False, detector_backend='opencv')
+            if isinstance(r, list): r = r[0]
+            age = r.get('age', 0)
+            gender = r.get('dominant_gender', 'N/A')
+            is_human = age > 0 and gender in ('Man', 'Woman')
+            print(f"    trace {tid:>5}: conf={conf:.3f} {w}x{h} → age={age:.0f} gender={gender:<5} {'✅ human' if is_human else '⚠️ non-human?'}")
+        except Exception as e:
+            print(f"    trace {tid:>5}: conf={conf:.3f} {w}x{h} → ERROR {str(e)[:60]}")
+    dt = time.time() - t0
+    print(f"    Time: {dt:.1f}s ({dt/10:.1f}s/face)")
+except ImportError:
+    print("    DeepFace not available")
+
+# Test Apple Vision approach (statistical, no ML)
+print()
+print("  Statistical filter (no ML):")
+print("    Rule: confidence < 0.5 OR aspect_ratio deviation > 0.3 → flag")
+cur.execute(f"""
+    SELECT COUNT(*) FROM {SCHEMA}.face_detections 
+    WHERE file_uuid=%s AND trace_id IS NOT NULL AND confidence < 0.5
+""", (FILE_UUID,))
+low_conf = cur.fetchone()[0]
+print(f"    Low confidence (<0.5): {low_conf} faces")
+print(f"    Aspect ratio: all detections are square (Vision bbox), no filtering possible")
+
+print()
+print("  建議: DeepFace verify for low-confidence traces only")
+print("        可選 gateway: conf < 0.6 才跑 DeepFace，節省 90% 成本")
+
+# ============================================================
+# Check 3: Embedding Quality
+# ============================================================
+print()
+print("=" * 70)
+print("Check 3: Embedding Quality (嵌入品質)")
+print("=" * 70)
+
+# Check intra-trace embedding variance for top 5 largest traces
+cur.execute(f"""
+    SELECT trace_id, COUNT(*) AS fc, AVG(confidence)::numeric(4,3) AS conf
+    FROM {SCHEMA}.face_detections WHERE file_uuid=%s AND trace_id IS NOT NULL
+    GROUP BY trace_id ORDER BY fc DESC LIMIT 10
+""", (FILE_UUID,))
+top_traces = cur.fetchall()
+
+print("  Intra-trace embedding variance (top 10 traces by size):")
+for tid, fc, conf in top_traces:
+    cur.execute(f"""
+        SELECT embedding FROM {SCHEMA}.face_detections
+        WHERE file_uuid=%s AND trace_id=%s AND embedding IS NOT NULL
+    """, (FILE_UUID, tid))
+    embs = [np.array(row[0]) for row in cur.fetchall() if row[0]]
+    if len(embs) < 2:
+        print(f"    trace {tid:>5}: {fc:>3} faces, conf={conf:.3f} — not enough embeddings")
+        continue
+    
+    # Normalize and compute pairwise cosine similarity
+    embs_norm = np.array([e / (np.linalg.norm(e) + 1e-10) for e in embs])
+    sim_matrix = embs_norm @ embs_norm.T
+    np.fill_diagonal(sim_matrix, 0)
+    # Exclude diagonal zeros when finding min
+    non_diag = sim_matrix[sim_matrix > 0.0001]
+    var = float(1.0 - np.mean(sim_matrix[sim_matrix > 0.0001])) if len(non_diag) > 0 else 0.0
+    min_sim = float(np.min(non_diag)) if len(non_diag) > 0 else 0.0
+
+    quality = "✅ good" if var < 0.3 and min_sim > 0.5 else \
+              "⚠️ check" if var < 0.5 and min_sim > 0.3 else \
+              "❌ split likely"
+    print(f"    trace {tid:>5}: {fc:>3} faces, conf={conf:.3f}, variance={var:.3f}, min_sim={min_sim:.3f} → {quality}")
+
+print()
+print("  建議: variance > 0.2 OR min_sim < 0.4 → 標記 split")
+print("        純統計方法，無需模型")
+
+# ============================================================
+# Check 4: Temporal Collision
+# ============================================================
+print()
+print("=" * 70)
+print("Check 4: 時序衝突 (Temporal Collision)")
+print("=" * 70)
+
+cur.execute(f"""
+    SELECT i.name, a.trace_id, a.frame_number AS a_frame, b.trace_id AS b_trace, b.frame_number AS b_frame
+    FROM {SCHEMA}.face_detections a
+    JOIN {SCHEMA}.face_detections b ON a.file_uuid=b.file_uuid AND a.frame_number=b.frame_number AND a.trace_id<b.trace_id
+    JOIN {SCHEMA}.identities i ON a.identity_id=i.id AND b.identity_id=i.id
+    WHERE a.file_uuid=%s AND a.identity_id IS NOT NULL
+    ORDER BY a.frame_number LIMIT 10
+""", (FILE_UUID,))
+collisions = cur.fetchall()
+
+if collisions:
+    print("  ⚠️ 同一 identity 的 trace 出現在同一幀:")
+    for name, a_tid, af, b_tid, bf in collisions:
+        print(f"    {name}: trace {a_tid} & {b_tid} at frame {af}")
+else:
+    print("  ✅ No temporal collisions detected")
+
+print()
+print("  建議: 純 SQL 檢測，發現碰撞 → 自動 split into separate identities")
+
+cur.close(); conn.close()
+
+# ============================================================
+# Summary
+# ============================================================
+print()
+print("=" * 70)
+print("選型建議總結")
+print("=" * 70)
+print()
+print(f"  {'檢查':<25} {'技術':<20} {'模型':<12} {'速度':<10} {'可行性'}")
+print(f"  {'-'*70}")
+print(f"  {'1.取樣密度':<25} {'SQL + swift_face':<20} {'Apple Vision':<12} {'250fps':<10} {'✅ 已整合'}")
+print(f"  {'2.人臉驗證':<25} {'DeepFace analyze':<20} {'AgeNet':<12} {'0.2s/face':<10} {'✅ MIT license'}")
+print(f"  {'3.Embedding 品質':<25} {'numpy statistics':<20} {'None':<12} {'instant':<10} {'✅ 純計算'}")
+print(f"  {'4.時序衝突':<25} {'SQL JOIN':<20} {'None':<12} {'instant':<10} {'✅ 純查詢'}")
+print(f"  {'5.Speaker 一致性':<25} {'SQL + overlap':<20} {'None':<12} {'instant':<10} {'✅ 後續追加'}")
+print()
+print(f"  唯一需要外部模型的: Check 2 (DeepFace, MIT, 0.2s/face)")
+print(f"  其他全為純 SQL/統計，可立即實作")