feat: trace quality agent selection report, identity clustering runner_v2 DB write, age/gender CoreML selection, updated experiment config UUID
This commit is contained in:
41
experiments/identity_clustering/README.md
Normal file
41
experiments/identity_clustering/README.md
Normal file
@@ -0,0 +1,41 @@
|
||||
# Identity Clustering 實驗記錄區
|
||||
|
||||
每個實驗獨立運行,結果完整保留,用於後續分析比較。
|
||||
|
||||
## 目錄結構
|
||||
|
||||
```
|
||||
experiments/identity_clustering/
|
||||
├── README.md # 本文件
|
||||
├── configs/ # 實驗配置
|
||||
│ └── exp_{id}.json # 每個實驗的參數設定
|
||||
├── results/ # 實驗結果
|
||||
│ └── exp_{id}/
|
||||
│ ├── clusters.json # 分群結果
|
||||
│ ├── labels.json # 標註結果(TMDb/Speaker)
|
||||
│ ├── metrics.json # 評估指標
|
||||
│ └── summary.txt # 摘要報告
|
||||
├── reports/ # 比較分析報告
|
||||
│ └── comparison_{date}.md # 跨實驗比較
|
||||
└── runner.py # 實驗執行器
|
||||
```
|
||||
|
||||
## 實驗設計
|
||||
|
||||
每個實驗包含以下維度的組合:
|
||||
|
||||
| 維度 | 選項 |
|
||||
|------|------|
|
||||
| **Trace filter** | none / min_frames=3 / min_frames=30 / min_frames=60 |
|
||||
| **Centroid** | mean / median / best_confidence |
|
||||
| **Clustering** | cosine_threshold / DBSCAN / Agglomerative |
|
||||
| **Threshold** | fixed=0.85 / adaptive(pose) / auto |
|
||||
| **TMDb** | enabled / disabled |
|
||||
| **Speaker verify** | ✅ 標準工序(所有實驗強制) |
|
||||
|
||||
## 當前輸入數據
|
||||
|
||||
- file_uuid: `1a04db97be5fa12bd77369831dc141fd`
|
||||
- 6182 detections, 2347 traces, 512D embeddings
|
||||
- 10 speakers (ASRX), 57 YOLO objects
|
||||
- TMDb identities: available (Charade 1963 cast)
|
||||
11
experiments/identity_clustering/configs/exp_001.json
Normal file
11
experiments/identity_clustering/configs/exp_001.json
Normal file
@@ -0,0 +1,11 @@
|
||||
{
|
||||
"id": "001",
|
||||
"name": "Baseline: Fixed Threshold (0.85), min 3 frames, no TMDb",
|
||||
"file_uuid": "1a04db97be5fa12bd77369831dc141fd",
|
||||
"min_frames": 3,
|
||||
"clustering_method": "threshold",
|
||||
"threshold": 0.85,
|
||||
"adaptive_threshold": false,
|
||||
"enable_tmdb": false,
|
||||
"notes": "sample_interval=60 導致 trace 碎片化。min_frames=3 納入大部分 traces。"
|
||||
}
|
||||
11
experiments/identity_clustering/configs/exp_002.json
Normal file
11
experiments/identity_clustering/configs/exp_002.json
Normal file
@@ -0,0 +1,11 @@
|
||||
{
|
||||
"id": "002",
|
||||
"name": "Adaptive Threshold (pose-aware), min 30 frames, no TMDb",
|
||||
"file_uuid": "1a04db97be5fa12bd77369831dc141fd",
|
||||
"min_frames": 3,
|
||||
"clustering_method": "threshold",
|
||||
"threshold": 0.85,
|
||||
"adaptive_threshold": true,
|
||||
"enable_tmdb": false,
|
||||
"notes": "Pose-aware: 短 trace 放寬 threshold 5%。適合 profile/three_quarter 角度辨識。"
|
||||
}
|
||||
11
experiments/identity_clustering/configs/exp_003.json
Normal file
11
experiments/identity_clustering/configs/exp_003.json
Normal file
@@ -0,0 +1,11 @@
|
||||
{
|
||||
"id": "003",
|
||||
"name": "DBSCAN (eps=0.3), min 30 frames, no TMDb",
|
||||
"file_uuid": "1a04db97be5fa12bd77369831dc141fd",
|
||||
"min_frames": 3,
|
||||
"clustering_method": "dbscan",
|
||||
"eps": 0.3,
|
||||
"min_samples": 2,
|
||||
"enable_tmdb": false,
|
||||
"notes": "DBSCAN 自動偵測 cluster 數量,不需要手設 threshold。eps=0.3 對應 cosine distance。"
|
||||
}
|
||||
11
experiments/identity_clustering/configs/exp_004.json
Normal file
11
experiments/identity_clustering/configs/exp_004.json
Normal file
@@ -0,0 +1,11 @@
|
||||
{
|
||||
"id": "004",
|
||||
"name": "DBSCAN (eps=0.25), min 30 frames, no TMDb",
|
||||
"file_uuid": "1a04db97be5fa12bd77369831dc141fd",
|
||||
"min_frames": 3,
|
||||
"clustering_method": "dbscan",
|
||||
"eps": 0.25,
|
||||
"min_samples": 2,
|
||||
"enable_tmdb": false,
|
||||
"notes": "DBSCAN 更嚴格版本(eps=0.25),預期更多 cluster、較少 false positive。"
|
||||
}
|
||||
11
experiments/identity_clustering/configs/exp_005.json
Normal file
11
experiments/identity_clustering/configs/exp_005.json
Normal file
@@ -0,0 +1,11 @@
|
||||
{
|
||||
"id": "005",
|
||||
"name": "Adaptive Threshold + TMDb matching, min 30 frames",
|
||||
"file_uuid": "1a04db97be5fa12bd77369831dc141fd",
|
||||
"min_frames": 3,
|
||||
"clustering_method": "threshold",
|
||||
"threshold": 0.85,
|
||||
"adaptive_threshold": true,
|
||||
"enable_tmdb": true,
|
||||
"notes": "最佳方案候選:pose-aware + TMDb 自動標註。預期 Cary Grant, Audrey Hepburn 等主要角色被標出。"
|
||||
}
|
||||
13
experiments/identity_clustering/configs/exp_006.json
Normal file
13
experiments/identity_clustering/configs/exp_006.json
Normal file
@@ -0,0 +1,13 @@
|
||||
{
|
||||
"id": "006",
|
||||
"name": "Multi-Stage: Face-level high-conf binding + centroid clustering + speaker",
|
||||
"file_uuid": "1a04db97be5fa12bd77369831dc141fd",
|
||||
"min_frames": 3,
|
||||
"enable_identity_match": true,
|
||||
"stage1_face_threshold": 0.92,
|
||||
"stage1_bind_ratio": 0.85,
|
||||
"stage2_threshold": 0.85,
|
||||
"stage2_adaptive": true,
|
||||
"enable_tmdb": false,
|
||||
"notes": "Stage1: each face vs identity ref, bind if >85% faces match >0.92. Stage2: centroid clustering of unbound + speaker merge."
|
||||
}
|
||||
13
experiments/identity_clustering/configs/exp_007.json
Normal file
13
experiments/identity_clustering/configs/exp_007.json
Normal file
@@ -0,0 +1,13 @@
|
||||
{
|
||||
"id": "007",
|
||||
"name": "Multi-Stage: relaxed TMDb bind + 3-angle anchor selection",
|
||||
"file_uuid": "1a04db97be5fa12bd77369831dc141fd",
|
||||
"min_frames": 3,
|
||||
"enable_identity_match": true,
|
||||
"stage1_face_threshold": 0.72,
|
||||
"stage1_bind_ratio": 0.75,
|
||||
"stage2_threshold": 0.85,
|
||||
"stage2_adaptive": true,
|
||||
"enable_tmdb": false,
|
||||
"notes": "Stage1: TMDb bind threshold 0.72 (跨 domain 較寬)。Stage2: 每個 identity 從 bound traces 挑 frontal/three_quarter/profile 三角度 face 組合成多角度 reference,用於 further matching。"
|
||||
}
|
||||
14
experiments/identity_clustering/configs/exp_008.json
Normal file
14
experiments/identity_clustering/configs/exp_008.json
Normal file
@@ -0,0 +1,14 @@
|
||||
{
|
||||
"id": "008",
|
||||
"name": "Composite: TMDb vector + speaker frequency scoring",
|
||||
"file_uuid": "417a7e93860d70c87aee6c4c1b715d70",
|
||||
"min_frames": 3,
|
||||
"enable_identity_match": true,
|
||||
"stage1_face_threshold": 0.55,
|
||||
"stage1_bind_ratio": 0.60,
|
||||
"stage2_threshold": 0.85,
|
||||
"stage2_adaptive": true,
|
||||
"enable_speaker_weight": true,
|
||||
"speaker_weight_factor": 0.3,
|
||||
"notes": "V2.0 embedding space。Speaker 出現次數(segment count)加權 × vector similarity 綜合評分。主角(SPEAKER_0/SPEAKER_1)加權較高。"
|
||||
}
|
||||
6183
experiments/identity_clustering/data_snapshot/face_detections.csv
Normal file
6183
experiments/identity_clustering/data_snapshot/face_detections.csv
Normal file
File diff suppressed because one or more lines are too long
3198
experiments/identity_clustering/results/exp_001/clusters.json
Normal file
3198
experiments/identity_clustering/results/exp_001/clusters.json
Normal file
File diff suppressed because it is too large
Load Diff
11
experiments/identity_clustering/results/exp_001/config.json
Normal file
11
experiments/identity_clustering/results/exp_001/config.json
Normal file
@@ -0,0 +1,11 @@
|
||||
{
|
||||
"id": "001",
|
||||
"name": "Baseline: Fixed Threshold (0.85), min 3 frames, no TMDb",
|
||||
"file_uuid": "1a04db97be5fa12bd77369831dc141fd",
|
||||
"min_frames": 3,
|
||||
"clustering_method": "threshold",
|
||||
"threshold": 0.85,
|
||||
"adaptive_threshold": false,
|
||||
"enable_tmdb": false,
|
||||
"notes": "sample_interval=60 導致 trace 碎片化。min_frames=3 納入大部分 traces。"
|
||||
}
|
||||
3198
experiments/identity_clustering/results/exp_001/labels.json
Normal file
3198
experiments/identity_clustering/results/exp_001/labels.json
Normal file
File diff suppressed because it is too large
Load Diff
10
experiments/identity_clustering/results/exp_001/metrics.json
Normal file
10
experiments/identity_clustering/results/exp_001/metrics.json
Normal file
@@ -0,0 +1,10 @@
|
||||
{
|
||||
"total_traces": 677,
|
||||
"clustered_traces": 677,
|
||||
"cluster_count": 199,
|
||||
"coverage": 1.0,
|
||||
"avg_cluster_size": 3.4020100502512562,
|
||||
"tmdb_matched": 0,
|
||||
"tmdb_coverage": 0.0,
|
||||
"execution_time_s": 3.706886053085327
|
||||
}
|
||||
36
experiments/identity_clustering/results/exp_001/summary.txt
Normal file
36
experiments/identity_clustering/results/exp_001/summary.txt
Normal file
@@ -0,0 +1,36 @@
|
||||
|
||||
Experiment 001: Baseline: Fixed Threshold (0.85), min 3 frames, no TMDb
|
||||
====================================
|
||||
Date: 2026-05-04T17:13:02.183318
|
||||
Config: {
|
||||
"id": "001",
|
||||
"name": "Baseline: Fixed Threshold (0.85), min 3 frames, no TMDb",
|
||||
"file_uuid": "1a04db97be5fa12bd77369831dc141fd",
|
||||
"min_frames": 3,
|
||||
"clustering_method": "threshold",
|
||||
"threshold": 0.85,
|
||||
"adaptive_threshold": false,
|
||||
"enable_tmdb": false,
|
||||
"notes": "sample_interval=60 \u5c0e\u81f4 trace \u788e\u7247\u5316\u3002min_frames=3 \u7d0d\u5165\u5927\u90e8\u5206 traces\u3002"
|
||||
}
|
||||
|
||||
Results:
|
||||
Traces loaded: 677
|
||||
Clusters: 379
|
||||
Clustered traces: 677
|
||||
Coverage: 100.0%
|
||||
Avg cluster size: 1.8
|
||||
TMDb matched: 0
|
||||
Execution time: 3.6s
|
||||
|
||||
Top clusters:
|
||||
Cluster 2: 74 traces → None (sim=0.000)
|
||||
Cluster 29: 38 traces → None (sim=0.000)
|
||||
Cluster 133: 14 traces → None (sim=0.000)
|
||||
Cluster 14: 13 traces → None (sim=0.000)
|
||||
Cluster 62: 10 traces → None (sim=0.000)
|
||||
Cluster 126: 8 traces → None (sim=0.000)
|
||||
Cluster 31: 7 traces → None (sim=0.000)
|
||||
Cluster 13: 6 traces → None (sim=0.000)
|
||||
Cluster 19: 6 traces → None (sim=0.000)
|
||||
Cluster 89: 6 traces → None (sim=0.000)
|
||||
2522
experiments/identity_clustering/results/exp_002/clusters.json
Normal file
2522
experiments/identity_clustering/results/exp_002/clusters.json
Normal file
File diff suppressed because it is too large
Load Diff
11
experiments/identity_clustering/results/exp_002/config.json
Normal file
11
experiments/identity_clustering/results/exp_002/config.json
Normal file
@@ -0,0 +1,11 @@
|
||||
{
|
||||
"id": "002",
|
||||
"name": "Adaptive Threshold (pose-aware), min 30 frames, no TMDb",
|
||||
"file_uuid": "1a04db97be5fa12bd77369831dc141fd",
|
||||
"min_frames": 3,
|
||||
"clustering_method": "threshold",
|
||||
"threshold": 0.85,
|
||||
"adaptive_threshold": true,
|
||||
"enable_tmdb": false,
|
||||
"notes": "Pose-aware: 短 trace 放寬 threshold 5%。適合 profile/three_quarter 角度辨識。"
|
||||
}
|
||||
2522
experiments/identity_clustering/results/exp_002/labels.json
Normal file
2522
experiments/identity_clustering/results/exp_002/labels.json
Normal file
File diff suppressed because it is too large
Load Diff
10
experiments/identity_clustering/results/exp_002/metrics.json
Normal file
10
experiments/identity_clustering/results/exp_002/metrics.json
Normal file
@@ -0,0 +1,10 @@
|
||||
{
|
||||
"total_traces": 677,
|
||||
"clustered_traces": 677,
|
||||
"cluster_count": 143,
|
||||
"coverage": 1.0,
|
||||
"avg_cluster_size": 4.734265734265734,
|
||||
"tmdb_matched": 0,
|
||||
"tmdb_coverage": 0.0,
|
||||
"execution_time_s": 3.065944194793701
|
||||
}
|
||||
36
experiments/identity_clustering/results/exp_002/summary.txt
Normal file
36
experiments/identity_clustering/results/exp_002/summary.txt
Normal file
@@ -0,0 +1,36 @@
|
||||
|
||||
Experiment 002: Adaptive Threshold (pose-aware), min 30 frames, no TMDb
|
||||
====================================
|
||||
Date: 2026-05-04T17:13:05.263374
|
||||
Config: {
|
||||
"id": "002",
|
||||
"name": "Adaptive Threshold (pose-aware), min 30 frames, no TMDb",
|
||||
"file_uuid": "1a04db97be5fa12bd77369831dc141fd",
|
||||
"min_frames": 3,
|
||||
"clustering_method": "threshold",
|
||||
"threshold": 0.85,
|
||||
"adaptive_threshold": true,
|
||||
"enable_tmdb": false,
|
||||
"notes": "Pose-aware: \u77ed trace \u653e\u5bec threshold 5%\u3002\u9069\u5408 profile/three_quarter \u89d2\u5ea6\u8fa8\u8b58\u3002"
|
||||
}
|
||||
|
||||
Results:
|
||||
Traces loaded: 677
|
||||
Clusters: 293
|
||||
Clustered traces: 677
|
||||
Coverage: 100.0%
|
||||
Avg cluster size: 2.3
|
||||
TMDb matched: 0
|
||||
Execution time: 3.0s
|
||||
|
||||
Top clusters:
|
||||
Cluster 2: 114 traces → None (sim=0.000)
|
||||
Cluster 13: 43 traces → None (sim=0.000)
|
||||
Cluster 51: 19 traces → None (sim=0.000)
|
||||
Cluster 112: 15 traces → None (sim=0.000)
|
||||
Cluster 28: 12 traces → None (sim=0.000)
|
||||
Cluster 30: 12 traces → None (sim=0.000)
|
||||
Cluster 56: 11 traces → None (sim=0.000)
|
||||
Cluster 107: 11 traces → None (sim=0.000)
|
||||
Cluster 169: 11 traces → None (sim=0.000)
|
||||
Cluster 74: 9 traces → None (sim=0.000)
|
||||
1135
experiments/identity_clustering/results/exp_003/clusters.json
Normal file
1135
experiments/identity_clustering/results/exp_003/clusters.json
Normal file
File diff suppressed because it is too large
Load Diff
11
experiments/identity_clustering/results/exp_003/config.json
Normal file
11
experiments/identity_clustering/results/exp_003/config.json
Normal file
@@ -0,0 +1,11 @@
|
||||
{
|
||||
"id": "003",
|
||||
"name": "DBSCAN (eps=0.3), min 30 frames, no TMDb",
|
||||
"file_uuid": "1a04db97be5fa12bd77369831dc141fd",
|
||||
"min_frames": 3,
|
||||
"clustering_method": "dbscan",
|
||||
"eps": 0.3,
|
||||
"min_samples": 2,
|
||||
"enable_tmdb": false,
|
||||
"notes": "DBSCAN 自動偵測 cluster 數量,不需要手設 threshold。eps=0.3 對應 cosine distance。"
|
||||
}
|
||||
1135
experiments/identity_clustering/results/exp_003/labels.json
Normal file
1135
experiments/identity_clustering/results/exp_003/labels.json
Normal file
File diff suppressed because it is too large
Load Diff
10
experiments/identity_clustering/results/exp_003/metrics.json
Normal file
10
experiments/identity_clustering/results/exp_003/metrics.json
Normal file
@@ -0,0 +1,10 @@
|
||||
{
|
||||
"total_traces": 677,
|
||||
"clustered_traces": 677,
|
||||
"cluster_count": 34,
|
||||
"coverage": 1.0,
|
||||
"avg_cluster_size": 19.91176470588235,
|
||||
"tmdb_matched": 0,
|
||||
"tmdb_coverage": 0.0,
|
||||
"execution_time_s": 2.6430821418762207
|
||||
}
|
||||
36
experiments/identity_clustering/results/exp_003/summary.txt
Normal file
36
experiments/identity_clustering/results/exp_003/summary.txt
Normal file
@@ -0,0 +1,36 @@
|
||||
|
||||
Experiment 003: DBSCAN (eps=0.3), min 30 frames, no TMDb
|
||||
====================================
|
||||
Date: 2026-05-04T17:13:08.042584
|
||||
Config: {
|
||||
"id": "003",
|
||||
"name": "DBSCAN (eps=0.3), min 30 frames, no TMDb",
|
||||
"file_uuid": "1a04db97be5fa12bd77369831dc141fd",
|
||||
"min_frames": 3,
|
||||
"clustering_method": "dbscan",
|
||||
"eps": 0.3,
|
||||
"min_samples": 2,
|
||||
"enable_tmdb": false,
|
||||
"notes": "DBSCAN \u81ea\u52d5\u5075\u6e2c cluster \u6578\u91cf\uff0c\u4e0d\u9700\u8981\u624b\u8a2d threshold\u3002eps=0.3 \u5c0d\u61c9 cosine distance\u3002"
|
||||
}
|
||||
|
||||
Results:
|
||||
Traces loaded: 677
|
||||
Clusters: 78
|
||||
Clustered traces: 677
|
||||
Coverage: 100.0%
|
||||
Avg cluster size: 8.7
|
||||
TMDb matched: 0
|
||||
Execution time: 2.7s
|
||||
|
||||
Top clusters:
|
||||
Cluster 1: 537 traces → None (sim=0.000)
|
||||
Cluster 10: 26 traces → None (sim=0.000)
|
||||
Cluster 2: 14 traces → None (sim=0.000)
|
||||
Cluster 9: 9 traces → None (sim=0.000)
|
||||
Cluster 47: 8 traces → None (sim=0.000)
|
||||
Cluster 37: 4 traces → None (sim=0.000)
|
||||
Cluster 7: 2 traces → None (sim=0.000)
|
||||
Cluster 32: 2 traces → None (sim=0.000)
|
||||
Cluster 36: 2 traces → None (sim=0.000)
|
||||
Cluster 48: 2 traces → None (sim=0.000)
|
||||
1519
experiments/identity_clustering/results/exp_004/clusters.json
Normal file
1519
experiments/identity_clustering/results/exp_004/clusters.json
Normal file
File diff suppressed because it is too large
Load Diff
11
experiments/identity_clustering/results/exp_004/config.json
Normal file
11
experiments/identity_clustering/results/exp_004/config.json
Normal file
@@ -0,0 +1,11 @@
|
||||
{
|
||||
"id": "004",
|
||||
"name": "DBSCAN (eps=0.25), min 30 frames, no TMDb",
|
||||
"file_uuid": "1a04db97be5fa12bd77369831dc141fd",
|
||||
"min_frames": 3,
|
||||
"clustering_method": "dbscan",
|
||||
"eps": 0.25,
|
||||
"min_samples": 2,
|
||||
"enable_tmdb": false,
|
||||
"notes": "DBSCAN 更嚴格版本(eps=0.25),預期更多 cluster、較少 false positive。"
|
||||
}
|
||||
1519
experiments/identity_clustering/results/exp_004/labels.json
Normal file
1519
experiments/identity_clustering/results/exp_004/labels.json
Normal file
File diff suppressed because it is too large
Load Diff
10
experiments/identity_clustering/results/exp_004/metrics.json
Normal file
10
experiments/identity_clustering/results/exp_004/metrics.json
Normal file
@@ -0,0 +1,10 @@
|
||||
{
|
||||
"total_traces": 677,
|
||||
"clustered_traces": 677,
|
||||
"cluster_count": 64,
|
||||
"coverage": 1.0,
|
||||
"avg_cluster_size": 10.578125,
|
||||
"tmdb_matched": 0,
|
||||
"tmdb_coverage": 0.0,
|
||||
"execution_time_s": 2.588068962097168
|
||||
}
|
||||
36
experiments/identity_clustering/results/exp_004/summary.txt
Normal file
36
experiments/identity_clustering/results/exp_004/summary.txt
Normal file
@@ -0,0 +1,36 @@
|
||||
|
||||
Experiment 004: DBSCAN (eps=0.25), min 30 frames, no TMDb
|
||||
====================================
|
||||
Date: 2026-05-04T17:13:10.776315
|
||||
Config: {
|
||||
"id": "004",
|
||||
"name": "DBSCAN (eps=0.25), min 30 frames, no TMDb",
|
||||
"file_uuid": "1a04db97be5fa12bd77369831dc141fd",
|
||||
"min_frames": 3,
|
||||
"clustering_method": "dbscan",
|
||||
"eps": 0.25,
|
||||
"min_samples": 2,
|
||||
"enable_tmdb": false,
|
||||
"notes": "DBSCAN \u66f4\u56b4\u683c\u7248\u672c\uff08eps=0.25\uff09\uff0c\u9810\u671f\u66f4\u591a cluster\u3001\u8f03\u5c11 false positive\u3002"
|
||||
}
|
||||
|
||||
Results:
|
||||
Traces loaded: 677
|
||||
Clusters: 129
|
||||
Clustered traces: 677
|
||||
Coverage: 100.0%
|
||||
Avg cluster size: 5.2
|
||||
TMDb matched: 0
|
||||
Execution time: 2.6s
|
||||
|
||||
Top clusters:
|
||||
Cluster 1: 444 traces → None (sim=0.000)
|
||||
Cluster 32: 43 traces → None (sim=0.000)
|
||||
Cluster 14: 24 traces → None (sim=0.000)
|
||||
Cluster 4: 13 traces → None (sim=0.000)
|
||||
Cluster 115: 6 traces → None (sim=0.000)
|
||||
Cluster 38: 4 traces → None (sim=0.000)
|
||||
Cluster 53: 4 traces → None (sim=0.000)
|
||||
Cluster 65: 4 traces → None (sim=0.000)
|
||||
Cluster 88: 4 traces → None (sim=0.000)
|
||||
Cluster 102: 4 traces → None (sim=0.000)
|
||||
3609
experiments/identity_clustering/results/exp_005/clusters.json
Normal file
3609
experiments/identity_clustering/results/exp_005/clusters.json
Normal file
File diff suppressed because it is too large
Load Diff
12
experiments/identity_clustering/results/exp_005/config.json
Normal file
12
experiments/identity_clustering/results/exp_005/config.json
Normal file
@@ -0,0 +1,12 @@
|
||||
{
|
||||
"id": "005",
|
||||
"name": "Adaptive Threshold + TMDb matching, min 30 frames",
|
||||
"file_uuid": "1a04db97be5fa12bd77369831dc141fd",
|
||||
"min_frames": 3,
|
||||
"clustering_method": "threshold",
|
||||
"threshold": 0.85,
|
||||
"adaptive_threshold": true,
|
||||
"enable_tmdb": true,
|
||||
"enable_speaker_verify": false,
|
||||
"notes": "最佳方案候選:pose-aware + TMDb 自動標註。預期 Cary Grant, Audrey Hepburn 等主要角色被標出。"
|
||||
}
|
||||
3609
experiments/identity_clustering/results/exp_005/labels.json
Normal file
3609
experiments/identity_clustering/results/exp_005/labels.json
Normal file
File diff suppressed because it is too large
Load Diff
10
experiments/identity_clustering/results/exp_005/metrics.json
Normal file
10
experiments/identity_clustering/results/exp_005/metrics.json
Normal file
@@ -0,0 +1,10 @@
|
||||
{
|
||||
"total_traces": 677,
|
||||
"clustered_traces": 677,
|
||||
"cluster_count": 293,
|
||||
"coverage": 1.0,
|
||||
"avg_cluster_size": 2.310580204778157,
|
||||
"tmdb_matched": 0,
|
||||
"tmdb_coverage": 0.0,
|
||||
"execution_time_s": 3.034806966781616
|
||||
}
|
||||
37
experiments/identity_clustering/results/exp_005/summary.txt
Normal file
37
experiments/identity_clustering/results/exp_005/summary.txt
Normal file
@@ -0,0 +1,37 @@
|
||||
|
||||
Experiment 005: Adaptive Threshold + TMDb matching, min 30 frames
|
||||
====================================
|
||||
Date: 2026-05-04T17:05:33.808099
|
||||
Config: {
|
||||
"id": "005",
|
||||
"name": "Adaptive Threshold + TMDb matching, min 30 frames",
|
||||
"file_uuid": "1a04db97be5fa12bd77369831dc141fd",
|
||||
"min_frames": 3,
|
||||
"clustering_method": "threshold",
|
||||
"threshold": 0.85,
|
||||
"adaptive_threshold": true,
|
||||
"enable_tmdb": true,
|
||||
"enable_speaker_verify": false,
|
||||
"notes": "\u6700\u4f73\u65b9\u6848\u5019\u9078\uff1apose-aware + TMDb \u81ea\u52d5\u6a19\u8a3b\u3002\u9810\u671f Cary Grant, Audrey Hepburn \u7b49\u4e3b\u8981\u89d2\u8272\u88ab\u6a19\u51fa\u3002"
|
||||
}
|
||||
|
||||
Results:
|
||||
Traces loaded: 677
|
||||
Clusters: 293
|
||||
Clustered traces: 677
|
||||
Coverage: 100.0%
|
||||
Avg cluster size: 2.3
|
||||
TMDb matched: 0
|
||||
Execution time: 3.0s
|
||||
|
||||
Top clusters:
|
||||
Cluster 2: 114 traces → None (sim=0.000)
|
||||
Cluster 13: 43 traces → None (sim=0.000)
|
||||
Cluster 51: 19 traces → None (sim=0.000)
|
||||
Cluster 112: 15 traces → None (sim=0.000)
|
||||
Cluster 28: 12 traces → None (sim=0.000)
|
||||
Cluster 30: 12 traces → None (sim=0.000)
|
||||
Cluster 56: 11 traces → None (sim=0.000)
|
||||
Cluster 107: 11 traces → None (sim=0.000)
|
||||
Cluster 169: 11 traces → None (sim=0.000)
|
||||
Cluster 74: 9 traces → None (sim=0.000)
|
||||
13
experiments/identity_clustering/results/exp_006/config.json
Normal file
13
experiments/identity_clustering/results/exp_006/config.json
Normal file
@@ -0,0 +1,13 @@
|
||||
{
|
||||
"id": "006",
|
||||
"name": "Multi-Stage: Face-level high-conf binding + centroid clustering + speaker",
|
||||
"file_uuid": "1a04db97be5fa12bd77369831dc141fd",
|
||||
"min_frames": 3,
|
||||
"enable_identity_match": true,
|
||||
"stage1_face_threshold": 0.92,
|
||||
"stage1_bind_ratio": 0.85,
|
||||
"stage2_threshold": 0.85,
|
||||
"stage2_adaptive": true,
|
||||
"enable_tmdb": false,
|
||||
"notes": "Stage1: each face vs identity ref, bind if >85% faces match >0.92. Stage2: centroid clustering of unbound + speaker merge."
|
||||
}
|
||||
3629
experiments/identity_clustering/results/exp_006/labels.json
Normal file
3629
experiments/identity_clustering/results/exp_006/labels.json
Normal file
File diff suppressed because it is too large
Load Diff
10
experiments/identity_clustering/results/exp_006/metrics.json
Normal file
10
experiments/identity_clustering/results/exp_006/metrics.json
Normal file
@@ -0,0 +1,10 @@
|
||||
{
|
||||
"total_traces": 677,
|
||||
"stage1_bound": 0,
|
||||
"stage1_bound_traces": 0,
|
||||
"stage2_clusters": 295,
|
||||
"stage2_unbound_clustered": 677,
|
||||
"total_clusters": 295,
|
||||
"execution_time_s": 3.226997137069702,
|
||||
"coverage": 1.0
|
||||
}
|
||||
13
experiments/identity_clustering/results/exp_007/config.json
Normal file
13
experiments/identity_clustering/results/exp_007/config.json
Normal file
@@ -0,0 +1,13 @@
|
||||
{
|
||||
"id": "007",
|
||||
"name": "Multi-Stage: relaxed TMDb bind + 3-angle anchor selection",
|
||||
"file_uuid": "1a04db97be5fa12bd77369831dc141fd",
|
||||
"min_frames": 3,
|
||||
"enable_identity_match": true,
|
||||
"stage1_face_threshold": 0.72,
|
||||
"stage1_bind_ratio": 0.75,
|
||||
"stage2_threshold": 0.85,
|
||||
"stage2_adaptive": true,
|
||||
"enable_tmdb": false,
|
||||
"notes": "Stage1: TMDb bind threshold 0.72 (跨 domain 較寬)。Stage2: 每個 identity 從 bound traces 挑 frontal/three_quarter/profile 三角度 face 組合成多角度 reference,用於 further matching。"
|
||||
}
|
||||
3629
experiments/identity_clustering/results/exp_007/labels.json
Normal file
3629
experiments/identity_clustering/results/exp_007/labels.json
Normal file
File diff suppressed because it is too large
Load Diff
10
experiments/identity_clustering/results/exp_007/metrics.json
Normal file
10
experiments/identity_clustering/results/exp_007/metrics.json
Normal file
@@ -0,0 +1,10 @@
|
||||
{
|
||||
"total_traces": 677,
|
||||
"stage1_bound": 0,
|
||||
"stage1_bound_traces": 0,
|
||||
"stage2_clusters": 295,
|
||||
"stage2_unbound_clustered": 677,
|
||||
"total_clusters": 295,
|
||||
"execution_time_s": 3.2448980808258057,
|
||||
"coverage": 1.0
|
||||
}
|
||||
15
experiments/identity_clustering/results/exp_008/config.json
Normal file
15
experiments/identity_clustering/results/exp_008/config.json
Normal file
@@ -0,0 +1,15 @@
|
||||
{
|
||||
"id": "008",
|
||||
"name": "Composite: TMDb vector + speaker frequency scoring",
|
||||
"file_uuid": "417a7e93860d70c87aee6c4c1b715d70",
|
||||
"min_frames": 3,
|
||||
"enable_identity_match": true,
|
||||
"stage1_face_threshold": 0.55,
|
||||
"stage1_bind_ratio": 0.6,
|
||||
"stage2_threshold": 0.85,
|
||||
"stage2_adaptive": true,
|
||||
"enable_speaker_weight": true,
|
||||
"speaker_weight_factor": 0.3,
|
||||
"notes": "V2.0 embedding space。Speaker 出現次數(segment count)加權 × vector similarity 綜合評分。主角(SPEAKER_0/SPEAKER_1)加權較高。",
|
||||
"write_db": true
|
||||
}
|
||||
11181
experiments/identity_clustering/results/exp_008/labels.json
Normal file
11181
experiments/identity_clustering/results/exp_008/labels.json
Normal file
File diff suppressed because it is too large
Load Diff
10
experiments/identity_clustering/results/exp_008/metrics.json
Normal file
10
experiments/identity_clustering/results/exp_008/metrics.json
Normal file
@@ -0,0 +1,10 @@
|
||||
{
|
||||
"total_traces": 677,
|
||||
"stage1_bound": 671,
|
||||
"stage1_bound_traces": 671,
|
||||
"stage2_clusters": 6,
|
||||
"stage2_unbound_clustered": 6,
|
||||
"total_clusters": 677,
|
||||
"execution_time_s": 11.841914176940918,
|
||||
"coverage": 1.0
|
||||
}
|
||||
446
experiments/identity_clustering/runner.py
Normal file
446
experiments/identity_clustering/runner.py
Normal file
@@ -0,0 +1,446 @@
|
||||
#!/opt/homebrew/bin/python3.11
|
||||
"""
|
||||
Identity Clustering Experiment Runner
|
||||
|
||||
Usage:
|
||||
python runner.py --config configs/exp_001.json
|
||||
|
||||
Each experiment:
|
||||
1. Reads config parameters
|
||||
2. Fetches face trace data from DB
|
||||
3. Runs clustering algorithm
|
||||
4. Optionally matches against TMDb
|
||||
5. Optionally verifies against speakers
|
||||
6. Saves all results to experiments/identity_clustering/results/exp_{id}/
|
||||
"""
|
||||
|
||||
import sys
|
||||
import os
|
||||
import json
|
||||
import argparse
|
||||
import time
|
||||
import numpy as np
|
||||
from datetime import datetime
|
||||
from collections import defaultdict
|
||||
from typing import Dict, List, Tuple, Optional
|
||||
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "../..", "scripts"))
|
||||
|
||||
# DB connection
|
||||
import psycopg2
|
||||
import psycopg2.extras
|
||||
|
||||
# Database connection string; the DATABASE_URL environment variable takes
# precedence over the local development default.
DB_URL = os.getenv("DATABASE_URL", "postgresql://accusys@localhost:5432/momentry")

# Schema name interpolated into the SQL statements built in this module.
SCHEMA = "dev"

# Absolute path of the directory that contains this script.
EXPERIMENT_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||
|
||||
|
||||
def get_conn():
    """Open a new database connection using the module-level ``DB_URL``.

    Returns:
        The connection object produced by ``psycopg2.connect``. The caller
        owns it and is responsible for closing it.
    """
    connection = psycopg2.connect(DB_URL)
    return connection
|
||||
|
||||
|
||||
def load_experiment_config(config_path: str) -> dict:
    """Load an experiment configuration from a JSON file.

    The file is decoded explicitly as UTF-8: the experiment configs carry
    non-ASCII text in their ``notes`` field, and relying on the platform's
    default encoding would make loading locale-dependent.

    Args:
        config_path: Path to the JSON configuration file.

    Returns:
        The parsed JSON document.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    with open(config_path, encoding="utf-8") as config_file:
        return json.load(config_file)
|
||||
|
||||
|
||||
def fetch_trace_data(
    cur, file_uuid: str, min_frames: int, centroid_method: str = "mean"
) -> List[dict]:
    """Fetch per-trace metadata and a centroid embedding from face_detections.

    One aggregate query selects every trace of ``file_uuid`` that has at
    least ``min_frames`` detections with a non-NULL embedding. For each such
    trace a second query loads its embeddings, ordered by detection
    confidence (highest first), and reduces them to a single centroid.

    Args:
        cur: Open DB-API cursor; rows are read by positional index.
        file_uuid: Identifier of the file whose detections are read.
        min_frames: Minimum number of detections a trace must have.
        centroid_method: How the embeddings of a trace are reduced:
            ``"mean"`` (default, element-wise mean), ``"median"``
            (element-wise median) or ``"best_confidence"`` (the embedding of
            the highest-confidence detection).

    Returns:
        One dict per trace, ordered by ``trace_id``, with the keys
        ``trace_id``, ``frame_count``, ``start_frame``, ``end_frame``,
        ``avg_bbox`` (dict with ``x``, ``y``, ``w``, ``h``),
        ``avg_confidence``, ``embedding_count`` and ``centroid`` (a list of
        numbers, or ``None`` if the trace yielded no embeddings).

    Raises:
        ValueError: If ``centroid_method`` is not one of the supported names.
    """
    # Validate once, before any database work, instead of per trace.
    if centroid_method not in ("mean", "median", "best_confidence"):
        raise ValueError(
            f"Unknown centroid_method {centroid_method!r}; "
            "expected 'mean', 'median' or 'best_confidence'"
        )

    sql = f"""
        SELECT
            trace_id,
            COUNT(*) as frame_count,
            MIN(frame_number) as start_frame,
            MAX(frame_number) as end_frame,
            AVG(x)::float as avg_x,
            AVG(y)::float as avg_y,
            AVG(width)::float as avg_w,
            AVG(height)::float as avg_h,
            AVG(confidence) as avg_confidence
        FROM {SCHEMA}.face_detections
        WHERE file_uuid = %s AND trace_id IS NOT NULL AND embedding IS NOT NULL
        GROUP BY trace_id
        HAVING COUNT(*) >= %s
        ORDER BY trace_id
    """
    cur.execute(sql, (file_uuid, min_frames))
    rows = cur.fetchall()

    traces = []
    for row in rows:
        # Get all embeddings for this trace, best-confidence detection first.
        cur.execute(
            f"SELECT embedding FROM {SCHEMA}.face_detections WHERE file_uuid=%s AND trace_id=%s AND embedding IS NOT NULL ORDER BY confidence DESC",
            (file_uuid, row[0]),
        )
        # NOTE(review): assumes the driver returns each embedding as a numeric
        # sequence that np.array can convert directly — confirm.
        embeddings = [np.array(r[0]) for r in cur.fetchall()]

        if not embeddings:
            centroid = None
        elif centroid_method == "mean":
            centroid = np.mean(embeddings, axis=0)
        elif centroid_method == "median":
            centroid = np.median(embeddings, axis=0)
        else:
            # "best_confidence": rows are ordered by confidence DESC, so the
            # first embedding belongs to the most confident detection.
            centroid = embeddings[0]

        traces.append(
            {
                "trace_id": row[0],
                "frame_count": row[1],
                "start_frame": row[2],
                "end_frame": row[3],
                "avg_bbox": {"x": row[4], "y": row[5], "w": row[6], "h": row[7]},
                "avg_confidence": row[8],
                "embedding_count": len(embeddings),
                "centroid": centroid.tolist() if centroid is not None else None,
            }
        )

    return traces
|
||||
|
||||
|
||||
def cosine_similarity(a, b):
    """Return the cosine similarity of two vectors.

    Both arguments are converted with ``np.array``. A small constant (1e-10)
    is added to the denominator, so a zero vector produces a similarity of
    0.0 instead of a division by zero.
    """
    vec_a = np.array(a)
    vec_b = np.array(b)
    denominator = np.linalg.norm(vec_a) * np.linalg.norm(vec_b) + 1e-10
    return np.dot(vec_a, vec_b) / denominator
|
||||
|
||||
|
||||
def cluster_by_threshold(
    traces: List[dict], threshold: float, adaptive: bool = False
) -> List[List[dict]]:
    """Greedy, seed-based clustering of traces by centroid cosine similarity.

    Traces are visited in input order. Each trace that has not been assigned
    yet seeds a new cluster and absorbs every later unassigned trace whose
    centroid similarity to the *seed* reaches the threshold. Members are
    compared with the seed only, never with each other. A trace whose
    centroid is ``None`` never joins another cluster and never attracts
    members; it ends up as a singleton cluster.

    Args:
        traces: Trace dicts with ``trace_id`` and ``centroid`` keys;
            ``frame_count`` is also read when ``adaptive`` is true.
        threshold: Minimum cosine similarity for joining a seed's cluster.
        adaptive: When true, the threshold is lowered by 0.05 for any pair in
            which either trace has fewer than 60 frames.

    Returns:
        The clusters in seed order; each cluster is a list of the original
        trace dicts, seed first.
    """
    # Convert every centroid and compute its norm once instead of once per
    # compared pair. The similarity below uses the same formula (including
    # the 1e-10 guard) as cosine_similarity().
    vectors = [
        None if t["centroid"] is None else np.array(t["centroid"]) for t in traces
    ]
    norms = [None if v is None else np.linalg.norm(v) for v in vectors]

    clusters = []
    assigned = set()

    for i, seed in enumerate(traces):
        if seed["trace_id"] in assigned:
            continue
        cluster = [seed]
        assigned.add(seed["trace_id"])

        if vectors[i] is not None:
            # Every trace before index i has already been assigned (as a seed
            # or as a member), so only later traces can still join.
            later = zip(traces[i + 1:], vectors[i + 1:], norms[i + 1:])
            for candidate, vec, norm in later:
                if candidate["trace_id"] in assigned or vec is None:
                    continue

                sim = np.dot(vectors[i], vec) / (norms[i] * norm + 1e-10)

                th = threshold
                if adaptive and (
                    seed["frame_count"] < 60 or candidate["frame_count"] < 60
                ):
                    # Relax the threshold when either trace is short.
                    th = threshold - 0.05

                if sim >= th:
                    cluster.append(candidate)
                    assigned.add(candidate["trace_id"])

        clusters.append(cluster)

    return clusters
|
||||
|
||||
|
||||
def cluster_dbscan(
    traces: List[dict], eps: float = 0.3, min_samples: int = 2
) -> List[List[dict]]:
    """Cluster trace centroids with DBSCAN using cosine distance.

    Traces whose centroid is ``None`` are left out of the result entirely.
    Points that DBSCAN labels as noise are not discarded: each one is
    returned as its own single-trace cluster.

    Args:
        traces: Trace dicts with a ``centroid`` key.
        eps: DBSCAN neighbourhood radius, in cosine distance
            (1 - cosine similarity).
        min_samples: DBSCAN ``min_samples`` parameter.

    Returns:
        A list of clusters, each a list of the original trace dicts. Empty
        when no trace has a centroid.
    """
    valid = [t for t in traces if t["centroid"] is not None]
    if not valid:
        # DBSCAN.fit rejects an empty sample matrix, so there is nothing to
        # cluster and nothing to import.
        return []

    # Imported lazily: scikit-learn is only needed by DBSCAN experiments.
    from sklearn.cluster import DBSCAN

    X = np.array([t["centroid"] for t in valid])

    # Cosine distance = 1 - cosine_similarity
    clustering = DBSCAN(eps=eps, min_samples=min_samples, metric="cosine").fit(X)
    labels = clustering.labels_

    clusters_dict = defaultdict(list)
    for i, label in enumerate(labels):
        # Noise points (label -1) get a unique key so each forms a singleton.
        key = int(label) if label >= 0 else f"noise_{i}"
        clusters_dict[key].append(valid[i])

    return list(clusters_dict.values())
|
||||
|
||||
|
||||
def fetch_tmdb_identities(cur) -> List[dict]:
    """Return the TMDb-sourced identities that have a face embedding.

    Selects ``id``, ``name`` and ``face_embedding`` from the ``identities``
    table of ``SCHEMA`` where ``source='tmdb'``. Each row becomes a dict with
    the keys ``id``, ``name`` and ``embedding``; rows whose embedding is
    ``None`` are skipped.
    """
    query = f"SELECT id, name, face_embedding FROM {SCHEMA}.identities WHERE source='tmdb' AND face_embedding IS NOT NULL"
    cur.execute(query)

    identities = []
    for record in cur.fetchall():
        embedding = record[2]
        if embedding is None:
            continue
        identities.append({"id": record[0], "name": record[1], "embedding": embedding})
    return identities
|
||||
|
||||
|
||||
def fetch_speaker_overlaps(cur, file_uuid: str) -> dict:
    """Read the speaker/face-trace overlap ratios stored on SPEAKS_AS edges.

    Args:
        cur: An open DB-API cursor.
        file_uuid: File whose TKG edges are queried.

    Returns:
        ``{trace_id: {speaker_id: overlap_ratio}}``. Rows with a NULL trace or
        speaker id are skipped; a NULL ratio is stored as ``0.0``.
    """
    query = f"""
        SELECT
            REPLACE(n.external_id, 'trace_', '')::int as trace_id,
            n2.external_id as speaker_id,
            (e.properties->>'overlap_ratio')::float as overlap_ratio
        FROM {SCHEMA}.tkg_edges e
        JOIN {SCHEMA}.tkg_nodes n ON e.source_node_id = n.id
        JOIN {SCHEMA}.tkg_nodes n2 ON e.target_node_id = n2.id
        WHERE e.edge_type = 'SPEAKS_AS'
          AND n.node_type = 'face_trace'
          AND n2.node_type = 'speaker'
          AND e.file_uuid = %s
        """
    cur.execute(query, (file_uuid,))

    by_trace = {}
    for record in cur.fetchall():
        trace_id, speaker_id = record[0], record[1]
        if trace_id is None or speaker_id is None:
            continue
        ratio = record[2] or 0
        per_speaker = by_trace.setdefault(int(trace_id), defaultdict(float))
        per_speaker[speaker_id] = float(ratio)
    return by_trace
|
||||
|
||||
|
||||
def verify_with_speakers(
    clusters: List[dict], speaker_overlaps: dict
) -> List[dict]:
    """Annotate clusters with their dominant speaker and merge same-speaker ones.

    Each cluster may be a label dict with a ``trace_ids`` list, or a raw list
    of trace dicts (each with ``trace_id``). Label dicts are annotated in
    place; raw lists are converted to new label dicts.

    Every record gets ``dominant_speaker``, ``speaker_overlap_score`` (summed
    overlap ratio of that speaker, rounded to 3 places) and ``speaker_votes``.
    Clusters that share a dominant speaker with a score above 0.5 are merged
    into one record tagged ``merge_reason="shared_dominant_speaker"``.

    Args:
        clusters: Label dicts or raw trace lists.
        speaker_overlaps: ``{trace_id: {speaker_id: overlap_ratio}}``.

    Returns:
        Merged records first, followed by the unmerged records in input order.
    """
    # Normalise both accepted input shapes to label dicts.
    records = []
    for cluster in clusters:
        if isinstance(cluster, dict):
            record = cluster
        else:
            ids = [t["trace_id"] for t in cluster]
            record = {"trace_ids": ids, "trace_count": len(ids)}
        records.append(record)

    # Vote: sum each speaker's overlap ratio over the cluster's traces.
    for record in records:
        votes = defaultdict(float)
        for tid in record.get("trace_ids") or []:
            for spk, ratio in speaker_overlaps.get(tid, {}).items():
                votes[spk] += ratio
        if votes:
            best_speaker = max(votes, key=votes.get)
            record["dominant_speaker"] = best_speaker
            record["speaker_overlap_score"] = round(votes[best_speaker], 3)
        else:
            record["dominant_speaker"] = None
            record["speaker_overlap_score"] = 0
        record["speaker_votes"] = dict(votes)

    # Group clusters whose dominant speaker is confident enough to merge on.
    speaker_clusters = defaultdict(list)
    for idx, record in enumerate(records):
        spk = record["dominant_speaker"]
        if spk and record["speaker_overlap_score"] > 0.5:
            speaker_clusters[spk].append(idx)

    merged = set()
    new_clusters = []
    for spk, indices in speaker_clusters.items():
        if len(indices) <= 1:
            continue
        members = [records[idx] for idx in indices]
        # Order-preserving de-duplication of the member trace ids.
        trace_ids = list(dict.fromkeys(
            tid for member in members for tid in (member.get("trace_ids") or [])
        ))
        combined_votes = defaultdict(float)
        for member in members:
            for voted_spk, score in member["speaker_votes"].items():
                combined_votes[voted_spk] += score
        new_clusters.append({
            # Reuse the first member's id so downstream code that reads
            # ``cluster_id`` keeps working for merged records.
            "cluster_id": members[0].get("cluster_id", indices[0]),
            "merged_from": indices,
            "trace_ids": trace_ids,
            "trace_count": len(trace_ids),
            "dominant_speaker": spk,
            "speaker_overlap_score": round(combined_votes[spk], 3),
            "speaker_votes": dict(combined_votes),
            "merge_reason": "shared_dominant_speaker",
        })
        merged.update(indices)

    # Keep the clusters that were not merged.
    new_clusters.extend(
        record for idx, record in enumerate(records) if idx not in merged
    )
    return new_clusters
|
||||
|
||||
|
||||
def match_tmdb(
    clusters: List[dict], tmdb_identities: List[dict], min_similarity: float = 0.55
) -> List[dict]:
    """Match each cluster to the TMDb identities it most resembles.

    The cluster is represented by the centroid of its longest trace (highest
    ``frame_count``) among the traces that actually have a centroid.

    Args:
        clusters: Clusters, each a list of trace dicts with ``trace_id``,
            ``frame_count`` and ``centroid``.
        tmdb_identities: Dicts with ``id``, ``name`` and ``embedding``;
            entries whose embedding is ``None`` are ignored.
        min_similarity: Minimum cosine similarity for an identity to count as
            a match.

    Returns:
        One result dict per cluster that has at least one centroid.
        ``cluster_id`` is the cluster's index in ``clusters``; ``tmdb_matches``
        is sorted by descending similarity. Empty clusters and clusters with
        no centroid at all are omitted.
    """
    results = []
    for i, cluster in enumerate(clusters):
        # Only traces with a centroid can represent the cluster; picking the
        # longest trace first and checking afterwards would drop clusters
        # whose longest trace happens to lack an embedding.
        candidates = [t for t in cluster if t.get("centroid") is not None]
        if not candidates:
            continue
        centroid = max(candidates, key=lambda t: t["frame_count"])["centroid"]

        matches = []
        for ident in tmdb_identities:
            if ident["embedding"] is None:
                continue
            sim = cosine_similarity(centroid, ident["embedding"])
            if sim >= min_similarity:
                matches.append(
                    {"id": ident["id"], "name": ident["name"], "similarity": float(sim)}
                )
        matches.sort(key=lambda m: m["similarity"], reverse=True)

        best = matches[0] if matches else None
        results.append({
            "cluster_id": i,
            "trace_count": len(cluster),
            "total_frames": sum(t["frame_count"] for t in cluster),
            "trace_ids": [t["trace_id"] for t in cluster],
            "tmdb_matches": matches,
            "best_match": best["name"] if best else None,
            "best_similarity": best["similarity"] if best else 0,
        })

    return results
|
||||
|
||||
|
||||
def compute_metrics(clusters: List[dict], total_traces: int) -> dict:
    """Summarise clustering coverage and TMDb match rate.

    Args:
        clusters: Label dicts (with ``trace_count`` or ``trace_ids``) or raw
            clusters (lists of traces). May be empty.
        total_traces: Number of traces that were offered to clustering.

    Returns:
        A dict with ``total_traces``, ``clustered_traces``, ``cluster_count``,
        ``coverage``, ``avg_cluster_size``, ``tmdb_matched`` and
        ``tmdb_coverage``. Ratios use a denominator of at least 1, so empty
        input yields zeros instead of raising.
    """
    def cluster_size(cluster) -> int:
        # Sized per record, so mixed or empty inputs are handled.
        if isinstance(cluster, dict):
            if "trace_count" in cluster:
                return cluster["trace_count"]
            return len(cluster.get("trace_ids") or [])
        return len(cluster)

    clustered = sum(cluster_size(c) for c in clusters)
    cluster_count = len(clusters)
    tmdb_matched = sum(
        1 for c in clusters if isinstance(c, dict) and c.get("best_match")
    )
    return {
        "total_traces": total_traces,
        "clustered_traces": clustered,
        "cluster_count": cluster_count,
        "coverage": clustered / max(total_traces, 1),
        "avg_cluster_size": clustered / max(cluster_count, 1),
        "tmdb_matched": tmdb_matched,
        "tmdb_coverage": tmdb_matched / max(cluster_count, 1),
    }
|
||||
|
||||
|
||||
def run_experiment(config: dict) -> dict:
    """Run one clustering experiment end to end and persist its artefacts.

    Steps: load traces, cluster them (``threshold`` or ``dbscan``), annotate
    and merge clusters by dominant speaker, optionally match them against
    TMDb identities, compute metrics, then write ``clusters.json``,
    ``labels.json``, ``metrics.json``, ``config.json`` and ``summary.txt``
    under ``results/exp_{id}``.

    Args:
        config: Experiment settings. ``id`` and ``name`` are required.
            Optional keys: ``file_uuid``, ``min_frames``,
            ``clustering_method``, ``threshold``, ``adaptive_threshold``,
            ``eps``, ``min_samples``, ``enable_tmdb``.

    Returns:
        The metrics dict, including ``execution_time_s``.
    """
    from contextlib import closing

    exp_id = config["id"]
    file_uuid = config.get("file_uuid", "1a04db97be5fa12bd77369831dc141fd")
    min_frames = config.get("min_frames", 30)
    print(f"\n{'='*60}")
    print(f"Experiment {exp_id}: {config['name']}")
    print(f"{'='*60}")

    # closing() guarantees cursor and connection are released even if a step raises.
    with closing(get_conn()) as conn, closing(conn.cursor()) as cur:
        t0 = time.time()

        # Step 1: Fetch traces
        print(f"\n[1] Fetching traces (min_frames={min_frames})...")
        traces = fetch_trace_data(cur, file_uuid, min_frames)
        print(f" {len(traces)} traces loaded")

        # Step 2: Clustering
        method = config.get("clustering_method", "threshold")
        print(f"\n[2] Clustering: method={method}...")
        if method == "threshold":
            clusters = cluster_by_threshold(
                traces,
                config.get("threshold", 0.85),
                config.get("adaptive_threshold", False),
            )
        elif method == "dbscan":
            clusters = cluster_dbscan(
                traces, config.get("eps", 0.3), config.get("min_samples", 2)
            )
        else:
            # Unknown method: fall back to adaptive threshold clustering.
            clusters = cluster_by_threshold(traces, 0.85, True)

        clustered_traces = sum(len(c) for c in clusters)
        print(f" {len(clusters)} clusters, {clustered_traces} traces clustered")

        # Step 3: Speaker verification (mandatory — standard step)
        print(f"\n[3] Speaker verification...")
        speaker_overlaps = fetch_speaker_overlaps(cur, file_uuid)
        labels = [
            {
                "cluster_id": i,
                "trace_count": len(c),
                "trace_ids": [t["trace_id"] for t in c],
                "tmdb_matches": [],
                "best_match": None,
            }
            for i, c in enumerate(clusters)
        ]
        labels = verify_with_speakers(labels, speaker_overlaps)
        matched_speakers = sum(1 for l in labels if l.get("dominant_speaker"))
        merged = sum(1 for l in labels if l.get("merge_reason"))
        print(f" {matched_speakers} clusters have speaker match, {merged} merged by speaker")

        # Step 4: TMDb matching (optional)
        if config.get("enable_tmdb", False):
            print(f"\n[4] TMDb matching...")
            tmdb = fetch_tmdb_identities(cur)
            print(f" {len(tmdb)} TMDb identities loaded")
            # match_tmdb works on lists of trace dicts, while labels only carry
            # trace ids: rebuild the trace lists, then copy the match fields
            # back so the speaker annotations on each label are preserved.
            trace_by_id = {t["trace_id"]: t for t in traces}
            label_traces = [
                [trace_by_id[tid] for tid in l.get("trace_ids", []) if tid in trace_by_id]
                for l in labels
            ]
            for result in match_tmdb(label_traces, tmdb):
                target = labels[result["cluster_id"]]
                for key in ("tmdb_matches", "best_match", "best_similarity", "total_frames"):
                    target[key] = result[key]
            matched = sum(1 for l in labels if l.get("best_match"))
            print(f" {matched} clusters matched to TMDb")

        # Step 5: Metrics
        metrics = compute_metrics(labels if labels else clusters, len(traces))
        metrics["execution_time_s"] = time.time() - t0

    # Step 6: Save results
    result_dir = os.path.join(EXPERIMENT_DIR, "results", f"exp_{exp_id}")
    os.makedirs(result_dir, exist_ok=True)

    # ensure_ascii=False may emit non-ASCII text, so pin the file encoding.
    outputs = [
        ("clusters.json", clusters if not labels else labels),
        ("labels.json", labels),
        ("metrics.json", metrics),
        ("config.json", config),
    ]
    for filename, payload in outputs:
        with open(os.path.join(result_dir, filename), "w", encoding="utf-8") as f:
            json.dump(payload, f, indent=2, ensure_ascii=False)

    # Summary
    summary = f"""
Experiment {exp_id}: {config['name']}
====================================
Date: {datetime.now().isoformat()}
Config: {json.dumps(config, indent=2)}

Results:
Traces loaded: {len(traces)}
Clusters: {len(clusters)}
Clustered traces: {clustered_traces}
Coverage: {metrics['coverage']:.1%}
Avg cluster size: {metrics['avg_cluster_size']:.1f}
TMDb matched: {metrics.get('tmdb_matched', 0)}
Execution time: {metrics['execution_time_s']:.1f}s

Top clusters:
"""
    sorted_labels = sorted(labels, key=lambda l: l.get("trace_count", 0), reverse=True)
    for l in sorted_labels[:10]:
        # best_match is present but None for unmatched clusters, so a .get
        # default would never apply; merged clusters may lack cluster_id.
        name = l.get("best_match") or "unlabeled"
        summary += f" Cluster {l.get('cluster_id', '?')}: {l.get('trace_count', 0)} traces → {name} (sim={l.get('best_similarity', 0):.3f})\n"

    with open(os.path.join(result_dir, "summary.txt"), "w", encoding="utf-8") as f:
        f.write(summary)

    print(f"\n[✓] Results saved to {result_dir}")
    print(summary)

    return metrics
|
||||
|
||||
|
||||
def main():
    """CLI entry point: read ``--config``, load the config, run the experiment."""
    arg_parser = argparse.ArgumentParser(description="Identity Clustering Experiment Runner")
    arg_parser.add_argument("--config", required=True, help="Experiment config JSON")
    cli_args = arg_parser.parse_args()

    experiment_config = load_experiment_config(cli_args.config)
    run_experiment(experiment_config)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
431
experiments/identity_clustering/runner_v2.py
Normal file
431
experiments/identity_clustering/runner_v2.py
Normal file
@@ -0,0 +1,431 @@
|
||||
#!/opt/homebrew/bin/python3.11
|
||||
"""
|
||||
Multi-Stage Identity Clustering Runner
|
||||
|
||||
Stage 1: High-confidence face-level matching
|
||||
- Compare ALL face embeddings in each trace against identity references
|
||||
- Bind trace to identity when enough of its faces match (function defaults: >=85% of faces at >=0.92 similarity; both configurable)
|
||||
- These become "anchors" for Stage 2
|
||||
|
||||
Stage 2: Trace centroid clustering of remaining unbound traces
|
||||
- Use centroid of unbound traces, cluster with adaptive threshold
|
||||
- Merge clusters with speaker overlap verification
|
||||
|
||||
Stage 3 (optional): TMDb matching
|
||||
"""
|
||||
|
||||
import sys, os, json, argparse, time, numpy as np
|
||||
from datetime import datetime
|
||||
from collections import defaultdict
|
||||
from typing import Dict, List, Tuple, Optional
|
||||
|
||||
import psycopg2
|
||||
|
||||
DB_URL = os.environ.get("DATABASE_URL", "postgresql://accusys@localhost:5432/momentry")
|
||||
SCHEMA = "dev"
|
||||
EXPERIMENT_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||
|
||||
|
||||
def get_conn():
    """Open and return a new psycopg2 connection to ``DB_URL``."""
    connection = psycopg2.connect(DB_URL)
    return connection
|
||||
|
||||
|
||||
def cosine_similarity(a, b):
    """Return the cosine similarity of two vectors.

    A small epsilon is added to the denominator, so zero vectors give 0
    instead of a division error.
    """
    vec_a = np.array(a)
    vec_b = np.array(b)
    denominator = np.linalg.norm(vec_a) * np.linalg.norm(vec_b) + 1e-10
    return np.dot(vec_a, vec_b) / denominator
|
||||
|
||||
|
||||
def parse_pg_array(val):
    """Convert a PostgreSQL array value into a float64 numpy vector.

    Accepts what a driver may hand back for a ``real[]``-style column: a
    sequence (list, tuple, ndarray) or its text form such as ``"{1,2,3}"``
    or ``"[1, 2, 3]"``.

    Args:
        val: The raw column value.

    Returns:
        A new 1-D ``float64`` array, or ``None`` when ``val`` is ``None``, is
        empty in any form, or is of an unsupported type.
    """
    if val is None:
        return None

    if isinstance(val, str):
        # Drop surrounding whitespace before the brackets, then the brackets.
        body = val.strip().strip("[]{}").strip()
        if not body:
            return None
        return np.fromstring(body, sep=",").astype(np.float64)

    if isinstance(val, (np.ndarray, list, tuple)):
        arr = np.array(val, dtype=np.float64)
        # Treat an empty sequence like an empty string: there is no embedding.
        return arr if arr.size else None

    return None
|
||||
|
||||
|
||||
def fetch_trace_with_faces(cur, file_uuid: str, min_frames: int) -> List[dict]:
    """Fetch traces together with all of their individual face embeddings.

    Only detections that have both a trace id and an embedding are counted,
    and only traces with at least ``min_frames`` such detections are returned.

    Args:
        cur: An open DB-API cursor.
        file_uuid: File whose detections are loaded.
        min_frames: Minimum number of embedded detections per trace.

    Returns:
        One dict per trace, ordered by trace id, with ``trace_id``,
        ``frame_count``, ``start_frame``, ``end_frame``, ``avg_bbox``,
        ``faces`` (sorted by descending detection confidence) and
        ``centroid`` (mean embedding as a list, or ``None`` without faces).
    """
    # Per-trace summaries
    cur.execute(
        f"""
        SELECT trace_id, COUNT(*) as fc, MIN(frame_number), MAX(frame_number),
               AVG(x::float), AVG(y::float), AVG(width::float), AVG(height::float)
        FROM {SCHEMA}.face_detections
        WHERE file_uuid=%s AND trace_id IS NOT NULL AND embedding IS NOT NULL
        GROUP BY trace_id HAVING COUNT(*)>=%s ORDER BY trace_id
        """, (file_uuid, min_frames))
    summaries = cur.fetchall()
    if not summaries:
        return []

    # Load every embedding for the file in a single query rather than one
    # query per trace; the ordering keeps each trace's faces sorted by
    # confidence, best first.
    cur.execute(
        f"SELECT trace_id, embedding FROM {SCHEMA}.face_detections WHERE file_uuid=%s AND trace_id IS NOT NULL AND embedding IS NOT NULL ORDER BY trace_id, confidence DESC",
        (file_uuid,))
    wanted = {row[0] for row in summaries}
    faces_by_trace = defaultdict(list)
    for tid, raw_embedding in cur.fetchall():
        if tid not in wanted:
            continue
        emb = parse_pg_array(raw_embedding)
        if emb is not None:
            faces_by_trace[tid].append({"embedding": emb.astype(np.float64)})

    traces = []
    for row in summaries:
        faces = faces_by_trace.get(row[0], [])
        traces.append({
            "trace_id": row[0], "frame_count": row[1],
            "start_frame": row[2], "end_frame": row[3],
            "avg_bbox": {"x": row[4], "y": row[5], "w": row[6], "h": row[7]},
            "faces": faces,
            "centroid": np.mean([f["embedding"] for f in faces], axis=0).tolist() if faces else None,
        })
    return traces
|
||||
|
||||
|
||||
def fetch_speaker_overlaps(cur, file_uuid: str) -> dict:
    """Read the speaker/face-trace overlap ratios stored on SPEAKS_AS edges.

    Args:
        cur: An open DB-API cursor.
        file_uuid: File whose TKG edges are queried.

    Returns:
        ``{trace_id: {speaker_id: overlap_ratio}}``. Rows with a NULL trace id
        or a missing/empty speaker id are skipped; a NULL ratio becomes ``0.0``.
    """
    cur.execute(f"""
        SELECT REPLACE(n.external_id,'trace_','')::int, n2.external_id,
               (e.properties->>'overlap_ratio')::float
        FROM {SCHEMA}.tkg_edges e
        JOIN {SCHEMA}.tkg_nodes n ON e.source_node_id=n.id
        JOIN {SCHEMA}.tkg_nodes n2 ON e.target_node_id=n2.id
        WHERE e.edge_type='SPEAKS_AS' AND n.node_type='face_trace' AND n2.node_type='speaker' AND e.file_uuid=%s
        """, (file_uuid,))
    overlaps = defaultdict(lambda: defaultdict(float))
    for tid, spk, ratio in cur.fetchall():
        # Compare against None explicitly: trace id 0 is a valid id and a
        # plain truthiness test would silently drop it.
        if tid is None or not spk:
            continue
        overlaps[int(tid)][spk] = float(ratio or 0)
    return dict(overlaps)
|
||||
|
||||
|
||||
def fetch_identity_references(cur) -> List[dict]:
    """Load every registered identity that has a usable face embedding.

    Args:
        cur: An open DB-API cursor.

    Returns:
        ``{"id", "name", "embedding"}`` dicts, with the embedding parsed into
        a float64 array. Rows whose embedding cannot be parsed are dropped.
    """
    cur.execute(f"SELECT id, name, face_embedding FROM {SCHEMA}.identities WHERE face_embedding IS NOT NULL")

    references = []
    for record in cur.fetchall():
        vector = parse_pg_array(record[2])
        if vector is not None:
            references.append(
                {"id": record[0], "name": record[1], "embedding": vector.astype(np.float64)}
            )
    return references
|
||||
|
||||
|
||||
# ===== STAGE 1: High-confidence face-level matching =====
|
||||
|
||||
def stage1_high_confidence_binding(
    traces: List[dict], identities: List[dict],
    face_match_threshold: float = 0.92,
    trace_bind_ratio: float = 0.85,
) -> Tuple[List[dict], List[dict]]:
    """Bind traces to identities by comparing every face with every identity.

    A face matches an identity when their cosine similarity is at least
    ``face_match_threshold``. A trace qualifies for an identity when the share
    of matching faces is at least ``trace_bind_ratio``; among qualifying
    identities the one with the most matching faces wins, and ties keep the
    earlier identity.

    Bound traces are mutated in place: ``binding`` and ``binding_stage`` are
    set on the trace dict.

    Returns:
        ``(bound_traces, unbound_traces)``, each in input order. Traces
        without faces are always unbound.
    """
    bound_traces = []
    unbound_traces = []

    for trace in traces:
        trace_faces = trace.get("faces", [])
        winner = None
        top_hits = 0

        if trace_faces:
            n_faces = len(trace_faces)
            for candidate in identities:
                hits = sum(
                    1
                    for face in trace_faces
                    if cosine_similarity(face["embedding"], candidate["embedding"])
                    >= face_match_threshold
                )
                share = hits / n_faces
                if share < trace_bind_ratio or hits <= top_hits:
                    continue
                top_hits = hits
                winner = {
                    "id": candidate["id"],
                    "name": candidate["name"],
                    "match_ratio": round(share, 3),
                    "matched_faces": hits,
                    "total_faces": n_faces,
                }

        if not winner:
            unbound_traces.append(trace)
            continue

        trace["binding"] = winner
        trace["binding_stage"] = "stage1_face_level"
        bound_traces.append(trace)

    return bound_traces, unbound_traces
|
||||
|
||||
|
||||
# ===== STAGE 2: Centroid clustering of unbound traces =====
|
||||
|
||||
def stage2_cluster_unbound(
    traces: List[dict], threshold: float, adaptive: bool = False
) -> List[dict]:
    """Greedily cluster traces by the cosine similarity of their centroids.

    Each not-yet-assigned trace seeds a cluster and absorbs every other
    unassigned trace whose centroid is similar enough to the seed's. With
    ``adaptive`` on, the threshold is relaxed by 0.05 when either trace has
    fewer than 10 frames. A trace without a centroid ends up alone.

    Returns:
        A list of clusters, each a list of trace dicts (the seed first).
    """
    taken = set()
    groups = []

    for seed_idx, seed in enumerate(traces):
        seed_id = seed["trace_id"]
        if seed_id in taken:
            continue
        taken.add(seed_id)
        members = [seed]

        for other_idx, other in enumerate(traces):
            if other["trace_id"] in taken or seed_idx == other_idx:
                continue
            if seed["centroid"] is None or other["centroid"] is None:
                continue

            similarity = cosine_similarity(seed["centroid"], other["centroid"])
            cutoff = threshold
            if adaptive and (seed["frame_count"] < 10 or other["frame_count"] < 10):
                cutoff -= 0.05

            if similarity >= cutoff:
                members.append(other)
                taken.add(other["trace_id"])

        groups.append(members)

    return groups
|
||||
|
||||
|
||||
def apply_speaker_verification(clusters: List[dict], speaker_overlaps: dict) -> List[dict]:
    """Turn raw clusters into label dicts annotated with a dominant speaker.

    For each cluster (a non-empty list of trace dicts) the overlap ratios of
    its traces are summed per speaker, and the speaker with the highest total
    becomes ``dominant_speaker``. ``binding`` and ``binding_stage`` are copied
    from the cluster's first trace. No clusters are merged here.

    Returns:
        One label dict per cluster, in input order.
    """
    def build_label(index, members):
        member_ids = [member["trace_id"] for member in members]

        tally = defaultdict(float)
        for tid in member_ids:
            if tid not in speaker_overlaps:
                continue
            for speaker, ratio in speaker_overlaps[tid].items():
                tally[speaker] += ratio

        top_speaker = max(tally, key=tally.get) if tally else None
        if top_speaker:
            score = round(tally.get(top_speaker, 0), 3)
        else:
            score = 0

        first = members[0]
        return {
            "cluster_id": index, "trace_count": len(members),
            "trace_ids": member_ids,
            "dominant_speaker": top_speaker,
            "speaker_score": score,
            "binding": first.get("binding"),
            "binding_stage": first.get("binding_stage"),
        }

    return [build_label(index, members) for index, members in enumerate(clusters)]
|
||||
|
||||
|
||||
# ===== Main Experiment =====
|
||||
|
||||
def _dominant_speaker(speaker_overlaps, trace_id):
    """Return the speaker with the largest overlap ratio for a trace, or None."""
    votes = speaker_overlaps.get(trace_id)
    return max(votes, key=votes.get) if votes else None


def _bind_by_video_references(bound, unbound, identities, speaker_overlaps, face_threshold):
    """Iteratively bind unbound traces using faces of already-bound traces.

    Each bound trace contributes its first three faces (the highest-confidence
    ones, given how faces are loaded) as references for its identity. Every
    round scores each unbound trace against each identity's references, binds
    the traces that clear the composite-score floor, and adds their faces to
    the reference set. Rounds stop when a round binds nothing.

    Newly bound traces are appended to ``bound`` and get ``binding`` and
    ``binding_stage`` set in place. Returns the traces that stay unbound.
    """
    refs_per_trace = 3
    min_composite = 0.35

    identity_refs = {}  # identity_id -> list of reference embeddings
    for t in bound:
        b = t.get("binding", {})
        iid = b.get("id") if isinstance(b, dict) else None
        if iid is None or not t.get("faces"):
            continue
        identity_refs.setdefault(iid, []).extend(
            f["embedding"] for f in t["faces"][:refs_per_trace]
        )

    id_to_name = {ident["id"]: ident["name"] for ident in identities}
    for iid, refs in identity_refs.items():
        print(f" {id_to_name.get(iid, '?'):<20} {len(refs)} reference faces (multi-angle sampling)")

    # Total speaker overlap per trace, used to weight the composite score.
    speaker_totals = {tid: sum(spks.values()) for tid, spks in speaker_overlaps.items()}
    # Loop-invariant normaliser, floored at 1 to avoid dividing by zero.
    max_total = max(max(speaker_totals.values(), default=1), 1)

    round_num = 0
    while unbound:
        round_num += 1
        bound_this_round = []

        for t in unbound:
            faces = t.get("faces", [])
            if not faces:
                continue
            spk_weight = 1.0 + 0.3 * speaker_totals.get(t["trace_id"], 0) / max_total

            best_score, best_iid, best_sim, best_ratio = 0, None, 0, 0
            for iid, refs in identity_refs.items():
                # Compare each face against ALL references, keep the best per face.
                face_sims = [
                    max(cosine_similarity(face["embedding"], ref) for ref in refs)
                    for face in faces
                ]
                avg_sim = float(np.mean(face_sims))
                match_ratio = sum(1 for s in face_sims if s >= face_threshold) / len(face_sims)

                # Composite score: similarity + match ratio + speaker weight
                composite = avg_sim * spk_weight * (0.4 + 0.6 * match_ratio)
                if composite > best_score and composite > min_composite:
                    best_score, best_iid = composite, iid
                    best_sim, best_ratio = avg_sim, match_ratio

            if best_iid is None:
                continue
            t["binding"] = {
                "id": best_iid, "name": id_to_name.get(best_iid, "?"),
                "avg_similarity": round(best_sim, 3),
                # Report the same ratio that entered the composite score.
                "match_ratio": round(best_ratio, 3),
                "composite_score": round(best_score, 3),
                "source": f"video_ref_r{round_num}",
            }
            t["binding_stage"] = f"stage1b_r{round_num}"
            bound_this_round.append(t)
            bound.append(t)

        if not bound_this_round:
            break

        # Enrich references with the best faces of the newly bound traces.
        for t in bound_this_round:
            identity_refs[t["binding"]["id"]].extend(
                f["embedding"] for f in t.get("faces", [])[:refs_per_trace]
            )

        bound_ids = {t["trace_id"] for t in bound_this_round}
        unbound = [t for t in unbound if t["trace_id"] not in bound_ids]
        print(f" Round {round_num}: {len(bound_this_round)} traces bound, {len(unbound)} unbound")

    return unbound


def _write_bindings(all_labels, file_uuid):
    """Persist label bindings to the database; return updated detection count.

    For every label with a named binding, the identity is looked up by name
    (and created as a pending 'auto' identity when missing). Detections of the
    label's traces that have no identity yet are then pointed at it, and a
    row is recorded in ``identity_bindings`` for each trace that changed.
    """
    from contextlib import closing

    total_written = 0
    # Everything runs in one transaction: if any statement raises, the
    # connection is closed without commit and nothing is persisted.
    with closing(get_conn()) as conn, closing(conn.cursor()) as cur:
        for label in all_labels:
            binding = label.get("binding")
            if not binding:
                continue
            identity_name = binding.get("name", "")
            if not identity_name:
                continue

            # Get or create identity
            cur.execute(f"SELECT id FROM {SCHEMA}.identities WHERE name=%s", (identity_name,))
            row = cur.fetchone()
            if row:
                identity_id = row[0]
            else:
                cur.execute(
                    f"INSERT INTO {SCHEMA}.identities (name, identity_type, source, status) VALUES (%s,'people','auto','pending') RETURNING id",
                    (identity_name,))
                identity_id = cur.fetchone()[0]

            # Bind all faces in each trace to the identity
            for tid in label["trace_ids"]:
                cur.execute(
                    f"UPDATE {SCHEMA}.face_detections SET identity_id=%s WHERE file_uuid=%s AND trace_id=%s AND identity_id IS NULL",
                    (identity_id, file_uuid, tid))
                affected = cur.rowcount
                if affected > 0:
                    # Write to identity_bindings for traceability
                    confidence = float(binding.get("avg_similarity", 0.8))
                    cur.execute(
                        f"INSERT INTO {SCHEMA}.identity_bindings (identity_id, identity_type, identity_value, confidence) VALUES (%s,'trace',%s,%s) ON CONFLICT DO NOTHING",
                        (identity_id, str(tid), confidence))
                    total_written += affected
        conn.commit()
    return total_written


def run_experiment(config: dict) -> dict:
    """Run the multi-stage identity clustering experiment and save its results.

    Stage 1 binds traces to registered identities face by face. Stage 1b
    iteratively extends those bindings, using faces of bound traces as extra
    references. Stage 2 clusters whatever stays unbound by centroid
    similarity and labels each cluster with its dominant speaker.

    Results (``labels.json``, ``metrics.json``, ``config.json``) are written
    under ``results/exp_{id}``. When ``config["write_db"]`` is true the
    bindings are also written to the database.

    Args:
        config: Experiment settings. ``id`` is required. Optional keys:
            ``file_uuid``, ``min_frames``, ``enable_identity_match``,
            ``stage1_face_threshold``, ``stage1_bind_ratio``,
            ``stage2_threshold``, ``stage2_adaptive``, ``write_db``.

    Returns:
        The metrics dict.
    """
    from contextlib import closing

    exp_id = config["id"]
    file_uuid = config.get("file_uuid", "")
    face_threshold = config.get("stage1_face_threshold", 0.55)

    # Load data; the read connection is released even if a query fails.
    with closing(get_conn()) as conn, closing(conn.cursor()) as cur:
        t0 = time.time()
        traces = fetch_trace_with_faces(cur, file_uuid, config.get("min_frames", 3))
        identities = fetch_identity_references(cur) if config.get("enable_identity_match", True) else []
        speaker_overlaps = fetch_speaker_overlaps(cur, file_uuid)
    print(f"Traces: {len(traces)}, Identities: {len(identities)}, Speaker edges: {len(speaker_overlaps)}")

    # Stage 1: TMDb-based first-pass binding (relaxed threshold)
    bound, unbound = [], traces
    if identities:
        bound, unbound = stage1_high_confidence_binding(
            traces, identities,
            face_threshold,
            config.get("stage1_bind_ratio", 0.60),
        )
        print(f"Stage 1 (TMDb): {len(bound)} traces bound, {len(unbound)} unbound")

    # Stage 1b: iterative enrichment from already-bound traces
    if bound and identities and unbound:
        unbound = _bind_by_video_references(
            bound, unbound, identities, speaker_overlaps, face_threshold
        )

    # Stage 2: centroid clustering of the remaining traces
    clusters = stage2_cluster_unbound(
        unbound,
        config.get("stage2_threshold", 0.85),
        config.get("stage2_adaptive", False),
    )
    print(f"Stage 2: {len(clusters)} clusters from {len(unbound)} unbound traces")

    # Speaker verification
    all_labels = apply_speaker_verification(clusters, speaker_overlaps)

    # Append one label per bound trace
    for t in bound:
        all_labels.append({
            "cluster_id": len(all_labels),
            "trace_count": 1,
            "trace_ids": [t["trace_id"]],
            "binding": t.get("binding"),
            # Keep the stage that actually produced the binding (stage 1 or 1b).
            "binding_stage": t.get("binding_stage", "stage1_face_level"),
            # Strongest-overlap speaker rather than whichever key comes first.
            "dominant_speaker": _dominant_speaker(speaker_overlaps, t["trace_id"]),
        })

    # Metrics
    stage2_clustered = sum(len(c) for c in clusters)
    metrics = {
        "total_traces": len(traces),
        "stage1_bound": len(bound),
        "stage1_bound_traces": len(bound),
        "stage2_clusters": len(clusters),
        "stage2_unbound_clustered": stage2_clustered,
        "total_clusters": len(all_labels),
        "execution_time_s": time.time() - t0,
        "coverage": (len(bound) + stage2_clustered) / max(len(traces), 1),
    }
    for k, v in metrics.items():
        print(f" {k}: {v}")

    # --- Write bindings to database ---
    if config.get("write_db", False):
        total_written = _write_bindings(all_labels, file_uuid)
        print(f"\nDB write: {total_written} face_detections updated")

    # Save
    result_dir = os.path.join(EXPERIMENT_DIR, "results", f"exp_{exp_id}")
    os.makedirs(result_dir, exist_ok=True)
    for name, data in [("labels.json", all_labels), ("metrics.json", metrics), ("config.json", config)]:
        # ensure_ascii=False may emit non-ASCII text, so pin the file encoding.
        with open(os.path.join(result_dir, name), "w", encoding="utf-8") as f:
            json.dump(data, f, indent=2, ensure_ascii=False, default=str)

    print(f"\nSaved to {result_dir}")
    return metrics
|
||||
|
||||
|
||||
def main():
    """CLI entry point: load the JSON config and run the experiment.

    ``--write-db`` forces ``config["write_db"]`` on, whatever the file says.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("--config", required=True)
    cli.add_argument("--write-db", action="store_true", help="Write bindings to database")
    options = cli.parse_args()

    with open(options.config) as handle:
        config = json.load(handle)

    if options.write_db:
        config["write_db"] = True

    run_experiment(config)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
234
experiments/trace_quality_agent.py
Normal file
234
experiments/trace_quality_agent.py
Normal file
@@ -0,0 +1,234 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Trace 品質檢查 Agent — 選型實驗報告
|
||||
評估每個 trace 是否符合 identity 標準,檢測需補掃/覆查的異常 trace。
|
||||
|
||||
檢查項目:
|
||||
1. 取樣密度 — trace < 3 frames → 需要 dense scan
|
||||
2. 人臉驗證 — DeepFace vs Apple Vision 確認是否為人臉
|
||||
3. Embedding 品質 — trace 內方差過大 → 可能混入多人
|
||||
4. 時序衝突 — 同 identity 兩 trace 同時出現 → 需 split
|
||||
"""
|
||||
|
||||
import json, sys, os, time, argparse, io
|
||||
from collections import defaultdict
|
||||
from pathlib import Path
|
||||
|
||||
DB_URL = "postgresql://accusys@localhost:5432/momentry"
|
||||
SCHEMA = "dev"
|
||||
FILE_UUID = "417a7e93860d70c87aee6c4c1b715d70"
|
||||
VIDEO_PATH = "/Users/accusys/test_video/Old_Time_Movie_Show_-_Charade_1963.HD.mov"
|
||||
OUT_DIR = Path("/Users/accusys/momentry/output_dev/experiments/trace_quality")
|
||||
OUT_DIR.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# ============================================================
|
||||
# Report Header
|
||||
# ============================================================
|
||||
print("=" * 70)
|
||||
print("Trace 品質檢查 — 技術選型實驗報告")
|
||||
print("=" * 70)
|
||||
print(f"File: Charade (1963), {FILE_UUID}")
|
||||
print(f"Traces: 2347, Faces: 6182")
|
||||
print()
|
||||
|
||||
import psycopg2
|
||||
import psycopg2.extras
|
||||
import numpy as np
|
||||
|
||||
conn = psycopg2.connect(DB_URL)
|
||||
cur = conn.cursor()
|
||||
|
||||
# ============================================================
|
||||
# Check 1: Sample Density (取樣密度)
|
||||
# ============================================================
|
||||
print("=" * 70)
|
||||
print("Check 1: 取樣密度 (Sample Density)")
|
||||
print("=" * 70)
|
||||
|
||||
# Histogram of traces by how many detections (frames) each one has.
cur.execute(f"""
    SELECT
        CASE WHEN fc = 1 THEN '1 frame'
             WHEN fc <= 3 THEN '2-3 frames'
             WHEN fc <= 10 THEN '4-10 frames'
             ELSE '11+ frames'
        END AS density,
        COUNT(*) AS trace_count,
        ROUND(COUNT(*)::numeric / (SELECT COUNT(*) FROM (SELECT trace_id, COUNT(*) FROM {SCHEMA}.face_detections WHERE file_uuid=%s AND trace_id IS NOT NULL GROUP BY trace_id) t) * 100, 1) AS pct
    FROM (SELECT trace_id, COUNT(*) AS fc FROM {SCHEMA}.face_detections WHERE file_uuid=%s AND trace_id IS NOT NULL GROUP BY trace_id) t
    GROUP BY 1 ORDER BY MIN(fc)
""", (FILE_UUID, FILE_UUID))
density_rows = cur.fetchall()

# Buckets with fewer than 4 frames. Matched by label: reading the first
# character of the label would also flag '11+ frames'.
sparse_buckets = {"1 frame", "2-3 frames"}

for density, count, pct in density_rows:
    marker = " ← needs dense scan" if density in sparse_buckets else ""
    print(f" {density:<15} {count:>6} traces ({pct:>5.1f}%){marker}")

# Derive both totals from the histogram rather than a second query and a
# hard-coded trace count.
total_traces = sum(count for _, count, _ in density_rows)
need_dense = sum(count for density, count, _ in density_rows if density in sparse_buckets)
print(f"\n 需 dense scan: {need_dense} traces ({need_dense/max(total_traces, 1)*100:.1f}%)")
|
||||
|
||||
print()
|
||||
print(" 技術方案:")
|
||||
print(" 方案A: swift_face --sample-interval 1 (Apple Vision, ~250fps)")
|
||||
print(" 方案B: ffmpeg + DeepFace (Python, ~0.2s/face)")
|
||||
print(" 建議: 方案A,無需額外模型,速度快,已整合於 pipeline")
|
||||
|
||||
# ============================================================
|
||||
# Check 2: Human Face Verification (人臉驗證)
|
||||
# ============================================================
|
||||
print()
|
||||
print("=" * 70)
|
||||
print("Check 2: 人臉驗證 (Human Face Verification)")
|
||||
print("=" * 70)
|
||||
|
||||
# Sample 10 traces: the 5 with the lowest average confidence (possibly
# non-human) and the 5 with the highest (likely human).
cur.execute(f"""
    (SELECT trace_id, AVG(confidence)::numeric(4,3) AS c, AVG(width)::int AS w, AVG(height)::int AS h,
            MIN(frame_number) AS f
     FROM {SCHEMA}.face_detections WHERE file_uuid=%s AND trace_id IS NOT NULL
     GROUP BY trace_id ORDER BY AVG(confidence) ASC LIMIT 5)
    UNION ALL
    (SELECT trace_id, AVG(confidence)::numeric(4,3) AS c, AVG(width)::int AS w, AVG(height)::int AS h,
            MIN(frame_number) AS f
     FROM {SCHEMA}.face_detections WHERE file_uuid=%s AND trace_id IS NOT NULL
     GROUP BY trace_id ORDER BY AVG(confidence) DESC LIMIT 5)
""", (FILE_UUID, FILE_UUID))

samples = cur.fetchall()

import subprocess

# Test DeepFace
print(" DeepFace 人臉驗證 (10 samples):")
try:
    from deepface import DeepFace
    import warnings
    warnings.filterwarnings("ignore")

    video_fps = 59.94  # frame rate used to turn a frame number into seconds

    t0 = time.time()
    for tid, conf, w, h, frame in samples:
        sec = frame / video_fps
        img_path = OUT_DIR / f"trace_{tid}_verify.jpg"
        if not img_path.exists():
            # Argument list instead of a shell string: paths containing spaces
            # or shell metacharacters are passed through verbatim.
            subprocess.run(
                ["ffmpeg", "-y", "-ss", f"{sec:.1f}", "-i", VIDEO_PATH,
                 "-frames:v", "1", "-q:v", "3", str(img_path)],
                stderr=subprocess.DEVNULL,
                check=False,
            )
        try:
            r = DeepFace.analyze(str(img_path), actions=['age','gender'], enforce_detection=False, detector_backend='opencv')
            if isinstance(r, list): r = r[0]
            age = r.get('age', 0)
            gender = r.get('dominant_gender', 'N/A')
            is_human = age > 0 and gender in ('Man', 'Woman')
            print(f" trace {tid:>5}: conf={conf:.3f} {w}x{h} → age={age:.0f} gender={gender:<5} {'✅ human' if is_human else '⚠️ non-human?'}")
        except Exception as e:
            # Best-effort probe: report the failure for this sample and go on.
            print(f" trace {tid:>5}: conf={conf:.3f} {w}x{h} → ERROR {str(e)[:60]}")
    dt = time.time() - t0
    # Average over the samples actually processed, not an assumed count.
    print(f" Time: {dt:.1f}s ({dt/max(len(samples), 1):.1f}s/face)")
except ImportError:
    print(" DeepFace not available")
|
||||
|
||||
# Test Apple Vision approach (statistical, no ML)
|
||||
print()
|
||||
print(" Statistical filter (no ML):")
|
||||
print(" Rule: confidence < 0.5 OR aspect_ratio deviation > 0.3 → flag")
|
||||
cur.execute(f"""
|
||||
SELECT COUNT(*) FROM {SCHEMA}.face_detections
|
||||
WHERE file_uuid=%s AND trace_id IS NOT NULL AND confidence < 0.5
|
||||
""", (FILE_UUID,))
|
||||
low_conf = cur.fetchone()[0]
|
||||
print(f" Low confidence (<0.5): {low_conf} faces")
|
||||
print(f" Aspect ratio: all detections are square (Vision bbox), no filtering possible")
|
||||
|
||||
print()
|
||||
print(" 建議: DeepFace verify for low-confidence traces only")
|
||||
print(" 可選 gateway: conf < 0.6 才跑 DeepFace,節省 90% 成本")
|
||||
|
||||
# ============================================================
# Check 3: Embedding Quality
# ============================================================
# For the largest traces, measures how tightly the face embeddings inside a
# single trace agree with each other (pairwise cosine similarity). A trace
# whose embeddings disagree probably mixes more than one person.
print()
print("=" * 70)
print("Check 3: Embedding Quality (嵌入品質)")
print("=" * 70)

# Check intra-trace embedding variance for the 10 largest traces
cur.execute(f"""
    SELECT trace_id, COUNT(*) AS fc, AVG(confidence)::numeric(4,3) AS conf
    FROM {SCHEMA}.face_detections WHERE file_uuid=%s AND trace_id IS NOT NULL
    GROUP BY trace_id ORDER BY fc DESC LIMIT 10
""", (FILE_UUID,))
top_traces = cur.fetchall()

print(" Intra-trace embedding variance (top 10 traces by size):")
for tid, fc, conf in top_traces:
    cur.execute(f"""
        SELECT embedding FROM {SCHEMA}.face_detections
        WHERE file_uuid=%s AND trace_id=%s AND embedding IS NOT NULL
    """, (FILE_UUID, tid))
    embs = [np.array(row[0]) for row in cur.fetchall() if row[0]]
    if len(embs) < 2:
        print(f" trace {tid:>5}: {fc:>3} faces, conf={conf:.3f} — not enough embeddings")
        continue

    # Normalize and compute pairwise cosine similarity
    embs_norm = np.array([e / (np.linalg.norm(e) + 1e-10) for e in embs])
    sim_matrix = embs_norm @ embs_norm.T

    # Select every off-diagonal pair explicitly. Filtering by value (e.g.
    # "> 0") would also discard genuine pairs with zero or negative cosine
    # similarity — exactly the pairs that indicate a bad trace.
    off_diag = sim_matrix[~np.eye(len(embs), dtype=bool)]
    var = float(1.0 - np.mean(off_diag))
    min_sim = float(np.min(off_diag))

    if var < 0.3 and min_sim > 0.5:
        quality = "✅ good"
    elif var < 0.5 and min_sim > 0.3:
        quality = "⚠️ check"
    else:
        quality = "❌ split likely"
    print(f" trace {tid:>5}: {fc:>3} faces, conf={conf:.3f}, variance={var:.3f}, min_sim={min_sim:.3f} → {quality}")

print()
print(" 建議: variance > 0.2 OR min_sim < 0.4 → 標記 split")
print(" 純統計方法,無需模型")
|
||||
|
||||
# ============================================================
# Check 4: Temporal Collision
# ============================================================
# Two different traces assigned to the same identity cannot both appear in
# one frame; the self-join below lists up to 10 such pairs.
print()
print("=" * 70)
print("Check 4: 時序衝突 (Temporal Collision)")
print("=" * 70)

collision_sql = f"""
SELECT i.name, a.trace_id, a.frame_number AS a_frame, b.trace_id AS b_trace, b.frame_number AS b_frame
FROM {SCHEMA}.face_detections a
JOIN {SCHEMA}.face_detections b ON a.file_uuid=b.file_uuid AND a.frame_number=b.frame_number AND a.trace_id<b.trace_id
JOIN {SCHEMA}.identities i ON a.identity_id=i.id AND b.identity_id=i.id
WHERE a.file_uuid=%s AND a.identity_id IS NOT NULL
ORDER BY a.frame_number LIMIT 10
"""
cur.execute(collision_sql, (FILE_UUID,))
collisions = cur.fetchall()

if not collisions:
    print(" ✅ No temporal collisions detected")
else:
    print(" ⚠️ 同一 identity 的 trace 出現在同一幀:")
    for identity_name, first_trace, shared_frame, second_trace, _ in collisions:
        print(f" {identity_name}: trace {first_trace} & {second_trace} at frame {shared_frame}")

print()
print(" 建議: 純 SQL 檢測,發現碰撞 → 自動 split into separate identities")

# All database work is done; release the cursor and the connection.
cur.close()
conn.close()
|
||||
|
||||
# ============================================================
# Summary
# ============================================================
# Prints a fixed recommendation table: one row per check, with the
# technique, model, speed and feasibility columns left-aligned.
print()
print("=" * 70)
print("選型建議總結")
print("=" * 70)
print()

summary_row = " {:<25} {:<20} {:<12} {:<10} {}"
summary_table = (
    ('1.取樣密度', 'SQL + swift_face', 'Apple Vision', '250fps', '✅ 已整合'),
    ('2.人臉驗證', 'DeepFace analyze', 'AgeNet', '0.2s/face', '✅ MIT license'),
    ('3.Embedding 品質', 'numpy statistics', 'None', 'instant', '✅ 純計算'),
    ('4.時序衝突', 'SQL JOIN', 'None', 'instant', '✅ 純查詢'),
    ('5.Speaker 一致性', 'SQL + overlap', 'None', 'instant', '✅ 後續追加'),
)

print(summary_row.format('檢查', '技術', '模型', '速度', '可行性'))
print(" " + "-" * 70)
for cells in summary_table:
    print(summary_row.format(*cells))

print()
print(" 唯一需要外部模型的: Check 2 (DeepFace, MIT, 0.2s/face)")
print(" 其他全為純 SQL/統計,可立即實作")
|
||||
Reference in New Issue
Block a user