#!/usr/bin/env python3
"""Trace quality-check agent: technology-selection experiment report.

Evaluates whether each trace meets the identity standard and detects abnormal
traces that need a re-scan or a second review.

Checks performed:
    1. Sample density    - traces with too few frames (this script flags
                           traces with fewer than 4 detections) need a
                           dense scan.
    2. Face verification - DeepFace vs. Apple Vision, to confirm that a
                           detection is a human face.
    3. Embedding quality - a large spread of embeddings inside one trace
                           suggests several people were mixed into it.
    4. Temporal conflict - two traces of the same identity appearing at the
                           same time mean the identity needs a split.
"""

import argparse
import io
import json
import os
import sys
import time
from collections import defaultdict
from pathlib import Path

# Fixed inputs of this experiment run: database, schema, the one video file
# being analysed, and where extracted frames are written.
DB_URL = "postgresql://accusys@localhost:5432/momentry"
SCHEMA = "dev"
FILE_UUID = "417a7e93860d70c87aee6c4c1b715d70"
VIDEO_PATH = "/Users/accusys/test_video/Old_Time_Movie_Show_-_Charade_1963.HD.mov"
OUT_DIR = Path("/Users/accusys/momentry/output_dev/experiments/trace_quality")

# Create the output directory (and any missing parents) up front; an already
# existing directory is not an error.
OUT_DIR.mkdir(exist_ok=True, parents=True)

# ============================================================
# Report header
# ============================================================
# The trace/face totals printed here are literal text, not queried values.
_banner = "=" * 70
for _header_line in (
    _banner,
    "Trace 品質檢查 — 技術選型實驗報告",
    _banner,
    f"File: Charade (1963), {FILE_UUID}",
    "Traces: 2347, Faces: 6182",
):
    print(_header_line)
print()

# Third-party modules used by the checks below.
import psycopg2
import psycopg2.extras
import numpy as np

# A single connection and cursor are shared by every check that follows.
conn = psycopg2.connect(DB_URL)
cur = conn.cursor()

# ============================================================
# Check 1: Sample Density
# ============================================================
# Buckets every trace of FILE_UUID by its number of detections, prints the
# distribution, and reports how many traces are too sparse (fewer than 4
# detections) and therefore need a dense re-scan.
print("=" * 70)
print("Check 1: 取樣密度 (Sample Density)")
print("=" * 70)

# Labels of the buckets that hold traces with fewer than 4 detections.
# Membership is tested on the whole label: testing only its first character
# would also match '11+ frames'.
DENSE_SCAN_BUCKETS = frozenset({"1 frame", "2-3 frames"})

# per_trace: one row per trace with its detection count (fc).
# pct is each bucket's share of all traces, computed with a window sum over
# the grouped counts so the per-trace subquery is only written once.
cur.execute(f"""
    WITH per_trace AS (
        SELECT trace_id, COUNT(*) AS fc
        FROM {SCHEMA}.face_detections
        WHERE file_uuid=%s AND trace_id IS NOT NULL
        GROUP BY trace_id
    )
    SELECT
        CASE WHEN fc = 1 THEN '1 frame'
             WHEN fc <= 3 THEN '2-3 frames'
             WHEN fc <= 10 THEN '4-10 frames'
             ELSE '11+ frames'
        END AS density,
        COUNT(*) AS trace_count,
        ROUND(COUNT(*)::numeric / SUM(COUNT(*)) OVER () * 100, 1) AS pct
    FROM per_trace
    GROUP BY 1 ORDER BY MIN(fc)
""", (FILE_UUID,))

total_traces = 0
need_dense = 0
for density, count, pct in cur.fetchall():
    is_sparse = density in DENSE_SCAN_BUCKETS
    marker = " ← needs dense scan" if is_sparse else ""
    print(f" {density:<15} {count:>6} traces ({pct:>5.1f}%){marker}")
    total_traces += count
    if is_sparse:
        need_dense += count

# The share is taken against the real number of traces found, and an empty
# result set yields 0.0% instead of dividing by zero.
dense_pct = need_dense / total_traces * 100 if total_traces else 0.0
print(f"\n 需 dense scan: {need_dense} traces ({dense_pct:.1f}%)")

print()
print(" 技術方案:")
print(" 方案A: swift_face --sample-interval 1 (Apple Vision, ~250fps)")
print(" 方案B: ffmpeg + DeepFace (Python, ~0.2s/face)")
print(" 建議: 方案A,無需額外模型,速度快,已整合於 pipeline")

# ============================================================
# Check 2: Human Face Verification
# ============================================================
# Extracts one video frame per sampled trace with ffmpeg, runs DeepFace
# age/gender analysis on it, and prints whether the result looks human.
# Afterwards prints a purely statistical alternative (low-confidence count).
import subprocess  # needed only by this section, to run ffmpeg without a shell

print()
print("=" * 70)
print("Check 2: 人臉驗證 (Human Face Verification)")
print("=" * 70)

# NOTE(review): hard-coded frame rate used to turn a frame number into a seek
# time; presumably the frame rate of VIDEO_PATH - confirm.
VIDEO_FPS = 59.94

# Sample 10 traces: the 5 with the lowest average confidence (possibly
# non-human) followed by the 5 with the highest (likely human).
cur.execute(f"""
    (SELECT trace_id, AVG(confidence)::numeric(4,3) AS c, AVG(width)::int AS w, AVG(height)::int AS h,
            MIN(frame_number) AS f
     FROM {SCHEMA}.face_detections WHERE file_uuid=%s AND trace_id IS NOT NULL
     GROUP BY trace_id ORDER BY AVG(confidence) ASC LIMIT 5)
    UNION ALL
    (SELECT trace_id, AVG(confidence)::numeric(4,3) AS c, AVG(width)::int AS w, AVG(height)::int AS h,
            MIN(frame_number) AS f
     FROM {SCHEMA}.face_detections WHERE file_uuid=%s AND trace_id IS NOT NULL
     GROUP BY trace_id ORDER BY AVG(confidence) DESC LIMIT 5)
""", (FILE_UUID, FILE_UUID))

samples = cur.fetchall()

# Test DeepFace. The sample count is reported from the actual result set.
print(f" DeepFace 人臉驗證 ({len(samples)} samples):")

# Only the import itself is guarded, so an ImportError raised later is not
# mistaken for "DeepFace is not installed".
try:
    from deepface import DeepFace
except ImportError:
    DeepFace = None

if DeepFace is None:
    print(" DeepFace not available")
else:
    import warnings
    warnings.filterwarnings("ignore")

    t0 = time.time()
    for tid, conf, w, h, frame in samples:
        sec = frame / VIDEO_FPS
        img_path = OUT_DIR / f"trace_{tid}_verify.jpg"
        try:
            # Frames extracted by an earlier run are reused.
            if not img_path.exists():
                # Argument list instead of a shell string: paths containing
                # spaces or shell metacharacters are passed through verbatim.
                # Millisecond seek precision keeps the grab on the intended
                # frame. ffmpeg's stderr is discarded; a failed extraction
                # surfaces below when DeepFace cannot read the image.
                subprocess.run(
                    [
                        "ffmpeg", "-y",
                        "-ss", f"{sec:.3f}",
                        "-i", VIDEO_PATH,
                        "-frames:v", "1",
                        "-q:v", "3",
                        str(img_path),
                    ],
                    stderr=subprocess.DEVNULL,
                    check=False,
                )

            # The whole extracted frame is analysed (no crop to the face box).
            # NOTE(review): with enforce_detection=False DeepFace presumably
            # still returns age/gender when it finds no face, which would make
            # the "human" verdict below nearly always true - confirm.
            r = DeepFace.analyze(
                str(img_path),
                actions=['age', 'gender'],
                enforce_detection=False,
                detector_backend='opencv',
            )
            if isinstance(r, list):
                r = r[0]
            age = r.get('age', 0)
            gender = r.get('dominant_gender', 'N/A')
            is_human = age > 0 and gender in ('Man', 'Woman')
            print(f" trace {tid:>5}: conf={conf:.3f} {w}x{h} → age={age:.0f} gender={gender:<5} {'✅ human' if is_human else '⚠️ non-human?'}")
        except Exception as e:
            # Best-effort report: one failing sample must not stop the others.
            print(f" trace {tid:>5}: conf={conf:.3f} {w}x{h} → ERROR {str(e)[:60]}")
    dt = time.time() - t0
    # Average over the samples actually processed, guarding the empty case.
    per_face = dt / len(samples) if samples else 0.0
    print(f" Time: {dt:.1f}s ({per_face:.1f}s/face)")

# Test Apple Vision approach (statistical, no ML)
print()
print(" Statistical filter (no ML):")
print(" Rule: confidence < 0.5 OR aspect_ratio deviation > 0.3 → flag")
cur.execute(f"""
    SELECT COUNT(*) FROM {SCHEMA}.face_detections
    WHERE file_uuid=%s AND trace_id IS NOT NULL AND confidence < 0.5
""", (FILE_UUID,))
low_conf = cur.fetchone()[0]
print(f" Low confidence (<0.5): {low_conf} faces")
print(f" Aspect ratio: all detections are square (Vision bbox), no filtering possible")

print()
print(" 建議: DeepFace verify for low-confidence traces only")
print(" 可選 gateway: conf < 0.6 才跑 DeepFace,節省 90% 成本")

# ============================================================
# Check 3: Embedding Quality
# ============================================================
# For the 10 traces with the most detections, measures how similar the face
# embeddings inside each trace are to one another and prints a verdict.
print()
print("=" * 70)
print("Check 3: Embedding Quality (嵌入品質)")
print("=" * 70)


def _pairwise_similarity_stats(embeddings):
    """Return ``(dispersion, min_similarity)`` for a trace's embeddings.

    Every vector is L2-normalised, then the cosine similarity of each
    unordered pair is computed.

    Args:
        embeddings: sequence of equal-length 1-D numeric vectors.

    Returns:
        A tuple ``(dispersion, min_similarity)`` where ``dispersion`` is
        ``1 - mean(pairwise cosine similarity)`` and ``min_similarity`` is the
        smallest pairwise cosine similarity. ``(0.0, 0.0)`` when there are
        fewer than two vectors.
    """
    stacked = np.vstack(embeddings)
    norms = np.linalg.norm(stacked, axis=1, keepdims=True)
    unit = stacked / (norms + 1e-10)  # epsilon avoids dividing by a zero norm
    sims = unit @ unit.T
    # Select the pairs by position (strict upper triangle) rather than by
    # value: a value threshold would silently drop zero or negative
    # similarities, which are exactly the pairs that indicate a mixed trace.
    pair_sims = sims[np.triu_indices(len(embeddings), k=1)]
    if pair_sims.size == 0:
        return 0.0, 0.0
    return float(1.0 - pair_sims.mean()), float(pair_sims.min())


# Check intra-trace embedding spread for the 10 largest traces.
cur.execute(f"""
    SELECT trace_id, COUNT(*) AS fc, AVG(confidence)::numeric(4,3) AS conf
    FROM {SCHEMA}.face_detections WHERE file_uuid=%s AND trace_id IS NOT NULL
    GROUP BY trace_id ORDER BY fc DESC LIMIT 10
""", (FILE_UUID,))
top_traces = cur.fetchall()

print(" Intra-trace embedding variance (top 10 traces by size):")
for tid, fc, conf in top_traces:
    cur.execute(f"""
        SELECT embedding FROM {SCHEMA}.face_detections
        WHERE file_uuid=%s AND trace_id=%s AND embedding IS NOT NULL
    """, (FILE_UUID, tid))
    # NOTE(review): assumes the driver returns each embedding as a sequence of
    # numbers (e.g. a Python list) - confirm against the column type.
    # Empty embeddings are skipped; the explicit length test also works for
    # array-like values whose truthiness is ambiguous.
    embs = [
        np.asarray(row[0], dtype=float)
        for row in cur.fetchall()
        if row[0] is not None and len(row[0]) > 0
    ]
    if len(embs) < 2:
        print(f" trace {tid:>5}: {fc:>3} faces, conf={conf:.3f} — not enough embeddings")
        continue

    # "variance" in the report is 1 - mean pairwise cosine similarity.
    var, min_sim = _pairwise_similarity_stats(embs)

    if var < 0.3 and min_sim > 0.5:
        quality = "✅ good"
    elif var < 0.5 and min_sim > 0.3:
        quality = "⚠️ check"
    else:
        quality = "❌ split likely"
    print(f" trace {tid:>5}: {fc:>3} faces, conf={conf:.3f}, variance={var:.3f}, min_sim={min_sim:.3f} → {quality}")

print()
print(" 建議: variance > 0.2 OR min_sim < 0.4 → 標記 split")
print(" 純統計方法,無需模型")

# ============================================================
# Check 4: Temporal Collision
# ============================================================
# Lists up to 10 cases where two different traces assigned to the same
# identity both have a detection in the same frame of FILE_UUID, then closes
# the shared cursor and connection (this is the last database access).
print()
_rule = "=" * 70
print(_rule)
print("Check 4: 時序衝突 (Temporal Collision)")
print(_rule)

# Self-join on (file, frame); a.trace_id<b.trace_id keeps each pair once.
_collision_sql = f"""
    SELECT i.name, a.trace_id, a.frame_number AS a_frame, b.trace_id AS b_trace, b.frame_number AS b_frame
    FROM {SCHEMA}.face_detections a
    JOIN {SCHEMA}.face_detections b ON a.file_uuid=b.file_uuid AND a.frame_number=b.frame_number AND a.trace_id<b.trace_id
    JOIN {SCHEMA}.identities i ON a.identity_id=i.id AND b.identity_id=i.id
    WHERE a.file_uuid=%s AND a.identity_id IS NOT NULL
    ORDER BY a.frame_number LIMIT 10
"""
cur.execute(_collision_sql, (FILE_UUID,))
collisions = cur.fetchall()

if not collisions:
    print(" ✅ No temporal collisions detected")
else:
    print(" ⚠️ 同一 identity 的 trace 出現在同一幀:")
    # The fifth column repeats the frame number (the join requires both
    # detections to share it), so only the first frame value is printed.
    for identity_name, first_trace, frame_no, second_trace, _same_frame in collisions:
        print(f" {identity_name}: trace {first_trace} & {second_trace} at frame {frame_no}")

print()
print(" 建議: 純 SQL 檢測,發現碰撞 → 自動 split into separate identities")

cur.close()
conn.close()

# ============================================================
# Summary
# ============================================================
# Prints the fixed recommendation table: one row per check with the chosen
# technique, model, speed and feasibility note.
_summary_rule = "=" * 70
print()
print(_summary_rule)
print("選型建議總結")
print(_summary_rule)
print()

# First tuple is the column header; the rest are the table rows.
_summary_header = ("檢查", "技術", "模型", "速度", "可行性")
_summary_rows = (
    ("1.取樣密度", "SQL + swift_face", "Apple Vision", "250fps", "✅ 已整合"),
    ("2.人臉驗證", "DeepFace analyze", "AgeNet", "0.2s/face", "✅ MIT license"),
    ("3.Embedding 品質", "numpy statistics", "None", "instant", "✅ 純計算"),
    ("4.時序衝突", "SQL JOIN", "None", "instant", "✅ 純查詢"),
    ("5.Speaker 一致性", "SQL + overlap", "None", "instant", "✅ 後續追加"),
)


def _format_summary_row(check, technique, model, speed, feasibility):
    """Lay out one table line with the fixed column widths 25/20/12/10."""
    return f" {check:<25} {technique:<20} {model:<12} {speed:<10} {feasibility}"


print(_format_summary_row(*_summary_header))
print(" " + "-" * 70)
for _row in _summary_rows:
    print(_format_summary_row(*_row))

print()
print(" 唯一需要外部模型的: Check 2 (DeepFace, MIT, 0.2s/face)")
print(" 其他全為純 SQL/統計,可立即實作")