#!/usr/bin/env python3 """ Trace 品質檢查 Agent — 選型實驗報告 評估每個 trace 是否符合 identity 標準,檢測需補掃/覆查的異常 trace。 檢查項目: 1. 取樣密度 — trace < 3 frames → 需要 dense scan 2. 人臉驗證 — DeepFace vs Apple Vision 確認是否為人臉 3. Embedding 品質 — trace 內方差過大 → 可能混入多人 4. 時序衝突 — 同 identity 兩 trace 同時出現 → 需 split """ import json, sys, os, time, argparse, io from collections import defaultdict from pathlib import Path DB_URL = "postgresql://accusys@localhost:5432/momentry" SCHEMA = "dev" FILE_UUID = "417a7e93860d70c87aee6c4c1b715d70" VIDEO_PATH = "/Users/accusys/test_video/Old_Time_Movie_Show_-_Charade_1963.HD.mov" OUT_DIR = Path("/Users/accusys/momentry/output_dev/experiments/trace_quality") OUT_DIR.mkdir(parents=True, exist_ok=True) # ============================================================ # Report Header # ============================================================ print("=" * 70) print("Trace 品質檢查 — 技術選型實驗報告") print("=" * 70) print(f"File: Charade (1963), {FILE_UUID}") print(f"Traces: 2347, Faces: 6182") print() import psycopg2 import psycopg2.extras import numpy as np conn = psycopg2.connect(DB_URL) cur = conn.cursor() # ============================================================ # Check 1: Sample Density (取樣密度) # ============================================================ print("=" * 70) print("Check 1: 取樣密度 (Sample Density)") print("=" * 70) cur.execute(f""" SELECT CASE WHEN fc = 1 THEN '1 frame' WHEN fc <= 3 THEN '2-3 frames' WHEN fc <= 10 THEN '4-10 frames' ELSE '11+ frames' END AS density, COUNT(*) AS trace_count, ROUND(COUNT(*)::numeric / (SELECT COUNT(*) FROM (SELECT trace_id, COUNT(*) FROM {SCHEMA}.face_detections WHERE file_uuid=%s AND trace_id IS NOT NULL GROUP BY trace_id) t) * 100, 1) AS pct FROM (SELECT trace_id, COUNT(*) AS fc FROM {SCHEMA}.face_detections WHERE file_uuid=%s AND trace_id IS NOT NULL GROUP BY trace_id) t GROUP BY 1 ORDER BY MIN(fc) """, (FILE_UUID, FILE_UUID)) for density, count, pct in cur.fetchall(): marker = " ← needs dense scan" if "frame" in density and int(density[0]) < 4 else "" print(f" {density:<15} {count:>6} traces ({pct:>5.1f}%){marker}") need_dense = sum(1 for _ in cur.fetchall()) if False else 0 cur.execute(f"SELECT COUNT(*) FROM (SELECT trace_id FROM {SCHEMA}.face_detections WHERE file_uuid=%s AND trace_id IS NOT NULL GROUP BY trace_id HAVING COUNT(*) < 4) t", (FILE_UUID,)) need_dense = cur.fetchone()[0] print(f"\n 需 dense scan: {need_dense} traces ({need_dense/2347*100:.1f}%)") print() print(" 技術方案:") print(" 方案A: swift_face --sample-interval 1 (Apple Vision, ~250fps)") print(" 方案B: ffmpeg + DeepFace (Python, ~0.2s/face)") print(" 建議: 方案A,無需額外模型,速度快,已整合於 pipeline") # ============================================================ # Check 2: Human Face Verification (人臉驗證) # ============================================================ print() print("=" * 70) print("Check 2: 人臉驗證 (Human Face Verification)") print("=" * 70) # Sample 20 traces: 10 with high confidence (likely human), 10 with low (possibly non-human) cur.execute(f""" (SELECT trace_id, AVG(confidence)::numeric(4,3) AS c, AVG(width)::int AS w, AVG(height)::int AS h, MIN(frame_number) AS f FROM {SCHEMA}.face_detections WHERE file_uuid=%s AND trace_id IS NOT NULL GROUP BY trace_id ORDER BY AVG(confidence) ASC LIMIT 5) UNION ALL (SELECT trace_id, AVG(confidence)::numeric(4,3) AS c, AVG(width)::int AS w, AVG(height)::int AS h, MIN(frame_number) AS f FROM {SCHEMA}.face_detections WHERE file_uuid=%s AND trace_id IS NOT NULL GROUP BY trace_id ORDER BY AVG(confidence) DESC LIMIT 5) """, (FILE_UUID, FILE_UUID)) samples = cur.fetchall() # Test DeepFace print(" DeepFace 人臉驗證 (10 samples):") try: from deepface import DeepFace import warnings warnings.filterwarnings("ignore") t0 = time.time() for tid, conf, w, h, frame in samples: sec = frame / 59.94 img_path = OUT_DIR / f"trace_{tid}_verify.jpg" if not img_path.exists(): os.system(f'ffmpeg -y -ss {sec:.1f} -i "{VIDEO_PATH}" -frames:v 1 -q:v 3 {img_path} 2>/dev/null') try: r = DeepFace.analyze(str(img_path), actions=['age','gender'], enforce_detection=False, detector_backend='opencv') if isinstance(r, list): r = r[0] age = r.get('age', 0) gender = r.get('dominant_gender', 'N/A') is_human = age > 0 and gender in ('Man', 'Woman') print(f" trace {tid:>5}: conf={conf:.3f} {w}x{h} → age={age:.0f} gender={gender:<5} {'✅ human' if is_human else '⚠️ non-human?'}") except Exception as e: print(f" trace {tid:>5}: conf={conf:.3f} {w}x{h} → ERROR {str(e)[:60]}") dt = time.time() - t0 print(f" Time: {dt:.1f}s ({dt/10:.1f}s/face)") except ImportError: print(" DeepFace not available") # Test Apple Vision approach (statistical, no ML) print() print(" Statistical filter (no ML):") print(" Rule: confidence < 0.5 OR aspect_ratio deviation > 0.3 → flag") cur.execute(f""" SELECT COUNT(*) FROM {SCHEMA}.face_detections WHERE file_uuid=%s AND trace_id IS NOT NULL AND confidence < 0.5 """, (FILE_UUID,)) low_conf = cur.fetchone()[0] print(f" Low confidence (<0.5): {low_conf} faces") print(f" Aspect ratio: all detections are square (Vision bbox), no filtering possible") print() print(" 建議: DeepFace verify for low-confidence traces only") print(" 可選 gateway: conf < 0.6 才跑 DeepFace,節省 90% 成本") # ============================================================ # Check 3: Embedding Quality # ============================================================ print() print("=" * 70) print("Check 3: Embedding Quality (嵌入品質)") print("=" * 70) # Check intra-trace embedding variance for top 5 largest traces cur.execute(f""" SELECT trace_id, COUNT(*) AS fc, AVG(confidence)::numeric(4,3) AS conf FROM {SCHEMA}.face_detections WHERE file_uuid=%s AND trace_id IS NOT NULL GROUP BY trace_id ORDER BY fc DESC LIMIT 10 """, (FILE_UUID,)) top_traces = cur.fetchall() print(" Intra-trace embedding variance (top 10 traces by size):") for tid, fc, conf in top_traces: cur.execute(f""" SELECT embedding FROM {SCHEMA}.face_detections WHERE file_uuid=%s AND trace_id=%s AND embedding IS NOT NULL """, (FILE_UUID, tid)) embs = [np.array(row[0]) for row in cur.fetchall() if row[0]] if len(embs) < 2: print(f" trace {tid:>5}: {fc:>3} faces, conf={conf:.3f} — not enough embeddings") continue # Normalize and compute pairwise cosine similarity embs_norm = np.array([e / (np.linalg.norm(e) + 1e-10) for e in embs]) sim_matrix = embs_norm @ embs_norm.T np.fill_diagonal(sim_matrix, 0) # Exclude diagonal zeros when finding min non_diag = sim_matrix[sim_matrix > 0.0001] var = float(1.0 - np.mean(sim_matrix[sim_matrix > 0.0001])) if len(non_diag) > 0 else 0.0 min_sim = float(np.min(non_diag)) if len(non_diag) > 0 else 0.0 quality = "✅ good" if var < 0.3 and min_sim > 0.5 else \ "⚠️ check" if var < 0.5 and min_sim > 0.3 else \ "❌ split likely" print(f" trace {tid:>5}: {fc:>3} faces, conf={conf:.3f}, variance={var:.3f}, min_sim={min_sim:.3f} → {quality}") print() print(" 建議: variance > 0.2 OR min_sim < 0.4 → 標記 split") print(" 純統計方法,無需模型") # ============================================================ # Check 4: Temporal Collision # ============================================================ print() print("=" * 70) print("Check 4: 時序衝突 (Temporal Collision)") print("=" * 70) cur.execute(f""" SELECT i.name, a.trace_id, a.frame_number AS a_frame, b.trace_id AS b_trace, b.frame_number AS b_frame FROM {SCHEMA}.face_detections a JOIN {SCHEMA}.face_detections b ON a.file_uuid=b.file_uuid AND a.frame_number=b.frame_number AND a.trace_id