feat: trace quality agent selection report, identity clustering runner_v2 DB write, age/gender CoreML selection, updated experiment config UUID
This commit is contained in:
223
scripts/age_benchmark.py
Normal file
223
scripts/age_benchmark.py
Normal file
@@ -0,0 +1,223 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Face Age Estimation — 選型實驗報告
|
||||
對 Charade 電影中不同 trace 的人臉進行年齡估算,
|
||||
比較 DeepFace、Apple Vision、MiVOLO 三個方案的準確度與性能。
|
||||
"""
|
||||
|
||||
import json, os, sys, time, tempfile, subprocess
|
||||
from pathlib import Path
|
||||
|
||||
# Config
# Every location below can be overridden through an environment variable so the
# script is not tied to one workstation; when a variable is unset the original
# hard-coded value is used unchanged.
VIDEO_PATH = os.environ.get(
    "AGE_BENCHMARK_VIDEO_PATH",
    "/Users/accusys/test_video/Old_Time_Movie_Show_-_Charade_1963.HD.mov",
)
DB_URL = os.environ.get(
    "AGE_BENCHMARK_DB_URL",
    "postgresql://accusys@localhost:5432/momentry",
)
# Identifier of the analysed file; the trace query filters on this value.
FILE_UUID = os.environ.get(
    "AGE_BENCHMARK_FILE_UUID",
    "1a04db97be5fa12bd77369831dc141fd",
)
OUTPUT_DIR = Path(
    os.environ.get(
        "AGE_BENCHMARK_OUTPUT_DIR",
        "/Users/accusys/momentry/output_dev/experiments/age_benchmark",
    )
)
# Extracted frames and the JSON report are written here, so create it up front.
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# Get trace samples with representative frames
import psycopg2

conn = psycopg2.connect(DB_URL)
cur = conn.cursor()

# Candidate traces need at least 5 detections. A trace is kept when it starts
# in the first or last 10% of the timeline (percent rank of its first frame)
# or when it is one of the 5 traces with the most detections. Rows come back
# ordered by first frame and capped at 12.
# NOTE(review): the LIMIT is applied after ordering by first_frame, so when
# more than 12 traces qualify the latest ones are the ones dropped.
# The file UUID is passed as a bound parameter instead of being formatted into
# the SQL text, leaving quoting to the driver.
cur.execute(
    """
    WITH ranked AS (
        SELECT trace_id, COUNT(*) AS fc,
               MIN(frame_number) AS first_frame,
               MAX(frame_number) AS last_frame,
               AVG(confidence) AS avg_conf,
               PERCENT_RANK() OVER (ORDER BY MIN(frame_number)) AS timeline_pos
        FROM dev.face_detections
        WHERE file_uuid = %s AND trace_id IS NOT NULL
        GROUP BY trace_id
        HAVING COUNT(*) >= 5
    )
    SELECT trace_id, fc, first_frame, last_frame, ROUND(avg_conf::numeric, 3),
           ROUND(timeline_pos::numeric, 2)
    FROM ranked
    WHERE timeline_pos <= 0.1 OR timeline_pos >= 0.9
       OR trace_id IN (
           SELECT trace_id FROM ranked
           ORDER BY fc DESC LIMIT 5
       )
    ORDER BY first_frame ASC
    LIMIT 12
    """,
    (FILE_UUID,),
)

# Each row: (trace_id, face_count, first_frame, last_frame, avg_conf, timeline_pos).
samples = cur.fetchall()
print(f"Selected {len(samples)} traces for age benchmark\n")
|
||||
|
||||
# The query results are already held in `samples`, so release the database
# handles before the slow ffmpeg work; this way they are closed even if frame
# extraction raises (for example when ffmpeg is not installed).
cur.close()
conn.close()

# Extract face crops using ffmpeg
# NOTE(review): ffmpeg is asked for the whole frame at the trace midpoint; no
# bounding box is applied here, so each "crop" is a full frame.
# NOTE(review): the frame rate is assumed to be 24 fps rather than read from
# the video -- confirm this matches the source file.
fps = 24.0
face_crops = []
for trace_id, fc, first_frame, last_frame, conf, pos in samples:
    mid_frame = (first_frame + last_frame) // 2
    mid_sec = mid_frame / fps
    crop_file = OUTPUT_DIR / f"trace_{trace_id}_fc{fc}_frame{mid_frame}.jpg"

    # Drop any image left over from an earlier run so the size check below
    # reflects this run only.
    if crop_file.exists():
        crop_file.unlink()

    # Extract frame
    proc = subprocess.run([
        "ffmpeg", "-y", "-ss", str(mid_sec), "-i", VIDEO_PATH,
        "-frames:v", "1", "-q:v", "3", str(crop_file)
    ], capture_output=True)

    if proc.returncode != 0:
        # Report the failure instead of silently skipping the trace.
        print(f" ✗ trace_{trace_id}: ffmpeg exited with code {proc.returncode}")
        continue

    # Files of 1000 bytes or less are treated as failed extractions.
    if crop_file.exists() and crop_file.stat().st_size > 1000:
        face_crops.append((trace_id, fc, first_frame, conf, pos, str(crop_file)))
        print(f" ✓ trace_{trace_id}: {fc} faces, first={first_frame} ({first_frame/fps:.0f}s), pos={pos}, crop={crop_file.stat().st_size}B")

print(f"\nExtracted {len(face_crops)} face crops\n")
|
||||
print("=" * 70)
print("BENCHMARK: DeepFace Age Estimation")
print("=" * 70)

from deepface import DeepFace
import warnings

warnings.filterwarnings("ignore")

# One tuple per sample:
# (trace_id, face_count, first_frame, timeline_pos, age, gender, emotion, conf).
# A failed analysis is recorded with age 0 and "?" placeholders so every
# sample still appears in the report.
deepface_results = []
# perf_counter is monotonic, so the measured duration cannot be skewed by
# wall-clock adjustments.
start = time.perf_counter()
for trace_id, fc, first_frame, conf, pos, crop_path in face_crops:
    try:
        result = DeepFace.analyze(
            img_path=crop_path,
            actions=['age', 'gender', 'emotion'],
            enforce_detection=False,
            detector_backend='opencv'
        )
        # analyze() may return a list; only the first entry is used.
        if isinstance(result, list):
            result = result[0]
        age = result.get('age', 0)
        gender = result.get('dominant_gender', '?')
        emotion = result.get('dominant_emotion', '?')
        deepface_results.append((trace_id, fc, first_frame, pos, age, gender, emotion, conf))
        print(f" trace_{trace_id:5d} | age={age:4.0f} | gender={gender:6s} | emotion={emotion:10s} | faces={fc:3d} | pos={pos:.2f} | conf={conf:.3f}")
    except Exception as e:
        # Best effort: one bad image must not abort the whole benchmark.
        print(f" trace_{trace_id:5d} | ERROR: {str(e)[:80]}")
        deepface_results.append((trace_id, fc, first_frame, pos, 0, "?", "?", conf))

deepface_time = time.perf_counter() - start
# Guard the per-face average: with zero extracted crops the original division
# raised ZeroDivisionError and stopped the script before the report.
deepface_per_face = deepface_time / len(face_crops) if face_crops else 0.0
print(f"\nDeepFace: {len(face_crops)} faces in {deepface_time:.1f}s ({deepface_per_face:.1f}s/face)\n")
|
||||
|
||||
# ============================================================
# Apple Vision section: nothing is measured here, the findings are only
# printed so the console output lists all three candidates.
_apple_vision_rule = "=" * 70
for _apple_vision_line in (
    _apple_vision_rule,
    "BENCHMARK: Apple Vision (via swift_face / native)",
    _apple_vision_rule,
    " Apple Vision does NOT expose direct age estimation.",
    " Available: face bounding box, landmarks (eyes/nose/mouth), pose (yaw/pitch/roll).",
    " Age must be inferred from 3rd-party model or heuristics (e.g., face size → age scaling).",
    " ⚠️ Not feasible for standalone age estimation without additional model.",
    "",  # trailing blank line, same as a bare print()
):
    print(_apple_vision_line)
|
||||
|
||||
# ============================================================
print("=" * 70)
print("BENCHMARK: MiVOLO (HuggingFace)")
print("=" * 70)
print(" Attempting to load ragavsachdeva/mivolo...")

# Defaults describe the "model unavailable" outcome; they are only replaced
# when the pipeline loads successfully.
# One tuple per sample:
# (trace_id, face_count, first_frame, timeline_pos, mid_age, age_range, score).
mivolo_results = []
mivolo_time = 0

# Only the import and the model load are treated as "MiVOLO not available".
# Keeping the benchmark loop outside this try stops unrelated errors (such as
# the per-face division with zero crops) from being reported as a load failure
# and from discarding results that were already collected.
try:
    from transformers import pipeline
    import torch  # not used below; presumably an availability check -- confirm

    mivolo_start = time.time()
    pipe = pipeline("image-classification", model="ragavsachdeva/mivolo", device="cpu")
    mivolo_load = time.time() - mivolo_start
except Exception as e:
    print(f" MiVOLO not available: {e}")
else:
    print(f" Model loaded in {mivolo_load:.1f}s")

    start = time.time()
    for trace_id, fc, first_frame, conf, pos, crop_path in face_crops:
        try:
            result = pipe(crop_path)
            top = result[0]
            label = top['label']
            score = top['score']
            # Parse age from label (format: "20-29" or "40-49" etc); a label
            # without a dash is recorded as age 0.
            age_range = label
            mid_age = sum(int(x) for x in label.split('-')) // 2 if '-' in label else 0
            mivolo_results.append((trace_id, fc, first_frame, pos, mid_age, age_range, score))
            print(f" trace_{trace_id:5d} | age={mid_age:3d} ({age_range:5s}) | score={score:.3f} | faces={fc:3d}")
        except Exception as e:
            # Best effort: keep a placeholder row so the sample stays in the report.
            print(f" trace_{trace_id:5d} | ERROR: {str(e)[:80]}")
            mivolo_results.append((trace_id, fc, first_frame, pos, 0, "?", 0))

    mivolo_time = time.time() - start
    # Avoid ZeroDivisionError when no crops were extracted.
    mivolo_per_face = mivolo_time / len(face_crops) if face_crops else 0.0
    print(f"\nMiVOLO: {len(face_crops)} faces in {mivolo_time:.1f}s ({mivolo_per_face:.1f}s/face)")
|
||||
|
||||
# ============================================================
# Summary Report
# ============================================================
from decimal import Decimal

print("\n" + "=" * 70)
print("SUMMARY REPORT")
print("=" * 70)


def _jsonable(value):
    """Return *value* in a form that ``json.dump`` can serialise.

    The trace query rounds its confidence and timeline columns as ``numeric``,
    which psycopg2 hands back as ``Decimal``; ``json.dump`` rejects those.
    Objects exposing ``item()`` (numpy scalars do) are unwrapped as well,
    in case an estimator returns one -- not verifiable from this file.
    Anything else is returned unchanged.
    """
    if isinstance(value, Decimal):
        return float(value)
    unwrap = getattr(value, "item", None)
    if callable(unwrap):
        return unwrap()
    return value


report = {
    "experiment": "Face Age Estimation Benchmark",
    "video": "Charade (1963)",
    "file_uuid": FILE_UUID,
    "sample_count": len(face_crops),
    "methods": {},
}

if deepface_results:
    # Age 0 and gender "?" mark failed analyses; leave them out of the statistics.
    deepface_ages = [_jsonable(r[4]) for r in deepface_results if r[4] > 0]
    deepface_genders = [r[5] for r in deepface_results if r[5] != '?']
    report["methods"]["DeepFace"] = {
        "time_total_sec": round(deepface_time, 1),
        "time_per_face_sec": round(deepface_time / len(face_crops), 1) if face_crops else 0,
        "age_range": f"{min(deepface_ages):.0f}-{max(deepface_ages):.0f}" if deepface_ages else "N/A",
        "age_mean": round(sum(deepface_ages) / len(deepface_ages), 1) if deepface_ages else 0,
        "gender_distribution": f"{deepface_genders.count('Woman')}F/{deepface_genders.count('Man')}M",
        "license": "MIT",
        "results": [
            {
                "trace_id": _jsonable(r[0]),
                "faces": _jsonable(r[1]),
                "first_frame": _jsonable(r[2]),
                "timeline_pos": _jsonable(r[3]),
                "age": _jsonable(r[4]),
                "gender": r[5],
                "emotion": r[6],
                "face_confidence": _jsonable(r[7]),
            }
            for r in deepface_results
        ],
    }

report["methods"]["Apple Vision"] = {
    "verdict": "NOT FEASIBLE — no built-in age estimation",
    "available": "face rectangle, landmarks (63 points), yaw/pitch/roll",
    "requires": "external age model (e.g., CoreML AgeNet)",
    "license": "Apple System (built-in, no additional license)",
}

if mivolo_results:
    mivolo_ages = [_jsonable(r[4]) for r in mivolo_results if r[4] > 0]
    report["methods"]["MiVOLO"] = {
        "time_total_sec": round(mivolo_time, 1),
        "time_per_face_sec": round(mivolo_time / len(face_crops), 1) if face_crops else 0,
        "age_mean": round(sum(mivolo_ages) / len(mivolo_ages), 1) if mivolo_ages else 0,
        "license": "Apache 2.0",
        "results": [
            {
                "trace_id": _jsonable(r[0]),
                "age_mid": _jsonable(r[4]),
                "age_range": r[5],
                "score": _jsonable(r[6]),
            }
            for r in mivolo_results
        ],
    }
else:
    report["methods"]["MiVOLO"] = {
        "verdict": "Failed to load — requires torch/transformers or model download",
        "license": "Apache 2.0",
    }

report_file = OUTPUT_DIR / "age_benchmark_report.json"
# The report holds non-ASCII text and is written with ensure_ascii=False, so
# the encoding is fixed explicitly instead of depending on the locale default.
with open(report_file, 'w', encoding='utf-8') as f:
    json.dump(report, f, indent=2, ensure_ascii=False)
print(f"\nReport saved: {report_file}")
|
||||
|
||||
# Console summary table
# The rows are derived from what actually ran: the original printed MiVOLO as
# failed and DeepFace as the only working option regardless of the results,
# and divided by the number of crops even when it was zero.
_face_total = len(face_crops)
# A method counts as working when at least one sample got a non-zero age
# (age 0 is the placeholder recorded for failed samples).
_deepface_ok = any(r[4] > 0 for r in deepface_results)
_mivolo_ok = any(r[4] > 0 for r in mivolo_results)


def _timing_cells(total_sec, usable):
    """Return the (Time, Speed/Face) cells, or N/A when there is nothing to report."""
    if not usable or not _face_total:
        return "N/A", "N/A"
    return f"{total_sec:.1f}s", f"{total_sec / _face_total:.1f}s"


_df_time, _df_speed = _timing_cells(deepface_time, _deepface_ok)
_mv_time, _mv_speed = _timing_cells(mivolo_time, _mivolo_ok)
_df_age, _df_verdict = ("OK", "✓ Recommended") if _deepface_ok else ("N/A", "✗ Failed")
_mv_age, _mv_verdict = ("OK", "✓ Working") if _mivolo_ok else ("N/A", "✗ Failed")

print("\n" + "-" * 70)
print(f"{'Method':<15} {'Time':>8} {'Speed/Face':>10} {'License':>10} {'Age Range':>12} {'Verdict':>15}")
print("-" * 70)
print(f"{'DeepFace':<15} {_df_time:>8} {_df_speed:>10} {'MIT':>10} {_df_age:>12} {_df_verdict:>15}")
print(f"{'Apple Vision':<15} {'N/A':>8} {'N/A':>10} {'System':>10} {'N/A':>12} {'✗ No age API':>15}")
print(f"{'MiVOLO':<15} {_mv_time:>8} {_mv_speed:>10} {'Apache 2.0':>10} {_mv_age:>12} {_mv_verdict:>15}")
print("-" * 70)

if _deepface_ok and not _mivolo_ok:
    print("\nConclusion: DeepFace is the only working option. MIT license, no restrictions.")
elif _deepface_ok and _mivolo_ok:
    print("\nConclusion: DeepFace and MiVOLO both produced age estimates; compare the per-trace results in the saved report.")
elif _mivolo_ok:
    print("\nConclusion: MiVOLO is the only working option. Apache 2.0 license.")
else:
    print("\nConclusion: no method produced usable age estimates in this run.")
if _deepface_ok:
    print("Estimated model download: ~100MB on first use (cached after).")
|
||||
Reference in New Issue
Block a user