feat: trace quality agent selection report, identity clustering runner_v2 DB write, age/gender CoreML selection, updated experiment config UUID

2026-05-06 14:41:48 +08:00
parent 74b6182eba
commit 65a1f77e65
1048 changed files with 103499 additions and 0 deletions
--- a/scripts/face_mediapipe_test.py
+++ b/scripts/face_mediapipe_test.py
@@ -0,0 +1,200 @@
+#!/opt/homebrew/bin/python3.11
+"""
+POC: MediaPipe Face Detection vs Apple Vision Framework (an InsightFace comparison is not implemented in this script)
+
+Tests face detection on video frames and reports:
+- Detection count
+- Bounding box quality
+- Landmarks (468 face mesh)
+- Processing speed
+"""
+import sys
+import json
+import os
+import time
+import subprocess
+import argparse
+
+# Put the directory containing this script at the front of the module search path.
+sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
+
+
+def extract_frames(video_path, sample_interval=30, max_frames=50):
+    """Extract every ``sample_interval``-th frame of a video as JPEG files.
+
+    ffmpeg writes the frames into a freshly created temporary directory. On
+    success the caller owns that directory and must delete it; on failure the
+    directory is removed here before the error propagates.
+
+    Args:
+        video_path: Path of the video passed to ``ffmpeg -i``.
+        sample_interval: Keep frames whose index is a multiple of this value.
+        max_frames: Upper bound on the number of frame paths returned.
+
+    Returns:
+        A ``(tmpdir, frame_paths)`` tuple, with ``frame_paths`` sorted by
+        file name.
+
+    Raises:
+        ValueError: If ``sample_interval`` is smaller than 1.
+        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
+        FileNotFoundError: If the ``ffmpeg`` executable cannot be found.
+    """
+    import shutil
+    import tempfile
+
+    if sample_interval < 1:
+        # mod(n, 0) in the select filter would not pick any sensible frame set.
+        raise ValueError(f"sample_interval must be >= 1, got {sample_interval}")
+
+    tmpdir = tempfile.mkdtemp(prefix="face_test_")
+    pattern = os.path.join(tmpdir, "frame_%05d.jpg")
+    cmd = ["ffmpeg", "-y", "-v", "quiet", "-i", video_path,
+           "-vf", f"select=not(mod(n\\,{sample_interval}))",
+           "-vsync", "vfr", "-q:v", "5"]
+    if max_frames > 0:
+        # Let ffmpeg stop once enough frames are written instead of decoding
+        # the whole video and discarding the surplus afterwards.
+        cmd += ["-frames:v", str(max_frames)]
+    cmd.append(pattern)
+
+    try:
+        subprocess.run(cmd, check=True)
+    except BaseException:
+        # Nobody else knows about tmpdir yet, so clean it up before re-raising.
+        shutil.rmtree(tmpdir, ignore_errors=True)
+        raise
+
+    files = sorted(f for f in os.listdir(tmpdir) if f.endswith(".jpg"))[:max_frames]
+    return tmpdir, [os.path.join(tmpdir, f) for f in files]
+
+
+def _ensure_mediapipe_model(model_url, model_path, notice):
+    """Download ``model_url`` to ``model_path`` unless that file already exists."""
+    import urllib.request
+
+    if os.path.exists(model_path):
+        return
+    print(notice)
+    # Download to a side file first so an interrupted transfer never leaves a
+    # truncated model behind that later runs would mistake for a valid cache.
+    partial_path = model_path + ".part"
+    urllib.request.urlretrieve(model_url, partial_path)
+    os.replace(partial_path, model_path)
+
+
+def test_mediapipe(frame_paths, fps):
+    """Run MediaPipe face detection and face landmarking on extracted frames.
+
+    The detector runs on every frame; the landmarker only on the first ten.
+    Model files are cached in a ``models`` directory next to this script and
+    downloaded on first use.
+
+    Args:
+        frame_paths: Paths of the image files to analyse.
+        fps: Currently unused; kept so existing callers keep working.
+
+    Returns:
+        ``None`` when MediaPipe or OpenCV cannot be imported, otherwise a dict
+        with ``frames_processed``, ``frames_with_faces``, ``total_faces``,
+        ``time_sec`` (detection loop only), ``landmark_time_sec`` and
+        ``landmarks_per_face``.
+    """
+    try:
+        import cv2
+        import mediapipe as mp
+        from mediapipe.tasks.python import vision
+        from mediapipe.tasks.python.core.base_options import BaseOptions
+    except ImportError:
+        print("[MediaPipe] Not available, skipping")
+        return None
+
+    model_dir = os.path.join(os.path.dirname(__file__), "models")
+    os.makedirs(model_dir, exist_ok=True)
+
+    def load_image(path):
+        """Read ``path`` as an RGB ``mp.Image``; return None if unreadable."""
+        bgr = cv2.imread(path)
+        if bgr is None:
+            return None
+        # OpenCV decodes to BGR, while ImageFormat.SRGB expects RGB order.
+        rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
+        return mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb)
+
+    # --- Face detection -------------------------------------------------
+    detector_url = "https://storage.googleapis.com/mediapipe-models/face_detector/blaze_face_short_range/float16/latest/face_detector.task"
+    detector_path = os.path.join(model_dir, "face_detector.task")
+    _ensure_mediapipe_model(
+        detector_url, detector_path,
+        f"[MediaPipe] Downloading model: {detector_url}",
+    )
+
+    total_faces = 0
+    frames_with_faces = 0
+    detector_options = vision.FaceDetectorOptions(
+        base_options=BaseOptions(model_asset_path=detector_path),
+        running_mode=vision.RunningMode.IMAGE,
+    )
+    with vision.FaceDetector.create_from_options(detector_options) as detector:
+        # Start the clock after setup so download/model load time is excluded.
+        t0 = time.time()
+        for path in frame_paths:
+            mp_img = load_image(path)
+            if mp_img is None:
+                continue
+            detections = detector.detect(mp_img).detections
+            if detections:
+                frames_with_faces += 1
+                total_faces += len(detections)
+        elapsed = time.time() - t0
+    print(f"[MediaPipe] Detection: {len(frame_paths)} frames, {frames_with_faces} with faces, {total_faces} faces, {elapsed:.2f}s")
+
+    # --- Face landmarks -------------------------------------------------
+    landmark_path = os.path.join(model_dir, "face_landmarker.task")
+    _ensure_mediapipe_model(
+        "https://storage.googleapis.com/mediapipe-models/face_landmarker/face_landmarker/float16/latest/face_landmarker.task",
+        landmark_path,
+        "[MediaPipe] Downloading landmark model...",
+    )
+
+    landmarks_total = 0
+    faces_with_landmarks = 0
+    landmarker_options = vision.FaceLandmarkerOptions(
+        base_options=BaseOptions(model_asset_path=landmark_path),
+        running_mode=vision.RunningMode.IMAGE,
+        output_face_blendshapes=False,
+        output_facial_transformation_matrixes=False,
+    )
+    with vision.FaceLandmarker.create_from_options(landmarker_options) as landmarker:
+        t1 = time.time()
+        for path in frame_paths[:10]:  # Only test 10 frames for landmarks
+            mp_img = load_image(path)
+            if mp_img is None:
+                continue
+            for face in landmarker.detect(mp_img).face_landmarks:
+                faces_with_landmarks += 1
+                landmarks_total += len(face)
+        elapsed2 = time.time() - t1
+
+    # Average over every face seen, not just the faces of the last frame.
+    per_face = landmarks_total // max(faces_with_landmarks, 1)
+    print(f"[MediaPipe] Face Mesh (10 frames): {landmarks_total} total landmarks (~{per_face} per face)")
+
+    return {
+        "frames_processed": len(frame_paths),
+        "frames_with_faces": frames_with_faces,
+        "total_faces": total_faces,
+        "time_sec": elapsed,
+        "landmark_time_sec": elapsed2,
+        # Measured value when faces were found; otherwise the nominal count
+        # this script has always reported.
+        "landmarks_per_face": per_face if faces_with_landmarks else 468,
+    }
+
+
+def test_vision_framework(frame_paths, fps):
+    """Run the prebuilt Swift ``face_compare_test`` binary and time it.
+
+    The binary is looked up under ``swift_processors/.build/debug`` next to
+    this script. The last 500 characters of its stdout are printed.
+
+    Args:
+        frame_paths: Paths of the extracted frames; the first one is used to
+            derive the path argument given to the binary.
+        fps: Currently unused; kept so existing callers keep working.
+
+    Returns:
+        ``{"time_sec": <wall-clock seconds>}``, or ``None`` when the binary is
+        missing, there are no frames, or the run times out.
+    """
+    swift_face = os.path.join(os.path.dirname(__file__),
+                              "swift_processors/.build/debug/face_compare_test")
+
+    if not os.path.exists(swift_face):
+        print("[Vision] Binary not found, skipping")
+        return None
+    if not frame_paths:
+        print("[Vision] No frames to process, skipping")
+        return None
+
+    # NOTE(review): this strips the last two path components of the first
+    # frame, which yields the parent of the frame directory. The original
+    # author already flagged it as not working for single files; confirm what
+    # path face_compare_test actually expects before relying on the result.
+    target = frame_paths[0].rsplit("/", 2)[0].replace("/frames", "")
+
+    print(f"[Vision] Running face compare test...")
+    t0 = time.time()
+    try:
+        result = subprocess.run(
+            [swift_face, target,
+             "--sample-interval", "1", "--max-frames", str(len(frame_paths))],
+            capture_output=True, text=True, timeout=120
+        )
+    except subprocess.TimeoutExpired:
+        print("[Vision] Timed out after 120s, skipping")
+        return None
+    elapsed = time.time() - t0
+
+    if result.returncode != 0:
+        # Surface the failure instead of silently printing an empty stdout tail.
+        print(f"[Vision] Exited with status {result.returncode}: {result.stderr[-500:]}")
+    print(result.stdout[-500:])
+    return {"time_sec": elapsed}
+
+
+def main():
+    """Parse CLI arguments, extract frames, run both tests and print a summary.
+
+    The temporary frame directory is removed even when one of the tests raises.
+    """
+    import shutil
+
+    parser = argparse.ArgumentParser()
+    parser.add_argument("video_path")
+    parser.add_argument("--sample-interval", type=int, default=30)
+    parser.add_argument("--max-frames", type=int, default=50)
+    args = parser.parse_args()
+
+    print(f"Testing: {args.video_path}")
+
+    # Extract frames
+    tmpdir, frames = extract_frames(args.video_path, args.sample_interval, args.max_frames)
+    try:
+        print(f"Extracted {len(frames)} frames")
+        if not frames:
+            # Neither test can do anything useful without input frames.
+            print("No frames extracted, nothing to compare")
+            return
+
+        # MediaPipe
+        print("\n=== MediaPipe ===")
+        mp_result = test_mediapipe(frames, 24)
+
+        # Vision Framework
+        print("\n=== Apple Vision Framework ===")
+        test_vision_framework(frames, 24)
+
+        # Summary
+        print("\n=== Comparison ===")
+        if mp_result:
+            print(f"MediaPipe: {mp_result['total_faces']} faces in {mp_result['frames_with_faces']} frames, {mp_result['time_sec']:.2f}s")
+            print(f"  Landmarks: {mp_result['landmarks_per_face']} per face")
+        print(f"Vision Framework: (see above)")
+    finally:
+        # Cleanup
+        shutil.rmtree(tmpdir, ignore_errors=True)
+
+
+# Run the comparison only when this file is executed as a script, not on import.
+if __name__ == "__main__":
+    main()