#!/opt/homebrew/bin/python3.11
"""
POC: MediaPipe Face Detection vs Apple Vision Framework vs InsightFace

Tests face detection on video frames and reports:
- Detection count
- Bounding box quality
- Landmarks (468 face mesh)
- Processing speed

Only the MediaPipe and Apple Vision paths are implemented in this file.
"""

import argparse
import json
import os
import shutil
import subprocess
import sys
import tempfile
import time
import urllib.request

# Directory containing this script; models and the Swift binary live beside it.
_SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
sys.path.insert(0, _SCRIPT_DIR)

_FACE_DETECTOR_URL = (
    "https://storage.googleapis.com/mediapipe-models/face_detector/"
    "blaze_face_short_range/float16/latest/face_detector.task"
)
_FACE_LANDMARKER_URL = (
    "https://storage.googleapis.com/mediapipe-models/face_landmarker/"
    "face_landmarker/float16/latest/face_landmarker.task"
)

# Landmark extraction is only sampled on the first few frames.
_LANDMARK_SAMPLE_FRAMES = 10

# Reported when no face was measured, matching the figure in the module header.
_NOMINAL_LANDMARKS_PER_FACE = 468


def extract_frames(video_path, sample_interval=30, max_frames=50):
    """Extract every ``sample_interval``-th frame of a video using ffmpeg.

    Args:
        video_path: Path of the video handed to ``ffmpeg -i``.
        sample_interval: Keep one frame out of this many (must be >= 1).
        max_frames: Maximum number of frame paths to return.

    Returns:
        ``(tmpdir, frame_paths)``: a freshly created temporary directory and
        the sorted JPEG paths inside it. The caller owns ``tmpdir`` and is
        responsible for deleting it.

    Raises:
        ValueError: If ``sample_interval`` is smaller than 1.
        OSError: If ffmpeg cannot be started (e.g. it is not installed).
        subprocess.CalledProcessError: If ffmpeg exits with an error.
    """
    if sample_interval < 1:
        raise ValueError(f"sample_interval must be >= 1, got {sample_interval}")

    tmpdir = tempfile.mkdtemp(prefix="face_test_")
    pattern = os.path.join(tmpdir, "frame_%05d.jpg")
    cmd = [
        "ffmpeg", "-y", "-v", "quiet",
        "-i", video_path,
        "-vf", f"select=not(mod(n\\,{sample_interval}))",
        "-vsync", "vfr",
        "-q:v", "5",
    ]
    if max_frames is not None and max_frames > 0:
        # Stop ffmpeg once enough frames were written instead of decoding
        # the whole video and discarding the surplus afterwards.
        cmd += ["-frames:v", str(max_frames)]
    cmd.append(pattern)

    try:
        subprocess.run(cmd, check=True)
    except (OSError, subprocess.CalledProcessError):
        # Do not leak the temporary directory when extraction fails.
        shutil.rmtree(tmpdir, ignore_errors=True)
        raise

    names = sorted(f for f in os.listdir(tmpdir) if f.endswith(".jpg"))
    return tmpdir, [os.path.join(tmpdir, f) for f in names[:max_frames]]


def _ensure_model(model_dir, filename, url):
    """Return the local path of a model file, downloading it on first use."""
    os.makedirs(model_dir, exist_ok=True)
    model_path = os.path.join(model_dir, filename)
    if not os.path.exists(model_path):
        print(f"[MediaPipe] Downloading model: {url}")
        # Download to a side file first so an interrupted transfer never
        # leaves a truncated model that later runs would pick up.
        partial_path = model_path + ".part"
        try:
            urllib.request.urlretrieve(url, partial_path)
            os.replace(partial_path, model_path)
        finally:
            if os.path.exists(partial_path):
                os.remove(partial_path)
    return model_path


def test_mediapipe(frame_paths, fps):
    """Benchmark MediaPipe face detection and face landmarks on frames.

    Args:
        frame_paths: Image file paths to run detection on. Only the first
            ``_LANDMARK_SAMPLE_FRAMES`` are used for the landmark pass.
        fps: Unused; kept so both benchmark functions share a signature.

    Returns:
        ``None`` when MediaPipe or OpenCV cannot be imported, otherwise a dict
        with ``frames_processed``, ``frames_with_faces``, ``total_faces``,
        ``time_sec`` (detection loop only) and ``landmarks_per_face``.
    """
    try:
        import cv2
        import mediapipe as mp
        from mediapipe.tasks.python import vision
        from mediapipe.tasks.python.core.base_options import BaseOptions
    except ImportError as exc:
        print(f"[MediaPipe] Not available ({exc}), skipping")
        return None

    model_dir = os.path.join(_SCRIPT_DIR, "models")

    def load_image(path):
        """Read an image file as a MediaPipe image, or None if unreadable."""
        bgr = cv2.imread(path)
        if bgr is None:
            return None
        # OpenCV decodes to BGR while the SRGB image format expects RGB.
        rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
        return mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb)

    # --- Face detection -----------------------------------------------------
    detector_options = vision.FaceDetectorOptions(
        base_options=BaseOptions(
            model_asset_path=_ensure_model(
                model_dir, "face_detector.task", _FACE_DETECTOR_URL
            )
        ),
        running_mode=vision.RunningMode.IMAGE,
    )
    total_faces = 0
    frames_with_faces = 0
    with vision.FaceDetector.create_from_options(detector_options) as detector:
        # Start timing only once the model is loaded so that download and
        # initialisation do not distort the per-frame speed comparison.
        t0 = time.perf_counter()
        for path in frame_paths:
            image = load_image(path)
            if image is None:
                continue
            detections = detector.detect(image).detections
            if detections:
                frames_with_faces += 1
                total_faces += len(detections)
        elapsed = time.perf_counter() - t0
    print(
        f"[MediaPipe] Detection: {len(frame_paths)} frames, "
        f"{frames_with_faces} with faces, {total_faces} faces, {elapsed:.2f}s"
    )

    # --- Face landmarks -----------------------------------------------------
    landmarker_options = vision.FaceLandmarkerOptions(
        base_options=BaseOptions(
            model_asset_path=_ensure_model(
                model_dir, "face_landmarker.task", _FACE_LANDMARKER_URL
            )
        ),
        running_mode=vision.RunningMode.IMAGE,
        output_face_blendshapes=False,
        output_facial_transformation_matrixes=False,
    )
    landmark_frames = frame_paths[:_LANDMARK_SAMPLE_FRAMES]
    landmarks_total = 0
    landmark_faces = 0
    with vision.FaceLandmarker.create_from_options(landmarker_options) as landmarker:
        t1 = time.perf_counter()
        for path in landmark_frames:
            image = load_image(path)
            if image is None:
                continue
            for face in landmarker.detect(image).face_landmarks or []:
                landmark_faces += 1
                landmarks_total += len(face)
        landmark_elapsed = time.perf_counter() - t1

    # Average over every face seen, not just the faces of the last frame.
    if landmark_faces:
        landmarks_per_face = landmarks_total // landmark_faces
    else:
        landmarks_per_face = _NOMINAL_LANDMARKS_PER_FACE
    print(
        f"[MediaPipe] Face Mesh ({len(landmark_frames)} frames): "
        f"{landmarks_total} total landmarks "
        f"(~{landmarks_per_face} per face), {landmark_elapsed:.2f}s"
    )

    return {
        "frames_processed": len(frame_paths),
        "frames_with_faces": frames_with_faces,
        "total_faces": total_faces,
        "time_sec": elapsed,
        "landmarks_per_face": landmarks_per_face,
    }


def test_vision_framework(frame_paths, fps):
    """Benchmark Apple Vision face detection via the prebuilt Swift binary.

    Args:
        frame_paths: Frame paths; only the first one is used, to derive the
            path argument passed to the binary.
        fps: Unused; kept so both benchmark functions share a signature.

    Returns:
        ``{"time_sec": elapsed}`` when the binary ran, or ``None`` when it is
        missing, there are no frames, it could not be started, or it timed out.
    """
    swift_face = os.path.join(
        _SCRIPT_DIR, "swift_processors/.build/debug/face_compare_test"
    )
    if not os.path.exists(swift_face):
        print("[Vision] Binary not found, skipping")
        return None
    if not frame_paths:
        print("[Vision] No frames to process, skipping")
        return None

    # NOTE(review): this strips the last two path components of the first
    # frame and any "/frames" segment. The original author flagged that it
    # "won't work for single files"; confirm what path face_compare_test
    # actually expects before trusting the Vision numbers.
    input_root = frame_paths[0].rsplit("/", 2)[0].replace("/frames", "")

    print("[Vision] Running face compare test...")
    t0 = time.perf_counter()
    try:
        result = subprocess.run(
            [
                swift_face,
                input_root,
                "--sample-interval", "1",
                "--max-frames", str(len(frame_paths)),
            ],
            capture_output=True,
            text=True,
            timeout=120,
        )
    except subprocess.TimeoutExpired:
        print("[Vision] Timed out after 120s, skipping")
        return None
    except OSError as exc:
        print(f"[Vision] Could not run binary ({exc}), skipping")
        return None
    elapsed = time.perf_counter() - t0

    print(result.stdout[-500:])
    if result.returncode != 0:
        print(
            f"[Vision] Exited with status {result.returncode}: "
            f"{result.stderr[-500:]}"
        )
    return {"time_sec": elapsed}


# These are benchmark entry points, not unit tests: keep pytest from trying to
# collect them (their parameters would be mistaken for fixtures).
test_mediapipe.__test__ = False
test_vision_framework.__test__ = False


def main():
    """Parse CLI arguments, run both benchmarks and print a comparison."""
    parser = argparse.ArgumentParser()
    parser.add_argument("video_path")
    parser.add_argument("--sample-interval", type=int, default=30)
    parser.add_argument("--max-frames", type=int, default=50)
    args = parser.parse_args()

    print(f"Testing: {args.video_path}")

    # Extract frames
    tmpdir, frames = extract_frames(
        args.video_path, args.sample_interval, args.max_frames
    )
    try:
        print(f"Extracted {len(frames)} frames")

        # MediaPipe
        print("\n=== MediaPipe ===")
        mp_result = test_mediapipe(frames, 24)

        # Vision Framework
        print("\n=== Apple Vision Framework ===")
        vf_result = test_vision_framework(frames, 24)

        # Summary
        print("\n=== Comparison ===")
        if mp_result:
            print(
                f"MediaPipe: {mp_result['total_faces']} faces in "
                f"{mp_result['frames_with_faces']} frames, "
                f"{mp_result['time_sec']:.2f}s"
            )
            print(f"  Landmarks: {mp_result['landmarks_per_face']} per face")
        if vf_result:
            print(f"Vision Framework: {vf_result['time_sec']:.2f}s (see above)")
        else:
            print("Vision Framework: skipped")
    finally:
        # Always remove the extracted frames, even if a benchmark raised.
        shutil.rmtree(tmpdir, ignore_errors=True)


if __name__ == "__main__":
    main()