Files
momentry_core/scripts/face_recognition_processor.py
Warren e75c4d6f07 cleanup: remove dead code and duplicate docs
- Remove session-ses_2f27.md (161KB raw session log)
- Remove 49 ROOT_* duplicate files across REFERENCE/
- Remove 14 duplicate files between REFERENCE/ root and history/
- Remove asr_legacy.rs (dead code, replaced by asr.rs)
- Remove src/core/worker/ (duplicate JobWorker)
- Remove src/core/layers/ (empty directory)
- Remove 4 .bak files in src/
- Remove 7 dead private methods in worker/processor.rs
- Remove backup directory from git tracking
2026-05-04 01:31:21 +08:00

647 lines
22 KiB
Python

#!/opt/homebrew/bin/python3.11
"""
Face Recognition Processor
Integrates InsightFace for face detection, recognition, and tracking
Supports: face detection, face recognition, face tracking, face clustering
"""
import sys
import json
import argparse
import os
import numpy as np
from typing import List, Dict, Any, Optional, Tuple
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from redis_publisher import RedisPublisher
class FaceRecognitionProcessor:
def __init__(
self,
enable_recognition: bool = True,
enable_tracking: bool = True,
enable_clustering: bool = True,
):
self.enable_recognition = enable_recognition
self.enable_tracking = enable_tracking
self.enable_clustering = enable_clustering
self.face_model = None
self.face_database = {}
self.face_tracker = None
self.face_clusters = {}
self.embedding_dim = 512 # InsightFace default embedding dimension
def load_models(self, use_mps: bool = False):
"""Load InsightFace models with MPS support"""
try:
import insightface
from insightface.app import FaceAnalysis
# Determine execution providers based on configuration
providers = ["CPUExecutionProvider"]
if use_mps:
try:
# Try to import MPS provider
import onnxruntime as ort
available_providers = ort.get_available_providers()
if "CoreMLExecutionProvider" in available_providers:
print(
"[INFO] Using CoreMLExecutionProvider for MPS acceleration"
)
providers = ["CoreMLExecutionProvider", "CPUExecutionProvider"]
elif "CUDAExecutionProvider" in available_providers:
print("[INFO] Using CUDAExecutionProvider")
providers = ["CUDAExecutionProvider", "CPUExecutionProvider"]
else:
print("[INFO] MPS/CUDA not available, using CPU")
providers = ["CPUExecutionProvider"]
except ImportError:
print("[WARNING] ONNX Runtime not available, using CPU")
providers = ["CPUExecutionProvider"]
print(f"[INFO] Using execution providers: {providers}")
# Initialize face analysis app
self.face_model = FaceAnalysis(
name="buffalo_l", # or 'buffalo_s' for smaller model
providers=providers,
)
# For MPS/CoreML, we need to adjust context
ctx_id = -1 # Default for CPU
if use_mps and "CoreMLExecutionProvider" in providers:
ctx_id = 0 # CoreML uses device 0
self.face_model.prepare(ctx_id=ctx_id, det_size=(640, 640))
print("[INFO] InsightFace models loaded successfully")
return True
except ImportError as e:
print(f"[ERROR] Failed to import InsightFace: {e}")
print("[INFO] Install with: pip install insightface")
return False
except Exception as e:
print(f"[ERROR] Failed to load models: {e}")
return False
except Exception as e:
print(f"[ERROR] Failed to load models: {e}")
return False
def load_face_database(self, database_path: Optional[str] = None):
"""Load face database from file"""
if database_path and os.path.exists(database_path):
try:
with open(database_path, "r") as f:
self.face_database = json.load(f)
print(f"[INFO] Loaded {len(self.face_database)} faces from database")
except Exception as e:
print(f"[WARNING] Failed to load face database: {e}")
self.face_database = {}
else:
print("[INFO] No face database provided, starting with empty database")
self.face_database = {}
def detect_faces(self, image: np.ndarray) -> List[Dict[str, Any]]:
"""Detect faces in image using InsightFace"""
if self.face_model is None:
return []
try:
faces = self.face_model.get(image)
results = []
for face in faces:
# Get bounding box
bbox = face.bbox.astype(int)
x, y, x2, y2 = bbox
width = x2 - x
height = y2 - y
# Get embedding
embedding = (
face.embedding.tolist() if hasattr(face, "embedding") else None
)
# Get attributes
attributes = {}
if hasattr(face, "age") and face.age is not None:
attributes["age"] = int(face.age)
if hasattr(face, "gender") and face.gender is not None:
attributes["gender"] = "female" if face.gender == 0 else "male"
# Get pose if available
pose = None
if hasattr(face, "pose") and face.pose is not None:
pose = {
"yaw": float(face.pose[0]),
"pitch": float(face.pose[1]),
"roll": float(face.pose[2]),
}
# Create face detection result
face_result = {
"x": int(x),
"y": int(y),
"width": int(width),
"height": int(height),
"confidence": float(face.det_score)
if hasattr(face, "det_score")
else 0.8,
"embedding": embedding,
"attributes": {
"age": attributes.get("age"),
"gender": attributes.get("gender"),
"emotion": None, # InsightFace doesn't provide emotion
"glasses": None,
"mask": None,
"pose": pose,
}
if any([attributes.get("age"), attributes.get("gender"), pose])
else None,
"identity": None, # Will be filled by recognition step
}
results.append(face_result)
return results
except Exception as e:
print(f"[ERROR] Face detection failed: {e}")
return []
def recognize_faces(
self, faces: List[Dict[str, Any]], threshold: float = 0.6
) -> List[Dict[str, Any]]:
"""Recognize faces by comparing with database"""
if not self.enable_recognition or not faces:
return faces
recognized_faces = []
for face in faces:
if face.get("embedding") is None:
face["identity"] = None
recognized_faces.append(face)
continue
embedding = np.array(face["embedding"])
best_match = None
best_similarity = 0.0
# Compare with all faces in database
for face_id, db_face in self.face_database.items():
if "embedding" not in db_face:
continue
db_embedding = np.array(db_face["embedding"])
similarity = self.cosine_similarity(embedding, db_embedding)
if similarity > best_similarity and similarity >= threshold:
best_similarity = similarity
best_match = {
"name": db_face.get("name", "Unknown"),
"confidence": float(similarity),
"database_id": face_id,
"metadata": db_face.get("metadata", {}),
}
if best_match:
face["identity"] = best_match
else:
face["identity"] = None
recognized_faces.append(face)
return recognized_faces
def track_faces(self, frames: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
"""Track faces across frames using simple IoU tracking"""
if not self.enable_tracking or not frames:
return frames
tracked_frames = []
face_tracks = {} # face_id -> track info
next_face_id = 1
for frame_idx, frame in enumerate(frames):
tracked_faces = []
for face in frame.get("faces", []):
# Calculate IoU with existing tracks
best_track_id = None
best_iou = 0.3 # IoU threshold
for track_id, track in face_tracks.items():
if frame_idx - track["last_frame"] > 10: # Skip old tracks
continue
iou = self.calculate_iou(face, track["last_bbox"])
if iou > best_iou:
best_iou = iou
best_track_id = track_id
if best_track_id is not None:
# Update existing track
face["face_id"] = f"face_{best_track_id}"
face_tracks[best_track_id]["last_bbox"] = (
face["x"],
face["y"],
face["width"],
face["height"],
)
face_tracks[best_track_id]["last_frame"] = frame_idx
else:
# Create new track
face["face_id"] = f"face_{next_face_id}"
face_tracks[next_face_id] = {
"last_bbox": (
face["x"],
face["y"],
face["width"],
face["height"],
),
"last_frame": frame_idx,
}
next_face_id += 1
tracked_faces.append(face)
tracked_frame = frame.copy()
tracked_frame["faces"] = tracked_faces
tracked_frames.append(tracked_frame)
return tracked_frames
def cluster_faces(
self, frames: List[Dict[str, Any]]
) -> Tuple[List[Dict[str, Any]], Dict[str, Any]]:
"""Cluster faces using DBSCAN algorithm"""
if not self.enable_clustering:
return frames, {}
try:
from sklearn.cluster import DBSCAN
from sklearn.preprocessing import StandardScaler
# Collect all face embeddings
embeddings = []
face_info = []
for frame in frames:
for face in frame.get("faces", []):
if face.get("embedding") and face.get("face_id"):
embeddings.append(face["embedding"])
face_info.append(
{
"face_id": face["face_id"],
"frame_idx": frame["frame"],
"bbox": (
face["x"],
face["y"],
face["width"],
face["height"],
),
}
)
if len(embeddings) < 2:
return frames, {}
# Normalize embeddings
scaler = StandardScaler()
embeddings_scaled = scaler.fit_transform(embeddings)
# Apply DBSCAN clustering
dbscan = DBSCAN(eps=0.5, min_samples=2, metric="euclidean")
clusters = dbscan.fit_predict(embeddings_scaled)
# Create cluster information
cluster_info = {}
for idx, cluster_id in enumerate(clusters):
if cluster_id == -1: # Noise
continue
cluster_key = f"cluster_{cluster_id}"
if cluster_key not in cluster_info:
cluster_info[cluster_key] = {
"face_ids": [],
"embeddings": [],
"size": 0,
}
cluster_info[cluster_key]["face_ids"].append(face_info[idx]["face_id"])
cluster_info[cluster_key]["embeddings"].append(embeddings[idx])
cluster_info[cluster_key]["size"] += 1
# Calculate centroids
for cluster_key, info in cluster_info.items():
if info["embeddings"]:
centroid = np.mean(info["embeddings"], axis=0).tolist()
info["centroid"] = centroid
# Find representative face (closest to centroid)
distances = [
np.linalg.norm(np.array(emb) - np.array(centroid))
for emb in info["embeddings"]
]
rep_idx = np.argmin(distances)
info["representative_face_id"] = info["face_ids"][rep_idx]
return frames, cluster_info
except ImportError:
print("[WARNING] scikit-learn not installed, skipping clustering")
return frames, {}
except Exception as e:
print(f"[ERROR] Clustering failed: {e}")
return frames, {}
def process_video(
self, video_path: str, output_path: str, uuid: str = "", use_mps: bool = False
) -> Dict[str, Any]:
"""Process video for face recognition with MPS support"""
publisher = RedisPublisher(uuid) if uuid else None
if publisher:
publisher.info("face_recognition", "FACE_RECOGNITION_START")
# Check if OpenCV is available
try:
import cv2
except ImportError:
if publisher:
publisher.error("face_recognition", "opencv-python not installed")
return self.create_empty_result()
# Load InsightFace models with MPS support
if publisher:
publisher.info("face_recognition", "LOADING_MODELS")
if not self.load_models(use_mps=use_mps):
if publisher:
publisher.error("face_recognition", "Failed to load InsightFace models")
return self.create_empty_result()
if publisher:
publisher.info("face_recognition", "MODELS_LOADED")
# Get video info
cap = cv2.VideoCapture(video_path)
fps = cap.get(cv2.CAP_PROP_FPS)
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
cap.release()
if publisher:
publisher.info("face_recognition", f"fps={fps}, frames={total_frames}")
publisher.progress("face_recognition", 0, total_frames, "Starting")
# Process every N frames to speed up
sample_interval = 30 # Process every 30 frames
frames = []
frame_count = 0
processed = 0
cap = cv2.VideoCapture(video_path)
while True:
ret, frame = cap.read()
if not ret:
break
frame_count += 1
# Sample frames
if frame_count % sample_interval != 0:
continue
processed += 1
timestamp = (frame_count - 1) / fps if fps > 0 else 0
# Detect faces
faces = self.detect_faces(frame)
# Recognize faces if enabled
if self.enable_recognition:
faces = self.recognize_faces(faces)
# Create frame result
frame_result = {
"frame": frame_count - 1,
"timestamp": round(timestamp, 3),
"faces": faces,
}
frames.append(frame_result)
if publisher:
publisher.progress(
"face_recognition",
processed,
total_frames // sample_interval,
f"Frame {frame_count}",
)
cap.release()
# Track faces if enabled
if self.enable_tracking:
frames = self.track_faces(frames)
# Cluster faces if enabled
cluster_info = {}
if self.enable_clustering:
frames, cluster_info = self.cluster_faces(frames)
# Extract recognized faces information
recognized_faces = self.extract_recognized_faces(frames)
# Prepare final result
result = {
"frame_count": total_frames,
"fps": fps,
"frames": frames,
"recognized_faces": recognized_faces,
"face_clusters": self.format_clusters(cluster_info),
}
if publisher:
publisher.complete(
"face_recognition",
f"{len(frames)} frames, {len(recognized_faces)} recognized faces",
)
# Save result
with open(output_path, "w") as f:
json.dump(result, f, indent=2)
return result
def extract_recognized_faces(
self, frames: List[Dict[str, Any]]
) -> List[Dict[str, Any]]:
"""Extract unique recognized faces from frames"""
face_info = {}
for frame in frames:
for face in frame.get("faces", []):
face_id = face.get("face_id")
if not face_id:
continue
if face_id not in face_info:
face_info[face_id] = {
"face_id": face_id,
"embedding": face.get("embedding"),
"first_seen": frame["timestamp"],
"last_seen": frame["timestamp"],
"total_appearances": 1,
"attributes": face.get("attributes"),
"identities": [],
"cluster_id": None,
}
else:
face_info[face_id]["last_seen"] = frame["timestamp"]
face_info[face_id]["total_appearances"] += 1
# Add identity if recognized
if face.get("identity"):
identity = face["identity"]
# Check if this identity is already recorded
existing = False
for existing_id in face_info[face_id]["identities"]:
if existing_id.get("database_id") == identity.get(
"database_id"
):
existing = True
break
if not existing:
face_info[face_id]["identities"].append(identity)
return list(face_info.values())
def format_clusters(self, cluster_info: Dict[str, Any]) -> List[Dict[str, Any]]:
"""Format cluster information for output"""
clusters = []
for cluster_id, info in cluster_info.items():
cluster = {
"cluster_id": cluster_id,
"face_ids": info.get("face_ids", []),
"centroid": info.get("centroid", []),
"size": info.get("size", 0),
"representative_face_id": info.get("representative_face_id"),
"metadata": {},
}
clusters.append(cluster)
return clusters
def create_empty_result(self) -> Dict[str, Any]:
"""Create empty result structure"""
return {
"frame_count": 0,
"fps": 0.0,
"frames": [],
"recognized_faces": [],
"face_clusters": [],
}
@staticmethod
def cosine_similarity(a: np.ndarray, b: np.ndarray) -> float:
"""Calculate cosine similarity between two vectors"""
dot_product = np.dot(a, b)
norm_a = np.linalg.norm(a)
norm_b = np.linalg.norm(b)
if norm_a == 0 or norm_b == 0:
return 0.0
return dot_product / (norm_a * norm_b)
@staticmethod
def calculate_iou(face1: Dict[str, Any], bbox2: Tuple[int, int, int, int]) -> float:
"""Calculate Intersection over Union between two bounding boxes"""
x1, y1, w1, h1 = face1["x"], face1["y"], face1["width"], face1["height"]
x2, y2, w2, h2 = bbox2
# Calculate intersection coordinates
x_left = max(x1, x2)
y_top = max(y1, y2)
x_right = min(x1 + w1, x2 + w2)
y_bottom = min(y1 + h1, y2 + h2)
if x_right < x_left or y_bottom < y_top:
return 0.0
intersection_area = (x_right - x_left) * (y_bottom - y_top)
area1 = w1 * h1
area2 = w2 * h2
union_area = area1 + area2 - intersection_area
return intersection_area / union_area if union_area > 0 else 0.0
def main():
parser = argparse.ArgumentParser(
description="Face Recognition Processor with MPS support"
)
parser.add_argument("video_path", help="Path to video file")
parser.add_argument("output_path", help="Output JSON path")
parser.add_argument(
"enable_recognition", help="Enable face recognition (0/1)", default="1"
)
parser.add_argument(
"enable_tracking", help="Enable face tracking (0/1)", default="1"
)
parser.add_argument(
"enable_clustering", help="Enable face clustering (0/1)", default="1"
)
parser.add_argument("--uuid", "-u", help="UUID for Redis progress", default="")
parser.add_argument(
"--database", "-d", help="Path to face database JSON file", default=""
)
parser.add_argument(
"--use-mps",
"-m",
help="Use MPS acceleration (Apple Silicon)",
action="store_true",
default=False,
)
args = parser.parse_args()
# Create processor
processor = FaceRecognitionProcessor(
enable_recognition=args.enable_recognition == "1",
enable_tracking=args.enable_tracking == "1",
enable_clustering=args.enable_clustering == "1",
)
# Load face database if provided
if args.database:
processor.load_face_database(args.database)
# Process video with MPS support
result = processor.process_video(
video_path=args.video_path,
output_path=args.output_path,
uuid=args.uuid,
use_mps=args.use_mps,
)
print(f"[INFO] Processing complete: {len(result['frames'])} frames processed")
print(f"[INFO] Recognized faces: {len(result['recognized_faces'])}")
print(f"[INFO] Face clusters: {len(result['face_clusters'])}")
if __name__ == "__main__":
main()