feat: update Python processors and add utility scripts
- Update ASR, face, OCR, pose processors
- Add release pre-flight check script
- Add synonym generation, chunk processing scripts
- Add face recognition, stamp search utilities
This commit is contained in:
@@ -1,25 +1,52 @@
|
||||
#!/opt/homebrew/bin/python3.11
|
||||
"""
|
||||
Face Processor - Face Detection & Demographics
|
||||
Uses InsightFace for detection, age, and gender analysis.
|
||||
Falls back to OpenCV Haar Cascade if InsightFace fails.
|
||||
Face Processor - Face Detection & Demographics with Resume Support
|
||||
Uses InsightFace for detection, age, gender, and embedding extraction.
|
||||
|
||||
IMPORTANT: InsightFace is REQUIRED. No Haar fallback.
|
||||
- InsightFace provides 512-dim ArcFace embedding for identity matching
|
||||
- Haar Cascade can only detect faces; it cannot generate embeddings
|
||||
- If InsightFace fails, processor will ERROR and exit
|
||||
|
||||
Resume Feature:
|
||||
- Auto-detect existing results and resume from last frame
|
||||
- Auto-save at configurable intervals (default: 30 seconds)
|
||||
- Ctrl+C gracefully saves and exits
|
||||
"""
|
||||
|
||||
import sys
|
||||
import json
|
||||
import argparse
|
||||
import os
|
||||
import time
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||||
from redis_publisher import RedisPublisher
|
||||
from resume_framework import ResumeFramework, format_time, print_progress
|
||||
from utils.pose_analyzer import calculate_pose_angle_v2
|
||||
|
||||
|
||||
def process_face(video_path: str, output_path: str, uuid: str = ""):
|
||||
"""Process video for face detection and demographics analysis"""
|
||||
def process_face(
|
||||
video_path: str,
|
||||
output_path: str,
|
||||
uuid: str = "",
|
||||
auto_save_interval: int = 30,
|
||||
auto_save_frames: int = 300,
|
||||
force_restart: bool = False,
|
||||
sample_interval: int = 30,
|
||||
):
|
||||
"""Process video for face detection and demographics analysis with resume support"""
|
||||
|
||||
publisher = RedisPublisher(uuid) if uuid else None
|
||||
if publisher:
|
||||
publisher.info("face", "FACE_START")
|
||||
framework = ResumeFramework(
|
||||
output_path=output_path,
|
||||
processor_name="face",
|
||||
uuid=uuid,
|
||||
auto_save_interval=auto_save_interval,
|
||||
auto_save_frames=auto_save_frames,
|
||||
force_restart=force_restart,
|
||||
)
|
||||
|
||||
framework.publish_info("FACE_START")
|
||||
|
||||
try:
|
||||
import cv2
|
||||
@@ -27,78 +54,95 @@ def process_face(video_path: str, output_path: str, uuid: str = ""):
|
||||
import insightface
|
||||
except ImportError as e:
|
||||
error_msg = f"Missing dependency: {e.name}"
|
||||
if publisher:
|
||||
publisher.error("face", error_msg)
|
||||
result = {"frame_count": 0, "fps": 0.0, "frames": []}
|
||||
framework.publish_error(error_msg)
|
||||
result = {
|
||||
"metadata": {"status": "error", "error": error_msg},
|
||||
"frames": {},
|
||||
}
|
||||
with open(output_path, "w") as f:
|
||||
json.dump(result, f, indent=2)
|
||||
return result
|
||||
|
||||
# 1. Initialize InsightFace
|
||||
use_insightface = False
|
||||
app = None
|
||||
try:
|
||||
if publisher:
|
||||
publisher.info("face", "LOADING_INSIGHTFACE")
|
||||
# 'buffalo_l' is a robust model. det_size can be adjusted.
|
||||
framework.publish_info("LOADING_INSIGHTFACE")
|
||||
app = insightface.app.FaceAnalysis(
|
||||
name="buffalo_l", providers=["CPUExecutionProvider"]
|
||||
)
|
||||
app.prepare(ctx_id=0, det_size=(320, 320))
|
||||
use_insightface = True
|
||||
if publisher:
|
||||
publisher.info("face", "INSIGHTFACE_LOADED")
|
||||
framework.publish_info("INSIGHTFACE_LOADED")
|
||||
except Exception as e:
|
||||
print(f"[WARNING] InsightFace failed to load: {e}")
|
||||
use_insightface = False
|
||||
|
||||
# 2. Fallback to Haar Cascade
|
||||
face_cascade = None
|
||||
if not use_insightface:
|
||||
if publisher:
|
||||
publisher.info("face", "LOADING_HAAR_CASCADE")
|
||||
face_cascade = cv2.CascadeClassifier(
|
||||
cv2.data.haarcascades + "haarcascade_frontalface_default.xml"
|
||||
)
|
||||
if face_cascade.empty():
|
||||
if publisher:
|
||||
publisher.error("face", "Could not load Haar Cascade")
|
||||
result = {"frame_count": 0, "fps": 0.0, "frames": []}
|
||||
with open(output_path, "w") as f:
|
||||
json.dump(result, f, indent=2)
|
||||
return result
|
||||
if publisher:
|
||||
publisher.info("face", "HAAR_CASCADE_LOADED")
|
||||
|
||||
if publisher:
|
||||
publisher.info("face", "PROCESSING_VIDEO")
|
||||
|
||||
cap = cv2.VideoCapture(video_path)
|
||||
if not cap.isOpened():
|
||||
if publisher:
|
||||
publisher.error("face", "Could not open video")
|
||||
result = {"frame_count": 0, "fps": 0.0, "frames": []}
|
||||
error_msg = f"InsightFace failed to load (REQUIRED): {e}"
|
||||
framework.publish_error(error_msg)
|
||||
result = {
|
||||
"metadata": {"status": "error", "error": error_msg},
|
||||
"frames": {},
|
||||
}
|
||||
with open(output_path, "w") as f:
|
||||
json.dump(result, f, indent=2)
|
||||
return result
|
||||
|
||||
framework.publish_info("PROCESSING_VIDEO")
|
||||
|
||||
cap = cv2.VideoCapture(video_path)
|
||||
|
||||
if not cap.isOpened():
|
||||
print(f"Error: Cannot open video: {video_path}")
|
||||
return {"metadata": {"status": "error"}, "frames": {}}
|
||||
|
||||
fps = cap.get(cv2.CAP_PROP_FPS)
|
||||
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
|
||||
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
|
||||
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
|
||||
total_duration = total_frames / fps if fps > 0 else 0
|
||||
cap.release()
|
||||
|
||||
# Optimization: Process every N frames to speed up analysis
|
||||
# Since we just need attributes for the person identity, we don't need every single frame.
|
||||
sample_interval = 30
|
||||
if total_frames > 0:
|
||||
estimated_samples = total_frames // sample_interval
|
||||
framework.publish_info(f"fps={fps}, frames={total_frames}")
|
||||
|
||||
existing_data, last_checkpoint = framework.load_existing_data()
|
||||
resume_mode = existing_data is not None and last_checkpoint > 0 and not force_restart
|
||||
|
||||
if resume_mode:
|
||||
print(f"\nFound existing data: {output_path}")
|
||||
print(f"Last processed frame: {last_checkpoint}")
|
||||
print(f"Will resume from frame {last_checkpoint + 1}")
|
||||
|
||||
if resume_mode and existing_data:
|
||||
face_data = existing_data
|
||||
frame_count = last_checkpoint
|
||||
processed_frames = set(int(k) for k in existing_data.get("frames", {}).keys())
|
||||
cap = cv2.VideoCapture(video_path)
|
||||
cap.set(cv2.CAP_PROP_POS_FRAMES, frame_count)
|
||||
else:
|
||||
estimated_samples = 0
|
||||
face_data = {
|
||||
"metadata": framework.init_metadata(
|
||||
video_path=video_path,
|
||||
fps=fps,
|
||||
width=width,
|
||||
height=height,
|
||||
total_frames=total_frames,
|
||||
total_duration=total_duration,
|
||||
extra={
|
||||
"sample_interval": sample_interval,
|
||||
"detection_method": "insightface",
|
||||
},
|
||||
),
|
||||
"frames": {},
|
||||
}
|
||||
frame_count = 0
|
||||
processed_frames = set()
|
||||
cap = cv2.VideoCapture(video_path)
|
||||
|
||||
frame_count = 0
|
||||
processed_count = 0
|
||||
frames_data = []
|
||||
framework.set_data(face_data)
|
||||
|
||||
if publisher:
|
||||
publisher.progress("face", 0, estimated_samples, "Starting")
|
||||
start_time = time.time()
|
||||
framework.last_save_time = start_time
|
||||
|
||||
print(f"\nProcessing video: {total_frames} frames @ {fps:.2f} fps")
|
||||
print(f"Auto-save every {auto_save_interval}s or {auto_save_frames} frames")
|
||||
print(f"Resume from frame {frame_count + 1 if resume_mode else 1}")
|
||||
print(f"Detection method: InsightFace (REQUIRED)")
|
||||
print()
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
@@ -106,105 +150,151 @@ def process_face(video_path: str, output_path: str, uuid: str = ""):
|
||||
break
|
||||
|
||||
frame_count += 1
|
||||
current_time = (frame_count - 1) / fps if fps > 0 else 0
|
||||
|
||||
# Sampling
|
||||
if frame_count % sample_interval != 0:
|
||||
if frame_count in processed_frames:
|
||||
continue
|
||||
|
||||
processed_count += 1
|
||||
timestamp = (frame_count - 1) / fps if fps > 0 else 0
|
||||
if frame_count % sample_interval != 0:
|
||||
continue
|
||||
|
||||
face_list = []
|
||||
|
||||
try:
|
||||
if use_insightface and app:
|
||||
# InsightFace Detection & Analysis
|
||||
faces = app.get(frame)
|
||||
for face in faces:
|
||||
bbox = face.bbox.astype(int)
|
||||
bx, by, bw, bh = (
|
||||
bbox[0],
|
||||
bbox[1],
|
||||
bbox[2] - bbox[0],
|
||||
bbox[3] - bbox[1],
|
||||
)
|
||||
|
||||
# Extract Attributes
|
||||
age = int(face.age) if hasattr(face, "age") else None
|
||||
gender_val = face.gender if hasattr(face, "gender") else None
|
||||
gender = (
|
||||
"female"
|
||||
if gender_val == 0
|
||||
else ("male" if gender_val == 1 else None)
|
||||
)
|
||||
|
||||
face_list.append(
|
||||
{
|
||||
"x": int(bx),
|
||||
"y": int(by),
|
||||
"width": int(bw),
|
||||
"height": int(bh),
|
||||
"confidence": float(face.det_score)
|
||||
if hasattr(face, "det_score")
|
||||
else 0.9,
|
||||
"attributes": {"age": age, "gender": gender},
|
||||
}
|
||||
)
|
||||
else:
|
||||
# Haar Cascade Fallback (No Age/Gender)
|
||||
gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
|
||||
faces = face_cascade.detectMultiScale(
|
||||
gray, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30)
|
||||
faces = app.get(frame)
|
||||
for face in faces:
|
||||
bbox = face.bbox.astype(int)
|
||||
bx, by, bw, bh = (
|
||||
bbox[0],
|
||||
bbox[1],
|
||||
bbox[2] - bbox[0],
|
||||
bbox[3] - bbox[1],
|
||||
)
|
||||
for x, y, w, h in faces:
|
||||
face_list.append(
|
||||
{
|
||||
"x": int(x),
|
||||
"y": int(y),
|
||||
"width": int(w),
|
||||
"height": int(h),
|
||||
"confidence": 0.8,
|
||||
"attributes": {"age": None, "gender": None},
|
||||
|
||||
age = int(face.age) if hasattr(face, "age") else None
|
||||
gender_val = face.gender if hasattr(face, "gender") else None
|
||||
gender = (
|
||||
"female"
|
||||
if gender_val == 0
|
||||
else ("male" if gender_val == 1 else None)
|
||||
)
|
||||
|
||||
embedding = None
|
||||
if hasattr(face, "embedding"):
|
||||
embedding = face.embedding.tolist()
|
||||
|
||||
landmarks = None
|
||||
if hasattr(face, "kps"):
|
||||
landmarks = face.kps.tolist()
|
||||
elif hasattr(face, "landmark_3d_68"):
|
||||
landmarks = face.landmark_3d_68.tolist()
|
||||
|
||||
pose_angle = None
|
||||
if landmarks and len(landmarks) >= 5:
|
||||
try:
|
||||
pose_result = calculate_pose_angle_v2(landmarks)
|
||||
pose_angle = {
|
||||
"angle": pose_result.get("angle", "unknown"),
|
||||
"confidence": pose_result.get("confidence", 0.0),
|
||||
"pitch": pose_result.get("pitch", "neutral"),
|
||||
"features": pose_result.get("features", {}),
|
||||
}
|
||||
)
|
||||
except Exception as e:
|
||||
pass
|
||||
|
||||
face_list.append(
|
||||
{
|
||||
"x": int(bx),
|
||||
"y": int(by),
|
||||
"width": int(bw),
|
||||
"height": int(bh),
|
||||
"confidence": float(face.det_score)
|
||||
if hasattr(face, "det_score")
|
||||
else 0.9,
|
||||
"embedding": embedding,
|
||||
"landmarks": landmarks,
|
||||
"pose_angle": pose_angle,
|
||||
"attributes": {"age": age, "gender": gender},
|
||||
}
|
||||
)
|
||||
except Exception as e:
|
||||
print(f"[ERROR] Frame processing error: {e}")
|
||||
|
||||
if face_list:
|
||||
frames_data.append(
|
||||
{
|
||||
"frame": frame_count - 1,
|
||||
"timestamp": round(timestamp, 3),
|
||||
"faces": face_list,
|
||||
}
|
||||
)
|
||||
face_data["frames"][str(frame_count)] = {
|
||||
"frame_number": frame_count,
|
||||
"time_seconds": round(current_time, 3),
|
||||
"time_formatted": format_time(current_time),
|
||||
"faces": face_list,
|
||||
}
|
||||
processed_frames.add(frame_count)
|
||||
|
||||
if publisher:
|
||||
publisher.progress(
|
||||
"face",
|
||||
processed_count,
|
||||
estimated_samples,
|
||||
f"Frame {frame_count}",
|
||||
)
|
||||
if frame_count % 500 == 0:
|
||||
elapsed = time.time() - start_time
|
||||
print_progress(frame_count, total_frames, elapsed, f"{len(face_list)} faces")
|
||||
framework.publish_progress(frame_count, total_frames, f"frame {frame_count}")
|
||||
|
||||
if framework.should_auto_save(frame_count):
|
||||
framework.save_progress(frame_count, silent=True)
|
||||
|
||||
cap.release()
|
||||
|
||||
result = {"frame_count": total_frames, "fps": fps, "frames": frames_data}
|
||||
total_processed = len(processed_frames)
|
||||
|
||||
if publisher:
|
||||
publisher.complete("face", f"{len(frames_data)} frames processed")
|
||||
framework.finalize(
|
||||
total_processed=total_processed,
|
||||
extra_metadata={
|
||||
"sample_interval": sample_interval,
|
||||
"detection_method": "insightface",
|
||||
},
|
||||
)
|
||||
|
||||
with open(output_path, "w") as f:
|
||||
json.dump(result, f, indent=2)
|
||||
print(f"\nFace detection completed: {total_processed} frames processed")
|
||||
print(f"Frames with faces: {len(face_data['frames'])}")
|
||||
|
||||
return result
|
||||
return face_data
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser(description="Face Detection & Demographics")
|
||||
parser = argparse.ArgumentParser(description="Face Detection & Demographics with Resume Support")
|
||||
parser.add_argument("video_path", help="Path to video file")
|
||||
parser.add_argument("output_path", help="Output JSON path")
|
||||
parser.add_argument("--uuid", "-u", help="UUID for Redis progress", default="")
|
||||
parser.add_argument(
|
||||
"--auto-save-interval",
|
||||
"-a",
|
||||
help="Auto-save interval in seconds",
|
||||
type=int,
|
||||
default=30,
|
||||
)
|
||||
parser.add_argument(
|
||||
"--auto-save-frames",
|
||||
"-f",
|
||||
help="Auto-save interval in frames",
|
||||
type=int,
|
||||
default=300,
|
||||
)
|
||||
parser.add_argument(
|
||||
"--force-restart",
|
||||
"-r",
|
||||
help="Force restart (ignore existing data)",
|
||||
action="store_true",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--sample-interval",
|
||||
"-s",
|
||||
help="Frame sample interval",
|
||||
type=int,
|
||||
default=30,
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
process_face(args.video_path, args.output_path, args.uuid)
|
||||
process_face(
|
||||
args.video_path,
|
||||
args.output_path,
|
||||
args.uuid,
|
||||
args.auto_save_interval,
|
||||
args.auto_save_frames,
|
||||
args.force_restart,
|
||||
args.sample_interval,
|
||||
)
|
||||
Reference in New Issue
Block a user