feat: update Python processors and add utility scripts
- Update ASR, face, OCR, pose processors - Add release pre-flight check script - Add synonym generation, chunk processing scripts - Add face recognition, stamp search utilities
This commit is contained in:
406
scripts/yolo_processor_mps.py
Normal file
406
scripts/yolo_processor_mps.py
Normal file
@@ -0,0 +1,406 @@
|
||||
#!/opt/homebrew/bin/python3.11
|
||||
"""
|
||||
YOLO Processor - Apple MPS Optimized Version
|
||||
Uses YOLOv8 via ultralytics with Apple Silicon MPS acceleration
|
||||
|
||||
Features:
|
||||
- Automatic MPS/CPU fallback
|
||||
- Metal GPU acceleration for inference
|
||||
- Batch processing for efficiency
|
||||
- Memory-optimized for unified memory architecture
|
||||
"""
|
||||
|
||||
import sys
|
||||
import json
|
||||
import argparse
|
||||
import os
|
||||
import signal
|
||||
import time
|
||||
from datetime import datetime
|
||||
from typing import Dict, List, Optional, Tuple
|
||||
|
||||
import torch
|
||||
from ultralytics import YOLO
|
||||
|
||||
|
||||
# Human-readable labels, indexed by the integer class id that the detector
# reports for each box (see the ``YOLO_NAMES[cls]`` lookup in
# process_video_yolo).  The list holds 80 entries.
# NOTE(review): this looks like the 80-class COCO label set using the older
# Darknet-style spellings ("motorbike", "aeroplane", "sofa", "pottedplant",
# "diningtable", "tvmonitor"); the model's own name table may spell a few of
# these differently -- confirm which spelling downstream consumers expect.
YOLO_NAMES = [
    "person",
    "bicycle",
    "car",
    "motorbike",
    "aeroplane",
    "bus",
    "train",
    "truck",
    "boat",
    "traffic light",
    "fire hydrant",
    "stop sign",
    "parking meter",
    "bench",
    "bird",
    "cat",
    "dog",
    "horse",
    "sheep",
    "cow",
    "elephant",
    "bear",
    "zebra",
    "giraffe",
    "backpack",
    "umbrella",
    "handbag",
    "tie",
    "suitcase",
    "frisbee",
    "skis",
    "snowboard",
    "sports ball",
    "kite",
    "baseball bat",
    "baseball glove",
    "skateboard",
    "surfboard",
    "tennis racket",
    "bottle",
    "wine glass",
    "cup",
    "fork",
    "knife",
    "spoon",
    "bowl",
    "banana",
    "apple",
    "sandwich",
    "orange",
    "broccoli",
    "carrot",
    "hot dog",
    "pizza",
    "donut",
    "cake",
    "chair",
    "sofa",
    "pottedplant",
    "bed",
    "diningtable",
    "toilet",
    "tvmonitor",
    "laptop",
    "mouse",
    "remote",
    "keyboard",
    "cell phone",
    "microwave",
    "oven",
    "toaster",
    "sink",
    "refrigerator",
    "book",
    "clock",
    "vase",
    "scissors",
    "teddy bear",
    "hair drier",
    "toothbrush",
]
|
||||
|
||||
|
||||
def get_device() -> str:
    """Return the name of the preferred torch device.

    Backends are probed in preference order -- MPS first, then CUDA -- and
    the first one reporting itself available wins; "cpu" is the fallback.
    """
    probes = (
        ("mps", torch.backends.mps.is_available),
        ("cuda", torch.cuda.is_available),
    )
    for backend_name, is_available in probes:
        if is_available():
            return backend_name
    return "cpu"
|
||||
|
||||
|
||||
def signal_handler(signum, frame):
    """Announce the received signal and terminate with exit status 0.

    Intended for use with ``signal.signal``; ``frame`` is accepted because
    the signal API passes it, but it is not used.  Termination happens by
    raising ``SystemExit``, so ``finally``/``except SystemExit`` blocks in
    the interrupted code still run.
    """
    notice = f"\n[YOLO] Received signal {signum}, saving results and exiting..."
    print(notice)
    raise SystemExit(0)
|
||||
|
||||
|
||||
def _write_results_atomically(result: Dict, output_path: str) -> None:
    """Serialize ``result`` to ``output_path`` as indented JSON.

    The JSON is first written to ``<output_path>.tmp`` and then moved over
    the destination with ``os.replace``, so a failure or interruption while
    dumping cannot leave a truncated results file at ``output_path``.
    """
    tmp_path = f"{output_path}.tmp"
    with open(tmp_path, "w") as f:
        json.dump(result, f, indent=2)
    os.replace(tmp_path, output_path)


def _load_previous_frames(output_path: str) -> Dict:
    """Return the ``frames`` mapping stored by an earlier run, or ``{}``.

    Malformed JSON or an unexpected layout is reported and treated as
    "nothing to resume" instead of aborting (or silently passing).
    """
    try:
        with open(output_path, "r") as f:
            previous = json.load(f)
        frames = previous.get("frames", {})
        if not isinstance(frames, dict):
            raise TypeError("'frames' is not a mapping")
        if frames:
            last_processed_frame = max(int(k) for k in frames)
            print(f"[YOLO] Resuming from frame {last_processed_frame}")
    except (ValueError, KeyError, TypeError, AttributeError) as exc:
        # json.JSONDecodeError is a ValueError; non-integer frame keys and
        # non-dict payloads end up here as well.
        print(f"[YOLO] Ignoring unusable previous results in {output_path}: {exc}")
        return {}
    return frames


def _box_to_detection(box) -> Dict:
    """Convert one detection box into a JSON-serializable dictionary.

    Coordinates are read from ``box.xyxy`` (corner format) and stored as the
    top-left corner plus width/height, all cast to built-in ``int``/``float``
    so ``json.dump`` accepts them.
    """
    xyxy = box.xyxy[0].cpu().numpy()
    conf = float(box.conf[0].cpu())
    cls = int(box.cls[0].cpu())
    return {
        "x": int(xyxy[0]),
        "y": int(xyxy[1]),
        "width": int(xyxy[2] - xyxy[0]),
        "height": int(xyxy[3] - xyxy[1]),
        "confidence": round(conf, 4),
        # Fall back to a synthetic label for ids outside the name table.
        "class": YOLO_NAMES[cls] if cls < len(YOLO_NAMES) else f"class_{cls}",
        "class_id": cls,
    }


def process_video_yolo(
    video_path: str,
    output_path: str,
    model_name: str = "yolov8n",
    confidence: float = 0.25,
    iou_threshold: float = 0.45,
    device: str = "auto",
    batch_size: int = 8,
    skip_frames: int = 1,
    resume: bool = True,
    save_interval: int = 30,
    assumed_fps: float = 30.0,
) -> Dict:
    """
    Process video for YOLO object detection with MPS acceleration

    Installs SIGTERM/SIGINT handlers, loads ``<model_name>.pt``, streams the
    video through the model and writes per-frame detections to
    ``output_path`` as JSON (periodically and once more at the end).

    Args:
        video_path: Path to input video file
        output_path: Path to output JSON file
        model_name: YOLO model name (yolov8n, yolov8s, yolov8m, yolov8l, yolov8x)
        confidence: Confidence threshold for detections
        iou_threshold: IoU threshold for NMS
        device: Device to use ('auto', 'mps', 'cuda', 'cpu'); 'auto' defers
            to get_device()
        batch_size: Accepted for interface compatibility; this function does
            not currently use it
        skip_frames: Keep every N-th frame (1 = all frames). Results for the
            other frames are discarded after the model yields them. A value
            of 0 is treated as 1.
        resume: Whether to carry over the ``frames`` already stored in
            ``output_path``. Frames are still re-read from the start of the
            video; re-detected frames overwrite the stored entries.
        save_interval: Save results every N seconds (<= 0 disables)
        assumed_fps: Frame rate used to derive each frame's ``timestamp``
            (``frame_index / assumed_fps``); must be positive

    Returns:
        Dictionary with detection results and metadata

    Raises:
        ValueError: If ``assumed_fps`` is not positive.
    """
    if assumed_fps <= 0:
        raise ValueError("assumed_fps must be positive")

    # ``idx % 0`` would raise ZeroDivisionError; abs() keeps the truthiness
    # of the modulo test unchanged for negative values.
    frame_step = abs(skip_frames) or 1

    # Set up signal handlers
    signal.signal(signal.SIGTERM, signal_handler)
    signal.signal(signal.SIGINT, signal_handler)

    # Determine device
    if device == "auto":
        device = get_device()

    print(f"[YOLO] Starting YOLO processing with device: {device}")
    print(f"[YOLO] Model: {model_name}, Confidence: {confidence}, IoU: {iou_threshold}")

    # Load model
    print(f"[YOLO] Loading model: {model_name}")
    model = YOLO(f"{model_name}.pt")

    # Move to device
    if device in ["mps", "cuda"]:
        model.to(device)

    # Load existing data if resuming
    previous_frames: Dict = {}
    if resume and os.path.exists(output_path):
        previous_frames = _load_previous_frames(output_path)

    # Initialize result structure
    result = {
        "video_path": video_path,
        "model": model_name,
        "device": device,
        "confidence_threshold": confidence,
        "iou_threshold": iou_threshold,
        "processed_at": datetime.now().isoformat(),
        "frames": previous_frames,
    }

    # Process video
    print(f"[YOLO] Processing video: {video_path}")
    start_time = time.time()

    frame_count = 0
    detection_count = 0
    last_save_time = start_time

    try:
        # Use stream mode for memory efficiency
        results = model(
            video_path,
            conf=confidence,
            iou=iou_threshold,
            device=device,
            stream=True,
            imgsz=640,  # Smaller size for faster processing
            verbose=False,
        )

        for idx, r in enumerate(results):
            # Skip frames based on skip_frames setting
            if idx % frame_step != 0:
                continue

            # Only frames with at least one detection are stored.
            boxes = r.boxes
            if boxes is not None and len(boxes) > 0:
                frame_detections = [_box_to_detection(box) for box in boxes]
                detection_count += len(frame_detections)

                result["frames"][str(idx)] = {
                    # Derived from the frame index: the box tensor carries no
                    # time information, and a plain float keeps the record
                    # JSON-serializable.
                    "timestamp": idx / assumed_fps,
                    "detections": frame_detections,
                }

            frame_count += 1

            # Progress reporting
            if frame_count % 100 == 0:
                elapsed = time.time() - start_time
                fps = frame_count / elapsed if elapsed > 0 else 0
                print(
                    f"[YOLO] Processed {frame_count} frames, {detection_count} detections, {fps:.1f} FPS"
                )

            # Periodic save
            if save_interval > 0 and time.time() - last_save_time > save_interval:
                _write_results_atomically(result, output_path)
                last_save_time = time.time()
                print(f"[YOLO] Auto-saved at frame {frame_count}")

    except SystemExit:
        # signal_handler terminates via sys.exit(), which is not an
        # ``Exception``; persist what has been collected before exiting.
        _write_results_atomically(result, output_path)
        raise
    except Exception as e:
        print(f"[YOLO] Error during processing: {e}")
        raise

    # Final save
    elapsed_time = time.time() - start_time
    avg_fps = frame_count / elapsed_time if elapsed_time > 0 else 0

    # Counts cover the frames handled by this invocation only.
    result["summary"] = {
        "total_frames": frame_count,
        "total_detections": detection_count,
        "processing_time": round(elapsed_time, 2),
        "average_fps": round(avg_fps, 2),
        "device": device,
    }

    # Save final results
    _write_results_atomically(result, output_path)

    print(
        f"[YOLO] Completed: {frame_count} frames, {detection_count} detections in {elapsed_time:.1f}s ({avg_fps:.1f} FPS)"
    )
    print(f"[YOLO] Results saved to: {output_path}")

    return result
|
||||
|
||||
|
||||
def benchmark_models(video_path: str, num_frames: int = 100) -> Dict:
    """Time several YOLO model sizes on every available device.

    Each of yolov8n/s/m is run on "cpu" plus "mps" and/or "cuda" when torch
    reports them available, streaming ``video_path`` at ``imgsz=320`` for at
    most ``num_frames`` frames.  A combination that raises during inference
    is reported and left out of the result.

    Returns:
        Mapping of ``"<model>_<device>"`` to a dict with the number of
        ``frames`` processed, elapsed ``time`` in seconds and ``fps``.
    """
    available_devices = ["cpu"]
    if torch.backends.mps.is_available():
        available_devices.append("mps")
    if torch.cuda.is_available():
        available_devices.append("cuda")

    combinations = [
        (name, dev)
        for name in ("yolov8n", "yolov8s", "yolov8m")
        for dev in available_devices
    ]

    timings = {}
    for model_name, device in combinations:
        print(f"[YOLO] Benchmarking {model_name} on {device}...")

        model = YOLO(f"{model_name}.pt")
        if device != "cpu":
            model.to(device)

        # The clock starts before the stream is created, so any first-call
        # setup performed by the model is part of the measurement.
        started_at = time.time()
        processed = 0

        try:
            stream = model(video_path, device=device, stream=True, imgsz=320)
            for position, _ in enumerate(stream):
                if position >= num_frames:
                    break
                processed += 1
        except Exception as e:
            print(f"[YOLO] Error: {e}")
            continue

        duration = time.time() - started_at
        rate = processed / duration if duration > 0 else 0

        timings[f"{model_name}_{device}"] = {
            "frames": processed,
            "time": round(duration, 2),
            "fps": round(rate, 2),
        }

    return timings
|
||||
|
||||
|
||||
def main():
    """Command-line entry point.

    Parses the options below, then either runs benchmark_models on the
    given video and prints its results as JSON (``--benchmark``) or runs
    process_video_yolo with the parsed settings.
    """
    parser = argparse.ArgumentParser(description="YOLO Processor with MPS Support")

    # (flags, keyword options) pairs, registered in display order.
    option_table = (
        (("--video",), {"required": True, "help": "Input video path"}),
        (("--output",), {"required": True, "help": "Output JSON path"}),
        (
            ("--model",),
            {"default": "yolov8n", "help": "YOLO model (yolov8n/s/m/l/x)"},
        ),
        (
            ("--confidence",),
            {"type": float, "default": 0.25, "help": "Confidence threshold"},
        ),
        (
            ("--iou",),
            {"type": float, "default": 0.45, "help": "IoU threshold for NMS"},
        ),
        (
            ("--device",),
            {
                "default": "auto",
                "choices": ["auto", "mps", "cuda", "cpu"],
                "help": "Device to use",
            },
        ),
        (
            ("--batch-size",),
            {"type": int, "default": 8, "help": "Batch size for processing"},
        ),
        (
            ("--skip-frames",),
            {"type": int, "default": 1, "help": "Process every N frames"},
        ),
        (
            ("--no-resume",),
            {"action": "store_true", "help": "Do not resume from existing results"},
        ),
        (
            ("--save-interval",),
            {"type": int, "default": 30, "help": "Auto-save interval in seconds"},
        ),
        (
            ("--benchmark",),
            {"action": "store_true", "help": "Run benchmark instead of processing"},
        ),
    )
    for flags, options in option_table:
        parser.add_argument(*flags, **options)

    args = parser.parse_args()

    if args.benchmark:
        report = benchmark_models(args.video)
        print("\n[Benchmark Results]")
        print(json.dumps(report, indent=2))
        return

    settings = {
        "video_path": args.video,
        "output_path": args.output,
        "model_name": args.model,
        "confidence": args.confidence,
        "iou_threshold": args.iou,
        "device": args.device,
        "batch_size": args.batch_size,
        "skip_frames": args.skip_frames,
        # The CLI flag is negative ("--no-resume"); the function takes the
        # positive form.
        "resume": not args.no_resume,
        "save_interval": args.save_interval,
    }
    process_video_yolo(**settings)
|
||||
|
||||
|
||||
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user