feat: update Python processors and add utility scripts

- Update ASR, face, OCR, pose processors
- Add release pre-flight check script
- Add synonym generation, chunk processing scripts
- Add face recognition, stamp search utilities
This commit is contained in:
Warren
2026-04-30 15:07:49 +08:00
parent f4697396e4
commit 8f05a7c188
256 changed files with 60505 additions and 299 deletions

View File

@@ -1,114 +1,159 @@
#!/opt/homebrew/bin/python3.11
"""
Pose Processor - Pose Estimation
Pose Processor - Pose Estimation with Resume Support
Uses YOLOv8 Pose via ultralytics (local model)
Resume Feature:
- Auto-detect existing results and resume from last frame
- Auto-save at configurable intervals (default: 30 seconds)
- Ctrl+C gracefully saves and exits
Note: ultralytics' stream mode (stream=True on the whole video) is optimized for video processing,
but to support resuming, frames are instead read manually with OpenCV and passed to the model one at a time.
"""
import sys
import json
import argparse
import os
import signal
import time
from datetime import datetime
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from redis_publisher import RedisPublisher
from resume_framework import ResumeFramework, format_time, print_progress
def signal_handler(signum, frame):
    """Report the received signal on stdout, then exit with status 1.

    Takes the ``(signum, frame)`` arguments that ``signal.signal`` passes
    to a handler; ``frame`` is accepted but not used.

    Raises:
        SystemExit: always, with exit code 1 (via ``sys.exit``).
    """
    notice = "POSE: Received signal {}, exiting...".format(signum)
    print(notice)
    sys.exit(1)
# COCO keypoint names, listed by index (17 entries: the nose, then
# left/right pairs from the head down to the ankles).
KEYPOINT_NAMES = [
    "nose",  # 0
    "left_eye", "right_eye",  # 1-2
    "left_ear", "right_ear",  # 3-4
    "left_shoulder", "right_shoulder",  # 5-6
    "left_elbow", "right_elbow",  # 7-8
    "left_wrist", "right_wrist",  # 9-10
    "left_hip", "right_hip",  # 11-12
    "left_knee", "right_knee",  # 13-14
    "left_ankle", "right_ankle",  # 15-16
]
def process_pose(video_path: str, output_path: str, uuid: str = ""):
"""Process video for pose estimation using YOLOv8 Pose"""
def process_pose(
video_path: str,
output_path: str,
uuid: str = "",
auto_save_interval: int = 30,
auto_save_frames: int = 300,
force_restart: bool = False,
):
"""Process video for pose estimation using YOLOv8 Pose with resume support"""
# Set up signal handlers
signal.signal(signal.SIGTERM, signal_handler)
signal.signal(signal.SIGINT, signal_handler)
framework = ResumeFramework(
output_path=output_path,
processor_name="pose",
uuid=uuid,
auto_save_interval=auto_save_interval,
auto_save_frames=auto_save_frames,
force_restart=force_restart,
)
publisher = RedisPublisher(uuid) if uuid else None
if publisher:
publisher.info("pose", "POSE_START")
framework.publish_info("POSE_START")
try:
from ultralytics import YOLO # pyright: ignore
from ultralytics import YOLO
except ImportError:
if publisher:
publisher.error("pose", "ultralytics not installed")
result = {"frame_count": 0, "fps": 0.0, "frames": []}
if publisher:
publisher.complete("pose", "0 frames")
framework.publish_error("ultralytics not installed")
result = {
"metadata": {"status": "error", "error": "ultralytics not installed"},
"frames": {},
}
with open(output_path, "w") as f:
json.dump(result, f, indent=2)
return result
if publisher:
publisher.info("pose", "POSE_LOADING_MODEL")
framework.publish_info("POSE_LOADING_MODEL")
# Load YOLOv8 Pose model
# yolov8n-pose.pt = nano (fastest)
# yolov8s-pose.pt = small
# yolov8m-pose.pt = medium
model = YOLO("yolov8n-pose.pt")
# Get video info
import cv2
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
print(f"Error: Cannot open video: {video_path}")
return {"metadata": {"status": "error"}, "frames": {}}
fps = cap.get(cv2.CAP_PROP_FPS)
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
total_duration = total_frames / fps if fps > 0 else 0
cap.release()
if publisher:
publisher.info("pose", f"fps={fps}, frames={total_frames}")
publisher.progress("pose", 0, total_frames, "Starting")
framework.publish_info(f"fps={fps}, frames={total_frames}")
# Process video with YOLO Pose
results = model(
video_path,
conf=0.5, # confidence threshold
save=False,
stream=True,
verbose=False,
pose=True, # Enable pose estimation
)
existing_data, last_checkpoint = framework.load_existing_data()
resume_mode = existing_data is not None and last_checkpoint > 0 and not force_restart
# COCO keypoint names
KEYPOINT_NAMES = [
"nose",
"left_eye",
"right_eye",
"left_ear",
"right_ear",
"left_shoulder",
"right_shoulder",
"left_elbow",
"right_elbow",
"left_wrist",
"right_wrist",
"left_hip",
"right_hip",
"left_knee",
"right_knee",
"left_ankle",
"right_ankle",
]
if resume_mode:
print(f"\nFound existing data: {output_path}")
print(f"Last processed frame: {last_checkpoint}")
print(f"Will resume from frame {last_checkpoint + 1}")
frames = []
frame_count = 0
if resume_mode and existing_data:
pose_data = existing_data
frame_count = last_checkpoint
processed_frames = set(int(k) for k in existing_data.get("frames", {}).keys())
cap = cv2.VideoCapture(video_path)
cap.set(cv2.CAP_PROP_POS_FRAMES, frame_count)
else:
pose_data = {
"metadata": framework.init_metadata(
video_path=video_path,
fps=fps,
width=width,
height=height,
total_frames=total_frames,
total_duration=total_duration,
extra={"model": "yolov8n-pose"},
),
"frames": {},
}
frame_count = 0
processed_frames = set()
cap = cv2.VideoCapture(video_path)
framework.set_data(pose_data)
start_time = time.time()
framework.last_save_time = start_time
print(f"\nProcessing video: {total_frames} frames @ {fps:.2f} fps")
print(f"Auto-save every {auto_save_interval}s or {auto_save_frames} frames")
print(f"Resume from frame {frame_count + 1 if resume_mode else 1}")
print()
while True:
ret, frame = cap.read()
if not ret:
break
for result in results:
frame_count += 1
current_time = (frame_count - 1) / fps if fps > 0 else 0
# Get frame number and timestamp
frame_idx = (
result.orig_frame_idx
if hasattr(result, "orig_frame_idx")
else frame_count - 1
)
timestamp = frame_idx / fps if fps > 0 else 0
if frame_count in processed_frames:
continue
results = model(frame, conf=0.5, verbose=False, pose=True)
result = results[0]
# Get pose keypoints
persons = []
if result.keypoints is not None:
@@ -128,7 +173,6 @@ def process_pose(video_path: str, output_path: str, uuid: str = ""):
}
)
# Get bounding box from keypoints if available
valid_kps = [kp for kp in keypoints if kp["confidence"] > 0.3]
if valid_kps:
xs = [kp["x"] for kp in valid_kps]
@@ -144,35 +188,70 @@ def process_pose(video_path: str, output_path: str, uuid: str = ""):
persons.append({"keypoints": keypoints, "bbox": bbox})
# Only add frames with poses or sample periodically
if persons or frame_count % 30 == 0:
frames.append(
{
"frame": frame_idx,
"timestamp": round(timestamp, 3),
"persons": persons,
}
)
pose_data["frames"][str(frame_count)] = {
"frame_number": frame_count,
"time_seconds": round(current_time, 3),
"time_formatted": format_time(current_time),
"persons": persons,
}
processed_frames.add(frame_count)
if publisher:
publisher.progress("pose", frame_count, total_frames, f"Frame {frame_idx}")
if frame_count % 500 == 0:
elapsed = time.time() - start_time
print_progress(frame_count, total_frames, elapsed, f"{len(persons)} persons")
framework.publish_progress(frame_count, total_frames, f"frame {frame_count}")
result = {"frame_count": total_frames, "fps": fps, "frames": frames}
if framework.should_auto_save(frame_count):
framework.save_progress(frame_count, silent=True)
if publisher:
publisher.complete("pose", f"{len(frames)} frames with poses")
cap.release()
with open(output_path, "w") as f:
json.dump(result, f, indent=2)
total_processed = len(processed_frames)
return result
framework.finalize(
total_processed=total_processed,
extra_metadata={"model": "yolov8n-pose"},
)
print(f"\nPose estimation completed: {total_processed} frames processed")
print(f"Frames with poses: {len([f for f in pose_data['frames'].values() if f['persons']])}")
return pose_data
# CLI entry point: parse the command-line arguments and run process_pose.
if __name__ == "__main__":
# NOTE(review): the next two lines both assign `parser`; the first looks like
# the pre-change side of the diff -- confirm that only the second should remain.
parser = argparse.ArgumentParser(description="Pose Estimation")
parser = argparse.ArgumentParser(description="Pose Estimation with Resume Support")
# Positional arguments: input video and output JSON file.
parser.add_argument("video_path", help="Path to video file")
parser.add_argument("output_path", help="Output JSON path")
# Optional UUID; defaults to the empty string when not given.
parser.add_argument("--uuid", "-u", help="UUID for Redis progress", default="")
# Auto-save thresholds, forwarded unchanged to process_pose.
parser.add_argument(
"--auto-save-interval",
"-a",
help="Auto-save interval in seconds",
type=int,
default=30,
)
parser.add_argument(
"--auto-save-frames",
"-f",
help="Auto-save interval in frames",
type=int,
default=300,
)
# Flag (False unless given) forwarded as force_restart.
parser.add_argument(
"--force-restart",
"-r",
help="Force restart (ignore existing data)",
action="store_true",
)
args = parser.parse_args()
# NOTE(review): process_pose is called twice as written (3-argument form, then
# 6-argument form); the first call looks like the pre-change side of the diff --
# confirm before running, otherwise the video would be processed twice.
process_pose(args.video_path, args.output_path, args.uuid)
process_pose(
args.video_path,
args.output_path,
args.uuid,
args.auto_save_interval,
args.auto_save_frames,
args.force_restart,
)