#!/opt/homebrew/bin/python3.11
"""
OCR Processor - Text Recognition with Resume Support
Uses EasyOCR (local model)

Resume Feature:
- Auto-detect existing results and resume from last frame
- Auto-save at configurable intervals (default: 30 seconds)
- Ctrl+C gracefully saves and exits
"""

import sys
import json
import argparse
import os
import signal
import time
from datetime import datetime

sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from redis_publisher import RedisPublisher
from resume_framework import ResumeFramework, format_time, print_progress


def _detections_to_texts(detections) -> list[dict]:
    """Convert raw OCR detections into JSON-serialisable text records.

    Each detection is indexed as ``(bbox, text, confidence)`` where ``bbox``
    is an iterable of ``(x, y)`` points. The polygon is reduced to its
    axis-aligned bounding rectangle. Detections whose text is empty or
    whitespace-only are dropped.
    """
    texts = []
    for detection in detections:
        det = tuple(detection)
        text = str(det[1])
        if not text.strip():
            continue

        bbox = list(det[0])
        xs = [float(p[0]) for p in bbox]
        ys = [float(p[1]) for p in bbox]
        x = int(min(xs))
        y = int(min(ys))
        texts.append(
            {
                "text": text,
                "x": x,
                "y": y,
                # Width/height are measured from the truncated origin.
                "width": int(max(xs) - x),
                "height": int(max(ys) - y),
                # Explicit float() so the value is a plain Python number for json.
                "confidence": float(det[2]),
            }
        )
    return texts


def process_ocr(
    video_path: str,
    output_path: str,
    uuid: str = "",
    auto_save_interval: int = 30,
    auto_save_frames: int = 300,
    force_restart: bool = False,
    sample_interval: int = 30,
) -> dict:
    """Process video for OCR using EasyOCR with resume support.

    Args:
        video_path: Path of the video file to read.
        output_path: Path of the JSON result file (also handed to the
            resume framework).
        uuid: Identifier forwarded to ``ResumeFramework``.
        auto_save_interval: Auto-save interval in seconds, forwarded to
            ``ResumeFramework``.
        auto_save_frames: Auto-save interval in frames, forwarded to
            ``ResumeFramework``.
        force_restart: When true, existing results are ignored and
            processing starts from the first frame.
        sample_interval: Only frames whose 1-based number is a multiple of
            this value are run through OCR. Must be >= 1.

    Returns:
        The result dict with ``"metadata"`` and ``"frames"`` keys. Only
        frames in which text was found get an entry under ``"frames"``,
        keyed by the frame number as a string. On failure (easyocr missing
        or video cannot be opened) a dict with ``metadata.status == "error"``
        and empty ``frames`` is returned.

    Raises:
        ValueError: If ``sample_interval`` is smaller than 1.
    """
    # Fail fast: a zero interval would otherwise raise ZeroDivisionError in
    # the frame loop, after the model has already been loaded.
    if sample_interval < 1:
        raise ValueError(f"sample_interval must be >= 1, got {sample_interval}")

    framework = ResumeFramework(
        output_path=output_path,
        processor_name="ocr",
        uuid=uuid,
        auto_save_interval=auto_save_interval,
        auto_save_frames=auto_save_frames,
        force_restart=force_restart,
    )

    framework.publish_info("OCR_START")

    try:
        import easyocr
    except ImportError:
        framework.publish_error("easyocr not installed")
        result = {
            "metadata": {"status": "error", "error": "easyocr not installed"},
            "frames": {},
        }
        with open(output_path, "w", encoding="utf-8") as f:
            json.dump(result, f, indent=2)
        framework.publish_progress(0, 0, "0 frames")
        return result

    framework.publish_info("OCR_LOADING_MODEL")
    reader = easyocr.Reader(["en"], gpu=False, verbose=False)
    framework.publish_info("OCR_MODEL_LOADED")

    import cv2

    # First pass: probe the video properties only.
    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            print(f"Error: Cannot open video: {video_path}")
            framework.publish_error(f"Cannot open video: {video_path}")
            return {"metadata": {"status": "error"}, "frames": {}}

        fps = cap.get(cv2.CAP_PROP_FPS)
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        total_duration = total_frames / fps if fps > 0 else 0
    finally:
        cap.release()

    framework.publish_info(f"fps={fps}, frames={total_frames}")

    existing_data, last_checkpoint = framework.load_existing_data()

    resume_mode = existing_data is not None and last_checkpoint > 0 and not force_restart

    if resume_mode:
        print(f"\nFound existing data: {output_path}")
        print(f"Last processed frame: {last_checkpoint}")
        print(f"Will resume from frame {last_checkpoint + 1}")

    if resume_mode and existing_data:
        ocr_data = existing_data
        # Guard against a checkpoint file without a "frames" section so the
        # loop below can always index ocr_data["frames"].
        frames = ocr_data.setdefault("frames", {})
        frame_count = last_checkpoint
        processed_frames = {int(k) for k in frames}
        cap = cv2.VideoCapture(video_path)
        # Frame numbers here are 1-based, so seeking to 0-based index
        # `last_checkpoint` makes the next read return frame last_checkpoint + 1.
        cap.set(cv2.CAP_PROP_POS_FRAMES, frame_count)
    else:
        ocr_data = {
            "metadata": framework.init_metadata(
                video_path=video_path,
                fps=fps,
                width=width,
                height=height,
                total_frames=total_frames,
                total_duration=total_duration,
                extra={"sample_interval": sample_interval},
            ),
            "frames": {},
        }
        frame_count = 0
        processed_frames = set()
        cap = cv2.VideoCapture(video_path)

    framework.set_data(ocr_data)

    start_time = time.time()
    framework.last_save_time = start_time

    print(f"\nProcessing video: {total_frames} frames @ {fps:.2f} fps")
    print(f"Auto-save every {auto_save_interval}s or {auto_save_frames} frames")
    print(f"Resume from frame {frame_count + 1 if resume_mode else 1}")
    print()

    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            frame_count += 1

            if frame_count in processed_frames:
                continue

            if frame_count % sample_interval != 0:
                continue

            current_time = (frame_count - 1) / fps if fps > 0 else 0

            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            # Best effort per frame: an OCR failure is reported and the frame
            # is treated as containing no text instead of aborting the run.
            try:
                detections = reader.readtext(
                    frame_rgb, text_threshold=0.5, low_text=0.3, link_threshold=0.3
                )
            except Exception as e:
                framework.publish_error(f"Frame {frame_count}: {e}")
                detections = []

            texts = _detections_to_texts(detections)

            if texts:
                ocr_data["frames"][str(frame_count)] = {
                    "frame_number": frame_count,
                    "time_seconds": round(current_time, 3),
                    "time_formatted": format_time(current_time),
                    "texts": texts,
                }

            processed_frames.add(frame_count)

            # NOTE: this point is only reached for sampled frames, so progress
            # is reported when frame_count is a multiple of both 500 and
            # sample_interval.
            if frame_count % 500 == 0:
                elapsed = time.time() - start_time
                print_progress(frame_count, total_frames, elapsed, f"{len(texts)} texts")
                framework.publish_progress(frame_count, total_frames, f"frame {frame_count}")

            if framework.should_auto_save(frame_count):
                framework.save_progress(frame_count, silent=True)
    finally:
        # Release the capture even if an exception escapes the loop.
        cap.release()

    total_processed = len(processed_frames)
    framework.finalize(
        total_processed=total_processed,
        extra_metadata={"sample_interval": sample_interval},
    )

    print(f"\nOCR completed: {total_processed} frames processed")
    print(f"Frames with text: {len(ocr_data['frames'])}")

    return ocr_data


def main() -> None:
    """Parse command-line arguments and run :func:`process_ocr`."""
    parser = argparse.ArgumentParser(description="OCR Text Recognition with Resume Support")
    parser.add_argument("video_path", help="Path to video file")
    parser.add_argument("output_path", help="Output JSON path")
    parser.add_argument("--uuid", "-u", help="UUID for Redis progress", default="")
    parser.add_argument(
        "--auto-save-interval",
        "-a",
        help="Auto-save interval in seconds",
        type=int,
        default=30,
    )
    parser.add_argument(
        "--auto-save-frames",
        "-f",
        help="Auto-save interval in frames",
        type=int,
        default=300,
    )
    parser.add_argument(
        "--force-restart",
        "-r",
        help="Force restart (ignore existing data)",
        action="store_true",
    )
    parser.add_argument(
        "--sample-interval",
        "-s",
        help="Frame sample interval",
        type=int,
        default=30,
    )
    args = parser.parse_args()

    # Report an invalid interval as a usage error rather than a traceback.
    if args.sample_interval < 1:
        parser.error("--sample-interval must be >= 1")

    process_ocr(
        args.video_path,
        args.output_path,
        args.uuid,
        args.auto_save_interval,
        args.auto_save_frames,
        args.force_restart,
        args.sample_interval,
    )


if __name__ == "__main__":
    main()