#!/opt/homebrew/bin/python3.11
"""
OCR Processor Wrapper

Calls Swift Vision Framework OCR (swift_ocr) with fallback to PaddleOCR.
"""

import argparse
import importlib.util
import json
import os
import subprocess
import sys

# Directory containing this script; the Swift build tree and the PaddleOCR
# fallback module are both looked up relative to it.
_SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))

# Candidate locations of the compiled swift_ocr binary, tried in this order.
SWIFT_OCR_PATH = os.path.join(
    _SCRIPT_DIR, "swift_processors/.build/debug/swift_ocr"
)
SWIFT_OCR_ALT = os.path.join(
    _SCRIPT_DIR, "swift_processors/.build/arm64-apple-macosx/debug/swift_ocr"
)

# Upper bound, in seconds, on a single swift_ocr run.
_SWIFT_TIMEOUT_SECONDS = 7200


def process_ocr(
    video_path: str,
    output_path: str,
    uuid: str = "",
    sample_interval: int = 30,
    recognition_level: str = "accurate",
) -> dict:
    """Run OCR on a video with swift_ocr, falling back to PaddleOCR.

    The Swift binary is invoked as a subprocess and is expected to write a
    JSON document to ``output_path``; that document is parsed and returned.
    The fallback is used when the binary is missing, cannot be started,
    times out, exits non-zero, produces no output file, or produces a file
    that cannot be parsed as JSON.

    Args:
        video_path: Path of the video handed to the OCR backend.
        output_path: Path the OCR backend writes its JSON result to.
        uuid: Identifier forwarded to the backend via ``--uuid``.
        sample_interval: Value forwarded via ``--sample-interval``.
        recognition_level: Value forwarded via ``--recognition-level``.
            It is not passed to the PaddleOCR fallback.

    Returns:
        The parsed JSON result of the Swift run, or whatever the fallback
        returns.
    """
    swift_bin = next(
        (
            candidate
            for candidate in (SWIFT_OCR_PATH, SWIFT_OCR_ALT)
            if os.path.exists(candidate)
        ),
        None,
    )
    if swift_bin is None:
        print("[OCR] Swift binary not found, using PaddleOCR", file=sys.stderr)
        return _fallback(video_path, output_path, uuid, sample_interval)

    cmd = [
        swift_bin,
        video_path,
        output_path,
        "--sample-interval",
        str(sample_interval),
        "--recognition-level",
        recognition_level,
        "--uuid",
        uuid,
    ]

    print("[OCR] Running Swift OCR", file=sys.stderr)
    try:
        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            timeout=_SWIFT_TIMEOUT_SECONDS,
        )
    except (OSError, subprocess.TimeoutExpired) as exc:
        # A binary that cannot be executed or that hangs is treated like any
        # other Swift failure instead of crashing the wrapper.
        print(
            f"[OCR] Swift OCR could not complete ({exc}), "
            "falling back to PaddleOCR",
            file=sys.stderr,
        )
        return _fallback(video_path, output_path, uuid, sample_interval)

    # Relay the child's output on stderr so this process's stdout carries
    # only what the script itself prints.
    if result.stdout:
        print(result.stdout.strip(), file=sys.stderr)
    if result.stderr:
        print(result.stderr.strip(), file=sys.stderr)

    if result.returncode != 0 or not os.path.exists(output_path):
        print("[OCR] Swift OCR failed, falling back to PaddleOCR", file=sys.stderr)
        return _fallback(video_path, output_path, uuid, sample_interval)

    try:
        # JSON is read as UTF-8 explicitly rather than with the locale default.
        with open(output_path, encoding="utf-8") as f:
            return json.load(f)
    except (OSError, ValueError) as exc:
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
        print(
            f"[OCR] Swift OCR output unreadable ({exc}), "
            "falling back to PaddleOCR",
            file=sys.stderr,
        )
        return _fallback(video_path, output_path, uuid, sample_interval)


def _fallback(video_path, output_path, uuid, sample_interval):
    """Fallback to the original PaddleOCR implementation.

    Loads ``ocr_paddle.py`` from this script's directory and delegates to its
    ``process_ocr``. If that file is absent or cannot be loaded as a module,
    an empty result (no frames) is returned instead.
    """
    paddle_path = os.path.join(_SCRIPT_DIR, "ocr_paddle.py")

    # spec_from_file_location() builds a spec for a ".py" path even when the
    # file is missing, so existence has to be checked explicitly.
    spec = None
    if os.path.isfile(paddle_path):
        spec = importlib.util.spec_from_file_location("paddle_ocr", paddle_path)

    if spec is None or spec.loader is None:
        print("[OCR] No fallback available, returning empty result", file=sys.stderr)
        return {"frame_count": 0, "fps": 0, "frames": []}

    paddle = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(paddle)
    return paddle.process_ocr(
        video_path, output_path, uuid, sample_interval=sample_interval
    )


def main() -> None:
    """Parse command-line arguments, run OCR and write the result as JSON."""
    parser = argparse.ArgumentParser(description="OCR Processor (Swift Vision)")
    parser.add_argument("video_path")
    parser.add_argument("output_path")
    parser.add_argument("--uuid", "-u", default="")
    parser.add_argument("--sample-interval", type=int, default=30)
    parser.add_argument(
        "--recognition-level", choices=["fast", "accurate"], default="accurate"
    )
    args = parser.parse_args()

    result = process_ocr(
        args.video_path,
        args.output_path,
        args.uuid,
        args.sample_interval,
        args.recognition_level,
    )

    # Rewrites output_path with the final result, whichever backend made it.
    with open(args.output_path, "w", encoding="utf-8") as f:
        json.dump(result, f, indent=2)

    print(f"OCR: {len(result.get('frames', []))} frames with text")


if __name__ == "__main__":
    main()