Files
momentry_core/scripts/ocr_processor.py

90 lines
3.0 KiB
Python
Executable File

#!/opt/homebrew/bin/python3.11
"""
OCR Processor Wrapper
Calls Swift Vision Framework OCR (swift_ocr) with fallback to PaddleOCR.
"""
import sys
import json
import os
import subprocess
import argparse
# Directory containing this script; both binary locations are resolved
# relative to it so the wrapper works regardless of the current directory.
_SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))

# Primary location of the compiled swift_ocr executable.
SWIFT_OCR_PATH = os.path.join(_SCRIPT_DIR, "swift_processors/.build/debug/swift_ocr")

# Alternate location (target-triple build directory), tried when the primary
# path does not exist.
SWIFT_OCR_ALT = os.path.join(
    _SCRIPT_DIR, "swift_processors/.build/arm64-apple-macosx/debug/swift_ocr"
)
def process_ocr(
    video_path: str,
    output_path: str,
    uuid: str = "",
    sample_interval: int = 30,
    recognition_level: str = "accurate",
) -> dict:
    """Run OCR on a video via the Swift binary, falling back to PaddleOCR.

    The Swift executable is looked up at ``SWIFT_OCR_PATH`` and then at
    ``SWIFT_OCR_ALT``. It is invoked with the video path, the output path
    and the sampling / recognition / uuid options, and is expected to write
    a JSON document to ``output_path``.

    The PaddleOCR fallback (``_fallback``) is used whenever the Swift path
    cannot produce a usable result: binary missing, process could not be
    started, process timed out, non-zero exit status, output file missing,
    or output file unreadable / not valid JSON.

    Args:
        video_path: Path of the video passed to the OCR backend.
        output_path: Path where the backend writes its JSON result.
        uuid: Identifier forwarded to the backend (``--uuid``).
        sample_interval: Value forwarded as ``--sample-interval``.
        recognition_level: Value forwarded as ``--recognition-level``
            (not forwarded to the fallback).

    Returns:
        The parsed JSON written by the Swift binary, or whatever the
        fallback returns.
    """
    swift_bin = next(
        (path for path in (SWIFT_OCR_PATH, SWIFT_OCR_ALT) if os.path.exists(path)),
        None,
    )
    if swift_bin is None:
        print("[OCR] Swift binary not found, using PaddleOCR", file=sys.stderr)
        return _fallback(video_path, output_path, uuid, sample_interval)

    cmd = [
        swift_bin, video_path, output_path,
        "--sample-interval", str(sample_interval),
        "--recognition-level", recognition_level,
        "--uuid", uuid,
    ]
    print("[OCR] Running Swift OCR", file=sys.stderr)
    try:
        # errors="replace" keeps undecodable bytes in the child's log output
        # from aborting the run; the captured text is only echoed to stderr.
        result = subprocess.run(
            cmd, capture_output=True, text=True, errors="replace", timeout=7200
        )
    except (subprocess.TimeoutExpired, OSError) as exc:
        # A timeout or a launch failure (e.g. binary not executable) must not
        # crash the caller: treat it like any other Swift failure.
        print(
            f"[OCR] Swift OCR did not complete ({exc}), falling back to PaddleOCR",
            file=sys.stderr,
        )
        return _fallback(video_path, output_path, uuid, sample_interval)

    # Relay everything the child printed to our stderr so that stdout stays
    # free for this script's own summary line.
    if result.stdout:
        print(result.stdout.strip(), file=sys.stderr)
    if result.stderr:
        print(result.stderr.strip(), file=sys.stderr)

    if result.returncode != 0 or not os.path.exists(output_path):
        print("[OCR] Swift OCR failed, falling back to PaddleOCR", file=sys.stderr)
        return _fallback(video_path, output_path, uuid, sample_interval)

    try:
        # JSON interchange is UTF-8; do not depend on the process locale.
        with open(output_path, encoding="utf-8") as f:
            return json.load(f)
    except (OSError, ValueError) as exc:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError
        # (truncated or corrupt output file).
        print(
            f"[OCR] Swift OCR output unreadable ({exc}), falling back to PaddleOCR",
            file=sys.stderr,
        )
        return _fallback(video_path, output_path, uuid, sample_interval)
def _fallback(video_path, output_path, uuid, sample_interval):
"""Fallback to original PaddleOCR implementation"""
import importlib
spec = importlib.util.spec_from_file_location(
"paddle_ocr",
os.path.join(os.path.dirname(__file__), "ocr_paddle.py")
)
if spec is None:
print("[OCR] No fallback available, returning empty result", file=sys.stderr)
return {"frame_count": 0, "fps": 0, "frames": []}
paddle = importlib.util.module_from_spec(spec)
spec.loader.exec_module(paddle)
return paddle.process_ocr(video_path, output_path, uuid, sample_interval=sample_interval)
if __name__ == "__main__":
    # Command-line entry point: parse the arguments, run OCR, write the
    # result as indented JSON to the output path and print a one-line summary.
    cli = argparse.ArgumentParser(description="OCR Processor (Swift Vision)")
    cli.add_argument("video_path")
    cli.add_argument("output_path")
    cli.add_argument("--uuid", "-u", default="")
    cli.add_argument("--sample-interval", type=int, default=30)
    cli.add_argument(
        "--recognition-level",
        choices=["fast", "accurate"],
        default="accurate",
    )
    opts = cli.parse_args()

    ocr_result = process_ocr(
        opts.video_path,
        opts.output_path,
        opts.uuid,
        opts.sample_interval,
        opts.recognition_level,
    )

    # The result is written to output_path regardless of which backend
    # produced it.
    with open(opts.output_path, "w") as out_file:
        json.dump(ocr_result, out_file, indent=2)

    frames_with_text = ocr_result.get("frames", [])
    print(f"OCR: {len(frames_with_text)} frames with text")