123 lines
4.1 KiB
Python
Executable File
123 lines
4.1 KiB
Python
Executable File
#!/opt/homebrew/bin/python3.11
|
|
"""
OCR Processor Wrapper

Calls Swift Vision Framework OCR (swift_ocr) with fallback to PaddleOCR.
"""
|
|
|
|
import re
|
|
import sys
|
|
import json
|
|
import os
|
|
import subprocess
|
|
import argparse
|
|
|
|
|
|
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
|
from redis_publisher import RedisPublisher
|
|
|
|
|
|
# Directory containing this script; the Swift build products are looked up
# relative to it.
_SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))

# Primary location of the swift_ocr debug build.
SWIFT_OCR_PATH = os.path.join(
    _SCRIPT_DIR, "swift_processors/.build/debug/swift_ocr"
)

# Alternate, architecture-specific location of the swift_ocr debug build.
SWIFT_OCR_ALT = os.path.join(
    _SCRIPT_DIR, "swift_processors/.build/arm64-apple-macosx/debug/swift_ocr"
)

# Matches "[SwiftOCR] Progress: NN%" lines; group 1 is the integer percentage.
SWIFT_PROGRESS_RE = re.compile(r"\[SwiftOCR\] Progress:\s*(\d+)%")
|
|
|
|
|
|
def process_ocr(
    video_path: str,
    output_path: str,
    uuid: str = "",
    sample_interval: int = 30,
    recognition_level: str = "accurate",
    publisher: RedisPublisher = None,
) -> dict:
    """Run the Swift OCR binary on a video and return its parsed JSON output.

    The binary is looked up at ``SWIFT_OCR_PATH`` and then ``SWIFT_OCR_ALT``.
    The PaddleOCR fallback (``_fallback``) is used when the binary is missing,
    cannot be launched, exits with a non-zero status, or does not leave a file
    at ``output_path``.

    Args:
        video_path: Path of the input video, passed to the OCR backend.
        output_path: Path where the Swift binary is expected to write its JSON.
        uuid: Identifier forwarded to the binary via ``--uuid``.
        sample_interval: Value forwarded via ``--sample-interval``.
        recognition_level: Value forwarded via ``--recognition-level``.
        publisher: Optional reporter; when truthy it receives ``progress``
            calls for each new percentage and an ``error`` call before a
            fallback caused by a Swift failure.

    Returns:
        The content of ``output_path`` parsed as JSON, or the fallback's
        return value.
    """
    # Function-scope import: only needed for the stderr drain thread below.
    import threading

    swift_bin = SWIFT_OCR_PATH
    if not os.path.exists(swift_bin):
        swift_bin = SWIFT_OCR_ALT

    if not os.path.exists(swift_bin):
        print("[OCR] Swift binary not found, using PaddleOCR", file=sys.stderr)
        return _fallback(video_path, output_path, uuid, sample_interval)

    cmd = [
        swift_bin, video_path, output_path,
        "--sample-interval", str(sample_interval),
        "--recognition-level", recognition_level,
        "--uuid", uuid,
    ]

    print("[OCR] Running Swift OCR", file=sys.stderr)
    try:
        proc = subprocess.Popen(
            cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True
        )
    except OSError as exc:
        # The file exists but could not be executed (permissions, wrong
        # architecture, ...): treat it like any other Swift failure.
        print(
            f"[OCR] Could not launch Swift OCR ({exc}), falling back to PaddleOCR",
            file=sys.stderr,
        )
        if publisher:
            publisher.error("ocr", "Swift OCR failed, using fallback")
        return _fallback(video_path, output_path, uuid, sample_interval)

    stderr_chunks = []

    def _drain_stderr():
        # Collect the child's stderr while stdout is being consumed.
        stderr_chunks.append(proc.stderr.read())

    # The context manager closes both pipes and waits for the child on exit,
    # including when an exception escapes the loop.
    with proc:
        # Reading stdout to EOF before touching stderr can deadlock once the
        # child fills the stderr pipe buffer, so stderr is drained concurrently.
        drainer = threading.Thread(target=_drain_stderr, daemon=True)
        drainer.start()

        last_pct = -1
        for raw_line in proc.stdout:
            line = raw_line.strip()
            m = SWIFT_PROGRESS_RE.search(line)
            if m:
                pct = int(m.group(1))
                # Only report when the percentage actually advances.
                if pct > last_pct:
                    last_pct = pct
                    print(f"[OCR] Progress: {pct}%", file=sys.stderr)
                    if publisher:
                        publisher.progress("ocr", pct, 100, f"{pct}%")
            elif line:
                # Relay every other non-empty stdout line to our stderr.
                print(line, file=sys.stderr)

        drainer.join()

    stderr_output = "".join(stderr_chunks)
    if stderr_output:
        print(stderr_output.strip(), file=sys.stderr)

    if proc.returncode != 0 or not os.path.exists(output_path):
        print(
            f"[OCR] Swift OCR failed (exit={proc.returncode}), falling back to PaddleOCR",
            file=sys.stderr,
        )
        if publisher:
            publisher.error("ocr", "Swift OCR failed, using fallback")
        return _fallback(video_path, output_path, uuid, sample_interval)

    with open(output_path, encoding="utf-8") as f:
        return json.load(f)
|
|
|
|
|
|
def _fallback(video_path, output_path, uuid, sample_interval):
    """Run the PaddleOCR implementation from the sibling ``ocr_paddle.py``.

    The module is loaded by file path from the directory containing this
    file and its ``process_ocr`` function is called with the given arguments.

    Args:
        video_path: Passed through to ``ocr_paddle.process_ocr``.
        output_path: Passed through to ``ocr_paddle.process_ocr``.
        uuid: Passed through to ``ocr_paddle.process_ocr``.
        sample_interval: Passed through as the ``sample_interval`` keyword.

    Returns:
        The return value of ``ocr_paddle.process_ocr``, or the empty result
        ``{"frame_count": 0, "fps": 0, "frames": []}`` when ``ocr_paddle.py``
        is not available.
    """
    # ``import importlib`` alone does not guarantee that the ``util``
    # submodule is loaded, so import it explicitly.
    import importlib.util

    paddle_path = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), "ocr_paddle.py"
    )

    # spec_from_file_location() does not check that the file exists, so a
    # missing fallback has to be detected explicitly.
    spec = None
    if os.path.isfile(paddle_path):
        spec = importlib.util.spec_from_file_location("paddle_ocr", paddle_path)

    if spec is None or spec.loader is None:
        print("[OCR] No fallback available, returning empty result", file=sys.stderr)
        return {"frame_count": 0, "fps": 0, "frames": []}

    paddle = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(paddle)
    return paddle.process_ocr(
        video_path, output_path, uuid, sample_interval=sample_interval
    )
|
|
|
|
|
|
if __name__ == "__main__":
    # Command-line entry point: parse arguments, run OCR, write the result as
    # JSON to the output path and report start/completion via the publisher.
    parser = argparse.ArgumentParser(description="OCR Processor (Swift Vision)")
    for positional in ("video_path", "output_path"):
        parser.add_argument(positional)
    parser.add_argument("--uuid", "-u", default="")
    parser.add_argument("--sample-interval", type=int, default=30)
    parser.add_argument(
        "--recognition-level",
        choices=["fast", "accurate"],
        default="accurate",
    )
    args = parser.parse_args()

    # A publisher is only created when a uuid was supplied.
    publisher = None
    if args.uuid:
        publisher = RedisPublisher(args.uuid)
    if publisher:
        publisher.info("ocr", "OCR_START")

    result = process_ocr(
        args.video_path,
        args.output_path,
        uuid=args.uuid,
        sample_interval=args.sample_interval,
        recognition_level=args.recognition_level,
        publisher=publisher,
    )

    # Write the result (Swift or fallback) to the output path, pretty-printed.
    with open(args.output_path, "w") as out_file:
        json.dump(result, out_file, indent=2)

    frame_total = len(result.get("frames", []))
    print(f"OCR: {frame_total} frames with text")
    if publisher:
        publisher.complete("ocr", f"{frame_total} frames")
|