feat: trace-level matching, health watcher/worker status, timezone config
This commit is contained in:
@@ -4,6 +4,7 @@ OCR Processor Wrapper
|
||||
Calls Swift Vision Framework OCR (swift_ocr) with fallback to PaddleOCR.
|
||||
"""
|
||||
|
||||
import re
|
||||
import sys
|
||||
import json
|
||||
import os
|
||||
@@ -11,6 +12,10 @@ import subprocess
|
||||
import argparse
|
||||
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||||
from redis_publisher import RedisPublisher
|
||||
|
||||
|
||||
SWIFT_OCR_PATH = os.path.join(
|
||||
os.path.dirname(os.path.abspath(__file__)),
|
||||
"swift_processors/.build/debug/swift_ocr"
|
||||
@@ -19,6 +24,7 @@ SWIFT_OCR_ALT = os.path.join(
|
||||
os.path.dirname(os.path.abspath(__file__)),
|
||||
"swift_processors/.build/arm64-apple-macosx/debug/swift_ocr"
|
||||
)
|
||||
SWIFT_PROGRESS_RE = re.compile(r"\[SwiftOCR\] Progress:\s*(\d+)%")
|
||||
|
||||
|
||||
def process_ocr(
|
||||
@@ -27,6 +33,7 @@ def process_ocr(
|
||||
uuid: str = "",
|
||||
sample_interval: int = 30,
|
||||
recognition_level: str = "accurate",
|
||||
publisher: RedisPublisher = None,
|
||||
) -> dict:
|
||||
swift_bin = SWIFT_OCR_PATH
|
||||
if not os.path.exists(swift_bin):
|
||||
@@ -42,15 +49,34 @@ def process_ocr(
|
||||
"--uuid", uuid]
|
||||
|
||||
print(f"[OCR] Running Swift OCR", file=sys.stderr)
|
||||
result = subprocess.run(cmd, capture_output=True, text=True, timeout=7200)
|
||||
proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
|
||||
|
||||
if result.stdout:
|
||||
print(result.stdout.strip(), file=sys.stderr)
|
||||
if result.stderr:
|
||||
print(result.stderr.strip(), file=sys.stderr)
|
||||
last_pct = -1
|
||||
stdout_lines = []
|
||||
for line in proc.stdout:
|
||||
line = line.strip()
|
||||
stdout_lines.append(line)
|
||||
m = SWIFT_PROGRESS_RE.search(line)
|
||||
if m:
|
||||
pct = int(m.group(1))
|
||||
if pct > last_pct:
|
||||
last_pct = pct
|
||||
print(f"[OCR] Progress: {pct}%", file=sys.stderr)
|
||||
if publisher:
|
||||
publisher.progress("ocr", pct, 100, f"{pct}%")
|
||||
elif line:
|
||||
print(line, file=sys.stderr)
|
||||
|
||||
if result.returncode != 0 or not os.path.exists(output_path):
|
||||
print(f"[OCR] Swift OCR failed, falling back to PaddleOCR", file=sys.stderr)
|
||||
stderr_output = proc.stderr.read()
|
||||
if stderr_output:
|
||||
print(stderr_output.strip(), file=sys.stderr)
|
||||
|
||||
proc.wait()
|
||||
|
||||
if proc.returncode != 0 or not os.path.exists(output_path):
|
||||
print(f"[OCR] Swift OCR failed (exit={proc.returncode}), falling back to PaddleOCR", file=sys.stderr)
|
||||
if publisher:
|
||||
publisher.error("ocr", f"Swift OCR failed, using fallback")
|
||||
return _fallback(video_path, output_path, uuid, sample_interval)
|
||||
|
||||
with open(output_path) as f:
|
||||
@@ -81,9 +107,16 @@ if __name__ == "__main__":
|
||||
parser.add_argument("--recognition-level", choices=["fast", "accurate"], default="accurate")
|
||||
args = parser.parse_args()
|
||||
|
||||
publisher = RedisPublisher(args.uuid) if args.uuid else None
|
||||
if publisher:
|
||||
publisher.info("ocr", "OCR_START")
|
||||
|
||||
result = process_ocr(args.video_path, args.output_path, args.uuid,
|
||||
args.sample_interval, args.recognition_level)
|
||||
args.sample_interval, args.recognition_level,
|
||||
publisher)
|
||||
|
||||
with open(args.output_path, "w") as f:
|
||||
json.dump(result, f, indent=2)
|
||||
print(f"OCR: {len(result.get('frames', []))} frames with text")
|
||||
if publisher:
|
||||
publisher.complete("ocr", f"{len(result.get('frames',[]))} frames")
|
||||
|
||||
Reference in New Issue
Block a user