166 lines
4.6 KiB
Python
Executable File
166 lines
4.6 KiB
Python
Executable File
#!/opt/homebrew/bin/python3.11
|
|
"""
|
|
OCR Processor - Text Recognition
|
|
Uses EasyOCR (local model)
|
|
"""
|
|
|
|
import sys
|
|
import json
|
|
import argparse
|
|
import os
|
|
import signal
|
|
|
|
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
|
from redis_publisher import RedisPublisher
|
|
|
|
|
|
def signal_handler(signum, frame):
    """Announce the received signal on stdout and exit with status 1.

    Has the ``(signum, frame)`` signature that ``signal.signal`` handlers
    take; *frame* is unused.
    """
    notice = f"OCR: Received signal {signum}, exiting..."
    print(notice)
    raise SystemExit(1)
|
|
|
|
|
|
def _write_result(output_path, result):
    """Write *result* to *output_path* as indented JSON."""
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(result, f, indent=2)


def _detection_to_entry(detection):
    """Convert one OCR detection into an output dict, or None for blank text.

    *detection* is indexed as ``(bbox, text, confidence)`` where ``bbox`` is
    an iterable of ``(x, y)`` points.
    """
    det = tuple(detection)
    text = str(det[1])
    if not text.strip():
        return None

    points = list(det[0])
    xs = [float(p[0]) for p in points]
    ys = [float(p[1]) for p in points]

    # Axis-aligned box enclosing every bbox point, truncated to integers.
    x = int(min(xs))
    y = int(min(ys))
    return {
        "text": text,
        "x": x,
        "y": y,
        "width": int(max(xs) - x),
        "height": int(max(ys) - y),
        "confidence": float(det[2]),
    }


def process_ocr(
    video_path: str,
    output_path: str,
    uuid: str = "",
    sample_interval: int = 30,
) -> dict:
    """Process video for OCR using EasyOCR.

    Every ``sample_interval``-th frame of the video is converted to RGB and
    passed to EasyOCR; frames that yield at least one non-blank text are
    recorded. The result is written to *output_path* as JSON and returned.

    Args:
        video_path: Path of the video file to read with OpenCV.
        output_path: Path of the JSON file to write.
        uuid: When non-empty, a ``RedisPublisher(uuid)`` is created and
            info/progress/error/complete messages are published under the
            ``"ocr"`` stage. When empty, nothing is published.
        sample_interval: Run OCR on every N-th frame (default 30).

    Returns:
        ``{"frame_count": int, "fps": float, "frames": [...]}`` where each
        frame entry has ``"frame"`` (0-based index), ``"timestamp"`` (seconds,
        rounded to 3 decimals; 0 when fps is not positive) and ``"texts"``
        (list of dicts with text, x, y, width, height, confidence).
        If ``easyocr`` cannot be imported, an empty result
        (``frame_count`` 0, ``fps`` 0.0, no frames) is written and returned.

    Raises:
        ValueError: If *sample_interval* is less than 1.
    """
    if sample_interval < 1:
        raise ValueError(f"sample_interval must be >= 1, got {sample_interval}")

    # Set up signal handlers so SIGTERM/SIGINT terminate the run via
    # signal_handler. NOTE: signal.signal() must be called from the main thread.
    signal.signal(signal.SIGTERM, signal_handler)
    signal.signal(signal.SIGINT, signal_handler)

    publisher = RedisPublisher(uuid) if uuid else None
    if publisher:
        publisher.info("ocr", "OCR_START")

    try:
        import easyocr
    except ImportError:
        if publisher:
            publisher.error("ocr", "easyocr not installed")
        result = {"frame_count": 0, "fps": 0.0, "frames": []}
        # Write the file before announcing completion so a listener never
        # sees "complete" without the output being on disk.
        _write_result(output_path, result)
        if publisher:
            publisher.complete("ocr", "0 frames")
        return result

    if publisher:
        publisher.info("ocr", "OCR_LOADING_MODEL")

    # Load EasyOCR reader
    # languages: add more like 'fr', 'de', 'ja', 'ko', etc.
    # gpu: set to True if GPU available
    reader = easyocr.Reader(["en"], gpu=False, verbose=False)

    if publisher:
        publisher.info("ocr", "OCR_MODEL_LOADED")

    import cv2

    frames = []
    frame_count = 0  # 1-based count of frames read so far
    processed = 0  # number of sampled frames sent to OCR

    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened() and publisher:
            # Processing continues and yields an empty result, but the
            # failure is no longer silent.
            publisher.error("ocr", f"Cannot open video: {video_path}")

        # Get video info
        fps = cap.get(cv2.CAP_PROP_FPS)
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        expected_samples = total_frames // sample_interval

        if publisher:
            publisher.info("ocr", f"fps={fps}, frames={total_frames}")
            publisher.progress("ocr", 0, total_frames, "Starting")

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            frame_count += 1

            # Sample frames: only every sample_interval-th frame is processed.
            if frame_count % sample_interval != 0:
                continue

            processed += 1
            timestamp = (frame_count - 1) / fps if fps > 0 else 0

            # Convert BGR to RGB
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            # Run OCR. Best effort per frame: a failure is reported and the
            # frame is treated as having no text, so one bad frame does not
            # abort the whole video.
            try:
                detections = reader.readtext(
                    frame_rgb, text_threshold=0.5, low_text=0.3, link_threshold=0.3
                )
            except Exception as e:
                if publisher:
                    publisher.error("ocr", f"Frame {frame_count}: {e}")
                detections = []

            texts = [
                entry
                for entry in map(_detection_to_entry, detections)
                if entry is not None
            ]

            # Only add frames with text
            if texts:
                frames.append(
                    {
                        "frame": frame_count - 1,
                        "timestamp": round(timestamp, 3),
                        "texts": texts,
                    }
                )

            if publisher:
                publisher.progress(
                    "ocr",
                    processed,
                    # The reported frame count may be inaccurate; never
                    # publish a total smaller than the work already done.
                    max(expected_samples, processed),
                    f"Frame {frame_count}",
                )
    finally:
        # Release the capture even if OCR or post-processing raised.
        cap.release()

    result = {"frame_count": total_frames, "fps": fps, "frames": frames}
    _write_result(output_path, result)

    if publisher:
        publisher.complete("ocr", f"{len(frames)} frames with text")

    return result
|
|
|
|
|
|
if __name__ == "__main__":
    # Command-line entry point: two positional paths plus an optional UUID
    # that is forwarded to process_ocr for Redis progress reporting.
    cli = argparse.ArgumentParser(description="OCR Text Recognition")
    cli.add_argument("video_path", help="Path to video file")
    cli.add_argument("output_path", help="Output JSON path")
    cli.add_argument("--uuid", "-u", default="", help="UUID for Redis progress")
    options = cli.parse_args()

    process_ocr(
        video_path=options.video_path,
        output_path=options.output_path,
        uuid=options.uuid,
    )
|