feat: Initial v0.9 release with API Key authentication

## v0.9.20260325_144654

### Features
- API Key Authentication System
- Job Worker System
- V2 Backup Versioning

### Bug Fixes
- get_processor_results_by_job column mapping

Co-authored-by: OpenCode
2026-03-25 14:52:51 +08:00
parent 47e86b696f
commit 383201cacd
193 changed files with 40268 additions and 422 deletions
--- a/scripts/ocr_processor.py
+++ b/scripts/ocr_processor.py
@@ -0,0 +1,155 @@
+#!/opt/homebrew/bin/python3.11
+"""
+OCR Processor - Text Recognition
+Uses EasyOCR (local model)
+"""
+
+import sys
+import json
+import argparse
+import os
+
+sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
+from redis_publisher import RedisPublisher
+
+
+def _detection_to_text_box(detection):
+    """Convert one OCR detection into a JSON-serialisable text box.
+
+    Args:
+        detection: Sequence of ``(bbox, text, confidence)`` where ``bbox`` is
+            an iterable of ``(x, y)`` corner points.
+
+    Returns:
+        A dict with ``text``, ``x``, ``y``, ``width``, ``height`` and
+        ``confidence``, or ``None`` when the text is empty or whitespace.
+    """
+    text = str(detection[1])
+    if not text.strip():
+        return None
+
+    # Cast to built-in float/int before serialising; the reader's values are
+    # presumably NumPy scalars that json.dump cannot encode -- TODO confirm.
+    bbox = list(detection[0])
+    xs = [float(point[0]) for point in bbox]
+    ys = [float(point[1]) for point in bbox]
+    left, top = int(min(xs)), int(min(ys))
+    return {
+        "text": text,
+        "x": left,
+        "y": top,
+        "width": int(max(xs) - left),
+        "height": int(max(ys) - top),
+        "confidence": float(detection[2]),
+    }
+
+
+def process_ocr(
+    video_path: str,
+    output_path: str,
+    uuid: str = "",
+    sample_interval: int = 30,
+):
+    """Run EasyOCR over sampled frames of a video and write the result as JSON.
+
+    Frames are counted from 1 and every ``sample_interval``-th one is
+    processed, so the first sampled frame has 0-based index
+    ``sample_interval - 1``. Only frames in which text was found are kept.
+
+    Args:
+        video_path: Path of the video opened with ``cv2.VideoCapture``.
+        output_path: Path the JSON result is written to.
+        uuid: When non-empty, a ``RedisPublisher(uuid)`` is created and
+            receives info/progress/error/complete messages for stage "ocr".
+        sample_interval: Process one frame out of this many (default 30).
+
+    Returns:
+        ``{"frame_count": int, "fps": float, "frames": [...]}`` where each
+        frame entry holds ``frame`` (0-based index), ``timestamp`` (seconds,
+        0 when fps is unknown) and ``texts`` (list of text boxes). If
+        ``easyocr`` cannot be imported an empty result is written instead.
+
+    Raises:
+        ValueError: If ``sample_interval`` is smaller than 1.
+    """
+    if sample_interval < 1:
+        raise ValueError("sample_interval must be >= 1")
+
+    publisher = RedisPublisher(uuid) if uuid else None
+    if publisher:
+        publisher.info("ocr", "OCR_START")
+
+    try:
+        import easyocr
+    except ImportError:
+        # Best effort: still emit a valid (empty) result so consumers of the
+        # output file do not fail, and write it before signalling completion.
+        result = {"frame_count": 0, "fps": 0.0, "frames": []}
+        if publisher:
+            publisher.error("ocr", "easyocr not installed")
+        with open(output_path, "w", encoding="utf-8") as f:
+            json.dump(result, f, indent=2)
+        if publisher:
+            publisher.complete("ocr", "0 frames")
+        return result
+
+    if publisher:
+        publisher.info("ocr", "OCR_LOADING_MODEL")
+
+    # Load EasyOCR reader
+    # languages: add more like 'fr', 'de', 'ja', 'ko', etc.
+    # gpu: set to True if GPU available
+    reader = easyocr.Reader(["en"], gpu=False, verbose=False)
+
+    if publisher:
+        publisher.info("ocr", "OCR_MODEL_LOADED")
+
+    import cv2
+
+    frames = []
+    # One capture serves both the metadata query and the read loop; the
+    # try/finally guarantees it is released even if a frame fails to process.
+    cap = cv2.VideoCapture(video_path)
+    try:
+        if publisher and not cap.isOpened():
+            publisher.error("ocr", f"Could not open video: {video_path}")
+
+        fps = cap.get(cv2.CAP_PROP_FPS)
+        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+        # Number of frames the loop below will sample; used as the progress
+        # total from the first report onwards so the denominator is stable.
+        expected_samples = total_frames // sample_interval
+
+        if publisher:
+            publisher.info("ocr", f"fps={fps}, frames={total_frames}")
+            publisher.progress("ocr", 0, expected_samples, "Starting")
+
+        frame_count = 0
+        processed = 0
+
+        while True:
+            ret, frame = cap.read()
+            if not ret:
+                break
+
+            frame_count += 1
+            if frame_count % sample_interval != 0:
+                continue
+
+            processed += 1
+            timestamp = (frame_count - 1) / fps if fps > 0 else 0
+
+            # OpenCV decodes to BGR; the reader is given RGB.
+            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+
+            try:
+                detections = reader.readtext(
+                    frame_rgb, text_threshold=0.5, low_text=0.3, link_threshold=0.3
+                )
+            except Exception as e:
+                # A single bad frame must not abort the whole video.
+                if publisher:
+                    publisher.error("ocr", f"Frame {frame_count}: {e}")
+                detections = []
+
+            boxes = (_detection_to_text_box(d) for d in detections)
+            texts = [box for box in boxes if box is not None]
+
+            # Only add frames with text
+            if texts:
+                frames.append(
+                    {
+                        "frame": frame_count - 1,
+                        "timestamp": round(timestamp, 3),
+                        "texts": texts,
+                    }
+                )
+
+            # Report every sampled frame, not only those containing text,
+            # so progress keeps advancing on text-free stretches of video.
+            if publisher:
+                publisher.progress(
+                    "ocr",
+                    processed,
+                    expected_samples,
+                    f"Frame {frame_count}",
+                )
+    finally:
+        cap.release()
+
+    result = {"frame_count": total_frames, "fps": fps, "frames": frames}
+
+    with open(output_path, "w", encoding="utf-8") as f:
+        json.dump(result, f, indent=2)
+
+    if publisher:
+        publisher.complete("ocr", f"{len(frames)} frames with text")
+
+    return result
+
+
+if __name__ == "__main__":
+    # Command-line entry point: two positional paths plus an optional UUID
+    # that, when given, enables Redis progress reporting in process_ocr.
+    cli = argparse.ArgumentParser(description="OCR Text Recognition")
+    for positional, description in (
+        ("video_path", "Path to video file"),
+        ("output_path", "Output JSON path"),
+    ):
+        cli.add_argument(positional, help=description)
+    cli.add_argument("--uuid", "-u", default="", help="UUID for Redis progress")
+    options = cli.parse_args()
+
+    process_ocr(
+        video_path=options.video_path,
+        output_path=options.output_path,
+        uuid=options.uuid,
+    )