feat: update Python processors and add utility scripts

- Update ASR, face, OCR, pose processors - Add release pre-flight check script - Add synonym generation, chunk processing scripts - Add face recognition, stamp search utilities
2026-04-30 15:07:49 +08:00
parent f4697396e4
commit 8f05a7c188
256 changed files with 60505 additions and 299 deletions
--- a/scripts/test_ollama_feasibility.py
+++ b/scripts/test_ollama_feasibility.py
@@ -0,0 +1,99 @@
+#!/opt/homebrew/bin/python3.11
+"""
+Ollama Local LLM Feasibility Test
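+Responsibility: measure the speed and output quality of semantic analysis run through a local Ollama model (the model tag is configured in MODEL_NAME).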
+"""
+
+import json
+import subprocess
+import time
+import sys
+
+# Identifier of the sample whose ASR transcript is used as test material
+# (hard-coded for this experiment).
+UUID = "384b0ff44aaaa1f1"
+# Relative path, so it is resolved against the current working directory.
+ASR_PATH = f"output/{UUID}/{UUID}.asr.json"
+
+# Model tag passed to `ollama run` (Gemma 4 was chosen at the user's request).
+# NOTE(review): the original note says 4B/12B depending on version; which size
+# the "latest" tag resolves to depends on the local Ollama model list — confirm.
+MODEL_NAME = "gemma4:latest"
+
+
+def load_sample_text(n_segments=20):
+    """從 ASR JSON 中載入一段文字作為測試素材"""
+    try:
+        with open(ASR_PATH, "r") as f:
+            data = json.load(f)
+
+        # 隨機取一段連續的對話
+        segments = data.get("segments", [])
+        start_idx = 50  # 取第 50 段附近
+        sample_segments = segments[start_idx : start_idx + n_segments]
+
+        text = " ".join([s.get("text", "") for s in sample_segments])
+        return text
+    except Exception as e:
+        print(f"Error loading ASR: {e}")
+        return "Sample text for testing."
+
+
+def run_ollama_task(prompt, text):
+    """呼叫 Ollama CLI 並測量時間"""
+    # 組合完整輸入
+    full_input = f"{prompt}\n\nContext:\n{text}"
+
+    start_time = time.time()
+
+    # 執行指令
+    result = subprocess.run(
+        ["ollama", "run", MODEL_NAME, full_input],
+        capture_output=True,
+        text=True,
+        timeout=60,  # 60 秒超時
+    )
+
+    end_time = time.time()
+    duration = end_time - start_time
+
+    return result.stdout.strip(), duration
+
+
+def main():
+    print(f"🧪 Starting Ollama Feasibility Test with model: {MODEL_NAME}")
+
+    # 1. 載入素材
+    print("📂 Loading sample text...")
+    text = load_sample_text()
+    print(f"   Loaded {len(text)} characters.")
+
+    # 2. 任務 A: 劇情摘要 (Summarization)
+    print("\n📝 Task A: Plot Summarization")
+    prompt_summary = "請用一句話總結以下電影對白內容，只輸出摘要，不要解釋。"
+
+    res_a, time_a = run_ollama_task(prompt_summary, text)
+    print(f"   ⏱️  Time: {time_a:.2f}s")
+    print(f"   📄 Result: {res_a[:100]}...")
+
+    # 3. 任務 B: 情緒與意圖分析 (Sentiment & Intent)
+    print("\n🧠 Task B: Sentiment & Intent Analysis (JSON Output)")
+    prompt_sentiment = """
+    請分析以下電影對白的「情緒」與「意圖」，並以 JSON 格式輸出。
+    格式範例：{"mood": ["suspicious", "romantic"], "intent": "interrogation"}
+    不要輸出任何其他文字。
+    """
+
+    res_b, time_b = run_ollama_task(prompt_sentiment, text)
+    print(f"   ⏱️  Time: {time_b:.2f}s")
+    print(f"   📄 Result: {res_b}")
+
+    # 4. 總結評估
+    print("\n📊 Feasibility Assessment:")
+    total_time = time_a + time_b
+    print(f"   Total Time for 2 tasks: {total_time:.2f}s")
+
+    if total_time < 30:
+        print("   ✅ PASS: 速度可接受，適合批次處理 (Batch Processing)。")
+    else:
+        print("   ⚠️ WARN: 速度較慢，建議使用更小的模型 (如 qwen2.5:3b) 或非同步處理。")
+
+
+# Run the feasibility test only when this file is executed as a script.
+if __name__ == "__main__":
+    main()