# momentry_core/scripts/test_ollama_feasibility.py

#!/opt/homebrew/bin/python3.11
"""
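Ollama Local LLM Feasibility Test
Purpose: measure the speed and output quality of semantic analysis run on a
local Ollama model (originally Qwen3; the model actually used is MODEL_NAME).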
"""

import json
import subprocess
import time

# Identifier of the sample used for this test; it names both the output
# directory and the ASR JSON file in ASR_PATH.
UUID = "384b0ff44aaaa1f1"
# Relative path of the ASR JSON whose "segments" supply the test text.
ASR_PATH = f"output/{UUID}/{UUID}.asr.json"

# Selected model (Gemma 4, per the user's request).
# Gemma 4 (4B/12B depending on version; here using the latest tag from the list)
MODEL_NAME = "gemma4:latest"


def load_sample_text(n_segments=20, start_idx=50, asr_path=None):
    """Load a run of consecutive ASR segments and join them into one string.

    Args:
        n_segments: Maximum number of consecutive segments to include.
        start_idx: Index of the first segment of the window. When the file
            has too few segments for this index, the window starts at 0
            instead of yielding an empty sample.
        asr_path: Path of the ASR JSON file. Defaults to the module-level
            ASR_PATH when omitted.

    Returns:
        The selected segments' "text" values joined by single spaces, or the
        placeholder "Sample text for testing." when the file cannot be read
        or parsed, or contains no segments.
    """
    fallback = "Sample text for testing."
    path = ASR_PATH if asr_path is None else asr_path

    try:
        # Transcripts may contain non-ASCII text, so do not rely on the
        # platform's default encoding.
        with open(path, "r", encoding="utf-8") as f:
            data = json.load(f)
    except (OSError, ValueError) as e:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        print(f"Error loading ASR: {e}")
        return fallback

    segments = data.get("segments") if isinstance(data, dict) else None
    if not isinstance(segments, list) or not segments:
        print("Error loading ASR: no segments found")
        return fallback

    # Take a fixed window of consecutive segments (not a random one).
    sample_segments = segments[start_idx : start_idx + n_segments]
    if not sample_segments:
        # Short transcript: fall back to the beginning so the model still
        # receives real dialogue instead of an empty context.
        sample_segments = segments[:n_segments]

    # A missing or null "text" contributes an empty string; entries that are
    # not objects are skipped.
    return " ".join(
        str(s.get("text") or "") for s in sample_segments if isinstance(s, dict)
    )


def run_ollama_task(prompt, text, timeout=60):
    """Run one prompt through the Ollama CLI and time the call.

    Args:
        prompt: Instruction for the model.
        text: Context appended to the prompt under a "Context:" header.
        timeout: Seconds to wait for the CLI before giving up.

    Returns:
        A ``(output, duration)`` tuple: the CLI's stripped stdout and the
        elapsed seconds. ``output`` is an empty string when the CLI timed
        out or could not be started; in both cases a message is printed.
    """
    # Combine the instruction and the context into a single input.
    full_input = f"{prompt}\n\nContext:\n{text}"

    # perf_counter is monotonic, so the measurement is not affected by
    # system clock adjustments.
    start_time = time.perf_counter()

    try:
        result = subprocess.run(
            ["ollama", "run", MODEL_NAME, full_input],
            capture_output=True,
            text=True,
            # Decode explicitly instead of relying on the locale encoding;
            # undecodable bytes are replaced rather than raising.
            encoding="utf-8",
            errors="replace",
            timeout=timeout,
        )
    except subprocess.TimeoutExpired:
        print(f"   ❌ ollama timed out after {timeout}s")
        return "", time.perf_counter() - start_time
    except OSError as e:
        # For example, the `ollama` executable is not on PATH.
        print(f"   ❌ Could not run ollama: {e}")
        return "", time.perf_counter() - start_time

    duration = time.perf_counter() - start_time

    if result.returncode != 0:
        # Surface the failure instead of silently returning empty output.
        print(
            f"   ❌ ollama exited with code {result.returncode}: "
            f"{result.stderr.strip()}"
        )

    return result.stdout.strip(), duration


def _run_task_safely(prompt, text):
    """Run one Ollama task; turn a timeout or launch failure into an empty result."""
    started = time.perf_counter()
    try:
        return run_ollama_task(prompt, text)
    except (subprocess.TimeoutExpired, OSError) as e:
        print(f"   ❌ Task failed: {e}")
        return "", time.perf_counter() - started


def main():
    """Run two sample tasks against the local model and print a verdict.

    Loads the sample text, runs a summarization task and a sentiment/intent
    task, prints each result with its elapsed time, then prints an overall
    assessment: FAIL when any task produced no output, PASS when both tasks
    finished in under 30 seconds in total, WARN otherwise.
    """
    print(f"🧪 Starting Ollama Feasibility Test with model: {MODEL_NAME}")

    # 1. Load the sample material
    print("📂 Loading sample text...")
    text = load_sample_text()
    print(f"   Loaded {len(text)} characters.")

    # 2. Task A: plot summarization
    print("\n📝 Task A: Plot Summarization")
    prompt_summary = "請用一句話總結以下電影對白內容，只輸出摘要，不要解釋。"

    res_a, time_a = _run_task_safely(prompt_summary, text)
    print(f"   ⏱️  Time: {time_a:.2f}s")
    # Mark the preview as truncated only when something was actually cut off.
    preview_a = res_a if len(res_a) <= 100 else f"{res_a[:100]}..."
    print(f"   📄 Result: {preview_a}")

    # 3. Task B: sentiment and intent analysis
    print("\n🧠 Task B: Sentiment & Intent Analysis (JSON Output)")
    prompt_sentiment = """
    請分析以下電影對白的「情緒」與「意圖」，並以 JSON 格式輸出。
    格式範例：{"mood": ["suspicious", "romantic"], "intent": "interrogation"}
    不要輸出任何其他文字。
    """

    res_b, time_b = _run_task_safely(prompt_sentiment, text)
    print(f"   ⏱️  Time: {time_b:.2f}s")
    print(f"   📄 Result: {res_b}")

    # 4. Overall assessment
    print("\n📊 Feasibility Assessment:")
    total_time = time_a + time_b
    print(f"   Total Time for 2 tasks: {total_time:.2f}s")

    if not (res_a and res_b):
        # An empty result means the model did not answer; a short elapsed
        # time in that case says nothing about real inference speed.
        print("   ❌ FAIL: At least one task produced no output; timing is not meaningful.")
    elif total_time < 30:
        print("   ✅ PASS: 速度可接受，適合批次處理 (Batch Processing)。")
    else:
        print("   ⚠️ WARN: 速度較慢，建議使用更小的模型 (如 qwen2.5:3b) 或非同步處理。")

# Run the feasibility test only when executed as a script, not on import.
if __name__ == "__main__":
    main()