feat: update Python processors and add utility scripts
- Update ASR, face, OCR, pose processors - Add release pre-flight check script - Add synonym generation, chunk processing scripts - Add face recognition, stamp search utilities
This commit is contained in:
99
scripts/test_ollama_feasibility.py
Normal file
99
scripts/test_ollama_feasibility.py
Normal file
@@ -0,0 +1,99 @@
|
||||
#!/opt/homebrew/bin/python3.11
|
||||
"""
|
||||
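Ollama Local LLM Feasibility Test

Purpose: measure the speed and quality of semantic analysis performed by a
local Ollama model. The docstring originally named Qwen3; the model actually
under test is whatever MODEL_NAME is set to (currently a Gemma 4 tag).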
|
||||
"""
|
||||
|
||||
import json
|
||||
import subprocess
|
||||
import time
|
||||
import sys
|
||||
|
||||
# Identifier used to build the path of the ASR transcript under output/<UUID>/.
UUID = "384b0ff44aaaa1f1"
ASR_PATH = f"output/{UUID}/{UUID}.asr.json"

# Model under test (Gemma 4 is used at the user's request).
# Gemma 4 (4B/12B depending on version, here using latest tag from list)
MODEL_NAME = "gemma4:latest"
|
||||
|
||||
|
||||
def load_sample_text(n_segments=20, path=None, start_idx=50):
    """Load a run of consecutive ASR segments as one test string.

    Args:
        n_segments: Maximum number of consecutive segments to join.
        path: ASR JSON file to read; defaults to the module-level ASR_PATH.
        start_idx: Index of the first segment to take (default 50). When the
            transcript has no segment at that index, the window is moved back
            so that the last ``n_segments`` segments are used instead of
            returning an empty sample.

    Returns:
        The ``text`` fields of the selected segments joined by single spaces.
        An empty string if the transcript has no segments. The placeholder
        ``"Sample text for testing."`` if the file cannot be read, is not
        valid JSON, or does not have the expected structure.
    """
    fallback = "Sample text for testing."
    source = ASR_PATH if path is None else path

    try:
        # The transcript contains CJK text, so do not rely on the locale's
        # default encoding.
        with open(source, "r", encoding="utf-8") as f:
            data = json.load(f)
    except (OSError, ValueError) as e:
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
        print(f"Error loading ASR: {e}")
        return fallback

    segments = data.get("segments", []) if isinstance(data, dict) else None
    if not isinstance(segments, list):
        print("Error loading ASR: unexpected JSON structure")
        return fallback

    # Short transcripts: slide the window back rather than sampling nothing.
    if start_idx >= len(segments):
        start_idx = max(0, len(segments) - n_segments)
    sample_segments = segments[start_idx : start_idx + n_segments]

    return " ".join(
        str(seg.get("text") or "")
        for seg in sample_segments
        if isinstance(seg, dict)
    )
|
||||
|
||||
|
||||
def run_ollama_task(prompt, text, timeout=60, model=None):
    """Send one prompt plus context to the Ollama CLI and time the call.

    Args:
        prompt: Instruction placed before the context.
        text: Context material appended under a ``Context:`` heading.
        timeout: Seconds to wait for the CLI before giving up (default 60).
        model: Ollama model tag to run; defaults to the module-level
            MODEL_NAME.

    Returns:
        A ``(output, duration)`` tuple: the CLI's stripped stdout and the
        elapsed seconds. ``output`` is an empty string when the CLI could
        not be started or did not finish within ``timeout``; the reason is
        reported on stderr instead of raising, so that one failed task does
        not abort the remaining measurements.
    """
    model_name = MODEL_NAME if model is None else model

    # Assemble the full input passed to the model.
    full_input = f"{prompt}\n\nContext:\n{text}"

    # perf_counter is monotonic, so the measured interval cannot be skewed by
    # system clock adjustments.
    started = time.perf_counter()
    try:
        result = subprocess.run(
            ["ollama", "run", model_name, full_input],
            capture_output=True,
            text=True,
            # Decode explicitly so non-ASCII model output does not depend on
            # the locale.
            encoding="utf-8",
            errors="replace",
            timeout=timeout,
        )
    except subprocess.TimeoutExpired:
        print(f"Error: ollama timed out after {timeout}s", file=sys.stderr)
        return "", time.perf_counter() - started
    except OSError as e:
        # Typically FileNotFoundError when the ollama binary is not on PATH.
        print(f"Error: could not run ollama: {e}", file=sys.stderr)
        return "", time.perf_counter() - started

    duration = time.perf_counter() - started

    if result.returncode != 0:
        print(
            f"Error: ollama exited with status {result.returncode}: "
            f"{result.stderr.strip()}",
            file=sys.stderr,
        )

    return result.stdout.strip(), duration
|
||||
|
||||
|
||||
def main():
    """Run both benchmark prompts against the local model and print a verdict.

    Loads a transcript sample, runs a one-sentence summarisation task and a
    JSON sentiment/intent task through ``run_ollama_task``, prints each
    result with its elapsed time, then prints an assessment. The speed
    verdict (PASS under 30 seconds total, WARN otherwise) is only given when
    both tasks produced output; an empty result is reported as a failure so
    that a model which errors out quickly is not mistaken for a fast one.
    """
    print(f"🧪 Starting Ollama Feasibility Test with model: {MODEL_NAME}")

    # 1. Load the test material.
    print("📂 Loading sample text...")
    text = load_sample_text()
    print(f" Loaded {len(text)} characters.")

    # 2. Task A: plot summarisation.
    print("\n📝 Task A: Plot Summarization")
    prompt_summary = "請用一句話總結以下電影對白內容,只輸出摘要,不要解釋。"

    res_a, time_a = run_ollama_task(prompt_summary, text)
    print(f" ⏱️ Time: {time_a:.2f}s")
    # Only show an ellipsis when the result was actually truncated.
    preview = res_a if len(res_a) <= 100 else f"{res_a[:100]}..."
    print(f" 📄 Result: {preview}")

    # 3. Task B: sentiment and intent analysis with JSON output.
    print("\n🧠 Task B: Sentiment & Intent Analysis (JSON Output)")
    prompt_sentiment = (
        "\n"
        "請分析以下電影對白的「情緒」與「意圖」,並以 JSON 格式輸出。\n"
        '格式範例:{"mood": ["suspicious", "romantic"], "intent": "interrogation"}\n'
        "不要輸出任何其他文字。\n"
    )

    res_b, time_b = run_ollama_task(prompt_sentiment, text)
    print(f" ⏱️ Time: {time_b:.2f}s")
    print(f" 📄 Result: {res_b}")

    # The prompt demands JSON only, so check that the reply actually parses.
    try:
        json.loads(res_b)
    except ValueError:
        print(" ⚠️ WARN: Task B output is not valid JSON.")

    # 4. Overall assessment.
    print("\n📊 Feasibility Assessment:")
    total_time = time_a + time_b
    print(f" Total Time for 2 tasks: {total_time:.2f}s")

    # Timing is meaningless if a task returned nothing.
    if not res_a or not res_b:
        print(" ❌ FAIL: at least one task produced no output; timing is not meaningful.")
        return

    if total_time < 30:
        print(" ✅ PASS: 速度可接受,適合批次處理 (Batch Processing)。")
    else:
        print(" ⚠️ WARN: 速度較慢,建議使用更小的模型 (如 qwen2.5:3b) 或非同步處理。")
|
||||
|
||||
|
||||
# Run the feasibility test only when this file is executed as a script.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user