feat: update Python processors and add utility scripts
- Update ASR, face, OCR, pose processors
- Add release pre-flight check script
- Add synonym generation, chunk processing scripts
- Add face recognition, stamp search utilities
This commit is contained in:
124
scripts/test_llm_capabilities.py
Normal file
124
scripts/test_llm_capabilities.py
Normal file
@@ -0,0 +1,124 @@
|
||||
#!/opt/homebrew/bin/python3.11
|
||||
"""
|
||||
Local LLM (Gemma 4) Capability & Speed Benchmark
|
||||
"""
|
||||
|
||||
import json
|
||||
import time
|
||||
import subprocess
|
||||
|
||||
# Identifier used to build the path of the ASR transcript below.
UUID = "384b0ff44aaaa1f1"
# JSON transcript that load_context() opens and parses.
ASR_PATH = f"output/{UUID}/{UUID}.asr.json"
# Model name passed to `ollama run` by run_test().
MODEL = "gemma4:latest"
|
||||
|
||||
|
||||
def load_context(n_segments=20, path=None, start=50):
    """Return the text of a run of transcript segments joined by spaces.

    Args:
        n_segments: Number of consecutive segments to take.
        path: JSON file to read. Defaults to the module-level ``ASR_PATH``.
        start: Index of the first segment to take. The default of 50 skips
            the beginning of the transcript to pick a middle chunk.

    Returns:
        The ``"text"`` values of the selected entries of the top-level
        ``"segments"`` list, joined with single spaces. Returns ``""`` when
        the file cannot be read or parsed, or does not have the expected
        structure, so that callers checking the context length see the
        failure instead of mistaking an error message for transcript text.
    """
    if path is None:
        path = ASR_PATH
    try:
        # Explicit UTF-8: the default encoding is locale-dependent and the
        # transcript may contain non-ASCII text.
        with open(path, "r", encoding="utf-8") as f:
            data = json.load(f)
        segments = data.get("segments", [])[start : start + n_segments]
        return " ".join(s.get("text", "") for s in segments)
    except (OSError, ValueError, AttributeError, TypeError) as e:
        # OSError: missing/unreadable file. ValueError: invalid JSON or bad
        # encoding. AttributeError/TypeError: JSON of an unexpected shape.
        print(f"❌ Error loading context: {e}")
        return ""
|
||||
|
||||
|
||||
def run_test(name, prompt_template, context_text):
    """Send one prompt to the local model through ``ollama run`` and report it.

    Args:
        name: Label printed in the test header.
        prompt_template: ``str.format`` template for the instruction. A
            ``{context}`` placeholder is replaced with ``context_text``;
            literal braces must be doubled (``{{`` / ``}}``).
        context_text: Text appended after the instruction under a
            ``Context:`` heading.

    Returns:
        A ``(duration, output)`` tuple. ``duration`` is the elapsed time in
        seconds. ``output`` is the model's stripped stdout, or ``None`` when
        the call failed (timeout, ``ollama`` could not be started, or it
        exited with a non-zero status).
    """
    print(f"\n🧪 Testing: {name}")
    print("-" * 50)

    prompt = prompt_template.format(context=context_text)
    full_input = f"{prompt}\n\nContext:\n{context_text}"

    # perf_counter() is monotonic, so the measurement cannot be skewed by
    # wall-clock adjustments during a long model call.
    start = time.perf_counter()
    try:
        result = subprocess.run(
            ["ollama", "run", MODEL, full_input],
            capture_output=True,
            text=True,
            # Decode explicitly instead of relying on the locale; the output
            # may contain non-ASCII text.
            encoding="utf-8",
            errors="replace",
            timeout=120,
        )
    except (OSError, subprocess.SubprocessError, ValueError) as e:
        duration = time.perf_counter() - start
        print(f"❌ Failed ({duration:.2f}s): {e}")
        return duration, None

    duration = time.perf_counter() - start

    # A non-zero exit status means the command failed; without this check an
    # empty stdout would be reported as a successful plain-text answer.
    if result.returncode != 0:
        reason = result.stderr.strip() or f"ollama exited with status {result.returncode}"
        print(f"❌ Failed ({duration:.2f}s): {reason}")
        return duration, None

    output = result.stdout.strip()

    # Basic shape check only: the output is not parsed as JSON.
    is_json = output.startswith("{") and output.endswith("}")
    tag = "JSON ✅" if is_json else "Text ⚠️"

    # Only show an ellipsis when the output was actually cut.
    preview = output if len(output) <= 300 else f"{output[:300]}..."

    print(f"⏱️ Duration: {duration:.2f}s | Format: {tag}")
    print(f"🤖 Output: {preview}")
    return duration, output
|
||||
|
||||
|
||||
def main():
    """Run the capability prompts against the loaded context and print timings.

    Loads the context with ``load_context()``, aborts when it is shorter than
    50 characters, runs each prompt through ``run_test()``, and prints the
    total and average duration of the tasks that produced output.
    """
    print(f"🚀 Starting Gemma 4 Capability Test on Context ({MODEL})")
    context = load_context()
    print(f"📂 Loaded Context: {len(context)} chars")
    if len(context) < 50:
        print("⚠️ Context too short, aborting.")
        return

    print(f"👀 Preview: {context[:100]}...")

    # (label, prompt template) pairs. Braces are doubled because run_test()
    # passes the template through str.format().
    # NOTE(review): the JSON examples in prompts 2 and 3 use single quotes,
    # which is not valid JSON - confirm whether that is intended.
    tests = [
        (
            "1. Plot Summarization (摘要)",
            "Summarize the following movie dialogue into ONE sentence. Do not explain, just give the summary.",
        ),
        (
            "2. 5W1H Entity Extraction (資訊提取)",
            "Extract the following information from the text and output valid JSON only:\n{{'who': '...', 'what': '...', 'where': '...', 'when': '...'}}.",
        ),
        (
            "3. Sentiment & Mood Detection (情緒分析)",
            "Analyze the emotional tone of the dialogue. Output JSON: {{'mood': ['...'], 'tension_level': 'high/medium/low'}}.",
        ),
        (
            "4. Logical Reasoning (邏輯推理)",
            "Based on the text, answer: What are the characters discussing or investigating? Be specific.",
        ),
    ]
    results = [run_test(name, prompt, context) for name, prompt in tests]

    # A failed run is signalled by a None output (the duration is always a
    # number), so filter on the output to keep failures out of the timings.
    durations = [duration for duration, output in results if output is not None]
    failed = len(results) - len(durations)

    if not durations:
        print("\n❌ All tasks failed; no timings to summarize.")
        return

    total = sum(durations)
    avg = total / len(durations)
    print("\n📊 Benchmark Summary:")
    print(f"Total Time for {len(durations)} tasks: {total:.2f}s")
    print(f"Average Time: {avg:.2f}s per task")
    if failed:
        print(f"Failed tasks (excluded from timings): {failed}")

    if avg > 20:
        print(
            "\n⚠️ Note: Gemma 4 is accurate but slow. Consider asynchronous processing or smaller models for speed."
        )
    else:
        print("\n✅ Note: Performance is acceptable for background tasks.")
|
||||
|
||||
|
||||
# Run the benchmark only when this file is executed as a script, not when it
# is imported.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user