Files
momentry_core/scripts/asr_model_benchmark.py
Accusys 39ba5ddf76 feat: Phase 1 handover - schema migration, correction mechanism, API fixes
Schema changes: dev.chunks->dev.chunk, remove old_chunk_id/chunk_index
Correction: asr-1.json format, generate/apply scripts
API: 37/37 endpoints fixed and tested
Docs: HANDOVER_V2.0.md for M4
2026-05-11 07:03:22 +08:00

84 lines
3.1 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/opt/homebrew/bin/python3.11
"""
Comprehensive ASR Model Selection Benchmark
Tests 5 models × 2 VAD settings across 3 test clips.
Output: JSON results (written incrementally to RESULTS_FILE) + a JSON summary printed to stdout
"""
import json, time, os, gc, sys
from faster_whisper import WhisperModel
# Benchmark matrix configuration.
#
# CLIPS maps a clip label to its media file. NOTE(review): "offset" is not
# read anywhere in the visible script; presumably it is the clip's position
# in the original recording -- confirm before relying on it.
CLIPS = {
    "A_rapid": {
        "path": "/tmp/asr_clip_A.mp4",
        "offset": 1540,
    },
    "B_normal": {
        "path": "/tmp/asr_clip_B.mp4",
        "offset": 600,
    },
    "C_complex": {
        "path": "/tmp/asr_clip_C.mp4",
        "offset": 4400,
    },
}

# Model size names handed to WhisperModel, in the order they are tested.
MODELS = [
    "tiny",
    "base",
    "small",
    "medium",
    "large-v3",
]

# Values tried for the VAD parameter min_silence_duration_ms.
VAD_SETTINGS = [
    200,
    500,
]

# JSON file holding all results; re-read on start-up to skip finished tests.
RESULTS_FILE = "/tmp/asr_benchmark_results.json"
def run_transcribe(model, clip_path, clip_name, vad_ms):
    """Transcribe one clip with VAD filtering and time the whole run.

    Args:
        model: Object exposing ``transcribe(path, beam_size=..., vad_filter=...,
            vad_parameters=...)`` that returns ``(segments, info)``; the
            caller in this file passes a ``faster_whisper.WhisperModel``.
        clip_path: Path of the media file to transcribe.
        clip_name: Label of the clip. Unused here; kept so existing callers
            keep working.
        vad_ms: Value passed as the VAD ``min_silence_duration_ms`` parameter.

    Returns:
        A ``(segs, info, elapsed)`` tuple: ``segs`` is a list of
        ``{"start", "end", "text"}`` dicts (times rounded to 2 decimals, text
        stripped), ``info`` is the second value returned by
        ``model.transcribe`` unchanged, and ``elapsed`` is the duration in
        seconds.
    """
    # perf_counter is monotonic, so the measurement cannot be skewed by
    # system clock adjustments the way time.time() can.
    started = time.perf_counter()
    segments, info = model.transcribe(
        clip_path,
        beam_size=5,
        vad_filter=True,
        vad_parameters={"min_silence_duration_ms": vad_ms},
    )
    # The segments are consumed inside the timed region on purpose:
    # faster-whisper documents them as a lazy generator, so the actual
    # decoding work happens while iterating.
    segs = [
        {
            "start": round(seg.start, 2),
            "end": round(seg.end, 2),
            "text": seg.text.strip(),
        }
        for seg in segments
    ]
    elapsed = time.perf_counter() - started
    return segs, info, elapsed
def _save_results(results, path):
    """Write *results* to *path* as JSON without truncating the old file first."""
    # Write to a sibling temp file and swap it in, so an interruption during
    # the dump cannot destroy the results collected so far.
    tmp_path = f"{path}.tmp"
    with open(tmp_path, "w", encoding="utf-8") as fh:
        json.dump(results, fh)
    os.replace(tmp_path, path)


# Load existing results so already-completed tests can be skipped.
# Layout: all_results[clip_name]["<model>_vad<ms>"] -> result dict.
all_results = {}
if os.path.exists(RESULTS_FILE):
    with open(RESULTS_FILE, encoding="utf-8") as fh:
        all_results = json.load(fh)
    print(f"Loaded {sum(len(v) for v in all_results.values())} existing results")

total = len(CLIPS) * len(MODELS) * len(VAD_SETTINGS)
# Count only combinations that belong to the current test matrix; stale
# entries in the results file would otherwise skew the "remaining" figure.
done = sum(
    1
    for clip_name in CLIPS
    for model_size in MODELS
    for vad_ms in VAD_SETTINGS
    if f"{model_size}_vad{vad_ms}" in all_results.get(clip_name, {})
)
print(f"Total: {total} tests, {done} already done, {total-done} remaining\n")

for clip_name, clip_cfg in CLIPS.items():
    clip_results = all_results.setdefault(clip_name, {})
    for model_size in MODELS:
        for vad_ms in VAD_SETTINGS:
            key = f"{model_size}_vad{vad_ms}"
            if key in clip_results:
                continue

            print(f"[{clip_name}] {model_size} VAD={vad_ms}ms ...", end=" ", flush=True)

            # The model is loaded fresh for every test so that each result
            # carries its own load time (monotonic clock, as for the run).
            t_load = time.perf_counter()
            model = WhisperModel(model_size, device="cpu", compute_type="int8")
            load_time = time.perf_counter() - t_load

            segs, info, trans_time = run_transcribe(model, clip_cfg["path"], clip_name, vad_ms)

            # Total characters across all transcribed segments.
            total_chars = sum(len(s["text"]) for s in segs)
            clip_results[key] = {
                "model": model_size,
                "vad_ms": vad_ms,
                "segments": segs,
                "segment_count": len(segs),
                "total_chars": total_chars,
                "runtime_secs": round(trans_time, 1),
                "load_time_secs": round(load_time, 1),
                "language": info.language,
            }
            print(f"{len(segs)} segs, {total_chars} chars, {trans_time:.1f}s")

            # Free memory before the next model is loaded.
            del model
            gc.collect()

            # Save after every test so an interrupted run can resume.
            _save_results(all_results, RESULTS_FILE)

print("\n=== All tests complete ===")
# Print every result without the bulky per-segment transcripts.
summary = {
    clip: {
        test_key: {
            field: value
            for field, value in result.items()
            if field != "segments"
        }
        for test_key, result in tests.items()
    }
    for clip, tests in all_results.items()
}
print(json.dumps(summary, indent=2))