feat: Phase 1 handover - schema migration, correction mechanism, API fixes
Schema changes: dev.chunks -> dev.chunk; remove old_chunk_id/chunk_index
Correction: asr-1.json format; generate/apply scripts
API: 37/37 endpoints fixed and tested
Docs: HANDOVER_V2.0.md for M4
This commit is contained in:
83
scripts/asr_model_benchmark.py
Normal file
83
scripts/asr_model_benchmark.py
Normal file
@@ -0,0 +1,83 @@
|
||||
#!/opt/homebrew/bin/python3.11
|
||||
"""
|
||||
Comprehensive ASR Model Selection Benchmark
|
||||
Tests 5 models × 2 VAD settings across 3 test clips.
|
||||
Output: JSON results file plus a JSON summary printed to stdout (the markdown report is not produced by this script)
|
||||
"""
|
||||
import json, time, os, gc, sys
|
||||
from faster_whisper import WhisperModel
|
||||
|
||||
# Test clips keyed by a short label. "path" is the media file handed to the
# transcriber. "offset" is not read anywhere in this script -- presumably the
# clip's start position (seconds) in the source recording; TODO confirm.
CLIPS = {
    "A_rapid": {"path": "/tmp/asr_clip_A.mp4", "offset": 1540},
    "B_normal": {"path": "/tmp/asr_clip_B.mp4", "offset": 600},
    "C_complex": {"path": "/tmp/asr_clip_C.mp4", "offset": 4400},
}

# Whisper model sizes to benchmark; each name is passed to WhisperModel(...).
MODELS = ["tiny", "base", "small", "medium", "large-v3"]

# VAD settings to benchmark: values for min_silence_duration_ms.
VAD_SETTINGS = [200, 500]  # min_silence_duration_ms

# Results are persisted here after every test so an interrupted run can resume.
RESULTS_FILE = "/tmp/asr_benchmark_results.json"
|
||||
|
||||
def run_transcribe(model, clip_path, clip_name, vad_ms):
    """Transcribe one clip and time how long it takes.

    Args:
        model: Object exposing ``transcribe(path, beam_size=..., vad_filter=...,
            vad_parameters=...)`` that returns ``(segments, info)``; the script
            passes a ``faster_whisper.WhisperModel``.
        clip_path: Path of the media file to transcribe.
        clip_name: Label of the clip. Unused here; kept so existing callers
            keep working.
        vad_ms: Value forwarded as the VAD ``min_silence_duration_ms``.

    Returns:
        A ``(segs, info, elapsed)`` tuple: ``segs`` is a list of
        ``{"start", "end", "text"}`` dicts (times rounded to 2 decimals, text
        stripped), ``info`` is whatever ``model.transcribe`` returned as its
        second value, and ``elapsed`` is the duration in seconds.
    """
    # perf_counter is monotonic, so the measurement cannot be skewed by
    # wall-clock adjustments during a long benchmark run.
    t0 = time.perf_counter()
    vad_params = {"min_silence_duration_ms": vad_ms}
    segments, info = model.transcribe(
        clip_path,
        beam_size=5,
        vad_filter=True,
        vad_parameters=vad_params,
    )
    # The segments are consumed inside the timed region on purpose:
    # faster-whisper documents them as a lazy generator, so the decoding work
    # happens during this iteration rather than in transcribe() itself.
    segs = [
        {
            "start": round(seg.start, 2),
            "end": round(seg.end, 2),
            "text": seg.text.strip(),
        }
        for seg in segments
    ]
    elapsed = time.perf_counter() - t0
    return segs, info, elapsed
|
||||
|
||||
# Load existing results so combinations finished in an earlier run are skipped.
all_results = {}
if os.path.exists(RESULTS_FILE):
    # Context manager guarantees the handle is closed even if parsing fails.
    with open(RESULTS_FILE, encoding="utf-8") as fh:
        all_results = json.load(fh)
    print(f"Loaded {sum(len(v) for v in all_results.values())} existing results")

# One test per (clip, model, VAD setting) combination.
total = len(CLIPS) * len(MODELS) * len(VAD_SETTINGS)
# Count of per-clip entries already present in the loaded results.
done = sum(len(v) for v in all_results.values())
print(f"Total: {total} tests, {done} already done, {total-done} remaining\n")
|
||||
|
||||
# Run every (clip, model, VAD) combination that is not already recorded.
for clip_name, clip_cfg in CLIPS.items():
    clip_results = all_results.setdefault(clip_name, {})

    for model_size in MODELS:
        for vad_ms in VAD_SETTINGS:
            key = f"{model_size}_vad{vad_ms}"
            if key in clip_results:
                continue  # already measured in a previous run

            print(f"[{clip_name}] {model_size} VAD={vad_ms}ms ...", end=" ", flush=True)

            # The model is loaded fresh for every test so load time is
            # recorded per run; perf_counter keeps the timing monotonic.
            t_load = time.perf_counter()
            model = WhisperModel(model_size, device="cpu", compute_type="int8")
            load_time = time.perf_counter() - t_load

            segs, info, trans_time = run_transcribe(model, clip_cfg["path"], clip_name, vad_ms)

            # Total chars
            total_chars = sum(len(s["text"]) for s in segs)

            clip_results[key] = {
                "model": model_size,
                "vad_ms": vad_ms,
                "segments": segs,
                "segment_count": len(segs),
                "total_chars": total_chars,
                "runtime_secs": round(trans_time, 1),
                "load_time_secs": round(load_time, 1),
                "language": info.language,
            }
            print(f"{len(segs)} segs, {total_chars} chars, {trans_time:.1f}s")

            # Free memory between models
            del model
            gc.collect()

            # Save incrementally. Write to a temp file and rename it into
            # place so an interrupt mid-write cannot leave a truncated results
            # file that would break the resume logic on the next run.
            tmp_path = RESULTS_FILE + ".tmp"
            with open(tmp_path, "w", encoding="utf-8") as fh:
                json.dump(all_results, fh)
            os.replace(tmp_path, RESULTS_FILE)
|
||||
|
||||
print("\n=== All tests complete ===")
# Print every result without its "segments" list, leaving only the per-run
# metrics (counts, timings, language) in the summary.
summary = {
    clip_label: {
        run_key: {
            field: value for field, value in run.items() if field != "segments"
        }
        for run_key, run in clip_runs.items()
    }
    for clip_label, clip_runs in all_results.items()
}
print(json.dumps(summary, indent=2))
|
||||
Reference in New Issue
Block a user