#!/opt/homebrew/bin/python3.11
"""
Comprehensive ASR Model Selection Benchmark

Tests 5 models × 2 VAD settings across 3 test clips.

Output: JSON results written incrementally to RESULTS_FILE, plus a JSON
summary (everything except the segment text) printed to stdout at the end.

NOTE(review): the original header also promised a markdown report; nothing
in this script produces one.
"""

import gc
import json
import os
import sys
import time

from faster_whisper import WhisperModel

# Clip name -> input file plus an "offset" value.
# NOTE(review): "offset" is never read in this script; presumably the clip's
# start position within the source recording -- confirm with the consumer.
CLIPS = {
    "A_rapid": {"path": "/tmp/asr_clip_A.mp4", "offset": 1540},
    "B_normal": {"path": "/tmp/asr_clip_B.mp4", "offset": 600},
    "C_complex": {"path": "/tmp/asr_clip_C.mp4", "offset": 4400},
}

MODELS = ["tiny", "base", "small", "medium", "large-v3"]
VAD_SETTINGS = [200, 500]  # min_silence_duration_ms
RESULTS_FILE = "/tmp/asr_benchmark_results.json"


def run_transcribe(model, clip_path, clip_name, vad_ms):
    """Transcribe one clip and time it.

    Args:
        model: Object exposing ``transcribe(path, beam_size=..., vad_filter=...,
            vad_parameters=...)`` that returns ``(segments, info)``.
        clip_path: Path of the media file to transcribe.
        clip_name: Unused; kept so existing callers keep working.
        vad_ms: Value passed to the VAD as ``min_silence_duration_ms``.

    Returns:
        ``(segs, info, elapsed)`` where ``segs`` is a list of
        ``{"start", "end", "text"}`` dicts (times rounded to 2 decimals, text
        stripped), ``info`` is whatever ``transcribe`` returned alongside the
        segments, and ``elapsed`` is the duration in seconds.
    """
    # perf_counter is monotonic, so the measurement is immune to wall-clock
    # adjustments that can skew time.time() during long runs.
    t0 = time.perf_counter()
    vad_params = {"min_silence_duration_ms": vad_ms}
    segments, info = model.transcribe(
        clip_path, beam_size=5, vad_filter=True, vad_parameters=vad_params
    )
    # Consuming the segments stays inside the timed region, exactly as the
    # original measurement did.
    segs = [
        {
            "start": round(seg.start, 2),
            "end": round(seg.end, 2),
            "text": seg.text.strip(),
        }
        for seg in segments
    ]
    elapsed = time.perf_counter() - t0
    return segs, info, elapsed


def _result_key(model_size, vad_ms):
    """Return the per-clip dictionary key for one model/VAD combination."""
    return f"{model_size}_vad{vad_ms}"


def _save_results(results, path):
    """Write *results* to *path* as JSON without ever leaving a partial file."""
    tmp_path = f"{path}.tmp"
    with open(tmp_path, "w", encoding="utf-8") as fh:
        json.dump(results, fh)
    # Atomic swap: an interrupted run keeps the previous, still-valid results
    # file, so the resume logic below never has to parse truncated JSON.
    os.replace(tmp_path, path)


# Load existing results to skip completed
all_results = {}
if os.path.exists(RESULTS_FILE):
    with open(RESULTS_FILE, encoding="utf-8") as fh:
        all_results = json.load(fh)
    print(f"Loaded {sum(len(v) for v in all_results.values())} existing results")

total = len(CLIPS) * len(MODELS) * len(VAD_SETTINGS)
# Count only entries that belong to the current test matrix; stale keys left
# over from an older configuration must not reduce the "remaining" figure.
done = sum(
    _result_key(model_size, vad_ms) in all_results.get(clip_name, {})
    for clip_name in CLIPS
    for model_size in MODELS
    for vad_ms in VAD_SETTINGS
)
print(f"Total: {total} tests, {done} already done, {total-done} remaining\n")

for clip_name, clip_cfg in CLIPS.items():
    clip_results = all_results.setdefault(clip_name, {})

    for model_size in MODELS:
        for vad_ms in VAD_SETTINGS:
            key = _result_key(model_size, vad_ms)
            if key in clip_results:
                continue

            print(f"[{clip_name}] {model_size} VAD={vad_ms}ms ...", end=" ", flush=True)

            # The model is loaded afresh for every test and its load time is
            # recorded separately from the transcription time.
            t_load = time.perf_counter()
            model = WhisperModel(model_size, device="cpu", compute_type="int8")
            load_time = time.perf_counter() - t_load

            segs, info, trans_time = run_transcribe(
                model, clip_cfg["path"], clip_name, vad_ms
            )

            # Total chars
            total_chars = sum(len(s["text"]) for s in segs)

            clip_results[key] = {
                "model": model_size,
                "vad_ms": vad_ms,
                "segments": segs,
                "segment_count": len(segs),
                "total_chars": total_chars,
                "runtime_secs": round(trans_time, 1),
                "load_time_secs": round(load_time, 1),
                "language": info.language,
            }

            print(f"{len(segs)} segs, {total_chars} chars, {trans_time:.1f}s")

            # Free memory between models
            del model
            gc.collect()

            # Save incrementally
            _save_results(all_results, RESULTS_FILE)

print("\n=== All tests complete ===")
# Summary: every stored field except the (large) per-segment transcript.
summary = {
    clip: {
        key: {field: value for field, value in result.items() if field != "segments"}
        for key, result in results_for_clip.items()
    }
    for clip, results_for_clip in all_results.items()
}
print(json.dumps(summary, indent=2))