# momentry_core/scripts/test_asr_large_model.py

#!/opt/homebrew/bin/python3.11
"""
Compare ASR small vs large-v3 on a short test clip.
"""
import json, time, sys, os
from faster_whisper import WhisperModel

# Media file that every model transcribes.
CLIP = "/tmp/charade_test_clip.mp4"

# Seconds added to every segment timestamp before it is saved.
# NOTE(review): presumably the clip's start position inside the full-length
# source video, so saved times line up with the original -- confirm.
CLIP_OFFSET_SECS = 1540

# Model label -> WhisperModel settings. The label is also embedded in the
# result filename (/tmp/asr_<label>_result.json).
models = {
    "small": {"size": "small", "device": "cpu", "compute": "int8"},
    "large-v3": {"size": "large-v3", "device": "cpu", "compute": "int8"},
}

for name, cfg in models.items():
    outfile = f"/tmp/asr_{name}_result.json"
    # An existing result file acts as a cache: delete it to force a re-run.
    if os.path.exists(outfile):
        print(f"{name}: already done, skip")
        continue

    print(f"\n=== Loading {name} model ===")
    # perf_counter is monotonic, so the measured durations cannot be skewed
    # by wall-clock adjustments during a long model load or transcription.
    t0 = time.perf_counter()
    model = WhisperModel(cfg["size"], device=cfg["device"], compute_type=cfg["compute"])
    print(f"  Loaded in {time.perf_counter()-t0:.1f}s")

    print("  Transcribing...")
    t0 = time.perf_counter()
    segments, info = model.transcribe(
        CLIP,
        beam_size=5,
        vad_filter=True,
        vad_parameters={"min_silence_duration_ms": 500},
    )
    # Per the faster-whisper documentation `segments` is a lazy generator, so
    # the decoding work happens while it is consumed here; the timer is
    # therefore stopped only after the full list has been built.
    segs = [
        {
            "start": round(seg.start + CLIP_OFFSET_SECS, 2),
            "end": round(seg.end + CLIP_OFFSET_SECS, 2),
            "text": seg.text.strip(),
        }
        for seg in segments
    ]
    elapsed = time.perf_counter() - t0

    result = {
        "model": name,
        "language": info.language,
        "segments": segs,
        "segment_count": len(segs),
        "duration_secs": round(elapsed, 1),
    }
    # The context manager guarantees the file is flushed and closed before the
    # comparison step reads it back. UTF-8 is explicit because
    # ensure_ascii=False emits non-ASCII transcript text verbatim.
    with open(outfile, "w", encoding="utf-8") as fh:
        json.dump(result, fh, indent=2, ensure_ascii=False)
    print(f"  Done: {len(segs)} segs in {elapsed:.1f}s")
    del model  # free memory

# Summarise the saved results of every configured model, then diff the
# transcripts of the "small" and "large-v3" runs.
print("\n=== Comparison ===")
results = {}
for name in models:
    # Build the path from the model label exactly as the transcription step
    # does, so the two can never disagree (the label "large-v3" contains a
    # hyphen, not an underscore). Each file is parsed once and reused below.
    with open(f"/tmp/asr_{name}_result.json", encoding="utf-8") as fh:
        r = json.load(fh)
    results[name] = r
    print(f"{name}: {r['segment_count']} segs, {r['duration_secs']}s runtime")

# Show differences
small = results["small"]["segments"]
large = results["large-v3"]["segments"]

# Set comparison: only exact text matches count as shared, and repeated
# lines or ordering within a transcript are ignored.
small_texts = {s["text"] for s in small}
large_texts = {s["text"] for s in large}

only_small = small_texts - large_texts
only_large = large_texts - small_texts

# Print at most ten examples per side, sorted for a stable output order.
print(f"\nTexts only in small: {len(only_small)}")
for t in sorted(only_small)[:10]:
    print(f"  SMALL: \"{t}\"")

print(f"\nTexts only in large: {len(only_large)}")
for t in sorted(only_large)[:10]:
    print(f"  LARGE: \"{t}\"")

# Compare segment boundaries
print("\nSegment time differences (large has more/fewer):")
print(f"  Small: {len(small)} segments")
print(f"  Large: {len(large)} segments")
print(f"  Diff: {len(large) - len(small)}")