# momentry_core/scripts/test_speechbrain.py

#!/opt/homebrew/bin/python3.11
"""
SpeechBrain test script.

Checks that the SpeechBrain models used for ASR and speaker diarization can be loaded.
"""

import sys
import time

def test_asr(video_path):
    """Smoke-test loading of the SpeechBrain ASR model.

    Imports SpeechBrain lazily, loads the
    ``speechbrain/asr-wav2vec2-commonvoice-en`` model and prints how long the
    load took. No transcription is performed: ``video_path`` is only echoed
    in the log output.

    Args:
        video_path: Path of the media file named in the log output.

    Returns:
        True if the model was loaded; False if the import or the load raised,
        in which case the error and its traceback are printed.
    """
    print(f"[SpeechBrain] Testing ASR on: {video_path}")

    try:
        # Imported lazily so a missing SpeechBrain install is reported as a
        # failed check instead of crashing the whole script at import time.
        from speechbrain.inference.ASR import EncoderDecoderASR

        # Load the model
        print("[SpeechBrain] Loading ASR model...")
        # Start the clock before loading so the reported duration actually
        # covers the load; perf_counter is monotonic, unlike time.time.
        start_time = time.perf_counter()
        EncoderDecoderASR.from_hparams(
            source="speechbrain/asr-wav2vec2-commonvoice-en",
            savedir="pretrained_models/asr-wav2vec2-commonvoice-en",
        )
        elapsed = time.perf_counter() - start_time

        # Transcription
        print("[SpeechBrain] Transcribing...")

        # SpeechBrain needs a WAV file; for now only basic model loading
        # is exercised here.
        print("[SpeechBrain] Note: SpeechBrain requires WAV format")
        print("[SpeechBrain] Testing basic model loading... OK")

        print(f"[SpeechBrain] Model loaded in {elapsed:.2f}s")

        return True

    except Exception as e:
        # Top-level boundary of the check: report and convert to a failure flag.
        print(f"[SpeechBrain] Error: {e}")
        import traceback
        traceback.print_exc()
        return False


def test_speaker_diarization(audio_path):
    """Smoke-test loading of the SpeechBrain speaker recognition model.

    Imports SpeechBrain lazily and loads the
    ``speechbrain/spkrec-ecapa-voxceleb`` model. Only model loading is
    checked; no diarization is run, and ``audio_path`` is only echoed in the
    log output.

    Args:
        audio_path: Path of the audio file named in the log output.

    Returns:
        True if the model was loaded; False if the import or the load raised,
        in which case the error and its traceback are printed.
    """
    print(f"[SpeechBrain] Testing speaker diarization on: {audio_path}")

    try:
        # Imported lazily so a missing SpeechBrain install is reported as a
        # failed check instead of crashing the whole script at import time.
        from speechbrain.inference.speaker import SpeakerRecognition

        print("[SpeechBrain] Loading speaker recognition model...")
        SpeakerRecognition.from_hparams(
            source="speechbrain/spkrec-ecapa-voxceleb",
            savedir="pretrained_models/spkrec-ecapa-voxceleb",
        )

        print("[SpeechBrain] Model loaded successfully")
        return True

    except Exception as e:
        # Top-level boundary of the check: report and convert to a failure flag.
        print(f"[SpeechBrain] Speaker diarization error: {e}")
        # Print the traceback as well, matching test_asr; the message alone
        # rarely pinpoints where a model load failed.
        import traceback
        traceback.print_exc()
        return False


if __name__ == "__main__":
    # Script entry point: run both checks, print a summary, and exit with a
    # status that reflects the outcome.
    # The media path comes from the first CLI argument, with a scratch WAV
    # path as the fallback.
    video_path = sys.argv[1] if len(sys.argv) > 1 else "/tmp/test.wav"

    print("=== SpeechBrain 測試 ===")
    print("")

    # ASR check
    asr_ok = test_asr(video_path)
    print("")

    # Speaker diarization check
    spk_ok = test_speaker_diarization(video_path)
    print("")

    # Summary
    print("=== 測試結果 ===")
    print(f"ASR: {'✅ 成功' if asr_ok else '❌ 失敗'}")
    print(f"Speaker Diarization: {'✅ 成功' if spk_ok else '❌ 失敗'}")

    # Surface failures through the exit status so shells and CI can detect
    # them; previously the script exited 0 whatever the results were.
    sys.exit(0 if asr_ok and spk_ok else 1)