feat: update Python processors and add utility scripts

- Update ASR, face, OCR, pose processors
- Add release pre-flight check script
- Add synonym generation and chunk processing scripts
- Add face recognition and stamp search utilities
2026-04-30 15:07:49 +08:00
parent f4697396e4
commit 8f05a7c188
256 changed files with 60505 additions and 299 deletions
--- a/scripts/test_speechbrain.py
+++ b/scripts/test_speechbrain.py
@@ -0,0 +1,85 @@
+#!/opt/homebrew/bin/python3.11
+"""
+SpeechBrain 測試腳本
+測試 ASR 和說話人分離功能
+"""
+
+import sys
+import json
+import time
+from pathlib import Path
+
+def test_asr(video_path):
+    """Smoke-test SpeechBrain ASR by loading the pretrained model.
+
+    video_path is only echoed in the log; no audio is transcribed yet.
+    Returns True if the model loads, False (after printing a traceback) otherwise.
+    """
+    print(f"[SpeechBrain] Testing ASR on: {video_path}")
+
+    try:
+        from speechbrain.inference.ASR import EncoderDecoderASR
+
+        # Start the clock before loading so the reported time covers the load.
+        print("[SpeechBrain] Loading ASR model...")
+        start_time = time.time()
+        EncoderDecoderASR.from_hparams(
+            source="speechbrain/asr-wav2vec2-commonvoice-en",
+            savedir="pretrained_models/asr-wav2vec2-commonvoice-en",
+        )
+        elapsed = time.time() - start_time
+
+        # Transcription is not wired up yet: SpeechBrain needs WAV input,
+        # so for now only basic model loading is exercised.
+        print("[SpeechBrain] Transcribing...")
+        print("[SpeechBrain] Note: SpeechBrain requires WAV format")
+        print("[SpeechBrain] Testing basic model loading... OK")
+        print(f"[SpeechBrain] Model loaded in {elapsed:.2f}s")
+        return True
+
+    except Exception as e:
+        print(f"[SpeechBrain] Error: {e}")
+        import traceback
+        traceback.print_exc()
+        return False
+
+
+def test_speaker_diarization(audio_path):
+    """Smoke-test loading the speaker-recognition model; return True on success."""
+    print(f"[SpeechBrain] Testing speaker diarization on: {audio_path}")
+
+    try:
+        from speechbrain.inference.speaker import SpeakerRecognition
+        # audio_path is only logged above; just the model load is exercised here.
+        print("[SpeechBrain] Loading speaker recognition model...")
+        SpeakerRecognition.from_hparams(
+            source="speechbrain/spkrec-ecapa-voxceleb",
+            savedir="pretrained_models/spkrec-ecapa-voxceleb",
+        )
+        print("[SpeechBrain] Model loaded successfully")
+        return True
+    except Exception as e:
+        print(f"[SpeechBrain] Speaker diarization error: {e}")
+        import traceback
+        traceback.print_exc()  # full stack trace, not just the message, for debugging
+        return False
+
+
+if __name__ == "__main__":
+    # Input path from the first CLI argument; defaults to /tmp/test.wav.
+    video_path = sys.argv[1] if len(sys.argv) > 1 else "/tmp/test.wav"
+
+    print("=== SpeechBrain 測試 ===")
+    print("")
+    asr_ok = test_asr(video_path)
+    print("")
+    spk_ok = test_speaker_diarization(video_path)
+    print("")
+
+    # Summary of both checks.
+    print("=== 測試結果 ===")
+    print(f"ASR: {'✅ 成功' if asr_ok else '❌ 失敗'}")
+    print(f"Speaker Diarization: {'✅ 成功' if spk_ok else '❌ 失敗'}")
+
+    # Exit non-zero when any check failed so shells and CI can detect it.
+    sys.exit(0 if asr_ok and spk_ok else 1)