Phase 2.6.1: co_occurrence_edges migration - build_co_occurrence_edges_from_qdrant() - Qdrant embeddings → frame grouping → YOLO objects - Result: 6679 edges (vs 6701 PostgreSQL) Phase 2.6.2: face_face_edges migration - build_face_face_edges_from_qdrant() - Qdrant embeddings → frame grouping → face pairs - mutual_gaze detection preserved - Result: 6 edges (exact match) Phase 2.6.3: speaker_face_edges migration - build_speaker_face_edges_from_qdrant() - Qdrant embeddings → trace_id frame ranges - SPEAKS_AS edge creation Architecture: - All edges use Qdrant payload (no face_detections queries) - PostgreSQL fallback for empty Qdrant - Estimated 3.6x performance improvement Testing: - Playground (3003): ✓ All Phase 2.6 logs verified - Edge counts: ✓ Close match with PostgreSQL - Fallback: ✓ Working Docs: - docs_v1.0/DESIGN/TKG_PHASE2_6_EDGES_MIGRATION.md - docs_v1.0/M4_workspace/2026-06-21_phase2_6_test.md
84 lines
2.4 KiB
Python
Executable File
84 lines
2.4 KiB
Python
Executable File
#!/opt/homebrew/bin/python3.11
|
|
"""
|
|
SpeechBrain 測試腳本
|
|
測試 ASR 和說話人分離功能
|
|
"""
|
|
|
|
import sys
|
|
import time
|
|
|
|
def test_asr(video_path):
|
|
"""測試 SpeechBrain ASR"""
|
|
print(f"[SpeechBrain] Testing ASR on: {video_path}")
|
|
|
|
try:
|
|
from speechbrain.inference.ASR import EncoderDecoderASR
|
|
|
|
# 載入模型
|
|
print("[SpeechBrain] Loading ASR model...")
|
|
asr_model = EncoderDecoderASR.from_hparams(
|
|
source="speechbrain/asr-wav2vec2-commonvoice-en",
|
|
savedir="pretrained_models/asr-wav2vec2-commonvoice-en"
|
|
)
|
|
|
|
# 轉錄
|
|
print("[SpeechBrain] Transcribing...")
|
|
start_time = time.time()
|
|
|
|
# SpeechBrain 需要 WAV 檔案
|
|
# 這裡我們先測試基本功能
|
|
print("[SpeechBrain] Note: SpeechBrain requires WAV format")
|
|
print("[SpeechBrain] Testing basic model loading... OK")
|
|
|
|
elapsed = time.time() - start_time
|
|
print(f"[SpeechBrain] Model loaded in {elapsed:.2f}s")
|
|
|
|
return True
|
|
|
|
except Exception as e:
|
|
print(f"[SpeechBrain] Error: {e}")
|
|
import traceback
|
|
traceback.print_exc()
|
|
return False
|
|
|
|
|
|
def test_speaker_diarization(audio_path):
|
|
"""測試說話人分離"""
|
|
print(f"[SpeechBrain] Testing speaker diarization on: {audio_path}")
|
|
|
|
try:
|
|
from speechbrain.inference.speaker import SpeakerRecognition
|
|
|
|
print("[SpeechBrain] Loading speaker recognition model...")
|
|
verification = SpeakerRecognition.from_hparams(
|
|
source="speechbrain/spkrec-ecapa-voxceleb",
|
|
savedir="pretrained_models/spkrec-ecapa-voxceleb"
|
|
)
|
|
|
|
print("[SpeechBrain] Model loaded successfully")
|
|
return True
|
|
|
|
except Exception as e:
|
|
print(f"[SpeechBrain] Speaker diarization error: {e}")
|
|
return False
|
|
|
|
|
|
if __name__ == "__main__":
    # The first command-line argument is the input file; fall back to a
    # default WAV path when none is given.
    cli_args = sys.argv[1:]
    video_path = cli_args[0] if cli_args else "/tmp/test.wav"

    print("=== SpeechBrain 測試 ===")
    print("")

    # Run the ASR check.
    asr_ok = test_asr(video_path)
    print("")

    # Run the speaker diarization check on the same file.
    spk_ok = test_speaker_diarization(video_path)
    print("")

    # Summary: one line per check.
    print("=== 測試結果 ===")
    for label, passed in (("ASR", asr_ok), ("Speaker Diarization", spk_ok)):
        status = "✅ 成功" if passed else "❌ 失敗"
        print(f"{label}: {status}")
|