feat: Phase 2.6 edges migration to Qdrant (TKG-only architecture)
Phase 2.6.1: co_occurrence_edges migration
- build_co_occurrence_edges_from_qdrant()
- Qdrant embeddings → frame grouping → YOLO objects
- Result: 6679 edges (vs 6701 PostgreSQL)

Phase 2.6.2: face_face_edges migration
- build_face_face_edges_from_qdrant()
- Qdrant embeddings → frame grouping → face pairs
- mutual_gaze detection preserved
- Result: 6 edges (exact match)

Phase 2.6.3: speaker_face_edges migration
- build_speaker_face_edges_from_qdrant()
- Qdrant embeddings → trace_id frame ranges
- SPEAKS_AS edge creation

Architecture:
- All edges use Qdrant payload (no face_detections queries)
- PostgreSQL fallback for empty Qdrant
- Estimated 3.6x performance improvement

Testing:
- Playground (3003): ✓ All Phase 2.6 logs verified
- Edge counts: ✓ Close match with PostgreSQL
- Fallback: ✓ Working

Docs:
- docs_v1.0/DESIGN/TKG_PHASE2_6_EDGES_MIGRATION.md
- docs_v1.0/M4_workspace/2026-06-21_phase2_6_test.md
This commit is contained in:
83
v1.1/scripts/test_speechbrain_v1.11.py
Executable file
83
v1.1/scripts/test_speechbrain_v1.11.py
Executable file
@@ -0,0 +1,83 @@
|
||||
#!/opt/homebrew/bin/python3.11
|
||||
"""
|
||||
SpeechBrain 測試腳本
|
||||
測試 ASR 和說話人分離功能
|
||||
"""
|
||||
|
||||
import sys
|
||||
import time
|
||||
|
||||
def test_asr(video_path):
    """Smoke-test that the SpeechBrain ASR model can be loaded.

    No audio is transcribed here: ``video_path`` is only echoed in the log
    output, and the check consists of importing SpeechBrain and calling
    ``EncoderDecoderASR.from_hparams``.

    Args:
        video_path: Path of the media file named in the log line. This
            function does not open or read it.

    Returns:
        True if the model was loaded, False if the import or the load raised.
    """
    print(f"[SpeechBrain] Testing ASR on: {video_path}")

    try:
        # Imported inside the try so that a missing or broken SpeechBrain
        # install is reported as a failed check rather than an import crash.
        from speechbrain.inference.ASR import EncoderDecoderASR

        # Load the model.
        print("[SpeechBrain] Loading ASR model...")
        # The clock must start before from_hparams(); starting it afterwards
        # would make the "Model loaded in" figure measure only the prints.
        # perf_counter is monotonic, so the interval cannot go negative.
        start_time = time.perf_counter()
        EncoderDecoderASR.from_hparams(
            source="speechbrain/asr-wav2vec2-commonvoice-en",
            savedir="pretrained_models/asr-wav2vec2-commonvoice-en",
        )
        elapsed = time.perf_counter() - start_time

        # Transcription itself is not exercised yet: SpeechBrain needs a WAV
        # file, so for now only basic model loading is verified.
        print("[SpeechBrain] Transcribing...")
        print("[SpeechBrain] Note: SpeechBrain requires WAV format")
        print("[SpeechBrain] Testing basic model loading... OK")

        print(f"[SpeechBrain] Model loaded in {elapsed:.2f}s")

        return True

    except Exception as e:
        # Top-level boundary of a diagnostic script: report everything,
        # including the stack trace, and signal failure to the caller.
        print(f"[SpeechBrain] Error: {e}")
        import traceback

        traceback.print_exc()
        return False
|
||||
|
||||
|
||||
def test_speaker_diarization(audio_path):
    """Check that the SpeechBrain speaker-recognition model can be loaded.

    ``audio_path`` is only echoed in the log line; the function does not open
    or read it. The check consists of importing SpeechBrain and calling
    ``SpeakerRecognition.from_hparams``.

    Args:
        audio_path: Path of the audio file named in the log line.

    Returns:
        True if the model was loaded, False if the import or the load raised.
    """
    print(f"[SpeechBrain] Testing speaker diarization on: {audio_path}")

    loaded = False
    try:
        from speechbrain.inference.speaker import SpeakerRecognition

        print("[SpeechBrain] Loading speaker recognition model...")
        model_spec = {
            "source": "speechbrain/spkrec-ecapa-voxceleb",
            "savedir": "pretrained_models/spkrec-ecapa-voxceleb",
        }
        recognizer = SpeakerRecognition.from_hparams(**model_spec)

        print("[SpeechBrain] Model loaded successfully")
        # Only flipped once every step above has completed without raising.
        loaded = True
    except Exception as e:
        print(f"[SpeechBrain] Speaker diarization error: {e}")

    return loaded
|
||||
|
||||
|
||||
def main(argv=None):
    """Run both SpeechBrain smoke tests and return a process exit code.

    Args:
        argv: Command-line arguments without the program name. Defaults to
            ``sys.argv[1:]``. The first argument, if present, is the media
            path handed to both checks; otherwise ``/tmp/test.wav`` is used.

    Returns:
        0 if both checks succeeded, 1 if either of them failed.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    media_path = args[0] if args else "/tmp/test.wav"

    print("=== SpeechBrain 測試 ===")
    print("")

    # ASR check.
    asr_ok = test_asr(media_path)
    print("")

    # Speaker-recognition check.
    spk_ok = test_speaker_diarization(media_path)
    print("")

    # Summary.
    print("=== 測試結果 ===")
    print(f"ASR: {'✅ 成功' if asr_ok else '❌ 失敗'}")
    print(f"Speaker Diarization: {'✅ 成功' if spk_ok else '❌ 失敗'}")

    # A non-zero status lets shells and CI jobs detect a failed run instead
    # of having to parse the printed summary.
    return 0 if asr_ok and spk_ok else 1


if __name__ == "__main__":
    sys.exit(main())
|
||||
Reference in New Issue
Block a user