feat: ASRX hybrid pipeline, identity history, worker fixes, checkpoint system
This commit is contained in:
65
scripts/asrx_self/speaker_classifier.py
Normal file
65
scripts/asrx_self/speaker_classifier.py
Normal file
@@ -0,0 +1,65 @@
|
||||
"""
|
||||
Speaker Classifier - 聲紋品質評估與性別分類
|
||||
|
||||
提供品質計算與性別分類功能,作為 main_fixed.py 的輔助模組。
|
||||
"""
|
||||
|
||||
import numpy as np
|
||||
|
||||
|
||||
def compute_embedding_quality(embeddings, labels):
    """Score each embedding by cosine similarity to its cluster centroid.

    The centroid of a cluster is the mean of the raw (un-normalized)
    embeddings carrying that label, scaled to unit length when its norm is
    non-zero.

    Args:
        embeddings: Array-like of shape (n_segments, dim) holding one speaker
            embedding per row.
        labels: Array-like of shape (n_segments,) with the cluster label of
            each row. Plain Python lists are accepted.

    Returns:
        numpy.ndarray of shape (n_segments,) with one similarity per
        embedding. Cosine similarity lies in [-1, 1]; a zero-norm embedding
        or centroid yields 0.
    """
    embeddings = np.asarray(embeddings)
    if not np.issubdtype(embeddings.dtype, np.floating):
        embeddings = embeddings.astype(np.float64)
    # Convert labels so that `labels == label` is always an element-wise
    # mask; with a plain list the comparison would collapse to a scalar.
    labels = np.asarray(labels)

    if embeddings.shape[0] == 0:
        return np.array([])

    # Normalize every row once; rows with zero norm are left as zeros so
    # their similarity comes out as 0 instead of NaN.
    row_norms = np.linalg.norm(embeddings, axis=1)
    safe_norms = row_norms.copy()
    safe_norms[safe_norms == 0] = 1
    unit_rows = embeddings / safe_norms[:, None]

    qualities = np.zeros(embeddings.shape[0], dtype=embeddings.dtype)
    for label in np.unique(labels):
        mask = labels == label
        centroid = embeddings[mask].mean(axis=0)
        centroid_norm = np.linalg.norm(centroid)
        if centroid_norm > 0:
            centroid = centroid / centroid_norm
        qualities[mask] = unit_rows[mask] @ centroid

    # Guard against floating-point overshoot just outside [-1, 1].
    return np.clip(qualities, -1.0, 1.0)
|
||||
|
||||
|
||||
def classify_gender(audio_wav, sample_rate, classifier):
    """Classify the speaker gender of one audio segment.

    Args:
        audio_wav: 1-D waveform samples (numpy array or other array-like).
        sample_rate: Sampling rate of ``audio_wav``. Currently unused: the
            waveform is passed to the classifier as-is, with no resampling.
        classifier: Object exposing ``classify_batch(tensor)`` whose first
            returned element holds per-class scores (per the original notes,
            a SpeechBrain EncoderClassifier for gender-recognition-ecapa).
            ``None`` disables classification.

    Returns:
        dict: ``{"gender": "male" | "female" | "unknown", "confidence": float}``.
        The "unknown" / 0.0 default is returned when there is no classifier,
        the audio is missing or empty, the classifier fails, or fewer than
        two class scores come back.
    """
    import logging

    default = {"gender": "unknown", "confidence": 0.0}
    if classifier is None or audio_wav is None or len(audio_wav) == 0:
        return default

    try:
        import torch

        # A contiguous float32 copy lets lists and strided views through
        # torch.from_numpy, which rejects both.
        samples = np.ascontiguousarray(audio_wav, dtype=np.float32)
        seg_tensor = torch.from_numpy(samples).unsqueeze(0)
        with torch.no_grad():
            out = classifier.classify_batch(seg_tensor)
        scores = torch.softmax(out[0], dim=-1)
        # Flatten instead of squeeze so a single-score output stays 1-D.
        probs = scores.detach().cpu().numpy().ravel()
    except Exception:
        # Best-effort feature: never propagate, but leave a trace in the log.
        logging.getLogger(__name__).warning(
            "Gender classification failed; returning unknown", exc_info=True
        )
        return default

    if probs.size < 2:
        return default

    idx = int(np.argmax(probs))
    # NOTE(review): assumes class index 0 is "male" and any other index is
    # "female" -- confirm against the model's label encoder.
    label = "male" if idx == 0 else "female"
    return {"gender": label, "confidence": float(probs[idx])}
|
||||
Reference in New Issue
Block a user