""" Speaker Classifier - 聲紋品質評估與性別分類 提供品質計算與性別分類功能,作為 main_fixed.py 的輔助模組。 """ import numpy as np def compute_embedding_quality(embeddings, labels): """每個 embedding 到所屬 cluster centroid 的餘弦相似度 Args: embeddings: [n_segments, 192] 聲紋向量矩陣 labels: [n_segments] 聚類標籤 Returns: qualities: [n_segments] 品質分數 (0-1) """ from sklearn.metrics.pairwise import cosine_similarity unique_labels = set(labels) centroids = {} for label in unique_labels: mask = labels == label centroid = np.mean(embeddings[mask], axis=0) norm = np.linalg.norm(centroid) if norm > 0: centroid = centroid / norm centroids[label] = centroid qualities = [] for emb, label in zip(embeddings, labels): sim = cosine_similarity([emb], [centroids[label]])[0][0] qualities.append(sim) return np.array(qualities) def classify_gender(audio_wav, sample_rate, classifier): """從音頻段分類性別 Args: audio_wav: 音頻波形 (numpy array) sample_rate: 採樣率 classifier: SpeechBrain EncoderClassifier (gender-recognition-ecapa) Returns: dict: {"gender": "male"|"female"|"unknown", "confidence": float} """ default = {"gender": "unknown", "confidence": 0.0} if classifier is None or len(audio_wav) == 0: return default try: import torch seg_tensor = torch.from_numpy(audio_wav).float().unsqueeze(0) out = classifier.classify_batch(seg_tensor) probs = torch.softmax(out[0], dim=-1).squeeze().cpu().detach().numpy() if len(probs) >= 2: idx = int(np.argmax(probs)) label = "male" if idx == 0 else "female" return {"gender": label, "confidence": float(probs[idx])} except Exception as e: pass return default