cleanup: remove dead code and duplicate docs

- Remove session-ses_2f27.md (161KB raw session log)
- Remove 49 ROOT_* duplicate files across REFERENCE/
- Remove 14 duplicate files between REFERENCE/ root and history/
- Remove asr_legacy.rs (dead code, replaced by asr.rs)
- Remove src/core/worker/ (duplicate JobWorker)
- Remove src/core/layers/ (empty directory)
- Remove 4 .bak files in src/
- Remove 7 dead private methods in worker/processor.rs
- Remove backup directory from git tracking
This commit is contained in:
Warren
2026-05-04 01:31:21 +08:00
parent ee81e343ce
commit e75c4d6f07
3270 changed files with 35190 additions and 53367 deletions

View File

@@ -138,7 +138,7 @@ def spectral_clustering_speaker(
speaker_labels = clustering.fit_predict(similarity_matrix)
print(f"[Clustering] Spectral clustering completed")
print("[Clustering] Spectral clustering completed")
print(f"[Clustering] n_speakers: {n_speakers}")
print(f"[Clustering] n_segments: {n_segments}")
@@ -146,7 +146,7 @@ def spectral_clustering_speaker(
except Exception as e:
print(f"[Clustering] Spectral clustering failed: {e}")
print(f"[Clustering] Using fallback: 2 speakers")
print("[Clustering] Using fallback: 2 speakers")
# 簡單分配:前一半是 SPEAKER_0,後一半是 SPEAKER_1
speaker_labels = np.array(
[0] * (n_segments // 2) + [1] * (n_segments - n_segments // 2)
@@ -203,7 +203,7 @@ def agglomerative_clustering_speaker(
speaker_labels = clustering.fit_predict(embeddings)
print(f"[Clustering] Agglomerative clustering completed")
print("[Clustering] Agglomerative clustering completed")
print(f"[Clustering] n_speakers: {n_speakers}")
return speaker_labels, n_speakers
@@ -249,7 +249,6 @@ def compute_diarization_purity(speaker_labels, ground_truth_labels=None):
"""
if ground_truth_labels is None:
# 沒有 ground truth,使用聚類純度近似
from sklearn.metrics import silhouette_score
# 使用餘弦相似度作為距離
purity = 0.5 # 預設值
@@ -300,7 +299,7 @@ if __name__ == "__main__":
similarity, n_speakers=None, auto_estimate=True
)
print(f"\n[Test] Clustering results:")
print("\n[Test] Clustering results:")
print(f" True n_speakers: {n_speakers}")
print(f" Estimated n_speakers: {n_clusters}")
print(f" Unique labels: {np.unique(labels)}")