cleanup: remove dead code and duplicate docs

- Remove session-ses_2f27.md (161KB raw session log)
- Remove 49 ROOT_* duplicate files across REFERENCE/
- Remove 14 duplicate files between REFERENCE/ root and history/
- Remove asr_legacy.rs (dead code, replaced by asr.rs)
- Remove src/core/worker/ (duplicate JobWorker)
- Remove src/core/layers/ (empty directory)
- Remove 4 .bak files in src/
- Remove 7 dead private methods in worker/processor.rs
- Remove backup directory from git tracking
This commit is contained in:
Warren
2026-05-04 01:31:21 +08:00
parent ee81e343ce
commit e75c4d6f07
3270 changed files with 35190 additions and 53367 deletions

View File

@@ -10,7 +10,7 @@ ASR方案内容对比分析
import json
from pathlib import Path
from difflib import unified_diff, SequenceMatcher
from difflib import SequenceMatcher
def load_segments(json_path):
"""加载JSON文件中的segments"""
@@ -25,7 +25,7 @@ def compare_segments(seg_a, seg_b, name_a, name_b):
print(f"{'='*60}")
# 统计
print(f"\n【数量对比】")
print("\n【数量对比】")
print(f" {name_a}: {len(seg_a)} segments")
print(f" {name_b}: {len(seg_b)} segments")
print(f" 差异: {len(seg_a) - len(seg_b)} segments")
@@ -34,7 +34,7 @@ def compare_segments(seg_a, seg_b, name_a, name_b):
total_time_a = sum(s['end'] - s['start'] for s in seg_a)
total_time_b = sum(s['end'] - s['start'] for s in seg_b)
print(f"\n【时间覆盖】")
print("\n【时间覆盖】")
print(f" {name_a}: {total_time_a:.2f}")
print(f" {name_b}: {total_time_b:.2f}")
print(f" 差异: {total_time_a - total_time_b:.2f}")
@@ -48,11 +48,11 @@ def compare_segments(seg_a, seg_b, name_a, name_b):
text_b_full = ' '.join(texts_b)
similarity = SequenceMatcher(None, text_a_full, text_b_full).ratio()
print(f"\n【文本相似度】")
print("\n【文本相似度】")
print(f" 相似度: {similarity*100:.1f}%")
# 差异分析
print(f"\n【详细差异】")
print("\n【详细差异】")
# 按时间对齐对比
matched_diffs = []
@@ -98,7 +98,7 @@ def compare_segments(seg_a, seg_b, name_a, name_b):
if len(matched_diffs) > 10:
print(f"\n ... 还有 {len(matched_diffs) - 10} 处差异")
else:
print(f" ✓ 无显著文本差异")
print(" ✓ 无显著文本差异")
return {
'segments_diff': len(seg_a) - len(seg_b),
@@ -122,10 +122,10 @@ def main():
# 方案基本信息
print("【测试方案】")
print(f" 方案A: faster-whisper small CPU")
print(f" 方案B: OpenAI whisper small CPU")
print(f" 方案D: OpenAI whisper medium CPU")
print(f" 方案C/E: MPS失败不支持")
print(" 方案A: faster-whisper small CPU")
print(" 方案B: OpenAI whisper small CPU")
print(" 方案D: OpenAI whisper medium CPU")
print(" 方案C/E: MPS失败不支持")
print()
# 三组对比
@@ -142,16 +142,16 @@ def main():
print("="*60)
print("\n【Segments数量】")
print(f" 方案A: 77 segments (最多)")
print(f" 方案B: 74 segments")
print(f" 方案D: 74 segments")
print(f" 结论: faster-whisper分割更细+3 segments")
print(" 方案A: 77 segments (最多)")
print(" 方案B: 74 segments")
print(" 方案D: 74 segments")
print(" 结论: faster-whisper分割更细+3 segments")
print("\n【文本相似度】")
print(f" A vs B: {results['A_vs_B']['similarity']*100:.1f}%")
print(f" A vs D: {results['A_vs_D']['similarity']*100:.1f}%")
print(f" B vs D: {results['B_vs_D']['similarity']*100:.1f}%")
print(f" 结论: 三个方案文本高度相似")
print(" 结论: 三个方案文本高度相似")
print("\n【文本差异统计】")
print(f" A vs B: {results['A_vs_B']['text_diffs']}处差异")
@@ -159,9 +159,9 @@ def main():
print(f" B vs D: {results['B_vs_D']['text_diffs']}处差异")
print("\n【方案D（medium）vs 方案B（small）】")
print(f" Segments数量相同: 74条")
print(" Segments数量相同: 74条")
print(f" 文本相似度: {results['B_vs_D']['similarity']*100:.1f}%")
print(f" 结论: medium模型无明显提升")
print(" 结论: medium模型无明显提升")
print()
print("="*60)