cleanup: remove dead code and duplicate docs
- Remove session-ses_2f27.md (161KB raw session log)
- Remove 49 ROOT_* duplicate files across REFERENCE/
- Remove 14 duplicate files between REFERENCE/ root and history/
- Remove asr_legacy.rs (dead code, replaced by asr.rs)
- Remove src/core/worker/ (duplicate JobWorker)
- Remove src/core/layers/ (empty directory)
- Remove 4 .bak files in src/
- Remove 7 dead private methods in worker/processor.rs
- Remove backup directory from git tracking
This commit is contained in:
@@ -10,7 +10,7 @@ ASR方案内容对比分析
|
||||
|
||||
import json
|
||||
from pathlib import Path
|
||||
from difflib import unified_diff, SequenceMatcher
|
||||
from difflib import SequenceMatcher
|
||||
|
||||
def load_segments(json_path):
|
||||
"""加载JSON文件中的segments"""
|
||||
@@ -25,7 +25,7 @@ def compare_segments(seg_a, seg_b, name_a, name_b):
|
||||
print(f"{'='*60}")
|
||||
|
||||
# 统计
|
||||
print(f"\n【数量对比】")
|
||||
print("\n【数量对比】")
|
||||
print(f" {name_a}: {len(seg_a)} segments")
|
||||
print(f" {name_b}: {len(seg_b)} segments")
|
||||
print(f" 差异: {len(seg_a) - len(seg_b)} segments")
|
||||
@@ -34,7 +34,7 @@ def compare_segments(seg_a, seg_b, name_a, name_b):
|
||||
total_time_a = sum(s['end'] - s['start'] for s in seg_a)
|
||||
total_time_b = sum(s['end'] - s['start'] for s in seg_b)
|
||||
|
||||
print(f"\n【时间覆盖】")
|
||||
print("\n【时间覆盖】")
|
||||
print(f" {name_a}: {total_time_a:.2f}秒")
|
||||
print(f" {name_b}: {total_time_b:.2f}秒")
|
||||
print(f" 差异: {total_time_a - total_time_b:.2f}秒")
|
||||
@@ -48,11 +48,11 @@ def compare_segments(seg_a, seg_b, name_a, name_b):
|
||||
text_b_full = ' '.join(texts_b)
|
||||
similarity = SequenceMatcher(None, text_a_full, text_b_full).ratio()
|
||||
|
||||
print(f"\n【文本相似度】")
|
||||
print("\n【文本相似度】")
|
||||
print(f" 相似度: {similarity*100:.1f}%")
|
||||
|
||||
# 差异分析
|
||||
print(f"\n【详细差异】")
|
||||
print("\n【详细差异】")
|
||||
|
||||
# 按时间对齐对比
|
||||
matched_diffs = []
|
||||
@@ -98,7 +98,7 @@ def compare_segments(seg_a, seg_b, name_a, name_b):
|
||||
if len(matched_diffs) > 10:
|
||||
print(f"\n ... 还有 {len(matched_diffs) - 10} 处差异")
|
||||
else:
|
||||
print(f" ✓ 无显著文本差异")
|
||||
print(" ✓ 无显著文本差异")
|
||||
|
||||
return {
|
||||
'segments_diff': len(seg_a) - len(seg_b),
|
||||
@@ -122,10 +122,10 @@ def main():
|
||||
|
||||
# 方案基本信息
|
||||
print("【测试方案】")
|
||||
print(f" 方案A: faster-whisper small CPU")
|
||||
print(f" 方案B: OpenAI whisper small CPU")
|
||||
print(f" 方案D: OpenAI whisper medium CPU")
|
||||
print(f" 方案C/E: MPS失败(不支持)")
|
||||
print(" 方案A: faster-whisper small CPU")
|
||||
print(" 方案B: OpenAI whisper small CPU")
|
||||
print(" 方案D: OpenAI whisper medium CPU")
|
||||
print(" 方案C/E: MPS失败(不支持)")
|
||||
print()
|
||||
|
||||
# 三组对比
|
||||
@@ -142,16 +142,16 @@ def main():
|
||||
print("="*60)
|
||||
|
||||
print("\n【Segments数量】")
|
||||
print(f" 方案A: 77 segments (最多)")
|
||||
print(f" 方案B: 74 segments")
|
||||
print(f" 方案D: 74 segments")
|
||||
print(f" 结论: faster-whisper分割更细(+3 segments)")
|
||||
print(" 方案A: 77 segments (最多)")
|
||||
print(" 方案B: 74 segments")
|
||||
print(" 方案D: 74 segments")
|
||||
print(" 结论: faster-whisper分割更细(+3 segments)")
|
||||
|
||||
print("\n【文本相似度】")
|
||||
print(f" A vs B: {results['A_vs_B']['similarity']*100:.1f}%")
|
||||
print(f" A vs D: {results['A_vs_D']['similarity']*100:.1f}%")
|
||||
print(f" B vs D: {results['B_vs_D']['similarity']*100:.1f}%")
|
||||
print(f" 结论: 三个方案文本高度相似")
|
||||
print(" 结论: 三个方案文本高度相似")
|
||||
|
||||
print("\n【文本差异统计】")
|
||||
print(f" A vs B: {results['A_vs_B']['text_diffs']}处差异")
|
||||
@@ -159,9 +159,9 @@ def main():
|
||||
print(f" B vs D: {results['B_vs_D']['text_diffs']}处差异")
|
||||
|
||||
print("\n【方案D(medium)vs 方案B(small)】")
|
||||
print(f" Segments数量相同: 74条")
|
||||
print(" Segments数量相同: 74条")
|
||||
print(f" 文本相似度: {results['B_vs_D']['similarity']*100:.1f}%")
|
||||
print(f" 结论: medium模型无明显提升")
|
||||
print(" 结论: medium模型无明显提升")
|
||||
|
||||
print()
|
||||
print("="*60)
|
||||
|
||||
Reference in New Issue
Block a user