#!/opt/homebrew/bin/python3.11
"""
ASR model comparison tool.

Loads the JSON output of several ASR models and prints a Markdown report
comparing them to stdout.
"""

import json
import sys
from pathlib import Path
from datetime import datetime


def load_results(paths):
    """Load the JSON output of several models.

    Args:
        paths: Mapping of model name to the path of that model's JSON file.

    Returns:
        Dict mapping each model name to the object parsed from its file.

    Raises:
        OSError: If a file cannot be opened.
        json.JSONDecodeError: If a file does not contain valid JSON.
    """
    results = {}
    for name, path in paths.items():
        # Explicit UTF-8: the transcripts contain non-ASCII text, and the
        # platform default encoding is not guaranteed to be UTF-8.
        with open(path, encoding="utf-8") as f:
            results[name] = json.load(f)
    return results


def find_keyword(segments, keyword):
    """Return the first segment whose text contains ``keyword``.

    Args:
        segments: Iterable of segment dicts, each with a ``"text"`` key.
        keyword: Substring to look for.

    Returns:
        The first matching segment dict, or ``None`` if no segment matches.
    """
    for seg in segments:
        if keyword in seg["text"]:
            return seg
    return None


def compare_models(results):
    """Print a Markdown comparison report for several models to stdout.

    The report has three sections: per-model information, a keyword
    recognition table for the ``tiny``/``base``/``small`` models, and a
    side-by-side listing of the first 10 segments of every model.

    Args:
        results: Mapping of model name to that model's parsed output (a dict
            that may contain ``language``, ``language_probability`` and
            ``segments``). Missing keys fall back to defaults, and an empty
            mapping produces a report with no model data.
    """
    print("# ASR 模型對比報告\n")
    print(f"**生成時間**: {datetime.now().isoformat()}\n")

    # Model list
    print("## 模型資訊\n")
    for name, result in results.items():
        print(
            f"- **{name}**: {result.get('language', 'unknown')} "
            + f"({result.get('language_probability', 0) * 100:.1f}%), "
            + f"{len(result.get('segments', []))} 片段"
        )
    print()

    # Keyword comparison
    keywords = ["剪輯師", "調光師", "錄音師", "特效", "套片"]
    print("## 關鍵詞彙識別\n")
    print("| 詞彙 | tiny | base | small |")
    print("|------|------|------|-------|")
    for keyword in keywords:
        row = [keyword]
        for model_name in ["tiny", "base", "small"]:
            if model_name in results:
                # Use .get like the other sections so a result without a
                # "segments" key counts as "not found" instead of raising.
                segments = results[model_name].get("segments", [])
                found = find_keyword(segments, keyword)
                row.append("✅" if found else "❌")
            else:
                row.append("-")
        print(f"| {' | '.join(row)} |")
    print()

    # Detailed comparison (first 10 segments)
    print("## 前 10 句對比\n")
    # default=0 keeps an empty `results` mapping from raising ValueError.
    max_segments = max(
        (len(r.get("segments", [])) for r in results.values()),
        default=0,
    )
    for i in range(min(10, max_segments)):
        print(f"### 片段 {i + 1}\n")
        for model_name, result in results.items():
            segments = result.get("segments", [])
            # Models with fewer segments are skipped for this index.
            if i < len(segments):
                seg = segments[i]
                print(
                    f"**{model_name}**: {seg['text']} "
                    + f"({seg['start']:.1f}s - {seg['end']:.1f}s)"
                )
        print()


def main():
    """Parse command-line arguments and print the comparison report.

    Expects the tiny and base model JSON paths as the first two arguments
    and an optional small model JSON path as the third. Exits with status 1
    when arguments are missing or a given file does not exist.
    """
    if len(sys.argv) < 3:
        # Diagnostics go to stderr so they never end up in a report that was
        # redirected from stdout.
        print(
            "Usage: python3 compare_asr_models.py tiny.json base.json [small.json]",
            file=sys.stderr,
        )
        print("Note: small.json is optional", file=sys.stderr)
        sys.exit(1)

    paths = {"tiny": sys.argv[1], "base": sys.argv[2]}
    if len(sys.argv) > 3:
        paths["small"] = sys.argv[3]

    # Check that every input file exists before loading anything.
    for name, path in paths.items():
        if not Path(path).exists():
            print(f"Error: {path} ({name}) not found", file=sys.stderr)
            sys.exit(1)

    results = load_results(paths)
    compare_models(results)


if __name__ == "__main__":
    main()