feat: update Python processors and add utility scripts
- Update ASR, face, OCR, pose processors
- Add release pre-flight check script
- Add synonym generation, chunk processing scripts
- Add face recognition, stamp search utilities
This commit is contained in:
105
scripts/compare_asr_models.py
Executable file
105
scripts/compare_asr_models.py
Executable file
@@ -0,0 +1,105 @@
|
||||
#!/opt/homebrew/bin/python3.11
|
||||
"""
ASR model comparison tool.

Compares the output produced by different models.
"""
|
||||
|
||||
import json
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
|
||||
|
||||
def load_results(paths):
    """Load the JSON output of several ASR models.

    Args:
        paths: Mapping of model name to the path of that model's JSON
            result file.

    Returns:
        A dict mapping each model name to its parsed JSON document, in the
        same order as ``paths``.

    Raises:
        OSError: If a file cannot be opened.
        json.JSONDecodeError: If a file does not contain valid JSON.
    """
    results = {}
    for name, path in paths.items():
        # Decode explicitly as UTF-8: the report searches these files for
        # Chinese keywords, and the platform default encoding (not UTF-8 on
        # every system) could garble the text or raise UnicodeDecodeError.
        with open(path, encoding="utf-8") as f:
            results[name] = json.load(f)
    return results
|
||||
|
||||
|
||||
def find_keyword(segments, keyword):
    """Return the first segment whose text contains ``keyword``.

    Args:
        segments: Iterable of segment dicts; the transcript is read from
            each segment's ``"text"`` entry. ``None`` is treated as "no
            segments".
        keyword: Substring to look for.

    Returns:
        The first matching segment dict, or ``None`` when nothing matches.
    """
    for seg in segments or ():
        # A segment with a missing or null text cannot match; skip it rather
        # than aborting the whole search with KeyError/TypeError.
        text = seg.get("text") or ""
        if keyword in text:
            return seg
    return None
|
||||
|
||||
|
||||
def compare_models(results):
    """Print a Markdown report comparing the output of several ASR models.

    The report goes to stdout and has three parts: a per-model summary
    (language, language probability, segment count), a table showing which
    models recognised each of a fixed list of keywords, and a side-by-side
    listing of the first ten segments.

    Args:
        results: Mapping of model name to that model's parsed result dict.
            The keys ``"language"``, ``"language_probability"`` and
            ``"segments"`` are read when present. Every segment shown in
            the last part must provide ``"text"``, ``"start"`` and
            ``"end"``. The keyword table only has columns for the models
            named ``tiny``, ``base`` and ``small``; a model that is absent
            from ``results`` is shown as ``-``.

    Returns:
        None.
    """
    print("# ASR 模型對比報告\n")
    print(f"**生成時間**: {datetime.now().isoformat()}\n")

    # Model summary.
    print("## 模型資訊\n")
    for name, result in results.items():
        print(
            f"- **{name}**: {result.get('language', 'unknown')} "
            f"({result.get('language_probability', 0) * 100:.1f}%), "
            f"{len(result.get('segments', []))} 片段"
        )
    print()

    # Keyword recognition table.
    keywords = ["剪輯師", "調光師", "錄音師", "特效", "套片"]
    print("## 關鍵詞彙識別\n")
    print("| 詞彙 | tiny | base | small |")
    print("|------|------|------|-------|")

    for keyword in keywords:
        row = [keyword]
        for model_name in ["tiny", "base", "small"]:
            if model_name not in results:
                row.append("-")
                continue
            # Use .get() like the rest of the report so a result without a
            # "segments" key counts as "not recognised" instead of raising
            # KeyError.
            segments = results[model_name].get("segments", [])
            # Nothing to search when the model produced no segments.
            found = find_keyword(segments, keyword) if segments else None
            row.append("✅" if found else "❌")
        print(f"| {' | '.join(row)} |")

    print()

    # Side-by-side listing of the first 10 segments.
    print("## 前 10 句對比\n")
    # default=0 keeps an empty `results` mapping from raising ValueError.
    max_segments = max(
        (len(r.get("segments", [])) for r in results.values()),
        default=0,
    )

    for i in range(min(10, max_segments)):
        print(f"### 片段 {i + 1}\n")
        for model_name, result in results.items():
            segments = result.get("segments", [])
            # Models with fewer segments simply have no line for this index.
            if i < len(segments):
                seg = segments[i]
                print(
                    f"**{model_name}**: {seg['text']} "
                    f"({seg['start']:.1f}s - {seg['end']:.1f}s)"
                )
        print()
|
||||
|
||||
|
||||
def main():
    """Command-line entry point: compare two or three ASR result files.

    Reads ``sys.argv``: the first two arguments are the tiny and base model
    JSON files, the optional third is the small model file. On success the
    comparison report is printed to stdout.

    Raises:
        SystemExit: With status 1 when fewer than two files are given or
            when any given path is not an existing regular file.
    """
    if len(sys.argv) < 3:
        # Diagnostics go to stderr so they never end up inside a report that
        # is being redirected from stdout.
        print(
            "Usage: python3 compare_asr_models.py <tiny.json> <base.json> [small.json]",
            file=sys.stderr,
        )
        print("Note: small.json is optional", file=sys.stderr)
        sys.exit(1)

    paths = {"tiny": sys.argv[1], "base": sys.argv[2]}

    if len(sys.argv) > 3:
        paths["small"] = sys.argv[3]

    # Validate every input up front. is_file() rather than exists(), so a
    # directory is rejected here with a clear message instead of failing
    # later inside open().
    for name, path in paths.items():
        if not Path(path).is_file():
            print(f"Error: {path} ({name}) not found", file=sys.stderr)
            sys.exit(1)

    results = load_results(paths)
    compare_models(results)
|
||||
|
||||
|
||||
# Run the comparison only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user