Files
momentry_core/scripts/update_terminology.py
Warren 8f05a7c188 feat: update Python processors and add utility scripts
- Update ASR, face, OCR, pose processors
- Add release pre-flight check script
- Add synonym generation, chunk processing scripts
- Add face recognition, stamp search utilities
2026-04-30 15:07:49 +08:00

117 lines
3.5 KiB
Python

#!/usr/bin/env python3
"""
架構文檔術語更新工具
用於將設計文檔中的術語更新為實際代碼實現的術語
"""
import os
import re
from pathlib import Path
from typing import Dict, List, Optional, Tuple
# Terminology table: design-document term -> what the code actually implements.
# Each value is a (design value, actual value, status marker) triple.
# Entry order is significant: replacements are applied in this order.
TERMINOLOGY_MAPPING: Dict[str, Tuple[str, str, str]] = {
    "sentence": (
        "sentence",
        "ChunkType::Sentence",
        "✅ 完整實現",
    ),
    "visual": (
        "visual",
        "未實現 (設計值: visual)",
        "❌ 未實現",
    ),
    "scene": (
        "scene",
        "ChunkType::Cut (設計值: scene)",
        "⚠️ 部分實現",
    ),
    "summary": (
        "summary",
        "ChunkType::Story (設計值: summary)",
        "⚠️ 概念調整",
    ),
    "time": (
        "time",
        "ChunkType::TimeBased",
        "✅ 完整實現",
    ),
    "trace": (
        "trace",
        "ChunkType::Trace",
        "✅ 完整實現",
    ),
}

# Directory (relative to the current working directory) whose Markdown
# files are updated.
ARCHITECTURE_DIR = Path("docs_v1.0") / "ARCHITECTURE"
def update_file(
    file_path: Path,
    mapping: Optional[Dict[str, Tuple[str, str, str]]] = None,
) -> bool:
    """Rewrite design-time chunk-type terms in one file to their actual names.

    For every term in the mapping, two textual forms are rewritten
    (case-insensitively):

    * ``chunk_type "<term>"`` or ``chunk_type '<term>'`` becomes
      ``chunk_type "<actual>"``;
    * a table cell ``| <term> |`` becomes ``| <actual> |``.

    When anything changed, the previous content is first saved next to the
    file under the same name plus a ``.bak`` suffix, then the file is
    overwritten. Line endings of the file are preserved as-is.

    Args:
        file_path: UTF-8 text file to update in place.
        mapping: Term table shaped like ``TERMINOLOGY_MAPPING``
            (``term -> (design value, actual value, status marker)``).
            Defaults to ``TERMINOLOGY_MAPPING``.

    Returns:
        True if the file was modified, False if nothing needed replacing.
    """
    if mapping is None:
        mapping = TERMINOLOGY_MAPPING

    # newline="" turns off newline translation so that CRLF files are not
    # silently converted to LF when they are written back (or backed up).
    with open(file_path, "r", encoding="utf-8", newline="") as f:
        original_content = f.read()

    content = original_content
    for design_term, (_, actual_term, _) in mapping.items():
        escaped_term = re.escape(design_term)
        quoted_replacement = f'chunk_type "{actual_term}"'
        cell_replacement = f"| {actual_term} "

        # Replace quoted chunk_type values. The replacement is supplied via a
        # callable so it is inserted literally (no backslash/group expansion).
        content = re.sub(
            r"chunk_type\s*['\"]" + escaped_term + r"['\"]",
            lambda _match: quoted_replacement,
            content,
            flags=re.IGNORECASE,
        )

        # Replace table cells. The closing pipe is only looked at, not
        # consumed, so it can also open the next cell (`| time | time |`).
        # `[^\S\r\n]` is "whitespace except line breaks": a cell never spans
        # lines, so a match must not swallow a newline and merge two rows.
        content = re.sub(
            r"\|[^\S\r\n]*" + escaped_term + r"[^\S\r\n]*(?=\|)",
            lambda _match: cell_replacement,
            content,
            flags=re.IGNORECASE,
        )

    if content == original_content:
        print(f"○ 無需更新: {file_path}")
        return False

    # Keep a copy of the previous content before overwriting the file.
    backup_path = file_path.with_suffix(file_path.suffix + ".bak")
    with open(backup_path, "w", encoding="utf-8", newline="") as f:
        f.write(original_content)

    with open(file_path, "w", encoding="utf-8", newline="") as f:
        f.write(content)

    print(f"✓ 已更新: {file_path}")
    return True
def _display_path(path: Path) -> Path:
    """Return *path* relative to the current directory when that is possible."""
    if not path.is_absolute():
        # A relative path cannot be made relative to the absolute cwd:
        # Path.relative_to() would raise ValueError. Show it unchanged.
        return path
    try:
        return path.relative_to(Path.cwd())
    except ValueError:
        # Absolute path outside the current directory: show it in full.
        return path


def generate_report(
    updated_files: List[Path],
    skipped_files: List[Path],
    mapping: Optional[Dict[str, Tuple[str, str, str]]] = None,
) -> None:
    """Print a summary of the terminology update to standard output.

    The report lists the updated files, the files that needed no change,
    the terminology table and a fixed list of suggested next steps.

    Args:
        updated_files: Paths of the files that were modified.
        skipped_files: Paths of the files that were left untouched.
        mapping: Term table shaped like ``TERMINOLOGY_MAPPING``
            (``term -> (design value, actual value, status marker)``).
            Defaults to ``TERMINOLOGY_MAPPING``.
    """
    if mapping is None:
        mapping = TERMINOLOGY_MAPPING

    print("\n" + "=" * 80)
    print("術語更新報告")
    print("=" * 80)

    print(f"\n已更新文件 ({len(updated_files)}):")
    for file in updated_files:
        print(f" - {_display_path(file)}")

    print(f"\n跳過文件 ({len(skipped_files)}):")
    for file in skipped_files:
        print(f" - {_display_path(file)}")

    print("\n術語對照表:")
    for design_term, (_, actual_term, status) in mapping.items():
        print(f" {design_term:10}{actual_term:30} [{status}]")

    print("\n下一步建議:")
    print("1. 手動檢查更新的文件,確保語義正確")
    print("2. 運行 cargo test 確保代碼編譯正常")
    print("3. 更新代碼註釋中的術語")
    print("4. 運行一致性檢查工具")
def main():
    """Update every Markdown file under ARCHITECTURE_DIR and print a report."""
    print("開始術語標準化更新...")

    changed = []
    untouched = []

    # Walk the architecture directory tree recursively.
    for dirpath, _subdirs, filenames in os.walk(ARCHITECTURE_DIR):
        for name in filenames:
            if not name.endswith(".md"):
                continue
            target = Path(dirpath) / name
            # File each path under "updated" or "skipped" based on the result.
            bucket = changed if update_file(target) else untouched
            bucket.append(target)

    # Summarise what happened.
    generate_report(changed, untouched)
# Run the update only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()