#!/usr/bin/env python3
"""Code/documentation consistency checker (Phase 1.2 deliverable).

Checks that the Rust code definitions agree with the architecture documents.

Core principle: when the design and the implementation contradict each other,
the actual Rust implementation is the highest authority.
"""

import re
from pathlib import Path
from typing import Dict, List, Optional

# Opening of the Rust enum whose variants are the implemented chunk types.
_ENUM_HEADER = re.compile(r"pub\s+enum\s+ChunkType\s*\{")
# Rust line comments (including `///` doc comments) and block comments.
_RUST_COMMENT = re.compile(r"//[^\n]*|/\*.*?\*/", re.DOTALL)
# Rust attributes such as `#[serde(rename = "x")]`.
_RUST_ATTRIBUTE = re.compile(r"#\[[^\]]*\]")
# Leading identifier of a variant declaration.
_VARIANT_NAME = re.compile(r"[A-Za-z_][A-Za-z0-9_]*")

# Design-level terms looked for (case-insensitively) in the documents.
_DESIGN_TERMS = frozenset({"sentence", "visual", "scene", "summary", "time"})
# Design terms that must be accompanied by an implementation-status note.
_TERMS_NEEDING_NOTE = frozenset({"visual", "scene", "summary"})
# Any of these substrings counts as "the document carries a status marker".
_STATUS_MARKERS = (
    "✅",
    "⚠️",
    "❌",
    "🔄",
    "已實現",
    "未實現",
    "部分實現",
    "概念調整",
)
# Architecture documents that are checked.
_KEY_FILES = (
    "ARCHITECTURE_OVERVIEW.md",
    "CHUNKING_ARCHITECTURE.md",
    "DESIGN_IMPLEMENTATION_GAP.md",
)


def _resolve_project_root(project_root: Optional[Path]) -> Path:
    """Return *project_root* as a Path, defaulting to the script's grandparent."""
    if project_root is None:
        return Path(__file__).parent.parent
    return Path(project_root)


def _enum_body(text: str) -> Optional[str]:
    """Return *text* up to the brace that closes an already-opened `{`.

    Nested braces (struct-like variants) are skipped. Returns None when the
    braces never balance.
    """
    depth = 1
    for index, char in enumerate(text):
        if char == "{":
            depth += 1
        elif char == "}":
            depth -= 1
            if depth == 0:
                return text[:index]
    return None


def _split_top_level(body: str) -> List[str]:
    """Split *body* on commas that are not nested inside (), {} or []."""
    pieces = []
    current = []
    depth = 0
    for char in body:
        if char == "," and depth == 0:
            pieces.append("".join(current))
            current = []
            continue
        if char in "({[":
            depth += 1
        elif char in ")}]":
            depth -= 1
        current.append(char)
    pieces.append("".join(current))
    return pieces


def _parse_chunk_type_variants(content: str) -> Optional[List[str]]:
    """Extract the variant names of `pub enum ChunkType` from Rust source.

    Comments and attributes are ignored, and payloads of tuple/struct-like
    variants are dropped so only the bare variant name is reported.

    Returns None when *content* holds no (complete) ChunkType definition.
    """
    header = _ENUM_HEADER.search(content)
    if header is None:
        return None
    # Strip comments first so that a brace inside a comment cannot confuse
    # the brace matching below.
    remainder = _RUST_COMMENT.sub("", content[header.end():])
    body = _enum_body(remainder)
    if body is None:
        return None
    body = _RUST_ATTRIBUTE.sub("", body)

    variants = []
    for piece in _split_top_level(body):
        name = _VARIANT_NAME.match(piece.strip())
        if name:
            variants.append(name.group(0))
    return variants


def load_code_definitions(project_root: Optional[Path] = None) -> List[str]:
    """Load the ChunkType variant names from the Rust sources.

    Args:
        project_root: Root of the project to scan; ``<root>/src`` is searched
            recursively for ``*.rs`` files. Defaults to the parent of the
            directory containing this script.

    Returns:
        The variant names of the first ChunkType definition found (files are
        visited in sorted path order), or an empty list when none is found.
        Files that cannot be read are reported and skipped.
    """
    print("🔍 解析 Rust 代碼定義...")

    src_dir = _resolve_project_root(project_root) / "src"

    # Sorted so that the chosen definition does not depend on directory order.
    for file_path in sorted(src_dir.glob("**/*.rs")):
        try:
            content = file_path.read_text(encoding="utf-8")
        except (OSError, UnicodeDecodeError) as e:
            print(f"⚠️ 解析文件 {file_path} 時出錯: {e}")
            continue

        variants = _parse_chunk_type_variants(content)
        if variants is not None:
            print(f"📝 找到 ChunkType 定義: {', '.join(variants)}")
            return variants

    print("❌ 未找到 ChunkType 定義")
    return []


def _check_document(
    content: str, relative_path: str, implemented_variants: List[str]
) -> List[Dict[str, str]]:
    """Return the terminology issues found in one document's text."""
    issues = []
    lowered = content.lower()
    has_status_marker = any(marker in content for marker in _STATUS_MARKERS)

    # Design terms: those needing a note are flagged when the document has no
    # status marker at all. Sorted for a reproducible issue order.
    if not has_status_marker:
        for design_term in sorted(_DESIGN_TERMS & _TERMS_NEEDING_NOTE):
            if design_term not in lowered:
                continue
            impl_term = get_implementation_term(design_term)
            # A design term with no implementation counterpart (e.g. "visual")
            # is looked up under its own name so its real status is suggested
            # instead of "unknown".
            status = get_status(impl_term or design_term)
            issues.append(
                {
                    "file": relative_path,
                    "type": "terminology",
                    "description": f"設計術語 '{design_term}' 缺少實現狀態說明",
                    "severity": "warning",
                    "suggested_fix": f"添加狀態說明,例如: '{status}' 或參考 TERMINOLOGY_MAPPING.md",
                }
            )

    # Implementation terms: each mentioned variant must appear together with
    # its expected status string somewhere in the document.
    for impl_term in implemented_variants:
        if impl_term not in content:
            continue
        expected_status = get_status(impl_term)
        if expected_status and expected_status not in content:
            issues.append(
                {
                    "file": relative_path,
                    "type": "terminology",
                    "description": f"實現術語 '{impl_term}' 缺少正確的狀態標記",
                    "severity": "info",
                    "suggested_fix": f"添加狀態標記: {expected_status}",
                }
            )

    return issues


def check_terminology_consistency(
    implemented_variants: List[str], project_root: Optional[Path] = None
) -> List[Dict[str, str]]:
    """Check the key architecture documents for terminology consistency.

    Args:
        implemented_variants: Variant names taken from the Rust code.
        project_root: Root of the project; documents are read from
            ``<root>/docs_v1.0/ARCHITECTURE``. Defaults to the parent of the
            directory containing this script.

    Returns:
        A list of issue dicts with the keys ``file``, ``type``,
        ``description``, ``severity`` and ``suggested_fix``. Missing or
        unreadable documents are reported on stdout and skipped.
    """
    print("\n📝 檢查術語一致性...")

    root = _resolve_project_root(project_root)
    architecture_dir = root / "docs_v1.0" / "ARCHITECTURE"

    issues = []
    for filename in _KEY_FILES:
        file_path = architecture_dir / filename
        if not file_path.exists():
            print(f" ⚠️ 文件不存在: {filename}")
            continue

        try:
            content = file_path.read_text(encoding="utf-8")
        except (OSError, UnicodeDecodeError) as e:
            print(f" ❌ 無法讀取文件 {file_path}: {e}")
            continue

        relative_path = str(file_path.relative_to(root))
        issues.extend(_check_document(content, relative_path, implemented_variants))

    return issues


def get_implementation_term(design_term: str) -> str:
    """Return the implementation term for a design term ('' if none/unknown)."""
    mapping = {
        "sentence": "Sentence",
        "visual": "",  # not implemented
        "scene": "Cut",
        "summary": "Story",
        "time": "TimeBased",
    }
    return mapping.get(design_term, "")


def get_status(impl_term: str) -> str:
    """Return the status string for a term, or the "unknown" status string."""
    status_map = {
        "TimeBased": "✅ 已實現",
        "Sentence": "✅ 已實現",
        "Cut": "⚠️ 部分實現",
        "Trace": "✅ 已實現",
        "Story": "⚠️ 概念調整",
        "visual": "❌ 未實現",
    }
    return status_map.get(impl_term, "❓ 狀態未知")


def main():
    """Run the consistency check and print a report to stdout."""
    print("🚀 開始代碼與文檔一致性檢查 - Phase 1.2")
    print("=" * 50)

    # 1. Load the code definitions.
    implemented_variants = load_code_definitions()
    if not implemented_variants:
        print("❌ 無法繼續檢查,請先確保 Rust 代碼正常編譯")
        return

    print(f"✅ 加載了 {len(implemented_variants)} 個代碼定義")

    # 2. Check terminology consistency.
    issues = check_terminology_consistency(implemented_variants)

    # 3. Show the results.
    print("\n📊 檢查完成:")
    print(f" 發現問題數: {len(issues)}")

    if issues:
        print("\n🔍 詳細問題列表:")
        for issue in issues:
            print(f" [{issue['severity'].upper()}] {issue['file']}")
            print(f" 描述: {issue['description']}")
            print(f" 建議: {issue['suggested_fix']}")
            print()

    print("=" * 50)
    print("✅ 檢查完成。請參考 TERMINOLOGY_MAPPING.md 進行修復。")


if __name__ == "__main__":
    main()