Files
momentry_core/scripts/check_code_document_consistency.py
Warren e75c4d6f07 cleanup: remove dead code and duplicate docs
- Remove session-ses_2f27.md (161KB raw session log)
- Remove 49 ROOT_* duplicate files across REFERENCE/
- Remove 14 duplicate files between REFERENCE/ root and history/
- Remove asr_legacy.rs (dead code, replaced by asr.rs)
- Remove src/core/worker/ (duplicate JobWorker)
- Remove src/core/layers/ (empty directory)
- Remove 4 .bak files in src/
- Remove 7 dead private methods in worker/processor.rs
- Remove backup directory from git tracking
2026-05-04 01:31:21 +08:00

195 lines
6.3 KiB
Python

#!/usr/bin/env python3
"""
代碼與文檔一致性檢查工具 - Phase 1.2 成果
功能:檢查 Rust 代碼定義與架構文檔的一致性
核心原則:當設計與實現出現矛盾時,以實際的 Rust 代碼實現為最高權威
"""
import re
from pathlib import Path
def _strip_rust_comments(source):
    """Return ``source`` with ``/* ... */`` and ``// ...`` comments removed.

    NOTE: this is a lightweight textual pass, not a Rust lexer. It does not
    understand nested block comments or comment markers inside string
    literals, which is acceptable for locating a plain enum definition.
    """
    without_block_comments = re.sub(r"/\*.*?\*/", "", source, flags=re.DOTALL)
    return re.sub(r"//[^\n]*", "", without_block_comments)


def _find_enum_body(source, header_pattern):
    """Return the text between the enum's opening brace and its matching ``}``.

    Braces are counted so that struct-like variants (``Foo { a: u32 }``) do
    not end the body early. Returns ``None`` when the header is absent or the
    braces are unbalanced.
    """
    header = header_pattern.search(source)
    if header is None:
        return None
    depth = 1
    for index in range(header.end(), len(source)):
        char = source[index]
        if char == "{":
            depth += 1
        elif char == "}":
            depth -= 1
            if depth == 0:
                return source[header.end():index]
    return None


def _parse_enum_variants(enum_body):
    """Return the variant identifiers found in a comment-free enum body.

    Attributes (``#[...]``) are dropped, the body is split on commas that sit
    outside any bracket pair, and only the leading identifier of each item is
    kept, so payloads and explicit discriminants are not part of the name.
    """
    without_attributes = re.sub(r"#\[[^\]]*\]", "", enum_body)
    variants = []
    depth = 0
    current = []
    # The extra trailing comma flushes the last item when the enum has no
    # trailing comma of its own.
    for char in without_attributes + ",":
        if char in "({[":
            depth += 1
        elif char in ")}]":
            depth -= 1
        if char == "," and depth == 0:
            identifier = re.match(r"\s*([A-Za-z_]\w*)", "".join(current))
            if identifier:
                variants.append(identifier.group(1))
            current = []
        else:
            current.append(char)
    return variants


def load_code_definitions():
    """Load the variant names of the Rust ``ChunkType`` enum.

    Every ``*.rs`` file under ``<project root>/src`` is scanned (the project
    root is the grandparent directory of this script) and the first
    ``pub enum ChunkType { ... }`` definition found is parsed. Files are
    visited in sorted order so the result is deterministic.

    Returns:
        list[str]: Variant identifiers in declaration order, or an empty list
        when no definition is found. Files that cannot be read are reported
        on stdout and skipped.
    """
    print("🔍 解析 Rust 代碼定義...")
    project_root = Path(__file__).parent.parent
    src_dir = project_root / "src"
    header_pattern = re.compile(r"pub\s+enum\s+ChunkType\s*\{")

    for file_path in sorted(src_dir.glob("**/*.rs")):
        # Only the read can fail for environmental reasons; keep the try
        # narrow so parsing bugs are not silently swallowed.
        try:
            content = file_path.read_text(encoding="utf-8")
        except (OSError, UnicodeError) as e:
            print(f"⚠️ 解析文件 {file_path} 時出錯: {e}")
            continue

        # Comments are removed first so a commented-out enum is not matched
        # and braces inside comments do not upset the brace counting.
        enum_body = _find_enum_body(_strip_rust_comments(content), header_pattern)
        if enum_body is None:
            continue

        variants = _parse_enum_variants(enum_body)
        print(f"📝 找到 ChunkType 定義: {', '.join(variants)}")
        return variants

    print("❌ 未找到 ChunkType 定義")
    return []
def check_terminology_consistency(implemented_variants):
    """Check the key architecture documents for terminology/status gaps.

    Each of the key files under ``<project root>/docs_v1.0/ARCHITECTURE`` is
    read (missing or unreadable files are reported on stdout and skipped) and
    two checks are applied:

    * If the document mentions one of the design terms ``visual``, ``scene``
      or ``summary`` (case-insensitive substring match) and contains no
      status marker anywhere, a ``warning`` issue is recorded for that term.
    * If the document mentions an implemented variant name (case-sensitive
      substring match) but not the status string ``get_status`` returns for
      it, an ``info`` issue is recorded.

    Args:
        implemented_variants: Iterable of implementation term strings, as
            returned by ``load_code_definitions``.

    Returns:
        list[dict]: One dict per issue with the keys ``file``, ``type``,
        ``description``, ``severity`` and ``suggested_fix``.
    """
    print("\n📝 檢查術語一致性...")
    project_root = Path(__file__).parent.parent
    architecture_dir = project_root / "docs_v1.0" / "ARCHITECTURE"

    # A tuple (not a set) so issues are reported in the same order every run.
    design_terms = ("sentence", "visual", "scene", "summary", "time")
    # Only these design terms require an implementation-status note.
    terms_needing_note = frozenset({"visual", "scene", "summary"})
    # Every marker must be non-empty: an empty string is a substring of any
    # document and would make the marker check always succeed.
    status_markers = (
        "✅",
        "⚠️",
        "❌",
        "🔄",
        "已實現",
        "未實現",
        "部分實現",
        "概念調整",
    )
    key_files = [
        "ARCHITECTURE_OVERVIEW.md",
        "CHUNKING_ARCHITECTURE.md",
        "DESIGN_IMPLEMENTATION_GAP.md",
    ]

    issues = []
    for filename in key_files:
        file_path = architecture_dir / filename
        if not file_path.exists():
            print(f" ⚠️ 文件不存在: {filename}")
            continue
        try:
            content = file_path.read_text(encoding="utf-8")
        except (OSError, UnicodeError) as e:
            print(f" ❌ 無法讀取文件 {file_path}: {e}")
            continue

        # Per-file invariants, computed once instead of once per term.
        relative_path = str(file_path.relative_to(project_root))
        lowered_content = content.lower()
        has_status_marker = any(marker in content for marker in status_markers)

        # Design terms: flag those that appear without any status marker.
        for design_term in design_terms:
            if design_term not in terms_needing_note:
                continue
            if design_term not in lowered_content or has_status_marker:
                continue
            impl_term = get_implementation_term(design_term)
            # A design term with no implementation maps to "", so fall back
            # to the design term itself when looking up the status.
            status = get_status(impl_term or design_term)
            issues.append(
                {
                    "file": relative_path,
                    "type": "terminology",
                    "description": f"設計術語 '{design_term}' 缺少實現狀態說明",
                    "severity": "warning",
                    "suggested_fix": f"添加狀態說明,例如: '{status}' 或參考 TERMINOLOGY_MAPPING.md",
                }
            )

        # Implementation terms: flag those mentioned without their status.
        for impl_term in implemented_variants:
            if impl_term not in content:
                continue
            expected_status = get_status(impl_term)
            if expected_status not in content:
                issues.append(
                    {
                        "file": relative_path,
                        "type": "terminology",
                        "description": f"實現術語 '{impl_term}' 缺少正確的狀態標記",
                        "severity": "info",
                        "suggested_fix": f"添加狀態標記: {expected_status}",
                    }
                )
    return issues
def get_implementation_term(design_term):
    """Return the implementation term that corresponds to a design term.

    Args:
        design_term: Lower-case design term such as ``"scene"``.

    Returns:
        str: The matching implementation term, or ``""`` when the design term
        has no implementation or is not in the table.
    """
    design_to_implementation = dict(
        sentence="Sentence",
        visual="",  # not implemented
        scene="Cut",
        summary="Story",
        time="TimeBased",
    )
    if design_term in design_to_implementation:
        return design_to_implementation[design_term]
    return ""
def get_status(impl_term):
    """Return the status label recorded for an implementation term.

    Args:
        impl_term: Term to look up, e.g. ``"Cut"``.

    Returns:
        str: The status label for the term, or the "status unknown" label
        when the term is not in the table.
    """
    known_statuses = dict(
        [
            ("TimeBased", "✅ 已實現"),
            ("Sentence", "✅ 已實現"),
            ("Cut", "⚠️ 部分實現"),
            ("Trace", "✅ 已實現"),
            ("Story", "⚠️ 概念調整"),
            ("visual", "❌ 未實現"),
        ]
    )
    try:
        return known_statuses[impl_term]
    except KeyError:
        return "❓ 狀態未知"
def main():
    """Run the consistency check and print a report to stdout.

    Loads the implemented variants, stops early with a message when none are
    found, otherwise runs the terminology check and lists every issue.
    """
    separator = "=" * 50
    print("🚀 開始代碼與文檔一致性檢查 - Phase 1.2")
    print(separator)

    # Step 1: load the code definitions; nothing can be checked without them.
    implemented_variants = load_code_definitions()
    if not implemented_variants:
        print("❌ 無法繼續檢查,請先確保 Rust 代碼正常編譯")
        return
    variant_count = len(implemented_variants)
    print(f"✅ 加載了 {variant_count} 個代碼定義")

    # Step 2: run the terminology check.
    issues = check_terminology_consistency(implemented_variants)

    # Step 3: report the results.
    issue_count = len(issues)
    print("\n📊 檢查完成:")
    print(f" 發現問題數: {issue_count}")
    if issue_count:
        print("\n🔍 詳細問題列表:")
        for issue in issues:
            print(f" [{issue['severity'].upper()}] {issue['file']}")
            print(f" 描述: {issue['description']}")
            print(f" 建議: {issue['suggested_fix']}")
            print()

    print(separator)
    print("✅ 檢查完成。請參考 TERMINOLOGY_MAPPING.md 進行修復。")


# Run the check only when executed as a script, not when imported.
if __name__ == "__main__":
    main()