feat: update Python processors and add utility scripts
- Update ASR, face, OCR, pose processors
- Add release pre-flight check script
- Add synonym generation, chunk processing scripts
- Add face recognition, stamp search utilities
This commit is contained in:
196
scripts/check_code_document_consistency.py
Normal file
196
scripts/check_code_document_consistency.py
Normal file
@@ -0,0 +1,196 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
代碼與文檔一致性檢查工具 - Phase 1.2 成果
|
||||
|
||||
功能:檢查 Rust 代碼定義與架構文檔的一致性
|
||||
核心原則:當設計與實現出現矛盾時,以實際的 Rust 代碼實現為最高權威
|
||||
"""
|
||||
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def _parse_enum_variants(enum_body):
    """Extract variant identifiers from the text between an enum's braces."""
    # Drop comments first so commas or identifiers inside them are ignored.
    body = re.sub(r"/\*.*?\*/", "", enum_body, flags=re.DOTALL)
    body = re.sub(r"//[^\n]*", "", body)
    # Attributes such as #[serde(rename = "x")] are not variants.
    body = re.sub(r"#\[[^\]]*\]", "", body)
    # Strip tuple payloads (innermost first) so the commas inside them do not
    # split one variant into several pieces.
    previous = None
    while previous != body:
        previous = body
        body = re.sub(r"\([^()]*\)", "", body)

    variants = []
    for piece in body.split(","):
        # The leading identifier is the variant name; anything after it
        # (e.g. an explicit discriminant "= 2") is ignored.
        found = re.match(r"\s*([A-Za-z_]\w*)", piece)
        if found:
            variants.append(found.group(1))
    return variants


def load_code_definitions(project_root=None):
    """Load the variant names of the Rust ``ChunkType`` enum.

    Scans every ``*.rs`` file below ``<project_root>/src`` (in sorted path
    order, so the result is deterministic) and returns the variants of the
    first ``pub enum ChunkType { ... }`` definition found.

    Args:
        project_root: Directory that contains ``src``. Defaults to the parent
            of the directory holding this script.

    Returns:
        A list of variant names in declaration order, or an empty list when
        no definition is found.

    Note:
        The enum body is matched up to the first ``}``, so an enum containing
        struct-like variants (``Name { field: T }``) is only parsed up to that
        variant.
    """
    print("🔍 解析 Rust 代碼定義...")

    if project_root is None:
        root = Path(__file__).parent.parent
    else:
        root = Path(project_root)
    src_dir = root / "src"

    chunk_type_pattern = re.compile(r"pub\s+enum\s+ChunkType\s*\{([^}]+)\}", re.DOTALL)

    # A missing src directory simply means there is nothing to scan.
    rust_files = sorted(src_dir.glob("**/*.rs")) if src_dir.is_dir() else []

    for file_path in rust_files:
        try:
            content = file_path.read_text(encoding="utf-8")
        except (OSError, UnicodeDecodeError) as e:
            # Best effort: report the unreadable file and keep scanning.
            print(f"⚠️ 解析文件 {file_path} 時出錯: {e}")
            continue

        match = chunk_type_pattern.search(content)
        if match:
            variants = _parse_enum_variants(match.group(1))
            print(f"📝 找到 ChunkType 定義: {', '.join(variants)}")
            return variants

    print("❌ 未找到 ChunkType 定義")
    return []
|
||||
|
||||
|
||||
def check_terminology_consistency(implemented_variants, project_root=None):
    """Check the key architecture documents for terminology/status consistency.

    Two checks are run on each key document that exists:

    1. If the document mentions (case-insensitively) one of the design terms
       that require an implementation note and contains no status marker at
       all, a ``warning`` issue is recorded for that term.
    2. If the document mentions an implemented variant name but does not
       contain the status string expected for it, an ``info`` issue is
       recorded.

    Args:
        implemented_variants: Variant names found in the Rust code.
        project_root: Directory that contains ``docs_v1.0``. Defaults to the
            parent of the directory holding this script.

    Returns:
        A list of issue dicts with the keys ``file``, ``type``,
        ``description``, ``severity`` and ``suggested_fix``.
    """
    print("\n📝 檢查術語一致性...")

    if project_root is None:
        project_root = Path(__file__).parent.parent
    else:
        project_root = Path(project_root)
    architecture_dir = project_root / "docs_v1.0" / "ARCHITECTURE"

    # Design-level vocabulary
    design_terms = {"sentence", "visual", "scene", "summary", "time"}
    # Only these design terms must be accompanied by an implementation note.
    terms_needing_note = {"visual", "scene", "summary"}
    status_markers = (
        "✅",
        "⚠️",
        "❌",
        "🔄",
        "已實現",
        "未實現",
        "部分實現",
        "概念調整",
    )

    # Documents to inspect
    key_files = [
        "ARCHITECTURE_OVERVIEW.md",
        "CHUNKING_ARCHITECTURE.md",
        "DESIGN_IMPLEMENTATION_GAP.md",
    ]

    issues = []

    for filename in key_files:
        file_path = architecture_dir / filename
        if not file_path.exists():
            print(f" ⚠️ 文件不存在: {filename}")
            continue

        try:
            content = file_path.read_text(encoding="utf-8")
        except (OSError, UnicodeDecodeError) as e:
            print(f" ❌ 無法讀取文件 {file_path}: {e}")
            continue

        relative_name = str(file_path.relative_to(project_root))
        # Both values depend only on the file, so compute them once per file.
        lowered = content.lower()
        has_status_marker = any(marker in content for marker in status_markers)

        # Design terms; sorted so the issue order is stable across runs.
        for design_term in sorted(design_terms):
            if design_term not in lowered:
                continue
            if design_term not in terms_needing_note or has_status_marker:
                continue

            # A design term without an implementation maps to "", so fall
            # back to the design term itself: the status table keys such
            # entries (e.g. "visual") by the design term.
            impl_term = get_implementation_term(design_term)
            status = get_status(impl_term or design_term)

            issues.append(
                {
                    "file": relative_name,
                    "type": "terminology",
                    "description": f"設計術語 '{design_term}' 缺少實現狀態說明",
                    "severity": "warning",
                    "suggested_fix": f"添加狀態說明,例如: '{status}' 或參考 TERMINOLOGY_MAPPING.md",
                }
            )

        # Implemented terms must carry their expected status string.
        for impl_term in implemented_variants:
            if impl_term not in content:
                continue
            expected_status = get_status(impl_term)
            if expected_status and expected_status not in content:
                issues.append(
                    {
                        "file": relative_name,
                        "type": "terminology",
                        "description": f"實現術語 '{impl_term}' 缺少正確的狀態標記",
                        "severity": "info",
                        "suggested_fix": f"添加狀態標記: {expected_status}",
                    }
                )

    return issues
|
||||
|
||||
|
||||
def get_implementation_term(design_term):
    """Return the implementation term that corresponds to a design term.

    Returns an empty string when the design term has no implementation
    counterpart or is not a known design term.
    """
    design_to_impl = dict(
        sentence="Sentence",
        visual="",  # not implemented
        scene="Cut",
        summary="Story",
        time="TimeBased",
    )
    if design_term in design_to_impl:
        return design_to_impl[design_term]
    return ""
|
||||
|
||||
|
||||
def get_status(impl_term):
    """Return the status label recorded for a term.

    Terms that are not in the table yield the "unknown status" label.
    """
    known_statuses = {
        "TimeBased": "✅ 已實現",
        "Sentence": "✅ 已實現",
        "Cut": "⚠️ 部分實現",
        "Trace": "✅ 已實現",
        "Story": "⚠️ 概念調整",
        "visual": "❌ 未實現",
    }
    try:
        return known_statuses[impl_term]
    except KeyError:
        return "❓ 狀態未知"
|
||||
|
||||
|
||||
def main():
    """Run the code/documentation consistency check and print a report.

    Loads the implemented variants, stops early when none are found, then
    runs the terminology check and prints every issue it returned.
    """
    separator = "=" * 50

    print("🚀 開始代碼與文檔一致性檢查 - Phase 1.2")
    print(separator)

    # Step 1: load the definitions from the code
    variants = load_code_definitions()
    if len(variants) == 0:
        print("❌ 無法繼續檢查,請先確保 Rust 代碼正常編譯")
        return

    print(f"✅ 加載了 {len(variants)} 個代碼定義")

    # Step 2: run the terminology check
    found_issues = check_terminology_consistency(variants)

    # Step 3: report
    print("\n📊 檢查完成:")
    print(f" 發現問題數: {len(found_issues)}")

    if found_issues:
        print("\n🔍 詳細問題列表:")
        for entry in found_issues:
            report_lines = [
                f" [{entry['severity'].upper()}] {entry['file']}",
                f" 描述: {entry['description']}",
                f" 建議: {entry['suggested_fix']}",
                "",  # blank line between issues
            ]
            print("\n".join(report_lines))

    print(separator)
    print("✅ 檢查完成。請參考 TERMINOLOGY_MAPPING.md 進行修復。")


# Run the check only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user