- Remove session-ses_2f27.md (161KB raw session log) - Remove 49 ROOT_* duplicate files across REFERENCE/ - Remove 14 duplicate files between REFERENCE/ root and history/ - Remove asr_legacy.rs (dead code, replaced by asr.rs) - Remove src/core/worker/ (duplicate JobWorker) - Remove src/core/layers/ (empty directory) - Remove 4 .bak files in src/ - Remove 7 dead private methods in worker/processor.rs - Remove backup directory from git tracking
482 lines
17 KiB
Python
482 lines
17 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
架構文檔一致性檢查腳本
|
|
|
|
功能:
|
|
1. 檢查所有架構文檔間的鏈接有效性
|
|
2. 驗證術語一致性
|
|
3. 檢查設計與實現差異標記
|
|
4. 生成文檔質量報告
|
|
|
|
使用方法:
|
|
python3 scripts/check_architecture_docs.py [--report FILE] [--verbose] [--check-only]
|
|
"""
|
|
|
|
import re
|
|
import sys
|
|
import glob
|
|
import json
|
|
import argparse
|
|
from pathlib import Path
|
|
from typing import Dict, List, Set, Optional
|
|
from collections import defaultdict
|
|
|
|
# Configuration
# Directory that is scanned: "docs_v1.0/ARCHITECTURE" under the directory two
# levels above this file.
ARCHITECTURE_DIR = Path(__file__).parent.parent / "docs_v1.0" / "ARCHITECTURE"
# File suffixes that count as documents.
DOC_EXTENSIONS = [".md"]
# File names that are skipped during discovery.
IGNORE_FILES = ["README.md", "index.md"]

# Terminology table: canonical term -> regular expressions for its spellings.
# The separator class is ``[_\s]`` (underscore or whitespace). The previous
# ``[_\\s]`` inside a raw string matched a literal backslash or the letter
# "s" instead, so "chunk type" was missed and "chunkstype" was accepted.
# NOTE(review): no check visible in this file reads this table yet.
TERMINOLOGY_PATTERNS = {
    "chunk_type": [
        r"chunk[_\s]?type",
        r"分片類型",
        r"ChunkType",
    ],
    "sentence": [
        r"sentence",
        r"句子",
        r"Rule 1",
    ],
    "visual": [
        r"visual",
        r"視覺",
        r"Rule 2",
    ],
    "scene": [
        r"scene",
        r"場景",
        r"Rule 3",
    ],
    "summary": [
        r"summary",
        r"摘要",
        r"Rule 4",
    ],
    "time_based": [
        r"time[_\s]?based",
        r"時間基準",
        r"TimeBased",
    ],
    "cut": [
        r"cut",
        r"CUT",
        r"場景分割",
    ],
    "trace": [
        r"trace",
        r"軌跡",
        r"Trace",
    ],
    "story": [
        r"story",
        r"故事",
        r"Story",
    ],
}
|
|
|
|
|
|
class DocumentIssue:
    """One problem found in a documentation file.

    The constructor stores its arguments unchanged as attributes of the
    same names.
    """

    def __init__(
        self,
        file_path: Path,
        line_number: int,
        issue_type: str,
        description: str,
        severity: str,
        suggested_fix: Optional[str] = None,
    ):
        # Location of the problem.
        self.file_path, self.line_number = file_path, line_number
        # Category tag: "broken_link", "terminology", "format", "consistency".
        self.issue_type = issue_type
        self.description = description
        # Importance level: "error", "warning", "info".
        self.severity = severity
        # Optional hint for the author; None when no hint is given.
        self.suggested_fix = suggested_fix
|
|
|
|
|
|
class DocumentStats:
    """Counters and collected issues for a single documentation file."""

    def __init__(self, file_path: Path):
        self.file_path = file_path
        # Size and link counters; all start at zero.
        self.total_lines = self.total_links = self.broken_links = 0
        # One counter per remaining issue category.
        self.terminology_issues = self.format_issues = self.consistency_issues = 0
        # Issue records for this file; a fresh list per instance.
        self.issues: List[DocumentIssue] = []
|
|
|
|
|
|
class ArchitectureDocChecker:
|
|
"""架構文檔檢查器"""
|
|
|
|
def __init__(self, architecture_dir: Path):
|
|
self.architecture_dir = architecture_dir
|
|
self.all_md_files: List[Path] = []
|
|
self.file_contents: Dict[Path, List[str]] = {}
|
|
self.document_stats: Dict[Path, DocumentStats] = {}
|
|
|
|
def load_all_documents(self) -> None:
|
|
"""加載所有文檔"""
|
|
print(f"📁 掃描架構文檔目錄: {self.architecture_dir}")
|
|
|
|
# 掃描所有 Markdown 文件
|
|
for ext in DOC_EXTENSIONS:
|
|
pattern = self.architecture_dir / "**" / f"*{ext}"
|
|
for file_path in glob.glob(str(pattern), recursive=True):
|
|
file_path = Path(file_path)
|
|
if file_path.name in IGNORE_FILES:
|
|
continue
|
|
self.all_md_files.append(file_path)
|
|
|
|
# 加載文件內容
|
|
for file_path in self.all_md_files:
|
|
try:
|
|
with open(file_path, "r", encoding="utf-8") as f:
|
|
content = f.readlines()
|
|
self.file_contents[file_path] = content
|
|
|
|
# 初始化統計信息
|
|
self.document_stats[file_path] = DocumentStats(file_path=file_path)
|
|
self.document_stats[file_path].total_lines = len(content)
|
|
except Exception as e:
|
|
print(f"❌ 無法讀取文件 {file_path}: {e}")
|
|
|
|
print(f"✅ 加載了 {len(self.all_md_files)} 個文檔文件")
|
|
|
|
def check_links(self) -> None:
|
|
"""檢查文檔鏈接有效性"""
|
|
print("\n🔗 檢查文檔鏈接...")
|
|
|
|
# 收集所有可用的文件路徑(相對路徑)
|
|
available_files = set()
|
|
for file_path in self.all_md_files:
|
|
# 相對於架構目錄的路徑
|
|
rel_path = file_path.relative_to(self.architecture_dir)
|
|
available_files.add(str(rel_path))
|
|
available_files.add(str(rel_path).lower())
|
|
|
|
link_pattern = re.compile(r"\[([^\]]+)\]\(([^)]+)\)")
|
|
|
|
for file_path, content_lines in self.file_contents.items():
|
|
stats = self.document_stats[file_path]
|
|
|
|
for line_num, line in enumerate(content_lines, 1):
|
|
matches = link_pattern.findall(line)
|
|
stats.total_links += len(matches)
|
|
|
|
for link_text, link_url in matches:
|
|
# 檢查鏈接有效性
|
|
issue = self._check_single_link(
|
|
file_path, line_num, link_text, link_url, available_files
|
|
)
|
|
if issue:
|
|
stats.issues.append(issue)
|
|
stats.broken_links += 1
|
|
|
|
def _check_single_link(
|
|
self,
|
|
file_path: Path,
|
|
line_num: int,
|
|
link_text: str,
|
|
link_url: str,
|
|
available_files: Set[str],
|
|
) -> Optional[DocumentIssue]:
|
|
"""檢查單個鏈接"""
|
|
|
|
# 忽略外部鏈接
|
|
if link_url.startswith(("http://", "https://", "mailto:", "#")):
|
|
return None
|
|
|
|
# 清理鏈接(移除查詢參數和錨點)
|
|
clean_url = link_url.split("#")[0].split("?")[0]
|
|
|
|
# 檢查相對路徑鏈接
|
|
if clean_url.startswith("./"):
|
|
# 相對於當前文件的鏈接
|
|
current_dir = file_path.parent
|
|
target_path = (current_dir / clean_url[2:]).resolve()
|
|
|
|
# 轉換為相對於架構目錄的路徑
|
|
try:
|
|
rel_path = target_path.relative_to(self.architecture_dir)
|
|
if str(rel_path) not in available_files:
|
|
return DocumentIssue(
|
|
file_path=file_path,
|
|
line_number=line_num,
|
|
issue_type="broken_link",
|
|
description=f"鏈接目標不存在: {link_url} (解析為: {rel_path})",
|
|
severity="error",
|
|
suggested_fix=f"檢查文件是否存在: {target_path}",
|
|
)
|
|
except ValueError:
|
|
# 目標不在架構目錄內
|
|
if not target_path.exists():
|
|
return DocumentIssue(
|
|
file_path=file_path,
|
|
line_number=line_num,
|
|
issue_type="broken_link",
|
|
description=f"鏈接目標不存在: {link_url}",
|
|
severity="error",
|
|
suggested_fix=f"創建文件或修正鏈接: {target_path}",
|
|
)
|
|
|
|
# 檢查絕對路徑鏈接(相對於架構目錄)
|
|
elif not clean_url.startswith("/"):
|
|
if clean_url not in available_files:
|
|
return DocumentIssue(
|
|
file_path=file_path,
|
|
line_number=line_num,
|
|
issue_type="broken_link",
|
|
description=f"鏈接目標不存在: {link_url}",
|
|
severity="error",
|
|
suggested_fix=f"檢查文件是否存在: {clean_url}",
|
|
)
|
|
|
|
return None
|
|
|
|
def check_terminology(self) -> None:
|
|
"""檢查術語一致性"""
|
|
print("\n📝 檢查術語一致性...")
|
|
|
|
for file_path, content_lines in self.file_contents.items():
|
|
stats = self.document_stats[file_path]
|
|
|
|
for line_num, line in enumerate(content_lines, 1):
|
|
# 檢查設計與實現不一致的術語
|
|
design_terms = ["visual", "scene", "summary"]
|
|
impl_terms = ["TimeBased", "Cut", "Trace", "Story"]
|
|
|
|
# 如果文件提到設計術語,檢查是否有對應的實現說明
|
|
if any(term in line.lower() for term in design_terms):
|
|
# 檢查是否在 DESIGN_IMPLEMENTATION_GAP.md 中有說明
|
|
if file_path.name != "DESIGN_IMPLEMENTATION_GAP.md":
|
|
# 檢查前後文是否有提到實現差異
|
|
context_start = max(0, line_num - 3)
|
|
context_end = min(len(content_lines), line_num + 2)
|
|
context = content_lines[context_start:context_end]
|
|
context_text = "".join(context)
|
|
|
|
if not any(
|
|
impl_term in context_text for impl_term in impl_terms
|
|
):
|
|
stats.terminology_issues += 1
|
|
stats.issues.append(
|
|
DocumentIssue(
|
|
file_path=file_path,
|
|
line_number=line_num,
|
|
issue_type="terminology",
|
|
description="設計術語缺少實現狀態說明",
|
|
severity="warning",
|
|
suggested_fix="添加實現狀態說明或參考 DESIGN_IMPLEMENTATION_GAP.md",
|
|
)
|
|
)
|
|
|
|
def check_format(self) -> None:
|
|
"""檢查文檔格式"""
|
|
print("\n📋 檢查文檔格式...")
|
|
|
|
for file_path, content_lines in self.file_contents.items():
|
|
stats = self.document_stats[file_path]
|
|
|
|
# 檢查文件頭部格式
|
|
if content_lines and not content_lines[0].startswith("# "):
|
|
stats.format_issues += 1
|
|
stats.issues.append(
|
|
DocumentIssue(
|
|
file_path=file_path,
|
|
line_number=1,
|
|
issue_type="format",
|
|
description="文件缺少 H1 標題",
|
|
severity="warning",
|
|
suggested_fix="在第一行添加 # 標題",
|
|
)
|
|
)
|
|
|
|
# 檢查版本歷史表格
|
|
has_version_table = False
|
|
for line in content_lines:
|
|
if (
|
|
"版本歷史" in line
|
|
or "版本记录" in line
|
|
or "Version History" in line
|
|
):
|
|
has_version_table = True
|
|
break
|
|
|
|
if not has_version_table:
|
|
stats.format_issues += 1
|
|
stats.issues.append(
|
|
DocumentIssue(
|
|
file_path=file_path,
|
|
line_number=1,
|
|
issue_type="format",
|
|
description="文件缺少版本歷史表格",
|
|
severity="info",
|
|
suggested_fix="添加版本歷史表格",
|
|
)
|
|
)
|
|
|
|
def check_consistency(self) -> None:
|
|
"""檢查文檔間的一致性"""
|
|
print("\n🔄 檢查文檔間一致性...")
|
|
|
|
# 檢查 ARCHITECTURE_OVERVIEW.md 是否引用所有其他文檔
|
|
overview_file = self.architecture_dir / "ARCHITECTURE_OVERVIEW.md"
|
|
if overview_file in self.file_contents:
|
|
overview_content = "".join(self.file_contents[overview_file])
|
|
|
|
for other_file in self.all_md_files:
|
|
if other_file == overview_file:
|
|
continue
|
|
|
|
other_filename = other_file.name
|
|
if other_filename not in overview_content:
|
|
stats = self.document_stats[overview_file]
|
|
stats.consistency_issues += 1
|
|
stats.issues.append(
|
|
DocumentIssue(
|
|
file_path=overview_file,
|
|
line_number=1,
|
|
issue_type="consistency",
|
|
description=f"總覽文件未引用: {other_filename}",
|
|
severity="info",
|
|
suggested_fix=f"在相關文件索引中添加對 {other_filename} 的引用",
|
|
)
|
|
)
|
|
|
|
def generate_report(self, output_file: Optional[Path] = None) -> Dict:
|
|
"""生成檢查報告"""
|
|
print("\n📊 生成檢查報告...")
|
|
|
|
total_issues = 0
|
|
total_files = len(self.document_stats)
|
|
|
|
report = {
|
|
"summary": {
|
|
"total_files": total_files,
|
|
"total_issues": 0,
|
|
"issues_by_type": defaultdict(int),
|
|
"issues_by_severity": defaultdict(int),
|
|
},
|
|
"files": [],
|
|
}
|
|
|
|
for file_path, stats in self.document_stats.items():
|
|
file_report = {
|
|
"file": str(file_path.relative_to(self.architecture_dir.parent.parent)),
|
|
"total_lines": stats.total_lines,
|
|
"total_links": stats.total_links,
|
|
"broken_links": stats.broken_links,
|
|
"terminology_issues": stats.terminology_issues,
|
|
"format_issues": stats.format_issues,
|
|
"consistency_issues": stats.consistency_issues,
|
|
"issues": [],
|
|
}
|
|
|
|
for issue in stats.issues:
|
|
issue_dict = {
|
|
"line": issue.line_number,
|
|
"type": issue.issue_type,
|
|
"severity": issue.severity,
|
|
"description": issue.description,
|
|
"suggested_fix": issue.suggested_fix,
|
|
}
|
|
file_report["issues"].append(issue_dict)
|
|
|
|
# 更新統計
|
|
report["summary"]["total_issues"] += 1
|
|
report["summary"]["issues_by_type"][issue.issue_type] += 1
|
|
report["summary"]["issues_by_severity"][issue.severity] += 1
|
|
|
|
report["files"].append(file_report)
|
|
total_issues += len(stats.issues)
|
|
|
|
# 輸出報告
|
|
if output_file:
|
|
with open(output_file, "w", encoding="utf-8") as f:
|
|
json.dump(report, f, ensure_ascii=False, indent=2)
|
|
print(f"✅ 報告已保存到: {output_file}")
|
|
else:
|
|
# 輸出簡要報告到控制台
|
|
print(f"\n{'=' * 60}")
|
|
print("架構文檔檢查報告")
|
|
print(f"{'=' * 60}")
|
|
print(f"📁 檢查文件數: {total_files}")
|
|
print(f"⚠️ 發現問題數: {total_issues}")
|
|
print("\n問題分類:")
|
|
for issue_type, count in report["summary"]["issues_by_type"].items():
|
|
print(f" - {issue_type}: {count}")
|
|
print("\n嚴重程度:")
|
|
for severity, count in report["summary"]["issues_by_severity"].items():
|
|
print(f" - {severity}: {count}")
|
|
|
|
if total_issues > 0:
|
|
print("\n🔍 詳細問題:")
|
|
for file_report in report["files"]:
|
|
if file_report["issues"]:
|
|
print(f"\n文件: {file_report['file']}")
|
|
for issue in file_report["issues"]:
|
|
print(
|
|
f" 行 {issue['line']} [{issue['severity']}] {issue['type']}: {issue['description']}"
|
|
)
|
|
|
|
return report
|
|
|
|
def run_all_checks(self) -> Dict:
|
|
"""運行所有檢查"""
|
|
print("🚀 開始架構文檔一致性檢查")
|
|
print(f"檢查目錄: {self.architecture_dir}")
|
|
|
|
self.load_all_documents()
|
|
self.check_links()
|
|
self.check_terminology()
|
|
self.check_format()
|
|
self.check_consistency()
|
|
|
|
return self.generate_report()
|
|
|
|
|
|
def main():
    """Command-line entry point.

    Options:
        --report FILE   additionally write the report to FILE as JSON
        --verbose, -v   accepted; NOTE(review): not read anywhere in this function
        --check-only    run only the link and terminology checks, no report

    Exits with status 1 when the architecture directory is missing or when
    the full run finds any issue, and with status 0 when the full run finds
    none. Check-only mode returns normally.
    """
    parser = argparse.ArgumentParser(description="架構文檔一致性檢查工具")
    parser.add_argument("--report", type=str, help="生成 JSON 報告文件")
    parser.add_argument("--verbose", "-v", action="store_true", help="詳細輸出")
    parser.add_argument("--check-only", action="store_true", help="只檢查不生成報告")

    args = parser.parse_args()

    # Refuse to run without the directory to scan.
    if not ARCHITECTURE_DIR.exists():
        print(f"❌ 架構目錄不存在: {ARCHITECTURE_DIR}")
        sys.exit(1)

    checker = ArchitectureDocChecker(ARCHITECTURE_DIR)

    if args.check_only:
        checker.load_all_documents()
        checker.check_links()
        checker.check_terminology()
        print("\n✅ 檢查完成(僅檢查模式)")
        return

    output_file = Path(args.report) if args.report else None
    report = checker.run_all_checks()

    # The --report path used to be computed and then dropped, so no JSON file
    # was ever written. Write it from the issues already collected.
    if output_file is not None:
        checker.generate_report(output_file)

    # The exit status reflects whether any issue was found.
    if report["summary"]["total_issues"] > 0:
        print(f"\n❌ 發現 {report['summary']['total_issues']} 個問題,請修復")
        sys.exit(1)
    else:
        print("\n✅ 所有檢查通過!")
        sys.exit(0)


if __name__ == "__main__":
    main()
|