#!/opt/homebrew/bin/python3.11 """ Pipeline Checklist — 獨立於 pipeline 之外的驗收檢查 每個 stage 完成後自動檢查產出是否到位,沒過的標記出來。 """ import json, os, subprocess, sys from datetime import datetime from pathlib import Path PROJECT = Path(__file__).resolve().parent.parent OUTPUT_DIR = Path(os.environ.get("MOMENTRY_OUTPUT_DIR", "/Users/accusys/momentry/output_dev")) DB_USER = os.environ.get("USER", "accusys") DB_NAME = "momentry" PSQL = "/Users/accusys/pgsql/18.3/bin/psql" def run_sql(sql: str) -> str: r = subprocess.run( [PSQL, "-U", DB_USER, "-d", DB_NAME, "-t", "-A", "-c", sql], capture_output=True, text=True, timeout=30, ) return r.stdout.strip() def check_file(uuid: str, suffix: str) -> tuple[bool, str]: f = OUTPUT_DIR / f"{uuid}.{suffix}" if not f.exists(): return False, f"missing: {f.name}" if f.stat().st_size == 0: return False, f"empty: {f.name} (0 bytes)" return True, f"ok ({f.stat().st_size // 1024}KB)" stages = [] # (name, checks) uuid = "aeed71342a899fe4b4c57b7d41bcb692" results = [] def check(name, checks): stage_results = [] for desc, ok, msg in checks: stage_results.append((desc, ok, msg)) passed = all(ok for _, ok, _ in stage_results) results.append((name, passed, stage_results)) return passed print(f"\n{'='*60}") print(f"Pipeline Checklist — {uuid}") print(f"{'='*60}\n") # ── Stage 1: ASR ── print("[1/8] ASR") ok, msg = check_file(uuid, "asr.json") segments = 0 if ok: try: with open(OUTPUT_DIR / f"{uuid}.asr.json") as fh: d = json.load(fh) segments = len(d.get("segments", [])) except Exception: segments = 0 check("ASR output", [ ("asr.json exists", ok, msg), ("has segments", segments > 0, f"{segments} segments"), ]) # ── Stage 2: ASRX ── print("[2/8] ASRX") ok, msg = check_file(uuid, "asrx.json") segments = 0 if ok: try: with open(OUTPUT_DIR / f"{uuid}.asrx.json") as fh: d = json.load(fh) segments = len(d.get("segments", [])) except Exception: segments = 0 check("ASRX output", [ ("asrx.json exists", ok, msg), ("has segments", segments > 0, f"{segments} segments"), ]) # ── Stage 3: Rule 1 + Sentence Chunks ── print("[3/8] Rule 1 - Sentence Chunks") chunk_count = int(run_sql(f"SELECT count(*) FROM dev.chunks WHERE file_uuid='{uuid}' AND chunk_type='sentence'")) check("sentence chunks", [ ("chunks exist", chunk_count > 0, f"{chunk_count} sentence chunks"), ]) # ── Stage 4: Vectorization ── print("[4/8] Vectorization") vec_count = int(run_sql(f"SELECT count(*) FROM dev.chunk_vectors WHERE uuid='{uuid}'")) col = os.environ.get('QDRANT_COLLECTION', 'momentry_dev_rule1_v2') qdrant = subprocess.run( ["curl", "-s", "-X", "POST", f"http://localhost:6333/collections/{col}/points/count", "-H", "Content-Type: application/json", "-d", '{"exact": true}'], capture_output=True, text=True, timeout=10, ) qdrant_ok = '"count"' in qdrant.stdout check("vector embeddings", [ ("PG vectors", vec_count > 0, f"{vec_count} vectors"), ("Qdrant accessible", qdrant_ok, "ok" if qdrant_ok else "no response"), ]) # ── Stage 5: Face Trace ── print("[5/8] Face Trace") trace_count = int(run_sql(f"SELECT count(DISTINCT trace_id) FROM dev.face_detections WHERE file_uuid='{uuid}' AND trace_id IS NOT NULL")) face_count = int(run_sql(f"SELECT count(*) FROM dev.face_detections WHERE file_uuid='{uuid}' AND trace_id IS NOT NULL")) check("face trace", [ ("traces exist", trace_count > 0, f"{trace_count} traces, {face_count} detections"), ]) # ── Stage 6: TKG Graph ── print("[6/8] TKG") node_count = int(run_sql(f"SELECT count(*) FROM dev.tkg_nodes WHERE file_uuid='{uuid}'")) edge_count = int(run_sql(f"SELECT count(*) FROM dev.tkg_edges WHERE file_uuid='{uuid}'")) face_face = int(run_sql(f"SELECT count(*) FROM dev.tkg_edges WHERE file_uuid='{uuid}' AND edge_type='CO_OCCURS_WITH' AND source_node_id IN (SELECT id FROM dev.tkg_nodes WHERE node_type='face_trace')")) check("TKG graph", [ ("nodes", node_count > 0, f"{node_count} nodes"), ("edges", edge_count > 0, f"{edge_count} edges"), ("face-face edges", face_face > 0, f"{face_face} face-face edges"), ]) # ── Stage 7: Trace Chunks ── print("[7/8] Trace Chunks") trace_chunks = int(run_sql(f"SELECT count(*) FROM dev.chunks WHERE file_uuid='{uuid}' AND chunk_type='trace'")) check("trace chunks", [ ("trace chunks exist", trace_chunks > 0, f"{trace_chunks} trace chunks"), ]) # ── Stage 8: Phase 1 Release ── print("[8/8] Phase 1 Release") phase1_dir = PROJECT / "release" / "phase1" / "latest" phase1_ok = phase1_dir.exists() and (phase1_dir / "RELEASE_INFO.txt").exists() if phase1_ok: total_size = sum(f.stat().st_size for f in phase1_dir.rglob("*") if f.is_file()) msg = f"ok ({total_size // 1024}KB)" else: msg = "not found" check("Phase 1 release", [ ("release dir exists", phase1_ok, msg), ]) # ── Summary ── print(f"\n{'='*60}") print("SUMMARY") print(f"{'='*60}") all_passed = True for name, passed, _ in results: status = "✅" if passed else "❌" print(f" {status} {name}") all_passed = all_passed and passed print(f"\n{'PASS' if all_passed else 'FAIL'}") print(f"{'='*60}\n") # Output as JSON for machine parsing report = { "uuid": uuid, "timestamp": datetime.utcnow().isoformat(), "passed": all_passed, "stages": {name: {"passed": passed, "checks": {d: o for d, o, _ in checks}} for name, passed, checks in results}, } print(json.dumps(report, indent=2))