diff --git a/docs/PHASE1_RELEASE_CHECKLIST.md b/docs/PHASE1_RELEASE_CHECKLIST.md new file mode 100644 index 0000000..3e6dc82 --- /dev/null +++ b/docs/PHASE1_RELEASE_CHECKLIST.md @@ -0,0 +1,46 @@ +# Phase 1 Release Checklist — v1 (base model) + +**File UUID**: `{{file_uuid}}` +**Version**: `{{version}}` +**Date**: `{{date}}` + +--- + +## □ 1. Processor Output (.json) + +- [ ] ASR — `{uuid}.asr.json` 存在,segments > 0,最後 segment 接近影片結尾 +- [ ] ASRX — `{uuid}.asrx.json` 存在,segments > 0 +- [ ] 所有 `.json` 皆 valid JSON + +## □ 2. Sentence Chunks + Embeddings + +- [ ] Rule 1 Ingestion — `dev.chunks` 中有 `chunk_type='sentence'` 的記錄 +- [ ] Vectorization — `dev.chunk_vectors` 中有對應 embedding +- [ ] Qdrant — chunk vectors 已寫入 Qdrant collection + +## □ 3. Face Trace + Graph + +- [ ] Face Trace — `dev.face_detections` 有 trace_id,trace count > 0 +- [ ] TKG — `dev.tkg_nodes` + `dev.tkg_edges` 有資料 +- [ ] Trace Chunks — `dev.chunks` 中有 `chunk_type='trace'` 的記錄(含 bbox + co_appearances) + +## □ 4. Release Package + +- [ ] `release/phase1/latest/output_json/` — 所有 `{uuid}.*.json` +- [ ] `chunks.csv` — sentence + trace chunks +- [ ] `vectors.csv` — PG embeddings +- [ ] `identities.csv` — global identities +- [ ] `schema.sql` — DDL +- [ ] `RELEASE_INFO.txt` — Model name + Git commit + timestamp + +## □ 5. Verification + +- [ ] `pipeline_status.py --uuid {uuid}` → 全部 ✅ +- [ ] `pipeline_checklist.py --uuid {uuid}` → PASS +- [ ] file-existence check 通過(重啟 worker 後正確跳過已完成 processor) +- [ ] 離線可用:不需 DB / Redis / Qdrant 即可查閱 output_json + CSV + +## □ 6. Post-Release + +- [ ] Symlink `latest` → 最新版目錄 +- [ ] Phase 2 將從此 checkpoint 繼續(不覆蓋) diff --git a/scripts/release_pack.py b/scripts/release_pack.py index 5ab0d8f..9ecd08d 100644 --- a/scripts/release_pack.py +++ b/scripts/release_pack.py @@ -74,11 +74,12 @@ def pack_phase(file_uuid: str, phase: int) -> Path: vecs_csv = pkg_dir / "vectors.csv" run_sql(f"\\COPY (SELECT * FROM dev.chunk_vectors WHERE uuid='{file_uuid}') TO '{vecs_csv}' CSV HEADER") + idents_csv = pkg_dir / "identities.csv" + run_sql(f"\\COPY (SELECT * FROM dev.identities) TO '{idents_csv}' CSV HEADER") + if phase >= 2: faces_csv = pkg_dir / "face_detections.csv" run_sql(f"\\COPY (SELECT * FROM dev.face_detections WHERE file_uuid='{file_uuid}') TO '{faces_csv}' CSV HEADER") - idents_csv = pkg_dir / "identities.csv" - run_sql(f"\\COPY (SELECT * FROM dev.identities) TO '{idents_csv}' CSV HEADER") # 匯出 Qdrant collection 快照 import urllib.request