Files
momentry_core/scripts/release_pack.py
Accusys 28652f5b76 feat: phased release packaging (Phase 1 + Phase 2)
- scripts/release_pack.py: packages output_json + schema + chunks + vectors
- Phase 1: triggered after ASR+ASRX+Rule 1+vectorization (sentence chunk delivery)
- Phase 2: triggered after full pipeline + 5W1H Agent (full delivery)
- Both phases include all available {uuid}.*.json files
- Non-overlapping directories: release/phase1/ and release/phase2/
2026-05-09 13:58:55 +08:00

116 lines
3.5 KiB
Python

#!/usr/bin/env python3
"""
Release packaging — two non-overlapping phases.
Phase 1: ASR + ASRX + Rule 1 sentence chunks complete
Phase 2: Full pipeline + Rule 3 + 5W1H complete
Output: release/phase{N}/v{VERSION}_{TIMESTAMP}/
"""
import json
import os
import shutil
import subprocess
import sys
import time
from datetime import datetime, timezone
from pathlib import Path
# Repository root: this script sits one directory below it.
PROJECT = Path(__file__).resolve().parent.parent
# Where the processors drop their JSON; MOMENTRY_OUTPUT_DIR overrides the
# in-repo default.
OUTPUT_DIR = Path(os.getenv("MOMENTRY_OUTPUT_DIR", PROJECT / "output_dev"))
# Root under which the per-phase package directories are created.
RELEASE_DIR = PROJECT.joinpath("release")
VERSION = "v1.0.0"
# Database role passed to psql / pg_dump: the USER environment variable,
# falling back to "accusys" when it is unset.
DB_USER = os.getenv("USER", "accusys")
DB_NAME = "momentry"
def ts():
    """Return the current UTC time as a compact ``YYYYMMDD_HHMMSS`` stamp."""
    now_utc = datetime.now(tz=timezone.utc)
    return f"{now_utc:%Y%m%d_%H%M%S}"
def run_sql(sql: str) -> str:
    """Run one command through ``psql`` and return its trimmed stdout.

    The command is passed with ``-c`` together with ``-t -A`` (tuples only,
    unaligned output), against database ``DB_NAME`` as role ``DB_USER``.

    Args:
        sql: A single SQL statement or a single psql backslash command.

    Returns:
        Whatever psql wrote to stdout, stripped of surrounding whitespace.
        The value is returned even when psql exits with a non-zero status.

    Raises:
        subprocess.TimeoutExpired: psql did not finish within 30 seconds.
        FileNotFoundError: the ``psql`` executable could not be found.
    """
    proc = subprocess.run(
        ["psql", "-U", DB_USER, "-d", DB_NAME, "-t", "-A", "-c", sql],
        capture_output=True, text=True, timeout=30,
    )
    if proc.returncode != 0:
        # Callers get the same best-effort return value as before, but a
        # failed command is now reported instead of looking like an empty
        # result.
        print(
            f"[RELEASE] WARNING: psql exited with {proc.returncode}: "
            f"{proc.stderr.strip()}",
            file=sys.stderr,
        )
    return proc.stdout.strip()
def _check_file_uuid(file_uuid: str) -> None:
    """Raise ValueError unless *file_uuid* is safe to splice into SQL and glob text."""
    # The id is interpolated into SQL string literals and a glob pattern, so
    # quotes, wildcards, whitespace and path separators must not get through.
    if not file_uuid or not all(
        ch.isascii() and (ch.isalnum() or ch in "-_") for ch in file_uuid
    ):
        raise ValueError(f"unsafe file_uuid: {file_uuid!r}")


def _dump_schema(dest: Path) -> None:
    """Write the schema-only pg_dump of the ``dev`` schema to *dest*."""
    with open(dest, "w") as fh:
        proc = subprocess.run(
            ["pg_dump", "-U", DB_USER, "-d", DB_NAME, "--schema=dev", "--schema-only",
             "-T", "dev.monitor_jobs", "-T", "dev.processor_results"],
            stdout=fh, text=True, timeout=60,
        )
    if proc.returncode != 0:
        # Keep packaging best-effort, but do not let a truncated or empty
        # schema file go unnoticed.
        print(
            f"[RELEASE] WARNING: pg_dump exited with {proc.returncode}; "
            f"{dest} may be incomplete",
            file=sys.stderr,
        )


def _export_csv(query: str, dest: Path) -> None:
    """Export the rows selected by *query* to *dest* as CSV with a header row."""
    # Lowercase ``\copy`` is the spelling psql documents for this meta-command.
    run_sql(f"\\copy ({query}) TO '{dest}' CSV HEADER")


def _write_release_info(pkg_dir: Path, phase: int, stamp: str, file_uuid: str) -> None:
    """Write RELEASE_INFO.txt describing what was packaged and from which commit."""
    # Only stdout is read, so the commit field is empty when git fails.
    git_commit = subprocess.run(
        ["git", "-C", str(PROJECT), "rev-parse", "HEAD"],
        capture_output=True, text=True, timeout=10,
    ).stdout.strip()
    lines = [
        f"Phase: {phase}\n",
        f"Version: {VERSION}\n",
        f"Timestamp: {stamp}\n",
        f"File UUID: {file_uuid}\n",
        f"Git Commit: {git_commit}\n",
        f"Packaged at: {datetime.now(timezone.utc).isoformat()}\n",
    ]
    (pkg_dir / "RELEASE_INFO.txt").write_text("".join(lines), encoding="utf-8")


def _repoint_latest(phase_dir: Path, pkg_dir: Path) -> None:
    """Make ``<phase_dir>/latest`` a symlink to the package that was just built."""
    latest = phase_dir / "latest"
    if latest.is_symlink():
        latest.unlink()
    # A real file or directory named "latest" is left untouched.
    if not latest.exists():
        # The target is the bare directory name, i.e. a relative link.
        latest.symlink_to(pkg_dir.name, target_is_directory=True)


def pack_phase(file_uuid: str, phase: int) -> Path:
    """Package deliverables for phase 1 or 2.

    Creates ``RELEASE_DIR/phase{phase}/{VERSION}_{timestamp}/`` containing the
    matching processor JSON files, a schema dump, CSV exports of the chunk
    and vector rows for the file, and a RELEASE_INFO.txt. From phase 2 on it
    also exports face detections for the file and the whole identities table.

    Args:
        file_uuid: Identifier of the processed file. Must be non-empty and
            consist only of ASCII letters, digits, ``-`` and ``_``.
        phase: Release phase number; it selects the ``phase{N}`` directory
            and, when >= 2, enables the extra exports.

    Returns:
        Path of the package directory that was created.

    Raises:
        ValueError: *file_uuid* is empty or contains unsafe characters.
        subprocess.TimeoutExpired: pg_dump, psql or git exceeded its timeout.
    """
    _check_file_uuid(file_uuid)

    phase_dir = RELEASE_DIR / f"phase{phase}"
    stamp = ts()
    pkg_dir = phase_dir / f"{VERSION}_{stamp}"
    out_dir = pkg_dir / "output_json"
    out_dir.mkdir(parents=True, exist_ok=True)

    # Collect the processor output .json files for this file.
    for src in OUTPUT_DIR.glob(f"{file_uuid}.*.json"):
        if src.is_file():
            shutil.copy2(src, out_dir / src.name)

    # Collect the schema.
    _dump_schema(pkg_dir / "schema.sql")

    # Collect chunks and vectors.
    _export_csv(
        f"SELECT * FROM dev.chunks WHERE file_uuid='{file_uuid}'",
        pkg_dir / "chunks.csv",
    )
    _export_csv(
        f"SELECT * FROM dev.chunk_vectors WHERE uuid='{file_uuid}'",
        pkg_dir / "vectors.csv",
    )

    if phase >= 2:
        _export_csv(
            f"SELECT * FROM dev.face_detections WHERE file_uuid='{file_uuid}'",
            pkg_dir / "face_detections.csv",
        )
        # The identities table is exported whole; it has no per-file filter.
        _export_csv("SELECT * FROM dev.identities", pkg_dir / "identities.csv")

    _write_release_info(pkg_dir, phase, stamp, file_uuid)
    _repoint_latest(phase_dir, pkg_dir)

    size = sum(f.stat().st_size for f in pkg_dir.rglob("*") if f.is_file())
    print(f"[RELEASE] Phase {phase} packaged: {pkg_dir} ({size / 1024:.0f} KB)")
    return pkg_dir
def main():
    """Parse ``--phase`` and ``--file-uuid`` from the command line and build the package."""
    import argparse

    cli = argparse.ArgumentParser()
    cli.add_argument("--phase", type=int, required=True, choices=[1, 2])
    cli.add_argument("--file-uuid", required=True)
    opts = cli.parse_args()
    pack_phase(file_uuid=opts.file_uuid, phase=opts.phase)


if __name__ == "__main__":
    main()