refactor: model naming v1(base)/v2, Qdrant collection naming

- Phase 1 = v1 (base model, sentence chunk embedding)
- Phase 2 = v2 (full pipeline + 5W1H)
- Naming leaves room for v3, v4, etc.
- Qdrant collection: momentry_dev_v1 (active model under dev)
- Release packaging exports Qdrant points snapshot
This commit is contained in:
Accusys
2026-05-09 14:14:04 +08:00
parent 227c647a43
commit 19669a1f91
4 changed files with 47 additions and 11 deletions

View File

@@ -24,6 +24,8 @@ VERSION = "v1.0.0"
DB_USER = os.environ.get("USER", "accusys")
DB_NAME = "momentry"
QDRANT_URL = os.environ.get("QDRANT_URL", "http://localhost:6333")
QDRANT_COLLECTION = os.environ.get("QDRANT_COLLECTION", "momentry_dev_rule1_v2")
def ts():
@@ -74,18 +76,51 @@ def pack_phase(file_uuid: str, phase: int) -> Path:
idents_csv = pkg_dir / "identities.csv"
run_sql(f"\\COPY (SELECT * FROM dev.identities) TO '{idents_csv}' CSV HEADER")
# 匯出 Qdrant collection 快照
import urllib.request
qdrant_path = pkg_dir / "qdrant_points.jsonl"
try:
offset = None
with open(qdrant_path, "w") as qf:
while True:
params = f"limit=1000&with_payload=true&with_vectors=true"
if offset is not None:
params += f"&offset={offset}"
url = f"{QDRANT_URL}/collections/{QDRANT_COLLECTION}/points/scroll?{params}"
req = urllib.request.Request(url)
with urllib.request.urlopen(req, timeout=30) as resp:
data = json.loads(resp.read())
pts = data.get("result", {}).get("points", [])
if not pts:
break
for p in pts:
qf.write(json.dumps(p, ensure_ascii=False) + "\n")
# 從回傳的 next_page_offset 取得下一頁偏移量
offset = data.get("result", {}).get("next_page_offset")
if offset is None:
break
n_points = sum(1 for _ in open(qdrant_path) if _.strip())
print(f"[RELEASE] Qdrant: {n_points} points exported from '{QDRANT_COLLECTION}'")
except Exception as e:
print(f"[RELEASE] Qdrant export skipped: {e}")
if qdrant_path.exists():
qdrant_path.unlink()
# RELEASE_INFO
git_commit = subprocess.run(
["git", "-C", str(PROJECT), "rev-parse", "HEAD"],
capture_output=True, text=True, timeout=10,
).stdout.strip()
model_name = f"{file_uuid}_v1" if phase == 1 else f"{file_uuid}_v2"
info = pkg_dir / "RELEASE_INFO.txt"
with open(info, "w") as fh:
fh.write(f"Model: {model_name}\n")
fh.write(f"Phase: {phase}\n")
fh.write(f"Version: {VERSION}\n")
fh.write(f"Timestamp: {stamp}\n")
fh.write(f"File UUID: {file_uuid}\n")
fh.write(f"Qdrant Collection: {QDRANT_COLLECTION}\n")
fh.write(f"Git Commit: {git_commit}\n")
fh.write(f"Packaged at: {datetime.now(timezone.utc).isoformat()}\n")