refactor: model naming v1(base)/v2, Qdrant collection naming
- Phase 1 = v1 (base model, sentence-chunk embedding)
- Phase 2 = v2 (full pipeline + 5W1H)
- Naming leaves room for v3, v4, etc.
- Qdrant collection: momentry_dev_v1 (active model under dev)
- Release packaging exports a snapshot of the Qdrant points
This commit is contained in:
@@ -24,6 +24,8 @@ VERSION = "v1.0.0"
|
||||
|
||||
DB_USER = os.environ.get("USER", "accusys")
|
||||
DB_NAME = "momentry"
|
||||
QDRANT_URL = os.environ.get("QDRANT_URL", "http://localhost:6333")
|
||||
QDRANT_COLLECTION = os.environ.get("QDRANT_COLLECTION", "momentry_dev_rule1_v2")
|
||||
|
||||
|
||||
def ts():
|
||||
@@ -74,18 +76,51 @@ def pack_phase(file_uuid: str, phase: int) -> Path:
|
||||
idents_csv = pkg_dir / "identities.csv"
|
||||
run_sql(f"\\COPY (SELECT * FROM dev.identities) TO '{idents_csv}' CSV HEADER")
|
||||
|
||||
# Export a snapshot of the Qdrant collection
|
||||
import urllib.request
|
||||
qdrant_path = pkg_dir / "qdrant_points.jsonl"
|
||||
try:
|
||||
offset = None
|
||||
with open(qdrant_path, "w") as qf:
|
||||
while True:
|
||||
params = f"limit=1000&with_payload=true&with_vectors=true"
|
||||
if offset is not None:
|
||||
params += f"&offset={offset}"
|
||||
url = f"{QDRANT_URL}/collections/{QDRANT_COLLECTION}/points/scroll?{params}"
|
||||
req = urllib.request.Request(url)
|
||||
with urllib.request.urlopen(req, timeout=30) as resp:
|
||||
data = json.loads(resp.read())
|
||||
pts = data.get("result", {}).get("points", [])
|
||||
if not pts:
|
||||
break
|
||||
for p in pts:
|
||||
qf.write(json.dumps(p, ensure_ascii=False) + "\n")
|
||||
# Take the next page's offset from the returned next_page_offset
|
||||
offset = data.get("result", {}).get("next_page_offset")
|
||||
if offset is None:
|
||||
break
|
||||
n_points = sum(1 for _ in open(qdrant_path) if _.strip())
|
||||
print(f"[RELEASE] Qdrant: {n_points} points exported from '{QDRANT_COLLECTION}'")
|
||||
except Exception as e:
|
||||
print(f"[RELEASE] Qdrant export skipped: {e}")
|
||||
if qdrant_path.exists():
|
||||
qdrant_path.unlink()
|
||||
|
||||
# RELEASE_INFO
|
||||
git_commit = subprocess.run(
|
||||
["git", "-C", str(PROJECT), "rev-parse", "HEAD"],
|
||||
capture_output=True, text=True, timeout=10,
|
||||
).stdout.strip()
|
||||
|
||||
model_name = f"{file_uuid}_v1" if phase == 1 else f"{file_uuid}_v2"
|
||||
info = pkg_dir / "RELEASE_INFO.txt"
|
||||
with open(info, "w") as fh:
|
||||
fh.write(f"Model: {model_name}\n")
|
||||
fh.write(f"Phase: {phase}\n")
|
||||
fh.write(f"Version: {VERSION}\n")
|
||||
fh.write(f"Timestamp: {stamp}\n")
|
||||
fh.write(f"File UUID: {file_uuid}\n")
|
||||
fh.write(f"Qdrant Collection: {QDRANT_COLLECTION}\n")
|
||||
fh.write(f"Git Commit: {git_commit}\n")
|
||||
fh.write(f"Packaged at: {datetime.now(timezone.utc).isoformat()}\n")
|
||||
|
||||
|
||||
Reference in New Issue
Block a user