Files
momentry_core/scripts/qdrant_sync_collection.py

78 lines
2.1 KiB
Python

#!/usr/bin/env python3
"""Sync all vectors from one Qdrant collection to another on the same instance."""
import json
import os
import time
import urllib.error
import urllib.request
# Connection settings. Both can be overridden through the environment so the
# API key does not have to be edited into (or committed with) this file; the
# literal fallbacks keep the script's previous behaviour when nothing is set.
# NOTE(review): consider dropping the literal key fallback once callers export
# QDRANT_API_KEY.
QDRANT_URL = os.environ.get("QDRANT_URL", "http://localhost:6333")
API_KEY = os.environ.get("QDRANT_API_KEY", "Test3200Test3200Test3200")

# Collection that points are read from, and collection they are written to.
SOURCE = "momentry_dev_rule1_v2"
TARGET = "momentry_rule1"

# Number of points requested per scroll page (and therefore per upsert).
BATCH_SIZE = 500
# Pause, in seconds, between consecutive batches.
SLEEP = 0.05
def qdrant(method, path, body=None):
    """Send one JSON request to the Qdrant REST API and return the decoded reply.

    Args:
        method: HTTP verb passed to the request, e.g. "POST" or "PUT".
        path: Request path, appended verbatim to QDRANT_URL.
        body: JSON-serialisable payload, or None to send no request body.

    Returns:
        The response body parsed from JSON.

    Raises:
        urllib.error.HTTPError: Re-raised after the status code and the
            server's error body have been printed. Other errors raised by
            urlopen propagate unchanged.
    """
    # Compare against None rather than relying on truthiness, so an empty
    # payload such as {} is still serialised and sent instead of dropped.
    data = None if body is None else json.dumps(body).encode("utf-8")
    req = urllib.request.Request(f"{QDRANT_URL}{path}", data=data, method=method)
    req.add_header("Content-Type", "application/json")
    req.add_header("Api-Key", API_KEY)
    try:
        with urllib.request.urlopen(req) as resp:
            return json.loads(resp.read())
    except urllib.error.HTTPError as e:
        # Decode leniently: a non-UTF-8 error body must not raise a
        # UnicodeDecodeError that would hide the original HTTP error.
        detail = e.read().decode("utf-8", errors="replace")
        print(f" HTTP {e.code}: {detail}")
        raise
def scroll_batch(offset=None):
    """Fetch one page of points from the SOURCE collection.

    Args:
        offset: Value to send as the scroll "offset"; when None the field is
            omitted from the request.

    Returns:
        A ``(points, next_offset)`` tuple: the "points" list from the reply
        (empty list if absent) and its "next_page_offset" (None if absent).
    """
    request = {"limit": BATCH_SIZE, "with_payload": True, "with_vector": True}
    if offset is not None:
        request["offset"] = offset

    reply = qdrant("POST", f"/collections/{SOURCE}/points/scroll", request)

    # Both values live under the reply's "result" object.
    page = reply.get("result", {})
    return page.get("points", []), page.get("next_page_offset")
def upsert_batch(points):
    """Write a list of points into the TARGET collection.

    Args:
        points: Point records to send under the request's "points" key.

    Returns:
        True when the reply's top-level "status", or failing that the
        "status" nested under "result", equals "ok"; otherwise False.
    """
    reply = qdrant("PUT", f"/collections/{TARGET}/points", {"points": points})
    if reply.get("status") == "ok":
        return True
    # Fall back to the status nested inside the "result" object.
    return reply.get("result", {}).get("status") == "ok"
def main():
    """Copy every point from SOURCE to TARGET, one scroll page at a time.

    Pages are read with scroll_batch and written with upsert_batch until the
    source returns no points or no further page offset. Progress is printed
    every tenth batch.

    Returns:
        0 when every fetched page was upserted, 1 when an upsert was reported
        as not ok and the sync stopped early.
    """
    offset = None
    total = 0
    batch_num = 0
    failed = False
    # monotonic() is unaffected by wall-clock adjustments, so elapsed times
    # cannot come out negative or jump.
    t0 = time.monotonic()
    while True:
        points, offset = scroll_batch(offset)
        if not points:
            break
        if not upsert_batch(points):
            print(f" FAILED batch {batch_num} ({len(points)} pts)")
            failed = True
            break
        total += len(points)
        batch_num += 1
        if batch_num % 10 == 0:
            elapsed = time.monotonic() - t0
            print(f" Synced {total} points ({elapsed:.1f}s)")
        # A missing next-page offset means the page just written was the last.
        if offset is None:
            break
        time.sleep(SLEEP)
    elapsed = time.monotonic() - t0
    if failed:
        # Do not claim success after a rejected batch; report the partial
        # count and signal the failure to the caller.
        print(f"Aborted: {total} points synced in {elapsed:.1f}s before failure")
        return 1
    print(f"Done: {total} points synced in {elapsed:.1f}s")
    return 0


if __name__ == "__main__":
    # Propagate main()'s result as the process exit status.
    raise SystemExit(main())