78 lines
2.1 KiB
Python
78 lines
2.1 KiB
Python
#!/usr/bin/env python3
|
|
"""Sync all vectors from one Qdrant collection to another on the same instance."""
|
|
import json
import os
import time
import urllib.error
import urllib.request
|
|
|
|
# Base URL of the Qdrant REST API. Override with the QDRANT_URL environment
# variable; the literal is the fallback used when the variable is unset.
QDRANT_URL = os.environ.get("QDRANT_URL", "http://localhost:6333")
# NOTE(review): credentials should not live in source control. Prefer setting
# QDRANT_API_KEY in the environment; the literal fallback is kept only so that
# existing invocations keep working unchanged.
API_KEY = os.environ.get("QDRANT_API_KEY", "Test3200Test3200Test3200")
# Collection that points are read from.
SOURCE = "momentry_dev_rule1_v2"
# Collection that points are written to.
TARGET = "momentry_rule1"
# Number of points requested per scroll page (and therefore sent per upsert).
BATCH_SIZE = 500
# Seconds to pause between consecutive batches.
SLEEP = 0.05
|
|
|
|
|
|
def qdrant(method, path, body=None):
    """Send one JSON request to the Qdrant REST API and return the decoded reply.

    Args:
        method: HTTP verb passed to ``urllib.request.Request``.
        path: Request path appended verbatim to ``QDRANT_URL``.
        body: Optional JSON-serialisable payload. Only ``None`` means "no
            request body"; an empty dict or list is still sent as JSON.

    Returns:
        The response body parsed with ``json.loads``.

    Raises:
        urllib.error.HTTPError: Re-raised after the status code and the
            response body have been printed. Other errors raised by
            ``urlopen`` are not caught here.
    """
    url = f"{QDRANT_URL}{path}"
    # Compare against None explicitly: a truthiness test would silently drop
    # legitimate empty payloads such as {}.
    data = json.dumps(body).encode() if body is not None else None
    req = urllib.request.Request(url, data=data, method=method)
    req.add_header("Content-Type", "application/json")
    req.add_header("Api-Key", API_KEY)
    try:
        with urllib.request.urlopen(req) as resp:
            return json.loads(resp.read())
    except urllib.error.HTTPError as e:
        # Decode leniently so a non-UTF-8 error body cannot replace the
        # HTTPError with a UnicodeDecodeError.
        detail = e.read().decode(errors="replace")
        print(f" HTTP {e.code}: {detail}")
        raise
|
|
|
|
|
|
def scroll_batch(offset=None):
    """Fetch one page of points (payload and vector included) from SOURCE.

    Args:
        offset: Value to send as the scroll ``offset``. When ``None`` the key
            is left out of the request entirely.

    Returns:
        A ``(points, next_offset)`` tuple: the ``points`` list from the
        response (``[]`` if missing) and its ``next_page_offset`` value
        (``None`` if missing).
    """
    request = {"limit": BATCH_SIZE, "with_payload": True, "with_vector": True}
    if offset is not None:
        request["offset"] = offset

    endpoint = f"/collections/{SOURCE}/points/scroll"
    # Both values live under the top-level "result" object of the reply.
    page = qdrant("POST", endpoint, request).get("result", {})
    return page.get("points", []), page.get("next_page_offset")
|
|
|
|
|
|
def upsert_batch(points):
    """Write ``points`` into the TARGET collection with a single PUT.

    Args:
        points: List of point objects, sent as the ``points`` field of the
            request body.

    Returns:
        ``True`` when the reply's top-level ``status`` is ``"ok"``, or
        failing that when the nested ``result.status`` is ``"ok"``;
        ``False`` otherwise.
    """
    reply = qdrant("PUT", f"/collections/{TARGET}/points", {"points": points})
    if reply.get("status") == "ok":
        return True
    # Fall back to the status nested inside the "result" object.
    return reply.get("result", {}).get("status") == "ok"
|
|
|
|
|
|
def main():
    """Copy every point from SOURCE to TARGET, one scroll page at a time.

    Pages through the source with ``scroll_batch`` and writes each page with
    ``upsert_batch``, pausing ``SLEEP`` seconds between pages. Progress is
    printed every ten batches and a summary line is printed at the end.

    Raises:
        SystemExit: With exit status 1 when ``upsert_batch`` reports a
            failure, so that a partial copy is distinguishable from a
            complete one (the summary then reads "Aborted" instead of "Done").
    """
    offset = None
    total = 0
    batch_num = 0
    failed = False
    # Monotonic clock: elapsed-time readings are immune to wall-clock jumps.
    t0 = time.monotonic()

    while True:
        points, offset = scroll_batch(offset)
        if not points:
            break

        if not upsert_batch(points):
            print(f" FAILED batch {batch_num} ({len(points)} pts)")
            failed = True
            break

        total += len(points)
        batch_num += 1
        if batch_num % 10 == 0:
            elapsed = time.monotonic() - t0
            print(f" Synced {total} points ({elapsed:.1f}s)")

        # No next-page offset means the page just written was the last one.
        if offset is None:
            break
        time.sleep(SLEEP)

    elapsed = time.monotonic() - t0
    if failed:
        # Do not report success (or exit 0) after an incomplete sync.
        print(f"Aborted: {total} points synced in {elapsed:.1f}s")
        raise SystemExit(1)
    print(f"Done: {total} points synced in {elapsed:.1f}s")
|
|
|
|
|
|
# Run the sync only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
|