#!/usr/bin/env python3
"""Sync all vectors from one Qdrant collection to another on the same instance.

The script pages through SOURCE with the scroll endpoint (payloads and vectors
included) and writes every page to TARGET with the points upsert endpoint.
"""

import json
import os
import time
import urllib.error
import urllib.request

QDRANT_URL = "http://localhost:6333"
# The QDRANT_API_KEY environment variable, when set, overrides the built-in key
# so the credential does not have to live in the source file.
API_KEY = os.environ.get("QDRANT_API_KEY", "Test3200Test3200Test3200")
SOURCE = "momentry_dev_rule1_v2"
TARGET = "momentry_rule1"
BATCH_SIZE = 500
SLEEP = 0.05
# Seconds to wait on a single HTTP request before giving up instead of hanging.
REQUEST_TIMEOUT = 60


def build_request(method, path, body=None):
    """Build the HTTP request for a Qdrant REST call without sending it.

    Args:
        method: HTTP verb, e.g. "POST" or "PUT".
        path: URL path appended to QDRANT_URL (must start with "/").
        body: JSON-serialisable payload, or None to send no body.

    Returns:
        A urllib.request.Request carrying the JSON content type and API key.
    """
    # Compare against None explicitly: an empty dict/list is still a valid
    # JSON body and must not be silently dropped.
    data = json.dumps(body).encode() if body is not None else None
    req = urllib.request.Request(f"{QDRANT_URL}{path}", data=data, method=method)
    req.add_header("Content-Type", "application/json")
    req.add_header("Api-Key", API_KEY)
    return req


def response_ok(response):
    """Return True when a decoded response reports status "ok".

    The status is accepted either at the top level or nested under "result".
    """
    if not isinstance(response, dict):
        return False
    if response.get("status") == "ok":
        return True
    inner = response.get("result")
    return isinstance(inner, dict) and inner.get("status") == "ok"


def qdrant(method, path, body=None, timeout=REQUEST_TIMEOUT):
    """Send one request to Qdrant and return the decoded JSON response.

    Args:
        method: HTTP verb.
        path: URL path appended to QDRANT_URL.
        body: JSON-serialisable payload, or None for no body.
        timeout: Seconds to wait for the server before aborting the request.

    Raises:
        urllib.error.HTTPError: on a non-2xx response; the status code and
            response body are printed before the error is re-raised.
    """
    req = build_request(method, path, body)
    try:
        with urllib.request.urlopen(req, timeout=timeout) as resp:
            return json.loads(resp.read())
    except urllib.error.HTTPError as e:
        # errors="replace" keeps a non-UTF-8 error body from masking the
        # original HTTP failure with a UnicodeDecodeError.
        print(f" HTTP {e.code}: {e.read().decode(errors='replace')}")
        raise


def scroll_batch(offset=None):
    """Fetch one page of points (with payload and vector) from SOURCE.

    Args:
        offset: Page offset returned by the previous call, or None to start
            from the beginning.

    Returns:
        A (points, next_offset) tuple; next_offset is None when the response
        carries no further page offset.
    """
    body = {"limit": BATCH_SIZE, "with_payload": True, "with_vector": True}
    if offset is not None:
        body["offset"] = offset
    response = qdrant("POST", f"/collections/{SOURCE}/points/scroll", body)
    # "or {}" / "or []" also cover an explicit JSON null, which .get()'s
    # default argument would not.
    page = response.get("result") or {}
    return page.get("points") or [], page.get("next_page_offset")


def upsert_batch(points):
    """Upsert a list of points into TARGET; return True if reported "ok"."""
    response = qdrant("PUT", f"/collections/{TARGET}/points", {"points": points})
    return response_ok(response)


def main():
    """Copy every point from SOURCE to TARGET, printing progress.

    Returns:
        0 when all pages were synced, 1 when an upsert was rejected and the
        sync stopped early.
    """
    offset = None
    total = 0
    batch_num = 0
    failed = False
    # monotonic() is immune to wall-clock adjustments while the sync runs.
    t0 = time.monotonic()
    while True:
        points, offset = scroll_batch(offset)
        if not points:
            break
        if not upsert_batch(points):
            print(f" FAILED batch {batch_num} ({len(points)} pts)")
            failed = True
            break
        total += len(points)
        batch_num += 1
        if batch_num % 10 == 0:
            elapsed = time.monotonic() - t0
            print(f" Synced {total} points ({elapsed:.1f}s)")
        if offset is None:
            break
        # Short pause between pages to avoid hammering the instance.
        time.sleep(SLEEP)
    elapsed = time.monotonic() - t0
    if failed:
        # Do not claim success after a rejected batch; signal it to the caller.
        print(f"Aborted: {total} points synced in {elapsed:.1f}s before failure")
        return 1
    print(f"Done: {total} points synced in {elapsed:.1f}s")
    return 0


if __name__ == "__main__":
    raise SystemExit(main())