feat: ASRX hybrid pipeline, identity history, worker fixes, checkpoint system
This commit is contained in:
77
scripts/qdrant_sync_collection.py
Normal file
77
scripts/qdrant_sync_collection.py
Normal file
@@ -0,0 +1,77 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Sync all vectors from one Qdrant collection to another on the same instance."""
|
||||
import json
import os
import time
import urllib.error
import urllib.request
|
||||
|
||||
# Connection settings. Both can be overridden through the environment so the
# script can target another instance without editing the source; the defaults
# are the values that used to be hard-coded here.
# A trailing slash is stripped because request paths are appended verbatim
# and already start with "/".
QDRANT_URL = os.environ.get("QDRANT_URL", "http://localhost:6333").rstrip("/")
# NOTE(review): the fallback is the key previously committed in this file.
# Prefer supplying QDRANT_API_KEY from the environment, and rotate this value
# if it is a real credential.
API_KEY = os.environ.get("QDRANT_API_KEY", "Test3200Test3200Test3200")

# Collection that points are read from.
SOURCE = "momentry_dev_rule1_v2"
# Collection that points are written to.
TARGET = "momentry_rule1"

# Number of points requested per scroll page (and therefore per upsert).
BATCH_SIZE = 500
# Pause, in seconds, between consecutive batches.
SLEEP = 0.05
|
||||
|
||||
|
||||
def qdrant(method, path, body=None):
    """Send one JSON request to the Qdrant REST API and return the parsed reply.

    Args:
        method: HTTP method name, e.g. "POST" or "PUT".
        path: Request path appended verbatim to QDRANT_URL (should start
            with "/").
        body: JSON-serialisable payload, or None to send no request body.

    Returns:
        The response body decoded from JSON.

    Raises:
        urllib.error.HTTPError: Re-raised after the status code and response
            body have been printed.
    """
    url = f"{QDRANT_URL}{path}"
    # Compare against None rather than truthiness: an empty dict or list is a
    # valid JSON body and must still be sent.
    data = json.dumps(body).encode() if body is not None else None
    req = urllib.request.Request(url, data=data, method=method)
    req.add_header("Content-Type", "application/json")
    req.add_header("Api-Key", API_KEY)
    try:
        with urllib.request.urlopen(req) as resp:
            return json.loads(resp.read())
    except urllib.error.HTTPError as e:
        # errors="replace" keeps a non-UTF-8 error body from raising
        # UnicodeDecodeError and hiding the original HTTP error.
        print(f" HTTP {e.code}: {e.read().decode(errors='replace')}")
        raise
|
||||
|
||||
|
||||
def scroll_batch(offset=None):
    """Read one page of points (payload and vector included) from SOURCE.

    Args:
        offset: Scroll position returned by the previous call, or None to
            start from the beginning.

    Returns:
        A ``(points, next_offset)`` tuple taken from the response's
        "result" object: ``points`` defaults to an empty list and
        ``next_offset`` to None when the respective key is absent.
    """
    request = {"limit": BATCH_SIZE, "with_payload": True, "with_vector": True}
    if offset is not None:
        request.update(offset=offset)

    response = qdrant("POST", f"/collections/{SOURCE}/points/scroll", request)
    page = response.get("result", {})
    return page.get("points", []), page.get("next_page_offset")
|
||||
|
||||
|
||||
def upsert_batch(points):
    """Write ``points`` into the TARGET collection.

    Args:
        points: List of point objects to send under the "points" key.

    Returns:
        True when either the top-level "status" or the nested
        result "status" of the response equals "ok", otherwise False.
    """
    response = qdrant("PUT", f"/collections/{TARGET}/points", {"points": points})
    if response.get("status") == "ok":
        return True
    nested = response.get("result", {})
    return nested.get("status") == "ok"
|
||||
|
||||
|
||||
def main():
    """Copy every point from SOURCE to TARGET, one scroll page at a time.

    Pages are fetched with scroll_batch and written with upsert_batch until
    the source returns no points or no further offset. Progress is printed
    every 10 batches.

    Returns:
        0 when all pages were copied, 1 when an upsert reported failure and
        the sync was aborted part-way.
    """
    offset = None
    total = 0
    batch_num = 0
    failed = False
    # Monotonic clock: elapsed time must not be affected by wall-clock changes.
    t0 = time.monotonic()

    while True:
        points, offset = scroll_batch(offset)
        if not points:
            break

        if not upsert_batch(points):
            print(f" FAILED batch {batch_num} ({len(points)} pts)")
            failed = True
            break

        total += len(points)
        batch_num += 1
        if batch_num % 10 == 0:
            elapsed = time.monotonic() - t0
            print(f" Synced {total} points ({elapsed:.1f}s)")

        # No next-page offset means the page just written was the last one.
        if offset is None:
            break
        time.sleep(SLEEP)

    elapsed = time.monotonic() - t0
    if failed:
        # Do not report "Done" for a partial copy; make the failure visible
        # both in the output and in the return value.
        print(f"Aborted: {total} points synced before failure ({elapsed:.1f}s)")
        return 1

    print(f"Done: {total} points synced in {elapsed:.1f}s")
    return 0


if __name__ == "__main__":
    # Propagate main()'s status so callers and automation can detect a
    # failed sync from the process exit code.
    raise SystemExit(main())
|
||||
Reference in New Issue
Block a user