#!/usr/bin/env python3
"""Bulk-update the file UUID stored in Qdrant point payloads (old -> new).

For every collection in ``COLLECTIONS`` the script scrolls all points whose
``uuid`` or ``file_uuid`` payload field equals an old UUID from ``UUID_MAP``,
rewrites those fields to the new UUID, upserts the points back, and finally
prints per-collection counts of points still carrying the old / new UUID.

All HTTP traffic goes through the ``curl`` command-line tool.
"""

import json
import subprocess
import sys

QDRANT_URL = "http://localhost:6333"

# UUID mapping: old -> list of new UUIDs
UUID_MAP = {
    "aeed71342a899fe4b4c57b7d41bcb692": [
        "bd80fec92b0b6963d177a2c55bf713e2",
    ],
}

# Collections to process
COLLECTIONS = [
    "momentry_dev_v1",
    "momentry_dev_stories",
    "momentry_dev_voice",
    "momentry_dev_rule1_v2",
    "momentry_dev_faces",
    "sentence_story",
    "sentence_summary",
]

# Page size for scroll requests and chunk size for upsert requests.
SCROLL_LIMIT = 1000
UPSERT_BATCH = 500


def _request(method: str, path: str, body: "dict | None" = None) -> dict:
    """Run one HTTP request through curl and return the decoded JSON object.

    Args:
        method: HTTP verb passed to ``curl -X``.
        path: URL path appended to ``QDRANT_URL``.
        body: Optional JSON-serialisable request body.

    Returns:
        The parsed response if it is a JSON object, otherwise ``{}`` (empty
        output, unparsable output, or a non-object JSON value).
    """
    cmd = ["curl", "-sS", "-X", method, f"{QDRANT_URL}{path}"]
    payload = None
    if body is not None:
        # Stream the body over stdin instead of a fixed /tmp file: no shared
        # predictable path, nothing left behind, safe for concurrent runs.
        cmd += ["-H", "Content-Type: application/json", "--data-binary", "@-"]
        payload = json.dumps(body)

    res = subprocess.run(cmd, input=payload, capture_output=True, text=True)
    if res.returncode != 0:
        # Surface transport failures (e.g. connection refused) instead of
        # letting them look like "collection does not exist".
        print(
            f"curl {method} {path} failed (exit {res.returncode}): "
            f"{res.stderr.strip()}",
            file=sys.stderr,
        )

    if not res.stdout.strip():
        return {}
    try:
        parsed = json.loads(res.stdout)
    except json.JSONDecodeError:
        print(
            f"curl {method} {path} returned non-JSON output: "
            f"{res.stdout[:200]!r}",
            file=sys.stderr,
        )
        return {}
    return parsed if isinstance(parsed, dict) else {}


def _uuid_filter(uuid: str) -> dict:
    """Build the filter matching points whose uuid OR file_uuid equals *uuid*."""
    return {
        "must": [
            {"should": [
                {"key": "uuid", "match": {"value": uuid}},
                {"key": "file_uuid", "match": {"value": uuid}},
            ]}
        ]
    }


def qdrant_get(path: str) -> dict:
    """Issue a GET request to *path* and return the JSON response (or ``{}``)."""
    return _request("GET", path)


def qdrant_post(path: str, body: dict) -> dict:
    """POST *body* as JSON to *path* and return the JSON response (or ``{}``)."""
    return _request("POST", path, body)


def qdrant_put(path: str, body: dict) -> dict:
    """PUT *body* as JSON to *path* and return the JSON response (or ``{}``)."""
    return _request("PUT", path, body)


def scroll_all(collection: str, filter_old: dict) -> list:
    """Scroll all matching points from a collection.

    Args:
        collection: Name of the collection to scroll.
        filter_old: Filter object sent as the ``filter`` of each scroll request.

    Returns:
        Every point returned across all pages, with payload and vector.
        If a request fails, the points gathered so far are returned.
    """
    points = []
    offset = None
    while True:
        body = {
            "limit": SCROLL_LIMIT,
            "with_payload": True,
            "with_vector": True,
            "filter": filter_old,
        }
        # Compare against None: an offset of 0 is falsy but still a real
        # offset, and dropping it would restart the scroll from the beginning.
        if offset is not None:
            body["offset"] = offset
        response = qdrant_post(f"/collections/{collection}/points/scroll", body)
        # "result" may be absent or null on error responses.
        result = response.get("result") or {}
        points.extend(result.get("points") or [])
        next_offset = result.get("next_page_offset")
        if next_offset is None:
            break
        offset = next_offset
    return points


def update_points(collection: str, points: list, old_uuid: str, new_uuid: str):
    """Update uuid / file_uuid in the payload of the given points.

    Points whose payload ``uuid`` or ``file_uuid`` equals *old_uuid* get that
    field replaced by *new_uuid* (the payload dicts are modified in place) and
    are upserted back, together with their vectors, in batches.

    Args:
        collection: Name of the collection to write to.
        points: Point dicts carrying ``id``, ``vector`` and ``payload``.
        old_uuid: Value to look for.
        new_uuid: Replacement value.

    Returns:
        The number of points upserted. On a failed batch, the index at which
        that batch started, i.e. how many points were written before it.
    """
    if not points:
        return 0

    updated = []
    for point in points:
        # A present-but-null payload must not crash the field lookups below.
        payload = point.get("payload") or {}
        changed = False
        # Check both 'uuid' and 'file_uuid' fields
        for key in ("uuid", "file_uuid"):
            if payload.get(key) == old_uuid:
                payload[key] = new_uuid
                changed = True
        if changed:
            updated.append({
                "id": point["id"],
                "vector": point["vector"],
                "payload": payload,
            })

    if not updated:
        return 0

    total = len(updated)
    for start in range(0, total, UPSERT_BATCH):
        batch = updated[start:start + UPSERT_BATCH]
        result = qdrant_put(
            f"/collections/{collection}/points?wait=true",
            {"points": batch},
        )
        if result.get("status") != "ok":
            print(f" Error at {start}: {result}")
            return start
    return total


def main():
    """Migrate every mapped UUID in every collection, then print a verification."""
    for collection in COLLECTIONS:
        # Skip collections the server does not report (no "result" in reply).
        info = qdrant_get(f"/collections/{collection}")
        if "result" not in info:
            continue

        for old_uuid, new_uuids in UUID_MAP.items():
            # NOTE: once the first new UUID has been applied no point carries
            # old_uuid any more, so later entries in the list find nothing.
            for new_uuid in new_uuids:
                points = scroll_all(collection, _uuid_filter(old_uuid))
                if not points:
                    continue

                print(f"{collection}: {len(points)} points with UUID {old_uuid[:8]}...")
                updated = update_points(collection, points, old_uuid, new_uuid)
                print(f" → {updated} points updated to {new_uuid[:8]}...")

    # Verify
    print("\n=== Verification ===")
    for collection in COLLECTIONS:
        for old_uuid, new_uuids in UUID_MAP.items():
            # Slice instead of indexing so an empty mapping list cannot raise.
            targets = [("old", old_uuid)]
            targets += [("new", uuid) for uuid in new_uuids[:1]]
            for what, uuid in targets:
                result = qdrant_post(
                    f"/collections/{collection}/points/count",
                    {"filter": _uuid_filter(uuid)},
                )
                cnt = (result.get("result") or {}).get("count", 0)
                if cnt > 0:
                    print(f" {collection}: {cnt} points with {what} UUID")

    print("✅ Done")


if __name__ == "__main__":
    main()