fix: pipeline timeline log, chunk lookup, face processor no fallback, Qdrant UUID script, delete safety rules
This commit is contained in:
174
scripts/update_qdrant_uuid.py
Normal file
174
scripts/update_qdrant_uuid.py
Normal file
@@ -0,0 +1,174 @@
|
||||
#!/usr/bin/env python3
|
||||
"""批量更新 Qdrant collection 中的 file_uuid (舊→新)"""
|
||||
|
||||
import json
|
||||
import subprocess
|
||||
import sys
|
||||
|
||||
QDRANT_URL = "http://localhost:6333"
|
||||
|
||||
# UUID mapping: 舊 → 新
|
||||
UUID_MAP = {
|
||||
"aeed71342a899fe4b4c57b7d41bcb692": [
|
||||
"bd80fec92b0b6963d177a2c55bf713e2",
|
||||
],
|
||||
}
|
||||
|
||||
# Collections to process
|
||||
COLLECTIONS = [
|
||||
"momentry_dev_v1",
|
||||
"momentry_dev_stories",
|
||||
"momentry_dev_voice",
|
||||
"momentry_dev_rule1_v2",
|
||||
"momentry_dev_faces",
|
||||
"sentence_story",
|
||||
"sentence_summary",
|
||||
]
|
||||
|
||||
|
||||
def qdrant_get(path: str) -> dict:
|
||||
res = subprocess.run(
|
||||
["curl", "-s", "-X", "GET", f"{QDRANT_URL}{path}"],
|
||||
capture_output=True, text=True
|
||||
)
|
||||
return json.loads(res.stdout) if res.stdout.strip() else {}
|
||||
|
||||
|
||||
def qdrant_post(path: str, body: dict) -> dict:
|
||||
tmp = "/tmp/qdrant_post.json"
|
||||
with open(tmp, "w") as f:
|
||||
json.dump(body, f)
|
||||
res = subprocess.run(
|
||||
["curl", "-s", "-X", "POST", f"{QDRANT_URL}{path}",
|
||||
"-H", "Content-Type: application/json", "-d", f"@{tmp}"],
|
||||
capture_output=True, text=True
|
||||
)
|
||||
return json.loads(res.stdout) if res.stdout.strip() else {}
|
||||
|
||||
|
||||
def qdrant_put(path: str, body: dict) -> dict:
|
||||
tmp = "/tmp/qdrant_update.json"
|
||||
with open(tmp, "w") as f:
|
||||
json.dump(body, f)
|
||||
res = subprocess.run(
|
||||
["curl", "-s", "-X", "PUT", f"{QDRANT_URL}{path}",
|
||||
"-H", "Content-Type: application/json", "-d", f"@{tmp}"],
|
||||
capture_output=True, text=True
|
||||
)
|
||||
return json.loads(res.stdout) if res.stdout.strip() else {}
|
||||
|
||||
|
||||
def scroll_all(collection: str, filter_old: dict) -> list:
|
||||
"""Scroll all matching points from a collection"""
|
||||
points = []
|
||||
offset = None
|
||||
while True:
|
||||
body = {
|
||||
"limit": 1000,
|
||||
"with_payload": True,
|
||||
"with_vector": True,
|
||||
"filter": filter_old,
|
||||
}
|
||||
if offset:
|
||||
body["offset"] = offset
|
||||
result = qdrant_post(f"/collections/{collection}/points/scroll", body)
|
||||
batch = result.get("result", {}).get("points", [])
|
||||
points.extend(batch)
|
||||
next_offset = result.get("result", {}).get("next_page_offset")
|
||||
if next_offset is None:
|
||||
break
|
||||
offset = next_offset
|
||||
return points
|
||||
|
||||
|
||||
def update_points(collection: str, points: list, old_uuid: str, new_uuid: str):
|
||||
"""Update file_uuid in payload for the given points"""
|
||||
if not points:
|
||||
return 0
|
||||
|
||||
updated = []
|
||||
for p in points:
|
||||
pl = p.get("payload", {})
|
||||
# Check both 'uuid' and 'file_uuid' fields
|
||||
changed = False
|
||||
if pl.get("uuid") == old_uuid:
|
||||
pl["uuid"] = new_uuid
|
||||
changed = True
|
||||
if pl.get("file_uuid") == old_uuid:
|
||||
pl["file_uuid"] = new_uuid
|
||||
changed = True
|
||||
if changed:
|
||||
updated.append({
|
||||
"id": p["id"],
|
||||
"vector": p["vector"],
|
||||
"payload": pl,
|
||||
})
|
||||
|
||||
if not updated:
|
||||
return 0
|
||||
|
||||
# Update in batches of 500
|
||||
total = len(updated)
|
||||
for i in range(0, total, 500):
|
||||
batch = updated[i:i+500]
|
||||
result = qdrant_put(
|
||||
f"/collections/{collection}/points?wait=true",
|
||||
{"points": batch}
|
||||
)
|
||||
if result.get("status") != "ok":
|
||||
print(f" Error at {i}: {result}")
|
||||
return i
|
||||
return total
|
||||
|
||||
|
||||
def main():
|
||||
for collection in COLLECTIONS:
|
||||
# Check if collection exists
|
||||
info = qdrant_get(f"/collections/{collection}")
|
||||
if "result" not in info:
|
||||
continue
|
||||
|
||||
for old_uuid, new_uuids in UUID_MAP.items():
|
||||
for new_uuid in new_uuids:
|
||||
# Scroll all points with this old UUID
|
||||
filter_body = {
|
||||
"must": [
|
||||
{"should": [
|
||||
{"key": "uuid", "match": {"value": old_uuid}},
|
||||
{"key": "file_uuid", "match": {"value": old_uuid}},
|
||||
]}
|
||||
]
|
||||
}
|
||||
points = scroll_all(collection, filter_body)
|
||||
if not points:
|
||||
continue
|
||||
|
||||
print(f"{collection}: {len(points)} points with UUID {old_uuid[:8]}...")
|
||||
updated = update_points(collection, points, old_uuid, new_uuid)
|
||||
print(f" → {updated} points updated to {new_uuid[:8]}...")
|
||||
|
||||
# Verify
|
||||
print("\n=== Verification ===")
|
||||
for collection in COLLECTIONS:
|
||||
for old_uuid, new_uuids in UUID_MAP.items():
|
||||
for what, uuid in [("old", old_uuid), ("new", new_uuids[0])]:
|
||||
filter_body = {
|
||||
"must": [
|
||||
{"should": [
|
||||
{"key": "uuid", "match": {"value": uuid}},
|
||||
{"key": "file_uuid", "match": {"value": uuid}},
|
||||
]}
|
||||
]
|
||||
}
|
||||
result = qdrant_post(
|
||||
f"/collections/{collection}/points/count",
|
||||
{"filter": filter_body}
|
||||
)
|
||||
cnt = result.get("result", {}).get("count", 0)
|
||||
if cnt > 0:
|
||||
print(f" {collection}: {cnt} points with {what} UUID")
|
||||
print("✅ Done")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
Reference in New Issue
Block a user