fix: TKG rebuild type mismatch and face_track nodes
- Fix trace_id type mismatch (INT4 vs i64) with explicit ::bigint cast
- Change build_face_track_nodes to use from_pg version
- Add skin_tone_trace_nodes to API response
- Add #[derive(Serialize)] to TkgResult
- Fix Unicode panic in text label truncation
- Add push_existing_embeddings.py script
This commit is contained in:
87
scripts/push_existing_embeddings.py
Executable file
87
scripts/push_existing_embeddings.py
Executable file
@@ -0,0 +1,87 @@
|
||||
#!/opt/homebrew/bin/python3.11
|
||||
"""
|
||||
Push existing embeddings from face.json to Qdrant _faces collection.
|
||||
This is faster than recomputing embeddings with face_processor.py.
|
||||
|
||||
Usage:
|
||||
python scripts/push_existing_embeddings.py --file-uuid <uuid>
|
||||
python scripts/push_existing_embeddings.py --all
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import json
|
||||
import argparse
|
||||
from pathlib import Path
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||||
from utils.qdrant_faces import (
|
||||
ensure_faces_collection,
|
||||
push_face_embeddings_batch,
|
||||
)
|
||||
|
||||
# Directory that holds the per-file "<uuid>.face.json" outputs read by this
# script; the MOMENTRY_OUTPUT_DIR environment variable overrides the default.
OUTPUT_DIR = os.environ.get("MOMENTRY_OUTPUT_DIR", "/Users/accusys/momentry/output")
|
||||
|
||||
|
||||
def push_embeddings_for_file(file_uuid: str) -> int:
    """Push the embeddings stored in a file's face.json to Qdrant.

    Reads ``<OUTPUT_DIR>/<file_uuid>.face.json``, collects every face entry
    that carries a non-empty ``embedding`` and passes the resulting list to
    ``push_face_embeddings_batch``.

    Args:
        file_uuid: Name prefix of the ``.face.json`` file to process.

    Returns:
        The count returned by ``push_face_embeddings_batch``, or 0 when the
        file is missing, cannot be read or parsed, or holds no embeddings.
    """
    face_json_path = Path(OUTPUT_DIR) / f"{file_uuid}.face.json"
    if not face_json_path.exists():
        print(f"ERROR: {face_json_path} not found")
        return 0

    # JSON is UTF-8 by specification, so do not rely on the platform default
    # encoding. A corrupt or unreadable file is reported and skipped so that
    # one bad file does not abort a whole --all run.
    try:
        with open(face_json_path, encoding="utf-8") as f:
            data = json.load(f)
    except (OSError, ValueError) as exc:
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
        print(f"ERROR: could not read {face_json_path}: {exc}")
        return 0

    faces = []
    # "or []" also tolerates an explicit null for "frames" / "faces".
    for frame_data in data.get("frames") or []:
        frame = frame_data.get("frame", 0)
        for face in frame_data.get("faces") or []:
            embedding = face.get("embedding")
            if not embedding:
                # Faces without an embedding have nothing to push.
                continue

            faces.append({
                "frame": frame,
                "trace_id": face.get("trace_id"),
                "bbox": {
                    "x": face.get("x", 0),
                    "y": face.get("y", 0),
                    "width": face.get("width", 0),
                    "height": face.get("height", 0),
                },
                "confidence": face.get("confidence", 0),
                "embedding": embedding,
            })

    if not faces:
        return 0

    count = push_face_embeddings_batch(file_uuid, faces)
    print(f"Pushed {count} embeddings for {file_uuid}")
    return count
|
||||
|
||||
|
||||
def main() -> None:
    """Command-line entry point for pushing existing embeddings to Qdrant.

    With ``--all``, every ``*.face.json`` file in ``OUTPUT_DIR`` is processed
    and a grand total is printed. Otherwise ``--file-uuid`` selects a single
    file. When neither option is given, the help text is printed.
    """
    parser = argparse.ArgumentParser(description="Push existing embeddings to Qdrant")
    parser.add_argument("--file-uuid", help="File UUID to process")
    parser.add_argument("--all", action="store_true", help="Process all files in output dir")
    args = parser.parse_args()

    if args.all:
        total = 0
        # Sorted so the processing order (and the log output) is deterministic.
        for face_json in sorted(Path(OUTPUT_DIR).glob("*.face.json")):
            # Extract UUID from filename like "uuid.face.json". removesuffix
            # strips only the trailing suffix, whereas str.replace would also
            # remove the same text appearing earlier in the name.
            file_uuid = face_json.name.removesuffix(".face.json")
            total += push_embeddings_for_file(file_uuid)
        print(f"\nTotal: {total} embeddings pushed")
    elif args.file_uuid:
        push_embeddings_for_file(args.file_uuid)
    else:
        parser.print_help()
|
||||
|
||||
|
||||
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user