feat: add Qdrant _faces collection embedding push

- Add qdrant_faces.py utility module for _faces collection operations
- Modify face_processor.py to push embeddings to Qdrant (CoreML extraction re-enabled)
- Modify store_traced_faces.py to update trace_id in Qdrant after face tracking
- Collection schema: 512D vectors, Cosine distance, fixed name '_faces'
- Payload: file_uuid, frame, trace_id, bbox, confidence, identity_id/uuid, stranger_id
- Batch size: 100 (default), configurable via QDRANT_BATCH_SIZE env var
- Error handling: face_processor.py exits with error if Qdrant push fails
This commit is contained in:
Accusys
2026-06-25 00:23:20 +08:00
parent 074cdcdbed
commit 9fbb4f9b48
3 changed files with 355 additions and 3 deletions

View File

@@ -0,0 +1,308 @@
#!/opt/homebrew/bin/python3.11
"""
Qdrant _faces Collection Operations
Functions:
- ensure_faces_collection(): Create _faces collection if not exists
- generate_point_id(): Generate consistent point ID
- push_face_embeddings_batch(): Batch push embeddings to Qdrant
- update_trace_ids(): Update trace_id after face tracking
Collection Schema:
- Name: _faces (fixed, no schema prefix)
- Vector: 512D, Cosine distance
- Payload: {file_uuid, frame, trace_id, bbox, confidence, identity_id, identity_uuid, stranger_id}
"""
import os
import json
import hashlib
import urllib.request
import urllib.error
from typing import Optional
QDRANT_URL = os.environ.get("QDRANT_URL", "http://localhost:6333")
QDRANT_API_KEY = os.environ.get("QDRANT_API_KEY", "Test3200Test3200Test3200")
FACES_COLLECTION = "_faces"
VECTOR_DIM = 512
BATCH_SIZE = int(os.environ.get("QDRANT_BATCH_SIZE", "100"))
def qdrant_request(method: str, path: str, body: dict = None) -> dict:
"""Make HTTP request to Qdrant"""
url = f"{QDRANT_URL}{path}"
data = json.dumps(body).encode() if body else None
req = urllib.request.Request(url, data=data, method=method)
req.add_header("Content-Type", "application/json")
req.add_header("Api-Key", QDRANT_API_KEY)
try:
with urllib.request.urlopen(req) as resp:
return json.loads(resp.read())
except urllib.error.HTTPError as e:
error_body = e.read().decode()
raise RuntimeError(f"Qdrant HTTP {e.code}: {error_body}")
def ensure_faces_collection() -> bool:
"""Create _faces collection if not exists"""
url = f"{QDRANT_URL}/collections/{FACES_COLLECTION}"
req = urllib.request.Request(url, method="GET")
req.add_header("Api-Key", QDRANT_API_KEY)
try:
urllib.request.urlopen(req)
return True # Collection exists
except urllib.error.HTTPError as e:
if e.code != 404:
raise RuntimeError(f"Qdrant check failed: {e.read().decode()}")
# Create collection
body = {
"vectors": {
"size": VECTOR_DIM,
"distance": "Cosine"
}
}
create_url = f"{QDRANT_URL}/collections/{FACES_COLLECTION}"
data = json.dumps(body).encode()
req = urllib.request.Request(create_url, data=data, method="PUT")
req.add_header("Content-Type", "application/json")
req.add_header("Api-Key", QDRANT_API_KEY)
try:
urllib.request.urlopen(req)
print(f"[QDRANT] Created collection: {FACES_COLLECTION}")
return True
except urllib.error.HTTPError as e:
raise RuntimeError(f"Qdrant create collection failed: {e.read().decode()}")
def generate_point_id(file_uuid: str, frame: int, trace_id: int = 0) -> int:
"""Generate consistent point ID from file_uuid + frame + trace_id"""
key = f"{file_uuid}_{frame}_{trace_id}"
return int(hashlib.md5(key.encode()).hexdigest()[:16], 16)
def push_face_embeddings_batch(
file_uuid: str,
faces: list,
publisher=None
) -> int:
"""Batch push face embeddings to _faces collection
Args:
file_uuid: Video file UUID
faces: List of {frame, trace_id, bbox, confidence, embedding}
publisher: RedisPublisher for progress reporting (optional)
Returns:
Number of successfully pushed embeddings
Raises:
RuntimeError: If Qdrant push fails
"""
if not faces:
return 0
ensure_faces_collection()
total = len(faces)
pushed = 0
for i in range(0, total, BATCH_SIZE):
batch = faces[i:i + BATCH_SIZE]
points = []
for face in batch:
point_id = generate_point_id(
file_uuid,
face["frame"],
face.get("trace_id", 0)
)
points.append({
"id": point_id,
"vector": face["embedding"],
"payload": {
"file_uuid": file_uuid,
"frame": face["frame"],
"trace_id": face.get("trace_id", 0),
"bbox": face["bbox"],
"confidence": face.get("confidence", 0.5),
"identity_id": None,
"identity_uuid": None,
"stranger_id": None,
}
})
body = {"points": points}
url = f"{QDRANT_URL}/collections/{FACES_COLLECTION}/points?wait=true"
data = json.dumps(body).encode()
req = urllib.request.Request(url, data=data, method="PUT")
req.add_header("Content-Type", "application/json")
req.add_header("Api-Key", QDRANT_API_KEY)
try:
urllib.request.urlopen(req)
pushed += len(batch)
except urllib.error.HTTPError as e:
error_body = e.read().decode()
raise RuntimeError(
f"Qdrant push failed (batch {i//BATCH_SIZE}): HTTP {e.code} - {error_body}"
)
if publisher:
pct = int((i + len(batch)) * 100 / total)
publisher.progress("face", i + len(batch), total, f"Qdrant push {pct}%")
print(f"[QDRANT] Pushed {pushed} embeddings to {FACES_COLLECTION}")
return pushed
def update_trace_ids(file_uuid: str, trace_mapping: dict) -> int:
"""Update trace_id for all face points in a file
Called by store_traced_faces.py after face tracking.
Args:
file_uuid: Video file UUID
trace_mapping: {frame: {bbox_key: trace_id}}
bbox_key = f"{x}_{y}_{width}_{height}"
Returns:
Number of updated points
"""
all_points = []
offset = None
while True:
body = {
"limit": BATCH_SIZE,
"with_payload": True,
"with_vector": True,
"filter": {
"must": [
{"key": "file_uuid", "match": {"value": file_uuid}}
]
}
}
if offset:
body["offset"] = offset
result = qdrant_request("POST", f"/collections/{FACES_COLLECTION}/points/scroll", body)
batch = result.get("result", {}).get("points", [])
if not batch:
break
all_points.extend(batch)
offset = result.get("result", {}).get("next_page_offset")
if not offset:
break
updates = []
for point in all_points:
point_id = point["id"]
payload = point.get("payload", {})
vector = point.get("vector", [])
frame = payload.get("frame")
bbox = payload.get("bbox", {})
bbox_key = f"{bbox.get('x')}_{bbox.get('y')}_{bbox.get('width')}_{bbox.get('height')}"
trace_id = trace_mapping.get(frame, {}).get(bbox_key)
if trace_id is None:
continue
payload["trace_id"] = trace_id
updates.append({
"id": point_id,
"vector": vector,
"payload": payload,
})
if not updates:
return 0
for i in range(0, len(updates), BATCH_SIZE):
batch = updates[i:i + BATCH_SIZE]
body = {"points": batch}
qdrant_request("PUT", f"/collections/{FACES_COLLECTION}/points?wait=true", body)
print(f"[QDRANT] Updated {len(updates)} trace_ids in {FACES_COLLECTION}")
return len(updates)
def delete_file_faces(file_uuid: str) -> int:
"""Delete all face points for a file
Args:
file_uuid: Video file UUID
Returns:
Number of deleted points
"""
body = {
"filter": {
"must": [
{"key": "file_uuid", "match": {"value": file_uuid}}
]
}
}
result = qdrant_request("POST", f"/collections/{FACES_COLLECTION}/points/delete", body)
deleted = result.get("result", {}).get("operation_id", 0)
print(f"[QDRANT] Deleted faces for file_uuid={file_uuid}")
return deleted
def get_file_faces(file_uuid: str) -> list:
"""Get all face points for a file
Args:
file_uuid: Video file UUID
Returns:
List of points with payload and vector
"""
all_points = []
offset = None
while True:
body = {
"limit": BATCH_SIZE,
"with_payload": True,
"with_vector": True,
"filter": {
"must": [
{"key": "file_uuid", "match": {"value": file_uuid}}
]
}
}
if offset:
body["offset"] = offset
result = qdrant_request("POST", f"/collections/{FACES_COLLECTION}/points/scroll", body)
batch = result.get("result", {}).get("points", [])
if not batch:
break
all_points.extend(batch)
offset = result.get("result", {}).get("next_page_offset")
if not offset:
break
return all_points
def count_file_faces(file_uuid: str) -> int:
"""Count face points for a file
Args:
file_uuid: Video file UUID
Returns:
Number of face points
"""
body = {
"filter": {
"must": [
{"key": "file_uuid", "match": {"value": file_uuid}}
]
}
}
result = qdrant_request("POST", f"/collections/{FACES_COLLECTION}/points/count", body)
return result.get("result", {}).get("count", 0)