fix: ASRX duplication, TKG edges, trace ingest, and add pipeline progress publishing

- ASRX handler no longer stores duplicate 'asr' pre_chunks
- Pre_chunks storage made idempotent (delete-before-insert)
- Rule 1 + trace_ingest changed to query 'asrx' not 'asr'
- Trace chunks removed (now built dynamically from TKG/Qdrant)
- TKG scroll_face_points fixed: trace_id >= 1 (not == 1)
- TKG AsrxSegmentEntry: start/end -> start_time/end_time (match ASRX JSON)
- Unregister error handling: log errors instead of silently discarding them
- Add publish_pipeline_progress calls at each pipeline stage
  (processors, rule1, face_trace, identity_agent, TKG, rule2, completion)
This commit is contained in:
Accusys
2026-07-02 10:43:46 +08:00
parent d791d138f2
commit 3eabd45882
65 changed files with 9481 additions and 3856 deletions

View File

@@ -14,13 +14,9 @@ from sklearn.cluster import AgglomerativeClustering
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
try:
from deepface import DeepFace
HAS_DEEPFACE = True
except ImportError:
print("❌ DeepFace not found. Run: pip install deepface")
sys.exit(1)
# Use FaceNet embeddings from face.json instead of DeepFace
HAS_DEEPFACE = False
print("[FACE_CLUSTER] Using FaceNet embeddings from face.json (DeepFace not required)")
# Settings
UUID = os.getenv("UUID", "quick_preview")
@@ -104,53 +100,69 @@ def main():
print("❌ No frames in JSON.")
return
cap = cv2.VideoCapture(VIDEO_PATH)
# Get embeddings from Qdrant
print(f"[FACE_CLUSTER] Loading embeddings from Qdrant for {UUID}...")
try:
import requests
qdrant_url = "http://localhost:6333"
collection = "_faces"
# Query all embeddings for this file_uuid
response = requests.post(
f"{qdrant_url}/collections/{collection}/points/scroll",
json={
"filter": {
"must": [
{"key": "file_uuid", "match": {"value": UUID}}
]
},
"limit": 10000,
"with_vector": True
}
)
if response.status_code == 200:
result = response.json()
points = result.get("result", {}).get("points", [])
print(f"[FACE_CLUSTER] Loaded {len(points)} embeddings from Qdrant")
# Build face_id -> embedding map
embedding_map = {}
for point in points:
face_id = point.get("payload", {}).get("face_id")
vector = point.get("vector")
if face_id and vector:
embedding_map[face_id] = vector
else:
print(f"[FACE_CLUSTER] Qdrant query failed: {response.status_code}")
embedding_map = {}
except Exception as e:
print(f"[FACE_CLUSTER] Failed to load embeddings from Qdrant: {e}")
embedding_map = {}
# Use embeddings from Qdrant or face.json
embeddings = []
face_refs = []
print(f"🔍 Extracting face embeddings from {UUID}...")
print(f"🔍 Collecting face embeddings for {UUID}...")
for frame_idx, frame_obj in enumerate(frames_list):
ts = frame_obj.get("timestamp")
faces = frame_obj.get("faces", [])
if not faces:
continue
if ts is not None:
cap.set(cv2.CAP_PROP_POS_MSEC, ts * 1000)
ret, frame = cap.read()
if not ret:
continue
for face_idx, face in enumerate(faces):
x, y, w, h = face["x"], face["y"], face["width"], face["height"]
margin = 5
crop = frame[
max(0, y - margin) : y + h + margin, max(0, x - margin) : x + w + margin
]
if crop is None or crop.size == 0:
continue
try:
res = DeepFace.represent(
img_path=crop, model_name="ArcFace", enforce_detection=False
)
if res and "embedding" in res[0]:
embeddings.append(res[0]["embedding"])
face_refs.append({"frame_idx": frame_idx, "face_idx": face_idx})
except Exception:
pass
cap.release()
face_id = face.get("face_id")
if face_id and face_id in embedding_map:
embeddings.append(embedding_map[face_id])
face_refs.append({"frame_idx": frame_idx, "face_idx": face_idx, "face_id": face_id})
if not embeddings:
print("❌ No embeddings extracted.")
print("❌ No embeddings found in Qdrant.")
return
embeddings = np.array(embeddings)
print(f"Extracted {len(embeddings)} face embeddings.")
print(f"Collected {len(embeddings)} face embeddings from Qdrant.")
# 2. 聚類
print(f"🧠 Clustering {len(embeddings)} faces...")