feat: Phase 2.6 edges migration to Qdrant (TKG-only architecture)
Phase 2.6.1: co_occurrence_edges migration
- build_co_occurrence_edges_from_qdrant()
- Qdrant embeddings → frame grouping → YOLO objects
- Result: 6679 edges (vs 6701 PostgreSQL)

Phase 2.6.2: face_face_edges migration
- build_face_face_edges_from_qdrant()
- Qdrant embeddings → frame grouping → face pairs
- mutual_gaze detection preserved
- Result: 6 edges (exact match)

Phase 2.6.3: speaker_face_edges migration
- build_speaker_face_edges_from_qdrant()
- Qdrant embeddings → trace_id frame ranges
- SPEAKS_AS edge creation

Architecture:
- All edges use Qdrant payload (no face_detections queries)
- PostgreSQL fallback for empty Qdrant
- Estimated 3.6x performance improvement

Testing:
- Playground (3003): ✓ All Phase 2.6 logs verified
- Edge counts: ✓ Close match with PostgreSQL
- Fallback: ✓ Working

Docs:
- docs_v1.0/DESIGN/TKG_PHASE2_6_EDGES_MIGRATION.md
- docs_v1.0/M4_workspace/2026-06-21_phase2_6_test.md
This commit is contained in:
@@ -43,18 +43,18 @@ def merge_traces_within_cuts(face_data: dict, cut_scenes: list) -> dict:
|
||||
frames = face_data.get("frames", {})
|
||||
if not frames:
|
||||
return face_data
|
||||
|
||||
|
||||
# Map each frame to its scene/cut number
|
||||
frame_to_scene = {}
|
||||
for s in cut_scenes:
|
||||
for f in range(s["start_frame"], s["end_frame"] + 1):
|
||||
frame_to_scene[f] = s["scene_number"]
|
||||
|
||||
|
||||
# Collect per-trace data: scene numbers, embeddings, face positions
|
||||
trace_frames = defaultdict(list)
|
||||
trace_embeddings = defaultdict(list)
|
||||
trace_poses = {}
|
||||
|
||||
|
||||
for fnum_str, frm_data in frames.items():
|
||||
fnum = int(fnum_str)
|
||||
for face in frm_data.get("faces", []):
|
||||
@@ -66,19 +66,23 @@ def merge_traces_within_cuts(face_data: dict, cut_scenes: list) -> dict:
|
||||
if emb is not None:
|
||||
trace_embeddings[tid].append(emb)
|
||||
if tid not in trace_poses:
|
||||
trace_poses[tid] = (face.get("x", 0), face.get("y", 0),
|
||||
face.get("width", 0), face.get("height", 0))
|
||||
|
||||
trace_poses[tid] = (
|
||||
face.get("x", 0),
|
||||
face.get("y", 0),
|
||||
face.get("width", 0),
|
||||
face.get("height", 0),
|
||||
)
|
||||
|
||||
if len(trace_embeddings) < 2:
|
||||
return face_data
|
||||
|
||||
|
||||
# Compute centroid per trace
|
||||
trace_centroids = {}
|
||||
for tid, embs in trace_embeddings.items():
|
||||
centroid = np.mean(embs, axis=0)
|
||||
norm = np.linalg.norm(centroid)
|
||||
trace_centroids[tid] = centroid / norm if norm > 0 else centroid
|
||||
|
||||
|
||||
# Determine which scene each trace belongs to (majority of frames)
|
||||
trace_scene = {}
|
||||
for tid, fns in trace_frames.items():
|
||||
@@ -87,17 +91,17 @@ def merge_traces_within_cuts(face_data: dict, cut_scenes: list) -> dict:
|
||||
scene = frame_to_scene.get(fn, -1)
|
||||
scene_votes[scene] += 1
|
||||
trace_scene[tid] = max(scene_votes, key=scene_votes.get) if scene_votes else -1
|
||||
|
||||
|
||||
# Within each scene, merge traces with similar centroids
|
||||
scene_traces = defaultdict(list)
|
||||
for tid, scene in trace_scene.items():
|
||||
if scene >= 0 and tid in trace_centroids:
|
||||
scene_traces[scene].append(tid)
|
||||
|
||||
|
||||
merged = 0
|
||||
next_new_id = max(trace_frames.keys()) + 1 if trace_frames else 0
|
||||
SIMILARITY_THRESHOLD = 0.75
|
||||
|
||||
|
||||
for scene, tids in scene_traces.items():
|
||||
if len(tids) < 2:
|
||||
continue
|
||||
@@ -118,7 +122,7 @@ def merge_traces_within_cuts(face_data: dict, cut_scenes: list) -> dict:
|
||||
face["trace_id"] = keep_tid
|
||||
used.add(tids[j])
|
||||
merged += 1
|
||||
|
||||
|
||||
# If any merges happened, rebuild trace metadata
|
||||
if merged > 0:
|
||||
# Rebuild traces dict
|
||||
@@ -129,14 +133,20 @@ def merge_traces_within_cuts(face_data: dict, cut_scenes: list) -> dict:
|
||||
for face in frm_data.get("faces", []):
|
||||
tid = face.get("trace_id")
|
||||
if tid is not None:
|
||||
new_trace_frames[tid].append({
|
||||
"frame": fnum,
|
||||
"face_index": 0,
|
||||
"bbox": {"x": face.get("x", 0), "y": face.get("y", 0),
|
||||
"width": face.get("width", 0), "height": face.get("height", 0)},
|
||||
"confidence": face.get("confidence", 0.0),
|
||||
})
|
||||
|
||||
new_trace_frames[tid].append(
|
||||
{
|
||||
"frame": fnum,
|
||||
"face_index": 0,
|
||||
"bbox": {
|
||||
"x": face.get("x", 0),
|
||||
"y": face.get("y", 0),
|
||||
"width": face.get("width", 0),
|
||||
"height": face.get("height", 0),
|
||||
},
|
||||
"confidence": face.get("confidence", 0.0),
|
||||
}
|
||||
)
|
||||
|
||||
for tid, path in new_trace_frames.items():
|
||||
if len(path) >= 1:
|
||||
frames_sorted = sorted(set(p["frame"] for p in path))
|
||||
@@ -145,23 +155,30 @@ def merge_traces_within_cuts(face_data: dict, cut_scenes: list) -> dict:
|
||||
"start_frame": frames_sorted[0],
|
||||
"end_frame": frames_sorted[-1],
|
||||
"duration_frames": frames_sorted[-1] - frames_sorted[0] + 1,
|
||||
"duration_seconds": (frames_sorted[-1] - frames_sorted[0]) / face_data.get("metadata", {}).get("fps", 25.0),
|
||||
"duration_seconds": (frames_sorted[-1] - frames_sorted[0])
|
||||
/ face_data.get("metadata", {}).get("fps", 25.0),
|
||||
"total_appearances": len(path),
|
||||
"path": path,
|
||||
}
|
||||
|
||||
|
||||
face_data["traces"] = new_traces
|
||||
face_data["metadata"]["trace_stats"] = {
|
||||
"total_traces": len(new_traces),
|
||||
"active_traces": len(new_traces),
|
||||
"long_traces": len([t for t in new_traces.values() if t["duration_frames"] >= 2]),
|
||||
"long_traces": len(
|
||||
[t for t in new_traces.values() if t["duration_frames"] >= 2]
|
||||
),
|
||||
}
|
||||
print(f"[TRACE] Post-merge: {merged} traces merged, {len(new_traces)} total traces")
|
||||
|
||||
print(
|
||||
f"[TRACE] Post-merge: {merged} traces merged, {len(new_traces)} total traces"
|
||||
)
|
||||
|
||||
return face_data
|
||||
|
||||
|
||||
def run_face_tracker(face_json_path: str, traced_json_path: str, filter_eyes: bool = False) -> str:
|
||||
def run_face_tracker(
|
||||
face_json_path: str, traced_json_path: str, filter_eyes: bool = False
|
||||
) -> str:
|
||||
"""Run face_tracker.py on face.json, returns path to face_traced.json"""
|
||||
from face_tracker import track_faces
|
||||
|
||||
@@ -200,7 +217,7 @@ def run_face_tracker(face_json_path: str, traced_json_path: str, filter_eyes: bo
|
||||
"fps": face_data.get("fps", 30.0),
|
||||
"total_frames": face_data.get("frame_count", 0),
|
||||
}
|
||||
|
||||
|
||||
# Eye filter: remove faces without at least one eye landmark
|
||||
if filter_eyes:
|
||||
removed = 0
|
||||
@@ -215,19 +232,26 @@ def run_face_tracker(face_json_path: str, traced_json_path: str, filter_eyes: bo
|
||||
removed += 1
|
||||
frm_data["faces"] = kept
|
||||
print(f"[TRACE] Eye filter: {removed} faces without eyes removed")
|
||||
|
||||
|
||||
print(f"[TRACE] Processing {len(face_data.get('frames', {}))} frames")
|
||||
|
||||
|
||||
# Load embeddings from DB for the face tracker
|
||||
file_uuid = face_json_path.split("/")[-1].replace(".face.json", "").replace("_traced.json", "")
|
||||
file_uuid = (
|
||||
face_json_path.split("/")[-1]
|
||||
.replace(".face.json", "")
|
||||
.replace("_traced.json", "")
|
||||
)
|
||||
try:
|
||||
conn = get_conn()
|
||||
cur = conn.cursor()
|
||||
cur.execute(f"""
|
||||
cur.execute(
|
||||
f"""
|
||||
SELECT frame_number, x, y, width, height, embedding
|
||||
FROM {SCHEMA}.face_detections
|
||||
WHERE file_uuid = %s AND embedding IS NOT NULL
|
||||
""", (file_uuid,))
|
||||
""",
|
||||
(file_uuid,),
|
||||
)
|
||||
emb_rows = cur.fetchall()
|
||||
conn.close()
|
||||
# Build lookup: frame_number → list of (bbox, embedding)
|
||||
@@ -235,41 +259,55 @@ def run_face_tracker(face_json_path: str, traced_json_path: str, filter_eyes: bo
|
||||
for fn, x, y, w, h, emb in emb_rows:
|
||||
emb_map.setdefault(fn, []).append(((x, y, w, h), emb))
|
||||
print(f"[TRACE] Loaded {len(emb_rows)} embeddings from DB")
|
||||
|
||||
|
||||
# Attach embeddings to face data
|
||||
attached = 0
|
||||
for fnum_str, frm_data in face_data.get("frames", {}).items():
|
||||
fnum = int(fnum_str)
|
||||
for face in frm_data.get("faces", []):
|
||||
x, y, w, h = face.get("x", 0), face.get("y", 0), face.get("width", 0), face.get("height", 0)
|
||||
x, y, w, h = (
|
||||
face.get("x", 0),
|
||||
face.get("y", 0),
|
||||
face.get("width", 0),
|
||||
face.get("height", 0),
|
||||
)
|
||||
candidates = emb_map.get(fnum, [])
|
||||
# Find matching embedding by bbox proximity
|
||||
for (ex, ey, ew, eh), emb in candidates:
|
||||
if abs(x - ex) < 10 and abs(y - ey) < 10 and abs(w - ew) < 10 and abs(h - eh) < 10:
|
||||
if (
|
||||
abs(x - ex) < 10
|
||||
and abs(y - ey) < 10
|
||||
and abs(w - ew) < 10
|
||||
and abs(h - eh) < 10
|
||||
):
|
||||
face["embedding"] = emb
|
||||
attached += 1
|
||||
break
|
||||
print(f"[TRACE] Attached {attached} embeddings to faces")
|
||||
except Exception as e:
|
||||
print(f"[TRACE] WARNING: Could not load embeddings: {e}")
|
||||
|
||||
|
||||
# Load cut boundaries from cut.json (same directory as face.json)
|
||||
cut_boundaries = None
|
||||
cut_scenes = None
|
||||
cuts_path = face_json_path.replace("_traced.json", ".cut.json").replace(".face.json", ".cut.json")
|
||||
cuts_path = face_json_path.replace("_traced.json", ".cut.json").replace(
|
||||
".face.json", ".cut.json"
|
||||
)
|
||||
if os.path.exists(cuts_path):
|
||||
with open(cuts_path) as f:
|
||||
cuts = json.load(f)
|
||||
cut_scenes = cuts.get("scenes", [])
|
||||
cut_boundaries = {s["start_frame"] for s in cut_scenes if s["start_frame"] > 0}
|
||||
print(f"[TRACE] Loaded {len(cut_boundaries)} cut boundaries")
|
||||
|
||||
face_data = track_faces(face_data, use_embedding=True, cut_boundaries=cut_boundaries)
|
||||
|
||||
|
||||
face_data = track_faces(
|
||||
face_data, use_embedding=True, cut_boundaries=cut_boundaries
|
||||
)
|
||||
|
||||
# Merge traces within same cut (same person re-appearing after occlusion/pose change)
|
||||
if cut_scenes and len(cut_scenes) > 0:
|
||||
face_data = merge_traces_within_cuts(face_data, cut_scenes)
|
||||
|
||||
|
||||
metadata = face_data.get("metadata", {})
|
||||
metadata["tracking_method"] = "iou_embedding"
|
||||
metadata["tracked_at"] = datetime.now().isoformat()
|
||||
@@ -309,6 +347,8 @@ def store_traced_faces(file_uuid: str, traced_json_path: str, schema: str = SCHE
|
||||
h = face.get("height", 0)
|
||||
confidence = face.get("confidence", 0.0)
|
||||
face_id = face.get("face_id")
|
||||
if face_id is None:
|
||||
face_id = f"face_{trace_id}"
|
||||
attributes = face.get("attributes")
|
||||
embedding = face.get("embedding")
|
||||
|
||||
@@ -319,14 +359,20 @@ def store_traced_faces(file_uuid: str, traced_json_path: str, schema: str = SCHE
|
||||
cur.execute(
|
||||
f"""
|
||||
UPDATE {schema}.face_detections
|
||||
SET trace_id = %s, embedding = %s
|
||||
SET trace_id = %s, embedding = %s, face_id = %s
|
||||
WHERE file_uuid = %s AND frame_number = %s
|
||||
AND x = %s AND y = %s AND width = %s AND height = %s
|
||||
""",
|
||||
(
|
||||
trace_id,
|
||||
embed_vec,
|
||||
file_uuid, frame_num, x, y, w, h,
|
||||
face_id,
|
||||
file_uuid,
|
||||
frame_num,
|
||||
x,
|
||||
y,
|
||||
w,
|
||||
h,
|
||||
),
|
||||
)
|
||||
if cur.rowcount > 0:
|
||||
@@ -348,7 +394,9 @@ def store_traced_faces(file_uuid: str, traced_json_path: str, schema: str = SCHE
|
||||
cur.close()
|
||||
conn.close()
|
||||
|
||||
print(f"[TRACE] Stored {total_stored} face detections, {db_trace_count} unique traces in DB")
|
||||
print(
|
||||
f"[TRACE] Stored {total_stored} face detections, {db_trace_count} unique traces in DB"
|
||||
)
|
||||
return total_stored, db_trace_count
|
||||
|
||||
|
||||
@@ -361,7 +409,11 @@ def main():
|
||||
parser.add_argument("--schema", default=SCHEMA, help="DB schema name")
|
||||
|
||||
parser.add_argument("--uuid", help="UUID for Redis tracking (accepted by executor)")
|
||||
parser.add_argument("--filter-eyes", action="store_true", help="Remove faces without eye landmarks before tracking")
|
||||
parser.add_argument(
|
||||
"--filter-eyes",
|
||||
action="store_true",
|
||||
help="Remove faces without eye landmarks before tracking",
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
face_json = args.face_json or os.path.join(
|
||||
|
||||
Reference in New Issue
Block a user