Changes: - Rust: face_trace → face_track (45 occurrences in 8 files) - Rust: gaze_trace → gaze_track, lip_trace → lip_track - Python: tkg_builder.py unified + pipeline_checklist.py fixed - Swift: swift_hand.swift hand state detection (empty vs holding) Node type changes: face_trace → face_track person_trace → body_track gaze_trace → gaze_track lip_trace → lip_track hand_trace → hand_track speaker → speaker_segment object → detected_object text_trace → text_region Migration: PUBLIC schema: 12970 + 892 + 305 rows updated
917 lines
31 KiB
Python
917 lines
31 KiB
Python
#!/opt/homebrew/bin/python3.11
|
|
"""
|
|
TKG Builder - Unified Temporal Knowledge Graph Builder
|
|
|
|
Builds graph nodes and edges from all pipeline outputs:
|
|
- Face tracks (face_detections with trace_id)
|
|
- Body tracks (pose.json + Level 1 appearance features)
|
|
- Detected objects (yolo.json)
|
|
- Speaker segments (asrx.json)
|
|
- Hand tracks (hand.json) [optional]
|
|
|
|
Node Types (V2.0 - intuitive naming):
|
|
NODES:
|
|
(face_track) - face tracking across frames
|
|
(body_track) - body appearance with Level 1 features
|
|
(detected_object) - YOLO detected objects
|
|
(speaker_segment) - speaker segments
|
|
(hand_track) - hand state tracking [optional]
|
|
EDGES:
|
|
(face_track) -[:CO_OCCURS_WITH]-> (detected_object) -- same frame
|
|
(face_track) -[:SPEAKS_AS]-> (speaker_segment) -- temporal overlap
|
|
(face_track) -[:HAS_BODY]-> (body_track) -- spatial proximity
|
|
(body_track) -[:HAS_HAND]-> (hand_track) -- wrist position
|
|
|
|
Usage:
|
|
python tkg_builder.py --file-uuid <uuid> [--schema <schema>] [--video <path>]
|
|
"""
|
|
|
|
import sys
|
|
import os
|
|
import json
|
|
import argparse
|
|
import psycopg2
|
|
import psycopg2.extras
|
|
import cv2
|
|
|
|
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
|
sys.path.insert(0, os.path.join(os.path.dirname(os.path.abspath(__file__)), "utils"))
|
|
|
|
try:
|
|
from utils.feature_extractor import HierarchicalFeatureExtractor
|
|
from utils.proportion_calculator import calculate_proportions, get_head_region
|
|
except ImportError:
|
|
print("[TKG] Warning: Level 1 feature extraction unavailable")
|
|
HierarchicalFeatureExtractor = None
|
|
calculate_proportions = None
|
|
get_head_region = None
|
|
|
|
DB_URL = os.environ.get("DATABASE_URL", "postgresql://accusys@localhost:5432/momentry")
|
|
SCHEMA = os.environ.get("DATABASE_SCHEMA", "dev")
|
|
OUTPUT_DIR = os.environ.get("MOMENTRY_OUTPUT_DIR", "/Users/accusys/momentry/output_dev")
|
|
|
|
|
|
def get_conn():
    """Open a new PostgreSQL connection using the module-level ``DB_URL``.

    Returns:
        The connection object returned by ``psycopg2.connect``. The caller
        is responsible for committing and closing it.
    """
    connection = psycopg2.connect(DB_URL)
    return connection
|
|
|
|
|
|
def ensure_node(cur, schema, file_uuid, node_type, external_id, label="", properties=None):
    """Upsert one graph node and return its database id.

    A node is identified by ``(file_uuid, node_type, external_id)``. When a
    row with that key already exists its ``properties`` are replaced and its
    ``label`` is replaced unless the new label is the empty string.

    Args:
        cur: Database cursor used to run the statement.
        schema: Schema name interpolated into the SQL text (not a bound
            parameter, so it must come from a trusted source).
        file_uuid: UUID of the media file the node belongs to.
        node_type: Node type string, e.g. ``"face_track"``.
        external_id: Identifier of the node within its type; stored as text.
        label: Human-readable label; ``""`` keeps an existing label.
        properties: JSON-serialisable mapping; a falsy value is stored as
            ``{}``.

    Returns:
        The ``id`` column of the inserted or updated row.
    """
    # NOTE: the payload is always a JSON string (never NULL), so the
    # COALESCE on properties always selects the new value.
    payload = json.dumps(properties if properties else {})
    upsert_sql = f"""
        INSERT INTO {schema}.tkg_nodes (node_type, external_id, file_uuid, label, properties)
        VALUES (%s, %s, %s, %s, %s::jsonb)
        ON CONFLICT (file_uuid, node_type, external_id)
        DO UPDATE SET properties = COALESCE(EXCLUDED.properties, tkg_nodes.properties),
                      label = COALESCE(NULLIF(EXCLUDED.label, ''), tkg_nodes.label)
        RETURNING id
        """
    params = (node_type, str(external_id), file_uuid, label, payload)
    cur.execute(upsert_sql, params)
    returned = cur.fetchone()
    return returned[0]
|
|
|
|
|
|
def ensure_edge(cur, schema, file_uuid, edge_type, source_id, target_id, properties=None):
    """Upsert one graph edge between two existing nodes.

    An edge is identified by ``(file_uuid, edge_type, source_node_id,
    target_node_id)``; on conflict its ``properties`` are replaced.

    Args:
        cur: Database cursor used to run the statement.
        schema: Schema name interpolated into the SQL text (not a bound
            parameter, so it must come from a trusted source).
        file_uuid: UUID of the media file the edge belongs to.
        edge_type: Edge type string, e.g. ``"CO_OCCURS_WITH"``.
        source_id: Database id of the source node.
        target_id: Database id of the target node.
        properties: JSON-serialisable mapping; a falsy value is stored as
            ``{}``.

    Returns:
        None.
    """
    payload = json.dumps(properties if properties else {})
    upsert_sql = f"""
        INSERT INTO {schema}.tkg_edges (edge_type, source_node_id, target_node_id, file_uuid, properties)
        VALUES (%s, %s, %s, %s, %s::jsonb)
        ON CONFLICT (file_uuid, edge_type, source_node_id, target_node_id)
        DO UPDATE SET properties = COALESCE(EXCLUDED.properties, tkg_edges.properties)
        """
    params = (edge_type, source_id, target_id, file_uuid, payload)
    cur.execute(upsert_sql, params)
|
|
|
|
|
|
def build_face_track_nodes(cur, schema, file_uuid):
    """Create one ``face_track`` node per distinct face ``trace_id``.

    Aggregates ``{schema}.face_detections`` rows of the file by ``trace_id``
    (rows without a trace id are ignored) and stores the frame count, the
    first/last frame number and the mean bounding box on each node.

    Args:
        cur: Database cursor.
        schema: Database schema name.
        file_uuid: UUID of the media file.

    Returns:
        Number of face tracks processed.
    """
    print("[TKG] Building face_track nodes...")
    summary_sql = f"""
        SELECT trace_id, COUNT(*) as frame_count,
               MIN(frame_number) as start_f, MAX(frame_number) as end_f,
               AVG(x::float) as avg_x,
               AVG(y::float) as avg_y,
               AVG(width::float) as avg_w,
               AVG(height::float) as avg_h
        FROM {schema}.face_detections
        WHERE file_uuid = %s AND trace_id IS NOT NULL
        GROUP BY trace_id
        ORDER BY trace_id
        """
    cur.execute(summary_sql, (file_uuid,))

    # Materialise the result first: ensure_node reuses the same cursor.
    track_rows = cur.fetchall()
    for trace_id, n_frames, first_frame, last_frame, mean_x, mean_y, mean_w, mean_h in track_rows:
        mean_bbox = {
            "x": round(mean_x or 0, 1),
            "y": round(mean_y or 0, 1),
            "width": round(mean_w or 0, 1),
            "height": round(mean_h or 0, 1),
        }
        node_props = {
            "frame_count": n_frames,
            "start_frame": first_frame,
            "end_frame": last_frame,
            "avg_bbox": mean_bbox,
        }
        ensure_node(
            cur, schema, file_uuid, "face_track",
            f"face_track_{trace_id}", f"Face Track {trace_id}", node_props,
        )

    count = len(track_rows)
    print(f"[TKG] {count} face_track nodes created")
    return count
|
|
|
|
|
|
def load_json_safe(path):
    """Load a JSON file, tolerating a truncated tail from an in-progress write.

    The file is read exactly once, so the strict parse and the recovery
    attempt always look at the same bytes even if a writer is still
    appending to it.

    Recovery is a heuristic: the text is cut after the last ``"}`` and two
    closing braces are appended. This only yields valid JSON when the file
    was truncated with exactly two objects still open (for example
    ``{"frames": {"12": {...}, ...``) and the last complete entry ends in a
    string value.

    Args:
        path: Path of the JSON file.

    Returns:
        The parsed JSON value, or None when the file does not exist or
        cannot be parsed or recovered.
    """
    if not os.path.exists(path):
        return None

    # errors="replace": a file truncated in the middle of a multi-byte UTF-8
    # character must still reach the recovery path instead of raising
    # UnicodeDecodeError.
    with open(path, encoding="utf-8", errors="replace") as f:
        content = f.read()

    try:
        return json.loads(content)
    except json.JSONDecodeError:
        print(f"[TKG] Warning: {path} is in-progress, loading partial data")

    # Cut after the last complete '"}' and close the two enclosing objects.
    last_valid = content.rfind('"}')
    if last_valid > 0:
        try:
            return json.loads(content[:last_valid + 2] + "\n}}")
        except json.JSONDecodeError:
            pass
    return None
|
|
|
|
|
|
def build_detected_object_nodes(cur, schema, file_uuid):
    """Create one ``detected_object`` node per object class found in yolo.json.

    Reads ``<OUTPUT_DIR>/<file_uuid>.yolo.json`` through ``load_json_safe``,
    counts the detections of every ``class_name`` over all frames and stores
    the total on a node whose external id and label are the class name.

    Args:
        cur: Database cursor.
        schema: Database schema name.
        file_uuid: UUID of the media file.

    Returns:
        Number of distinct object classes processed, or 0 when yolo.json
        could not be loaded.
    """
    yolo = load_json_safe(os.path.join(OUTPUT_DIR, f"{file_uuid}.yolo.json"))
    if yolo is None:
        print("[TKG] yolo.json not available, skipping detected_object nodes")
        return 0

    # Tally detections per class; a frame lists them under "detections" or,
    # failing that, "objects".
    tally = {}
    for frame_entry in yolo.get("frames", {}).values():
        for det in frame_entry.get("detections", frame_entry.get("objects", [])):
            class_name = det.get("class_name", "unknown")
            tally[class_name] = tally.get(class_name, 0) + 1

    for class_name, total in sorted(tally.items()):
        ensure_node(
            cur, schema, file_uuid, "detected_object",
            class_name, class_name,
            {"total_detections": total},
        )

    count = len(tally)
    print(f"[TKG] {count} detected_object nodes created")
    return count
|
|
|
|
|
|
def build_speaker_segment_nodes(cur, schema, file_uuid):
    """Create one ``speaker_segment`` node per speaker listed in asrx.json.

    Reads the ``speaker_stats`` mapping of
    ``<OUTPUT_DIR>/<file_uuid>.asrx.json``. Each key becomes a node whose
    external id is the lower-cased key, whose label is the key as written,
    and whose ``segment_count`` property is the entry's ``count`` value.

    Args:
        cur: Database cursor.
        schema: Database schema name.
        file_uuid: UUID of the media file.

    Returns:
        Number of speakers processed, or 0 when asrx.json does not exist.
    """
    asrx_path = os.path.join(OUTPUT_DIR, f"{file_uuid}.asrx.json")
    if not os.path.exists(asrx_path):
        print("[TKG] asrx.json not found, skipping speaker_segment nodes")
        return 0

    with open(asrx_path, encoding="utf-8") as f:
        asrx = json.load(f)

    count = 0
    for sid, sinfo in asrx.get("speaker_stats", {}).items():
        # The external id is the lower-cased speaker key; the previous
        # .replace("speaker_", "speaker_") was a no-op and has been removed.
        ensure_node(
            cur, schema, file_uuid, "speaker_segment",
            sid.lower(), sid,
            {"segment_count": sinfo.get("count", 0)},
        )
        count += 1

    print(f"[TKG] {count} speaker_segment nodes created")
    return count
|
|
|
|
|
|
def build_co_occurrence_edges(cur, schema, file_uuid):
    """Build CO_OCCURS_WITH edges: face_track -> detected_object in the same frame.

    For every face detection that has a trace id, the YOLO detections of the
    same frame number are looked up in yolo.json, and an edge is upserted
    from the face track node to the node of each detected class. Because
    edges are unique per (file, type, source, target), later frames
    overwrite the properties written by earlier frames of the same pair.

    Each upsert runs inside a savepoint so that a failing edge is undone on
    its own. A full ``rollback()`` here would discard every node and edge
    written earlier in the still-uncommitted transaction.

    Args:
        cur: Database cursor. It must be inside a transaction (not
            autocommit), otherwise SAVEPOINT is rejected by PostgreSQL.
        schema: Database schema name.
        file_uuid: UUID of the media file.

    Returns:
        Number of successful edge upserts (one per face detection and
        object detection pair, not the number of distinct edges).
    """
    print("[TKG] Building co-occurrence edges (face-object within same frame)...")

    yolo_path = os.path.join(OUTPUT_DIR, f"{file_uuid}.yolo.json")
    yolo = load_json_safe(yolo_path)
    if yolo is None:
        print("[TKG] yolo.json not available, skipping co-occurrence")
        return 0

    yolo_frames = yolo.get("frames", {})

    # Query face detections with trace_id
    cur.execute(
        f"""
        SELECT trace_id, frame_number, x, y, width, height
        FROM {schema}.face_detections
        WHERE file_uuid = %s AND trace_id IS NOT NULL
        ORDER BY frame_number
        """,
        (file_uuid,),
    )
    face_rows = cur.fetchall()
    print(f"[TKG] Checking {len(face_rows)} face detections against YOLO frames...")

    # Node ids do not change while this function runs, so each
    # (node_type, external_id) is resolved once instead of once per detection.
    node_id_cache = {}

    def lookup_node(node_type, external_id):
        key = (node_type, external_id)
        if key not in node_id_cache:
            cur.execute(
                f"SELECT id FROM {schema}.tkg_nodes "
                "WHERE file_uuid=%s AND node_type=%s AND external_id=%s",
                (file_uuid, node_type, external_id),
            )
            found = cur.fetchone()
            node_id_cache[key] = found[0] if found else None
        return node_id_cache[key]

    edge_count = 0
    failed_count = 0
    for tid, frame_num, fx, fy, fw, fh in face_rows:
        # yolo.json frames are keyed by the frame number as a string.
        yolo_frame = yolo_frames.get(str(frame_num))
        if not yolo_frame:
            continue

        detections = yolo_frame.get("detections", yolo_frame.get("objects", []))
        if not detections:
            continue

        face_node_id = lookup_node("face_track", f"face_track_{tid}")
        if face_node_id is None:
            continue

        for det in detections:
            cls = det.get("class_name", "unknown")
            confidence = det.get("confidence", 0)

            obj_node_id = lookup_node("detected_object", cls)
            if obj_node_id is None:
                continue

            # Spatial distance between the two bounding-box centres.
            fc_x = fx + fw / 2
            fc_y = fy + fh / 2
            od_x = det.get("x1", 0) + (det.get("x2", 0) - det.get("x1", 0)) / 2
            od_y = det.get("y1", 0) + (det.get("y2", 0) - det.get("y1", 0)) / 2
            distance = ((fc_x - od_x) ** 2 + (fc_y - od_y) ** 2) ** 0.5

            edge_props = {
                "frame": frame_num,
                "distance_px": round(distance, 1),
                "object_confidence": confidence,
                "face_bbox": {"x": fx, "y": fy, "width": fw, "height": fh},
                "object_bbox": {
                    "x1": det.get("x1"), "y1": det.get("y1"),
                    "x2": det.get("x2"), "y2": det.get("y2"),
                },
            }

            cur.execute("SAVEPOINT tkg_co_occurrence_edge")
            try:
                ensure_edge(
                    cur, schema, file_uuid,
                    "CO_OCCURS_WITH",
                    face_node_id, obj_node_id,
                    edge_props,
                )
            except Exception as exc:
                # Best effort: undo only this edge and keep going.
                cur.execute("ROLLBACK TO SAVEPOINT tkg_co_occurrence_edge")
                cur.execute("RELEASE SAVEPOINT tkg_co_occurrence_edge")
                failed_count += 1
                if failed_count == 1:
                    print(f"[TKG] Warning: co-occurrence edge upsert failed: {exc}")
                continue
            cur.execute("RELEASE SAVEPOINT tkg_co_occurrence_edge")
            edge_count += 1

    if failed_count:
        print(f"[TKG] Warning: {failed_count} co-occurrence edges skipped after errors")
    print(f"[TKG] {edge_count} co-occurrence edges created")
    return edge_count
|
|
|
|
|
|
def build_speaker_face_edges(cur, schema, file_uuid):
    """Build SPEAKS_AS edges: face_track -> speaker_segment via temporal overlap.

    The frame span of each face track is converted to seconds and compared
    with every segment in asrx.json. An edge is upserted when a single
    segment overlaps at least 30% of the face track's duration.

    The frame rate is derived from the last segment
    (``end_frame / end_time``) and falls back to 30.0 when either value is
    missing or zero.

    Speaker nodes are looked up by the lower-cased ``speaker_id``, matching
    the lower-cased external id that ``build_speaker_segment_nodes`` stores.

    Args:
        cur: Database cursor.
        schema: Database schema name.
        file_uuid: UUID of the media file.

    Returns:
        Number of edge upserts performed (several segments of one speaker
        may upsert the same edge more than once), or 0 when asrx.json is
        missing or has no segments.
    """
    asrx_path = os.path.join(OUTPUT_DIR, f"{file_uuid}.asrx.json")
    if not os.path.exists(asrx_path):
        print("[TKG] asrx.json not found, skipping speaker edges")
        return 0

    with open(asrx_path, encoding="utf-8") as f:
        asrx = json.load(f)

    segments = asrx.get("segments", [])
    if not segments:
        print("[TKG] No speaker segments found")
        return 0

    # Get face track nodes with their frame spans
    cur.execute(
        f"""
        SELECT trace_id, MIN(frame_number) as start_f, MAX(frame_number) as end_f
        FROM {schema}.face_detections
        WHERE file_uuid = %s AND trace_id IS NOT NULL
        GROUP BY trace_id
        """,
        (file_uuid,),
    )
    traces = cur.fetchall()

    # Derive fps from the last segment; a missing key or a zero value would
    # otherwise raise KeyError / ZeroDivisionError.
    last_end_frame = segments[-1].get("end_frame")
    last_end_time = segments[-1].get("end_time")
    fps = last_end_frame / last_end_time if last_end_frame and last_end_time else 30.0

    # Speaker node ids are resolved once per speaker, not once per segment.
    speaker_node_cache = {}

    edge_count = 0
    for tid, sf, ef in traces:
        cur.execute(
            f"SELECT id FROM {schema}.tkg_nodes WHERE file_uuid=%s AND node_type='face_track' AND external_id=%s",
            (file_uuid, f"face_track_{tid}"),
        )
        ft_row = cur.fetchone()
        if not ft_row:
            continue
        face_node_id = ft_row[0]

        face_start_sec = sf / fps if fps > 0 else 0
        face_end_sec = ef / fps if fps > 0 else 0
        face_dur = face_end_sec - face_start_sec

        for seg in segments:
            seg_start = seg.get("start_time", 0)
            seg_end = seg.get("end_time", 0)

            # Check temporal overlap
            overlap_start = max(face_start_sec, seg_start)
            overlap_end = min(face_end_sec, seg_end)
            if overlap_start >= overlap_end:
                continue

            overlap_dur = overlap_end - overlap_start
            overlap_ratio = overlap_dur / face_dur if face_dur > 0 else 0
            if overlap_ratio < 0.3:  # minimum 30% overlap
                continue

            # Normalise the id the same way the speaker nodes were stored.
            speaker_key = str(seg.get("speaker_id", "")).lower()
            if speaker_key not in speaker_node_cache:
                cur.execute(
                    f"SELECT id FROM {schema}.tkg_nodes WHERE file_uuid=%s AND node_type='speaker_segment' AND external_id=%s",
                    (file_uuid, speaker_key),
                )
                sp_row = cur.fetchone()
                speaker_node_cache[speaker_key] = sp_row[0] if sp_row else None
            speaker_node_id = speaker_node_cache[speaker_key]
            if speaker_node_id is None:
                continue

            ensure_edge(
                cur, schema, file_uuid,
                "SPEAKS_AS",
                face_node_id, speaker_node_id,
                {
                    "overlap_ratio": round(overlap_ratio, 3),
                    "overlap_duration_s": round(overlap_dur, 1),
                    "face_time_range": f"{face_start_sec:.1f}-{face_end_sec:.1f}s",
                    "speaker_time_range": f"{seg_start:.1f}-{seg_end:.1f}s",
                },
            )
            edge_count += 1

    print(f"[TKG] {edge_count} speaker-face edges created")
    return edge_count
|
|
|
|
|
|
def build_face_face_edges(cur, schema, file_uuid):
    """Build CO_OCCURS_WITH edges: face_track -> face_track in the same frame.

    Self-joins ``{schema}.face_detections`` on frame number to find every
    pair of distinct face tracks visible together, groups the frames per
    pair, and upserts one edge per pair from the lower to the higher trace
    id. Each edge carries the first shared frame and the number of shared
    frames.

    Args:
        cur: Database cursor.
        schema: Database schema name.
        file_uuid: UUID of the media file.

    Returns:
        Number of edges upserted (one per co-occurring pair whose two
        face_track nodes exist).
    """
    print("[TKG] Building face-face co-occurrence edges...")

    cur.execute(
        f"""
        SELECT a.trace_id AS tid_a, b.trace_id AS tid_b,
               a.frame_number, a.timestamp_secs,
               a.x AS ax, a.y AS ay, a.width AS aw, a.height AS ah,
               b.x AS bx, b.y AS by, b.width AS bw, b.height AS bh
        FROM {schema}.face_detections a
        JOIN {schema}.face_detections b
          ON a.file_uuid = b.file_uuid
         AND a.frame_number = b.frame_number
         AND a.trace_id < b.trace_id
        WHERE a.file_uuid = %s
          AND a.trace_id IS NOT NULL
          AND b.trace_id IS NOT NULL
        ORDER BY a.frame_number
        """,
        (file_uuid,),
    )
    rows = cur.fetchall()
    if not rows:
        print("[TKG] No face-face co-occurrences found")
        return 0

    # Group the shared frames per pair. Rows arrive ordered by frame number,
    # so the first entry of every list is the first shared frame. Only the
    # first three columns are used here.
    pair_frames = {}
    for row in rows:
        tid_a, tid_b, frame = row[0], row[1], row[2]
        key = (min(tid_a, tid_b), max(tid_a, tid_b))
        pair_frames.setdefault(key, []).append(frame)

    node_lookup_sql = (
        f"SELECT id FROM {schema}.tkg_nodes "
        "WHERE file_uuid=%s AND node_type='face_track' AND external_id=%s"
    )

    edge_count = 0
    for (tid_a, tid_b), frames in pair_frames.items():
        cur.execute(node_lookup_sql, (file_uuid, f"face_track_{tid_a}"))
        n_a = cur.fetchone()
        # Both ends use the "face_track_<id>" external id written by
        # build_face_track_nodes; the old "trace_<id>" form never matched.
        cur.execute(node_lookup_sql, (file_uuid, f"face_track_{tid_b}"))
        n_b = cur.fetchone()
        if not n_a or not n_b:
            continue

        ensure_edge(
            cur, schema, file_uuid,
            "CO_OCCURS_WITH",
            n_a[0], n_b[0],
            {
                "first_frame": int(frames[0]),
                "frame_count": len(frames),
            },
        )
        edge_count += 1

    print(f"[TKG] {edge_count} face-face co-occurrence edges created")
    return edge_count
|
|
|
|
|
|
def extract_level1_features(video_path, pose_json_path):
    """Extract Level 1 features for each person in each pose frame.

    For every frame in pose.json that lists persons, the matching video
    frame is decoded and ``HierarchicalFeatureExtractor.extract_level1`` is
    run on every person with a positive-size bounding box.

    Args:
        video_path: Path to the video file.
        pose_json_path: Path to pose.json.

    Returns:
        A list of dicts, one per (frame, person), with the keys ``frame``,
        ``timestamp``, ``person_index``, ``bbox``, ``proportions`` and
        ``level1_features``. The list is empty when the extractor could not
        be imported, pose.json is missing, or the video cannot be opened.
    """
    if HierarchicalFeatureExtractor is None:
        print("[TKG] Level 1 feature extractor not available")
        return []

    if not os.path.exists(pose_json_path):
        print(f"[TKG] pose.json not found: {pose_json_path}")
        return []

    with open(pose_json_path) as f:
        pose_data = json.load(f)

    cap = cv2.VideoCapture(video_path)
    results = []
    # try/finally: the capture handle is released even if extraction raises.
    try:
        if not cap.isOpened():
            print(f"[TKG] Cannot open video: {video_path}")
            return []

        # A missing, null or zero fps falls back to 30.0 so the timestamp
        # fallback below can never divide by zero.
        fps = pose_data.get("fps") or 30.0
        extractor = HierarchicalFeatureExtractor()

        for pose_frame in pose_data.get("frames", []):
            frame_num = pose_frame["frame"]
            persons = pose_frame.get("persons", [])
            if not persons:
                continue

            # Seek to the pose frame and decode it.
            cap.set(cv2.CAP_PROP_POS_FRAMES, frame_num)
            ret, frame = cap.read()
            if not ret:
                continue

            for person_idx, person in enumerate(persons):
                bbox = person.get("bbox", {})
                keypoints = person.get("keypoints", [])

                if bbox.get("width", 0) <= 0 or bbox.get("height", 0) <= 0:
                    continue

                # The helpers are None when the optional utils import failed.
                proportions = calculate_proportions(keypoints, bbox) if calculate_proportions else {}
                head_region = get_head_region(keypoints) if get_head_region else {}
                level1 = extractor.extract_level1(frame, bbox, head_region)

                timestamp = pose_frame.get("timestamp")
                if "timestamp" not in pose_frame:
                    timestamp = frame_num / fps

                results.append({
                    "frame": frame_num,
                    "timestamp": timestamp,
                    "person_index": person_idx,
                    "bbox": bbox,
                    "proportions": proportions,
                    "level1_features": level1,
                })
    finally:
        cap.release()

    print(f"[TKG] Extracted Level 1 features: {len(results)} frame-person pairs")
    return results
|
|
|
|
|
|
def average_colors(color_lists):
    """Average the first colour of every non-empty colour list.

    Args:
        color_lists: Sequence of colour lists; each colour is itself a
            sequence of channel values. Empty lists are ignored and only
            the first colour of each remaining list takes part.

    Returns:
        The channel-wise mean of those first colours, each rounded to two
        decimals, or ``[]`` when there is nothing to average.
    """
    if not color_lists:
        return []

    leading = [colors[0] for colors in color_lists if colors]
    if not leading:
        return []

    # zip(*leading) regroups the colours channel by channel.
    return [round(sum(channel) / len(channel), 2) for channel in zip(*leading)]
|
|
|
|
|
|
def average_h_mean(items, region):
    """Average the ``h_mean`` colour value of one region over Level 1 items.

    Args:
        items: Iterable of dicts that may carry
            ``level1_features[region]["color"]["h_mean"]``.
        region: Region key to read, e.g. ``"body"``.

    Returns:
        The mean of all truthy ``h_mean`` values rounded to two decimals,
        or 0 when no item supplies one. Values equal to 0 are skipped.
    """
    samples = []
    for entry in items:
        features = entry.get("level1_features", {})
        if region not in features or "color" not in features[region]:
            continue
        hue = features[region]["color"].get("h_mean", 0)
        if hue:
            samples.append(hue)

    if not samples:
        return 0
    return round(sum(samples) / len(samples), 2)
|
|
|
|
|
|
def average_bbox(bboxes):
    """Average a list of bounding boxes field by field.

    Args:
        bboxes: List of dicts with ``x``, ``y``, ``width`` and ``height``;
            a missing field counts as 0.

    Returns:
        A dict with the four averaged fields, each rounded to one decimal,
        or ``{}`` for an empty input.
    """
    if not bboxes:
        return {}

    total = len(bboxes)
    return {
        field: round(sum(box.get(field, 0) for box in bboxes) / total, 1)
        for field in ("x", "y", "width", "height")
    }
|
|
|
|
|
|
def build_body_track_nodes(cur, schema, file_uuid, video_path=None):
    """Create ``body_track`` nodes carrying averaged Level 1 appearance features.

    Level 1 features are extracted from the video for every person in
    pose.json and grouped by ``person_index``. One node is written per
    index, holding the frame list, the mean bounding box, the first
    non-empty height estimate and body shape, and the averaged colour data
    of the body, head_top, upper_body and lower_body regions.

    Args:
        cur: Database cursor.
        schema: Database schema name.
        file_uuid: UUID of the media file.
        video_path: Video file path; defaults to
            ``<OUTPUT_DIR>/<file_uuid>.mp4``.

    Returns:
        Number of body_track nodes written, or 0 when pose.json or the
        video is missing or no features could be extracted.
    """
    pose_json_path = os.path.join(OUTPUT_DIR, f"{file_uuid}.pose.json")
    if not os.path.exists(pose_json_path):
        print("[TKG] pose.json not found, skipping body_track nodes")
        return 0

    if video_path is None:
        video_path = os.path.join(OUTPUT_DIR, f"{file_uuid}.mp4")
    if not os.path.exists(video_path):
        print(f"[TKG] Video not found: {video_path}, skipping body_track")
        return 0

    print("[TKG] Building body_track nodes with Level 1 features...")

    level1_data = extract_level1_features(video_path, pose_json_path)
    if not level1_data:
        print("[TKG] No Level 1 data extracted")
        return 0

    # Group the per-frame samples by the person's index within its frame.
    by_person = {}
    for sample in level1_data:
        by_person.setdefault(sample["person_index"], []).append(sample)

    regions = ("body", "head_top", "upper_body", "lower_body")

    count = 0
    for person_idx, samples in by_person.items():
        frames = []
        bboxes = []
        palettes = {region: [] for region in regions}

        for sample in samples:
            features = sample.get("level1_features", {})
            frames.append(sample["frame"])
            bboxes.append(sample["bbox"])
            for region in regions:
                if region in features and "color" in features[region]:
                    palettes[region].append(
                        features[region]["color"].get("dominant_colors", [])
                    )

        # Despite the key names these are not averages: the first non-empty
        # value found among the samples is kept.
        height_estimate = {}
        body_shape = {}
        for sample in samples:
            proportions = sample.get("proportions", {})
            if "height_estimate" in proportions and not height_estimate:
                height_estimate = proportions["height_estimate"]
            if "body_shape" in proportions and not body_shape:
                body_shape = proportions["body_shape"]

        level1_summary = {
            region: {
                "dominant_colors": average_colors(palettes[region]),
                "h_mean": average_h_mean(samples, region),
            }
            for region in regions
        }

        properties = {
            "frame_count": len(frames),
            "frames": frames,
            "avg_bbox": average_bbox(bboxes),
            "height_estimate": height_estimate,
            "body_shape": body_shape,
            "level1_features": level1_summary,
        }

        ensure_node(
            cur, schema, file_uuid, "body_track",
            f"body_track_{person_idx}", f"Body Track {person_idx}", properties,
        )
        count += 1

    print(f"[TKG] {count} body_track nodes created")
    return count
|
|
|
|
|
|
def build_hand_track_nodes(cur, schema, file_uuid):
    """Create ``hand_track`` nodes from hand.json.

    Entries under each frame's ``persons`` list are grouped by
    ``(person_id, hand_type)``. One node is written per group with the
    frames it appears in and how often its ``hand_state`` was ``"empty"``
    or ``"holding"``.

    Args:
        cur: Database cursor.
        schema: Database schema name.
        file_uuid: UUID of the media file.

    Returns:
        Number of hand_track nodes written, or 0 when hand.json is missing
        or contains no frames.
    """
    hand_json_path = os.path.join(OUTPUT_DIR, f"{file_uuid}.hand.json")
    if not os.path.exists(hand_json_path):
        print("[TKG] hand.json not found, skipping hand_track nodes")
        return 0

    with open(hand_json_path) as f:
        hand_data = json.load(f)

    frames = hand_data.get("frames", [])
    if not frames:
        print("[TKG] No hand frames found")
        return 0

    print("[TKG] Building hand_track nodes...")

    # (person_id, hand_type) -> frames seen and the hand state in each.
    grouped = {}
    for frame_data in frames:
        frame_num = frame_data.get("frame", 0)
        for person in frame_data.get("persons", []):
            key = (person.get("person_id", 0), person.get("hand_type", "unknown"))
            bucket = grouped.setdefault(key, {"frames": [], "hand_states": []})
            bucket["frames"].append(frame_num)
            bucket["hand_states"].append(person.get("hand_state", "unknown"))

    count = 0
    for (person_id, hand_type), bucket in grouped.items():
        states = bucket["hand_states"]
        empty_count = states.count("empty")
        holding_count = states.count("holding")

        properties = {
            "frame_count": len(bucket["frames"]),
            "frames": bucket["frames"],
            "person_id": person_id,
            "hand_type": hand_type,
            "empty_count": empty_count,
            "holding_count": holding_count,
            # Holds the hand-state counts, not per-gesture counts.
            "gesture_summary": {
                "empty": empty_count,
                "holding": holding_count,
            },
        }

        ensure_node(
            cur, schema, file_uuid, "hand_track",
            f"hand_track_{person_id}_{hand_type}",
            f"Hand Track {person_id} ({hand_type})",
            properties,
        )
        count += 1

    print(f"[TKG] {count} hand_track nodes created")
    return count
|
|
|
|
|
|
def build_face_body_edges(cur, schema, file_uuid, max_distance_px=200):
    """Build HAS_BODY edges: face_track -> body_track via spatial proximity.

    For every face detection the pose person of the same frame whose
    bounding-box centre is closest to the face centre is selected. Matches
    farther away than ``max_distance_px`` are dropped. The distances are
    collected per (face track, person index) pair and one edge per pair is
    upserted with the mean distance in ``avg_distance_px``.

    Upserting once per detection, as done previously, left only the last
    frame's distance in ``avg_distance_px`` and issued two lookups per
    detection.

    Args:
        cur: Database cursor.
        schema: Database schema name.
        file_uuid: UUID of the media file.
        max_distance_px: Largest centre-to-centre distance accepted as a
            face/body match.

    Returns:
        Number of distinct edges upserted, or 0 when pose.json is missing.
    """
    print("[TKG] Building face-body edges...")

    pose_json_path = os.path.join(OUTPUT_DIR, f"{file_uuid}.pose.json")
    if not os.path.exists(pose_json_path):
        print("[TKG] pose.json not found, skipping face-body edges")
        return 0

    with open(pose_json_path) as f:
        pose_data = json.load(f)

    pose_frames = {f["frame"]: f.get("persons", []) for f in pose_data.get("frames", [])}

    cur.execute(
        f"""
        SELECT ft.trace_id, ft.frame_number, ft.x, ft.y, ft.width, ft.height
        FROM {schema}.face_detections ft
        WHERE ft.file_uuid = %s AND ft.trace_id IS NOT NULL
        ORDER BY ft.frame_number
        """,
        (file_uuid,),
    )
    face_rows = cur.fetchall()

    # (trace_id, person_idx) -> distances of every frame where they matched.
    pair_distances = {}
    for trace_id, frame_num, fx, fy, fw, fh in face_rows:
        pose_persons = pose_frames.get(frame_num, [])

        face_center_x = fx + fw / 2
        face_center_y = fy + fh / 2

        best_person_idx = None
        best_distance = float("inf")
        for person_idx, person in enumerate(pose_persons):
            bbox = person.get("bbox", {})
            if bbox.get("width", 0) <= 0:
                continue

            body_center_x = bbox.get("x", 0) + bbox.get("width", 0) / 2
            body_center_y = bbox.get("y", 0) + bbox.get("height", 0) / 2
            distance = ((face_center_x - body_center_x) ** 2 + (face_center_y - body_center_y) ** 2) ** 0.5

            if distance < best_distance:
                best_distance = distance
                best_person_idx = person_idx

        if best_person_idx is None or best_distance > max_distance_px:
            continue

        pair_distances.setdefault((trace_id, best_person_idx), []).append(best_distance)

    edge_count = 0
    for (trace_id, person_idx), distances in pair_distances.items():
        cur.execute(
            f"SELECT id FROM {schema}.tkg_nodes WHERE file_uuid=%s AND node_type='face_track' AND external_id=%s",
            (file_uuid, f"face_track_{trace_id}"),
        )
        face_row = cur.fetchone()

        cur.execute(
            f"SELECT id FROM {schema}.tkg_nodes WHERE file_uuid=%s AND node_type='body_track' AND external_id=%s",
            (file_uuid, f"body_track_{person_idx}"),
        )
        body_row = cur.fetchone()

        if not face_row or not body_row:
            continue

        ensure_edge(
            cur, schema, file_uuid,
            "HAS_BODY",
            face_row[0], body_row[0],
            {"avg_distance_px": round(sum(distances) / len(distances), 1)},
        )
        edge_count += 1

    print(f"[TKG] {edge_count} face-body edges created")
    return edge_count
|
|
|
|
|
|
def build_body_hand_edges(cur, schema, file_uuid):
    """Build HAS_HAND edges: body_track -> hand_track via ``person_id``.

    Every distinct ``(person_id, hand_type)`` found in hand.json is linked
    from the node ``body_track_<person_id>`` to the node
    ``hand_track_<person_id>_<hand_type>`` when both nodes exist.

    Args:
        cur: Database cursor.
        schema: Database schema name.
        file_uuid: UUID of the media file.

    Returns:
        Number of edges upserted, or 0 when hand.json is missing or has no
        frames.
    """
    print("[TKG] Building body-hand edges...")

    hand_json_path = os.path.join(OUTPUT_DIR, f"{file_uuid}.hand.json")
    if not os.path.exists(hand_json_path):
        print("[TKG] hand.json not found, skipping body-hand edges")
        return 0

    with open(hand_json_path) as f:
        hand_data = json.load(f)

    frames = hand_data.get("frames", [])
    if not frames:
        return 0

    # A dict is used as an insertion-ordered set of (person_id, hand_type).
    seen_hands = {}
    for frame_data in frames:
        for person in frame_data.get("persons", []):
            pid = person.get("person_id", 0)
            seen_hands[(pid, person.get("hand_type", "unknown"))] = pid

    node_sql_prefix = f"SELECT id FROM {schema}.tkg_nodes WHERE file_uuid=%s AND node_type="

    edge_count = 0
    for person_id, hand_type in seen_hands:
        cur.execute(
            node_sql_prefix + "'body_track' AND external_id=%s",
            (file_uuid, f"body_track_{person_id}"),
        )
        body_row = cur.fetchone()

        cur.execute(
            node_sql_prefix + "'hand_track' AND external_id=%s",
            (file_uuid, f"hand_track_{person_id}_{hand_type}"),
        )
        hand_row = cur.fetchone()

        if body_row and hand_row:
            ensure_edge(
                cur, schema, file_uuid,
                "HAS_HAND",
                body_row[0], hand_row[0],
                {"hand_type": hand_type},
            )
            edge_count += 1

    print(f"[TKG] {edge_count} body-hand edges created")
    return edge_count
|
|
|
|
|
|
def main():
    """Command-line entry point: build the whole graph for one file.

    Runs all node builders, then all edge builders, inside a single
    transaction. The transaction is committed only when every builder
    succeeds; on any exception it is rolled back and the exception is
    re-raised. The cursor and the connection are always closed.
    """
    parser = argparse.ArgumentParser(description="Build Temporal Knowledge Graph")
    parser.add_argument("--file-uuid", "-u", required=True, help="File UUID")
    parser.add_argument("--schema", "-s", default=SCHEMA, help="Database schema")
    parser.add_argument("--video", "-v", help="Video path (optional, auto-detected)")
    # Accepted but not read by this script.
    parser.add_argument("--uuid", help="UUID for Redis tracking (accepted by executor)")
    args = parser.parse_args()

    video_path = args.video or os.path.join(OUTPUT_DIR, f"{args.file_uuid}.mp4")

    conn = get_conn()
    try:
        cur = conn.cursor()
        try:
            print(f"[TKG] Building graph for {args.file_uuid}...")
            print(f"[TKG] Video: {video_path}")

            # Nodes first: the edge builders look the nodes up by external id.
            n1 = build_face_track_nodes(cur, args.schema, args.file_uuid)
            n2 = build_body_track_nodes(cur, args.schema, args.file_uuid, video_path)
            n3 = build_detected_object_nodes(cur, args.schema, args.file_uuid)
            n4 = build_speaker_segment_nodes(cur, args.schema, args.file_uuid)
            n5 = build_hand_track_nodes(cur, args.schema, args.file_uuid)

            e1 = build_co_occurrence_edges(cur, args.schema, args.file_uuid)
            e2 = build_speaker_face_edges(cur, args.schema, args.file_uuid)
            e3 = build_face_face_edges(cur, args.schema, args.file_uuid)
            e4 = build_face_body_edges(cur, args.schema, args.file_uuid)
            e5 = build_body_hand_edges(cur, args.schema, args.file_uuid)

            conn.commit()
        except Exception:
            # Leave no half-built graph behind.
            conn.rollback()
            raise
        finally:
            cur.close()
    finally:
        conn.close()

    total_nodes = n1 + n2 + n3 + n4 + n5
    total_edges = e1 + e2 + e3 + e4 + e5

    print(f"\n[TKG] Complete: {total_nodes} nodes, {total_edges} edges")
    print(f" Face tracks: {n1}")
    print(f" Body tracks: {n2}")
    print(f" Detected objects: {n3}")
    print(f" Speaker segments: {n4}")
    print(f" Hand tracks: {n5}")
    print(f" Co-occur edges: {e1}")
    print(f" Speaker-face: {e2}")
    print(f" Face-face: {e3}")
    print(f" Face-body: {e4}")
    print(f" Body-hand: {e5}")
|
|
|
|
|
|
if __name__ == "__main__":
|
|
main()
|