release: v1.3.0 - TKG node type renaming
Changes: - Rust: face_trace → face_track (45 occurrences in 8 files) - Rust: gaze_trace → gaze_track, lip_trace → lip_track - Python: tkg_builder.py unified + pipeline_checklist.py fixed - Swift: swift_hand.swift hand state detection (empty vs holding) Node type changes: face_trace → face_track person_trace → body_track gaze_trace → gaze_track lip_trace → lip_track hand_trace → hand_track speaker → speaker_segment object → detected_object text_trace → text_region Migration: PUBLIC schema: 12970 + 892 + 305 rows updated
This commit is contained in:
139
scripts/migrate_tkg_node_types.py
Normal file
139
scripts/migrate_tkg_node_types.py
Normal file
@@ -0,0 +1,139 @@
|
||||
#!/opt/homebrew/bin/python3.11
|
||||
"""
|
||||
Migrate TKG Node Types to V2.0 Intuitive Naming
|
||||
|
||||
Renames node types in tkg_nodes table:
|
||||
face_trace → face_track
|
||||
person_trace → body_track
|
||||
gaze_trace → gaze_track
|
||||
lip_trace → lip_track
|
||||
hand_trace → hand_track
|
||||
speaker → speaker_segment
|
||||
object → detected_object
|
||||
text_trace → text_region
|
||||
|
||||
Also updates external_id format:
|
||||
trace_1 → face_track_1
|
||||
person_0 → body_track_0
|
||||
SPEAKER_01 → speaker_01
|
||||
|
||||
Usage:
|
||||
    python migrate_tkg_node_types.py [--schema <schema>] [--dry-run]
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import psycopg2
|
||||
|
||||
# Connection string and target schema; both can be overridden via the environment.
DB_URL = os.environ.get("DATABASE_URL", "postgresql://accusys@localhost:5432/momentry")
SCHEMA = os.environ.get("DATABASE_SCHEMA", "dev")

# Legacy node_type -> V2.0 node_type.
NODE_TYPE_MIGRATIONS = {
    "face_trace": "face_track",
    "person_trace": "body_track",
    "gaze_trace": "gaze_track",
    "lip_trace": "lip_track",
    "hand_trace": "hand_track",
    "speaker": "speaker_segment",
    "object": "detected_object",
    "text_trace": "text_region",
}

# Legacy node_type -> callable rewriting a legacy external_id into the new format.
# These are the Python counterparts of the SQL expressions used by
# migrate_node_types(); note that str.replace rewrites every occurrence of the
# old fragment, not only a leading prefix.
EXTERNAL_ID_MIGRATIONS = {
    "face_trace": lambda x: x.replace("trace_", "face_track_"),
    "person_trace": lambda x: x.replace("person_", "body_track_"),
    "gaze_trace": lambda x: x.replace("trace_", "gaze_track_"),
    "lip_trace": lambda x: x.replace("trace_", "lip_track_"),
    "hand_trace": lambda x: x.replace("trace_", "hand_track_"),
    # Lower-casing alone turns "SPEAKER_01" into "speaker_01"; a follow-up
    # replace of the upper-case prefix could never match anything.
    "speaker": lambda x: x.lower(),
    "object": lambda x: x,
    "text_trace": lambda x: x.replace("text_", "text_region_"),
}
|
||||
|
||||
|
||||
def get_conn():
    """Open and return a new psycopg2 connection to the database at DB_URL."""
    connection = psycopg2.connect(DB_URL)
    return connection
|
||||
|
||||
|
||||
def migrate_node_types(cur, schema):
    """Rename legacy node types, external ids and labels in ``<schema>.tkg_nodes``.

    For every legacy node type that still has rows, a single UPDATE sets the
    new ``node_type``, rewrites ``external_id`` and replaces ``Trace`` with
    ``Track`` in ``label``. Nothing is committed here; the caller owns the
    transaction.

    Args:
        cur: Open DB-API cursor using ``%s`` placeholders (psycopg2 style).
        schema: Name of the schema that holds ``tkg_nodes``. It is spliced
            into the SQL text, so it must be a bare identifier.

    Raises:
        ValueError: If *schema* is not a bare SQL identifier.
    """
    import re

    # Identifiers cannot be sent as bound parameters, so the schema name ends
    # up inside the statement text. Refuse anything but a plain identifier so
    # a stray CLI/env value cannot alter the statement. The name is left
    # unquoted on purpose so PostgreSQL keeps folding its case as before.
    if not re.fullmatch(r"[A-Za-z_][A-Za-z0-9_$]*", schema or ""):
        raise ValueError(f"Invalid schema name: {schema!r}")

    print(f"[Migrate] Schema: {schema}")

    table = f"{schema}.tkg_nodes"

    # (legacy node_type, new node_type, SQL expression for the new external_id).
    # The expressions are fixed literals defined here, never user input.
    migrations = [
        ("face_trace", "face_track", "REPLACE(external_id, 'trace_', 'face_track_')"),
        ("person_trace", "body_track", "REPLACE(external_id, 'person_', 'body_track_')"),
        ("gaze_trace", "gaze_track", "REPLACE(external_id, 'trace_', 'gaze_track_')"),
        ("lip_trace", "lip_track", "REPLACE(external_id, 'trace_', 'lip_track_')"),
        ("hand_trace", "hand_track", "REPLACE(external_id, 'trace_', 'hand_track_')"),
        # LOWER alone maps SPEAKER_01 to speaker_01; an inner REPLACE adds nothing.
        ("speaker", "speaker_segment", "LOWER(external_id)"),
        ("object", "detected_object", "external_id"),
        ("text_trace", "text_region", "REPLACE(external_id, 'text_', 'text_region_')"),
    ]

    count_sql = f"SELECT COUNT(*) FROM {table} WHERE node_type = %s"

    for old_type, new_type, id_expr in migrations:
        cur.execute(count_sql, (old_type,))
        count = cur.fetchone()[0]

        if count == 0:
            print(f"[Migrate] {old_type}: 0 rows, skipping")
            continue

        print(f"[Migrate] {old_type} → {new_type}: {count} rows")

        cur.execute(
            f"""
            UPDATE {table}
            SET node_type = %s,
                external_id = {id_expr},
                label = REPLACE(label, 'Trace', 'Track')
            WHERE node_type = %s
            """,
            (new_type, old_type),
        )

        print(f"[Migrate] Updated {cur.rowcount} rows")

    print("[Migrate] Done")
|
||||
|
||||
|
||||
def main():
    """Command-line entry point for the node-type migration.

    Parses ``--schema`` and ``--dry-run``. A dry run only prints how many rows
    each legacy node type has; otherwise the migration is executed and
    committed. Any exception rolls the transaction back, is reported on
    stderr and ends the process with exit status 1. Cursor and connection are
    closed on every path.
    """
    import argparse

    cli = argparse.ArgumentParser(description="Migrate TKG node types to V2.0")
    cli.add_argument("--schema", default=SCHEMA, help="Database schema")
    cli.add_argument("--dry-run", action="store_true", help="Show counts only, no updates")
    options = cli.parse_args()

    connection = get_conn()
    cursor = connection.cursor()

    try:
        if not options.dry_run:
            migrate_node_types(cursor, options.schema)
            connection.commit()
            print("[Migrate] Committed successfully")
        else:
            print("[Migrate] DRY RUN - showing counts only")
            for legacy_type, renamed_type in NODE_TYPE_MIGRATIONS.items():
                cursor.execute(
                    f"SELECT COUNT(*) FROM {options.schema}.tkg_nodes WHERE node_type = %s",
                    (legacy_type,),
                )
                row_total = cursor.fetchone()[0]
                print(f" {legacy_type} → {renamed_type}: {row_total} rows")

    except Exception as exc:
        # Top-level boundary: undo partial work and signal failure to the shell.
        connection.rollback()
        print(f"[Migrate] Error: {exc}", file=sys.stderr)
        sys.exit(1)

    finally:
        cursor.close()
        connection.close()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -115,7 +115,7 @@ check("face trace", [
|
||||
print("[6/8] TKG")
|
||||
node_count = int(run_sql(f"SELECT count(*) FROM dev.tkg_nodes WHERE file_uuid='{uuid}'"))
|
||||
edge_count = int(run_sql(f"SELECT count(*) FROM dev.tkg_edges WHERE file_uuid='{uuid}'"))
|
||||
face_face = int(run_sql(f"SELECT count(*) FROM dev.tkg_edges WHERE file_uuid='{uuid}' AND edge_type='CO_OCCURS_WITH' AND source_node_id IN (SELECT id FROM dev.tkg_nodes WHERE node_type='face_trace')"))
|
||||
face_face = int(run_sql(f"SELECT count(*) FROM dev.tkg_edges WHERE file_uuid='{uuid}' AND edge_type='CO_OCCURS_WITH' AND source_node_id IN (SELECT id FROM dev.tkg_nodes WHERE node_type='face_track')"))
|
||||
check("TKG graph", [
|
||||
("nodes", node_count > 0, f"{node_count} nodes"),
|
||||
("edges", edge_count > 0, f"{edge_count} edges"),
|
||||
|
||||
31
scripts/requirements.txt
Normal file
31
scripts/requirements.txt
Normal file
@@ -0,0 +1,31 @@
|
||||
# Momentry Core Processor Dependencies
|
||||
# Install: pip install -r requirements.txt --break-system-packages
|
||||
|
||||
# Core Vision Processing
|
||||
opencv-python>=4.8.0
|
||||
numpy>=1.24.0
|
||||
|
||||
# ASR (Automatic Speech Recognition)
|
||||
faster-whisper>=0.9.0
|
||||
|
||||
# Audio Processing
|
||||
librosa>=0.10.0
|
||||
|
||||
# Machine Learning Frameworks
|
||||
torch>=2.0.0
|
||||
ultralytics>=8.0.0 # YOLO
|
||||
|
||||
# Pose & Face Detection
|
||||
mediapipe>=0.10.0
|
||||
|
||||
# Database
|
||||
psycopg2-binary>=2.9.0
|
||||
|
||||
# Clustering
|
||||
scikit-learn>=1.3.0
|
||||
|
||||
# CoreML Integration (Apple Silicon)
|
||||
coremltools>=7.0
|
||||
|
||||
# Additional utilities
|
||||
Pillow>=9.0.0 # Image processing
|
||||
@@ -110,5 +110,13 @@ let package = Package(
|
||||
path: ".",
|
||||
sources: ["swift_face.swift"]
|
||||
),
|
||||
.executableTarget(
|
||||
name: "swift_hand",
|
||||
dependencies: [
|
||||
.product(name: "ArgumentParser", package: "swift-argument-parser"),
|
||||
],
|
||||
path: ".",
|
||||
sources: ["swift_hand.swift"]
|
||||
),
|
||||
]
|
||||
)
|
||||
|
||||
299
scripts/swift_processors/swift_hand.swift
Normal file
299
scripts/swift_processors/swift_hand.swift
Normal file
@@ -0,0 +1,299 @@
|
||||
import Foundation
|
||||
import Vision
|
||||
import ArgumentParser
|
||||
import AppKit
|
||||
import AVFoundation
|
||||
|
||||
/// Swift Hand Pose Processor
|
||||
/// Uses Apple Vision Framework VNDetectHumanHandPoseRequest for 21 hand landmarks
|
||||
@main
|
||||
struct SwiftHandProcessor: ParsableCommand {
|
||||
@Argument(help: "Input video path")
|
||||
var inputPath: String
|
||||
|
||||
@Argument(help: "Output JSON path")
|
||||
var outputPath: String
|
||||
|
||||
@Option(name: [.short, .long], help: "UUID for the file")
|
||||
var uuid: String = ""
|
||||
|
||||
@Option(name: [.short, .long], help: "Sample interval (frames)")
|
||||
var sampleInterval: Int = 30
|
||||
|
||||
@Option(name: [.long], help: "Minimum confidence threshold")
|
||||
var minConfidence: Double = 0.3
|
||||
|
||||
/// Sample frames from the input video with ffmpeg, detect hand poses in each
/// sampled frame, and write the detections to `outputPath` as JSON.
///
/// - Throws: `ValidationError` when `sampleInterval` is not positive, plus any
///   error from creating the temp directory, launching ffmpeg, serializing
///   the JSON, or writing the output file.
func run() throws {
    // The ffmpeg select filter evaluates mod(n, interval); zero or negative is meaningless.
    guard sampleInterval > 0 else {
        throw ValidationError("--sample-interval must be a positive integer")
    }

    print("[SwiftHand] Starting: \(inputPath)")

    let url = URL(fileURLWithPath: inputPath)
    let asset = AVURLAsset(url: url)

    guard let track = asset.tracks(withMediaType: AVMediaType.video).first else {
        print("[SwiftHand] Error: No video track"); return
    }

    let duration = asset.duration.seconds
    let fps = Double(track.nominalFrameRate)

    print("[SwiftHand] Duration: \(String(format: "%.1f", duration))s, FPS: \(String(format: "%.1f", fps))")

    // Timestamps are frame / fps. A zero rate would give inf/NaN, which
    // JSONSerialization refuses to encode.
    guard fps > 0 else {
        print("[SwiftHand] Error: Invalid frame rate"); return
    }

    // Extract frames using ffmpeg (same approach as swift_pose)
    let tempDir = FileManager.default.temporaryDirectory.appendingPathComponent("swift_hand_\(UUID().uuidString)")
    let framesDir = tempDir.appendingPathComponent("frames")
    // Remove the extracted frames on every exit path, including thrown errors.
    defer { try? FileManager.default.removeItem(at: tempDir) }
    try FileManager.default.createDirectory(at: framesDir, withIntermediateDirectories: true)

    let pattern = framesDir.appendingPathComponent("frame_%05d.jpg").path
    print("[SwiftHand] Extracting frames...")
    let extract = Process()
    extract.executableURL = URL(fileURLWithPath: "/opt/homebrew/bin/ffmpeg")
    extract.arguments = ["-y", "-v", "quiet", "-i", inputPath,
                         "-vf", "select=not(mod(n\\,\(sampleInterval)))",
                         "-vsync", "vfr", "-q:v", "15", pattern]
    try extract.run()
    extract.waitUntilExit()
    if extract.terminationStatus != 0 {
        // Keep going with whatever was extracted, but do not hide the failure.
        print("[SwiftHand] Warning: ffmpeg exited with status \(extract.terminationStatus)")
    }

    let files = (try? FileManager.default.contentsOfDirectory(atPath: framesDir.path)) ?? []
    let frameFiles = files.filter { $0.hasSuffix(".jpg") }.sorted()
    print("[SwiftHand] Extracted \(frameFiles.count) frames")

    // Hand joint names (21 landmarks)
    let jointNames: [VNHumanHandPoseObservation.JointName] = [
        .wrist,
        .thumbTip, .thumbIP, .thumbMP, .thumbCMC,
        .indexTip, .indexDIP, .indexPIP, .indexMCP,
        .middleTip, .middleDIP, .middlePIP, .middleMCP,
        .ringTip, .ringDIP, .ringPIP, .ringMCP,
        .littleTip, .littleDIP, .littlePIP, .littleMCP,
    ]

    var handFrames: [[String: Any]] = []
    var lastProgress = 0

    for (i, fname) in frameFiles.enumerated() {
        let imgPath = framesDir.appendingPathComponent(fname).path
        guard let imgData = try? Data(contentsOf: URL(fileURLWithPath: imgPath)),
              let img = NSImage(data: imgData),
              let cgImage = img.cgImage(forProposedRect: nil, context: nil, hints: nil) else { continue }

        // ffmpeg numbers its output images 1, 2, 3, ... no matter which source
        // frames the select filter kept, so image k holds source frame
        // (k - 1) * sampleInterval.
        let stem = fname.replacingOccurrences(of: "frame_", with: "").replacingOccurrences(of: ".jpg", with: "")
        let sequence = Int(stem) ?? (i + 1)
        let frameNum = (sequence - 1) * sampleInterval
        let timestamp = Double(frameNum) / fps
        let w = cgImage.width
        let h = cgImage.height

        let handler = VNImageRequestHandler(cgImage: cgImage, options: [:])
        let req = VNDetectHumanHandPoseRequest()
        try? handler.perform([req])

        guard let hands = req.results, !hands.isEmpty else { continue }

        var persons: [[String: Any]] = []

        for (handIdx, hand) in hands.enumerated() {
            if Float(hand.confidence) < Float(minConfidence) {
                continue
            }

            var landmarks: [[String: Any]] = []

            for joint in jointNames {
                if let point = try? hand.recognizedPoint(joint) {
                    let desc = String(describing: joint.rawValue.rawValue)
                    let rawName = desc
                        .replacingOccurrences(of: "VNRecognizedPointKey(_rawValue: ", with: "")
                        .replacingOccurrences(of: ")", with: "")
                        .trimmingCharacters(in: .whitespaces)

                    let name = mapJointName(rawName)
                    let px = Float(point.location.x) * Float(w)
                    let py = Float(h) - Float(point.location.y) * Float(h)  // Y-flip to Top-Left
                    let conf = Float(point.confidence)

                    if conf > 0.1 {
                        landmarks.append([
                            "name": name,
                            "x": px,
                            "y": py,
                            "confidence": conf
                        ])
                    }
                }
            }

            // Gesture detection
            let gesture = detectGesture(hand)

            // Detection order says nothing about handedness. Prefer the
            // chirality Vision reports and keep the index-based guess only
            // when it is unavailable or unknown.
            var handType = handIdx == 0 ? "left" : "right"
            if #available(macOS 12.0, *) {
                switch hand.chirality {
                case .left: handType = "left"
                case .right: handType = "right"
                default: break
                }
            }

            persons.append([
                "person_id": handIdx,
                "hand_type": handType,
                "confidence": Float(hand.confidence),
                "landmarks": landmarks,
                "num_landmarks": landmarks.count,
                "gesture": gesture["gesture"] as? String ?? "unknown",
                "hand_state": gesture["hand_state"] as? String ?? "empty",
                "finger_extensions": gesture["finger_extensions"] as? [String: Bool] ?? [:],
                "num_fingers_extended": gesture["num_fingers_extended"] as? Int ?? 0,
                "num_fingers_curled": gesture["num_fingers_curled"] as? Int ?? 0
            ])
        }

        if !persons.isEmpty {
            handFrames.append([
                "frame": frameNum,
                "timestamp": timestamp,
                "persons": persons
            ])
        }

        // Progress reporting
        let progress = (i + 1) * 100 / frameFiles.count
        if progress > lastProgress && progress % 10 == 0 {
            print("[SwiftHand] Progress: \(progress)% (\(handFrames.count) hand frames)")
            lastProgress = progress
        }
    }

    // Build output JSON
    let outputData: [String: Any] = [
        "frame_count": handFrames.count,
        "fps": fps,
        "frames": handFrames,
        "metadata": [
            "source": "swift_hand",
            "uuid": uuid,
            "landmarks_per_hand": 21,
            "min_confidence": minConfidence,
            "sample_interval": sampleInterval
        ]
    ]

    let jsonData = try JSONSerialization.data(withJSONObject: outputData, options: [.prettyPrinted])
    try jsonData.write(to: URL(fileURLWithPath: outputPath))

    print("[SwiftHand] Complete: \(handFrames.count) frames with hands")
    print("[SwiftHand] Output: \(outputPath)")
}
|
||||
|
||||
/// Map a Vision hand-joint key (e.g. "VNHLKITIP") to a readable snake_case name.
///
/// - Parameter rawName: Raw joint key string as reported by Vision.
/// - Returns: The readable joint name, or `rawName` lowercased when the key
///   is not in the table.
func mapJointName(_ rawName: String) -> String {
    let mapping: [String: String] = [
        "VNHLKWRI": "wrist",
        // Thumb keys: TTIP is the tip, TIP is the interphalangeal (IP) joint.
        "VNHLKTTIP": "thumb_tip",
        "VNHLKTIP": "thumb_ip",
        "VNHLKTMP": "thumb_mp",
        "VNHLKTCMC": "thumb_cmc",
        "VNHLKITIP": "index_tip",
        "VNHLKIDIP": "index_dip",
        "VNHLKIPIP": "index_pip",
        "VNHLKIMCP": "index_mcp",
        "VNHLKMTIP": "middle_tip",
        "VNHLKMDIP": "middle_dip",
        "VNHLKMPIP": "middle_pip",
        "VNHLKMMCP": "middle_mcp",
        "VNHLKRTIP": "ring_tip",
        "VNHLKRDIP": "ring_dip",
        "VNHLKRPIP": "ring_pip",
        "VNHLKRMCP": "ring_mcp",
        "VNHLKPTIP": "little_tip",
        "VNHLKPDIP": "little_dip",
        "VNHLKPPIP": "little_pip",
        "VNHLKPMCP": "little_mcp",
    ]
    return mapping[rawName] ?? rawName.lowercased()
}
|
||||
|
||||
/// Classify a hand observation into a coarse gesture and an "empty"/"holding" state.
///
/// Each non-thumb finger is judged by comparing the y coordinate of its tip
/// with that of its PIP joint, exactly as Vision reports them (no flip is
/// applied here): tip above PIP counts as extended, tip below as curled. The
/// thumb counts as extended when tip and CMC are more than 0.05 apart
/// horizontally. Joints whose confidence is not above 0.3 never count.
///
/// - Parameter hand: Hand pose observation to inspect.
/// - Returns: Dictionary with the keys `gesture`, `hand_state`,
///   `finger_extensions`, `num_fingers_extended` and `num_fingers_curled`.
func detectGesture(_ hand: VNHumanHandPoseObservation) -> [String: Any] {
    typealias Joint = VNHumanHandPoseObservation.JointName

    let jointFloor: VNConfidence = 0.3

    // y of (tip, pip) for one finger; nil when either joint is missing or
    // not confident enough to be trusted.
    func tipAndPipHeights(_ tipJoint: Joint, _ pipJoint: Joint) -> (tip: CGFloat, pip: CGFloat)? {
        guard let tipPoint = try? hand.recognizedPoint(tipJoint),
              let pipPoint = try? hand.recognizedPoint(pipJoint),
              tipPoint.confidence > jointFloor,
              pipPoint.confidence > jointFloor else { return nil }
        return (tipPoint.location.y, pipPoint.location.y)
    }

    let fingerJoints: [(label: String, tip: Joint, pip: Joint)] = [
        ("index", .indexTip, .indexPIP),
        ("middle", .middleTip, .middlePIP),
        ("ring", .ringTip, .ringPIP),
        ("little", .littleTip, .littlePIP),
    ]

    var extensions: [String: Bool] = [:]
    var numCurled = 0
    for finger in fingerJoints {
        let heights = tipAndPipHeights(finger.tip, finger.pip)
        var isExtended = false
        if let heights = heights {
            isExtended = heights.tip > heights.pip
            // Curled fingers are the holding-object indicator.
            if heights.tip < heights.pip {
                numCurled += 1
            }
        }
        extensions[finger.label] = isExtended
    }

    // Thumb: horizontal distance between tip and CMC.
    var thumb = false
    if let tipPoint = try? hand.recognizedPoint(.thumbTip),
       let cmcPoint = try? hand.recognizedPoint(.thumbCMC) {
        thumb = tipPoint.confidence > jointFloor
            && cmcPoint.confidence > jointFloor
            && abs(tipPoint.location.x - cmcPoint.location.x) > 0.05
    }
    extensions["thumb"] = thumb

    let index = extensions["index"] ?? false
    let middle = extensions["middle"] ?? false
    let ring = extensions["ring"] ?? false
    let little = extensions["little"] ?? false

    let numExtended = extensions.values.filter { $0 }.count

    // Holding takes precedence over every empty-hand gesture: either a grip
    // of 2+ curled fingers without an extended thumb, or 3+ curled fingers.
    let isHolding = (numCurled >= 2 && !thumb) || numCurled >= 3
    let handState = isHolding ? "holding" : "empty"

    let gesture: String
    if isHolding {
        gesture = "holding_object"
    } else if numExtended == 5 {
        gesture = "open_hand"
    } else if numExtended == 0 {
        gesture = "fist"
    } else if thumb && numExtended == 1 {
        gesture = "thumbs_up"
    } else if index && numExtended == 1 {
        gesture = "pointing"
    } else if index && middle && numExtended == 2 {
        gesture = "peace_sign"
    } else if thumb && index && !middle && !ring && !little {
        gesture = "ok_sign"
    } else if thumb && index && middle && !ring && !little {
        gesture = "three_fingers"
    } else if numExtended >= 3 {
        gesture = "partial_open"
    } else {
        gesture = "unknown"
    }

    return [
        "gesture": gesture,
        "hand_state": handState,
        "finger_extensions": extensions,
        "num_fingers_extended": numExtended,
        "num_fingers_curled": numCurled
    ]
}
|
||||
}
|
||||
@@ -1,24 +1,29 @@
|
||||
#!/opt/homebrew/bin/python3.11
|
||||
"""
|
||||
TKG Builder - Populate Temporal Knowledge Graph from pipeline results
|
||||
TKG Builder - Unified Temporal Knowledge Graph Builder
|
||||
|
||||
Builds graph nodes and edges from:
|
||||
- Face traces (face_detections with trace_id + bbox)
|
||||
- YOLO objects (yolo.json)
|
||||
Builds graph nodes and edges from all pipeline outputs:
|
||||
- Face tracks (face_detections with trace_id)
|
||||
- Body tracks (pose.json + Level 1 appearance features)
|
||||
- Detected objects (yolo.json)
|
||||
- Speaker segments (asrx.json)
|
||||
- Hand tracks (hand.json) [optional]
|
||||
|
||||
Graph Structure:
|
||||
Node Types (V2.0 - intuitive naming):
|
||||
NODES:
|
||||
(face_trace:N) - one per unique trace_id per file
|
||||
(object:C) - one per unique yolo class
|
||||
(speaker:S) - one per speaker_id
|
||||
(face_track) - face tracking across frames
|
||||
(body_track) - body appearance with Level 1 features
|
||||
(detected_object) - YOLO detected objects
|
||||
(speaker_segment) - speaker segments
|
||||
(hand_track) - hand state tracking [optional]
|
||||
EDGES:
|
||||
(face_trace) -[:APPEARS_IN]-> (frame:N)
|
||||
(object) -[:APPEARS_IN]-> (frame:N)
|
||||
(face_trace) -[:CO_OCCURS_WITH]-> (object) -- same frame, same file
|
||||
(face_track) -[:CO_OCCURS_WITH]-> (detected_object) -- same frame
|
||||
(face_track) -[:SPEAKS_AS]-> (speaker_segment) -- temporal overlap
|
||||
(face_track) -[:HAS_BODY]-> (body_track) -- spatial proximity
|
||||
(body_track) -[:HAS_HAND]-> (hand_track) -- wrist position
|
||||
|
||||
Usage:
|
||||
python tkg_builder.py --file-uuid <uuid> [--schema <schema>]
|
||||
python tkg_builder.py --file-uuid <uuid> [--schema <schema>] [--video <path>]
|
||||
"""
|
||||
|
||||
import sys
|
||||
@@ -27,9 +32,22 @@ import json
|
||||
import argparse
|
||||
import psycopg2
|
||||
import psycopg2.extras
|
||||
import cv2
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(os.path.abspath(__file__)), "utils"))
|
||||
|
||||
try:
|
||||
from utils.feature_extractor import HierarchicalFeatureExtractor
|
||||
from utils.proportion_calculator import calculate_proportions, get_head_region
|
||||
except ImportError:
|
||||
print("[TKG] Warning: Level 1 feature extraction unavailable")
|
||||
HierarchicalFeatureExtractor = None
|
||||
calculate_proportions = None
|
||||
get_head_region = None
|
||||
|
||||
DB_URL = os.environ.get("DATABASE_URL", "postgresql://accusys@localhost:5432/momentry")
|
||||
SCHEMA = os.environ.get("MOMENTRY_DB_SCHEMA", "dev")
|
||||
SCHEMA = os.environ.get("DATABASE_SCHEMA", "dev")
|
||||
OUTPUT_DIR = os.environ.get("MOMENTRY_OUTPUT_DIR", "/Users/accusys/momentry/output_dev")
|
||||
|
||||
|
||||
@@ -67,9 +85,9 @@ def ensure_edge(cur, schema, file_uuid, edge_type, source_id, target_id, propert
|
||||
)
|
||||
|
||||
|
||||
def build_face_trace_nodes(cur, schema, file_uuid):
|
||||
"""Create graph nodes for each face trace"""
|
||||
print("[TKG] Building face trace nodes...")
|
||||
def build_face_track_nodes(cur, schema, file_uuid):
|
||||
"""Create graph nodes for each face track"""
|
||||
print("[TKG] Building face_track nodes...")
|
||||
cur.execute(
|
||||
f"""
|
||||
SELECT trace_id, COUNT(*) as frame_count,
|
||||
@@ -88,7 +106,7 @@ def build_face_trace_nodes(cur, schema, file_uuid):
|
||||
count = 0
|
||||
for row in cur.fetchall():
|
||||
tid, fc, sf, ef, ax, ay, aw, ah = row
|
||||
label = f"Face Trace {tid}"
|
||||
label = f"Face Track {tid}"
|
||||
props = {
|
||||
"frame_count": fc,
|
||||
"start_frame": sf,
|
||||
@@ -96,9 +114,9 @@ def build_face_trace_nodes(cur, schema, file_uuid):
|
||||
"avg_bbox": {"x": round(ax or 0, 1), "y": round(ay or 0, 1),
|
||||
"width": round(aw or 0, 1), "height": round(ah or 0, 1)},
|
||||
}
|
||||
ensure_node(cur, schema, file_uuid, "face_trace", f"trace_{tid}", label, props)
|
||||
ensure_node(cur, schema, file_uuid, "face_track", f"face_track_{tid}", label, props)
|
||||
count += 1
|
||||
print(f"[TKG] {count} face trace nodes created")
|
||||
print(f"[TKG] {count} face_track nodes created")
|
||||
return count
|
||||
|
||||
|
||||
@@ -124,12 +142,12 @@ def load_json_safe(path):
|
||||
return None
|
||||
|
||||
|
||||
def build_yolo_object_nodes(cur, schema, file_uuid):
|
||||
"""Create graph nodes for each YOLO object class from yolo.json"""
|
||||
def build_detected_object_nodes(cur, schema, file_uuid):
|
||||
"""Create graph nodes for each YOLO detected object class from yolo.json"""
|
||||
yolo_path = os.path.join(OUTPUT_DIR, f"{file_uuid}.yolo.json")
|
||||
yolo = load_json_safe(yolo_path)
|
||||
if yolo is None:
|
||||
print(f"[TKG] yolo.json not available, skipping object nodes")
|
||||
print(f"[TKG] yolo.json not available, skipping detected_object nodes")
|
||||
return 0
|
||||
|
||||
frames = yolo.get("frames", {})
|
||||
@@ -143,20 +161,20 @@ def build_yolo_object_nodes(cur, schema, file_uuid):
|
||||
count = 0
|
||||
for cls, cnt in sorted(class_counts.items()):
|
||||
ensure_node(
|
||||
cur, schema, file_uuid, "object",
|
||||
cur, schema, file_uuid, "detected_object",
|
||||
cls, cls,
|
||||
{"total_detections": cnt},
|
||||
)
|
||||
count += 1
|
||||
print(f"[TKG] {count} object class nodes created")
|
||||
print(f"[TKG] {count} detected_object nodes created")
|
||||
return count
|
||||
|
||||
|
||||
def build_speaker_nodes(cur, schema, file_uuid):
|
||||
"""Create graph nodes for each speaker from asrx.json"""
|
||||
def build_speaker_segment_nodes(cur, schema, file_uuid):
|
||||
"""Create graph nodes for each speaker segment from asrx.json"""
|
||||
asrx_path = os.path.join(OUTPUT_DIR, f"{file_uuid}.asrx.json")
|
||||
if not os.path.exists(asrx_path):
|
||||
print(f"[TKG] asrx.json not found, skipping speaker nodes")
|
||||
print(f"[TKG] asrx.json not found, skipping speaker_segment nodes")
|
||||
return 0
|
||||
|
||||
with open(asrx_path) as f:
|
||||
@@ -167,17 +185,17 @@ def build_speaker_nodes(cur, schema, file_uuid):
|
||||
for sid, sinfo in stats.items():
|
||||
cnt = sinfo.get("count", 0)
|
||||
ensure_node(
|
||||
cur, schema, file_uuid, "speaker",
|
||||
sid, sid,
|
||||
cur, schema, file_uuid, "speaker_segment",
|
||||
sid.lower().replace("speaker_", "speaker_"), sid,
|
||||
{"segment_count": cnt},
|
||||
)
|
||||
count += 1
|
||||
print(f"[TKG] {count} speaker nodes created")
|
||||
print(f"[TKG] {count} speaker_segment nodes created")
|
||||
return count
|
||||
|
||||
|
||||
def build_co_occurrence_edges(cur, schema, file_uuid):
|
||||
"""Build CO_OCCURS_WITH edges: face_trace ↔ yolo_object in same frame"""
|
||||
"""Build CO_OCCURS_WITH edges: face_track ↔ detected_object in same frame"""
|
||||
print("[TKG] Building co-occurrence edges (face-object within same frame)...")
|
||||
|
||||
yolo_path = os.path.join(OUTPUT_DIR, f"{file_uuid}.yolo.json")
|
||||
@@ -217,8 +235,8 @@ def build_co_occurrence_edges(cur, schema, file_uuid):
|
||||
|
||||
# Get face trace node
|
||||
cur.execute(
|
||||
f"SELECT id FROM {schema}.tkg_nodes WHERE file_uuid=%s AND node_type='face_trace' AND external_id=%s",
|
||||
(file_uuid, f"trace_{tid}"),
|
||||
f"SELECT id FROM {schema}.tkg_nodes WHERE file_uuid=%s AND node_type='face_track' AND external_id=%s",
|
||||
(file_uuid, f"face_track_{tid}"),
|
||||
)
|
||||
ft_row = cur.fetchone()
|
||||
if not ft_row:
|
||||
@@ -231,7 +249,7 @@ def build_co_occurrence_edges(cur, schema, file_uuid):
|
||||
|
||||
# Get object node
|
||||
cur.execute(
|
||||
f"SELECT id FROM {schema}.tkg_nodes WHERE file_uuid=%s AND node_type='object' AND external_id=%s",
|
||||
f"SELECT id FROM {schema}.tkg_nodes WHERE file_uuid=%s AND node_type='detected_object' AND external_id=%s",
|
||||
(file_uuid, cls),
|
||||
)
|
||||
obj_row = cur.fetchone()
|
||||
@@ -277,7 +295,7 @@ def build_co_occurrence_edges(cur, schema, file_uuid):
|
||||
|
||||
|
||||
def build_speaker_face_edges(cur, schema, file_uuid):
|
||||
"""Build SPEAKS_AS edges: face_trace ↔ speaker via temporal overlap"""
|
||||
"""Build SPEAKS_AS edges: face_track ↔ speaker_segment via temporal overlap"""
|
||||
asrx_path = os.path.join(OUTPUT_DIR, f"{file_uuid}.asrx.json")
|
||||
if not os.path.exists(asrx_path):
|
||||
print(f"[TKG] asrx.json not found, skipping speaker edges")
|
||||
@@ -309,8 +327,8 @@ def build_speaker_face_edges(cur, schema, file_uuid):
|
||||
for tid, sf, ef in traces:
|
||||
# Get face trace node
|
||||
cur.execute(
|
||||
f"SELECT id FROM {schema}.tkg_nodes WHERE file_uuid=%s AND node_type='face_trace' AND external_id=%s",
|
||||
(file_uuid, f"trace_{tid}"),
|
||||
f"SELECT id FROM {schema}.tkg_nodes WHERE file_uuid=%s AND node_type='face_track' AND external_id=%s",
|
||||
(file_uuid, f"face_track_{tid}"),
|
||||
)
|
||||
ft_row = cur.fetchone()
|
||||
if not ft_row:
|
||||
@@ -340,7 +358,7 @@ def build_speaker_face_edges(cur, schema, file_uuid):
|
||||
|
||||
# Get speaker node
|
||||
cur.execute(
|
||||
f"SELECT id FROM {schema}.tkg_nodes WHERE file_uuid=%s AND node_type='speaker' AND external_id=%s",
|
||||
f"SELECT id FROM {schema}.tkg_nodes WHERE file_uuid=%s AND node_type='speaker_segment' AND external_id=%s",
|
||||
(file_uuid, speaker_id),
|
||||
)
|
||||
sp_row = cur.fetchone()
|
||||
@@ -366,7 +384,7 @@ def build_speaker_face_edges(cur, schema, file_uuid):
|
||||
|
||||
|
||||
def build_face_face_edges(cur, schema, file_uuid):
|
||||
"""Build CO_OCCURS_WITH edges: face_trace ↔ face_trace in same frame"""
|
||||
"""Build CO_OCCURS_WITH edges: face_track ↔ face_track in same frame"""
|
||||
print("[TKG] Building face-face co-occurrence edges...")
|
||||
|
||||
cur.execute(
|
||||
@@ -404,12 +422,12 @@ def build_face_face_edges(cur, schema, file_uuid):
|
||||
edge_count = 0
|
||||
for (tid_a, tid_b), frames in pair_frames.items():
|
||||
cur.execute(
|
||||
f"SELECT id FROM {schema}.tkg_nodes WHERE file_uuid=%s AND node_type='face_trace' AND external_id=%s",
|
||||
(file_uuid, f"trace_{tid_a}"),
|
||||
f"SELECT id FROM {schema}.tkg_nodes WHERE file_uuid=%s AND node_type='face_track' AND external_id=%s",
|
||||
(file_uuid, f"face_track_{tid_a}"),
|
||||
)
|
||||
n_a = cur.fetchone()
|
||||
cur.execute(
|
||||
f"SELECT id FROM {schema}.tkg_nodes WHERE file_uuid=%s AND node_type='face_trace' AND external_id=%s",
|
||||
f"SELECT id FROM {schema}.tkg_nodes WHERE file_uuid=%s AND node_type='face_track' AND external_id=%s",
|
||||
(file_uuid, f"trace_{tid_b}"),
|
||||
)
|
||||
n_b = cur.fetchone()
|
||||
@@ -432,37 +450,466 @@ def build_face_face_edges(cur, schema, file_uuid):
|
||||
return edge_count
|
||||
|
||||
|
||||
def extract_level1_features(video_path, pose_json_path):
    """
    Extract Level 1 features for each person in each pose frame.

    Args:
        video_path: Path to video file
        pose_json_path: Path to pose.json

    Returns:
        List of dicts, one per (frame, person) pair, with the keys
        ``frame``, ``timestamp``, ``person_index``, ``bbox``,
        ``proportions`` and ``level1_features``. Empty when the feature
        extractor is unavailable, pose.json is missing, or the video
        cannot be opened.
    """
    if HierarchicalFeatureExtractor is None:
        print("[TKG] Level 1 feature extractor not available")
        return []

    if not os.path.exists(pose_json_path):
        print(f"[TKG] pose.json not found: {pose_json_path}")
        return []

    with open(pose_json_path) as f:
        pose_data = json.load(f)

    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            print(f"[TKG] Cannot open video: {video_path}")
            return []

        # A missing, null or zero fps would break the timestamp fallback below.
        fps = pose_data.get("fps") or 30.0
        extractor = HierarchicalFeatureExtractor()

        results = []

        for pose_frame in pose_data.get("frames", []):
            frame_num = pose_frame["frame"]
            persons = pose_frame.get("persons", [])

            if not persons:
                continue

            cap.set(cv2.CAP_PROP_POS_FRAMES, frame_num)
            ret, frame = cap.read()

            if not ret:
                continue

            # Only derive the timestamp when pose.json does not carry one.
            if "timestamp" in pose_frame:
                timestamp = pose_frame["timestamp"]
            else:
                timestamp = frame_num / fps

            for person_idx, person in enumerate(persons):
                bbox = person.get("bbox", {})
                keypoints = person.get("keypoints", [])

                if bbox.get("width", 0) <= 0 or bbox.get("height", 0) <= 0:
                    continue

                proportions = calculate_proportions(keypoints, bbox) if calculate_proportions else {}
                head_region = get_head_region(keypoints) if get_head_region else {}
                level1 = extractor.extract_level1(frame, bbox, head_region)

                results.append({
                    "frame": frame_num,
                    "timestamp": timestamp,
                    "person_index": person_idx,
                    "bbox": bbox,
                    "proportions": proportions,
                    "level1_features": level1,
                })
    finally:
        # Release the capture even if a frame or the extractor raises.
        cap.release()

    print(f"[TKG] Extracted Level 1 features: {len(results)} frame-person pairs")
    return results
|
||||
|
||||
|
||||
def average_colors(color_lists):
    """Average the leading color of several color lists.

    Every entry of ``color_lists`` is a sequence of colors, each color being
    a sequence of numeric channels.  Only the first color of each non-empty
    entry contributes to the result.

    Args:
        color_lists: Per-frame color lists; falsy entries (``None``, ``[]``)
            are ignored.  ``None`` or an empty sequence is accepted.

    Returns:
        The channel-wise mean of the leading colors, each channel rounded to
        2 decimals, or ``[]`` when there is nothing to average.
    """
    # Entries without any color are dropped here, so no placeholder color is
    # needed later on.
    leading = [colors[0] for colors in (color_lists or ()) if colors]
    if not leading:
        return []

    # zip(*leading) regroups the colors channel by channel.
    return [round(sum(channel) / len(channel), 2) for channel in zip(*leading)]
|
||||
|
||||
|
||||
def average_h_mean(items, region):
    """Return the mean ``h_mean`` reported for ``region`` across items.

    Args:
        items: Level 1 result dicts; each may carry a ``level1_features``
            mapping of region name -> feature dict.
        region: Region key to look up (for example ``"body"``).

    Returns:
        The mean of all truthy ``h_mean`` values rounded to 2 decimals, or
        ``0`` when no item contributes a value.
    """
    collected = []
    for entry in items:
        features = entry.get("level1_features", {})
        if region not in features:
            continue
        region_data = features[region]
        if "color" not in region_data:
            continue
        value = region_data["color"].get("h_mean", 0)
        # Falsy readings (missing key, None or 0) are left out of the mean.
        if value:
            collected.append(value)

    if not collected:
        return 0
    return round(sum(collected) / len(collected), 2)
|
||||
|
||||
|
||||
def average_bbox(bboxes):
    """Compute the mean bounding box of a sequence of bbox dicts.

    Args:
        bboxes: Dicts that may hold ``x``, ``y``, ``width`` and ``height``;
            a missing field counts as 0.

    Returns:
        A dict with the four fields averaged and rounded to 1 decimal, or
        ``{}`` when ``bboxes`` is empty.
    """
    if not bboxes:
        return {}

    total = len(bboxes)
    return {
        field: round(sum(box.get(field, 0) for box in bboxes) / total, 1)
        for field in ("x", "y", "width", "height")
    }
|
||||
|
||||
|
||||
def build_body_track_nodes(cur, schema, file_uuid, video_path=None):
    """Create body_track nodes carrying Level 1 appearance features.

    Level 1 results from ``extract_level1_features`` are grouped by their
    ``person_index`` and one ``body_track`` node is stored per group.
    NOTE(review): this assumes a person index refers to the same person in
    every frame - confirm against the pose producer.

    Args:
        cur: Database cursor handed through to ``ensure_node``.
        schema: Database schema name.
        file_uuid: File UUID; also used to locate ``<uuid>.pose.json``.
        video_path: Video file to sample frames from; defaults to
            ``<uuid>.mp4`` inside ``OUTPUT_DIR``.

    Returns:
        Number of body_track nodes passed to ``ensure_node`` (0 when an
        input is missing or nothing could be extracted).
    """
    pose_json_path = os.path.join(OUTPUT_DIR, f"{file_uuid}.pose.json")
    if not os.path.exists(pose_json_path):
        print("[TKG] pose.json not found, skipping body_track nodes")
        return 0

    if video_path is None:
        video_path = os.path.join(OUTPUT_DIR, f"{file_uuid}.mp4")
    if not os.path.exists(video_path):
        print(f"[TKG] Video not found: {video_path}, skipping body_track")
        return 0

    print("[TKG] Building body_track nodes with Level 1 features...")

    level1_data = extract_level1_features(video_path, pose_json_path)
    if not level1_data:
        print("[TKG] No Level 1 data extracted")
        return 0

    # Regions whose colors are summarised, in the order they are stored.
    regions = ("body", "head_top", "upper_body", "lower_body")

    by_person = {}
    for sample in level1_data:
        by_person.setdefault(sample["person_index"], []).append(sample)

    created = 0
    for person_idx, samples in by_person.items():
        frame_numbers = []
        boxes = []
        region_colors = {name: [] for name in regions}

        for sample in samples:
            features = sample.get("level1_features", {})
            frame_numbers.append(sample["frame"])
            boxes.append(sample["bbox"])
            for name in regions:
                if name in features and "color" in features[name]:
                    region_colors[name].append(
                        features[name]["color"].get("dominant_colors", [])
                    )

        # Despite the property names these are not averaged: the first
        # non-empty value found across the samples is kept.
        height_estimate = {}
        body_shape = {}
        for sample in samples:
            proportions = sample.get("proportions", {})
            if "height_estimate" in proportions and not height_estimate:
                height_estimate = proportions["height_estimate"]
            if "body_shape" in proportions and not body_shape:
                body_shape = proportions["body_shape"]

        properties = {
            "frame_count": len(frame_numbers),
            "frames": frame_numbers,
            "avg_bbox": average_bbox(boxes),
            "height_estimate": height_estimate,
            "body_shape": body_shape,
            "level1_features": {
                name: {
                    "dominant_colors": average_colors(region_colors[name]),
                    "h_mean": average_h_mean(samples, name),
                }
                for name in regions
            },
        }

        ensure_node(
            cur,
            schema,
            file_uuid,
            "body_track",
            f"body_track_{person_idx}",
            f"Body Track {person_idx}",
            properties,
        )
        created += 1

    print(f"[TKG] {created} body_track nodes created")
    return created
|
||||
|
||||
|
||||
def build_hand_track_nodes(cur, schema, file_uuid):
    """Create hand_track nodes from ``<uuid>.hand.json``.

    Detections are grouped by ``(person_id, hand_type)``; each group becomes
    one ``hand_track`` node listing the frames it was seen in and how often
    the hand state was ``"empty"`` or ``"holding"``.

    Args:
        cur: Database cursor handed through to ``ensure_node``.
        schema: Database schema name.
        file_uuid: File UUID; also used to locate the hand detection JSON.

    Returns:
        Number of hand_track nodes passed to ``ensure_node`` (0 when the
        file is missing or contains no frames).
    """
    hand_json_path = os.path.join(OUTPUT_DIR, f"{file_uuid}.hand.json")
    if not os.path.exists(hand_json_path):
        print("[TKG] hand.json not found, skipping hand_track nodes")
        return 0

    # JSON is UTF-8 by specification; do not depend on the locale default.
    with open(hand_json_path, encoding="utf-8") as f:
        hand_data = json.load(f)

    frames = hand_data.get("frames", [])
    if not frames:
        print("[TKG] No hand frames found")
        return 0

    print("[TKG] Building hand_track nodes...")

    # (person_id, hand_type) -> frames seen plus running hand-state counts.
    # Only the counts are stored on the node, so they are tallied directly
    # instead of keeping one state (and one unused gesture) per frame.
    tracks = {}
    for frame_data in frames:
        frame_num = frame_data.get("frame", 0)
        for person in frame_data.get("persons", []):
            key = (person.get("person_id", 0), person.get("hand_type", "unknown"))
            track = tracks.setdefault(key, {"frames": [], "empty": 0, "holding": 0})
            track["frames"].append(frame_num)
            state = person.get("hand_state", "unknown")
            if state in ("empty", "holding"):
                track[state] += 1

    count = 0
    for (person_id, hand_type), track in tracks.items():
        external_id = f"hand_track_{person_id}_{hand_type}"
        label = f"Hand Track {person_id} ({hand_type})"

        properties = {
            "frame_count": len(track["frames"]),
            "frames": track["frames"],
            "person_id": person_id,
            "hand_type": hand_type,
            "empty_count": track["empty"],
            "holding_count": track["holding"],
            # Holds hand-state counts (not gestures); the key is kept as-is
            # so already stored nodes and their readers stay compatible.
            "gesture_summary": {
                "empty": track["empty"],
                "holding": track["holding"],
            },
        }

        ensure_node(cur, schema, file_uuid, "hand_track", external_id, label, properties)
        count += 1

    print(f"[TKG] {count} hand_track nodes created")
    return count
|
||||
|
||||
|
||||
def build_face_body_edges(cur, schema, file_uuid, max_distance_px=200):
    """Build HAS_BODY edges between face_track and body_track nodes.

    For every face detection that belongs to a trace, the pose person whose
    bbox center is closest to the face center in the same frame is chosen.
    Matches are collected per ``(trace_id, person index)`` pair and a single
    edge is written per pair, carrying the mean center distance over all
    frames in which the pair matched.

    Args:
        cur: Database cursor used for the queries and for ``ensure_edge``.
        schema: Database schema name.
        file_uuid: File UUID; also used to locate ``<uuid>.pose.json``.
        max_distance_px: Largest face-to-body center distance (in pixels)
            still accepted as a match.

    Returns:
        Number of distinct face/body pairs passed to ``ensure_edge`` (0 when
        pose.json is missing).
    """
    print("[TKG] Building face-body edges...")

    # Check the cheap precondition first so no query runs needlessly.
    pose_json_path = os.path.join(OUTPUT_DIR, f"{file_uuid}.pose.json")
    if not os.path.exists(pose_json_path):
        print("[TKG] pose.json not found, skipping face-body edges")
        return 0

    with open(pose_json_path, encoding="utf-8") as f:
        pose_data = json.load(f)

    pose_frames = {f["frame"]: f.get("persons", []) for f in pose_data.get("frames", [])}

    cur.execute(
        f"""
        SELECT ft.trace_id, ft.frame_number, ft.x, ft.y, ft.width, ft.height
        FROM {schema}.face_detections ft
        WHERE ft.file_uuid = %s AND ft.trace_id IS NOT NULL
        ORDER BY ft.frame_number
        """,
        (file_uuid,),
    )
    face_rows = cur.fetchall()

    # (trace_id, person_idx) -> distances of every frame where they matched.
    pair_distances = {}
    for trace_id, frame_num, fx, fy, fw, fh in face_rows:
        face_center_x = fx + fw / 2
        face_center_y = fy + fh / 2

        best_person_idx = None
        best_distance = float("inf")
        for person_idx, person in enumerate(pose_frames.get(frame_num, [])):
            bbox = person.get("bbox", {})
            if bbox.get("width", 0) <= 0:
                continue

            body_center_x = bbox.get("x", 0) + bbox.get("width", 0) / 2
            body_center_y = bbox.get("y", 0) + bbox.get("height", 0) / 2
            distance = (
                (face_center_x - body_center_x) ** 2
                + (face_center_y - body_center_y) ** 2
            ) ** 0.5

            if distance < best_distance:
                best_distance = distance
                best_person_idx = person_idx

        if best_person_idx is None or best_distance > max_distance_px:
            continue

        pair_distances.setdefault((trace_id, best_person_idx), []).append(best_distance)

    edge_count = 0
    for (trace_id, person_idx), distances in pair_distances.items():
        cur.execute(
            f"SELECT id FROM {schema}.tkg_nodes WHERE file_uuid=%s AND node_type='face_track' AND external_id=%s",
            (file_uuid, f"face_track_{trace_id}"),
        )
        face_row = cur.fetchone()

        cur.execute(
            f"SELECT id FROM {schema}.tkg_nodes WHERE file_uuid=%s AND node_type='body_track' AND external_id=%s",
            (file_uuid, f"body_track_{person_idx}"),
        )
        body_row = cur.fetchone()

        # Either node may be absent when its builder skipped or failed.
        if not face_row or not body_row:
            continue

        ensure_edge(
            cur, schema, file_uuid,
            "HAS_BODY",
            face_row[0], body_row[0],
            {"avg_distance_px": round(sum(distances) / len(distances), 1)},
        )
        edge_count += 1

    print(f"[TKG] {edge_count} face-body edges created")
    return edge_count
|
||||
|
||||
|
||||
def build_body_hand_edges(cur, schema, file_uuid):
    """Build HAS_HAND edges linking body_track nodes to hand_track nodes.

    Every distinct ``(person_id, hand_type)`` found in ``<uuid>.hand.json``
    is looked up as ``body_track_<person_id>`` and
    ``hand_track_<person_id>_<hand_type>``; an edge is written when both
    nodes exist.

    Args:
        cur: Database cursor used for the lookups and for ``ensure_edge``.
        schema: Database schema name.
        file_uuid: File UUID; also used to locate the hand detection JSON.

    Returns:
        Number of edges passed to ``ensure_edge`` (0 when the file is
        missing or has no frames).
    """
    print("[TKG] Building body-hand edges...")

    hand_json_path = os.path.join(OUTPUT_DIR, f"{file_uuid}.hand.json")
    if not os.path.exists(hand_json_path):
        print("[TKG] hand.json not found, skipping body-hand edges")
        return 0

    with open(hand_json_path) as f:
        hand_data = json.load(f)

    frames = hand_data.get("frames", [])
    if not frames:
        return 0

    # A dict is used as an ordered set: pairs stay in first-seen order.
    seen_hands = {}
    for frame_data in frames:
        for person in frame_data.get("persons", []):
            pair = (person.get("person_id", 0), person.get("hand_type", "unknown"))
            seen_hands[pair] = True

    created = 0
    for person_id, hand_type in seen_hands:
        cur.execute(
            f"SELECT id FROM {schema}.tkg_nodes WHERE file_uuid=%s AND node_type='body_track' AND external_id=%s",
            (file_uuid, f"body_track_{person_id}"),
        )
        body_row = cur.fetchone()

        cur.execute(
            f"SELECT id FROM {schema}.tkg_nodes WHERE file_uuid=%s AND node_type='hand_track' AND external_id=%s",
            (file_uuid, f"hand_track_{person_id}_{hand_type}"),
        )
        hand_row = cur.fetchone()

        if body_row and hand_row:
            ensure_edge(
                cur, schema, file_uuid,
                "HAS_HAND",
                body_row[0], hand_row[0],
                {"hand_type": hand_type},
            )
            created += 1

    print(f"[TKG] {created} body-hand edges created")
    return created
|
||||
|
||||
|
||||
def main():
    """CLI entry point: build the temporal knowledge graph for one file.

    Parses the command line, builds all node types followed by all edge
    types on a single cursor, commits once at the end and prints a summary.
    Any failure rolls the transaction back and is re-raised; the cursor and
    connection are closed in every case.
    """
    parser = argparse.ArgumentParser(description="Build Temporal Knowledge Graph")
    # Each option is declared exactly once: registering "--file-uuid" or
    # "--schema" a second time makes argparse raise a conflict error.
    parser.add_argument("--file-uuid", "-u", required=True, help="File UUID")
    parser.add_argument("--schema", "-s", default=SCHEMA, help="Database schema")
    parser.add_argument("--video", "-v", help="Video path (optional, auto-detected)")
    parser.add_argument("--uuid", help="UUID for Redis tracking (accepted by executor)")
    args = parser.parse_args()

    video_path = args.video or os.path.join(OUTPUT_DIR, f"{args.file_uuid}.mp4")

    conn = get_conn()
    cur = conn.cursor()
    try:
        print(f"[TKG] Building graph for {args.file_uuid}...")
        print(f"[TKG] Video: {video_path}")

        # Nodes first: the edge builders look nodes up by external_id.
        n1 = build_face_track_nodes(cur, args.schema, args.file_uuid)
        n2 = build_body_track_nodes(cur, args.schema, args.file_uuid, video_path)
        n3 = build_detected_object_nodes(cur, args.schema, args.file_uuid)
        n4 = build_speaker_segment_nodes(cur, args.schema, args.file_uuid)
        n5 = build_hand_track_nodes(cur, args.schema, args.file_uuid)

        e1 = build_co_occurrence_edges(cur, args.schema, args.file_uuid)
        e2 = build_speaker_face_edges(cur, args.schema, args.file_uuid)
        e3 = build_face_face_edges(cur, args.schema, args.file_uuid)
        e4 = build_face_body_edges(cur, args.schema, args.file_uuid)
        e5 = build_body_hand_edges(cur, args.schema, args.file_uuid)

        conn.commit()
    except Exception:
        # Do not leave a half-built graph behind.
        conn.rollback()
        raise
    finally:
        cur.close()
        conn.close()

    total_nodes = n1 + n2 + n3 + n4 + n5
    total_edges = e1 + e2 + e3 + e4 + e5

    print(f"\n[TKG] Complete: {total_nodes} nodes, {total_edges} edges")
    print(f" Face tracks: {n1}")
    print(f" Body tracks: {n2}")
    print(f" Detected objects: {n3}")
    print(f" Speaker segments: {n4}")
    print(f" Hand tracks: {n5}")
    print(f" Co-occur edges: {e1}")
    print(f" Speaker-face: {e2}")
    print(f" Face-face: {e3}")
    print(f" Face-body: {e4}")
    print(f" Body-hand: {e5}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
@@ -4,7 +4,7 @@ TKG Level 1 Builder - Store Level 1 appearance features in TKG
|
||||
|
||||
Purpose:
|
||||
1. Extract Level 1 features from pose.json + video frames
|
||||
2. Store as person_trace nodes in TKG
|
||||
2. Store as body_track nodes in TKG
|
||||
3. Enable tracking via Level 1 feature similarity
|
||||
|
||||
Level 1 Features:
|
||||
@@ -13,6 +13,8 @@ Level 1 Features:
|
||||
- upper_body: upper clothing color
|
||||
- lower_body: lower clothing color
|
||||
|
||||
Node Type: body_track (person appearance tracking)
|
||||
|
||||
Usage:
|
||||
python tkg_level1_builder.py --file-uuid <uuid> [--schema <schema>]
|
||||
"""
|
||||
@@ -123,9 +125,9 @@ def extract_level1_features(video_path, pose_json_path):
|
||||
return results
|
||||
|
||||
|
||||
def build_person_trace_nodes(cur, schema, file_uuid, level1_data):
|
||||
def build_body_track_nodes(cur, schema, file_uuid, level1_data):
|
||||
"""
|
||||
Build person_trace nodes with Level 1 features
|
||||
Build body_track nodes with Level 1 features
|
||||
|
||||
Args:
|
||||
cur: Database cursor
|
||||
@@ -133,7 +135,7 @@ def build_person_trace_nodes(cur, schema, file_uuid, level1_data):
|
||||
file_uuid: File UUID
|
||||
level1_data: Level 1 extracted features
|
||||
"""
|
||||
print("[TKG-L1] Building person_trace nodes...")
|
||||
print("[TKG-L1] Building body_track nodes...")
|
||||
|
||||
# Group by person (assuming person_index consistency across frames)
|
||||
person_groups = {}
|
||||
@@ -181,8 +183,8 @@ def build_person_trace_nodes(cur, schema, file_uuid, level1_data):
|
||||
avg_lower_color = average_colors(lower_colors) if lower_colors else []
|
||||
|
||||
# Build node properties
|
||||
external_id = f"person_{person_idx}"
|
||||
label = f"Person {person_idx}"
|
||||
external_id = f"body_track_{person_idx}"
|
||||
label = f"Body Track {person_idx}"
|
||||
|
||||
# Get average height and body shape
|
||||
avg_height_estimate = {}
|
||||
@@ -224,11 +226,11 @@ def build_person_trace_nodes(cur, schema, file_uuid, level1_data):
|
||||
}
|
||||
|
||||
# Store node
|
||||
ensure_node(cur, schema, file_uuid, "person_trace", external_id, label, properties)
|
||||
ensure_node(cur, schema, file_uuid, "body_track", external_id, label, properties)
|
||||
count += 1
|
||||
print(f"[TKG-L1] Created person_trace node: {external_id} ({len(frames)} frames)")
|
||||
print(f"[TKG-L1] Created body_track node: {external_id} ({len(frames)} frames)")
|
||||
|
||||
print(f"[TKG-L1] Total: {count} person_trace nodes")
|
||||
print(f"[TKG-L1] Total: {count} body_track nodes")
|
||||
return count
|
||||
|
||||
|
||||
@@ -321,11 +323,11 @@ def main():
|
||||
cur = conn.cursor()
|
||||
|
||||
try:
|
||||
# Build person_trace nodes
|
||||
count = build_person_trace_nodes(cur, schema, file_uuid, level1_data)
|
||||
# Build body_track nodes
|
||||
count = build_body_track_nodes(cur, schema, file_uuid, level1_data)
|
||||
|
||||
conn.commit()
|
||||
print(f"[TKG-L1] Success: {count} person_trace nodes created")
|
||||
print(f"[TKG-L1] Success: {count} body_track nodes created")
|
||||
|
||||
except Exception as e:
|
||||
conn.rollback()
|
||||
Reference in New Issue
Block a user