feat: Swift Face Pose integration + TKG 方案 B

Major Changes:
- swift_face_pose: output pose angles (yaw/pitch/roll) in face.json
- face_processor.py: call swift_face_pose (dual output: face.json + pose.json)
- Face struct: add pose_angle field
- TKG 方案 B: gaze/lip_track nodes from face.json (no face_detections dependency)
- Chunk cleanup: delete old data before rebuild (avoid duplicate key)
- Hand nodes: classify by hand_type + gesture (15 combinations)
- HAND_OBJECT edges: bbox spatial matching (174 matches)

Test Results:
- Blake Jones: 8 faces, pose_angle ✓, 66 nodes, 174 edges
- FilmRiot: 394 faces, pose_angle ✓, 35 nodes, 39 edges
- Left hands: 132, Right hands: 2

Architecture:
- All TKG nodes built from JSON files (face.json, hand.json, yolo.json)
- Swift processors: sample_interval=3 (Face/Pose/Hand sync)
- Cleanup functions: delete_tkg_nodes_by_uuid, delete_tkg_edges_by_uuid
This commit is contained in:
Accusys
2026-06-23 05:47:24 +08:00
parent e1e2da2140
commit 766a1d9a6d
17 changed files with 1108 additions and 47 deletions

View File

@@ -111,6 +111,7 @@ impl SystemResources {
recommended.push("ocr");
recommended.push("face");
recommended.push("pose");
recommended.push("hand");
}
recommended
@@ -519,6 +520,42 @@ async fn process_face_module(
Ok(())
}
/// Runs hand detection for one video and persists the result as `hand.json`.
///
/// Steps, in order:
/// 1. Marks the `Hand` processor as started in `progress_state`.
/// 2. Calls `process_hand` with the video path, the output path and the UUID.
/// 3. Serializes the result as pretty-printed JSON and writes it to `hand_path`.
/// 4. Asks `OutputDir` to back up `hand.json` for this UUID (outcome ignored).
/// 5. Marks the processor complete with a "`<n>` frames" summary, stops the
///    progress state and re-renders the UI when one is attached.
///
/// # Errors
/// Returns an error if `hand_path` is not valid UTF-8, if `process_hand`
/// fails, if the result cannot be serialized, or if the file cannot be
/// written.
///
/// # Panics
/// Panics if the `progress_state` or `ui` mutex is poisoned.
async fn process_hand_module(
    hand_path: &Path,
    video_path: &str,
    uuid: &str,
    progress_state: &Arc<Mutex<ProgressState>>,
    ui: &Arc<Mutex<Option<ProgressUi>>>,
) -> anyhow::Result<()> {
    // Validate the output path before touching the progress state: a
    // non-UTF-8 path is reported as an error instead of panicking, and the
    // processor is never left marked as "started" for a run that cannot begin.
    let hand_path_str = hand_path.to_str().ok_or_else(|| {
        anyhow::anyhow!(
            "hand output path is not valid UTF-8: {}",
            hand_path.display()
        )
    })?;

    {
        // Scoped so the guard is released before the `.await` below.
        let mut state = progress_state.lock().unwrap();
        state.get_processor(ProcessorType::Hand).start(1);
    }

    let hand_result = momentry_core::core::processor::process_hand(
        video_path,
        hand_path_str,
        Some(uuid),
        None,
    )
    .await?;

    let hand_json = serde_json::to_string_pretty(&hand_result)?;
    std::fs::write(hand_path, &hand_json)?;

    // Best-effort backup: the outcome is deliberately ignored so a backup
    // problem does not fail the module.
    let output_dir = OutputDir::new();
    let _ = output_dir.backup_file(uuid, "hand.json");

    let frame_count = hand_result.frames.len();
    println!(" ✓ Hand saved: {} frames", frame_count);

    {
        let mut state = progress_state.lock().unwrap();
        state
            .get_processor(ProcessorType::Hand)
            .complete(&format!("{} frames", frame_count));
        state.stop();
    }

    // Redraw only when a progress UI is attached; render errors are ignored.
    if let Some(ref mut ui) = *ui.lock().unwrap() {
        let _ = ui.render();
    }

    Ok(())
}
async fn process_pose_module(
pose_path: &Path,
video_path: &str,
@@ -688,7 +725,7 @@ enum Commands {
Process {
/// UUID or path
target: String,
/// Modules to process (comma separated: appearance,asr,cut,asrx,yolo,ocr,face,pose,story,caption)
/// Modules to process (comma separated: appearance,asr,cut,asrx,yolo,ocr,face,pose,hand,story,caption)
/// If not specified, processes all modules
#[arg(short, long, value_delimiter = ',')]
modules: Option<Vec<String>>,
@@ -1062,15 +1099,16 @@ async fn main() -> Result<()> {
.filter_map(|name| {
let name_lower = name.to_lowercase();
match name_lower.as_str() {
"appearance" => Some(ProcessorType::Appearance),
"appearance" => Some(ProcessorType::Appearance),
"asr" => Some(ProcessorType::Asr),
"cut" => Some(ProcessorType::Cut),
"asrx" => Some(ProcessorType::Asrx),
"yolo" => Some(ProcessorType::Yolo),
"ocr" => Some(ProcessorType::Ocr),
"face" => Some(ProcessorType::Face),
"pose" => Some(ProcessorType::Pose),
_ => {
"ocr" => Some(ProcessorType::Ocr),
"face" => Some(ProcessorType::Face),
"pose" => Some(ProcessorType::Pose),
"hand" => Some(ProcessorType::Hand),
_ => {
eprintln!("Unknown module: {}", name);
None
}
@@ -1087,15 +1125,16 @@ None
.filter_map(|name| {
let name_lower = name.to_lowercase();
match name_lower.as_str() {
"appearance" => Some(ProcessorType::Appearance),
"appearance" => Some(ProcessorType::Appearance),
"asr" => Some(ProcessorType::Asr),
"cut" => Some(ProcessorType::Cut),
"asrx" => Some(ProcessorType::Asrx),
"yolo" => Some(ProcessorType::Yolo),
"ocr" => Some(ProcessorType::Ocr),
"face" => Some(ProcessorType::Face),
"pose" => Some(ProcessorType::Pose),
_ => {
"face" => Some(ProcessorType::Face),
"pose" => Some(ProcessorType::Pose),
"hand" => Some(ProcessorType::Hand),
_ => {
eprintln!("Unknown cloud module: {}", name);
None
}
@@ -1667,6 +1706,65 @@ None
}
}
// Process Hand (gesture detection).
//
// `decide_processing` inspects the existing `hand.json` together with the
// `force` / `resume` flags and selects one of four actions. Every action
// except `SkipComplete` ends up either running `process_hand_module` locally
// or printing a notice that cloud processing is not implemented.
if should_process(ProcessorType::Hand) {
    let hand_path = output_dir.get_output_path(&uuid, "hand.json");
    let decision = decide_processing(&hand_path, force, resume);
    match decision {
        ProcessingDecision::SkipComplete => {
            println!("\nHand: ✓ Already complete, skipping");
        }
        ProcessingDecision::ForceReprocess => {
            println!("\nHand: ⟳ Force reprocessing from scratch...");
            if is_cloud(ProcessorType::Hand) {
                // Nothing will regenerate the file on this path, so the
                // existing hand.json is intentionally left in place.
                println!(" [Cloud processing not implemented yet - run locally]");
            } else {
                // Drop the stale output only when it is about to be rebuilt.
                // A missing file is fine, hence the ignored result.
                std::fs::remove_file(&hand_path).ok();
                process_hand_module(
                    &hand_path,
                    video_path,
                    &uuid,
                    &progress_state,
                    &ui,
                )
                .await?;
            }
        }
        ProcessingDecision::ResumePartial => {
            println!("\nHand: ↻ Resuming from checkpoint...");
            if is_cloud(ProcessorType::Hand) {
                println!(" [Cloud processing not implemented yet - run locally]");
            } else {
                process_hand_module(
                    &hand_path,
                    video_path,
                    &uuid,
                    &progress_state,
                    &ui,
                )
                .await?;
            }
        }
        ProcessingDecision::Process => {
            if is_cloud(ProcessorType::Hand) {
                println!("\nHand: ☁️ Running via cloud...");
                println!(" [Cloud processing not implemented yet - run locally]");
            } else {
                println!("\nHand: ⚙️ Processing...");
                process_hand_module(
                    &hand_path,
                    video_path,
                    &uuid,
                    &progress_state,
                    &ui,
                )
                .await?;
            }
        }
    }
}
// Process Appearance (color/histogram analysis, depends on Pose)
if should_process(ProcessorType::Appearance) {
let appearance_path = output_dir.get_output_path(&uuid, "appearance.json");
@@ -1774,6 +1872,9 @@ Ok(())
Commands::Chunk { uuid } => {
println!("Chunking: {}", uuid);
let output_dir = std::env::var("MOMENTRY_OUTPUT_DIR")
.unwrap_or_else(|_| "/Users/accusys/momentry/output_dev".to_string());
let db = PostgresDb::init().await?;
let video = db
.get_video_by_uuid(&uuid)
@@ -1786,7 +1887,7 @@ Ok(())
// ========== Read all JSON files ==========
// Read ASR JSON
let asr_path = format!("{}.asr.json", uuid);
let asr_path = format!("{}/{}.asr.json", output_dir, uuid);
let asr_json = std::fs::read_to_string(&asr_path)
.context("ASR file not found. Run 'process' first.")?;
let asr_result: momentry_core::core::processor::asr::AsrResult =
@@ -1794,7 +1895,7 @@ Ok(())
println!("Loaded ASR: {} segments", asr_result.segments.len());
// Read CUT JSON
let cut_path = format!("{}.cut.json", uuid);
let cut_path = format!("{}/{}.cut.json", output_dir, uuid);
let cut_json = std::fs::read_to_string(&cut_path)
.context("CUT file not found. Run 'process' first.")?;
let cut_result: momentry_core::core::processor::cut::CutResult =
@@ -1802,7 +1903,7 @@ Ok(())
println!("Loaded CUT: {} scenes", cut_result.scenes.len());
// Read YOLO JSON (optional)
let yolo_path = format!("{}.yolo.json", uuid);
let yolo_path = format!("{}/{}.yolo.json", output_dir, uuid);
let yolo_result = match std::fs::read_to_string(&yolo_path) {
Ok(yolo_json) => match serde_json::from_str::<
momentry_core::core::processor::yolo::YoloResult,
@@ -1832,7 +1933,7 @@ Ok(())
};
// Read OCR JSON (optional)
let ocr_path = format!("{}.ocr.json", uuid);
let ocr_path = format!("{}/{}.ocr.json", output_dir, uuid);
let ocr_result = match std::fs::read_to_string(&ocr_path) {
Ok(ocr_json) => match serde_json::from_str::<
momentry_core::core::processor::ocr::OcrResult,
@@ -1862,7 +1963,7 @@ Ok(())
};
// Read Face JSON (optional)
let face_path = format!("{}.face.json", uuid);
let face_path = format!("{}/{}.face.json", output_dir, uuid);
let face_result = match std::fs::read_to_string(&face_path) {
Ok(face_json) => match serde_json::from_str::<
momentry_core::core::processor::face::FaceResult,
@@ -1892,7 +1993,7 @@ Ok(())
};
// Read Pose JSON (optional)
let pose_path = format!("{}.pose.json", uuid);
let pose_path = format!("{}/{}.pose.json", output_dir, uuid);
let pose_result = match std::fs::read_to_string(&pose_path) {
Ok(pose_json) => match serde_json::from_str::<
momentry_core::core::processor::pose::PoseResult,
@@ -1953,6 +2054,15 @@ Ok(())
// ========== Store pre_chunks (from ASR, CUT) ==========
// Clean old data first (avoid duplicate key)
println!("\nCleaning old data...");
let deleted_pre_chunks = db.delete_pre_chunks_by_uuid(&uuid).await?;
let deleted_frames = db.delete_frames_by_uuid(&uuid).await?;
let deleted_tkg_nodes = db.delete_tkg_nodes_by_uuid(&uuid).await?;
let deleted_tkg_edges = db.delete_tkg_edges_by_uuid(&uuid).await?;
println!(" Deleted: {} pre_chunks, {} frames, {} tkg_nodes, {} tkg_edges",
deleted_pre_chunks, deleted_frames, deleted_tkg_nodes, deleted_tkg_edges);
println!("\nStoring pre_chunks...");
// Store ASR sentence pre_chunks
@@ -2255,6 +2365,13 @@ Ok(())
// Update storage status
db.update_storage_status(&uuid, "psql_chunk", true).await?;
// Build TKG
println!("\nBuilding TKG...");
let tkg_result = momentry_core::core::processor::tkg::build_tkg(&db, &uuid, &output_dir).await?;
println!("✓ TKG built: {} nodes, {} edges",
tkg_result.face_track_nodes + tkg_result.hand_nodes + tkg_result.object_nodes,
tkg_result.co_occurrence_edges + tkg_result.hand_object_edges);
println!("\n✓ Chunk stage completed!");
println!(
" - pre_chunks: {} (asr + cut + time)",