refactor: remove face embedding architecture - single Qdrant _faces collection

- Delete FaceEmbeddingDb module (face_embedding_db.rs)
- Stub match_faces_iterative, generate_seed_embeddings, tmdb_match_handler
- Remove sync_trace_embeddings, populate_face_embeddings_to_qdrant
- Remove embedding from face.json output (face_processor.py)
- Remove embedding from PG UPDATE (store_traced_faces.py)
- Remove workspace traces staging (checkin.rs, qdrant_workspace.rs)
- Fix tests: add pose_angle to Face, hand_nodes to TkgResult

Disabled functions (need to be reimplemented with _faces):
- match_faces_iterative (identity agent)
- generate_seed_embeddings (TMDb seeds)
- tmdb_match_handler (TMDb matching)
- cluster_face_embeddings, search_similar_faces
- merge_traces_within_cuts
This commit is contained in:
Accusys
2026-06-24 22:27:09 +08:00
parent 360cb991e1
commit 074cdcdbed
60 changed files with 657 additions and 9454 deletions

View File

@@ -633,44 +633,6 @@ async fn process_appearance_module(
Ok(())
}
/// Runs the story processor for one video and writes its result to `story_path`
/// as pretty-printed JSON.
///
/// Steps, in order:
/// 1. Marks the `Story` processor as started in `progress_state`.
/// 2. Awaits `momentry_core::core::processor::process_story`.
/// 3. Serializes the result and writes it to `story_path`.
/// 4. Asks `OutputDir` to back up `story.json` (the outcome is ignored).
/// 5. Prints a summary, marks the processor complete and re-renders the UI
///    when one is present.
///
/// # Errors
/// Returns an error when `story_path` is not valid UTF-8, when `process_story`
/// fails, when the result cannot be serialized, or when the file cannot be
/// written.
///
/// # Panics
/// Panics if locking `progress_state` or `ui` fails, because the lock results
/// are unwrapped.
async fn process_story_module(
    story_path: &Path,
    video_path: &str,
    uuid: &str,
    progress_state: &Arc<Mutex<ProgressState>>,
    ui: &Arc<Mutex<Option<ProgressUi>>>,
) -> anyhow::Result<()> {
    // `process_story` takes the output path as `&str`. A non-UTF-8 path is an
    // input problem, so report it as an error instead of panicking, and do it
    // before the processor is marked as started.
    let story_path_str = story_path.to_str().ok_or_else(|| {
        anyhow::anyhow!("Story path is not valid UTF-8: {}", story_path.display())
    })?;

    // The guard lives only inside this block, so it is released before the
    // `.await` below.
    {
        let mut state = progress_state.lock().unwrap();
        state.get_processor(ProcessorType::Story).start(1);
    }

    let story_result =
        momentry_core::core::processor::process_story(video_path, story_path_str, Some(uuid))
            .await?;

    let story_json = serde_json::to_string_pretty(&story_result)?;
    std::fs::write(story_path, &story_json)?;

    // Best-effort backup: a failure here is deliberately ignored.
    let output_dir = OutputDir::new();
    let _ = output_dir.backup_file(uuid, "story.json");

    println!(
        " ✓ Story saved: {} parent chunks, {} child chunks",
        story_result.stats.total_parent_chunks, story_result.stats.total_child_chunks
    );

    {
        let mut state = progress_state.lock().unwrap();
        state.get_processor(ProcessorType::Story).complete(&format!(
            "{} parents, {} children",
            story_result.stats.total_parent_chunks, story_result.stats.total_child_chunks
        ));
    }

    // Rendering errors are ignored; the UI is optional.
    if let Some(ref mut ui) = *ui.lock().unwrap() {
        let _ = ui.render();
    }

    Ok(())
}
async fn process_caption_module(
caption_path: &Path,
video_path: &str,
@@ -745,11 +707,6 @@ enum Commands {
/// UUID
uuid: String,
},
/// Generate story for cut scenes
Story {
/// UUID
uuid: String,
},
/// Vectorize chunks
Vectorize {
/// UUID (or 'all' for all)
@@ -2382,150 +2339,6 @@ Ok(())
Ok(())
}
Commands::Story { uuid } => {
    // Prints a plain-text summary of every scene of the video to stdout.
    // A scene is a cut chunk, or a sentence chunk with text when the video has
    // no cut chunks. Each summary is built from the chunks and frames that
    // fall within 5 seconds either side of the scene.
    println!("Generating story for: {}", uuid);
    let db = PostgresDb::init().await?;
    let video = db
        .get_video_by_uuid(&uuid)
        .await?
        .ok_or_else(|| anyhow::anyhow!("Video not found: {}", uuid))?;
    // Only the duration is needed, to clamp the end of the context window.
    let duration = video.duration;

    // Get all chunks
    let all_chunks = db.get_chunks_by_uuid(&uuid).await?;

    // Try cut chunks first, fall back to sentence chunks
    let mut story_chunks: Vec<&Chunk> = all_chunks
        .iter()
        .filter(|c| c.chunk_type == ChunkType::Cut)
        .collect();
    let story_type = if story_chunks.is_empty() {
        story_chunks = all_chunks
            .iter()
            .filter(|c| c.chunk_type == ChunkType::Sentence && c.text_content.is_some())
            .collect();
        "sentence"
    } else {
        "cut"
    };
    if story_chunks.is_empty() {
        println!("No story chunks found. Run 'chunk' command first.");
        return Ok(());
    }
    println!("Found {} {} scenes", story_chunks.len(), story_type);

    for (i, story_chunk) in story_chunks.iter().enumerate() {
        println!("\n=== Scene {} ===", i + 1);
        println!(
            "Time: {:.2}s - {:.2}s",
            story_chunk.start_time().seconds(),
            story_chunk.end_time().seconds()
        );

        // Context window: the scene padded by 5 seconds on both sides,
        // clamped to [0, duration].
        let context_start = (story_chunk.start_time().seconds() - 5.0).max(0.0);
        let context_end = (story_chunk.end_time().seconds() + 5.0).min(duration);
        let context_chunks = db
            .get_chunks_by_time_range(&uuid, context_start, context_end)
            .await?;
        let context_frames = db
            .get_frames_by_time_range(&uuid, context_start, context_end)
            .await?;

        let mut story = String::new();
        story.push_str(&format!(
            "Scene {} ({:.1}s - {:.1}s)\n\n",
            i + 1,
            story_chunk.start_time().seconds(),
            story_chunk.end_time().seconds()
        ));

        // Speech: text of every sentence chunk inside the context window.
        let sentence_chunks: Vec<&serde_json::Value> = context_chunks
            .iter()
            .filter(|c| c["chunk_type"] == "sentence")
            .collect();
        if !sentence_chunks.is_empty() {
            story.push_str("【Speech】\n");
            for sc in &sentence_chunks {
                if let Some(text) = sc["text_content"].as_str() {
                    story.push_str(&format!(" - {}\n", text));
                }
            }
            story.push('\n');
        }

        // Objects: number of detections per class name across the context frames.
        let mut all_objects: std::collections::HashMap<String, u32> =
            std::collections::HashMap::new();
        for frame in &context_frames {
            if let Some(objects) = frame["yolo_objects"].as_array() {
                for obj in objects {
                    if let Some(class_name) = obj.get("class_name").and_then(|v| v.as_str()) {
                        *all_objects.entry(class_name.to_string()).or_default() += 1;
                    }
                }
            }
        }
        if !all_objects.is_empty() {
            story.push_str("【Objects】\n");
            let mut sorted_objects: Vec<_> = all_objects.iter().collect();
            // Highest count first. HashMap iteration order is unspecified, so
            // ties are broken by class name to keep the top-10 list
            // reproducible between runs.
            sorted_objects.sort_by(|a, b| b.1.cmp(a.1).then_with(|| a.0.cmp(b.0)));
            for (obj, count) in sorted_objects.iter().take(10) {
                story.push_str(&format!(" - {} ({} frames)\n", obj, count));
            }
            story.push('\n');
        }

        // On-screen text: OCR strings from the context frames, in frame order.
        let mut all_texts: Vec<String> = Vec::new();
        for frame in &context_frames {
            if let Some(texts) = frame["ocr_results"].as_array() {
                for txt in texts {
                    if let Some(text) = txt.get("text").and_then(|v| v.as_str()) {
                        // `len()` is the UTF-8 byte length, so this keeps
                        // strings longer than two bytes (which also excludes
                        // the empty string).
                        if text.len() > 2 {
                            all_texts.push(text.to_string());
                        }
                    }
                }
            }
        }
        if !all_texts.is_empty() {
            story.push_str("【Text in video】\n");
            for txt in all_texts.iter().take(10) {
                story.push_str(&format!(" - {}\n", txt));
            }
            story.push('\n');
        }

        // Faces: total number of face entries summed over the context frames.
        let mut face_count = 0;
        for frame in &context_frames {
            if let Some(faces) = frame["face_results"].as_array() {
                face_count += faces.len();
            }
        }
        if face_count > 0 {
            story.push_str(&format!(
                "【Faces】\n - {} face(s) detected\n\n",
                face_count
            ));
        }

        println!("{}", story);
    }
    Ok(())
}
Commands::Vectorize { uuid } => {
println!("Vectorizing: {}", uuid);