feat: trace quality agent selection report, identity clustering runner_v2 DB write, age/gender CoreML selection, updated experiment config UUID

This commit is contained in:
Warren
2026-05-06 14:41:48 +08:00
parent 74b6182eba
commit 65a1f77e65
1048 changed files with 103499 additions and 0 deletions

View File

@@ -15,6 +15,10 @@ pub fn bbox_routes() -> Router<crate::api::server::AppState> {
"/api/v1/file/:file_uuid/video/bbox",
get(bbox_overlay_video),
)
.route(
"/api/v1/file/:file_uuid/trace/:trace_id/video",
get(trace_video),
)
.route("/api/v1/file/:file_uuid/video", get(stream_video))
.route("/api/v1/file/:file_uuid/thumbnail", get(face_thumbnail))
}
@@ -229,6 +233,100 @@ fn parse_range(range: &str, file_size: u64) -> (u64, u64) {
(start.min(file_size - 1), end.min(file_size - 1))
}
async fn trace_video(
State(state): State<crate::api::server::AppState>,
Path((file_uuid, trace_id)): Path<(String, i32)>,
Query(params): Query<std::collections::HashMap<String, String>>,
) -> Result<impl IntoResponse, StatusCode> {
use axum::http::header;
let videos_table = schema::table_name("videos");
let row: Option<(String,)> = sqlx::query_as(&format!(
"SELECT file_path FROM {} WHERE file_uuid = $1",
videos_table
))
.bind(&file_uuid)
.fetch_optional(state.db.pool())
.await
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
let (video_path,) = row.ok_or(StatusCode::NOT_FOUND)?;
let fps: f64 = sqlx::query_scalar(&format!(
"SELECT COALESCE(fps, 24.0) FROM {} WHERE file_uuid = $1",
videos_table
))
.bind(&file_uuid)
.fetch_optional(state.db.pool())
.await
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?
.unwrap_or(24.0);
// Get all detections for this trace_id
let face_table = schema::table_name("face_detections");
let rows: Vec<(i32, i32, i32, i32, i32)> = sqlx::query_as(&format!(
"SELECT frame_number, x, y, width, height FROM {} WHERE file_uuid = $1 AND trace_id = $2 ORDER BY frame_number",
face_table
))
.bind(&file_uuid).bind(trace_id)
.fetch_all(state.db.pool()).await
.unwrap_or_else(|e| { tracing::error!("trace query error: {}", e); vec![] });
if rows.is_empty() {
return Err(StatusCode::NOT_FOUND);
}
let first_frame = rows[0].0;
let last_frame = rows[rows.len() - 1].0;
let start_sec = first_frame as f64 / fps;
let padding = params.get("padding").and_then(|s| s.parse().ok()).unwrap_or(2.0);
let duration = (last_frame - first_frame) as f64 / fps + padding * 2.0;
let seek = (start_sec - padding).max(0.0);
// Build filters: per-frame bbox + text
let mut parts: Vec<String> = Vec::new();
for (frame, x, y, w, h) in &rows {
let offset = frame - first_frame + (padding * fps) as i32;
parts.push(format!(
"drawbox=x={}:y={}:w={}:h={}:color=red@0.8:thickness=8:enable='eq(n,{})'",
x, y, w, h, offset
));
let label = format!("t{}", trace_id);
render_text(&mut parts, &label, *x + 6, *y + 6, Some(offset));
}
let vf = if parts.is_empty() {
"null".to_string()
} else {
parts.join(",")
};
let tmp = std::env::temp_dir().join(format!("trace_{}.mp4", uuid::Uuid::new_v4()));
let tmp_str = tmp.to_str().unwrap_or("").to_string();
let status = std::process::Command::new("ffmpeg")
.args([
"-ss", &seek.to_string(), "-i", &video_path,
"-t", &duration.to_string(), "-vf", &vf,
"-c:v", "libx264", "-preset", "ultrafast", "-crf", "28",
"-an", "-movflags", "+faststart", "-y", &tmp_str,
])
.status()
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
if !status.success() {
let _ = std::fs::remove_file(&tmp);
return Err(StatusCode::INTERNAL_SERVER_ERROR);
}
let data = tokio::fs::read(&tmp)
.await
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
let _ = std::fs::remove_file(&tmp);
Ok(Response::builder()
.header(header::CONTENT_TYPE, "video/mp4")
.header(header::CONTENT_LENGTH, data.len())
.body(Body::from(data))
.unwrap())
}
async fn stream_video(
State(state): State<crate::api::server::AppState>,
Path(file_uuid): Path<String>,

138
src/core/frame_cache.rs Normal file
View File

@@ -0,0 +1,138 @@
use anyhow::{Context, Result};
use std::path::{Path, PathBuf};
use tracing::info;
/// A single frame image extracted from a video, with positional metadata.
///
/// Instances are built by `FrameManager::extract` while it scans the
/// extraction directory.
#[derive(Debug, Clone)]
pub struct CachedFrame {
/// Path of the extracted `.jpg` file on disk.
pub path: PathBuf,
/// Frame number assigned by `FrameManager::extract`.
pub frame_number: u64,
/// Timestamp in seconds, computed by `FrameManager::extract` from
/// `frame_number` and the frame rate it was given.
pub timestamp_secs: f64,
}
/// Manages shared frame extraction for concurrent processors
pub struct FrameManager {
pub dir: PathBuf,
pub frames: Vec<CachedFrame>,
pub fps: f64,
pub total_frames: u64,
pub duration_secs: f64,
}
impl FrameManager {
/// Extract frames from video at `sample_interval` into a temp directory.
pub async fn extract(
video_path: &str,
sample_interval: u32,
fps: f64,
total_frames: u64,
) -> Result<Self> {
let dir = std::env::temp_dir().join(format!("frames_{}", uuid_from_path(video_path)));
let _ = std::fs::create_dir_all(&dir);
let pattern = dir.join("frame_%05d.jpg").to_string_lossy().to_string();
let video_path = video_path.to_owned();
info!(
"[FrameCache] Extracting frames (interval={}) to {:?}",
sample_interval, dir
);
let output = tokio::process::Command::new("ffmpeg")
.args([
"-y",
"-v",
"quiet",
"-i",
&video_path,
"-vf",
&format!("select=not(mod(n\\,{})),scale=320:-2", sample_interval),
"-vsync",
"vfr",
"-q:v",
"15",
&pattern,
])
.output()
.await
.context("Frame extraction via ffmpeg failed")?;
if !output.status.success() {
let stderr = String::from_utf8_lossy(&output.stderr);
anyhow::bail!("ffmpeg frame extraction failed: {}", stderr);
}
// Read extracted frames
let mut frames: Vec<CachedFrame> = Vec::new();
let mut entries: Vec<_> = std::fs::read_dir(&dir)?
.filter_map(|e| e.ok())
.filter(|e| e.path().extension().map_or(false, |ext| ext == "jpg"))
.collect();
entries.sort_by_key(|e| e.file_name());
for entry in &entries {
let fname = entry.file_name();
let fname_str = fname.to_string_lossy();
if let Some(num_str) = fname_str
.strip_prefix("frame_")
.and_then(|s| s.strip_suffix(".jpg"))
{
if let Ok(frame_num) = num_str.parse::<u64>() {
let timestamp = frame_num as f64 / fps;
frames.push(CachedFrame {
path: entry.path(),
frame_number: frame_num,
timestamp_secs: timestamp,
});
}
}
}
let duration_secs = if fps > 0.0 {
total_frames as f64 / fps
} else {
0.0
};
info!(
"[FrameCache] Extracted {} frames to {:?}",
frames.len(),
dir
);
Ok(FrameManager {
dir,
frames,
fps,
total_frames,
duration_secs,
})
}
/// Clean up the extracted frame files
pub fn cleanup(&self) {
let _ = std::fs::remove_dir_all(&self.dir);
info!("[FrameCache] Cleaned up {:?}", self.dir);
}
/// Get a frame by index
pub fn get_frame(&self, index: usize) -> Option<&CachedFrame> {
self.frames.get(index)
}
/// Number of extracted frames
pub fn len(&self) -> usize {
self.frames.len()
}
pub fn is_empty(&self) -> bool {
self.frames.is_empty()
}
}
/// Derives a directory-name token from a video path.
///
/// The path is fed through the standard library's `DefaultHasher` and the
/// 64-bit result is rendered as lowercase hex without padding. Despite the
/// name, the result is a hash, not an RFC 4122 UUID.
fn uuid_from_path(path: &str) -> String {
    use std::collections::hash_map::DefaultHasher;
    use std::hash::{Hash, Hasher};

    let mut state = DefaultHasher::new();
    Hash::hash(path, &mut state);
    let digest: u64 = state.finish();
    format!("{:x}", digest)
}

146
src/core/tmdb/face_agent.rs Normal file
View File

@@ -0,0 +1,146 @@
use anyhow::{Context, Result};
use serde::Deserialize;
use tracing::{error, info};
use crate::core::db::PostgresDb;
/// One face detection row that still needs an identity.
///
/// Built in `match_faces_against_tmdb` from the `(face_id, embedding)`
/// columns of the face detections table.
#[derive(Debug, Deserialize)]
struct FaceDetection {
// Identifier of the detection; used to address the row on update and as
// the `identity_value` of the binding that gets written.
face_id: String,
// Embedding vector compared against identity embeddings via
// `cosine_similarity`.
embedding: Vec<f32>,
}
/// A TMDb-sourced identity that has a reference face embedding.
///
/// Built in `match_faces_against_tmdb` from the `(id, name, face_embedding)`
/// columns of identity rows whose `source` is `'tmdb'`.
#[derive(Debug, Deserialize)]
struct TmdbIdentity {
// Primary key of the identity row; written to matched detections.
id: i64,
// Name of the identity as stored in the identities table.
name: String,
// Reference embedding, selected with a `::real[]` cast.
face_embedding: Vec<f32>,
}
/// Cosine similarity a face must strictly exceed to be bound to an identity.
const MATCH_THRESHOLD: f32 = 0.55;
/// Cosine of the angle between two embedding vectors.
///
/// Returns 0.0 when the vectors are empty, differ in length, or either one
/// has zero magnitude.
fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
    if a.is_empty() || a.len() != b.len() {
        return 0.0;
    }

    // Euclidean length of a vector.
    let magnitude = |v: &[f32]| v.iter().map(|c| c * c).sum::<f32>().sqrt();
    let (mag_a, mag_b) = (magnitude(a), magnitude(b));
    if mag_a == 0.0 || mag_b == 0.0 {
        return 0.0;
    }

    let inner: f32 = a.iter().zip(b.iter()).map(|(p, q)| p * q).sum();
    inner / (mag_a * mag_b)
}
/// Match unassigned face detections against TMDb-sourced identities.
/// For each face detection with identity_id IS NULL, compute cosine similarity
/// against all TMDb identities that have face_embedding set.
/// If similarity > MATCH_THRESHOLD, bind the face to the identity.
pub async fn match_faces_against_tmdb(db: &PostgresDb, file_uuid: &str) -> Result<usize> {
// Step 1: Fetch unassigned face detections for this file
let detections: Vec<FaceDetection> = sqlx::query_as::<_, (String, Vec<f32>)>(
"SELECT face_id, embedding FROM dev.face_detections \
WHERE file_uuid = $1 AND identity_id IS NULL AND embedding IS NOT NULL",
)
.bind(file_uuid)
.fetch_all(db.pool())
.await
.context("Failed to fetch unassigned face detections")?
.into_iter()
.map(|(face_id, embedding)| FaceDetection { face_id, embedding })
.collect();
if detections.is_empty() {
info!(
"[TMDB-FACE] No unassigned face detections for {}",
file_uuid
);
return Ok(0);
}
// Step 2: Fetch TMDb identities with face embeddings
let identities: Vec<TmdbIdentity> = sqlx::query_as::<_, (i64, String, Vec<f32>)>(
"SELECT id, name, face_embedding::real[] FROM dev.identities \
WHERE source = 'tmdb' AND face_embedding IS NOT NULL",
)
.fetch_all(db.pool())
.await
.context("Failed to fetch TMDb identities")?
.into_iter()
.map(|(id, name, emb)| TmdbIdentity {
id,
name,
face_embedding: emb,
})
.collect();
if identities.is_empty() {
info!("[TMDB-FACE] No TMDb identities with face embeddings for matching");
return Ok(0);
}
info!(
"[TMDB-FACE] Matching {} face detections against {} TMDb identities",
detections.len(),
identities.len()
);
// Step 3: For each face detection, find best matching identity
let mut bindings_created = 0usize;
for det in &detections {
let mut best_match: Option<(i64, f32)> = None;
for identity in &identities {
let sim = cosine_similarity(&det.embedding, &identity.face_embedding);
if sim > MATCH_THRESHOLD {
match best_match {
Some((_, best_sim)) if sim > best_sim => {
best_match = Some((identity.id, sim));
}
None => {
best_match = Some((identity.id, sim));
}
_ => {}
}
}
}
if let Some((identity_id, similarity)) = best_match {
// Update face_detection with identity_id
let _ = sqlx::query(
"UPDATE dev.face_detections SET identity_id = $1, identity_confidence = $2 \
WHERE file_uuid = $3 AND face_id = $4",
)
.bind(identity_id)
.bind(similarity as f64)
.bind(file_uuid)
.bind(&det.face_id)
.execute(db.pool())
.await
.ok();
// Also create identity_binding
let _ = sqlx::query(
"INSERT INTO dev.identity_bindings (identity_id, identity_type, identity_value, source, confidence) \
VALUES ($1, 'face', $2, 'tmdb_agent', $3) \
ON CONFLICT (identity_id, identity_type, identity_value) DO UPDATE SET confidence = EXCLUDED.confidence"
)
.bind(identity_id)
.bind(&det.face_id)
.bind(similarity as f64)
.execute(db.pool())
.await
.ok();
bindings_created += 1;
}
}
info!(
"[TMDB-FACE] Created {} face-to-TMDb bindings for {}",
bindings_created, file_uuid
);
Ok(bindings_created)
}

246
src/core/tmdb/probe.rs Normal file
View File

@@ -0,0 +1,246 @@
use anyhow::{Context, Result};
use serde::Deserialize;
use std::collections::HashMap;
use tracing::{info, warn};
use crate::core::config;
use crate::core::db::PostgresDb;
/// JSON body of the TMDb `/3/search/movie` response, reduced to the part
/// that `probe_movie` reads.
#[derive(Debug, Deserialize)]
struct TmdbSearchResult {
// Candidate movies; `probe_movie` takes the first entry.
results: Vec<TmdbMovie>,
}
/// One movie entry of a TMDb search response.
#[derive(Debug, Deserialize)]
struct TmdbMovie {
// TMDb movie id; used to build the credits URL and stored with the video.
id: u64,
// Movie title as returned by TMDb.
title: String,
// Stored under `release_date` in the video's TMDb metadata.
release_date: Option<String>,
// Deserialized but not read by `probe_movie`.
overview: Option<String>,
// Stored under `poster` in the video's TMDb metadata.
poster_path: Option<String>,
}
/// JSON body of the TMDb `/3/movie/{id}/credits` response, reduced to the
/// cast list.
#[derive(Debug, Deserialize)]
struct TmdbCredits {
// Cast members; `probe_movie` processes at most the first 20.
cast: Vec<TmdbCastMember>,
}
/// One cast entry of a TMDb credits response.
#[derive(Debug, Deserialize)]
struct TmdbCastMember {
// TMDb person id; stored as `tmdb_id` on the identity.
id: u64,
// Person name; identities are upserted keyed on this value.
name: String,
// Role name; stored as `tmdb_character` in the identity metadata.
character: String,
// Image path that `probe_movie` appends to the TMDb `w185` image base URL.
profile_path: Option<String>,
// Stored as `tmdb_cast_order` in the identity metadata.
order: u32,
}
/// Summary returned by `probe_movie` after a successful TMDb lookup.
pub struct TmdbProbeResult {
/// TMDb id of the matched movie.
pub tmdb_id: u64,
/// Title of the matched movie as returned by TMDb.
pub title: String,
/// Number of cast members in the credits response (not capped at 20).
pub cast_count: usize,
/// Number of identity rows whose insert-or-update statement succeeded.
pub identities_created: usize,
}
/// Turns a video file name into a search string for TMDb.
///
/// Takes the file stem (name without directory or final extension), replaces
/// every `.` and `_` with a space, and trims the ends. Returns `None` when the
/// path has no UTF-8 stem or the result is shorter than three bytes.
fn extract_movie_name(filename: &str) -> Option<String> {
    let stem = std::path::Path::new(filename).file_stem()?.to_str()?;
    let spaced: String = stem
        .chars()
        .map(|ch| if ch == '.' || ch == '_' { ' ' } else { ch })
        .collect();
    let title = spaced.trim();
    if title.len() >= 3 {
        Some(title.to_string())
    } else {
        None
    }
}
pub async fn probe_movie(
db: &PostgresDb,
filename: &str,
file_uuid: &str,
) -> Result<Option<TmdbProbeResult>> {
let api_key = match config::tmdb::API_KEY.as_ref() {
Some(k) => k.clone(),
None => {
info!("[TMDB] No API key configured, skipping TMDb probe");
return Ok(None);
}
};
let movie_name = match extract_movie_name(filename) {
Some(n) => n,
None => {
info!("[TMDB] Could not extract movie name from: {}", filename);
return Ok(None);
}
};
info!("[TMDB] Searching for movie: {}", movie_name);
let client = reqwest::Client::new();
// Step 1: Search movie
let search_url = format!(
"https://api.themoviedb.org/3/search/movie?api_key={}&query={}",
api_key,
urlencoding(&movie_name)
);
let search_resp = client
.get(&search_url)
.send()
.await
.context("TMDb search request failed")?
.json::<TmdbSearchResult>()
.await
.context("Failed to parse TMDb search response")?;
let movie = match search_resp.results.into_iter().next() {
Some(m) => m,
None => {
info!("[TMDB] No movie found for: {}", movie_name);
return Ok(None);
}
};
info!("[TMDB] Matched: {} (TMDB id={})", movie.title, movie.id);
// Step 2: Fetch cast
let credits_url = format!(
"https://api.themoviedb.org/3/movie/{}/credits?api_key={}",
movie.id, api_key
);
let credits = client
.get(&credits_url)
.send()
.await
.context("TMDb credits request failed")?
.json::<TmdbCredits>()
.await
.context("Failed to parse TMDb credits response")?;
// Step 3: Create identities for top cast
let identities_table = crate::core::db::schema::table_name("identities");
let mut identities_created = 0usize;
for member in credits.cast.iter().take(20) {
if member.name.trim().is_empty() {
continue;
}
let profile_url = member
.profile_path
.as_ref()
.map(|p| format!("https://image.tmdb.org/t/p/w185{}", p));
let result = sqlx::query(&format!(
"INSERT INTO {} (name, identity_type, source, status, tmdb_id, tmdb_profile, metadata) \
VALUES ($1, 'people', 'tmdb', 'confirmed', $2, $3, \
jsonb_build_object('tmdb_character', $4, 'tmdb_cast_order', $5, 'tmdb_movie_id', $6, 'tmdb_movie_title', $7)) \
ON CONFLICT (name) DO UPDATE SET \
tmdb_id = COALESCE(EXCLUDED.tmdb_id, {}.tmdb_id), \
tmdb_profile = COALESCE(EXCLUDED.tmdb_profile, {}.tmdb_profile), \
metadata = {}.metadata || jsonb_build_object('tmdb_movie_id', $6, 'tmdb_movie_title', $7) \
RETURNING id",
identities_table, identities_table, identities_table, identities_table
))
.bind(&member.name)
.bind(member.id as i64)
.bind(&profile_url)
.bind(&member.character)
.bind(member.order as i32)
.bind(movie.id as i64)
.bind(&movie.title)
.execute(db.pool())
.await;
match result {
Ok(_) => {
info!(
"[TMDB] Created/updated identity: {} as {}",
member.name, member.character
);
identities_created += 1;
}
Err(e) => {
warn!("[TMDB] Failed to create identity '{}': {}", member.name, e);
}
}
}
// Step 4: Trigger background embedding extraction
if identities_created > 0 {
let scripts_dir = std::env::var("MOMENTRY_SCRIPTS_DIR")
.unwrap_or_else(|_| "/Users/accusys/momentry_core_0.1/scripts".to_string());
let python_path = std::env::var("MOMENTRY_PYTHON_PATH")
.unwrap_or_else(|_| "/opt/homebrew/bin/python3.11".to_string());
let schema = crate::core::config::DATABASE_SCHEMA.clone();
tokio::spawn(async move {
let output = tokio::process::Command::new(&python_path)
.arg(&format!("{}/tmdb_embed_extractor.py", scripts_dir))
.arg("--schema")
.arg(&schema)
.output()
.await;
match output {
Ok(o) => {
if !o.status.success() {
let stderr = String::from_utf8_lossy(&o.stderr);
warn!("[TMDB] Embed extraction script failed: {}", stderr);
} else {
info!("[TMDB] Background face embedding extraction complete");
}
}
Err(e) => warn!("[TMDB] Failed to run embed extraction script: {}", e),
}
});
}
// Step 5: Store tmdb_id on the video record for later use
let videos_table = crate::core::db::schema::table_name("videos");
let tmdb_label = "tmdb";
let _ = sqlx::query(&format!(
"UPDATE {} SET birth_registration = \
jsonb_set(COALESCE(birth_registration, '{{}}'::jsonb), '{{{}}}', $1::jsonb) \
WHERE file_uuid = $2",
videos_table, tmdb_label
))
.bind(serde_json::json!({
"movie_id": movie.id,
"movie_title": movie.title,
"release_date": movie.release_date,
"poster": movie.poster_path,
"cast_count": credits.cast.len(),
"identities_created": identities_created,
}))
.bind(file_uuid)
.execute(db.pool())
.await
.ok();
info!(
"[TMDB] Probe complete: {} cast members, {} identities created/updated",
credits.cast.len(),
identities_created
);
Ok(Some(TmdbProbeResult {
tmdb_id: movie.id,
title: movie.title,
cast_count: credits.cast.len(),
identities_created,
}))
}
/// Percent-encodes `s` for use as a URL query value.
///
/// ASCII letters, digits and `-`, `_`, `.`, `~` pass through unchanged, a
/// space becomes `+`, and every other byte of the UTF-8 encoding is written as
/// `%XX` with uppercase hex. Encoding works byte-wise so that non-ASCII
/// characters (accented letters, CJK titles) expand to all of their UTF-8
/// bytes instead of being truncated to one.
fn urlencoding(s: &str) -> String {
    const HEX: &[u8; 16] = b"0123456789ABCDEF";

    let mut encoded = String::with_capacity(s.len());
    for byte in s.bytes() {
        match byte {
            b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9' | b'-' | b'_' | b'.' | b'~' => {
                encoded.push(char::from(byte));
            }
            b' ' => encoded.push('+'),
            other => {
                encoded.push('%');
                encoded.push(char::from(HEX[usize::from(other >> 4)]));
                encoded.push(char::from(HEX[usize::from(other & 0x0F)]));
            }
        }
    }
    encoded
}