use axum::{ extract::{Path, Query, State}, http::StatusCode, response::Json, routing::get, Router, }; use serde::{Deserialize, Serialize}; use super::types::AppState; use crate::core::db::schema; /// Comprehensive file stats endpoint — provides all data sources for frontend transparency /// Combines: JSON file status + PostgreSQL counts + Qdrant collections + TKG stats + Identity Agent stats #[derive(Debug, Serialize)] struct FileStatsResponse { file_uuid: String, file_name: Option, status: Option, // Processor status processors: Vec, // PostgreSQL counts postgres: PostgresStats, // Qdrant collection counts qdrant: QdrantStats, // TKG stats tkg: TkgFileStats, // Identity Agent stats identity_agent: IdentityAgentStats, } #[derive(Debug, Serialize)] struct ProcessorStatus { name: String, status: String, progress: u32, message: Option, } #[derive(Debug, Serialize, Default)] struct PostgresStats { sentence_chunks: i64, trace_chunks: i64, relationship_chunks: i64, identities: i64, file_identities: i64, } #[derive(Debug, Serialize)] struct QdrantStats { faces: i64, face_traces: i64, face_identities: i64, text_chunks: i64, speakers: i64, } #[derive(Debug, Serialize, Default)] struct TkgFileStats { total_nodes: i64, total_edges: i64, face_track_nodes: i64, gaze_track_nodes: i64, lip_track_nodes: i64, text_region_nodes: i64, appearance_nodes: i64, accessory_nodes: i64, object_nodes: i64, hand_nodes: i64, speaker_nodes: i64, co_occurrence_edges: i64, speaker_face_edges: i64, face_face_edges: i64, mutual_gaze_edges: i64, lip_sync_edges: i64, has_appearance_edges: i64, wears_edges: i64, hand_object_edges: i64, } #[derive(Debug, Serialize, Default)] struct IdentityAgentStats { clusters: i64, identities_created: i64, tmdb_matches: i64, speaker_bindings: i64, confirmations: i64, } #[derive(Debug, Serialize, Deserialize)] struct ScannedFileInfo { file_name: String, relative_path: String, file_path: String, file_size: u64, modified_time: String, is_registered: bool, file_uuid: Option, status: Option, registration_time: Option, job_id: Option, } #[derive(Debug, Serialize, Deserialize)] struct ScanFilesResponse { files: Vec, total: usize, filtered_total: usize, page: usize, page_size: usize, total_pages: usize, registered_count: usize, unregistered_count: usize, total_chunks: i64, searchable_chunks: i64, pending_videos: i64, } #[derive(Debug, Deserialize)] struct ScanFilesQuery { limit: Option, page: Option, page_size: Option, pattern: Option, sort_by: Option, sort_order: Option, } fn scan_directory_recursive( dir: &std::path::Path, root: &std::path::Path, allowed_extensions: &[&str], registered_paths: &std::collections::HashMap< String, (String, String, Option, Option), >, files: &mut Vec, ) { if let Ok(entries) = std::fs::read_dir(dir) { for entry in entries.flatten() { let path = entry.path(); if path.is_dir() { if let Some(name) = path.file_name().and_then(|n| n.to_str()) { if name.starts_with('.') { return; } } scan_directory_recursive(&path, root, allowed_extensions, registered_paths, files); } else if path.is_file() { if let Some(ext) = path.extension().and_then(|e| e.to_str()) { if allowed_extensions.contains(&ext.to_lowercase().as_str()) { if let Ok(meta) = entry.metadata() { let abs_path = path.to_string_lossy().to_string(); let rel_path = path .strip_prefix(root) .map(|p| p.to_string_lossy().to_string()) .unwrap_or_else(|_| abs_path.clone()); let file_name = path .file_name() .map(|n| n.to_string_lossy().to_string()) .unwrap_or_default(); let modified_time = meta .modified() .ok() .and_then(|t| t.duration_since(std::time::UNIX_EPOCH).ok()) .map(|d| { chrono::DateTime::from_timestamp(d.as_secs() as i64, 0) .map(|dt| dt.to_rfc3339()) .unwrap_or_default() }) .unwrap_or_default(); match registered_paths.get(&abs_path) { Some((uuid, status, reg_time, jid)) if status != "unregistered" => { files.push(ScannedFileInfo { file_name, relative_path: rel_path, file_path: abs_path, file_size: meta.len(), modified_time, is_registered: true, file_uuid: Some(uuid.clone()), status: Some(status.clone()), registration_time: reg_time.clone(), job_id: *jid, }); } _ => { files.push(ScannedFileInfo { file_name, relative_path: rel_path, file_path: abs_path, file_size: meta.len(), modified_time, is_registered: false, file_uuid: None, status: Some("unregistered".to_string()), registration_time: None, job_id: None, }); } } } } } } } } } async fn scan_files( State(state): State, Query(params): Query, ) -> Result, StatusCode> { let demo_dir_str = std::env::var("MOMENTRY_SFTP_ROOT") .unwrap_or_else(|_| "/Users/accusys/momentry/var/sftpgo/data/demo".to_string()); let demo_dir = std::path::Path::new(&demo_dir_str); let allowed_extensions = vec![ "mp4", "mov", "mkv", "avi", "webm", "jpg", "jpeg", "png", "gif", "webp", ]; let table = schema::table_name("videos"); let mj_table = schema::table_name("monitor_jobs"); let registered_db: Vec<(String, String, String, String, Option, Option)> = sqlx::query_as(&format!( "SELECT v.file_path, v.file_name, v.file_uuid, v.status, v.registration_time::text, \ latest_job.id as job_id \ FROM {} v \ LEFT JOIN LATERAL ( \ SELECT id FROM {} WHERE uuid = v.file_uuid ORDER BY id DESC LIMIT 1 \ ) latest_job ON true \ ORDER BY v.id", table, mj_table )) .fetch_all(state.db.pool()) .await .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?; let registered_paths: std::collections::HashMap< String, (String, String, Option, Option), > = registered_db .into_iter() .map(|(path, _name, uuid, status, reg_time, jid)| (path, (uuid, status, reg_time, jid))) .collect(); let mut result_files = Vec::new(); if demo_dir.exists() { scan_directory_recursive( demo_dir, demo_dir, &allowed_extensions, ®istered_paths, &mut result_files, ); } let desc = params.sort_order.as_deref().unwrap_or("asc") == "desc"; match params.sort_by.as_deref().unwrap_or("name") { "size" => { if desc { result_files.sort_by(|a, b| b.file_size.cmp(&a.file_size)); } else { result_files.sort_by(|a, b| a.file_size.cmp(&b.file_size)); } } "modified" | "time" => { if desc { result_files.sort_by(|a, b| b.modified_time.cmp(&a.modified_time)); } else { result_files.sort_by(|a, b| a.modified_time.cmp(&b.modified_time)); } } "status" => { if desc { result_files .sort_by(|a, b| b.status.cmp(&a.status).then(b.file_name.cmp(&a.file_name))); } else { result_files .sort_by(|a, b| a.status.cmp(&b.status).then(a.file_name.cmp(&b.file_name))); } } _ => { if desc { result_files.sort_by(|a, b| { a.is_registered .cmp(&b.is_registered) .then(b.file_name.cmp(&a.file_name)) }); } else { result_files.sort_by(|a, b| { b.is_registered .cmp(&a.is_registered) .then(a.file_name.cmp(&b.file_name)) }); } } } let total_all = result_files.len(); let registered_count = result_files.iter().filter(|f| f.is_registered).count(); let unregistered_count = result_files.iter().filter(|f| !f.is_registered).count(); let filtered: Vec = if let Some(ref pat) = params.pattern { let re = match regex::Regex::new(&format!("(?i){}", pat)) { Ok(r) => r, Err(_) => return Err(StatusCode::BAD_REQUEST), }; result_files .into_iter() .filter(|f| re.is_match(&f.file_name)) .collect() } else { result_files }; let filtered_total = filtered.len(); let page = params.page.unwrap_or(1).max(1); let page_size = params .page_size .or(params.limit) .unwrap_or(filtered_total.max(1)); let total_pages = if page_size > 0 { (filtered_total + page_size - 1) / page_size } else { 1 }; let start = (page - 1) * page_size; let files: Vec = filtered.into_iter().skip(start).take(page_size).collect(); let table_videos = schema::table_name("videos"); let table_chunks = schema::table_name("chunk"); let total_chunks: i64 = sqlx::query_scalar(&format!("SELECT COUNT(*) FROM {}", table_chunks)) .fetch_one(state.db.pool()) .await .unwrap_or(0); let searchable_chunks: i64 = sqlx::query_scalar(&format!( "SELECT COUNT(*) FROM {} WHERE vector_id IS NOT NULL", table_chunks )) .fetch_one(state.db.pool()) .await .unwrap_or(0); let pending_videos: i64 = sqlx::query_scalar(&format!( "SELECT COUNT(*) FROM {} WHERE status = 'pending'", table_videos )) .fetch_one(state.db.pool()) .await .unwrap_or(0); Ok(Json(ScanFilesResponse { files, total: total_all, filtered_total, page, page_size, total_pages, registered_count, unregistered_count, total_chunks, searchable_chunks, pending_videos, })) } #[derive(Debug, Serialize)] struct SftpgoStatusResponse { username: String, home_dir: String, files_count: i64, registered_videos: Vec, last_login: Option, } #[derive(Debug, Serialize)] struct RegisteredVideo { uuid: String, file_name: String, status: String, } async fn get_sftpgo_status( State(state): State, ) -> Result, StatusCode> { let demo_dir = "/Users/accusys/momentry/var/sftpgo/data/demo"; let files_count: i64 = std::fs::read_dir(demo_dir) .map(|entries| entries.count() as i64) .unwrap_or(0); let table_videos = schema::table_name("videos"); let registered_videos: Vec<(String, String, String)> = sqlx::query_as(&format!( "SELECT file_uuid, file_name, status FROM {} WHERE file_path LIKE '%demo%' ORDER BY id", table_videos )) .fetch_all(state.db.pool()) .await .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?; let registered_videos = registered_videos .into_iter() .map(|(uuid, file_name, status)| RegisteredVideo { uuid, file_name, status, }) .collect(); Ok(Json(SftpgoStatusResponse { username: "demo".to_string(), home_dir: demo_dir.to_string(), files_count, registered_videos, last_login: None, })) } #[derive(Debug, Serialize)] struct IngestionStep { name: String, status: String, detail: Option, } #[derive(Debug, Serialize)] struct IdentityRef { uuid: String, name: String, } #[derive(Debug, Serialize)] struct IngestionStatusResponse { file_uuid: String, steps: Vec, related_identities: Vec, strangers: i64, } async fn get_ingestion_status( State(state): State, Path(file_uuid): Path, ) -> Result, StatusCode> { let pool = state.db.pool(); let chunk = schema::table_name("chunk"); let identities = schema::table_name("identities"); // Get face counts from Qdrant _faces use crate::core::db::qdrant_db::QdrantDb; use serde_json::json; let qdrant = QdrantDb::new(); let face_filter = json!({ "must": [ {"key": "file_uuid", "match": {"value": file_uuid}} ] }); let points = qdrant.scroll_all_points("_faces", face_filter, 1000).await.unwrap_or_default(); let face_total = points.len() as i64; let mut trace_ids: std::collections::HashSet = std::collections::HashSet::new(); let mut identity_ids: std::collections::HashSet = std::collections::HashSet::new(); let mut stranger_traces: std::collections::HashSet = std::collections::HashSet::new(); for point in &points { let payload = &point["payload"]; if let Some(tid) = payload["trace_id"].as_i64() { if tid > 0 { trace_ids.insert(tid); if payload["identity_id"].is_null() { stranger_traces.insert(tid); } } } if let Some(iid) = payload["identity_id"].as_i64() { if iid > 0 { identity_ids.insert(iid); } } } let trace_count = trace_ids.len() as i64; let identity_count = identity_ids.len() as i64; let strangers = stranger_traces.len() as i64; let scene_meta_path = format!( "{}/{}.scene_meta.json", crate::core::config::OUTPUT_DIR.as_str(), file_uuid ); let scene_meta_ok = std::path::Path::new(&scene_meta_path).exists(); macro_rules! count_sql { ($sql:expr) => { sqlx::query_scalar::<_, i64>($sql) .fetch_one(pool) .await .unwrap_or(0) }; } let sentence_count = count_sql!(&format!( "SELECT COUNT(*) FROM {chunk} WHERE file_uuid = '{file_uuid}' AND chunk_type = 'sentence'" )); let sentence_embedded = count_sql!(&format!("SELECT COUNT(*) FROM {chunk} WHERE file_uuid = '{file_uuid}' AND chunk_type = 'sentence' AND embedding IS NOT NULL")); let scene_count = count_sql!(&format!( "SELECT COUNT(*) FROM {chunk} WHERE file_uuid = '{file_uuid}' AND chunk_type = 'cut'" )); let face_total = face_total; let trace_count = trace_count; let trace_chunks = count_sql!(&format!( "SELECT COUNT(*) FROM {chunk} WHERE file_uuid = '{file_uuid}' AND chunk_type = 'trace'" )); let identity_count = identity_count; let tkg_nodes = count_sql!(&format!( "SELECT COUNT(*) FROM {} WHERE file_uuid = '{file_uuid}'", schema::table_name("tkg_nodes") )); let tkg_edges = count_sql!(&format!( "SELECT COUNT(*) FROM {} WHERE file_uuid = '{file_uuid}'", schema::table_name("tkg_edges") )); // Get individual node counts by type let tkg_nodes_table = schema::table_name("tkg_nodes"); let face_track_nodes: i64 = count_sql!(&format!("SELECT COUNT(*) FROM {tkg_nodes_table} WHERE file_uuid = '{file_uuid}' AND node_type = 'face_track'")); let gaze_track_nodes: i64 = count_sql!(&format!("SELECT COUNT(*) FROM {tkg_nodes_table} WHERE file_uuid = '{file_uuid}' AND node_type = 'gaze_track'")); let lip_track_nodes: i64 = count_sql!(&format!("SELECT COUNT(*) FROM {tkg_nodes_table} WHERE file_uuid = '{file_uuid}' AND node_type = 'lip_track'")); let text_region_nodes: i64 = count_sql!(&format!("SELECT COUNT(*) FROM {tkg_nodes_table} WHERE file_uuid = '{file_uuid}' AND node_type = 'text_region'")); let appearance_nodes: i64 = count_sql!(&format!("SELECT COUNT(*) FROM {tkg_nodes_table} WHERE file_uuid = '{file_uuid}' AND node_type = 'appearance_trace'")); let accessory_nodes: i64 = count_sql!(&format!("SELECT COUNT(*) FROM {tkg_nodes_table} WHERE file_uuid = '{file_uuid}' AND node_type = 'accessory'")); let object_nodes: i64 = count_sql!(&format!("SELECT COUNT(*) FROM {tkg_nodes_table} WHERE file_uuid = '{file_uuid}' AND node_type = 'yolo_object'")); let hand_nodes: i64 = count_sql!(&format!("SELECT COUNT(*) FROM {tkg_nodes_table} WHERE file_uuid = '{file_uuid}' AND node_type = 'hand'")); let speaker_nodes: i64 = count_sql!(&format!("SELECT COUNT(*) FROM {tkg_nodes_table} WHERE file_uuid = '{file_uuid}' AND node_type = 'speaker'")); // Get individual edge counts by type let tkg_edges_table = schema::table_name("tkg_edges"); let co_occurrence_edges: i64 = count_sql!(&format!("SELECT COUNT(*) FROM {tkg_edges_table} WHERE file_uuid = '{file_uuid}' AND edge_type = 'CO_OCCURS_WITH'")); let speaker_face_edges: i64 = count_sql!(&format!("SELECT COUNT(*) FROM {tkg_edges_table} WHERE file_uuid = '{file_uuid}' AND edge_type = 'SPEAKS_AS'")); let face_face_edges: i64 = count_sql!(&format!("SELECT COUNT(*) FROM {tkg_edges_table} WHERE file_uuid = '{file_uuid}' AND edge_type = 'FACE_TO_FACE'")); let mutual_gaze_edges: i64 = count_sql!(&format!("SELECT COUNT(*) FROM {tkg_edges_table} WHERE file_uuid = '{file_uuid}' AND edge_type = 'MUTUAL_GAZE'")); let lip_sync_edges: i64 = count_sql!(&format!("SELECT COUNT(*) FROM {tkg_edges_table} WHERE file_uuid = '{file_uuid}' AND edge_type = 'LIP_SYNC'")); let has_appearance_edges: i64 = count_sql!(&format!("SELECT COUNT(*) FROM {tkg_edges_table} WHERE file_uuid = '{file_uuid}' AND edge_type = 'HAS_APPEARANCE'")); let wears_edges: i64 = count_sql!(&format!("SELECT COUNT(*) FROM {tkg_edges_table} WHERE file_uuid = '{file_uuid}' AND edge_type = 'WEARS'")); let hand_object_edges: i64 = count_sql!(&format!("SELECT COUNT(*) FROM {tkg_edges_table} WHERE file_uuid = '{file_uuid}' AND edge_type = 'HAND_OBJECT'")); // Rule 2 relationship chunks let rule2_chunks = count_sql!(&format!( "SELECT COUNT(*) FROM {chunk} WHERE file_uuid = '{file_uuid}' AND chunk_type = 'relationship'" )); // Get related identities from Qdrant _faces let related_identity_ids: Vec = identity_ids.into_iter().collect(); let related_identities: Vec = if !related_identity_ids.is_empty() { let id_list: String = related_identity_ids.iter().map(|id| id.to_string()).collect::>().join(","); match sqlx::query_as::<_, (String, String)>(&format!( "SELECT DISTINCT uuid::text, name FROM {identities} \ WHERE id IN ({id_list}) ORDER BY name" )) .fetch_all(pool) .await { Ok(rows) => rows .into_iter() .map(|(uuid, name)| IdentityRef { uuid: uuid.replace('-', ""), name, }) .collect(), Err(e) => { tracing::error!("related_identities query failed: {}", e); vec![] } } } else { vec![] }; let strangers = strangers; macro_rules! step { ($name:expr, $done:expr, $detail:expr) => { IngestionStep { name: $name.into(), status: if $done { "done" } else { "pending" }.into(), detail: $detail, } }; } let steps = vec![ step!( "rule1_sentence", sentence_count > 0, Some(format!("{sentence_count} sentence chunks")) ), step!( "auto_vectorize", sentence_embedded > 0, Some(format!("{sentence_embedded} embedded")) ), step!( "face_track", trace_count > 0, Some(format!("{trace_count} traces / {face_total} detections")) ), step!( "trace_chunks", trace_chunks > 0, Some(format!("{trace_chunks} trace chunks")) ), // TKG Nodes step!("tkg_face_track", face_track_nodes > 0, Some(format!("{face_track_nodes} nodes"))), step!("tkg_gaze_track", gaze_track_nodes > 0, Some(format!("{gaze_track_nodes} nodes"))), step!("tkg_lip_track", lip_track_nodes > 0, Some(format!("{lip_track_nodes} nodes"))), step!("tkg_text_region", text_region_nodes > 0, Some(format!("{text_region_nodes} nodes"))), step!("tkg_appearance", appearance_nodes > 0, Some(format!("{appearance_nodes} nodes"))), step!("tkg_accessory", accessory_nodes > 0, Some(format!("{accessory_nodes} nodes"))), step!("tkg_object", object_nodes > 0, Some(format!("{object_nodes} nodes"))), step!("tkg_hand", hand_nodes > 0, Some(format!("{hand_nodes} nodes"))), step!("tkg_speaker", speaker_nodes > 0, Some(format!("{speaker_nodes} nodes"))), // TKG Edges step!("tkg_co_occurrence", co_occurrence_edges > 0, Some(format!("{co_occurrence_edges} edges"))), step!("tkg_speaker_face", speaker_face_edges > 0, Some(format!("{speaker_face_edges} edges"))), step!("tkg_face_face", face_face_edges > 0, Some(format!("{face_face_edges} edges"))), step!("tkg_mutual_gaze", mutual_gaze_edges > 0, Some(format!("{mutual_gaze_edges} edges"))), step!("tkg_lip_sync", lip_sync_edges > 0, Some(format!("{lip_sync_edges} edges"))), step!("tkg_has_appearance", has_appearance_edges > 0, Some(format!("{has_appearance_edges} edges"))), step!("tkg_wears", wears_edges > 0, Some(format!("{wears_edges} edges"))), step!("tkg_hand_object", hand_object_edges > 0, Some(format!("{hand_object_edges} edges"))), // Rule 2 step!( "rule2_relationship", rule2_chunks > 0, Some(format!("{rule2_chunks} relationship chunks")) ), // Identity & Scene step!( "identity_match", identity_count > 0, Some(format!("{identity_count} identities matched")) ), step!("scene_metadata", scene_meta_ok, None), ]; Ok(Json(IngestionStatusResponse { file_uuid, steps, related_identities, strangers, })) } /// Comprehensive file stats endpoint — combines all data sources for frontend transparency async fn get_file_stats( State(state): State, Path(file_uuid): Path, ) -> Result, StatusCode> { let pool = state.db.pool(); // 1. Get file info from PostgreSQL let videos_table = schema::table_name("videos"); let file_info: Option<(String, String, String)> = sqlx::query_as(&format!( "SELECT file_uuid, file_name, status FROM {} WHERE file_uuid = $1", videos_table )) .bind(&file_uuid) .fetch_optional(pool) .await .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?; let (file_uuid_str, file_name, status) = file_info .map(|(uuid, name, s)| (uuid, Some(name), Some(s))) .unwrap_or_else(|| (file_uuid.clone(), None, None)); // 2. Get processor status from processing_status JSONB let processing_status: serde_json::Value = sqlx::query_scalar(&format!( "SELECT processing_status FROM {} WHERE file_uuid = $1", videos_table )) .bind(&file_uuid) .fetch_optional(pool) .await .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)? .unwrap_or(serde_json::json!({})); let processors: Vec = processing_status .get("progress") .and_then(|p| p.as_object()) .map(|progress| { progress .iter() .filter_map(|(name, info)| { info.as_object().map(|obj| { let status = obj .get("status") .and_then(|s| s.as_str()) .unwrap_or("pending") .to_string(); let progress_val = obj .get("percentage") .and_then(|p| p.as_u64()) .unwrap_or(0) as u32; let message = obj .get("message") .and_then(|m| m.as_str()) .map(|s| s.to_string()); ProcessorStatus { name: name.clone(), status, progress: progress_val, message, } }) }) .collect() }) .unwrap_or_default(); // 3. Get PostgreSQL counts let chunk_table = schema::table_name("chunk"); let identities_table = schema::table_name("identities"); let file_identities_table = schema::table_name("file_identities"); let postgres = PostgresStats { sentence_chunks: sqlx::query_scalar::<_, i64>(&format!( "SELECT COUNT(*) FROM {chunk_table} WHERE file_uuid = $1 AND chunk_type = 'sentence'" )) .bind(&file_uuid) .fetch_one(pool) .await .unwrap_or(0), trace_chunks: sqlx::query_scalar::<_, i64>(&format!( "SELECT COUNT(*) FROM {chunk_table} WHERE file_uuid = $1 AND chunk_type = 'trace'" )) .bind(&file_uuid) .fetch_one(pool) .await .unwrap_or(0), relationship_chunks: sqlx::query_scalar::<_, i64>(&format!( "SELECT COUNT(*) FROM {chunk_table} WHERE file_uuid = $1 AND chunk_type = 'relationship'" )) .bind(&file_uuid) .fetch_one(pool) .await .unwrap_or(0), identities: sqlx::query_scalar::<_, i64>(&format!( "SELECT COUNT(DISTINCT i.id) FROM {identities_table} i \ JOIN {file_identities_table} fi ON fi.identity_id = i.id \ WHERE fi.file_uuid = $1" )) .bind(&file_uuid) .fetch_one(pool) .await .unwrap_or(0), file_identities: sqlx::query_scalar::<_, i64>(&format!( "SELECT COUNT(*) FROM {file_identities_table} WHERE file_uuid = $1" )) .bind(&file_uuid) .fetch_one(pool) .await .unwrap_or(0), }; // 4. Get Qdrant stats use crate::core::db::qdrant_db::QdrantDb; use serde_json::json; let qdrant_db = QdrantDb::new(); // Face stats let face_filter = json!({ "must": [{"key": "file_uuid", "match": {"value": file_uuid}}] }); let face_points = qdrant_db .scroll_all_points("_faces", face_filter.clone(), 500) .await .unwrap_or_default(); let mut face_traces = std::collections::HashSet::new(); let mut face_identities = std::collections::HashSet::new(); for point in &face_points { let payload = &point["payload"]; if let Some(tid) = payload["trace_id"].as_i64() { if tid > 0 { face_traces.insert(tid); } } if let Some(iid) = payload["identity_id"].as_i64() { if iid > 0 { face_identities.insert(iid); } } } // Text chunk stats (rule1 collection) let schema = std::env::var("DATABASE_SCHEMA").unwrap_or_else(|_| "dev".to_string()); let rule1_collection = if schema == "public" { "momentry_rule1".to_string() } else { format!("momentry_{}_rule1_v2", schema) }; let text_filter = json!({ "must": [{"key": "file_uuid", "match": {"value": file_uuid}}] }); let text_points = qdrant_db .scroll_all_points(&rule1_collection, text_filter, 500) .await .unwrap_or_default(); // Speaker stats let speaker_collection = format!("momentry_{}_speaker", schema); let speaker_filter = json!({ "must": [{"key": "file_uuid", "match": {"value": file_uuid}}] }); let speaker_points = qdrant_db .scroll_all_points(&speaker_collection, speaker_filter, 500) .await .unwrap_or_default(); let qdrant_stats = QdrantStats { faces: face_points.len() as i64, face_traces: face_traces.len() as i64, face_identities: face_identities.len() as i64, text_chunks: text_points.len() as i64, speakers: speaker_points.len() as i64, }; // 5. Get TKG stats from PostgreSQL let tkg_nodes_table = schema::table_name("tkg_nodes"); let tkg_edges_table = schema::table_name("tkg_edges"); let tkg_nodes_total: i64 = sqlx::query_scalar::<_, i64>(&format!("SELECT COUNT(*) FROM {} WHERE file_uuid = $1", tkg_nodes_table)) .bind(&file_uuid).fetch_one(pool).await.unwrap_or(0); let tkg_edges_total: i64 = sqlx::query_scalar::<_, i64>(&format!("SELECT COUNT(*) FROM {} WHERE file_uuid = $1", tkg_edges_table)) .bind(&file_uuid).fetch_one(pool).await.unwrap_or(0); let tkg = TkgFileStats { total_nodes: tkg_nodes_total, total_edges: tkg_edges_total, face_track_nodes: count_nodes(pool, &tkg_nodes_table, &file_uuid, "face_track").await, gaze_track_nodes: count_nodes(pool, &tkg_nodes_table, &file_uuid, "gaze_track").await, lip_track_nodes: count_nodes(pool, &tkg_nodes_table, &file_uuid, "lip_track").await, text_region_nodes: count_nodes(pool, &tkg_nodes_table, &file_uuid, "text_trace").await, appearance_nodes: count_nodes(pool, &tkg_nodes_table, &file_uuid, "appearance_trace").await, accessory_nodes: count_nodes(pool, &tkg_nodes_table, &file_uuid, "accessory").await, object_nodes: count_nodes(pool, &tkg_nodes_table, &file_uuid, "yolo_object").await, hand_nodes: count_nodes(pool, &tkg_nodes_table, &file_uuid, "hand").await, speaker_nodes: count_nodes(pool, &tkg_nodes_table, &file_uuid, "speaker").await, co_occurrence_edges: count_edges(pool, &tkg_edges_table, &file_uuid, "CO_OCCURS_WITH").await, speaker_face_edges: count_edges(pool, &tkg_edges_table, &file_uuid, "SPEAKS_AS").await, face_face_edges: count_edges(pool, &tkg_edges_table, &file_uuid, "FACE_TO_FACE").await, mutual_gaze_edges: count_edges(pool, &tkg_edges_table, &file_uuid, "MUTUAL_GAZE").await, lip_sync_edges: count_edges(pool, &tkg_edges_table, &file_uuid, "LIP_SYNC").await, has_appearance_edges: count_edges(pool, &tkg_edges_table, &file_uuid, "HAS_APPEARANCE").await, wears_edges: count_edges(pool, &tkg_edges_table, &file_uuid, "WEARS").await, hand_object_edges: count_edges(pool, &tkg_edges_table, &file_uuid, "HAND_OBJECT").await, ..Default::default() }; // 6. Get Identity Agent stats from Qdrant _seeds let seeds_filter = json!({ "must": [ {"key": "file_uuid", "match": {"value": file_uuid}} ] }); let seed_points = qdrant_db .scroll_all_points("_seeds", seeds_filter, 500) .await .unwrap_or_default(); let identity_agent = IdentityAgentStats { clusters: 0, // From face_clustered.json if available identities_created: face_identities.len() as i64, tmdb_matches: seed_points.iter() .filter(|p| p["payload"]["source"].as_str() == Some("tmdb")) .count() as i64, speaker_bindings: speaker_points.len() as i64, confirmations: 0, // From identity_bindings table }; Ok(Json(FileStatsResponse { file_uuid: file_uuid_str, file_name, status, processors, postgres, qdrant: qdrant_stats, tkg, identity_agent, })) } async fn count_nodes(pool: &sqlx::PgPool, table: &str, file_uuid: &str, node_type: &str) -> i64 { sqlx::query_scalar::<_, i64>(&format!( "SELECT COUNT(*) FROM {} WHERE file_uuid = $1 AND node_type = $2", table )) .bind(file_uuid) .bind(node_type) .fetch_one(pool) .await .unwrap_or(0) } async fn count_edges(pool: &sqlx::PgPool, table: &str, file_uuid: &str, edge_type: &str) -> i64 { sqlx::query_scalar::<_, i64>(&format!( "SELECT COUNT(*) FROM {} WHERE file_uuid = $1 AND edge_type = $2", table )) .bind(file_uuid) .bind(edge_type) .fetch_one(pool) .await .unwrap_or(0) } pub fn scan_routes() -> Router { Router::new() .route("/api/v1/files/scan", get(scan_files)) .route("/api/v1/stats/sftpgo", get(get_sftpgo_status)) .route( "/api/v1/stats/ingestion-status/:file_uuid", get(get_ingestion_status), ) .route( "/api/v1/stats/file/:file_uuid", get(get_file_stats), ) .route( "/api/v1/stats/pipeline/:file_uuid", get(get_pipeline_progress_handler), ) } /// Get segmented pipeline progress with weighted stages async fn get_pipeline_progress_handler( State(state): State, Path(file_uuid): Path, ) -> Result, StatusCode> { let pool = state.db.pool(); let chunk_table = schema::table_name("chunk"); let tkg_nodes_table = schema::table_name("tkg_nodes"); let tkg_edges_table = schema::table_name("tkg_edges"); let pr_table = schema::table_name("processor_results"); let mj_table = schema::table_name("monitor_jobs"); // Compute actual progress from DB state let sentence_count: i64 = sqlx::query_scalar::<_, i64>( &format!("SELECT COUNT(*) FROM {chunk_table} WHERE file_uuid = $1 AND chunk_type = 'sentence'") ).bind(&file_uuid).fetch_one(pool).await.unwrap_or(0); let sentence_embedded: i64 = sqlx::query_scalar::<_, i64>( &format!("SELECT COUNT(*) FROM {chunk_table} WHERE file_uuid = $1 AND chunk_type = 'sentence' AND embedding IS NOT NULL") ).bind(&file_uuid).fetch_one(pool).await.unwrap_or(0); let face_traced: i64 = sqlx::query_scalar::<_, i64>( &format!("SELECT COUNT(*) FROM {pr_table} pr JOIN {mj_table} mj ON pr.job_id = mj.id WHERE mj.uuid = $1 AND pr.processor = 'face' AND pr.status = 'completed'") ).bind(&file_uuid).fetch_one(pool).await.unwrap_or(0); let tkg_node_count: i64 = sqlx::query_scalar::<_, i64>( &format!("SELECT COUNT(*) FROM {tkg_nodes_table} WHERE file_uuid = $1") ).bind(&file_uuid).fetch_one(pool).await.unwrap_or(0); let tkg_edge_count: i64 = sqlx::query_scalar::<_, i64>( &format!("SELECT COUNT(*) FROM {tkg_edges_table} WHERE file_uuid = $1") ).bind(&file_uuid).fetch_one(pool).await.unwrap_or(0); let relationship_count: i64 = sqlx::query_scalar::<_, i64>( &format!("SELECT COUNT(*) FROM {chunk_table} WHERE file_uuid = $1 AND chunk_type = 'relationship'") ).bind(&file_uuid).fetch_one(pool).await.unwrap_or(0); let asrx_completed: i64 = sqlx::query_scalar::<_, i64>( &format!("SELECT COUNT(*) FROM {pr_table} pr JOIN {mj_table} mj ON pr.job_id = mj.id WHERE mj.uuid = $1 AND pr.processor = 'asrx' AND pr.status = 'completed'") ).bind(&file_uuid).fetch_one(pool).await.unwrap_or(0); // Determine processor completion let processors_done = asrx_completed > 0; let mut pp = crate::core::progress::PipelineProgress::new(&file_uuid); if processors_done { pp.update_stage("processors", 1.0, "completed", None); } if sentence_count > 0 { let detail = if sentence_embedded > 0 { Some(format!("{} chunks, {} embedded", sentence_count, sentence_embedded)) } else { Some(format!("{} chunks", sentence_count)) }; pp.update_stage("rule1_ingestion", 1.0, "completed", detail); } if face_traced > 0 { pp.update_stage("face_tracing", 1.0, "completed", None); } if tkg_node_count > 0 { pp.update_stage("tkg_nodes", 1.0, "completed", Some(format!("{} nodes", tkg_node_count))); } if tkg_edge_count > 0 { pp.update_stage("tkg_edges", 1.0, "completed", Some(format!("{} edges", tkg_edge_count))); } if relationship_count > 0 { pp.update_stage("rule2_ingestion", 1.0, "completed", Some(format!("{} chunks", relationship_count))); } // Check identity agent from _seeds use crate::core::db::qdrant_db::QdrantDb; use serde_json::json; let qdrant = QdrantDb::new(); let schema = std::env::var("DATABASE_SCHEMA").unwrap_or_else(|_| "dev".to_string()); let seeds_collection = if schema == "public" { "momentry_public_speaker" } else { &format!("momentry_{}_speaker", schema) }; let seeds_filter = json!({"must": [{"key": "file_uuid", "match": {"value": &file_uuid}}]}); let seed_points = qdrant.scroll_all_points("_seeds", seeds_filter, 100).await.unwrap_or_default(); if !seed_points.is_empty() { pp.update_stage("identity_agent", 1.0, "completed", Some(format!("{} seeds", seed_points.len()))); } Ok(Json(pp)) }