- get_pipeline_progress_handler now queries actual DB counts
- Fixed processor_results query (requires JOIN with monitor_jobs)
- Card progress bar and right-click content now consistent
1026 lines
38 KiB
Rust
1026 lines
38 KiB
Rust
use axum::{
|
|
extract::{Path, Query, State},
|
|
http::StatusCode,
|
|
response::Json,
|
|
routing::get,
|
|
Router,
|
|
};
|
|
use serde::{Deserialize, Serialize};
|
|
|
|
use super::types::AppState;
|
|
use crate::core::db::schema;
|
|
|
|
/// Comprehensive file stats endpoint — provides all data sources for frontend transparency
|
|
/// Combines: JSON file status + PostgreSQL counts + Qdrant collections + TKG stats + Identity Agent stats
|
|
#[derive(Debug, Serialize)]
|
|
struct FileStatsResponse {
|
|
file_uuid: String,
|
|
file_name: Option<String>,
|
|
status: Option<String>,
|
|
// Processor status
|
|
processors: Vec<ProcessorStatus>,
|
|
// PostgreSQL counts
|
|
postgres: PostgresStats,
|
|
// Qdrant collection counts
|
|
qdrant: QdrantStats,
|
|
// TKG stats
|
|
tkg: TkgFileStats,
|
|
// Identity Agent stats
|
|
identity_agent: IdentityAgentStats,
|
|
}
|
|
|
|
#[derive(Debug, Serialize)]
|
|
struct ProcessorStatus {
|
|
name: String,
|
|
status: String,
|
|
progress: u32,
|
|
message: Option<String>,
|
|
}
|
|
|
|
#[derive(Debug, Serialize, Default)]
|
|
struct PostgresStats {
|
|
sentence_chunks: i64,
|
|
trace_chunks: i64,
|
|
relationship_chunks: i64,
|
|
identities: i64,
|
|
file_identities: i64,
|
|
}
|
|
|
|
#[derive(Debug, Serialize)]
|
|
struct QdrantStats {
|
|
faces: i64,
|
|
face_traces: i64,
|
|
face_identities: i64,
|
|
text_chunks: i64,
|
|
speakers: i64,
|
|
}
|
|
|
|
#[derive(Debug, Serialize, Default)]
|
|
struct TkgFileStats {
|
|
total_nodes: i64,
|
|
total_edges: i64,
|
|
face_track_nodes: i64,
|
|
gaze_track_nodes: i64,
|
|
lip_track_nodes: i64,
|
|
text_region_nodes: i64,
|
|
appearance_nodes: i64,
|
|
accessory_nodes: i64,
|
|
object_nodes: i64,
|
|
hand_nodes: i64,
|
|
speaker_nodes: i64,
|
|
co_occurrence_edges: i64,
|
|
speaker_face_edges: i64,
|
|
face_face_edges: i64,
|
|
mutual_gaze_edges: i64,
|
|
lip_sync_edges: i64,
|
|
has_appearance_edges: i64,
|
|
wears_edges: i64,
|
|
hand_object_edges: i64,
|
|
}
|
|
|
|
#[derive(Debug, Serialize, Default)]
|
|
struct IdentityAgentStats {
|
|
clusters: i64,
|
|
identities_created: i64,
|
|
tmdb_matches: i64,
|
|
speaker_bindings: i64,
|
|
confirmations: i64,
|
|
}
|
|
|
|
#[derive(Debug, Serialize, Deserialize)]
|
|
struct ScannedFileInfo {
|
|
file_name: String,
|
|
relative_path: String,
|
|
file_path: String,
|
|
file_size: u64,
|
|
modified_time: String,
|
|
is_registered: bool,
|
|
file_uuid: Option<String>,
|
|
status: Option<String>,
|
|
registration_time: Option<String>,
|
|
job_id: Option<i32>,
|
|
}
|
|
|
|
#[derive(Debug, Serialize, Deserialize)]
|
|
struct ScanFilesResponse {
|
|
files: Vec<ScannedFileInfo>,
|
|
total: usize,
|
|
filtered_total: usize,
|
|
page: usize,
|
|
page_size: usize,
|
|
total_pages: usize,
|
|
registered_count: usize,
|
|
unregistered_count: usize,
|
|
total_chunks: i64,
|
|
searchable_chunks: i64,
|
|
pending_videos: i64,
|
|
}
|
|
|
|
#[derive(Debug, Deserialize)]
|
|
struct ScanFilesQuery {
|
|
limit: Option<usize>,
|
|
page: Option<usize>,
|
|
page_size: Option<usize>,
|
|
pattern: Option<String>,
|
|
sort_by: Option<String>,
|
|
sort_order: Option<String>,
|
|
}
|
|
|
|
fn scan_directory_recursive(
|
|
dir: &std::path::Path,
|
|
root: &std::path::Path,
|
|
allowed_extensions: &[&str],
|
|
registered_paths: &std::collections::HashMap<
|
|
String,
|
|
(String, String, Option<String>, Option<i32>),
|
|
>,
|
|
files: &mut Vec<ScannedFileInfo>,
|
|
) {
|
|
if let Ok(entries) = std::fs::read_dir(dir) {
|
|
for entry in entries.flatten() {
|
|
let path = entry.path();
|
|
if path.is_dir() {
|
|
if let Some(name) = path.file_name().and_then(|n| n.to_str()) {
|
|
if name.starts_with('.') {
|
|
return;
|
|
}
|
|
}
|
|
scan_directory_recursive(&path, root, allowed_extensions, registered_paths, files);
|
|
} else if path.is_file() {
|
|
if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
|
|
if allowed_extensions.contains(&ext.to_lowercase().as_str()) {
|
|
if let Ok(meta) = entry.metadata() {
|
|
let abs_path = path.to_string_lossy().to_string();
|
|
let rel_path = path
|
|
.strip_prefix(root)
|
|
.map(|p| p.to_string_lossy().to_string())
|
|
.unwrap_or_else(|_| abs_path.clone());
|
|
|
|
let file_name = path
|
|
.file_name()
|
|
.map(|n| n.to_string_lossy().to_string())
|
|
.unwrap_or_default();
|
|
|
|
let modified_time = meta
|
|
.modified()
|
|
.ok()
|
|
.and_then(|t| t.duration_since(std::time::UNIX_EPOCH).ok())
|
|
.map(|d| {
|
|
chrono::DateTime::from_timestamp(d.as_secs() as i64, 0)
|
|
.map(|dt| dt.to_rfc3339())
|
|
.unwrap_or_default()
|
|
})
|
|
.unwrap_or_default();
|
|
|
|
match registered_paths.get(&abs_path) {
|
|
Some((uuid, status, reg_time, jid)) if status != "unregistered" => {
|
|
files.push(ScannedFileInfo {
|
|
file_name,
|
|
relative_path: rel_path,
|
|
file_path: abs_path,
|
|
file_size: meta.len(),
|
|
modified_time,
|
|
is_registered: true,
|
|
file_uuid: Some(uuid.clone()),
|
|
status: Some(status.clone()),
|
|
registration_time: reg_time.clone(),
|
|
job_id: *jid,
|
|
});
|
|
}
|
|
_ => {
|
|
files.push(ScannedFileInfo {
|
|
file_name,
|
|
relative_path: rel_path,
|
|
file_path: abs_path,
|
|
file_size: meta.len(),
|
|
modified_time,
|
|
is_registered: false,
|
|
file_uuid: None,
|
|
status: Some("unregistered".to_string()),
|
|
registration_time: None,
|
|
job_id: None,
|
|
});
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
async fn scan_files(
|
|
State(state): State<AppState>,
|
|
Query(params): Query<ScanFilesQuery>,
|
|
) -> Result<Json<ScanFilesResponse>, StatusCode> {
|
|
let demo_dir_str = std::env::var("MOMENTRY_SFTP_ROOT")
|
|
.unwrap_or_else(|_| "/Users/accusys/momentry/var/sftpgo/data/demo".to_string());
|
|
let demo_dir = std::path::Path::new(&demo_dir_str);
|
|
|
|
let allowed_extensions = vec![
|
|
"mp4", "mov", "mkv", "avi", "webm", "jpg", "jpeg", "png", "gif", "webp",
|
|
];
|
|
|
|
let table = schema::table_name("videos");
|
|
let mj_table = schema::table_name("monitor_jobs");
|
|
let registered_db: Vec<(String, String, String, String, Option<String>, Option<i32>)> =
|
|
sqlx::query_as(&format!(
|
|
"SELECT v.file_path, v.file_name, v.file_uuid, v.status, v.registration_time::text, \
|
|
latest_job.id as job_id \
|
|
FROM {} v \
|
|
LEFT JOIN LATERAL ( \
|
|
SELECT id FROM {} WHERE uuid = v.file_uuid ORDER BY id DESC LIMIT 1 \
|
|
) latest_job ON true \
|
|
ORDER BY v.id",
|
|
table, mj_table
|
|
))
|
|
.fetch_all(state.db.pool())
|
|
.await
|
|
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
|
|
|
|
let registered_paths: std::collections::HashMap<
|
|
String,
|
|
(String, String, Option<String>, Option<i32>),
|
|
> = registered_db
|
|
.into_iter()
|
|
.map(|(path, _name, uuid, status, reg_time, jid)| (path, (uuid, status, reg_time, jid)))
|
|
.collect();
|
|
|
|
let mut result_files = Vec::new();
|
|
|
|
if demo_dir.exists() {
|
|
scan_directory_recursive(
|
|
demo_dir,
|
|
demo_dir,
|
|
&allowed_extensions,
|
|
®istered_paths,
|
|
&mut result_files,
|
|
);
|
|
}
|
|
|
|
let desc = params.sort_order.as_deref().unwrap_or("asc") == "desc";
|
|
match params.sort_by.as_deref().unwrap_or("name") {
|
|
"size" => {
|
|
if desc {
|
|
result_files.sort_by(|a, b| b.file_size.cmp(&a.file_size));
|
|
} else {
|
|
result_files.sort_by(|a, b| a.file_size.cmp(&b.file_size));
|
|
}
|
|
}
|
|
"modified" | "time" => {
|
|
if desc {
|
|
result_files.sort_by(|a, b| b.modified_time.cmp(&a.modified_time));
|
|
} else {
|
|
result_files.sort_by(|a, b| a.modified_time.cmp(&b.modified_time));
|
|
}
|
|
}
|
|
"status" => {
|
|
if desc {
|
|
result_files
|
|
.sort_by(|a, b| b.status.cmp(&a.status).then(b.file_name.cmp(&a.file_name)));
|
|
} else {
|
|
result_files
|
|
.sort_by(|a, b| a.status.cmp(&b.status).then(a.file_name.cmp(&b.file_name)));
|
|
}
|
|
}
|
|
_ => {
|
|
if desc {
|
|
result_files.sort_by(|a, b| {
|
|
a.is_registered
|
|
.cmp(&b.is_registered)
|
|
.then(b.file_name.cmp(&a.file_name))
|
|
});
|
|
} else {
|
|
result_files.sort_by(|a, b| {
|
|
b.is_registered
|
|
.cmp(&a.is_registered)
|
|
.then(a.file_name.cmp(&b.file_name))
|
|
});
|
|
}
|
|
}
|
|
}
|
|
|
|
let total_all = result_files.len();
|
|
let registered_count = result_files.iter().filter(|f| f.is_registered).count();
|
|
let unregistered_count = result_files.iter().filter(|f| !f.is_registered).count();
|
|
|
|
let filtered: Vec<ScannedFileInfo> = if let Some(ref pat) = params.pattern {
|
|
let re = match regex::Regex::new(&format!("(?i){}", pat)) {
|
|
Ok(r) => r,
|
|
Err(_) => return Err(StatusCode::BAD_REQUEST),
|
|
};
|
|
result_files
|
|
.into_iter()
|
|
.filter(|f| re.is_match(&f.file_name))
|
|
.collect()
|
|
} else {
|
|
result_files
|
|
};
|
|
|
|
let filtered_total = filtered.len();
|
|
|
|
let page = params.page.unwrap_or(1).max(1);
|
|
let page_size = params
|
|
.page_size
|
|
.or(params.limit)
|
|
.unwrap_or(filtered_total.max(1));
|
|
let total_pages = if page_size > 0 {
|
|
(filtered_total + page_size - 1) / page_size
|
|
} else {
|
|
1
|
|
};
|
|
let start = (page - 1) * page_size;
|
|
let files: Vec<ScannedFileInfo> = filtered.into_iter().skip(start).take(page_size).collect();
|
|
|
|
let table_videos = schema::table_name("videos");
|
|
let table_chunks = schema::table_name("chunk");
|
|
let total_chunks: i64 = sqlx::query_scalar(&format!("SELECT COUNT(*) FROM {}", table_chunks))
|
|
.fetch_one(state.db.pool())
|
|
.await
|
|
.unwrap_or(0);
|
|
let searchable_chunks: i64 = sqlx::query_scalar(&format!(
|
|
"SELECT COUNT(*) FROM {} WHERE vector_id IS NOT NULL",
|
|
table_chunks
|
|
))
|
|
.fetch_one(state.db.pool())
|
|
.await
|
|
.unwrap_or(0);
|
|
let pending_videos: i64 = sqlx::query_scalar(&format!(
|
|
"SELECT COUNT(*) FROM {} WHERE status = 'pending'",
|
|
table_videos
|
|
))
|
|
.fetch_one(state.db.pool())
|
|
.await
|
|
.unwrap_or(0);
|
|
|
|
Ok(Json(ScanFilesResponse {
|
|
files,
|
|
total: total_all,
|
|
filtered_total,
|
|
page,
|
|
page_size,
|
|
total_pages,
|
|
registered_count,
|
|
unregistered_count,
|
|
total_chunks,
|
|
searchable_chunks,
|
|
pending_videos,
|
|
}))
|
|
}
|
|
|
|
#[derive(Debug, Serialize)]
|
|
struct SftpgoStatusResponse {
|
|
username: String,
|
|
home_dir: String,
|
|
files_count: i64,
|
|
registered_videos: Vec<RegisteredVideo>,
|
|
last_login: Option<String>,
|
|
}
|
|
|
|
#[derive(Debug, Serialize)]
|
|
struct RegisteredVideo {
|
|
uuid: String,
|
|
file_name: String,
|
|
status: String,
|
|
}
|
|
|
|
async fn get_sftpgo_status(
|
|
State(state): State<AppState>,
|
|
) -> Result<Json<SftpgoStatusResponse>, StatusCode> {
|
|
let demo_dir = "/Users/accusys/momentry/var/sftpgo/data/demo";
|
|
|
|
let files_count: i64 = std::fs::read_dir(demo_dir)
|
|
.map(|entries| entries.count() as i64)
|
|
.unwrap_or(0);
|
|
|
|
let table_videos = schema::table_name("videos");
|
|
|
|
let registered_videos: Vec<(String, String, String)> = sqlx::query_as(&format!(
|
|
"SELECT file_uuid, file_name, status FROM {} WHERE file_path LIKE '%demo%' ORDER BY id",
|
|
table_videos
|
|
))
|
|
.fetch_all(state.db.pool())
|
|
.await
|
|
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
|
|
|
|
let registered_videos = registered_videos
|
|
.into_iter()
|
|
.map(|(uuid, file_name, status)| RegisteredVideo {
|
|
uuid,
|
|
file_name,
|
|
status,
|
|
})
|
|
.collect();
|
|
|
|
Ok(Json(SftpgoStatusResponse {
|
|
username: "demo".to_string(),
|
|
home_dir: demo_dir.to_string(),
|
|
files_count,
|
|
registered_videos,
|
|
last_login: None,
|
|
}))
|
|
}
|
|
|
|
#[derive(Debug, Serialize)]
|
|
struct IngestionStep {
|
|
name: String,
|
|
status: String,
|
|
detail: Option<String>,
|
|
}
|
|
|
|
#[derive(Debug, Serialize)]
|
|
struct IdentityRef {
|
|
uuid: String,
|
|
name: String,
|
|
}
|
|
|
|
#[derive(Debug, Serialize)]
|
|
struct IngestionStatusResponse {
|
|
file_uuid: String,
|
|
steps: Vec<IngestionStep>,
|
|
related_identities: Vec<IdentityRef>,
|
|
strangers: i64,
|
|
}
|
|
|
|
async fn get_ingestion_status(
|
|
State(state): State<AppState>,
|
|
Path(file_uuid): Path<String>,
|
|
) -> Result<Json<IngestionStatusResponse>, StatusCode> {
|
|
let pool = state.db.pool();
|
|
let chunk = schema::table_name("chunk");
|
|
let identities = schema::table_name("identities");
|
|
|
|
// Get face counts from Qdrant _faces
|
|
use crate::core::db::qdrant_db::QdrantDb;
|
|
use serde_json::json;
|
|
|
|
let qdrant = QdrantDb::new();
|
|
let face_filter = json!({
|
|
"must": [
|
|
{"key": "file_uuid", "match": {"value": file_uuid}}
|
|
]
|
|
});
|
|
let points = qdrant.scroll_all_points("_faces", face_filter, 1000).await.unwrap_or_default();
|
|
|
|
let face_total = points.len() as i64;
|
|
let mut trace_ids: std::collections::HashSet<i64> = std::collections::HashSet::new();
|
|
let mut identity_ids: std::collections::HashSet<i64> = std::collections::HashSet::new();
|
|
let mut stranger_traces: std::collections::HashSet<i64> = std::collections::HashSet::new();
|
|
|
|
for point in &points {
|
|
let payload = &point["payload"];
|
|
if let Some(tid) = payload["trace_id"].as_i64() {
|
|
if tid > 0 {
|
|
trace_ids.insert(tid);
|
|
if payload["identity_id"].is_null() {
|
|
stranger_traces.insert(tid);
|
|
}
|
|
}
|
|
}
|
|
if let Some(iid) = payload["identity_id"].as_i64() {
|
|
if iid > 0 {
|
|
identity_ids.insert(iid);
|
|
}
|
|
}
|
|
}
|
|
|
|
let trace_count = trace_ids.len() as i64;
|
|
let identity_count = identity_ids.len() as i64;
|
|
let strangers = stranger_traces.len() as i64;
|
|
|
|
let scene_meta_path = format!(
|
|
"{}/{}.scene_meta.json",
|
|
crate::core::config::OUTPUT_DIR.as_str(),
|
|
file_uuid
|
|
);
|
|
let scene_meta_ok = std::path::Path::new(&scene_meta_path).exists();
|
|
|
|
macro_rules! count_sql {
|
|
($sql:expr) => {
|
|
sqlx::query_scalar::<_, i64>($sql)
|
|
.fetch_one(pool)
|
|
.await
|
|
.unwrap_or(0)
|
|
};
|
|
}
|
|
|
|
let sentence_count = count_sql!(&format!(
|
|
"SELECT COUNT(*) FROM {chunk} WHERE file_uuid = '{file_uuid}' AND chunk_type = 'sentence'"
|
|
));
|
|
let sentence_embedded = count_sql!(&format!("SELECT COUNT(*) FROM {chunk} WHERE file_uuid = '{file_uuid}' AND chunk_type = 'sentence' AND embedding IS NOT NULL"));
|
|
let scene_count = count_sql!(&format!(
|
|
"SELECT COUNT(*) FROM {chunk} WHERE file_uuid = '{file_uuid}' AND chunk_type = 'cut'"
|
|
));
|
|
let face_total = face_total;
|
|
let trace_count = trace_count;
|
|
let trace_chunks = count_sql!(&format!(
|
|
"SELECT COUNT(*) FROM {chunk} WHERE file_uuid = '{file_uuid}' AND chunk_type = 'trace'"
|
|
));
|
|
let identity_count = identity_count;
|
|
let tkg_nodes = count_sql!(&format!(
|
|
"SELECT COUNT(*) FROM {} WHERE file_uuid = '{file_uuid}'",
|
|
schema::table_name("tkg_nodes")
|
|
));
|
|
let tkg_edges = count_sql!(&format!(
|
|
"SELECT COUNT(*) FROM {} WHERE file_uuid = '{file_uuid}'",
|
|
schema::table_name("tkg_edges")
|
|
));
|
|
|
|
// Get individual node counts by type
|
|
let tkg_nodes_table = schema::table_name("tkg_nodes");
|
|
let face_track_nodes: i64 = count_sql!(&format!("SELECT COUNT(*) FROM {tkg_nodes_table} WHERE file_uuid = '{file_uuid}' AND node_type = 'face_track'"));
|
|
let gaze_track_nodes: i64 = count_sql!(&format!("SELECT COUNT(*) FROM {tkg_nodes_table} WHERE file_uuid = '{file_uuid}' AND node_type = 'gaze_track'"));
|
|
let lip_track_nodes: i64 = count_sql!(&format!("SELECT COUNT(*) FROM {tkg_nodes_table} WHERE file_uuid = '{file_uuid}' AND node_type = 'lip_track'"));
|
|
let text_region_nodes: i64 = count_sql!(&format!("SELECT COUNT(*) FROM {tkg_nodes_table} WHERE file_uuid = '{file_uuid}' AND node_type = 'text_region'"));
|
|
let appearance_nodes: i64 = count_sql!(&format!("SELECT COUNT(*) FROM {tkg_nodes_table} WHERE file_uuid = '{file_uuid}' AND node_type = 'appearance_trace'"));
|
|
let accessory_nodes: i64 = count_sql!(&format!("SELECT COUNT(*) FROM {tkg_nodes_table} WHERE file_uuid = '{file_uuid}' AND node_type = 'accessory'"));
|
|
let object_nodes: i64 = count_sql!(&format!("SELECT COUNT(*) FROM {tkg_nodes_table} WHERE file_uuid = '{file_uuid}' AND node_type = 'yolo_object'"));
|
|
let hand_nodes: i64 = count_sql!(&format!("SELECT COUNT(*) FROM {tkg_nodes_table} WHERE file_uuid = '{file_uuid}' AND node_type = 'hand'"));
|
|
let speaker_nodes: i64 = count_sql!(&format!("SELECT COUNT(*) FROM {tkg_nodes_table} WHERE file_uuid = '{file_uuid}' AND node_type = 'speaker'"));
|
|
|
|
// Get individual edge counts by type
|
|
let tkg_edges_table = schema::table_name("tkg_edges");
|
|
let co_occurrence_edges: i64 = count_sql!(&format!("SELECT COUNT(*) FROM {tkg_edges_table} WHERE file_uuid = '{file_uuid}' AND edge_type = 'CO_OCCURS_WITH'"));
|
|
let speaker_face_edges: i64 = count_sql!(&format!("SELECT COUNT(*) FROM {tkg_edges_table} WHERE file_uuid = '{file_uuid}' AND edge_type = 'SPEAKS_AS'"));
|
|
let face_face_edges: i64 = count_sql!(&format!("SELECT COUNT(*) FROM {tkg_edges_table} WHERE file_uuid = '{file_uuid}' AND edge_type = 'FACE_TO_FACE'"));
|
|
let mutual_gaze_edges: i64 = count_sql!(&format!("SELECT COUNT(*) FROM {tkg_edges_table} WHERE file_uuid = '{file_uuid}' AND edge_type = 'MUTUAL_GAZE'"));
|
|
let lip_sync_edges: i64 = count_sql!(&format!("SELECT COUNT(*) FROM {tkg_edges_table} WHERE file_uuid = '{file_uuid}' AND edge_type = 'LIP_SYNC'"));
|
|
let has_appearance_edges: i64 = count_sql!(&format!("SELECT COUNT(*) FROM {tkg_edges_table} WHERE file_uuid = '{file_uuid}' AND edge_type = 'HAS_APPEARANCE'"));
|
|
let wears_edges: i64 = count_sql!(&format!("SELECT COUNT(*) FROM {tkg_edges_table} WHERE file_uuid = '{file_uuid}' AND edge_type = 'WEARS'"));
|
|
let hand_object_edges: i64 = count_sql!(&format!("SELECT COUNT(*) FROM {tkg_edges_table} WHERE file_uuid = '{file_uuid}' AND edge_type = 'HAND_OBJECT'"));
|
|
|
|
// Rule 2 relationship chunks
|
|
let rule2_chunks = count_sql!(&format!(
|
|
"SELECT COUNT(*) FROM {chunk} WHERE file_uuid = '{file_uuid}' AND chunk_type = 'relationship'"
|
|
));
|
|
// Get related identities from Qdrant _faces
|
|
let related_identity_ids: Vec<i64> = identity_ids.into_iter().collect();
|
|
let related_identities: Vec<IdentityRef> = if !related_identity_ids.is_empty() {
|
|
let id_list: String = related_identity_ids.iter().map(|id| id.to_string()).collect::<Vec<_>>().join(",");
|
|
match sqlx::query_as::<_, (String, String)>(&format!(
|
|
"SELECT DISTINCT uuid::text, name FROM {identities} \
|
|
WHERE id IN ({id_list}) ORDER BY name"
|
|
))
|
|
.fetch_all(pool)
|
|
.await
|
|
{
|
|
Ok(rows) => rows
|
|
.into_iter()
|
|
.map(|(uuid, name)| IdentityRef {
|
|
uuid: uuid.replace('-', ""),
|
|
name,
|
|
})
|
|
.collect(),
|
|
Err(e) => {
|
|
tracing::error!("related_identities query failed: {}", e);
|
|
vec![]
|
|
}
|
|
}
|
|
} else {
|
|
vec![]
|
|
};
|
|
|
|
let strangers = strangers;
|
|
|
|
macro_rules! step {
|
|
($name:expr, $done:expr, $detail:expr) => {
|
|
IngestionStep {
|
|
name: $name.into(),
|
|
status: if $done { "done" } else { "pending" }.into(),
|
|
detail: $detail,
|
|
}
|
|
};
|
|
}
|
|
|
|
let steps = vec![
|
|
step!(
|
|
"rule1_sentence",
|
|
sentence_count > 0,
|
|
Some(format!("{sentence_count} sentence chunks"))
|
|
),
|
|
step!(
|
|
"auto_vectorize",
|
|
sentence_embedded > 0,
|
|
Some(format!("{sentence_embedded} embedded"))
|
|
),
|
|
step!(
|
|
"face_track",
|
|
trace_count > 0,
|
|
Some(format!("{trace_count} traces / {face_total} detections"))
|
|
),
|
|
step!(
|
|
"trace_chunks",
|
|
trace_chunks > 0,
|
|
Some(format!("{trace_chunks} trace chunks"))
|
|
),
|
|
// TKG Nodes
|
|
step!("tkg_face_track", face_track_nodes > 0, Some(format!("{face_track_nodes} nodes"))),
|
|
step!("tkg_gaze_track", gaze_track_nodes > 0, Some(format!("{gaze_track_nodes} nodes"))),
|
|
step!("tkg_lip_track", lip_track_nodes > 0, Some(format!("{lip_track_nodes} nodes"))),
|
|
step!("tkg_text_region", text_region_nodes > 0, Some(format!("{text_region_nodes} nodes"))),
|
|
step!("tkg_appearance", appearance_nodes > 0, Some(format!("{appearance_nodes} nodes"))),
|
|
step!("tkg_accessory", accessory_nodes > 0, Some(format!("{accessory_nodes} nodes"))),
|
|
step!("tkg_object", object_nodes > 0, Some(format!("{object_nodes} nodes"))),
|
|
step!("tkg_hand", hand_nodes > 0, Some(format!("{hand_nodes} nodes"))),
|
|
step!("tkg_speaker", speaker_nodes > 0, Some(format!("{speaker_nodes} nodes"))),
|
|
// TKG Edges
|
|
step!("tkg_co_occurrence", co_occurrence_edges > 0, Some(format!("{co_occurrence_edges} edges"))),
|
|
step!("tkg_speaker_face", speaker_face_edges > 0, Some(format!("{speaker_face_edges} edges"))),
|
|
step!("tkg_face_face", face_face_edges > 0, Some(format!("{face_face_edges} edges"))),
|
|
step!("tkg_mutual_gaze", mutual_gaze_edges > 0, Some(format!("{mutual_gaze_edges} edges"))),
|
|
step!("tkg_lip_sync", lip_sync_edges > 0, Some(format!("{lip_sync_edges} edges"))),
|
|
step!("tkg_has_appearance", has_appearance_edges > 0, Some(format!("{has_appearance_edges} edges"))),
|
|
step!("tkg_wears", wears_edges > 0, Some(format!("{wears_edges} edges"))),
|
|
step!("tkg_hand_object", hand_object_edges > 0, Some(format!("{hand_object_edges} edges"))),
|
|
// Rule 2
|
|
step!(
|
|
"rule2_relationship",
|
|
rule2_chunks > 0,
|
|
Some(format!("{rule2_chunks} relationship chunks"))
|
|
),
|
|
// Identity & Scene
|
|
step!(
|
|
"identity_match",
|
|
identity_count > 0,
|
|
Some(format!("{identity_count} identities matched"))
|
|
),
|
|
step!("scene_metadata", scene_meta_ok, None),
|
|
];
|
|
|
|
Ok(Json(IngestionStatusResponse {
|
|
file_uuid,
|
|
steps,
|
|
related_identities,
|
|
strangers,
|
|
}))
|
|
}
|
|
|
|
/// Comprehensive file stats endpoint — combines all data sources for frontend transparency
|
|
async fn get_file_stats(
|
|
State(state): State<AppState>,
|
|
Path(file_uuid): Path<String>,
|
|
) -> Result<Json<FileStatsResponse>, StatusCode> {
|
|
let pool = state.db.pool();
|
|
|
|
// 1. Get file info from PostgreSQL
|
|
let videos_table = schema::table_name("videos");
|
|
let file_info: Option<(String, String, String)> = sqlx::query_as(&format!(
|
|
"SELECT file_uuid, file_name, status FROM {} WHERE file_uuid = $1",
|
|
videos_table
|
|
))
|
|
.bind(&file_uuid)
|
|
.fetch_optional(pool)
|
|
.await
|
|
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
|
|
|
|
let (file_uuid_str, file_name, status) = file_info
|
|
.map(|(uuid, name, s)| (uuid, Some(name), Some(s)))
|
|
.unwrap_or_else(|| (file_uuid.clone(), None, None));
|
|
|
|
// 2. Get processor status from processing_status JSONB
|
|
let processing_status: serde_json::Value =
|
|
sqlx::query_scalar(&format!(
|
|
"SELECT processing_status FROM {} WHERE file_uuid = $1",
|
|
videos_table
|
|
))
|
|
.bind(&file_uuid)
|
|
.fetch_optional(pool)
|
|
.await
|
|
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?
|
|
.unwrap_or(serde_json::json!({}));
|
|
|
|
let processors: Vec<ProcessorStatus> = processing_status
|
|
.get("progress")
|
|
.and_then(|p| p.as_object())
|
|
.map(|progress| {
|
|
progress
|
|
.iter()
|
|
.filter_map(|(name, info)| {
|
|
info.as_object().map(|obj| {
|
|
let status = obj
|
|
.get("status")
|
|
.and_then(|s| s.as_str())
|
|
.unwrap_or("pending")
|
|
.to_string();
|
|
let progress_val = obj
|
|
.get("percentage")
|
|
.and_then(|p| p.as_u64())
|
|
.unwrap_or(0) as u32;
|
|
let message = obj
|
|
.get("message")
|
|
.and_then(|m| m.as_str())
|
|
.map(|s| s.to_string());
|
|
ProcessorStatus {
|
|
name: name.clone(),
|
|
status,
|
|
progress: progress_val,
|
|
message,
|
|
}
|
|
})
|
|
})
|
|
.collect()
|
|
})
|
|
.unwrap_or_default();
|
|
|
|
// 3. Get PostgreSQL counts
|
|
let chunk_table = schema::table_name("chunk");
|
|
let identities_table = schema::table_name("identities");
|
|
let file_identities_table = schema::table_name("file_identities");
|
|
|
|
let postgres = PostgresStats {
|
|
sentence_chunks: sqlx::query_scalar::<_, i64>(&format!(
|
|
"SELECT COUNT(*) FROM {chunk_table} WHERE file_uuid = $1 AND chunk_type = 'sentence'"
|
|
))
|
|
.bind(&file_uuid)
|
|
.fetch_one(pool)
|
|
.await
|
|
.unwrap_or(0),
|
|
trace_chunks: sqlx::query_scalar::<_, i64>(&format!(
|
|
"SELECT COUNT(*) FROM {chunk_table} WHERE file_uuid = $1 AND chunk_type = 'trace'"
|
|
))
|
|
.bind(&file_uuid)
|
|
.fetch_one(pool)
|
|
.await
|
|
.unwrap_or(0),
|
|
relationship_chunks: sqlx::query_scalar::<_, i64>(&format!(
|
|
"SELECT COUNT(*) FROM {chunk_table} WHERE file_uuid = $1 AND chunk_type = 'relationship'"
|
|
))
|
|
.bind(&file_uuid)
|
|
.fetch_one(pool)
|
|
.await
|
|
.unwrap_or(0),
|
|
identities: sqlx::query_scalar::<_, i64>(&format!(
|
|
"SELECT COUNT(DISTINCT i.id) FROM {identities_table} i \
|
|
JOIN {file_identities_table} fi ON fi.identity_id = i.id \
|
|
WHERE fi.file_uuid = $1"
|
|
))
|
|
.bind(&file_uuid)
|
|
.fetch_one(pool)
|
|
.await
|
|
.unwrap_or(0),
|
|
file_identities: sqlx::query_scalar::<_, i64>(&format!(
|
|
"SELECT COUNT(*) FROM {file_identities_table} WHERE file_uuid = $1"
|
|
))
|
|
.bind(&file_uuid)
|
|
.fetch_one(pool)
|
|
.await
|
|
.unwrap_or(0),
|
|
};
|
|
|
|
// 4. Get Qdrant stats
|
|
use crate::core::db::qdrant_db::QdrantDb;
|
|
use serde_json::json;
|
|
|
|
let qdrant_db = QdrantDb::new();
|
|
|
|
// Face stats
|
|
let face_filter = json!({
|
|
"must": [{"key": "file_uuid", "match": {"value": file_uuid}}]
|
|
});
|
|
let face_points = qdrant_db
|
|
.scroll_all_points("_faces", face_filter.clone(), 500)
|
|
.await
|
|
.unwrap_or_default();
|
|
|
|
let mut face_traces = std::collections::HashSet::new();
|
|
let mut face_identities = std::collections::HashSet::new();
|
|
for point in &face_points {
|
|
let payload = &point["payload"];
|
|
if let Some(tid) = payload["trace_id"].as_i64() {
|
|
if tid > 0 {
|
|
face_traces.insert(tid);
|
|
}
|
|
}
|
|
if let Some(iid) = payload["identity_id"].as_i64() {
|
|
if iid > 0 {
|
|
face_identities.insert(iid);
|
|
}
|
|
}
|
|
}
|
|
|
|
// Text chunk stats (rule1 collection)
|
|
let schema = std::env::var("DATABASE_SCHEMA").unwrap_or_else(|_| "dev".to_string());
|
|
let rule1_collection = if schema == "public" {
|
|
"momentry_rule1".to_string()
|
|
} else {
|
|
format!("momentry_{}_rule1_v2", schema)
|
|
};
|
|
let text_filter = json!({
|
|
"must": [{"key": "file_uuid", "match": {"value": file_uuid}}]
|
|
});
|
|
let text_points = qdrant_db
|
|
.scroll_all_points(&rule1_collection, text_filter, 500)
|
|
.await
|
|
.unwrap_or_default();
|
|
|
|
// Speaker stats
|
|
let speaker_collection = format!("momentry_{}_speaker", schema);
|
|
let speaker_filter = json!({
|
|
"must": [{"key": "file_uuid", "match": {"value": file_uuid}}]
|
|
});
|
|
let speaker_points = qdrant_db
|
|
.scroll_all_points(&speaker_collection, speaker_filter, 500)
|
|
.await
|
|
.unwrap_or_default();
|
|
|
|
let qdrant_stats = QdrantStats {
|
|
faces: face_points.len() as i64,
|
|
face_traces: face_traces.len() as i64,
|
|
face_identities: face_identities.len() as i64,
|
|
text_chunks: text_points.len() as i64,
|
|
speakers: speaker_points.len() as i64,
|
|
};
|
|
|
|
// 5. Get TKG stats from PostgreSQL
|
|
let tkg_nodes_table = schema::table_name("tkg_nodes");
|
|
let tkg_edges_table = schema::table_name("tkg_edges");
|
|
|
|
let tkg_nodes_total: i64 = sqlx::query_scalar::<_, i64>(&format!("SELECT COUNT(*) FROM {} WHERE file_uuid = $1", tkg_nodes_table))
|
|
.bind(&file_uuid).fetch_one(pool).await.unwrap_or(0);
|
|
let tkg_edges_total: i64 = sqlx::query_scalar::<_, i64>(&format!("SELECT COUNT(*) FROM {} WHERE file_uuid = $1", tkg_edges_table))
|
|
.bind(&file_uuid).fetch_one(pool).await.unwrap_or(0);
|
|
|
|
let tkg = TkgFileStats {
|
|
total_nodes: tkg_nodes_total,
|
|
total_edges: tkg_edges_total,
|
|
face_track_nodes: count_nodes(pool, &tkg_nodes_table, &file_uuid, "face_track").await,
|
|
gaze_track_nodes: count_nodes(pool, &tkg_nodes_table, &file_uuid, "gaze_track").await,
|
|
lip_track_nodes: count_nodes(pool, &tkg_nodes_table, &file_uuid, "lip_track").await,
|
|
text_region_nodes: count_nodes(pool, &tkg_nodes_table, &file_uuid, "text_trace").await,
|
|
appearance_nodes: count_nodes(pool, &tkg_nodes_table, &file_uuid, "appearance_trace").await,
|
|
accessory_nodes: count_nodes(pool, &tkg_nodes_table, &file_uuid, "accessory").await,
|
|
object_nodes: count_nodes(pool, &tkg_nodes_table, &file_uuid, "yolo_object").await,
|
|
hand_nodes: count_nodes(pool, &tkg_nodes_table, &file_uuid, "hand").await,
|
|
speaker_nodes: count_nodes(pool, &tkg_nodes_table, &file_uuid, "speaker").await,
|
|
co_occurrence_edges: count_edges(pool, &tkg_edges_table, &file_uuid, "CO_OCCURS_WITH").await,
|
|
speaker_face_edges: count_edges(pool, &tkg_edges_table, &file_uuid, "SPEAKS_AS").await,
|
|
face_face_edges: count_edges(pool, &tkg_edges_table, &file_uuid, "FACE_TO_FACE").await,
|
|
mutual_gaze_edges: count_edges(pool, &tkg_edges_table, &file_uuid, "MUTUAL_GAZE").await,
|
|
lip_sync_edges: count_edges(pool, &tkg_edges_table, &file_uuid, "LIP_SYNC").await,
|
|
has_appearance_edges: count_edges(pool, &tkg_edges_table, &file_uuid, "HAS_APPEARANCE").await,
|
|
wears_edges: count_edges(pool, &tkg_edges_table, &file_uuid, "WEARS").await,
|
|
hand_object_edges: count_edges(pool, &tkg_edges_table, &file_uuid, "HAND_OBJECT").await,
|
|
..Default::default()
|
|
};
|
|
|
|
// 6. Get Identity Agent stats from Qdrant _seeds
|
|
let seeds_filter = json!({
|
|
"must": [
|
|
{"key": "file_uuid", "match": {"value": file_uuid}}
|
|
]
|
|
});
|
|
let seed_points = qdrant_db
|
|
.scroll_all_points("_seeds", seeds_filter, 500)
|
|
.await
|
|
.unwrap_or_default();
|
|
|
|
let identity_agent = IdentityAgentStats {
|
|
clusters: 0, // From face_clustered.json if available
|
|
identities_created: face_identities.len() as i64,
|
|
tmdb_matches: seed_points.iter()
|
|
.filter(|p| p["payload"]["source"].as_str() == Some("tmdb"))
|
|
.count() as i64,
|
|
speaker_bindings: speaker_points.len() as i64,
|
|
confirmations: 0, // From identity_bindings table
|
|
};
|
|
|
|
Ok(Json(FileStatsResponse {
|
|
file_uuid: file_uuid_str,
|
|
file_name,
|
|
status,
|
|
processors,
|
|
postgres,
|
|
qdrant: qdrant_stats,
|
|
tkg,
|
|
identity_agent,
|
|
}))
|
|
}
|
|
|
|
/// Counts the rows of `table` that belong to `file_uuid` and have the given
/// `node_type`. Both values are sent as bound parameters.
///
/// Returns `0` when the query fails.
async fn count_nodes(pool: &sqlx::PgPool, table: &str, file_uuid: &str, node_type: &str) -> i64 {
    let sql = format!(
        "SELECT COUNT(*) FROM {} WHERE file_uuid = $1 AND node_type = $2",
        table
    );
    let counted = sqlx::query_scalar::<_, i64>(&sql)
        .bind(file_uuid)
        .bind(node_type)
        .fetch_one(pool)
        .await;
    match counted {
        Ok(rows) => rows,
        Err(_) => 0,
    }
}
|
|
|
|
/// Counts the rows of `table` that belong to `file_uuid` and have the given
/// `edge_type`. Both values are sent as bound parameters.
///
/// Returns `0` when the query fails.
async fn count_edges(pool: &sqlx::PgPool, table: &str, file_uuid: &str, edge_type: &str) -> i64 {
    let sql = format!(
        "SELECT COUNT(*) FROM {} WHERE file_uuid = $1 AND edge_type = $2",
        table
    );
    let counted = sqlx::query_scalar::<_, i64>(&sql)
        .bind(file_uuid)
        .bind(edge_type)
        .fetch_one(pool)
        .await;
    match counted {
        Ok(rows) => rows,
        Err(_) => 0,
    }
}
|
|
|
|
pub fn scan_routes() -> Router<AppState> {
|
|
Router::new()
|
|
.route("/api/v1/files/scan", get(scan_files))
|
|
.route("/api/v1/stats/sftpgo", get(get_sftpgo_status))
|
|
.route(
|
|
"/api/v1/stats/ingestion-status/:file_uuid",
|
|
get(get_ingestion_status),
|
|
)
|
|
.route(
|
|
"/api/v1/stats/file/:file_uuid",
|
|
get(get_file_stats),
|
|
)
|
|
.route(
|
|
"/api/v1/stats/pipeline/:file_uuid",
|
|
get(get_pipeline_progress_handler),
|
|
)
|
|
}
|
|
|
|
/// Get segmented pipeline progress with weighted stages
|
|
async fn get_pipeline_progress_handler(
|
|
State(state): State<AppState>,
|
|
Path(file_uuid): Path<String>,
|
|
) -> Result<Json<crate::core::progress::PipelineProgress>, StatusCode> {
|
|
let pool = state.db.pool();
|
|
let chunk_table = schema::table_name("chunk");
|
|
let tkg_nodes_table = schema::table_name("tkg_nodes");
|
|
let tkg_edges_table = schema::table_name("tkg_edges");
|
|
let pr_table = schema::table_name("processor_results");
|
|
let mj_table = schema::table_name("monitor_jobs");
|
|
|
|
// Compute actual progress from DB state
|
|
let sentence_count: i64 = sqlx::query_scalar::<_, i64>(
|
|
&format!("SELECT COUNT(*) FROM {chunk_table} WHERE file_uuid = $1 AND chunk_type = 'sentence'")
|
|
).bind(&file_uuid).fetch_one(pool).await.unwrap_or(0);
|
|
|
|
let sentence_embedded: i64 = sqlx::query_scalar::<_, i64>(
|
|
&format!("SELECT COUNT(*) FROM {chunk_table} WHERE file_uuid = $1 AND chunk_type = 'sentence' AND embedding IS NOT NULL")
|
|
).bind(&file_uuid).fetch_one(pool).await.unwrap_or(0);
|
|
|
|
let face_traced: i64 = sqlx::query_scalar::<_, i64>(
|
|
&format!("SELECT COUNT(*) FROM {pr_table} pr JOIN {mj_table} mj ON pr.job_id = mj.id WHERE mj.uuid = $1 AND pr.processor = 'face' AND pr.status = 'completed'")
|
|
).bind(&file_uuid).fetch_one(pool).await.unwrap_or(0);
|
|
|
|
let tkg_node_count: i64 = sqlx::query_scalar::<_, i64>(
|
|
&format!("SELECT COUNT(*) FROM {tkg_nodes_table} WHERE file_uuid = $1")
|
|
).bind(&file_uuid).fetch_one(pool).await.unwrap_or(0);
|
|
|
|
let tkg_edge_count: i64 = sqlx::query_scalar::<_, i64>(
|
|
&format!("SELECT COUNT(*) FROM {tkg_edges_table} WHERE file_uuid = $1")
|
|
).bind(&file_uuid).fetch_one(pool).await.unwrap_or(0);
|
|
|
|
let relationship_count: i64 = sqlx::query_scalar::<_, i64>(
|
|
&format!("SELECT COUNT(*) FROM {chunk_table} WHERE file_uuid = $1 AND chunk_type = 'relationship'")
|
|
).bind(&file_uuid).fetch_one(pool).await.unwrap_or(0);
|
|
|
|
let asrx_completed: i64 = sqlx::query_scalar::<_, i64>(
|
|
&format!("SELECT COUNT(*) FROM {pr_table} pr JOIN {mj_table} mj ON pr.job_id = mj.id WHERE mj.uuid = $1 AND pr.processor = 'asrx' AND pr.status = 'completed'")
|
|
).bind(&file_uuid).fetch_one(pool).await.unwrap_or(0);
|
|
|
|
// Determine processor completion
|
|
let processors_done = asrx_completed > 0;
|
|
|
|
let mut pp = crate::core::progress::PipelineProgress::new(&file_uuid);
|
|
|
|
if processors_done {
|
|
pp.update_stage("processors", 1.0, "completed", None);
|
|
}
|
|
if sentence_count > 0 {
|
|
let detail = if sentence_embedded > 0 {
|
|
Some(format!("{} chunks, {} embedded", sentence_count, sentence_embedded))
|
|
} else {
|
|
Some(format!("{} chunks", sentence_count))
|
|
};
|
|
pp.update_stage("rule1_ingestion", 1.0, "completed", detail);
|
|
}
|
|
if face_traced > 0 {
|
|
pp.update_stage("face_tracing", 1.0, "completed", None);
|
|
}
|
|
if tkg_node_count > 0 {
|
|
pp.update_stage("tkg_nodes", 1.0, "completed", Some(format!("{} nodes", tkg_node_count)));
|
|
}
|
|
if tkg_edge_count > 0 {
|
|
pp.update_stage("tkg_edges", 1.0, "completed", Some(format!("{} edges", tkg_edge_count)));
|
|
}
|
|
if relationship_count > 0 {
|
|
pp.update_stage("rule2_ingestion", 1.0, "completed", Some(format!("{} chunks", relationship_count)));
|
|
}
|
|
|
|
// Check identity agent from _seeds
|
|
use crate::core::db::qdrant_db::QdrantDb;
|
|
use serde_json::json;
|
|
let qdrant = QdrantDb::new();
|
|
let schema = std::env::var("DATABASE_SCHEMA").unwrap_or_else(|_| "dev".to_string());
|
|
let seeds_collection = if schema == "public" { "momentry_public_speaker" } else { &format!("momentry_{}_speaker", schema) };
|
|
let seeds_filter = json!({"must": [{"key": "file_uuid", "match": {"value": &file_uuid}}]});
|
|
let seed_points = qdrant.scroll_all_points("_seeds", seeds_filter, 100).await.unwrap_or_default();
|
|
if !seed_points.is_empty() {
|
|
pp.update_stage("identity_agent", 1.0, "completed", Some(format!("{} seeds", seed_points.len())));
|
|
}
|
|
|
|
Ok(Json(pp))
|
|
}
|