fix: pipeline timeline log, chunk lookup, face processor no fallback, Qdrant UUID script, delete safety rules

This commit is contained in:
Accusys
2026-05-18 00:36:14 +08:00
parent a880c80556
commit 088aefdac7
7 changed files with 503 additions and 29 deletions

View File

@@ -3765,10 +3765,18 @@ struct IngestionStep {
detail: Option<String>,
}
/// A (uuid, name) pair describing one identity linked to a file's face
/// detections. `get_ingestion_status` builds these for the
/// `related_identities` list of its response.
#[derive(Debug, Serialize)]
struct IdentityRef {
/// Identity UUID. `get_ingestion_status` removes every `-` from the value
/// read from the identities table before storing it here.
uuid: String,
/// Identity name, read from the `name` column of the identities table.
name: String,
}
/// Payload returned (wrapped in `Json`) by `get_ingestion_status`: the
/// per-step ingestion progress for one file plus a summary of who was
/// detected in it.
#[derive(Debug, Serialize)]
struct IngestionStatusResponse {
/// UUID of the file the status was computed for.
file_uuid: String,
/// One entry per pipeline step (e.g. "rule1_sentence", "face_trace", "tkg"),
/// each built by the `step!` macro from a name, a done flag and an optional detail.
steps: Vec<IngestionStep>,
/// Distinct identities joined to this file's face detections, ordered by name.
related_identities: Vec<IdentityRef>,
/// Number of distinct `trace_id`s in this file's face detections that have
/// a trace but no `identity_id` assigned.
strangers: i64,
}
async fn get_ingestion_status(
@@ -3778,6 +3786,7 @@ async fn get_ingestion_status(
let pool = state.db.pool();
let chunk = schema::table_name("chunk");
let fd = schema::table_name("face_detections");
let identities = schema::table_name("identities");
let scene_meta_path = format!("{}/{}.scene_meta.json",
crate::core::config::OUTPUT_DIR.as_str(),
@@ -3796,13 +3805,30 @@ async fn get_ingestion_status(
let sentence_count = count_sql!(&format!("SELECT COUNT(*) FROM {chunk} WHERE file_uuid = '{file_uuid}' AND chunk_type = 'sentence'"));
let sentence_embedded = count_sql!(&format!("SELECT COUNT(*) FROM {chunk} WHERE file_uuid = '{file_uuid}' AND chunk_type = 'sentence' AND embedding IS NOT NULL"));
let scene_count = count_sql!(&format!("SELECT COUNT(*) FROM {chunk} WHERE file_uuid = '{file_uuid}' AND chunk_type = 'cut'"));
let face_total = count_sql!(&format!("SELECT COUNT(*) FROM {fd} WHERE file_uuid = '{file_uuid}'"));
let trace_count = count_sql!(&format!("SELECT COUNT(DISTINCT trace_id) FROM {fd} WHERE file_uuid = '{file_uuid}' AND trace_id IS NOT NULL"));
let trace_chunks = count_sql!(&format!("SELECT COUNT(*) FROM {chunk} WHERE file_uuid = '{file_uuid}' AND chunk_type = 'trace'"));
let identities = count_sql!(&format!("SELECT COUNT(DISTINCT identity_id) FROM {fd} WHERE file_uuid = '{file_uuid}' AND identity_id IS NOT NULL"));
let identity_count = count_sql!(&format!("SELECT COUNT(DISTINCT identity_id) FROM {fd} WHERE file_uuid = '{file_uuid}' AND identity_id IS NOT NULL"));
let tkg_nodes = count_sql!(&format!("SELECT COUNT(*) FROM {} WHERE file_uuid = '{file_uuid}'", schema::table_name("tkg_nodes")));
let tkg_edges = count_sql!(&format!("SELECT COUNT(*) FROM {} WHERE file_uuid = '{file_uuid}'", schema::table_name("tkg_edges")));
let scene_5w1h = count_sql!(&format!("SELECT COUNT(*) FROM {chunk} WHERE file_uuid = '{file_uuid}' AND chunk_type = 'cut' AND summary_text IS NOT NULL AND summary_text != ''"));
let related_identities: Vec<IdentityRef> = sqlx::query_as::<_, (String, String)>(&format!(
"SELECT DISTINCT i.uuid, i.name FROM {identities} i \
JOIN {fd} fd ON fd.identity_id = i.id \
WHERE fd.file_uuid = '{file_uuid}' AND fd.identity_id IS NOT NULL \
ORDER BY i.name"
)).fetch_all(pool).await.unwrap_or_default().into_iter()
.map(|(uuid, name)| {
let uuid = uuid.replace('-', "");
IdentityRef { uuid, name }
}).collect();
let strangers = count_sql!(&format!(
"SELECT COUNT(DISTINCT trace_id) FROM {fd} \
WHERE file_uuid = '{file_uuid}' AND trace_id IS NOT NULL AND identity_id IS NULL"
));
macro_rules! step {
($name:expr, $done:expr, $detail:expr) => {
IngestionStep {
@@ -3817,15 +3843,15 @@ async fn get_ingestion_status(
step!("rule1_sentence", sentence_count > 0, Some(format!("{sentence_count} sentence chunks"))),
step!("auto_vectorize", sentence_embedded > 0, Some(format!("{sentence_embedded} embedded"))),
step!("rule3_scene", scene_count > 0, Some(format!("{scene_count} scene chunks"))),
step!("face_trace", trace_count > 0, Some(format!("{trace_count} traces"))),
step!("face_trace", trace_count > 0, Some(format!("{trace_count} traces / {face_total} detections"))),
step!("trace_chunks", trace_chunks > 0, Some(format!("{trace_chunks} trace chunks"))),
step!("tkg", tkg_nodes > 0 || tkg_edges > 0, Some(format!("{tkg_nodes} nodes, {tkg_edges} edges"))),
step!("identity_match", identities > 0, Some(format!("{identities} identities matched"))),
step!("identity_match", identity_count > 0, Some(format!("{identity_count} identities matched"))),
step!("scene_metadata", scene_meta_ok, None),
step!("5w1h", scene_5w1h > 0, Some(format!("{scene_5w1h} scenes with 5W1H"))),
];
Ok(Json(IngestionStatusResponse { file_uuid, steps }))
Ok(Json(IngestionStatusResponse { file_uuid, steps, related_identities, strangers }))
}
#[derive(Debug, Deserialize)]