feat: trace-level matching, health watcher/worker status, timezone config

This commit is contained in:
Accusys
2026-05-21 01:08:30 +08:00
parent 8ede4be159
commit bebaa743ed
60 changed files with 6110 additions and 1586 deletions

View File

@@ -12,12 +12,13 @@ use crate::core::chunk::{rule1_ingest, rule3_ingest};
use crate::core::config::OUTPUT_DIR;
use crate::core::db::qdrant_db::QdrantDb;
use crate::core::db::{
schema, MonitorJobStatus, PostgresDb, ProcessorJobStatus, RedisClient, VectorPayload, VideoStatus,
schema, MonitorJobStatus, PostgresDb, ProcessorJobStatus, RedisClient, VectorPayload,
VideoStatus,
};
use crate::core::embedding::Embedder;
use crate::core::processor::heuristic_scene::generate_scene_meta;
use crate::worker::config::WorkerConfig;
use crate::worker::processor::{ProcessorPool, ProcessorTask};
use crate::core::processor::heuristic_scene::generate_scene_meta;
use crate::worker::resources::SystemResources;
use sqlx::PgPool;
@@ -70,14 +71,15 @@ impl JobWorker {
// Reset stale running jobs: jobs stuck in 'running' with no active processor results
let monitor_jobs_table = schema::table_name("monitor_jobs");
let processor_results_table = schema::table_name("processor_results");
if let Err(e) = sqlx::query(
&format!("UPDATE {} SET status = 'pending', updated_at = NOW()
if let Err(e) = sqlx::query(&format!(
"UPDATE {} SET status = 'pending', updated_at = NOW()
WHERE status = 'running'
AND id NOT IN (
SELECT DISTINCT job_id FROM {}
WHERE status IN ('pending', 'running')
)", monitor_jobs_table, processor_results_table),
)
)",
monitor_jobs_table, processor_results_table
))
.execute(self.db.pool())
.await
{
@@ -608,12 +610,23 @@ impl JobWorker {
}
let fu = uuid;
let rule1 = check!(&format!("SELECT 1 FROM {chunk_t} WHERE file_uuid = '{fu}' AND chunk_type = 'sentence' LIMIT 1"));
let rule1 = check!(&format!(
"SELECT 1 FROM {chunk_t} WHERE file_uuid = '{fu}' AND chunk_type = 'sentence' LIMIT 1"
));
let vector = check!(&format!("SELECT 1 FROM {chunk_t} WHERE file_uuid = '{fu}' AND chunk_type = 'sentence' AND embedding IS NOT NULL LIMIT 1"));
let rule3 = check!(&format!("SELECT 1 FROM {chunk_t} WHERE file_uuid = '{fu}' AND chunk_type = 'cut' LIMIT 1"));
let rule3 = check!(&format!(
"SELECT 1 FROM {chunk_t} WHERE file_uuid = '{fu}' AND chunk_type = 'cut' LIMIT 1"
));
let trace = check!(&format!("SELECT COUNT(DISTINCT trace_id) FROM {fd_t} WHERE file_uuid = '{fu}' AND trace_id IS NOT NULL"));
let tkg = check!(&format!("SELECT 1 FROM {} WHERE file_uuid = '{fu}' LIMIT 1", schema::table_name("tkg_nodes")));
let scene_meta = std::path::Path::new(&format!("{}/{fu}.scene_meta.json", crate::core::config::OUTPUT_DIR.as_str())).exists();
let tkg = check!(&format!(
"SELECT 1 FROM {} WHERE file_uuid = '{fu}' LIMIT 1",
schema::table_name("tkg_nodes")
));
let scene_meta = std::path::Path::new(&format!(
"{}/{fu}.scene_meta.json",
crate::core::config::OUTPUT_DIR.as_str()
))
.exists();
let five_w1h = check!(&format!("SELECT 1 FROM {chunk_t} WHERE file_uuid = '{fu}' AND chunk_type = 'cut' AND summary_text IS NOT NULL AND summary_text != '' LIMIT 1"));
let all_ok = rule1 && vector && rule3 && trace && tkg && scene_meta && five_w1h;
@@ -847,26 +860,23 @@ impl JobWorker {
Err(e) => error!("❌ Trace chunk ingestion failed: {}", e),
}
// Build Temporal Knowledge Graph (TKG)
info!("📝 Building TKG graph...");
let executor = match crate::core::processor::PythonExecutor::new() {
Ok(ex) => ex,
Err(e) => {
error!("Failed to create PythonExecutor for TKG: {}", e);
return;
}
};
match executor
.run(
"tkg_builder.py",
&["--file-uuid", &uuid_clone],
Some(&uuid_clone),
"TKG_BUILDER",
Some(std::time::Duration::from_secs(300)),
)
.await
// Build Temporal Knowledge Graph (TKG) — native Rust
info!("📝 Building TKG graph (Rust)...");
let output_dir = std::env::var("MOMENTRY_OUTPUT_DIR")
.unwrap_or_else(|_| ".".to_string());
match crate::core::processor::tkg::build_tkg(
db_clone.as_ref(),
&uuid_clone,
&output_dir,
)
.await
{
Ok(()) => info!("✅ TKG built for {}", uuid_clone),
Ok(r) => info!(
"✅ TKG built for {}: {} face, {} obj, {} spk, {} co, {} sf, {} ff edges",
uuid_clone,
r.face_trace_nodes, r.object_nodes, r.speaker_nodes,
r.co_occurrence_edges, r.speaker_face_edges, r.face_face_edges,
),
Err(e) => error!("❌ TKG build failed for {}: {}", uuid_clone, e),
}
}
@@ -898,7 +908,7 @@ impl JobWorker {
let ids = sqlx::query_scalar::<_, uuid::Uuid>(
"SELECT DISTINCT i.uuid FROM identities i \
JOIN face_detections fd ON fd.identity_id = i.id \
WHERE fd.file_uuid = $1 AND fd.identity_id IS NOT NULL"
WHERE fd.file_uuid = $1 AND fd.identity_id IS NOT NULL",
)
.bind(&uuid_clone)
.fetch_all(db_clone.pool())
@@ -907,12 +917,18 @@ impl JobWorker {
for id_uuid in &ids {
let us = id_uuid.to_string().replace('-', "");
if let Err(e) = crate::core::identity::storage::save_identity_file(
&db_clone, &us
).await {
&db_clone, &us,
)
.await
{
warn!("[P2.5] Failed to save identity file {}: {}", us, e);
}
}
info!("[P2.5] {} identity files saved for {}", ids.len(), uuid_clone);
info!(
"[P2.5] {} identity files saved for {}",
ids.len(),
uuid_clone
);
}
Err(e) => error!("❌ TMDb face matching failed for {}: {}", uuid_clone, e),
}
@@ -1088,8 +1104,8 @@ impl JobWorker {
let pool = db.pool();
let chunk_table = schema::table_name("chunk");
let rows = sqlx::query_as::<_, (String, String, String, f64, f64, String)>(
&format!("SELECT chunk_id, chunk_type, text_content, start_time, end_time, content::text FROM {} WHERE file_uuid = $1 AND chunk_type = 'sentence' AND embedding IS NULL AND (text_content IS NOT NULL AND text_content != '') ORDER BY id", chunk_table),
let rows = sqlx::query_as::<_, (String, String, String, i64, i64, f64, f64, String)>(
&format!("SELECT chunk_id, chunk_type, text_content, start_frame, end_frame, start_time, end_time, content::text FROM {} WHERE file_uuid = $1 AND chunk_type = 'sentence' AND embedding IS NULL AND (text_content IS NOT NULL AND text_content != '') ORDER BY id", chunk_table),
)
.bind(uuid)
.fetch_all(pool)
@@ -1107,7 +1123,17 @@ impl JobWorker {
);
let mut stored = 0usize;
for (chunk_id, _chunk_type, text, start_time, end_time, _content_str) in &rows {
for (
chunk_id,
_chunk_type,
text,
start_frame,
end_frame,
start_time,
end_time,
_content_str,
) in &rows
{
if text.is_empty() {
continue;
}
@@ -1119,9 +1145,11 @@ impl JobWorker {
continue;
}
let payload = VectorPayload {
uuid: uuid.to_string(),
file_uuid: uuid.to_string(),
chunk_id: chunk_id.clone(),
chunk_type: "sentence".to_string(),
start_frame: *start_frame,
end_frame: *end_frame,
start_time: *start_time,
end_time: *end_time,
text: Some(text.clone()),

View File

@@ -237,11 +237,19 @@ impl ProcessorPool {
let key = format!("{}job:{}:processor:{}", prefix, &job.uuid, &processor_name);
let now = chrono::Utc::now().to_rfc3339();
let _: Option<String> = redis::cmd("HSET")
.arg(&key).arg("started_at").arg(&now)
.query_async(&mut conn).await.ok();
.arg(&key)
.arg("started_at")
.arg(&now)
.query_async(&mut conn)
.await
.ok();
let _: Option<String> = redis::cmd("HSET")
.arg(&key).arg("embedding_started_at").arg(&now)
.query_async(&mut conn).await.ok();
.arg(&key)
.arg("embedding_started_at")
.arg(&now)
.query_async(&mut conn)
.await
.ok();
}
// Subscribe to Redis progress pub/sub and update processor hash in real-time
@@ -254,10 +262,12 @@ impl ProcessorPool {
let cb_processor = sub_processor.clone();
if let Err(e) = sub_redis
.subscribe_and_callback(&sub_uuid, move |msg| {
tracing::info!("[Subscriber] Got msg for={} cur={} tot={}",
msg.processor,
tracing::info!(
"[Subscriber] Got msg for={} cur={} tot={}",
msg.processor,
msg.data.current.unwrap_or(0),
msg.data.total.unwrap_or(0));
msg.data.total.unwrap_or(0)
);
if msg.processor == cb_processor {
let cur = msg.data.current.unwrap_or(0);
let tot = msg.data.total.unwrap_or(0);
@@ -266,11 +276,18 @@ impl ProcessorPool {
let u = cb_uuid.clone();
let p = cb_processor.clone();
tokio::spawn(async move {
match r.update_worker_processor_status(
&u, &p, "running", None,
cur, oc, tot, 0, 0,
).await {
Ok(_) => tracing::info!("[Subscriber] Updated {}: cur={} tot={}", p, cur, tot),
match r
.update_worker_processor_status(
&u, &p, "running", None, cur, oc, tot, 0, 0,
)
.await
{
Ok(_) => tracing::info!(
"[Subscriber] Updated {}: cur={} tot={}",
p,
cur,
tot
),
Err(e) => tracing::error!("[Subscriber] FAILED {}: {}", p, e),
}
});
@@ -756,9 +773,11 @@ impl ProcessorPool {
.enumerate()
.map(|(i, segment)| {
// Prefer ASR output frames, fallback to time-based conversion
let start_frame = segment.start_frame
let start_frame = segment
.start_frame
.unwrap_or_else(|| (segment.start_time * fps).round() as i64);
let end_frame = segment.end_frame
let end_frame = segment
.end_frame
.unwrap_or_else(|| (segment.end_time * fps).round() as i64);
let data = serde_json::json!({
"text": segment.text,
@@ -892,7 +911,11 @@ impl ProcessorPool {
tracing::info!(
"Storing {} Face pre-chunks + {} detections for video {}",
frames_count,
face_result.frames.iter().map(|f| f.faces.len()).sum::<usize>(),
face_result
.frames
.iter()
.map(|f| f.faces.len())
.sum::<usize>(),
uuid
);
@@ -911,7 +934,10 @@ impl ProcessorPool {
detections_to_store.push((
frame.frame as i64,
frame.timestamp,
face.x, face.y, face.width, face.height,
face.x,
face.y,
face.width,
face.height,
face.confidence,
));
}
@@ -1170,9 +1196,10 @@ impl ProcessorPool {
"top_5": scene.top_5,
});
let chunk_table = crate::core::db::schema::table_name("chunk");
let _ = sqlx::query(
&format!("UPDATE {} SET metadata = metadata || $1::jsonb WHERE file_uuid=$2 AND chunk_id=$3", chunk_table)
)
let _ = sqlx::query(&format!(
"UPDATE {} SET metadata = metadata || $1::jsonb WHERE file_uuid=$2 AND chunk_id=$3",
chunk_table
))
.bind(&meta)
.bind(uuid)
.bind(&chk_id)