feat: trace-level matching, health watcher/worker status, timezone config
This commit is contained in:
@@ -12,12 +12,13 @@ use crate::core::chunk::{rule1_ingest, rule3_ingest};
|
||||
use crate::core::config::OUTPUT_DIR;
|
||||
use crate::core::db::qdrant_db::QdrantDb;
|
||||
use crate::core::db::{
|
||||
schema, MonitorJobStatus, PostgresDb, ProcessorJobStatus, RedisClient, VectorPayload, VideoStatus,
|
||||
schema, MonitorJobStatus, PostgresDb, ProcessorJobStatus, RedisClient, VectorPayload,
|
||||
VideoStatus,
|
||||
};
|
||||
use crate::core::embedding::Embedder;
|
||||
use crate::core::processor::heuristic_scene::generate_scene_meta;
|
||||
use crate::worker::config::WorkerConfig;
|
||||
use crate::worker::processor::{ProcessorPool, ProcessorTask};
|
||||
use crate::core::processor::heuristic_scene::generate_scene_meta;
|
||||
use crate::worker::resources::SystemResources;
|
||||
use sqlx::PgPool;
|
||||
|
||||
@@ -70,14 +71,15 @@ impl JobWorker {
|
||||
// Reset stale running jobs: jobs stuck in 'running' with no active processor results
|
||||
let monitor_jobs_table = schema::table_name("monitor_jobs");
|
||||
let processor_results_table = schema::table_name("processor_results");
|
||||
if let Err(e) = sqlx::query(
|
||||
&format!("UPDATE {} SET status = 'pending', updated_at = NOW()
|
||||
if let Err(e) = sqlx::query(&format!(
|
||||
"UPDATE {} SET status = 'pending', updated_at = NOW()
|
||||
WHERE status = 'running'
|
||||
AND id NOT IN (
|
||||
SELECT DISTINCT job_id FROM {}
|
||||
WHERE status IN ('pending', 'running')
|
||||
)", monitor_jobs_table, processor_results_table),
|
||||
)
|
||||
)",
|
||||
monitor_jobs_table, processor_results_table
|
||||
))
|
||||
.execute(self.db.pool())
|
||||
.await
|
||||
{
|
||||
@@ -608,12 +610,23 @@ impl JobWorker {
|
||||
}
|
||||
|
||||
let fu = uuid;
|
||||
let rule1 = check!(&format!("SELECT 1 FROM {chunk_t} WHERE file_uuid = '{fu}' AND chunk_type = 'sentence' LIMIT 1"));
|
||||
let rule1 = check!(&format!(
|
||||
"SELECT 1 FROM {chunk_t} WHERE file_uuid = '{fu}' AND chunk_type = 'sentence' LIMIT 1"
|
||||
));
|
||||
let vector = check!(&format!("SELECT 1 FROM {chunk_t} WHERE file_uuid = '{fu}' AND chunk_type = 'sentence' AND embedding IS NOT NULL LIMIT 1"));
|
||||
let rule3 = check!(&format!("SELECT 1 FROM {chunk_t} WHERE file_uuid = '{fu}' AND chunk_type = 'cut' LIMIT 1"));
|
||||
let rule3 = check!(&format!(
|
||||
"SELECT 1 FROM {chunk_t} WHERE file_uuid = '{fu}' AND chunk_type = 'cut' LIMIT 1"
|
||||
));
|
||||
let trace = check!(&format!("SELECT COUNT(DISTINCT trace_id) FROM {fd_t} WHERE file_uuid = '{fu}' AND trace_id IS NOT NULL"));
|
||||
let tkg = check!(&format!("SELECT 1 FROM {} WHERE file_uuid = '{fu}' LIMIT 1", schema::table_name("tkg_nodes")));
|
||||
let scene_meta = std::path::Path::new(&format!("{}/{fu}.scene_meta.json", crate::core::config::OUTPUT_DIR.as_str())).exists();
|
||||
let tkg = check!(&format!(
|
||||
"SELECT 1 FROM {} WHERE file_uuid = '{fu}' LIMIT 1",
|
||||
schema::table_name("tkg_nodes")
|
||||
));
|
||||
let scene_meta = std::path::Path::new(&format!(
|
||||
"{}/{fu}.scene_meta.json",
|
||||
crate::core::config::OUTPUT_DIR.as_str()
|
||||
))
|
||||
.exists();
|
||||
let five_w1h = check!(&format!("SELECT 1 FROM {chunk_t} WHERE file_uuid = '{fu}' AND chunk_type = 'cut' AND summary_text IS NOT NULL AND summary_text != '' LIMIT 1"));
|
||||
|
||||
let all_ok = rule1 && vector && rule3 && trace && tkg && scene_meta && five_w1h;
|
||||
@@ -847,26 +860,23 @@ impl JobWorker {
|
||||
Err(e) => error!("❌ Trace chunk ingestion failed: {}", e),
|
||||
}
|
||||
|
||||
// Build Temporal Knowledge Graph (TKG)
|
||||
info!("📝 Building TKG graph...");
|
||||
let executor = match crate::core::processor::PythonExecutor::new() {
|
||||
Ok(ex) => ex,
|
||||
Err(e) => {
|
||||
error!("Failed to create PythonExecutor for TKG: {}", e);
|
||||
return;
|
||||
}
|
||||
};
|
||||
match executor
|
||||
.run(
|
||||
"tkg_builder.py",
|
||||
&["--file-uuid", &uuid_clone],
|
||||
Some(&uuid_clone),
|
||||
"TKG_BUILDER",
|
||||
Some(std::time::Duration::from_secs(300)),
|
||||
)
|
||||
.await
|
||||
// Build Temporal Knowledge Graph (TKG) — native Rust
|
||||
info!("📝 Building TKG graph (Rust)...");
|
||||
let output_dir = std::env::var("MOMENTRY_OUTPUT_DIR")
|
||||
.unwrap_or_else(|_| ".".to_string());
|
||||
match crate::core::processor::tkg::build_tkg(
|
||||
db_clone.as_ref(),
|
||||
&uuid_clone,
|
||||
&output_dir,
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(()) => info!("✅ TKG built for {}", uuid_clone),
|
||||
Ok(r) => info!(
|
||||
"✅ TKG built for {}: {} face, {} obj, {} spk, {} co, {} sf, {} ff edges",
|
||||
uuid_clone,
|
||||
r.face_trace_nodes, r.object_nodes, r.speaker_nodes,
|
||||
r.co_occurrence_edges, r.speaker_face_edges, r.face_face_edges,
|
||||
),
|
||||
Err(e) => error!("❌ TKG build failed for {}: {}", uuid_clone, e),
|
||||
}
|
||||
}
|
||||
@@ -898,7 +908,7 @@ impl JobWorker {
|
||||
let ids = sqlx::query_scalar::<_, uuid::Uuid>(
|
||||
"SELECT DISTINCT i.uuid FROM identities i \
|
||||
JOIN face_detections fd ON fd.identity_id = i.id \
|
||||
WHERE fd.file_uuid = $1 AND fd.identity_id IS NOT NULL"
|
||||
WHERE fd.file_uuid = $1 AND fd.identity_id IS NOT NULL",
|
||||
)
|
||||
.bind(&uuid_clone)
|
||||
.fetch_all(db_clone.pool())
|
||||
@@ -907,12 +917,18 @@ impl JobWorker {
|
||||
for id_uuid in &ids {
|
||||
let us = id_uuid.to_string().replace('-', "");
|
||||
if let Err(e) = crate::core::identity::storage::save_identity_file(
|
||||
&db_clone, &us
|
||||
).await {
|
||||
&db_clone, &us,
|
||||
)
|
||||
.await
|
||||
{
|
||||
warn!("[P2.5] Failed to save identity file {}: {}", us, e);
|
||||
}
|
||||
}
|
||||
info!("[P2.5] {} identity files saved for {}", ids.len(), uuid_clone);
|
||||
info!(
|
||||
"[P2.5] {} identity files saved for {}",
|
||||
ids.len(),
|
||||
uuid_clone
|
||||
);
|
||||
}
|
||||
Err(e) => error!("❌ TMDb face matching failed for {}: {}", uuid_clone, e),
|
||||
}
|
||||
@@ -1088,8 +1104,8 @@ impl JobWorker {
|
||||
let pool = db.pool();
|
||||
|
||||
let chunk_table = schema::table_name("chunk");
|
||||
let rows = sqlx::query_as::<_, (String, String, String, f64, f64, String)>(
|
||||
&format!("SELECT chunk_id, chunk_type, text_content, start_time, end_time, content::text FROM {} WHERE file_uuid = $1 AND chunk_type = 'sentence' AND embedding IS NULL AND (text_content IS NOT NULL AND text_content != '') ORDER BY id", chunk_table),
|
||||
let rows = sqlx::query_as::<_, (String, String, String, i64, i64, f64, f64, String)>(
|
||||
&format!("SELECT chunk_id, chunk_type, text_content, start_frame, end_frame, start_time, end_time, content::text FROM {} WHERE file_uuid = $1 AND chunk_type = 'sentence' AND embedding IS NULL AND (text_content IS NOT NULL AND text_content != '') ORDER BY id", chunk_table),
|
||||
)
|
||||
.bind(uuid)
|
||||
.fetch_all(pool)
|
||||
@@ -1107,7 +1123,17 @@ impl JobWorker {
|
||||
);
|
||||
|
||||
let mut stored = 0usize;
|
||||
for (chunk_id, _chunk_type, text, start_time, end_time, _content_str) in &rows {
|
||||
for (
|
||||
chunk_id,
|
||||
_chunk_type,
|
||||
text,
|
||||
start_frame,
|
||||
end_frame,
|
||||
start_time,
|
||||
end_time,
|
||||
_content_str,
|
||||
) in &rows
|
||||
{
|
||||
if text.is_empty() {
|
||||
continue;
|
||||
}
|
||||
@@ -1119,9 +1145,11 @@ impl JobWorker {
|
||||
continue;
|
||||
}
|
||||
let payload = VectorPayload {
|
||||
uuid: uuid.to_string(),
|
||||
file_uuid: uuid.to_string(),
|
||||
chunk_id: chunk_id.clone(),
|
||||
chunk_type: "sentence".to_string(),
|
||||
start_frame: *start_frame,
|
||||
end_frame: *end_frame,
|
||||
start_time: *start_time,
|
||||
end_time: *end_time,
|
||||
text: Some(text.clone()),
|
||||
|
||||
@@ -237,11 +237,19 @@ impl ProcessorPool {
|
||||
let key = format!("{}job:{}:processor:{}", prefix, &job.uuid, &processor_name);
|
||||
let now = chrono::Utc::now().to_rfc3339();
|
||||
let _: Option<String> = redis::cmd("HSET")
|
||||
.arg(&key).arg("started_at").arg(&now)
|
||||
.query_async(&mut conn).await.ok();
|
||||
.arg(&key)
|
||||
.arg("started_at")
|
||||
.arg(&now)
|
||||
.query_async(&mut conn)
|
||||
.await
|
||||
.ok();
|
||||
let _: Option<String> = redis::cmd("HSET")
|
||||
.arg(&key).arg("embedding_started_at").arg(&now)
|
||||
.query_async(&mut conn).await.ok();
|
||||
.arg(&key)
|
||||
.arg("embedding_started_at")
|
||||
.arg(&now)
|
||||
.query_async(&mut conn)
|
||||
.await
|
||||
.ok();
|
||||
}
|
||||
|
||||
// Subscribe to Redis progress pub/sub and update processor hash in real-time
|
||||
@@ -254,10 +262,12 @@ impl ProcessorPool {
|
||||
let cb_processor = sub_processor.clone();
|
||||
if let Err(e) = sub_redis
|
||||
.subscribe_and_callback(&sub_uuid, move |msg| {
|
||||
tracing::info!("[Subscriber] Got msg for={} cur={} tot={}",
|
||||
msg.processor,
|
||||
tracing::info!(
|
||||
"[Subscriber] Got msg for={} cur={} tot={}",
|
||||
msg.processor,
|
||||
msg.data.current.unwrap_or(0),
|
||||
msg.data.total.unwrap_or(0));
|
||||
msg.data.total.unwrap_or(0)
|
||||
);
|
||||
if msg.processor == cb_processor {
|
||||
let cur = msg.data.current.unwrap_or(0);
|
||||
let tot = msg.data.total.unwrap_or(0);
|
||||
@@ -266,11 +276,18 @@ impl ProcessorPool {
|
||||
let u = cb_uuid.clone();
|
||||
let p = cb_processor.clone();
|
||||
tokio::spawn(async move {
|
||||
match r.update_worker_processor_status(
|
||||
&u, &p, "running", None,
|
||||
cur, oc, tot, 0, 0,
|
||||
).await {
|
||||
Ok(_) => tracing::info!("[Subscriber] Updated {}: cur={} tot={}", p, cur, tot),
|
||||
match r
|
||||
.update_worker_processor_status(
|
||||
&u, &p, "running", None, cur, oc, tot, 0, 0,
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(_) => tracing::info!(
|
||||
"[Subscriber] Updated {}: cur={} tot={}",
|
||||
p,
|
||||
cur,
|
||||
tot
|
||||
),
|
||||
Err(e) => tracing::error!("[Subscriber] FAILED {}: {}", p, e),
|
||||
}
|
||||
});
|
||||
@@ -756,9 +773,11 @@ impl ProcessorPool {
|
||||
.enumerate()
|
||||
.map(|(i, segment)| {
|
||||
// Prefer ASR output frames, fallback to time-based conversion
|
||||
let start_frame = segment.start_frame
|
||||
let start_frame = segment
|
||||
.start_frame
|
||||
.unwrap_or_else(|| (segment.start_time * fps).round() as i64);
|
||||
let end_frame = segment.end_frame
|
||||
let end_frame = segment
|
||||
.end_frame
|
||||
.unwrap_or_else(|| (segment.end_time * fps).round() as i64);
|
||||
let data = serde_json::json!({
|
||||
"text": segment.text,
|
||||
@@ -892,7 +911,11 @@ impl ProcessorPool {
|
||||
tracing::info!(
|
||||
"Storing {} Face pre-chunks + {} detections for video {}",
|
||||
frames_count,
|
||||
face_result.frames.iter().map(|f| f.faces.len()).sum::<usize>(),
|
||||
face_result
|
||||
.frames
|
||||
.iter()
|
||||
.map(|f| f.faces.len())
|
||||
.sum::<usize>(),
|
||||
uuid
|
||||
);
|
||||
|
||||
@@ -911,7 +934,10 @@ impl ProcessorPool {
|
||||
detections_to_store.push((
|
||||
frame.frame as i64,
|
||||
frame.timestamp,
|
||||
face.x, face.y, face.width, face.height,
|
||||
face.x,
|
||||
face.y,
|
||||
face.width,
|
||||
face.height,
|
||||
face.confidence,
|
||||
));
|
||||
}
|
||||
@@ -1170,9 +1196,10 @@ impl ProcessorPool {
|
||||
"top_5": scene.top_5,
|
||||
});
|
||||
let chunk_table = crate::core::db::schema::table_name("chunk");
|
||||
let _ = sqlx::query(
|
||||
&format!("UPDATE {} SET metadata = metadata || $1::jsonb WHERE file_uuid=$2 AND chunk_id=$3", chunk_table)
|
||||
)
|
||||
let _ = sqlx::query(&format!(
|
||||
"UPDATE {} SET metadata = metadata || $1::jsonb WHERE file_uuid=$2 AND chunk_id=$3",
|
||||
chunk_table
|
||||
))
|
||||
.bind(&meta)
|
||||
.bind(uuid)
|
||||
.bind(&chk_id)
|
||||
|
||||
Reference in New Issue
Block a user