feat: Phase 1 handover - schema migration, correction mechanism, API fixes

Schema changes: dev.chunks->dev.chunk, remove old_chunk_id/chunk_index
Correction: asr-1.json format, generate/apply scripts
API: 37/37 endpoints fixed and tested
Docs: HANDOVER_V2.0.md for M4
This commit is contained in:
Accusys
2026-05-11 07:03:22 +08:00
parent ef894a44ad
commit 39ba5ddf76
147 changed files with 19843 additions and 3053 deletions

View File

@@ -58,7 +58,6 @@ pub struct BatchJobStatus {
#[derive(Debug, Clone)]
struct CutScene {
chunk_id: String,
chunk_index: i32,
start_frame: i64,
end_frame: i64,
fps: f64,
@@ -66,6 +65,7 @@ struct CutScene {
end_time: f64,
content: serde_json::Value,
metadata: serde_json::Value,
summary_text: Option<String>,
}
#[derive(Debug, Clone)]
@@ -108,21 +108,25 @@ fn llm_model() -> String {
// ── Data Fetching ──
/// Load every `cut` chunk of a file, ordered by `start_frame`.
///
/// Reads from the table that `schema::table_name("chunk")` resolves to and
/// maps each row into a [`CutScene`]. `summary_text` is nullable and is carried
/// through as `Option<String>`.
///
/// # Errors
/// Returns the underlying `sqlx` error if the query or row decoding fails.
async fn fetch_cut_scenes(db: &PostgresDb, file_uuid: &str) -> anyhow::Result<Vec<CutScene>> {
    // Column order here must match the SELECT list below.
    type CutRow = (
        String,
        i64,
        i64,
        f64,
        f64,
        f64,
        serde_json::Value,
        serde_json::Value,
        Option<String>,
    );

    let table = schema::table_name("chunk");
    let rows = sqlx::query_as::<_, CutRow>(&format!(
        r#"SELECT chunk_id, start_frame, end_frame, fps, start_time, end_time, content, metadata, summary_text
           FROM {} WHERE file_uuid = $1 AND chunk_type = 'cut' ORDER BY start_frame"#,
        table
    ))
    .bind(file_uuid)
    .fetch_all(db.pool())
    .await?;

    Ok(rows
        .into_iter()
        .map(
            |(
                chunk_id,
                start_frame,
                end_frame,
                fps,
                start_time,
                end_time,
                content,
                metadata,
                summary_text,
            )| CutScene {
                chunk_id,
                start_frame,
                end_frame,
                fps,
                start_time,
                end_time,
                content,
                metadata,
                summary_text,
            },
        )
        .collect())
}
async fn fetch_sentences_in_scene(db: &PostgresDb, file_uuid: &str, cut: &CutScene) -> anyhow::Result<Vec<SentenceChunk>> {
let table = schema::table_name("chunks");
async fn fetch_sentences_in_scene(
db: &PostgresDb,
file_uuid: &str,
cut: &CutScene,
) -> anyhow::Result<Vec<SentenceChunk>> {
let table = schema::table_name("chunk");
sqlx::query_as::<_, (String, String, f64, f64, i64, i64, serde_json::Value)>(&format!(
r#"SELECT chunk_id, COALESCE(text_content,''), start_time, end_time, start_frame, end_frame, content
FROM {} WHERE file_uuid = $1 AND chunk_type = 'sentence'
@@ -137,7 +141,11 @@ async fn fetch_sentences_in_scene(db: &PostgresDb, file_uuid: &str, cut: &CutSce
}
/// Fetch actor names present in this scene from face_detections + identity_bindings + identities
async fn fetch_identity_names_for_scene(db: &PostgresDb, file_uuid: &str, cut: &CutScene) -> anyhow::Result<Vec<String>> {
async fn fetch_identity_names_for_scene(
db: &PostgresDb,
file_uuid: &str,
cut: &CutScene,
) -> anyhow::Result<Vec<String>> {
let fd_table = schema::table_name("face_detections");
let ib_table = schema::table_name("identity_bindings");
let id_table = schema::table_name("identities");
@@ -148,43 +156,65 @@ async fn fetch_identity_names_for_scene(db: &PostgresDb, file_uuid: &str, cut: &
JOIN {} i ON i.id = ib.identity_id
WHERE fd.file_uuid = $1 AND fd.frame_number >= $2 AND fd.frame_number <= $3
AND fd.trace_id IS NOT NULL
ORDER BY i.name"#, fd_table, ib_table, id_table
ORDER BY i.name"#,
fd_table, ib_table, id_table
))
.bind(file_uuid).bind(cut.start_frame).bind(cut.end_frame)
.fetch_all(db.pool()).await?;
.bind(file_uuid)
.bind(cut.start_frame)
.bind(cut.end_frame)
.fetch_all(db.pool())
.await?;
Ok(rows)
}
/// Fetch YOLO object labels detected in this scene from pre_chunks
async fn fetch_yolo_objects_for_scene(db: &PostgresDb, file_uuid: &str, cut: &CutScene) -> anyhow::Result<Vec<String>> {
async fn fetch_yolo_objects_for_scene(
db: &PostgresDb,
file_uuid: &str,
cut: &CutScene,
) -> anyhow::Result<Vec<String>> {
let table = schema::table_name("pre_chunks");
let rows = sqlx::query_scalar::<_, String>(&format!(
r#"SELECT DISTINCT data->>'label'
FROM {} WHERE file_uuid = $1 AND processor_type = 'yolo'
AND frame_number >= $2 AND frame_number <= $3
AND data->>'label' IS NOT NULL
ORDER BY data->>'label'"#, table
ORDER BY data->>'label'"#,
table
))
.bind(file_uuid).bind(cut.start_frame).bind(cut.end_frame)
.fetch_all(db.pool()).await?;
.bind(file_uuid)
.bind(cut.start_frame)
.bind(cut.end_frame)
.fetch_all(db.pool())
.await?;
Ok(rows)
}
/// Fetch active speakers + their actor names for a scene's frame range
/// Uses identity_bindings to map SPEAKER_X to actor names
async fn fetch_speakers_for_scene(db: &PostgresDb, file_uuid: &str, cut: &CutScene) -> anyhow::Result<Vec<String>> {
async fn fetch_speakers_for_scene(
db: &PostgresDb,
file_uuid: &str,
cut: &CutScene,
) -> anyhow::Result<Vec<String>> {
let pc_table = schema::table_name("pre_chunks");
let speakers = sqlx::query_scalar::<_, String>(&format!(
r#"SELECT DISTINCT data->>'speaker_id'
FROM {} WHERE file_uuid = $1 AND processor_type = 'asrx'
AND data->>'speaker_id' IS NOT NULL
AND start_frame <= $3 AND end_frame >= $2
ORDER BY data->>'speaker_id'"#, pc_table
ORDER BY data->>'speaker_id'"#,
pc_table
))
.bind(file_uuid).bind(cut.start_frame).bind(cut.end_frame)
.fetch_all(db.pool()).await?;
.bind(file_uuid)
.bind(cut.start_frame)
.bind(cut.end_frame)
.fetch_all(db.pool())
.await?;
if speakers.is_empty() { return Ok(vec![]); }
if speakers.is_empty() {
return Ok(vec![]);
}
// Map speaker_ids to actor names via identity_bindings
let ib_table = schema::table_name("identity_bindings");
@@ -194,10 +224,12 @@ async fn fetch_speakers_for_scene(db: &PostgresDb, file_uuid: &str, cut: &CutSce
let name: Option<String> = sqlx::query_scalar(&format!(
r#"SELECT i.name FROM {} ib JOIN {} i ON i.id = ib.identity_id
WHERE ib.identity_type = 'speaker' AND ib.identity_value = $1 AND i.name IS NOT NULL
LIMIT 1"#, ib_table, id_table
LIMIT 1"#,
ib_table, id_table
))
.bind(spk)
.fetch_optional(db.pool()).await?;
.fetch_optional(db.pool())
.await?;
match name {
Some(n) => result.push(format!("{} ({})", spk, n)),
None => result.push(spk.clone()),
@@ -207,7 +239,11 @@ async fn fetch_speakers_for_scene(db: &PostgresDb, file_uuid: &str, cut: &CutSce
}
/// Fetch trace IDs with identity names for a scene's frame range
async fn fetch_trace_info(db: &PostgresDb, file_uuid: &str, cut: &CutScene) -> anyhow::Result<Vec<String>> {
async fn fetch_trace_info(
db: &PostgresDb,
file_uuid: &str,
cut: &CutScene,
) -> anyhow::Result<Vec<String>> {
let fd_table = schema::table_name("face_detections");
let ib_table = schema::table_name("identity_bindings");
let id_table = schema::table_name("identities");
@@ -218,18 +254,25 @@ async fn fetch_trace_info(db: &PostgresDb, file_uuid: &str, cut: &CutScene) -> a
LEFT JOIN {} i ON i.id = ib.identity_id
WHERE fd.file_uuid = $1 AND fd.frame_number >= $2 AND fd.frame_number <= $3
AND fd.trace_id IS NOT NULL
ORDER BY fd.trace_id"#, fd_table, ib_table, id_table
ORDER BY fd.trace_id"#,
fd_table, ib_table, id_table
))
.bind(file_uuid).bind(cut.start_frame).bind(cut.end_frame)
.fetch_all(db.pool()).await?;
.bind(file_uuid)
.bind(cut.start_frame)
.bind(cut.end_frame)
.fetch_all(db.pool())
.await?;
Ok(rows.iter().map(|(trace, name)| {
if let Some(n) = name {
format!("trace_{} ({})", trace, n)
} else {
format!("trace_{}", trace)
}
}).collect())
Ok(rows
.iter()
.map(|(trace, name)| {
if let Some(n) = name {
format!("trace_{} ({})", trace, n)
} else {
format!("trace_{}", trace)
}
})
.collect())
}
// ── LLM Prompt (Embedding-Optimized) ──
@@ -243,19 +286,31 @@ async fn summarize_one_scene(
) -> anyhow::Result<SceneSummaryResult> {
if sentences.is_empty() {
return Ok(SceneSummaryResult {
parent_summary: String::new(), five_w1h: serde_json::Value::Null, child_summaries: vec![],
parent_summary: String::new(),
five_w1h: serde_json::Value::Null,
child_summaries: vec![],
});
}
let faces = fetch_identity_names_for_scene(db, file_uuid, cut).await.unwrap_or_default();
let objects = fetch_yolo_objects_for_scene(db, file_uuid, cut).await.unwrap_or_default();
let traces = fetch_trace_info(db, file_uuid, cut).await.unwrap_or_default();
let speakers = fetch_speakers_for_scene(db, file_uuid, cut).await.unwrap_or_default();
let faces = fetch_identity_names_for_scene(db, file_uuid, cut)
.await
.unwrap_or_default();
let objects = fetch_yolo_objects_for_scene(db, file_uuid, cut)
.await
.unwrap_or_default();
let traces = fetch_trace_info(db, file_uuid, cut)
.await
.unwrap_or_default();
let speakers = fetch_speakers_for_scene(db, file_uuid, cut)
.await
.unwrap_or_default();
let mut dialogue = String::new();
for (i, s) in sentences.iter().enumerate() {
let t = s.text.trim();
if !t.is_empty() { dialogue.push_str(&format!("[{}] {}\n", i + 1, t)); }
if !t.is_empty() {
dialogue.push_str(&format!("[{}] {}\n", i + 1, t));
}
}
let story_so_far = if prev_context.is_empty() {
@@ -306,7 +361,14 @@ Rules:
- Each sentence.enhanced: self-contained for search, include actual spoken words.
- Return ONLY valid JSON. No markdown.
- A short scene with 1-2 lines should have a short summary."#,
cut.start_time, cut.end_time, dialogue, faces.join(", "), objects.join(", "), traces.join(", "), speakers.join(", "), story_so_far,
cut.start_time,
cut.end_time,
dialogue,
faces.join(", "),
objects.join(", "),
traces.join(", "),
speakers.join(", "),
story_so_far,
);
let body = serde_json::json!({
@@ -321,22 +383,32 @@ Rules:
});
let client = Client::new();
let resp = client.post(llm_base_url()).json(&body)
let resp = client
.post(llm_base_url())
.json(&body)
.timeout(std::time::Duration::from_secs(180))
.send().await?
.json::<serde_json::Value>().await?;
.send()
.await?
.json::<serde_json::Value>()
.await?;
let content = resp["choices"][0]["message"]["content"].as_str().unwrap_or("{}");
let content = resp["choices"][0]["message"]["content"]
.as_str()
.unwrap_or("{}");
// Strip markdown code fences if present
let cleaned = content
.trim_start_matches("```json")
.trim_start_matches("```")
.trim_end_matches("```")
.trim();
let parsed: serde_json::Value = serde_json::from_str(cleaned).unwrap_or(serde_json::Value::Null);
let parsed: serde_json::Value =
serde_json::from_str(cleaned).unwrap_or(serde_json::Value::Null);
let parent_summary = parsed["scene_summary"].as_str().unwrap_or("").to_string();
let five_w1h = parsed.get("5w1h").cloned().unwrap_or(serde_json::Value::Null);
let five_w1h = parsed
.get("5w1h")
.cloned()
.unwrap_or(serde_json::Value::Null);
let mut child_summaries = Vec::new();
if let Some(arr) = parsed["sentences"].as_array() {
@@ -376,16 +448,24 @@ Rules:
}
}
Ok(SceneSummaryResult { parent_summary, five_w1h, child_summaries })
Ok(SceneSummaryResult {
parent_summary,
five_w1h,
child_summaries,
})
}
// ── DB Storage ──
async fn store_parent_summary(
db: &PostgresDb, cut_chunk_id: &str, file_uuid: &str,
summary: &str, five_w1h: &serde_json::Value, sentences: &[SentenceChunk],
db: &PostgresDb,
cut_chunk_id: &str,
file_uuid: &str,
summary: &str,
five_w1h: &serde_json::Value,
sentences: &[SentenceChunk],
) -> anyhow::Result<()> {
let table = schema::table_name("chunks");
let table = schema::table_name("chunk");
let meta = serde_json::json!({
"5w1h": five_w1h,
"sentence_ids": sentences.iter().map(|s| s.chunk_id.clone()).collect::<Vec<_>>(),
@@ -393,28 +473,42 @@ async fn store_parent_summary(
});
sqlx::query(&format!(
r#"UPDATE {} SET summary_text = $1, metadata = metadata || $2::jsonb
WHERE chunk_id = $3 AND file_uuid = $4"#, table
WHERE chunk_id = $3 AND file_uuid = $4"#,
table
))
.bind(summary).bind(&meta).bind(cut_chunk_id).bind(file_uuid)
.execute(db.pool()).await?;
.bind(summary)
.bind(&meta)
.bind(cut_chunk_id)
.bind(file_uuid)
.execute(db.pool())
.await?;
Ok(())
}
async fn store_child_summaries(
db: &PostgresDb, file_uuid: &str, children: &[ChildSummary],
db: &PostgresDb,
file_uuid: &str,
children: &[ChildSummary],
) -> anyhow::Result<()> {
let table = schema::table_name("chunks");
let table = schema::table_name("chunk");
for c in children {
let text = c.enhanced.trim();
if text.is_empty() || text.len() < 10 { continue; }
if text.is_empty() || text.len() < 10 {
continue;
}
// Update text_content (for embedding) + merge 5w1h into content
let merge = serde_json::json!({ "5w1h": c.five_w1h });
sqlx::query(&format!(
r#"UPDATE {} SET text_content = $1, content = content || $2::jsonb, embedding = NULL
WHERE chunk_id = $3 AND file_uuid = $4"#, table
WHERE chunk_id = $3 AND file_uuid = $4"#,
table
))
.bind(text).bind(&merge).bind(&c.chunk_id).bind(file_uuid)
.execute(db.pool()).await?;
.bind(text)
.bind(&merge)
.bind(&c.chunk_id)
.bind(file_uuid)
.execute(db.pool())
.await?;
}
Ok(())
}
@@ -427,7 +521,8 @@ async fn analyze_5w1h(
) -> Result<Json<Analyze5W1HResponse>, (StatusCode, String)> {
let db = PostgresDb::from_pool(state.db.pool().clone());
let cuts = fetch_cut_scenes(&db, &req.file_uuid).await
let cuts = fetch_cut_scenes(&db, &req.file_uuid)
.await
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?;
let total = cuts.len();
@@ -435,29 +530,71 @@ async fn analyze_5w1h(
let mut prev_context: Vec<String> = Vec::new();
for cut in &cuts {
let sentences = fetch_sentences_in_scene(&db, &req.file_uuid, cut).await
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?;
if sentences.is_empty() { continue; }
// Skip already-summarized scenes but preserve context
if let Some(ref t) = cut.summary_text {
if t.len() > 20 {
processed += 1;
prev_context.push(format!(
"Scene (t={:.0}s): {}",
cut.start_time, t
));
continue;
}
}
let sentences = match fetch_sentences_in_scene(&db, &req.file_uuid, cut).await {
Ok(s) => s,
Err(e) => {
tracing::error!("[5W1H] fetch sentences failed: {}", e);
continue;
}
};
if sentences.is_empty() {
continue;
}
let context = prev_context.join("\n");
let result = summarize_one_scene(&db, &req.file_uuid, cut, &sentences, &context).await
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?;
let result = match summarize_one_scene(&db, &req.file_uuid, cut, &sentences, &context).await
{
Ok(r) => r,
Err(e) => {
tracing::error!("[5W1H] scene {} failed: {}", cut.chunk_id, e);
processed += 1;
continue;
}
};
if !result.parent_summary.is_empty() {
if let Err(e) = store_parent_summary(&db, &cut.chunk_id, &req.file_uuid, &result.parent_summary, &result.five_w1h, &sentences).await {
if let Err(e) = store_parent_summary(
&db,
&cut.chunk_id,
&req.file_uuid,
&result.parent_summary,
&result.five_w1h,
&sentences,
)
.await
{
tracing::error!("[5W1H] parent: {}", e);
}
if let Err(e) = store_child_summaries(&db, &req.file_uuid, &result.child_summaries).await {
if let Err(e) =
store_child_summaries(&db, &req.file_uuid, &result.child_summaries).await
{
tracing::error!("[5W1H] child: {}", e);
}
prev_context.push(format!("Scene {} (t={:.0}s): {}", cut.chunk_index, cut.start_time, result.parent_summary));
prev_context.push(format!(
"Scene (t={:.0}s): {}",
cut.start_time, result.parent_summary
));
}
processed += 1;
}
Ok(Json(Analyze5W1HResponse {
success: true, file_uuid: req.file_uuid,
scenes_processed: processed, scenes_total: total,
success: true,
file_uuid: req.file_uuid,
scenes_processed: processed,
scenes_total: total,
}))
}
@@ -475,14 +612,39 @@ async fn batch_analyze_5w1h(
let mut prev_context: Vec<String> = Vec::new();
for cut in &cuts {
let sentences = fetch_sentences_in_scene(&db, uuid, cut).await.unwrap_or_default();
if sentences.is_empty() { continue; }
if let Some(ref t) = cut.summary_text {
if t.len() > 20 {
processed += 1;
prev_context.push(format!(
"Scene (t={:.0}s): {}",
cut.start_time, t
));
continue;
}
}
let sentences = fetch_sentences_in_scene(&db, uuid, cut)
.await
.unwrap_or_default();
if sentences.is_empty() {
continue;
}
let context = prev_context.join("\n");
if let Ok(result) = summarize_one_scene(&db, uuid, cut, &sentences, &context).await {
if !result.parent_summary.is_empty() {
let _ = store_parent_summary(&db, &cut.chunk_id, uuid, &result.parent_summary, &result.five_w1h, &sentences).await;
let _ = store_parent_summary(
&db,
&cut.chunk_id,
uuid,
&result.parent_summary,
&result.five_w1h,
&sentences,
)
.await;
let _ = store_child_summaries(&db, uuid, &result.child_summaries).await;
prev_context.push(format!("Scene {} (t={:.0}s): {}", cut.chunk_index, cut.start_time, result.parent_summary));
prev_context.push(format!(
"Scene (t={:.0}s): {}",
cut.start_time, result.parent_summary
));
}
}
processed += 1;
@@ -490,12 +652,19 @@ async fn batch_analyze_5w1h(
jobs.push(BatchJobStatus {
file_uuid: uuid.clone(),
status: if processed > 0 { "completed".to_string() } else { "no_cut_scenes".to_string() },
status: if processed > 0 {
"completed".to_string()
} else {
"no_cut_scenes".to_string()
},
message: format!("{}/{} scenes processed", processed, total),
});
}
Ok(Json(BatchAnalyze5W1HResponse { success: true, jobs }))
Ok(Json(BatchAnalyze5W1HResponse {
success: true,
jobs,
}))
}
async fn get_5w1h_status(
@@ -505,19 +674,26 @@ async fn get_5w1h_status(
let rows = sqlx::query(&format!(
r#"SELECT file_uuid, processing_status->'agents'->'five_w1h' as s
FROM {} WHERE processing_status->'agents'->'five_w1h' IS NOT NULL
ORDER BY updated_at DESC LIMIT 50"#, table
ORDER BY updated_at DESC LIMIT 50"#,
table
))
.fetch_all(state.db.pool()).await
.fetch_all(state.db.pool())
.await
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?;
let videos: Vec<serde_json::Value> = rows.iter().map(|r| {
serde_json::json!({
"uuid": r.try_get::<String,_>("file_uuid").unwrap_or_default(),
"five_w1h_status": r.try_get::<Option<serde_json::Value>,_>("s").ok().flatten(),
let videos: Vec<serde_json::Value> = rows
.iter()
.map(|r| {
serde_json::json!({
"uuid": r.try_get::<String,_>("file_uuid").unwrap_or_default(),
"five_w1h_status": r.try_get::<Option<serde_json::Value>,_>("s").ok().flatten(),
})
})
}).collect();
.collect();
Ok(Json(serde_json::json!({ "success": true, "videos": videos })))
Ok(Json(
serde_json::json!({ "success": true, "videos": videos }),
))
}
/// Pipeline-triggered entry point: run 5W1H agent for a file.
@@ -528,24 +704,52 @@ pub async fn run_5w1h_agent(db: &PostgresDb, file_uuid: &str) -> anyhow::Result<
let mut prev_context: Vec<String> = Vec::new();
for cut in &cuts {
let sentences = fetch_sentences_in_scene(db, file_uuid, cut).await?;
if sentences.is_empty() { continue; }
let context = prev_context.join("\n");
match summarize_one_scene(db, file_uuid, cut, &sentences, &context).await {
Ok(result) => {
if !result.parent_summary.is_empty() {
let _ = store_parent_summary(db, &cut.chunk_id, file_uuid, &result.parent_summary, &result.five_w1h, &sentences).await;
let _ = store_child_summaries(db, file_uuid, &result.child_summaries).await;
prev_context.push(format!("Scene {} (t={:.0}s): {}", cut.chunk_index, cut.start_time, result.parent_summary));
}
processed += 1;
}
Err(e) => tracing::error!("[5W1H] Scene {} failed: {}", cut.chunk_id, e),
if let Some(ref t) = cut.summary_text {
if t.len() > 20 {
processed += 1;
prev_context.push(format!(
"Scene (t={:.0}s): {}",
cut.start_time, t
));
continue;
}
}
let sentences = fetch_sentences_in_scene(db, file_uuid, cut).await?;
if sentences.is_empty() {
continue;
}
let context = prev_context.join("\n");
match summarize_one_scene(db, file_uuid, cut, &sentences, &context).await {
Ok(result) => {
if !result.parent_summary.is_empty() {
let _ = store_parent_summary(
db,
&cut.chunk_id,
file_uuid,
&result.parent_summary,
&result.five_w1h,
&sentences,
)
.await;
let _ = store_child_summaries(db, file_uuid, &result.child_summaries).await;
prev_context.push(format!(
"Scene (t={:.0}s): {}",
cut.start_time, result.parent_summary
));
}
processed += 1;
}
Err(e) => tracing::error!("[5W1H] Scene {} failed: {}", cut.chunk_id, e),
}
}
tracing::info!("[5W1H] Done for {}: {}/{} scenes", file_uuid, processed, total);
tracing::info!(
"[5W1H] Done for {}: {}/{} scenes",
file_uuid,
processed,
total
);
// Auto-vectorize sentences with EmbeddingGemma (768D)
tracing::info!("[5W1H] Starting vectorize for sentence chunks...");
@@ -555,17 +759,20 @@ pub async fn run_5w1h_agent(db: &PostgresDb, file_uuid: &str) -> anyhow::Result<
let rows = sqlx::query_as::<_, (String, String, String, f64, f64)>(
r#"SELECT chunk_id, chunk_type, text_content, start_time, end_time
FROM dev.chunks WHERE file_uuid = $1 AND chunk_type = 'sentence' AND embedding IS NULL
AND (text_content IS NOT NULL AND text_content != '') ORDER BY chunk_index"#
FROM dev.chunk WHERE file_uuid = $1 AND chunk_type = 'sentence' AND embedding IS NULL
AND (text_content IS NOT NULL AND text_content != '') ORDER BY id"#,
)
.bind(file_uuid)
.fetch_all(db.pool()).await?;
.fetch_all(db.pool())
.await?;
let total_vec = rows.len();
let mut stored = 0usize;
for (chunk_id, _ctype, text, start_time, end_time) in &rows {
let text = text.trim();
if text.is_empty() || text.len() < 5 { continue; }
if text.is_empty() || text.len() < 5 {
continue;
}
match embedder.embed_document(text).await {
Ok(vector) => {
if let Err(e) = sqlx::query(

View File

@@ -140,15 +140,37 @@ async fn analyze_identity(
}
let face_data: serde_json::Value = std::fs::read_to_string(&face_clustered_path)
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("Failed to read face data: {}", e)))?
.map_err(|e| {
(
StatusCode::INTERNAL_SERVER_ERROR,
format!("Failed to read face data: {}", e),
)
})?
.parse()
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("Failed to parse face data: {}", e)))?;
.map_err(|e| {
(
StatusCode::INTERNAL_SERVER_ERROR,
format!("Failed to parse face data: {}", e),
)
})?;
let asrx_data: Option<serde_json::Value> = if asrx_path.exists() {
Some(std::fs::read_to_string(&asrx_path)
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("Failed to read asrx data: {}", e)))?
.parse()
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("Failed to parse asrx data: {}", e)))?)
Some(
std::fs::read_to_string(&asrx_path)
.map_err(|e| {
(
StatusCode::INTERNAL_SERVER_ERROR,
format!("Failed to read asrx data: {}", e),
)
})?
.parse()
.map_err(|e| {
(
StatusCode::INTERNAL_SERVER_ERROR,
format!("Failed to parse asrx data: {}", e),
)
})?,
)
} else {
None
};
@@ -161,7 +183,14 @@ async fn analyze_identity(
// 將 identity 結果寫入 DB
let pool = state.db.pool();
for id_result in &identities {
let identity_name = format!("person_{}", id_result.person_ids.first().map(|s| &**s).unwrap_or("unknown"));
let identity_name = format!(
"person_{}",
id_result
.person_ids
.first()
.map(|s| &**s)
.unwrap_or("unknown")
);
let metadata = serde_json::json!({
"source": "identity_agent",
"trace_ids": id_result.person_ids,
@@ -184,7 +213,9 @@ async fn analyze_identity(
}
// 迭代多角度 face embedding 比對TMDb seed → 傳播)
let _ = match_faces_iterative(pool, &req.file_uuid).await.unwrap_or(0);
let _ = match_faces_iterative(pool, &req.file_uuid)
.await
.unwrap_or(0);
// 將 ASRX speaker 綁定到已匹配 identity 的 trace
let _ = bind_speakers(pool, &req.file_uuid).await.unwrap_or(0);
@@ -309,11 +340,21 @@ fn extract_speakers_from_asrx_data(asrx_data: &Option<serde_json::Value>) -> Vec
let mut speaker_segments_map: std::collections::HashMap<String, Vec<(f64, f64)>> =
std::collections::HashMap::new();
for segment in segments {
let speaker_id = segment.get("speaker_id").and_then(|s| s.as_str())
let speaker_id = segment
.get("speaker_id")
.and_then(|s| s.as_str())
.or_else(|| segment.get("speaker").and_then(|s| s.as_str()));
if let Some(speaker_id) = speaker_id {
let start = segment.get("start").or_else(|| segment.get("start_time")).and_then(|s| s.as_f64()).unwrap_or(0.0);
let end = segment.get("end").or_else(|| segment.get("end_time")).and_then(|e| e.as_f64()).unwrap_or(0.0);
let start = segment
.get("start")
.or_else(|| segment.get("start_time"))
.and_then(|s| s.as_f64())
.unwrap_or(0.0);
let end = segment
.get("end")
.or_else(|| segment.get("end_time"))
.and_then(|e| e.as_f64())
.unwrap_or(0.0);
speaker_segments_map
.entry(speaker_id.to_string())
.or_insert_with(Vec::new)
@@ -321,7 +362,10 @@ fn extract_speakers_from_asrx_data(asrx_data: &Option<serde_json::Value>) -> Vec
}
}
for (speaker_id, segments) in speaker_segments_map {
speakers.push(SpeakerData { speaker_id, segments });
speakers.push(SpeakerData {
speaker_id,
segments,
});
}
}
}
@@ -598,11 +642,17 @@ struct SpeakerData {
}
/// Cosine similarity between two equally sized `f32` vectors.
///
/// Returns `0.0` instead of an error or `NaN` when the inputs cannot be
/// compared: the slices differ in length, both are empty, or either has a
/// zero norm.
fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
    if a.len() != b.len() || a.is_empty() {
        return 0.0;
    }
    let dot: f32 = a.iter().zip(b).map(|(x, y)| x * y).sum();
    let na: f32 = a.iter().map(|x| x * x).sum::<f32>().sqrt();
    let nb: f32 = b.iter().map(|x| x * x).sum::<f32>().sqrt();
    // Guard the division: a zero-norm vector has no direction.
    if na == 0.0 || nb == 0.0 {
        0.0
    } else {
        dot / (na * nb)
    }
}
/// 迭代多角度 face embedding 比對 + 傳播
@@ -619,16 +669,20 @@ async fn match_faces_iterative(pool: &sqlx::PgPool, file_uuid: &str) -> anyhow::
tracing::warn!("[FaceMatch] No TMDb identities with face embeddings");
return Ok(0);
}
tracing::info!("[FaceMatch] Loaded {} TMDb seed identities", tmdb_rows.len());
tracing::info!(
"[FaceMatch] Loaded {} TMDb seed identities",
tmdb_rows.len()
);
// Step 2: 載入所有 face_detections按 trace_id 分組
let fd_rows = sqlx::query_as::<_, (i32, Vec<f32>)>(
"SELECT trace_id, embedding FROM dev.face_detections \
WHERE file_uuid=$1 AND trace_id IS NOT NULL AND embedding IS NOT NULL \
ORDER BY trace_id"
ORDER BY trace_id",
)
.bind(file_uuid)
.fetch_all(pool).await?;
.fetch_all(pool)
.await?;
if fd_rows.is_empty() {
tracing::warn!("[FaceMatch] No face detections with embeddings");
@@ -639,7 +693,10 @@ async fn match_faces_iterative(pool: &sqlx::PgPool, file_uuid: &str) -> anyhow::
use std::collections::HashMap;
let mut trace_faces: HashMap<i32, Vec<Vec<f32>>> = HashMap::new();
for (tid, emb) in &fd_rows {
trace_faces.entry(*tid).or_insert_with(Vec::new).push(emb.clone());
trace_faces
.entry(*tid)
.or_insert_with(Vec::new)
.push(emb.clone());
}
// 去重:同一個 trace 內embedding 太接近的只留一個
@@ -649,7 +706,11 @@ async fn match_faces_iterative(pool: &sqlx::PgPool, file_uuid: &str) -> anyhow::
}
let total_traces = trace_faces.len();
tracing::info!("[FaceMatch] Loaded {} traces with {} faces", total_traces, fd_rows.len());
tracing::info!(
"[FaceMatch] Loaded {} traces with {} faces",
total_traces,
fd_rows.len()
);
// Step 3: 建立 TMDb 查找表
let tmdb_seeds: Vec<(i32, String, Vec<f32>)> = tmdb_rows;
@@ -665,14 +726,21 @@ async fn match_faces_iterative(pool: &sqlx::PgPool, file_uuid: &str) -> anyhow::
for (_, ref name, ref tmdb_emb) in &tmdb_seeds {
for face_emb in faces {
let s = cosine_similarity(face_emb, tmdb_emb);
if s > best_sim { best_sim = s; best_name = name.clone(); }
if s > best_sim {
best_sim = s;
best_name = name.clone();
}
}
}
if best_sim >= TH {
matched.insert(tid, best_name);
}
}
tracing::info!("[FaceMatch] Round 1: {} matched ({}%)", matched.len(), matched.len() * 100 / total_traces);
tracing::info!(
"[FaceMatch] Round 1: {} matched ({}%)",
matched.len(),
matched.len() * 100 / total_traces
);
// Round 2+: 用已匹配的 face 作為 seed 傳播
for round_n in 2..=10 {
@@ -681,21 +749,31 @@ async fn match_faces_iterative(pool: &sqlx::PgPool, file_uuid: &str) -> anyhow::
let mut seed_pool: HashMap<String, Vec<&Vec<f32>>> = HashMap::new();
for (&tid, name) in &matched {
if let Some(faces) = trace_faces.get(&tid) {
seed_pool.entry(name.clone()).or_default().extend(faces.iter());
seed_pool
.entry(name.clone())
.or_default()
.extend(faces.iter());
}
}
let mut new_matches: Vec<(i32, String)> = Vec::new();
for (&tid, faces) in &trace_faces {
if matched.contains_key(&tid) { continue; }
if matched.contains_key(&tid) {
continue;
}
let mut best_name = String::new();
let mut best_sim = 0.0f32;
if faces.is_empty() { continue; }
if faces.is_empty() {
continue;
}
let ref_face = &faces[0];
for (name, seed_faces) in &seed_pool {
for seed in seed_faces {
let s = cosine_similarity(ref_face, seed);
if s > best_sim { best_sim = s; best_name = name.clone(); }
if s > best_sim {
best_sim = s;
best_name = name.clone();
}
}
}
if best_sim >= TH {
@@ -706,31 +784,46 @@ async fn match_faces_iterative(pool: &sqlx::PgPool, file_uuid: &str) -> anyhow::
matched.insert(tid, name);
}
let new = matched.len() - prev;
tracing::info!("[FaceMatch] Round {}: +{} matched (total {}, {}%)", round_n, new, matched.len(), matched.len() * 100 / total_traces);
if new < 5 { break; }
tracing::info!(
"[FaceMatch] Round {}: +{} matched (total {}, {}%)",
round_n,
new,
matched.len(),
matched.len() * 100 / total_traces
);
if new < 5 {
break;
}
}
// Step 5: 寫入 DB
let mut updated = 0usize;
for (tid, name) in &matched {
let id_opt = sqlx::query_scalar::<_, Option<i32>>(
"SELECT id FROM dev.identities WHERE name=$1 AND source='tmdb'"
"SELECT id FROM dev.identities WHERE name=$1 AND source='tmdb'",
)
.bind(name)
.fetch_optional(pool).await?;
.fetch_optional(pool)
.await?;
if let Some(identity_id) = id_opt {
let _ = sqlx::query(
"UPDATE dev.face_detections SET identity_id=$1 WHERE file_uuid=$2 AND trace_id=$3"
"UPDATE dev.face_detections SET identity_id=$1 WHERE file_uuid=$2 AND trace_id=$3",
)
.bind(identity_id)
.bind(file_uuid)
.bind(tid)
.execute(pool).await;
.execute(pool)
.await;
updated += 1;
}
}
tracing::info!("[FaceMatch] Done: {}/{} traces matched ({}%)", matched.len(), total_traces, matched.len() * 100 / total_traces);
tracing::info!(
"[FaceMatch] Done: {}/{} traces matched ({}%)",
matched.len(),
total_traces,
matched.len() * 100 / total_traces
);
Ok(updated)
}
@@ -771,12 +864,25 @@ pub async fn bind_speakers(pool: &sqlx::PgPool, file_uuid: &str) -> anyhow::Resu
let mut speakers: HashMap<String, Vec<(f64, f64)>> = HashMap::new();
if let Some(segments) = asrx_data.get("segments").and_then(|s| s.as_array()) {
for seg in segments {
let sid = seg.get("speaker_id").and_then(|s| s.as_str())
let sid = seg
.get("speaker_id")
.and_then(|s| s.as_str())
.or_else(|| seg.get("speaker").and_then(|s| s.as_str()));
if let Some(sid) = sid {
let start = seg.get("start_time").or_else(|| seg.get("start")).and_then(|v| v.as_f64()).unwrap_or(0.0);
let end = seg.get("end_time").or_else(|| seg.get("end")).and_then(|v| v.as_f64()).unwrap_or(0.0);
speakers.entry(sid.to_string()).or_default().push((start, end));
let start = seg
.get("start_time")
.or_else(|| seg.get("start"))
.and_then(|v| v.as_f64())
.unwrap_or(0.0);
let end = seg
.get("end_time")
.or_else(|| seg.get("end"))
.and_then(|v| v.as_f64())
.unwrap_or(0.0);
speakers
.entry(sid.to_string())
.or_default()
.push((start, end));
}
}
}
@@ -792,7 +898,9 @@ pub async fn bind_speakers(pool: &sqlx::PgPool, file_uuid: &str) -> anyhow::Resu
// For each trace, compute overlap with each speaker
let mut bindings = 0usize;
for (trace_id, frames) in &traces {
if frames.is_empty() { continue; }
if frames.is_empty() {
continue;
}
// Get identity_id for this trace
let identity_id: Option<i32> = sqlx::query_scalar(
@@ -801,7 +909,9 @@ pub async fn bind_speakers(pool: &sqlx::PgPool, file_uuid: &str) -> anyhow::Resu
.bind(file_uuid).bind(trace_id)
.fetch_optional(pool).await?.flatten();
if identity_id.is_none() { continue; }
if identity_id.is_none() {
continue;
}
let identity_id = identity_id.unwrap();
// Compute overlap with each speaker
@@ -850,7 +960,11 @@ pub async fn bind_speakers(pool: &sqlx::PgPool, file_uuid: &str) -> anyhow::Resu
}
}
tracing::info!("[SpeakerBind] Created {}/{} speaker bindings", bindings, traces.len());
tracing::info!(
"[SpeakerBind] Created {}/{} speaker bindings",
bindings,
traces.len()
);
Ok(bindings)
}
@@ -870,7 +984,10 @@ pub async fn run_identity_agent(db: &PostgresDb, file_uuid: &str) -> anyhow::Res
};
if !face_clustered_path.exists() {
tracing::warn!("[IdentityAgent] face_clustered.json not found for {}", file_uuid);
tracing::warn!(
"[IdentityAgent] face_clustered.json not found for {}",
file_uuid
);
return Ok(());
}
@@ -888,7 +1005,14 @@ pub async fn run_identity_agent(db: &PostgresDb, file_uuid: &str) -> anyhow::Res
let pool = db.pool();
for id_result in &identities {
let identity_name = format!("person_{}", id_result.person_ids.first().map(|s| &**s).unwrap_or("unknown"));
let identity_name = format!(
"person_{}",
id_result
.person_ids
.first()
.map(|s| &**s)
.unwrap_or("unknown")
);
let metadata = serde_json::json!({
"source": "identity_agent",
"trace_ids": id_result.person_ids,
@@ -914,7 +1038,10 @@ pub async fn run_identity_agent(db: &PostgresDb, file_uuid: &str) -> anyhow::Res
tracing::info!(
"[IdentityAgent] Done for {}: {} identities, {} face matches, {} speaker bindings",
file_uuid, identities.len(), matched, bound
file_uuid,
identities.len(),
matched,
bound
);
Ok(())
}

View File

@@ -501,7 +501,7 @@ async fn get_identity_chunks(
let data: Vec<IdentityChunkItem> = records
.into_iter()
.map(|r| IdentityChunkItem {
id: r.id,
id: r.id as i64,
file_uuid: r.file_uuid,
chunk_id: r.chunk_id,
chunk_type: r.chunk_type,

View File

@@ -13,14 +13,20 @@ use crate::core::db::{schema, PostgresDb};
/// Name or path of the ffmpeg executable, computed once on first access.
///
/// Resolution order: the `MOMENTRY_FFMPEG` environment variable if it is set,
/// otherwise the Homebrew "ffmpeg-full" binary if that file exists, otherwise
/// the bare program name `ffmpeg`.
static FFMPEG: Lazy<String> = Lazy::new(|| {
std::env::var("MOMENTRY_FFMPEG").unwrap_or_else(|_| {
let full = "/opt/homebrew/opt/ffmpeg-full/bin/ffmpeg";
if std::path::Path::new(full).exists() { full.to_string() } else { "ffmpeg".to_string() }
if std::path::Path::new(full).exists() {
full.to_string()
} else {
"ffmpeg".to_string()
}
})
});
/// Builds a `std::process::Command` for the executable named by `FFMPEG`.
///
/// If the Homebrew "ffmpeg-full" library directory exists, `DYLD_LIBRARY_PATH`
/// is set to that directory in the child's environment.
fn ffmpeg_cmd() -> std::process::Command {
let mut cmd = std::process::Command::new(&*FFMPEG);
let full_lib = "/opt/homebrew/opt/ffmpeg-full/lib";
if std::path::Path::new(full_lib).exists() { cmd.env("DYLD_LIBRARY_PATH", full_lib); }
if std::path::Path::new(full_lib).exists() {
cmd.env("DYLD_LIBRARY_PATH", full_lib);
}
cmd
}
@@ -293,20 +299,32 @@ async fn trace_video(
let first_frame = rows[0].0;
let last_frame = rows[rows.len() - 1].0;
let start_sec = first_frame as f64 / fps;
let padding = params.get("padding").and_then(|s| s.parse().ok()).unwrap_or(2.0);
let padding = params
.get("padding")
.and_then(|s| s.parse().ok())
.unwrap_or(2.0);
let duration = (last_frame - first_frame) as f64 / fps + padding * 2.0;
let seek = (start_sec - padding).max(0.0);
// Build filters: bbox+drawtext (1 filter + 1 drawtext per detection)
let mut parts: Vec<String> = Vec::new();
for (i, (frame, x, y, w, h)) in rows.iter().enumerate() {
let next_frame = if i + 1 < rows.len() { rows[i + 1].0 } else { last_frame + (padding * fps) as i32 };
let next_frame = if i + 1 < rows.len() {
rows[i + 1].0
} else {
last_frame + (padding * fps) as i32
};
let start_offset = frame - first_frame + (padding * fps) as i32;
let end_offset = next_frame - first_frame + (padding * fps) as i32;
// Bbox
parts.push(format!(
"drawbox=x={}:y={}:w={}:h={}:color=red@0.8:thickness=8:enable='between(n,{},{})'",
x, y, w, h, start_offset, end_offset - 1
x,
y,
w,
h,
start_offset,
end_offset - 1
));
// Text label (drawtext, 1 filter vs ~175 bitmap drawboxes)
parts.push(format!(
@@ -325,14 +343,31 @@ async fn trace_video(
let tmp_str = tmp.to_str().unwrap_or("").to_string();
let result = ffmpeg_cmd()
.args([
"-ss", &seek.to_string(), "-i", &video_path,
"-t", &duration.to_string(),
"-/filter_complex", &filter_path,
"-c:v", "libx264", "-preset", "ultrafast", "-crf", "28",
"-an", "-movflags", "+faststart", "-y", &tmp_str,
"-ss",
&seek.to_string(),
"-i",
&video_path,
"-t",
&duration.to_string(),
"-/filter_complex",
&filter_path,
"-c:v",
"libx264",
"-preset",
"ultrafast",
"-crf",
"28",
"-an",
"-movflags",
"+faststart",
"-y",
&tmp_str,
])
.output()
.map_err(|e| { tracing::error!("ffmpeg spawn: {}", e); StatusCode::INTERNAL_SERVER_ERROR })?;
.map_err(|e| {
tracing::error!("ffmpeg spawn: {}", e);
StatusCode::INTERNAL_SERVER_ERROR
})?;
if !result.status.success() {
let stderr = String::from_utf8_lossy(&result.stderr);
tracing::error!("ffmpeg failed: {}", &stderr[..stderr.len().min(300)]);

View File

@@ -13,6 +13,8 @@ use crate::core::embedding::Embedder;
/// JSON request body accepted by `smart_search`.
pub struct SmartSearchRequest {
// NOTE(review): presumably the UUID of the file to search within — confirm against `smart_search`.
pub uuid: String,
/// Search text; `smart_search` echoes it back in the response.
pub query: String,
/// Page number. `smart_search` treats a missing value or 0 as page 1.
pub page: Option<usize>,
/// Results per page (raised to at least 1). When omitted, `smart_search` falls back to
/// `limit` if `page` is also omitted, otherwise to 5.
pub page_size: Option<usize>,
/// Legacy result cap. Doubles as the page size when neither `page` nor `page_size`
/// is sent; when present, the final result list is never longer than this.
pub limit: Option<usize>,
}
@@ -41,6 +43,8 @@ pub struct SearchResult {
/// JSON response body returned by `smart_search`.
pub struct SmartSearchResponse {
/// The query string copied from the request.
pub query: String,
/// Matching results; empty on the early-return path of `smart_search`.
pub results: Vec<SearchResult>,
/// Effective page number after defaulting and clamping.
pub page: usize,
/// Effective page size after defaulting and clamping.
pub page_size: usize,
/// Label of the code path that produced the response; `smart_search` sets either
/// "semantic_vector_search" or "drill_down_semantic_search".
pub strategy: String,
}
@@ -51,7 +55,18 @@ pub async fn smart_search(
Json(req): Json<SmartSearchRequest>,
) -> Result<Json<SmartSearchResponse>, (StatusCode, Json<serde_json::Value>)> {
let db = &state.db;
let limit = req.limit.unwrap_or(5);
let page = req.page.unwrap_or(1).max(1);
// Backward compat: if the old `limit` is sent without `page` or `page_size`, use `limit` as the page size
let page_size = if req.page_size.is_some() {
req.page_size.unwrap()
} else if req.limit.is_some() && req.page.is_none() {
req.limit.unwrap()
} else {
5
}
.max(1);
let hard_limit = req.limit.unwrap_or(usize::MAX);
let limit = hard_limit.min(page_size);
// 1. Generate Embedding using EmbeddingGemma via MOMENTRY_EMBED_URL
let embedder = Embedder::new("embeddinggemma-300m".to_string());
@@ -83,6 +98,8 @@ pub async fn smart_search(
return Ok(Json(SmartSearchResponse {
query: req.query,
results: vec![],
page,
page_size,
strategy: "semantic_vector_search".to_string(),
}));
}
@@ -145,13 +162,15 @@ pub async fn smart_search(
});
// 7. Limit the final results (optional, but good for API consistency)
let limit = req.limit.unwrap_or(5) * 5; // Allow more children per parent context
results.truncate(limit);
let truncate_limit = hard_limit.min(page_size * 5); // Allow more children per parent context
results.truncate(truncate_limit);
// 8. Format Response
let response = SmartSearchResponse {
query: req.query,
results,
page,
page_size,
strategy: "drill_down_semantic_search".to_string(),
};

View File

@@ -2286,7 +2286,8 @@ async fn list_jobs(Query(params): Query<JobsQuery>) -> Result<Json<JobListRespon
.into_iter()
.map(|r| {
let status_str: String = r.try_get("status").unwrap_or_default();
let status = MonitorJobStatus::from_db_str(&status_str).unwrap_or(MonitorJobStatus::Pending);
let status =
MonitorJobStatus::from_db_str(&status_str).unwrap_or(MonitorJobStatus::Pending);
JobInfoResponse {
id: r.try_get("id").unwrap_or(0),
uuid: r.try_get("uuid").unwrap_or_default(),
@@ -2507,7 +2508,7 @@ pub async fn start_server(host: &str, port: u16) -> anyhow::Result<()> {
.route("/api/v1/files/scan", get(scan_files))
.route("/api/v1/file/:file_uuid/probe", get(probe_by_uuid))
.route("/api/v1/file/:file_uuid/process", post(trigger_processing))
.route("/api/v1/file/:file_uuid/chunks", get(list_pre_chunks))
.route("/api/v1/progress/:uuid", get(get_progress))
.route("/api/v1/jobs", get(list_jobs))
.route("/api/v1/config/cache", post(cache_toggle))
@@ -2585,7 +2586,7 @@ async fn get_ingest_stats(
State(state): State<AppState>,
) -> Result<Json<IngestStatsResponse>, StatusCode> {
let table_videos = schema::table_name("videos");
let table_chunks = schema::table_name("chunks");
let table_chunks = schema::table_name("chunk");
let total_videos: (i64,) = sqlx::query_as(&format!("SELECT COUNT(*) FROM {}", table_videos))
.fetch_one(state.db.pool())
@@ -3048,15 +3049,15 @@ async fn video_details(
Query(query): Query<VideoDetailsQuery>,
State(state): State<AppState>,
) -> Result<Json<VideoDetailsResponse>, StatusCode> {
let table = schema::table_name("chunks");
let table = schema::table_name("chunk");
if let Some(chunk_id) = query.chunk_id {
let row: Option<(
i32, String, String, i32, String, f64, i64, i64,
i32, String, String, String, f64, i64, i64,
Option<String>, serde_json::Value, Option<serde_json::Value>,
Option<String>, i32, Option<String>, Option<serde_json::Value>, Option<String>,
)> = sqlx::query_as(&format!(
"SELECT file_id, uuid, chunk_id, chunk_index, chunk_type::text, fps, start_frame, end_frame,
"SELECT file_id, uuid, chunk_id, chunk_type::text, fps, start_frame, end_frame,
text_content, content, metadata, vector_id, frame_count,
parent_chunk_id, visual_stats, summary_text
FROM {} WHERE chunk_id = $1 AND uuid = $2",
@@ -3081,20 +3082,20 @@ async fn video_details(
let row = row.ok_or(StatusCode::NOT_FOUND)?;
let fps = if row.5 > 0.0 { row.5 } else { 24.0 };
let start_frame = row.6;
let end_frame = row.7;
let fps = if row.4 > 0.0 { row.4 } else { 24.0 };
let start_frame = row.5;
let end_frame = row.6;
let duration_frames = end_frame - start_frame;
let start_time = start_frame as f64 / fps;
let end_time = end_frame as f64 / fps;
let row_metadata = row.10.clone();
let row_metadata = row.9.clone();
let mut summary_text = row.15.clone();
let mut summary_text = row.14.clone();
let mut metadata = None;
if let Some(ref pid_str) = row.13 {
if let Some(ref pid_str) = row.12 {
if !pid_str.is_empty() {
if let Ok(pid) = pid_str.parse::<i32>() {
let parent_table = schema::table_name("parent_chunks");
@@ -3168,7 +3169,7 @@ async fn video_details(
uuid: row.1.clone(),
details: VideoDetailsResult::Chunk(ChunkDetailResponse {
chunk_id: row.2.clone(),
chunk_type: row.4.clone(),
chunk_type: row.3.clone(),
frame_range: FrameRange {
start_frame,
end_frame,
@@ -3179,12 +3180,12 @@ async fn video_details(
start: start_time,
end: end_time,
},
text_content: row.8.clone(),
content: Some(row.9.clone()),
parent_id: row.13.clone(),
text_content: row.7.clone(),
content: Some(row.8.clone()),
parent_id: row.12.clone(),
summary_text,
metadata,
visual_stats: row.14.clone(),
visual_stats: row.13.clone(),
speaker_ids,
person_ids,
}),
@@ -3194,123 +3195,6 @@ async fn video_details(
Err(StatusCode::BAD_REQUEST)
}
/// Query-string parameters accepted by `list_pre_chunks`.
#[derive(Debug, Deserialize)]
struct PreChunksQuery {
/// Optional processor-type filter; `list_pre_chunks` lowercases it before matching.
processor_type: Option<String>,
/// Page number; `list_pre_chunks` defaults it to 1.
page: Option<usize>,
/// Rows per page; `list_pre_chunks` defaults it to 20.
page_size: Option<usize>,
}
/// JSON response body returned by `list_pre_chunks`.
#[derive(Debug, Serialize)]
struct PreChunksResponse {
/// The rows of the requested page.
pre_chunks: Vec<PreChunkItem>,
/// Total number of rows matching the file and filter (from `COUNT(*)`), not the
/// number of rows in this page.
count: i64,
/// Page number the rows were fetched for.
page: usize,
/// Page size the rows were fetched with.
page_size: usize,
}
/// One row of the pre-chunks table as serialized by `list_pre_chunks`.
#[derive(Debug, Serialize)]
struct PreChunkItem {
id: i64,
processor_type: String,
coordinate_type: String,
/// Column that `list_pre_chunks` orders its rows by (ascending).
coordinate_index: i64,
start_frame: Option<i64>,
end_frame: Option<i64>,
start_time: Option<f64>,
end_time: Option<f64>,
fps: Option<f64>,
data: serde_json::Value,
/// Not selected by `list_pre_chunks`, which always sets it to `None`.
identity_id: Option<String>,
/// Not selected by `list_pre_chunks`, which always sets it to `None`.
confidence: Option<f64>,
/// Creation timestamp rendered as an RFC 3339 string.
created_at: String,
}
/// Lists the pre-chunks recorded for one file, one page at a time.
///
/// `uuid` is matched against the `file_uuid` column. When `processor_type` is supplied it is
/// lowercased and used as an exact-match filter. `page` is 1-based (default 1; 0 is treated
/// as 1) and `page_size` defaults to 20. Rows are ordered by `coordinate_index` ascending.
///
/// # Errors
///
/// Returns `400 Bad Request` when the requested page offset does not fit in `usize`, and
/// `500 Internal Server Error` when either database query fails.
async fn list_pre_chunks(
    Path(uuid): Path<String>,
    Query(query): Query<PreChunksQuery>,
    State(state): State<AppState>,
) -> Result<Json<PreChunksResponse>, StatusCode> {
    let table = schema::table_name("pre_chunks");

    // Clamp to page 1 so `page - 1` cannot underflow when a client sends `page=0`.
    let page = query.page.unwrap_or(1).max(1);
    let page_size = query.page_size.unwrap_or(20);
    let offset = (page - 1)
        .checked_mul(page_size)
        .ok_or(StatusCode::BAD_REQUEST)?;

    // The filter value comes straight from the query string, so it is sent as a bound
    // parameter ($2) instead of being spliced into the SQL text. A NULL parameter
    // disables the filter, which lets both queries keep a fixed parameter list.
    let processor_type = query.processor_type.as_deref().map(str::to_lowercase);
    let processor_filter = "AND ($2::text IS NULL OR processor_type::text = $2)";

    let count_query = format!(
        "SELECT COUNT(*) FROM {} WHERE file_uuid = $1 {}",
        table, processor_filter
    );
    let count: i64 = sqlx::query(&count_query)
        .bind(&uuid)
        .bind(&processor_type)
        .fetch_one(state.db.pool())
        .await
        .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?
        .try_get(0)
        .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;

    // `page_size` and `offset` are plain unsigned integers, so formatting them into the
    // statement cannot inject SQL.
    let data_query = format!(
        "SELECT id, processor_type, coordinate_type, coordinate_index,
                start_frame, end_frame, start_time, end_time, fps,
                data, created_at
         FROM {}
         WHERE file_uuid = $1 {}
         ORDER BY coordinate_index ASC
         LIMIT {} OFFSET {}",
        table, processor_filter, page_size, offset
    );

    let rows: Vec<(
        i64,
        String,
        String,
        i64,
        Option<i64>,
        Option<i64>,
        Option<f64>,
        Option<f64>,
        Option<f64>,
        serde_json::Value,
        chrono::DateTime<chrono::Utc>,
    )> = sqlx::query_as(&data_query)
        .bind(&uuid)
        .bind(&processor_type)
        .fetch_all(state.db.pool())
        .await
        .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;

    // Consume the rows so the strings and JSON payloads are moved, not cloned.
    let pre_chunks = rows
        .into_iter()
        .map(
            |(
                id,
                processor_type,
                coordinate_type,
                coordinate_index,
                start_frame,
                end_frame,
                start_time,
                end_time,
                fps,
                data,
                created_at,
            )| PreChunkItem {
                id,
                processor_type,
                coordinate_type,
                coordinate_index,
                start_frame,
                end_frame,
                start_time,
                end_time,
                fps,
                data,
                // Not selected by the query above.
                identity_id: None,
                confidence: None,
                created_at: created_at.to_rfc3339(),
            },
        )
        .collect();

    Ok(Json(PreChunksResponse {
        pre_chunks,
        count,
        page,
        page_size,
    }))
}
#[derive(Debug, Serialize)]
struct DeleteVideoResponse {
success: bool,
@@ -3404,7 +3288,7 @@ async fn delete_video(
let videos_table = schema::table_name("videos");
let face_table = schema::table_name("face_detections");
let processor_table = schema::table_name("processor_results");
let chunks_table = schema::table_name("chunks");
let chunks_table = schema::table_name("chunk");
let parent_chunks_table = schema::table_name("parent_chunks");
// Check if video exists first

View File

@@ -25,6 +25,8 @@ pub fn trace_agent_routes() -> Router<crate::api::server::AppState> {
struct TracesRequest {
min_faces: Option<i64>,
sort_by: Option<String>,
page: Option<i64>,
page_size: Option<i64>,
limit: Option<i64>,
min_confidence: Option<f64>,
max_confidence: Option<f64>,
@@ -49,6 +51,8 @@ struct TracesResponse {
file_uuid: String,
total_traces: i64,
total_faces: i64,
page: i64,
page_size: i64,
traces: Vec<TraceInfo>,
}
@@ -59,7 +63,11 @@ async fn list_traces_sorted(
) -> Result<Json<TracesResponse>, (StatusCode, String)> {
let min_faces = req.min_faces.unwrap_or(1);
let sort = req.sort_by.as_deref().unwrap_or("first_appearance");
let limit = req.limit.unwrap_or(500);
let page = req.page.unwrap_or(1).max(1);
let page_size = req.page_size.unwrap_or(50).max(1).min(500);
let hard_limit = req.limit.unwrap_or(500);
let effective_limit = hard_limit.min(page_size);
let db_offset = (page - 1) * page_size;
let min_confidence = req.min_confidence.unwrap_or(0.0);
let max_confidence = req.max_confidence.unwrap_or(1.0);
@@ -92,11 +100,11 @@ async fn list_traces_sorted(
AVG(confidence) AS avg_confidence
FROM dev.face_detections
WHERE file_uuid = $1 AND trace_id IS NOT NULL
AND confidence >= $4 AND confidence <= $5
AND confidence >= $5 AND confidence <= $6
GROUP BY trace_id
HAVING COUNT(*) >= $2
ORDER BY {}
LIMIT $3
LIMIT $3 OFFSET $4
) tt
LEFT JOIN LATERAL (
SELECT id FROM dev.face_detections
@@ -111,7 +119,8 @@ async fn list_traces_sorted(
sqlx::query_as(&query)
.bind(&file_uuid)
.bind(min_faces)
.bind(limit)
.bind(effective_limit)
.bind(db_offset)
.bind(min_confidence)
.bind(max_confidence)
.fetch_all(state.db.pool())
@@ -146,6 +155,8 @@ async fn list_traces_sorted(
file_uuid,
total_traces,
total_faces,
page,
page_size,
traces,
}))
}
@@ -154,6 +165,8 @@ async fn list_traces_sorted(
#[derive(Debug, Deserialize)]
struct TraceFacesQuery {
page: Option<i64>,
page_size: Option<i64>,
limit: Option<i64>,
offset: Option<i64>,
interpolate: Option<bool>,
@@ -194,7 +207,14 @@ async fn list_trace_faces(
Query(q): Query<TraceFacesQuery>,
) -> Result<Json<TraceFacesResponse>, (StatusCode, String)> {
let limit = q.limit.unwrap_or(200).min(1000);
let offset = q.offset.unwrap_or(0);
// Support both page/page_size and offset; page/page_size takes precedence
let offset = if q.page.is_some() || q.page_size.is_some() {
let p = q.page.unwrap_or(1).max(1);
let ps = q.page_size.unwrap_or(200).max(1).min(1000);
(p - 1) * ps
} else {
q.offset.unwrap_or(0)
};
let interpolate = q.interpolate.unwrap_or(false);
let fps: f64 =
@@ -206,7 +226,7 @@ async fn list_trace_faces(
.unwrap_or(24.0);
let total_detected: i64 = sqlx::query_scalar(
"SELECT COUNT(*) FROM dev.face_detections WHERE file_uuid = $1 AND trace_id = $2"
"SELECT COUNT(*) FROM dev.face_detections WHERE file_uuid = $1 AND trace_id = $2",
)
.bind(&file_uuid)
.bind(trace_id)
@@ -214,21 +234,28 @@ async fn list_trace_faces(
.await
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?;
let rows: Vec<(i32, i32, Option<i32>, Option<i32>, Option<i32>, Option<i32>, f32)> =
sqlx::query_as(
"SELECT id, frame_number, x, y, width, height, confidence
let rows: Vec<(
i32,
i32,
Option<i32>,
Option<i32>,
Option<i32>,
Option<i32>,
f32,
)> = sqlx::query_as(
"SELECT id, frame_number, x, y, width, height, confidence
FROM dev.face_detections
WHERE file_uuid = $1 AND trace_id = $2
ORDER BY frame_number ASC
LIMIT $3 OFFSET $4"
)
.bind(&file_uuid)
.bind(trace_id)
.bind(limit)
.bind(offset)
.fetch_all(state.db.pool())
.await
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?;
LIMIT $3 OFFSET $4",
)
.bind(&file_uuid)
.bind(trace_id)
.bind(limit)
.bind(offset)
.fetch_all(state.db.pool())
.await
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?;
let mut faces: Vec<TraceFaceItem> = Vec::new();

View File

@@ -327,7 +327,7 @@ async fn search_chunks(
};
let mut sql = format!(
"SELECT chunk_id, chunk_type, start_time, end_time, start_frame, end_frame, text_content, content FROM chunks WHERE file_uuid = '{}'",
"SELECT chunk_id, chunk_type, start_time, end_time, start_frame, end_frame, text_content, content FROM dev.chunk WHERE file_uuid = '{}'",
uuid
);
if let Some(tr) = &req.time_range {
@@ -483,7 +483,7 @@ async fn search_frames_internal(
let video_table = "videos";
let mut sql = format!(
"SELECT f.frame_number, f.timestamp, f.yolo_objects, f.ocr_results, f.face_results, f.pose_results, v.file_uuid
"SELECT f.frame_number, f.timestamp, f.yolo_objects, f.ocr_results, f.face_results, v.file_uuid
FROM {} f JOIN {} v ON f.file_id = v.id WHERE 1=1",
table, video_table
);
@@ -532,13 +532,12 @@ async fn search_frames_internal(
Option<serde_json::Value>,
Option<serde_json::Value>,
Option<serde_json::Value>,
Option<serde_json::Value>,
String,
)> = sqlx::query_as(&sql).fetch_all(db.pool()).await?;
let results: Vec<SearchResult> = rows
.into_iter()
.map(|(frame_number, timestamp, yolo, ocr, face, pose, _uuid)| {
.map(|(frame_number, timestamp, yolo, ocr, face, _uuid)| {
let objects = yolo.as_ref().and_then(|v| {
v.get("objects")
.map(|o| o.as_array().cloned().unwrap_or_default())
@@ -558,10 +557,6 @@ async fn search_frames_internal(
v.get("faces")
.map(|f| f.as_array().cloned().unwrap_or_default())
});
let pose_persons = pose.as_ref().and_then(|v| {
v.get("persons")
.map(|p| p.as_array().cloned().unwrap_or_default())
});
SearchResult::Frame {
frame_number,
@@ -570,7 +565,7 @@ async fn search_frames_internal(
objects: objects.map(|arr| arr.iter().map(|v| v.clone()).collect()),
ocr_texts,
faces,
pose_persons,
pose_persons: None,
}
})
.collect();
@@ -652,7 +647,7 @@ async fn search_frames_internal_v2(
let video_table = "videos";
let mut sql = format!(
"SELECT f.frame_number, f.timestamp, f.yolo_objects, f.ocr_results, f.face_results, f.pose_results, v.file_uuid
"SELECT f.frame_number, f.timestamp, f.yolo_objects, f.ocr_results, f.face_results, v.file_uuid
FROM {} f JOIN {} v ON f.file_id = v.id WHERE 1=1",
table, video_table
);
@@ -685,13 +680,12 @@ async fn search_frames_internal_v2(
Option<serde_json::Value>,
Option<serde_json::Value>,
Option<serde_json::Value>,
Option<serde_json::Value>,
String,
)> = sqlx::query_as(&sql).fetch_all(db.pool()).await?;
let results: Vec<FrameResult> = rows
.into_iter()
.map(|(frame_number, timestamp, yolo, ocr, face, pose, uuid)| {
.map(|(frame_number, timestamp, yolo, ocr, face, uuid)| {
let objects = yolo.as_ref().and_then(|v| {
v.get("objects")
.map(|o| o.as_array().cloned().unwrap_or_default())
@@ -711,11 +705,6 @@ async fn search_frames_internal_v2(
v.get("faces")
.map(|f| f.as_array().cloned().unwrap_or_default())
});
let pose_persons = pose.as_ref().and_then(|v| {
v.get("persons")
.map(|p| p.as_array().cloned().unwrap_or_default())
});
FrameResult {
frame_number,
timestamp,
@@ -723,7 +712,7 @@ async fn search_frames_internal_v2(
objects: objects.map(|arr| arr.iter().map(|v| v.clone()).collect()),
ocr_texts,
faces,
pose_persons,
pose_persons: None,
}
})
.collect();

View File

@@ -177,7 +177,7 @@ pub async fn search_visual_chunks(
/// Get all visual chunks for a video UUID
async fn get_visual_chunks_by_uuid(db: &PostgresDb, uuid: &str) -> Result<Vec<Chunk>> {
let sql = format!(
"SELECT file_id, uuid, chunk_id, chunk_index, chunk_type, fps, start_frame, end_frame, text_content, content, metadata, vector_id, visual_stats FROM chunks WHERE uuid = '{}' AND chunk_type = 'visual' ORDER BY start_frame ASC",
"SELECT file_id, file_uuid, chunk_id, chunk_type, fps, start_frame, end_frame, text_content, content, metadata, vector_id, visual_stats FROM dev.chunk WHERE file_uuid = '{}' AND chunk_type = 'visual' ORDER BY start_frame ASC",
uuid.replace('\'', "''")
);
@@ -185,7 +185,6 @@ async fn get_visual_chunks_by_uuid(db: &PostgresDb, uuid: &str) -> Result<Vec<Ch
i32, // file_id
String, // uuid
String, // chunk_id
i32, // chunk_index
String, // chunk_type
f64, // fps
i64, // start_frame
@@ -199,7 +198,7 @@ async fn get_visual_chunks_by_uuid(db: &PostgresDb, uuid: &str) -> Result<Vec<Ch
let mut chunks = Vec::new();
for row in rows {
let chunk_type = match row.4.as_str() {
let chunk_type = match row.3.as_str() {
"visual" => ChunkType::Visual,
"sentence" => ChunkType::Sentence,
"time_based" => ChunkType::TimeBased,
@@ -210,27 +209,26 @@ async fn get_visual_chunks_by_uuid(db: &PostgresDb, uuid: &str) -> Result<Vec<Ch
};
// Calculate frame_count
let frame_count = (row.7 - row.6) as i32;
let frame_count = (row.6 - row.5) as i32;
chunks.push(Chunk {
file_id: row.0,
uuid: row.1,
chunk_id: row.2,
chunk_index: row.3 as u32,
chunk_type,
rule: ChunkRule::Rule2, // Visual chunks use Rule2
fps: row.5,
start_frame: row.6,
end_frame: row.7,
text_content: row.8,
content: row.9,
metadata: row.10,
vector_id: row.11,
fps: row.4,
start_frame: row.5,
end_frame: row.6,
text_content: row.7,
content: row.8,
metadata: row.9,
vector_id: row.10,
frame_count,
pre_chunk_ids: Vec::new(),
parent_chunk_id: None,
child_chunk_ids: Vec::new(),
visual_stats: row.12,
visual_stats: row.11,
});
}
@@ -383,13 +381,13 @@ pub async fn get_visual_chunk_statistics(
MAX((content->'metadata'->>'avg_confidence')::float) as max_confidence,
SUM((content->'metadata'->>'object_count')::int) as total_objects,
AVG((content->'metadata'->>'spatial_density')::float) as avg_density
FROM chunks
WHERE uuid = '{}'
FROM dev.chunk
WHERE file_uuid = '{}'
AND chunk_type = 'visual'",
uuid.replace('\'', "''")
);
let row: (i64, Option<f64>, Option<f64>, Option<f64>, i64, Option<f64>) =
let row: (i64, Option<f64>, Option<f64>, Option<f64>, Option<i64>, Option<f64>) =
sqlx::query_as(&sql).fetch_one(db.pool()).await?;
let mut stats = HashMap::new();
@@ -406,7 +404,7 @@ pub async fn get_visual_chunk_statistics(
"max_confidence".to_string(),
Value::from(row.3.unwrap_or(0.0)),
);
stats.insert("total_objects".to_string(), Value::from(row.4));
stats.insert("total_objects".to_string(), Value::from(row.4.unwrap_or(0)));
stats.insert("avg_density".to_string(), Value::from(row.5.unwrap_or(0.0)));
Ok(stats)

View File

@@ -6,6 +6,6 @@ pub mod types;
pub use rule1_ingest::execute_rule1;
pub use rule3_ingest::ingest_rule3;
pub use trace_ingest::ingest_traces;
pub use splitter::{AsrSegment, ChunkSplitter};
pub use trace_ingest::ingest_traces;
pub use types::{Chunk, ChunkType};

View File

@@ -50,7 +50,7 @@ pub async fn execute_rule1(db: &PostgresDb, file_uuid: &str, fps: f64) -> Result
let chunk = Chunk::from_seconds(
file_id as i32,
file_uuid.to_string(),
idx as u32,
format!("{}", idx),
ChunkType::Sentence,
ChunkRule::Rule1,
seg.start_time,

View File

@@ -73,7 +73,7 @@ pub async fn ingest_rule3(pool: &PgPool, file_uuid: &str) -> Result<usize> {
// Query the dev.chunk table for Rule 1 sentence chunks
let rule1_rows: Vec<(String,)> = sqlx::query_as(
r#"
SELECT chunk_id FROM chunks
SELECT chunk_id FROM dev.chunk
WHERE file_uuid = $1 AND chunk_type = 'sentence'
AND start_frame >= $2
AND end_frame <= $3
@@ -98,7 +98,7 @@ pub async fn ingest_rule3(pool: &PgPool, file_uuid: &str) -> Result<usize> {
let texts: Vec<String> = sqlx::query_scalar(
r#"
SELECT text_content FROM chunks
SELECT text_content FROM dev.chunk
WHERE file_uuid = $1 AND chunk_type = 'sentence'
AND start_frame >= $2
AND end_frame <= $3
@@ -135,10 +135,11 @@ pub async fn ingest_rule3(pool: &PgPool, file_uuid: &str) -> Result<usize> {
);
// 4. Insert into dev.chunk
let fps_query: Option<f64> = sqlx::query_scalar("SELECT fps FROM videos WHERE file_uuid = $1")
.bind(file_uuid)
.fetch_optional(&mut *tx)
.await?;
let fps_query: Option<f64> =
sqlx::query_scalar("SELECT fps FROM videos WHERE file_uuid = $1")
.bind(file_uuid)
.fetch_optional(&mut *tx)
.await?;
let fps = fps_query.unwrap_or(29.97);
// Prepare metadata JSON
@@ -149,12 +150,12 @@ pub async fn ingest_rule3(pool: &PgPool, file_uuid: &str) -> Result<usize> {
sqlx::query(
r#"
INSERT INTO chunks (
file_uuid, chunk_id, old_chunk_id, chunk_index, chunk_type,
INSERT INTO dev.chunk (
file_uuid, chunk_id, chunk_type,
start_time, end_time, fps, start_frame, end_frame,
content, text_content, summary_text, metadata, child_chunk_ids
) VALUES ($1, $2, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14)
ON CONFLICT (file_uuid, old_chunk_id) DO NOTHING
) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13)
ON CONFLICT (file_uuid, chunk_id) DO NOTHING
"#,
)
.bind(file_uuid)

View File

@@ -23,7 +23,7 @@ impl ChunkSplitter {
chunks.push(Chunk::from_seconds(
0, // file_id
uuid.to_string(),
index,
format!("{}", index),
ChunkType::TimeBased,
ChunkRule::Rule1,
current_time,
@@ -48,7 +48,7 @@ impl ChunkSplitter {
chunks.push(Chunk::from_seconds(
0, // file_id
uuid.to_string(),
index as u32,
format!("{}", index),
ChunkType::Sentence,
ChunkRule::Rule1,
segment.start,

View File

@@ -95,7 +95,7 @@ pub async fn ingest_traces(db: &PostgresDb, file_uuid: &str) -> Result<usize> {
let chunk = Chunk::new(
file_id,
file_uuid.to_string(),
(count + 1) as u32,
format!("trace_{}", count + 1),
ChunkType::Trace,
ChunkRule::Rule1,
trace.first_frame as i64,
@@ -110,17 +110,29 @@ pub async fn ingest_traces(db: &PostgresDb, file_uuid: &str) -> Result<usize> {
if let Err(e) = db.store_chunk(&chunk).await {
error!("Failed to store trace chunk {}: {}", trace.trace_id, e);
} else {
let preview = chunk.text_content.as_deref().unwrap_or("").chars().take(60).collect::<String>();
let co = chunk.metadata.as_ref()
let preview = chunk
.text_content
.as_deref()
.unwrap_or("")
.chars()
.take(60)
.collect::<String>();
let co = chunk
.metadata
.as_ref()
.and_then(|m| m.get("co_appearances"))
.and_then(|c| c.as_array())
.map(|a| a.len())
.unwrap_or(0);
info!(
"Trace chunk {}: trace_id={} frames={}-{} faces={} co_appear={} text={}",
chunk.chunk_id, trace.trace_id,
trace.first_frame, trace.last_frame,
trace.face_count, co, preview,
chunk.chunk_id,
trace.trace_id,
trace.first_frame,
trace.last_frame,
trace.face_count,
co,
preview,
);
count += 1;
}
@@ -209,14 +221,11 @@ impl<'r> sqlx::FromRow<'r, sqlx::postgres::PgRow> for AsrSegment {
impl AsrSegment {
/// Returns the segment's text, if any.
///
/// Looks for a string at `data["text"]` first and falls back to
/// `data["data"]["text"]`; yields `None` when neither is a string.
fn text(&self) -> Option<&str> {
self.data
.get("text")
.and_then(|v| v.as_str())
.or_else(|| {
self.data
.get("data")
.and_then(|d| d.get("text"))
.and_then(|v| v.as_str())
})
self.data.get("text").and_then(|v| v.as_str()).or_else(|| {
self.data
.get("data")
.and_then(|d| d.get("text"))
.and_then(|v| v.as_str())
})
}
}

View File

@@ -115,7 +115,6 @@ pub struct Chunk {
pub file_id: i32,
pub uuid: String,
pub chunk_id: String,
pub chunk_index: u32,
pub chunk_type: ChunkType,
pub rule: ChunkRule,
/// Frames per second (can be fractional, e.g., 29.97, 23.976)
@@ -140,7 +139,7 @@ impl Chunk {
pub fn new(
file_id: i32,
uuid: String,
chunk_index: u32,
chunk_id: String,
chunk_type: ChunkType,
rule: ChunkRule,
start_frame: i64,
@@ -149,13 +148,11 @@ impl Chunk {
content: serde_json::Value,
) -> Self {
let frame_count = (end_frame - start_frame) as i32;
let chunk_id = format!("{}_{}", uuid, chunk_index);
Self {
file_id,
uuid,
chunk_id,
chunk_index,
chunk_type,
rule,
fps,
@@ -177,7 +174,7 @@ impl Chunk {
pub fn new_visual(
file_id: i32,
uuid: String,
chunk_index: u32,
chunk_id: String,
start_frame: i64,
end_frame: i64,
fps: f64,
@@ -189,7 +186,7 @@ impl Chunk {
Self::new(
file_id,
uuid,
chunk_index,
chunk_id,
ChunkType::Visual,
ChunkRule::Rule2,
start_frame,
@@ -203,7 +200,7 @@ impl Chunk {
pub fn from_yolo_frames(
file_id: i32,
uuid: String,
chunk_index: u32,
chunk_id: String,
start_frame: i64,
end_frame: i64,
fps: f64,
@@ -307,7 +304,7 @@ impl Chunk {
Self::new_visual(
file_id,
uuid,
chunk_index,
chunk_id,
start_frame,
end_frame,
fps,
@@ -334,7 +331,7 @@ impl Chunk {
pub fn from_seconds(
file_id: i32,
uuid: String,
chunk_index: u32,
chunk_id: String,
chunk_type: ChunkType,
rule: ChunkRule,
start_time: f64,
@@ -347,7 +344,7 @@ impl Chunk {
Self::new(
file_id,
uuid,
chunk_index,
chunk_id,
chunk_type,
rule,
start_frame,

View File

@@ -103,7 +103,6 @@ pub struct Chunk {
pub file_id: i32,
pub uuid: String,
pub chunk_id: String,
pub chunk_index: u32,
pub chunk_type: ChunkType,
pub rule: ChunkRule,
/// Frames per second (can be fractional, e.g., 29.97, 23.976)
@@ -128,7 +127,7 @@ impl Chunk {
pub fn new_visual(
file_id: i32,
uuid: String,
chunk_index: u32,
chunk_id: String,
start_frame: i64,
end_frame: i64,
fps: f64,
@@ -140,7 +139,7 @@ impl Chunk {
Self::new(
file_id,
uuid,
chunk_index,
chunk_id,
ChunkType::Visual,
ChunkRule::Rule2,
start_frame,
@@ -154,7 +153,7 @@ impl Chunk {
pub fn from_yolo_result(
file_id: i32,
uuid: String,
chunk_index: u32,
chunk_id: String,
start_frame: i64,
end_frame: i64,
fps: f64,
@@ -263,7 +262,7 @@ impl Chunk {
Self::new_visual(
file_id,
uuid,
chunk_index,
chunk_id,
start_frame,
end_frame,
fps,
@@ -275,7 +274,7 @@ impl Chunk {
pub fn new(
file_id: i32,
uuid: String,
chunk_index: u32,
chunk_id: String,
chunk_type: ChunkType,
rule: ChunkRule,
start_frame: i64,
@@ -284,13 +283,11 @@ impl Chunk {
content: serde_json::Value,
) -> Self {
let frame_count = (end_frame - start_frame) as i32;
let chunk_id = format!("{}_{}", uuid, chunk_index);
Self {
file_id,
uuid,
chunk_id,
chunk_index,
chunk_type,
rule,
fps,

View File

@@ -13,7 +13,6 @@ pub struct MongoDb {
pub struct ChunkDocument {
pub uuid: String,
pub chunk_id: String,
pub chunk_index: u32,
pub chunk_type: String,
pub start_time: f64,
pub end_time: f64,
@@ -34,7 +33,6 @@ impl From<Chunk> for ChunkDocument {
Self {
uuid: chunk.uuid,
chunk_id: chunk.chunk_id,
chunk_index: chunk.chunk_index,
chunk_type: chunk.chunk_type.as_str().to_string(),
start_time,
end_time,
@@ -119,7 +117,7 @@ impl MongoDb {
file_id: 0,
uuid: doc.uuid,
chunk_id: doc.chunk_id,
chunk_index: doc.chunk_index,
chunk_type,
rule: ChunkRule::Rule1,
fps: doc.fps,
@@ -178,7 +176,7 @@ impl MongoDb {
file_id: 0,
uuid: doc.uuid,
chunk_id: doc.chunk_id,
chunk_index: doc.chunk_index,
chunk_type,
rule: ChunkRule::Rule1,
fps: doc.fps,
@@ -234,7 +232,7 @@ impl MongoDb {
file_id: 0,
uuid: doc.uuid,
chunk_id: doc.chunk_id,
chunk_index: doc.chunk_index,
chunk_type,
rule: ChunkRule::Rule1,
fps: doc.fps,

View File

@@ -56,7 +56,7 @@ pub struct CandidateRecord {
#[derive(Debug, Clone, Serialize, Deserialize, sqlx::FromRow)]
pub struct FileIdentityRecord {
pub id: i64,
pub id: i32,
pub file_uuid: String,
pub identity_id: i32,
pub name: String,
@@ -116,7 +116,7 @@ pub struct IdentityFaceRecord {
#[derive(Debug, Clone, Serialize, Deserialize, sqlx::FromRow)]
pub struct IdentityChunkRecord {
pub id: i64,
pub id: i32,
pub file_uuid: String,
pub chunk_id: String,
pub chunk_type: String,
@@ -788,8 +788,8 @@ impl PostgresDb {
.await?;
// Chunks
sqlx::query("CREATE TABLE IF NOT EXISTS chunks (id SERIAL PRIMARY KEY, file_uuid VARCHAR(32) NOT NULL, chunk_id VARCHAR(64) NOT NULL, chunk_index INTEGER NOT NULL, chunk_type VARCHAR(32) NOT NULL, start_time DOUBLE PRECISION NOT NULL, end_time DOUBLE PRECISION NOT NULL, fps DOUBLE PRECISION DEFAULT 24.0, start_frame BIGINT DEFAULT 0, end_frame BIGINT DEFAULT 0, content JSONB NOT NULL, metadata JSONB, vector_id VARCHAR(64), created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, UNIQUE(file_uuid, chunk_id))").execute(pool).await?;
sqlx::query("CREATE INDEX IF NOT EXISTS idx_chunks_file ON chunks(file_uuid)")
sqlx::query("CREATE TABLE IF NOT EXISTS chunk (id SERIAL PRIMARY KEY, file_uuid VARCHAR(32) NOT NULL, chunk_id VARCHAR(64) NOT NULL, chunk_type VARCHAR(32) NOT NULL, start_time DOUBLE PRECISION NOT NULL, end_time DOUBLE PRECISION NOT NULL, fps DOUBLE PRECISION DEFAULT 24.0, start_frame BIGINT DEFAULT 0, end_frame BIGINT DEFAULT 0, content JSONB NOT NULL, metadata JSONB, vector_id VARCHAR(64), created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, UNIQUE(file_uuid, chunk_id))").execute(pool).await?;
sqlx::query("CREATE INDEX IF NOT EXISTS idx_chunk_file ON chunk(file_uuid)")
.execute(pool)
.await?;
sqlx::query("CREATE INDEX IF NOT EXISTS idx_chunks_type ON chunks(chunk_type)")
@@ -845,7 +845,7 @@ impl PostgresDb {
sqlx::query(
"CREATE TRIGGER chunks_search_vector_trigger
BEFORE INSERT OR UPDATE ON chunks
BEFORE INSERT OR UPDATE ON chunk
FOR EACH ROW EXECUTE FUNCTION update_search_vector()",
)
.execute(pool)
@@ -1232,7 +1232,7 @@ impl PostgresDb {
let tx = self.pool.begin().await?;
let chunk_vectors = schema::table_name("chunk_vectors");
let chunks = schema::table_name("chunks");
let chunks = "dev.chunk";
let processor_results = schema::table_name("processor_results");
let videos = schema::table_name("videos");
@@ -1254,6 +1254,11 @@ impl PostgresDb {
.execute(&self.pool)
.await?;
sqlx::query(&format!("DELETE FROM dev.pre_chunks WHERE file_uuid = $1"))
.bind(uuid)
.execute(&self.pool)
.await?;
sqlx::query(&format!("DELETE FROM {} WHERE file_uuid = $1", videos))
.bind(uuid)
.execute(&self.pool)
@@ -1277,7 +1282,7 @@ impl PostgresDb {
}
pub async fn get_chunk_count(&self, uuid: &str) -> Result<(i64, i64)> {
let chunks = schema::table_name("chunks");
let chunks = "dev.chunk";
let sentence_count: i64 = sqlx::query_scalar(&format!(
"SELECT COUNT(*) FROM {} WHERE file_uuid = $1 AND chunk_type = 'sentence'",
chunks
@@ -2417,8 +2422,10 @@ impl PostgresDb {
pub async fn get_identity_by_uuid(&self, uuid: &Uuid) -> Result<Option<IdentityDetailRecord>> {
let query = r#"
SELECT id, uuid, name, identity_type, source, status, metadata, reference_data,
voice_embedding, identity_embedding, face_embedding,
tmdb_id, tmdb_profile, created_at, NULL::timestamptz as updated_at
voice_embedding::real[] as voice_embedding,
identity_embedding::real[] as identity_embedding,
face_embedding::real[] as face_embedding,
tmdb_id, tmdb_profile, created_at::timestamptz as created_at, NULL::timestamptz as updated_at
FROM identities
WHERE uuid = $1
"#;
@@ -2497,7 +2504,7 @@ impl PostgresDb {
let query = r#"
SELECT c.id, c.file_uuid, c.chunk_id, c.chunk_type,
c.start_time, c.end_time, c.text_content, c.content
FROM chunks c
FROM dev.chunk c
WHERE c.file_uuid IN (
SELECT DISTINCT fd.file_uuid
FROM face_detections fd
@@ -2538,7 +2545,7 @@ impl PostgresDb {
}
pub async fn store_chunk(&self, chunk: &Chunk) -> Result<()> {
let table = schema::table_name("chunks");
let table = "dev.chunk";
let content_with_rule = serde_json::json!({
"rule": chunk.rule.as_str(),
"data": chunk.content
@@ -2567,9 +2574,9 @@ impl PostgresDb {
sqlx::query(&format!(
r#"
INSERT INTO {} (file_id, file_uuid, chunk_id, old_chunk_id, chunk_index, chunk_type, start_time, end_time, fps, start_frame, end_frame, text_content, content, metadata, vector_id, frame_count, pre_chunk_ids, parent_chunk_id, child_chunk_ids)
VALUES ($1, $2, $3, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12::jsonb, $13::jsonb, $14, $15, $16, $17, $18)
ON CONFLICT (file_uuid, old_chunk_id) DO UPDATE SET
INSERT INTO {} (file_id, file_uuid, chunk_id, chunk_type, start_time, end_time, fps, start_frame, end_frame, text_content, content, metadata, vector_id, frame_count, pre_chunk_ids, parent_chunk_id, child_chunk_ids)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11::jsonb, $12::jsonb, $13, $14, $15, $16, $17)
ON CONFLICT (file_uuid, chunk_id) DO UPDATE SET
start_time = EXCLUDED.start_time,
end_time = EXCLUDED.end_time,
fps = EXCLUDED.fps,
@@ -2590,7 +2597,6 @@ impl PostgresDb {
.bind(chunk.file_id)
.bind(&chunk.uuid)
.bind(&chunk.chunk_id)
.bind(chunk.chunk_index as i32)
.bind(chunk.chunk_type.as_str())
.bind(chunk.start_time().seconds())
.bind(chunk.end_time().seconds())
@@ -2616,7 +2622,7 @@ impl PostgresDb {
chunk: &Chunk,
tx: &mut sqlx::Transaction<'_, sqlx::Postgres>,
) -> Result<()> {
let table = schema::table_name("chunks");
let table = "dev.chunk";
let content_with_rule = serde_json::json!({
"rule": chunk.rule.as_str(),
"data": chunk.content
@@ -2642,9 +2648,9 @@ impl PostgresDb {
sqlx::query(&format!(
r#"
INSERT INTO {} (file_id, file_uuid, chunk_id, old_chunk_id, chunk_index, chunk_type, start_time, end_time, fps, start_frame, end_frame, text_content, content, metadata, vector_id, frame_count, pre_chunk_ids, parent_chunk_id, child_chunk_ids)
VALUES ($1, $2, $3, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12::jsonb, $13::jsonb, $14, $15, $16, $17, $18)
ON CONFLICT (file_uuid, old_chunk_id) DO UPDATE SET
INSERT INTO {} (file_id, file_uuid, chunk_id, chunk_type, start_time, end_time, fps, start_frame, end_frame, text_content, content, metadata, vector_id, frame_count, pre_chunk_ids, parent_chunk_id, child_chunk_ids)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11::jsonb, $12::jsonb, $13, $14, $15, $16, $17)
ON CONFLICT (file_uuid, chunk_id) DO UPDATE SET
start_time = EXCLUDED.start_time,
end_time = EXCLUDED.end_time,
fps = EXCLUDED.fps,
@@ -2665,7 +2671,6 @@ impl PostgresDb {
.bind(chunk.file_id)
.bind(&chunk.uuid)
.bind(&chunk.chunk_id)
.bind(chunk.chunk_index as i32)
.bind(chunk.chunk_type.as_str())
.bind(chunk.start_time().seconds())
.bind(chunk.end_time().seconds())
@@ -2687,9 +2692,9 @@ impl PostgresDb {
}
pub async fn get_chunks_by_uuid(&self, uuid: &str) -> Result<Vec<Chunk>> {
let table = schema::table_name("chunks");
let table = "dev.chunk";
let rows = sqlx::query(&format!(
"SELECT COALESCE(file_id, 0) as file_id, file_uuid as uuid, chunk_id, chunk_index, chunk_type, COALESCE(fps, 24.0) as fps, COALESCE(start_frame, 0) as start_frame, COALESCE(end_frame, 0) as end_frame, text_content, content, metadata, vector_id, COALESCE(frame_count, 0) as frame_count, pre_chunk_ids, parent_chunk_id::text as parent_chunk_id, child_chunk_ids, visual_stats FROM {} WHERE file_uuid = $1 ORDER BY chunk_index",
"SELECT COALESCE(file_id, 0) as file_id, file_uuid as uuid, chunk_id, chunk_type, COALESCE(fps, 24.0) as fps, COALESCE(start_frame, 0) as start_frame, COALESCE(end_frame, 0) as end_frame, text_content, content, metadata, vector_id, COALESCE(frame_count, 0) as frame_count, pre_chunk_ids, parent_chunk_id::text as parent_chunk_id, child_chunk_ids, visual_stats FROM {} WHERE file_uuid = $1 ORDER BY id",
table
))
.bind(uuid)
@@ -2699,8 +2704,7 @@ impl PostgresDb {
let chunks: Vec<Chunk> = rows
.into_iter()
.map(|r| {
let chunk_type_str: String = r.get(4);
let chunk_index: i32 = r.get(3);
let chunk_type_str: String = r.get(3);
let chunk_type = match chunk_type_str.as_str() {
"time" => ChunkType::TimeBased,
"sentence" => ChunkType::Sentence,
@@ -2740,7 +2744,7 @@ impl PostgresDb {
file_id,
uuid: r.get("uuid"),
chunk_id: r.get("chunk_id"),
chunk_index: chunk_index as u32,
chunk_type,
rule,
@@ -2768,9 +2772,9 @@ impl PostgresDb {
chunk_id: &str,
uuid: &str,
) -> Result<Option<Chunk>> {
let table = schema::table_name("chunks");
let table = "dev.chunk";
let row = sqlx::query(&format!(
"SELECT COALESCE(file_id, 0) as file_id, uuid, chunk_id, chunk_index, chunk_type, COALESCE(fps, 24.0) as fps, COALESCE(start_frame, 0) as start_frame, COALESCE(end_frame, 0) as end_frame, text_content, content, metadata, vector_id, COALESCE(frame_count, 0) as frame_count, pre_chunk_ids, parent_chunk_id, child_chunk_ids, visual_stats FROM {} WHERE chunk_id = $1 AND uuid = $2",
"SELECT COALESCE(file_id, 0) as file_id, uuid, chunk_id, chunk_type, COALESCE(fps, 24.0) as fps, COALESCE(start_frame, 0) as start_frame, COALESCE(end_frame, 0) as end_frame, text_content, content, metadata, vector_id, COALESCE(frame_count, 0) as frame_count, pre_chunk_ids, parent_chunk_id, child_chunk_ids, visual_stats FROM {} WHERE chunk_id = $1 AND uuid = $2",
table
))
.bind(chunk_id)
@@ -2779,25 +2783,24 @@ impl PostgresDb {
.await?;
if let Some(r) = row {
let chunk_type_str: String = r.get(4);
let chunk_index: i32 = r.get(3);
let chunk_type = match chunk_type_str.as_str() {
"time" => ChunkType::TimeBased,
"sentence" => ChunkType::Sentence,
"cut" => ChunkType::Cut,
"trace" => ChunkType::Trace,
"story" => ChunkType::Story,
_ => ChunkType::TimeBased,
};
let chunk_type_str: String = r.get(3);
let chunk_type = match chunk_type_str.as_str() {
"time" => ChunkType::TimeBased,
"sentence" => ChunkType::Sentence,
"cut" => ChunkType::Cut,
"trace" => ChunkType::Trace,
"story" => ChunkType::Story,
_ => ChunkType::TimeBased,
};
let content: serde_json::Value = r.get(9);
let metadata: Option<serde_json::Value> = r.get(10);
let content: serde_json::Value = r.get(8);
let metadata: Option<serde_json::Value> = r.get(9);
let pre_chunk_ids: Vec<i32> = r.try_get(13).unwrap_or_default();
let parent_chunk_id: Option<String> = r.try_get(14).ok().flatten();
let child_chunk_ids: Vec<String> = r.try_get(15).unwrap_or_default();
let pre_chunk_ids: Vec<i32> = r.try_get(12).unwrap_or_default();
let parent_chunk_id: Option<String> = r.try_get(13).ok().flatten();
let child_chunk_ids: Vec<String> = r.try_get(14).unwrap_or_default();
let (rule, content_data) = if content.get("rule").is_some() {
let (rule, content_data) = if content.get("rule").is_some() {
let rule_str = content
.get("rule")
.and_then(|v| v.as_str())
@@ -2820,7 +2823,7 @@ impl PostgresDb {
file_id,
uuid: r.get("uuid"),
chunk_id: r.get("chunk_id"),
chunk_index: chunk_index as u32,
chunk_type,
rule,
fps: r.get("fps"),
@@ -2996,9 +2999,9 @@ impl PostgresDb {
start_time: f64,
end_time: f64,
) -> Result<Vec<Chunk>> {
let table = schema::table_name("chunks");
let table = "dev.chunk";
let rows = sqlx::query(&format!(
"SELECT file_id, uuid, chunk_id, chunk_index, chunk_type, start_time, end_time, fps, start_frame, end_frame, text_content, content, metadata, vector_id, frame_count, pre_chunk_ids, parent_chunk_id::text as parent_chunk_id, child_chunk_ids
"SELECT file_id, uuid, chunk_id, chunk_type, start_time, end_time, fps, start_frame, end_frame, text_content, content, metadata, vector_id, frame_count, pre_chunk_ids, parent_chunk_id::text as parent_chunk_id, child_chunk_ids
FROM {}
WHERE file_id = $1 AND start_time >= $2 AND end_time <= $3
ORDER BY start_time",
@@ -3013,8 +3016,7 @@ impl PostgresDb {
let chunks: Vec<Chunk> = rows
.into_iter()
.map(|r| {
let chunk_type_str: String = r.get(4);
let chunk_index: i32 = r.get(3);
let chunk_type_str: String = r.get(3);
let chunk_type = match chunk_type_str.as_str() {
"time" => ChunkType::TimeBased,
"sentence" => ChunkType::Sentence,
@@ -3024,12 +3026,12 @@ impl PostgresDb {
_ => ChunkType::TimeBased,
};
let content: serde_json::Value = r.get(11);
let metadata: Option<serde_json::Value> = r.get(12);
let content: serde_json::Value = r.get(10);
let metadata: Option<serde_json::Value> = r.get(11);
let pre_chunk_ids: Vec<i32> = r.try_get(15).unwrap_or_default();
let parent_chunk_id: Option<String> = r.try_get(16).ok().flatten();
let child_chunk_ids: Vec<String> = r.try_get(17).unwrap_or_default();
let pre_chunk_ids: Vec<i32> = r.try_get(14).unwrap_or_default();
let parent_chunk_id: Option<String> = r.try_get(15).ok().flatten();
let child_chunk_ids: Vec<String> = r.try_get(16).unwrap_or_default();
let (rule, content_data) = if content.get("rule").is_some() {
let rule_str = content
@@ -3054,7 +3056,7 @@ impl PostgresDb {
file_id,
uuid: r.get("uuid"),
chunk_id: r.get("chunk_id"),
chunk_index: chunk_index as u32,
chunk_type,
rule,
@@ -3082,9 +3084,9 @@ impl PostgresDb {
return Ok(vec![]);
}
let table = schema::table_name("chunks");
let table = "dev.chunk";
let rows = sqlx::query(&format!(
"SELECT file_id, uuid, chunk_id, chunk_index, chunk_type, fps, start_frame, end_frame, text_content, content, metadata, vector_id, frame_count, pre_chunk_ids, parent_chunk_id::text as parent_chunk_id, child_chunk_ids FROM {} WHERE chunk_id = ANY($1) ORDER BY chunk_index",
"SELECT file_id, uuid, chunk_id, chunk_type, fps, start_frame, end_frame, text_content, content, metadata, vector_id, frame_count, pre_chunk_ids, parent_chunk_id::text as parent_chunk_id, child_chunk_ids FROM {} WHERE chunk_id = ANY($1) ORDER BY id",
table
))
.bind(chunk_ids)
@@ -3094,8 +3096,7 @@ impl PostgresDb {
let chunks: Vec<Chunk> = rows
.into_iter()
.map(|r| {
let chunk_type_str: String = r.get(4);
let chunk_index: i32 = r.get(3);
let chunk_type_str: String = r.get(3);
let chunk_type = match chunk_type_str.as_str() {
"time" => ChunkType::TimeBased,
"sentence" => ChunkType::Sentence,
@@ -3135,7 +3136,7 @@ impl PostgresDb {
file_id,
uuid: r.get("uuid"),
chunk_id: r.get("chunk_id"),
chunk_index: chunk_index as u32,
chunk_type,
rule,
@@ -3192,7 +3193,7 @@ impl PostgresDb {
}
pub async fn update_vector_id(&self, chunk_id: &str, vector_id: &str) -> Result<()> {
let table = schema::table_name("chunks");
let table = "dev.chunk";
sqlx::query(&format!(
"UPDATE {} SET vector_id = $1 WHERE chunk_id = $2",
table
@@ -3214,12 +3215,12 @@ impl PostgresDb {
}
pub async fn search_text(&self, query: &str, chunk_type: Option<&str>) -> Result<Vec<Chunk>> {
let table = schema::table_name("chunks");
let table = "dev.chunk";
let query_pattern = format!("%{}%", query);
let sql = match chunk_type {
Some(_) => &format!("SELECT uuid, chunk_id, chunk_index, chunk_type, start_time, end_time, fps, start_frame, end_frame, content, metadata, vector_id, parent_chunk_id, child_chunk_ids FROM {} WHERE content->>'text' ILIKE $1 AND chunk_type = $2 ORDER BY chunk_index", table),
None => &format!("SELECT uuid, chunk_id, chunk_index, chunk_type, start_time, end_time, fps, start_frame, end_frame, content, metadata, vector_id, parent_chunk_id, child_chunk_ids FROM {} WHERE content->>'text' ILIKE $1 ORDER BY chunk_index", table),
Some(_) => &format!("SELECT uuid, chunk_id, chunk_type, start_time, end_time, fps, start_frame, end_frame, content, metadata, vector_id, parent_chunk_id, child_chunk_ids FROM {} WHERE content->>'text' ILIKE $1 AND chunk_type = $2 ORDER BY id", table),
None => &format!("SELECT uuid, chunk_id, chunk_type, start_time, end_time, fps, start_frame, end_frame, content, metadata, vector_id, parent_chunk_id, child_chunk_ids FROM {} WHERE content->>'text' ILIKE $1 ORDER BY id", table),
};
let chunks = if let Some(ct) = chunk_type {
@@ -3228,7 +3229,6 @@ impl PostgresDb {
(
String,
String,
i32,
String,
f64,
f64,
@@ -3252,7 +3252,6 @@ impl PostgresDb {
(
String,
String,
i32,
String,
f64,
f64,
@@ -3274,7 +3273,7 @@ impl PostgresDb {
let results: Vec<Chunk> = chunks
.into_iter()
.map(|r| {
let chunk_type = match r.3.as_str() {
let chunk_type = match r.2.as_str() {
"time_based" => ChunkType::TimeBased,
"sentence" => ChunkType::Sentence,
"cut" => ChunkType::Cut,
@@ -3284,29 +3283,29 @@ impl PostgresDb {
};
let content: serde_json::Value =
serde_json::from_str(&r.9).unwrap_or(serde_json::json!({}));
serde_json::from_str(&r.8).unwrap_or(serde_json::json!({}));
let metadata: Option<serde_json::Value> =
r.10.and_then(|m| serde_json::from_str(&m).ok());
r.9.and_then(|m| serde_json::from_str(&m).ok());
Chunk {
file_id: 0,
uuid: r.0,
chunk_id: r.1,
chunk_index: r.2 as u32,
chunk_type,
rule: ChunkRule::Rule1,
fps: r.6,
start_frame: r.7,
end_frame: r.8,
text_content: Some(r.9),
fps: r.5,
start_frame: r.6,
end_frame: r.7,
text_content: Some(r.8),
content,
metadata,
vector_id: r.11,
vector_id: r.10,
frame_count: 0,
pre_chunk_ids: vec![],
parent_chunk_id: r.12,
child_chunk_ids: r.13,
parent_chunk_id: r.11,
child_chunk_ids: r.12,
visual_stats: None,
}
})
@@ -3321,13 +3320,13 @@ impl PostgresDb {
uuid: Option<&str>,
limit: usize,
) -> Result<Vec<Bm25Result>> {
let table = schema::table_name("chunks");
let table = "dev.chunk";
let tsquery = self.prepare_tsquery(query).await?;
let sql = match uuid {
Some(_) => &format!(
r#"
SELECT c.chunk_id, c.file_uuid, c.chunk_index, c.chunk_type, c.start_frame, c.end_frame, c.fps, c.start_time, c.end_time,
SELECT c.chunk_id, c.file_uuid, c.chunk_type, c.start_frame, c.end_frame, c.fps, c.start_time, c.end_time,
c.text_content, GREATEST(ts_rank_cd(c.search_vector, to_tsquery('english', $1)), ts_rank_cd(pc.summary_tsvector, to_tsquery('english', $1))) as bm25_score,
c.visual_stats,
pc.metadata->'structured_summary' as scene_summary,
@@ -3342,7 +3341,7 @@ impl PostgresDb {
),
None => &format!(
r#"
SELECT c.chunk_id, c.file_uuid, c.chunk_index, c.chunk_type, c.start_frame, c.end_frame, c.fps, c.start_time, c.end_time,
SELECT c.chunk_id, c.file_uuid, c.chunk_type, c.start_frame, c.end_frame, c.fps, c.start_time, c.end_time,
c.text_content, GREATEST(ts_rank_cd(c.search_vector, to_tsquery('english', $1)), ts_rank_cd(pc.summary_tsvector, to_tsquery('english', $1))) as bm25_score,
c.visual_stats,
pc.metadata->'structured_summary' as scene_summary,
@@ -3406,7 +3405,7 @@ impl PostgresDb {
Bm25Result {
chunk_id: r.0,
uuid: r.1,
chunk_index: r.2 as u32,
chunk_type: r.3,
start_frame: r.4,
end_frame: r.5,
@@ -3472,7 +3471,7 @@ impl PostgresDb {
HybridSearchResult {
chunk_id: r.chunk_id.clone(),
uuid: r.uuid.clone(),
chunk_index: r.chunk_index,
chunk_type: r.chunk_type.clone(),
start_frame: r.start_frame,
end_frame: r.end_frame,
@@ -3526,7 +3525,7 @@ impl PostgresDb {
HybridSearchResult {
chunk_id: r.chunk_id.clone(),
uuid: r.uuid.clone(),
chunk_index: chunk_data.map(|c| c.chunk_index).unwrap_or(0),
chunk_type: chunk_data
.map(|c| c.chunk_type.as_str().to_string())
.unwrap_or_default(),
@@ -3779,7 +3778,6 @@ pub struct SceneSummary {
pub struct Bm25Result {
pub chunk_id: String,
pub uuid: String,
pub chunk_index: u32,
pub chunk_type: String,
pub start_frame: i64,
pub end_frame: i64,
@@ -3797,7 +3795,6 @@ pub struct Bm25Result {
pub struct HybridSearchResult {
pub uuid: String,
pub chunk_id: String,
pub chunk_index: u32,
pub chunk_type: String,
pub start_frame: i64,
pub end_frame: i64,
@@ -4443,7 +4440,7 @@ impl PostgresDb {
total_frames: u64,
) -> Result<()> {
let table = schema::table_name("videos");
let chunks_table = schema::table_name("chunks");
let chunks_table = "dev.chunk";
let pre_chunks_table = schema::table_name("pre_chunks");
// Query chunks count and frames
@@ -4622,7 +4619,7 @@ impl PostgresDb {
let results = sqlx::query_as::<_, SemanticSearchResult>(
r#"
SELECT
id, chunk_index as scene_order, start_time, end_time,
id as scene_order, start_time, end_time,
COALESCE(summary_text, text_content, '') as summary,
metadata,
(1 - (embedding <=> $1::vector)) as similarity
@@ -4820,7 +4817,7 @@ mod tests {
"file_id": 1,
"uuid": "test",
"chunk_id": "c1",
"chunk_index": 0,
"chunk_type": "time_based",
"rule": "rule1",
"start_time": 0.0,
@@ -4960,7 +4957,7 @@ mod tests {
let result = Bm25Result {
chunk_id: "sentence_001".to_string(),
uuid: "test-uuid".to_string(),
chunk_index: 1,
chunk_type: "sentence".to_string(),
start_frame: 0,
end_frame: 150,
@@ -4985,7 +4982,7 @@ mod tests {
let result = HybridSearchResult {
chunk_id: "sentence_001".to_string(),
uuid: "test-uuid".to_string(),
chunk_index: 1,
chunk_type: "sentence".to_string(),
start_frame: 0,
end_frame: 150,

View File

@@ -120,9 +120,16 @@ impl QdrantDb {
.json(&body)
.send()
.await
.context(format!("Failed to create Qdrant collection: {}", collection))?;
.context(format!(
"Failed to create Qdrant collection: {}",
collection
))?;
tracing::info!("Created Qdrant collection: {} (dim={})", collection, vector_dim);
tracing::info!(
"Created Qdrant collection: {} (dim={})",
collection,
vector_dim
);
Ok(())
}

View File

@@ -129,7 +129,7 @@ impl SyncDb {
let chunk = Chunk::from_seconds(
0, // file_id - will be set later
uuid.to_string(),
i as u32,
format!("{}", i),
ChunkType::Sentence,
ChunkRule::Rule1,
segment.start,

View File

@@ -43,8 +43,7 @@ impl Embedder {
}
/// Returns the base URL of the embedding service.
///
/// Reads the `MOMENTRY_EMBED_URL` environment variable and falls back to
/// `http://localhost:11434` when the variable is unset or is not valid
/// Unicode (both cases surface as `Err` from `std::env::var`).
fn default_url() -> String {
    std::env::var("MOMENTRY_EMBED_URL").unwrap_or_else(|_| "http://localhost:11434".to_string())
}
pub async fn embed_text(&self, text: &str) -> Result<Vec<f32>> {
@@ -91,7 +90,12 @@ impl Embedder {
.await
.context("Failed to parse embedding response")?;
Ok(result.data.into_iter().next().map(|d| d.embedding).unwrap_or_default())
Ok(result
.data
.into_iter()
.next()
.map(|d| d.embedding)
.unwrap_or_default())
} else {
let url = format!("{}/api/embeddings", self.base_url);
let response = self

View File

@@ -1,11 +1,8 @@
use anyhow::{Context, Result};
use serde::{Deserialize, Serialize};
use std::time::Duration;
use super::executor::PythonExecutor;
const ASR_TIMEOUT: Duration = Duration::from_secs(1800); // 30 minutes
#[derive(Debug, Serialize, Deserialize)]
pub struct AsrResult {
pub language: Option<String>,
@@ -36,7 +33,7 @@ pub async fn process_asr(
&[video_path, output_path],
uuid,
"ASR",
Some(ASR_TIMEOUT),
None,
)
.await
.with_context(|| format!("Failed to run {:?}", script_path))?;

View File

@@ -247,7 +247,10 @@ impl PythonExecutor {
let mut partial_path = out.to_path_buf();
partial_path.set_extension("json.partial");
let _ = std::fs::rename(tmp, &partial_path);
tracing::warn!("[Executor] Partial output preserved: {:?}", partial_path);
tracing::warn!(
"[Executor] Partial output preserved: {:?}",
partial_path
);
} else {
let mut err_path = out.to_path_buf();
err_path.set_extension("json.err");

View File

@@ -131,7 +131,7 @@ fn create_fixed_frame_chunks(
let chunk = crate::core::chunk::Chunk::from_yolo_frames(
file_id,
uuid.to_string(),
chunk_index,
format!("vis_{}", chunk_index),
start_frame,
end_frame,
fps,
@@ -190,7 +190,7 @@ fn create_similarity_based_chunks(
let chunk = crate::core::chunk::Chunk::from_yolo_frames(
file_id,
uuid.to_string(),
chunk_index,
format!("vis_{}", chunk_index),
current_start_frame,
end_frame,
fps,
@@ -214,7 +214,7 @@ fn create_similarity_based_chunks(
let chunk = crate::core::chunk::Chunk::from_yolo_frames(
file_id,
uuid.to_string(),
chunk_index,
format!("vis_{}", chunk_index),
current_start_frame,
end_frame,
fps,

View File

@@ -13,11 +13,17 @@ struct TmdbIdentity {
}
/// Computes the cosine similarity of two embedding vectors.
///
/// Returns `0.0` instead of a similarity when the inputs cannot be compared:
/// the slices differ in length, either is empty, or either has zero
/// magnitude (which would otherwise divide by zero).
fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
    if a.len() != b.len() || a.is_empty() {
        return 0.0;
    }
    let dot: f32 = a.iter().zip(b).map(|(x, y)| x * y).sum();
    let na: f32 = a.iter().map(|x| x * x).sum::<f32>().sqrt();
    let nb: f32 = b.iter().map(|x| x * x).sum::<f32>().sqrt();
    // A zero-length vector has no direction; treat it as "no similarity".
    if na == 0.0 || nb == 0.0 {
        0.0
    } else {
        dot / (na * nb)
    }
}
/// Match face detections against TMDb identities using iterative multi-angle propagation.
@@ -42,10 +48,11 @@ pub async fn match_faces_against_tmdb(db: &PostgresDb, file_uuid: &str) -> Resul
let fd_rows = sqlx::query_as::<_, (i32, Vec<f32>)>(
"SELECT trace_id, embedding FROM dev.face_detections \
WHERE file_uuid=$1 AND trace_id IS NOT NULL AND embedding IS NOT NULL \
ORDER BY trace_id"
ORDER BY trace_id",
)
.bind(file_uuid)
.fetch_all(pool).await?;
.fetch_all(pool)
.await?;
if fd_rows.is_empty() {
info!("[TKG-MATCH] No face detections for {}", file_uuid);
@@ -77,14 +84,23 @@ pub async fn match_faces_against_tmdb(db: &PostgresDb, file_uuid: &str) -> Resul
for (id, name, tmdb_emb) in &tmdb_rows {
for face in faces {
let s = cosine_similarity(face, tmdb_emb);
if s > best_sim { best_sim = s; best_id = *id; best_name = name.clone(); }
if s > best_sim {
best_sim = s;
best_id = *id;
best_name = name.clone();
}
}
}
if best_sim >= TH {
matched.insert(tid, (best_id, best_name));
}
}
info!("[TKG-MATCH] Round 1: {} ({}/{})", matched.len(), matched.len() * 100 / total, total);
info!(
"[TKG-MATCH] Round 1: {} ({}/{})",
matched.len(),
matched.len() * 100 / total,
total
);
// Round 2+: propagate
for round_n in 2..=10 {
@@ -98,7 +114,9 @@ pub async fn match_faces_against_tmdb(db: &PostgresDb, file_uuid: &str) -> Resul
let mut new_matches: Vec<(i32, i32, String)> = Vec::new();
for (&tid, faces) in &trace_faces {
if matched.contains_key(&tid) || faces.is_empty() { continue; }
if matched.contains_key(&tid) || faces.is_empty() {
continue;
}
let ref_face = &faces[0];
let mut best_id = 0i32;
let mut best_name = String::new();
@@ -106,13 +124,19 @@ pub async fn match_faces_against_tmdb(db: &PostgresDb, file_uuid: &str) -> Resul
for (&id, seed_faces) in &seed_pool {
for seed in seed_faces {
let s = cosine_similarity(ref_face, seed);
if s > best_sim { best_sim = s; best_id = id; }
if s > best_sim {
best_sim = s;
best_id = id;
}
}
}
if best_sim >= TH {
// Look up name for this id
for (id, name, _) in &tmdb_rows {
if *id == best_id { best_name = name.clone(); break; }
if *id == best_id {
best_name = name.clone();
break;
}
}
new_matches.push((tid, best_id, best_name));
}
@@ -121,7 +145,9 @@ pub async fn match_faces_against_tmdb(db: &PostgresDb, file_uuid: &str) -> Resul
matched.insert(tid, (id, name));
}
let new = matched.len() - prev;
if new < 5 { break; }
if new < 5 {
break;
}
}
// Step 4: Quality control
@@ -129,41 +155,62 @@ pub async fn match_faces_against_tmdb(db: &PostgresDb, file_uuid: &str) -> Resul
let mut after_qc = HashMap::new();
for (&tid, &(id, ref name)) in &matched {
let cnt: i64 = sqlx::query_scalar(
"SELECT COUNT(*) FROM dev.face_detections WHERE file_uuid=$1 AND trace_id=$2"
"SELECT COUNT(*) FROM dev.face_detections WHERE file_uuid=$1 AND trace_id=$2",
)
.bind(file_uuid).bind(tid)
.fetch_one(pool).await.unwrap_or(0);
.bind(file_uuid)
.bind(tid)
.fetch_one(pool)
.await
.unwrap_or(0);
if cnt >= 4 {
after_qc.insert(tid, (id, name.clone()));
} else {
info!("[TKG-QC] trace {} removed: only {} face(s), need >= 4", tid, cnt);
info!(
"[TKG-QC] trace {} removed: only {} face(s), need >= 4",
tid, cnt
);
}
}
let matched = after_qc;
let removed_low = total - matched.len();
if removed_low > 0 {
info!("[TKG-QC] Removed {} low-confidence traces (< 4 faces)", removed_low);
info!(
"[TKG-QC] Removed {} low-confidence traces (< 4 faces)",
removed_low
);
}
// 4b: Temporal collision check
let removed_collisions = quality_check_temporal_collisions(pool, file_uuid).await?;
if removed_collisions > 0 {
info!("[TKG-QC] Resolved {} temporal collisions", removed_collisions);
info!(
"[TKG-QC] Resolved {} temporal collisions",
removed_collisions
);
}
// Step 5: Update DB
let mut updated = 0usize;
for (&tid, &(id, _)) in &matched {
let r = sqlx::query(
"UPDATE dev.face_detections SET identity_id=$1 WHERE file_uuid=$2 AND trace_id=$3"
"UPDATE dev.face_detections SET identity_id=$1 WHERE file_uuid=$2 AND trace_id=$3",
)
.bind(id).bind(file_uuid).bind(tid)
.execute(pool).await?;
if r.rows_affected() > 0 { updated += 1; }
.bind(id)
.bind(file_uuid)
.bind(tid)
.execute(pool)
.await?;
if r.rows_affected() > 0 {
updated += 1;
}
}
info!("[TKG-MATCH] Done: {}/{} traces matched ({}%)",
matched.len(), total, matched.len() * 100 / total);
info!(
"[TKG-MATCH] Done: {}/{} traces matched ({}%)",
matched.len(),
total,
matched.len() * 100 / total
);
Ok(updated)
}
@@ -185,10 +232,11 @@ async fn quality_check_temporal_collisions(pool: &sqlx::PgPool, file_uuid: &str)
AND a.identity_id IS NOT NULL
AND a.identity_id = b.identity_id
ORDER BY a.identity_id, a.frame_number
"#
"#,
)
.bind(file_uuid)
.fetch_all(pool).await?;
.fetch_all(pool)
.await?;
if collisions.is_empty() {
return Ok(0);
@@ -221,10 +269,12 @@ async fn quality_check_temporal_collisions(pool: &sqlx::PgPool, file_uuid: &str)
let victim_cnt = if cnt_a <= cnt_b { cnt_a } else { cnt_b };
sqlx::query(
"UPDATE dev.face_detections SET identity_id=NULL WHERE file_uuid=$1 AND trace_id=$2"
"UPDATE dev.face_detections SET identity_id=NULL WHERE file_uuid=$1 AND trace_id=$2",
)
.bind(file_uuid).bind(victim)
.execute(pool).await?;
.bind(file_uuid)
.bind(victim)
.execute(pool)
.await?;
unbound += 1;
warn!("[TKG-QC] Collision identity={}: trace {} vs trace {} ({} overlap frames). Unbound trace {} ({} detections)",

View File

@@ -2147,7 +2147,7 @@ async fn main() -> Result<()> {
let mut chunk = Chunk::from_seconds(
file_id as i32,
uuid.clone(),
i as u32,
format!("{}", i),
ChunkType::Sentence,
ChunkRule::Rule1,
seg.start,
@@ -2193,7 +2193,7 @@ async fn main() -> Result<()> {
let chunk = Chunk::from_seconds(
file_id as i32,
uuid.clone(),
i as u32,
format!("cut_{}", i),
ChunkType::Cut,
ChunkRule::Rule1,
scene.start_time,
@@ -2216,7 +2216,7 @@ async fn main() -> Result<()> {
let chunk = Chunk::new(
file_id as i32,
uuid.clone(),
i as u32,
format!("time_{}", i),
ChunkType::TimeBased,
ChunkRule::Rule1,
tc.start_frame,

View File

@@ -48,19 +48,25 @@ pub fn verify_output(processor: &ProcessorType, file_uuid: &str) -> Verification
let json_str = match std::fs::read_to_string(&output_path) {
Ok(s) => s,
Err(e) => return VerificationResult::fail(proc_name, file_uuid, &format!("unreadable: {}", e)),
Err(e) => {
return VerificationResult::fail(proc_name, file_uuid, &format!("unreadable: {}", e))
}
};
let value: serde_json::Value = match serde_json::from_str(&json_str) {
Ok(v) => v,
Err(e) => return VerificationResult::fail(proc_name, file_uuid, &format!("invalid JSON: {}", e)),
Err(e) => {
return VerificationResult::fail(proc_name, file_uuid, &format!("invalid JSON: {}", e))
}
};
match processor {
ProcessorType::Asr | ProcessorType::Asrx => {
let segs = value.get("segments").and_then(|v| v.as_array());
match segs {
Some(s) if s.is_empty() => VerificationResult::fail(proc_name, file_uuid, "0 segments"),
Some(s) if s.is_empty() => {
VerificationResult::fail(proc_name, file_uuid, "0 segments")
}
Some(s) => VerificationResult::ok(proc_name, file_uuid),
None => VerificationResult::fail(proc_name, file_uuid, "missing 'segments' field"),
}
@@ -68,7 +74,9 @@ pub fn verify_output(processor: &ProcessorType, file_uuid: &str) -> Verification
ProcessorType::Cut => {
let scenes = value.get("scenes").and_then(|v| v.as_array());
match scenes {
Some(s) if s.is_empty() => VerificationResult::fail(proc_name, file_uuid, "0 scenes"),
Some(s) if s.is_empty() => {
VerificationResult::fail(proc_name, file_uuid, "0 scenes")
}
Some(_) => VerificationResult::ok(proc_name, file_uuid),
None => VerificationResult::fail(proc_name, file_uuid, "missing 'scenes' field"),
}
@@ -76,15 +84,22 @@ pub fn verify_output(processor: &ProcessorType, file_uuid: &str) -> Verification
ProcessorType::Yolo => {
let frames = value.get("frames").and_then(|v| v.as_object());
match frames {
Some(f) if f.is_empty() => VerificationResult::fail(proc_name, file_uuid, "0 frames"),
Some(f) if f.is_empty() => {
VerificationResult::fail(proc_name, file_uuid, "0 frames")
}
Some(_) => VerificationResult::ok(proc_name, file_uuid),
None => VerificationResult::fail(proc_name, file_uuid, "missing 'frames' field"),
}
}
ProcessorType::Face => {
let faces = value.get("faces").or_else(|| value.get("frames")).and_then(|v| v.as_array());
let faces = value
.get("faces")
.or_else(|| value.get("frames"))
.and_then(|v| v.as_array());
match faces {
Some(f) if f.is_empty() => VerificationResult::fail(proc_name, file_uuid, "0 faces"),
Some(f) if f.is_empty() => {
VerificationResult::fail(proc_name, file_uuid, "0 faces")
}
Some(_) => VerificationResult::ok(proc_name, file_uuid),
None => VerificationResult::fail(proc_name, file_uuid, "missing 'faces'/'frames'"),
}
@@ -92,7 +107,9 @@ pub fn verify_output(processor: &ProcessorType, file_uuid: &str) -> Verification
ProcessorType::Ocr => {
let frames = value.get("frames").and_then(|v| v.as_array());
match frames {
Some(f) if f.is_empty() => VerificationResult::fail(proc_name, file_uuid, "0 frames"),
Some(f) if f.is_empty() => {
VerificationResult::fail(proc_name, file_uuid, "0 frames")
}
Some(_) => VerificationResult::ok(proc_name, file_uuid),
None => VerificationResult::fail(proc_name, file_uuid, "missing 'frames'"),
}
@@ -100,7 +117,9 @@ pub fn verify_output(processor: &ProcessorType, file_uuid: &str) -> Verification
ProcessorType::Pose => {
let frames = value.get("frames").and_then(|v| v.as_array());
match frames {
Some(f) if f.is_empty() => VerificationResult::fail(proc_name, file_uuid, "0 frames"),
Some(f) if f.is_empty() => {
VerificationResult::fail(proc_name, file_uuid, "0 frames")
}
Some(_) => VerificationResult::ok(proc_name, file_uuid),
None => VerificationResult::fail(proc_name, file_uuid, "missing 'frames'"),
}
@@ -108,7 +127,9 @@ pub fn verify_output(processor: &ProcessorType, file_uuid: &str) -> Verification
ProcessorType::Scene => {
let scenes = value.get("scenes").and_then(|v| v.as_array());
match scenes {
Some(s) if s.is_empty() => VerificationResult::fail(proc_name, file_uuid, "0 scenes"),
Some(s) if s.is_empty() => {
VerificationResult::fail(proc_name, file_uuid, "0 scenes")
}
Some(_) => VerificationResult::ok(proc_name, file_uuid),
None => VerificationResult::ok(proc_name, file_uuid),
}
@@ -142,7 +163,10 @@ pub fn cleanup_temp_files(processor: &ProcessorType, file_uuid: &str) {
}
}
if removed > 0 {
info!("Cleaned up {} temp files for {}.{}", removed, file_uuid, proc_name);
info!(
"Cleaned up {} temp files for {}.{}",
removed, file_uuid, proc_name
);
}
}
}

View File

@@ -6,11 +6,11 @@ use std::time::Duration;
use tokio::time::sleep;
use tracing::{error, info, warn};
use crate::api::five_w1h_agent_api::run_5w1h_agent;
use crate::api::identity_agent_api::run_identity_agent;
use crate::core::chunk::{rule1_ingest, rule3_ingest};
use crate::core::config::OUTPUT_DIR;
use crate::core::db::qdrant_db::QdrantDb;
use crate::api::five_w1h_agent_api::run_5w1h_agent;
use crate::api::identity_agent_api::run_identity_agent;
use crate::core::db::{
MonitorJobStatus, PostgresDb, ProcessorJobStatus, RedisClient, VectorPayload, VideoStatus,
};
@@ -72,7 +72,7 @@ impl JobWorker {
AND id NOT IN (
SELECT DISTINCT job_id FROM dev.processor_results
WHERE status IN ('pending', 'running')
)"
)",
)
.execute(self.db.pool())
.await
@@ -168,7 +168,10 @@ impl JobWorker {
} else {
job.processors.len()
};
let should_retry = self.check_and_complete_job(job.id, &job.uuid, expected_count).await.is_ok();
let should_retry = self
.check_and_complete_job(job.id, &job.uuid, expected_count)
.await
.is_ok();
if should_retry && self.processor_pool.can_start().await {
if let Err(e) = self.process_job(job.clone()).await {
error!("Failed to reprocess job {}: {}", job.uuid, e);
@@ -329,8 +332,11 @@ impl JobWorker {
.await?;
// Check if output file already exists on disk (source of truth)
let output_path =
PathBuf::from(OUTPUT_DIR.as_str()).join(format!("{}.{}.json", job.uuid, processor_type.as_str()));
let output_path = PathBuf::from(OUTPUT_DIR.as_str()).join(format!(
"{}.{}.json",
job.uuid,
processor_type.as_str()
));
if output_path.exists() {
info!(
"Processor {} output file exists, marking completed and skipping",
@@ -361,23 +367,26 @@ impl JobWorker {
.await?;
started_count += 1;
// Overwrite the result_map entry so that dependency checks evaluate correctly
result_map.insert(*processor_type, crate::core::db::ProcessorResult {
id: 0,
job_id: job.id,
processor_type: *processor_type,
status: ProcessorJobStatus::Completed,
started_at: None,
completed_at: None,
duration_secs: None,
chunks_produced: 0,
frames_processed: total_frames as i32,
output_size_bytes: 0,
error_message: None,
output_data: None,
retry_count: 0,
created_at: String::new(),
updated_at: String::new(),
});
result_map.insert(
*processor_type,
crate::core::db::ProcessorResult {
id: 0,
job_id: job.id,
processor_type: *processor_type,
status: ProcessorJobStatus::Completed,
started_at: None,
completed_at: None,
duration_secs: None,
chunks_produced: 0,
frames_processed: total_frames as i32,
output_size_bytes: 0,
error_message: None,
output_data: None,
retry_count: 0,
created_at: String::new(),
updated_at: String::new(),
},
);
continue;
}
@@ -524,7 +533,12 @@ impl JobWorker {
info!("Backup already exists: {}, skipping", bak_path.display());
} else {
match std::fs::copy(entry.path(), &bak_path) {
Ok(bytes) => info!("Backed up {} -> {} ({} bytes)", name, bak_path.display(), bytes),
Ok(bytes) => info!(
"Backed up {} -> {} ({} bytes)",
name,
bak_path.display(),
bytes
),
Err(e) => warn!("Failed to backup {}: {}", name, e),
}
}
@@ -568,12 +582,18 @@ impl JobWorker {
} else {
job.processors.len()
};
self.check_and_complete_job(job.id, &job.uuid, expected_count).await?;
self.check_and_complete_job(job.id, &job.uuid, expected_count)
.await?;
Ok(())
}
async fn check_and_complete_job(&self, job_id: i32, uuid: &str, expected_count: usize) -> Result<()> {
async fn check_and_complete_job(
&self,
job_id: i32,
uuid: &str,
expected_count: usize,
) -> Result<()> {
let results = self.db.get_processor_results_by_job(job_id).await?;
info!(
@@ -676,24 +696,41 @@ impl JobWorker {
info!("✅ Rule 1 Ingestion completed: {} chunks inserted.", count);
// Automatically vectorize new sentence chunks
if count > 0 {
info!("📝 Starting automatic vectorize for {} chunks...", count);
if let Err(e) = Self::vectorize_chunks(&db_clone, &uuid_clone).await {
error!("❌ Auto-vectorize failed for {}: {}", uuid_clone, e);
info!(
"📝 Starting automatic vectorize for {} chunks...",
count
);
if let Err(e) =
Self::vectorize_chunks(&db_clone, &uuid_clone).await
{
error!(
"❌ Auto-vectorize failed for {}: {}",
uuid_clone, e
);
}
}
// Phase 1 release: deliver sentence chunk embeddings
info!("📦 Phase 1 release packaging...");
let executor = match crate::core::processor::PythonExecutor::new() {
Ok(ex) => ex,
Err(e) => { error!("Failed PythonExecutor for release pack: {}", e); return; }
Err(e) => {
error!("Failed PythonExecutor for release pack: {}", e);
return;
}
};
match executor.run(
"release_pack.py",
&["--phase", "1", "--file-uuid", &uuid_clone],
None, "RELEASE_P1",
Some(std::time::Duration::from_secs(120)),
).await {
Ok(()) => info!("✅ Phase 1 release packaged for {}", uuid_clone),
match executor
.run(
"release_pack.py",
&["--phase", "1", "--file-uuid", &uuid_clone],
None,
"RELEASE_P1",
Some(std::time::Duration::from_secs(120)),
)
.await
{
Ok(()) => {
info!("✅ Phase 1 release packaged for {}", uuid_clone)
}
Err(e) => error!("❌ Phase 1 release pack failed: {}", e),
}
}
@@ -851,14 +888,21 @@ impl JobWorker {
info!("📦 Phase 2 release packaging...");
let executor = match crate::core::processor::PythonExecutor::new() {
Ok(ex) => ex,
Err(e) => { error!("Failed PythonExecutor for release pack: {}", e); return; }
Err(e) => {
error!("Failed PythonExecutor for release pack: {}", e);
return;
}
};
match executor.run(
"release_pack.py",
&["--phase", "2", "--file-uuid", &uuid_clone],
None, "RELEASE_P2",
Some(std::time::Duration::from_secs(120)),
).await {
match executor
.run(
"release_pack.py",
&["--phase", "2", "--file-uuid", &uuid_clone],
None,
"RELEASE_P2",
Some(std::time::Duration::from_secs(120)),
)
.await
{
Ok(()) => info!("✅ Phase 2 release packaged for {}", uuid_clone),
Err(e) => error!("❌ Phase 2 release pack failed: {}", e),
}
@@ -970,7 +1014,10 @@ impl JobWorker {
}
let total = rows.len();
info!("[Vectorize] Starting vectorize of {} chunks for {}", total, uuid);
info!(
"[Vectorize] Starting vectorize of {} chunks for {}",
total, uuid
);
let mut stored = 0usize;
for (chunk_id, _chunk_type, text, start_time, end_time, _content_str) in &rows {
@@ -998,7 +1045,10 @@ impl JobWorker {
}
stored += 1;
if stored % 50 == 0 {
info!("[Vectorize] {}/{} vectors stored for {}", stored, total, uuid);
info!(
"[Vectorize] {}/{} vectors stored for {}",
stored, total, uuid
);
}
}
Err(e) => {
@@ -1007,7 +1057,10 @@ impl JobWorker {
}
}
info!("[Vectorize] Completed: {}/{} vectors stored for {}", stored, total, uuid);
info!(
"[Vectorize] Completed: {}/{} vectors stored for {}",
stored, total, uuid
);
Ok(())
}
}

View File

@@ -142,15 +142,21 @@ impl ProcessorPool {
.flatten();
if let Some(pid) = old_pid {
if pid > 0 {
warn!("[PID] Killing existing process {} for {}/{}", pid, uuid, processor);
unsafe { libc::kill(pid, libc::SIGKILL); }
warn!(
"[PID] Killing existing process {} for {}/{}",
pid, uuid, processor
);
unsafe {
libc::kill(pid, libc::SIGKILL);
}
}
}
}
}
pub async fn start_processor(&self, task: ProcessorTask) -> Result<()> {
Self::kill_existing_processor(&*self.redis, &task.job.uuid, task.processor_type.as_str()).await;
Self::kill_existing_processor(&*self.redis, &task.job.uuid, task.processor_type.as_str())
.await;
let (cancel_tx, cancel_rx) = mpsc::channel(1);
let job_id = task.job.id;
@@ -231,15 +237,16 @@ impl ProcessorPool {
match result {
Ok(output) => {
// Acceptance (verification) agent checks the produced output
let verification = crate::verification::verifier::verify_output(
&processor_type,
&job.uuid,
);
let verification =
crate::verification::verifier::verify_output(&processor_type, &job.uuid);
if verification.passed {
info!(
"Processor {} completed and verified for job {} ({} chunks, {} frames)",
processor_name, job.uuid, output.chunks_produced, output.frames_processed
processor_name,
job.uuid,
output.chunks_produced,
output.frames_processed
);
// Clean up temporary backups