From 8405d60797f34564c532cf84b7df3253258e106a Mon Sep 17 00:00:00 2001 From: Accusys Date: Fri, 8 May 2026 08:12:45 +0800 Subject: [PATCH] Fix 5W1H+: max_tokens 2048->4096, skip empty summaries - max_tokens was too low, truncating LLM JSON output - Added guard to skip storing parent and child summaries when parent_summary is empty - Applied fix to all 3 entry points (analyze, batch, pipeline) --- src/api/five_w1h_agent_api.rs | 39 +++++++++++++++++------------------ 1 file changed, 19 insertions(+), 20 deletions(-) diff --git a/src/api/five_w1h_agent_api.rs b/src/api/five_w1h_agent_api.rs index e71631b..96149ec 100644 --- a/src/api/five_w1h_agent_api.rs +++ b/src/api/five_w1h_agent_api.rs @@ -316,7 +316,7 @@ Rules: {"role": "user", "content": prompt} ], "temperature": 0.1, - "max_tokens": 2048, + "max_tokens": 4096, "stream": false }); @@ -443,14 +443,13 @@ async fn analyze_5w1h( let result = summarize_one_scene(&db, &req.file_uuid, cut, &sentences, &context).await .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?; - if let Err(e) = store_parent_summary(&db, &cut.chunk_id, &req.file_uuid, &result.parent_summary, &result.five_w1h, &sentences).await { - tracing::error!("[5W1H] parent: {}", e); - } - if let Err(e) = store_child_summaries(&db, &req.file_uuid, &result.child_summaries).await { - tracing::error!("[5W1H] child: {}", e); - } - if !result.parent_summary.is_empty() { + if let Err(e) = store_parent_summary(&db, &cut.chunk_id, &req.file_uuid, &result.parent_summary, &result.five_w1h, &sentences).await { + tracing::error!("[5W1H] parent: {}", e); + } + if let Err(e) = store_child_summaries(&db, &req.file_uuid, &result.child_summaries).await { + tracing::error!("[5W1H] child: {}", e); + } prev_context.push(format!("Scene {} (t={:.0}s): {}", cut.chunk_index, cut.start_time, result.parent_summary)); } processed += 1; @@ -480,9 +479,9 @@ async fn batch_analyze_5w1h( if sentences.is_empty() { continue; } let context = prev_context.join("\n"); if let Ok(result) = 
summarize_one_scene(&db, uuid, cut, &sentences, &context).await { - let _ = store_parent_summary(&db, &cut.chunk_id, uuid, &result.parent_summary, &result.five_w1h, &sentences).await; - let _ = store_child_summaries(&db, uuid, &result.child_summaries).await; if !result.parent_summary.is_empty() { + let _ = store_parent_summary(&db, &cut.chunk_id, uuid, &result.parent_summary, &result.five_w1h, &sentences).await; + let _ = store_child_summaries(&db, uuid, &result.child_summaries).await; prev_context.push(format!("Scene {} (t={:.0}s): {}", cut.chunk_index, cut.start_time, result.parent_summary)); } } @@ -532,18 +531,18 @@ pub async fn run_5w1h_agent(db: &PostgresDb, file_uuid: &str) -> anyhow::Result< let sentences = fetch_sentences_in_scene(db, file_uuid, cut).await?; if sentences.is_empty() { continue; } - let context = prev_context.join("\n"); - match summarize_one_scene(db, file_uuid, cut, &sentences, &context).await { - Ok(result) => { - let _ = store_parent_summary(db, &cut.chunk_id, file_uuid, &result.parent_summary, &result.five_w1h, &sentences).await; - let _ = store_child_summaries(db, file_uuid, &result.child_summaries).await; - if !result.parent_summary.is_empty() { - prev_context.push(format!("Scene {} (t={:.0}s): {}", cut.chunk_index, cut.start_time, result.parent_summary)); + let context = prev_context.join("\n"); + match summarize_one_scene(db, file_uuid, cut, &sentences, &context).await { + Ok(result) => { + if !result.parent_summary.is_empty() { + let _ = store_parent_summary(db, &cut.chunk_id, file_uuid, &result.parent_summary, &result.five_w1h, &sentences).await; + let _ = store_child_summaries(db, file_uuid, &result.child_summaries).await; + prev_context.push(format!("Scene {} (t={:.0}s): {}", cut.chunk_index, cut.start_time, result.parent_summary)); + } + processed += 1; } - processed += 1; + Err(e) => tracing::error!("[5W1H] Scene {} failed: {}", cut.chunk_id, e), } - Err(e) => tracing::error!("[5W1H] Scene {} failed: {}", cut.chunk_id, e), - } 
} tracing::info!("[5W1H] Done for {}: {}/{} scenes", file_uuid, processed, total);