feat: verification agent for processor output validation

- New src/verification/ module: verify_output() checks JSON structure/completeness per processor type
- Worker: after processor succeeds, verification agent gates the result
- Passed -> mark completed + cleanup_temp_files (remove .tmp/.partial/.err/timestamp backups)
- Failed -> mark failed with verification details, preserve files for inspection
- cleanup_temp_files() keeps only the canonical {uuid}.{proc}.json
This commit is contained in:
Accusys
2026-05-09 13:30:00 +08:00
parent e068b70777
commit 7237a1811e
5 changed files with 368 additions and 30 deletions

View File

@@ -4,6 +4,8 @@ pub mod api;
pub mod ui;
pub mod verification;
pub mod watcher;
pub mod worker;

3
src/verification/mod.rs Normal file
View File

@@ -0,0 +1,3 @@
pub mod verifier;
pub use verifier::{verify_output, VerificationResult, VerifierError};

View File

@@ -0,0 +1,148 @@
use crate::core::config::OUTPUT_DIR;
use crate::core::db::ProcessorType;
use anyhow::Result;
use std::path::PathBuf;
use tracing::info;
/// Verdict produced by checking one processor's output for one file.
///
/// Build instances with [`VerificationResult::ok`] or [`VerificationResult::fail`];
/// both store exactly one entry in `details`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct VerificationResult {
    /// `true` when the output was accepted.
    pub passed: bool,
    /// Name of the processor whose output was checked.
    pub processor: String,
    /// Identifier of the file the output belongs to.
    pub file_uuid: String,
    /// Human-readable notes explaining the verdict.
    pub details: Vec<String>,
}

impl VerificationResult {
    /// Creates a passing result whose only detail is `"verification passed"`.
    pub fn ok(processor: &str, file_uuid: &str) -> Self {
        Self {
            passed: true,
            processor: processor.to_owned(),
            file_uuid: file_uuid.to_owned(),
            details: vec!["verification passed".to_owned()],
        }
    }

    /// Creates a failing result whose only detail is `reason`.
    pub fn fail(processor: &str, file_uuid: &str, reason: &str) -> Self {
        Self {
            passed: false,
            processor: processor.to_owned(),
            file_uuid: file_uuid.to_owned(),
            details: vec![reason.to_owned()],
        }
    }
}
/// Error type exported by the verification module, carrying a free-form reason.
///
/// Implements [`std::fmt::Display`] and [`std::error::Error`] so it can be propagated
/// with `?` into `anyhow::Error` or `Box<dyn Error>`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct VerifierError {
    /// Human-readable explanation of the failure.
    pub reason: String,
}

impl std::fmt::Display for VerifierError {
    /// Writes the stored `reason` verbatim.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(&self.reason)
    }
}

impl std::error::Error for VerifierError {}
pub fn verify_output(processor: &ProcessorType, file_uuid: &str) -> VerificationResult {
let proc_name = processor.as_str();
let output_path =
PathBuf::from(OUTPUT_DIR.as_str()).join(format!("{}.{}.json", file_uuid, proc_name));
if !output_path.exists() {
return VerificationResult::fail(proc_name, file_uuid, "output file not found");
}
let json_str = match std::fs::read_to_string(&output_path) {
Ok(s) => s,
Err(e) => return VerificationResult::fail(proc_name, file_uuid, &format!("unreadable: {}", e)),
};
let value: serde_json::Value = match serde_json::from_str(&json_str) {
Ok(v) => v,
Err(e) => return VerificationResult::fail(proc_name, file_uuid, &format!("invalid JSON: {}", e)),
};
match processor {
ProcessorType::Asr | ProcessorType::Asrx => {
let segs = value.get("segments").and_then(|v| v.as_array());
match segs {
Some(s) if s.is_empty() => VerificationResult::fail(proc_name, file_uuid, "0 segments"),
Some(s) => VerificationResult::ok(proc_name, file_uuid),
None => VerificationResult::fail(proc_name, file_uuid, "missing 'segments' field"),
}
}
ProcessorType::Cut => {
let scenes = value.get("scenes").and_then(|v| v.as_array());
match scenes {
Some(s) if s.is_empty() => VerificationResult::fail(proc_name, file_uuid, "0 scenes"),
Some(_) => VerificationResult::ok(proc_name, file_uuid),
None => VerificationResult::fail(proc_name, file_uuid, "missing 'scenes' field"),
}
}
ProcessorType::Yolo => {
let frames = value.get("frames").and_then(|v| v.as_object());
match frames {
Some(f) if f.is_empty() => VerificationResult::fail(proc_name, file_uuid, "0 frames"),
Some(_) => VerificationResult::ok(proc_name, file_uuid),
None => VerificationResult::fail(proc_name, file_uuid, "missing 'frames' field"),
}
}
ProcessorType::Face => {
let faces = value.get("faces").or_else(|| value.get("frames")).and_then(|v| v.as_array());
match faces {
Some(f) if f.is_empty() => VerificationResult::fail(proc_name, file_uuid, "0 faces"),
Some(_) => VerificationResult::ok(proc_name, file_uuid),
None => VerificationResult::fail(proc_name, file_uuid, "missing 'faces'/'frames'"),
}
}
ProcessorType::Ocr => {
let frames = value.get("frames").and_then(|v| v.as_array());
match frames {
Some(f) if f.is_empty() => VerificationResult::fail(proc_name, file_uuid, "0 frames"),
Some(_) => VerificationResult::ok(proc_name, file_uuid),
None => VerificationResult::fail(proc_name, file_uuid, "missing 'frames'"),
}
}
ProcessorType::Pose => {
let frames = value.get("frames").and_then(|v| v.as_array());
match frames {
Some(f) if f.is_empty() => VerificationResult::fail(proc_name, file_uuid, "0 frames"),
Some(_) => VerificationResult::ok(proc_name, file_uuid),
None => VerificationResult::fail(proc_name, file_uuid, "missing 'frames'"),
}
}
ProcessorType::Scene => {
let scenes = value.get("scenes").and_then(|v| v.as_array());
match scenes {
Some(s) if s.is_empty() => VerificationResult::fail(proc_name, file_uuid, "0 scenes"),
Some(_) => VerificationResult::ok(proc_name, file_uuid),
None => VerificationResult::ok(proc_name, file_uuid),
}
}
ProcessorType::VisualChunk => VerificationResult::ok(proc_name, file_uuid),
ProcessorType::Story => VerificationResult::ok(proc_name, file_uuid),
}
}
/// Cleans up the leftover files of a processor that passed verification, keeping only the
/// final `.json`.
///
/// Every entry in `OUTPUT_DIR` whose name starts with `{file_uuid}.{processor}.` is removed,
/// except the canonical `{file_uuid}.{processor}.json`. The cleanup is best-effort: failures
/// are logged as warnings and never propagated to the caller.
pub fn cleanup_temp_files(processor: &ProcessorType, file_uuid: &str) {
    let proc_name = processor.as_str();
    let prefix = format!("{}.{}.", file_uuid, proc_name);
    let canonical = format!("{}.{}.json", file_uuid, proc_name);

    let dir = match std::fs::read_dir(OUTPUT_DIR.as_str()) {
        Ok(dir) => dir,
        Err(e) => {
            // Surface the problem instead of skipping the cleanup silently.
            tracing::warn!(
                "Failed to read output dir while cleaning up {}.{}: {}",
                file_uuid,
                proc_name,
                e
            );
            return;
        }
    };

    let mut removed = 0u32;
    // Entries that cannot be read are skipped; the cleanup stays best-effort.
    for entry in dir.flatten() {
        let file_name = entry.file_name();
        let name = file_name.to_string_lossy();
        if !name.starts_with(prefix.as_str()) || name == canonical.as_str() {
            continue;
        }
        match std::fs::remove_file(entry.path()) {
            Ok(()) => removed += 1,
            Err(e) => tracing::warn!("Failed to cleanup {}: {}", name, e),
        }
    }

    if removed > 0 {
        info!("Cleaned up {} temp files for {}.{}", removed, file_uuid, proc_name);
    }
}

View File

@@ -1,6 +1,7 @@
use anyhow::{Context, Result};
use libc;
use std::collections::HashMap;
use std::fs;
use std::path::PathBuf;
use std::sync::Arc;
use tokio::sync::{mpsc, RwLock};
@@ -229,39 +230,89 @@ impl ProcessorPool {
match result {
Ok(output) => {
info!(
"Processor {} completed for job {} ({} chunks, {} frames)",
processor_name, job.uuid, output.chunks_produced, output.frames_processed
// 驗收 agent 檢查產出內容
let verification = crate::verification::verifier::verify_output(
&processor_type,
&job.uuid,
);
if let Err(e) = db
.update_processor_result_with_stats(
processor_result_id,
ProcessorJobStatus::Completed,
None,
Some(&output.data),
output.chunks_produced,
output.frames_processed,
)
.await
{
error!("Failed to update processor result to completed: {}", e);
}
if let Err(e) = redis
.update_worker_processor_status(
if verification.passed {
info!(
"Processor {} completed and verified for job {} ({} chunks, {} frames)",
processor_name, job.uuid, output.chunks_produced, output.frames_processed
);
// 清理暫存備份
crate::verification::verifier::cleanup_temp_files(
&processor_type,
&job.uuid,
&processor_name,
"completed",
None,
output.frames_processed,
output.chunks_produced,
output.total_frames,
output.retry_count,
output.pid,
)
.await
{
error!("Failed to update Redis processor status: {}", e);
);
if let Err(e) = db
.update_processor_result_with_stats(
processor_result_id,
ProcessorJobStatus::Completed,
None,
Some(&output.data),
output.chunks_produced,
output.frames_processed,
)
.await
{
error!("Failed to update processor result to completed: {}", e);
}
if let Err(e) = redis
.update_worker_processor_status(
&job.uuid,
&processor_name,
"completed",
None,
output.frames_processed,
output.chunks_produced,
output.total_frames,
output.retry_count,
output.pid,
)
.await
{
error!("Failed to update Redis processor status: {}", e);
}
} else {
error!(
"Processor {} output failed verification for job {}: {:?}",
processor_name, job.uuid, verification.details
);
if let Err(db_err) = db
.update_processor_result_with_stats(
processor_result_id,
ProcessorJobStatus::Failed,
Some(&format!("verification failed: {:?}", verification.details)),
None,
0,
0,
)
.await
{
error!("Failed to update processor result to failed: {}", db_err);
}
if let Err(redis_err) = redis
.update_worker_processor_status(
&job.uuid,
&processor_name,
"failed",
Some(&format!("verification failed: {:?}", verification.details)),
0,
0,
0,
0,
0,
)
.await
{
error!("Failed to update Redis processor status: {}", redis_err);
}
}
}
Err(e) => {