feat: Phase 1 handover - schema migration, correction mechanism, API fixes
Schema changes: dev.chunks->dev.chunk, remove old_chunk_id/chunk_index Correction: asr-1.json format, generate/apply scripts API: 37/37 endpoints fixed and tested Docs: HANDOVER_V2.0.md for M4
This commit is contained in:
@@ -6,11 +6,11 @@ use std::time::Duration;
|
||||
use tokio::time::sleep;
|
||||
use tracing::{error, info, warn};
|
||||
|
||||
use crate::api::five_w1h_agent_api::run_5w1h_agent;
|
||||
use crate::api::identity_agent_api::run_identity_agent;
|
||||
use crate::core::chunk::{rule1_ingest, rule3_ingest};
|
||||
use crate::core::config::OUTPUT_DIR;
|
||||
use crate::core::db::qdrant_db::QdrantDb;
|
||||
use crate::api::five_w1h_agent_api::run_5w1h_agent;
|
||||
use crate::api::identity_agent_api::run_identity_agent;
|
||||
use crate::core::db::{
|
||||
MonitorJobStatus, PostgresDb, ProcessorJobStatus, RedisClient, VectorPayload, VideoStatus,
|
||||
};
|
||||
@@ -72,7 +72,7 @@ impl JobWorker {
|
||||
AND id NOT IN (
|
||||
SELECT DISTINCT job_id FROM dev.processor_results
|
||||
WHERE status IN ('pending', 'running')
|
||||
)"
|
||||
)",
|
||||
)
|
||||
.execute(self.db.pool())
|
||||
.await
|
||||
@@ -168,7 +168,10 @@ impl JobWorker {
|
||||
} else {
|
||||
job.processors.len()
|
||||
};
|
||||
let should_retry = self.check_and_complete_job(job.id, &job.uuid, expected_count).await.is_ok();
|
||||
let should_retry = self
|
||||
.check_and_complete_job(job.id, &job.uuid, expected_count)
|
||||
.await
|
||||
.is_ok();
|
||||
if should_retry && self.processor_pool.can_start().await {
|
||||
if let Err(e) = self.process_job(job.clone()).await {
|
||||
error!("Failed to reprocess job {}: {}", job.uuid, e);
|
||||
@@ -329,8 +332,11 @@ impl JobWorker {
|
||||
.await?;
|
||||
|
||||
// Check if output file already exists on disk (source of truth)
|
||||
let output_path =
|
||||
PathBuf::from(OUTPUT_DIR.as_str()).join(format!("{}.{}.json", job.uuid, processor_type.as_str()));
|
||||
let output_path = PathBuf::from(OUTPUT_DIR.as_str()).join(format!(
|
||||
"{}.{}.json",
|
||||
job.uuid,
|
||||
processor_type.as_str()
|
||||
));
|
||||
if output_path.exists() {
|
||||
info!(
|
||||
"Processor {} output file exists, marking completed and skipping",
|
||||
@@ -361,23 +367,26 @@ impl JobWorker {
|
||||
.await?;
|
||||
started_count += 1;
|
||||
// 覆寫 result_map 讓相依性檢查能正確判斷
|
||||
result_map.insert(*processor_type, crate::core::db::ProcessorResult {
|
||||
id: 0,
|
||||
job_id: job.id,
|
||||
processor_type: *processor_type,
|
||||
status: ProcessorJobStatus::Completed,
|
||||
started_at: None,
|
||||
completed_at: None,
|
||||
duration_secs: None,
|
||||
chunks_produced: 0,
|
||||
frames_processed: total_frames as i32,
|
||||
output_size_bytes: 0,
|
||||
error_message: None,
|
||||
output_data: None,
|
||||
retry_count: 0,
|
||||
created_at: String::new(),
|
||||
updated_at: String::new(),
|
||||
});
|
||||
result_map.insert(
|
||||
*processor_type,
|
||||
crate::core::db::ProcessorResult {
|
||||
id: 0,
|
||||
job_id: job.id,
|
||||
processor_type: *processor_type,
|
||||
status: ProcessorJobStatus::Completed,
|
||||
started_at: None,
|
||||
completed_at: None,
|
||||
duration_secs: None,
|
||||
chunks_produced: 0,
|
||||
frames_processed: total_frames as i32,
|
||||
output_size_bytes: 0,
|
||||
error_message: None,
|
||||
output_data: None,
|
||||
retry_count: 0,
|
||||
created_at: String::new(),
|
||||
updated_at: String::new(),
|
||||
},
|
||||
);
|
||||
continue;
|
||||
}
|
||||
|
||||
@@ -524,7 +533,12 @@ impl JobWorker {
|
||||
info!("Backup already exists: {}, skipping", bak_path.display());
|
||||
} else {
|
||||
match std::fs::copy(entry.path(), &bak_path) {
|
||||
Ok(bytes) => info!("Backed up {} -> {} ({} bytes)", name, bak_path.display(), bytes),
|
||||
Ok(bytes) => info!(
|
||||
"Backed up {} -> {} ({} bytes)",
|
||||
name,
|
||||
bak_path.display(),
|
||||
bytes
|
||||
),
|
||||
Err(e) => warn!("Failed to backup {}: {}", name, e),
|
||||
}
|
||||
}
|
||||
@@ -568,12 +582,18 @@ impl JobWorker {
|
||||
} else {
|
||||
job.processors.len()
|
||||
};
|
||||
self.check_and_complete_job(job.id, &job.uuid, expected_count).await?;
|
||||
self.check_and_complete_job(job.id, &job.uuid, expected_count)
|
||||
.await?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn check_and_complete_job(&self, job_id: i32, uuid: &str, expected_count: usize) -> Result<()> {
|
||||
async fn check_and_complete_job(
|
||||
&self,
|
||||
job_id: i32,
|
||||
uuid: &str,
|
||||
expected_count: usize,
|
||||
) -> Result<()> {
|
||||
let results = self.db.get_processor_results_by_job(job_id).await?;
|
||||
|
||||
info!(
|
||||
@@ -676,24 +696,41 @@ impl JobWorker {
|
||||
info!("✅ Rule 1 Ingestion completed: {} chunks inserted.", count);
|
||||
// Automatically vectorize new sentence chunks
|
||||
if count > 0 {
|
||||
info!("📝 Starting automatic vectorize for {} chunks...", count);
|
||||
if let Err(e) = Self::vectorize_chunks(&db_clone, &uuid_clone).await {
|
||||
error!("❌ Auto-vectorize failed for {}: {}", uuid_clone, e);
|
||||
info!(
|
||||
"📝 Starting automatic vectorize for {} chunks...",
|
||||
count
|
||||
);
|
||||
if let Err(e) =
|
||||
Self::vectorize_chunks(&db_clone, &uuid_clone).await
|
||||
{
|
||||
error!(
|
||||
"❌ Auto-vectorize failed for {}: {}",
|
||||
uuid_clone, e
|
||||
);
|
||||
}
|
||||
}
|
||||
// Phase 1 release: sentence chunk embedding 交付
|
||||
info!("📦 Phase 1 release packaging...");
|
||||
let executor = match crate::core::processor::PythonExecutor::new() {
|
||||
Ok(ex) => ex,
|
||||
Err(e) => { error!("Failed PythonExecutor for release pack: {}", e); return; }
|
||||
Err(e) => {
|
||||
error!("Failed PythonExecutor for release pack: {}", e);
|
||||
return;
|
||||
}
|
||||
};
|
||||
match executor.run(
|
||||
"release_pack.py",
|
||||
&["--phase", "1", "--file-uuid", &uuid_clone],
|
||||
None, "RELEASE_P1",
|
||||
Some(std::time::Duration::from_secs(120)),
|
||||
).await {
|
||||
Ok(()) => info!("✅ Phase 1 release packaged for {}", uuid_clone),
|
||||
match executor
|
||||
.run(
|
||||
"release_pack.py",
|
||||
&["--phase", "1", "--file-uuid", &uuid_clone],
|
||||
None,
|
||||
"RELEASE_P1",
|
||||
Some(std::time::Duration::from_secs(120)),
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(()) => {
|
||||
info!("✅ Phase 1 release packaged for {}", uuid_clone)
|
||||
}
|
||||
Err(e) => error!("❌ Phase 1 release pack failed: {}", e),
|
||||
}
|
||||
}
|
||||
@@ -851,14 +888,21 @@ impl JobWorker {
|
||||
info!("📦 Phase 2 release packaging...");
|
||||
let executor = match crate::core::processor::PythonExecutor::new() {
|
||||
Ok(ex) => ex,
|
||||
Err(e) => { error!("Failed PythonExecutor for release pack: {}", e); return; }
|
||||
Err(e) => {
|
||||
error!("Failed PythonExecutor for release pack: {}", e);
|
||||
return;
|
||||
}
|
||||
};
|
||||
match executor.run(
|
||||
"release_pack.py",
|
||||
&["--phase", "2", "--file-uuid", &uuid_clone],
|
||||
None, "RELEASE_P2",
|
||||
Some(std::time::Duration::from_secs(120)),
|
||||
).await {
|
||||
match executor
|
||||
.run(
|
||||
"release_pack.py",
|
||||
&["--phase", "2", "--file-uuid", &uuid_clone],
|
||||
None,
|
||||
"RELEASE_P2",
|
||||
Some(std::time::Duration::from_secs(120)),
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(()) => info!("✅ Phase 2 release packaged for {}", uuid_clone),
|
||||
Err(e) => error!("❌ Phase 2 release pack failed: {}", e),
|
||||
}
|
||||
@@ -970,7 +1014,10 @@ impl JobWorker {
|
||||
}
|
||||
|
||||
let total = rows.len();
|
||||
info!("[Vectorize] Starting vectorize of {} chunks for {}", total, uuid);
|
||||
info!(
|
||||
"[Vectorize] Starting vectorize of {} chunks for {}",
|
||||
total, uuid
|
||||
);
|
||||
|
||||
let mut stored = 0usize;
|
||||
for (chunk_id, _chunk_type, text, start_time, end_time, _content_str) in &rows {
|
||||
@@ -998,7 +1045,10 @@ impl JobWorker {
|
||||
}
|
||||
stored += 1;
|
||||
if stored % 50 == 0 {
|
||||
info!("[Vectorize] {}/{} vectors stored for {}", stored, total, uuid);
|
||||
info!(
|
||||
"[Vectorize] {}/{} vectors stored for {}",
|
||||
stored, total, uuid
|
||||
);
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
@@ -1007,7 +1057,10 @@ impl JobWorker {
|
||||
}
|
||||
}
|
||||
|
||||
info!("[Vectorize] Completed: {}/{} vectors stored for {}", stored, total, uuid);
|
||||
info!(
|
||||
"[Vectorize] Completed: {}/{} vectors stored for {}",
|
||||
stored, total, uuid
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -142,15 +142,21 @@ impl ProcessorPool {
|
||||
.flatten();
|
||||
if let Some(pid) = old_pid {
|
||||
if pid > 0 {
|
||||
warn!("[PID] Killing existing process {} for {}/{}", pid, uuid, processor);
|
||||
unsafe { libc::kill(pid, libc::SIGKILL); }
|
||||
warn!(
|
||||
"[PID] Killing existing process {} for {}/{}",
|
||||
pid, uuid, processor
|
||||
);
|
||||
unsafe {
|
||||
libc::kill(pid, libc::SIGKILL);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn start_processor(&self, task: ProcessorTask) -> Result<()> {
|
||||
Self::kill_existing_processor(&*self.redis, &task.job.uuid, task.processor_type.as_str()).await;
|
||||
Self::kill_existing_processor(&*self.redis, &task.job.uuid, task.processor_type.as_str())
|
||||
.await;
|
||||
|
||||
let (cancel_tx, cancel_rx) = mpsc::channel(1);
|
||||
let job_id = task.job.id;
|
||||
@@ -231,15 +237,16 @@ impl ProcessorPool {
|
||||
match result {
|
||||
Ok(output) => {
|
||||
// 驗收 agent 檢查產出內容
|
||||
let verification = crate::verification::verifier::verify_output(
|
||||
&processor_type,
|
||||
&job.uuid,
|
||||
);
|
||||
let verification =
|
||||
crate::verification::verifier::verify_output(&processor_type, &job.uuid);
|
||||
|
||||
if verification.passed {
|
||||
info!(
|
||||
"Processor {} completed and verified for job {} ({} chunks, {} frames)",
|
||||
processor_name, job.uuid, output.chunks_produced, output.frames_processed
|
||||
processor_name,
|
||||
job.uuid,
|
||||
output.chunks_produced,
|
||||
output.frames_processed
|
||||
);
|
||||
|
||||
// 清理暫存備份
|
||||
|
||||
Reference in New Issue
Block a user