feat: Phase 1 handover - schema migration, correction mechanism, API fixes

Schema changes: dev.chunks->dev.chunk, remove old_chunk_id/chunk_index
Correction: asr-1.json format, generate/apply scripts
API: 37/37 endpoints fixed and tested
Docs: HANDOVER_V2.0.md for M4
This commit is contained in:
Accusys
2026-05-11 07:03:22 +08:00
parent ef894a44ad
commit 39ba5ddf76
147 changed files with 19843 additions and 3053 deletions

View File

@@ -6,11 +6,11 @@ use std::time::Duration;
use tokio::time::sleep;
use tracing::{error, info, warn};
use crate::api::five_w1h_agent_api::run_5w1h_agent;
use crate::api::identity_agent_api::run_identity_agent;
use crate::core::chunk::{rule1_ingest, rule3_ingest};
use crate::core::config::OUTPUT_DIR;
use crate::core::db::qdrant_db::QdrantDb;
use crate::api::five_w1h_agent_api::run_5w1h_agent;
use crate::api::identity_agent_api::run_identity_agent;
use crate::core::db::{
MonitorJobStatus, PostgresDb, ProcessorJobStatus, RedisClient, VectorPayload, VideoStatus,
};
@@ -72,7 +72,7 @@ impl JobWorker {
AND id NOT IN (
SELECT DISTINCT job_id FROM dev.processor_results
WHERE status IN ('pending', 'running')
)"
)",
)
.execute(self.db.pool())
.await
@@ -168,7 +168,10 @@ impl JobWorker {
} else {
job.processors.len()
};
let should_retry = self.check_and_complete_job(job.id, &job.uuid, expected_count).await.is_ok();
let should_retry = self
.check_and_complete_job(job.id, &job.uuid, expected_count)
.await
.is_ok();
if should_retry && self.processor_pool.can_start().await {
if let Err(e) = self.process_job(job.clone()).await {
error!("Failed to reprocess job {}: {}", job.uuid, e);
@@ -329,8 +332,11 @@ impl JobWorker {
.await?;
// Check if output file already exists on disk (source of truth)
let output_path =
PathBuf::from(OUTPUT_DIR.as_str()).join(format!("{}.{}.json", job.uuid, processor_type.as_str()));
let output_path = PathBuf::from(OUTPUT_DIR.as_str()).join(format!(
"{}.{}.json",
job.uuid,
processor_type.as_str()
));
if output_path.exists() {
info!(
"Processor {} output file exists, marking completed and skipping",
@@ -361,23 +367,26 @@ impl JobWorker {
.await?;
started_count += 1;
// 覆寫 result_map 讓相依性檢查能正確判斷
result_map.insert(*processor_type, crate::core::db::ProcessorResult {
id: 0,
job_id: job.id,
processor_type: *processor_type,
status: ProcessorJobStatus::Completed,
started_at: None,
completed_at: None,
duration_secs: None,
chunks_produced: 0,
frames_processed: total_frames as i32,
output_size_bytes: 0,
error_message: None,
output_data: None,
retry_count: 0,
created_at: String::new(),
updated_at: String::new(),
});
result_map.insert(
*processor_type,
crate::core::db::ProcessorResult {
id: 0,
job_id: job.id,
processor_type: *processor_type,
status: ProcessorJobStatus::Completed,
started_at: None,
completed_at: None,
duration_secs: None,
chunks_produced: 0,
frames_processed: total_frames as i32,
output_size_bytes: 0,
error_message: None,
output_data: None,
retry_count: 0,
created_at: String::new(),
updated_at: String::new(),
},
);
continue;
}
@@ -524,7 +533,12 @@ impl JobWorker {
info!("Backup already exists: {}, skipping", bak_path.display());
} else {
match std::fs::copy(entry.path(), &bak_path) {
Ok(bytes) => info!("Backed up {} -> {} ({} bytes)", name, bak_path.display(), bytes),
Ok(bytes) => info!(
"Backed up {} -> {} ({} bytes)",
name,
bak_path.display(),
bytes
),
Err(e) => warn!("Failed to backup {}: {}", name, e),
}
}
@@ -568,12 +582,18 @@ impl JobWorker {
} else {
job.processors.len()
};
self.check_and_complete_job(job.id, &job.uuid, expected_count).await?;
self.check_and_complete_job(job.id, &job.uuid, expected_count)
.await?;
Ok(())
}
async fn check_and_complete_job(&self, job_id: i32, uuid: &str, expected_count: usize) -> Result<()> {
async fn check_and_complete_job(
&self,
job_id: i32,
uuid: &str,
expected_count: usize,
) -> Result<()> {
let results = self.db.get_processor_results_by_job(job_id).await?;
info!(
@@ -676,24 +696,41 @@ impl JobWorker {
info!("✅ Rule 1 Ingestion completed: {} chunks inserted.", count);
// Automatically vectorize new sentence chunks
if count > 0 {
info!("📝 Starting automatic vectorize for {} chunks...", count);
if let Err(e) = Self::vectorize_chunks(&db_clone, &uuid_clone).await {
error!("❌ Auto-vectorize failed for {}: {}", uuid_clone, e);
info!(
"📝 Starting automatic vectorize for {} chunks...",
count
);
if let Err(e) =
Self::vectorize_chunks(&db_clone, &uuid_clone).await
{
error!(
"❌ Auto-vectorize failed for {}: {}",
uuid_clone, e
);
}
}
// Phase 1 release: sentence chunk embedding 交付
info!("📦 Phase 1 release packaging...");
let executor = match crate::core::processor::PythonExecutor::new() {
Ok(ex) => ex,
Err(e) => { error!("Failed PythonExecutor for release pack: {}", e); return; }
Err(e) => {
error!("Failed PythonExecutor for release pack: {}", e);
return;
}
};
match executor.run(
"release_pack.py",
&["--phase", "1", "--file-uuid", &uuid_clone],
None, "RELEASE_P1",
Some(std::time::Duration::from_secs(120)),
).await {
Ok(()) => info!("✅ Phase 1 release packaged for {}", uuid_clone),
match executor
.run(
"release_pack.py",
&["--phase", "1", "--file-uuid", &uuid_clone],
None,
"RELEASE_P1",
Some(std::time::Duration::from_secs(120)),
)
.await
{
Ok(()) => {
info!("✅ Phase 1 release packaged for {}", uuid_clone)
}
Err(e) => error!("❌ Phase 1 release pack failed: {}", e),
}
}
@@ -851,14 +888,21 @@ impl JobWorker {
info!("📦 Phase 2 release packaging...");
let executor = match crate::core::processor::PythonExecutor::new() {
Ok(ex) => ex,
Err(e) => { error!("Failed PythonExecutor for release pack: {}", e); return; }
Err(e) => {
error!("Failed PythonExecutor for release pack: {}", e);
return;
}
};
match executor.run(
"release_pack.py",
&["--phase", "2", "--file-uuid", &uuid_clone],
None, "RELEASE_P2",
Some(std::time::Duration::from_secs(120)),
).await {
match executor
.run(
"release_pack.py",
&["--phase", "2", "--file-uuid", &uuid_clone],
None,
"RELEASE_P2",
Some(std::time::Duration::from_secs(120)),
)
.await
{
Ok(()) => info!("✅ Phase 2 release packaged for {}", uuid_clone),
Err(e) => error!("❌ Phase 2 release pack failed: {}", e),
}
@@ -970,7 +1014,10 @@ impl JobWorker {
}
let total = rows.len();
info!("[Vectorize] Starting vectorize of {} chunks for {}", total, uuid);
info!(
"[Vectorize] Starting vectorize of {} chunks for {}",
total, uuid
);
let mut stored = 0usize;
for (chunk_id, _chunk_type, text, start_time, end_time, _content_str) in &rows {
@@ -998,7 +1045,10 @@ impl JobWorker {
}
stored += 1;
if stored % 50 == 0 {
info!("[Vectorize] {}/{} vectors stored for {}", stored, total, uuid);
info!(
"[Vectorize] {}/{} vectors stored for {}",
stored, total, uuid
);
}
}
Err(e) => {
@@ -1007,7 +1057,10 @@ impl JobWorker {
}
}
info!("[Vectorize] Completed: {}/{} vectors stored for {}", stored, total, uuid);
info!(
"[Vectorize] Completed: {}/{} vectors stored for {}",
stored, total, uuid
);
Ok(())
}
}

View File

@@ -142,15 +142,21 @@ impl ProcessorPool {
.flatten();
if let Some(pid) = old_pid {
if pid > 0 {
warn!("[PID] Killing existing process {} for {}/{}", pid, uuid, processor);
unsafe { libc::kill(pid, libc::SIGKILL); }
warn!(
"[PID] Killing existing process {} for {}/{}",
pid, uuid, processor
);
unsafe {
libc::kill(pid, libc::SIGKILL);
}
}
}
}
}
pub async fn start_processor(&self, task: ProcessorTask) -> Result<()> {
Self::kill_existing_processor(&*self.redis, &task.job.uuid, task.processor_type.as_str()).await;
Self::kill_existing_processor(&*self.redis, &task.job.uuid, task.processor_type.as_str())
.await;
let (cancel_tx, cancel_rx) = mpsc::channel(1);
let job_id = task.job.id;
@@ -231,15 +237,16 @@ impl ProcessorPool {
match result {
Ok(output) => {
// 驗收 agent 檢查產出內容
let verification = crate::verification::verifier::verify_output(
&processor_type,
&job.uuid,
);
let verification =
crate::verification::verifier::verify_output(&processor_type, &job.uuid);
if verification.passed {
info!(
"Processor {} completed and verified for job {} ({} chunks, {} frames)",
processor_name, job.uuid, output.chunks_produced, output.frames_processed
processor_name,
job.uuid,
output.chunks_produced,
output.frames_processed
);
// 清理暫存備份