feat: Phase 1 handover - schema migration, correction mechanism, API fixes

Schema changes: dev.chunks->dev.chunk, remove old_chunk_id/chunk_index Correction: asr-1.json format, generate/apply scripts API: 37/37 endpoints fixed and tested Docs: HANDOVER_V2.0.md for M4
2026-05-11 07:03:22 +08:00
parent ef894a44ad
commit 39ba5ddf76
147 changed files with 19843 additions and 3053 deletions
--- a/src/worker/job_worker.rs
+++ b/src/worker/job_worker.rs
@@ -6,11 +6,11 @@ use std::time::Duration;
 use tokio::time::sleep;
 use tracing::{error, info, warn};

+use crate::api::five_w1h_agent_api::run_5w1h_agent;
+use crate::api::identity_agent_api::run_identity_agent;
 use crate::core::chunk::{rule1_ingest, rule3_ingest};
 use crate::core::config::OUTPUT_DIR;
 use crate::core::db::qdrant_db::QdrantDb;
-use crate::api::five_w1h_agent_api::run_5w1h_agent;
-use crate::api::identity_agent_api::run_identity_agent;
 use crate::core::db::{
    MonitorJobStatus, PostgresDb, ProcessorJobStatus, RedisClient, VectorPayload, VideoStatus,
 };
@@ -72,7 +72,7 @@ impl JobWorker {
             AND id NOT IN (
                 SELECT DISTINCT job_id FROM dev.processor_results
                 WHERE status IN ('pending', 'running')
-             )"
+             )",
        )
        .execute(self.db.pool())
        .await
@@ -168,7 +168,10 @@ impl JobWorker {
                } else {
                    job.processors.len()
                };
-                let should_retry = self.check_and_complete_job(job.id, &job.uuid, expected_count).await.is_ok();
+                let should_retry = self
+                    .check_and_complete_job(job.id, &job.uuid, expected_count)
+                    .await
+                    .is_ok();
                if should_retry && self.processor_pool.can_start().await {
                    if let Err(e) = self.process_job(job.clone()).await {
                        error!("Failed to reprocess job {}: {}", job.uuid, e);
@@ -329,8 +332,11 @@ impl JobWorker {
                .await?;

            // Check if output file already exists on disk (source of truth)
-            let output_path =
-                PathBuf::from(OUTPUT_DIR.as_str()).join(format!("{}.{}.json", job.uuid, processor_type.as_str()));
+            let output_path = PathBuf::from(OUTPUT_DIR.as_str()).join(format!(
+                "{}.{}.json",
+                job.uuid,
+                processor_type.as_str()
+            ));
            if output_path.exists() {
                info!(
                    "Processor {} output file exists, marking completed and skipping",
@@ -361,23 +367,26 @@ impl JobWorker {
                    .await?;
                started_count += 1;
                // 覆寫 result_map 讓相依性檢查能正確判斷
-                result_map.insert(*processor_type, crate::core::db::ProcessorResult {
-                    id: 0,
-                    job_id: job.id,
-                    processor_type: *processor_type,
-                    status: ProcessorJobStatus::Completed,
-                    started_at: None,
-                    completed_at: None,
-                    duration_secs: None,
-                    chunks_produced: 0,
-                    frames_processed: total_frames as i32,
-                    output_size_bytes: 0,
-                    error_message: None,
-                    output_data: None,
-                    retry_count: 0,
-                    created_at: String::new(),
-                    updated_at: String::new(),
-                });
+                result_map.insert(
+                    *processor_type,
+                    crate::core::db::ProcessorResult {
+                        id: 0,
+                        job_id: job.id,
+                        processor_type: *processor_type,
+                        status: ProcessorJobStatus::Completed,
+                        started_at: None,
+                        completed_at: None,
+                        duration_secs: None,
+                        chunks_produced: 0,
+                        frames_processed: total_frames as i32,
+                        output_size_bytes: 0,
+                        error_message: None,
+                        output_data: None,
+                        retry_count: 0,
+                        created_at: String::new(),
+                        updated_at: String::new(),
+                    },
+                );
                continue;
            }

@@ -524,7 +533,12 @@ impl JobWorker {
                        info!("Backup already exists: {}, skipping", bak_path.display());
                    } else {
                        match std::fs::copy(entry.path(), &bak_path) {
-                            Ok(bytes) => info!("Backed up {} -> {} ({} bytes)", name, bak_path.display(), bytes),
+                            Ok(bytes) => info!(
+                                "Backed up {} -> {} ({} bytes)",
+                                name,
+                                bak_path.display(),
+                                bytes
+                            ),
                            Err(e) => warn!("Failed to backup {}: {}", name, e),
                        }
                    }
@@ -568,12 +582,18 @@ impl JobWorker {
        } else {
            job.processors.len()
        };
-        self.check_and_complete_job(job.id, &job.uuid, expected_count).await?;
+        self.check_and_complete_job(job.id, &job.uuid, expected_count)
+            .await?;

        Ok(())
    }

-    async fn check_and_complete_job(&self, job_id: i32, uuid: &str, expected_count: usize) -> Result<()> {
+    async fn check_and_complete_job(
+        &self,
+        job_id: i32,
+        uuid: &str,
+        expected_count: usize,
+    ) -> Result<()> {
        let results = self.db.get_processor_results_by_job(job_id).await?;

        info!(
@@ -676,24 +696,41 @@ impl JobWorker {
                                info!("✅ Rule 1 Ingestion completed: {} chunks inserted.", count);
                                // Automatically vectorize new sentence chunks
                                if count > 0 {
-                                    info!("📝 Starting automatic vectorize for {} chunks...", count);
-                                    if let Err(e) = Self::vectorize_chunks(&db_clone, &uuid_clone).await {
-                                        error!("❌ Auto-vectorize failed for {}: {}", uuid_clone, e);
+                                    info!(
+                                        "📝 Starting automatic vectorize for {} chunks...",
+                                        count
+                                    );
+                                    if let Err(e) =
+                                        Self::vectorize_chunks(&db_clone, &uuid_clone).await
+                                    {
+                                        error!(
+                                            "❌ Auto-vectorize failed for {}: {}",
+                                            uuid_clone, e
+                                        );
                                    }
                                }
                                // Phase 1 release: sentence chunk embedding 交付
                                info!("📦 Phase 1 release packaging...");
                                let executor = match crate::core::processor::PythonExecutor::new() {
                                    Ok(ex) => ex,
-                                    Err(e) => { error!("Failed PythonExecutor for release pack: {}", e); return; }
+                                    Err(e) => {
+                                        error!("Failed PythonExecutor for release pack: {}", e);
+                                        return;
+                                    }
                                };
-                                match executor.run(
-                                    "release_pack.py",
-                                    &["--phase", "1", "--file-uuid", &uuid_clone],
-                                    None, "RELEASE_P1",
-                                    Some(std::time::Duration::from_secs(120)),
-                                ).await {
-                                    Ok(()) => info!("✅ Phase 1 release packaged for {}", uuid_clone),
+                                match executor
+                                    .run(
+                                        "release_pack.py",
+                                        &["--phase", "1", "--file-uuid", &uuid_clone],
+                                        None,
+                                        "RELEASE_P1",
+                                        Some(std::time::Duration::from_secs(120)),
+                                    )
+                                    .await
+                                {
+                                    Ok(()) => {
+                                        info!("✅ Phase 1 release packaged for {}", uuid_clone)
+                                    }
                                    Err(e) => error!("❌ Phase 1 release pack failed: {}", e),
                                }
                            }
@@ -851,14 +888,21 @@ impl JobWorker {
                            info!("📦 Phase 2 release packaging...");
                            let executor = match crate::core::processor::PythonExecutor::new() {
                                Ok(ex) => ex,
-                                Err(e) => { error!("Failed PythonExecutor for release pack: {}", e); return; }
+                                Err(e) => {
+                                    error!("Failed PythonExecutor for release pack: {}", e);
+                                    return;
+                                }
                            };
-                            match executor.run(
-                                "release_pack.py",
-                                &["--phase", "2", "--file-uuid", &uuid_clone],
-                                None, "RELEASE_P2",
-                                Some(std::time::Duration::from_secs(120)),
-                            ).await {
+                            match executor
+                                .run(
+                                    "release_pack.py",
+                                    &["--phase", "2", "--file-uuid", &uuid_clone],
+                                    None,
+                                    "RELEASE_P2",
+                                    Some(std::time::Duration::from_secs(120)),
+                                )
+                                .await
+                            {
                                Ok(()) => info!("✅ Phase 2 release packaged for {}", uuid_clone),
                                Err(e) => error!("❌ Phase 2 release pack failed: {}", e),
                            }
@@ -970,7 +1014,10 @@ impl JobWorker {
        }

        let total = rows.len();
-        info!("[Vectorize] Starting vectorize of {} chunks for {}", total, uuid);
+        info!(
+            "[Vectorize] Starting vectorize of {} chunks for {}",
+            total, uuid
+        );

        let mut stored = 0usize;
        for (chunk_id, _chunk_type, text, start_time, end_time, _content_str) in &rows {
@@ -998,7 +1045,10 @@ impl JobWorker {
                    }
                    stored += 1;
                    if stored % 50 == 0 {
-                        info!("[Vectorize] {}/{} vectors stored for {}", stored, total, uuid);
+                        info!(
+                            "[Vectorize] {}/{} vectors stored for {}",
+                            stored, total, uuid
+                        );
                    }
                }
                Err(e) => {
@@ -1007,7 +1057,10 @@ impl JobWorker {
            }
        }

-        info!("[Vectorize] Completed: {}/{} vectors stored for {}", stored, total, uuid);
+        info!(
+            "[Vectorize] Completed: {}/{} vectors stored for {}",
+            stored, total, uuid
+        );
        Ok(())
    }
 }
--- a/src/worker/processor.rs
+++ b/src/worker/processor.rs
@@ -142,15 +142,21 @@ impl ProcessorPool {
                .flatten();
            if let Some(pid) = old_pid {
                if pid > 0 {
-                    warn!("[PID] Killing existing process {} for {}/{}", pid, uuid, processor);
-                    unsafe { libc::kill(pid, libc::SIGKILL); }
+                    warn!(
+                        "[PID] Killing existing process {} for {}/{}",
+                        pid, uuid, processor
+                    );
+                    unsafe {
+                        libc::kill(pid, libc::SIGKILL);
+                    }
                }
            }
        }
    }

    pub async fn start_processor(&self, task: ProcessorTask) -> Result<()> {
-        Self::kill_existing_processor(&*self.redis, &task.job.uuid, task.processor_type.as_str()).await;
+        Self::kill_existing_processor(&*self.redis, &task.job.uuid, task.processor_type.as_str())
+            .await;

        let (cancel_tx, cancel_rx) = mpsc::channel(1);
        let job_id = task.job.id;
@@ -231,15 +237,16 @@ impl ProcessorPool {
            match result {
                Ok(output) => {
                    // 驗收 agent 檢查產出內容
-                    let verification = crate::verification::verifier::verify_output(
-                        &processor_type,
-                        &job.uuid,
-                    );
+                    let verification =
+                        crate::verification::verifier::verify_output(&processor_type, &job.uuid);

                    if verification.passed {
                        info!(
                            "Processor {} completed and verified for job {} ({} chunks, {} frames)",
-                            processor_name, job.uuid, output.chunks_produced, output.frames_processed
+                            processor_name,
+                            job.uuid,
+                            output.chunks_produced,
+                            output.frames_processed
                        );

                        // 清理暫存備份