feat: 新增 Job Worker 系統與 API 文檔全面更新

2026-03-26 16:16:34 +08:00
parent 80399b1c12
commit 82955504f3
70 changed files with 3460 additions and 376 deletions
--- a/src/core/db/mongodb_db.rs
+++ b/src/core/db/mongodb_db.rs
@@ -28,13 +28,15 @@ pub struct ChunkDocument {

 impl From<Chunk> for ChunkDocument {
    fn from(chunk: Chunk) -> Self {
+        let start_time = chunk.start_time().seconds();
+        let end_time = chunk.end_time().seconds();
        Self {
            uuid: chunk.uuid,
            chunk_id: chunk.chunk_id,
            chunk_index: chunk.chunk_index,
            chunk_type: chunk.chunk_type.as_str().to_string(),
-            start_time: chunk.start_time,
-            end_time: chunk.end_time,
+            start_time,
+            end_time,
            fps: chunk.fps,
            start_frame: chunk.start_frame,
            end_frame: chunk.end_frame,
@@ -118,8 +120,6 @@ impl MongoDb {
                    chunk_index: doc.chunk_index,
                    chunk_type,
                    rule: ChunkRule::Rule1,
-                    start_time: doc.start_time,
-                    end_time: doc.end_time,
                    fps: doc.fps,
                    start_frame: doc.start_frame,
                    end_frame: doc.end_frame,
@@ -178,8 +178,6 @@ impl MongoDb {
                    chunk_index: doc.chunk_index,
                    chunk_type,
                    rule: ChunkRule::Rule1,
-                    start_time: doc.start_time,
-                    end_time: doc.end_time,
                    fps: doc.fps,
                    start_frame: doc.start_frame,
                    end_frame: doc.end_frame,
@@ -235,8 +233,6 @@ impl MongoDb {
                    chunk_index: doc.chunk_index,
                    chunk_type,
                    rule: ChunkRule::Rule1,
-                    start_time: doc.start_time,
-                    end_time: doc.end_time,
                    fps: doc.fps,
                    start_frame: doc.start_frame,
                    end_frame: doc.end_frame,
--- a/src/core/db/postgres_db.rs
+++ b/src/core/db/postgres_db.rs
@@ -126,8 +126,6 @@ pub struct PreChunk {
    pub source_type: String,
    pub source_file: Option<String>,
    pub chunk_type: String,
-    pub start_time: f64,
-    pub end_time: f64,
    pub start_frame: i64,
    pub end_frame: i64,
    pub fps: f64,
@@ -209,7 +207,7 @@ pub struct MonitorJobStats {
    pub failed: i32,
 }

-#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq)]
+#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Hash)]
 #[serde(rename_all = "snake_case")]
 pub enum ProcessorType {
    Asr,
@@ -449,6 +447,12 @@ impl PostgresDb {
            .parse::<u64>()
            .unwrap_or(60);

+        tracing::info!(
+            "DB pool config: max_connections={}, acquire_timeout={}s",
+            max_connections,
+            acquire_timeout_secs
+        );
+
        let pool_options = PgPoolOptions::new()
            .max_connections(max_connections)
            .acquire_timeout(std::time::Duration::from_secs(acquire_timeout_secs));
@@ -1770,8 +1774,8 @@ impl PostgresDb {
        .bind(&chunk.chunk_id)
        .bind(chunk.chunk_index as i32)
        .bind(chunk.chunk_type.as_str())
-        .bind(chunk.start_time)
-        .bind(chunk.end_time)
+        .bind(chunk.start_time().seconds())
+        .bind(chunk.end_time().seconds())
        .bind(chunk.fps)
        .bind(chunk.start_frame)
        .bind(chunk.end_frame)
@@ -1791,7 +1795,7 @@ impl PostgresDb {

    pub async fn get_chunks_by_uuid(&self, uuid: &str) -> Result<Vec<Chunk>> {
        let rows = sqlx::query(
-            "SELECT COALESCE(file_id, 0) as file_id, uuid, chunk_id, chunk_index, chunk_type, start_time, end_time, COALESCE(fps, 24.0) as fps, COALESCE(start_frame, 0) as start_frame, COALESCE(end_frame, 0) as end_frame, text_content, content, metadata, vector_id, COALESCE(frame_count, 0) as frame_count, pre_chunk_ids, parent_chunk_id, child_chunk_ids FROM chunks WHERE uuid = $1 ORDER BY chunk_index"
+            "SELECT COALESCE(file_id, 0) as file_id, uuid, chunk_id, chunk_index, chunk_type, COALESCE(fps, 24.0) as fps, COALESCE(start_frame, 0) as start_frame, COALESCE(end_frame, 0) as end_frame, text_content, content, metadata, vector_id, COALESCE(frame_count, 0) as frame_count, pre_chunk_ids, parent_chunk_id, child_chunk_ids FROM chunks WHERE uuid = $1 ORDER BY chunk_index"
        )
        .bind(uuid)
        .fetch_all(&self.pool)
@@ -1811,12 +1815,12 @@ impl PostgresDb {
                    _ => ChunkType::TimeBased,
                };

-                let content: serde_json::Value = r.get(11);
-                let metadata: Option<serde_json::Value> = r.get(12);
+                let content: serde_json::Value = r.get(9);
+                let metadata: Option<serde_json::Value> = r.get(10);

-                let pre_chunk_ids: Vec<i32> = r.try_get(15).unwrap_or_default();
-                let parent_chunk_id: Option<String> = r.try_get(16).ok().flatten();
-                let child_chunk_ids: Vec<String> = r.try_get(17).unwrap_or_default();
+                let pre_chunk_ids: Vec<i32> = r.try_get(13).unwrap_or_default();
+                let parent_chunk_id: Option<String> = r.try_get(14).ok().flatten();
+                let child_chunk_ids: Vec<String> = r.try_get(15).unwrap_or_default();

                let (rule, content_data) = if content.get("rule").is_some() {
                    let rule_str = content
@@ -1844,8 +1848,7 @@ impl PostgresDb {
                    chunk_index: chunk_index as u32,
                    chunk_type,
                    rule,
-                    start_time: r.get("start_time"),
-                    end_time: r.get("end_time"),
+
                    fps: r.get("fps"),
                    start_frame: r.get("start_frame"),
                    end_frame: r.get("end_frame"),
@@ -1866,7 +1869,7 @@ impl PostgresDb {

    pub async fn get_chunk_by_chunk_id(&self, chunk_id: &str) -> Result<Option<Chunk>> {
        let row = sqlx::query(
-            "SELECT COALESCE(file_id, 0) as file_id, uuid, chunk_id, chunk_index, chunk_type, start_time, end_time, COALESCE(fps, 24.0) as fps, COALESCE(start_frame, 0) as start_frame, COALESCE(end_frame, 0) as end_frame, text_content, content, metadata, vector_id, COALESCE(frame_count, 0) as frame_count, pre_chunk_ids, parent_chunk_id, child_chunk_ids FROM chunks WHERE chunk_id = $1"
+            "SELECT COALESCE(file_id, 0) as file_id, uuid, chunk_id, chunk_index, chunk_type, COALESCE(fps, 24.0) as fps, COALESCE(start_frame, 0) as start_frame, COALESCE(end_frame, 0) as end_frame, text_content, content, metadata, vector_id, COALESCE(frame_count, 0) as frame_count, pre_chunk_ids, parent_chunk_id, child_chunk_ids FROM chunks WHERE chunk_id = $1"
        )
        .bind(chunk_id)
        .fetch_optional(&self.pool)
@@ -1884,12 +1887,12 @@ impl PostgresDb {
                _ => ChunkType::TimeBased,
            };

-            let content: serde_json::Value = r.get(11);
-            let metadata: Option<serde_json::Value> = r.get(12);
+            let content: serde_json::Value = r.get(9);
+            let metadata: Option<serde_json::Value> = r.get(10);

-            let pre_chunk_ids: Vec<i32> = r.try_get(15).unwrap_or_default();
-            let parent_chunk_id: Option<String> = r.try_get(16).ok().flatten();
-            let child_chunk_ids: Vec<String> = r.try_get(17).unwrap_or_default();
+            let pre_chunk_ids: Vec<i32> = r.try_get(13).unwrap_or_default();
+            let parent_chunk_id: Option<String> = r.try_get(14).ok().flatten();
+            let child_chunk_ids: Vec<String> = r.try_get(15).unwrap_or_default();

            let (rule, content_data) = if content.get("rule").is_some() {
                let rule_str = content
@@ -1917,8 +1920,6 @@ impl PostgresDb {
                chunk_index: chunk_index as u32,
                chunk_type,
                rule,
-                start_time: r.get("start_time"),
-                end_time: r.get("end_time"),
                fps: r.get("fps"),
                start_frame: r.get("start_frame"),
                end_frame: r.get("end_frame"),
@@ -1942,6 +1943,9 @@ impl PostgresDb {
            INSERT INTO pre_chunks (file_id, source_type, source_file, chunk_type, start_time, end_time, start_frame, end_frame, fps, raw_json, text_content, processed, chunk_id)
            VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13)
            ON CONFLICT (file_id, source_type, start_frame, end_frame) DO UPDATE SET
+                start_time = EXCLUDED.start_time,
+                end_time = EXCLUDED.end_time,
+                fps = EXCLUDED.fps,
                raw_json = EXCLUDED.raw_json,
                text_content = EXCLUDED.text_content,
                processed = EXCLUDED.processed,
@@ -1953,8 +1957,8 @@ impl PostgresDb {
        .bind(&pre_chunk.source_type)
        .bind(&pre_chunk.source_file)
        .bind(&pre_chunk.chunk_type)
-        .bind(pre_chunk.start_time)
-        .bind(pre_chunk.end_time)
+        .bind(pre_chunk.start_frame as f64 / pre_chunk.fps)
+        .bind(pre_chunk.end_frame as f64 / pre_chunk.fps)
        .bind(pre_chunk.start_frame)
        .bind(pre_chunk.end_frame)
        .bind(pre_chunk.fps)
@@ -2108,8 +2112,7 @@ impl PostgresDb {
                    chunk_index: chunk_index as u32,
                    chunk_type,
                    rule,
-                    start_time: r.get("start_time"),
-                    end_time: r.get("end_time"),
+
                    fps: r.get("fps"),
                    start_frame: r.get("start_frame"),
                    end_frame: r.get("end_frame"),
@@ -2134,7 +2137,7 @@ impl PostgresDb {
        }

        let rows = sqlx::query(
-            "SELECT file_id, uuid, chunk_id, chunk_index, chunk_type, start_time, end_time, fps, start_frame, end_frame, text_content, content, metadata, vector_id, frame_count, pre_chunk_ids, parent_chunk_id, child_chunk_ids FROM chunks WHERE chunk_id = ANY($1) ORDER BY chunk_index",
+            "SELECT file_id, uuid, chunk_id, chunk_index, chunk_type, fps, start_frame, end_frame, text_content, content, metadata, vector_id, frame_count, pre_chunk_ids, parent_chunk_id, child_chunk_ids FROM chunks WHERE chunk_id = ANY($1) ORDER BY chunk_index",
        )
        .bind(chunk_ids)
        .fetch_all(&self.pool)
@@ -2154,12 +2157,12 @@ impl PostgresDb {
                    _ => ChunkType::TimeBased,
                };

-                let content: serde_json::Value = r.get(11);
-                let metadata: Option<serde_json::Value> = r.get(12);
+                let content: serde_json::Value = r.get(9);
+                let metadata: Option<serde_json::Value> = r.get(10);

-                let pre_chunk_ids: Vec<i32> = r.try_get(15).unwrap_or_default();
-                let parent_chunk_id: Option<String> = r.try_get(16).ok().flatten();
-                let child_chunk_ids: Vec<String> = r.try_get(17).unwrap_or_default();
+                let pre_chunk_ids: Vec<i32> = r.try_get(13).unwrap_or_default();
+                let parent_chunk_id: Option<String> = r.try_get(14).ok().flatten();
+                let child_chunk_ids: Vec<String> = r.try_get(15).unwrap_or_default();

                let (rule, content_data) = if content.get("rule").is_some() {
                    let rule_str = content
@@ -2187,8 +2190,7 @@ impl PostgresDb {
                    chunk_index: chunk_index as u32,
                    chunk_type,
                    rule,
-                    start_time: r.get("start_time"),
-                    end_time: r.get("end_time"),
+
                    fps: r.get("fps"),
                    start_frame: r.get("start_frame"),
                    end_frame: r.get("end_frame"),
@@ -2337,8 +2339,6 @@ impl PostgresDb {
                    chunk_index: r.2 as u32,
                    chunk_type,
                    rule: ChunkRule::Rule1,
-                    start_time: r.4,
-                    end_time: r.5,
                    fps: r.6,
                    start_frame: r.7,
                    end_frame: r.8,
@@ -2497,8 +2497,8 @@ impl PostgresDb {
                        chunk_type: chunk_data
                            .map(|c| c.chunk_type.as_str().to_string())
                            .unwrap_or_default(),
-                        start_time: chunk_data.map(|c| c.start_time).unwrap_or(0.0),
-                        end_time: chunk_data.map(|c| c.end_time).unwrap_or(0.0),
+                        start_time: chunk_data.map(|c| c.start_time().seconds()).unwrap_or(0.0),
+                        end_time: chunk_data.map(|c| c.end_time().seconds()).unwrap_or(0.0),
                        text: chunk_data
                            .and_then(|c| c.text_content.clone())
                            .unwrap_or_default(),
@@ -2584,6 +2584,7 @@ impl PostgresDb {
                   error_count, last_error, started_at, updated_at, created_at
            FROM monitor_jobs
            WHERE status = 'pending'
+               OR (status = 'running' AND EXISTS (SELECT 1 FROM processor_results WHERE job_id = monitor_jobs.id AND status = 'pending'))
            ORDER BY created_at ASC
            LIMIT $1
            FOR UPDATE SKIP LOCKED
@@ -2619,6 +2620,77 @@ impl PostgresDb {
        Ok(jobs)
    }

+    pub async fn get_running_jobs_with_all_processors_done(
+        &self,
+        limit: i32,
+    ) -> Result<Vec<MonitorJob>> {
+        let rows = sqlx::query(
+            r#"
+            SELECT id, uuid, video_path, status, current_processor, progress_total, progress_current,
+                   error_count, last_error, started_at, updated_at, created_at
+            FROM monitor_jobs
+            WHERE status = 'running'
+              AND NOT EXISTS (
+                  SELECT 1 FROM processor_results pr 
+                  WHERE pr.job_id = monitor_jobs.id 
+                  AND pr.status IN ('pending', 'running')
+              )
+            ORDER BY updated_at ASC
+            LIMIT $1
+            FOR UPDATE SKIP LOCKED
+            "#
+        )
+        .bind(limit)
+        .fetch_all(&self.pool)
+        .await?;
+
+        let jobs: Vec<MonitorJob> = rows
+            .into_iter()
+            .map(|r| {
+                let status_str: String = r.get(3);
+                let status =
+                    MonitorJobStatus::from_db_str(&status_str).unwrap_or(MonitorJobStatus::Pending);
+                MonitorJob {
+                    id: r.get(0),
+                    uuid: r.get(1),
+                    video_path: r.get(2),
+                    status,
+                    current_processor: r.get(4),
+                    progress_total: r.get(5),
+                    progress_current: r.get(6),
+                    error_count: r.get(7),
+                    last_error: r.get(8),
+                    started_at: r.get(9),
+                    updated_at: r.get(10),
+                    created_at: r.get(11),
+                }
+            })
+            .collect();
+
+        Ok(jobs)
+    }
+
+    pub async fn update_job_processors_arrays(
+        &self,
+        job_id: i32,
+        completed_processors: Vec<String>,
+        failed_processors: Vec<String>,
+    ) -> Result<()> {
+        sqlx::query(
+            "UPDATE monitor_jobs 
+             SET completed_processors = $1, 
+                 failed_processors = $2, 
+                 updated_at = CURRENT_TIMESTAMP 
+             WHERE id = $3",
+        )
+        .bind(completed_processors)
+        .bind(failed_processors)
+        .bind(job_id)
+        .execute(&self.pool)
+        .await?;
+        Ok(())
+    }
+
    pub async fn update_job_status(&self, job_id: i32, status: MonitorJobStatus) -> Result<()> {
        sqlx::query(
            "UPDATE monitor_jobs SET status = $1, updated_at = CURRENT_TIMESTAMP WHERE id = $2",
@@ -2660,6 +2732,7 @@ impl PostgresDb {
            r#"
            INSERT INTO processor_results (job_id, processor, status)
            VALUES ($1, $2, 'pending')
+            ON CONFLICT (job_id, processor) DO UPDATE SET job_id = EXCLUDED.job_id
            RETURNING id
            "#,
        )
@@ -2685,8 +2758,8 @@ impl PostgresDb {
            SET status = $1,
                error_message = $2,
                output_data = $3,
+                started_at = CASE WHEN $1 = 'running' AND started_at IS NULL THEN CURRENT_TIMESTAMP ELSE started_at END,
                completed_at = CASE WHEN $1 IN ('completed', 'failed', 'skipped') THEN CURRENT_TIMESTAMP ELSE completed_at END,
-                duration_secs = CASE WHEN $1 IN ('completed', 'failed', 'skipped') THEN EXTRACT(EPOCH FROM (CURRENT_TIMESTAMP - started_at)) ELSE duration_secs END,
                updated_at = CURRENT_TIMESTAMP
            WHERE id = $4
            "#,
@@ -2705,7 +2778,7 @@ impl PostgresDb {
            r#"
            SELECT id, job_id, processor, status, output_path, started_at, completed_at,
                   error_message, progress_total, progress_current, last_checkpoint,
-                   created_at, updated_at, duration_secs
+                     created_at, updated_at, duration_secs
            FROM processor_results
            WHERE job_id = $1
            ORDER BY created_at ASC
--- a/src/core/db/sync_db.rs
+++ b/src/core/db/sync_db.rs
@@ -26,8 +26,8 @@ impl SyncDb {
        let uuid = chunk.uuid.clone();
        let chunk_id = chunk.chunk_id.clone();
        let chunk_type = chunk.chunk_type.as_str().to_string();
-        let start_time = chunk.start_time;
-        let end_time = chunk.end_time;
+        let start_time = chunk.start_time().seconds();
+        let end_time = chunk.end_time().seconds();

        let vector = self.embed_text(text).await?;

@@ -117,7 +117,7 @@ impl SyncDb {
                "language_probability": asr_result.language_probability,
            });

-            let chunk = Chunk::new(
+            let chunk = Chunk::from_seconds(
                0, // file_id - will be set later
                uuid.to_string(),
                i as u32,