feat: 新增 Job Worker 系統與 API 文檔全面更新

This commit is contained in:
Warren
2026-03-26 16:16:34 +08:00
parent 80399b1c12
commit 82955504f3
70 changed files with 3460 additions and 376 deletions

View File

@@ -28,13 +28,15 @@ pub struct ChunkDocument {
impl From<Chunk> for ChunkDocument {
fn from(chunk: Chunk) -> Self {
let start_time = chunk.start_time().seconds();
let end_time = chunk.end_time().seconds();
Self {
uuid: chunk.uuid,
chunk_id: chunk.chunk_id,
chunk_index: chunk.chunk_index,
chunk_type: chunk.chunk_type.as_str().to_string(),
start_time: chunk.start_time,
end_time: chunk.end_time,
start_time,
end_time,
fps: chunk.fps,
start_frame: chunk.start_frame,
end_frame: chunk.end_frame,
@@ -118,8 +120,6 @@ impl MongoDb {
chunk_index: doc.chunk_index,
chunk_type,
rule: ChunkRule::Rule1,
start_time: doc.start_time,
end_time: doc.end_time,
fps: doc.fps,
start_frame: doc.start_frame,
end_frame: doc.end_frame,
@@ -178,8 +178,6 @@ impl MongoDb {
chunk_index: doc.chunk_index,
chunk_type,
rule: ChunkRule::Rule1,
start_time: doc.start_time,
end_time: doc.end_time,
fps: doc.fps,
start_frame: doc.start_frame,
end_frame: doc.end_frame,
@@ -235,8 +233,6 @@ impl MongoDb {
chunk_index: doc.chunk_index,
chunk_type,
rule: ChunkRule::Rule1,
start_time: doc.start_time,
end_time: doc.end_time,
fps: doc.fps,
start_frame: doc.start_frame,
end_frame: doc.end_frame,

View File

@@ -126,8 +126,6 @@ pub struct PreChunk {
pub source_type: String,
pub source_file: Option<String>,
pub chunk_type: String,
pub start_time: f64,
pub end_time: f64,
pub start_frame: i64,
pub end_frame: i64,
pub fps: f64,
@@ -209,7 +207,7 @@ pub struct MonitorJobStats {
pub failed: i32,
}
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq)]
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Hash)]
#[serde(rename_all = "snake_case")]
pub enum ProcessorType {
Asr,
@@ -449,6 +447,12 @@ impl PostgresDb {
.parse::<u64>()
.unwrap_or(60);
tracing::info!(
"DB pool config: max_connections={}, acquire_timeout={}s",
max_connections,
acquire_timeout_secs
);
let pool_options = PgPoolOptions::new()
.max_connections(max_connections)
.acquire_timeout(std::time::Duration::from_secs(acquire_timeout_secs));
@@ -1770,8 +1774,8 @@ impl PostgresDb {
.bind(&chunk.chunk_id)
.bind(chunk.chunk_index as i32)
.bind(chunk.chunk_type.as_str())
.bind(chunk.start_time)
.bind(chunk.end_time)
.bind(chunk.start_time().seconds())
.bind(chunk.end_time().seconds())
.bind(chunk.fps)
.bind(chunk.start_frame)
.bind(chunk.end_frame)
@@ -1791,7 +1795,7 @@ impl PostgresDb {
pub async fn get_chunks_by_uuid(&self, uuid: &str) -> Result<Vec<Chunk>> {
let rows = sqlx::query(
"SELECT COALESCE(file_id, 0) as file_id, uuid, chunk_id, chunk_index, chunk_type, start_time, end_time, COALESCE(fps, 24.0) as fps, COALESCE(start_frame, 0) as start_frame, COALESCE(end_frame, 0) as end_frame, text_content, content, metadata, vector_id, COALESCE(frame_count, 0) as frame_count, pre_chunk_ids, parent_chunk_id, child_chunk_ids FROM chunks WHERE uuid = $1 ORDER BY chunk_index"
"SELECT COALESCE(file_id, 0) as file_id, uuid, chunk_id, chunk_index, chunk_type, COALESCE(fps, 24.0) as fps, COALESCE(start_frame, 0) as start_frame, COALESCE(end_frame, 0) as end_frame, text_content, content, metadata, vector_id, COALESCE(frame_count, 0) as frame_count, pre_chunk_ids, parent_chunk_id, child_chunk_ids FROM chunks WHERE uuid = $1 ORDER BY chunk_index"
)
.bind(uuid)
.fetch_all(&self.pool)
@@ -1811,12 +1815,12 @@ impl PostgresDb {
_ => ChunkType::TimeBased,
};
let content: serde_json::Value = r.get(11);
let metadata: Option<serde_json::Value> = r.get(12);
let content: serde_json::Value = r.get(9);
let metadata: Option<serde_json::Value> = r.get(10);
let pre_chunk_ids: Vec<i32> = r.try_get(15).unwrap_or_default();
let parent_chunk_id: Option<String> = r.try_get(16).ok().flatten();
let child_chunk_ids: Vec<String> = r.try_get(17).unwrap_or_default();
let pre_chunk_ids: Vec<i32> = r.try_get(13).unwrap_or_default();
let parent_chunk_id: Option<String> = r.try_get(14).ok().flatten();
let child_chunk_ids: Vec<String> = r.try_get(15).unwrap_or_default();
let (rule, content_data) = if content.get("rule").is_some() {
let rule_str = content
@@ -1844,8 +1848,7 @@ impl PostgresDb {
chunk_index: chunk_index as u32,
chunk_type,
rule,
start_time: r.get("start_time"),
end_time: r.get("end_time"),
fps: r.get("fps"),
start_frame: r.get("start_frame"),
end_frame: r.get("end_frame"),
@@ -1866,7 +1869,7 @@ impl PostgresDb {
pub async fn get_chunk_by_chunk_id(&self, chunk_id: &str) -> Result<Option<Chunk>> {
let row = sqlx::query(
"SELECT COALESCE(file_id, 0) as file_id, uuid, chunk_id, chunk_index, chunk_type, start_time, end_time, COALESCE(fps, 24.0) as fps, COALESCE(start_frame, 0) as start_frame, COALESCE(end_frame, 0) as end_frame, text_content, content, metadata, vector_id, COALESCE(frame_count, 0) as frame_count, pre_chunk_ids, parent_chunk_id, child_chunk_ids FROM chunks WHERE chunk_id = $1"
"SELECT COALESCE(file_id, 0) as file_id, uuid, chunk_id, chunk_index, chunk_type, COALESCE(fps, 24.0) as fps, COALESCE(start_frame, 0) as start_frame, COALESCE(end_frame, 0) as end_frame, text_content, content, metadata, vector_id, COALESCE(frame_count, 0) as frame_count, pre_chunk_ids, parent_chunk_id, child_chunk_ids FROM chunks WHERE chunk_id = $1"
)
.bind(chunk_id)
.fetch_optional(&self.pool)
@@ -1884,12 +1887,12 @@ impl PostgresDb {
_ => ChunkType::TimeBased,
};
let content: serde_json::Value = r.get(11);
let metadata: Option<serde_json::Value> = r.get(12);
let content: serde_json::Value = r.get(9);
let metadata: Option<serde_json::Value> = r.get(10);
let pre_chunk_ids: Vec<i32> = r.try_get(15).unwrap_or_default();
let parent_chunk_id: Option<String> = r.try_get(16).ok().flatten();
let child_chunk_ids: Vec<String> = r.try_get(17).unwrap_or_default();
let pre_chunk_ids: Vec<i32> = r.try_get(13).unwrap_or_default();
let parent_chunk_id: Option<String> = r.try_get(14).ok().flatten();
let child_chunk_ids: Vec<String> = r.try_get(15).unwrap_or_default();
let (rule, content_data) = if content.get("rule").is_some() {
let rule_str = content
@@ -1917,8 +1920,6 @@ impl PostgresDb {
chunk_index: chunk_index as u32,
chunk_type,
rule,
start_time: r.get("start_time"),
end_time: r.get("end_time"),
fps: r.get("fps"),
start_frame: r.get("start_frame"),
end_frame: r.get("end_frame"),
@@ -1942,6 +1943,9 @@ impl PostgresDb {
INSERT INTO pre_chunks (file_id, source_type, source_file, chunk_type, start_time, end_time, start_frame, end_frame, fps, raw_json, text_content, processed, chunk_id)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13)
ON CONFLICT (file_id, source_type, start_frame, end_frame) DO UPDATE SET
start_time = EXCLUDED.start_time,
end_time = EXCLUDED.end_time,
fps = EXCLUDED.fps,
raw_json = EXCLUDED.raw_json,
text_content = EXCLUDED.text_content,
processed = EXCLUDED.processed,
@@ -1953,8 +1957,8 @@ impl PostgresDb {
.bind(&pre_chunk.source_type)
.bind(&pre_chunk.source_file)
.bind(&pre_chunk.chunk_type)
.bind(pre_chunk.start_time)
.bind(pre_chunk.end_time)
.bind(pre_chunk.start_frame as f64 / pre_chunk.fps)
.bind(pre_chunk.end_frame as f64 / pre_chunk.fps)
.bind(pre_chunk.start_frame)
.bind(pre_chunk.end_frame)
.bind(pre_chunk.fps)
@@ -2108,8 +2112,7 @@ impl PostgresDb {
chunk_index: chunk_index as u32,
chunk_type,
rule,
start_time: r.get("start_time"),
end_time: r.get("end_time"),
fps: r.get("fps"),
start_frame: r.get("start_frame"),
end_frame: r.get("end_frame"),
@@ -2134,7 +2137,7 @@ impl PostgresDb {
}
let rows = sqlx::query(
"SELECT file_id, uuid, chunk_id, chunk_index, chunk_type, start_time, end_time, fps, start_frame, end_frame, text_content, content, metadata, vector_id, frame_count, pre_chunk_ids, parent_chunk_id, child_chunk_ids FROM chunks WHERE chunk_id = ANY($1) ORDER BY chunk_index",
"SELECT file_id, uuid, chunk_id, chunk_index, chunk_type, fps, start_frame, end_frame, text_content, content, metadata, vector_id, frame_count, pre_chunk_ids, parent_chunk_id, child_chunk_ids FROM chunks WHERE chunk_id = ANY($1) ORDER BY chunk_index",
)
.bind(chunk_ids)
.fetch_all(&self.pool)
@@ -2154,12 +2157,12 @@ impl PostgresDb {
_ => ChunkType::TimeBased,
};
let content: serde_json::Value = r.get(11);
let metadata: Option<serde_json::Value> = r.get(12);
let content: serde_json::Value = r.get(9);
let metadata: Option<serde_json::Value> = r.get(10);
let pre_chunk_ids: Vec<i32> = r.try_get(15).unwrap_or_default();
let parent_chunk_id: Option<String> = r.try_get(16).ok().flatten();
let child_chunk_ids: Vec<String> = r.try_get(17).unwrap_or_default();
let pre_chunk_ids: Vec<i32> = r.try_get(13).unwrap_or_default();
let parent_chunk_id: Option<String> = r.try_get(14).ok().flatten();
let child_chunk_ids: Vec<String> = r.try_get(15).unwrap_or_default();
let (rule, content_data) = if content.get("rule").is_some() {
let rule_str = content
@@ -2187,8 +2190,7 @@ impl PostgresDb {
chunk_index: chunk_index as u32,
chunk_type,
rule,
start_time: r.get("start_time"),
end_time: r.get("end_time"),
fps: r.get("fps"),
start_frame: r.get("start_frame"),
end_frame: r.get("end_frame"),
@@ -2337,8 +2339,6 @@ impl PostgresDb {
chunk_index: r.2 as u32,
chunk_type,
rule: ChunkRule::Rule1,
start_time: r.4,
end_time: r.5,
fps: r.6,
start_frame: r.7,
end_frame: r.8,
@@ -2497,8 +2497,8 @@ impl PostgresDb {
chunk_type: chunk_data
.map(|c| c.chunk_type.as_str().to_string())
.unwrap_or_default(),
start_time: chunk_data.map(|c| c.start_time).unwrap_or(0.0),
end_time: chunk_data.map(|c| c.end_time).unwrap_or(0.0),
start_time: chunk_data.map(|c| c.start_time().seconds()).unwrap_or(0.0),
end_time: chunk_data.map(|c| c.end_time().seconds()).unwrap_or(0.0),
text: chunk_data
.and_then(|c| c.text_content.clone())
.unwrap_or_default(),
@@ -2584,6 +2584,7 @@ impl PostgresDb {
error_count, last_error, started_at, updated_at, created_at
FROM monitor_jobs
WHERE status = 'pending'
OR (status = 'running' AND EXISTS (SELECT 1 FROM processor_results WHERE job_id = monitor_jobs.id AND status = 'pending'))
ORDER BY created_at ASC
LIMIT $1
FOR UPDATE SKIP LOCKED
@@ -2619,6 +2620,77 @@ impl PostgresDb {
Ok(jobs)
}
pub async fn get_running_jobs_with_all_processors_done(
&self,
limit: i32,
) -> Result<Vec<MonitorJob>> {
let rows = sqlx::query(
r#"
SELECT id, uuid, video_path, status, current_processor, progress_total, progress_current,
error_count, last_error, started_at, updated_at, created_at
FROM monitor_jobs
WHERE status = 'running'
AND NOT EXISTS (
SELECT 1 FROM processor_results pr
WHERE pr.job_id = monitor_jobs.id
AND pr.status IN ('pending', 'running')
)
ORDER BY updated_at ASC
LIMIT $1
FOR UPDATE SKIP LOCKED
"#
)
.bind(limit)
.fetch_all(&self.pool)
.await?;
let jobs: Vec<MonitorJob> = rows
.into_iter()
.map(|r| {
let status_str: String = r.get(3);
let status =
MonitorJobStatus::from_db_str(&status_str).unwrap_or(MonitorJobStatus::Pending);
MonitorJob {
id: r.get(0),
uuid: r.get(1),
video_path: r.get(2),
status,
current_processor: r.get(4),
progress_total: r.get(5),
progress_current: r.get(6),
error_count: r.get(7),
last_error: r.get(8),
started_at: r.get(9),
updated_at: r.get(10),
created_at: r.get(11),
}
})
.collect();
Ok(jobs)
}
pub async fn update_job_processors_arrays(
&self,
job_id: i32,
completed_processors: Vec<String>,
failed_processors: Vec<String>,
) -> Result<()> {
sqlx::query(
"UPDATE monitor_jobs
SET completed_processors = $1,
failed_processors = $2,
updated_at = CURRENT_TIMESTAMP
WHERE id = $3",
)
.bind(completed_processors)
.bind(failed_processors)
.bind(job_id)
.execute(&self.pool)
.await?;
Ok(())
}
pub async fn update_job_status(&self, job_id: i32, status: MonitorJobStatus) -> Result<()> {
sqlx::query(
"UPDATE monitor_jobs SET status = $1, updated_at = CURRENT_TIMESTAMP WHERE id = $2",
@@ -2660,6 +2732,7 @@ impl PostgresDb {
r#"
INSERT INTO processor_results (job_id, processor, status)
VALUES ($1, $2, 'pending')
ON CONFLICT (job_id, processor) DO UPDATE SET job_id = EXCLUDED.job_id
RETURNING id
"#,
)
@@ -2685,8 +2758,8 @@ impl PostgresDb {
SET status = $1,
error_message = $2,
output_data = $3,
started_at = CASE WHEN $1 = 'running' AND started_at IS NULL THEN CURRENT_TIMESTAMP ELSE started_at END,
completed_at = CASE WHEN $1 IN ('completed', 'failed', 'skipped') THEN CURRENT_TIMESTAMP ELSE completed_at END,
duration_secs = CASE WHEN $1 IN ('completed', 'failed', 'skipped') THEN EXTRACT(EPOCH FROM (CURRENT_TIMESTAMP - started_at)) ELSE duration_secs END,
updated_at = CURRENT_TIMESTAMP
WHERE id = $4
"#,
@@ -2705,7 +2778,7 @@ impl PostgresDb {
r#"
SELECT id, job_id, processor, status, output_path, started_at, completed_at,
error_message, progress_total, progress_current, last_checkpoint,
created_at, updated_at, duration_secs
created_at, updated_at, duration_secs
FROM processor_results
WHERE job_id = $1
ORDER BY created_at ASC

View File

@@ -26,8 +26,8 @@ impl SyncDb {
let uuid = chunk.uuid.clone();
let chunk_id = chunk.chunk_id.clone();
let chunk_type = chunk.chunk_type.as_str().to_string();
let start_time = chunk.start_time;
let end_time = chunk.end_time;
let start_time = chunk.start_time().seconds();
let end_time = chunk.end_time().seconds();
let vector = self.embed_text(text).await?;
@@ -117,7 +117,7 @@ impl SyncDb {
"language_probability": asr_result.language_probability,
});
let chunk = Chunk::new(
let chunk = Chunk::from_seconds(
0, // file_id - will be set later
uuid.to_string(),
i as u32,