feat: 新增 Job Worker 系統與 API 文檔全面更新
This commit is contained in:
@@ -28,13 +28,15 @@ pub struct ChunkDocument {
|
||||
|
||||
impl From<Chunk> for ChunkDocument {
|
||||
fn from(chunk: Chunk) -> Self {
|
||||
let start_time = chunk.start_time().seconds();
|
||||
let end_time = chunk.end_time().seconds();
|
||||
Self {
|
||||
uuid: chunk.uuid,
|
||||
chunk_id: chunk.chunk_id,
|
||||
chunk_index: chunk.chunk_index,
|
||||
chunk_type: chunk.chunk_type.as_str().to_string(),
|
||||
start_time: chunk.start_time,
|
||||
end_time: chunk.end_time,
|
||||
start_time,
|
||||
end_time,
|
||||
fps: chunk.fps,
|
||||
start_frame: chunk.start_frame,
|
||||
end_frame: chunk.end_frame,
|
||||
@@ -118,8 +120,6 @@ impl MongoDb {
|
||||
chunk_index: doc.chunk_index,
|
||||
chunk_type,
|
||||
rule: ChunkRule::Rule1,
|
||||
start_time: doc.start_time,
|
||||
end_time: doc.end_time,
|
||||
fps: doc.fps,
|
||||
start_frame: doc.start_frame,
|
||||
end_frame: doc.end_frame,
|
||||
@@ -178,8 +178,6 @@ impl MongoDb {
|
||||
chunk_index: doc.chunk_index,
|
||||
chunk_type,
|
||||
rule: ChunkRule::Rule1,
|
||||
start_time: doc.start_time,
|
||||
end_time: doc.end_time,
|
||||
fps: doc.fps,
|
||||
start_frame: doc.start_frame,
|
||||
end_frame: doc.end_frame,
|
||||
@@ -235,8 +233,6 @@ impl MongoDb {
|
||||
chunk_index: doc.chunk_index,
|
||||
chunk_type,
|
||||
rule: ChunkRule::Rule1,
|
||||
start_time: doc.start_time,
|
||||
end_time: doc.end_time,
|
||||
fps: doc.fps,
|
||||
start_frame: doc.start_frame,
|
||||
end_frame: doc.end_frame,
|
||||
|
||||
@@ -126,8 +126,6 @@ pub struct PreChunk {
|
||||
pub source_type: String,
|
||||
pub source_file: Option<String>,
|
||||
pub chunk_type: String,
|
||||
pub start_time: f64,
|
||||
pub end_time: f64,
|
||||
pub start_frame: i64,
|
||||
pub end_frame: i64,
|
||||
pub fps: f64,
|
||||
@@ -209,7 +207,7 @@ pub struct MonitorJobStats {
|
||||
pub failed: i32,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq)]
|
||||
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Hash)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum ProcessorType {
|
||||
Asr,
|
||||
@@ -449,6 +447,12 @@ impl PostgresDb {
|
||||
.parse::<u64>()
|
||||
.unwrap_or(60);
|
||||
|
||||
tracing::info!(
|
||||
"DB pool config: max_connections={}, acquire_timeout={}s",
|
||||
max_connections,
|
||||
acquire_timeout_secs
|
||||
);
|
||||
|
||||
let pool_options = PgPoolOptions::new()
|
||||
.max_connections(max_connections)
|
||||
.acquire_timeout(std::time::Duration::from_secs(acquire_timeout_secs));
|
||||
@@ -1770,8 +1774,8 @@ impl PostgresDb {
|
||||
.bind(&chunk.chunk_id)
|
||||
.bind(chunk.chunk_index as i32)
|
||||
.bind(chunk.chunk_type.as_str())
|
||||
.bind(chunk.start_time)
|
||||
.bind(chunk.end_time)
|
||||
.bind(chunk.start_time().seconds())
|
||||
.bind(chunk.end_time().seconds())
|
||||
.bind(chunk.fps)
|
||||
.bind(chunk.start_frame)
|
||||
.bind(chunk.end_frame)
|
||||
@@ -1791,7 +1795,7 @@ impl PostgresDb {
|
||||
|
||||
pub async fn get_chunks_by_uuid(&self, uuid: &str) -> Result<Vec<Chunk>> {
|
||||
let rows = sqlx::query(
|
||||
"SELECT COALESCE(file_id, 0) as file_id, uuid, chunk_id, chunk_index, chunk_type, start_time, end_time, COALESCE(fps, 24.0) as fps, COALESCE(start_frame, 0) as start_frame, COALESCE(end_frame, 0) as end_frame, text_content, content, metadata, vector_id, COALESCE(frame_count, 0) as frame_count, pre_chunk_ids, parent_chunk_id, child_chunk_ids FROM chunks WHERE uuid = $1 ORDER BY chunk_index"
|
||||
"SELECT COALESCE(file_id, 0) as file_id, uuid, chunk_id, chunk_index, chunk_type, COALESCE(fps, 24.0) as fps, COALESCE(start_frame, 0) as start_frame, COALESCE(end_frame, 0) as end_frame, text_content, content, metadata, vector_id, COALESCE(frame_count, 0) as frame_count, pre_chunk_ids, parent_chunk_id, child_chunk_ids FROM chunks WHERE uuid = $1 ORDER BY chunk_index"
|
||||
)
|
||||
.bind(uuid)
|
||||
.fetch_all(&self.pool)
|
||||
@@ -1811,12 +1815,12 @@ impl PostgresDb {
|
||||
_ => ChunkType::TimeBased,
|
||||
};
|
||||
|
||||
let content: serde_json::Value = r.get(11);
|
||||
let metadata: Option<serde_json::Value> = r.get(12);
|
||||
let content: serde_json::Value = r.get(9);
|
||||
let metadata: Option<serde_json::Value> = r.get(10);
|
||||
|
||||
let pre_chunk_ids: Vec<i32> = r.try_get(15).unwrap_or_default();
|
||||
let parent_chunk_id: Option<String> = r.try_get(16).ok().flatten();
|
||||
let child_chunk_ids: Vec<String> = r.try_get(17).unwrap_or_default();
|
||||
let pre_chunk_ids: Vec<i32> = r.try_get(13).unwrap_or_default();
|
||||
let parent_chunk_id: Option<String> = r.try_get(14).ok().flatten();
|
||||
let child_chunk_ids: Vec<String> = r.try_get(15).unwrap_or_default();
|
||||
|
||||
let (rule, content_data) = if content.get("rule").is_some() {
|
||||
let rule_str = content
|
||||
@@ -1844,8 +1848,7 @@ impl PostgresDb {
|
||||
chunk_index: chunk_index as u32,
|
||||
chunk_type,
|
||||
rule,
|
||||
start_time: r.get("start_time"),
|
||||
end_time: r.get("end_time"),
|
||||
|
||||
fps: r.get("fps"),
|
||||
start_frame: r.get("start_frame"),
|
||||
end_frame: r.get("end_frame"),
|
||||
@@ -1866,7 +1869,7 @@ impl PostgresDb {
|
||||
|
||||
pub async fn get_chunk_by_chunk_id(&self, chunk_id: &str) -> Result<Option<Chunk>> {
|
||||
let row = sqlx::query(
|
||||
"SELECT COALESCE(file_id, 0) as file_id, uuid, chunk_id, chunk_index, chunk_type, start_time, end_time, COALESCE(fps, 24.0) as fps, COALESCE(start_frame, 0) as start_frame, COALESCE(end_frame, 0) as end_frame, text_content, content, metadata, vector_id, COALESCE(frame_count, 0) as frame_count, pre_chunk_ids, parent_chunk_id, child_chunk_ids FROM chunks WHERE chunk_id = $1"
|
||||
"SELECT COALESCE(file_id, 0) as file_id, uuid, chunk_id, chunk_index, chunk_type, COALESCE(fps, 24.0) as fps, COALESCE(start_frame, 0) as start_frame, COALESCE(end_frame, 0) as end_frame, text_content, content, metadata, vector_id, COALESCE(frame_count, 0) as frame_count, pre_chunk_ids, parent_chunk_id, child_chunk_ids FROM chunks WHERE chunk_id = $1"
|
||||
)
|
||||
.bind(chunk_id)
|
||||
.fetch_optional(&self.pool)
|
||||
@@ -1884,12 +1887,12 @@ impl PostgresDb {
|
||||
_ => ChunkType::TimeBased,
|
||||
};
|
||||
|
||||
let content: serde_json::Value = r.get(11);
|
||||
let metadata: Option<serde_json::Value> = r.get(12);
|
||||
let content: serde_json::Value = r.get(9);
|
||||
let metadata: Option<serde_json::Value> = r.get(10);
|
||||
|
||||
let pre_chunk_ids: Vec<i32> = r.try_get(15).unwrap_or_default();
|
||||
let parent_chunk_id: Option<String> = r.try_get(16).ok().flatten();
|
||||
let child_chunk_ids: Vec<String> = r.try_get(17).unwrap_or_default();
|
||||
let pre_chunk_ids: Vec<i32> = r.try_get(13).unwrap_or_default();
|
||||
let parent_chunk_id: Option<String> = r.try_get(14).ok().flatten();
|
||||
let child_chunk_ids: Vec<String> = r.try_get(15).unwrap_or_default();
|
||||
|
||||
let (rule, content_data) = if content.get("rule").is_some() {
|
||||
let rule_str = content
|
||||
@@ -1917,8 +1920,6 @@ impl PostgresDb {
|
||||
chunk_index: chunk_index as u32,
|
||||
chunk_type,
|
||||
rule,
|
||||
start_time: r.get("start_time"),
|
||||
end_time: r.get("end_time"),
|
||||
fps: r.get("fps"),
|
||||
start_frame: r.get("start_frame"),
|
||||
end_frame: r.get("end_frame"),
|
||||
@@ -1942,6 +1943,9 @@ impl PostgresDb {
|
||||
INSERT INTO pre_chunks (file_id, source_type, source_file, chunk_type, start_time, end_time, start_frame, end_frame, fps, raw_json, text_content, processed, chunk_id)
|
||||
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13)
|
||||
ON CONFLICT (file_id, source_type, start_frame, end_frame) DO UPDATE SET
|
||||
start_time = EXCLUDED.start_time,
|
||||
end_time = EXCLUDED.end_time,
|
||||
fps = EXCLUDED.fps,
|
||||
raw_json = EXCLUDED.raw_json,
|
||||
text_content = EXCLUDED.text_content,
|
||||
processed = EXCLUDED.processed,
|
||||
@@ -1953,8 +1957,8 @@ impl PostgresDb {
|
||||
.bind(&pre_chunk.source_type)
|
||||
.bind(&pre_chunk.source_file)
|
||||
.bind(&pre_chunk.chunk_type)
|
||||
.bind(pre_chunk.start_time)
|
||||
.bind(pre_chunk.end_time)
|
||||
.bind(pre_chunk.start_frame as f64 / pre_chunk.fps)
|
||||
.bind(pre_chunk.end_frame as f64 / pre_chunk.fps)
|
||||
.bind(pre_chunk.start_frame)
|
||||
.bind(pre_chunk.end_frame)
|
||||
.bind(pre_chunk.fps)
|
||||
@@ -2108,8 +2112,7 @@ impl PostgresDb {
|
||||
chunk_index: chunk_index as u32,
|
||||
chunk_type,
|
||||
rule,
|
||||
start_time: r.get("start_time"),
|
||||
end_time: r.get("end_time"),
|
||||
|
||||
fps: r.get("fps"),
|
||||
start_frame: r.get("start_frame"),
|
||||
end_frame: r.get("end_frame"),
|
||||
@@ -2134,7 +2137,7 @@ impl PostgresDb {
|
||||
}
|
||||
|
||||
let rows = sqlx::query(
|
||||
"SELECT file_id, uuid, chunk_id, chunk_index, chunk_type, start_time, end_time, fps, start_frame, end_frame, text_content, content, metadata, vector_id, frame_count, pre_chunk_ids, parent_chunk_id, child_chunk_ids FROM chunks WHERE chunk_id = ANY($1) ORDER BY chunk_index",
|
||||
"SELECT file_id, uuid, chunk_id, chunk_index, chunk_type, fps, start_frame, end_frame, text_content, content, metadata, vector_id, frame_count, pre_chunk_ids, parent_chunk_id, child_chunk_ids FROM chunks WHERE chunk_id = ANY($1) ORDER BY chunk_index",
|
||||
)
|
||||
.bind(chunk_ids)
|
||||
.fetch_all(&self.pool)
|
||||
@@ -2154,12 +2157,12 @@ impl PostgresDb {
|
||||
_ => ChunkType::TimeBased,
|
||||
};
|
||||
|
||||
let content: serde_json::Value = r.get(11);
|
||||
let metadata: Option<serde_json::Value> = r.get(12);
|
||||
let content: serde_json::Value = r.get(9);
|
||||
let metadata: Option<serde_json::Value> = r.get(10);
|
||||
|
||||
let pre_chunk_ids: Vec<i32> = r.try_get(15).unwrap_or_default();
|
||||
let parent_chunk_id: Option<String> = r.try_get(16).ok().flatten();
|
||||
let child_chunk_ids: Vec<String> = r.try_get(17).unwrap_or_default();
|
||||
let pre_chunk_ids: Vec<i32> = r.try_get(13).unwrap_or_default();
|
||||
let parent_chunk_id: Option<String> = r.try_get(14).ok().flatten();
|
||||
let child_chunk_ids: Vec<String> = r.try_get(15).unwrap_or_default();
|
||||
|
||||
let (rule, content_data) = if content.get("rule").is_some() {
|
||||
let rule_str = content
|
||||
@@ -2187,8 +2190,7 @@ impl PostgresDb {
|
||||
chunk_index: chunk_index as u32,
|
||||
chunk_type,
|
||||
rule,
|
||||
start_time: r.get("start_time"),
|
||||
end_time: r.get("end_time"),
|
||||
|
||||
fps: r.get("fps"),
|
||||
start_frame: r.get("start_frame"),
|
||||
end_frame: r.get("end_frame"),
|
||||
@@ -2337,8 +2339,6 @@ impl PostgresDb {
|
||||
chunk_index: r.2 as u32,
|
||||
chunk_type,
|
||||
rule: ChunkRule::Rule1,
|
||||
start_time: r.4,
|
||||
end_time: r.5,
|
||||
fps: r.6,
|
||||
start_frame: r.7,
|
||||
end_frame: r.8,
|
||||
@@ -2497,8 +2497,8 @@ impl PostgresDb {
|
||||
chunk_type: chunk_data
|
||||
.map(|c| c.chunk_type.as_str().to_string())
|
||||
.unwrap_or_default(),
|
||||
start_time: chunk_data.map(|c| c.start_time).unwrap_or(0.0),
|
||||
end_time: chunk_data.map(|c| c.end_time).unwrap_or(0.0),
|
||||
start_time: chunk_data.map(|c| c.start_time().seconds()).unwrap_or(0.0),
|
||||
end_time: chunk_data.map(|c| c.end_time().seconds()).unwrap_or(0.0),
|
||||
text: chunk_data
|
||||
.and_then(|c| c.text_content.clone())
|
||||
.unwrap_or_default(),
|
||||
@@ -2584,6 +2584,7 @@ impl PostgresDb {
|
||||
error_count, last_error, started_at, updated_at, created_at
|
||||
FROM monitor_jobs
|
||||
WHERE status = 'pending'
|
||||
OR (status = 'running' AND EXISTS (SELECT 1 FROM processor_results WHERE job_id = monitor_jobs.id AND status = 'pending'))
|
||||
ORDER BY created_at ASC
|
||||
LIMIT $1
|
||||
FOR UPDATE SKIP LOCKED
|
||||
@@ -2619,6 +2620,77 @@ impl PostgresDb {
|
||||
Ok(jobs)
|
||||
}
|
||||
|
||||
pub async fn get_running_jobs_with_all_processors_done(
|
||||
&self,
|
||||
limit: i32,
|
||||
) -> Result<Vec<MonitorJob>> {
|
||||
let rows = sqlx::query(
|
||||
r#"
|
||||
SELECT id, uuid, video_path, status, current_processor, progress_total, progress_current,
|
||||
error_count, last_error, started_at, updated_at, created_at
|
||||
FROM monitor_jobs
|
||||
WHERE status = 'running'
|
||||
AND NOT EXISTS (
|
||||
SELECT 1 FROM processor_results pr
|
||||
WHERE pr.job_id = monitor_jobs.id
|
||||
AND pr.status IN ('pending', 'running')
|
||||
)
|
||||
ORDER BY updated_at ASC
|
||||
LIMIT $1
|
||||
FOR UPDATE SKIP LOCKED
|
||||
"#
|
||||
)
|
||||
.bind(limit)
|
||||
.fetch_all(&self.pool)
|
||||
.await?;
|
||||
|
||||
let jobs: Vec<MonitorJob> = rows
|
||||
.into_iter()
|
||||
.map(|r| {
|
||||
let status_str: String = r.get(3);
|
||||
let status =
|
||||
MonitorJobStatus::from_db_str(&status_str).unwrap_or(MonitorJobStatus::Pending);
|
||||
MonitorJob {
|
||||
id: r.get(0),
|
||||
uuid: r.get(1),
|
||||
video_path: r.get(2),
|
||||
status,
|
||||
current_processor: r.get(4),
|
||||
progress_total: r.get(5),
|
||||
progress_current: r.get(6),
|
||||
error_count: r.get(7),
|
||||
last_error: r.get(8),
|
||||
started_at: r.get(9),
|
||||
updated_at: r.get(10),
|
||||
created_at: r.get(11),
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
|
||||
Ok(jobs)
|
||||
}
|
||||
|
||||
pub async fn update_job_processors_arrays(
|
||||
&self,
|
||||
job_id: i32,
|
||||
completed_processors: Vec<String>,
|
||||
failed_processors: Vec<String>,
|
||||
) -> Result<()> {
|
||||
sqlx::query(
|
||||
"UPDATE monitor_jobs
|
||||
SET completed_processors = $1,
|
||||
failed_processors = $2,
|
||||
updated_at = CURRENT_TIMESTAMP
|
||||
WHERE id = $3",
|
||||
)
|
||||
.bind(completed_processors)
|
||||
.bind(failed_processors)
|
||||
.bind(job_id)
|
||||
.execute(&self.pool)
|
||||
.await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn update_job_status(&self, job_id: i32, status: MonitorJobStatus) -> Result<()> {
|
||||
sqlx::query(
|
||||
"UPDATE monitor_jobs SET status = $1, updated_at = CURRENT_TIMESTAMP WHERE id = $2",
|
||||
@@ -2660,6 +2732,7 @@ impl PostgresDb {
|
||||
r#"
|
||||
INSERT INTO processor_results (job_id, processor, status)
|
||||
VALUES ($1, $2, 'pending')
|
||||
ON CONFLICT (job_id, processor) DO UPDATE SET job_id = EXCLUDED.job_id
|
||||
RETURNING id
|
||||
"#,
|
||||
)
|
||||
@@ -2685,8 +2758,8 @@ impl PostgresDb {
|
||||
SET status = $1,
|
||||
error_message = $2,
|
||||
output_data = $3,
|
||||
started_at = CASE WHEN $1 = 'running' AND started_at IS NULL THEN CURRENT_TIMESTAMP ELSE started_at END,
|
||||
completed_at = CASE WHEN $1 IN ('completed', 'failed', 'skipped') THEN CURRENT_TIMESTAMP ELSE completed_at END,
|
||||
duration_secs = CASE WHEN $1 IN ('completed', 'failed', 'skipped') THEN EXTRACT(EPOCH FROM (CURRENT_TIMESTAMP - started_at)) ELSE duration_secs END,
|
||||
updated_at = CURRENT_TIMESTAMP
|
||||
WHERE id = $4
|
||||
"#,
|
||||
@@ -2705,7 +2778,7 @@ impl PostgresDb {
|
||||
r#"
|
||||
SELECT id, job_id, processor, status, output_path, started_at, completed_at,
|
||||
error_message, progress_total, progress_current, last_checkpoint,
|
||||
created_at, updated_at, duration_secs
|
||||
created_at, updated_at, duration_secs
|
||||
FROM processor_results
|
||||
WHERE job_id = $1
|
||||
ORDER BY created_at ASC
|
||||
|
||||
@@ -26,8 +26,8 @@ impl SyncDb {
|
||||
let uuid = chunk.uuid.clone();
|
||||
let chunk_id = chunk.chunk_id.clone();
|
||||
let chunk_type = chunk.chunk_type.as_str().to_string();
|
||||
let start_time = chunk.start_time;
|
||||
let end_time = chunk.end_time;
|
||||
let start_time = chunk.start_time().seconds();
|
||||
let end_time = chunk.end_time().seconds();
|
||||
|
||||
let vector = self.embed_text(text).await?;
|
||||
|
||||
@@ -117,7 +117,7 @@ impl SyncDb {
|
||||
"language_probability": asr_result.language_probability,
|
||||
});
|
||||
|
||||
let chunk = Chunk::new(
|
||||
let chunk = Chunk::from_seconds(
|
||||
0, // file_id - will be set later
|
||||
uuid.to_string(),
|
||||
i as u32,
|
||||
|
||||
Reference in New Issue
Block a user