feat: add job worker and duplicate registration check

Job Worker System:
- Add polling-based job worker (max 2 concurrent processors)
- Create monitor_jobs records when videos are registered
- Link videos.job_id to monitor_jobs
- Fix type mismatches (i32 vs i64) for database IDs

Duplicate Registration:
- Check if video already exists before registering
- Return existing video info with already_exists: true
- Use canonical path for UUID computation

USER_DATA_ROOT Configuration:
- Add MOMENTRY_USER_DATA_ROOT environment variable
- UUID computed from relative path (username/filename)
- Ensures consistent UUIDs when data root changes
This commit is contained in:
accusys
2026-03-25 02:50:31 +08:00
parent cd0f952aeb
commit 12a7b59232
9 changed files with 3669 additions and 229 deletions

354
src/worker/processor.rs Normal file
View File

@@ -0,0 +1,354 @@
use anyhow::{Context, Result};
use std::collections::HashMap;
use std::sync::Arc;
use tokio::sync::{mpsc, RwLock};
use tracing::{error, info};
use crate::core::db::RedisClient;
use crate::core::db::{MonitorJob, PostgresDb, ProcessorJobStatus, ProcessorType};
/// One unit of work handed to [`ProcessorPool::start_processor`]: a single
/// processor to run for a single monitor job.
#[derive(Clone, Debug)]
pub struct ProcessorTask {
    /// The job being processed; its `video_path` is the script input and its
    /// `uuid` is used for logging and Redis status updates.
    pub job: MonitorJob,
    /// Which processor to run for `job`.
    pub processor_type: ProcessorType,
    /// Identifier passed to `PostgresDb::update_processor_result` when the
    /// task's status changes.
    pub processor_result_id: i32,
}
/// Bounded pool that runs processor tasks as background tokio tasks.
pub struct ProcessorPool {
    /// Database handle used to persist processor status and results.
    db: Arc<PostgresDb>,
    /// Redis client used to publish per-processor worker status.
    redis: Arc<RedisClient>,
    /// Upper bound on the number of processors running at the same time.
    max_concurrent: usize,
    /// Handles of the processors currently registered as running, keyed by an
    /// `i32` identifier; shared with the spawned tasks so they can deregister
    /// themselves.
    running: Arc<RwLock<HashMap<i32, ProcessorHandle>>>,
    /// Number of occupied slots, compared against `max_concurrent`.
    running_count: Arc<RwLock<usize>>,
}
/// Bookkeeping entry for one running processor.
struct ProcessorHandle {
    /// Type of the processor behind this handle.
    // Stored for diagnostics only; nothing in this file reads it back, hence
    // the lint exemption.
    #[allow(dead_code)]
    processor_type: ProcessorType,
    /// Sending `()` on this channel requests cancellation of the processor.
    cancel_tx: mpsc::Sender<()>,
}
impl ProcessorPool {
pub fn new(db: Arc<PostgresDb>, redis: Arc<RedisClient>, max_concurrent: usize) -> Self {
Self {
db,
redis,
max_concurrent,
running: Arc::new(RwLock::new(HashMap::new())),
running_count: Arc::new(RwLock::new(0)),
}
}
pub async fn can_start(&self) -> bool {
let count = *self.running_count.read().await;
count < self.max_concurrent
}
pub async fn start_processor(&self, task: ProcessorTask) -> Result<()> {
let (cancel_tx, cancel_rx) = mpsc::channel(1);
let job_id = task.job.id;
let processor_type = task.processor_type;
{
let mut count = self.running_count.write().await;
if *count >= self.max_concurrent {
anyhow::bail!("Max concurrent processors reached");
}
*count += 1;
}
let running = self.running.clone();
let running_count = self.running_count.clone();
running.write().await.insert(
job_id,
ProcessorHandle {
processor_type,
cancel_tx,
},
);
let db = self.db.clone();
let redis = self.redis.clone();
let job = task.job.clone();
let processor_result_id = task.processor_result_id;
let processor_name = processor_type.as_str().to_string();
tokio::spawn(async move {
info!("Starting processor {} for job {}", processor_name, job.uuid);
let _ = db
.update_processor_result(
processor_result_id,
ProcessorJobStatus::Running,
None,
None,
)
.await;
let _ = redis
.update_worker_processor_status(&job.uuid, &processor_name, "running", None)
.await;
let result = Self::run_processor(&db, &redis, &job, processor_type, cancel_rx).await;
{
let mut running_guard = running.write().await;
running_guard.remove(&job_id);
let mut count_guard = running_count.write().await;
*count_guard -= 1;
}
match result {
Ok(output) => {
info!(
"Processor {} completed for job {}",
processor_name, job.uuid
);
let _ = db
.update_processor_result(
processor_result_id,
ProcessorJobStatus::Completed,
None,
Some(&output),
)
.await;
let _ = redis
.update_worker_processor_status(
&job.uuid,
&processor_name,
"completed",
None,
)
.await;
}
Err(e) => {
error!(
"Processor {} failed for job {}: {}",
processor_name, job.uuid, e
);
let _ = db
.update_processor_result(
processor_result_id,
ProcessorJobStatus::Failed,
Some(&e.to_string()),
None,
)
.await;
let _ = redis
.update_worker_processor_status(
&job.uuid,
&processor_name,
"failed",
Some(&e.to_string()),
)
.await;
}
}
});
Ok(())
}
async fn run_processor(
db: &PostgresDb,
redis: &RedisClient,
job: &MonitorJob,
processor_type: ProcessorType,
mut cancel_rx: mpsc::Receiver<()>,
) -> Result<serde_json::Value> {
let video_path = job.video_path.as_ref().context("No video path in job")?;
match processor_type {
ProcessorType::Asr => Self::run_asr(db, redis, video_path, &mut cancel_rx).await,
ProcessorType::Cut => Self::run_cut(db, redis, video_path, &mut cancel_rx).await,
ProcessorType::Yolo => Self::run_yolo(db, redis, video_path, &mut cancel_rx).await,
ProcessorType::Ocr => Self::run_ocr(db, redis, video_path, &mut cancel_rx).await,
ProcessorType::Face => Self::run_face(db, redis, video_path, &mut cancel_rx).await,
ProcessorType::Pose => Self::run_pose(db, redis, video_path, &mut cancel_rx).await,
ProcessorType::Asrx => Self::run_asrx(db, redis, video_path, &mut cancel_rx).await,
}
}
async fn run_asr(
_db: &PostgresDb,
_redis: &RedisClient,
video_path: &str,
_cancel_rx: &mut mpsc::Receiver<()>,
) -> Result<serde_json::Value> {
let script_path = std::env::var("MOMENTRY_ASR_SCRIPT")
.unwrap_or_else(|_| "/Users/accusys/momentry/scripts/asr.py".to_string());
let output = tokio::process::Command::new("/opt/homebrew/bin/python3.11")
.arg(&script_path)
.arg(video_path)
.output()
.await?;
if !output.status.success() {
let stderr = String::from_utf8_lossy(&output.stderr);
anyhow::bail!("ASR script failed: {}", stderr);
}
let result: serde_json::Value = serde_json::from_slice(&output.stdout)?;
Ok(result)
}
async fn run_cut(
_db: &PostgresDb,
_redis: &RedisClient,
video_path: &str,
_cancel_rx: &mut mpsc::Receiver<()>,
) -> Result<serde_json::Value> {
let script_path = std::env::var("MOMENTRY_CUT_SCRIPT")
.unwrap_or_else(|_| "/Users/accusys/momentry/scripts/cut.py".to_string());
let output = tokio::process::Command::new("/opt/homebrew/bin/python3.11")
.arg(&script_path)
.arg(video_path)
.output()
.await?;
if !output.status.success() {
let stderr = String::from_utf8_lossy(&output.stderr);
anyhow::bail!("CUT script failed: {}", stderr);
}
let result: serde_json::Value = serde_json::from_slice(&output.stdout)?;
Ok(result)
}
async fn run_yolo(
_db: &PostgresDb,
_redis: &RedisClient,
video_path: &str,
_cancel_rx: &mut mpsc::Receiver<()>,
) -> Result<serde_json::Value> {
let script_path = std::env::var("MOMENTRY_YOLO_SCRIPT")
.unwrap_or_else(|_| "/Users/accusys/momentry/scripts/yolo_processor.py".to_string());
let output = tokio::process::Command::new("/opt/homebrew/bin/python3.11")
.arg(&script_path)
.arg(video_path)
.output()
.await?;
if !output.status.success() {
let stderr = String::from_utf8_lossy(&output.stderr);
anyhow::bail!("YOLO script failed: {}", stderr);
}
let result: serde_json::Value = serde_json::from_slice(&output.stdout)?;
Ok(result)
}
async fn run_ocr(
_db: &PostgresDb,
_redis: &RedisClient,
video_path: &str,
_cancel_rx: &mut mpsc::Receiver<()>,
) -> Result<serde_json::Value> {
let script_path = std::env::var("MOMENTRY_OCR_SCRIPT")
.unwrap_or_else(|_| "/Users/accusys/momentry/scripts/ocr.py".to_string());
let output = tokio::process::Command::new("/opt/homebrew/bin/python3.11")
.arg(&script_path)
.arg(video_path)
.output()
.await?;
if !output.status.success() {
let stderr = String::from_utf8_lossy(&output.stderr);
anyhow::bail!("OCR script failed: {}", stderr);
}
let result: serde_json::Value = serde_json::from_slice(&output.stdout)?;
Ok(result)
}
async fn run_face(
_db: &PostgresDb,
_redis: &RedisClient,
video_path: &str,
_cancel_rx: &mut mpsc::Receiver<()>,
) -> Result<serde_json::Value> {
let script_path = std::env::var("MOMENTRY_FACE_SCRIPT")
.unwrap_or_else(|_| "/Users/accusys/momentry/scripts/face.py".to_string());
let output = tokio::process::Command::new("/opt/homebrew/bin/python3.11")
.arg(&script_path)
.arg(video_path)
.output()
.await?;
if !output.status.success() {
let stderr = String::from_utf8_lossy(&output.stderr);
anyhow::bail!("Face script failed: {}", stderr);
}
let result: serde_json::Value = serde_json::from_slice(&output.stdout)?;
Ok(result)
}
async fn run_pose(
_db: &PostgresDb,
_redis: &RedisClient,
video_path: &str,
_cancel_rx: &mut mpsc::Receiver<()>,
) -> Result<serde_json::Value> {
let script_path = std::env::var("MOMENTRY_POSE_SCRIPT")
.unwrap_or_else(|_| "/Users/accusys/momentry/scripts/pose.py".to_string());
let output = tokio::process::Command::new("/opt/homebrew/bin/python3.11")
.arg(&script_path)
.arg(video_path)
.output()
.await?;
if !output.status.success() {
let stderr = String::from_utf8_lossy(&output.stderr);
anyhow::bail!("Pose script failed: {}", stderr);
}
let result: serde_json::Value = serde_json::from_slice(&output.stdout)?;
Ok(result)
}
async fn run_asrx(
_db: &PostgresDb,
_redis: &RedisClient,
video_path: &str,
_cancel_rx: &mut mpsc::Receiver<()>,
) -> Result<serde_json::Value> {
let script_path = std::env::var("MOMENTRY_ASRX_SCRIPT")
.unwrap_or_else(|_| "/Users/accusys/momentry/scripts/asrx.py".to_string());
let output = tokio::process::Command::new("/opt/homebrew/bin/python3.11")
.arg(&script_path)
.arg(video_path)
.output()
.await?;
if !output.status.success() {
let stderr = String::from_utf8_lossy(&output.stderr);
anyhow::bail!("ASRX script failed: {}", stderr);
}
let result: serde_json::Value = serde_json::from_slice(&output.stdout)?;
Ok(result)
}
pub async fn get_running_count(&self) -> usize {
*self.running_count.read().await
}
pub async fn cancel_all(&self) {
let mut running = self.running.write().await;
for (_, handle) in running.drain() {
let _ = handle.cancel_tx.send(()).await;
}
let mut count = self.running_count.write().await;
*count = 0;
}
}