cleanup: remove dead code and duplicate docs

- Remove session-ses_2f27.md (161KB raw session log)
- Remove 49 ROOT_* duplicate files across REFERENCE/
- Remove 14 duplicate files between REFERENCE/ root and history/
- Remove asr_legacy.rs (dead code, replaced by asr.rs)
- Remove src/core/worker/ (duplicate JobWorker)
- Remove src/core/layers/ (empty directory)
- Remove 4 .bak files in src/
- Remove 7 dead private methods in worker/processor.rs
- Remove backup directory from git tracking
This commit is contained in:
Warren
2026-05-04 01:31:21 +08:00
parent ee81e343ce
commit e75c4d6f07
3270 changed files with 35190 additions and 53367 deletions

View File

@@ -6,7 +6,9 @@ use tokio::sync::{mpsc, RwLock};
use tracing::{error, info};
use crate::core::config::{OUTPUT_DIR, PYTHON_PATH, SCRIPTS_DIR};
use crate::core::db::{MonitorJob, PostgresDb, ProcessorJobStatus, ProcessorType, RedisClient};
use crate::core::db::{
MonitorJob, PostgresDb, ProcessorJobStatus, ProcessorType, QdrantDb, RedisClient,
};
use crate::core::processor;
use crate::core::processor::asr::AsrResult;
use crate::core::processor::asrx::AsrxResult;
@@ -17,12 +19,16 @@ use crate::core::processor::pose::PoseResult;
use crate::core::processor::scene_classification::SceneClassificationResult;
use crate::core::processor::visual_chunk::VisualChunkResult;
use crate::core::processor::yolo::YoloResult;
use crate::worker::resources::SystemResources;
#[derive(Debug)]
struct ProcessorOutput {
data: serde_json::Value,
chunks_produced: i32,
frames_processed: i32,
total_frames: i32,
retry_count: i32,
pid: i32,
}
#[derive(Debug, Clone)]
@@ -35,7 +41,7 @@ pub struct ProcessorTask {
pub struct ProcessorPool {
db: Arc<PostgresDb>,
redis: Arc<RedisClient>,
max_concurrent: usize,
config_max: usize,
running: Arc<RwLock<HashMap<i32, ProcessorHandle>>>,
running_count: Arc<RwLock<usize>>,
}
@@ -51,15 +57,22 @@ impl ProcessorPool {
Self {
db,
redis,
max_concurrent,
config_max: max_concurrent,
running: Arc::new(RwLock::new(HashMap::new())),
running_count: Arc::new(RwLock::new(0)),
}
}
/// 根據系統資源計算當前安全的並發上限
pub async fn current_max(&self) -> usize {
let resources = SystemResources::check();
resources.safe_max_concurrent(self.config_max).max(1)
}
pub async fn can_start(&self) -> bool {
let count = *self.running_count.read().await;
count < self.max_concurrent
let max = self.current_max().await;
count < max
}
pub async fn start_processor(&self, task: ProcessorTask) -> Result<()> {
@@ -67,10 +80,14 @@ impl ProcessorPool {
let job_id = task.job.id;
let processor_type = task.processor_type;
let current_limit = self.current_max().await;
{
let mut count = self.running_count.write().await;
if *count >= self.max_concurrent {
anyhow::bail!("Max concurrent processors reached");
if *count >= current_limit {
anyhow::bail!(
"Max concurrent processors reached (dynamic limit: {})",
current_limit
);
}
*count += 1;
}
@@ -104,7 +121,17 @@ impl ProcessorPool {
.await;
let _ = redis
.update_worker_processor_status(&job.uuid, &processor_name, "running", None)
.update_worker_processor_status(
&job.uuid,
&processor_name,
"running",
None,
0,
0,
0,
0,
0,
)
.await;
let result = Self::run_processor(&db, &redis, &job, processor_type, cancel_rx).await;
@@ -142,6 +169,11 @@ impl ProcessorPool {
&processor_name,
"completed",
None,
output.frames_processed,
output.chunks_produced,
output.total_frames,
output.retry_count,
output.pid,
)
.await
{
@@ -173,6 +205,11 @@ impl ProcessorPool {
&processor_name,
"failed",
Some(&e.to_string()),
0,
0,
0,
0,
0,
)
.await
{
@@ -196,12 +233,8 @@ impl ProcessorPool {
// Generate output path
let output_dir = PathBuf::from(OUTPUT_DIR.as_str());
let output_path = output_dir.join(format!(
"job_{}_{}_{}.json",
job.id,
processor_type.as_str(),
chrono::Utc::now().timestamp_millis()
));
let output_path =
output_dir.join(format!("{}.{}.json", job.uuid, processor_type.as_str(),));
// Ensure output directory exists
if let Some(parent) = output_path.parent() {
@@ -229,11 +262,22 @@ impl ProcessorPool {
data: serde_json::to_value(result)?,
chunks_produced,
frames_processed: total_frames,
total_frames,
retry_count: 0,
pid: 0,
})
}
ProcessorType::Cut => {
let result =
processor::process_cut(video_path, output_path.to_str().unwrap(), uuid).await?;
let cut_path =
std::path::Path::new(&output_dir).join(format!("{}.cut.json", job.uuid));
let result = if cut_path.exists() {
// CUT 在 register 階段已完成,直接載入
let content =
std::fs::read_to_string(&cut_path).context("Failed to read cut.json")?;
serde_json::from_str(&content).context("Failed to parse cut.json")?
} else {
processor::process_cut(video_path, output_path.to_str().unwrap(), uuid).await?
};
let chunks_produced = result.scenes.len() as i32;
tracing::info!(
"CUT completed, storing {} scenes for {}",
@@ -247,6 +291,9 @@ impl ProcessorPool {
data: serde_json::to_value(result)?,
chunks_produced,
frames_processed: total_frames,
total_frames,
retry_count: 0,
pid: 0,
})
}
ProcessorType::Yolo => {
@@ -266,6 +313,9 @@ impl ProcessorPool {
data: serde_json::to_value(result)?,
chunks_produced,
frames_processed: total_frames,
total_frames,
retry_count: 0,
pid: 0,
})
}
ProcessorType::Ocr => {
@@ -284,6 +334,9 @@ impl ProcessorPool {
data: serde_json::to_value(result)?,
chunks_produced,
frames_processed: total_frames,
total_frames,
retry_count: 0,
pid: 0,
})
}
ProcessorType::Face => {
@@ -299,10 +352,17 @@ impl ProcessorPool {
if let Err(e) = Self::store_face_chunks(db, &job.uuid, &result).await {
tracing::error!("Failed to store FACE chunks for {}: {}", job.uuid, e);
}
// 將 face embedding 寫入 Qdrant
if let Err(e) = Self::store_face_embeddings_to_qdrant(&job.uuid, &result).await {
tracing::error!("Failed to store face embeddings to Qdrant: {}", e);
}
Ok(ProcessorOutput {
data: serde_json::to_value(result)?,
chunks_produced,
frames_processed: total_frames,
total_frames,
retry_count: 0,
pid: 0,
})
}
ProcessorType::Pose => {
@@ -322,6 +382,9 @@ impl ProcessorPool {
data: serde_json::to_value(result)?,
chunks_produced,
frames_processed: total_frames,
total_frames,
retry_count: 0,
pid: 0,
})
}
ProcessorType::Asrx => {
@@ -337,10 +400,17 @@ impl ProcessorPool {
if let Err(e) = Self::store_asrx_chunks(db, &job.uuid, &result).await {
tracing::error!("Failed to store ASRX chunks for {}: {}", job.uuid, e);
}
// 將 voice embeddings 寫入 Qdrant
if let Err(e) = Self::store_voice_embeddings_to_qdrant(&job.uuid, &result).await {
tracing::error!("Failed to store voice embeddings to Qdrant: {}", e);
}
Ok(ProcessorOutput {
data: serde_json::to_value(result)?,
chunks_produced,
frames_processed: total_frames,
total_frames,
retry_count: 0,
pid: 0,
})
}
ProcessorType::VisualChunk => {
@@ -363,15 +433,44 @@ impl ProcessorPool {
data: serde_json::to_value(result)?,
chunks_produced,
frames_processed: total_frames,
total_frames,
retry_count: 0,
pid: 0,
})
}
ProcessorType::Scene => {
let result = processor::process_scene_classification(
video_path,
output_path.to_str().unwrap(),
uuid,
)
.await?;
let scene_path =
std::path::Path::new(&output_dir).join(format!("{}.scene.json", job.uuid));
let scene_err =
std::path::Path::new(&output_dir).join(format!("{}.scene.err", job.uuid));
let scene_tmp =
std::path::Path::new(&output_dir).join(format!("{}.scene.tmp", job.uuid));
// 優先順序:.err跳過→ .json載入→ .tmp等待或重新執行
let result = if scene_err.exists() {
tracing::warn!("Scene previously failed for {}, skipping", job.uuid);
return Ok(ProcessorOutput {
data: serde_json::Value::Null,
chunks_produced: 0,
frames_processed: 0,
total_frames,
retry_count: 0,
pid: 0,
});
} else if scene_path.exists() {
tracing::info!("Scene JSON exists for {}, loading from file", job.uuid);
crate::core::processor::load_scene_from_file(scene_path.to_str().unwrap())?
} else if scene_tmp.exists() {
tracing::warn!("Scene tmp exists for {}, waiting for completion", job.uuid);
// 生產環境應等待,此處直接跳過避免卡住
crate::core::processor::SceneClassificationResult::default()
} else {
processor::process_scene_classification(
video_path,
output_path.to_str().unwrap(),
uuid,
)
.await?
};
let chunks_produced = result.scenes.len() as i32;
tracing::info!(
"Scene classification completed, storing {} scenes for {}",
@@ -385,186 +484,14 @@ impl ProcessorPool {
data: serde_json::to_value(result)?,
chunks_produced,
frames_processed: total_frames,
total_frames,
retry_count: 0,
pid: 0,
})
}
}
}
#[allow(dead_code)]
async fn run_asr(
_db: &PostgresDb,
_redis: &RedisClient,
video_path: &str,
_cancel_rx: &mut mpsc::Receiver<()>,
) -> Result<serde_json::Value> {
let script_path = std::env::var("MOMENTRY_ASR_SCRIPT")
.unwrap_or_else(|_| format!("{}/asr_processor.py", SCRIPTS_DIR.as_str()));
let output = tokio::process::Command::new(PYTHON_PATH.as_str())
.arg(&script_path)
.arg(video_path)
.output()
.await?;
if !output.status.success() {
let stderr = String::from_utf8_lossy(&output.stderr);
anyhow::bail!("ASR script failed: {}", stderr);
}
let result: serde_json::Value = serde_json::from_slice(&output.stdout)?;
Ok(result)
}
#[allow(dead_code)]
async fn run_cut(
_db: &PostgresDb,
_redis: &RedisClient,
video_path: &str,
_cancel_rx: &mut mpsc::Receiver<()>,
) -> Result<serde_json::Value> {
let script_path = std::env::var("MOMENTRY_CUT_SCRIPT")
.unwrap_or_else(|_| format!("{}/cut_processor.py", SCRIPTS_DIR.as_str()));
let output = tokio::process::Command::new(PYTHON_PATH.as_str())
.arg(&script_path)
.arg(video_path)
.output()
.await?;
if !output.status.success() {
let stderr = String::from_utf8_lossy(&output.stderr);
anyhow::bail!("CUT script failed: {}", stderr);
}
let result: serde_json::Value = serde_json::from_slice(&output.stdout)?;
Ok(result)
}
#[allow(dead_code)]
async fn run_yolo(
_db: &PostgresDb,
_redis: &RedisClient,
video_path: &str,
_cancel_rx: &mut mpsc::Receiver<()>,
) -> Result<serde_json::Value> {
let script_path = std::env::var("MOMENTRY_YOLO_SCRIPT")
.unwrap_or_else(|_| format!("{}/yolo_processor.py", SCRIPTS_DIR.as_str()));
let output = tokio::process::Command::new(PYTHON_PATH.as_str())
.arg(&script_path)
.arg(video_path)
.output()
.await?;
if !output.status.success() {
let stderr = String::from_utf8_lossy(&output.stderr);
anyhow::bail!("YOLO script failed: {}", stderr);
}
let result: serde_json::Value = serde_json::from_slice(&output.stdout)?;
Ok(result)
}
#[allow(dead_code)]
async fn run_ocr(
_db: &PostgresDb,
_redis: &RedisClient,
video_path: &str,
_cancel_rx: &mut mpsc::Receiver<()>,
) -> Result<serde_json::Value> {
let script_path = std::env::var("MOMENTRY_OCR_SCRIPT")
.unwrap_or_else(|_| format!("{}/ocr_processor.py", SCRIPTS_DIR.as_str()));
let output = tokio::process::Command::new(PYTHON_PATH.as_str())
.arg(&script_path)
.arg(video_path)
.output()
.await?;
if !output.status.success() {
let stderr = String::from_utf8_lossy(&output.stderr);
anyhow::bail!("OCR script failed: {}", stderr);
}
let result: serde_json::Value = serde_json::from_slice(&output.stdout)?;
Ok(result)
}
#[allow(dead_code)]
async fn run_face(
_db: &PostgresDb,
_redis: &RedisClient,
video_path: &str,
_cancel_rx: &mut mpsc::Receiver<()>,
) -> Result<serde_json::Value> {
let script_path = std::env::var("MOMENTRY_FACE_SCRIPT")
.unwrap_or_else(|_| format!("{}/face_processor.py", SCRIPTS_DIR.as_str()));
let output = tokio::process::Command::new(PYTHON_PATH.as_str())
.arg(&script_path)
.arg(video_path)
.output()
.await?;
if !output.status.success() {
let stderr = String::from_utf8_lossy(&output.stderr);
anyhow::bail!("Face script failed: {}", stderr);
}
let result: serde_json::Value = serde_json::from_slice(&output.stdout)?;
Ok(result)
}
#[allow(dead_code)]
async fn run_pose(
_db: &PostgresDb,
_redis: &RedisClient,
video_path: &str,
_cancel_rx: &mut mpsc::Receiver<()>,
) -> Result<serde_json::Value> {
let script_path = std::env::var("MOMENTRY_POSE_SCRIPT")
.unwrap_or_else(|_| format!("{}/pose_processor.py", SCRIPTS_DIR.as_str()));
let output = tokio::process::Command::new(PYTHON_PATH.as_str())
.arg(&script_path)
.arg(video_path)
.output()
.await?;
if !output.status.success() {
let stderr = String::from_utf8_lossy(&output.stderr);
anyhow::bail!("Pose script failed: {}", stderr);
}
let result: serde_json::Value = serde_json::from_slice(&output.stdout)?;
Ok(result)
}
#[allow(dead_code)]
async fn run_asrx(
_db: &PostgresDb,
_redis: &RedisClient,
video_path: &str,
_cancel_rx: &mut mpsc::Receiver<()>,
) -> Result<serde_json::Value> {
let script_path = std::env::var("MOMENTRY_ASRX_SCRIPT")
.unwrap_or_else(|_| format!("{}/asrx_processor_custom.py", SCRIPTS_DIR.as_str()));
let output = tokio::process::Command::new(PYTHON_PATH.as_str())
.arg(&script_path)
.arg(video_path)
.output()
.await?;
if !output.status.success() {
let stderr = String::from_utf8_lossy(&output.stderr);
anyhow::bail!("ASRX script failed: {}", stderr);
}
let result: serde_json::Value = serde_json::from_slice(&output.stdout)?;
Ok(result)
}
pub async fn store_asr_chunks(
db: &PostgresDb,
uuid: &str,
@@ -726,14 +653,7 @@ impl ProcessorPool {
"timestamp": frame.timestamp,
});
// We could potentially parse identity_id if it's already matched, but for raw ingestion it's None.
pre_chunks_to_store.push((
frame.frame as i64,
Some(frame.timestamp),
data,
None, // identity_id
None, // confidence
));
pre_chunks_to_store.push((frame.frame as i64, Some(frame.timestamp), data, None, None));
}
db.store_raw_pre_chunks_batch(uuid, "face", &pre_chunks_to_store)
@@ -741,6 +661,118 @@ impl ProcessorPool {
Ok(())
}
/// 將 face embeddings 寫入 Qdrant momentry_dev_face collection
pub async fn store_face_embeddings_to_qdrant(
uuid: &str,
face_result: &FaceResult,
) -> Result<()> {
let qdrant = QdrantDb::new();
let collection = format!(
"{}{}",
crate::core::config::REDIS_KEY_PREFIX
.as_str()
.trim_end_matches(':'),
"_face"
);
let mut count = 0;
for frame in &face_result.frames {
for face in &frame.faces {
if let Some(embedding) = &face.embedding {
if embedding.len() != 512 {
continue;
}
// 使用 hash 作為 Qdrant point ID需要 unsigned integer
// 使用 frame number 作為 Qdrant point IDu64
let point_id = frame.frame as u64;
let payload = serde_json::json!({
"file_uuid": uuid,
"face_id": face.face_id,
"frame": frame.frame,
"timestamp": frame.timestamp,
"x": face.x,
"y": face.y,
"width": face.width,
"height": face.height,
"confidence": face.confidence,
});
if let Err(e) = qdrant
.upsert_vector_to_collection(
&collection,
point_id,
embedding,
Some(payload),
)
.await
{
tracing::error!("Failed to upsert face vector {}: {}", point_id, e);
} else {
count += 1;
}
}
}
}
if count > 0 {
tracing::info!("Stored {} face embeddings to Qdrant for {}", count, uuid);
}
Ok(())
}
/// 將 voice embeddings 寫入 Qdrant momentry_dev_voice collection
pub async fn store_voice_embeddings_to_qdrant(
uuid: &str,
asrx_result: &AsrxResult,
) -> Result<()> {
let qdrant = QdrantDb::new();
let collection = format!(
"{}{}",
crate::core::config::REDIS_KEY_PREFIX
.as_str()
.trim_end_matches(':'),
"_voice"
);
let embeddings = match &asrx_result.embeddings {
Some(e) => e,
None => return Ok(()),
};
let mut count = 0;
for (i, segment) in asrx_result.segments.iter().enumerate() {
if let Some(emb) = embeddings.get(i) {
if emb.len() != 192 {
continue;
}
let payload = serde_json::json!({
"file_uuid": uuid,
"speaker_id": segment.speaker_id,
"segment_index": i,
"start_frame": segment.start_frame,
"end_frame": segment.end_frame,
"start_time": segment.start_time,
"end_time": segment.end_time,
});
if let Err(e) = qdrant
.upsert_vector_to_collection(&collection, i as u64, emb, Some(payload))
.await
{
tracing::error!("Failed to upsert voice vector {}: {}", i, e);
} else {
count += 1;
}
}
}
if count > 0 {
tracing::info!("Stored {} voice embeddings to Qdrant for {}", count, uuid);
}
Ok(())
}
pub async fn store_pose_chunks(
db: &PostgresDb,
uuid: &str,
@@ -787,12 +819,11 @@ impl ProcessorPool {
let data = serde_json::json!({
"text": segment.text,
"speaker_id": segment.speaker_id,
"timestamp": segment.start,
"timestamp": segment.start_time,
});
// ASRX is time-based, so we use segment index or start time as coordinate.
// Let's use index for simplicity in pre_chunks, or start time.
pre_chunks_to_store.push((i as i64, Some(segment.start), data, None, None));
pre_chunks_to_store.push((i as i64, Some(segment.start_time), data, None, None));
}
db.store_raw_pre_chunks_batch(uuid, "asrx", &pre_chunks_to_store)