chore: backup before migration to new repo

This commit is contained in:
Warren
2026-04-23 16:46:02 +08:00
parent 13dd3b30f3
commit 59809dae1f
40 changed files with 5566 additions and 1783 deletions

View File

@@ -5,6 +5,7 @@ use std::time::Duration;
use tokio::time::sleep;
use tracing::{error, info, warn};
use crate::core::chunk::{rule1_ingest, rule3_ingest};
use crate::core::db::{
MonitorJobStatus, PostgresDb, ProcessorJobStatus, ProcessorType, RedisClient, VideoStatus,
};
@@ -210,12 +211,58 @@ impl JobWorker {
.map(|r| r.processor_type.as_str().to_string())
.collect();
// Check prerequisites for chunking BEFORE moving arrays:
//   - Rule 1 chunking is gated on `has_asr && has_asrx`
//   - Rule 3 scene chunking is gated on `has_cut && has_asr`
// The flags are computed here because `completed_processors` is handed over by
// value to `update_job_processors_arrays` just below.
let has_asr = completed_processors.iter().any(|p| p == "asr");
let has_asrx = completed_processors.iter().any(|p| p == "asrx");
let has_cut = completed_processors.iter().any(|p| p == "cut");
// Update processor arrays in job record
self.db
.update_job_processors_arrays(job_id, completed_processors, failed_processors)
.await?;
if all_completed && !any_failed {
// 🚀 P1 Trigger: Rule 1 Chunking
// Fires only when both the "asr" and "asrx" processors are in the completed list.
if has_asr && has_asrx {
info!("📝 Prerequisites met for Rule 1 Chunking. Starting ingestion...");
// Owned copies so the `async move` task below does not borrow `self` or `uuid`.
let db_clone = self.db.clone();
let uuid_clone = uuid.to_string();
// The handle returned by `tokio::spawn` is discarded, so the ingestion runs as a
// background task that is never awaited here; its outcome is only logged.
// NOTE(review): the job is marked `Completed` further down without waiting for
// this task, so an ingestion failure does not affect the job status — confirm
// that this is intended.
tokio::spawn(async move {
// The video record is fetched first because `ingest_rule1` is given its `fps`.
match db_clone.get_video_by_uuid(&uuid_clone).await {
Ok(Some(video)) => {
let fps = video.fps;
match rule1_ingest::ingest_rule1(db_clone.pool(), &uuid_clone, fps)
.await
{
Ok(count) => info!(
"✅ Rule 1 Ingestion completed: {} chunks inserted.",
count
),
Err(e) => error!("❌ Rule 1 Ingestion failed: {}", e),
}
}
// No video row for this uuid: nothing is ingested, only an error is logged.
Ok(None) => error!("Video not found for chunking: {}", uuid_clone),
Err(e) => error!("Failed to get video info for chunking: {}", e),
}
});
}
// 🚀 P1 Trigger: Rule 3 Scene Chunking
// Fires only when both the "cut" and "asr" processors are in the completed list.
if has_cut && has_asr {
info!("📝 Prerequisites met for Rule 3 Scene Chunking. Starting ingestion...");
// Owned copies so the `async move` task below does not borrow `self` or `uuid`.
let db_clone = self.db.clone();
let uuid_clone = uuid.to_string();
// The handle returned by `tokio::spawn` is discarded, so the ingestion runs as a
// background task that is never awaited here; success or failure is only logged.
// NOTE(review): the job status update that follows does not wait for this task —
// confirm that a failed scene ingestion should leave the job `Completed`.
tokio::spawn(async move {
match rule3_ingest::ingest_rule3(db_clone.pool(), &uuid_clone).await {
Ok(count) => info!(
"✅ Rule 3 Scene Ingestion completed: {} scenes processed.",
count
),
Err(e) => error!("❌ Rule 3 Scene Ingestion failed: {}", e),
}
});
}
self.db
.update_job_status(job_id, MonitorJobStatus::Completed)
.await?;

View File

@@ -16,6 +16,7 @@ use crate::core::processor::cut::CutResult;
use crate::core::processor::face::FaceResult;
use crate::core::processor::ocr::OcrResult;
use crate::core::processor::pose::PoseResult;
use crate::core::processor::visual_chunk::VisualChunkResult;
use crate::core::processor::yolo::YoloResult;
#[derive(Debug, Clone)]
@@ -302,6 +303,24 @@ impl ProcessorPool {
}
Ok(serde_json::to_value(result)?)
}
// Runs the visual-chunk processor, stores the chunks it produced, and returns the
// result serialized as JSON.
ProcessorType::VisualChunk => {
// NOTE(review): `to_str().unwrap()` panics if `to_str()` yields `None` (for a
// std `Path`, that means the path is not valid UTF-8) — confirm `output_path`
// can never be such a path, or propagate an error instead.
let result = processor::process_visual_chunk_advanced(
video_path,
output_path.to_str().unwrap(),
uuid,
)
.await?;
// Store VisualChunk chunks in database
tracing::info!(
"VisualChunk completed, storing {} chunks for {}",
result.chunk_count,
job.uuid
);
// Storing is best-effort: a failure is logged and the arm still returns the
// processor result below instead of propagating the error.
if let Err(e) = Self::store_visual_chunk_chunks(db, &job.uuid, &result).await {
tracing::error!("Failed to store VisualChunk chunks for {}: {}", job.uuid, e);
}
Ok(serde_json::to_value(result)?)
}
}
}
@@ -605,6 +624,13 @@ impl ProcessorPool {
// Override chunk_id to include processor prefix for uniqueness
chunk.chunk_id = format!("trace_yolo_{:04}", i);
// Populate text_content for BM25 search
let object_names: Vec<String> =
frame.objects.iter().map(|o| o.class_name.clone()).collect();
if !object_names.is_empty() {
chunk = chunk.with_text_content(object_names.join(" "));
}
match db.store_chunk(&chunk).await {
Ok(_) => {
tracing::info!(
@@ -660,6 +686,12 @@ impl ProcessorPool {
// Override chunk_id to include processor prefix for uniqueness
chunk.chunk_id = format!("trace_ocr_{:04}", i);
// Populate text_content for BM25 search
let texts: Vec<String> = frame.texts.iter().map(|t| t.text.clone()).collect();
if !texts.is_empty() {
chunk = chunk.with_text_content(texts.join(" "));
}
match db.store_chunk(&chunk).await {
Ok(_) => {
tracing::info!(
@@ -715,6 +747,16 @@ impl ProcessorPool {
// Override chunk_id to include processor prefix for uniqueness
chunk.chunk_id = format!("trace_face_{:04}", i);
// Populate text_content for BM25 search (face IDs)
let face_ids: Vec<String> = frame
.faces
.iter()
.filter_map(|f| f.face_id.clone())
.collect();
if !face_ids.is_empty() {
chunk = chunk.with_text_content(face_ids.join(" "));
}
match db.store_chunk(&chunk).await {
Ok(_) => {
tracing::info!(
@@ -770,6 +812,16 @@ impl ProcessorPool {
// Override chunk_id to include processor prefix for uniqueness
chunk.chunk_id = format!("trace_pose_{:04}", i);
// Populate text_content for BM25 search (person count indicator).
// The text is the phrase "person person person" once per detected person, capped
// at 10 repetitions so a crowded frame cannot produce an unbounded string.
let person_count = frame.persons.len();
if person_count > 0 {
    // Join the repetitions with a space. `str::repeat` concatenates without any
    // separator, which fused neighbouring words into tokens like "personperson"
    // whenever more than one person was present.
    let text = vec!["person person person"; person_count.min(10)].join(" ");
    chunk = chunk.with_text_content(text);
}
match db.store_chunk(&chunk).await {
Ok(_) => {
tracing::info!(
@@ -825,6 +877,16 @@ impl ProcessorPool {
// Override chunk_id to include processor prefix for uniqueness
chunk.chunk_id = format!("trace_asrx_{:04}", i);
// Populate text_content for BM25 search (already has text)
chunk = chunk.with_text_content(segment.text.clone());
// Also store speaker_id in content
chunk.content = serde_json::json!({
"text": segment.text,
"speaker_id": segment.speaker_id,
"timestamp": segment.start,
});
match db.store_chunk(&chunk).await {
Ok(_) => {
tracing::info!("Stored ASRX chunk {} for video {}", i, uuid);
@@ -837,6 +899,24 @@ impl ProcessorPool {
Ok(())
}
pub async fn store_visual_chunk_chunks(
db: &PostgresDb,
uuid: &str,
visual_chunk_result: &VisualChunkResult,
) -> Result<()> {
for (i, chunk) in visual_chunk_result.chunks.iter().enumerate() {
match db.store_chunk(chunk).await {
Ok(_) => {
tracing::info!("Stored VisualChunk chunk {} for video {}", i, uuid);
}
Err(e) => {
tracing::error!("Failed to store VisualChunk chunk {}: {}", i, e);
}
}
}
Ok(())
}
/// Returns the current value of `running_count`, read under its read lock.
pub async fn get_running_count(&self) -> usize {
    let guard = self.running_count.read().await;
    *guard
}