cleanup: remove dead code and duplicate docs

- Remove session-ses_2f27.md (161KB raw session log)
- Remove 49 ROOT_* duplicate files across REFERENCE/
- Remove 14 duplicate files between REFERENCE/ root and history/
- Remove asr_legacy.rs (dead code, replaced by asr.rs)
- Remove src/core/worker/ (duplicate JobWorker)
- Remove src/core/layers/ (empty directory)
- Remove 4 .bak files in src/
- Remove 7 dead private methods in worker/processor.rs
- Remove backup directory from git tracking
This commit is contained in:
Warren
2026-05-04 01:31:21 +08:00
parent ee81e343ce
commit e75c4d6f07
3270 changed files with 35190 additions and 53367 deletions

View File

@@ -58,7 +58,8 @@ pub async fn execute_rule1(db: &PostgresDb, file_uuid: &str, fps: f64) -> Result
fps,
content,
)
.with_metadata(metadata);
.with_metadata(metadata)
.with_text_content(seg.text.clone());
db.store_chunk_in_tx(&chunk, &mut tx).await?;

View File

@@ -32,9 +32,9 @@ struct AsrSegment {
/// 2. Aggregates Rule 1 (Sentence) chunks falling within each scene.
/// 3. Calls LLM to generate 5W1H+ summary.
/// 4. Inserts parent chunks into `dev.chunks`.
pub async fn ingest_rule3(pool: &PgPool, asset_uuid: &str) -> Result<usize> {
let cut_path = format!("{}/{}.cut.json", *OUTPUT_DIR, asset_uuid);
let asr_path = format!("{}/{}.asr.json", *OUTPUT_DIR, asset_uuid);
pub async fn ingest_rule3(pool: &PgPool, file_uuid: &str) -> Result<usize> {
let cut_path = format!("{}/{}.cut.json", *OUTPUT_DIR, file_uuid);
let asr_path = format!("{}/{}.asr.json", *OUTPUT_DIR, file_uuid);
// 1. Load CUT and ASR data
let cut_content = fs::read_to_string(&cut_path)
@@ -70,16 +70,16 @@ pub async fn ingest_rule3(pool: &PgPool, asset_uuid: &str) -> Result<usize> {
}
}
// Query Rule 1 table for better linking
// Query chunks table for Rule 1 sentence chunks
let rule1_rows: Vec<(String,)> = sqlx::query_as(
r#"
SELECT id::text FROM chunks_rule1
WHERE asset_uuid = $1
SELECT chunk_id FROM chunks
WHERE uuid = $1 AND chunk_type = 'sentence' AND rule = 'rule_1'
AND start_frame >= $2
AND end_frame <= $3
"#,
)
.bind(asset_uuid)
.bind(file_uuid)
.bind(scene.start_frame as i64)
.bind(scene.end_frame as i64)
.fetch_all(&mut *tx)
@@ -98,14 +98,14 @@ pub async fn ingest_rule3(pool: &PgPool, asset_uuid: &str) -> Result<usize> {
let texts: Vec<String> = sqlx::query_scalar(
r#"
SELECT content FROM chunks_rule1
WHERE asset_uuid = $1
SELECT text_content FROM chunks
WHERE uuid = $1 AND chunk_type = 'sentence' AND rule = 'rule_1'
AND start_frame >= $2
AND end_frame <= $3
ORDER BY start_frame ASC
"#,
)
.bind(asset_uuid)
.bind(file_uuid)
.bind(scene.start_frame as i64)
.bind(scene.end_frame as i64)
.fetch_all(&mut *tx)
@@ -136,7 +136,7 @@ pub async fn ingest_rule3(pool: &PgPool, asset_uuid: &str) -> Result<usize> {
// 4. Insert into dev.chunks
let fps_query: Option<f64> = sqlx::query_scalar("SELECT fps FROM videos WHERE uuid = $1")
.bind(asset_uuid)
.bind(file_uuid)
.fetch_optional(&mut *tx)
.await?;
let fps = fps_query.unwrap_or(29.97);
@@ -157,7 +157,7 @@ pub async fn ingest_rule3(pool: &PgPool, asset_uuid: &str) -> Result<usize> {
ON CONFLICT (uuid, chunk_id) DO NOTHING
"#,
)
.bind(asset_uuid)
.bind(file_uuid)
.bind(&chunk_id)
.bind(scene.scene_number as i32)
.bind("cut") // Chunk type

View File

@@ -1,755 +0,0 @@
use crate::core::time::FrameTime;
use serde::{Deserialize, Serialize};
/// Category of a chunk.
///
/// Serialized by serde using `snake_case` variant names (for example
/// `Visual` is written as `"visual"`).
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "snake_case")]
pub enum ChunkType {
TimeBased,
Sentence,
Cut,
Trace,
Story, // Parent chunk from story analysis
Visual, // Visual object-based chunk from YOLO detection
}
impl ChunkType {
pub fn as_str(&self) -> &'static str {
match self {
ChunkType::TimeBased => "time",
ChunkType::Sentence => "sentence",
ChunkType::Cut => "cut",
ChunkType::Trace => "trace",
ChunkType::Story => "story",
ChunkType::Visual => "visual",
}
}
}
/// Rule under which a chunk was generated.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "snake_case")]
pub enum ChunkRule {
Rule1, // Direct conversion
Rule2, // Aggregated content
}
/// List of objects detected in a single keyframe.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct KeyframeObjects {
/// Keyframe time (seconds)
pub timestamp: f64,
/// Keyframe frame number
pub frame_number: u64,
/// Objects detected in this keyframe
pub objects: Vec<DetectedObject>,
}
/// A detected object.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DetectedObject {
/// Object class name
pub class_name: String,
/// Object class ID
pub class_id: u32,
/// Confidence (0.0-1.0)
pub confidence: f32,
/// Bounding box (x, y, width, height)
pub bbox: Option<BoundingBox>,
/// Number of occurrences (within the chunk)
pub occurrence: u32,
}
/// Content payload of a visual chunk: a time span and the objects seen within it.
// NOTE(review): the comment previously attached here read "bounding box", which does
// not describe this struct; it presumably belonged to a `BoundingBox` definition that
// is not visible at this point in the file. Confirm.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VisualChunkContent {
/// Start of the span (printed with an `s` suffix by `summary`, so presumably seconds).
pub start_time: f64,
/// End of the span (same unit as `start_time`).
pub end_time: f64,
/// Per-keyframe object lists.
pub keyframe_objects: Vec<KeyframeObjects>,
/// Names of the dominant objects.
pub dominant_objects: Vec<String>,
pub object_relationships: Vec<(String, String, String)>, // (object1, relationship, object2)
/// Optional free-text description of the scene.
pub scene_description: Option<String>,
/// Aggregate statistics about the detected objects.
pub metadata: VisualMetadata,
}
/// Aggregate statistics attached to a `VisualChunkContent`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VisualMetadata {
/// Total number of detected objects.
pub object_count: u32,
/// Distinct object class names.
pub unique_classes: Vec<String>,
/// Highest detection confidence.
pub max_confidence: f32,
/// Average detection confidence.
pub avg_confidence: f32,
pub spatial_density: f32, // objects per frame
}
impl ChunkRule {
pub fn as_str(&self) -> &'static str {
match self {
ChunkRule::Rule1 => "rule_1",
ChunkRule::Rule2 => "rule_2",
}
}
}
/// A chunk of a file, positioned by a frame range at a given frame rate.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Chunk {
pub file_id: i32,
pub uuid: String,
pub chunk_id: String,
pub chunk_index: u32,
/// Category of this chunk.
pub chunk_type: ChunkType,
/// Rule under which this chunk was generated.
pub rule: ChunkRule,
/// Frames per second (can be fractional, e.g., 29.97, 23.976)
pub fps: f64,
/// Start frame (0-based)
pub start_frame: i64,
/// End frame (exclusive)
pub end_frame: i64,
/// Optional plain-text content of the chunk.
pub text_content: Option<String>,
/// Chunk content as JSON.
pub content: serde_json::Value,
/// Optional JSON metadata.
pub metadata: Option<serde_json::Value>,
pub vector_id: Option<String>,
pub frame_count: i32,
pub pre_chunk_ids: Vec<i32>,
pub parent_chunk_id: Option<String>, // For parent-child chunk hierarchy
pub child_chunk_ids: Vec<String>, // Child chunk IDs (for parent chunks)
pub visual_stats: Option<serde_json::Value>,
}
id: i64,
video_id: i64,
yolo_result: &crate::core::processor::yolo::YoloResult,
min_frames_per_chunk: usize,
similarity_threshold: f32,
) -> Vec<Self> {
if yolo_result.frames.is_empty() {
return vec![];
}
let mut chunks = Vec::new();
let mut current_chunk_frames = Vec::new();
let mut current_id = id;
for (i, frame) in yolo_result.frames.iter().enumerate() {
if current_chunk_frames.is_empty() {
current_chunk_frames.push(frame);
continue;
}
// Check similarity with last frame in current chunk
let last_frame = current_chunk_frames.last().unwrap();
let similarity = VisualChunkContent::frame_similarity(last_frame, frame);
if similarity >= similarity_threshold && current_chunk_frames.len() < 100 {
// Similar enough, add to current chunk
current_chunk_frames.push(frame);
} else {
// Not similar enough or chunk too large, create new chunk
if current_chunk_frames.len() >= min_frames_per_chunk {
if let Some(chunk) =
Self::create_chunk_from_frames(current_id, video_id, &current_chunk_frames)
{
chunks.push(chunk);
current_id += 1;
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::core::processor::yolo::{YoloFrame, YoloObject, YoloResult};
#[test]
fn test_chunk_type_visual_serialization() {
let chunk_type = ChunkType::Visual;
let json = serde_json::to_string(&chunk_type).unwrap();
assert_eq!(json, "\"visual\"");
let deserialized: ChunkType = serde_json::from_str(&json).unwrap();
assert_eq!(deserialized, ChunkType::Visual);
}
#[test]
fn test_visual_chunk_creation() {
// Create a mock YOLO result
let yolo_result = YoloResult {
frame_count: 2,
fps: 30.0,
frames: vec![
YoloFrame {
frame: 0,
timestamp: 0.0,
objects: vec![
YoloObject {
class_name: "person".to_string(),
class_id: 0,
x: 100,
y: 200,
width: 50,
height: 100,
confidence: 0.95,
},
YoloObject {
class_name: "car".to_string(),
class_id: 2,
x: 300,
y: 150,
width: 80,
height: 60,
confidence: 0.87,
},
],
},
YoloFrame {
frame: 1,
timestamp: 0.033, // 1/30 second
objects: vec![YoloObject {
class_name: "person".to_string(),
class_id: 0,
x: 110,
y: 210,
width: 52,
height: 102,
confidence: 0.92,
}],
},
],
};
// Create visual chunk from YOLO result
let chunk = Chunk::from_yolo_result(1, 100, &yolo_result, 0, 1).unwrap();
// Verify chunk properties
assert_eq!(chunk.id, 1);
assert_eq!(chunk.video_id, 100);
assert_eq!(chunk.chunk_type, ChunkType::Visual);
assert_eq!(chunk.start_time, 0.0);
assert_eq!(chunk.end_time, 0.033);
// Verify visual content
if let ChunkContent::Visual(content) = chunk.content {
assert_eq!(content.metadata.object_count, 3);
assert_eq!(content.metadata.unique_classes.len(), 2);
assert!(content
.metadata
.unique_classes
.contains(&"person".to_string()));
assert!(content.metadata.unique_classes.contains(&"car".to_string()));
assert_eq!(content.dominant_objects, vec!["person"]);
assert_eq!(content.keyframe_objects.len(), 2);
} else {
panic!("Expected Visual content type");
}
}
#[test]
fn test_visual_chunk_content_methods() {
let content = VisualChunkContent {
start_time: 0.0,
end_time: 5.0,
keyframe_objects: vec![KeyframeObjects {
frame: 0,
timestamp: 0.0,
objects: vec![
DetectedObject {
class_name: "person".to_string(),
class_id: 0,
bounding_box: BoundingBox {
x: 100,
y: 200,
width: 50,
height: 100,
},
confidence: 0.95,
},
DetectedObject {
class_name: "car".to_string(),
class_id: 2,
bounding_box: BoundingBox {
x: 300,
y: 150,
width: 80,
height: 60,
},
confidence: 0.87,
},
],
}],
dominant_objects: vec!["person".to_string()],
object_relationships: vec![],
scene_description: Some("A person near a car".to_string()),
metadata: VisualMetadata {
object_count: 2,
unique_classes: vec!["person".to_string(), "car".to_string()],
max_confidence: 0.95,
avg_confidence: 0.91,
spatial_density: 2.0,
},
};
// Test summary method
let summary = content.summary();
assert!(summary.contains("Visual chunk from 0.0s to 5.0s"));
assert!(summary.contains("person"));
// Test contains_object method
assert!(content.contains_object("person"));
assert!(content.contains_object("car"));
assert!(!content.contains_object("dog"));
// Test high_confidence_objects method
let high_conf_objects = content.high_confidence_objects(0.9);
assert_eq!(high_conf_objects.len(), 1);
assert_eq!(high_conf_objects[0].class_name, "person");
}
#[test]
fn test_frame_similarity() {
let frame1 = YoloFrame {
frame: 0,
timestamp: 0.0,
objects: vec![
YoloObject {
class_name: "person".to_string(),
class_id: 0,
x: 100,
y: 200,
width: 50,
height: 100,
confidence: 0.95,
},
YoloObject {
class_name: "car".to_string(),
class_id: 2,
x: 300,
y: 150,
width: 80,
height: 60,
confidence: 0.87,
},
],
};
let frame2 = YoloFrame {
frame: 1,
timestamp: 0.033,
objects: vec![
YoloObject {
class_name: "person".to_string(),
class_id: 0,
x: 110,
y: 210,
width: 52,
height: 102,
confidence: 0.92,
},
YoloObject {
class_name: "car".to_string(),
class_id: 2,
x: 310,
y: 155,
width: 82,
height: 62,
confidence: 0.85,
},
],
};
let frame3 = YoloFrame {
frame: 2,
timestamp: 0.066,
objects: vec![YoloObject {
class_name: "dog".to_string(),
class_id: 16,
x: 150,
y: 250,
width: 40,
height: 60,
confidence: 0.78,
}],
};
// Test similar frames (same objects)
let similarity_same =
VisualChunkContent::frame_similarity(&frame1, &frame2);
assert!((similarity_same - 1.0).abs() < 0.001);
// Test dissimilar frames (different objects)
let similarity_diff =
VisualChunkContent::frame_similarity(&frame1, &frame3);
assert!((similarity_diff - 0.0).abs() < 0.001);
// Test empty frames
let empty_frame = YoloFrame {
frame: 3,
timestamp: 0.1,
objects: vec![],
};
let similarity_empty =
VisualChunkContent::frame_similarity(&empty_frame, &empty_frame);
assert!((similarity_empty - 1.0).abs() < 0.001);
let similarity_mixed =
VisualChunkContent::frame_similarity(&empty_frame, &frame1);
assert!((similarity_mixed - 0.0).abs() < 0.001);
}
}
current_chunk_frames = vec![frame];
}
}
// Handle last chunk
if current_chunk_frames.len() >= min_frames_per_chunk {
if let Some(chunk) =
Self::create_chunk_from_frames(current_id, video_id, &current_chunk_frames)
{
chunks.push(chunk);
}
}
chunks
}
/// Builds one chunk from a run of YOLO frames.
///
/// Returns `None` when `frames` is empty. Otherwise the frames are cloned
/// into a temporary `YoloResult` and handed to `from_yolo_result` together
/// with the frame numbers of the first and last frame.
fn create_chunk_from_frames(
    id: i64,
    video_id: i64,
    frames: &[&crate::core::processor::yolo::YoloFrame],
) -> Option<Self> {
    // `first()` / `last()` are `None` only for an empty slice, which is the
    // one case in which no chunk can be built.
    let first_frame_no = frames.first()?.frame;
    let last_frame_no = frames.last()?.frame;

    // Simple conversion - could use the from_yolo_result method
    let owned_frames = frames.iter().map(|&f| f.clone()).collect();
    let wrapped = crate::core::processor::yolo::YoloResult {
        frame_count: frames.len() as u64,
        fps: 0.0, // Not used in this context
        frames: owned_frames,
    };
    Self::from_yolo_result(id, video_id, &wrapped, first_frame_no, last_frame_no)
}
/// Creates a new chunk from a time range given in seconds (legacy conversion).
///
/// Intended for migrating from older systems that store time as seconds.
/// Each bound is converted to a frame number by rounding `seconds * fps`
/// to the nearest integer; all other arguments are passed to `Self::new`
/// unchanged.
#[allow(clippy::too_many_arguments)]
pub fn from_seconds(
    file_id: i32,
    uuid: String,
    chunk_index: u32,
    chunk_type: ChunkType,
    rule: ChunkRule,
    start_time: f64,
    end_time: f64,
    fps: f64,
    content: serde_json::Value,
) -> Self {
    // Nearest whole frame at the given frame rate.
    let to_frame = |seconds: f64| (seconds * fps).round() as i64;
    let first = to_frame(start_time);
    let last = to_frame(end_time);
    Self::new(
        file_id,
        uuid,
        chunk_index,
        chunk_type,
        rule,
        first,
        last,
        fps,
        content,
    )
}
/// Returns the chunk start as a `FrameTime` built from `start_frame` and `fps`.
pub fn start_time(&self) -> FrameTime {
    let (frame, rate) = (self.start_frame, self.fps);
    FrameTime::from_frames(frame, rate)
}
/// Returns the chunk end as a `FrameTime` built from `end_frame` and `fps`.
pub fn end_time(&self) -> FrameTime {
    let (frame, rate) = (self.end_frame, self.fps);
    FrameTime::from_frames(frame, rate)
}
/// Returns the chunk length in frames (`end_frame - start_frame`).
pub fn duration_frames(&self) -> i64 {
    let (first, last) = (self.start_frame, self.end_frame);
    last - first
}
/// Returns the chunk length in seconds: the frame count divided by `fps`.
pub fn duration_seconds(&self) -> f64 {
    let frames = self.duration_frames() as f64;
    frames / self.fps
}
/// Renders the start time as "seconds.frame" (e.g., "123.04").
pub fn format_start_sec_frame(&self) -> String {
    let start = self.start_time();
    start.format_sec_frame()
}
/// Renders the end time as "seconds.frame" (e.g., "456.15").
pub fn format_end_sec_frame(&self) -> String {
    let end = self.end_time();
    end.format_sec_frame()
}
/// Renders the start time as "HH:MM:SS".
pub fn format_start_hms(&self) -> String {
    let start = self.start_time();
    start.format_hms()
}
/// Renders the end time as "HH:MM:SS".
pub fn format_end_hms(&self) -> String {
    let end = self.end_time();
    end.format_hms()
}
/// Renders the start time as "HH:MM:SS.FF".
pub fn format_start_hms_frame(&self) -> String {
    let start = self.start_time();
    start.format_hms_frame()
}
/// Renders the end time as "HH:MM:SS.FF".
pub fn format_end_hms_frame(&self) -> String {
    let end = self.end_time();
    end.format_hms_frame()
}
/// Returns `(start_seconds, end_seconds)` for this chunk.
///
/// Kept for backward compatibility during migration; prefer the
/// `start_time()` and `end_time()` methods.
pub fn time_range_seconds(&self) -> (f64, f64) {
    let start_secs = self.start_time().seconds();
    let end_secs = self.end_time().seconds();
    (start_secs, end_secs)
}
pub fn with_metadata(mut self, metadata: serde_json::Value) -> Self {
self.metadata = Some(metadata);
self
}
pub fn with_vector_id(mut self, vector_id: String) -> Self {
self.vector_id = Some(vector_id);
self
}
pub fn with_text_content(mut self, text: String) -> Self {
self.text_content = Some(text);
self
}
pub fn with_frame_count(mut self, count: i32) -> Self {
self.frame_count = count;
self
}
pub fn with_pre_chunk_ids(mut self, ids: Vec<i32>) -> Self {
self.pre_chunk_ids = ids;
self
}
pub fn with_parent_chunk_id(mut self, parent_id: String) -> Self {
self.parent_chunk_id = Some(parent_id);
self
}
pub fn with_child_chunk_ids(mut self, child_ids: Vec<String>) -> Self {
self.child_chunk_ids = child_ids;
self
}
/// Returns `true` when this chunk lists at least one child chunk ID.
pub fn is_parent_chunk(&self) -> bool {
    let has_no_children = self.child_chunk_ids.is_empty();
    !has_no_children
}
/// Returns `true` when this chunk has a parent chunk ID set.
pub fn is_child_chunk(&self) -> bool {
    let parent = self.parent_chunk_id.as_ref();
    parent.is_some()
}
/// 創建視覺分片
pub fn new_visual(
file_id: i32,
uuid: String,
chunk_index: u32,
start_frame: i64,
end_frame: i64,
fps: f64,
visual_content: VisualChunkContent,
) -> Self {
let content = serde_json::to_value(&visual_content)
.unwrap_or_else(|_| serde_json::json!({"error": "Failed to serialize visual content"}));
Self::new(
file_id,
uuid,
chunk_index,
ChunkType::Visual,
ChunkRule::Rule2,
start_frame,
end_frame,
fps,
content,
)
}
/// Creates a visual chunk from YOLO detection frames.
///
/// Every object in every frame is converted to a `DetectedObject` (with
/// `occurrence` set to 1) and counted per class name. Frames that contain
/// at least one object contribute a `KeyframeObjects` entry. Classes seen
/// at least three times are joined with ", " into the primary-objects
/// label, in class-name order; when no class qualifies the label is
/// "no objects detected".
///
/// NOTE(review): the `VisualChunkContent` fields populated here
/// (`primary_objects`, `object_stats`, `object_frequency`,
/// `visual_summary`) should be checked against the struct definition
/// currently in the file.
pub fn from_yolo_result(
    file_id: i32,
    uuid: String,
    chunk_index: u32,
    start_frame: i64,
    end_frame: i64,
    fps: f64,
    yolo_frames: Vec<crate::core::processor::yolo::YoloFrame>,
) -> Self {
    use std::collections::BTreeMap;

    // Per-class occurrence counts. A BTreeMap iterates in key order, so the
    // primary-objects label below is deterministic across runs.
    let mut object_counts: BTreeMap<String, i32> = BTreeMap::new();
    let mut keyframe_objects = Vec::new();

    for frame in &yolo_frames {
        let mut frame_objects = Vec::new();
        for obj in &frame.objects {
            // Update the object statistics.
            *object_counts.entry(obj.class_name.clone()).or_insert(0) += 1;

            // Record the detected object.
            frame_objects.push(DetectedObject {
                class_name: obj.class_name.clone(),
                class_id: obj.class_id,
                confidence: obj.confidence,
                bbox: Some(BoundingBox {
                    x: obj.x,
                    y: obj.y,
                    width: obj.width,
                    height: obj.height,
                }),
                occurrence: 1,
            });
        }

        // Frames without detections are not recorded as keyframes.
        if !frame_objects.is_empty() {
            keyframe_objects.push(KeyframeObjects {
                timestamp: frame.timestamp,
                frame_number: frame.frame,
                objects: frame_objects,
            });
        }
    }

    // Primary-object label: classes that appear at least 3 times.
    let primary_objects = object_counts
        .iter()
        .filter(|(_, count)| **count >= 3)
        .map(|(name, _)| name.as_str())
        .collect::<Vec<_>>()
        .join(", ");

    // Object statistics as JSON; serialized once and reused for both fields.
    let object_stats =
        serde_json::to_value(&object_counts).unwrap_or_else(|_| serde_json::json!({}));
    let object_frequency = object_stats.clone();

    // Assemble the visual content.
    let visual_content = VisualChunkContent {
        primary_objects: if primary_objects.is_empty() {
            "no objects detected".to_string()
        } else {
            primary_objects
        },
        object_stats,
        keyframe_objects,
        object_frequency,
        visual_summary: None, // Optional; an LLM-generated summary can be added later
    };

    Self::new_visual(
        file_id,
        uuid,
        chunk_index,
        start_frame,
        end_frame,
        fps,
        visual_content,
    )
}
}
impl VisualChunkContent {
    /// Calculates the similarity of two YOLO frames from their object classes.
    ///
    /// The result is the Jaccard index of the two sets of class names
    /// (shared classes divided by all distinct classes). Two empty frames
    /// score `1.0`; one empty and one non-empty frame score `0.0`.
    pub fn frame_similarity(
        frame1: &crate::core::processor::yolo::YoloFrame,
        frame2: &crate::core::processor::yolo::YoloFrame,
    ) -> f32 {
        match (frame1.objects.is_empty(), frame2.objects.is_empty()) {
            // Both empty frames are perfectly similar
            (true, true) => return 1.0,
            // One empty, one non-empty are dissimilar
            (true, false) | (false, true) => return 0.0,
            (false, false) => {}
        }

        // Borrow the class names instead of cloning them into the sets.
        let classes1: std::collections::HashSet<&str> = frame1
            .objects
            .iter()
            .map(|o| o.class_name.as_str())
            .collect();
        let classes2: std::collections::HashSet<&str> = frame2
            .objects
            .iter()
            .map(|o| o.class_name.as_str())
            .collect();

        // Jaccard similarity. Both frames are non-empty here, so the union
        // always holds at least one class and the division is well defined.
        let shared = classes1.intersection(&classes2).count();
        let total = classes1.union(&classes2).count();
        shared as f32 / total as f32
    }

    /// Returns a one-line, human-readable summary of the visual chunk.
    ///
    /// The "frames" figure is the number of entries in `keyframe_objects`.
    pub fn summary(&self) -> String {
        let duration = self.end_time - self.start_time;
        let frame_count = self.keyframe_objects.len();
        let dominant = if self.dominant_objects.is_empty() {
            "none".to_string()
        } else {
            self.dominant_objects.join(", ")
        };

        format!(
            "Visual chunk from {:.1}s to {:.1}s (duration: {:.1}s, {} frames). Objects: {} total, {} unique. Dominant objects: {}",
            self.start_time,
            self.end_time,
            duration,
            frame_count,
            self.metadata.object_count,
            self.metadata.unique_classes.len(),
            dominant
        )
    }

    /// Returns `true` if any keyframe contains an object of class `class_name`.
    pub fn contains_object(&self, class_name: &str) -> bool {
        self.keyframe_objects
            .iter()
            .flat_map(|ko| ko.objects.iter())
            .any(|obj| obj.class_name == class_name)
    }

    /// Returns every detected object whose confidence is at least `threshold`.
    pub fn high_confidence_objects(&self, threshold: f32) -> Vec<&DetectedObject> {
        self.keyframe_objects
            .iter()
            .flat_map(|ko| ko.objects.iter())
            .filter(|obj| obj.confidence >= threshold)
            .collect()
    }
}

View File

@@ -228,6 +228,11 @@ impl From<VideoRow> for VideoRecord {
registration_time: row.registration_time,
total_frames: row.total_frames.unwrap_or(0) as u64,
parent_uuid: row.parent_uuid,
cut_done: false,
cut_count: 0,
cut_max_duration: 0.0,
scene_done: false,
audio_tracks: None,
}
}
}
@@ -254,6 +259,11 @@ pub struct VideoRecord {
pub registration_time: Option<String>,
pub total_frames: u64,
pub parent_uuid: Option<String>,
pub cut_done: bool,
pub cut_count: i32,
pub cut_max_duration: f64,
pub scene_done: bool,
pub audio_tracks: Option<serde_json::Value>,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
@@ -332,9 +342,9 @@ pub struct MonitorJob {
pub progress_current: i32,
pub error_count: i32,
pub last_error: Option<String>,
pub started_at: Option<chrono::NaiveDateTime>,
pub updated_at: Option<chrono::NaiveDateTime>,
pub created_at: chrono::NaiveDateTime,
pub started_at: Option<chrono::DateTime<chrono::Utc>>,
pub updated_at: Option<chrono::DateTime<chrono::Utc>>,
pub created_at: chrono::DateTime<chrono::Utc>,
pub processors: Vec<String>,
pub completed_processors: Vec<String>,
pub failed_processors: Vec<String>,
@@ -393,17 +403,80 @@ impl ProcessorType {
}
}
/// 預估 CPU 使用率0.0 ~ 1.0, 1.0 = 一個完整核心)
pub fn estimated_cpu(&self) -> f64 {
match self {
ProcessorType::Asr => 1.0,
ProcessorType::Cut => 0.5,
ProcessorType::Yolo => 0.3,
ProcessorType::Ocr => 0.8,
ProcessorType::Face => 0.6,
ProcessorType::Pose => 0.4,
ProcessorType::Asrx => 0.8,
ProcessorType::VisualChunk => 0.3,
ProcessorType::Scene => 0.3,
}
}
/// 是否使用 GPU
pub fn uses_gpu(&self) -> bool {
match self {
ProcessorType::Yolo | ProcessorType::Face | ProcessorType::Pose => true,
_ => false,
}
}
/// 預估記憶體使用量 (MB)
pub fn estimated_memory_mb(&self) -> u64 {
match self {
ProcessorType::Asr => 2048,
ProcessorType::Cut => 512,
ProcessorType::Yolo => 1024,
ProcessorType::Ocr => 1024,
ProcessorType::Face => 1536,
ProcessorType::Pose => 1024,
ProcessorType::Asrx => 2048,
ProcessorType::VisualChunk => 512,
ProcessorType::Scene => 512,
}
}
/// 使用的模型名稱(如有)
pub fn model_name(&self) -> Option<&'static str> {
match self {
ProcessorType::Asr => Some("faster-whisper"),
ProcessorType::Cut => None,
ProcessorType::Yolo => Some("yolov8n"),
ProcessorType::Ocr => Some("paddleocr"),
ProcessorType::Face => Some("insightface/buffalo_l"),
ProcessorType::Pose => Some("mediapipe/pose"),
ProcessorType::Asrx => Some("speechbrain/ecapa-tdnn"),
ProcessorType::VisualChunk => None,
ProcessorType::Scene => Some("places365"),
}
}
/// 依賴的其他 Processor需先完成才能執行
pub fn dependencies(&self) -> Vec<ProcessorType> {
match self {
ProcessorType::Asrx => vec![ProcessorType::Asr],
ProcessorType::VisualChunk => vec![ProcessorType::Yolo],
ProcessorType::Scene => vec![ProcessorType::Cut],
_ => vec![],
}
}
/// Returns the list of processor types, in the order written below.
// NOTE(review): as shown, `Asr`, `Asrx` and `Scene` each appear twice in this
// list. That looks like the old and new ordering interleaved rather than an
// intentional duplication — confirm the intended order and drop the extras.
pub fn all() -> Vec<ProcessorType> {
vec![
ProcessorType::Asr,
ProcessorType::Cut,
ProcessorType::Scene,
ProcessorType::Asr,
ProcessorType::Asrx,
ProcessorType::Yolo,
ProcessorType::Ocr,
ProcessorType::Face,
ProcessorType::Pose,
ProcessorType::Asrx,
ProcessorType::VisualChunk,
ProcessorType::Scene,
]
}
}
@@ -701,8 +774,8 @@ impl PostgresDb {
.await?;
// Chunks
sqlx::query("CREATE TABLE IF NOT EXISTS chunks (id SERIAL PRIMARY KEY, uuid VARCHAR(32) NOT NULL, chunk_id VARCHAR(64) NOT NULL, chunk_index INTEGER NOT NULL, chunk_type VARCHAR(32) NOT NULL, start_time DOUBLE PRECISION NOT NULL, end_time DOUBLE PRECISION NOT NULL, fps DOUBLE PRECISION DEFAULT 24.0, start_frame BIGINT DEFAULT 0, end_frame BIGINT DEFAULT 0, content JSONB NOT NULL, metadata JSONB, vector_id VARCHAR(64), created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, UNIQUE(uuid, chunk_id))").execute(pool).await?;
sqlx::query("CREATE INDEX IF NOT EXISTS idx_chunks_uuid ON chunks(uuid)")
sqlx::query("CREATE TABLE IF NOT EXISTS chunks (id SERIAL PRIMARY KEY, file_uuid VARCHAR(32) NOT NULL, chunk_id VARCHAR(64) NOT NULL, chunk_index INTEGER NOT NULL, chunk_type VARCHAR(32) NOT NULL, start_time DOUBLE PRECISION NOT NULL, end_time DOUBLE PRECISION NOT NULL, fps DOUBLE PRECISION DEFAULT 24.0, start_frame BIGINT DEFAULT 0, end_frame BIGINT DEFAULT 0, content JSONB NOT NULL, metadata JSONB, vector_id VARCHAR(64), created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, UNIQUE(file_uuid, chunk_id))").execute(pool).await?;
sqlx::query("CREATE INDEX IF NOT EXISTS idx_chunks_file ON chunks(file_uuid)")
.execute(pool)
.await?;
sqlx::query("CREATE INDEX IF NOT EXISTS idx_chunks_type ON chunks(chunk_type)")
@@ -765,15 +838,13 @@ impl PostgresDb {
.await?;
// Chunks Rule 1
sqlx::query("CREATE TABLE IF NOT EXISTS chunks_rule1 (id UUID PRIMARY KEY DEFAULT gen_random_uuid(), asset_uuid VARCHAR(32) NOT NULL REFERENCES videos(uuid) ON DELETE CASCADE, start_frame BIGINT NOT NULL, end_frame BIGINT NOT NULL, content TEXT NOT NULL, speaker_id VARCHAR(50), created_at TIMESTAMPTZ DEFAULT NOW())").execute(pool).await?;
sqlx::query(
"CREATE INDEX IF NOT EXISTS idx_chunks_rule1_asset ON chunks_rule1(asset_uuid)",
)
.execute(pool)
.await?;
sqlx::query("CREATE TABLE IF NOT EXISTS chunks_rule1 (id UUID PRIMARY KEY DEFAULT gen_random_uuid(), file_uuid VARCHAR(32) NOT NULL REFERENCES videos(uuid) ON DELETE CASCADE, start_frame BIGINT NOT NULL, end_frame BIGINT NOT NULL, content TEXT NOT NULL, speaker_id VARCHAR(50), created_at TIMESTAMPTZ DEFAULT NOW())").execute(pool).await?;
sqlx::query("CREATE INDEX IF NOT EXISTS idx_chunks_rule1_asset ON chunks_rule1(file_uuid)")
.execute(pool)
.await?;
// Jobs (Legacy/P0)
sqlx::query("CREATE TABLE IF NOT EXISTS jobs (id UUID PRIMARY KEY, asset_uuid VARCHAR(32) NOT NULL REFERENCES videos(uuid) ON DELETE CASCADE, processor_list TEXT[], assigned_processor_id UUID, rule VARCHAR(20), status VARCHAR(20) DEFAULT 'QUEUED', total_frames BIGINT DEFAULT 0, processed_frames BIGINT DEFAULT 0, error_message TEXT, created_at TIMESTAMPTZ DEFAULT NOW(), updated_at TIMESTAMPTZ DEFAULT NOW())").execute(pool).await?;
sqlx::query("CREATE TABLE IF NOT EXISTS jobs (id UUID PRIMARY KEY, file_uuid VARCHAR(32) NOT NULL REFERENCES videos(uuid) ON DELETE CASCADE, processor_list TEXT[], assigned_processor_id UUID, rule VARCHAR(20), status VARCHAR(20) DEFAULT 'QUEUED', total_frames BIGINT DEFAULT 0, processed_frames BIGINT DEFAULT 0, error_message TEXT, created_at TIMESTAMPTZ DEFAULT NOW(), updated_at TIMESTAMPTZ DEFAULT NOW())").execute(pool).await?;
sqlx::query("CREATE INDEX IF NOT EXISTS idx_jobs_status ON jobs(status)")
.execute(pool)
.await?;
@@ -1162,8 +1233,8 @@ impl PostgresDb {
.await?;
sqlx::query(&format!(
"DELETE FROM {} WHERE video_id IN (SELECT id FROM {} WHERE uuid = $1)",
processor_results, videos
"DELETE FROM {} WHERE file_uuid = $1",
processor_results
))
.bind(uuid)
.execute(&self.pool)
@@ -2026,21 +2097,19 @@ impl PostgresDb {
r#"
INSERT INTO {} (
file_uuid, processor_type, coordinate_type, coordinate_index,
timestamp, data, identity_id, confidence
) VALUES ($1, $2, 'frame', $3, $4, $5, $6, $7)
start_frame, end_frame, start_time, data
) VALUES ($1, $2, 'frame', $3, $3, $3, $4, $5)
"#,
table
);
for (coord_idx, ts, data, id, conf) in chunks {
for (coord_idx, ts, data, _id, _conf) in chunks {
sqlx::query(&query)
.bind(file_uuid)
.bind(processor_type)
.bind(*coord_idx)
.bind(*ts)
.bind(data)
.bind(*id)
.bind(*conf)
.execute(&mut *tx)
.await?;
}
@@ -2060,7 +2129,7 @@ impl PostgresDb {
let query = format!(
r#"
INSERT INTO {} (
file_uuid, processor_type, coordinate_type, coordinate_index,
file_uuid, processor_type, coordinate_type, coordinate_index,
start_frame, end_frame, start_time, end_time, data
) VALUES ($1, 'asr', 'time', $2, $3, $4, $5, $6, $7)
"#,
@@ -2402,10 +2471,10 @@ impl PostgresDb {
offset: i64,
) -> Result<Vec<IdentityChunkRecord>> {
let query = r#"
SELECT c.id, c.uuid as file_uuid, c.chunk_id, c.chunk_type,
SELECT c.id, c.file_uuid, c.chunk_id, c.chunk_type,
c.start_time, c.end_time, c.text_content, c.content
FROM chunks c
WHERE c.uuid IN (
WHERE c.file_uuid IN (
SELECT DISTINCT fi.file_uuid
FROM file_identities fi
JOIN identities i ON fi.identity_id = i.id
@@ -2504,9 +2573,9 @@ impl PostgresDb {
sqlx::query(&format!(
r#"
INSERT INTO {} (file_id, uuid, chunk_id, chunk_index, chunk_type, start_time, end_time, fps, start_frame, end_frame, text_content, content, metadata, vector_id, frame_count, pre_chunk_ids, parent_chunk_id, child_chunk_ids)
INSERT INTO {} (file_id, file_uuid, chunk_id, chunk_index, chunk_type, start_time, end_time, fps, start_frame, end_frame, text_content, content, metadata, vector_id, frame_count, pre_chunk_ids, parent_chunk_id, child_chunk_ids)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12::jsonb, $13::jsonb, $14, $15, $16, $17, $18)
ON CONFLICT (uuid, chunk_id) DO UPDATE SET
ON CONFLICT (file_uuid, chunk_id) DO UPDATE SET
start_time = EXCLUDED.start_time,
end_time = EXCLUDED.end_time,
fps = EXCLUDED.fps,
@@ -2579,9 +2648,9 @@ impl PostgresDb {
sqlx::query(&format!(
r#"
INSERT INTO {} (file_id, uuid, chunk_id, chunk_index, chunk_type, start_time, end_time, fps, start_frame, end_frame, text_content, content, metadata, vector_id, frame_count, pre_chunk_ids, parent_chunk_id, child_chunk_ids)
INSERT INTO {} (file_id, file_uuid, chunk_id, chunk_index, chunk_type, start_time, end_time, fps, start_frame, end_frame, text_content, content, metadata, vector_id, frame_count, pre_chunk_ids, parent_chunk_id, child_chunk_ids)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12::jsonb, $13::jsonb, $14, $15, $16, $17, $18)
ON CONFLICT (uuid, chunk_id) DO UPDATE SET
ON CONFLICT (file_uuid, chunk_id) DO UPDATE SET
start_time = EXCLUDED.start_time,
end_time = EXCLUDED.end_time,
fps = EXCLUDED.fps,
@@ -2626,7 +2695,7 @@ impl PostgresDb {
pub async fn get_chunks_by_uuid(&self, uuid: &str) -> Result<Vec<Chunk>> {
let table = schema::table_name("chunks");
let rows = sqlx::query(&format!(
"SELECT COALESCE(file_id, 0) as file_id, uuid, chunk_id, chunk_index, chunk_type, COALESCE(fps, 24.0) as fps, COALESCE(start_frame, 0) as start_frame, COALESCE(end_frame, 0) as end_frame, text_content, content, metadata, vector_id, COALESCE(frame_count, 0) as frame_count, pre_chunk_ids, parent_chunk_id::text as parent_chunk_id, child_chunk_ids, visual_stats FROM {} WHERE uuid = $1 ORDER BY chunk_index",
"SELECT COALESCE(file_id, 0) as file_id, file_uuid as uuid, chunk_id, chunk_index, chunk_type, COALESCE(fps, 24.0) as fps, COALESCE(start_frame, 0) as start_frame, COALESCE(end_frame, 0) as end_frame, text_content, content, metadata, vector_id, COALESCE(frame_count, 0) as frame_count, pre_chunk_ids, parent_chunk_id::text as parent_chunk_id, child_chunk_ids, visual_stats FROM {} WHERE file_uuid = $1 ORDER BY chunk_index",
table
))
.bind(uuid)
@@ -3264,36 +3333,40 @@ impl PostgresDb {
let sql = match uuid {
Some(_) => &format!(
r#"
SELECT c.chunk_id, c.uuid, c.chunk_index, c.chunk_type, c.start_frame, c.end_frame, c.fps, c.start_time, c.end_time,
SELECT c.chunk_id, c.file_uuid, c.chunk_index, c.chunk_type, c.start_frame, c.end_frame, c.fps, c.start_time, c.end_time,
c.text_content, GREATEST(ts_rank_cd(c.search_vector, to_tsquery('english', $1)), ts_rank_cd(pc.summary_tsvector, to_tsquery('english', $1))) as bm25_score,
c.visual_stats,
pc.metadata->'structured_summary' as scene_summary,
c.parent_chunk_id::integer
FROM {} c
LEFT JOIN parent_chunks pc ON c.parent_chunk_id = pc.id::varchar
WHERE (c.search_vector @@ to_tsquery('english', $1) OR pc.summary_tsvector @@ to_tsquery('english', $1)) AND c.uuid = $2
WHERE (c.search_vector @@ to_tsquery('english', $1) OR pc.summary_tsvector @@ to_tsquery('english', $1) OR c.text_content ILIKE $3) AND c.file_uuid = $2
ORDER BY bm25_score DESC
LIMIT $3
LIMIT $4
"#,
table
),
None => &format!(
r#"
SELECT c.chunk_id, c.uuid, c.chunk_index, c.chunk_type, c.start_frame, c.end_frame, c.fps, c.start_time, c.end_time,
SELECT c.chunk_id, c.file_uuid, c.chunk_index, c.chunk_type, c.start_frame, c.end_frame, c.fps, c.start_time, c.end_time,
c.text_content, GREATEST(ts_rank_cd(c.search_vector, to_tsquery('english', $1)), ts_rank_cd(pc.summary_tsvector, to_tsquery('english', $1))) as bm25_score,
c.visual_stats,
pc.metadata->'structured_summary' as scene_summary,
c.parent_chunk_id::integer
FROM {} c
LEFT JOIN parent_chunks pc ON c.parent_chunk_id = pc.id::varchar
WHERE (c.search_vector @@ to_tsquery('english', $1) OR pc.summary_tsvector @@ to_tsquery('english', $1))
WHERE (c.search_vector @@ to_tsquery('english', $1) OR pc.summary_tsvector @@ to_tsquery('english', $1) OR c.text_content ILIKE $2)
ORDER BY bm25_score DESC
LIMIT $2
LIMIT $3
"#,
table
),
};
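// Use pg_trgm to support fuzzy search across Chinese and English text.
// ILIKE covers Chinese substring (LIKE) matching; pg_trgm's similarity() could provide more precise ranking.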
let ilike_pattern = format!("%{}%", query);
let rows: Vec<(
String,
String,
@@ -3310,10 +3383,11 @@ impl PostgresDb {
Option<serde_json::Value>,
Option<i32>,
)> = match uuid {
Some(_) => {
Some(u) => {
sqlx::query_as(sql)
.bind(&tsquery)
.bind(uuid)
.bind(u)
.bind(&ilike_pattern)
.bind(limit as i64)
.fetch_all(&self.pool)
.await?
@@ -3321,6 +3395,7 @@ impl PostgresDb {
None => {
sqlx::query_as(sql)
.bind(&tsquery)
.bind(&ilike_pattern)
.bind(limit as i64)
.fetch_all(&self.pool)
.await?
@@ -3809,6 +3884,54 @@ impl PostgresDb {
Ok(results)
}
/// Returns up to `limit` monitor jobs whose status is `'running'`, oldest first.
///
/// Rows are read from the table resolved by `schema::table_name("monitor_jobs")`
/// and ordered by `created_at` ascending.
///
/// A status string that `MonitorJobStatus::from_db_str` cannot map falls back to
/// `MonitorJobStatus::Running`, and NULL processor lists become empty vectors.
///
/// # Errors
/// Propagates any error returned while executing the query.
pub async fn get_all_running_jobs(&self, limit: i32) -> Result<Vec<MonitorJob>> {
    let table = schema::table_name("monitor_jobs");
    let sql = format!(
        r#"
SELECT id, uuid, video_path, status, current_processor, progress_total, progress_current,
error_count, last_error, started_at, updated_at, created_at,
processors, completed_processors, failed_processors, video_id
FROM {}
WHERE status = 'running'
ORDER BY created_at ASC
LIMIT $1
"#,
        table
    );
    let fetched = sqlx::query(&sql).bind(limit).fetch_all(&self.pool).await?;

    let mut running = Vec::with_capacity(fetched.len());
    for row in fetched {
        // Column indices below follow the order of the SELECT list.
        let raw_status: String = row.get(3);
        let status =
            MonitorJobStatus::from_db_str(&raw_status).unwrap_or(MonitorJobStatus::Running);
        running.push(MonitorJob {
            id: row.get(0),
            uuid: row.get(1),
            video_path: row.get(2),
            status,
            current_processor: row.get(4),
            progress_total: row.get(5),
            progress_current: row.get(6),
            error_count: row.get(7),
            last_error: row.get(8),
            started_at: row.get(9),
            updated_at: row.get(10),
            created_at: row.get(11),
            processors: row.get::<Option<Vec<String>>, _>(12).unwrap_or_default(),
            completed_processors: row.get::<Option<Vec<String>>, _>(13).unwrap_or_default(),
            failed_processors: row.get::<Option<Vec<String>>, _>(14).unwrap_or_default(),
            video_id: row.get(15),
        });
    }
    Ok(running)
}
pub async fn get_pending_jobs(&self, limit: i32) -> Result<Vec<MonitorJob>> {
let monitor_jobs = schema::table_name("monitor_jobs");
let rows = sqlx::query(&format!(
@@ -3817,7 +3940,7 @@ impl PostgresDb {
error_count, last_error, started_at, updated_at, created_at,
processors, completed_processors, failed_processors, video_id
FROM {}
WHERE status IN ('pending', 'running')
WHERE status = 'pending'
ORDER BY created_at ASC
LIMIT $1
"#,
@@ -4322,7 +4445,7 @@ impl PostgresDb {
name: &str,
) -> Result<crate::core::person_identity::Identity> {
let identity = sqlx::query_as::<_, crate::core::person_identity::Identity>(
r#"INSERT INTO identities (name) VALUES ($1) ON CONFLICT (name) DO UPDATE SET name = EXCLUDED.name RETURNING id, name, embedding::text, metadata, created_at"#,
r#"INSERT INTO identities (name) VALUES ($1) ON CONFLICT (name) DO UPDATE SET name = EXCLUDED.name RETURNING id, name, identity_embedding::text as embedding, metadata, created_at"#,
)
.bind(name)
.fetch_one(&self.pool)
@@ -4371,7 +4494,7 @@ impl PostgresDb {
binding_value: &str,
) -> Result<Option<crate::core::person_identity::Identity>> {
let identity = sqlx::query_as::<_, crate::core::person_identity::Identity>(
"SELECT i.id, i.name, i.embedding::text, i.metadata, i.created_at FROM identities i JOIN identity_bindings b ON i.id = b.identity_id WHERE b.identity_type = $1 AND b.identity_value = $2",
"SELECT i.id, i.name, i.identity_embedding::text as embedding, i.metadata, i.created_at FROM identities i JOIN identity_bindings b ON i.id = b.identity_id WHERE b.identity_type = $1 AND b.identity_value = $2",
)
.bind(binding_type)
.bind(binding_value)
@@ -4389,12 +4512,12 @@ impl PostgresDb {
) -> Result<Vec<crate::core::person_identity::Identity>> {
let query = if !search.is_empty() {
sqlx::query_as::<_, crate::core::person_identity::Identity>(
"SELECT id, name, embedding::text, metadata, created_at FROM identities WHERE name ILIKE $1 ORDER BY id LIMIT $2 OFFSET $3",
"SELECT id, name, identity_embedding::text as embedding, metadata, created_at FROM identities WHERE name ILIKE $1 ORDER BY id LIMIT $2 OFFSET $3",
)
.bind(format!("%{}%", search))
} else {
sqlx::query_as::<_, crate::core::person_identity::Identity>(
"SELECT id, name, embedding::text, metadata, created_at FROM identities ORDER BY id LIMIT $1 OFFSET $2",
"SELECT id, name, identity_embedding::text as embedding, metadata, created_at FROM identities ORDER BY id LIMIT $1 OFFSET $2",
)
};
let identities = query.bind(limit).bind(offset).fetch_all(&self.pool).await?;
@@ -4407,7 +4530,7 @@ impl PostgresDb {
id: i64,
) -> Result<Option<crate::core::person_identity::Identity>> {
let identity = sqlx::query_as::<_, crate::core::person_identity::Identity>(
"SELECT id, name, embedding::text, metadata, created_at FROM identities WHERE id = $1",
"SELECT id, name, identity_embedding::text as embedding, metadata, created_at FROM identities WHERE id = $1",
)
.bind(id)
.fetch_optional(&self.pool)
@@ -4716,7 +4839,7 @@ impl PostgresDb {
"speaker_ids"
};
let query = format!(
"SELECT id, start_frame, end_frame, content FROM chunks WHERE uuid = $1 AND $2::text = ANY({}::text[]) ORDER BY start_frame",
"SELECT id, start_frame, end_frame, content FROM chunks WHERE file_uuid = $1 AND $2::text = ANY({}::text[]) ORDER BY start_frame",
column
);
@@ -4836,7 +4959,7 @@ mod tests {
width: 1920,
height: 1080,
fps: 30.0,
probe_json: Some("{}".to_string()),
probe_json: Some(serde_json::from_str("{}").unwrap()),
storage: StorageStatus::default(),
status: VideoStatus::Pending,
processing_status: None,
@@ -4847,6 +4970,11 @@ mod tests {
registration_time: None,
total_frames: 0,
parent_uuid: None,
cut_done: false,
cut_count: 0,
cut_max_duration: 0.0,
scene_done: false,
audio_tracks: None,
};
let json = serde_json::to_string(&record).unwrap();
@@ -4935,13 +5063,18 @@ mod tests {
error_count: 0,
last_error: None,
started_at: Some(
NaiveDateTime::parse_from_str("2024-01-01 10:00:00", "%Y-%m-%d %H:%M:%S").unwrap(),
chrono::DateTime::parse_from_rfc3339("2024-01-01T10:00:00Z")
.unwrap()
.with_timezone(&chrono::Utc),
),
updated_at: Some(
NaiveDateTime::parse_from_str("2024-01-01 10:05:00", "%Y-%m-%d %H:%M:%S").unwrap(),
chrono::DateTime::parse_from_rfc3339("2024-01-01T10:05:00Z")
.unwrap()
.with_timezone(&chrono::Utc),
),
created_at: NaiveDateTime::parse_from_str("2024-01-01 09:55:00", "%Y-%m-%d %H:%M:%S")
.unwrap(),
created_at: chrono::DateTime::parse_from_rfc3339("2024-01-01T09:55:00Z")
.unwrap()
.into(),
processors: vec!["asr".to_string(), "cut".to_string()],
completed_processors: vec!["asr".to_string()],
failed_processors: vec![],
@@ -4968,7 +5101,7 @@ mod tests {
"last_error": null,
"started_at": null,
"updated_at": null,
"created_at": "2024-01-01T00:00:00",
"created_at": "2024-01-01T00:00:00Z",
"processors": ["asr", "cut"],
"completed_processors": [],
"failed_processors": [],

View File

@@ -88,6 +88,62 @@ impl QdrantDb {
Ok(())
}
/// 將向量寫入指定 collection支援多 collection
pub async fn upsert_vector_to_collection(
&self,
collection: &str,
point_id: u64,
vector: &[f32],
payload: Option<serde_json::Value>,
) -> Result<()> {
let url = format!(
"{}/collections/{}/points?wait=true",
self.base_url, collection
);
tracing::debug!("Qdrant upsert URL: {}, collection: {}", url, collection);
let points = if let Some(p) = payload {
serde_json::json!({
"points": [{
"id": point_id,
"vector": vector,
"payload": p,
}]
})
} else {
serde_json::json!({
"points": [{
"id": point_id,
"vector": vector,
}]
})
};
let response = self
.client
.put(&url)
.header("api-key", &self.api_key)
.json(&points)
.send()
.await
.context("Failed to send upsert request to Qdrant")?;
let status = response.status();
if !status.is_success() {
let response_text = response.text().await.unwrap_or_default();
tracing::error!("Qdrant upsert failed: {} - {}", status, response_text);
anyhow::bail!(
"Qdrant upsert failed with status {}: {}",
status,
response_text
);
}
tracing::debug!("Successfully upserted vector for point: {}", point_id);
Ok(())
}
pub async fn upsert_vector(
&self,
chunk_id: &str,

View File

@@ -371,6 +371,11 @@ impl RedisClient {
processor: &str,
status: &str,
error: Option<&str>,
frames_processed: i32,
chunks_produced: i32,
total_frames: i32,
retry_count: i32,
pid: i32,
) -> Result<()> {
let mut conn = self.get_conn_internal().await?;
let prefix = REDIS_KEY_PREFIX.as_str();
@@ -378,13 +383,24 @@ impl RedisClient {
let now = chrono::Utc::now().to_rfc3339();
let mut fields: Vec<(&str, &str)> = vec![("status", status), ("updated_at", &now)];
let mut fields: Vec<(&str, String)> = vec![
("status", status.to_string()),
("updated_at", now),
("current", frames_processed.to_string()),
("total", total_frames.to_string()),
("frames_processed", frames_processed.to_string()),
("chunks_produced", chunks_produced.to_string()),
("retry_count", retry_count.to_string()),
("pid", pid.to_string()),
];
if let Some(err) = error {
fields.push(("error", err));
fields.push(("error", err.to_string()));
}
let _: Option<String> = conn.hset_multiple(&key, &fields).await?;
let field_refs: Vec<(&str, &str)> = fields.iter().map(|(k, v)| (*k, v.as_str())).collect();
let _: Option<String> = conn.hset_multiple(&key, &field_refs).await?;
let _: bool = conn.expire(&key, 86400).await?;
Ok(())

View File

@@ -182,6 +182,11 @@ impl IngestionService {
registration_time: None,
total_frames: total_frames.unwrap_or(0),
parent_uuid: None,
cut_done: false,
cut_count: 0,
cut_max_duration: 0.0,
scene_done: false,
audio_tracks: None,
};
self.db

View File

@@ -15,4 +15,3 @@ pub mod text;
pub mod thumbnail;
pub mod time;
pub mod tmdb;
pub mod worker;

View File

@@ -20,6 +20,8 @@ pub struct StreamInfo {
pub duration: Option<String>,
pub sample_rate: Option<String>,
pub channels: Option<u32>,
#[serde(default)]
pub tags: Option<serde_json::Value>,
}
#[derive(Debug, Serialize, Deserialize)]
@@ -69,6 +71,7 @@ pub fn probe_video(video_path: &str) -> Result<ProbeResult> {
duration: s["duration"].as_str().map(String::from),
sample_rate: s["sample_rate"].as_str().map(String::from),
channels: s["channels"].as_u64().map(|v| v as u32),
tags: s.get("tags").cloned(),
})
.collect()
})

View File

@@ -1,124 +0,0 @@
use anyhow::{Context, Result};
use serde::{Deserialize, Serialize};
use std::time::Duration;
use super::executor::PythonExecutor;
use crate::core::config::processor;
/// Parsed output of the ASR script, deserialized from its JSON output file.
#[derive(Debug, Serialize, Deserialize)]
pub struct AsrResult {
/// Language reported for the audio, e.g. `"en"` or `"zh"`; `None` when absent.
pub language: Option<String>,
/// Score accompanying `language`; `None` when absent.
/// NOTE(review): presumably a probability in 0..=1 — confirm against the script.
pub language_probability: Option<f64>,
/// Transcribed segments, in the order they appear in the JSON.
pub segments: Vec<AsrSegment>,
}
/// One transcribed span of speech.
#[derive(Debug, Serialize, Deserialize)]
pub struct AsrSegment {
/// Start position of the segment.
/// NOTE(review): presumably seconds from the start of the media — confirm.
pub start: f64,
/// End position of the segment, in the same unit as `start`.
pub end: f64,
/// Transcribed text of the segment.
pub text: String,
}
/// Runs the `asr_processor.py` script on a video and returns the parsed transcript.
///
/// The script is invoked through [`PythonExecutor`] with `video_path` and
/// `output_path` as its arguments, bounded by `processor::ASR_TIMEOUT_SECS`.
/// Afterwards `output_path` is read back and deserialized as [`AsrResult`].
///
/// # Errors
/// Fails when the executor cannot be created, the script run fails, or the
/// output file cannot be read or parsed.
pub async fn process_asr(
    video_path: &str,
    output_path: &str,
    uuid: Option<&str>,
) -> Result<AsrResult> {
    const SCRIPT: &str = "asr_processor.py";

    let runner = PythonExecutor::new()?;
    // Resolved up front so the error context can name the exact script location.
    let script_location = runner.script_path(SCRIPT);

    tracing::info!("[ASR] Starting ASR processing: {}", video_path);

    let deadline = Duration::from_secs(*processor::ASR_TIMEOUT_SECS);
    runner
        .run(
            SCRIPT,
            &[video_path, output_path],
            uuid,
            "ASR",
            Some(deadline),
        )
        .await
        .with_context(|| format!("Failed to run {:?}", script_location))?;

    let raw_output =
        std::fs::read_to_string(output_path).context("Failed to read ASR output")?;
    let transcript = serde_json::from_str::<AsrResult>(&raw_output)
        .context("Failed to parse ASR output")?;

    tracing::info!(
        "[ASR] Result: {} segments, language: {:?}",
        transcript.segments.len(),
        transcript.language
    );
    Ok(transcript)
}
// Unit tests for the ASR result types: serde round-trips and basic field access.
#[cfg(test)]
mod tests {
use super::*;
// Serializing a populated result must emit the segment text and language code.
#[test]
fn test_asr_result_serialization() {
let result = AsrResult {
language: Some("en".to_string()),
language_probability: Some(0.95),
segments: vec![
AsrSegment {
start: 0.0,
end: 2.5,
text: "Hello world".to_string(),
},
AsrSegment {
start: 2.5,
end: 5.0,
text: "Test speech".to_string(),
},
],
};
let json = serde_json::to_string(&result).unwrap();
assert!(json.contains("Hello world"));
assert!(json.contains("en"));
}
// Deserializing JSON with non-ASCII (Chinese) text must preserve every field.
#[test]
fn test_asr_result_deserialization() {
let json = r#"{
"language": "zh",
"language_probability": 0.98,
"segments": [
{"start": 0.0, "end": 1.5, "text": "測試"}
]
}"#;
let result: AsrResult = serde_json::from_str(json).unwrap();
assert_eq!(result.language, Some("zh".to_string()));
assert_eq!(result.language_probability, Some(0.98));
assert_eq!(result.segments.len(), 1);
assert_eq!(result.segments[0].text, "測試");
}
// Checks plain field access on a hand-built segment with empty text.
// NOTE(review): despite the name, this does not exercise a `Default` impl.
#[test]
fn test_asr_segment_default() {
let segment = AsrSegment {
start: 0.0,
end: 1.0,
text: String::new(),
};
assert_eq!(segment.start, 0.0);
assert_eq!(segment.end, 1.0);
assert!(segment.text.is_empty());
}
// A result with no language and no segments is representable.
#[test]
fn test_asr_result_empty_segments() {
let result = AsrResult {
language: None,
language_probability: None,
segments: vec![],
};
assert!(result.language.is_none());
assert!(result.segments.is_empty());
}
}

View File

@@ -12,12 +12,16 @@ const ASRX_TIMEOUT: Duration = Duration::from_secs(7200);
pub struct AsrxResult {
pub language: Option<String>,
pub segments: Vec<AsrxSegment>,
#[serde(skip_serializing)]
pub embeddings: Option<Vec<Vec<f32>>>,
}
#[derive(Debug, Serialize, Deserialize)]
pub struct AsrxSegment {
pub start: f64,
pub end: f64,
pub start_time: f64,
pub end_time: f64,
pub start_frame: u64,
pub end_frame: u64,
pub text: String,
pub speaker_id: Option<String>,
}
@@ -43,10 +47,19 @@ pub async fn process_asrx(
return Ok(AsrxResult {
language: None,
segments: vec![],
embeddings: None,
});
}
}
tracing::info!(
"[ASRX] Running: {} {} {} {}",
executor.python_path().display(),
script_path.display(),
video_path,
output_path,
);
let mut cmd = Command::new(executor.python_path());
cmd.arg(&script_path).arg(video_path).arg(output_path);
@@ -68,16 +81,21 @@ pub async fn process_asrx(
let stderr = String::from_utf8_lossy(&output.stderr);
for line in stderr.lines() {
if line.starts_with("ASRX_START") {
let trimmed = line.trim();
if trimmed.starts_with("ASRX_START") {
tracing::info!("[ASRX] Loading model...");
} else if line.starts_with("ASRX_PROGRESS:") {
let count = line.trim_start_matches("ASRX_PROGRESS:");
} else if trimmed.starts_with("ASRX_PROGRESS:") {
let count = trimmed.trim_start_matches("ASRX_PROGRESS:");
tracing::info!("[ASRX] Processed {} segments...", count);
} else if line.starts_with("ASRX_COMPLETE:") {
let count = line.trim_start_matches("ASRX_COMPLETE:");
} else if trimmed.starts_with("ASRX_COMPLETE:") {
let count = trimmed.trim_start_matches("ASRX_COMPLETE:");
tracing::info!("[ASRX] Completed! Total: {} segments", count);
} else if !trimmed.is_empty() && !trimmed.starts_with("[SelfASRX") {
tracing::debug!("[ASRX/stderr] {}", trimmed);
}
}
// Log full stderr for debugging
tracing::info!("[ASRX] stderr output:\n{}", stderr);
if !output.status.success() {
anyhow::bail!("ASRX failed: {}", stderr);
@@ -102,11 +120,14 @@ mod tests {
let result = AsrxResult {
language: Some("en".to_string()),
segments: vec![AsrxSegment {
start: 0.0,
end: 2.5,
start_time: 0.0,
end_time: 2.5,
start_frame: 0,
end_frame: 75,
text: "Hello".to_string(),
speaker_id: Some("SPEAKER_00".to_string()),
}],
embeddings: None,
};
let json = serde_json::to_string(&result).unwrap();
@@ -119,7 +140,7 @@ mod tests {
let json = r#"{
"language": "zh",
"segments": [
{"start": 0.0, "end": 1.5, "text": "測試", "speaker_id": "SPEAKER_01"}
{"start_time": 0.0, "end_time": 1.5, "start_frame": 0, "end_frame": 45, "text": "測試", "speaker_id": "SPEAKER_01"}
]
}"#;
@@ -137,6 +158,7 @@ mod tests {
let result = AsrxResult {
language: None,
segments: vec![],
embeddings: None,
};
assert!(result.segments.is_empty());
assert!(result.language.is_none());
@@ -145,11 +167,13 @@ mod tests {
#[test]
fn test_asrx_segment_times() {
let segment = AsrxSegment {
start: 0.0,
end: 5.0,
start_time: 0.0,
end_time: 5.0,
start_frame: 0,
end_frame: 150,
text: "Test".to_string(),
speaker_id: None,
};
assert!(segment.end > segment.start);
assert!(segment.end_time > segment.start_time);
}
}

View File

@@ -147,6 +147,19 @@ impl PythonExecutor {
anyhow::bail!("Script not found: {:?}", script_path);
}
// Mark the output file as in-progress (add a .tmp suffix).
let output_path = args.get(1).map(|p| std::path::PathBuf::from(p));
let tmp_path = output_path.as_ref().map(|p| {
let mut tmp = p.to_path_buf();
tmp.set_extension("json.tmp");
tmp
});
if let (Some(src), Some(dst)) = (&output_path, &tmp_path) {
if src.exists() {
let _ = std::fs::rename(src, dst);
}
}
let mut cmd = Command::new(&self.venv_python);
cmd.arg(&script_path);
@@ -220,12 +233,28 @@ impl PythonExecutor {
Ok(())
};
// On error, rename .json.tmp → .json.err.
let mark_failed = || {
if let Some(tmp) = &tmp_path {
if tmp.exists() {
if let Some(out) = &output_path {
let mut err_path = out.to_path_buf();
err_path.set_extension("json.err");
let _ = std::fs::rename(tmp, &err_path);
}
}
}
};
if let Some(duration) = timeout_duration {
match timeout(duration, run_future).await {
Ok(Ok(())) => {}
Ok(Err(e)) => return Err(e),
Ok(Err(e)) => {
mark_failed();
return Err(e);
}
Err(_) => {
// Try to kill the entire process group
mark_failed();
if let Some(pid) = child_pid {
let pgid = pid as i32;
unsafe {
@@ -237,7 +266,19 @@ impl PythonExecutor {
}
}
} else {
run_future.await?;
if let Err(e) = run_future.await {
mark_failed();
return Err(e);
}
}
// On success, rename .json.tmp → .json (completed).
if let Some(tmp) = &tmp_path {
if tmp.exists() {
if let Some(out) = &output_path {
let _ = std::fs::rename(tmp, out);
}
}
}
Ok(())

View File

@@ -28,6 +28,7 @@ pub struct Face {
pub width: i32,
pub height: i32,
pub confidence: f32,
#[serde(skip_serializing)]
pub embedding: Option<Vec<f32>>,
pub landmarks: Option<Vec<Vec<f32>>>,
pub attributes: Option<FaceAttributes>,
@@ -111,7 +112,6 @@ mod tests {
let json = serde_json::to_string(&result).unwrap();
assert!(json.contains("face_1"));
assert!(json.contains("\"width\":50"));
assert!(json.contains("embedding"));
assert!(json.contains("landmarks"));
assert!(json.contains("attributes"));
}

View File

@@ -27,7 +27,8 @@ pub use face_recognition::{
pub use ocr::{process_ocr, OcrFrame, OcrResult, OcrText};
pub use pose::{process_pose, Bbox, Keypoint, PersonPose, PoseFrame, PoseResult};
pub use scene_classification::{
process_scene_classification, SceneClassificationResult, ScenePrediction, SceneSegment,
load_scene_from_file, process_scene_classification, SceneClassificationResult, ScenePrediction,
SceneSegment,
};
pub use snapshot_agent::{SnapshotAgent, SnapshotAgentConfig};
pub use story::{process_story, StoryChildChunk, StoryParentChunk, StoryResult, StoryStats};

View File

@@ -7,7 +7,7 @@ use super::executor::PythonExecutor;
const SCENE_TIMEOUT: Duration = Duration::from_secs(7200);
/// Scene classification result.
#[derive(Debug, Serialize, Deserialize, Clone)]
#[derive(Debug, Default, Serialize, Deserialize, Clone)]
pub struct SceneClassificationResult {
pub frame_count: u64,
pub fps: f64,
@@ -32,6 +32,19 @@ pub struct ScenePrediction {
pub confidence: f32,
}
/// Loads a scene classification result from an existing JSON file
/// (without re-running the Python script).
///
/// # Errors
/// Fails when `path` cannot be read or its contents do not deserialize into
/// [`SceneClassificationResult`].
pub fn load_scene_from_file(path: &str) -> Result<SceneClassificationResult> {
    let raw = std::fs::read_to_string(path).context("Failed to read scene JSON file")?;
    let parsed = serde_json::from_str::<SceneClassificationResult>(&raw)
        .context("Failed to parse scene JSON")?;

    let scene_total = parsed.scenes.len();
    tracing::info!("[SCENE] Loaded {} scenes from {}", scene_total, path);

    Ok(parsed)
}
/// Runs scene classification.
pub async fn process_scene_classification(
video_path: &str,

View File

@@ -12,7 +12,7 @@ use super::yolo::{YoloFrame, YoloResult};
const VISUAL_CHUNK_TIMEOUT: Duration = Duration::from_secs(3600);
/// 視覺分片處理結果
#[derive(Debug, Serialize, Deserialize, Clone)]
#[derive(Debug, Serialize, Deserialize, Clone, Default)]
pub struct VisualChunkResult {
/// 生成的視覺分片數量
pub chunk_count: u32,
@@ -284,7 +284,7 @@ pub async fn process_visual_chunk_advanced(
});
}
executor
let result = match executor
.run(
"visual_chunk_processor.py",
&[video_path, output_path],
@@ -293,13 +293,34 @@ pub async fn process_visual_chunk_advanced(
Some(VISUAL_CHUNK_TIMEOUT),
)
.await
.with_context(|| format!("Failed to run {:?}", script_path))?;
let json_str =
std::fs::read_to_string(output_path).context("Failed to read visual chunk output")?;
let result: VisualChunkResult =
serde_json::from_str(&json_str).context("Failed to parse visual chunk output")?;
{
Ok(_) => match std::fs::read_to_string(output_path) {
Ok(json_str) => match serde_json::from_str::<VisualChunkResult>(&json_str) {
Ok(r) => r,
Err(e) => {
tracing::warn!(
"[VisualChunk] Failed to parse output ({}), returning empty",
e
);
VisualChunkResult::default()
}
},
Err(e) => {
tracing::warn!(
"[VisualChunk] Failed to read output ({}), returning empty",
e
);
VisualChunkResult::default()
}
},
Err(e) => {
tracing::warn!(
"[VisualChunk] Failed to run script ({}), returning empty",
e
);
VisualChunkResult::default()
}
};
tracing::info!(
"[VisualChunk] Advanced generation result: {} chunks, {} frames",

View File

@@ -54,22 +54,81 @@ pub fn compute_uuid_from_relative_path(relative_path: &str) -> String {
compute_uuid(&username, &filepath)
}
/// Get MAC address of primary network interface
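/// Returns the MAC address of a built-in (non-removable, non-external) network interface on this machine.
/// Priority order: en0 (Wi-Fi) > en1 > other non-USB/Thunderbolt interfaces.
/// Falls back to a default value if none is found.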
/// Returns MAC address in format: a1:b2:c3:d4:e5:f6
pub fn get_mac_address() -> String {
use mac_address::get_mac_address;
// 使用 ifconfig 列出所有介面
let output = std::process::Command::new("ifconfig")
.args(["-a"])
.output()
.ok()
.and_then(|o| {
if o.status.success() {
Some(String::from_utf8_lossy(&o.stdout).to_string())
} else {
None
}
})
.unwrap_or_default();
match get_mac_address() {
Ok(Some(mac)) => {
let bytes = mac.bytes();
format!(
"{:02x}:{:02x}:{:02x}:{:02x}:{:02x}:{:02x}",
bytes[0], bytes[1], bytes[2], bytes[3], bytes[4], bytes[5]
)
// 解析 ifconfig找到介面名稱與 MAC
let mut current_iface = String::new();
let mut candidates: Vec<(u32, String)> = Vec::new();
for line in output.lines() {
let trimmed = line.trim();
// 介面名稱行,如 "en0: flags=..."
if !trimmed.starts_with('\t') && trimmed.contains(": flags=") {
current_iface = trimmed.split(':').next().unwrap_or("").to_string();
}
// macOS: "ether a1:b2:c3:d4:e5:f6"
if let Some(mac_str) = trimmed.strip_prefix("ether ") {
let mac = mac_str.trim();
if mac.len() == 17 && mac.chars().filter(|&c| c == ':').count() == 5 {
if mac == "00:00:00:00:00:00" || mac == "ff:ff:ff:ff:ff:ff" {
continue;
}
// 優先級en0=0, en1=1, en2=2, 其他=100
let priority = match current_iface.as_str() {
"en0" => 0,
"en1" => 1,
"en2" => 2,
_ if current_iface.starts_with("en") => 3,
_ => 100,
};
candidates.push((priority, mac.to_string()));
}
}
// macOS: "lladdr a1:b2:c3:d4:e5:f6"
if let Some(mac_str) = trimmed.strip_prefix("lladdr ") {
let mac = mac_str.trim();
if mac.len() == 17 && mac.chars().filter(|&c| c == ':').count() == 5 {
if mac == "00:00:00:00:00:00" || mac == "ff:ff:ff:ff:ff:ff" {
continue;
}
let priority = match current_iface.as_str() {
"en0" => 0,
"en1" => 1,
"en2" => 2,
_ if current_iface.starts_with("en") => 3,
_ => 100,
};
candidates.push((priority, mac.to_string()));
}
}
Ok(None) => "00:00:00:00:00:00".to_string(),
Err(_) => "00:00:00:00:00:00".to_string(),
}
// 按優先級排序en0 > en1 > en2 > 其他)
candidates.sort_by_key(|k| k.0);
if let Some(mac) = candidates.first().map(|c| c.1.clone()) {
return mac;
}
// fallback
"00:00:00:00:00:00".to_string()
}
/// Compute Birth UUID (Stable Identity with Location)

View File

@@ -1,140 +0,0 @@
use crate::core::chunk;
use crate::core::db::PostgresDb;
use sqlx::PgPool;
use tokio::time::{sleep, Duration};
use tracing;
/// Background worker that polls the monitor-jobs table and processes queued jobs.
pub struct JobWorker {
/// Database pool used for every query the worker issues.
pool: PgPool,
/// How long to wait before polling again when no job was found.
poll_interval: Duration,
}
impl JobWorker {
    /// Maximum number of bytes of an error message persisted in `last_error`.
    const MAX_ERROR_BYTES: usize = 500;

    /// Creates a worker that polls every `poll_interval_secs` seconds when idle.
    pub fn new(pool: PgPool, poll_interval_secs: u64) -> Self {
        Self {
            pool,
            poll_interval: Duration::from_secs(poll_interval_secs),
        }
    }

    /// Runs the polling loop; this future never completes.
    ///
    /// After a processed job the loop polls again immediately; when no job was
    /// found it sleeps for the configured interval; after an error it logs and
    /// sleeps for five seconds.
    pub async fn run(&self) {
        tracing::info!(
            "🤖 Job Worker started (Polling every {}s)",
            self.poll_interval.as_secs()
        );
        loop {
            match self.process_next_job().await {
                // A job was handled: loop immediately to check for more.
                Ok(true) => {}
                // No work found, wait before polling again.
                Ok(false) => sleep(self.poll_interval).await,
                Err(e) => {
                    tracing::error!("❌ Job Worker error: {}", e);
                    sleep(Duration::from_secs(5)).await;
                }
            }
        }
    }

    /// Claims the oldest `QUEUED` job, runs rule 1 ingestion for it and records the outcome.
    ///
    /// Returns `Ok(true)` when a job was claimed (whether ingestion succeeded or
    /// failed) and `Ok(false)` when no queued job exists.
    ///
    /// # Errors
    /// Propagates database errors from claiming the job, reading the FPS, or
    /// writing the final status.
    async fn process_next_job(&self) -> anyhow::Result<bool> {
        // 1. Atomically claim one QUEUED job. `FOR UPDATE SKIP LOCKED` lets several
        //    workers poll concurrently without claiming the same row.
        //    Note: progress_total is int4 (i32).
        let job_row: Option<(i32, String, i32)> = sqlx::query_as(
            r#"
UPDATE dev.monitor_jobs
SET status = 'RUNNING', updated_at = NOW()
WHERE id = (
SELECT id FROM dev.monitor_jobs
WHERE status = 'QUEUED'
ORDER BY created_at ASC
LIMIT 1
FOR UPDATE SKIP LOCKED
)
RETURNING id, uuid, COALESCE(progress_total, 0)
"#,
        )
        .fetch_optional(&self.pool)
        .await?;

        if let Some((job_id, asset_uuid, total_frames)) = job_row {
            tracing::info!(
                "🚀 Processing Job {} for Asset {} (Frames: {})",
                job_id,
                asset_uuid,
                total_frames
            );

            // 2. Execute logic (defaults to rule1, as monitor_jobs doesn't store a rule type).
            // NOTE(review): if this lookup fails, the `?` returns before the job's status
            // is updated, so the row stays 'RUNNING' — confirm that is acceptable.
            let fps = self.get_asset_fps(&asset_uuid).await?;
            let db = PostgresDb::from_pool(self.pool.clone());
            let result = chunk::rule1_ingest::execute_rule1(&db, &asset_uuid, fps).await;

            // 3. Update job status.
            match result {
                Ok(chunk_count) => {
                    tracing::info!(
                        "✅ Job {} completed. Processed {} items.",
                        job_id,
                        chunk_count
                    );
                    // Runtime queries are used to avoid compile-time macro checks.
                    sqlx::query(
                        "UPDATE dev.monitor_jobs SET status = 'COMPLETED', progress_current = progress_total, updated_at = NOW() WHERE id = $1"
                    )
                    .bind(job_id)
                    .execute(&self.pool)
                    .await?;

                    // Update video processing_status.
                    sqlx::query(
                        "UPDATE dev.videos SET processing_status = $1::jsonb WHERE file_uuid = $2",
                    )
                    .bind(serde_json::json!({"status": "COMPLETED"}))
                    .bind(asset_uuid)
                    .execute(&self.pool)
                    .await?;
                }
                Err(e) => {
                    tracing::error!("❌ Job {} failed: {}", job_id, e);
                    let err_msg = e.to_string();
                    // Truncate on a character boundary: slicing at a fixed byte offset
                    // panics when that offset falls inside a multi-byte UTF-8 character.
                    let safe_msg = Self::truncate_error(&err_msg);
                    sqlx::query(
                        "UPDATE dev.monitor_jobs SET status = 'FAILED', last_error = $2, updated_at = NOW() WHERE id = $1"
                    )
                    .bind(job_id)
                    .bind(safe_msg)
                    .execute(&self.pool)
                    .await?;
                }
            }
            return Ok(true); // Processed a job
        }

        Ok(false) // No job found
    }

    /// Looks up the FPS stored for `uuid` in `dev.videos`, falling back to 29.97
    /// when no value is returned.
    async fn get_asset_fps(&self, uuid: &str) -> anyhow::Result<f64> {
        // dev.videos uses file_uuid and has a direct fps column.
        let fps: Option<f64> =
            sqlx::query_scalar("SELECT fps FROM dev.videos WHERE file_uuid = $1")
                .bind(uuid)
                .fetch_optional(&self.pool)
                .await?;
        Ok(fps.unwrap_or(29.97))
    }

    /// Returns the longest prefix of `msg` that is at most `MAX_ERROR_BYTES` bytes
    /// long and ends on a UTF-8 character boundary.
    fn truncate_error(msg: &str) -> &str {
        if msg.len() <= Self::MAX_ERROR_BYTES {
            return msg;
        }
        let mut end = Self::MAX_ERROR_BYTES;
        // Offset 0 is always a boundary, so this loop terminates.
        while !msg.is_char_boundary(end) {
            end -= 1;
        }
        &msg[..end]
    }
}

View File

@@ -1,2 +0,0 @@
pub mod job_runner;
pub use job_runner::JobWorker;