cleanup: remove dead code and duplicate docs
- Remove session-ses_2f27.md (161KB raw session log)
- Remove 49 ROOT_* duplicate files across REFERENCE/
- Remove 14 duplicate files between REFERENCE/ root and history/
- Remove asr_legacy.rs (dead code, replaced by asr.rs)
- Remove src/core/worker/ (duplicate JobWorker)
- Remove src/core/layers/ (empty directory)
- Remove 4 .bak files in src/
- Remove 7 dead private methods in worker/processor.rs
- Remove backup directory from git tracking
This commit is contained in:
@@ -58,7 +58,8 @@ pub async fn execute_rule1(db: &PostgresDb, file_uuid: &str, fps: f64) -> Result
|
||||
fps,
|
||||
content,
|
||||
)
|
||||
.with_metadata(metadata);
|
||||
.with_metadata(metadata)
|
||||
.with_text_content(seg.text.clone());
|
||||
|
||||
db.store_chunk_in_tx(&chunk, &mut tx).await?;
|
||||
|
||||
|
||||
@@ -32,9 +32,9 @@ struct AsrSegment {
|
||||
/// 2. Aggregates Rule 1 (Sentence) chunks falling within each scene.
|
||||
/// 3. Calls LLM to generate 5W1H+ summary.
|
||||
/// 4. Inserts parent chunks into `dev.chunks`.
|
||||
pub async fn ingest_rule3(pool: &PgPool, asset_uuid: &str) -> Result<usize> {
|
||||
let cut_path = format!("{}/{}.cut.json", *OUTPUT_DIR, asset_uuid);
|
||||
let asr_path = format!("{}/{}.asr.json", *OUTPUT_DIR, asset_uuid);
|
||||
pub async fn ingest_rule3(pool: &PgPool, file_uuid: &str) -> Result<usize> {
|
||||
let cut_path = format!("{}/{}.cut.json", *OUTPUT_DIR, file_uuid);
|
||||
let asr_path = format!("{}/{}.asr.json", *OUTPUT_DIR, file_uuid);
|
||||
|
||||
// 1. Load CUT and ASR data
|
||||
let cut_content = fs::read_to_string(&cut_path)
|
||||
@@ -70,16 +70,16 @@ pub async fn ingest_rule3(pool: &PgPool, asset_uuid: &str) -> Result<usize> {
|
||||
}
|
||||
}
|
||||
|
||||
// Query Rule 1 table for better linking
|
||||
// Query chunks table for Rule 1 sentence chunks
|
||||
let rule1_rows: Vec<(String,)> = sqlx::query_as(
|
||||
r#"
|
||||
SELECT id::text FROM chunks_rule1
|
||||
WHERE asset_uuid = $1
|
||||
SELECT chunk_id FROM chunks
|
||||
WHERE uuid = $1 AND chunk_type = 'sentence' AND rule = 'rule_1'
|
||||
AND start_frame >= $2
|
||||
AND end_frame <= $3
|
||||
"#,
|
||||
)
|
||||
.bind(asset_uuid)
|
||||
.bind(file_uuid)
|
||||
.bind(scene.start_frame as i64)
|
||||
.bind(scene.end_frame as i64)
|
||||
.fetch_all(&mut *tx)
|
||||
@@ -98,14 +98,14 @@ pub async fn ingest_rule3(pool: &PgPool, asset_uuid: &str) -> Result<usize> {
|
||||
|
||||
let texts: Vec<String> = sqlx::query_scalar(
|
||||
r#"
|
||||
SELECT content FROM chunks_rule1
|
||||
WHERE asset_uuid = $1
|
||||
SELECT text_content FROM chunks
|
||||
WHERE uuid = $1 AND chunk_type = 'sentence' AND rule = 'rule_1'
|
||||
AND start_frame >= $2
|
||||
AND end_frame <= $3
|
||||
ORDER BY start_frame ASC
|
||||
"#,
|
||||
)
|
||||
.bind(asset_uuid)
|
||||
.bind(file_uuid)
|
||||
.bind(scene.start_frame as i64)
|
||||
.bind(scene.end_frame as i64)
|
||||
.fetch_all(&mut *tx)
|
||||
@@ -136,7 +136,7 @@ pub async fn ingest_rule3(pool: &PgPool, asset_uuid: &str) -> Result<usize> {
|
||||
|
||||
// 4. Insert into dev.chunks
|
||||
let fps_query: Option<f64> = sqlx::query_scalar("SELECT fps FROM videos WHERE uuid = $1")
|
||||
.bind(asset_uuid)
|
||||
.bind(file_uuid)
|
||||
.fetch_optional(&mut *tx)
|
||||
.await?;
|
||||
let fps = fps_query.unwrap_or(29.97);
|
||||
@@ -157,7 +157,7 @@ pub async fn ingest_rule3(pool: &PgPool, asset_uuid: &str) -> Result<usize> {
|
||||
ON CONFLICT (uuid, chunk_id) DO NOTHING
|
||||
"#,
|
||||
)
|
||||
.bind(asset_uuid)
|
||||
.bind(file_uuid)
|
||||
.bind(&chunk_id)
|
||||
.bind(scene.scene_number as i32)
|
||||
.bind("cut") // Chunk type
|
||||
|
||||
@@ -1,755 +0,0 @@
|
||||
use crate::core::time::FrameTime;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum ChunkType {
|
||||
TimeBased,
|
||||
Sentence,
|
||||
Cut,
|
||||
Trace,
|
||||
Story, // Parent chunk from story analysis
|
||||
Visual, // Visual object-based chunk from YOLO detection
|
||||
}
|
||||
|
||||
impl ChunkType {
|
||||
pub fn as_str(&self) -> &'static str {
|
||||
match self {
|
||||
ChunkType::TimeBased => "time",
|
||||
ChunkType::Sentence => "sentence",
|
||||
ChunkType::Cut => "cut",
|
||||
ChunkType::Trace => "trace",
|
||||
ChunkType::Story => "story",
|
||||
ChunkType::Visual => "visual",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum ChunkRule {
|
||||
Rule1, // 直接轉換
|
||||
Rule2, // 集合內容
|
||||
}
|
||||
|
||||
/// 關鍵幀的物件列表
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct KeyframeObjects {
|
||||
/// 關鍵幀時間 (秒)
|
||||
pub timestamp: f64,
|
||||
/// 關鍵幀幀號
|
||||
pub frame_number: u64,
|
||||
/// 檢測到的物件
|
||||
pub objects: Vec<DetectedObject>,
|
||||
}
|
||||
|
||||
/// 檢測到的物件
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct DetectedObject {
|
||||
/// 物件類別名稱
|
||||
pub class_name: String,
|
||||
/// 物件類別 ID
|
||||
pub class_id: u32,
|
||||
/// 信心值 (0.0-1.0)
|
||||
pub confidence: f32,
|
||||
/// 邊界框 (x, y, width, height)
|
||||
pub bbox: Option<BoundingBox>,
|
||||
/// 出現次數 (在分片內)
|
||||
pub occurrence: u32,
|
||||
}
|
||||
|
||||
/// 邊界框
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct VisualChunkContent {
|
||||
pub start_time: f64,
|
||||
pub end_time: f64,
|
||||
pub keyframe_objects: Vec<KeyframeObjects>,
|
||||
pub dominant_objects: Vec<String>,
|
||||
pub object_relationships: Vec<(String, String, String)>, // (object1, relationship, object2)
|
||||
pub scene_description: Option<String>,
|
||||
pub metadata: VisualMetadata,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct VisualMetadata {
|
||||
pub object_count: u32,
|
||||
pub unique_classes: Vec<String>,
|
||||
pub max_confidence: f32,
|
||||
pub avg_confidence: f32,
|
||||
pub spatial_density: f32, // objects per frame
|
||||
}
|
||||
|
||||
impl ChunkRule {
|
||||
pub fn as_str(&self) -> &'static str {
|
||||
match self {
|
||||
ChunkRule::Rule1 => "rule_1",
|
||||
ChunkRule::Rule2 => "rule_2",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct Chunk {
|
||||
pub file_id: i32,
|
||||
pub uuid: String,
|
||||
pub chunk_id: String,
|
||||
pub chunk_index: u32,
|
||||
pub chunk_type: ChunkType,
|
||||
pub rule: ChunkRule,
|
||||
/// Frames per second (can be fractional, e.g., 29.97, 23.976)
|
||||
pub fps: f64,
|
||||
/// Start frame (0-based)
|
||||
pub start_frame: i64,
|
||||
/// End frame (exclusive)
|
||||
pub end_frame: i64,
|
||||
pub text_content: Option<String>,
|
||||
pub content: serde_json::Value,
|
||||
pub metadata: Option<serde_json::Value>,
|
||||
pub vector_id: Option<String>,
|
||||
pub frame_count: i32,
|
||||
pub pre_chunk_ids: Vec<i32>,
|
||||
pub parent_chunk_id: Option<String>, // For parent-child chunk hierarchy
|
||||
pub child_chunk_ids: Vec<String>, // Child chunk IDs (for parent chunks)
|
||||
pub visual_stats: Option<serde_json::Value>,
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
id: i64,
|
||||
video_id: i64,
|
||||
yolo_result: &crate::core::processor::yolo::YoloResult,
|
||||
min_frames_per_chunk: usize,
|
||||
similarity_threshold: f32,
|
||||
) -> Vec<Self> {
|
||||
if yolo_result.frames.is_empty() {
|
||||
return vec![];
|
||||
}
|
||||
|
||||
let mut chunks = Vec::new();
|
||||
let mut current_chunk_frames = Vec::new();
|
||||
let mut current_id = id;
|
||||
|
||||
for (i, frame) in yolo_result.frames.iter().enumerate() {
|
||||
if current_chunk_frames.is_empty() {
|
||||
current_chunk_frames.push(frame);
|
||||
continue;
|
||||
}
|
||||
|
||||
// Check similarity with last frame in current chunk
|
||||
let last_frame = current_chunk_frames.last().unwrap();
|
||||
let similarity = VisualChunkContent::frame_similarity(last_frame, frame);
|
||||
|
||||
if similarity >= similarity_threshold && current_chunk_frames.len() < 100 {
|
||||
// Similar enough, add to current chunk
|
||||
current_chunk_frames.push(frame);
|
||||
} else {
|
||||
// Not similar enough or chunk too large, create new chunk
|
||||
if current_chunk_frames.len() >= min_frames_per_chunk {
|
||||
if let Some(chunk) =
|
||||
Self::create_chunk_from_frames(current_id, video_id, &current_chunk_frames)
|
||||
{
|
||||
chunks.push(chunk);
|
||||
current_id += 1;
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::core::processor::yolo::{YoloFrame, YoloObject, YoloResult};
|
||||
|
||||
#[test]
|
||||
fn test_chunk_type_visual_serialization() {
|
||||
let chunk_type = ChunkType::Visual;
|
||||
let json = serde_json::to_string(&chunk_type).unwrap();
|
||||
assert_eq!(json, "\"visual\"");
|
||||
|
||||
let deserialized: ChunkType = serde_json::from_str(&json).unwrap();
|
||||
assert_eq!(deserialized, ChunkType::Visual);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_visual_chunk_creation() {
|
||||
// Create a mock YOLO result
|
||||
let yolo_result = YoloResult {
|
||||
frame_count: 2,
|
||||
fps: 30.0,
|
||||
frames: vec![
|
||||
YoloFrame {
|
||||
frame: 0,
|
||||
timestamp: 0.0,
|
||||
objects: vec![
|
||||
YoloObject {
|
||||
class_name: "person".to_string(),
|
||||
class_id: 0,
|
||||
x: 100,
|
||||
y: 200,
|
||||
width: 50,
|
||||
height: 100,
|
||||
confidence: 0.95,
|
||||
},
|
||||
YoloObject {
|
||||
class_name: "car".to_string(),
|
||||
class_id: 2,
|
||||
x: 300,
|
||||
y: 150,
|
||||
width: 80,
|
||||
height: 60,
|
||||
confidence: 0.87,
|
||||
},
|
||||
],
|
||||
},
|
||||
YoloFrame {
|
||||
frame: 1,
|
||||
timestamp: 0.033, // 1/30 second
|
||||
objects: vec![YoloObject {
|
||||
class_name: "person".to_string(),
|
||||
class_id: 0,
|
||||
x: 110,
|
||||
y: 210,
|
||||
width: 52,
|
||||
height: 102,
|
||||
confidence: 0.92,
|
||||
}],
|
||||
},
|
||||
],
|
||||
};
|
||||
|
||||
// Create visual chunk from YOLO result
|
||||
let chunk = Chunk::from_yolo_result(1, 100, &yolo_result, 0, 1).unwrap();
|
||||
|
||||
// Verify chunk properties
|
||||
assert_eq!(chunk.id, 1);
|
||||
assert_eq!(chunk.video_id, 100);
|
||||
assert_eq!(chunk.chunk_type, ChunkType::Visual);
|
||||
assert_eq!(chunk.start_time, 0.0);
|
||||
assert_eq!(chunk.end_time, 0.033);
|
||||
|
||||
// Verify visual content
|
||||
if let ChunkContent::Visual(content) = chunk.content {
|
||||
assert_eq!(content.metadata.object_count, 3);
|
||||
assert_eq!(content.metadata.unique_classes.len(), 2);
|
||||
assert!(content
|
||||
.metadata
|
||||
.unique_classes
|
||||
.contains(&"person".to_string()));
|
||||
assert!(content.metadata.unique_classes.contains(&"car".to_string()));
|
||||
assert_eq!(content.dominant_objects, vec!["person"]);
|
||||
assert_eq!(content.keyframe_objects.len(), 2);
|
||||
} else {
|
||||
panic!("Expected Visual content type");
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_visual_chunk_content_methods() {
|
||||
let content = VisualChunkContent {
|
||||
start_time: 0.0,
|
||||
end_time: 5.0,
|
||||
keyframe_objects: vec![KeyframeObjects {
|
||||
frame: 0,
|
||||
timestamp: 0.0,
|
||||
objects: vec![
|
||||
DetectedObject {
|
||||
class_name: "person".to_string(),
|
||||
class_id: 0,
|
||||
bounding_box: BoundingBox {
|
||||
x: 100,
|
||||
y: 200,
|
||||
width: 50,
|
||||
height: 100,
|
||||
},
|
||||
confidence: 0.95,
|
||||
},
|
||||
DetectedObject {
|
||||
class_name: "car".to_string(),
|
||||
class_id: 2,
|
||||
bounding_box: BoundingBox {
|
||||
x: 300,
|
||||
y: 150,
|
||||
width: 80,
|
||||
height: 60,
|
||||
},
|
||||
confidence: 0.87,
|
||||
},
|
||||
],
|
||||
}],
|
||||
dominant_objects: vec!["person".to_string()],
|
||||
object_relationships: vec![],
|
||||
scene_description: Some("A person near a car".to_string()),
|
||||
metadata: VisualMetadata {
|
||||
object_count: 2,
|
||||
unique_classes: vec!["person".to_string(), "car".to_string()],
|
||||
max_confidence: 0.95,
|
||||
avg_confidence: 0.91,
|
||||
spatial_density: 2.0,
|
||||
},
|
||||
};
|
||||
|
||||
// Test summary method
|
||||
let summary = content.summary();
|
||||
assert!(summary.contains("Visual chunk from 0.0s to 5.0s"));
|
||||
assert!(summary.contains("person"));
|
||||
|
||||
// Test contains_object method
|
||||
assert!(content.contains_object("person"));
|
||||
assert!(content.contains_object("car"));
|
||||
assert!(!content.contains_object("dog"));
|
||||
|
||||
// Test high_confidence_objects method
|
||||
let high_conf_objects = content.high_confidence_objects(0.9);
|
||||
assert_eq!(high_conf_objects.len(), 1);
|
||||
assert_eq!(high_conf_objects[0].class_name, "person");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_frame_similarity() {
|
||||
let frame1 = YoloFrame {
|
||||
frame: 0,
|
||||
timestamp: 0.0,
|
||||
objects: vec![
|
||||
YoloObject {
|
||||
class_name: "person".to_string(),
|
||||
class_id: 0,
|
||||
x: 100,
|
||||
y: 200,
|
||||
width: 50,
|
||||
height: 100,
|
||||
confidence: 0.95,
|
||||
},
|
||||
YoloObject {
|
||||
class_name: "car".to_string(),
|
||||
class_id: 2,
|
||||
x: 300,
|
||||
y: 150,
|
||||
width: 80,
|
||||
height: 60,
|
||||
confidence: 0.87,
|
||||
},
|
||||
],
|
||||
};
|
||||
|
||||
let frame2 = YoloFrame {
|
||||
frame: 1,
|
||||
timestamp: 0.033,
|
||||
objects: vec![
|
||||
YoloObject {
|
||||
class_name: "person".to_string(),
|
||||
class_id: 0,
|
||||
x: 110,
|
||||
y: 210,
|
||||
width: 52,
|
||||
height: 102,
|
||||
confidence: 0.92,
|
||||
},
|
||||
YoloObject {
|
||||
class_name: "car".to_string(),
|
||||
class_id: 2,
|
||||
x: 310,
|
||||
y: 155,
|
||||
width: 82,
|
||||
height: 62,
|
||||
confidence: 0.85,
|
||||
},
|
||||
],
|
||||
};
|
||||
|
||||
let frame3 = YoloFrame {
|
||||
frame: 2,
|
||||
timestamp: 0.066,
|
||||
objects: vec![YoloObject {
|
||||
class_name: "dog".to_string(),
|
||||
class_id: 16,
|
||||
x: 150,
|
||||
y: 250,
|
||||
width: 40,
|
||||
height: 60,
|
||||
confidence: 0.78,
|
||||
}],
|
||||
};
|
||||
|
||||
// Test similar frames (same objects)
|
||||
let similarity_same =
|
||||
VisualChunkContent::frame_similarity(&frame1, &frame2);
|
||||
assert!((similarity_same - 1.0).abs() < 0.001);
|
||||
|
||||
// Test dissimilar frames (different objects)
|
||||
let similarity_diff =
|
||||
VisualChunkContent::frame_similarity(&frame1, &frame3);
|
||||
assert!((similarity_diff - 0.0).abs() < 0.001);
|
||||
|
||||
// Test empty frames
|
||||
let empty_frame = YoloFrame {
|
||||
frame: 3,
|
||||
timestamp: 0.1,
|
||||
objects: vec![],
|
||||
};
|
||||
let similarity_empty =
|
||||
VisualChunkContent::frame_similarity(&empty_frame, &empty_frame);
|
||||
assert!((similarity_empty - 1.0).abs() < 0.001);
|
||||
|
||||
let similarity_mixed =
|
||||
VisualChunkContent::frame_similarity(&empty_frame, &frame1);
|
||||
assert!((similarity_mixed - 0.0).abs() < 0.001);
|
||||
}
|
||||
}
|
||||
current_chunk_frames = vec![frame];
|
||||
}
|
||||
}
|
||||
|
||||
// Handle last chunk
|
||||
if current_chunk_frames.len() >= min_frames_per_chunk {
|
||||
if let Some(chunk) =
|
||||
Self::create_chunk_from_frames(current_id, video_id, &current_chunk_frames)
|
||||
{
|
||||
chunks.push(chunk);
|
||||
}
|
||||
}
|
||||
|
||||
chunks
|
||||
}
|
||||
|
||||
fn create_chunk_from_frames(
|
||||
id: i64,
|
||||
video_id: i64,
|
||||
frames: &[&crate::core::processor::yolo::YoloFrame],
|
||||
) -> Option<Self> {
|
||||
if frames.is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
||||
// Simple conversion - could use the from_yolo_result method
|
||||
let start_frame = frames.first().unwrap().frame;
|
||||
let end_frame = frames.last().unwrap().frame;
|
||||
let dummy_yolo_result = crate::core::processor::yolo::YoloResult {
|
||||
frame_count: frames.len() as u64,
|
||||
fps: 0.0, // Not used in this context
|
||||
frames: frames.iter().map(|f| (*f).clone()).collect(),
|
||||
};
|
||||
|
||||
Self::from_yolo_result(id, video_id, &dummy_yolo_result, start_frame, end_frame)
|
||||
}
|
||||
|
||||
/// Creates a new chunk from seconds (legacy conversion).
|
||||
///
|
||||
/// This is useful for migrating from older systems that store time as seconds.
|
||||
/// The frame counts are calculated by rounding `seconds * fps`.
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub fn from_seconds(
|
||||
file_id: i32,
|
||||
uuid: String,
|
||||
chunk_index: u32,
|
||||
chunk_type: ChunkType,
|
||||
rule: ChunkRule,
|
||||
start_time: f64,
|
||||
end_time: f64,
|
||||
fps: f64,
|
||||
content: serde_json::Value,
|
||||
) -> Self {
|
||||
let start_frame = (start_time * fps).round() as i64;
|
||||
let end_frame = (end_time * fps).round() as i64;
|
||||
Self::new(
|
||||
file_id,
|
||||
uuid,
|
||||
chunk_index,
|
||||
chunk_type,
|
||||
rule,
|
||||
start_frame,
|
||||
end_frame,
|
||||
fps,
|
||||
content,
|
||||
)
|
||||
}
|
||||
|
||||
/// Returns the start time as a `FrameTime`.
|
||||
pub fn start_time(&self) -> FrameTime {
|
||||
FrameTime::from_frames(self.start_frame, self.fps)
|
||||
}
|
||||
|
||||
/// Returns the end time as a `FrameTime`.
|
||||
pub fn end_time(&self) -> FrameTime {
|
||||
FrameTime::from_frames(self.end_frame, self.fps)
|
||||
}
|
||||
|
||||
/// Returns the duration in frames.
|
||||
pub fn duration_frames(&self) -> i64 {
|
||||
self.end_frame - self.start_frame
|
||||
}
|
||||
|
||||
/// Returns the duration in seconds.
|
||||
pub fn duration_seconds(&self) -> f64 {
|
||||
self.duration_frames() as f64 / self.fps
|
||||
}
|
||||
|
||||
/// Formats the start time as "seconds.frame" (e.g., "123.04").
|
||||
pub fn format_start_sec_frame(&self) -> String {
|
||||
self.start_time().format_sec_frame()
|
||||
}
|
||||
|
||||
/// Formats the end time as "seconds.frame" (e.g., "456.15").
|
||||
pub fn format_end_sec_frame(&self) -> String {
|
||||
self.end_time().format_sec_frame()
|
||||
}
|
||||
|
||||
/// Formats the start time as "HH:MM:SS".
|
||||
pub fn format_start_hms(&self) -> String {
|
||||
self.start_time().format_hms()
|
||||
}
|
||||
|
||||
/// Formats the end time as "HH:MM:SS".
|
||||
pub fn format_end_hms(&self) -> String {
|
||||
self.end_time().format_hms()
|
||||
}
|
||||
|
||||
/// Formats the start time as "HH:MM:SS.FF".
|
||||
pub fn format_start_hms_frame(&self) -> String {
|
||||
self.start_time().format_hms_frame()
|
||||
}
|
||||
|
||||
/// Formats the end time as "HH:MM:SS.FF".
|
||||
pub fn format_end_hms_frame(&self) -> String {
|
||||
self.end_time().format_hms_frame()
|
||||
}
|
||||
|
||||
/// Returns a tuple of (start_seconds, end_seconds) for compatibility.
|
||||
///
|
||||
/// This is provided for backward compatibility during migration.
|
||||
/// Prefer using `start_time()` and `end_time()` methods.
|
||||
pub fn time_range_seconds(&self) -> (f64, f64) {
|
||||
(self.start_time().seconds(), self.end_time().seconds())
|
||||
}
|
||||
|
||||
pub fn with_metadata(mut self, metadata: serde_json::Value) -> Self {
|
||||
self.metadata = Some(metadata);
|
||||
self
|
||||
}
|
||||
|
||||
pub fn with_vector_id(mut self, vector_id: String) -> Self {
|
||||
self.vector_id = Some(vector_id);
|
||||
self
|
||||
}
|
||||
|
||||
pub fn with_text_content(mut self, text: String) -> Self {
|
||||
self.text_content = Some(text);
|
||||
self
|
||||
}
|
||||
|
||||
pub fn with_frame_count(mut self, count: i32) -> Self {
|
||||
self.frame_count = count;
|
||||
self
|
||||
}
|
||||
|
||||
pub fn with_pre_chunk_ids(mut self, ids: Vec<i32>) -> Self {
|
||||
self.pre_chunk_ids = ids;
|
||||
self
|
||||
}
|
||||
|
||||
pub fn with_parent_chunk_id(mut self, parent_id: String) -> Self {
|
||||
self.parent_chunk_id = Some(parent_id);
|
||||
self
|
||||
}
|
||||
|
||||
pub fn with_child_chunk_ids(mut self, child_ids: Vec<String>) -> Self {
|
||||
self.child_chunk_ids = child_ids;
|
||||
self
|
||||
}
|
||||
|
||||
pub fn is_parent_chunk(&self) -> bool {
|
||||
!self.child_chunk_ids.is_empty()
|
||||
}
|
||||
|
||||
pub fn is_child_chunk(&self) -> bool {
|
||||
self.parent_chunk_id.is_some()
|
||||
}
|
||||
|
||||
/// 創建視覺分片
|
||||
pub fn new_visual(
|
||||
file_id: i32,
|
||||
uuid: String,
|
||||
chunk_index: u32,
|
||||
start_frame: i64,
|
||||
end_frame: i64,
|
||||
fps: f64,
|
||||
visual_content: VisualChunkContent,
|
||||
) -> Self {
|
||||
let content = serde_json::to_value(&visual_content)
|
||||
.unwrap_or_else(|_| serde_json::json!({"error": "Failed to serialize visual content"}));
|
||||
|
||||
Self::new(
|
||||
file_id,
|
||||
uuid,
|
||||
chunk_index,
|
||||
ChunkType::Visual,
|
||||
ChunkRule::Rule2,
|
||||
start_frame,
|
||||
end_frame,
|
||||
fps,
|
||||
content,
|
||||
)
|
||||
}
|
||||
|
||||
/// 從 YOLO 結果創建視覺分片
|
||||
pub fn from_yolo_result(
|
||||
file_id: i32,
|
||||
uuid: String,
|
||||
chunk_index: u32,
|
||||
start_frame: i64,
|
||||
end_frame: i64,
|
||||
fps: f64,
|
||||
yolo_frames: Vec<crate::core::processor::yolo::YoloFrame>,
|
||||
) -> Self {
|
||||
use crate::core::processor::yolo::YoloFrame;
|
||||
use std::collections::HashMap;
|
||||
|
||||
// 分析物件統計
|
||||
let mut object_counts = HashMap::new();
|
||||
let mut keyframe_objects = Vec::new();
|
||||
let mut all_objects = Vec::new();
|
||||
|
||||
for frame in &yolo_frames {
|
||||
let mut frame_objects = Vec::new();
|
||||
|
||||
for obj in &frame.objects {
|
||||
// 更新物件統計
|
||||
*object_counts.entry(obj.class_name.clone()).or_insert(0) += 1;
|
||||
|
||||
// 創建檢測到的物件
|
||||
let detected_obj = DetectedObject {
|
||||
class_name: obj.class_name.clone(),
|
||||
class_id: obj.class_id,
|
||||
confidence: obj.confidence,
|
||||
bbox: Some(BoundingBox {
|
||||
x: obj.x,
|
||||
y: obj.y,
|
||||
width: obj.width,
|
||||
height: obj.height,
|
||||
}),
|
||||
occurrence: 1,
|
||||
};
|
||||
|
||||
frame_objects.push(detected_obj.clone());
|
||||
all_objects.push(detected_obj);
|
||||
}
|
||||
|
||||
if !frame_objects.is_empty() {
|
||||
keyframe_objects.push(KeyframeObjects {
|
||||
timestamp: frame.timestamp,
|
||||
frame_number: frame.frame,
|
||||
objects: frame_objects,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// 創建主要物件標籤
|
||||
let primary_objects = object_counts
|
||||
.iter()
|
||||
.filter(|(_, &count)| count >= 3) // 出現至少3次的物件
|
||||
.map(|(name, _)| name.clone())
|
||||
.collect::<Vec<_>>()
|
||||
.join(", ");
|
||||
|
||||
// 創建物件統計 JSON
|
||||
let object_stats =
|
||||
serde_json::to_value(&object_counts).unwrap_or_else(|_| serde_json::json!({}));
|
||||
|
||||
// 創建視覺內容
|
||||
let visual_content = VisualChunkContent {
|
||||
primary_objects: if primary_objects.is_empty() {
|
||||
"no objects detected".to_string()
|
||||
} else {
|
||||
primary_objects
|
||||
},
|
||||
object_stats,
|
||||
keyframe_objects,
|
||||
object_frequency: serde_json::to_value(&object_counts)
|
||||
.unwrap_or_else(|_| serde_json::json!({})),
|
||||
visual_summary: None, // 可選,後續可添加 LLM 生成的摘要
|
||||
};
|
||||
|
||||
Self::new_visual(
|
||||
file_id,
|
||||
uuid,
|
||||
chunk_index,
|
||||
start_frame,
|
||||
end_frame,
|
||||
fps,
|
||||
visual_content,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
impl VisualChunkContent {
|
||||
/// Calculate similarity between two YOLO frames based on object composition
|
||||
pub fn frame_similarity(
|
||||
frame1: &crate::core::processor::yolo::YoloFrame,
|
||||
frame2: &crate::core::processor::yolo::YoloFrame,
|
||||
) -> f32 {
|
||||
if frame1.objects.is_empty() && frame2.objects.is_empty() {
|
||||
return 1.0; // Both empty frames are perfectly similar
|
||||
}
|
||||
|
||||
if frame1.objects.is_empty() || frame2.objects.is_empty() {
|
||||
return 0.0; // One empty, one non-empty are dissimilar
|
||||
}
|
||||
|
||||
// Create sets of object class names
|
||||
let set1: std::collections::HashSet<String> = frame1
|
||||
.objects
|
||||
.iter()
|
||||
.map(|o| o.class_name.clone())
|
||||
.collect();
|
||||
let set2: std::collections::HashSet<String> = frame2
|
||||
.objects
|
||||
.iter()
|
||||
.map(|o| o.class_name.clone())
|
||||
.collect();
|
||||
|
||||
// Calculate Jaccard similarity
|
||||
let intersection: Vec<_> = set1.intersection(&set2).collect();
|
||||
let union: Vec<_> = set1.union(&set2).collect();
|
||||
|
||||
if union.is_empty() {
|
||||
0.0
|
||||
} else {
|
||||
intersection.len() as f32 / union.len() as f32
|
||||
}
|
||||
}
|
||||
|
||||
/// Get a summary of the visual chunk
|
||||
pub fn summary(&self) -> String {
|
||||
let duration = self.end_time - self.start_time;
|
||||
let frame_count = self.keyframe_objects.len();
|
||||
|
||||
format!(
|
||||
"Visual chunk from {:.1}s to {:.1}s (duration: {:.1}s, {} frames). Objects: {} total, {} unique. Dominant objects: {}",
|
||||
self.start_time,
|
||||
self.end_time,
|
||||
duration,
|
||||
frame_count,
|
||||
self.metadata.object_count,
|
||||
self.metadata.unique_classes.len(),
|
||||
if self.dominant_objects.is_empty() {
|
||||
"none".to_string()
|
||||
} else {
|
||||
self.dominant_objects.join(", ")
|
||||
}
|
||||
)
|
||||
}
|
||||
|
||||
/// Check if this chunk contains a specific object class
|
||||
pub fn contains_object(&self, class_name: &str) -> bool {
|
||||
self.keyframe_objects
|
||||
.iter()
|
||||
.any(|ko| ko.objects.iter().any(|obj| obj.class_name == class_name))
|
||||
}
|
||||
|
||||
/// Get all objects with confidence above threshold
|
||||
pub fn high_confidence_objects(&self, threshold: f32) -> Vec<&DetectedObject> {
|
||||
self.keyframe_objects
|
||||
.iter()
|
||||
.flat_map(|ko| ko.objects.iter())
|
||||
.filter(|obj| obj.confidence >= threshold)
|
||||
.collect()
|
||||
}
|
||||
}
|
||||
@@ -228,6 +228,11 @@ impl From<VideoRow> for VideoRecord {
|
||||
registration_time: row.registration_time,
|
||||
total_frames: row.total_frames.unwrap_or(0) as u64,
|
||||
parent_uuid: row.parent_uuid,
|
||||
cut_done: false,
|
||||
cut_count: 0,
|
||||
cut_max_duration: 0.0,
|
||||
scene_done: false,
|
||||
audio_tracks: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -254,6 +259,11 @@ pub struct VideoRecord {
|
||||
pub registration_time: Option<String>,
|
||||
pub total_frames: u64,
|
||||
pub parent_uuid: Option<String>,
|
||||
pub cut_done: bool,
|
||||
pub cut_count: i32,
|
||||
pub cut_max_duration: f64,
|
||||
pub scene_done: bool,
|
||||
pub audio_tracks: Option<serde_json::Value>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
@@ -332,9 +342,9 @@ pub struct MonitorJob {
|
||||
pub progress_current: i32,
|
||||
pub error_count: i32,
|
||||
pub last_error: Option<String>,
|
||||
pub started_at: Option<chrono::NaiveDateTime>,
|
||||
pub updated_at: Option<chrono::NaiveDateTime>,
|
||||
pub created_at: chrono::NaiveDateTime,
|
||||
pub started_at: Option<chrono::DateTime<chrono::Utc>>,
|
||||
pub updated_at: Option<chrono::DateTime<chrono::Utc>>,
|
||||
pub created_at: chrono::DateTime<chrono::Utc>,
|
||||
pub processors: Vec<String>,
|
||||
pub completed_processors: Vec<String>,
|
||||
pub failed_processors: Vec<String>,
|
||||
@@ -393,17 +403,80 @@ impl ProcessorType {
|
||||
}
|
||||
}
|
||||
|
||||
/// 預估 CPU 使用率(0.0 ~ 1.0, 1.0 = 一個完整核心)
|
||||
pub fn estimated_cpu(&self) -> f64 {
|
||||
match self {
|
||||
ProcessorType::Asr => 1.0,
|
||||
ProcessorType::Cut => 0.5,
|
||||
ProcessorType::Yolo => 0.3,
|
||||
ProcessorType::Ocr => 0.8,
|
||||
ProcessorType::Face => 0.6,
|
||||
ProcessorType::Pose => 0.4,
|
||||
ProcessorType::Asrx => 0.8,
|
||||
ProcessorType::VisualChunk => 0.3,
|
||||
ProcessorType::Scene => 0.3,
|
||||
}
|
||||
}
|
||||
|
||||
/// 是否使用 GPU
|
||||
pub fn uses_gpu(&self) -> bool {
|
||||
match self {
|
||||
ProcessorType::Yolo | ProcessorType::Face | ProcessorType::Pose => true,
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
/// 預估記憶體使用量 (MB)
|
||||
pub fn estimated_memory_mb(&self) -> u64 {
|
||||
match self {
|
||||
ProcessorType::Asr => 2048,
|
||||
ProcessorType::Cut => 512,
|
||||
ProcessorType::Yolo => 1024,
|
||||
ProcessorType::Ocr => 1024,
|
||||
ProcessorType::Face => 1536,
|
||||
ProcessorType::Pose => 1024,
|
||||
ProcessorType::Asrx => 2048,
|
||||
ProcessorType::VisualChunk => 512,
|
||||
ProcessorType::Scene => 512,
|
||||
}
|
||||
}
|
||||
|
||||
/// 使用的模型名稱(如有)
|
||||
pub fn model_name(&self) -> Option<&'static str> {
|
||||
match self {
|
||||
ProcessorType::Asr => Some("faster-whisper"),
|
||||
ProcessorType::Cut => None,
|
||||
ProcessorType::Yolo => Some("yolov8n"),
|
||||
ProcessorType::Ocr => Some("paddleocr"),
|
||||
ProcessorType::Face => Some("insightface/buffalo_l"),
|
||||
ProcessorType::Pose => Some("mediapipe/pose"),
|
||||
ProcessorType::Asrx => Some("speechbrain/ecapa-tdnn"),
|
||||
ProcessorType::VisualChunk => None,
|
||||
ProcessorType::Scene => Some("places365"),
|
||||
}
|
||||
}
|
||||
|
||||
/// 依賴的其他 Processor(需先完成才能執行)
|
||||
pub fn dependencies(&self) -> Vec<ProcessorType> {
|
||||
match self {
|
||||
ProcessorType::Asrx => vec![ProcessorType::Asr],
|
||||
ProcessorType::VisualChunk => vec![ProcessorType::Yolo],
|
||||
ProcessorType::Scene => vec![ProcessorType::Cut],
|
||||
_ => vec![],
|
||||
}
|
||||
}
|
||||
|
||||
pub fn all() -> Vec<ProcessorType> {
|
||||
vec![
|
||||
ProcessorType::Asr,
|
||||
ProcessorType::Cut,
|
||||
ProcessorType::Scene,
|
||||
ProcessorType::Asr,
|
||||
ProcessorType::Asrx,
|
||||
ProcessorType::Yolo,
|
||||
ProcessorType::Ocr,
|
||||
ProcessorType::Face,
|
||||
ProcessorType::Pose,
|
||||
ProcessorType::Asrx,
|
||||
ProcessorType::VisualChunk,
|
||||
ProcessorType::Scene,
|
||||
]
|
||||
}
|
||||
}
|
||||
@@ -701,8 +774,8 @@ impl PostgresDb {
|
||||
.await?;
|
||||
|
||||
// Chunks
|
||||
sqlx::query("CREATE TABLE IF NOT EXISTS chunks (id SERIAL PRIMARY KEY, uuid VARCHAR(32) NOT NULL, chunk_id VARCHAR(64) NOT NULL, chunk_index INTEGER NOT NULL, chunk_type VARCHAR(32) NOT NULL, start_time DOUBLE PRECISION NOT NULL, end_time DOUBLE PRECISION NOT NULL, fps DOUBLE PRECISION DEFAULT 24.0, start_frame BIGINT DEFAULT 0, end_frame BIGINT DEFAULT 0, content JSONB NOT NULL, metadata JSONB, vector_id VARCHAR(64), created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, UNIQUE(uuid, chunk_id))").execute(pool).await?;
|
||||
sqlx::query("CREATE INDEX IF NOT EXISTS idx_chunks_uuid ON chunks(uuid)")
|
||||
sqlx::query("CREATE TABLE IF NOT EXISTS chunks (id SERIAL PRIMARY KEY, file_uuid VARCHAR(32) NOT NULL, chunk_id VARCHAR(64) NOT NULL, chunk_index INTEGER NOT NULL, chunk_type VARCHAR(32) NOT NULL, start_time DOUBLE PRECISION NOT NULL, end_time DOUBLE PRECISION NOT NULL, fps DOUBLE PRECISION DEFAULT 24.0, start_frame BIGINT DEFAULT 0, end_frame BIGINT DEFAULT 0, content JSONB NOT NULL, metadata JSONB, vector_id VARCHAR(64), created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, UNIQUE(file_uuid, chunk_id))").execute(pool).await?;
|
||||
sqlx::query("CREATE INDEX IF NOT EXISTS idx_chunks_file ON chunks(file_uuid)")
|
||||
.execute(pool)
|
||||
.await?;
|
||||
sqlx::query("CREATE INDEX IF NOT EXISTS idx_chunks_type ON chunks(chunk_type)")
|
||||
@@ -765,15 +838,13 @@ impl PostgresDb {
|
||||
.await?;
|
||||
|
||||
// Chunks Rule 1
|
||||
sqlx::query("CREATE TABLE IF NOT EXISTS chunks_rule1 (id UUID PRIMARY KEY DEFAULT gen_random_uuid(), asset_uuid VARCHAR(32) NOT NULL REFERENCES videos(uuid) ON DELETE CASCADE, start_frame BIGINT NOT NULL, end_frame BIGINT NOT NULL, content TEXT NOT NULL, speaker_id VARCHAR(50), created_at TIMESTAMPTZ DEFAULT NOW())").execute(pool).await?;
|
||||
sqlx::query(
|
||||
"CREATE INDEX IF NOT EXISTS idx_chunks_rule1_asset ON chunks_rule1(asset_uuid)",
|
||||
)
|
||||
.execute(pool)
|
||||
.await?;
|
||||
sqlx::query("CREATE TABLE IF NOT EXISTS chunks_rule1 (id UUID PRIMARY KEY DEFAULT gen_random_uuid(), file_uuid VARCHAR(32) NOT NULL REFERENCES videos(uuid) ON DELETE CASCADE, start_frame BIGINT NOT NULL, end_frame BIGINT NOT NULL, content TEXT NOT NULL, speaker_id VARCHAR(50), created_at TIMESTAMPTZ DEFAULT NOW())").execute(pool).await?;
|
||||
sqlx::query("CREATE INDEX IF NOT EXISTS idx_chunks_rule1_asset ON chunks_rule1(file_uuid)")
|
||||
.execute(pool)
|
||||
.await?;
|
||||
|
||||
// Jobs (Legacy/P0)
|
||||
sqlx::query("CREATE TABLE IF NOT EXISTS jobs (id UUID PRIMARY KEY, asset_uuid VARCHAR(32) NOT NULL REFERENCES videos(uuid) ON DELETE CASCADE, processor_list TEXT[], assigned_processor_id UUID, rule VARCHAR(20), status VARCHAR(20) DEFAULT 'QUEUED', total_frames BIGINT DEFAULT 0, processed_frames BIGINT DEFAULT 0, error_message TEXT, created_at TIMESTAMPTZ DEFAULT NOW(), updated_at TIMESTAMPTZ DEFAULT NOW())").execute(pool).await?;
|
||||
sqlx::query("CREATE TABLE IF NOT EXISTS jobs (id UUID PRIMARY KEY, file_uuid VARCHAR(32) NOT NULL REFERENCES videos(uuid) ON DELETE CASCADE, processor_list TEXT[], assigned_processor_id UUID, rule VARCHAR(20), status VARCHAR(20) DEFAULT 'QUEUED', total_frames BIGINT DEFAULT 0, processed_frames BIGINT DEFAULT 0, error_message TEXT, created_at TIMESTAMPTZ DEFAULT NOW(), updated_at TIMESTAMPTZ DEFAULT NOW())").execute(pool).await?;
|
||||
sqlx::query("CREATE INDEX IF NOT EXISTS idx_jobs_status ON jobs(status)")
|
||||
.execute(pool)
|
||||
.await?;
|
||||
@@ -1162,8 +1233,8 @@ impl PostgresDb {
|
||||
.await?;
|
||||
|
||||
sqlx::query(&format!(
|
||||
"DELETE FROM {} WHERE video_id IN (SELECT id FROM {} WHERE uuid = $1)",
|
||||
processor_results, videos
|
||||
"DELETE FROM {} WHERE file_uuid = $1",
|
||||
processor_results
|
||||
))
|
||||
.bind(uuid)
|
||||
.execute(&self.pool)
|
||||
@@ -2026,21 +2097,19 @@ impl PostgresDb {
|
||||
r#"
|
||||
INSERT INTO {} (
|
||||
file_uuid, processor_type, coordinate_type, coordinate_index,
|
||||
timestamp, data, identity_id, confidence
|
||||
) VALUES ($1, $2, 'frame', $3, $4, $5, $6, $7)
|
||||
start_frame, end_frame, start_time, data
|
||||
) VALUES ($1, $2, 'frame', $3, $3, $3, $4, $5)
|
||||
"#,
|
||||
table
|
||||
);
|
||||
|
||||
for (coord_idx, ts, data, id, conf) in chunks {
|
||||
for (coord_idx, ts, data, _id, _conf) in chunks {
|
||||
sqlx::query(&query)
|
||||
.bind(file_uuid)
|
||||
.bind(processor_type)
|
||||
.bind(*coord_idx)
|
||||
.bind(*ts)
|
||||
.bind(data)
|
||||
.bind(*id)
|
||||
.bind(*conf)
|
||||
.execute(&mut *tx)
|
||||
.await?;
|
||||
}
|
||||
@@ -2060,7 +2129,7 @@ impl PostgresDb {
|
||||
let query = format!(
|
||||
r#"
|
||||
INSERT INTO {} (
|
||||
file_uuid, processor_type, coordinate_type, coordinate_index,
|
||||
file_uuid, processor_type, coordinate_type, coordinate_index,
|
||||
start_frame, end_frame, start_time, end_time, data
|
||||
) VALUES ($1, 'asr', 'time', $2, $3, $4, $5, $6, $7)
|
||||
"#,
|
||||
@@ -2402,10 +2471,10 @@ impl PostgresDb {
|
||||
offset: i64,
|
||||
) -> Result<Vec<IdentityChunkRecord>> {
|
||||
let query = r#"
|
||||
SELECT c.id, c.uuid as file_uuid, c.chunk_id, c.chunk_type,
|
||||
SELECT c.id, c.file_uuid, c.chunk_id, c.chunk_type,
|
||||
c.start_time, c.end_time, c.text_content, c.content
|
||||
FROM chunks c
|
||||
WHERE c.uuid IN (
|
||||
WHERE c.file_uuid IN (
|
||||
SELECT DISTINCT fi.file_uuid
|
||||
FROM file_identities fi
|
||||
JOIN identities i ON fi.identity_id = i.id
|
||||
@@ -2504,9 +2573,9 @@ impl PostgresDb {
|
||||
|
||||
sqlx::query(&format!(
|
||||
r#"
|
||||
INSERT INTO {} (file_id, uuid, chunk_id, chunk_index, chunk_type, start_time, end_time, fps, start_frame, end_frame, text_content, content, metadata, vector_id, frame_count, pre_chunk_ids, parent_chunk_id, child_chunk_ids)
|
||||
INSERT INTO {} (file_id, file_uuid, chunk_id, chunk_index, chunk_type, start_time, end_time, fps, start_frame, end_frame, text_content, content, metadata, vector_id, frame_count, pre_chunk_ids, parent_chunk_id, child_chunk_ids)
|
||||
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12::jsonb, $13::jsonb, $14, $15, $16, $17, $18)
|
||||
ON CONFLICT (uuid, chunk_id) DO UPDATE SET
|
||||
ON CONFLICT (file_uuid, chunk_id) DO UPDATE SET
|
||||
start_time = EXCLUDED.start_time,
|
||||
end_time = EXCLUDED.end_time,
|
||||
fps = EXCLUDED.fps,
|
||||
@@ -2579,9 +2648,9 @@ impl PostgresDb {
|
||||
|
||||
sqlx::query(&format!(
|
||||
r#"
|
||||
INSERT INTO {} (file_id, uuid, chunk_id, chunk_index, chunk_type, start_time, end_time, fps, start_frame, end_frame, text_content, content, metadata, vector_id, frame_count, pre_chunk_ids, parent_chunk_id, child_chunk_ids)
|
||||
INSERT INTO {} (file_id, file_uuid, chunk_id, chunk_index, chunk_type, start_time, end_time, fps, start_frame, end_frame, text_content, content, metadata, vector_id, frame_count, pre_chunk_ids, parent_chunk_id, child_chunk_ids)
|
||||
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12::jsonb, $13::jsonb, $14, $15, $16, $17, $18)
|
||||
ON CONFLICT (uuid, chunk_id) DO UPDATE SET
|
||||
ON CONFLICT (file_uuid, chunk_id) DO UPDATE SET
|
||||
start_time = EXCLUDED.start_time,
|
||||
end_time = EXCLUDED.end_time,
|
||||
fps = EXCLUDED.fps,
|
||||
@@ -2626,7 +2695,7 @@ impl PostgresDb {
|
||||
pub async fn get_chunks_by_uuid(&self, uuid: &str) -> Result<Vec<Chunk>> {
|
||||
let table = schema::table_name("chunks");
|
||||
let rows = sqlx::query(&format!(
|
||||
"SELECT COALESCE(file_id, 0) as file_id, uuid, chunk_id, chunk_index, chunk_type, COALESCE(fps, 24.0) as fps, COALESCE(start_frame, 0) as start_frame, COALESCE(end_frame, 0) as end_frame, text_content, content, metadata, vector_id, COALESCE(frame_count, 0) as frame_count, pre_chunk_ids, parent_chunk_id::text as parent_chunk_id, child_chunk_ids, visual_stats FROM {} WHERE uuid = $1 ORDER BY chunk_index",
|
||||
"SELECT COALESCE(file_id, 0) as file_id, file_uuid as uuid, chunk_id, chunk_index, chunk_type, COALESCE(fps, 24.0) as fps, COALESCE(start_frame, 0) as start_frame, COALESCE(end_frame, 0) as end_frame, text_content, content, metadata, vector_id, COALESCE(frame_count, 0) as frame_count, pre_chunk_ids, parent_chunk_id::text as parent_chunk_id, child_chunk_ids, visual_stats FROM {} WHERE file_uuid = $1 ORDER BY chunk_index",
|
||||
table
|
||||
))
|
||||
.bind(uuid)
|
||||
@@ -3264,36 +3333,40 @@ impl PostgresDb {
|
||||
let sql = match uuid {
|
||||
Some(_) => &format!(
|
||||
r#"
|
||||
SELECT c.chunk_id, c.uuid, c.chunk_index, c.chunk_type, c.start_frame, c.end_frame, c.fps, c.start_time, c.end_time,
|
||||
SELECT c.chunk_id, c.file_uuid, c.chunk_index, c.chunk_type, c.start_frame, c.end_frame, c.fps, c.start_time, c.end_time,
|
||||
c.text_content, GREATEST(ts_rank_cd(c.search_vector, to_tsquery('english', $1)), ts_rank_cd(pc.summary_tsvector, to_tsquery('english', $1))) as bm25_score,
|
||||
c.visual_stats,
|
||||
pc.metadata->'structured_summary' as scene_summary,
|
||||
c.parent_chunk_id::integer
|
||||
FROM {} c
|
||||
LEFT JOIN parent_chunks pc ON c.parent_chunk_id = pc.id::varchar
|
||||
WHERE (c.search_vector @@ to_tsquery('english', $1) OR pc.summary_tsvector @@ to_tsquery('english', $1)) AND c.uuid = $2
|
||||
WHERE (c.search_vector @@ to_tsquery('english', $1) OR pc.summary_tsvector @@ to_tsquery('english', $1) OR c.text_content ILIKE $3) AND c.file_uuid = $2
|
||||
ORDER BY bm25_score DESC
|
||||
LIMIT $3
|
||||
LIMIT $4
|
||||
"#,
|
||||
table
|
||||
),
|
||||
None => &format!(
|
||||
r#"
|
||||
SELECT c.chunk_id, c.uuid, c.chunk_index, c.chunk_type, c.start_frame, c.end_frame, c.fps, c.start_time, c.end_time,
|
||||
SELECT c.chunk_id, c.file_uuid, c.chunk_index, c.chunk_type, c.start_frame, c.end_frame, c.fps, c.start_time, c.end_time,
|
||||
c.text_content, GREATEST(ts_rank_cd(c.search_vector, to_tsquery('english', $1)), ts_rank_cd(pc.summary_tsvector, to_tsquery('english', $1))) as bm25_score,
|
||||
c.visual_stats,
|
||||
pc.metadata->'structured_summary' as scene_summary,
|
||||
c.parent_chunk_id::integer
|
||||
FROM {} c
|
||||
LEFT JOIN parent_chunks pc ON c.parent_chunk_id = pc.id::varchar
|
||||
WHERE (c.search_vector @@ to_tsquery('english', $1) OR pc.summary_tsvector @@ to_tsquery('english', $1))
|
||||
WHERE (c.search_vector @@ to_tsquery('english', $1) OR pc.summary_tsvector @@ to_tsquery('english', $1) OR c.text_content ILIKE $2)
|
||||
ORDER BY bm25_score DESC
|
||||
LIMIT $2
|
||||
LIMIT $3
|
||||
"#,
|
||||
table
|
||||
),
|
||||
};
|
||||
|
||||
// 使用 pg_trgm 支援中英文模糊搜尋
|
||||
// ILIKE 支援中文 LIKE 匹配,pg_trgm 的 similarity() 可做更精確的排名
|
||||
let ilike_pattern = format!("%{}%", query);
|
||||
|
||||
let rows: Vec<(
|
||||
String,
|
||||
String,
|
||||
@@ -3310,10 +3383,11 @@ impl PostgresDb {
|
||||
Option<serde_json::Value>,
|
||||
Option<i32>,
|
||||
)> = match uuid {
|
||||
Some(_) => {
|
||||
Some(u) => {
|
||||
sqlx::query_as(sql)
|
||||
.bind(&tsquery)
|
||||
.bind(uuid)
|
||||
.bind(u)
|
||||
.bind(&ilike_pattern)
|
||||
.bind(limit as i64)
|
||||
.fetch_all(&self.pool)
|
||||
.await?
|
||||
@@ -3321,6 +3395,7 @@ impl PostgresDb {
|
||||
None => {
|
||||
sqlx::query_as(sql)
|
||||
.bind(&tsquery)
|
||||
.bind(&ilike_pattern)
|
||||
.bind(limit as i64)
|
||||
.fetch_all(&self.pool)
|
||||
.await?
|
||||
@@ -3809,6 +3884,54 @@ impl PostgresDb {
|
||||
Ok(results)
|
||||
}
|
||||
|
||||
pub async fn get_all_running_jobs(&self, limit: i32) -> Result<Vec<MonitorJob>> {
|
||||
let monitor_jobs = schema::table_name("monitor_jobs");
|
||||
let rows = sqlx::query(&format!(
|
||||
r#"
|
||||
SELECT id, uuid, video_path, status, current_processor, progress_total, progress_current,
|
||||
error_count, last_error, started_at, updated_at, created_at,
|
||||
processors, completed_processors, failed_processors, video_id
|
||||
FROM {}
|
||||
WHERE status = 'running'
|
||||
ORDER BY created_at ASC
|
||||
LIMIT $1
|
||||
"#,
|
||||
monitor_jobs
|
||||
))
|
||||
.bind(limit)
|
||||
.fetch_all(&self.pool)
|
||||
.await?;
|
||||
|
||||
let jobs: Vec<MonitorJob> = rows
|
||||
.into_iter()
|
||||
.map(|r| {
|
||||
let status_str: String = r.get(3);
|
||||
let status =
|
||||
MonitorJobStatus::from_db_str(&status_str).unwrap_or(MonitorJobStatus::Running);
|
||||
MonitorJob {
|
||||
id: r.get(0),
|
||||
uuid: r.get(1),
|
||||
video_path: r.get(2),
|
||||
status,
|
||||
current_processor: r.get(4),
|
||||
progress_total: r.get(5),
|
||||
progress_current: r.get(6),
|
||||
error_count: r.get(7),
|
||||
last_error: r.get(8),
|
||||
started_at: r.get(9),
|
||||
updated_at: r.get(10),
|
||||
created_at: r.get(11),
|
||||
processors: r.get::<Option<Vec<String>>, _>(12).unwrap_or_default(),
|
||||
completed_processors: r.get::<Option<Vec<String>>, _>(13).unwrap_or_default(),
|
||||
failed_processors: r.get::<Option<Vec<String>>, _>(14).unwrap_or_default(),
|
||||
video_id: r.get(15),
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
|
||||
Ok(jobs)
|
||||
}
|
||||
|
||||
pub async fn get_pending_jobs(&self, limit: i32) -> Result<Vec<MonitorJob>> {
|
||||
let monitor_jobs = schema::table_name("monitor_jobs");
|
||||
let rows = sqlx::query(&format!(
|
||||
@@ -3817,7 +3940,7 @@ impl PostgresDb {
|
||||
error_count, last_error, started_at, updated_at, created_at,
|
||||
processors, completed_processors, failed_processors, video_id
|
||||
FROM {}
|
||||
WHERE status IN ('pending', 'running')
|
||||
WHERE status = 'pending'
|
||||
ORDER BY created_at ASC
|
||||
LIMIT $1
|
||||
"#,
|
||||
@@ -4322,7 +4445,7 @@ impl PostgresDb {
|
||||
name: &str,
|
||||
) -> Result<crate::core::person_identity::Identity> {
|
||||
let identity = sqlx::query_as::<_, crate::core::person_identity::Identity>(
|
||||
r#"INSERT INTO identities (name) VALUES ($1) ON CONFLICT (name) DO UPDATE SET name = EXCLUDED.name RETURNING id, name, embedding::text, metadata, created_at"#,
|
||||
r#"INSERT INTO identities (name) VALUES ($1) ON CONFLICT (name) DO UPDATE SET name = EXCLUDED.name RETURNING id, name, identity_embedding::text as embedding, metadata, created_at"#,
|
||||
)
|
||||
.bind(name)
|
||||
.fetch_one(&self.pool)
|
||||
@@ -4371,7 +4494,7 @@ impl PostgresDb {
|
||||
binding_value: &str,
|
||||
) -> Result<Option<crate::core::person_identity::Identity>> {
|
||||
let identity = sqlx::query_as::<_, crate::core::person_identity::Identity>(
|
||||
"SELECT i.id, i.name, i.embedding::text, i.metadata, i.created_at FROM identities i JOIN identity_bindings b ON i.id = b.identity_id WHERE b.identity_type = $1 AND b.identity_value = $2",
|
||||
"SELECT i.id, i.name, i.identity_embedding::text as embedding, i.metadata, i.created_at FROM identities i JOIN identity_bindings b ON i.id = b.identity_id WHERE b.identity_type = $1 AND b.identity_value = $2",
|
||||
)
|
||||
.bind(binding_type)
|
||||
.bind(binding_value)
|
||||
@@ -4389,12 +4512,12 @@ impl PostgresDb {
|
||||
) -> Result<Vec<crate::core::person_identity::Identity>> {
|
||||
let query = if !search.is_empty() {
|
||||
sqlx::query_as::<_, crate::core::person_identity::Identity>(
|
||||
"SELECT id, name, embedding::text, metadata, created_at FROM identities WHERE name ILIKE $1 ORDER BY id LIMIT $2 OFFSET $3",
|
||||
"SELECT id, name, identity_embedding::text as embedding, metadata, created_at FROM identities WHERE name ILIKE $1 ORDER BY id LIMIT $2 OFFSET $3",
|
||||
)
|
||||
.bind(format!("%{}%", search))
|
||||
} else {
|
||||
sqlx::query_as::<_, crate::core::person_identity::Identity>(
|
||||
"SELECT id, name, embedding::text, metadata, created_at FROM identities ORDER BY id LIMIT $1 OFFSET $2",
|
||||
"SELECT id, name, identity_embedding::text as embedding, metadata, created_at FROM identities ORDER BY id LIMIT $1 OFFSET $2",
|
||||
)
|
||||
};
|
||||
let identities = query.bind(limit).bind(offset).fetch_all(&self.pool).await?;
|
||||
@@ -4407,7 +4530,7 @@ impl PostgresDb {
|
||||
id: i64,
|
||||
) -> Result<Option<crate::core::person_identity::Identity>> {
|
||||
let identity = sqlx::query_as::<_, crate::core::person_identity::Identity>(
|
||||
"SELECT id, name, embedding::text, metadata, created_at FROM identities WHERE id = $1",
|
||||
"SELECT id, name, identity_embedding::text as embedding, metadata, created_at FROM identities WHERE id = $1",
|
||||
)
|
||||
.bind(id)
|
||||
.fetch_optional(&self.pool)
|
||||
@@ -4716,7 +4839,7 @@ impl PostgresDb {
|
||||
"speaker_ids"
|
||||
};
|
||||
let query = format!(
|
||||
"SELECT id, start_frame, end_frame, content FROM chunks WHERE uuid = $1 AND $2::text = ANY({}::text[]) ORDER BY start_frame",
|
||||
"SELECT id, start_frame, end_frame, content FROM chunks WHERE file_uuid = $1 AND $2::text = ANY({}::text[]) ORDER BY start_frame",
|
||||
column
|
||||
);
|
||||
|
||||
@@ -4836,7 +4959,7 @@ mod tests {
|
||||
width: 1920,
|
||||
height: 1080,
|
||||
fps: 30.0,
|
||||
probe_json: Some("{}".to_string()),
|
||||
probe_json: Some(serde_json::from_str("{}").unwrap()),
|
||||
storage: StorageStatus::default(),
|
||||
status: VideoStatus::Pending,
|
||||
processing_status: None,
|
||||
@@ -4847,6 +4970,11 @@ mod tests {
|
||||
registration_time: None,
|
||||
total_frames: 0,
|
||||
parent_uuid: None,
|
||||
cut_done: false,
|
||||
cut_count: 0,
|
||||
cut_max_duration: 0.0,
|
||||
scene_done: false,
|
||||
audio_tracks: None,
|
||||
};
|
||||
|
||||
let json = serde_json::to_string(&record).unwrap();
|
||||
@@ -4935,13 +5063,18 @@ mod tests {
|
||||
error_count: 0,
|
||||
last_error: None,
|
||||
started_at: Some(
|
||||
NaiveDateTime::parse_from_str("2024-01-01 10:00:00", "%Y-%m-%d %H:%M:%S").unwrap(),
|
||||
chrono::DateTime::parse_from_rfc3339("2024-01-01T10:00:00Z")
|
||||
.unwrap()
|
||||
.with_timezone(&chrono::Utc),
|
||||
),
|
||||
updated_at: Some(
|
||||
NaiveDateTime::parse_from_str("2024-01-01 10:05:00", "%Y-%m-%d %H:%M:%S").unwrap(),
|
||||
chrono::DateTime::parse_from_rfc3339("2024-01-01T10:05:00Z")
|
||||
.unwrap()
|
||||
.with_timezone(&chrono::Utc),
|
||||
),
|
||||
created_at: NaiveDateTime::parse_from_str("2024-01-01 09:55:00", "%Y-%m-%d %H:%M:%S")
|
||||
.unwrap(),
|
||||
created_at: chrono::DateTime::parse_from_rfc3339("2024-01-01T09:55:00Z")
|
||||
.unwrap()
|
||||
.into(),
|
||||
processors: vec!["asr".to_string(), "cut".to_string()],
|
||||
completed_processors: vec!["asr".to_string()],
|
||||
failed_processors: vec![],
|
||||
@@ -4968,7 +5101,7 @@ mod tests {
|
||||
"last_error": null,
|
||||
"started_at": null,
|
||||
"updated_at": null,
|
||||
"created_at": "2024-01-01T00:00:00",
|
||||
"created_at": "2024-01-01T00:00:00Z",
|
||||
"processors": ["asr", "cut"],
|
||||
"completed_processors": [],
|
||||
"failed_processors": [],
|
||||
|
||||
@@ -88,6 +88,62 @@ impl QdrantDb {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// 將向量寫入指定 collection(支援多 collection)
|
||||
pub async fn upsert_vector_to_collection(
|
||||
&self,
|
||||
collection: &str,
|
||||
point_id: u64,
|
||||
vector: &[f32],
|
||||
payload: Option<serde_json::Value>,
|
||||
) -> Result<()> {
|
||||
let url = format!(
|
||||
"{}/collections/{}/points?wait=true",
|
||||
self.base_url, collection
|
||||
);
|
||||
|
||||
tracing::debug!("Qdrant upsert URL: {}, collection: {}", url, collection);
|
||||
|
||||
let points = if let Some(p) = payload {
|
||||
serde_json::json!({
|
||||
"points": [{
|
||||
"id": point_id,
|
||||
"vector": vector,
|
||||
"payload": p,
|
||||
}]
|
||||
})
|
||||
} else {
|
||||
serde_json::json!({
|
||||
"points": [{
|
||||
"id": point_id,
|
||||
"vector": vector,
|
||||
}]
|
||||
})
|
||||
};
|
||||
|
||||
let response = self
|
||||
.client
|
||||
.put(&url)
|
||||
.header("api-key", &self.api_key)
|
||||
.json(&points)
|
||||
.send()
|
||||
.await
|
||||
.context("Failed to send upsert request to Qdrant")?;
|
||||
|
||||
let status = response.status();
|
||||
if !status.is_success() {
|
||||
let response_text = response.text().await.unwrap_or_default();
|
||||
tracing::error!("Qdrant upsert failed: {} - {}", status, response_text);
|
||||
anyhow::bail!(
|
||||
"Qdrant upsert failed with status {}: {}",
|
||||
status,
|
||||
response_text
|
||||
);
|
||||
}
|
||||
|
||||
tracing::debug!("Successfully upserted vector for point: {}", point_id);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn upsert_vector(
|
||||
&self,
|
||||
chunk_id: &str,
|
||||
|
||||
@@ -371,6 +371,11 @@ impl RedisClient {
|
||||
processor: &str,
|
||||
status: &str,
|
||||
error: Option<&str>,
|
||||
frames_processed: i32,
|
||||
chunks_produced: i32,
|
||||
total_frames: i32,
|
||||
retry_count: i32,
|
||||
pid: i32,
|
||||
) -> Result<()> {
|
||||
let mut conn = self.get_conn_internal().await?;
|
||||
let prefix = REDIS_KEY_PREFIX.as_str();
|
||||
@@ -378,13 +383,24 @@ impl RedisClient {
|
||||
|
||||
let now = chrono::Utc::now().to_rfc3339();
|
||||
|
||||
let mut fields: Vec<(&str, &str)> = vec![("status", status), ("updated_at", &now)];
|
||||
let mut fields: Vec<(&str, String)> = vec![
|
||||
("status", status.to_string()),
|
||||
("updated_at", now),
|
||||
("current", frames_processed.to_string()),
|
||||
("total", total_frames.to_string()),
|
||||
("frames_processed", frames_processed.to_string()),
|
||||
("chunks_produced", chunks_produced.to_string()),
|
||||
("retry_count", retry_count.to_string()),
|
||||
("pid", pid.to_string()),
|
||||
];
|
||||
|
||||
if let Some(err) = error {
|
||||
fields.push(("error", err));
|
||||
fields.push(("error", err.to_string()));
|
||||
}
|
||||
|
||||
let _: Option<String> = conn.hset_multiple(&key, &fields).await?;
|
||||
let field_refs: Vec<(&str, &str)> = fields.iter().map(|(k, v)| (*k, v.as_str())).collect();
|
||||
|
||||
let _: Option<String> = conn.hset_multiple(&key, &field_refs).await?;
|
||||
let _: bool = conn.expire(&key, 86400).await?;
|
||||
|
||||
Ok(())
|
||||
|
||||
@@ -182,6 +182,11 @@ impl IngestionService {
|
||||
registration_time: None,
|
||||
total_frames: total_frames.unwrap_or(0),
|
||||
parent_uuid: None,
|
||||
cut_done: false,
|
||||
cut_count: 0,
|
||||
cut_max_duration: 0.0,
|
||||
scene_done: false,
|
||||
audio_tracks: None,
|
||||
};
|
||||
|
||||
self.db
|
||||
|
||||
@@ -15,4 +15,3 @@ pub mod text;
|
||||
pub mod thumbnail;
|
||||
pub mod time;
|
||||
pub mod tmdb;
|
||||
pub mod worker;
|
||||
|
||||
@@ -20,6 +20,8 @@ pub struct StreamInfo {
|
||||
pub duration: Option<String>,
|
||||
pub sample_rate: Option<String>,
|
||||
pub channels: Option<u32>,
|
||||
#[serde(default)]
|
||||
pub tags: Option<serde_json::Value>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
@@ -69,6 +71,7 @@ pub fn probe_video(video_path: &str) -> Result<ProbeResult> {
|
||||
duration: s["duration"].as_str().map(String::from),
|
||||
sample_rate: s["sample_rate"].as_str().map(String::from),
|
||||
channels: s["channels"].as_u64().map(|v| v as u32),
|
||||
tags: s.get("tags").cloned(),
|
||||
})
|
||||
.collect()
|
||||
})
|
||||
|
||||
@@ -1,124 +0,0 @@
|
||||
use anyhow::{Context, Result};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::time::Duration;
|
||||
|
||||
use super::executor::PythonExecutor;
|
||||
use crate::core::config::processor;
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub struct AsrResult {
|
||||
pub language: Option<String>,
|
||||
pub language_probability: Option<f64>,
|
||||
pub segments: Vec<AsrSegment>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub struct AsrSegment {
|
||||
pub start: f64,
|
||||
pub end: f64,
|
||||
pub text: String,
|
||||
}
|
||||
|
||||
pub async fn process_asr(
|
||||
video_path: &str,
|
||||
output_path: &str,
|
||||
uuid: Option<&str>,
|
||||
) -> Result<AsrResult> {
|
||||
let executor = PythonExecutor::new()?;
|
||||
let script_path = executor.script_path("asr_processor.py");
|
||||
|
||||
tracing::info!("[ASR] Starting ASR processing: {}", video_path);
|
||||
|
||||
executor
|
||||
.run(
|
||||
"asr_processor.py",
|
||||
&[video_path, output_path],
|
||||
uuid,
|
||||
"ASR",
|
||||
Some(Duration::from_secs(*processor::ASR_TIMEOUT_SECS)),
|
||||
)
|
||||
.await
|
||||
.with_context(|| format!("Failed to run {:?}", script_path))?;
|
||||
|
||||
let json_str = std::fs::read_to_string(output_path).context("Failed to read ASR output")?;
|
||||
|
||||
let result: AsrResult =
|
||||
serde_json::from_str(&json_str).context("Failed to parse ASR output")?;
|
||||
|
||||
tracing::info!(
|
||||
"[ASR] Result: {} segments, language: {:?}",
|
||||
result.segments.len(),
|
||||
result.language
|
||||
);
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_asr_result_serialization() {
|
||||
let result = AsrResult {
|
||||
language: Some("en".to_string()),
|
||||
language_probability: Some(0.95),
|
||||
segments: vec![
|
||||
AsrSegment {
|
||||
start: 0.0,
|
||||
end: 2.5,
|
||||
text: "Hello world".to_string(),
|
||||
},
|
||||
AsrSegment {
|
||||
start: 2.5,
|
||||
end: 5.0,
|
||||
text: "Test speech".to_string(),
|
||||
},
|
||||
],
|
||||
};
|
||||
|
||||
let json = serde_json::to_string(&result).unwrap();
|
||||
assert!(json.contains("Hello world"));
|
||||
assert!(json.contains("en"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_asr_result_deserialization() {
|
||||
let json = r#"{
|
||||
"language": "zh",
|
||||
"language_probability": 0.98,
|
||||
"segments": [
|
||||
{"start": 0.0, "end": 1.5, "text": "測試"}
|
||||
]
|
||||
}"#;
|
||||
|
||||
let result: AsrResult = serde_json::from_str(json).unwrap();
|
||||
assert_eq!(result.language, Some("zh".to_string()));
|
||||
assert_eq!(result.language_probability, Some(0.98));
|
||||
assert_eq!(result.segments.len(), 1);
|
||||
assert_eq!(result.segments[0].text, "測試");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_asr_segment_default() {
|
||||
let segment = AsrSegment {
|
||||
start: 0.0,
|
||||
end: 1.0,
|
||||
text: String::new(),
|
||||
};
|
||||
assert_eq!(segment.start, 0.0);
|
||||
assert_eq!(segment.end, 1.0);
|
||||
assert!(segment.text.is_empty());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_asr_result_empty_segments() {
|
||||
let result = AsrResult {
|
||||
language: None,
|
||||
language_probability: None,
|
||||
segments: vec![],
|
||||
};
|
||||
assert!(result.language.is_none());
|
||||
assert!(result.segments.is_empty());
|
||||
}
|
||||
}
|
||||
@@ -12,12 +12,16 @@ const ASRX_TIMEOUT: Duration = Duration::from_secs(7200);
|
||||
pub struct AsrxResult {
|
||||
pub language: Option<String>,
|
||||
pub segments: Vec<AsrxSegment>,
|
||||
#[serde(skip_serializing)]
|
||||
pub embeddings: Option<Vec<Vec<f32>>>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub struct AsrxSegment {
|
||||
pub start: f64,
|
||||
pub end: f64,
|
||||
pub start_time: f64,
|
||||
pub end_time: f64,
|
||||
pub start_frame: u64,
|
||||
pub end_frame: u64,
|
||||
pub text: String,
|
||||
pub speaker_id: Option<String>,
|
||||
}
|
||||
@@ -43,10 +47,19 @@ pub async fn process_asrx(
|
||||
return Ok(AsrxResult {
|
||||
language: None,
|
||||
segments: vec![],
|
||||
embeddings: None,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
tracing::info!(
|
||||
"[ASRX] Running: {} {} {} {}",
|
||||
executor.python_path().display(),
|
||||
script_path.display(),
|
||||
video_path,
|
||||
output_path,
|
||||
);
|
||||
|
||||
let mut cmd = Command::new(executor.python_path());
|
||||
cmd.arg(&script_path).arg(video_path).arg(output_path);
|
||||
|
||||
@@ -68,16 +81,21 @@ pub async fn process_asrx(
|
||||
let stderr = String::from_utf8_lossy(&output.stderr);
|
||||
|
||||
for line in stderr.lines() {
|
||||
if line.starts_with("ASRX_START") {
|
||||
let trimmed = line.trim();
|
||||
if trimmed.starts_with("ASRX_START") {
|
||||
tracing::info!("[ASRX] Loading model...");
|
||||
} else if line.starts_with("ASRX_PROGRESS:") {
|
||||
let count = line.trim_start_matches("ASRX_PROGRESS:");
|
||||
} else if trimmed.starts_with("ASRX_PROGRESS:") {
|
||||
let count = trimmed.trim_start_matches("ASRX_PROGRESS:");
|
||||
tracing::info!("[ASRX] Processed {} segments...", count);
|
||||
} else if line.starts_with("ASRX_COMPLETE:") {
|
||||
let count = line.trim_start_matches("ASRX_COMPLETE:");
|
||||
} else if trimmed.starts_with("ASRX_COMPLETE:") {
|
||||
let count = trimmed.trim_start_matches("ASRX_COMPLETE:");
|
||||
tracing::info!("[ASRX] Completed! Total: {} segments", count);
|
||||
} else if !trimmed.is_empty() && !trimmed.starts_with("[SelfASRX") {
|
||||
tracing::debug!("[ASRX/stderr] {}", trimmed);
|
||||
}
|
||||
}
|
||||
// Log full stderr for debugging
|
||||
tracing::info!("[ASRX] stderr output:\n{}", stderr);
|
||||
|
||||
if !output.status.success() {
|
||||
anyhow::bail!("ASRX failed: {}", stderr);
|
||||
@@ -102,11 +120,14 @@ mod tests {
|
||||
let result = AsrxResult {
|
||||
language: Some("en".to_string()),
|
||||
segments: vec![AsrxSegment {
|
||||
start: 0.0,
|
||||
end: 2.5,
|
||||
start_time: 0.0,
|
||||
end_time: 2.5,
|
||||
start_frame: 0,
|
||||
end_frame: 75,
|
||||
text: "Hello".to_string(),
|
||||
speaker_id: Some("SPEAKER_00".to_string()),
|
||||
}],
|
||||
embeddings: None,
|
||||
};
|
||||
|
||||
let json = serde_json::to_string(&result).unwrap();
|
||||
@@ -119,7 +140,7 @@ mod tests {
|
||||
let json = r#"{
|
||||
"language": "zh",
|
||||
"segments": [
|
||||
{"start": 0.0, "end": 1.5, "text": "測試", "speaker_id": "SPEAKER_01"}
|
||||
{"start_time": 0.0, "end_time": 1.5, "start_frame": 0, "end_frame": 45, "text": "測試", "speaker_id": "SPEAKER_01"}
|
||||
]
|
||||
}"#;
|
||||
|
||||
@@ -137,6 +158,7 @@ mod tests {
|
||||
let result = AsrxResult {
|
||||
language: None,
|
||||
segments: vec![],
|
||||
embeddings: None,
|
||||
};
|
||||
assert!(result.segments.is_empty());
|
||||
assert!(result.language.is_none());
|
||||
@@ -145,11 +167,13 @@ mod tests {
|
||||
#[test]
|
||||
fn test_asrx_segment_times() {
|
||||
let segment = AsrxSegment {
|
||||
start: 0.0,
|
||||
end: 5.0,
|
||||
start_time: 0.0,
|
||||
end_time: 5.0,
|
||||
start_frame: 0,
|
||||
end_frame: 150,
|
||||
text: "Test".to_string(),
|
||||
speaker_id: None,
|
||||
};
|
||||
assert!(segment.end > segment.start);
|
||||
assert!(segment.end_time > segment.start_time);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -147,6 +147,19 @@ impl PythonExecutor {
|
||||
anyhow::bail!("Script not found: {:?}", script_path);
|
||||
}
|
||||
|
||||
// 標記輸出檔為處理中(add .tmp suffix)
|
||||
let output_path = args.get(1).map(|p| std::path::PathBuf::from(p));
|
||||
let tmp_path = output_path.as_ref().map(|p| {
|
||||
let mut tmp = p.to_path_buf();
|
||||
tmp.set_extension("json.tmp");
|
||||
tmp
|
||||
});
|
||||
if let (Some(src), Some(dst)) = (&output_path, &tmp_path) {
|
||||
if src.exists() {
|
||||
let _ = std::fs::rename(src, dst);
|
||||
}
|
||||
}
|
||||
|
||||
let mut cmd = Command::new(&self.venv_python);
|
||||
cmd.arg(&script_path);
|
||||
|
||||
@@ -220,12 +233,28 @@ impl PythonExecutor {
|
||||
Ok(())
|
||||
};
|
||||
|
||||
// 錯誤時 rename .json.tmp → .json.err
|
||||
let mark_failed = || {
|
||||
if let Some(tmp) = &tmp_path {
|
||||
if tmp.exists() {
|
||||
if let Some(out) = &output_path {
|
||||
let mut err_path = out.to_path_buf();
|
||||
err_path.set_extension("json.err");
|
||||
let _ = std::fs::rename(tmp, &err_path);
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
if let Some(duration) = timeout_duration {
|
||||
match timeout(duration, run_future).await {
|
||||
Ok(Ok(())) => {}
|
||||
Ok(Err(e)) => return Err(e),
|
||||
Ok(Err(e)) => {
|
||||
mark_failed();
|
||||
return Err(e);
|
||||
}
|
||||
Err(_) => {
|
||||
// Try to kill the entire process group
|
||||
mark_failed();
|
||||
if let Some(pid) = child_pid {
|
||||
let pgid = pid as i32;
|
||||
unsafe {
|
||||
@@ -237,7 +266,19 @@ impl PythonExecutor {
|
||||
}
|
||||
}
|
||||
} else {
|
||||
run_future.await?;
|
||||
if let Err(e) = run_future.await {
|
||||
mark_failed();
|
||||
return Err(e);
|
||||
}
|
||||
}
|
||||
|
||||
// 成功:.json.tmp → .json(已完成)
|
||||
if let Some(tmp) = &tmp_path {
|
||||
if tmp.exists() {
|
||||
if let Some(out) = &output_path {
|
||||
let _ = std::fs::rename(tmp, out);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
|
||||
@@ -28,6 +28,7 @@ pub struct Face {
|
||||
pub width: i32,
|
||||
pub height: i32,
|
||||
pub confidence: f32,
|
||||
#[serde(skip_serializing)]
|
||||
pub embedding: Option<Vec<f32>>,
|
||||
pub landmarks: Option<Vec<Vec<f32>>>,
|
||||
pub attributes: Option<FaceAttributes>,
|
||||
@@ -111,7 +112,6 @@ mod tests {
|
||||
let json = serde_json::to_string(&result).unwrap();
|
||||
assert!(json.contains("face_1"));
|
||||
assert!(json.contains("\"width\":50"));
|
||||
assert!(json.contains("embedding"));
|
||||
assert!(json.contains("landmarks"));
|
||||
assert!(json.contains("attributes"));
|
||||
}
|
||||
|
||||
@@ -27,7 +27,8 @@ pub use face_recognition::{
|
||||
pub use ocr::{process_ocr, OcrFrame, OcrResult, OcrText};
|
||||
pub use pose::{process_pose, Bbox, Keypoint, PersonPose, PoseFrame, PoseResult};
|
||||
pub use scene_classification::{
|
||||
process_scene_classification, SceneClassificationResult, ScenePrediction, SceneSegment,
|
||||
load_scene_from_file, process_scene_classification, SceneClassificationResult, ScenePrediction,
|
||||
SceneSegment,
|
||||
};
|
||||
pub use snapshot_agent::{SnapshotAgent, SnapshotAgentConfig};
|
||||
pub use story::{process_story, StoryChildChunk, StoryParentChunk, StoryResult, StoryStats};
|
||||
|
||||
@@ -7,7 +7,7 @@ use super::executor::PythonExecutor;
|
||||
const SCENE_TIMEOUT: Duration = Duration::from_secs(7200);
|
||||
|
||||
/// 場景識別結果
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
#[derive(Debug, Default, Serialize, Deserialize, Clone)]
|
||||
pub struct SceneClassificationResult {
|
||||
pub frame_count: u64,
|
||||
pub fps: f64,
|
||||
@@ -32,6 +32,19 @@ pub struct ScenePrediction {
|
||||
pub confidence: f32,
|
||||
}
|
||||
|
||||
/// 從已存在的 JSON 檔案載入場景結果(不重新執行 Python)
|
||||
pub fn load_scene_from_file(path: &str) -> Result<SceneClassificationResult> {
|
||||
let json_str = std::fs::read_to_string(path).context("Failed to read scene JSON file")?;
|
||||
let result: SceneClassificationResult =
|
||||
serde_json::from_str(&json_str).context("Failed to parse scene JSON")?;
|
||||
tracing::info!(
|
||||
"[SCENE] Loaded {} scenes from {}",
|
||||
result.scenes.len(),
|
||||
path
|
||||
);
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
/// 執行場景識別
|
||||
pub async fn process_scene_classification(
|
||||
video_path: &str,
|
||||
|
||||
@@ -12,7 +12,7 @@ use super::yolo::{YoloFrame, YoloResult};
|
||||
const VISUAL_CHUNK_TIMEOUT: Duration = Duration::from_secs(3600);
|
||||
|
||||
/// 視覺分片處理結果
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
#[derive(Debug, Serialize, Deserialize, Clone, Default)]
|
||||
pub struct VisualChunkResult {
|
||||
/// 生成的視覺分片數量
|
||||
pub chunk_count: u32,
|
||||
@@ -284,7 +284,7 @@ pub async fn process_visual_chunk_advanced(
|
||||
});
|
||||
}
|
||||
|
||||
executor
|
||||
let result = match executor
|
||||
.run(
|
||||
"visual_chunk_processor.py",
|
||||
&[video_path, output_path],
|
||||
@@ -293,13 +293,34 @@ pub async fn process_visual_chunk_advanced(
|
||||
Some(VISUAL_CHUNK_TIMEOUT),
|
||||
)
|
||||
.await
|
||||
.with_context(|| format!("Failed to run {:?}", script_path))?;
|
||||
|
||||
let json_str =
|
||||
std::fs::read_to_string(output_path).context("Failed to read visual chunk output")?;
|
||||
|
||||
let result: VisualChunkResult =
|
||||
serde_json::from_str(&json_str).context("Failed to parse visual chunk output")?;
|
||||
{
|
||||
Ok(_) => match std::fs::read_to_string(output_path) {
|
||||
Ok(json_str) => match serde_json::from_str::<VisualChunkResult>(&json_str) {
|
||||
Ok(r) => r,
|
||||
Err(e) => {
|
||||
tracing::warn!(
|
||||
"[VisualChunk] Failed to parse output ({}), returning empty",
|
||||
e
|
||||
);
|
||||
VisualChunkResult::default()
|
||||
}
|
||||
},
|
||||
Err(e) => {
|
||||
tracing::warn!(
|
||||
"[VisualChunk] Failed to read output ({}), returning empty",
|
||||
e
|
||||
);
|
||||
VisualChunkResult::default()
|
||||
}
|
||||
},
|
||||
Err(e) => {
|
||||
tracing::warn!(
|
||||
"[VisualChunk] Failed to run script ({}), returning empty",
|
||||
e
|
||||
);
|
||||
VisualChunkResult::default()
|
||||
}
|
||||
};
|
||||
|
||||
tracing::info!(
|
||||
"[VisualChunk] Advanced generation result: {} chunks, {} frames",
|
||||
|
||||
@@ -54,22 +54,81 @@ pub fn compute_uuid_from_relative_path(relative_path: &str) -> String {
|
||||
compute_uuid(&username, &filepath)
|
||||
}
|
||||
|
||||
/// Returns the MAC address of the preferred local network interface.
///
/// Runs `ifconfig -a` and scans its output for `ether` / `lladdr` entries.
/// Every address is ranked by the name of the interface it is listed under:
/// `en0` > `en1` > `en2` > any other `en*` > everything else. Among equally
/// ranked interfaces the one listed first wins. The all-zero and broadcast
/// addresses are ignored.
///
/// Ranking is by interface name only; nothing here inspects whether an
/// interface is built-in or an external (USB/Thunderbolt) adapter.
///
/// Returns MAC address in format: a1:b2:c3:d4:e5:f6, or `00:00:00:00:00:00`
/// when `ifconfig` cannot be run, exits unsuccessfully, or reports no usable
/// address.
pub fn get_mac_address() -> String {
    // A missing binary or a non-zero exit simply yields empty output, which
    // then falls through to the fallback address below.
    let output = std::process::Command::new("ifconfig")
        .args(["-a"])
        .output()
        .ok()
        .filter(|o| o.status.success())
        .map(|o| String::from_utf8_lossy(&o.stdout).into_owned())
        .unwrap_or_default();

    select_mac_from_ifconfig(&output)
        .unwrap_or("00:00:00:00:00:00")
        .to_string()
}

/// Picks the best-ranked MAC address out of `ifconfig -a` style text.
///
/// Returns `None` when the text contains no usable `ether` / `lladdr` entry.
fn select_mac_from_ifconfig(output: &str) -> Option<&str> {
    let mut current_iface = "";
    let mut best: Option<(u32, &str)> = None;

    for line in output.lines() {
        let trimmed = line.trim();

        // Interface header, e.g. "en0: flags=...". Headers start in column 0,
        // so the untrimmed line is tested: checking the trimmed text for a
        // leading tab could never fail.
        if !line.starts_with(char::is_whitespace) && trimmed.contains(": flags=") {
            current_iface = trimmed.split(':').next().unwrap_or("");
        }

        // Address line: "ether a1:b2:c3:d4:e5:f6" or "lladdr a1:b2:c3:d4:e5:f6".
        let mac = match trimmed
            .strip_prefix("ether ")
            .or_else(|| trimmed.strip_prefix("lladdr "))
        {
            Some(rest) => rest.trim(),
            None => continue,
        };

        if !has_mac_shape(mac) || mac == "00:00:00:00:00:00" || mac == "ff:ff:ff:ff:ff:ff" {
            continue;
        }

        // Strict `<` keeps the earliest entry among equal priorities, which
        // matches a stable sort followed by taking the first element.
        let priority = iface_priority(current_iface);
        if best.map_or(true, |(best_priority, _)| priority < best_priority) {
            best = Some((priority, mac));
        }
    }

    best.map(|(_, mac)| mac)
}

/// Cheap shape check: 17 bytes long with exactly five `:` separators.
fn has_mac_shape(candidate: &str) -> bool {
    candidate.len() == 17 && candidate.matches(':').count() == 5
}

/// Ranks an interface name; lower values are preferred.
fn iface_priority(iface: &str) -> u32 {
    match iface {
        "en0" => 0,
        "en1" => 1,
        "en2" => 2,
        _ if iface.starts_with("en") => 3,
        _ => 100,
    }
}
|
||||
|
||||
/// Compute Birth UUID (Stable Identity with Location)
|
||||
|
||||
@@ -1,140 +0,0 @@
|
||||
use crate::core::chunk;
|
||||
use crate::core::db::PostgresDb;
|
||||
use sqlx::PgPool;
|
||||
use tokio::time::{sleep, Duration};
|
||||
use tracing;
|
||||
|
||||
pub struct JobWorker {
|
||||
pool: PgPool,
|
||||
poll_interval: Duration,
|
||||
}
|
||||
|
||||
impl JobWorker {
|
||||
pub fn new(pool: PgPool, poll_interval_secs: u64) -> Self {
|
||||
Self {
|
||||
pool,
|
||||
poll_interval: Duration::from_secs(poll_interval_secs),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn run(&self) {
|
||||
tracing::info!(
|
||||
"🤖 Job Worker started (Polling every {}s)",
|
||||
self.poll_interval.as_secs()
|
||||
);
|
||||
|
||||
loop {
|
||||
match self.process_next_job().await {
|
||||
Ok(has_work) => {
|
||||
if !has_work {
|
||||
// No work found, wait before polling again
|
||||
sleep(self.poll_interval).await;
|
||||
}
|
||||
// If we processed a job, loop immediately to check for more
|
||||
}
|
||||
Err(e) => {
|
||||
tracing::error!("❌ Job Worker error: {}", e);
|
||||
sleep(Duration::from_secs(5)).await;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn process_next_job(&self) -> anyhow::Result<bool> {
|
||||
// 1. Fetch a QUEUED job from monitor_jobs
|
||||
// Using sqlx::query_as to map to tuple.
|
||||
// Note: progress_total is int4 (i32).
|
||||
let job_row: Option<(i32, String, i32)> = sqlx::query_as(
|
||||
r#"
|
||||
UPDATE dev.monitor_jobs
|
||||
SET status = 'RUNNING', updated_at = NOW()
|
||||
WHERE id = (
|
||||
SELECT id FROM dev.monitor_jobs
|
||||
WHERE status = 'QUEUED'
|
||||
ORDER BY created_at ASC
|
||||
LIMIT 1
|
||||
FOR UPDATE SKIP LOCKED
|
||||
)
|
||||
RETURNING id, uuid, COALESCE(progress_total, 0)
|
||||
"#,
|
||||
)
|
||||
.fetch_optional(&self.pool)
|
||||
.await?;
|
||||
|
||||
if let Some((job_id, asset_uuid, total_frames)) = job_row {
|
||||
tracing::info!(
|
||||
"🚀 Processing Job {} for Asset {} (Frames: {})",
|
||||
job_id,
|
||||
asset_uuid,
|
||||
total_frames
|
||||
);
|
||||
|
||||
// 2. Execute Logic (Default to rule1 for now as monitor_jobs doesn't store rule type explicitly)
|
||||
let fps = self.get_asset_fps(&asset_uuid).await?;
|
||||
let db = PostgresDb::from_pool(self.pool.clone());
|
||||
|
||||
let result = chunk::rule1_ingest::execute_rule1(&db, &asset_uuid, fps).await;
|
||||
|
||||
// 3. Update Job Status
|
||||
match result {
|
||||
Ok(chunk_count) => {
|
||||
tracing::info!(
|
||||
"✅ Job {} completed. Processed {} items.",
|
||||
job_id,
|
||||
chunk_count
|
||||
);
|
||||
|
||||
// Update monitor_jobs
|
||||
// Using runtime query to avoid compile-time macro checks
|
||||
sqlx::query(
|
||||
"UPDATE dev.monitor_jobs SET status = 'COMPLETED', progress_current = progress_total, updated_at = NOW() WHERE id = $1"
|
||||
)
|
||||
.bind(job_id)
|
||||
.execute(&self.pool)
|
||||
.await?;
|
||||
|
||||
// Update video processing_status
|
||||
sqlx::query(
|
||||
"UPDATE dev.videos SET processing_status = $1::jsonb WHERE file_uuid = $2",
|
||||
)
|
||||
.bind(serde_json::json!({"status": "COMPLETED"}))
|
||||
.bind(asset_uuid)
|
||||
.execute(&self.pool)
|
||||
.await?;
|
||||
}
|
||||
Err(e) => {
|
||||
tracing::error!("❌ Job {} failed: {}", job_id, e);
|
||||
let err_msg = e.to_string();
|
||||
let safe_msg = if err_msg.len() > 500 {
|
||||
&err_msg[..500]
|
||||
} else {
|
||||
&err_msg
|
||||
};
|
||||
|
||||
sqlx::query(
|
||||
"UPDATE dev.monitor_jobs SET status = 'FAILED', last_error = $2, updated_at = NOW() WHERE id = $1"
|
||||
)
|
||||
.bind(job_id)
|
||||
.bind(safe_msg)
|
||||
.execute(&self.pool)
|
||||
.await?;
|
||||
}
|
||||
}
|
||||
return Ok(true); // Processed a job
|
||||
}
|
||||
|
||||
Ok(false) // No job found
|
||||
}
|
||||
|
||||
async fn get_asset_fps(&self, uuid: &str) -> anyhow::Result<f64> {
|
||||
// dev.videos now uses file_uuid and has a direct fps column
|
||||
let fps: Option<f64> =
|
||||
sqlx::query_scalar("SELECT fps FROM dev.videos WHERE file_uuid = $1")
|
||||
.bind(uuid)
|
||||
.fetch_optional(&self.pool)
|
||||
.await?;
|
||||
|
||||
// Fallback to 29.97 if not found
|
||||
Ok(fps.unwrap_or(29.97))
|
||||
}
|
||||
}
|
||||
@@ -1,2 +0,0 @@
|
||||
pub mod job_runner;
|
||||
pub use job_runner::JobWorker;
|
||||
Reference in New Issue
Block a user