chore: backup before migration to new repo

This commit is contained in:
Warren
2026-04-23 16:46:02 +08:00
parent 13dd3b30f3
commit 59809dae1f
40 changed files with 5566 additions and 1783 deletions

View File

@@ -10,6 +10,8 @@ pub const KEY_PREFIX_VIDEO: &str = "video:";
pub const KEY_PREFIX_SEARCH: &str = "search:";
pub const KEY_PREFIX_SEARCH_HYBRID: &str = "search:hybrid:";
pub const KEY_PREFIX_SEARCH_N8N: &str = "search:n8n:";
pub const KEY_PREFIX_SEARCH_BM25: &str = "search:bm25:";
pub const KEY_PREFIX_SEARCH_N8N_BM25: &str = "search:n8n:bm25:";
pub const KEY_HEALTH: &str = "health:basic";
pub fn videos_list(page: usize, limit: usize) -> String {
@@ -32,6 +34,14 @@ pub fn n8n_search(query_hash: &str) -> String {
format!("{}{}", KEY_PREFIX_SEARCH_N8N, query_hash)
}
/// Builds the cache key for a BM25 search: `KEY_PREFIX_SEARCH_BM25`
/// immediately followed by `query_hash`.
pub fn bm25_search(query_hash: &str) -> String {
    [KEY_PREFIX_SEARCH_BM25, query_hash].concat()
}
/// Builds the cache key for an n8n BM25 search: `KEY_PREFIX_SEARCH_N8N_BM25`
/// immediately followed by `query_hash`.
pub fn n8n_bm25_search(query_hash: &str) -> String {
    [KEY_PREFIX_SEARCH_N8N_BM25, query_hash].concat()
}
/// Returns the health-check cache key (`KEY_HEALTH`) as an owned `String`.
pub fn health() -> String {
    String::from(KEY_HEALTH)
}
@@ -48,6 +58,17 @@ pub fn search_prefix() -> String {
format!("^{}", KEY_PREFIX_SEARCH)
}
/// Prefix shared by every visual-search cache key.
pub const KEY_PREFIX_VISUAL_SEARCH: &str = "search:visual:";
/// Category name for visual-search entries (its consumers are not visible here).
pub const CATEGORY_VISUAL_SEARCH: &str = "visual_search";

/// Builds the cache key for a visual search: the visual-search prefix,
/// then `uuid`, a colon, and `criteria_hash`.
pub fn visual_search(uuid: &str, criteria_hash: &str) -> String {
    [KEY_PREFIX_VISUAL_SEARCH, uuid, ":", criteria_hash].concat()
}

/// Returns `^` followed by the visual-search key prefix
/// (presumably an anchored match pattern; confirm against the caller).
pub fn visual_search_prefix() -> String {
    let mut pattern = String::from("^");
    pattern.push_str(KEY_PREFIX_VISUAL_SEARCH);
    pattern
}
#[cfg(test)]
mod tests {
use super::*;
@@ -78,8 +99,28 @@ mod tests {
assert_eq!(n8n_search("hash123"), "search:n8n:hash123");
}
#[test]
fn test_bm25_search() {
assert_eq!(bm25_search("hash123"), "search:bm25:hash123");
}
#[test]
fn test_n8n_bm25_search() {
assert_eq!(n8n_bm25_search("hash123"), "search:n8n:bm25:hash123");
}
#[test]
fn test_health() {
assert_eq!(health(), "health:basic");
}
#[test]
fn test_visual_search() {
assert_eq!(visual_search("abc123", "hash"), "search:visual:abc123:hash");
}
#[test]
fn test_visual_search_prefix() {
assert_eq!(visual_search_prefix(), "^search:visual:");
}
}

View File

@@ -136,6 +136,10 @@ impl MongoCache {
self.settings.ttl_video_meta
}
/// TTL for visual-search cache entries.
///
/// There is no dedicated setting; the general search TTL
/// (`settings.ttl_search`) is reused.
pub fn ttl_visual_search(&self) -> u64 {
    let shared_search_ttl = self.settings.ttl_search;
    shared_search_ttl
}
pub async fn get<T: DeserializeOwned>(&self, key: &str) -> Result<Option<T>> {
if !self.is_enabled() {
return Ok(None);

View File

@@ -1,5 +1,9 @@
pub mod rule1_ingest;
pub mod rule3_ingest;
pub mod splitter;
pub mod types;
pub use rule1_ingest::ingest_rule1;
pub use rule3_ingest::ingest_rule3;
pub use splitter::{AsrSegment, ChunkSplitter};
pub use types::{Chunk, ChunkType};

View File

@@ -1,6 +1,7 @@
use crate::core::time::FrameTime;
use serde::{Deserialize, Serialize};
// ==================== ChunkType ====================
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "snake_case")]
pub enum ChunkType {
@@ -8,7 +9,8 @@ pub enum ChunkType {
Sentence,
Cut,
Trace,
Story, // Parent chunk from story analysis
Story,
Visual, // 視覺分片 (Phase 2.1)
}
impl ChunkType {
@@ -19,10 +21,12 @@ impl ChunkType {
ChunkType::Cut => "cut",
ChunkType::Trace => "trace",
ChunkType::Story => "story",
ChunkType::Visual => "visual",
}
}
}
// ==================== ChunkRule ====================
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "snake_case")]
pub enum ChunkRule {
@@ -39,6 +43,73 @@ impl ChunkRule {
}
}
// ==================== Visual chunk structures (Phase 2.1) ====================
/// Bounding box of a detected object.
///
/// NOTE(review): presumably pixel coordinates with (`x`, `y`) as the
/// top-left corner — confirm against the YOLO processor output.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BoundingBox {
pub x: i32,
pub y: i32,
pub width: i32,
pub height: i32,
}
/// A detected object.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DetectedObject {
/// Object class name
pub class_name: String,
/// Object class ID
pub class_id: u32,
/// Confidence value (0.0-1.0)
pub confidence: f32,
/// Bounding box
pub bbox: Option<BoundingBox>,
/// Number of occurrences (within the chunk)
pub occurrence: u32,
}
/// Objects detected in a single keyframe.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct KeyframeObjects {
/// Keyframe time (seconds) - for reference only; `frame_number` is the primary marker
pub timestamp: f64,
/// Keyframe frame number - the primary time marker
pub frame_number: u64,
/// Detected objects
pub objects: Vec<DetectedObject>,
}
/// Aggregate visual metadata for a chunk.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VisualMetadata {
/// Total number of objects
pub object_count: u32,
/// List of unique object classes
pub unique_classes: Vec<String>,
/// Highest confidence value
pub max_confidence: f32,
/// Average confidence value
pub avg_confidence: f32,
/// Spatial density (average number of objects per frame)
pub spatial_density: f32,
}
/// Content of a visual chunk.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VisualChunkContent {
/// Per-keyframe object lists; each keyframe carries its `frame_number`
pub keyframe_objects: Vec<KeyframeObjects>,
/// Dominant object labels (objects that appear in most frames)
pub dominant_objects: Vec<String>,
/// Object relationships as (object1, relationship, object2) - optional
pub object_relationships: Vec<(String, String, String)>,
/// Scene description - optional
pub scene_description: Option<String>,
/// Visual metadata
pub metadata: VisualMetadata,
}
// ==================== Chunk main struct ====================
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Chunk {
pub file_id: i32,
@@ -49,9 +120,9 @@ pub struct Chunk {
pub rule: ChunkRule,
/// Frames per second (can be fractional, e.g., 29.97, 23.976)
pub fps: f64,
/// Start frame (0-based)
/// Start frame (0-based) - 主要時間標示
pub start_frame: i64,
/// End frame (exclusive)
/// End frame (exclusive) - 主要時間標示
pub end_frame: i64,
pub text_content: Option<String>,
pub content: serde_json::Value,
@@ -61,17 +132,11 @@ pub struct Chunk {
pub pre_chunk_ids: Vec<i32>,
pub parent_chunk_id: Option<String>, // For parent-child chunk hierarchy
pub child_chunk_ids: Vec<String>, // Child chunk IDs (for parent chunks)
pub visual_stats: Option<serde_json::Value>,
}
impl Chunk {
/// Creates a new chunk from frame counts.
///
/// # Arguments
///
/// * `start_frame` - Start frame (0-based)
/// * `end_frame` - End frame (exclusive)
/// * `fps` - Frames per second (can be fractional)
#[allow(clippy::too_many_arguments)]
/// 創建新分片
pub fn new(
file_id: i32,
uuid: String,
@@ -83,11 +148,13 @@ impl Chunk {
fps: f64,
content: serde_json::Value,
) -> Self {
let chunk_id = format!("{}_{:04}", chunk_type.as_str(), chunk_index);
let frame_count = (end_frame - start_frame) as i32;
let chunk_id = format!("{}_{}", uuid, chunk_index);
Self {
file_id,
uuid,
chunk_id: chunk_id.clone(),
chunk_id,
chunk_index,
chunk_type,
rule,
@@ -98,17 +165,171 @@ impl Chunk {
content,
metadata: None,
vector_id: None,
frame_count: 0,
frame_count,
pre_chunk_ids: vec![],
parent_chunk_id: None,
child_chunk_ids: vec![],
visual_stats: None,
}
}
/// Creates a new chunk from seconds (legacy conversion).
/// 創建視覺分片 (Phase 2.1)
pub fn new_visual(
file_id: i32,
uuid: String,
chunk_index: u32,
start_frame: i64,
end_frame: i64,
fps: f64,
visual_content: VisualChunkContent,
) -> Self {
let content = serde_json::to_value(&visual_content)
.unwrap_or_else(|_| serde_json::json!({"error": "Failed to serialize visual content"}));
Self::new(
file_id,
uuid,
chunk_index,
ChunkType::Visual,
ChunkRule::Rule2,
start_frame,
end_frame,
fps,
content,
)
}
/// Builds a visual chunk from YOLO detection frames (Phase 2.1).
///
/// Every frame in `yolo_frames` becomes one [`KeyframeObjects`] entry and
/// every detection one [`DetectedObject`] with `occurrence` set to 1.
/// The aggregates stored in [`VisualMetadata`] are:
///
/// * `object_count` - total detections across all frames
/// * `unique_classes` - distinct class names, sorted so the serialized
///   output is deterministic
/// * `max_confidence` / `avg_confidence` - 0.0 when there are no detections
/// * `spatial_density` - detections per frame, 0.0 when there are no frames
///
/// `dominant_objects` lists, in sorted order, the classes present in more
/// than half of the frames. Relationships and the scene description are
/// left empty. The result is passed to [`Chunk::new_visual`].
pub fn from_yolo_frames(
    file_id: i32,
    uuid: String,
    chunk_index: u32,
    start_frame: i64,
    end_frame: i64,
    fps: f64,
    yolo_frames: Vec<crate::core::processor::yolo::YoloFrame>,
) -> Self {
    let frame_total = yolo_frames.len();

    // Convert each YOLO frame into a keyframe object list.
    let keyframe_objects: Vec<KeyframeObjects> = yolo_frames
        .iter()
        .map(|frame| KeyframeObjects {
            timestamp: frame.timestamp,
            frame_number: frame.frame,
            objects: frame
                .objects
                .iter()
                .map(|obj| DetectedObject {
                    class_name: obj.class_name.clone(),
                    class_id: obj.class_id,
                    confidence: obj.confidence,
                    bbox: Some(BoundingBox {
                        x: obj.x,
                        y: obj.y,
                        width: obj.width,
                        height: obj.height,
                    }),
                    occurrence: 1,
                })
                .collect(),
        })
        .collect();

    let total_objects: u32 = yolo_frames.iter().map(|f| f.objects.len() as u32).sum();

    // A BTreeSet both deduplicates and yields the names in sorted order, so
    // the stored list does not depend on hash iteration order.
    let unique_classes: Vec<String> = yolo_frames
        .iter()
        .flat_map(|f| f.objects.iter().map(|o| o.class_name.as_str()))
        .collect::<std::collections::BTreeSet<&str>>()
        .into_iter()
        .map(str::to_owned)
        .collect();

    // Confidence statistics over every detection.
    let confidences: Vec<f32> = yolo_frames
        .iter()
        .flat_map(|f| f.objects.iter().map(|o| o.confidence))
        .collect();
    let max_confidence = confidences.iter().copied().fold(0.0f32, f32::max);
    let avg_confidence = if confidences.is_empty() {
        0.0
    } else {
        confidences.iter().sum::<f32>() / confidences.len() as f32
    };

    // Count, per class, the number of frames it appears in (a class is
    // counted once per frame regardless of how many instances it has).
    let mut frames_with_class: std::collections::HashMap<&str, usize> =
        std::collections::HashMap::new();
    for frame in &yolo_frames {
        let classes_in_frame: std::collections::HashSet<&str> =
            frame.objects.iter().map(|o| o.class_name.as_str()).collect();
        for class in classes_in_frame {
            *frames_with_class.entry(class).or_insert(0) += 1;
        }
    }

    // "More than half of the frames", expressed with integers to avoid
    // float rounding: count / total > 0.5  <=>  count * 2 > total.
    let mut dominant_objects: Vec<String> = frames_with_class
        .into_iter()
        .filter(|&(_, count)| count * 2 > frame_total)
        .map(|(class, _)| class.to_owned())
        .collect();
    dominant_objects.sort();

    let spatial_density = if frame_total > 0 {
        total_objects as f32 / frame_total as f32
    } else {
        0.0
    };

    let visual_content = VisualChunkContent {
        keyframe_objects,
        dominant_objects,
        object_relationships: vec![], // optional: relationship detection may be added later
        scene_description: None,      // optional: an LLM-generated description may be added later
        metadata: VisualMetadata {
            object_count: total_objects,
            unique_classes,
            max_confidence,
            avg_confidence,
            spatial_density,
        },
    };

    Self::new_visual(
        file_id,
        uuid,
        chunk_index,
        start_frame,
        end_frame,
        fps,
        visual_content,
    )
}
/// 將分片轉換為幀時間
pub fn to_frame_time(&self) -> FrameTime {
// 使用第一個幀作為參考點
FrameTime::from_frames(self.start_frame, self.fps)
}
/// Returns `true` when this chunk is a parent chunk, i.e. it lists at least
/// one child in `child_chunk_ids`.
///
/// A chunk whose `parent_chunk_id` is set is a *child*, so that field is
/// deliberately not what this predicate inspects.
pub fn is_parent(&self) -> bool {
    !self.child_chunk_ids.is_empty()
}
/// 從秒數創建新分片(舊版轉換)
///
/// This is useful for migrating from older systems that store time as seconds.
/// The frame counts are calculated by rounding `seconds * fps`.
/// 這對於從存儲時間為秒的舊系統遷移很有用。
/// 幀數通過舍入 `seconds * fps` 計算。
#[allow(clippy::too_many_arguments)]
pub fn from_seconds(
file_id: i32,
@@ -136,104 +357,197 @@ impl Chunk {
)
}
/// Returns the chunk's start as a `FrameTime`, built from `start_frame` and `fps`.
pub fn start_time(&self) -> FrameTime {
    let (frame, rate) = (self.start_frame, self.fps);
    FrameTime::from_frames(frame, rate)
}
/// Returns the chunk's end as a `FrameTime`, built from `end_frame` and `fps`.
pub fn end_time(&self) -> FrameTime {
    let (frame, rate) = (self.end_frame, self.fps);
    FrameTime::from_frames(frame, rate)
}
/// Returns the duration in frames.
/// 返回持續時間的幀數
pub fn duration_frames(&self) -> i64 {
self.end_frame - self.start_frame
}
/// Returns the chunk length in seconds: the frame count divided by `fps`.
///
/// This is a plain floating-point division, so an `fps` of 0 yields an
/// infinite or NaN result rather than an error.
pub fn duration_seconds(&self) -> f64 {
    let frames = self.duration_frames() as f64;
    frames / self.fps
}
/// Formats the start time as "seconds.frame" (e.g., "123.04").
pub fn format_start_sec_frame(&self) -> String {
    let start = self.start_time();
    start.format_sec_frame()
}
/// Formats the end time as "seconds.frame" (e.g., "456.15").
pub fn format_end_sec_frame(&self) -> String {
    let end = self.end_time();
    end.format_sec_frame()
}
/// Formats the start time as "HH:MM:SS".
pub fn format_start_hms(&self) -> String {
    let start = self.start_time();
    start.format_hms()
}
/// Formats the end time as "HH:MM:SS".
pub fn format_end_hms(&self) -> String {
    let end = self.end_time();
    end.format_hms()
}
/// Formats the start time as "HH:MM:SS.FF".
pub fn format_start_hms_frame(&self) -> String {
    let start = self.start_time();
    start.format_hms_frame()
}
/// Formats the end time as "HH:MM:SS.FF".
pub fn format_end_hms_frame(&self) -> String {
    let end = self.end_time();
    end.format_hms_frame()
}
/// Returns the `(start_seconds, end_seconds)` pair for compatibility.
///
/// Provided for backward compatibility during migration; prefer the
/// `start_time()` and `end_time()` methods.
pub fn time_range_seconds(&self) -> (f64, f64) {
    let start_seconds = self.start_time().seconds();
    let end_seconds = self.end_time().seconds();
    (start_seconds, end_seconds)
}
/// Builder step: stores `metadata` on the chunk (replacing any previous
/// value) and returns the chunk.
pub fn with_metadata(self, metadata: serde_json::Value) -> Self {
    let mut chunk = self;
    chunk.metadata = Some(metadata);
    chunk
}
/// Builder step: stores `vector_id` on the chunk (replacing any previous
/// value) and returns the chunk.
pub fn with_vector_id(self, vector_id: String) -> Self {
    let mut chunk = self;
    chunk.vector_id = Some(vector_id);
    chunk
}
/// Builder step: stores `text` as the chunk's text content (replacing any
/// previous value) and returns the chunk.
pub fn with_text_content(self, text: String) -> Self {
    let mut chunk = self;
    chunk.text_content = Some(text);
    chunk
}
/// Builder step: overwrites the chunk's `frame_count` with `count` and
/// returns the chunk.
pub fn with_frame_count(self, count: i32) -> Self {
    let mut chunk = self;
    chunk.frame_count = count;
    chunk
}
/// Builder step: sets the IDs of the preceding chunks (`pre_chunk_ids`)
/// and returns the chunk.
pub fn with_pre_chunk_ids(self, ids: Vec<i32>) -> Self {
    let mut chunk = self;
    chunk.pre_chunk_ids = ids;
    chunk
}
/// Builder step: sets the parent chunk ID (replacing any previous value)
/// and returns the chunk.
pub fn with_parent_chunk_id(self, parent_id: String) -> Self {
    let mut chunk = self;
    chunk.parent_chunk_id = Some(parent_id);
    chunk
}
/// Builder step: sets the child chunk IDs and returns the chunk.
pub fn with_child_chunk_ids(self, child_ids: Vec<String>) -> Self {
    let mut chunk = self;
    chunk.child_chunk_ids = child_ids;
    chunk
}
}
pub fn is_parent_chunk(&self) -> bool {
!self.child_chunk_ids.is_empty()
// ==================== VisualChunkContent helper methods ====================
impl VisualChunkContent {
/// Computes the similarity of two YOLO frames from their object composition.
///
/// The score is the Jaccard index of the two sets of object class names:
/// `|A ∩ B| / |A ∪ B|`. Instance counts, confidences and positions are not
/// considered.
///
/// # Returns
///
/// * `1.0` when neither frame has any objects (two empty frames are identical)
/// * `0.0` when exactly one frame has no objects
/// * otherwise a value in `[0.0, 1.0]`
pub fn frame_similarity(
    frame1: &crate::core::processor::yolo::YoloFrame,
    frame2: &crate::core::processor::yolo::YoloFrame,
) -> f32 {
    match (frame1.objects.is_empty(), frame2.objects.is_empty()) {
        (true, true) => return 1.0,
        (true, false) | (false, true) => return 0.0,
        (false, false) => {}
    }

    // Borrow the class names instead of cloning them; the sets only live
    // for the duration of this call.
    let classes1: std::collections::HashSet<&str> = frame1
        .objects
        .iter()
        .map(|o| o.class_name.as_str())
        .collect();
    let classes2: std::collections::HashSet<&str> = frame2
        .objects
        .iter()
        .map(|o| o.class_name.as_str())
        .collect();

    let shared = classes1.intersection(&classes2).count();
    // Both sets are non-empty here, so the union has at least one element
    // and the division is well defined.
    let combined = classes1.union(&classes2).count();

    shared as f32 / combined as f32
}
pub fn is_child_chunk(&self) -> bool {
self.parent_chunk_id.is_some()
/// Produces a one-line summary of the visual chunk, based on the keyframes'
/// `frame_number`s.
///
/// Returns `"Empty visual chunk"` when there are no keyframes. Otherwise the
/// summary reports the first and last keyframe numbers, the time between
/// them (frame number divided by `fps`, or 0 when `fps` is not positive;
/// for reference only), the number of keyframes, the total and unique
/// object counts from the metadata, and the dominant objects (`none` when
/// the list is empty).
pub fn summary(&self, fps: f64) -> String {
    let (first_frame, last_frame) = match (
        self.keyframe_objects.first(),
        self.keyframe_objects.last(),
    ) {
        (Some(first), Some(last)) => (first.frame_number, last.frame_number),
        _ => return "Empty visual chunk".to_string(),
    };

    // Time values are informational only; frame numbers are authoritative.
    let to_seconds = |frame: u64| if fps > 0.0 { frame as f64 / fps } else { 0.0 };
    let end_time = to_seconds(last_frame);
    let start_time = to_seconds(first_frame);
    let duration = end_time - start_time;

    let dominant = if self.dominant_objects.is_empty() {
        "none".to_string()
    } else {
        self.dominant_objects.join(", ")
    };

    format!(
        "Visual chunk: frames {} to {} (duration: {:.1}s, {} frames). Objects: {} total, {} unique. Dominant: {}",
        first_frame,
        last_frame,
        duration,
        self.keyframe_objects.len(),
        self.metadata.object_count,
        self.metadata.unique_classes.len(),
        dominant
    )
}
/// Returns `true` if any object in any keyframe has exactly the given
/// `class_name`.
pub fn contains_object(&self, class_name: &str) -> bool {
    self.keyframe_objects
        .iter()
        .flat_map(|keyframe| keyframe.objects.iter())
        .any(|detected| detected.class_name == class_name)
}
/// Collects references to every detected object, across all keyframes in
/// order, whose confidence is at least `threshold`.
pub fn high_confidence_objects(&self, threshold: f32) -> Vec<&DetectedObject> {
    let mut confident = Vec::new();
    for keyframe in &self.keyframe_objects {
        for detected in &keyframe.objects {
            if detected.confidence >= threshold {
                confident.push(detected);
            }
        }
    }
    confident
}
}

View File

@@ -164,3 +164,29 @@ pub mod cache {
.unwrap_or(3600)
});
}
/// LLM summary settings. Each value is read from its environment variable
/// the first time it is accessed and falls back to a built-in default.
pub mod llm {
    use super::*;

    /// Summary endpoint URL, from `MOMENTRY_LLM_SUMMARY_URL`
    /// (default `http://127.0.0.1:8081/v1/chat/completions`).
    pub static SUMMARY_URL: Lazy<String> =
        Lazy::new(|| match env::var("MOMENTRY_LLM_SUMMARY_URL") {
            Ok(url) => url,
            Err(_) => "http://127.0.0.1:8081/v1/chat/completions".to_string(),
        });

    /// Summary model name, from `MOMENTRY_LLM_SUMMARY_MODEL` (default `gemma4`).
    pub static SUMMARY_MODEL: Lazy<String> =
        Lazy::new(|| match env::var("MOMENTRY_LLM_SUMMARY_MODEL") {
            Ok(model) => model,
            Err(_) => "gemma4".to_string(),
        });

    /// Summary timeout, from `MOMENTRY_LLM_SUMMARY_TIMEOUT`; 120 when the
    /// variable is unset or does not parse as a `u64`.
    pub static SUMMARY_TIMEOUT_SECS: Lazy<u64> = Lazy::new(|| {
        env::var("MOMENTRY_LLM_SUMMARY_TIMEOUT")
            .ok()
            .and_then(|raw| raw.parse::<u64>().ok())
            .unwrap_or(120)
    });

    /// Whether summaries are enabled, from `MOMENTRY_LLM_SUMMARY_ENABLED`.
    /// Only the exact values `true` and `1` enable it when the variable is
    /// set; when it is unset the default is enabled.
    pub static SUMMARY_ENABLED: Lazy<bool> =
        Lazy::new(|| match env::var("MOMENTRY_LLM_SUMMARY_ENABLED") {
            Ok(flag) => matches!(flag.as_str(), "true" | "1"),
            Err(_) => true,
        });
}

View File

@@ -6,6 +6,7 @@ use crate::core::chunk::types::{Chunk, ChunkRule, ChunkType};
pub struct MongoDb {
base_url: String,
database: String,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
@@ -53,7 +54,8 @@ impl MongoDb {
pub fn new() -> Self {
let base_url =
std::env::var("MONGODB_URL").unwrap_or_else(|_| "http://localhost:27017".to_string());
Self { base_url }
let database = crate::core::config::MONGODB_DATABASE.clone();
Self { base_url, database }
}
}
@@ -68,7 +70,7 @@ impl MongoDb {
let doc: ChunkDocument = chunk.clone().into();
let client = reqwest::Client::new();
let url = format!("{}/momentry/chunks", self.base_url);
let url = format!("{}/{}/chunks", self.base_url, self.database);
client
.post(&url)
@@ -83,8 +85,8 @@ impl MongoDb {
pub async fn get_chunks_by_uuid(&self, uuid: &str) -> Result<Vec<Chunk>> {
let client = reqwest::Client::new();
let url = format!(
"{}/momentry/chunks?filter={{\"uuid\":\"{}\"}}",
self.base_url, uuid
"{}/{}/chunks?filter={{\"uuid\":\"{}\"}}",
self.base_url, self.database, uuid
);
let response = client
@@ -131,6 +133,7 @@ impl MongoDb {
pre_chunk_ids: vec![],
parent_chunk_id: doc.parent_chunk_id,
child_chunk_ids: doc.child_chunk_ids,
visual_stats: None,
}
})
.collect();
@@ -141,8 +144,8 @@ impl MongoDb {
pub async fn search_text(&self, query: &str) -> Result<Vec<Chunk>> {
let client = reqwest::Client::new();
let url = format!(
"{}/momentry/chunks?filter={{\"$text\":{{\"$search\":\"{}\"}}}}",
self.base_url, query
"{}/{}/chunks?filter={{\"$text\":{{\"$search\":\"{}\"}}}}",
self.base_url, self.database, query
);
let response = client
@@ -189,6 +192,7 @@ impl MongoDb {
pre_chunk_ids: vec![],
parent_chunk_id: doc.parent_chunk_id,
child_chunk_ids: doc.child_chunk_ids,
visual_stats: None,
}
})
.collect();
@@ -198,7 +202,7 @@ impl MongoDb {
pub async fn get_all_chunks(&self) -> Result<Vec<Chunk>> {
let client = reqwest::Client::new();
let url = format!("{}/momentry/chunks", self.base_url);
let url = format!("{}/{}/chunks", self.base_url, self.database);
let response = client
.get(&url)
@@ -244,6 +248,7 @@ impl MongoDb {
pre_chunk_ids: vec![],
parent_chunk_id: doc.parent_chunk_id,
child_chunk_ids: doc.child_chunk_ids,
visual_stats: None,
}
})
.collect();

File diff suppressed because it is too large Load Diff

View File

@@ -128,7 +128,7 @@ impl QdrantDb {
use std::hash::{Hash, Hasher};
let mut hasher = DefaultHasher::new();
point_id_str.hash(&mut hasher);
let point_id = hasher.finish() as u64;
let point_id = hasher.finish();
let body = serde_json::json!({
"points": [{
@@ -171,7 +171,7 @@ impl QdrantDb {
));
}
tracing::debug!("Qdrant response: {}", response_text);
tracing::debug!("Qdrant upsert response status: {}", status);
tracing::info!("Successfully upserted vector for chunk: {}", chunk_id);
Ok(())
}
@@ -257,6 +257,101 @@ impl QdrantDb {
Ok(search_results)
}
pub async fn search_collections(
&self,
query_vector: &[f32],
collections: &[&str],
limit: usize,
) -> Result<Vec<SearchResult>> {
let mut handles = Vec::new();
for &collection in collections {
let url = format!("{}/collections/{}/points/search", self.base_url, collection);
let client = self.client.clone();
let api_key = self.api_key.clone();
let query_vec = query_vector.to_vec();
let body = serde_json::json!({
"vector": query_vec,
"limit": limit * 2, // Fetch more from each to account for overlaps
"with_payload": true
});
handles.push(async move {
let response = client
.post(&url)
.header("api-key", &api_key)
.header("Content-Type", "application/json")
.json(&body)
.send()
.await;
match response {
Ok(resp) if resp.status().is_success() => {
let resp_text = resp
.text()
.await
.unwrap_or_else(|_| "Failed to read response".to_string());
#[derive(Deserialize)]
struct QdrantSearchResult {
result: Vec<QdrantPoint>,
}
#[derive(Deserialize)]
struct QdrantPoint {
#[allow(dead_code)]
id: serde_json::Value,
score: f64,
payload: HashMap<String, serde_json::Value>,
}
if let Ok(result) = serde_json::from_str::<QdrantSearchResult>(&resp_text) {
let results: Vec<SearchResult> = result
.result
.into_iter()
.map(|r| {
let uuid = r
.payload
.get("uuid")
.and_then(|v| v.as_str())
.unwrap_or("unknown")
.to_string();
let chunk_id = r
.payload
.get("chunk_id")
.and_then(|v| v.as_str())
.unwrap_or("unknown")
.to_string();
SearchResult {
uuid,
chunk_id,
score: r.score as f32,
}
})
.collect();
Ok::<Vec<SearchResult>, anyhow::Error>(results)
} else {
Ok::<Vec<SearchResult>, anyhow::Error>(Vec::new())
}
}
_ => Ok::<Vec<SearchResult>, anyhow::Error>(Vec::new()),
}
});
}
let results = futures_util::future::join_all(handles).await;
let mut merged: Vec<SearchResult> = results
.into_iter()
.filter_map(Result::ok)
.flatten()
.collect();
// Sort by score descending
merged.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap());
// Deduplicate by chunk_id + uuid
merged.dedup_by_key(|r| (r.chunk_id.clone(), r.uuid.clone()));
// Truncate to limit
merged.truncate(limit);
Ok(merged)
}
pub async fn search_in_uuid(
&self,
query_vector: &[f32],

View File

@@ -4,9 +4,15 @@ pub mod chunk;
pub mod config;
pub mod db;
pub mod embedding;
pub mod ingestion;
pub mod llm;
pub mod overlay;
pub mod person_identity;
pub mod probe;
pub mod processor;
pub mod storage;
pub mod text;
pub mod thumbnail;
pub mod time;
pub mod tmdb;
pub mod worker;

View File

@@ -28,16 +28,23 @@ pub async fn process_asrx(
uuid: Option<&str>,
) -> Result<AsrxResult> {
let executor = PythonExecutor::new()?;
let script_path = executor.script_path("asrx_processor.py");
let script_path = executor.script_path("asrx_processor_custom.py");
tracing::info!("[ASRX] Starting speaker diarization: {}", video_path);
tracing::info!(
"[ASRX] Starting speaker diarization (custom): {}",
video_path
);
if !script_path.exists() {
tracing::warn!("[ASRX] Script not found, returning empty result");
return Ok(AsrxResult {
language: None,
segments: vec![],
});
tracing::warn!("[ASRX] Custom script not found, falling back to original");
let fallback_path = executor.script_path("asrx_processor.py");
if !fallback_path.exists() {
tracing::warn!("[ASRX] No script found, returning empty result");
return Ok(AsrxResult {
language: None,
segments: vec![],
});
}
}
let mut cmd = Command::new(executor.python_path());

View File

@@ -9,6 +9,7 @@ pub mod ocr;
pub mod pose;
pub mod scene_classification;
pub mod story;
pub mod visual_chunk;
pub mod yolo;
pub use asr::{process_asr, AsrResult, AsrSegment};
@@ -28,4 +29,5 @@ pub use scene_classification::{
process_scene_classification, SceneClassificationResult, ScenePrediction, SceneSegment,
};
pub use story::{process_story, StoryChildChunk, StoryParentChunk, StoryResult, StoryStats};
pub use visual_chunk::{process_visual_chunk, process_visual_chunk_advanced, VisualChunkResult};
pub use yolo::{process_yolo, YoloFrame, YoloObject, YoloResult};