feat: update core API, database layer, and worker modules
- Remove unused imports (n8n_search, universal_search, Client, Arc, etc.) - Update API endpoints for identity, face recognition, search - Fix postgres_db.rs search_videos parent_uuid column - Add snapshot API and identity agent API - Clean up backup files (.bak, .bak2)
This commit is contained in:
131
src/core/cache/redis_cache.rs
vendored
131
src/core/cache/redis_cache.rs
vendored
@@ -4,7 +4,7 @@ use serde::{de::DeserializeOwned, Serialize};
|
||||
use std::sync::Arc;
|
||||
use tokio::sync::RwLock;
|
||||
|
||||
use crate::core::config::{cache as cache_config, REDIS_KEY_PREFIX};
|
||||
use crate::core::config::{cache as cache_config, snapshot as snapshot_config, REDIS_KEY_PREFIX};
|
||||
use crate::core::db::RedisClient;
|
||||
|
||||
pub struct RedisCache {
|
||||
@@ -133,6 +133,135 @@ impl RedisCache {
|
||||
pub async fn invalidate_videos_list(&self) -> Result<u64> {
|
||||
self.invalidate_pattern("videos:*").await
|
||||
}
|
||||
|
||||
// --- Snapshot Cache Methods ---
|
||||
|
||||
pub async fn snapshot_hits_key(file_uuid: &str) -> String {
|
||||
format!("snapshot:hits:{}", file_uuid)
|
||||
}
|
||||
|
||||
pub async fn snapshot_status_key(file_uuid: &str) -> String {
|
||||
format!("snapshot:status:{}", file_uuid)
|
||||
}
|
||||
|
||||
pub async fn snapshot_last_access_key(file_uuid: &str) -> String {
|
||||
format!("snapshot:last_access:{}", file_uuid)
|
||||
}
|
||||
|
||||
pub async fn snapshot_migrate_hint_key(file_uuid: &str) -> String {
|
||||
format!("snapshot:migrate_hint:{}", file_uuid)
|
||||
}
|
||||
|
||||
pub async fn increment_snapshot_hits(&self, file_uuid: &str) -> Result<u64> {
|
||||
let client = self.client.read().await;
|
||||
let mut conn = client.get_conn_internal().await?;
|
||||
let key = Self::snapshot_hits_key(file_uuid).await;
|
||||
let new_count: u64 = redis::cmd("INCR").arg(&key).query_async(&mut conn).await?;
|
||||
let _: () = redis::cmd("EXPIRE")
|
||||
.arg(&key)
|
||||
.arg(*snapshot_config::WARM_TTL_SECS)
|
||||
.query_async(&mut conn)
|
||||
.await?;
|
||||
Ok(new_count)
|
||||
}
|
||||
|
||||
pub async fn get_snapshot_hits(&self, file_uuid: &str) -> Result<u64> {
|
||||
let client = self.client.read().await;
|
||||
let mut conn = client.get_conn_internal().await?;
|
||||
let key = Self::snapshot_hits_key(file_uuid).await;
|
||||
let count: Option<u64> = conn.get(&key).await?;
|
||||
Ok(count.unwrap_or(0))
|
||||
}
|
||||
|
||||
pub async fn update_last_access(&self, file_uuid: &str) -> Result<()> {
|
||||
let client = self.client.read().await;
|
||||
let mut conn = client.get_conn_internal().await?;
|
||||
let key = Self::snapshot_last_access_key(file_uuid).await;
|
||||
let now: i64 = std::time::SystemTime::now()
|
||||
.duration_since(std::time::UNIX_EPOCH)
|
||||
.unwrap()
|
||||
.as_secs() as i64;
|
||||
let _: String = conn
|
||||
.set_ex(&key, now, *snapshot_config::WARM_TTL_SECS)
|
||||
.await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn set_snapshot_status(
|
||||
&self,
|
||||
file_uuid: &str,
|
||||
status: &str,
|
||||
progress: Option<f32>,
|
||||
) -> Result<()> {
|
||||
let client = self.client.read().await;
|
||||
let mut conn = client.get_conn_internal().await?;
|
||||
let key = Self::snapshot_status_key(file_uuid).await;
|
||||
let payload = serde_json::json!({
|
||||
"status": status,
|
||||
"progress": progress,
|
||||
});
|
||||
let json = serde_json::to_string(&payload)?;
|
||||
let ttl = if status == "generating" {
|
||||
*crate::core::config::snapshot::GENERATING_TIMEOUT_SECS
|
||||
} else {
|
||||
*snapshot_config::WARM_TTL_SECS
|
||||
};
|
||||
let _: String = conn.set_ex(&key, json, ttl).await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn get_snapshot_status(&self, file_uuid: &str) -> Result<serde_json::Value> {
|
||||
let client = self.client.read().await;
|
||||
let mut conn = client.get_conn_internal().await?;
|
||||
let key = Self::snapshot_status_key(file_uuid).await;
|
||||
let value: Option<String> = conn.get(&key).await?;
|
||||
match value {
|
||||
Some(json) => Ok(serde_json::from_str(&json)?),
|
||||
None => Ok(serde_json::json!({
|
||||
"status": "cold",
|
||||
"progress": null,
|
||||
})),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn clear_snapshot_status(&self, file_uuid: &str) -> Result<()> {
|
||||
let client = self.client.read().await;
|
||||
let mut conn = client.get_conn_internal().await?;
|
||||
let key = Self::snapshot_status_key(file_uuid).await;
|
||||
let _: () = conn.del(&key).await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn set_migrate_hint(
|
||||
&self,
|
||||
file_uuid: &str,
|
||||
parent_uuid: &str,
|
||||
count: u64,
|
||||
) -> Result<()> {
|
||||
let client = self.client.read().await;
|
||||
let mut conn = client.get_conn_internal().await?;
|
||||
let key = Self::snapshot_migrate_hint_key(file_uuid).await;
|
||||
let payload = serde_json::json!({
|
||||
"parent_uuid": parent_uuid,
|
||||
"count": count,
|
||||
});
|
||||
let json = serde_json::to_string(&payload)?;
|
||||
let _: String = conn
|
||||
.set_ex(&key, json, *snapshot_config::WARM_TTL_SECS)
|
||||
.await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn get_migrate_hint(&self, file_uuid: &str) -> Result<Option<serde_json::Value>> {
|
||||
let client = self.client.read().await;
|
||||
let mut conn = client.get_conn_internal().await?;
|
||||
let key = Self::snapshot_migrate_hint_key(file_uuid).await;
|
||||
let value: Option<String> = conn.get(&key).await?;
|
||||
match value {
|
||||
Some(json) => Ok(Some(serde_json::from_str(&json)?)),
|
||||
None => Ok(None),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Clone for RedisCache {
|
||||
|
||||
@@ -3,7 +3,7 @@ pub mod rule3_ingest;
|
||||
pub mod splitter;
|
||||
pub mod types;
|
||||
|
||||
pub use rule1_ingest::ingest_rule1;
|
||||
pub use rule1_ingest::execute_rule1;
|
||||
pub use rule3_ingest::ingest_rule3;
|
||||
pub use splitter::{AsrSegment, ChunkSplitter};
|
||||
pub use types::{Chunk, ChunkType};
|
||||
|
||||
@@ -1,94 +1,367 @@
|
||||
use crate::core::config::OUTPUT_DIR;
|
||||
use crate::core::chunk::types::{Chunk, ChunkRule, ChunkType};
|
||||
use crate::core::db::schema;
|
||||
use crate::core::db::PostgresDb;
|
||||
use anyhow::{Context, Result};
|
||||
use serde::Deserialize;
|
||||
use sqlx::PgPool;
|
||||
use std::fs;
|
||||
use std::path::Path;
|
||||
use serde_json::Value;
|
||||
use sqlx::{PgPool, Row};
|
||||
use tracing::info;
|
||||
|
||||
// --- 結構體定義 (對齊外部處理器產出格式) ---
|
||||
pub async fn execute_rule1(db: &PostgresDb, file_uuid: &str, fps: f64) -> Result<usize> {
|
||||
let pool = db.pool();
|
||||
let pre_chunks_table = schema::table_name("pre_chunks");
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct AsrSegment {
|
||||
start: f64,
|
||||
end: f64,
|
||||
text: String,
|
||||
}
|
||||
let asr_segments = fetch_asr_segments(pool, file_uuid, &pre_chunks_table).await?;
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct AsrxSegment {
|
||||
start: f64,
|
||||
end: f64,
|
||||
speaker: String,
|
||||
}
|
||||
let asrx_segments = fetch_asrx_segments(pool, file_uuid, &pre_chunks_table).await?;
|
||||
|
||||
// --- 核心邏輯 ---
|
||||
let yolo_frames = fetch_yolo_frames(pool, file_uuid, &pre_chunks_table).await?;
|
||||
|
||||
/// 執行 Rule 1 入庫
|
||||
/// 讀取 asr.json 與 asrx.json,合併 Speaker 資訊,寫入 chunks_rule1
|
||||
pub async fn ingest_rule1(pool: &PgPool, asset_uuid: &str, fps: f64) -> Result<usize> {
|
||||
// 1. 讀取檔案
|
||||
let asr_path = format!("{}/{}.asr.json", *OUTPUT_DIR, asset_uuid);
|
||||
let asrx_path = format!("{}/{}.asrx.json", *OUTPUT_DIR, asset_uuid);
|
||||
let face_frames = fetch_face_frames(pool, file_uuid).await?;
|
||||
|
||||
let asr_content = fs::read_to_string(&asr_path)
|
||||
.with_context(|| format!("Failed to read ASR file: {}", asr_path))?;
|
||||
let asrx_content = fs::read_to_string(&asrx_path)
|
||||
.with_context(|| format!("Failed to read ASRX file: {}", asrx_path))?;
|
||||
let video = db
|
||||
.get_video_by_uuid(file_uuid)
|
||||
.await?
|
||||
.context("Video not found")?;
|
||||
|
||||
let asr_segments: Vec<AsrSegment> = serde_json::from_str(&asr_content)?;
|
||||
let asrx_segments: Vec<AsrxSegment> = serde_json::from_str(&asrx_content)?;
|
||||
let file_id = video.id;
|
||||
|
||||
let mut count = 0;
|
||||
|
||||
// 2. 交易處理
|
||||
let mut tx = pool.begin().await?;
|
||||
|
||||
for seg in &asr_segments {
|
||||
// 時間轉幀
|
||||
let start_frame = (seg.start * fps).round() as i64;
|
||||
let end_frame = (seg.end * fps).round() as i64;
|
||||
for (idx, seg) in asr_segments.iter().enumerate() {
|
||||
let speaker_id = find_best_speaker(&seg, &asrx_segments);
|
||||
|
||||
// 3. 尋找重疊最多的 Speaker
|
||||
let mut best_speaker: Option<String> = None;
|
||||
let mut max_overlap = 0.0f64;
|
||||
let yolo_objects = find_yolo_objects(seg.start_frame, seg.end_frame, &yolo_frames);
|
||||
|
||||
for spk in &asrx_segments {
|
||||
let overlap = (seg.end.min(spk.end) - seg.start.max(spk.start)).max(0.0);
|
||||
if overlap > max_overlap {
|
||||
max_overlap = overlap;
|
||||
best_speaker = Some(spk.speaker.clone());
|
||||
}
|
||||
}
|
||||
let face_ids = find_face_ids(seg.start_frame, seg.end_frame, &face_frames);
|
||||
|
||||
let speaker_id = best_speaker.unwrap_or("UNKNOWN".to_string());
|
||||
let metadata = serde_json::json!({
|
||||
"speaker_id": speaker_id,
|
||||
"yolo_objects": yolo_objects,
|
||||
"face_ids": face_ids,
|
||||
"language": seg.language,
|
||||
});
|
||||
|
||||
// 4. 寫入 DB
|
||||
sqlx::query!(
|
||||
r#"
|
||||
INSERT INTO chunks_rule1 (
|
||||
id, asset_uuid, start_frame, end_frame, content, speaker_id
|
||||
) VALUES (
|
||||
gen_random_uuid(), $1, $2, $3, $4, $5
|
||||
)
|
||||
"#,
|
||||
asset_uuid,
|
||||
start_frame,
|
||||
end_frame,
|
||||
seg.text,
|
||||
speaker_id
|
||||
let content = serde_json::json!({
|
||||
"text": seg.text,
|
||||
"text_normalized": seg.text.to_lowercase(),
|
||||
});
|
||||
|
||||
let chunk = Chunk::from_seconds(
|
||||
file_id as i32,
|
||||
file_uuid.to_string(),
|
||||
idx as u32,
|
||||
ChunkType::Sentence,
|
||||
ChunkRule::Rule1,
|
||||
seg.start_time,
|
||||
seg.end_time,
|
||||
fps,
|
||||
content,
|
||||
)
|
||||
.execute(&mut *tx)
|
||||
.await?;
|
||||
.with_metadata(metadata);
|
||||
|
||||
db.store_chunk_in_tx(&chunk, &mut tx).await?;
|
||||
|
||||
count += 1;
|
||||
|
||||
// 每 100 筆 Commit 一次 (可選優化)
|
||||
if count % 500 == 0 {
|
||||
tx.commit().await?;
|
||||
tx = pool.begin().await?;
|
||||
if count % 100 == 0 {
|
||||
info!(
|
||||
"Rule 1: Processed {} segments for video {}",
|
||||
count, file_uuid
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
tx.commit().await?;
|
||||
|
||||
info!(
|
||||
"Rule 1 completed: {} sentence chunks created for video {}",
|
||||
count, file_uuid
|
||||
);
|
||||
|
||||
Ok(count)
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
struct AsrSegment {
|
||||
start_frame: i64,
|
||||
end_frame: i64,
|
||||
start_time: f64,
|
||||
end_time: f64,
|
||||
text: String,
|
||||
language: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
struct AsrxSegment {
|
||||
start_frame: i64,
|
||||
end_frame: i64,
|
||||
start_time: f64,
|
||||
end_time: f64,
|
||||
speaker: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
struct YoloFrame {
|
||||
frame: i64,
|
||||
detections: Vec<YoloDetection>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
struct YoloDetection {
|
||||
class_name: String,
|
||||
confidence: f64,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
struct FaceFrame {
|
||||
frame: i64,
|
||||
faces: Vec<FaceDetection>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
struct FaceDetection {
|
||||
face_id: String,
|
||||
confidence: f64,
|
||||
identity_id: Option<i32>,
|
||||
}
|
||||
|
||||
async fn fetch_asr_segments(
|
||||
pool: &PgPool,
|
||||
file_uuid: &str,
|
||||
table: &str,
|
||||
) -> Result<Vec<AsrSegment>> {
|
||||
let query = format!(
|
||||
r#"
|
||||
SELECT
|
||||
start_frame, end_frame, start_time, end_time, data
|
||||
FROM {}
|
||||
WHERE file_uuid = $1 AND processor_type = 'asr'
|
||||
ORDER BY start_frame
|
||||
"#,
|
||||
table
|
||||
);
|
||||
|
||||
let rows = sqlx::query(&query).bind(file_uuid).fetch_all(pool).await?;
|
||||
|
||||
let segments: Vec<AsrSegment> = rows
|
||||
.iter()
|
||||
.map(|row| {
|
||||
let start_frame: i64 = row.try_get("start_frame").unwrap_or(0);
|
||||
let end_frame: i64 = row.try_get("end_frame").unwrap_or(0);
|
||||
let start_time: f64 = row.try_get("start_time").unwrap_or(0.0);
|
||||
let end_time: f64 = row.try_get("end_time").unwrap_or(0.0);
|
||||
let data: Value = row.try_get("data").unwrap_or(Value::Null);
|
||||
|
||||
let text = data.get("text").and_then(|t| t.as_str()).unwrap_or("");
|
||||
let language = data
|
||||
.get("language")
|
||||
.and_then(|l| l.as_str())
|
||||
.unwrap_or("unknown");
|
||||
|
||||
AsrSegment {
|
||||
start_frame,
|
||||
end_frame,
|
||||
start_time,
|
||||
end_time,
|
||||
text: text.to_string(),
|
||||
language: language.to_string(),
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
|
||||
Ok(segments)
|
||||
}
|
||||
|
||||
async fn fetch_asrx_segments(
|
||||
pool: &PgPool,
|
||||
file_uuid: &str,
|
||||
table: &str,
|
||||
) -> Result<Vec<AsrxSegment>> {
|
||||
let query = format!(
|
||||
r#"
|
||||
SELECT
|
||||
start_frame, end_frame, start_time, end_time, data
|
||||
FROM {}
|
||||
WHERE file_uuid = $1 AND processor_type = 'asrx'
|
||||
ORDER BY start_frame
|
||||
"#,
|
||||
table
|
||||
);
|
||||
|
||||
let rows = sqlx::query(&query).bind(file_uuid).fetch_all(pool).await?;
|
||||
|
||||
let segments: Vec<AsrxSegment> = rows
|
||||
.iter()
|
||||
.map(|row| {
|
||||
let start_frame: i64 = row.try_get("start_frame").unwrap_or(0);
|
||||
let end_frame: i64 = row.try_get("end_frame").unwrap_or(0);
|
||||
let start_time: f64 = row.try_get("start_time").unwrap_or(0.0);
|
||||
let end_time: f64 = row.try_get("end_time").unwrap_or(0.0);
|
||||
let data: Value = row.try_get("data").unwrap_or(Value::Null);
|
||||
|
||||
let speaker = data
|
||||
.get("speaker")
|
||||
.and_then(|s| s.as_str())
|
||||
.unwrap_or("UNKNOWN");
|
||||
|
||||
AsrxSegment {
|
||||
start_frame,
|
||||
end_frame,
|
||||
start_time,
|
||||
end_time,
|
||||
speaker: speaker.to_string(),
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
|
||||
Ok(segments)
|
||||
}
|
||||
|
||||
async fn fetch_yolo_frames(pool: &PgPool, file_uuid: &str, table: &str) -> Result<Vec<YoloFrame>> {
|
||||
let query = format!(
|
||||
r#"
|
||||
SELECT
|
||||
coordinate_index as frame, data
|
||||
FROM {}
|
||||
WHERE file_uuid = $1 AND processor_type = 'yolo'
|
||||
ORDER BY coordinate_index
|
||||
"#,
|
||||
table
|
||||
);
|
||||
|
||||
let rows = sqlx::query(&query).bind(file_uuid).fetch_all(pool).await?;
|
||||
|
||||
let frames: Vec<YoloFrame> = rows
|
||||
.iter()
|
||||
.map(|row| {
|
||||
let frame: i64 = row.try_get("frame").unwrap_or(0);
|
||||
let data: Value = row.try_get("data").unwrap_or(Value::Null);
|
||||
|
||||
let detections: Vec<YoloDetection> = data
|
||||
.get("detections")
|
||||
.and_then(|d| d.as_array())
|
||||
.map(|arr| {
|
||||
arr.iter()
|
||||
.filter_map(|det| {
|
||||
let class_name = det.get("class_name").and_then(|c| c.as_str());
|
||||
let confidence = det.get("confidence").and_then(|c| c.as_f64());
|
||||
|
||||
if class_name.is_some() && confidence.is_some() {
|
||||
Some(YoloDetection {
|
||||
class_name: class_name.unwrap().to_string(),
|
||||
confidence: confidence.unwrap(),
|
||||
})
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
.collect()
|
||||
})
|
||||
.unwrap_or_default();
|
||||
|
||||
YoloFrame { frame, detections }
|
||||
})
|
||||
.collect();
|
||||
|
||||
Ok(frames)
|
||||
}
|
||||
|
||||
async fn fetch_face_frames(pool: &PgPool, file_uuid: &str) -> Result<Vec<FaceFrame>> {
|
||||
let face_detections_table = schema::table_name("face_detections");
|
||||
|
||||
let query = format!(
|
||||
r#"
|
||||
SELECT
|
||||
frame_number as frame,
|
||||
face_id,
|
||||
confidence,
|
||||
identity_id
|
||||
FROM {}
|
||||
WHERE file_uuid = $1
|
||||
ORDER BY frame_number
|
||||
"#,
|
||||
face_detections_table
|
||||
);
|
||||
|
||||
let rows = sqlx::query(&query).bind(file_uuid).fetch_all(pool).await?;
|
||||
|
||||
let mut frame_map: std::collections::HashMap<i64, FaceFrame> = std::collections::HashMap::new();
|
||||
|
||||
for row in rows {
|
||||
let frame: i64 = row.try_get("frame").unwrap_or(0);
|
||||
let face_id: Option<String> = row.try_get("face_id").ok();
|
||||
let confidence: f64 = row.try_get("confidence").unwrap_or(0.0);
|
||||
let identity_id: Option<i32> = row.try_get("identity_id").ok();
|
||||
|
||||
if let Some(face_id) = face_id {
|
||||
let detection = FaceDetection {
|
||||
face_id: face_id.clone(),
|
||||
confidence,
|
||||
identity_id,
|
||||
};
|
||||
|
||||
frame_map
|
||||
.entry(frame)
|
||||
.or_insert_with(|| FaceFrame {
|
||||
frame,
|
||||
faces: Vec::new(),
|
||||
})
|
||||
.faces
|
||||
.push(detection);
|
||||
}
|
||||
}
|
||||
|
||||
let mut frames: Vec<FaceFrame> = frame_map.into_values().collect();
|
||||
frames.sort_by_key(|f| f.frame);
|
||||
|
||||
Ok(frames)
|
||||
}
|
||||
|
||||
fn find_best_speaker(asr_seg: &AsrSegment, asrx_segments: &[AsrxSegment]) -> String {
|
||||
let mut best_speaker = "UNKNOWN".to_string();
|
||||
let mut max_overlap = 0.0f64;
|
||||
|
||||
for spk in asrx_segments {
|
||||
let overlap =
|
||||
(asr_seg.end_time.min(spk.end_time) - asr_seg.start_time.max(spk.start_time)).max(0.0);
|
||||
if overlap > max_overlap {
|
||||
max_overlap = overlap;
|
||||
best_speaker = spk.speaker.clone();
|
||||
}
|
||||
}
|
||||
|
||||
best_speaker
|
||||
}
|
||||
|
||||
fn find_yolo_objects(start_frame: i64, end_frame: i64, yolo_frames: &[YoloFrame]) -> Vec<String> {
|
||||
let mut objects = Vec::new();
|
||||
|
||||
for frame in yolo_frames {
|
||||
if frame.frame >= start_frame && frame.frame <= end_frame {
|
||||
for det in &frame.detections {
|
||||
if det.confidence > 0.5 && !objects.contains(&det.class_name) {
|
||||
objects.push(det.class_name.clone());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
objects
|
||||
}
|
||||
|
||||
fn find_face_ids(start_frame: i64, end_frame: i64, face_frames: &[FaceFrame]) -> Vec<String> {
|
||||
let mut face_ids = Vec::new();
|
||||
|
||||
for frame in face_frames {
|
||||
if frame.frame >= start_frame && frame.frame <= end_frame {
|
||||
for face in &frame.faces {
|
||||
if face.confidence > 0.5 && !face_ids.contains(&face.face_id) {
|
||||
face_ids.push(face.face_id.clone());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
face_ids
|
||||
}
|
||||
|
||||
@@ -165,6 +165,42 @@ pub mod cache {
|
||||
});
|
||||
}
|
||||
|
||||
pub mod snapshot {
|
||||
use super::*;
|
||||
|
||||
pub static SNAPSHOT_DIR_NAME: Lazy<String> = Lazy::new(|| {
|
||||
env::var("MOMENTRY_SNAPSHOT_DIR_NAME").unwrap_or_else(|_| ".momentry_snapshots".to_string())
|
||||
});
|
||||
|
||||
pub static HOT_THRESHOLD: Lazy<u64> = Lazy::new(|| {
|
||||
env::var("MOMENTRY_SNAPSHOT_HOT_THRESHOLD")
|
||||
.unwrap_or_else(|_| "5".to_string())
|
||||
.parse()
|
||||
.unwrap_or(5)
|
||||
});
|
||||
|
||||
pub static HOT_TTL_SECS: Lazy<u64> = Lazy::new(|| {
|
||||
env::var("MOMENTRY_SNAPSHOT_HOT_TTL_SECS")
|
||||
.unwrap_or_else(|_| "86400".to_string())
|
||||
.parse()
|
||||
.unwrap_or(86400)
|
||||
});
|
||||
|
||||
pub static WARM_TTL_SECS: Lazy<u64> = Lazy::new(|| {
|
||||
env::var("MOMENTRY_SNAPSHOT_WARM_TTL_SECS")
|
||||
.unwrap_or_else(|_| "604800".to_string())
|
||||
.parse()
|
||||
.unwrap_or(604800)
|
||||
});
|
||||
|
||||
pub static GENERATING_TIMEOUT_SECS: Lazy<u64> = Lazy::new(|| {
|
||||
env::var("MOMENTRY_SNAPSHOT_GENERATING_TIMEOUT")
|
||||
.unwrap_or_else(|_| "1800".to_string())
|
||||
.parse()
|
||||
.unwrap_or(1800)
|
||||
});
|
||||
}
|
||||
|
||||
pub mod llm {
|
||||
use super::*;
|
||||
|
||||
|
||||
@@ -41,9 +41,10 @@ pub mod sync_db;
|
||||
|
||||
pub use mongodb_db::MongoDb;
|
||||
pub use postgres_db::{
|
||||
Bm25Result, CandidateRecord, CreateApiKeyConfig, FileRecord, HybridSearchResult, MonitorJob,
|
||||
MonitorJobStats, MonitorJobStatus, PostgresDb, ProcessorJobStatus, ProcessorResult,
|
||||
ProcessorType, ResourceRecord, VideoRecord, VideoStatus,
|
||||
Bm25Result, CandidateRecord, CreateApiKeyConfig, FileIdentityRecord, FileRecord,
|
||||
HybridSearchResult, IdentityChunkRecord, IdentityDetailRecord, IdentityFaceRecord,
|
||||
IdentityFileRecord, MonitorJob, MonitorJobStats, MonitorJobStatus, PostgresDb,
|
||||
ProcessorJobStatus, ProcessorResult, ProcessorType, ResourceRecord, VideoRecord, VideoStatus,
|
||||
};
|
||||
pub use qdrant_db::{QdrantDb, VectorPayload};
|
||||
pub use redis_client::{
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -4,6 +4,8 @@ use std::sync::Arc;
|
||||
use tokio::sync::RwLock;
|
||||
|
||||
use super::Database;
|
||||
use crate::core::config;
|
||||
use crate::core::storage::snapshot_manager::SnapshotTier;
|
||||
|
||||
pub struct RedisDb {
|
||||
#[allow(dead_code)]
|
||||
@@ -28,14 +30,20 @@ pub struct Job {
|
||||
pub updated_at: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, serde::Serialize, serde::Deserialize)]
|
||||
pub struct SnapshotCacheEntry {
|
||||
pub hits: u64,
|
||||
pub last_access: i64,
|
||||
pub status: String,
|
||||
pub progress: Option<f32>,
|
||||
}
|
||||
|
||||
impl RedisDb {
|
||||
pub async fn push_job(&self, _job: &Job) -> Result<()> {
|
||||
// TODO: Implement Redis client
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn get_pending_jobs(&self) -> Result<Vec<Job>> {
|
||||
// TODO: Implement Redis client
|
||||
Ok(vec![])
|
||||
}
|
||||
|
||||
@@ -45,14 +53,82 @@ impl RedisDb {
|
||||
_status: &str,
|
||||
_progress: f32,
|
||||
) -> Result<()> {
|
||||
// TODO: Implement Redis client
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn publish_event(&self, _channel: &str, _message: &str) -> Result<()> {
|
||||
// TODO: Implement Redis Pub/Sub
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// --- Snapshot Cache Methods ---
|
||||
|
||||
pub async fn increment_snapshot_hits(&self, _file_uuid: &str) -> Result<u64> {
|
||||
// TODO: Redis HINCRBY snapshot:hits:{uuid}
|
||||
Ok(0)
|
||||
}
|
||||
|
||||
pub async fn get_snapshot_hits(&self, _file_uuid: &str) -> Result<u64> {
|
||||
// TODO: Redis GET snapshot:hits:{uuid}
|
||||
Ok(0)
|
||||
}
|
||||
|
||||
pub async fn update_last_access(&self, _file_uuid: &str) -> Result<()> {
|
||||
// TODO: Redis SET snapshot:last_access:{uuid} = now EX 7d
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn set_snapshot_status(
|
||||
&self,
|
||||
_file_uuid: &str,
|
||||
status: &str,
|
||||
progress: Option<f32>,
|
||||
) -> Result<()> {
|
||||
// TODO: Redis SET snapshot:status:{uuid} = {status, progress} EX 30m
|
||||
let _ = (status, progress);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn get_snapshot_status(&self, _file_uuid: &str) -> Result<SnapshotCacheEntry> {
|
||||
// TODO: Redis GET snapshot:status:{uuid}
|
||||
Ok(SnapshotCacheEntry {
|
||||
hits: 0,
|
||||
last_access: 0,
|
||||
status: "cold".to_string(),
|
||||
progress: None,
|
||||
})
|
||||
}
|
||||
|
||||
pub async fn clear_snapshot_status(&self, _file_uuid: &str) -> Result<()> {
|
||||
// TODO: Redis DEL snapshot:status:{uuid}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn set_migrate_hint(
|
||||
&self,
|
||||
_file_uuid: &str,
|
||||
_parent_uuid: &str,
|
||||
_count: u64,
|
||||
) -> Result<()> {
|
||||
// TODO: Redis SET snapshot:migrate_hint:{uuid}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn get_migrate_hint(&self, _file_uuid: &str) -> Result<Option<(String, u64)>> {
|
||||
// TODO: Redis GET snapshot:migrate_hint:{uuid}
|
||||
Ok(None)
|
||||
}
|
||||
|
||||
pub fn compute_status_ttl(&self, tier: SnapshotTier) -> u64 {
|
||||
match tier {
|
||||
SnapshotTier::Hot => *config::snapshot::HOT_TTL_SECS,
|
||||
SnapshotTier::Warm => *config::snapshot::WARM_TTL_SECS,
|
||||
SnapshotTier::Cold => 0,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn generating_timeout() -> u64 {
|
||||
*config::snapshot::GENERATING_TIMEOUT_SECS
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
use anyhow::{Context, Result};
|
||||
use serde_json::json;
|
||||
use serde_json;
|
||||
|
||||
use crate::core::chunk::types::{Chunk, ChunkRule, ChunkType};
|
||||
use crate::core::db::mongodb_db::MongoDb;
|
||||
@@ -77,9 +77,9 @@ impl SyncDb {
|
||||
let client = reqwest::Client::new();
|
||||
let response = client
|
||||
.post("http://localhost:11434/api/embeddings")
|
||||
.json(&json!({
|
||||
"model": "nomic-embed-text-v2-moe:latest",
|
||||
"prompt": text
|
||||
.json(&serde_json::json!({
|
||||
"model": "all-minilm",
|
||||
"prompt": text,
|
||||
}))
|
||||
.send()
|
||||
.await
|
||||
@@ -107,12 +107,21 @@ impl SyncDb {
|
||||
|
||||
for (i, segment) in asr_result.segments.iter().enumerate() {
|
||||
let segment: &AsrSegment = segment;
|
||||
let content = json!({
|
||||
"text": segment.text,
|
||||
"text_normalized": segment.text.to_lowercase(),
|
||||
let content = serde_json::json!({
|
||||
"rule": "rule1",
|
||||
"data": {
|
||||
"text": segment.text,
|
||||
"start": segment.start,
|
||||
"end": segment.end,
|
||||
},
|
||||
});
|
||||
|
||||
let metadata = json!({
|
||||
let metadata = serde_json::json!({
|
||||
"file_uuid": uuid,
|
||||
"chunk_type": "sentence",
|
||||
"chunk_rule": "rule1",
|
||||
"fps": 0.0, // Will be set later
|
||||
"start_frame": 0,
|
||||
"end_frame": 0,
|
||||
"language": asr_result.language,
|
||||
"language_probability": asr_result.language_probability,
|
||||
});
|
||||
|
||||
@@ -1,17 +1,14 @@
|
||||
use anyhow::{Context, Result};
|
||||
use chrono::Utc;
|
||||
use sqlx;
|
||||
use std::path::Path;
|
||||
use tracing::{info, warn};
|
||||
|
||||
use crate::core::db::{Database, PostgresDb, VideoRecord, VideoStatus};
|
||||
use crate::core::db::{PostgresDb, VideoRecord, VideoStatus};
|
||||
use crate::core::probe;
|
||||
use crate::core::storage::uuid as uuid_utils;
|
||||
use crate::core::storage::FileManager;
|
||||
use crate::uuid as uuid_utils;
|
||||
|
||||
/// Handles the automatic ingestion of video files.
|
||||
/// This service is responsible for:
|
||||
/// 1. Running `ffprobe` (Pre-processing)
|
||||
/// 2. Saving probe JSON
|
||||
/// 3. Registering the video in the database (making it visible in the API)
|
||||
pub struct IngestionService {
|
||||
db: PostgresDb,
|
||||
}
|
||||
@@ -21,20 +18,56 @@ impl IngestionService {
|
||||
Self { db }
|
||||
}
|
||||
|
||||
/// Registers a video file found in the watched directory.
|
||||
/// This function is idempotent: if the video (UUID) already exists, it skips.
|
||||
pub async fn ingest(&self, file_path: &str) -> Result<Option<String>> {
|
||||
let path = Path::new(file_path);
|
||||
|
||||
// 1. Validate extension
|
||||
if !is_video_extension(path) {
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
// 2. Compute UUID
|
||||
let uuid = uuid_utils::compute_uuid_from_path(file_path);
|
||||
let canonical_path = path.canonicalize().unwrap_or_else(|_| path.to_path_buf());
|
||||
let filename = path
|
||||
.file_name()
|
||||
.unwrap_or_default()
|
||||
.to_string_lossy()
|
||||
.to_string();
|
||||
|
||||
// Stable UUID based on MAC + Birthday + Filename.
|
||||
// Moving the file (path change) keeps the SAME identity.
|
||||
|
||||
// 1. Look for existing Birthday (Identity Anchor)
|
||||
// If the file (by name) was registered before, use its original birth time.
|
||||
let birthday = sqlx::query_scalar::<_, chrono::DateTime<chrono::Utc>>(
|
||||
"SELECT registration_time FROM dev.videos WHERE file_name = $1 AND registration_time IS NOT NULL LIMIT 1"
|
||||
)
|
||||
.bind(&filename)
|
||||
.fetch_optional(self.db.pool())
|
||||
.await
|
||||
.ok()
|
||||
.flatten()
|
||||
.map(|t| t.to_rfc3339())
|
||||
.unwrap_or_else(|| Utc::now().to_rfc3339());
|
||||
|
||||
let parent = canonical_path
|
||||
.parent()
|
||||
.map(|p| p.to_string_lossy().to_string())
|
||||
.unwrap_or_default();
|
||||
|
||||
// 2. Compute UUID
|
||||
let uuid = uuid_utils::compute_birth_uuid(
|
||||
&uuid_utils::get_mac_address(),
|
||||
&birthday,
|
||||
&canonical_path.to_string_lossy(),
|
||||
&filename,
|
||||
);
|
||||
|
||||
let parent = canonical_path
|
||||
.parent()
|
||||
.map(|p| p.to_string_lossy().to_string())
|
||||
.unwrap_or_default();
|
||||
|
||||
let username = uuid_utils::extract_username_from_path(&parent);
|
||||
|
||||
// 3. Check if already registered
|
||||
if let Ok(Some(_)) = self.db.get_video_by_uuid(&uuid).await {
|
||||
info!(
|
||||
"Video already registered: {} ({})",
|
||||
@@ -46,11 +79,9 @@ impl IngestionService {
|
||||
|
||||
info!("Starting ingestion for: {} ({})", path.display(), uuid);
|
||||
|
||||
// 4. Run ffprobe
|
||||
let probe_result = probe::probe_video(file_path)
|
||||
.with_context(|| format!("Failed to probe video: {}", file_path))?;
|
||||
|
||||
// 5. Extract metadata
|
||||
let duration = probe_result
|
||||
.format
|
||||
.duration
|
||||
@@ -78,7 +109,6 @@ impl IngestionService {
|
||||
}
|
||||
}
|
||||
|
||||
// 6. Save Probe JSON
|
||||
let file_manager = FileManager::new(std::path::PathBuf::from("."));
|
||||
let probe_json_str = serde_json::to_string_pretty(&probe_result)?;
|
||||
|
||||
@@ -88,33 +118,72 @@ impl IngestionService {
|
||||
info!("Probe JSON saved for {}", uuid);
|
||||
}
|
||||
|
||||
// 7. Create Record
|
||||
// Use absolute path for safety
|
||||
let canonical_path = path.canonicalize().unwrap_or_else(|_| path.to_path_buf());
|
||||
let total_frames = {
|
||||
let video_stream = probe_result
|
||||
.streams
|
||||
.iter()
|
||||
.find(|s| s.codec_type.as_deref() == Some("video"));
|
||||
|
||||
if let Some(stream) = video_stream {
|
||||
if let Some(nb_frames_str) = &stream.nb_frames {
|
||||
if let Ok(nb_frames) = nb_frames_str.parse::<u64>() {
|
||||
info!(
|
||||
"Using nb_frames from ffprobe: {} frames for {}",
|
||||
nb_frames,
|
||||
path.display()
|
||||
);
|
||||
Some(nb_frames)
|
||||
} else {
|
||||
warn!(
|
||||
"Failed to parse nb_frames, using duration * fps fallback for {}",
|
||||
path.display()
|
||||
);
|
||||
Some((duration * fps).floor() as u64)
|
||||
}
|
||||
} else {
|
||||
warn!(
|
||||
"nb_frames not available, using duration * fps fallback for {}",
|
||||
path.display()
|
||||
);
|
||||
Some((duration * fps).floor() as u64)
|
||||
}
|
||||
} else {
|
||||
warn!("No video stream found for {}", path.display());
|
||||
Some(0)
|
||||
}
|
||||
};
|
||||
|
||||
let birth_registration = serde_json::json!({
|
||||
"registration_source": {
|
||||
"username": username,
|
||||
"original_path": parent,
|
||||
"original_filename": filename
|
||||
}
|
||||
});
|
||||
|
||||
let record = VideoRecord {
|
||||
id: 0,
|
||||
uuid: uuid.clone(),
|
||||
file_uuid: uuid.clone(),
|
||||
file_path: canonical_path.to_string_lossy().to_string(),
|
||||
file_name: path
|
||||
.file_name()
|
||||
.unwrap_or_default()
|
||||
.to_string_lossy()
|
||||
.to_string(),
|
||||
file_name: filename,
|
||||
file_type: None,
|
||||
duration,
|
||||
width,
|
||||
height,
|
||||
fps,
|
||||
probe_json: Some(probe_json_str),
|
||||
storage: Default::default(),
|
||||
status: VideoStatus::Pending, // Ready for processing
|
||||
status: VideoStatus::Pending,
|
||||
processing_status: Some(serde_json::json!({"phase": "REGISTERED"})),
|
||||
birth_registration: None,
|
||||
user_id: None,
|
||||
job_id: None,
|
||||
created_at: String::new(),
|
||||
registration_time: None,
|
||||
total_frames: total_frames.unwrap_or(0),
|
||||
parent_uuid: None,
|
||||
};
|
||||
|
||||
// 8. Insert DB
|
||||
self.db
|
||||
.register_video(&record)
|
||||
.await
|
||||
@@ -125,9 +194,14 @@ impl IngestionService {
|
||||
.await
|
||||
.with_context(|| "Failed to set registration_time")?;
|
||||
|
||||
self.db
|
||||
.update_birth_registration(&uuid, &birth_registration)
|
||||
.await
|
||||
.with_context(|| "Failed to set birth_registration")?;
|
||||
|
||||
info!(
|
||||
"Successfully registered video: {} (UUID: {})",
|
||||
record.file_name, uuid
|
||||
"Successfully registered video: {} (UUID: {}, Birth UUID: {})",
|
||||
record.file_name, uuid, uuid
|
||||
);
|
||||
Ok(Some(uuid))
|
||||
}
|
||||
|
||||
@@ -12,7 +12,7 @@ pub struct PersonIdentity {
|
||||
pub person_id: String,
|
||||
pub face_identity_id: Option<i32>,
|
||||
pub speaker_id: Option<String>,
|
||||
pub video_uuid: String,
|
||||
pub file_uuid: String,
|
||||
pub confidence: f64,
|
||||
pub name: Option<String>,
|
||||
pub metadata: serde_json::Value,
|
||||
@@ -85,7 +85,7 @@ pub struct SuggestedBinding {
|
||||
pub struct PersonAppearance {
|
||||
pub id: i32,
|
||||
pub person_id: String,
|
||||
pub video_uuid: String,
|
||||
pub file_uuid: String,
|
||||
pub start_time: f64,
|
||||
pub end_time: f64,
|
||||
pub duration: f64,
|
||||
@@ -124,7 +124,7 @@ pub struct PersonStatistics {
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct CreatePersonIdentityRequest {
|
||||
pub video_uuid: String,
|
||||
pub file_uuid: String,
|
||||
pub face_identity_id: Option<i32>,
|
||||
pub speaker_id: Option<String>,
|
||||
pub name: Option<String>,
|
||||
@@ -196,7 +196,7 @@ mod tests {
|
||||
person_id: "person_001".to_string(),
|
||||
face_identity_id: Some(123),
|
||||
speaker_id: Some("SPEAKER_00".to_string()),
|
||||
video_uuid: "video_abc".to_string(),
|
||||
file_uuid: "video_abc".to_string(),
|
||||
confidence: 0.85,
|
||||
name: Some("张三".to_string()),
|
||||
metadata: serde_json::json!({"role": "host"}),
|
||||
@@ -220,7 +220,7 @@ mod tests {
|
||||
let appearance = PersonAppearance {
|
||||
id: 1,
|
||||
person_id: "person_001".to_string(),
|
||||
video_uuid: "video_abc".to_string(),
|
||||
file_uuid: "video_abc".to_string(),
|
||||
start_time: 10.5,
|
||||
end_time: 25.3,
|
||||
duration: 14.8,
|
||||
|
||||
@@ -16,6 +16,7 @@ pub struct StreamInfo {
|
||||
pub width: Option<u32>,
|
||||
pub height: Option<u32>,
|
||||
pub r_frame_rate: Option<String>,
|
||||
pub nb_frames: Option<String>,
|
||||
pub duration: Option<String>,
|
||||
pub sample_rate: Option<String>,
|
||||
pub channels: Option<u32>,
|
||||
@@ -64,6 +65,7 @@ pub fn probe_video(video_path: &str) -> Result<ProbeResult> {
|
||||
width: s["width"].as_u64().map(|v| v as u32),
|
||||
height: s["height"].as_u64().map(|v| v as u32),
|
||||
r_frame_rate: s["r_frame_rate"].as_str().map(String::from),
|
||||
nb_frames: s["nb_frames"].as_str().map(String::from),
|
||||
duration: s["duration"].as_str().map(String::from),
|
||||
sample_rate: s["sample_rate"].as_str().map(String::from),
|
||||
channels: s["channels"].as_u64().map(|v| v as u32),
|
||||
|
||||
@@ -28,6 +28,15 @@ pub struct Face {
|
||||
pub width: i32,
|
||||
pub height: i32,
|
||||
pub confidence: f32,
|
||||
pub embedding: Option<Vec<f32>>,
|
||||
pub landmarks: Option<Vec<Vec<f32>>>,
|
||||
pub attributes: Option<FaceAttributes>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
pub struct FaceAttributes {
|
||||
pub age: Option<i32>,
|
||||
pub gender: Option<String>,
|
||||
}
|
||||
|
||||
pub async fn process_face(
|
||||
@@ -89,6 +98,12 @@ mod tests {
|
||||
width: 50,
|
||||
height: 60,
|
||||
confidence: 0.95,
|
||||
embedding: Some(vec![0.1, 0.2, 0.3]),
|
||||
landmarks: Some(vec![vec![10.0, 20.0], vec![30.0, 40.0]]),
|
||||
attributes: Some(FaceAttributes {
|
||||
age: Some(30),
|
||||
gender: Some("male".to_string()),
|
||||
}),
|
||||
}],
|
||||
}],
|
||||
};
|
||||
@@ -96,6 +111,9 @@ mod tests {
|
||||
let json = serde_json::to_string(&result).unwrap();
|
||||
assert!(json.contains("face_1"));
|
||||
assert!(json.contains("\"width\":50"));
|
||||
assert!(json.contains("embedding"));
|
||||
assert!(json.contains("landmarks"));
|
||||
assert!(json.contains("attributes"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -108,7 +126,17 @@ mod tests {
|
||||
"frame": 30,
|
||||
"timestamp": 1.2,
|
||||
"faces": [
|
||||
{"face_id": "f1", "x": 10, "y": 20, "width": 30, "height": 40, "confidence": 0.85}
|
||||
{
|
||||
"face_id": "f1",
|
||||
"x": 10,
|
||||
"y": 20,
|
||||
"width": 30,
|
||||
"height": 40,
|
||||
"confidence": 0.85,
|
||||
"embedding": [0.1, 0.2, 0.3],
|
||||
"landmarks": [[5.0, 10.0]],
|
||||
"attributes": {"age": 25, "gender": "female"}
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
@@ -118,6 +146,9 @@ mod tests {
|
||||
assert_eq!(result.frame_count, 50);
|
||||
assert_eq!(result.frames.len(), 1);
|
||||
assert_eq!(result.frames[0].faces[0].x, 10);
|
||||
assert!(result.frames[0].faces[0].embedding.is_some());
|
||||
assert!(result.frames[0].faces[0].landmarks.is_some());
|
||||
assert!(result.frames[0].faces[0].attributes.is_some());
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -139,7 +170,33 @@ mod tests {
|
||||
width: 10,
|
||||
height: 10,
|
||||
confidence: 0.5,
|
||||
embedding: None,
|
||||
landmarks: None,
|
||||
attributes: None,
|
||||
};
|
||||
assert!(face.confidence >= 0.0 && face.confidence <= 1.0);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_face_with_embedding() {
|
||||
let face = Face {
|
||||
face_id: Some("face_001".to_string()),
|
||||
x: 100,
|
||||
y: 200,
|
||||
width: 50,
|
||||
height: 60,
|
||||
confidence: 0.95,
|
||||
embedding: Some(vec![0.1; 512]),
|
||||
landmarks: None,
|
||||
attributes: Some(FaceAttributes {
|
||||
age: Some(35),
|
||||
gender: Some("male".to_string()),
|
||||
}),
|
||||
};
|
||||
|
||||
assert!(face.embedding.is_some());
|
||||
let embedding = face.embedding.unwrap();
|
||||
assert_eq!(embedding.len(), 512);
|
||||
assert_eq!(embedding[0], 0.1);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -8,6 +8,7 @@ pub mod face_recognition;
|
||||
pub mod ocr;
|
||||
pub mod pose;
|
||||
pub mod scene_classification;
|
||||
pub mod snapshot_agent;
|
||||
pub mod story;
|
||||
pub mod visual_chunk;
|
||||
pub mod yolo;
|
||||
@@ -28,6 +29,7 @@ pub use pose::{process_pose, Bbox, Keypoint, PersonPose, PoseFrame, PoseResult};
|
||||
pub use scene_classification::{
|
||||
process_scene_classification, SceneClassificationResult, ScenePrediction, SceneSegment,
|
||||
};
|
||||
pub use snapshot_agent::{SnapshotAgent, SnapshotAgentConfig};
|
||||
pub use story::{process_story, StoryChildChunk, StoryParentChunk, StoryResult, StoryStats};
|
||||
pub use visual_chunk::{process_visual_chunk, process_visual_chunk_advanced, VisualChunkResult};
|
||||
pub use yolo::{process_yolo, YoloFrame, YoloObject, YoloResult};
|
||||
|
||||
491
src/core/processor/snapshot_agent.rs
Normal file
491
src/core/processor/snapshot_agent.rs
Normal file
@@ -0,0 +1,491 @@
|
||||
use std::path::Path;
|
||||
use std::process::Command;
|
||||
|
||||
use anyhow::{Context, Result};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use tracing::{info, warn};
|
||||
|
||||
use crate::core::config;
|
||||
use crate::core::db::{Database, PostgresDb};
|
||||
use crate::core::storage::snapshot_manager::SnapshotManager;
|
||||
|
||||
pub struct SnapshotAgentConfig {
|
||||
pub output_dir: String,
|
||||
pub hot_threshold: u64,
|
||||
}
|
||||
|
||||
impl Default for SnapshotAgentConfig {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
output_dir: config::OUTPUT_DIR.clone(),
|
||||
hot_threshold: *config::snapshot::HOT_THRESHOLD,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
struct FaceDetection {
|
||||
id: i32,
|
||||
file_uuid: String,
|
||||
frame_number: i64,
|
||||
confidence: f64,
|
||||
bbox: Option<serde_json::Value>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
struct VideoInfo {
|
||||
file_path: String,
|
||||
fps: f64,
|
||||
}
|
||||
|
||||
pub struct SnapshotAgent {
|
||||
config: SnapshotAgentConfig,
|
||||
manager: SnapshotManager,
|
||||
}
|
||||
|
||||
impl SnapshotAgent {
|
||||
pub fn new(config: SnapshotAgentConfig) -> Self {
|
||||
let manager = SnapshotManager::new(&config.output_dir);
|
||||
Self { config, manager }
|
||||
}
|
||||
|
||||
pub fn default() -> Self {
|
||||
Self::new(SnapshotAgentConfig::default())
|
||||
}
|
||||
|
||||
pub async fn generate_file_snapshots(
|
||||
&self,
|
||||
file_uuid: &str,
|
||||
snapshot_type: &str,
|
||||
) -> Result<()> {
|
||||
info!(
|
||||
"Starting snapshot generation: file_uuid={}, type={}",
|
||||
file_uuid, snapshot_type
|
||||
);
|
||||
|
||||
let db = PostgresDb::init()
|
||||
.await
|
||||
.context("Failed to connect to database")?;
|
||||
|
||||
let video_info = self
|
||||
.get_video_info(db.pool(), file_uuid)
|
||||
.await
|
||||
.context("Failed to get video info")?;
|
||||
|
||||
self.manager
|
||||
.ensure_file_dirs(file_uuid)
|
||||
.context("Failed to create snapshot directories")?;
|
||||
|
||||
match snapshot_type {
|
||||
"faces" => {
|
||||
self.extract_face_snapshots(db.pool(), file_uuid, &video_info)
|
||||
.await?
|
||||
}
|
||||
"ocr" => {
|
||||
self.extract_ocr_snapshots(db.pool(), file_uuid, &video_info)
|
||||
.await?
|
||||
}
|
||||
"logos" => {
|
||||
self.extract_logo_snapshots(db.pool(), file_uuid, &video_info)
|
||||
.await?
|
||||
}
|
||||
"products" => {
|
||||
self.extract_product_snapshots(db.pool(), file_uuid, &video_info)
|
||||
.await?
|
||||
}
|
||||
_ => {
|
||||
return Err(anyhow::anyhow!(
|
||||
"Unsupported snapshot type: {}",
|
||||
snapshot_type
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
info!(
|
||||
"Snapshot generation completed: file_uuid={}, type={}",
|
||||
file_uuid, snapshot_type
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn extract_face_snapshots(
|
||||
&self,
|
||||
pool: &sqlx::PgPool,
|
||||
file_uuid: &str,
|
||||
video_info: &VideoInfo,
|
||||
) -> Result<()> {
|
||||
let face_table = crate::core::db::schema::table_name("face_detections");
|
||||
let query = format!(
|
||||
"SELECT id, face_id, file_uuid, frame_number, confidence, bbox
|
||||
FROM {}
|
||||
WHERE file_uuid = $1 AND confidence >= 0.5
|
||||
ORDER BY confidence DESC
|
||||
LIMIT 50",
|
||||
face_table
|
||||
);
|
||||
|
||||
let faces: Vec<(i32, String, i64, f64, Option<serde_json::Value>)> = sqlx::query_as(&query)
|
||||
.bind(file_uuid)
|
||||
.fetch_all(pool)
|
||||
.await
|
||||
.context("Failed to query face detections")?;
|
||||
|
||||
let output_dir = self.manager.file_type_dir(file_uuid, "faces");
|
||||
let mut saved_count = 0;
|
||||
|
||||
for (face_id_db, _uuid, frame_num, confidence, bbox_json) in faces {
|
||||
let bbox = match bbox_json {
|
||||
Some(json) => serde_json::from_value::<Bbox>(json).unwrap_or_default(),
|
||||
None => Bbox::default(),
|
||||
};
|
||||
|
||||
let timestamp = frame_num as f64 / video_info.fps;
|
||||
let output_path =
|
||||
output_dir.join(format!("face_{}_conf{:.2}.jpg", face_id_db, confidence));
|
||||
|
||||
if self
|
||||
.extract_frame(&video_info.file_path, timestamp, &bbox, &output_path)
|
||||
.await
|
||||
.is_ok()
|
||||
{
|
||||
saved_count += 1;
|
||||
}
|
||||
}
|
||||
|
||||
info!(
|
||||
"Extracted {} face snapshots for file_uuid={}",
|
||||
saved_count, file_uuid
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn extract_ocr_snapshots(
|
||||
&self,
|
||||
pool: &sqlx::PgPool,
|
||||
file_uuid: &str,
|
||||
video_info: &VideoInfo,
|
||||
) -> Result<()> {
|
||||
let ocr_table = crate::core::db::schema::table_name("ocr_detections");
|
||||
let query = format!(
|
||||
"SELECT id, frame_number, text, bbox, confidence
|
||||
FROM {}
|
||||
WHERE file_uuid = $1 AND confidence >= 0.7
|
||||
ORDER BY confidence DESC
|
||||
LIMIT 30",
|
||||
ocr_table
|
||||
);
|
||||
|
||||
let detections: Vec<(i32, i64, String, Option<serde_json::Value>, f64)> =
|
||||
sqlx::query_as(&query)
|
||||
.bind(file_uuid)
|
||||
.fetch_all(pool)
|
||||
.await
|
||||
.context("Failed to query OCR detections")?;
|
||||
|
||||
let output_dir = self.manager.file_type_dir(file_uuid, "ocr");
|
||||
let mut saved_count = 0;
|
||||
|
||||
for (det_id, frame_num, text, bbox_json, _confidence) in detections {
|
||||
let bbox = match bbox_json {
|
||||
Some(json) => serde_json::from_value::<Bbox>(json).unwrap_or_default(),
|
||||
None => Bbox::default(),
|
||||
};
|
||||
|
||||
let timestamp = frame_num as f64 / video_info.fps;
|
||||
let safe_text = text
|
||||
.chars()
|
||||
.take(20)
|
||||
.filter(|c| c.is_alphanumeric() || *c == ' ')
|
||||
.collect::<String>()
|
||||
.replace(' ', "_");
|
||||
let output_path = output_dir.join(format!("ocr_{}_{}.jpg", det_id, safe_text));
|
||||
|
||||
if self
|
||||
.extract_frame(&video_info.file_path, timestamp, &bbox, &output_path)
|
||||
.await
|
||||
.is_ok()
|
||||
{
|
||||
saved_count += 1;
|
||||
}
|
||||
}
|
||||
|
||||
info!(
|
||||
"Extracted {} OCR snapshots for file_uuid={}",
|
||||
saved_count, file_uuid
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn extract_logo_snapshots(
|
||||
&self,
|
||||
pool: &sqlx::PgPool,
|
||||
file_uuid: &str,
|
||||
video_info: &VideoInfo,
|
||||
) -> Result<()> {
|
||||
let yolo_table = crate::core::db::schema::table_name("yolo_detections");
|
||||
let query = format!(
|
||||
"SELECT id, frame_number, class_name, bbox, confidence
|
||||
FROM {}
|
||||
WHERE file_uuid = $1 AND class_name IN ('logo', 'brand') AND confidence >= 0.6
|
||||
ORDER BY confidence DESC
|
||||
LIMIT 20",
|
||||
yolo_table
|
||||
);
|
||||
|
||||
let detections: Vec<(i32, i64, String, Option<serde_json::Value>, f64)> =
|
||||
sqlx::query_as(&query)
|
||||
.bind(file_uuid)
|
||||
.fetch_all(pool)
|
||||
.await
|
||||
.context("Failed to query logo detections")?;
|
||||
|
||||
let output_dir = self.manager.file_type_dir(file_uuid, "logos");
|
||||
let mut saved_count = 0;
|
||||
|
||||
for (det_id, frame_num, class_name, bbox_json, confidence) in detections {
|
||||
let bbox = match bbox_json {
|
||||
Some(json) => serde_json::from_value::<Bbox>(json).unwrap_or_default(),
|
||||
None => Bbox::default(),
|
||||
};
|
||||
|
||||
let timestamp = frame_num as f64 / video_info.fps;
|
||||
let output_path = output_dir.join(format!(
|
||||
"logo_{}_{}_{:.2}.jpg",
|
||||
det_id, class_name, confidence
|
||||
));
|
||||
|
||||
if self
|
||||
.extract_frame(&video_info.file_path, timestamp, &bbox, &output_path)
|
||||
.await
|
||||
.is_ok()
|
||||
{
|
||||
saved_count += 1;
|
||||
}
|
||||
}
|
||||
|
||||
info!(
|
||||
"Extracted {} logo snapshots for file_uuid={}",
|
||||
saved_count, file_uuid
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn extract_product_snapshots(
|
||||
&self,
|
||||
pool: &sqlx::PgPool,
|
||||
file_uuid: &str,
|
||||
video_info: &VideoInfo,
|
||||
) -> Result<()> {
|
||||
let yolo_table = crate::core::db::schema::table_name("yolo_detections");
|
||||
let query = format!(
|
||||
"SELECT id, frame_number, class_name, bbox, confidence
|
||||
FROM {}
|
||||
WHERE file_uuid = $1 AND class_name NOT IN ('logo', 'brand', 'person', 'face') AND confidence >= 0.6
|
||||
ORDER BY confidence DESC
|
||||
LIMIT 20",
|
||||
yolo_table
|
||||
);
|
||||
|
||||
let detections: Vec<(i32, i64, String, Option<serde_json::Value>, f64)> =
|
||||
sqlx::query_as(&query)
|
||||
.bind(file_uuid)
|
||||
.fetch_all(pool)
|
||||
.await
|
||||
.context("Failed to query product detections")?;
|
||||
|
||||
let output_dir = self.manager.file_type_dir(file_uuid, "products");
|
||||
let mut saved_count = 0;
|
||||
|
||||
for (det_id, frame_num, class_name, bbox_json, confidence) in detections {
|
||||
let bbox = match bbox_json {
|
||||
Some(json) => serde_json::from_value::<Bbox>(json).unwrap_or_default(),
|
||||
None => Bbox::default(),
|
||||
};
|
||||
|
||||
let timestamp = frame_num as f64 / video_info.fps;
|
||||
let output_path = output_dir.join(format!(
|
||||
"product_{}_{}_{:.2}.jpg",
|
||||
det_id, class_name, confidence
|
||||
));
|
||||
|
||||
if self
|
||||
.extract_frame(&video_info.file_path, timestamp, &bbox, &output_path)
|
||||
.await
|
||||
.is_ok()
|
||||
{
|
||||
saved_count += 1;
|
||||
}
|
||||
}
|
||||
|
||||
info!(
|
||||
"Extracted {} product snapshots for file_uuid={}",
|
||||
saved_count, file_uuid
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn extract_frame(
|
||||
&self,
|
||||
video_path: &str,
|
||||
timestamp: f64,
|
||||
bbox: &Bbox,
|
||||
output_path: &Path,
|
||||
) -> Result<()> {
|
||||
let crop_filter = format!("crop={}:{}:{}:{}", bbox.width, bbox.height, bbox.x, bbox.y);
|
||||
|
||||
let output = Command::new("ffmpeg")
|
||||
.args(&[
|
||||
"-ss",
|
||||
&format!("{:.3}", timestamp),
|
||||
"-i",
|
||||
video_path,
|
||||
"-vf",
|
||||
&crop_filter,
|
||||
"-frames:v",
|
||||
"1",
|
||||
"-f",
|
||||
"image2",
|
||||
"-y",
|
||||
output_path.to_str().context("Invalid output path")?,
|
||||
])
|
||||
.output()
|
||||
.context("Failed to execute ffmpeg")?;
|
||||
|
||||
if !output.status.success() {
|
||||
let stderr = String::from_utf8_lossy(&output.stderr);
|
||||
return Err(anyhow::anyhow!("ffmpeg failed: {}", stderr));
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn get_video_info(&self, pool: &sqlx::PgPool, file_uuid: &str) -> Result<VideoInfo> {
|
||||
let video_table = crate::core::db::schema::table_name("videos");
|
||||
let query = format!(
|
||||
"SELECT file_path, fps FROM {} WHERE file_uuid = $1",
|
||||
video_table
|
||||
);
|
||||
|
||||
let row: Option<(String, f64)> = sqlx::query_as(&query)
|
||||
.bind(file_uuid)
|
||||
.fetch_optional(pool)
|
||||
.await
|
||||
.context("Failed to query video info")?;
|
||||
|
||||
match row {
|
||||
Some((file_path, fps)) => Ok(VideoInfo { file_path, fps }),
|
||||
None => Err(anyhow::anyhow!("Video not found: file_uuid={}", file_uuid)),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn migrate_snapshots(
|
||||
&self,
|
||||
new_uuid: &str,
|
||||
parent_uuid: &str,
|
||||
) -> Result<Vec<String>> {
|
||||
info!(
|
||||
"Starting snapshot migration: {} -> {}",
|
||||
parent_uuid, new_uuid
|
||||
);
|
||||
|
||||
self.manager
|
||||
.ensure_file_dirs(new_uuid)
|
||||
.context("Failed to create snapshot directories")?;
|
||||
|
||||
let parent_types = self.manager.list_snapshot_types(parent_uuid);
|
||||
let mut migrated = Vec::new();
|
||||
|
||||
for snap_type in &parent_types {
|
||||
let src = self.manager.file_type_dir(parent_uuid, snap_type);
|
||||
let dst = self.manager.file_type_dir(new_uuid, snap_type);
|
||||
|
||||
if src.exists() {
|
||||
if let Err(e) = copy_dir_recursive(&src, &dst) {
|
||||
warn!("Failed to migrate {} snapshots: {}", snap_type, e);
|
||||
} else {
|
||||
migrated.push(snap_type.clone());
|
||||
info!(
|
||||
"Migrated {} snapshots: {} -> {}",
|
||||
snap_type, parent_uuid, new_uuid
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
info!("Migration completed: {} types migrated", migrated.len());
|
||||
Ok(migrated)
|
||||
}
|
||||
|
||||
pub async fn auto_tear_down(&self, file_uuid: &str) -> Result<()> {
|
||||
info!("Starting auto tear down for file_uuid={}", file_uuid);
|
||||
|
||||
let types = self.manager.list_snapshot_types(file_uuid);
|
||||
let hits = types.len() as u64;
|
||||
let tier = SnapshotManager::compute_tier(hits);
|
||||
|
||||
if tier != crate::core::storage::snapshot_manager::SnapshotTier::Cold {
|
||||
info!(
|
||||
"Skipping tear down: file_uuid={} is not Cold (tier={:?})",
|
||||
file_uuid, tier
|
||||
);
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let redis_cache = crate::core::cache::redis_cache::RedisCache::new()
|
||||
.context("Failed to create Redis cache")?;
|
||||
|
||||
let last_access = redis_cache.get_snapshot_hits(file_uuid).await.unwrap_or(0);
|
||||
if last_access > 0 {
|
||||
info!(
|
||||
"Skipping tear down: file_uuid={} has recent access (hits={})",
|
||||
file_uuid, last_access
|
||||
);
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
self.manager
|
||||
.remove_file_snapshots(file_uuid)
|
||||
.context("Failed to remove snapshot files")?;
|
||||
|
||||
let status_key =
|
||||
crate::core::cache::redis_cache::RedisCache::snapshot_status_key(file_uuid).await;
|
||||
let hits_key =
|
||||
crate::core::cache::redis_cache::RedisCache::snapshot_hits_key(file_uuid).await;
|
||||
let access_key =
|
||||
crate::core::cache::redis_cache::RedisCache::snapshot_last_access_key(file_uuid).await;
|
||||
|
||||
redis_cache.delete(&status_key).await.ok();
|
||||
redis_cache.delete(&hits_key).await.ok();
|
||||
redis_cache.delete(&access_key).await.ok();
|
||||
|
||||
info!("Auto tear down completed for file_uuid={}", file_uuid);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn manager(&self) -> &SnapshotManager {
|
||||
&self.manager
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Default, Serialize, Deserialize)]
|
||||
struct Bbox {
|
||||
x: i32,
|
||||
y: i32,
|
||||
width: i32,
|
||||
height: i32,
|
||||
}
|
||||
|
||||
fn copy_dir_recursive(src: &Path, dst: &Path) -> std::io::Result<()> {
|
||||
std::fs::create_dir_all(dst)?;
|
||||
for entry in std::fs::read_dir(src)? {
|
||||
let entry = entry?;
|
||||
let path = entry.path();
|
||||
let dest_path = dst.join(entry.file_name());
|
||||
if path.is_dir() {
|
||||
copy_dir_recursive(&path, &dest_path)?;
|
||||
} else {
|
||||
std::fs::copy(&path, &dest_path)?;
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
@@ -1,7 +1,9 @@
|
||||
pub mod file_manager;
|
||||
pub mod output_dir;
|
||||
pub mod snapshot_manager;
|
||||
pub mod uuid;
|
||||
|
||||
pub use file_manager::FileManager;
|
||||
pub use output_dir::OutputDir;
|
||||
pub use snapshot_manager::SnapshotManager;
|
||||
pub use uuid::compute_uuid;
|
||||
|
||||
268
src/core/storage/snapshot_manager.rs
Normal file
268
src/core/storage/snapshot_manager.rs
Normal file
@@ -0,0 +1,268 @@
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
use crate::core::config;
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum SnapshotTier {
|
||||
Hot,
|
||||
Warm,
|
||||
Cold,
|
||||
}
|
||||
|
||||
impl std::fmt::Display for SnapshotTier {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
SnapshotTier::Hot => write!(f, "hot"),
|
||||
SnapshotTier::Warm => write!(f, "warm"),
|
||||
SnapshotTier::Cold => write!(f, "cold"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
|
||||
pub struct SnapshotStatus {
|
||||
pub file_uuid: String,
|
||||
pub tier: SnapshotTier,
|
||||
pub hits: u64,
|
||||
pub types: Vec<String>,
|
||||
pub generated_at: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct SnapshotManager {
|
||||
base_dir: PathBuf,
|
||||
}
|
||||
|
||||
impl SnapshotManager {
|
||||
pub fn new(user_dir: &str) -> Self {
|
||||
let snapshot_dir_name = config::snapshot::SNAPSHOT_DIR_NAME.as_str();
|
||||
let base_dir = Path::new(user_dir).join(snapshot_dir_name);
|
||||
Self { base_dir }
|
||||
}
|
||||
|
||||
pub fn base_dir(&self) -> &Path {
|
||||
&self.base_dir
|
||||
}
|
||||
|
||||
pub fn file_snapshot_dir(&self, file_uuid: &str) -> PathBuf {
|
||||
self.base_dir.join(file_uuid)
|
||||
}
|
||||
|
||||
pub fn file_type_dir(&self, file_uuid: &str, snapshot_type: &str) -> PathBuf {
|
||||
self.base_dir.join(file_uuid).join(snapshot_type)
|
||||
}
|
||||
|
||||
pub fn identity_snapshot_dir(&self, identity_uuid: &str) -> PathBuf {
|
||||
self.base_dir.join("identities").join(identity_uuid)
|
||||
}
|
||||
|
||||
pub fn identity_face_dir(&self, identity_uuid: &str) -> PathBuf {
|
||||
self.base_dir
|
||||
.join("identities")
|
||||
.join(identity_uuid)
|
||||
.join("faces")
|
||||
}
|
||||
|
||||
pub fn ensure_file_dirs(&self, file_uuid: &str) -> std::io::Result<()> {
|
||||
let dir = self.file_snapshot_dir(file_uuid);
|
||||
std::fs::create_dir_all(&dir)?;
|
||||
for snap_type in ["faces", "logos", "products", "ocr"] {
|
||||
std::fs::create_dir_all(dir.join(snap_type))?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn ensure_identity_dirs(&self, identity_uuid: &str) -> std::io::Result<()> {
|
||||
let dir = self.identity_snapshot_dir(identity_uuid);
|
||||
std::fs::create_dir_all(&dir)?;
|
||||
std::fs::create_dir_all(dir.join("faces"))?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn compute_tier(hits: u64) -> SnapshotTier {
|
||||
let threshold = *config::snapshot::HOT_THRESHOLD;
|
||||
if hits >= threshold {
|
||||
SnapshotTier::Hot
|
||||
} else if hits > 0 {
|
||||
SnapshotTier::Warm
|
||||
} else {
|
||||
SnapshotTier::Cold
|
||||
}
|
||||
}
|
||||
|
||||
pub fn tier_ttl(&self, tier: SnapshotTier) -> u64 {
|
||||
match tier {
|
||||
SnapshotTier::Hot => *config::snapshot::HOT_TTL_SECS,
|
||||
SnapshotTier::Warm => *config::snapshot::WARM_TTL_SECS,
|
||||
SnapshotTier::Cold => 0,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn snapshot_exists(&self, file_uuid: &str, snapshot_type: &str) -> bool {
|
||||
self.file_type_dir(file_uuid, snapshot_type).exists()
|
||||
}
|
||||
|
||||
pub fn list_snapshot_types(&self, file_uuid: &str) -> Vec<String> {
|
||||
let dir = self.file_snapshot_dir(file_uuid);
|
||||
if !dir.exists() {
|
||||
return Vec::new();
|
||||
}
|
||||
std::fs::read_dir(&dir)
|
||||
.into_iter()
|
||||
.flatten()
|
||||
.filter_map(|e| e.ok())
|
||||
.filter(|e| e.path().is_dir())
|
||||
.filter_map(|e| e.file_name().to_str().map(String::from))
|
||||
.collect()
|
||||
}
|
||||
|
||||
pub fn remove_file_snapshots(&self, file_uuid: &str) -> std::io::Result<()> {
|
||||
let dir = self.file_snapshot_dir(file_uuid);
|
||||
if dir.exists() {
|
||||
std::fs::remove_dir_all(&dir)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn remove_identity_snapshots(&self, identity_uuid: &str) -> std::io::Result<()> {
|
||||
let dir = self.identity_snapshot_dir(identity_uuid);
|
||||
if dir.exists() {
|
||||
std::fs::remove_dir_all(&dir)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
fn create_test_manager() -> (SnapshotManager, tempfile::TempDir) {
|
||||
let temp_dir = tempfile::tempdir().unwrap();
|
||||
let manager = SnapshotManager::new(temp_dir.path().to_str().unwrap());
|
||||
(manager, temp_dir)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_compute_tier_hot() {
|
||||
assert_eq!(SnapshotManager::compute_tier(5), SnapshotTier::Hot);
|
||||
assert_eq!(SnapshotManager::compute_tier(10), SnapshotTier::Hot);
|
||||
assert_eq!(SnapshotManager::compute_tier(100), SnapshotTier::Hot);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_compute_tier_warm() {
|
||||
assert_eq!(SnapshotManager::compute_tier(1), SnapshotTier::Warm);
|
||||
assert_eq!(SnapshotManager::compute_tier(4), SnapshotTier::Warm);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_compute_tier_cold() {
|
||||
assert_eq!(SnapshotManager::compute_tier(0), SnapshotTier::Cold);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_tier_display() {
|
||||
assert_eq!(SnapshotTier::Hot.to_string(), "hot");
|
||||
assert_eq!(SnapshotTier::Warm.to_string(), "warm");
|
||||
assert_eq!(SnapshotTier::Cold.to_string(), "cold");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_ensure_file_dirs_creates_structure() {
|
||||
let (manager, _temp) = create_test_manager();
|
||||
let file_uuid = "test_file_123";
|
||||
|
||||
manager.ensure_file_dirs(file_uuid).unwrap();
|
||||
|
||||
assert!(manager.file_snapshot_dir(file_uuid).exists());
|
||||
assert!(manager.file_type_dir(file_uuid, "faces").exists());
|
||||
assert!(manager.file_type_dir(file_uuid, "logos").exists());
|
||||
assert!(manager.file_type_dir(file_uuid, "products").exists());
|
||||
assert!(manager.file_type_dir(file_uuid, "ocr").exists());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_ensure_identity_dirs_creates_structure() {
|
||||
let (manager, _temp) = create_test_manager();
|
||||
let identity_uuid = "test_identity_456";
|
||||
|
||||
manager.ensure_identity_dirs(identity_uuid).unwrap();
|
||||
|
||||
assert!(manager.identity_snapshot_dir(identity_uuid).exists());
|
||||
assert!(manager.identity_face_dir(identity_uuid).exists());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_list_snapshot_types_empty() {
|
||||
let (manager, _temp) = create_test_manager();
|
||||
let types = manager.list_snapshot_types("nonexistent");
|
||||
assert!(types.is_empty());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_list_snapshot_types_after_creation() {
|
||||
let (manager, _temp) = create_test_manager();
|
||||
let file_uuid = "test_file_789";
|
||||
|
||||
manager.ensure_file_dirs(file_uuid).unwrap();
|
||||
let types = manager.list_snapshot_types(file_uuid);
|
||||
|
||||
assert_eq!(types.len(), 4);
|
||||
assert!(types.contains(&"faces".to_string()));
|
||||
assert!(types.contains(&"logos".to_string()));
|
||||
assert!(types.contains(&"products".to_string()));
|
||||
assert!(types.contains(&"ocr".to_string()));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_remove_file_snapshots() {
|
||||
let (manager, _temp) = create_test_manager();
|
||||
let file_uuid = "test_file_remove";
|
||||
|
||||
manager.ensure_file_dirs(file_uuid).unwrap();
|
||||
assert!(manager.file_snapshot_dir(file_uuid).exists());
|
||||
|
||||
manager.remove_file_snapshots(file_uuid).unwrap();
|
||||
assert!(!manager.file_snapshot_dir(file_uuid).exists());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_remove_identity_snapshots() {
|
||||
let (manager, _temp) = create_test_manager();
|
||||
let identity_uuid = "test_identity_remove";
|
||||
|
||||
manager.ensure_identity_dirs(identity_uuid).unwrap();
|
||||
assert!(manager.identity_snapshot_dir(identity_uuid).exists());
|
||||
|
||||
manager.remove_identity_snapshots(identity_uuid).unwrap();
|
||||
assert!(!manager.identity_snapshot_dir(identity_uuid).exists());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_snapshot_exists() {
|
||||
let (manager, _temp) = create_test_manager();
|
||||
let file_uuid = "test_exists";
|
||||
|
||||
assert!(!manager.snapshot_exists(file_uuid, "faces"));
|
||||
|
||||
manager.ensure_file_dirs(file_uuid).unwrap();
|
||||
assert!(manager.snapshot_exists(file_uuid, "faces"));
|
||||
assert!(!manager.snapshot_exists(file_uuid, "nonexistent"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_tier_ttl() {
|
||||
let (manager, _temp) = create_test_manager();
|
||||
|
||||
let hot_ttl = manager.tier_ttl(SnapshotTier::Hot);
|
||||
assert_eq!(hot_ttl, *config::snapshot::HOT_TTL_SECS);
|
||||
|
||||
let warm_ttl = manager.tier_ttl(SnapshotTier::Warm);
|
||||
assert_eq!(warm_ttl, *config::snapshot::WARM_TTL_SECS);
|
||||
|
||||
let cold_ttl = manager.tier_ttl(SnapshotTier::Cold);
|
||||
assert_eq!(cold_ttl, 0);
|
||||
}
|
||||
}
|
||||
@@ -2,12 +2,12 @@ use sha2::{Digest, Sha256};
|
||||
use std::path::PathBuf;
|
||||
|
||||
/// Compute UUID from file path using SHA256
|
||||
/// UUID = SHA256(user_path + filename)[0:16]
|
||||
/// UUID = SHA256(user_path + filename)[0:32]
|
||||
pub fn compute_uuid(user_path: &str, filename: &str) -> String {
|
||||
let key = format!("{}/{}", user_path.trim_end_matches('/'), filename);
|
||||
let hash = Sha256::digest(key.as_bytes());
|
||||
let hash_str = hex::encode(hash);
|
||||
hash_str[0..16].to_string()
|
||||
hash_str[0..32].to_string()
|
||||
}
|
||||
|
||||
/// Compute UUID from full file path
|
||||
@@ -29,19 +29,16 @@ pub fn compute_uuid_from_path(full_path: &str) -> String {
|
||||
/// Input: ./demo/video.mp4 or ./demo/path/to/video.mp4
|
||||
/// Returns: (username, filepath) e.g., ("demo", "video.mp4") or ("demo", "path/to/video.mp4")
|
||||
pub fn extract_user_from_relative_path(relative_path: &str) -> (String, String) {
|
||||
// Remove leading ./
|
||||
let path = relative_path.strip_prefix("./").unwrap_or(relative_path);
|
||||
|
||||
let path_buf = PathBuf::from(path);
|
||||
|
||||
// First component is username
|
||||
let mut components = path_buf.components();
|
||||
let username = components
|
||||
.next()
|
||||
.map(|c| c.as_os_str().to_string_lossy().to_string())
|
||||
.unwrap_or_default();
|
||||
|
||||
// Remaining path (filepath)
|
||||
let filepath: String = components
|
||||
.map(|c| c.as_os_str().to_string_lossy().to_string())
|
||||
.collect::<Vec<_>>()
|
||||
@@ -57,6 +54,62 @@ pub fn compute_uuid_from_relative_path(relative_path: &str) -> String {
|
||||
compute_uuid(&username, &filepath)
|
||||
}
|
||||
|
||||
/// Get MAC address of primary network interface
|
||||
/// Returns MAC address in format: a1:b2:c3:d4:e5:f6
|
||||
pub fn get_mac_address() -> String {
|
||||
use mac_address::get_mac_address;
|
||||
|
||||
match get_mac_address() {
|
||||
Ok(Some(mac)) => {
|
||||
let bytes = mac.bytes();
|
||||
format!(
|
||||
"{:02x}:{:02x}:{:02x}:{:02x}:{:02x}:{:02x}",
|
||||
bytes[0], bytes[1], bytes[2], bytes[3], bytes[4], bytes[5]
|
||||
)
|
||||
}
|
||||
Ok(None) => "00:00:00:00:00:00".to_string(),
|
||||
Err(_) => "00:00:00:00:00:00".to_string(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Compute Birth UUID (Stable Identity with Location)
|
||||
/// UUID = SHA256(mac_address|birthday|path|filename)[0:32]
|
||||
///
|
||||
/// This UUID format ensures:
|
||||
/// - Location Encoding: Path is part of the identity (like location code in ID card).
|
||||
/// - Stability: Uses the original Birthday, not the current timestamp.
|
||||
/// - Uniqueness: Different MAC, Birthday, Path, or Filename produces different UUIDs.
|
||||
pub fn compute_birth_uuid(
|
||||
mac_address: &str,
|
||||
birthday: &str, // Fixed timestamp of original registration
|
||||
path: &str, // Canonical file path (Location)
|
||||
filename: &str,
|
||||
) -> String {
|
||||
let key = format!(
|
||||
"{}|{}|{}|{}",
|
||||
mac_address,
|
||||
birthday,
|
||||
path.trim_end_matches('/'),
|
||||
filename
|
||||
);
|
||||
let hash = Sha256::digest(key.as_bytes());
|
||||
hex::encode(hash)[0..32].to_string()
|
||||
}
|
||||
|
||||
/// Check if UUID is Birth UUID format (32 characters)
|
||||
pub fn is_birth_uuid(uuid: &str) -> bool {
|
||||
uuid.len() == 32 && !uuid.contains('_')
|
||||
}
|
||||
|
||||
/// Extract username from sftpgo user home path
|
||||
/// Input: ./demo/video.mp4 or /Users/.../demo/video.mp4
|
||||
/// Returns: username (e.g., "demo")
|
||||
pub fn extract_username_from_path(path: &str) -> String {
|
||||
let relative = path.strip_prefix("./").unwrap_or(path);
|
||||
let parts: Vec<&str> = relative.split('/').collect();
|
||||
parts.first().copied().unwrap_or("demo").to_string()
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
@@ -64,14 +117,14 @@ mod tests {
|
||||
#[test]
|
||||
fn test_uuid_computation() {
|
||||
let uuid = compute_uuid("/Users/test/Videos", "video.mp4");
|
||||
assert_eq!(uuid.len(), 16);
|
||||
assert_eq!(uuid.len(), 32);
|
||||
println!("UUID: {}", uuid);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_uuid_from_path() {
|
||||
let uuid = compute_uuid_from_path("/Users/test/Videos/video.mp4");
|
||||
assert_eq!(uuid.len(), 16);
|
||||
assert_eq!(uuid.len(), 32);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -90,11 +143,102 @@ mod tests {
|
||||
let uuid1 = compute_uuid_from_relative_path("./demo/video.mp4");
|
||||
let uuid2 = compute_uuid_from_relative_path("./demo/video.mp4");
|
||||
assert_eq!(uuid1, uuid2);
|
||||
assert_eq!(uuid1.len(), 16);
|
||||
assert_eq!(uuid1.len(), 32);
|
||||
|
||||
// Different users with same filename should have different UUIDs
|
||||
let uuid_demo = compute_uuid_from_relative_path("./demo/video.mp4");
|
||||
let uuid_warren = compute_uuid_from_relative_path("./warren/video.mp4");
|
||||
assert_ne!(uuid_demo, uuid_warren);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_get_mac_address() {
|
||||
let mac = get_mac_address();
|
||||
assert_eq!(mac.len(), 17); // a1:b2:c3:d4:e5:f6
|
||||
assert!(mac.contains(':'));
|
||||
println!("MAC Address: {}", mac);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_birth_uuid_generation() {
|
||||
let uuid = compute_birth_uuid(
|
||||
"a1:b2:c3:d4:e5:f6",
|
||||
"2026-04-27T22:00:00+08:00",
|
||||
"/Users/test/Videos",
|
||||
"video.mp4",
|
||||
);
|
||||
assert_eq!(uuid.len(), 32);
|
||||
println!("Birth UUID: {}", uuid);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_birth_uuid_different_mac() {
|
||||
let uuid1 = compute_birth_uuid(
|
||||
"a1:b2:c3:d4:e5:f6",
|
||||
"2026-04-27T10:00:00",
|
||||
"/Users/test/Videos",
|
||||
"video.mp4",
|
||||
);
|
||||
let uuid2 = compute_birth_uuid(
|
||||
"d4:e5:f6:a1:b2:c3",
|
||||
"2026-04-27T10:00:00",
|
||||
"/Users/test/Videos",
|
||||
"video.mp4",
|
||||
);
|
||||
assert_ne!(uuid1, uuid2);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_birth_uuid_different_path() {
|
||||
// Moving file to different path creates new identity
|
||||
let uuid1 = compute_birth_uuid(
|
||||
"a1:b2:c3:d4:e5:f6",
|
||||
"2026-04-27T10:00:00",
|
||||
"/Users/test/Videos",
|
||||
"video.mp4",
|
||||
);
|
||||
let uuid2 = compute_birth_uuid(
|
||||
"a1:b2:c3:d4:e5:f6",
|
||||
"2026-04-27T10:00:00",
|
||||
"/Users/test/Archive",
|
||||
"video.mp4",
|
||||
);
|
||||
assert_ne!(uuid1, uuid2);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_birth_uuid_same_elements() {
|
||||
let uuid1 = compute_birth_uuid(
|
||||
"a1:b2:c3:d4:e5:f6",
|
||||
"2026-04-27T10:00:00",
|
||||
"/Users/test/Videos",
|
||||
"video.mp4",
|
||||
);
|
||||
let uuid2 = compute_birth_uuid(
|
||||
"a1:b2:c3:d4:e5:f6",
|
||||
"2026-04-27T10:00:00",
|
||||
"/Users/test/Videos",
|
||||
"video.mp4",
|
||||
);
|
||||
assert_eq!(uuid1, uuid2); // Same elements = same UUID
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_is_birth_uuid() {
|
||||
let birth_uuid = compute_birth_uuid(
|
||||
"a1:b2:c3:d4:e5:f6",
|
||||
"2026-04-27T10:00:00",
|
||||
"/Users/demo",
|
||||
"video.mp4",
|
||||
);
|
||||
assert!(is_birth_uuid(&birth_uuid));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_extract_username_from_path() {
|
||||
let username = extract_username_from_path("./demo/video.mp4");
|
||||
assert_eq!(username, "demo");
|
||||
|
||||
let username = extract_username_from_path("./warren/path/to/video.mp4");
|
||||
assert_eq!(username, "warren");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,9 +1,8 @@
|
||||
use crate::core::chunk;
|
||||
use crate::core::db::PostgresDb;
|
||||
use sqlx::PgPool;
|
||||
use tokio::time::{sleep, Duration};
|
||||
use tracing;
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::core::chunk;
|
||||
|
||||
pub struct JobWorker {
|
||||
pool: PgPool,
|
||||
@@ -42,47 +41,39 @@ impl JobWorker {
|
||||
}
|
||||
|
||||
async fn process_next_job(&self) -> anyhow::Result<bool> {
|
||||
// 1. Fetch a QUEUED job
|
||||
// We use a transaction to ensure no two workers pick the same job (atomic update)
|
||||
let job_row: Option<(String, String, String, String, String, i64)> = sqlx::query_as(
|
||||
// 1. Fetch a QUEUED job from monitor_jobs
|
||||
// Using sqlx::query_as to map to tuple.
|
||||
// Note: progress_total is int4 (i32).
|
||||
let job_row: Option<(i32, String, i32)> = sqlx::query_as(
|
||||
r#"
|
||||
UPDATE dev.jobs
|
||||
UPDATE dev.monitor_jobs
|
||||
SET status = 'RUNNING', updated_at = NOW()
|
||||
WHERE id = (
|
||||
SELECT id FROM dev.jobs
|
||||
SELECT id FROM dev.monitor_jobs
|
||||
WHERE status = 'QUEUED'
|
||||
ORDER BY created_at ASC
|
||||
LIMIT 1
|
||||
FOR UPDATE SKIP LOCKED
|
||||
)
|
||||
RETURNING id::text, asset_uuid, rule, status, processor_list, total_frames
|
||||
RETURNING id, uuid, COALESCE(progress_total, 0)
|
||||
"#,
|
||||
)
|
||||
.fetch_optional(&self.pool)
|
||||
.await?;
|
||||
|
||||
if let Some((job_id, asset_uuid, rule, _status, _processors, total_frames)) = job_row {
|
||||
let job_uuid =
|
||||
Uuid::parse_str(&job_id).map_err(|e| anyhow::anyhow!("Invalid job UUID: {}", e))?;
|
||||
|
||||
if let Some((job_id, asset_uuid, total_frames)) = job_row {
|
||||
tracing::info!(
|
||||
"🚀 Processing Job {} for Asset {} (Rule: {})",
|
||||
"🚀 Processing Job {} for Asset {} (Frames: {})",
|
||||
job_id,
|
||||
asset_uuid,
|
||||
rule
|
||||
total_frames
|
||||
);
|
||||
|
||||
// 2. Execute Logic based on Rule
|
||||
let result = match rule.as_str() {
|
||||
"rule1" => {
|
||||
let fps = self.get_asset_fps(&asset_uuid).await?;
|
||||
chunk::rule1_ingest::ingest_rule1(&self.pool, &asset_uuid, fps).await
|
||||
}
|
||||
_ => {
|
||||
tracing::warn!("Unknown rule type: {}", rule);
|
||||
Ok(0)
|
||||
}
|
||||
};
|
||||
// 2. Execute Logic (Default to rule1 for now as monitor_jobs doesn't store rule type explicitly)
|
||||
let fps = self.get_asset_fps(&asset_uuid).await?;
|
||||
let db = PostgresDb::from_pool(self.pool.clone());
|
||||
|
||||
let result = chunk::rule1_ingest::execute_rule1(&db, &asset_uuid, fps).await;
|
||||
|
||||
// 3. Update Job Status
|
||||
match result {
|
||||
@@ -93,17 +84,21 @@ impl JobWorker {
|
||||
chunk_count
|
||||
);
|
||||
|
||||
sqlx::query!(
|
||||
"UPDATE dev.jobs SET status = 'COMPLETED', processed_frames = total_frames, updated_at = NOW() WHERE id = $1",
|
||||
job_uuid
|
||||
// Update monitor_jobs
|
||||
// Using runtime query to avoid compile-time macro checks
|
||||
sqlx::query(
|
||||
"UPDATE dev.monitor_jobs SET status = 'COMPLETED', progress_current = progress_total, updated_at = NOW() WHERE id = $1"
|
||||
)
|
||||
.bind(job_id)
|
||||
.execute(&self.pool)
|
||||
.await?;
|
||||
|
||||
sqlx::query!(
|
||||
"UPDATE dev.videos SET processing_status = 'COMPLETED' WHERE uuid = $1",
|
||||
asset_uuid
|
||||
// Update video processing_status
|
||||
sqlx::query(
|
||||
"UPDATE dev.videos SET processing_status = $1::jsonb WHERE file_uuid = $2",
|
||||
)
|
||||
.bind(serde_json::json!({"status": "COMPLETED"}))
|
||||
.bind(asset_uuid)
|
||||
.execute(&self.pool)
|
||||
.await?;
|
||||
}
|
||||
@@ -116,11 +111,11 @@ impl JobWorker {
|
||||
&err_msg
|
||||
};
|
||||
|
||||
sqlx::query!(
|
||||
"UPDATE dev.jobs SET status = 'FAILED', error_message = $2, updated_at = NOW() WHERE id = $1",
|
||||
job_uuid,
|
||||
safe_msg
|
||||
sqlx::query(
|
||||
"UPDATE dev.monitor_jobs SET status = 'FAILED', last_error = $2, updated_at = NOW() WHERE id = $1"
|
||||
)
|
||||
.bind(job_id)
|
||||
.bind(safe_msg)
|
||||
.execute(&self.pool)
|
||||
.await?;
|
||||
}
|
||||
@@ -132,8 +127,9 @@ impl JobWorker {
|
||||
}
|
||||
|
||||
async fn get_asset_fps(&self, uuid: &str) -> anyhow::Result<f64> {
|
||||
// dev.videos now uses file_uuid and has a direct fps column
|
||||
let fps: Option<f64> =
|
||||
sqlx::query_scalar("SELECT (metadata->>'fps')::float FROM dev.videos WHERE uuid = $1")
|
||||
sqlx::query_scalar("SELECT fps FROM dev.videos WHERE file_uuid = $1")
|
||||
.bind(uuid)
|
||||
.fetch_optional(&self.pool)
|
||||
.await?;
|
||||
|
||||
Reference in New Issue
Block a user