feat: update core API, database layer, and worker modules

- Remove unused imports (n8n_search, universal_search, Client, Arc, etc.)
- Update API endpoints for identity, face recognition, search
- Fix postgres_db.rs search_videos parent_uuid column
- Add snapshot API and identity agent API
- Clean up backup files (.bak, .bak2)
This commit is contained in:
Warren
2026-04-30 15:07:02 +08:00
parent 8f2208dd63
commit 2b23d1cfbd
148 changed files with 8553 additions and 48637 deletions

View File

@@ -4,7 +4,7 @@ use serde::{de::DeserializeOwned, Serialize};
use std::sync::Arc;
use tokio::sync::RwLock;
use crate::core::config::{cache as cache_config, REDIS_KEY_PREFIX};
use crate::core::config::{cache as cache_config, snapshot as snapshot_config, REDIS_KEY_PREFIX};
use crate::core::db::RedisClient;
pub struct RedisCache {
@@ -133,6 +133,135 @@ impl RedisCache {
pub async fn invalidate_videos_list(&self) -> Result<u64> {
self.invalidate_pattern("videos:*").await
}
// --- Snapshot Cache Methods ---
pub async fn snapshot_hits_key(file_uuid: &str) -> String {
format!("snapshot:hits:{}", file_uuid)
}
pub async fn snapshot_status_key(file_uuid: &str) -> String {
format!("snapshot:status:{}", file_uuid)
}
pub async fn snapshot_last_access_key(file_uuid: &str) -> String {
format!("snapshot:last_access:{}", file_uuid)
}
pub async fn snapshot_migrate_hint_key(file_uuid: &str) -> String {
format!("snapshot:migrate_hint:{}", file_uuid)
}
pub async fn increment_snapshot_hits(&self, file_uuid: &str) -> Result<u64> {
let client = self.client.read().await;
let mut conn = client.get_conn_internal().await?;
let key = Self::snapshot_hits_key(file_uuid).await;
let new_count: u64 = redis::cmd("INCR").arg(&key).query_async(&mut conn).await?;
let _: () = redis::cmd("EXPIRE")
.arg(&key)
.arg(*snapshot_config::WARM_TTL_SECS)
.query_async(&mut conn)
.await?;
Ok(new_count)
}
pub async fn get_snapshot_hits(&self, file_uuid: &str) -> Result<u64> {
let client = self.client.read().await;
let mut conn = client.get_conn_internal().await?;
let key = Self::snapshot_hits_key(file_uuid).await;
let count: Option<u64> = conn.get(&key).await?;
Ok(count.unwrap_or(0))
}
pub async fn update_last_access(&self, file_uuid: &str) -> Result<()> {
let client = self.client.read().await;
let mut conn = client.get_conn_internal().await?;
let key = Self::snapshot_last_access_key(file_uuid).await;
let now: i64 = std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
.unwrap()
.as_secs() as i64;
let _: String = conn
.set_ex(&key, now, *snapshot_config::WARM_TTL_SECS)
.await?;
Ok(())
}
pub async fn set_snapshot_status(
&self,
file_uuid: &str,
status: &str,
progress: Option<f32>,
) -> Result<()> {
let client = self.client.read().await;
let mut conn = client.get_conn_internal().await?;
let key = Self::snapshot_status_key(file_uuid).await;
let payload = serde_json::json!({
"status": status,
"progress": progress,
});
let json = serde_json::to_string(&payload)?;
let ttl = if status == "generating" {
*crate::core::config::snapshot::GENERATING_TIMEOUT_SECS
} else {
*snapshot_config::WARM_TTL_SECS
};
let _: String = conn.set_ex(&key, json, ttl).await?;
Ok(())
}
pub async fn get_snapshot_status(&self, file_uuid: &str) -> Result<serde_json::Value> {
let client = self.client.read().await;
let mut conn = client.get_conn_internal().await?;
let key = Self::snapshot_status_key(file_uuid).await;
let value: Option<String> = conn.get(&key).await?;
match value {
Some(json) => Ok(serde_json::from_str(&json)?),
None => Ok(serde_json::json!({
"status": "cold",
"progress": null,
})),
}
}
pub async fn clear_snapshot_status(&self, file_uuid: &str) -> Result<()> {
let client = self.client.read().await;
let mut conn = client.get_conn_internal().await?;
let key = Self::snapshot_status_key(file_uuid).await;
let _: () = conn.del(&key).await?;
Ok(())
}
pub async fn set_migrate_hint(
&self,
file_uuid: &str,
parent_uuid: &str,
count: u64,
) -> Result<()> {
let client = self.client.read().await;
let mut conn = client.get_conn_internal().await?;
let key = Self::snapshot_migrate_hint_key(file_uuid).await;
let payload = serde_json::json!({
"parent_uuid": parent_uuid,
"count": count,
});
let json = serde_json::to_string(&payload)?;
let _: String = conn
.set_ex(&key, json, *snapshot_config::WARM_TTL_SECS)
.await?;
Ok(())
}
pub async fn get_migrate_hint(&self, file_uuid: &str) -> Result<Option<serde_json::Value>> {
let client = self.client.read().await;
let mut conn = client.get_conn_internal().await?;
let key = Self::snapshot_migrate_hint_key(file_uuid).await;
let value: Option<String> = conn.get(&key).await?;
match value {
Some(json) => Ok(Some(serde_json::from_str(&json)?)),
None => Ok(None),
}
}
}
impl Clone for RedisCache {

View File

@@ -3,7 +3,7 @@ pub mod rule3_ingest;
pub mod splitter;
pub mod types;
pub use rule1_ingest::ingest_rule1;
pub use rule1_ingest::execute_rule1;
pub use rule3_ingest::ingest_rule3;
pub use splitter::{AsrSegment, ChunkSplitter};
pub use types::{Chunk, ChunkType};

View File

@@ -1,94 +1,367 @@
use crate::core::config::OUTPUT_DIR;
use crate::core::chunk::types::{Chunk, ChunkRule, ChunkType};
use crate::core::db::schema;
use crate::core::db::PostgresDb;
use anyhow::{Context, Result};
use serde::Deserialize;
use sqlx::PgPool;
use std::fs;
use std::path::Path;
use serde_json::Value;
use sqlx::{PgPool, Row};
use tracing::info;
// --- 結構體定義 (對齊外部處理器產出格式) ---
pub async fn execute_rule1(db: &PostgresDb, file_uuid: &str, fps: f64) -> Result<usize> {
let pool = db.pool();
let pre_chunks_table = schema::table_name("pre_chunks");
#[derive(Debug, Deserialize)]
struct AsrSegment {
start: f64,
end: f64,
text: String,
}
let asr_segments = fetch_asr_segments(pool, file_uuid, &pre_chunks_table).await?;
#[derive(Debug, Deserialize)]
struct AsrxSegment {
start: f64,
end: f64,
speaker: String,
}
let asrx_segments = fetch_asrx_segments(pool, file_uuid, &pre_chunks_table).await?;
// --- 核心邏輯 ---
let yolo_frames = fetch_yolo_frames(pool, file_uuid, &pre_chunks_table).await?;
/// 執行 Rule 1 入庫
/// 讀取 asr.json 與 asrx.json合併 Speaker 資訊,寫入 chunks_rule1
pub async fn ingest_rule1(pool: &PgPool, asset_uuid: &str, fps: f64) -> Result<usize> {
// 1. 讀取檔案
let asr_path = format!("{}/{}.asr.json", *OUTPUT_DIR, asset_uuid);
let asrx_path = format!("{}/{}.asrx.json", *OUTPUT_DIR, asset_uuid);
let face_frames = fetch_face_frames(pool, file_uuid).await?;
let asr_content = fs::read_to_string(&asr_path)
.with_context(|| format!("Failed to read ASR file: {}", asr_path))?;
let asrx_content = fs::read_to_string(&asrx_path)
.with_context(|| format!("Failed to read ASRX file: {}", asrx_path))?;
let video = db
.get_video_by_uuid(file_uuid)
.await?
.context("Video not found")?;
let asr_segments: Vec<AsrSegment> = serde_json::from_str(&asr_content)?;
let asrx_segments: Vec<AsrxSegment> = serde_json::from_str(&asrx_content)?;
let file_id = video.id;
let mut count = 0;
// 2. 交易處理
let mut tx = pool.begin().await?;
for seg in &asr_segments {
// 時間轉幀
let start_frame = (seg.start * fps).round() as i64;
let end_frame = (seg.end * fps).round() as i64;
for (idx, seg) in asr_segments.iter().enumerate() {
let speaker_id = find_best_speaker(&seg, &asrx_segments);
// 3. 尋找重疊最多的 Speaker
let mut best_speaker: Option<String> = None;
let mut max_overlap = 0.0f64;
let yolo_objects = find_yolo_objects(seg.start_frame, seg.end_frame, &yolo_frames);
for spk in &asrx_segments {
let overlap = (seg.end.min(spk.end) - seg.start.max(spk.start)).max(0.0);
if overlap > max_overlap {
max_overlap = overlap;
best_speaker = Some(spk.speaker.clone());
}
}
let face_ids = find_face_ids(seg.start_frame, seg.end_frame, &face_frames);
let speaker_id = best_speaker.unwrap_or("UNKNOWN".to_string());
let metadata = serde_json::json!({
"speaker_id": speaker_id,
"yolo_objects": yolo_objects,
"face_ids": face_ids,
"language": seg.language,
});
// 4. 寫入 DB
sqlx::query!(
r#"
INSERT INTO chunks_rule1 (
id, asset_uuid, start_frame, end_frame, content, speaker_id
) VALUES (
gen_random_uuid(), $1, $2, $3, $4, $5
)
"#,
asset_uuid,
start_frame,
end_frame,
seg.text,
speaker_id
let content = serde_json::json!({
"text": seg.text,
"text_normalized": seg.text.to_lowercase(),
});
let chunk = Chunk::from_seconds(
file_id as i32,
file_uuid.to_string(),
idx as u32,
ChunkType::Sentence,
ChunkRule::Rule1,
seg.start_time,
seg.end_time,
fps,
content,
)
.execute(&mut *tx)
.await?;
.with_metadata(metadata);
db.store_chunk_in_tx(&chunk, &mut tx).await?;
count += 1;
// 每 100 筆 Commit 一次 (可選優化)
if count % 500 == 0 {
tx.commit().await?;
tx = pool.begin().await?;
if count % 100 == 0 {
info!(
"Rule 1: Processed {} segments for video {}",
count, file_uuid
);
}
}
tx.commit().await?;
info!(
"Rule 1 completed: {} sentence chunks created for video {}",
count, file_uuid
);
Ok(count)
}
#[derive(Debug, Clone)]
struct AsrSegment {
start_frame: i64,
end_frame: i64,
start_time: f64,
end_time: f64,
text: String,
language: String,
}
#[derive(Debug, Clone)]
struct AsrxSegment {
start_frame: i64,
end_frame: i64,
start_time: f64,
end_time: f64,
speaker: String,
}
#[derive(Debug, Clone)]
struct YoloFrame {
frame: i64,
detections: Vec<YoloDetection>,
}
#[derive(Debug, Clone)]
struct YoloDetection {
class_name: String,
confidence: f64,
}
#[derive(Debug, Clone)]
struct FaceFrame {
frame: i64,
faces: Vec<FaceDetection>,
}
#[derive(Debug, Clone)]
struct FaceDetection {
face_id: String,
confidence: f64,
identity_id: Option<i32>,
}
async fn fetch_asr_segments(
pool: &PgPool,
file_uuid: &str,
table: &str,
) -> Result<Vec<AsrSegment>> {
let query = format!(
r#"
SELECT
start_frame, end_frame, start_time, end_time, data
FROM {}
WHERE file_uuid = $1 AND processor_type = 'asr'
ORDER BY start_frame
"#,
table
);
let rows = sqlx::query(&query).bind(file_uuid).fetch_all(pool).await?;
let segments: Vec<AsrSegment> = rows
.iter()
.map(|row| {
let start_frame: i64 = row.try_get("start_frame").unwrap_or(0);
let end_frame: i64 = row.try_get("end_frame").unwrap_or(0);
let start_time: f64 = row.try_get("start_time").unwrap_or(0.0);
let end_time: f64 = row.try_get("end_time").unwrap_or(0.0);
let data: Value = row.try_get("data").unwrap_or(Value::Null);
let text = data.get("text").and_then(|t| t.as_str()).unwrap_or("");
let language = data
.get("language")
.and_then(|l| l.as_str())
.unwrap_or("unknown");
AsrSegment {
start_frame,
end_frame,
start_time,
end_time,
text: text.to_string(),
language: language.to_string(),
}
})
.collect();
Ok(segments)
}
async fn fetch_asrx_segments(
pool: &PgPool,
file_uuid: &str,
table: &str,
) -> Result<Vec<AsrxSegment>> {
let query = format!(
r#"
SELECT
start_frame, end_frame, start_time, end_time, data
FROM {}
WHERE file_uuid = $1 AND processor_type = 'asrx'
ORDER BY start_frame
"#,
table
);
let rows = sqlx::query(&query).bind(file_uuid).fetch_all(pool).await?;
let segments: Vec<AsrxSegment> = rows
.iter()
.map(|row| {
let start_frame: i64 = row.try_get("start_frame").unwrap_or(0);
let end_frame: i64 = row.try_get("end_frame").unwrap_or(0);
let start_time: f64 = row.try_get("start_time").unwrap_or(0.0);
let end_time: f64 = row.try_get("end_time").unwrap_or(0.0);
let data: Value = row.try_get("data").unwrap_or(Value::Null);
let speaker = data
.get("speaker")
.and_then(|s| s.as_str())
.unwrap_or("UNKNOWN");
AsrxSegment {
start_frame,
end_frame,
start_time,
end_time,
speaker: speaker.to_string(),
}
})
.collect();
Ok(segments)
}
async fn fetch_yolo_frames(pool: &PgPool, file_uuid: &str, table: &str) -> Result<Vec<YoloFrame>> {
let query = format!(
r#"
SELECT
coordinate_index as frame, data
FROM {}
WHERE file_uuid = $1 AND processor_type = 'yolo'
ORDER BY coordinate_index
"#,
table
);
let rows = sqlx::query(&query).bind(file_uuid).fetch_all(pool).await?;
let frames: Vec<YoloFrame> = rows
.iter()
.map(|row| {
let frame: i64 = row.try_get("frame").unwrap_or(0);
let data: Value = row.try_get("data").unwrap_or(Value::Null);
let detections: Vec<YoloDetection> = data
.get("detections")
.and_then(|d| d.as_array())
.map(|arr| {
arr.iter()
.filter_map(|det| {
let class_name = det.get("class_name").and_then(|c| c.as_str());
let confidence = det.get("confidence").and_then(|c| c.as_f64());
if class_name.is_some() && confidence.is_some() {
Some(YoloDetection {
class_name: class_name.unwrap().to_string(),
confidence: confidence.unwrap(),
})
} else {
None
}
})
.collect()
})
.unwrap_or_default();
YoloFrame { frame, detections }
})
.collect();
Ok(frames)
}
async fn fetch_face_frames(pool: &PgPool, file_uuid: &str) -> Result<Vec<FaceFrame>> {
let face_detections_table = schema::table_name("face_detections");
let query = format!(
r#"
SELECT
frame_number as frame,
face_id,
confidence,
identity_id
FROM {}
WHERE file_uuid = $1
ORDER BY frame_number
"#,
face_detections_table
);
let rows = sqlx::query(&query).bind(file_uuid).fetch_all(pool).await?;
let mut frame_map: std::collections::HashMap<i64, FaceFrame> = std::collections::HashMap::new();
for row in rows {
let frame: i64 = row.try_get("frame").unwrap_or(0);
let face_id: Option<String> = row.try_get("face_id").ok();
let confidence: f64 = row.try_get("confidence").unwrap_or(0.0);
let identity_id: Option<i32> = row.try_get("identity_id").ok();
if let Some(face_id) = face_id {
let detection = FaceDetection {
face_id: face_id.clone(),
confidence,
identity_id,
};
frame_map
.entry(frame)
.or_insert_with(|| FaceFrame {
frame,
faces: Vec::new(),
})
.faces
.push(detection);
}
}
let mut frames: Vec<FaceFrame> = frame_map.into_values().collect();
frames.sort_by_key(|f| f.frame);
Ok(frames)
}
fn find_best_speaker(asr_seg: &AsrSegment, asrx_segments: &[AsrxSegment]) -> String {
let mut best_speaker = "UNKNOWN".to_string();
let mut max_overlap = 0.0f64;
for spk in asrx_segments {
let overlap =
(asr_seg.end_time.min(spk.end_time) - asr_seg.start_time.max(spk.start_time)).max(0.0);
if overlap > max_overlap {
max_overlap = overlap;
best_speaker = spk.speaker.clone();
}
}
best_speaker
}
fn find_yolo_objects(start_frame: i64, end_frame: i64, yolo_frames: &[YoloFrame]) -> Vec<String> {
let mut objects = Vec::new();
for frame in yolo_frames {
if frame.frame >= start_frame && frame.frame <= end_frame {
for det in &frame.detections {
if det.confidence > 0.5 && !objects.contains(&det.class_name) {
objects.push(det.class_name.clone());
}
}
}
}
objects
}
fn find_face_ids(start_frame: i64, end_frame: i64, face_frames: &[FaceFrame]) -> Vec<String> {
let mut face_ids = Vec::new();
for frame in face_frames {
if frame.frame >= start_frame && frame.frame <= end_frame {
for face in &frame.faces {
if face.confidence > 0.5 && !face_ids.contains(&face.face_id) {
face_ids.push(face.face_id.clone());
}
}
}
}
face_ids
}

View File

@@ -165,6 +165,42 @@ pub mod cache {
});
}
pub mod snapshot {
use super::*;
pub static SNAPSHOT_DIR_NAME: Lazy<String> = Lazy::new(|| {
env::var("MOMENTRY_SNAPSHOT_DIR_NAME").unwrap_or_else(|_| ".momentry_snapshots".to_string())
});
pub static HOT_THRESHOLD: Lazy<u64> = Lazy::new(|| {
env::var("MOMENTRY_SNAPSHOT_HOT_THRESHOLD")
.unwrap_or_else(|_| "5".to_string())
.parse()
.unwrap_or(5)
});
pub static HOT_TTL_SECS: Lazy<u64> = Lazy::new(|| {
env::var("MOMENTRY_SNAPSHOT_HOT_TTL_SECS")
.unwrap_or_else(|_| "86400".to_string())
.parse()
.unwrap_or(86400)
});
pub static WARM_TTL_SECS: Lazy<u64> = Lazy::new(|| {
env::var("MOMENTRY_SNAPSHOT_WARM_TTL_SECS")
.unwrap_or_else(|_| "604800".to_string())
.parse()
.unwrap_or(604800)
});
pub static GENERATING_TIMEOUT_SECS: Lazy<u64> = Lazy::new(|| {
env::var("MOMENTRY_SNAPSHOT_GENERATING_TIMEOUT")
.unwrap_or_else(|_| "1800".to_string())
.parse()
.unwrap_or(1800)
});
}
pub mod llm {
use super::*;

View File

@@ -41,9 +41,10 @@ pub mod sync_db;
pub use mongodb_db::MongoDb;
pub use postgres_db::{
Bm25Result, CandidateRecord, CreateApiKeyConfig, FileRecord, HybridSearchResult, MonitorJob,
MonitorJobStats, MonitorJobStatus, PostgresDb, ProcessorJobStatus, ProcessorResult,
ProcessorType, ResourceRecord, VideoRecord, VideoStatus,
Bm25Result, CandidateRecord, CreateApiKeyConfig, FileIdentityRecord, FileRecord,
HybridSearchResult, IdentityChunkRecord, IdentityDetailRecord, IdentityFaceRecord,
IdentityFileRecord, MonitorJob, MonitorJobStats, MonitorJobStatus, PostgresDb,
ProcessorJobStatus, ProcessorResult, ProcessorType, ResourceRecord, VideoRecord, VideoStatus,
};
pub use qdrant_db::{QdrantDb, VectorPayload};
pub use redis_client::{

File diff suppressed because it is too large Load Diff

View File

@@ -4,6 +4,8 @@ use std::sync::Arc;
use tokio::sync::RwLock;
use super::Database;
use crate::core::config;
use crate::core::storage::snapshot_manager::SnapshotTier;
pub struct RedisDb {
#[allow(dead_code)]
@@ -28,14 +30,20 @@ pub struct Job {
pub updated_at: String,
}
#[derive(Debug, serde::Serialize, serde::Deserialize)]
pub struct SnapshotCacheEntry {
pub hits: u64,
pub last_access: i64,
pub status: String,
pub progress: Option<f32>,
}
impl RedisDb {
pub async fn push_job(&self, _job: &Job) -> Result<()> {
// TODO: Implement Redis client
Ok(())
}
pub async fn get_pending_jobs(&self) -> Result<Vec<Job>> {
// TODO: Implement Redis client
Ok(vec![])
}
@@ -45,14 +53,82 @@ impl RedisDb {
_status: &str,
_progress: f32,
) -> Result<()> {
// TODO: Implement Redis client
Ok(())
}
pub async fn publish_event(&self, _channel: &str, _message: &str) -> Result<()> {
// TODO: Implement Redis Pub/Sub
Ok(())
}
// --- Snapshot Cache Methods ---
pub async fn increment_snapshot_hits(&self, _file_uuid: &str) -> Result<u64> {
// TODO: Redis HINCRBY snapshot:hits:{uuid}
Ok(0)
}
pub async fn get_snapshot_hits(&self, _file_uuid: &str) -> Result<u64> {
// TODO: Redis GET snapshot:hits:{uuid}
Ok(0)
}
pub async fn update_last_access(&self, _file_uuid: &str) -> Result<()> {
// TODO: Redis SET snapshot:last_access:{uuid} = now EX 7d
Ok(())
}
pub async fn set_snapshot_status(
&self,
_file_uuid: &str,
status: &str,
progress: Option<f32>,
) -> Result<()> {
// TODO: Redis SET snapshot:status:{uuid} = {status, progress} EX 30m
let _ = (status, progress);
Ok(())
}
pub async fn get_snapshot_status(&self, _file_uuid: &str) -> Result<SnapshotCacheEntry> {
// TODO: Redis GET snapshot:status:{uuid}
Ok(SnapshotCacheEntry {
hits: 0,
last_access: 0,
status: "cold".to_string(),
progress: None,
})
}
pub async fn clear_snapshot_status(&self, _file_uuid: &str) -> Result<()> {
// TODO: Redis DEL snapshot:status:{uuid}
Ok(())
}
pub async fn set_migrate_hint(
&self,
_file_uuid: &str,
_parent_uuid: &str,
_count: u64,
) -> Result<()> {
// TODO: Redis SET snapshot:migrate_hint:{uuid}
Ok(())
}
pub async fn get_migrate_hint(&self, _file_uuid: &str) -> Result<Option<(String, u64)>> {
// TODO: Redis GET snapshot:migrate_hint:{uuid}
Ok(None)
}
pub fn compute_status_ttl(&self, tier: SnapshotTier) -> u64 {
match tier {
SnapshotTier::Hot => *config::snapshot::HOT_TTL_SECS,
SnapshotTier::Warm => *config::snapshot::WARM_TTL_SECS,
SnapshotTier::Cold => 0,
}
}
pub fn generating_timeout() -> u64 {
*config::snapshot::GENERATING_TIMEOUT_SECS
}
}
#[async_trait]

View File

@@ -1,5 +1,5 @@
use anyhow::{Context, Result};
use serde_json::json;
use serde_json;
use crate::core::chunk::types::{Chunk, ChunkRule, ChunkType};
use crate::core::db::mongodb_db::MongoDb;
@@ -77,9 +77,9 @@ impl SyncDb {
let client = reqwest::Client::new();
let response = client
.post("http://localhost:11434/api/embeddings")
.json(&json!({
"model": "nomic-embed-text-v2-moe:latest",
"prompt": text
.json(&serde_json::json!({
"model": "all-minilm",
"prompt": text,
}))
.send()
.await
@@ -107,12 +107,21 @@ impl SyncDb {
for (i, segment) in asr_result.segments.iter().enumerate() {
let segment: &AsrSegment = segment;
let content = json!({
"text": segment.text,
"text_normalized": segment.text.to_lowercase(),
let content = serde_json::json!({
"rule": "rule1",
"data": {
"text": segment.text,
"start": segment.start,
"end": segment.end,
},
});
let metadata = json!({
let metadata = serde_json::json!({
"file_uuid": uuid,
"chunk_type": "sentence",
"chunk_rule": "rule1",
"fps": 0.0, // Will be set later
"start_frame": 0,
"end_frame": 0,
"language": asr_result.language,
"language_probability": asr_result.language_probability,
});

View File

@@ -1,17 +1,14 @@
use anyhow::{Context, Result};
use chrono::Utc;
use sqlx;
use std::path::Path;
use tracing::{info, warn};
use crate::core::db::{Database, PostgresDb, VideoRecord, VideoStatus};
use crate::core::db::{PostgresDb, VideoRecord, VideoStatus};
use crate::core::probe;
use crate::core::storage::uuid as uuid_utils;
use crate::core::storage::FileManager;
use crate::uuid as uuid_utils;
/// Handles the automatic ingestion of video files.
/// This service is responsible for:
/// 1. Running `ffprobe` (Pre-processing)
/// 2. Saving probe JSON
/// 3. Registering the video in the database (making it visible in the API)
pub struct IngestionService {
db: PostgresDb,
}
@@ -21,20 +18,56 @@ impl IngestionService {
Self { db }
}
/// Registers a video file found in the watched directory.
/// This function is idempotent: if the video (UUID) already exists, it skips.
pub async fn ingest(&self, file_path: &str) -> Result<Option<String>> {
let path = Path::new(file_path);
// 1. Validate extension
if !is_video_extension(path) {
return Ok(None);
}
// 2. Compute UUID
let uuid = uuid_utils::compute_uuid_from_path(file_path);
let canonical_path = path.canonicalize().unwrap_or_else(|_| path.to_path_buf());
let filename = path
.file_name()
.unwrap_or_default()
.to_string_lossy()
.to_string();
// Stable UUID based on MAC + Birthday + Filename.
// Moving the file (path change) keeps the SAME identity.
// 1. Look for existing Birthday (Identity Anchor)
// If the file (by name) was registered before, use its original birth time.
let birthday = sqlx::query_scalar::<_, chrono::DateTime<chrono::Utc>>(
"SELECT registration_time FROM dev.videos WHERE file_name = $1 AND registration_time IS NOT NULL LIMIT 1"
)
.bind(&filename)
.fetch_optional(self.db.pool())
.await
.ok()
.flatten()
.map(|t| t.to_rfc3339())
.unwrap_or_else(|| Utc::now().to_rfc3339());
let parent = canonical_path
.parent()
.map(|p| p.to_string_lossy().to_string())
.unwrap_or_default();
// 2. Compute UUID
let uuid = uuid_utils::compute_birth_uuid(
&uuid_utils::get_mac_address(),
&birthday,
&canonical_path.to_string_lossy(),
&filename,
);
let parent = canonical_path
.parent()
.map(|p| p.to_string_lossy().to_string())
.unwrap_or_default();
let username = uuid_utils::extract_username_from_path(&parent);
// 3. Check if already registered
if let Ok(Some(_)) = self.db.get_video_by_uuid(&uuid).await {
info!(
"Video already registered: {} ({})",
@@ -46,11 +79,9 @@ impl IngestionService {
info!("Starting ingestion for: {} ({})", path.display(), uuid);
// 4. Run ffprobe
let probe_result = probe::probe_video(file_path)
.with_context(|| format!("Failed to probe video: {}", file_path))?;
// 5. Extract metadata
let duration = probe_result
.format
.duration
@@ -78,7 +109,6 @@ impl IngestionService {
}
}
// 6. Save Probe JSON
let file_manager = FileManager::new(std::path::PathBuf::from("."));
let probe_json_str = serde_json::to_string_pretty(&probe_result)?;
@@ -88,33 +118,72 @@ impl IngestionService {
info!("Probe JSON saved for {}", uuid);
}
// 7. Create Record
// Use absolute path for safety
let canonical_path = path.canonicalize().unwrap_or_else(|_| path.to_path_buf());
let total_frames = {
let video_stream = probe_result
.streams
.iter()
.find(|s| s.codec_type.as_deref() == Some("video"));
if let Some(stream) = video_stream {
if let Some(nb_frames_str) = &stream.nb_frames {
if let Ok(nb_frames) = nb_frames_str.parse::<u64>() {
info!(
"Using nb_frames from ffprobe: {} frames for {}",
nb_frames,
path.display()
);
Some(nb_frames)
} else {
warn!(
"Failed to parse nb_frames, using duration * fps fallback for {}",
path.display()
);
Some((duration * fps).floor() as u64)
}
} else {
warn!(
"nb_frames not available, using duration * fps fallback for {}",
path.display()
);
Some((duration * fps).floor() as u64)
}
} else {
warn!("No video stream found for {}", path.display());
Some(0)
}
};
let birth_registration = serde_json::json!({
"registration_source": {
"username": username,
"original_path": parent,
"original_filename": filename
}
});
let record = VideoRecord {
id: 0,
uuid: uuid.clone(),
file_uuid: uuid.clone(),
file_path: canonical_path.to_string_lossy().to_string(),
file_name: path
.file_name()
.unwrap_or_default()
.to_string_lossy()
.to_string(),
file_name: filename,
file_type: None,
duration,
width,
height,
fps,
probe_json: Some(probe_json_str),
storage: Default::default(),
status: VideoStatus::Pending, // Ready for processing
status: VideoStatus::Pending,
processing_status: Some(serde_json::json!({"phase": "REGISTERED"})),
birth_registration: None,
user_id: None,
job_id: None,
created_at: String::new(),
registration_time: None,
total_frames: total_frames.unwrap_or(0),
parent_uuid: None,
};
// 8. Insert DB
self.db
.register_video(&record)
.await
@@ -125,9 +194,14 @@ impl IngestionService {
.await
.with_context(|| "Failed to set registration_time")?;
self.db
.update_birth_registration(&uuid, &birth_registration)
.await
.with_context(|| "Failed to set birth_registration")?;
info!(
"Successfully registered video: {} (UUID: {})",
record.file_name, uuid
"Successfully registered video: {} (UUID: {}, Birth UUID: {})",
record.file_name, uuid, uuid
);
Ok(Some(uuid))
}

View File

@@ -12,7 +12,7 @@ pub struct PersonIdentity {
pub person_id: String,
pub face_identity_id: Option<i32>,
pub speaker_id: Option<String>,
pub video_uuid: String,
pub file_uuid: String,
pub confidence: f64,
pub name: Option<String>,
pub metadata: serde_json::Value,
@@ -85,7 +85,7 @@ pub struct SuggestedBinding {
pub struct PersonAppearance {
pub id: i32,
pub person_id: String,
pub video_uuid: String,
pub file_uuid: String,
pub start_time: f64,
pub end_time: f64,
pub duration: f64,
@@ -124,7 +124,7 @@ pub struct PersonStatistics {
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CreatePersonIdentityRequest {
pub video_uuid: String,
pub file_uuid: String,
pub face_identity_id: Option<i32>,
pub speaker_id: Option<String>,
pub name: Option<String>,
@@ -196,7 +196,7 @@ mod tests {
person_id: "person_001".to_string(),
face_identity_id: Some(123),
speaker_id: Some("SPEAKER_00".to_string()),
video_uuid: "video_abc".to_string(),
file_uuid: "video_abc".to_string(),
confidence: 0.85,
name: Some("张三".to_string()),
metadata: serde_json::json!({"role": "host"}),
@@ -220,7 +220,7 @@ mod tests {
let appearance = PersonAppearance {
id: 1,
person_id: "person_001".to_string(),
video_uuid: "video_abc".to_string(),
file_uuid: "video_abc".to_string(),
start_time: 10.5,
end_time: 25.3,
duration: 14.8,

View File

@@ -16,6 +16,7 @@ pub struct StreamInfo {
pub width: Option<u32>,
pub height: Option<u32>,
pub r_frame_rate: Option<String>,
pub nb_frames: Option<String>,
pub duration: Option<String>,
pub sample_rate: Option<String>,
pub channels: Option<u32>,
@@ -64,6 +65,7 @@ pub fn probe_video(video_path: &str) -> Result<ProbeResult> {
width: s["width"].as_u64().map(|v| v as u32),
height: s["height"].as_u64().map(|v| v as u32),
r_frame_rate: s["r_frame_rate"].as_str().map(String::from),
nb_frames: s["nb_frames"].as_str().map(String::from),
duration: s["duration"].as_str().map(String::from),
sample_rate: s["sample_rate"].as_str().map(String::from),
channels: s["channels"].as_u64().map(|v| v as u32),

View File

@@ -28,6 +28,15 @@ pub struct Face {
pub width: i32,
pub height: i32,
pub confidence: f32,
pub embedding: Option<Vec<f32>>,
pub landmarks: Option<Vec<Vec<f32>>>,
pub attributes: Option<FaceAttributes>,
}
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct FaceAttributes {
pub age: Option<i32>,
pub gender: Option<String>,
}
pub async fn process_face(
@@ -89,6 +98,12 @@ mod tests {
width: 50,
height: 60,
confidence: 0.95,
embedding: Some(vec![0.1, 0.2, 0.3]),
landmarks: Some(vec![vec![10.0, 20.0], vec![30.0, 40.0]]),
attributes: Some(FaceAttributes {
age: Some(30),
gender: Some("male".to_string()),
}),
}],
}],
};
@@ -96,6 +111,9 @@ mod tests {
let json = serde_json::to_string(&result).unwrap();
assert!(json.contains("face_1"));
assert!(json.contains("\"width\":50"));
assert!(json.contains("embedding"));
assert!(json.contains("landmarks"));
assert!(json.contains("attributes"));
}
#[test]
@@ -108,7 +126,17 @@ mod tests {
"frame": 30,
"timestamp": 1.2,
"faces": [
{"face_id": "f1", "x": 10, "y": 20, "width": 30, "height": 40, "confidence": 0.85}
{
"face_id": "f1",
"x": 10,
"y": 20,
"width": 30,
"height": 40,
"confidence": 0.85,
"embedding": [0.1, 0.2, 0.3],
"landmarks": [[5.0, 10.0]],
"attributes": {"age": 25, "gender": "female"}
}
]
}
]
@@ -118,6 +146,9 @@ mod tests {
assert_eq!(result.frame_count, 50);
assert_eq!(result.frames.len(), 1);
assert_eq!(result.frames[0].faces[0].x, 10);
assert!(result.frames[0].faces[0].embedding.is_some());
assert!(result.frames[0].faces[0].landmarks.is_some());
assert!(result.frames[0].faces[0].attributes.is_some());
}
#[test]
@@ -139,7 +170,33 @@ mod tests {
width: 10,
height: 10,
confidence: 0.5,
embedding: None,
landmarks: None,
attributes: None,
};
assert!(face.confidence >= 0.0 && face.confidence <= 1.0);
}
#[test]
fn test_face_with_embedding() {
let face = Face {
face_id: Some("face_001".to_string()),
x: 100,
y: 200,
width: 50,
height: 60,
confidence: 0.95,
embedding: Some(vec![0.1; 512]),
landmarks: None,
attributes: Some(FaceAttributes {
age: Some(35),
gender: Some("male".to_string()),
}),
};
assert!(face.embedding.is_some());
let embedding = face.embedding.unwrap();
assert_eq!(embedding.len(), 512);
assert_eq!(embedding[0], 0.1);
}
}

View File

@@ -8,6 +8,7 @@ pub mod face_recognition;
pub mod ocr;
pub mod pose;
pub mod scene_classification;
pub mod snapshot_agent;
pub mod story;
pub mod visual_chunk;
pub mod yolo;
@@ -28,6 +29,7 @@ pub use pose::{process_pose, Bbox, Keypoint, PersonPose, PoseFrame, PoseResult};
pub use scene_classification::{
process_scene_classification, SceneClassificationResult, ScenePrediction, SceneSegment,
};
pub use snapshot_agent::{SnapshotAgent, SnapshotAgentConfig};
pub use story::{process_story, StoryChildChunk, StoryParentChunk, StoryResult, StoryStats};
pub use visual_chunk::{process_visual_chunk, process_visual_chunk_advanced, VisualChunkResult};
pub use yolo::{process_yolo, YoloFrame, YoloObject, YoloResult};

View File

@@ -0,0 +1,491 @@
use std::path::Path;
use std::process::Command;
use anyhow::{Context, Result};
use serde::{Deserialize, Serialize};
use tracing::{info, warn};
use crate::core::config;
use crate::core::db::{Database, PostgresDb};
use crate::core::storage::snapshot_manager::SnapshotManager;
pub struct SnapshotAgentConfig {
pub output_dir: String,
pub hot_threshold: u64,
}
impl Default for SnapshotAgentConfig {
fn default() -> Self {
Self {
output_dir: config::OUTPUT_DIR.clone(),
hot_threshold: *config::snapshot::HOT_THRESHOLD,
}
}
}
#[derive(Debug, Serialize, Deserialize)]
struct FaceDetection {
id: i32,
file_uuid: String,
frame_number: i64,
confidence: f64,
bbox: Option<serde_json::Value>,
}
#[derive(Debug, Serialize, Deserialize)]
struct VideoInfo {
file_path: String,
fps: f64,
}
pub struct SnapshotAgent {
config: SnapshotAgentConfig,
manager: SnapshotManager,
}
impl SnapshotAgent {
pub fn new(config: SnapshotAgentConfig) -> Self {
let manager = SnapshotManager::new(&config.output_dir);
Self { config, manager }
}
pub fn default() -> Self {
Self::new(SnapshotAgentConfig::default())
}
pub async fn generate_file_snapshots(
&self,
file_uuid: &str,
snapshot_type: &str,
) -> Result<()> {
info!(
"Starting snapshot generation: file_uuid={}, type={}",
file_uuid, snapshot_type
);
let db = PostgresDb::init()
.await
.context("Failed to connect to database")?;
let video_info = self
.get_video_info(db.pool(), file_uuid)
.await
.context("Failed to get video info")?;
self.manager
.ensure_file_dirs(file_uuid)
.context("Failed to create snapshot directories")?;
match snapshot_type {
"faces" => {
self.extract_face_snapshots(db.pool(), file_uuid, &video_info)
.await?
}
"ocr" => {
self.extract_ocr_snapshots(db.pool(), file_uuid, &video_info)
.await?
}
"logos" => {
self.extract_logo_snapshots(db.pool(), file_uuid, &video_info)
.await?
}
"products" => {
self.extract_product_snapshots(db.pool(), file_uuid, &video_info)
.await?
}
_ => {
return Err(anyhow::anyhow!(
"Unsupported snapshot type: {}",
snapshot_type
))
}
}
info!(
"Snapshot generation completed: file_uuid={}, type={}",
file_uuid, snapshot_type
);
Ok(())
}
async fn extract_face_snapshots(
&self,
pool: &sqlx::PgPool,
file_uuid: &str,
video_info: &VideoInfo,
) -> Result<()> {
let face_table = crate::core::db::schema::table_name("face_detections");
let query = format!(
"SELECT id, face_id, file_uuid, frame_number, confidence, bbox
FROM {}
WHERE file_uuid = $1 AND confidence >= 0.5
ORDER BY confidence DESC
LIMIT 50",
face_table
);
let faces: Vec<(i32, String, i64, f64, Option<serde_json::Value>)> = sqlx::query_as(&query)
.bind(file_uuid)
.fetch_all(pool)
.await
.context("Failed to query face detections")?;
let output_dir = self.manager.file_type_dir(file_uuid, "faces");
let mut saved_count = 0;
for (face_id_db, _uuid, frame_num, confidence, bbox_json) in faces {
let bbox = match bbox_json {
Some(json) => serde_json::from_value::<Bbox>(json).unwrap_or_default(),
None => Bbox::default(),
};
let timestamp = frame_num as f64 / video_info.fps;
let output_path =
output_dir.join(format!("face_{}_conf{:.2}.jpg", face_id_db, confidence));
if self
.extract_frame(&video_info.file_path, timestamp, &bbox, &output_path)
.await
.is_ok()
{
saved_count += 1;
}
}
info!(
"Extracted {} face snapshots for file_uuid={}",
saved_count, file_uuid
);
Ok(())
}
async fn extract_ocr_snapshots(
&self,
pool: &sqlx::PgPool,
file_uuid: &str,
video_info: &VideoInfo,
) -> Result<()> {
let ocr_table = crate::core::db::schema::table_name("ocr_detections");
let query = format!(
"SELECT id, frame_number, text, bbox, confidence
FROM {}
WHERE file_uuid = $1 AND confidence >= 0.7
ORDER BY confidence DESC
LIMIT 30",
ocr_table
);
let detections: Vec<(i32, i64, String, Option<serde_json::Value>, f64)> =
sqlx::query_as(&query)
.bind(file_uuid)
.fetch_all(pool)
.await
.context("Failed to query OCR detections")?;
let output_dir = self.manager.file_type_dir(file_uuid, "ocr");
let mut saved_count = 0;
for (det_id, frame_num, text, bbox_json, _confidence) in detections {
let bbox = match bbox_json {
Some(json) => serde_json::from_value::<Bbox>(json).unwrap_or_default(),
None => Bbox::default(),
};
let timestamp = frame_num as f64 / video_info.fps;
let safe_text = text
.chars()
.take(20)
.filter(|c| c.is_alphanumeric() || *c == ' ')
.collect::<String>()
.replace(' ', "_");
let output_path = output_dir.join(format!("ocr_{}_{}.jpg", det_id, safe_text));
if self
.extract_frame(&video_info.file_path, timestamp, &bbox, &output_path)
.await
.is_ok()
{
saved_count += 1;
}
}
info!(
"Extracted {} OCR snapshots for file_uuid={}",
saved_count, file_uuid
);
Ok(())
}
async fn extract_logo_snapshots(
&self,
pool: &sqlx::PgPool,
file_uuid: &str,
video_info: &VideoInfo,
) -> Result<()> {
let yolo_table = crate::core::db::schema::table_name("yolo_detections");
let query = format!(
"SELECT id, frame_number, class_name, bbox, confidence
FROM {}
WHERE file_uuid = $1 AND class_name IN ('logo', 'brand') AND confidence >= 0.6
ORDER BY confidence DESC
LIMIT 20",
yolo_table
);
let detections: Vec<(i32, i64, String, Option<serde_json::Value>, f64)> =
sqlx::query_as(&query)
.bind(file_uuid)
.fetch_all(pool)
.await
.context("Failed to query logo detections")?;
let output_dir = self.manager.file_type_dir(file_uuid, "logos");
let mut saved_count = 0;
for (det_id, frame_num, class_name, bbox_json, confidence) in detections {
let bbox = match bbox_json {
Some(json) => serde_json::from_value::<Bbox>(json).unwrap_or_default(),
None => Bbox::default(),
};
let timestamp = frame_num as f64 / video_info.fps;
let output_path = output_dir.join(format!(
"logo_{}_{}_{:.2}.jpg",
det_id, class_name, confidence
));
if self
.extract_frame(&video_info.file_path, timestamp, &bbox, &output_path)
.await
.is_ok()
{
saved_count += 1;
}
}
info!(
"Extracted {} logo snapshots for file_uuid={}",
saved_count, file_uuid
);
Ok(())
}
async fn extract_product_snapshots(
&self,
pool: &sqlx::PgPool,
file_uuid: &str,
video_info: &VideoInfo,
) -> Result<()> {
let yolo_table = crate::core::db::schema::table_name("yolo_detections");
let query = format!(
"SELECT id, frame_number, class_name, bbox, confidence
FROM {}
WHERE file_uuid = $1 AND class_name NOT IN ('logo', 'brand', 'person', 'face') AND confidence >= 0.6
ORDER BY confidence DESC
LIMIT 20",
yolo_table
);
let detections: Vec<(i32, i64, String, Option<serde_json::Value>, f64)> =
sqlx::query_as(&query)
.bind(file_uuid)
.fetch_all(pool)
.await
.context("Failed to query product detections")?;
let output_dir = self.manager.file_type_dir(file_uuid, "products");
let mut saved_count = 0;
for (det_id, frame_num, class_name, bbox_json, confidence) in detections {
let bbox = match bbox_json {
Some(json) => serde_json::from_value::<Bbox>(json).unwrap_or_default(),
None => Bbox::default(),
};
let timestamp = frame_num as f64 / video_info.fps;
let output_path = output_dir.join(format!(
"product_{}_{}_{:.2}.jpg",
det_id, class_name, confidence
));
if self
.extract_frame(&video_info.file_path, timestamp, &bbox, &output_path)
.await
.is_ok()
{
saved_count += 1;
}
}
info!(
"Extracted {} product snapshots for file_uuid={}",
saved_count, file_uuid
);
Ok(())
}
async fn extract_frame(
&self,
video_path: &str,
timestamp: f64,
bbox: &Bbox,
output_path: &Path,
) -> Result<()> {
let crop_filter = format!("crop={}:{}:{}:{}", bbox.width, bbox.height, bbox.x, bbox.y);
let output = Command::new("ffmpeg")
.args(&[
"-ss",
&format!("{:.3}", timestamp),
"-i",
video_path,
"-vf",
&crop_filter,
"-frames:v",
"1",
"-f",
"image2",
"-y",
output_path.to_str().context("Invalid output path")?,
])
.output()
.context("Failed to execute ffmpeg")?;
if !output.status.success() {
let stderr = String::from_utf8_lossy(&output.stderr);
return Err(anyhow::anyhow!("ffmpeg failed: {}", stderr));
}
Ok(())
}
async fn get_video_info(&self, pool: &sqlx::PgPool, file_uuid: &str) -> Result<VideoInfo> {
let video_table = crate::core::db::schema::table_name("videos");
let query = format!(
"SELECT file_path, fps FROM {} WHERE file_uuid = $1",
video_table
);
let row: Option<(String, f64)> = sqlx::query_as(&query)
.bind(file_uuid)
.fetch_optional(pool)
.await
.context("Failed to query video info")?;
match row {
Some((file_path, fps)) => Ok(VideoInfo { file_path, fps }),
None => Err(anyhow::anyhow!("Video not found: file_uuid={}", file_uuid)),
}
}
pub async fn migrate_snapshots(
&self,
new_uuid: &str,
parent_uuid: &str,
) -> Result<Vec<String>> {
info!(
"Starting snapshot migration: {} -> {}",
parent_uuid, new_uuid
);
self.manager
.ensure_file_dirs(new_uuid)
.context("Failed to create snapshot directories")?;
let parent_types = self.manager.list_snapshot_types(parent_uuid);
let mut migrated = Vec::new();
for snap_type in &parent_types {
let src = self.manager.file_type_dir(parent_uuid, snap_type);
let dst = self.manager.file_type_dir(new_uuid, snap_type);
if src.exists() {
if let Err(e) = copy_dir_recursive(&src, &dst) {
warn!("Failed to migrate {} snapshots: {}", snap_type, e);
} else {
migrated.push(snap_type.clone());
info!(
"Migrated {} snapshots: {} -> {}",
snap_type, parent_uuid, new_uuid
);
}
}
}
info!("Migration completed: {} types migrated", migrated.len());
Ok(migrated)
}
pub async fn auto_tear_down(&self, file_uuid: &str) -> Result<()> {
info!("Starting auto tear down for file_uuid={}", file_uuid);
let types = self.manager.list_snapshot_types(file_uuid);
let hits = types.len() as u64;
let tier = SnapshotManager::compute_tier(hits);
if tier != crate::core::storage::snapshot_manager::SnapshotTier::Cold {
info!(
"Skipping tear down: file_uuid={} is not Cold (tier={:?})",
file_uuid, tier
);
return Ok(());
}
let redis_cache = crate::core::cache::redis_cache::RedisCache::new()
.context("Failed to create Redis cache")?;
let last_access = redis_cache.get_snapshot_hits(file_uuid).await.unwrap_or(0);
if last_access > 0 {
info!(
"Skipping tear down: file_uuid={} has recent access (hits={})",
file_uuid, last_access
);
return Ok(());
}
self.manager
.remove_file_snapshots(file_uuid)
.context("Failed to remove snapshot files")?;
let status_key =
crate::core::cache::redis_cache::RedisCache::snapshot_status_key(file_uuid).await;
let hits_key =
crate::core::cache::redis_cache::RedisCache::snapshot_hits_key(file_uuid).await;
let access_key =
crate::core::cache::redis_cache::RedisCache::snapshot_last_access_key(file_uuid).await;
redis_cache.delete(&status_key).await.ok();
redis_cache.delete(&hits_key).await.ok();
redis_cache.delete(&access_key).await.ok();
info!("Auto tear down completed for file_uuid={}", file_uuid);
Ok(())
}
pub fn manager(&self) -> &SnapshotManager {
&self.manager
}
}
#[derive(Debug, Default, Serialize, Deserialize)]
struct Bbox {
x: i32,
y: i32,
width: i32,
height: i32,
}
fn copy_dir_recursive(src: &Path, dst: &Path) -> std::io::Result<()> {
std::fs::create_dir_all(dst)?;
for entry in std::fs::read_dir(src)? {
let entry = entry?;
let path = entry.path();
let dest_path = dst.join(entry.file_name());
if path.is_dir() {
copy_dir_recursive(&path, &dest_path)?;
} else {
std::fs::copy(&path, &dest_path)?;
}
}
Ok(())
}

View File

@@ -1,7 +1,9 @@
pub mod file_manager;
pub mod output_dir;
pub mod snapshot_manager;
pub mod uuid;
pub use file_manager::FileManager;
pub use output_dir::OutputDir;
pub use snapshot_manager::SnapshotManager;
pub use uuid::compute_uuid;

View File

@@ -0,0 +1,268 @@
use std::path::{Path, PathBuf};
use crate::core::config;
#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum SnapshotTier {
Hot,
Warm,
Cold,
}
impl std::fmt::Display for SnapshotTier {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
SnapshotTier::Hot => write!(f, "hot"),
SnapshotTier::Warm => write!(f, "warm"),
SnapshotTier::Cold => write!(f, "cold"),
}
}
}
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct SnapshotStatus {
pub file_uuid: String,
pub tier: SnapshotTier,
pub hits: u64,
pub types: Vec<String>,
pub generated_at: Option<String>,
}
#[derive(Debug, Clone)]
pub struct SnapshotManager {
base_dir: PathBuf,
}
impl SnapshotManager {
pub fn new(user_dir: &str) -> Self {
let snapshot_dir_name = config::snapshot::SNAPSHOT_DIR_NAME.as_str();
let base_dir = Path::new(user_dir).join(snapshot_dir_name);
Self { base_dir }
}
pub fn base_dir(&self) -> &Path {
&self.base_dir
}
pub fn file_snapshot_dir(&self, file_uuid: &str) -> PathBuf {
self.base_dir.join(file_uuid)
}
pub fn file_type_dir(&self, file_uuid: &str, snapshot_type: &str) -> PathBuf {
self.base_dir.join(file_uuid).join(snapshot_type)
}
pub fn identity_snapshot_dir(&self, identity_uuid: &str) -> PathBuf {
self.base_dir.join("identities").join(identity_uuid)
}
pub fn identity_face_dir(&self, identity_uuid: &str) -> PathBuf {
self.base_dir
.join("identities")
.join(identity_uuid)
.join("faces")
}
pub fn ensure_file_dirs(&self, file_uuid: &str) -> std::io::Result<()> {
let dir = self.file_snapshot_dir(file_uuid);
std::fs::create_dir_all(&dir)?;
for snap_type in ["faces", "logos", "products", "ocr"] {
std::fs::create_dir_all(dir.join(snap_type))?;
}
Ok(())
}
pub fn ensure_identity_dirs(&self, identity_uuid: &str) -> std::io::Result<()> {
let dir = self.identity_snapshot_dir(identity_uuid);
std::fs::create_dir_all(&dir)?;
std::fs::create_dir_all(dir.join("faces"))?;
Ok(())
}
pub fn compute_tier(hits: u64) -> SnapshotTier {
let threshold = *config::snapshot::HOT_THRESHOLD;
if hits >= threshold {
SnapshotTier::Hot
} else if hits > 0 {
SnapshotTier::Warm
} else {
SnapshotTier::Cold
}
}
pub fn tier_ttl(&self, tier: SnapshotTier) -> u64 {
match tier {
SnapshotTier::Hot => *config::snapshot::HOT_TTL_SECS,
SnapshotTier::Warm => *config::snapshot::WARM_TTL_SECS,
SnapshotTier::Cold => 0,
}
}
pub fn snapshot_exists(&self, file_uuid: &str, snapshot_type: &str) -> bool {
self.file_type_dir(file_uuid, snapshot_type).exists()
}
pub fn list_snapshot_types(&self, file_uuid: &str) -> Vec<String> {
let dir = self.file_snapshot_dir(file_uuid);
if !dir.exists() {
return Vec::new();
}
std::fs::read_dir(&dir)
.into_iter()
.flatten()
.filter_map(|e| e.ok())
.filter(|e| e.path().is_dir())
.filter_map(|e| e.file_name().to_str().map(String::from))
.collect()
}
pub fn remove_file_snapshots(&self, file_uuid: &str) -> std::io::Result<()> {
let dir = self.file_snapshot_dir(file_uuid);
if dir.exists() {
std::fs::remove_dir_all(&dir)?;
}
Ok(())
}
pub fn remove_identity_snapshots(&self, identity_uuid: &str) -> std::io::Result<()> {
let dir = self.identity_snapshot_dir(identity_uuid);
if dir.exists() {
std::fs::remove_dir_all(&dir)?;
}
Ok(())
}
}
#[cfg(test)]
mod tests {
use super::*;
fn create_test_manager() -> (SnapshotManager, tempfile::TempDir) {
let temp_dir = tempfile::tempdir().unwrap();
let manager = SnapshotManager::new(temp_dir.path().to_str().unwrap());
(manager, temp_dir)
}
#[test]
fn test_compute_tier_hot() {
assert_eq!(SnapshotManager::compute_tier(5), SnapshotTier::Hot);
assert_eq!(SnapshotManager::compute_tier(10), SnapshotTier::Hot);
assert_eq!(SnapshotManager::compute_tier(100), SnapshotTier::Hot);
}
#[test]
fn test_compute_tier_warm() {
assert_eq!(SnapshotManager::compute_tier(1), SnapshotTier::Warm);
assert_eq!(SnapshotManager::compute_tier(4), SnapshotTier::Warm);
}
#[test]
fn test_compute_tier_cold() {
assert_eq!(SnapshotManager::compute_tier(0), SnapshotTier::Cold);
}
#[test]
fn test_tier_display() {
assert_eq!(SnapshotTier::Hot.to_string(), "hot");
assert_eq!(SnapshotTier::Warm.to_string(), "warm");
assert_eq!(SnapshotTier::Cold.to_string(), "cold");
}
#[test]
fn test_ensure_file_dirs_creates_structure() {
let (manager, _temp) = create_test_manager();
let file_uuid = "test_file_123";
manager.ensure_file_dirs(file_uuid).unwrap();
assert!(manager.file_snapshot_dir(file_uuid).exists());
assert!(manager.file_type_dir(file_uuid, "faces").exists());
assert!(manager.file_type_dir(file_uuid, "logos").exists());
assert!(manager.file_type_dir(file_uuid, "products").exists());
assert!(manager.file_type_dir(file_uuid, "ocr").exists());
}
#[test]
fn test_ensure_identity_dirs_creates_structure() {
let (manager, _temp) = create_test_manager();
let identity_uuid = "test_identity_456";
manager.ensure_identity_dirs(identity_uuid).unwrap();
assert!(manager.identity_snapshot_dir(identity_uuid).exists());
assert!(manager.identity_face_dir(identity_uuid).exists());
}
#[test]
fn test_list_snapshot_types_empty() {
let (manager, _temp) = create_test_manager();
let types = manager.list_snapshot_types("nonexistent");
assert!(types.is_empty());
}
#[test]
fn test_list_snapshot_types_after_creation() {
let (manager, _temp) = create_test_manager();
let file_uuid = "test_file_789";
manager.ensure_file_dirs(file_uuid).unwrap();
let types = manager.list_snapshot_types(file_uuid);
assert_eq!(types.len(), 4);
assert!(types.contains(&"faces".to_string()));
assert!(types.contains(&"logos".to_string()));
assert!(types.contains(&"products".to_string()));
assert!(types.contains(&"ocr".to_string()));
}
#[test]
fn test_remove_file_snapshots() {
let (manager, _temp) = create_test_manager();
let file_uuid = "test_file_remove";
manager.ensure_file_dirs(file_uuid).unwrap();
assert!(manager.file_snapshot_dir(file_uuid).exists());
manager.remove_file_snapshots(file_uuid).unwrap();
assert!(!manager.file_snapshot_dir(file_uuid).exists());
}
#[test]
fn test_remove_identity_snapshots() {
let (manager, _temp) = create_test_manager();
let identity_uuid = "test_identity_remove";
manager.ensure_identity_dirs(identity_uuid).unwrap();
assert!(manager.identity_snapshot_dir(identity_uuid).exists());
manager.remove_identity_snapshots(identity_uuid).unwrap();
assert!(!manager.identity_snapshot_dir(identity_uuid).exists());
}
#[test]
fn test_snapshot_exists() {
let (manager, _temp) = create_test_manager();
let file_uuid = "test_exists";
assert!(!manager.snapshot_exists(file_uuid, "faces"));
manager.ensure_file_dirs(file_uuid).unwrap();
assert!(manager.snapshot_exists(file_uuid, "faces"));
assert!(!manager.snapshot_exists(file_uuid, "nonexistent"));
}
#[test]
fn test_tier_ttl() {
let (manager, _temp) = create_test_manager();
let hot_ttl = manager.tier_ttl(SnapshotTier::Hot);
assert_eq!(hot_ttl, *config::snapshot::HOT_TTL_SECS);
let warm_ttl = manager.tier_ttl(SnapshotTier::Warm);
assert_eq!(warm_ttl, *config::snapshot::WARM_TTL_SECS);
let cold_ttl = manager.tier_ttl(SnapshotTier::Cold);
assert_eq!(cold_ttl, 0);
}
}

View File

@@ -2,12 +2,12 @@ use sha2::{Digest, Sha256};
use std::path::PathBuf;
/// Compute UUID from file path using SHA256
/// UUID = SHA256(user_path + filename)[0:16]
/// UUID = SHA256(user_path + filename)[0:32]
pub fn compute_uuid(user_path: &str, filename: &str) -> String {
let key = format!("{}/{}", user_path.trim_end_matches('/'), filename);
let hash = Sha256::digest(key.as_bytes());
let hash_str = hex::encode(hash);
hash_str[0..16].to_string()
hash_str[0..32].to_string()
}
/// Compute UUID from full file path
@@ -29,19 +29,16 @@ pub fn compute_uuid_from_path(full_path: &str) -> String {
/// Input: ./demo/video.mp4 or ./demo/path/to/video.mp4
/// Returns: (username, filepath) e.g., ("demo", "video.mp4") or ("demo", "path/to/video.mp4")
pub fn extract_user_from_relative_path(relative_path: &str) -> (String, String) {
// Remove leading ./
let path = relative_path.strip_prefix("./").unwrap_or(relative_path);
let path_buf = PathBuf::from(path);
// First component is username
let mut components = path_buf.components();
let username = components
.next()
.map(|c| c.as_os_str().to_string_lossy().to_string())
.unwrap_or_default();
// Remaining path (filepath)
let filepath: String = components
.map(|c| c.as_os_str().to_string_lossy().to_string())
.collect::<Vec<_>>()
@@ -57,6 +54,62 @@ pub fn compute_uuid_from_relative_path(relative_path: &str) -> String {
compute_uuid(&username, &filepath)
}
/// Get MAC address of primary network interface
/// Returns MAC address in format: a1:b2:c3:d4:e5:f6
pub fn get_mac_address() -> String {
use mac_address::get_mac_address;
match get_mac_address() {
Ok(Some(mac)) => {
let bytes = mac.bytes();
format!(
"{:02x}:{:02x}:{:02x}:{:02x}:{:02x}:{:02x}",
bytes[0], bytes[1], bytes[2], bytes[3], bytes[4], bytes[5]
)
}
Ok(None) => "00:00:00:00:00:00".to_string(),
Err(_) => "00:00:00:00:00:00".to_string(),
}
}
/// Compute Birth UUID (Stable Identity with Location)
/// UUID = SHA256(mac_address|birthday|path|filename)[0:32]
///
/// This UUID format ensures:
/// - Location Encoding: Path is part of the identity (like location code in ID card).
/// - Stability: Uses the original Birthday, not the current timestamp.
/// - Uniqueness: Different MAC, Birthday, Path, or Filename produces different UUIDs.
pub fn compute_birth_uuid(
mac_address: &str,
birthday: &str, // Fixed timestamp of original registration
path: &str, // Canonical file path (Location)
filename: &str,
) -> String {
let key = format!(
"{}|{}|{}|{}",
mac_address,
birthday,
path.trim_end_matches('/'),
filename
);
let hash = Sha256::digest(key.as_bytes());
hex::encode(hash)[0..32].to_string()
}
/// Check if UUID is Birth UUID format (32 characters)
pub fn is_birth_uuid(uuid: &str) -> bool {
uuid.len() == 32 && !uuid.contains('_')
}
/// Extract username from sftpgo user home path
/// Input: ./demo/video.mp4 or /Users/.../demo/video.mp4
/// Returns: username (e.g., "demo")
pub fn extract_username_from_path(path: &str) -> String {
let relative = path.strip_prefix("./").unwrap_or(path);
let parts: Vec<&str> = relative.split('/').collect();
parts.first().copied().unwrap_or("demo").to_string()
}
#[cfg(test)]
mod tests {
use super::*;
@@ -64,14 +117,14 @@ mod tests {
#[test]
fn test_uuid_computation() {
let uuid = compute_uuid("/Users/test/Videos", "video.mp4");
assert_eq!(uuid.len(), 16);
assert_eq!(uuid.len(), 32);
println!("UUID: {}", uuid);
}
#[test]
fn test_uuid_from_path() {
let uuid = compute_uuid_from_path("/Users/test/Videos/video.mp4");
assert_eq!(uuid.len(), 16);
assert_eq!(uuid.len(), 32);
}
#[test]
@@ -90,11 +143,102 @@ mod tests {
let uuid1 = compute_uuid_from_relative_path("./demo/video.mp4");
let uuid2 = compute_uuid_from_relative_path("./demo/video.mp4");
assert_eq!(uuid1, uuid2);
assert_eq!(uuid1.len(), 16);
assert_eq!(uuid1.len(), 32);
// Different users with same filename should have different UUIDs
let uuid_demo = compute_uuid_from_relative_path("./demo/video.mp4");
let uuid_warren = compute_uuid_from_relative_path("./warren/video.mp4");
assert_ne!(uuid_demo, uuid_warren);
}
#[test]
fn test_get_mac_address() {
let mac = get_mac_address();
assert_eq!(mac.len(), 17); // a1:b2:c3:d4:e5:f6
assert!(mac.contains(':'));
println!("MAC Address: {}", mac);
}
#[test]
fn test_birth_uuid_generation() {
let uuid = compute_birth_uuid(
"a1:b2:c3:d4:e5:f6",
"2026-04-27T22:00:00+08:00",
"/Users/test/Videos",
"video.mp4",
);
assert_eq!(uuid.len(), 32);
println!("Birth UUID: {}", uuid);
}
#[test]
fn test_birth_uuid_different_mac() {
let uuid1 = compute_birth_uuid(
"a1:b2:c3:d4:e5:f6",
"2026-04-27T10:00:00",
"/Users/test/Videos",
"video.mp4",
);
let uuid2 = compute_birth_uuid(
"d4:e5:f6:a1:b2:c3",
"2026-04-27T10:00:00",
"/Users/test/Videos",
"video.mp4",
);
assert_ne!(uuid1, uuid2);
}
#[test]
fn test_birth_uuid_different_path() {
// Moving file to different path creates new identity
let uuid1 = compute_birth_uuid(
"a1:b2:c3:d4:e5:f6",
"2026-04-27T10:00:00",
"/Users/test/Videos",
"video.mp4",
);
let uuid2 = compute_birth_uuid(
"a1:b2:c3:d4:e5:f6",
"2026-04-27T10:00:00",
"/Users/test/Archive",
"video.mp4",
);
assert_ne!(uuid1, uuid2);
}
#[test]
fn test_birth_uuid_same_elements() {
let uuid1 = compute_birth_uuid(
"a1:b2:c3:d4:e5:f6",
"2026-04-27T10:00:00",
"/Users/test/Videos",
"video.mp4",
);
let uuid2 = compute_birth_uuid(
"a1:b2:c3:d4:e5:f6",
"2026-04-27T10:00:00",
"/Users/test/Videos",
"video.mp4",
);
assert_eq!(uuid1, uuid2); // Same elements = same UUID
}
#[test]
fn test_is_birth_uuid() {
let birth_uuid = compute_birth_uuid(
"a1:b2:c3:d4:e5:f6",
"2026-04-27T10:00:00",
"/Users/demo",
"video.mp4",
);
assert!(is_birth_uuid(&birth_uuid));
}
#[test]
fn test_extract_username_from_path() {
let username = extract_username_from_path("./demo/video.mp4");
assert_eq!(username, "demo");
let username = extract_username_from_path("./warren/path/to/video.mp4");
assert_eq!(username, "warren");
}
}

View File

@@ -1,9 +1,8 @@
use crate::core::chunk;
use crate::core::db::PostgresDb;
use sqlx::PgPool;
use tokio::time::{sleep, Duration};
use tracing;
use uuid::Uuid;
use crate::core::chunk;
pub struct JobWorker {
pool: PgPool,
@@ -42,47 +41,39 @@ impl JobWorker {
}
async fn process_next_job(&self) -> anyhow::Result<bool> {
// 1. Fetch a QUEUED job
// We use a transaction to ensure no two workers pick the same job (atomic update)
let job_row: Option<(String, String, String, String, String, i64)> = sqlx::query_as(
// 1. Fetch a QUEUED job from monitor_jobs
// Using sqlx::query_as to map to tuple.
// Note: progress_total is int4 (i32).
let job_row: Option<(i32, String, i32)> = sqlx::query_as(
r#"
UPDATE dev.jobs
UPDATE dev.monitor_jobs
SET status = 'RUNNING', updated_at = NOW()
WHERE id = (
SELECT id FROM dev.jobs
SELECT id FROM dev.monitor_jobs
WHERE status = 'QUEUED'
ORDER BY created_at ASC
LIMIT 1
FOR UPDATE SKIP LOCKED
)
RETURNING id::text, asset_uuid, rule, status, processor_list, total_frames
RETURNING id, uuid, COALESCE(progress_total, 0)
"#,
)
.fetch_optional(&self.pool)
.await?;
if let Some((job_id, asset_uuid, rule, _status, _processors, total_frames)) = job_row {
let job_uuid =
Uuid::parse_str(&job_id).map_err(|e| anyhow::anyhow!("Invalid job UUID: {}", e))?;
if let Some((job_id, asset_uuid, total_frames)) = job_row {
tracing::info!(
"🚀 Processing Job {} for Asset {} (Rule: {})",
"🚀 Processing Job {} for Asset {} (Frames: {})",
job_id,
asset_uuid,
rule
total_frames
);
// 2. Execute Logic based on Rule
let result = match rule.as_str() {
"rule1" => {
let fps = self.get_asset_fps(&asset_uuid).await?;
chunk::rule1_ingest::ingest_rule1(&self.pool, &asset_uuid, fps).await
}
_ => {
tracing::warn!("Unknown rule type: {}", rule);
Ok(0)
}
};
// 2. Execute Logic (Default to rule1 for now as monitor_jobs doesn't store rule type explicitly)
let fps = self.get_asset_fps(&asset_uuid).await?;
let db = PostgresDb::from_pool(self.pool.clone());
let result = chunk::rule1_ingest::execute_rule1(&db, &asset_uuid, fps).await;
// 3. Update Job Status
match result {
@@ -93,17 +84,21 @@ impl JobWorker {
chunk_count
);
sqlx::query!(
"UPDATE dev.jobs SET status = 'COMPLETED', processed_frames = total_frames, updated_at = NOW() WHERE id = $1",
job_uuid
// Update monitor_jobs
// Using runtime query to avoid compile-time macro checks
sqlx::query(
"UPDATE dev.monitor_jobs SET status = 'COMPLETED', progress_current = progress_total, updated_at = NOW() WHERE id = $1"
)
.bind(job_id)
.execute(&self.pool)
.await?;
sqlx::query!(
"UPDATE dev.videos SET processing_status = 'COMPLETED' WHERE uuid = $1",
asset_uuid
// Update video processing_status
sqlx::query(
"UPDATE dev.videos SET processing_status = $1::jsonb WHERE file_uuid = $2",
)
.bind(serde_json::json!({"status": "COMPLETED"}))
.bind(asset_uuid)
.execute(&self.pool)
.await?;
}
@@ -116,11 +111,11 @@ impl JobWorker {
&err_msg
};
sqlx::query!(
"UPDATE dev.jobs SET status = 'FAILED', error_message = $2, updated_at = NOW() WHERE id = $1",
job_uuid,
safe_msg
sqlx::query(
"UPDATE dev.monitor_jobs SET status = 'FAILED', last_error = $2, updated_at = NOW() WHERE id = $1"
)
.bind(job_id)
.bind(safe_msg)
.execute(&self.pool)
.await?;
}
@@ -132,8 +127,9 @@ impl JobWorker {
}
async fn get_asset_fps(&self, uuid: &str) -> anyhow::Result<f64> {
// dev.videos now uses file_uuid and has a direct fps column
let fps: Option<f64> =
sqlx::query_scalar("SELECT (metadata->>'fps')::float FROM dev.videos WHERE uuid = $1")
sqlx::query_scalar("SELECT fps FROM dev.videos WHERE file_uuid = $1")
.bind(uuid)
.fetch_optional(&self.pool)
.await?;