feat: update core API, database layer, and worker modules

- Remove unused imports (n8n_search, universal_search, Client, Arc, etc.)
- Update API endpoints for identity, face recognition, search
- Fix postgres_db.rs search_videos parent_uuid column
- Add snapshot API and identity agent API
- Clean up backup files (.bak, .bak2)
This commit is contained in:
Warren
2026-04-30 15:07:02 +08:00
parent 8f2208dd63
commit 2b23d1cfbd
148 changed files with 8553 additions and 48637 deletions

View File

@@ -28,6 +28,15 @@ pub struct Face {
pub width: i32,
pub height: i32,
pub confidence: f32,
pub embedding: Option<Vec<f32>>,
pub landmarks: Option<Vec<Vec<f32>>>,
pub attributes: Option<FaceAttributes>,
}
/// Optional attributes attached to a detected [`Face`] via its `attributes` field.
///
/// Both fields are optional, so a face can carry any subset of them.
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct FaceAttributes {
/// Age value, when provided (presumably an estimate in years — confirm with the producer).
pub age: Option<i32>,
/// Gender label, when provided; the tests in this file use `"male"` and `"female"`.
pub gender: Option<String>,
}
pub async fn process_face(
@@ -89,6 +98,12 @@ mod tests {
width: 50,
height: 60,
confidence: 0.95,
embedding: Some(vec![0.1, 0.2, 0.3]),
landmarks: Some(vec![vec![10.0, 20.0], vec![30.0, 40.0]]),
attributes: Some(FaceAttributes {
age: Some(30),
gender: Some("male".to_string()),
}),
}],
}],
};
@@ -96,6 +111,9 @@ mod tests {
let json = serde_json::to_string(&result).unwrap();
assert!(json.contains("face_1"));
assert!(json.contains("\"width\":50"));
assert!(json.contains("embedding"));
assert!(json.contains("landmarks"));
assert!(json.contains("attributes"));
}
#[test]
@@ -108,7 +126,17 @@ mod tests {
"frame": 30,
"timestamp": 1.2,
"faces": [
{"face_id": "f1", "x": 10, "y": 20, "width": 30, "height": 40, "confidence": 0.85}
{
"face_id": "f1",
"x": 10,
"y": 20,
"width": 30,
"height": 40,
"confidence": 0.85,
"embedding": [0.1, 0.2, 0.3],
"landmarks": [[5.0, 10.0]],
"attributes": {"age": 25, "gender": "female"}
}
]
}
]
@@ -118,6 +146,9 @@ mod tests {
assert_eq!(result.frame_count, 50);
assert_eq!(result.frames.len(), 1);
assert_eq!(result.frames[0].faces[0].x, 10);
assert!(result.frames[0].faces[0].embedding.is_some());
assert!(result.frames[0].faces[0].landmarks.is_some());
assert!(result.frames[0].faces[0].attributes.is_some());
}
#[test]
@@ -139,7 +170,33 @@ mod tests {
width: 10,
height: 10,
confidence: 0.5,
embedding: None,
landmarks: None,
attributes: None,
};
assert!(face.confidence >= 0.0 && face.confidence <= 1.0);
}
#[test]
fn test_face_with_embedding() {
    // Build the attributes separately so the `Face` literal stays flat.
    let attributes = FaceAttributes {
        age: Some(35),
        gender: Some("male".to_string()),
    };

    // A face with a 512-value embedding and attributes, but no landmarks.
    let face = Face {
        face_id: Some("face_001".to_string()),
        x: 100,
        y: 200,
        width: 50,
        height: 60,
        confidence: 0.95,
        embedding: Some(vec![0.1; 512]),
        landmarks: None,
        attributes: Some(attributes),
    };

    assert!(face.embedding.is_some());
    let values = face.embedding.unwrap();
    assert_eq!(values.len(), 512);
    assert_eq!(values[0], 0.1);
}
}

View File

@@ -8,6 +8,7 @@ pub mod face_recognition;
pub mod ocr;
pub mod pose;
pub mod scene_classification;
pub mod snapshot_agent;
pub mod story;
pub mod visual_chunk;
pub mod yolo;
@@ -28,6 +29,7 @@ pub use pose::{process_pose, Bbox, Keypoint, PersonPose, PoseFrame, PoseResult};
pub use scene_classification::{
process_scene_classification, SceneClassificationResult, ScenePrediction, SceneSegment,
};
pub use snapshot_agent::{SnapshotAgent, SnapshotAgentConfig};
pub use story::{process_story, StoryChildChunk, StoryParentChunk, StoryResult, StoryStats};
pub use visual_chunk::{process_visual_chunk, process_visual_chunk_advanced, VisualChunkResult};
pub use yolo::{process_yolo, YoloFrame, YoloObject, YoloResult};

View File

@@ -0,0 +1,491 @@
use std::path::Path;
use std::process::Command;
use anyhow::{Context, Result};
use serde::{Deserialize, Serialize};
use tracing::{info, warn};
use crate::core::config;
use crate::core::db::{Database, PostgresDb};
use crate::core::storage::snapshot_manager::SnapshotManager;
/// Configuration for [`SnapshotAgent`].
///
/// Derives `Debug`, `Clone`, `PartialEq` and `Eq` so the configuration can be
/// logged, duplicated and compared.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SnapshotAgentConfig {
    /// Directory handed to `SnapshotManager::new` when the agent is built.
    pub output_dir: String,
    /// Hot-tier threshold; the `Default` impl reads it from
    /// `config::snapshot::HOT_THRESHOLD`.
    pub hot_threshold: u64,
}
impl Default for SnapshotAgentConfig {
fn default() -> Self {
Self {
output_dir: config::OUTPUT_DIR.clone(),
hot_threshold: *config::snapshot::HOT_THRESHOLD,
}
}
}
/// Record describing a single face detection.
///
/// NOTE(review): nothing in the visible part of this file constructs or reads
/// this type (the face query decodes into a tuple instead) — confirm it is
/// still needed. The fields presumably mirror the face detections table.
#[derive(Debug, Serialize, Deserialize)]
struct FaceDetection {
/// Detection identifier.
id: i32,
/// Identifier of the file the detection belongs to.
file_uuid: String,
/// Frame number of the detection.
frame_number: i64,
/// Detection confidence (presumably in `0.0..=1.0` — TODO confirm).
confidence: f64,
/// Bounding box as raw JSON, when present.
bbox: Option<serde_json::Value>,
}
/// Video properties loaded by `get_video_info` and used when extracting frames.
#[derive(Debug, Serialize, Deserialize)]
struct VideoInfo {
/// Path of the video; passed to ffmpeg as the `-i` input.
file_path: String,
/// Frames per second; frame numbers are divided by this to obtain a timestamp.
fps: f64,
}
/// Generates, migrates and removes snapshot images for a file.
pub struct SnapshotAgent {
/// Configuration the agent was built with.
config: SnapshotAgentConfig,
/// Snapshot manager created from `config.output_dir`; resolves and manages snapshot directories.
manager: SnapshotManager,
}
impl SnapshotAgent {
/// Builds an agent from `config`, creating its `SnapshotManager` from
/// `config.output_dir`.
pub fn new(config: SnapshotAgentConfig) -> Self {
    Self {
        // Evaluated before `config` is moved into the struct below.
        manager: SnapshotManager::new(&config.output_dir),
        config,
    }
}
pub fn default() -> Self {
Self::new(SnapshotAgentConfig::default())
}
/// Generates snapshots of one kind for the video identified by `file_uuid`.
///
/// `snapshot_type` must be one of `"faces"`, `"ocr"`, `"logos"` or
/// `"products"`. The type is validated before any work is done, so an
/// unsupported value no longer opens a database connection or creates
/// snapshot directories.
///
/// # Errors
///
/// Returns an error when `snapshot_type` is unsupported, the database
/// connection fails, the video cannot be found, the snapshot directories
/// cannot be created, or the detection query for the chosen type fails.
pub async fn generate_file_snapshots(
    &self,
    file_uuid: &str,
    snapshot_type: &str,
) -> Result<()> {
    info!(
        "Starting snapshot generation: file_uuid={}, type={}",
        file_uuid, snapshot_type
    );

    // Fail fast: reject unknown types before touching the database or disk.
    if !matches!(snapshot_type, "faces" | "ocr" | "logos" | "products") {
        return Err(anyhow::anyhow!(
            "Unsupported snapshot type: {}",
            snapshot_type
        ));
    }

    let db = PostgresDb::init()
        .await
        .context("Failed to connect to database")?;
    let video_info = self
        .get_video_info(db.pool(), file_uuid)
        .await
        .context("Failed to get video info")?;
    self.manager
        .ensure_file_dirs(file_uuid)
        .context("Failed to create snapshot directories")?;

    match snapshot_type {
        "faces" => {
            self.extract_face_snapshots(db.pool(), file_uuid, &video_info)
                .await?
        }
        "ocr" => {
            self.extract_ocr_snapshots(db.pool(), file_uuid, &video_info)
                .await?
        }
        "logos" => {
            self.extract_logo_snapshots(db.pool(), file_uuid, &video_info)
                .await?
        }
        "products" => {
            self.extract_product_snapshots(db.pool(), file_uuid, &video_info)
                .await?
        }
        // Not reachable after the guard above; kept as an error rather than a
        // panic so the two lists can never disagree silently.
        other => {
            return Err(anyhow::anyhow!("Unsupported snapshot type: {}", other));
        }
    }

    info!(
        "Snapshot generation completed: file_uuid={}, type={}",
        file_uuid, snapshot_type
    );
    Ok(())
}
async fn extract_face_snapshots(
&self,
pool: &sqlx::PgPool,
file_uuid: &str,
video_info: &VideoInfo,
) -> Result<()> {
let face_table = crate::core::db::schema::table_name("face_detections");
let query = format!(
"SELECT id, face_id, file_uuid, frame_number, confidence, bbox
FROM {}
WHERE file_uuid = $1 AND confidence >= 0.5
ORDER BY confidence DESC
LIMIT 50",
face_table
);
let faces: Vec<(i32, String, i64, f64, Option<serde_json::Value>)> = sqlx::query_as(&query)
.bind(file_uuid)
.fetch_all(pool)
.await
.context("Failed to query face detections")?;
let output_dir = self.manager.file_type_dir(file_uuid, "faces");
let mut saved_count = 0;
for (face_id_db, _uuid, frame_num, confidence, bbox_json) in faces {
let bbox = match bbox_json {
Some(json) => serde_json::from_value::<Bbox>(json).unwrap_or_default(),
None => Bbox::default(),
};
let timestamp = frame_num as f64 / video_info.fps;
let output_path =
output_dir.join(format!("face_{}_conf{:.2}.jpg", face_id_db, confidence));
if self
.extract_frame(&video_info.file_path, timestamp, &bbox, &output_path)
.await
.is_ok()
{
saved_count += 1;
}
}
info!(
"Extracted {} face snapshots for file_uuid={}",
saved_count, file_uuid
);
Ok(())
}
async fn extract_ocr_snapshots(
&self,
pool: &sqlx::PgPool,
file_uuid: &str,
video_info: &VideoInfo,
) -> Result<()> {
let ocr_table = crate::core::db::schema::table_name("ocr_detections");
let query = format!(
"SELECT id, frame_number, text, bbox, confidence
FROM {}
WHERE file_uuid = $1 AND confidence >= 0.7
ORDER BY confidence DESC
LIMIT 30",
ocr_table
);
let detections: Vec<(i32, i64, String, Option<serde_json::Value>, f64)> =
sqlx::query_as(&query)
.bind(file_uuid)
.fetch_all(pool)
.await
.context("Failed to query OCR detections")?;
let output_dir = self.manager.file_type_dir(file_uuid, "ocr");
let mut saved_count = 0;
for (det_id, frame_num, text, bbox_json, _confidence) in detections {
let bbox = match bbox_json {
Some(json) => serde_json::from_value::<Bbox>(json).unwrap_or_default(),
None => Bbox::default(),
};
let timestamp = frame_num as f64 / video_info.fps;
let safe_text = text
.chars()
.take(20)
.filter(|c| c.is_alphanumeric() || *c == ' ')
.collect::<String>()
.replace(' ', "_");
let output_path = output_dir.join(format!("ocr_{}_{}.jpg", det_id, safe_text));
if self
.extract_frame(&video_info.file_path, timestamp, &bbox, &output_path)
.await
.is_ok()
{
saved_count += 1;
}
}
info!(
"Extracted {} OCR snapshots for file_uuid={}",
saved_count, file_uuid
);
Ok(())
}
async fn extract_logo_snapshots(
&self,
pool: &sqlx::PgPool,
file_uuid: &str,
video_info: &VideoInfo,
) -> Result<()> {
let yolo_table = crate::core::db::schema::table_name("yolo_detections");
let query = format!(
"SELECT id, frame_number, class_name, bbox, confidence
FROM {}
WHERE file_uuid = $1 AND class_name IN ('logo', 'brand') AND confidence >= 0.6
ORDER BY confidence DESC
LIMIT 20",
yolo_table
);
let detections: Vec<(i32, i64, String, Option<serde_json::Value>, f64)> =
sqlx::query_as(&query)
.bind(file_uuid)
.fetch_all(pool)
.await
.context("Failed to query logo detections")?;
let output_dir = self.manager.file_type_dir(file_uuid, "logos");
let mut saved_count = 0;
for (det_id, frame_num, class_name, bbox_json, confidence) in detections {
let bbox = match bbox_json {
Some(json) => serde_json::from_value::<Bbox>(json).unwrap_or_default(),
None => Bbox::default(),
};
let timestamp = frame_num as f64 / video_info.fps;
let output_path = output_dir.join(format!(
"logo_{}_{}_{:.2}.jpg",
det_id, class_name, confidence
));
if self
.extract_frame(&video_info.file_path, timestamp, &bbox, &output_path)
.await
.is_ok()
{
saved_count += 1;
}
}
info!(
"Extracted {} logo snapshots for file_uuid={}",
saved_count, file_uuid
);
Ok(())
}
async fn extract_product_snapshots(
&self,
pool: &sqlx::PgPool,
file_uuid: &str,
video_info: &VideoInfo,
) -> Result<()> {
let yolo_table = crate::core::db::schema::table_name("yolo_detections");
let query = format!(
"SELECT id, frame_number, class_name, bbox, confidence
FROM {}
WHERE file_uuid = $1 AND class_name NOT IN ('logo', 'brand', 'person', 'face') AND confidence >= 0.6
ORDER BY confidence DESC
LIMIT 20",
yolo_table
);
let detections: Vec<(i32, i64, String, Option<serde_json::Value>, f64)> =
sqlx::query_as(&query)
.bind(file_uuid)
.fetch_all(pool)
.await
.context("Failed to query product detections")?;
let output_dir = self.manager.file_type_dir(file_uuid, "products");
let mut saved_count = 0;
for (det_id, frame_num, class_name, bbox_json, confidence) in detections {
let bbox = match bbox_json {
Some(json) => serde_json::from_value::<Bbox>(json).unwrap_or_default(),
None => Bbox::default(),
};
let timestamp = frame_num as f64 / video_info.fps;
let output_path = output_dir.join(format!(
"product_{}_{}_{:.2}.jpg",
det_id, class_name, confidence
));
if self
.extract_frame(&video_info.file_path, timestamp, &bbox, &output_path)
.await
.is_ok()
{
saved_count += 1;
}
}
info!(
"Extracted {} product snapshots for file_uuid={}",
saved_count, file_uuid
);
Ok(())
}
async fn extract_frame(
&self,
video_path: &str,
timestamp: f64,
bbox: &Bbox,
output_path: &Path,
) -> Result<()> {
let crop_filter = format!("crop={}:{}:{}:{}", bbox.width, bbox.height, bbox.x, bbox.y);
let output = Command::new("ffmpeg")
.args(&[
"-ss",
&format!("{:.3}", timestamp),
"-i",
video_path,
"-vf",
&crop_filter,
"-frames:v",
"1",
"-f",
"image2",
"-y",
output_path.to_str().context("Invalid output path")?,
])
.output()
.context("Failed to execute ffmpeg")?;
if !output.status.success() {
let stderr = String::from_utf8_lossy(&output.stderr);
return Err(anyhow::anyhow!("ffmpeg failed: {}", stderr));
}
Ok(())
}
/// Loads the file path and fps of the video identified by `file_uuid`.
///
/// # Errors
///
/// Returns an error when the query fails or when no row matches `file_uuid`.
async fn get_video_info(&self, pool: &sqlx::PgPool, file_uuid: &str) -> Result<VideoInfo> {
    let video_table = crate::core::db::schema::table_name("videos");
    let sql = format!(
        "SELECT file_path, fps FROM {} WHERE file_uuid = $1",
        video_table
    );
    let found: Option<(String, f64)> = sqlx::query_as(&sql)
        .bind(file_uuid)
        .fetch_optional(pool)
        .await
        .context("Failed to query video info")?;

    // Turn the optional row into the result: a missing row is an error.
    found
        .map(|(file_path, fps)| VideoInfo { file_path, fps })
        .ok_or_else(|| anyhow::anyhow!("Video not found: file_uuid={}", file_uuid))
}
/// Copies every snapshot type directory of `parent_uuid` into `new_uuid`.
///
/// Returns the snapshot types that were copied successfully. A type whose copy
/// fails is logged with a warning and left out of the result; a type whose
/// source directory does not exist is skipped silently.
///
/// # Errors
///
/// Returns an error only when the directories for `new_uuid` cannot be created.
pub async fn migrate_snapshots(
    &self,
    new_uuid: &str,
    parent_uuid: &str,
) -> Result<Vec<String>> {
    info!(
        "Starting snapshot migration: {} -> {}",
        parent_uuid, new_uuid
    );
    self.manager
        .ensure_file_dirs(new_uuid)
        .context("Failed to create snapshot directories")?;

    let source_types = self.manager.list_snapshot_types(parent_uuid);
    let mut migrated = Vec::new();
    for snap_type in &source_types {
        let from_dir = self.manager.file_type_dir(parent_uuid, snap_type);
        let to_dir = self.manager.file_type_dir(new_uuid, snap_type);
        if !from_dir.exists() {
            continue;
        }
        match copy_dir_recursive(&from_dir, &to_dir) {
            Ok(()) => {
                migrated.push(snap_type.clone());
                info!(
                    "Migrated {} snapshots: {} -> {}",
                    snap_type, parent_uuid, new_uuid
                );
            }
            Err(e) => {
                warn!("Failed to migrate {} snapshots: {}", snap_type, e);
            }
        }
    }

    info!("Migration completed: {} types migrated", migrated.len());
    Ok(migrated)
}
/// Removes the snapshots of `file_uuid` when the file is Cold and has no recorded hits.
///
/// Steps, in order:
/// 1. Compute the tier from the number of snapshot types on disk; anything
///    other than `SnapshotTier::Cold` returns `Ok(())` without changes.
/// 2. Read the hit counter from Redis; a value above zero returns `Ok(())`
///    without changes.
/// 3. Remove the snapshot files, then delete the status, hits and last-access
///    keys from Redis.
///
/// # Errors
///
/// Returns an error when the Redis cache cannot be created or the snapshot
/// files cannot be removed.
pub async fn auto_tear_down(&self, file_uuid: &str) -> Result<()> {
info!("Starting auto tear down for file_uuid={}", file_uuid);
let types = self.manager.list_snapshot_types(file_uuid);
// NOTE(review): the value passed to `compute_tier` as "hits" is the number of
// snapshot types, not an access counter — confirm this is intended.
let hits = types.len() as u64;
let tier = SnapshotManager::compute_tier(hits);
if tier != crate::core::storage::snapshot_manager::SnapshotTier::Cold {
info!(
"Skipping tear down: file_uuid={} is not Cold (tier={:?})",
file_uuid, tier
);
return Ok(());
}
let redis_cache = crate::core::cache::redis_cache::RedisCache::new()
.context("Failed to create Redis cache")?;
// Despite its name, `last_access` holds the hit counter returned by
// `get_snapshot_hits`.
// NOTE(review): `unwrap_or(0)` treats an unavailable value as "no hits", so
// the deletion below still proceeds in that case — confirm this is the
// desired behaviour.
let last_access = redis_cache.get_snapshot_hits(file_uuid).await.unwrap_or(0);
if last_access > 0 {
info!(
"Skipping tear down: file_uuid={} has recent access (hits={})",
file_uuid, last_access
);
return Ok(());
}
self.manager
.remove_file_snapshots(file_uuid)
.context("Failed to remove snapshot files")?;
let status_key =
crate::core::cache::redis_cache::RedisCache::snapshot_status_key(file_uuid).await;
let hits_key =
crate::core::cache::redis_cache::RedisCache::snapshot_hits_key(file_uuid).await;
let access_key =
crate::core::cache::redis_cache::RedisCache::snapshot_last_access_key(file_uuid).await;
// Key cleanup is best-effort: the result of each delete is discarded.
redis_cache.delete(&status_key).await.ok();
redis_cache.delete(&hits_key).await.ok();
redis_cache.delete(&access_key).await.ok();
info!("Auto tear down completed for file_uuid={}", file_uuid);
Ok(())
}
/// Returns a reference to the agent's [`SnapshotManager`].
pub fn manager(&self) -> &SnapshotManager {
&self.manager
}
}
/// Bounding box used to build the ffmpeg crop filter (`crop=width:height:x:y`).
///
/// It is deserialized from a detection's `bbox` JSON value. When that value is
/// missing or does not deserialize, callers use `Bbox::default()`, which is
/// all zeros.
#[derive(Debug, Default, Serialize, Deserialize)]
struct Bbox {
/// Horizontal offset of the box (presumably pixels from the left edge — TODO confirm).
x: i32,
/// Vertical offset of the box (presumably pixels from the top edge — TODO confirm).
y: i32,
/// Width of the box.
width: i32,
/// Height of the box.
height: i32,
}
/// Recursively copies the contents of `src` into `dst`.
///
/// `dst` (and any missing parents) is created first. Sub-directories are
/// copied by recursion and every other entry is copied with `std::fs::copy`.
/// The first I/O error stops the copy and is returned, leaving whatever was
/// copied so far in place.
fn copy_dir_recursive(src: &Path, dst: &Path) -> std::io::Result<()> {
    std::fs::create_dir_all(dst)?;
    std::fs::read_dir(src)?.try_for_each(|item| {
        let item = item?;
        let source = item.path();
        let target = dst.join(item.file_name());
        if source.is_dir() {
            copy_dir_recursive(&source, &target)
        } else {
            // `copy` returns the byte count, which is not needed here.
            std::fs::copy(&source, &target).map(|_| ())
        }
    })
}