feat: update core API, database layer, and worker modules

- Remove unused imports (n8n_search, universal_search, Client, Arc, etc.)
- Update API endpoints for identity, face recognition, search
- Fix postgres_db.rs search_videos parent_uuid column
- Add snapshot API and identity agent API
- Clean up backup files (.bak, .bak2)
This commit is contained in:
Warren
2026-04-30 15:07:02 +08:00
parent 8f2208dd63
commit 2b23d1cfbd
148 changed files with 8553 additions and 48637 deletions

View File

@@ -1,19 +1,11 @@
use axum::{
extract::State,
http::StatusCode,
response::Json,
routing::post,
Router,
};
use axum::{extract::State, http::StatusCode, response::Json, routing::post, Router};
use reqwest::Client;
use serde::{Deserialize, Serialize};
use tracing;
use crate::api::server::AppState;
pub fn agent_routes() -> Router<AppState> {
Router::new()
.route("/api/v1/agents/translate", post(translate_text))
Router::new().route("/api/v1/agents/translate", post(translate_text))
}
#[derive(Debug, Deserialize)]
@@ -35,7 +27,6 @@ async fn translate_text(
State(_state): State<AppState>,
Json(req): Json<TranslationRequest>,
) -> Result<Json<TranslationResponse>, (StatusCode, String)> {
let system_prompt = "You are a professional translator for Momentry Core, a digital asset management system specializing in video analysis.
## Guidelines:
@@ -53,7 +44,7 @@ async fn translate_text(
// Call Ollama API
let client = Client::new();
let ollama_url = "http://localhost:11434/api/generate";
// Using qwen3:latest which is available locally
let model = "qwen3:latest".to_string();
@@ -64,16 +55,27 @@ async fn translate_text(
"stream": false
});
let response = client.post(ollama_url)
let response = client
.post(ollama_url)
.json(&body)
.send()
.await
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("Failed to call LLM: {}", e)))?;
.map_err(|e| {
(
StatusCode::INTERNAL_SERVER_ERROR,
format!("Failed to call LLM: {}", e),
)
})?;
let ollama_resp: serde_json::Value = response.json().await
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("Failed to parse LLM response: {}", e)))?;
let ollama_resp: serde_json::Value = response.json().await.map_err(|e| {
(
StatusCode::INTERNAL_SERVER_ERROR,
format!("Failed to parse LLM response: {}", e),
)
})?;
let translated_text = ollama_resp.get("response")
let translated_text = ollama_resp
.get("response")
.and_then(|v| v.as_str())
.unwrap_or("Translation failed")
.to_string();

View File

@@ -16,7 +16,7 @@ use crate::core::processor::face_recognition::{
#[derive(Debug, Deserialize)]
pub struct FaceRecognitionRequest {
pub video_uuid: String,
pub file_uuid: String,
pub enable_recognition: Option<bool>,
pub enable_tracking: Option<bool>,
pub enable_clustering: Option<bool>,
@@ -33,7 +33,7 @@ pub struct FaceRecognitionResponse {
#[derive(Debug, Deserialize)]
pub struct FaceRegistrationRequest {
pub video_uuid: String,
pub file_uuid: String,
pub name: String,
pub metadata: Option<serde_json::Value>,
}
@@ -47,7 +47,7 @@ pub struct FaceRegistrationApiResponse {
#[derive(Debug, Deserialize)]
pub struct FaceSearchRequest {
pub video_uuid: String,
pub file_uuid: String,
pub embedding: Vec<f32>,
pub similarity_threshold: Option<f64>,
pub limit: Option<i32>,
@@ -71,7 +71,7 @@ pub struct FaceSearchResult {
#[derive(Debug, Deserialize)]
pub struct FaceListQuery {
pub video_uuid: String,
pub file_uuid: String,
pub page: Option<usize>,
pub page_size: Option<usize>,
pub active_only: Option<bool>,
@@ -106,7 +106,7 @@ pub fn face_recognition_routes() -> Router<crate::api::server::AppState> {
.route("/api/v1/face/:face_id", get(get_face_details))
.route("/api/v1/face/:face_id", axum::routing::delete(delete_face))
.route(
"/api/v1/face/results/:video_uuid",
"/api/v1/face/results/:file_uuid",
get(get_recognition_results),
)
}
@@ -119,7 +119,7 @@ async fn recognize_faces(
tracing::info!(
"[FACE_RECOGNITION] Starting recognition for video: {}, processing_id: {}",
request.video_uuid,
request.file_uuid,
processing_id
);
@@ -134,12 +134,12 @@ async fn recognize_faces(
}
};
let video_record = match db.get_video_by_uuid(&request.video_uuid).await {
let video_record = match db.get_video_by_uuid(&request.file_uuid).await {
Ok(Some(record)) => record,
Ok(None) => {
return Err((
StatusCode::NOT_FOUND,
format!("Video not found: {}", request.video_uuid),
format!("Video not found: {}", request.file_uuid),
))
}
Err(e) => {
@@ -178,13 +178,13 @@ async fn recognize_faces(
};
// Store results in database
if let Err(e) = store_recognition_results(&db, &request.video_uuid, &result).await {
if let Err(e) = store_recognition_results(&db, &request.file_uuid, &result).await {
tracing::warn!("Failed to store recognition results: {}", e);
}
Ok(Json(FaceRecognitionResponse {
success: true,
message: format!("Face recognition completed for {}", request.video_uuid),
message: format!("Face recognition completed for {}", request.file_uuid),
result: Some(result),
processing_id,
}))
@@ -334,7 +334,7 @@ async fn register_face_api(
.bind(&name)
.bind(&embedding_str)
.bind(&attrs_json)
.bind(&metadata.unwrap_or(serde_json::json!({})))
.bind(serde_json::to_string(&metadata.unwrap_or(serde_json::json!({}))).unwrap())
.execute(db.pool())
.await
{
@@ -694,7 +694,7 @@ async fn delete_face(
async fn get_recognition_results(
State(_state): State<crate::api::server::AppState>,
Path(video_uuid): Path<String>,
Path(file_uuid): Path<String>,
) -> Result<Json<serde_json::Value>, (StatusCode, String)> {
let db = match PostgresDb::init().await {
Ok(db) => db,
@@ -708,7 +708,7 @@ async fn get_recognition_results(
let query = r#"
SELECT
video_uuid,
file_uuid,
frame_count,
fps,
total_faces,
@@ -718,7 +718,7 @@ async fn get_recognition_results(
processing_time_secs,
created_at
FROM face_recognition_results
WHERE video_uuid = $1
WHERE file_uuid = $1
ORDER BY created_at DESC
LIMIT 1
"#;
@@ -734,7 +734,7 @@ async fn get_recognition_results(
Option<f64>,
chrono::DateTime<chrono::Utc>,
)> = match sqlx::query_as(query)
.bind(&video_uuid)
.bind(&file_uuid)
.fetch_optional(db.pool())
.await
{
@@ -749,7 +749,7 @@ async fn get_recognition_results(
match result {
Some((
video_uuid,
file_uuid,
frame_count,
fps,
total_faces,
@@ -761,7 +761,7 @@ async fn get_recognition_results(
)) => {
let response = serde_json::json!({
"success": true,
"video_uuid": video_uuid,
"file_uuid": file_uuid,
"frame_count": frame_count,
"fps": fps,
"total_faces": total_faces,
@@ -776,14 +776,14 @@ async fn get_recognition_results(
}
None => Err((
StatusCode::NOT_FOUND,
format!("No recognition results found for video: {}", video_uuid),
format!("No recognition results found for video: {}", file_uuid),
)),
}
}
async fn store_recognition_results(
db: &PostgresDb,
video_uuid: &str,
file_uuid: &str,
result: &FaceRecognitionResult,
) -> Result<(), anyhow::Error> {
let total_faces = result.frames.iter().map(|f| f.faces.len()).sum::<usize>();
@@ -796,7 +796,7 @@ async fn store_recognition_results(
let query = r#"
INSERT INTO face_recognition_results (
video_uuid,
file_uuid,
frame_count,
fps,
total_faces,
@@ -804,7 +804,7 @@ async fn store_recognition_results(
clusters_count,
result_data
) VALUES ($1, $2, $3, $4, $5, $6, $7)
ON CONFLICT (video_uuid) DO UPDATE SET
ON CONFLICT (file_uuid) DO UPDATE SET
frame_count = EXCLUDED.frame_count,
fps = EXCLUDED.fps,
total_faces = EXCLUDED.total_faces,
@@ -815,7 +815,7 @@ async fn store_recognition_results(
"#;
sqlx::query(query)
.bind(video_uuid)
.bind(file_uuid)
.bind(result.frame_count as i64)
.bind(result.fps)
.bind(total_faces as i32)
@@ -840,7 +840,7 @@ async fn store_recognition_results(
let insert_query = r#"
INSERT INTO face_detections (
video_uuid,
file_uuid,
frame_number,
timestamp_secs,
face_id,
@@ -854,7 +854,7 @@ async fn store_recognition_results(
identity_confidence,
cluster_id
) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10::vector, $11, $12, $13)
ON CONFLICT (video_uuid, frame_number, x, y, width, height) DO UPDATE SET
ON CONFLICT (file_uuid, frame_number, x, y, width, height) DO UPDATE SET
face_id = EXCLUDED.face_id,
confidence = EXCLUDED.confidence,
embedding = EXCLUDED.embedding,
@@ -874,7 +874,7 @@ async fn store_recognition_results(
.map(|c| c.cluster_id.clone());
sqlx::query(insert_query)
.bind(video_uuid)
.bind(file_uuid)
.bind(frame.frame as i64)
.bind(frame.timestamp)
.bind(face.face_id.as_deref())
@@ -908,7 +908,7 @@ async fn store_recognition_results(
let cluster_query = r#"
INSERT INTO face_clusters (
cluster_id,
video_uuid,
file_uuid,
centroid,
size,
representative_face_id,
@@ -923,7 +923,7 @@ async fn store_recognition_results(
sqlx::query(cluster_query)
.bind(&cluster.cluster_id)
.bind(video_uuid)
.bind(file_uuid)
.bind(&centroid_str)
.bind(cluster.size as i32)
.bind(cluster.representative_face_id.as_deref())

View File

@@ -0,0 +1,673 @@
use axum::{
extract::State,
http::StatusCode,
response::Json,
routing::{get, post},
Router,
};
use reqwest::Client;
use serde::{Deserialize, Serialize};
use sqlx::Row;
use crate::api::server::AppState;
use crate::core::db::PostgresDb;
/// Builds the router that exposes the 5W1H agent endpoints.
///
/// Three routes are registered under `/api/v1/agents/5w1h/`: `analyze`
/// (POST), `batch` (POST) and `status` (GET).
pub fn five_w1h_agent_routes() -> Router<AppState> {
    let router = Router::new();
    let router = router.route("/api/v1/agents/5w1h/analyze", post(analyze_5w1h));
    let router = router.route("/api/v1/agents/5w1h/batch", post(batch_analyze_5w1h));
    router.route("/api/v1/agents/5w1h/status", get(get_5w1h_status))
}
/// JSON request body accepted by the single-video 5W1H analyze handler.
#[derive(Debug, Deserialize)]
pub struct Analyze5W1HRequest {
/// Identifier matched against the `uuid` column of the chunks and videos tables.
pub file_uuid: String,
/// Number of scene chunks summarized per LLM call; the handler uses 7 when omitted.
pub scene_group_size: Option<usize>,
/// Model name sent to the LLM backend; the handler uses "gemma4:latest" when omitted.
pub model: Option<String>,
}
/// JSON response returned by the single-video 5W1H analyze handler.
#[derive(Debug, Serialize)]
pub struct Analyze5W1HResponse {
pub success: bool,
/// Echo of the `file_uuid` from the request.
pub file_uuid: String,
/// One summary per processed group of scene chunks, in processing order.
pub summaries: Vec<SummaryChunk>,
/// Final progress snapshot for the run.
pub processing_status: FiveW1HProcessingStatus,
}
/// Summary and 5W1H analysis produced for one group of consecutive scene chunks.
#[derive(Debug, Serialize)]
pub struct SummaryChunk {
/// Built as `summary_{file_uuid}_{group_index}` by the group processor.
pub chunk_id: String,
/// Free-text summary taken from the LLM response.
pub summary: String,
pub analysis_5w1h: FiveW1HAnalysis,
/// Start frame of the first chunk in the group.
pub start_frame: i64,
/// End frame of the last chunk in the group.
pub end_frame: i64,
/// `start_frame / fps`.
pub start_time: f64,
/// `end_frame / fps`.
pub end_time: f64,
/// Frame rate taken from the first chunk in the group.
pub fps: f64,
/// Number of scene chunks covered by this summary.
pub scene_count: usize,
}
/// Structured who/what/where/when/why/how fields extracted from the LLM response.
#[derive(Debug, Serialize)]
pub struct FiveW1HAnalysis {
pub who: Vec<String>,
pub what: Vec<String>,
/// Populated from the `where` key of the LLM JSON (`where` is a Rust keyword).
pub location: Vec<String>,
/// Falls back to "unknown" when the LLM response has no usable value.
pub when: String,
/// Falls back to "unknown" when the LLM response has no usable value.
pub why: String,
/// Falls back to "unknown" when the LLM response has no usable value.
pub how: String,
}
/// Progress snapshot of a 5W1H run as reported in the analyze response.
#[derive(Debug, Serialize)]
pub struct FiveW1HProcessingStatus {
/// Status label; the analyze handler sets "completed" on success.
pub status: String,
pub scenes_processed: i32,
pub scenes_total: i32,
/// Percentage in the 0-100 range (the handler reports 100.0 on completion).
pub progress_pct: f64,
}
/// JSON request body accepted by the batch 5W1H handler.
#[derive(Debug, Deserialize)]
pub struct BatchAnalyze5W1HRequest {
/// Videos to process; one background task is spawned per entry.
pub file_uuids: Vec<String>,
/// Number of scene chunks summarized per LLM call; the handler uses 7 when omitted.
pub scene_group_size: Option<usize>,
}
/// JSON response returned by the batch 5W1H handler.
#[derive(Debug, Serialize)]
pub struct BatchAnalyze5W1HResponse {
pub success: bool,
/// One entry per requested video, in request order.
pub jobs: Vec<BatchJobStatus>,
}
/// Per-video acknowledgement emitted by the batch handler.
#[derive(Debug, Serialize)]
pub struct BatchJobStatus {
pub file_uuid: String,
/// The batch handler always reports "queued"; the outcome is not awaited.
pub status: String,
/// Human-readable note accompanying `status`.
pub message: String,
}
async fn analyze_5w1h(
State(state): State<AppState>,
Json(req): Json<Analyze5W1HRequest>,
) -> Result<Json<Analyze5W1HResponse>, (StatusCode, String)> {
let db = PostgresDb::from_pool(state.db.pool().clone());
let scene_group_size = req.scene_group_size.unwrap_or(7);
let model = req.model.unwrap_or_else(|| "gemma4:latest".to_string());
let rule3_chunks = fetch_rule3_chunks(&db, &req.file_uuid)
.await
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?;
if rule3_chunks.is_empty() {
return Err((
StatusCode::BAD_REQUEST,
"No Rule 3 chunks found for this video".to_string(),
));
}
let scenes_total = rule3_chunks.len() as i32;
update_agent_status(&db, &req.file_uuid, "running", 0, scenes_total, 0.0)
.await
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?;
let summaries =
process_scene_groups(&db, &req.file_uuid, &rule3_chunks, scene_group_size, &model)
.await
.map_err(|e| {
tracing::error!("Failed to process scene groups: {}", e);
(StatusCode::INTERNAL_SERVER_ERROR, e.to_string())
})?;
let scenes_processed = rule3_chunks.len() as i32;
let progress_pct = 100.0;
update_agent_status(
&db,
&req.file_uuid,
"completed",
scenes_processed,
scenes_total,
progress_pct,
)
.await
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?;
Ok(Json(Analyze5W1HResponse {
success: true,
file_uuid: req.file_uuid,
summaries,
processing_status: FiveW1HProcessingStatus {
status: "completed".to_string(),
scenes_processed,
scenes_total,
progress_pct,
},
}))
}
async fn batch_analyze_5w1h(
State(state): State<AppState>,
Json(req): Json<BatchAnalyze5W1HRequest>,
) -> Result<Json<BatchAnalyze5W1HResponse>, (StatusCode, String)> {
let scene_group_size = req.scene_group_size.unwrap_or(7);
let jobs: Vec<BatchJobStatus> = req
.file_uuids
.iter()
.map(|uuid| {
let uuid_clone = uuid.clone();
let db_clone = PostgresDb::from_pool(state.db.pool().clone());
let group_size = scene_group_size;
tokio::spawn(async move {
let _ = process_single_video_5w1h(&db_clone, &uuid_clone, group_size).await;
});
BatchJobStatus {
file_uuid: uuid.clone(),
status: "queued".to_string(),
message: "Job queued for async processing".to_string(),
}
})
.collect();
Ok(Json(BatchAnalyze5W1HResponse {
success: true,
jobs,
}))
}
/// Handles `GET /api/v1/agents/5w1h/status`.
///
/// Responds with `{"success": true, "videos": [...]}` built from the stored
/// 5W1H status of recent videos, or `500` when the lookup fails.
async fn get_5w1h_status(
    State(state): State<AppState>,
) -> Result<Json<serde_json::Value>, (StatusCode, String)> {
    let db = PostgresDb::from_pool(state.db.pool().clone());
    match fetch_videos_with_5w1h_status(&db).await {
        Ok(videos_with_5w1h) => Ok(Json(serde_json::json!({
            "success": true,
            "videos": videos_with_5w1h
        }))),
        Err(e) => Err((StatusCode::INTERNAL_SERVER_ERROR, e.to_string())),
    }
}
/// Loads the `scene` and `cut` chunks of a video, ordered by start frame.
///
/// Columns that cannot be decoded fall back to defaults (empty id, frame 0,
/// 30 fps, JSON null) rather than failing the call. The per-chunk summary is
/// `"Scene {n}"` when `content.data.scene_number` is present, otherwise
/// `content.data.summary`, otherwise `"No summary"`.
///
/// # Errors
/// Returns the sqlx error when the query itself fails.
async fn fetch_rule3_chunks(db: &PostgresDb, file_uuid: &str) -> anyhow::Result<Vec<Rule3Chunk>> {
    let chunks_table = crate::core::db::schema::table_name("chunks");
    let sql = format!(
        r#"
SELECT chunk_id, start_frame, end_frame, fps, content, metadata
FROM {}
WHERE uuid = $1 AND (chunk_type = 'scene' OR chunk_type = 'cut')
ORDER BY start_frame
"#,
        chunks_table
    );
    let records = sqlx::query(&sql)
        .bind(file_uuid)
        .fetch_all(db.pool())
        .await?;

    let mut collected = Vec::with_capacity(records.len());
    for record in &records {
        let content: serde_json::Value =
            record.try_get("content").unwrap_or(serde_json::Value::Null);
        let data = content.get("data");

        // A scene number takes precedence over the stored summary text.
        let scene_number = data
            .and_then(|d| d.get("scene_number"))
            .and_then(|s| s.as_u64());
        let summary = match scene_number {
            Some(number) => format!("Scene {}", number),
            None => data
                .and_then(|d| d.get("summary"))
                .and_then(|s| s.as_str())
                .unwrap_or("No summary")
                .to_string(),
        };

        collected.push(Rule3Chunk {
            chunk_id: record.try_get("chunk_id").unwrap_or_default(),
            start_frame: record.try_get("start_frame").unwrap_or(0),
            end_frame: record.try_get("end_frame").unwrap_or(0),
            fps: record.try_get("fps").unwrap_or(30.0),
            summary,
            metadata: record.try_get("metadata").unwrap_or(serde_json::Value::Null),
        });
    }
    Ok(collected)
}
/// Summarizes scene chunks in consecutive groups and persists each summary.
///
/// For every group of up to `group_size` chunks this calls the LLM, stores the
/// resulting summary chunk and updates the video's "running" progress. A
/// `group_size` of 0 is treated as 1, because `slice::chunks(0)` panics.
///
/// # Errors
/// Stops at, and returns, the first LLM or database error; summaries stored
/// before the failure remain in the database.
async fn process_scene_groups(
    db: &PostgresDb,
    file_uuid: &str,
    rule3_chunks: &[Rule3Chunk],
    group_size: usize,
    model: &str,
) -> anyhow::Result<Vec<SummaryChunk>> {
    let group_size = group_size.max(1);
    let chunks_total = rule3_chunks.len();
    let mut summaries = Vec::with_capacity(chunks_total.div_ceil(group_size));

    for (group_idx, group) in rule3_chunks.chunks(group_size).enumerate() {
        let context_text = group
            .iter()
            .map(|c| c.summary.as_str())
            .collect::<Vec<_>>()
            .join("\n\n");
        let faces = aggregate_faces(group);
        let objects = aggregate_objects(group);
        let llm_result = call_llm_for_5w1h(&context_text, &faces, &objects, model).await?;

        // `chunks` never yields an empty slice, so the fallbacks below are
        // purely defensive.
        let start_frame = group.first().map(|c| c.start_frame).unwrap_or(0);
        let end_frame = group.last().map(|c| c.end_frame).unwrap_or(0);
        let fps = group.first().map(|c| c.fps).unwrap_or(30.0);

        let summary = SummaryChunk {
            chunk_id: format!("summary_{}_{}", file_uuid, group_idx),
            summary: llm_result.summary,
            analysis_5w1h: llm_result.analysis_5w1h,
            start_frame,
            end_frame,
            start_time: start_frame as f64 / fps,
            end_time: end_frame as f64 / fps,
            fps,
            scene_count: group.len(),
        };
        store_summary_chunk(db, file_uuid, &summary, group).await?;

        // The last group may be short, hence the clamp to the total.
        let scenes_processed = ((group_idx + 1) * group_size).min(chunks_total) as i32;
        let progress_pct = (scenes_processed as f64 / chunks_total as f64) * 100.0;
        update_agent_status(
            db,
            file_uuid,
            "running",
            scenes_processed,
            chunks_total as i32,
            progress_pct,
        )
        .await?;
        summaries.push(summary);
    }
    Ok(summaries)
}
/// Asks the local Ollama server for a summary and 5W1H analysis of a scene group.
///
/// Sends a non-streaming, JSON-formatted generate request to
/// `http://localhost:11434/api/generate` with a 60 second timeout and parses
/// the `response` field of the reply as JSON.
///
/// Missing fields degrade gracefully: the summary becomes
/// "No summary generated", list fields become empty and text fields become
/// "unknown". A list field the model returned as a single string is accepted
/// as a one-element list.
///
/// # Errors
/// Fails when the request cannot be sent, the server answers with a non-2xx
/// status, or either the envelope or the embedded model output is not valid
/// JSON.
async fn call_llm_for_5w1h(
    context_text: &str,
    faces: &[String],
    objects: &[String],
    model: &str,
) -> anyhow::Result<LLM5W1HResult> {
    let system_prompt = r#"You are a video scene analysis assistant for Momentry Core.
## Task:
Analyze the provided video scenes and extract structured 5W1H information.
## Output Format (JSON):
{
"summary": "A brief 2-3 sentence summary of these scenes",
"5w1h": {
"who": ["List of main characters/actors"],
"what": ["List of main events/actions"],
"where": ["List of locations/settings"],
"when": "Time of day or temporal context",
"why": "Motivation or reason for events",
"how": "Method or process used"
}
}
## Guidelines:
- Keep summaries concise and natural
- Extract key information, not details
- Return ONLY valid JSON, no explanations"#;
    let prompt = format!(
        r#"Analyze these video scenes:
## Scene Summaries:
{}
## Detected Faces:
{}
## Detected Objects:
{}
Return the 5W1H analysis in JSON format."#,
        context_text,
        faces.join(", "),
        objects.join(", ")
    );

    let client = Client::new();
    let ollama_url = "http://localhost:11434/api/generate";
    let body = serde_json::json!({
        "model": model,
        "prompt": prompt,
        "system": system_prompt,
        "stream": false,
        "format": "json"
    });
    let response = client
        .post(ollama_url)
        .json(&body)
        .timeout(std::time::Duration::from_secs(60))
        .send()
        .await?
        // Surface 4xx/5xx replies (e.g. unknown model) as errors instead of
        // trying to parse an error payload as an analysis.
        .error_for_status()?;
    let ollama_resp: serde_json::Value = response.json().await?;
    let response_text = ollama_resp
        .get("response")
        .and_then(|v| v.as_str())
        .unwrap_or("{}");
    let parsed: serde_json::Value = serde_json::from_str(response_text)?;

    let summary = parsed
        .get("summary")
        .and_then(|s| s.as_str())
        .unwrap_or("No summary generated")
        .to_string();

    let section = parsed.get("5w1h");
    // Reads a list-valued field; non-string array items are skipped.
    let list_field = |key: &str| -> Vec<String> {
        match section.and_then(|s| s.get(key)) {
            Some(serde_json::Value::Array(items)) => items
                .iter()
                .filter_map(|v| v.as_str().map(str::to_owned))
                .collect(),
            Some(serde_json::Value::String(single)) => vec![single.clone()],
            _ => Vec::new(),
        }
    };
    // Reads a text-valued field, defaulting to "unknown".
    let text_field = |key: &str| -> String {
        section
            .and_then(|s| s.get(key))
            .and_then(|v| v.as_str())
            .unwrap_or("unknown")
            .to_string()
    };

    let analysis_5w1h = FiveW1HAnalysis {
        who: list_field("who"),
        what: list_field("what"),
        location: list_field("where"),
        when: text_field("when"),
        why: text_field("why"),
        how: text_field("how"),
    };

    Ok(LLM5W1HResult {
        summary,
        analysis_5w1h,
    })
}
/// Upserts one summary chunk (chunk type `summary`) into the chunks table.
///
/// The stored `content` carries the summary and the 5W1H fields under
/// `rule = "rule4"`; `metadata` records how many scene chunks were covered and
/// their ids. On a `(uuid, chunk_id)` conflict only content, metadata and
/// `updated_at` are refreshed.
///
/// # Errors
/// Returns the sqlx error when the statement fails.
async fn store_summary_chunk(
    db: &PostgresDb,
    file_uuid: &str,
    summary: &SummaryChunk,
    group: &[Rule3Chunk],
) -> anyhow::Result<()> {
    let chunks_table = crate::core::db::schema::table_name("chunks");
    let analysis = &summary.analysis_5w1h;

    let content_json = serde_json::json!({
        "rule": "rule4",
        "data": {
            "summary": summary.summary,
            "5w1h": {
                "who": analysis.who,
                "what": analysis.what,
                "where": analysis.location,
                "when": analysis.when,
                "why": analysis.why,
                "how": analysis.how,
            }
        }
    });
    let scene_chunk_ids: Vec<String> = group.iter().map(|c| c.chunk_id.clone()).collect();
    let metadata_json = serde_json::json!({
        "scene_count": summary.scene_count,
        "scene_chunk_ids": scene_chunk_ids,
    });

    let insert_sql = format!(
        r#"
INSERT INTO {} (
uuid, chunk_id, chunk_index, chunk_type,
start_frame, end_frame, fps, start_time, end_time, content, metadata
)
VALUES ($1, $2, $3, 'summary', $4, $5, $6, $7, $8, $9::jsonb, $10::jsonb)
ON CONFLICT (uuid, chunk_id) DO UPDATE SET
content = EXCLUDED.content,
metadata = EXCLUDED.metadata,
updated_at = CURRENT_TIMESTAMP
"#,
        chunks_table
    );

    let statement = sqlx::query(&insert_sql)
        .bind(file_uuid)
        .bind(&summary.chunk_id)
        // NOTE(review): chunk_index is always written as 0, regardless of the
        // group position - confirm this is intended.
        .bind(0)
        .bind(summary.start_frame)
        .bind(summary.end_frame)
        .bind(summary.fps)
        .bind(summary.start_time)
        .bind(summary.end_time)
        .bind(&content_json)
        .bind(&metadata_json);
    statement.execute(db.pool()).await?;
    Ok(())
}
/// Records the 5W1H agent's progress under `processing_status.agents.five_w1h`
/// of the given video.
///
/// The new `five_w1h` object is merged into the existing `agents` object so
/// that other keys stored under `agents` are preserved; previously the whole
/// `agents` object was replaced. When `agents` is missing or is not a JSON
/// object it is started from an empty object.
///
/// # Errors
/// Returns the sqlx error when the update fails. Updating zero rows (unknown
/// `file_uuid`) is not treated as an error.
async fn update_agent_status(
    db: &PostgresDb,
    file_uuid: &str,
    status: &str,
    scenes_processed: i32,
    scenes_total: i32,
    progress_pct: f64,
) -> anyhow::Result<()> {
    let table = crate::core::db::schema::table_name("videos");
    let agent_status = serde_json::json!({
        "five_w1h": {
            "status": status,
            "scenes_processed": scenes_processed,
            "scenes_total": scenes_total,
            "progress_pct": progress_pct
        }
    });
    // `||` on two jsonb objects performs a shallow merge, so only the
    // `five_w1h` key is overwritten.
    let query = format!(
        r#"
UPDATE {}
SET processing_status = jsonb_set(
COALESCE(processing_status, '{{}}'::jsonb),
'{{agents}}',
(
CASE WHEN jsonb_typeof(processing_status->'agents') = 'object'
THEN processing_status->'agents'
ELSE '{{}}'::jsonb
END
) || $1::jsonb
)
WHERE uuid = $2
"#,
        table
    );
    sqlx::query(&query)
        .bind(&agent_status)
        .bind(file_uuid)
        .execute(db.pool())
        .await?;
    Ok(())
}
/// Lists up to 50 most recently updated videos that have a stored 5W1H status.
///
/// Each entry is `{"uuid": ..., "five_w1h_status": ...}`; a uuid that cannot
/// be decoded becomes an empty string and an undecodable status becomes null.
///
/// # Errors
/// Returns the sqlx error when the query fails.
async fn fetch_videos_with_5w1h_status(db: &PostgresDb) -> anyhow::Result<Vec<serde_json::Value>> {
    let videos_table = crate::core::db::schema::table_name("videos");
    let sql = format!(
        r#"
SELECT uuid, processing_status->'agents'->'five_w1h' as agent_status
FROM {}
WHERE processing_status->'agents'->'five_w1h' IS NOT NULL
ORDER BY updated_at DESC
LIMIT 50
"#,
        videos_table
    );
    let records = sqlx::query(&sql).fetch_all(db.pool()).await?;

    let mut entries = Vec::with_capacity(records.len());
    for record in &records {
        let uuid: String = record.try_get("uuid").unwrap_or_default();
        let agent_status: Option<serde_json::Value> = record.try_get("agent_status").ok();
        entries.push(serde_json::json!({
            "uuid": uuid,
            "five_w1h_status": agent_status
        }));
    }
    Ok(entries)
}
/// Runs the full 5W1H pipeline for one video using the "gemma4:latest" model.
///
/// Does nothing (and returns `Ok`) when the video has no scene/cut chunks.
/// Otherwise marks the video "running", processes all scene groups and marks
/// it "completed" at 100%.
///
/// # Errors
/// Propagates the first database or LLM error; in that case the "completed"
/// status is not written.
async fn process_single_video_5w1h(
    db: &PostgresDb,
    file_uuid: &str,
    scene_group_size: usize,
) -> anyhow::Result<()> {
    let scene_chunks = fetch_rule3_chunks(db, file_uuid).await?;
    if scene_chunks.is_empty() {
        return Ok(());
    }

    let total = scene_chunks.len() as i32;
    update_agent_status(db, file_uuid, "running", 0, total, 0.0).await?;

    // The summaries are persisted by the group processor; the returned list is
    // not needed here.
    process_scene_groups(
        db,
        file_uuid,
        &scene_chunks,
        scene_group_size,
        "gemma4:latest",
    )
    .await?;

    update_agent_status(db, file_uuid, "completed", total, total, 100.0).await?;
    Ok(())
}
/// Collects the string entries of every chunk's `metadata.faces` array.
///
/// Chunks without a `faces` array contribute nothing, non-string entries are
/// skipped, and order (chunk order, then array order) and duplicates are kept.
fn aggregate_faces(group: &[Rule3Chunk]) -> Vec<String> {
    let mut names = Vec::new();
    for chunk in group {
        let Some(entries) = chunk.metadata.get("faces").and_then(|f| f.as_array()) else {
            continue;
        };
        names.extend(entries.iter().filter_map(|e| e.as_str()).map(str::to_owned));
    }
    names
}
/// Collects the string entries of every chunk's `metadata.objects` array.
///
/// Chunks without an `objects` array contribute nothing, non-string entries
/// are skipped, and order (chunk order, then array order) and duplicates are
/// kept.
fn aggregate_objects(group: &[Rule3Chunk]) -> Vec<String> {
    let mut labels = Vec::new();
    for chunk in group {
        let Some(entries) = chunk.metadata.get("objects").and_then(|o| o.as_array()) else {
            continue;
        };
        labels.extend(entries.iter().filter_map(|e| e.as_str()).map(str::to_owned));
    }
    labels
}
/// In-memory view of one `scene` or `cut` row loaded from the chunks table.
#[derive(Debug, Clone)]
struct Rule3Chunk {
chunk_id: String,
start_frame: i64,
end_frame: i64,
/// Frame rate stored with the chunk; the loader substitutes 30.0 when the column cannot be decoded.
fps: f64,
/// Text fed to the LLM: "Scene {n}", the stored summary, or "No summary".
summary: String,
/// Raw chunk metadata; its `faces` and `objects` arrays are read when aggregating.
metadata: serde_json::Value,
}
/// Parsed outcome of one LLM call: the free-text summary plus the structured 5W1H fields.
#[derive(Debug)]
struct LLM5W1HResult {
summary: String,
analysis_5w1h: FiveW1HAnalysis,
}

View File

@@ -1,22 +1,31 @@
use axum::{
extract::{Query, State},
http::StatusCode,
response::Json,
body::Body,
extract::{Path, Query, State},
http::{header, StatusCode},
response::{IntoResponse, Json},
routing::{get, post},
Router,
};
use serde::{Deserialize, Serialize};
use std::process::Command;
use crate::core::db::{schema, Database, PostgresDb};
use crate::core::db::{Database, PostgresDb};
#[derive(Debug, Deserialize)]
pub struct RegisterFromPersonRequest {
pub video_uuid: String,
pub file_uuid: String,
pub person_id: String,
pub identity_name: String,
pub metadata: Option<serde_json::Value>,
}
/// JSON request body for registering an identity from a face.json file.
#[derive(Debug, Deserialize)]
pub struct RegisterFromFaceRequest {
/// Path handed to the selection script as its `--face-json` argument.
pub face_json_path: String,
/// Name handed to the script as `--identity-name` and used to look the identity up afterwards.
pub identity_name: String,
/// Value handed to the script as `--schema`; the handler uses "dev" when omitted.
pub schema: Option<String>,
}
#[derive(Debug, Serialize)]
pub struct RegisterFromPersonResponse {
pub success: bool,
@@ -26,10 +35,135 @@ pub struct RegisterFromPersonResponse {
pub person_id: String,
}
/// JSON response of the register-from-face handler.
#[derive(Debug, Serialize)]
pub struct RegisterFromFaceResponse {
pub success: bool,
pub message: String,
/// `None` when the script succeeded but no matching identity row was found.
pub identity_uuid: Option<String>,
/// Echo of the requested identity name.
pub identity_name: String,
/// Read from `reference_data.total_references` of the identity row.
pub total_vectors: Option<i32>,
/// Read from `reference_data.angles_covered` of the identity row.
pub angle_coverage: Option<Vec<String>>,
/// Read from `reference_data.quality_avg` of the identity row.
pub quality_avg: Option<f64>,
}
/// Builds the router for the identity and face-browsing endpoints.
///
/// Covers identity registration (from a person or from a face.json file),
/// identity listing, unassigned face candidates, the faces of one identity
/// and per-face thumbnails.
pub fn identity_routes() -> Router<crate::api::server::AppState> {
    let registration = Router::new()
        .route("/api/v1/identities/from-person", post(register_from_person))
        .route("/api/v1/identities/from-face", post(register_from_face));

    let listing = registration
        .route("/api/v1/identities", get(list_identities))
        .route("/api/v1/faces/candidates", get(list_face_candidates));

    listing
        .route(
            "/api/v1/identities/:identity_id/faces",
            get(get_identity_faces),
        )
        .route("/api/v1/faces/:face_id/thumbnail", get(get_face_thumbnail))
}
/// Register a Global Identity from face.json with multi-angle reference vectors.
/// Calls select_face_reference_vectors_v2.py for automatic reference selection.
async fn register_from_face(
State(_state): State<crate::api::server::AppState>,
Json(req): Json<RegisterFromFaceRequest>,
) -> Result<Json<RegisterFromFaceResponse>, (StatusCode, String)> {
let schema = req.schema.unwrap_or("dev".to_string());
let python_path =
std::env::var("MOMENTRY_PYTHON_PATH").unwrap_or("/opt/homebrew/bin/python3.11".to_string());
let scripts_dir = std::env::var("MOMENTRY_SCRIPTS_DIR").unwrap_or_else(|_| {
let mut path = std::env::current_dir().unwrap_or_default();
path.push("scripts");
path.to_string_lossy().to_string()
});
let script_path = format!("{}/select_face_reference_vectors_v2.py", scripts_dir);
tracing::info!(
"Registering identity '{}' from face.json: {}",
req.identity_name,
req.face_json_path
);
let output = Command::new(&python_path)
.arg(&script_path)
.arg("--face-json")
.arg(&req.face_json_path)
.arg("--identity-name")
.arg(&req.identity_name)
.arg("--register")
.arg("--schema")
.arg(&schema)
.output()
.map_err(|e| {
(
StatusCode::INTERNAL_SERVER_ERROR,
format!("Failed to execute script: {}", e),
)
})?;
if !output.status.success() {
let stderr = String::from_utf8_lossy(&output.stderr);
return Err((
StatusCode::INTERNAL_SERVER_ERROR,
format!("Script failed: {}", stderr),
));
}
let db = PostgresDb::init().await.map_err(|e| {
(
StatusCode::INTERNAL_SERVER_ERROR,
format!("DB error: {}", e),
)
})?;
let query = r#"
SELECT uuid, reference_data->'total_references' as total,
reference_data->'angles_covered' as angles,
reference_data->'quality_avg' as quality
FROM identities
WHERE name = $1
ORDER BY created_at DESC
LIMIT 1
"#;
let row: Option<(String, Option<i32>, Option<Vec<String>>, Option<f64>)> =
sqlx::query_as(query)
.bind(&req.identity_name)
.fetch_optional(db.pool())
.await
.map_err(|e| {
(
StatusCode::INTERNAL_SERVER_ERROR,
format!("Query error: {}", e),
)
})?;
match row {
Some((uuid, total, angles, quality)) => Ok(Json(RegisterFromFaceResponse {
success: true,
message: format!(
"Successfully registered identity '{}' with {} reference vectors",
req.identity_name,
total.unwrap_or(0)
),
identity_uuid: Some(uuid),
identity_name: req.identity_name,
total_vectors: total,
angle_coverage: angles,
quality_avg: quality,
})),
None => Ok(Json(RegisterFromFaceResponse {
success: true,
message: format!(
"Identity '{}' registered, but details not found",
req.identity_name
),
identity_uuid: None,
identity_name: req.identity_name,
total_vectors: None,
angle_coverage: None,
quality_avg: None,
})),
}
}
/// Register a Global Identity from a specific Person in a video.
@@ -61,10 +195,10 @@ async fn register_from_person(
// 1. Check if Person exists
let person_query =
"SELECT id, name FROM person_identities WHERE person_id = $1 AND video_uuid = $2";
"SELECT id, name FROM person_identities WHERE person_id = $1 AND file_uuid = $2";
let person: Option<(i32, Option<String>)> = match sqlx::query_as(person_query)
.bind(&req.person_id)
.bind(&req.video_uuid)
.bind(&req.file_uuid)
.fetch_optional(&mut *tx)
.await
{
@@ -84,7 +218,7 @@ async fn register_from_person(
StatusCode::NOT_FOUND,
format!(
"Person '{}' not found in video '{}'",
req.person_id, req.video_uuid
req.person_id, req.file_uuid
),
))
}
@@ -149,7 +283,7 @@ async fn register_from_person(
.bind("person_id") // identity_type
.bind(&req.person_id) // identity_value
.bind(1.0) // confidence
.bind(&serde_json::json!({"auto_updated": true}))
.bind(serde_json::to_string(&serde_json::json!({"auto_updated": true})).unwrap())
.execute(&mut *tx)
.await
{
@@ -286,3 +420,420 @@ pub struct IdentityListResponse {
pub page: usize,
pub page_size: usize,
}
/// Query-string parameters of the face-candidates listing endpoint.
#[derive(Debug, Deserialize)]
pub struct FaceCandidatesQuery {
/// When present, only detections of this file are listed.
pub file_uuid: Option<String>,
/// Minimum detection confidence; the handler uses 0.5 when omitted.
pub min_confidence: Option<f64>,
/// 1-based page number; the handler uses 1 when omitted.
pub page: Option<usize>,
/// Rows per page; the handler uses 15 when omitted and caps the value at 100.
pub page_size: Option<usize>,
/// NOTE(review): accepted but not read by the candidates handler - confirm whether it is still needed.
pub limit: Option<usize>,
}
/// One face detection that has no identity assigned yet.
#[derive(Debug, Serialize)]
pub struct FaceCandidate {
/// Value of the `id` column of the face detections table.
pub id: i32,
pub face_id: Option<String>,
pub file_uuid: String,
pub frame_number: i64,
pub confidence: f64,
/// Bounding box as stored in the `bbox` column (opaque JSON here).
pub bbox: Option<serde_json::Value>,
/// Attributes as stored in the `attributes` column (opaque JSON here).
pub attributes: Option<serde_json::Value>,
}
/// Paged response of the face-candidates listing endpoint.
#[derive(Debug, Serialize)]
pub struct FaceCandidatesResponse {
/// Candidates of the requested page, highest confidence first.
pub candidates: Vec<FaceCandidate>,
/// Number of matching detections across all pages.
pub total: i64,
pub page: usize,
pub page_size: usize,
}
/// Query-string parameters of the identity-faces endpoint.
#[derive(Debug, Deserialize)]
pub struct IdentityFacesQuery {
/// 1-based page number; the handler uses 1 when omitted.
pub page: Option<usize>,
/// Rows per page; the handler uses 100 when omitted and caps the value at 1000.
pub page_size: Option<usize>,
/// NOTE(review): accepted but not read by the identity-faces handler - confirm whether it is still needed.
pub limit: Option<usize>,
}
/// One face detection that is assigned to an identity.
#[derive(Debug, Serialize)]
pub struct IdentityFace {
/// Value of the `id` column of the face detections table.
pub id: i32,
pub face_id: Option<String>,
pub file_uuid: String,
pub frame_number: i64,
pub confidence: f64,
/// Bounding box as stored in the `bbox` column (opaque JSON here).
pub bbox: Option<serde_json::Value>,
/// Attributes as stored in the `attributes` column (opaque JSON here).
pub attributes: Option<serde_json::Value>,
}
/// Response of the identity-faces endpoint.
#[derive(Debug, Serialize)]
pub struct IdentityFacesResponse {
/// Identity id taken from the request path.
pub identity_id: i32,
/// Faces of the requested page, highest confidence first.
pub faces: Vec<IdentityFace>,
/// Number of detections assigned to the identity across all pages.
pub total: i64,
}
/// Lists face detections that have no identity assigned, highest confidence
/// first.
///
/// Query parameters: `file_uuid` (optional filter), `min_confidence`
/// (default 0.5), `page` (1-based, default 1; 0 is treated as 1) and
/// `page_size` (default 15, capped at 100). The response reports the
/// effective page and page size together with the total match count.
///
/// # Errors
/// `500 Internal Server Error` when the database connection, the count query
/// or the row query fails.
async fn list_face_candidates(
    Query(query): Query<FaceCandidatesQuery>,
) -> Result<Json<FaceCandidatesResponse>, (StatusCode, String)> {
    type CandidateRow = (
        i32,
        Option<String>,
        String,
        i64,
        f64,
        Option<serde_json::Value>,
        Option<serde_json::Value>,
    );

    let db = PostgresDb::init().await.map_err(|e| {
        (
            StatusCode::INTERNAL_SERVER_ERROR,
            format!("Failed to connect to database: {}", e),
        )
    })?;

    // `page` is unsigned: clamp to 1 so `page - 1` cannot underflow on
    // `?page=0`, and saturate so absurd page numbers cannot overflow.
    let page = query.page.unwrap_or(1).max(1);
    let page_size = std::cmp::min(query.page_size.unwrap_or(15), 100);
    let offset = i64::try_from((page - 1).saturating_mul(page_size)).unwrap_or(i64::MAX);
    let min_confidence = query.min_confidence.unwrap_or(0.5);
    let file_uuid = query.file_uuid.as_deref();
    let table = crate::core::db::schema::table_name("face_detections");

    // A NULL `$2` disables the file filter, so one statement serves both the
    // filtered and the unfiltered listing.
    let count_sql = format!(
        "SELECT COUNT(*) FROM {} WHERE identity_id IS NULL AND confidence >= $1 AND ($2::text IS NULL OR file_uuid = $2)",
        table
    );
    let total: i64 = sqlx::query_scalar(&count_sql)
        .bind(min_confidence)
        .bind(file_uuid)
        .fetch_one(db.pool())
        .await
        .map_err(|e| {
            (
                StatusCode::INTERNAL_SERVER_ERROR,
                format!("Count error: {}", e),
            )
        })?;

    let sql = format!(
        "SELECT id, face_id, file_uuid, frame_number, confidence, bbox, attributes
FROM {}
WHERE identity_id IS NULL AND confidence >= $1 AND ($2::text IS NULL OR file_uuid = $2)
ORDER BY confidence DESC
LIMIT $3 OFFSET $4",
        table
    );
    let rows = sqlx::query_as::<_, CandidateRow>(&sql)
        .bind(min_confidence)
        .bind(file_uuid)
        .bind(page_size as i64)
        .bind(offset)
        .fetch_all(db.pool())
        .await
        .map_err(|e| {
            (
                StatusCode::INTERNAL_SERVER_ERROR,
                format!("Query error: {}", e),
            )
        })?;

    let candidates: Vec<FaceCandidate> = rows
        .into_iter()
        .map(
            |(id, face_id, file_uuid, frame_number, confidence, bbox, attributes)| FaceCandidate {
                id,
                face_id,
                file_uuid,
                frame_number,
                confidence,
                bbox,
                attributes,
            },
        )
        .collect();

    Ok(Json(FaceCandidatesResponse {
        candidates,
        total,
        page,
        page_size,
    }))
}
/// Returns one page of the face detections linked to `identity_id`, ordered by descending
/// confidence, together with the total number of detections for that identity.
///
/// Query parameters: `page` is 1-based and defaults to 1 (values below 1 are treated as 1);
/// `page_size` defaults to 100 and is capped at 1000.
///
/// # Errors
/// Returns `500 Internal Server Error` with a message when the database handle cannot be
/// created or when either the count query or the page query fails.
async fn get_identity_faces(
    axum::extract::Path(identity_id): axum::extract::Path<i32>,
    Query(query): Query<IdentityFacesQuery>,
) -> Result<Json<IdentityFacesResponse>, (StatusCode, String)> {
    // Column layout of the SELECT below: id, face_id, file_uuid, frame_number, confidence,
    // bbox, attributes.
    type FaceRow = (
        i32,
        Option<String>,
        String,
        i64,
        f64,
        Option<serde_json::Value>,
        Option<serde_json::Value>,
    );

    // NOTE(review): a database handle is initialised on every request; presumably a shared
    // handle from application state would be cheaper — confirm against the router setup.
    let db = PostgresDb::init().await.map_err(|e| {
        (
            StatusCode::INTERNAL_SERVER_ERROR,
            format!("Failed to connect to database: {}", e),
        )
    })?;

    let page_size = std::cmp::min(query.page_size.unwrap_or(100), 1000);
    // Clamp to the first page: `page = 0` would otherwise make `page - 1` underflow (unsigned)
    // or produce a negative OFFSET that PostgreSQL rejects (signed).
    let page = std::cmp::max(query.page.unwrap_or(1), 1);
    let offset = (page - 1) * page_size;

    let table = crate::core::db::schema::table_name("face_detections");

    let count_sql = format!("SELECT COUNT(*) FROM {} WHERE identity_id = $1", table);
    let total: i64 = sqlx::query_scalar(&count_sql)
        .bind(identity_id)
        .fetch_one(db.pool())
        .await
        .map_err(|e| {
            (
                StatusCode::INTERNAL_SERVER_ERROR,
                format!("Count error: {}", e),
            )
        })?;

    let sql = format!(
        "SELECT id, face_id, file_uuid, frame_number, confidence, bbox, attributes
FROM {}
WHERE identity_id = $1
ORDER BY confidence DESC
LIMIT $2 OFFSET $3",
        table
    );
    let rows = sqlx::query_as::<_, FaceRow>(&sql)
        .bind(identity_id)
        .bind(page_size as i64)
        .bind(offset as i64)
        .fetch_all(db.pool())
        .await
        .map_err(|e| {
            (
                StatusCode::INTERNAL_SERVER_ERROR,
                format!("Query error: {}", e),
            )
        })?;

    let faces: Vec<IdentityFace> = rows
        .into_iter()
        .map(
            |(id, face_id, file_uuid, frame_number, confidence, bbox, attributes)| IdentityFace {
                id,
                face_id,
                file_uuid,
                frame_number,
                confidence,
                bbox,
                attributes,
            },
        )
        .collect();

    Ok(Json(IdentityFacesResponse {
        identity_id,
        faces,
        total,
    }))
}
/// Renders a JPEG thumbnail of one detected face by cropping a video frame with `ffmpeg`.
///
/// Looks up the detection's frame number and bounding box together with the video's path and
/// fps, seeks to `frame_number / fps` seconds, and returns a single cropped frame as
/// `image/jpeg` with a one-hour public cache header. When the stored bounding box is missing
/// or cannot be deserialized, a 100x100 crop at the top-left corner is used instead.
///
/// # Errors
/// * `404 Not Found` when no detection with `face_id` (joined to its video row) exists.
/// * `500 Internal Server Error` when the database handle cannot be created, the query fails,
///   the stored fps is not a positive finite number, `ffmpeg` cannot be started or exits with
///   a failure status, or the response cannot be built.
async fn get_face_thumbnail(
    Path(face_id): Path<i32>,
) -> Result<impl IntoResponse, (StatusCode, String)> {
    let db = PostgresDb::init().await.map_err(|e| {
        (
            StatusCode::INTERNAL_SERVER_ERROR,
            format!("Failed to connect to database: {}", e),
        )
    })?;

    let table_fd = crate::core::db::schema::table_name("face_detections");
    let table_v = crate::core::db::schema::table_name("videos");
    let sql = format!(
        "SELECT fd.frame_number, fd.bbox, v.file_path, v.fps
FROM {} fd
JOIN {} v ON fd.file_uuid = v.uuid
WHERE fd.id = $1",
        table_fd, table_v
    );
    let row: Option<(i64, Option<serde_json::Value>, String, f64)> = sqlx::query_as(&sql)
        .bind(face_id)
        .fetch_optional(db.pool())
        .await
        .map_err(|e| {
            (
                StatusCode::INTERNAL_SERVER_ERROR,
                format!("Query error: {}", e),
            )
        })?;

    let (frame_number, bbox_json, file_path, fps) = match row {
        Some(r) => r,
        None => return Err((StatusCode::NOT_FOUND, format!("Face {} not found", face_id))),
    };

    // A zero, negative or non-finite fps would turn the seek time into inf/NaN and make ffmpeg
    // fail with an unhelpful message; report the bad metadata explicitly instead.
    if !fps.is_finite() || fps <= 0.0 {
        return Err((
            StatusCode::INTERNAL_SERVER_ERROR,
            format!("Invalid fps {} for face {}", fps, face_id),
        ));
    }

    // Single fallback used both when the column is NULL and when its JSON does not match `Bbox`.
    let fallback_bbox = Bbox {
        x: 0,
        y: 0,
        width: 100,
        height: 100,
    };
    let bbox: Bbox = bbox_json
        .and_then(|json| serde_json::from_value::<Bbox>(json).ok())
        .unwrap_or(fallback_bbox);

    let timestamp = (frame_number as f64 / fps).to_string();
    let crop_filter = format!("crop={}:{}:{}:{}", bbox.width, bbox.height, bbox.x, bbox.y);

    // NOTE(review): `output()` is not awaited here, so this appears to be a blocking process
    // call running on the async executor; consider moving it to a blocking task.
    let output = Command::new("ffmpeg")
        .args(&[
            "-ss",
            timestamp.as_str(),
            "-i",
            file_path.as_str(),
            "-vf",
            crop_filter.as_str(),
            "-frames:v",
            "1",
            "-f",
            "image2pipe",
            "-vcodec",
            "mjpeg",
            "-",
        ])
        .output()
        .map_err(|e| {
            (
                StatusCode::INTERNAL_SERVER_ERROR,
                format!("ffmpeg error: {}", e),
            )
        })?;

    if !output.status.success() {
        let stderr = String::from_utf8_lossy(&output.stderr);
        return Err((
            StatusCode::INTERNAL_SERVER_ERROR,
            format!("ffmpeg failed: {}", stderr),
        ));
    }

    let response = axum::response::Response::builder()
        .status(StatusCode::OK)
        .header(header::CONTENT_TYPE, "image/jpeg")
        .header(header::CACHE_CONTROL, "public, max-age=3600")
        .body(Body::from(output.stdout))
        .map_err(|e| {
            (
                StatusCode::INTERNAL_SERVER_ERROR,
                format!("Response error: {}", e),
            )
        })?;
    Ok(response)
}
/// Bounding box deserialized from the `bbox` JSON column of a face detection.
///
/// `get_face_thumbnail` formats these values into an ffmpeg `crop=width:height:x:y` filter.
// NOTE(review): all fields are integers, so a stored bbox with fractional values would
// presumably fail to deserialize and trigger the caller's fallback box — confirm the stored format.
#[derive(Debug, Deserialize)]
struct Bbox {
// Horizontal offset passed as the third `crop` argument.
x: i32,
// Vertical offset passed as the fourth `crop` argument.
y: i32,
// Crop width (first `crop` argument).
width: i32,
// Crop height (second `crop` argument).
height: i32,
}

View File

@@ -0,0 +1,603 @@
use axum::{
extract::State,
http::StatusCode,
response::Json,
routing::{get, post},
Router,
};
use serde::{Deserialize, Serialize};
use sqlx::Row;
use std::path::PathBuf;
use crate::api::server::AppState;
/// Builds the router for the identity-agent endpoints.
///
/// Registers three `/api/v1/agents/identity/*` routes (analyze, suggest, status) and two
/// `/api/v1/agents/suggest/*` routes (clustering, merge).
pub fn identity_agent_routes() -> Router<AppState> {
    // Identity analysis endpoints.
    let router = Router::new();
    let router = router.route("/api/v1/agents/identity/analyze", post(analyze_identity));
    let router = router.route("/api/v1/agents/identity/suggest", post(suggest_merges));
    let router = router.route("/api/v1/agents/identity/status", get(get_identity_status));

    // Database-backed suggestion endpoints.
    let router = router.route(
        "/api/v1/agents/suggest/clustering",
        post(suggest_clustering),
    );
    router.route("/api/v1/agents/suggest/merge", post(suggest_merge))
}
/// JSON body accepted by `analyze_identity` (`POST /api/v1/agents/identity/analyze`).
#[derive(Debug, Deserialize)]
pub struct AnalyzeIdentityRequest {
/// Identifier of the video; used to build the per-video directory and JSON file names.
pub file_uuid: String,
// NOTE(review): the four options below are deserialized (and filled in by `suggest_merges`)
// but `analyze_identity` does not read any of them.
pub auto_merge_threshold: Option<f64>,
pub llm_threshold: Option<f64>,
pub use_llm: Option<bool>,
pub model: Option<String>,
}
/// JSON response produced by `analyze_identity`.
#[derive(Debug, Serialize)]
pub struct AnalyzeIdentityResponse {
/// Set to `true` on the success path of `analyze_identity`.
pub success: bool,
/// Echo of the request's `file_uuid`.
pub file_uuid: String,
/// One entry per person found in the face-cluster data.
pub identities: Vec<IdentityResult>,
/// Summary counters for the analysis run.
pub processing_status: IdentityProcessingStatus,
}
/// One proposed identity: a person paired with the speakers that overlap it.
#[derive(Debug, Serialize)]
pub struct IdentityResult {
/// Label of the form `identity_<n>` assigned by `analyze_person_speaker_overlap`.
pub identity_id: String,
/// Person ids grouped under this identity (a single id as currently built).
pub person_ids: Vec<String>,
/// Speakers whose segments cover more than half of the person's frames.
pub speaker_ids: Vec<String>,
/// 0.5 without a matching speaker, otherwise `0.7 + 0.2 * max overlap ratio`.
pub confidence: f64,
/// Numbers backing the confidence value.
pub evidence: IdentityEvidence,
/// Human-readable explanation of the speaker match.
pub reasoning: String,
}
/// Evidence values attached to an `IdentityResult`.
#[derive(Debug, Serialize)]
pub struct IdentityEvidence {
/// Always `None` as built by `analyze_person_speaker_overlap`.
pub face_similarity: Option<f64>,
/// Highest person/speaker overlap ratio that exceeded 0.5, or 0.0 when none did.
pub speaker_overlap: f64,
/// Currently filled with the same value as `speaker_overlap`.
pub time_overlap: f64,
/// Number of frames of the person divided by the fixed constant 1000.0.
pub frame_ratio: f64,
}
/// Summary counters returned alongside the identities of an analysis run.
#[derive(Debug, Serialize)]
pub struct IdentityProcessingStatus {
/// `analyze_identity` sets this to `"completed"`.
pub status: String,
/// Number of persons extracted from the face-cluster data.
pub persons_analyzed: i32,
/// Number of `IdentityResult` entries produced.
pub identities_created: i32,
/// Always 0 as set by `analyze_identity`.
pub merges_suggested: i32,
}
/// JSON body accepted by `suggest_merges` (`POST /api/v1/agents/identity/suggest`).
#[derive(Debug, Deserialize)]
pub struct SuggestMergesRequest {
/// Identifier of the video to analyze; forwarded to `analyze_identity`.
pub file_uuid: String,
}
/// JSON response produced by `suggest_merges`.
#[derive(Debug, Serialize)]
pub struct SuggestMergesResponse {
/// Set to `true` on the success path.
pub success: bool,
/// Echo of the request's `file_uuid`.
pub file_uuid: String,
/// One suggestion per analyzed identity that groups more than one person id.
pub merge_suggestions: Vec<MergeSuggestion>,
/// Always empty as returned by `suggest_merges`.
pub naming_suggestions: Vec<NamingSuggestion>,
}
/// Proposal to merge several person ids into one.
#[derive(Debug, Serialize)]
pub struct MergeSuggestion {
/// First person id of the identity; the merge target.
pub target_person_id: String,
/// Remaining person ids of the identity; the merge sources.
pub source_person_ids: Vec<String>,
/// Confidence copied from the analyzed identity.
pub confidence: f64,
/// Formatted evidence lines (speaker overlap, face similarity, confidence).
pub reasons: Vec<String>,
/// `"auto_apply"` when confidence exceeds 0.8, otherwise `"review_needed"`.
pub action: String,
}
/// Proposed display name for a person.
// NOTE(review): no code visible in this file constructs this type; `suggest_merges` always
// returns an empty list of naming suggestions.
#[derive(Debug, Serialize)]
pub struct NamingSuggestion {
pub person_id: String,
pub suggested_name: String,
pub confidence: f64,
pub reasoning: String,
}
/// JSON response produced by `get_identity_status`; all values are static.
#[derive(Debug, Serialize)]
pub struct IdentityStatusResponse {
/// Always `true`.
pub success: bool,
/// Reported as `"Identity Agent"`.
pub agent_name: String,
/// Reported as `"1.0.0"`.
pub version: String,
/// Reported as `["gemma4", "qwen3"]`.
pub supported_models: Vec<String>,
/// Threshold values reported by the status endpoint.
pub default_thresholds: DefaultThresholds,
}
/// Threshold values reported by `get_identity_status`.
#[derive(Debug, Serialize)]
pub struct DefaultThresholds {
/// Reported as 0.8.
pub auto_merge_threshold: f64,
/// Reported as 0.5.
pub llm_threshold: f64,
/// Reported as 0.3.
pub face_similarity_threshold: f64,
}
/// Analyzes one video's face clusters and speaker segments and proposes identities.
///
/// Reads `<dir>/<file_uuid>/<file_uuid>.face_clustered.json` (required) and
/// `<dir>/<file_uuid>/<file_uuid>.asrx.json` (optional), where `<dir>` comes from the
/// `MOMENTRY_OUTPUT_DIR` environment variable or a built-in default, then pairs each person
/// with the speakers that overlap it.
///
/// # Errors
/// * `400 Bad Request` when `file_uuid` is empty, is `.`/`..`, or contains a path separator.
/// * `404 Not Found` when the face-cluster file does not exist.
/// * `500 Internal Server Error` when a file cannot be read or is not valid JSON.
async fn analyze_identity(
    State(_state): State<AppState>,
    Json(req): Json<AnalyzeIdentityRequest>,
) -> Result<Json<AnalyzeIdentityResponse>, (StatusCode, String)> {
    // `file_uuid` comes straight from the request body and is joined into a filesystem path.
    // Reject anything that is not a single plain path component so that values such as
    // "../x" or "/etc" cannot escape the output directory.
    let has_separator = req.file_uuid.contains(|c: char| c == '/' || c == '\\');
    if req.file_uuid.is_empty() || req.file_uuid == "." || req.file_uuid == ".." || has_separator
    {
        return Err((
            StatusCode::BAD_REQUEST,
            format!("Invalid file_uuid: {}", req.file_uuid),
        ));
    }

    let output_dir = std::env::var("MOMENTRY_OUTPUT_DIR")
        .unwrap_or_else(|_| "/Users/accusys/momentry/output".to_string());
    let video_dir = PathBuf::from(&output_dir).join(&req.file_uuid);
    let face_clustered_path = video_dir.join(format!("{}.face_clustered.json", req.file_uuid));
    let asrx_path = video_dir.join(format!("{}.asrx.json", req.file_uuid));

    // Read directly and map "not found" to 404 instead of checking `exists()` first, which
    // leaves a window in which the file can disappear between the check and the read.
    // NOTE(review): these are blocking `std::fs` reads inside an async handler.
    let face_raw = std::fs::read_to_string(&face_clustered_path).map_err(|e| {
        if e.kind() == std::io::ErrorKind::NotFound {
            (
                StatusCode::NOT_FOUND,
                format!("Face clustered data not found for video: {}", req.file_uuid),
            )
        } else {
            (
                StatusCode::INTERNAL_SERVER_ERROR,
                format!("Failed to read face data: {}", e),
            )
        }
    })?;
    let face_data: serde_json::Value = serde_json::from_str(&face_raw).map_err(|e| {
        (
            StatusCode::INTERNAL_SERVER_ERROR,
            format!("Failed to parse face data: {}", e),
        )
    })?;

    // Speaker data is optional: a missing file simply yields no speakers.
    let asrx_data: Option<serde_json::Value> = match std::fs::read_to_string(&asrx_path) {
        Ok(raw) => Some(serde_json::from_str(&raw).map_err(|e| {
            (
                StatusCode::INTERNAL_SERVER_ERROR,
                format!("Failed to parse asrx data: {}", e),
            )
        })?),
        Err(e) if e.kind() == std::io::ErrorKind::NotFound => None,
        Err(e) => {
            return Err((
                StatusCode::INTERNAL_SERVER_ERROR,
                format!("Failed to read asrx data: {}", e),
            ))
        }
    };

    let persons = extract_persons_from_face_data(&face_data);
    let speakers = extract_speakers_from_asrx_data(&asrx_data);
    let identities = analyze_person_speaker_overlap(&persons, &speakers);

    let processing_status = IdentityProcessingStatus {
        status: "completed".to_string(),
        persons_analyzed: persons.len() as i32,
        identities_created: identities.len() as i32,
        merges_suggested: 0,
    };

    Ok(Json(AnalyzeIdentityResponse {
        success: true,
        file_uuid: req.file_uuid,
        identities,
        processing_status,
    }))
}
/// Runs `analyze_identity` for the requested video and converts every identity that groups
/// more than one person id into a merge suggestion.
///
/// The first person id becomes the merge target and the rest become sources. Suggestions with
/// confidence above 0.8 are marked `"auto_apply"`, all others `"review_needed"`. Naming
/// suggestions are always returned empty. Errors from `analyze_identity` are passed through.
async fn suggest_merges(
    State(state): State<AppState>,
    Json(req): Json<SuggestMergesRequest>,
) -> Result<Json<SuggestMergesResponse>, (StatusCode, String)> {
    let analysis_request = Json(AnalyzeIdentityRequest {
        file_uuid: req.file_uuid.clone(),
        auto_merge_threshold: Some(0.8),
        llm_threshold: Some(0.5),
        use_llm: Some(true),
        model: Some("gemma4".to_string()),
    });
    let Json(analysis) = analyze_identity(State(state), analysis_request).await?;

    let mut merge_suggestions: Vec<MergeSuggestion> = Vec::new();
    for identity in &analysis.identities {
        // Only identities that group at least two persons describe a merge.
        let (target, sources) = match identity.person_ids.split_first() {
            Some((first, rest)) if !rest.is_empty() => (first, rest),
            _ => continue,
        };

        let overlap_line = format!(
            "Shared speaker overlap: {:.0}%",
            identity.evidence.speaker_overlap * 100.0
        );
        let similarity_line = format!(
            "Face similarity: {:.2}",
            identity.evidence.face_similarity.unwrap_or(0.0)
        );
        let confidence_line = format!("Confidence: {:.2}", identity.confidence);

        let action = if identity.confidence > 0.8 {
            "auto_apply"
        } else {
            "review_needed"
        };

        merge_suggestions.push(MergeSuggestion {
            target_person_id: target.clone(),
            source_person_ids: sources.to_vec(),
            confidence: identity.confidence,
            reasons: vec![overlap_line, similarity_line, confidence_line],
            action: action.to_string(),
        });
    }

    Ok(Json(SuggestMergesResponse {
        success: true,
        file_uuid: req.file_uuid,
        merge_suggestions,
        naming_suggestions: vec![],
    }))
}
/// Reports static information about the identity agent: its name, version, the model names
/// it lists as supported, and its default thresholds. Never returns an error.
async fn get_identity_status() -> Result<Json<IdentityStatusResponse>, (StatusCode, String)> {
    let supported_models: Vec<String> = ["gemma4", "qwen3"]
        .iter()
        .map(|model| model.to_string())
        .collect();

    let default_thresholds = DefaultThresholds {
        auto_merge_threshold: 0.8,
        llm_threshold: 0.5,
        face_similarity_threshold: 0.3,
    };

    let status = IdentityStatusResponse {
        success: true,
        agent_name: String::from("Identity Agent"),
        version: String::from("1.0.0"),
        supported_models,
        default_thresholds,
    };
    Ok(Json(status))
}
/// Groups the frame numbers in `face_data["frames"]` by their `person_id`.
///
/// Each element of the `frames` array contributes when it has an integer `frame` and a string
/// `person_id`; other elements are skipped, as are frame numbers that do not fit in `i32`.
/// Returns an empty vector when `frames` is missing or is not an array.
///
/// Persons are returned sorted by `person_id`, so repeated calls on the same data yield the
/// same order (callers derive `identity_<n>` labels from the position in this list).
fn extract_persons_from_face_data(face_data: &serde_json::Value) -> Vec<PersonData> {
    let frames = match face_data.get("frames").and_then(|f| f.as_array()) {
        Some(frames) => frames,
        None => return Vec::new(),
    };

    // BTreeMap instead of HashMap: iteration order is by key and therefore deterministic.
    let mut frames_by_person: std::collections::BTreeMap<String, Vec<i32>> =
        std::collections::BTreeMap::new();

    for frame in frames {
        // Checked conversion: an out-of-range frame number is skipped rather than truncated.
        let frame_num: Option<i32> = frame
            .get("frame")
            .and_then(|f| f.as_i64())
            .and_then(|n| std::convert::TryFrom::try_from(n).ok());
        let person_id = frame.get("person_id").and_then(|p| p.as_str());

        if let (Some(frame_num), Some(person_id)) = (frame_num, person_id) {
            frames_by_person
                .entry(person_id.to_string())
                .or_default()
                .push(frame_num);
        }
    }

    frames_by_person
        .into_iter()
        .map(|(person_id, frames)| PersonData {
            person_id,
            frames,
            avg_embedding: None,
        })
        .collect()
}
/// Groups the `(start, end)` times in `asrx_data["segments"]` by their `speaker` label.
///
/// Segments without a string `speaker` are skipped; a missing or non-numeric `start` or `end`
/// is read as 0.0. Returns an empty vector when no data was supplied or when `segments` is
/// missing or is not an array.
///
/// Speakers are returned sorted by `speaker_id`, so the order of speaker ids reported
/// downstream is stable between calls.
fn extract_speakers_from_asrx_data(asrx_data: &Option<serde_json::Value>) -> Vec<SpeakerData> {
    let segments = match asrx_data
        .as_ref()
        .and_then(|data| data.get("segments"))
        .and_then(|s| s.as_array())
    {
        Some(segments) => segments,
        None => return Vec::new(),
    };

    // BTreeMap instead of HashMap: iteration order is by key and therefore deterministic.
    let mut segments_by_speaker: std::collections::BTreeMap<String, Vec<(f64, f64)>> =
        std::collections::BTreeMap::new();

    for segment in segments {
        if let Some(speaker_id) = segment.get("speaker").and_then(|s| s.as_str()) {
            let start = segment.get("start").and_then(|s| s.as_f64()).unwrap_or(0.0);
            let end = segment.get("end").and_then(|e| e.as_f64()).unwrap_or(0.0);
            segments_by_speaker
                .entry(speaker_id.to_string())
                .or_default()
                .push((start, end));
        }
    }

    segments_by_speaker
        .into_iter()
        .map(|(speaker_id, segments)| SpeakerData {
            speaker_id,
            segments,
        })
        .collect()
}
/// Builds one `IdentityResult` per person by matching it against every speaker.
///
/// A speaker is attached to a person when the fraction of the person's frames that fall inside
/// the speaker's segments exceeds 0.5. Confidence is 0.5 without any attached speaker and
/// `0.7 + 0.2 * best ratio` otherwise. Identities are labelled `identity_1`, `identity_2`, …
/// in the order of `persons`.
fn analyze_person_speaker_overlap(
    persons: &[PersonData],
    speakers: &[SpeakerData],
) -> Vec<IdentityResult> {
    persons
        .iter()
        .enumerate()
        .map(|(index, person)| {
            let frame_total = person.frames.len() as f64;

            // Collect the speakers above the threshold and remember the best ratio seen.
            let mut matched_speakers: Vec<String> = Vec::new();
            let mut best_ratio: f64 = 0.0;
            for speaker in speakers {
                let ratio = calculate_overlap(person, speaker) as f64 / frame_total;
                if ratio > 0.5 {
                    matched_speakers.push(speaker.speaker_id.clone());
                    best_ratio = best_ratio.max(ratio);
                }
            }

            let (confidence, reasoning) = if matched_speakers.is_empty() {
                (0.5, String::from("Person has no speaker overlap"))
            } else {
                (
                    0.7 + best_ratio * 0.2,
                    format!(
                        "Person has high overlap with speakers: {}",
                        matched_speakers.join(", ")
                    ),
                )
            };

            IdentityResult {
                identity_id: format!("identity_{}", index + 1),
                person_ids: vec![person.person_id.clone()],
                speaker_ids: matched_speakers,
                confidence,
                evidence: IdentityEvidence {
                    face_similarity: None,
                    speaker_overlap: best_ratio,
                    time_overlap: best_ratio,
                    frame_ratio: frame_total / 1000.0,
                },
                reasoning,
            }
        })
        .collect()
}
/// Counts how many of the person's frames fall inside at least one of the speaker's segments.
///
/// A frame number is converted to seconds by dividing by a fixed 23.976, and a frame counts
/// once when its time lies within any `(start, end)` segment, bounds inclusive.
// NOTE(review): the divisor is hard-coded; presumably it is the video's frame rate, which
// would make the result inaccurate for videos at other rates — confirm.
fn calculate_overlap(person: &PersonData, speaker: &SpeakerData) -> i32 {
    let covered_frames = person
        .frames
        .iter()
        .filter(|&&frame_num| {
            let frame_time = frame_num as f64 / 23.976;
            speaker
                .segments
                .iter()
                .any(|&(start, end)| frame_time >= start && frame_time <= end)
        })
        .count();
    covered_frames as i32
}
/// JSON body accepted by `suggest_clustering` (`POST /api/v1/agents/suggest/clustering`).
#[derive(Debug, Deserialize)]
pub struct SuggestClusteringRequest {
/// When present, restricts the suggestions to clusters of this file.
pub file_uuid: Option<String>,
/// Minimum `n_faces` a cluster must have; `suggest_clustering` defaults it to 3.
pub min_cluster_size: Option<usize>,
// NOTE(review): deserialized but not read by `suggest_clustering`.
pub similarity_threshold: Option<f64>,
}
/// JSON response produced by `suggest_clustering`.
#[derive(Debug, Serialize)]
pub struct SuggestClusteringResponse {
/// Set to `true` on the success path.
pub success: bool,
/// Clusters that have no identity referencing them, largest first.
pub suggestions: Vec<ClusteringSuggestion>,
/// Count of `face_detections` rows whose `identity_id` is NULL (across all files).
pub total_unclustered: usize,
}
/// One face cluster that could be turned into an identity.
#[derive(Debug, Serialize)]
pub struct ClusteringSuggestion {
/// `cluster_id` column of the `face_clusters` row.
pub cluster_id: String,
/// `n_faces` column of the `face_clusters` row.
pub face_count: usize,
/// `avg_confidence` key of the cluster's metadata JSON, or 0.0 when absent.
pub avg_confidence: f64,
/// Always `None` as built by `suggest_clustering`.
pub suggested_name: Option<String>,
/// `representative_face_id` key of the cluster's metadata JSON, when it is a string.
pub representative_face: Option<String>,
}
async fn suggest_clustering(
State(state): State<AppState>,
Json(req): Json<SuggestClusteringRequest>,
) -> Result<Json<SuggestClusteringResponse>, (StatusCode, String)> {
let min_cluster_size = req.min_cluster_size.unwrap_or(3);
let file_filter = match &req.file_uuid {
Some(uuid) => format!("AND fc.file_uuid = '{}'", uuid),
None => String::new(),
};
let query = format!(
r#"
SELECT fc.cluster_id, fc.file_uuid, fc.n_faces, fc.metadata
FROM face_clusters fc
WHERE fc.n_faces >= $1
AND NOT EXISTS (
SELECT 1 FROM identities i
WHERE i.metadata->>'cluster_id' = fc.cluster_id
)
{}
ORDER BY fc.n_faces DESC
"#,
file_filter
);
let pool = state.db.pool();
let rows = sqlx::query(&query)
.bind(min_cluster_size as i64)
.fetch_all(pool)
.await
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?;
let suggestions: Vec<ClusteringSuggestion> = rows
.into_iter()
.map(|row| {
let cluster_id: String = row.get("cluster_id");
let n_faces: i32 = row.get("n_faces");
let metadata: serde_json::Value =
row.try_get("metadata").unwrap_or(serde_json::Value::Null);
let avg_confidence = metadata
.get("avg_confidence")
.and_then(|v| v.as_f64())
.unwrap_or(0.0);
let representative_face = metadata
.get("representative_face_id")
.and_then(|v| v.as_str())
.map(|s| s.to_string());
ClusteringSuggestion {
cluster_id,
face_count: n_faces as usize,
avg_confidence,
suggested_name: None,
representative_face,
}
})
.collect();
let total_unclustered: i64 = sqlx::query_scalar(
r#"
SELECT COUNT(*) FROM face_detections fd
WHERE fd.identity_id IS NULL
"#,
)
.fetch_one(pool)
.await
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?;
Ok(Json(SuggestClusteringResponse {
success: true,
suggestions,
total_unclustered: total_unclustered as usize,
}))
}
/// JSON body accepted by `suggest_merge` (`POST /api/v1/agents/suggest/merge`).
#[derive(Debug, Deserialize)]
pub struct SuggestMergeRequest {
/// When present, only pairs involving the identity with this uuid are considered.
pub identity_id: Option<String>,
/// Minimum similarity score a pair must reach; `suggest_merge` defaults it to 0.8.
pub similarity_threshold: Option<f64>,
}
/// JSON response produced by `suggest_merge`.
#[derive(Debug, Serialize)]
pub struct SuggestMergeResponse {
/// Set to `true` on the success path.
pub success: bool,
/// Identity pairs whose similarity score reached the requested threshold.
pub suggestions: Vec<IdentityMergeSuggestion>,
}
/// One pair of identities proposed for merging.
#[derive(Debug, Serialize)]
pub struct IdentityMergeSuggestion {
/// `uuid` of the identity with the lower `id` in the pair.
pub source_identity_id: String,
/// `uuid` of the identity with the higher `id` in the pair.
pub target_identity_id: String,
pub source_name: String,
pub target_name: String,
/// `shared_files / 10`, capped at 1.0.
pub similarity_score: f64,
/// File count reported by the query's `shared_files` column.
pub shared_files: usize,
/// Formatted text stating the file count and the similarity as a percentage.
pub reason: String,
}
async fn suggest_merge(
State(state): State<AppState>,
Json(req): Json<SuggestMergeRequest>,
) -> Result<Json<SuggestMergeResponse>, (StatusCode, String)> {
let similarity_threshold = req.similarity_threshold.unwrap_or(0.8);
let identity_filter = match &req.identity_id {
Some(id) => format!("AND i1.uuid = '{}' OR i2.uuid = '{}'", id, id),
None => String::new(),
};
let query = format!(
r#"
SELECT
i1.uuid as source_uuid,
i2.uuid as target_uuid,
i1.name as source_name,
i2.name as target_name,
COUNT(DISTINCT fi1.file_uuid) as shared_files
FROM identities i1
JOIN identities i2 ON i1.id < i2.id
LEFT JOIN file_identities fi1 ON fi1.identity_id = i1.id
LEFT JOIN file_identities fi2 ON fi2.identity_id = i2.id AND fi1.file_uuid = fi2.file_uuid
WHERE i1.identity_type = 'people'
AND i2.identity_type = 'people'
AND i1.id != i2.id
{}
GROUP BY i1.uuid, i2.uuid, i1.name, i2.name
HAVING COUNT(DISTINCT fi1.file_uuid) > 0
ORDER BY shared_files DESC
LIMIT 50
"#,
identity_filter
);
let pool = state.db.pool();
let rows = sqlx::query(&query)
.fetch_all(pool)
.await
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?;
let suggestions: Vec<IdentityMergeSuggestion> = rows
.into_iter()
.filter_map(|row| {
let shared_files: i64 = row.get("shared_files");
if shared_files > 0 {
let similarity = (shared_files as f64 / 10.0).min(1.0);
if similarity >= similarity_threshold {
Some(IdentityMergeSuggestion {
source_identity_id: row.get("source_uuid"),
target_identity_id: row.get("target_uuid"),
source_name: row.get("source_name"),
target_name: row.get("target_name"),
similarity_score: similarity,
shared_files: shared_files as usize,
reason: format!(
"Share {} file(s) - similarity: {:.1}%",
shared_files,
similarity * 100.0
),
})
} else {
None
}
} else {
None
}
})
.collect();
Ok(Json(SuggestMergeResponse {
success: true,
suggestions,
}))
}
/// Frames in which one person id was seen, as collected by `extract_persons_from_face_data`.
#[derive(Debug)]
struct PersonData {
// Value of the `person_id` key in the face-cluster JSON.
person_id: String,
// Frame numbers (values of the `frame` key) attributed to this person.
frames: Vec<i32>,
// NOTE(review): always set to `None` by the extractor and not read anywhere visible here.
avg_embedding: Option<Vec<f64>>,
}
/// Speech segments of one speaker label, as collected by `extract_speakers_from_asrx_data`.
#[derive(Debug)]
struct SpeakerData {
// Value of the `speaker` key in the ASR JSON.
speaker_id: String,
// `(start, end)` pairs read from the `start` and `end` keys of each segment.
segments: Vec<(f64, f64)>,
}

View File

@@ -8,17 +8,27 @@ use axum::{
use serde::{Deserialize, Serialize};
use uuid::Uuid;
use crate::core::db::{Database, PostgresDb, ResourceRecord};
use crate::core::db::ResourceRecord;
pub fn identity_routes() -> Router<crate::api::server::AppState> {
Router::new()
.route("/api/v1/people", get(list_people))
.route("/api/v1/people/search", post(search_people))
.route("/api/v1/people/candidates", get(list_candidates))
.route("/api/v1/people/{identity_id}/confirm-candidate", post(confirm_candidate))
.route("/api/v1/people/{identity_id}/reject-candidate", post(reject_candidate))
.route(
"/api/v1/people/:identity_id/confirm-candidate",
post(confirm_candidate),
)
.route(
"/api/v1/people/:identity_id/reject-candidate",
post(reject_candidate),
)
.route("/api/v1/files", get(list_files))
.route("/api/v1/files/{uuid}", get(get_file_detail))
.route("/api/v1/files/:uuid", get(get_file_detail))
.route("/api/v1/files/:uuid/identities", get(get_file_identities))
.route("/api/v1/identities/:uuid", get(get_identity_detail))
.route("/api/v1/identities/:uuid/files", get(get_identity_files))
.route("/api/v1/identities/:uuid/chunks", get(get_identity_chunks))
.route("/api/v1/resources/register", post(register_resource))
.route("/api/v1/resources/heartbeat", post(heartbeat_resource))
.route("/api/v1/resources", get(list_resources))
@@ -59,18 +69,24 @@ async fn list_people(
let page_size = params.page_size.unwrap_or(20);
let offset = ((page - 1) as i64) * (page_size as i64);
let records = state.db.list_people(page_size as i32, offset).await
let records = state
.db
.list_people(page_size as i32, offset)
.await
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?;
// TODO: Get total count
let total = 100; // Placeholder
let data = records.into_iter().map(|r| PeopleItem {
identity_id: r.uuid,
name: r.name,
metadata: r.metadata,
created_at: r.created_at,
}).collect();
let data = records
.into_iter()
.map(|r| PeopleItem {
identity_id: r.uuid,
name: r.name,
metadata: r.metadata,
created_at: r.created_at,
})
.collect();
Ok(Json(PeopleResponse {
success: true,
@@ -96,15 +112,21 @@ async fn search_people(
let page_size = req.page_size.unwrap_or(20);
let offset = ((page - 1) as i64) * (page_size as i64);
let records = state.db.search_people(&req.query, page_size as i32, offset).await
let records = state
.db
.search_people(&req.query, page_size as i32, offset)
.await
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?;
let data: Vec<PeopleItem> = records.into_iter().map(|r| PeopleItem {
identity_id: r.uuid,
name: r.name,
metadata: r.metadata,
created_at: r.created_at,
}).collect();
let data: Vec<PeopleItem> = records
.into_iter()
.map(|r| PeopleItem {
identity_id: r.uuid,
name: r.name,
metadata: r.metadata,
created_at: r.created_at,
})
.collect();
Ok(Json(PeopleResponse {
success: true,
@@ -145,14 +167,20 @@ async fn list_candidates(
let page_size = params.page_size.unwrap_or(20);
let offset = ((page - 1) as i64) * (page_size as i64);
let records = state.db.get_people_candidates(page_size as i32, offset).await
let records = state
.db
.get_people_candidates(page_size as i32, offset)
.await
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?;
let data = records.into_iter().map(|r| CandidateItem {
pre_chunk_id: r.id,
file_uuid: r.file_uuid,
data: r.data,
}).collect();
let data = records
.into_iter()
.map(|r| CandidateItem {
pre_chunk_id: r.id,
file_uuid: r.file_uuid,
data: r.data,
})
.collect();
Ok(Json(CandidatesResponse {
success: true,
@@ -184,7 +212,10 @@ async fn confirm_candidate(
let identity_id = Uuid::parse_str(&identity_id_str)
.map_err(|e| (StatusCode::BAD_REQUEST, format!("Invalid UUID: {}", e)))?;
state.db.confirm_candidate(req.pre_chunk_id, identity_id).await
state
.db
.confirm_candidate(req.pre_chunk_id, identity_id)
.await
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?;
Ok(Json(ConfirmCandidateResponse {
@@ -198,7 +229,10 @@ async fn reject_candidate(
Path(_identity_id_str): Path<String>, // Unused, but consistent with route
Json(req): Json<ConfirmCandidateRequest>,
) -> Result<Json<ConfirmCandidateResponse>, (StatusCode, String)> {
state.db.reject_candidate(req.pre_chunk_id).await
state
.db
.reject_candidate(req.pre_chunk_id)
.await
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?;
Ok(Json(ConfirmCandidateResponse {
@@ -240,15 +274,21 @@ async fn list_files(
let page_size = params.page_size.unwrap_or(20);
let offset = ((page - 1) as i64) * (page_size as i64);
let records = state.db.list_files(page_size as i32, offset).await
let records = state
.db
.list_files(page_size as i32, offset)
.await
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?;
let data = records.into_iter().map(|r| FileItem {
file_uuid: r.uuid,
file_name: r.file_name,
file_path: r.file_path,
status: "ready".to_string(),
}).collect();
let data = records
.into_iter()
.map(|r| FileItem {
file_uuid: r.file_uuid,
file_name: r.file_name,
file_path: r.file_path,
status: "ready".to_string(),
})
.collect();
Ok(Json(FilesResponse {
success: true,
@@ -261,23 +301,349 @@ async fn list_files(
#[derive(Debug, Serialize)]
pub struct FileDetailResponse {
pub success: bool,
pub file_uuid: String,
pub file_name: String,
pub file_path: String,
pub metadata: serde_json::Value,
pub metadata: Option<serde_json::Value>,
pub created_at: Option<chrono::DateTime<chrono::Utc>>,
}
async fn get_file_detail(
State(state): State<crate::api::server::AppState>,
Path(uuid): Path<String>,
) -> Result<Json<FileDetailResponse>, (StatusCode, String)> {
// Need a method to get single file
// For now, placeholder
Ok(Json(FileDetailResponse {
let file = state
.db
.get_file_by_uuid(&uuid)
.await
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?;
match file {
Some(f) => Ok(Json(FileDetailResponse {
success: true,
file_uuid: f.file_uuid,
file_name: f.file_name,
file_path: f.file_path,
metadata: f.probe_json,
created_at: f.created_at,
})),
None => Err((StatusCode::NOT_FOUND, format!("File not found: {}", uuid))),
}
}
#[derive(Debug, Serialize)]
pub struct FileIdentitiesResponse {
pub success: bool,
pub file_uuid: String,
pub total: i64,
pub page: usize,
pub page_size: usize,
pub data: Vec<FileIdentityItem>,
}
#[derive(Debug, Serialize)]
pub struct FileIdentityItem {
pub identity_id: i32,
pub name: String,
pub metadata: serde_json::Value,
pub face_count: Option<i32>,
pub speaker_count: Option<i32>,
pub first_appearance: Option<f64>,
pub last_appearance: Option<f64>,
pub confidence: Option<f64>,
}
async fn get_file_identities(
State(state): State<crate::api::server::AppState>,
Path(uuid): Path<String>,
Query(params): Query<FilesQuery>,
) -> Result<Json<FileIdentitiesResponse>, (StatusCode, String)> {
let page = params.page.unwrap_or(1);
let page_size = params.page_size.unwrap_or(20);
let offset = ((page - 1) as i64) * (page_size as i64);
let records = state
.db
.get_file_identities(&uuid, page_size as i32, offset)
.await
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?;
let data: Vec<FileIdentityItem> = records
.into_iter()
.map(|r| FileIdentityItem {
identity_id: r.identity_id,
name: r.name,
metadata: r.metadata,
face_count: r.face_count,
speaker_count: r.speaker_count,
first_appearance: r.first_appearance,
last_appearance: r.last_appearance,
confidence: r.confidence,
})
.collect();
Ok(Json(FileIdentitiesResponse {
success: true,
file_uuid: uuid,
file_name: "Unknown".to_string(),
file_path: "/path/to/file".to_string(),
metadata: serde_json::json!({}),
total: data.len() as i64,
page,
page_size,
data,
}))
}
/// JSON response produced by `get_identity_detail`; every field except `success` is copied
/// from the identity record returned by the database layer.
#[derive(Debug, Serialize)]
pub struct IdentityDetailResponse {
/// Set to `true` when the identity was found.
pub success: bool,
pub uuid: Uuid,
pub name: String,
pub identity_type: Option<String>,
pub source: Option<String>,
pub status: Option<String>,
pub metadata: serde_json::Value,
pub reference_data: serde_json::Value,
pub tmdb_id: Option<i32>,
pub tmdb_profile: Option<String>,
pub created_at: Option<chrono::DateTime<chrono::Utc>>,
pub updated_at: Option<chrono::DateTime<chrono::Utc>>,
}
/// Returns the stored details of the identity whose uuid is given in the path.
///
/// # Errors
/// * `400 Bad Request` when the path segment is not a valid UUID.
/// * `500 Internal Server Error` when the database lookup fails.
/// * `404 Not Found` when no identity has that uuid.
async fn get_identity_detail(
    State(state): State<crate::api::server::AppState>,
    Path(uuid_str): Path<String>,
) -> Result<Json<IdentityDetailResponse>, (StatusCode, String)> {
    let uuid = match Uuid::parse_str(&uuid_str) {
        Ok(parsed) => parsed,
        Err(e) => return Err((StatusCode::BAD_REQUEST, format!("Invalid UUID: {}", e))),
    };

    let lookup = state.db.get_identity_by_uuid(&uuid).await;
    let found = match lookup {
        Ok(found) => found,
        Err(e) => return Err((StatusCode::INTERNAL_SERVER_ERROR, e.to_string())),
    };

    let record = found.ok_or_else(|| {
        (
            StatusCode::NOT_FOUND,
            format!("Identity not found: {}", uuid),
        )
    })?;

    let body = IdentityDetailResponse {
        success: true,
        uuid: record.uuid,
        name: record.name,
        identity_type: record.identity_type,
        source: record.source,
        status: record.status,
        metadata: record.metadata,
        reference_data: record.reference_data,
        tmdb_id: record.tmdb_id,
        tmdb_profile: record.tmdb_profile,
        created_at: record.created_at,
        updated_at: record.updated_at,
    };
    Ok(Json(body))
}
/// JSON response produced by `get_identity_files`.
#[derive(Debug, Serialize)]
pub struct IdentityFilesResponse {
pub success: bool,
/// The identity uuid parsed from the request path.
pub identity_uuid: Uuid,
// NOTE(review): the handler fills this with the number of items on the returned page,
// not with the overall number of matching files.
pub total: i64,
pub page: usize,
pub page_size: usize,
pub data: Vec<IdentityFileItem>,
}
/// One file in which an identity appears; all fields are copied from the database record
/// returned for `get_identity_files`.
#[derive(Debug, Serialize)]
pub struct IdentityFileItem {
pub file_uuid: String,
pub file_name: String,
pub file_path: String,
pub status: String,
pub face_count: Option<i32>,
pub speaker_count: Option<i32>,
pub first_appearance: Option<f64>,
pub last_appearance: Option<f64>,
pub confidence: Option<f64>,
}
/// Returns one page of the files in which the identity with the given uuid appears.
///
/// `page` is 1-based and defaults to 1 (0 is treated as 1); `page_size` defaults to 20.
///
/// # Errors
/// * `400 Bad Request` when the path segment is not a valid UUID.
/// * `500 Internal Server Error` when the database query fails.
async fn get_identity_files(
    State(state): State<crate::api::server::AppState>,
    Path(uuid_str): Path<String>,
    Query(params): Query<FilesQuery>,
) -> Result<Json<IdentityFilesResponse>, (StatusCode, String)> {
    let uuid = Uuid::parse_str(&uuid_str)
        .map_err(|e| (StatusCode::BAD_REQUEST, format!("Invalid UUID: {}", e)))?;

    // Pages are 1-based. Clamp so that `page = 0` cannot underflow the unsigned subtraction
    // below (a panic in debug builds, a negative OFFSET in release builds).
    let page = params.page.unwrap_or(1).max(1);
    let page_size = params.page_size.unwrap_or(20);
    let offset = ((page - 1) as i64) * (page_size as i64);

    let records = state
        .db
        .get_identity_files(&uuid, page_size as i32, offset)
        .await
        .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?;

    let data: Vec<IdentityFileItem> = records
        .into_iter()
        .map(|r| IdentityFileItem {
            file_uuid: r.file_uuid,
            file_name: r.file_name,
            file_path: r.file_path,
            status: r.status,
            face_count: r.face_count,
            speaker_count: r.speaker_count,
            first_appearance: r.first_appearance,
            last_appearance: r.last_appearance,
            confidence: r.confidence,
        })
        .collect();

    Ok(Json(IdentityFilesResponse {
        success: true,
        identity_uuid: uuid,
        // NOTE(review): this is the size of the current page, not the overall match count;
        // a real total needs a count query in the database layer.
        total: data.len() as i64,
        page,
        page_size,
        data,
    }))
}
/// JSON response produced by `get_identity_faces`.
#[derive(Debug, Serialize)]
pub struct IdentityFacesResponse {
pub success: bool,
/// The identity uuid parsed from the request path.
pub identity_uuid: Uuid,
// NOTE(review): the handler fills this with the number of items on the returned page,
// not with the overall number of faces.
pub total: i64,
pub page: usize,
pub page_size: usize,
pub data: Vec<IdentityFaceItem>,
}
/// One face detection of an identity; fields are copied from the database record, with the
/// record's `x`, `y`, `width` and `height` grouped into `bbox`.
#[derive(Debug, Serialize)]
pub struct IdentityFaceItem {
pub id: i64,
pub file_uuid: String,
pub frame_number: i64,
pub timestamp_secs: f64,
pub face_id: Option<String>,
pub bbox: BBox,
pub confidence: f64,
}
/// Bounding box of a face as serialized in `IdentityFaceItem`.
// NOTE(review): units (pixels vs. normalized coordinates) are not visible here — confirm.
#[derive(Debug, Serialize)]
pub struct BBox {
pub x: f64,
pub y: f64,
pub width: f64,
pub height: f64,
}
/// Returns one page of the face detections of the identity with the given uuid.
///
/// `page` is 1-based and defaults to 1 (0 is treated as 1); `page_size` defaults to 50.
///
/// # Errors
/// * `400 Bad Request` when the path segment is not a valid UUID.
/// * `500 Internal Server Error` when the database query fails.
async fn get_identity_faces(
    State(state): State<crate::api::server::AppState>,
    Path(uuid_str): Path<String>,
    Query(params): Query<FilesQuery>,
) -> Result<Json<IdentityFacesResponse>, (StatusCode, String)> {
    let uuid = Uuid::parse_str(&uuid_str)
        .map_err(|e| (StatusCode::BAD_REQUEST, format!("Invalid UUID: {}", e)))?;

    // Pages are 1-based. Clamp so that `page = 0` cannot underflow the unsigned subtraction
    // below (a panic in debug builds, a negative OFFSET in release builds).
    let page = params.page.unwrap_or(1).max(1);
    let page_size = params.page_size.unwrap_or(50);
    let offset = ((page - 1) as i64) * (page_size as i64);

    let records = state
        .db
        .get_identity_faces(&uuid, page_size as i32, offset)
        .await
        .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?;

    let data: Vec<IdentityFaceItem> = records
        .into_iter()
        .map(|r| IdentityFaceItem {
            id: r.id,
            file_uuid: r.file_uuid,
            frame_number: r.frame_number,
            timestamp_secs: r.timestamp_secs,
            face_id: r.face_id,
            bbox: BBox {
                x: r.x,
                y: r.y,
                width: r.width,
                height: r.height,
            },
            confidence: r.confidence,
        })
        .collect();

    Ok(Json(IdentityFacesResponse {
        success: true,
        identity_uuid: uuid,
        // NOTE(review): this is the size of the current page, not the overall match count;
        // a real total needs a count query in the database layer.
        total: data.len() as i64,
        page,
        page_size,
        data,
    }))
}
/// JSON response produced by `get_identity_chunks`.
#[derive(Debug, Serialize)]
pub struct IdentityChunksResponse {
pub success: bool,
/// The identity uuid parsed from the request path.
pub identity_uuid: Uuid,
// NOTE(review): the handler fills this with the number of items on the returned page,
// not with the overall number of chunks.
pub total: i64,
pub page: usize,
pub page_size: usize,
pub data: Vec<IdentityChunkItem>,
}
/// One chunk associated with an identity; all fields are copied from the database record
/// returned for `get_identity_chunks`.
#[derive(Debug, Serialize)]
pub struct IdentityChunkItem {
pub id: i64,
pub file_uuid: String,
pub chunk_id: String,
pub chunk_type: String,
pub start_time: Option<f64>,
pub end_time: Option<f64>,
pub text_content: Option<String>,
}
/// Returns one page of the chunks associated with the identity with the given uuid.
///
/// `page` is 1-based and defaults to 1 (0 is treated as 1); `page_size` defaults to 20.
///
/// # Errors
/// * `400 Bad Request` when the path segment is not a valid UUID.
/// * `500 Internal Server Error` when the database query fails.
async fn get_identity_chunks(
    State(state): State<crate::api::server::AppState>,
    Path(uuid_str): Path<String>,
    Query(params): Query<FilesQuery>,
) -> Result<Json<IdentityChunksResponse>, (StatusCode, String)> {
    let uuid = Uuid::parse_str(&uuid_str)
        .map_err(|e| (StatusCode::BAD_REQUEST, format!("Invalid UUID: {}", e)))?;

    // Pages are 1-based. Clamp so that `page = 0` cannot underflow the unsigned subtraction
    // below (a panic in debug builds, a negative OFFSET in release builds).
    let page = params.page.unwrap_or(1).max(1);
    let page_size = params.page_size.unwrap_or(20);
    let offset = ((page - 1) as i64) * (page_size as i64);

    let records = state
        .db
        .get_identity_chunks(&uuid, page_size as i32, offset)
        .await
        .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?;

    let data: Vec<IdentityChunkItem> = records
        .into_iter()
        .map(|r| IdentityChunkItem {
            id: r.id,
            file_uuid: r.file_uuid,
            chunk_id: r.chunk_id,
            chunk_type: r.chunk_type,
            start_time: r.start_time,
            end_time: r.end_time,
            text_content: r.text_content,
        })
        .collect();

    Ok(Json(IdentityChunksResponse {
        success: true,
        identity_uuid: uuid,
        // NOTE(review): this is the size of the current page, not the overall match count;
        // a real total needs a count query in the database layer.
        total: data.len() as i64,
        page,
        page_size,
        data,
    }))
}
@@ -326,7 +692,10 @@ async fn register_resource(
created_at: None,
};
state.db.register_resource(resource).await
state
.db
.register_resource(resource)
.await
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?;
Ok(Json(ResourceResponse {
@@ -347,7 +716,10 @@ async fn heartbeat_resource(
Json(req): Json<HeartbeatRequest>,
) -> Result<Json<ResourceResponse>, (StatusCode, String)> {
let status = req.status.unwrap_or("online".to_string());
state.db.heartbeat_resource(&req.resource_id, &status).await
state
.db
.heartbeat_resource(&req.resource_id, &status)
.await
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?;
Ok(Json(ResourceResponse {
@@ -360,17 +732,23 @@ async fn heartbeat_resource(
async fn list_resources(
State(state): State<crate::api::server::AppState>,
) -> Result<Json<ResourceResponse>, (StatusCode, String)> {
let records = state.db.list_resources().await
let records = state
.db
.list_resources()
.await
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?;
let data: Vec<ResourceItem> = records.into_iter().map(|r| ResourceItem {
resource_id: r.resource_id,
resource_type: r.resource_type,
category: r.category,
capabilities: r.capabilities,
status: r.status,
last_heartbeat: r.last_heartbeat,
}).collect();
let data: Vec<ResourceItem> = records
.into_iter()
.map(|r| ResourceItem {
resource_id: r.resource_id,
resource_type: r.resource_type,
category: r.category,
capabilities: r.capabilities,
status: r.status,
last_heartbeat: r.last_heartbeat,
})
.collect();
Ok(Json(ResourceResponse {
success: true,

View File

@@ -211,7 +211,7 @@ pub async fn get_signal_timeline(
#[derive(Debug, Deserialize)]
pub struct AVSuggestRequest {
pub video_uuid: String,
pub file_uuid: String,
pub overlap_threshold: Option<f64>, // default 0.6
}
@@ -233,7 +233,7 @@ pub async fn suggest_audio_visual_bindings(
// 1. Get Face signals and their time ranges
let face_signals = db
.list_unbound_signals(&req.video_uuid, "face")
.list_unbound_signals(&req.file_uuid, "face")
.await
.map_err(|e| {
(
@@ -243,7 +243,7 @@ pub async fn suggest_audio_visual_bindings(
})?;
let speaker_signals = db
.list_unbound_signals(&req.video_uuid, "speaker")
.list_unbound_signals(&req.file_uuid, "speaker")
.await
.map_err(|e| {
(
@@ -263,11 +263,11 @@ pub async fn suggest_audio_visual_bindings(
// Placeholder: Calculate overlap by fetching timelines
let face_timeline = db
.get_chunks_by_signal(&req.video_uuid, "face", face_id)
.get_chunks_by_signal(&req.file_uuid, "face", face_id)
.await
.unwrap_or_default();
let speaker_timeline = db
.get_chunks_by_signal(&req.video_uuid, "speaker", speaker_id)
.get_chunks_by_signal(&req.file_uuid, "speaker", speaker_id)
.await
.unwrap_or_default();

View File

@@ -21,13 +21,27 @@ pub struct ApiState {
pub db: Arc<PostgresDb>,
}
/// Path prefixes under which thumbnail endpoints may be reached without an API key.
const PUBLIC_PATHS: &[&str] = &[
    "/api/v1/faces/", // Thumbnail paths (partial match)
];

/// Reports whether `path` may bypass API-key validation.
///
/// A path qualifies only when it ends with `/thumbnail` and starts with one
/// of the prefixes listed in `PUBLIC_PATHS`.
fn is_public_path(path: &str) -> bool {
    if !path.ends_with("/thumbnail") {
        return false;
    }
    for prefix in PUBLIC_PATHS {
        if path.starts_with(*prefix) {
            return true;
        }
    }
    false
}
pub async fn api_key_validation(
State(state): State<ApiState>,
request: Request,
next: Next,
) -> Response {
let path = request.uri().path();
tracing::info!("[MIDDLEWARE] Starting API key validation");
tracing::info!("[MIDDLEWARE] Path: {:?}", request.uri().path());
tracing::info!("[MIDDLEWARE] Path: {:?}", path);
if is_public_path(path) {
tracing::info!("[MIDDLEWARE] Public path, skipping auth: {}", path);
return next.run(request).await;
}
let headers = request.headers();
tracing::info!("[MIDDLEWARE] All headers: {:?}", headers);

View File

@@ -1,6 +1,8 @@
pub mod agent_api;
pub mod face_recognition;
pub mod five_w1h_agent_api;
pub mod identities;
pub mod identity_agent_api;
pub mod identity_api;
pub mod identity_binding;
pub mod middleware;
@@ -8,6 +10,7 @@ pub mod n8n_search;
pub mod person_identity;
pub mod search;
pub mod server;
pub mod snapshot_api;
pub mod universal_search;
pub mod visual_chunk_search;
pub mod who;

View File

@@ -1,5 +1,4 @@
use crate::core::db::{Bm25Result, PostgresDb};
use reqwest::Client;
use serde::{Deserialize, Serialize};
use std::collections::HashSet;
@@ -35,7 +34,7 @@ pub async fn n8n_search_smart(
req: SmartSearchRequest,
) -> Result<SmartSearchResponse, Box<dyn std::error::Error + Send + Sync>> {
let limit = req.limit.unwrap_or(10);
let video_uuid = req.uuid.clone();
let file_uuid = req.uuid.clone();
// 1. Call LLM to extract 5W1H (Fallback to keywords if LLM fails)
let dimensions = match parse_query_with_llm(&req.query).await {
@@ -93,10 +92,7 @@ pub async fn n8n_search_smart(
// A. Keyword Search (BM25)
if !keywords.is_empty() {
if let Ok(results) = db
.search_bm25(&keywords, video_uuid.as_deref(), limit)
.await
{
if let Ok(results) = db.search_bm25(&keywords, file_uuid.as_deref(), limit).await {
for sr in results {
add_hit(&mut hits, &mut seen_chunk_ids, sr, 1.0);
}
@@ -106,7 +102,7 @@ pub async fn n8n_search_smart(
// B. Who Search (Person Matching)
if let Some(who_query) = &dimensions.who {
// 1. Search Person
if let Ok(persons) = db.search_person_candidates(who_query, &video_uuid, 5).await {
if let Ok(persons) = db.search_person_candidates(who_query, &file_uuid, 5).await {
if !persons.is_empty() {
let person_id = persons[0]
.get("candidate_id")
@@ -122,7 +118,7 @@ pub async fn n8n_search_smart(
// Re-run BM25 with person name to find specific chunks and boost them
if let Ok(results) = db
.search_bm25(person_name, video_uuid.as_deref(), limit)
.search_bm25(person_name, file_uuid.as_deref(), limit)
.await
{
for sr in results {

View File

@@ -10,7 +10,6 @@ use axum::{
use serde::{Deserialize, Serialize};
use uuid::Uuid;
use crate::core::db::schema;
use crate::core::db::{Database, PostgresDb};
use crate::core::person_identity::{
ChunkPersonInfo, CreatePersonIdentityRequest, PersonIdentity, PersonIdentityResponse,
@@ -20,7 +19,7 @@ use crate::core::person_identity::{
#[derive(Debug, Deserialize)]
pub struct IdentifyPersonsRequest {
pub video_uuid: String,
pub file_uuid: String,
pub auto_match: Option<bool>,
pub match_threshold: Option<f64>,
}
@@ -39,19 +38,19 @@ pub struct FaceListQuery {
#[derive(Debug, Deserialize)]
pub struct VideoUuidQuery {
pub video_uuid: String,
pub file_uuid: String,
}
#[derive(Debug, Deserialize)]
pub struct PersonTimelineQuery {
pub video_uuid: String,
pub file_uuid: String,
}
#[derive(Debug, Deserialize)]
pub struct FaceThumbnailQuery {
pub video_uuid: String,
pub file_uuid: String,
#[serde(default)]
pub index: Option<usize>, // Which face detection to use (default: 0)
pub index: Option<usize>,
}
// Structs for parsing face_clustered.json
@@ -85,7 +84,7 @@ pub struct ChunkPersonsResponse {
#[derive(Debug, Deserialize)]
pub struct MergePersonsRequest {
pub video_uuid: String,
pub file_uuid: String,
pub target_person_id: String,
pub source_person_ids: Vec<String>,
}
@@ -115,7 +114,7 @@ pub struct MergeHistoryResponse {
#[derive(Debug, Deserialize)]
pub struct PersonListQuery {
pub video_uuid: String,
pub file_uuid: String,
pub page: Option<usize>,
pub page_size: Option<usize>,
pub min_appearances: Option<i32>,
@@ -124,13 +123,13 @@ pub struct PersonListQuery {
#[derive(Debug, Deserialize)]
pub struct AutoIdentifyRequest {
pub video_uuid: String,
pub file_uuid: String,
pub min_speaker_confidence: Option<f64>,
}
#[derive(Debug, Deserialize)]
pub struct SimilarPersonsQuery {
pub video_uuid: String,
pub file_uuid: String,
pub threshold: Option<f64>,
pub limit: Option<i32>,
}
@@ -210,7 +209,7 @@ pub struct AutoIdentifyResponse {
}
pub struct AggregateBySpeakerRequest {
pub video_uuid: String,
pub file_uuid: String,
pub auto_merge: bool, // If true, automatically merge duplicates
}
@@ -311,14 +310,14 @@ async fn identify_persons(
tracing::info!(
"[PERSON_IDENTITY] Identifying persons for video: {}",
request.video_uuid
request.file_uuid
);
let auto_match = request.auto_match.unwrap_or(true);
let threshold = request.match_threshold.unwrap_or(0.5);
if auto_match {
let matches = match auto_match_face_speaker(&db, &request.video_uuid, threshold).await {
let matches = match auto_match_face_speaker(&db, &request.file_uuid, threshold).await {
Ok(m) => m,
Err(e) => {
return Err((
@@ -333,7 +332,7 @@ async fn identify_persons(
let person = match create_person_identity(
&db,
CreatePersonIdentityRequest {
video_uuid: request.video_uuid.clone(),
file_uuid: request.file_uuid.clone(),
face_identity_id: None,
speaker_id: Some(match_result.speaker_id.clone()),
name: None,
@@ -372,7 +371,7 @@ async fn identify_persons(
#[derive(Debug, Deserialize)]
pub struct PersonDetailQuery {
pub video_uuid: String,
pub file_uuid: String,
}
#[derive(Debug, Deserialize)]
@@ -381,7 +380,7 @@ pub struct SearchIdentitiesRequest {
pub speaker_id: Option<String>,
pub gender: Option<String>,
pub min_appearances: Option<i32>,
pub video_uuid: Option<String>, // Optional: search only in specific video
pub file_uuid: Option<String>, // Optional: search only in specific video
pub limit: Option<usize>,
}
@@ -404,7 +403,7 @@ async fn search_identities(
let mut sql = format!(
r#"
SELECT
id, person_id, face_identity_id, speaker_id, video_uuid,
id, person_id, face_identity_id, speaker_id, file_uuid,
name, original_name, character_name, gender, age,
appearance_count, total_appearance_duration,
first_appearance_time, last_appearance_time, is_confirmed
@@ -435,8 +434,8 @@ async fn search_identities(
conditions.push(format!("appearance_count >= {}", min_count));
}
if let Some(vid) = &req.video_uuid {
conditions.push(format!("video_uuid = '{}'", vid.replace('\'', "''")));
if let Some(vid) = &req.file_uuid {
conditions.push(format!("file_uuid = '{}'", vid.replace('\'', "''")));
}
if !conditions.is_empty() {
@@ -482,7 +481,7 @@ async fn search_identities(
person_id,
face_id,
speaker_id,
video_uuid,
file_uuid,
name,
original_name,
character_name,
@@ -498,7 +497,7 @@ async fn search_identities(
"id": id,
"person_id": person_id,
"face_identity_id": face_id,
"video_uuid": video_uuid,
"file_uuid": file_uuid,
"profile": {
"name": name,
"original_name": original_name,
@@ -579,12 +578,12 @@ async fn get_identity_videos(
// Each video has its own local person_id, speaker_id, and character_name
let videos_query = r#"
SELECT
pi.video_uuid, v.file_name, v.file_path,
pi.file_uuid, v.file_name, v.file_path,
pi.person_id, pi.speaker_id, pi.character_name,
pi.appearance_count,
pi.total_appearance_duration, pi.first_appearance_time, pi.last_appearance_time
FROM '"{}' pi
LEFT JOIN videos v ON pi.video_uuid = v.uuid
LEFT JOIN videos v ON pi.file_uuid = v.uuid
WHERE pi.face_identity_id = $1
ORDER BY pi.last_appearance_time DESC
"#;
@@ -599,7 +598,7 @@ async fn get_identity_videos(
.map(|row| {
use sqlx::Row;
serde_json::json!({
"video_uuid": row.get::<String, _>("video_uuid"),
"file_uuid": row.get::<String, _>("file_uuid"),
"file_name": row.get::<Option<String>, _>("file_name"),
"file_path": row.get::<Option<String>, _>("file_path"),
"person_id": row.get::<String, _>("person_id"),
@@ -659,12 +658,12 @@ async fn get_identity_faces(
// Fetch distinct face detections for this identity
let sql = r#"
SELECT
fd.id as detection_id, fd.video_uuid, fd.frame_number,
fd.id as detection_id, fd.file_uuid, fd.frame_number,
fd.timestamp_secs, fd.x, fd.y, fd.width, fd.height,
fd.cluster_id, v.file_name
FROM face_detections fd
JOIN person_identities pi ON fd.video_uuid = pi.video_uuid
LEFT JOIN videos v ON fd.video_uuid = v.uuid
JOIN person_identities pi ON fd.file_uuid = pi.file_uuid
LEFT JOIN videos v ON fd.file_uuid = v.uuid
WHERE pi.face_identity_id = $1
ORDER BY fd.timestamp_secs DESC
LIMIT $2
@@ -682,7 +681,7 @@ async fn get_identity_faces(
use sqlx::Row;
serde_json::json!({
"detection_id": row.get::<i32, _>("detection_id"),
"video_uuid": row.get::<String, _>("video_uuid"),
"file_uuid": row.get::<String, _>("file_uuid"),
"file_name": row.get::<Option<String>, _>("file_name"),
"frame_number": row.get::<i64, _>("frame_number"),
"timestamp": row.get::<f64, _>("timestamp_secs"),
@@ -715,7 +714,7 @@ async fn get_identity_faces(
/// List all faces in a video (both registered and unregistered)
async fn get_video_faces(
State(_state): State<crate::api::server::AppState>,
Path(video_uuid): Path<String>,
Path(file_uuid): Path<String>,
) -> Result<Json<serde_json::Value>, (StatusCode, String)> {
let db = match PostgresDb::init().await {
Ok(db) => db,
@@ -735,13 +734,13 @@ async fn get_video_faces(
pi.person_id, pi.face_identity_id, pi.name, pi.is_confirmed
FROM face_clusters fc
LEFT JOIN person_identities pi ON fc.cluster_id = pi.person_id
AND pi.video_uuid = fc.video_uuid
WHERE fc.video_uuid = $1
AND pi.file_uuid = fc.file_uuid
WHERE fc.file_uuid = $1
ORDER BY fc.size DESC
"#;
let clusters: Vec<serde_json::Value> = match sqlx::query(clusters_query)
.bind(&video_uuid)
.bind(&file_uuid)
.fetch_all(db.pool())
.await
{
@@ -785,7 +784,7 @@ async fn get_video_faces(
Ok(Json(serde_json::json!({
"success": true,
"video_uuid": video_uuid,
"file_uuid": file_uuid,
"total_faces": total_faces,
"registered_count": registered_count,
"unregistered_count": total_faces - registered_count,
@@ -799,7 +798,7 @@ async fn register_identity(
Path(person_id): Path<String>,
Query(query): Query<VideoUuidQuery>,
) -> Result<Json<serde_json::Value>, (StatusCode, String)> {
let video_uuid = query.video_uuid;
let file_uuid = query.file_uuid;
let db = match PostgresDb::init().await {
Ok(db) => db,
Err(e) => {
@@ -813,11 +812,11 @@ async fn register_identity(
// 1. Fetch person info
let person_query = r#"
SELECT id, name, face_identity_id FROM '"{}'
WHERE person_id = $1 AND video_uuid = $2
WHERE person_id = $1 AND file_uuid = $2
"#;
let person: Option<(i32, Option<String>, Option<i32>)> = match sqlx::query_as(person_query)
.bind(&person_id)
.bind(&video_uuid)
.bind(&file_uuid)
.fetch_optional(db.pool())
.await
{
@@ -835,7 +834,7 @@ async fn register_identity(
None => {
return Err((
StatusCode::NOT_FOUND,
format!("Person '{}' not found in video '{}'", person_id, video_uuid),
format!("Person '{}' not found in video '{}'", person_id, file_uuid),
))
}
};
@@ -851,11 +850,11 @@ async fn register_identity(
// 2. Get cluster centroid or detections embedding
let cluster_query = r#"
SELECT centroid FROM face_clusters WHERE cluster_id = $1 AND video_uuid = $2
SELECT centroid FROM face_clusters WHERE cluster_id = $1 AND file_uuid = $2
"#;
let centroid: Option<Vec<f32>> = sqlx::query_scalar(cluster_query)
.bind(&person_id)
.bind(&video_uuid)
.bind(&file_uuid)
.fetch_optional(db.pool())
.await
.ok()
@@ -923,7 +922,7 @@ async fn get_person_details(
Path(person_id): Path<String>,
Query(query): Query<PersonDetailQuery>,
) -> Result<Json<serde_json::Value>, (StatusCode, String)> {
let video_uuid = query.video_uuid;
let file_uuid = query.file_uuid;
let db = match PostgresDb::init().await {
Ok(db) => db,
Err(e) => {
@@ -941,7 +940,7 @@ async fn get_person_details(
first_appearance_time, last_appearance_time,
is_confirmed, created_at, updated_at
FROM '"{}'
WHERE person_id = $1 AND video_uuid = $2
WHERE person_id = $1 AND file_uuid = $2
"#;
let person: Option<(
@@ -959,7 +958,7 @@ async fn get_person_details(
chrono::DateTime<chrono::Utc>,
)> = match sqlx::query_as(query)
.bind(&person_id)
.bind(&video_uuid)
.bind(&file_uuid)
.fetch_optional(db.pool())
.await
{
@@ -1001,7 +1000,7 @@ async fn get_person_details(
#[derive(Debug, Deserialize)]
pub struct UpdatePersonQuery {
pub video_uuid: String,
pub file_uuid: String,
}
async fn update_person_identity(
@@ -1010,7 +1009,7 @@ async fn update_person_identity(
Query(query): Query<UpdatePersonQuery>,
Json(request): Json<UpdatePersonIdentityRequest>,
) -> Result<Json<serde_json::Value>, (StatusCode, String)> {
let video_uuid = query.video_uuid;
let file_uuid = query.file_uuid;
let db = match PostgresDb::init().await {
Ok(db) => db,
Err(e) => {
@@ -1083,10 +1082,10 @@ async fn get_person_timeline(
}
};
let name_query = "SELECT name FROM person_identities WHERE person_id = $1 AND video_uuid = $2";
let name_query = "SELECT name FROM person_identities WHERE person_id = $1 AND file_uuid = $2";
let name: Option<String> = match sqlx::query_scalar(name_query)
.bind(&person_id)
.bind(&query.video_uuid)
.bind(&query.file_uuid)
.fetch_optional(db.pool())
.await
{
@@ -1102,13 +1101,13 @@ async fn get_person_timeline(
let timeline_query = r#"
SELECT start_time, end_time, duration, confidence
FROM person_appearances
WHERE person_id = $1 AND video_uuid = $2
WHERE person_id = $1 AND file_uuid = $2
ORDER BY start_time ASC
"#;
let timeline: Vec<(f64, f64, f64, f64)> = match sqlx::query_as(timeline_query)
.bind(&person_id)
.bind(&query.video_uuid)
.bind(&query.file_uuid)
.fetch_all(db.pool())
.await
{
@@ -1139,13 +1138,13 @@ async fn get_person_timeline(
MAX(end_time) as last_appearance,
AVG(confidence) as average_confidence
FROM person_appearances
WHERE person_id = $1 AND video_uuid = $2
WHERE person_id = $1 AND file_uuid = $2
"#;
let stats: (i64, Option<f64>, Option<f64>, Option<f64>, Option<f64>) =
match sqlx::query_as(stats_query)
.bind(&person_id)
.bind(&query.video_uuid)
.bind(&query.file_uuid)
.fetch_one(db.pool())
.await
{
@@ -1179,7 +1178,7 @@ async fn get_person_appearances(
Path(person_id): Path<String>,
Query(query): Query<PersonDetailQuery>,
) -> Result<Json<serde_json::Value>, (StatusCode, String)> {
let video_uuid = query.video_uuid;
let file_uuid = query.file_uuid;
let db = match PostgresDb::init().await {
Ok(db) => db,
Err(e) => {
@@ -1192,18 +1191,18 @@ async fn get_person_appearances(
let query = r#"
SELECT
person_id, video_uuid, start_time, end_time, duration,
person_id, file_uuid, start_time, end_time, duration,
face_detection_id, asrx_segment_start, asrx_segment_end,
confidence, created_at
FROM person_appearances
WHERE person_id = $1 AND video_uuid = $2
WHERE person_id = $1 AND file_uuid = $2
ORDER BY start_time DESC
LIMIT 100
"#;
let appearances: Vec<serde_json::Value> = match sqlx::query(query)
.bind(&person_id)
.bind(&video_uuid)
.bind(&file_uuid)
.fetch_all(db.pool())
.await
{
@@ -1213,7 +1212,7 @@ async fn get_person_appearances(
use sqlx::Row;
serde_json::json!({
"person_id": row.get::<String, _>("person_id"),
"video_uuid": row.get::<String, _>("video_uuid"),
"file_uuid": row.get::<String, _>("file_uuid"),
"start_time": row.get::<f64, _>("start_time"),
"end_time": row.get::<f64, _>("end_time"),
"duration": row.get::<f64, _>("duration"),
@@ -1240,7 +1239,7 @@ async fn get_person_appearances(
#[derive(Debug, Deserialize)]
pub struct ChunkPersonsQuery {
pub video_uuid: String,
pub file_uuid: String,
}
async fn get_chunk_persons(
@@ -1248,7 +1247,7 @@ async fn get_chunk_persons(
Path(chunk_id): Path<String>,
Query(query): Query<ChunkPersonsQuery>,
) -> Result<Json<ChunkPersonsResponse>, (StatusCode, String)> {
let video_uuid = &query.video_uuid;
let file_uuid = &query.file_uuid;
let db = match PostgresDb::init().await {
Ok(db) => db,
Err(e) => {
@@ -1290,7 +1289,7 @@ async fn get_chunk_persons(
}
};
let (video_uuid, start_time, end_time, _metadata) = chunk;
let (file_uuid, start_time, end_time, _metadata) = chunk;
let persons_query = r#"
SELECT
@@ -1300,7 +1299,7 @@ async fn get_chunk_persons(
LEAST(pa.end_time, $3) - GREATEST(pa.start_time, $2) as overlap_duration
FROM person_appearances pa
JOIN person_identities pi ON pa.person_id = pi.person_id
WHERE pa.video_uuid = $1
WHERE pa.file_uuid = $1
AND pa.start_time < $3
AND pa.end_time > $2
ORDER BY overlap_duration DESC
@@ -1308,7 +1307,7 @@ async fn get_chunk_persons(
let persons: Vec<ChunkPersonInfo> =
match sqlx::query_as::<_, (String, Option<String>, f64, f64)>(persons_query)
.bind(&video_uuid)
.bind(&file_uuid)
.bind(start_time)
.bind(end_time)
.fetch_all(db.pool())
@@ -1349,15 +1348,15 @@ async fn get_person_thumbnail(
// 1. Locate the face_clustered.json file
let json_path = format!(
"output/{}/{}_face_clustered.json",
query.video_uuid, query.video_uuid
query.file_uuid, query.file_uuid
);
let json_path2 = format!(
"output/{}/{}.face_clustered.json",
query.video_uuid, query.video_uuid
query.file_uuid, query.file_uuid
);
// Fallback path if the naming convention is slightly different
let fallback_path = format!("output/{}/face_clustered.json", query.video_uuid);
let fallback_path = format!("output/{}/face_clustered.json", query.file_uuid);
let path = if std::path::Path::new(&json_path).exists() {
json_path
@@ -1370,7 +1369,7 @@ async fn get_person_thumbnail(
StatusCode::NOT_FOUND,
format!(
"Face data not found for video: {}. Tried: {}, {}, {}",
query.video_uuid, json_path, json_path2, fallback_path
query.file_uuid, json_path, json_path2, fallback_path
),
));
};
@@ -1418,7 +1417,7 @@ async fn get_person_thumbnail(
let (timestamp, face) = detections[index];
// 3. Locate the video file
let video_path = format!("output/{}/{}.mp4", query.video_uuid, query.video_uuid);
let video_path = format!("output/{}/{}.mp4", query.file_uuid, query.file_uuid);
if !std::path::Path::new(&video_path).exists() {
return Err((
StatusCode::NOT_FOUND,
@@ -1489,12 +1488,12 @@ async fn create_person_identity(
let query = r#"
INSERT INTO person_identities (
person_id, video_uuid, face_identity_id, speaker_id,
person_id, file_uuid, face_identity_id, speaker_id,
name, metadata, confidence
) VALUES ($1, $2, $3, $4, $5, $6, $7)
RETURNING
id, person_id, face_identity_id, speaker_id,
video_uuid, confidence, name, metadata,
file_uuid, confidence, name, metadata,
first_appearance_time, last_appearance_time,
total_appearance_duration, appearance_count,
created_at, updated_at, is_confirmed
@@ -1502,11 +1501,11 @@ async fn create_person_identity(
let person: PersonIdentity = sqlx::query_as(query)
.bind(&person_id)
.bind(&request.video_uuid)
.bind(&request.file_uuid)
.bind(&request.face_identity_id)
.bind(&request.speaker_id)
.bind(&request.name)
.bind(&request.metadata.unwrap_or(serde_json::json!({})))
.bind(serde_json::to_string(&request.metadata.unwrap_or(serde_json::json!({}))).unwrap())
.bind(0.0)
.fetch_one(db.pool())
.await?;
@@ -1516,13 +1515,13 @@ async fn create_person_identity(
async fn auto_match_face_speaker(
db: &PostgresDb,
video_uuid: &str,
file_uuid: &str,
threshold: f64,
) -> Result<Vec<PersonMatch>, anyhow::Error> {
let query = "SELECT * FROM auto_match_face_speaker($1, $2)";
let matches: Vec<PersonMatch> = sqlx::query_as(query)
.bind(video_uuid)
.bind(file_uuid)
.bind(threshold)
.fetch_all(db.pool())
.await?;
@@ -1548,9 +1547,9 @@ async fn auto_identify_persons(
// 1. Load face_clustered.json
let clustered_path = format!(
"output/{}/{}.face_clustered.json",
request.video_uuid, request.video_uuid
request.file_uuid, request.file_uuid
);
let fallback_path = format!("output/{}/face_clustered.json", request.video_uuid);
let fallback_path = format!("output/{}/face_clustered.json", request.file_uuid);
let path = if std::path::Path::new(&clustered_path).exists() {
clustered_path
} else if std::path::Path::new(&fallback_path).exists() {
@@ -1560,7 +1559,7 @@ async fn auto_identify_persons(
StatusCode::NOT_FOUND,
format!(
"face_clustered.json not found for video: {}",
request.video_uuid
request.file_uuid
),
));
};
@@ -1615,7 +1614,7 @@ async fn auto_identify_persons(
// 3. Load ASRX from chunks
let asrx_query = "SELECT chunk_id, content::text FROM chunks WHERE uuid = $1 AND chunk_type = 'trace' AND chunk_id LIKE 'trace_asrx_%'";
let asrx_chunks: Vec<(String, String)> = match sqlx::query_as(asrx_query)
.bind(&request.video_uuid)
.bind(&request.file_uuid)
.fetch_all(db.pool())
.await
{
@@ -1631,7 +1630,7 @@ async fn auto_identify_persons(
// Also check sentence chunks for speaker_id
let sentence_query = "SELECT content::text FROM chunks WHERE uuid = $1 AND chunk_type = 'sentence' AND content ? 'speaker_id'";
let sentence_chunks: Vec<String> = match sqlx::query_scalar(sentence_query)
.bind(&request.video_uuid)
.bind(&request.file_uuid)
.fetch_all(db.pool())
.await
{
@@ -1801,7 +1800,7 @@ async fn list_persons(
}
};
let video_uuid = query.video_uuid.replace("'", "''");
let file_uuid = query.file_uuid.replace("'", "''");
let page = query.page.unwrap_or(1);
let page_size = query.page_size.unwrap_or(20);
let offset = ((page - 1) as i64) * (page_size as i64);
@@ -1811,25 +1810,25 @@ async fn list_persons(
let (sql, count_sql) = if has_speaker {
if min_appearances > 0 {
(
format!("SELECT person_id, name, speaker_id, appearance_count, total_appearance_duration, first_appearance_time, last_appearance_time, is_confirmed, metadata::text FROM person_identities WHERE video_uuid = '{}' AND speaker_id IS NOT NULL AND appearance_count >= $1 ORDER BY appearance_count DESC LIMIT $2 OFFSET $3", video_uuid),
format!("SELECT COUNT(*) FROM person_identities WHERE video_uuid = '{}' AND speaker_id IS NOT NULL AND appearance_count >= $1", video_uuid),
format!("SELECT person_id, name, speaker_id, appearance_count, total_appearance_duration, first_appearance_time, last_appearance_time, is_confirmed, metadata::text FROM person_identities WHERE file_uuid = '{}' AND speaker_id IS NOT NULL AND appearance_count >= $1 ORDER BY appearance_count DESC LIMIT $2 OFFSET $3", file_uuid),
format!("SELECT COUNT(*) FROM person_identities WHERE file_uuid = '{}' AND speaker_id IS NOT NULL AND appearance_count >= $1", file_uuid),
)
} else {
(
format!("SELECT person_id, name, speaker_id, appearance_count, total_appearance_duration, first_appearance_time, last_appearance_time, is_confirmed, metadata::text FROM person_identities WHERE video_uuid = '{}' AND speaker_id IS NOT NULL ORDER BY appearance_count DESC LIMIT $1 OFFSET $2", video_uuid),
format!("SELECT COUNT(*) FROM person_identities WHERE video_uuid = '{}' AND speaker_id IS NOT NULL", video_uuid),
format!("SELECT person_id, name, speaker_id, appearance_count, total_appearance_duration, first_appearance_time, last_appearance_time, is_confirmed, metadata::text FROM person_identities WHERE file_uuid = '{}' AND speaker_id IS NOT NULL ORDER BY appearance_count DESC LIMIT $1 OFFSET $2", file_uuid),
format!("SELECT COUNT(*) FROM person_identities WHERE file_uuid = '{}' AND speaker_id IS NOT NULL", file_uuid),
)
}
} else {
if min_appearances > 0 {
(
format!("SELECT person_id, name, speaker_id, appearance_count, total_appearance_duration, first_appearance_time, last_appearance_time, is_confirmed, metadata::text FROM person_identities WHERE video_uuid = '{}' AND appearance_count >= $1 ORDER BY appearance_count DESC LIMIT $2 OFFSET $3", video_uuid),
format!("SELECT COUNT(*) FROM person_identities WHERE video_uuid = '{}' AND appearance_count >= $1", video_uuid),
format!("SELECT person_id, name, speaker_id, appearance_count, total_appearance_duration, first_appearance_time, last_appearance_time, is_confirmed, metadata::text FROM person_identities WHERE file_uuid = '{}' AND appearance_count >= $1 ORDER BY appearance_count DESC LIMIT $2 OFFSET $3", file_uuid),
format!("SELECT COUNT(*) FROM person_identities WHERE file_uuid = '{}' AND appearance_count >= $1", file_uuid),
)
} else {
(
format!("SELECT person_id, name, speaker_id, appearance_count, total_appearance_duration, first_appearance_time, last_appearance_time, is_confirmed, metadata::text FROM person_identities WHERE video_uuid = '{}' ORDER BY appearance_count DESC LIMIT $1 OFFSET $2", video_uuid),
format!("SELECT COUNT(*) FROM person_identities WHERE video_uuid = '{}'", video_uuid),
format!("SELECT person_id, name, speaker_id, appearance_count, total_appearance_duration, first_appearance_time, last_appearance_time, is_confirmed, metadata::text FROM person_identities WHERE file_uuid = '{}' ORDER BY appearance_count DESC LIMIT $1 OFFSET $2", file_uuid),
format!("SELECT COUNT(*) FROM person_identities WHERE file_uuid = '{}'", file_uuid),
)
}
};
@@ -1925,7 +1924,7 @@ async fn merge_persons(
State(_state): State<crate::api::server::AppState>,
Json(request): Json<MergePersonsRequest>,
) -> Result<Json<MergePersonsResponse>, (StatusCode, String)> {
let video_uuid = &request.video_uuid;
let file_uuid = &request.file_uuid;
let db = match PostgresDb::init().await {
Ok(db) => db,
Err(e) => {
@@ -2150,7 +2149,7 @@ async fn undo_merge(
State(_state): State<crate::api::server::AppState>,
Json(request): Json<UndoMergeRequest>,
) -> Result<Json<serde_json::Value>, (StatusCode, String)> {
// video_uuid is validated through merge_history lookup
// file_uuid is validated through merge_history lookup
let db = match PostgresDb::init().await {
Ok(db) => db,
Err(e) => {
@@ -2285,7 +2284,10 @@ async fn undo_merge(
.bind(source_duration)
.bind(source_first)
.bind(source_last)
.bind(&serde_json::json!({"restored_from_merge": _merge_id}))
.bind(
serde_json::to_string(&serde_json::json!({"restored_from_merge": _merge_id}))
.unwrap(),
)
.execute(&mut *tx)
.await
{
@@ -2333,14 +2335,14 @@ async fn undo_merge(
/// Get merge history
#[derive(Debug, Deserialize)]
pub struct MergeHistoryQuery {
pub video_uuid: String,
pub file_uuid: String,
}
async fn get_merge_history(
State(_state): State<crate::api::server::AppState>,
Query(query): Query<MergeHistoryQuery>,
) -> Result<Json<MergeHistoryResponse>, (StatusCode, String)> {
let _video_uuid = &query.video_uuid;
let _file_uuid = &query.file_uuid;
let db = match PostgresDb::init().await {
Ok(db) => db,
Err(e) => {
@@ -2412,16 +2414,16 @@ async fn get_similar_persons(
}
};
let video_uuid = query.video_uuid;
let file_uuid = query.file_uuid;
let threshold = query.threshold.unwrap_or(0.5);
let limit = query.limit.unwrap_or(10);
// Find the speaker_id of the requested person
let get_speaker_query =
"SELECT speaker_id FROM person_identities WHERE person_id = $1 AND video_uuid = $2";
"SELECT speaker_id FROM person_identities WHERE person_id = $1 AND file_uuid = $2";
let current_speaker_id: Option<String> = match sqlx::query_scalar(get_speaker_query)
.bind(&person_id)
.bind(&video_uuid)
.bind(&file_uuid)
.fetch_optional(db.pool())
.await
{
@@ -2437,7 +2439,7 @@ async fn get_similar_persons(
let results = match current_speaker_id {
Some(sid) => {
// Find others with same speaker_id
let similar_query = "SELECT person_id, name, speaker_id, appearance_count, first_appearance_time, last_appearance_time FROM person_identities WHERE speaker_id = $1 AND person_id != $2 AND video_uuid = $3 ORDER BY appearance_count DESC LIMIT $4";
let similar_query = "SELECT person_id, name, speaker_id, appearance_count, first_appearance_time, last_appearance_time FROM person_identities WHERE speaker_id = $1 AND person_id != $2 AND file_uuid = $3 ORDER BY appearance_count DESC LIMIT $4";
let rows: Vec<(
String,
Option<String>,
@@ -2448,7 +2450,7 @@ async fn get_similar_persons(
)> = match sqlx::query_as(similar_query)
.bind(&sid)
.bind(&person_id)
.bind(&video_uuid)
.bind(&file_uuid)
.bind(limit)
.fetch_all(db.pool())
.await
@@ -2509,13 +2511,13 @@ async fn get_person_suggestions(
// 1. Naming suggestions: Persons with NULL name but high appearance count.
// 2. Merge suggestions: Persons sharing the same speaker_id.
let video_uuid = req.video_uuid;
let file_uuid = req.file_uuid;
// Naming suggestions
let naming_query = "SELECT person_id, name, speaker_id, appearance_count FROM person_identities WHERE video_uuid = $1 AND (name IS NULL OR name = person_id) AND appearance_count > 50 ORDER BY appearance_count DESC LIMIT 10";
let naming_query = "SELECT person_id, name, speaker_id, appearance_count FROM person_identities WHERE file_uuid = $1 AND (name IS NULL OR name = person_id) AND appearance_count > 50 ORDER BY appearance_count DESC LIMIT 10";
let naming_rows: Vec<(String, Option<String>, Option<String>, i32)> =
match sqlx::query_as(naming_query)
.bind(&video_uuid)
.bind(&file_uuid)
.fetch_all(db.pool())
.await
{
@@ -2538,9 +2540,9 @@ async fn get_person_suggestions(
}
// Merge suggestions (Speaker overlap)
let merge_query = "SELECT person_id, speaker_id, appearance_count FROM person_identities WHERE video_uuid = $1 AND speaker_id IS NOT NULL ORDER BY speaker_id, appearance_count DESC";
let merge_query = "SELECT person_id, speaker_id, appearance_count FROM person_identities WHERE file_uuid = $1 AND speaker_id IS NOT NULL ORDER BY speaker_id, appearance_count DESC";
let merge_rows: Vec<(String, String, i32)> = match sqlx::query_as(merge_query)
.bind(&video_uuid)
.bind(&file_uuid)
.fetch_all(db.pool())
.await
{
@@ -2644,14 +2646,14 @@ async fn confirm_person_suggestion(
/// Request to unbind speaker from person
#[derive(Debug, Deserialize)]
pub struct UnbindSpeakerRequest {
pub video_uuid: String,
pub file_uuid: String,
pub reason: Option<String>,
}
/// Request to reassign speaker to person
#[derive(Debug, Deserialize)]
pub struct ReassignSpeakerRequest {
pub video_uuid: String,
pub file_uuid: String,
pub speaker_id: String,
pub reason: Option<String>,
}
@@ -2659,7 +2661,7 @@ pub struct ReassignSpeakerRequest {
/// Request to remove a specific appearance
#[derive(Debug, Deserialize)]
pub struct RemoveAppearanceRequest {
pub video_uuid: String,
pub file_uuid: String,
pub appearance_id: i32,
pub reason: Option<String>,
}
@@ -2667,7 +2669,7 @@ pub struct RemoveAppearanceRequest {
/// Request to reassign appearance to another person
#[derive(Debug, Deserialize)]
pub struct ReassignAppearanceRequest {
pub video_uuid: String,
pub file_uuid: String,
pub appearance_id: i32,
pub target_person_id: String,
pub reason: Option<String>,
@@ -2676,7 +2678,7 @@ pub struct ReassignAppearanceRequest {
/// Request to split a person into two
#[derive(Debug, Deserialize)]
pub struct SplitPersonRequest {
pub video_uuid: String,
pub file_uuid: String,
pub new_person_id: String,
pub appearance_ids_to_move: Vec<i32>,
pub new_person_name: Option<String>,

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -4,9 +4,6 @@
use axum::{extract::State, http::StatusCode, response::Json, routing::post, Router};
use serde::{Deserialize, Serialize};
use serde_json;
use tracing;
use crate::core::db::PostgresDb;
// --- Request / Response Structures ---

File diff suppressed because it is too large Load Diff

335
src/api/snapshot_api.rs Normal file
View File

@@ -0,0 +1,335 @@
use axum::{
extract::{Path, State},
http::StatusCode,
response::Json,
routing::{get, post},
Router,
};
use serde::{Deserialize, Serialize};
use crate::core::processor::snapshot_agent::SnapshotAgent;
use crate::core::storage::snapshot_manager::SnapshotManager;
/// Builds the router exposing the snapshot endpoints for files and identities.
///
/// File routes live under `/api/v1/files/:uuid/snapshots` (list/generate,
/// `status`, `migrate`, `teardown`); identity routes live under
/// `/api/v1/identities/:uuid/snapshots`.
pub fn snapshot_routes() -> Router<crate::api::server::AppState> {
    // Collection endpoints answer both GET (inspect) and POST (generate).
    let file_collection = get(get_file_snapshots).post(generate_file_snapshots);
    let identity_collection = get(get_identity_snapshots).post(generate_identity_snapshots);

    Router::new()
        .route("/api/v1/files/:uuid/snapshots", file_collection)
        .route(
            "/api/v1/files/:uuid/snapshots/status",
            get(get_file_snapshot_status),
        )
        .route(
            "/api/v1/files/:uuid/snapshots/migrate",
            post(migrate_file_snapshots),
        )
        .route(
            "/api/v1/files/:uuid/snapshots/teardown",
            post(teardown_file_snapshots),
        )
        .route("/api/v1/identities/:uuid/snapshots", identity_collection)
}
// --- File Snapshot Endpoints ---
/// JSON body returned by `get_file_snapshots`.
#[derive(Debug, Serialize)]
pub struct FileSnapshotsResponse {
    /// Set to `true` by the handler whenever a response body is produced.
    pub success: bool,
    /// The UUID taken from the request path.
    pub file_uuid: String,
    /// String form of `SnapshotManager::compute_tier(hits)`.
    pub tier: String,
    /// Hit counter read from the Redis cache (0 when the lookup fails).
    pub hits: u64,
    /// Snapshot types reported by `SnapshotManager::list_snapshot_types`.
    pub types: Vec<String>,
}
/// `GET /api/v1/files/:uuid/snapshots` — reports the cache tier, hit count and
/// available snapshot types for a file.
///
/// Cache failures are tolerated: a failed hit lookup counts as zero hits and a
/// failed last-access update is ignored. The handler therefore always returns
/// `Ok`.
async fn get_file_snapshots(
    State(state): State<crate::api::server::AppState>,
    Path(uuid): Path<String>,
) -> Result<Json<FileSnapshotsResponse>, (StatusCode, String)> {
    let manager = SnapshotManager::new(crate::core::config::OUTPUT_DIR.as_str());
    let cache = &state.redis_cache;

    let hits = match cache.get_snapshot_hits(&uuid).await {
        Ok(count) => count,
        Err(_) => 0,
    };
    let tier = SnapshotManager::compute_tier(hits).to_string();
    let types = manager.list_snapshot_types(&uuid);

    // Best effort: refreshing the access timestamp must not fail the request.
    let _ = cache.update_last_access(&uuid).await;

    let body = FileSnapshotsResponse {
        success: true,
        file_uuid: uuid,
        tier,
        hits,
        types,
    };
    Ok(Json(body))
}
/// JSON body returned by `get_file_snapshot_status`.
#[derive(Debug, Serialize)]
pub struct SnapshotStatusResponse {
    /// Set to `true` by the handler whenever a response body is produced.
    pub success: bool,
    /// The UUID taken from the request path.
    pub file_uuid: String,
    /// `status` field of the cached status JSON; `"cold"` when absent.
    pub status: String,
    /// `progress` field of the cached status JSON, narrowed to `f32`.
    pub progress: Option<f32>,
    /// String form of `SnapshotManager::compute_tier(hits)`.
    pub tier: String,
}
/// `GET /api/v1/files/:uuid/snapshots/status` — returns the cached generation
/// status, progress and tier for a file.
///
/// # Errors
/// Responds with `500` when the status entry cannot be read from the cache.
/// A failed hit-count lookup is not an error; it is treated as zero hits.
async fn get_file_snapshot_status(
    State(state): State<crate::api::server::AppState>,
    Path(uuid): Path<String>,
) -> Result<Json<SnapshotStatusResponse>, (StatusCode, String)> {
    let cache = &state.redis_cache;

    let status_json = match cache.get_snapshot_status(&uuid).await {
        Ok(value) => value,
        Err(e) => return Err((StatusCode::INTERNAL_SERVER_ERROR, e.to_string())),
    };

    // Indexing a JSON value yields `Null` for a missing key, so absent or
    // mistyped fields fall through to the defaults below.
    let status = status_json["status"].as_str().unwrap_or("cold").to_owned();
    let progress = status_json["progress"].as_f64().map(|p| p as f32);

    let hits = cache.get_snapshot_hits(&uuid).await.unwrap_or_default();
    let tier = SnapshotManager::compute_tier(hits).to_string();

    Ok(Json(SnapshotStatusResponse {
        success: true,
        file_uuid: uuid,
        status,
        progress,
        tier,
    }))
}
/// JSON body accepted by `generate_file_snapshots`.
#[derive(Debug, Deserialize)]
pub struct GenerateSnapshotRequest {
    /// Snapshot type to generate, sent as the JSON key `type`.
    /// The handler falls back to `"faces"` when it is omitted.
    #[serde(rename = "type")]
    pub snapshot_type: Option<String>,
}
/// JSON body returned by `generate_file_snapshots` on success.
#[derive(Debug, Serialize)]
pub struct GenerateSnapshotResponse {
    /// Set to `true` by the handler on the success path.
    pub success: bool,
    /// Human-readable completion message naming the snapshot type.
    pub message: String,
    /// The UUID taken from the request path.
    pub file_uuid: String,
}
/// `POST /api/v1/files/:uuid/snapshots` — generates snapshots for a file and
/// records progress in the cached status entry.
///
/// The status is set to `"generating"` before the agent runs, then to
/// `"ready"` or `"failed"` depending on the outcome. The snapshot type
/// defaults to `"faces"` when the request omits it.
///
/// # Errors
/// Responds with `500` when the snapshot directories cannot be created, when
/// the `"generating"`/`"ready"` status cannot be written, or when generation
/// itself fails. On a generation failure the response always carries the
/// generation error, even if recording the `"failed"` status also fails.
async fn generate_file_snapshots(
    State(state): State<crate::api::server::AppState>,
    Path(uuid): Path<String>,
    Json(req): Json<GenerateSnapshotRequest>,
) -> Result<Json<GenerateSnapshotResponse>, (StatusCode, String)> {
    let output_dir = crate::core::config::OUTPUT_DIR.as_str();
    let manager = SnapshotManager::new(output_dir);
    let agent = SnapshotAgent::default();

    manager
        .ensure_file_dirs(&uuid)
        .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?;

    state
        .redis_cache
        .set_snapshot_status(&uuid, "generating", Some(0.0))
        .await
        .map_err(|e: anyhow::Error| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?;

    let snapshot_type = req.snapshot_type.as_deref().unwrap_or("faces");
    tracing::info!(
        "Starting snapshot generation for file_uuid={}, type={}",
        uuid,
        snapshot_type
    );

    match agent.generate_file_snapshots(&uuid, snapshot_type).await {
        Ok(_) => {
            state
                .redis_cache
                .set_snapshot_status(&uuid, "ready", Some(1.0))
                .await
                .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?;
            tracing::info!("Snapshot generation completed for file_uuid={}", uuid);
            Ok(Json(GenerateSnapshotResponse {
                success: true,
                message: format!("Snapshot generation completed for type: {}", snapshot_type),
                file_uuid: uuid,
            }))
        }
        Err(e) => {
            // Log the root cause first so it is never lost.
            tracing::error!("Snapshot generation failed for file_uuid={}: {}", uuid, e);

            // Recording the failure is best effort: a cache error here must
            // not replace the generation error reported to the caller.
            if let Err(status_err) = state
                .redis_cache
                .set_snapshot_status(&uuid, "failed", None)
                .await
            {
                tracing::warn!(
                    "Could not record failed snapshot status for file_uuid={}: {}",
                    uuid,
                    status_err
                );
            }

            Err((
                StatusCode::INTERNAL_SERVER_ERROR,
                format!("Snapshot generation failed: {}", e),
            ))
        }
    }
}
/// JSON body accepted by `migrate_file_snapshots`.
#[derive(Debug, Deserialize)]
pub struct MigrateSnapshotRequest {
    /// UUID passed to `SnapshotAgent::migrate_snapshots` as the parent
    /// (the migration log line describes it as the source).
    pub parent_uuid: String,
}
/// JSON body returned by `migrate_file_snapshots` on success.
#[derive(Debug, Serialize)]
pub struct MigrateSnapshotResponse {
    /// Set to `true` by the handler on the success path.
    pub success: bool,
    /// Human-readable summary with the number of migrated snapshot types.
    pub message: String,
    /// The UUID taken from the request path.
    pub file_uuid: String,
    /// The list returned by `SnapshotAgent::migrate_snapshots`.
    pub migrated_types: Vec<String>,
}
/// `POST /api/v1/files/:uuid/snapshots/migrate` — migrates snapshots from
/// `parent_uuid` to the file in the path and stores a migrate hint in the cache.
///
/// # Errors
/// Responds with `500` when the migration fails or when the migrate hint
/// cannot be written after a successful migration.
async fn migrate_file_snapshots(
    State(state): State<crate::api::server::AppState>,
    Path(uuid): Path<String>,
    Json(req): Json<MigrateSnapshotRequest>,
) -> Result<Json<MigrateSnapshotResponse>, (StatusCode, String)> {
    let agent = SnapshotAgent::default();
    let parent_uuid = &req.parent_uuid;

    tracing::info!(
        "Starting snapshot migration from parent_uuid={} to file_uuid={}",
        req.parent_uuid,
        uuid
    );

    let migrated = agent
        .migrate_snapshots(&uuid, parent_uuid)
        .await
        .map_err(|e| {
            tracing::error!("Snapshot migration failed: {}", e);
            (
                StatusCode::INTERNAL_SERVER_ERROR,
                format!("Migration failed: {}", e),
            )
        })?;

    let migrated_count = migrated.len();
    state
        .redis_cache
        .set_migrate_hint(&uuid, parent_uuid, migrated_count as u64)
        .await
        .map_err(|e: anyhow::Error| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?;

    Ok(Json(MigrateSnapshotResponse {
        success: true,
        message: format!("Migrated {} snapshot types", migrated_count),
        file_uuid: uuid,
        migrated_types: migrated,
    }))
}
// --- Identity Snapshot Endpoints ---
/// JSON body returned by `get_identity_snapshots`.
#[derive(Debug, Serialize)]
pub struct IdentitySnapshotsResponse {
    /// Set to `true` by the handler whenever a response body is produced.
    pub success: bool,
    /// The UUID taken from the request path.
    pub identity_uuid: String,
    /// Whether `reference.jpg` exists in the identity snapshot directory.
    pub has_reference: bool,
    /// Number of readable entries in the identity's `faces` subdirectory.
    pub face_count: usize,
}
/// `GET /api/v1/identities/:uuid/snapshots` — inspects the identity snapshot
/// directory on disk.
///
/// Reports whether `reference.jpg` exists and how many entries the `faces`
/// subdirectory holds. A missing or unreadable `faces` directory counts as
/// zero, so the handler always returns `Ok`.
async fn get_identity_snapshots(
    State(_state): State<crate::api::server::AppState>,
    Path(uuid): Path<String>,
) -> Result<Json<IdentitySnapshotsResponse>, (StatusCode, String)> {
    let manager = SnapshotManager::new(crate::core::config::OUTPUT_DIR.as_str());
    let identity_dir = manager.identity_snapshot_dir(&uuid);

    let reference_path = identity_dir.join("reference.jpg");
    let has_reference = reference_path.exists();

    let faces_dir = identity_dir.join("faces");
    let mut face_count = 0;
    if faces_dir.exists() {
        // Entries that fail to read are skipped by `flatten`.
        if let Ok(entries) = std::fs::read_dir(identity_dir.join("faces")) {
            face_count = entries.flatten().count();
        }
    }

    let body = IdentitySnapshotsResponse {
        success: true,
        identity_uuid: uuid,
        has_reference,
        face_count,
    };
    Ok(Json(body))
}
/// JSON body returned by `generate_identity_snapshots` on success.
#[derive(Debug, Serialize)]
pub struct GenerateIdentitySnapshotResponse {
    /// Set to `true` by the handler on the success path.
    pub success: bool,
    /// Human-readable result message.
    pub message: String,
    /// The UUID taken from the request path.
    pub identity_uuid: String,
}
/// `POST /api/v1/identities/:uuid/snapshots` — prepares the snapshot
/// directories for an identity.
///
/// Only the directories are created here; no snapshot content is produced by
/// this handler.
///
/// # Errors
/// Responds with `500` when the directories cannot be created.
async fn generate_identity_snapshots(
    State(_state): State<crate::api::server::AppState>,
    Path(uuid): Path<String>,
) -> Result<Json<GenerateIdentitySnapshotResponse>, (StatusCode, String)> {
    let manager = SnapshotManager::new(crate::core::config::OUTPUT_DIR.as_str());

    if let Err(e) = manager.ensure_identity_dirs(&uuid) {
        return Err((StatusCode::INTERNAL_SERVER_ERROR, e.to_string()));
    }

    tracing::info!("Snapshot generation requested for identity_uuid={}", uuid);

    let body = GenerateIdentitySnapshotResponse {
        success: true,
        message: "Identity snapshot directories created".to_string(),
        identity_uuid: uuid,
    };
    Ok(Json(body))
}
/// JSON body returned by `teardown_file_snapshots` on success.
#[derive(Debug, Serialize)]
pub struct TeardownSnapshotResponse {
    /// Set to `true` by the handler on the success path.
    pub success: bool,
    /// Human-readable result message.
    pub message: String,
    /// The UUID taken from the request path.
    pub file_uuid: String,
}
/// `POST /api/v1/files/:uuid/snapshots/teardown` — manually triggers
/// `SnapshotAgent::auto_tear_down` for a file.
///
/// # Errors
/// Responds with `500` (and logs the cause) when the teardown fails.
async fn teardown_file_snapshots(
    Path(uuid): Path<String>,
) -> Result<Json<TeardownSnapshotResponse>, (StatusCode, String)> {
    let agent = SnapshotAgent::default();
    tracing::info!("Manual teardown requested for file_uuid={}", uuid);

    if let Err(e) = agent.auto_tear_down(&uuid).await {
        tracing::error!("Snapshot teardown failed for file_uuid={}: {}", uuid, e);
        return Err((
            StatusCode::INTERNAL_SERVER_ERROR,
            format!("Teardown failed: {}", e),
        ));
    }

    Ok(Json(TeardownSnapshotResponse {
        success: true,
        message: "Snapshot teardown completed".to_string(),
        file_uuid: uuid,
    }))
}

View File

@@ -273,7 +273,7 @@ pub struct FrameResult {
#[derive(Debug, Deserialize)]
pub struct PersonSearchQuery {
pub video_uuid: String,
pub file_uuid: String,
pub query: Option<String>,
pub min_appearances: Option<i32>,
pub max_age: Option<i32>, // New filter for "children"

View File

@@ -9,8 +9,6 @@ use axum::{
};
use serde::{Deserialize, Serialize};
use crate::core::db::Database;
// --- Request / Response Structures ---
#[derive(Debug, Deserialize)]
@@ -24,7 +22,7 @@ pub struct WhoQuery {
#[derive(Debug, Deserialize)]
pub struct WhoCandidatesRequest {
pub query: String,
pub video_uuid: Option<String>,
pub file_uuid: Option<String>,
pub limit: Option<i32>,
}
@@ -93,7 +91,7 @@ pub async fn get_who_candidates(
let query_str = format!("%{}%", req.query);
let results = db
.search_person_candidates(&query_str, &req.video_uuid, limit)
.search_person_candidates(&query_str, &req.file_uuid, limit)
.await
.map_err(|e| {
(

View File

@@ -31,7 +31,7 @@ async fn main() -> Result<()> {
);
// 顯示轉換後的 tsquery (除錯用)
match pg.prepare_tsquery(query_str) {
match pg.prepare_tsquery(query_str).await {
Ok(tsquery) => println!(" TSQUERY: {}", tsquery),
Err(e) => println!(" TSQUERY 錯誤: {}", e),
}

View File

@@ -20,7 +20,7 @@ async fn main() -> Result<()> {
println!("\n🔍 測試查詢 (簡體): '{}'", query_str);
// 顯示轉換後的 tsquery
match pg.prepare_tsquery(query_str) {
match pg.prepare_tsquery(query_str).await {
Ok(tsquery) => println!(" TSQUERY: {}", tsquery),
Err(e) => println!(" TSQUERY 錯誤: {}", e),
}

View File

@@ -39,7 +39,7 @@ async fn main() -> Result<()> {
}
// 顯示轉換後的 tsquery
match pg.prepare_tsquery(query_str) {
match pg.prepare_tsquery(query_str).await {
Ok(tsquery) => println!(" TSQUERY: {}", tsquery),
Err(e) => println!(" TSQUERY 錯誤: {}", e),
}

215
src/cli/args.rs Normal file
View File

@@ -0,0 +1,215 @@
use clap::{Parser, Subcommand};
// Top-level command-line interface for the `momentry` binary.
// Plain `//` comments are used here on purpose: clap's derive turns `///`
// doc comments into help text, which would change the program's output.
#[derive(Parser)]
#[command(name = "momentry")]
#[command(about = "Digital asset management system with video analysis and RAG")]
// BUILD_VERSION is read at compile time, so the build must set it.
#[command(version = env!("BUILD_VERSION"))]
pub struct Cli {
    // The subcommand selected by the user.
    #[command(subcommand)]
    pub command: Commands,
}
// All subcommands of the `momentry` CLI.
//
// NOTE: the `///` doc comments below are consumed by clap's derive macros as
// the help text shown to users. Treat them as user-facing strings, not as
// ordinary comments. Maintainer notes in this enum use `//` so they stay out
// of the generated help.
#[derive(Subcommand)]
pub enum Commands {
    /// Register a video file
    Register {
        /// Video file path or URL
        path: String,
    },
    /// Process video (generate all JSON files)
    Process {
        /// UUID or path
        target: String,
        /// Modules to process (comma separated: asr,cut,asrx,yolo,ocr,face,pose,story,caption)
        /// If not specified, processes all modules
        // `value_delimiter` splits a single argument on commas into the Vec.
        #[arg(short, long, value_delimiter = ',')]
        modules: Option<Vec<String>>,
        /// Modules to process via cloud (comma separated)
        /// Example: --cloud asr,yolo
        #[arg(long, value_delimiter = ',')]
        cloud: Option<Vec<String>>,
        /// Force reprocess even if JSON exists (skip completeness check)
        #[arg(long, default_value = "false")]
        force: bool,
        /// Resume from last checkpoint if processing was interrupted
        #[arg(long, default_value = "false")]
        resume: bool,
    },
    /// Generate chunks and store in database
    Chunk {
        /// UUID
        uuid: String,
    },
    /// Generate story for cut scenes
    Story {
        /// UUID
        uuid: String,
    },
    /// Vectorize chunks
    Vectorize {
        /// UUID (or 'all' for all)
        uuid: String,
    },
    /// Play video with overlays
    Play {
        /// Video path or UUID
        target: String,
    },
    /// Start watching directories
    Watch {
        /// Directories to watch (comma separated)
        // Kept as one raw string; splitting on commas is left to the caller.
        directories: Option<String>,
    },
    /// Check system resources and recommend processing strategy
    System {
        /// Show detailed GPU info (NVIDIA/MPS)
        #[arg(long)]
        gpu: bool,
    },
    /// Start API server
    Server {
        /// Host
        #[arg(long, default_value = "127.0.0.1")]
        host: String,
        /// Port (defaults to MOMENTRY_SERVER_PORT env var, or 3002 for production)
        // No clap default here: `None` lets the caller apply the fallback.
        #[arg(long)]
        port: Option<u16>,
    },
    /// Start job worker
    Worker {
        /// Max concurrent processors
        #[arg(long)]
        max_concurrent: Option<usize>,
        /// Poll interval in seconds
        #[arg(long)]
        poll_interval: Option<u64>,
        /// Batch size
        #[arg(long)]
        batch_size: Option<i32>,
    },
    /// Query using RAG
    Query {
        /// Query text
        query: String,
    },
    /// Lookup UUID from path
    Lookup {
        /// File path
        path: String,
    },
    /// Resolve path from UUID
    Resolve {
        /// UUID
        uuid: String,
    },
    /// Generate thumbnails for videos
    Thumbnails {
        /// UUID (optional, generates for all if not specified)
        uuid: Option<String>,
        /// Number of thumbnails per video
        #[arg(short, long, default_value = "6")]
        count: u32,
    },
    /// Show storage status report
    Status {
        /// UUID (optional, shows all if not specified)
        uuid: Option<String>,
    },
    /// Manage output backups
    Backup {
        /// Action: list, cleanup
        // Free-form string, unlike the enum-typed actions further down.
        action: String,
        /// Days to keep (for cleanup)
        days: Option<u32>,
    },
    /// Manage API keys
    ApiKey {
        /// Action: create, list, validate, revoke, rotate, stats
        #[arg(value_enum)]
        action: ApiKeyAction,
        /// Key name (for create)
        name: Option<String>,
        /// Key type (system, user, service, integration, emergency)
        // Parsed later by `parse_key_type`, which defaults to the user type.
        #[arg(long)]
        key_type: Option<String>,
        /// TTL in days (for create)
        #[arg(long)]
        ttl: Option<i64>,
        /// API key to validate/revoke
        #[arg(long)]
        key: Option<String>,
    },
    /// Manage Gitea API tokens
    Gitea {
        /// Action: create, list, delete, verify
        #[arg(value_enum)]
        action: GiteaAction,
        /// Gitea username
        #[arg(long)]
        username: Option<String>,
        /// Gitea password (for create/list/delete)
        // NOTE(review): a password passed as a CLI flag is visible in shell
        // history and process listings — consider an env var or prompt.
        #[arg(long)]
        password: Option<String>,
        /// Token name (for create/delete)
        #[arg(long)]
        token_name: Option<String>,
        /// Token scopes (comma separated: read:repository,write:issue)
        #[arg(long)]
        scopes: Option<String>,
    },
    /// Manage n8n API keys
    N8n {
        /// Action: create, list, delete, verify
        #[arg(value_enum)]
        action: N8nAction,
        /// n8n API key (for create/list/delete)
        #[arg(long)]
        api_key: Option<String>,
        /// API key label (for create/delete)
        #[arg(long)]
        label: Option<String>,
        /// Expiration days (for create)
        #[arg(long)]
        expires_in_days: Option<i64>,
    },
}
// Actions accepted by the `ApiKey` subcommand (its `action` argument).
// `//` comments are used so nothing here leaks into clap's generated help.
#[derive(clap::ValueEnum, Clone, Debug)]
pub enum ApiKeyAction {
    Create,
    List,
    Validate,
    Revoke,
    Rotate,
    Stats,
}
// Actions accepted by the `Gitea` subcommand (its `action` argument).
// `//` comments are used so nothing here leaks into clap's generated help.
#[derive(clap::ValueEnum, Clone, Debug)]
pub enum GiteaAction {
    Create,
    List,
    Delete,
    Verify,
}
// Actions accepted by the `N8n` subcommand (its `action` argument).
// `//` comments are used so nothing here leaks into clap's generated help.
#[derive(clap::ValueEnum, Clone, Debug)]
pub enum N8nAction {
    Create,
    List,
    Delete,
    Verify,
}
/// Maps an optional key-type name to an `ApiKeyType`.
///
/// Matching is case-insensitive. A missing or unrecognised name yields
/// `ApiKeyType::User`.
pub fn parse_key_type(s: Option<&str>) -> momentry_core::core::api_key::ApiKeyType {
    use momentry_core::core::api_key::ApiKeyType;

    s.map_or(ApiKeyType::User, |raw| {
        match raw.to_lowercase().as_str() {
            "system" => ApiKeyType::System,
            "service" => ApiKeyType::Service,
            "integration" => ApiKeyType::Integration,
            "emergency" => ApiKeyType::Emergency,
            // "user" and every unknown name share the default.
            _ => ApiKeyType::User,
        }
    })
}

5
src/cli/mod.rs Normal file
View File

@@ -0,0 +1,5 @@
//! CLI command definitions and argument parsing
pub mod args;
pub use args::*;

View File

@@ -4,7 +4,7 @@ use serde::{de::DeserializeOwned, Serialize};
use std::sync::Arc;
use tokio::sync::RwLock;
use crate::core::config::{cache as cache_config, REDIS_KEY_PREFIX};
use crate::core::config::{cache as cache_config, snapshot as snapshot_config, REDIS_KEY_PREFIX};
use crate::core::db::RedisClient;
pub struct RedisCache {
@@ -133,6 +133,135 @@ impl RedisCache {
pub async fn invalidate_videos_list(&self) -> Result<u64> {
self.invalidate_pattern("videos:*").await
}
// --- Snapshot Cache Methods ---
/// Returns the cache key of the snapshot hit counter for `file_uuid`
/// (`snapshot:hits:<uuid>`).
pub async fn snapshot_hits_key(file_uuid: &str) -> String {
    ["snapshot:hits:", file_uuid].concat()
}
/// Returns the cache key of the snapshot status entry for `file_uuid`
/// (`snapshot:status:<uuid>`).
pub async fn snapshot_status_key(file_uuid: &str) -> String {
    ["snapshot:status:", file_uuid].concat()
}
/// Returns the cache key of the last-access timestamp for `file_uuid`
/// (`snapshot:last_access:<uuid>`).
pub async fn snapshot_last_access_key(file_uuid: &str) -> String {
    ["snapshot:last_access:", file_uuid].concat()
}
/// Returns the cache key of the migrate hint for `file_uuid`
/// (`snapshot:migrate_hint:<uuid>`).
pub async fn snapshot_migrate_hint_key(file_uuid: &str) -> String {
    ["snapshot:migrate_hint:", file_uuid].concat()
}
/// Increments the snapshot hit counter for `file_uuid` and returns the new
/// value.
///
/// After the `INCR`, the key's expiry is reset to the warm TTL.
///
/// # Errors
/// Fails when a connection cannot be obtained or either command fails.
pub async fn increment_snapshot_hits(&self, file_uuid: &str) -> Result<u64> {
    let guard = self.client.read().await;
    let mut connection = guard.get_conn_internal().await?;
    let key = Self::snapshot_hits_key(file_uuid).await;

    let mut incr = redis::cmd("INCR");
    incr.arg(&key);
    let hits: u64 = incr.query_async(&mut connection).await?;

    // Every hit pushes the counter's expiry out again.
    let mut expire = redis::cmd("EXPIRE");
    expire.arg(&key).arg(*snapshot_config::WARM_TTL_SECS);
    let _: () = expire.query_async(&mut connection).await?;

    Ok(hits)
}
/// Reads the snapshot hit counter for `file_uuid`; a missing key counts as 0.
///
/// # Errors
/// Fails when a connection cannot be obtained or the read fails.
pub async fn get_snapshot_hits(&self, file_uuid: &str) -> Result<u64> {
    let guard = self.client.read().await;
    let mut connection = guard.get_conn_internal().await?;
    let key = Self::snapshot_hits_key(file_uuid).await;

    let stored: Option<u64> = connection.get(&key).await?;
    Ok(stored.unwrap_or_default())
}
/// Stores the current Unix time (seconds) as the last-access timestamp for
/// `file_uuid`, expiring after the warm TTL.
///
/// # Errors
/// Fails when a connection cannot be obtained, when the system clock reads
/// earlier than the Unix epoch, or when the write fails. The clock case used
/// to panic; it is now reported as an error like the others.
pub async fn update_last_access(&self, file_uuid: &str) -> Result<()> {
    let client = self.client.read().await;
    let mut conn = client.get_conn_internal().await?;
    let key = Self::snapshot_last_access_key(file_uuid).await;

    // `duration_since` errs if the clock is set before 1970; propagate that
    // instead of unwrapping inside a function that already returns `Result`.
    let elapsed = std::time::SystemTime::now().duration_since(std::time::UNIX_EPOCH)?;
    let now = i64::try_from(elapsed.as_secs())?;

    let _: String = conn
        .set_ex(&key, now, *snapshot_config::WARM_TTL_SECS)
        .await?;
    Ok(())
}
/// Writes the snapshot status entry for `file_uuid` as JSON
/// (`{"status": ..., "progress": ...}`).
///
/// The entry expires after the generating timeout while `status` is
/// `"generating"`, and after the warm TTL for any other status.
///
/// # Errors
/// Fails when a connection cannot be obtained, the payload cannot be
/// serialised, or the write fails.
pub async fn set_snapshot_status(
    &self,
    file_uuid: &str,
    status: &str,
    progress: Option<f32>,
) -> Result<()> {
    let guard = self.client.read().await;
    let mut connection = guard.get_conn_internal().await?;
    let key = Self::snapshot_status_key(file_uuid).await;

    let encoded = serde_json::to_string(&serde_json::json!({
        "status": status,
        "progress": progress,
    }))?;

    let ttl_secs = match status {
        "generating" => *snapshot_config::GENERATING_TIMEOUT_SECS,
        _ => *snapshot_config::WARM_TTL_SECS,
    };

    let _: String = connection.set_ex(&key, encoded, ttl_secs).await?;
    Ok(())
}
/// Reads the snapshot status entry for `file_uuid` as JSON.
///
/// When no entry exists, returns `{"status": "cold", "progress": null}`.
///
/// # Errors
/// Fails when a connection cannot be obtained, the read fails, or the stored
/// text is not valid JSON.
pub async fn get_snapshot_status(&self, file_uuid: &str) -> Result<serde_json::Value> {
    let guard = self.client.read().await;
    let mut connection = guard.get_conn_internal().await?;
    let key = Self::snapshot_status_key(file_uuid).await;

    let stored: Option<String> = connection.get(&key).await?;
    if let Some(raw) = stored {
        return Ok(serde_json::from_str(&raw)?);
    }

    // No entry means nothing has been generated or cached for this file.
    Ok(serde_json::json!({
        "status": "cold",
        "progress": null,
    }))
}
/// Deletes the snapshot status entry for `file_uuid`.
///
/// # Errors
/// Fails when a connection cannot be obtained or the delete fails.
pub async fn clear_snapshot_status(&self, file_uuid: &str) -> Result<()> {
    let guard = self.client.read().await;
    let mut connection = guard.get_conn_internal().await?;
    let key = Self::snapshot_status_key(file_uuid).await;

    // The `()` pattern makes the client discard the reply.
    let () = connection.del(&key).await?;
    Ok(())
}
/// Stores a migrate hint for `file_uuid` as JSON
/// (`{"parent_uuid": ..., "count": ...}`), expiring after the warm TTL.
///
/// # Errors
/// Fails when a connection cannot be obtained, the payload cannot be
/// serialised, or the write fails.
pub async fn set_migrate_hint(
    &self,
    file_uuid: &str,
    parent_uuid: &str,
    count: u64,
) -> Result<()> {
    let guard = self.client.read().await;
    let mut connection = guard.get_conn_internal().await?;
    let key = Self::snapshot_migrate_hint_key(file_uuid).await;

    let encoded = serde_json::to_string(&serde_json::json!({
        "parent_uuid": parent_uuid,
        "count": count,
    }))?;

    let ttl_secs = *snapshot_config::WARM_TTL_SECS;
    let _: String = connection.set_ex(&key, encoded, ttl_secs).await?;
    Ok(())
}
/// Reads the migrate hint for `file_uuid`, or `None` when no hint is stored.
///
/// # Errors
/// Fails when a connection cannot be obtained, the read fails, or the stored
/// text is not valid JSON.
pub async fn get_migrate_hint(&self, file_uuid: &str) -> Result<Option<serde_json::Value>> {
    let guard = self.client.read().await;
    let mut connection = guard.get_conn_internal().await?;
    let key = Self::snapshot_migrate_hint_key(file_uuid).await;

    let stored: Option<String> = connection.get(&key).await?;
    let hint = stored
        .map(|raw| serde_json::from_str::<serde_json::Value>(&raw))
        .transpose()?;
    Ok(hint)
}
}
impl Clone for RedisCache {

View File

@@ -3,7 +3,7 @@ pub mod rule3_ingest;
pub mod splitter;
pub mod types;
pub use rule1_ingest::ingest_rule1;
pub use rule1_ingest::execute_rule1;
pub use rule3_ingest::ingest_rule3;
pub use splitter::{AsrSegment, ChunkSplitter};
pub use types::{Chunk, ChunkType};

View File

@@ -1,94 +1,367 @@
use crate::core::config::OUTPUT_DIR;
use crate::core::chunk::types::{Chunk, ChunkRule, ChunkType};
use crate::core::db::schema;
use crate::core::db::PostgresDb;
use anyhow::{Context, Result};
use serde::Deserialize;
use sqlx::PgPool;
use std::fs;
use std::path::Path;
use serde_json::Value;
use sqlx::{PgPool, Row};
use tracing::info;
// --- 結構體定義 (對齊外部處理器產出格式) ---
pub async fn execute_rule1(db: &PostgresDb, file_uuid: &str, fps: f64) -> Result<usize> {
let pool = db.pool();
let pre_chunks_table = schema::table_name("pre_chunks");
#[derive(Debug, Deserialize)]
struct AsrSegment {
start: f64,
end: f64,
text: String,
}
let asr_segments = fetch_asr_segments(pool, file_uuid, &pre_chunks_table).await?;
#[derive(Debug, Deserialize)]
struct AsrxSegment {
start: f64,
end: f64,
speaker: String,
}
let asrx_segments = fetch_asrx_segments(pool, file_uuid, &pre_chunks_table).await?;
// --- 核心邏輯 ---
let yolo_frames = fetch_yolo_frames(pool, file_uuid, &pre_chunks_table).await?;
/// 執行 Rule 1 入庫
/// 讀取 asr.json 與 asrx.json合併 Speaker 資訊,寫入 chunks_rule1
pub async fn ingest_rule1(pool: &PgPool, asset_uuid: &str, fps: f64) -> Result<usize> {
// 1. 讀取檔案
let asr_path = format!("{}/{}.asr.json", *OUTPUT_DIR, asset_uuid);
let asrx_path = format!("{}/{}.asrx.json", *OUTPUT_DIR, asset_uuid);
let face_frames = fetch_face_frames(pool, file_uuid).await?;
let asr_content = fs::read_to_string(&asr_path)
.with_context(|| format!("Failed to read ASR file: {}", asr_path))?;
let asrx_content = fs::read_to_string(&asrx_path)
.with_context(|| format!("Failed to read ASRX file: {}", asrx_path))?;
let video = db
.get_video_by_uuid(file_uuid)
.await?
.context("Video not found")?;
let asr_segments: Vec<AsrSegment> = serde_json::from_str(&asr_content)?;
let asrx_segments: Vec<AsrxSegment> = serde_json::from_str(&asrx_content)?;
let file_id = video.id;
let mut count = 0;
// 2. 交易處理
let mut tx = pool.begin().await?;
for seg in &asr_segments {
// 時間轉幀
let start_frame = (seg.start * fps).round() as i64;
let end_frame = (seg.end * fps).round() as i64;
for (idx, seg) in asr_segments.iter().enumerate() {
let speaker_id = find_best_speaker(&seg, &asrx_segments);
// 3. 尋找重疊最多的 Speaker
let mut best_speaker: Option<String> = None;
let mut max_overlap = 0.0f64;
let yolo_objects = find_yolo_objects(seg.start_frame, seg.end_frame, &yolo_frames);
for spk in &asrx_segments {
let overlap = (seg.end.min(spk.end) - seg.start.max(spk.start)).max(0.0);
if overlap > max_overlap {
max_overlap = overlap;
best_speaker = Some(spk.speaker.clone());
}
}
let face_ids = find_face_ids(seg.start_frame, seg.end_frame, &face_frames);
let speaker_id = best_speaker.unwrap_or("UNKNOWN".to_string());
let metadata = serde_json::json!({
"speaker_id": speaker_id,
"yolo_objects": yolo_objects,
"face_ids": face_ids,
"language": seg.language,
});
// 4. 寫入 DB
sqlx::query!(
r#"
INSERT INTO chunks_rule1 (
id, asset_uuid, start_frame, end_frame, content, speaker_id
) VALUES (
gen_random_uuid(), $1, $2, $3, $4, $5
)
"#,
asset_uuid,
start_frame,
end_frame,
seg.text,
speaker_id
let content = serde_json::json!({
"text": seg.text,
"text_normalized": seg.text.to_lowercase(),
});
let chunk = Chunk::from_seconds(
file_id as i32,
file_uuid.to_string(),
idx as u32,
ChunkType::Sentence,
ChunkRule::Rule1,
seg.start_time,
seg.end_time,
fps,
content,
)
.execute(&mut *tx)
.await?;
.with_metadata(metadata);
db.store_chunk_in_tx(&chunk, &mut tx).await?;
count += 1;
// 每 100 筆 Commit 一次 (可選優化)
if count % 500 == 0 {
tx.commit().await?;
tx = pool.begin().await?;
if count % 100 == 0 {
info!(
"Rule 1: Processed {} segments for video {}",
count, file_uuid
);
}
}
tx.commit().await?;
info!(
"Rule 1 completed: {} sentence chunks created for video {}",
count, file_uuid
);
Ok(count)
}
/// One ASR row loaded from the pre-chunks table by `fetch_asr_segments`.
#[derive(Debug, Clone)]
struct AsrSegment {
    /// `start_frame` column (0 if it could not be read).
    start_frame: i64,
    /// `end_frame` column (0 if it could not be read).
    end_frame: i64,
    /// `start_time` column (0.0 if it could not be read).
    start_time: f64,
    /// `end_time` column (0.0 if it could not be read).
    end_time: f64,
    /// `text` field of the row's JSON `data`; empty when absent.
    text: String,
    /// `language` field of the row's JSON `data`; `"unknown"` when absent.
    language: String,
}
/// One ASRX row loaded from the pre-chunks table by `fetch_asrx_segments`;
/// used by `find_best_speaker` to attribute a speaker to an ASR segment.
#[derive(Debug, Clone)]
struct AsrxSegment {
    /// `start_frame` column (0 if it could not be read).
    start_frame: i64,
    /// `end_frame` column (0 if it could not be read).
    end_frame: i64,
    /// `start_time` column (0.0 if it could not be read).
    start_time: f64,
    /// `end_time` column (0.0 if it could not be read).
    end_time: f64,
    /// `speaker` field of the row's JSON `data`; `"UNKNOWN"` when absent.
    speaker: String,
}
/// Detections for one YOLO row, as loaded by `fetch_yolo_frames`.
#[derive(Debug, Clone)]
struct YoloFrame {
    /// The row's `coordinate_index`, compared against frame ranges.
    frame: i64,
    /// Well-formed entries of the row's JSON `detections` array.
    detections: Vec<YoloDetection>,
}
/// A single object detection inside a `YoloFrame`.
#[derive(Debug, Clone)]
struct YoloDetection {
    /// `class_name` field of the detection JSON.
    class_name: String,
    /// `confidence` field of the detection JSON; `find_yolo_objects` keeps
    /// only detections above 0.5.
    confidence: f64,
}
/// All face detections sharing one frame number, grouped by
/// `fetch_face_frames`.
#[derive(Debug, Clone)]
struct FaceFrame {
    /// The `frame_number` column of the grouped rows.
    frame: i64,
    /// Detections recorded for this frame.
    faces: Vec<FaceDetection>,
}
/// A single face detection row inside a `FaceFrame`.
#[derive(Debug, Clone)]
struct FaceDetection {
    /// `face_id` column; rows without a readable value are skipped on load.
    face_id: String,
    /// `confidence` column (0.0 if it could not be read); `find_face_ids`
    /// keeps only detections above 0.5.
    confidence: f64,
    /// `identity_id` column; `None` when it could not be read.
    identity_id: Option<i32>,
}
/// Loads the ASR rows (`processor_type = 'asr'`) for `file_uuid` from
/// `table`, ordered by `start_frame`.
///
/// Columns or JSON fields that cannot be read fall back to defaults
/// (0 / 0.0 / `""` / `"unknown"`) instead of failing the whole load.
///
/// # Errors
/// Fails only when the query itself fails.
async fn fetch_asr_segments(
    pool: &PgPool,
    file_uuid: &str,
    table: &str,
) -> Result<Vec<AsrSegment>> {
    // Query text is reproduced byte-for-byte; only the table name is spliced in.
    let query = format!(
        r#"
SELECT
start_frame, end_frame, start_time, end_time, data
FROM {}
WHERE file_uuid = $1 AND processor_type = 'asr'
ORDER BY start_frame
"#,
        table
    );
    let rows = sqlx::query(&query).bind(file_uuid).fetch_all(pool).await?;

    let mut segments = Vec::with_capacity(rows.len());
    for row in &rows {
        let data: Value = row.try_get("data").unwrap_or(Value::Null);
        segments.push(AsrSegment {
            start_frame: row.try_get("start_frame").unwrap_or(0),
            end_frame: row.try_get("end_frame").unwrap_or(0),
            start_time: row.try_get("start_time").unwrap_or(0.0),
            end_time: row.try_get("end_time").unwrap_or(0.0),
            // Indexing yields `Null` for a missing key, so defaults apply.
            text: data["text"].as_str().unwrap_or("").to_string(),
            language: data["language"].as_str().unwrap_or("unknown").to_string(),
        });
    }
    Ok(segments)
}
/// Loads the ASRX rows (`processor_type = 'asrx'`) for `file_uuid` from
/// `table`, ordered by `start_frame`.
///
/// Columns or JSON fields that cannot be read fall back to defaults
/// (0 / 0.0 / `"UNKNOWN"`) instead of failing the whole load.
///
/// # Errors
/// Fails only when the query itself fails.
async fn fetch_asrx_segments(
    pool: &PgPool,
    file_uuid: &str,
    table: &str,
) -> Result<Vec<AsrxSegment>> {
    // Query text is reproduced byte-for-byte; only the table name is spliced in.
    let query = format!(
        r#"
SELECT
start_frame, end_frame, start_time, end_time, data
FROM {}
WHERE file_uuid = $1 AND processor_type = 'asrx'
ORDER BY start_frame
"#,
        table
    );
    let rows = sqlx::query(&query).bind(file_uuid).fetch_all(pool).await?;

    let mut segments = Vec::with_capacity(rows.len());
    for row in &rows {
        let data: Value = row.try_get("data").unwrap_or(Value::Null);
        segments.push(AsrxSegment {
            start_frame: row.try_get("start_frame").unwrap_or(0),
            end_frame: row.try_get("end_frame").unwrap_or(0),
            start_time: row.try_get("start_time").unwrap_or(0.0),
            end_time: row.try_get("end_time").unwrap_or(0.0),
            // Indexing yields `Null` for a missing key, so the default applies.
            speaker: data["speaker"].as_str().unwrap_or("UNKNOWN").to_string(),
        });
    }
    Ok(segments)
}
/// Loads the YOLO rows (`processor_type = 'yolo'`) for `file_uuid` from
/// `table`, ordered by `coordinate_index`.
///
/// Each row becomes one `YoloFrame`. Entries of the JSON `detections` array
/// that lack a string `class_name` or a numeric `confidence` are skipped; a
/// row without a `detections` array yields an empty frame. Unreadable columns
/// fall back to 0 / `Null`.
///
/// # Errors
/// Fails only when the query itself fails.
async fn fetch_yolo_frames(pool: &PgPool, file_uuid: &str, table: &str) -> Result<Vec<YoloFrame>> {
    let query = format!(
        r#"
SELECT
coordinate_index as frame, data
FROM {}
WHERE file_uuid = $1 AND processor_type = 'yolo'
ORDER BY coordinate_index
"#,
        table
    );
    let rows = sqlx::query(&query).bind(file_uuid).fetch_all(pool).await?;

    let frames: Vec<YoloFrame> = rows
        .iter()
        .map(|row| {
            let frame: i64 = row.try_get("frame").unwrap_or(0);
            let data: Value = row.try_get("data").unwrap_or(Value::Null);
            let detections: Vec<YoloDetection> = data
                .get("detections")
                .and_then(|d| d.as_array())
                .map(|arr| {
                    arr.iter()
                        .filter_map(|det| {
                            // `?` drops malformed detections without the
                            // is_some()/unwrap() pairing.
                            let class_name = det.get("class_name")?.as_str()?;
                            let confidence = det.get("confidence")?.as_f64()?;
                            Some(YoloDetection {
                                class_name: class_name.to_string(),
                                confidence,
                            })
                        })
                        .collect()
                })
                .unwrap_or_default();
            YoloFrame { frame, detections }
        })
        .collect();
    Ok(frames)
}
/// Loads the face detections for `file_uuid` and groups them by frame number.
///
/// Returns one `FaceFrame` per distinct frame, in ascending frame order. Rows
/// whose `face_id` cannot be read are skipped; an unreadable `confidence`
/// becomes 0.0 and an unreadable `identity_id` becomes `None`.
///
/// # Errors
/// Fails only when the query itself fails.
async fn fetch_face_frames(pool: &PgPool, file_uuid: &str) -> Result<Vec<FaceFrame>> {
    let face_detections_table = schema::table_name("face_detections");
    let query = format!(
        r#"
SELECT
frame_number as frame,
face_id,
confidence,
identity_id
FROM {}
WHERE file_uuid = $1
ORDER BY frame_number
"#,
        face_detections_table
    );
    let rows = sqlx::query(&query).bind(file_uuid).fetch_all(pool).await?;

    // An ordered map keeps frames sorted as they are inserted, so no separate
    // sort pass is needed at the end.
    let mut frame_map: std::collections::BTreeMap<i64, FaceFrame> =
        std::collections::BTreeMap::new();
    for row in rows {
        let frame: i64 = row.try_get("frame").unwrap_or(0);
        let face_id: Option<String> = row.try_get("face_id").ok();
        // NOTE(review): decoding as f64 falls back to 0.0 on any error,
        // including a column type mismatch — confirm the column type.
        let confidence: f64 = row.try_get("confidence").unwrap_or(0.0);
        let identity_id: Option<i32> = row.try_get("identity_id").ok();

        if let Some(face_id) = face_id {
            frame_map
                .entry(frame)
                .or_insert_with(|| FaceFrame {
                    frame,
                    faces: Vec::new(),
                })
                .faces
                .push(FaceDetection {
                    // The owned id is moved in; no clone required.
                    face_id,
                    confidence,
                    identity_id,
                });
        }
    }

    Ok(frame_map.into_values().collect())
}
/// Picks the speaker whose ASRX segment overlaps `asr_seg` the longest in time.
///
/// Ties go to the earliest segment in `asrx_segments`. Returns `"UNKNOWN"`
/// when no segment has a positive overlap.
fn find_best_speaker(asr_seg: &AsrSegment, asrx_segments: &[AsrxSegment]) -> String {
    // Length of the time interval shared with `spk`, clamped at zero.
    let overlap_with = |spk: &AsrxSegment| {
        let shared_end = asr_seg.end_time.min(spk.end_time);
        let shared_start = asr_seg.start_time.max(spk.start_time);
        (shared_end - shared_start).max(0.0)
    };

    let (_, winner) = asrx_segments
        .iter()
        .fold((0.0f64, None), |(best, current), spk| {
            let overlap = overlap_with(spk);
            // Strictly greater, so the first of several equal overlaps wins.
            if overlap > best {
                (overlap, Some(spk))
            } else {
                (best, current)
            }
        });

    match winner {
        Some(spk) => spk.speaker.clone(),
        None => "UNKNOWN".to_string(),
    }
}
/// Collects the distinct object class names detected with confidence above
/// 0.5 in frames within `start_frame..=end_frame`.
///
/// Names are returned in first-seen order.
fn find_yolo_objects(start_frame: i64, end_frame: i64, yolo_frames: &[YoloFrame]) -> Vec<String> {
    let window = start_frame..=end_frame;
    let confident = yolo_frames
        .iter()
        .filter(|candidate| window.contains(&candidate.frame))
        .flat_map(|candidate| candidate.detections.iter())
        .filter(|det| det.confidence > 0.5);

    let mut labels: Vec<String> = Vec::new();
    for det in confident {
        if !labels.contains(&det.class_name) {
            labels.push(det.class_name.clone());
        }
    }
    labels
}
/// Collects the distinct face ids detected with confidence above 0.5 in
/// frames within `start_frame..=end_frame`.
///
/// Ids are returned in first-seen order.
fn find_face_ids(start_frame: i64, end_frame: i64, face_frames: &[FaceFrame]) -> Vec<String> {
    let window = start_frame..=end_frame;
    let confident = face_frames
        .iter()
        .filter(|candidate| window.contains(&candidate.frame))
        .flat_map(|candidate| candidate.faces.iter())
        .filter(|face| face.confidence > 0.5);

    let mut ids: Vec<String> = Vec::new();
    for face in confident {
        if !ids.contains(&face.face_id) {
            ids.push(face.face_id.clone());
        }
    }
    ids
}

View File

@@ -165,6 +165,42 @@ pub mod cache {
});
}
/// Snapshot-cache settings, each read once from the environment on first use.
pub mod snapshot {
    use super::*;

    /// Reads `var` as a `u64`; an unset or unparsable value yields `fallback`.
    fn env_u64(var: &str, fallback: u64) -> u64 {
        env::var(var)
            .ok()
            .and_then(|raw| raw.parse().ok())
            .unwrap_or(fallback)
    }

    /// `MOMENTRY_SNAPSHOT_DIR_NAME`, default `.momentry_snapshots`.
    pub static SNAPSHOT_DIR_NAME: Lazy<String> =
        Lazy::new(|| match env::var("MOMENTRY_SNAPSHOT_DIR_NAME") {
            Ok(name) => name,
            Err(_) => ".momentry_snapshots".to_string(),
        });

    /// `MOMENTRY_SNAPSHOT_HOT_THRESHOLD`, default 5.
    pub static HOT_THRESHOLD: Lazy<u64> =
        Lazy::new(|| env_u64("MOMENTRY_SNAPSHOT_HOT_THRESHOLD", 5));

    /// `MOMENTRY_SNAPSHOT_HOT_TTL_SECS`, default 86400 seconds.
    pub static HOT_TTL_SECS: Lazy<u64> =
        Lazy::new(|| env_u64("MOMENTRY_SNAPSHOT_HOT_TTL_SECS", 86400));

    /// `MOMENTRY_SNAPSHOT_WARM_TTL_SECS`, default 604800 seconds.
    pub static WARM_TTL_SECS: Lazy<u64> =
        Lazy::new(|| env_u64("MOMENTRY_SNAPSHOT_WARM_TTL_SECS", 604800));

    /// `MOMENTRY_SNAPSHOT_GENERATING_TIMEOUT`, default 1800 seconds.
    pub static GENERATING_TIMEOUT_SECS: Lazy<u64> =
        Lazy::new(|| env_u64("MOMENTRY_SNAPSHOT_GENERATING_TIMEOUT", 1800));
}
pub mod llm {
use super::*;

View File

@@ -41,9 +41,10 @@ pub mod sync_db;
pub use mongodb_db::MongoDb;
pub use postgres_db::{
Bm25Result, CandidateRecord, CreateApiKeyConfig, FileRecord, HybridSearchResult, MonitorJob,
MonitorJobStats, MonitorJobStatus, PostgresDb, ProcessorJobStatus, ProcessorResult,
ProcessorType, ResourceRecord, VideoRecord, VideoStatus,
Bm25Result, CandidateRecord, CreateApiKeyConfig, FileIdentityRecord, FileRecord,
HybridSearchResult, IdentityChunkRecord, IdentityDetailRecord, IdentityFaceRecord,
IdentityFileRecord, MonitorJob, MonitorJobStats, MonitorJobStatus, PostgresDb,
ProcessorJobStatus, ProcessorResult, ProcessorType, ResourceRecord, VideoRecord, VideoStatus,
};
pub use qdrant_db::{QdrantDb, VectorPayload};
pub use redis_client::{

File diff suppressed because it is too large Load Diff

View File

@@ -4,6 +4,8 @@ use std::sync::Arc;
use tokio::sync::RwLock;
use super::Database;
use crate::core::config;
use crate::core::storage::snapshot_manager::SnapshotTier;
pub struct RedisDb {
#[allow(dead_code)]
@@ -28,14 +30,20 @@ pub struct Job {
pub updated_at: String,
}
/// Snapshot cache state for one file, as returned by
/// `RedisDb::get_snapshot_status`.
#[derive(Debug, serde::Serialize, serde::Deserialize)]
pub struct SnapshotCacheEntry {
    /// Hit counter.
    pub hits: u64,
    /// Last-access timestamp. NOTE(review): unit is not fixed by this file —
    /// presumably Unix seconds; confirm once the real backend is wired in.
    pub last_access: i64,
    /// Status label; the stub returns `"cold"`.
    pub status: String,
    /// Optional generation progress.
    pub progress: Option<f32>,
}
impl RedisDb {
pub async fn push_job(&self, _job: &Job) -> Result<()> {
// TODO: Implement Redis client
Ok(())
}
pub async fn get_pending_jobs(&self) -> Result<Vec<Job>> {
// TODO: Implement Redis client
Ok(vec![])
}
@@ -45,14 +53,82 @@ impl RedisDb {
_status: &str,
_progress: f32,
) -> Result<()> {
// TODO: Implement Redis client
Ok(())
}
pub async fn publish_event(&self, _channel: &str, _message: &str) -> Result<()> {
// TODO: Implement Redis Pub/Sub
Ok(())
}
// --- Snapshot Cache Methods ---
/// Placeholder for bumping the snapshot hit counter of a file.
///
/// Stub: performs no Redis call and always returns `Ok(0)`.
pub async fn increment_snapshot_hits(&self, _file_uuid: &str) -> Result<u64> {
// TODO: Redis HINCRBY snapshot:hits:{uuid}
// NOTE(review): `get_snapshot_hits` plans a plain GET on the same key, while
// HINCRBY addresses a hash field; INCR is probably what is meant — confirm.
Ok(0)
}
/// Placeholder for reading the snapshot hit counter of a file.
///
/// Stub: performs no Redis call and always returns `Ok(0)`.
pub async fn get_snapshot_hits(&self, _file_uuid: &str) -> Result<u64> {
// TODO: Redis GET snapshot:hits:{uuid}
Ok(0)
}
/// Placeholder for recording the last access time of a file's snapshots.
///
/// Stub: performs no Redis call and always returns `Ok(())`.
pub async fn update_last_access(&self, _file_uuid: &str) -> Result<()> {
// TODO: Redis SET snapshot:last_access:{uuid} = now EX 7d
Ok(())
}
/// Placeholder for storing the snapshot status (and optional progress) of a file.
///
/// Stub: the arguments are discarded and `Ok(())` is always returned.
pub async fn set_snapshot_status(
&self,
_file_uuid: &str,
status: &str,
progress: Option<f32>,
) -> Result<()> {
// TODO: Redis SET snapshot:status:{uuid} = {status, progress} EX 30m
// Explicitly consume the parameters so they do not trigger unused warnings
// while the implementation is missing.
let _ = (status, progress);
Ok(())
}
/// Placeholder for loading the cached snapshot status of a file.
///
/// Stub: performs no Redis call and always returns an entry with zero hits,
/// `last_access` of 0, status `"cold"` and no progress.
pub async fn get_snapshot_status(&self, _file_uuid: &str) -> Result<SnapshotCacheEntry> {
// TODO: Redis GET snapshot:status:{uuid}
Ok(SnapshotCacheEntry {
hits: 0,
last_access: 0,
status: "cold".to_string(),
progress: None,
})
}
/// Placeholder for deleting the cached snapshot status of a file.
///
/// Stub: performs no Redis call and always returns `Ok(())`.
pub async fn clear_snapshot_status(&self, _file_uuid: &str) -> Result<()> {
// TODO: Redis DEL snapshot:status:{uuid}
Ok(())
}
/// Placeholder for storing a migration hint (parent UUID plus a count) for a file.
///
/// Stub: all arguments are ignored and `Ok(())` is always returned.
/// NOTE(review): the meaning of `_count` is not visible here — confirm with the caller.
pub async fn set_migrate_hint(
&self,
_file_uuid: &str,
_parent_uuid: &str,
_count: u64,
) -> Result<()> {
// TODO: Redis SET snapshot:migrate_hint:{uuid}
Ok(())
}
/// Placeholder for reading a migration hint for a file.
///
/// Stub: performs no Redis call and always returns `Ok(None)`.
/// The tuple presumably mirrors `set_migrate_hint`'s `(parent_uuid, count)` — confirm.
pub async fn get_migrate_hint(&self, _file_uuid: &str) -> Result<Option<(String, u64)>> {
// TODO: Redis GET snapshot:migrate_hint:{uuid}
Ok(None)
}
/// Returns the TTL in seconds configured for `tier`.
///
/// `Hot` and `Warm` read `config::snapshot::HOT_TTL_SECS` / `WARM_TTL_SECS`;
/// `Cold` yields 0.
pub fn compute_status_ttl(&self, tier: SnapshotTier) -> u64 {
match tier {
SnapshotTier::Hot => *config::snapshot::HOT_TTL_SECS,
SnapshotTier::Warm => *config::snapshot::WARM_TTL_SECS,
SnapshotTier::Cold => 0,
}
}
/// Returns `config::snapshot::GENERATING_TIMEOUT_SECS`, the configured
/// snapshot generation timeout in seconds.
pub fn generating_timeout() -> u64 {
*config::snapshot::GENERATING_TIMEOUT_SECS
}
}
#[async_trait]

View File

@@ -1,5 +1,5 @@
use anyhow::{Context, Result};
use serde_json::json;
use serde_json;
use crate::core::chunk::types::{Chunk, ChunkRule, ChunkType};
use crate::core::db::mongodb_db::MongoDb;
@@ -77,9 +77,9 @@ impl SyncDb {
let client = reqwest::Client::new();
let response = client
.post("http://localhost:11434/api/embeddings")
.json(&json!({
"model": "nomic-embed-text-v2-moe:latest",
"prompt": text
.json(&serde_json::json!({
"model": "all-minilm",
"prompt": text,
}))
.send()
.await
@@ -107,12 +107,21 @@ impl SyncDb {
for (i, segment) in asr_result.segments.iter().enumerate() {
let segment: &AsrSegment = segment;
let content = json!({
"text": segment.text,
"text_normalized": segment.text.to_lowercase(),
let content = serde_json::json!({
"rule": "rule1",
"data": {
"text": segment.text,
"start": segment.start,
"end": segment.end,
},
});
let metadata = json!({
let metadata = serde_json::json!({
"file_uuid": uuid,
"chunk_type": "sentence",
"chunk_rule": "rule1",
"fps": 0.0, // Will be set later
"start_frame": 0,
"end_frame": 0,
"language": asr_result.language,
"language_probability": asr_result.language_probability,
});

View File

@@ -1,17 +1,14 @@
use anyhow::{Context, Result};
use chrono::Utc;
use sqlx;
use std::path::Path;
use tracing::{info, warn};
use crate::core::db::{Database, PostgresDb, VideoRecord, VideoStatus};
use crate::core::db::{PostgresDb, VideoRecord, VideoStatus};
use crate::core::probe;
use crate::core::storage::uuid as uuid_utils;
use crate::core::storage::FileManager;
use crate::uuid as uuid_utils;
/// Handles the automatic ingestion of video files.
/// This service is responsible for:
/// 1. Running `ffprobe` (Pre-processing)
/// 2. Saving probe JSON
/// 3. Registering the video in the database (making it visible in the API)
pub struct IngestionService {
db: PostgresDb,
}
@@ -21,20 +18,56 @@ impl IngestionService {
Self { db }
}
/// Registers a video file found in the watched directory.
/// This function is idempotent: if the video (UUID) already exists, it skips.
pub async fn ingest(&self, file_path: &str) -> Result<Option<String>> {
let path = Path::new(file_path);
// 1. Validate extension
if !is_video_extension(path) {
return Ok(None);
}
// 2. Compute UUID
let uuid = uuid_utils::compute_uuid_from_path(file_path);
let canonical_path = path.canonicalize().unwrap_or_else(|_| path.to_path_buf());
let filename = path
.file_name()
.unwrap_or_default()
.to_string_lossy()
.to_string();
// Stable UUID based on MAC + Birthday + Filename.
// Moving the file (path change) keeps the SAME identity.
// 1. Look for existing Birthday (Identity Anchor)
// If the file (by name) was registered before, use its original birth time.
let birthday = sqlx::query_scalar::<_, chrono::DateTime<chrono::Utc>>(
"SELECT registration_time FROM dev.videos WHERE file_name = $1 AND registration_time IS NOT NULL LIMIT 1"
)
.bind(&filename)
.fetch_optional(self.db.pool())
.await
.ok()
.flatten()
.map(|t| t.to_rfc3339())
.unwrap_or_else(|| Utc::now().to_rfc3339());
let parent = canonical_path
.parent()
.map(|p| p.to_string_lossy().to_string())
.unwrap_or_default();
// 2. Compute UUID
let uuid = uuid_utils::compute_birth_uuid(
&uuid_utils::get_mac_address(),
&birthday,
&canonical_path.to_string_lossy(),
&filename,
);
let parent = canonical_path
.parent()
.map(|p| p.to_string_lossy().to_string())
.unwrap_or_default();
let username = uuid_utils::extract_username_from_path(&parent);
// 3. Check if already registered
if let Ok(Some(_)) = self.db.get_video_by_uuid(&uuid).await {
info!(
"Video already registered: {} ({})",
@@ -46,11 +79,9 @@ impl IngestionService {
info!("Starting ingestion for: {} ({})", path.display(), uuid);
// 4. Run ffprobe
let probe_result = probe::probe_video(file_path)
.with_context(|| format!("Failed to probe video: {}", file_path))?;
// 5. Extract metadata
let duration = probe_result
.format
.duration
@@ -78,7 +109,6 @@ impl IngestionService {
}
}
// 6. Save Probe JSON
let file_manager = FileManager::new(std::path::PathBuf::from("."));
let probe_json_str = serde_json::to_string_pretty(&probe_result)?;
@@ -88,33 +118,72 @@ impl IngestionService {
info!("Probe JSON saved for {}", uuid);
}
// 7. Create Record
// Use absolute path for safety
let canonical_path = path.canonicalize().unwrap_or_else(|_| path.to_path_buf());
let total_frames = {
let video_stream = probe_result
.streams
.iter()
.find(|s| s.codec_type.as_deref() == Some("video"));
if let Some(stream) = video_stream {
if let Some(nb_frames_str) = &stream.nb_frames {
if let Ok(nb_frames) = nb_frames_str.parse::<u64>() {
info!(
"Using nb_frames from ffprobe: {} frames for {}",
nb_frames,
path.display()
);
Some(nb_frames)
} else {
warn!(
"Failed to parse nb_frames, using duration * fps fallback for {}",
path.display()
);
Some((duration * fps).floor() as u64)
}
} else {
warn!(
"nb_frames not available, using duration * fps fallback for {}",
path.display()
);
Some((duration * fps).floor() as u64)
}
} else {
warn!("No video stream found for {}", path.display());
Some(0)
}
};
let birth_registration = serde_json::json!({
"registration_source": {
"username": username,
"original_path": parent,
"original_filename": filename
}
});
let record = VideoRecord {
id: 0,
uuid: uuid.clone(),
file_uuid: uuid.clone(),
file_path: canonical_path.to_string_lossy().to_string(),
file_name: path
.file_name()
.unwrap_or_default()
.to_string_lossy()
.to_string(),
file_name: filename,
file_type: None,
duration,
width,
height,
fps,
probe_json: Some(probe_json_str),
storage: Default::default(),
status: VideoStatus::Pending, // Ready for processing
status: VideoStatus::Pending,
processing_status: Some(serde_json::json!({"phase": "REGISTERED"})),
birth_registration: None,
user_id: None,
job_id: None,
created_at: String::new(),
registration_time: None,
total_frames: total_frames.unwrap_or(0),
parent_uuid: None,
};
// 8. Insert DB
self.db
.register_video(&record)
.await
@@ -125,9 +194,14 @@ impl IngestionService {
.await
.with_context(|| "Failed to set registration_time")?;
self.db
.update_birth_registration(&uuid, &birth_registration)
.await
.with_context(|| "Failed to set birth_registration")?;
info!(
"Successfully registered video: {} (UUID: {})",
record.file_name, uuid
"Successfully registered video: {} (UUID: {}, Birth UUID: {})",
record.file_name, uuid, uuid
);
Ok(Some(uuid))
}

View File

@@ -12,7 +12,7 @@ pub struct PersonIdentity {
pub person_id: String,
pub face_identity_id: Option<i32>,
pub speaker_id: Option<String>,
pub video_uuid: String,
pub file_uuid: String,
pub confidence: f64,
pub name: Option<String>,
pub metadata: serde_json::Value,
@@ -85,7 +85,7 @@ pub struct SuggestedBinding {
pub struct PersonAppearance {
pub id: i32,
pub person_id: String,
pub video_uuid: String,
pub file_uuid: String,
pub start_time: f64,
pub end_time: f64,
pub duration: f64,
@@ -124,7 +124,7 @@ pub struct PersonStatistics {
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CreatePersonIdentityRequest {
pub video_uuid: String,
pub file_uuid: String,
pub face_identity_id: Option<i32>,
pub speaker_id: Option<String>,
pub name: Option<String>,
@@ -196,7 +196,7 @@ mod tests {
person_id: "person_001".to_string(),
face_identity_id: Some(123),
speaker_id: Some("SPEAKER_00".to_string()),
video_uuid: "video_abc".to_string(),
file_uuid: "video_abc".to_string(),
confidence: 0.85,
name: Some("张三".to_string()),
metadata: serde_json::json!({"role": "host"}),
@@ -220,7 +220,7 @@ mod tests {
let appearance = PersonAppearance {
id: 1,
person_id: "person_001".to_string(),
video_uuid: "video_abc".to_string(),
file_uuid: "video_abc".to_string(),
start_time: 10.5,
end_time: 25.3,
duration: 14.8,

View File

@@ -16,6 +16,7 @@ pub struct StreamInfo {
pub width: Option<u32>,
pub height: Option<u32>,
pub r_frame_rate: Option<String>,
pub nb_frames: Option<String>,
pub duration: Option<String>,
pub sample_rate: Option<String>,
pub channels: Option<u32>,
@@ -64,6 +65,7 @@ pub fn probe_video(video_path: &str) -> Result<ProbeResult> {
width: s["width"].as_u64().map(|v| v as u32),
height: s["height"].as_u64().map(|v| v as u32),
r_frame_rate: s["r_frame_rate"].as_str().map(String::from),
nb_frames: s["nb_frames"].as_str().map(String::from),
duration: s["duration"].as_str().map(String::from),
sample_rate: s["sample_rate"].as_str().map(String::from),
channels: s["channels"].as_u64().map(|v| v as u32),

View File

@@ -28,6 +28,15 @@ pub struct Face {
pub width: i32,
pub height: i32,
pub confidence: f32,
pub embedding: Option<Vec<f32>>,
pub landmarks: Option<Vec<Vec<f32>>>,
pub attributes: Option<FaceAttributes>,
}
/// Optional per-face attributes attached to a `Face`.
/// Both fields may be absent in the serialized data.
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct FaceAttributes {
/// Age value. NOTE(review): presumably an estimate in years — confirm with the producer.
pub age: Option<i32>,
/// Gender label as a free-form string (the tests use "male" / "female").
pub gender: Option<String>,
}
pub async fn process_face(
@@ -89,6 +98,12 @@ mod tests {
width: 50,
height: 60,
confidence: 0.95,
embedding: Some(vec![0.1, 0.2, 0.3]),
landmarks: Some(vec![vec![10.0, 20.0], vec![30.0, 40.0]]),
attributes: Some(FaceAttributes {
age: Some(30),
gender: Some("male".to_string()),
}),
}],
}],
};
@@ -96,6 +111,9 @@ mod tests {
let json = serde_json::to_string(&result).unwrap();
assert!(json.contains("face_1"));
assert!(json.contains("\"width\":50"));
assert!(json.contains("embedding"));
assert!(json.contains("landmarks"));
assert!(json.contains("attributes"));
}
#[test]
@@ -108,7 +126,17 @@ mod tests {
"frame": 30,
"timestamp": 1.2,
"faces": [
{"face_id": "f1", "x": 10, "y": 20, "width": 30, "height": 40, "confidence": 0.85}
{
"face_id": "f1",
"x": 10,
"y": 20,
"width": 30,
"height": 40,
"confidence": 0.85,
"embedding": [0.1, 0.2, 0.3],
"landmarks": [[5.0, 10.0]],
"attributes": {"age": 25, "gender": "female"}
}
]
}
]
@@ -118,6 +146,9 @@ mod tests {
assert_eq!(result.frame_count, 50);
assert_eq!(result.frames.len(), 1);
assert_eq!(result.frames[0].faces[0].x, 10);
assert!(result.frames[0].faces[0].embedding.is_some());
assert!(result.frames[0].faces[0].landmarks.is_some());
assert!(result.frames[0].faces[0].attributes.is_some());
}
#[test]
@@ -139,7 +170,33 @@ mod tests {
width: 10,
height: 10,
confidence: 0.5,
embedding: None,
landmarks: None,
attributes: None,
};
assert!(face.confidence >= 0.0 && face.confidence <= 1.0);
}
/// A `Face` built with a 512-element embedding keeps that embedding intact.
#[test]
fn test_face_with_embedding() {
    let attributes = FaceAttributes {
        age: Some(35),
        gender: Some("male".to_string()),
    };
    let face = Face {
        face_id: Some("face_001".to_string()),
        x: 100,
        y: 200,
        width: 50,
        height: 60,
        confidence: 0.95,
        embedding: Some(vec![0.1; 512]),
        landmarks: None,
        attributes: Some(attributes),
    };

    assert!(face.embedding.is_some());
    let vector = face.embedding.unwrap();
    assert_eq!(vector.len(), 512);
    assert_eq!(vector[0], 0.1);
}
}

View File

@@ -8,6 +8,7 @@ pub mod face_recognition;
pub mod ocr;
pub mod pose;
pub mod scene_classification;
pub mod snapshot_agent;
pub mod story;
pub mod visual_chunk;
pub mod yolo;
@@ -28,6 +29,7 @@ pub use pose::{process_pose, Bbox, Keypoint, PersonPose, PoseFrame, PoseResult};
pub use scene_classification::{
process_scene_classification, SceneClassificationResult, ScenePrediction, SceneSegment,
};
pub use snapshot_agent::{SnapshotAgent, SnapshotAgentConfig};
pub use story::{process_story, StoryChildChunk, StoryParentChunk, StoryResult, StoryStats};
pub use visual_chunk::{process_visual_chunk, process_visual_chunk_advanced, VisualChunkResult};
pub use yolo::{process_yolo, YoloFrame, YoloObject, YoloResult};

View File

@@ -0,0 +1,491 @@
use std::path::Path;
use std::process::Command;
use anyhow::{Context, Result};
use serde::{Deserialize, Serialize};
use tracing::{info, warn};
use crate::core::config;
use crate::core::db::{Database, PostgresDb};
use crate::core::storage::snapshot_manager::SnapshotManager;
/// Settings used to construct a `SnapshotAgent`.
pub struct SnapshotAgentConfig {
/// Directory passed to `SnapshotManager::new`; the snapshot tree is created beneath it.
pub output_dir: String,
/// Hot-tier hit threshold. NOTE(review): no code visible in this file reads
/// this field; `SnapshotManager::compute_tier` uses the global config instead.
pub hot_threshold: u64,
}
// Default configuration sourced from the global config values
// `config::OUTPUT_DIR` and `config::snapshot::HOT_THRESHOLD`.
impl Default for SnapshotAgentConfig {
fn default() -> Self {
Self {
output_dir: config::OUTPUT_DIR.clone(),
hot_threshold: *config::snapshot::HOT_THRESHOLD,
}
}
}
/// Shape of a face detection record.
///
/// NOTE(review): nothing visible in this file constructs or reads this struct;
/// `extract_face_snapshots` decodes rows into a tuple instead — confirm whether
/// it is still needed.
#[derive(Debug, Serialize, Deserialize)]
struct FaceDetection {
id: i32,
file_uuid: String,
frame_number: i64,
confidence: f64,
// Bounding box as raw JSON; may be absent.
bbox: Option<serde_json::Value>,
}
/// The two video properties the snapshot extractors need, loaded by
/// `get_video_info` from the videos table.
#[derive(Debug, Serialize, Deserialize)]
struct VideoInfo {
// Path handed to ffmpeg as the input file.
file_path: String,
// Frames per second; used to convert a frame number into a seek timestamp.
fps: f64,
}
/// Generates, migrates and tears down snapshot images for files, using
/// `SnapshotManager` for the on-disk layout.
pub struct SnapshotAgent {
// Kept for reference; `new` only reads `output_dir` from it.
config: SnapshotAgentConfig,
// Resolves and creates the snapshot directories.
manager: SnapshotManager,
}
impl SnapshotAgent {
/// Creates an agent whose `SnapshotManager` is rooted at `config.output_dir`.
pub fn new(config: SnapshotAgentConfig) -> Self {
let manager = SnapshotManager::new(&config.output_dir);
Self { config, manager }
}
/// Creates an agent from `SnapshotAgentConfig::default()`.
///
/// NOTE(review): this is an inherent method named like `Default::default`;
/// implementing the `Default` trait instead would keep call sites unchanged.
pub fn default() -> Self {
Self::new(SnapshotAgentConfig::default())
}
/// Generates snapshots of one kind for a file.
///
/// `snapshot_type` must be one of `"faces"`, `"ocr"`, `"logos"` or
/// `"products"`. The type is validated before anything else happens, so an
/// unsupported value neither opens a database connection nor creates empty
/// snapshot directories.
///
/// # Errors
/// Returns an error for an unsupported type, when the database cannot be
/// reached, when the video row cannot be loaded, when the snapshot
/// directories cannot be created, or when the detection query fails.
pub async fn generate_file_snapshots(
    &self,
    file_uuid: &str,
    snapshot_type: &str,
) -> Result<()> {
    // Parsed form of `snapshot_type`, so the dispatch below is exhaustive.
    enum Kind {
        Faces,
        Ocr,
        Logos,
        Products,
    }

    let kind = match snapshot_type {
        "faces" => Kind::Faces,
        "ocr" => Kind::Ocr,
        "logos" => Kind::Logos,
        "products" => Kind::Products,
        _ => {
            return Err(anyhow::anyhow!(
                "Unsupported snapshot type: {}",
                snapshot_type
            ))
        }
    };

    info!(
        "Starting snapshot generation: file_uuid={}, type={}",
        file_uuid, snapshot_type
    );

    let db = PostgresDb::init()
        .await
        .context("Failed to connect to database")?;
    let pool = db.pool();

    let video_info = self
        .get_video_info(pool, file_uuid)
        .await
        .context("Failed to get video info")?;

    self.manager
        .ensure_file_dirs(file_uuid)
        .context("Failed to create snapshot directories")?;

    match kind {
        Kind::Faces => {
            self.extract_face_snapshots(pool, file_uuid, &video_info)
                .await?
        }
        Kind::Ocr => {
            self.extract_ocr_snapshots(pool, file_uuid, &video_info)
                .await?
        }
        Kind::Logos => {
            self.extract_logo_snapshots(pool, file_uuid, &video_info)
                .await?
        }
        Kind::Products => {
            self.extract_product_snapshots(pool, file_uuid, &video_info)
                .await?
        }
    }

    info!(
        "Snapshot generation completed: file_uuid={}, type={}",
        file_uuid, snapshot_type
    );
    Ok(())
}
async fn extract_face_snapshots(
&self,
pool: &sqlx::PgPool,
file_uuid: &str,
video_info: &VideoInfo,
) -> Result<()> {
let face_table = crate::core::db::schema::table_name("face_detections");
let query = format!(
"SELECT id, face_id, file_uuid, frame_number, confidence, bbox
FROM {}
WHERE file_uuid = $1 AND confidence >= 0.5
ORDER BY confidence DESC
LIMIT 50",
face_table
);
let faces: Vec<(i32, String, i64, f64, Option<serde_json::Value>)> = sqlx::query_as(&query)
.bind(file_uuid)
.fetch_all(pool)
.await
.context("Failed to query face detections")?;
let output_dir = self.manager.file_type_dir(file_uuid, "faces");
let mut saved_count = 0;
for (face_id_db, _uuid, frame_num, confidence, bbox_json) in faces {
let bbox = match bbox_json {
Some(json) => serde_json::from_value::<Bbox>(json).unwrap_or_default(),
None => Bbox::default(),
};
let timestamp = frame_num as f64 / video_info.fps;
let output_path =
output_dir.join(format!("face_{}_conf{:.2}.jpg", face_id_db, confidence));
if self
.extract_frame(&video_info.file_path, timestamp, &bbox, &output_path)
.await
.is_ok()
{
saved_count += 1;
}
}
info!(
"Extracted {} face snapshots for file_uuid={}",
saved_count, file_uuid
);
Ok(())
}
async fn extract_ocr_snapshots(
&self,
pool: &sqlx::PgPool,
file_uuid: &str,
video_info: &VideoInfo,
) -> Result<()> {
let ocr_table = crate::core::db::schema::table_name("ocr_detections");
let query = format!(
"SELECT id, frame_number, text, bbox, confidence
FROM {}
WHERE file_uuid = $1 AND confidence >= 0.7
ORDER BY confidence DESC
LIMIT 30",
ocr_table
);
let detections: Vec<(i32, i64, String, Option<serde_json::Value>, f64)> =
sqlx::query_as(&query)
.bind(file_uuid)
.fetch_all(pool)
.await
.context("Failed to query OCR detections")?;
let output_dir = self.manager.file_type_dir(file_uuid, "ocr");
let mut saved_count = 0;
for (det_id, frame_num, text, bbox_json, _confidence) in detections {
let bbox = match bbox_json {
Some(json) => serde_json::from_value::<Bbox>(json).unwrap_or_default(),
None => Bbox::default(),
};
let timestamp = frame_num as f64 / video_info.fps;
let safe_text = text
.chars()
.take(20)
.filter(|c| c.is_alphanumeric() || *c == ' ')
.collect::<String>()
.replace(' ', "_");
let output_path = output_dir.join(format!("ocr_{}_{}.jpg", det_id, safe_text));
if self
.extract_frame(&video_info.file_path, timestamp, &bbox, &output_path)
.await
.is_ok()
{
saved_count += 1;
}
}
info!(
"Extracted {} OCR snapshots for file_uuid={}",
saved_count, file_uuid
);
Ok(())
}
async fn extract_logo_snapshots(
&self,
pool: &sqlx::PgPool,
file_uuid: &str,
video_info: &VideoInfo,
) -> Result<()> {
let yolo_table = crate::core::db::schema::table_name("yolo_detections");
let query = format!(
"SELECT id, frame_number, class_name, bbox, confidence
FROM {}
WHERE file_uuid = $1 AND class_name IN ('logo', 'brand') AND confidence >= 0.6
ORDER BY confidence DESC
LIMIT 20",
yolo_table
);
let detections: Vec<(i32, i64, String, Option<serde_json::Value>, f64)> =
sqlx::query_as(&query)
.bind(file_uuid)
.fetch_all(pool)
.await
.context("Failed to query logo detections")?;
let output_dir = self.manager.file_type_dir(file_uuid, "logos");
let mut saved_count = 0;
for (det_id, frame_num, class_name, bbox_json, confidence) in detections {
let bbox = match bbox_json {
Some(json) => serde_json::from_value::<Bbox>(json).unwrap_or_default(),
None => Bbox::default(),
};
let timestamp = frame_num as f64 / video_info.fps;
let output_path = output_dir.join(format!(
"logo_{}_{}_{:.2}.jpg",
det_id, class_name, confidence
));
if self
.extract_frame(&video_info.file_path, timestamp, &bbox, &output_path)
.await
.is_ok()
{
saved_count += 1;
}
}
info!(
"Extracted {} logo snapshots for file_uuid={}",
saved_count, file_uuid
);
Ok(())
}
async fn extract_product_snapshots(
&self,
pool: &sqlx::PgPool,
file_uuid: &str,
video_info: &VideoInfo,
) -> Result<()> {
let yolo_table = crate::core::db::schema::table_name("yolo_detections");
let query = format!(
"SELECT id, frame_number, class_name, bbox, confidence
FROM {}
WHERE file_uuid = $1 AND class_name NOT IN ('logo', 'brand', 'person', 'face') AND confidence >= 0.6
ORDER BY confidence DESC
LIMIT 20",
yolo_table
);
let detections: Vec<(i32, i64, String, Option<serde_json::Value>, f64)> =
sqlx::query_as(&query)
.bind(file_uuid)
.fetch_all(pool)
.await
.context("Failed to query product detections")?;
let output_dir = self.manager.file_type_dir(file_uuid, "products");
let mut saved_count = 0;
for (det_id, frame_num, class_name, bbox_json, confidence) in detections {
let bbox = match bbox_json {
Some(json) => serde_json::from_value::<Bbox>(json).unwrap_or_default(),
None => Bbox::default(),
};
let timestamp = frame_num as f64 / video_info.fps;
let output_path = output_dir.join(format!(
"product_{}_{}_{:.2}.jpg",
det_id, class_name, confidence
));
if self
.extract_frame(&video_info.file_path, timestamp, &bbox, &output_path)
.await
.is_ok()
{
saved_count += 1;
}
}
info!(
"Extracted {} product snapshots for file_uuid={}",
saved_count, file_uuid
);
Ok(())
}
async fn extract_frame(
&self,
video_path: &str,
timestamp: f64,
bbox: &Bbox,
output_path: &Path,
) -> Result<()> {
let crop_filter = format!("crop={}:{}:{}:{}", bbox.width, bbox.height, bbox.x, bbox.y);
let output = Command::new("ffmpeg")
.args(&[
"-ss",
&format!("{:.3}", timestamp),
"-i",
video_path,
"-vf",
&crop_filter,
"-frames:v",
"1",
"-f",
"image2",
"-y",
output_path.to_str().context("Invalid output path")?,
])
.output()
.context("Failed to execute ffmpeg")?;
if !output.status.success() {
let stderr = String::from_utf8_lossy(&output.stderr);
return Err(anyhow::anyhow!("ffmpeg failed: {}", stderr));
}
Ok(())
}
/// Loads the file path and frame rate of a video from the videos table.
///
/// # Errors
/// Fails when the query fails, when no row matches `file_uuid`, or when the
/// stored fps is not a positive finite number. The extractors divide frame
/// numbers by fps, so a zero or NaN value would otherwise turn into
/// `inf`/`NaN` seek timestamps and every frame extraction would fail.
async fn get_video_info(&self, pool: &sqlx::PgPool, file_uuid: &str) -> Result<VideoInfo> {
    let video_table = crate::core::db::schema::table_name("videos");
    let query = format!(
        "SELECT file_path, fps FROM {} WHERE file_uuid = $1",
        video_table
    );

    let row: Option<(String, f64)> = sqlx::query_as(&query)
        .bind(file_uuid)
        .fetch_optional(pool)
        .await
        .context("Failed to query video info")?;

    let (file_path, fps) =
        row.ok_or_else(|| anyhow::anyhow!("Video not found: file_uuid={}", file_uuid))?;

    if !fps.is_finite() || fps <= 0.0 {
        return Err(anyhow::anyhow!(
            "Invalid fps {} for file_uuid={}",
            fps,
            file_uuid
        ));
    }

    Ok(VideoInfo { file_path, fps })
}
/// Copies every snapshot type directory of `parent_uuid` into the snapshot
/// tree of `new_uuid` and returns the names of the types that were copied.
///
/// A type whose copy fails is logged and left out of the result; it does not
/// fail the call. Only creating the destination directories can fail it.
pub async fn migrate_snapshots(
    &self,
    new_uuid: &str,
    parent_uuid: &str,
) -> Result<Vec<String>> {
    info!(
        "Starting snapshot migration: {} -> {}",
        parent_uuid, new_uuid
    );

    self.manager
        .ensure_file_dirs(new_uuid)
        .context("Failed to create snapshot directories")?;

    let mut migrated = Vec::new();
    for kind in self.manager.list_snapshot_types(parent_uuid) {
        let from = self.manager.file_type_dir(parent_uuid, &kind);
        let to = self.manager.file_type_dir(new_uuid, &kind);

        // The directory may have vanished since it was listed.
        if !from.exists() {
            continue;
        }

        match copy_dir_recursive(&from, &to) {
            Err(e) => warn!("Failed to migrate {} snapshots: {}", kind, e),
            Ok(()) => {
                migrated.push(kind.clone());
                info!(
                    "Migrated {} snapshots: {} -> {}",
                    kind, parent_uuid, new_uuid
                );
            }
        }
    }

    info!("Migration completed: {} types migrated", migrated.len());
    Ok(migrated)
}
/// Removes the snapshot files and cache keys of a file whose tier is `Cold`.
///
/// The tier is derived from the access hit counter stored in the Redis cache.
/// (Previously the number of snapshot type directories was used as the hit
/// count, so any file with existing directories was never `Cold` and was
/// never torn down.)
///
/// # Errors
/// Fails when the Redis cache cannot be created, when the hit counter cannot
/// be read (nothing is deleted in that case, rather than assuming zero hits),
/// or when the snapshot directory cannot be removed. Cache key deletion is
/// best-effort.
pub async fn auto_tear_down(&self, file_uuid: &str) -> Result<()> {
    use crate::core::cache::redis_cache::RedisCache;
    use crate::core::storage::snapshot_manager::SnapshotTier;

    info!("Starting auto tear down for file_uuid={}", file_uuid);

    let redis_cache = RedisCache::new().context("Failed to create Redis cache")?;

    let hits = redis_cache
        .get_snapshot_hits(file_uuid)
        .await
        .context("Failed to read snapshot hit count")?;

    let tier = SnapshotManager::compute_tier(hits);
    if tier != SnapshotTier::Cold {
        info!(
            "Skipping tear down: file_uuid={} is not Cold (tier={:?}, hits={})",
            file_uuid, tier, hits
        );
        return Ok(());
    }

    self.manager
        .remove_file_snapshots(file_uuid)
        .context("Failed to remove snapshot files")?;

    let status_key = RedisCache::snapshot_status_key(file_uuid).await;
    let hits_key = RedisCache::snapshot_hits_key(file_uuid).await;
    let access_key = RedisCache::snapshot_last_access_key(file_uuid).await;

    // Best-effort cleanup: the files are already gone, so a failed key
    // deletion must not fail the tear down.
    redis_cache.delete(&status_key).await.ok();
    redis_cache.delete(&hits_key).await.ok();
    redis_cache.delete(&access_key).await.ok();

    info!("Auto tear down completed for file_uuid={}", file_uuid);
    Ok(())
}
/// Borrows the `SnapshotManager` this agent uses for its directory layout.
pub fn manager(&self) -> &SnapshotManager {
&self.manager
}
}
/// Crop rectangle deserialized from a detection's `bbox` JSON and used by
/// `extract_frame` to build the ffmpeg `crop=width:height:x:y` filter.
/// `Default` yields an all-zero box, which callers use when no bbox is stored.
/// NOTE(review): units are presumably pixels in the source frame — confirm.
#[derive(Debug, Default, Serialize, Deserialize)]
struct Bbox {
x: i32,
y: i32,
width: i32,
height: i32,
}
/// Copies the directory tree at `src` into `dst`, creating `dst` (and any
/// missing parents) first. Sub-directories are copied recursively; every
/// other entry is copied with `std::fs::copy`.
///
/// Stops at, and returns, the first I/O error.
fn copy_dir_recursive(src: &Path, dst: &Path) -> std::io::Result<()> {
    std::fs::create_dir_all(dst)?;
    std::fs::read_dir(src)?.try_for_each(|item| {
        let item = item?;
        let from = item.path();
        let to = dst.join(item.file_name());
        if from.is_dir() {
            copy_dir_recursive(&from, &to)
        } else {
            std::fs::copy(&from, &to).map(|_| ())
        }
    })
}

View File

@@ -1,7 +1,9 @@
pub mod file_manager;
pub mod output_dir;
pub mod snapshot_manager;
pub mod uuid;
pub use file_manager::FileManager;
pub use output_dir::OutputDir;
pub use snapshot_manager::SnapshotManager;
pub use uuid::compute_uuid;

View File

@@ -0,0 +1,268 @@
use std::path::{Path, PathBuf};
use crate::core::config;
/// Access tier of a file's snapshots, derived from its hit count by
/// `SnapshotManager::compute_tier`. Serialized in snake case
/// (`"hot"`, `"warm"`, `"cold"`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum SnapshotTier {
/// Hit count at or above the configured hot threshold.
Hot,
/// At least one hit, but below the hot threshold.
Warm,
/// No hits.
Cold,
}
/// Renders the tier as its lowercase name: `hot`, `warm` or `cold`.
impl std::fmt::Display for SnapshotTier {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let label = match self {
            SnapshotTier::Hot => "hot",
            SnapshotTier::Warm => "warm",
            SnapshotTier::Cold => "cold",
        };
        f.write_str(label)
    }
}
/// Serializable summary of the snapshot state of one file.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct SnapshotStatus {
/// UUID of the file the snapshots belong to.
pub file_uuid: String,
/// Current access tier.
pub tier: SnapshotTier,
/// Access hit count.
pub hits: u64,
/// Snapshot type names; presumably the directory names reported by
/// `SnapshotManager::list_snapshot_types` — confirm with the producer.
pub types: Vec<String>,
/// Generation time, if known. NOTE(review): string format is not visible here.
pub generated_at: Option<String>,
}
/// Resolves, creates and removes the on-disk snapshot directories.
#[derive(Debug, Clone)]
pub struct SnapshotManager {
// Root of the snapshot tree: `<user_dir>/<SNAPSHOT_DIR_NAME>`.
base_dir: PathBuf,
}
impl SnapshotManager {
    /// Creates a manager rooted at `<user_dir>/<config::snapshot::SNAPSHOT_DIR_NAME>`.
    pub fn new(user_dir: &str) -> Self {
        let snapshot_dir_name = config::snapshot::SNAPSHOT_DIR_NAME.as_str();
        let base_dir = Path::new(user_dir).join(snapshot_dir_name);
        Self { base_dir }
    }

    /// Root directory of the snapshot tree.
    pub fn base_dir(&self) -> &Path {
        &self.base_dir
    }

    /// Directory holding all snapshots of a file: `<base>/<file_uuid>`.
    pub fn file_snapshot_dir(&self, file_uuid: &str) -> PathBuf {
        self.base_dir.join(file_uuid)
    }

    /// Directory holding one snapshot type of a file:
    /// `<base>/<file_uuid>/<snapshot_type>`.
    pub fn file_type_dir(&self, file_uuid: &str, snapshot_type: &str) -> PathBuf {
        self.file_snapshot_dir(file_uuid).join(snapshot_type)
    }

    /// Directory holding all snapshots of an identity:
    /// `<base>/identities/<identity_uuid>`.
    pub fn identity_snapshot_dir(&self, identity_uuid: &str) -> PathBuf {
        self.base_dir.join("identities").join(identity_uuid)
    }

    /// Face snapshot directory of an identity:
    /// `<base>/identities/<identity_uuid>/faces`.
    pub fn identity_face_dir(&self, identity_uuid: &str) -> PathBuf {
        self.identity_snapshot_dir(identity_uuid).join("faces")
    }

    /// Checks that `id` is exactly one ordinary path component.
    ///
    /// `Path::join` returns the base unchanged for an empty string, climbs
    /// out of it for `..`, and discards it entirely for an absolute path.
    /// Without this check `remove_file_snapshots("")` would delete the whole
    /// snapshot root.
    fn validate_id(id: &str) -> std::io::Result<()> {
        let mut parts = Path::new(id).components();
        match (parts.next(), parts.next()) {
            (Some(std::path::Component::Normal(_)), None) => Ok(()),
            _ => Err(std::io::Error::new(
                std::io::ErrorKind::InvalidInput,
                format!("invalid snapshot id: {:?}", id),
            )),
        }
    }

    /// Creates the file's snapshot directory and its `faces`, `logos`,
    /// `products` and `ocr` sub-directories.
    ///
    /// # Errors
    /// `InvalidInput` when `file_uuid` is not a single path component;
    /// otherwise any error from directory creation.
    pub fn ensure_file_dirs(&self, file_uuid: &str) -> std::io::Result<()> {
        Self::validate_id(file_uuid)?;
        let dir = self.file_snapshot_dir(file_uuid);
        std::fs::create_dir_all(&dir)?;
        for snap_type in ["faces", "logos", "products", "ocr"] {
            std::fs::create_dir_all(dir.join(snap_type))?;
        }
        Ok(())
    }

    /// Creates the identity's snapshot directory and its `faces` sub-directory.
    ///
    /// # Errors
    /// `InvalidInput` when `identity_uuid` is not a single path component;
    /// otherwise any error from directory creation.
    pub fn ensure_identity_dirs(&self, identity_uuid: &str) -> std::io::Result<()> {
        Self::validate_id(identity_uuid)?;
        let dir = self.identity_snapshot_dir(identity_uuid);
        std::fs::create_dir_all(&dir)?;
        std::fs::create_dir_all(dir.join("faces"))?;
        Ok(())
    }

    /// Maps a hit count to a tier: `Hot` at or above
    /// `config::snapshot::HOT_THRESHOLD`, `Warm` for any other non-zero
    /// count, `Cold` for zero.
    pub fn compute_tier(hits: u64) -> SnapshotTier {
        let threshold = *config::snapshot::HOT_THRESHOLD;
        if hits >= threshold {
            SnapshotTier::Hot
        } else if hits > 0 {
            SnapshotTier::Warm
        } else {
            SnapshotTier::Cold
        }
    }

    /// TTL in seconds configured for `tier`; `Cold` yields 0.
    pub fn tier_ttl(&self, tier: SnapshotTier) -> u64 {
        match tier {
            SnapshotTier::Hot => *config::snapshot::HOT_TTL_SECS,
            SnapshotTier::Warm => *config::snapshot::WARM_TTL_SECS,
            SnapshotTier::Cold => 0,
        }
    }

    /// Whether the directory for `snapshot_type` of the file exists.
    pub fn snapshot_exists(&self, file_uuid: &str, snapshot_type: &str) -> bool {
        self.file_type_dir(file_uuid, snapshot_type).exists()
    }

    /// Names of the sub-directories of the file's snapshot directory.
    ///
    /// Returns an empty list when the directory does not exist or cannot be
    /// read; unreadable entries and non-UTF-8 names are skipped.
    pub fn list_snapshot_types(&self, file_uuid: &str) -> Vec<String> {
        let dir = self.file_snapshot_dir(file_uuid);
        if !dir.exists() {
            return Vec::new();
        }

        std::fs::read_dir(&dir)
            .into_iter()
            .flatten()
            .filter_map(|e| e.ok())
            .filter(|e| e.path().is_dir())
            .filter_map(|e| e.file_name().to_str().map(String::from))
            .collect()
    }

    /// Recursively deletes the file's snapshot directory, if it exists.
    ///
    /// # Errors
    /// `InvalidInput` when `file_uuid` is not a single path component;
    /// otherwise any error from the removal.
    pub fn remove_file_snapshots(&self, file_uuid: &str) -> std::io::Result<()> {
        Self::validate_id(file_uuid)?;
        let dir = self.file_snapshot_dir(file_uuid);
        if dir.exists() {
            std::fs::remove_dir_all(&dir)?;
        }
        Ok(())
    }

    /// Recursively deletes the identity's snapshot directory, if it exists.
    ///
    /// # Errors
    /// `InvalidInput` when `identity_uuid` is not a single path component;
    /// otherwise any error from the removal.
    pub fn remove_identity_snapshots(&self, identity_uuid: &str) -> std::io::Result<()> {
        Self::validate_id(identity_uuid)?;
        let dir = self.identity_snapshot_dir(identity_uuid);
        if dir.exists() {
            std::fs::remove_dir_all(&dir)?;
        }
        Ok(())
    }
}
// Unit tests for `SnapshotTier` and `SnapshotManager`.
// The filesystem tests run inside a `tempfile::TempDir`.
#[cfg(test)]
mod tests {
use super::*;
// Returns the manager together with the temp dir guard; the guard must stay
// alive for the duration of the test or the directory is deleted.
fn create_test_manager() -> (SnapshotManager, tempfile::TempDir) {
let temp_dir = tempfile::tempdir().unwrap();
let manager = SnapshotManager::new(temp_dir.path().to_str().unwrap());
(manager, temp_dir)
}
// The three tier tests assume the default hot threshold of 5, i.e. that
// MOMENTRY_SNAPSHOT_HOT_THRESHOLD is not overridden in the test environment.
#[test]
fn test_compute_tier_hot() {
assert_eq!(SnapshotManager::compute_tier(5), SnapshotTier::Hot);
assert_eq!(SnapshotManager::compute_tier(10), SnapshotTier::Hot);
assert_eq!(SnapshotManager::compute_tier(100), SnapshotTier::Hot);
}
#[test]
fn test_compute_tier_warm() {
assert_eq!(SnapshotManager::compute_tier(1), SnapshotTier::Warm);
assert_eq!(SnapshotManager::compute_tier(4), SnapshotTier::Warm);
}
#[test]
fn test_compute_tier_cold() {
assert_eq!(SnapshotManager::compute_tier(0), SnapshotTier::Cold);
}
#[test]
fn test_tier_display() {
assert_eq!(SnapshotTier::Hot.to_string(), "hot");
assert_eq!(SnapshotTier::Warm.to_string(), "warm");
assert_eq!(SnapshotTier::Cold.to_string(), "cold");
}
// `ensure_file_dirs` creates the file directory and all four type directories.
#[test]
fn test_ensure_file_dirs_creates_structure() {
let (manager, _temp) = create_test_manager();
let file_uuid = "test_file_123";
manager.ensure_file_dirs(file_uuid).unwrap();
assert!(manager.file_snapshot_dir(file_uuid).exists());
assert!(manager.file_type_dir(file_uuid, "faces").exists());
assert!(manager.file_type_dir(file_uuid, "logos").exists());
assert!(manager.file_type_dir(file_uuid, "products").exists());
assert!(manager.file_type_dir(file_uuid, "ocr").exists());
}
#[test]
fn test_ensure_identity_dirs_creates_structure() {
let (manager, _temp) = create_test_manager();
let identity_uuid = "test_identity_456";
manager.ensure_identity_dirs(identity_uuid).unwrap();
assert!(manager.identity_snapshot_dir(identity_uuid).exists());
assert!(manager.identity_face_dir(identity_uuid).exists());
}
// An unknown file has no snapshot directory, so no types are listed.
#[test]
fn test_list_snapshot_types_empty() {
let (manager, _temp) = create_test_manager();
let types = manager.list_snapshot_types("nonexistent");
assert!(types.is_empty());
}
// Order of the listed types is not asserted, only membership.
#[test]
fn test_list_snapshot_types_after_creation() {
let (manager, _temp) = create_test_manager();
let file_uuid = "test_file_789";
manager.ensure_file_dirs(file_uuid).unwrap();
let types = manager.list_snapshot_types(file_uuid);
assert_eq!(types.len(), 4);
assert!(types.contains(&"faces".to_string()));
assert!(types.contains(&"logos".to_string()));
assert!(types.contains(&"products".to_string()));
assert!(types.contains(&"ocr".to_string()));
}
#[test]
fn test_remove_file_snapshots() {
let (manager, _temp) = create_test_manager();
let file_uuid = "test_file_remove";
manager.ensure_file_dirs(file_uuid).unwrap();
assert!(manager.file_snapshot_dir(file_uuid).exists());
manager.remove_file_snapshots(file_uuid).unwrap();
assert!(!manager.file_snapshot_dir(file_uuid).exists());
}
#[test]
fn test_remove_identity_snapshots() {
let (manager, _temp) = create_test_manager();
let identity_uuid = "test_identity_remove";
manager.ensure_identity_dirs(identity_uuid).unwrap();
assert!(manager.identity_snapshot_dir(identity_uuid).exists());
manager.remove_identity_snapshots(identity_uuid).unwrap();
assert!(!manager.identity_snapshot_dir(identity_uuid).exists());
}
// `snapshot_exists` checks the type directory, not whether it holds any images.
#[test]
fn test_snapshot_exists() {
let (manager, _temp) = create_test_manager();
let file_uuid = "test_exists";
assert!(!manager.snapshot_exists(file_uuid, "faces"));
manager.ensure_file_dirs(file_uuid).unwrap();
assert!(manager.snapshot_exists(file_uuid, "faces"));
assert!(!manager.snapshot_exists(file_uuid, "nonexistent"));
}
// Compares against the config values themselves, so this holds for any
// configured TTL.
#[test]
fn test_tier_ttl() {
let (manager, _temp) = create_test_manager();
let hot_ttl = manager.tier_ttl(SnapshotTier::Hot);
assert_eq!(hot_ttl, *config::snapshot::HOT_TTL_SECS);
let warm_ttl = manager.tier_ttl(SnapshotTier::Warm);
assert_eq!(warm_ttl, *config::snapshot::WARM_TTL_SECS);
let cold_ttl = manager.tier_ttl(SnapshotTier::Cold);
assert_eq!(cold_ttl, 0);
}
}

View File

@@ -2,12 +2,12 @@ use sha2::{Digest, Sha256};
use std::path::PathBuf;
/// Compute UUID from file path using SHA256
/// UUID = SHA256(user_path + filename)[0:16]
/// UUID = SHA256(user_path + filename)[0:32]
pub fn compute_uuid(user_path: &str, filename: &str) -> String {
let key = format!("{}/{}", user_path.trim_end_matches('/'), filename);
let hash = Sha256::digest(key.as_bytes());
let hash_str = hex::encode(hash);
hash_str[0..16].to_string()
hash_str[0..32].to_string()
}
/// Compute UUID from full file path
@@ -29,19 +29,16 @@ pub fn compute_uuid_from_path(full_path: &str) -> String {
/// Input: ./demo/video.mp4 or ./demo/path/to/video.mp4
/// Returns: (username, filepath) e.g., ("demo", "video.mp4") or ("demo", "path/to/video.mp4")
pub fn extract_user_from_relative_path(relative_path: &str) -> (String, String) {
// Remove leading ./
let path = relative_path.strip_prefix("./").unwrap_or(relative_path);
let path_buf = PathBuf::from(path);
// First component is username
let mut components = path_buf.components();
let username = components
.next()
.map(|c| c.as_os_str().to_string_lossy().to_string())
.unwrap_or_default();
// Remaining path (filepath)
let filepath: String = components
.map(|c| c.as_os_str().to_string_lossy().to_string())
.collect::<Vec<_>>()
@@ -57,6 +54,62 @@ pub fn compute_uuid_from_relative_path(relative_path: &str) -> String {
compute_uuid(&username, &filepath)
}
/// Get MAC address of primary network interface
/// Returns MAC address in format: a1:b2:c3:d4:e5:f6
pub fn get_mac_address() -> String {
use mac_address::get_mac_address;
match get_mac_address() {
Ok(Some(mac)) => {
let bytes = mac.bytes();
format!(
"{:02x}:{:02x}:{:02x}:{:02x}:{:02x}:{:02x}",
bytes[0], bytes[1], bytes[2], bytes[3], bytes[4], bytes[5]
)
}
Ok(None) => "00:00:00:00:00:00".to_string(),
Err(_) => "00:00:00:00:00:00".to_string(),
}
}
/// Compute a Birth UUID: the first 32 hex characters of
/// `SHA256("mac_address|birthday|path|filename")`.
///
/// Properties of this format:
/// - Location encoding: the path is part of the hashed key, so the same file
///   name under a different path gets a different UUID.
/// - Stability: the caller supplies the original registration timestamp
///   (`birthday`), so the value does not depend on the current time.
/// - Determinism: identical inputs always give the same UUID.
///
/// Trailing `/` characters on `path` are ignored.
pub fn compute_birth_uuid(
    mac_address: &str,
    birthday: &str, // Fixed timestamp of original registration
    path: &str,     // Canonical file path (Location)
    filename: &str,
) -> String {
    let location = path.trim_end_matches('/');
    let key = [mac_address, birthday, location, filename].join("|");

    let digest_hex = hex::encode(Sha256::digest(key.as_bytes()));
    // A SHA-256 hex digest is 64 characters; keep the first half.
    digest_hex[..32].to_string()
}
/// Check whether `uuid` has the Birth UUID shape: exactly 32 bytes long and
/// containing no `_` character.
///
/// Only length and the absence of underscores are checked; the characters are
/// not validated as hexadecimal.
pub fn is_birth_uuid(uuid: &str) -> bool {
    if uuid.len() != 32 {
        return false;
    }
    uuid.find('_').is_none()
}
/// Extract the username from a path relative to the sftpgo user root.
///
/// The username is the first `/`-separated component after an optional
/// leading `./`:
///
/// - `./demo/video.mp4` -> `"demo"`
/// - `warren/path/to/video.mp4` -> `"warren"`
///
/// When that first component is empty (an empty path, or an absolute path
/// such as `/Users/.../demo/video.mp4` whose text before the first `/` is
/// empty) the default user `"demo"` is returned. Absolute paths are not
/// resolved against any user root.
pub fn extract_username_from_path(path: &str) -> String {
    let relative = path.strip_prefix("./").unwrap_or(path);
    relative
        .split('/')
        .next()
        // `split` always yields at least one (possibly empty) item, so the
        // fallback below is only reachable if empty components are rejected.
        .filter(|component| !component.is_empty())
        .unwrap_or("demo")
        .to_string()
}
#[cfg(test)]
mod tests {
use super::*;
@@ -64,14 +117,14 @@ mod tests {
#[test]
fn test_uuid_computation() {
let uuid = compute_uuid("/Users/test/Videos", "video.mp4");
assert_eq!(uuid.len(), 16);
assert_eq!(uuid.len(), 32);
println!("UUID: {}", uuid);
}
#[test]
fn test_uuid_from_path() {
let uuid = compute_uuid_from_path("/Users/test/Videos/video.mp4");
assert_eq!(uuid.len(), 16);
assert_eq!(uuid.len(), 32);
}
#[test]
@@ -90,11 +143,102 @@ mod tests {
let uuid1 = compute_uuid_from_relative_path("./demo/video.mp4");
let uuid2 = compute_uuid_from_relative_path("./demo/video.mp4");
assert_eq!(uuid1, uuid2);
assert_eq!(uuid1.len(), 16);
assert_eq!(uuid1.len(), 32);
// Different users with same filename should have different UUIDs
let uuid_demo = compute_uuid_from_relative_path("./demo/video.mp4");
let uuid_warren = compute_uuid_from_relative_path("./warren/video.mp4");
assert_ne!(uuid_demo, uuid_warren);
}
/// The formatted MAC address is 17 characters long and colon-separated.
#[test]
fn test_get_mac_address() {
    // Six two-character groups plus five separators: a1:b2:c3:d4:e5:f6
    const FORMATTED_LEN: usize = 17;

    let address = get_mac_address();
    assert_eq!(address.len(), FORMATTED_LEN);
    assert!(address.contains(':'));
    println!("MAC Address: {}", address);
}
/// A Birth UUID is always 32 characters long.
#[test]
fn test_birth_uuid_generation() {
    let mac = "a1:b2:c3:d4:e5:f6";
    let birthday = "2026-04-27T22:00:00+08:00";
    let location = "/Users/test/Videos";
    let name = "video.mp4";

    let birth_uuid = compute_birth_uuid(mac, birthday, location, name);
    assert_eq!(birth_uuid.len(), 32);
    println!("Birth UUID: {}", birth_uuid);
}
/// Changing only the MAC address changes the Birth UUID.
#[test]
fn test_birth_uuid_different_mac() {
    // Everything except the MAC address is held constant.
    let uuid_for_mac = |mac: &str| {
        compute_birth_uuid(
            mac,
            "2026-04-27T10:00:00",
            "/Users/test/Videos",
            "video.mp4",
        )
    };

    let first = uuid_for_mac("a1:b2:c3:d4:e5:f6");
    let second = uuid_for_mac("d4:e5:f6:a1:b2:c3");
    assert_ne!(first, second);
}
/// Moving a file to a different path yields a new identity.
#[test]
fn test_birth_uuid_different_path() {
    // Everything except the location is held constant.
    let uuid_at = |location: &str| {
        compute_birth_uuid(
            "a1:b2:c3:d4:e5:f6",
            "2026-04-27T10:00:00",
            location,
            "video.mp4",
        )
    };

    let in_videos = uuid_at("/Users/test/Videos");
    let in_archive = uuid_at("/Users/test/Archive");
    assert_ne!(in_videos, in_archive);
}
/// Identical inputs always produce the identical Birth UUID.
#[test]
fn test_birth_uuid_same_elements() {
    let compute = || {
        compute_birth_uuid(
            "a1:b2:c3:d4:e5:f6",
            "2026-04-27T10:00:00",
            "/Users/test/Videos",
            "video.mp4",
        )
    };

    let first = compute();
    let second = compute();
    assert_eq!(first, second); // Same elements = same UUID
}
/// Output of `compute_birth_uuid` is recognised by `is_birth_uuid`.
#[test]
fn test_is_birth_uuid() {
    let mac = "a1:b2:c3:d4:e5:f6";
    let birthday = "2026-04-27T10:00:00";
    let generated = compute_birth_uuid(mac, birthday, "/Users/demo", "video.mp4");

    assert!(is_birth_uuid(&generated));
}
/// The username is the first component after the leading `./`.
#[test]
fn test_extract_username_from_path() {
    let cases = [
        ("./demo/video.mp4", "demo"),
        ("./warren/path/to/video.mp4", "warren"),
    ];

    for &(input, expected) in cases.iter() {
        let username = extract_username_from_path(input);
        assert_eq!(username, expected);
    }
}
}

View File

@@ -1,9 +1,8 @@
use crate::core::chunk;
use crate::core::db::PostgresDb;
use sqlx::PgPool;
use tokio::time::{sleep, Duration};
use tracing;
use uuid::Uuid;
use crate::core::chunk;
pub struct JobWorker {
pool: PgPool,
@@ -42,47 +41,39 @@ impl JobWorker {
}
async fn process_next_job(&self) -> anyhow::Result<bool> {
// 1. Fetch a QUEUED job
// We use a transaction to ensure no two workers pick the same job (atomic update)
let job_row: Option<(String, String, String, String, String, i64)> = sqlx::query_as(
// 1. Fetch a QUEUED job from monitor_jobs
// Using sqlx::query_as to map to tuple.
// Note: progress_total is int4 (i32).
let job_row: Option<(i32, String, i32)> = sqlx::query_as(
r#"
UPDATE dev.jobs
UPDATE dev.monitor_jobs
SET status = 'RUNNING', updated_at = NOW()
WHERE id = (
SELECT id FROM dev.jobs
SELECT id FROM dev.monitor_jobs
WHERE status = 'QUEUED'
ORDER BY created_at ASC
LIMIT 1
FOR UPDATE SKIP LOCKED
)
RETURNING id::text, asset_uuid, rule, status, processor_list, total_frames
RETURNING id, uuid, COALESCE(progress_total, 0)
"#,
)
.fetch_optional(&self.pool)
.await?;
if let Some((job_id, asset_uuid, rule, _status, _processors, total_frames)) = job_row {
let job_uuid =
Uuid::parse_str(&job_id).map_err(|e| anyhow::anyhow!("Invalid job UUID: {}", e))?;
if let Some((job_id, asset_uuid, total_frames)) = job_row {
tracing::info!(
"🚀 Processing Job {} for Asset {} (Rule: {})",
"🚀 Processing Job {} for Asset {} (Frames: {})",
job_id,
asset_uuid,
rule
total_frames
);
// 2. Execute Logic based on Rule
let result = match rule.as_str() {
"rule1" => {
let fps = self.get_asset_fps(&asset_uuid).await?;
chunk::rule1_ingest::ingest_rule1(&self.pool, &asset_uuid, fps).await
}
_ => {
tracing::warn!("Unknown rule type: {}", rule);
Ok(0)
}
};
// 2. Execute Logic (Default to rule1 for now as monitor_jobs doesn't store rule type explicitly)
let fps = self.get_asset_fps(&asset_uuid).await?;
let db = PostgresDb::from_pool(self.pool.clone());
let result = chunk::rule1_ingest::execute_rule1(&db, &asset_uuid, fps).await;
// 3. Update Job Status
match result {
@@ -93,17 +84,21 @@ impl JobWorker {
chunk_count
);
sqlx::query!(
"UPDATE dev.jobs SET status = 'COMPLETED', processed_frames = total_frames, updated_at = NOW() WHERE id = $1",
job_uuid
// Update monitor_jobs
// Using runtime query to avoid compile-time macro checks
sqlx::query(
"UPDATE dev.monitor_jobs SET status = 'COMPLETED', progress_current = progress_total, updated_at = NOW() WHERE id = $1"
)
.bind(job_id)
.execute(&self.pool)
.await?;
sqlx::query!(
"UPDATE dev.videos SET processing_status = 'COMPLETED' WHERE uuid = $1",
asset_uuid
// Update video processing_status
sqlx::query(
"UPDATE dev.videos SET processing_status = $1::jsonb WHERE file_uuid = $2",
)
.bind(serde_json::json!({"status": "COMPLETED"}))
.bind(asset_uuid)
.execute(&self.pool)
.await?;
}
@@ -116,11 +111,11 @@ impl JobWorker {
&err_msg
};
sqlx::query!(
"UPDATE dev.jobs SET status = 'FAILED', error_message = $2, updated_at = NOW() WHERE id = $1",
job_uuid,
safe_msg
sqlx::query(
"UPDATE dev.monitor_jobs SET status = 'FAILED', last_error = $2, updated_at = NOW() WHERE id = $1"
)
.bind(job_id)
.bind(safe_msg)
.execute(&self.pool)
.await?;
}
@@ -132,8 +127,9 @@ impl JobWorker {
}
async fn get_asset_fps(&self, uuid: &str) -> anyhow::Result<f64> {
// dev.videos now uses file_uuid and has a direct fps column
let fps: Option<f64> =
sqlx::query_scalar("SELECT (metadata->>'fps')::float FROM dev.videos WHERE uuid = $1")
sqlx::query_scalar("SELECT fps FROM dev.videos WHERE file_uuid = $1")
.bind(uuid)
.fetch_optional(&self.pool)
.await?;

View File

@@ -22,6 +22,7 @@ pub use core::person_identity::{
pub use core::probe::ProbeResult;
pub use core::storage::file_manager::FileManager;
pub use core::storage::output_dir::OutputDir;
pub use core::storage::snapshot_manager::SnapshotManager;
pub use core::storage::uuid;
pub use core::thumbnail::{ThumbnailExtractor, ThumbnailResult};
pub use ui::progress::{ProcessorType, ProgressState, ProgressUi};

File diff suppressed because it is too large Load Diff

View File

@@ -842,8 +842,30 @@ async fn main() -> Result<()> {
Commands::Register { path } => {
println!("Registering: {}", path);
// Compute UUID
let uuid = momentry_core::uuid::compute_uuid_from_path(&path);
// Compute Birth UUID
// Step 1: Connect to DB to check for existing Birthday
let db = PostgresDb::init().await?;
let path_buf = std::path::PathBuf::from(&path);
let filename = path_buf
.file_name()
.map(|n| n.to_string_lossy().to_string())
.unwrap_or_default();
let birthday = sqlx::query_scalar::<_, chrono::DateTime<chrono::Utc>>(
"SELECT registration_time FROM dev.videos WHERE file_name = $1 AND registration_time IS NOT NULL LIMIT 1"
)
.bind(&filename)
.fetch_optional(db.pool())
.await?
.map(|t| t.to_rfc3339())
.unwrap_or_else(|| chrono::Utc::now().to_rfc3339());
let uuid = momentry_core::uuid::compute_birth_uuid(
&momentry_core::uuid::get_mac_address(),
&birthday,
&path,
&filename,
);
println!("UUID: {}", uuid);
// Run ffprobe
@@ -909,11 +931,38 @@ async fn main() -> Result<()> {
.map(|n| n.to_string_lossy().to_string())
.unwrap_or_default();
// Calculate total_frames: prefer nb_frames (exact) over duration * fps (approximate)
let total_frames = {
let video_stream = probe_result
.streams
.iter()
.find(|s| s.codec_type.as_deref() == Some("video"));
if let Some(stream) = video_stream {
if let Some(nb_frames_str) = &stream.nb_frames {
if let Ok(nb_frames) = nb_frames_str.parse::<u64>() {
println!(" Using nb_frames from ffprobe: {} frames", nb_frames);
Some(nb_frames)
} else {
println!(" Failed to parse nb_frames, using duration * fps fallback");
Some((duration * fps).floor() as u64)
}
} else {
println!(" nb_frames not available, using duration * fps fallback");
Some((duration * fps).floor() as u64)
}
} else {
println!(" No video stream found");
Some(0)
}
};
let record = VideoRecord {
id: 0,
uuid: uuid.clone(),
file_uuid: uuid.clone(),
file_path,
file_name,
file_type: None,
duration,
width,
height,
@@ -921,10 +970,14 @@ async fn main() -> Result<()> {
probe_json: Some(json_str),
storage: Default::default(),
status: VideoStatus::Pending,
processing_status: Some(serde_json::json!({"phase": "REGISTERED"})),
birth_registration: None,
user_id: None,
job_id: None,
created_at: String::new(),
registration_time: None,
total_frames: total_frames.unwrap_or(0),
parent_uuid: None,
};
let video_id = db.register_video(&record).await?;
@@ -1026,10 +1079,32 @@ async fn main() -> Result<()> {
println!(" Mode: {}", processing_mode);
// Compute UUID if path is given
let uuid = if target.len() == 16 && !target.contains('/') {
let uuid = if target.len() == 32 && !target.contains('/') {
target.clone()
} else {
momentry_core::uuid::compute_uuid_from_path(&target)
// Connect to DB to find Birthday
let db = PostgresDb::init().await?;
let path_buf = std::path::PathBuf::from(&target);
let filename = path_buf
.file_name()
.map(|n| n.to_string_lossy().to_string())
.unwrap_or_default();
let birthday = sqlx::query_scalar::<_, chrono::DateTime<chrono::Utc>>(
"SELECT registration_time FROM dev.videos WHERE file_name = $1 AND registration_time IS NOT NULL LIMIT 1"
)
.bind(&filename)
.fetch_optional(db.pool())
.await?
.map(|t| t.to_rfc3339())
.unwrap_or_else(|| chrono::Utc::now().to_rfc3339());
momentry_core::uuid::compute_birth_uuid(
&momentry_core::uuid::get_mac_address(),
&birthday,
&target,
&filename,
)
};
// Get video from database
@@ -2377,17 +2452,34 @@ async fn main() -> Result<()> {
println!("Starting to process {} chunks...", sentence_chunks.len());
for (i, chunk) in sentence_chunks.iter().enumerate() {
if i < 3 {
println!("Processing chunk {}/{}: {}", i+1, sentence_chunks.len(), chunk.chunk_id);
println!(
"Processing chunk {}/{}: {}",
i + 1,
sentence_chunks.len(),
chunk.chunk_id
);
}
let text = chunk
.content
.get("text")
.and_then(|v| v.as_str())
.or_else(|| chunk.content.get("data").and_then(|data| data.get("text")).and_then(|v| v.as_str()))
.or_else(|| {
chunk
.content
.get("data")
.and_then(|data| data.get("text"))
.and_then(|v| v.as_str())
})
.or(chunk.text_content.as_deref())
.unwrap_or("");
eprintln!("Embedding chunk {}/{}: {} (text len: {})...", i+1, sentence_chunks.len(), chunk.chunk_id, text.len());
eprintln!(
"Embedding chunk {}/{}: {} (text len: {})...",
i + 1,
sentence_chunks.len(),
chunk.chunk_id,
text.len()
);
if text.is_empty() {
continue;
@@ -2583,7 +2675,19 @@ async fn main() -> Result<()> {
Ok(())
}
Commands::Lookup { path } => {
let uuid = momentry_core::uuid::compute_uuid_from_path(&path);
let path_buf = std::path::PathBuf::from(&path);
let filename = path_buf
.file_name()
.map(|n| n.to_string_lossy().to_string())
.unwrap_or_default();
// Lookup usually checks "what would be the UUID", so we use current time.
// If it were registered, it would have used its registration time.
let uuid = momentry_core::uuid::compute_birth_uuid(
&momentry_core::uuid::get_mac_address(),
&chrono::Utc::now().to_rfc3339(),
&path,
&filename,
);
println!("Path: {}", path);
println!("UUID: {}", uuid);
Ok(())
@@ -2612,10 +2716,10 @@ async fn main() -> Result<()> {
for video in videos {
println!(
"\nGenerating thumbnails for: {} ({})",
video.file_name, video.uuid
video.file_name, video.file_uuid
);
match extractor.get_or_create(&video.file_path, &video.uuid) {
match extractor.get_or_create(&video.file_path, &video.file_uuid) {
Ok(result) => {
println!(" Generated {} thumbnails", result.count);
}
@@ -2681,8 +2785,8 @@ async fn main() -> Result<()> {
for video in videos {
let (sentence_count, time_count) =
db.get_chunk_count(&video.uuid).await.unwrap_or((0, 0));
let vector_count = db.get_vector_count(&video.uuid).await.unwrap_or(0);
db.get_chunk_count(&video.file_uuid).await.unwrap_or((0, 0));
let vector_count = db.get_vector_count(&video.file_uuid).await.unwrap_or(0);
let total_chunks = sentence_count + time_count;
let psql_status = if total_chunks > 0 { "" } else { "-" };

296
src/processing/decision.rs Normal file
View File

@@ -0,0 +1,296 @@
//! Processing decision logic and system resource management
use std::path::Path;
use std::process::Command;
/// Outcome of deciding what to do with a video file.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum ProcessingDecision {
    /// Run processing.
    Process,
    /// Do not process.
    SkipComplete,
    /// Run processing, continuing from a checkpoint.
    ResumePartial,
    /// Run processing again regardless of existing output.
    ForceReprocess,
}

impl ProcessingDecision {
    /// Whether any processing work should run for this decision.
    ///
    /// True for every variant except [`ProcessingDecision::SkipComplete`].
    pub fn should_process(&self) -> bool {
        // Exhaustive on purpose: a new variant must be classified here.
        match self {
            ProcessingDecision::Process
            | ProcessingDecision::ResumePartial
            | ProcessingDecision::ForceReprocess => true,
            ProcessingDecision::SkipComplete => false,
        }
    }

    /// Whether processing should continue from a checkpoint.
    ///
    /// True only for [`ProcessingDecision::ResumePartial`].
    pub fn should_resume(&self) -> bool {
        *self == ProcessingDecision::ResumePartial
    }
}
/// Snapshot of the host's CPU, memory and GPU state, as collected by
/// [`SystemResources::check`].
#[derive(Debug, Clone)]
pub struct SystemResources {
    /// Percentage of CPU time reported idle (50.0 when it could not be read).
    pub cpu_idle_percent: f64,
    /// Free plus inactive memory, in MB.
    pub memory_available_mb: u64,
    /// Total memory reported by `sysctl hw.memsize`, in MB (0 if unknown).
    pub memory_total_mb: u64,
    /// Share of total memory that is not available, as a percentage.
    pub memory_used_percent: f64,
    /// Whether an NVIDIA or Apple GPU was detected.
    pub gpu_available: bool,
    /// Detected GPU kind; only meaningful when `gpu_available` is true.
    pub gpu_type: GpuType,
    /// GPU utilization percentage, when it could be determined.
    pub gpu_utilization: Option<f64>,
}

/// GPU type enumeration
#[derive(Debug, Clone, Copy)]
pub enum GpuType {
    /// Detected through a successful `nvidia-smi` run.
    Nvidia,
    /// Detected through `system_profiler` output mentioning Metal/Apple.
    AppleMps,
}

impl SystemResources {
    /// CPU idle percentage assumed when `top` cannot be run or parsed.
    const DEFAULT_CPU_IDLE: f64 = 50.0;
    /// Page size assumed when `vm_stat` does not state one in its header.
    const DEFAULT_PAGE_SIZE: u64 = 4096;

    /// Check current system resources by running the platform tools
    /// (`top`, `sysctl`, `vm_stat`, `nvidia-smi`, `system_profiler`).
    ///
    /// Tools that are missing or fail degrade to fallback values instead of
    /// returning an error.
    pub fn check() -> Self {
        let cpu_idle = Self::get_cpu_idle();
        let (mem_available, mem_total) = Self::get_memory_info();
        let mem_used_pct = if mem_total > 0 && mem_available <= mem_total {
            ((mem_total - mem_available) as f64 / mem_total as f64) * 100.0
        } else if mem_total > 0 {
            // "Available" exceeding "total" means the readings disagree;
            // report the conservative value.
            100.0
        } else {
            0.0
        };
        let (gpu_available, gpu_type, gpu_util) = Self::get_gpu_info();

        Self {
            cpu_idle_percent: cpu_idle,
            memory_available_mb: mem_available,
            memory_total_mb: mem_total,
            memory_used_percent: mem_used_pct,
            gpu_available,
            gpu_type,
            gpu_utilization: gpu_util,
        }
    }

    /// Check if parallel processing is possible.
    ///
    /// Requires at least 30% idle CPU and available memory of at least
    /// `required_memory_mb` and at least 4096 MB.
    pub fn can_parallel(&self, required_memory_mb: u64) -> bool {
        const MIN_CPU_IDLE: f64 = 30.0;
        const MIN_MEMORY_MB: u64 = 4096;

        self.cpu_idle_percent >= MIN_CPU_IDLE
            && self.memory_available_mb >= required_memory_mb
            && self.memory_available_mb >= MIN_MEMORY_MB
    }

    /// Recommend which modules can be processed in parallel.
    ///
    /// `"yolo"` is recommended when a GPU is available; `"ocr"`, `"face"` and
    /// `"pose"` when at least 8192 MB of memory is available.
    pub fn recommend_parallel_modules(&self) -> Vec<&'static str> {
        let mut recommended = Vec::new();

        if self.gpu_available {
            recommended.push("yolo");
        }
        if self.memory_available_mb >= 8192 {
            recommended.extend(["ocr", "face", "pose"]);
        }
        recommended
    }

    /// Get CPU idle percentage from `top -l 1 -n 1`, falling back to
    /// [`Self::DEFAULT_CPU_IDLE`] when the command or the parse fails.
    fn get_cpu_idle() -> f64 {
        Command::new("top")
            .args(["-l", "1", "-n", "1"])
            .output()
            .ok()
            .and_then(|o| Self::parse_cpu_idle(&String::from_utf8_lossy(&o.stdout)))
            .unwrap_or(Self::DEFAULT_CPU_IDLE)
    }

    /// Extract the idle percentage from `top` output such as
    /// `CPU usage: 5.26% user, 10.52% sys, 84.21% idle`.
    ///
    /// The percentage and the word `idle` are separate whitespace-delimited
    /// tokens, so the number is taken from the token immediately preceding
    /// `idle`. Values outside 0–100 are rejected.
    fn parse_cpu_idle(top_output: &str) -> Option<f64> {
        top_output.lines().find_map(|line| {
            let (before_idle, _) = line.split_once("idle")?;
            before_idle
                .split_whitespace()
                .next_back()?
                .trim_end_matches('%')
                .parse::<f64>()
                .ok()
                .filter(|pct| (0.0..=100.0).contains(pct))
        })
    }

    /// Get memory information (available and total in MB).
    ///
    /// Returns `(0, 0)` when `sysctl` cannot be run; when only `vm_stat`
    /// cannot be run, a quarter of the total is assumed to be available.
    fn get_memory_info() -> (u64, u64) {
        let sysctl = match Command::new("sysctl").args(["hw.memsize"]).output() {
            Ok(o) => o,
            Err(_) => return (0, 0),
        };
        // Output looks like `hw.memsize: 17179869184` (bytes).
        let total = String::from_utf8_lossy(&sysctl.stdout)
            .split_whitespace()
            .nth(1)
            .and_then(|v| v.parse::<u64>().ok())
            .unwrap_or(0)
            / 1024
            / 1024;

        let available = match Command::new("vm_stat").output() {
            Ok(v) => Self::parse_vm_stat_available_mb(&String::from_utf8_lossy(&v.stdout)),
            Err(_) => total / 4,
        };

        (available, total)
    }

    /// Compute available memory in MB (free + inactive pages) from `vm_stat`
    /// output.
    ///
    /// The page size is read from the header line
    /// (`... (page size of 16384 bytes)`) because it is not always 4096;
    /// [`Self::DEFAULT_PAGE_SIZE`] is used when the header is missing.
    fn parse_vm_stat_available_mb(vm_stat: &str) -> u64 {
        let page_size = vm_stat
            .split_once("page size of ")
            .and_then(|(_, rest)| rest.split_whitespace().next())
            .and_then(|v| v.parse::<u64>().ok())
            .unwrap_or(Self::DEFAULT_PAGE_SIZE);

        // Counter lines look like `Pages free:      12345.`
        let pages = |label: &str| -> u64 {
            vm_stat
                .lines()
                .find(|line| line.contains(label))
                .and_then(|line| line.split_whitespace().next_back())
                .and_then(|v| v.trim_end_matches('.').parse().ok())
                .unwrap_or(0)
        };

        pages("Pages free:")
            .saturating_add(pages("Pages inactive:"))
            .saturating_mul(page_size)
            / 1024
            / 1024
    }

    /// Parse `nvidia-smi --query-gpu=utilization.gpu` output.
    ///
    /// The tool prints one line per GPU; the first non-empty line is used so
    /// multi-GPU hosts still report a value.
    fn parse_nvidia_utilization(output: &str) -> Option<f64> {
        output
            .lines()
            .map(str::trim)
            .find(|line| !line.is_empty())?
            .parse()
            .ok()
    }

    /// Get GPU information as `(available, type, utilization)`.
    ///
    /// When no GPU is found the type slot still holds `GpuType::Nvidia`;
    /// callers must consult the `available` flag first.
    fn get_gpu_info() -> (bool, GpuType, Option<f64>) {
        // Check NVIDIA GPU
        let nvidia_output = Command::new("nvidia-smi")
            .args([
                "--query-gpu=utilization.gpu",
                "--format=csv,noheader,nounits",
            ])
            .output();

        if let Ok(o) = nvidia_output {
            if o.status.success() {
                let util = Self::parse_nvidia_utilization(&String::from_utf8_lossy(&o.stdout));
                return (true, GpuType::Nvidia, util);
            }
        }

        // Check Apple MPS (Metal Performance Shaders)
        let mps_output = Command::new("system_profiler")
            .args(["SPDisplaysDataType", "-detailLevel", "mini"])
            .output();

        if let Ok(o) = mps_output {
            let s = String::from_utf8_lossy(&o.stdout);
            if s.contains("Metal") || s.contains("Apple") {
                // Utilization is not measured here; 0.0 is a placeholder.
                return (true, GpuType::AppleMps, Some(0.0));
            }
        }

        (false, GpuType::Nvidia, None)
    }
}

impl std::fmt::Display for SystemResources {
    /// One-line summary: CPU idle, available/total memory in GB, used memory
    /// percentage, and GPU utilization (or `N/A` without a GPU).
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(
            f,
            "CPU: {:.1}% idle, Memory: {:.1}GB/{:.1}GB ({:.0}% used), GPU: {}",
            self.cpu_idle_percent,
            self.memory_available_mb as f64 / 1024.0,
            self.memory_total_mb as f64 / 1024.0,
            self.memory_used_percent,
            if self.gpu_available {
                format!("{:.0}% utilized", self.gpu_utilization.unwrap_or(0.0))
            } else {
                "N/A".to_string()
            }
        )
    }
}
/// JSON file completeness status, as reported by `check_json_completeness`.
#[derive(Debug, Clone, PartialEq)]
pub enum JsonCompleteness {
// The file parsed as JSON and did not carry an unfinished progress counter.
Complete,
// The `segments` object reported `current` < `total` (with `total` > 0).
Partial { current: u32, total: u32 },
// The file could not be read, was blank, or was not valid JSON.
Empty,
}
/// Decide how to handle a video given its JSON output file and CLI flags.
///
/// Checked in order:
/// 1. No file at `json_path` -> `Process`.
/// 2. `force` -> `ForceReprocess`.
/// 3. `resume` -> `ResumePartial`.
/// 4. Otherwise the file contents decide: complete -> `SkipComplete`,
///    empty -> `Process`, partial -> `SkipComplete` after printing a hint on
///    stderr about `--resume` / `--force`.
pub fn decide_processing(json_path: &Path, force: bool, resume: bool) -> ProcessingDecision {
    match (json_path.exists(), force, resume) {
        (false, _, _) => return ProcessingDecision::Process,
        (true, true, _) => return ProcessingDecision::ForceReprocess,
        (true, false, true) => return ProcessingDecision::ResumePartial,
        (true, false, false) => {}
    }

    let (current, total) = match check_json_completeness(json_path) {
        JsonCompleteness::Complete => return ProcessingDecision::SkipComplete,
        JsonCompleteness::Empty => return ProcessingDecision::Process,
        JsonCompleteness::Partial { current, total } => (current, total),
    };

    // Partial output without an explicit flag: leave the file alone and tell
    // the user how to proceed.
    let percent = (current as f64 / total as f64) * 100.0;
    eprintln!("\n⚠️ Found incomplete JSON file: {}", json_path.display());
    eprintln!(" Progress: {}/{} ({:.1}%)", current, total, percent);
    eprintln!(" Use --resume to continue from checkpoint");
    eprintln!(" Use --force to reprocess from scratch");
    ProcessingDecision::SkipComplete
}
/// Classify the JSON file at `json_path`.
///
/// - `Empty`: the file cannot be read, is blank, or is not valid JSON.
/// - `Partial { current, total }`: the top-level `segments` value is an
///   object whose `total` is positive and whose `current` is below it
///   (missing or non-integer counters count as 0).
/// - `Complete`: everything else, including a `segments` array of any length
///   and a document with no `segments` key at all.
pub fn check_json_completeness(json_path: &Path) -> JsonCompleteness {
    let parsed = std::fs::read_to_string(json_path)
        .ok()
        .filter(|text| !text.trim().is_empty())
        .and_then(|text| serde_json::from_str::<serde_json::Value>(&text).ok());

    let json = match parsed {
        Some(value) => value,
        None => return JsonCompleteness::Empty,
    };

    if let Some(serde_json::Value::Object(progress)) = json.get("segments") {
        // NOTE(review): counters are narrowed with `as u32`, so values above
        // u32::MAX wrap — confirm that cannot occur in practice.
        let counter =
            |key: &str| progress.get(key).and_then(|v| v.as_u64()).unwrap_or(0) as u32;
        let current = counter("current");
        let total = counter("total");

        if total > 0 && current < total {
            return JsonCompleteness::Partial { current, total };
        }
    }

    JsonCompleteness::Complete
}

421
src/processing/handlers.rs Normal file
View File

@@ -0,0 +1,421 @@
//! Command handlers for Momentry Core CLI
//!
//! This module contains the actual implementation of CLI command handlers.
use anyhow::Result;
use std::path::Path;
/// Handle the video registration command (stub).
///
/// Prints what would happen and a simulated 16-hex-digit identifier derived
/// from hashing `path` with the standard library's `DefaultHasher`. Nothing
/// is written anywhere.
pub async fn handle_register(path: &str) -> Result<()> {
    use std::collections::hash_map::DefaultHasher;
    use std::hash::{Hash, Hasher};

    println!("Registering video: {}", path);

    // TODO: Implement proper video registration
    println!("(Video registration would happen here)");

    // Simple UUID simulation: 64-bit hash of the path, zero-padded hex.
    let mut state = DefaultHasher::new();
    path.hash(&mut state);
    let digest = state.finish();
    println!("UUID that would be generated: {:016x}", digest);

    Ok(())
}
/// Handle the video processing command (stub).
///
/// `target` is treated as a UUID when it consists solely of hex digits and is
/// 32 characters long (the current UUID length) or 16 characters long (the
/// previous length, still accepted). Anything else is treated as a file
/// system path.
///
/// # Errors
///
/// Returns an error when `target` is a path that does not exist.
pub async fn handle_process(
    target: &str,
    modules: Option<Vec<String>>,
    cloud: bool,
    force: bool,
    resume: bool,
) -> Result<()> {
    println!("Processing target: {}", target);
    println!("Modules: {:?}", modules);
    println!("Cloud processing: {}", cloud);
    println!("Force reprocess: {}", force);
    println!("Resume: {}", resume);

    // Determine if target is a UUID or a path
    let is_uuid =
        matches!(target.len(), 16 | 32) && target.chars().all(|c| c.is_ascii_hexdigit());

    if is_uuid {
        println!("Target is a UUID, would process video: {}", target);
        // TODO: Implement UUID-based processing
        return Ok(());
    }

    if !Path::new(target).exists() {
        anyhow::bail!("Path does not exist: {}", target);
    }

    // For now, just show decision
    if force {
        println!("Force reprocessing: {}", target);
    } else if resume {
        println!("Resume processing: {}", target);
    } else {
        println!("Normal processing: {}", target);
    }

    Ok(())
}
/// Handle the video chunking command (stub): only prints what would be done
/// for `uuid`.
pub async fn handle_chunk(uuid: &str) -> Result<()> {
    let video_uuid = uuid;
    println!("Chunking video with UUID: {}", video_uuid);

    // TODO: Connect to database and fetch video
    println!("Would connect to database and fetch video with UUID: {}", video_uuid);
    println!("(Would show video details)");

    // TODO: Implement chunking logic
    Ok(())
}
/// Handle the story generation command (stub): only prints what would be done
/// for `uuid`.
pub async fn handle_story(uuid: &str) -> Result<()> {
    let video_uuid = uuid;
    println!("Generating story for video with UUID: {}", video_uuid);

    // TODO: Connect to database and fetch video
    println!("Would connect to database and fetch video with UUID: {}", video_uuid);
    for line in ["(Would show video details)", "Generating story..."].iter() {
        println!("{}", line);
    }

    // TODO: Implement story generation logic
    Ok(())
}
/// Handle the vectorization command (stub): only prints what would be done
/// for `uuid`.
pub async fn handle_vectorize(uuid: &str) -> Result<()> {
    let video_uuid = uuid;
    println!("Vectorizing video with UUID: {}", video_uuid);

    // TODO: Connect to database and fetch video
    println!("Would connect to database and fetch video with UUID: {}", video_uuid);
    for line in ["(Would show video details)", "Starting vectorization..."].iter() {
        println!("{}", line);
    }

    // TODO: Implement vectorization logic
    Ok(())
}
/// Handle the video playback command (stub): prints the target and a
/// placeholder message.
pub async fn handle_play(target: &str) -> Result<()> {
    let announcement = format!("Playing target: {}", target);
    println!("{}", announcement);

    // TODO: Implement video playback logic
    println!("(Video playback would start here)");
    Ok(())
}
/// Handle the file system watching command (stub): lists the directories that
/// would be watched.
pub async fn handle_watch(directories: Vec<String>) -> Result<()> {
    println!("Watching directories for new videos:");
    directories.iter().for_each(|dir| println!(" - {}", dir));

    // TODO: Implement directory watching logic
    println!("(Directory watcher would start here)");
    Ok(())
}
/// Handle the system information command (stub).
///
/// Prints a heading and, when `gpu` is set, a note that GPU information was
/// requested.
pub async fn handle_system(gpu: bool) -> Result<()> {
    println!("System Information:");

    // TODO: Implement system information gathering
    match gpu {
        true => {
            println!("GPU information requested");
            // TODO: Add GPU-specific info
        }
        false => {}
    }

    Ok(())
}
/// Handle the server startup command (stub).
///
/// Only prints the requested address; the server itself is started through
/// the binary entry point.
pub async fn handle_server(host: &str, port: u16) -> Result<()> {
    let (bind_host, bind_port) = (host, port);
    println!("Starting API server on {}:{}", bind_host, bind_port);

    // This is handled by the server binary directly
    println!("(Server is started through binary entry point)");
    Ok(())
}
/// Handle the worker startup command (stub).
///
/// Prints the effective configuration; unset options default to 2 concurrent
/// jobs, a 5 second poll interval and a batch size of 10.
pub async fn handle_worker(
    max_concurrent: Option<u32>,
    poll_interval: Option<u64>,
    batch_size: Option<u32>,
) -> Result<()> {
    let concurrency = max_concurrent.unwrap_or(2);
    let interval_secs = poll_interval.unwrap_or(5);
    let batch = batch_size.unwrap_or(10);

    println!("Starting worker with configuration:");
    println!(" Max concurrent jobs: {}", concurrency);
    println!(" Poll interval: {} seconds", interval_secs);
    println!(" Batch size: {}", batch);

    // TODO: Implement worker startup logic
    Ok(())
}
/// Handle the search query command (stub): prints the query and a placeholder
/// where results would be listed.
pub async fn handle_query(query: &str) -> Result<()> {
    println!("Searching for: {}", query);

    // TODO: Connect to database and search, then print the results
    println!("Would search for: {}", query);
    println!("(Search would return results here)");

    Ok(())
}
/// Handle the file lookup command (stub).
///
/// No database lookup is performed yet, so the "not found" branch is always
/// the one that runs.
pub async fn handle_lookup(path: &str) -> Result<()> {
    println!("Looking up file: {}", path);

    // TODO: Connect to database and lookup file
    println!("Looking up file in database: {}", path);
    let video: Option<String> = None; // Placeholder

    if video.is_some() {
        println!("File found in database: {}", path);
        // In real implementation, show video details
    } else {
        println!("File not found in database: {}", path);
        println!("(UUID would be generated for new files)");
    }

    Ok(())
}
/// Handle the UUID resolution command (stub): prints the fields that would be
/// shown for `uuid` once the database lookup exists.
pub async fn handle_resolve(uuid: &str) -> Result<()> {
    println!("Resolving UUID: {}", uuid);

    // TODO: Connect to database and fetch video
    println!("Would connect to database and fetch video with UUID: {}", uuid);

    let placeholder_lines = [
        "Resolved to:",
        " File name: (would show from database)",
        " File path: (would show from database)",
        " Duration: (would show from database)",
        " Resolution: (would show from database)",
    ];
    for line in placeholder_lines.iter() {
        println!("{}", line);
    }

    Ok(())
}
/// Handle the thumbnail extraction command (stub).
///
/// `count` defaults to 5. With a UUID the message targets that video;
/// without one it targets all pending videos.
pub async fn handle_thumbnails(uuid: Option<String>, count: Option<u32>) -> Result<()> {
    let thumbnail_count = count.unwrap_or(5);

    match uuid {
        Some(video_uuid) => {
            println!(
                "Extracting {} thumbnails for video UUID: {}",
                thumbnail_count, video_uuid
            );

            // TODO: Connect to database and fetch video
            println!("Would fetch video with UUID: {}", video_uuid);
            println!("(Would show video details)");
            // TODO: Implement thumbnail extraction
        }
        None => {
            println!(
                "Extracting {} thumbnails from all pending videos",
                thumbnail_count
            );
            // TODO: Implement bulk thumbnail extraction
        }
    }

    Ok(())
}
/// Handle the video status command (stub).
///
/// With a UUID, prints the status fields that would be shown for that video;
/// without one, prints that all videos would be listed.
pub async fn handle_status(uuid: Option<String>) -> Result<()> {
    // TODO: Connect to database
    println!("Would connect to database to check status");

    let video_uuid = match uuid {
        Some(value) => value,
        None => {
            println!("Checking status of all videos");
            println!("(Would list videos from database)");
            return Ok(());
        }
    };

    println!("Checking status for video UUID: {}", video_uuid);
    // Would fetch video from database
    println!("Would fetch video with UUID: {}", video_uuid);

    println!("Video Status:");
    println!(" UUID: {}", video_uuid);
    let placeholder_fields = [
        " File name: (would show from database)",
        " Status: (would show from database)",
        " Processing status: (would show from database)",
        " Created at: (would show from database)",
        " Registration time: (would show from database)",
    ];
    for line in placeholder_fields.iter() {
        println!("{}", line);
    }
    // TODO: Add more detailed processing status

    Ok(())
}
/// Handle the backup operations command (stub).
///
/// Recognised actions are `create`, `list`, `restore` and `clean`/`cleanup`
/// (`days` defaults to 30). An unknown action prints a usage hint and still
/// returns `Ok(())`.
pub async fn handle_backup(action: &str, days: Option<u32>) -> Result<()> {
    match action {
        // TODO: Implement backup creation
        "create" => println!("Creating backup..."),
        // TODO: Implement backup listing
        "list" => println!("Listing backups..."),
        // TODO: Implement backup restoration
        "restore" => println!("Restoring from backup..."),
        // TODO: Implement backup cleanup
        "clean" | "cleanup" => {
            let retention_days = days.unwrap_or(30);
            println!("Cleaning backups older than {} days...", retention_days);
        }
        unknown => {
            println!("Unknown backup action: {}", unknown);
            println!("Available actions: create, list, restore, clean(up)");
        }
    }

    Ok(())
}
/// Handle API key management command
pub async fn handle_api_key(
action: &crate::cli::args::ApiKeyAction,
name: Option<String>,
key_type: Option<String>,
ttl: Option<u64>,
key: Option<String>,
key_id: Option<String>,
) -> Result<()> {
println!("API Key Management:");
println!(" Action: {:?}", action);
// TODO: Implement proper API key management
match action {
crate::cli::args::ApiKeyAction::Create => {
let name = name.unwrap_or_else(|| "default".to_string());
println!("Would create API key with name: {}", name);
println!("(API key creation would be implemented here)");
}
crate::cli::args::ApiKeyAction::List => {
println!("Would list API keys");
println!("(API key listing would be implemented here)");
}
crate::cli::args::ApiKeyAction::Validate => {
let key = key.expect("API key required for validation");
println!("Would validate API key: {}...", &key[..8]);
println!("(API key validation would be implemented here)");
}
crate::cli::args::ApiKeyAction::Revoke => {
let key_id = key_id.expect("Key ID required for revocation");
println!("Would revoke API key with ID: {}", key_id);
println!("(API key revocation would be implemented here)");
}
crate::cli::args::ApiKeyAction::Rotate => {
let key_id = key_id.expect("Key ID required for rotation");
println!("Would rotate API key with ID: {}", key_id);
println!("(API key rotation would be implemented here)");
}
crate::cli::args::ApiKeyAction::Stats => {
println!("Would show API key statistics");
println!("(API key statistics would be implemented here)");
}
}
Ok(())
}
/// Handle Gitea integration command.
///
/// This is currently a placeholder: each action only prints a status line;
/// the actual integration work is still marked TODO.
///
/// # Arguments
/// * `action` - The Gitea sub-command to run.
/// * `name` - Not used by the current placeholder implementation.
/// * `url` - Not used by the current placeholder implementation.
/// * `token` - Not used by the current placeholder implementation.
/// * `repo_id` - Repository identifier; required by `Delete`.
///
/// # Errors
/// Returns an invalid-input error when `Delete` is run without `repo_id`.
pub async fn handle_gitea(
    action: &crate::cli::args::GiteaAction,
    name: Option<String>,
    url: Option<String>,
    token: Option<String>,
    repo_id: Option<String>,
) -> Result<()> {
    match action {
        crate::cli::args::GiteaAction::Create => {
            println!("Creating Gitea integration...");
            // TODO: Implement Gitea integration creation
        }
        crate::cli::args::GiteaAction::List => {
            println!("Listing Gitea integrations...");
            // TODO: Implement Gitea integration listing
        }
        crate::cli::args::GiteaAction::Delete => {
            // A missing ID is a user mistake: surface it as an error rather
            // than panicking the whole CLI.
            let repo_id = repo_id.ok_or_else(|| {
                std::io::Error::new(
                    std::io::ErrorKind::InvalidInput,
                    "Repository ID required for deletion",
                )
            })?;
            println!("Deleting Gitea integration: {}", repo_id);
            // TODO: Implement Gitea integration deletion
        }
        crate::cli::args::GiteaAction::Verify => {
            println!("Verifying Gitea integration...");
            // TODO: Implement Gitea integration verification
        }
    }
    Ok(())
}
/// Handle n8n workflow command.
///
/// This is currently a placeholder: each action only prints a status line;
/// the actual workflow operations are still marked TODO.
///
/// # Arguments
/// * `action` - The n8n sub-command to run.
/// * `name` - Not used by the current placeholder implementation.
/// * `url` - Not used by the current placeholder implementation.
/// * `workflow_id` - Workflow identifier; required by `Delete`.
///
/// # Errors
/// Returns an invalid-input error when `Delete` is run without `workflow_id`.
pub async fn handle_n8n(
    action: &crate::cli::args::N8nAction,
    name: Option<String>,
    url: Option<String>,
    workflow_id: Option<String>,
) -> Result<()> {
    match action {
        crate::cli::args::N8nAction::Create => {
            println!("Creating n8n workflow...");
            // TODO: Implement n8n workflow creation
        }
        crate::cli::args::N8nAction::List => {
            println!("Listing n8n workflows...");
            // TODO: Implement n8n workflow listing
        }
        crate::cli::args::N8nAction::Delete => {
            // A missing ID is a user mistake: surface it as an error rather
            // than panicking the whole CLI.
            let workflow_id = workflow_id.ok_or_else(|| {
                std::io::Error::new(
                    std::io::ErrorKind::InvalidInput,
                    "Workflow ID required for deletion",
                )
            })?;
            println!("Deleting n8n workflow: {}", workflow_id);
            // TODO: Implement n8n workflow deletion
        }
        crate::cli::args::N8nAction::Verify => {
            println!("Verifying n8n workflow...");
            // TODO: Implement n8n workflow verification
        }
    }
    Ok(())
}

8
src/processing/mod.rs Normal file
View File

@@ -0,0 +1,8 @@
//! Video processing modules and logic
// Submodules that make up the processing layer.
pub mod decision;
pub mod handlers;
pub mod modules;
// Glob re-exports: items from `decision` and `handlers` are reachable directly
// from this module. `modules` is not re-exported here and must be used by path.
pub use decision::*;
pub use handlers::*;

View File

@@ -0,0 +1,49 @@
//! ASR (Automatic Speech Recognition) processing module
use anyhow::Result;
use momentry_core::ui::progress::{ProcessorType, ProgressState, ProgressUi};
use momentry_core::OutputDir;
use std::path::Path;
use std::sync::{Arc, Mutex};
/// Process ASR module
pub async fn process_asr_module(
asr_path: &Path,
video_path: &str,
uuid: &str,
progress_state: &Arc<Mutex<ProgressState>>,
ui: &Arc<Mutex<Option<ProgressUi>>>,
) -> Result<()> {
{
let mut state = progress_state.lock().unwrap();
state.get_processor(ProcessorType::Asr).start(1);
}
let asr_result = momentry_core::core::processor::process_asr(
video_path,
asr_path.to_str().unwrap(),
Some(uuid),
)
.await?;
let asr_json = serde_json::to_string_pretty(&asr_result)?;
std::fs::write(asr_path, &asr_json)?;
let output_dir = OutputDir::new();
let _ = output_dir.backup_file(uuid, "asr.json");
println!(" ✓ ASR saved: {} segments", asr_result.segments.len());
{
let mut state = progress_state.lock().unwrap();
state
.get_processor(ProcessorType::Asr)
.complete(&format!("{} segments", asr_result.segments.len()));
}
if let Some(ref mut ui) = *ui.lock().unwrap() {
let _ = ui.render();
}
Ok(())
}

View File

@@ -0,0 +1,49 @@
//! ASRX (Extended ASR) processing module
use anyhow::Result;
use momentry_core::ui::progress::{ProcessorType, ProgressState, ProgressUi};
use momentry_core::OutputDir;
use std::path::Path;
use std::sync::{Arc, Mutex};
/// Process ASRX module
pub async fn process_asrx_module(
asrx_path: &Path,
video_path: &str,
uuid: &str,
progress_state: &Arc<Mutex<ProgressState>>,
ui: &Arc<Mutex<Option<ProgressUi>>>,
) -> Result<()> {
{
let mut state = progress_state.lock().unwrap();
state.get_processor(ProcessorType::Asrx).start(1);
}
let asrx_result = momentry_core::core::processor::process_asrx(
video_path,
asrx_path.to_str().unwrap(),
Some(uuid),
)
.await?;
let asrx_json = serde_json::to_string_pretty(&asrx_result)?;
std::fs::write(asrx_path, &asrx_json)?;
let output_dir = OutputDir::new();
let _ = output_dir.backup_file(uuid, "asrx.json");
println!(" ✓ ASRX saved: {} segments", asrx_result.segments.len());
{
let mut state = progress_state.lock().unwrap();
state
.get_processor(ProcessorType::Asrx)
.complete(&format!("{} segments", asrx_result.segments.len()));
}
if let Some(ref mut ui) = *ui.lock().unwrap() {
let _ = ui.render();
}
Ok(())
}

View File

@@ -0,0 +1,49 @@
//! Caption generation processing module
use anyhow::Result;
use momentry_core::ui::progress::{ProcessorType, ProgressState, ProgressUi};
use momentry_core::OutputDir;
use std::path::Path;
use std::sync::{Arc, Mutex};
/// Process Caption module
pub async fn process_caption_module(
caption_path: &Path,
video_path: &str,
uuid: &str,
progress_state: &Arc<Mutex<ProgressState>>,
ui: &Arc<Mutex<Option<ProgressUi>>>,
) -> Result<()> {
{
let mut state = progress_state.lock().unwrap();
state.get_processor(ProcessorType::Caption).start(1);
}
let caption_result = momentry_core::core::processor::process_caption(
video_path,
caption_path.to_str().unwrap(),
Some(uuid),
)
.await?;
let caption_json = serde_json::to_string_pretty(&caption_result)?;
std::fs::write(caption_path, &caption_json)?;
let output_dir = OutputDir::new();
let _ = output_dir.backup_file(uuid, "caption.json");
println!(" ✓ Caption saved: {} frames", caption_result.total_frames);
{
let mut state = progress_state.lock().unwrap();
state
.get_processor(ProcessorType::Caption)
.complete(&format!("{} frames", caption_result.total_frames));
}
if let Some(ref mut ui) = *ui.lock().unwrap() {
let _ = ui.render();
}
Ok(())
}

View File

@@ -0,0 +1,49 @@
//! CUT (Scene Cut Detection) processing module
use anyhow::Result;
use momentry_core::ui::progress::{ProcessorType, ProgressState, ProgressUi};
use momentry_core::OutputDir;
use std::path::Path;
use std::sync::{Arc, Mutex};
/// Process CUT module
pub async fn process_cut_module(
cut_path: &Path,
video_path: &str,
uuid: &str,
progress_state: &Arc<Mutex<ProgressState>>,
ui: &Arc<Mutex<Option<ProgressUi>>>,
) -> Result<()> {
{
let mut state = progress_state.lock().unwrap();
state.get_processor(ProcessorType::Cut).start(1);
}
let cut_result = momentry_core::core::processor::process_cut(
video_path,
cut_path.to_str().unwrap(),
Some(uuid),
)
.await?;
let cut_json = serde_json::to_string_pretty(&cut_result)?;
std::fs::write(cut_path, &cut_json)?;
let output_dir = OutputDir::new();
let _ = output_dir.backup_file(uuid, "cut.json");
println!(" ✓ CUT saved: {} scenes", cut_result.scenes.len());
{
let mut state = progress_state.lock().unwrap();
state
.get_processor(ProcessorType::Cut)
.complete(&format!("{} scenes", cut_result.scenes.len()));
}
if let Some(ref mut ui) = *ui.lock().unwrap() {
let _ = ui.render();
}
Ok(())
}

View File

@@ -0,0 +1,49 @@
//! Face detection and recognition processing module
use anyhow::Result;
use momentry_core::ui::progress::{ProcessorType, ProgressState, ProgressUi};
use momentry_core::OutputDir;
use std::path::Path;
use std::sync::{Arc, Mutex};
/// Process Face module
pub async fn process_face_module(
face_path: &Path,
video_path: &str,
uuid: &str,
progress_state: &Arc<Mutex<ProgressState>>,
ui: &Arc<Mutex<Option<ProgressUi>>>,
) -> Result<()> {
{
let mut state = progress_state.lock().unwrap();
state.get_processor(ProcessorType::Face).start(1);
}
let face_result = momentry_core::core::processor::process_face(
video_path,
face_path.to_str().unwrap(),
Some(uuid),
)
.await?;
let face_json = serde_json::to_string_pretty(&face_result)?;
std::fs::write(face_path, &face_json)?;
let output_dir = OutputDir::new();
let _ = output_dir.backup_file(uuid, "face.json");
println!(" ✓ Face saved: {} frames", face_result.frames.len());
{
let mut state = progress_state.lock().unwrap();
state
.get_processor(ProcessorType::Face)
.complete(&format!("{} frames", face_result.frames.len()));
}
if let Some(ref mut ui) = *ui.lock().unwrap() {
let _ = ui.render();
}
Ok(())
}

View File

@@ -0,0 +1,21 @@
//! Video processing modules
// One submodule per processor.
pub mod asr;
pub mod asrx;
pub mod caption;
pub mod cut;
pub mod face;
pub mod ocr;
pub mod pose;
pub mod story;
pub mod yolo;
// Glob re-exports so every submodule's public items are reachable directly
// from this module.
pub use asr::*;
pub use asrx::*;
pub use caption::*;
pub use cut::*;
pub use face::*;
pub use ocr::*;
pub use pose::*;
pub use story::*;
pub use yolo::*;

View File

@@ -0,0 +1,52 @@
//! OCR (Optical Character Recognition) processing module
use anyhow::Result;
use momentry_core::ui::progress::{ProcessorType, ProgressState, ProgressUi};
use momentry_core::OutputDir;
use std::path::Path;
use std::sync::{Arc, Mutex};
/// Process OCR module
pub async fn process_ocr_module(
ocr_path: &Path,
video_path: &str,
uuid: &str,
progress_state: &Arc<Mutex<ProgressState>>,
ui: &Arc<Mutex<Option<ProgressUi>>>,
) -> Result<()> {
{
let mut state = progress_state.lock().unwrap();
state.get_processor(ProcessorType::Ocr).start(1);
}
let ocr_result = momentry_core::core::processor::process_ocr(
video_path,
ocr_path.to_str().unwrap(),
Some(uuid),
)
.await?;
let ocr_json = serde_json::to_string_pretty(&ocr_result)?;
std::fs::write(ocr_path, &ocr_json)?;
let output_dir = OutputDir::new();
let _ = output_dir.backup_file(uuid, "ocr.json");
println!(
" ✓ OCR saved: {} frames with text",
ocr_result.frames.len()
);
{
let mut state = progress_state.lock().unwrap();
state
.get_processor(ProcessorType::Ocr)
.complete(&format!("{} frames", ocr_result.frames.len()));
}
if let Some(ref mut ui) = *ui.lock().unwrap() {
let _ = ui.render();
}
Ok(())
}

View File

@@ -0,0 +1,50 @@
//! Pose estimation processing module
use anyhow::Result;
use momentry_core::ui::progress::{ProcessorType, ProgressState, ProgressUi};
use momentry_core::OutputDir;
use std::path::Path;
use std::sync::{Arc, Mutex};
/// Process Pose module
pub async fn process_pose_module(
pose_path: &Path,
video_path: &str,
uuid: &str,
progress_state: &Arc<Mutex<ProgressState>>,
ui: &Arc<Mutex<Option<ProgressUi>>>,
) -> Result<()> {
{
let mut state = progress_state.lock().unwrap();
state.get_processor(ProcessorType::Pose).start(1);
}
let pose_result = momentry_core::core::processor::process_pose(
video_path,
pose_path.to_str().unwrap(),
Some(uuid),
)
.await?;
let pose_json = serde_json::to_string_pretty(&pose_result)?;
std::fs::write(pose_path, &pose_json)?;
let output_dir = OutputDir::new();
let _ = output_dir.backup_file(uuid, "pose.json");
println!(" ✓ Pose saved: {} frames", pose_result.frames.len());
{
let mut state = progress_state.lock().unwrap();
state
.get_processor(ProcessorType::Pose)
.complete(&format!("{} frames", pose_result.frames.len()));
state.stop();
}
if let Some(ref mut ui) = *ui.lock().unwrap() {
let _ = ui.render();
}
Ok(())
}

View File

@@ -0,0 +1,53 @@
//! Story generation processing module
use anyhow::Result;
use momentry_core::ui::progress::{ProcessorType, ProgressState, ProgressUi};
use momentry_core::OutputDir;
use std::path::Path;
use std::sync::{Arc, Mutex};
/// Process Story module
pub async fn process_story_module(
story_path: &Path,
video_path: &str,
uuid: &str,
progress_state: &Arc<Mutex<ProgressState>>,
ui: &Arc<Mutex<Option<ProgressUi>>>,
) -> Result<()> {
{
let mut state = progress_state.lock().unwrap();
state.get_processor(ProcessorType::Story).start(1);
}
let story_result = momentry_core::core::processor::process_story(
video_path,
story_path.to_str().unwrap(),
Some(uuid),
)
.await?;
let story_json = serde_json::to_string_pretty(&story_result)?;
std::fs::write(story_path, &story_json)?;
let output_dir = OutputDir::new();
let _ = output_dir.backup_file(uuid, "story.json");
println!(
" ✓ Story saved: {} parent chunks, {} child chunks",
story_result.stats.total_parent_chunks, story_result.stats.total_child_chunks
);
{
let mut state = progress_state.lock().unwrap();
state.get_processor(ProcessorType::Story).complete(&format!(
"{} parents, {} children",
story_result.stats.total_parent_chunks, story_result.stats.total_child_chunks
));
}
if let Some(ref mut ui) = *ui.lock().unwrap() {
let _ = ui.render();
}
Ok(())
}

View File

@@ -0,0 +1,49 @@
//! YOLO (Object Detection) processing module
use anyhow::Result;
use momentry_core::ui::progress::{ProcessorType, ProgressState, ProgressUi};
use momentry_core::OutputDir;
use std::path::Path;
use std::sync::{Arc, Mutex};
/// Process YOLO module
pub async fn process_yolo_module(
yolo_path: &Path,
video_path: &str,
uuid: &str,
progress_state: &Arc<Mutex<ProgressState>>,
ui: &Arc<Mutex<Option<ProgressUi>>>,
) -> Result<()> {
{
let mut state = progress_state.lock().unwrap();
state.get_processor(ProcessorType::Yolo).start(1);
}
let yolo_result = momentry_core::core::processor::process_yolo(
video_path,
yolo_path.to_str().unwrap(),
Some(uuid),
)
.await?;
let yolo_json = serde_json::to_string_pretty(&yolo_result)?;
std::fs::write(yolo_path, &yolo_json)?;
let output_dir = OutputDir::new();
let _ = output_dir.backup_file(uuid, "yolo.json");
println!(" ✓ YOLO saved: {} frames", yolo_result.frame_count);
{
let mut state = progress_state.lock().unwrap();
state
.get_processor(ProcessorType::Yolo)
.complete(&format!("{} frames", yolo_result.frame_count));
}
if let Some(ref mut ui) = *ui.lock().unwrap() {
let _ = ui.render();
}
Ok(())
}

View File

@@ -1,11 +1,7 @@
use anyhow::Result;
use std::path::Path;
use std::sync::Arc;
use tokio::time;
use tracing::{error, info, warn};
use crate::core::db::{Database, PostgresDb};
use crate::core::ingestion::IngestionService;
use tracing::{info, warn};
pub struct WatcherConfig {
pub directories: Vec<String>,
@@ -26,7 +22,7 @@ impl Default for WatcherConfig {
}
/// Starts the file watcher in the background.
/// Scans directories for video files and registers them if not already present.
/// Scans directories for video files (auto-registration disabled).
pub async fn run_watcher() -> Result<()> {
let config = WatcherConfig::default();
let dirs = config.directories.clone();
@@ -36,35 +32,26 @@ pub async fn run_watcher() -> Result<()> {
return Err(anyhow::anyhow!("No watch directories"));
}
info!("Initializing Database for Watcher...");
// Use Database::init() which handles config and pool creation
let db = PostgresDb::init().await?;
let service = Arc::new(IngestionService::new(db));
info!("Starting Video File Watcher (auto-registration disabled)...");
info!("Watch directories: {:?}", dirs);
info!("Starting Ingestion Poller for: {:?}", dirs);
// Spawn background task
// Spawn background task for monitoring only (no auto-registration)
tokio::spawn(async move {
let mut interval = time::interval(time::Duration::from_millis(config.poll_interval_ms));
// Run once immediately on startup to catch existing files
scan_and_ingest(&dirs, &service).await;
loop {
interval.tick().await;
scan_and_ingest(&dirs, &service).await;
scan_videos(&dirs).await;
}
});
Ok(())
}
async fn scan_and_ingest(directories: &[String], service: &Arc<IngestionService>) {
async fn scan_videos(directories: &[String]) {
// Allowed extensions list
let allowed_extensions = vec!["mp4", "mov", "mkv"];
info!("Scanning directories for new videos...");
for dir in directories {
let path = Path::new(dir);
if !path.exists() {
@@ -73,34 +60,33 @@ async fn scan_and_ingest(directories: &[String], service: &Arc<IngestionService>
}
if let Ok(entries) = std::fs::read_dir(path) {
for entry in entries.flatten() {
let file_path = entry.path();
if file_path.is_file() {
// Check extension
let is_video = if let Some(ext) = file_path.extension().and_then(|e| e.to_str())
{
allowed_extensions.contains(&ext.to_lowercase().as_str())
} else {
false
};
if is_video {
if let Some(p_str) = file_path.to_str() {
// Try to ingest. The service checks if it already exists.
match service.ingest(p_str).await {
Ok(Some(uuid)) => {
info!("Auto-registered: {} -> {}", file_path.display(), uuid);
}
Ok(None) => {
// Already registered
}
Err(e) => {
error!("Failed to ingest {}: {}", file_path.display(), e);
}
let video_count = entries
.flatten()
.filter(|entry| {
let file_path = entry.path();
if file_path.is_dir() {
if let Some(name) = file_path.file_name().and_then(|n| n.to_str()) {
if name.starts_with('.') {
return false;
}
}
}
}
if !file_path.is_file() {
return false;
}
if let Some(ext) = file_path.extension().and_then(|e| e.to_str()) {
allowed_extensions.contains(&ext.to_lowercase().as_str())
} else {
false
}
})
.count();
if video_count > 0 {
info!(
"Found {} video files in {} (use API to register)",
video_count, dir
);
}
}
}

View File

@@ -6,9 +6,7 @@ use tokio::time::sleep;
use tracing::{error, info, warn};
use crate::core::chunk::{rule1_ingest, rule3_ingest};
use crate::core::db::{
MonitorJobStatus, PostgresDb, ProcessorJobStatus, ProcessorType, RedisClient, VideoStatus,
};
use crate::core::db::{MonitorJobStatus, PostgresDb, ProcessorJobStatus, RedisClient, VideoStatus};
use crate::worker::config::WorkerConfig;
use crate::worker::processor::{ProcessorPool, ProcessorTask};
@@ -89,7 +87,29 @@ impl JobWorker {
async fn process_job(&self, job: crate::core::db::MonitorJob) -> Result<()> {
info!("Processing job: {} ({})", job.uuid, job.id);
let total_processors = ProcessorType::all().len() as i32;
// Determine which processors to run based on job.processors field
let processors_to_run: Vec<crate::core::db::ProcessorType> = if job.processors.is_empty() {
info!("No processors specified, running all processors");
crate::core::db::ProcessorType::all()
} else {
info!("Processors specified: {:?}", job.processors);
job.processors
.iter()
.filter_map(|p| crate::core::db::ProcessorType::from_db_str(p))
.collect()
};
let total_processors = processors_to_run.len() as i32;
// Get video total_frames for progress tracking
let video = self.db.get_video_by_uuid(&job.uuid).await?;
let total_frames = video.map(|v| v.total_frames).unwrap_or(0);
// Initialize processing_status with all processors
let processor_names: Vec<&str> = processors_to_run.iter().map(|p| p.as_str()).collect();
self.db
.init_processing_status(&job.uuid, processor_names.clone(), total_frames)
.await?;
self.db
.update_job_status(job.id, MonitorJobStatus::Running)
@@ -106,7 +126,18 @@ impl JobWorker {
result_map.insert(result.processor_type, result);
}
for processor_type in ProcessorType::all() {
for processor_type in processors_to_run {
// Update processor status to running
self.db
.update_processor_progress(
&job.uuid,
processor_type.as_str(),
0,
total_frames,
"running",
)
.await?;
// Check if processor already in terminal state
if let Some(result) = result_map.get(&processor_type) {
match result.status {
@@ -145,7 +176,7 @@ impl JobWorker {
let processor_result_id = self
.db
.create_processor_result(job.id, processor_type)
.create_processor_result(job.id, processor_type, &job.uuid)
.await?;
self.redis
@@ -215,6 +246,8 @@ impl JobWorker {
let has_asr = completed_processors.iter().any(|p| p == "asr");
let has_asrx = completed_processors.iter().any(|p| p == "asrx");
let has_cut = completed_processors.iter().any(|p| p == "cut");
let has_face = completed_processors.iter().any(|p| p == "face");
let has_yolo = completed_processors.iter().any(|p| p == "yolo");
// Update processor arrays in job record
self.db
@@ -231,9 +264,7 @@ impl JobWorker {
match db_clone.get_video_by_uuid(&uuid_clone).await {
Ok(Some(video)) => {
let fps = video.fps;
match rule1_ingest::ingest_rule1(db_clone.pool(), &uuid_clone, fps)
.await
{
match rule1_ingest::execute_rule1(&db_clone, &uuid_clone, fps).await {
Ok(count) => info!(
"✅ Rule 1 Ingestion completed: {} chunks inserted.",
count
@@ -263,6 +294,25 @@ impl JobWorker {
});
}
// 🚀 P3 Trigger: Identity Agent (Face + ASRX)
if has_face && has_asrx {
info!("📝 Prerequisites met for Identity Agent. Starting analysis...");
let uuid_clone = uuid.to_string();
tokio::spawn(async move {
tracing::info!("Identity Agent started for video {}", uuid_clone);
});
}
// 🚀 P4 Trigger: 5W1H Agent (after Rule 3 completion)
if has_cut && has_asr {
info!("📝 Prerequisites met for 5W1H Agent. Will trigger after Rule 3...");
let uuid_clone = uuid.to_string();
tokio::spawn(async move {
tokio::time::sleep(tokio::time::Duration::from_secs(30)).await;
tracing::info!("5W1H Agent started for video {}", uuid_clone);
});
}
self.db
.update_job_status(job_id, MonitorJobStatus::Completed)
.await?;
@@ -271,6 +321,14 @@ impl JobWorker {
.update_video_status(uuid, VideoStatus::Completed)
.await?;
// Get total_frames for completion status
let video = self.db.get_video_by_uuid(uuid).await?;
let total_frames = video.map(|v| v.total_frames).unwrap_or(0);
self.db
.update_processing_status_completed(uuid, total_frames)
.await?;
self.redis
.update_worker_job_status(uuid, job_id, "completed", None, completed_count, 7)
.await?;

View File

@@ -5,10 +5,8 @@ use std::sync::Arc;
use tokio::sync::{mpsc, RwLock};
use tracing::{error, info};
use crate::core::chunk::types::{Chunk, ChunkRule, ChunkType};
use crate::core::config::{OUTPUT_DIR, PYTHON_PATH, SCRIPTS_DIR};
use crate::core::db::RedisClient;
use crate::core::db::{MonitorJob, PostgresDb, ProcessorJobStatus, ProcessorType};
use crate::core::db::{MonitorJob, PostgresDb, ProcessorJobStatus, ProcessorType, RedisClient};
use crate::core::processor;
use crate::core::processor::asr::AsrResult;
use crate::core::processor::asrx::AsrxResult;
@@ -16,9 +14,17 @@ use crate::core::processor::cut::CutResult;
use crate::core::processor::face::FaceResult;
use crate::core::processor::ocr::OcrResult;
use crate::core::processor::pose::PoseResult;
use crate::core::processor::scene_classification::SceneClassificationResult;
use crate::core::processor::visual_chunk::VisualChunkResult;
use crate::core::processor::yolo::YoloResult;
/// Outcome of one processor run: the serialized result plus the counters
/// that are passed on to `update_processor_result_with_stats`.
#[derive(Debug)]
struct ProcessorOutput {
/// The processor's result converted to JSON with `serde_json::to_value`.
data: serde_json::Value,
/// Number of items the processor produced (segments, scenes, frames or
/// chunks, depending on the processor type).
chunks_produced: i32,
/// Frame count recorded for the run; the visible processor arms set this to
/// the video's `total_frames` (0 when no video record is found).
frames_processed: i32,
}
#[derive(Debug, Clone)]
pub struct ProcessorTask {
pub job: MonitorJob,
@@ -113,15 +119,17 @@ impl ProcessorPool {
match result {
Ok(output) => {
info!(
"Processor {} completed for job {}",
processor_name, job.uuid
"Processor {} completed for job {} ({} chunks, {} frames)",
processor_name, job.uuid, output.chunks_produced, output.frames_processed
);
if let Err(e) = db
.update_processor_result(
.update_processor_result_with_stats(
processor_result_id,
ProcessorJobStatus::Completed,
None,
Some(&output),
Some(&output.data),
output.chunks_produced,
output.frames_processed,
)
.await
{
@@ -146,11 +154,13 @@ impl ProcessorPool {
processor_name, job.uuid, e
);
if let Err(db_err) = db
.update_processor_result(
.update_processor_result_with_stats(
processor_result_id,
ProcessorJobStatus::Failed,
Some(&e.to_string()),
None,
0,
0,
)
.await
{
@@ -181,7 +191,7 @@ impl ProcessorPool {
job: &MonitorJob,
processor_type: ProcessorType,
_cancel_rx: mpsc::Receiver<()>,
) -> Result<serde_json::Value> {
) -> Result<ProcessorOutput> {
let video_path = job.video_path.as_ref().context("No video path in job")?;
// Generate output path
@@ -199,109 +209,139 @@ impl ProcessorPool {
}
let uuid = Some(job.uuid.as_str());
let video = db.get_video_by_uuid(&job.uuid).await?;
let total_frames = video.as_ref().map(|v| v.total_frames as i32).unwrap_or(0);
match processor_type {
ProcessorType::Asr => {
let result =
processor::process_asr(video_path, output_path.to_str().unwrap(), uuid).await?;
// Store ASR chunks in database
let chunks_produced = result.segments.len() as i32;
tracing::info!(
"ASR completed, storing {} segments for {}",
result.segments.len(),
chunks_produced,
job.uuid
);
if let Err(e) = Self::store_asr_chunks(db, &job.uuid, &result).await {
tracing::error!("Failed to store ASR chunks for {}: {}", job.uuid, e);
}
Ok(serde_json::to_value(result)?)
Ok(ProcessorOutput {
data: serde_json::to_value(result)?,
chunks_produced,
frames_processed: total_frames,
})
}
ProcessorType::Cut => {
let result =
processor::process_cut(video_path, output_path.to_str().unwrap(), uuid).await?;
// Store CUT chunks in database
let chunks_produced = result.scenes.len() as i32;
tracing::info!(
"CUT completed, storing {} scenes for {}",
result.scenes.len(),
chunks_produced,
job.uuid
);
if let Err(e) = Self::store_cut_chunks(db, &job.uuid, &result).await {
tracing::error!("Failed to store CUT chunks for {}: {}", job.uuid, e);
}
Ok(serde_json::to_value(result)?)
Ok(ProcessorOutput {
data: serde_json::to_value(result)?,
chunks_produced,
frames_processed: total_frames,
})
}
ProcessorType::Yolo => {
let result =
processor::process_yolo(video_path, output_path.to_str().unwrap(), uuid)
.await?;
// Store YOLO chunks in database
let chunks_produced = result.frames.len() as i32;
tracing::info!(
"YOLO completed, storing {} frames for {}",
result.frames.len(),
chunks_produced,
job.uuid
);
if let Err(e) = Self::store_yolo_chunks(db, &job.uuid, &result).await {
tracing::error!("Failed to store YOLO chunks for {}: {}", job.uuid, e);
}
Ok(serde_json::to_value(result)?)
Ok(ProcessorOutput {
data: serde_json::to_value(result)?,
chunks_produced,
frames_processed: total_frames,
})
}
ProcessorType::Ocr => {
let result =
processor::process_ocr(video_path, output_path.to_str().unwrap(), uuid).await?;
// Store OCR chunks in database
let chunks_produced = result.frames.len() as i32;
tracing::info!(
"OCR completed, storing {} frames for {}",
result.frames.len(),
chunks_produced,
job.uuid
);
if let Err(e) = Self::store_ocr_chunks(db, &job.uuid, &result).await {
tracing::error!("Failed to store OCR chunks for {}: {}", job.uuid, e);
}
Ok(serde_json::to_value(result)?)
Ok(ProcessorOutput {
data: serde_json::to_value(result)?,
chunks_produced,
frames_processed: total_frames,
})
}
ProcessorType::Face => {
let result =
processor::process_face(video_path, output_path.to_str().unwrap(), uuid)
.await?;
// Store FACE chunks in database
let chunks_produced = result.frames.len() as i32;
tracing::info!(
"FACE completed, storing {} frames for {}",
result.frames.len(),
chunks_produced,
job.uuid
);
if let Err(e) = Self::store_face_chunks(db, &job.uuid, &result).await {
tracing::error!("Failed to store FACE chunks for {}: {}", job.uuid, e);
}
Ok(serde_json::to_value(result)?)
Ok(ProcessorOutput {
data: serde_json::to_value(result)?,
chunks_produced,
frames_processed: total_frames,
})
}
ProcessorType::Pose => {
let result =
processor::process_pose(video_path, output_path.to_str().unwrap(), uuid)
.await?;
// Store POSE chunks in database
let chunks_produced = result.frames.len() as i32;
tracing::info!(
"POSE completed, storing {} frames for {}",
result.frames.len(),
chunks_produced,
job.uuid
);
if let Err(e) = Self::store_pose_chunks(db, &job.uuid, &result).await {
tracing::error!("Failed to store POSE chunks for {}: {}", job.uuid, e);
}
Ok(serde_json::to_value(result)?)
Ok(ProcessorOutput {
data: serde_json::to_value(result)?,
chunks_produced,
frames_processed: total_frames,
})
}
ProcessorType::Asrx => {
let result =
processor::process_asrx(video_path, output_path.to_str().unwrap(), uuid)
.await?;
// Store ASRX chunks in database
let chunks_produced = result.segments.len() as i32;
tracing::info!(
"ASRX completed, storing {} segments for {}",
result.segments.len(),
chunks_produced,
job.uuid
);
if let Err(e) = Self::store_asrx_chunks(db, &job.uuid, &result).await {
tracing::error!("Failed to store ASRX chunks for {}: {}", job.uuid, e);
}
Ok(serde_json::to_value(result)?)
Ok(ProcessorOutput {
data: serde_json::to_value(result)?,
chunks_produced,
frames_processed: total_frames,
})
}
ProcessorType::VisualChunk => {
let result = processor::process_visual_chunk_advanced(
@@ -310,16 +350,42 @@ impl ProcessorPool {
uuid,
)
.await?;
// Store VisualChunk chunks in database
let chunks_produced = result.chunk_count as i32;
tracing::info!(
"VisualChunk completed, storing {} chunks for {}",
result.chunk_count,
chunks_produced,
job.uuid
);
if let Err(e) = Self::store_visual_chunk_chunks(db, &job.uuid, &result).await {
tracing::error!("Failed to store VisualChunk chunks for {}: {}", job.uuid, e);
}
Ok(serde_json::to_value(result)?)
Ok(ProcessorOutput {
data: serde_json::to_value(result)?,
chunks_produced,
frames_processed: total_frames,
})
}
ProcessorType::Scene => {
let result = processor::process_scene_classification(
video_path,
output_path.to_str().unwrap(),
uuid,
)
.await?;
let chunks_produced = result.scenes.len() as i32;
tracing::info!(
"Scene classification completed, storing {} scenes for {}",
chunks_produced,
job.uuid
);
if let Err(e) = Self::store_scene_chunks(db, &job.uuid, &result).await {
tracing::error!("Failed to store Scene chunks for {}: {}", job.uuid, e);
}
Ok(ProcessorOutput {
data: serde_json::to_value(result)?,
chunks_produced,
frames_processed: total_frames,
})
}
}
}
@@ -482,7 +548,7 @@ impl ProcessorPool {
_cancel_rx: &mut mpsc::Receiver<()>,
) -> Result<serde_json::Value> {
let script_path = std::env::var("MOMENTRY_ASRX_SCRIPT")
.unwrap_or_else(|_| format!("{}/asrx_processor.py", SCRIPTS_DIR.as_str()));
.unwrap_or_else(|_| format!("{}/asrx_processor_custom.py", SCRIPTS_DIR.as_str()));
let output = tokio::process::Command::new(PYTHON_PATH.as_str())
.arg(&script_path)
@@ -504,43 +570,44 @@ impl ProcessorPool {
uuid: &str,
asr_result: &AsrResult,
) -> Result<()> {
// Get video record to obtain file_id and fps
let video = db
.get_video_by_uuid(uuid)
.await?
.ok_or_else(|| anyhow::anyhow!("Video not found for uuid: {}", uuid))?;
let file_id = video.id;
let fps = if video.fps > 0.0 { video.fps } else { 30.0 };
for (i, segment) in asr_result.segments.iter().enumerate() {
let chunk = Chunk::from_seconds(
file_id as i32,
uuid.to_string(),
i as u32,
ChunkType::Sentence,
ChunkRule::Rule1,
segment.start,
segment.end,
fps,
serde_json::json!({
let segments: Vec<(i64, i64, i64, f64, f64, serde_json::Value)> = asr_result
.segments
.iter()
.enumerate()
.map(|(i, segment)| {
let start_frame = (segment.start * fps).round() as i64;
let end_frame = (segment.end * fps).round() as i64;
let data = serde_json::json!({
"text": segment.text,
"text_normalized": segment.text.to_lowercase(),
}),
)
.with_metadata(serde_json::json!({
"language": asr_result.language,
"language_probability": asr_result.language_probability,
}));
"language": asr_result.language,
"language_probability": asr_result.language_probability,
});
(
i as i64,
start_frame,
end_frame,
segment.start,
segment.end,
data,
)
})
.collect();
db.store_asr_pre_chunks_batch(uuid, &segments).await?;
tracing::info!(
"Stored {} ASR pre-chunks for video {}",
segments.len(),
uuid
);
match db.store_chunk(&chunk).await {
Ok(_) => {
tracing::info!("Stored ASR chunk {} for video {}", i, uuid);
}
Err(e) => {
tracing::error!("Failed to store ASR chunk {}: {}", i, e);
}
}
}
Ok(())
}
@@ -549,40 +616,35 @@ impl ProcessorPool {
uuid: &str,
cut_result: &CutResult,
) -> Result<()> {
// Get video record to obtain file_id and fps
let video = db
.get_video_by_uuid(uuid)
.await?
.ok_or_else(|| anyhow::anyhow!("Video not found for uuid: {}", uuid))?;
let file_id = video.id;
let fps = if video.fps > 0.0 { video.fps } else { 30.0 };
for (i, scene) in cut_result.scenes.iter().enumerate() {
let chunk = Chunk::from_seconds(
file_id as i32,
uuid.to_string(),
i as u32,
ChunkType::Cut,
ChunkRule::Rule1,
scene.start_time,
scene.end_time,
fps,
serde_json::json!({
let scenes: Vec<(i64, i64, i64, f64, f64, serde_json::Value)> = cut_result
.scenes
.iter()
.enumerate()
.map(|(i, scene)| {
let data = serde_json::json!({
"scene_number": scene.scene_number,
"start_frame": scene.start_frame,
"end_frame": scene.end_frame,
}),
);
});
(
i as i64,
scene.start_frame as i64,
scene.end_frame as i64,
scene.start_time,
scene.end_time,
data,
)
})
.collect();
db.store_cut_pre_chunks_batch(uuid, &scenes).await?;
tracing::info!("Stored {} CUT pre-chunks for video {}", scenes.len(), uuid);
match db.store_chunk(&chunk).await {
Ok(_) => {
tracing::info!("Stored CUT chunk {} for video {}", i, uuid);
}
Err(e) => {
tracing::error!("Failed to store CUT chunk {}: {}", i, e);
}
}
}
Ok(())
}
@@ -591,60 +653,32 @@ impl ProcessorPool {
uuid: &str,
yolo_result: &YoloResult,
) -> Result<()> {
// Get video record to obtain file_id and fps
let video = match db.get_video_by_uuid(uuid).await {
Ok(Some(video)) => video,
Ok(None) => {
tracing::error!("Video not found for uuid: {}", uuid);
return Ok(());
}
Err(e) => {
tracing::error!("Failed to get video for uuid {}: {}", uuid, e);
return Ok(());
}
};
let file_id = video.id;
let fps = if video.fps > 0.0 { video.fps } else { 30.0 };
let frames_count = yolo_result.frames.len();
tracing::info!(
"Storing {} YOLO pre-chunks for video {}",
frames_count,
uuid
);
for (i, frame) in yolo_result.frames.iter().enumerate() {
let mut chunk = Chunk::new(
file_id as i32,
uuid.to_string(),
i as u32,
ChunkType::Trace,
ChunkRule::Rule1,
frame.frame as i64,
frame.frame as i64 + 1,
fps,
serde_json::json!({
"objects": frame.objects,
"timestamp": frame.timestamp,
}),
);
// Override chunk_id to include processor prefix for uniqueness
chunk.chunk_id = format!("trace_yolo_{:04}", i);
let mut pre_chunks_to_store = Vec::new();
// Populate text_content for BM25 search
let object_names: Vec<String> =
frame.objects.iter().map(|o| o.class_name.clone()).collect();
if !object_names.is_empty() {
chunk = chunk.with_text_content(object_names.join(" "));
}
for frame in yolo_result.frames.iter() {
let data = serde_json::json!({
"objects": frame.objects,
"timestamp": frame.timestamp,
});
match db.store_chunk(&chunk).await {
Ok(_) => {
tracing::info!(
"Stored YOLO chunk {} (frame {}) for video {}",
i,
frame.frame,
uuid
);
}
Err(e) => {
tracing::error!("Failed to store YOLO chunk {}: {}", i, e);
}
}
pre_chunks_to_store.push((
frame.frame as i64, // coordinate_index
Some(frame.timestamp), // timestamp
data,
None, // identity_id
None, // confidence
));
}
db.store_raw_pre_chunks_batch(uuid, "yolo", &pre_chunks_to_store)
.await?;
Ok(())
}
@@ -653,59 +687,22 @@ impl ProcessorPool {
uuid: &str,
ocr_result: &OcrResult,
) -> Result<()> {
// Get video record to obtain file_id and fps
let video = match db.get_video_by_uuid(uuid).await {
Ok(Some(video)) => video,
Ok(None) => {
tracing::error!("Video not found for uuid: {}", uuid);
return Ok(());
}
Err(e) => {
tracing::error!("Failed to get video for uuid {}: {}", uuid, e);
return Ok(());
}
};
let file_id = video.id;
let fps = if video.fps > 0.0 { video.fps } else { 30.0 };
let frames_count = ocr_result.frames.len();
tracing::info!("Storing {} OCR pre-chunks for video {}", frames_count, uuid);
for (i, frame) in ocr_result.frames.iter().enumerate() {
let mut chunk = Chunk::new(
file_id as i32,
uuid.to_string(),
i as u32,
ChunkType::Trace,
ChunkRule::Rule1,
frame.frame as i64,
frame.frame as i64 + 1,
fps,
serde_json::json!({
"texts": frame.texts,
"timestamp": frame.timestamp,
}),
);
// Override chunk_id to include processor prefix for uniqueness
chunk.chunk_id = format!("trace_ocr_{:04}", i);
let mut pre_chunks_to_store = Vec::new();
// Populate text_content for BM25 search
let texts: Vec<String> = frame.texts.iter().map(|t| t.text.clone()).collect();
if !texts.is_empty() {
chunk = chunk.with_text_content(texts.join(" "));
}
for frame in ocr_result.frames.iter() {
let data = serde_json::json!({
"texts": frame.texts,
"timestamp": frame.timestamp,
});
match db.store_chunk(&chunk).await {
Ok(_) => {
tracing::info!(
"Stored OCR chunk {} (frame {}) for video {}",
i,
frame.frame,
uuid
);
}
Err(e) => {
tracing::error!("Failed to store OCR chunk {}: {}", i, e);
}
}
pre_chunks_to_store.push((frame.frame as i64, Some(frame.timestamp), data, None, None));
}
db.store_raw_pre_chunks_batch(uuid, "ocr", &pre_chunks_to_store)
.await?;
Ok(())
}
@@ -714,63 +711,33 @@ impl ProcessorPool {
uuid: &str,
face_result: &FaceResult,
) -> Result<()> {
// Get video record to obtain file_id and fps
let video = match db.get_video_by_uuid(uuid).await {
Ok(Some(video)) => video,
Ok(None) => {
tracing::error!("Video not found for uuid: {}", uuid);
return Ok(());
}
Err(e) => {
tracing::error!("Failed to get video for uuid {}: {}", uuid, e);
return Ok(());
}
};
let file_id = video.id;
let fps = if video.fps > 0.0 { video.fps } else { 30.0 };
let frames_count = face_result.frames.len();
tracing::info!(
"Storing {} Face pre-chunks for video {}",
frames_count,
uuid
);
for (i, frame) in face_result.frames.iter().enumerate() {
let mut chunk = Chunk::new(
file_id as i32,
uuid.to_string(),
i as u32,
ChunkType::Trace,
ChunkRule::Rule1,
let mut pre_chunks_to_store = Vec::new();
for frame in face_result.frames.iter() {
let data = serde_json::json!({
"faces": frame.faces,
"timestamp": frame.timestamp,
});
// We could potentially parse identity_id if it's already matched, but for raw ingestion it's None.
pre_chunks_to_store.push((
frame.frame as i64,
frame.frame as i64 + 1,
fps,
serde_json::json!({
"faces": frame.faces,
"timestamp": frame.timestamp,
}),
);
// Override chunk_id to include processor prefix for uniqueness
chunk.chunk_id = format!("trace_face_{:04}", i);
// Populate text_content for BM25 search (face IDs)
let face_ids: Vec<String> = frame
.faces
.iter()
.filter_map(|f| f.face_id.clone())
.collect();
if !face_ids.is_empty() {
chunk = chunk.with_text_content(face_ids.join(" "));
}
match db.store_chunk(&chunk).await {
Ok(_) => {
tracing::info!(
"Stored FACE chunk {} (frame {}) for video {}",
i,
frame.frame,
uuid
);
}
Err(e) => {
tracing::error!("Failed to store FACE chunk {}: {}", i, e);
}
}
Some(frame.timestamp),
data,
None, // identity_id
None, // confidence
));
}
db.store_raw_pre_chunks_batch(uuid, "face", &pre_chunks_to_store)
.await?;
Ok(())
}
@@ -779,63 +746,26 @@ impl ProcessorPool {
uuid: &str,
pose_result: &PoseResult,
) -> Result<()> {
// Get video record to obtain file_id and fps
let video = match db.get_video_by_uuid(uuid).await {
Ok(Some(video)) => video,
Ok(None) => {
tracing::error!("Video not found for uuid: {}", uuid);
return Ok(());
}
Err(e) => {
tracing::error!("Failed to get video for uuid {}: {}", uuid, e);
return Ok(());
}
};
let file_id = video.id;
let fps = if video.fps > 0.0 { video.fps } else { 30.0 };
let frames_count = pose_result.frames.len();
tracing::info!(
"Storing {} Pose pre-chunks for video {}",
frames_count,
uuid
);
for (i, frame) in pose_result.frames.iter().enumerate() {
let mut chunk = Chunk::new(
file_id as i32,
uuid.to_string(),
i as u32,
ChunkType::Trace,
ChunkRule::Rule1,
frame.frame as i64,
frame.frame as i64 + 1,
fps,
serde_json::json!({
"persons": frame.persons,
"timestamp": frame.timestamp,
}),
);
// Override chunk_id to include processor prefix for uniqueness
chunk.chunk_id = format!("trace_pose_{:04}", i);
let mut pre_chunks_to_store = Vec::new();
// Populate text_content for BM25 search (person count indicator)
let person_count = frame.persons.len();
if person_count > 0 {
let text = format!("person person person")
.repeat(person_count.min(10))
.trim()
.to_string();
chunk = chunk.with_text_content(text);
}
for frame in pose_result.frames.iter() {
let data = serde_json::json!({
"persons": frame.persons,
"timestamp": frame.timestamp,
});
match db.store_chunk(&chunk).await {
Ok(_) => {
tracing::info!(
"Stored POSE chunk {} (frame {}) for video {}",
i,
frame.frame,
uuid
);
}
Err(e) => {
tracing::error!("Failed to store POSE chunk {}: {}", i, e);
}
}
pre_chunks_to_store.push((frame.frame as i64, Some(frame.timestamp), data, None, None));
}
db.store_raw_pre_chunks_batch(uuid, "pose", &pre_chunks_to_store)
.await?;
Ok(())
}
@@ -844,58 +774,29 @@ impl ProcessorPool {
uuid: &str,
asrx_result: &AsrxResult,
) -> Result<()> {
// Get video record to obtain file_id and fps
let video = match db.get_video_by_uuid(uuid).await {
Ok(Some(video)) => video,
Ok(None) => {
tracing::error!("Video not found for uuid: {}", uuid);
return Ok(());
}
Err(e) => {
tracing::error!("Failed to get video for uuid {}: {}", uuid, e);
return Ok(());
}
};
let file_id = video.id;
let fps = if video.fps > 0.0 { video.fps } else { 30.0 };
let segments_count = asrx_result.segments.len();
tracing::info!(
"Storing {} ASRX pre-chunks for video {}",
segments_count,
uuid
);
let mut pre_chunks_to_store = Vec::new();
for (i, segment) in asrx_result.segments.iter().enumerate() {
let mut chunk = Chunk::from_seconds(
file_id as i32,
uuid.to_string(),
i as u32,
ChunkType::Trace,
ChunkRule::Rule1,
segment.start,
segment.end,
fps,
serde_json::json!({
"text": segment.text,
"timestamp": segment.start,
}),
);
// Override chunk_id to include processor prefix for uniqueness
chunk.chunk_id = format!("trace_asrx_{:04}", i);
// Populate text_content for BM25 search (already has text)
chunk = chunk.with_text_content(segment.text.clone());
// Also store speaker_id in content
chunk.content = serde_json::json!({
let data = serde_json::json!({
"text": segment.text,
"speaker_id": segment.speaker_id,
"timestamp": segment.start,
});
match db.store_chunk(&chunk).await {
Ok(_) => {
tracing::info!("Stored ASRX chunk {} for video {}", i, uuid);
}
Err(e) => {
tracing::error!("Failed to store ASRX chunk {}: {}", i, e);
}
}
// ASRX is time-based, so we use segment index or start time as coordinate.
// Let's use index for simplicity in pre_chunks, or start time.
pre_chunks_to_store.push((i as i64, Some(segment.start), data, None, None));
}
db.store_raw_pre_chunks_batch(uuid, "asrx", &pre_chunks_to_store)
.await?;
Ok(())
}
@@ -917,6 +818,52 @@ impl ProcessorPool {
Ok(())
}
/// Stores the scenes of a scene-classification result as Scene pre-chunks.
///
/// The video record for `uuid` is looked up to obtain its frame rate; when the
/// stored `fps` is not greater than zero, 30.0 is used instead. Every scene is
/// turned into a row of `(index, start_frame, end_frame, start_time, end_time,
/// data)`, where the frame numbers are the scene times multiplied by the frame
/// rate and rounded, and `data` is a JSON object holding the classification
/// fields. All rows are written with a single
/// `store_scene_pre_chunks_batch` call.
///
/// # Errors
///
/// Returns an error when the video lookup fails, when no video exists for
/// `uuid`, or when the batch store call fails.
pub async fn store_scene_chunks(
    db: &PostgresDb,
    uuid: &str,
    scene_result: &SceneClassificationResult,
) -> Result<()> {
    let video = db
        .get_video_by_uuid(uuid)
        .await?
        .ok_or_else(|| anyhow::anyhow!("Video not found for uuid: {}", uuid))?;

    // Fall back to 30 fps when the stored rate is not a positive number.
    let fps = match video.fps {
        rate if rate > 0.0 => rate,
        _ => 30.0,
    };

    let mut rows: Vec<(i64, i64, i64, f64, f64, serde_json::Value)> = Vec::new();
    for (index, scene) in scene_result.scenes.iter().enumerate() {
        let payload = serde_json::json!({
            "scene_type": scene.scene_type,
            "scene_type_zh": scene.scene_type_zh,
            "confidence": scene.confidence,
            "top_5": scene.top_5,
        });

        // Convert the scene boundaries from seconds to the nearest frame number.
        let first_frame = (scene.start_time * fps).round() as i64;
        let last_frame = (scene.end_time * fps).round() as i64;

        rows.push((
            index as i64,
            first_frame,
            last_frame,
            scene.start_time,
            scene.end_time,
            payload,
        ));
    }

    db.store_scene_pre_chunks_batch(uuid, &rows).await?;

    tracing::info!(
        "Stored {} Scene pre-chunks for video {}",
        rows.len(),
        uuid
    );

    Ok(())
}
/// Returns the current value of `running_count`.
///
/// Awaits a read guard on `running_count` and returns a copy of the guarded
/// `usize`; the guard is released when the function returns.
pub async fn get_running_count(&self) -> usize {
    let guard = self.running_count.read().await;
    *guard
}