feat: ASRX hybrid pipeline, identity history, worker fixes, checkpoint system
This commit is contained in:
@@ -1,10 +1,4 @@
|
||||
use axum::{
|
||||
extract::State,
|
||||
http::StatusCode,
|
||||
response::Json,
|
||||
routing::post,
|
||||
Router,
|
||||
};
|
||||
use axum::{extract::State, http::StatusCode, response::Json, routing::post, Router};
|
||||
use once_cell::sync::Lazy;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::collections::HashMap;
|
||||
@@ -13,7 +7,10 @@ use std::time::Instant;
|
||||
|
||||
use crate::api::types::AppState;
|
||||
use crate::core::db::schema;
|
||||
use crate::core::llm::function_calling::{self, ChatMessage, LlmResponse, ToolCall, ToolDef};
|
||||
use crate::core::llm::function_calling::{
|
||||
self, call_llm_vision, ChatMessage, LlmResponse, ToolCall, ToolDef,
|
||||
};
|
||||
use base64::{engine::general_purpose::STANDARD as BASE64, Engine};
|
||||
|
||||
// ── Conversation Manager ─────────────────────────────────────────
|
||||
|
||||
@@ -43,11 +40,14 @@ fn get_or_create_conv(conv_id: Option<&str>) -> (String, Vec<ChatMessage>) {
|
||||
}
|
||||
}
|
||||
let id = uuid::Uuid::new_v4().to_string().replace('-', "")[..16].to_string();
|
||||
map.insert(id.clone(), Conversation {
|
||||
messages: Vec::new(),
|
||||
created_at: Instant::now(),
|
||||
last_active: Instant::now(),
|
||||
});
|
||||
map.insert(
|
||||
id.clone(),
|
||||
Conversation {
|
||||
messages: Vec::new(),
|
||||
created_at: Instant::now(),
|
||||
last_active: Instant::now(),
|
||||
},
|
||||
);
|
||||
(id, Vec::new())
|
||||
}
|
||||
|
||||
@@ -85,8 +85,13 @@ const SYSTEM_PROMPT: &str = r#"你是 Momentry 影片分析助手。回答用戶
|
||||
## 工具使用規則
|
||||
1. 先確認用戶在問哪部影片 — 使用 find_file 或 list_files
|
||||
2. 人物問題優先使用 tkg_query
|
||||
3. 語意/內容問題使用 smart_search 或 universal_search
|
||||
4. 可以同時呼叫多個工具
|
||||
3. 人物台詞/發言問題使用 identities_search(輸入人名→回傳台詞片段)
|
||||
4. 人物對話互動(誰跟誰說話)使用 tkg_query 的 speaker_interaction
|
||||
5. 人物台詞內容使用 tkg_query 的 speaker_dialogue
|
||||
6. 用文字反查人物使用 identity_text(輸入關鍵字→找出誰說/提到這段話)
|
||||
7. 語意/內容問題使用 smart_search 或 universal_search
|
||||
8. 畫面分析使用 analyze_frame — 可以分析影片中的任何畫面內容(場景、人物表情、動作、物件等)
|
||||
9. 可以同時呼叫多個工具
|
||||
|
||||
## 引導規則
|
||||
- 如果用戶沒說片名 → 用 find_file 搜尋,如果名稱不明確就反問
|
||||
@@ -120,16 +125,16 @@ fn make_tools(pool: &sqlx::PgPool) -> Vec<ToolDef> {
|
||||
),
|
||||
function_calling::make_tool(
|
||||
"tkg_query",
|
||||
"查詢影片的人物互動、配對、同框資料。query_type 包括:top_identities(人物排名)、first_cooccurrence(第一次同框)、identity_details(人物詳細)、mutual_gaze(互看)、interaction_network(互動網絡)、identity_traces(出場片段)、file_info(影片資訊)。",
|
||||
"查詢影片的人物互動、配對、同框、台詞資料。query_type 包括:top_identities(人物排名)、first_cooccurrence(第一次同框)、identity_details(人物詳細)、mutual_gaze(互看)、interaction_network(互動網絡)、identity_traces(出場片段)、file_info(影片資訊)、speaker_dialogue(人物台詞)、speaker_interaction(兩人對話互動)。",
|
||||
serde_json::json!({
|
||||
"file_uuid": {"type": "string", "description": "影片 UUID"},
|
||||
"query_type": {
|
||||
"type": "string",
|
||||
"enum": ["top_identities", "first_cooccurrence", "identity_details", "mutual_gaze", "interaction_network", "identity_traces", "file_info"],
|
||||
"enum": ["top_identities", "first_cooccurrence", "identity_details", "mutual_gaze", "interaction_network", "identity_traces", "file_info", "speaker_dialogue", "speaker_interaction"],
|
||||
"description": "查詢類型"
|
||||
},
|
||||
"identity_name": {"type": "string", "description": "人物名稱(配合 identity_details / identity_traces)"},
|
||||
"identity_b": {"type": "string", "description": "第二人物名稱(配合 first_cooccurrence / mutual_gaze)"},
|
||||
"identity_name": {"type": "string", "description": "人物名稱(配合 identity_details / identity_traces / speaker_dialogue / speaker_interaction)"},
|
||||
"identity_b": {"type": "string", "description": "第二人物名稱(配合 first_cooccurrence / mutual_gaze / speaker_interaction)"},
|
||||
"limit": {"type": "integer", "default": 5}
|
||||
}),
|
||||
vec!["file_uuid", "query_type"],
|
||||
@@ -144,6 +149,26 @@ fn make_tools(pool: &sqlx::PgPool) -> Vec<ToolDef> {
|
||||
}),
|
||||
vec!["query"],
|
||||
),
|
||||
function_calling::make_tool(
|
||||
"identity_text",
|
||||
"搜尋文字關鍵字,找出有提及該內容的影片人物。適合回答「誰說了OOO」、「誰跟OOO有關」。不是查詢人物的台詞,而是用文字反查人物。",
|
||||
serde_json::json!({
|
||||
"q": {"type": "string", "description": "搜尋關鍵字(台詞片段、主題等)"},
|
||||
"file_uuid": {"type": "string", "description": "限制搜尋範圍(可選)"},
|
||||
"limit": {"type": "integer", "default": 10}
|
||||
}),
|
||||
vec!["q"],
|
||||
),
|
||||
function_calling::make_tool(
|
||||
"identities_search",
|
||||
"查詢特定人物的台詞/發言內容。輸入人物名稱,回傳該人物在影片中說過的話。適合回答「某某人說了什麼」、「某某人的台詞」。",
|
||||
serde_json::json!({
|
||||
"q": {"type": "string", "description": "人物名稱關鍵字(姓名、角色名、別名)"},
|
||||
"file_uuid": {"type": "string", "description": "限制搜尋範圍(可選)"},
|
||||
"limit": {"type": "integer", "default": 10}
|
||||
}),
|
||||
vec!["q"],
|
||||
),
|
||||
function_calling::make_tool(
|
||||
"get_identity_detail",
|
||||
"查詢單一身份的詳細資料(名字、角色、TMDb 資訊)。",
|
||||
@@ -168,6 +193,16 @@ fn make_tools(pool: &sqlx::PgPool) -> Vec<ToolDef> {
|
||||
}),
|
||||
vec!["file_uuid"],
|
||||
),
|
||||
function_calling::make_tool(
|
||||
"analyze_frame",
|
||||
"分析影片中指定畫面的視覺內容(場景、人物表情、動作、物件等)。若不指定 frame_number,會使用代表性畫面。問題會傳給視覺 LLM 分析。",
|
||||
serde_json::json!({
|
||||
"file_uuid": {"type": "string", "description": "影片 UUID"},
|
||||
"question": {"type": "string", "description": "關於畫面的問題,例如「這個場景發生什麼事?」"},
|
||||
"frame_number": {"type": "integer", "description": "指定的 frame 編號(可選)"}
|
||||
}),
|
||||
vec!["file_uuid"],
|
||||
),
|
||||
]
|
||||
}
|
||||
|
||||
@@ -193,9 +228,10 @@ async fn exec_find_file(pool: &sqlx::PgPool, args: &serde_json::Value) -> Result
|
||||
if rows.is_empty() {
|
||||
return Ok(serde_json::json!({"found": false, "message": "No files match the query. Try different keywords."}).to_string());
|
||||
}
|
||||
let files: Vec<serde_json::Value> = rows.into_iter().map(|(u, n, hd)| {
|
||||
serde_json::json!({"file_uuid": u, "file_name": n, "has_data": hd})
|
||||
}).collect();
|
||||
let files: Vec<serde_json::Value> = rows
|
||||
.into_iter()
|
||||
.map(|(u, n, hd)| serde_json::json!({"file_uuid": u, "file_name": n, "has_data": hd}))
|
||||
.collect();
|
||||
Ok(serde_json::json!({"found": true, "files": files}).to_string())
|
||||
}
|
||||
|
||||
@@ -214,15 +250,19 @@ async fn exec_list_files(pool: &sqlx::PgPool, args: &serde_json::Value) -> Resul
|
||||
.await
|
||||
.map_err(|e| e.to_string())?;
|
||||
|
||||
let files: Vec<serde_json::Value> = rows.into_iter().map(|(u, n, hd)| {
|
||||
serde_json::json!({"file_uuid": u, "file_name": n, "has_data": hd})
|
||||
}).collect();
|
||||
let files: Vec<serde_json::Value> = rows
|
||||
.into_iter()
|
||||
.map(|(u, n, hd)| serde_json::json!({"file_uuid": u, "file_name": n, "has_data": hd}))
|
||||
.collect();
|
||||
Ok(serde_json::json!({"files": files}).to_string())
|
||||
}
|
||||
|
||||
async fn exec_tkg_query(pool: &sqlx::PgPool, args: &serde_json::Value) -> Result<String, String> {
|
||||
let file_uuid = args.get("file_uuid").and_then(|v| v.as_str()).unwrap_or("");
|
||||
let query_type = args.get("query_type").and_then(|v| v.as_str()).unwrap_or("");
|
||||
let query_type = args
|
||||
.get("query_type")
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or("");
|
||||
let identity_name = args.get("identity_name").and_then(|v| v.as_str());
|
||||
let identity_b = args.get("identity_b").and_then(|v| v.as_str());
|
||||
let limit = args.get("limit").and_then(|v| v.as_i64()).unwrap_or(5);
|
||||
@@ -242,9 +282,11 @@ async fn exec_tkg_query(pool: &sqlx::PgPool, args: &serde_json::Value) -> Result
|
||||
GROUP BY i.uuid, i.name ORDER BY face_count DESC LIMIT $2",
|
||||
fd_table, id_table
|
||||
))
|
||||
.bind(file_uuid).bind(limit)
|
||||
.bind(file_uuid)
|
||||
.bind(limit)
|
||||
.fetch_all(pool)
|
||||
.await.map_err(|e| e.to_string())?;
|
||||
.await
|
||||
.map_err(|e| e.to_string())?;
|
||||
Ok(serde_json::json!({"identities": rows}).to_string())
|
||||
}
|
||||
"first_cooccurrence" => {
|
||||
@@ -325,8 +367,9 @@ async fn exec_tkg_query(pool: &sqlx::PgPool, args: &serde_json::Value) -> Result
|
||||
}
|
||||
"identity_traces" => {
|
||||
let name = identity_name.unwrap_or("");
|
||||
let rows: Vec<(i32, i64, i32, i32)> = sqlx::query_as(&format!(
|
||||
"SELECT fd.trace_id, COUNT(*)::bigint, MIN(fd.frame_number)::int, MAX(fd.frame_number)::int \
|
||||
// MIN/MAX frame_number should be bigint (i64), not int
|
||||
let rows: Vec<(i32, i64, i64, i64)> = sqlx::query_as(&format!(
|
||||
"SELECT fd.trace_id, COUNT(*)::bigint, MIN(fd.frame_number)::bigint, MAX(fd.frame_number)::bigint \
|
||||
FROM {} fd JOIN {} i ON i.id = fd.identity_id \
|
||||
WHERE fd.file_uuid = $1 AND i.name ILIKE $2 \
|
||||
GROUP BY fd.trace_id ORDER BY COUNT(*) DESC LIMIT $3",
|
||||
@@ -344,14 +387,133 @@ async fn exec_tkg_query(pool: &sqlx::PgPool, args: &serde_json::Value) -> Result
|
||||
))
|
||||
.bind(file_uuid)
|
||||
.fetch_optional(pool)
|
||||
.await.map_err(|e| e.to_string())?;
|
||||
.await
|
||||
.map_err(|e| e.to_string())?;
|
||||
Ok(serde_json::json!({"file_info": row.map(|(n, d, w, h, f)| serde_json::json!({"file_name": n, "duration_sec": d, "width": w, "height": h, "fps": f}))}).to_string())
|
||||
}
|
||||
_ => Ok(serde_json::json!({"error": format!("Unknown query_type: {}", query_type)}).to_string()),
|
||||
"speaker_dialogue" => {
|
||||
let name = identity_name.unwrap_or("");
|
||||
let rows: Vec<(String, Option<String>)> = sqlx::query_as(&format!(
|
||||
"SELECT DISTINCT sn.external_id, sn.properties->>'full_text' AS full_text \
|
||||
FROM {} i \
|
||||
JOIN {} fd ON fd.identity_id = i.id AND ($2::text IS NULL OR fd.file_uuid = $2) \
|
||||
JOIN {} fn ON fn.file_uuid = fd.file_uuid \
|
||||
AND fn.node_type = 'face_trace' \
|
||||
AND fn.external_id = CONCAT('trace_', fd.trace_id) \
|
||||
JOIN {} e ON e.source_node_id = fn.id \
|
||||
AND e.edge_type = 'SPEAKS_AS' \
|
||||
AND ($2::text IS NULL OR e.file_uuid = $2) \
|
||||
JOIN {} sn ON sn.id = e.target_node_id \
|
||||
WHERE i.name ILIKE $1 \
|
||||
LIMIT $3",
|
||||
id_table, fd_table, nodes, edges, nodes
|
||||
))
|
||||
.bind(name)
|
||||
.bind(file_uuid)
|
||||
.bind(limit)
|
||||
.fetch_all(pool)
|
||||
.await
|
||||
.map_err(|e| e.to_string())?;
|
||||
|
||||
Ok(
|
||||
serde_json::json!({"speakers": rows.iter().map(|(sid, text)| {
|
||||
serde_json::json!({"speaker_id": sid, "dialogue": text})
|
||||
}).collect::<Vec<_>>()})
|
||||
.to_string(),
|
||||
)
|
||||
}
|
||||
"speaker_interaction" => {
|
||||
let name_a = identity_name.unwrap_or("");
|
||||
let name_b = identity_b.unwrap_or("");
|
||||
if name_a.is_empty() || name_b.is_empty() {
|
||||
return Ok(
|
||||
serde_json::json!({"error": "identity_name and identity_b are required"})
|
||||
.to_string(),
|
||||
);
|
||||
}
|
||||
|
||||
// Get both speakers' segments from TKG
|
||||
let rows: Vec<(String, String, serde_json::Value)> = sqlx::query_as(&format!(
|
||||
"SELECT sn.external_id, sn.properties->>'full_text' AS full_text, sn.properties->'segments' AS segments \
|
||||
FROM {} i \
|
||||
JOIN {} fd ON fd.identity_id = i.id AND ($3::text IS NULL OR fd.file_uuid = $3) \
|
||||
JOIN {} fn ON fn.file_uuid = fd.file_uuid \
|
||||
AND fn.node_type = 'face_trace' \
|
||||
AND fn.external_id = CONCAT('trace_', fd.trace_id) \
|
||||
JOIN {} e ON e.source_node_id = fn.id \
|
||||
AND e.edge_type = 'SPEAKS_AS' \
|
||||
AND ($3::text IS NULL OR e.file_uuid = $3) \
|
||||
JOIN {} sn ON sn.id = e.target_node_id \
|
||||
WHERE (i.name ILIKE $1 OR i.name ILIKE $2) \
|
||||
ORDER BY sn.external_id",
|
||||
id_table, fd_table, nodes, edges, nodes
|
||||
))
|
||||
.bind(name_a)
|
||||
.bind(name_b)
|
||||
.bind(file_uuid)
|
||||
.fetch_all(pool)
|
||||
.await
|
||||
.map_err(|e| e.to_string())?;
|
||||
|
||||
let mut interactions = Vec::new();
|
||||
for i in 0..rows.len() {
|
||||
for j in i + 1..rows.len() {
|
||||
let (sid_a, text_a, segs_a_val) = &rows[i];
|
||||
let (sid_b, text_b, segs_b_val) = &rows[j];
|
||||
let segs_a = segs_a_val.as_array();
|
||||
let segs_b = segs_b_val.as_array();
|
||||
if let (Some(a_list), Some(b_list)) = (segs_a, segs_b) {
|
||||
for sa in a_list {
|
||||
let sa_start = sa.get("start").and_then(|v| v.as_f64()).unwrap_or(0.0);
|
||||
let sa_end = sa.get("end").and_then(|v| v.as_f64()).unwrap_or(0.0);
|
||||
let sa_text = sa.get("text").and_then(|v| v.as_str()).unwrap_or("");
|
||||
if sa_text.is_empty() {
|
||||
continue;
|
||||
}
|
||||
for sb in b_list {
|
||||
let sb_start =
|
||||
sb.get("start").and_then(|v| v.as_f64()).unwrap_or(0.0);
|
||||
let sb_end = sb.get("end").and_then(|v| v.as_f64()).unwrap_or(0.0);
|
||||
let sb_text = sb.get("text").and_then(|v| v.as_str()).unwrap_or("");
|
||||
if sb_text.is_empty() {
|
||||
continue;
|
||||
}
|
||||
// Check temporal overlap
|
||||
let overlap_start = sa_start.max(sb_start);
|
||||
let overlap_end = sa_end.min(sb_end);
|
||||
if overlap_start < overlap_end {
|
||||
interactions.push(serde_json::json!({
|
||||
"speaker_a": sid_a,
|
||||
"speaker_b": sid_b,
|
||||
"time_range_s": [overlap_start, overlap_end],
|
||||
"dialogue_a": sa_text,
|
||||
"dialogue_b": sb_text,
|
||||
}));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
interactions.sort_by(|a, b| {
|
||||
let a_start = a["time_range_s"][0].as_f64().unwrap_or(0.0);
|
||||
let b_start = b["time_range_s"][0].as_f64().unwrap_or(0.0);
|
||||
a_start.partial_cmp(&b_start).unwrap()
|
||||
});
|
||||
interactions.truncate(limit as usize);
|
||||
|
||||
Ok(serde_json::json!({"interactions": interactions, "speaker_a_text": rows.first().map(|r| r.1.clone()), "speaker_b_text": rows.get(1).map(|r| r.1.clone())}).to_string())
|
||||
}
|
||||
_ => Ok(
|
||||
serde_json::json!({"error": format!("Unknown query_type: {}", query_type)}).to_string(),
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
async fn exec_smart_search(_pool: &sqlx::PgPool, args: &serde_json::Value) -> Result<String, String> {
|
||||
async fn exec_smart_search(
|
||||
_pool: &sqlx::PgPool,
|
||||
args: &serde_json::Value,
|
||||
) -> Result<String, String> {
|
||||
let query = args.get("query").and_then(|v| v.as_str()).unwrap_or("");
|
||||
let file_uuid = args.get("file_uuid").and_then(|v| v.as_str());
|
||||
let limit = args.get("limit").and_then(|v| v.as_i64()).unwrap_or(5);
|
||||
@@ -359,7 +521,8 @@ async fn exec_smart_search(_pool: &sqlx::PgPool, args: &serde_json::Value) -> Re
|
||||
let chunk_table = schema::table_name("chunk");
|
||||
let mut sql = format!(
|
||||
"SELECT chunk_id, text_content, start_frame, end_frame, chunk_type \
|
||||
FROM {} WHERE text_content ILIKE $1", chunk_table
|
||||
FROM {} WHERE text_content ILIKE $1",
|
||||
chunk_table
|
||||
);
|
||||
if file_uuid.is_some() {
|
||||
sql.push_str(" AND file_uuid = $2");
|
||||
@@ -369,21 +532,147 @@ async fn exec_smart_search(_pool: &sqlx::PgPool, args: &serde_json::Value) -> Re
|
||||
if let Some(fuid) = file_uuid {
|
||||
let like = format!("%{}%", query);
|
||||
let rows: Vec<(String, Option<String>, i64, i64, String)> = sqlx::query_as(&sql)
|
||||
.bind(&like).bind(fuid)
|
||||
.bind(&like)
|
||||
.bind(fuid)
|
||||
.fetch_all(_pool)
|
||||
.await.map_err(|e| e.to_string())?;
|
||||
.await
|
||||
.map_err(|e| e.to_string())?;
|
||||
Ok(serde_json::json!({"results": rows}).to_string())
|
||||
} else {
|
||||
let like = format!("%{}%", query);
|
||||
let rows: Vec<(String, Option<String>, i64, i64, String)> = sqlx::query_as(&sql)
|
||||
.bind(&like)
|
||||
.fetch_all(_pool)
|
||||
.await.map_err(|e| e.to_string())?;
|
||||
.await
|
||||
.map_err(|e| e.to_string())?;
|
||||
Ok(serde_json::json!({"results": rows}).to_string())
|
||||
}
|
||||
}
|
||||
|
||||
async fn exec_get_identity_detail(pool: &sqlx::PgPool, args: &serde_json::Value) -> Result<String, String> {
|
||||
async fn exec_identity_text(
|
||||
pool: &sqlx::PgPool,
|
||||
args: &serde_json::Value,
|
||||
) -> Result<String, String> {
|
||||
let q = args.get("q").and_then(|v| v.as_str()).unwrap_or("");
|
||||
let file_uuid = args.get("file_uuid").and_then(|v| v.as_str());
|
||||
let limit = args
|
||||
.get("limit")
|
||||
.and_then(|v| v.as_i64())
|
||||
.unwrap_or(10)
|
||||
.min(50);
|
||||
|
||||
let chunk_table = schema::table_name("chunk");
|
||||
let fd_table = schema::table_name("face_detections");
|
||||
let id_table = schema::table_name("identities");
|
||||
let like_q = format!("%{}%", q.replace('%', "%%"));
|
||||
|
||||
let sql = format!(
|
||||
"SELECT c.chunk_id, c.start_time, c.end_time, c.text_content, \
|
||||
i.name AS identity_name, fd.trace_id, i.source AS identity_source \
|
||||
FROM {} c \
|
||||
JOIN {} fd ON fd.file_uuid = c.file_uuid \
|
||||
AND fd.frame_number BETWEEN c.start_frame AND c.end_frame \
|
||||
AND fd.identity_id IS NOT NULL \
|
||||
JOIN {} i ON i.id = fd.identity_id \
|
||||
WHERE ($1::text IS NULL OR c.file_uuid = $1) \
|
||||
AND (LOWER(c.text_content) LIKE LOWER($2) OR LOWER(c.content::text) LIKE LOWER($2)) \
|
||||
ORDER BY c.start_time \
|
||||
LIMIT $3",
|
||||
chunk_table, fd_table, id_table
|
||||
);
|
||||
|
||||
let rows: Vec<(
|
||||
String,
|
||||
f64,
|
||||
f64,
|
||||
Option<String>,
|
||||
String,
|
||||
Option<i32>,
|
||||
String,
|
||||
)> = sqlx::query_as(&sql)
|
||||
.bind(file_uuid)
|
||||
.bind(&like_q)
|
||||
.bind(limit)
|
||||
.fetch_all(pool)
|
||||
.await
|
||||
.map_err(|e| e.to_string())?;
|
||||
|
||||
Ok(
|
||||
serde_json::json!({"results": rows.iter().map(|(chunk_id, st, et, txt, name, tid, src)| {
|
||||
serde_json::json!({
|
||||
"chunk_id": chunk_id,
|
||||
"start_time": st,
|
||||
"end_time": et,
|
||||
"text": txt,
|
||||
"identity_name": name,
|
||||
"trace_id": tid,
|
||||
"source": src
|
||||
})
|
||||
} ).collect::<Vec<_>>()})
|
||||
.to_string(),
|
||||
)
|
||||
}
|
||||
|
||||
async fn exec_identities_search(
|
||||
pool: &sqlx::PgPool,
|
||||
args: &serde_json::Value,
|
||||
) -> Result<String, String> {
|
||||
let q = args.get("q").and_then(|v| v.as_str()).unwrap_or("");
|
||||
let file_uuid = args.get("file_uuid").and_then(|v| v.as_str());
|
||||
let limit = args
|
||||
.get("limit")
|
||||
.and_then(|v| v.as_i64())
|
||||
.unwrap_or(10)
|
||||
.min(50);
|
||||
|
||||
let id_table = schema::table_name("identities");
|
||||
let fd_table = schema::table_name("face_detections");
|
||||
let chunk_table = schema::table_name("chunk");
|
||||
let like_q = format!("%{}%", q.replace('%', "%%"));
|
||||
|
||||
let sql = format!(
|
||||
"SELECT DISTINCT ON (i.name, c.chunk_id) \
|
||||
i.name, c.chunk_id, c.start_time, c.end_time, c.text_content, fd.trace_id \
|
||||
FROM {} i \
|
||||
JOIN {} fd ON fd.identity_id = i.id \
|
||||
JOIN {} c ON c.file_uuid = fd.file_uuid \
|
||||
AND c.start_time <= fd.frame_number / COALESCE(c.fps, 25.0) \
|
||||
AND c.end_time >= fd.frame_number / COALESCE(c.fps, 25.0) \
|
||||
WHERE (i.name ILIKE $1 \
|
||||
OR EXISTS (SELECT 1 FROM jsonb_array_elements(i.metadata->'aliases') AS a WHERE a->>'name' ILIKE $1)) \
|
||||
AND ($2::text IS NULL OR fd.file_uuid = $2) \
|
||||
ORDER BY i.name, c.chunk_id, c.start_time \
|
||||
LIMIT $3",
|
||||
id_table, fd_table, chunk_table
|
||||
);
|
||||
|
||||
let rows: Vec<(String, String, f64, f64, Option<String>, Option<i32>)> = sqlx::query_as(&sql)
|
||||
.bind(&like_q)
|
||||
.bind(file_uuid)
|
||||
.bind(limit)
|
||||
.fetch_all(pool)
|
||||
.await
|
||||
.map_err(|e| e.to_string())?;
|
||||
|
||||
Ok(
|
||||
serde_json::json!({"results": rows.iter().map(|(name, chunk_id, st, et, txt, tid)| {
|
||||
serde_json::json!({
|
||||
"identity_name": name,
|
||||
"chunk_id": chunk_id,
|
||||
"start_time": st,
|
||||
"end_time": et,
|
||||
"text": txt,
|
||||
"trace_id": tid,
|
||||
})
|
||||
}).collect::<Vec<_>>()})
|
||||
.to_string(),
|
||||
)
|
||||
}
|
||||
|
||||
async fn exec_get_identity_detail(
|
||||
pool: &sqlx::PgPool,
|
||||
args: &serde_json::Value,
|
||||
) -> Result<String, String> {
|
||||
let name = args.get("name").and_then(|v| v.as_str()).unwrap_or("");
|
||||
let id_table = schema::table_name("identities");
|
||||
let row: Option<(String, String, Option<String>, Option<i32>, Option<String>)> = sqlx::query_as(&format!(
|
||||
@@ -396,7 +685,10 @@ async fn exec_get_identity_detail(pool: &sqlx::PgPool, args: &serde_json::Value)
|
||||
Ok(serde_json::json!({"identity": row.map(|(u, n, s, t, c)| serde_json::json!({"uuid": u, "name": n, "source": s, "tmdb_id": t, "character": c}))}).to_string())
|
||||
}
|
||||
|
||||
async fn exec_get_file_info(pool: &sqlx::PgPool, args: &serde_json::Value) -> Result<String, String> {
|
||||
async fn exec_get_file_info(
|
||||
pool: &sqlx::PgPool,
|
||||
args: &serde_json::Value,
|
||||
) -> Result<String, String> {
|
||||
let file_uuid = args.get("file_uuid").and_then(|v| v.as_str()).unwrap_or("");
|
||||
let videos = schema::table_name("videos");
|
||||
let row: Option<(String, f64, i32, i32, f64)> = sqlx::query_as(&format!(
|
||||
@@ -405,11 +697,15 @@ async fn exec_get_file_info(pool: &sqlx::PgPool, args: &serde_json::Value) -> Re
|
||||
))
|
||||
.bind(file_uuid)
|
||||
.fetch_optional(pool)
|
||||
.await.map_err(|e| e.to_string())?;
|
||||
.await
|
||||
.map_err(|e| e.to_string())?;
|
||||
Ok(serde_json::json!({"file_info": row.map(|(n, d, w, h, f)| serde_json::json!({"file_name": n, "duration_sec": d, "width": w, "height": h, "fps": f}))}).to_string())
|
||||
}
|
||||
|
||||
async fn exec_get_representative_frame(pool: &sqlx::PgPool, args: &serde_json::Value) -> Result<String, String> {
|
||||
async fn exec_get_representative_frame(
|
||||
pool: &sqlx::PgPool,
|
||||
args: &serde_json::Value,
|
||||
) -> Result<String, String> {
|
||||
let file_uuid = args.get("file_uuid").and_then(|v| v.as_str()).unwrap_or("");
|
||||
match crate::core::processor::tkg::query_auto_representative_frame(pool, file_uuid).await {
|
||||
Ok(r) => Ok(serde_json::json!({
|
||||
@@ -417,24 +713,131 @@ async fn exec_get_representative_frame(pool: &sqlx::PgPool, args: &serde_json::V
|
||||
"face_quality": r.face_quality,
|
||||
"main_identities": r.main_identities,
|
||||
"traces": r.traces,
|
||||
}).to_string()),
|
||||
})
|
||||
.to_string()),
|
||||
Err(e) => Ok(serde_json::json!({"error": e.to_string()}).to_string()),
|
||||
}
|
||||
}
|
||||
|
||||
async fn exec_analyze_frame(
|
||||
pool: &sqlx::PgPool,
|
||||
args: &serde_json::Value,
|
||||
) -> Result<String, String> {
|
||||
let file_uuid = args.get("file_uuid").and_then(|v| v.as_str()).unwrap_or("");
|
||||
let question = args
|
||||
.get("question")
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or("請描述這個畫面中的內容");
|
||||
|
||||
if file_uuid.is_empty() {
|
||||
return Ok(serde_json::json!({"error": "file_uuid is required"}).to_string());
|
||||
}
|
||||
|
||||
let videos = schema::table_name("videos");
|
||||
let (video_path, fps): (String, f64) = sqlx::query_as(&format!(
|
||||
"SELECT file_path, COALESCE(fps, 25.0) FROM {} WHERE file_uuid = $1",
|
||||
videos
|
||||
))
|
||||
.bind(file_uuid)
|
||||
.fetch_optional(pool)
|
||||
.await
|
||||
.map_err(|e| e.to_string())?
|
||||
.ok_or_else(|| "Video not found".to_string())?;
|
||||
|
||||
let frame_number = match args.get("frame_number").and_then(|v| v.as_i64()) {
|
||||
Some(f) => f,
|
||||
None => {
|
||||
match crate::core::processor::tkg::query_auto_representative_frame(pool, file_uuid)
|
||||
.await
|
||||
{
|
||||
Ok(r) => r.frame_number,
|
||||
Err(_) => {
|
||||
let duration: f64 = sqlx::query_scalar(&format!(
|
||||
"SELECT COALESCE(duration, 0) FROM {} WHERE file_uuid = $1",
|
||||
videos
|
||||
))
|
||||
.bind(file_uuid)
|
||||
.fetch_optional(pool)
|
||||
.await
|
||||
.map_err(|e| e.to_string())?
|
||||
.unwrap_or(0.0);
|
||||
if duration > 0.0 {
|
||||
((duration / 2.0) * fps) as i64
|
||||
} else {
|
||||
0
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
let timestamp_secs = frame_number as f64 / fps;
|
||||
|
||||
let ffmpeg_path = std::env::var("MOMENTRY_FFMPEG").unwrap_or_else(|_| {
|
||||
let full = "/opt/homebrew/opt/ffmpeg-full/bin/ffmpeg";
|
||||
if std::path::Path::new(full).exists() {
|
||||
full.to_string()
|
||||
} else {
|
||||
"ffmpeg".to_string()
|
||||
}
|
||||
});
|
||||
|
||||
let output = tokio::process::Command::new(&ffmpeg_path)
|
||||
.args([
|
||||
"-ss",
|
||||
&format!("{:.3}", timestamp_secs),
|
||||
"-i",
|
||||
&video_path,
|
||||
"-vframes",
|
||||
"1",
|
||||
"-f",
|
||||
"image2pipe",
|
||||
"-vcodec",
|
||||
"mjpeg",
|
||||
"-",
|
||||
])
|
||||
.output()
|
||||
.await
|
||||
.map_err(|e| format!("ffmpeg execution error: {}", e))?;
|
||||
|
||||
if !output.status.success() {
|
||||
let stderr = String::from_utf8_lossy(&output.stderr);
|
||||
return Ok(serde_json::json!({"error": format!("ffmpeg failed: {}", stderr)}).to_string());
|
||||
}
|
||||
|
||||
let base64_img = BASE64.encode(&output.stdout);
|
||||
|
||||
let system_prompt =
|
||||
"你是一個專業的影片畫面分析助手。請根據提供的畫面以及用戶的問題,詳細描述畫面中的內容,包括場景、人物、動作、表情、物件等。請用繁體中文回答。";
|
||||
let vision_result = call_llm_vision(system_prompt, question, vec![base64_img], 1024, 120)
|
||||
.await
|
||||
.map_err(|e| e.to_string())?;
|
||||
|
||||
Ok(serde_json::json!({
|
||||
"frame_number": frame_number,
|
||||
"timestamp_secs": timestamp_secs,
|
||||
"analysis": vision_result,
|
||||
})
|
||||
.to_string())
|
||||
}
|
||||
|
||||
// ── Tool Router ───────────────────────────────────────────────────
|
||||
|
||||
async fn execute_tool(pool: &sqlx::PgPool, tool_call: &ToolCall) -> (String, String, String) {
|
||||
let name = tool_call.function.name.clone();
|
||||
let args: serde_json::Value = serde_json::from_str(&tool_call.function.arguments).unwrap_or_default();
|
||||
let args: serde_json::Value =
|
||||
serde_json::from_str(&tool_call.function.arguments).unwrap_or_default();
|
||||
let result = match name.as_str() {
|
||||
"find_file" => exec_find_file(pool, &args).await,
|
||||
"list_files" => exec_list_files(pool, &args).await,
|
||||
"tkg_query" => exec_tkg_query(pool, &args).await,
|
||||
"smart_search" => exec_smart_search(pool, &args).await,
|
||||
"identity_text" => exec_identity_text(pool, &args).await,
|
||||
"identities_search" => exec_identities_search(pool, &args).await,
|
||||
"get_identity_detail" => exec_get_identity_detail(pool, &args).await,
|
||||
"get_file_info" => exec_get_file_info(pool, &args).await,
|
||||
"get_representative_frame" => exec_get_representative_frame(pool, &args).await,
|
||||
"analyze_frame" => exec_analyze_frame(pool, &args).await,
|
||||
_ => Err(format!("Unknown tool: {}", name)),
|
||||
};
|
||||
let content = match result {
|
||||
@@ -476,7 +879,11 @@ async fn run_tool_loop(
|
||||
for call in &calls {
|
||||
let (tool_call_id, name, content) = execute_tool(pool, call).await;
|
||||
sources.push(serde_json::json!({"tool": name, "result": content}));
|
||||
messages.push(function_calling::make_tool_result(&tool_call_id, &name, &content));
|
||||
messages.push(function_calling::make_tool_result(
|
||||
&tool_call_id,
|
||||
&name,
|
||||
&content,
|
||||
));
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
@@ -484,7 +891,10 @@ async fn run_tool_loop(
|
||||
}
|
||||
}
|
||||
}
|
||||
("已達到最大查詢次數,請縮小問題範圍後重新詢問。".to_string(), sources)
|
||||
(
|
||||
"已達到最大查詢次數,請縮小問題範圍後重新詢問。".to_string(),
|
||||
sources,
|
||||
)
|
||||
}
|
||||
|
||||
// ── Handler ───────────────────────────────────────────────────────
|
||||
@@ -495,13 +905,8 @@ async fn agent_search(
|
||||
) -> Result<Json<AgentSearchResponse>, (StatusCode, Json<serde_json::Value>)> {
|
||||
let (conv_id, history) = get_or_create_conv(req.conversation_id.as_deref());
|
||||
|
||||
let (answer, sources) = run_tool_loop(
|
||||
state.db.pool(),
|
||||
SYSTEM_PROMPT,
|
||||
&req.query,
|
||||
history,
|
||||
)
|
||||
.await;
|
||||
let (answer, sources) =
|
||||
run_tool_loop(state.db.pool(), SYSTEM_PROMPT, &req.query, history).await;
|
||||
|
||||
// Save updated messages for conversation continuation
|
||||
let new_msgs = function_calling::build_conversation(SYSTEM_PROMPT, &req.query, vec![]);
|
||||
@@ -509,7 +914,11 @@ async fn agent_search(
|
||||
|
||||
let needs_input = answer.contains('?') || answer.contains('?');
|
||||
let suggestions = if needs_input {
|
||||
Some(vec!["演員名".to_string(), "電影片名".to_string(), "年份".to_string()])
|
||||
Some(vec![
|
||||
"演員名".to_string(),
|
||||
"電影片名".to_string(),
|
||||
"年份".to_string(),
|
||||
])
|
||||
} else {
|
||||
None
|
||||
};
|
||||
@@ -526,6 +935,5 @@ async fn agent_search(
|
||||
// ── Routes ─────────────────────────────────────────────────────────
|
||||
|
||||
pub fn agent_search_routes() -> Router<AppState> {
|
||||
Router::new()
|
||||
.route("/api/v1/agents/search", post(agent_search))
|
||||
Router::new().route("/api/v1/agents/search", post(agent_search))
|
||||
}
|
||||
|
||||
@@ -8,8 +8,7 @@ async fn doc_redirect() -> axum::response::Redirect {
|
||||
|
||||
async fn wasm_doc_handler() -> Result<impl axum::response::IntoResponse, (StatusCode, &'static str)>
|
||||
{
|
||||
let path =
|
||||
std::path::Path::new("/Users/accusys/momentry_core/docs_v1.0/doc_wasm/index.html");
|
||||
let path = std::path::Path::new("/Users/accusys/momentry_core/docs_v1.0/doc_wasm/index.html");
|
||||
match tokio::fs::read_to_string(path).await {
|
||||
Ok(html) => Ok(([("content-type", "text/html; charset=utf-8")], html)),
|
||||
Err(_) => Err((StatusCode::NOT_FOUND, "Doc not found")),
|
||||
|
||||
198
src/api/files.rs
198
src/api/files.rs
@@ -12,7 +12,7 @@ use std::collections::HashMap;
|
||||
use super::types::AppState;
|
||||
use crate::core::config;
|
||||
use crate::core::db::schema;
|
||||
use crate::core::db::{Database, PostgresDb};
|
||||
use crate::core::db::{Database, PostgresDb, QdrantDb, RedisClient};
|
||||
use crate::core::storage::content_hash;
|
||||
use crate::FileManager;
|
||||
|
||||
@@ -767,17 +767,7 @@ async fn register_file(
|
||||
if let Some(ref vp) = video_path {
|
||||
if let Ok(job) = auto_state.db.create_monitor_job(&auto_uuid, Some(vp)).await {
|
||||
tracing::info!("[AUTO-PIPELINE] Job {} created for {}", job.id, auto_uuid);
|
||||
let all_procs: Vec<&str> = vec![
|
||||
"asr",
|
||||
"cut",
|
||||
"yolo",
|
||||
"ocr",
|
||||
"face",
|
||||
"pose",
|
||||
"asrx",
|
||||
"visual_chunk",
|
||||
"5w1h",
|
||||
];
|
||||
let all_procs: Vec<&str> = vec!["cut", "yolo", "ocr", "face", "pose", "asrx"];
|
||||
let total = sqlx::query_scalar::<_, i64>(&format!(
|
||||
"SELECT COALESCE(total_frames, 0) FROM {} WHERE file_uuid = $1",
|
||||
schema::table_name("videos")
|
||||
@@ -986,6 +976,10 @@ struct UnregisterResponse {
|
||||
deleted_face_detections: u64,
|
||||
deleted_processor_results: u64,
|
||||
deleted_chunks: u64,
|
||||
deleted_tkg_nodes: u64,
|
||||
deleted_qdrant_vectors: Option<u64>,
|
||||
deleted_redis_keys: Option<u64>,
|
||||
deleted_output_files: u64,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
@@ -994,18 +988,30 @@ struct UnregisterRequest {
|
||||
file_path: Option<String>,
|
||||
}
|
||||
|
||||
fn delete_output_files(uuid: &str) {
|
||||
let output_dir = config::OUTPUT_DIR.to_string();
|
||||
if let Ok(entries) = std::fs::read_dir(&output_dir) {
|
||||
for entry in entries.flatten() {
|
||||
let path = entry.path();
|
||||
if let Some(name) = path.file_name().and_then(|n| n.to_str()) {
|
||||
if name.starts_with(uuid) {
|
||||
let _ = std::fs::remove_file(&path);
|
||||
fn delete_output_files(uuid: &str) -> u64 {
|
||||
let mut deleted_count = 0u64;
|
||||
let output_dirs = [
|
||||
config::OUTPUT_DIR.to_string(),
|
||||
"/Users/accusys/momentry/output_dev".to_string(),
|
||||
"/Users/accusys/momentry/output".to_string(),
|
||||
];
|
||||
|
||||
for output_dir in &output_dirs {
|
||||
if let Ok(entries) = std::fs::read_dir(output_dir) {
|
||||
for entry in entries.flatten() {
|
||||
let path = entry.path();
|
||||
if let Some(name) = path.file_name().and_then(|n| n.to_str()) {
|
||||
if name.starts_with(uuid) && name.ends_with(".json") {
|
||||
if std::fs::remove_file(&path).is_ok() {
|
||||
deleted_count += 1;
|
||||
tracing::info!("[UNREGISTER] Deleted output file: {}", name);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
deleted_count
|
||||
}
|
||||
|
||||
async fn unregister(
|
||||
@@ -1024,65 +1030,54 @@ async fn unregister(
|
||||
let processor_table = schema::table_name("processor_results");
|
||||
let chunks_table = schema::table_name("chunk");
|
||||
let parent_chunks_table = schema::table_name("parent_chunks");
|
||||
|
||||
let deleted_faces: i64 =
|
||||
sqlx::query(&format!("DELETE FROM {} WHERE file_uuid = $1", face_table))
|
||||
.bind(&uuid)
|
||||
.execute(state.db.pool())
|
||||
.await
|
||||
.map_err(|e| {
|
||||
tracing::error!("[unregister] Failed to delete faces: {}", e);
|
||||
StatusCode::INTERNAL_SERVER_ERROR
|
||||
})?
|
||||
.rows_affected() as i64;
|
||||
|
||||
let deleted_processors: i64 = sqlx::query(&format!(
|
||||
"DELETE FROM {} WHERE file_uuid = $1",
|
||||
processor_table
|
||||
))
|
||||
.bind(&uuid)
|
||||
.execute(state.db.pool())
|
||||
.await
|
||||
.map_err(|e| {
|
||||
tracing::error!("[unregister] Failed to delete processors: {}", e);
|
||||
StatusCode::INTERNAL_SERVER_ERROR
|
||||
})?
|
||||
.rows_affected() as i64;
|
||||
|
||||
let deleted_parent_chunks: i64 = sqlx::query(&format!(
|
||||
"DELETE FROM {} WHERE uuid = $1",
|
||||
parent_chunks_table
|
||||
))
|
||||
.bind(&uuid)
|
||||
.execute(state.db.pool())
|
||||
.await
|
||||
.map_err(|e| {
|
||||
tracing::error!("[unregister] Failed to delete parent chunks: {}", e);
|
||||
StatusCode::INTERNAL_SERVER_ERROR
|
||||
})?
|
||||
.rows_affected() as i64;
|
||||
|
||||
let deleted_chunks: i64 = sqlx::query(&format!("DELETE FROM {} WHERE file_uuid = $1", chunks_table))
|
||||
.bind(&uuid)
|
||||
.execute(state.db.pool())
|
||||
.await
|
||||
.map_err(|e| {
|
||||
tracing::error!("[unregister] Failed to delete chunks: {}", e);
|
||||
StatusCode::INTERNAL_SERVER_ERROR
|
||||
})?
|
||||
.rows_affected() as i64;
|
||||
|
||||
// Delete pre_chunks
|
||||
let pre_chunks_table = schema::table_name("pre_chunks");
|
||||
let deleted_pre_chunks: i64 = sqlx::query(&format!(
|
||||
"DELETE FROM {} WHERE file_uuid = $1",
|
||||
pre_chunks_table
|
||||
let tkg_nodes_table = schema::table_name("tkg_nodes");
|
||||
let cuts_table = schema::table_name("cuts");
|
||||
let strangers_table = schema::table_name("strangers");
|
||||
let chunk_vectors_table = schema::table_name("chunk_vectors");
|
||||
let monitor_jobs_table = schema::table_name("monitor_jobs");
|
||||
let frames_table = schema::table_name("frames");
|
||||
|
||||
let mut tx = state.db.pool().begin().await.map_err(|e| {
|
||||
tracing::error!("[unregister] Failed to start transaction: {}", e);
|
||||
StatusCode::INTERNAL_SERVER_ERROR
|
||||
})?;
|
||||
|
||||
macro_rules! delete_safe {
|
||||
($table:expr, $where:expr, $bind:expr, $label:expr) => {{
|
||||
sqlx::query(&format!("DELETE FROM {} WHERE {}", $table, $where))
|
||||
.bind($bind)
|
||||
.execute(&mut *tx)
|
||||
.await
|
||||
.map_err(|e| {
|
||||
tracing::error!("[unregister] Failed to delete {}: {}", $label, e);
|
||||
StatusCode::INTERNAL_SERVER_ERROR
|
||||
})?
|
||||
.rows_affected() as i64
|
||||
}};
|
||||
}
|
||||
|
||||
let deleted_faces = delete_safe!(face_table, "file_uuid = $1", &uuid, "faces");
|
||||
let deleted_processors = delete_safe!(processor_table, "file_uuid = $1", &uuid, "processors");
|
||||
let deleted_parent_chunks =
|
||||
delete_safe!(parent_chunks_table, "uuid = $1", &uuid, "parent chunks");
|
||||
let deleted_chunks = delete_safe!(chunks_table, "file_uuid = $1", &uuid, "chunks");
|
||||
let deleted_pre_chunks = delete_safe!(pre_chunks_table, "file_uuid = $1", &uuid, "pre_chunks");
|
||||
let deleted_tkg_nodes = delete_safe!(tkg_nodes_table, "file_uuid = $1", &uuid, "TKG nodes");
|
||||
let deleted_cuts = delete_safe!(cuts_table, "file_uuid = $1", &uuid, "cuts");
|
||||
let deleted_strangers = delete_safe!(strangers_table, "file_uuid = $1", &uuid, "strangers");
|
||||
let deleted_chunk_vectors =
|
||||
delete_safe!(chunk_vectors_table, "uuid = $1", &uuid, "chunk vectors");
|
||||
let deleted_monitor_jobs = delete_safe!(monitor_jobs_table, "uuid = $1", &uuid, "monitor jobs");
|
||||
let deleted_frames: i64 = sqlx::query(&format!(
|
||||
"DELETE FROM {} WHERE file_id = (SELECT id FROM {} WHERE file_uuid = $1)",
|
||||
frames_table, videos_table
|
||||
))
|
||||
.bind(&uuid)
|
||||
.execute(state.db.pool())
|
||||
.execute(&mut *tx)
|
||||
.await
|
||||
.map_err(|e| {
|
||||
tracing::error!("[unregister] Failed to delete pre_chunks: {}", e);
|
||||
tracing::error!("[unregister] Failed to delete frames: {}", e);
|
||||
StatusCode::INTERNAL_SERVER_ERROR
|
||||
})?
|
||||
.rows_affected() as i64;
|
||||
@@ -1092,14 +1087,59 @@ async fn unregister(
|
||||
videos_table
|
||||
))
|
||||
.bind(&uuid)
|
||||
.execute(state.db.pool())
|
||||
.execute(&mut *tx)
|
||||
.await
|
||||
.map_err(|e| {
|
||||
tracing::error!("[unregister] Failed: {}", e);
|
||||
StatusCode::INTERNAL_SERVER_ERROR
|
||||
})?;
|
||||
|
||||
delete_output_files(&uuid);
|
||||
tx.commit().await.map_err(|e| {
|
||||
tracing::error!("[unregister] Failed to commit transaction: {}", e);
|
||||
StatusCode::INTERNAL_SERVER_ERROR
|
||||
})?;
|
||||
|
||||
tracing::info!(
|
||||
"[UNREGISTER] Deleted: {} faces, {} processors, {} parent_chunks, {} chunks, {} pre_chunks, {} tkg_nodes, {} cuts, {} strangers, {} chunk_vectors, {} monitor_jobs, {} frames",
|
||||
deleted_faces, deleted_processors, deleted_parent_chunks, deleted_chunks,
|
||||
deleted_pre_chunks, deleted_tkg_nodes, deleted_cuts, deleted_strangers,
|
||||
deleted_chunk_vectors, deleted_monitor_jobs, deleted_frames
|
||||
);
|
||||
|
||||
let deleted_output_files = delete_output_files(&uuid);
|
||||
|
||||
let deleted_qdrant_vectors = {
|
||||
let qdrant = QdrantDb::new();
|
||||
match qdrant.delete_by_uuid(&uuid).await {
|
||||
Ok(_) => {
|
||||
tracing::info!("[UNREGISTER] Deleted Qdrant vectors for {}", uuid);
|
||||
Some(1)
|
||||
}
|
||||
Err(e) => {
|
||||
tracing::warn!("[UNREGISTER] Failed to delete Qdrant vectors: {}", e);
|
||||
None
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
let deleted_redis_keys = {
|
||||
match RedisClient::new() {
|
||||
Ok(redis) => match redis.delete_worker_job(&uuid).await {
|
||||
Ok(_) => {
|
||||
tracing::info!("[UNREGISTER] Deleted Redis keys for {}", uuid);
|
||||
Some(1)
|
||||
}
|
||||
Err(e) => {
|
||||
tracing::warn!("[UNREGISTER] Failed to delete Redis keys: {}", e);
|
||||
None
|
||||
}
|
||||
},
|
||||
Err(e) => {
|
||||
tracing::warn!("[UNREGISTER] Failed to create Redis client: {}", e);
|
||||
None
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
Ok(Json(UnregisterResponse {
|
||||
success: true,
|
||||
@@ -1107,7 +1147,11 @@ async fn unregister(
|
||||
file_uuid: uuid,
|
||||
deleted_face_detections: deleted_faces as u64,
|
||||
deleted_processor_results: deleted_processors as u64,
|
||||
deleted_chunks: (deleted_chunks + deleted_parent_chunks) as u64,
|
||||
deleted_chunks: (deleted_chunks + deleted_parent_chunks + deleted_pre_chunks) as u64,
|
||||
deleted_tkg_nodes: deleted_tkg_nodes as u64,
|
||||
deleted_qdrant_vectors,
|
||||
deleted_redis_keys,
|
||||
deleted_output_files,
|
||||
}))
|
||||
}
|
||||
|
||||
|
||||
@@ -471,7 +471,7 @@ async fn store_parent_summary(
|
||||
"sentence_count": sentences.len(),
|
||||
});
|
||||
sqlx::query(&format!(
|
||||
r#"UPDATE {} SET summary_text = $1, metadata = metadata || $2::jsonb
|
||||
r#"UPDATE {} SET summary_text = $1, metadata = jsonb_deep_merge(COALESCE(metadata, '{{}}'::jsonb), $2::jsonb)
|
||||
WHERE chunk_id = $3 AND file_uuid = $4"#,
|
||||
table
|
||||
))
|
||||
@@ -743,7 +743,7 @@ pub async fn run_5w1h_agent(db: &PostgresDb, file_uuid: &str) -> anyhow::Result<
|
||||
|
||||
// Auto-vectorize sentences with EmbeddingGemma (768D)
|
||||
tracing::info!("[5W1H] Starting vectorize for sentence chunks...");
|
||||
let embedder = Embedder::new("embeddinggemma-300M-Q8_0.gguf".to_string());
|
||||
let embedder = Embedder::new("embeddinggemma-300m".to_string());
|
||||
let qdrant = QdrantDb::new();
|
||||
qdrant.init_collection(768).await?;
|
||||
|
||||
|
||||
@@ -388,10 +388,18 @@ async fn health_detailed(State(state): State<AppState>) -> Json<DetailedHealthRe
|
||||
let directory_exists = identities_root.is_dir();
|
||||
let files_count = crate::core::identity::storage::count_identity_files();
|
||||
let index_ok = crate::core::identity::storage::read_index().is_ok();
|
||||
let db_count: i64 = sqlx::query_scalar("SELECT COUNT(*) FROM identities")
|
||||
let id_cnt: i64 = sqlx::query_scalar("SELECT COUNT(*) FROM identities")
|
||||
.fetch_one(state.db.pool())
|
||||
.await
|
||||
.unwrap_or(0);
|
||||
let st_cnt: i64 = sqlx::query_scalar(&format!(
|
||||
"SELECT COUNT(*) FROM {} WHERE file_uuid IS NOT NULL",
|
||||
crate::core::db::schema::table_name("strangers")
|
||||
))
|
||||
.fetch_one(state.db.pool())
|
||||
.await
|
||||
.unwrap_or(0);
|
||||
let db_count = id_cnt + st_cnt;
|
||||
IdentityHealth {
|
||||
directory_exists,
|
||||
files_count,
|
||||
|
||||
@@ -220,8 +220,8 @@ async fn list_identities(
|
||||
.await
|
||||
.unwrap_or(0);
|
||||
let auto_identities: i64 = sqlx::query_scalar(&format!(
|
||||
"SELECT COUNT(*) FROM {} WHERE source = 'auto'",
|
||||
identities_table
|
||||
"SELECT COUNT(*) FROM {} WHERE file_uuid IS NOT NULL",
|
||||
crate::core::db::schema::table_name("strangers")
|
||||
))
|
||||
.fetch_one(db.pool())
|
||||
.await
|
||||
@@ -258,7 +258,7 @@ pub struct FaceCandidate {
|
||||
pub id: i32,
|
||||
pub face_id: Option<String>,
|
||||
pub file_uuid: String,
|
||||
pub frame_number: i32,
|
||||
pub frame_number: i64,
|
||||
pub confidence: f32,
|
||||
pub bbox: Option<serde_json::Value>,
|
||||
pub attributes: Option<serde_json::Value>,
|
||||
@@ -352,7 +352,7 @@ async fn list_face_candidates(
|
||||
|
||||
let rows = if let Some(file_uuid) = &query.file_uuid {
|
||||
let sql = format!(
|
||||
"SELECT id, face_id, file_uuid, frame_number::int, confidence::float4,
|
||||
"SELECT id, face_id, file_uuid, frame_number::bigint, confidence::float4,
|
||||
jsonb_build_object('x', x, 'y', y, 'width', width, 'height', height) as bbox,
|
||||
NULL::jsonb as attributes
|
||||
FROM {}
|
||||
@@ -367,7 +367,7 @@ async fn list_face_candidates(
|
||||
i32,
|
||||
Option<String>,
|
||||
String,
|
||||
i32,
|
||||
i64,
|
||||
f32,
|
||||
Option<serde_json::Value>,
|
||||
Option<serde_json::Value>,
|
||||
@@ -390,7 +390,7 @@ async fn list_face_candidates(
|
||||
}
|
||||
} else {
|
||||
let sql = format!(
|
||||
"SELECT id, face_id, file_uuid, frame_number::int, confidence::float4,
|
||||
"SELECT id, face_id, file_uuid, frame_number::bigint, confidence::float4,
|
||||
jsonb_build_object('x', x, 'y', y, 'width', width, 'height', height) as bbox,
|
||||
NULL::jsonb as attributes
|
||||
FROM {}
|
||||
@@ -405,7 +405,7 @@ async fn list_face_candidates(
|
||||
i32,
|
||||
Option<String>,
|
||||
String,
|
||||
i32,
|
||||
i64,
|
||||
f32,
|
||||
Option<serde_json::Value>,
|
||||
Option<serde_json::Value>,
|
||||
|
||||
@@ -640,8 +640,9 @@ async fn match_faces_iterative(pool: &sqlx::PgPool, file_uuid: &str) -> anyhow::
|
||||
);
|
||||
|
||||
// Step 2: 載入所有 face_detections(含 frame_number),按 trace_id 分組
|
||||
// frame_number is BIGINT (i64) in database
|
||||
let fd_table = schema::table_name("face_detections");
|
||||
let fd_rows = sqlx::query_as::<_, (i32, i32, Vec<f32>)>(&format!(
|
||||
let fd_rows = sqlx::query_as::<_, (i32, i64, Vec<f32>)>(&format!(
|
||||
"SELECT trace_id, frame_number, embedding FROM {} \
|
||||
WHERE file_uuid=$1 AND trace_id IS NOT NULL AND embedding IS NOT NULL \
|
||||
ORDER BY trace_id, frame_number",
|
||||
@@ -658,7 +659,7 @@ async fn match_faces_iterative(pool: &sqlx::PgPool, file_uuid: &str) -> anyhow::
|
||||
|
||||
// 分組:trace_id → (frame_number, embedding)
|
||||
use std::collections::HashMap;
|
||||
let mut trace_faces_raw: HashMap<i32, Vec<(i32, Vec<f32>)>> = HashMap::new();
|
||||
let mut trace_faces_raw: HashMap<i32, Vec<(i64, Vec<f32>)>> = HashMap::new();
|
||||
for (tid, frame, emb) in &fd_rows {
|
||||
trace_faces_raw
|
||||
.entry(*tid)
|
||||
@@ -723,6 +724,7 @@ async fn match_faces_iterative(pool: &sqlx::PgPool, file_uuid: &str) -> anyhow::
|
||||
|
||||
// Step 5: 寫入 DB — Round 1 結果先存
|
||||
let identities_table = schema::table_name("identities");
|
||||
let strangers_table = schema::table_name("strangers");
|
||||
let fd_table = schema::table_name("face_detections");
|
||||
let mut updated = 0usize;
|
||||
for (tid, name) in &matched {
|
||||
@@ -805,13 +807,28 @@ async fn match_faces_iterative(pool: &sqlx::PgPool, file_uuid: &str) -> anyhow::
|
||||
}
|
||||
}
|
||||
|
||||
// Step 6: 未匹配的 trace 設 stranger_id = trace_id
|
||||
// trace_id 在同一個 file 內是 sequential integer,直接複用為 stranger_id
|
||||
// Step 6: 未匹配的 trace 設 stranger_id = strangers.id (FK)
|
||||
// First: ensure strangers records exist
|
||||
let _ = sqlx::query(&format!(
|
||||
"INSERT INTO {} (file_uuid, trace_id) \
|
||||
SELECT $1, fd.trace_id FROM {} fd \
|
||||
WHERE fd.file_uuid = $1 AND fd.trace_id IS NOT NULL \
|
||||
AND fd.identity_id IS NULL \
|
||||
ON CONFLICT (file_uuid, trace_id) DO NOTHING",
|
||||
strangers_table, fd_table
|
||||
))
|
||||
.bind(file_uuid)
|
||||
.execute(pool)
|
||||
.await?;
|
||||
|
||||
// Then: update face_detections.stranger_id = strangers.id
|
||||
let stranger_update = sqlx::query(&format!(
|
||||
"UPDATE {} SET stranger_id = trace_id \
|
||||
WHERE file_uuid = $1 AND trace_id IS NOT NULL AND identity_id IS NULL \
|
||||
AND (stranger_id IS NULL OR stranger_id != trace_id)",
|
||||
fd_table
|
||||
"UPDATE {} fd SET stranger_id = s.id \
|
||||
FROM {} s \
|
||||
WHERE s.file_uuid = fd.file_uuid AND s.trace_id = fd.trace_id \
|
||||
AND fd.file_uuid = $1 AND fd.identity_id IS NULL \
|
||||
AND fd.trace_id IS NOT NULL AND fd.stranger_id IS NULL",
|
||||
fd_table, strangers_table
|
||||
))
|
||||
.bind(file_uuid)
|
||||
.execute(pool)
|
||||
@@ -971,16 +988,30 @@ pub async fn bind_speakers(pool: &sqlx::PgPool, file_uuid: &str) -> anyhow::Resu
|
||||
|
||||
let ib_table = schema::table_name("identity_bindings");
|
||||
let _ = sqlx::query(
|
||||
&format!("INSERT INTO {} (identity_id, identity_type, identity_value, confidence, metadata) \
|
||||
VALUES ($1, 'speaker', $2, $3, $4::jsonb) \
|
||||
ON CONFLICT (identity_id, identity_type, identity_value) DO UPDATE SET confidence = EXCLUDED.confidence, metadata = EXCLUDED.metadata", ib_table)
|
||||
&format!("INSERT INTO {} (identity_id, identity_type, identity_value, file_uuid, confidence, metadata) \
|
||||
VALUES ($1, 'speaker', $2, $3, $4, $5::jsonb) \
|
||||
ON CONFLICT (identity_id, identity_type, identity_value, file_uuid) \
|
||||
DO UPDATE SET confidence = EXCLUDED.confidence, metadata = EXCLUDED.metadata", ib_table)
|
||||
)
|
||||
.bind(identity_id)
|
||||
.bind(&best_speaker)
|
||||
.bind(file_uuid)
|
||||
.bind(overlap_ratio)
|
||||
.bind(&metadata)
|
||||
.execute(pool).await;
|
||||
|
||||
// Also update speaker_detections with the identity_id
|
||||
let sd_table = schema::table_name("speaker_detections");
|
||||
let _ = sqlx::query(
|
||||
&format!("UPDATE {} SET identity_id = $1, confidence = $2 \
|
||||
WHERE file_uuid = $3 AND speaker_id = $4 AND identity_id IS NULL", sd_table)
|
||||
)
|
||||
.bind(identity_id)
|
||||
.bind(overlap_ratio)
|
||||
.bind(file_uuid)
|
||||
.bind(&best_speaker)
|
||||
.execute(pool).await;
|
||||
|
||||
bindings += 1;
|
||||
}
|
||||
}
|
||||
@@ -1028,31 +1059,31 @@ pub async fn run_identity_agent(db: &PostgresDb, file_uuid: &str) -> anyhow::Res
|
||||
let speakers = extract_speakers_from_asrx_data(&asrx_data);
|
||||
let identities = analyze_person_speaker_overlap(&persons, &speakers);
|
||||
|
||||
for (idx, id_result) in identities.iter().enumerate() {
|
||||
let identity_name = format!("stranger_{}", idx);
|
||||
let _ = identities.len();
|
||||
if !identities.is_empty() {
|
||||
let metadata = serde_json::json!({
|
||||
"source": "identity_agent",
|
||||
"trace_ids": id_result.person_ids,
|
||||
"speaker_ids": id_result.speaker_ids,
|
||||
"confidence": id_result.confidence,
|
||||
"speaker_ids": identities[0].speaker_ids,
|
||||
"confidence": identities[0].confidence,
|
||||
"evidence": {
|
||||
"speaker_overlap": id_result.evidence.speaker_overlap,
|
||||
"frame_ratio": id_result.evidence.frame_ratio,
|
||||
"speaker_overlap": identities[0].evidence.speaker_overlap,
|
||||
"frame_ratio": identities[0].evidence.frame_ratio,
|
||||
},
|
||||
"reasoning": id_result.reasoning,
|
||||
"reasoning": identities[0].reasoning,
|
||||
});
|
||||
let _ = sqlx::query(
|
||||
&format!("INSERT INTO {} (name, identity_type, source, metadata, status) VALUES ($1, 'people', 'auto', $2::jsonb, 'pending') ON CONFLICT DO NOTHING", schema::table_name("identities"))
|
||||
)
|
||||
.bind(&identity_name)
|
||||
let _ = sqlx::query(&format!(
|
||||
"INSERT INTO {} (file_uuid, trace_id, metadata) \
|
||||
VALUES ($1, NULL, $2::jsonb) ON CONFLICT DO NOTHING",
|
||||
schema::table_name("strangers")
|
||||
))
|
||||
.bind(file_uuid)
|
||||
.bind(&metadata)
|
||||
.execute(pool)
|
||||
.await;
|
||||
}
|
||||
let _created = identities.len();
|
||||
tracing::info!(
|
||||
"[IdentityAgent] Created {} auto identities from face_clustered for {}",
|
||||
_created,
|
||||
"[IdentityAgent] Analyzed {} face clusters from face_clustered for {}",
|
||||
identities.len(),
|
||||
file_uuid
|
||||
);
|
||||
} else {
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -57,6 +57,10 @@ pub fn bbox_routes() -> Router<crate::api::types::AppState> {
|
||||
"/api/v1/file/:file_uuid/trace/:trace_id/video",
|
||||
get(trace_video),
|
||||
)
|
||||
.route(
|
||||
"/api/v1/file/:file_uuid/stranger/:stranger_id/video",
|
||||
get(stranger_video),
|
||||
)
|
||||
.route("/api/v1/file/:file_uuid/video", get(stream_video))
|
||||
.route("/api/v1/file/:file_uuid/thumbnail", get(face_thumbnail))
|
||||
.route("/api/v1/file/:file_uuid/clip", get(video_clip))
|
||||
@@ -210,8 +214,9 @@ async fn bbox_overlay_video(
|
||||
let start_sec = start_f as f64 / fps;
|
||||
|
||||
// Get face bboxes
|
||||
// frame_number is BIGINT (i64) in database
|
||||
let face_table = schema::table_name("face_detections");
|
||||
let rows: Vec<(i32, i32, i32, i32, i32, Option<i32>, Option<String>)> = sqlx::query_as(
|
||||
let rows: Vec<(i64, i32, i32, i32, i32, Option<i32>, Option<String>)> = sqlx::query_as(
|
||||
&format!("SELECT frame_number, x, y, width, height, trace_id, face_id FROM {} WHERE file_uuid = $1 AND frame_number BETWEEN $2 AND $3 ORDER BY frame_number", face_table)
|
||||
)
|
||||
.bind(face_fuid).bind(start_f).bind(end_f)
|
||||
@@ -222,7 +227,7 @@ async fn bbox_overlay_video(
|
||||
let mut parts: Vec<String> = Vec::new();
|
||||
for (frame, x, y, w, h, trace_id, _) in &rows {
|
||||
let text = format!("t{}", trace_id.unwrap_or(0));
|
||||
let offset = frame - start_f;
|
||||
let offset = (*frame as i32) - start_f;
|
||||
parts.push(format!(
|
||||
"drawbox=x={}:y={}:w={}:h={}:color=red@0.8:thickness=4:enable='eq(n,{})'",
|
||||
x, y, w, h, offset
|
||||
@@ -300,6 +305,15 @@ async fn trace_video(
|
||||
State(state): State<crate::api::types::AppState>,
|
||||
Path((file_uuid, trace_id)): Path<(String, i32)>,
|
||||
Query(params): Query<std::collections::HashMap<String, String>>,
|
||||
) -> Result<impl IntoResponse, StatusCode> {
|
||||
trace_video_inner(&state, &file_uuid, trace_id, ¶ms).await
|
||||
}
|
||||
|
||||
async fn trace_video_inner(
|
||||
state: &crate::api::types::AppState,
|
||||
file_uuid: &str,
|
||||
trace_id: i32,
|
||||
params: &std::collections::HashMap<String, String>,
|
||||
) -> Result<impl IntoResponse, StatusCode> {
|
||||
use axum::http::header;
|
||||
|
||||
@@ -317,8 +331,9 @@ async fn trace_video(
|
||||
let (video_path, fps, _width, _height) = row.ok_or(StatusCode::NOT_FOUND)?;
|
||||
|
||||
// Query face detections to find frame range for target trace
|
||||
// frame_number is BIGINT (i64) in database
|
||||
let face_table = schema::table_name("face_detections");
|
||||
let rows: Vec<(i32, i32, i32, i32, i32)> = sqlx::query_as(&format!(
|
||||
let rows: Vec<(i64, i32, i32, i32, i32)> = sqlx::query_as(&format!(
|
||||
"SELECT frame_number, x, y, width, height FROM {} WHERE file_uuid = $1 AND trace_id = $2 ORDER BY frame_number",
|
||||
face_table
|
||||
))
|
||||
@@ -371,11 +386,12 @@ async fn trace_video(
|
||||
|
||||
// === DEBUG MODE: text overlay, list all traces in frame range ===
|
||||
let start_fn = (start_sec * fps) as i32;
|
||||
let end_fn = ((start_sec + duration) * fps) as i32;
|
||||
let end_fn = ((start_sec + duration) * fps) as i64;
|
||||
|
||||
// Query all traces with identity names and bbox positions in the visible frame range
|
||||
// frame_number is BIGINT (i64) in database
|
||||
let identities_table = schema::table_name("identities");
|
||||
let all_rows: Vec<(i32, i32, i32, i32, i32, i32, Option<String>)> = sqlx::query_as(&format!(
|
||||
let all_rows: Vec<(i32, i64, i32, i32, i32, i32, Option<String>)> = sqlx::query_as(&format!(
|
||||
"SELECT fd.trace_id, fd.frame_number, fd.x, fd.y, fd.width, fd.height, i.name \
|
||||
FROM {} fd \
|
||||
LEFT JOIN {} i ON fd.identity_id = i.id \
|
||||
@@ -391,9 +407,10 @@ async fn trace_video(
|
||||
.unwrap_or_default();
|
||||
|
||||
// Group frames by trace_id, compute start_frame per trace; collect bbox per frame
|
||||
let mut trace_frames: HashMap<i32, Vec<i32>> = HashMap::new();
|
||||
// frame_number is i64 (BIGINT), so HashMaps need i64 for frame values
|
||||
let mut trace_frames: HashMap<i32, Vec<i64>> = HashMap::new();
|
||||
let mut trace_identity: HashMap<i32, String> = HashMap::new();
|
||||
let mut bbox_per_frame: HashMap<(i32, i32), (i32, i32, i32, i32)> = HashMap::new(); // (tid, fn) -> (x, y, w, h)
|
||||
let mut bbox_per_frame: HashMap<(i32, i64), (i32, i32, i32, i32)> = HashMap::new(); // (tid, fn) -> (x, y, w, h)
|
||||
for (tid, fn_, x, y, w, h, name_opt) in &all_rows {
|
||||
trace_frames.entry(*tid).or_default().push(*fn_);
|
||||
bbox_per_frame.insert((*tid, *fn_), (*x, *y, *w, *h));
|
||||
@@ -417,7 +434,7 @@ async fn trace_video(
|
||||
.unwrap_or_else(|| "-".to_string());
|
||||
|
||||
// Sort traces for consistent ordering
|
||||
let mut sorted_traces: Vec<(i32, &Vec<i32>)> =
|
||||
let mut sorted_traces: Vec<(i32, &Vec<i64>)> =
|
||||
trace_frames.iter().map(|(k, v)| (*k, v)).collect();
|
||||
sorted_traces.sort_by_key(|(tid, _)| *tid);
|
||||
|
||||
@@ -695,6 +712,7 @@ struct ThumbQuery {
|
||||
y: Option<i32>,
|
||||
w: Option<i32>,
|
||||
h: Option<i32>,
|
||||
trace_id: Option<i32>,
|
||||
}
|
||||
|
||||
async fn face_thumbnail(
|
||||
@@ -717,15 +735,70 @@ async fn face_thumbnail(
|
||||
}
|
||||
};
|
||||
|
||||
let row: Option<(String,)> = sqlx::query_as(&format!(
|
||||
"SELECT file_path FROM {} WHERE file_uuid = $1",
|
||||
// Step 1: Check for pre-stored face crop if trace_id is provided
|
||||
if let Some(trace_id) = q.trace_id {
|
||||
let output_dir = crate::core::config::OUTPUT_DIR.as_str();
|
||||
let cached_path = std::path::PathBuf::from(output_dir)
|
||||
.join(".faces")
|
||||
.join(&file_uuid)
|
||||
.join(trace_id.to_string())
|
||||
.join(format!("{}.jpg", frame));
|
||||
|
||||
if cached_path.exists() {
|
||||
tracing::debug!("[thumbnail] Using cached face crop: {}", cached_path.display());
|
||||
let bytes = tokio::fs::read(&cached_path)
|
||||
.await
|
||||
.map_err(|e| {
|
||||
tracing::warn!("[thumbnail] Failed to read cached file: {}", e);
|
||||
StatusCode::INTERNAL_SERVER_ERROR
|
||||
})?;
|
||||
|
||||
// Validate cached JPEG
|
||||
crate::core::thumbnail::validator::validate_jpeg(&bytes).map_err(|e| {
|
||||
tracing::warn!("[thumbnail] Cached JPEG validation failed: {}", e);
|
||||
StatusCode::INTERNAL_SERVER_ERROR
|
||||
})?;
|
||||
|
||||
return Ok(Response::builder()
|
||||
.status(StatusCode::OK)
|
||||
.header(header::CONTENT_TYPE, "image/jpeg")
|
||||
.header(header::CACHE_CONTROL, "public, max-age=86400")
|
||||
.body(Body::from(bytes))
|
||||
.unwrap());
|
||||
}
|
||||
|
||||
// Cached file not found, fallback to ffmpeg
|
||||
tracing::debug!("[thumbnail] Cached file not found, falling back to ffmpeg");
|
||||
}
|
||||
|
||||
// Step 2: Fallback to ffmpeg on-demand extraction
|
||||
let row: Option<(String, Option<i64>, Option<i32>, Option<i32>)> = sqlx::query_as(&format!(
|
||||
"SELECT file_path, total_frames, width, height FROM {} WHERE file_uuid = $1",
|
||||
videos_table
|
||||
))
|
||||
.bind(&file_uuid)
|
||||
.fetch_optional(state.db.pool())
|
||||
.await
|
||||
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
|
||||
let (file_path,) = row.ok_or(StatusCode::NOT_FOUND)?;
|
||||
let (file_path, total_frames, video_width, video_height) = row.ok_or(StatusCode::NOT_FOUND)?;
|
||||
|
||||
if let Some(total) = total_frames {
|
||||
if total > 0 {
|
||||
crate::core::thumbnail::validator::validate_frame(frame, total).map_err(|e| {
|
||||
tracing::warn!("[thumbnail] Frame validation failed: {}", e);
|
||||
StatusCode::BAD_REQUEST
|
||||
})?;
|
||||
}
|
||||
}
|
||||
|
||||
if let (Some(x), Some(y), Some(w), Some(h)) = (q.x, q.y, q.w, q.h) {
|
||||
if let (Some(vw), Some(vh)) = (video_width, video_height) {
|
||||
crate::core::thumbnail::validator::validate_crop(x, y, w, h, vw, vh).map_err(|e| {
|
||||
tracing::warn!("[thumbnail] Crop validation failed: {}", e);
|
||||
StatusCode::BAD_REQUEST
|
||||
})?;
|
||||
}
|
||||
}
|
||||
|
||||
let select = format!("select=eq(n\\,{})", frame);
|
||||
let vf = if let (Some(x), Some(y), Some(w), Some(h)) = (q.x, q.y, q.w, q.h) {
|
||||
@@ -755,6 +828,11 @@ async fn face_thumbnail(
|
||||
return Err(StatusCode::INTERNAL_SERVER_ERROR);
|
||||
}
|
||||
|
||||
crate::core::thumbnail::validator::validate_jpeg(&output.stdout).map_err(|e| {
|
||||
tracing::warn!("[thumbnail] JPEG validation failed: {}", e);
|
||||
StatusCode::INTERNAL_SERVER_ERROR
|
||||
})?;
|
||||
|
||||
Ok(Response::builder()
|
||||
.status(StatusCode::OK)
|
||||
.header(header::CONTENT_TYPE, "image/jpeg")
|
||||
@@ -849,3 +927,127 @@ async fn video_clip(
|
||||
.body(Body::from(output.stdout))
|
||||
.unwrap())
|
||||
}
|
||||
|
||||
async fn stranger_video(
|
||||
State(state): State<crate::api::types::AppState>,
|
||||
Path((file_uuid, stranger_id)): Path<(String, i32)>,
|
||||
Query(params): Query<std::collections::HashMap<String, String>>,
|
||||
) -> Result<impl IntoResponse, StatusCode> {
|
||||
stranger_video_inner(&state, &file_uuid, stranger_id, ¶ms).await
|
||||
}
|
||||
|
||||
async fn stranger_video_inner(
|
||||
state: &crate::api::types::AppState,
|
||||
file_uuid: &str,
|
||||
stranger_id: i32,
|
||||
params: &std::collections::HashMap<String, String>,
|
||||
) -> Result<impl IntoResponse, StatusCode> {
|
||||
use axum::http::header;
|
||||
use uuid::Uuid;
|
||||
|
||||
tracing::info!("[stranger_video] Starting for file={}, stranger={}", file_uuid, stranger_id);
|
||||
|
||||
let (mode, audio) = parse_video_params(¶ms);
|
||||
|
||||
let videos_table = schema::table_name("videos");
|
||||
tracing::debug!("[stranger_video] videos_table: {}", videos_table);
|
||||
|
||||
let row: Option<(String, f64, i32, i32)> = sqlx::query_as(&format!(
|
||||
"SELECT file_path, COALESCE(fps, 24.0), COALESCE(width, 0), COALESCE(height, 0) FROM {} WHERE file_uuid = $1",
|
||||
videos_table
|
||||
))
|
||||
.bind(&file_uuid)
|
||||
.fetch_optional(state.db.pool())
|
||||
.await
|
||||
.map_err(|e| {
|
||||
tracing::error!("[stranger_video] Video query error: {}", e);
|
||||
StatusCode::INTERNAL_SERVER_ERROR
|
||||
})?;
|
||||
|
||||
let (video_path, fps, _width, _height) = row.ok_or_else(|| {
|
||||
tracing::error!("[stranger_video] Video not found for uuid={}", file_uuid);
|
||||
StatusCode::NOT_FOUND
|
||||
})?;
|
||||
|
||||
tracing::info!("[stranger_video] Found video: path={}, fps={}", video_path, fps);
|
||||
|
||||
// Query face detections by stranger_id directly
|
||||
let face_table = schema::table_name("face_detections");
|
||||
tracing::debug!("[stranger_video] face_table: {}", face_table);
|
||||
|
||||
// frame_number is BIGINT (i64) in database
|
||||
let rows: Vec<(i64, i32, i32, i32, i32)> = sqlx::query_as(&format!(
|
||||
"SELECT frame_number, x, y, width, height FROM {} WHERE file_uuid = $1 AND stranger_id = $2 ORDER BY frame_number",
|
||||
face_table
|
||||
))
|
||||
.bind(&file_uuid).bind(stranger_id)
|
||||
.fetch_all(state.db.pool()).await
|
||||
.unwrap_or_else(|e| {
|
||||
tracing::error!("[stranger_video] Face query error: {}", e);
|
||||
vec![]
|
||||
});
|
||||
|
||||
tracing::info!("[stranger_video] Found {} faces", rows.len());
|
||||
|
||||
if rows.is_empty() {
|
||||
tracing::error!("[stranger_video] No faces found for stranger_id={}", stranger_id);
|
||||
return Err(StatusCode::NOT_FOUND);
|
||||
}
|
||||
|
||||
let first_frame = rows[0].0;
|
||||
let last_frame = rows[rows.len() - 1].0;
|
||||
let start_sec = first_frame as f64 / fps;
|
||||
let padding = params
|
||||
.get("padding")
|
||||
.and_then(|s| s.parse().ok())
|
||||
.unwrap_or(2.0);
|
||||
let duration = (last_frame - first_frame) as f64 / fps + padding * 2.0;
|
||||
let seek = (start_sec - padding).max(0.0);
|
||||
|
||||
tracing::info!("[stranger_video] Frame range: {} - {}, time: {:.2}s - {:.2}s",
|
||||
first_frame, last_frame, seek, seek + duration);
|
||||
|
||||
// Only support normal mode for stranger video
|
||||
let tmp = std::env::temp_dir().join(format!("stranger_{}.mp4", Uuid::new_v4()));
|
||||
let tmp_str = tmp.to_str().unwrap_or("").to_string();
|
||||
let sk = seek.to_string();
|
||||
let du = duration.to_string();
|
||||
let mut cmd_args = vec!["-ss", &sk, "-i", &video_path, "-t", &du, "-c", "copy"];
|
||||
if audio == "off" {
|
||||
cmd_args.push("-an");
|
||||
}
|
||||
cmd_args.extend_from_slice(&["-y", &tmp_str]);
|
||||
|
||||
tracing::debug!("[stranger_video] ffmpeg args: {:?}", cmd_args);
|
||||
|
||||
let result = ffmpeg_cmd()
|
||||
.args(&cmd_args)
|
||||
.output()
|
||||
.map_err(|e| {
|
||||
tracing::error!("[stranger_video] ffmpeg spawn error: {}", e);
|
||||
StatusCode::INTERNAL_SERVER_ERROR
|
||||
})?;
|
||||
|
||||
if !result.status.success() {
|
||||
tracing::error!("[stranger_video] ffmpeg failed: {}", String::from_utf8_lossy(&result.stderr));
|
||||
return Err(StatusCode::INTERNAL_SERVER_ERROR);
|
||||
}
|
||||
|
||||
tracing::info!("[stranger_video] ffmpeg success, output size: {} bytes", result.stdout.len());
|
||||
|
||||
let data = tokio::fs::read(&tmp)
|
||||
.await
|
||||
.map_err(|e| {
|
||||
tracing::error!("[stranger_video] Read output error: {}", e);
|
||||
StatusCode::INTERNAL_SERVER_ERROR
|
||||
})?;
|
||||
let _ = std::fs::remove_file(&tmp);
|
||||
|
||||
tracing::info!("[stranger_video] Returning video, size: {} bytes", data.len());
|
||||
|
||||
Ok(Response::builder()
|
||||
.header(header::CONTENT_TYPE, "video/mp4")
|
||||
.header(header::CONTENT_LENGTH, data.len())
|
||||
.body(Body::from(data))
|
||||
.unwrap())
|
||||
}
|
||||
|
||||
@@ -4,7 +4,6 @@ pub mod auth;
|
||||
pub mod docs;
|
||||
pub mod files;
|
||||
pub mod five_w1h_agent_api;
|
||||
pub mod processing;
|
||||
pub mod health;
|
||||
pub mod identities;
|
||||
pub mod identity_agent_api;
|
||||
@@ -12,6 +11,7 @@ pub mod identity_api;
|
||||
pub mod identity_binding;
|
||||
pub mod media_api;
|
||||
pub mod middleware;
|
||||
pub mod processing;
|
||||
pub mod scan;
|
||||
pub mod search;
|
||||
pub mod server;
|
||||
@@ -19,7 +19,5 @@ pub mod tmdb_api;
|
||||
pub mod trace_agent_api;
|
||||
pub mod types;
|
||||
pub mod universal_search;
|
||||
pub mod visual_chunk_search;
|
||||
pub mod visual_search;
|
||||
|
||||
pub use server::start_server;
|
||||
|
||||
@@ -233,50 +233,54 @@ async fn trigger_processing(
|
||||
.await
|
||||
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
|
||||
|
||||
let processors_to_run: Vec<&str> = if let Some(procs) = &req.processors {
|
||||
// 檢查 job 是否存在,不存在則 INSERT(state machine entry)
|
||||
let existing_id: Option<i32> = sqlx::query_scalar(&format!(
|
||||
"SELECT id FROM {monitor_jobs_table} WHERE uuid = $1"
|
||||
))
|
||||
.bind(&file_uuid)
|
||||
.fetch_optional(state.db.pool())
|
||||
.await
|
||||
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
|
||||
|
||||
if existing_id.is_none() {
|
||||
state
|
||||
.db
|
||||
.create_monitor_job(&file_uuid, Some(&file_path))
|
||||
.await
|
||||
.map_err(|e| {
|
||||
tracing::error!(
|
||||
"[TRIGGER] Failed to create monitor job for {}: {}",
|
||||
file_uuid,
|
||||
e
|
||||
);
|
||||
StatusCode::INTERNAL_SERVER_ERROR
|
||||
})?;
|
||||
}
|
||||
|
||||
// UPDATE processors + reset 狀態讓 worker 可 pickup
|
||||
let procs_db: Vec<String> = procs.iter().map(|s| s.to_string()).collect();
|
||||
sqlx::query(&format!(
|
||||
"UPDATE {monitor_jobs_table} SET processors = $1::text[], status = 'pending' WHERE uuid = $2"
|
||||
))
|
||||
.bind(&procs_db)
|
||||
.bind(&file_uuid)
|
||||
.execute(state.db.pool())
|
||||
.await
|
||||
.map_err(|e| {
|
||||
tracing::error!("[TRIGGER] Failed to update monitor job for {}: {}", file_uuid, e);
|
||||
StatusCode::INTERNAL_SERVER_ERROR
|
||||
})?;
|
||||
|
||||
procs.iter().map(|s| s.as_str()).collect()
|
||||
let processors_to_run: Vec<String> = if let Some(procs) = &req.processors {
|
||||
procs.iter().map(|s| s.to_string()).collect()
|
||||
} else {
|
||||
vec![]
|
||||
crate::core::db::ProcessorType::all()
|
||||
.iter()
|
||||
.map(|p| p.as_str().to_string())
|
||||
.collect()
|
||||
};
|
||||
|
||||
// 確保 monitor_job 存在
|
||||
let existing_id: Option<i32> = sqlx::query_scalar(&format!(
|
||||
"SELECT id FROM {monitor_jobs_table} WHERE uuid = $1"
|
||||
))
|
||||
.bind(&file_uuid)
|
||||
.fetch_optional(state.db.pool())
|
||||
.await
|
||||
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
|
||||
|
||||
if existing_id.is_none() {
|
||||
state
|
||||
.db
|
||||
.create_monitor_job(&file_uuid, Some(&file_path))
|
||||
.await
|
||||
.map_err(|e| {
|
||||
tracing::error!(
|
||||
"[TRIGGER] Failed to create monitor job for {}: {}",
|
||||
file_uuid,
|
||||
e
|
||||
);
|
||||
StatusCode::INTERNAL_SERVER_ERROR
|
||||
})?;
|
||||
}
|
||||
|
||||
// UPDATE processors + reset 狀態讓 worker 可 pickup
|
||||
sqlx::query(&format!(
|
||||
"UPDATE {monitor_jobs_table} SET processors = $1::text[], status = 'pending' WHERE uuid = $2"
|
||||
))
|
||||
.bind(&processors_to_run)
|
||||
.bind(&file_uuid)
|
||||
.execute(state.db.pool())
|
||||
.await
|
||||
.map_err(|e| {
|
||||
tracing::error!("[TRIGGER] Failed to update monitor job for {}: {}", file_uuid, e);
|
||||
StatusCode::INTERNAL_SERVER_ERROR
|
||||
})?;
|
||||
|
||||
let processors_to_run_refs: Vec<&str> = processors_to_run.iter().map(|s| s.as_str()).collect();
|
||||
|
||||
let notification = serde_json::json!({
|
||||
"action": "process",
|
||||
"file_uuid": file_uuid,
|
||||
@@ -285,7 +289,7 @@ async fn trigger_processing(
|
||||
"file_type": file_type,
|
||||
"content_hash": content_hash,
|
||||
"output_dir": output_dir,
|
||||
"processors": processors_to_run,
|
||||
"processors": processors_to_run_refs,
|
||||
});
|
||||
|
||||
let notification_key = format!("{}notifications", REDIS_KEY_PREFIX.as_str());
|
||||
|
||||
@@ -414,8 +414,6 @@ async fn get_ingestion_status(
|
||||
"SELECT COUNT(*) FROM {} WHERE file_uuid = '{file_uuid}'",
|
||||
schema::table_name("tkg_edges")
|
||||
));
|
||||
let scene_5w1h = count_sql!(&format!("SELECT COUNT(*) FROM {chunk} WHERE file_uuid = '{file_uuid}' AND chunk_type = 'cut' AND summary_text IS NOT NULL AND summary_text != ''"));
|
||||
|
||||
let related_identities: Vec<IdentityRef> =
|
||||
match sqlx::query_as::<_, (String, String)>(&format!(
|
||||
"SELECT DISTINCT i.uuid::text, i.name FROM {identities} i \
|
||||
@@ -491,11 +489,6 @@ async fn get_ingestion_status(
|
||||
Some(format!("{identity_count} identities matched"))
|
||||
),
|
||||
step!("scene_metadata", scene_meta_ok, None),
|
||||
step!(
|
||||
"5w1h",
|
||||
scene_5w1h > 0,
|
||||
Some(format!("{scene_5w1h} scenes with 5W1H"))
|
||||
),
|
||||
];
|
||||
|
||||
Ok(Json(IngestionStatusResponse {
|
||||
|
||||
@@ -5,7 +5,7 @@ use tokio::time::timeout;
|
||||
use tower_http::cors::{Any, CorsLayer};
|
||||
|
||||
use crate::core::cache::{MongoCache, RedisCache};
|
||||
use crate::core::db::{Database, PostgresDb};
|
||||
use crate::core::db::{Database, PostgresDb, QdrantDb};
|
||||
use crate::Embedder;
|
||||
|
||||
use super::agent_api;
|
||||
@@ -14,7 +14,6 @@ use super::auth;
|
||||
use super::docs;
|
||||
use super::files;
|
||||
use super::five_w1h_agent_api;
|
||||
use super::processing;
|
||||
use super::health;
|
||||
use super::identities;
|
||||
use super::identity_agent_api;
|
||||
@@ -22,18 +21,18 @@ use super::identity_api;
|
||||
use super::identity_binding;
|
||||
use super::media_api;
|
||||
use super::middleware::unified_auth;
|
||||
use super::processing;
|
||||
use super::scan;
|
||||
use super::search::search_routes;
|
||||
use super::tmdb_api;
|
||||
use super::trace_agent_api;
|
||||
use super::types::AppState;
|
||||
use super::universal_search::universal_search_routes;
|
||||
use super::visual_search;
|
||||
|
||||
pub async fn start_server(host: &str, port: u16) -> anyhow::Result<()> {
|
||||
health::init_server_state(host, port);
|
||||
|
||||
let embedder = std::sync::Arc::new(Embedder::new("nomic-embed-text-v2-moe:latest".to_string()));
|
||||
let embedder = std::sync::Arc::new(Embedder::new("embeddinggemma-300m".to_string()));
|
||||
|
||||
// ── ⚠️ WARNING: DO NOT move MongoCache::init() back to critical path ──
|
||||
//
|
||||
@@ -57,6 +56,9 @@ pub async fn start_server(host: &str, port: u16) -> anyhow::Result<()> {
|
||||
let redis_cache = RedisCache::new()?;
|
||||
let db = PostgresDb::init().await?;
|
||||
|
||||
// Run migrations (create identity_history table if not exists)
|
||||
PostgresDb::run_migrations(db.pool()).await?;
|
||||
|
||||
let schema_health = health::check_schema_migrations(db.pool()).await;
|
||||
if schema_health.ok {
|
||||
tracing::info!(
|
||||
@@ -89,8 +91,10 @@ pub async fn start_server(host: &str, port: u16) -> anyhow::Result<()> {
|
||||
let db = std::sync::Arc::new(db);
|
||||
let api_state = super::middleware::ApiState { db: db.clone() };
|
||||
|
||||
let qdrant = std::sync::Arc::new(QdrantDb::new());
|
||||
let state = AppState {
|
||||
db,
|
||||
qdrant,
|
||||
embedder,
|
||||
embedder_model: "nomic-embed-text-v2-moe:latest".to_string(),
|
||||
mongo_cache,
|
||||
@@ -129,7 +133,6 @@ pub async fn start_server(host: &str, port: u16) -> anyhow::Result<()> {
|
||||
.merge(auth::auth_routes())
|
||||
.merge(health::health_routes())
|
||||
.merge(docs::doc_routes())
|
||||
.merge(visual_search::visual_search_routes())
|
||||
.merge(protected_routes)
|
||||
.layer(cors)
|
||||
.with_state(state);
|
||||
|
||||
@@ -25,14 +25,19 @@ pub fn trace_agent_routes() -> Router<crate::api::types::AppState> {
|
||||
"/api/v1/file/:file_uuid/trace/:trace_id/thumbnail",
|
||||
get(get_trace_thumbnail),
|
||||
)
|
||||
.route(
|
||||
"/api/v1/file/:file_uuid/stranger/:stranger_id/representative-face",
|
||||
get(get_stranger_representative_face),
|
||||
)
|
||||
.route(
|
||||
"/api/v1/file/:file_uuid/stranger/:stranger_id/thumbnail",
|
||||
get(get_stranger_thumbnail),
|
||||
)
|
||||
.route(
|
||||
"/api/v1/file/:file_uuid/identities/:identity_uuid_a/co-occur-with/:identity_uuid_b",
|
||||
get(get_cooccurrence),
|
||||
)
|
||||
.route(
|
||||
"/api/v1/file/:file_uuid/tkg/rebuild",
|
||||
post(rebuild_tkg),
|
||||
)
|
||||
.route("/api/v1/file/:file_uuid/tkg/rebuild", post(rebuild_tkg))
|
||||
.route(
|
||||
"/api/v1/file/:file_uuid/representative-frame",
|
||||
get(get_representative_frame),
|
||||
@@ -54,8 +59,8 @@ struct TracesRequest {
|
||||
struct TraceInfo {
|
||||
trace_id: i32,
|
||||
face_count: i64,
|
||||
start_frame: i32,
|
||||
end_frame: i32,
|
||||
start_frame: i64,
|
||||
end_frame: i64,
|
||||
start_time: f64,
|
||||
end_time: f64,
|
||||
duration_sec: f64,
|
||||
@@ -110,8 +115,8 @@ async fn list_traces_sorted(
|
||||
"SELECT tt.*, fd.id AS sample_face_id FROM (
|
||||
SELECT trace_id::int AS trace_id,
|
||||
COUNT(*) AS face_count,
|
||||
MIN(frame_number)::int AS start_frame,
|
||||
MAX(frame_number)::int AS end_frame,
|
||||
MIN(frame_number)::bigint AS start_frame,
|
||||
MAX(frame_number)::bigint AS end_frame,
|
||||
(MAX(frame_number) - MIN(frame_number))::float8 AS duration_sec,
|
||||
AVG(confidence)::float8 AS avg_confidence
|
||||
FROM {}
|
||||
@@ -132,7 +137,7 @@ async fn list_traces_sorted(
|
||||
crate::core::db::schema::table_name("face_detections"),
|
||||
);
|
||||
|
||||
let rows: Vec<(i32, i64, i32, i32, f64, f64, Option<i32>)> = sqlx::query_as(&query)
|
||||
let rows: Vec<(i32, i64, i64, i64, f64, f64, Option<i32>)> = sqlx::query_as(&query)
|
||||
.bind(&file_uuid)
|
||||
.bind(min_faces)
|
||||
.bind(effective_limit)
|
||||
@@ -193,8 +198,8 @@ struct TraceFacesQuery {
|
||||
#[derive(Debug, Serialize)]
|
||||
struct TraceFaceItem {
|
||||
id: i32,
|
||||
start_frame: i32,
|
||||
end_frame: i32,
|
||||
start_frame: i64,
|
||||
end_frame: i64,
|
||||
start_time: f64,
|
||||
end_time: f64,
|
||||
x: Option<i32>,
|
||||
@@ -260,14 +265,14 @@ async fn list_trace_faces(
|
||||
|
||||
let rows: Vec<(
|
||||
i32,
|
||||
i32,
|
||||
i64,
|
||||
Option<i32>,
|
||||
Option<i32>,
|
||||
Option<i32>,
|
||||
Option<i32>,
|
||||
f32,
|
||||
)> = sqlx::query_as(&format!(
|
||||
"SELECT id, frame_number::int, x, y, width, height, confidence::float4 \
|
||||
"SELECT id, frame_number, x, y, width, height, confidence::float4 \
|
||||
FROM {} WHERE file_uuid = $1 AND trace_id = $2 \
|
||||
ORDER BY frame_number ASC LIMIT $3 OFFSET $4",
|
||||
crate::core::db::schema::table_name("face_detections")
|
||||
@@ -405,7 +410,8 @@ where
|
||||
let video_table = schema::table_name("videos");
|
||||
|
||||
let fps: f64 = sqlx::query_scalar(&format!(
|
||||
"SELECT COALESCE(fps, 25.0) FROM {} WHERE file_uuid = $1", video_table
|
||||
"SELECT COALESCE(fps, 25.0) FROM {} WHERE file_uuid = $1",
|
||||
video_table
|
||||
))
|
||||
.bind(file_uuid)
|
||||
.fetch_optional(pool)
|
||||
@@ -414,7 +420,8 @@ where
|
||||
.unwrap_or(25.0);
|
||||
|
||||
let face_count: (i64,) = sqlx::query_as(&format!(
|
||||
"SELECT COUNT(*) FROM {} WHERE file_uuid = $1 AND trace_id = $2", fd_table
|
||||
"SELECT COUNT(*) FROM {} WHERE file_uuid = $1 AND trace_id = $2",
|
||||
fd_table
|
||||
))
|
||||
.bind(file_uuid)
|
||||
.bind(trace_id)
|
||||
@@ -422,7 +429,15 @@ where
|
||||
.await
|
||||
.map_err(|e| err_fn(anyhow::anyhow!("{}", e)))?;
|
||||
|
||||
struct Candidate { frame: i64, x: i32, y: i32, w: i32, h: i32, conf: f64, score: f64 }
|
||||
struct Candidate {
|
||||
frame: i64,
|
||||
x: i32,
|
||||
y: i32,
|
||||
w: i32,
|
||||
h: i32,
|
||||
conf: f64,
|
||||
score: f64,
|
||||
}
|
||||
|
||||
let rows = sqlx::query_as::<_, (i64, i32, i32, i32, i32, f64)>(&format!(
|
||||
"SELECT frame_number::bigint, x, y, width, height, confidence::float8 \
|
||||
@@ -431,7 +446,8 @@ where
|
||||
ORDER BY (width::float8 * height::float8) * confidence::float8 DESC LIMIT 10",
|
||||
fd_table
|
||||
))
|
||||
.bind(file_uuid).bind(trace_id)
|
||||
.bind(file_uuid)
|
||||
.bind(trace_id)
|
||||
.fetch_all(pool)
|
||||
.await
|
||||
.map_err(|e| err_fn(anyhow::anyhow!("{}", e)))?;
|
||||
@@ -440,15 +456,25 @@ where
|
||||
return Err(err_fn(anyhow::anyhow!("No suitable face found")));
|
||||
}
|
||||
|
||||
let candidates: Vec<Candidate> = rows.into_iter()
|
||||
let candidates: Vec<Candidate> = rows
|
||||
.into_iter()
|
||||
.map(|(frame, x, y, w, h, conf)| {
|
||||
let score = (w as f64 * h as f64) * conf;
|
||||
Candidate { frame, x, y, w, h, conf, score }
|
||||
Candidate {
|
||||
frame,
|
||||
x,
|
||||
y,
|
||||
w,
|
||||
h,
|
||||
conf,
|
||||
score,
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
|
||||
let video_path: String = sqlx::query_scalar(&format!(
|
||||
"SELECT file_path FROM {} WHERE file_uuid = $1", video_table
|
||||
"SELECT file_path FROM {} WHERE file_uuid = $1",
|
||||
video_table
|
||||
))
|
||||
.bind(file_uuid)
|
||||
.fetch_optional(pool)
|
||||
@@ -463,16 +489,31 @@ where
|
||||
for (i, c) in candidates.iter().enumerate() {
|
||||
let seek = c.frame as f64 / fps;
|
||||
if let Ok(output) = tokio::process::Command::new("ffmpeg")
|
||||
.args(["-ss", &format!("{:.2}", seek), "-i", &video_path,
|
||||
"-vframes", "1", "-vf", &format!("crop={}:{}:{}:{},blurdetect", c.w, c.h, c.x, c.y),
|
||||
"-f", "null", "-"])
|
||||
.output().await
|
||||
.args([
|
||||
"-ss",
|
||||
&format!("{:.2}", seek),
|
||||
"-i",
|
||||
&video_path,
|
||||
"-vframes",
|
||||
"1",
|
||||
"-vf",
|
||||
&format!("crop={}:{}:{}:{},blurdetect", c.w, c.h, c.x, c.y),
|
||||
"-f",
|
||||
"null",
|
||||
"-",
|
||||
])
|
||||
.output()
|
||||
.await
|
||||
{
|
||||
let stderr = String::from_utf8_lossy(&output.stderr);
|
||||
for line in stderr.lines() {
|
||||
if let Some(blur_str) = line.split("blur mean: ").nth(1) {
|
||||
if let Ok(blur) = blur_str.trim().parse::<f64>() {
|
||||
if blur < best_blur { best_blur = blur; best = c.frame; best_idx = i; }
|
||||
if blur < best_blur {
|
||||
best_blur = blur;
|
||||
best = c.frame;
|
||||
best_idx = i;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -481,9 +522,17 @@ where
|
||||
|
||||
let chosen = &candidates[best_idx];
|
||||
Ok(RepFaceSelection {
|
||||
frame: chosen.frame, x: chosen.x, y: chosen.y, w: chosen.w, h: chosen.h,
|
||||
conf: chosen.conf, blur: best_blur, score: chosen.score,
|
||||
video_path, fps, face_count: face_count.0,
|
||||
frame: chosen.frame,
|
||||
x: chosen.x,
|
||||
y: chosen.y,
|
||||
w: chosen.w,
|
||||
h: chosen.h,
|
||||
conf: chosen.conf,
|
||||
blur: best_blur,
|
||||
score: chosen.score,
|
||||
video_path,
|
||||
fps,
|
||||
face_count: face_count.0,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -491,19 +540,36 @@ async fn get_representative_face(
|
||||
State(state): State<crate::api::types::AppState>,
|
||||
Path((file_uuid, trace_id)): Path<(String, i32)>,
|
||||
) -> Result<Json<RepFaceResponse>, (StatusCode, Json<serde_json::Value>)> {
|
||||
let sel = select_rep_face(state.db.pool(), &file_uuid, trace_id, |e| {
|
||||
(StatusCode::INTERNAL_SERVER_ERROR, Json(serde_json::json!({"error": e.to_string()})))
|
||||
}).await?;
|
||||
get_representative_face_inner(&state, &file_uuid, trace_id).await
|
||||
}
|
||||
|
||||
async fn get_representative_face_inner(
|
||||
state: &crate::api::types::AppState,
|
||||
file_uuid: &str,
|
||||
trace_id: i32,
|
||||
) -> Result<Json<RepFaceResponse>, (StatusCode, Json<serde_json::Value>)> {
|
||||
let sel = select_rep_face(state.db.pool(), file_uuid, trace_id, |e| {
|
||||
(
|
||||
StatusCode::INTERNAL_SERVER_ERROR,
|
||||
Json(serde_json::json!({"error": e.to_string()})),
|
||||
)
|
||||
})
|
||||
.await?;
|
||||
|
||||
Ok(Json(RepFaceResponse {
|
||||
success: true,
|
||||
file_uuid,
|
||||
file_uuid: file_uuid.to_string(),
|
||||
trace_id,
|
||||
face_count: sel.face_count,
|
||||
representative: RepFaceResult {
|
||||
frame_number: sel.frame,
|
||||
timestamp_secs: sel.frame as f64 / sel.fps,
|
||||
bbox: RepFaceBbox { x: sel.x, y: sel.y, width: sel.w, height: sel.h },
|
||||
bbox: RepFaceBbox {
|
||||
x: sel.x,
|
||||
y: sel.y,
|
||||
width: sel.w,
|
||||
height: sel.h,
|
||||
},
|
||||
confidence: sel.conf,
|
||||
quality_score: sel.score,
|
||||
blur_score: sel.blur,
|
||||
@@ -515,34 +581,118 @@ async fn get_trace_thumbnail(
|
||||
State(state): State<crate::api::types::AppState>,
|
||||
Path((file_uuid, trace_id)): Path<(String, i32)>,
|
||||
) -> Result<Response, (StatusCode, Json<serde_json::Value>)> {
|
||||
get_trace_thumbnail_inner(&state, &file_uuid, trace_id).await
|
||||
}
|
||||
|
||||
async fn get_trace_thumbnail_inner(
|
||||
state: &crate::api::types::AppState,
|
||||
file_uuid: &str,
|
||||
trace_id: i32,
|
||||
) -> Result<Response, (StatusCode, Json<serde_json::Value>)> {
|
||||
// Step 1: Check for pre-stored face crops in .faces/{file_uuid}/{trace_id}/
|
||||
// For trace_id=0 (untracked/stranger), check unbound directory instead
|
||||
let output_dir = crate::core::config::OUTPUT_DIR.as_str();
|
||||
let trace_id_str = trace_id.to_string();
|
||||
let trace_dir_name = if trace_id == 0 { "unbound" } else { &trace_id_str };
|
||||
let trace_dir = std::path::PathBuf::from(output_dir)
|
||||
.join(".faces")
|
||||
.join(&file_uuid)
|
||||
.join(trace_dir_name);
|
||||
|
||||
if trace_dir.exists() {
|
||||
// Find any cached face crop in this trace directory
|
||||
if let Ok(mut entries) = std::fs::read_dir(&trace_dir) {
|
||||
while let Some(Ok(entry)) = entries.next() {
|
||||
let path = entry.path();
|
||||
if path.extension().map_or(false, |e| e == "jpg") {
|
||||
tracing::info!("[trace_thumbnail] Using cached face crop: {}", path.display());
|
||||
let bytes = tokio::fs::read(&path)
|
||||
.await
|
||||
.map_err(|e| {
|
||||
(
|
||||
StatusCode::INTERNAL_SERVER_ERROR,
|
||||
Json(serde_json::json!({"error": e.to_string()})),
|
||||
)
|
||||
})?;
|
||||
|
||||
// Validate cached JPEG
|
||||
crate::core::thumbnail::validator::validate_jpeg(&bytes).map_err(|e| {
|
||||
tracing::warn!("[trace_thumbnail] Cached JPEG validation failed: {}", e);
|
||||
(
|
||||
StatusCode::INTERNAL_SERVER_ERROR,
|
||||
Json(serde_json::json!({"error": "Invalid cached JPEG"})),
|
||||
)
|
||||
})?;
|
||||
|
||||
return Ok(Response::builder()
|
||||
.status(StatusCode::OK)
|
||||
.header(header::CONTENT_TYPE, "image/jpeg")
|
||||
.header(header::CACHE_CONTROL, "public, max-age=86400")
|
||||
.body(Body::from(bytes))
|
||||
.unwrap());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Step 2: Fallback to ffmpeg on-demand extraction
|
||||
let sel = select_rep_face(state.db.pool(), &file_uuid, trace_id, |e| {
|
||||
(StatusCode::INTERNAL_SERVER_ERROR, Json(serde_json::json!({"error": e.to_string()})))
|
||||
}).await?;
|
||||
(
|
||||
StatusCode::INTERNAL_SERVER_ERROR,
|
||||
Json(serde_json::json!({"error": e.to_string()})),
|
||||
)
|
||||
})
|
||||
.await?;
|
||||
|
||||
let seek = sel.frame as f64 / sel.fps;
|
||||
let tmp = std::env::temp_dir().join(format!("trace_{}_{}.jpg", file_uuid, trace_id));
|
||||
|
||||
tracing::debug!("[trace_thumbnail] Fallback to ffmpeg for trace {} frame {}", trace_id, sel.frame);
|
||||
|
||||
let status = tokio::process::Command::new("ffmpeg")
|
||||
.args([
|
||||
"-ss", &format!("{:.2}", seek),
|
||||
"-i", &sel.video_path,
|
||||
"-vframes", "1",
|
||||
"-vf", &format!("crop={}:{}:{}:{},scale=320:320", sel.w, sel.h, sel.x, sel.y),
|
||||
"-q:v", "2",
|
||||
"-y", &tmp.to_string_lossy().to_string(),
|
||||
"-ss",
|
||||
&format!("{:.2}", seek),
|
||||
"-i",
|
||||
&sel.video_path,
|
||||
"-vframes",
|
||||
"1",
|
||||
"-vf",
|
||||
&format!("crop={}:{}:{}:{},scale=320:320", sel.w, sel.h, sel.x, sel.y),
|
||||
"-q:v",
|
||||
"2",
|
||||
"-y",
|
||||
&tmp.to_string_lossy().to_string(),
|
||||
])
|
||||
.output()
|
||||
.await
|
||||
.map_err(|e| {
|
||||
(StatusCode::INTERNAL_SERVER_ERROR, Json(serde_json::json!({"error": e.to_string()})))
|
||||
(
|
||||
StatusCode::INTERNAL_SERVER_ERROR,
|
||||
Json(serde_json::json!({"error": e.to_string()})),
|
||||
)
|
||||
})?;
|
||||
|
||||
if !status.status.success() {
|
||||
return Err((StatusCode::INTERNAL_SERVER_ERROR, Json(serde_json::json!({"error": "FFmpeg failed"}))));
|
||||
return Err((
|
||||
StatusCode::INTERNAL_SERVER_ERROR,
|
||||
Json(serde_json::json!({"error": "FFmpeg failed"})),
|
||||
));
|
||||
}
|
||||
|
||||
let bytes = tokio::fs::read(&tmp).await.map_err(|e| {
|
||||
(StatusCode::INTERNAL_SERVER_ERROR, Json(serde_json::json!({"error": e.to_string()})))
|
||||
(
|
||||
StatusCode::INTERNAL_SERVER_ERROR,
|
||||
Json(serde_json::json!({"error": e.to_string()})),
|
||||
)
|
||||
})?;
|
||||
|
||||
crate::core::thumbnail::validator::validate_jpeg(&bytes).map_err(|e| {
|
||||
tracing::warn!("[trace_thumbnail] JPEG validation failed: {}", e);
|
||||
(
|
||||
StatusCode::INTERNAL_SERVER_ERROR,
|
||||
Json(serde_json::json!({"error": "Invalid JPEG output"})),
|
||||
)
|
||||
})?;
|
||||
|
||||
let _ = tokio::fs::remove_file(&tmp).await;
|
||||
@@ -605,10 +755,16 @@ async fn get_cooccurrence(
|
||||
.fetch_optional(state.db.pool())
|
||||
.await
|
||||
.map_err(|e| {
|
||||
(StatusCode::INTERNAL_SERVER_ERROR, Json(serde_json::json!({"error": e.to_string()})))
|
||||
(
|
||||
StatusCode::INTERNAL_SERVER_ERROR,
|
||||
Json(serde_json::json!({"error": e.to_string()})),
|
||||
)
|
||||
})?
|
||||
.ok_or_else(|| {
|
||||
(StatusCode::NOT_FOUND, Json(serde_json::json!({"error": "Identity A not found"})))
|
||||
(
|
||||
StatusCode::NOT_FOUND,
|
||||
Json(serde_json::json!({"error": "Identity A not found"})),
|
||||
)
|
||||
})?;
|
||||
|
||||
let id_b = sqlx::query_as::<_, (i32, String)>(&format!(
|
||||
@@ -619,31 +775,38 @@ async fn get_cooccurrence(
|
||||
.fetch_optional(state.db.pool())
|
||||
.await
|
||||
.map_err(|e| {
|
||||
(StatusCode::INTERNAL_SERVER_ERROR, Json(serde_json::json!({"error": e.to_string()})))
|
||||
(
|
||||
StatusCode::INTERNAL_SERVER_ERROR,
|
||||
Json(serde_json::json!({"error": e.to_string()})),
|
||||
)
|
||||
})?
|
||||
.ok_or_else(|| {
|
||||
(StatusCode::NOT_FOUND, Json(serde_json::json!({"error": "Identity B not found"})))
|
||||
(
|
||||
StatusCode::NOT_FOUND,
|
||||
Json(serde_json::json!({"error": "Identity B not found"})),
|
||||
)
|
||||
})?;
|
||||
|
||||
// Stage 2: Find first frame where both identity_ids appear
|
||||
let cooccur: Option<(i64,)> = sqlx::query_as(
|
||||
&format!(
|
||||
"SELECT MIN(fd.frame_number)::bigint FROM {} fd \
|
||||
let cooccur: Option<(i64,)> = sqlx::query_as(&format!(
|
||||
"SELECT MIN(fd.frame_number)::bigint FROM {} fd \
|
||||
WHERE fd.file_uuid = $1 AND fd.identity_id = $2 \
|
||||
AND fd.frame_number IN ( \
|
||||
SELECT frame_number FROM {} \
|
||||
WHERE file_uuid = $1 AND identity_id = $3 \
|
||||
)",
|
||||
fd_table, fd_table
|
||||
)
|
||||
)
|
||||
fd_table, fd_table
|
||||
))
|
||||
.bind(&file_uuid)
|
||||
.bind(id_a.0)
|
||||
.bind(id_b.0)
|
||||
.fetch_optional(state.db.pool())
|
||||
.await
|
||||
.map_err(|e| {
|
||||
(StatusCode::INTERNAL_SERVER_ERROR, Json(serde_json::json!({"error": e.to_string()})))
|
||||
(
|
||||
StatusCode::INTERNAL_SERVER_ERROR,
|
||||
Json(serde_json::json!({"error": e.to_string()})),
|
||||
)
|
||||
})?;
|
||||
|
||||
let (first_frame,) = cooccur.ok_or_else(|| {
|
||||
@@ -653,13 +816,17 @@ async fn get_cooccurrence(
|
||||
// Get fps for timestamp
|
||||
let video_table = schema::table_name("videos");
|
||||
let fps: f64 = sqlx::query_scalar(&format!(
|
||||
"SELECT COALESCE(fps, 25.0) FROM {} WHERE file_uuid = $1", video_table
|
||||
"SELECT COALESCE(fps, 25.0) FROM {} WHERE file_uuid = $1",
|
||||
video_table
|
||||
))
|
||||
.bind(&file_uuid)
|
||||
.fetch_optional(state.db.pool())
|
||||
.await
|
||||
.map_err(|e| {
|
||||
(StatusCode::INTERNAL_SERVER_ERROR, Json(serde_json::json!({"error": e.to_string()})))
|
||||
(
|
||||
StatusCode::INTERNAL_SERVER_ERROR,
|
||||
Json(serde_json::json!({"error": e.to_string()})),
|
||||
)
|
||||
})?
|
||||
.unwrap_or(25.0);
|
||||
|
||||
@@ -685,40 +852,67 @@ async fn get_cooccurrence(
|
||||
// Stage 4: Get representative faces for both traces (reusing select_rep_face)
|
||||
let rep_a = if let Some((tid,)) = trace_a {
|
||||
select_rep_face(state.db.pool(), &file_uuid, tid, |e| {
|
||||
(StatusCode::INTERNAL_SERVER_ERROR, Json(serde_json::json!({"error": e.to_string()})))
|
||||
}).await.ok().map(|sel| CoOccurRepFace {
|
||||
(
|
||||
StatusCode::INTERNAL_SERVER_ERROR,
|
||||
Json(serde_json::json!({"error": e.to_string()})),
|
||||
)
|
||||
})
|
||||
.await
|
||||
.ok()
|
||||
.map(|sel| CoOccurRepFace {
|
||||
frame_number: sel.frame,
|
||||
bbox: RepFaceBbox { x: sel.x, y: sel.y, width: sel.w, height: sel.h },
|
||||
bbox: RepFaceBbox {
|
||||
x: sel.x,
|
||||
y: sel.y,
|
||||
width: sel.w,
|
||||
height: sel.h,
|
||||
},
|
||||
confidence: sel.conf,
|
||||
thumbnail_url: format!("/api/v1/file/{}/trace/{}/thumbnail", file_uuid, tid),
|
||||
})
|
||||
} else { None };
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
let rep_b = if let Some((tid,)) = trace_b {
|
||||
select_rep_face(state.db.pool(), &file_uuid, tid, |e| {
|
||||
(StatusCode::INTERNAL_SERVER_ERROR, Json(serde_json::json!({"error": e.to_string()})))
|
||||
}).await.ok().map(|sel| CoOccurRepFace {
|
||||
(
|
||||
StatusCode::INTERNAL_SERVER_ERROR,
|
||||
Json(serde_json::json!({"error": e.to_string()})),
|
||||
)
|
||||
})
|
||||
.await
|
||||
.ok()
|
||||
.map(|sel| CoOccurRepFace {
|
||||
frame_number: sel.frame,
|
||||
bbox: RepFaceBbox { x: sel.x, y: sel.y, width: sel.w, height: sel.h },
|
||||
bbox: RepFaceBbox {
|
||||
x: sel.x,
|
||||
y: sel.y,
|
||||
width: sel.w,
|
||||
height: sel.h,
|
||||
},
|
||||
confidence: sel.conf,
|
||||
thumbnail_url: format!("/api/v1/file/{}/trace/{}/thumbnail", file_uuid, tid),
|
||||
})
|
||||
} else { None };
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
// Total co-occurrence frames (from TKG if available, otherwise from face_detections)
|
||||
let total_cooccurrence_frames: i64 = sqlx::query_scalar(
|
||||
&format!(
|
||||
"SELECT COUNT(DISTINCT fd.frame_number)::bigint FROM {} fd \
|
||||
let total_cooccurrence_frames: i64 = sqlx::query_scalar(&format!(
|
||||
"SELECT COUNT(DISTINCT fd.frame_number)::bigint FROM {} fd \
|
||||
WHERE fd.file_uuid = $1 AND fd.identity_id = $2 \
|
||||
AND fd.frame_number IN ( \
|
||||
SELECT frame_number FROM {} \
|
||||
WHERE file_uuid = $1 AND identity_id = $3 \
|
||||
)",
|
||||
fd_table, fd_table
|
||||
)
|
||||
)
|
||||
.bind(&file_uuid).bind(id_a.0).bind(id_b.0)
|
||||
.fetch_one(state.db.pool()).await
|
||||
fd_table, fd_table
|
||||
))
|
||||
.bind(&file_uuid)
|
||||
.bind(id_a.0)
|
||||
.bind(id_b.0)
|
||||
.fetch_one(state.db.pool())
|
||||
.await
|
||||
.unwrap_or(0);
|
||||
|
||||
Ok(Json(CoOccurResponse {
|
||||
@@ -758,12 +952,7 @@ async fn rebuild_tkg(
|
||||
State(state): State<crate::api::types::AppState>,
|
||||
Path(file_uuid): Path<String>,
|
||||
) -> Json<TkgRebuildResponse> {
|
||||
let result = crate::core::processor::tkg::build_tkg(
|
||||
&state.db,
|
||||
&file_uuid,
|
||||
&OUTPUT_DIR,
|
||||
)
|
||||
.await;
|
||||
let result = crate::core::processor::tkg::build_tkg(&state.db, &file_uuid, &OUTPUT_DIR).await;
|
||||
|
||||
match result {
|
||||
Ok(r) => Json(TkgRebuildResponse {
|
||||
@@ -807,14 +996,14 @@ async fn get_representative_frame(
|
||||
State(state): State<crate::api::types::AppState>,
|
||||
Path(file_uuid): Path<String>,
|
||||
) -> Result<Json<RepFrameResponse>, (StatusCode, Json<serde_json::Value>)> {
|
||||
let result = tkg::query_auto_representative_frame(
|
||||
state.db.pool(),
|
||||
&file_uuid,
|
||||
)
|
||||
.await
|
||||
.map_err(|e| {
|
||||
(StatusCode::NOT_FOUND, Json(serde_json::json!({"error": e.to_string()})))
|
||||
})?;
|
||||
let result = tkg::query_auto_representative_frame(state.db.pool(), &file_uuid)
|
||||
.await
|
||||
.map_err(|e| {
|
||||
(
|
||||
StatusCode::NOT_FOUND,
|
||||
Json(serde_json::json!({"error": e.to_string()})),
|
||||
)
|
||||
})?;
|
||||
|
||||
let fps = query_fps(state.db.pool(), &file_uuid).await;
|
||||
|
||||
@@ -843,3 +1032,59 @@ async fn query_fps(pool: &sqlx::PgPool, file_uuid: &str) -> f64 {
|
||||
.flatten()
|
||||
.unwrap_or(25.0)
|
||||
}
|
||||
|
||||
async fn get_stranger_representative_face(
|
||||
State(state): State<crate::api::types::AppState>,
|
||||
Path((file_uuid, stranger_id)): Path<(String, i32)>,
|
||||
) -> Result<Json<RepFaceResponse>, (StatusCode, Json<serde_json::Value>)> {
|
||||
let faces_table = crate::core::db::schema::table_name("face_detections");
|
||||
|
||||
let trace_id: i32 = sqlx::query_scalar(&format!(
|
||||
"SELECT trace_id FROM {} WHERE file_uuid = $1 AND stranger_id = $2 LIMIT 1",
|
||||
faces_table
|
||||
))
|
||||
.bind(&file_uuid)
|
||||
.bind(stranger_id)
|
||||
.fetch_optional(state.db.pool())
|
||||
.await
|
||||
.map_err(|e| {
|
||||
(
|
||||
StatusCode::INTERNAL_SERVER_ERROR,
|
||||
Json(serde_json::json!({"error": e.to_string()})),
|
||||
)
|
||||
})?
|
||||
.ok_or((
|
||||
StatusCode::NOT_FOUND,
|
||||
Json(serde_json::json!({"error": "Stranger not found"})),
|
||||
))?;
|
||||
|
||||
get_representative_face_inner(&state, &file_uuid, trace_id).await
|
||||
}
|
||||
|
||||
async fn get_stranger_thumbnail(
|
||||
State(state): State<crate::api::types::AppState>,
|
||||
Path((file_uuid, stranger_id)): Path<(String, i32)>,
|
||||
) -> Result<Response, (StatusCode, Json<serde_json::Value>)> {
|
||||
let faces_table = crate::core::db::schema::table_name("face_detections");
|
||||
|
||||
let trace_id: i32 = sqlx::query_scalar(&format!(
|
||||
"SELECT trace_id FROM {} WHERE file_uuid = $1 AND stranger_id = $2 LIMIT 1",
|
||||
faces_table
|
||||
))
|
||||
.bind(&file_uuid)
|
||||
.bind(stranger_id)
|
||||
.fetch_optional(state.db.pool())
|
||||
.await
|
||||
.map_err(|e| {
|
||||
(
|
||||
StatusCode::INTERNAL_SERVER_ERROR,
|
||||
Json(serde_json::json!({"error": e.to_string()})),
|
||||
)
|
||||
})?
|
||||
.ok_or((
|
||||
StatusCode::NOT_FOUND,
|
||||
Json(serde_json::json!({"error": "Stranger not found"})),
|
||||
))?;
|
||||
|
||||
get_trace_thumbnail_inner(&state, &file_uuid, trace_id).await
|
||||
}
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
#[derive(Clone)]
|
||||
pub struct AppState {
|
||||
pub db: std::sync::Arc<crate::core::db::PostgresDb>,
|
||||
pub qdrant: std::sync::Arc<crate::core::db::QdrantDb>,
|
||||
pub embedder: std::sync::Arc<crate::Embedder>,
|
||||
pub embedder_model: String,
|
||||
pub mongo_cache: crate::core::cache::MongoCache,
|
||||
|
||||
@@ -60,13 +60,12 @@ pub struct UniversalSearchResponse {
|
||||
pub enum SearchResult {
|
||||
#[serde(rename = "chunk")]
|
||||
Chunk {
|
||||
file_uuid: String,
|
||||
chunk_id: String,
|
||||
chunk_type: String,
|
||||
// Primary: frame-accurate position
|
||||
start_frame: i64,
|
||||
end_frame: i64,
|
||||
fps: f64,
|
||||
// Reference: time derived from frames (subject to FPS variation)
|
||||
start_time: f64,
|
||||
end_time: f64,
|
||||
score: f64,
|
||||
@@ -76,9 +75,8 @@ pub enum SearchResult {
|
||||
},
|
||||
#[serde(rename = "frame")]
|
||||
Frame {
|
||||
// Primary: exact frame number
|
||||
file_uuid: String,
|
||||
frame_number: i64,
|
||||
// Reference: time derived from frame (subject to FPS variation)
|
||||
timestamp: f64,
|
||||
score: f64,
|
||||
objects: Option<Vec<serde_json::Value>>,
|
||||
@@ -88,6 +86,7 @@ pub enum SearchResult {
|
||||
},
|
||||
#[serde(rename = "person")]
|
||||
Person {
|
||||
file_uuid: Option<String>,
|
||||
identity_id: i32,
|
||||
identity_uuid: String,
|
||||
name: Option<String>,
|
||||
@@ -328,17 +327,15 @@ async fn search_chunks(
|
||||
db: &PostgresDb,
|
||||
req: &UniversalSearchRequest,
|
||||
) -> Result<Vec<SearchResult>, anyhow::Error> {
|
||||
// uuid is required for chunk search - chunk_id is only unique within a video
|
||||
let uuid = match &req.file_uuid {
|
||||
Some(u) => u.replace('\'', "''"),
|
||||
None => return Err(anyhow::anyhow!("file_uuid is required for chunk search")),
|
||||
};
|
||||
|
||||
let chunk_table = schema::table_name("chunk");
|
||||
let mut sql = format!(
|
||||
"SELECT chunk_id, chunk_type, start_time, end_time, (start_time * fps)::bigint as start_frame, (end_time * fps)::bigint as end_frame, fps, text_content, content FROM {} WHERE file_uuid = '{}'",
|
||||
chunk_table, uuid
|
||||
"SELECT file_uuid, chunk_id, chunk_type, start_time, end_time, (start_time * fps)::bigint as start_frame, (end_time * fps)::bigint as end_frame, fps, text_content, content FROM {} WHERE 1=1",
|
||||
chunk_table
|
||||
);
|
||||
|
||||
if let Some(uuid) = &req.file_uuid {
|
||||
sql.push_str(&format!(" AND file_uuid = '{}'", uuid.replace('\'', "''")));
|
||||
}
|
||||
if let Some(tr) = &req.time_range {
|
||||
sql.push_str(&format!(
|
||||
" AND start_time >= {} AND end_time <= {}",
|
||||
@@ -422,6 +419,7 @@ async fn search_chunks(
|
||||
sql.push_str(&format!(" LIMIT {}", req.page_size.unwrap_or(20)));
|
||||
|
||||
let rows: Vec<(
|
||||
String,
|
||||
String,
|
||||
String,
|
||||
f64,
|
||||
@@ -437,6 +435,7 @@ async fn search_chunks(
|
||||
.into_iter()
|
||||
.map(
|
||||
|(
|
||||
file_uuid,
|
||||
chunk_id,
|
||||
chunk_type,
|
||||
start_time,
|
||||
@@ -457,7 +456,6 @@ async fn search_chunks(
|
||||
.and_then(|v| v.as_str())
|
||||
.map(String::from)
|
||||
});
|
||||
// Simple scoring: if query matches, score 0.8
|
||||
let score = if !req.query.is_empty()
|
||||
&& text.as_ref().map_or(false, |t| {
|
||||
t.to_lowercase().contains(&req.query.to_lowercase())
|
||||
@@ -468,6 +466,7 @@ async fn search_chunks(
|
||||
};
|
||||
|
||||
SearchResult::Chunk {
|
||||
file_uuid,
|
||||
chunk_id,
|
||||
chunk_type,
|
||||
start_time,
|
||||
@@ -549,7 +548,7 @@ async fn search_frames_internal(
|
||||
|
||||
let results: Vec<SearchResult> = rows
|
||||
.into_iter()
|
||||
.map(|(frame_number, timestamp, yolo, ocr, face, _uuid)| {
|
||||
.map(|(frame_number, timestamp, yolo, ocr, face, file_uuid)| {
|
||||
let objects = yolo.as_ref().and_then(|v| {
|
||||
v.get("objects")
|
||||
.map(|o| o.as_array().cloned().unwrap_or_default())
|
||||
@@ -571,6 +570,7 @@ async fn search_frames_internal(
|
||||
});
|
||||
|
||||
SearchResult::Frame {
|
||||
file_uuid,
|
||||
frame_number,
|
||||
timestamp,
|
||||
score: 0.7,
|
||||
@@ -589,37 +589,54 @@ async fn search_persons_internal(
|
||||
db: &PostgresDb,
|
||||
req: &UniversalSearchRequest,
|
||||
) -> Result<Vec<SearchResult>, anyhow::Error> {
|
||||
let uuid = match &req.file_uuid {
|
||||
Some(u) => u.replace('\'', "''"),
|
||||
None => return Err(anyhow::anyhow!("file_uuid is required for person search")),
|
||||
};
|
||||
|
||||
let id_table = schema::table_name("identities");
|
||||
let fd_table = schema::table_name("face_detections");
|
||||
let mut sql = format!(
|
||||
"SELECT i.id, i.uuid::text, i.name, COUNT(fd.id) AS appearance_count, \
|
||||
MIN(fd.timestamp_secs) AS first_time, MAX(fd.timestamp_secs) AS last_time \
|
||||
FROM {} i JOIN {} fd ON fd.identity_id = i.id \
|
||||
WHERE fd.file_uuid = '{}'",
|
||||
id_table, fd_table, uuid
|
||||
MIN(fd.timestamp_secs) AS first_time, MAX(fd.timestamp_secs) AS last_time, \
|
||||
fd.file_uuid \
|
||||
FROM {} i JOIN {} fd ON fd.identity_id = i.id WHERE 1=1",
|
||||
id_table, fd_table
|
||||
);
|
||||
|
||||
if let Some(uuid) = &req.file_uuid {
|
||||
sql.push_str(&format!(
|
||||
" AND fd.file_uuid = '{}'",
|
||||
uuid.replace('\'', "''")
|
||||
));
|
||||
}
|
||||
|
||||
if !req.query.is_empty() {
|
||||
let q = req.query.replace('\'', "''");
|
||||
sql.push_str(&format!(" AND i.name ILIKE '%{}%'", q));
|
||||
}
|
||||
|
||||
sql.push_str(" GROUP BY i.id, i.uuid, i.name");
|
||||
sql.push_str(" GROUP BY i.id, i.uuid, i.name, fd.file_uuid");
|
||||
sql.push_str(" ORDER BY appearance_count DESC");
|
||||
sql.push_str(&format!(" LIMIT {}", req.page_size.unwrap_or(20)));
|
||||
|
||||
let rows: Vec<(i32, String, Option<String>, i64, Option<f64>, Option<f64>)> =
|
||||
sqlx::query_as(&sql).fetch_all(db.pool()).await?;
|
||||
let rows: Vec<(
|
||||
i32,
|
||||
String,
|
||||
Option<String>,
|
||||
i64,
|
||||
Option<f64>,
|
||||
Option<f64>,
|
||||
String,
|
||||
)> = sqlx::query_as(&sql).fetch_all(db.pool()).await?;
|
||||
|
||||
let results: Vec<SearchResult> = rows
|
||||
.into_iter()
|
||||
.map(
|
||||
|(identity_id, identity_uuid, name, appearance_count, first_time, last_time)| {
|
||||
|(
|
||||
identity_id,
|
||||
identity_uuid,
|
||||
name,
|
||||
appearance_count,
|
||||
first_time,
|
||||
last_time,
|
||||
file_uuid,
|
||||
)| {
|
||||
let score = if !req.query.is_empty()
|
||||
&& name.as_ref().map_or(false, |n| {
|
||||
n.to_lowercase().contains(&req.query.to_lowercase())
|
||||
@@ -630,6 +647,7 @@ async fn search_persons_internal(
|
||||
};
|
||||
|
||||
SearchResult::Person {
|
||||
file_uuid: Some(file_uuid),
|
||||
identity_id,
|
||||
identity_uuid,
|
||||
name,
|
||||
|
||||
@@ -1,513 +0,0 @@
|
||||
//! Visual chunk search functionality.
|
||||
//!
|
||||
//! This module provides search capabilities for visual chunks based on:
|
||||
//! - Object classes (e.g., "person", "car", "envelope")
|
||||
//! - Confidence thresholds
|
||||
//! - Object counts
|
||||
//! - Spatial density
|
||||
//! - Object relationships
|
||||
|
||||
use crate::core::chunk::types::{Chunk, ChunkRule, ChunkType};
|
||||
use crate::core::db::{schema, PostgresDb};
|
||||
use anyhow::Result;
|
||||
use serde_json::Value;
|
||||
use std::collections::HashMap;
|
||||
|
||||
/// Criteria for searching visual chunks
|
||||
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
|
||||
pub struct VisualChunkSearchCriteria {
|
||||
/// Minimum average confidence across frames
|
||||
pub min_avg_confidence: Option<f32>,
|
||||
/// Minimum number of frames with objects
|
||||
pub min_frames_with_objects: Option<u32>,
|
||||
/// Minimum number of unique object classes
|
||||
pub min_unique_classes: Option<u32>,
|
||||
/// Specific object classes to include (empty means all)
|
||||
#[serde(default)]
|
||||
pub required_classes: Vec<String>,
|
||||
/// Object class counts to filter by
|
||||
#[serde(default)]
|
||||
pub class_counts: HashMap<String, (u32, u32)>,
|
||||
/// Time range (optional)
|
||||
pub time_range: Option<(f64, f64)>,
|
||||
}
|
||||
|
||||
impl Default for VisualChunkSearchCriteria {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
min_avg_confidence: None,
|
||||
min_frames_with_objects: None,
|
||||
min_unique_classes: None,
|
||||
required_classes: Vec::new(),
|
||||
class_counts: HashMap::new(),
|
||||
time_range: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Search visual chunks based on criteria
|
||||
pub async fn search_visual_chunks(
|
||||
db: &PostgresDb,
|
||||
uuid: &str,
|
||||
criteria: &VisualChunkSearchCriteria,
|
||||
) -> Result<Vec<Chunk>> {
|
||||
// First, get all visual chunks for this video
|
||||
let all_chunks = get_visual_chunks_by_uuid(db, uuid).await?;
|
||||
|
||||
// Apply filters
|
||||
let filtered_chunks: Vec<Chunk> = all_chunks
|
||||
.into_iter()
|
||||
.filter(|chunk| {
|
||||
// Check min avg confidence
|
||||
if let Some(min_avg_confidence) = criteria.min_avg_confidence {
|
||||
if let Some(content) = &chunk.content.as_object() {
|
||||
if let Some(metadata) = content.get("metadata") {
|
||||
if let Some(avg_confidence) = metadata.get("avg_confidence") {
|
||||
if let Some(conf) = avg_confidence.as_f64() {
|
||||
if conf < min_avg_confidence as f64 {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Check min frames with objects
|
||||
if let Some(min_frames) = criteria.min_frames_with_objects {
|
||||
if let Some(stats) = &chunk.visual_stats {
|
||||
if let Some(frames_with_objects) = stats.get("frames_with_objects") {
|
||||
if let Some(count) = frames_with_objects.as_u64() {
|
||||
if count < min_frames as u64 {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Check min unique classes
|
||||
if let Some(min_unique_classes) = criteria.min_unique_classes {
|
||||
if let Some(content) = &chunk.content.as_object() {
|
||||
if let Some(metadata) = content.get("metadata") {
|
||||
if let Some(unique_classes) = metadata.get("unique_classes") {
|
||||
if let Some(classes) = unique_classes.as_array() {
|
||||
if (classes.len() as u32) < min_unique_classes {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Check required classes
|
||||
if !criteria.required_classes.is_empty() {
|
||||
if let Some(content) = &chunk.content.as_object() {
|
||||
if let Some(keyframe_objects) = content.get("keyframe_objects") {
|
||||
if let Some(objects) = keyframe_objects.as_array() {
|
||||
let mut found_all = true;
|
||||
for required_class in &criteria.required_classes {
|
||||
let mut found = false;
|
||||
for obj in objects {
|
||||
if let Some(class_name) = obj.get("class_name") {
|
||||
if let Some(class_str) = class_name.as_str() {
|
||||
if class_str == required_class {
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if !found {
|
||||
found_all = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if !found_all {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Check class counts
|
||||
if !criteria.class_counts.is_empty() {
|
||||
if let Some(content) = &chunk.content.as_object() {
|
||||
if let Some(metadata) = content.get("metadata") {
|
||||
if let Some(object_counts) = metadata.get("object_counts") {
|
||||
for (class, (min, max)) in &criteria.class_counts {
|
||||
if let Some(count_value) = object_counts.get(class) {
|
||||
if let Some(count) = count_value.as_u64() {
|
||||
if *min > 0 && count < *min as u64 {
|
||||
return false;
|
||||
}
|
||||
if *max < u32::MAX && count > *max as u64 {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
} else if *min > 0 {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
} else if criteria.class_counts.values().any(|(min, _)| *min > 0) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Check time range
|
||||
if let Some((start_time, end_time)) = criteria.time_range {
|
||||
// Calculate chunk time from frames
|
||||
let chunk_start_time = chunk.start_frame as f64 / chunk.fps;
|
||||
let chunk_end_time = chunk.end_frame as f64 / chunk.fps;
|
||||
|
||||
if chunk_start_time < start_time || chunk_end_time > end_time {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
true
|
||||
})
|
||||
.collect();
|
||||
|
||||
Ok(filtered_chunks)
|
||||
}
|
||||
|
||||
/// Get all visual chunks for a video UUID
|
||||
async fn get_visual_chunks_by_uuid(db: &PostgresDb, uuid: &str) -> Result<Vec<Chunk>> {
|
||||
let chunk_table = schema::table_name("chunk");
|
||||
let sql = format!(
|
||||
"SELECT file_id, file_uuid, chunk_id, chunk_type, fps, start_frame, end_frame, text_content, content, metadata, vector_id, visual_stats FROM {} WHERE file_uuid = '{}' AND chunk_type = 'visual' ORDER BY start_frame ASC",
|
||||
chunk_table, uuid.replace('\'', "''")
|
||||
);
|
||||
|
||||
let rows: Vec<(
|
||||
i32, // file_id
|
||||
String, // uuid
|
||||
String, // chunk_id
|
||||
String, // chunk_type
|
||||
f64, // fps
|
||||
i64, // start_frame
|
||||
i64, // end_frame
|
||||
Option<String>, // text_content
|
||||
Value, // content
|
||||
Option<Value>, // metadata
|
||||
Option<String>, // vector_id
|
||||
Option<Value>, // visual_stats
|
||||
)> = sqlx::query_as(&sql).fetch_all(db.pool()).await?;
|
||||
|
||||
let mut chunks = Vec::new();
|
||||
for row in rows {
|
||||
let chunk_type = match row.3.as_str() {
|
||||
"visual" => ChunkType::Visual,
|
||||
"sentence" => ChunkType::Sentence,
|
||||
"time_based" => ChunkType::TimeBased,
|
||||
"cut" => ChunkType::Cut,
|
||||
"trace" => ChunkType::Trace,
|
||||
"story" => ChunkType::Story,
|
||||
_ => ChunkType::TimeBased,
|
||||
};
|
||||
|
||||
// Calculate frame_count
|
||||
let frame_count = (row.6 - row.5) as i32;
|
||||
|
||||
chunks.push(Chunk {
|
||||
file_id: row.0,
|
||||
uuid: row.1,
|
||||
chunk_id: row.2,
|
||||
chunk_type,
|
||||
rule: ChunkRule::Rule2, // Visual chunks use Rule2
|
||||
fps: row.4,
|
||||
start_frame: row.5,
|
||||
end_frame: row.6,
|
||||
text_content: row.7,
|
||||
content: row.8,
|
||||
metadata: row.9,
|
||||
vector_id: row.10,
|
||||
frame_count,
|
||||
pre_chunk_ids: Vec::new(),
|
||||
parent_chunk_id: None,
|
||||
child_chunk_ids: Vec::new(),
|
||||
visual_stats: row.11,
|
||||
});
|
||||
}
|
||||
|
||||
Ok(chunks)
|
||||
}
|
||||
|
||||
/// Search visual chunks by object class
|
||||
pub async fn search_visual_chunks_by_class(
|
||||
db: &PostgresDb,
|
||||
uuid: &str,
|
||||
object_class: &str,
|
||||
min_count: Option<u32>,
|
||||
max_count: Option<u32>,
|
||||
) -> Result<Vec<Chunk>> {
|
||||
let all_chunks = get_visual_chunks_by_uuid(db, uuid).await?;
|
||||
|
||||
let filtered_chunks: Vec<Chunk> = all_chunks
|
||||
.into_iter()
|
||||
.filter(|chunk| {
|
||||
// Check if chunk contains the object class
|
||||
let mut contains_class = false;
|
||||
if let Some(content) = &chunk.content.as_object() {
|
||||
if let Some(keyframe_objects) = content.get("keyframe_objects") {
|
||||
if let Some(objects) = keyframe_objects.as_array() {
|
||||
for obj in objects {
|
||||
if let Some(class_name) = obj.get("class_name") {
|
||||
if let Some(class_str) = class_name.as_str() {
|
||||
if class_str == object_class {
|
||||
contains_class = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if !contains_class {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Check count in visual_stats
|
||||
if let Some(stats) = &chunk.visual_stats {
|
||||
if let Some(count) = stats.get(object_class) {
|
||||
if let Some(c) = count.as_u64() {
|
||||
if let Some(min) = min_count {
|
||||
if c < min as u64 {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
if let Some(max) = max_count {
|
||||
if c > max as u64 {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
true
|
||||
})
|
||||
.collect();
|
||||
|
||||
Ok(filtered_chunks)
|
||||
}
|
||||
|
||||
/// Search visual chunks by spatial density
|
||||
pub async fn search_visual_chunks_by_density(
|
||||
db: &PostgresDb,
|
||||
uuid: &str,
|
||||
min_density: f32,
|
||||
max_density: Option<f32>,
|
||||
) -> Result<Vec<Chunk>> {
|
||||
let all_chunks = get_visual_chunks_by_uuid(db, uuid).await?;
|
||||
|
||||
let filtered_chunks: Vec<Chunk> = all_chunks
|
||||
.into_iter()
|
||||
.filter(|chunk| {
|
||||
if let Some(content) = &chunk.content.as_object() {
|
||||
if let Some(metadata) = content.get("metadata") {
|
||||
if let Some(density_value) = metadata.get("spatial_density") {
|
||||
if let Some(density) = density_value.as_f64() {
|
||||
if density < min_density as f64 {
|
||||
return false;
|
||||
}
|
||||
if let Some(max_dens) = max_density {
|
||||
if density > max_dens as f64 {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
false
|
||||
})
|
||||
.collect();
|
||||
|
||||
Ok(filtered_chunks)
|
||||
}
|
||||
|
||||
/// Find chunks containing specific object combinations
|
||||
pub async fn search_visual_chunks_by_combination(
|
||||
db: &PostgresDb,
|
||||
uuid: &str,
|
||||
combination: &[(&str, u32)],
|
||||
) -> Result<Vec<Chunk>> {
|
||||
let all_chunks = get_visual_chunks_by_uuid(db, uuid).await?;
|
||||
|
||||
let filtered_chunks: Vec<Chunk> = all_chunks
|
||||
.into_iter()
|
||||
.filter(|chunk| {
|
||||
// Check if all required combinations are present
|
||||
for (object_class, min_count) in combination {
|
||||
let mut found = false;
|
||||
if let Some(stats) = &chunk.visual_stats {
|
||||
if let Some(object_counts) = stats.get("object_counts") {
|
||||
if let Some(count_value) = object_counts.get(*object_class) {
|
||||
if let Some(count) = count_value.as_u64() {
|
||||
if count >= *min_count as u64 {
|
||||
found = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if !found {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
true
|
||||
})
|
||||
.collect();
|
||||
|
||||
Ok(filtered_chunks)
|
||||
}
|
||||
|
||||
/// Get visual chunk statistics
|
||||
pub async fn get_visual_chunk_statistics(
|
||||
db: &PostgresDb,
|
||||
uuid: &str,
|
||||
) -> Result<HashMap<String, Value>> {
|
||||
let chunk_table = schema::table_name("chunk");
|
||||
let sql = format!(
|
||||
"SELECT
|
||||
COUNT(*) as total_chunks,
|
||||
AVG((content->'metadata'->>'avg_confidence')::float) as avg_confidence,
|
||||
MIN((content->'metadata'->>'avg_confidence')::float) as min_confidence,
|
||||
MAX((content->'metadata'->>'avg_confidence')::float) as max_confidence,
|
||||
SUM((content->'metadata'->>'object_count')::int) as total_objects,
|
||||
AVG((content->'metadata'->>'spatial_density')::float) as avg_density
|
||||
FROM {}
|
||||
WHERE file_uuid = '{}'
|
||||
AND chunk_type = 'visual'",
|
||||
chunk_table,
|
||||
uuid.replace('\'', "''")
|
||||
);
|
||||
|
||||
let row: (
|
||||
i64,
|
||||
Option<f64>,
|
||||
Option<f64>,
|
||||
Option<f64>,
|
||||
Option<i64>,
|
||||
Option<f64>,
|
||||
) = sqlx::query_as(&sql).fetch_one(db.pool()).await?;
|
||||
|
||||
let mut stats = HashMap::new();
|
||||
stats.insert("total_chunks".to_string(), Value::from(row.0));
|
||||
stats.insert(
|
||||
"avg_confidence".to_string(),
|
||||
Value::from(row.1.unwrap_or(0.0)),
|
||||
);
|
||||
stats.insert(
|
||||
"min_confidence".to_string(),
|
||||
Value::from(row.2.unwrap_or(0.0)),
|
||||
);
|
||||
stats.insert(
|
||||
"max_confidence".to_string(),
|
||||
Value::from(row.3.unwrap_or(0.0)),
|
||||
);
|
||||
stats.insert("total_objects".to_string(), Value::from(row.4.unwrap_or(0)));
|
||||
stats.insert("avg_density".to_string(), Value::from(row.5.unwrap_or(0.0)));
|
||||
|
||||
Ok(stats)
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Local copy of the chunk-type string mapping exercised by
    /// `test_chunk_type_conversion`; unknown strings map to `TimeBased`.
    fn parse_chunk_type(raw: &str) -> ChunkType {
        match raw {
            "visual" => ChunkType::Visual,
            "sentence" => ChunkType::Sentence,
            "time_based" => ChunkType::TimeBased,
            "cut" => ChunkType::Cut,
            "trace" => ChunkType::Trace,
            "story" => ChunkType::Story,
            _ => ChunkType::TimeBased,
        }
    }

    /// The default criteria impose no restriction at all.
    #[test]
    fn test_visual_chunk_search_criteria_default() {
        let defaults = VisualChunkSearchCriteria::default();

        assert_eq!(defaults.min_avg_confidence, None);
        assert_eq!(defaults.min_frames_with_objects, None);
        assert_eq!(defaults.min_unique_classes, None);
        assert!(defaults.required_classes.is_empty());
        assert!(defaults.class_counts.is_empty());
        assert_eq!(defaults.time_range, None);
    }

    /// Explicitly set fields are stored unchanged.
    #[test]
    fn test_visual_chunk_search_criteria_with_values() {
        let populated = VisualChunkSearchCriteria {
            min_avg_confidence: Some(0.8),
            min_frames_with_objects: Some(10),
            min_unique_classes: Some(3),
            required_classes: vec!["person".to_string(), "car".to_string()],
            time_range: Some((0.0, 60.0)),
            ..VisualChunkSearchCriteria::default()
        };

        assert_eq!(populated.min_avg_confidence, Some(0.8));
        assert_eq!(populated.min_frames_with_objects, Some(10));
        assert_eq!(populated.min_unique_classes, Some(3));
        assert_eq!(populated.required_classes.len(), 2);
        assert_eq!(populated.time_range, Some((0.0, 60.0)));
    }

    /// The criteria survive a JSON round trip.
    #[test]
    fn test_visual_chunk_search_criteria_serialization() {
        let original = VisualChunkSearchCriteria {
            min_avg_confidence: Some(0.85),
            min_frames_with_objects: Some(5),
            min_unique_classes: Some(2),
            required_classes: vec!["person".to_string()],
            class_counts: HashMap::new(),
            time_range: Some((10.0, 30.0)),
        };

        let encoded = serde_json::to_string(&original).unwrap();
        assert!(encoded.contains("min_avg_confidence"));
        assert!(encoded.contains("required_classes"));

        let decoded: VisualChunkSearchCriteria = serde_json::from_str(&encoded).unwrap();
        assert_eq!(decoded.min_avg_confidence, Some(0.85));
        assert_eq!(decoded.required_classes.len(), 1);
    }

    /// Per-class `(min, max)` bounds are stored under their class name.
    #[test]
    fn test_visual_chunk_search_criteria_with_class_counts() {
        let bounded = VisualChunkSearchCriteria {
            class_counts: vec![
                ("person".to_string(), (5, 20)),
                ("car".to_string(), (1, 10)),
            ]
            .into_iter()
            .collect(),
            ..VisualChunkSearchCriteria::default()
        };

        assert_eq!(bounded.class_counts.len(), 2);
        assert_eq!(bounded.class_counts.get("person"), Some(&(5, 20)));
        assert_eq!(bounded.class_counts.get("car"), Some(&(1, 10)));
    }

    /// Every known chunk-type string maps to its variant; anything else falls
    /// back to `TimeBased`.
    #[test]
    fn test_chunk_type_conversion() {
        let expectations = [
            ("visual", ChunkType::Visual),
            ("sentence", ChunkType::Sentence),
            ("time_based", ChunkType::TimeBased),
            ("cut", ChunkType::Cut),
            ("trace", ChunkType::Trace),
            ("story", ChunkType::Story),
            ("unknown", ChunkType::TimeBased), // Default fallback
        ];

        for (raw, expected) in expectations {
            assert_eq!(parse_chunk_type(raw), expected);
        }
    }
}
|
||||
@@ -1,217 +0,0 @@
|
||||
use axum::{extract::State, http::StatusCode, response::Json, routing::post, Router};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use sha2::{Digest, Sha256};
|
||||
|
||||
use super::types::AppState;
|
||||
use super::visual_chunk_search;
|
||||
use crate::core::cache::keys;
|
||||
use crate::core::chunk::types::Chunk;
|
||||
use crate::core::db::{Database, PostgresDb};
|
||||
|
||||
fn generate_visual_search_hash(
|
||||
uuid: &str,
|
||||
criteria: &visual_chunk_search::VisualChunkSearchCriteria,
|
||||
) -> String {
|
||||
let data = serde_json::json!({
|
||||
"uuid": uuid,
|
||||
"criteria": criteria,
|
||||
});
|
||||
let mut hasher = Sha256::new();
|
||||
hasher.update(data.to_string().as_bytes());
|
||||
format!("{:x}", hasher.finalize())[..16].to_string()
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct VisualChunkSearchRequest {
|
||||
file_uuid: String,
|
||||
criteria: visual_chunk_search::VisualChunkSearchCriteria,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize)]
|
||||
struct VisualChunkSearchResponse {
|
||||
chunks: Vec<Chunk>,
|
||||
total: usize,
|
||||
}
|
||||
|
||||
async fn search_visual_chunks(
|
||||
State(state): State<AppState>,
|
||||
Json(req): Json<VisualChunkSearchRequest>,
|
||||
) -> Result<Json<VisualChunkSearchResponse>, StatusCode> {
|
||||
let criteria_hash = generate_visual_search_hash(&req.file_uuid, &req.criteria);
|
||||
let cache_key = keys::visual_search(&req.file_uuid, &criteria_hash);
|
||||
let ttl = state.mongo_cache.ttl_visual_search();
|
||||
|
||||
let chunks = state
|
||||
.mongo_cache
|
||||
.get_or_fetch(&cache_key, ttl, keys::CATEGORY_VISUAL_SEARCH, || async {
|
||||
let db = PostgresDb::init()
|
||||
.await
|
||||
.map_err(|e| anyhow::anyhow!("PG init failed: {}", e))?;
|
||||
|
||||
visual_chunk_search::search_visual_chunks(&db, &req.file_uuid, &req.criteria)
|
||||
.await
|
||||
.map_err(|e| anyhow::anyhow!("Visual search failed: {}", e))
|
||||
})
|
||||
.await
|
||||
.map_err(|e| {
|
||||
tracing::error!("Visual chunk search failed: {}", e);
|
||||
StatusCode::INTERNAL_SERVER_ERROR
|
||||
})?;
|
||||
|
||||
Ok(Json(VisualChunkSearchResponse {
|
||||
total: chunks.len(),
|
||||
chunks,
|
||||
}))
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct VisualChunkSearchByClassRequest {
|
||||
uuid: String,
|
||||
object_class: String,
|
||||
min_count: Option<u32>,
|
||||
max_count: Option<u32>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct VisualChunkSearchByDensityRequest {
|
||||
uuid: String,
|
||||
min_density: f32,
|
||||
max_density: Option<f32>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct VisualChunkStatsRequest {
|
||||
uuid: String,
|
||||
}
|
||||
|
||||
async fn search_visual_chunks_by_class(
|
||||
State(state): State<AppState>,
|
||||
Json(req): Json<VisualChunkSearchByClassRequest>,
|
||||
) -> Result<Json<VisualChunkSearchResponse>, StatusCode> {
|
||||
let db = PostgresDb::init()
|
||||
.await
|
||||
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
|
||||
|
||||
let chunks = visual_chunk_search::search_visual_chunks_by_class(
|
||||
&db,
|
||||
&req.uuid,
|
||||
&req.object_class,
|
||||
req.min_count,
|
||||
req.max_count,
|
||||
)
|
||||
.await
|
||||
.map_err(|e| {
|
||||
tracing::error!("Visual chunk search by class failed: {}", e);
|
||||
StatusCode::INTERNAL_SERVER_ERROR
|
||||
})?;
|
||||
|
||||
Ok(Json(VisualChunkSearchResponse {
|
||||
total: chunks.len(),
|
||||
chunks,
|
||||
}))
|
||||
}
|
||||
|
||||
async fn search_visual_chunks_by_density(
|
||||
State(state): State<AppState>,
|
||||
Json(req): Json<VisualChunkSearchByDensityRequest>,
|
||||
) -> Result<Json<VisualChunkSearchResponse>, StatusCode> {
|
||||
let db = PostgresDb::init()
|
||||
.await
|
||||
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
|
||||
|
||||
let chunks = visual_chunk_search::search_visual_chunks_by_density(
|
||||
&db,
|
||||
&req.uuid,
|
||||
req.min_density,
|
||||
req.max_density,
|
||||
)
|
||||
.await
|
||||
.map_err(|e| {
|
||||
tracing::error!("Visual chunk search by density failed: {}", e);
|
||||
StatusCode::INTERNAL_SERVER_ERROR
|
||||
})?;
|
||||
|
||||
Ok(Json(VisualChunkSearchResponse {
|
||||
total: chunks.len(),
|
||||
chunks,
|
||||
}))
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize)]
|
||||
struct VisualChunkStatsResponse {
|
||||
uuid: String,
|
||||
stats: std::collections::HashMap<String, serde_json::Value>,
|
||||
}
|
||||
|
||||
async fn get_visual_chunk_stats(
|
||||
State(state): State<AppState>,
|
||||
Json(req): Json<VisualChunkStatsRequest>,
|
||||
) -> Result<Json<VisualChunkStatsResponse>, StatusCode> {
|
||||
let db = PostgresDb::init()
|
||||
.await
|
||||
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
|
||||
|
||||
let stats = visual_chunk_search::get_visual_chunk_statistics(&db, &req.uuid)
|
||||
.await
|
||||
.map_err(|e| {
|
||||
tracing::error!("Get visual chunk stats failed: {}", e);
|
||||
StatusCode::INTERNAL_SERVER_ERROR
|
||||
})?;
|
||||
|
||||
Ok(Json(VisualChunkStatsResponse {
|
||||
uuid: req.uuid,
|
||||
stats,
|
||||
}))
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct VisualChunkSearchByCombinationRequest {
|
||||
uuid: String,
|
||||
combination: Vec<(String, u32)>,
|
||||
}
|
||||
|
||||
async fn search_visual_chunks_by_combination(
|
||||
State(state): State<AppState>,
|
||||
Json(req): Json<VisualChunkSearchByCombinationRequest>,
|
||||
) -> Result<Json<VisualChunkSearchResponse>, StatusCode> {
|
||||
let db = PostgresDb::init()
|
||||
.await
|
||||
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
|
||||
|
||||
let combination: Vec<(&str, u32)> = req
|
||||
.combination
|
||||
.iter()
|
||||
.map(|(c, n)| (c.as_str(), *n))
|
||||
.collect();
|
||||
|
||||
let chunks =
|
||||
visual_chunk_search::search_visual_chunks_by_combination(&db, &req.uuid, &combination)
|
||||
.await
|
||||
.map_err(|e| {
|
||||
tracing::error!("Visual chunk search by combination failed: {}", e);
|
||||
StatusCode::INTERNAL_SERVER_ERROR
|
||||
})?;
|
||||
|
||||
Ok(Json(VisualChunkSearchResponse {
|
||||
total: chunks.len(),
|
||||
chunks,
|
||||
}))
|
||||
}
|
||||
|
||||
pub fn visual_search_routes() -> Router<AppState> {
|
||||
Router::new()
|
||||
.route("/api/v1/search/visual", post(search_visual_chunks))
|
||||
.route(
|
||||
"/api/v1/search/visual/class",
|
||||
post(search_visual_chunks_by_class),
|
||||
)
|
||||
.route(
|
||||
"/api/v1/search/visual/density",
|
||||
post(search_visual_chunks_by_density),
|
||||
)
|
||||
.route("/api/v1/search/visual/stats", post(get_visual_chunk_stats))
|
||||
.route(
|
||||
"/api/v1/search/visual/combination",
|
||||
post(search_visual_chunks_by_combination),
|
||||
)
|
||||
}
|
||||
42
src/bin/check_db_schema.rs
Normal file
42
src/bin/check_db_schema.rs
Normal file
@@ -0,0 +1,42 @@
|
||||
use sqlx::postgres::PgPoolOptions;
|
||||
use sqlx::Row;
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
let url = std::env::var("DATABASE_URL")
|
||||
.unwrap_or_else(|_| "postgres://accusys@localhost:5432/momentry".into());
|
||||
let pool = PgPoolOptions::new()
|
||||
.max_connections(1)
|
||||
.connect(&url)
|
||||
.await?;
|
||||
|
||||
// Check videos columns
|
||||
let rows = sqlx::query(
|
||||
"SELECT column_name, data_type FROM information_schema.columns WHERE table_schema='public' AND table_name='videos' ORDER BY ordinal_position"
|
||||
).fetch_all(&pool).await?;
|
||||
println!("=== public.videos columns ===");
|
||||
for r in &rows {
|
||||
let col: String = r.get("column_name");
|
||||
let typ: String = r.get("data_type");
|
||||
println!(" {}: {}", col, typ);
|
||||
}
|
||||
|
||||
// Check chunks_rule1 columns
|
||||
let rows2 = sqlx::query(
|
||||
"SELECT column_name, data_type FROM information_schema.columns WHERE table_schema='public' AND table_name='chunks_rule1' ORDER BY ordinal_position"
|
||||
).fetch_all(&pool).await?;
|
||||
println!("=== public.chunks_rule1 columns ===");
|
||||
for r in &rows2 {
|
||||
let col: String = r.get("column_name");
|
||||
let typ: String = r.get("data_type");
|
||||
println!(" {}: {}", col, typ);
|
||||
}
|
||||
|
||||
// Check if jobs exists
|
||||
let exists: bool = sqlx::query_scalar(
|
||||
"SELECT EXISTS (SELECT 1 FROM information_schema.tables WHERE table_schema='public' AND table_name='jobs')"
|
||||
).fetch_one(&pool).await?;
|
||||
println!("=== public.jobs exists: {} ===", exists);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
95
src/bin/sync_qdrant_from_pg.rs
Normal file
95
src/bin/sync_qdrant_from_pg.rs
Normal file
@@ -0,0 +1,95 @@
|
||||
use anyhow::{Context, Result};
|
||||
use momentry_core::{Database, PostgresDb, QdrantDb, VectorPayload};
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> Result<()> {
|
||||
dotenv::from_filename("/Users/accusys/momentry_core_0.1/.env.development").ok();
|
||||
tracing_subscriber::fmt::init();
|
||||
|
||||
let pg = PostgresDb::init().await.context("Failed to init PostgreSQL")?;
|
||||
let qdrant = QdrantDb::new();
|
||||
let chunk_table = momentry_core::core::db::schema::table_name("chunk");
|
||||
|
||||
let uuids = vec![
|
||||
"63acd3bb02b5b9dfbb9d6db499fcc864",
|
||||
"a6fb22eebefaef17e62af874997c5944",
|
||||
"d81e01261391b45c1a14ddd5f082733e",
|
||||
];
|
||||
|
||||
for uuid in &uuids {
|
||||
let rows = sqlx::query_as::<_, (String, String, i64, i64, f64, f64, String, String)>(
|
||||
&format!(
|
||||
"SELECT chunk_id, text_content, start_frame, end_frame, \
|
||||
start_time, end_time, embedding::text, content::text \
|
||||
FROM {} \
|
||||
WHERE file_uuid = $1 \
|
||||
AND chunk_type = 'sentence' \
|
||||
AND embedding IS NOT NULL \
|
||||
AND (text_content IS NOT NULL AND text_content != '') \
|
||||
ORDER BY id",
|
||||
chunk_table
|
||||
),
|
||||
)
|
||||
.bind(uuid)
|
||||
.fetch_all(pg.pool())
|
||||
.await?;
|
||||
|
||||
let total = rows.len();
|
||||
println!("[{}] Found {} sentence chunks with embeddings to sync to Qdrant", uuid, total);
|
||||
|
||||
if total == 0 {
|
||||
continue;
|
||||
}
|
||||
|
||||
let start = std::time::Instant::now();
|
||||
let mut stored = 0usize;
|
||||
let mut errors = 0usize;
|
||||
|
||||
for (chunk_id, text, start_frame, end_frame, start_time, end_time, vector_text, _content_str) in &rows {
|
||||
let vector: Vec<f32> = serde_json::from_str(vector_text)
|
||||
.map_err(|e| anyhow::anyhow!("Failed to parse vector for {}: {}", chunk_id, e))?;
|
||||
|
||||
let payload = VectorPayload {
|
||||
file_uuid: uuid.to_string(),
|
||||
chunk_id: chunk_id.clone(),
|
||||
chunk_type: "sentence".to_string(),
|
||||
start_frame: *start_frame,
|
||||
end_frame: *end_frame,
|
||||
start_time: *start_time,
|
||||
end_time: *end_time,
|
||||
text: Some(text.clone()),
|
||||
};
|
||||
|
||||
if let Err(e) = qdrant.upsert_vector(chunk_id, &vector, payload).await {
|
||||
eprintln!("[ERROR] Qdrant upsert failed for {}: {}", chunk_id, e);
|
||||
errors += 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
stored += 1;
|
||||
if stored % 5000 == 0 || stored == total {
|
||||
let elapsed = start.elapsed();
|
||||
let rate = stored as f64 / elapsed.as_secs_f64();
|
||||
println!(
|
||||
" [{}] {}/{} ({:.1}%) | {:.0} vec/s | {} errors",
|
||||
uuid.get(..8).unwrap_or(uuid),
|
||||
stored, total,
|
||||
100.0 * stored as f64 / total as f64,
|
||||
rate, errors,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
let elapsed = start.elapsed();
|
||||
println!(
|
||||
"[{}] Done! {}/{} vectors synced ({} errors) in {:.1}s ({:.0} vec/s avg)",
|
||||
uuid.get(..8).unwrap_or(uuid),
|
||||
stored, total, errors,
|
||||
elapsed.as_secs_f64(),
|
||||
stored as f64 / elapsed.as_secs_f64(),
|
||||
);
|
||||
}
|
||||
|
||||
println!("\nAll files synced to Qdrant!");
|
||||
Ok(())
|
||||
}
|
||||
45
src/bin/test_bson_deserialize.rs
Normal file
45
src/bin/test_bson_deserialize.rs
Normal file
@@ -0,0 +1,45 @@
|
||||
use bson::bson;
|
||||
use chrono::{DateTime, Utc};
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
struct TestIdentitySnapshot {
|
||||
id: i32,
|
||||
uuid: String,
|
||||
name: String,
|
||||
identity_type: Option<String>,
|
||||
source: Option<String>,
|
||||
status: String,
|
||||
tmdb_id: Option<i32>,
|
||||
tmdb_profile: Option<String>,
|
||||
metadata: serde_json::Value,
|
||||
#[serde(
|
||||
with = "bson::serde_helpers::chrono_datetime_as_bson_datetime_optional",
|
||||
default
|
||||
)]
|
||||
created_at: Option<DateTime<Utc>>,
|
||||
face_count: i64,
|
||||
}
|
||||
|
||||
fn main() {
|
||||
// 模拟 MongoDB document
|
||||
let doc = bson!({
|
||||
"id": bson::Bson::Int32(21),
|
||||
"uuid": "1524f6a1537f48a187526d44a236584f",
|
||||
"name": "Albert Daumergue",
|
||||
"identity_type": "people",
|
||||
"source": "tmdb",
|
||||
"status": "confirmed",
|
||||
"tmdb_id": bson::Bson::Int32(1100817),
|
||||
"tmdb_profile": bson::Bson::Null,
|
||||
"metadata": {"role": "", "notes": "", "aliases": [], "starred": false},
|
||||
"created_at": bson::Bson::DateTime(bson::DateTime::from_millis(1714641951963)),
|
||||
"face_count": bson::Bson::Int64(0)
|
||||
});
|
||||
|
||||
let result: Result<TestIdentitySnapshot, _> = bson::from_bson(doc);
|
||||
match result {
|
||||
Ok(s) => println!("Success! {:?}", s),
|
||||
Err(e) => println!("Error: {}", e),
|
||||
}
|
||||
}
|
||||
25
src/bin/test_empty_array.rs
Normal file
25
src/bin/test_empty_array.rs
Normal file
@@ -0,0 +1,25 @@
|
||||
use bson::bson;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
struct FacesTransferred {
|
||||
file_uuid: String,
|
||||
face_ids: Vec<String>,
|
||||
trace_ids: Vec<i32>,
|
||||
count: i64,
|
||||
}
|
||||
|
||||
fn main() {
|
||||
let doc = bson!({
|
||||
"file_uuid": "",
|
||||
"face_ids": [],
|
||||
"trace_ids": [],
|
||||
"count": bson::Bson::Int64(0)
|
||||
});
|
||||
|
||||
let result: Result<FacesTransferred, _> = bson::from_bson(doc);
|
||||
match result {
|
||||
Ok(f) => println!("Success! trace_ids len: {}", f.trace_ids.len()),
|
||||
Err(e) => println!("Error: {}", e),
|
||||
}
|
||||
}
|
||||
131
src/bin/test_full_deserialize.rs
Normal file
131
src/bin/test_full_deserialize.rs
Normal file
@@ -0,0 +1,131 @@
|
||||
use bson::bson;
|
||||
use chrono::{DateTime, Utc};
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
struct IdentitySnapshot {
|
||||
id: i32,
|
||||
uuid: String,
|
||||
name: String,
|
||||
identity_type: Option<String>,
|
||||
source: Option<String>,
|
||||
status: String,
|
||||
tmdb_id: Option<i32>,
|
||||
tmdb_profile: Option<String>,
|
||||
metadata: serde_json::Value,
|
||||
#[serde(
|
||||
with = "bson::serde_helpers::chrono_datetime_as_bson_datetime_optional",
|
||||
default
|
||||
)]
|
||||
created_at: Option<DateTime<Utc>>,
|
||||
face_count: i64,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
struct TargetIdentitySnapshot {
|
||||
id: i32,
|
||||
uuid: String,
|
||||
name: String,
|
||||
metadata_before: serde_json::Value,
|
||||
metadata_after: Option<serde_json::Value>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
struct AliasEntry {
|
||||
name: String,
|
||||
locale: String,
|
||||
source: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
struct FacesTransferred {
|
||||
file_uuid: String,
|
||||
face_ids: Vec<String>,
|
||||
trace_ids: Vec<i32>,
|
||||
count: i64,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
struct MergeParams {
|
||||
keep_history: bool,
|
||||
cleared_stranger_id: bool,
|
||||
performed_by_user: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
struct IdentityMergeHistory {
|
||||
#[serde(rename = "_id", skip_serializing_if = "Option::is_none")]
|
||||
id: Option<bson::oid::ObjectId>,
|
||||
merge_id: String,
|
||||
source_identity: IdentitySnapshot,
|
||||
target_identity: TargetIdentitySnapshot,
|
||||
aliases_added_to_target: Vec<AliasEntry>,
|
||||
metadata_fields_added: Vec<String>,
|
||||
faces_transferred: FacesTransferred,
|
||||
merge_params: MergeParams,
|
||||
#[serde(with = "bson::serde_helpers::chrono_datetime_as_bson_datetime")]
|
||||
merged_at: DateTime<Utc>,
|
||||
#[serde(with = "bson::serde_helpers::chrono_datetime_as_bson_datetime")]
|
||||
undo_deadline: DateTime<Utc>,
|
||||
undone: bool,
|
||||
#[serde(
|
||||
with = "bson::serde_helpers::chrono_datetime_as_bson_datetime_optional",
|
||||
skip_serializing_if = "Option::is_none"
|
||||
)]
|
||||
undone_at: Option<DateTime<Utc>>,
|
||||
undone_by: Option<String>,
|
||||
undone_snapshot: Option<serde_json::Value>,
|
||||
undo_expired: bool,
|
||||
}
|
||||
|
||||
fn main() {
|
||||
let doc = bson!({
|
||||
"merge_id": "test-id",
|
||||
"source_identity": {
|
||||
"id": bson::Bson::Int32(21),
|
||||
"uuid": "test-uuid",
|
||||
"name": "Test",
|
||||
"identity_type": "people",
|
||||
"source": "tmdb",
|
||||
"status": "confirmed",
|
||||
"tmdb_id": bson::Bson::Int32(123),
|
||||
"tmdb_profile": bson::Bson::Null,
|
||||
"metadata": {},
|
||||
"created_at": bson::Bson::DateTime(bson::DateTime::from_millis(1714641951963)),
|
||||
"face_count": bson::Bson::Int64(0)
|
||||
},
|
||||
"target_identity": {
|
||||
"id": bson::Bson::Int32(22),
|
||||
"uuid": "target-uuid",
|
||||
"name": "Target",
|
||||
"metadata_before": {},
|
||||
"metadata_after": bson::Bson::Null
|
||||
},
|
||||
"aliases_added_to_target": [],
|
||||
"metadata_fields_added": [],
|
||||
"faces_transferred": {
|
||||
"file_uuid": "",
|
||||
"face_ids": [],
|
||||
"trace_ids": [],
|
||||
"count": bson::Bson::Int64(0)
|
||||
},
|
||||
"merge_params": {
|
||||
"keep_history": false,
|
||||
"cleared_stranger_id": true,
|
||||
"performed_by_user": "0"
|
||||
},
|
||||
"merged_at": bson::Bson::DateTime(bson::DateTime::from_millis(1714641951963)),
|
||||
"undo_deadline": bson::Bson::DateTime(bson::DateTime::from_millis(1714641951963)),
|
||||
"undone": false,
|
||||
"undone_at": bson::Bson::Null,
|
||||
"undone_by": bson::Bson::Null,
|
||||
"undone_snapshot": bson::Bson::Null,
|
||||
"undo_expired": false
|
||||
});
|
||||
|
||||
let result: Result<IdentityMergeHistory, _> = bson::from_bson(doc);
|
||||
match result {
|
||||
Ok(h) => println!("Success! {:?}", h.merge_id),
|
||||
Err(e) => println!("Error: {}", e),
|
||||
}
|
||||
}
|
||||
31
src/bin/test_number_types.rs
Normal file
31
src/bin/test_number_types.rs
Normal file
@@ -0,0 +1,31 @@
|
||||
use bson::bson;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
struct TestId {
|
||||
id: i32,
|
||||
tmdb_id: Option<i32>,
|
||||
}
|
||||
|
||||
fn main() {
|
||||
// 测试 Int32
|
||||
let doc_int32 = bson!({
|
||||
"id": bson::Bson::Int32(21),
|
||||
"tmdb_id": bson::Bson::Int32(1100817)
|
||||
});
|
||||
|
||||
let result1: Result<TestId, _> = bson::from_bson(doc_int32);
|
||||
println!("Int32 test: {}", result1.is_ok());
|
||||
|
||||
// 测试 Double (JavaScript Number 可能是这个)
|
||||
let doc_double = bson!({
|
||||
"id": bson::Bson::Double(21.0),
|
||||
"tmdb_id": bson::Bson::Double(1100817.0)
|
||||
});
|
||||
|
||||
let result2: Result<TestId, _> = bson::from_bson(doc_double);
|
||||
println!("Double test: {}", result2.is_ok());
|
||||
if result2.is_err() {
|
||||
println!("Double error: {}", result2.unwrap_err());
|
||||
}
|
||||
}
|
||||
117
src/bin/vectorize_missing.rs
Normal file
117
src/bin/vectorize_missing.rs
Normal file
@@ -0,0 +1,117 @@
|
||||
use anyhow::{Context, Result};
|
||||
use momentry_core::{
|
||||
Database, Embedder, PostgresDb, QdrantDb, VectorPayload,
|
||||
};
|
||||
use std::time::{Duration, Instant};
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> Result<()> {
|
||||
dotenv::from_filename("/Users/accusys/momentry_core_0.1/.env.development").ok();
|
||||
tracing_subscriber::fmt::init();
|
||||
|
||||
let pg = PostgresDb::init().await.context("Failed to init PostgreSQL")?;
|
||||
let qdrant = QdrantDb::new();
|
||||
let embedder = Embedder::new("embeddinggemma-300m".to_string());
|
||||
|
||||
let uuid = "63acd3bb02b5b9dfbb9d6db499fcc864";
|
||||
let chunk_table = momentry_core::core::db::schema::table_name("chunk");
|
||||
|
||||
let rows = sqlx::query_as::<_, (String, String, i64, i64, f64, f64, String)>(
|
||||
&format!(
|
||||
"SELECT chunk_id, text_content, start_frame, end_frame, \
|
||||
start_time, end_time, content::text \
|
||||
FROM {} \
|
||||
WHERE file_uuid = $1 \
|
||||
AND chunk_type = 'sentence' \
|
||||
AND embedding IS NULL \
|
||||
AND (text_content IS NOT NULL AND text_content != '') \
|
||||
ORDER BY id",
|
||||
chunk_table
|
||||
),
|
||||
)
|
||||
.bind(uuid)
|
||||
.fetch_all(pg.pool())
|
||||
.await?;
|
||||
|
||||
let total = rows.len();
|
||||
println!("Found {} sentence chunks without embedding for {}", total, uuid);
|
||||
|
||||
if total == 0 {
|
||||
println!("Nothing to vectorize. Exiting.");
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let start = Instant::now();
|
||||
let mut stored = 0usize;
|
||||
let mut errors = 0usize;
|
||||
|
||||
for (chunk_id, text, start_frame, end_frame, start_time, end_time, _content_str) in &rows {
|
||||
if text.is_empty() {
|
||||
continue;
|
||||
}
|
||||
|
||||
match embedder.embed_document(text).await {
|
||||
Ok(vector) => {
|
||||
if let Err(e) = pg.store_vector(chunk_id, &vector, uuid).await {
|
||||
eprintln!("[ERROR] PG store failed for {}: {}", chunk_id, e);
|
||||
errors += 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
let payload = VectorPayload {
|
||||
file_uuid: uuid.to_string(),
|
||||
chunk_id: chunk_id.clone(),
|
||||
chunk_type: "sentence".to_string(),
|
||||
start_frame: *start_frame,
|
||||
end_frame: *end_frame,
|
||||
start_time: *start_time,
|
||||
end_time: *end_time,
|
||||
text: Some(text.clone()),
|
||||
};
|
||||
|
||||
if let Err(e) = qdrant.upsert_vector(chunk_id, &vector, payload).await {
|
||||
eprintln!("[ERROR] Qdrant upsert failed for {}: {}", chunk_id, e);
|
||||
errors += 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
stored += 1;
|
||||
if stored % 500 == 0 || stored == total {
|
||||
let elapsed = start.elapsed();
|
||||
let rate = stored as f64 / elapsed.as_secs_f64();
|
||||
let eta = if stored < total {
|
||||
let remaining = Duration::from_secs_f64((total - stored) as f64 / rate);
|
||||
format!(" (ETA: {}s)", remaining.as_secs())
|
||||
} else {
|
||||
String::new()
|
||||
};
|
||||
println!(
|
||||
" [{}/{}] {:.1}% done | {:.0} vec/s | {} errors{}",
|
||||
stored,
|
||||
total,
|
||||
100.0 * stored as f64 / total as f64,
|
||||
rate,
|
||||
errors,
|
||||
eta,
|
||||
);
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
eprintln!("[ERROR] Embedding failed for {}: {}", chunk_id, e);
|
||||
errors += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let elapsed = start.elapsed();
|
||||
println!(
|
||||
"\nDone! {}/{} vectors stored ({} errors) in {:.1}s ({:.0} vec/s avg)",
|
||||
stored,
|
||||
total,
|
||||
errors,
|
||||
elapsed.as_secs_f64(),
|
||||
stored as f64 / elapsed.as_secs_f64(),
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@@ -1,6 +1,5 @@
|
||||
use crate::core::config::OUTPUT_DIR;
|
||||
use crate::core::db::schema;
|
||||
use crate::core::llm::client::generate_5w1h_summary;
|
||||
use anyhow::{Context, Result};
|
||||
use serde::Deserialize;
|
||||
use sqlx::PgPool;
|
||||
@@ -115,19 +114,6 @@ pub async fn ingest_rule3(pool: &PgPool, file_uuid: &str) -> Result<usize> {
|
||||
|
||||
let aggregated_text = texts.join(" ");
|
||||
|
||||
// 3. Call LLM for Summary
|
||||
let summary = if !aggregated_text.is_empty() {
|
||||
match generate_5w1h_summary(&aggregated_text).await {
|
||||
Ok(s) => s,
|
||||
Err(e) => {
|
||||
warn!("LLM Summary failed for scene {}: {}", scene.scene_number, e);
|
||||
"LLM Error".to_string()
|
||||
}
|
||||
}
|
||||
} else {
|
||||
"No Audio".to_string()
|
||||
};
|
||||
|
||||
info!(
|
||||
"Scene {}: {} -> {} ({} sentences)",
|
||||
scene.scene_number,
|
||||
@@ -168,7 +154,7 @@ pub async fn ingest_rule3(pool: &PgPool, file_uuid: &str) -> Result<usize> {
|
||||
.bind(scene.end_frame as i64)
|
||||
.bind(&metadata)
|
||||
.bind(&aggregated_text)
|
||||
.bind(&summary)
|
||||
.bind(&String::new())
|
||||
.bind(&metadata)
|
||||
.bind(&child_ids)
|
||||
.execute(&mut *tx)
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
use crate::core::time::FrameTime;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
// ==================== ChunkType ====================
|
||||
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum ChunkType {
|
||||
@@ -10,7 +9,6 @@ pub enum ChunkType {
|
||||
Cut,
|
||||
Trace,
|
||||
Story,
|
||||
Visual, // 視覺分片 (Phase 2.1)
|
||||
}
|
||||
|
||||
impl ChunkType {
|
||||
@@ -21,17 +19,15 @@ impl ChunkType {
|
||||
ChunkType::Cut => "cut",
|
||||
ChunkType::Trace => "trace",
|
||||
ChunkType::Story => "story",
|
||||
ChunkType::Visual => "visual",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ==================== ChunkRule ====================
|
||||
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum ChunkRule {
|
||||
Rule1, // 直接轉換
|
||||
Rule2, // 集合內容
|
||||
Rule1,
|
||||
Rule2,
|
||||
}
|
||||
|
||||
impl ChunkRule {
|
||||
@@ -43,73 +39,6 @@ impl ChunkRule {
|
||||
}
|
||||
}
|
||||
|
||||
// ==================== 視覺分片相關結構 (Phase 2.1) ====================
|
||||
/// 邊界框
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct BoundingBox {
|
||||
pub x: i32,
|
||||
pub y: i32,
|
||||
pub width: i32,
|
||||
pub height: i32,
|
||||
}
|
||||
|
||||
/// 檢測到的物件
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct DetectedObject {
|
||||
/// 物件類別名稱
|
||||
pub class_name: String,
|
||||
/// 物件類別 ID
|
||||
pub class_id: u32,
|
||||
/// 信心值 (0.0-1.0)
|
||||
pub confidence: f32,
|
||||
/// 邊界框
|
||||
pub bbox: Option<BoundingBox>,
|
||||
/// 出現次數 (在分片內)
|
||||
pub occurrence: u32,
|
||||
}
|
||||
|
||||
/// 關鍵幀的物件列表
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct KeyframeObjects {
|
||||
/// 關鍵幀時間 (秒) - 僅供參考,主要使用 frame_number
|
||||
pub timestamp: f64,
|
||||
/// 關鍵幀幀號 - 主要時間標示
|
||||
pub frame_number: u64,
|
||||
/// 檢測到的物件
|
||||
pub objects: Vec<DetectedObject>,
|
||||
}
|
||||
|
||||
/// 視覺元數據
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct VisualMetadata {
|
||||
/// 總物件數量
|
||||
pub object_count: u32,
|
||||
/// 唯一物件類別列表
|
||||
pub unique_classes: Vec<String>,
|
||||
/// 最高信心值
|
||||
pub max_confidence: f32,
|
||||
/// 平均信心值
|
||||
pub avg_confidence: f32,
|
||||
/// 空間密度(每幀平均物件數)
|
||||
pub spatial_density: f32,
|
||||
}
|
||||
|
||||
/// 視覺分片內容
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct VisualChunkContent {
|
||||
/// 關鍵幀物件列表,每個關鍵幀包含 frame_number
|
||||
pub keyframe_objects: Vec<KeyframeObjects>,
|
||||
/// 主要物件標籤(出現在大多數幀中的物件)
|
||||
pub dominant_objects: Vec<String>,
|
||||
/// 物件關係 (object1, relationship, object2) - 可選
|
||||
pub object_relationships: Vec<(String, String, String)>,
|
||||
/// 場景描述 - 可選
|
||||
pub scene_description: Option<String>,
|
||||
/// 視覺元數據
|
||||
pub metadata: VisualMetadata,
|
||||
}
|
||||
|
||||
// ==================== Chunk 主結構 ====================
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct Chunk {
|
||||
pub file_id: i32,
|
||||
@@ -117,11 +46,8 @@ pub struct Chunk {
|
||||
pub chunk_id: String,
|
||||
pub chunk_type: ChunkType,
|
||||
pub rule: ChunkRule,
|
||||
/// Frames per second (can be fractional, e.g., 29.97, 23.976)
|
||||
pub fps: f64,
|
||||
/// Start frame (0-based) - 主要時間標示
|
||||
pub start_frame: i64,
|
||||
/// End frame (exclusive) - 主要時間標示
|
||||
pub end_frame: i64,
|
||||
pub text_content: Option<String>,
|
||||
pub content: serde_json::Value,
|
||||
@@ -129,13 +55,11 @@ pub struct Chunk {
|
||||
pub vector_id: Option<String>,
|
||||
pub frame_count: i32,
|
||||
pub pre_chunk_ids: Vec<i32>,
|
||||
pub parent_chunk_id: Option<String>, // For parent-child chunk hierarchy
|
||||
pub child_chunk_ids: Vec<String>, // Child chunk IDs (for parent chunks)
|
||||
pub visual_stats: Option<serde_json::Value>,
|
||||
pub parent_chunk_id: Option<String>,
|
||||
pub child_chunk_ids: Vec<String>,
|
||||
}
|
||||
|
||||
impl Chunk {
|
||||
/// 創建新分片
|
||||
pub fn new(
|
||||
file_id: i32,
|
||||
uuid: String,
|
||||
@@ -166,167 +90,17 @@ impl Chunk {
|
||||
pre_chunk_ids: vec![],
|
||||
parent_chunk_id: None,
|
||||
child_chunk_ids: vec![],
|
||||
visual_stats: None,
|
||||
}
|
||||
}
|
||||
|
||||
/// 創建視覺分片 (Phase 2.1)
|
||||
pub fn new_visual(
|
||||
file_id: i32,
|
||||
uuid: String,
|
||||
chunk_id: String,
|
||||
start_frame: i64,
|
||||
end_frame: i64,
|
||||
fps: f64,
|
||||
visual_content: VisualChunkContent,
|
||||
) -> Self {
|
||||
let content = serde_json::to_value(&visual_content)
|
||||
.unwrap_or_else(|_| serde_json::json!({"error": "Failed to serialize visual content"}));
|
||||
|
||||
Self::new(
|
||||
file_id,
|
||||
uuid,
|
||||
chunk_id,
|
||||
ChunkType::Visual,
|
||||
ChunkRule::Rule2,
|
||||
start_frame,
|
||||
end_frame,
|
||||
fps,
|
||||
content,
|
||||
)
|
||||
}
|
||||
|
||||
/// 從 YOLO 幀創建視覺分片 (Phase 2.1)
|
||||
pub fn from_yolo_frames(
|
||||
file_id: i32,
|
||||
uuid: String,
|
||||
chunk_id: String,
|
||||
start_frame: i64,
|
||||
end_frame: i64,
|
||||
fps: f64,
|
||||
yolo_frames: Vec<crate::core::processor::yolo::YoloFrame>,
|
||||
) -> Self {
|
||||
// 將 YOLO 幀轉換為關鍵幀物件
|
||||
let keyframe_objects: Vec<KeyframeObjects> = yolo_frames
|
||||
.iter()
|
||||
.map(|frame| {
|
||||
let objects: Vec<DetectedObject> = frame
|
||||
.objects
|
||||
.iter()
|
||||
.map(|obj| DetectedObject {
|
||||
class_name: obj.class_name.clone(),
|
||||
class_id: obj.class_id,
|
||||
confidence: obj.confidence,
|
||||
bbox: Some(BoundingBox {
|
||||
x: obj.x,
|
||||
y: obj.y,
|
||||
width: obj.width,
|
||||
height: obj.height,
|
||||
}),
|
||||
occurrence: 1,
|
||||
})
|
||||
.collect();
|
||||
|
||||
KeyframeObjects {
|
||||
timestamp: frame.timestamp,
|
||||
frame_number: frame.frame,
|
||||
objects,
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
|
||||
// 計算物件統計
|
||||
let total_objects: u32 = yolo_frames.iter().map(|f| f.objects.len() as u32).sum();
|
||||
|
||||
// 收集所有物件類別
|
||||
let all_classes: Vec<String> = yolo_frames
|
||||
.iter()
|
||||
.flat_map(|f| f.objects.iter().map(|o| o.class_name.clone()))
|
||||
.collect();
|
||||
|
||||
// 獲取唯一類別
|
||||
let unique_classes: Vec<String> = all_classes
|
||||
.iter()
|
||||
.cloned()
|
||||
.collect::<std::collections::HashSet<_>>()
|
||||
.into_iter()
|
||||
.collect();
|
||||
|
||||
// 計算信心值統計
|
||||
let confidences: Vec<f32> = yolo_frames
|
||||
.iter()
|
||||
.flat_map(|f| f.objects.iter().map(|o| o.confidence))
|
||||
.collect();
|
||||
|
||||
let max_confidence = confidences.iter().copied().fold(0.0f32, f32::max);
|
||||
let avg_confidence = if !confidences.is_empty() {
|
||||
confidences.iter().sum::<f32>() / confidences.len() as f32
|
||||
} else {
|
||||
0.0
|
||||
};
|
||||
|
||||
// 計算主要物件(出現在大多數幀中的物件)
|
||||
let mut object_counts = std::collections::HashMap::new();
|
||||
for frame in &yolo_frames {
|
||||
let frame_classes: std::collections::HashSet<_> =
|
||||
frame.objects.iter().map(|o| o.class_name.clone()).collect();
|
||||
for class in frame_classes {
|
||||
*object_counts.entry(class).or_insert(0) += 1;
|
||||
}
|
||||
}
|
||||
|
||||
let mut dominant_objects: Vec<String> = object_counts
|
||||
.into_iter()
|
||||
.filter(|(_, count)| *count as f32 / yolo_frames.len() as f32 > 0.5)
|
||||
.map(|(class, _)| class)
|
||||
.collect();
|
||||
dominant_objects.sort();
|
||||
|
||||
// 創建視覺內容
|
||||
let visual_content = VisualChunkContent {
|
||||
keyframe_objects,
|
||||
dominant_objects,
|
||||
object_relationships: vec![], // 可選:後期添加關係檢測
|
||||
scene_description: None, // 可選:後期添加 LLM 生成的場景描述
|
||||
metadata: VisualMetadata {
|
||||
object_count: total_objects,
|
||||
unique_classes,
|
||||
max_confidence,
|
||||
avg_confidence,
|
||||
spatial_density: if yolo_frames.len() > 0 {
|
||||
total_objects as f32 / yolo_frames.len() as f32
|
||||
} else {
|
||||
0.0
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
Self::new_visual(
|
||||
file_id,
|
||||
uuid,
|
||||
chunk_id,
|
||||
start_frame,
|
||||
end_frame,
|
||||
fps,
|
||||
visual_content,
|
||||
)
|
||||
}
|
||||
|
||||
/// 將分片轉換為幀時間
|
||||
pub fn to_frame_time(&self) -> FrameTime {
|
||||
// 使用第一個幀作為參考點
|
||||
FrameTime::from_frames(self.start_frame, self.fps)
|
||||
}
|
||||
|
||||
/// 檢查是否是父分片
|
||||
pub fn is_parent(&self) -> bool {
|
||||
self.parent_chunk_id.is_some()
|
||||
}
|
||||
|
||||
/// 從秒數創建新分片(舊版轉換)
|
||||
///
|
||||
/// 這對於從存儲時間為秒的舊系統遷移很有用。
|
||||
/// 幀數通過舍入 `seconds * fps` 計算。
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub fn from_seconds(
|
||||
file_id: i32,
|
||||
@@ -354,197 +128,82 @@ impl Chunk {
|
||||
)
|
||||
}
|
||||
|
||||
/// 返回開始時間為 `FrameTime`
|
||||
pub fn start_time(&self) -> FrameTime {
|
||||
FrameTime::from_frames(self.start_frame, self.fps)
|
||||
}
|
||||
|
||||
/// 返回結束時間為 `FrameTime`
|
||||
pub fn end_time(&self) -> FrameTime {
|
||||
FrameTime::from_frames(self.end_frame, self.fps)
|
||||
}
|
||||
|
||||
/// 返回持續時間的幀數
|
||||
pub fn duration_frames(&self) -> i64 {
|
||||
self.end_frame - self.start_frame
|
||||
}
|
||||
|
||||
/// 返回持續時間的秒數
|
||||
pub fn duration_seconds(&self) -> f64 {
|
||||
self.duration_frames() as f64 / self.fps
|
||||
}
|
||||
|
||||
/// 將開始時間格式化為 "seconds.frame" (例如:"123.04")
|
||||
pub fn format_start_sec_frame(&self) -> String {
|
||||
self.start_time().format_sec_frame()
|
||||
}
|
||||
|
||||
/// 將結束時間格式化為 "seconds.frame" (例如:"456.15")
|
||||
pub fn format_end_sec_frame(&self) -> String {
|
||||
self.end_time().format_sec_frame()
|
||||
}
|
||||
|
||||
/// 將開始時間格式化為 "HH:MM:SS"
|
||||
pub fn format_start_hms(&self) -> String {
|
||||
self.start_time().format_hms()
|
||||
}
|
||||
|
||||
/// 將結束時間格式化為 "HH:MM:SS"
|
||||
pub fn format_end_hms(&self) -> String {
|
||||
self.end_time().format_hms()
|
||||
}
|
||||
|
||||
/// 將開始時間格式化為 "HH:MM:SS.FF"
|
||||
pub fn format_start_hms_frame(&self) -> String {
|
||||
self.start_time().format_hms_frame()
|
||||
}
|
||||
|
||||
/// 將結束時間格式化為 "HH:MM:SS.FF"
|
||||
pub fn format_end_hms_frame(&self) -> String {
|
||||
self.end_time().format_hms_frame()
|
||||
}
|
||||
|
||||
/// 返回 (start_seconds, end_seconds) 元組用於兼容性
|
||||
///
|
||||
/// 這在遷移期間提供向後兼容性。
|
||||
/// 建議使用 `start_time()` 和 `end_time()` 方法。
|
||||
pub fn time_range_seconds(&self) -> (f64, f64) {
|
||||
(self.start_time().seconds(), self.end_time().seconds())
|
||||
}
|
||||
|
||||
/// 添加元數據
|
||||
pub fn with_metadata(mut self, metadata: serde_json::Value) -> Self {
|
||||
self.metadata = Some(metadata);
|
||||
self
|
||||
}
|
||||
|
||||
/// 添加向量 ID
|
||||
pub fn with_vector_id(mut self, vector_id: String) -> Self {
|
||||
self.vector_id = Some(vector_id);
|
||||
self
|
||||
}
|
||||
|
||||
/// 添加文本內容
|
||||
pub fn with_text_content(mut self, text: String) -> Self {
|
||||
self.text_content = Some(text);
|
||||
self
|
||||
}
|
||||
|
||||
/// 設置幀數
|
||||
pub fn with_frame_count(mut self, count: i32) -> Self {
|
||||
self.frame_count = count;
|
||||
self
|
||||
}
|
||||
|
||||
/// 設置前一個分片 ID
|
||||
pub fn with_pre_chunk_ids(mut self, ids: Vec<i32>) -> Self {
|
||||
self.pre_chunk_ids = ids;
|
||||
self
|
||||
}
|
||||
|
||||
/// 設置父分片 ID
|
||||
pub fn with_parent_chunk_id(mut self, parent_id: String) -> Self {
|
||||
self.parent_chunk_id = Some(parent_id);
|
||||
self
|
||||
}
|
||||
|
||||
/// 設置子分片 ID
|
||||
pub fn with_child_chunk_ids(mut self, child_ids: Vec<String>) -> Self {
|
||||
self.child_chunk_ids = child_ids;
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
// ==================== VisualChunkContent 輔助方法 ====================
|
||||
impl VisualChunkContent {
|
||||
/// 計算兩個 YOLO 幀之間的相似度(基於物件組成)
|
||||
pub fn frame_similarity(
|
||||
frame1: &crate::core::processor::yolo::YoloFrame,
|
||||
frame2: &crate::core::processor::yolo::YoloFrame,
|
||||
) -> f32 {
|
||||
if frame1.objects.is_empty() && frame2.objects.is_empty() {
|
||||
return 1.0; // 兩個空幀完全相似
|
||||
}
|
||||
|
||||
if frame1.objects.is_empty() || frame2.objects.is_empty() {
|
||||
return 0.0; // 一個空一個非空,不相似
|
||||
}
|
||||
|
||||
// 創建物件類別名稱集合
|
||||
let set1: std::collections::HashSet<String> = frame1
|
||||
.objects
|
||||
.iter()
|
||||
.map(|o| o.class_name.clone())
|
||||
.collect();
|
||||
let set2: std::collections::HashSet<String> = frame2
|
||||
.objects
|
||||
.iter()
|
||||
.map(|o| o.class_name.clone())
|
||||
.collect();
|
||||
|
||||
// 計算 Jaccard 相似度
|
||||
let intersection: Vec<_> = set1.intersection(&set2).collect();
|
||||
let union: Vec<_> = set1.union(&set2).collect();
|
||||
|
||||
if union.is_empty() {
|
||||
0.0
|
||||
} else {
|
||||
intersection.len() as f32 / union.len() as f32
|
||||
}
|
||||
}
|
||||
|
||||
/// 獲取視覺分片的摘要(使用關鍵幀的 frame_number)
|
||||
pub fn summary(&self, fps: f64) -> String {
|
||||
if self.keyframe_objects.is_empty() {
|
||||
return "Empty visual chunk".to_string();
|
||||
}
|
||||
|
||||
let first_frame = self.keyframe_objects.first().unwrap().frame_number;
|
||||
let last_frame = self.keyframe_objects.last().unwrap().frame_number;
|
||||
|
||||
// 計算時間(僅供參考)
|
||||
let start_time = if fps > 0.0 {
|
||||
first_frame as f64 / fps
|
||||
} else {
|
||||
0.0
|
||||
};
|
||||
let end_time = if fps > 0.0 {
|
||||
last_frame as f64 / fps
|
||||
} else {
|
||||
0.0
|
||||
};
|
||||
let duration = end_time - start_time;
|
||||
let frame_count = self.keyframe_objects.len();
|
||||
|
||||
format!(
|
||||
"Visual chunk: frames {} to {} (duration: {:.1}s, {} frames). Objects: {} total, {} unique. Dominant: {}",
|
||||
first_frame,
|
||||
last_frame,
|
||||
duration,
|
||||
frame_count,
|
||||
self.metadata.object_count,
|
||||
self.metadata.unique_classes.len(),
|
||||
if self.dominant_objects.is_empty() {
|
||||
"none".to_string()
|
||||
} else {
|
||||
self.dominant_objects.join(", ")
|
||||
}
|
||||
)
|
||||
}
|
||||
|
||||
/// 檢查是否包含特定物件類別
|
||||
pub fn contains_object(&self, class_name: &str) -> bool {
|
||||
self.keyframe_objects
|
||||
.iter()
|
||||
.any(|ko| ko.objects.iter().any(|obj| obj.class_name == class_name))
|
||||
}
|
||||
|
||||
/// 獲取信心值高於閾值的所有物件
|
||||
pub fn high_confidence_objects(&self, threshold: f32) -> Vec<&DetectedObject> {
|
||||
self.keyframe_objects
|
||||
.iter()
|
||||
.flat_map(|ko| ko.objects.iter())
|
||||
.filter(|obj| obj.confidence >= threshold)
|
||||
.collect()
|
||||
}
|
||||
}
|
||||
|
||||
@@ -56,7 +56,7 @@ pub static REDIS_URL: Lazy<String> = Lazy::new(|| {
|
||||
env::var("REDIS_URL").unwrap_or_else(|_| {
|
||||
let password = env::var("REDIS_PASSWORD").unwrap_or_else(|_| "accusys".to_string());
|
||||
// Format: redis://[:password]@host:port (use default user)
|
||||
format!("redis://:{}@localhost:6379", password)
|
||||
format!("redis://default:{}@localhost:6379", password)
|
||||
})
|
||||
});
|
||||
|
||||
@@ -277,12 +277,14 @@ pub mod llm {
|
||||
}
|
||||
|
||||
/// Ollama embedding endpoint (vector embeddings for text sync).
|
||||
pub static OLLAMA_URL: Lazy<String> =
|
||||
Lazy::new(|| env::var("MOMENTRY_OLLAMA_URL").unwrap_or_else(|_| "http://127.0.0.1:11434".to_string()));
|
||||
pub static OLLAMA_URL: Lazy<String> = Lazy::new(|| {
|
||||
env::var("MOMENTRY_OLLAMA_URL").unwrap_or_else(|_| "http://127.0.0.1:11434".to_string())
|
||||
});
|
||||
|
||||
/// Text embedding server (comic-embed or alternative).
|
||||
pub static EMBED_URL: Lazy<String> =
|
||||
Lazy::new(|| env::var("MOMENTRY_EMBED_URL").unwrap_or_else(|_| "http://127.0.0.1:11436".to_string()));
|
||||
pub static EMBED_URL: Lazy<String> = Lazy::new(|| {
|
||||
env::var("MOMENTRY_EMBED_URL").unwrap_or_else(|_| "http://127.0.0.1:11436".to_string())
|
||||
});
|
||||
|
||||
/// LLM health endpoint.
|
||||
pub static LLM_HEALTH_URL: Lazy<String> = Lazy::new(|| {
|
||||
|
||||
604
src/core/db/identity_merge_history.rs
Normal file
604
src/core/db/identity_merge_history.rs
Normal file
@@ -0,0 +1,604 @@
|
||||
use anyhow::{Context, Result};
|
||||
use bson::{doc, oid::ObjectId, DateTime as BsonDateTime, Document};
|
||||
use chrono::{DateTime, Utc};
|
||||
use mongodb::{Client, Collection, Database, IndexModel};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_json::Value as JsonValue;
|
||||
use uuid::Uuid;
|
||||
|
||||
// Presumably the MongoDB collection that stores merge-history documents;
// its use is outside the visible part of this file — confirm.
const COLLECTION_NAME: &str = "identity_merge_history";
|
||||
|
||||
fn bson_doc_to_json(doc: &Document) -> JsonValue {
|
||||
match bson::to_bson(doc) {
|
||||
Ok(bson) => bson.into_relaxed_extjson(),
|
||||
Err(_) => JsonValue::Null,
|
||||
}
|
||||
}
|
||||
|
||||
fn json_value_to_bson_doc(value: &JsonValue) -> Document {
|
||||
bson::to_document(value).unwrap_or_default()
|
||||
}
|
||||
|
||||
fn doc_field_to_json(doc: &Document, key: &str) -> JsonValue {
|
||||
doc.get(key)
|
||||
.map(|b| b.clone().into_relaxed_extjson())
|
||||
.unwrap_or(JsonValue::Null)
|
||||
}
|
||||
|
||||
/// Converts a JSON value to BSON, yielding `Bson::Null` if the conversion
/// fails.
fn json_to_bson(value: &JsonValue) -> bson::Bson {
    match bson::to_bson(value) {
        Ok(converted) => converted,
        Err(_) => bson::Bson::Null,
    }
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct IdentityMergeHistory {
|
||||
pub id: Option<ObjectId>,
|
||||
pub merge_id: String,
|
||||
pub source_identity: IdentitySnapshot,
|
||||
pub target_identity: TargetIdentitySnapshot,
|
||||
pub aliases_added_to_target: Vec<AliasEntry>,
|
||||
pub metadata_fields_added: Vec<String>,
|
||||
pub faces_transferred: FacesTransferred,
|
||||
pub merge_params: MergeParams,
|
||||
pub merged_at: DateTime<Utc>,
|
||||
pub undo_deadline: DateTime<Utc>,
|
||||
pub undone: bool,
|
||||
pub undone_at: Option<DateTime<Utc>>,
|
||||
pub undone_by: Option<String>,
|
||||
pub undone_snapshot: Option<UndoneSnapshot>,
|
||||
pub undo_expired: bool,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct IdentitySnapshot {
|
||||
pub id: i64,
|
||||
pub uuid: String,
|
||||
pub name: String,
|
||||
pub identity_type: Option<String>,
|
||||
pub source: Option<String>,
|
||||
pub status: String,
|
||||
pub tmdb_id: Option<i64>,
|
||||
pub tmdb_profile: Option<String>,
|
||||
pub metadata: JsonValue,
|
||||
pub created_at: Option<DateTime<Utc>>,
|
||||
pub face_count: i64,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct TargetIdentitySnapshot {
|
||||
pub id: i64,
|
||||
pub uuid: String,
|
||||
pub name: String,
|
||||
pub metadata_before: JsonValue,
|
||||
pub metadata_after: Option<JsonValue>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct AliasEntry {
|
||||
pub name: String,
|
||||
pub locale: String,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub source: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct FacesTransferred {
|
||||
pub file_uuid: String,
|
||||
pub face_ids: Vec<String>,
|
||||
pub trace_ids: Vec<i32>,
|
||||
pub count: i64,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct UndoneSnapshot {
|
||||
pub source_identity_id: i64,
|
||||
pub source_uuid: String,
|
||||
pub source_name: String,
|
||||
pub target_metadata_at_undo: JsonValue,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct MergeParams {
|
||||
pub keep_history: bool,
|
||||
pub cleared_stranger_id: bool,
|
||||
pub performed_by_user: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct MergeHistoryQuery {
|
||||
pub source_uuid: Option<String>,
|
||||
pub target_uuid: Option<String>,
|
||||
pub merge_id: Option<String>,
|
||||
pub undone: Option<bool>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct MergeHistoryEntry {
|
||||
pub merge_id: String,
|
||||
pub source_name: String,
|
||||
pub target_name: String,
|
||||
pub faces_transferred: i64,
|
||||
pub merged_at: DateTime<Utc>,
|
||||
pub undo_deadline: DateTime<Utc>,
|
||||
pub undone: bool,
|
||||
pub undo_expired: bool,
|
||||
}
|
||||
|
||||
impl IdentityMergeHistory {
|
||||
pub fn from_document(doc: &Document) -> Result<Self> {
|
||||
let source = doc
|
||||
.get_document("source_identity")
|
||||
.context("Missing source_identity")?;
|
||||
let target = doc
|
||||
.get_document("target_identity")
|
||||
.context("Missing target_identity")?;
|
||||
let faces = doc
|
||||
.get_document("faces_transferred")
|
||||
.context("Missing faces_transferred")?;
|
||||
let aliases = doc
|
||||
.get_array("aliases_added_to_target")
|
||||
.unwrap_or(&vec![])
|
||||
.clone();
|
||||
let fields = doc
|
||||
.get_array("metadata_fields_added")
|
||||
.unwrap_or(&vec![])
|
||||
.clone();
|
||||
let merge_params_doc = doc
|
||||
.get_document("merge_params")
|
||||
.unwrap_or(&Document::new())
|
||||
.clone();
|
||||
|
||||
let mut parsed_aliases = Vec::new();
|
||||
for a in aliases {
|
||||
if let Some(d) = a.as_document() {
|
||||
parsed_aliases.push(AliasEntry {
|
||||
name: d.get_str("name").unwrap_or("").to_string(),
|
||||
locale: d.get_str("locale").unwrap_or("en").to_string(),
|
||||
source: d.get_str("source").ok().map(|s| s.to_string()),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
let mut parsed_fields = Vec::new();
|
||||
for f in fields {
|
||||
if let Some(s) = f.as_str() {
|
||||
parsed_fields.push(s.to_string());
|
||||
}
|
||||
}
|
||||
|
||||
let undone_snapshot = doc.get_document("undone_snapshot").ok().and_then(|d| {
|
||||
let sid = d.get_i64("source_identity_id").unwrap_or(0);
|
||||
let suuid = d.get_str("source_uuid").unwrap_or("").to_string();
|
||||
let sname = d.get_str("source_name").unwrap_or("").to_string();
|
||||
let meta = doc_field_to_json(d, "target_metadata_at_undo");
|
||||
Some(UndoneSnapshot {
|
||||
source_identity_id: sid,
|
||||
source_uuid: suuid,
|
||||
source_name: sname,
|
||||
target_metadata_at_undo: meta,
|
||||
})
|
||||
});
|
||||
|
||||
Ok(IdentityMergeHistory {
|
||||
id: doc.get_object_id("_id").ok(),
|
||||
merge_id: doc.get_str("merge_id").unwrap_or("").to_string(),
|
||||
source_identity: IdentitySnapshot {
|
||||
id: source.get_i64("id").unwrap_or(0),
|
||||
uuid: source.get_str("uuid").unwrap_or("").to_string(),
|
||||
name: source.get_str("name").unwrap_or("").to_string(),
|
||||
identity_type: source.get_str("identity_type").ok().map(|s| s.to_string()),
|
||||
source: source.get_str("source").ok().map(|s| s.to_string()),
|
||||
status: source.get_str("status").unwrap_or("").to_string(),
|
||||
tmdb_id: source.get_i64("tmdb_id").ok(),
|
||||
tmdb_profile: source.get_str("tmdb_profile").ok().map(|s| s.to_string()),
|
||||
metadata: doc_field_to_json(source, "metadata"),
|
||||
created_at: source
|
||||
.get_datetime("created_at")
|
||||
.map(|d| d.to_chrono())
|
||||
.ok(),
|
||||
face_count: source.get_i64("face_count").unwrap_or(0),
|
||||
},
|
||||
target_identity: TargetIdentitySnapshot {
|
||||
id: target.get_i64("id").unwrap_or(0),
|
||||
uuid: target.get_str("uuid").unwrap_or("").to_string(),
|
||||
name: target.get_str("name").unwrap_or("").to_string(),
|
||||
metadata_before: doc_field_to_json(target, "metadata_before"),
|
||||
metadata_after: target
|
||||
.get("metadata_after")
|
||||
.map(|b| b.clone().into_relaxed_extjson()),
|
||||
},
|
||||
aliases_added_to_target: parsed_aliases,
|
||||
metadata_fields_added: parsed_fields,
|
||||
faces_transferred: FacesTransferred {
|
||||
file_uuid: faces.get_str("file_uuid").unwrap_or("").to_string(),
|
||||
face_ids: faces
|
||||
.get_array("face_ids")
|
||||
.map(|arr| {
|
||||
arr.iter()
|
||||
.filter_map(|b| b.as_str().map(|s| s.to_string()))
|
||||
.collect()
|
||||
})
|
||||
.unwrap_or_default(),
|
||||
trace_ids: faces
|
||||
.get_array("trace_ids")
|
||||
.map(|arr| arr.iter().filter_map(|b| b.as_i32()).collect())
|
||||
.unwrap_or_default(),
|
||||
count: faces.get_i64("count").unwrap_or(0),
|
||||
},
|
||||
merge_params: MergeParams {
|
||||
keep_history: merge_params_doc.get_bool("keep_history").unwrap_or(true),
|
||||
cleared_stranger_id: merge_params_doc
|
||||
.get_bool("cleared_stranger_id")
|
||||
.unwrap_or(true),
|
||||
performed_by_user: merge_params_doc
|
||||
.get_str("performed_by_user")
|
||||
.ok()
|
||||
.map(|s| s.to_string()),
|
||||
},
|
||||
merged_at: doc
|
||||
.get_datetime("merged_at")
|
||||
.map(|d| d.to_chrono())
|
||||
.unwrap_or_default(),
|
||||
undo_deadline: doc
|
||||
.get_datetime("undo_deadline")
|
||||
.map(|d| d.to_chrono())
|
||||
.unwrap_or_default(),
|
||||
undone: doc.get_bool("undone").unwrap_or(false),
|
||||
undone_at: doc.get_datetime("undone_at").map(|d| d.to_chrono()).ok(),
|
||||
undone_by: doc.get_str("undone_by").ok().map(|s| s.to_string()),
|
||||
undone_snapshot,
|
||||
undo_expired: doc.get_bool("undo_expired").unwrap_or(false),
|
||||
})
|
||||
}
|
||||
|
||||
pub fn to_document(&self) -> Document {
|
||||
let mut doc = doc! {
|
||||
"merge_id": &self.merge_id,
|
||||
"source_identity": {
|
||||
"id": self.source_identity.id as i64,
|
||||
"uuid": &self.source_identity.uuid,
|
||||
"name": &self.source_identity.name,
|
||||
"identity_type": self.source_identity.identity_type.as_deref(),
|
||||
"source": self.source_identity.source.as_deref(),
|
||||
"status": &self.source_identity.status,
|
||||
"tmdb_id": self.source_identity.tmdb_id,
|
||||
"tmdb_profile": self.source_identity.tmdb_profile.as_deref(),
|
||||
"metadata": json_to_bson(&self.source_identity.metadata),
|
||||
"created_at": self.source_identity.created_at
|
||||
.map(|dt| BsonDateTime::from_chrono(dt)),
|
||||
"face_count": self.source_identity.face_count,
|
||||
},
|
||||
"target_identity": {
|
||||
"id": self.target_identity.id as i64,
|
||||
"uuid": &self.target_identity.uuid,
|
||||
"name": &self.target_identity.name,
|
||||
"metadata_before": json_to_bson(&self.target_identity.metadata_before),
|
||||
"metadata_after": self.target_identity.metadata_after.as_ref().map(json_to_bson),
|
||||
},
|
||||
"aliases_added_to_target": self.aliases_added_to_target.iter().map(|a| {
|
||||
doc! {
|
||||
"name": &a.name,
|
||||
"locale": &a.locale,
|
||||
"source": a.source.as_deref(),
|
||||
}
|
||||
}).collect::<Vec<Document>>(),
|
||||
"metadata_fields_added": &self.metadata_fields_added,
|
||||
"faces_transferred": {
|
||||
"file_uuid": &self.faces_transferred.file_uuid,
|
||||
"face_ids": &self.faces_transferred.face_ids,
|
||||
"trace_ids": &self.faces_transferred.trace_ids,
|
||||
"count": self.faces_transferred.count,
|
||||
},
|
||||
"merge_params": {
|
||||
"keep_history": self.merge_params.keep_history,
|
||||
"cleared_stranger_id": self.merge_params.cleared_stranger_id,
|
||||
"performed_by_user": self.merge_params.performed_by_user.as_deref(),
|
||||
},
|
||||
"merged_at": BsonDateTime::from_chrono(self.merged_at),
|
||||
"undo_deadline": BsonDateTime::from_chrono(self.undo_deadline),
|
||||
"undone": self.undone,
|
||||
"undone_at": self.undone_at.map(|dt| BsonDateTime::from_chrono(dt)),
|
||||
"undone_by": self.undone_by.as_deref(),
|
||||
"undone_snapshot": self.undone_snapshot.as_ref().map(|s| {
|
||||
doc! {
|
||||
"source_identity_id": s.source_identity_id,
|
||||
"source_uuid": &s.source_uuid,
|
||||
"source_name": &s.source_name,
|
||||
"target_metadata_at_undo": json_to_bson(&s.target_metadata_at_undo),
|
||||
}
|
||||
}),
|
||||
"undo_expired": self.undo_expired,
|
||||
};
|
||||
|
||||
if let Some(ref oid) = self.id {
|
||||
doc.insert("_id", oid.clone());
|
||||
}
|
||||
|
||||
doc
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct IdentityMergeHistoryStore {
|
||||
client: Client,
|
||||
db: Database,
|
||||
collection: Collection<Document>,
|
||||
}
|
||||
|
||||
impl IdentityMergeHistoryStore {
|
||||
pub async fn init() -> Result<Self> {
|
||||
let uri = crate::core::config::MONGODB_URL.as_str();
|
||||
let client = Client::with_uri_str(uri)
|
||||
.await
|
||||
.context("Failed to connect to MongoDB")?;
|
||||
let db_name = crate::core::config::MONGODB_DATABASE.as_str();
|
||||
let db = client.database(db_name);
|
||||
let collection: Collection<Document> = db.collection(COLLECTION_NAME);
|
||||
|
||||
let store = Self {
|
||||
client,
|
||||
db,
|
||||
collection,
|
||||
};
|
||||
|
||||
store.ensure_indexes().await?;
|
||||
Ok(store)
|
||||
}
|
||||
|
||||
async fn ensure_indexes(&self) -> Result<()> {
|
||||
let merge_id_index = IndexModel::builder()
|
||||
.keys(doc! { "merge_id": 1 })
|
||||
.options(
|
||||
mongodb::options::IndexOptions::builder()
|
||||
.unique(true)
|
||||
.build(),
|
||||
)
|
||||
.build();
|
||||
|
||||
let merged_at_index = IndexModel::builder().keys(doc! { "merged_at": -1 }).build();
|
||||
|
||||
let source_uuid_index = IndexModel::builder()
|
||||
.keys(doc! { "source_identity.uuid": 1 })
|
||||
.build();
|
||||
|
||||
let target_uuid_index = IndexModel::builder()
|
||||
.keys(doc! { "target_identity.uuid": 1 })
|
||||
.build();
|
||||
|
||||
self.collection
|
||||
.create_indexes(
|
||||
[
|
||||
merge_id_index,
|
||||
merged_at_index,
|
||||
source_uuid_index,
|
||||
target_uuid_index,
|
||||
],
|
||||
None,
|
||||
)
|
||||
.await
|
||||
.context("Failed to create identity_merge_history indexes")?;
|
||||
|
||||
tracing::info!("MongoDB identity_merge_history indexes ensured");
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn generate_merge_id() -> String {
|
||||
Uuid::new_v4().to_string()
|
||||
}
|
||||
|
||||
pub async fn store_merge_history(&self, history: &IdentityMergeHistory) -> Result<()> {
|
||||
let doc = history.to_document();
|
||||
self.collection
|
||||
.insert_one(doc, None)
|
||||
.await
|
||||
.context("Failed to store merge history in MongoDB")?;
|
||||
|
||||
tracing::info!(
|
||||
"Stored merge history: merge_id={}, source={}, target={}, faces={}",
|
||||
history.merge_id,
|
||||
history.source_identity.name,
|
||||
history.target_identity.name,
|
||||
history.faces_transferred.count
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn get_merge_history(&self, merge_id: &str) -> Result<Option<IdentityMergeHistory>> {
|
||||
let filter = doc! { "merge_id": merge_id };
|
||||
let result = self
|
||||
.collection
|
||||
.find_one(filter, None)
|
||||
.await
|
||||
.context("Failed to get merge history from MongoDB")?;
|
||||
|
||||
match result {
|
||||
Some(doc) => {
|
||||
let history = IdentityMergeHistory::from_document(&doc)
|
||||
.context("Failed to parse merge history from MongoDB")?;
|
||||
Ok(Some(history))
|
||||
}
|
||||
None => Ok(None),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn query_merge_history(
|
||||
&self,
|
||||
query: MergeHistoryQuery,
|
||||
page: u32,
|
||||
page_size: u32,
|
||||
) -> Result<(Vec<MergeHistoryEntry>, u64)> {
|
||||
let mut filter = doc! {};
|
||||
|
||||
if let Some(source_uuid) = query.source_uuid {
|
||||
filter.insert("source_identity.uuid", source_uuid);
|
||||
}
|
||||
if let Some(target_uuid) = query.target_uuid {
|
||||
filter.insert("target_identity.uuid", target_uuid);
|
||||
}
|
||||
if let Some(merge_id) = query.merge_id {
|
||||
filter.insert("merge_id", merge_id);
|
||||
}
|
||||
if let Some(undone) = query.undone {
|
||||
filter.insert("undone", undone);
|
||||
}
|
||||
|
||||
let skip = (page - 1) * page_size;
|
||||
let limit = page_size;
|
||||
|
||||
let mut cursor = self
|
||||
.collection
|
||||
.find(filter.clone(), None)
|
||||
.await
|
||||
.context("Failed to query merge history")?;
|
||||
|
||||
let total = self
|
||||
.collection
|
||||
.count_documents(filter, None)
|
||||
.await
|
||||
.context("Failed to count merge history")?;
|
||||
|
||||
let mut results: Vec<MergeHistoryEntry> = Vec::new();
|
||||
let mut count = 0;
|
||||
|
||||
while cursor.advance().await.context("Failed to advance cursor")? {
|
||||
if count >= skip && results.len() < limit as usize {
|
||||
let doc: Document = cursor
|
||||
.deserialize_current()
|
||||
.context("Failed to deserialize")?;
|
||||
|
||||
let merge_id = doc.get_str("merge_id").unwrap_or("").to_string();
|
||||
let source_name = doc
|
||||
.get_document("source_identity")
|
||||
.map(|d| d.get_str("name").unwrap_or("").to_string())
|
||||
.unwrap_or_default();
|
||||
let target_name = doc
|
||||
.get_document("target_identity")
|
||||
.map(|d| d.get_str("name").unwrap_or("").to_string())
|
||||
.unwrap_or_default();
|
||||
let faces_count = doc
|
||||
.get_document("faces_transferred")
|
||||
.map(|d| d.get_i64("count").unwrap_or(0))
|
||||
.unwrap_or(0);
|
||||
let merged_at = doc
|
||||
.get_datetime("merged_at")
|
||||
.map(|d| d.to_chrono())
|
||||
.unwrap_or_default();
|
||||
let undo_deadline = doc
|
||||
.get_datetime("undo_deadline")
|
||||
.map(|d| d.to_chrono())
|
||||
.unwrap_or_default();
|
||||
let undone = doc.get_bool("undone").unwrap_or(false);
|
||||
let undo_expired = doc.get_bool("undo_expired").unwrap_or(false);
|
||||
|
||||
results.push(MergeHistoryEntry {
|
||||
merge_id,
|
||||
source_name,
|
||||
target_name,
|
||||
faces_transferred: faces_count,
|
||||
merged_at,
|
||||
undo_deadline,
|
||||
undone,
|
||||
undo_expired,
|
||||
});
|
||||
}
|
||||
count += 1;
|
||||
}
|
||||
|
||||
Ok((results, total))
|
||||
}
|
||||
|
||||
pub async fn mark_as_undone(
|
||||
&self,
|
||||
merge_id: &str,
|
||||
undone_by: Option<&str>,
|
||||
undone_snapshot: UndoneSnapshot,
|
||||
) -> Result<()> {
|
||||
let filter = doc! { "merge_id": merge_id };
|
||||
let snapshot_doc = doc! {
|
||||
"source_identity_id": undone_snapshot.source_identity_id,
|
||||
"source_uuid": &undone_snapshot.source_uuid,
|
||||
"source_name": &undone_snapshot.source_name,
|
||||
"target_metadata_at_undo": json_to_bson(&undone_snapshot.target_metadata_at_undo),
|
||||
};
|
||||
let update = doc! {
|
||||
"$set": {
|
||||
"undone": true,
|
||||
"undone_at": BsonDateTime::from_chrono(Utc::now()),
|
||||
"undone_by": undone_by,
|
||||
"undone_snapshot": snapshot_doc,
|
||||
}
|
||||
};
|
||||
|
||||
self.collection
|
||||
.update_one(filter, update, None)
|
||||
.await
|
||||
.context("Failed to mark merge as undone")?;
|
||||
|
||||
tracing::info!("Marked merge {} as undone", merge_id);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn mark_as_redone(&self, merge_id: &str, redone_by: Option<&str>) -> Result<()> {
|
||||
let now = Utc::now();
|
||||
let new_deadline = now + chrono::Duration::hours(24);
|
||||
let filter = doc! { "merge_id": merge_id };
|
||||
let update = doc! {
|
||||
"$set": {
|
||||
"undone": false,
|
||||
"undone_at": bson::Bson::Null,
|
||||
"undone_by": redone_by,
|
||||
"undone_snapshot": bson::Bson::Null,
|
||||
"undo_deadline": BsonDateTime::from_chrono(new_deadline),
|
||||
"undo_expired": false
|
||||
}
|
||||
};
|
||||
|
||||
self.collection
|
||||
.update_one(filter, update, None)
|
||||
.await
|
||||
.context("Failed to mark merge as redone")?;
|
||||
|
||||
tracing::info!(
|
||||
"Marked merge {} as redone (new deadline: {})",
|
||||
merge_id,
|
||||
new_deadline
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn check_undo_deadline(&self, merge_id: &str) -> Result<bool> {
|
||||
let history = self
|
||||
.get_merge_history(merge_id)
|
||||
.await?
|
||||
.context("Merge history not found")?;
|
||||
|
||||
let now = Utc::now();
|
||||
if now > history.undo_deadline {
|
||||
return Ok(false);
|
||||
}
|
||||
|
||||
Ok(true)
|
||||
}
|
||||
|
||||
pub async fn mark_expired_merges(&self) -> Result<u64> {
|
||||
let now = BsonDateTime::from_chrono(Utc::now());
|
||||
let filter = doc! {
|
||||
"undo_deadline": { "$lt": now },
|
||||
"undone": false,
|
||||
"undo_expired": false
|
||||
};
|
||||
let update = doc! { "$set": { "undo_expired": true } };
|
||||
|
||||
let result = self
|
||||
.collection
|
||||
.update_many(filter, update, None)
|
||||
.await
|
||||
.context("Failed to mark expired merges")?;
|
||||
|
||||
let count = result.modified_count;
|
||||
if count > 0 {
|
||||
tracing::info!("Marked {} expired merges", count);
|
||||
}
|
||||
|
||||
Ok(count)
|
||||
}
|
||||
}
|
||||
@@ -32,17 +32,21 @@ pub trait VectorStore: Send + Sync {
|
||||
async fn search(&self, query_vector: &[f32], limit: usize) -> Result<Vec<SearchResult>>;
|
||||
}
|
||||
|
||||
pub mod identity_merge_history;
|
||||
pub mod mongodb_db;
|
||||
pub mod postgres_db;
|
||||
pub mod qdrant_db;
|
||||
pub mod redis_client;
|
||||
pub mod redis_db;
|
||||
pub mod sync_db;
|
||||
|
||||
pub use identity_merge_history::{
|
||||
AliasEntry, FacesTransferred, IdentityMergeHistory, IdentityMergeHistoryStore,
|
||||
IdentitySnapshot, MergeHistoryEntry, MergeHistoryQuery, MergeParams, TargetIdentitySnapshot,
|
||||
UndoneSnapshot,
|
||||
};
|
||||
pub use mongodb_db::MongoDb;
|
||||
pub use postgres_db::{
|
||||
Bm25Result, CandidateRecord, CreateApiKeyConfig, FileIdentityRecord, FileRecord,
|
||||
HybridSearchResult, IdentityChunkRecord, IdentityDetailRecord, IdentityFaceRecord,
|
||||
Bm25Result, CandidateRecord, CreateApiKeyConfig, FileFaceRecord, FileIdentityRecord,
|
||||
FileRecord, HybridSearchResult, IdentityChunkRecord, IdentityDetailRecord, IdentityFaceRecord,
|
||||
IdentityFileRecord, MonitorJob, MonitorJobStats, MonitorJobStatus, PipelineType, PostgresDb,
|
||||
ProcessorJobStatus, ProcessorResult, ProcessorType, ResourceRecord, VideoRecord, VideoStatus,
|
||||
};
|
||||
@@ -52,4 +56,3 @@ pub use redis_client::{
|
||||
ProgressMessage, RedisClient,
|
||||
};
|
||||
pub use redis_db::RedisDb;
|
||||
pub use sync_db::SyncDb;
|
||||
|
||||
@@ -131,7 +131,6 @@ impl MongoDb {
|
||||
pre_chunk_ids: vec![],
|
||||
parent_chunk_id: doc.parent_chunk_id,
|
||||
child_chunk_ids: doc.child_chunk_ids,
|
||||
visual_stats: None,
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
@@ -190,7 +189,6 @@ impl MongoDb {
|
||||
pre_chunk_ids: vec![],
|
||||
parent_chunk_id: doc.parent_chunk_id,
|
||||
child_chunk_ids: doc.child_chunk_ids,
|
||||
visual_stats: None,
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
@@ -246,7 +244,6 @@ impl MongoDb {
|
||||
pre_chunk_ids: vec![],
|
||||
parent_chunk_id: doc.parent_chunk_id,
|
||||
child_chunk_ids: doc.child_chunk_ids,
|
||||
visual_stats: None,
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
|
||||
@@ -70,7 +70,7 @@ impl QdrantDb {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let create_url = format!("{}/collections", self.base_url);
|
||||
let create_url = format!("{}/collections/{}", self.base_url, self.collection_name);
|
||||
let body = serde_json::json!({
|
||||
"vectors": {
|
||||
"size": vector_dim,
|
||||
@@ -79,7 +79,7 @@ impl QdrantDb {
|
||||
});
|
||||
|
||||
self.client
|
||||
.post(&create_url)
|
||||
.put(&create_url)
|
||||
.header("api-key", &self.api_key)
|
||||
.header("Content-Type", "application/json")
|
||||
.json(&body)
|
||||
@@ -867,50 +867,6 @@ impl VectorStore for QdrantDb {
|
||||
}
|
||||
}
|
||||
|
||||
/// Sync face embeddings from PostgreSQL to Qdrant for ANN search
|
||||
pub async fn sync_face_embeddings(file_uuid: &str) -> Result<()> {
|
||||
use crate::core::config::DATABASE_URL;
|
||||
use sqlx::Row;
|
||||
|
||||
let pool = sqlx::PgPool::connect(&DATABASE_URL).await?;
|
||||
let table = crate::core::db::schema::table_name("face_detections");
|
||||
|
||||
let qdrant: QdrantDb = QdrantDb::new();
|
||||
|
||||
let query = format!(
|
||||
"SELECT id, trace_id, frame_number, embedding FROM {} \
|
||||
WHERE file_uuid = $1 AND embedding IS NOT NULL \
|
||||
AND ((metadata->>'qc_ok')::boolean IS NULL OR (metadata->>'qc_ok')::boolean = true)",
|
||||
table
|
||||
);
|
||||
let rows = sqlx::query(&query).bind(file_uuid).fetch_all(&pool).await?;
|
||||
|
||||
let mut count = 0u64;
|
||||
for row in &rows {
|
||||
let id: i32 = row.get(0);
|
||||
let trace_id: Option<i32> = row.get(1);
|
||||
let frame_number: i64 = row.get(2);
|
||||
let embedding: Option<Vec<f32>> = row.get(3);
|
||||
|
||||
if let (Some(emb), Some(tid)) = (embedding, trace_id) {
|
||||
if let Err(e) = qdrant
|
||||
.upsert_face_embedding(id as u64, &emb, file_uuid, tid, frame_number)
|
||||
.await
|
||||
{
|
||||
tracing::warn!("Qdrant upsert failed for face {}: {}", id, e);
|
||||
continue;
|
||||
}
|
||||
count += 1;
|
||||
}
|
||||
}
|
||||
tracing::info!(
|
||||
"Synced {} face embeddings to Qdrant for {}",
|
||||
count,
|
||||
file_uuid
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn sync_trace_embeddings(file_uuid: &str) -> Result<()> {
|
||||
use crate::core::config::DATABASE_URL;
|
||||
use sqlx::Row;
|
||||
@@ -984,12 +940,22 @@ pub async fn sync_trace_embeddings(file_uuid: &str) -> Result<()> {
|
||||
}
|
||||
|
||||
// Push to Qdrant in batches
|
||||
// Point ID: hash(file_uuid + trace_id) for global uniqueness
|
||||
for chunk in trace_avgs.chunks(500) {
|
||||
let batch: Vec<(u64, &[f32], Option<serde_json::Value>)> = chunk
|
||||
.iter()
|
||||
.map(|t| {
|
||||
let point_id = {
|
||||
use sha2::{Digest, Sha256};
|
||||
let mut hasher = Sha256::new();
|
||||
hasher.update(file_uuid.as_bytes());
|
||||
hasher.update(b"_");
|
||||
hasher.update(t.tid.to_string().as_bytes());
|
||||
let hash = hasher.finalize();
|
||||
u64::from_be_bytes(hash[0..8].try_into().unwrap())
|
||||
};
|
||||
(
|
||||
t.tid as u64,
|
||||
point_id,
|
||||
t.avg_emb.as_slice(),
|
||||
Some(serde_json::json!({
|
||||
"trace_id": t.tid,
|
||||
|
||||
@@ -319,7 +319,9 @@ impl RedisClient {
|
||||
"timestamp": chrono::Utc::now().to_rfc3339(),
|
||||
});
|
||||
|
||||
let _: usize = conn.publish(&channel, serde_json::to_string(&alert_json)?).await?;
|
||||
let _: usize = conn
|
||||
.publish(&channel, serde_json::to_string(&alert_json)?)
|
||||
.await?;
|
||||
|
||||
tracing::warn!(
|
||||
"Processor alert: {} | {} | {} | {}",
|
||||
|
||||
@@ -78,7 +78,10 @@ impl SyncDb {
|
||||
pub async fn embed_text(&self, text: &str) -> Result<Vec<f32>> {
|
||||
let client = reqwest::Client::new();
|
||||
let response = client
|
||||
.post(&format!("{}/api/embeddings", crate::core::config::OLLAMA_URL.as_str()))
|
||||
.post(&format!(
|
||||
"{}/api/embeddings",
|
||||
crate::core::config::OLLAMA_URL.as_str()
|
||||
))
|
||||
.json(&serde_json::json!({
|
||||
"model": "all-minilm",
|
||||
"prompt": text,
|
||||
@@ -78,12 +78,19 @@ impl FrameManager {
|
||||
.and_then(|s| s.strip_suffix(".jpg"))
|
||||
{
|
||||
if let Ok(frame_num) = num_str.parse::<u64>() {
|
||||
let timestamp = frame_num as f64 / fps;
|
||||
frames.push(CachedFrame {
|
||||
path: entry.path(),
|
||||
frame_number: frame_num,
|
||||
timestamp_secs: timestamp,
|
||||
});
|
||||
let frame_path = entry.path();
|
||||
if let Ok(data) = std::fs::read(&frame_path) {
|
||||
if crate::core::thumbnail::validator::is_valid_jpeg(&data) {
|
||||
let timestamp = frame_num as f64 / fps;
|
||||
frames.push(CachedFrame {
|
||||
path: frame_path,
|
||||
frame_number: frame_num,
|
||||
timestamp_secs: timestamp,
|
||||
});
|
||||
} else {
|
||||
info!("[FrameCache] Skipping invalid JPEG: {:?}", frame_path);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -193,7 +193,7 @@ pub async fn save_identity_file_by_pool(pool: &sqlx::PgPool, uuid: &str) -> Resu
|
||||
|
||||
let record = sqlx::query_as::<_, crate::core::db::IdentityDetailRecord>(
|
||||
&format!(
|
||||
"SELECT id, uuid::text, name, identity_type, source, status, metadata, reference_data, \
|
||||
"SELECT id::bigint, uuid::text, name, identity_type, source, status, metadata, COALESCE(reference_data, '{{}}'::jsonb) as reference_data, \
|
||||
NULL::real[] as voice_embedding, NULL::real[] as identity_embedding, \
|
||||
face_embedding::real[] as face_embedding, \
|
||||
tmdb_id, tmdb_profile, created_at::timestamptz as created_at, NULL::timestamptz as updated_at \
|
||||
|
||||
@@ -97,6 +97,68 @@ pub fn llm_vision_model() -> String {
|
||||
config::llm::VISION_MODEL.clone()
|
||||
}
|
||||
|
||||
/// Call the vision LLM with text + base64 images. Returns the generated text.
|
||||
pub async fn call_llm_vision(
|
||||
system_prompt: &str,
|
||||
user_text: &str,
|
||||
base64_images: Vec<String>,
|
||||
max_tokens: u32,
|
||||
timeout_secs: u64,
|
||||
) -> anyhow::Result<String> {
|
||||
let mut content_parts: Vec<Value> = vec![json!({"type": "text", "text": user_text})];
|
||||
for img in &base64_images {
|
||||
content_parts.push(json!({
|
||||
"type": "image_url",
|
||||
"image_url": {"url": format!("data:image/jpeg;base64,{}", img)}
|
||||
}));
|
||||
}
|
||||
|
||||
let messages = json!([
|
||||
{"role": "system", "content": system_prompt},
|
||||
{"role": "user", "content": content_parts}
|
||||
]);
|
||||
|
||||
let req = json!({
|
||||
"model": llm_vision_model(),
|
||||
"messages": messages,
|
||||
"temperature": 0.1,
|
||||
"max_tokens": max_tokens,
|
||||
"stream": false,
|
||||
});
|
||||
|
||||
let client = reqwest::Client::builder()
|
||||
.timeout(std::time::Duration::from_secs(timeout_secs))
|
||||
.build()?;
|
||||
|
||||
let res = client.post(&llm_vision_url()).json(&req).send().await?;
|
||||
if !res.status().is_success() {
|
||||
let text = res.text().await.unwrap_or_default();
|
||||
anyhow::bail!("Vision LLM API error: {}", text);
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct VisionResponse {
|
||||
choices: Vec<VisionChoice>,
|
||||
}
|
||||
#[derive(Deserialize)]
|
||||
struct VisionChoice {
|
||||
message: VisionMessage,
|
||||
}
|
||||
#[derive(Deserialize)]
|
||||
struct VisionMessage {
|
||||
content: Option<String>,
|
||||
}
|
||||
|
||||
let vision_res: VisionResponse = res.json().await?;
|
||||
let content = vision_res
|
||||
.choices
|
||||
.into_iter()
|
||||
.next()
|
||||
.and_then(|c| c.message.content)
|
||||
.unwrap_or_default();
|
||||
Ok(content.trim().to_string())
|
||||
}
|
||||
|
||||
/// Build a tool definition JSON for function calling
|
||||
pub fn make_tool(name: &str, description: &str, properties: Value, required: Vec<&str>) -> ToolDef {
|
||||
ToolDef {
|
||||
@@ -121,9 +183,11 @@ pub async fn call_llm(
|
||||
timeout_secs: u64,
|
||||
) -> anyhow::Result<LlmResponse> {
|
||||
let client = reqwest::Client::builder()
|
||||
.timeout(std::time::Duration::from_secs(
|
||||
if timeout_secs > 0 { timeout_secs } else { *config::llm::CHAT_TIMEOUT_SECS },
|
||||
))
|
||||
.timeout(std::time::Duration::from_secs(if timeout_secs > 0 {
|
||||
timeout_secs
|
||||
} else {
|
||||
*config::llm::CHAT_TIMEOUT_SECS
|
||||
}))
|
||||
.build()?;
|
||||
|
||||
let req = ChatRequest {
|
||||
@@ -135,11 +199,7 @@ pub async fn call_llm(
|
||||
tools,
|
||||
};
|
||||
|
||||
let res = client
|
||||
.post(&llm_chat_url())
|
||||
.json(&req)
|
||||
.send()
|
||||
.await?;
|
||||
let res = client.post(&llm_chat_url()).json(&req).send().await?;
|
||||
|
||||
if !res.status().is_success() {
|
||||
let text = res.text().await.unwrap_or_default();
|
||||
@@ -147,13 +207,17 @@ pub async fn call_llm(
|
||||
}
|
||||
|
||||
let chat_res: ChatResponse = res.json().await?;
|
||||
let choice = chat_res.choices.into_iter().next()
|
||||
let choice = chat_res
|
||||
.choices
|
||||
.into_iter()
|
||||
.next()
|
||||
.ok_or_else(|| anyhow::anyhow!("Empty LLM response"))?;
|
||||
|
||||
match choice.finish_reason.as_deref() {
|
||||
Some("tool_calls") => {
|
||||
let calls = choice.message.tool_calls
|
||||
.ok_or_else(|| anyhow::anyhow!("finish_reason=tool_calls but no tool_calls in message"))?;
|
||||
let calls = choice.message.tool_calls.ok_or_else(|| {
|
||||
anyhow::anyhow!("finish_reason=tool_calls but no tool_calls in message")
|
||||
})?;
|
||||
Ok(LlmResponse::ToolCalls(calls))
|
||||
}
|
||||
_ => {
|
||||
@@ -164,16 +228,18 @@ pub async fn call_llm(
|
||||
}
|
||||
|
||||
/// Helper to build the system prompt + user messages
|
||||
pub fn build_conversation(system_prompt: &str, user_query: &str, history: Vec<ChatMessage>) -> Vec<ChatMessage> {
|
||||
let mut messages = vec![
|
||||
ChatMessage {
|
||||
role: "system".to_string(),
|
||||
content: Some(system_prompt.to_string()),
|
||||
tool_calls: None,
|
||||
tool_call_id: None,
|
||||
name: None,
|
||||
},
|
||||
];
|
||||
pub fn build_conversation(
|
||||
system_prompt: &str,
|
||||
user_query: &str,
|
||||
history: Vec<ChatMessage>,
|
||||
) -> Vec<ChatMessage> {
|
||||
let mut messages = vec![ChatMessage {
|
||||
role: "system".to_string(),
|
||||
content: Some(system_prompt.to_string()),
|
||||
tool_calls: None,
|
||||
tool_call_id: None,
|
||||
name: None,
|
||||
}];
|
||||
// Add history (user + assistant exchanges)
|
||||
messages.extend(history);
|
||||
// Add current user query
|
||||
|
||||
@@ -18,12 +18,22 @@ pub struct AsrxResult {
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub struct AsrxSegment {
|
||||
#[serde(alias = "start")]
|
||||
pub start_time: f64,
|
||||
#[serde(alias = "end")]
|
||||
pub end_time: f64,
|
||||
#[serde(default)]
|
||||
pub start_frame: u64,
|
||||
#[serde(default)]
|
||||
pub end_frame: u64,
|
||||
pub text: String,
|
||||
pub speaker_id: Option<String>,
|
||||
#[serde(default)]
|
||||
pub language: Option<String>,
|
||||
#[serde(default)]
|
||||
pub lang_prob: Option<f64>,
|
||||
#[serde(default)]
|
||||
pub quality: Option<f64>,
|
||||
}
|
||||
|
||||
pub async fn process_asrx(
|
||||
@@ -32,24 +42,16 @@ pub async fn process_asrx(
|
||||
uuid: Option<&str>,
|
||||
) -> Result<AsrxResult> {
|
||||
let executor = PythonExecutor::new()?;
|
||||
let script_path = executor.script_path("asrx_processor_custom.py");
|
||||
let script_path = executor.script_path("asrx_processor.py");
|
||||
|
||||
tracing::info!(
|
||||
"[ASRX] Starting speaker diarization (custom): {}",
|
||||
"[ASRX] Starting hybrid speaker diarization: {}",
|
||||
video_path
|
||||
);
|
||||
|
||||
if !script_path.exists() {
|
||||
tracing::warn!("[ASRX] Custom script not found, falling back to original");
|
||||
let fallback_path = executor.script_path("asrx_processor.py");
|
||||
if !fallback_path.exists() {
|
||||
tracing::warn!("[ASRX] No script found, returning empty result");
|
||||
return Ok(AsrxResult {
|
||||
language: None,
|
||||
segments: vec![],
|
||||
embeddings: None,
|
||||
});
|
||||
}
|
||||
tracing::error!("[ASRX] Script not found: {:?}", script_path);
|
||||
anyhow::bail!("asrx_processor.py not found");
|
||||
}
|
||||
|
||||
tracing::info!(
|
||||
@@ -65,6 +67,7 @@ pub async fn process_asrx(
|
||||
|
||||
if let Some(u) = uuid {
|
||||
cmd.arg("--uuid").arg(u);
|
||||
cmd.arg("--file-uuid").arg(u);
|
||||
}
|
||||
|
||||
cmd.stdout(std::process::Stdio::piped())
|
||||
@@ -126,6 +129,9 @@ mod tests {
|
||||
end_frame: 75,
|
||||
text: "Hello".to_string(),
|
||||
speaker_id: Some("SPEAKER_00".to_string()),
|
||||
language: None,
|
||||
lang_prob: None,
|
||||
quality: None,
|
||||
}],
|
||||
embeddings: None,
|
||||
};
|
||||
@@ -173,7 +179,27 @@ mod tests {
|
||||
end_frame: 150,
|
||||
text: "Test".to_string(),
|
||||
speaker_id: None,
|
||||
language: None,
|
||||
lang_prob: None,
|
||||
quality: None,
|
||||
};
|
||||
assert!(segment.end_time > segment.start_time);
|
||||
}
|
||||
|
||||
/// Input using the older `start` / `end` keys and no frame numbers must
/// still deserialize, with both frame numbers defaulting to zero.
#[test]
fn test_asrx_backward_compat_old_format() {
    let legacy_payload = r#"{
"language": "en",
"segments": [
{"start": 10.0, "end": 12.5, "text": "Hello", "speaker_id": "SPEAKER_00"}
]
}"#;

    let parsed: AsrxResult = serde_json::from_str(legacy_payload).unwrap();
    assert_eq!(parsed.segments.len(), 1);

    let segment = &parsed.segments[0];
    assert_eq!(segment.start_time, 10.0);
    assert_eq!(segment.end_time, 12.5);
    assert_eq!(segment.text, "Hello");
    assert_eq!(segment.start_frame, 0);
    assert_eq!(segment.end_frame, 0);
}
|
||||
}
|
||||
|
||||
@@ -43,11 +43,15 @@ pub async fn process_cut(
|
||||
let script_path = executor.script_path("cut_processor.py");
|
||||
|
||||
if !script_path.exists() {
|
||||
return Ok(CutResult {
|
||||
let empty_result = CutResult {
|
||||
frame_count: 0,
|
||||
fps: 0.0,
|
||||
scenes: vec![],
|
||||
});
|
||||
};
|
||||
let json = serde_json::to_string_pretty(&empty_result)?;
|
||||
std::fs::write(output_path, &json)
|
||||
.with_context(|| format!("Failed to write {:?}", output_path))?;
|
||||
return Ok(empty_result);
|
||||
}
|
||||
|
||||
executor
|
||||
@@ -127,18 +131,26 @@ fn try_native_cut(video_path: &str) -> Result<CutResult> {
|
||||
.context("Failed to run ffmpeg scene detection")?;
|
||||
|
||||
let stderr_output = String::from_utf8_lossy(&scene_output.stderr);
|
||||
let stdout_output = String::from_utf8_lossy(&scene_output.stdout);
|
||||
let mut scene_times: Vec<f64> = Vec::new();
|
||||
|
||||
// Parse ffmpeg showinfo output for scene changes
|
||||
// Format: [Parsed_showinfo...] pts:123.456 pts_time:123.456 ...
|
||||
for line in stderr_output.lines() {
|
||||
if line.contains("pts_time:") {
|
||||
if let Some(pos) = line.find("pts_time:") {
|
||||
let rest = &line[pos + 9..];
|
||||
let time_str = rest.split_whitespace().next().unwrap_or("");
|
||||
if let Ok(t) = time_str.parse::<f64>() {
|
||||
scene_times.push(t);
|
||||
}
|
||||
// Parse ffprobe output for scene changes (check both stderr and stdout)
|
||||
// Format: pts_time=123.456 or pts_time:123.456
|
||||
for line in stderr_output.lines().chain(stdout_output.lines()) {
|
||||
// Try pts_time= format (standard ffprobe output)
|
||||
if let Some(pos) = line.find("pts_time=") {
|
||||
let rest = &line[pos + 9..];
|
||||
let time_str = rest.split_whitespace().next().unwrap_or("");
|
||||
if let Ok(t) = time_str.parse::<f64>() {
|
||||
scene_times.push(t);
|
||||
}
|
||||
}
|
||||
// Try pts_time: format (showinfo filter output)
|
||||
else if let Some(pos) = line.find("pts_time:") {
|
||||
let rest = &line[pos + 9..];
|
||||
let time_str = rest.split_whitespace().next().unwrap_or("");
|
||||
if let Ok(t) = time_str.parse::<f64>() {
|
||||
scene_times.push(t);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -11,7 +11,6 @@ pub mod pose;
|
||||
pub mod scene_classification;
|
||||
pub mod story;
|
||||
pub mod tkg;
|
||||
pub mod visual_chunk;
|
||||
pub mod yolo;
|
||||
|
||||
pub use asr::{process_asr, AsrResult, AsrSegment};
|
||||
@@ -40,5 +39,4 @@ pub use tkg::{
|
||||
build_tkg, query_auto_representative_frame, FrameTraceInfo, MainIdentityInfo,
|
||||
RepresentativeFrameResult, TkgResult,
|
||||
};
|
||||
pub use visual_chunk::{process_visual_chunk, process_visual_chunk_advanced, VisualChunkResult};
|
||||
pub use yolo::{process_yolo, YoloFrame, YoloObject, YoloResult};
|
||||
|
||||
@@ -38,7 +38,10 @@ fn load_face_pose_data(output_dir: &str, file_uuid: &str) -> Result<Vec<FacePose
|
||||
let mut poses = Vec::new();
|
||||
if let Some(frames) = json.get("frames").and_then(|v| v.as_array()) {
|
||||
for frame_entry in frames {
|
||||
let frame_num = frame_entry.get("frame").and_then(|v| v.as_i64()).unwrap_or(0);
|
||||
let frame_num = frame_entry
|
||||
.get("frame")
|
||||
.and_then(|v| v.as_i64())
|
||||
.unwrap_or(0);
|
||||
if let Some(faces) = frame_entry.get("faces").and_then(|v| v.as_array()) {
|
||||
for face in faces {
|
||||
let bbox = match face.get("bbox") {
|
||||
@@ -68,7 +71,14 @@ fn load_face_pose_data(output_dir: &str, file_uuid: &str) -> Result<Vec<FacePose
|
||||
|
||||
/// Match a face from face_detections (frame, x, y, w, h) to its pose in face.json
|
||||
/// Uses bbox center distance to find the best match when multiple faces per frame.
|
||||
fn get_pose_for_face(frame: i64, x: f64, y: f64, w: f64, h: f64, poses: &[FacePose]) -> Option<(f64, f64, f64)> {
|
||||
fn get_pose_for_face(
|
||||
frame: i64,
|
||||
x: f64,
|
||||
y: f64,
|
||||
w: f64,
|
||||
h: f64,
|
||||
poses: &[FacePose],
|
||||
) -> Option<(f64, f64, f64)> {
|
||||
let cx = x + w / 2.0;
|
||||
let cy = y + h / 2.0;
|
||||
let mut best_dist = f64::MAX;
|
||||
@@ -86,8 +96,12 @@ fn get_pose_for_face(frame: i64, x: f64, y: f64, w: f64, h: f64, poses: &[FacePo
|
||||
}
|
||||
|
||||
fn detect_mutual_gaze(
|
||||
bbox_a_x: f64, bbox_a_w: f64, yaw_a: f64,
|
||||
bbox_b_x: f64, bbox_b_w: f64, yaw_b: f64,
|
||||
bbox_a_x: f64,
|
||||
bbox_a_w: f64,
|
||||
yaw_a: f64,
|
||||
bbox_b_x: f64,
|
||||
bbox_b_w: f64,
|
||||
yaw_b: f64,
|
||||
threshold: f64,
|
||||
) -> bool {
|
||||
let cx_a = bbox_a_x + bbox_a_w / 2.0;
|
||||
@@ -138,12 +152,16 @@ struct AsrxSegmentEntry {
|
||||
#[serde(default)]
|
||||
speaker_id: String,
|
||||
#[serde(default)]
|
||||
start_time: f64,
|
||||
start: f64,
|
||||
#[serde(default)]
|
||||
end_time: f64,
|
||||
end: f64,
|
||||
#[serde(default)]
|
||||
text: String,
|
||||
#[allow(dead_code)]
|
||||
#[serde(default)]
|
||||
start_frame: i64,
|
||||
#[allow(dead_code)]
|
||||
#[serde(default)]
|
||||
end_frame: i64,
|
||||
}
|
||||
|
||||
@@ -195,7 +213,10 @@ pub struct TkgResult {
|
||||
pub async fn build_tkg(db: &PostgresDb, file_uuid: &str, output_dir: &str) -> Result<TkgResult> {
|
||||
let pool = db.pool();
|
||||
let pose_data = load_face_pose_data(output_dir, file_uuid).unwrap_or_default();
|
||||
tracing::info!("[TKG] Loaded {} pose entries from face.json", pose_data.len());
|
||||
tracing::info!(
|
||||
"[TKG] Loaded {} pose entries from face.json",
|
||||
pose_data.len()
|
||||
);
|
||||
|
||||
let n_face = build_face_trace_nodes(pool, file_uuid, &pose_data).await?;
|
||||
let n_objects = build_yolo_object_nodes(pool, file_uuid, output_dir).await?;
|
||||
@@ -217,7 +238,11 @@ pub async fn build_tkg(db: &PostgresDb, file_uuid: &str, output_dir: &str) -> Re
|
||||
|
||||
// ── Node builders ─────────────────────────────────────────────────
|
||||
|
||||
async fn build_face_trace_nodes(pool: &PgPool, file_uuid: &str, pose_data: &[FacePose]) -> Result<usize> {
|
||||
async fn build_face_trace_nodes(
|
||||
pool: &PgPool,
|
||||
file_uuid: &str,
|
||||
pose_data: &[FacePose],
|
||||
) -> Result<usize> {
|
||||
let face_table = t("face_detections");
|
||||
let nodes_table = t("tkg_nodes");
|
||||
|
||||
@@ -257,7 +282,10 @@ async fn build_face_trace_nodes(pool: &PgPool, file_uuid: &str, pose_data: &[Fac
|
||||
// Group by trace_id: trace_id → Vec<(frame, x, y, w, h)>
|
||||
let mut trace_frames: HashMap<i64, Vec<(i64, f64, f64, f64, f64)>> = HashMap::new();
|
||||
for (tid, frame, x, y, w, h) in &frame_rows {
|
||||
trace_frames.entry(*tid).or_default().push((*frame, *x, *y, *w, *h));
|
||||
trace_frames
|
||||
.entry(*tid)
|
||||
.or_default()
|
||||
.push((*frame, *x, *y, *w, *h));
|
||||
}
|
||||
|
||||
let mut count = 0;
|
||||
@@ -274,7 +302,9 @@ async fn build_face_trace_nodes(pool: &PgPool, file_uuid: &str, pose_data: &[Fac
|
||||
|
||||
if let Some(frames) = trace_frames.get(&tid) {
|
||||
for (frame, x, y, w, h) in frames {
|
||||
if let Some((yaw, pitch, roll)) = get_pose_for_face(*frame, *x, *y, *w, *h, pose_data) {
|
||||
if let Some((yaw, pitch, roll)) =
|
||||
get_pose_for_face(*frame, *x, *y, *w, *h, pose_data)
|
||||
{
|
||||
yaw_sum += yaw;
|
||||
pitch_sum += pitch;
|
||||
roll_sum += roll;
|
||||
@@ -284,7 +314,11 @@ async fn build_face_trace_nodes(pool: &PgPool, file_uuid: &str, pose_data: &[Fac
|
||||
}
|
||||
|
||||
let (avg_yaw, avg_pitch, avg_roll) = if pose_count > 0 {
|
||||
(yaw_sum / pose_count as f64, pitch_sum / pose_count as f64, roll_sum / pose_count as f64)
|
||||
(
|
||||
yaw_sum / pose_count as f64,
|
||||
pitch_sum / pose_count as f64,
|
||||
roll_sum / pose_count as f64,
|
||||
)
|
||||
} else {
|
||||
(0.0, 0.0, 0.0)
|
||||
};
|
||||
@@ -401,8 +435,44 @@ async fn build_speaker_nodes(pool: &PgPool, file_uuid: &str, output_dir: &str) -
|
||||
let nodes_table = t("tkg_nodes");
|
||||
let mut count = 0;
|
||||
|
||||
// Group segments by speaker_id
|
||||
let mut speaker_segments: HashMap<String, Vec<&AsrxSegmentEntry>> = HashMap::new();
|
||||
for seg in &asrx.segments {
|
||||
speaker_segments
|
||||
.entry(seg.speaker_id.clone())
|
||||
.or_default()
|
||||
.push(seg);
|
||||
}
|
||||
|
||||
for (sid, stat) in &stats {
|
||||
let props = serde_json::json!({ "segment_count": stat.count });
|
||||
let segs = speaker_segments.get(sid);
|
||||
let (full_text, segments_json) = if let Some(seg_list) = segs {
|
||||
let full: String = seg_list
|
||||
.iter()
|
||||
.map(|s| s.text.trim())
|
||||
.filter(|t| !t.is_empty())
|
||||
.collect::<Vec<_>>()
|
||||
.join(" ");
|
||||
let segments: Vec<serde_json::Value> = seg_list
|
||||
.iter()
|
||||
.map(|s| {
|
||||
serde_json::json!({
|
||||
"start": s.start,
|
||||
"end": s.end,
|
||||
"text": s.text,
|
||||
})
|
||||
})
|
||||
.collect();
|
||||
(full, serde_json::Value::Array(segments))
|
||||
} else {
|
||||
(String::new(), serde_json::Value::Array(vec![]))
|
||||
};
|
||||
|
||||
let props = serde_json::json!({
|
||||
"segment_count": stat.count,
|
||||
"segments": segments_json,
|
||||
"full_text": full_text,
|
||||
});
|
||||
|
||||
sqlx::query(&format!(
|
||||
r#"
|
||||
@@ -576,8 +646,8 @@ async fn build_speaker_face_edges(
|
||||
|
||||
// Calculate fps from last segment
|
||||
let last = asrx.segments.last().unwrap();
|
||||
let fps = if last.end_time > 0.0 {
|
||||
last.end_frame as f64 / last.end_time
|
||||
let fps = if last.end > 0.0 {
|
||||
last.end_frame as f64 / last.end
|
||||
} else {
|
||||
30.0
|
||||
};
|
||||
@@ -604,8 +674,8 @@ async fn build_speaker_face_edges(
|
||||
let face_end_sec = *ef as f64 / fps;
|
||||
|
||||
for seg in &asrx.segments {
|
||||
let seg_start = seg.start_time;
|
||||
let seg_end = seg.end_time;
|
||||
let seg_start = seg.start;
|
||||
let seg_end = seg.end;
|
||||
let overlap_start = face_start_sec.max(seg_start);
|
||||
let overlap_end = face_end_sec.min(seg_end);
|
||||
|
||||
@@ -669,7 +739,11 @@ async fn build_speaker_face_edges(
|
||||
Ok(edge_count)
|
||||
}
|
||||
|
||||
async fn build_face_face_edges(pool: &PgPool, file_uuid: &str, pose_data: &[FacePose]) -> Result<usize> {
|
||||
async fn build_face_face_edges(
|
||||
pool: &PgPool,
|
||||
file_uuid: &str,
|
||||
pose_data: &[FacePose],
|
||||
) -> Result<usize> {
|
||||
let face_table = t("face_detections");
|
||||
let nodes_table = t("tkg_nodes");
|
||||
let edges_table = t("tkg_edges");
|
||||
@@ -722,8 +796,9 @@ async fn build_face_face_edges(pool: &PgPool, file_uuid: &str, pose_data: &[Face
|
||||
(Some(&(xa, ya, wa, ha)), Some(&(xb, yb, wb, hb))) => {
|
||||
get_pose_for_face(*frame, xa, ya, wa, ha, pose_data)
|
||||
.and_then(|(yaw_a, _, _)| {
|
||||
get_pose_for_face(*frame, xb, yb, wb, hb, pose_data)
|
||||
.map(|(yaw_b, _, _)| detect_mutual_gaze(xa, wa, yaw_a, xb, wb, yaw_b, 0.05))
|
||||
get_pose_for_face(*frame, xb, yb, wb, hb, pose_data).map(|(yaw_b, _, _)| {
|
||||
detect_mutual_gaze(xa, wa, yaw_a, xb, wb, yaw_b, 0.05)
|
||||
})
|
||||
})
|
||||
.unwrap_or(false)
|
||||
}
|
||||
@@ -770,7 +845,11 @@ async fn build_face_face_edges(pool: &PgPool, file_uuid: &str, pose_data: &[Face
|
||||
};
|
||||
|
||||
let frames: Vec<i64> = frame_data.iter().map(|(f, _)| *f).collect();
|
||||
let gaze_frames: Vec<i64> = frame_data.iter().filter(|(_, g)| *g).map(|(f, _)| *f).collect();
|
||||
let gaze_frames: Vec<i64> = frame_data
|
||||
.iter()
|
||||
.filter(|(_, g)| *g)
|
||||
.map(|(f, _)| *f)
|
||||
.collect();
|
||||
let gaze_count = gaze_frames.len() as i64;
|
||||
let has_gaze = gaze_count > 0;
|
||||
|
||||
@@ -793,8 +872,13 @@ async fn build_face_face_edges(pool: &PgPool, file_uuid: &str, pose_data: &[Face
|
||||
}
|
||||
}
|
||||
let (avg_ya, avg_yb) = if gaze_sample > 0 {
|
||||
(yaw_a_sum / gaze_sample as f64, yaw_b_sum / gaze_sample as f64)
|
||||
} else { (0.0, 0.0) };
|
||||
(
|
||||
yaw_a_sum / gaze_sample as f64,
|
||||
yaw_b_sum / gaze_sample as f64,
|
||||
)
|
||||
} else {
|
||||
(0.0, 0.0)
|
||||
};
|
||||
|
||||
serde_json::json!({
|
||||
"first_frame": frames[0],
|
||||
@@ -902,9 +986,14 @@ pub async fn query_auto_representative_frame(
|
||||
.context("Failed to detect main identities")?;
|
||||
|
||||
let main_ids: Vec<(i32, String, String, i64)> = mains;
|
||||
let main_idents: Vec<MainIdentityInfo> = main_ids.iter().map(|(_, u, n, c)|
|
||||
MainIdentityInfo { identity_uuid: u.clone(), name: n.clone(), face_count: *c }
|
||||
).collect();
|
||||
let main_idents: Vec<MainIdentityInfo> = main_ids
|
||||
.iter()
|
||||
.map(|(_, u, n, c)| MainIdentityInfo {
|
||||
identity_uuid: u.clone(),
|
||||
name: n.clone(),
|
||||
face_count: *c,
|
||||
})
|
||||
.collect();
|
||||
|
||||
let frame_number: Option<i64> = if main_ids.len() >= 2 {
|
||||
let id_a = main_ids[0].0;
|
||||
@@ -915,16 +1004,20 @@ pub async fn query_auto_representative_frame(
|
||||
AND trace_id IS NOT NULL GROUP BY trace_id ORDER BY COUNT(*) DESC LIMIT 1",
|
||||
fd_table
|
||||
))
|
||||
.bind(file_uuid).bind(id_a)
|
||||
.fetch_optional(pool).await?;
|
||||
.bind(file_uuid)
|
||||
.bind(id_a)
|
||||
.fetch_optional(pool)
|
||||
.await?;
|
||||
|
||||
let trace_b: Option<(i32,)> = sqlx::query_as(&format!(
|
||||
"SELECT trace_id FROM {} WHERE file_uuid = $1 AND identity_id = $2 \
|
||||
AND trace_id IS NOT NULL GROUP BY trace_id ORDER BY COUNT(*) DESC LIMIT 1",
|
||||
fd_table
|
||||
))
|
||||
.bind(file_uuid).bind(id_b)
|
||||
.fetch_optional(pool).await?;
|
||||
.bind(file_uuid)
|
||||
.bind(id_b)
|
||||
.fetch_optional(pool)
|
||||
.await?;
|
||||
|
||||
match (trace_a, trace_b) {
|
||||
(Some((ta,)), Some((tb,))) => {
|
||||
@@ -940,11 +1033,18 @@ pub async fn query_auto_representative_frame(
|
||||
LIMIT 1",
|
||||
edges_table, nodes_table, nodes_table
|
||||
))
|
||||
.bind(file_uuid).bind(ta).bind(tb)
|
||||
.fetch_optional(pool).await?;
|
||||
.bind(file_uuid)
|
||||
.bind(ta)
|
||||
.bind(tb)
|
||||
.fetch_optional(pool)
|
||||
.await?;
|
||||
|
||||
if let Some((f,)) = tkg_frame {
|
||||
if f <= half_frame { Some(f) } else { None }
|
||||
if f <= half_frame {
|
||||
Some(f)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
} else {
|
||||
sqlx::query_scalar::<_, i64>(&format!(
|
||||
"SELECT MIN(fd_a.frame_number)::bigint \
|
||||
@@ -954,8 +1054,12 @@ pub async fn query_auto_representative_frame(
|
||||
AND fd_b.identity_id = $3 AND fd_a.frame_number <= $4",
|
||||
fd_table, fd_table
|
||||
))
|
||||
.bind(file_uuid).bind(id_a).bind(id_b).bind(half_frame)
|
||||
.fetch_optional(pool).await?
|
||||
.bind(file_uuid)
|
||||
.bind(id_a)
|
||||
.bind(id_b)
|
||||
.bind(half_frame)
|
||||
.fetch_optional(pool)
|
||||
.await?
|
||||
}
|
||||
}
|
||||
_ => None,
|
||||
@@ -976,8 +1080,11 @@ pub async fn query_auto_representative_frame(
|
||||
LIMIT 1",
|
||||
fd_table
|
||||
))
|
||||
.bind(file_uuid).bind(first_id).bind(half_frame)
|
||||
.fetch_optional(pool).await?
|
||||
.bind(file_uuid)
|
||||
.bind(first_id)
|
||||
.bind(half_frame)
|
||||
.fetch_optional(pool)
|
||||
.await?
|
||||
} else {
|
||||
None
|
||||
}
|
||||
@@ -995,20 +1102,25 @@ pub async fn query_auto_representative_frame(
|
||||
LIMIT 1",
|
||||
fd_table
|
||||
))
|
||||
.bind(file_uuid).bind(half_frame)
|
||||
.fetch_optional(pool).await?
|
||||
.bind(file_uuid)
|
||||
.bind(half_frame)
|
||||
.fetch_optional(pool)
|
||||
.await?
|
||||
}
|
||||
};
|
||||
|
||||
let frame_number = frame_number.ok_or_else(|| anyhow::anyhow!("No faces found in this file"))?;
|
||||
let frame_number =
|
||||
frame_number.ok_or_else(|| anyhow::anyhow!("No faces found in this file"))?;
|
||||
|
||||
let face_quality: f64 = sqlx::query_scalar::<_, f64>(&format!(
|
||||
"SELECT COALESCE(MAX((width::float8 * height::float8) * confidence::float8), 0) \
|
||||
FROM {} WHERE file_uuid = $1 AND frame_number = $2",
|
||||
fd_table
|
||||
))
|
||||
.bind(file_uuid).bind(frame_number)
|
||||
.fetch_one(pool).await?;
|
||||
.bind(file_uuid)
|
||||
.bind(frame_number)
|
||||
.fetch_one(pool)
|
||||
.await?;
|
||||
|
||||
let traces: Vec<FrameTraceInfo> = sqlx::query_as::<_, (i32, Option<String>, Option<String>, i32, i32, i32, i32, f64)>(&format!(
|
||||
"SELECT fd.trace_id, i.uuid::text, i.name, fd.x, fd.y, fd.width, fd.height, fd.confidence::float8 \
|
||||
|
||||
@@ -1,594 +0,0 @@
|
||||
//! 視覺分片處理器 (Phase 2.2)
|
||||
//!
|
||||
//! 從 YOLO 結果生成視覺分片
|
||||
|
||||
use anyhow::{Context, Result};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::time::Duration;
|
||||
|
||||
use super::executor::PythonExecutor;
|
||||
use super::yolo::{YoloFrame, YoloResult};
|
||||
|
||||
const VISUAL_CHUNK_TIMEOUT: Duration = Duration::from_secs(3600);
|
||||
|
||||
/// 視覺分片處理結果
|
||||
#[derive(Debug, Serialize, Deserialize, Clone, Default)]
|
||||
pub struct VisualChunkResult {
|
||||
/// 生成的視覺分片數量
|
||||
pub chunk_count: u32,
|
||||
/// 處理的總幀數
|
||||
pub total_frames: u32,
|
||||
/// 檢測到的總物件數
|
||||
pub total_objects: u32,
|
||||
/// 唯一物件類別數
|
||||
pub unique_classes: u32,
|
||||
/// 生成的視覺分片
|
||||
pub chunks: Vec<crate::core::chunk::Chunk>,
|
||||
}
|
||||
|
||||
/// 從 YOLO 結果生成視覺分片
|
||||
pub async fn process_visual_chunk(
|
||||
file_id: i32,
|
||||
uuid: String,
|
||||
video_path: &str,
|
||||
yolo_result: &YoloResult,
|
||||
chunk_index_offset: u32,
|
||||
fps: f64,
|
||||
) -> Result<VisualChunkResult> {
|
||||
tracing::info!(
|
||||
"[VisualChunk] Starting visual chunk generation for video: {}, {} frames",
|
||||
video_path,
|
||||
yolo_result.frames.len()
|
||||
);
|
||||
|
||||
if yolo_result.frames.is_empty() {
|
||||
tracing::warn!("[VisualChunk] No YOLO frames to process");
|
||||
return Ok(VisualChunkResult {
|
||||
chunk_count: 0,
|
||||
total_frames: 0,
|
||||
total_objects: 0,
|
||||
unique_classes: 0,
|
||||
chunks: vec![],
|
||||
});
|
||||
}
|
||||
|
||||
// 策略 1: 固定幀數分片(每 N 幀一個分片)
|
||||
let chunks = create_fixed_frame_chunks(file_id, &uuid, yolo_result, chunk_index_offset, fps);
|
||||
|
||||
// 統計信息
|
||||
let total_objects: u32 = yolo_result
|
||||
.frames
|
||||
.iter()
|
||||
.map(|f| f.objects.len() as u32)
|
||||
.sum();
|
||||
let all_classes: Vec<String> = yolo_result
|
||||
.frames
|
||||
.iter()
|
||||
.flat_map(|f| f.objects.iter().map(|o| o.class_name.clone()))
|
||||
.collect();
|
||||
let unique_classes: u32 = all_classes
|
||||
.iter()
|
||||
.cloned()
|
||||
.collect::<std::collections::HashSet<_>>()
|
||||
.len() as u32;
|
||||
|
||||
tracing::info!(
|
||||
"[VisualChunk] Generated {} visual chunks from {} frames, {} total objects, {} unique classes",
|
||||
chunks.len(),
|
||||
yolo_result.frames.len(),
|
||||
total_objects,
|
||||
unique_classes
|
||||
);
|
||||
|
||||
Ok(VisualChunkResult {
|
||||
chunk_count: chunks.len() as u32,
|
||||
total_frames: yolo_result.frames.len() as u32,
|
||||
total_objects,
|
||||
unique_classes,
|
||||
chunks,
|
||||
})
|
||||
}
|
||||
|
||||
/// 創建固定幀數分片(每 N 幀一個分片)
|
||||
fn create_fixed_frame_chunks(
|
||||
file_id: i32,
|
||||
uuid: &str,
|
||||
yolo_result: &YoloResult,
|
||||
chunk_index_offset: u32,
|
||||
fps: f64,
|
||||
) -> Vec<crate::core::chunk::Chunk> {
|
||||
let mut chunks = Vec::new();
|
||||
|
||||
// 配置:每 30 幀創建一個分片(約 1 秒,如果 fps=30)
|
||||
let frames_per_chunk = 30;
|
||||
let total_frames = yolo_result.frames.len();
|
||||
|
||||
if total_frames == 0 {
|
||||
return chunks;
|
||||
}
|
||||
|
||||
let mut chunk_index = chunk_index_offset;
|
||||
let mut start_idx = 0;
|
||||
|
||||
while start_idx < total_frames {
|
||||
let end_idx = std::cmp::min(start_idx + frames_per_chunk, total_frames);
|
||||
|
||||
// 獲取這個分片的幀
|
||||
let chunk_frames: Vec<YoloFrame> = yolo_result.frames[start_idx..end_idx]
|
||||
.iter()
|
||||
.cloned()
|
||||
.collect();
|
||||
|
||||
if chunk_frames.is_empty() {
|
||||
break;
|
||||
}
|
||||
|
||||
// 計算幀範圍
|
||||
let start_frame = chunk_frames.first().unwrap().frame as i64;
|
||||
let end_frame = chunk_frames.last().unwrap().frame as i64 + 1; // exclusive
|
||||
|
||||
// 創建視覺分片
|
||||
let chunk = crate::core::chunk::Chunk::from_yolo_frames(
|
||||
file_id,
|
||||
uuid.to_string(),
|
||||
format!("vis_{}", chunk_index),
|
||||
start_frame,
|
||||
end_frame,
|
||||
fps,
|
||||
chunk_frames,
|
||||
);
|
||||
|
||||
chunks.push(chunk);
|
||||
|
||||
// 更新索引
|
||||
start_idx = end_idx;
|
||||
chunk_index += 1;
|
||||
}
|
||||
|
||||
chunks
|
||||
}
|
||||
|
||||
/// 基於物件相似度創建分片
|
||||
fn create_similarity_based_chunks(
|
||||
file_id: i32,
|
||||
uuid: &str,
|
||||
yolo_result: &YoloResult,
|
||||
chunk_index_offset: u32,
|
||||
fps: f64,
|
||||
similarity_threshold: f32,
|
||||
min_frames_per_chunk: usize,
|
||||
) -> Vec<crate::core::chunk::Chunk> {
|
||||
let mut chunks = Vec::new();
|
||||
|
||||
if yolo_result.frames.is_empty() {
|
||||
return chunks;
|
||||
}
|
||||
|
||||
let mut current_chunk_frames: Vec<YoloFrame> = Vec::new();
|
||||
let mut chunk_index = chunk_index_offset;
|
||||
let mut current_start_frame = 0;
|
||||
|
||||
for (i, frame) in yolo_result.frames.iter().enumerate() {
|
||||
if current_chunk_frames.is_empty() {
|
||||
current_chunk_frames.push(frame.clone());
|
||||
current_start_frame = frame.frame as i64;
|
||||
continue;
|
||||
}
|
||||
|
||||
// 檢查相似度(簡化版本:檢查物件類別是否相同)
|
||||
let last_frame = current_chunk_frames.last().unwrap();
|
||||
let similarity = calculate_frame_similarity(last_frame, frame);
|
||||
|
||||
if similarity >= similarity_threshold {
|
||||
// 相似度高,加入當前分片
|
||||
current_chunk_frames.push(frame.clone());
|
||||
} else {
|
||||
// 相似度低,創建新分片
|
||||
if current_chunk_frames.len() >= min_frames_per_chunk {
|
||||
let end_frame = current_chunk_frames.last().unwrap().frame as i64 + 1;
|
||||
|
||||
let chunk = crate::core::chunk::Chunk::from_yolo_frames(
|
||||
file_id,
|
||||
uuid.to_string(),
|
||||
format!("vis_{}", chunk_index),
|
||||
current_start_frame,
|
||||
end_frame,
|
||||
fps,
|
||||
current_chunk_frames.clone(),
|
||||
);
|
||||
|
||||
chunks.push(chunk);
|
||||
chunk_index += 1;
|
||||
}
|
||||
|
||||
// 開始新的分片
|
||||
current_chunk_frames = vec![frame.clone()];
|
||||
current_start_frame = frame.frame as i64;
|
||||
}
|
||||
}
|
||||
|
||||
// 處理最後一個分片
|
||||
if current_chunk_frames.len() >= min_frames_per_chunk {
|
||||
let end_frame = current_chunk_frames.last().unwrap().frame as i64 + 1;
|
||||
|
||||
let chunk = crate::core::chunk::Chunk::from_yolo_frames(
|
||||
file_id,
|
||||
uuid.to_string(),
|
||||
format!("vis_{}", chunk_index),
|
||||
current_start_frame,
|
||||
end_frame,
|
||||
fps,
|
||||
current_chunk_frames,
|
||||
);
|
||||
|
||||
chunks.push(chunk);
|
||||
}
|
||||
|
||||
chunks
|
||||
}
|
||||
|
||||
/// 計算兩個幀之間的相似度(基於物件類別)
|
||||
fn calculate_frame_similarity(frame1: &YoloFrame, frame2: &YoloFrame) -> f32 {
|
||||
if frame1.objects.is_empty() && frame2.objects.is_empty() {
|
||||
return 1.0;
|
||||
}
|
||||
|
||||
if frame1.objects.is_empty() || frame2.objects.is_empty() {
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
let set1: std::collections::HashSet<String> = frame1
|
||||
.objects
|
||||
.iter()
|
||||
.map(|o| o.class_name.clone())
|
||||
.collect();
|
||||
let set2: std::collections::HashSet<String> = frame2
|
||||
.objects
|
||||
.iter()
|
||||
.map(|o| o.class_name.clone())
|
||||
.collect();
|
||||
|
||||
let intersection: Vec<_> = set1.intersection(&set2).collect();
|
||||
let union: Vec<_> = set1.union(&set2).collect();
|
||||
|
||||
if union.is_empty() {
|
||||
0.0
|
||||
} else {
|
||||
intersection.len() as f32 / union.len() as f32
|
||||
}
|
||||
}
|
||||
|
||||
/// 使用 Python 腳本生成視覺分片(進階版本)
|
||||
pub async fn process_visual_chunk_advanced(
|
||||
video_path: &str,
|
||||
output_path: &str,
|
||||
uuid: Option<&str>,
|
||||
) -> Result<VisualChunkResult> {
|
||||
let executor = PythonExecutor::new()?;
|
||||
let script_path = executor.script_path("visual_chunk_processor.py");
|
||||
|
||||
tracing::info!(
|
||||
"[VisualChunk] Starting advanced visual chunk generation: {}",
|
||||
video_path
|
||||
);
|
||||
|
||||
if !script_path.exists() {
|
||||
tracing::warn!("[VisualChunk] Script not found, using basic generation");
|
||||
// 這裡可以回退到基本生成方法
|
||||
return Ok(VisualChunkResult {
|
||||
chunk_count: 0,
|
||||
total_frames: 0,
|
||||
total_objects: 0,
|
||||
unique_classes: 0,
|
||||
chunks: vec![],
|
||||
});
|
||||
}
|
||||
|
||||
let yolo_path = uuid.map(|u| {
|
||||
std::path::PathBuf::from(crate::core::config::OUTPUT_DIR.as_str())
|
||||
.join(format!("{}.yolo.json", u))
|
||||
.to_string_lossy()
|
||||
.to_string()
|
||||
});
|
||||
let args: &[&str] = if let Some(ref yp) = yolo_path {
|
||||
&[video_path, output_path, "--yolo-result", yp]
|
||||
} else {
|
||||
&[video_path, output_path]
|
||||
};
|
||||
let result = match executor
|
||||
.run(
|
||||
"visual_chunk_processor.py",
|
||||
args,
|
||||
uuid,
|
||||
"VisualChunk",
|
||||
Some(VISUAL_CHUNK_TIMEOUT),
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(_) => match std::fs::read_to_string(output_path) {
|
||||
Ok(json_str) => match serde_json::from_str::<VisualChunkResult>(&json_str) {
|
||||
Ok(r) => r,
|
||||
Err(e) => {
|
||||
tracing::warn!(
|
||||
"[VisualChunk] Failed to parse output ({}), returning empty",
|
||||
e
|
||||
);
|
||||
VisualChunkResult::default()
|
||||
}
|
||||
},
|
||||
Err(e) => {
|
||||
tracing::warn!(
|
||||
"[VisualChunk] Failed to read output ({}), returning empty",
|
||||
e
|
||||
);
|
||||
VisualChunkResult::default()
|
||||
}
|
||||
},
|
||||
Err(e) => {
|
||||
tracing::warn!(
|
||||
"[VisualChunk] Failed to run script ({}), returning empty",
|
||||
e
|
||||
);
|
||||
VisualChunkResult::default()
|
||||
}
|
||||
};
|
||||
|
||||
tracing::info!(
|
||||
"[VisualChunk] Advanced generation result: {} chunks, {} frames",
|
||||
result.chunk_count,
|
||||
result.total_frames
|
||||
);
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
/// Unit tests for the visual-chunk helpers.
#[cfg(test)]
mod tests {
    use super::*;

    /// Identical class sets score 1.0, disjoint sets 0.0, and two empty frames 1.0.
    #[test]
    fn test_calculate_frame_similarity() {
        use crate::core::processor::yolo::{YoloFrame, YoloObject};

        let frame1 = YoloFrame {
            frame: 0,
            timestamp: 0.0,
            objects: vec![
                YoloObject {
                    class_name: "person".to_string(),
                    class_id: 0,
                    x: 100,
                    y: 200,
                    width: 50,
                    height: 100,
                    confidence: 0.95,
                },
                YoloObject {
                    class_name: "car".to_string(),
                    class_id: 2,
                    x: 300,
                    y: 150,
                    width: 80,
                    height: 60,
                    confidence: 0.87,
                },
            ],
        };

        let frame2 = YoloFrame {
            frame: 1,
            timestamp: 0.033,
            objects: vec![
                YoloObject {
                    class_name: "person".to_string(),
                    class_id: 0,
                    x: 110,
                    y: 210,
                    width: 52,
                    height: 102,
                    confidence: 0.92,
                },
                YoloObject {
                    class_name: "car".to_string(),
                    class_id: 2,
                    x: 310,
                    y: 155,
                    width: 82,
                    height: 62,
                    confidence: 0.85,
                },
            ],
        };

        let frame3 = YoloFrame {
            frame: 2,
            timestamp: 0.066,
            objects: vec![YoloObject {
                class_name: "dog".to_string(),
                class_id: 16,
                x: 150,
                y: 250,
                width: 40,
                height: 60,
                confidence: 0.78,
            }],
        };

        // Frames with the same object classes are fully similar.
        let similarity_same = calculate_frame_similarity(&frame1, &frame2);
        assert!((similarity_same - 1.0).abs() < 0.001);

        // Frames with disjoint object classes are not similar at all.
        let similarity_diff = calculate_frame_similarity(&frame1, &frame3);
        assert!((similarity_diff - 0.0).abs() < 0.001);

        // Two frames without objects count as fully similar.
        let empty_frame = YoloFrame {
            frame: 3,
            timestamp: 0.1,
            objects: vec![],
        };
        let similarity_empty = calculate_frame_similarity(&empty_frame, &empty_frame);
        assert!((similarity_empty - 1.0).abs() < 0.001);
    }

    /// 60 frames at 30 frames per chunk produce exactly two chunks.
    ///
    /// `create_fixed_frame_chunks` is synchronous, so a plain `#[test]` is
    /// sufficient; no async runtime is needed.
    #[test]
    fn test_create_fixed_frame_chunks() {
        use crate::core::processor::yolo::{YoloFrame, YoloObject, YoloResult};

        // Test YOLO result: 60 frames, each containing one object.
        let frames: Vec<YoloFrame> = (0..60)
            .map(|i| YoloFrame {
                frame: i as u64,
                timestamp: i as f64 / 30.0, // assumes fps = 30
                objects: vec![YoloObject {
                    class_name: "person".to_string(),
                    class_id: 0,
                    x: 100,
                    y: 200,
                    width: 50,
                    height: 100,
                    confidence: 0.9,
                }],
            })
            .collect();

        let yolo_result = YoloResult {
            frame_count: 60,
            fps: 30.0,
            frames,
        };

        let chunks = create_fixed_frame_chunks(1, "test-uuid", &yolo_result, 0, 30.0);

        // 60 frames at 30 frames per chunk → 2 chunks.
        assert_eq!(chunks.len(), 2);

        // First chunk.
        let first_chunk = &chunks[0];
        assert_eq!(
            first_chunk.chunk_type,
            crate::core::chunk::ChunkType::Visual
        );
        assert_eq!(first_chunk.start_frame, 0);
        assert_eq!(first_chunk.end_frame, 30); // exclusive
        assert_eq!(first_chunk.frame_count, 30);

        // Second chunk.
        let second_chunk = &chunks[1];
        assert_eq!(
            second_chunk.chunk_type,
            crate::core::chunk::ChunkType::Visual
        );
        assert_eq!(second_chunk.start_frame, 30);
        assert_eq!(second_chunk.end_frame, 60); // exclusive
        assert_eq!(second_chunk.frame_count, 30);
    }

    /// A person+car run followed by a dog run is split into two chunks.
    #[test]
    fn test_create_similarity_based_chunks() {
        use crate::core::processor::yolo::{YoloFrame, YoloObject, YoloResult};

        let frames = vec![
            // Frames 0 and 1: person + car.
            YoloFrame {
                frame: 0,
                timestamp: 0.0,
                objects: vec![
                    YoloObject {
                        class_name: "person".to_string(),
                        class_id: 0,
                        x: 100,
                        y: 200,
                        width: 50,
                        height: 100,
                        confidence: 0.9,
                    },
                    YoloObject {
                        class_name: "car".to_string(),
                        class_id: 2,
                        x: 300,
                        y: 150,
                        width: 80,
                        height: 60,
                        confidence: 0.8,
                    },
                ],
            },
            YoloFrame {
                frame: 1,
                timestamp: 0.033,
                objects: vec![
                    YoloObject {
                        class_name: "person".to_string(),
                        class_id: 0,
                        x: 110,
                        y: 210,
                        width: 52,
                        height: 102,
                        confidence: 0.88,
                    },
                    YoloObject {
                        class_name: "car".to_string(),
                        class_id: 2,
                        x: 310,
                        y: 155,
                        width: 82,
                        height: 62,
                        confidence: 0.78,
                    },
                ],
            },
            // Frames 5 and 6: dog only.
            YoloFrame {
                frame: 5,
                timestamp: 0.166,
                objects: vec![YoloObject {
                    class_name: "dog".to_string(),
                    class_id: 16,
                    x: 150,
                    y: 250,
                    width: 40,
                    height: 60,
                    confidence: 0.7,
                }],
            },
            YoloFrame {
                frame: 6,
                timestamp: 0.2,
                objects: vec![YoloObject {
                    class_name: "dog".to_string(),
                    class_id: 16,
                    x: 155,
                    y: 255,
                    width: 42,
                    height: 62,
                    confidence: 0.68,
                }],
            },
        ];

        let yolo_result = YoloResult {
            frame_count: 7,
            fps: 30.0,
            frames,
        };

        let chunks = create_similarity_based_chunks(
            1,
            "test-uuid",
            &yolo_result,
            0,
            30.0,
            0.5, // similarity threshold
            2,   // min frames per chunk
        );

        // Two chunks expected: one for person+car, one for dog.
        assert_eq!(chunks.len(), 2);
    }
}
|
||||
@@ -1,3 +1,5 @@
|
||||
pub mod validator;
|
||||
|
||||
use anyhow::{Context, Result};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
202
src/core/thumbnail/validator.rs
Normal file
202
src/core/thumbnail/validator.rs
Normal file
@@ -0,0 +1,202 @@
|
||||
use anyhow::{bail, Result};
|
||||
|
||||
/// Minimum buffer length, in bytes, that `validate_jpeg` and `jpeg_size_ok` accept.
pub const JPEG_MIN_SIZE: usize = 100;
|
||||
/// Byte sequence a buffer must start with to pass the header check
/// (the two-byte JPEG start-of-image marker followed by the 0xFF that opens the next marker).
pub const JPEG_SOI_MARKER: [u8; 3] = [0xFF, 0xD8, 0xFF];
|
||||
/// Byte sequence a buffer must end with to pass the footer check (JPEG end-of-image marker).
pub const JPEG_EOI_MARKER: [u8; 2] = [0xFF, 0xD9];
|
||||
|
||||
pub fn validate_jpeg(data: &[u8]) -> Result<()> {
|
||||
if data.len() < JPEG_MIN_SIZE {
|
||||
bail!(
|
||||
"JPEG too small: {} bytes (minimum {})",
|
||||
data.len(),
|
||||
JPEG_MIN_SIZE
|
||||
);
|
||||
}
|
||||
|
||||
if data[0..3] != JPEG_SOI_MARKER {
|
||||
bail!(
|
||||
"Invalid JPEG header: expected {:02X?}, got {:02X?}",
|
||||
JPEG_SOI_MARKER,
|
||||
&data[0..3]
|
||||
);
|
||||
}
|
||||
|
||||
if data[data.len() - 2..] != JPEG_EOI_MARKER {
|
||||
bail!(
|
||||
"Incomplete JPEG: missing EOI marker, got {:02X?}",
|
||||
&data[data.len() - 2..]
|
||||
);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn is_valid_jpeg(data: &[u8]) -> bool {
|
||||
validate_jpeg(data).is_ok()
|
||||
}
|
||||
|
||||
pub fn jpeg_size_ok(data: &[u8]) -> bool {
|
||||
data.len() >= JPEG_MIN_SIZE
|
||||
}
|
||||
|
||||
pub fn jpeg_header_ok(data: &[u8]) -> bool {
|
||||
data.len() >= 3 && data[0..3] == JPEG_SOI_MARKER
|
||||
}
|
||||
|
||||
pub fn jpeg_footer_ok(data: &[u8]) -> bool {
|
||||
data.len() >= 2 && data[data.len() - 2..] == JPEG_EOI_MARKER
|
||||
}
|
||||
|
||||
pub fn validate_frame(frame: i64, total_frames: i64) -> Result<()> {
|
||||
if frame < 0 {
|
||||
bail!("Frame number cannot be negative: {}", frame);
|
||||
}
|
||||
if frame > total_frames {
|
||||
bail!("Frame {} exceeds total frames {}", frame, total_frames);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn validate_crop(
|
||||
x: i32,
|
||||
y: i32,
|
||||
w: i32,
|
||||
h: i32,
|
||||
video_width: i32,
|
||||
video_height: i32,
|
||||
) -> Result<()> {
|
||||
if x < 0 || y < 0 || w <= 0 || h <= 0 {
|
||||
bail!(
|
||||
"Invalid crop parameters: x={}, y={}, w={}, h={} (must be positive)",
|
||||
x,
|
||||
y,
|
||||
w,
|
||||
h
|
||||
);
|
||||
}
|
||||
if x + w > video_width {
|
||||
bail!(
|
||||
"Crop width exceeds video: x+w={} > video_width={}",
|
||||
x + w,
|
||||
video_width
|
||||
);
|
||||
}
|
||||
if y + h > video_height {
|
||||
bail!(
|
||||
"Crop height exceeds video: y+h={} > video_height={}",
|
||||
y + h,
|
||||
video_height
|
||||
);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds the 101-byte fixture shared by the JPEG tests: the SOI marker,
    /// 96 filler bytes (0x00..=0x5F), then the EOI marker.
    fn sample_jpeg() -> Vec<u8> {
        let mut bytes = JPEG_SOI_MARKER.to_vec();
        bytes.extend(0x00..=0x5F_u8);
        bytes.extend_from_slice(&JPEG_EOI_MARKER);
        bytes
    }

    #[test]
    fn test_validate_jpeg_valid() {
        let valid_jpeg = sample_jpeg();
        assert!(valid_jpeg.len() >= JPEG_MIN_SIZE);
        assert!(validate_jpeg(&valid_jpeg).is_ok());
    }

    #[test]
    fn test_validate_jpeg_too_small() {
        let small_data = vec![0xFF, 0xD8, 0xFF, 0xFF, 0xD9];
        let message = validate_jpeg(&small_data).unwrap_err().to_string();
        assert!(message.contains("JPEG too small"));
    }

    #[test]
    fn test_validate_jpeg_invalid_header() {
        // Keep the buffer above the size threshold so the header check is what fails.
        let mut invalid_header = sample_jpeg();
        invalid_header[..3].copy_from_slice(&[0x00, 0x00, 0x00]);
        let message = validate_jpeg(&invalid_header).unwrap_err().to_string();
        assert!(message.contains("Invalid JPEG header"));
    }

    #[test]
    fn test_validate_jpeg_missing_footer() {
        // Overwrite (rather than truncate) the footer so the size check still passes
        // and the EOI check is what fails.
        let mut missing_footer = sample_jpeg();
        let len = missing_footer.len();
        missing_footer[len - 2..].copy_from_slice(&[0x00, 0x00]);
        let message = validate_jpeg(&missing_footer).unwrap_err().to_string();
        assert!(message.contains("Incomplete JPEG"));
    }

    #[test]
    fn test_validate_frame_valid() {
        assert!(validate_frame(500, 1000).is_ok());
        assert!(validate_frame(0, 1000).is_ok());
        assert!(validate_frame(1000, 1000).is_ok());
    }

    #[test]
    fn test_validate_frame_exceeds() {
        assert!(validate_frame(1001, 1000).is_err());
        assert!(validate_frame(-1, 1000).is_err());
    }

    #[test]
    fn test_validate_crop_valid() {
        assert!(validate_crop(100, 100, 200, 200, 1920, 1080).is_ok());
        assert!(validate_crop(0, 0, 1920, 1080, 1920, 1080).is_ok());
    }

    #[test]
    fn test_validate_crop_exceeds_width() {
        assert!(validate_crop(1800, 100, 200, 200, 1920, 1080).is_err());
    }

    #[test]
    fn test_validate_crop_exceeds_height() {
        assert!(validate_crop(100, 900, 200, 200, 1920, 1080).is_err());
    }

    #[test]
    fn test_validate_crop_negative() {
        assert!(validate_crop(-1, 100, 200, 200, 1920, 1080).is_err());
        assert!(validate_crop(100, -1, 200, 200, 1920, 1080).is_err());
    }

    #[test]
    fn test_is_valid_jpeg() {
        assert!(is_valid_jpeg(&sample_jpeg()));
        assert!(!is_valid_jpeg(&[0xFF, 0xD8, 0xFF, 0xFF, 0xD9])); // too small
    }

    #[test]
    fn test_jpeg_helpers() {
        let valid_jpeg = sample_jpeg();
        assert!(jpeg_size_ok(&valid_jpeg));
        assert!(jpeg_header_ok(&valid_jpeg));
        assert!(jpeg_footer_ok(&valid_jpeg));

        // Each helper must reject bad input on its own, without panicking on short buffers.
        assert!(!jpeg_size_ok(&[0xFF, 0xD8, 0xFF, 0xFF, 0xD9]));
        assert!(!jpeg_header_ok(&[0xFF, 0xD8]));
        assert!(!jpeg_header_ok(&[0x00, 0x00, 0x00, 0x00]));
        assert!(!jpeg_footer_ok(&[0xD9]));
        assert!(!jpeg_footer_ok(&[0xFF, 0xD9, 0x00]));
    }
}
|
||||
@@ -91,22 +91,21 @@ async fn upsert_identities_from_disk(
|
||||
{
|
||||
Ok(identity_file) => {
|
||||
let identities_table = crate::core::db::schema::table_name("identities");
|
||||
let uuid_clean = identity_file.identity_uuid.replace('-', "");
|
||||
let result = sqlx::query(&format!(
|
||||
"INSERT INTO {} (uuid, name, identity_type, source, status, tmdb_id, tmdb_profile, metadata) \
|
||||
VALUES ($1::uuid, $2, 'people', 'tmdb', 'confirmed', $3, $4, $5::jsonb) \
|
||||
VALUES (gen_random_uuid(), $1, 'people', 'tmdb', 'confirmed', $2, $3, $4::jsonb) \
|
||||
ON CONFLICT (tmdb_id) WHERE tmdb_id IS NOT NULL DO UPDATE SET \
|
||||
uuid = COALESCE({}.uuid, $1::uuid), \
|
||||
tmdb_profile = COALESCE(EXCLUDED.tmdb_profile, {}.tmdb_profile), \
|
||||
metadata = {}.metadata || $5::jsonb",
|
||||
identities_table, identities_table, identities_table, identities_table
|
||||
))
|
||||
.bind(&identity_file.identity_uuid)
|
||||
.bind(&identity_file.name)
|
||||
.bind(identity_file.tmdb_id)
|
||||
.bind(&identity_file.tmdb_profile)
|
||||
.bind(&identity_file.metadata)
|
||||
.execute(db.pool())
|
||||
.await;
|
||||
metadata = jsonb_deep_merge({}.metadata, $4::jsonb)",
|
||||
identities_table, identities_table, identities_table
|
||||
))
|
||||
.bind(&identity_file.name)
|
||||
.bind(identity_file.tmdb_id)
|
||||
.bind(&identity_file.tmdb_profile)
|
||||
.bind(&identity_file.metadata)
|
||||
.execute(db.pool())
|
||||
.await;
|
||||
|
||||
match result {
|
||||
Ok(_) => {
|
||||
@@ -226,7 +225,7 @@ pub async fn create_identities_from_data(
|
||||
VALUES ($1, 'people', 'tmdb', 'confirmed', $2, $3, $4::jsonb) \
|
||||
ON CONFLICT (tmdb_id) WHERE tmdb_id IS NOT NULL DO UPDATE SET \
|
||||
tmdb_profile = COALESCE(EXCLUDED.tmdb_profile, {}.tmdb_profile), \
|
||||
metadata = {}.metadata || $4::jsonb \
|
||||
metadata = jsonb_deep_merge({}.metadata, $4::jsonb) \
|
||||
RETURNING uuid",
|
||||
identities_table, identities_table, identities_table
|
||||
))
|
||||
|
||||
@@ -2426,7 +2426,7 @@ async fn main() -> Result<()> {
|
||||
.await
|
||||
.context("Failed to init PostgreSQL")?;
|
||||
let qdrant = QdrantDb::init().await.context("Failed to init Qdrant")?;
|
||||
let embedder = Embedder::new("nomic-embed-text-v2-moe:latest".to_string());
|
||||
let embedder = Embedder::new("embeddinggemma-300m".to_string());
|
||||
|
||||
let target_uuid = if uuid == "all" {
|
||||
None
|
||||
|
||||
@@ -41,7 +41,6 @@ pub fn verify_output(processor: &ProcessorType, file_uuid: &str) -> Verification
|
||||
let proc_name = processor.as_str();
|
||||
let filename = match processor {
|
||||
ProcessorType::Story => format!("{}.story_story.json", file_uuid),
|
||||
ProcessorType::FiveW1H => format!("{}.story_llm.json", file_uuid),
|
||||
_ => format!("{}.{}.json", file_uuid, proc_name),
|
||||
};
|
||||
let output_path = PathBuf::from(OUTPUT_DIR.as_str()).join(&filename);
|
||||
@@ -65,7 +64,7 @@ pub fn verify_output(processor: &ProcessorType, file_uuid: &str) -> Verification
|
||||
};
|
||||
|
||||
match processor {
|
||||
ProcessorType::Asr | ProcessorType::Asrx => {
|
||||
ProcessorType::Asrx => {
|
||||
let segs = value.get("segments").and_then(|v| v.as_array());
|
||||
match segs {
|
||||
Some(_) => VerificationResult::ok(proc_name, file_uuid),
|
||||
@@ -105,18 +104,8 @@ pub fn verify_output(processor: &ProcessorType, file_uuid: &str) -> Verification
|
||||
None => VerificationResult::ok(proc_name, file_uuid),
|
||||
}
|
||||
}
|
||||
ProcessorType::VisualChunk => VerificationResult::ok(proc_name, file_uuid),
|
||||
ProcessorType::Story => VerificationResult::ok(proc_name, file_uuid),
|
||||
ProcessorType::FiveW1H => {
|
||||
let scenes = value.get("scenes").and_then(|v| v.as_array());
|
||||
match scenes {
|
||||
Some(s) if s.is_empty() => {
|
||||
VerificationResult::fail(proc_name, file_uuid, "0 scenes")
|
||||
}
|
||||
Some(_) => VerificationResult::ok(proc_name, file_uuid),
|
||||
None => VerificationResult::ok(proc_name, file_uuid),
|
||||
}
|
||||
}
|
||||
_ => VerificationResult::ok(proc_name, file_uuid),
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -91,6 +91,7 @@ impl JobWorker {
|
||||
self.processor_pool.sweep_stale().await;
|
||||
|
||||
// Reset stale running jobs: jobs stuck in 'running' with no active processor results
|
||||
// Exclude jobs where all processor_results are completed (waiting for ingestion)
|
||||
let monitor_jobs_table = schema::table_name("monitor_jobs");
|
||||
let processor_results_table = schema::table_name("processor_results");
|
||||
if let Err(e) = sqlx::query(&format!(
|
||||
@@ -99,8 +100,13 @@ impl JobWorker {
|
||||
AND id NOT IN (
|
||||
SELECT DISTINCT job_id FROM {}
|
||||
WHERE status IN ('pending', 'running')
|
||||
)
|
||||
AND id NOT IN (
|
||||
SELECT job_id FROM {}
|
||||
GROUP BY job_id
|
||||
HAVING bool_and(status = 'completed')
|
||||
)",
|
||||
monitor_jobs_table, processor_results_table
|
||||
monitor_jobs_table, processor_results_table, processor_results_table
|
||||
))
|
||||
.execute(self.db.pool())
|
||||
.await
|
||||
@@ -197,9 +203,9 @@ impl JobWorker {
|
||||
job.processors.len()
|
||||
};
|
||||
let should_retry = self
|
||||
.check_and_complete_job(job.id, &job.uuid, expected_count)
|
||||
.check_and_complete_job(job.id, &job.uuid, &job.processors, expected_count)
|
||||
.await
|
||||
.is_ok();
|
||||
.unwrap_or(false);
|
||||
if should_retry && self.processor_pool.can_start().await {
|
||||
if let Err(e) = self.process_job(job.clone()).await {
|
||||
error!("Failed to reprocess job {}: {}", job.uuid, e);
|
||||
@@ -708,14 +714,14 @@ impl JobWorker {
|
||||
} else {
|
||||
job.processors.len()
|
||||
};
|
||||
self.check_and_complete_job(job.id, &job.uuid, expected_count)
|
||||
self.check_and_complete_job(job.id, &job.uuid, &job.processors, expected_count)
|
||||
.await?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// 檢查所有入庫步驟是否已完成(與 ingestion-status endpoint 同步邏輯)
|
||||
async fn ingestion_complete(pool: &PgPool, uuid: &str) -> bool {
|
||||
async fn ingestion_complete(pool: &PgPool, uuid: &str, job_processors: &[String]) -> bool {
|
||||
let chunk_t = schema::table_name("chunk");
|
||||
let fd_t = schema::table_name("face_detections");
|
||||
|
||||
@@ -730,18 +736,30 @@ impl JobWorker {
|
||||
}
|
||||
|
||||
let fu = uuid;
|
||||
let rule1 = check!(&format!(
|
||||
"SELECT 1 FROM {chunk_t} WHERE file_uuid = '{fu}' AND chunk_type = 'sentence' LIMIT 1"
|
||||
));
|
||||
let vector = check!(&format!("SELECT 1 FROM {chunk_t} WHERE file_uuid = '{fu}' AND chunk_type = 'sentence' AND embedding IS NOT NULL LIMIT 1"));
|
||||
let rule3 = check!(&format!(
|
||||
"SELECT 1 FROM {chunk_t} WHERE file_uuid = '{fu}' AND chunk_type = 'cut' LIMIT 1"
|
||||
));
|
||||
let trace = check!(&format!("SELECT COUNT(DISTINCT trace_id) FROM {fd_t} WHERE file_uuid = '{fu}' AND trace_id IS NOT NULL"));
|
||||
// Only check conditions relevant to the job's processors
|
||||
let has_asr_or_asrx = job_processors.is_empty()
|
||||
|| job_processors.iter().any(|p| p == "asrx" || p == "asr");
|
||||
let has_cut = job_processors.is_empty()
|
||||
|| job_processors.iter().any(|p| p == "cut");
|
||||
let has_face = job_processors.is_empty()
|
||||
|| job_processors.iter().any(|p| p == "face");
|
||||
|
||||
let rule1 = !has_asr_or_asrx
|
||||
|| check!(&format!(
|
||||
"SELECT 1 FROM {chunk_t} WHERE file_uuid = '{fu}' AND chunk_type = 'sentence' LIMIT 1"
|
||||
));
|
||||
let vector = !has_asr_or_asrx
|
||||
|| check!(&format!("SELECT 1 FROM {chunk_t} WHERE file_uuid = '{fu}' AND chunk_type = 'sentence' AND embedding IS NOT NULL LIMIT 1"));
|
||||
let rule3 = !has_cut
|
||||
|| check!(&format!(
|
||||
"SELECT 1 FROM {chunk_t} WHERE file_uuid = '{fu}' AND chunk_type = 'cut' LIMIT 1"
|
||||
));
|
||||
let trace = !has_face
|
||||
|| check!(&format!("SELECT COUNT(DISTINCT trace_id) FROM {fd_t} WHERE file_uuid = '{fu}' AND trace_id IS NOT NULL"));
|
||||
let all_ok = rule1 && vector && rule3 && trace;
|
||||
if !all_ok {
|
||||
tracing::info!(
|
||||
"[Ingestion] waiting: rule1={rule1} vector={vector} rule3={rule3} trace={trace}"
|
||||
"[Ingestion] waiting (uuid={fu}): rule1={rule1} vector={vector} rule3={rule3} trace={trace}"
|
||||
);
|
||||
}
|
||||
all_ok
|
||||
@@ -751,8 +769,9 @@ impl JobWorker {
|
||||
&self,
|
||||
job_id: i32,
|
||||
uuid: &str,
|
||||
job_processors: &[String],
|
||||
expected_count: usize,
|
||||
) -> Result<()> {
|
||||
) -> Result<bool> {
|
||||
let results = self.db.get_processor_results_by_job(job_id).await?;
|
||||
|
||||
info!(
|
||||
@@ -831,10 +850,29 @@ impl JobWorker {
|
||||
.await?;
|
||||
|
||||
if has_asrx {
|
||||
info!("📝 Prerequisites met for Rule 1 Chunking. Starting ingestion...");
|
||||
let db_clone = self.db.clone();
|
||||
let uuid_clone = uuid.to_string();
|
||||
tokio::spawn(async move {
|
||||
// Guard: only spawn Rule 1 if sentence chunks don't exist yet
|
||||
let chunk_t = schema::table_name("chunk");
|
||||
let already_spawned: bool = sqlx::query_scalar::<_, i64>(
|
||||
&format!(
|
||||
"SELECT 1 FROM {chunk_t} WHERE file_uuid = $1 AND chunk_type = 'sentence' LIMIT 1"
|
||||
),
|
||||
)
|
||||
.bind(uuid)
|
||||
.fetch_optional(self.db.pool())
|
||||
.await?
|
||||
.unwrap_or(0)
|
||||
> 0;
|
||||
|
||||
if already_spawned {
|
||||
info!(
|
||||
"✅ Rule 1 already completed for {}, skipping spawn",
|
||||
uuid
|
||||
);
|
||||
} else {
|
||||
info!("📝 Prerequisites met for Rule 1 Chunking. Starting ingestion...");
|
||||
let db_clone = self.db.clone();
|
||||
let uuid_clone = uuid.to_string();
|
||||
tokio::spawn(async move {
|
||||
match db_clone.get_video_by_uuid(&uuid_clone).await {
|
||||
Ok(Some(video)) => {
|
||||
let fps = video.fps;
|
||||
@@ -886,6 +924,7 @@ impl JobWorker {
|
||||
Err(e) => error!("Failed to get video info for chunking: {}", e),
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
if all_completed {
|
||||
@@ -1031,12 +1070,12 @@ impl JobWorker {
|
||||
});
|
||||
}
|
||||
|
||||
if !Self::ingestion_complete(self.db.pool(), uuid).await {
|
||||
if !Self::ingestion_complete(self.db.pool(), uuid, job_processors).await {
|
||||
info!(
|
||||
"Job {}: all processors done, waiting for ingestion...",
|
||||
job_id
|
||||
);
|
||||
return Ok(());
|
||||
return Ok(false);
|
||||
}
|
||||
|
||||
self.db
|
||||
@@ -1114,7 +1153,7 @@ impl JobWorker {
|
||||
.await?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
Ok(false)
|
||||
}
|
||||
|
||||
pub async fn shutdown(&self) {
|
||||
|
||||
@@ -7,8 +7,6 @@ use std::sync::Arc;
|
||||
use tokio::sync::{mpsc, RwLock};
|
||||
use tracing::{error, info, warn};
|
||||
|
||||
|
||||
|
||||
/// Guard that ensures processor pool cleanup runs even if the task panics.
|
||||
struct ProcessorCleanupGuard {
|
||||
job_id: i32,
|
||||
@@ -28,17 +26,23 @@ impl Drop for ProcessorCleanupGuard {
|
||||
warn!("[ProcessorCleanupGuard] running lock contended");
|
||||
}
|
||||
if let Ok(mut guard) = self.running_count.try_write() {
|
||||
if *guard > 0 { *guard -= 1; }
|
||||
if *guard > 0 {
|
||||
*guard -= 1;
|
||||
}
|
||||
}
|
||||
match self.pipeline {
|
||||
PipelineType::Frame => {
|
||||
if let Ok(mut guard) = self.frame_count.try_write() {
|
||||
if *guard > 0 { *guard -= 1; }
|
||||
if *guard > 0 {
|
||||
*guard -= 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
PipelineType::Time => {
|
||||
if let Ok(mut guard) = self.time_count.try_write() {
|
||||
if *guard > 0 { *guard -= 1; }
|
||||
if *guard > 0 {
|
||||
*guard -= 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
PipelineType::Cross => {} // cross pipeline not tracked in slot counts
|
||||
@@ -66,7 +70,6 @@ use crate::core::processor::face::FaceResult;
|
||||
use crate::core::processor::ocr::OcrResult;
|
||||
use crate::core::processor::pose::PoseResult;
|
||||
use crate::core::processor::scene_classification::SceneClassificationResult;
|
||||
use crate::core::processor::visual_chunk::VisualChunkResult;
|
||||
use crate::core::processor::yolo::YoloResult;
|
||||
use crate::worker::resources::SystemResources;
|
||||
|
||||
@@ -518,32 +521,10 @@ impl ProcessorPool {
|
||||
let total_frames = video.as_ref().map(|v| v.total_frames as i32).unwrap_or(0);
|
||||
|
||||
match processor_type {
|
||||
ProcessorType::Asr => {
|
||||
let result =
|
||||
processor::process_asr(video_path, output_path.to_str().unwrap(), uuid).await?;
|
||||
let chunks_produced = result.segments.len() as i32;
|
||||
tracing::info!(
|
||||
"ASR completed, storing {} segments for {}",
|
||||
chunks_produced,
|
||||
job.uuid
|
||||
);
|
||||
if let Err(e) = Self::store_asr_chunks(db, &job.uuid, &result).await {
|
||||
tracing::error!("Failed to store ASR chunks for {}: {}", job.uuid, e);
|
||||
}
|
||||
Ok(ProcessorOutput {
|
||||
data: serde_json::to_value(result)?,
|
||||
chunks_produced,
|
||||
frames_processed: total_frames,
|
||||
total_frames,
|
||||
retry_count: 0,
|
||||
pid: 0,
|
||||
})
|
||||
}
|
||||
ProcessorType::Cut => {
|
||||
let cut_path =
|
||||
std::path::Path::new(&output_dir).join(format!("{}.cut.json", job.uuid));
|
||||
let result = if cut_path.exists() {
|
||||
// CUT 在 register 階段已完成,直接載入
|
||||
let content =
|
||||
std::fs::read_to_string(&cut_path).context("Failed to read cut.json")?;
|
||||
serde_json::from_str(&content).context("Failed to parse cut.json")?
|
||||
@@ -624,10 +605,6 @@ impl ProcessorPool {
|
||||
if let Err(e) = Self::store_face_chunks(db, &job.uuid, &result).await {
|
||||
tracing::error!("Failed to store FACE chunks for {}: {}", job.uuid, e);
|
||||
}
|
||||
// 將 face embedding 寫入 Qdrant
|
||||
if let Err(e) = Self::store_face_embeddings_to_qdrant(&job.uuid, &result).await {
|
||||
tracing::error!("Failed to store face embeddings to Qdrant: {}", e);
|
||||
}
|
||||
Ok(ProcessorOutput {
|
||||
data: serde_json::to_value(result)?,
|
||||
chunks_produced,
|
||||
@@ -685,31 +662,6 @@ impl ProcessorPool {
|
||||
pid: 0,
|
||||
})
|
||||
}
|
||||
ProcessorType::VisualChunk => {
|
||||
let result = processor::process_visual_chunk_advanced(
|
||||
video_path,
|
||||
output_path.to_str().unwrap(),
|
||||
uuid,
|
||||
)
|
||||
.await?;
|
||||
let chunks_produced = result.chunk_count as i32;
|
||||
tracing::info!(
|
||||
"VisualChunk completed, storing {} chunks for {}",
|
||||
chunks_produced,
|
||||
job.uuid
|
||||
);
|
||||
if let Err(e) = Self::store_visual_chunk_chunks(db, &job.uuid, &result).await {
|
||||
tracing::error!("Failed to store VisualChunk chunks for {}: {}", job.uuid, e);
|
||||
}
|
||||
Ok(ProcessorOutput {
|
||||
data: serde_json::to_value(result)?,
|
||||
chunks_produced,
|
||||
frames_processed: total_frames,
|
||||
total_frames,
|
||||
retry_count: 0,
|
||||
pid: 0,
|
||||
})
|
||||
}
|
||||
ProcessorType::Scene => {
|
||||
let scene_path =
|
||||
std::path::Path::new(&output_dir).join(format!("{}.scene.json", job.uuid));
|
||||
@@ -717,7 +669,6 @@ impl ProcessorPool {
|
||||
std::path::Path::new(&output_dir).join(format!("{}.scene.err", job.uuid));
|
||||
let scene_tmp =
|
||||
std::path::Path::new(&output_dir).join(format!("{}.scene.tmp", job.uuid));
|
||||
// 優先順序:.err(跳過)→ .json(載入)→ .tmp(等待或重新執行)
|
||||
let result = if scene_err.exists() {
|
||||
tracing::warn!("Scene previously failed for {}, skipping", job.uuid);
|
||||
return Ok(ProcessorOutput {
|
||||
@@ -1009,72 +960,6 @@ impl ProcessorPool {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// 將 face embeddings 寫入 Qdrant momentry_dev_face collection
|
||||
pub async fn store_face_embeddings_to_qdrant(
|
||||
uuid: &str,
|
||||
face_result: &FaceResult,
|
||||
) -> Result<()> {
|
||||
let qdrant = QdrantDb::new();
|
||||
let collection = format!(
|
||||
"{}{}",
|
||||
crate::core::config::REDIS_KEY_PREFIX
|
||||
.as_str()
|
||||
.trim_end_matches(':'),
|
||||
"_face"
|
||||
);
|
||||
|
||||
// 確保 collection 存在(dim=512 for FaceNet)
|
||||
if let Err(e) = qdrant.ensure_collection(&collection, 512).await {
|
||||
tracing::error!("Failed to ensure Qdrant face collection: {}", e);
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let mut count = 0;
|
||||
for frame in &face_result.frames {
|
||||
for face in &frame.faces {
|
||||
if let Some(embedding) = &face.embedding {
|
||||
if embedding.len() != 512 {
|
||||
continue;
|
||||
}
|
||||
// 使用 hash 作為 Qdrant point ID(需要 unsigned integer)
|
||||
// 使用 frame number 作為 Qdrant point ID(u64)
|
||||
let point_id = frame.frame as u64;
|
||||
|
||||
let payload = serde_json::json!({
|
||||
"file_uuid": uuid,
|
||||
"face_id": face.face_id,
|
||||
"frame": frame.frame,
|
||||
"timestamp": frame.timestamp,
|
||||
"x": face.x,
|
||||
"y": face.y,
|
||||
"width": face.width,
|
||||
"height": face.height,
|
||||
"confidence": face.confidence,
|
||||
});
|
||||
|
||||
if let Err(e) = qdrant
|
||||
.upsert_vector_to_collection(
|
||||
&collection,
|
||||
point_id,
|
||||
embedding,
|
||||
Some(payload),
|
||||
)
|
||||
.await
|
||||
{
|
||||
tracing::error!("Failed to upsert face vector {}: {}", point_id, e);
|
||||
} else {
|
||||
count += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if count > 0 {
|
||||
tracing::info!("Stored {} face embeddings to Qdrant for {}", count, uuid);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// 將 voice embeddings 寫入 Qdrant momentry_dev_voice collection
|
||||
pub async fn store_voice_embeddings_to_qdrant(
|
||||
uuid: &str,
|
||||
@@ -1106,9 +991,22 @@ impl ProcessorPool {
|
||||
if emb.len() != 192 {
|
||||
continue;
|
||||
}
|
||||
// Point ID: hash(file_uuid + speaker_id + index) for global uniqueness
|
||||
let point_id = {
|
||||
use sha2::{Digest, Sha256};
|
||||
let mut hasher = Sha256::new();
|
||||
hasher.update(uuid.as_bytes());
|
||||
hasher.update(b"_");
|
||||
hasher.update(segment.speaker_id.clone().unwrap_or_default().as_bytes());
|
||||
hasher.update(b"_");
|
||||
hasher.update(i.to_string().as_bytes());
|
||||
let hash = hasher.finalize();
|
||||
u64::from_be_bytes(hash[0..8].try_into().unwrap())
|
||||
};
|
||||
|
||||
let payload = serde_json::json!({
|
||||
"file_uuid": uuid,
|
||||
"speaker_id": segment.speaker_id,
|
||||
"speaker_id": segment.speaker_id.clone().unwrap_or_default(),
|
||||
"segment_index": i,
|
||||
"start_frame": segment.start_frame,
|
||||
"end_frame": segment.end_frame,
|
||||
@@ -1117,7 +1015,7 @@ impl ProcessorPool {
|
||||
});
|
||||
|
||||
if let Err(e) = qdrant
|
||||
.upsert_vector_to_collection(&collection, i as u64, emb, Some(payload))
|
||||
.upsert_vector_to_collection(&collection, point_id, emb, Some(payload))
|
||||
.await
|
||||
{
|
||||
tracing::error!("Failed to upsert voice vector {}: {}", i, e);
|
||||
@@ -1174,6 +1072,7 @@ impl ProcessorPool {
|
||||
);
|
||||
|
||||
let mut pre_chunks_to_store = Vec::new();
|
||||
let mut speaker_detections = Vec::new();
|
||||
|
||||
for (i, segment) in asrx_result.segments.iter().enumerate() {
|
||||
let data = serde_json::json!({
|
||||
@@ -1184,28 +1083,23 @@ impl ProcessorPool {
|
||||
|
||||
// ASRX is time-based, so we use segment index or start time as coordinate.
|
||||
pre_chunks_to_store.push((i as i64, Some(segment.start_time), data, None, None));
|
||||
|
||||
speaker_detections.push((
|
||||
segment.speaker_id.clone().unwrap_or_default(),
|
||||
segment.start_time,
|
||||
segment.end_time,
|
||||
segment.text.clone(),
|
||||
None::<String>, // chunk_id: unknown yet, filled later
|
||||
0.0, // confidence: updated after binding
|
||||
));
|
||||
}
|
||||
|
||||
db.store_raw_pre_chunks_batch(uuid, "asrx", &pre_chunks_to_store)
|
||||
.await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn store_visual_chunk_chunks(
|
||||
db: &PostgresDb,
|
||||
uuid: &str,
|
||||
visual_chunk_result: &VisualChunkResult,
|
||||
) -> Result<()> {
|
||||
for (i, chunk) in visual_chunk_result.chunks.iter().enumerate() {
|
||||
match db.store_chunk(chunk).await {
|
||||
Ok(_) => {
|
||||
tracing::info!("Stored VisualChunk chunk {} for video {}", i, uuid);
|
||||
}
|
||||
Err(e) => {
|
||||
tracing::error!("Failed to store VisualChunk chunk {}: {}", i, e);
|
||||
}
|
||||
}
|
||||
}
|
||||
db.store_raw_pre_chunks_batch(uuid, "asr", &pre_chunks_to_store)
|
||||
.await?;
|
||||
db.store_speaker_detections_batch(uuid, &speaker_detections)
|
||||
.await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -1256,7 +1150,7 @@ impl ProcessorPool {
|
||||
});
|
||||
let chunk_table = crate::core::db::schema::table_name("chunk");
|
||||
let _ = sqlx::query(&format!(
|
||||
"UPDATE {} SET metadata = metadata || $1::jsonb WHERE file_uuid=$2 AND chunk_id=$3",
|
||||
"UPDATE {} SET metadata = jsonb_deep_merge(COALESCE(metadata, '{{}}'::jsonb), $1::jsonb) WHERE file_uuid=$2 AND chunk_id=$3",
|
||||
chunk_table
|
||||
))
|
||||
.bind(&meta)
|
||||
|
||||
Reference in New Issue
Block a user