feat: ASRX hybrid pipeline, identity history, worker fixes, checkpoint system

This commit is contained in:
Accusys
2026-06-02 07:13:23 +08:00
parent e3066c3f49
commit e1572907ae
198 changed files with 43705 additions and 8910 deletions

View File

@@ -1,10 +1,4 @@
use axum::{
extract::State,
http::StatusCode,
response::Json,
routing::post,
Router,
};
use axum::{extract::State, http::StatusCode, response::Json, routing::post, Router};
use once_cell::sync::Lazy;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
@@ -13,7 +7,10 @@ use std::time::Instant;
use crate::api::types::AppState;
use crate::core::db::schema;
use crate::core::llm::function_calling::{self, ChatMessage, LlmResponse, ToolCall, ToolDef};
use crate::core::llm::function_calling::{
self, call_llm_vision, ChatMessage, LlmResponse, ToolCall, ToolDef,
};
use base64::{engine::general_purpose::STANDARD as BASE64, Engine};
// ── Conversation Manager ─────────────────────────────────────────
@@ -43,11 +40,14 @@ fn get_or_create_conv(conv_id: Option<&str>) -> (String, Vec<ChatMessage>) {
}
}
let id = uuid::Uuid::new_v4().to_string().replace('-', "")[..16].to_string();
map.insert(id.clone(), Conversation {
messages: Vec::new(),
created_at: Instant::now(),
last_active: Instant::now(),
});
map.insert(
id.clone(),
Conversation {
messages: Vec::new(),
created_at: Instant::now(),
last_active: Instant::now(),
},
);
(id, Vec::new())
}
@@ -85,8 +85,13 @@ const SYSTEM_PROMPT: &str = r#"你是 Momentry 影片分析助手。回答用戶
## 工具使用規則
1. 先確認用戶在問哪部影片 — 使用 find_file 或 list_files
2. 人物問題優先使用 tkg_query
3. 語意/內容問題使用 smart_search 或 universal_search
4. 可以同時呼叫多個工具
3. 人物台詞/發言問題使用 identities_search輸入人名→回傳台詞片段
4. 人物對話互動(誰跟誰說話)使用 tkg_query 的 speaker_interaction
5. 人物台詞內容使用 tkg_query 的 speaker_dialogue
6. 用文字反查人物使用 identity_text輸入關鍵字→找出誰說/提到這段話)
7. 語意/內容問題使用 smart_search 或 universal_search
8. 畫面分析使用 analyze_frame — 可以分析影片中的任何畫面內容(場景、人物表情、動作、物件等)
9. 可以同時呼叫多個工具
## 引導規則
- 如果用戶沒說片名 → 用 find_file 搜尋,如果名稱不明確就反問
@@ -120,16 +125,16 @@ fn make_tools(pool: &sqlx::PgPool) -> Vec<ToolDef> {
),
function_calling::make_tool(
"tkg_query",
"查詢影片的人物互動、配對、同框資料。query_type 包括top_identities人物排名、first_cooccurrence第一次同框、identity_details人物詳細、mutual_gaze互看、interaction_network互動網絡、identity_traces出場片段、file_info影片資訊",
"查詢影片的人物互動、配對、同框、台詞資料。query_type 包括top_identities人物排名、first_cooccurrence第一次同框、identity_details人物詳細、mutual_gaze互看、interaction_network互動網絡、identity_traces出場片段、file_info影片資訊、speaker_dialogue人物台詞、speaker_interaction兩人對話互動",
serde_json::json!({
"file_uuid": {"type": "string", "description": "影片 UUID"},
"query_type": {
"type": "string",
"enum": ["top_identities", "first_cooccurrence", "identity_details", "mutual_gaze", "interaction_network", "identity_traces", "file_info"],
"enum": ["top_identities", "first_cooccurrence", "identity_details", "mutual_gaze", "interaction_network", "identity_traces", "file_info", "speaker_dialogue", "speaker_interaction"],
"description": "查詢類型"
},
"identity_name": {"type": "string", "description": "人物名稱(配合 identity_details / identity_traces"},
"identity_b": {"type": "string", "description": "第二人物名稱(配合 first_cooccurrence / mutual_gaze"},
"identity_name": {"type": "string", "description": "人物名稱(配合 identity_details / identity_traces / speaker_dialogue / speaker_interaction"},
"identity_b": {"type": "string", "description": "第二人物名稱(配合 first_cooccurrence / mutual_gaze / speaker_interaction"},
"limit": {"type": "integer", "default": 5}
}),
vec!["file_uuid", "query_type"],
@@ -144,6 +149,26 @@ fn make_tools(pool: &sqlx::PgPool) -> Vec<ToolDef> {
}),
vec!["query"],
),
function_calling::make_tool(
"identity_text",
"搜尋文字關鍵字找出有提及該內容的影片人物。適合回答「誰說了OOO」、「誰跟OOO有關」。不是查詢人物的台詞而是用文字反查人物。",
serde_json::json!({
"q": {"type": "string", "description": "搜尋關鍵字(台詞片段、主題等)"},
"file_uuid": {"type": "string", "description": "限制搜尋範圍(可選)"},
"limit": {"type": "integer", "default": 10}
}),
vec!["q"],
),
function_calling::make_tool(
"identities_search",
"查詢特定人物的台詞/發言內容。輸入人物名稱,回傳該人物在影片中說過的話。適合回答「某某人說了什麼」、「某某人的台詞」。",
serde_json::json!({
"q": {"type": "string", "description": "人物名稱關鍵字(姓名、角色名、別名)"},
"file_uuid": {"type": "string", "description": "限制搜尋範圍(可選)"},
"limit": {"type": "integer", "default": 10}
}),
vec!["q"],
),
function_calling::make_tool(
"get_identity_detail",
"查詢單一身份的詳細資料名字、角色、TMDb 資訊)。",
@@ -168,6 +193,16 @@ fn make_tools(pool: &sqlx::PgPool) -> Vec<ToolDef> {
}),
vec!["file_uuid"],
),
function_calling::make_tool(
"analyze_frame",
"分析影片中指定畫面的視覺內容(場景、人物表情、動作、物件等)。若不指定 frame_number會使用代表性畫面。問題會傳給視覺 LLM 分析。",
serde_json::json!({
"file_uuid": {"type": "string", "description": "影片 UUID"},
"question": {"type": "string", "description": "關於畫面的問題,例如「這個場景發生什麼事?」"},
"frame_number": {"type": "integer", "description": "指定的 frame 編號(可選)"}
}),
vec!["file_uuid"],
),
]
}
@@ -193,9 +228,10 @@ async fn exec_find_file(pool: &sqlx::PgPool, args: &serde_json::Value) -> Result
if rows.is_empty() {
return Ok(serde_json::json!({"found": false, "message": "No files match the query. Try different keywords."}).to_string());
}
let files: Vec<serde_json::Value> = rows.into_iter().map(|(u, n, hd)| {
serde_json::json!({"file_uuid": u, "file_name": n, "has_data": hd})
}).collect();
let files: Vec<serde_json::Value> = rows
.into_iter()
.map(|(u, n, hd)| serde_json::json!({"file_uuid": u, "file_name": n, "has_data": hd}))
.collect();
Ok(serde_json::json!({"found": true, "files": files}).to_string())
}
@@ -214,15 +250,19 @@ async fn exec_list_files(pool: &sqlx::PgPool, args: &serde_json::Value) -> Resul
.await
.map_err(|e| e.to_string())?;
let files: Vec<serde_json::Value> = rows.into_iter().map(|(u, n, hd)| {
serde_json::json!({"file_uuid": u, "file_name": n, "has_data": hd})
}).collect();
let files: Vec<serde_json::Value> = rows
.into_iter()
.map(|(u, n, hd)| serde_json::json!({"file_uuid": u, "file_name": n, "has_data": hd}))
.collect();
Ok(serde_json::json!({"files": files}).to_string())
}
async fn exec_tkg_query(pool: &sqlx::PgPool, args: &serde_json::Value) -> Result<String, String> {
let file_uuid = args.get("file_uuid").and_then(|v| v.as_str()).unwrap_or("");
let query_type = args.get("query_type").and_then(|v| v.as_str()).unwrap_or("");
let query_type = args
.get("query_type")
.and_then(|v| v.as_str())
.unwrap_or("");
let identity_name = args.get("identity_name").and_then(|v| v.as_str());
let identity_b = args.get("identity_b").and_then(|v| v.as_str());
let limit = args.get("limit").and_then(|v| v.as_i64()).unwrap_or(5);
@@ -242,9 +282,11 @@ async fn exec_tkg_query(pool: &sqlx::PgPool, args: &serde_json::Value) -> Result
GROUP BY i.uuid, i.name ORDER BY face_count DESC LIMIT $2",
fd_table, id_table
))
.bind(file_uuid).bind(limit)
.bind(file_uuid)
.bind(limit)
.fetch_all(pool)
.await.map_err(|e| e.to_string())?;
.await
.map_err(|e| e.to_string())?;
Ok(serde_json::json!({"identities": rows}).to_string())
}
"first_cooccurrence" => {
@@ -325,8 +367,9 @@ async fn exec_tkg_query(pool: &sqlx::PgPool, args: &serde_json::Value) -> Result
}
"identity_traces" => {
let name = identity_name.unwrap_or("");
let rows: Vec<(i32, i64, i32, i32)> = sqlx::query_as(&format!(
"SELECT fd.trace_id, COUNT(*)::bigint, MIN(fd.frame_number)::int, MAX(fd.frame_number)::int \
// MIN/MAX frame_number should be bigint (i64), not int
let rows: Vec<(i32, i64, i64, i64)> = sqlx::query_as(&format!(
"SELECT fd.trace_id, COUNT(*)::bigint, MIN(fd.frame_number)::bigint, MAX(fd.frame_number)::bigint \
FROM {} fd JOIN {} i ON i.id = fd.identity_id \
WHERE fd.file_uuid = $1 AND i.name ILIKE $2 \
GROUP BY fd.trace_id ORDER BY COUNT(*) DESC LIMIT $3",
@@ -344,14 +387,133 @@ async fn exec_tkg_query(pool: &sqlx::PgPool, args: &serde_json::Value) -> Result
))
.bind(file_uuid)
.fetch_optional(pool)
.await.map_err(|e| e.to_string())?;
.await
.map_err(|e| e.to_string())?;
Ok(serde_json::json!({"file_info": row.map(|(n, d, w, h, f)| serde_json::json!({"file_name": n, "duration_sec": d, "width": w, "height": h, "fps": f}))}).to_string())
}
_ => Ok(serde_json::json!({"error": format!("Unknown query_type: {}", query_type)}).to_string()),
// Dialogue attributed to one identity. Walks identity -> face detections ->
// `face_trace` graph nodes -> `SPEAKS_AS` edges -> target speaker nodes, and returns
// each speaker node's `external_id` together with its `full_text` property.
"speaker_dialogue" => {
    let name = identity_name.unwrap_or("");
    if name.is_empty() {
        return Ok(serde_json::json!({"error": "identity_name is required"}).to_string());
    }
    // Bind NULL (not "") when no file was supplied: the `$2::text IS NULL` guards in
    // the query can only disable the file filter if the parameter really is NULL.
    let file_filter = Some(file_uuid).filter(|f| !f.is_empty());
    let rows: Vec<(String, Option<String>)> = sqlx::query_as(&format!(
        "SELECT DISTINCT sn.external_id, sn.properties->>'full_text' AS full_text \
         FROM {} i \
         JOIN {} fd ON fd.identity_id = i.id AND ($2::text IS NULL OR fd.file_uuid = $2) \
         JOIN {} fn ON fn.file_uuid = fd.file_uuid \
         AND fn.node_type = 'face_trace' \
         AND fn.external_id = CONCAT('trace_', fd.trace_id) \
         JOIN {} e ON e.source_node_id = fn.id \
         AND e.edge_type = 'SPEAKS_AS' \
         AND ($2::text IS NULL OR e.file_uuid = $2) \
         JOIN {} sn ON sn.id = e.target_node_id \
         WHERE i.name ILIKE $1 \
         ORDER BY sn.external_id \
         LIMIT $3",
        id_table, fd_table, nodes, edges, nodes
    ))
    .bind(name)
    .bind(file_filter)
    // PostgreSQL rejects a negative LIMIT, so clamp caller-supplied values at zero.
    .bind(limit.max(0))
    .fetch_all(pool)
    .await
    .map_err(|e| e.to_string())?;

    let speakers: Vec<serde_json::Value> = rows
        .iter()
        .map(|(sid, text)| serde_json::json!({"speaker_id": sid, "dialogue": text}))
        .collect();
    Ok(serde_json::json!({"speakers": speakers}).to_string())
}
// Dialogue exchanged between two identities. Loads the speaker nodes linked (via
// `face_trace` nodes and `SPEAKS_AS` edges) to either name, then reports every pair of
// segments from two *different* speaker nodes whose time ranges overlap.
"speaker_interaction" => {
    /// Extracts `(start, end, text)` from one segment object. Missing `start`/`end`
    /// default to 0.0; segments with missing or empty `text` are dropped.
    fn segment_parts(seg: &serde_json::Value) -> Option<(f64, f64, &str)> {
        let text = seg.get("text").and_then(|v| v.as_str()).unwrap_or("");
        if text.is_empty() {
            return None;
        }
        let start = seg.get("start").and_then(|v| v.as_f64()).unwrap_or(0.0);
        let end = seg.get("end").and_then(|v| v.as_f64()).unwrap_or(0.0);
        Some((start, end, text))
    }

    let name_a = identity_name.unwrap_or("");
    let name_b = identity_b.unwrap_or("");
    if name_a.is_empty() || name_b.is_empty() {
        return Ok(
            serde_json::json!({"error": "identity_name and identity_b are required"})
                .to_string(),
        );
    }
    // Bind NULL (not "") when no file was supplied so the `$3::text IS NULL` guards in
    // the query can actually disable the file filter.
    let file_filter = Some(file_uuid).filter(|f| !f.is_empty());

    // `full_text` and `segments` are JSON lookups and may be NULL, so both are decoded
    // as optional instead of failing the whole query on the first incomplete node.
    let rows: Vec<(String, Option<String>, Option<serde_json::Value>)> =
        sqlx::query_as(&format!(
            "SELECT sn.external_id, sn.properties->>'full_text' AS full_text, sn.properties->'segments' AS segments \
             FROM {} i \
             JOIN {} fd ON fd.identity_id = i.id AND ($3::text IS NULL OR fd.file_uuid = $3) \
             JOIN {} fn ON fn.file_uuid = fd.file_uuid \
             AND fn.node_type = 'face_trace' \
             AND fn.external_id = CONCAT('trace_', fd.trace_id) \
             JOIN {} e ON e.source_node_id = fn.id \
             AND e.edge_type = 'SPEAKS_AS' \
             AND ($3::text IS NULL OR e.file_uuid = $3) \
             JOIN {} sn ON sn.id = e.target_node_id \
             WHERE (i.name ILIKE $1 OR i.name ILIKE $2) \
             ORDER BY sn.external_id",
            id_table, fd_table, nodes, edges, nodes
        ))
        .bind(name_a)
        .bind(name_b)
        .bind(file_filter)
        .fetch_all(pool)
        .await
        .map_err(|e| e.to_string())?;

    // Parse every row's segments once instead of re-reading the JSON per pair.
    let parsed: Vec<(&str, Vec<(f64, f64, &str)>)> = rows
        .iter()
        .map(|(sid, _, segs)| {
            let list = segs
                .as_ref()
                .and_then(|v| v.as_array())
                .map(|arr| arr.iter().filter_map(segment_parts).collect::<Vec<_>>())
                .unwrap_or_default();
            (sid.as_str(), list)
        })
        .collect();

    let mut interactions = Vec::new();
    for (idx, (sid_a, segs_a)) in parsed.iter().enumerate() {
        for (sid_b, segs_b) in &parsed[idx + 1..] {
            // The same speaker node can be returned several times (once per matching
            // face detection); pairing it with itself would report every segment as
            // overlapping itself.
            if sid_a == sid_b {
                continue;
            }
            for &(a_start, a_end, a_text) in segs_a {
                for &(b_start, b_end, b_text) in segs_b {
                    let overlap_start = a_start.max(b_start);
                    let overlap_end = a_end.min(b_end);
                    if overlap_start < overlap_end {
                        interactions.push(serde_json::json!({
                            "speaker_a": sid_a,
                            "speaker_b": sid_b,
                            "time_range_s": [overlap_start, overlap_end],
                            "dialogue_a": a_text,
                            "dialogue_b": b_text,
                        }));
                    }
                }
            }
        }
    }

    // Chronological order; incomparable values are treated as equal rather than
    // panicking.
    interactions.sort_by(|a, b| {
        let a_start = a["time_range_s"][0].as_f64().unwrap_or(0.0);
        let b_start = b["time_range_s"][0].as_f64().unwrap_or(0.0);
        a_start
            .partial_cmp(&b_start)
            .unwrap_or(std::cmp::Ordering::Equal)
    });
    // A negative limit would wrap to a huge value under `as usize`; treat it as zero.
    interactions.truncate(usize::try_from(limit.max(0)).unwrap_or(usize::MAX));

    // NOTE(review): these are the texts of the first two returned rows, which are not
    // guaranteed to belong to `identity_name` and `identity_b` respectively — confirm.
    let speaker_a_text = rows.first().and_then(|r| r.1.clone());
    let speaker_b_text = rows.get(1).and_then(|r| r.1.clone());
    Ok(serde_json::json!({
        "interactions": interactions,
        "speaker_a_text": speaker_a_text,
        "speaker_b_text": speaker_b_text
    })
    .to_string())
}
_ => Ok(
serde_json::json!({"error": format!("Unknown query_type: {}", query_type)}).to_string(),
),
}
}
async fn exec_smart_search(_pool: &sqlx::PgPool, args: &serde_json::Value) -> Result<String, String> {
async fn exec_smart_search(
_pool: &sqlx::PgPool,
args: &serde_json::Value,
) -> Result<String, String> {
let query = args.get("query").and_then(|v| v.as_str()).unwrap_or("");
let file_uuid = args.get("file_uuid").and_then(|v| v.as_str());
let limit = args.get("limit").and_then(|v| v.as_i64()).unwrap_or(5);
@@ -359,7 +521,8 @@ async fn exec_smart_search(_pool: &sqlx::PgPool, args: &serde_json::Value) -> Re
let chunk_table = schema::table_name("chunk");
let mut sql = format!(
"SELECT chunk_id, text_content, start_frame, end_frame, chunk_type \
FROM {} WHERE text_content ILIKE $1", chunk_table
FROM {} WHERE text_content ILIKE $1",
chunk_table
);
if file_uuid.is_some() {
sql.push_str(" AND file_uuid = $2");
@@ -369,21 +532,147 @@ async fn exec_smart_search(_pool: &sqlx::PgPool, args: &serde_json::Value) -> Re
if let Some(fuid) = file_uuid {
let like = format!("%{}%", query);
let rows: Vec<(String, Option<String>, i64, i64, String)> = sqlx::query_as(&sql)
.bind(&like).bind(fuid)
.bind(&like)
.bind(fuid)
.fetch_all(_pool)
.await.map_err(|e| e.to_string())?;
.await
.map_err(|e| e.to_string())?;
Ok(serde_json::json!({"results": rows}).to_string())
} else {
let like = format!("%{}%", query);
let rows: Vec<(String, Option<String>, i64, i64, String)> = sqlx::query_as(&sql)
.bind(&like)
.fetch_all(_pool)
.await.map_err(|e| e.to_string())?;
.await
.map_err(|e| e.to_string())?;
Ok(serde_json::json!({"results": rows}).to_string())
}
}
async fn exec_get_identity_detail(pool: &sqlx::PgPool, args: &serde_json::Value) -> Result<String, String> {
async fn exec_identity_text(
pool: &sqlx::PgPool,
args: &serde_json::Value,
) -> Result<String, String> {
let q = args.get("q").and_then(|v| v.as_str()).unwrap_or("");
let file_uuid = args.get("file_uuid").and_then(|v| v.as_str());
let limit = args
.get("limit")
.and_then(|v| v.as_i64())
.unwrap_or(10)
.min(50);
let chunk_table = schema::table_name("chunk");
let fd_table = schema::table_name("face_detections");
let id_table = schema::table_name("identities");
let like_q = format!("%{}%", q.replace('%', "%%"));
let sql = format!(
"SELECT c.chunk_id, c.start_time, c.end_time, c.text_content, \
i.name AS identity_name, fd.trace_id, i.source AS identity_source \
FROM {} c \
JOIN {} fd ON fd.file_uuid = c.file_uuid \
AND fd.frame_number BETWEEN c.start_frame AND c.end_frame \
AND fd.identity_id IS NOT NULL \
JOIN {} i ON i.id = fd.identity_id \
WHERE ($1::text IS NULL OR c.file_uuid = $1) \
AND (LOWER(c.text_content) LIKE LOWER($2) OR LOWER(c.content::text) LIKE LOWER($2)) \
ORDER BY c.start_time \
LIMIT $3",
chunk_table, fd_table, id_table
);
let rows: Vec<(
String,
f64,
f64,
Option<String>,
String,
Option<i32>,
String,
)> = sqlx::query_as(&sql)
.bind(file_uuid)
.bind(&like_q)
.bind(limit)
.fetch_all(pool)
.await
.map_err(|e| e.to_string())?;
Ok(
serde_json::json!({"results": rows.iter().map(|(chunk_id, st, et, txt, name, tid, src)| {
serde_json::json!({
"chunk_id": chunk_id,
"start_time": st,
"end_time": et,
"text": txt,
"identity_name": name,
"trace_id": tid,
"source": src
})
} ).collect::<Vec<_>>()})
.to_string(),
)
}
async fn exec_identities_search(
pool: &sqlx::PgPool,
args: &serde_json::Value,
) -> Result<String, String> {
let q = args.get("q").and_then(|v| v.as_str()).unwrap_or("");
let file_uuid = args.get("file_uuid").and_then(|v| v.as_str());
let limit = args
.get("limit")
.and_then(|v| v.as_i64())
.unwrap_or(10)
.min(50);
let id_table = schema::table_name("identities");
let fd_table = schema::table_name("face_detections");
let chunk_table = schema::table_name("chunk");
let like_q = format!("%{}%", q.replace('%', "%%"));
let sql = format!(
"SELECT DISTINCT ON (i.name, c.chunk_id) \
i.name, c.chunk_id, c.start_time, c.end_time, c.text_content, fd.trace_id \
FROM {} i \
JOIN {} fd ON fd.identity_id = i.id \
JOIN {} c ON c.file_uuid = fd.file_uuid \
AND c.start_time <= fd.frame_number / COALESCE(c.fps, 25.0) \
AND c.end_time >= fd.frame_number / COALESCE(c.fps, 25.0) \
WHERE (i.name ILIKE $1 \
OR EXISTS (SELECT 1 FROM jsonb_array_elements(i.metadata->'aliases') AS a WHERE a->>'name' ILIKE $1)) \
AND ($2::text IS NULL OR fd.file_uuid = $2) \
ORDER BY i.name, c.chunk_id, c.start_time \
LIMIT $3",
id_table, fd_table, chunk_table
);
let rows: Vec<(String, String, f64, f64, Option<String>, Option<i32>)> = sqlx::query_as(&sql)
.bind(&like_q)
.bind(file_uuid)
.bind(limit)
.fetch_all(pool)
.await
.map_err(|e| e.to_string())?;
Ok(
serde_json::json!({"results": rows.iter().map(|(name, chunk_id, st, et, txt, tid)| {
serde_json::json!({
"identity_name": name,
"chunk_id": chunk_id,
"start_time": st,
"end_time": et,
"text": txt,
"trace_id": tid,
})
}).collect::<Vec<_>>()})
.to_string(),
)
}
async fn exec_get_identity_detail(
pool: &sqlx::PgPool,
args: &serde_json::Value,
) -> Result<String, String> {
let name = args.get("name").and_then(|v| v.as_str()).unwrap_or("");
let id_table = schema::table_name("identities");
let row: Option<(String, String, Option<String>, Option<i32>, Option<String>)> = sqlx::query_as(&format!(
@@ -396,7 +685,10 @@ async fn exec_get_identity_detail(pool: &sqlx::PgPool, args: &serde_json::Value)
Ok(serde_json::json!({"identity": row.map(|(u, n, s, t, c)| serde_json::json!({"uuid": u, "name": n, "source": s, "tmdb_id": t, "character": c}))}).to_string())
}
async fn exec_get_file_info(pool: &sqlx::PgPool, args: &serde_json::Value) -> Result<String, String> {
async fn exec_get_file_info(
pool: &sqlx::PgPool,
args: &serde_json::Value,
) -> Result<String, String> {
let file_uuid = args.get("file_uuid").and_then(|v| v.as_str()).unwrap_or("");
let videos = schema::table_name("videos");
let row: Option<(String, f64, i32, i32, f64)> = sqlx::query_as(&format!(
@@ -405,11 +697,15 @@ async fn exec_get_file_info(pool: &sqlx::PgPool, args: &serde_json::Value) -> Re
))
.bind(file_uuid)
.fetch_optional(pool)
.await.map_err(|e| e.to_string())?;
.await
.map_err(|e| e.to_string())?;
Ok(serde_json::json!({"file_info": row.map(|(n, d, w, h, f)| serde_json::json!({"file_name": n, "duration_sec": d, "width": w, "height": h, "fps": f}))}).to_string())
}
async fn exec_get_representative_frame(pool: &sqlx::PgPool, args: &serde_json::Value) -> Result<String, String> {
async fn exec_get_representative_frame(
pool: &sqlx::PgPool,
args: &serde_json::Value,
) -> Result<String, String> {
let file_uuid = args.get("file_uuid").and_then(|v| v.as_str()).unwrap_or("");
match crate::core::processor::tkg::query_auto_representative_frame(pool, file_uuid).await {
Ok(r) => Ok(serde_json::json!({
@@ -417,24 +713,131 @@ async fn exec_get_representative_frame(pool: &sqlx::PgPool, args: &serde_json::V
"face_quality": r.face_quality,
"main_identities": r.main_identities,
"traces": r.traces,
}).to_string()),
})
.to_string()),
Err(e) => Ok(serde_json::json!({"error": e.to_string()}).to_string()),
}
}
/// Tool `analyze_frame`: extract one video frame and ask the vision LLM about it.
///
/// Arguments (JSON): `file_uuid` (required), `question` (optional; a default
/// "describe this frame" prompt is used otherwise), `frame_number` (optional).
///
/// Frame selection when `frame_number` is absent: the auto representative frame from
/// the TKG; if that lookup fails, the frame at half the stored duration; else frame 0.
///
/// The frame is extracted by running ffmpeg (path from `MOMENTRY_FFMPEG`, then a
/// Homebrew `ffmpeg-full` install if present, then `ffmpeg` on `PATH`) and piping a
/// single MJPEG image to stdout, which is base64-encoded and sent to `call_llm_vision`.
///
/// Returns a JSON string with `frame_number`, `timestamp_secs` and `analysis`, or
/// `{"error": ...}` for a missing `file_uuid`, an ffmpeg failure, or an empty image.
/// Returns `Err` for database errors, an unknown video, a failure to launch ffmpeg,
/// or a vision-LLM error.
async fn exec_analyze_frame(
    pool: &sqlx::PgPool,
    args: &serde_json::Value,
) -> Result<String, String> {
    let file_uuid = args.get("file_uuid").and_then(|v| v.as_str()).unwrap_or("");
    let question = args
        .get("question")
        .and_then(|v| v.as_str())
        .unwrap_or("請描述這個畫面中的內容");

    if file_uuid.is_empty() {
        return Ok(serde_json::json!({"error": "file_uuid is required"}).to_string());
    }

    let videos = schema::table_name("videos");
    let (video_path, fps): (String, f64) = sqlx::query_as(&format!(
        "SELECT file_path, COALESCE(fps, 25.0) FROM {} WHERE file_uuid = $1",
        videos
    ))
    .bind(file_uuid)
    .fetch_optional(pool)
    .await
    .map_err(|e| e.to_string())?
    .ok_or_else(|| "Video not found".to_string())?;

    // COALESCE only covers NULL. A stored fps of 0 (or a non-finite value) would make
    // the timestamp below inf/NaN, so fall back to the same 25.0 default.
    let fps = if fps.is_finite() && fps > 0.0 {
        fps
    } else {
        25.0
    };

    let frame_number: i64 = match args.get("frame_number").and_then(|v| v.as_i64()) {
        Some(f) => f,
        None => {
            match crate::core::processor::tkg::query_auto_representative_frame(pool, file_uuid)
                .await
            {
                Ok(r) => r.frame_number,
                Err(_) => {
                    // No representative frame available: use the middle of the video.
                    let duration: f64 = sqlx::query_scalar(&format!(
                        "SELECT COALESCE(duration, 0) FROM {} WHERE file_uuid = $1",
                        videos
                    ))
                    .bind(file_uuid)
                    .fetch_optional(pool)
                    .await
                    .map_err(|e| e.to_string())?
                    .unwrap_or(0.0);
                    if duration > 0.0 {
                        ((duration / 2.0) * fps) as i64
                    } else {
                        0
                    }
                }
            }
        }
    };
    // A negative frame number (e.g. from a malformed tool call) would become a
    // negative seek position; clamp to the first frame.
    let frame_number = frame_number.max(0);

    let timestamp_secs = frame_number as f64 / fps;

    let ffmpeg_path = std::env::var("MOMENTRY_FFMPEG").unwrap_or_else(|_| {
        let full = "/opt/homebrew/opt/ffmpeg-full/bin/ffmpeg";
        if std::path::Path::new(full).exists() {
            full.to_string()
        } else {
            "ffmpeg".to_string()
        }
    });

    let seek_arg = format!("{:.3}", timestamp_secs);
    let output = tokio::process::Command::new(&ffmpeg_path)
        .args([
            "-ss",
            seek_arg.as_str(),
            "-i",
            video_path.as_str(),
            "-vframes",
            "1",
            "-f",
            "image2pipe",
            "-vcodec",
            "mjpeg",
            "-",
        ])
        .output()
        .await
        .map_err(|e| format!("ffmpeg execution error: {}", e))?;

    if !output.status.success() {
        let stderr = String::from_utf8_lossy(&output.stderr);
        return Ok(serde_json::json!({"error": format!("ffmpeg failed: {}", stderr)}).to_string());
    }
    // ffmpeg can exit successfully without writing a frame (e.g. when seeking past the
    // end of the file); do not send an empty image to the vision model.
    if output.stdout.is_empty() {
        return Ok(serde_json::json!({
            "error": format!(
                "ffmpeg produced no image for frame {} ({:.3}s)",
                frame_number, timestamp_secs
            )
        })
        .to_string());
    }

    let base64_img = BASE64.encode(&output.stdout);

    let system_prompt =
        "你是一個專業的影片畫面分析助手。請根據提供的畫面以及用戶的問題,詳細描述畫面中的內容,包括場景、人物、動作、表情、物件等。請用繁體中文回答。";

    let vision_result = call_llm_vision(system_prompt, question, vec![base64_img], 1024, 120)
        .await
        .map_err(|e| e.to_string())?;

    Ok(serde_json::json!({
        "frame_number": frame_number,
        "timestamp_secs": timestamp_secs,
        "analysis": vision_result,
    })
    .to_string())
}
// ── Tool Router ───────────────────────────────────────────────────
async fn execute_tool(pool: &sqlx::PgPool, tool_call: &ToolCall) -> (String, String, String) {
let name = tool_call.function.name.clone();
let args: serde_json::Value = serde_json::from_str(&tool_call.function.arguments).unwrap_or_default();
let args: serde_json::Value =
serde_json::from_str(&tool_call.function.arguments).unwrap_or_default();
let result = match name.as_str() {
"find_file" => exec_find_file(pool, &args).await,
"list_files" => exec_list_files(pool, &args).await,
"tkg_query" => exec_tkg_query(pool, &args).await,
"smart_search" => exec_smart_search(pool, &args).await,
"identity_text" => exec_identity_text(pool, &args).await,
"identities_search" => exec_identities_search(pool, &args).await,
"get_identity_detail" => exec_get_identity_detail(pool, &args).await,
"get_file_info" => exec_get_file_info(pool, &args).await,
"get_representative_frame" => exec_get_representative_frame(pool, &args).await,
"analyze_frame" => exec_analyze_frame(pool, &args).await,
_ => Err(format!("Unknown tool: {}", name)),
};
let content = match result {
@@ -476,7 +879,11 @@ async fn run_tool_loop(
for call in &calls {
let (tool_call_id, name, content) = execute_tool(pool, call).await;
sources.push(serde_json::json!({"tool": name, "result": content}));
messages.push(function_calling::make_tool_result(&tool_call_id, &name, &content));
messages.push(function_calling::make_tool_result(
&tool_call_id,
&name,
&content,
));
}
}
Err(e) => {
@@ -484,7 +891,10 @@ async fn run_tool_loop(
}
}
}
("已達到最大查詢次數,請縮小問題範圍後重新詢問。".to_string(), sources)
(
"已達到最大查詢次數,請縮小問題範圍後重新詢問。".to_string(),
sources,
)
}
// ── Handler ───────────────────────────────────────────────────────
@@ -495,13 +905,8 @@ async fn agent_search(
) -> Result<Json<AgentSearchResponse>, (StatusCode, Json<serde_json::Value>)> {
let (conv_id, history) = get_or_create_conv(req.conversation_id.as_deref());
let (answer, sources) = run_tool_loop(
state.db.pool(),
SYSTEM_PROMPT,
&req.query,
history,
)
.await;
let (answer, sources) =
run_tool_loop(state.db.pool(), SYSTEM_PROMPT, &req.query, history).await;
// Save updated messages for conversation continuation
let new_msgs = function_calling::build_conversation(SYSTEM_PROMPT, &req.query, vec![]);
@@ -509,7 +914,11 @@ async fn agent_search(
let needs_input = answer.contains('') || answer.contains('?');
let suggestions = if needs_input {
Some(vec!["演員名".to_string(), "電影片名".to_string(), "年份".to_string()])
Some(vec![
"演員名".to_string(),
"電影片名".to_string(),
"年份".to_string(),
])
} else {
None
};
@@ -526,6 +935,5 @@ async fn agent_search(
// ── Routes ─────────────────────────────────────────────────────────
/// Builds the router for the agent search API: `POST /api/v1/agents/search` is
/// handled by `agent_search`.
pub fn agent_search_routes() -> Router<AppState> {
    // Single router expression; a second, duplicated `Router::new()...` expression in
    // the same body would not compile.
    Router::new().route("/api/v1/agents/search", post(agent_search))
}