feat: ASRX hybrid pipeline, identity history, worker fixes, checkpoint system
This commit is contained in:
@@ -1,10 +1,4 @@
|
||||
use axum::{
|
||||
extract::State,
|
||||
http::StatusCode,
|
||||
response::Json,
|
||||
routing::post,
|
||||
Router,
|
||||
};
|
||||
use axum::{extract::State, http::StatusCode, response::Json, routing::post, Router};
|
||||
use once_cell::sync::Lazy;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::collections::HashMap;
|
||||
@@ -13,7 +7,10 @@ use std::time::Instant;
|
||||
|
||||
use crate::api::types::AppState;
|
||||
use crate::core::db::schema;
|
||||
use crate::core::llm::function_calling::{self, ChatMessage, LlmResponse, ToolCall, ToolDef};
|
||||
use crate::core::llm::function_calling::{
|
||||
self, call_llm_vision, ChatMessage, LlmResponse, ToolCall, ToolDef,
|
||||
};
|
||||
use base64::{engine::general_purpose::STANDARD as BASE64, Engine};
|
||||
|
||||
// ── Conversation Manager ─────────────────────────────────────────
|
||||
|
||||
@@ -43,11 +40,14 @@ fn get_or_create_conv(conv_id: Option<&str>) -> (String, Vec<ChatMessage>) {
|
||||
}
|
||||
}
|
||||
let id = uuid::Uuid::new_v4().to_string().replace('-', "")[..16].to_string();
|
||||
map.insert(id.clone(), Conversation {
|
||||
messages: Vec::new(),
|
||||
created_at: Instant::now(),
|
||||
last_active: Instant::now(),
|
||||
});
|
||||
map.insert(
|
||||
id.clone(),
|
||||
Conversation {
|
||||
messages: Vec::new(),
|
||||
created_at: Instant::now(),
|
||||
last_active: Instant::now(),
|
||||
},
|
||||
);
|
||||
(id, Vec::new())
|
||||
}
|
||||
|
||||
@@ -85,8 +85,13 @@ const SYSTEM_PROMPT: &str = r#"你是 Momentry 影片分析助手。回答用戶
|
||||
## 工具使用規則
|
||||
1. 先確認用戶在問哪部影片 — 使用 find_file 或 list_files
|
||||
2. 人物問題優先使用 tkg_query
|
||||
3. 語意/內容問題使用 smart_search 或 universal_search
|
||||
4. 可以同時呼叫多個工具
|
||||
3. 人物台詞/發言問題使用 identities_search(輸入人名→回傳台詞片段)
|
||||
4. 人物對話互動(誰跟誰說話)使用 tkg_query 的 speaker_interaction
|
||||
5. 人物台詞內容使用 tkg_query 的 speaker_dialogue
|
||||
6. 用文字反查人物使用 identity_text(輸入關鍵字→找出誰說/提到這段話)
|
||||
7. 語意/內容問題使用 smart_search 或 universal_search
|
||||
8. 畫面分析使用 analyze_frame — 可以分析影片中的任何畫面內容(場景、人物表情、動作、物件等)
|
||||
9. 可以同時呼叫多個工具
|
||||
|
||||
## 引導規則
|
||||
- 如果用戶沒說片名 → 用 find_file 搜尋,如果名稱不明確就反問
|
||||
@@ -120,16 +125,16 @@ fn make_tools(pool: &sqlx::PgPool) -> Vec<ToolDef> {
|
||||
),
|
||||
function_calling::make_tool(
|
||||
"tkg_query",
|
||||
"查詢影片的人物互動、配對、同框資料。query_type 包括:top_identities(人物排名)、first_cooccurrence(第一次同框)、identity_details(人物詳細)、mutual_gaze(互看)、interaction_network(互動網絡)、identity_traces(出場片段)、file_info(影片資訊)。",
|
||||
"查詢影片的人物互動、配對、同框、台詞資料。query_type 包括:top_identities(人物排名)、first_cooccurrence(第一次同框)、identity_details(人物詳細)、mutual_gaze(互看)、interaction_network(互動網絡)、identity_traces(出場片段)、file_info(影片資訊)、speaker_dialogue(人物台詞)、speaker_interaction(兩人對話互動)。",
|
||||
serde_json::json!({
|
||||
"file_uuid": {"type": "string", "description": "影片 UUID"},
|
||||
"query_type": {
|
||||
"type": "string",
|
||||
"enum": ["top_identities", "first_cooccurrence", "identity_details", "mutual_gaze", "interaction_network", "identity_traces", "file_info"],
|
||||
"enum": ["top_identities", "first_cooccurrence", "identity_details", "mutual_gaze", "interaction_network", "identity_traces", "file_info", "speaker_dialogue", "speaker_interaction"],
|
||||
"description": "查詢類型"
|
||||
},
|
||||
"identity_name": {"type": "string", "description": "人物名稱(配合 identity_details / identity_traces)"},
|
||||
"identity_b": {"type": "string", "description": "第二人物名稱(配合 first_cooccurrence / mutual_gaze)"},
|
||||
"identity_name": {"type": "string", "description": "人物名稱(配合 identity_details / identity_traces / speaker_dialogue / speaker_interaction)"},
|
||||
"identity_b": {"type": "string", "description": "第二人物名稱(配合 first_cooccurrence / mutual_gaze / speaker_interaction)"},
|
||||
"limit": {"type": "integer", "default": 5}
|
||||
}),
|
||||
vec!["file_uuid", "query_type"],
|
||||
@@ -144,6 +149,26 @@ fn make_tools(pool: &sqlx::PgPool) -> Vec<ToolDef> {
|
||||
}),
|
||||
vec!["query"],
|
||||
),
|
||||
function_calling::make_tool(
|
||||
"identity_text",
|
||||
"搜尋文字關鍵字,找出有提及該內容的影片人物。適合回答「誰說了OOO」、「誰跟OOO有關」。不是查詢人物的台詞,而是用文字反查人物。",
|
||||
serde_json::json!({
|
||||
"q": {"type": "string", "description": "搜尋關鍵字(台詞片段、主題等)"},
|
||||
"file_uuid": {"type": "string", "description": "限制搜尋範圍(可選)"},
|
||||
"limit": {"type": "integer", "default": 10}
|
||||
}),
|
||||
vec!["q"],
|
||||
),
|
||||
function_calling::make_tool(
|
||||
"identities_search",
|
||||
"查詢特定人物的台詞/發言內容。輸入人物名稱,回傳該人物在影片中說過的話。適合回答「某某人說了什麼」、「某某人的台詞」。",
|
||||
serde_json::json!({
|
||||
"q": {"type": "string", "description": "人物名稱關鍵字(姓名、角色名、別名)"},
|
||||
"file_uuid": {"type": "string", "description": "限制搜尋範圍(可選)"},
|
||||
"limit": {"type": "integer", "default": 10}
|
||||
}),
|
||||
vec!["q"],
|
||||
),
|
||||
function_calling::make_tool(
|
||||
"get_identity_detail",
|
||||
"查詢單一身份的詳細資料(名字、角色、TMDb 資訊)。",
|
||||
@@ -168,6 +193,16 @@ fn make_tools(pool: &sqlx::PgPool) -> Vec<ToolDef> {
|
||||
}),
|
||||
vec!["file_uuid"],
|
||||
),
|
||||
function_calling::make_tool(
|
||||
"analyze_frame",
|
||||
"分析影片中指定畫面的視覺內容(場景、人物表情、動作、物件等)。若不指定 frame_number,會使用代表性畫面。問題會傳給視覺 LLM 分析。",
|
||||
serde_json::json!({
|
||||
"file_uuid": {"type": "string", "description": "影片 UUID"},
|
||||
"question": {"type": "string", "description": "關於畫面的問題,例如「這個場景發生什麼事?」"},
|
||||
"frame_number": {"type": "integer", "description": "指定的 frame 編號(可選)"}
|
||||
}),
|
||||
vec!["file_uuid"],
|
||||
),
|
||||
]
|
||||
}
|
||||
|
||||
@@ -193,9 +228,10 @@ async fn exec_find_file(pool: &sqlx::PgPool, args: &serde_json::Value) -> Result
|
||||
if rows.is_empty() {
|
||||
return Ok(serde_json::json!({"found": false, "message": "No files match the query. Try different keywords."}).to_string());
|
||||
}
|
||||
let files: Vec<serde_json::Value> = rows.into_iter().map(|(u, n, hd)| {
|
||||
serde_json::json!({"file_uuid": u, "file_name": n, "has_data": hd})
|
||||
}).collect();
|
||||
let files: Vec<serde_json::Value> = rows
|
||||
.into_iter()
|
||||
.map(|(u, n, hd)| serde_json::json!({"file_uuid": u, "file_name": n, "has_data": hd}))
|
||||
.collect();
|
||||
Ok(serde_json::json!({"found": true, "files": files}).to_string())
|
||||
}
|
||||
|
||||
@@ -214,15 +250,19 @@ async fn exec_list_files(pool: &sqlx::PgPool, args: &serde_json::Value) -> Resul
|
||||
.await
|
||||
.map_err(|e| e.to_string())?;
|
||||
|
||||
let files: Vec<serde_json::Value> = rows.into_iter().map(|(u, n, hd)| {
|
||||
serde_json::json!({"file_uuid": u, "file_name": n, "has_data": hd})
|
||||
}).collect();
|
||||
let files: Vec<serde_json::Value> = rows
|
||||
.into_iter()
|
||||
.map(|(u, n, hd)| serde_json::json!({"file_uuid": u, "file_name": n, "has_data": hd}))
|
||||
.collect();
|
||||
Ok(serde_json::json!({"files": files}).to_string())
|
||||
}
|
||||
|
||||
async fn exec_tkg_query(pool: &sqlx::PgPool, args: &serde_json::Value) -> Result<String, String> {
|
||||
let file_uuid = args.get("file_uuid").and_then(|v| v.as_str()).unwrap_or("");
|
||||
let query_type = args.get("query_type").and_then(|v| v.as_str()).unwrap_or("");
|
||||
let query_type = args
|
||||
.get("query_type")
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or("");
|
||||
let identity_name = args.get("identity_name").and_then(|v| v.as_str());
|
||||
let identity_b = args.get("identity_b").and_then(|v| v.as_str());
|
||||
let limit = args.get("limit").and_then(|v| v.as_i64()).unwrap_or(5);
|
||||
@@ -242,9 +282,11 @@ async fn exec_tkg_query(pool: &sqlx::PgPool, args: &serde_json::Value) -> Result
|
||||
GROUP BY i.uuid, i.name ORDER BY face_count DESC LIMIT $2",
|
||||
fd_table, id_table
|
||||
))
|
||||
.bind(file_uuid).bind(limit)
|
||||
.bind(file_uuid)
|
||||
.bind(limit)
|
||||
.fetch_all(pool)
|
||||
.await.map_err(|e| e.to_string())?;
|
||||
.await
|
||||
.map_err(|e| e.to_string())?;
|
||||
Ok(serde_json::json!({"identities": rows}).to_string())
|
||||
}
|
||||
"first_cooccurrence" => {
|
||||
@@ -325,8 +367,9 @@ async fn exec_tkg_query(pool: &sqlx::PgPool, args: &serde_json::Value) -> Result
|
||||
}
|
||||
"identity_traces" => {
|
||||
let name = identity_name.unwrap_or("");
|
||||
let rows: Vec<(i32, i64, i32, i32)> = sqlx::query_as(&format!(
|
||||
"SELECT fd.trace_id, COUNT(*)::bigint, MIN(fd.frame_number)::int, MAX(fd.frame_number)::int \
|
||||
// MIN/MAX frame_number should be bigint (i64), not int
|
||||
let rows: Vec<(i32, i64, i64, i64)> = sqlx::query_as(&format!(
|
||||
"SELECT fd.trace_id, COUNT(*)::bigint, MIN(fd.frame_number)::bigint, MAX(fd.frame_number)::bigint \
|
||||
FROM {} fd JOIN {} i ON i.id = fd.identity_id \
|
||||
WHERE fd.file_uuid = $1 AND i.name ILIKE $2 \
|
||||
GROUP BY fd.trace_id ORDER BY COUNT(*) DESC LIMIT $3",
|
||||
@@ -344,14 +387,133 @@ async fn exec_tkg_query(pool: &sqlx::PgPool, args: &serde_json::Value) -> Result
|
||||
))
|
||||
.bind(file_uuid)
|
||||
.fetch_optional(pool)
|
||||
.await.map_err(|e| e.to_string())?;
|
||||
.await
|
||||
.map_err(|e| e.to_string())?;
|
||||
Ok(serde_json::json!({"file_info": row.map(|(n, d, w, h, f)| serde_json::json!({"file_name": n, "duration_sec": d, "width": w, "height": h, "fps": f}))}).to_string())
|
||||
}
|
||||
_ => Ok(serde_json::json!({"error": format!("Unknown query_type: {}", query_type)}).to_string()),
|
||||
"speaker_dialogue" => {
|
||||
let name = identity_name.unwrap_or("");
|
||||
let rows: Vec<(String, Option<String>)> = sqlx::query_as(&format!(
|
||||
"SELECT DISTINCT sn.external_id, sn.properties->>'full_text' AS full_text \
|
||||
FROM {} i \
|
||||
JOIN {} fd ON fd.identity_id = i.id AND ($2::text IS NULL OR fd.file_uuid = $2) \
|
||||
JOIN {} fn ON fn.file_uuid = fd.file_uuid \
|
||||
AND fn.node_type = 'face_trace' \
|
||||
AND fn.external_id = CONCAT('trace_', fd.trace_id) \
|
||||
JOIN {} e ON e.source_node_id = fn.id \
|
||||
AND e.edge_type = 'SPEAKS_AS' \
|
||||
AND ($2::text IS NULL OR e.file_uuid = $2) \
|
||||
JOIN {} sn ON sn.id = e.target_node_id \
|
||||
WHERE i.name ILIKE $1 \
|
||||
LIMIT $3",
|
||||
id_table, fd_table, nodes, edges, nodes
|
||||
))
|
||||
.bind(name)
|
||||
.bind(file_uuid)
|
||||
.bind(limit)
|
||||
.fetch_all(pool)
|
||||
.await
|
||||
.map_err(|e| e.to_string())?;
|
||||
|
||||
Ok(
|
||||
serde_json::json!({"speakers": rows.iter().map(|(sid, text)| {
|
||||
serde_json::json!({"speaker_id": sid, "dialogue": text})
|
||||
}).collect::<Vec<_>>()})
|
||||
.to_string(),
|
||||
)
|
||||
}
|
||||
"speaker_interaction" => {
|
||||
let name_a = identity_name.unwrap_or("");
|
||||
let name_b = identity_b.unwrap_or("");
|
||||
if name_a.is_empty() || name_b.is_empty() {
|
||||
return Ok(
|
||||
serde_json::json!({"error": "identity_name and identity_b are required"})
|
||||
.to_string(),
|
||||
);
|
||||
}
|
||||
|
||||
// Get both speakers' segments from TKG
|
||||
let rows: Vec<(String, String, serde_json::Value)> = sqlx::query_as(&format!(
|
||||
"SELECT sn.external_id, sn.properties->>'full_text' AS full_text, sn.properties->'segments' AS segments \
|
||||
FROM {} i \
|
||||
JOIN {} fd ON fd.identity_id = i.id AND ($3::text IS NULL OR fd.file_uuid = $3) \
|
||||
JOIN {} fn ON fn.file_uuid = fd.file_uuid \
|
||||
AND fn.node_type = 'face_trace' \
|
||||
AND fn.external_id = CONCAT('trace_', fd.trace_id) \
|
||||
JOIN {} e ON e.source_node_id = fn.id \
|
||||
AND e.edge_type = 'SPEAKS_AS' \
|
||||
AND ($3::text IS NULL OR e.file_uuid = $3) \
|
||||
JOIN {} sn ON sn.id = e.target_node_id \
|
||||
WHERE (i.name ILIKE $1 OR i.name ILIKE $2) \
|
||||
ORDER BY sn.external_id",
|
||||
id_table, fd_table, nodes, edges, nodes
|
||||
))
|
||||
.bind(name_a)
|
||||
.bind(name_b)
|
||||
.bind(file_uuid)
|
||||
.fetch_all(pool)
|
||||
.await
|
||||
.map_err(|e| e.to_string())?;
|
||||
|
||||
let mut interactions = Vec::new();
|
||||
for i in 0..rows.len() {
|
||||
for j in i + 1..rows.len() {
|
||||
let (sid_a, text_a, segs_a_val) = &rows[i];
|
||||
let (sid_b, text_b, segs_b_val) = &rows[j];
|
||||
let segs_a = segs_a_val.as_array();
|
||||
let segs_b = segs_b_val.as_array();
|
||||
if let (Some(a_list), Some(b_list)) = (segs_a, segs_b) {
|
||||
for sa in a_list {
|
||||
let sa_start = sa.get("start").and_then(|v| v.as_f64()).unwrap_or(0.0);
|
||||
let sa_end = sa.get("end").and_then(|v| v.as_f64()).unwrap_or(0.0);
|
||||
let sa_text = sa.get("text").and_then(|v| v.as_str()).unwrap_or("");
|
||||
if sa_text.is_empty() {
|
||||
continue;
|
||||
}
|
||||
for sb in b_list {
|
||||
let sb_start =
|
||||
sb.get("start").and_then(|v| v.as_f64()).unwrap_or(0.0);
|
||||
let sb_end = sb.get("end").and_then(|v| v.as_f64()).unwrap_or(0.0);
|
||||
let sb_text = sb.get("text").and_then(|v| v.as_str()).unwrap_or("");
|
||||
if sb_text.is_empty() {
|
||||
continue;
|
||||
}
|
||||
// Check temporal overlap
|
||||
let overlap_start = sa_start.max(sb_start);
|
||||
let overlap_end = sa_end.min(sb_end);
|
||||
if overlap_start < overlap_end {
|
||||
interactions.push(serde_json::json!({
|
||||
"speaker_a": sid_a,
|
||||
"speaker_b": sid_b,
|
||||
"time_range_s": [overlap_start, overlap_end],
|
||||
"dialogue_a": sa_text,
|
||||
"dialogue_b": sb_text,
|
||||
}));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
interactions.sort_by(|a, b| {
|
||||
let a_start = a["time_range_s"][0].as_f64().unwrap_or(0.0);
|
||||
let b_start = b["time_range_s"][0].as_f64().unwrap_or(0.0);
|
||||
a_start.partial_cmp(&b_start).unwrap()
|
||||
});
|
||||
interactions.truncate(limit as usize);
|
||||
|
||||
Ok(serde_json::json!({"interactions": interactions, "speaker_a_text": rows.first().map(|r| r.1.clone()), "speaker_b_text": rows.get(1).map(|r| r.1.clone())}).to_string())
|
||||
}
|
||||
_ => Ok(
|
||||
serde_json::json!({"error": format!("Unknown query_type: {}", query_type)}).to_string(),
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
async fn exec_smart_search(_pool: &sqlx::PgPool, args: &serde_json::Value) -> Result<String, String> {
|
||||
async fn exec_smart_search(
|
||||
_pool: &sqlx::PgPool,
|
||||
args: &serde_json::Value,
|
||||
) -> Result<String, String> {
|
||||
let query = args.get("query").and_then(|v| v.as_str()).unwrap_or("");
|
||||
let file_uuid = args.get("file_uuid").and_then(|v| v.as_str());
|
||||
let limit = args.get("limit").and_then(|v| v.as_i64()).unwrap_or(5);
|
||||
@@ -359,7 +521,8 @@ async fn exec_smart_search(_pool: &sqlx::PgPool, args: &serde_json::Value) -> Re
|
||||
let chunk_table = schema::table_name("chunk");
|
||||
let mut sql = format!(
|
||||
"SELECT chunk_id, text_content, start_frame, end_frame, chunk_type \
|
||||
FROM {} WHERE text_content ILIKE $1", chunk_table
|
||||
FROM {} WHERE text_content ILIKE $1",
|
||||
chunk_table
|
||||
);
|
||||
if file_uuid.is_some() {
|
||||
sql.push_str(" AND file_uuid = $2");
|
||||
@@ -369,21 +532,147 @@ async fn exec_smart_search(_pool: &sqlx::PgPool, args: &serde_json::Value) -> Re
|
||||
if let Some(fuid) = file_uuid {
|
||||
let like = format!("%{}%", query);
|
||||
let rows: Vec<(String, Option<String>, i64, i64, String)> = sqlx::query_as(&sql)
|
||||
.bind(&like).bind(fuid)
|
||||
.bind(&like)
|
||||
.bind(fuid)
|
||||
.fetch_all(_pool)
|
||||
.await.map_err(|e| e.to_string())?;
|
||||
.await
|
||||
.map_err(|e| e.to_string())?;
|
||||
Ok(serde_json::json!({"results": rows}).to_string())
|
||||
} else {
|
||||
let like = format!("%{}%", query);
|
||||
let rows: Vec<(String, Option<String>, i64, i64, String)> = sqlx::query_as(&sql)
|
||||
.bind(&like)
|
||||
.fetch_all(_pool)
|
||||
.await.map_err(|e| e.to_string())?;
|
||||
.await
|
||||
.map_err(|e| e.to_string())?;
|
||||
Ok(serde_json::json!({"results": rows}).to_string())
|
||||
}
|
||||
}
|
||||
|
||||
async fn exec_get_identity_detail(pool: &sqlx::PgPool, args: &serde_json::Value) -> Result<String, String> {
|
||||
async fn exec_identity_text(
|
||||
pool: &sqlx::PgPool,
|
||||
args: &serde_json::Value,
|
||||
) -> Result<String, String> {
|
||||
let q = args.get("q").and_then(|v| v.as_str()).unwrap_or("");
|
||||
let file_uuid = args.get("file_uuid").and_then(|v| v.as_str());
|
||||
let limit = args
|
||||
.get("limit")
|
||||
.and_then(|v| v.as_i64())
|
||||
.unwrap_or(10)
|
||||
.min(50);
|
||||
|
||||
let chunk_table = schema::table_name("chunk");
|
||||
let fd_table = schema::table_name("face_detections");
|
||||
let id_table = schema::table_name("identities");
|
||||
let like_q = format!("%{}%", q.replace('%', "%%"));
|
||||
|
||||
let sql = format!(
|
||||
"SELECT c.chunk_id, c.start_time, c.end_time, c.text_content, \
|
||||
i.name AS identity_name, fd.trace_id, i.source AS identity_source \
|
||||
FROM {} c \
|
||||
JOIN {} fd ON fd.file_uuid = c.file_uuid \
|
||||
AND fd.frame_number BETWEEN c.start_frame AND c.end_frame \
|
||||
AND fd.identity_id IS NOT NULL \
|
||||
JOIN {} i ON i.id = fd.identity_id \
|
||||
WHERE ($1::text IS NULL OR c.file_uuid = $1) \
|
||||
AND (LOWER(c.text_content) LIKE LOWER($2) OR LOWER(c.content::text) LIKE LOWER($2)) \
|
||||
ORDER BY c.start_time \
|
||||
LIMIT $3",
|
||||
chunk_table, fd_table, id_table
|
||||
);
|
||||
|
||||
let rows: Vec<(
|
||||
String,
|
||||
f64,
|
||||
f64,
|
||||
Option<String>,
|
||||
String,
|
||||
Option<i32>,
|
||||
String,
|
||||
)> = sqlx::query_as(&sql)
|
||||
.bind(file_uuid)
|
||||
.bind(&like_q)
|
||||
.bind(limit)
|
||||
.fetch_all(pool)
|
||||
.await
|
||||
.map_err(|e| e.to_string())?;
|
||||
|
||||
Ok(
|
||||
serde_json::json!({"results": rows.iter().map(|(chunk_id, st, et, txt, name, tid, src)| {
|
||||
serde_json::json!({
|
||||
"chunk_id": chunk_id,
|
||||
"start_time": st,
|
||||
"end_time": et,
|
||||
"text": txt,
|
||||
"identity_name": name,
|
||||
"trace_id": tid,
|
||||
"source": src
|
||||
})
|
||||
} ).collect::<Vec<_>>()})
|
||||
.to_string(),
|
||||
)
|
||||
}
|
||||
|
||||
async fn exec_identities_search(
|
||||
pool: &sqlx::PgPool,
|
||||
args: &serde_json::Value,
|
||||
) -> Result<String, String> {
|
||||
let q = args.get("q").and_then(|v| v.as_str()).unwrap_or("");
|
||||
let file_uuid = args.get("file_uuid").and_then(|v| v.as_str());
|
||||
let limit = args
|
||||
.get("limit")
|
||||
.and_then(|v| v.as_i64())
|
||||
.unwrap_or(10)
|
||||
.min(50);
|
||||
|
||||
let id_table = schema::table_name("identities");
|
||||
let fd_table = schema::table_name("face_detections");
|
||||
let chunk_table = schema::table_name("chunk");
|
||||
let like_q = format!("%{}%", q.replace('%', "%%"));
|
||||
|
||||
let sql = format!(
|
||||
"SELECT DISTINCT ON (i.name, c.chunk_id) \
|
||||
i.name, c.chunk_id, c.start_time, c.end_time, c.text_content, fd.trace_id \
|
||||
FROM {} i \
|
||||
JOIN {} fd ON fd.identity_id = i.id \
|
||||
JOIN {} c ON c.file_uuid = fd.file_uuid \
|
||||
AND c.start_time <= fd.frame_number / COALESCE(c.fps, 25.0) \
|
||||
AND c.end_time >= fd.frame_number / COALESCE(c.fps, 25.0) \
|
||||
WHERE (i.name ILIKE $1 \
|
||||
OR EXISTS (SELECT 1 FROM jsonb_array_elements(i.metadata->'aliases') AS a WHERE a->>'name' ILIKE $1)) \
|
||||
AND ($2::text IS NULL OR fd.file_uuid = $2) \
|
||||
ORDER BY i.name, c.chunk_id, c.start_time \
|
||||
LIMIT $3",
|
||||
id_table, fd_table, chunk_table
|
||||
);
|
||||
|
||||
let rows: Vec<(String, String, f64, f64, Option<String>, Option<i32>)> = sqlx::query_as(&sql)
|
||||
.bind(&like_q)
|
||||
.bind(file_uuid)
|
||||
.bind(limit)
|
||||
.fetch_all(pool)
|
||||
.await
|
||||
.map_err(|e| e.to_string())?;
|
||||
|
||||
Ok(
|
||||
serde_json::json!({"results": rows.iter().map(|(name, chunk_id, st, et, txt, tid)| {
|
||||
serde_json::json!({
|
||||
"identity_name": name,
|
||||
"chunk_id": chunk_id,
|
||||
"start_time": st,
|
||||
"end_time": et,
|
||||
"text": txt,
|
||||
"trace_id": tid,
|
||||
})
|
||||
}).collect::<Vec<_>>()})
|
||||
.to_string(),
|
||||
)
|
||||
}
|
||||
|
||||
async fn exec_get_identity_detail(
|
||||
pool: &sqlx::PgPool,
|
||||
args: &serde_json::Value,
|
||||
) -> Result<String, String> {
|
||||
let name = args.get("name").and_then(|v| v.as_str()).unwrap_or("");
|
||||
let id_table = schema::table_name("identities");
|
||||
let row: Option<(String, String, Option<String>, Option<i32>, Option<String>)> = sqlx::query_as(&format!(
|
||||
@@ -396,7 +685,10 @@ async fn exec_get_identity_detail(pool: &sqlx::PgPool, args: &serde_json::Value)
|
||||
Ok(serde_json::json!({"identity": row.map(|(u, n, s, t, c)| serde_json::json!({"uuid": u, "name": n, "source": s, "tmdb_id": t, "character": c}))}).to_string())
|
||||
}
|
||||
|
||||
async fn exec_get_file_info(pool: &sqlx::PgPool, args: &serde_json::Value) -> Result<String, String> {
|
||||
async fn exec_get_file_info(
|
||||
pool: &sqlx::PgPool,
|
||||
args: &serde_json::Value,
|
||||
) -> Result<String, String> {
|
||||
let file_uuid = args.get("file_uuid").and_then(|v| v.as_str()).unwrap_or("");
|
||||
let videos = schema::table_name("videos");
|
||||
let row: Option<(String, f64, i32, i32, f64)> = sqlx::query_as(&format!(
|
||||
@@ -405,11 +697,15 @@ async fn exec_get_file_info(pool: &sqlx::PgPool, args: &serde_json::Value) -> Re
|
||||
))
|
||||
.bind(file_uuid)
|
||||
.fetch_optional(pool)
|
||||
.await.map_err(|e| e.to_string())?;
|
||||
.await
|
||||
.map_err(|e| e.to_string())?;
|
||||
Ok(serde_json::json!({"file_info": row.map(|(n, d, w, h, f)| serde_json::json!({"file_name": n, "duration_sec": d, "width": w, "height": h, "fps": f}))}).to_string())
|
||||
}
|
||||
|
||||
async fn exec_get_representative_frame(pool: &sqlx::PgPool, args: &serde_json::Value) -> Result<String, String> {
|
||||
async fn exec_get_representative_frame(
|
||||
pool: &sqlx::PgPool,
|
||||
args: &serde_json::Value,
|
||||
) -> Result<String, String> {
|
||||
let file_uuid = args.get("file_uuid").and_then(|v| v.as_str()).unwrap_or("");
|
||||
match crate::core::processor::tkg::query_auto_representative_frame(pool, file_uuid).await {
|
||||
Ok(r) => Ok(serde_json::json!({
|
||||
@@ -417,24 +713,131 @@ async fn exec_get_representative_frame(pool: &sqlx::PgPool, args: &serde_json::V
|
||||
"face_quality": r.face_quality,
|
||||
"main_identities": r.main_identities,
|
||||
"traces": r.traces,
|
||||
}).to_string()),
|
||||
})
|
||||
.to_string()),
|
||||
Err(e) => Ok(serde_json::json!({"error": e.to_string()}).to_string()),
|
||||
}
|
||||
}
|
||||
|
||||
async fn exec_analyze_frame(
|
||||
pool: &sqlx::PgPool,
|
||||
args: &serde_json::Value,
|
||||
) -> Result<String, String> {
|
||||
let file_uuid = args.get("file_uuid").and_then(|v| v.as_str()).unwrap_or("");
|
||||
let question = args
|
||||
.get("question")
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or("請描述這個畫面中的內容");
|
||||
|
||||
if file_uuid.is_empty() {
|
||||
return Ok(serde_json::json!({"error": "file_uuid is required"}).to_string());
|
||||
}
|
||||
|
||||
let videos = schema::table_name("videos");
|
||||
let (video_path, fps): (String, f64) = sqlx::query_as(&format!(
|
||||
"SELECT file_path, COALESCE(fps, 25.0) FROM {} WHERE file_uuid = $1",
|
||||
videos
|
||||
))
|
||||
.bind(file_uuid)
|
||||
.fetch_optional(pool)
|
||||
.await
|
||||
.map_err(|e| e.to_string())?
|
||||
.ok_or_else(|| "Video not found".to_string())?;
|
||||
|
||||
let frame_number = match args.get("frame_number").and_then(|v| v.as_i64()) {
|
||||
Some(f) => f,
|
||||
None => {
|
||||
match crate::core::processor::tkg::query_auto_representative_frame(pool, file_uuid)
|
||||
.await
|
||||
{
|
||||
Ok(r) => r.frame_number,
|
||||
Err(_) => {
|
||||
let duration: f64 = sqlx::query_scalar(&format!(
|
||||
"SELECT COALESCE(duration, 0) FROM {} WHERE file_uuid = $1",
|
||||
videos
|
||||
))
|
||||
.bind(file_uuid)
|
||||
.fetch_optional(pool)
|
||||
.await
|
||||
.map_err(|e| e.to_string())?
|
||||
.unwrap_or(0.0);
|
||||
if duration > 0.0 {
|
||||
((duration / 2.0) * fps) as i64
|
||||
} else {
|
||||
0
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
let timestamp_secs = frame_number as f64 / fps;
|
||||
|
||||
let ffmpeg_path = std::env::var("MOMENTRY_FFMPEG").unwrap_or_else(|_| {
|
||||
let full = "/opt/homebrew/opt/ffmpeg-full/bin/ffmpeg";
|
||||
if std::path::Path::new(full).exists() {
|
||||
full.to_string()
|
||||
} else {
|
||||
"ffmpeg".to_string()
|
||||
}
|
||||
});
|
||||
|
||||
let output = tokio::process::Command::new(&ffmpeg_path)
|
||||
.args([
|
||||
"-ss",
|
||||
&format!("{:.3}", timestamp_secs),
|
||||
"-i",
|
||||
&video_path,
|
||||
"-vframes",
|
||||
"1",
|
||||
"-f",
|
||||
"image2pipe",
|
||||
"-vcodec",
|
||||
"mjpeg",
|
||||
"-",
|
||||
])
|
||||
.output()
|
||||
.await
|
||||
.map_err(|e| format!("ffmpeg execution error: {}", e))?;
|
||||
|
||||
if !output.status.success() {
|
||||
let stderr = String::from_utf8_lossy(&output.stderr);
|
||||
return Ok(serde_json::json!({"error": format!("ffmpeg failed: {}", stderr)}).to_string());
|
||||
}
|
||||
|
||||
let base64_img = BASE64.encode(&output.stdout);
|
||||
|
||||
let system_prompt =
|
||||
"你是一個專業的影片畫面分析助手。請根據提供的畫面以及用戶的問題,詳細描述畫面中的內容,包括場景、人物、動作、表情、物件等。請用繁體中文回答。";
|
||||
let vision_result = call_llm_vision(system_prompt, question, vec![base64_img], 1024, 120)
|
||||
.await
|
||||
.map_err(|e| e.to_string())?;
|
||||
|
||||
Ok(serde_json::json!({
|
||||
"frame_number": frame_number,
|
||||
"timestamp_secs": timestamp_secs,
|
||||
"analysis": vision_result,
|
||||
})
|
||||
.to_string())
|
||||
}
|
||||
|
||||
// ── Tool Router ───────────────────────────────────────────────────
|
||||
|
||||
async fn execute_tool(pool: &sqlx::PgPool, tool_call: &ToolCall) -> (String, String, String) {
|
||||
let name = tool_call.function.name.clone();
|
||||
let args: serde_json::Value = serde_json::from_str(&tool_call.function.arguments).unwrap_or_default();
|
||||
let args: serde_json::Value =
|
||||
serde_json::from_str(&tool_call.function.arguments).unwrap_or_default();
|
||||
let result = match name.as_str() {
|
||||
"find_file" => exec_find_file(pool, &args).await,
|
||||
"list_files" => exec_list_files(pool, &args).await,
|
||||
"tkg_query" => exec_tkg_query(pool, &args).await,
|
||||
"smart_search" => exec_smart_search(pool, &args).await,
|
||||
"identity_text" => exec_identity_text(pool, &args).await,
|
||||
"identities_search" => exec_identities_search(pool, &args).await,
|
||||
"get_identity_detail" => exec_get_identity_detail(pool, &args).await,
|
||||
"get_file_info" => exec_get_file_info(pool, &args).await,
|
||||
"get_representative_frame" => exec_get_representative_frame(pool, &args).await,
|
||||
"analyze_frame" => exec_analyze_frame(pool, &args).await,
|
||||
_ => Err(format!("Unknown tool: {}", name)),
|
||||
};
|
||||
let content = match result {
|
||||
@@ -476,7 +879,11 @@ async fn run_tool_loop(
|
||||
for call in &calls {
|
||||
let (tool_call_id, name, content) = execute_tool(pool, call).await;
|
||||
sources.push(serde_json::json!({"tool": name, "result": content}));
|
||||
messages.push(function_calling::make_tool_result(&tool_call_id, &name, &content));
|
||||
messages.push(function_calling::make_tool_result(
|
||||
&tool_call_id,
|
||||
&name,
|
||||
&content,
|
||||
));
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
@@ -484,7 +891,10 @@ async fn run_tool_loop(
|
||||
}
|
||||
}
|
||||
}
|
||||
("已達到最大查詢次數,請縮小問題範圍後重新詢問。".to_string(), sources)
|
||||
(
|
||||
"已達到最大查詢次數,請縮小問題範圍後重新詢問。".to_string(),
|
||||
sources,
|
||||
)
|
||||
}
|
||||
|
||||
// ── Handler ───────────────────────────────────────────────────────
|
||||
@@ -495,13 +905,8 @@ async fn agent_search(
|
||||
) -> Result<Json<AgentSearchResponse>, (StatusCode, Json<serde_json::Value>)> {
|
||||
let (conv_id, history) = get_or_create_conv(req.conversation_id.as_deref());
|
||||
|
||||
let (answer, sources) = run_tool_loop(
|
||||
state.db.pool(),
|
||||
SYSTEM_PROMPT,
|
||||
&req.query,
|
||||
history,
|
||||
)
|
||||
.await;
|
||||
let (answer, sources) =
|
||||
run_tool_loop(state.db.pool(), SYSTEM_PROMPT, &req.query, history).await;
|
||||
|
||||
// Save updated messages for conversation continuation
|
||||
let new_msgs = function_calling::build_conversation(SYSTEM_PROMPT, &req.query, vec![]);
|
||||
@@ -509,7 +914,11 @@ async fn agent_search(
|
||||
|
||||
let needs_input = answer.contains('?') || answer.contains('?');
|
||||
let suggestions = if needs_input {
|
||||
Some(vec!["演員名".to_string(), "電影片名".to_string(), "年份".to_string()])
|
||||
Some(vec![
|
||||
"演員名".to_string(),
|
||||
"電影片名".to_string(),
|
||||
"年份".to_string(),
|
||||
])
|
||||
} else {
|
||||
None
|
||||
};
|
||||
@@ -526,6 +935,5 @@ async fn agent_search(
|
||||
// ── Routes ─────────────────────────────────────────────────────────
|
||||
|
||||
pub fn agent_search_routes() -> Router<AppState> {
|
||||
Router::new()
|
||||
.route("/api/v1/agents/search", post(agent_search))
|
||||
Router::new().route("/api/v1/agents/search", post(agent_search))
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user