use axum::{extract::State, http::StatusCode, response::Json, routing::post, Router}; use once_cell::sync::Lazy; use serde::{Deserialize, Serialize}; use std::collections::HashMap; use std::sync::Mutex; use std::time::Instant; use crate::api::types::AppState; use crate::core::db::schema; use crate::core::llm::function_calling::{ self, call_llm_vision, ChatMessage, LlmResponse, ToolCall, ToolDef, }; use base64::{engine::general_purpose::STANDARD as BASE64, Engine}; // ── Conversation Manager ───────────────────────────────────────── struct Conversation { messages: Vec, created_at: Instant, last_active: Instant, } static CONVERSATIONS: Lazy>> = Lazy::new(|| { // Spawn cleanup task std::thread::spawn(|| loop { std::thread::sleep(std::time::Duration::from_secs(60)); let mut map = CONVERSATIONS.lock().unwrap(); let now = Instant::now(); map.retain(|_, conv| now.duration_since(conv.last_active).as_secs() < 1800); }); Mutex::new(HashMap::new()) }); fn get_or_create_conv(conv_id: Option<&str>) -> (String, Vec) { let mut map = CONVERSATIONS.lock().unwrap(); if let Some(cid) = conv_id { if let Some(conv) = map.get_mut(cid) { conv.last_active = Instant::now(); return (cid.to_string(), conv.messages.clone()); } } let id = uuid::Uuid::new_v4().to_string().replace('-', "")[..16].to_string(); map.insert( id.clone(), Conversation { messages: Vec::new(), created_at: Instant::now(), last_active: Instant::now(), }, ); (id, Vec::new()) } fn save_messages(conv_id: &str, messages: &[ChatMessage]) { if let Some(conv) = CONVERSATIONS.lock().unwrap().get_mut(conv_id) { conv.messages = messages.to_vec(); conv.last_active = Instant::now(); } } // ── Request / Response ─────────────────────────────────────────── #[derive(Debug, Deserialize)] pub struct AgentSearchRequest { pub query: String, pub conversation_id: Option, pub file_uuid: Option, } #[derive(Debug, Serialize)] pub struct AgentSearchResponse { pub success: bool, pub conversation_id: String, pub answer: String, #[serde(skip_serializing_if = "Option::is_none")] pub suggestions: Option>, #[serde(skip_serializing_if = "Option::is_none")] pub sources: Option>, } // ── Tool Definitions ────────────────────────────────────────────── const SYSTEM_PROMPT: &str = r#"你是 Momentry 影片分析助手。回答用戶關於影片內容的問題。 ## 工具使用規則 1. 先確認用戶在問哪部影片 — 使用 find_file 或 list_files 2. 人物問題優先使用 tkg_query 3. 人物台詞/發言問題使用 identities_search(輸入人名→回傳台詞片段) 4. 人物對話互動(誰跟誰說話)使用 tkg_query 的 speaker_interaction 5. 人物台詞內容使用 tkg_query 的 speaker_dialogue 6. 用文字反查人物使用 identity_text(輸入關鍵字→找出誰說/提到這段話) 7. 語意/內容問題使用 smart_search 或 universal_search 8. 畫面分析使用 analyze_frame — 可以分析影片中的任何畫面內容(場景、人物表情、動作、物件等) 9. **可以同時呼叫多個工具,但需符合以下條件:** - ✅ 查詢多部影片的相同資訊(如:3部影片的人物列表) - ✅ 需要組合多個來源的資訊才能回答(如:file_info + tkg_query) - ❌ 不要為了「嘗試所有可能」而盲目並行呼叫 - ❌ 如果單一工具已返回足夠答案,不需要額外呼叫 ## 引導規則(優化版) - **搜尋優先原則**: 1. **所有問題都先嘗試搜尋,不要過早判斷用戶是否說了片名** 2. 根據搜尋結果和答案性質決定是否反問: - **列举型問題**(找出所有、列出)→ ✅ 不反問,列出所有結果 - **指定型問題**(这部、那个)→ ⚠️ 反問選擇具體哪個 - **統計型問題**(多少、幾個)→ ✅ 不反問,統計所有結果 - **分析型問題**(分析、描述)→ ⚠️ 視問題表述決定 - **反問條件(精確)**: 1. **答案需要分辨才反問**,不是「找到多部影片就反問」 2. 判断标准: - ✅ 如果問題要求「所有」「列出」→ 答案不需要分辨 → 不反問 - ⚠️ 如果問題要求「这部」「那个」→ 答案需要分辨 → 反問 - ⚠️ 如果問題不明確 → 根據常理判断是否需要分辨 - **反問優化**: 1. 反問時提供智能 suggestions(依問題類型調整) 2. 人物問題 → suggestions: ["演員名", "角色名", "年代"] 3. 內容問題 → suggestions: ["片名", "年代", "主題關鍵字"] 4. 畫面問題 → suggestions: ["片名", "時間範圍", "場景描述"] - **特殊情況**: - 如果影片的 has_data 為 false → 不要推薦,引導選擇 has_data=true - 如果搜尋結果直接包含答案 → 直接回答,不額外呼叫工具 - 如果找不到影片 → 反問提供更多資訊(片名、演員、年份) - **回答格式**: - 不要輸出 JSON,用自然語言回答 - 引用資料時附上具體數字(frame 編號、時間秒數) ## 回答規則(優化版) - 回答長度依問題類型調整: - 簡單查詢(如「列出影片」)→ 簡潔列表回答(1-2句) - 分析問題(如「描述情節」)→ 詳細回答(3-5句) - 計數問題(如「有幾個場景」)→ 直接回答數字 + 簡短說明 - 回答格式: - ✅ 如果找到影片,附上 file_uuid(用戶之後可能需要) - ✅ 對於人物問題,說出角色名和演員名(如果有) - ✅ 引用資料時附上具體數字(frame 編號、時間秒數) - ❌ 不要輸出 JSON 格式,用自然語言回答 - ❌ 不要編造資料,如果找不到就明確說「找不到」 ## 停止規則(重要) - **如果已經找到足夠資訊回答用戶問題,立即停止呼叫工具,直接回答** - **如果連續 2 轪呼叫工具都返回空結果或相同資訊,停止並告知用戶「找不到更多相關資訊」** - **如果用戶問題不明確或範圍過大,停止並反問用戶(提供 suggestions)** - **如果單一工具呼叫返回完整答案,不需要額外呼叫其他工具補充** - **優化效率:避免重複呼叫相同工具或查詢相同內容** - **成本控制:主動判斷是否需要繼續,不要盲目嘗試所有工具**"#; fn make_tools(pool: &sqlx::PgPool) -> Vec { vec![ function_calling::make_tool( "find_file", "透過關鍵字搜尋影片(片名、演員、年份)。回傳符合的影片列表。", serde_json::json!({ "query": {"type": "string", "description": "搜尋關鍵字(片名、演員名、年份)"} }), vec!["query"], ), function_calling::make_tool( "list_files", "列出近期註冊的影片。", serde_json::json!({ "limit": {"type": "integer", "description": "回傳筆數上限", "default": 10} }), vec![], ), function_calling::make_tool( "tkg_query", "查詢影片的人物互動、配對、同框、台詞資料。query_type 包括:top_identities(人物排名)、first_cooccurrence(第一次同框)、identity_details(人物詳細)、mutual_gaze(互看)、interaction_network(互動網絡)、identity_traces(出場片段)、file_info(影片資訊)、speaker_dialogue(人物台詞)、speaker_interaction(兩人對話互動)。", serde_json::json!({ "file_uuid": {"type": "string", "description": "影片 UUID"}, "query_type": { "type": "string", "enum": ["top_identities", "first_cooccurrence", "identity_details", "mutual_gaze", "interaction_network", "identity_traces", "file_info", "speaker_dialogue", "speaker_interaction"], "description": "查詢類型" }, "identity_name": {"type": "string", "description": "人物名稱(配合 identity_details / identity_traces / speaker_dialogue / speaker_interaction)"}, "identity_b": {"type": "string", "description": "第二人物名稱(配合 first_cooccurrence / mutual_gaze / speaker_interaction)"}, "limit": {"type": "integer", "default": 5} }), vec!["file_uuid", "query_type"], ), function_calling::make_tool( "smart_search", "語意搜尋 chunk 文字內容。適合需要理解意圖的查詢。", serde_json::json!({ "file_uuid": {"type": "string", "description": "限制搜尋範圍(可選)"}, "query": {"type": "string", "description": "搜尋關鍵字"}, "limit": {"type": "integer", "default": 5} }), vec!["query"], ), function_calling::make_tool( "identity_text", "搜尋文字關鍵字,找出有提及該內容的影片人物。適合回答「誰說了OOO」、「誰跟OOO有關」。不是查詢人物的台詞,而是用文字反查人物。", serde_json::json!({ "q": {"type": "string", "description": "搜尋關鍵字(台詞片段、主題等)"}, "file_uuid": {"type": "string", "description": "限制搜尋範圍(可選)"}, "limit": {"type": "integer", "default": 10} }), vec!["q"], ), function_calling::make_tool( "identities_search", "查詢特定人物的台詞/發言內容。輸入人物名稱,回傳該人物在影片中說過的話。適合回答「某某人說了什麼」、「某某人的台詞」。", serde_json::json!({ "q": {"type": "string", "description": "人物名稱關鍵字(姓名、角色名、別名)"}, "file_uuid": {"type": "string", "description": "限制搜尋範圍(可選)"}, "limit": {"type": "integer", "default": 10} }), vec!["q"], ), function_calling::make_tool( "get_identity_detail", "查詢單一身份的詳細資料(名字、角色、TMDb 資訊)。", serde_json::json!({ "name": {"type": "string", "description": "人物名稱"} }), vec!["name"], ), function_calling::make_tool( "get_file_info", "查詢影片基本資訊(片名、長度、解析度)。", serde_json::json!({ "file_uuid": {"type": "string", "description": "影片 UUID"} }), vec!["file_uuid"], ), function_calling::make_tool( "get_representative_frame", "查詢影片最具代表性的 frame 資訊(frame 編號、時間、人物)。", serde_json::json!({ "file_uuid": {"type": "string", "description": "影片 UUID"} }), vec!["file_uuid"], ), function_calling::make_tool( "analyze_frame", "分析影片中指定畫面的視覺內容(場景、人物表情、動作、物件等)。若不指定 frame_number,會使用代表性畫面。問題會傳給視覺 LLM 分析。", serde_json::json!({ "file_uuid": {"type": "string", "description": "影片 UUID"}, "question": {"type": "string", "description": "關於畫面的問題,例如「這個場景發生什麼事?」"}, "frame_number": {"type": "integer", "description": "指定的 frame 編號(可選)"} }), vec!["file_uuid"], ), ] } // ── Tool Executors ─────────────────────────────────────────────── async fn exec_find_file(pool: &sqlx::PgPool, args: &serde_json::Value) -> Result { let query = args.get("query").and_then(|v| v.as_str()).unwrap_or(""); let videos = schema::table_name("videos"); let fd_table = schema::table_name("face_detections"); let like = format!("%{}%", query); let rows: Vec<(String, String, bool)> = sqlx::query_as(&format!( "SELECT v.file_uuid::text, v.file_name, \ (SELECT COUNT(*) FROM {} fd WHERE fd.file_uuid = v.file_uuid) > 0 AS has_data \ FROM {} v WHERE v.file_name ILIKE $1 \ ORDER BY v.created_at DESC LIMIT 10", fd_table, videos )) .bind(&like) .fetch_all(pool) .await .map_err(|e| e.to_string())?; if rows.is_empty() { return Ok(serde_json::json!({"found": false, "message": "No files match the query. Try different keywords."}).to_string()); } let files: Vec = rows .into_iter() .map(|(u, n, hd)| serde_json::json!({"file_uuid": u, "file_name": n, "has_data": hd})) .collect(); Ok(serde_json::json!({"found": true, "files": files}).to_string()) } async fn exec_list_files(pool: &sqlx::PgPool, args: &serde_json::Value) -> Result { let limit = args.get("limit").and_then(|v| v.as_i64()).unwrap_or(10); let videos = schema::table_name("videos"); let fd_table = schema::table_name("face_detections"); let rows: Vec<(String, String, bool)> = sqlx::query_as(&format!( "SELECT v.file_uuid::text, v.file_name, \ (SELECT COUNT(*) FROM {} fd WHERE fd.file_uuid = v.file_uuid) > 0 AS has_data \ FROM {} v ORDER BY v.created_at DESC LIMIT $1", fd_table, videos )) .bind(limit) .fetch_all(pool) .await .map_err(|e| e.to_string())?; let files: Vec = rows .into_iter() .map(|(u, n, hd)| serde_json::json!({"file_uuid": u, "file_name": n, "has_data": hd})) .collect(); Ok(serde_json::json!({"files": files}).to_string()) } async fn exec_tkg_query(pool: &sqlx::PgPool, args: &serde_json::Value) -> Result { let file_uuid = args.get("file_uuid").and_then(|v| v.as_str()).unwrap_or(""); let query_type = args .get("query_type") .and_then(|v| v.as_str()) .unwrap_or(""); let identity_name = args.get("identity_name").and_then(|v| v.as_str()); let identity_b = args.get("identity_b").and_then(|v| v.as_str()); let limit = args.get("limit").and_then(|v| v.as_i64()).unwrap_or(5); let id_table = schema::table_name("identities"); let fd_table = schema::table_name("face_detections"); let videos = schema::table_name("videos"); let nodes = schema::table_name("tkg_nodes"); let edges = schema::table_name("tkg_edges"); match query_type { "top_identities" => { let rows: Vec<(String, String, i64)> = sqlx::query_as(&format!( "SELECT i.uuid::text, i.name, COUNT(fd.id)::bigint AS face_count \ FROM {} fd JOIN {} i ON i.id = fd.identity_id \ WHERE fd.file_uuid = $1 AND fd.identity_id IS NOT NULL AND i.source = 'tmdb' \ GROUP BY i.uuid, i.name ORDER BY face_count DESC LIMIT $2", fd_table, id_table )) .bind(file_uuid) .bind(limit) .fetch_all(pool) .await .map_err(|e| e.to_string())?; Ok(serde_json::json!({"identities": rows}).to_string()) } "first_cooccurrence" => { let name_a = identity_name.unwrap_or(""); let name_b = identity_b.unwrap_or(""); let row: Option<(i64, f64)> = sqlx::query_as(&format!( "SELECT MIN(fd_a.frame_number)::bigint, \ ROUND(MIN(fd_a.frame_number)::numeric / GREATEST(MAX(v.fps)::numeric, 25.0), 2)::float8 \ FROM {} fd_a JOIN {} fd_b ON fd_a.frame_number = fd_b.frame_number \ JOIN {} v ON v.file_uuid = $1 \ WHERE fd_a.file_uuid = $1 \ AND fd_a.identity_id = (SELECT id FROM {} WHERE name ILIKE $2 LIMIT 1) \ AND fd_b.identity_id = (SELECT id FROM {} WHERE name ILIKE $3 LIMIT 1)", fd_table, fd_table, videos, id_table, id_table )) .bind(file_uuid).bind(name_a).bind(name_b) .fetch_optional(pool) .await.map_err(|e| e.to_string())?; Ok(serde_json::json!({"first_cooccurrence": row.map(|(f, t)| serde_json::json!({"frame": f, "timestamp_secs": t}))}).to_string()) } "identity_details" => { let name = identity_name.unwrap_or(""); let row: Option<(String, String, Option, i64)> = sqlx::query_as(&format!( "SELECT i.uuid::text, i.name, i.tmdb_id, \ (SELECT COUNT(*) FROM {} fd WHERE fd.identity_id = i.id AND fd.file_uuid = $1)::bigint \ FROM {} i WHERE i.name ILIKE $2 LIMIT 1", fd_table, id_table )) .bind(file_uuid).bind(name) .fetch_optional(pool) .await.map_err(|e| e.to_string())?; Ok(serde_json::json!({"identity": row.map(|(u, n, tid, fc)| serde_json::json!({"uuid": u, "name": n, "tmdb_id": tid, "face_count": fc}))}).to_string()) } "mutual_gaze" => { let name_a = identity_name.unwrap_or(""); let name_b = identity_b.unwrap_or(""); let row: Option<(i64, i64, f64, f64)> = sqlx::query_as(&format!( "SELECT (e.properties->>'first_frame')::bigint, \ (e.properties->>'gaze_frame_count')::int::bigint, \ (e.properties->>'yaw_a_avg')::float8, \ (e.properties->>'yaw_b_avg')::float8 \ FROM {} e \ JOIN {} a ON a.id = e.source_node_id \ JOIN {} b ON b.id = e.target_node_id \ JOIN {} fd_a ON fd_a.file_uuid = $1 AND fd_a.trace_id = REPLACE(a.external_id, 'trace_', '')::int \ JOIN {} fd_b ON fd_b.file_uuid = $1 AND fd_b.trace_id = REPLACE(b.external_id, 'trace_', '')::int \ JOIN {} ia ON ia.id = fd_a.identity_id \ JOIN {} ib ON ib.id = fd_b.identity_id \ WHERE e.file_uuid = $1 AND ia.name ILIKE $2 AND ib.name ILIKE $3 \ AND e.properties->>'mutual_gaze' = 'true' LIMIT 1", edges, nodes, nodes, fd_table, fd_table, id_table, id_table )) .bind(file_uuid).bind(name_a).bind(name_b) .fetch_optional(pool) .await.map_err(|e| e.to_string())?; Ok(serde_json::json!({"mutual_gaze": row.map(|(f, gc, ya, yb)| serde_json::json!({"first_frame": f, "gaze_frame_count": gc, "yaw_a": ya, "yaw_b": yb}))}).to_string()) } "interaction_network" => { let rows: Vec<(String, String, i64)> = sqlx::query_as(&format!( "SELECT ia.name, ib.name, COUNT(*)::bigint \ FROM {} e \ JOIN {} a ON a.id = e.source_node_id \ JOIN {} b ON b.id = e.target_node_id \ JOIN {} fd_a ON fd_a.trace_id = REPLACE(a.external_id, 'trace_', '')::int AND fd_a.file_uuid = $1 \ JOIN {} fd_b ON fd_b.trace_id = REPLACE(b.external_id, 'trace_', '')::int AND fd_b.file_uuid = $1 \ JOIN {} ia ON ia.id = fd_a.identity_id \ JOIN {} ib ON ib.id = fd_b.identity_id \ WHERE e.file_uuid = $1 AND e.edge_type = 'CO_OCCURS_WITH' \ AND ia.name != ib.name AND ia.source = 'tmdb' AND ib.source = 'tmdb' \ GROUP BY ia.name, ib.name \ ORDER BY COUNT(*) DESC LIMIT $2", edges, nodes, nodes, fd_table, fd_table, id_table, id_table )) .bind(file_uuid).bind(limit) .fetch_all(pool) .await.map_err(|e| e.to_string())?; Ok(serde_json::json!({"interaction_network": rows}).to_string()) } "identity_traces" => { let name = identity_name.unwrap_or(""); // MIN/MAX frame_number should be bigint (i64), not int let rows: Vec<(i32, i64, i64, i64)> = sqlx::query_as(&format!( "SELECT fd.trace_id, COUNT(*)::bigint, MIN(fd.frame_number)::bigint, MAX(fd.frame_number)::bigint \ FROM {} fd JOIN {} i ON i.id = fd.identity_id \ WHERE fd.file_uuid = $1 AND i.name ILIKE $2 \ GROUP BY fd.trace_id ORDER BY COUNT(*) DESC LIMIT $3", fd_table, id_table )) .bind(file_uuid).bind(name).bind(limit) .fetch_all(pool) .await.map_err(|e| e.to_string())?; Ok(serde_json::json!({"traces": rows}).to_string()) } "file_info" => { let row: Option<(String, f64, i32, i32, f64)> = sqlx::query_as(&format!( "SELECT file_name, duration, width, height, fps FROM {} WHERE file_uuid = $1", videos )) .bind(file_uuid) .fetch_optional(pool) .await .map_err(|e| e.to_string())?; Ok(serde_json::json!({"file_info": row.map(|(n, d, w, h, f)| serde_json::json!({"file_name": n, "duration_sec": d, "width": w, "height": h, "fps": f}))}).to_string()) } "speaker_dialogue" => { let name = identity_name.unwrap_or(""); let rows: Vec<(String, Option)> = sqlx::query_as(&format!( "SELECT DISTINCT sn.external_id, sn.properties->>'full_text' AS full_text \ FROM {} i \ JOIN {} fd ON fd.identity_id = i.id AND ($2::text IS NULL OR fd.file_uuid = $2) \ JOIN {} fn ON fn.file_uuid = fd.file_uuid \ AND fn.node_type = 'face_trace' \ AND fn.external_id = CONCAT('trace_', fd.trace_id) \ JOIN {} e ON e.source_node_id = fn.id \ AND e.edge_type = 'SPEAKS_AS' \ AND ($2::text IS NULL OR e.file_uuid = $2) \ JOIN {} sn ON sn.id = e.target_node_id \ WHERE i.name ILIKE $1 \ LIMIT $3", id_table, fd_table, nodes, edges, nodes )) .bind(name) .bind(file_uuid) .bind(limit) .fetch_all(pool) .await .map_err(|e| e.to_string())?; Ok( serde_json::json!({"speakers": rows.iter().map(|(sid, text)| { serde_json::json!({"speaker_id": sid, "dialogue": text}) }).collect::>()}) .to_string(), ) } "speaker_interaction" => { let name_a = identity_name.unwrap_or(""); let name_b = identity_b.unwrap_or(""); if name_a.is_empty() || name_b.is_empty() { return Ok( serde_json::json!({"error": "identity_name and identity_b are required"}) .to_string(), ); } // Get both speakers' segments from TKG let rows: Vec<(String, String, serde_json::Value)> = sqlx::query_as(&format!( "SELECT sn.external_id, sn.properties->>'full_text' AS full_text, sn.properties->'segments' AS segments \ FROM {} i \ JOIN {} fd ON fd.identity_id = i.id AND ($3::text IS NULL OR fd.file_uuid = $3) \ JOIN {} fn ON fn.file_uuid = fd.file_uuid \ AND fn.node_type = 'face_trace' \ AND fn.external_id = CONCAT('trace_', fd.trace_id) \ JOIN {} e ON e.source_node_id = fn.id \ AND e.edge_type = 'SPEAKS_AS' \ AND ($3::text IS NULL OR e.file_uuid = $3) \ JOIN {} sn ON sn.id = e.target_node_id \ WHERE (i.name ILIKE $1 OR i.name ILIKE $2) \ ORDER BY sn.external_id", id_table, fd_table, nodes, edges, nodes )) .bind(name_a) .bind(name_b) .bind(file_uuid) .fetch_all(pool) .await .map_err(|e| e.to_string())?; let mut interactions = Vec::new(); for i in 0..rows.len() { for j in i + 1..rows.len() { let (sid_a, text_a, segs_a_val) = &rows[i]; let (sid_b, text_b, segs_b_val) = &rows[j]; let segs_a = segs_a_val.as_array(); let segs_b = segs_b_val.as_array(); if let (Some(a_list), Some(b_list)) = (segs_a, segs_b) { for sa in a_list { let sa_start = sa.get("start").and_then(|v| v.as_f64()).unwrap_or(0.0); let sa_end = sa.get("end").and_then(|v| v.as_f64()).unwrap_or(0.0); let sa_text = sa.get("text").and_then(|v| v.as_str()).unwrap_or(""); if sa_text.is_empty() { continue; } for sb in b_list { let sb_start = sb.get("start").and_then(|v| v.as_f64()).unwrap_or(0.0); let sb_end = sb.get("end").and_then(|v| v.as_f64()).unwrap_or(0.0); let sb_text = sb.get("text").and_then(|v| v.as_str()).unwrap_or(""); if sb_text.is_empty() { continue; } // Check temporal overlap let overlap_start = sa_start.max(sb_start); let overlap_end = sa_end.min(sb_end); if overlap_start < overlap_end { interactions.push(serde_json::json!({ "speaker_a": sid_a, "speaker_b": sid_b, "time_range_s": [overlap_start, overlap_end], "dialogue_a": sa_text, "dialogue_b": sb_text, })); } } } } } } interactions.sort_by(|a, b| { let a_start = a["time_range_s"][0].as_f64().unwrap_or(0.0); let b_start = b["time_range_s"][0].as_f64().unwrap_or(0.0); a_start.partial_cmp(&b_start).unwrap() }); interactions.truncate(limit as usize); Ok(serde_json::json!({"interactions": interactions, "speaker_a_text": rows.first().map(|r| r.1.clone()), "speaker_b_text": rows.get(1).map(|r| r.1.clone())}).to_string()) } _ => Ok( serde_json::json!({"error": format!("Unknown query_type: {}", query_type)}).to_string(), ), } } async fn exec_smart_search( _pool: &sqlx::PgPool, args: &serde_json::Value, ) -> Result { let query = args.get("query").and_then(|v| v.as_str()).unwrap_or(""); let file_uuid = args.get("file_uuid").and_then(|v| v.as_str()); let limit = args.get("limit").and_then(|v| v.as_i64()).unwrap_or(5); let chunk_table = schema::table_name("chunk"); let mut sql = format!( "SELECT chunk_id, text_content, start_frame, end_frame, chunk_type \ FROM {} WHERE text_content ILIKE $1", chunk_table ); if file_uuid.is_some() { sql.push_str(" AND file_uuid = $2"); } sql.push_str(&format!(" ORDER BY start_frame LIMIT {}", limit)); if let Some(fuid) = file_uuid { let like = format!("%{}%", query); let rows: Vec<(String, Option, i64, i64, String)> = sqlx::query_as(&sql) .bind(&like) .bind(fuid) .fetch_all(_pool) .await .map_err(|e| e.to_string())?; Ok(serde_json::json!({"results": rows}).to_string()) } else { let like = format!("%{}%", query); let rows: Vec<(String, Option, i64, i64, String)> = sqlx::query_as(&sql) .bind(&like) .fetch_all(_pool) .await .map_err(|e| e.to_string())?; Ok(serde_json::json!({"results": rows}).to_string()) } } async fn exec_identity_text( pool: &sqlx::PgPool, args: &serde_json::Value, ) -> Result { let q = args.get("q").and_then(|v| v.as_str()).unwrap_or(""); let file_uuid = args.get("file_uuid").and_then(|v| v.as_str()); let limit = args .get("limit") .and_then(|v| v.as_i64()) .unwrap_or(10) .min(50); let chunk_table = schema::table_name("chunk"); let fd_table = schema::table_name("face_detections"); let id_table = schema::table_name("identities"); let like_q = format!("%{}%", q.replace('%', "%%")); let sql = format!( "SELECT c.chunk_id, c.start_time, c.end_time, c.text_content, \ i.name AS identity_name, fd.trace_id, i.source AS identity_source \ FROM {} c \ JOIN {} fd ON fd.file_uuid = c.file_uuid \ AND fd.frame_number BETWEEN c.start_frame AND c.end_frame \ AND fd.identity_id IS NOT NULL \ JOIN {} i ON i.id = fd.identity_id \ WHERE ($1::text IS NULL OR c.file_uuid = $1) \ AND (LOWER(c.text_content) LIKE LOWER($2) OR LOWER(c.content::text) LIKE LOWER($2)) \ ORDER BY c.start_time \ LIMIT $3", chunk_table, fd_table, id_table ); let rows: Vec<( String, f64, f64, Option, String, Option, String, )> = sqlx::query_as(&sql) .bind(file_uuid) .bind(&like_q) .bind(limit) .fetch_all(pool) .await .map_err(|e| e.to_string())?; Ok( serde_json::json!({"results": rows.iter().map(|(chunk_id, st, et, txt, name, tid, src)| { serde_json::json!({ "chunk_id": chunk_id, "start_time": st, "end_time": et, "text": txt, "identity_name": name, "trace_id": tid, "source": src }) } ).collect::>()}) .to_string(), ) } async fn exec_identities_search( pool: &sqlx::PgPool, args: &serde_json::Value, ) -> Result { let q = args.get("q").and_then(|v| v.as_str()).unwrap_or(""); let file_uuid = args.get("file_uuid").and_then(|v| v.as_str()); let limit = args .get("limit") .and_then(|v| v.as_i64()) .unwrap_or(10) .min(50); let id_table = schema::table_name("identities"); let fd_table = schema::table_name("face_detections"); let chunk_table = schema::table_name("chunk"); let like_q = format!("%{}%", q.replace('%', "%%")); let sql = format!( "SELECT DISTINCT ON (i.name, c.chunk_id) \ i.name, c.chunk_id, c.start_time, c.end_time, c.text_content, fd.trace_id \ FROM {} i \ JOIN {} fd ON fd.identity_id = i.id \ JOIN {} c ON c.file_uuid = fd.file_uuid \ AND c.start_time <= fd.frame_number / COALESCE(c.fps, 25.0) \ AND c.end_time >= fd.frame_number / COALESCE(c.fps, 25.0) \ WHERE (i.name ILIKE $1 \ OR EXISTS (SELECT 1 FROM jsonb_array_elements(i.metadata->'aliases') AS a WHERE a->>'name' ILIKE $1)) \ AND ($2::text IS NULL OR fd.file_uuid = $2) \ ORDER BY i.name, c.chunk_id, c.start_time \ LIMIT $3", id_table, fd_table, chunk_table ); let rows: Vec<(String, String, f64, f64, Option, Option)> = sqlx::query_as(&sql) .bind(&like_q) .bind(file_uuid) .bind(limit) .fetch_all(pool) .await .map_err(|e| e.to_string())?; Ok( serde_json::json!({"results": rows.iter().map(|(name, chunk_id, st, et, txt, tid)| { serde_json::json!({ "identity_name": name, "chunk_id": chunk_id, "start_time": st, "end_time": et, "text": txt, "trace_id": tid, }) }).collect::>()}) .to_string(), ) } async fn exec_get_identity_detail( pool: &sqlx::PgPool, args: &serde_json::Value, ) -> Result { let name = args.get("name").and_then(|v| v.as_str()).unwrap_or(""); let id_table = schema::table_name("identities"); let row: Option<(String, String, Option, Option, Option)> = sqlx::query_as(&format!( "SELECT uuid::text, name, source, tmdb_id, metadata->>'tmdb_character' FROM {} WHERE name ILIKE $1 LIMIT 1", id_table )) .bind(name) .fetch_optional(pool) .await.map_err(|e| e.to_string())?; Ok(serde_json::json!({"identity": row.map(|(u, n, s, t, c)| serde_json::json!({"uuid": u, "name": n, "source": s, "tmdb_id": t, "character": c}))}).to_string()) } async fn exec_get_file_info( pool: &sqlx::PgPool, args: &serde_json::Value, ) -> Result { let file_uuid = args.get("file_uuid").and_then(|v| v.as_str()).unwrap_or(""); let videos = schema::table_name("videos"); let row: Option<(String, f64, i32, i32, f64)> = sqlx::query_as(&format!( "SELECT file_name, duration, width, height, fps FROM {} WHERE file_uuid = $1", videos )) .bind(file_uuid) .fetch_optional(pool) .await .map_err(|e| e.to_string())?; Ok(serde_json::json!({"file_info": row.map(|(n, d, w, h, f)| serde_json::json!({"file_name": n, "duration_sec": d, "width": w, "height": h, "fps": f}))}).to_string()) } async fn exec_get_representative_frame( pool: &sqlx::PgPool, args: &serde_json::Value, ) -> Result { let file_uuid = args.get("file_uuid").and_then(|v| v.as_str()).unwrap_or(""); match crate::core::processor::tkg::query_auto_representative_frame(pool, file_uuid).await { Ok(r) => Ok(serde_json::json!({ "frame_number": r.frame_number, "face_quality": r.face_quality, "main_identities": r.main_identities, "traces": r.traces, }) .to_string()), Err(e) => Ok(serde_json::json!({"error": e.to_string()}).to_string()), } } async fn exec_analyze_frame( pool: &sqlx::PgPool, args: &serde_json::Value, ) -> Result { let file_uuid = args.get("file_uuid").and_then(|v| v.as_str()).unwrap_or(""); let question = args .get("question") .and_then(|v| v.as_str()) .unwrap_or("請描述這個畫面中的內容"); if file_uuid.is_empty() { return Ok(serde_json::json!({"error": "file_uuid is required"}).to_string()); } let videos = schema::table_name("videos"); let (video_path, fps): (String, f64) = sqlx::query_as(&format!( "SELECT file_path, COALESCE(fps, 25.0) FROM {} WHERE file_uuid = $1", videos )) .bind(file_uuid) .fetch_optional(pool) .await .map_err(|e| e.to_string())? .ok_or_else(|| "Video not found".to_string())?; let frame_number = match args.get("frame_number").and_then(|v| v.as_i64()) { Some(f) => f, None => { match crate::core::processor::tkg::query_auto_representative_frame(pool, file_uuid) .await { Ok(r) => r.frame_number, Err(_) => { let duration: f64 = sqlx::query_scalar(&format!( "SELECT COALESCE(duration, 0) FROM {} WHERE file_uuid = $1", videos )) .bind(file_uuid) .fetch_optional(pool) .await .map_err(|e| e.to_string())? .unwrap_or(0.0); if duration > 0.0 { ((duration / 2.0) * fps) as i64 } else { 0 } } } } }; let timestamp_secs = frame_number as f64 / fps; let ffmpeg_path = std::env::var("MOMENTRY_FFMPEG").unwrap_or_else(|_| { let full = "/opt/homebrew/opt/ffmpeg-full/bin/ffmpeg"; if std::path::Path::new(full).exists() { full.to_string() } else { "ffmpeg".to_string() } }); let output = tokio::process::Command::new(&ffmpeg_path) .args([ "-ss", &format!("{:.3}", timestamp_secs), "-i", &video_path, "-vframes", "1", "-f", "image2pipe", "-vcodec", "mjpeg", "-", ]) .output() .await .map_err(|e| format!("ffmpeg execution error: {}", e))?; if !output.status.success() { let stderr = String::from_utf8_lossy(&output.stderr); return Ok(serde_json::json!({"error": format!("ffmpeg failed: {}", stderr)}).to_string()); } let base64_img = BASE64.encode(&output.stdout); let system_prompt = "你是一個專業的影片畫面分析助手。請根據提供的畫面以及用戶的問題,詳細描述畫面中的內容,包括場景、人物、動作、表情、物件等。請用繁體中文回答。"; let vision_result = call_llm_vision(system_prompt, question, vec![base64_img], 1024, 120) .await .map_err(|e| e.to_string())?; Ok(serde_json::json!({ "frame_number": frame_number, "timestamp_secs": timestamp_secs, "analysis": vision_result, }) .to_string()) } // ── Tool Router ─────────────────────────────────────────────────── async fn execute_tool(pool: &sqlx::PgPool, tool_call: &ToolCall) -> (String, String, String) { let name = tool_call.function.name.clone(); let tool_call_id = tool_call.id.clone().unwrap_or_default(); let args: serde_json::Value = match serde_json::from_str(&tool_call.function.arguments) { Ok(v) => v, Err(e) => return (tool_call_id, name, serde_json::json!({"error": format!("Invalid arguments: {}", e)}).to_string()), }; let result = match name.as_str() { "find_file" => exec_find_file(pool, &args).await, "list_files" => exec_list_files(pool, &args).await, "tkg_query" => exec_tkg_query(pool, &args).await, "smart_search" => exec_smart_search(pool, &args).await, "identity_text" => exec_identity_text(pool, &args).await, "identities_search" => exec_identities_search(pool, &args).await, "get_identity_detail" => exec_get_identity_detail(pool, &args).await, "get_file_info" => exec_get_file_info(pool, &args).await, "get_representative_frame" => exec_get_representative_frame(pool, &args).await, "analyze_frame" => exec_analyze_frame(pool, &args).await, _ => Err(format!("Unknown tool: {}", name)), }; let content = match result { Ok(s) => s, Err(e) => serde_json::json!({"error": e}).to_string(), }; (tool_call_id, name, content) } // ── Tool Loop ───────────────────────────────────────────────────── const MAX_ROUNDS: u32 = 15; async fn run_tool_loop( pool: &sqlx::PgPool, system_prompt: &str, user_query: &str, history: Vec, ) -> (String, Vec, Vec) { let mut messages = function_calling::build_conversation(system_prompt, user_query, history); let mut sources = Vec::new(); for round in 0..MAX_ROUNDS { let tools = make_tools(pool); tracing::info!( "[AGENT] Round {} started, message_count: {}, tools_available: {}", round + 1, messages.len(), tools.len() ); match function_calling::call_llm(messages.clone(), Some(tools.clone()), 2048, 120).await { Ok(LlmResponse::Text(text)) => { tracing::info!( "[AGENT] Loop finished: rounds_used={}, total_tools_called={}, answer_length={} chars", round + 1, sources.len(), text.len() ); return (text, messages, sources); } Ok(LlmResponse::ToolCalls(calls)) => { messages.push(ChatMessage { role: "assistant".to_string(), content: None, tool_calls: Some(calls.clone()), tool_call_id: None, name: None, }); for call in &calls { let (tool_call_id, name, content) = execute_tool(pool, call).await; tracing::info!( "[AGENT] Tool called: {}, result_size: {} chars, round: {}", name, content.len(), round + 1 ); sources.push(serde_json::json!({"tool": name, "result": content})); messages.push(function_calling::make_tool_result( &tool_call_id, &name, &content, )); } } Err(e) => { tracing::error!("[AGENT] LLM call failed: {}", e); return (format!("系統錯誤:{}", e), messages, sources); } } } tracing::warn!( "[AGENT] Max rounds reached: rounds_used={}, total_tools_called={}", MAX_ROUNDS, sources.len() ); ( "已達到最大查詢次數,請縮小問題範圍後重新詢問。".to_string(), messages, sources, ) } // ── Handler ─────────────────────────────────────────────────────── async fn agent_search( State(state): State, Json(req): Json, ) -> Result, (StatusCode, Json)> { let (conv_id, history) = get_or_create_conv(req.conversation_id.as_deref()); let (answer, messages, sources) = run_tool_loop(state.db.pool(), SYSTEM_PROMPT, &req.query, history).await; // Save messages (skip system prompt — build_conversation re-adds it) let history: Vec = messages.into_iter().skip(1).collect(); save_messages(&conv_id, &history); let needs_input = answer.contains('?') || answer.contains('?'); let suggestions = if needs_input { Some(vec![ "演員名".to_string(), "電影片名".to_string(), "年份".to_string(), ]) } else { None }; Ok(Json(AgentSearchResponse { success: true, conversation_id: conv_id, answer, suggestions, sources: Some(sources), })) } // ── Routes ───────────────────────────────────────────────────────── pub fn agent_search_routes() -> Router { Router::new().route("/api/v1/agents/search", post(agent_search)) }