// momentry_core/src/api/agent_search.rs

use axum::{extract::State, http::StatusCode, response::Json, routing::post, Router};
use once_cell::sync::Lazy;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::sync::Mutex;
use std::time::Instant;

use crate::api::types::AppState;
use crate::core::db::schema;
use crate::core::llm::function_calling::{
    self, call_llm_vision, ChatMessage, LlmResponse, ToolCall, ToolDef,
};
use base64::{engine::general_purpose::STANDARD as BASE64, Engine};

// ── Conversation Manager ─────────────────────────────────────────

/// In-memory state kept for one agent chat session.
struct Conversation {
    /// Message history; cloned out by `get_or_create_conv` and overwritten by `save_messages`.
    messages: Vec<ChatMessage>,
    /// Instant at which this entry was inserted into `CONVERSATIONS`.
    created_at: Instant,
    /// Instant of the most recent lookup or save; the cleanup thread expires entries based on it.
    last_active: Instant,
}

/// Process-wide conversation store, keyed by conversation id.
///
/// The first access builds the empty map and also starts a background thread
/// that wakes every 60 seconds and removes each conversation whose
/// `last_active` is 1800 seconds (30 minutes) or more in the past.
static CONVERSATIONS: Lazy<Mutex<HashMap<String, Conversation>>> = Lazy::new(|| {
    // The sweeper sleeps before its first access to CONVERSATIONS.
    std::thread::spawn(|| {
        let tick = std::time::Duration::from_secs(60);
        let max_idle = std::time::Duration::from_secs(1800);
        loop {
            std::thread::sleep(tick);
            let mut store = CONVERSATIONS.lock().unwrap();
            let now = Instant::now();
            store.retain(|_, entry| now.duration_since(entry.last_active) < max_idle);
        }
    });
    Mutex::new(HashMap::new())
});

/// Resolves a conversation id to its stored history, creating a session when needed.
///
/// When `conv_id` names an existing conversation, its `last_active` is refreshed
/// and a clone of its messages is returned together with the same id. In every
/// other case (no id given, or the id is not in the store) a new empty
/// conversation is registered under the first 16 characters of a dash-stripped
/// v4 UUID, and that new id is returned with an empty history.
fn get_or_create_conv(conv_id: Option<&str>) -> (String, Vec<ChatMessage>) {
    let mut store = CONVERSATIONS.lock().unwrap();

    // Look up the caller-supplied id first; the result is owned data, so the
    // mutable borrow of the store ends with this statement.
    let known = match conv_id {
        Some(cid) => store.get_mut(cid).map(|conv| {
            conv.last_active = Instant::now();
            (cid.to_string(), conv.messages.clone())
        }),
        None => None,
    };
    if let Some(hit) = known {
        return hit;
    }

    let full_hex = uuid::Uuid::new_v4().to_string().replace('-', "");
    let fresh_id = full_hex[..16].to_string();
    let blank = Conversation {
        messages: Vec::new(),
        created_at: Instant::now(),
        last_active: Instant::now(),
    };
    store.insert(fresh_id.clone(), blank);
    (fresh_id, Vec::new())
}

/// Replaces the stored history of `conv_id` with a copy of `messages` and marks
/// the conversation as active now.
///
/// Ids that are not present in the store are ignored.
fn save_messages(conv_id: &str, messages: &[ChatMessage]) {
    let mut store = CONVERSATIONS.lock().unwrap();
    if let Some(entry) = store.get_mut(conv_id) {
        entry.messages = Vec::from(messages);
        entry.last_active = Instant::now();
    }
}

// ── Request / Response ───────────────────────────────────────────

/// Request body of the agent search endpoint, deserialized by serde.
#[derive(Debug, Deserialize)]
pub struct AgentSearchRequest {
    /// The user's question.
    pub query: String,
    /// Optional id of a conversation to continue.
    pub conversation_id: Option<String>,
    /// Optional video UUID supplied with the request.
    // NOTE(review): how this field is consumed is not visible in this part of the file.
    pub file_uuid: Option<String>,
}

/// Response body of the agent search endpoint, serialized by serde.
#[derive(Debug, Serialize)]
pub struct AgentSearchResponse {
    /// Success flag reported to the client.
    pub success: bool,
    /// Id of the conversation this answer belongs to.
    pub conversation_id: String,
    /// Answer text.
    pub answer: String,
    /// Optional follow-up suggestions; the key is left out of the JSON when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub suggestions: Option<Vec<String>>,
    /// Optional source records; the key is left out of the JSON when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub sources: Option<Vec<serde_json::Value>>,
}

// ── Tool Definitions ──────────────────────────────────────────────

/// System prompt (Traditional Chinese) for the video-analysis assistant.
///
/// It describes which tool to use for which kind of question, when to ask the
/// user a clarifying question, how to format answers, and when to stop calling
/// tools. The text is runtime data: any edit changes model behaviour.
const SYSTEM_PROMPT: &str = r#"你是 Momentry 影片分析助手。回答用戶關於影片內容的問題。

## 工具使用規則
1. 先確認用戶在問哪部影片 — 使用 find_file 或 list_files
2. 人物問題優先使用 tkg_query
3. 人物台詞/發言問題使用 identities_search（輸入人名→回傳台詞片段）
4. 人物對話互動（誰跟誰說話）使用 tkg_query 的 speaker_interaction
5. 人物台詞內容使用 tkg_query 的 speaker_dialogue
6. 用文字反查人物使用 identity_text（輸入關鍵字→找出誰說/提到這段話）
7. 語意/內容問題使用 smart_search 或 universal_search
8. 畫面分析使用 analyze_frame — 可以分析影片中的任何畫面內容（場景、人物表情、動作、物件等）
9. **可以同時呼叫多個工具，但需符合以下條件：**
   - ✅ 查詢多部影片的相同資訊（如：3部影片的人物列表）
   - ✅ 需要組合多個來源的資訊才能回答（如：file_info + tkg_query）
   - ❌ 不要為了「嘗試所有可能」而盲目並行呼叫
   - ❌ 如果單一工具已返回足夠答案，不需要額外呼叫

## 引導規則（優化版）
- **搜尋優先原則**：
  1. **所有問題都先嘗試搜尋，不要過早判斷用戶是否說了片名**
  2. 根據搜尋結果和答案性質決定是否反問：
     - **列举型問題**（找出所有、列出）→ ✅ 不反問，列出所有結果
     - **指定型問題**（这部、那个）→ ⚠️ 反問選擇具體哪個
     - **統計型問題**（多少、幾個）→ ✅ 不反問，統計所有結果
     - **分析型問題**（分析、描述）→ ⚠️ 視問題表述決定

- **反問條件（精確）**：
  1. **答案需要分辨才反問**，不是「找到多部影片就反問」
  2. 判断标准：
     - ✅ 如果問題要求「所有」「列出」→ 答案不需要分辨 → 不反問
     - ⚠️ 如果問題要求「这部」「那个」→ 答案需要分辨 → 反問
     - ⚠️ 如果問題不明確 → 根據常理判断是否需要分辨

- **反問優化**：
  1. 反問時提供智能 suggestions（依問題類型調整）
  2. 人物問題 → suggestions: ["演員名", "角色名", "年代"]
  3. 內容問題 → suggestions: ["片名", "年代", "主題關鍵字"]
  4. 畫面問題 → suggestions: ["片名", "時間範圍", "場景描述"]

- **特殊情況**：
  - 如果影片的 has_data 為 false → 不要推薦，引導選擇 has_data=true
  - 如果搜尋結果直接包含答案 → 直接回答，不額外呼叫工具
  - 如果找不到影片 → 反問提供更多資訊（片名、演員、年份）

- **回答格式**：
  - 不要輸出 JSON，用自然語言回答
  - 引用資料時附上具體數字（frame 編號、時間秒數）

## 回答規則（優化版）
- 回答長度依問題類型調整：
  - 簡單查詢（如「列出影片」）→ 簡潔列表回答（1-2句）
  - 分析問題（如「描述情節」）→ 詳細回答（3-5句）
  - 計數問題（如「有幾個場景」）→ 直接回答數字 + 簡短說明

- 回答格式：
  - ✅ 如果找到影片，附上 file_uuid（用戶之後可能需要）
  - ✅ 對於人物問題，說出角色名和演員名（如果有）
  - ✅ 引用資料時附上具體數字（frame 編號、時間秒數）
  - ❌ 不要輸出 JSON 格式，用自然語言回答
  - ❌ 不要編造資料，如果找不到就明確說「找不到」

## 停止規則（重要）
- **如果已經找到足夠資訊回答用戶問題，立即停止呼叫工具，直接回答**
- **如果連續 2 輪呼叫工具都返回空結果或相同資訊，停止並告知用戶「找不到更多相關資訊」**
- **如果用戶問題不明確或範圍過大，停止並反問用戶（提供 suggestions）**
- **如果單一工具呼叫返回完整答案，不需要額外呼叫其他工具補充**
- **優化效率：避免重複呼叫相同工具或查詢相同內容**
- **成本控制：主動判斷是否需要繼續，不要盲目嘗試所有工具**"#;

/// Builds the list of tool definitions offered to the LLM.
///
/// Each entry is created with `function_calling::make_tool` from four pieces:
/// the tool name, a description, a JSON object describing the parameters, and
/// the list of required parameter names.
///
/// `pool` is not read anywhere in this function body.
fn make_tools(pool: &sqlx::PgPool) -> Vec<ToolDef> {
    vec![
        // File discovery tools.
        function_calling::make_tool(
            "find_file",
            "透過關鍵字搜尋影片（片名、演員、年份）。回傳符合的影片列表。",
            serde_json::json!({
                "query": {"type": "string", "description": "搜尋關鍵字（片名、演員名、年份）"}
            }),
            vec!["query"],
        ),
        function_calling::make_tool(
            "list_files",
            "列出近期註冊的影片。",
            serde_json::json!({
                "limit": {"type": "integer", "description": "回傳筆數上限", "default": 10}
            }),
            vec![],
        ),
        // Graph query tool; the `enum` list mirrors the `query_type` values
        // matched in `exec_tkg_query`.
        function_calling::make_tool(
            "tkg_query",
            "查詢影片的人物互動、配對、同框、台詞資料。query_type 包括：top_identities（人物排名）、first_cooccurrence（第一次同框）、identity_details（人物詳細）、mutual_gaze（互看）、interaction_network（互動網絡）、identity_traces（出場片段）、file_info（影片資訊）、speaker_dialogue（人物台詞）、speaker_interaction（兩人對話互動）。",
            serde_json::json!({
                "file_uuid": {"type": "string", "description": "影片 UUID"},
                "query_type": {
                    "type": "string",
                    "enum": ["top_identities", "first_cooccurrence", "identity_details", "mutual_gaze", "interaction_network", "identity_traces", "file_info", "speaker_dialogue", "speaker_interaction"],
                    "description": "查詢類型"
                },
                "identity_name": {"type": "string", "description": "人物名稱（配合 identity_details / identity_traces / speaker_dialogue / speaker_interaction）"},
                "identity_b": {"type": "string", "description": "第二人物名稱（配合 first_cooccurrence / mutual_gaze / speaker_interaction）"},
                "limit": {"type": "integer", "default": 5}
            }),
            vec!["file_uuid", "query_type"],
        ),
        // Text search tools.
        function_calling::make_tool(
            "smart_search",
            "語意搜尋 chunk 文字內容。適合需要理解意圖的查詢。",
            serde_json::json!({
                "file_uuid": {"type": "string", "description": "限制搜尋範圍（可選）"},
                "query": {"type": "string", "description": "搜尋關鍵字"},
                "limit": {"type": "integer", "default": 5}
            }),
            vec!["query"],
        ),
        function_calling::make_tool(
            "identity_text",
            "搜尋文字關鍵字，找出有提及該內容的影片人物。適合回答「誰說了OOO」、「誰跟OOO有關」。不是查詢人物的台詞，而是用文字反查人物。",
            serde_json::json!({
                "q": {"type": "string", "description": "搜尋關鍵字（台詞片段、主題等）"},
                "file_uuid": {"type": "string", "description": "限制搜尋範圍（可選）"},
                "limit": {"type": "integer", "default": 10}
            }),
            vec!["q"],
        ),
        function_calling::make_tool(
            "identities_search",
            "查詢特定人物的台詞/發言內容。輸入人物名稱，回傳該人物在影片中說過的話。適合回答「某某人說了什麼」、「某某人的台詞」。",
            serde_json::json!({
                "q": {"type": "string", "description": "人物名稱關鍵字（姓名、角色名、別名）"},
                "file_uuid": {"type": "string", "description": "限制搜尋範圍（可選）"},
                "limit": {"type": "integer", "default": 10}
            }),
            vec!["q"],
        ),
        // Single-record lookups.
        function_calling::make_tool(
            "get_identity_detail",
            "查詢單一身份的詳細資料（名字、角色、TMDb 資訊）。",
            serde_json::json!({
                "name": {"type": "string", "description": "人物名稱"}
            }),
            vec!["name"],
        ),
        function_calling::make_tool(
            "get_file_info",
            "查詢影片基本資訊（片名、長度、解析度）。",
            serde_json::json!({
                "file_uuid": {"type": "string", "description": "影片 UUID"}
            }),
            vec!["file_uuid"],
        ),
        // Frame-level tools.
        function_calling::make_tool(
            "get_representative_frame",
            "查詢影片最具代表性的 frame 資訊（frame 編號、時間、人物）。",
            serde_json::json!({
                "file_uuid": {"type": "string", "description": "影片 UUID"}
            }),
            vec!["file_uuid"],
        ),
        function_calling::make_tool(
            "analyze_frame",
            "分析影片中指定畫面的視覺內容（場景、人物表情、動作、物件等）。若不指定 frame_number，會使用代表性畫面。問題會傳給視覺 LLM 分析。",
            serde_json::json!({
                "file_uuid": {"type": "string", "description": "影片 UUID"},
                "question": {"type": "string", "description": "關於畫面的問題，例如「這個場景發生什麼事？」"},
                "frame_number": {"type": "integer", "description": "指定的 frame 編號（可選）"}
            }),
            vec!["file_uuid"],
        ),
    ]
}

// ── Tool Executors ───────────────────────────────────────────────

/// Tool executor for `find_file`: lists up to 10 videos whose `file_name`
/// contains the `query` argument (case-insensitive), newest first.
///
/// The keyword is matched literally: `\`, `%` and `_` are escaped before it is
/// wrapped in `%…%`, so they no longer act as ILIKE wildcards. Each returned
/// entry carries `has_data`, which is true when at least one face-detection row
/// exists for the video.
///
/// # Returns
/// A JSON string: `{"found": false, "message": …}` when nothing matches,
/// otherwise `{"found": true, "files": [{file_uuid, file_name, has_data}, …]}`.
///
/// # Errors
/// The stringified sqlx error when the query fails.
async fn exec_find_file(pool: &sqlx::PgPool, args: &serde_json::Value) -> Result<String, String> {
    /// Escapes the LIKE/ILIKE metacharacters with PostgreSQL's default escape character.
    fn escape_like(raw: &str) -> String {
        let mut out = String::with_capacity(raw.len());
        for ch in raw.chars() {
            if ch == '\\' || ch == '%' || ch == '_' {
                out.push('\\');
            }
            out.push(ch);
        }
        out
    }

    let query = args.get("query").and_then(|v| v.as_str()).unwrap_or("");
    let videos = schema::table_name("videos");
    let fd_table = schema::table_name("face_detections");
    let like = format!("%{}%", escape_like(query));
    // EXISTS lets the database stop at the first matching detection instead of counting them all.
    let rows: Vec<(String, String, bool)> = sqlx::query_as(&format!(
        "SELECT v.file_uuid::text, v.file_name, \
                EXISTS (SELECT 1 FROM {} fd WHERE fd.file_uuid = v.file_uuid) AS has_data \
         FROM {} v WHERE v.file_name ILIKE $1 \
         ORDER BY v.created_at DESC LIMIT 10",
        fd_table, videos
    ))
    .bind(&like)
    .fetch_all(pool)
    .await
    .map_err(|e| e.to_string())?;

    if rows.is_empty() {
        return Ok(serde_json::json!({"found": false, "message": "No files match the query. Try different keywords."}).to_string());
    }
    let files: Vec<serde_json::Value> = rows
        .into_iter()
        .map(|(u, n, hd)| serde_json::json!({"file_uuid": u, "file_name": n, "has_data": hd}))
        .collect();
    Ok(serde_json::json!({"found": true, "files": files}).to_string())
}

/// Tool executor for `list_files`: lists the most recently created videos.
///
/// `limit` defaults to 10 and is clamped to `0..=50`; a negative value would
/// otherwise be rejected by the database, and the cap matches the one sibling
/// search tools apply. Each entry carries `has_data`, which is true when at
/// least one face-detection row exists for the video.
///
/// # Returns
/// A JSON string `{"files": [{file_uuid, file_name, has_data}, …]}`.
///
/// # Errors
/// The stringified sqlx error when the query fails.
async fn exec_list_files(pool: &sqlx::PgPool, args: &serde_json::Value) -> Result<String, String> {
    let limit = args
        .get("limit")
        .and_then(|v| v.as_i64())
        .unwrap_or(10)
        .max(0)
        .min(50);
    let videos = schema::table_name("videos");
    let fd_table = schema::table_name("face_detections");
    // EXISTS lets the database stop at the first matching detection instead of counting them all.
    let rows: Vec<(String, String, bool)> = sqlx::query_as(&format!(
        "SELECT v.file_uuid::text, v.file_name, \
                EXISTS (SELECT 1 FROM {} fd WHERE fd.file_uuid = v.file_uuid) AS has_data \
         FROM {} v ORDER BY v.created_at DESC LIMIT $1",
        fd_table, videos
    ))
    .bind(limit)
    .fetch_all(pool)
    .await
    .map_err(|e| e.to_string())?;

    let files: Vec<serde_json::Value> = rows
        .into_iter()
        .map(|(u, n, hd)| serde_json::json!({"file_uuid": u, "file_name": n, "has_data": hd}))
        .collect();
    Ok(serde_json::json!({"files": files}).to_string())
}

/// Tool executor for `tkg_query`: runs one of several person/graph queries for a
/// video and returns the result as a JSON string.
///
/// Arguments read from `args`: `file_uuid`, `query_type`, `identity_name`,
/// `identity_b` (all default to empty / absent) and `limit` (default 5). The
/// query is chosen by `query_type`:
///
/// * `top_identities` – identities with `source = 'tmdb'` ranked by face count.
/// * `first_cooccurrence` – first frame in which both named identities are
///   detected in the same video; `null` when they never share a frame.
/// * `identity_details` – uuid, name, TMDb id and face count of one identity.
/// * `mutual_gaze` – first edge flagged `mutual_gaze` between two identities.
/// * `interaction_network` – `CO_OCCURS_WITH` edge counts per identity pair.
/// * `identity_traces` – per-trace detection count and frame range.
/// * `file_info` – file name, duration, resolution and fps.
/// * `speaker_dialogue` – speaker nodes linked to the identity via `SPEAKS_AS`.
/// * `speaker_interaction` – time-overlapping segments between speaker nodes
///   of the two named identities.
///
/// # Errors
/// The stringified sqlx error when a query fails. An unknown `query_type` and
/// missing names for `speaker_interaction` are reported inside an `Ok` JSON
/// object under `"error"`.
async fn exec_tkg_query(pool: &sqlx::PgPool, args: &serde_json::Value) -> Result<String, String> {
    let file_uuid = args.get("file_uuid").and_then(|v| v.as_str()).unwrap_or("");
    let query_type = args
        .get("query_type")
        .and_then(|v| v.as_str())
        .unwrap_or("");
    let identity_name = args.get("identity_name").and_then(|v| v.as_str());
    let identity_b = args.get("identity_b").and_then(|v| v.as_str());
    let limit = args.get("limit").and_then(|v| v.as_i64()).unwrap_or(5);

    let id_table = schema::table_name("identities");
    let fd_table = schema::table_name("face_detections");
    let videos = schema::table_name("videos");
    let nodes = schema::table_name("tkg_nodes");
    let edges = schema::table_name("tkg_edges");

    match query_type {
        "top_identities" => {
            let rows: Vec<(String, String, i64)> = sqlx::query_as(&format!(
                "SELECT i.uuid::text, i.name, COUNT(fd.id)::bigint AS face_count \
                 FROM {} fd JOIN {} i ON i.id = fd.identity_id \
                 WHERE fd.file_uuid = $1 AND fd.identity_id IS NOT NULL AND i.source = 'tmdb' \
                 GROUP BY i.uuid, i.name ORDER BY face_count DESC LIMIT $2",
                fd_table, id_table
            ))
            .bind(file_uuid)
            .bind(limit)
            .fetch_all(pool)
            .await
            .map_err(|e| e.to_string())?;
            Ok(serde_json::json!({"identities": rows}).to_string())
        }
        "first_cooccurrence" => {
            let name_a = identity_name.unwrap_or("");
            let name_b = identity_b.unwrap_or("");
            // The aggregate always yields exactly one row whose columns are NULL
            // when the two identities never share a frame, so both columns are
            // decoded as nullable. `fd_b` is tied to the same video as `fd_a`;
            // frame numbers alone are not unique across videos.
            let row: Option<(Option<i64>, Option<f64>)> = sqlx::query_as(&format!(
                "SELECT MIN(fd_a.frame_number)::bigint, \
                        ROUND(MIN(fd_a.frame_number)::numeric / GREATEST(MAX(v.fps)::numeric, 25.0), 2)::float8 \
                 FROM {} fd_a JOIN {} fd_b ON fd_a.frame_number = fd_b.frame_number \
                      AND fd_b.file_uuid = fd_a.file_uuid \
                 JOIN {} v ON v.file_uuid = $1 \
                 WHERE fd_a.file_uuid = $1 \
                 AND fd_a.identity_id = (SELECT id FROM {} WHERE name ILIKE $2 LIMIT 1) \
                 AND fd_b.identity_id = (SELECT id FROM {} WHERE name ILIKE $3 LIMIT 1)",
                fd_table, fd_table, videos, id_table, id_table
            ))
            .bind(file_uuid).bind(name_a).bind(name_b)
            .fetch_optional(pool)
            .await.map_err(|e| e.to_string())?;
            let hit = row.and_then(|(frame, secs)| {
                frame.map(|f| serde_json::json!({"frame": f, "timestamp_secs": secs}))
            });
            Ok(serde_json::json!({"first_cooccurrence": hit}).to_string())
        }
        "identity_details" => {
            let name = identity_name.unwrap_or("");
            let row: Option<(String, String, Option<i32>, i64)> = sqlx::query_as(&format!(
                "SELECT i.uuid::text, i.name, i.tmdb_id, \
                        (SELECT COUNT(*) FROM {} fd WHERE fd.identity_id = i.id AND fd.file_uuid = $1)::bigint \
                 FROM {} i WHERE i.name ILIKE $2 LIMIT 1",
                fd_table, id_table
            ))
            .bind(file_uuid).bind(name)
            .fetch_optional(pool)
            .await.map_err(|e| e.to_string())?;
            Ok(serde_json::json!({"identity": row.map(|(u, n, tid, fc)| serde_json::json!({"uuid": u, "name": n, "tmdb_id": tid, "face_count": fc}))}).to_string())
        }
        "mutual_gaze" => {
            let name_a = identity_name.unwrap_or("");
            let name_b = identity_b.unwrap_or("");
            let row: Option<(i64, i64, f64, f64)> = sqlx::query_as(&format!(
                "SELECT (e.properties->>'first_frame')::bigint, \
                        (e.properties->>'gaze_frame_count')::int::bigint, \
                        (e.properties->>'yaw_a_avg')::float8, \
                        (e.properties->>'yaw_b_avg')::float8 \
                 FROM {} e \
                 JOIN {} a ON a.id = e.source_node_id \
                 JOIN {} b ON b.id = e.target_node_id \
                 JOIN {} fd_a ON fd_a.file_uuid = $1 AND fd_a.trace_id = REPLACE(a.external_id, 'trace_', '')::int \
                 JOIN {} fd_b ON fd_b.file_uuid = $1 AND fd_b.trace_id = REPLACE(b.external_id, 'trace_', '')::int \
                 JOIN {} ia ON ia.id = fd_a.identity_id \
                 JOIN {} ib ON ib.id = fd_b.identity_id \
                 WHERE e.file_uuid = $1 AND ia.name ILIKE $2 AND ib.name ILIKE $3 \
                 AND e.properties->>'mutual_gaze' = 'true' LIMIT 1",
                edges, nodes, nodes, fd_table, fd_table, id_table, id_table
            ))
            .bind(file_uuid).bind(name_a).bind(name_b)
            .fetch_optional(pool)
            .await.map_err(|e| e.to_string())?;
            Ok(serde_json::json!({"mutual_gaze": row.map(|(f, gc, ya, yb)| serde_json::json!({"first_frame": f, "gaze_frame_count": gc, "yaw_a": ya, "yaw_b": yb}))}).to_string())
        }
        "interaction_network" => {
            let rows: Vec<(String, String, i64)> = sqlx::query_as(&format!(
                "SELECT ia.name, ib.name, COUNT(*)::bigint \
                 FROM {} e \
                 JOIN {} a ON a.id = e.source_node_id \
                 JOIN {} b ON b.id = e.target_node_id \
                 JOIN {} fd_a ON fd_a.trace_id = REPLACE(a.external_id, 'trace_', '')::int AND fd_a.file_uuid = $1 \
                 JOIN {} fd_b ON fd_b.trace_id = REPLACE(b.external_id, 'trace_', '')::int AND fd_b.file_uuid = $1 \
                 JOIN {} ia ON ia.id = fd_a.identity_id \
                 JOIN {} ib ON ib.id = fd_b.identity_id \
                 WHERE e.file_uuid = $1 AND e.edge_type = 'CO_OCCURS_WITH' \
                 AND ia.name != ib.name AND ia.source = 'tmdb' AND ib.source = 'tmdb' \
                 GROUP BY ia.name, ib.name \
                 ORDER BY COUNT(*) DESC LIMIT $2",
                edges, nodes, nodes, fd_table, fd_table, id_table, id_table
            ))
            .bind(file_uuid).bind(limit)
            .fetch_all(pool)
            .await.map_err(|e| e.to_string())?;
            Ok(serde_json::json!({"interaction_network": rows}).to_string())
        }
        "identity_traces" => {
            let name = identity_name.unwrap_or("");
            // MIN/MAX frame_number are cast to bigint and decoded as i64.
            let rows: Vec<(i32, i64, i64, i64)> = sqlx::query_as(&format!(
                "SELECT fd.trace_id, COUNT(*)::bigint, MIN(fd.frame_number)::bigint, MAX(fd.frame_number)::bigint \
                 FROM {} fd JOIN {} i ON i.id = fd.identity_id \
                 WHERE fd.file_uuid = $1 AND i.name ILIKE $2 \
                 GROUP BY fd.trace_id ORDER BY COUNT(*) DESC LIMIT $3",
                fd_table, id_table
            ))
            .bind(file_uuid).bind(name).bind(limit)
            .fetch_all(pool)
            .await.map_err(|e| e.to_string())?;
            Ok(serde_json::json!({"traces": rows}).to_string())
        }
        "file_info" => {
            // The numeric columns are decoded as nullable: other queries in this
            // file COALESCE `fps` and `duration`, so NULLs must not fail decoding.
            let row: Option<(String, Option<f64>, Option<i32>, Option<i32>, Option<f64>)> =
                sqlx::query_as(&format!(
                    "SELECT file_name, duration, width, height, fps FROM {} WHERE file_uuid = $1",
                    videos
                ))
                .bind(file_uuid)
                .fetch_optional(pool)
                .await
                .map_err(|e| e.to_string())?;
            Ok(serde_json::json!({"file_info": row.map(|(n, d, w, h, f)| serde_json::json!({"file_name": n, "duration_sec": d, "width": w, "height": h, "fps": f}))}).to_string())
        }
        "speaker_dialogue" => {
            let name = identity_name.unwrap_or("");
            let rows: Vec<(String, Option<String>)> = sqlx::query_as(&format!(
                "SELECT DISTINCT sn.external_id, sn.properties->>'full_text' AS full_text \
                 FROM {} i \
                 JOIN {} fd ON fd.identity_id = i.id AND ($2::text IS NULL OR fd.file_uuid = $2) \
                 JOIN {} fn ON fn.file_uuid = fd.file_uuid \
                     AND fn.node_type = 'face_trace' \
                     AND fn.external_id = CONCAT('trace_', fd.trace_id) \
                 JOIN {} e ON e.source_node_id = fn.id \
                     AND e.edge_type = 'SPEAKS_AS' \
                     AND ($2::text IS NULL OR e.file_uuid = $2) \
                 JOIN {} sn ON sn.id = e.target_node_id \
                 WHERE i.name ILIKE $1 \
                 LIMIT $3",
                id_table, fd_table, nodes, edges, nodes
            ))
            .bind(name)
            .bind(file_uuid)
            .bind(limit)
            .fetch_all(pool)
            .await
            .map_err(|e| e.to_string())?;

            Ok(
                serde_json::json!({"speakers": rows.iter().map(|(sid, text)| {
                serde_json::json!({"speaker_id": sid, "dialogue": text})
            }).collect::<Vec<_>>()})
                .to_string(),
            )
        }
        "speaker_interaction" => {
            let name_a = identity_name.unwrap_or("");
            let name_b = identity_b.unwrap_or("");
            if name_a.is_empty() || name_b.is_empty() {
                return Ok(
                    serde_json::json!({"error": "identity_name and identity_b are required"})
                        .to_string(),
                );
            }

            // Fetch the speaker nodes of both identities. `full_text` and
            // `segments` come from JSON properties and are NULL when the key is
            // absent, so both are decoded as nullable (as `speaker_dialogue`
            // already does for `full_text`).
            let rows: Vec<(String, Option<String>, Option<serde_json::Value>)> = sqlx::query_as(&format!(
                "SELECT sn.external_id, sn.properties->>'full_text' AS full_text, sn.properties->'segments' AS segments \
                 FROM {} i \
                 JOIN {} fd ON fd.identity_id = i.id AND ($3::text IS NULL OR fd.file_uuid = $3) \
                 JOIN {} fn ON fn.file_uuid = fd.file_uuid \
                     AND fn.node_type = 'face_trace' \
                     AND fn.external_id = CONCAT('trace_', fd.trace_id) \
                 JOIN {} e ON e.source_node_id = fn.id \
                     AND e.edge_type = 'SPEAKS_AS' \
                     AND ($3::text IS NULL OR e.file_uuid = $3) \
                 JOIN {} sn ON sn.id = e.target_node_id \
                 WHERE (i.name ILIKE $1 OR i.name ILIKE $2) \
                 ORDER BY sn.external_id",
                id_table, fd_table, nodes, edges, nodes
            ))
            .bind(name_a)
            .bind(name_b)
            .bind(file_uuid)
            .fetch_all(pool)
            .await
            .map_err(|e| e.to_string())?;

            // Compare every pair of returned rows and record each pair of
            // non-empty segments whose time ranges overlap.
            let mut interactions = Vec::new();
            for i in 0..rows.len() {
                for j in i + 1..rows.len() {
                    let (sid_a, _, segs_a_val) = &rows[i];
                    let (sid_b, _, segs_b_val) = &rows[j];
                    let segs_a = segs_a_val.as_ref().and_then(|v| v.as_array());
                    let segs_b = segs_b_val.as_ref().and_then(|v| v.as_array());
                    if let (Some(a_list), Some(b_list)) = (segs_a, segs_b) {
                        for sa in a_list {
                            let sa_start = sa.get("start").and_then(|v| v.as_f64()).unwrap_or(0.0);
                            let sa_end = sa.get("end").and_then(|v| v.as_f64()).unwrap_or(0.0);
                            let sa_text = sa.get("text").and_then(|v| v.as_str()).unwrap_or("");
                            if sa_text.is_empty() {
                                continue;
                            }
                            for sb in b_list {
                                let sb_start =
                                    sb.get("start").and_then(|v| v.as_f64()).unwrap_or(0.0);
                                let sb_end = sb.get("end").and_then(|v| v.as_f64()).unwrap_or(0.0);
                                let sb_text = sb.get("text").and_then(|v| v.as_str()).unwrap_or("");
                                if sb_text.is_empty() {
                                    continue;
                                }
                                // Check temporal overlap
                                let overlap_start = sa_start.max(sb_start);
                                let overlap_end = sa_end.min(sb_end);
                                if overlap_start < overlap_end {
                                    interactions.push(serde_json::json!({
                                        "speaker_a": sid_a,
                                        "speaker_b": sid_b,
                                        "time_range_s": [overlap_start, overlap_end],
                                        "dialogue_a": sa_text,
                                        "dialogue_b": sb_text,
                                    }));
                                }
                            }
                        }
                    }
                }
            }
            // Order by overlap start; incomparable values are treated as equal
            // rather than panicking.
            interactions.sort_by(|a, b| {
                let a_start = a["time_range_s"][0].as_f64().unwrap_or(0.0);
                let b_start = b["time_range_s"][0].as_f64().unwrap_or(0.0);
                a_start
                    .partial_cmp(&b_start)
                    .unwrap_or(std::cmp::Ordering::Equal)
            });
            interactions.truncate(limit as usize);

            Ok(serde_json::json!({"interactions": interactions, "speaker_a_text": rows.first().and_then(|r| r.1.clone()), "speaker_b_text": rows.get(1).and_then(|r| r.1.clone())}).to_string())
        }
        _ => Ok(
            serde_json::json!({"error": format!("Unknown query_type: {}", query_type)}).to_string(),
        ),
    }
}

/// Tool executor for `smart_search`: case-insensitive substring search over
/// chunk `text_content`, optionally restricted to one video.
///
/// Arguments read from `args`: `query` (default empty), optional `file_uuid`,
/// and `limit` (default 5, clamped to `0..=50`). The keyword is matched
/// literally: `\`, `%` and `_` are escaped before it is wrapped in `%…%`.
/// The limit is sent as a bound parameter rather than formatted into the SQL.
///
/// # Returns
/// A JSON string `{"results": [[chunk_id, text_content, start_frame, end_frame, chunk_type], …]}`
/// ordered by `start_frame`.
///
/// # Errors
/// The stringified sqlx error when the query fails.
async fn exec_smart_search(
    _pool: &sqlx::PgPool,
    args: &serde_json::Value,
) -> Result<String, String> {
    /// Escapes the LIKE/ILIKE metacharacters with PostgreSQL's default escape character.
    fn escape_like(raw: &str) -> String {
        let mut out = String::with_capacity(raw.len());
        for ch in raw.chars() {
            if ch == '\\' || ch == '%' || ch == '_' {
                out.push('\\');
            }
            out.push(ch);
        }
        out
    }

    let query = args.get("query").and_then(|v| v.as_str()).unwrap_or("");
    let file_uuid = args.get("file_uuid").and_then(|v| v.as_str());
    let limit = args
        .get("limit")
        .and_then(|v| v.as_i64())
        .unwrap_or(5)
        .max(0)
        .min(50);

    let chunk_table = schema::table_name("chunk");
    let like = format!("%{}%", escape_like(query));
    // A NULL `$2` disables the file filter, so one statement serves both the
    // scoped and the unscoped search.
    let sql = format!(
        "SELECT chunk_id, text_content, start_frame, end_frame, chunk_type \
         FROM {} WHERE text_content ILIKE $1 \
           AND ($2::text IS NULL OR file_uuid = $2) \
         ORDER BY start_frame LIMIT $3",
        chunk_table
    );

    let rows: Vec<(String, Option<String>, i64, i64, String)> = sqlx::query_as(&sql)
        .bind(&like)
        .bind(file_uuid)
        .bind(limit)
        .fetch_all(_pool)
        .await
        .map_err(|e| e.to_string())?;
    Ok(serde_json::json!({"results": rows}).to_string())
}

/// Tool executor for `identity_text`: finds chunks whose text contains the
/// keyword `q` and reports the identities detected on frames inside each chunk.
///
/// Arguments read from `args`: `q` (default empty), optional `file_uuid`, and
/// `limit` (default 10, capped at 50). The keyword is matched literally:
/// `\`, `%` and `_` are escaped with a backslash before it is wrapped in
/// `%…%`. (Doubling `%` does not escape it in LIKE; `%%` is still a wildcard.)
///
/// # Returns
/// A JSON string `{"results": [{chunk_id, start_time, end_time, text,
/// identity_name, trace_id, source}, …]}` ordered by `start_time`.
///
/// # Errors
/// The stringified sqlx error when the query fails.
async fn exec_identity_text(
    pool: &sqlx::PgPool,
    args: &serde_json::Value,
) -> Result<String, String> {
    /// Escapes the LIKE metacharacters with PostgreSQL's default escape character.
    fn escape_like(raw: &str) -> String {
        let mut out = String::with_capacity(raw.len());
        for ch in raw.chars() {
            if ch == '\\' || ch == '%' || ch == '_' {
                out.push('\\');
            }
            out.push(ch);
        }
        out
    }

    let q = args.get("q").and_then(|v| v.as_str()).unwrap_or("");
    let file_uuid = args.get("file_uuid").and_then(|v| v.as_str());
    let limit = args
        .get("limit")
        .and_then(|v| v.as_i64())
        .unwrap_or(10)
        .min(50);

    let chunk_table = schema::table_name("chunk");
    let fd_table = schema::table_name("face_detections");
    let id_table = schema::table_name("identities");
    let like_q = format!("%{}%", escape_like(q));

    let sql = format!(
        "SELECT c.chunk_id, c.start_time, c.end_time, c.text_content, \
                i.name AS identity_name, fd.trace_id, i.source AS identity_source \
         FROM {} c \
         JOIN {} fd ON fd.file_uuid = c.file_uuid \
             AND fd.frame_number BETWEEN c.start_frame AND c.end_frame \
             AND fd.identity_id IS NOT NULL \
         JOIN {} i ON i.id = fd.identity_id \
         WHERE ($1::text IS NULL OR c.file_uuid = $1) \
           AND (LOWER(c.text_content) LIKE LOWER($2) OR LOWER(c.content::text) LIKE LOWER($2)) \
         ORDER BY c.start_time \
         LIMIT $3",
        chunk_table, fd_table, id_table
    );

    let rows: Vec<(
        String,
        f64,
        f64,
        Option<String>,
        String,
        Option<i32>,
        String,
    )> = sqlx::query_as(&sql)
        .bind(file_uuid)
        .bind(&like_q)
        .bind(limit)
        .fetch_all(pool)
        .await
        .map_err(|e| e.to_string())?;

    let results: Vec<serde_json::Value> = rows
        .iter()
        .map(|(chunk_id, st, et, txt, name, tid, src)| {
            serde_json::json!({
                "chunk_id": chunk_id,
                "start_time": st,
                "end_time": et,
                "text": txt,
                "identity_name": name,
                "trace_id": tid,
                "source": src
            })
        })
        .collect();
    Ok(serde_json::json!({"results": results}).to_string())
}

/// Tool executor for `identities_search`: finds identities whose name or alias
/// contains `q` and returns the chunks that overlap their face detections.
///
/// Arguments read from `args`: `q` (default empty), optional `file_uuid`, and
/// `limit` (default 10, capped at 50). The keyword is matched literally:
/// `\`, `%` and `_` are escaped with a backslash before it is wrapped in
/// `%…%`. (Doubling `%` does not escape it in ILIKE; `%%` is still a wildcard.)
///
/// # Returns
/// A JSON string `{"results": [{identity_name, chunk_id, start_time, end_time,
/// text, trace_id}, …]}`, one entry per distinct (identity name, chunk) pair.
///
/// # Errors
/// The stringified sqlx error when the query fails.
async fn exec_identities_search(
    pool: &sqlx::PgPool,
    args: &serde_json::Value,
) -> Result<String, String> {
    /// Escapes the ILIKE metacharacters with PostgreSQL's default escape character.
    fn escape_like(raw: &str) -> String {
        let mut out = String::with_capacity(raw.len());
        for ch in raw.chars() {
            if ch == '\\' || ch == '%' || ch == '_' {
                out.push('\\');
            }
            out.push(ch);
        }
        out
    }

    let q = args.get("q").and_then(|v| v.as_str()).unwrap_or("");
    let file_uuid = args.get("file_uuid").and_then(|v| v.as_str());
    let limit = args
        .get("limit")
        .and_then(|v| v.as_i64())
        .unwrap_or(10)
        .min(50);

    let id_table = schema::table_name("identities");
    let fd_table = schema::table_name("face_detections");
    let chunk_table = schema::table_name("chunk");
    let like_q = format!("%{}%", escape_like(q));

    let sql = format!(
        "SELECT DISTINCT ON (i.name, c.chunk_id) \
                i.name, c.chunk_id, c.start_time, c.end_time, c.text_content, fd.trace_id \
         FROM {} i \
         JOIN {} fd ON fd.identity_id = i.id \
         JOIN {} c ON c.file_uuid = fd.file_uuid \
             AND c.start_time <= fd.frame_number / COALESCE(c.fps, 25.0) \
             AND c.end_time >= fd.frame_number / COALESCE(c.fps, 25.0) \
         WHERE (i.name ILIKE $1 \
             OR EXISTS (SELECT 1 FROM jsonb_array_elements(i.metadata->'aliases') AS a WHERE a->>'name' ILIKE $1)) \
           AND ($2::text IS NULL OR fd.file_uuid = $2) \
         ORDER BY i.name, c.chunk_id, c.start_time \
         LIMIT $3",
        id_table, fd_table, chunk_table
    );

    let rows: Vec<(String, String, f64, f64, Option<String>, Option<i32>)> = sqlx::query_as(&sql)
        .bind(&like_q)
        .bind(file_uuid)
        .bind(limit)
        .fetch_all(pool)
        .await
        .map_err(|e| e.to_string())?;

    let results: Vec<serde_json::Value> = rows
        .iter()
        .map(|(name, chunk_id, st, et, txt, tid)| {
            serde_json::json!({
                "identity_name": name,
                "chunk_id": chunk_id,
                "start_time": st,
                "end_time": et,
                "text": txt,
                "trace_id": tid,
            })
        })
        .collect();
    Ok(serde_json::json!({"results": results}).to_string())
}

/// Tool executor for `get_identity_detail`: looks up the first identity whose
/// name matches the `name` argument (ILIKE, no wildcards added).
///
/// # Returns
/// A JSON string `{"identity": {uuid, name, source, tmdb_id, character}}`, or
/// `{"identity": null}` when no row matches.
///
/// # Errors
/// The stringified sqlx error when the query fails.
async fn exec_get_identity_detail(
    pool: &sqlx::PgPool,
    args: &serde_json::Value,
) -> Result<String, String> {
    let wanted = match args.get("name") {
        Some(value) => value.as_str().unwrap_or(""),
        None => "",
    };
    let sql = format!(
        "SELECT uuid::text, name, source, tmdb_id, metadata->>'tmdb_character' FROM {} WHERE name ILIKE $1 LIMIT 1",
        schema::table_name("identities")
    );

    let found: Option<(String, String, Option<String>, Option<i32>, Option<String>)> =
        sqlx::query_as(&sql)
            .bind(wanted)
            .fetch_optional(pool)
            .await
            .map_err(|err| err.to_string())?;

    let identity = match found {
        Some((uuid, name, source, tmdb_id, character)) => Some(serde_json::json!({
            "uuid": uuid,
            "name": name,
            "source": source,
            "tmdb_id": tmdb_id,
            "character": character
        })),
        None => None,
    };
    Ok(serde_json::json!({ "identity": identity }).to_string())
}

/// Tool executor for `get_file_info`: returns the file name, duration,
/// resolution and fps of the video identified by `file_uuid`.
///
/// The numeric columns are decoded as nullable and appear as `null` in the
/// output when unset. Other queries in this file COALESCE `fps` and `duration`,
/// so a NULL there must not turn the whole lookup into a decode error.
///
/// # Returns
/// A JSON string `{"file_info": {file_name, duration_sec, width, height, fps}}`,
/// or `{"file_info": null}` when the video does not exist.
///
/// # Errors
/// The stringified sqlx error when the query fails.
async fn exec_get_file_info(
    pool: &sqlx::PgPool,
    args: &serde_json::Value,
) -> Result<String, String> {
    let file_uuid = args.get("file_uuid").and_then(|v| v.as_str()).unwrap_or("");
    let videos = schema::table_name("videos");
    let row: Option<(String, Option<f64>, Option<i32>, Option<i32>, Option<f64>)> =
        sqlx::query_as(&format!(
            "SELECT file_name, duration, width, height, fps FROM {} WHERE file_uuid = $1",
            videos
        ))
        .bind(file_uuid)
        .fetch_optional(pool)
        .await
        .map_err(|e| e.to_string())?;
    Ok(serde_json::json!({"file_info": row.map(|(n, d, w, h, f)| serde_json::json!({"file_name": n, "duration_sec": d, "width": w, "height": h, "fps": f}))}).to_string())
}

async fn exec_get_representative_frame(
    pool: &sqlx::PgPool,
    args: &serde_json::Value,
) -> Result<String, String> {
    let file_uuid = args.get("file_uuid").and_then(|v| v.as_str()).unwrap_or("");
    match crate::core::processor::tkg::query_auto_representative_frame(pool, file_uuid).await {
        Ok(r) => Ok(serde_json::json!({
            "frame_number": r.frame_number,
            "face_quality": r.face_quality,
            "main_identities": r.main_identities,
            "traces": r.traces,
        })
        .to_string()),
        Err(e) => Ok(serde_json::json!({"error": e.to_string()}).to_string()),
    }
}

/// Tool handler: extracts one frame of a video with ffmpeg and asks the vision LLM about it.
///
/// Arguments read from `args`:
/// - `file_uuid` (string, required): which video to read.
/// - `question` (string, optional): prompt for the vision model; a default
///   "describe this frame" prompt is used when absent.
/// - `frame_number` (integer, optional): frame to analyse. When absent, the
///   auto-selected representative frame is used, falling back to the frame at
///   the middle of the video (or frame 0 when the duration is not positive).
///
/// Returns a JSON string with `frame_number`, `timestamp_secs` and `analysis`,
/// or `{"error": ...}` for a missing `file_uuid`, an ffmpeg failure, or an
/// ffmpeg run that produced no image bytes. Database errors, an unknown video,
/// process-spawn errors and vision-LLM errors are returned as `Err`.
async fn exec_analyze_frame(
    pool: &sqlx::PgPool,
    args: &serde_json::Value,
) -> Result<String, String> {
    // Frame rate assumed when the stored value cannot be used as a divisor.
    const DEFAULT_FPS: f64 = 25.0;

    let file_uuid = args.get("file_uuid").and_then(|v| v.as_str()).unwrap_or("");
    let question = args
        .get("question")
        .and_then(|v| v.as_str())
        .unwrap_or("請描述這個畫面中的內容");

    if file_uuid.is_empty() {
        return Ok(serde_json::json!({"error": "file_uuid is required"}).to_string());
    }

    let videos = schema::table_name("videos");
    let (video_path, stored_fps): (String, f64) = sqlx::query_as(&format!(
        "SELECT file_path, COALESCE(fps, 25.0) FROM {} WHERE file_uuid = $1",
        videos
    ))
    .bind(file_uuid)
    .fetch_optional(pool)
    .await
    .map_err(|e| e.to_string())?
    .ok_or_else(|| "Video not found".to_string())?;

    // COALESCE only covers NULL; a stored 0 (or negative / non-finite) rate would
    // turn the timestamp below into inf/NaN, so fall back to the default instead.
    let fps = if stored_fps.is_finite() && stored_fps > 0.0 {
        stored_fps
    } else {
        DEFAULT_FPS
    };

    let frame_number = match args.get("frame_number").and_then(|v| v.as_i64()) {
        Some(f) => f,
        None => {
            match crate::core::processor::tkg::query_auto_representative_frame(pool, file_uuid)
                .await
            {
                Ok(r) => r.frame_number,
                Err(_) => {
                    // No representative frame available: use the middle of the video.
                    let duration: f64 = sqlx::query_scalar(&format!(
                        "SELECT COALESCE(duration, 0) FROM {} WHERE file_uuid = $1",
                        videos
                    ))
                    .bind(file_uuid)
                    .fetch_optional(pool)
                    .await
                    .map_err(|e| e.to_string())?
                    .unwrap_or(0.0);
                    if duration > 0.0 {
                        ((duration / 2.0) * fps) as i64
                    } else {
                        0
                    }
                }
            }
        }
    }
    // A negative frame index would become a negative seek position.
    .max(0);

    let timestamp_secs = frame_number as f64 / fps;

    // MOMENTRY_FFMPEG overrides the binary; otherwise prefer the Homebrew
    // "ffmpeg-full" build when it exists and fall back to `ffmpeg` on PATH.
    let ffmpeg_path = std::env::var("MOMENTRY_FFMPEG").unwrap_or_else(|_| {
        let full = "/opt/homebrew/opt/ffmpeg-full/bin/ffmpeg";
        if std::path::Path::new(full).exists() {
            full.to_string()
        } else {
            "ffmpeg".to_string()
        }
    });

    // Seek (`-ss` before `-i`), decode a single frame and write it as JPEG to stdout.
    let output = tokio::process::Command::new(&ffmpeg_path)
        .args([
            "-ss",
            &format!("{:.3}", timestamp_secs),
            "-i",
            &video_path,
            "-vframes",
            "1",
            "-f",
            "image2pipe",
            "-vcodec",
            "mjpeg",
            "-",
        ])
        .output()
        .await
        .map_err(|e| format!("ffmpeg execution error: {}", e))?;

    if !output.status.success() {
        let stderr = String::from_utf8_lossy(&output.stderr);
        return Ok(serde_json::json!({"error": format!("ffmpeg failed: {}", stderr)}).to_string());
    }

    // ffmpeg can exit successfully without emitting a frame (for example when the
    // seek position is past the end of the stream); do not send an empty image on.
    if output.stdout.is_empty() {
        return Ok(serde_json::json!({
            "error": format!(
                "ffmpeg produced no image data for frame {} ({:.3}s)",
                frame_number, timestamp_secs
            )
        })
        .to_string());
    }

    let base64_img = BASE64.encode(&output.stdout);

    let system_prompt =
        "你是一個專業的影片畫面分析助手。請根據提供的畫面以及用戶的問題，詳細描述畫面中的內容，包括場景、人物、動作、表情、物件等。請用繁體中文回答。";
    let vision_result = call_llm_vision(system_prompt, question, vec![base64_img], 1024, 120)
        .await
        .map_err(|e| e.to_string())?;

    Ok(serde_json::json!({
        "frame_number": frame_number,
        "timestamp_secs": timestamp_secs,
        "analysis": vision_result,
    })
    .to_string())
}

// ── Tool Router ───────────────────────────────────────────────────

async fn execute_tool(pool: &sqlx::PgPool, tool_call: &ToolCall) -> (String, String, String) {
    let name = tool_call.function.name.clone();
    let tool_call_id = tool_call.id.clone().unwrap_or_default();
    let args: serde_json::Value =
        match serde_json::from_str(&tool_call.function.arguments) {
            Ok(v) => v,
            Err(e) => return (tool_call_id, name, serde_json::json!({"error": format!("Invalid arguments: {}", e)}).to_string()),
        };
    let result = match name.as_str() {
        "find_file" => exec_find_file(pool, &args).await,
        "list_files" => exec_list_files(pool, &args).await,
        "tkg_query" => exec_tkg_query(pool, &args).await,
        "smart_search" => exec_smart_search(pool, &args).await,
        "identity_text" => exec_identity_text(pool, &args).await,
        "identities_search" => exec_identities_search(pool, &args).await,
        "get_identity_detail" => exec_get_identity_detail(pool, &args).await,
        "get_file_info" => exec_get_file_info(pool, &args).await,
        "get_representative_frame" => exec_get_representative_frame(pool, &args).await,
        "analyze_frame" => exec_analyze_frame(pool, &args).await,
        _ => Err(format!("Unknown tool: {}", name)),
    };
    let content = match result {
        Ok(s) => s,
        Err(e) => serde_json::json!({"error": e}).to_string(),
    };
    (tool_call_id, name, content)
}

// ── Tool Loop ─────────────────────────────────────────────────────

/// Upper bound on LLM round-trips in `run_tool_loop`; once exhausted the loop
/// stops and returns a fixed "maximum queries reached" message.
const MAX_ROUNDS: u32 = 15;

/// Drives the LLM tool-calling loop for one user query.
///
/// Builds the conversation from `system_prompt`, the prior `history` and
/// `user_query`, then repeatedly calls the LLM for up to [`MAX_ROUNDS`] rounds:
/// - a text response ends the loop and becomes the answer;
/// - a tool-call response is recorded as an assistant message, each requested
///   tool is executed in order, and its result is appended as a tool message;
/// - an LLM error ends the loop with an error text as the answer.
///
/// Returns `(answer, messages, sources)`: the answer text, the full message
/// list (including the final assistant answer when the LLM produced one), and
/// one `{"tool", "result"}` entry per executed tool call.
async fn run_tool_loop(
    pool: &sqlx::PgPool,
    system_prompt: &str,
    user_query: &str,
    history: Vec<ChatMessage>,
) -> (String, Vec<ChatMessage>, Vec<serde_json::Value>) {
    let mut messages = function_calling::build_conversation(system_prompt, user_query, history);
    let mut sources = Vec::new();

    for round in 0..MAX_ROUNDS {
        let tools = make_tools(pool);
        tracing::info!(
            "[AGENT] Round {} started, message_count: {}, tools_available: {}",
            round + 1,
            messages.len(),
            tools.len()
        );

        match function_calling::call_llm(messages.clone(), Some(tools.clone()), 2048, 120).await {
            Ok(LlmResponse::Text(text)) => {
                tracing::info!(
                    "[AGENT] Loop finished: rounds_used={}, total_tools_called={}, answer_length={} chars",
                    round + 1,
                    sources.len(),
                    text.len()
                );
                // Keep the final answer in the transcript. The caller stores this
                // list as conversation history; without this entry the next turn
                // would replay the user's questions with no assistant replies.
                messages.push(ChatMessage {
                    role: "assistant".to_string(),
                    content: Some(text.clone()),
                    tool_calls: None,
                    tool_call_id: None,
                    name: None,
                });
                return (text, messages, sources);
            }
            Ok(LlmResponse::ToolCalls(calls)) => {
                // The assistant turn that requested the tools must precede their results.
                messages.push(ChatMessage {
                    role: "assistant".to_string(),
                    content: None,
                    tool_calls: Some(calls.clone()),
                    tool_call_id: None,
                    name: None,
                });
                for call in &calls {
                    let (tool_call_id, name, content) = execute_tool(pool, call).await;
                    tracing::info!(
                        "[AGENT] Tool called: {}, result_size: {} chars, round: {}",
                        name,
                        content.len(),
                        round + 1
                    );
                    sources.push(serde_json::json!({"tool": name, "result": content}));
                    messages.push(function_calling::make_tool_result(
                        &tool_call_id, &name, &content,
                    ));
                }
            }
            Err(e) => {
                tracing::error!("[AGENT] LLM call failed: {}", e);
                return (format!("系統錯誤：{}", e), messages, sources);
            }
        }
    }
    tracing::warn!(
        "[AGENT] Max rounds reached: rounds_used={}, total_tools_called={}",
        MAX_ROUNDS,
        sources.len()
    );
    (
        "已達到最大查詢次數，請縮小問題範圍後重新詢問。".to_string(),
        messages,
        sources,
    )
}

// ── Handler ───────────────────────────────────────────────────────

/// Axum handler for the agent search endpoint.
///
/// Resumes (or creates) the conversation named by `conversation_id`, runs the
/// tool loop for `query`, stores the resulting messages back under the
/// conversation id, and returns the answer together with the tool results
/// used as `sources`. When the answer contains a question mark (full-width or
/// ASCII) a fixed list of follow-up suggestions is attached.
async fn agent_search(
    State(state): State<AppState>,
    Json(req): Json<AgentSearchRequest>,
) -> Result<Json<AgentSearchResponse>, (StatusCode, Json<serde_json::Value>)> {
    let (conv_id, prior_messages) = get_or_create_conv(req.conversation_id.as_deref());

    let (answer, transcript, sources) =
        run_tool_loop(state.db.pool(), SYSTEM_PROMPT, &req.query, prior_messages).await;

    // Persist everything except the leading system prompt, which
    // build_conversation re-adds on the next turn.
    save_messages(&conv_id, transcript.get(1..).unwrap_or_default());

    // A question mark in the answer is treated as the agent asking for more input.
    let asks_question = answer.chars().any(|ch| matches!(ch, '？' | '?'));
    let suggestions = asks_question.then(|| {
        ["演員名", "電影片名", "年份"]
            .iter()
            .map(|hint| hint.to_string())
            .collect::<Vec<String>>()
    });

    let response = AgentSearchResponse {
        success: true,
        conversation_id: conv_id,
        answer,
        suggestions,
        sources: Some(sources),
    };
    Ok(Json(response))
}

// ── Routes ─────────────────────────────────────────────────────────

/// Builds the router that exposes the agent search handler at
/// `POST /api/v1/agents/search`.
pub fn agent_search_routes() -> Router<AppState> {
    let search_endpoint = post(agent_search);
    Router::new().route("/api/v1/agents/search", search_endpoint)
}