From bd82028f348904d406b3fb2a00e92b444661a174 Mon Sep 17 00:00:00 2001
From: Accusys <accusys@momentry.dev>
Date: Fri, 22 May 2026 15:47:17 +0800
Subject: [PATCH] refactor: unified LLM config -
 CHAT_URL/VISION_URL/SUMMARY_URL with env var overrides

---
 src/api/agent_api.rs             | 21 ++++++---------
 src/api/five_w1h_agent_api.rs    | 20 ++------------
 src/core/config.rs               | 45 ++++++++++++++++++++++++++++++--
 src/core/llm/function_calling.rs | 28 +++++++++++++-------
 4 files changed, 72 insertions(+), 42 deletions(-)
diff --git a/src/api/agent_api.rs b/src/api/agent_api.rs
index 74db3f3..5fad979 100644
--- a/src/api/agent_api.rs
+++ b/src/api/agent_api.rs
@@ -41,10 +41,10 @@ async fn translate_text(
         req.target_language, req.text
     );
 
-    // Call Gemma4 via llama.cpp (port 8082, OpenAI-compatible API)
+    // Call LLM via configurable endpoint
     let client = Client::new();
-    let llm_url = "http://localhost:8082/v1/chat/completions";
-    let model = "google_gemma-4-26B-A4B-it-Q5_K_M.gguf".to_string();
+    let llm_url = crate::core::config::llm::CHAT_URL.as_str();
+    let model = crate::core::config::llm::CHAT_MODEL.as_str();
 
     let body = serde_json::json!({
         "model": model,
@@ -71,20 +71,15 @@ async fn translate_text(
         )
     })?;
 
-    let translated_text = llm_resp
-        .get("choices")
-        .and_then(|c| c.as_array())
-        .and_then(|c| c.first())
-        .and_then(|c| c.get("message"))
-        .and_then(|m| m.get("content"))
-        .and_then(|v| v.as_str())
-        .unwrap_or("Translation failed")
+    let translated_text = llm_resp["choices"][0]["message"]["content"]
+        .as_str()
+        .unwrap_or("")
         .to_string();
 
     Ok(Json(TranslationResponse {
         success: true,
         translated_text,
-        source_language_detected: req.source_language.unwrap_or("unknown".to_string()),
-        model_used: model,
+        source_language_detected: req.source_language.unwrap_or_else(|| "auto".to_string()),
+        model_used: model.to_string(),
     }))
 }
diff --git a/src/api/five_w1h_agent_api.rs b/src/api/five_w1h_agent_api.rs
index 99a06df..759a8f9 100644
--- a/src/api/five_w1h_agent_api.rs
+++ b/src/api/five_w1h_agent_api.rs
@@ -96,27 +96,11 @@ struct SceneSummaryResult {
 // ── LLM Endpoint ──
 
 fn llm_base_url() -> String {
-    let v = std::env::var("MOMENTRY_LLM_URL");
-    if v.is_ok() {
-        return v.unwrap();
-    }
-    let v = std::env::var("MOMENTRY_LLM_SUMMARY_URL");
-    if v.is_ok() {
-        return v.unwrap();
-    }
-    "http://localhost:8082/v1/chat/completions".to_string()
+    crate::core::config::llm::SUMMARY_URL.clone()
 }
 
 fn llm_model() -> String {
-    let v = std::env::var("MOMENTRY_LLM_MODEL");
-    if v.is_ok() {
-        return v.unwrap();
-    }
-    let v = std::env::var("MOMENTRY_LLM_SUMMARY_MODEL");
-    if v.is_ok() {
-        return v.unwrap();
-    }
-    "google_gemma-4-26B-A4B-it-Q5_K_M.gguf".to_string()
+    crate::core::config::llm::SUMMARY_MODEL.clone()
 }
 
 // ── Data Fetching ──
diff --git a/src/core/config.rs b/src/core/config.rs
index 7d46e1e..3c891d6 100644
--- a/src/core/config.rs
+++ b/src/core/config.rs
@@ -216,13 +216,47 @@ pub mod cache {
 pub mod llm {
     use super::*;
 
+    /// Chat / function-calling LLM endpoint (agents/search, translation, etc.)
+    /// Default: http://127.0.0.1:8082/v1/chat/completions
+    pub static CHAT_URL: Lazy<String> = Lazy::new(|| {
+        env::var("MOMENTRY_LLM_CHAT_URL")
+            .or_else(|_| env::var("MOMENTRY_LLM_URL")) // generic var before summary var
+            .or_else(|_| env::var("MOMENTRY_LLM_SUMMARY_URL"))
+            .unwrap_or_else(|_| "http://127.0.0.1:8082/v1/chat/completions".to_string())
+    });
+
+    pub static CHAT_MODEL: Lazy<String> = Lazy::new(|| {
+        env::var("MOMENTRY_LLM_CHAT_MODEL")
+            .or_else(|_| env::var("MOMENTRY_LLM_MODEL")) // generic var before summary var
+            .or_else(|_| env::var("MOMENTRY_LLM_SUMMARY_MODEL"))
+            .unwrap_or_else(|_| "google_gemma-4-26B-A4B-it-Q5_K_M.gguf".to_string())
+    });
+
+    /// Vision LLM endpoint (frame analysis, OCR). Can be same as CHAT_URL or different.
+    /// Read from MOMENTRY_LLM_VISION_URL; falls back to CHAT_URL when that lookup fails.
+    pub static VISION_URL: Lazy<String> = Lazy::new(|| {
+        env::var("MOMENTRY_LLM_VISION_URL")
+            .unwrap_or_else(|_| CHAT_URL.clone())
+    });
+
+    pub static VISION_MODEL: Lazy<String> = Lazy::new(|| {
+        env::var("MOMENTRY_LLM_VISION_MODEL")
+            .unwrap_or_else(|_| CHAT_MODEL.clone())
+    });
+
+    /// Text summary LLM endpoint (5W1H, story). Can be same as CHAT_URL or different.
     pub static SUMMARY_URL: Lazy<String> = Lazy::new(|| {
         env::var("MOMENTRY_LLM_SUMMARY_URL")
-            .unwrap_or_else(|_| "http://127.0.0.1:8081/v1/chat/completions".to_string())
+            // No summary-specific override available: fall back to the resolved
+            // chat endpoint, mirroring how VISION_URL falls back.
+            .unwrap_or_else(|_| CHAT_URL.clone())
     });
 
     pub static SUMMARY_MODEL: Lazy<String> = Lazy::new(|| {
-        env::var("MOMENTRY_LLM_SUMMARY_MODEL").unwrap_or_else(|_| "gemma4".to_string())
+        env::var("MOMENTRY_LLM_SUMMARY_MODEL")
+            // No summary-specific override available: fall back to the resolved
+            // chat model, mirroring how VISION_MODEL falls back.
+            .unwrap_or_else(|_| CHAT_MODEL.clone())
     });
 
     pub static SUMMARY_TIMEOUT_SECS: Lazy<u64> = Lazy::new(|| {
@@ -237,6 +271,13 @@ pub mod llm {
             .map(|v| v == "true" || v == "1")
             .unwrap_or(true)
     });
+
+    /// Chat LLM timeout in seconds, read from MOMENTRY_LLM_CHAT_TIMEOUT.
+    /// Falls back to 120 when the variable cannot be read or does not parse as u64.
+    pub static CHAT_TIMEOUT_SECS: Lazy<u64> = Lazy::new(|| {
+        let raw = env::var("MOMENTRY_LLM_CHAT_TIMEOUT").ok();
+        raw.and_then(|secs| secs.parse().ok()).unwrap_or(120)
+    });
 }
 
 pub static SFTPGO_BASE_URL: Lazy<String> = Lazy::new(|| {
diff --git a/src/core/llm/function_calling.rs b/src/core/llm/function_calling.rs
index 5e1fc18..1add57c 100644
--- a/src/core/llm/function_calling.rs
+++ b/src/core/llm/function_calling.rs
@@ -1,6 +1,8 @@
 use serde::{Deserialize, Serialize};
 use serde_json::{json, Value};
 
+use crate::core::config;
+
 /// A tool/function definition for Gemma4 function calling
 #[derive(Debug, Clone, Serialize)]
 pub struct ToolDef {
@@ -75,18 +77,24 @@ pub enum LlmResponse {
     ToolCalls(Vec<ToolCall>),
 }
 
-/// Get the LLM chat URL with fallback chain
+/// Get the LLM chat URL: an owned copy of `config::llm::CHAT_URL`
 pub fn llm_chat_url() -> String {
-    std::env::var("MOMENTRY_LLM_URL")
-        .or_else(|_| std::env::var("MOMENTRY_LLM_SUMMARY_URL"))
-        .unwrap_or_else(|_| "http://localhost:8082/v1/chat/completions".to_string())
+    config::llm::CHAT_URL.clone()
 }
 
-/// Get the LLM model name
+/// Get the LLM model name: an owned copy of `config::llm::CHAT_MODEL`
 pub fn llm_model() -> String {
-    std::env::var("MOMENTRY_LLM_MODEL")
-        .or_else(|_| std::env::var("MOMENTRY_LLM_SUMMARY_MODEL"))
-        .unwrap_or_else(|_| "google_gemma-4-26B-A4B-it-Q5_K_M.gguf".to_string())
+    config::llm::CHAT_MODEL.clone()
+}
+
+/// Get the vision LLM URL: an owned copy of `config::llm::VISION_URL`
+pub fn llm_vision_url() -> String {
+    config::llm::VISION_URL.clone()
+}
+
+/// Get the vision LLM model name: an owned copy of `config::llm::VISION_MODEL`
+pub fn llm_vision_model() -> String {
+    config::llm::VISION_MODEL.clone()
 }
 
 /// Build a tool definition JSON for function calling
@@ -113,7 +121,9 @@ pub async fn call_llm(
     timeout_secs: u64,
 ) -> anyhow::Result<LlmResponse> {
     let client = reqwest::Client::builder()
-        .timeout(std::time::Duration::from_secs(timeout_secs))
+        .timeout(std::time::Duration::from_secs(
+            if timeout_secs > 0 { timeout_secs } else { *config::llm::CHAT_TIMEOUT_SECS },
+        ))
         .build()?;
 
     let req = ChatRequest {