From bd82028f348904d406b3fb2a00e92b444661a174 Mon Sep 17 00:00:00 2001 From: Accusys Date: Fri, 22 May 2026 15:47:17 +0800 Subject: [PATCH] refactor: unified LLM config - CHAT_URL/VISION_URL/SUMMARY_URL with env var overrides --- src/api/agent_api.rs | 21 ++++++--------- src/api/five_w1h_agent_api.rs | 20 ++------------ src/core/config.rs | 45 ++++++++++++++++++++++++++++++-- src/core/llm/function_calling.rs | 28 +++++++++++++------- 4 files changed, 72 insertions(+), 42 deletions(-) diff --git a/src/api/agent_api.rs b/src/api/agent_api.rs index 74db3f3..5fad979 100644 --- a/src/api/agent_api.rs +++ b/src/api/agent_api.rs @@ -41,10 +41,10 @@ async fn translate_text( req.target_language, req.text ); - // Call Gemma4 via llama.cpp (port 8082, OpenAI-compatible API) + // Call LLM via configurable endpoint let client = Client::new(); - let llm_url = "http://localhost:8082/v1/chat/completions"; - let model = "google_gemma-4-26B-A4B-it-Q5_K_M.gguf".to_string(); + let llm_url = crate::core::config::llm::CHAT_URL.as_str(); + let model = crate::core::config::llm::CHAT_MODEL.as_str(); let body = serde_json::json!({ "model": model, @@ -71,20 +71,15 @@ async fn translate_text( ) })?; - let translated_text = llm_resp - .get("choices") - .and_then(|c| c.as_array()) - .and_then(|c| c.first()) - .and_then(|c| c.get("message")) - .and_then(|m| m.get("content")) - .and_then(|v| v.as_str()) - .unwrap_or("Translation failed") + let translated_text = llm_resp["choices"][0]["message"]["content"] + .as_str() + .unwrap_or("") .to_string(); Ok(Json(TranslationResponse { success: true, translated_text, - source_language_detected: req.source_language.unwrap_or("unknown".to_string()), - model_used: model, + source_language_detected: req.source_language.unwrap_or_else(|| "auto".to_string()), + model_used: model.to_string(), })) } diff --git a/src/api/five_w1h_agent_api.rs b/src/api/five_w1h_agent_api.rs index 99a06df..759a8f9 100644 --- a/src/api/five_w1h_agent_api.rs +++ b/src/api/five_w1h_agent_api.rs @@ -96,27 +96,11 @@ struct SceneSummaryResult { // ── LLM Endpoint ── fn llm_base_url() -> String { - let v = std::env::var("MOMENTRY_LLM_URL"); - if v.is_ok() { - return v.unwrap(); - } - let v = std::env::var("MOMENTRY_LLM_SUMMARY_URL"); - if v.is_ok() { - return v.unwrap(); - } - "http://localhost:8082/v1/chat/completions".to_string() + crate::core::config::llm::SUMMARY_URL.clone() } fn llm_model() -> String { - let v = std::env::var("MOMENTRY_LLM_MODEL"); - if v.is_ok() { - return v.unwrap(); - } - let v = std::env::var("MOMENTRY_LLM_SUMMARY_MODEL"); - if v.is_ok() { - return v.unwrap(); - } - "google_gemma-4-26B-A4B-it-Q5_K_M.gguf".to_string() + crate::core::config::llm::SUMMARY_MODEL.clone() } // ── Data Fetching ── diff --git a/src/core/config.rs b/src/core/config.rs index 7d46e1e..3c891d6 100644 --- a/src/core/config.rs +++ b/src/core/config.rs @@ -216,13 +216,47 @@ pub mod cache { pub mod llm { use super::*; + /// Chat / function-calling LLM endpoint (agents/search, translation, etc.) + /// Default: http://127.0.0.1:8082/v1/chat/completions + pub static CHAT_URL: Lazy = Lazy::new(|| { + env::var("MOMENTRY_LLM_CHAT_URL") + .or_else(|_| env::var("MOMENTRY_LLM_SUMMARY_URL")) + .or_else(|_| env::var("MOMENTRY_LLM_URL")) + .unwrap_or_else(|_| "http://127.0.0.1:8082/v1/chat/completions".to_string()) + }); + + pub static CHAT_MODEL: Lazy = Lazy::new(|| { + env::var("MOMENTRY_LLM_CHAT_MODEL") + .or_else(|_| env::var("MOMENTRY_LLM_SUMMARY_MODEL")) + .or_else(|_| env::var("MOMENTRY_LLM_MODEL")) + .unwrap_or_else(|_| "google_gemma-4-26B-A4B-it-Q5_K_M.gguf".to_string()) + }); + + /// Vision LLM endpoint (frame analysis, OCR). Can be same as CHAT_URL or different. + /// Default: falls back to CHAT_URL + pub static VISION_URL: Lazy = Lazy::new(|| { + env::var("MOMENTRY_LLM_VISION_URL") + .unwrap_or_else(|_| CHAT_URL.clone()) + }); + + pub static VISION_MODEL: Lazy = Lazy::new(|| { + env::var("MOMENTRY_LLM_VISION_MODEL") + .unwrap_or_else(|_| CHAT_MODEL.clone()) + }); + + /// Text summary LLM endpoint (5W1H, story). Can be same as CHAT_URL or different. pub static SUMMARY_URL: Lazy = Lazy::new(|| { env::var("MOMENTRY_LLM_SUMMARY_URL") - .unwrap_or_else(|_| "http://127.0.0.1:8081/v1/chat/completions".to_string()) + .ok() + .or_else(|| Some(CHAT_URL.clone())) + .unwrap() }); pub static SUMMARY_MODEL: Lazy = Lazy::new(|| { - env::var("MOMENTRY_LLM_SUMMARY_MODEL").unwrap_or_else(|_| "gemma4".to_string()) + env::var("MOMENTRY_LLM_SUMMARY_MODEL") + .ok() + .or_else(|| Some(CHAT_MODEL.clone())) + .unwrap() }); pub static SUMMARY_TIMEOUT_SECS: Lazy = Lazy::new(|| { @@ -237,6 +271,13 @@ pub mod llm { .map(|v| v == "true" || v == "1") .unwrap_or(true) }); + + pub static CHAT_TIMEOUT_SECS: Lazy = Lazy::new(|| { + env::var("MOMENTRY_LLM_CHAT_TIMEOUT") + .unwrap_or_else(|_| "120".to_string()) + .parse() + .unwrap_or(120) + }); } pub static SFTPGO_BASE_URL: Lazy = Lazy::new(|| { diff --git a/src/core/llm/function_calling.rs b/src/core/llm/function_calling.rs index 5e1fc18..1add57c 100644 --- a/src/core/llm/function_calling.rs +++ b/src/core/llm/function_calling.rs @@ -1,6 +1,8 @@ use serde::{Deserialize, Serialize}; use serde_json::{json, Value}; +use crate::core::config; + /// A tool/function definition for Gemma4 function calling #[derive(Debug, Clone, Serialize)] pub struct ToolDef { @@ -75,18 +77,24 @@ pub enum LlmResponse { ToolCalls(Vec), } -/// Get the LLM chat URL with fallback chain +/// Get the LLM chat URL from centralized config pub fn llm_chat_url() -> String { - std::env::var("MOMENTRY_LLM_URL") - .or_else(|_| std::env::var("MOMENTRY_LLM_SUMMARY_URL")) - .unwrap_or_else(|_| "http://localhost:8082/v1/chat/completions".to_string()) + config::llm::CHAT_URL.clone() } -/// Get the LLM model name +/// Get the LLM model name from centralized config pub fn llm_model() -> String { - std::env::var("MOMENTRY_LLM_MODEL") - .or_else(|_| std::env::var("MOMENTRY_LLM_SUMMARY_MODEL")) - .unwrap_or_else(|_| "google_gemma-4-26B-A4B-it-Q5_K_M.gguf".to_string()) + config::llm::CHAT_MODEL.clone() +} + +/// Get the vision LLM URL +pub fn llm_vision_url() -> String { + config::llm::VISION_URL.clone() +} + +/// Get the vision LLM model name +pub fn llm_vision_model() -> String { + config::llm::VISION_MODEL.clone() } /// Build a tool definition JSON for function calling @@ -113,7 +121,9 @@ pub async fn call_llm( timeout_secs: u64, ) -> anyhow::Result { let client = reqwest::Client::builder() - .timeout(std::time::Duration::from_secs(timeout_secs)) + .timeout(std::time::Duration::from_secs( + if timeout_secs > 0 { timeout_secs } else { *config::llm::CHAT_TIMEOUT_SECS }, + )) .build()?; let req = ChatRequest {