refactor: unified LLM config - CHAT_URL/VISION_URL/SUMMARY_URL with env var overrides

This commit is contained in:
Accusys
2026-05-22 15:47:17 +08:00
parent a78b5bc12b
commit bd82028f34
4 changed files with 72 additions and 42 deletions

View File

@@ -216,13 +216,47 @@ pub mod cache {
pub mod llm {
use super::*;
/// Chat / function-calling LLM endpoint (agents/search, translation, etc.).
///
/// Computed by the `Lazy` initializer from the first of these environment
/// variables that holds a non-blank value:
///
/// 1. `MOMENTRY_LLM_CHAT_URL`
/// 2. `MOMENTRY_LLM_SUMMARY_URL`
/// 3. `MOMENTRY_LLM_URL`
///
/// Default: http://127.0.0.1:8082/v1/chat/completions
pub static CHAT_URL: Lazy<String> = Lazy::new(|| {
    // A variable that is exported but blank (e.g. `VAR=` in an env file) is
    // treated as unset, so it cannot shadow the later fallbacks with an
    // empty endpoint URL.
    ["MOMENTRY_LLM_CHAT_URL", "MOMENTRY_LLM_SUMMARY_URL", "MOMENTRY_LLM_URL"]
        .iter()
        .find_map(|key| env::var(key).ok().filter(|value| !value.trim().is_empty()))
        .unwrap_or_else(|| "http://127.0.0.1:8082/v1/chat/completions".to_string())
});
/// Model name sent to the chat / function-calling endpoint.
///
/// Taken from the first of these environment variables that holds a
/// non-blank value:
///
/// 1. `MOMENTRY_LLM_CHAT_MODEL`
/// 2. `MOMENTRY_LLM_SUMMARY_MODEL`
/// 3. `MOMENTRY_LLM_MODEL`
///
/// Default: `google_gemma-4-26B-A4B-it-Q5_K_M.gguf`
pub static CHAT_MODEL: Lazy<String> = Lazy::new(|| {
    // Blank values are skipped so an empty export falls through to the next
    // variable (or the default) instead of producing an empty model name.
    ["MOMENTRY_LLM_CHAT_MODEL", "MOMENTRY_LLM_SUMMARY_MODEL", "MOMENTRY_LLM_MODEL"]
        .iter()
        .find_map(|key| env::var(key).ok().filter(|value| !value.trim().is_empty()))
        .unwrap_or_else(|| "google_gemma-4-26B-A4B-it-Q5_K_M.gguf".to_string())
});
/// Vision LLM endpoint (frame analysis, OCR). Can be the same as `CHAT_URL`
/// or a different server.
///
/// Reads `MOMENTRY_LLM_VISION_URL`; when that variable is unset or blank the
/// value of `CHAT_URL` is used instead.
pub static VISION_URL: Lazy<String> = Lazy::new(|| {
    env::var("MOMENTRY_LLM_VISION_URL")
        .ok()
        // Treat an empty export as "not configured" rather than as an
        // empty endpoint URL.
        .filter(|url| !url.trim().is_empty())
        .unwrap_or_else(|| CHAT_URL.clone())
});
/// Model name used with the vision endpoint.
///
/// Reads `MOMENTRY_LLM_VISION_MODEL`; when that variable is unset or blank
/// the value of `CHAT_MODEL` is used instead.
pub static VISION_MODEL: Lazy<String> = Lazy::new(|| {
    env::var("MOMENTRY_LLM_VISION_MODEL")
        .ok()
        // Treat an empty export as "not configured" rather than as an
        // empty model name.
        .filter(|model| !model.trim().is_empty())
        .unwrap_or_else(|| CHAT_MODEL.clone())
});
/// Text summary LLM endpoint (5W1H, story). Can be the same as `CHAT_URL` or
/// a different server.
///
/// Reads `MOMENTRY_LLM_SUMMARY_URL`; when that variable is unset or blank the
/// value of `CHAT_URL` is used instead (there is no separate hard-coded
/// summary default).
pub static SUMMARY_URL: Lazy<String> = Lazy::new(|| {
    env::var("MOMENTRY_LLM_SUMMARY_URL")
        .ok()
        // Treat an empty export as "not configured" rather than as an
        // empty endpoint URL.
        .filter(|url| !url.trim().is_empty())
        .unwrap_or_else(|| CHAT_URL.clone())
});
/// Model name used with the text summary endpoint.
///
/// Reads `MOMENTRY_LLM_SUMMARY_MODEL`; when that variable is unset or blank
/// the value of `CHAT_MODEL` is used instead.
pub static SUMMARY_MODEL: Lazy<String> = Lazy::new(|| {
    env::var("MOMENTRY_LLM_SUMMARY_MODEL")
        .ok()
        // Treat an empty export as "not configured" rather than as an
        // empty model name.
        .filter(|model| !model.trim().is_empty())
        .unwrap_or_else(|| CHAT_MODEL.clone())
});
pub static SUMMARY_TIMEOUT_SECS: Lazy<u64> = Lazy::new(|| {
@@ -237,6 +271,13 @@ pub mod llm {
.map(|v| v == "true" || v == "1")
.unwrap_or(true)
});
/// Default timeout, in seconds, for chat LLM requests.
///
/// Reads `MOMENTRY_LLM_CHAT_TIMEOUT`. Falls back to 120 when the variable is
/// unset, is not a valid unsigned integer, or is `0`.
pub static CHAT_TIMEOUT_SECS: Lazy<u64> = Lazy::new(|| {
    env::var("MOMENTRY_LLM_CHAT_TIMEOUT")
        .ok()
        // Tolerate stray whitespace around the number (common in env files).
        .and_then(|raw| raw.trim().parse::<u64>().ok())
        // A zero here would configure a zero-length timeout wherever this
        // value is turned into a duration, so it is treated as "use default".
        .filter(|&secs| secs > 0)
        .unwrap_or(120)
});
}
pub static SFTPGO_BASE_URL: Lazy<String> = Lazy::new(|| {

View File

@@ -1,6 +1,8 @@
use serde::{Deserialize, Serialize};
use serde_json::{json, Value};
use crate::core::config;
/// A tool/function definition for Gemma4 function calling
#[derive(Debug, Clone, Serialize)]
pub struct ToolDef {
@@ -75,18 +77,24 @@ pub enum LlmResponse {
ToolCalls(Vec<ToolCall>),
}
/// Get the LLM chat URL with fallback chain
/// Get the LLM chat URL from centralized config
pub fn llm_chat_url() -> String {
std::env::var("MOMENTRY_LLM_URL")
.or_else(|_| std::env::var("MOMENTRY_LLM_SUMMARY_URL"))
.unwrap_or_else(|_| "http://localhost:8082/v1/chat/completions".to_string())
config::llm::CHAT_URL.clone()
}
/// Get the LLM model name
/// Get the LLM model name from centralized config
pub fn llm_model() -> String {
std::env::var("MOMENTRY_LLM_MODEL")
.or_else(|_| std::env::var("MOMENTRY_LLM_SUMMARY_MODEL"))
.unwrap_or_else(|_| "google_gemma-4-26B-A4B-it-Q5_K_M.gguf".to_string())
config::llm::CHAT_MODEL.clone()
}
/// Vision LLM endpoint URL, copied out of the centralized
/// `config::llm::VISION_URL` setting.
pub fn llm_vision_url() -> String {
    let vision_url: &str = config::llm::VISION_URL.as_str();
    vision_url.to_owned()
}
/// Vision LLM model name, copied out of the centralized
/// `config::llm::VISION_MODEL` setting.
pub fn llm_vision_model() -> String {
    let vision_model: &str = config::llm::VISION_MODEL.as_str();
    vision_model.to_owned()
}
/// Build a tool definition JSON for function calling
@@ -113,7 +121,9 @@ pub async fn call_llm(
timeout_secs: u64,
) -> anyhow::Result<LlmResponse> {
let client = reqwest::Client::builder()
.timeout(std::time::Duration::from_secs(timeout_secs))
.timeout(std::time::Duration::from_secs(
if timeout_secs > 0 { timeout_secs } else { *config::llm::CHAT_TIMEOUT_SECS },
))
.build()?;
let req = ChatRequest {