refactor: unified LLM config - CHAT_URL/VISION_URL/SUMMARY_URL with env var overrides

This commit is contained in:
Accusys
2026-05-22 15:47:17 +08:00
parent a78b5bc12b
commit bd82028f34
4 changed files with 72 additions and 42 deletions

View File

@@ -41,10 +41,10 @@ async fn translate_text(
req.target_language, req.text
);
// Call Gemma4 via llama.cpp (port 8082, OpenAI-compatible API)
// Call LLM via configurable endpoint
let client = Client::new();
let llm_url = "http://localhost:8082/v1/chat/completions";
let model = "google_gemma-4-26B-A4B-it-Q5_K_M.gguf".to_string();
let llm_url = crate::core::config::llm::CHAT_URL.as_str();
let model = crate::core::config::llm::CHAT_MODEL.as_str();
let body = serde_json::json!({
"model": model,
@@ -71,20 +71,15 @@ async fn translate_text(
)
})?;
let translated_text = llm_resp
.get("choices")
.and_then(|c| c.as_array())
.and_then(|c| c.first())
.and_then(|c| c.get("message"))
.and_then(|m| m.get("content"))
.and_then(|v| v.as_str())
.unwrap_or("Translation failed")
let translated_text = llm_resp["choices"][0]["message"]["content"]
.as_str()
.unwrap_or("")
.to_string();
Ok(Json(TranslationResponse {
success: true,
translated_text,
source_language_detected: req.source_language.unwrap_or("unknown".to_string()),
model_used: model,
source_language_detected: req.source_language.unwrap_or_else(|| "auto".to_string()),
model_used: model.to_string(),
}))
}

View File

@@ -96,27 +96,11 @@ struct SceneSummaryResult {
// ── LLM Endpoint ──
fn llm_base_url() -> String {
let v = std::env::var("MOMENTRY_LLM_URL");
if v.is_ok() {
return v.unwrap();
}
let v = std::env::var("MOMENTRY_LLM_SUMMARY_URL");
if v.is_ok() {
return v.unwrap();
}
"http://localhost:8082/v1/chat/completions".to_string()
crate::core::config::llm::SUMMARY_URL.clone()
}
fn llm_model() -> String {
let v = std::env::var("MOMENTRY_LLM_MODEL");
if v.is_ok() {
return v.unwrap();
}
let v = std::env::var("MOMENTRY_LLM_SUMMARY_MODEL");
if v.is_ok() {
return v.unwrap();
}
"google_gemma-4-26B-A4B-it-Q5_K_M.gguf".to_string()
crate::core::config::llm::SUMMARY_MODEL.clone()
}
// ── Data Fetching ──

View File

@@ -216,13 +216,47 @@ pub mod cache {
pub mod llm {
use super::*;
/// Chat / function-calling LLM endpoint (agents/search, translation, etc.).
///
/// The first environment variable that can be read wins, checked in this
/// order: `MOMENTRY_LLM_CHAT_URL`, `MOMENTRY_LLM_SUMMARY_URL`,
/// `MOMENTRY_LLM_URL`. When none is available the default is
/// `http://127.0.0.1:8082/v1/chat/completions`.
pub static CHAT_URL: Lazy<String> = Lazy::new(|| {
    // Ordered from most specific to most generic override.
    const SOURCES: [&str; 3] = [
        "MOMENTRY_LLM_CHAT_URL",
        "MOMENTRY_LLM_SUMMARY_URL",
        "MOMENTRY_LLM_URL",
    ];
    SOURCES
        .iter()
        .find_map(|name| env::var(name).ok())
        .unwrap_or_else(|| "http://127.0.0.1:8082/v1/chat/completions".to_string())
});
/// Model name sent with chat / function-calling requests.
///
/// The first environment variable that can be read wins, checked in this
/// order: `MOMENTRY_LLM_CHAT_MODEL`, `MOMENTRY_LLM_SUMMARY_MODEL`,
/// `MOMENTRY_LLM_MODEL`. When none is available the default is
/// `google_gemma-4-26B-A4B-it-Q5_K_M.gguf`.
pub static CHAT_MODEL: Lazy<String> = Lazy::new(|| {
    // Ordered from most specific to most generic override.
    const SOURCES: [&str; 3] = [
        "MOMENTRY_LLM_CHAT_MODEL",
        "MOMENTRY_LLM_SUMMARY_MODEL",
        "MOMENTRY_LLM_MODEL",
    ];
    SOURCES
        .iter()
        .find_map(|name| env::var(name).ok())
        .unwrap_or_else(|| "google_gemma-4-26B-A4B-it-Q5_K_M.gguf".to_string())
});
/// Vision LLM endpoint (frame analysis, OCR).
///
/// Reads `MOMENTRY_LLM_VISION_URL`; when that variable cannot be read the
/// value of [`CHAT_URL`] is used, so both may point at the same server.
pub static VISION_URL: Lazy<String> = Lazy::new(|| {
    match env::var("MOMENTRY_LLM_VISION_URL") {
        Ok(url) => url,
        Err(_) => CHAT_URL.clone(),
    }
});
/// Model name sent with vision requests.
///
/// Reads `MOMENTRY_LLM_VISION_MODEL`; when that variable cannot be read the
/// value of [`CHAT_MODEL`] is used.
pub static VISION_MODEL: Lazy<String> = Lazy::new(|| {
    match env::var("MOMENTRY_LLM_VISION_MODEL") {
        Ok(model) => model,
        Err(_) => CHAT_MODEL.clone(),
    }
});
/// Text summary LLM endpoint (5W1H, story). Can be the same as [`CHAT_URL`] or different.
///
/// Reads `MOMENTRY_LLM_SUMMARY_URL`; when that variable cannot be read the
/// value of [`CHAT_URL`] is used. There is deliberately no separate
/// hard-coded default here, so the fallback always follows the chat endpoint.
pub static SUMMARY_URL: Lazy<String> = Lazy::new(|| {
    env::var("MOMENTRY_LLM_SUMMARY_URL").unwrap_or_else(|_| CHAT_URL.clone())
});
/// Model name sent with text-summary requests.
///
/// Reads `MOMENTRY_LLM_SUMMARY_MODEL`; when that variable cannot be read the
/// value of [`CHAT_MODEL`] is used. There is deliberately no separate
/// hard-coded default here, so the fallback always follows the chat model.
pub static SUMMARY_MODEL: Lazy<String> = Lazy::new(|| {
    env::var("MOMENTRY_LLM_SUMMARY_MODEL").unwrap_or_else(|_| CHAT_MODEL.clone())
});
pub static SUMMARY_TIMEOUT_SECS: Lazy<u64> = Lazy::new(|| {
@@ -237,6 +271,13 @@ pub mod llm {
.map(|v| v == "true" || v == "1")
.unwrap_or(true)
});
/// Timeout, in seconds, for chat LLM requests.
///
/// Parsed from `MOMENTRY_LLM_CHAT_TIMEOUT`; a missing or unparsable value
/// yields the default of 120.
pub static CHAT_TIMEOUT_SECS: Lazy<u64> = Lazy::new(|| {
    const DEFAULT_SECS: u64 = 120;
    env::var("MOMENTRY_LLM_CHAT_TIMEOUT")
        .ok()
        .and_then(|raw| raw.parse::<u64>().ok())
        .unwrap_or(DEFAULT_SECS)
});
}
pub static SFTPGO_BASE_URL: Lazy<String> = Lazy::new(|| {

View File

@@ -1,6 +1,8 @@
use serde::{Deserialize, Serialize};
use serde_json::{json, Value};
use crate::core::config;
/// A tool/function definition for Gemma4 function calling
#[derive(Debug, Clone, Serialize)]
pub struct ToolDef {
@@ -75,18 +77,24 @@ pub enum LlmResponse {
ToolCalls(Vec<ToolCall>),
}
/// Get the LLM chat URL with fallback chain
/// Get the LLM chat URL from centralized config
pub fn llm_chat_url() -> String {
std::env::var("MOMENTRY_LLM_URL")
.or_else(|_| std::env::var("MOMENTRY_LLM_SUMMARY_URL"))
.unwrap_or_else(|_| "http://localhost:8082/v1/chat/completions".to_string())
config::llm::CHAT_URL.clone()
}
/// Get the LLM model name
/// Get the LLM model name from centralized config
pub fn llm_model() -> String {
std::env::var("MOMENTRY_LLM_MODEL")
.or_else(|_| std::env::var("MOMENTRY_LLM_SUMMARY_MODEL"))
.unwrap_or_else(|_| "google_gemma-4-26B-A4B-it-Q5_K_M.gguf".to_string())
config::llm::CHAT_MODEL.clone()
}
/// Get the vision LLM URL from centralized config.
///
/// Returns an owned copy of `config::llm::VISION_URL`.
pub fn llm_vision_url() -> String {
    let url: &str = config::llm::VISION_URL.as_str();
    url.to_owned()
}
/// Get the vision LLM model name from centralized config.
///
/// Returns an owned copy of `config::llm::VISION_MODEL`.
pub fn llm_vision_model() -> String {
    let model: &str = config::llm::VISION_MODEL.as_str();
    model.to_owned()
}
/// Build a tool definition JSON for function calling
@@ -113,7 +121,9 @@ pub async fn call_llm(
timeout_secs: u64,
) -> anyhow::Result<LlmResponse> {
let client = reqwest::Client::builder()
.timeout(std::time::Duration::from_secs(timeout_secs))
.timeout(std::time::Duration::from_secs(
if timeout_secs > 0 { timeout_secs } else { *config::llm::CHAT_TIMEOUT_SECS },
))
.build()?;
let req = ChatRequest {