refactor: unified LLM config - CHAT_URL/VISION_URL/SUMMARY_URL with env var overrides
This commit is contained in:
@@ -41,10 +41,10 @@ async fn translate_text(
|
||||
req.target_language, req.text
|
||||
);
|
||||
|
||||
// Call Gemma4 via llama.cpp (port 8082, OpenAI-compatible API)
|
||||
// Call LLM via configurable endpoint
|
||||
let client = Client::new();
|
||||
let llm_url = "http://localhost:8082/v1/chat/completions";
|
||||
let model = "google_gemma-4-26B-A4B-it-Q5_K_M.gguf".to_string();
|
||||
let llm_url = crate::core::config::llm::CHAT_URL.as_str();
|
||||
let model = crate::core::config::llm::CHAT_MODEL.as_str();
|
||||
|
||||
let body = serde_json::json!({
|
||||
"model": model,
|
||||
@@ -71,20 +71,15 @@ async fn translate_text(
|
||||
)
|
||||
})?;
|
||||
|
||||
let translated_text = llm_resp
|
||||
.get("choices")
|
||||
.and_then(|c| c.as_array())
|
||||
.and_then(|c| c.first())
|
||||
.and_then(|c| c.get("message"))
|
||||
.and_then(|m| m.get("content"))
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or("Translation failed")
|
||||
let translated_text = llm_resp["choices"][0]["message"]["content"]
|
||||
.as_str()
|
||||
.unwrap_or("")
|
||||
.to_string();
|
||||
|
||||
Ok(Json(TranslationResponse {
|
||||
success: true,
|
||||
translated_text,
|
||||
source_language_detected: req.source_language.unwrap_or("unknown".to_string()),
|
||||
model_used: model,
|
||||
source_language_detected: req.source_language.unwrap_or_else(|| "auto".to_string()),
|
||||
model_used: model.to_string(),
|
||||
}))
|
||||
}
|
||||
|
||||
@@ -96,27 +96,11 @@ struct SceneSummaryResult {
|
||||
// ── LLM Endpoint ──
|
||||
|
||||
fn llm_base_url() -> String {
|
||||
let v = std::env::var("MOMENTRY_LLM_URL");
|
||||
if v.is_ok() {
|
||||
return v.unwrap();
|
||||
}
|
||||
let v = std::env::var("MOMENTRY_LLM_SUMMARY_URL");
|
||||
if v.is_ok() {
|
||||
return v.unwrap();
|
||||
}
|
||||
"http://localhost:8082/v1/chat/completions".to_string()
|
||||
crate::core::config::llm::SUMMARY_URL.clone()
|
||||
}
|
||||
|
||||
fn llm_model() -> String {
|
||||
let v = std::env::var("MOMENTRY_LLM_MODEL");
|
||||
if v.is_ok() {
|
||||
return v.unwrap();
|
||||
}
|
||||
let v = std::env::var("MOMENTRY_LLM_SUMMARY_MODEL");
|
||||
if v.is_ok() {
|
||||
return v.unwrap();
|
||||
}
|
||||
"google_gemma-4-26B-A4B-it-Q5_K_M.gguf".to_string()
|
||||
crate::core::config::llm::SUMMARY_MODEL.clone()
|
||||
}
|
||||
|
||||
// ── Data Fetching ──
|
||||
|
||||
@@ -216,13 +216,47 @@ pub mod cache {
|
||||
pub mod llm {
|
||||
use super::*;
|
||||
|
||||
/// Chat / function-calling LLM endpoint (agents/search, translation, etc.)
|
||||
/// Default: http://127.0.0.1:8082/v1/chat/completions
|
||||
pub static CHAT_URL: Lazy<String> = Lazy::new(|| {
|
||||
env::var("MOMENTRY_LLM_CHAT_URL")
|
||||
.or_else(|_| env::var("MOMENTRY_LLM_SUMMARY_URL"))
|
||||
.or_else(|_| env::var("MOMENTRY_LLM_URL"))
|
||||
.unwrap_or_else(|_| "http://127.0.0.1:8082/v1/chat/completions".to_string())
|
||||
});
|
||||
|
||||
pub static CHAT_MODEL: Lazy<String> = Lazy::new(|| {
|
||||
env::var("MOMENTRY_LLM_CHAT_MODEL")
|
||||
.or_else(|_| env::var("MOMENTRY_LLM_SUMMARY_MODEL"))
|
||||
.or_else(|_| env::var("MOMENTRY_LLM_MODEL"))
|
||||
.unwrap_or_else(|_| "google_gemma-4-26B-A4B-it-Q5_K_M.gguf".to_string())
|
||||
});
|
||||
|
||||
/// Vision LLM endpoint (frame analysis, OCR). Can be same as CHAT_URL or different.
|
||||
/// Default: falls back to CHAT_URL
|
||||
pub static VISION_URL: Lazy<String> = Lazy::new(|| {
|
||||
env::var("MOMENTRY_LLM_VISION_URL")
|
||||
.unwrap_or_else(|_| CHAT_URL.clone())
|
||||
});
|
||||
|
||||
pub static VISION_MODEL: Lazy<String> = Lazy::new(|| {
|
||||
env::var("MOMENTRY_LLM_VISION_MODEL")
|
||||
.unwrap_or_else(|_| CHAT_MODEL.clone())
|
||||
});
|
||||
|
||||
/// Text summary LLM endpoint (5W1H, story). Can be same as CHAT_URL or different.
|
||||
pub static SUMMARY_URL: Lazy<String> = Lazy::new(|| {
|
||||
env::var("MOMENTRY_LLM_SUMMARY_URL")
|
||||
.unwrap_or_else(|_| "http://127.0.0.1:8081/v1/chat/completions".to_string())
|
||||
.ok()
|
||||
.or_else(|| Some(CHAT_URL.clone()))
|
||||
.unwrap()
|
||||
});
|
||||
|
||||
pub static SUMMARY_MODEL: Lazy<String> = Lazy::new(|| {
|
||||
env::var("MOMENTRY_LLM_SUMMARY_MODEL").unwrap_or_else(|_| "gemma4".to_string())
|
||||
env::var("MOMENTRY_LLM_SUMMARY_MODEL")
|
||||
.ok()
|
||||
.or_else(|| Some(CHAT_MODEL.clone()))
|
||||
.unwrap()
|
||||
});
|
||||
|
||||
pub static SUMMARY_TIMEOUT_SECS: Lazy<u64> = Lazy::new(|| {
|
||||
@@ -237,6 +271,13 @@ pub mod llm {
|
||||
.map(|v| v == "true" || v == "1")
|
||||
.unwrap_or(true)
|
||||
});
|
||||
|
||||
pub static CHAT_TIMEOUT_SECS: Lazy<u64> = Lazy::new(|| {
|
||||
env::var("MOMENTRY_LLM_CHAT_TIMEOUT")
|
||||
.unwrap_or_else(|_| "120".to_string())
|
||||
.parse()
|
||||
.unwrap_or(120)
|
||||
});
|
||||
}
|
||||
|
||||
pub static SFTPGO_BASE_URL: Lazy<String> = Lazy::new(|| {
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_json::{json, Value};
|
||||
|
||||
use crate::core::config;
|
||||
|
||||
/// A tool/function definition for Gemma4 function calling
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
pub struct ToolDef {
|
||||
@@ -75,18 +77,24 @@ pub enum LlmResponse {
|
||||
ToolCalls(Vec<ToolCall>),
|
||||
}
|
||||
|
||||
/// Get the LLM chat URL with fallback chain
|
||||
/// Get the LLM chat URL from centralized config
|
||||
pub fn llm_chat_url() -> String {
|
||||
std::env::var("MOMENTRY_LLM_URL")
|
||||
.or_else(|_| std::env::var("MOMENTRY_LLM_SUMMARY_URL"))
|
||||
.unwrap_or_else(|_| "http://localhost:8082/v1/chat/completions".to_string())
|
||||
config::llm::CHAT_URL.clone()
|
||||
}
|
||||
|
||||
/// Get the LLM model name
|
||||
/// Get the LLM model name from centralized config
|
||||
pub fn llm_model() -> String {
|
||||
std::env::var("MOMENTRY_LLM_MODEL")
|
||||
.or_else(|_| std::env::var("MOMENTRY_LLM_SUMMARY_MODEL"))
|
||||
.unwrap_or_else(|_| "google_gemma-4-26B-A4B-it-Q5_K_M.gguf".to_string())
|
||||
config::llm::CHAT_MODEL.clone()
|
||||
}
|
||||
|
||||
/// Get the vision LLM URL
|
||||
pub fn llm_vision_url() -> String {
|
||||
config::llm::VISION_URL.clone()
|
||||
}
|
||||
|
||||
/// Get the vision LLM model name
|
||||
pub fn llm_vision_model() -> String {
|
||||
config::llm::VISION_MODEL.clone()
|
||||
}
|
||||
|
||||
/// Build a tool definition JSON for function calling
|
||||
@@ -113,7 +121,9 @@ pub async fn call_llm(
|
||||
timeout_secs: u64,
|
||||
) -> anyhow::Result<LlmResponse> {
|
||||
let client = reqwest::Client::builder()
|
||||
.timeout(std::time::Duration::from_secs(timeout_secs))
|
||||
.timeout(std::time::Duration::from_secs(
|
||||
if timeout_secs > 0 { timeout_secs } else { *config::llm::CHAT_TIMEOUT_SECS },
|
||||
))
|
||||
.build()?;
|
||||
|
||||
let req = ChatRequest {
|
||||
|
||||
Reference in New Issue
Block a user