Files
momentry_core/src/api/llm_search.rs
Accusys 17e4e15860 feat: add Vision LLM integration (CLIP + Qwen3-VL cascade)
- Add Qwen3-VL dynamic management (start/stop/status CLI)
- Add CLIP + Qwen3-VL cascade detection strategy
- Add Vision CLI commands (vision start/stop/status, detect)
- Add cascade_vision processor module
- Add clip processor module
- Add qwen_vl_manager module

Changes:
- scripts/start_qwen3vl.sh, stop_qwen3vl.sh: Qwen3-VL management scripts
- src/core/vision/: Qwen3-VL manager module
- src/core/processor/cascade_vision.rs: CLIP + Qwen3-VL cascade logic
- src/core/processor/clip.rs: CLIP classification and detection
- src/api/clip_api.rs: CLIP API endpoints
- src/cli/vision.rs: Vision CLI implementation
- src/cli/args.rs: Add Vision and Detect commands
- src/main.rs: Integrate Vision CLI
- src/core/mod.rs: Add vision module
- src/core/processor/mod.rs: Add cascade_vision module
2026-06-13 16:25:52 +08:00

86 lines
2.7 KiB
Rust

use axum::{extract::State, http::StatusCode, response::Json, routing::post, Router};
use serde::Deserialize;
use tracing::warn;
use crate::core::llm::rerank::rerank_search_results;
use super::search::{smart_search, SearchResult, SmartSearchRequest, SmartSearchResponse};
/// JSON request body accepted by `llm_smart_search_handler`.
#[derive(Debug, Deserialize)]
pub struct LlmSmartSearchRequest {
/// Optional file identifier, forwarded unchanged to the underlying smart search.
/// NOTE(review): presumably restricts the search to a single file — confirm in `smart_search`.
#[serde(default)]
pub file_uuid: Option<String>,
/// Search query text; used for the initial search and handed to the LLM re-ranker.
pub query: String,
/// Maximum number of results to return. The handler uses 10 when this is absent and
/// treats 0 as 1.
pub limit: Option<usize>,
}
pub async fn llm_smart_search_handler(
State(state): State<crate::api::types::AppState>,
Json(req): Json<LlmSmartSearchRequest>,
) -> Result<Json<SmartSearchResponse>, (StatusCode, Json<serde_json::Value>)> {
let user_limit = req.limit.unwrap_or(10).max(1);
let llm_candidate_count = (user_limit * 3).clamp(10, 20);
// 1. Get initial RRF-ranked results via the existing smart_search
let initial_req = SmartSearchRequest {
file_uuid: req.file_uuid.clone(),
query: req.query.clone(),
page: Some(1),
page_size: Some(llm_candidate_count),
limit: Some(llm_candidate_count),
};
let initial_response = smart_search(State(state.clone()), Json(initial_req)).await?;
let initial_results = initial_response.0.results;
if initial_results.is_empty() {
return Ok(Json(SmartSearchResponse {
query: req.query,
results: vec![],
page: 1,
page_size: 0,
strategy: "llm_reranked".to_string(),
}));
}
// 2. Build candidates: (original_index, summary_text)
let candidates: Vec<(usize, String)> = initial_results
.iter()
.enumerate()
.map(|(i, r)| (i, r.summary.clone().unwrap_or_default()))
.collect();
let candidate_refs: Vec<(usize, &str)> =
candidates.iter().map(|(i, t)| (*i, t.as_str())).collect();
// 3. LLM re-ranking
let ranked_indices = match rerank_search_results(&req.query, &candidate_refs).await {
Ok(indices) => indices,
Err(e) => {
warn!("LLM rerank failed, falling back to RRF order: {}", e);
(0..initial_results.len()).collect()
}
};
// 4. Re-order results
let mut reordered: Vec<SearchResult> = ranked_indices
.into_iter()
.filter_map(|i| initial_results.get(i).cloned())
.collect();
// 5. Trim to user's requested limit
reordered.truncate(user_limit);
Ok(Json(SmartSearchResponse {
query: req.query,
results: reordered,
page: 1,
page_size: user_limit,
strategy: "llm_reranked".to_string(),
}))
}
/// Builds the router for the LLM-assisted search API.
///
/// Registers a single `POST` route, `/api/v1/search/llm-smart`, served by
/// `llm_smart_search_handler`.
pub fn llm_smart_routes() -> Router<crate::api::types::AppState> {
    const LLM_SMART_SEARCH_PATH: &str = "/api/v1/search/llm-smart";

    let search_route = post(llm_smart_search_handler);
    Router::new().route(LLM_SMART_SEARCH_PATH, search_route)
}