- Add Qwen3-VL dynamic management (start/stop/status CLI) - Add CLIP + Qwen3-VL cascade detection strategy - Add Vision CLI commands (vision start/stop/status, detect) - Add cascade_vision processor module - Add clip processor module - Add qwen_vl_manager module Changes: - scripts/start_qwen3vl.sh, stop_qwen3vl.sh: Qwen3-VL management scripts - src/core/vision/: Qwen3-VL manager module - src/core/processor/cascade_vision.rs: CLIP + Qwen3-VL cascade logic - src/core/processor/clip.rs: CLIP classification and detection - src/api/clip_api.rs: CLIP API endpoints - src/cli/vision.rs: Vision CLI implementation - src/cli/args.rs: Add Vision and Detect commands - src/main.rs: Integrate Vision CLI - src/core/mod.rs: Add vision module - src/core/processor/mod.rs: Add cascade_vision module
86 lines
2.7 KiB
Rust
86 lines
2.7 KiB
Rust
use axum::{extract::State, http::StatusCode, response::Json, routing::post, Router};
|
|
use serde::Deserialize;
|
|
use tracing::warn;
|
|
|
|
use crate::core::llm::rerank::rerank_search_results;
|
|
|
|
use super::search::{smart_search, SearchResult, SmartSearchRequest, SmartSearchResponse};
|
|
|
|
#[derive(Debug, Deserialize)]
|
|
pub struct LlmSmartSearchRequest {
|
|
#[serde(default)]
|
|
pub file_uuid: Option<String>,
|
|
pub query: String,
|
|
pub limit: Option<usize>,
|
|
}
|
|
|
|
pub async fn llm_smart_search_handler(
|
|
State(state): State<crate::api::types::AppState>,
|
|
Json(req): Json<LlmSmartSearchRequest>,
|
|
) -> Result<Json<SmartSearchResponse>, (StatusCode, Json<serde_json::Value>)> {
|
|
let user_limit = req.limit.unwrap_or(10).max(1);
|
|
let llm_candidate_count = (user_limit * 3).clamp(10, 20);
|
|
|
|
// 1. Get initial RRF-ranked results via the existing smart_search
|
|
let initial_req = SmartSearchRequest {
|
|
file_uuid: req.file_uuid.clone(),
|
|
query: req.query.clone(),
|
|
page: Some(1),
|
|
page_size: Some(llm_candidate_count),
|
|
limit: Some(llm_candidate_count),
|
|
};
|
|
|
|
let initial_response = smart_search(State(state.clone()), Json(initial_req)).await?;
|
|
let initial_results = initial_response.0.results;
|
|
|
|
if initial_results.is_empty() {
|
|
return Ok(Json(SmartSearchResponse {
|
|
query: req.query,
|
|
results: vec![],
|
|
page: 1,
|
|
page_size: 0,
|
|
strategy: "llm_reranked".to_string(),
|
|
}));
|
|
}
|
|
|
|
// 2. Build candidates: (original_index, summary_text)
|
|
let candidates: Vec<(usize, String)> = initial_results
|
|
.iter()
|
|
.enumerate()
|
|
.map(|(i, r)| (i, r.summary.clone().unwrap_or_default()))
|
|
.collect();
|
|
|
|
let candidate_refs: Vec<(usize, &str)> =
|
|
candidates.iter().map(|(i, t)| (*i, t.as_str())).collect();
|
|
|
|
// 3. LLM re-ranking
|
|
let ranked_indices = match rerank_search_results(&req.query, &candidate_refs).await {
|
|
Ok(indices) => indices,
|
|
Err(e) => {
|
|
warn!("LLM rerank failed, falling back to RRF order: {}", e);
|
|
(0..initial_results.len()).collect()
|
|
}
|
|
};
|
|
|
|
// 4. Re-order results
|
|
let mut reordered: Vec<SearchResult> = ranked_indices
|
|
.into_iter()
|
|
.filter_map(|i| initial_results.get(i).cloned())
|
|
.collect();
|
|
|
|
// 5. Trim to user's requested limit
|
|
reordered.truncate(user_limit);
|
|
|
|
Ok(Json(SmartSearchResponse {
|
|
query: req.query,
|
|
results: reordered,
|
|
page: 1,
|
|
page_size: user_limit,
|
|
strategy: "llm_reranked".to_string(),
|
|
}))
|
|
}
|
|
|
|
pub fn llm_smart_routes() -> Router<crate::api::types::AppState> {
|
|
Router::new().route("/api/v1/search/llm-smart", post(llm_smart_search_handler))
|
|
}
|