feat: score-based search, LLM re-ranking endpoint, video title search, pipeline module
Core search changes: - Replace RRF with score-based merge (max of semantic/keyword/identity) - Add video title ILIKE search for brand/name queries (score 0.9) - Add /api/v1/search/llm-smart endpoint with Gemma 4 re-ranking - Fix LLM JSON parsing (markdown fences, empty responses) Infrastructure: - Rebuild Qdrant collection (clear 347K contaminated points) - Add dotenv loading to main.rs for config parity - Implement store_pre_chunk in postgres_db.rs Pipeline module (WordPress): - store-asrx, rule1, vectorize, phase1, complete endpoints - CLI commands for pipeline operations Docs: - SEARCH_SCORE_IMPROVEMENT.md (score-based merge proposal)
This commit is contained in:
91
src/api/llm_search.rs
Normal file
91
src/api/llm_search.rs
Normal file
@@ -0,0 +1,91 @@
|
||||
use axum::{
|
||||
extract::State,
|
||||
http::StatusCode,
|
||||
response::Json,
|
||||
routing::post,
|
||||
Router,
|
||||
};
|
||||
use serde::Deserialize;
|
||||
use tracing::warn;
|
||||
|
||||
use crate::core::llm::rerank::rerank_search_results;
|
||||
|
||||
use super::search::{smart_search, SearchResult, SmartSearchRequest, SmartSearchResponse};
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
pub struct LlmSmartSearchRequest {
|
||||
#[serde(default)]
|
||||
pub file_uuid: Option<String>,
|
||||
pub query: String,
|
||||
pub limit: Option<usize>,
|
||||
}
|
||||
|
||||
pub async fn llm_smart_search_handler(
|
||||
State(state): State<crate::api::types::AppState>,
|
||||
Json(req): Json<LlmSmartSearchRequest>,
|
||||
) -> Result<Json<SmartSearchResponse>, (StatusCode, Json<serde_json::Value>)> {
|
||||
let user_limit = req.limit.unwrap_or(10).max(1);
|
||||
let llm_candidate_count = (user_limit * 3).clamp(10, 20);
|
||||
|
||||
// 1. Get initial RRF-ranked results via the existing smart_search
|
||||
let initial_req = SmartSearchRequest {
|
||||
file_uuid: req.file_uuid.clone(),
|
||||
query: req.query.clone(),
|
||||
page: Some(1),
|
||||
page_size: Some(llm_candidate_count),
|
||||
limit: Some(llm_candidate_count),
|
||||
};
|
||||
|
||||
let initial_response = smart_search(State(state.clone()), Json(initial_req)).await?;
|
||||
let initial_results = initial_response.0.results;
|
||||
|
||||
if initial_results.is_empty() {
|
||||
return Ok(Json(SmartSearchResponse {
|
||||
query: req.query,
|
||||
results: vec![],
|
||||
page: 1,
|
||||
page_size: 0,
|
||||
strategy: "llm_reranked".to_string(),
|
||||
}));
|
||||
}
|
||||
|
||||
// 2. Build candidates: (original_index, summary_text)
|
||||
let candidates: Vec<(usize, String)> = initial_results
|
||||
.iter()
|
||||
.enumerate()
|
||||
.map(|(i, r)| (i, r.summary.clone().unwrap_or_default()))
|
||||
.collect();
|
||||
|
||||
let candidate_refs: Vec<(usize, &str)> =
|
||||
candidates.iter().map(|(i, t)| (*i, t.as_str())).collect();
|
||||
|
||||
// 3. LLM re-ranking
|
||||
let ranked_indices = match rerank_search_results(&req.query, &candidate_refs).await {
|
||||
Ok(indices) => indices,
|
||||
Err(e) => {
|
||||
warn!("LLM rerank failed, falling back to RRF order: {}", e);
|
||||
(0..initial_results.len()).collect()
|
||||
}
|
||||
};
|
||||
|
||||
// 4. Re-order results
|
||||
let mut reordered: Vec<SearchResult> = ranked_indices
|
||||
.into_iter()
|
||||
.filter_map(|i| initial_results.get(i).cloned())
|
||||
.collect();
|
||||
|
||||
// 5. Trim to user's requested limit
|
||||
reordered.truncate(user_limit);
|
||||
|
||||
Ok(Json(SmartSearchResponse {
|
||||
query: req.query,
|
||||
results: reordered,
|
||||
page: 1,
|
||||
page_size: user_limit,
|
||||
strategy: "llm_reranked".to_string(),
|
||||
}))
|
||||
}
|
||||
|
||||
pub fn llm_smart_routes() -> Router<crate::api::types::AppState> {
|
||||
Router::new().route("/api/v1/search/llm-smart", post(llm_smart_search_handler))
|
||||
}
|
||||
Reference in New Issue
Block a user