feat: score-based search, LLM re-ranking endpoint, video title search, pipeline module

Core search changes:
- Replace RRF with score-based merge (max of semantic/keyword/identity)
- Add video title ILIKE search for brand/name queries (score 0.9)
- Add /api/v1/search/llm-smart endpoint with Gemma 4 re-ranking
- Fix LLM JSON parsing (markdown fences, empty responses)

Infrastructure:
- Rebuild Qdrant collection (clear 347K contaminated points)
- Add dotenv loading to main.rs for config parity
- Implement store_pre_chunk in postgres_db.rs

Pipeline module (WordPress):
- store-asrx, rule1, vectorize, phase1, complete endpoints
- CLI commands for pipeline operations

Docs:
- SEARCH_SCORE_IMPROVEMENT.md (score-based merge proposal)
This commit is contained in:
Accusys
2026-06-04 07:40:41 +08:00
parent e1572907ae
commit 834b0d4865
14 changed files with 835 additions and 31 deletions

91
src/api/llm_search.rs Normal file
View File

@@ -0,0 +1,91 @@
use axum::{
extract::State,
http::StatusCode,
response::Json,
routing::post,
Router,
};
use serde::Deserialize;
use tracing::warn;
use crate::core::llm::rerank::rerank_search_results;
use super::search::{smart_search, SearchResult, SmartSearchRequest, SmartSearchResponse};
/// JSON request body accepted by the LLM re-ranked smart search endpoint.
#[derive(Debug, Deserialize)]
pub struct LlmSmartSearchRequest {
    /// Optional file UUID; forwarded unchanged to the underlying smart search.
    #[serde(default)]
    pub file_uuid: Option<String>,
    /// Search query text, used both for the initial search and for re-ranking.
    pub query: String,
    /// Maximum number of results to return; the handler uses 10 when omitted.
    #[serde(default)]
    pub limit: Option<usize>,
}
pub async fn llm_smart_search_handler(
State(state): State<crate::api::types::AppState>,
Json(req): Json<LlmSmartSearchRequest>,
) -> Result<Json<SmartSearchResponse>, (StatusCode, Json<serde_json::Value>)> {
let user_limit = req.limit.unwrap_or(10).max(1);
let llm_candidate_count = (user_limit * 3).clamp(10, 20);
// 1. Get initial RRF-ranked results via the existing smart_search
let initial_req = SmartSearchRequest {
file_uuid: req.file_uuid.clone(),
query: req.query.clone(),
page: Some(1),
page_size: Some(llm_candidate_count),
limit: Some(llm_candidate_count),
};
let initial_response = smart_search(State(state.clone()), Json(initial_req)).await?;
let initial_results = initial_response.0.results;
if initial_results.is_empty() {
return Ok(Json(SmartSearchResponse {
query: req.query,
results: vec![],
page: 1,
page_size: 0,
strategy: "llm_reranked".to_string(),
}));
}
// 2. Build candidates: (original_index, summary_text)
let candidates: Vec<(usize, String)> = initial_results
.iter()
.enumerate()
.map(|(i, r)| (i, r.summary.clone().unwrap_or_default()))
.collect();
let candidate_refs: Vec<(usize, &str)> =
candidates.iter().map(|(i, t)| (*i, t.as_str())).collect();
// 3. LLM re-ranking
let ranked_indices = match rerank_search_results(&req.query, &candidate_refs).await {
Ok(indices) => indices,
Err(e) => {
warn!("LLM rerank failed, falling back to RRF order: {}", e);
(0..initial_results.len()).collect()
}
};
// 4. Re-order results
let mut reordered: Vec<SearchResult> = ranked_indices
.into_iter()
.filter_map(|i| initial_results.get(i).cloned())
.collect();
// 5. Trim to user's requested limit
reordered.truncate(user_limit);
Ok(Json(SmartSearchResponse {
query: req.query,
results: reordered,
page: 1,
page_size: user_limit,
strategy: "llm_reranked".to_string(),
}))
}
/// Builds the router exposing the LLM re-ranked smart search endpoint.
///
/// Registers `llm_smart_search_handler` for `POST /api/v1/search/llm-smart`.
pub fn llm_smart_routes() -> Router<crate::api::types::AppState> {
    let rerank_route = post(llm_smart_search_handler);
    Router::new().route("/api/v1/search/llm-smart", rerank_route)
}