feat: score-based search, LLM re-ranking endpoint, video title search, pipeline module
Core search changes:
- Replace RRF with score-based merge (max of semantic/keyword/identity)
- Add video title ILIKE search for brand/name queries (score 0.9)
- Add /api/v1/search/llm-smart endpoint with Gemma 4 re-ranking
- Fix LLM JSON parsing (markdown fences, empty responses)

Infrastructure:
- Rebuild Qdrant collection (clear 347K contaminated points)
- Add dotenv loading to main.rs for config parity
- Implement store_pre_chunk in postgres_db.rs

Pipeline module (WordPress):
- store-asrx, rule1, vectorize, phase1, complete endpoints
- CLI commands for pipeline operations

Docs:
- SEARCH_SCORE_IMPROVEMENT.md (score-based merge proposal)
This commit is contained in:
91
src/api/llm_search.rs
Normal file
91
src/api/llm_search.rs
Normal file
@@ -0,0 +1,91 @@
|
||||
use axum::{
|
||||
extract::State,
|
||||
http::StatusCode,
|
||||
response::Json,
|
||||
routing::post,
|
||||
Router,
|
||||
};
|
||||
use serde::Deserialize;
|
||||
use tracing::warn;
|
||||
|
||||
use crate::core::llm::rerank::rerank_search_results;
|
||||
|
||||
use super::search::{smart_search, SearchResult, SmartSearchRequest, SmartSearchResponse};
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
pub struct LlmSmartSearchRequest {
|
||||
#[serde(default)]
|
||||
pub file_uuid: Option<String>,
|
||||
pub query: String,
|
||||
pub limit: Option<usize>,
|
||||
}
|
||||
|
||||
pub async fn llm_smart_search_handler(
|
||||
State(state): State<crate::api::types::AppState>,
|
||||
Json(req): Json<LlmSmartSearchRequest>,
|
||||
) -> Result<Json<SmartSearchResponse>, (StatusCode, Json<serde_json::Value>)> {
|
||||
let user_limit = req.limit.unwrap_or(10).max(1);
|
||||
let llm_candidate_count = (user_limit * 3).clamp(10, 20);
|
||||
|
||||
// 1. Get initial RRF-ranked results via the existing smart_search
|
||||
let initial_req = SmartSearchRequest {
|
||||
file_uuid: req.file_uuid.clone(),
|
||||
query: req.query.clone(),
|
||||
page: Some(1),
|
||||
page_size: Some(llm_candidate_count),
|
||||
limit: Some(llm_candidate_count),
|
||||
};
|
||||
|
||||
let initial_response = smart_search(State(state.clone()), Json(initial_req)).await?;
|
||||
let initial_results = initial_response.0.results;
|
||||
|
||||
if initial_results.is_empty() {
|
||||
return Ok(Json(SmartSearchResponse {
|
||||
query: req.query,
|
||||
results: vec![],
|
||||
page: 1,
|
||||
page_size: 0,
|
||||
strategy: "llm_reranked".to_string(),
|
||||
}));
|
||||
}
|
||||
|
||||
// 2. Build candidates: (original_index, summary_text)
|
||||
let candidates: Vec<(usize, String)> = initial_results
|
||||
.iter()
|
||||
.enumerate()
|
||||
.map(|(i, r)| (i, r.summary.clone().unwrap_or_default()))
|
||||
.collect();
|
||||
|
||||
let candidate_refs: Vec<(usize, &str)> =
|
||||
candidates.iter().map(|(i, t)| (*i, t.as_str())).collect();
|
||||
|
||||
// 3. LLM re-ranking
|
||||
let ranked_indices = match rerank_search_results(&req.query, &candidate_refs).await {
|
||||
Ok(indices) => indices,
|
||||
Err(e) => {
|
||||
warn!("LLM rerank failed, falling back to RRF order: {}", e);
|
||||
(0..initial_results.len()).collect()
|
||||
}
|
||||
};
|
||||
|
||||
// 4. Re-order results
|
||||
let mut reordered: Vec<SearchResult> = ranked_indices
|
||||
.into_iter()
|
||||
.filter_map(|i| initial_results.get(i).cloned())
|
||||
.collect();
|
||||
|
||||
// 5. Trim to user's requested limit
|
||||
reordered.truncate(user_limit);
|
||||
|
||||
Ok(Json(SmartSearchResponse {
|
||||
query: req.query,
|
||||
results: reordered,
|
||||
page: 1,
|
||||
page_size: user_limit,
|
||||
strategy: "llm_reranked".to_string(),
|
||||
}))
|
||||
}
|
||||
|
||||
pub fn llm_smart_routes() -> Router<crate::api::types::AppState> {
|
||||
Router::new().route("/api/v1/search/llm-smart", post(llm_smart_search_handler))
|
||||
}
|
||||
@@ -9,8 +9,10 @@ pub mod identities;
|
||||
pub mod identity_agent_api;
|
||||
pub mod identity_api;
|
||||
pub mod identity_binding;
|
||||
pub mod llm_search;
|
||||
pub mod media_api;
|
||||
pub mod middleware;
|
||||
pub mod pipeline;
|
||||
pub mod processing;
|
||||
pub mod scan;
|
||||
pub mod search;
|
||||
|
||||
85
src/api/pipeline.rs
Normal file
85
src/api/pipeline.rs
Normal file
@@ -0,0 +1,85 @@
|
||||
use axum::extract::Path;
|
||||
use axum::routing::post;
|
||||
use axum::{Json, Router};
|
||||
use serde_json::{json, Value};
|
||||
|
||||
use crate::core::db::postgres_db::PostgresDb;
|
||||
use crate::core::pipeline as pipeline_core;
|
||||
use crate::core::config;
|
||||
|
||||
async fn handle_store_asrx(Path(uuid): Path<String>) -> Result<Json<Value>, (axum::http::StatusCode, Json<Value>)> {
|
||||
let db = PostgresDb::new(&config::DATABASE_URL).await
|
||||
.map_err(|e| {
|
||||
tracing::error!("DB error: {}", e);
|
||||
(axum::http::StatusCode::INTERNAL_SERVER_ERROR, Json(json!({"error": "DB connection failed"})))
|
||||
})?;
|
||||
|
||||
pipeline_core::store_asrx_chunks(&db, &uuid).await
|
||||
.map_err(|e| {
|
||||
tracing::error!("store_asrx error: {}", e);
|
||||
(axum::http::StatusCode::INTERNAL_SERVER_ERROR, Json(json!({"error": e.to_string()})))
|
||||
})?;
|
||||
|
||||
Ok(Json(json!({"success": true, "message": "ASRX chunks stored", "file_uuid": uuid})))
|
||||
}
|
||||
|
||||
async fn handle_rule1(Path(uuid): Path<String>) -> Result<Json<Value>, (axum::http::StatusCode, Json<Value>)> {
|
||||
let db = PostgresDb::new(&config::DATABASE_URL).await
|
||||
.map_err(|e| {
|
||||
tracing::error!("DB error: {}", e);
|
||||
(axum::http::StatusCode::INTERNAL_SERVER_ERROR, Json(json!({"error": "DB connection failed"})))
|
||||
})?;
|
||||
|
||||
let count = pipeline_core::execute_rule1(&db, &uuid).await
|
||||
.map_err(|e| {
|
||||
tracing::error!("rule1 error: {}", e);
|
||||
(axum::http::StatusCode::INTERNAL_SERVER_ERROR, Json(json!({"error": e.to_string()})))
|
||||
})?;
|
||||
|
||||
Ok(Json(json!({"success": true, "message": format!("Rule 1 complete: {} chunks", count), "file_uuid": uuid, "chunks": count})))
|
||||
}
|
||||
|
||||
async fn handle_vectorize(Path(uuid): Path<String>) -> Result<Json<Value>, (axum::http::StatusCode, Json<Value>)> {
|
||||
pipeline_core::vectorize_chunks(&uuid).await
|
||||
.map_err(|e| {
|
||||
tracing::error!("vectorize error: {}", e);
|
||||
(axum::http::StatusCode::INTERNAL_SERVER_ERROR, Json(json!({"error": e.to_string()})))
|
||||
})?;
|
||||
|
||||
Ok(Json(json!({"success": true, "message": "Vectorization complete", "file_uuid": uuid})))
|
||||
}
|
||||
|
||||
async fn handle_phase1(Path(uuid): Path<String>) -> Result<Json<Value>, (axum::http::StatusCode, Json<Value>)> {
|
||||
pipeline_core::run_phase1(&uuid).await
|
||||
.map_err(|e| {
|
||||
tracing::error!("phase1 error: {}", e);
|
||||
(axum::http::StatusCode::INTERNAL_SERVER_ERROR, Json(json!({"error": e.to_string()})))
|
||||
})?;
|
||||
|
||||
Ok(Json(json!({"success": true, "message": "Phase 1 complete", "file_uuid": uuid})))
|
||||
}
|
||||
|
||||
async fn handle_complete(Path(uuid): Path<String>) -> Result<Json<Value>, (axum::http::StatusCode, Json<Value>)> {
|
||||
let db = PostgresDb::new(&config::DATABASE_URL).await
|
||||
.map_err(|e| {
|
||||
tracing::error!("DB error: {}", e);
|
||||
(axum::http::StatusCode::INTERNAL_SERVER_ERROR, Json(json!({"error": "DB connection failed"})))
|
||||
})?;
|
||||
|
||||
pipeline_core::mark_complete(&db, &uuid).await
|
||||
.map_err(|e| {
|
||||
tracing::error!("complete error: {}", e);
|
||||
(axum::http::StatusCode::INTERNAL_SERVER_ERROR, Json(json!({"error": e.to_string()})))
|
||||
})?;
|
||||
|
||||
Ok(Json(json!({"success": true, "message": "Video marked as completed", "file_uuid": uuid})))
|
||||
}
|
||||
|
||||
pub fn pipeline_routes() -> Router<crate::api::types::AppState> {
|
||||
Router::new()
|
||||
.route("/api/v1/file/:file_uuid/store-asrx", post(handle_store_asrx))
|
||||
.route("/api/v1/file/:file_uuid/rule1", post(handle_rule1))
|
||||
.route("/api/v1/file/:file_uuid/vectorize", post(handle_vectorize))
|
||||
.route("/api/v1/file/:file_uuid/phase1", post(handle_phase1))
|
||||
.route("/api/v1/file/:file_uuid/complete", post(handle_complete))
|
||||
}
|
||||
@@ -21,7 +21,7 @@ pub struct SmartSearchRequest {
|
||||
pub limit: Option<usize>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize)]
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
pub struct SearchResult {
|
||||
pub id: i32,
|
||||
pub file_uuid: Option<String>,
|
||||
@@ -47,12 +47,12 @@ pub struct SmartSearchResponse {
|
||||
pub strategy: String,
|
||||
}
|
||||
|
||||
/// Internal merged result with RRF scoring
|
||||
/// Internal merged result with score-based merge
|
||||
#[derive(Debug)]
|
||||
struct MergedResult {
|
||||
file_uuid: String,
|
||||
chunk_id: String,
|
||||
rrf_score: f64,
|
||||
score: f64,
|
||||
semantic_score: Option<f64>,
|
||||
keyword_score: Option<f64>,
|
||||
identity_score: Option<f64>,
|
||||
@@ -140,8 +140,10 @@ pub async fn smart_search(
|
||||
},
|
||||
)?;
|
||||
|
||||
const KEYWORD_FIXED_SCORE: f64 = 0.5;
|
||||
const IDENTITY_FIXED_SCORE: f64 = 0.85;
|
||||
|
||||
let fetch_limit = limit * 3;
|
||||
let rrf_k = 60.0;
|
||||
|
||||
// 2. Semantic search via Qdrant
|
||||
let semantic_results: Vec<(String, String, f64)> = if let Some(file_uuid) = &req.file_uuid {
|
||||
@@ -176,6 +178,46 @@ pub async fn smart_search(
|
||||
}
|
||||
};
|
||||
|
||||
// 3b. Video title search: if query matches a video title, get its chunks
|
||||
const TITLE_MATCH_SCORE: f64 = 0.9;
|
||||
let title_results: Vec<(String, String, f64)> = {
|
||||
let clean_query = req.query.replace('\'', "''");
|
||||
let v_table = crate::core::db::schema::table_name("videos");
|
||||
let c_table = crate::core::db::schema::table_name("chunk");
|
||||
let video_rows: Vec<(String,)> = sqlx::query_as(&format!(
|
||||
"SELECT file_uuid::text FROM {} WHERE file_name ILIKE $1 LIMIT 5",
|
||||
v_table
|
||||
))
|
||||
.bind(format!("%{}%", clean_query))
|
||||
.fetch_all(db.pool())
|
||||
.await
|
||||
.unwrap_or_default();
|
||||
|
||||
let mut chunks = Vec::new();
|
||||
for (fu,) in video_rows.iter() {
|
||||
if let Some(ref f) = req.file_uuid {
|
||||
if fu != f {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
let rows: Vec<(String, String)> = sqlx::query_as(&format!(
|
||||
"SELECT chunk_id, file_uuid::text FROM {} \
|
||||
WHERE file_uuid = $1 AND embedding IS NOT NULL \
|
||||
AND chunk_type = 'sentence' \
|
||||
LIMIT 20",
|
||||
c_table
|
||||
))
|
||||
.bind(fu)
|
||||
.fetch_all(db.pool())
|
||||
.await
|
||||
.unwrap_or_default();
|
||||
for (cid, file_uuid) in rows {
|
||||
chunks.push((file_uuid, cid, TITLE_MATCH_SCORE));
|
||||
}
|
||||
}
|
||||
chunks
|
||||
};
|
||||
|
||||
// 4. Identity search: if query matches a person name, get their chunks
|
||||
let identity_results: Vec<(String, String, f64)> = {
|
||||
let id_table = crate::core::db::schema::table_name("identities");
|
||||
@@ -211,24 +253,23 @@ pub async fn smart_search(
|
||||
id_chunks
|
||||
};
|
||||
|
||||
// 5. RRF merge: combine results from all sources
|
||||
// 5. Score-based merge: combine results from all sources
|
||||
let mut merged: HashMap<(String, String), MergedResult> = HashMap::new();
|
||||
|
||||
// Add semantic results
|
||||
for (rank, (file_uuid, chunk_id, score)) in semantic_results.iter().enumerate() {
|
||||
// Add semantic results (use Qdrant cosine score directly)
|
||||
for (file_uuid, chunk_id, score) in semantic_results.iter() {
|
||||
let key = (file_uuid.clone(), chunk_id.clone());
|
||||
let rrf_contribution = 1.0 / (rrf_k + rank as f64 + 1.0);
|
||||
merged
|
||||
.entry(key)
|
||||
.and_modify(|e| {
|
||||
e.rrf_score += rrf_contribution;
|
||||
e.score = e.score.max(*score);
|
||||
e.semantic_score = Some(*score);
|
||||
e.source = format!("{}_{}", e.source.strip_prefix("semantic+").unwrap_or(&e.source), "semantic");
|
||||
})
|
||||
.or_insert(MergedResult {
|
||||
file_uuid: file_uuid.clone(),
|
||||
chunk_id: chunk_id.clone(),
|
||||
rrf_score: rrf_contribution,
|
||||
score: *score,
|
||||
semantic_score: Some(*score),
|
||||
keyword_score: None,
|
||||
identity_score: None,
|
||||
@@ -236,54 +277,76 @@ pub async fn smart_search(
|
||||
});
|
||||
}
|
||||
|
||||
// Add keyword results
|
||||
for (rank, (file_uuid, chunk_id, score)) in keyword_results.iter().enumerate() {
|
||||
// Add keyword results (fixed score 0.5)
|
||||
let keyword_fixed = KEYWORD_FIXED_SCORE;
|
||||
for (file_uuid, chunk_id, _) in keyword_results.iter() {
|
||||
let key = (file_uuid.clone(), chunk_id.clone());
|
||||
let rrf_contribution = 1.0 / (rrf_k + rank as f64 + 1.0);
|
||||
merged
|
||||
.entry(key)
|
||||
.and_modify(|e| {
|
||||
e.rrf_score += rrf_contribution;
|
||||
e.keyword_score = Some(*score);
|
||||
e.score = e.score.max(keyword_fixed);
|
||||
e.keyword_score = Some(keyword_fixed);
|
||||
e.source = format!("{}_keyword", e.source);
|
||||
})
|
||||
.or_insert(MergedResult {
|
||||
file_uuid: file_uuid.clone(),
|
||||
chunk_id: chunk_id.clone(),
|
||||
rrf_score: rrf_contribution,
|
||||
score: keyword_fixed,
|
||||
semantic_score: None,
|
||||
keyword_score: Some(*score),
|
||||
keyword_score: Some(keyword_fixed),
|
||||
identity_score: None,
|
||||
source: "keyword".to_string(),
|
||||
});
|
||||
}
|
||||
|
||||
// Add identity results (only if we found matching identities)
|
||||
let has_identity_match = !identity_results.is_empty();
|
||||
for (rank, (file_uuid, chunk_id, score)) in identity_results.iter().enumerate() {
|
||||
// Add title match results (high score 0.9) — query matched video title
|
||||
let has_title_match = !title_results.is_empty();
|
||||
let title_fixed = TITLE_MATCH_SCORE;
|
||||
for (file_uuid, chunk_id, _) in title_results.iter() {
|
||||
let key = (file_uuid.clone(), chunk_id.clone());
|
||||
let rrf_contribution = 1.0 / (rrf_k + rank as f64 + 1.0);
|
||||
merged
|
||||
.entry(key)
|
||||
.and_modify(|e| {
|
||||
e.rrf_score += rrf_contribution;
|
||||
e.identity_score = Some(*score);
|
||||
e.score = e.score.max(title_fixed);
|
||||
e.source = format!("{}_title", e.source);
|
||||
})
|
||||
.or_insert(MergedResult {
|
||||
file_uuid: file_uuid.clone(),
|
||||
chunk_id: chunk_id.clone(),
|
||||
score: title_fixed,
|
||||
semantic_score: None,
|
||||
keyword_score: None,
|
||||
identity_score: None,
|
||||
source: "title".to_string(),
|
||||
});
|
||||
}
|
||||
|
||||
// Add identity results (fixed score 0.85)
|
||||
let has_identity_match = !identity_results.is_empty();
|
||||
let identity_fixed = IDENTITY_FIXED_SCORE;
|
||||
for (file_uuid, chunk_id, _) in identity_results.iter() {
|
||||
let key = (file_uuid.clone(), chunk_id.clone());
|
||||
merged
|
||||
.entry(key)
|
||||
.and_modify(|e| {
|
||||
e.score = e.score.max(identity_fixed);
|
||||
e.identity_score = Some(identity_fixed);
|
||||
e.source = format!("{}_identity", e.source);
|
||||
})
|
||||
.or_insert(MergedResult {
|
||||
file_uuid: file_uuid.clone(),
|
||||
chunk_id: chunk_id.clone(),
|
||||
rrf_score: rrf_contribution,
|
||||
score: identity_fixed,
|
||||
semantic_score: None,
|
||||
keyword_score: None,
|
||||
identity_score: Some(*score),
|
||||
identity_score: Some(identity_fixed),
|
||||
source: "identity".to_string(),
|
||||
});
|
||||
}
|
||||
|
||||
// Sort by RRF score descending
|
||||
// Sort by score descending (score-based merge)
|
||||
let mut ranked: Vec<&MergedResult> = merged.values().collect();
|
||||
ranked.sort_by(|a, b| b.rrf_score.partial_cmp(&a.rrf_score).unwrap_or(std::cmp::Ordering::Equal));
|
||||
ranked.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap_or(std::cmp::Ordering::Equal));
|
||||
|
||||
// 6. Enrich top results from PG and build final response
|
||||
let mut final_results = Vec::new();
|
||||
@@ -307,7 +370,7 @@ pub async fn smart_search(
|
||||
raw_text: None,
|
||||
summary: Some(pg.summary),
|
||||
metadata: pg.metadata.clone(),
|
||||
similarity: Some(mr.rrf_score),
|
||||
similarity: Some(mr.score),
|
||||
});
|
||||
}
|
||||
}
|
||||
@@ -320,6 +383,9 @@ pub async fn smart_search(
|
||||
if has_identity_match {
|
||||
strategies.push("identity");
|
||||
}
|
||||
if has_title_match {
|
||||
strategies.push("title");
|
||||
}
|
||||
|
||||
Ok(Json(SmartSearchResponse {
|
||||
query: req.query,
|
||||
|
||||
@@ -19,6 +19,8 @@ use super::identities;
|
||||
use super::identity_agent_api;
|
||||
use super::identity_api;
|
||||
use super::identity_binding;
|
||||
use super::llm_search;
|
||||
use super::pipeline;
|
||||
use super::media_api;
|
||||
use super::middleware::unified_auth;
|
||||
use super::processing;
|
||||
@@ -117,7 +119,9 @@ pub async fn start_server(host: &str, port: u16) -> anyhow::Result<()> {
|
||||
.merge(media_api::bbox_routes())
|
||||
.merge(trace_agent_api::trace_agent_routes())
|
||||
.merge(search_routes())
|
||||
.merge(llm_search::llm_smart_routes())
|
||||
.merge(universal_search_routes())
|
||||
.merge(pipeline::pipeline_routes())
|
||||
.layer(axum::middleware::from_fn_with_state(
|
||||
state.api_state.clone(),
|
||||
unified_auth,
|
||||
|
||||
Reference in New Issue
Block a user