update: pipeline, search, clip, embedding fixes

This commit is contained in:
Accusys
2026-05-17 19:46:35 +08:00
parent eec2eea880
commit 3164a65554
36 changed files with 4313 additions and 4061 deletions

View File

@@ -94,84 +94,31 @@ pub async fn smart_search(
},
)?;
if db_parents.is_empty() {
return Ok(Json(SmartSearchResponse {
query: req.query,
results: vec![],
page,
page_size,
strategy: "semantic_vector_search".to_string(),
}));
}
// Collect Parent IDs
let parent_ids: Vec<i32> = db_parents.iter().map(|p| p.id).collect();
// 3. Fetch Children for these Parents (Drill Down)
// We fetch all children for these parents (limit can be adjusted)
let children: Vec<crate::core::db::postgres_db::ChildChunkResult> = db
.get_children_for_parents(&parent_ids, 10) // Fetch top 10 children per parent
.await
.map_err(
|e: anyhow::Error| -> (StatusCode, Json<serde_json::Value>) {
tracing::error!("Fetching children failed: {}", e);
(
StatusCode::INTERNAL_SERVER_ERROR,
Json(serde_json::json!({ "error": e.to_string() })),
)
},
)?;
// 4. Map Parents to a lookup table
let parent_map: std::collections::HashMap<
i32,
&crate::core::db::postgres_db::SemanticSearchResult,
> = db_parents.iter().map(|p| (p.id, p)).collect();
// Map Children to API response struct
let results: Vec<SearchResult> = children
// Return parent chunks directly as search results
let results: Vec<SearchResult> = db_parents
.into_iter()
.map(|c| {
let parent = parent_map.get(&c.parent_id);
SearchResult {
id: c.id,
parent_id: c.parent_id,
scene_order: parent.map(|p| p.scene_order),
start_frame: c.start_frame,
end_frame: c.end_frame,
fps: c.fps,
start_time: c.start_time,
end_time: c.end_time,
raw_text: Some(c.raw_text),
summary: parent.map(|p| p.summary.clone()),
metadata: parent.map(|p| p.metadata.clone()),
similarity: parent.and_then(|p| p.similarity),
}
.map(|p| SearchResult {
id: 0,
parent_id: p.scene_order,
scene_order: Some(p.scene_order),
start_frame: 0,
end_frame: 0,
fps: 0.0,
start_time: p.start_time,
end_time: p.end_time,
raw_text: None,
summary: Some(p.summary),
metadata: p.metadata.clone(),
similarity: p.similarity,
})
.collect();
// 6. Sort results by similarity (descending)
// Since all children of a parent have the same parent similarity, this groups relevant chunks together
let mut results = results;
results.sort_by(|a, b| {
b.similarity
.partial_cmp(&a.similarity)
.unwrap_or(std::cmp::Ordering::Equal)
});
// 7. Limit the final results (optional, but good for API consistency)
let truncate_limit = hard_limit.min(page_size * 5); // Allow more children per parent context
results.truncate(truncate_limit);
// 8. Format Response
let response = SmartSearchResponse {
query: req.query,
results,
page,
page_size,
strategy: "drill_down_semantic_search".to_string(),
strategy: "semantic_vector_search".to_string(),
};
Ok(Json(response))