feat: trace chunks with co-appearance relationships

- New trace_ingest module: creates chunks for each face trace (time + bbox + ASR text)
- Computes pairwise time overlaps between traces -> co_appearances in metadata
- Worker auto-triggers trace chunk ingestion after face traces are stored and synced to Qdrant
- SearchFilters: chunk_type filter (sentence/cut/trace/visual)
- SearchFilters: co_appears_with_trace_id filter
This commit is contained in:
Accusys
2026-05-09 06:18:32 +08:00
parent 9f5afd1b86
commit b902763d45
5 changed files with 373 additions and 6 deletions

View File

@@ -20,6 +20,8 @@ pub struct UniversalSearchRequest {
pub types: Vec<String>, // chunk, frame, person
pub time_range: Option<[f64; 2]>,
pub filters: Option<SearchFilters>,
pub page: Option<usize>,
pub page_size: Option<usize>,
pub limit: Option<usize>,
pub offset: Option<usize>,
}
@@ -31,6 +33,10 @@ pub struct SearchFilters {
pub ocr_text: Option<String>,
pub has_face: Option<bool>,
pub speaker_id: Option<String>,
/// Restrict results to the given chunk_type, e.g. "sentence", "cut", "trace", "visual"
pub chunk_type: Option<String>,
/// Search for trace chunks whose time range overlaps with the given trace_id
pub co_appears_with_trace_id: Option<i32>,
// Visual chunk filters
pub min_confidence: Option<f32>,
pub min_unique_classes: Option<u32>,
@@ -44,6 +50,8 @@ pub struct UniversalSearchResponse {
pub query: String,
pub results: Vec<SearchResult>,
pub total: usize,
pub page: usize,
pub page_size: usize,
pub took_ms: u64,
}
@@ -108,8 +116,14 @@ pub async fn universal_search(
)
})?;
let limit = req.limit.unwrap_or(20);
let offset = req.offset.unwrap_or(0);
let page = req.page.unwrap_or(1).max(1);
let page_size = req.page_size.unwrap_or(20).max(1).min(200);
// Backward compat: if old `offset` is used without `page`, derive from offset
let offset = if req.page.is_none() && req.offset.is_some() {
req.offset.unwrap()
} else {
(page - 1) * page_size
};
let types = if req.types.is_empty() {
vec![
"chunk".to_string(),
@@ -163,7 +177,8 @@ pub async fn universal_search(
});
let total = results.len();
let end = std::cmp::min(offset + limit, results.len());
let effective_limit = req.limit.unwrap_or(usize::MAX);
let end = std::cmp::min(offset + page_size, results.len()).min(effective_limit);
let paginated = if offset < results.len() {
results[offset..end].to_vec()
} else {
@@ -176,6 +191,8 @@ pub async fn universal_search(
query: req.query,
results: paginated,
total,
page,
page_size,
took_ms: took,
}))
}
@@ -378,10 +395,22 @@ async fn search_chunks(
sql.push_str(&format!(" AND ({})", class_conditions.join(" OR ")));
}
}
if let Some(ref chunk_type) = filters.chunk_type {
sql.push_str(&format!(
" AND chunk_type = '{}'",
chunk_type.replace('\'', "''")
));
}
if let Some(trace_id) = filters.co_appears_with_trace_id {
sql.push_str(&format!(
" AND metadata->'co_appearances' @> '[{{ \"trace_id\": {} }}]'",
trace_id
));
}
}
sql.push_str(" ORDER BY start_time ASC");
sql.push_str(&format!(" LIMIT {}", req.limit.unwrap_or(20)));
sql.push_str(&format!(" LIMIT {}", req.page_size.unwrap_or(20)));
let rows: Vec<(
String,
@@ -495,7 +524,7 @@ async fn search_frames_internal(
}
sql.push_str(" ORDER BY f.timestamp ASC");
sql.push_str(&format!(" LIMIT {}", req.limit.unwrap_or(20)));
sql.push_str(&format!(" LIMIT {}", req.page_size.unwrap_or(20)));
let rows: Vec<(
i64,
@@ -575,7 +604,7 @@ async fn search_persons_internal(
}
sql.push_str(" ORDER BY appearance_count DESC");
sql.push_str(&format!(" LIMIT {}", req.limit.unwrap_or(20)));
sql.push_str(&format!(" LIMIT {}", req.page_size.unwrap_or(20)));
let rows: Vec<(
String,