feat: trace-level matching, health watcher/worker status, timezone config
This commit is contained in:
File diff suppressed because it is too large
Load Diff
@@ -15,9 +15,11 @@ pub struct QdrantDb {
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct VectorPayload {
|
||||
pub uuid: String,
|
||||
pub file_uuid: String,
|
||||
pub chunk_id: String,
|
||||
pub chunk_type: String,
|
||||
pub start_frame: i64,
|
||||
pub end_frame: i64,
|
||||
pub start_time: f64,
|
||||
pub end_time: f64,
|
||||
pub text: Option<String>,
|
||||
@@ -189,6 +191,49 @@ impl QdrantDb {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn upsert_vectors_batch(
|
||||
&self,
|
||||
collection: &str,
|
||||
points: &[(u64, &[f32], Option<serde_json::Value>)],
|
||||
) -> Result<()> {
|
||||
let url = format!(
|
||||
"{}/collections/{}/points?wait=true",
|
||||
self.base_url, collection
|
||||
);
|
||||
|
||||
let qdrant_points: Vec<serde_json::Value> = points
|
||||
.iter()
|
||||
.map(|(id, vec, payload)| {
|
||||
let mut p = serde_json::json!({
|
||||
"id": id,
|
||||
"vector": vec,
|
||||
});
|
||||
if let Some(pl) = payload {
|
||||
p["payload"] = pl.clone();
|
||||
}
|
||||
p
|
||||
})
|
||||
.collect();
|
||||
|
||||
let body = serde_json::json!({ "points": qdrant_points });
|
||||
|
||||
let response = self
|
||||
.client
|
||||
.put(&url)
|
||||
.header("api-key", &self.api_key)
|
||||
.json(&body)
|
||||
.send()
|
||||
.await
|
||||
.context("Failed to send batch upsert request to Qdrant")?;
|
||||
|
||||
let status = response.status();
|
||||
if !status.is_success() {
|
||||
let response_text = response.text().await.unwrap_or_default();
|
||||
anyhow::bail!("Qdrant batch upsert failed: {} - {}", status, response_text);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn upsert_vector(
|
||||
&self,
|
||||
chunk_id: &str,
|
||||
@@ -207,12 +252,23 @@ impl QdrantDb {
|
||||
);
|
||||
|
||||
let mut payload_map = HashMap::new();
|
||||
payload_map.insert("uuid".to_string(), serde_json::json!(payload.uuid));
|
||||
payload_map.insert(
|
||||
"file_uuid".to_string(),
|
||||
serde_json::json!(payload.file_uuid),
|
||||
);
|
||||
payload_map.insert("chunk_id".to_string(), serde_json::json!(payload.chunk_id));
|
||||
payload_map.insert(
|
||||
"chunk_type".to_string(),
|
||||
serde_json::json!(payload.chunk_type),
|
||||
);
|
||||
payload_map.insert(
|
||||
"start_frame".to_string(),
|
||||
serde_json::json!(payload.start_frame),
|
||||
);
|
||||
payload_map.insert(
|
||||
"end_frame".to_string(),
|
||||
serde_json::json!(payload.end_frame),
|
||||
);
|
||||
payload_map.insert(
|
||||
"start_time".to_string(),
|
||||
serde_json::json!(payload.start_time),
|
||||
@@ -224,7 +280,7 @@ impl QdrantDb {
|
||||
|
||||
// Generate consistent point ID from uuid and chunk_id
|
||||
// Qdrant requires integer or UUID point IDs. We'll use a simple integer hash.
|
||||
let point_id_str = format!("{}_{}", payload.uuid, chunk_id);
|
||||
let point_id_str = format!("{}_{}", payload.file_uuid, chunk_id);
|
||||
use std::collections::hash_map::DefaultHasher;
|
||||
use std::hash::{Hash, Hasher};
|
||||
let mut hasher = DefaultHasher::new();
|
||||
@@ -240,9 +296,9 @@ impl QdrantDb {
|
||||
});
|
||||
|
||||
tracing::debug!(
|
||||
"Upserting vector to Qdrant: chunk_id={}, uuid={}, vector_len={}",
|
||||
"Upserting vector to Qdrant: chunk_id={}, file_uuid={}, vector_len={}",
|
||||
chunk_id,
|
||||
payload.uuid,
|
||||
payload.file_uuid,
|
||||
vector.len()
|
||||
);
|
||||
|
||||
@@ -337,7 +393,7 @@ impl QdrantDb {
|
||||
.map(|r| {
|
||||
let uuid = r
|
||||
.payload
|
||||
.get("uuid")
|
||||
.get("file_uuid")
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or("unknown")
|
||||
.to_string();
|
||||
@@ -409,7 +465,7 @@ impl QdrantDb {
|
||||
.map(|r| {
|
||||
let uuid = r
|
||||
.payload
|
||||
.get("uuid")
|
||||
.get("file_uuid")
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or("unknown")
|
||||
.to_string();
|
||||
@@ -471,7 +527,7 @@ impl QdrantDb {
|
||||
"filter": {
|
||||
"must": [
|
||||
{
|
||||
"key": "uuid",
|
||||
"key": "file_uuid",
|
||||
"match": {
|
||||
"value": uuid
|
||||
}
|
||||
@@ -532,7 +588,7 @@ impl QdrantDb {
|
||||
.map(|r| {
|
||||
let uuid = r
|
||||
.payload
|
||||
.get("uuid")
|
||||
.get("file_uuid")
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or("unknown")
|
||||
.to_string();
|
||||
@@ -553,6 +609,89 @@ impl QdrantDb {
|
||||
Ok(search_results)
|
||||
}
|
||||
|
||||
pub async fn search_face_collection(
|
||||
&self,
|
||||
collection: &str,
|
||||
query_vector: &[f32],
|
||||
limit: usize,
|
||||
exclude_payload_key: &str,
|
||||
exclude_payload_value: &str,
|
||||
include_file_uuid: Option<&str>,
|
||||
) -> Result<Vec<(f64, HashMap<String, serde_json::Value>)>> {
|
||||
let url = format!("{}/collections/{}/points/search", self.base_url, collection);
|
||||
|
||||
let mut filter = serde_json::json!({
|
||||
"must_not": [
|
||||
{
|
||||
"key": exclude_payload_key,
|
||||
"match": { "value": exclude_payload_value }
|
||||
}
|
||||
]
|
||||
});
|
||||
|
||||
if let Some(file_uuid) = include_file_uuid {
|
||||
filter["must"] = serde_json::json!([
|
||||
{
|
||||
"key": "file_uuid",
|
||||
"match": { "value": file_uuid }
|
||||
}
|
||||
]);
|
||||
}
|
||||
|
||||
let body = serde_json::json!({
|
||||
"vector": query_vector,
|
||||
"limit": limit,
|
||||
"with_payload": true,
|
||||
"filter": filter,
|
||||
});
|
||||
|
||||
let response = self
|
||||
.client
|
||||
.post(&url)
|
||||
.header("api-key", &self.api_key)
|
||||
.header("Content-Type", "application/json")
|
||||
.json(&body)
|
||||
.send()
|
||||
.await
|
||||
.context("Failed to search Qdrant face collection")?;
|
||||
|
||||
let status = response.status();
|
||||
let response_text = response
|
||||
.text()
|
||||
.await
|
||||
.unwrap_or_else(|_| "Failed to read response".to_string());
|
||||
|
||||
if !status.is_success() {
|
||||
return Err(anyhow::anyhow!(
|
||||
"Qdrant search_face_collection failed: {} - {}",
|
||||
status,
|
||||
response_text
|
||||
));
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct QdrantSearchResult {
|
||||
result: Vec<QdrantPoint>,
|
||||
}
|
||||
#[derive(Deserialize)]
|
||||
struct QdrantPoint {
|
||||
score: f64,
|
||||
payload: HashMap<String, serde_json::Value>,
|
||||
}
|
||||
|
||||
match serde_json::from_str::<QdrantSearchResult>(&response_text) {
|
||||
Ok(parsed) => {
|
||||
let results: Vec<(f64, HashMap<String, serde_json::Value>)> = parsed
|
||||
.result
|
||||
.into_iter()
|
||||
.map(|r| (r.score, r.payload))
|
||||
.collect();
|
||||
Ok(results)
|
||||
}
|
||||
Err(e) => Err(anyhow::anyhow!("Failed to parse Qdrant response: {}", e)),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn delete_by_uuid(&self, uuid: &str) -> Result<()> {
|
||||
let url = format!(
|
||||
"{}/collections/{}/points/delete",
|
||||
@@ -563,7 +702,7 @@ impl QdrantDb {
|
||||
"filter": {
|
||||
"must": [
|
||||
{
|
||||
"key": "uuid",
|
||||
"key": "file_uuid",
|
||||
"match": {
|
||||
"value": uuid
|
||||
}
|
||||
@@ -711,9 +850,11 @@ impl Database for QdrantDb {
|
||||
impl VectorStore for QdrantDb {
|
||||
async fn store_vector(&self, chunk_id: &str, vector: &[f32]) -> Result<()> {
|
||||
let payload = VectorPayload {
|
||||
uuid: String::new(),
|
||||
file_uuid: String::new(),
|
||||
chunk_id: chunk_id.to_string(),
|
||||
chunk_type: String::new(),
|
||||
start_frame: 0,
|
||||
end_frame: 0,
|
||||
start_time: 0.0,
|
||||
end_time: 0.0,
|
||||
text: None,
|
||||
@@ -737,7 +878,9 @@ pub async fn sync_face_embeddings(file_uuid: &str) -> Result<()> {
|
||||
let qdrant: QdrantDb = QdrantDb::new();
|
||||
|
||||
let query = format!(
|
||||
"SELECT id, trace_id, frame_number, embedding FROM {} WHERE file_uuid = $1 AND embedding IS NOT NULL",
|
||||
"SELECT id, trace_id, frame_number, embedding FROM {} \
|
||||
WHERE file_uuid = $1 AND embedding IS NOT NULL \
|
||||
AND ((metadata->>'qc_ok')::boolean IS NULL OR (metadata->>'qc_ok')::boolean = true)",
|
||||
table
|
||||
);
|
||||
let rows = sqlx::query(&query).bind(file_uuid).fetch_all(&pool).await?;
|
||||
@@ -767,3 +910,103 @@ pub async fn sync_face_embeddings(file_uuid: &str) -> Result<()> {
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn sync_trace_embeddings(file_uuid: &str) -> Result<()> {
|
||||
use crate::core::config::DATABASE_URL;
|
||||
use sqlx::Row;
|
||||
|
||||
let pool = sqlx::PgPool::connect(&DATABASE_URL).await?;
|
||||
let table = crate::core::db::schema::table_name("face_detections");
|
||||
let qdrant = QdrantDb::new();
|
||||
|
||||
let collection = format!(
|
||||
"{}_traces",
|
||||
crate::core::config::REDIS_KEY_PREFIX
|
||||
.as_str()
|
||||
.trim_end_matches(':')
|
||||
);
|
||||
qdrant.ensure_collection(&collection, 512).await?;
|
||||
|
||||
// Read all face_detections with embeddings, grouped by trace_id in Rust
|
||||
let rows = sqlx::query(&format!(
|
||||
"SELECT trace_id, embedding FROM {} \
|
||||
WHERE file_uuid = $1 AND embedding IS NOT NULL AND trace_id IS NOT NULL \
|
||||
AND ((metadata->>'qc_ok')::boolean IS NULL OR (metadata->>'qc_ok')::boolean = true)",
|
||||
table
|
||||
))
|
||||
.bind(file_uuid)
|
||||
.fetch_all(&pool)
|
||||
.await?;
|
||||
|
||||
let mut trace_faces: std::collections::HashMap<i32, Vec<Vec<f32>>> =
|
||||
std::collections::HashMap::new();
|
||||
let mut trace_stats: std::collections::HashMap<i32, (i64, i64, i64)> =
|
||||
std::collections::HashMap::new(); // (count, min_frame, max_frame)
|
||||
|
||||
for row in &rows {
|
||||
let tid: Option<i32> = row.get(0);
|
||||
let emb: Option<Vec<f32>> = row.get(1);
|
||||
if let (Some(tid), Some(emb)) = (tid, emb) {
|
||||
trace_faces.entry(tid).or_default().push(emb);
|
||||
let entry = trace_stats.entry(tid).or_insert((0, i64::MAX, i64::MIN));
|
||||
entry.0 += 1;
|
||||
}
|
||||
}
|
||||
|
||||
// Compute average embedding per trace
|
||||
struct AvgTrace {
|
||||
tid: i32,
|
||||
avg_emb: Vec<f32>,
|
||||
frame_count: i64,
|
||||
}
|
||||
|
||||
let mut trace_avgs: Vec<AvgTrace> = Vec::new();
|
||||
|
||||
for (&tid, faces) in &trace_faces {
|
||||
let dim = faces[0].len();
|
||||
let mut avg = vec![0.0f32; dim];
|
||||
for face in faces {
|
||||
for (i, &v) in face.iter().enumerate() {
|
||||
avg[i] += v;
|
||||
}
|
||||
}
|
||||
let n = faces.len() as f32;
|
||||
for v in &mut avg {
|
||||
*v /= n;
|
||||
}
|
||||
|
||||
let stats = trace_stats.get(&tid).unwrap_or(&(0, 0, 0));
|
||||
trace_avgs.push(AvgTrace {
|
||||
tid,
|
||||
avg_emb: avg,
|
||||
frame_count: stats.0,
|
||||
});
|
||||
}
|
||||
|
||||
// Push to Qdrant in batches
|
||||
for chunk in trace_avgs.chunks(500) {
|
||||
let batch: Vec<(u64, &[f32], Option<serde_json::Value>)> = chunk
|
||||
.iter()
|
||||
.map(|t| {
|
||||
(
|
||||
t.tid as u64,
|
||||
t.avg_emb.as_slice(),
|
||||
Some(serde_json::json!({
|
||||
"trace_id": t.tid,
|
||||
"file_uuid": file_uuid,
|
||||
"frame_count": t.frame_count,
|
||||
"source": "trace",
|
||||
})),
|
||||
)
|
||||
})
|
||||
.collect();
|
||||
qdrant.upsert_vectors_batch(&collection, &batch).await?;
|
||||
}
|
||||
|
||||
tracing::info!(
|
||||
"Synced {} trace embeddings to Qdrant for {}",
|
||||
trace_faces.len(),
|
||||
file_uuid
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -45,9 +45,11 @@ impl SyncDb {
|
||||
}
|
||||
|
||||
let payload = VectorPayload {
|
||||
uuid: uuid.clone(),
|
||||
file_uuid: uuid.clone(),
|
||||
chunk_id: chunk_id.clone(),
|
||||
chunk_type,
|
||||
start_frame: chunk.start_frame,
|
||||
end_frame: chunk.end_frame,
|
||||
start_time,
|
||||
end_time,
|
||||
text: Some(text.to_string()),
|
||||
|
||||
Reference in New Issue
Block a user