feat: trace-level matching, health watcher/worker status, timezone config

This commit is contained in:
Accusys
2026-05-21 01:08:30 +08:00
parent 8ede4be159
commit bebaa743ed
60 changed files with 6110 additions and 1586 deletions

File diff suppressed because it is too large Load Diff

View File

@@ -15,9 +15,11 @@ pub struct QdrantDb {
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VectorPayload {
pub uuid: String,
pub file_uuid: String,
pub chunk_id: String,
pub chunk_type: String,
pub start_frame: i64,
pub end_frame: i64,
pub start_time: f64,
pub end_time: f64,
pub text: Option<String>,
@@ -189,6 +191,49 @@ impl QdrantDb {
Ok(())
}
/// Upserts several points into `collection` with a single HTTP request.
///
/// Each element of `points` is `(point id, vector, optional payload)`. A
/// point whose payload is `None` is sent without a `payload` field. The
/// request URL carries `wait=true`.
///
/// An empty `points` slice is a no-op: no request is sent and `Ok(())` is
/// returned.
///
/// # Errors
///
/// Returns an error if the request cannot be sent, or if Qdrant answers with
/// a non-success status (the status and response body are included in the
/// error message).
pub async fn upsert_vectors_batch(
    &self,
    collection: &str,
    points: &[(u64, &[f32], Option<serde_json::Value>)],
) -> Result<()> {
    // Nothing to write, so skip the round trip entirely.
    // NOTE(review): Qdrant is expected to reject a request with an empty
    // `points` array as an empty update — confirm against the server version
    // in use.
    if points.is_empty() {
        return Ok(());
    }

    let url = format!(
        "{}/collections/{}/points?wait=true",
        self.base_url, collection
    );

    // Build one JSON object per point; `payload` is attached only when given.
    let qdrant_points: Vec<serde_json::Value> = points
        .iter()
        .map(|(id, vector, payload)| {
            let mut point = serde_json::json!({
                "id": id,
                "vector": vector,
            });
            if let Some(payload) = payload {
                point["payload"] = payload.clone();
            }
            point
        })
        .collect();

    let body = serde_json::json!({ "points": qdrant_points });

    let response = self
        .client
        .put(&url)
        .header("api-key", &self.api_key)
        .json(&body)
        .send()
        .await
        .context("Failed to send batch upsert request to Qdrant")?;

    let status = response.status();
    if !status.is_success() {
        // Best effort: an unreadable body is reported as an empty string so
        // that the status code is still surfaced.
        let response_text = response.text().await.unwrap_or_default();
        anyhow::bail!("Qdrant batch upsert failed: {} - {}", status, response_text);
    }

    Ok(())
}
pub async fn upsert_vector(
&self,
chunk_id: &str,
@@ -207,12 +252,23 @@ impl QdrantDb {
);
let mut payload_map = HashMap::new();
payload_map.insert("uuid".to_string(), serde_json::json!(payload.uuid));
payload_map.insert(
"file_uuid".to_string(),
serde_json::json!(payload.file_uuid),
);
payload_map.insert("chunk_id".to_string(), serde_json::json!(payload.chunk_id));
payload_map.insert(
"chunk_type".to_string(),
serde_json::json!(payload.chunk_type),
);
payload_map.insert(
"start_frame".to_string(),
serde_json::json!(payload.start_frame),
);
payload_map.insert(
"end_frame".to_string(),
serde_json::json!(payload.end_frame),
);
payload_map.insert(
"start_time".to_string(),
serde_json::json!(payload.start_time),
@@ -224,7 +280,7 @@ impl QdrantDb {
// Generate consistent point ID from uuid and chunk_id
// Qdrant requires integer or UUID point IDs. We'll use a simple integer hash.
let point_id_str = format!("{}_{}", payload.uuid, chunk_id);
let point_id_str = format!("{}_{}", payload.file_uuid, chunk_id);
use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};
let mut hasher = DefaultHasher::new();
@@ -240,9 +296,9 @@ impl QdrantDb {
});
tracing::debug!(
"Upserting vector to Qdrant: chunk_id={}, uuid={}, vector_len={}",
"Upserting vector to Qdrant: chunk_id={}, file_uuid={}, vector_len={}",
chunk_id,
payload.uuid,
payload.file_uuid,
vector.len()
);
@@ -337,7 +393,7 @@ impl QdrantDb {
.map(|r| {
let uuid = r
.payload
.get("uuid")
.get("file_uuid")
.and_then(|v| v.as_str())
.unwrap_or("unknown")
.to_string();
@@ -409,7 +465,7 @@ impl QdrantDb {
.map(|r| {
let uuid = r
.payload
.get("uuid")
.get("file_uuid")
.and_then(|v| v.as_str())
.unwrap_or("unknown")
.to_string();
@@ -471,7 +527,7 @@ impl QdrantDb {
"filter": {
"must": [
{
"key": "uuid",
"key": "file_uuid",
"match": {
"value": uuid
}
@@ -532,7 +588,7 @@ impl QdrantDb {
.map(|r| {
let uuid = r
.payload
.get("uuid")
.get("file_uuid")
.and_then(|v| v.as_str())
.unwrap_or("unknown")
.to_string();
@@ -553,6 +609,89 @@ impl QdrantDb {
Ok(search_results)
}
/// Runs a vector search against `collection` and returns `(score, payload)`
/// pairs in the order they appear in the response's `result` array.
///
/// The search filter always excludes points whose payload field
/// `exclude_payload_key` matches `exclude_payload_value`. When
/// `include_file_uuid` is `Some`, results are additionally restricted to
/// points whose `file_uuid` payload field matches it.
///
/// # Errors
///
/// Returns an error if the request cannot be sent, if Qdrant answers with a
/// non-success status, or if the response body cannot be parsed.
pub async fn search_face_collection(
    &self,
    collection: &str,
    query_vector: &[f32],
    limit: usize,
    exclude_payload_key: &str,
    exclude_payload_value: &str,
    include_file_uuid: Option<&str>,
) -> Result<Vec<(f64, HashMap<String, serde_json::Value>)>> {
    // Minimal view of the search response: only the fields read below.
    #[derive(Deserialize)]
    struct ScoredHit {
        score: f64,
        payload: HashMap<String, serde_json::Value>,
    }
    #[derive(Deserialize)]
    struct SearchEnvelope {
        result: Vec<ScoredHit>,
    }

    let endpoint = format!("{}/collections/{}/points/search", self.base_url, collection);

    // The exclusion clause is always present; the file restriction is optional.
    let must_not = serde_json::json!([
        {
            "key": exclude_payload_key,
            "match": { "value": exclude_payload_value }
        }
    ]);
    let filter = match include_file_uuid {
        Some(file_uuid) => serde_json::json!({
            "must_not": must_not,
            "must": [
                {
                    "key": "file_uuid",
                    "match": { "value": file_uuid }
                }
            ],
        }),
        None => serde_json::json!({ "must_not": must_not }),
    };

    let request_body = serde_json::json!({
        "vector": query_vector,
        "limit": limit,
        "with_payload": true,
        "filter": filter,
    });

    let response = self
        .client
        .post(&endpoint)
        .header("api-key", &self.api_key)
        .header("Content-Type", "application/json")
        .json(&request_body)
        .send()
        .await
        .context("Failed to search Qdrant face collection")?;

    // Capture the status first: reading the body consumes the response.
    let status = response.status();
    let raw_body = match response.text().await {
        Ok(text) => text,
        Err(_) => "Failed to read response".to_string(),
    };

    if !status.is_success() {
        anyhow::bail!(
            "Qdrant search_face_collection failed: {} - {}",
            status,
            raw_body
        );
    }

    let envelope: SearchEnvelope = serde_json::from_str(&raw_body)
        .map_err(|e| anyhow::anyhow!("Failed to parse Qdrant response: {}", e))?;

    Ok(envelope
        .result
        .into_iter()
        .map(|hit| (hit.score, hit.payload))
        .collect())
}
pub async fn delete_by_uuid(&self, uuid: &str) -> Result<()> {
let url = format!(
"{}/collections/{}/points/delete",
@@ -563,7 +702,7 @@ impl QdrantDb {
"filter": {
"must": [
{
"key": "uuid",
"key": "file_uuid",
"match": {
"value": uuid
}
@@ -711,9 +850,11 @@ impl Database for QdrantDb {
impl VectorStore for QdrantDb {
async fn store_vector(&self, chunk_id: &str, vector: &[f32]) -> Result<()> {
let payload = VectorPayload {
uuid: String::new(),
file_uuid: String::new(),
chunk_id: chunk_id.to_string(),
chunk_type: String::new(),
start_frame: 0,
end_frame: 0,
start_time: 0.0,
end_time: 0.0,
text: None,
@@ -737,7 +878,9 @@ pub async fn sync_face_embeddings(file_uuid: &str) -> Result<()> {
let qdrant: QdrantDb = QdrantDb::new();
let query = format!(
"SELECT id, trace_id, frame_number, embedding FROM {} WHERE file_uuid = $1 AND embedding IS NOT NULL",
"SELECT id, trace_id, frame_number, embedding FROM {} \
WHERE file_uuid = $1 AND embedding IS NOT NULL \
AND ((metadata->>'qc_ok')::boolean IS NULL OR (metadata->>'qc_ok')::boolean = true)",
table
);
let rows = sqlx::query(&query).bind(file_uuid).fetch_all(&pool).await?;
@@ -767,3 +910,103 @@ pub async fn sync_face_embeddings(file_uuid: &str) -> Result<()> {
);
Ok(())
}
/// Averages the face embeddings of every trace in one file and upserts the
/// per-trace averages into the `<prefix>_traces` Qdrant collection, where
/// `<prefix>` is `REDIS_KEY_PREFIX` with trailing `:` characters removed.
///
/// Rows are read from the face-detections table for `file_uuid`, keeping
/// only rows that have both an embedding and a trace id and whose
/// `metadata->>'qc_ok'` is either absent or true. Each trace becomes one
/// point whose payload holds `trace_id`, `file_uuid`, `frame_count` (the
/// number of embeddings averaged) and `source = "trace"`.
///
/// Within a trace, an embedding whose length differs from the first one seen
/// for that trace is skipped (and counted in a warning) instead of being
/// averaged, since element-wise averaging is only defined for equal lengths.
///
/// # Errors
///
/// Returns an error if the database connection or query fails, if a row
/// cannot be decoded, or if ensuring the collection or any batch upsert
/// fails.
pub async fn sync_trace_embeddings(file_uuid: &str) -> Result<()> {
    use crate::core::config::DATABASE_URL;
    use sqlx::Row;

    /// Number of points sent to Qdrant per upsert request.
    const UPSERT_BATCH_SIZE: usize = 500;

    /// Running element-wise sum of one trace's embeddings.
    struct TraceSum {
        sum: Vec<f32>,
        count: i64,
    }

    /// Finished per-trace average, ready to be turned into a Qdrant point.
    struct AvgTrace {
        tid: i32,
        avg_emb: Vec<f32>,
        frame_count: i64,
    }

    let pool = sqlx::PgPool::connect(&DATABASE_URL).await?;
    let table = crate::core::db::schema::table_name("face_detections");
    let qdrant = QdrantDb::new();
    let collection = format!(
        "{}_traces",
        crate::core::config::REDIS_KEY_PREFIX
            .as_str()
            .trim_end_matches(':')
    );
    // The collection is ensured with a vector size of 512.
    qdrant.ensure_collection(&collection, 512).await?;

    // Read every usable face embedding for this file; grouping by trace is
    // done below in Rust rather than in SQL.
    let rows = sqlx::query(&format!(
        "SELECT trace_id, embedding FROM {} \
         WHERE file_uuid = $1 AND embedding IS NOT NULL AND trace_id IS NOT NULL \
         AND ((metadata->>'qc_ok')::boolean IS NULL OR (metadata->>'qc_ok')::boolean = true)",
        table
    ))
    .bind(file_uuid)
    .fetch_all(&pool)
    .await?;

    // Accumulate a running sum per trace instead of buffering every
    // embedding; the additions happen in the same row order either way.
    let mut trace_sums: std::collections::HashMap<i32, TraceSum> =
        std::collections::HashMap::new();
    let mut skipped_mismatched = 0usize;
    for row in &rows {
        // `try_get` surfaces a decode failure as an error instead of panicking.
        let tid: Option<i32> = row.try_get(0)?;
        let emb: Option<Vec<f32>> = row.try_get(1)?;
        if let (Some(tid), Some(emb)) = (tid, emb) {
            let acc = trace_sums.entry(tid).or_insert_with(|| TraceSum {
                sum: vec![0.0f32; emb.len()],
                count: 0,
            });
            // The first embedding of a trace fixes its dimension, so every
            // trace in the map ends up with `count >= 1`.
            if acc.sum.len() != emb.len() {
                skipped_mismatched += 1;
                continue;
            }
            for (total, value) in acc.sum.iter_mut().zip(&emb) {
                *total += *value;
            }
            acc.count += 1;
        }
    }

    if skipped_mismatched > 0 {
        tracing::warn!(
            "Skipped {} face embeddings with inconsistent dimensions while averaging traces for {}",
            skipped_mismatched,
            file_uuid
        );
    }

    // Turn each running sum into an average embedding.
    let trace_avgs: Vec<AvgTrace> = trace_sums
        .into_iter()
        .map(|(tid, acc)| {
            let n = acc.count as f32;
            AvgTrace {
                tid,
                avg_emb: acc.sum.into_iter().map(|total| total / n).collect(),
                frame_count: acc.count,
            }
        })
        .collect();

    // Push to Qdrant in batches.
    for chunk in trace_avgs.chunks(UPSERT_BATCH_SIZE) {
        let batch: Vec<(u64, &[f32], Option<serde_json::Value>)> = chunk
            .iter()
            .map(|t| {
                (
                    // NOTE(review): the point id is the trace id alone. If
                    // trace ids are only unique within a file, points from
                    // different files would overwrite each other in this
                    // shared collection — confirm. A negative trace id would
                    // also wrap to a very large u64 through this cast.
                    t.tid as u64,
                    t.avg_emb.as_slice(),
                    Some(serde_json::json!({
                        "trace_id": t.tid,
                        "file_uuid": file_uuid,
                        "frame_count": t.frame_count,
                        "source": "trace",
                    })),
                )
            })
            .collect();
        qdrant.upsert_vectors_batch(&collection, &batch).await?;
    }

    tracing::info!(
        "Synced {} trace embeddings to Qdrant for {}",
        trace_avgs.len(),
        file_uuid
    );
    Ok(())
}

View File

@@ -45,9 +45,11 @@ impl SyncDb {
}
let payload = VectorPayload {
uuid: uuid.clone(),
file_uuid: uuid.clone(),
chunk_id: chunk_id.clone(),
chunk_type,
start_frame: chunk.start_frame,
end_frame: chunk.end_frame,
start_time,
end_time,
text: Some(text.to_string()),