feat: trace-level matching, health watcher/worker status, timezone config

This commit is contained in:
Accusys
2026-05-21 01:08:30 +08:00
parent 8ede4be159
commit bebaa743ed
60 changed files with 6110 additions and 1586 deletions

View File

@@ -15,8 +15,14 @@ use crate::core::db::PostgresDb;
/// Builds the router exposing the identity-agent HTTP endpoints.
///
/// Two `POST` routes are registered:
/// - `/api/v1/agents/identity/match-from-photo`, handled by `match_from_photo`
/// - `/api/v1/agents/identity/match-from-trace`, handled by `match_from_trace`
///
/// Returns a `Router<AppState>` for the caller to mount into the application.
pub fn identity_agent_routes() -> Router<AppState> {
    // Each path is registered exactly once. NOTE(review): assuming `Router` is
    // axum's, registering the same method on the same path twice panics when
    // the router is built — confirm against the axum version in use.
    Router::new()
        .route(
            "/api/v1/agents/identity/match-from-photo",
            post(match_from_photo),
        )
        .route(
            "/api/v1/agents/identity/match-from-trace",
            post(match_from_trace),
        )
}
#[derive(Debug, Serialize)]
@@ -73,13 +79,21 @@ async fn match_from_photo(
let uuid_clean = identity_uuid.replace('-', "");
if uuid_clean.is_empty() || file_uuid.is_empty() {
return Err((StatusCode::BAD_REQUEST, Json(serde_json::json!({
"success": false, "message": "identity_uuid and file_uuid are required"
}))));
return Err((
StatusCode::BAD_REQUEST,
Json(serde_json::json!({
"success": false, "message": "identity_uuid and file_uuid are required"
})),
));
}
let data = image_data.ok_or_else(|| (StatusCode::BAD_REQUEST, Json(serde_json::json!({
"success": false, "message": "No image field found. Use field name 'image'."
}))))?;
let data = image_data.ok_or_else(|| {
(
StatusCode::BAD_REQUEST,
Json(serde_json::json!({
"success": false, "message": "No image field found. Use field name 'image'."
})),
)
})?;
// 1. Save uploaded image to temp
let scripts_dir = std::env::var("MOMENTRY_SCRIPTS_DIR")
@@ -88,11 +102,17 @@ async fn match_from_photo(
.unwrap_or_else(|_| "/opt/homebrew/bin/python3.11".to_string());
let temp_dir = std::env::temp_dir().join("momentry_match_face");
std::fs::create_dir_all(&temp_dir).map_err(|e| {
(StatusCode::INTERNAL_SERVER_ERROR, Json(serde_json::json!({"message": format!("Failed to create temp dir: {}", e)})))
(
StatusCode::INTERNAL_SERVER_ERROR,
Json(serde_json::json!({"message": format!("Failed to create temp dir: {}", e)})),
)
})?;
let temp_img = temp_dir.join(format!("{}.jpg", uuid_clean));
std::fs::write(&temp_img, &data).map_err(|e| {
(StatusCode::INTERNAL_SERVER_ERROR, Json(serde_json::json!({"message": format!("Failed to save temp image: {}", e)})))
(
StatusCode::INTERNAL_SERVER_ERROR,
Json(serde_json::json!({"message": format!("Failed to save temp image: {}", e)})),
)
})?;
// 2. Extract face embedding via Python script
@@ -103,79 +123,109 @@ async fn match_from_photo(
.output()
.await
.map_err(|e| {
(StatusCode::INTERNAL_SERVER_ERROR, Json(serde_json::json!({"message": format!("Failed to run extractor: {}", e)})))
(
StatusCode::INTERNAL_SERVER_ERROR,
Json(serde_json::json!({"message": format!("Failed to run extractor: {}", e)})),
)
})?;
let _ = std::fs::remove_file(&temp_img);
if !output.status.success() {
let stderr = String::from_utf8_lossy(&output.stderr);
return Err((StatusCode::BAD_REQUEST, Json(serde_json::json!({
"success": false, "message": format!("Face extraction failed: {}", stderr)
}))));
return Err((
StatusCode::BAD_REQUEST,
Json(serde_json::json!({
"success": false, "message": format!("Face extraction failed: {}", stderr)
})),
));
}
let stdout = String::from_utf8_lossy(&output.stdout);
let extract_result: serde_json::Value = serde_json::from_str(&stdout).map_err(|_| {
(StatusCode::INTERNAL_SERVER_ERROR, Json(serde_json::json!({"message": "Failed to parse extractor output"})))
(
StatusCode::INTERNAL_SERVER_ERROR,
Json(serde_json::json!({"message": "Failed to parse extractor output"})),
)
})?;
let embedding: Vec<f64> = serde_json::from_value(
extract_result.get("embedding")
.ok_or_else(|| (StatusCode::BAD_REQUEST, Json(serde_json::json!({"message": "No embedding in extractor output"}))))?
.clone()
).map_err(|_| {
(StatusCode::INTERNAL_SERVER_ERROR, Json(serde_json::json!({"message": "Invalid embedding format"})))
extract_result
.get("embedding")
.ok_or_else(|| {
(
StatusCode::BAD_REQUEST,
Json(serde_json::json!({"message": "No embedding in extractor output"})),
)
})?
.clone(),
)
.map_err(|_| {
(
StatusCode::INTERNAL_SERVER_ERROR,
Json(serde_json::json!({"message": "Invalid embedding format"})),
)
})?;
let embedding_f32: Vec<f32> = embedding.into_iter().map(|v| v as f32).collect();
// 3. Look up identity internal ID
let id_table = schema::table_name("identities");
let identity_id_row: Option<(i32,)> = sqlx::query_as(
&format!("SELECT id FROM {} WHERE REPLACE(uuid::text, '-', '') = $1", id_table)
)
let identity_id_row: Option<(i32,)> = sqlx::query_as(&format!(
"SELECT id FROM {} WHERE REPLACE(uuid::text, '-', '') = $1",
id_table
))
.bind(&uuid_clean)
.fetch_optional(state.db.pool())
.await
.map_err(|e| {
(StatusCode::INTERNAL_SERVER_ERROR, Json(serde_json::json!({"message": format!("DB error: {}", e)})))
(
StatusCode::INTERNAL_SERVER_ERROR,
Json(serde_json::json!({"message": format!("DB error: {}", e)})),
)
})?;
let identity_id = match identity_id_row {
Some((id,)) => id,
None => return Err((StatusCode::NOT_FOUND, Json(serde_json::json!({
"success": false, "message": "Identity not found"
})))),
None => {
return Err((
StatusCode::NOT_FOUND,
Json(serde_json::json!({
"success": false, "message": "Identity not found"
})),
))
}
};
// 4. Find best matching trace (highest similarity, no threshold)
let fd_table = schema::table_name("face_detections");
let best_match: Option<(i32, i32, f64)> = sqlx::query_as(
&format!(
r#"SELECT id, trace_id,
let best_match: Option<(i32, i32, f64)> = sqlx::query_as(&format!(
r#"SELECT id, trace_id,
1 - (embedding::vector <=> $1::vector) as similarity
FROM {}
WHERE file_uuid = $2 AND embedding IS NOT NULL
ORDER BY embedding::vector <=> $1::vector
LIMIT 1"#,
fd_table
)
)
fd_table
))
.bind(&embedding_f32)
.bind(&file_uuid)
.fetch_optional(state.db.pool())
.await
.map_err(|e| {
(StatusCode::INTERNAL_SERVER_ERROR, Json(serde_json::json!({"message": format!("Search failed: {}", e)})))
(
StatusCode::INTERNAL_SERVER_ERROR,
Json(serde_json::json!({"message": format!("Search failed: {}", e)})),
)
})?;
// 5. Update best match face_detection
let mut traces_matched: Vec<i32> = Vec::new();
if let Some((fb_id, fb_trace, fb_sim)) = best_match {
let _ = sqlx::query(
&format!("UPDATE {} SET identity_id = $1 WHERE id = $2", fd_table)
)
let _ = sqlx::query(&format!(
"UPDATE {} SET identity_id = $1 WHERE id = $2",
fd_table
))
.bind(identity_id)
.bind(fb_id)
.execute(state.db.pool())
@@ -191,7 +241,10 @@ async fn match_from_photo(
file_uuid,
matches: 1,
traces_matched,
message: format!("Best trace: trace_id={}, similarity={:.4}", fb_trace, fb_sim),
message: format!(
"Best trace: trace_id={}, similarity={:.4}",
fb_trace, fb_sim
),
}))
} else {
Ok(Json(MatchFromPhotoResponse {
@@ -221,26 +274,30 @@ async fn match_from_trace(
// 1. Get 3 best face embeddings from this trace at different angles
// Divide trace frame range into 3 segments, pick best face from each
let fd_table = schema::table_name("face_detections");
let all_faces: Vec<(Vec<f32>, i64)> = sqlx::query_as::<_, (Vec<f32>, i64)>(
&format!(
"SELECT embedding, frame_number FROM {} \
let all_faces: Vec<(Vec<f32>, i64)> = sqlx::query_as::<_, (Vec<f32>, i64)>(&format!(
"SELECT embedding, frame_number FROM {} \
WHERE file_uuid = $1 AND trace_id = $2 AND embedding IS NOT NULL \
ORDER BY frame_number ASC",
fd_table
)
)
fd_table
))
.bind(&req.file_uuid)
.bind(req.trace_id)
.fetch_all(state.db.pool())
.await
.map_err(|e| {
(StatusCode::INTERNAL_SERVER_ERROR, Json(serde_json::json!({"message": format!("DB error: {}", e)})))
(
StatusCode::INTERNAL_SERVER_ERROR,
Json(serde_json::json!({"message": format!("DB error: {}", e)})),
)
})?;
if all_faces.is_empty() {
return Err((StatusCode::NOT_FOUND, Json(serde_json::json!({
"success": false, "message": "No embedding found for this trace"
}))));
return Err((
StatusCode::NOT_FOUND,
Json(serde_json::json!({
"success": false, "message": "No embedding found for this trace"
})),
));
}
// Pick 3 samples: divide frame range into 3 segments, use face with largest area per segment
@@ -254,14 +311,12 @@ async fn match_from_trace(
let mut query_embeddings: Vec<Vec<f32>> = Vec::new();
// Get width*height info if available (not all pipelines store it)
let face_sizes: Vec<(i64, i32)> = sqlx::query_as::<_, (i64, i32)>(
&format!(
"SELECT frame_number, COALESCE(width, 0) * COALESCE(height, 0) AS area \
let face_sizes: Vec<(i64, i32)> = sqlx::query_as::<_, (i64, i32)>(&format!(
"SELECT frame_number, COALESCE(width, 0) * COALESCE(height, 0) AS area \
FROM {} WHERE file_uuid = $1 AND trace_id = $2 AND embedding IS NOT NULL \
ORDER BY frame_number ASC",
fd_table
)
)
fd_table
))
.bind(&req.file_uuid)
.bind(req.trace_id)
.fetch_all(state.db.pool())
@@ -296,9 +351,8 @@ async fn match_from_trace(
let mut seen_trace_ids = std::collections::HashSet::new();
for qemb in &query_embeddings {
let top = sqlx::query_as::<_, (i32, i32, f64)>(
&format!(
r#"SELECT id, trace_id,
let top = sqlx::query_as::<_, (i32, i32, f64)>(&format!(
r#"SELECT id, trace_id,
1 - (embedding::vector <=> $1::vector) as similarity
FROM {}
WHERE file_uuid = $2
@@ -306,16 +360,18 @@ async fn match_from_trace(
AND embedding IS NOT NULL
ORDER BY embedding::vector <=> $1::vector
LIMIT 1"#,
fd_table
)
)
fd_table
))
.bind(qemb)
.bind(&req.file_uuid)
.bind(req.trace_id)
.fetch_optional(state.db.pool())
.await
.map_err(|e| {
(StatusCode::INTERNAL_SERVER_ERROR, Json(serde_json::json!({"message": format!("Search failed: {}", e)})))
(
StatusCode::INTERNAL_SERVER_ERROR,
Json(serde_json::json!({"message": format!("Search failed: {}", e)})),
)
})?;
if let Some((cface_id, c_trace_id, c_sim)) = top {
@@ -327,35 +383,49 @@ async fn match_from_trace(
// 3. Look up identity internal ID
let id_table = schema::table_name("identities");
let identity_id_row: Option<(i32,)> = sqlx::query_as(
&format!("SELECT id FROM {} WHERE REPLACE(uuid::text, '-', '') = $1", id_table)
)
let identity_id_row: Option<(i32,)> = sqlx::query_as(&format!(
"SELECT id FROM {} WHERE REPLACE(uuid::text, '-', '') = $1",
id_table
))
.bind(&uuid_clean)
.fetch_optional(state.db.pool())
.await
.map_err(|e| {
(StatusCode::INTERNAL_SERVER_ERROR, Json(serde_json::json!({"message": format!("DB error: {}", e)})))
(
StatusCode::INTERNAL_SERVER_ERROR,
Json(serde_json::json!({"message": format!("DB error: {}", e)})),
)
})?;
let identity_id = match identity_id_row {
Some((id,)) => id,
None => return Err((StatusCode::NOT_FOUND, Json(serde_json::json!({
"success": false, "message": "Identity not found"
})))),
None => {
return Err((
StatusCode::NOT_FOUND,
Json(serde_json::json!({
"success": false, "message": "Identity not found"
})),
))
}
};
// 4. Update matched face_detections
let mut traces_matched: Vec<i32> = Vec::new();
for (id, trace_id, _similarity) in &validated {
if let Err(e) = sqlx::query(
&format!("UPDATE {} SET identity_id = $1 WHERE id = $2", fd_table)
)
if let Err(e) = sqlx::query(&format!(
"UPDATE {} SET identity_id = $1 WHERE id = $2",
fd_table
))
.bind(identity_id)
.bind(id)
.execute(state.db.pool())
.await
{
tracing::warn!("[match-from-trace] Failed to update face_detection {}: {}", id, e);
tracing::warn!(
"[match-from-trace] Failed to update face_detection {}: {}",
id,
e
);
} else {
if !traces_matched.contains(trace_id) {
traces_matched.push(*trace_id);
@@ -364,9 +434,10 @@ async fn match_from_trace(
}
// 5. Also bind the source trace itself
let _ = sqlx::query(
&format!("UPDATE {} SET identity_id = $1 WHERE file_uuid = $2 AND trace_id = $3", fd_table)
)
let _ = sqlx::query(&format!(
"UPDATE {} SET identity_id = $1 WHERE file_uuid = $2 AND trace_id = $3",
fd_table
))
.bind(identity_id)
.bind(&req.file_uuid)
.bind(req.trace_id)
@@ -388,7 +459,10 @@ async fn match_from_trace(
file_uuid: req.file_uuid,
matches: match_count,
traces_matched,
message: format!("Matched {} faces ({} unique traces)", match_count, trace_count),
message: format!(
"Matched {} faces ({} unique traces)",
match_count, trace_count
),
}))
}
@@ -461,7 +535,10 @@ fn analyze_person_speaker_overlap(
}
// Check if persons co-occur in time (frame proximity)
let overlap = person.frames.iter().any(|f| other_person.frames.contains(f));
let overlap = person
.frames
.iter()
.any(|f| other_person.frames.contains(f));
if overlap {
matched_persons.push(other_person.person_id.clone());
visited_persons.insert(other_person.person_id.clone());
@@ -474,9 +551,10 @@ fn analyze_person_speaker_overlap(
person.frames.iter().max().copied().unwrap_or(0) as f64,
);
for speaker in speakers {
let has_overlap = speaker.segments.iter().any(|(start, end)| {
*start <= person_time_range.1 && *end >= person_time_range.0
});
let has_overlap = speaker
.segments
.iter()
.any(|(start, end)| *start <= person_time_range.1 && *end >= person_time_range.0);
if has_overlap {
if !matched_speakers.contains(&speaker.speaker_id) {
matched_speakers.push(speaker.speaker_id.clone());
@@ -563,11 +641,12 @@ async fn match_faces_iterative(pool: &sqlx::PgPool, file_uuid: &str) -> anyhow::
// Step 2: load all face_detections (including frame_number), grouped by trace_id
let fd_table = schema::table_name("face_detections");
let fd_rows = sqlx::query_as::<_, (i32, i32, Vec<f32>)>(
&format!("SELECT trace_id, frame_number, embedding FROM {} \
let fd_rows = sqlx::query_as::<_, (i32, i32, Vec<f32>)>(&format!(
"SELECT trace_id, frame_number, embedding FROM {} \
WHERE file_uuid=$1 AND trace_id IS NOT NULL AND embedding IS NOT NULL \
ORDER BY trace_id, frame_number", fd_table),
)
ORDER BY trace_id, frame_number",
fd_table
))
.bind(file_uuid)
.fetch_all(pool)
.await?;
@@ -647,16 +726,18 @@ async fn match_faces_iterative(pool: &sqlx::PgPool, file_uuid: &str) -> anyhow::
let fd_table = schema::table_name("face_detections");
let mut updated = 0usize;
for (tid, name) in &matched {
let id_opt = sqlx::query_scalar::<_, Option<i32>>(
&format!("SELECT id FROM {} WHERE name=$1 AND source='tmdb'", identities_table),
)
let id_opt = sqlx::query_scalar::<_, Option<i32>>(&format!(
"SELECT id FROM {} WHERE name=$1 AND source='tmdb'",
identities_table
))
.bind(name)
.fetch_optional(pool)
.await?;
if let Some(identity_id) = id_opt {
let _ = sqlx::query(
&format!("UPDATE {} SET identity_id=$1 WHERE file_uuid=$2 AND trace_id=$3", fd_table),
)
let _ = sqlx::query(&format!(
"UPDATE {} SET identity_id=$1 WHERE file_uuid=$2 AND trace_id=$3",
fd_table
))
.bind(identity_id)
.bind(file_uuid)
.bind(tid)
@@ -726,32 +807,32 @@ async fn match_faces_iterative(pool: &sqlx::PgPool, file_uuid: &str) -> anyhow::
// Step 6: for traces that were not matched, set stranger_id = trace_id
// trace_id is a sequential integer within a single file, so it is reused directly as stranger_id
let stranger_update = sqlx::query(
&format!(
"UPDATE {} SET stranger_id = trace_id \
let stranger_update = sqlx::query(&format!(
"UPDATE {} SET stranger_id = trace_id \
WHERE file_uuid = $1 AND trace_id IS NOT NULL AND identity_id IS NULL \
AND (stranger_id IS NULL OR stranger_id != trace_id)",
fd_table
)
)
fd_table
))
.bind(file_uuid)
.execute(pool)
.await?;
let stranger_count = stranger_update.rows_affected();
// Step 7: Save identity files for all affected identities
let affected = sqlx::query_scalar::<_, uuid::Uuid>(
&format!("SELECT DISTINCT i.uuid FROM {} i \
let affected = sqlx::query_scalar::<_, uuid::Uuid>(&format!(
"SELECT DISTINCT i.uuid FROM {} i \
JOIN {} fd ON fd.identity_id = i.id \
WHERE fd.file_uuid=$1 AND fd.identity_id IS NOT NULL", identities_table, fd_table)
)
WHERE fd.file_uuid=$1 AND fd.identity_id IS NOT NULL",
identities_table, fd_table
))
.bind(file_uuid)
.fetch_all(pool)
.await
.unwrap_or_default();
for uuid in &affected {
let us = uuid.to_string().replace('-', "");
if let Err(e) = crate::core::identity::storage::save_identity_file_by_pool(pool, &us).await {
if let Err(e) = crate::core::identity::storage::save_identity_file_by_pool(pool, &us).await
{
tracing::warn!("[FaceMatch] Failed to save identity file {}: {}", us, e);
}
}
@@ -773,13 +854,15 @@ async fn match_faces_iterative(pool: &sqlx::PgPool, file_uuid: &str) -> anyhow::
pub async fn bind_speakers(pool: &sqlx::PgPool, file_uuid: &str) -> anyhow::Result<usize> {
// Load face traces with identity_id and frame numbers
let fd_table = schema::table_name("face_detections");
let traces = sqlx::query_as::<_, (i32, Vec<i32>)>(
&format!("SELECT trace_id, array_agg(frame_number ORDER BY frame_number) \
let traces = sqlx::query_as::<_, (i32, Vec<i32>)>(&format!(
"SELECT trace_id, array_agg(frame_number ORDER BY frame_number) \
FROM {} WHERE file_uuid=$1 AND trace_id IS NOT NULL AND identity_id IS NOT NULL \
GROUP BY trace_id", fd_table)
)
GROUP BY trace_id",
fd_table
))
.bind(file_uuid)
.fetch_all(pool).await?;
.fetch_all(pool)
.await?;
if traces.is_empty() {
tracing::info!("[SpeakerBind] No face traces with identities");
@@ -945,9 +1028,8 @@ pub async fn run_identity_agent(db: &PostgresDb, file_uuid: &str) -> anyhow::Res
let speakers = extract_speakers_from_asrx_data(&asrx_data);
let identities = analyze_person_speaker_overlap(&persons, &speakers);
let uuid_short = &file_uuid[..8.min(file_uuid.len())];
for (idx, id_result) in identities.iter().enumerate() {
let identity_name = format!("stranger_{}_{}", uuid_short, idx);
let identity_name = format!("stranger_{}", idx);
let metadata = serde_json::json!({
"source": "identity_agent",
"trace_ids": id_result.person_ids,