Release v1.0.0 candidate
This commit is contained in:
File diff suppressed because it is too large
Load Diff
@@ -10,6 +10,7 @@ use sqlx::Row;
|
||||
use std::path::PathBuf;
|
||||
|
||||
use crate::api::server::AppState;
|
||||
use crate::core::db::PostgresDb;
|
||||
|
||||
pub fn identity_agent_routes() -> Router<AppState> {
|
||||
Router::new()
|
||||
@@ -124,6 +125,13 @@ async fn analyze_identity(
|
||||
let face_clustered_path = video_dir.join(format!("{}.face_clustered.json", req.file_uuid));
|
||||
let asrx_path = video_dir.join(format!("{}.asrx.json", req.file_uuid));
|
||||
|
||||
// 如果子目錄找不到,試根目錄
|
||||
let face_clustered_path = if face_clustered_path.exists() {
|
||||
face_clustered_path
|
||||
} else {
|
||||
PathBuf::from(&output_dir).join(format!("{}.face_clustered.json", req.file_uuid))
|
||||
};
|
||||
|
||||
if !face_clustered_path.exists() {
|
||||
return Err((
|
||||
StatusCode::NOT_FOUND,
|
||||
@@ -132,37 +140,15 @@ async fn analyze_identity(
|
||||
}
|
||||
|
||||
let face_data: serde_json::Value = std::fs::read_to_string(&face_clustered_path)
|
||||
.map_err(|e| {
|
||||
(
|
||||
StatusCode::INTERNAL_SERVER_ERROR,
|
||||
format!("Failed to read face data: {}", e),
|
||||
)
|
||||
})?
|
||||
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("Failed to read face data: {}", e)))?
|
||||
.parse()
|
||||
.map_err(|e| {
|
||||
(
|
||||
StatusCode::INTERNAL_SERVER_ERROR,
|
||||
format!("Failed to parse face data: {}", e),
|
||||
)
|
||||
})?;
|
||||
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("Failed to parse face data: {}", e)))?;
|
||||
|
||||
let asrx_data: Option<serde_json::Value> = if asrx_path.exists() {
|
||||
Some(
|
||||
std::fs::read_to_string(&asrx_path)
|
||||
.map_err(|e| {
|
||||
(
|
||||
StatusCode::INTERNAL_SERVER_ERROR,
|
||||
format!("Failed to read asrx data: {}", e),
|
||||
)
|
||||
})?
|
||||
.parse()
|
||||
.map_err(|e| {
|
||||
(
|
||||
StatusCode::INTERNAL_SERVER_ERROR,
|
||||
format!("Failed to parse asrx data: {}", e),
|
||||
)
|
||||
})?,
|
||||
)
|
||||
Some(std::fs::read_to_string(&asrx_path)
|
||||
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("Failed to read asrx data: {}", e)))?
|
||||
.parse()
|
||||
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("Failed to parse asrx data: {}", e)))?)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
@@ -172,6 +158,37 @@ async fn analyze_identity(
|
||||
|
||||
let identities = analyze_person_speaker_overlap(&persons, &speakers);
|
||||
|
||||
// 將 identity 結果寫入 DB
|
||||
let pool = state.db.pool();
|
||||
for id_result in &identities {
|
||||
let identity_name = format!("person_{}", id_result.person_ids.first().map(|s| &**s).unwrap_or("unknown"));
|
||||
let metadata = serde_json::json!({
|
||||
"source": "identity_agent",
|
||||
"trace_ids": id_result.person_ids,
|
||||
"speaker_ids": id_result.speaker_ids,
|
||||
"confidence": id_result.confidence,
|
||||
"evidence": {
|
||||
"speaker_overlap": id_result.evidence.speaker_overlap,
|
||||
"frame_ratio": id_result.evidence.frame_ratio,
|
||||
},
|
||||
"reasoning": id_result.reasoning,
|
||||
});
|
||||
|
||||
let _ = sqlx::query(
|
||||
"INSERT INTO dev.identities (name, identity_type, source, metadata, status) VALUES ($1, 'people', 'auto', $2::jsonb, 'pending') ON CONFLICT DO NOTHING"
|
||||
)
|
||||
.bind(&identity_name)
|
||||
.bind(&metadata)
|
||||
.execute(pool)
|
||||
.await;
|
||||
}
|
||||
|
||||
// 迭代多角度 face embedding 比對(TMDb seed → 傳播)
|
||||
let _ = match_faces_iterative(pool, &req.file_uuid).await.unwrap_or(0);
|
||||
|
||||
// 將 ASRX speaker 綁定到已匹配 identity 的 trace
|
||||
let _ = bind_speakers(pool, &req.file_uuid).await.unwrap_or(0);
|
||||
|
||||
let processing_status = IdentityProcessingStatus {
|
||||
status: "completed".to_string(),
|
||||
persons_analyzed: persons.len() as i32,
|
||||
@@ -287,33 +304,27 @@ fn extract_persons_from_face_data(face_data: &serde_json::Value) -> Vec<PersonDa
|
||||
|
||||
fn extract_speakers_from_asrx_data(asrx_data: &Option<serde_json::Value>) -> Vec<SpeakerData> {
|
||||
let mut speakers = Vec::new();
|
||||
|
||||
if let Some(data) = asrx_data {
|
||||
if let Some(segments) = data.get("segments").and_then(|s| s.as_array()) {
|
||||
let mut speaker_segments_map: std::collections::HashMap<String, Vec<(f64, f64)>> =
|
||||
std::collections::HashMap::new();
|
||||
|
||||
for segment in segments {
|
||||
if let Some(speaker_id) = segment.get("speaker").and_then(|s| s.as_str()) {
|
||||
let start = segment.get("start").and_then(|s| s.as_f64()).unwrap_or(0.0);
|
||||
let end = segment.get("end").and_then(|e| e.as_f64()).unwrap_or(0.0);
|
||||
|
||||
let speaker_id = segment.get("speaker_id").and_then(|s| s.as_str())
|
||||
.or_else(|| segment.get("speaker").and_then(|s| s.as_str()));
|
||||
if let Some(speaker_id) = speaker_id {
|
||||
let start = segment.get("start").or_else(|| segment.get("start_time")).and_then(|s| s.as_f64()).unwrap_or(0.0);
|
||||
let end = segment.get("end").or_else(|| segment.get("end_time")).and_then(|e| e.as_f64()).unwrap_or(0.0);
|
||||
speaker_segments_map
|
||||
.entry(speaker_id.to_string())
|
||||
.or_insert_with(Vec::new)
|
||||
.push((start, end));
|
||||
}
|
||||
}
|
||||
|
||||
for (speaker_id, segments) in speaker_segments_map {
|
||||
speakers.push(SpeakerData {
|
||||
speaker_id,
|
||||
segments,
|
||||
});
|
||||
speakers.push(SpeakerData { speaker_id, segments });
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
speakers
|
||||
}
|
||||
|
||||
@@ -374,10 +385,8 @@ fn analyze_person_speaker_overlap(
|
||||
|
||||
fn calculate_overlap(person: &PersonData, speaker: &SpeakerData) -> i32 {
|
||||
let mut overlap_count = 0;
|
||||
|
||||
for frame_num in &person.frames {
|
||||
let frame_time = *frame_num as f64 / 23.976;
|
||||
|
||||
let frame_time = *frame_num as f64 / 25.0; // default fps=25
|
||||
for (start, end) in &speaker.segments {
|
||||
if frame_time >= *start && frame_time <= *end {
|
||||
overlap_count += 1;
|
||||
@@ -385,7 +394,6 @@ fn calculate_overlap(person: &PersonData, speaker: &SpeakerData) -> i32 {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
overlap_count
|
||||
}
|
||||
|
||||
@@ -416,31 +424,31 @@ async fn suggest_clustering(
|
||||
State(state): State<AppState>,
|
||||
Json(req): Json<SuggestClusteringRequest>,
|
||||
) -> Result<Json<SuggestClusteringResponse>, (StatusCode, String)> {
|
||||
let min_cluster_size = req.min_cluster_size.unwrap_or(3);
|
||||
|
||||
let file_filter = match &req.file_uuid {
|
||||
Some(uuid) => format!("AND fc.file_uuid = '{}'", uuid),
|
||||
Some(uuid) => format!("AND fd.file_uuid = '{}'", uuid),
|
||||
None => String::new(),
|
||||
};
|
||||
|
||||
let query = format!(
|
||||
r#"
|
||||
SELECT fc.cluster_id, fc.file_uuid, fc.n_faces, fc.metadata
|
||||
FROM face_clusters fc
|
||||
WHERE fc.n_faces >= $1
|
||||
SELECT trace_id, file_uuid, COUNT(*) as face_count
|
||||
FROM dev.face_detections fd
|
||||
WHERE fd.trace_id IS NOT NULL
|
||||
AND NOT EXISTS (
|
||||
SELECT 1 FROM identities i
|
||||
WHERE i.metadata->>'cluster_id' = fc.cluster_id
|
||||
SELECT 1 FROM dev.identities i
|
||||
WHERE i.metadata->>'trace_id' = fd.trace_id::text
|
||||
)
|
||||
{}
|
||||
ORDER BY fc.n_faces DESC
|
||||
GROUP BY trace_id, file_uuid
|
||||
HAVING COUNT(*) >= $1
|
||||
ORDER BY face_count DESC
|
||||
"#,
|
||||
file_filter
|
||||
);
|
||||
|
||||
let pool = state.db.pool();
|
||||
let rows = sqlx::query(&query)
|
||||
.bind(min_cluster_size as i64)
|
||||
.bind(req.min_cluster_size.unwrap_or(3) as i64)
|
||||
.fetch_all(pool)
|
||||
.await
|
||||
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?;
|
||||
@@ -448,27 +456,14 @@ async fn suggest_clustering(
|
||||
let suggestions: Vec<ClusteringSuggestion> = rows
|
||||
.into_iter()
|
||||
.map(|row| {
|
||||
let cluster_id: String = row.get("cluster_id");
|
||||
let n_faces: i32 = row.get("n_faces");
|
||||
let metadata: serde_json::Value =
|
||||
row.try_get("metadata").unwrap_or(serde_json::Value::Null);
|
||||
|
||||
let avg_confidence = metadata
|
||||
.get("avg_confidence")
|
||||
.and_then(|v| v.as_f64())
|
||||
.unwrap_or(0.0);
|
||||
|
||||
let representative_face = metadata
|
||||
.get("representative_face_id")
|
||||
.and_then(|v| v.as_str())
|
||||
.map(|s| s.to_string());
|
||||
|
||||
let trace_id: Option<i32> = row.try_get("trace_id").ok();
|
||||
let face_count: i64 = row.get("face_count");
|
||||
ClusteringSuggestion {
|
||||
cluster_id,
|
||||
face_count: n_faces as usize,
|
||||
avg_confidence,
|
||||
cluster_id: format!("trace_{}", trace_id.unwrap_or(0)),
|
||||
face_count: face_count as usize,
|
||||
avg_confidence: 0.0,
|
||||
suggested_name: None,
|
||||
representative_face,
|
||||
representative_face: None,
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
@@ -601,3 +596,325 @@ struct SpeakerData {
|
||||
speaker_id: String,
|
||||
segments: Vec<(f64, f64)>,
|
||||
}
|
||||
|
||||
/// Cosine similarity of two embedding vectors.
///
/// Returns `0.0` when the slices differ in length, are empty, either has zero
/// magnitude, or the result is not a finite number (e.g. NaN components).
/// Otherwise returns a value in `[-1.0, 1.0]`.
///
/// The dot product and both squared norms are accumulated in a single pass
/// using `f64`, which limits rounding drift on long embeddings; the final
/// value is clamped so rounding can never push it outside `[-1, 1]`.
fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
    if a.len() != b.len() || a.is_empty() {
        return 0.0;
    }

    let (mut dot, mut norm_a, mut norm_b) = (0.0f64, 0.0f64, 0.0f64);
    for (&x, &y) in a.iter().zip(b) {
        let (x, y) = (f64::from(x), f64::from(y));
        dot += x * y;
        norm_a += x * x;
        norm_b += y * y;
    }

    if norm_a == 0.0 || norm_b == 0.0 {
        return 0.0;
    }

    let cosine = dot / (norm_a.sqrt() * norm_b.sqrt());
    if !cosine.is_finite() {
        return 0.0;
    }
    cosine.clamp(-1.0, 1.0) as f32
}
|
||||
|
||||
/// 迭代多角度 face embedding 比對 + 傳播
|
||||
/// Round 1: 用 TMDb seed face_embedding 比對 face_detections (threshold 0.50)
|
||||
/// Round 2+: 用已匹配 trace 的所有 face 作為 seed,傳播到未匹配 trace
|
||||
async fn match_faces_iterative(pool: &sqlx::PgPool, file_uuid: &str) -> anyhow::Result<usize> {
|
||||
// Step 1: 載入 TMDb identities (source='tmdb' 且有 face_embedding)
|
||||
let tmdb_rows = sqlx::query_as::<_, (i32, String, Vec<f32>)>(
|
||||
"SELECT id, name, face_embedding::real[] FROM dev.identities WHERE source='tmdb' AND face_embedding IS NOT NULL"
|
||||
)
|
||||
.fetch_all(pool).await?;
|
||||
|
||||
if tmdb_rows.is_empty() {
|
||||
tracing::warn!("[FaceMatch] No TMDb identities with face embeddings");
|
||||
return Ok(0);
|
||||
}
|
||||
tracing::info!("[FaceMatch] Loaded {} TMDb seed identities", tmdb_rows.len());
|
||||
|
||||
// Step 2: 載入所有 face_detections,按 trace_id 分組
|
||||
let fd_rows = sqlx::query_as::<_, (i32, Vec<f32>)>(
|
||||
"SELECT trace_id, embedding FROM dev.face_detections \
|
||||
WHERE file_uuid=$1 AND trace_id IS NOT NULL AND embedding IS NOT NULL \
|
||||
ORDER BY trace_id"
|
||||
)
|
||||
.bind(file_uuid)
|
||||
.fetch_all(pool).await?;
|
||||
|
||||
if fd_rows.is_empty() {
|
||||
tracing::warn!("[FaceMatch] No face detections with embeddings");
|
||||
return Ok(0);
|
||||
}
|
||||
|
||||
// 分組:trace_id → Vec<embedding>
|
||||
use std::collections::HashMap;
|
||||
let mut trace_faces: HashMap<i32, Vec<Vec<f32>>> = HashMap::new();
|
||||
for (tid, emb) in &fd_rows {
|
||||
trace_faces.entry(*tid).or_insert_with(Vec::new).push(emb.clone());
|
||||
}
|
||||
|
||||
// 去重:同一個 trace 內,embedding 太接近的只留一個
|
||||
for faces in trace_faces.values_mut() {
|
||||
faces.sort_by(|a, b| b[0].partial_cmp(&a[0]).unwrap_or(std::cmp::Ordering::Equal));
|
||||
faces.dedup_by(|a, b| cosine_similarity(a, b) > 0.99);
|
||||
}
|
||||
|
||||
let total_traces = trace_faces.len();
|
||||
tracing::info!("[FaceMatch] Loaded {} traces with {} faces", total_traces, fd_rows.len());
|
||||
|
||||
// Step 3: 建立 TMDb 查找表
|
||||
let tmdb_seeds: Vec<(i32, String, Vec<f32>)> = tmdb_rows;
|
||||
|
||||
// Step 4: 迭代匹配
|
||||
const TH: f32 = 0.50;
|
||||
let mut matched: HashMap<i32, String> = HashMap::new(); // trace_id → identity_name
|
||||
|
||||
// Round 1: 直接比對 TMDb
|
||||
for (&tid, faces) in &trace_faces {
|
||||
let mut best_name = String::new();
|
||||
let mut best_sim = 0.0f32;
|
||||
for (_, ref name, ref tmdb_emb) in &tmdb_seeds {
|
||||
for face_emb in faces {
|
||||
let s = cosine_similarity(face_emb, tmdb_emb);
|
||||
if s > best_sim { best_sim = s; best_name = name.clone(); }
|
||||
}
|
||||
}
|
||||
if best_sim >= TH {
|
||||
matched.insert(tid, best_name);
|
||||
}
|
||||
}
|
||||
tracing::info!("[FaceMatch] Round 1: {} matched ({}%)", matched.len(), matched.len() * 100 / total_traces);
|
||||
|
||||
// Round 2+: 用已匹配的 face 作為 seed 傳播
|
||||
for round_n in 2..=10 {
|
||||
let prev = matched.len();
|
||||
// 建立 seed pool: name → Vec<embedding>
|
||||
let mut seed_pool: HashMap<String, Vec<&Vec<f32>>> = HashMap::new();
|
||||
for (&tid, name) in &matched {
|
||||
if let Some(faces) = trace_faces.get(&tid) {
|
||||
seed_pool.entry(name.clone()).or_default().extend(faces.iter());
|
||||
}
|
||||
}
|
||||
|
||||
let mut new_matches: Vec<(i32, String)> = Vec::new();
|
||||
for (&tid, faces) in &trace_faces {
|
||||
if matched.contains_key(&tid) { continue; }
|
||||
let mut best_name = String::new();
|
||||
let mut best_sim = 0.0f32;
|
||||
if faces.is_empty() { continue; }
|
||||
let ref_face = &faces[0];
|
||||
for (name, seed_faces) in &seed_pool {
|
||||
for seed in seed_faces {
|
||||
let s = cosine_similarity(ref_face, seed);
|
||||
if s > best_sim { best_sim = s; best_name = name.clone(); }
|
||||
}
|
||||
}
|
||||
if best_sim >= TH {
|
||||
new_matches.push((tid, best_name));
|
||||
}
|
||||
}
|
||||
for (tid, name) in new_matches {
|
||||
matched.insert(tid, name);
|
||||
}
|
||||
let new = matched.len() - prev;
|
||||
tracing::info!("[FaceMatch] Round {}: +{} matched (total {}, {}%)", round_n, new, matched.len(), matched.len() * 100 / total_traces);
|
||||
if new < 5 { break; }
|
||||
}
|
||||
|
||||
// Step 5: 寫入 DB
|
||||
let mut updated = 0usize;
|
||||
for (tid, name) in &matched {
|
||||
let id_opt = sqlx::query_scalar::<_, Option<i32>>(
|
||||
"SELECT id FROM dev.identities WHERE name=$1 AND source='tmdb'"
|
||||
)
|
||||
.bind(name)
|
||||
.fetch_optional(pool).await?;
|
||||
if let Some(identity_id) = id_opt {
|
||||
let _ = sqlx::query(
|
||||
"UPDATE dev.face_detections SET identity_id=$1 WHERE file_uuid=$2 AND trace_id=$3"
|
||||
)
|
||||
.bind(identity_id)
|
||||
.bind(file_uuid)
|
||||
.bind(tid)
|
||||
.execute(pool).await;
|
||||
updated += 1;
|
||||
}
|
||||
}
|
||||
|
||||
tracing::info!("[FaceMatch] Done: {}/{} traces matched ({}%)", matched.len(), total_traces, matched.len() * 100 / total_traces);
|
||||
Ok(updated)
|
||||
}
|
||||
|
||||
/// Bind ASRX speakers to face traces based on temporal overlap.
|
||||
/// Reads face_detections (trace_id, identity_id, frame_number) and ASRX
|
||||
/// segments (speaker_id, start_time, end_time), computes overlap,
|
||||
/// and stores bindings in identity_bindings table.
|
||||
pub async fn bind_speakers(pool: &sqlx::PgPool, file_uuid: &str) -> anyhow::Result<usize> {
|
||||
// Load face traces with identity_id and frame numbers
|
||||
let traces = sqlx::query_as::<_, (i32, Vec<i32>)>(
|
||||
"SELECT trace_id, array_agg(frame_number ORDER BY frame_number) \
|
||||
FROM dev.face_detections WHERE file_uuid=$1 AND trace_id IS NOT NULL AND identity_id IS NOT NULL \
|
||||
GROUP BY trace_id"
|
||||
)
|
||||
.bind(file_uuid)
|
||||
.fetch_all(pool).await?;
|
||||
|
||||
if traces.is_empty() {
|
||||
tracing::info!("[SpeakerBind] No face traces with identities");
|
||||
return Ok(0);
|
||||
}
|
||||
|
||||
// Load ASRX speakers from the output JSON
|
||||
let output_dir = std::env::var("MOMENTRY_OUTPUT_DIR")
|
||||
.unwrap_or_else(|_| "/Users/accusys/momentry/output".to_string());
|
||||
let asrx_path = std::path::Path::new(&output_dir).join(format!("{}.asrx.json", file_uuid));
|
||||
|
||||
let asrx_data: serde_json::Value = match std::fs::read_to_string(&asrx_path) {
|
||||
Ok(s) => serde_json::from_str(&s).unwrap_or_default(),
|
||||
Err(_) => {
|
||||
tracing::info!("[SpeakerBind] No ASRX file found");
|
||||
return Ok(0);
|
||||
}
|
||||
};
|
||||
|
||||
// Extract speaker segments: speaker_id → [(start_time, end_time)]
|
||||
use std::collections::HashMap;
|
||||
let mut speakers: HashMap<String, Vec<(f64, f64)>> = HashMap::new();
|
||||
if let Some(segments) = asrx_data.get("segments").and_then(|s| s.as_array()) {
|
||||
for seg in segments {
|
||||
let sid = seg.get("speaker_id").and_then(|s| s.as_str())
|
||||
.or_else(|| seg.get("speaker").and_then(|s| s.as_str()));
|
||||
if let Some(sid) = sid {
|
||||
let start = seg.get("start_time").or_else(|| seg.get("start")).and_then(|v| v.as_f64()).unwrap_or(0.0);
|
||||
let end = seg.get("end_time").or_else(|| seg.get("end")).and_then(|v| v.as_f64()).unwrap_or(0.0);
|
||||
speakers.entry(sid.to_string()).or_default().push((start, end));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if speakers.is_empty() {
|
||||
tracing::info!("[SpeakerBind] No speakers found in ASRX data");
|
||||
return Ok(0);
|
||||
}
|
||||
|
||||
// Get fps for frame-to-time conversion
|
||||
let fps: f64 = 25.0; // default, could also read from DB
|
||||
|
||||
// For each trace, compute overlap with each speaker
|
||||
let mut bindings = 0usize;
|
||||
for (trace_id, frames) in &traces {
|
||||
if frames.is_empty() { continue; }
|
||||
|
||||
// Get identity_id for this trace
|
||||
let identity_id: Option<i32> = sqlx::query_scalar(
|
||||
"SELECT identity_id FROM dev.face_detections WHERE file_uuid=$1 AND trace_id=$2 AND identity_id IS NOT NULL LIMIT 1"
|
||||
)
|
||||
.bind(file_uuid).bind(trace_id)
|
||||
.fetch_optional(pool).await?.flatten();
|
||||
|
||||
if identity_id.is_none() { continue; }
|
||||
let identity_id = identity_id.unwrap();
|
||||
|
||||
// Compute overlap with each speaker
|
||||
let mut best_speaker = String::new();
|
||||
let mut best_overlap = 0usize;
|
||||
|
||||
for (speaker_id, segments) in &speakers {
|
||||
let mut overlap = 0usize;
|
||||
for &fn_num in frames {
|
||||
let frame_time = fn_num as f64 / fps;
|
||||
for (start, end) in segments {
|
||||
if frame_time >= *start && frame_time <= *end {
|
||||
overlap += 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if overlap > best_overlap {
|
||||
best_overlap = overlap;
|
||||
best_speaker = speaker_id.clone();
|
||||
}
|
||||
}
|
||||
|
||||
// Only bind if meaningful overlap
|
||||
let overlap_ratio = best_overlap as f64 / frames.len() as f64;
|
||||
if overlap_ratio > 0.3 && !best_speaker.is_empty() {
|
||||
let metadata = serde_json::json!({
|
||||
"trace_id": trace_id,
|
||||
"overlap_frames": best_overlap,
|
||||
"total_frames": frames.len(),
|
||||
"overlap_ratio": overlap_ratio,
|
||||
});
|
||||
|
||||
let _ = sqlx::query(
|
||||
"INSERT INTO dev.identity_bindings (identity_id, identity_type, identity_value, confidence, metadata) \
|
||||
VALUES ($1, 'speaker', $2, $3, $4::jsonb) \
|
||||
ON CONFLICT (identity_id, identity_type, identity_value) DO UPDATE SET confidence = EXCLUDED.confidence, metadata = EXCLUDED.metadata"
|
||||
)
|
||||
.bind(identity_id)
|
||||
.bind(&best_speaker)
|
||||
.bind(overlap_ratio)
|
||||
.bind(&metadata)
|
||||
.execute(pool).await;
|
||||
|
||||
bindings += 1;
|
||||
}
|
||||
}
|
||||
|
||||
tracing::info!("[SpeakerBind] Created {}/{} speaker bindings", bindings, traces.len());
|
||||
Ok(bindings)
|
||||
}
|
||||
|
||||
/// Pipeline-triggered entry point: runs the full identity agent for a file.
|
||||
/// Reads face_clustered.json + asrx.json, extracts persons/speakers, creates identities,
|
||||
/// runs iterative face matching, and binds speakers.
|
||||
pub async fn run_identity_agent(db: &PostgresDb, file_uuid: &str) -> anyhow::Result<()> {
|
||||
let output_dir = std::env::var("MOMENTRY_OUTPUT_DIR")
|
||||
.unwrap_or_else(|_| "/Users/accusys/momentry/output".to_string());
|
||||
|
||||
let video_dir = PathBuf::from(&output_dir).join(file_uuid);
|
||||
let face_clustered_path = video_dir.join(format!("{}.face_clustered.json", file_uuid));
|
||||
let face_clustered_path = if face_clustered_path.exists() {
|
||||
face_clustered_path
|
||||
} else {
|
||||
PathBuf::from(&output_dir).join(format!("{}.face_clustered.json", file_uuid))
|
||||
};
|
||||
|
||||
if !face_clustered_path.exists() {
|
||||
tracing::warn!("[IdentityAgent] face_clustered.json not found for {}", file_uuid);
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let face_data: serde_json::Value = std::fs::read_to_string(&face_clustered_path)?.parse()?;
|
||||
let asrx_path = video_dir.join(format!("{}.asrx.json", file_uuid));
|
||||
let asrx_data: Option<serde_json::Value> = if asrx_path.exists() {
|
||||
Some(std::fs::read_to_string(&asrx_path)?.parse()?)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
let persons = extract_persons_from_face_data(&face_data);
|
||||
let speakers = extract_speakers_from_asrx_data(&asrx_data);
|
||||
let identities = analyze_person_speaker_overlap(&persons, &speakers);
|
||||
|
||||
let pool = db.pool();
|
||||
for id_result in &identities {
|
||||
let identity_name = format!("person_{}", id_result.person_ids.first().map(|s| &**s).unwrap_or("unknown"));
|
||||
let metadata = serde_json::json!({
|
||||
"source": "identity_agent",
|
||||
"trace_ids": id_result.person_ids,
|
||||
"speaker_ids": id_result.speaker_ids,
|
||||
"confidence": id_result.confidence,
|
||||
"evidence": {
|
||||
"speaker_overlap": id_result.evidence.speaker_overlap,
|
||||
"frame_ratio": id_result.evidence.frame_ratio,
|
||||
},
|
||||
"reasoning": id_result.reasoning,
|
||||
});
|
||||
let _ = sqlx::query(
|
||||
"INSERT INTO dev.identities (name, identity_type, source, metadata, status) VALUES ($1, 'people', 'auto', $2::jsonb, 'pending') ON CONFLICT DO NOTHING"
|
||||
)
|
||||
.bind(&identity_name)
|
||||
.bind(&metadata)
|
||||
.execute(pool)
|
||||
.await;
|
||||
}
|
||||
|
||||
let matched = match_faces_iterative(pool, file_uuid).await.unwrap_or(0);
|
||||
let bound = bind_speakers(pool, file_uuid).await.unwrap_or(0);
|
||||
|
||||
tracing::info!(
|
||||
"[IdentityAgent] Done for {}: {} identities, {} face matches, {} speaker bindings",
|
||||
file_uuid, identities.len(), matched, bound
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -282,16 +282,44 @@ async fn trace_video(
|
||||
let duration = (last_frame - first_frame) as f64 / fps + padding * 2.0;
|
||||
let seek = (start_sec - padding).max(0.0);
|
||||
|
||||
// Build filters: per-frame bbox + text
|
||||
// Build filters: bbox+text holding at last detection until next one
|
||||
let mut parts: Vec<String> = Vec::new();
|
||||
for (frame, x, y, w, h) in &rows {
|
||||
let offset = frame - first_frame + (padding * fps) as i32;
|
||||
for (i, (frame, x, y, w, h)) in rows.iter().enumerate() {
|
||||
// Hold this detection until the next one (or end)
|
||||
let next_frame = if i + 1 < rows.len() {
|
||||
rows[i + 1].0
|
||||
} else {
|
||||
// For last detection, extend to duration end
|
||||
last_frame + (padding * fps) as i32
|
||||
};
|
||||
let start_offset = frame - first_frame + (padding * fps) as i32;
|
||||
let end_offset = next_frame - first_frame + (padding * fps) as i32;
|
||||
|
||||
// Bbox: visible from this frame until next detection
|
||||
parts.push(format!(
|
||||
"drawbox=x={}:y={}:w={}:h={}:color=red@0.8:thickness=8:enable='eq(n,{})'",
|
||||
x, y, w, h, offset
|
||||
"drawbox=x={}:y={}:w={}:h={}:color=red@0.8:thickness=8:enable='between(n,{},{})'",
|
||||
x, y, w, h, start_offset, end_offset - 1
|
||||
));
|
||||
// Text: same hold behavior
|
||||
let label = format!("t{}", trace_id);
|
||||
render_text(&mut parts, &label, *x + 6, *y + 6, Some(offset));
|
||||
let mut tx = *x + 6;
|
||||
let mut ty = *y + 6;
|
||||
for ch in label.chars() {
|
||||
let bm = bitmap_char(ch);
|
||||
for (row, bits) in bm.iter().enumerate() {
|
||||
for col in 0..5 {
|
||||
if bits & (1 << (4 - col)) != 0 {
|
||||
let dx = tx + col as i32 * 3;
|
||||
let dy = ty + row as i32 * 3;
|
||||
parts.push(format!(
|
||||
"drawbox=x={}:y={}:w=3:h=3:color=white@1.0:t=fill:enable='between(n,{},{})'",
|
||||
dx, dy, start_offset, end_offset - 1
|
||||
));
|
||||
}
|
||||
}
|
||||
}
|
||||
tx += CHAR_ADVANCE;
|
||||
}
|
||||
}
|
||||
|
||||
let vf = if parts.is_empty() {
|
||||
|
||||
@@ -162,7 +162,7 @@ async fn get_ollama_embedding(
|
||||
) -> Result<Vec<f32>, Box<dyn std::error::Error + Send + Sync>> {
|
||||
let client = reqwest::Client::new();
|
||||
let payload = serde_json::json!({
|
||||
"model": "nomic-embed-text",
|
||||
"model": "mxbai-embed-large",
|
||||
"prompt": text
|
||||
});
|
||||
|
||||
|
||||
@@ -26,6 +26,7 @@ use super::identity_api;
|
||||
use super::identity_binding;
|
||||
use super::middleware::api_key_validation;
|
||||
use super::search::search_routes;
|
||||
use super::trace_agent_api;
|
||||
use super::universal_search::universal_search_routes;
|
||||
use super::visual_chunk_search;
|
||||
use crate::core::chunk::types::Chunk;
|
||||
@@ -794,8 +795,6 @@ async fn register_single_file(
|
||||
.arg(&cut_script)
|
||||
.arg(&canonical_path)
|
||||
.arg(&cut_path)
|
||||
.arg("--threshold")
|
||||
.arg("27")
|
||||
.output();
|
||||
if let Ok(output) = cut_output {
|
||||
if output.status.success() {
|
||||
@@ -2246,42 +2245,58 @@ async fn list_jobs(Query(params): Query<JobsQuery>) -> Result<Json<JobListRespon
|
||||
.await
|
||||
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
|
||||
|
||||
// TODO: 需要修改 PostgresDb::get_pending_jobs 以支持分頁和狀態過濾
|
||||
// 目前先使用現有方法,獲取所有工作然後手動分頁
|
||||
let jobs = pg
|
||||
.get_pending_jobs(1000) // 臨時解決方案:獲取較多工作
|
||||
let table = crate::core::db::schema::table_name("monitor_jobs");
|
||||
|
||||
// Build status IN clause
|
||||
let statuses: Vec<String> = status_filter
|
||||
.split(',')
|
||||
.map(|s| format!("'{}'", s.trim()))
|
||||
.collect();
|
||||
let status_clause = statuses.join(",");
|
||||
|
||||
let query = format!(
|
||||
"SELECT id, uuid, video_path, status, current_processor, progress_total, progress_current,
|
||||
error_count, last_error, started_at::TEXT, updated_at::TEXT, created_at::TEXT,
|
||||
processors, completed_processors, failed_processors, video_id
|
||||
FROM {}
|
||||
WHERE status IN ({})
|
||||
ORDER BY created_at DESC
|
||||
LIMIT {} OFFSET {}",
|
||||
table, status_clause, page_size, offset
|
||||
);
|
||||
|
||||
let count_query = format!(
|
||||
"SELECT COUNT(*) FROM {} WHERE status IN ({})",
|
||||
table, status_clause
|
||||
);
|
||||
|
||||
let total_count: i64 = sqlx::query_scalar(&count_query)
|
||||
.fetch_one(pg.pool())
|
||||
.await
|
||||
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
|
||||
|
||||
// 過濾狀態
|
||||
let filtered_jobs: Vec<_> = jobs
|
||||
.into_iter()
|
||||
.filter(|j| {
|
||||
let job_status = j.status.as_str();
|
||||
status_filter.split(',').any(|s| s.trim() == job_status)
|
||||
})
|
||||
.collect();
|
||||
use crate::core::db::MonitorJobStatus;
|
||||
|
||||
let total_count = filtered_jobs.len() as i64;
|
||||
let rows = sqlx::query(&query)
|
||||
.fetch_all(pg.pool())
|
||||
.await
|
||||
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
|
||||
|
||||
// 手動分頁
|
||||
let paginated_jobs: Vec<_> = filtered_jobs
|
||||
let job_infos: Vec<JobInfoResponse> = rows
|
||||
.into_iter()
|
||||
.skip(offset as usize)
|
||||
.take(page_size)
|
||||
.collect();
|
||||
|
||||
let job_infos: Vec<JobInfoResponse> = paginated_jobs
|
||||
.into_iter()
|
||||
.map(|j| JobInfoResponse {
|
||||
id: j.id,
|
||||
uuid: j.uuid,
|
||||
status: j.status.as_str().to_string(),
|
||||
current_processor: j.current_processor,
|
||||
progress_current: j.progress_current,
|
||||
progress_total: j.progress_total,
|
||||
created_at: j.created_at.to_string(),
|
||||
started_at: j.started_at.map(|t| t.to_string()),
|
||||
.map(|r| {
|
||||
let status_str: String = r.try_get("status").unwrap_or_default();
|
||||
let status = MonitorJobStatus::from_db_str(&status_str).unwrap_or(MonitorJobStatus::Pending);
|
||||
JobInfoResponse {
|
||||
id: r.try_get("id").unwrap_or(0),
|
||||
uuid: r.try_get("uuid").unwrap_or_default(),
|
||||
status: status.as_str().to_string(),
|
||||
current_processor: r.try_get("current_processor").ok(),
|
||||
progress_current: r.try_get("progress_current").unwrap_or(0),
|
||||
progress_total: r.try_get("progress_total").unwrap_or(0),
|
||||
created_at: r.try_get::<String, _>("created_at").unwrap_or_default(),
|
||||
started_at: r.try_get::<String, _>("started_at").ok(),
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
|
||||
@@ -2537,6 +2552,7 @@ pub async fn start_server(host: &str, port: u16) -> anyhow::Result<()> {
|
||||
.merge(super::identity_agent_api::identity_agent_routes()) // Phase 5 Routes
|
||||
.merge(five_w1h_agent_api::five_w1h_agent_routes()) // Phase 3 Routes (5W1H Agent)
|
||||
.merge(super::media_api::bbox_routes()) // Media: video/bbox/thumbnail
|
||||
.merge(super::trace_agent_api::trace_agent_routes()) // Trace listing
|
||||
.merge(search_routes()) // Smart search drill-down
|
||||
.merge(universal_search_routes()) // Universal / frames / persons search
|
||||
.merge(protected_routes)
|
||||
@@ -3242,7 +3258,7 @@ async fn list_pre_chunks(
|
||||
let data_query = format!(
|
||||
"SELECT id, processor_type, coordinate_type, coordinate_index,
|
||||
start_frame, end_frame, start_time, end_time, fps,
|
||||
data, identity_id, confidence, created_at
|
||||
data, created_at
|
||||
FROM {}
|
||||
WHERE file_uuid = $1 {}
|
||||
ORDER BY coordinate_index ASC
|
||||
@@ -3261,8 +3277,6 @@ async fn list_pre_chunks(
|
||||
Option<f64>,
|
||||
Option<f64>,
|
||||
serde_json::Value,
|
||||
Option<uuid::Uuid>,
|
||||
Option<f64>,
|
||||
chrono::DateTime<chrono::Utc>,
|
||||
)> = sqlx::query_as(&data_query)
|
||||
.bind(&uuid)
|
||||
@@ -3283,9 +3297,9 @@ async fn list_pre_chunks(
|
||||
end_time: row.7,
|
||||
fps: row.8,
|
||||
data: row.9.clone(),
|
||||
identity_id: row.10.map(|id| id.to_string()),
|
||||
confidence: row.11,
|
||||
created_at: row.12.to_rfc3339(),
|
||||
identity_id: None,
|
||||
confidence: None,
|
||||
created_at: row.10.to_rfc3339(),
|
||||
})
|
||||
.collect();
|
||||
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
use axum::{
|
||||
extract::{Path, State},
|
||||
extract::{Path, Query, State},
|
||||
http::StatusCode,
|
||||
response::Json,
|
||||
routing::post,
|
||||
routing::{get, post},
|
||||
Router,
|
||||
};
|
||||
use serde::{Deserialize, Serialize};
|
||||
@@ -10,10 +10,15 @@ use serde::{Deserialize, Serialize};
|
||||
use crate::core::db::PostgresDb;
|
||||
|
||||
pub fn trace_agent_routes() -> Router<crate::api::server::AppState> {
|
||||
Router::new().route(
|
||||
"/api/v1/file/:file_uuid/face_trace/sortby",
|
||||
post(list_traces_sorted),
|
||||
)
|
||||
Router::new()
|
||||
.route(
|
||||
"/api/v1/file/:file_uuid/face_trace/sortby",
|
||||
post(list_traces_sorted),
|
||||
)
|
||||
.route(
|
||||
"/api/v1/file/:file_uuid/trace/:trace_id/faces",
|
||||
get(list_trace_faces),
|
||||
)
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
@@ -21,6 +26,8 @@ struct TracesRequest {
|
||||
min_faces: Option<i64>,
|
||||
sort_by: Option<String>,
|
||||
limit: Option<i64>,
|
||||
min_confidence: Option<f64>,
|
||||
max_confidence: Option<f64>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize)]
|
||||
@@ -53,14 +60,15 @@ async fn list_traces_sorted(
|
||||
let min_faces = req.min_faces.unwrap_or(1);
|
||||
let sort = req.sort_by.as_deref().unwrap_or("first_appearance");
|
||||
let limit = req.limit.unwrap_or(500);
|
||||
let min_confidence = req.min_confidence.unwrap_or(0.0);
|
||||
let max_confidence = req.max_confidence.unwrap_or(1.0);
|
||||
|
||||
let order_clause = match sort {
|
||||
"face_count" => "face_count DESC",
|
||||
"duration" => "duration_sec DESC",
|
||||
"duration" => "(MAX(frame_number) - MIN(frame_number)) DESC",
|
||||
_ => "first_frame ASC",
|
||||
};
|
||||
|
||||
// Get actual video FPS
|
||||
let fps: f64 =
|
||||
sqlx::query_scalar("SELECT COALESCE(fps, 24.0) FROM dev.videos WHERE file_uuid = $1")
|
||||
.bind(&file_uuid)
|
||||
@@ -84,6 +92,7 @@ async fn list_traces_sorted(
|
||||
AVG(confidence) AS avg_confidence
|
||||
FROM dev.face_detections
|
||||
WHERE file_uuid = $1 AND trace_id IS NOT NULL
|
||||
AND confidence >= $4 AND confidence <= $5
|
||||
GROUP BY trace_id
|
||||
HAVING COUNT(*) >= $2
|
||||
ORDER BY {}
|
||||
@@ -103,6 +112,8 @@ async fn list_traces_sorted(
|
||||
.bind(&file_uuid)
|
||||
.bind(min_faces)
|
||||
.bind(limit)
|
||||
.bind(min_confidence)
|
||||
.bind(max_confidence)
|
||||
.fetch_all(state.db.pool())
|
||||
.await
|
||||
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?;
|
||||
@@ -138,3 +149,146 @@ async fn list_traces_sorted(
|
||||
traces,
|
||||
}))
|
||||
}
|
||||
|
||||
// ── Individual face detections for a trace ──
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct TraceFacesQuery {
|
||||
limit: Option<i64>,
|
||||
offset: Option<i64>,
|
||||
interpolate: Option<bool>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize)]
|
||||
struct TraceFaceItem {
|
||||
id: i32,
|
||||
start_frame: i32,
|
||||
start_time: f64,
|
||||
x: Option<i32>,
|
||||
y: Option<i32>,
|
||||
width: Option<i32>,
|
||||
height: Option<i32>,
|
||||
confidence: f64,
|
||||
interpolated: bool,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize)]
|
||||
struct TraceFacesResponse {
|
||||
success: bool,
|
||||
file_uuid: String,
|
||||
trace_id: i32,
|
||||
total: i64,
|
||||
faces: Vec<TraceFaceItem>,
|
||||
}
|
||||
|
||||
/// Linearly interpolates between two optional integer values.
///
/// Returns `None` unless both endpoints are present. `t` is the interpolation
/// factor: `0.0` yields `a` and `1.0` yields `b`. The result is rounded to the
/// nearest integer, with ties rounded away from zero.
///
/// All arithmetic is carried out in `f64`, so the difference between the
/// endpoints cannot overflow `i32` even when they are far apart.
fn lerp_i32(a: Option<i32>, b: Option<i32>, t: f64) -> Option<i32> {
    let (start, end) = (f64::from(a?), f64::from(b?));
    Some((start + (end - start) * t).round() as i32)
}
|
||||
|
||||
async fn list_trace_faces(
|
||||
State(state): State<crate::api::server::AppState>,
|
||||
Path((file_uuid, trace_id)): Path<(String, i32)>,
|
||||
Query(q): Query<TraceFacesQuery>,
|
||||
) -> Result<Json<TraceFacesResponse>, (StatusCode, String)> {
|
||||
let limit = q.limit.unwrap_or(200).min(1000);
|
||||
let offset = q.offset.unwrap_or(0);
|
||||
let interpolate = q.interpolate.unwrap_or(false);
|
||||
|
||||
let fps: f64 =
|
||||
sqlx::query_scalar("SELECT COALESCE(fps, 24.0) FROM dev.videos WHERE file_uuid = $1")
|
||||
.bind(&file_uuid)
|
||||
.fetch_optional(state.db.pool())
|
||||
.await
|
||||
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?
|
||||
.unwrap_or(24.0);
|
||||
|
||||
let total_detected: i64 = sqlx::query_scalar(
|
||||
"SELECT COUNT(*) FROM dev.face_detections WHERE file_uuid = $1 AND trace_id = $2"
|
||||
)
|
||||
.bind(&file_uuid)
|
||||
.bind(trace_id)
|
||||
.fetch_one(state.db.pool())
|
||||
.await
|
||||
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?;
|
||||
|
||||
let rows: Vec<(i32, i32, Option<i32>, Option<i32>, Option<i32>, Option<i32>, f32)> =
|
||||
sqlx::query_as(
|
||||
"SELECT id, frame_number, x, y, width, height, confidence
|
||||
FROM dev.face_detections
|
||||
WHERE file_uuid = $1 AND trace_id = $2
|
||||
ORDER BY frame_number ASC
|
||||
LIMIT $3 OFFSET $4"
|
||||
)
|
||||
.bind(&file_uuid)
|
||||
.bind(trace_id)
|
||||
.bind(limit)
|
||||
.bind(offset)
|
||||
.fetch_all(state.db.pool())
|
||||
.await
|
||||
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?;
|
||||
|
||||
let mut faces: Vec<TraceFaceItem> = Vec::new();
|
||||
|
||||
for (i, (id, frame, x, y, w, h, conf)) in rows.iter().enumerate() {
|
||||
let cur = (x, y, w, h);
|
||||
|
||||
// Add interpolated frames between previous and current detection
|
||||
if interpolate && i > 0 {
|
||||
let prev = &rows[i - 1];
|
||||
let prev_frame = prev.1;
|
||||
let gap = frame - prev_frame;
|
||||
if gap > 1 {
|
||||
for mid in 1..gap {
|
||||
let t = mid as f64 / gap as f64;
|
||||
let mid_x = lerp_i32(prev.2, *x, t);
|
||||
let mid_y = lerp_i32(prev.3, *y, t);
|
||||
let mid_w = lerp_i32(prev.4, *w, t);
|
||||
let mid_h = lerp_i32(prev.5, *h, t);
|
||||
let mid_frame = prev_frame + mid;
|
||||
faces.push(TraceFaceItem {
|
||||
id: 0,
|
||||
start_frame: mid_frame,
|
||||
start_time: (mid_frame as f64 / fps * 10.0).round() / 10.0,
|
||||
x: mid_x,
|
||||
y: mid_y,
|
||||
width: mid_w,
|
||||
height: mid_h,
|
||||
confidence: 0.0,
|
||||
interpolated: true,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Add the real detection
|
||||
let frame_val = *frame;
|
||||
faces.push(TraceFaceItem {
|
||||
id: *id,
|
||||
start_frame: frame_val,
|
||||
start_time: (frame_val as f64 / fps * 10.0).round() / 10.0,
|
||||
x: *x,
|
||||
y: *y,
|
||||
width: *w,
|
||||
height: *h,
|
||||
confidence: *conf as f64,
|
||||
interpolated: false,
|
||||
});
|
||||
}
|
||||
|
||||
let total = if interpolate && faces.len() as i64 > total_detected {
|
||||
faces.len() as i64
|
||||
} else {
|
||||
total_detected
|
||||
};
|
||||
|
||||
Ok(Json(TraceFacesResponse {
|
||||
success: true,
|
||||
file_uuid,
|
||||
trace_id,
|
||||
total,
|
||||
faces,
|
||||
}))
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user