feat: Phase 1 handover - schema migration, correction mechanism, API fixes

Schema changes: dev.chunks->dev.chunk, remove old_chunk_id/chunk_index
Correction: asr-1.json format, generate/apply scripts
API: 37/37 endpoints fixed and tested
Docs: HANDOVER_V2.0.md for M4
This commit is contained in:
Accusys
2026-05-11 07:03:22 +08:00
parent ef894a44ad
commit 39ba5ddf76
147 changed files with 19843 additions and 3053 deletions

View File

@@ -140,15 +140,37 @@ async fn analyze_identity(
}
let face_data: serde_json::Value = std::fs::read_to_string(&face_clustered_path)
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("Failed to read face data: {}", e)))?
.map_err(|e| {
(
StatusCode::INTERNAL_SERVER_ERROR,
format!("Failed to read face data: {}", e),
)
})?
.parse()
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("Failed to parse face data: {}", e)))?;
.map_err(|e| {
(
StatusCode::INTERNAL_SERVER_ERROR,
format!("Failed to parse face data: {}", e),
)
})?;
let asrx_data: Option<serde_json::Value> = if asrx_path.exists() {
Some(std::fs::read_to_string(&asrx_path)
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("Failed to read asrx data: {}", e)))?
.parse()
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("Failed to parse asrx data: {}", e)))?)
Some(
std::fs::read_to_string(&asrx_path)
.map_err(|e| {
(
StatusCode::INTERNAL_SERVER_ERROR,
format!("Failed to read asrx data: {}", e),
)
})?
.parse()
.map_err(|e| {
(
StatusCode::INTERNAL_SERVER_ERROR,
format!("Failed to parse asrx data: {}", e),
)
})?,
)
} else {
None
};
@@ -161,7 +183,14 @@ async fn analyze_identity(
// 將 identity 結果寫入 DB
let pool = state.db.pool();
for id_result in &identities {
let identity_name = format!("person_{}", id_result.person_ids.first().map(|s| &**s).unwrap_or("unknown"));
let identity_name = format!(
"person_{}",
id_result
.person_ids
.first()
.map(|s| &**s)
.unwrap_or("unknown")
);
let metadata = serde_json::json!({
"source": "identity_agent",
"trace_ids": id_result.person_ids,
@@ -184,7 +213,9 @@ async fn analyze_identity(
}
// 迭代多角度 face embedding 比對TMDb seed → 傳播)
let _ = match_faces_iterative(pool, &req.file_uuid).await.unwrap_or(0);
let _ = match_faces_iterative(pool, &req.file_uuid)
.await
.unwrap_or(0);
// 將 ASRX speaker 綁定到已匹配 identity 的 trace
let _ = bind_speakers(pool, &req.file_uuid).await.unwrap_or(0);
@@ -309,11 +340,21 @@ fn extract_speakers_from_asrx_data(asrx_data: &Option<serde_json::Value>) -> Vec
let mut speaker_segments_map: std::collections::HashMap<String, Vec<(f64, f64)>> =
std::collections::HashMap::new();
for segment in segments {
let speaker_id = segment.get("speaker_id").and_then(|s| s.as_str())
let speaker_id = segment
.get("speaker_id")
.and_then(|s| s.as_str())
.or_else(|| segment.get("speaker").and_then(|s| s.as_str()));
if let Some(speaker_id) = speaker_id {
let start = segment.get("start").or_else(|| segment.get("start_time")).and_then(|s| s.as_f64()).unwrap_or(0.0);
let end = segment.get("end").or_else(|| segment.get("end_time")).and_then(|e| e.as_f64()).unwrap_or(0.0);
let start = segment
.get("start")
.or_else(|| segment.get("start_time"))
.and_then(|s| s.as_f64())
.unwrap_or(0.0);
let end = segment
.get("end")
.or_else(|| segment.get("end_time"))
.and_then(|e| e.as_f64())
.unwrap_or(0.0);
speaker_segments_map
.entry(speaker_id.to_string())
.or_insert_with(Vec::new)
@@ -321,7 +362,10 @@ fn extract_speakers_from_asrx_data(asrx_data: &Option<serde_json::Value>) -> Vec
}
}
for (speaker_id, segments) in speaker_segments_map {
speakers.push(SpeakerData { speaker_id, segments });
speakers.push(SpeakerData {
speaker_id,
segments,
});
}
}
}
@@ -598,11 +642,17 @@ struct SpeakerData {
}
fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
if a.len() != b.len() || a.is_empty() { return 0.0; }
if a.len() != b.len() || a.is_empty() {
return 0.0;
}
let dot: f32 = a.iter().zip(b).map(|(x, y)| x * y).sum();
let na: f32 = a.iter().map(|x| x * x).sum::<f32>().sqrt();
let nb: f32 = b.iter().map(|x| x * x).sum::<f32>().sqrt();
if na == 0.0 || nb == 0.0 { 0.0 } else { dot / (na * nb) }
if na == 0.0 || nb == 0.0 {
0.0
} else {
dot / (na * nb)
}
}
/// 迭代多角度 face embedding 比對 + 傳播
@@ -619,16 +669,20 @@ async fn match_faces_iterative(pool: &sqlx::PgPool, file_uuid: &str) -> anyhow::
tracing::warn!("[FaceMatch] No TMDb identities with face embeddings");
return Ok(0);
}
tracing::info!("[FaceMatch] Loaded {} TMDb seed identities", tmdb_rows.len());
tracing::info!(
"[FaceMatch] Loaded {} TMDb seed identities",
tmdb_rows.len()
);
// Step 2: 載入所有 face_detections按 trace_id 分組
let fd_rows = sqlx::query_as::<_, (i32, Vec<f32>)>(
"SELECT trace_id, embedding FROM dev.face_detections \
WHERE file_uuid=$1 AND trace_id IS NOT NULL AND embedding IS NOT NULL \
ORDER BY trace_id"
ORDER BY trace_id",
)
.bind(file_uuid)
.fetch_all(pool).await?;
.fetch_all(pool)
.await?;
if fd_rows.is_empty() {
tracing::warn!("[FaceMatch] No face detections with embeddings");
@@ -639,7 +693,10 @@ async fn match_faces_iterative(pool: &sqlx::PgPool, file_uuid: &str) -> anyhow::
use std::collections::HashMap;
let mut trace_faces: HashMap<i32, Vec<Vec<f32>>> = HashMap::new();
for (tid, emb) in &fd_rows {
trace_faces.entry(*tid).or_insert_with(Vec::new).push(emb.clone());
trace_faces
.entry(*tid)
.or_insert_with(Vec::new)
.push(emb.clone());
}
// 去重:同一個 trace 內embedding 太接近的只留一個
@@ -649,7 +706,11 @@ async fn match_faces_iterative(pool: &sqlx::PgPool, file_uuid: &str) -> anyhow::
}
let total_traces = trace_faces.len();
tracing::info!("[FaceMatch] Loaded {} traces with {} faces", total_traces, fd_rows.len());
tracing::info!(
"[FaceMatch] Loaded {} traces with {} faces",
total_traces,
fd_rows.len()
);
// Step 3: 建立 TMDb 查找表
let tmdb_seeds: Vec<(i32, String, Vec<f32>)> = tmdb_rows;
@@ -665,14 +726,21 @@ async fn match_faces_iterative(pool: &sqlx::PgPool, file_uuid: &str) -> anyhow::
for (_, ref name, ref tmdb_emb) in &tmdb_seeds {
for face_emb in faces {
let s = cosine_similarity(face_emb, tmdb_emb);
if s > best_sim { best_sim = s; best_name = name.clone(); }
if s > best_sim {
best_sim = s;
best_name = name.clone();
}
}
}
if best_sim >= TH {
matched.insert(tid, best_name);
}
}
tracing::info!("[FaceMatch] Round 1: {} matched ({}%)", matched.len(), matched.len() * 100 / total_traces);
tracing::info!(
"[FaceMatch] Round 1: {} matched ({}%)",
matched.len(),
matched.len() * 100 / total_traces
);
// Round 2+: 用已匹配的 face 作為 seed 傳播
for round_n in 2..=10 {
@@ -681,21 +749,31 @@ async fn match_faces_iterative(pool: &sqlx::PgPool, file_uuid: &str) -> anyhow::
let mut seed_pool: HashMap<String, Vec<&Vec<f32>>> = HashMap::new();
for (&tid, name) in &matched {
if let Some(faces) = trace_faces.get(&tid) {
seed_pool.entry(name.clone()).or_default().extend(faces.iter());
seed_pool
.entry(name.clone())
.or_default()
.extend(faces.iter());
}
}
let mut new_matches: Vec<(i32, String)> = Vec::new();
for (&tid, faces) in &trace_faces {
if matched.contains_key(&tid) { continue; }
if matched.contains_key(&tid) {
continue;
}
let mut best_name = String::new();
let mut best_sim = 0.0f32;
if faces.is_empty() { continue; }
if faces.is_empty() {
continue;
}
let ref_face = &faces[0];
for (name, seed_faces) in &seed_pool {
for seed in seed_faces {
let s = cosine_similarity(ref_face, seed);
if s > best_sim { best_sim = s; best_name = name.clone(); }
if s > best_sim {
best_sim = s;
best_name = name.clone();
}
}
}
if best_sim >= TH {
@@ -706,31 +784,46 @@ async fn match_faces_iterative(pool: &sqlx::PgPool, file_uuid: &str) -> anyhow::
matched.insert(tid, name);
}
let new = matched.len() - prev;
tracing::info!("[FaceMatch] Round {}: +{} matched (total {}, {}%)", round_n, new, matched.len(), matched.len() * 100 / total_traces);
if new < 5 { break; }
tracing::info!(
"[FaceMatch] Round {}: +{} matched (total {}, {}%)",
round_n,
new,
matched.len(),
matched.len() * 100 / total_traces
);
if new < 5 {
break;
}
}
// Step 5: 寫入 DB
let mut updated = 0usize;
for (tid, name) in &matched {
let id_opt = sqlx::query_scalar::<_, Option<i32>>(
"SELECT id FROM dev.identities WHERE name=$1 AND source='tmdb'"
"SELECT id FROM dev.identities WHERE name=$1 AND source='tmdb'",
)
.bind(name)
.fetch_optional(pool).await?;
.fetch_optional(pool)
.await?;
if let Some(identity_id) = id_opt {
let _ = sqlx::query(
"UPDATE dev.face_detections SET identity_id=$1 WHERE file_uuid=$2 AND trace_id=$3"
"UPDATE dev.face_detections SET identity_id=$1 WHERE file_uuid=$2 AND trace_id=$3",
)
.bind(identity_id)
.bind(file_uuid)
.bind(tid)
.execute(pool).await;
.execute(pool)
.await;
updated += 1;
}
}
tracing::info!("[FaceMatch] Done: {}/{} traces matched ({}%)", matched.len(), total_traces, matched.len() * 100 / total_traces);
tracing::info!(
"[FaceMatch] Done: {}/{} traces matched ({}%)",
matched.len(),
total_traces,
matched.len() * 100 / total_traces
);
Ok(updated)
}
@@ -771,12 +864,25 @@ pub async fn bind_speakers(pool: &sqlx::PgPool, file_uuid: &str) -> anyhow::Resu
let mut speakers: HashMap<String, Vec<(f64, f64)>> = HashMap::new();
if let Some(segments) = asrx_data.get("segments").and_then(|s| s.as_array()) {
for seg in segments {
let sid = seg.get("speaker_id").and_then(|s| s.as_str())
let sid = seg
.get("speaker_id")
.and_then(|s| s.as_str())
.or_else(|| seg.get("speaker").and_then(|s| s.as_str()));
if let Some(sid) = sid {
let start = seg.get("start_time").or_else(|| seg.get("start")).and_then(|v| v.as_f64()).unwrap_or(0.0);
let end = seg.get("end_time").or_else(|| seg.get("end")).and_then(|v| v.as_f64()).unwrap_or(0.0);
speakers.entry(sid.to_string()).or_default().push((start, end));
let start = seg
.get("start_time")
.or_else(|| seg.get("start"))
.and_then(|v| v.as_f64())
.unwrap_or(0.0);
let end = seg
.get("end_time")
.or_else(|| seg.get("end"))
.and_then(|v| v.as_f64())
.unwrap_or(0.0);
speakers
.entry(sid.to_string())
.or_default()
.push((start, end));
}
}
}
@@ -792,7 +898,9 @@ pub async fn bind_speakers(pool: &sqlx::PgPool, file_uuid: &str) -> anyhow::Resu
// For each trace, compute overlap with each speaker
let mut bindings = 0usize;
for (trace_id, frames) in &traces {
if frames.is_empty() { continue; }
if frames.is_empty() {
continue;
}
// Get identity_id for this trace
let identity_id: Option<i32> = sqlx::query_scalar(
@@ -801,7 +909,9 @@ pub async fn bind_speakers(pool: &sqlx::PgPool, file_uuid: &str) -> anyhow::Resu
.bind(file_uuid).bind(trace_id)
.fetch_optional(pool).await?.flatten();
if identity_id.is_none() { continue; }
if identity_id.is_none() {
continue;
}
let identity_id = identity_id.unwrap();
// Compute overlap with each speaker
@@ -850,7 +960,11 @@ pub async fn bind_speakers(pool: &sqlx::PgPool, file_uuid: &str) -> anyhow::Resu
}
}
tracing::info!("[SpeakerBind] Created {}/{} speaker bindings", bindings, traces.len());
tracing::info!(
"[SpeakerBind] Created {}/{} speaker bindings",
bindings,
traces.len()
);
Ok(bindings)
}
@@ -870,7 +984,10 @@ pub async fn run_identity_agent(db: &PostgresDb, file_uuid: &str) -> anyhow::Res
};
if !face_clustered_path.exists() {
tracing::warn!("[IdentityAgent] face_clustered.json not found for {}", file_uuid);
tracing::warn!(
"[IdentityAgent] face_clustered.json not found for {}",
file_uuid
);
return Ok(());
}
@@ -888,7 +1005,14 @@ pub async fn run_identity_agent(db: &PostgresDb, file_uuid: &str) -> anyhow::Res
let pool = db.pool();
for id_result in &identities {
let identity_name = format!("person_{}", id_result.person_ids.first().map(|s| &**s).unwrap_or("unknown"));
let identity_name = format!(
"person_{}",
id_result
.person_ids
.first()
.map(|s| &**s)
.unwrap_or("unknown")
);
let metadata = serde_json::json!({
"source": "identity_agent",
"trace_ids": id_result.person_ids,
@@ -914,7 +1038,10 @@ pub async fn run_identity_agent(db: &PostgresDb, file_uuid: &str) -> anyhow::Res
tracing::info!(
"[IdentityAgent] Done for {}: {} identities, {} face matches, {} speaker bindings",
file_uuid, identities.len(), matched, bound
file_uuid,
identities.len(),
matched,
bound
);
Ok(())
}