feat: schema tracking, SHA256 integrity, identity UUID fix, 3-angle face match, cuts table, trace stranger_id
This commit is contained in:
@@ -676,12 +676,12 @@ async fn match_faces_iterative(pool: &sqlx::PgPool, file_uuid: &str) -> anyhow::
|
||||
tmdb_rows.len()
|
||||
);
|
||||
|
||||
// Step 2: 載入所有 face_detections,按 trace_id 分組
|
||||
// Step 2: 載入所有 face_detections(含 frame_number),按 trace_id 分組
|
||||
let fd_table = schema::table_name("face_detections");
|
||||
let fd_rows = sqlx::query_as::<_, (i32, Vec<f32>)>(
|
||||
&format!("SELECT trace_id, embedding FROM {} \
|
||||
let fd_rows = sqlx::query_as::<_, (i32, i32, Vec<f32>)>(
|
||||
&format!("SELECT trace_id, frame_number, embedding FROM {} \
|
||||
WHERE file_uuid=$1 AND trace_id IS NOT NULL AND embedding IS NOT NULL \
|
||||
ORDER BY trace_id", fd_table),
|
||||
ORDER BY trace_id, frame_number", fd_table),
|
||||
)
|
||||
.bind(file_uuid)
|
||||
.fetch_all(pool)
|
||||
@@ -692,27 +692,38 @@ async fn match_faces_iterative(pool: &sqlx::PgPool, file_uuid: &str) -> anyhow::
|
||||
return Ok(0);
|
||||
}
|
||||
|
||||
// 分組:trace_id → Vec<embedding>
|
||||
// 分組:trace_id → (frame_number, embedding)
|
||||
use std::collections::HashMap;
|
||||
let mut trace_faces: HashMap<i32, Vec<Vec<f32>>> = HashMap::new();
|
||||
for (tid, emb) in &fd_rows {
|
||||
trace_faces
|
||||
let mut trace_faces_raw: HashMap<i32, Vec<(i32, Vec<f32>)>> = HashMap::new();
|
||||
for (tid, frame, emb) in &fd_rows {
|
||||
trace_faces_raw
|
||||
.entry(*tid)
|
||||
.or_insert_with(Vec::new)
|
||||
.push(emb.clone());
|
||||
.push((*frame, emb.clone()));
|
||||
}
|
||||
|
||||
// 去重:同一個 trace 內,embedding 太接近的只留一個
|
||||
for faces in trace_faces.values_mut() {
|
||||
faces.sort_by(|a, b| b[0].partial_cmp(&a[0]).unwrap_or(std::cmp::Ordering::Equal));
|
||||
faces.dedup_by(|a, b| cosine_similarity(a, b) > 0.99);
|
||||
// 從每個 trace 選取不同角度的 3 個 face embedding
|
||||
// 策略:按 frame_number 排序,取前中後各 1 個
|
||||
let mut trace_samples: HashMap<i32, Vec<Vec<f32>>> = HashMap::new();
|
||||
for (tid, mut faces) in trace_faces_raw {
|
||||
faces.sort_by_key(|(frame, _)| *frame);
|
||||
let n = faces.len();
|
||||
let indices = if n <= 3 {
|
||||
(0..n).collect()
|
||||
} else {
|
||||
let mid = n / 2;
|
||||
vec![0, mid, n - 1]
|
||||
};
|
||||
let samples: Vec<Vec<f32>> = indices.iter().map(|&i| faces[i].1.clone()).collect();
|
||||
trace_samples.insert(tid, samples);
|
||||
}
|
||||
|
||||
let total_traces = trace_faces.len();
|
||||
let total_traces = trace_samples.len();
|
||||
let sample_count: usize = trace_samples.values().map(|v| v.len()).sum();
|
||||
tracing::info!(
|
||||
"[FaceMatch] Loaded {} traces with {} faces",
|
||||
"[FaceMatch] Loaded {} traces, sampled {} embeddings (3-angle)",
|
||||
total_traces,
|
||||
fd_rows.len()
|
||||
sample_count
|
||||
);
|
||||
|
||||
// Step 3: 建立 TMDb 查找表
|
||||
@@ -722,12 +733,13 @@ async fn match_faces_iterative(pool: &sqlx::PgPool, file_uuid: &str) -> anyhow::
|
||||
const TH: f32 = 0.50;
|
||||
let mut matched: HashMap<i32, String> = HashMap::new(); // trace_id → identity_name
|
||||
|
||||
// Round 1: 直接比對 TMDb
|
||||
for (&tid, faces) in &trace_faces {
|
||||
// Round 1: 用 3-angle samples 比對 TMDb
|
||||
// 每個 trace 選 3 個不同角度 face,取最高 similarity
|
||||
for (&tid, samples) in &trace_samples {
|
||||
let mut best_name = String::new();
|
||||
let mut best_sim = 0.0f32;
|
||||
for (_, ref name, ref tmdb_emb) in &tmdb_seeds {
|
||||
for face_emb in faces {
|
||||
for face_emb in samples {
|
||||
let s = cosine_similarity(face_emb, tmdb_emb);
|
||||
if s > best_sim {
|
||||
best_sim = s;
|
||||
@@ -751,31 +763,33 @@ async fn match_faces_iterative(pool: &sqlx::PgPool, file_uuid: &str) -> anyhow::
|
||||
// 建立 seed pool: name → Vec<embedding>
|
||||
let mut seed_pool: HashMap<String, Vec<&Vec<f32>>> = HashMap::new();
|
||||
for (&tid, name) in &matched {
|
||||
if let Some(faces) = trace_faces.get(&tid) {
|
||||
if let Some(samples) = trace_samples.get(&tid) {
|
||||
seed_pool
|
||||
.entry(name.clone())
|
||||
.or_default()
|
||||
.extend(faces.iter());
|
||||
.extend(samples.iter());
|
||||
}
|
||||
}
|
||||
|
||||
let mut new_matches: Vec<(i32, String)> = Vec::new();
|
||||
for (&tid, faces) in &trace_faces {
|
||||
for (&tid, samples) in &trace_samples {
|
||||
if matched.contains_key(&tid) {
|
||||
continue;
|
||||
}
|
||||
let mut best_name = String::new();
|
||||
let mut best_sim = 0.0f32;
|
||||
if faces.is_empty() {
|
||||
if samples.is_empty() {
|
||||
continue;
|
||||
}
|
||||
let ref_face = &faces[0];
|
||||
// 用 3-angle samples 分別比對 seed,取最高 similarity
|
||||
for (name, seed_faces) in &seed_pool {
|
||||
for seed in seed_faces {
|
||||
let s = cosine_similarity(ref_face, seed);
|
||||
if s > best_sim {
|
||||
best_sim = s;
|
||||
best_name = name.clone();
|
||||
for face_emb in samples {
|
||||
for seed in seed_faces {
|
||||
let s = cosine_similarity(face_emb, seed);
|
||||
if s > best_sim {
|
||||
best_sim = s;
|
||||
best_name = name.clone();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -799,7 +813,7 @@ async fn match_faces_iterative(pool: &sqlx::PgPool, file_uuid: &str) -> anyhow::
|
||||
}
|
||||
}
|
||||
|
||||
// Step 5: 寫入 DB
|
||||
// Step 5: 寫入 DB — 已匹配的設 identity_id
|
||||
let identities_table = schema::table_name("identities");
|
||||
let fd_table = schema::table_name("face_detections");
|
||||
let mut updated = 0usize;
|
||||
@@ -823,11 +837,27 @@ async fn match_faces_iterative(pool: &sqlx::PgPool, file_uuid: &str) -> anyhow::
|
||||
}
|
||||
}
|
||||
|
||||
// Step 6: 未匹配的 trace 設 stranger_id = trace_id
|
||||
// trace_id 在同一個 file 內是 sequential integer,直接複用為 stranger_id
|
||||
let stranger_update = sqlx::query(
|
||||
&format!(
|
||||
"UPDATE {} SET stranger_id = trace_id \
|
||||
WHERE file_uuid = $1 AND trace_id IS NOT NULL AND identity_id IS NULL \
|
||||
AND (stranger_id IS NULL OR stranger_id != trace_id)",
|
||||
fd_table
|
||||
)
|
||||
)
|
||||
.bind(file_uuid)
|
||||
.execute(pool)
|
||||
.await?;
|
||||
let stranger_count = stranger_update.rows_affected();
|
||||
|
||||
tracing::info!(
|
||||
"[FaceMatch] Done: {}/{} traces matched ({}%)",
|
||||
"[FaceMatch] Done: {}/{} traces matched ({}%), {} strangers",
|
||||
matched.len(),
|
||||
total_traces,
|
||||
matched.len() * 100 / total_traces
|
||||
matched.len() * 100 / total_traces,
|
||||
stranger_count
|
||||
);
|
||||
Ok(updated)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user