Merge branch 'main' of 192.168.110.201:/Users/accusys/momentry_core_0.1/
This commit is contained in:
@@ -71,4 +71,11 @@ REDIS_CACHE_TTL_VIDEO_META=3600
|
||||
|
||||
# TMDb Integration (probe phase - auto-create identities from movie metadata)
|
||||
TMDB_API_KEY=e9cde52197f6f8df4d9db99da93db1fb
|
||||
MOMENTRY_TMDB_PROBE_ENABLED=true
|
||||
MOMENTRY_TMDB_PROBE_ENABLED=true
|
||||
# LLM for 5W1H summary (points to M5 Gemma4)
|
||||
MOMENTRY_LLM_SUMMARY_URL=http://localhost:8082/v1/chat/completions
|
||||
MOMENTRY_LLM_SUMMARY_MODEL=google_gemma-4-26B-A4B-it-Q5_K_M.gguf
|
||||
MOMENTRY_LLM_SUMMARY_ENABLED=true
|
||||
|
||||
# Embedding (ANE CoreML server)
|
||||
MOMENTRY_EMBED_URL=http://localhost:11436
|
||||
|
||||
32
docs_v1.0/M4_workspace/2026-05-07_M5_release_ready.md
Normal file
32
docs_v1.0/M4_workspace/2026-05-07_M5_release_ready.md
Normal file
@@ -0,0 +1,32 @@
|
||||
# M5 Release Ready
|
||||
|
||||
## 可 sync
|
||||
|
||||
M5 已完成 v1.0.0 release candidate 的所有程式碼變更,所有服務正常運行。
|
||||
|
||||
```bash
|
||||
# 1. DB
|
||||
scp accusys@192.168.110.201:/tmp/momentry_3abeee81.sql /tmp/
|
||||
psql -U accusys -d momentry -c "DROP SCHEMA IF EXISTS dev CASCADE; CREATE SCHEMA dev;"
|
||||
psql -U accusys -d momentry -f /tmp/momentry_3abeee81.sql
|
||||
|
||||
# 2. Binary(可選,M4 通常不需跑 playground)
|
||||
scp accusys@192.168.110.201:/Users/accusys/momentry_core_0.1/target/debug/momentry_playground /tmp/
|
||||
|
||||
# 3. Git
|
||||
cd /path/to/momentry_docs
|
||||
git pull
|
||||
```
|
||||
|
||||
## 5W1H+ 仍在背景
|
||||
|
||||
目前完成 8/721 scene,預計還需要一段時間。完成後會自動 vectorize。屆時再做一次增量 sync。
|
||||
|
||||
## 本次 Release 變更
|
||||
|
||||
- 5W1H+ Agent(遞迴式摘要、face/speaker 資訊整合)
|
||||
- Identity Agent 自動觸發
|
||||
- EmbeddingGemma 300M(768D,多語)
|
||||
- Pipeline bug fixes
|
||||
|
||||
詳細變更請見 `git log`。
|
||||
File diff suppressed because it is too large
Load Diff
@@ -10,6 +10,7 @@ use sqlx::Row;
|
||||
use std::path::PathBuf;
|
||||
|
||||
use crate::api::server::AppState;
|
||||
use crate::core::db::PostgresDb;
|
||||
|
||||
pub fn identity_agent_routes() -> Router<AppState> {
|
||||
Router::new()
|
||||
@@ -124,6 +125,13 @@ async fn analyze_identity(
|
||||
let face_clustered_path = video_dir.join(format!("{}.face_clustered.json", req.file_uuid));
|
||||
let asrx_path = video_dir.join(format!("{}.asrx.json", req.file_uuid));
|
||||
|
||||
// 如果子目錄找不到,試根目錄
|
||||
let face_clustered_path = if face_clustered_path.exists() {
|
||||
face_clustered_path
|
||||
} else {
|
||||
PathBuf::from(&output_dir).join(format!("{}.face_clustered.json", req.file_uuid))
|
||||
};
|
||||
|
||||
if !face_clustered_path.exists() {
|
||||
return Err((
|
||||
StatusCode::NOT_FOUND,
|
||||
@@ -132,37 +140,15 @@ async fn analyze_identity(
|
||||
}
|
||||
|
||||
let face_data: serde_json::Value = std::fs::read_to_string(&face_clustered_path)
|
||||
.map_err(|e| {
|
||||
(
|
||||
StatusCode::INTERNAL_SERVER_ERROR,
|
||||
format!("Failed to read face data: {}", e),
|
||||
)
|
||||
})?
|
||||
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("Failed to read face data: {}", e)))?
|
||||
.parse()
|
||||
.map_err(|e| {
|
||||
(
|
||||
StatusCode::INTERNAL_SERVER_ERROR,
|
||||
format!("Failed to parse face data: {}", e),
|
||||
)
|
||||
})?;
|
||||
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("Failed to parse face data: {}", e)))?;
|
||||
|
||||
let asrx_data: Option<serde_json::Value> = if asrx_path.exists() {
|
||||
Some(
|
||||
std::fs::read_to_string(&asrx_path)
|
||||
.map_err(|e| {
|
||||
(
|
||||
StatusCode::INTERNAL_SERVER_ERROR,
|
||||
format!("Failed to read asrx data: {}", e),
|
||||
)
|
||||
})?
|
||||
.parse()
|
||||
.map_err(|e| {
|
||||
(
|
||||
StatusCode::INTERNAL_SERVER_ERROR,
|
||||
format!("Failed to parse asrx data: {}", e),
|
||||
)
|
||||
})?,
|
||||
)
|
||||
Some(std::fs::read_to_string(&asrx_path)
|
||||
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("Failed to read asrx data: {}", e)))?
|
||||
.parse()
|
||||
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("Failed to parse asrx data: {}", e)))?)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
@@ -172,6 +158,37 @@ async fn analyze_identity(
|
||||
|
||||
let identities = analyze_person_speaker_overlap(&persons, &speakers);
|
||||
|
||||
// 將 identity 結果寫入 DB
|
||||
let pool = state.db.pool();
|
||||
for id_result in &identities {
|
||||
let identity_name = format!("person_{}", id_result.person_ids.first().map(|s| &**s).unwrap_or("unknown"));
|
||||
let metadata = serde_json::json!({
|
||||
"source": "identity_agent",
|
||||
"trace_ids": id_result.person_ids,
|
||||
"speaker_ids": id_result.speaker_ids,
|
||||
"confidence": id_result.confidence,
|
||||
"evidence": {
|
||||
"speaker_overlap": id_result.evidence.speaker_overlap,
|
||||
"frame_ratio": id_result.evidence.frame_ratio,
|
||||
},
|
||||
"reasoning": id_result.reasoning,
|
||||
});
|
||||
|
||||
let _ = sqlx::query(
|
||||
"INSERT INTO dev.identities (name, identity_type, source, metadata, status) VALUES ($1, 'people', 'auto', $2::jsonb, 'pending') ON CONFLICT DO NOTHING"
|
||||
)
|
||||
.bind(&identity_name)
|
||||
.bind(&metadata)
|
||||
.execute(pool)
|
||||
.await;
|
||||
}
|
||||
|
||||
// 迭代多角度 face embedding 比對(TMDb seed → 傳播)
|
||||
let _ = match_faces_iterative(pool, &req.file_uuid).await.unwrap_or(0);
|
||||
|
||||
// 將 ASRX speaker 綁定到已匹配 identity 的 trace
|
||||
let _ = bind_speakers(pool, &req.file_uuid).await.unwrap_or(0);
|
||||
|
||||
let processing_status = IdentityProcessingStatus {
|
||||
status: "completed".to_string(),
|
||||
persons_analyzed: persons.len() as i32,
|
||||
@@ -287,33 +304,27 @@ fn extract_persons_from_face_data(face_data: &serde_json::Value) -> Vec<PersonDa
|
||||
|
||||
fn extract_speakers_from_asrx_data(asrx_data: &Option<serde_json::Value>) -> Vec<SpeakerData> {
|
||||
let mut speakers = Vec::new();
|
||||
|
||||
if let Some(data) = asrx_data {
|
||||
if let Some(segments) = data.get("segments").and_then(|s| s.as_array()) {
|
||||
let mut speaker_segments_map: std::collections::HashMap<String, Vec<(f64, f64)>> =
|
||||
std::collections::HashMap::new();
|
||||
|
||||
for segment in segments {
|
||||
if let Some(speaker_id) = segment.get("speaker").and_then(|s| s.as_str()) {
|
||||
let start = segment.get("start").and_then(|s| s.as_f64()).unwrap_or(0.0);
|
||||
let end = segment.get("end").and_then(|e| e.as_f64()).unwrap_or(0.0);
|
||||
|
||||
let speaker_id = segment.get("speaker_id").and_then(|s| s.as_str())
|
||||
.or_else(|| segment.get("speaker").and_then(|s| s.as_str()));
|
||||
if let Some(speaker_id) = speaker_id {
|
||||
let start = segment.get("start").or_else(|| segment.get("start_time")).and_then(|s| s.as_f64()).unwrap_or(0.0);
|
||||
let end = segment.get("end").or_else(|| segment.get("end_time")).and_then(|e| e.as_f64()).unwrap_or(0.0);
|
||||
speaker_segments_map
|
||||
.entry(speaker_id.to_string())
|
||||
.or_insert_with(Vec::new)
|
||||
.push((start, end));
|
||||
}
|
||||
}
|
||||
|
||||
for (speaker_id, segments) in speaker_segments_map {
|
||||
speakers.push(SpeakerData {
|
||||
speaker_id,
|
||||
segments,
|
||||
});
|
||||
speakers.push(SpeakerData { speaker_id, segments });
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
speakers
|
||||
}
|
||||
|
||||
@@ -374,10 +385,8 @@ fn analyze_person_speaker_overlap(
|
||||
|
||||
fn calculate_overlap(person: &PersonData, speaker: &SpeakerData) -> i32 {
|
||||
let mut overlap_count = 0;
|
||||
|
||||
for frame_num in &person.frames {
|
||||
let frame_time = *frame_num as f64 / 23.976;
|
||||
|
||||
let frame_time = *frame_num as f64 / 25.0; // default fps=25
|
||||
for (start, end) in &speaker.segments {
|
||||
if frame_time >= *start && frame_time <= *end {
|
||||
overlap_count += 1;
|
||||
@@ -385,7 +394,6 @@ fn calculate_overlap(person: &PersonData, speaker: &SpeakerData) -> i32 {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
overlap_count
|
||||
}
|
||||
|
||||
@@ -416,31 +424,31 @@ async fn suggest_clustering(
|
||||
State(state): State<AppState>,
|
||||
Json(req): Json<SuggestClusteringRequest>,
|
||||
) -> Result<Json<SuggestClusteringResponse>, (StatusCode, String)> {
|
||||
let min_cluster_size = req.min_cluster_size.unwrap_or(3);
|
||||
|
||||
let file_filter = match &req.file_uuid {
|
||||
Some(uuid) => format!("AND fc.file_uuid = '{}'", uuid),
|
||||
Some(uuid) => format!("AND fd.file_uuid = '{}'", uuid),
|
||||
None => String::new(),
|
||||
};
|
||||
|
||||
let query = format!(
|
||||
r#"
|
||||
SELECT fc.cluster_id, fc.file_uuid, fc.n_faces, fc.metadata
|
||||
FROM face_clusters fc
|
||||
WHERE fc.n_faces >= $1
|
||||
SELECT trace_id, file_uuid, COUNT(*) as face_count
|
||||
FROM dev.face_detections fd
|
||||
WHERE fd.trace_id IS NOT NULL
|
||||
AND NOT EXISTS (
|
||||
SELECT 1 FROM identities i
|
||||
WHERE i.metadata->>'cluster_id' = fc.cluster_id
|
||||
SELECT 1 FROM dev.identities i
|
||||
WHERE i.metadata->>'trace_id' = fd.trace_id::text
|
||||
)
|
||||
{}
|
||||
ORDER BY fc.n_faces DESC
|
||||
GROUP BY trace_id, file_uuid
|
||||
HAVING COUNT(*) >= $1
|
||||
ORDER BY face_count DESC
|
||||
"#,
|
||||
file_filter
|
||||
);
|
||||
|
||||
let pool = state.db.pool();
|
||||
let rows = sqlx::query(&query)
|
||||
.bind(min_cluster_size as i64)
|
||||
.bind(req.min_cluster_size.unwrap_or(3) as i64)
|
||||
.fetch_all(pool)
|
||||
.await
|
||||
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?;
|
||||
@@ -448,27 +456,14 @@ async fn suggest_clustering(
|
||||
let suggestions: Vec<ClusteringSuggestion> = rows
|
||||
.into_iter()
|
||||
.map(|row| {
|
||||
let cluster_id: String = row.get("cluster_id");
|
||||
let n_faces: i32 = row.get("n_faces");
|
||||
let metadata: serde_json::Value =
|
||||
row.try_get("metadata").unwrap_or(serde_json::Value::Null);
|
||||
|
||||
let avg_confidence = metadata
|
||||
.get("avg_confidence")
|
||||
.and_then(|v| v.as_f64())
|
||||
.unwrap_or(0.0);
|
||||
|
||||
let representative_face = metadata
|
||||
.get("representative_face_id")
|
||||
.and_then(|v| v.as_str())
|
||||
.map(|s| s.to_string());
|
||||
|
||||
let trace_id: Option<i32> = row.try_get("trace_id").ok();
|
||||
let face_count: i64 = row.get("face_count");
|
||||
ClusteringSuggestion {
|
||||
cluster_id,
|
||||
face_count: n_faces as usize,
|
||||
avg_confidence,
|
||||
cluster_id: format!("trace_{}", trace_id.unwrap_or(0)),
|
||||
face_count: face_count as usize,
|
||||
avg_confidence: 0.0,
|
||||
suggested_name: None,
|
||||
representative_face,
|
||||
representative_face: None,
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
@@ -601,3 +596,325 @@ struct SpeakerData {
|
||||
speaker_id: String,
|
||||
segments: Vec<(f64, f64)>,
|
||||
}
|
||||
|
||||
fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
|
||||
if a.len() != b.len() || a.is_empty() { return 0.0; }
|
||||
let dot: f32 = a.iter().zip(b).map(|(x, y)| x * y).sum();
|
||||
let na: f32 = a.iter().map(|x| x * x).sum::<f32>().sqrt();
|
||||
let nb: f32 = b.iter().map(|x| x * x).sum::<f32>().sqrt();
|
||||
if na == 0.0 || nb == 0.0 { 0.0 } else { dot / (na * nb) }
|
||||
}
|
||||
|
||||
/// 迭代多角度 face embedding 比對 + 傳播
|
||||
/// Round 1: 用 TMDb seed face_embedding 比對 face_detections (threshold 0.50)
|
||||
/// Round 2+: 用已匹配 trace 的所有 face 作為 seed,傳播到未匹配 trace
|
||||
async fn match_faces_iterative(pool: &sqlx::PgPool, file_uuid: &str) -> anyhow::Result<usize> {
|
||||
// Step 1: 載入 TMDb identities (source='tmdb' 且有 face_embedding)
|
||||
let tmdb_rows = sqlx::query_as::<_, (i32, String, Vec<f32>)>(
|
||||
"SELECT id, name, face_embedding::real[] FROM dev.identities WHERE source='tmdb' AND face_embedding IS NOT NULL"
|
||||
)
|
||||
.fetch_all(pool).await?;
|
||||
|
||||
if tmdb_rows.is_empty() {
|
||||
tracing::warn!("[FaceMatch] No TMDb identities with face embeddings");
|
||||
return Ok(0);
|
||||
}
|
||||
tracing::info!("[FaceMatch] Loaded {} TMDb seed identities", tmdb_rows.len());
|
||||
|
||||
// Step 2: 載入所有 face_detections,按 trace_id 分組
|
||||
let fd_rows = sqlx::query_as::<_, (i32, Vec<f32>)>(
|
||||
"SELECT trace_id, embedding FROM dev.face_detections \
|
||||
WHERE file_uuid=$1 AND trace_id IS NOT NULL AND embedding IS NOT NULL \
|
||||
ORDER BY trace_id"
|
||||
)
|
||||
.bind(file_uuid)
|
||||
.fetch_all(pool).await?;
|
||||
|
||||
if fd_rows.is_empty() {
|
||||
tracing::warn!("[FaceMatch] No face detections with embeddings");
|
||||
return Ok(0);
|
||||
}
|
||||
|
||||
// 分組:trace_id → Vec<embedding>
|
||||
use std::collections::HashMap;
|
||||
let mut trace_faces: HashMap<i32, Vec<Vec<f32>>> = HashMap::new();
|
||||
for (tid, emb) in &fd_rows {
|
||||
trace_faces.entry(*tid).or_insert_with(Vec::new).push(emb.clone());
|
||||
}
|
||||
|
||||
// 去重:同一個 trace 內,embedding 太接近的只留一個
|
||||
for faces in trace_faces.values_mut() {
|
||||
faces.sort_by(|a, b| b[0].partial_cmp(&a[0]).unwrap_or(std::cmp::Ordering::Equal));
|
||||
faces.dedup_by(|a, b| cosine_similarity(a, b) > 0.99);
|
||||
}
|
||||
|
||||
let total_traces = trace_faces.len();
|
||||
tracing::info!("[FaceMatch] Loaded {} traces with {} faces", total_traces, fd_rows.len());
|
||||
|
||||
// Step 3: 建立 TMDb 查找表
|
||||
let tmdb_seeds: Vec<(i32, String, Vec<f32>)> = tmdb_rows;
|
||||
|
||||
// Step 4: 迭代匹配
|
||||
const TH: f32 = 0.50;
|
||||
let mut matched: HashMap<i32, String> = HashMap::new(); // trace_id → identity_name
|
||||
|
||||
// Round 1: 直接比對 TMDb
|
||||
for (&tid, faces) in &trace_faces {
|
||||
let mut best_name = String::new();
|
||||
let mut best_sim = 0.0f32;
|
||||
for (_, ref name, ref tmdb_emb) in &tmdb_seeds {
|
||||
for face_emb in faces {
|
||||
let s = cosine_similarity(face_emb, tmdb_emb);
|
||||
if s > best_sim { best_sim = s; best_name = name.clone(); }
|
||||
}
|
||||
}
|
||||
if best_sim >= TH {
|
||||
matched.insert(tid, best_name);
|
||||
}
|
||||
}
|
||||
tracing::info!("[FaceMatch] Round 1: {} matched ({}%)", matched.len(), matched.len() * 100 / total_traces);
|
||||
|
||||
// Round 2+: 用已匹配的 face 作為 seed 傳播
|
||||
for round_n in 2..=10 {
|
||||
let prev = matched.len();
|
||||
// 建立 seed pool: name → Vec<embedding>
|
||||
let mut seed_pool: HashMap<String, Vec<&Vec<f32>>> = HashMap::new();
|
||||
for (&tid, name) in &matched {
|
||||
if let Some(faces) = trace_faces.get(&tid) {
|
||||
seed_pool.entry(name.clone()).or_default().extend(faces.iter());
|
||||
}
|
||||
}
|
||||
|
||||
let mut new_matches: Vec<(i32, String)> = Vec::new();
|
||||
for (&tid, faces) in &trace_faces {
|
||||
if matched.contains_key(&tid) { continue; }
|
||||
let mut best_name = String::new();
|
||||
let mut best_sim = 0.0f32;
|
||||
if faces.is_empty() { continue; }
|
||||
let ref_face = &faces[0];
|
||||
for (name, seed_faces) in &seed_pool {
|
||||
for seed in seed_faces {
|
||||
let s = cosine_similarity(ref_face, seed);
|
||||
if s > best_sim { best_sim = s; best_name = name.clone(); }
|
||||
}
|
||||
}
|
||||
if best_sim >= TH {
|
||||
new_matches.push((tid, best_name));
|
||||
}
|
||||
}
|
||||
for (tid, name) in new_matches {
|
||||
matched.insert(tid, name);
|
||||
}
|
||||
let new = matched.len() - prev;
|
||||
tracing::info!("[FaceMatch] Round {}: +{} matched (total {}, {}%)", round_n, new, matched.len(), matched.len() * 100 / total_traces);
|
||||
if new < 5 { break; }
|
||||
}
|
||||
|
||||
// Step 5: 寫入 DB
|
||||
let mut updated = 0usize;
|
||||
for (tid, name) in &matched {
|
||||
let id_opt = sqlx::query_scalar::<_, Option<i32>>(
|
||||
"SELECT id FROM dev.identities WHERE name=$1 AND source='tmdb'"
|
||||
)
|
||||
.bind(name)
|
||||
.fetch_optional(pool).await?;
|
||||
if let Some(identity_id) = id_opt {
|
||||
let _ = sqlx::query(
|
||||
"UPDATE dev.face_detections SET identity_id=$1 WHERE file_uuid=$2 AND trace_id=$3"
|
||||
)
|
||||
.bind(identity_id)
|
||||
.bind(file_uuid)
|
||||
.bind(tid)
|
||||
.execute(pool).await;
|
||||
updated += 1;
|
||||
}
|
||||
}
|
||||
|
||||
tracing::info!("[FaceMatch] Done: {}/{} traces matched ({}%)", matched.len(), total_traces, matched.len() * 100 / total_traces);
|
||||
Ok(updated)
|
||||
}
|
||||
|
||||
/// Bind ASRX speakers to face traces based on temporal overlap.
|
||||
/// Reads face_detections (trace_id, identity_id, frame_number) and ASRX
|
||||
/// segments (speaker_id, start_time, end_time), computes overlap,
|
||||
/// and stores bindings in identity_bindings table.
|
||||
pub async fn bind_speakers(pool: &sqlx::PgPool, file_uuid: &str) -> anyhow::Result<usize> {
|
||||
// Load face traces with identity_id and frame numbers
|
||||
let traces = sqlx::query_as::<_, (i32, Vec<i32>)>(
|
||||
"SELECT trace_id, array_agg(frame_number ORDER BY frame_number) \
|
||||
FROM dev.face_detections WHERE file_uuid=$1 AND trace_id IS NOT NULL AND identity_id IS NOT NULL \
|
||||
GROUP BY trace_id"
|
||||
)
|
||||
.bind(file_uuid)
|
||||
.fetch_all(pool).await?;
|
||||
|
||||
if traces.is_empty() {
|
||||
tracing::info!("[SpeakerBind] No face traces with identities");
|
||||
return Ok(0);
|
||||
}
|
||||
|
||||
// Load ASRX speakers from the output JSON
|
||||
let output_dir = std::env::var("MOMENTRY_OUTPUT_DIR")
|
||||
.unwrap_or_else(|_| "/Users/accusys/momentry/output".to_string());
|
||||
let asrx_path = std::path::Path::new(&output_dir).join(format!("{}.asrx.json", file_uuid));
|
||||
|
||||
let asrx_data: serde_json::Value = match std::fs::read_to_string(&asrx_path) {
|
||||
Ok(s) => serde_json::from_str(&s).unwrap_or_default(),
|
||||
Err(_) => {
|
||||
tracing::info!("[SpeakerBind] No ASRX file found");
|
||||
return Ok(0);
|
||||
}
|
||||
};
|
||||
|
||||
// Extract speaker segments: speaker_id → [(start_time, end_time)]
|
||||
use std::collections::HashMap;
|
||||
let mut speakers: HashMap<String, Vec<(f64, f64)>> = HashMap::new();
|
||||
if let Some(segments) = asrx_data.get("segments").and_then(|s| s.as_array()) {
|
||||
for seg in segments {
|
||||
let sid = seg.get("speaker_id").and_then(|s| s.as_str())
|
||||
.or_else(|| seg.get("speaker").and_then(|s| s.as_str()));
|
||||
if let Some(sid) = sid {
|
||||
let start = seg.get("start_time").or_else(|| seg.get("start")).and_then(|v| v.as_f64()).unwrap_or(0.0);
|
||||
let end = seg.get("end_time").or_else(|| seg.get("end")).and_then(|v| v.as_f64()).unwrap_or(0.0);
|
||||
speakers.entry(sid.to_string()).or_default().push((start, end));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if speakers.is_empty() {
|
||||
tracing::info!("[SpeakerBind] No speakers found in ASRX data");
|
||||
return Ok(0);
|
||||
}
|
||||
|
||||
// Get fps for frame-to-time conversion
|
||||
let fps: f64 = 25.0; // default, could also read from DB
|
||||
|
||||
// For each trace, compute overlap with each speaker
|
||||
let mut bindings = 0usize;
|
||||
for (trace_id, frames) in &traces {
|
||||
if frames.is_empty() { continue; }
|
||||
|
||||
// Get identity_id for this trace
|
||||
let identity_id: Option<i32> = sqlx::query_scalar(
|
||||
"SELECT identity_id FROM dev.face_detections WHERE file_uuid=$1 AND trace_id=$2 AND identity_id IS NOT NULL LIMIT 1"
|
||||
)
|
||||
.bind(file_uuid).bind(trace_id)
|
||||
.fetch_optional(pool).await?.flatten();
|
||||
|
||||
if identity_id.is_none() { continue; }
|
||||
let identity_id = identity_id.unwrap();
|
||||
|
||||
// Compute overlap with each speaker
|
||||
let mut best_speaker = String::new();
|
||||
let mut best_overlap = 0usize;
|
||||
|
||||
for (speaker_id, segments) in &speakers {
|
||||
let mut overlap = 0usize;
|
||||
for &fn_num in frames {
|
||||
let frame_time = fn_num as f64 / fps;
|
||||
for (start, end) in segments {
|
||||
if frame_time >= *start && frame_time <= *end {
|
||||
overlap += 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if overlap > best_overlap {
|
||||
best_overlap = overlap;
|
||||
best_speaker = speaker_id.clone();
|
||||
}
|
||||
}
|
||||
|
||||
// Only bind if meaningful overlap
|
||||
let overlap_ratio = best_overlap as f64 / frames.len() as f64;
|
||||
if overlap_ratio > 0.3 && !best_speaker.is_empty() {
|
||||
let metadata = serde_json::json!({
|
||||
"trace_id": trace_id,
|
||||
"overlap_frames": best_overlap,
|
||||
"total_frames": frames.len(),
|
||||
"overlap_ratio": overlap_ratio,
|
||||
});
|
||||
|
||||
let _ = sqlx::query(
|
||||
"INSERT INTO dev.identity_bindings (identity_id, identity_type, identity_value, confidence, metadata) \
|
||||
VALUES ($1, 'speaker', $2, $3, $4::jsonb) \
|
||||
ON CONFLICT (identity_id, identity_type, identity_value) DO UPDATE SET confidence = EXCLUDED.confidence, metadata = EXCLUDED.metadata"
|
||||
)
|
||||
.bind(identity_id)
|
||||
.bind(&best_speaker)
|
||||
.bind(overlap_ratio)
|
||||
.bind(&metadata)
|
||||
.execute(pool).await;
|
||||
|
||||
bindings += 1;
|
||||
}
|
||||
}
|
||||
|
||||
tracing::info!("[SpeakerBind] Created {}/{} speaker bindings", bindings, traces.len());
|
||||
Ok(bindings)
|
||||
}
|
||||
|
||||
/// Pipeline-triggered entry point: runs the full identity agent for a file.
|
||||
/// Reads face_clustered.json + asrx.json, extracts persons/speakers, creates identities,
|
||||
/// runs iterative face matching, and binds speakers.
|
||||
pub async fn run_identity_agent(db: &PostgresDb, file_uuid: &str) -> anyhow::Result<()> {
|
||||
let output_dir = std::env::var("MOMENTRY_OUTPUT_DIR")
|
||||
.unwrap_or_else(|_| "/Users/accusys/momentry/output".to_string());
|
||||
|
||||
let video_dir = PathBuf::from(&output_dir).join(file_uuid);
|
||||
let face_clustered_path = video_dir.join(format!("{}.face_clustered.json", file_uuid));
|
||||
let face_clustered_path = if face_clustered_path.exists() {
|
||||
face_clustered_path
|
||||
} else {
|
||||
PathBuf::from(&output_dir).join(format!("{}.face_clustered.json", file_uuid))
|
||||
};
|
||||
|
||||
if !face_clustered_path.exists() {
|
||||
tracing::warn!("[IdentityAgent] face_clustered.json not found for {}", file_uuid);
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let face_data: serde_json::Value = std::fs::read_to_string(&face_clustered_path)?.parse()?;
|
||||
let asrx_path = video_dir.join(format!("{}.asrx.json", file_uuid));
|
||||
let asrx_data: Option<serde_json::Value> = if asrx_path.exists() {
|
||||
Some(std::fs::read_to_string(&asrx_path)?.parse()?)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
let persons = extract_persons_from_face_data(&face_data);
|
||||
let speakers = extract_speakers_from_asrx_data(&asrx_data);
|
||||
let identities = analyze_person_speaker_overlap(&persons, &speakers);
|
||||
|
||||
let pool = db.pool();
|
||||
for id_result in &identities {
|
||||
let identity_name = format!("person_{}", id_result.person_ids.first().map(|s| &**s).unwrap_or("unknown"));
|
||||
let metadata = serde_json::json!({
|
||||
"source": "identity_agent",
|
||||
"trace_ids": id_result.person_ids,
|
||||
"speaker_ids": id_result.speaker_ids,
|
||||
"confidence": id_result.confidence,
|
||||
"evidence": {
|
||||
"speaker_overlap": id_result.evidence.speaker_overlap,
|
||||
"frame_ratio": id_result.evidence.frame_ratio,
|
||||
},
|
||||
"reasoning": id_result.reasoning,
|
||||
});
|
||||
let _ = sqlx::query(
|
||||
"INSERT INTO dev.identities (name, identity_type, source, metadata, status) VALUES ($1, 'people', 'auto', $2::jsonb, 'pending') ON CONFLICT DO NOTHING"
|
||||
)
|
||||
.bind(&identity_name)
|
||||
.bind(&metadata)
|
||||
.execute(pool)
|
||||
.await;
|
||||
}
|
||||
|
||||
let matched = match_faces_iterative(pool, file_uuid).await.unwrap_or(0);
|
||||
let bound = bind_speakers(pool, file_uuid).await.unwrap_or(0);
|
||||
|
||||
tracing::info!(
|
||||
"[IdentityAgent] Done for {}: {} identities, {} face matches, {} speaker bindings",
|
||||
file_uuid, identities.len(), matched, bound
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -282,16 +282,44 @@ async fn trace_video(
|
||||
let duration = (last_frame - first_frame) as f64 / fps + padding * 2.0;
|
||||
let seek = (start_sec - padding).max(0.0);
|
||||
|
||||
// Build filters: per-frame bbox + text
|
||||
// Build filters: bbox+text holding at last detection until next one
|
||||
let mut parts: Vec<String> = Vec::new();
|
||||
for (frame, x, y, w, h) in &rows {
|
||||
let offset = frame - first_frame + (padding * fps) as i32;
|
||||
for (i, (frame, x, y, w, h)) in rows.iter().enumerate() {
|
||||
// Hold this detection until the next one (or end)
|
||||
let next_frame = if i + 1 < rows.len() {
|
||||
rows[i + 1].0
|
||||
} else {
|
||||
// For last detection, extend to duration end
|
||||
last_frame + (padding * fps) as i32
|
||||
};
|
||||
let start_offset = frame - first_frame + (padding * fps) as i32;
|
||||
let end_offset = next_frame - first_frame + (padding * fps) as i32;
|
||||
|
||||
// Bbox: visible from this frame until next detection
|
||||
parts.push(format!(
|
||||
"drawbox=x={}:y={}:w={}:h={}:color=red@0.8:thickness=8:enable='eq(n,{})'",
|
||||
x, y, w, h, offset
|
||||
"drawbox=x={}:y={}:w={}:h={}:color=red@0.8:thickness=8:enable='between(n,{},{})'",
|
||||
x, y, w, h, start_offset, end_offset - 1
|
||||
));
|
||||
// Text: same hold behavior
|
||||
let label = format!("t{}", trace_id);
|
||||
render_text(&mut parts, &label, *x + 6, *y + 6, Some(offset));
|
||||
let mut tx = *x + 6;
|
||||
let mut ty = *y + 6;
|
||||
for ch in label.chars() {
|
||||
let bm = bitmap_char(ch);
|
||||
for (row, bits) in bm.iter().enumerate() {
|
||||
for col in 0..5 {
|
||||
if bits & (1 << (4 - col)) != 0 {
|
||||
let dx = tx + col as i32 * 3;
|
||||
let dy = ty + row as i32 * 3;
|
||||
parts.push(format!(
|
||||
"drawbox=x={}:y={}:w=3:h=3:color=white@1.0:t=fill:enable='between(n,{},{})'",
|
||||
dx, dy, start_offset, end_offset - 1
|
||||
));
|
||||
}
|
||||
}
|
||||
}
|
||||
tx += CHAR_ADVANCE;
|
||||
}
|
||||
}
|
||||
|
||||
let vf = if parts.is_empty() {
|
||||
|
||||
@@ -162,7 +162,7 @@ async fn get_ollama_embedding(
|
||||
) -> Result<Vec<f32>, Box<dyn std::error::Error + Send + Sync>> {
|
||||
let client = reqwest::Client::new();
|
||||
let payload = serde_json::json!({
|
||||
"model": "nomic-embed-text",
|
||||
"model": "mxbai-embed-large",
|
||||
"prompt": text
|
||||
});
|
||||
|
||||
|
||||
@@ -26,6 +26,7 @@ use super::identity_api;
|
||||
use super::identity_binding;
|
||||
use super::middleware::api_key_validation;
|
||||
use super::search::search_routes;
|
||||
use super::trace_agent_api;
|
||||
use super::universal_search::universal_search_routes;
|
||||
use super::visual_chunk_search;
|
||||
use crate::core::chunk::types::Chunk;
|
||||
@@ -794,8 +795,6 @@ async fn register_single_file(
|
||||
.arg(&cut_script)
|
||||
.arg(&canonical_path)
|
||||
.arg(&cut_path)
|
||||
.arg("--threshold")
|
||||
.arg("27")
|
||||
.output();
|
||||
if let Ok(output) = cut_output {
|
||||
if output.status.success() {
|
||||
@@ -2246,42 +2245,58 @@ async fn list_jobs(Query(params): Query<JobsQuery>) -> Result<Json<JobListRespon
|
||||
.await
|
||||
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
|
||||
|
||||
// TODO: 需要修改 PostgresDb::get_pending_jobs 以支持分頁和狀態過濾
|
||||
// 目前先使用現有方法,獲取所有工作然後手動分頁
|
||||
let jobs = pg
|
||||
.get_pending_jobs(1000) // 臨時解決方案:獲取較多工作
|
||||
let table = crate::core::db::schema::table_name("monitor_jobs");
|
||||
|
||||
// Build status IN clause
|
||||
let statuses: Vec<String> = status_filter
|
||||
.split(',')
|
||||
.map(|s| format!("'{}'", s.trim()))
|
||||
.collect();
|
||||
let status_clause = statuses.join(",");
|
||||
|
||||
let query = format!(
|
||||
"SELECT id, uuid, video_path, status, current_processor, progress_total, progress_current,
|
||||
error_count, last_error, started_at::TEXT, updated_at::TEXT, created_at::TEXT,
|
||||
processors, completed_processors, failed_processors, video_id
|
||||
FROM {}
|
||||
WHERE status IN ({})
|
||||
ORDER BY created_at DESC
|
||||
LIMIT {} OFFSET {}",
|
||||
table, status_clause, page_size, offset
|
||||
);
|
||||
|
||||
let count_query = format!(
|
||||
"SELECT COUNT(*) FROM {} WHERE status IN ({})",
|
||||
table, status_clause
|
||||
);
|
||||
|
||||
let total_count: i64 = sqlx::query_scalar(&count_query)
|
||||
.fetch_one(pg.pool())
|
||||
.await
|
||||
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
|
||||
|
||||
// 過濾狀態
|
||||
let filtered_jobs: Vec<_> = jobs
|
||||
.into_iter()
|
||||
.filter(|j| {
|
||||
let job_status = j.status.as_str();
|
||||
status_filter.split(',').any(|s| s.trim() == job_status)
|
||||
})
|
||||
.collect();
|
||||
use crate::core::db::MonitorJobStatus;
|
||||
|
||||
let total_count = filtered_jobs.len() as i64;
|
||||
let rows = sqlx::query(&query)
|
||||
.fetch_all(pg.pool())
|
||||
.await
|
||||
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
|
||||
|
||||
// 手動分頁
|
||||
let paginated_jobs: Vec<_> = filtered_jobs
|
||||
let job_infos: Vec<JobInfoResponse> = rows
|
||||
.into_iter()
|
||||
.skip(offset as usize)
|
||||
.take(page_size)
|
||||
.collect();
|
||||
|
||||
let job_infos: Vec<JobInfoResponse> = paginated_jobs
|
||||
.into_iter()
|
||||
.map(|j| JobInfoResponse {
|
||||
id: j.id,
|
||||
uuid: j.uuid,
|
||||
status: j.status.as_str().to_string(),
|
||||
current_processor: j.current_processor,
|
||||
progress_current: j.progress_current,
|
||||
progress_total: j.progress_total,
|
||||
created_at: j.created_at.to_string(),
|
||||
started_at: j.started_at.map(|t| t.to_string()),
|
||||
.map(|r| {
|
||||
let status_str: String = r.try_get("status").unwrap_or_default();
|
||||
let status = MonitorJobStatus::from_db_str(&status_str).unwrap_or(MonitorJobStatus::Pending);
|
||||
JobInfoResponse {
|
||||
id: r.try_get("id").unwrap_or(0),
|
||||
uuid: r.try_get("uuid").unwrap_or_default(),
|
||||
status: status.as_str().to_string(),
|
||||
current_processor: r.try_get("current_processor").ok(),
|
||||
progress_current: r.try_get("progress_current").unwrap_or(0),
|
||||
progress_total: r.try_get("progress_total").unwrap_or(0),
|
||||
created_at: r.try_get::<String, _>("created_at").unwrap_or_default(),
|
||||
started_at: r.try_get::<String, _>("started_at").ok(),
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
|
||||
@@ -2537,6 +2552,7 @@ pub async fn start_server(host: &str, port: u16) -> anyhow::Result<()> {
|
||||
.merge(super::identity_agent_api::identity_agent_routes()) // Phase 5 Routes
|
||||
.merge(five_w1h_agent_api::five_w1h_agent_routes()) // Phase 3 Routes (5W1H Agent)
|
||||
.merge(super::media_api::bbox_routes()) // Media: video/bbox/thumbnail
|
||||
.merge(super::trace_agent_api::trace_agent_routes()) // Trace listing
|
||||
.merge(search_routes()) // Smart search drill-down
|
||||
.merge(universal_search_routes()) // Universal / frames / persons search
|
||||
.merge(protected_routes)
|
||||
@@ -3242,7 +3258,7 @@ async fn list_pre_chunks(
|
||||
let data_query = format!(
|
||||
"SELECT id, processor_type, coordinate_type, coordinate_index,
|
||||
start_frame, end_frame, start_time, end_time, fps,
|
||||
data, identity_id, confidence, created_at
|
||||
data, created_at
|
||||
FROM {}
|
||||
WHERE file_uuid = $1 {}
|
||||
ORDER BY coordinate_index ASC
|
||||
@@ -3261,8 +3277,6 @@ async fn list_pre_chunks(
|
||||
Option<f64>,
|
||||
Option<f64>,
|
||||
serde_json::Value,
|
||||
Option<uuid::Uuid>,
|
||||
Option<f64>,
|
||||
chrono::DateTime<chrono::Utc>,
|
||||
)> = sqlx::query_as(&data_query)
|
||||
.bind(&uuid)
|
||||
@@ -3283,9 +3297,9 @@ async fn list_pre_chunks(
|
||||
end_time: row.7,
|
||||
fps: row.8,
|
||||
data: row.9.clone(),
|
||||
identity_id: row.10.map(|id| id.to_string()),
|
||||
confidence: row.11,
|
||||
created_at: row.12.to_rfc3339(),
|
||||
identity_id: None,
|
||||
confidence: None,
|
||||
created_at: row.10.to_rfc3339(),
|
||||
})
|
||||
.collect();
|
||||
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
use axum::{
|
||||
extract::{Path, State},
|
||||
extract::{Path, Query, State},
|
||||
http::StatusCode,
|
||||
response::Json,
|
||||
routing::post,
|
||||
routing::{get, post},
|
||||
Router,
|
||||
};
|
||||
use serde::{Deserialize, Serialize};
|
||||
@@ -10,10 +10,15 @@ use serde::{Deserialize, Serialize};
|
||||
use crate::core::db::PostgresDb;
|
||||
|
||||
pub fn trace_agent_routes() -> Router<crate::api::server::AppState> {
|
||||
Router::new().route(
|
||||
"/api/v1/file/:file_uuid/face_trace/sortby",
|
||||
post(list_traces_sorted),
|
||||
)
|
||||
Router::new()
|
||||
.route(
|
||||
"/api/v1/file/:file_uuid/face_trace/sortby",
|
||||
post(list_traces_sorted),
|
||||
)
|
||||
.route(
|
||||
"/api/v1/file/:file_uuid/trace/:trace_id/faces",
|
||||
get(list_trace_faces),
|
||||
)
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
@@ -21,6 +26,8 @@ struct TracesRequest {
|
||||
min_faces: Option<i64>,
|
||||
sort_by: Option<String>,
|
||||
limit: Option<i64>,
|
||||
min_confidence: Option<f64>,
|
||||
max_confidence: Option<f64>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize)]
|
||||
@@ -53,14 +60,15 @@ async fn list_traces_sorted(
|
||||
let min_faces = req.min_faces.unwrap_or(1);
|
||||
let sort = req.sort_by.as_deref().unwrap_or("first_appearance");
|
||||
let limit = req.limit.unwrap_or(500);
|
||||
let min_confidence = req.min_confidence.unwrap_or(0.0);
|
||||
let max_confidence = req.max_confidence.unwrap_or(1.0);
|
||||
|
||||
let order_clause = match sort {
|
||||
"face_count" => "face_count DESC",
|
||||
"duration" => "duration_sec DESC",
|
||||
"duration" => "(MAX(frame_number) - MIN(frame_number)) DESC",
|
||||
_ => "first_frame ASC",
|
||||
};
|
||||
|
||||
// Get actual video FPS
|
||||
let fps: f64 =
|
||||
sqlx::query_scalar("SELECT COALESCE(fps, 24.0) FROM dev.videos WHERE file_uuid = $1")
|
||||
.bind(&file_uuid)
|
||||
@@ -84,6 +92,7 @@ async fn list_traces_sorted(
|
||||
AVG(confidence) AS avg_confidence
|
||||
FROM dev.face_detections
|
||||
WHERE file_uuid = $1 AND trace_id IS NOT NULL
|
||||
AND confidence >= $4 AND confidence <= $5
|
||||
GROUP BY trace_id
|
||||
HAVING COUNT(*) >= $2
|
||||
ORDER BY {}
|
||||
@@ -103,6 +112,8 @@ async fn list_traces_sorted(
|
||||
.bind(&file_uuid)
|
||||
.bind(min_faces)
|
||||
.bind(limit)
|
||||
.bind(min_confidence)
|
||||
.bind(max_confidence)
|
||||
.fetch_all(state.db.pool())
|
||||
.await
|
||||
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?;
|
||||
@@ -138,3 +149,146 @@ async fn list_traces_sorted(
|
||||
traces,
|
||||
}))
|
||||
}
|
||||
|
||||
// ── Individual face detections for a trace ──
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct TraceFacesQuery {
|
||||
limit: Option<i64>,
|
||||
offset: Option<i64>,
|
||||
interpolate: Option<bool>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize)]
|
||||
struct TraceFaceItem {
|
||||
id: i32,
|
||||
start_frame: i32,
|
||||
start_time: f64,
|
||||
x: Option<i32>,
|
||||
y: Option<i32>,
|
||||
width: Option<i32>,
|
||||
height: Option<i32>,
|
||||
confidence: f64,
|
||||
interpolated: bool,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize)]
|
||||
struct TraceFacesResponse {
|
||||
success: bool,
|
||||
file_uuid: String,
|
||||
trace_id: i32,
|
||||
total: i64,
|
||||
faces: Vec<TraceFaceItem>,
|
||||
}
|
||||
|
||||
fn lerp_i32(a: Option<i32>, b: Option<i32>, t: f64) -> Option<i32> {
|
||||
match (a, b) {
|
||||
(Some(av), Some(bv)) => Some((av as f64 + (bv - av) as f64 * t).round() as i32),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
async fn list_trace_faces(
|
||||
State(state): State<crate::api::server::AppState>,
|
||||
Path((file_uuid, trace_id)): Path<(String, i32)>,
|
||||
Query(q): Query<TraceFacesQuery>,
|
||||
) -> Result<Json<TraceFacesResponse>, (StatusCode, String)> {
|
||||
let limit = q.limit.unwrap_or(200).min(1000);
|
||||
let offset = q.offset.unwrap_or(0);
|
||||
let interpolate = q.interpolate.unwrap_or(false);
|
||||
|
||||
let fps: f64 =
|
||||
sqlx::query_scalar("SELECT COALESCE(fps, 24.0) FROM dev.videos WHERE file_uuid = $1")
|
||||
.bind(&file_uuid)
|
||||
.fetch_optional(state.db.pool())
|
||||
.await
|
||||
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?
|
||||
.unwrap_or(24.0);
|
||||
|
||||
let total_detected: i64 = sqlx::query_scalar(
|
||||
"SELECT COUNT(*) FROM dev.face_detections WHERE file_uuid = $1 AND trace_id = $2"
|
||||
)
|
||||
.bind(&file_uuid)
|
||||
.bind(trace_id)
|
||||
.fetch_one(state.db.pool())
|
||||
.await
|
||||
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?;
|
||||
|
||||
let rows: Vec<(i32, i32, Option<i32>, Option<i32>, Option<i32>, Option<i32>, f32)> =
|
||||
sqlx::query_as(
|
||||
"SELECT id, frame_number, x, y, width, height, confidence
|
||||
FROM dev.face_detections
|
||||
WHERE file_uuid = $1 AND trace_id = $2
|
||||
ORDER BY frame_number ASC
|
||||
LIMIT $3 OFFSET $4"
|
||||
)
|
||||
.bind(&file_uuid)
|
||||
.bind(trace_id)
|
||||
.bind(limit)
|
||||
.bind(offset)
|
||||
.fetch_all(state.db.pool())
|
||||
.await
|
||||
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?;
|
||||
|
||||
let mut faces: Vec<TraceFaceItem> = Vec::new();
|
||||
|
||||
for (i, (id, frame, x, y, w, h, conf)) in rows.iter().enumerate() {
|
||||
let cur = (x, y, w, h);
|
||||
|
||||
// Add interpolated frames between previous and current detection
|
||||
if interpolate && i > 0 {
|
||||
let prev = &rows[i - 1];
|
||||
let prev_frame = prev.1;
|
||||
let gap = frame - prev_frame;
|
||||
if gap > 1 {
|
||||
for mid in 1..gap {
|
||||
let t = mid as f64 / gap as f64;
|
||||
let mid_x = lerp_i32(prev.2, *x, t);
|
||||
let mid_y = lerp_i32(prev.3, *y, t);
|
||||
let mid_w = lerp_i32(prev.4, *w, t);
|
||||
let mid_h = lerp_i32(prev.5, *h, t);
|
||||
let mid_frame = prev_frame + mid;
|
||||
faces.push(TraceFaceItem {
|
||||
id: 0,
|
||||
start_frame: mid_frame,
|
||||
start_time: (mid_frame as f64 / fps * 10.0).round() / 10.0,
|
||||
x: mid_x,
|
||||
y: mid_y,
|
||||
width: mid_w,
|
||||
height: mid_h,
|
||||
confidence: 0.0,
|
||||
interpolated: true,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Add the real detection
|
||||
let frame_val = *frame;
|
||||
faces.push(TraceFaceItem {
|
||||
id: *id,
|
||||
start_frame: frame_val,
|
||||
start_time: (frame_val as f64 / fps * 10.0).round() / 10.0,
|
||||
x: *x,
|
||||
y: *y,
|
||||
width: *w,
|
||||
height: *h,
|
||||
confidence: *conf as f64,
|
||||
interpolated: false,
|
||||
});
|
||||
}
|
||||
|
||||
let total = if interpolate && faces.len() as i64 > total_detected {
|
||||
faces.len() as i64
|
||||
} else {
|
||||
total_detected
|
||||
};
|
||||
|
||||
Ok(Json(TraceFacesResponse {
|
||||
success: true,
|
||||
file_uuid,
|
||||
trace_id,
|
||||
total,
|
||||
faces,
|
||||
}))
|
||||
}
|
||||
|
||||
@@ -74,7 +74,7 @@ pub async fn ingest_rule3(pool: &PgPool, file_uuid: &str) -> Result<usize> {
|
||||
let rule1_rows: Vec<(String,)> = sqlx::query_as(
|
||||
r#"
|
||||
SELECT chunk_id FROM chunks
|
||||
WHERE uuid = $1 AND chunk_type = 'sentence' AND rule = 'rule_1'
|
||||
WHERE file_uuid = $1 AND chunk_type = 'sentence'
|
||||
AND start_frame >= $2
|
||||
AND end_frame <= $3
|
||||
"#,
|
||||
@@ -99,7 +99,7 @@ pub async fn ingest_rule3(pool: &PgPool, file_uuid: &str) -> Result<usize> {
|
||||
let texts: Vec<String> = sqlx::query_scalar(
|
||||
r#"
|
||||
SELECT text_content FROM chunks
|
||||
WHERE uuid = $1 AND chunk_type = 'sentence' AND rule = 'rule_1'
|
||||
WHERE file_uuid = $1 AND chunk_type = 'sentence'
|
||||
AND start_frame >= $2
|
||||
AND end_frame <= $3
|
||||
ORDER BY start_frame ASC
|
||||
@@ -135,7 +135,7 @@ pub async fn ingest_rule3(pool: &PgPool, file_uuid: &str) -> Result<usize> {
|
||||
);
|
||||
|
||||
// 4. Insert into dev.chunks
|
||||
let fps_query: Option<f64> = sqlx::query_scalar("SELECT fps FROM videos WHERE uuid = $1")
|
||||
let fps_query: Option<f64> = sqlx::query_scalar("SELECT fps FROM videos WHERE file_uuid = $1")
|
||||
.bind(file_uuid)
|
||||
.fetch_optional(&mut *tx)
|
||||
.await?;
|
||||
@@ -150,11 +150,11 @@ pub async fn ingest_rule3(pool: &PgPool, file_uuid: &str) -> Result<usize> {
|
||||
sqlx::query(
|
||||
r#"
|
||||
INSERT INTO chunks (
|
||||
uuid, chunk_id, chunk_index, chunk_type,
|
||||
file_uuid, chunk_id, old_chunk_id, chunk_index, chunk_type,
|
||||
start_time, end_time, fps, start_frame, end_frame,
|
||||
content, text_content, summary_text, metadata, child_chunk_ids
|
||||
) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14)
|
||||
ON CONFLICT (uuid, chunk_id) DO NOTHING
|
||||
) VALUES ($1, $2, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14)
|
||||
ON CONFLICT (file_uuid, old_chunk_id) DO NOTHING
|
||||
"#,
|
||||
)
|
||||
.bind(file_uuid)
|
||||
|
||||
@@ -1241,7 +1241,7 @@ impl PostgresDb {
|
||||
.execute(&self.pool)
|
||||
.await?;
|
||||
|
||||
sqlx::query(&format!("DELETE FROM {} WHERE uuid = $1", chunks))
|
||||
sqlx::query(&format!("DELETE FROM {} WHERE file_uuid = $1", chunks))
|
||||
.bind(uuid)
|
||||
.execute(&self.pool)
|
||||
.await?;
|
||||
@@ -1279,7 +1279,7 @@ impl PostgresDb {
|
||||
pub async fn get_chunk_count(&self, uuid: &str) -> Result<(i64, i64)> {
|
||||
let chunks = schema::table_name("chunks");
|
||||
let sentence_count: i64 = sqlx::query_scalar(&format!(
|
||||
"SELECT COUNT(*) FROM {} WHERE uuid = $1 AND chunk_type = 'sentence'",
|
||||
"SELECT COUNT(*) FROM {} WHERE file_uuid = $1 AND chunk_type = 'sentence'",
|
||||
chunks
|
||||
))
|
||||
.bind(uuid)
|
||||
@@ -1287,7 +1287,7 @@ impl PostgresDb {
|
||||
.await?;
|
||||
|
||||
let time_count: i64 = sqlx::query_scalar(&format!(
|
||||
"SELECT COUNT(*) FROM {} WHERE uuid = $1 AND chunk_type = 'time_based'",
|
||||
"SELECT COUNT(*) FROM {} WHERE file_uuid = $1 AND chunk_type = 'time_based'",
|
||||
chunks
|
||||
))
|
||||
.bind(uuid)
|
||||
@@ -2567,9 +2567,9 @@ impl PostgresDb {
|
||||
|
||||
sqlx::query(&format!(
|
||||
r#"
|
||||
INSERT INTO {} (file_id, file_uuid, chunk_id, chunk_index, chunk_type, start_time, end_time, fps, start_frame, end_frame, text_content, content, metadata, vector_id, frame_count, pre_chunk_ids, parent_chunk_id, child_chunk_ids)
|
||||
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12::jsonb, $13::jsonb, $14, $15, $16, $17, $18)
|
||||
ON CONFLICT (file_uuid, chunk_id) DO UPDATE SET
|
||||
INSERT INTO {} (file_id, file_uuid, chunk_id, old_chunk_id, chunk_index, chunk_type, start_time, end_time, fps, start_frame, end_frame, text_content, content, metadata, vector_id, frame_count, pre_chunk_ids, parent_chunk_id, child_chunk_ids)
|
||||
VALUES ($1, $2, $3, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12::jsonb, $13::jsonb, $14, $15, $16, $17, $18)
|
||||
ON CONFLICT (file_uuid, old_chunk_id) DO UPDATE SET
|
||||
start_time = EXCLUDED.start_time,
|
||||
end_time = EXCLUDED.end_time,
|
||||
fps = EXCLUDED.fps,
|
||||
@@ -2642,9 +2642,9 @@ impl PostgresDb {
|
||||
|
||||
sqlx::query(&format!(
|
||||
r#"
|
||||
INSERT INTO {} (file_id, file_uuid, chunk_id, chunk_index, chunk_type, start_time, end_time, fps, start_frame, end_frame, text_content, content, metadata, vector_id, frame_count, pre_chunk_ids, parent_chunk_id, child_chunk_ids)
|
||||
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12::jsonb, $13::jsonb, $14, $15, $16, $17, $18)
|
||||
ON CONFLICT (file_uuid, chunk_id) DO UPDATE SET
|
||||
INSERT INTO {} (file_id, file_uuid, chunk_id, old_chunk_id, chunk_index, chunk_type, start_time, end_time, fps, start_frame, end_frame, text_content, content, metadata, vector_id, frame_count, pre_chunk_ids, parent_chunk_id, child_chunk_ids)
|
||||
VALUES ($1, $2, $3, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12::jsonb, $13::jsonb, $14, $15, $16, $17, $18)
|
||||
ON CONFLICT (file_uuid, old_chunk_id) DO UPDATE SET
|
||||
start_time = EXCLUDED.start_time,
|
||||
end_time = EXCLUDED.end_time,
|
||||
fps = EXCLUDED.fps,
|
||||
@@ -4453,7 +4453,7 @@ impl PostgresDb {
|
||||
COUNT(*) as chunks_count,
|
||||
COALESCE(SUM(end_frame - start_frame), 0) as chunks_frames
|
||||
FROM {}
|
||||
WHERE uuid = $1
|
||||
WHERE file_uuid = $1
|
||||
"#,
|
||||
chunks_table
|
||||
))
|
||||
@@ -4720,7 +4720,7 @@ impl PostgresDb {
|
||||
1 - (embedding <=> $1::vector) as similarity,
|
||||
bbox
|
||||
FROM {}
|
||||
WHERE file_uuid = $2
|
||||
WHERE uuid = $2
|
||||
AND embedding IS NOT NULL
|
||||
AND 1 - (embedding <=> $1::vector) >= $3
|
||||
ORDER BY embedding <=> $1::vector
|
||||
|
||||
@@ -88,6 +88,44 @@ impl QdrantDb {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// 確保指定 collection 存在,不存在則自動建立
|
||||
pub async fn ensure_collection(&self, collection: &str, vector_dim: usize) -> Result<()> {
|
||||
let url = format!("{}/collections/{}", self.base_url, collection);
|
||||
|
||||
let exists = self
|
||||
.client
|
||||
.get(&url)
|
||||
.header("api-key", &self.api_key)
|
||||
.send()
|
||||
.await
|
||||
.map(|r| r.status().is_success())
|
||||
.unwrap_or(false);
|
||||
|
||||
if exists {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let create_url = format!("{}/collections", self.base_url);
|
||||
let body = serde_json::json!({
|
||||
"vectors": {
|
||||
"size": vector_dim,
|
||||
"distance": "Cosine"
|
||||
}
|
||||
});
|
||||
|
||||
self.client
|
||||
.post(&create_url)
|
||||
.header("api-key", &self.api_key)
|
||||
.header("Content-Type", "application/json")
|
||||
.json(&body)
|
||||
.send()
|
||||
.await
|
||||
.context(format!("Failed to create Qdrant collection: {}", collection))?;
|
||||
|
||||
tracing::info!("Created Qdrant collection: {} (dim={})", collection, vector_dim);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// 將向量寫入指定 collection(支援多 collection)
|
||||
pub async fn upsert_vector_to_collection(
|
||||
&self,
|
||||
@@ -687,14 +725,13 @@ pub async fn sync_face_embeddings(file_uuid: &str) -> Result<()> {
|
||||
use sqlx::Row;
|
||||
|
||||
let pool = sqlx::PgPool::connect(&DATABASE_URL).await?;
|
||||
let schema = crate::core::config::DATABASE_SCHEMA.as_str();
|
||||
let table = crate::core::db::schema::table_name("face_detections");
|
||||
|
||||
let qdrant: QdrantDb = QdrantDb::new();
|
||||
|
||||
let query = format!(
|
||||
"SELECT id, trace_id, frame_number, embedding FROM {}.{} WHERE file_uuid = $1 AND embedding IS NOT NULL",
|
||||
schema, table
|
||||
"SELECT id, trace_id, frame_number, embedding FROM {} WHERE file_uuid = $1 AND embedding IS NOT NULL",
|
||||
table
|
||||
);
|
||||
let rows = sqlx::query(&query).bind(file_uuid).fetch_all(&pool).await?;
|
||||
|
||||
|
||||
@@ -19,15 +19,34 @@ struct EmbedResponse {
|
||||
embedding: Vec<f32>,
|
||||
}
|
||||
|
||||
#[derive(Deserialize, Debug)]
|
||||
struct OpenAIEmbedResponse {
|
||||
data: Vec<OpenAIEmbedData>,
|
||||
}
|
||||
|
||||
#[derive(Deserialize, Debug)]
|
||||
struct OpenAIEmbedData {
|
||||
embedding: Vec<f32>,
|
||||
}
|
||||
|
||||
impl Embedder {
|
||||
pub fn new(model: String) -> Self {
|
||||
Self::with_url(model, Self::default_url())
|
||||
}
|
||||
|
||||
pub fn with_url(model: String, base_url: String) -> Self {
|
||||
Self {
|
||||
model,
|
||||
client: Client::new(),
|
||||
base_url: "http://localhost:11434".to_string(),
|
||||
base_url,
|
||||
}
|
||||
}
|
||||
|
||||
fn default_url() -> String {
|
||||
std::env::var("MOMENTRY_EMBED_URL")
|
||||
.unwrap_or_else(|_| "http://localhost:11434".to_string())
|
||||
}
|
||||
|
||||
pub async fn embed_text(&self, text: &str) -> Result<Vec<f32>> {
|
||||
self.embed_with_prefix(text, "").await
|
||||
}
|
||||
@@ -41,32 +60,64 @@ impl Embedder {
|
||||
}
|
||||
|
||||
async fn embed_with_prefix(&self, text: &str, prefix: &str) -> Result<Vec<f32>> {
|
||||
let url = format!("{}/api/embeddings", self.base_url);
|
||||
let prompt = format!("{}{}", prefix, text);
|
||||
|
||||
let response = self
|
||||
.client
|
||||
.post(&url)
|
||||
.json(&EmbedRequest {
|
||||
model: self.model.clone(),
|
||||
prompt,
|
||||
})
|
||||
.send()
|
||||
.await
|
||||
.context("Failed to send embedding request to Ollama")?;
|
||||
// Ollama API: POST {base_url}/api/embeddings with {model, prompt}
|
||||
// OpenAI-compatible: POST {base_url}/v1/embeddings with {input, model}
|
||||
let is_openai = self.base_url.contains(":1143"); // llama.cpp ports: 11436, 11437
|
||||
|
||||
if !response.status().is_success() {
|
||||
let status = response.status();
|
||||
let body = response.text().await.unwrap_or_default();
|
||||
anyhow::bail!("Ollama API error ({}): {}", status, body);
|
||||
if is_openai {
|
||||
let url = format!("{}/v1/embeddings", self.base_url);
|
||||
let body = serde_json::json!({
|
||||
"input": prompt,
|
||||
"model": self.model,
|
||||
});
|
||||
let response = self
|
||||
.client
|
||||
.post(&url)
|
||||
.json(&body)
|
||||
.send()
|
||||
.await
|
||||
.context("Failed to send embedding request")?;
|
||||
|
||||
if !response.status().is_success() {
|
||||
let status = response.status();
|
||||
let body_text = response.text().await.unwrap_or_default();
|
||||
anyhow::bail!("Embedding API error ({}): {}", status, body_text);
|
||||
}
|
||||
|
||||
let result: OpenAIEmbedResponse = response
|
||||
.json()
|
||||
.await
|
||||
.context("Failed to parse embedding response")?;
|
||||
|
||||
Ok(result.data.into_iter().next().map(|d| d.embedding).unwrap_or_default())
|
||||
} else {
|
||||
let url = format!("{}/api/embeddings", self.base_url);
|
||||
let response = self
|
||||
.client
|
||||
.post(&url)
|
||||
.json(&EmbedRequest {
|
||||
model: self.model.clone(),
|
||||
prompt,
|
||||
})
|
||||
.send()
|
||||
.await
|
||||
.context("Failed to send embedding request to Ollama")?;
|
||||
|
||||
if !response.status().is_success() {
|
||||
let status = response.status();
|
||||
let body_text = response.text().await.unwrap_or_default();
|
||||
anyhow::bail!("Ollama API error ({}): {}", status, body_text);
|
||||
}
|
||||
|
||||
let result: EmbedResponse = response
|
||||
.json()
|
||||
.await
|
||||
.context("Failed to parse Ollama response")?;
|
||||
|
||||
Ok(result.embedding)
|
||||
}
|
||||
|
||||
let result: EmbedResponse = response
|
||||
.json()
|
||||
.await
|
||||
.context("Failed to parse Ollama response")?;
|
||||
|
||||
Ok(result.embedding)
|
||||
}
|
||||
|
||||
pub async fn embed_chunk_content(&self, chunk: &crate::core::chunk::Chunk) -> Result<Vec<f32>> {
|
||||
|
||||
@@ -233,14 +233,24 @@ impl PythonExecutor {
|
||||
Ok(())
|
||||
};
|
||||
|
||||
// 錯誤時 rename .json.tmp → .json.err
|
||||
// 錯誤時 rename .json.tmp → .json.err(若 .tmp 非有效 JSON)
|
||||
// 若 .tmp 是有效 JSON,保留為 .json(保留部分結果)
|
||||
let mark_failed = || {
|
||||
if let Some(tmp) = &tmp_path {
|
||||
if tmp.exists() {
|
||||
if let Some(out) = &output_path {
|
||||
let mut err_path = out.to_path_buf();
|
||||
err_path.set_extension("json.err");
|
||||
let _ = std::fs::rename(tmp, &err_path);
|
||||
let is_valid = std::fs::read_to_string(tmp)
|
||||
.ok()
|
||||
.and_then(|c| serde_json::from_str::<serde_json::Value>(&c).ok())
|
||||
.is_some();
|
||||
if is_valid {
|
||||
let _ = std::fs::rename(tmp, out);
|
||||
tracing::warn!("[Executor] Partial output preserved: {:?}", out);
|
||||
} else {
|
||||
let mut err_path = out.to_path_buf();
|
||||
err_path.set_extension("json.err");
|
||||
let _ = std::fs::rename(tmp, &err_path);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -65,10 +65,17 @@ pub async fn process_scene_classification(
|
||||
});
|
||||
}
|
||||
|
||||
let coreml_path = "/Users/accusys/models/resnet18_places365.mlpackage";
|
||||
let mut args = vec![video_path, output_path];
|
||||
if std::path::Path::new(coreml_path).exists() {
|
||||
args.push("--model");
|
||||
args.push(coreml_path);
|
||||
}
|
||||
|
||||
executor
|
||||
.run(
|
||||
"scene_classifier.py",
|
||||
&[video_path, output_path],
|
||||
&args,
|
||||
uuid,
|
||||
"SCENE",
|
||||
Some(SCENE_TIMEOUT),
|
||||
|
||||
@@ -1,146 +1,235 @@
|
||||
use anyhow::{Context, Result};
|
||||
use serde::Deserialize;
|
||||
use tracing::{error, info};
|
||||
use std::collections::HashMap;
|
||||
use tracing::{error, info, warn};
|
||||
|
||||
use crate::core::db::PostgresDb;
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct FaceDetection {
|
||||
face_id: String,
|
||||
embedding: Vec<f32>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct TmdbIdentity {
|
||||
id: i64,
|
||||
id: i32,
|
||||
name: String,
|
||||
face_embedding: Vec<f32>,
|
||||
}
|
||||
|
||||
const MATCH_THRESHOLD: f32 = 0.55;
|
||||
|
||||
fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
|
||||
if a.len() != b.len() || a.is_empty() {
|
||||
return 0.0;
|
||||
}
|
||||
let dot: f32 = a.iter().zip(b.iter()).map(|(x, y)| x * y).sum();
|
||||
let norm_a: f32 = a.iter().map(|x| x * x).sum::<f32>().sqrt();
|
||||
let norm_b: f32 = b.iter().map(|x| x * x).sum::<f32>().sqrt();
|
||||
if norm_a == 0.0 || norm_b == 0.0 {
|
||||
return 0.0;
|
||||
}
|
||||
dot / (norm_a * norm_b)
|
||||
if a.len() != b.len() || a.is_empty() { return 0.0; }
|
||||
let dot: f32 = a.iter().zip(b).map(|(x, y)| x * y).sum();
|
||||
let na: f32 = a.iter().map(|x| x * x).sum::<f32>().sqrt();
|
||||
let nb: f32 = b.iter().map(|x| x * x).sum::<f32>().sqrt();
|
||||
if na == 0.0 || nb == 0.0 { 0.0 } else { dot / (na * nb) }
|
||||
}
|
||||
|
||||
/// Match unassigned face detections against TMDb-sourced identities.
|
||||
/// For each face detection with identity_id IS NULL, compute cosine similarity
|
||||
/// against all TMDb identities that have face_embedding set.
|
||||
/// If similarity > MATCH_THRESHOLD, bind the face to the identity.
|
||||
/// Match face detections against TMDb identities using iterative multi-angle propagation.
|
||||
/// Round 1: seed match against TMDb face_embeddings (threshold 0.50)
|
||||
/// Round 2+: propagate to remaining traces using matched faces as reference
|
||||
pub async fn match_faces_against_tmdb(db: &PostgresDb, file_uuid: &str) -> Result<usize> {
|
||||
// Step 1: Fetch unassigned face detections for this file
|
||||
let detections: Vec<FaceDetection> = sqlx::query_as::<_, (String, Vec<f32>)>(
|
||||
"SELECT face_id, embedding FROM dev.face_detections \
|
||||
WHERE file_uuid = $1 AND identity_id IS NULL AND embedding IS NOT NULL",
|
||||
let pool = db.pool();
|
||||
|
||||
// Step 1: Load TMDb identities with face embeddings
|
||||
let tmdb_rows = sqlx::query_as::<_, (i32, String, Vec<f32>)>(
|
||||
"SELECT id, name, face_embedding::real[] FROM dev.identities WHERE source='tmdb' AND face_embedding IS NOT NULL"
|
||||
)
|
||||
.fetch_all(pool).await?;
|
||||
|
||||
if tmdb_rows.is_empty() {
|
||||
info!("[TKG-MATCH] No TMDb identities with face embeddings");
|
||||
return Ok(0);
|
||||
}
|
||||
info!("[TKG-MATCH] {} TMDb seeds loaded", tmdb_rows.len());
|
||||
|
||||
// Step 2: Load face_detections grouped by trace_id
|
||||
let fd_rows = sqlx::query_as::<_, (i32, Vec<f32>)>(
|
||||
"SELECT trace_id, embedding FROM dev.face_detections \
|
||||
WHERE file_uuid=$1 AND trace_id IS NOT NULL AND embedding IS NOT NULL \
|
||||
ORDER BY trace_id"
|
||||
)
|
||||
.bind(file_uuid)
|
||||
.fetch_all(db.pool())
|
||||
.await
|
||||
.context("Failed to fetch unassigned face detections")?
|
||||
.into_iter()
|
||||
.map(|(face_id, embedding)| FaceDetection { face_id, embedding })
|
||||
.collect();
|
||||
.fetch_all(pool).await?;
|
||||
|
||||
if detections.is_empty() {
|
||||
info!(
|
||||
"[TMDB-FACE] No unassigned face detections for {}",
|
||||
file_uuid
|
||||
);
|
||||
if fd_rows.is_empty() {
|
||||
info!("[TKG-MATCH] No face detections for {}", file_uuid);
|
||||
return Ok(0);
|
||||
}
|
||||
|
||||
// Step 2: Fetch TMDb identities with face embeddings
|
||||
let identities: Vec<TmdbIdentity> = sqlx::query_as::<_, (i64, String, Vec<f32>)>(
|
||||
"SELECT id, name, face_embedding::real[] FROM dev.identities \
|
||||
WHERE source = 'tmdb' AND face_embedding IS NOT NULL",
|
||||
)
|
||||
.fetch_all(db.pool())
|
||||
.await
|
||||
.context("Failed to fetch TMDb identities")?
|
||||
.into_iter()
|
||||
.map(|(id, name, emb)| TmdbIdentity {
|
||||
id,
|
||||
name,
|
||||
face_embedding: emb,
|
||||
})
|
||||
.collect();
|
||||
|
||||
if identities.is_empty() {
|
||||
info!("[TMDB-FACE] No TMDb identities with face embeddings for matching");
|
||||
return Ok(0);
|
||||
let mut trace_faces: HashMap<i32, Vec<Vec<f32>>> = HashMap::new();
|
||||
for (tid, emb) in &fd_rows {
|
||||
trace_faces.entry(*tid).or_default().push(emb.clone());
|
||||
}
|
||||
// Dedup near-identical embeddings within trace
|
||||
for faces in trace_faces.values_mut() {
|
||||
faces.sort_by(|a, b| a[0].partial_cmp(&b[0]).unwrap_or(std::cmp::Ordering::Equal));
|
||||
faces.dedup_by(|a, b| cosine_similarity(a, b) > 0.99);
|
||||
}
|
||||
|
||||
info!(
|
||||
"[TMDB-FACE] Matching {} face detections against {} TMDb identities",
|
||||
detections.len(),
|
||||
identities.len()
|
||||
);
|
||||
let total = trace_faces.len();
|
||||
info!("[TKG-MATCH] {} traces with {} faces", total, fd_rows.len());
|
||||
|
||||
// Step 3: For each face detection, find best matching identity
|
||||
let mut bindings_created = 0usize;
|
||||
// Step 3: Iterative matching
|
||||
const TH: f32 = 0.50;
|
||||
let mut matched: HashMap<i32, (i32, String)> = HashMap::new(); // trace_id → (identity_id, name)
|
||||
|
||||
for det in &detections {
|
||||
let mut best_match: Option<(i64, f32)> = None;
|
||||
// Round 1: against TMDb seeds
|
||||
for (&tid, faces) in &trace_faces {
|
||||
let mut best_id = 0i32;
|
||||
let mut best_name = String::new();
|
||||
let mut best_sim = 0.0f32;
|
||||
for (id, name, tmdb_emb) in &tmdb_rows {
|
||||
for face in faces {
|
||||
let s = cosine_similarity(face, tmdb_emb);
|
||||
if s > best_sim { best_sim = s; best_id = *id; best_name = name.clone(); }
|
||||
}
|
||||
}
|
||||
if best_sim >= TH {
|
||||
matched.insert(tid, (best_id, best_name));
|
||||
}
|
||||
}
|
||||
info!("[TKG-MATCH] Round 1: {} ({}/{})", matched.len(), matched.len() * 100 / total, total);
|
||||
|
||||
for identity in &identities {
|
||||
let sim = cosine_similarity(&det.embedding, &identity.face_embedding);
|
||||
if sim > MATCH_THRESHOLD {
|
||||
match best_match {
|
||||
Some((_, best_sim)) if sim > best_sim => {
|
||||
best_match = Some((identity.id, sim));
|
||||
}
|
||||
None => {
|
||||
best_match = Some((identity.id, sim));
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
// Round 2+: propagate
|
||||
for round_n in 2..=10 {
|
||||
let prev = matched.len();
|
||||
let mut seed_pool: HashMap<i32, Vec<&Vec<f32>>> = HashMap::new();
|
||||
for (&tid, (id, _)) in &matched {
|
||||
if let Some(faces) = trace_faces.get(&tid) {
|
||||
seed_pool.entry(*id).or_default().extend(faces.iter());
|
||||
}
|
||||
}
|
||||
|
||||
if let Some((identity_id, similarity)) = best_match {
|
||||
// Update face_detection with identity_id
|
||||
let _ = sqlx::query(
|
||||
"UPDATE dev.face_detections SET identity_id = $1, identity_confidence = $2 \
|
||||
WHERE file_uuid = $3 AND face_id = $4",
|
||||
)
|
||||
.bind(identity_id)
|
||||
.bind(similarity as f64)
|
||||
.bind(file_uuid)
|
||||
.bind(&det.face_id)
|
||||
.execute(db.pool())
|
||||
.await
|
||||
.ok();
|
||||
|
||||
// Also create identity_binding
|
||||
let _ = sqlx::query(
|
||||
"INSERT INTO dev.identity_bindings (identity_id, identity_type, identity_value, source, confidence) \
|
||||
VALUES ($1, 'face', $2, 'tmdb_agent', $3) \
|
||||
ON CONFLICT (identity_id, identity_type, identity_value) DO UPDATE SET confidence = EXCLUDED.confidence"
|
||||
)
|
||||
.bind(identity_id)
|
||||
.bind(&det.face_id)
|
||||
.bind(similarity as f64)
|
||||
.execute(db.pool())
|
||||
.await
|
||||
.ok();
|
||||
|
||||
bindings_created += 1;
|
||||
let mut new_matches: Vec<(i32, i32, String)> = Vec::new();
|
||||
for (&tid, faces) in &trace_faces {
|
||||
if matched.contains_key(&tid) || faces.is_empty() { continue; }
|
||||
let ref_face = &faces[0];
|
||||
let mut best_id = 0i32;
|
||||
let mut best_name = String::new();
|
||||
let mut best_sim = 0.0f32;
|
||||
for (&id, seed_faces) in &seed_pool {
|
||||
for seed in seed_faces {
|
||||
let s = cosine_similarity(ref_face, seed);
|
||||
if s > best_sim { best_sim = s; best_id = id; }
|
||||
}
|
||||
}
|
||||
if best_sim >= TH {
|
||||
// Look up name for this id
|
||||
for (id, name, _) in &tmdb_rows {
|
||||
if *id == best_id { best_name = name.clone(); break; }
|
||||
}
|
||||
new_matches.push((tid, best_id, best_name));
|
||||
}
|
||||
}
|
||||
for (tid, id, name) in new_matches {
|
||||
matched.insert(tid, (id, name));
|
||||
}
|
||||
let new = matched.len() - prev;
|
||||
if new < 5 { break; }
|
||||
}
|
||||
|
||||
info!(
|
||||
"[TMDB-FACE] Created {} face-to-TMDb bindings for {}",
|
||||
bindings_created, file_uuid
|
||||
);
|
||||
// Step 4: Quality control
|
||||
// 4a: Remove low-confidence traces (fewer than 4 face detections)
|
||||
let mut after_qc = HashMap::new();
|
||||
for (&tid, &(id, ref name)) in &matched {
|
||||
let cnt: i64 = sqlx::query_scalar(
|
||||
"SELECT COUNT(*) FROM dev.face_detections WHERE file_uuid=$1 AND trace_id=$2"
|
||||
)
|
||||
.bind(file_uuid).bind(tid)
|
||||
.fetch_one(pool).await.unwrap_or(0);
|
||||
if cnt >= 4 {
|
||||
after_qc.insert(tid, (id, name.clone()));
|
||||
} else {
|
||||
info!("[TKG-QC] trace {} removed: only {} face(s), need >= 4", tid, cnt);
|
||||
}
|
||||
}
|
||||
let matched = after_qc;
|
||||
let removed_low = total - matched.len();
|
||||
if removed_low > 0 {
|
||||
info!("[TKG-QC] Removed {} low-confidence traces (< 4 faces)", removed_low);
|
||||
}
|
||||
|
||||
Ok(bindings_created)
|
||||
// 4b: Temporal collision check
|
||||
let removed_collisions = quality_check_temporal_collisions(pool, file_uuid).await?;
|
||||
if removed_collisions > 0 {
|
||||
info!("[TKG-QC] Resolved {} temporal collisions", removed_collisions);
|
||||
}
|
||||
|
||||
// Step 5: Update DB
|
||||
let mut updated = 0usize;
|
||||
for (&tid, &(id, _)) in &matched {
|
||||
let r = sqlx::query(
|
||||
"UPDATE dev.face_detections SET identity_id=$1 WHERE file_uuid=$2 AND trace_id=$3"
|
||||
)
|
||||
.bind(id).bind(file_uuid).bind(tid)
|
||||
.execute(pool).await?;
|
||||
if r.rows_affected() > 0 { updated += 1; }
|
||||
}
|
||||
|
||||
info!("[TKG-MATCH] Done: {}/{} traces matched ({}%)",
|
||||
matched.len(), total, matched.len() * 100 / total);
|
||||
Ok(updated)
|
||||
}
|
||||
|
||||
/// Quality check: detect temporal collisions where two different traces of the same
|
||||
/// identity appear in the same frame (impossible for one person).
|
||||
/// Unbind the lower-confidence trace from the conflicting pair.
|
||||
/// RCA reference: docs_v1.0/API_V1.0.0/INTERNAL/RCA_TRACE39_TRACE45_COLLISION_V1.0.0.md
|
||||
async fn quality_check_temporal_collisions(pool: &sqlx::PgPool, file_uuid: &str) -> Result<usize> {
|
||||
// Find all collision pairs: same identity, same frame, different trace
|
||||
let collisions = sqlx::query_as::<_, (i32, i32, i32, i32)>(
|
||||
r#"
|
||||
SELECT a.identity_id, a.trace_id, b.trace_id, a.frame_number
|
||||
FROM dev.face_detections a
|
||||
JOIN dev.face_detections b
|
||||
ON a.file_uuid = b.file_uuid
|
||||
AND a.frame_number = b.frame_number
|
||||
AND a.trace_id < b.trace_id
|
||||
WHERE a.file_uuid = $1
|
||||
AND a.identity_id IS NOT NULL
|
||||
AND a.identity_id = b.identity_id
|
||||
ORDER BY a.identity_id, a.frame_number
|
||||
"#
|
||||
)
|
||||
.bind(file_uuid)
|
||||
.fetch_all(pool).await?;
|
||||
|
||||
if collisions.is_empty() {
|
||||
return Ok(0);
|
||||
}
|
||||
|
||||
// Group collisions by (identity_id, trace_a, trace_b) and count frames
|
||||
use std::collections::HashMap;
|
||||
let mut collision_groups: HashMap<(i32, i32, i32), usize> = HashMap::new();
|
||||
for (id, ta, tb, _) in &collisions {
|
||||
*collision_groups.entry((*id, *ta, *tb)).or_default() += 1;
|
||||
}
|
||||
|
||||
let mut unbound = 0usize;
|
||||
for ((id, ta, tb), overlap_frames) in &collision_groups {
|
||||
// Get face detection count for each trace
|
||||
let cnt_a: i64 = sqlx::query_scalar(
|
||||
"SELECT COUNT(*) FROM dev.face_detections WHERE file_uuid=$1 AND trace_id=$2 AND identity_id=$3"
|
||||
)
|
||||
.bind(file_uuid).bind(ta).bind(id)
|
||||
.fetch_one(pool).await.unwrap_or(0);
|
||||
|
||||
let cnt_b: i64 = sqlx::query_scalar(
|
||||
"SELECT COUNT(*) FROM dev.face_detections WHERE file_uuid=$1 AND trace_id=$2 AND identity_id=$3"
|
||||
)
|
||||
.bind(file_uuid).bind(tb).bind(id)
|
||||
.fetch_one(pool).await.unwrap_or(0);
|
||||
|
||||
// Unbind the trace with fewer detections (likely the false positive)
|
||||
let victim = if cnt_a <= cnt_b { *ta } else { *tb };
|
||||
let victim_cnt = if cnt_a <= cnt_b { cnt_a } else { cnt_b };
|
||||
|
||||
sqlx::query(
|
||||
"UPDATE dev.face_detections SET identity_id=NULL WHERE file_uuid=$1 AND trace_id=$2"
|
||||
)
|
||||
.bind(file_uuid).bind(victim)
|
||||
.execute(pool).await?;
|
||||
|
||||
unbound += 1;
|
||||
warn!("[TKG-QC] Collision identity={}: trace {} vs trace {} ({} overlap frames). Unbound trace {} ({} detections)",
|
||||
id, ta, tb, overlap_frames, victim, victim_cnt);
|
||||
}
|
||||
|
||||
Ok(unbound)
|
||||
}
|
||||
|
||||
3322
src/main.rs.backup
Normal file
3322
src/main.rs.backup
Normal file
File diff suppressed because it is too large
Load Diff
@@ -6,7 +6,13 @@ use tokio::time::sleep;
|
||||
use tracing::{error, info, warn};
|
||||
|
||||
use crate::core::chunk::{rule1_ingest, rule3_ingest};
|
||||
use crate::core::db::{MonitorJobStatus, PostgresDb, ProcessorJobStatus, RedisClient, VideoStatus};
|
||||
use crate::core::db::qdrant_db::QdrantDb;
|
||||
use crate::api::five_w1h_agent_api::run_5w1h_agent;
|
||||
use crate::api::identity_agent_api::run_identity_agent;
|
||||
use crate::core::db::{
|
||||
MonitorJobStatus, PostgresDb, ProcessorJobStatus, RedisClient, VectorPayload, VideoStatus,
|
||||
};
|
||||
use crate::core::embedding::Embedder;
|
||||
use crate::worker::config::WorkerConfig;
|
||||
use crate::worker::processor::{ProcessorPool, ProcessorTask};
|
||||
use crate::worker::resources::SystemResources;
|
||||
@@ -140,7 +146,12 @@ impl JobWorker {
|
||||
.get_running_jobs_with_all_processors_done(self.config.batch_size)
|
||||
.await?;
|
||||
for job in running_jobs_done {
|
||||
let should_retry = self.check_and_complete_job(job.id, &job.uuid).await.is_ok();
|
||||
let expected_count = if job.processors.is_empty() {
|
||||
crate::core::db::ProcessorType::all().len()
|
||||
} else {
|
||||
job.processors.len()
|
||||
};
|
||||
let should_retry = self.check_and_complete_job(job.id, &job.uuid, expected_count).await.is_ok();
|
||||
if should_retry && self.processor_pool.can_start().await {
|
||||
if let Err(e) = self.process_job(job.clone()).await {
|
||||
error!("Failed to reprocess job {}: {}", job.uuid, e);
|
||||
@@ -453,22 +464,33 @@ impl JobWorker {
|
||||
|
||||
// 總是檢查是否可以完成 job(check_and_complete_job 內部會判斷)
|
||||
// processor_results 不足時它會自動略過
|
||||
self.check_and_complete_job(job.id, &job.uuid).await?;
|
||||
let expected_count = if job.processors.is_empty() {
|
||||
crate::core::db::ProcessorType::all().len()
|
||||
} else {
|
||||
job.processors.len()
|
||||
};
|
||||
self.check_and_complete_job(job.id, &job.uuid, expected_count).await?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn check_and_complete_job(&self, job_id: i32, uuid: &str) -> Result<()> {
|
||||
async fn check_and_complete_job(&self, job_id: i32, uuid: &str, expected_count: usize) -> Result<()> {
|
||||
let results = self.db.get_processor_results_by_job(job_id).await?;
|
||||
|
||||
// 如果 processor_results 筆數少於總 processor 數,代表有 processor 尚未啟動(如依賴未滿足)
|
||||
let all_processor_types = crate::core::db::ProcessorType::all().len();
|
||||
if results.len() < all_processor_types {
|
||||
info!(
|
||||
"check_and_complete_job: {} results={}, expected={}",
|
||||
uuid,
|
||||
results.len(),
|
||||
expected_count
|
||||
);
|
||||
|
||||
// 如果 processor_results 筆數少於期望的 processor 數,代表有 processor 尚未啟動(如依賴未滿足)
|
||||
if results.len() < expected_count {
|
||||
info!(
|
||||
"Job {} has {}/{} processor results, not all processors created yet. Skipping completion check.",
|
||||
uuid,
|
||||
results.len(),
|
||||
all_processor_types
|
||||
expected_count
|
||||
);
|
||||
return Ok(());
|
||||
}
|
||||
@@ -491,6 +513,14 @@ impl JobWorker {
|
||||
.iter()
|
||||
.any(|r| matches!(r.status, crate::core::db::ProcessorJobStatus::Failed));
|
||||
|
||||
let any_pending = results
|
||||
.iter()
|
||||
.any(|r| matches!(r.status, crate::core::db::ProcessorJobStatus::Pending));
|
||||
|
||||
let any_skipped = results
|
||||
.iter()
|
||||
.any(|r| matches!(r.status, crate::core::db::ProcessorJobStatus::Skipped));
|
||||
|
||||
let completed_count = results
|
||||
.iter()
|
||||
.filter(|r| {
|
||||
@@ -544,7 +574,14 @@ impl JobWorker {
|
||||
let fps = video.fps;
|
||||
match rule1_ingest::execute_rule1(&db_clone, &uuid_clone, fps).await {
|
||||
Ok(count) => {
|
||||
info!("✅ Rule 1 Ingestion completed: {} chunks inserted.", count)
|
||||
info!("✅ Rule 1 Ingestion completed: {} chunks inserted.", count);
|
||||
// Automatically vectorize new sentence chunks
|
||||
if count > 0 {
|
||||
info!("📝 Starting automatic vectorize for {} chunks...", count);
|
||||
if let Err(e) = Self::vectorize_chunks(&db_clone, &uuid_clone).await {
|
||||
error!("❌ Auto-vectorize failed for {}: {}", uuid_clone, e);
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(e) => error!("❌ Rule 1 Ingestion failed: {}", e),
|
||||
}
|
||||
@@ -640,19 +677,27 @@ impl JobWorker {
|
||||
// 🚀 P3 Trigger: Identity Agent (Face + ASRX)
|
||||
if has_face && has_asrx {
|
||||
info!("📝 Prerequisites met for Identity Agent. Starting analysis...");
|
||||
let db_clone = self.db.clone();
|
||||
let uuid_clone = uuid.to_string();
|
||||
tokio::spawn(async move {
|
||||
tracing::info!("Identity Agent started for video {}", uuid_clone);
|
||||
match run_identity_agent(&db_clone, &uuid_clone).await {
|
||||
Ok(()) => info!("✅ Identity Agent completed for {}", uuid_clone),
|
||||
Err(e) => error!("❌ Identity Agent failed for {}: {}", uuid_clone, e),
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
// 🚀 P4 Trigger: 5W1H Agent (after Rule 3 completion)
|
||||
if has_cut && has_asr {
|
||||
info!("📝 Prerequisites met for 5W1H Agent. Will trigger after Rule 3...");
|
||||
info!("📝 Prerequisites met for 5W1H Agent. Starting...");
|
||||
let db_clone = self.db.clone();
|
||||
let uuid_clone = uuid.to_string();
|
||||
tokio::spawn(async move {
|
||||
tokio::time::sleep(tokio::time::Duration::from_secs(30)).await;
|
||||
tracing::info!("5W1H Agent started for video {}", uuid_clone);
|
||||
match run_5w1h_agent(&db_clone, &uuid_clone).await {
|
||||
Ok(()) => info!("✅ 5W1H Agent completed for {}", uuid_clone),
|
||||
Err(e) => error!("❌ 5W1H Agent failed for {}: {}", uuid_clone, e),
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
@@ -679,8 +724,8 @@ impl JobWorker {
|
||||
self.redis.delete_worker_job(uuid).await?;
|
||||
|
||||
info!("Job {} completed successfully", job_id);
|
||||
} else if essential_completed && !all_completed {
|
||||
// 必要 processor 完成但部分非必要失敗 → 仍算完成
|
||||
} else if essential_completed && !all_completed && !any_pending && !any_skipped {
|
||||
// 必要 processor 完成但部分非必要失敗 → 仍算完成(但無 pending 者才觸發)
|
||||
info!(
|
||||
"Job {} completed with non-essential failures. Essential: {:?}",
|
||||
job_id, essential_processors
|
||||
@@ -738,6 +783,66 @@ impl JobWorker {
|
||||
info!("Shutting down worker...");
|
||||
self.processor_pool.cancel_all().await;
|
||||
}
|
||||
|
||||
/// 自動對 sentence chunks 產生 vector embedding 並存入 PG + Qdrant
|
||||
async fn vectorize_chunks(db: &PostgresDb, uuid: &str) -> anyhow::Result<()> {
|
||||
let embedder = Embedder::new("mxbai-embed-large:latest".to_string());
|
||||
let qdrant = QdrantDb::new();
|
||||
let pool = db.pool();
|
||||
|
||||
let rows = sqlx::query_as::<_, (String, String, String, f64, f64, String)>(
|
||||
"SELECT chunk_id, chunk_type, text_content, start_time, end_time, content::text FROM dev.chunks WHERE file_uuid = $1 AND chunk_type = 'sentence' AND embedding IS NULL AND (text_content IS NOT NULL AND text_content != '') ORDER BY chunk_index",
|
||||
)
|
||||
.bind(uuid)
|
||||
.fetch_all(pool)
|
||||
.await?;
|
||||
|
||||
if rows.is_empty() {
|
||||
info!("[Vectorize] No sentence chunks to vectorize for {}", uuid);
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let total = rows.len();
|
||||
info!("[Vectorize] Starting vectorize of {} chunks for {}", total, uuid);
|
||||
|
||||
let mut stored = 0usize;
|
||||
for (chunk_id, _chunk_type, text, start_time, end_time, _content_str) in &rows {
|
||||
if text.is_empty() {
|
||||
continue;
|
||||
}
|
||||
|
||||
match embedder.embed_document(text).await {
|
||||
Ok(vector) => {
|
||||
if let Err(e) = db.store_vector(chunk_id, &vector, uuid).await {
|
||||
error!("[Vectorize] PG store failed for {}: {}", chunk_id, e);
|
||||
continue;
|
||||
}
|
||||
let payload = VectorPayload {
|
||||
uuid: uuid.to_string(),
|
||||
chunk_id: chunk_id.clone(),
|
||||
chunk_type: "sentence".to_string(),
|
||||
start_time: *start_time,
|
||||
end_time: *end_time,
|
||||
text: Some(text.clone()),
|
||||
};
|
||||
if let Err(e) = qdrant.upsert_vector(chunk_id, &vector, payload).await {
|
||||
error!("[Vectorize] Qdrant upsert failed for {}: {}", chunk_id, e);
|
||||
continue;
|
||||
}
|
||||
stored += 1;
|
||||
if stored % 50 == 0 {
|
||||
info!("[Vectorize] {}/{} vectors stored for {}", stored, total, uuid);
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
error!("[Vectorize] Embedding failed for {}: {}", chunk_id, e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
info!("[Vectorize] Completed: {}/{} vectors stored for {}", stored, total, uuid);
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
||||
@@ -1,10 +1,37 @@
|
||||
use anyhow::{Context, Result};
|
||||
use libc;
|
||||
use std::collections::HashMap;
|
||||
use std::path::PathBuf;
|
||||
use std::sync::Arc;
|
||||
use tokio::sync::{mpsc, RwLock};
|
||||
use tracing::{error, info, warn};
|
||||
|
||||
/// Guard that ensures processor pool cleanup runs even if the task panics.
|
||||
struct ProcessorCleanupGuard {
|
||||
job_id: i32,
|
||||
running: Arc<RwLock<HashMap<i32, ProcessorHandle>>>,
|
||||
running_count: Arc<RwLock<usize>>,
|
||||
}
|
||||
|
||||
impl Drop for ProcessorCleanupGuard {
|
||||
fn drop(&mut self) {
|
||||
use tokio::sync::TryLockError;
|
||||
// 嘗試同步清理;若 lock 被佔用則跳過(避免 deadlock)
|
||||
if let Ok(mut guard) = self.running.try_write() {
|
||||
guard.remove(&self.job_id);
|
||||
} else {
|
||||
warn!("[ProcessorCleanupGuard] running lock contended, skipping cleanup");
|
||||
}
|
||||
if let Ok(mut guard) = self.running_count.try_write() {
|
||||
if *guard > 0 {
|
||||
*guard -= 1;
|
||||
}
|
||||
} else {
|
||||
warn!("[ProcessorCleanupGuard] running_count lock contended, skipping cleanup");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
use crate::core::config::{OUTPUT_DIR, PYTHON_PATH, SCRIPTS_DIR};
|
||||
use crate::core::db::{
|
||||
MonitorJob, PostgresDb, ProcessorJobStatus, ProcessorType, QdrantDb, RedisClient,
|
||||
@@ -93,7 +120,7 @@ impl ProcessorPool {
|
||||
if handle_count == 0 && count == 0 {
|
||||
if let Err(e) = self
|
||||
.db
|
||||
.reset_stale_processor_results(ProcessorJobStatus::Failed, "Worker restarted")
|
||||
.reset_stale_processor_results(ProcessorJobStatus::Pending, "Worker restarted")
|
||||
.await
|
||||
{
|
||||
error!("Failed to reset stale processor results: {}", e);
|
||||
@@ -101,7 +128,29 @@ impl ProcessorPool {
|
||||
}
|
||||
}
|
||||
|
||||
async fn kill_existing_processor(redis: &RedisClient, uuid: &str, processor: &str) {
|
||||
let prefix = crate::core::config::REDIS_KEY_PREFIX.as_str();
|
||||
let key = format!("{}worker:job:{}:processor:{}", prefix, uuid, processor);
|
||||
if let Ok(mut conn) = redis.get_conn().await {
|
||||
let old_pid: Option<i32> = redis::cmd("HGET")
|
||||
.arg(&key)
|
||||
.arg("pid")
|
||||
.query_async(&mut conn)
|
||||
.await
|
||||
.ok()
|
||||
.flatten();
|
||||
if let Some(pid) = old_pid {
|
||||
if pid > 0 {
|
||||
warn!("[PID] Killing existing process {} for {}/{}", pid, uuid, processor);
|
||||
unsafe { libc::kill(pid, libc::SIGKILL); }
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn start_processor(&self, task: ProcessorTask) -> Result<()> {
|
||||
Self::kill_existing_processor(&*self.redis, &task.job.uuid, task.processor_type.as_str()).await;
|
||||
|
||||
let (cancel_tx, cancel_rx) = mpsc::channel(1);
|
||||
let job_id = task.job.id;
|
||||
let processor_type = task.processor_type;
|
||||
@@ -144,6 +193,13 @@ impl ProcessorPool {
|
||||
}
|
||||
|
||||
tokio::spawn(async move {
|
||||
// Guard 的 Drop 確保 panic 時也清理 pool state
|
||||
let _guard = ProcessorCleanupGuard {
|
||||
job_id,
|
||||
running: running.clone(),
|
||||
running_count: running_count.clone(),
|
||||
};
|
||||
|
||||
info!("Starting processor {} for job {}", processor_name, job.uuid);
|
||||
|
||||
let _ = db
|
||||
@@ -171,19 +227,6 @@ impl ProcessorPool {
|
||||
|
||||
let result = Self::run_processor(&db, &redis, &job, processor_type, cancel_rx).await;
|
||||
|
||||
// Store child PID for stability
|
||||
{
|
||||
let mut pid_lock = child_pid.write().await;
|
||||
*pid_lock = Some(0);
|
||||
}
|
||||
|
||||
{
|
||||
let mut running_guard = running.write().await;
|
||||
running_guard.remove(&job_id);
|
||||
let mut count_guard = running_count.write().await;
|
||||
*count_guard -= 1;
|
||||
}
|
||||
|
||||
match result {
|
||||
Ok(output) => {
|
||||
info!(
|
||||
@@ -747,6 +790,12 @@ impl ProcessorPool {
|
||||
"_face"
|
||||
);
|
||||
|
||||
// 確保 collection 存在(dim=512 for FaceNet)
|
||||
if let Err(e) = qdrant.ensure_collection(&collection, 512).await {
|
||||
tracing::error!("Failed to ensure Qdrant face collection: {}", e);
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let mut count = 0;
|
||||
for frame in &face_result.frames {
|
||||
for face in &frame.faces {
|
||||
@@ -807,6 +856,12 @@ impl ProcessorPool {
|
||||
"_voice"
|
||||
);
|
||||
|
||||
// 確保 collection 存在(dim=192 for ASRX voice)
|
||||
if let Err(e) = qdrant.ensure_collection(&collection, 192).await {
|
||||
tracing::error!("Failed to ensure Qdrant voice collection: {}", e);
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let embeddings = match &asrx_result.embeddings {
|
||||
Some(e) => e,
|
||||
None => return Ok(()),
|
||||
@@ -958,6 +1013,24 @@ impl ProcessorPool {
|
||||
|
||||
db.store_scene_pre_chunks_batch(uuid, &scenes).await?;
|
||||
|
||||
for (i, scene) in scene_result.scenes.iter().enumerate() {
|
||||
let chk_id = format!("scene_{}", i + 1);
|
||||
let meta = serde_json::json!({
|
||||
"scene_type": scene.scene_type,
|
||||
"scene_type_zh": scene.scene_type_zh,
|
||||
"confidence": scene.confidence,
|
||||
"top_5": scene.top_5,
|
||||
});
|
||||
let _ = sqlx::query(
|
||||
"UPDATE dev.chunks SET metadata = metadata || $1::jsonb WHERE file_uuid=$2 AND chunk_id=$3"
|
||||
)
|
||||
.bind(&meta)
|
||||
.bind(uuid)
|
||||
.bind(&chk_id)
|
||||
.execute(db.pool())
|
||||
.await;
|
||||
}
|
||||
|
||||
tracing::info!(
|
||||
"Stored {} Scene pre-chunks for video {}",
|
||||
scenes.len(),
|
||||
|
||||
Reference in New Issue
Block a user