feat: trace-level matching, health watcher/worker status, timezone config
This commit is contained in:
@@ -75,15 +75,13 @@ pub async fn ingest_rule3(pool: &PgPool, file_uuid: &str) -> Result<usize> {
|
||||
|
||||
// Query chunks table for Rule 1 sentence chunks
|
||||
let chunk_table = schema::table_name("chunk");
|
||||
let rule1_rows: Vec<(String,)> = sqlx::query_as(
|
||||
&format!(
|
||||
"SELECT chunk_id FROM {} \
|
||||
let rule1_rows: Vec<(String,)> = sqlx::query_as(&format!(
|
||||
"SELECT chunk_id FROM {} \
|
||||
WHERE file_uuid = $1 AND chunk_type = 'sentence' \
|
||||
AND start_frame >= $2 \
|
||||
AND end_frame <= $3",
|
||||
chunk_table
|
||||
),
|
||||
)
|
||||
chunk_table
|
||||
))
|
||||
.bind(file_uuid)
|
||||
.bind(scene.start_frame as i64)
|
||||
.bind(scene.end_frame as i64)
|
||||
@@ -101,16 +99,14 @@ pub async fn ingest_rule3(pool: &PgPool, file_uuid: &str) -> Result<usize> {
|
||||
// Let's re-query text directly.
|
||||
}
|
||||
|
||||
let texts: Vec<String> = sqlx::query_scalar(
|
||||
&format!(
|
||||
"SELECT text_content FROM {} \
|
||||
let texts: Vec<String> = sqlx::query_scalar(&format!(
|
||||
"SELECT text_content FROM {} \
|
||||
WHERE file_uuid = $1 AND chunk_type = 'sentence' \
|
||||
AND start_frame >= $2 \
|
||||
AND end_frame <= $3 \
|
||||
ORDER BY start_frame ASC",
|
||||
chunk_table
|
||||
),
|
||||
)
|
||||
chunk_table
|
||||
))
|
||||
.bind(file_uuid)
|
||||
.bind(scene.start_frame as i64)
|
||||
.bind(scene.end_frame as i64)
|
||||
@@ -154,16 +150,14 @@ pub async fn ingest_rule3(pool: &PgPool, file_uuid: &str) -> Result<usize> {
|
||||
"scene_number": scene.scene_number
|
||||
});
|
||||
|
||||
sqlx::query(
|
||||
&format!(
|
||||
"INSERT INTO {} (file_uuid, chunk_id, chunk_type, \
|
||||
sqlx::query(&format!(
|
||||
"INSERT INTO {} (file_uuid, chunk_id, chunk_type, \
|
||||
start_time, end_time, fps, start_frame, end_frame, \
|
||||
content, text_content, summary_text, metadata, child_chunk_ids) \
|
||||
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13) \
|
||||
ON CONFLICT (file_uuid, chunk_id) DO NOTHING",
|
||||
chunk_table
|
||||
),
|
||||
)
|
||||
chunk_table
|
||||
))
|
||||
.bind(file_uuid)
|
||||
.bind(&chunk_id)
|
||||
.bind(scene.scene_number as i32)
|
||||
|
||||
@@ -20,8 +20,7 @@ pub fn set_cache_enabled(enabled: bool) {
|
||||
}
|
||||
|
||||
// Switch 1: watcher detects new file → auto-register
|
||||
pub static RUNTIME_WATCHER_AUTO_REGISTER: Lazy<RwLock<bool>> =
|
||||
Lazy::new(|| RwLock::new(false));
|
||||
pub static RUNTIME_WATCHER_AUTO_REGISTER: Lazy<RwLock<bool>> = Lazy::new(|| RwLock::new(false));
|
||||
|
||||
pub fn get_watcher_auto_register() -> bool {
|
||||
*RUNTIME_WATCHER_AUTO_REGISTER.read().unwrap()
|
||||
@@ -33,8 +32,7 @@ pub fn set_watcher_auto_register(enabled: bool) {
|
||||
}
|
||||
|
||||
// Switch 2: register → auto-trigger processing pipeline
|
||||
pub static RUNTIME_AUTO_PIPELINE_ENABLED: Lazy<RwLock<bool>> =
|
||||
Lazy::new(|| RwLock::new(false));
|
||||
pub static RUNTIME_AUTO_PIPELINE_ENABLED: Lazy<RwLock<bool>> = Lazy::new(|| RwLock::new(false));
|
||||
|
||||
pub fn get_auto_pipeline_enabled() -> bool {
|
||||
*RUNTIME_AUTO_PIPELINE_ENABLED.read().unwrap()
|
||||
@@ -107,6 +105,30 @@ pub static REDIS_KEY_PREFIX: Lazy<String> =
|
||||
pub static DATABASE_SCHEMA: Lazy<String> =
|
||||
Lazy::new(|| env::var("DATABASE_SCHEMA").unwrap_or_else(|_| "public".to_string()));
|
||||
|
||||
pub static SYSTEM_TIMEZONE: Lazy<String> = Lazy::new(|| {
|
||||
if let Ok(tz) = env::var("MOMENTRY_TIMEZONE") {
|
||||
if !tz.is_empty() {
|
||||
return tz;
|
||||
}
|
||||
}
|
||||
if let Ok(tz) = env::var("TZ") {
|
||||
if !tz.is_empty() {
|
||||
return tz;
|
||||
}
|
||||
}
|
||||
// macOS: /etc/localtime → /var/db/timezone/zoneinfo/Asia/Taipei
|
||||
// Linux: /etc/localtime → /usr/share/zoneinfo/Asia/Taipei
|
||||
if let Ok(path) = std::fs::read_link("/etc/localtime") {
|
||||
let s = path.to_string_lossy();
|
||||
for prefix in &["/usr/share/zoneinfo/", "/var/db/timezone/zoneinfo/"] {
|
||||
if let Some(tz) = s.strip_prefix(prefix) {
|
||||
return tz.to_string();
|
||||
}
|
||||
}
|
||||
}
|
||||
"Asia/Taipei".to_string()
|
||||
});
|
||||
|
||||
pub static MONGODB_DATABASE: Lazy<String> =
|
||||
Lazy::new(|| env::var("MONGODB_DATABASE").unwrap_or_else(|_| "momentry".to_string()));
|
||||
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -15,9 +15,11 @@ pub struct QdrantDb {
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct VectorPayload {
|
||||
pub uuid: String,
|
||||
pub file_uuid: String,
|
||||
pub chunk_id: String,
|
||||
pub chunk_type: String,
|
||||
pub start_frame: i64,
|
||||
pub end_frame: i64,
|
||||
pub start_time: f64,
|
||||
pub end_time: f64,
|
||||
pub text: Option<String>,
|
||||
@@ -189,6 +191,49 @@ impl QdrantDb {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn upsert_vectors_batch(
|
||||
&self,
|
||||
collection: &str,
|
||||
points: &[(u64, &[f32], Option<serde_json::Value>)],
|
||||
) -> Result<()> {
|
||||
let url = format!(
|
||||
"{}/collections/{}/points?wait=true",
|
||||
self.base_url, collection
|
||||
);
|
||||
|
||||
let qdrant_points: Vec<serde_json::Value> = points
|
||||
.iter()
|
||||
.map(|(id, vec, payload)| {
|
||||
let mut p = serde_json::json!({
|
||||
"id": id,
|
||||
"vector": vec,
|
||||
});
|
||||
if let Some(pl) = payload {
|
||||
p["payload"] = pl.clone();
|
||||
}
|
||||
p
|
||||
})
|
||||
.collect();
|
||||
|
||||
let body = serde_json::json!({ "points": qdrant_points });
|
||||
|
||||
let response = self
|
||||
.client
|
||||
.put(&url)
|
||||
.header("api-key", &self.api_key)
|
||||
.json(&body)
|
||||
.send()
|
||||
.await
|
||||
.context("Failed to send batch upsert request to Qdrant")?;
|
||||
|
||||
let status = response.status();
|
||||
if !status.is_success() {
|
||||
let response_text = response.text().await.unwrap_or_default();
|
||||
anyhow::bail!("Qdrant batch upsert failed: {} - {}", status, response_text);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn upsert_vector(
|
||||
&self,
|
||||
chunk_id: &str,
|
||||
@@ -207,12 +252,23 @@ impl QdrantDb {
|
||||
);
|
||||
|
||||
let mut payload_map = HashMap::new();
|
||||
payload_map.insert("uuid".to_string(), serde_json::json!(payload.uuid));
|
||||
payload_map.insert(
|
||||
"file_uuid".to_string(),
|
||||
serde_json::json!(payload.file_uuid),
|
||||
);
|
||||
payload_map.insert("chunk_id".to_string(), serde_json::json!(payload.chunk_id));
|
||||
payload_map.insert(
|
||||
"chunk_type".to_string(),
|
||||
serde_json::json!(payload.chunk_type),
|
||||
);
|
||||
payload_map.insert(
|
||||
"start_frame".to_string(),
|
||||
serde_json::json!(payload.start_frame),
|
||||
);
|
||||
payload_map.insert(
|
||||
"end_frame".to_string(),
|
||||
serde_json::json!(payload.end_frame),
|
||||
);
|
||||
payload_map.insert(
|
||||
"start_time".to_string(),
|
||||
serde_json::json!(payload.start_time),
|
||||
@@ -224,7 +280,7 @@ impl QdrantDb {
|
||||
|
||||
// Generate consistent point ID from uuid and chunk_id
|
||||
// Qdrant requires integer or UUID point IDs. We'll use a simple integer hash.
|
||||
let point_id_str = format!("{}_{}", payload.uuid, chunk_id);
|
||||
let point_id_str = format!("{}_{}", payload.file_uuid, chunk_id);
|
||||
use std::collections::hash_map::DefaultHasher;
|
||||
use std::hash::{Hash, Hasher};
|
||||
let mut hasher = DefaultHasher::new();
|
||||
@@ -240,9 +296,9 @@ impl QdrantDb {
|
||||
});
|
||||
|
||||
tracing::debug!(
|
||||
"Upserting vector to Qdrant: chunk_id={}, uuid={}, vector_len={}",
|
||||
"Upserting vector to Qdrant: chunk_id={}, file_uuid={}, vector_len={}",
|
||||
chunk_id,
|
||||
payload.uuid,
|
||||
payload.file_uuid,
|
||||
vector.len()
|
||||
);
|
||||
|
||||
@@ -337,7 +393,7 @@ impl QdrantDb {
|
||||
.map(|r| {
|
||||
let uuid = r
|
||||
.payload
|
||||
.get("uuid")
|
||||
.get("file_uuid")
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or("unknown")
|
||||
.to_string();
|
||||
@@ -409,7 +465,7 @@ impl QdrantDb {
|
||||
.map(|r| {
|
||||
let uuid = r
|
||||
.payload
|
||||
.get("uuid")
|
||||
.get("file_uuid")
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or("unknown")
|
||||
.to_string();
|
||||
@@ -471,7 +527,7 @@ impl QdrantDb {
|
||||
"filter": {
|
||||
"must": [
|
||||
{
|
||||
"key": "uuid",
|
||||
"key": "file_uuid",
|
||||
"match": {
|
||||
"value": uuid
|
||||
}
|
||||
@@ -532,7 +588,7 @@ impl QdrantDb {
|
||||
.map(|r| {
|
||||
let uuid = r
|
||||
.payload
|
||||
.get("uuid")
|
||||
.get("file_uuid")
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or("unknown")
|
||||
.to_string();
|
||||
@@ -553,6 +609,89 @@ impl QdrantDb {
|
||||
Ok(search_results)
|
||||
}
|
||||
|
||||
pub async fn search_face_collection(
|
||||
&self,
|
||||
collection: &str,
|
||||
query_vector: &[f32],
|
||||
limit: usize,
|
||||
exclude_payload_key: &str,
|
||||
exclude_payload_value: &str,
|
||||
include_file_uuid: Option<&str>,
|
||||
) -> Result<Vec<(f64, HashMap<String, serde_json::Value>)>> {
|
||||
let url = format!("{}/collections/{}/points/search", self.base_url, collection);
|
||||
|
||||
let mut filter = serde_json::json!({
|
||||
"must_not": [
|
||||
{
|
||||
"key": exclude_payload_key,
|
||||
"match": { "value": exclude_payload_value }
|
||||
}
|
||||
]
|
||||
});
|
||||
|
||||
if let Some(file_uuid) = include_file_uuid {
|
||||
filter["must"] = serde_json::json!([
|
||||
{
|
||||
"key": "file_uuid",
|
||||
"match": { "value": file_uuid }
|
||||
}
|
||||
]);
|
||||
}
|
||||
|
||||
let body = serde_json::json!({
|
||||
"vector": query_vector,
|
||||
"limit": limit,
|
||||
"with_payload": true,
|
||||
"filter": filter,
|
||||
});
|
||||
|
||||
let response = self
|
||||
.client
|
||||
.post(&url)
|
||||
.header("api-key", &self.api_key)
|
||||
.header("Content-Type", "application/json")
|
||||
.json(&body)
|
||||
.send()
|
||||
.await
|
||||
.context("Failed to search Qdrant face collection")?;
|
||||
|
||||
let status = response.status();
|
||||
let response_text = response
|
||||
.text()
|
||||
.await
|
||||
.unwrap_or_else(|_| "Failed to read response".to_string());
|
||||
|
||||
if !status.is_success() {
|
||||
return Err(anyhow::anyhow!(
|
||||
"Qdrant search_face_collection failed: {} - {}",
|
||||
status,
|
||||
response_text
|
||||
));
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct QdrantSearchResult {
|
||||
result: Vec<QdrantPoint>,
|
||||
}
|
||||
#[derive(Deserialize)]
|
||||
struct QdrantPoint {
|
||||
score: f64,
|
||||
payload: HashMap<String, serde_json::Value>,
|
||||
}
|
||||
|
||||
match serde_json::from_str::<QdrantSearchResult>(&response_text) {
|
||||
Ok(parsed) => {
|
||||
let results: Vec<(f64, HashMap<String, serde_json::Value>)> = parsed
|
||||
.result
|
||||
.into_iter()
|
||||
.map(|r| (r.score, r.payload))
|
||||
.collect();
|
||||
Ok(results)
|
||||
}
|
||||
Err(e) => Err(anyhow::anyhow!("Failed to parse Qdrant response: {}", e)),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn delete_by_uuid(&self, uuid: &str) -> Result<()> {
|
||||
let url = format!(
|
||||
"{}/collections/{}/points/delete",
|
||||
@@ -563,7 +702,7 @@ impl QdrantDb {
|
||||
"filter": {
|
||||
"must": [
|
||||
{
|
||||
"key": "uuid",
|
||||
"key": "file_uuid",
|
||||
"match": {
|
||||
"value": uuid
|
||||
}
|
||||
@@ -711,9 +850,11 @@ impl Database for QdrantDb {
|
||||
impl VectorStore for QdrantDb {
|
||||
async fn store_vector(&self, chunk_id: &str, vector: &[f32]) -> Result<()> {
|
||||
let payload = VectorPayload {
|
||||
uuid: String::new(),
|
||||
file_uuid: String::new(),
|
||||
chunk_id: chunk_id.to_string(),
|
||||
chunk_type: String::new(),
|
||||
start_frame: 0,
|
||||
end_frame: 0,
|
||||
start_time: 0.0,
|
||||
end_time: 0.0,
|
||||
text: None,
|
||||
@@ -737,7 +878,9 @@ pub async fn sync_face_embeddings(file_uuid: &str) -> Result<()> {
|
||||
let qdrant: QdrantDb = QdrantDb::new();
|
||||
|
||||
let query = format!(
|
||||
"SELECT id, trace_id, frame_number, embedding FROM {} WHERE file_uuid = $1 AND embedding IS NOT NULL",
|
||||
"SELECT id, trace_id, frame_number, embedding FROM {} \
|
||||
WHERE file_uuid = $1 AND embedding IS NOT NULL \
|
||||
AND ((metadata->>'qc_ok')::boolean IS NULL OR (metadata->>'qc_ok')::boolean = true)",
|
||||
table
|
||||
);
|
||||
let rows = sqlx::query(&query).bind(file_uuid).fetch_all(&pool).await?;
|
||||
@@ -767,3 +910,103 @@ pub async fn sync_face_embeddings(file_uuid: &str) -> Result<()> {
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn sync_trace_embeddings(file_uuid: &str) -> Result<()> {
|
||||
use crate::core::config::DATABASE_URL;
|
||||
use sqlx::Row;
|
||||
|
||||
let pool = sqlx::PgPool::connect(&DATABASE_URL).await?;
|
||||
let table = crate::core::db::schema::table_name("face_detections");
|
||||
let qdrant = QdrantDb::new();
|
||||
|
||||
let collection = format!(
|
||||
"{}_traces",
|
||||
crate::core::config::REDIS_KEY_PREFIX
|
||||
.as_str()
|
||||
.trim_end_matches(':')
|
||||
);
|
||||
qdrant.ensure_collection(&collection, 512).await?;
|
||||
|
||||
// Read all face_detections with embeddings, grouped by trace_id in Rust
|
||||
let rows = sqlx::query(&format!(
|
||||
"SELECT trace_id, embedding FROM {} \
|
||||
WHERE file_uuid = $1 AND embedding IS NOT NULL AND trace_id IS NOT NULL \
|
||||
AND ((metadata->>'qc_ok')::boolean IS NULL OR (metadata->>'qc_ok')::boolean = true)",
|
||||
table
|
||||
))
|
||||
.bind(file_uuid)
|
||||
.fetch_all(&pool)
|
||||
.await?;
|
||||
|
||||
let mut trace_faces: std::collections::HashMap<i32, Vec<Vec<f32>>> =
|
||||
std::collections::HashMap::new();
|
||||
let mut trace_stats: std::collections::HashMap<i32, (i64, i64, i64)> =
|
||||
std::collections::HashMap::new(); // (count, min_frame, max_frame)
|
||||
|
||||
for row in &rows {
|
||||
let tid: Option<i32> = row.get(0);
|
||||
let emb: Option<Vec<f32>> = row.get(1);
|
||||
if let (Some(tid), Some(emb)) = (tid, emb) {
|
||||
trace_faces.entry(tid).or_default().push(emb);
|
||||
let entry = trace_stats.entry(tid).or_insert((0, i64::MAX, i64::MIN));
|
||||
entry.0 += 1;
|
||||
}
|
||||
}
|
||||
|
||||
// Compute average embedding per trace
|
||||
struct AvgTrace {
|
||||
tid: i32,
|
||||
avg_emb: Vec<f32>,
|
||||
frame_count: i64,
|
||||
}
|
||||
|
||||
let mut trace_avgs: Vec<AvgTrace> = Vec::new();
|
||||
|
||||
for (&tid, faces) in &trace_faces {
|
||||
let dim = faces[0].len();
|
||||
let mut avg = vec![0.0f32; dim];
|
||||
for face in faces {
|
||||
for (i, &v) in face.iter().enumerate() {
|
||||
avg[i] += v;
|
||||
}
|
||||
}
|
||||
let n = faces.len() as f32;
|
||||
for v in &mut avg {
|
||||
*v /= n;
|
||||
}
|
||||
|
||||
let stats = trace_stats.get(&tid).unwrap_or(&(0, 0, 0));
|
||||
trace_avgs.push(AvgTrace {
|
||||
tid,
|
||||
avg_emb: avg,
|
||||
frame_count: stats.0,
|
||||
});
|
||||
}
|
||||
|
||||
// Push to Qdrant in batches
|
||||
for chunk in trace_avgs.chunks(500) {
|
||||
let batch: Vec<(u64, &[f32], Option<serde_json::Value>)> = chunk
|
||||
.iter()
|
||||
.map(|t| {
|
||||
(
|
||||
t.tid as u64,
|
||||
t.avg_emb.as_slice(),
|
||||
Some(serde_json::json!({
|
||||
"trace_id": t.tid,
|
||||
"file_uuid": file_uuid,
|
||||
"frame_count": t.frame_count,
|
||||
"source": "trace",
|
||||
})),
|
||||
)
|
||||
})
|
||||
.collect();
|
||||
qdrant.upsert_vectors_batch(&collection, &batch).await?;
|
||||
}
|
||||
|
||||
tracing::info!(
|
||||
"Synced {} trace embeddings to Qdrant for {}",
|
||||
trace_faces.len(),
|
||||
file_uuid
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -45,9 +45,11 @@ impl SyncDb {
|
||||
}
|
||||
|
||||
let payload = VectorPayload {
|
||||
uuid: uuid.clone(),
|
||||
file_uuid: uuid.clone(),
|
||||
chunk_id: chunk_id.clone(),
|
||||
chunk_type,
|
||||
start_frame: chunk.start_frame,
|
||||
end_frame: chunk.end_frame,
|
||||
start_time,
|
||||
end_time,
|
||||
text: Some(text.to_string()),
|
||||
|
||||
@@ -33,26 +33,38 @@ pub async fn run_consistency_checks(db: &PostgresDb) -> ConsistencyReport {
|
||||
|
||||
// Check 1: stale_processing — status=processing but job_id is null
|
||||
let c1 = check_stale_processing(db).await;
|
||||
if c1.count > 0 { any_issue = true; }
|
||||
if c1.count > 0 {
|
||||
any_issue = true;
|
||||
}
|
||||
checks.push(c1);
|
||||
|
||||
// Check 2: orphaned_processing — status=processing but no active monitor_job
|
||||
let c2 = check_orphaned_processing(db).await;
|
||||
if c2.count > 0 { any_issue = true; }
|
||||
if c2.count > 0 {
|
||||
any_issue = true;
|
||||
}
|
||||
checks.push(c2);
|
||||
|
||||
// Check 3: unregistered_with_uuid — DB rows left behind by migration
|
||||
let c3 = check_unregistered_with_uuid(db).await;
|
||||
if c3.count > 0 { any_issue = true; }
|
||||
if c3.count > 0 {
|
||||
any_issue = true;
|
||||
}
|
||||
checks.push(c3);
|
||||
|
||||
// Check 4: processing_job_done — status=processing but job already completed
|
||||
let c4 = check_processing_job_done(db).await;
|
||||
if c4.count > 0 { any_issue = true; }
|
||||
if c4.count > 0 {
|
||||
any_issue = true;
|
||||
}
|
||||
checks.push(c4);
|
||||
|
||||
ConsistencyReport {
|
||||
status: if any_issue { "degraded".to_string() } else { "ok".to_string() },
|
||||
status: if any_issue {
|
||||
"degraded".to_string()
|
||||
} else {
|
||||
"ok".to_string()
|
||||
},
|
||||
checked_at,
|
||||
checks,
|
||||
}
|
||||
@@ -68,9 +80,17 @@ async fn check_stale_processing(db: &PostgresDb) -> ConsistencyCheck {
|
||||
.await
|
||||
.unwrap_or_default();
|
||||
|
||||
let files: Vec<ConsistencyFile> = rows.into_iter().map(|(file_uuid, file_name, status): (String, String, String)| ConsistencyFile {
|
||||
file_uuid, file_name, status, detail: "job_id is null".to_string(),
|
||||
}).collect();
|
||||
let files: Vec<ConsistencyFile> = rows
|
||||
.into_iter()
|
||||
.map(
|
||||
|(file_uuid, file_name, status): (String, String, String)| ConsistencyFile {
|
||||
file_uuid,
|
||||
file_name,
|
||||
status,
|
||||
detail: "job_id is null".to_string(),
|
||||
},
|
||||
)
|
||||
.collect();
|
||||
|
||||
ConsistencyCheck {
|
||||
check: "stale_processing".to_string(),
|
||||
@@ -83,19 +103,28 @@ async fn check_stale_processing(db: &PostgresDb) -> ConsistencyCheck {
|
||||
async fn check_orphaned_processing(db: &PostgresDb) -> ConsistencyCheck {
|
||||
let vt = schema::table_name("videos");
|
||||
let mj = schema::table_name("monitor_jobs");
|
||||
let rows: Vec<(String, String, String)> = sqlx::query_as::<_, (String, String, String)>(&format!(
|
||||
"SELECT v.file_uuid, v.file_name, v.status \
|
||||
let rows: Vec<(String, String, String)> =
|
||||
sqlx::query_as::<_, (String, String, String)>(&format!(
|
||||
"SELECT v.file_uuid, v.file_name, v.status \
|
||||
FROM {} v LEFT JOIN {} m ON v.file_uuid = m.uuid AND m.status IN ('pending','running') \
|
||||
WHERE v.status = 'processing' AND m.id IS NULL",
|
||||
vt, mj
|
||||
))
|
||||
.fetch_all(db.pool())
|
||||
.await
|
||||
.unwrap_or_default();
|
||||
vt, mj
|
||||
))
|
||||
.fetch_all(db.pool())
|
||||
.await
|
||||
.unwrap_or_default();
|
||||
|
||||
let files: Vec<ConsistencyFile> = rows.into_iter().map(|(file_uuid, file_name, status): (String, String, String)| ConsistencyFile {
|
||||
file_uuid, file_name, status, detail: "no active monitor_job".to_string(),
|
||||
}).collect();
|
||||
let files: Vec<ConsistencyFile> = rows
|
||||
.into_iter()
|
||||
.map(
|
||||
|(file_uuid, file_name, status): (String, String, String)| ConsistencyFile {
|
||||
file_uuid,
|
||||
file_name,
|
||||
status,
|
||||
detail: "no active monitor_job".to_string(),
|
||||
},
|
||||
)
|
||||
.collect();
|
||||
|
||||
ConsistencyCheck {
|
||||
check: "orphaned_processing".to_string(),
|
||||
@@ -107,17 +136,26 @@ async fn check_orphaned_processing(db: &PostgresDb) -> ConsistencyCheck {
|
||||
|
||||
async fn check_unregistered_with_uuid(db: &PostgresDb) -> ConsistencyCheck {
|
||||
let vt = schema::table_name("videos");
|
||||
let rows: Vec<(String, String, String)> = sqlx::query_as::<_, (String, String, String)>(&format!(
|
||||
"SELECT file_uuid, file_name, status FROM {} WHERE status = 'unregistered'",
|
||||
vt
|
||||
))
|
||||
.fetch_all(db.pool())
|
||||
.await
|
||||
.unwrap_or_default();
|
||||
let rows: Vec<(String, String, String)> =
|
||||
sqlx::query_as::<_, (String, String, String)>(&format!(
|
||||
"SELECT file_uuid, file_name, status FROM {} WHERE status = 'unregistered'",
|
||||
vt
|
||||
))
|
||||
.fetch_all(db.pool())
|
||||
.await
|
||||
.unwrap_or_default();
|
||||
|
||||
let files: Vec<ConsistencyFile> = rows.into_iter().map(|(file_uuid, file_name, status): (String, String, String)| ConsistencyFile {
|
||||
file_uuid, file_name, status, detail: "migration residue".to_string(),
|
||||
}).collect();
|
||||
let files: Vec<ConsistencyFile> = rows
|
||||
.into_iter()
|
||||
.map(
|
||||
|(file_uuid, file_name, status): (String, String, String)| ConsistencyFile {
|
||||
file_uuid,
|
||||
file_name,
|
||||
status,
|
||||
detail: "migration residue".to_string(),
|
||||
},
|
||||
)
|
||||
.collect();
|
||||
|
||||
ConsistencyCheck {
|
||||
check: "unregistered_with_uuid".to_string(),
|
||||
@@ -130,19 +168,28 @@ async fn check_unregistered_with_uuid(db: &PostgresDb) -> ConsistencyCheck {
|
||||
async fn check_processing_job_done(db: &PostgresDb) -> ConsistencyCheck {
|
||||
let vt = schema::table_name("videos");
|
||||
let mj = schema::table_name("monitor_jobs");
|
||||
let rows: Vec<(String, String, String)> = sqlx::query_as::<_, (String, String, String)>(&format!(
|
||||
"SELECT v.file_uuid, v.file_name, v.status \
|
||||
let rows: Vec<(String, String, String)> =
|
||||
sqlx::query_as::<_, (String, String, String)>(&format!(
|
||||
"SELECT v.file_uuid, v.file_name, v.status \
|
||||
FROM {} v JOIN {} m ON v.file_uuid = m.uuid \
|
||||
WHERE v.status = 'processing' AND m.status = 'completed'",
|
||||
vt, mj
|
||||
))
|
||||
.fetch_all(db.pool())
|
||||
.await
|
||||
.unwrap_or_default();
|
||||
vt, mj
|
||||
))
|
||||
.fetch_all(db.pool())
|
||||
.await
|
||||
.unwrap_or_default();
|
||||
|
||||
let files: Vec<ConsistencyFile> = rows.into_iter().map(|(file_uuid, file_name, status): (String, String, String)| ConsistencyFile {
|
||||
file_uuid, file_name, status, detail: "monitor_job already completed".to_string(),
|
||||
}).collect();
|
||||
let files: Vec<ConsistencyFile> = rows
|
||||
.into_iter()
|
||||
.map(
|
||||
|(file_uuid, file_name, status): (String, String, String)| ConsistencyFile {
|
||||
file_uuid,
|
||||
file_name,
|
||||
status,
|
||||
detail: "monitor_job already completed".to_string(),
|
||||
},
|
||||
)
|
||||
.collect();
|
||||
|
||||
ConsistencyCheck {
|
||||
check: "processing_job_done".to_string(),
|
||||
|
||||
@@ -54,8 +54,7 @@ pub fn read_identity_file(uuid: &str) -> Result<IdentityFile> {
|
||||
let path = identity_file_path(uuid);
|
||||
let content = std::fs::read_to_string(&path)
|
||||
.with_context(|| format!("Identity file not found: {} ({})", uuid, path.display()))?;
|
||||
serde_json::from_str(&content)
|
||||
.with_context(|| format!("Invalid identity.json: {}", uuid))
|
||||
serde_json::from_str(&content).with_context(|| format!("Invalid identity.json: {}", uuid))
|
||||
}
|
||||
|
||||
pub fn write_identity_file(file: &IdentityFile) -> Result<()> {
|
||||
@@ -167,7 +166,10 @@ pub fn rebuild_index() -> Result<usize> {
|
||||
entries.insert(uuid.clone(), file.name);
|
||||
}
|
||||
Err(e) => {
|
||||
warn!("[identity-storage] Skipping {} in index rebuild: {}", uuid, e);
|
||||
warn!(
|
||||
"[identity-storage] Skipping {} in index rebuild: {}",
|
||||
uuid, e
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -187,18 +189,16 @@ pub async fn save_identity_file_by_pool(pool: &sqlx::PgPool, uuid: &str) -> Resu
|
||||
let identity_table = crate::core::db::schema::table_name("identities");
|
||||
let fd_table = crate::core::db::schema::table_name("face_detections");
|
||||
|
||||
// Schema-aware column selection: dev uses 'name', public uses 'real_name'
|
||||
let name_col = if identity_table.starts_with("dev.") { "name" } else { "real_name" };
|
||||
|
||||
let clean = uuid.replace('-', "");
|
||||
|
||||
let record = sqlx::query_as::<_, crate::core::db::IdentityDetailRecord>(
|
||||
&format!(
|
||||
"SELECT id, uuid::text, {} AS name, identity_type, source, status, metadata, reference_data, \
|
||||
NULL::real[] as voice_embedding, NULL::real[] as identity_embedding, \
|
||||
face_embedding::real[] as face_embedding, \
|
||||
tmdb_id, tmdb_profile, created_at::timestamptz as created_at, NULL::timestamptz as updated_at \
|
||||
FROM {} WHERE REPLACE(uuid::text, '-', '') = $1",
|
||||
name_col, identity_table
|
||||
"SELECT id, uuid::text, name, identity_type, source, status, metadata, reference_data, \
|
||||
NULL::real[] as voice_embedding, NULL::real[] as identity_embedding, \
|
||||
face_embedding::real[] as face_embedding, \
|
||||
tmdb_id, tmdb_profile, created_at::timestamptz as created_at, NULL::timestamptz as updated_at \
|
||||
FROM {} WHERE REPLACE(uuid::text, '-', '') = $1",
|
||||
identity_table
|
||||
)
|
||||
)
|
||||
.bind(&clean)
|
||||
@@ -322,8 +322,13 @@ pub fn update_index_at(base: &std::path::Path, uuid: &str, name: &str) -> Result
|
||||
let mut entries: HashMap<String, String> = if index_path.exists() {
|
||||
let content = std::fs::read_to_string(&index_path)?;
|
||||
let v: serde_json::Value = serde_json::from_str(&content).unwrap_or_default();
|
||||
v["entries"].as_object()
|
||||
.map(|obj| obj.iter().map(|(k, v)| (k.clone(), v.as_str().unwrap_or("").to_string())).collect())
|
||||
v["entries"]
|
||||
.as_object()
|
||||
.map(|obj| {
|
||||
obj.iter()
|
||||
.map(|(k, v)| (k.clone(), v.as_str().unwrap_or("").to_string()))
|
||||
.collect()
|
||||
})
|
||||
.unwrap_or_default()
|
||||
} else {
|
||||
HashMap::new()
|
||||
@@ -338,7 +343,9 @@ pub fn update_index_at(base: &std::path::Path, uuid: &str, name: &str) -> Result
|
||||
}
|
||||
|
||||
pub async fn save_identity_file(db: &PostgresDb, uuid: &str) -> Result<()> {
|
||||
let record = db.get_identity_by_uuid(uuid).await?
|
||||
let record = db
|
||||
.get_identity_by_uuid(uuid)
|
||||
.await?
|
||||
.with_context(|| format!("Identity not found in DB: {}", uuid))?;
|
||||
|
||||
let identity_uuid = record.uuid.clone();
|
||||
@@ -415,6 +422,7 @@ mod tests {
|
||||
status: Some("confirmed".to_string()),
|
||||
tmdb_id: Some(112),
|
||||
tmdb_profile: Some("https://image.tmdb.org/t/p/w185/test.jpg".to_string()),
|
||||
local_profile: None,
|
||||
metadata: serde_json::json!({"tmdb_character": "Test Role"}),
|
||||
file_bindings: vec![FileBinding {
|
||||
file_uuid: "ffffffffffffffffffffffffffffffff".to_string(),
|
||||
@@ -442,7 +450,9 @@ mod tests {
|
||||
fn test_identity_dir_path() {
|
||||
let uuid = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
|
||||
let p = identity_dir(uuid);
|
||||
assert!(p.to_string_lossy().ends_with(&format!("identities/{}", uuid)));
|
||||
assert!(p
|
||||
.to_string_lossy()
|
||||
.ends_with(&format!("identities/{}", uuid)));
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -463,7 +473,10 @@ mod tests {
|
||||
let base = Path::new("/tmp/test_base");
|
||||
let uuid = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb";
|
||||
let p = identity_dir_at(base, uuid);
|
||||
assert_eq!(p, Path::new("/tmp/test_base/identities/bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"));
|
||||
assert_eq!(
|
||||
p,
|
||||
Path::new("/tmp/test_base/identities/bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb")
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -490,7 +503,10 @@ mod tests {
|
||||
assert_eq!(read.name, file.name);
|
||||
assert_eq!(read.source, file.source);
|
||||
assert_eq!(read.tmdb_id, file.tmdb_id);
|
||||
assert_eq!(read.file_bindings[0].face_count, file.file_bindings[0].face_count);
|
||||
assert_eq!(
|
||||
read.file_bindings[0].face_count,
|
||||
file.file_bindings[0].face_count
|
||||
);
|
||||
|
||||
let _ = std::fs::remove_dir_all(&tmp);
|
||||
}
|
||||
@@ -521,9 +537,21 @@ mod tests {
|
||||
let _ = std::fs::remove_dir_all(&tmp);
|
||||
let base = &tmp;
|
||||
|
||||
std::fs::create_dir_all(base.join("identities").join("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")).unwrap();
|
||||
std::fs::create_dir_all(base.join("identities").join("bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb")).unwrap();
|
||||
std::fs::create_dir_all(base.join("identities").join("cccccccccccccccccccccccccccccccc")).unwrap();
|
||||
std::fs::create_dir_all(
|
||||
base.join("identities")
|
||||
.join("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
|
||||
)
|
||||
.unwrap();
|
||||
std::fs::create_dir_all(
|
||||
base.join("identities")
|
||||
.join("bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"),
|
||||
)
|
||||
.unwrap();
|
||||
std::fs::create_dir_all(
|
||||
base.join("identities")
|
||||
.join("cccccccccccccccccccccccccccccccc"),
|
||||
)
|
||||
.unwrap();
|
||||
std::fs::create_dir_all(base.join("identities").join("not_a_uuid")).unwrap();
|
||||
std::fs::create_dir_all(base.join("identities").join("short")).unwrap();
|
||||
|
||||
|
||||
@@ -56,19 +56,25 @@ impl IngestionService {
|
||||
.to_string();
|
||||
|
||||
// 1. Compute SHA256 for dedup
|
||||
let content_hash = crate::core::storage::content_hash::compute_sha256(&canonical_path).ok().unwrap_or_default();
|
||||
let content_hash = crate::core::storage::content_hash::compute_sha256(&canonical_path)
|
||||
.ok()
|
||||
.unwrap_or_default();
|
||||
|
||||
// 2. Hash check — same content = already registered
|
||||
let videos_table = schema::table_name("videos");
|
||||
if !content_hash.is_empty() {
|
||||
if let Ok(Some(existing_uuid)) = sqlx::query_scalar::<_, String>(
|
||||
&format!("SELECT file_uuid FROM {} WHERE content_hash = $1 LIMIT 1", videos_table)
|
||||
)
|
||||
if let Ok(Some(existing_uuid)) = sqlx::query_scalar::<_, String>(&format!(
|
||||
"SELECT file_uuid FROM {} WHERE content_hash = $1 LIMIT 1",
|
||||
videos_table
|
||||
))
|
||||
.bind(&content_hash)
|
||||
.fetch_optional(self.db.pool())
|
||||
.await
|
||||
{
|
||||
info!("Content already registered: {} ({})", filename, existing_uuid);
|
||||
info!(
|
||||
"Content already registered: {} ({})",
|
||||
filename, existing_uuid
|
||||
);
|
||||
return Ok(Some(existing_uuid));
|
||||
}
|
||||
}
|
||||
@@ -108,7 +114,8 @@ impl IngestionService {
|
||||
let probe_result = probe::probe_video(file_path).ok();
|
||||
let file_meta = std::fs::metadata(&canonical_path).ok();
|
||||
|
||||
let duration = probe_result.as_ref()
|
||||
let duration = probe_result
|
||||
.as_ref()
|
||||
.and_then(|r| r.format.duration.as_ref())
|
||||
.and_then(|s| s.parse::<f64>().ok())
|
||||
.unwrap_or(0.0);
|
||||
@@ -148,7 +155,11 @@ impl IngestionService {
|
||||
}
|
||||
|
||||
let total_frames = {
|
||||
let video_stream = probe_result.as_ref().and_then(|pr| pr.streams.iter().find(|s| s.codec_type.as_deref() == Some("video")));
|
||||
let video_stream = probe_result.as_ref().and_then(|pr| {
|
||||
pr.streams
|
||||
.iter()
|
||||
.find(|s| s.codec_type.as_deref() == Some("video"))
|
||||
});
|
||||
|
||||
if let Some(stream) = video_stream {
|
||||
if let Some(nb_frames_str) = &stream.nb_frames {
|
||||
@@ -223,11 +234,14 @@ impl IngestionService {
|
||||
// Store content_hash for dedup
|
||||
if !content_hash.is_empty() {
|
||||
let vt = schema::table_name("videos");
|
||||
let _ = sqlx::query(&format!("UPDATE {} SET content_hash = $1 WHERE file_uuid = $2", vt))
|
||||
.bind(&content_hash)
|
||||
.bind(&uuid)
|
||||
.execute(self.db.pool())
|
||||
.await;
|
||||
let _ = sqlx::query(&format!(
|
||||
"UPDATE {} SET content_hash = $1 WHERE file_uuid = $2",
|
||||
vt
|
||||
))
|
||||
.bind(&content_hash)
|
||||
.bind(&uuid)
|
||||
.execute(self.db.pool())
|
||||
.await;
|
||||
}
|
||||
|
||||
self.db
|
||||
@@ -243,5 +257,3 @@ impl IngestionService {
|
||||
Ok(Some(uuid))
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -17,42 +17,84 @@ mod tests {
|
||||
#[test]
fn test_detect_category_image() {
    // Each of these extensions is expected to be classified as an image.
    let image_files = [
        "photo.jpg",
        "photo.jpeg",
        "photo.png",
        "photo.svg",
        "photo.webp",
    ];
    for file_name in image_files {
        assert_eq!(detect_category(Path::new(file_name)), FileCategory::Image);
    }
}
|
||||
|
||||
#[test]
fn test_detect_category_document() {
    // Each of these extensions is expected to be classified as a document.
    let document_files = ["doc.pdf", "doc.docx", "doc.pages", "doc.txt"];
    for file_name in document_files {
        assert_eq!(
            detect_category(Path::new(file_name)),
            FileCategory::Document
        );
    }
}
|
||||
|
||||
#[test]
fn test_detect_category_spreadsheet() {
    // Each of these extensions is expected to be classified as a spreadsheet.
    let spreadsheet_files = ["data.xlsx", "data.csv", "data.numbers"];
    for file_name in spreadsheet_files {
        assert_eq!(
            detect_category(Path::new(file_name)),
            FileCategory::Spreadsheet
        );
    }
}
|
||||
|
||||
#[test]
fn test_detect_category_presentation() {
    // Each of these extensions is expected to be classified as a presentation.
    let presentation_files = ["deck.pptx", "deck.key"];
    for file_name in presentation_files {
        assert_eq!(
            detect_category(Path::new(file_name)),
            FileCategory::Presentation
        );
    }
}
|
||||
|
||||
#[test]
fn test_detect_category_archive() {
    // `Path::extension` only yields the last component, so `files.tar.gz`
    // is looked up by its `gz` extension.
    let archive_files = ["files.zip", "files.tar.gz"];
    for file_name in archive_files {
        assert_eq!(
            detect_category(Path::new(file_name)),
            FileCategory::Archive
        );
    }
}
|
||||
|
||||
#[test]
fn test_detect_category_unknown() {
    // An unrecognised extension and a path with no extension at all are
    // both expected to come back as `Unknown`.
    let unknown_files = ["file.xyz", "file"];
    for file_name in unknown_files {
        assert_eq!(
            detect_category(Path::new(file_name)),
            FileCategory::Unknown
        );
    }
}
|
||||
|
||||
@@ -84,13 +126,18 @@ pub enum FileCategory {
|
||||
|
||||
/// Detect file category from path extension
|
||||
pub fn detect_category(path: &Path) -> FileCategory {
|
||||
let ext = path.extension()
|
||||
let ext = path
|
||||
.extension()
|
||||
.and_then(|e| e.to_str())
|
||||
.map(|e| e.to_lowercase());
|
||||
match ext.as_deref() {
|
||||
Some("mp4" | "mov" | "mkv" | "avi" | "webm" | "m4v" | "mpeg") => FileCategory::Video,
|
||||
Some("jpg" | "jpeg" | "png" | "gif" | "bmp" | "webp" | "svg" | "heic" | "tiff") => FileCategory::Image,
|
||||
Some("pdf" | "doc" | "docx" | "odt" | "pages" | "rtf" | "txt" | "md" | "rst") => FileCategory::Document,
|
||||
Some("jpg" | "jpeg" | "png" | "gif" | "bmp" | "webp" | "svg" | "heic" | "tiff") => {
|
||||
FileCategory::Image
|
||||
}
|
||||
Some("pdf" | "doc" | "docx" | "odt" | "pages" | "rtf" | "txt" | "md" | "rst") => {
|
||||
FileCategory::Document
|
||||
}
|
||||
Some("xls" | "xlsx" | "csv" | "ods" | "numbers") => FileCategory::Spreadsheet,
|
||||
Some("ppt" | "pptx" | "odp" | "key") => FileCategory::Presentation,
|
||||
Some("zip" | "tar" | "gz" | "tgz" | "7z" | "rar") => FileCategory::Archive,
|
||||
@@ -102,16 +149,20 @@ pub fn detect_category(path: &Path) -> FileCategory {
|
||||
pub fn base_format_info(path: &Path) -> serde_json::Value {
|
||||
let meta = std::fs::metadata(path).ok();
|
||||
let size = meta.as_ref().map(|m| m.len()).unwrap_or(0);
|
||||
let mtime = meta.as_ref()
|
||||
let mtime = meta
|
||||
.as_ref()
|
||||
.and_then(|m| m.modified().ok())
|
||||
.and_then(|t| {
|
||||
let secs = t.duration_since(SystemTime::UNIX_EPOCH).ok()?.as_secs() as i64;
|
||||
chrono::DateTime::from_timestamp(secs, 0)
|
||||
.map(|dt| dt.to_rfc3339())
|
||||
chrono::DateTime::from_timestamp(secs, 0).map(|dt| dt.to_rfc3339())
|
||||
})
|
||||
.unwrap_or_default();
|
||||
let fname = path.to_string_lossy().to_string();
|
||||
let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("").to_lowercase();
|
||||
let ext = path
|
||||
.extension()
|
||||
.and_then(|e| e.to_str())
|
||||
.unwrap_or("")
|
||||
.to_lowercase();
|
||||
let cat = detect_category(path);
|
||||
let file_type = match cat {
|
||||
FileCategory::Video => "video",
|
||||
@@ -150,7 +201,13 @@ fn ffprobe_probe(path: &Path, format_base: serde_json::Value) -> serde_json::Val
|
||||
}
|
||||
|
||||
/// Run Python probe for document/spreadsheet/presentation files
|
||||
fn python_probe(path: &Path, category: &FileCategory, scripts_dir: &str, python_path: &str, format_base: serde_json::Value) -> serde_json::Value {
|
||||
fn python_probe(
|
||||
path: &Path,
|
||||
category: &FileCategory,
|
||||
scripts_dir: &str,
|
||||
python_path: &str,
|
||||
format_base: serde_json::Value,
|
||||
) -> serde_json::Value {
|
||||
let script = format!("{}/probe_file.py", scripts_dir);
|
||||
if !std::path::Path::new(&script).exists() {
|
||||
return minimal_probe(format_base);
|
||||
@@ -184,18 +241,12 @@ fn minimal_probe(format_base: serde_json::Value) -> serde_json::Value {
|
||||
|
||||
/// Unified probe: dispatches to the right probe based on file type
|
||||
/// Returns a probe_json-compatible Value
|
||||
pub async fn unified_probe(
|
||||
path: &Path,
|
||||
scripts_dir: &str,
|
||||
python_path: &str,
|
||||
) -> serde_json::Value {
|
||||
pub async fn unified_probe(path: &Path, scripts_dir: &str, python_path: &str) -> serde_json::Value {
|
||||
let cat = detect_category(path);
|
||||
let format_base = base_format_info(path);
|
||||
|
||||
match cat {
|
||||
FileCategory::Video | FileCategory::Image => {
|
||||
ffprobe_probe(path, format_base)
|
||||
}
|
||||
FileCategory::Video | FileCategory::Image => ffprobe_probe(path, format_base),
|
||||
FileCategory::Document | FileCategory::Spreadsheet | FileCategory::Presentation => {
|
||||
python_probe(path, &cat, scripts_dir, python_path, format_base)
|
||||
}
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
use anyhow::{Context, Result};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::process::Command;
|
||||
use std::time::Duration;
|
||||
|
||||
use super::executor::PythonExecutor;
|
||||
@@ -27,13 +28,21 @@ pub async fn process_cut(
|
||||
output_path: &str,
|
||||
uuid: Option<&str>,
|
||||
) -> Result<CutResult> {
|
||||
// Try native ffmpeg-based scene detection first
|
||||
let result = try_native_cut(video_path);
|
||||
if let Ok(r) = result {
|
||||
let json = serde_json::to_string_pretty(&r)?;
|
||||
std::fs::write(output_path, &json)
|
||||
.with_context(|| format!("Failed to write {:?}", output_path))?;
|
||||
return Ok(r);
|
||||
}
|
||||
|
||||
// Fallback: Python scenedetect
|
||||
tracing::warn!("[CUT] Native impl failed, falling back to Python");
|
||||
let executor = PythonExecutor::new()?;
|
||||
let script_path = executor.script_path("cut_processor.py");
|
||||
|
||||
tracing::info!("[CUT] Starting scene detection: {}", video_path);
|
||||
|
||||
if !script_path.exists() {
|
||||
tracing::warn!("[CUT] Script not found, returning empty result");
|
||||
return Ok(CutResult {
|
||||
frame_count: 0,
|
||||
fps: 0.0,
|
||||
@@ -53,19 +62,179 @@ pub async fn process_cut(
|
||||
.with_context(|| format!("Failed to run {:?}", script_path))?;
|
||||
|
||||
let json_str = std::fs::read_to_string(output_path).context("Failed to read CUT output")?;
|
||||
|
||||
let result: CutResult =
|
||||
serde_json::from_str(&json_str).context("Failed to parse CUT output")?;
|
||||
|
||||
tracing::info!("[CUT] Result: {} scenes detected", result.scenes.len());
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
// ── Native ffmpeg scene detection ─────────────────────────────────
|
||||
|
||||
fn try_native_cut(video_path: &str) -> Result<CutResult> {
|
||||
// Step 1: Get video info (fps, frame count)
|
||||
let probe = Command::new("ffprobe")
|
||||
.args([
|
||||
"-v",
|
||||
"quiet",
|
||||
"-print_format",
|
||||
"json",
|
||||
"-show_format",
|
||||
"-show_streams",
|
||||
video_path,
|
||||
])
|
||||
.output()
|
||||
.context("Failed to run ffprobe")?;
|
||||
|
||||
let probe_info: serde_json::Value =
|
||||
serde_json::from_slice(&probe.stdout).context("Failed to parse ffprobe output")?;
|
||||
|
||||
let streams = probe_info["streams"]
|
||||
.as_array()
|
||||
.map_or(vec![], |s| s.clone());
|
||||
let video_stream = streams.iter().find(|s| s["codec_type"] == "video");
|
||||
|
||||
let fps = video_stream
|
||||
.and_then(|s| s["r_frame_rate"].as_str().and_then(parse_fraction))
|
||||
.unwrap_or(30.0);
|
||||
|
||||
let total_frames: u64 = video_stream
|
||||
.and_then(|s| s["nb_frames"].as_str())
|
||||
.and_then(|s| s.parse().ok())
|
||||
.unwrap_or(0);
|
||||
|
||||
let duration: f64 = probe_info["format"]["duration"]
|
||||
.as_str()
|
||||
.and_then(|s| s.parse().ok())
|
||||
.unwrap_or(0.0);
|
||||
|
||||
// Step 2: Use ffmpeg scene detection filter
|
||||
// The `scene` filter computes the difference between consecutive frames
|
||||
// and outputs when the difference exceeds the threshold (0.3 = medium sensitivity)
|
||||
let scene_output = Command::new("ffprobe")
|
||||
.args([
|
||||
"-v",
|
||||
"quiet",
|
||||
"-show_entries",
|
||||
"frame=pts_time",
|
||||
"-of",
|
||||
"compact=p=0:nk=1",
|
||||
"-f",
|
||||
"lavfi",
|
||||
&format!("movie={},select='gt(scene\\,0.3)',showinfo", video_path),
|
||||
"-show_frames",
|
||||
])
|
||||
.output()
|
||||
.context("Failed to run ffmpeg scene detection")?;
|
||||
|
||||
let stderr_output = String::from_utf8_lossy(&scene_output.stderr);
|
||||
let mut scene_times: Vec<f64> = Vec::new();
|
||||
|
||||
// Parse ffmpeg showinfo output for scene changes
|
||||
// Format: [Parsed_showinfo...] pts:123.456 pts_time:123.456 ...
|
||||
for line in stderr_output.lines() {
|
||||
if line.contains("pts_time:") {
|
||||
if let Some(pos) = line.find("pts_time:") {
|
||||
let rest = &line[pos + 9..];
|
||||
let time_str = rest.split_whitespace().next().unwrap_or("");
|
||||
if let Ok(t) = time_str.parse::<f64>() {
|
||||
scene_times.push(t);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Step 3: Build scenes from cut points
|
||||
let mut scenes: Vec<CutScene> = Vec::new();
|
||||
let mut prev_time = 0.0;
|
||||
let mut prev_frame: u64 = 0;
|
||||
|
||||
for (i, &cut_time) in scene_times.iter().enumerate() {
|
||||
let end_frame = (cut_time * fps).round() as u64;
|
||||
let start_frame = prev_frame;
|
||||
|
||||
if end_frame > start_frame {
|
||||
scenes.push(CutScene {
|
||||
scene_number: (i + 1) as u32,
|
||||
start_frame: prev_frame,
|
||||
end_frame: end_frame.saturating_sub(1),
|
||||
start_time: prev_time,
|
||||
end_time: cut_time - (1.0 / fps),
|
||||
});
|
||||
}
|
||||
|
||||
prev_time = cut_time;
|
||||
prev_frame = end_frame;
|
||||
}
|
||||
|
||||
// Final scene (last cut point → end of video)
|
||||
if total_frames > 0 && prev_frame < total_frames {
|
||||
scenes.push(CutScene {
|
||||
scene_number: (scenes.len() + 1) as u32,
|
||||
start_frame: prev_frame,
|
||||
end_frame: total_frames.saturating_sub(1),
|
||||
start_time: prev_time,
|
||||
end_time: duration,
|
||||
});
|
||||
}
|
||||
|
||||
// If no scenes detected, create a single scene covering the whole video
|
||||
if scenes.is_empty() && total_frames > 0 {
|
||||
scenes.push(CutScene {
|
||||
scene_number: 1,
|
||||
start_frame: 0,
|
||||
end_frame: total_frames.saturating_sub(1),
|
||||
start_time: 0.0,
|
||||
end_time: duration,
|
||||
});
|
||||
}
|
||||
|
||||
Ok(CutResult {
|
||||
frame_count: total_frames,
|
||||
fps,
|
||||
scenes,
|
||||
})
|
||||
}
|
||||
|
||||
/// Parses a frame rate written either as a fraction (`"30000/1001"`) or as a
/// plain number (`"30"`) into an `f64`.
///
/// Returns `None` when either side of the fraction is not a number, when the
/// denominator is not strictly positive, or when a slash-free input does not
/// parse as a float.
fn parse_fraction(s: &str) -> Option<f64> {
    match s.split_once('/') {
        Some((numerator, denominator)) => {
            let numerator: f64 = numerator.parse().ok()?;
            let denominator: f64 = denominator.parse().ok()?;
            if denominator > 0.0 {
                Some(numerator / denominator)
            } else {
                // Same final step as the slash-free case; a string that
                // contains '/' never parses as a float, so this is `None`.
                s.parse::<f64>().ok()
            }
        }
        None => s.parse::<f64>().ok(),
    }
}
|
||||
|
||||
// ── Tests ─────────────────────────────────────────────────────────
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
fn test_parse_fraction() {
    // 30000/1001 is roughly 29.97.
    let fps = parse_fraction("30000/1001").unwrap();
    let deviation = (fps - 29.97).abs();
    assert!(deviation < 0.01);
}
|
||||
|
||||
#[test]
fn test_parse_fraction_int() {
    // Input without a slash is parsed as a plain number.
    let fps = parse_fraction("30").unwrap();
    let deviation = (fps - 30.0).abs();
    assert!(deviation < 0.01);
}
|
||||
|
||||
#[test]
fn test_parse_fraction_invalid() {
    // Non-numeric parts must not yield a value.
    let parsed = parse_fraction("not/a/num");
    assert!(parsed.is_none());
}
|
||||
|
||||
#[test]
fn test_parse_fraction_zero_den() {
    // A zero denominator is rejected rather than producing infinity.
    let parsed = parse_fraction("1/0");
    assert!(parsed.is_none());
}
|
||||
|
||||
#[test]
|
||||
fn test_cut_result_serialization() {
|
||||
let result = CutResult {
|
||||
@@ -81,8 +250,9 @@ mod tests {
|
||||
};
|
||||
|
||||
let json = serde_json::to_string(&result).unwrap();
|
||||
assert!(json.contains("frame_count"));
|
||||
assert!(json.contains("scene_number"));
|
||||
assert!(json.contains("1"));
|
||||
assert!(json.contains("fps"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -90,20 +260,23 @@ mod tests {
|
||||
let json = r#"{
|
||||
"frame_count": 100,
|
||||
"fps": 30.0,
|
||||
"scenes": [
|
||||
{"scene_number": 1, "start_frame": 0, "end_frame": 30, "start_time": 0.0, "end_time": 1.0},
|
||||
{"scene_number": 2, "start_frame": 31, "end_frame": 60, "start_time": 1.033, "end_time": 2.0}
|
||||
]
|
||||
"scenes": [{
|
||||
"scene_number": 1,
|
||||
"start_frame": 0,
|
||||
"end_frame": 30,
|
||||
"start_time": 0.0,
|
||||
"end_time": 1.0
|
||||
}]
|
||||
}"#;
|
||||
|
||||
let result: CutResult = serde_json::from_str(json).unwrap();
|
||||
assert_eq!(result.frame_count, 100);
|
||||
assert_eq!(result.scenes.len(), 2);
|
||||
assert_eq!(result.scenes[1].scene_number, 2);
|
||||
assert_eq!(result.scenes.len(), 1);
|
||||
assert_eq!(result.scenes[0].scene_number, 1);
|
||||
assert_eq!(result.scenes[0].start_frame, 0);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_cut_result_empty_scenes() {
|
||||
fn test_empty_scenes() {
|
||||
let result = CutResult {
|
||||
frame_count: 0,
|
||||
fps: 0.0,
|
||||
@@ -111,17 +284,4 @@ mod tests {
|
||||
};
|
||||
assert!(result.scenes.is_empty());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_cut_scene_times() {
|
||||
let scene = CutScene {
|
||||
scene_number: 1,
|
||||
start_frame: 0,
|
||||
end_frame: 30,
|
||||
start_time: 0.0,
|
||||
end_time: 1.0,
|
||||
};
|
||||
assert!(scene.end_time > scene.start_time);
|
||||
assert_eq!(scene.scene_number, 1);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -109,11 +109,10 @@ pub fn validate_python_env() -> Result<()> {
|
||||
tracing::warn!("Expected Python 3.11, got: {}", version.trim());
|
||||
}
|
||||
|
||||
let scripts_dir = std::env::var("MOMENTRY_SCRIPTS_DIR")
|
||||
.unwrap_or_else(|_| {
|
||||
let manifest = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
|
||||
manifest.join("scripts").to_string_lossy().to_string()
|
||||
});
|
||||
let scripts_dir = std::env::var("MOMENTRY_SCRIPTS_DIR").unwrap_or_else(|_| {
|
||||
let manifest = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
|
||||
manifest.join("scripts").to_string_lossy().to_string()
|
||||
});
|
||||
let script_path = PathBuf::from(&scripts_dir);
|
||||
if !script_path.exists() {
|
||||
anyhow::bail!("Scripts directory not found at {}", scripts_dir);
|
||||
@@ -133,11 +132,10 @@ impl PythonExecutor {
|
||||
pub fn new() -> Result<Self> {
|
||||
let python_path = std::env::var("MOMENTRY_PYTHON_PATH")
|
||||
.unwrap_or_else(|_| "/opt/homebrew/bin/python3.11".to_string());
|
||||
let scripts_dir = std::env::var("MOMENTRY_SCRIPTS_DIR")
|
||||
.unwrap_or_else(|_| {
|
||||
let manifest = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
|
||||
manifest.join("scripts").to_string_lossy().to_string()
|
||||
});
|
||||
let scripts_dir = std::env::var("MOMENTRY_SCRIPTS_DIR").unwrap_or_else(|_| {
|
||||
let manifest = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
|
||||
manifest.join("scripts").to_string_lossy().to_string()
|
||||
});
|
||||
|
||||
let python_bin = PathBuf::from(&python_path);
|
||||
let scripts_path = PathBuf::from(&scripts_dir);
|
||||
@@ -173,7 +171,8 @@ impl PythonExecutor {
|
||||
|
||||
if let Some(expected_hash) = self.checksums.get(&rel_path) {
|
||||
let output = std::process::Command::new("shasum")
|
||||
.arg("-a").arg("256")
|
||||
.arg("-a")
|
||||
.arg("256")
|
||||
.arg(&script_path)
|
||||
.output()
|
||||
.context("Failed to run shasum for integrity check")?;
|
||||
@@ -235,8 +234,9 @@ impl PythonExecutor {
|
||||
}
|
||||
|
||||
// Verify script integrity via SHA256 checksum before execution
|
||||
self.verify_script_integrity(script_name)
|
||||
.context("Pre-execution integrity check failed — possible version mismatch or corruption")?;
|
||||
self.verify_script_integrity(script_name).context(
|
||||
"Pre-execution integrity check failed — possible version mismatch or corruption",
|
||||
)?;
|
||||
|
||||
// 標記輸出檔為處理中(add .tmp suffix)
|
||||
let output_path = args.get(1).map(|p| std::path::PathBuf::from(p));
|
||||
|
||||
@@ -44,22 +44,59 @@ pub enum CrowdSize {
|
||||
|
||||
/// Indoor-indicative YOLO classes (COCO labels)
const INDOOR_CLASSES: &[&str] = &[
    // Furniture and fixtures
    "chair",
    "couch",
    "bed",
    "dining table",
    "toilet",
    // Electronics and appliances
    "tv",
    "laptop",
    "microwave",
    "oven",
    "refrigerator",
    "sink",
    // Small household items
    "book",
    "clock",
    "vase",
    "potted plant",
];
|
||||
|
||||
/// Vehicle-indicative classes (person + vehicle = transport scene)
///
/// Aircraft and motorcycles are listed under both label spellings in
/// circulation ("aeroplane"/"airplane", "motorbike"/"motorcycle"). The sibling
/// class lists in this file use the newer COCO names (e.g. "airplane"), so a
/// detector emitting those names would otherwise never match here. A spelling
/// the detector does not emit simply never occurs in the data.
const VEHICLE_CLASSES: &[&str] = &[
    "car",
    "truck",
    "bus",
    "train",
    "boat",
    "aeroplane",
    "airplane",
    "bicycle",
    "motorbike",
    "motorcycle",
];
|
||||
|
||||
/// Outdoor-indicative YOLO classes
const OUTDOOR_CLASSES: &[&str] = &[
    // Vehicles
    "car",
    "truck",
    "bus",
    "train",
    "boat",
    "airplane",
    // Street furniture
    "traffic light",
    "fire hydrant",
    "stop sign",
    "parking meter",
    "bench",
    // Animals
    "bird",
    "cat",
    "dog",
    "horse",
    "sheep",
    "cow",
    "elephant",
    "bear",
    "zebra",
    "giraffe",
    // Vegetation
    "tree",
];
|
||||
|
||||
/// Build heuristic scene metadata from disk files (yolo.json + DB face data).
|
||||
@@ -113,13 +150,14 @@ pub async fn build_heuristic_scene_meta(
|
||||
|
||||
// Get face counts grouped by frame
|
||||
let fd_table = schema::table_name("face_detections");
|
||||
let face_rows: Vec<(i64, i64)> = sqlx::query_as(
|
||||
&format!("SELECT frame_number, COUNT(*) as fc \
|
||||
let face_rows: Vec<(i64, i64)> = sqlx::query_as(&format!(
|
||||
"SELECT frame_number, COUNT(*) as fc \
|
||||
FROM {} \
|
||||
WHERE file_uuid = $1 AND frame_number IS NOT NULL \
|
||||
GROUP BY frame_number \
|
||||
ORDER BY frame_number", fd_table),
|
||||
)
|
||||
ORDER BY frame_number",
|
||||
fd_table
|
||||
))
|
||||
.bind(file_uuid)
|
||||
.fetch_all(pool)
|
||||
.await
|
||||
@@ -166,7 +204,10 @@ pub async fn build_heuristic_scene_meta(
|
||||
let outdoor_ratio = outdoor_objects as f64 / frame_count.max(1) as f64;
|
||||
let total_indicator = indoor_ratio + outdoor_ratio;
|
||||
let (indoor_score, outdoor_score) = if total_indicator > 0.0 {
|
||||
(indoor_ratio / total_indicator, outdoor_ratio / total_indicator)
|
||||
(
|
||||
indoor_ratio / total_indicator,
|
||||
outdoor_ratio / total_indicator,
|
||||
)
|
||||
} else {
|
||||
(0.5, 0.5)
|
||||
};
|
||||
@@ -187,17 +228,13 @@ pub async fn build_heuristic_scene_meta(
|
||||
.map(|c| class_frame_presence.get(*c).copied().unwrap_or(0))
|
||||
.sum();
|
||||
let person_ratio = person_frames as f64 / frame_count.max(1) as f64;
|
||||
let likely_vehicle = person_ratio > 0.5 && vehicle_frames > 0
|
||||
&& outdoor_score > 0.3;
|
||||
let likely_vehicle = person_ratio > 0.5 && vehicle_frames > 0 && outdoor_score > 0.3;
|
||||
|
||||
// Dominant objects: rank by frame presence (not total count)
|
||||
let mut sorted: Vec<_> = class_frame_presence.into_iter().collect();
|
||||
sorted.sort_by(|a, b| b.1.cmp(&a.1));
|
||||
let dominant_objects: Vec<String> = sorted
|
||||
.iter()
|
||||
.take(3)
|
||||
.map(|(cls, _)| cls.clone())
|
||||
.collect();
|
||||
let dominant_objects: Vec<String> =
|
||||
sorted.iter().take(3).map(|(cls, _)| cls.clone()).collect();
|
||||
|
||||
segments.push(SceneSegmentMeta {
|
||||
segment_index: idx as u32 + 1,
|
||||
@@ -229,12 +266,15 @@ pub async fn build_heuristic_scene_meta(
|
||||
|
||||
/// Full pipeline entry point: reads CUT data, generates heuristic metadata, writes JSON.
|
||||
/// Called from job_worker post-processing trigger.
|
||||
pub async fn generate_scene_meta(db: &crate::core::db::PostgresDb, file_uuid: &str) -> Result<usize> {
|
||||
pub async fn generate_scene_meta(
|
||||
db: &crate::core::db::PostgresDb,
|
||||
file_uuid: &str,
|
||||
) -> Result<usize> {
|
||||
let pool = db.pool();
|
||||
|
||||
// Read CUT segment boundaries from cut.json
|
||||
let cut_path = Path::new(crate::core::config::OUTPUT_DIR.as_str())
|
||||
.join(format!("{}.cut.json", file_uuid));
|
||||
let cut_path =
|
||||
Path::new(crate::core::config::OUTPUT_DIR.as_str()).join(format!("{}.cut.json", file_uuid));
|
||||
let segments: Vec<(i64, i64, f64, f64)> = if cut_path.exists() {
|
||||
let cut_str = tokio::fs::read_to_string(&cut_path)
|
||||
.await
|
||||
@@ -250,8 +290,7 @@ pub async fn generate_scene_meta(db: &crate::core::db::PostgresDb, file_uuid: &s
|
||||
start_time: f64,
|
||||
end_time: f64,
|
||||
}
|
||||
let cut: CutJson = serde_json::from_str(&cut_str)
|
||||
.context("Failed to parse cut.json")?;
|
||||
let cut: CutJson = serde_json::from_str(&cut_str).context("Failed to parse cut.json")?;
|
||||
cut.scenes
|
||||
.into_iter()
|
||||
.map(|s| (s.start_frame, s.end_frame, s.start_time, s.end_time))
|
||||
@@ -259,9 +298,10 @@ pub async fn generate_scene_meta(db: &crate::core::db::PostgresDb, file_uuid: &s
|
||||
} else {
|
||||
// Fallback: query DB for video duration, make one segment
|
||||
let videos_table = schema::table_name("videos");
|
||||
let (total_frames, duration): (Option<i64>, Option<f64>) = sqlx::query_as(
|
||||
&format!("SELECT total_frames, duration FROM {} WHERE file_uuid = $1", videos_table),
|
||||
)
|
||||
let (total_frames, duration): (Option<i64>, Option<f64>) = sqlx::query_as(&format!(
|
||||
"SELECT total_frames, duration FROM {} WHERE file_uuid = $1",
|
||||
videos_table
|
||||
))
|
||||
.bind(file_uuid)
|
||||
.fetch_optional(pool)
|
||||
.await
|
||||
|
||||
@@ -10,6 +10,7 @@ pub mod ocr;
|
||||
pub mod pose;
|
||||
pub mod scene_classification;
|
||||
pub mod story;
|
||||
pub mod tkg;
|
||||
pub mod visual_chunk;
|
||||
pub mod yolo;
|
||||
|
||||
@@ -25,7 +26,8 @@ pub use face_recognition::{
|
||||
RecognizedFaceDetection,
|
||||
};
|
||||
pub use heuristic_scene::{
|
||||
build_heuristic_scene_meta, generate_scene_meta, CrowdSize, HeuristicSceneMeta, SceneSegmentMeta,
|
||||
build_heuristic_scene_meta, generate_scene_meta, CrowdSize, HeuristicSceneMeta,
|
||||
SceneSegmentMeta,
|
||||
};
|
||||
pub use ocr::{process_ocr, OcrFrame, OcrResult, OcrText};
|
||||
pub use pose::{process_pose, Bbox, Keypoint, PersonPose, PoseFrame, PoseResult};
|
||||
@@ -34,5 +36,6 @@ pub use scene_classification::{
|
||||
SceneSegment,
|
||||
};
|
||||
pub use story::{process_story, StoryChildChunk, StoryParentChunk, StoryResult, StoryStats};
|
||||
pub use tkg::{build_tkg, TkgResult};
|
||||
pub use visual_chunk::{process_visual_chunk, process_visual_chunk_advanced, VisualChunkResult};
|
||||
pub use yolo::{process_yolo, YoloFrame, YoloObject, YoloResult};
|
||||
|
||||
@@ -106,7 +106,10 @@ pub async fn process_story(
|
||||
}
|
||||
|
||||
// Fallback: Python script
|
||||
tracing::warn!("[STORY] Native impl failed, falling back to Python: {:?}", result.err());
|
||||
tracing::warn!(
|
||||
"[STORY] Native impl failed, falling back to Python: {:?}",
|
||||
result.err()
|
||||
);
|
||||
let executor = PythonExecutor::new()?;
|
||||
let script_path = executor.script_path("story_processor.py");
|
||||
|
||||
@@ -145,7 +148,11 @@ pub async fn process_story(
|
||||
|
||||
// ── Native implementation ─────────────────────────────────────────
|
||||
|
||||
fn try_native_story(_video_path: &str, output_path: &str, _uuid: Option<&str>) -> Result<StoryResult> {
|
||||
fn try_native_story(
|
||||
_video_path: &str,
|
||||
output_path: &str,
|
||||
_uuid: Option<&str>,
|
||||
) -> Result<StoryResult> {
|
||||
let output_dir = Path::new(output_path).parent().unwrap_or(Path::new("."));
|
||||
let basename = Path::new(output_path)
|
||||
.file_stem()
|
||||
@@ -160,8 +167,7 @@ fn try_native_story(_video_path: &str, output_path: &str, _uuid: Option<&str>) -
|
||||
let asr_data: AsrData = if asr_path.exists() {
|
||||
let content = std::fs::read_to_string(&asr_path)
|
||||
.with_context(|| format!("Failed to read {:?}", asr_path))?;
|
||||
serde_json::from_str(&content)
|
||||
.with_context(|| format!("Failed to parse {:?}", asr_path))?
|
||||
serde_json::from_str(&content).with_context(|| format!("Failed to parse {:?}", asr_path))?
|
||||
} else {
|
||||
AsrData { segments: vec![] }
|
||||
};
|
||||
@@ -169,8 +175,7 @@ fn try_native_story(_video_path: &str, output_path: &str, _uuid: Option<&str>) -
|
||||
let cut_data: CutData = if cut_path.exists() {
|
||||
let content = std::fs::read_to_string(&cut_path)
|
||||
.with_context(|| format!("Failed to read {:?}", cut_path))?;
|
||||
serde_json::from_str(&content)
|
||||
.with_context(|| format!("Failed to parse {:?}", cut_path))?
|
||||
serde_json::from_str(&content).with_context(|| format!("Failed to parse {:?}", cut_path))?
|
||||
} else {
|
||||
CutData { scenes: vec![] }
|
||||
};
|
||||
@@ -376,22 +381,39 @@ fn generate_narrative(texts: &[String], objects: &[String], start: f64, end: f64
|
||||
let mut unique: Vec<&String> = objects.iter().collect();
|
||||
unique.sort();
|
||||
unique.dedup();
|
||||
let objs = unique.iter().take(5).map(|s| (*s).as_str()).collect::<Vec<_>>().join(", ");
|
||||
let objs = unique
|
||||
.iter()
|
||||
.take(5)
|
||||
.map(|s| (*s).as_str())
|
||||
.collect::<Vec<_>>()
|
||||
.join(", ");
|
||||
parts.push(format!("Visuals: {}", objs));
|
||||
}
|
||||
|
||||
format!("[{:.0}s-{:.0}s] {}", start, end, parts.join(" | "))
|
||||
}
|
||||
|
||||
fn generate_scene_narrative(objects: &[String], start: f64, end: f64, scene_count: usize) -> String {
|
||||
fn generate_scene_narrative(
|
||||
objects: &[String],
|
||||
start: f64,
|
||||
end: f64,
|
||||
scene_count: usize,
|
||||
) -> String {
|
||||
let mut unique: Vec<&String> = objects.iter().collect();
|
||||
unique.sort();
|
||||
unique.dedup();
|
||||
let top5: Vec<&String> = unique.iter().take(5).cloned().collect();
|
||||
|
||||
if !top5.is_empty() {
|
||||
let obj_str = top5.iter().map(|s| s.as_str()).collect::<Vec<_>>().join(", ");
|
||||
format!("[{:.0}s-{:.0}s] {} scenes. Visuals: {}.", start, end, scene_count, obj_str)
|
||||
let obj_str = top5
|
||||
.iter()
|
||||
.map(|s| s.as_str())
|
||||
.collect::<Vec<_>>()
|
||||
.join(", ");
|
||||
format!(
|
||||
"[{:.0}s-{:.0}s] {} scenes. Visuals: {}.",
|
||||
start, end, scene_count, obj_str
|
||||
)
|
||||
} else {
|
||||
format!("[{:.0}s-{:.0}s] {} video scenes.", start, end, scene_count)
|
||||
}
|
||||
@@ -408,7 +430,8 @@ mod tests {
|
||||
let text = generate_narrative(
|
||||
&["Hello world".to_string()],
|
||||
&["person".to_string()],
|
||||
0.0, 5.0,
|
||||
0.0,
|
||||
5.0,
|
||||
);
|
||||
assert!(text.contains("[0s-5s]"));
|
||||
assert!(text.contains("Speech:"));
|
||||
@@ -576,7 +599,10 @@ mod tests {
|
||||
};
|
||||
|
||||
assert_eq!(result.parent_chunks[0].child_chunk_ids.len(), 2);
|
||||
assert!(result.child_chunks.iter().all(|c| c.parent_chunk_id.is_some()));
|
||||
assert!(result
|
||||
.child_chunks
|
||||
.iter()
|
||||
.all(|c| c.parent_chunk_id.is_some()));
|
||||
assert!(result.parent_chunks[0].parent_chunk_id.is_none());
|
||||
}
|
||||
|
||||
@@ -594,11 +620,7 @@ mod tests {
|
||||
std::fs::write(&asr_path, r#"{"segments":[]}"#).unwrap();
|
||||
std::fs::write(&cut_path, r#"{"scenes":[]}"#).unwrap();
|
||||
|
||||
let result = try_native_story(
|
||||
"/dummy.mp4",
|
||||
out_path.to_str().unwrap(),
|
||||
None,
|
||||
).unwrap();
|
||||
let result = try_native_story("/dummy.mp4", out_path.to_str().unwrap(), None).unwrap();
|
||||
|
||||
assert_eq!(result.stats.total_child_chunks, 0);
|
||||
assert_eq!(result.stats.total_parent_chunks, 0);
|
||||
@@ -616,13 +638,17 @@ mod tests {
|
||||
let cut_path = dir.join(format!("{}.cut.json", basename));
|
||||
let out_path = dir.join(format!("{}.story.json", basename));
|
||||
|
||||
std::fs::write(&asr_path, r#"{
|
||||
std::fs::write(
|
||||
&asr_path,
|
||||
r#"{
|
||||
"segments": [
|
||||
{"start": 0.0, "end": 2.5, "text": "Hello", "confidence": 0.95},
|
||||
{"start": 2.5, "end": 5.0, "text": "World", "confidence": 0.92},
|
||||
{"start": 5.0, "end": 7.5, "text": "Foo", "confidence": 0.90}
|
||||
]
|
||||
}"#).unwrap();
|
||||
}"#,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
std::fs::write(&cut_path, r#"{
|
||||
"scenes": [
|
||||
@@ -631,11 +657,7 @@ mod tests {
|
||||
]
|
||||
}"#).unwrap();
|
||||
|
||||
let result = try_native_story(
|
||||
"/dummy.mp4",
|
||||
out_path.to_str().unwrap(),
|
||||
None,
|
||||
).unwrap();
|
||||
let result = try_native_story("/dummy.mp4", out_path.to_str().unwrap(), None).unwrap();
|
||||
|
||||
assert_eq!(result.stats.asr_children, 3);
|
||||
assert_eq!(result.stats.cut_children, 2);
|
||||
@@ -649,7 +671,11 @@ mod tests {
|
||||
for child in &result.child_chunks {
|
||||
if child.source == "asr" {
|
||||
assert!(child.parent_chunk_id.is_some());
|
||||
assert!(child.parent_chunk_id.as_ref().unwrap().starts_with("story_asr_"));
|
||||
assert!(child
|
||||
.parent_chunk_id
|
||||
.as_ref()
|
||||
.unwrap()
|
||||
.starts_with("story_asr_"));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
703
src/core/processor/tkg.rs
Normal file
703
src/core/processor/tkg.rs
Normal file
@@ -0,0 +1,703 @@
|
||||
use anyhow::{Context, Result};
|
||||
use serde::Deserialize;
|
||||
use sqlx::PgPool;
|
||||
use std::collections::HashMap;
|
||||
use std::path::Path;
|
||||
|
||||
use crate::core::db::postgres_db::PostgresDb;
|
||||
|
||||
/// Resolves a bare table name to the form used inside SQL text.
///
/// The schema is taken from the `DATABASE_SCHEMA` environment variable, read
/// on every call, with `dev` as the fallback when the variable cannot be
/// read. The `public` schema yields the bare name; any other schema yields
/// `<schema>.<name>`.
fn t(name: &str) -> String {
    match std::env::var("DATABASE_SCHEMA") {
        Ok(schema) if schema == "public" => name.to_string(),
        Ok(schema) => format!("{}.{}", schema, name),
        // Unset or unreadable variable behaves exactly like `dev`.
        Err(_) => format!("{}.{}", "dev", name),
    }
}
|
||||
|
||||
// ── Input data structs ────────────────────────────────────────────
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct YoloJson {
|
||||
#[serde(default)]
|
||||
frames: HashMap<String, YoloFrameEntry>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct YoloFrameEntry {
|
||||
#[serde(default)]
|
||||
detections: Vec<YoloDetEntry>,
|
||||
#[serde(default)]
|
||||
objects: Vec<YoloDetEntry>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct YoloDetEntry {
|
||||
#[serde(default)]
|
||||
class_name: String,
|
||||
#[serde(default)]
|
||||
confidence: f64,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct AsrxJson {
|
||||
#[serde(default)]
|
||||
segments: Vec<AsrxSegmentEntry>,
|
||||
#[serde(default)]
|
||||
speaker_stats: Option<HashMap<String, AsrxSpeakerStat>>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct AsrxSegmentEntry {
|
||||
#[serde(default)]
|
||||
speaker_id: String,
|
||||
#[serde(default)]
|
||||
start_time: f64,
|
||||
#[serde(default)]
|
||||
end_time: f64,
|
||||
#[allow(dead_code)]
|
||||
start_frame: i64,
|
||||
#[allow(dead_code)]
|
||||
end_frame: i64,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct AsrxSpeakerStat {
|
||||
#[serde(default)]
|
||||
count: i64,
|
||||
}
|
||||
|
||||
// ── Face detection trace ──────────────────────────────────────────
|
||||
|
||||
#[derive(Debug, sqlx::FromRow)]
|
||||
struct FaceTraceRow {
|
||||
trace_id: i64,
|
||||
frame_count: i64,
|
||||
start_f: i64,
|
||||
end_f: i64,
|
||||
avg_x: Option<f64>,
|
||||
avg_y: Option<f64>,
|
||||
avg_w: Option<f64>,
|
||||
avg_h: Option<f64>,
|
||||
}
|
||||
|
||||
#[derive(Debug, sqlx::FromRow)]
|
||||
struct FaceDetectionRow {
|
||||
trace_id: i64,
|
||||
frame_number: i64,
|
||||
#[allow(dead_code)]
|
||||
x: Option<f64>,
|
||||
#[allow(dead_code)]
|
||||
y: Option<f64>,
|
||||
#[allow(dead_code)]
|
||||
width: Option<f64>,
|
||||
#[allow(dead_code)]
|
||||
height: Option<f64>,
|
||||
}
|
||||
|
||||
// ── Public API ────────────────────────────────────────────────────
|
||||
|
||||
/// Summary of one `build_tkg` run: the count returned by each builder.
///
/// The standard derives let callers log, copy, compare and default-construct
/// the summary.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct TkgResult {
    /// Face-trace nodes upserted.
    pub face_trace_nodes: usize,
    /// Object-class nodes upserted.
    pub object_nodes: usize,
    /// Speaker nodes upserted.
    pub speaker_nodes: usize,
    /// Face/object co-occurrence edge upserts.
    pub co_occurrence_edges: usize,
    /// Face/speaker edge upserts.
    pub speaker_face_edges: usize,
    /// Face/face co-occurrence edge upserts.
    pub face_face_edges: usize,
}
|
||||
|
||||
pub async fn build_tkg(db: &PostgresDb, file_uuid: &str, output_dir: &str) -> Result<TkgResult> {
|
||||
let pool = db.pool();
|
||||
let n_face = build_face_trace_nodes(pool, file_uuid).await?;
|
||||
let n_objects = build_yolo_object_nodes(pool, file_uuid, output_dir).await?;
|
||||
let n_speakers = build_speaker_nodes(pool, file_uuid, output_dir).await?;
|
||||
|
||||
let e_co = build_co_occurrence_edges(pool, file_uuid, output_dir).await?;
|
||||
let e_sf = build_speaker_face_edges(pool, file_uuid, output_dir).await?;
|
||||
let e_ff = build_face_face_edges(pool, file_uuid).await?;
|
||||
|
||||
Ok(TkgResult {
|
||||
face_trace_nodes: n_face,
|
||||
object_nodes: n_objects,
|
||||
speaker_nodes: n_speakers,
|
||||
co_occurrence_edges: e_co,
|
||||
speaker_face_edges: e_sf,
|
||||
face_face_edges: e_ff,
|
||||
})
|
||||
}
|
||||
|
||||
// ── Node builders ─────────────────────────────────────────────────
|
||||
|
||||
async fn build_face_trace_nodes(pool: &PgPool, file_uuid: &str) -> Result<usize> {
|
||||
let face_table = t("face_detections");
|
||||
let nodes_table = t("tkg_nodes");
|
||||
|
||||
let rows = sqlx::query_as::<_, FaceTraceRow>(&format!(
|
||||
r#"
|
||||
SELECT trace_id,
|
||||
COUNT(*)::bigint as frame_count,
|
||||
MIN(frame_number) as start_f,
|
||||
MAX(frame_number) as end_f,
|
||||
AVG(x::float8) as avg_x,
|
||||
AVG(y::float8) as avg_y,
|
||||
AVG(width::float8) as avg_w,
|
||||
AVG(height::float8) as avg_h
|
||||
FROM {}
|
||||
WHERE file_uuid = $1 AND trace_id IS NOT NULL
|
||||
GROUP BY trace_id
|
||||
ORDER BY trace_id
|
||||
"#,
|
||||
face_table
|
||||
))
|
||||
.bind(file_uuid)
|
||||
.fetch_all(pool)
|
||||
.await?;
|
||||
|
||||
let mut count = 0;
|
||||
for row in &rows {
|
||||
let external_id = format!("trace_{}", row.trace_id);
|
||||
let label = format!("Face Trace {}", row.trace_id);
|
||||
let props = serde_json::json!({
|
||||
"frame_count": row.frame_count,
|
||||
"start_frame": row.start_f,
|
||||
"end_frame": row.end_f,
|
||||
"avg_bbox": {
|
||||
"x": row.avg_x.unwrap_or(0.0).round() as i64,
|
||||
"y": row.avg_y.unwrap_or(0.0).round() as i64,
|
||||
"width": row.avg_w.unwrap_or(0.0).round() as i64,
|
||||
"height": row.avg_h.unwrap_or(0.0).round() as i64,
|
||||
}
|
||||
});
|
||||
|
||||
sqlx::query(&format!(
|
||||
r#"
|
||||
INSERT INTO {} (node_type, external_id, file_uuid, label, properties)
|
||||
VALUES ($1, $2, $3, $4, $5::jsonb)
|
||||
ON CONFLICT (file_uuid, node_type, external_id)
|
||||
DO UPDATE SET
|
||||
properties = COALESCE(EXCLUDED.properties, tkg_nodes.properties),
|
||||
label = COALESCE(NULLIF(EXCLUDED.label, ''), tkg_nodes.label)
|
||||
"#,
|
||||
nodes_table
|
||||
))
|
||||
.bind("face_trace")
|
||||
.bind(&external_id)
|
||||
.bind(file_uuid)
|
||||
.bind(&label)
|
||||
.bind(serde_json::to_string(&props)?)
|
||||
.execute(pool)
|
||||
.await?;
|
||||
|
||||
count += 1;
|
||||
}
|
||||
|
||||
Ok(count)
|
||||
}
|
||||
|
||||
async fn build_yolo_object_nodes(
|
||||
pool: &PgPool,
|
||||
file_uuid: &str,
|
||||
output_dir: &str,
|
||||
) -> Result<usize> {
|
||||
let yolo_path = Path::new(output_dir).join(format!("{}.yolo.json", file_uuid));
|
||||
if !yolo_path.exists() {
|
||||
return Ok(0);
|
||||
}
|
||||
|
||||
let content = std::fs::read_to_string(&yolo_path)
|
||||
.with_context(|| format!("Failed to read {:?}", yolo_path))?;
|
||||
let yolo: YoloJson = serde_json::from_str(&content)
|
||||
.with_context(|| format!("Failed to parse {:?}", yolo_path))?;
|
||||
|
||||
let mut class_counts: HashMap<String, i64> = HashMap::new();
|
||||
for fdata in yolo.frames.values() {
|
||||
let dets = if !fdata.detections.is_empty() {
|
||||
&fdata.detections
|
||||
} else {
|
||||
&fdata.objects
|
||||
};
|
||||
for det in dets {
|
||||
*class_counts.entry(det.class_name.clone()).or_insert(0) += 1;
|
||||
}
|
||||
}
|
||||
|
||||
let nodes_table = t("tkg_nodes");
|
||||
let mut count = 0;
|
||||
for (cls, cnt) in &class_counts {
|
||||
let props = serde_json::json!({ "total_detections": cnt });
|
||||
|
||||
sqlx::query(&format!(
|
||||
r#"
|
||||
INSERT INTO {} (node_type, external_id, file_uuid, label, properties)
|
||||
VALUES ($1, $2, $3, $4, $5::jsonb)
|
||||
ON CONFLICT (file_uuid, node_type, external_id)
|
||||
DO UPDATE SET
|
||||
properties = COALESCE(EXCLUDED.properties, tkg_nodes.properties)
|
||||
"#,
|
||||
nodes_table
|
||||
))
|
||||
.bind("object")
|
||||
.bind(cls)
|
||||
.bind(file_uuid)
|
||||
.bind(cls)
|
||||
.bind(serde_json::to_string(&props)?)
|
||||
.execute(pool)
|
||||
.await?;
|
||||
|
||||
count += 1;
|
||||
}
|
||||
|
||||
Ok(count)
|
||||
}
|
||||
|
||||
async fn build_speaker_nodes(pool: &PgPool, file_uuid: &str, output_dir: &str) -> Result<usize> {
|
||||
let asrx_path = Path::new(output_dir).join(format!("{}.asrx.json", file_uuid));
|
||||
if !asrx_path.exists() {
|
||||
return Ok(0);
|
||||
}
|
||||
|
||||
let content = std::fs::read_to_string(&asrx_path)
|
||||
.with_context(|| format!("Failed to read {:?}", asrx_path))?;
|
||||
let asrx: AsrxJson = serde_json::from_str(&content)
|
||||
.with_context(|| format!("Failed to parse {:?}", asrx_path))?;
|
||||
|
||||
let stats = asrx.speaker_stats.unwrap_or_default();
|
||||
let nodes_table = t("tkg_nodes");
|
||||
let mut count = 0;
|
||||
|
||||
for (sid, stat) in &stats {
|
||||
let props = serde_json::json!({ "segment_count": stat.count });
|
||||
|
||||
sqlx::query(&format!(
|
||||
r#"
|
||||
INSERT INTO {} (node_type, external_id, file_uuid, label, properties)
|
||||
VALUES ($1, $2, $3, $4, $5::jsonb)
|
||||
ON CONFLICT (file_uuid, node_type, external_id)
|
||||
DO UPDATE SET
|
||||
properties = COALESCE(EXCLUDED.properties, tkg_nodes.properties)
|
||||
"#,
|
||||
nodes_table
|
||||
))
|
||||
.bind("speaker")
|
||||
.bind(sid)
|
||||
.bind(file_uuid)
|
||||
.bind(sid)
|
||||
.bind(serde_json::to_string(&props)?)
|
||||
.execute(pool)
|
||||
.await?;
|
||||
|
||||
count += 1;
|
||||
}
|
||||
|
||||
Ok(count)
|
||||
}
|
||||
|
||||
// ── Edge builders ─────────────────────────────────────────────────
|
||||
|
||||
async fn build_co_occurrence_edges(
|
||||
pool: &PgPool,
|
||||
file_uuid: &str,
|
||||
output_dir: &str,
|
||||
) -> Result<usize> {
|
||||
let yolo_path = Path::new(output_dir).join(format!("{}.yolo.json", file_uuid));
|
||||
if !yolo_path.exists() {
|
||||
return Ok(0);
|
||||
}
|
||||
|
||||
let content = std::fs::read_to_string(&yolo_path)?;
|
||||
let yolo: YoloJson = serde_json::from_str(&content)?;
|
||||
|
||||
let face_table = t("face_detections");
|
||||
let nodes_table = t("tkg_nodes");
|
||||
let edges_table = t("tkg_edges");
|
||||
|
||||
let face_rows = sqlx::query_as::<_, FaceDetectionRow>(&format!(
|
||||
r#"SELECT trace_id, frame_number, x, y, width, height
|
||||
FROM {} WHERE file_uuid = $1 AND trace_id IS NOT NULL
|
||||
ORDER BY frame_number"#,
|
||||
face_table
|
||||
))
|
||||
.bind(file_uuid)
|
||||
.fetch_all(pool)
|
||||
.await?;
|
||||
|
||||
let mut edge_count = 0;
|
||||
for face in &face_rows {
|
||||
let frame_str = face.frame_number.to_string();
|
||||
let yolo_frame = match yolo.frames.get(&frame_str) {
|
||||
Some(f) => f,
|
||||
None => continue,
|
||||
};
|
||||
|
||||
let dets = if !yolo_frame.detections.is_empty() {
|
||||
&yolo_frame.detections
|
||||
} else {
|
||||
&yolo_frame.objects
|
||||
};
|
||||
|
||||
if dets.is_empty() {
|
||||
continue;
|
||||
}
|
||||
|
||||
let external_id = format!("trace_{}", face.trace_id);
|
||||
let face_node: Option<(i64,)> = sqlx::query_as(&format!(
|
||||
"SELECT id FROM {} WHERE file_uuid=$1 AND node_type='face_trace' AND external_id=$2",
|
||||
nodes_table
|
||||
))
|
||||
.bind(file_uuid)
|
||||
.bind(&external_id)
|
||||
.fetch_optional(pool)
|
||||
.await?;
|
||||
|
||||
let face_node_id = match face_node {
|
||||
Some((id,)) => id,
|
||||
None => continue,
|
||||
};
|
||||
|
||||
for det in dets {
|
||||
let obj_node: Option<(i64,)> = sqlx::query_as(&format!(
|
||||
"SELECT id FROM {} WHERE file_uuid=$1 AND node_type='object' AND external_id=$2",
|
||||
nodes_table
|
||||
))
|
||||
.bind(file_uuid)
|
||||
.bind(&det.class_name)
|
||||
.fetch_optional(pool)
|
||||
.await?;
|
||||
|
||||
let obj_node_id = match obj_node {
|
||||
Some((id,)) => id,
|
||||
None => continue,
|
||||
};
|
||||
|
||||
let edge_props = serde_json::json!({
|
||||
"frame": face.frame_number,
|
||||
"object_confidence": det.confidence,
|
||||
});
|
||||
|
||||
if let Err(e) = sqlx::query(&format!(
|
||||
r#"
|
||||
INSERT INTO {} (edge_type, source_node_id, target_node_id, file_uuid, properties)
|
||||
VALUES ($1, $2, $3, $4, $5::jsonb)
|
||||
ON CONFLICT (file_uuid, edge_type, source_node_id, target_node_id)
|
||||
DO UPDATE SET properties = COALESCE(EXCLUDED.properties, tkg_edges.properties)
|
||||
"#,
|
||||
edges_table
|
||||
))
|
||||
.bind("CO_OCCURS_WITH")
|
||||
.bind(face_node_id)
|
||||
.bind(obj_node_id)
|
||||
.bind(file_uuid)
|
||||
.bind(serde_json::to_string(&edge_props)?)
|
||||
.execute(pool)
|
||||
.await
|
||||
{
|
||||
tracing::warn!(
|
||||
"[TKG] Edge insert failed (trace={}, obj={}): {}",
|
||||
face.trace_id,
|
||||
det.class_name,
|
||||
e
|
||||
);
|
||||
continue;
|
||||
}
|
||||
|
||||
edge_count += 1;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(edge_count)
|
||||
}
|
||||
|
||||
async fn build_speaker_face_edges(
|
||||
pool: &PgPool,
|
||||
file_uuid: &str,
|
||||
output_dir: &str,
|
||||
) -> Result<usize> {
|
||||
let asrx_path = Path::new(output_dir).join(format!("{}.asrx.json", file_uuid));
|
||||
if !asrx_path.exists() {
|
||||
return Ok(0);
|
||||
}
|
||||
|
||||
let content = std::fs::read_to_string(&asrx_path)?;
|
||||
let asrx: AsrxJson = serde_json::from_str(&content)?;
|
||||
|
||||
if asrx.segments.is_empty() {
|
||||
return Ok(0);
|
||||
}
|
||||
|
||||
let face_table = t("face_detections");
|
||||
let nodes_table = t("tkg_nodes");
|
||||
let edges_table = t("tkg_edges");
|
||||
|
||||
let traces = sqlx::query_as::<_, (i64, i64, i64)>(&format!(
|
||||
r#"SELECT trace_id, MIN(frame_number) as start_f, MAX(frame_number) as end_f
|
||||
FROM {} WHERE file_uuid = $1 AND trace_id IS NOT NULL
|
||||
GROUP BY trace_id"#,
|
||||
face_table
|
||||
))
|
||||
.bind(file_uuid)
|
||||
.fetch_all(pool)
|
||||
.await?;
|
||||
|
||||
// Calculate fps from last segment
|
||||
let last = asrx.segments.last().unwrap();
|
||||
let fps = if last.end_time > 0.0 {
|
||||
last.end_frame as f64 / last.end_time
|
||||
} else {
|
||||
30.0
|
||||
};
|
||||
|
||||
let mut edge_count = 0;
|
||||
|
||||
for (tid, sf, ef) in &traces {
|
||||
let face_ext_id = format!("trace_{}", tid);
|
||||
let face_node: Option<(i64,)> = sqlx::query_as(&format!(
|
||||
"SELECT id FROM {} WHERE file_uuid=$1 AND node_type='face_trace' AND external_id=$2",
|
||||
nodes_table
|
||||
))
|
||||
.bind(file_uuid)
|
||||
.bind(&face_ext_id)
|
||||
.fetch_optional(pool)
|
||||
.await?;
|
||||
|
||||
let face_node_id = match face_node {
|
||||
Some((id,)) => id,
|
||||
None => continue,
|
||||
};
|
||||
|
||||
let face_start_sec = *sf as f64 / fps;
|
||||
let face_end_sec = *ef as f64 / fps;
|
||||
|
||||
for seg in &asrx.segments {
|
||||
let seg_start = seg.start_time;
|
||||
let seg_end = seg.end_time;
|
||||
let overlap_start = face_start_sec.max(seg_start);
|
||||
let overlap_end = face_end_sec.min(seg_end);
|
||||
|
||||
if overlap_start >= overlap_end {
|
||||
continue;
|
||||
}
|
||||
|
||||
let overlap_dur = overlap_end - overlap_start;
|
||||
let face_dur = face_end_sec - face_start_sec;
|
||||
if face_dur <= 0.0 {
|
||||
continue;
|
||||
}
|
||||
let overlap_ratio = overlap_dur / face_dur;
|
||||
|
||||
if overlap_ratio < 0.3 {
|
||||
continue;
|
||||
}
|
||||
|
||||
let speaker_node: Option<(i64,)> = sqlx::query_as(&format!(
|
||||
"SELECT id FROM {} WHERE file_uuid=$1 AND node_type='speaker' AND external_id=$2",
|
||||
nodes_table
|
||||
))
|
||||
.bind(file_uuid)
|
||||
.bind(&seg.speaker_id)
|
||||
.fetch_optional(pool)
|
||||
.await?;
|
||||
|
||||
let speaker_node_id = match speaker_node {
|
||||
Some((id,)) => id,
|
||||
None => continue,
|
||||
};
|
||||
|
||||
let edge_props = serde_json::json!({
|
||||
"overlap_ratio": (overlap_ratio * 1000.0).round() / 1000.0,
|
||||
"overlap_duration_s": (overlap_dur * 10.0).round() / 10.0,
|
||||
"face_time_range": format!("{:.1}-{:.1}s", face_start_sec, face_end_sec),
|
||||
"speaker_time_range": format!("{:.1}-{:.1}s", seg_start, seg_end),
|
||||
});
|
||||
|
||||
sqlx::query(&format!(
|
||||
r#"
|
||||
INSERT INTO {} (edge_type, source_node_id, target_node_id, file_uuid, properties)
|
||||
VALUES ($1, $2, $3, $4, $5::jsonb)
|
||||
ON CONFLICT (file_uuid, edge_type, source_node_id, target_node_id)
|
||||
DO UPDATE SET properties = COALESCE(EXCLUDED.properties, tkg_edges.properties)
|
||||
"#,
|
||||
edges_table
|
||||
))
|
||||
.bind("SPEAKS_AS")
|
||||
.bind(face_node_id)
|
||||
.bind(speaker_node_id)
|
||||
.bind(file_uuid)
|
||||
.bind(serde_json::to_string(&edge_props)?)
|
||||
.execute(pool)
|
||||
.await?;
|
||||
|
||||
edge_count += 1;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(edge_count)
|
||||
}
|
||||
|
||||
async fn build_face_face_edges(pool: &PgPool, file_uuid: &str) -> Result<usize> {
|
||||
let face_table = t("face_detections");
|
||||
let nodes_table = t("tkg_nodes");
|
||||
let edges_table = t("tkg_edges");
|
||||
|
||||
let rows: Vec<(i64, i64, i64)> = sqlx::query_as(&format!(
|
||||
r#"
|
||||
SELECT a.trace_id AS tid_a, b.trace_id AS tid_b, a.frame_number
|
||||
FROM {} a
|
||||
JOIN {} b
|
||||
ON a.file_uuid = b.file_uuid
|
||||
AND a.frame_number = b.frame_number
|
||||
AND a.trace_id < b.trace_id
|
||||
WHERE a.file_uuid = $1
|
||||
AND a.trace_id IS NOT NULL
|
||||
AND b.trace_id IS NOT NULL
|
||||
ORDER BY a.frame_number
|
||||
"#,
|
||||
face_table, face_table
|
||||
))
|
||||
.bind(file_uuid)
|
||||
.fetch_all(pool)
|
||||
.await?;
|
||||
|
||||
if rows.is_empty() {
|
||||
return Ok(0);
|
||||
}
|
||||
|
||||
// Deduplicate by pair
|
||||
let mut pair_frames: HashMap<(i64, i64), Vec<i64>> = HashMap::new();
|
||||
for (tid_a, tid_b, frame) in &rows {
|
||||
let key = if *tid_a < *tid_b {
|
||||
(*tid_a, *tid_b)
|
||||
} else {
|
||||
(*tid_b, *tid_a)
|
||||
};
|
||||
pair_frames.entry(key).or_default().push(*frame);
|
||||
}
|
||||
|
||||
let mut edge_count = 0;
|
||||
for ((tid_a, tid_b), frames) in &pair_frames {
|
||||
let ext_a = format!("trace_{}", tid_a);
|
||||
let ext_b = format!("trace_{}", tid_b);
|
||||
|
||||
let n_a: Option<(i64,)> = sqlx::query_as(&format!(
|
||||
"SELECT id FROM {} WHERE file_uuid=$1 AND node_type='face_trace' AND external_id=$2",
|
||||
nodes_table
|
||||
))
|
||||
.bind(file_uuid)
|
||||
.bind(&ext_a)
|
||||
.fetch_optional(pool)
|
||||
.await?;
|
||||
|
||||
let n_b: Option<(i64,)> = sqlx::query_as(&format!(
|
||||
"SELECT id FROM {} WHERE file_uuid=$1 AND node_type='face_trace' AND external_id=$2",
|
||||
nodes_table
|
||||
))
|
||||
.bind(file_uuid)
|
||||
.bind(&ext_b)
|
||||
.fetch_optional(pool)
|
||||
.await?;
|
||||
|
||||
let (n_a_id, n_b_id) = match (n_a, n_b) {
|
||||
(Some((a,)), Some((b,))) => (a, b),
|
||||
_ => continue,
|
||||
};
|
||||
|
||||
let edge_props = serde_json::json!({
|
||||
"first_frame": frames[0],
|
||||
"frame_count": frames.len() as i64,
|
||||
});
|
||||
|
||||
sqlx::query(&format!(
|
||||
r#"
|
||||
INSERT INTO {} (edge_type, source_node_id, target_node_id, file_uuid, properties)
|
||||
VALUES ($1, $2, $3, $4, $5::jsonb)
|
||||
ON CONFLICT (file_uuid, edge_type, source_node_id, target_node_id)
|
||||
DO UPDATE SET properties = COALESCE(EXCLUDED.properties, tkg_edges.properties)
|
||||
"#,
|
||||
edges_table
|
||||
))
|
||||
.bind("CO_OCCURS_WITH")
|
||||
.bind(n_a_id)
|
||||
.bind(n_b_id)
|
||||
.bind(file_uuid)
|
||||
.bind(serde_json::to_string(&edge_props)?)
|
||||
.execute(pool)
|
||||
.await?;
|
||||
|
||||
edge_count += 1;
|
||||
}
|
||||
|
||||
Ok(edge_count)
|
||||
}
|
||||
|
||||
// ── Tests ─────────────────────────────────────────────────────────
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Frames keyed by number parse, and unknown keys such as `time_seconds`
    /// are ignored.
    #[test]
    fn test_yolo_json_deserialize() {
        let json = r#"{
            "frames": {
                "1": {"time_seconds": 0.0, "detections": [{"class_name": "person", "confidence": 0.9}]},
                "2": {"time_seconds": 1.0, "detections": [{"class_name": "chair", "confidence": 0.8}]}
            }
        }"#;
        let yolo: YoloJson = serde_json::from_str(json).unwrap();
        assert_eq!(yolo.frames.len(), 2);
        let first = &yolo.frames["1"];
        assert_eq!(first.detections[0].class_name, "person");
    }

    /// An empty `frames` object yields an empty map.
    #[test]
    fn test_yolo_json_empty_frames() {
        let yolo: YoloJson = serde_json::from_str(r#"{"frames": {}}"#).unwrap();
        assert!(yolo.frames.is_empty());
    }

    /// A fully populated segment plus speaker stats parses.
    #[test]
    fn test_asrx_json_deserialize() {
        let json = r#"{
            "segments": [
                {"speaker_id": "SPEAKER_01", "start_time": 0.0, "end_time": 2.0, "start_frame": 0, "end_frame": 60}
            ],
            "speaker_stats": {"SPEAKER_01": {"count": 1}}
        }"#;
        let asrx: AsrxJson = serde_json::from_str(json).unwrap();
        assert_eq!(asrx.segments.len(), 1);
        let segment = &asrx.segments[0];
        assert_eq!(segment.speaker_id, "SPEAKER_01");
    }

    /// `speaker_stats` is optional and stays `None` when absent.
    #[test]
    fn test_asrx_json_no_stats() {
        let asrx: AsrxJson = serde_json::from_str(r#"{"segments": []}"#).unwrap();
        assert!(asrx.speaker_stats.is_none());
    }

    /// Frames that only provide `objects` leave `detections` empty.
    #[test]
    fn test_yolo_objects_fallback() {
        let json = r#"{
            "frames": {
                "1": {"objects": [{"class_name": "person"}]}
            }
        }"#;
        let yolo: YoloJson = serde_json::from_str(json).unwrap();
        let frame = &yolo.frames["1"];
        assert_eq!(frame.objects[0].class_name, "person");
        assert!(frame.detections.is_empty());
    }

    /// The result struct simply carries the counts it is given.
    #[test]
    fn test_tkg_result() {
        let result = TkgResult {
            face_trace_nodes: 5,
            object_nodes: 10,
            speaker_nodes: 3,
            co_occurrence_edges: 20,
            speaker_face_edges: 8,
            face_face_edges: 4,
        };
        assert_eq!(result.face_trace_nodes, 5);
        assert_eq!(result.object_nodes, 10);
        assert_eq!(result.speaker_nodes, 3);
    }
}
|
||||
@@ -1,7 +1,7 @@
|
||||
use anyhow::Result;
|
||||
use sha2::{Digest, Sha256};
|
||||
use std::io::Read;
|
||||
use std::path::Path;
|
||||
use anyhow::Result;
|
||||
|
||||
/// Compute SHA256 of the entire file content
|
||||
pub fn compute_sha256(path: &Path) -> Result<String> {
|
||||
@@ -10,7 +10,9 @@ pub fn compute_sha256(path: &Path) -> Result<String> {
|
||||
let mut buf = [0u8; 65536];
|
||||
loop {
|
||||
let n = file.read(&mut buf)?;
|
||||
if n == 0 { break; }
|
||||
if n == 0 {
|
||||
break;
|
||||
}
|
||||
hasher.update(&buf[..n]);
|
||||
}
|
||||
let hash = format!("{:x}", hasher.finalize());
|
||||
|
||||
@@ -65,7 +65,11 @@ pub fn tmdb_cache_path(file_uuid: &str) -> PathBuf {
|
||||
pub fn read_tmdb_cache(file_uuid: &str) -> Result<TmdbCache> {
|
||||
let path = tmdb_cache_path(file_uuid);
|
||||
if !path.exists() {
|
||||
anyhow::bail!("TMDb cache not found: {} (expected: {})", file_uuid, path.display());
|
||||
anyhow::bail!(
|
||||
"TMDb cache not found: {} (expected: {})",
|
||||
file_uuid,
|
||||
path.display()
|
||||
);
|
||||
}
|
||||
let content = std::fs::read_to_string(&path)
|
||||
.with_context(|| format!("Failed to read TMDb cache: {}", path.display()))?;
|
||||
@@ -96,9 +100,7 @@ pub fn count_cache_files() -> usize {
|
||||
match std::fs::read_dir(&dir) {
|
||||
Ok(entries) => entries
|
||||
.filter_map(|e| e.ok())
|
||||
.filter(|e| {
|
||||
e.file_name().to_string_lossy().ends_with(".tmdb.json")
|
||||
})
|
||||
.filter(|e| e.file_name().to_string_lossy().ends_with(".tmdb.json"))
|
||||
.count(),
|
||||
Err(_) => 0,
|
||||
}
|
||||
|
||||
@@ -46,11 +46,12 @@ pub async fn match_faces_against_tmdb(db: &PostgresDb, file_uuid: &str) -> Resul
|
||||
|
||||
// Step 2: Load face_detections grouped by trace_id
|
||||
let fd_table = schema::table_name("face_detections");
|
||||
let fd_rows = sqlx::query_as::<_, (i32, Vec<f32>)>(
|
||||
&format!("SELECT trace_id, embedding FROM {} \
|
||||
let fd_rows = sqlx::query_as::<_, (i32, Vec<f32>)>(&format!(
|
||||
"SELECT trace_id, embedding FROM {} \
|
||||
WHERE file_uuid=$1 AND trace_id IS NOT NULL AND embedding IS NOT NULL \
|
||||
ORDER BY trace_id", fd_table),
|
||||
)
|
||||
ORDER BY trace_id",
|
||||
fd_table
|
||||
))
|
||||
.bind(file_uuid)
|
||||
.fetch_all(pool)
|
||||
.await?;
|
||||
@@ -156,9 +157,10 @@ pub async fn match_faces_against_tmdb(db: &PostgresDb, file_uuid: &str) -> Resul
|
||||
let fd_table = schema::table_name("face_detections");
|
||||
let mut after_qc = HashMap::new();
|
||||
for (&tid, &(id, ref name)) in &matched {
|
||||
let cnt: i64 = sqlx::query_scalar(
|
||||
&format!("SELECT COUNT(*) FROM {} WHERE file_uuid=$1 AND trace_id=$2", fd_table),
|
||||
)
|
||||
let cnt: i64 = sqlx::query_scalar(&format!(
|
||||
"SELECT COUNT(*) FROM {} WHERE file_uuid=$1 AND trace_id=$2",
|
||||
fd_table
|
||||
))
|
||||
.bind(file_uuid)
|
||||
.bind(tid)
|
||||
.fetch_one(pool)
|
||||
@@ -194,9 +196,10 @@ pub async fn match_faces_against_tmdb(db: &PostgresDb, file_uuid: &str) -> Resul
|
||||
// Step 5: Update DB
|
||||
let mut updated = 0usize;
|
||||
for (&tid, &(id, _)) in &matched {
|
||||
let r = sqlx::query(
|
||||
&format!("UPDATE {} SET identity_id=$1 WHERE file_uuid=$2 AND trace_id=$3", fd_table),
|
||||
)
|
||||
let r = sqlx::query(&format!(
|
||||
"UPDATE {} SET identity_id=$1 WHERE file_uuid=$2 AND trace_id=$3",
|
||||
fd_table
|
||||
))
|
||||
.bind(id)
|
||||
.bind(file_uuid)
|
||||
.bind(tid)
|
||||
@@ -223,9 +226,8 @@ pub async fn match_faces_against_tmdb(db: &PostgresDb, file_uuid: &str) -> Resul
|
||||
async fn quality_check_temporal_collisions(pool: &sqlx::PgPool, file_uuid: &str) -> Result<usize> {
|
||||
let fd_table = schema::table_name("face_detections");
|
||||
// Find all collision pairs: same identity, same frame, different trace
|
||||
let collisions = sqlx::query_as::<_, (i32, i32, i32, i32)>(
|
||||
&format!(
|
||||
"SELECT a.identity_id, a.trace_id, b.trace_id, a.frame_number \
|
||||
let collisions = sqlx::query_as::<_, (i32, i32, i32, i32)>(&format!(
|
||||
"SELECT a.identity_id, a.trace_id, b.trace_id, a.frame_number \
|
||||
FROM {} a \
|
||||
JOIN {} b \
|
||||
ON a.file_uuid = b.file_uuid \
|
||||
@@ -235,9 +237,8 @@ async fn quality_check_temporal_collisions(pool: &sqlx::PgPool, file_uuid: &str)
|
||||
AND a.identity_id IS NOT NULL \
|
||||
AND a.identity_id = b.identity_id \
|
||||
ORDER BY a.identity_id, a.frame_number",
|
||||
fd_table, fd_table
|
||||
),
|
||||
)
|
||||
fd_table, fd_table
|
||||
))
|
||||
.bind(file_uuid)
|
||||
.fetch_all(pool)
|
||||
.await?;
|
||||
@@ -256,25 +257,36 @@ async fn quality_check_temporal_collisions(pool: &sqlx::PgPool, file_uuid: &str)
|
||||
let mut unbound = 0usize;
|
||||
for ((id, ta, tb), overlap_frames) in &collision_groups {
|
||||
// Get face detection count for each trace
|
||||
let cnt_a: i64 = sqlx::query_scalar(
|
||||
&format!("SELECT COUNT(*) FROM {} WHERE file_uuid=$1 AND trace_id=$2 AND identity_id=$3", fd_table)
|
||||
)
|
||||
.bind(file_uuid).bind(ta).bind(id)
|
||||
.fetch_one(pool).await.unwrap_or(0);
|
||||
let cnt_a: i64 = sqlx::query_scalar(&format!(
|
||||
"SELECT COUNT(*) FROM {} WHERE file_uuid=$1 AND trace_id=$2 AND identity_id=$3",
|
||||
fd_table
|
||||
))
|
||||
.bind(file_uuid)
|
||||
.bind(ta)
|
||||
.bind(id)
|
||||
.fetch_one(pool)
|
||||
.await
|
||||
.unwrap_or(0);
|
||||
|
||||
let cnt_b: i64 = sqlx::query_scalar(
|
||||
&format!("SELECT COUNT(*) FROM {} WHERE file_uuid=$1 AND trace_id=$2 AND identity_id=$3", fd_table)
|
||||
)
|
||||
.bind(file_uuid).bind(tb).bind(id)
|
||||
.fetch_one(pool).await.unwrap_or(0);
|
||||
let cnt_b: i64 = sqlx::query_scalar(&format!(
|
||||
"SELECT COUNT(*) FROM {} WHERE file_uuid=$1 AND trace_id=$2 AND identity_id=$3",
|
||||
fd_table
|
||||
))
|
||||
.bind(file_uuid)
|
||||
.bind(tb)
|
||||
.bind(id)
|
||||
.fetch_one(pool)
|
||||
.await
|
||||
.unwrap_or(0);
|
||||
|
||||
// Unbind the trace with fewer detections (likely the false positive)
|
||||
let victim = if cnt_a <= cnt_b { *ta } else { *tb };
|
||||
let victim_cnt = if cnt_a <= cnt_b { cnt_a } else { cnt_b };
|
||||
|
||||
sqlx::query(
|
||||
&format!("UPDATE {} SET identity_id=NULL WHERE file_uuid=$1 AND trace_id=$2", fd_table),
|
||||
)
|
||||
sqlx::query(&format!(
|
||||
"UPDATE {} SET identity_id=NULL WHERE file_uuid=$1 AND trace_id=$2",
|
||||
fd_table
|
||||
))
|
||||
.bind(file_uuid)
|
||||
.bind(victim)
|
||||
.execute(pool)
|
||||
|
||||
@@ -45,7 +45,14 @@ fn extract_movie_name(filename: &str) -> Option<String> {
|
||||
.file_stem()
|
||||
.and_then(|s| s.to_str())?;
|
||||
|
||||
let cleaned = name.replace(['.', '_'], " ").trim().to_string();
|
||||
// Take only the part before year patterns or separators
|
||||
let cleaned = name
|
||||
.replace(['.', '_'], " ")
|
||||
.split(|c: char| c == '(' || c == '[' || c == '│' || c == '|')
|
||||
.next()
|
||||
.unwrap_or(&name)
|
||||
.trim()
|
||||
.to_string();
|
||||
|
||||
if cleaned.is_empty() || cleaned.len() < 3 {
|
||||
return None;
|
||||
@@ -53,10 +60,7 @@ fn extract_movie_name(filename: &str) -> Option<String> {
|
||||
Some(cleaned)
|
||||
}
|
||||
|
||||
pub async fn probe_from_cache(
|
||||
db: &PostgresDb,
|
||||
file_uuid: &str,
|
||||
) -> Result<TmdbProbeResult> {
|
||||
pub async fn probe_from_cache(db: &PostgresDb, file_uuid: &str) -> Result<TmdbProbeResult> {
|
||||
let cache = crate::core::tmdb::cache::read_tmdb_cache(file_uuid)?;
|
||||
if cache.identities.is_empty() && !cache.cast.is_empty() {
|
||||
return create_identities_from_data(db, file_uuid, &cache.movie, &cache.cast).await;
|
||||
@@ -83,7 +87,8 @@ async fn upsert_identities_from_disk(
|
||||
}
|
||||
match std::fs::read_to_string(&path) {
|
||||
Ok(content) => {
|
||||
match serde_json::from_str::<crate::core::identity::storage::IdentityFile>(&content) {
|
||||
match serde_json::from_str::<crate::core::identity::storage::IdentityFile>(&content)
|
||||
{
|
||||
Ok(identity_file) => {
|
||||
let identities_table = crate::core::db::schema::table_name("identities");
|
||||
let result = sqlx::query(&format!(
|
||||
@@ -106,21 +111,35 @@ async fn upsert_identities_from_disk(
|
||||
|
||||
match result {
|
||||
Ok(_) => {
|
||||
info!("[TMDB] Upserted identity: {} (uuid={})", identity_file.name, identity_file.identity_uuid);
|
||||
info!(
|
||||
"[TMDB] Upserted identity: {} (uuid={})",
|
||||
identity_file.name, identity_file.identity_uuid
|
||||
);
|
||||
identities_created += 1;
|
||||
}
|
||||
Err(e) => {
|
||||
warn!("[TMDB] Failed to upsert identity '{}': {}", identity_file.name, e);
|
||||
warn!(
|
||||
"[TMDB] Failed to upsert identity '{}': {}",
|
||||
identity_file.name, e
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
warn!("[TMDB] Failed to parse identity file {}: {}", path.display(), e);
|
||||
warn!(
|
||||
"[TMDB] Failed to parse identity file {}: {}",
|
||||
path.display(),
|
||||
e
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
warn!("[TMDB] Failed to read identity file {}: {}", path.display(), e);
|
||||
warn!(
|
||||
"[TMDB] Failed to read identity file {}: {}",
|
||||
path.display(),
|
||||
e
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -181,7 +200,9 @@ pub async fn create_identities_from_data(
|
||||
continue;
|
||||
}
|
||||
|
||||
let profile_url = member.profile_path.as_ref()
|
||||
let profile_url = member
|
||||
.profile_path
|
||||
.as_ref()
|
||||
.map(|p| format!("https://image.tmdb.org/t/p/w185{}", p));
|
||||
|
||||
let metadata = serde_json::json!({
|
||||
@@ -226,8 +247,13 @@ pub async fn create_identities_from_data(
|
||||
member.name, member.character, uuid_str
|
||||
);
|
||||
identities_created += 1;
|
||||
if let Err(e) = crate::core::identity::storage::save_identity_file(db, &uuid_str).await {
|
||||
warn!("[TMDB] Failed to save identity file for {}: {}", member.name, e);
|
||||
if let Err(e) =
|
||||
crate::core::identity::storage::save_identity_file(db, &uuid_str).await
|
||||
{
|
||||
warn!(
|
||||
"[TMDB] Failed to save identity file for {}: {}",
|
||||
member.name, e
|
||||
);
|
||||
}
|
||||
// Download and save TMDb profile image locally
|
||||
if let Some(url) = &profile_url {
|
||||
@@ -393,8 +419,10 @@ pub async fn probe_movie(
|
||||
overview: movie.overview.clone(),
|
||||
poster_path: movie.poster_path.clone(),
|
||||
};
|
||||
let cache_cast: Vec<cache::TmdbCastMember> = credits.cast.iter().map(|m| {
|
||||
cache::TmdbCastMember {
|
||||
let cache_cast: Vec<cache::TmdbCastMember> = credits
|
||||
.cast
|
||||
.iter()
|
||||
.map(|m| cache::TmdbCastMember {
|
||||
id: m.id,
|
||||
name: m.name.clone(),
|
||||
character: m.character.clone(),
|
||||
@@ -410,8 +438,8 @@ pub async fn probe_movie(
|
||||
deathday: None,
|
||||
gender: None,
|
||||
homepage: None,
|
||||
}
|
||||
}).collect();
|
||||
})
|
||||
.collect();
|
||||
|
||||
// Write TMDb cache so probe_from_cache can be used next time
|
||||
let cache_obj = cache::TmdbCache {
|
||||
|
||||
@@ -60,7 +60,11 @@ pub async fn check_tmdb_api() -> TmdbResourceStatus {
|
||||
enabled: *config::tmdb::PROBE_ENABLED,
|
||||
api_reachable: Some(reachable),
|
||||
api_latency_ms: Some(latency),
|
||||
api_error: if reachable { None } else { Some(format!("HTTP {}", resp.status())) },
|
||||
api_error: if reachable {
|
||||
None
|
||||
} else {
|
||||
Some(format!("HTTP {}", resp.status()))
|
||||
},
|
||||
last_check_at: Some(chrono::Utc::now().to_rfc3339()),
|
||||
}
|
||||
}
|
||||
@@ -84,9 +88,10 @@ pub fn count_cache_files() -> usize {
|
||||
|
||||
pub async fn count_tmdb_identities(pool: &sqlx::PgPool) -> Result<i64> {
|
||||
let identities_table = crate::core::db::schema::table_name("identities");
|
||||
let count: i64 = sqlx::query_scalar(
|
||||
&format!("SELECT COUNT(*) FROM {} WHERE source = 'tmdb'", identities_table)
|
||||
)
|
||||
let count: i64 = sqlx::query_scalar(&format!(
|
||||
"SELECT COUNT(*) FROM {} WHERE source = 'tmdb'",
|
||||
identities_table
|
||||
))
|
||||
.fetch_one(pool)
|
||||
.await?;
|
||||
Ok(count)
|
||||
@@ -94,9 +99,10 @@ pub async fn count_tmdb_identities(pool: &sqlx::PgPool) -> Result<i64> {
|
||||
|
||||
pub async fn count_tmdb_identities_with_embedding(pool: &sqlx::PgPool) -> Result<i64> {
|
||||
let identities_table = crate::core::db::schema::table_name("identities");
|
||||
let count: i64 = sqlx::query_scalar(
|
||||
&format!("SELECT COUNT(*) FROM {} WHERE source = 'tmdb' AND face_embedding IS NOT NULL", identities_table)
|
||||
)
|
||||
let count: i64 = sqlx::query_scalar(&format!(
|
||||
"SELECT COUNT(*) FROM {} WHERE source = 'tmdb' AND face_embedding IS NOT NULL",
|
||||
identities_table
|
||||
))
|
||||
.fetch_one(pool)
|
||||
.await?;
|
||||
Ok(count)
|
||||
|
||||
Reference in New Issue
Block a user