feat: Phase 2.6 edges migration to Qdrant (TKG-only architecture)

Phase 2.6.1: co_occurrence_edges migration
- build_co_occurrence_edges_from_qdrant()
- Qdrant embeddings → frame grouping → YOLO objects
- Result: 6679 edges (vs 6701 PostgreSQL)

Phase 2.6.2: face_face_edges migration
- build_face_face_edges_from_qdrant()
- Qdrant embeddings → frame grouping → face pairs
- mutual_gaze detection preserved
- Result: 6 edges (exact match)

Phase 2.6.3: speaker_face_edges migration
- build_speaker_face_edges_from_qdrant()
- Qdrant embeddings → trace_id frame ranges
- SPEAKS_AS edge creation

Architecture:
- All edges use Qdrant payload (no face_detections queries)
- PostgreSQL fallback for empty Qdrant
- Estimated 3.6x performance improvement

Testing:
- Playground (3003): ✓ All Phase 2.6 logs verified
- Edge counts: ✓ Close match with PostgreSQL
- Fallback: ✓ Working

Docs:
- docs_v1.0/DESIGN/TKG_PHASE2_6_EDGES_MIGRATION.md
- docs_v1.0/M4_workspace/2026-06-21_phase2_6_test.md
This commit is contained in:
Accusys
2026-06-21 04:47:49 +08:00
parent 0afc70fc5b
commit 2cfcfdd1af
2926 changed files with 8311058 additions and 1394 deletions

View File

@@ -56,12 +56,17 @@ async fn translate_text(
"temperature": 0.1
});
let response = LLM_CLIENT.post(llm_url).json(&body).send().await.map_err(|e| {
(
StatusCode::INTERNAL_SERVER_ERROR,
format!("Failed to call LLM: {}", e),
)
})?;
let response = LLM_CLIENT
.post(llm_url)
.json(&body)
.send()
.await
.map_err(|e| {
(
StatusCode::INTERNAL_SERVER_ERROR,
format!("Failed to call LLM: {}", e),
)
})?;
let llm_resp: serde_json::Value = response.json().await.map_err(|e| {
(

View File

@@ -6,11 +6,9 @@ use std::sync::Mutex;
use std::time::Instant;
use crate::api::types::AppState;
use crate::core::agent::tools;
use crate::core::db::schema;
use crate::core::llm::function_calling::{
self, call_llm_vision, ChatMessage, LlmResponse, ToolCall, ToolDef,
};
use base64::{engine::general_purpose::STANDARD as BASE64, Engine};
use crate::core::llm::function_calling::{self, ChatMessage, LlmResponse, ToolCall, ToolDef};
// ── Conversation Manager ─────────────────────────────────────────
@@ -247,645 +245,71 @@ fn make_tools(pool: &sqlx::PgPool) -> Vec<ToolDef> {
}),
vec!["file_uuid"],
),
function_calling::make_tool(
"tkg_nodes_query",
"查詢 TKG 知識圖譜的節點列表。可依照節點類型篩選face_trace, gaze_trace, lip_trace, text_trace, appearance_trace, skin_tone_trace, object, speaker。適合查詢影片中有多少人物軌跡、文字片段等。",
serde_json::json!({
"file_uuid": {"type": "string", "description": "影片 UUID"},
"node_type": {"type": "string", "description": "節點類型(可選): face_trace, gaze_trace, lip_trace, text_trace, appearance_trace, skin_tone_trace, object, speaker"},
"page": {"type": "integer", "default": 1},
"page_size": {"type": "integer", "default": 20}
}),
vec!["file_uuid"],
),
function_calling::make_tool(
"tkg_edges_query",
"查詢 TKG 知識圖譜的邊列表。可依照邊類型篩選CO_OCCURS_WITH, SPEAKS_AS, MUTUAL_GAZE, LIP_SYNC, HAS_APPEARANCE。適合查詢人物之間的互動關係。",
serde_json::json!({
"file_uuid": {"type": "string", "description": "影片 UUID"},
"edge_type": {"type": "string", "description": "邊類型(可選): CO_OCCURS_WITH, SPEAKS_AS, MUTUAL_GAZE, LIP_SYNC, HAS_APPEARANCE"},
"page": {"type": "integer", "default": 1},
"page_size": {"type": "integer", "default": 20}
}),
vec!["file_uuid"],
),
function_calling::make_tool(
"tkg_node_detail",
"查詢 TKG 知識圖譜的單一節點詳細資訊,包含該節點的 incoming 和 outgoing 邊。適合深入了解特定人物或物件的完整關係網絡。",
serde_json::json!({
"file_uuid": {"type": "string", "description": "影片 UUID"},
"node_id": {"type": "integer", "description": "節點 ID"}
}),
vec!["file_uuid", "node_id"],
),
]
}
// ── Tool Executors ───────────────────────────────────────────────
/// Searches the videos table for file names matching the `query` argument.
///
/// The argument is wrapped in `%…%` and compared with `ILIKE`, so matching is
/// case-insensitive. At most 10 rows are returned, newest `created_at` first.
/// Each row carries a `has_data` flag that is true when at least one
/// face-detection row exists for that file.
///
/// Returns a JSON string: `{"found": false, "message": …}` when nothing matched,
/// otherwise `{"found": true, "files": […]}`. Database errors are returned as
/// their string representation.
async fn exec_find_file(pool: &sqlx::PgPool, args: &serde_json::Value) -> Result<String, String> {
    let needle = args
        .get("query")
        .and_then(serde_json::Value::as_str)
        .unwrap_or("");
    let videos_table = schema::table_name("videos");
    let detections_table = schema::table_name("face_detections");
    let pattern = format!("%{}%", needle);

    let sql = format!(
        "SELECT v.file_uuid::text, v.file_name, \
         (SELECT COUNT(*) FROM {} fd WHERE fd.file_uuid = v.file_uuid) > 0 AS has_data \
         FROM {} v WHERE v.file_name ILIKE $1 \
         ORDER BY v.created_at DESC LIMIT 10",
        detections_table, videos_table
    );
    let matches: Vec<(String, String, bool)> = sqlx::query_as(&sql)
        .bind(&pattern)
        .fetch_all(pool)
        .await
        .map_err(|err| err.to_string())?;

    // Give the caller an explicit "nothing found" payload instead of an empty list.
    if matches.is_empty() {
        let not_found = serde_json::json!({"found": false, "message": "No files match the query. Try different keywords."});
        return Ok(not_found.to_string());
    }

    let mut files: Vec<serde_json::Value> = Vec::with_capacity(matches.len());
    for (uuid, file_name, has_data) in matches {
        files.push(
            serde_json::json!({"file_uuid": uuid, "file_name": file_name, "has_data": has_data}),
        );
    }
    Ok(serde_json::json!({"found": true, "files": files}).to_string())
}
/// Lists videos ordered by newest `created_at` first.
///
/// Reads the optional integer `limit` argument (default 10) and binds it as the
/// SQL `LIMIT`. Each entry carries a `has_data` flag that is true when at least
/// one face-detection row exists for that file.
///
/// Returns `{"files": […]}` as a JSON string. Database errors are returned as
/// their string representation.
async fn exec_list_files(pool: &sqlx::PgPool, args: &serde_json::Value) -> Result<String, String> {
    let max_rows = args
        .get("limit")
        .and_then(serde_json::Value::as_i64)
        .unwrap_or(10);
    let videos_table = schema::table_name("videos");
    let detections_table = schema::table_name("face_detections");

    let sql = format!(
        "SELECT v.file_uuid::text, v.file_name, \
         (SELECT COUNT(*) FROM {} fd WHERE fd.file_uuid = v.file_uuid) > 0 AS has_data \
         FROM {} v ORDER BY v.created_at DESC LIMIT $1",
        detections_table, videos_table
    );
    let listed: Vec<(String, String, bool)> = sqlx::query_as(&sql)
        .bind(max_rows)
        .fetch_all(pool)
        .await
        .map_err(|err| err.to_string())?;

    let mut files: Vec<serde_json::Value> = Vec::with_capacity(listed.len());
    for (uuid, file_name, has_data) in listed {
        files.push(
            serde_json::json!({"file_uuid": uuid, "file_name": file_name, "has_data": has_data}),
        );
    }
    Ok(serde_json::json!({"files": files}).to_string())
}
async fn exec_tkg_query(pool: &sqlx::PgPool, args: &serde_json::Value) -> Result<String, String> {
let file_uuid = args.get("file_uuid").and_then(|v| v.as_str()).unwrap_or("");
let query_type = args
.get("query_type")
.and_then(|v| v.as_str())
.unwrap_or("");
let identity_name = args.get("identity_name").and_then(|v| v.as_str());
let identity_b = args.get("identity_b").and_then(|v| v.as_str());
let limit = args.get("limit").and_then(|v| v.as_i64()).unwrap_or(5);
let id_table = schema::table_name("identities");
let fd_table = schema::table_name("face_detections");
let videos = schema::table_name("videos");
let nodes = schema::table_name("tkg_nodes");
let edges = schema::table_name("tkg_edges");
match query_type {
"top_identities" => {
let rows: Vec<(String, String, i64)> = sqlx::query_as(&format!(
"SELECT i.uuid::text, i.name, COUNT(fd.id)::bigint AS face_count \
FROM {} fd JOIN {} i ON i.id = fd.identity_id \
WHERE fd.file_uuid = $1 AND fd.identity_id IS NOT NULL AND i.source = 'tmdb' \
GROUP BY i.uuid, i.name ORDER BY face_count DESC LIMIT $2",
fd_table, id_table
))
.bind(file_uuid)
.bind(limit)
.fetch_all(pool)
.await
.map_err(|e| e.to_string())?;
Ok(serde_json::json!({"identities": rows}).to_string())
}
"first_cooccurrence" => {
let name_a = identity_name.unwrap_or("");
let name_b = identity_b.unwrap_or("");
let row: Option<(i64, f64)> = sqlx::query_as(&format!(
"SELECT MIN(fd_a.frame_number)::bigint, \
ROUND(MIN(fd_a.frame_number)::numeric / GREATEST(MAX(v.fps)::numeric, 25.0), 2)::float8 \
FROM {} fd_a JOIN {} fd_b ON fd_a.frame_number = fd_b.frame_number \
JOIN {} v ON v.file_uuid = $1 \
WHERE fd_a.file_uuid = $1 \
AND fd_a.identity_id = (SELECT id FROM {} WHERE name ILIKE $2 LIMIT 1) \
AND fd_b.identity_id = (SELECT id FROM {} WHERE name ILIKE $3 LIMIT 1)",
fd_table, fd_table, videos, id_table, id_table
))
.bind(file_uuid).bind(name_a).bind(name_b)
.fetch_optional(pool)
.await.map_err(|e| e.to_string())?;
Ok(serde_json::json!({"first_cooccurrence": row.map(|(f, t)| serde_json::json!({"frame": f, "timestamp_secs": t}))}).to_string())
}
"identity_details" => {
let name = identity_name.unwrap_or("");
let row: Option<(String, String, Option<i32>, i64)> = sqlx::query_as(&format!(
"SELECT i.uuid::text, i.name, i.tmdb_id, \
(SELECT COUNT(*) FROM {} fd WHERE fd.identity_id = i.id AND fd.file_uuid = $1)::bigint \
FROM {} i WHERE i.name ILIKE $2 LIMIT 1",
fd_table, id_table
))
.bind(file_uuid).bind(name)
.fetch_optional(pool)
.await.map_err(|e| e.to_string())?;
Ok(serde_json::json!({"identity": row.map(|(u, n, tid, fc)| serde_json::json!({"uuid": u, "name": n, "tmdb_id": tid, "face_count": fc}))}).to_string())
}
"mutual_gaze" => {
let name_a = identity_name.unwrap_or("");
let name_b = identity_b.unwrap_or("");
let row: Option<(i64, i64, f64, f64)> = sqlx::query_as(&format!(
"SELECT (e.properties->>'first_frame')::bigint, \
(e.properties->>'gaze_frame_count')::int::bigint, \
(e.properties->>'yaw_a_avg')::float8, \
(e.properties->>'yaw_b_avg')::float8 \
FROM {} e \
JOIN {} a ON a.id = e.source_node_id \
JOIN {} b ON b.id = e.target_node_id \
JOIN {} fd_a ON fd_a.file_uuid = $1 AND fd_a.trace_id = REPLACE(a.external_id, 'trace_', '')::int \
JOIN {} fd_b ON fd_b.file_uuid = $1 AND fd_b.trace_id = REPLACE(b.external_id, 'trace_', '')::int \
JOIN {} ia ON ia.id = fd_a.identity_id \
JOIN {} ib ON ib.id = fd_b.identity_id \
WHERE e.file_uuid = $1 AND ia.name ILIKE $2 AND ib.name ILIKE $3 \
AND e.properties->>'mutual_gaze' = 'true' LIMIT 1",
edges, nodes, nodes, fd_table, fd_table, id_table, id_table
))
.bind(file_uuid).bind(name_a).bind(name_b)
.fetch_optional(pool)
.await.map_err(|e| e.to_string())?;
Ok(serde_json::json!({"mutual_gaze": row.map(|(f, gc, ya, yb)| serde_json::json!({"first_frame": f, "gaze_frame_count": gc, "yaw_a": ya, "yaw_b": yb}))}).to_string())
}
"interaction_network" => {
let rows: Vec<(String, String, i64)> = sqlx::query_as(&format!(
"SELECT ia.name, ib.name, COUNT(*)::bigint \
FROM {} e \
JOIN {} a ON a.id = e.source_node_id \
JOIN {} b ON b.id = e.target_node_id \
JOIN {} fd_a ON fd_a.trace_id = REPLACE(a.external_id, 'trace_', '')::int AND fd_a.file_uuid = $1 \
JOIN {} fd_b ON fd_b.trace_id = REPLACE(b.external_id, 'trace_', '')::int AND fd_b.file_uuid = $1 \
JOIN {} ia ON ia.id = fd_a.identity_id \
JOIN {} ib ON ib.id = fd_b.identity_id \
WHERE e.file_uuid = $1 AND e.edge_type = 'CO_OCCURS_WITH' \
AND ia.name != ib.name AND ia.source = 'tmdb' AND ib.source = 'tmdb' \
GROUP BY ia.name, ib.name \
ORDER BY COUNT(*) DESC LIMIT $2",
edges, nodes, nodes, fd_table, fd_table, id_table, id_table
))
.bind(file_uuid).bind(limit)
.fetch_all(pool)
.await.map_err(|e| e.to_string())?;
Ok(serde_json::json!({"interaction_network": rows}).to_string())
}
"identity_traces" => {
let name = identity_name.unwrap_or("");
// MIN/MAX frame_number should be bigint (i64), not int
let rows: Vec<(i32, i64, i64, i64)> = sqlx::query_as(&format!(
"SELECT fd.trace_id, COUNT(*)::bigint, MIN(fd.frame_number)::bigint, MAX(fd.frame_number)::bigint \
FROM {} fd JOIN {} i ON i.id = fd.identity_id \
WHERE fd.file_uuid = $1 AND i.name ILIKE $2 \
GROUP BY fd.trace_id ORDER BY COUNT(*) DESC LIMIT $3",
fd_table, id_table
))
.bind(file_uuid).bind(name).bind(limit)
.fetch_all(pool)
.await.map_err(|e| e.to_string())?;
Ok(serde_json::json!({"traces": rows}).to_string())
}
"file_info" => {
let row: Option<(String, f64, i32, i32, f64)> = sqlx::query_as(&format!(
"SELECT file_name, duration, width, height, fps FROM {} WHERE file_uuid = $1",
videos
))
.bind(file_uuid)
.fetch_optional(pool)
.await
.map_err(|e| e.to_string())?;
Ok(serde_json::json!({"file_info": row.map(|(n, d, w, h, f)| serde_json::json!({"file_name": n, "duration_sec": d, "width": w, "height": h, "fps": f}))}).to_string())
}
"speaker_dialogue" => {
let name = identity_name.unwrap_or("");
let rows: Vec<(String, Option<String>)> = sqlx::query_as(&format!(
"SELECT DISTINCT sn.external_id, sn.properties->>'full_text' AS full_text \
FROM {} i \
JOIN {} fd ON fd.identity_id = i.id AND ($2::text IS NULL OR fd.file_uuid = $2) \
JOIN {} fn ON fn.file_uuid = fd.file_uuid \
AND fn.node_type = 'face_trace' \
AND fn.external_id = CONCAT('trace_', fd.trace_id) \
JOIN {} e ON e.source_node_id = fn.id \
AND e.edge_type = 'SPEAKS_AS' \
AND ($2::text IS NULL OR e.file_uuid = $2) \
JOIN {} sn ON sn.id = e.target_node_id \
WHERE i.name ILIKE $1 \
LIMIT $3",
id_table, fd_table, nodes, edges, nodes
))
.bind(name)
.bind(file_uuid)
.bind(limit)
.fetch_all(pool)
.await
.map_err(|e| e.to_string())?;
Ok(
serde_json::json!({"speakers": rows.iter().map(|(sid, text)| {
serde_json::json!({"speaker_id": sid, "dialogue": text})
}).collect::<Vec<_>>()})
.to_string(),
)
}
"speaker_interaction" => {
let name_a = identity_name.unwrap_or("");
let name_b = identity_b.unwrap_or("");
if name_a.is_empty() || name_b.is_empty() {
return Ok(
serde_json::json!({"error": "identity_name and identity_b are required"})
.to_string(),
);
}
// Get both speakers' segments from TKG
let rows: Vec<(String, String, serde_json::Value)> = sqlx::query_as(&format!(
"SELECT sn.external_id, sn.properties->>'full_text' AS full_text, sn.properties->'segments' AS segments \
FROM {} i \
JOIN {} fd ON fd.identity_id = i.id AND ($3::text IS NULL OR fd.file_uuid = $3) \
JOIN {} fn ON fn.file_uuid = fd.file_uuid \
AND fn.node_type = 'face_trace' \
AND fn.external_id = CONCAT('trace_', fd.trace_id) \
JOIN {} e ON e.source_node_id = fn.id \
AND e.edge_type = 'SPEAKS_AS' \
AND ($3::text IS NULL OR e.file_uuid = $3) \
JOIN {} sn ON sn.id = e.target_node_id \
WHERE (i.name ILIKE $1 OR i.name ILIKE $2) \
ORDER BY sn.external_id",
id_table, fd_table, nodes, edges, nodes
))
.bind(name_a)
.bind(name_b)
.bind(file_uuid)
.fetch_all(pool)
.await
.map_err(|e| e.to_string())?;
let mut interactions = Vec::new();
for i in 0..rows.len() {
for j in i + 1..rows.len() {
let (sid_a, text_a, segs_a_val) = &rows[i];
let (sid_b, text_b, segs_b_val) = &rows[j];
let segs_a = segs_a_val.as_array();
let segs_b = segs_b_val.as_array();
if let (Some(a_list), Some(b_list)) = (segs_a, segs_b) {
for sa in a_list {
let sa_start = sa.get("start").and_then(|v| v.as_f64()).unwrap_or(0.0);
let sa_end = sa.get("end").and_then(|v| v.as_f64()).unwrap_or(0.0);
let sa_text = sa.get("text").and_then(|v| v.as_str()).unwrap_or("");
if sa_text.is_empty() {
continue;
}
for sb in b_list {
let sb_start =
sb.get("start").and_then(|v| v.as_f64()).unwrap_or(0.0);
let sb_end = sb.get("end").and_then(|v| v.as_f64()).unwrap_or(0.0);
let sb_text = sb.get("text").and_then(|v| v.as_str()).unwrap_or("");
if sb_text.is_empty() {
continue;
}
// Check temporal overlap
let overlap_start = sa_start.max(sb_start);
let overlap_end = sa_end.min(sb_end);
if overlap_start < overlap_end {
interactions.push(serde_json::json!({
"speaker_a": sid_a,
"speaker_b": sid_b,
"time_range_s": [overlap_start, overlap_end],
"dialogue_a": sa_text,
"dialogue_b": sb_text,
}));
}
}
}
}
}
}
interactions.sort_by(|a, b| {
let a_start = a["time_range_s"][0].as_f64().unwrap_or(0.0);
let b_start = b["time_range_s"][0].as_f64().unwrap_or(0.0);
a_start.partial_cmp(&b_start).unwrap()
});
interactions.truncate(limit as usize);
Ok(serde_json::json!({"interactions": interactions, "speaker_a_text": rows.first().map(|r| r.1.clone()), "speaker_b_text": rows.get(1).map(|r| r.1.clone())}).to_string())
}
_ => Ok(
serde_json::json!({"error": format!("Unknown query_type: {}", query_type)}).to_string(),
),
}
}
/// Searches chunk text for the `query` argument (case-insensitive `ILIKE` on `%query%`).
///
/// Arguments read from `args`:
/// - `query`: text to look for (defaults to `""`).
/// - `file_uuid`: optional; when present, only chunks of that file are searched.
/// - `limit`: maximum number of rows (defaults to 5; negative values are treated as 0).
///
/// Returns `{"results": […]}` as a JSON string, ordered by `start_frame`.
///
/// # Errors
/// Database failures are returned as the error's string representation.
async fn exec_smart_search(
    _pool: &sqlx::PgPool,
    args: &serde_json::Value,
) -> Result<String, String> {
    let query = args.get("query").and_then(|v| v.as_str()).unwrap_or("");
    let file_uuid = args.get("file_uuid").and_then(|v| v.as_str());
    // PostgreSQL rejects a negative LIMIT, so clamp at zero.
    let limit = args
        .get("limit")
        .and_then(|v| v.as_i64())
        .unwrap_or(5)
        .max(0);
    let chunk_table = schema::table_name("chunk");

    // One statement covers both cases: a NULL $2 disables the file filter, and
    // the limit is bound rather than spliced into the SQL text.
    let sql = format!(
        "SELECT chunk_id, text_content, start_frame, end_frame, chunk_type \
         FROM {} WHERE text_content ILIKE $1 \
         AND ($2::text IS NULL OR file_uuid = $2) \
         ORDER BY start_frame LIMIT $3",
        chunk_table
    );
    let like = format!("%{}%", query);
    let rows: Vec<(String, Option<String>, i64, i64, String)> = sqlx::query_as(&sql)
        .bind(&like)
        .bind(file_uuid)
        .bind(limit)
        .fetch_all(_pool)
        .await
        .map_err(|e| e.to_string())?;
    Ok(serde_json::json!({"results": rows}).to_string())
}
/// Finds chunks whose text contains `q` and reports which identities have a
/// face detection inside each chunk's frame range.
///
/// Arguments read from `args`:
/// - `q`: literal text to search for (case-insensitive; defaults to `""`).
/// - `file_uuid`: optional; restricts the search to one file.
/// - `limit`: maximum number of rows (default 10, clamped to `0..=50`).
///
/// Returns `{"results": […]}` as a JSON string, ordered by chunk start time.
///
/// # Errors
/// Database failures are returned as the error's string representation.
async fn exec_identity_text(
    pool: &sqlx::PgPool,
    args: &serde_json::Value,
) -> Result<String, String> {
    /// Escapes LIKE metacharacters with a backslash (PostgreSQL's default
    /// escape character) so the input is matched literally.
    fn escape_like(raw: &str) -> String {
        let mut escaped = String::with_capacity(raw.len());
        for ch in raw.chars() {
            if matches!(ch, '\\' | '%' | '_') {
                escaped.push('\\');
            }
            escaped.push(ch);
        }
        escaped
    }

    let q = args.get("q").and_then(|v| v.as_str()).unwrap_or("");
    let file_uuid = args.get("file_uuid").and_then(|v| v.as_str());
    // The lower bound guards against a negative LIMIT, which PostgreSQL rejects.
    let limit = args
        .get("limit")
        .and_then(|v| v.as_i64())
        .unwrap_or(10)
        .clamp(0, 50);
    let chunk_table = schema::table_name("chunk");
    let fd_table = schema::table_name("face_detections");
    let id_table = schema::table_name("identities");
    let like_q = format!("%{}%", escape_like(q));
    let sql = format!(
        "SELECT c.chunk_id, c.start_time, c.end_time, c.text_content, \
         i.name AS identity_name, fd.trace_id, i.source AS identity_source \
         FROM {} c \
         JOIN {} fd ON fd.file_uuid = c.file_uuid \
         AND fd.frame_number BETWEEN c.start_frame AND c.end_frame \
         AND fd.identity_id IS NOT NULL \
         JOIN {} i ON i.id = fd.identity_id \
         WHERE ($1::text IS NULL OR c.file_uuid = $1) \
         AND (LOWER(c.text_content) LIKE LOWER($2) OR LOWER(c.content::text) LIKE LOWER($2)) \
         ORDER BY c.start_time \
         LIMIT $3",
        chunk_table, fd_table, id_table
    );
    let rows: Vec<(
        String,
        f64,
        f64,
        Option<String>,
        String,
        Option<i32>,
        String,
    )> = sqlx::query_as(&sql)
        .bind(file_uuid)
        .bind(&like_q)
        .bind(limit)
        .fetch_all(pool)
        .await
        .map_err(|e| e.to_string())?;

    let results: Vec<serde_json::Value> = rows
        .iter()
        .map(|(chunk_id, st, et, txt, name, tid, src)| {
            serde_json::json!({
                "chunk_id": chunk_id,
                "start_time": st,
                "end_time": et,
                "text": txt,
                "identity_name": name,
                "trace_id": tid,
                "source": src
            })
        })
        .collect();
    Ok(serde_json::json!({"results": results}).to_string())
}
/// Finds identities whose name (or an alias in `metadata->'aliases'`) contains
/// `q`, together with the chunks in which they have a face detection.
///
/// Arguments read from `args`:
/// - `q`: literal text to search for (case-insensitive; defaults to `""`).
/// - `file_uuid`: optional; restricts the search to one file.
/// - `limit`: maximum number of rows (default 10, clamped to `0..=50`).
///
/// Returns `{"results": […]}` as a JSON string, one row per (identity, chunk) pair.
///
/// # Errors
/// Database failures are returned as the error's string representation.
async fn exec_identities_search(
    pool: &sqlx::PgPool,
    args: &serde_json::Value,
) -> Result<String, String> {
    /// Escapes LIKE metacharacters with a backslash (PostgreSQL's default
    /// escape character) so the input is matched literally.
    fn escape_like(raw: &str) -> String {
        let mut escaped = String::with_capacity(raw.len());
        for ch in raw.chars() {
            if matches!(ch, '\\' | '%' | '_') {
                escaped.push('\\');
            }
            escaped.push(ch);
        }
        escaped
    }

    let q = args.get("q").and_then(|v| v.as_str()).unwrap_or("");
    let file_uuid = args.get("file_uuid").and_then(|v| v.as_str());
    // The lower bound guards against a negative LIMIT, which PostgreSQL rejects.
    let limit = args
        .get("limit")
        .and_then(|v| v.as_i64())
        .unwrap_or(10)
        .clamp(0, 50);
    let id_table = schema::table_name("identities");
    let fd_table = schema::table_name("face_detections");
    let chunk_table = schema::table_name("chunk");
    let like_q = format!("%{}%", escape_like(q));
    let sql = format!(
        "SELECT DISTINCT ON (i.name, c.chunk_id) \
         i.name, c.chunk_id, c.start_time, c.end_time, c.text_content, fd.trace_id \
         FROM {} i \
         JOIN {} fd ON fd.identity_id = i.id \
         JOIN {} c ON c.file_uuid = fd.file_uuid \
         AND c.start_time <= fd.frame_number / COALESCE(c.fps, 25.0) \
         AND c.end_time >= fd.frame_number / COALESCE(c.fps, 25.0) \
         WHERE (i.name ILIKE $1 \
         OR EXISTS (SELECT 1 FROM jsonb_array_elements(i.metadata->'aliases') AS a WHERE a->>'name' ILIKE $1)) \
         AND ($2::text IS NULL OR fd.file_uuid = $2) \
         ORDER BY i.name, c.chunk_id, c.start_time \
         LIMIT $3",
        id_table, fd_table, chunk_table
    );
    let rows: Vec<(String, String, f64, f64, Option<String>, Option<i32>)> = sqlx::query_as(&sql)
        .bind(&like_q)
        .bind(file_uuid)
        .bind(limit)
        .fetch_all(pool)
        .await
        .map_err(|e| e.to_string())?;

    let results: Vec<serde_json::Value> = rows
        .iter()
        .map(|(name, chunk_id, st, et, txt, tid)| {
            serde_json::json!({
                "identity_name": name,
                "chunk_id": chunk_id,
                "start_time": st,
                "end_time": et,
                "text": txt,
                "trace_id": tid,
            })
        })
        .collect();
    Ok(serde_json::json!({"results": results}).to_string())
}
/// Fetches the first identity whose name matches the `name` argument (`ILIKE`).
///
/// Returns `{"identity": {...}}` with `uuid`, `name`, `source`, `tmdb_id` and
/// `character` (taken from `metadata->>'tmdb_character'`), or `{"identity": null}`
/// when no row matches. Database errors are returned as their string representation.
async fn exec_get_identity_detail(
    pool: &sqlx::PgPool,
    args: &serde_json::Value,
) -> Result<String, String> {
    let wanted = args
        .get("name")
        .and_then(serde_json::Value::as_str)
        .unwrap_or("");
    let identities = schema::table_name("identities");
    let sql = format!(
        "SELECT uuid::text, name, source, tmdb_id, metadata->>'tmdb_character' FROM {} WHERE name ILIKE $1 LIMIT 1",
        identities
    );
    let found: Option<(String, String, Option<String>, Option<i32>, Option<String>)> =
        sqlx::query_as(&sql)
            .bind(wanted)
            .fetch_optional(pool)
            .await
            .map_err(|err| err.to_string())?;

    let identity = found.map(|(uuid, full_name, source, tmdb_id, character)| {
        serde_json::json!({"uuid": uuid, "name": full_name, "source": source, "tmdb_id": tmdb_id, "character": character})
    });
    Ok(serde_json::json!({"identity": identity}).to_string())
}
/// Fetches name, duration, dimensions and fps for the video identified by `file_uuid`.
///
/// Returns `{"file_info": {...}}`, or `{"file_info": null}` when no such video
/// exists. Database errors are returned as their string representation.
async fn exec_get_file_info(
    pool: &sqlx::PgPool,
    args: &serde_json::Value,
) -> Result<String, String> {
    let uuid = args
        .get("file_uuid")
        .and_then(serde_json::Value::as_str)
        .unwrap_or("");
    let videos_table = schema::table_name("videos");
    let sql = format!(
        "SELECT file_name, duration, width, height, fps FROM {} WHERE file_uuid = $1",
        videos_table
    );
    let found: Option<(String, f64, i32, i32, f64)> = sqlx::query_as(&sql)
        .bind(uuid)
        .fetch_optional(pool)
        .await
        .map_err(|err| err.to_string())?;

    let info = found.map(|(file_name, duration, width, height, fps)| {
        serde_json::json!({"file_name": file_name, "duration_sec": duration, "width": width, "height": height, "fps": fps})
    });
    Ok(serde_json::json!({"file_info": info}).to_string())
}
async fn exec_get_representative_frame(
pool: &sqlx::PgPool,
args: &serde_json::Value,
) -> Result<String, String> {
let file_uuid = args.get("file_uuid").and_then(|v| v.as_str()).unwrap_or("");
match crate::core::processor::tkg::query_auto_representative_frame(pool, file_uuid).await {
Ok(r) => Ok(serde_json::json!({
"frame_number": r.frame_number,
"face_quality": r.face_quality,
"main_identities": r.main_identities,
"traces": r.traces,
})
.to_string()),
Err(e) => Ok(serde_json::json!({"error": e.to_string()}).to_string()),
}
}
/// Extracts a single frame from a video with ffmpeg and asks the vision LLM about it.
///
/// Arguments read from `args`:
/// - `file_uuid`: required; an empty value yields an `{"error": …}` payload.
/// - `question`: prompt for the vision model (a default description prompt is used if absent).
/// - `frame_number`: optional. When absent, the auto-selected representative frame is
///   used; if that lookup fails, the frame at the middle of the video (or 0) is used.
///
/// Returns JSON with `frame_number`, `timestamp_secs` and `analysis`. An ffmpeg
/// non-zero exit is reported as `Ok` with an `{"error": …}` payload.
///
/// # Errors
/// Returns `Err` for database failures, an unknown video, failure to launch ffmpeg,
/// or a failed vision-LLM call.
async fn exec_analyze_frame(
    pool: &sqlx::PgPool,
    args: &serde_json::Value,
) -> Result<String, String> {
    let file_uuid = args.get("file_uuid").and_then(|v| v.as_str()).unwrap_or("");
    let question = args
        .get("question")
        .and_then(|v| v.as_str())
        .unwrap_or("請描述這個畫面中的內容");
    if file_uuid.is_empty() {
        return Ok(serde_json::json!({"error": "file_uuid is required"}).to_string());
    }
    let videos = schema::table_name("videos");
    let (video_path, fps): (String, f64) = sqlx::query_as(&format!(
        "SELECT file_path, COALESCE(fps, 25.0) FROM {} WHERE file_uuid = $1",
        videos
    ))
    .bind(file_uuid)
    .fetch_optional(pool)
    .await
    .map_err(|e| e.to_string())?
    .ok_or_else(|| "Video not found".to_string())?;
    // COALESCE only covers NULL; a stored 0 (or non-finite) fps would turn the
    // timestamp below into inf/NaN, so fall back to the same 25 fps default.
    let fps = if fps.is_finite() && fps > 0.0 {
        fps
    } else {
        25.0
    };

    let frame_number = match args.get("frame_number").and_then(|v| v.as_i64()) {
        Some(f) => f,
        None => {
            match crate::core::processor::tkg::query_auto_representative_frame(pool, file_uuid)
                .await
            {
                Ok(r) => r.frame_number,
                Err(_) => {
                    // No representative frame available: use the middle of the video.
                    let duration: f64 = sqlx::query_scalar(&format!(
                        "SELECT COALESCE(duration, 0) FROM {} WHERE file_uuid = $1",
                        videos
                    ))
                    .bind(file_uuid)
                    .fetch_optional(pool)
                    .await
                    .map_err(|e| e.to_string())?
                    .unwrap_or(0.0);
                    if duration > 0.0 {
                        ((duration / 2.0) * fps) as i64
                    } else {
                        0
                    }
                }
            }
        }
    };
    // A negative frame index would produce a negative seek position.
    let frame_number = frame_number.max(0);
    let timestamp_secs = frame_number as f64 / fps;

    // Prefer an explicit override, then the Homebrew "ffmpeg-full" build, then PATH.
    let ffmpeg_path = std::env::var("MOMENTRY_FFMPEG").unwrap_or_else(|_| {
        let full = "/opt/homebrew/opt/ffmpeg-full/bin/ffmpeg";
        if std::path::Path::new(full).exists() {
            full.to_string()
        } else {
            "ffmpeg".to_string()
        }
    });
    // Seek, decode one frame and write it as MJPEG to stdout.
    let output = tokio::process::Command::new(&ffmpeg_path)
        .args([
            "-ss",
            &format!("{:.3}", timestamp_secs),
            "-i",
            &video_path,
            "-vframes",
            "1",
            "-f",
            "image2pipe",
            "-vcodec",
            "mjpeg",
            "-",
        ])
        .output()
        .await
        .map_err(|e| format!("ffmpeg execution error: {}", e))?;
    if !output.status.success() {
        let stderr = String::from_utf8_lossy(&output.stderr);
        return Ok(serde_json::json!({"error": format!("ffmpeg failed: {}", stderr)}).to_string());
    }

    let base64_img = BASE64.encode(&output.stdout);
    let system_prompt =
        "你是一個專業的影片畫面分析助手。請根據提供的畫面以及用戶的問題,詳細描述畫面中的內容,包括場景、人物、動作、表情、物件等。請用繁體中文回答。";
    let vision_result = call_llm_vision(system_prompt, question, vec![base64_img], 1024, 120)
        .await
        .map_err(|e| e.to_string())?;
    Ok(serde_json::json!({
        "frame_number": frame_number,
        "timestamp_secs": timestamp_secs,
        "analysis": vision_result,
    })
    .to_string())
}
// ── Tool Executors ── (implementations in core::agent::tools)
// ── Tool Router ───────────────────────────────────────────────────
async fn execute_tool(pool: &sqlx::PgPool, tool_call: &ToolCall) -> (String, String, String) {
let name = tool_call.function.name.clone();
let tool_call_id = tool_call.id.clone().unwrap_or_default();
let args: serde_json::Value =
match serde_json::from_str(&tool_call.function.arguments) {
Ok(v) => v,
Err(e) => return (tool_call_id, name, serde_json::json!({"error": format!("Invalid arguments: {}", e)}).to_string()),
};
let args: serde_json::Value = match serde_json::from_str(&tool_call.function.arguments) {
Ok(v) => v,
Err(e) => {
return (
tool_call_id,
name,
serde_json::json!({"error": format!("Invalid arguments: {}", e)}).to_string(),
)
}
};
let result = match name.as_str() {
"find_file" => exec_find_file(pool, &args).await,
"list_files" => exec_list_files(pool, &args).await,
"tkg_query" => exec_tkg_query(pool, &args).await,
"smart_search" => exec_smart_search(pool, &args).await,
"identity_text" => exec_identity_text(pool, &args).await,
"identities_search" => exec_identities_search(pool, &args).await,
"get_identity_detail" => exec_get_identity_detail(pool, &args).await,
"get_file_info" => exec_get_file_info(pool, &args).await,
"get_representative_frame" => exec_get_representative_frame(pool, &args).await,
"analyze_frame" => exec_analyze_frame(pool, &args).await,
"find_file" => tools::exec_find_file(pool, &args).await,
"list_files" => tools::exec_list_files(pool, &args).await,
"tkg_query" => tools::exec_tkg_query(pool, &args).await,
"tkg_nodes_query" => tools::exec_tkg_nodes_query(pool, &args).await,
"tkg_edges_query" => tools::exec_tkg_edges_query(pool, &args).await,
"tkg_node_detail" => tools::exec_tkg_node_detail(pool, &args).await,
"smart_search" => tools::exec_smart_search(pool, &args).await,
"identity_text" => tools::exec_identity_text(pool, &args).await,
"identities_search" => tools::exec_identities_search(pool, &args).await,
"get_identity_detail" => tools::exec_get_identity_detail(pool, &args).await,
"get_file_info" => tools::exec_get_file_info(pool, &args).await,
"get_representative_frame" => tools::exec_get_representative_frame(pool, &args).await,
"analyze_frame" => tools::exec_analyze_frame(pool, &args).await,
_ => Err(format!("Unknown tool: {}", name)),
};
let content = match result {
@@ -916,7 +340,7 @@ async fn run_tool_loop(
messages.len(),
tools.len()
);
match function_calling::call_llm(messages.clone(), Some(tools.clone()), 2048, 120).await {
Ok(LlmResponse::Text(text)) => {
tracing::info!(
@@ -945,7 +369,9 @@ async fn run_tool_loop(
);
sources.push(serde_json::json!({"tool": name, "result": content}));
messages.push(function_calling::make_tool_result(
&tool_call_id, &name, &content,
&tool_call_id,
&name,
&content,
));
}
}

119
src/api/checkin_api.rs Normal file
View File

@@ -0,0 +1,119 @@
use axum::{
extract::{Path, State},
http::StatusCode,
routing::{get, post},
Json, Router,
};
use serde::{Deserialize, Serialize};
use crate::api::types::AppState;
use crate::core::checkin;
use crate::core::db::VideoStatus;
/// JSON body returned by `checkin_handler` when a check-in succeeds.
///
/// The three `*_moved` counters are copied verbatim from the result of
/// `checkin::checkin`; `status` is set to `"indexed"` by the handler.
#[derive(Debug, Serialize)]
struct CheckinResponse {
// UUID of the file that was checked in.
file_uuid: String,
// Counters reported by `checkin::checkin` (presumably rows/vectors transferred
// out of the per-file workspace — TODO confirm against core::checkin).
pre_chunks_moved: usize,
speaker_detections_moved: usize,
vectors_moved: usize,
// Status label reported to the client.
status: String,
}
/// JSON body returned by `checkout_handler` when a check-out succeeds.
///
/// `rows_deleted` is copied from the result of `checkin::checkout`; `status`
/// is set to `"checked_out"` by the handler.
#[derive(Debug, Serialize)]
struct CheckoutResponse {
// UUID of the file that was checked out.
file_uuid: String,
// Number of rows removed, as reported by `checkin::checkout`.
rows_deleted: usize,
// Status label reported to the client.
status: String,
}
/// JSON body returned by `workspace_status_handler`.
#[derive(Debug, Serialize)]
struct WorkspaceStatusResponse {
// UUID taken from the request path.
file_uuid: String,
// Result of `WorkspaceDb::exists` for that UUID.
exists: bool,
}
/// `POST /api/v1/file/:file_uuid/checkin` — runs `checkin::checkin` for the file.
///
/// On success the video status is set to `Indexed`; a failure of that status
/// update is only logged, and the response is still a success. When the
/// check-in itself fails, a 500 response with an `error` / `file_uuid` JSON
/// body is returned.
async fn checkin_handler(
    State(state): State<AppState>,
    Path(file_uuid): Path<String>,
) -> Result<Json<CheckinResponse>, (StatusCode, Json<serde_json::Value>)> {
    let outcome = match checkin::checkin(&state.db, &file_uuid).await {
        Ok(outcome) => outcome,
        Err(err) => {
            let body = serde_json::json!({
                "error": format!("Checkin failed: {}", err),
                "file_uuid": file_uuid,
            });
            return Err((StatusCode::INTERNAL_SERVER_ERROR, Json(body)));
        }
    };

    // Best effort: the check-in already happened, so only warn if the status write fails.
    let status_update = state
        .db
        .update_video_status(&file_uuid, VideoStatus::Indexed)
        .await;
    if let Err(err) = status_update {
        tracing::warn!(
            "Failed to update video status to Indexed for {}: {}",
            file_uuid,
            err
        );
    }

    let response = CheckinResponse {
        file_uuid: outcome.file_uuid.clone(),
        pre_chunks_moved: outcome.pre_chunks_moved,
        speaker_detections_moved: outcome.speaker_detections_moved,
        vectors_moved: outcome.vectors_moved,
        status: "indexed".to_string(),
    };
    Ok(Json(response))
}
/// `POST /api/v1/file/:file_uuid/checkout` — runs `checkin::checkout` for the file.
///
/// On success the video status is set to `CheckedOut`; a failure of that status
/// update is only logged, and the response is still a success. When the
/// check-out itself fails, a 500 response with an `error` / `file_uuid` JSON
/// body is returned.
async fn checkout_handler(
    State(state): State<AppState>,
    Path(file_uuid): Path<String>,
) -> Result<Json<CheckoutResponse>, (StatusCode, Json<serde_json::Value>)> {
    let outcome = match checkin::checkout(&state.db, &file_uuid).await {
        Ok(outcome) => outcome,
        Err(err) => {
            let body = serde_json::json!({
                "error": format!("Checkout failed: {}", err),
                "file_uuid": file_uuid,
            });
            return Err((StatusCode::INTERNAL_SERVER_ERROR, Json(body)));
        }
    };

    // Best effort: the check-out already happened, so only warn if the status write fails.
    let status_update = state
        .db
        .update_video_status(&file_uuid, VideoStatus::CheckedOut)
        .await;
    if let Err(err) = status_update {
        tracing::warn!(
            "Failed to update video status to CheckedOut for {}: {}",
            file_uuid,
            err
        );
    }

    let response = CheckoutResponse {
        file_uuid: outcome.file_uuid.clone(),
        rows_deleted: outcome.rows_deleted,
        status: "checked_out".to_string(),
    };
    Ok(Json(response))
}
/// `GET /api/v1/file/:file_uuid/workspace` — reports whether `WorkspaceDb::exists`
/// returns true for the given file UUID.
async fn workspace_status_handler(Path(file_uuid): Path<String>) -> Json<WorkspaceStatusResponse> {
    use crate::core::db::workspace_sqlite::WorkspaceDb;

    let exists = WorkspaceDb::exists(&file_uuid);
    Json(WorkspaceStatusResponse { file_uuid, exists })
}
/// Builds the router exposing the check-in, check-out and workspace-status endpoints.
pub fn checkin_routes() -> Router<AppState> {
    let router = Router::new();
    let router = router.route("/api/v1/file/:file_uuid/checkin", post(checkin_handler));
    let router = router.route("/api/v1/file/:file_uuid/checkout", post(checkout_handler));
    router.route(
        "/api/v1/file/:file_uuid/workspace",
        get(workspace_status_handler),
    )
}

File diff suppressed because it is too large Load Diff

View File

@@ -63,7 +63,10 @@ pub fn bbox_routes() -> Router<crate::api::types::AppState> {
)
.route("/api/v1/file/:file_uuid/video", get(stream_video))
.route("/api/v1/file/:file_uuid/thumbnail", get(face_thumbnail))
.route("/api/v1/file/:file_uuid/chunk/:chunk_id/thumbnail", get(chunk_thumbnail))
.route(
"/api/v1/file/:file_uuid/chunk/:chunk_id/thumbnail",
get(chunk_thumbnail),
)
.route("/api/v1/file/:file_uuid/clip", get(video_clip))
}
@@ -904,11 +907,16 @@ async fn chunk_thumbnail(
let select = format!("select=eq(n\\,{})", frame);
let output = ffmpeg_cmd()
.args([
"-i", &file_path,
"-vf", &select,
"-frames:v", "1",
"-f", "image2pipe",
"-vcodec", "mjpeg",
"-i",
&file_path,
"-vf",
&select,
"-frames:v",
"1",
"-f",
"image2pipe",
"-vcodec",
"mjpeg",
"-",
])
.output()
@@ -1206,15 +1214,10 @@ async fn media_proxy_handler(
.await
.map(IntoResponse::into_response),
"chunk_thumbnail" => {
let chunk_id = params
.get("chunk_id")
.ok_or(StatusCode::BAD_REQUEST)?;
chunk_thumbnail(
State(state),
Path((uuid.clone(), chunk_id.clone())),
)
.await
.map(IntoResponse::into_response)
let chunk_id = params.get("chunk_id").ok_or(StatusCode::BAD_REQUEST)?;
chunk_thumbnail(State(state), Path((uuid.clone(), chunk_id.clone())))
.await
.map(IntoResponse::into_response)
}
_ => Err(StatusCode::BAD_REQUEST),
}

View File

@@ -1,6 +1,7 @@
pub mod agent_api;
pub mod agent_search;
pub mod auth;
pub mod checkin_api;
pub mod docs;
pub mod files;
pub mod five_w1h_agent_api;

View File

@@ -2,7 +2,7 @@ use axum::{
extract::{Path, Query, State},
http::StatusCode,
response::Json,
routing::post,
routing::{get, post},
Router,
};
use serde::{Deserialize, Serialize};
@@ -578,6 +578,127 @@ async fn watcher_auto_register_toggle(
})
}
/// Summary of one processor's JSON output file, as built by `get_processor_counts`.
#[derive(Debug, Serialize, Deserialize)]
struct ProcessorCountInfo {
// Processor name (from `ProcessorType::as_str`).
processor: String,
// Whether `<file_uuid>.<processor>.json` exists in the output directory.
has_json: bool,
// Value of the top-level `frame_count` key, when present.
frame_count: Option<u32>,
// Length of the top-level `segments` array, when present.
segment_count: Option<u32>,
// Length of the first array found among `child_chunks`, `parent_chunks`, `chunks`.
chunk_count: Option<u32>,
// File modification time formatted as RFC 3339 (UTC), when available.
last_modified: Option<String>,
}
/// JSON body returned by `get_processor_counts`.
#[derive(Debug, Serialize, Deserialize)]
struct ProcessorCountsResponse {
// Resolved (full) file UUID.
file_uuid: String,
// Directory that was scanned for processor output files.
output_dir: String,
// One entry per processor type.
processors: Vec<ProcessorCountInfo>,
}
/// `GET /api/v1/file/:file_uuid/processor-counts` — summarizes each processor's
/// JSON output file for a video.
///
/// The path parameter may be a full UUID or a prefix of one; it is resolved
/// against the videos table. If nothing matches and the parameter is exactly
/// 32 characters long, it is used as-is. For every processor type the handler
/// checks `<output_dir>/<file_uuid>.<processor>.json` and, if present, reports
/// its modification time plus frame, segment and chunk counts.
///
/// # Errors
/// - `500 INTERNAL_SERVER_ERROR` when the database lookup fails.
/// - `404 NOT_FOUND` when the UUID cannot be resolved.
async fn get_processor_counts(
    State(state): State<AppState>,
    Path(file_uuid): Path<String>,
) -> Result<Json<ProcessorCountsResponse>, StatusCode> {
    /// Escapes LIKE metacharacters with a backslash (PostgreSQL's default
    /// escape character) so the request value is matched literally.
    fn escape_like(raw: &str) -> String {
        let mut escaped = String::with_capacity(raw.len());
        for ch in raw.chars() {
            if matches!(ch, '\\' | '%' | '_') {
                escaped.push('\\');
            }
            escaped.push(ch);
        }
        escaped
    }

    /// Length of the array stored under `key`, if it exists and fits in `u32`.
    fn array_len(json: &serde_json::Value, key: &str) -> Option<u32> {
        json.get(key)
            .and_then(|v| v.as_array())
            .and_then(|arr| u32::try_from(arr.len()).ok())
    }

    let videos_table = schema::table_name("videos");
    // The prefix comes straight from the URL; without escaping, a `%` or `_`
    // in the path would match arbitrary videos.
    let prefix_pattern = format!("{}%", escape_like(&file_uuid));
    // NOTE(review): a short prefix can match several videos; `fetch_optional`
    // then returns whichever row the database yields first.
    let full_uuid: Option<String> = sqlx::query_scalar(&format!(
        "SELECT file_uuid FROM {} WHERE file_uuid = $1 OR file_uuid LIKE $2",
        videos_table
    ))
    .bind(&file_uuid)
    .bind(&prefix_pattern)
    .fetch_optional(state.db.pool())
    .await
    .map_err(|e| {
        tracing::error!("DB error: {}", e);
        StatusCode::INTERNAL_SERVER_ERROR
    })?
    .or_else(|| {
        // Accept an unregistered 32-character identifier unchanged.
        if file_uuid.len() == 32 {
            Some(file_uuid.clone())
        } else {
            None
        }
    });
    let file_uuid = full_uuid.ok_or(StatusCode::NOT_FOUND)?;

    let output_dir = std::env::var("MOMENTRY_OUTPUT_DIR")
        .unwrap_or_else(|_| "/Users/accusys/momentry/output_dev".to_string());
    let processors = crate::core::db::ProcessorType::all();
    let mut results = Vec::new();

    // NOTE(review): the std::fs calls below block the async executor thread while
    // reading each JSON file; consider spawn_blocking if these files are large.
    for processor in &processors {
        let proc_name = processor.as_str();
        let json_path =
            std::path::Path::new(&output_dir).join(format!("{}.{}.json", file_uuid, proc_name));
        let has_json = json_path.exists();
        let mut frame_count = None;
        let mut segment_count = None;
        let mut chunk_count = None;
        let mut last_modified = None;

        if has_json {
            if let Ok(metadata) = std::fs::metadata(&json_path) {
                if let Ok(modified) = metadata.modified() {
                    let chrono_dt: chrono::DateTime<chrono::Utc> = modified.into();
                    last_modified = Some(chrono_dt.to_rfc3339());
                }
            }
            if let Ok(content) = std::fs::read_to_string(&json_path) {
                if let Ok(json) = serde_json::from_str::<serde_json::Value>(&content) {
                    // Checked conversion: an out-of-range value is reported as
                    // unknown instead of silently wrapping.
                    frame_count = json
                        .get("frame_count")
                        .and_then(|v| v.as_u64())
                        .and_then(|v| u32::try_from(v).ok());
                    segment_count = array_len(&json, "segments");
                    // First array present wins: child_chunks, parent_chunks, chunks.
                    chunk_count = array_len(&json, "child_chunks")
                        .or_else(|| array_len(&json, "parent_chunks"))
                        .or_else(|| array_len(&json, "chunks"));
                }
            }
        }

        results.push(ProcessorCountInfo {
            processor: proc_name.to_string(),
            has_json,
            frame_count,
            segment_count,
            chunk_count,
            last_modified,
        });
    }

    Ok(Json(ProcessorCountsResponse {
        file_uuid,
        output_dir,
        processors: results,
    }))
}
/// `GET /api/v1/file/:file_uuid/verify`
///
/// Runs `verify_file` for the UUID taken from the path and returns the
/// resulting report as JSON.
async fn verify_file_handler(
    Path(file_uuid): Path<String>,
) -> Json<crate::verification::FileVerificationReport> {
    Json(crate::verification::verifier::verify_file(&file_uuid))
}
pub fn processing_routes() -> Router<AppState> {
Router::new()
.route("/api/v1/file/:file_uuid/process", post(trigger_processing))
@@ -597,4 +718,9 @@ pub fn processing_routes() -> Router<AppState> {
"/api/v1/config/watcher-auto-register",
post(watcher_auto_register_toggle),
)
.route(
"/api/v1/file/:file_uuid/processor-counts",
get(get_processor_counts),
)
.route("/api/v1/file/:file_uuid/verify", get(verify_file_handler))
}

View File

@@ -160,6 +160,11 @@ pub async fn smart_search(
.search_in_uuid(&embedding, file_uuid, fetch_limit)
.await
.unwrap_or_default();
tracing::info!(
"Smart search: Qdrant search_in_uuid for {} returned {} hits",
file_uuid,
qdrant_hits.len()
);
qdrant_hits
.into_iter()
.map(|h| (h.uuid, h.chunk_id, h.score as f64))
@@ -169,6 +174,10 @@ pub async fn smart_search(
.search(&embedding, fetch_limit)
.await
.unwrap_or_default();
tracing::info!(
"Smart search: Qdrant search (no uuid filter) returned {} hits",
qdrant_hits.len()
);
qdrant_hits
.into_iter()
.map(|h| (h.uuid, h.chunk_id, h.score as f64))
@@ -371,27 +380,45 @@ pub async fn smart_search(
// 6. Enrich top results from PG and build final response
let query_lower = req.query.to_lowercase();
let mut final_results = Vec::new();
for mr in ranked.iter().take(limit * 3) { // 取更多結果以便過濾
for mr in ranked.iter().take(limit * 3) {
// 取更多結果以便過濾
if let Some(pg) = db
.get_chunk_by_file_and_chunk_id(&mr.file_uuid, &mr.chunk_id)
.await
.ok()
.flatten()
{
// 關鍵字過濾
// 關鍵字過濾: CJK 用子字串匹配,英文用單詞邊界匹配
let summary_lower = pg.summary.to_lowercase();
let query_words: Vec<String> = query_lower.split_whitespace().map(|s| s.to_string()).collect();
// 檢查是否包含所有查詢詞(完整單詞)
let query_words: Vec<String> = query_lower
.split_whitespace()
.map(|s| s.to_string())
.collect();
let text_match = !pg.summary.is_empty() && {
let bordered = format!(" {} ", summary_lower);
query_words.iter().all(|w| bordered.contains(&format!(" {} ", w)))
let has_cjk = |s: &str| -> bool {
s.chars().any(|c| {
('\u{4E00}'..='\u{9FFF}').contains(&c)
|| ('\u{3040}'..='\u{309F}').contains(&c)
|| ('\u{30A0}'..='\u{30FF}').contains(&c)
|| ('\u{AC00}'..='\u{D7AF}').contains(&c)
})
};
if has_cjk(&query_lower) || has_cjk(&summary_lower) {
query_words.iter().all(|w| summary_lower.contains(w))
} else {
let bordered = format!(" {} ", summary_lower);
query_words
.iter()
.all(|w| bordered.contains(&format!(" {} ", w)))
}
};
if !text_match {
if !text_match && mr.semantic_score.is_none() {
continue;
}
final_results.push(SearchResult {
id: 0,
file_uuid: pg.file_uuid.clone(),
@@ -408,17 +435,19 @@ pub async fn smart_search(
similarity: Some(mr.score),
file_name: None,
serve_url: None,
thumbnail_url: pg.file_uuid.as_ref().map(|fu| format!(
"/wp-json/momentry/v1/media?type=chunk_thumbnail&file_uuid={}&chunk_id={}",
fu, mr.chunk_id
)),
thumbnail_url: pg.file_uuid.as_ref().map(|fu| {
format!(
"/wp-json/momentry/v1/media?type=chunk_thumbnail&file_uuid={}&chunk_id={}",
fu, mr.chunk_id
)
}),
});
}
}
// Trim to requested limit
final_results.truncate(limit);
// 7. Enrich results with file_name and serve_url from videos table
if !final_results.is_empty() {
let v_table = crate::core::db::schema::table_name("videos");

View File

@@ -11,6 +11,7 @@ use crate::Embedder;
use super::agent_api;
use super::agent_search;
use super::auth;
use super::checkin_api;
use super::docs;
use super::files;
use super::five_w1h_agent_api;
@@ -123,6 +124,7 @@ pub async fn start_server(host: &str, port: u16) -> anyhow::Result<()> {
.merge(llm_search::llm_smart_routes())
.merge(universal_search_routes())
.merge(pipeline::pipeline_routes())
.merge(checkin_api::checkin_routes())
.layer(axum::middleware::from_fn_with_state(
state.api_state.clone(),
unified_auth,

51
src/cli/agent.rs Normal file
View File

@@ -0,0 +1,51 @@
use anyhow::{Context, Result};
use momentry_core::core::agent::tools;
use momentry_core::core::db::{Database, PostgresDb};
/// Runs a single agent tool from the command line and prints its result.
///
/// * `tool` - name of the tool to execute (see the list in the error message
///   produced for unknown names).
/// * `args_str` - the tool arguments as a JSON document.
///
/// The tool output is pretty-printed when it is valid JSON and printed verbatim
/// otherwise.
///
/// # Errors
/// Returns an error when the arguments are not valid JSON, the database cannot
/// be initialized, the tool name is unknown, or the tool itself fails. Failures
/// are propagated to the caller instead of terminating the process here, so
/// destructors (including the database handle) still run.
pub async fn handle_agent(tool: &str, args_str: &str) -> Result<()> {
    // Validate the cheap, purely local input first so a typo in the JSON does
    // not cost a database connection.
    let args: serde_json::Value =
        serde_json::from_str(args_str).context("Failed to parse JSON arguments")?;

    let db = PostgresDb::init()
        .await
        .context("Failed to initialize database")?;
    let pool = db.pool();

    let result = match tool {
        "find_file" => tools::exec_find_file(pool, &args).await,
        "list_files" => tools::exec_list_files(pool, &args).await,
        "tkg_query" => tools::exec_tkg_query(pool, &args).await,
        "tkg_nodes_query" => tools::exec_tkg_nodes_query(pool, &args).await,
        "tkg_edges_query" => tools::exec_tkg_edges_query(pool, &args).await,
        "tkg_node_detail" => tools::exec_tkg_node_detail(pool, &args).await,
        "smart_search" => tools::exec_smart_search(pool, &args).await,
        "identity_text" => tools::exec_identity_text(pool, &args).await,
        "identities_search" => tools::exec_identities_search(pool, &args).await,
        "get_identity_detail" => tools::exec_get_identity_detail(pool, &args).await,
        "get_file_info" => tools::exec_get_file_info(pool, &args).await,
        "get_representative_frame" => tools::exec_get_representative_frame(pool, &args).await,
        "analyze_frame" => tools::exec_analyze_frame(pool, &args).await,
        _ => anyhow::bail!(
            "Unknown tool: {}. Available tools: find_file, list_files, tkg_query, \
             tkg_nodes_query, tkg_edges_query, tkg_node_detail, smart_search, \
             identity_text, identities_search, get_identity_detail, get_file_info, \
             get_representative_frame, analyze_frame",
            tool
        ),
    };

    // Tools report failures as plain strings; lift them into `anyhow` so the
    // caller decides how to report them and which exit code to use.
    let json_str = result.map_err(|e| anyhow::anyhow!(e))?;

    match serde_json::from_str::<serde_json::Value>(&json_str) {
        Ok(value) => println!("{}", serde_json::to_string_pretty(&value)?),
        Err(_) => println!("{}", json_str),
    }
    Ok(())
}

View File

@@ -190,20 +190,12 @@ pub enum Commands {
#[arg(long)]
scopes: Option<String>,
},
/// Manage n8n API keys
N8n {
/// Action: create, list, delete, verify
#[arg(value_enum)]
action: N8nAction,
/// n8n API key (for create/list/delete)
#[arg(long)]
api_key: Option<String>,
/// API key label (for create/delete)
#[arg(long)]
label: Option<String>,
/// Expiration days (for create)
#[arg(long)]
expires_in_days: Option<i64>,
/// Run an agent tool with JSON arguments
Agent {
/// Tool name (find_file, list_files, tkg_query, smart_search, etc.)
tool: String,
/// JSON arguments for the tool (e.g. '{"query": "batman"}')
args: String,
},
}
@@ -225,14 +217,6 @@ pub enum GiteaAction {
Verify,
}
#[derive(clap::ValueEnum, Clone, Debug)]
pub enum N8nAction {
Create,
List,
Delete,
Verify,
}
#[derive(Subcommand)]
pub enum VisionCommands {
/// Start Qwen3-VL server

View File

@@ -1,5 +1,6 @@
//! CLI command definitions and argument parsing
pub mod agent;
pub mod args;
pub mod vision;

View File

@@ -1,8 +1,8 @@
use anyhow::Result;
use std::path::PathBuf;
use momentry_core::core::vision::qwen_vl_manager::QwenVLManager;
use momentry_core::core::processor::cascade_vision::CascadeVisionProcessor;
use momentry_core::core::vision::qwen_vl_manager::QwenVLManager;
pub async fn handle_vision_command(cmd: crate::cli::args::VisionCommands) -> Result<()> {
let manager = QwenVLManager::new();
@@ -22,9 +22,12 @@ pub async fn handle_vision_command(cmd: crate::cli::args::VisionCommands) -> Res
crate::cli::args::VisionCommands::Status => {
println!("Checking Qwen3-VL status...");
let status = manager.get_status().await?;
println!("Status:");
println!(" Running: {}", if status.running { "✅ Yes" } else { "❌ No" });
println!(
" Running: {}",
if status.running { "✅ Yes" } else { "❌ No" }
);
println!(" Port: {}", status.port);
println!(" Model: {}", status.model_path);
println!(" Last request: {} seconds ago", status.last_request);
@@ -43,53 +46,72 @@ pub async fn handle_detect_command(
threshold: f32,
) -> Result<()> {
let image_path = PathBuf::from(&image);
if !image_path.exists() {
anyhow::bail!("Image file not found: {}", image);
}
println!("Detecting objects in: {}", image);
println!("Objects: {}", objects.join(", "));
println!("Mode: {}", if cascade { "Cascade (CLIP + Qwen3-VL)" } else { "CLIP only" });
println!(
"Mode: {}",
if cascade {
"Cascade (CLIP + Qwen3-VL)"
} else {
"CLIP only"
}
);
println!("Threshold: {:.2}", threshold);
println!();
if cascade {
let processor = CascadeVisionProcessor::with_threshold(threshold);
let result = processor.detect_objects(&image_path, &objects.iter().map(|s| s.as_str()).collect::<Vec<_>>()).await?;
let result = processor
.detect_objects(
&image_path,
&objects.iter().map(|s| s.as_str()).collect::<Vec<_>>(),
)
.await?;
println!("Detection Results:");
println!(" Model used: {}", result.model_used);
println!(" CLIP confidence: {:.3}", result.clip_confidence);
println!(" Qwen3-VL used: {}", if result.qwenvl_used { "✅ Yes" } else { "❌ No" });
println!(
" Qwen3-VL used: {}",
if result.qwenvl_used {
"✅ Yes"
} else {
"❌ No"
}
);
println!(" Processing time: {} ms", result.processing_time_ms);
println!(" Detections:");
for detection in &result.detections {
println!(" - {}: {:.3}", detection.label, detection.confidence);
}
if result.detections.is_empty() {
println!(" (No objects detected)");
}
} else {
use momentry_core::core::processor::clip::detect_objects;
let objects_str: Vec<&str> = objects.iter().map(|s| s.as_str()).collect();
let predictions = detect_objects(&image, &objects_str, Some(threshold), None).await?;
println!("Detection Results:");
println!(" Model used: CLIP");
println!(" Detections:");
for prediction in &predictions {
println!(" - {}: {:.3}", prediction.label, prediction.confidence);
}
if predictions.is_empty() {
println!(" (No objects detected above threshold {:.2})", threshold);
}
}
Ok(())
}
}

1
src/core/agent/mod.rs Normal file
View File

@@ -0,0 +1 @@
pub mod tools;

861
src/core/agent/tools.rs Normal file
View File

@@ -0,0 +1,861 @@
use base64::{engine::general_purpose::STANDARD as BASE64, Engine};
use serde_json;
use crate::core::db::schema;
use crate::core::llm::function_calling::call_llm_vision;
use crate::core::processor::tkg::query_auto_representative_frame;
/// Qualifies a table name with the schema from the `DATABASE_SCHEMA`
/// environment variable.
///
/// The schema defaults to `dev` when the variable is unset or unreadable; the
/// `public` schema is left unqualified.
fn t(name: &str) -> String {
    match std::env::var("DATABASE_SCHEMA").as_deref() {
        Ok("public") => name.to_owned(),
        Ok(schema) => format!("{}.{}", schema, name),
        Err(_) => format!("{}.{}", "dev", name),
    }
}
/// Agent tool: finds up to 10 videos whose file name contains `query`
/// (case-insensitive), newest first.
///
/// Arguments (JSON): `query` - substring to look for; defaults to `""`, which
/// matches every file name.
///
/// Returns a JSON string: `{"found": true, "files": [{file_uuid, file_name,
/// has_data}]}` or `{"found": false, "message": ...}` when nothing matches.
/// `has_data` tells whether any face detection exists for the file.
///
/// # Errors
/// Returns the database error rendered as a string.
pub async fn exec_find_file(
    pool: &sqlx::PgPool,
    args: &serde_json::Value,
) -> Result<String, String> {
    let query = args.get("query").and_then(|v| v.as_str()).unwrap_or("");
    let videos = schema::table_name("videos");
    let fd_table = schema::table_name("face_detections");

    // Escape LIKE metacharacters so `%` and `_` in the search text (underscores
    // are common in file names) are matched literally. Backslash is
    // PostgreSQL's default LIKE escape character.
    let escaped = query
        .replace('\\', "\\\\")
        .replace('%', "\\%")
        .replace('_', "\\_");
    let like = format!("%{}%", escaped);

    // EXISTS stops at the first detection instead of counting all of them.
    let rows: Vec<(String, String, bool)> = sqlx::query_as(&format!(
        "SELECT v.file_uuid::text, v.file_name, \
         EXISTS (SELECT 1 FROM {} fd WHERE fd.file_uuid = v.file_uuid) AS has_data \
         FROM {} v WHERE v.file_name ILIKE $1 \
         ORDER BY v.created_at DESC LIMIT 10",
        fd_table, videos
    ))
    .bind(&like)
    .fetch_all(pool)
    .await
    .map_err(|e| e.to_string())?;

    if rows.is_empty() {
        return Ok(serde_json::json!({"found": false, "message": "No files match the query. Try different keywords."}).to_string());
    }

    let files: Vec<serde_json::Value> = rows
        .into_iter()
        .map(|(u, n, hd)| serde_json::json!({"file_uuid": u, "file_name": n, "has_data": hd}))
        .collect();
    Ok(serde_json::json!({"found": true, "files": files}).to_string())
}
/// Agent tool: lists the most recently created videos.
///
/// Arguments (JSON): `limit` - maximum number of files (default 10). Negative
/// values are treated as 0 because PostgreSQL rejects a negative `LIMIT`.
///
/// Returns a JSON string `{"files": [{file_uuid, file_name, has_data}]}` where
/// `has_data` tells whether any face detection exists for the file.
///
/// # Errors
/// Returns the database error rendered as a string.
pub async fn exec_list_files(
    pool: &sqlx::PgPool,
    args: &serde_json::Value,
) -> Result<String, String> {
    let limit = args
        .get("limit")
        .and_then(|v| v.as_i64())
        .unwrap_or(10)
        .max(0);
    let videos = schema::table_name("videos");
    let fd_table = schema::table_name("face_detections");

    // EXISTS stops at the first detection instead of counting all of them.
    let rows: Vec<(String, String, bool)> = sqlx::query_as(&format!(
        "SELECT v.file_uuid::text, v.file_name, \
         EXISTS (SELECT 1 FROM {} fd WHERE fd.file_uuid = v.file_uuid) AS has_data \
         FROM {} v ORDER BY v.created_at DESC LIMIT $1",
        fd_table, videos
    ))
    .bind(limit)
    .fetch_all(pool)
    .await
    .map_err(|e| e.to_string())?;

    let files: Vec<serde_json::Value> = rows
        .into_iter()
        .map(|(u, n, hd)| serde_json::json!({"file_uuid": u, "file_name": n, "has_data": hd}))
        .collect();
    Ok(serde_json::json!({"files": files}).to_string())
}
/// Reads `(start, end, text)` from one dialogue segment object.
///
/// Returns `None` when the segment has no (or empty) `text`; missing `start` /
/// `end` values default to `0.0`.
fn tkg_dialogue_segment(seg: &serde_json::Value) -> Option<(f64, f64, &str)> {
    let text = seg.get("text").and_then(|v| v.as_str()).unwrap_or("");
    if text.is_empty() {
        return None;
    }
    let start = seg.get("start").and_then(|v| v.as_f64()).unwrap_or(0.0);
    let end = seg.get("end").and_then(|v| v.as_f64()).unwrap_or(0.0);
    Some((start, end, text))
}

/// Agent tool: runs one of several predefined temporal-knowledge-graph queries.
///
/// Arguments (JSON):
/// * `file_uuid` - video to query (defaults to `""`).
/// * `query_type` - one of `top_identities`, `first_cooccurrence`,
///   `identity_details`, `mutual_gaze`, `interaction_network`,
///   `identity_traces`, `file_info`, `speaker_dialogue`, `speaker_interaction`.
/// * `identity_name`, `identity_b` - identity names, matched with `ILIKE`.
/// * `limit` - maximum number of results (default 5).
///
/// Returns a JSON string whose top-level key depends on `query_type`; an
/// unknown `query_type` yields `{"error": ...}` rather than an `Err`.
///
/// # Errors
/// Returns the database error rendered as a string.
pub async fn exec_tkg_query(
    pool: &sqlx::PgPool,
    args: &serde_json::Value,
) -> Result<String, String> {
    // Local import keeps `usize::try_from` available regardless of crate edition.
    use std::convert::TryFrom;

    let file_uuid = args.get("file_uuid").and_then(|v| v.as_str()).unwrap_or("");
    let query_type = args
        .get("query_type")
        .and_then(|v| v.as_str())
        .unwrap_or("");
    let identity_name = args.get("identity_name").and_then(|v| v.as_str());
    let identity_b = args.get("identity_b").and_then(|v| v.as_str());
    let limit = args.get("limit").and_then(|v| v.as_i64()).unwrap_or(5);

    // The speaker queries are written as `$n::text IS NULL OR file_uuid = $n`.
    // Binding "" would never be NULL, so a missing file_uuid must be bound as
    // NULL for the "all files" branch to be reachable.
    let file_filter: Option<&str> = if file_uuid.is_empty() {
        None
    } else {
        Some(file_uuid)
    };

    let id_table = schema::table_name("identities");
    let fd_table = schema::table_name("face_detections");
    let videos = schema::table_name("videos");
    let nodes = schema::table_name("tkg_nodes");
    let edges = schema::table_name("tkg_edges");

    match query_type {
        "top_identities" => {
            let rows: Vec<(String, String, i64)> = sqlx::query_as(&format!(
                "SELECT i.uuid::text, i.name, COUNT(fd.id)::bigint AS face_count \
                 FROM {} fd JOIN {} i ON i.id = fd.identity_id \
                 WHERE fd.file_uuid = $1 AND fd.identity_id IS NOT NULL AND i.source = 'tmdb' \
                 GROUP BY i.uuid, i.name ORDER BY face_count DESC LIMIT $2",
                fd_table, id_table
            ))
            .bind(file_uuid)
            .bind(limit)
            .fetch_all(pool)
            .await
            .map_err(|e| e.to_string())?;
            Ok(serde_json::json!({"identities": rows}).to_string())
        }
        "first_cooccurrence" => {
            let name_a = identity_name.unwrap_or("");
            let name_b = identity_b.unwrap_or("");
            // An aggregate without GROUP BY always yields one row, with NULLs
            // when nothing matched, so both columns must be decoded as Option.
            // The self-join is restricted to the same file; matching on
            // frame_number alone would pair detections from different videos.
            // NOTE(review): GREATEST(fps, 25.0) forces at least 25 fps, which
            // skews timestamps for slower videos - confirm this is intended.
            let row: Option<(Option<i64>, Option<f64>)> = sqlx::query_as(&format!(
                "SELECT MIN(fd_a.frame_number)::bigint, \
                 ROUND(MIN(fd_a.frame_number)::numeric / GREATEST(MAX(v.fps)::numeric, 25.0), 2)::float8 \
                 FROM {} fd_a JOIN {} fd_b ON fd_a.frame_number = fd_b.frame_number \
                 AND fd_b.file_uuid = fd_a.file_uuid \
                 JOIN {} v ON v.file_uuid = $1 \
                 WHERE fd_a.file_uuid = $1 \
                 AND fd_a.identity_id = (SELECT id FROM {} WHERE name ILIKE $2 LIMIT 1) \
                 AND fd_b.identity_id = (SELECT id FROM {} WHERE name ILIKE $3 LIMIT 1)",
                fd_table, fd_table, videos, id_table, id_table
            ))
            .bind(file_uuid)
            .bind(name_a)
            .bind(name_b)
            .fetch_optional(pool)
            .await
            .map_err(|e| e.to_string())?;
            let first = row.and_then(|(frame, secs)| {
                frame.map(|f| serde_json::json!({"frame": f, "timestamp_secs": secs}))
            });
            Ok(serde_json::json!({"first_cooccurrence": first}).to_string())
        }
        "identity_details" => {
            let name = identity_name.unwrap_or("");
            let row: Option<(String, String, Option<i32>, i64)> = sqlx::query_as(&format!(
                "SELECT i.uuid::text, i.name, i.tmdb_id, \
                 (SELECT COUNT(*) FROM {} fd WHERE fd.identity_id = i.id AND fd.file_uuid = $1)::bigint \
                 FROM {} i WHERE i.name ILIKE $2 LIMIT 1",
                fd_table, id_table
            ))
            .bind(file_uuid)
            .bind(name)
            .fetch_optional(pool)
            .await
            .map_err(|e| e.to_string())?;
            Ok(serde_json::json!({"identity": row.map(|(u, n, tid, fc)| serde_json::json!({"uuid": u, "name": n, "tmdb_id": tid, "face_count": fc}))}).to_string())
        }
        "mutual_gaze" => {
            let name_a = identity_name.unwrap_or("");
            let name_b = identity_b.unwrap_or("");
            // Nodes are matched by building 'trace_<id>' (as the speaker
            // queries do) rather than casting external_id to int, which raises
            // a SQL error for any node whose external_id is not 'trace_<n>'.
            // JSON properties may be absent, hence the Option columns.
            let row: Option<(Option<i64>, Option<i64>, Option<f64>, Option<f64>)> =
                sqlx::query_as(&format!(
                    "SELECT (e.properties->>'first_frame')::bigint, \
                     (e.properties->>'gaze_frame_count')::int::bigint, \
                     (e.properties->>'yaw_a_avg')::float8, \
                     (e.properties->>'yaw_b_avg')::float8 \
                     FROM {} e \
                     JOIN {} a ON a.id = e.source_node_id \
                     JOIN {} b ON b.id = e.target_node_id \
                     JOIN {} fd_a ON fd_a.file_uuid = $1 AND a.external_id = CONCAT('trace_', fd_a.trace_id) \
                     JOIN {} fd_b ON fd_b.file_uuid = $1 AND b.external_id = CONCAT('trace_', fd_b.trace_id) \
                     JOIN {} ia ON ia.id = fd_a.identity_id \
                     JOIN {} ib ON ib.id = fd_b.identity_id \
                     WHERE e.file_uuid = $1 AND ia.name ILIKE $2 AND ib.name ILIKE $3 \
                     AND e.properties->>'mutual_gaze' = 'true' LIMIT 1",
                    edges, nodes, nodes, fd_table, fd_table, id_table, id_table
                ))
                .bind(file_uuid)
                .bind(name_a)
                .bind(name_b)
                .fetch_optional(pool)
                .await
                .map_err(|e| e.to_string())?;
            Ok(serde_json::json!({"mutual_gaze": row.map(|(f, gc, ya, yb)| serde_json::json!({"first_frame": f, "gaze_frame_count": gc, "yaw_a": ya, "yaw_b": yb}))}).to_string())
        }
        "interaction_network" => {
            // Same 'trace_<id>' matching as in `mutual_gaze`, so edges that
            // touch non-trace nodes cannot abort the query with a cast error.
            let rows: Vec<(String, String, i64)> = sqlx::query_as(&format!(
                "SELECT ia.name, ib.name, COUNT(*)::bigint \
                 FROM {} e \
                 JOIN {} a ON a.id = e.source_node_id \
                 JOIN {} b ON b.id = e.target_node_id \
                 JOIN {} fd_a ON a.external_id = CONCAT('trace_', fd_a.trace_id) AND fd_a.file_uuid = $1 \
                 JOIN {} fd_b ON b.external_id = CONCAT('trace_', fd_b.trace_id) AND fd_b.file_uuid = $1 \
                 JOIN {} ia ON ia.id = fd_a.identity_id \
                 JOIN {} ib ON ib.id = fd_b.identity_id \
                 WHERE e.file_uuid = $1 AND e.edge_type = 'CO_OCCURS_WITH' \
                 AND ia.name != ib.name AND ia.source = 'tmdb' AND ib.source = 'tmdb' \
                 GROUP BY ia.name, ib.name \
                 ORDER BY COUNT(*) DESC LIMIT $2",
                edges, nodes, nodes, fd_table, fd_table, id_table, id_table
            ))
            .bind(file_uuid)
            .bind(limit)
            .fetch_all(pool)
            .await
            .map_err(|e| e.to_string())?;
            Ok(serde_json::json!({"interaction_network": rows}).to_string())
        }
        "identity_traces" => {
            let name = identity_name.unwrap_or("");
            // Detections without a trace form a NULL group, so trace_id is
            // decoded as Option.
            let rows: Vec<(Option<i32>, i64, i64, i64)> = sqlx::query_as(&format!(
                "SELECT fd.trace_id, COUNT(*)::bigint, MIN(fd.frame_number)::bigint, MAX(fd.frame_number)::bigint \
                 FROM {} fd JOIN {} i ON i.id = fd.identity_id \
                 WHERE fd.file_uuid = $1 AND i.name ILIKE $2 \
                 GROUP BY fd.trace_id ORDER BY COUNT(*) DESC LIMIT $3",
                fd_table, id_table
            ))
            .bind(file_uuid)
            .bind(name)
            .bind(limit)
            .fetch_all(pool)
            .await
            .map_err(|e| e.to_string())?;
            Ok(serde_json::json!({"traces": rows}).to_string())
        }
        "file_info" => {
            // Media properties may be NULL for files that were not probed yet;
            // they are reported as JSON null instead of failing the query.
            let row: Option<(String, Option<f64>, Option<i32>, Option<i32>, Option<f64>)> =
                sqlx::query_as(&format!(
                    "SELECT file_name, duration, width, height, fps FROM {} WHERE file_uuid = $1",
                    videos
                ))
                .bind(file_uuid)
                .fetch_optional(pool)
                .await
                .map_err(|e| e.to_string())?;
            Ok(serde_json::json!({"file_info": row.map(|(n, d, w, h, f)| serde_json::json!({"file_name": n, "duration_sec": d, "width": w, "height": h, "fps": f}))}).to_string())
        }
        "speaker_dialogue" => {
            let name = identity_name.unwrap_or("");
            let rows: Vec<(String, Option<String>)> = sqlx::query_as(&format!(
                "SELECT DISTINCT sn.external_id, sn.properties->>'full_text' AS full_text \
                 FROM {} i \
                 JOIN {} fd ON fd.identity_id = i.id AND ($2::text IS NULL OR fd.file_uuid = $2) \
                 JOIN {} fn ON fn.file_uuid = fd.file_uuid \
                 AND fn.node_type = 'face_trace' \
                 AND fn.external_id = CONCAT('trace_', fd.trace_id) \
                 JOIN {} e ON e.source_node_id = fn.id \
                 AND e.edge_type = 'SPEAKS_AS' \
                 AND ($2::text IS NULL OR e.file_uuid = $2) \
                 JOIN {} sn ON sn.id = e.target_node_id \
                 WHERE i.name ILIKE $1 \
                 LIMIT $3",
                id_table, fd_table, nodes, edges, nodes
            ))
            .bind(name)
            .bind(file_filter)
            .bind(limit)
            .fetch_all(pool)
            .await
            .map_err(|e| e.to_string())?;
            Ok(
                serde_json::json!({"speakers": rows.iter().map(|(sid, text)| {
                    serde_json::json!({"speaker_id": sid, "dialogue": text})
                }).collect::<Vec<_>>()})
                .to_string(),
            )
        }
        "speaker_interaction" => {
            let name_a = identity_name.unwrap_or("");
            let name_b = identity_b.unwrap_or("");
            if name_a.is_empty() || name_b.is_empty() {
                return Ok(
                    serde_json::json!({"error": "identity_name and identity_b are required"})
                        .to_string(),
                );
            }
            // `full_text` and `segments` are optional JSON properties, so both
            // are decoded as Option instead of failing on NULL.
            let rows: Vec<(String, Option<String>, Option<serde_json::Value>)> =
                sqlx::query_as(&format!(
                    "SELECT sn.external_id, sn.properties->>'full_text' AS full_text, sn.properties->'segments' AS segments \
                     FROM {} i \
                     JOIN {} fd ON fd.identity_id = i.id AND ($3::text IS NULL OR fd.file_uuid = $3) \
                     JOIN {} fn ON fn.file_uuid = fd.file_uuid \
                     AND fn.node_type = 'face_trace' \
                     AND fn.external_id = CONCAT('trace_', fd.trace_id) \
                     JOIN {} e ON e.source_node_id = fn.id \
                     AND e.edge_type = 'SPEAKS_AS' \
                     AND ($3::text IS NULL OR e.file_uuid = $3) \
                     JOIN {} sn ON sn.id = e.target_node_id \
                     WHERE (i.name ILIKE $1 OR i.name ILIKE $2) \
                     ORDER BY sn.external_id",
                    id_table, fd_table, nodes, edges, nodes
                ))
                .bind(name_a)
                .bind(name_b)
                .bind(file_filter)
                .fetch_all(pool)
                .await
                .map_err(|e| e.to_string())?;

            // The join produces one row per face detection, so the same speaker
            // node repeats many times. Keep the first row per speaker id;
            // otherwise every speaker would be paired with copies of itself.
            let mut seen = std::collections::HashSet::new();
            let speakers: Vec<(String, Option<String>, Option<serde_json::Value>)> = rows
                .into_iter()
                .filter(|(sid, _, _)| seen.insert(sid.clone()))
                .collect();

            // Speakers that actually carry a `segments` array.
            let segment_lists: Vec<(&str, &[serde_json::Value])> = speakers
                .iter()
                .filter_map(|(sid, _, segs)| {
                    segs.as_ref()
                        .and_then(|v| v.as_array())
                        .map(|list| (sid.as_str(), list.as_slice()))
                })
                .collect();

            // NOTE(review): pairs are formed between any two distinct speaker
            // nodes returned for either identity; the query does not record
            // which identity a speaker belongs to.
            let mut interactions = Vec::new();
            for (idx, (sid_a, a_list)) in segment_lists.iter().enumerate() {
                for (sid_b, b_list) in &segment_lists[idx + 1..] {
                    for (sa_start, sa_end, sa_text) in
                        a_list.iter().filter_map(tkg_dialogue_segment)
                    {
                        for (sb_start, sb_end, sb_text) in
                            b_list.iter().filter_map(tkg_dialogue_segment)
                        {
                            let overlap_start = sa_start.max(sb_start);
                            let overlap_end = sa_end.min(sb_end);
                            if overlap_start < overlap_end {
                                interactions.push(serde_json::json!({
                                    "speaker_a": sid_a,
                                    "speaker_b": sid_b,
                                    "time_range_s": [overlap_start, overlap_end],
                                    "dialogue_a": sa_text,
                                    "dialogue_b": sb_text,
                                }));
                            }
                        }
                    }
                }
            }

            interactions.sort_by(|a, b| {
                let a_start = a["time_range_s"][0].as_f64().unwrap_or(0.0);
                let b_start = b["time_range_s"][0].as_f64().unwrap_or(0.0);
                a_start
                    .partial_cmp(&b_start)
                    .unwrap_or(std::cmp::Ordering::Equal)
            });
            // A negative limit keeps nothing (a plain `as usize` cast would
            // wrap to a huge value and keep everything).
            interactions.truncate(usize::try_from(limit.max(0)).unwrap_or(usize::MAX));

            Ok(serde_json::json!({"interactions": interactions, "speaker_a_text": speakers.first().and_then(|r| r.1.clone()), "speaker_b_text": speakers.get(1).and_then(|r| r.1.clone())}).to_string())
        }
        _ => Ok(
            serde_json::json!({"error": format!("Unknown query_type: {}", query_type)}).to_string(),
        ),
    }
}
/// Agent tool: case-insensitive substring search over chunk text.
///
/// Arguments (JSON):
/// * `query` - text to look for (defaults to `""`, which matches any non-NULL
///   text).
/// * `file_uuid` - optional; restricts the search to one file.
/// * `limit` - maximum number of chunks (default 5; negative values are
///   treated as 0).
///
/// Returns a JSON string `{"results": [[chunk_id, text_content, start_frame,
/// end_frame, chunk_type], ...]}` ordered by `start_frame`.
///
/// # Errors
/// Returns the database error rendered as a string.
pub async fn exec_smart_search(
    pool: &sqlx::PgPool,
    args: &serde_json::Value,
) -> Result<String, String> {
    let query = args.get("query").and_then(|v| v.as_str()).unwrap_or("");
    let file_uuid = args.get("file_uuid").and_then(|v| v.as_str());
    let limit = args
        .get("limit")
        .and_then(|v| v.as_i64())
        .unwrap_or(5)
        .max(0);
    let chunk_table = schema::table_name("chunk");

    // Escape LIKE metacharacters so the search text is matched literally
    // (backslash is PostgreSQL's default LIKE escape character).
    let escaped = query
        .replace('\\', "\\\\")
        .replace('%', "\\%")
        .replace('_', "\\_");
    let like = format!("%{}%", escaped);

    // One statement serves both cases: a NULL `$2` disables the file filter.
    // The limit is bound as a parameter rather than spliced into the SQL text.
    let sql = format!(
        "SELECT chunk_id, text_content, start_frame, end_frame, chunk_type \
         FROM {} WHERE text_content ILIKE $1 \
         AND ($2::text IS NULL OR file_uuid = $2) \
         ORDER BY start_frame LIMIT $3",
        chunk_table
    );

    let rows: Vec<(String, Option<String>, i64, i64, String)> = sqlx::query_as(&sql)
        .bind(&like)
        .bind(file_uuid)
        .bind(limit)
        .fetch_all(pool)
        .await
        .map_err(|e| e.to_string())?;

    Ok(serde_json::json!({"results": rows}).to_string())
}
/// Agent tool: finds chunks whose text contains `q` and reports the identities
/// detected on frames inside each chunk.
///
/// Arguments (JSON):
/// * `q` - text to look for (case-insensitive substring).
/// * `file_uuid` - optional; restricts the search to one file.
/// * `limit` - maximum number of rows (default 10, clamped to `0..=50`).
///
/// Returns a JSON string `{"results": [{chunk_id, start_time, end_time, text,
/// identity_name, trace_id, source}]}` ordered by chunk start time.
///
/// # Errors
/// Returns the database error rendered as a string.
pub async fn exec_identity_text(
    pool: &sqlx::PgPool,
    args: &serde_json::Value,
) -> Result<String, String> {
    let q = args.get("q").and_then(|v| v.as_str()).unwrap_or("");
    let file_uuid = args.get("file_uuid").and_then(|v| v.as_str());
    // A negative LIMIT is a SQL error, so clamp on both sides.
    let limit = args
        .get("limit")
        .and_then(|v| v.as_i64())
        .unwrap_or(10)
        .clamp(0, 50);

    let chunk_table = schema::table_name("chunk");
    let fd_table = schema::table_name("face_detections");
    let id_table = schema::table_name("identities");

    // Doubling `%` does not escape it in LIKE (it is just two wildcards).
    // Backslash is PostgreSQL's default LIKE escape character, so prefix every
    // metacharacter with it to match the user's text literally.
    let escaped = q
        .replace('\\', "\\\\")
        .replace('%', "\\%")
        .replace('_', "\\_");
    let like_q = format!("%{}%", escaped);

    // NOTE(review): the join yields one row per matching face detection, so a
    // chunk/identity pair can repeat and consume the limit.
    let sql = format!(
        "SELECT c.chunk_id, c.start_time, c.end_time, c.text_content, \
         i.name AS identity_name, fd.trace_id, i.source AS identity_source \
         FROM {} c \
         JOIN {} fd ON fd.file_uuid = c.file_uuid \
         AND fd.frame_number BETWEEN c.start_frame AND c.end_frame \
         AND fd.identity_id IS NOT NULL \
         JOIN {} i ON i.id = fd.identity_id \
         WHERE ($1::text IS NULL OR c.file_uuid = $1) \
         AND (LOWER(c.text_content) LIKE LOWER($2) OR LOWER(c.content::text) LIKE LOWER($2)) \
         ORDER BY c.start_time \
         LIMIT $3",
        chunk_table, fd_table, id_table
    );

    let rows: Vec<(
        String,
        f64,
        f64,
        Option<String>,
        String,
        Option<i32>,
        String,
    )> = sqlx::query_as(&sql)
        .bind(file_uuid)
        .bind(&like_q)
        .bind(limit)
        .fetch_all(pool)
        .await
        .map_err(|e| e.to_string())?;

    Ok(
        serde_json::json!({"results": rows.iter().map(|(chunk_id, st, et, txt, name, tid, src)| {
            serde_json::json!({
                "chunk_id": chunk_id,
                "start_time": st,
                "end_time": et,
                "text": txt,
                "identity_name": name,
                "trace_id": tid,
                "source": src
            })
        }).collect::<Vec<_>>()})
        .to_string(),
    )
}
/// Agent tool: finds the chunks in which an identity (matched by name or by an
/// alias stored in its metadata) appears.
///
/// Arguments (JSON):
/// * `q` - identity name or alias (case-insensitive substring).
/// * `file_uuid` - optional; restricts the search to one file.
/// * `limit` - maximum number of rows (default 10, clamped to `0..=50`).
///
/// Returns a JSON string `{"results": [{identity_name, chunk_id, start_time,
/// end_time, text, trace_id}]}` with one row per identity/chunk pair.
///
/// # Errors
/// Returns the database error rendered as a string.
pub async fn exec_identities_search(
    pool: &sqlx::PgPool,
    args: &serde_json::Value,
) -> Result<String, String> {
    let q = args.get("q").and_then(|v| v.as_str()).unwrap_or("");
    let file_uuid = args.get("file_uuid").and_then(|v| v.as_str());
    // A negative LIMIT is a SQL error, so clamp on both sides.
    let limit = args
        .get("limit")
        .and_then(|v| v.as_i64())
        .unwrap_or(10)
        .clamp(0, 50);

    let id_table = schema::table_name("identities");
    let fd_table = schema::table_name("face_detections");
    let chunk_table = schema::table_name("chunk");

    // Doubling `%` does not escape it in (I)LIKE. Backslash is PostgreSQL's
    // default escape character, so prefix every metacharacter with it to match
    // the user's text literally.
    let escaped = q
        .replace('\\', "\\\\")
        .replace('%', "\\%")
        .replace('_', "\\_");
    let like_q = format!("%{}%", escaped);

    let sql = format!(
        "SELECT DISTINCT ON (i.name, c.chunk_id) \
         i.name, c.chunk_id, c.start_time, c.end_time, c.text_content, fd.trace_id \
         FROM {} i \
         JOIN {} fd ON fd.identity_id = i.id \
         JOIN {} c ON c.file_uuid = fd.file_uuid \
         AND c.start_time <= fd.frame_number / COALESCE(c.fps, 25.0) \
         AND c.end_time >= fd.frame_number / COALESCE(c.fps, 25.0) \
         WHERE (i.name ILIKE $1 \
         OR EXISTS (SELECT 1 FROM jsonb_array_elements(i.metadata->'aliases') AS a WHERE a->>'name' ILIKE $1)) \
         AND ($2::text IS NULL OR fd.file_uuid = $2) \
         ORDER BY i.name, c.chunk_id, c.start_time \
         LIMIT $3",
        id_table, fd_table, chunk_table
    );

    let rows: Vec<(String, String, f64, f64, Option<String>, Option<i32>)> = sqlx::query_as(&sql)
        .bind(&like_q)
        .bind(file_uuid)
        .bind(limit)
        .fetch_all(pool)
        .await
        .map_err(|e| e.to_string())?;

    Ok(
        serde_json::json!({"results": rows.iter().map(|(name, chunk_id, st, et, txt, tid)| {
            serde_json::json!({
                "identity_name": name,
                "chunk_id": chunk_id,
                "start_time": st,
                "end_time": et,
                "text": txt,
                "trace_id": tid,
            })
        }).collect::<Vec<_>>()})
        .to_string(),
    )
}
/// Agent tool: looks up a single identity by name (`ILIKE` match, first hit).
///
/// Arguments (JSON): `name` - identity name; defaults to `""`.
///
/// Returns a JSON string `{"identity": {uuid, name, source, tmdb_id,
/// character}}`, or `{"identity": null}` when no identity matches.
///
/// # Errors
/// Returns the database error rendered as a string.
pub async fn exec_get_identity_detail(
    pool: &sqlx::PgPool,
    args: &serde_json::Value,
) -> Result<String, String> {
    let wanted = args
        .get("name")
        .and_then(serde_json::Value::as_str)
        .unwrap_or("");

    let sql = format!(
        "SELECT uuid::text, name, source, tmdb_id, metadata->>'tmdb_character' FROM {} WHERE name ILIKE $1 LIMIT 1",
        schema::table_name("identities")
    );

    let found: Option<(String, String, Option<String>, Option<i32>, Option<String>)> =
        sqlx::query_as(&sql)
            .bind(wanted)
            .fetch_optional(pool)
            .await
            .map_err(|err| err.to_string())?;

    let identity = found.map(|(uuid, name, source, tmdb_id, character)| {
        serde_json::json!({"uuid": uuid, "name": name, "source": source, "tmdb_id": tmdb_id, "character": character})
    });

    Ok(serde_json::json!({"identity": identity}).to_string())
}
/// Agent tool: returns basic media properties of a video.
///
/// Arguments (JSON): `file_uuid` - video to describe; defaults to `""`.
///
/// Returns a JSON string `{"file_info": {file_name, duration_sec, width,
/// height, fps}}`, or `{"file_info": null}` when the file is unknown.
/// Properties stored as NULL are reported as JSON `null` rather than failing
/// the lookup.
///
/// # Errors
/// Returns the database error rendered as a string.
pub async fn exec_get_file_info(
    pool: &sqlx::PgPool,
    args: &serde_json::Value,
) -> Result<String, String> {
    let file_uuid = args.get("file_uuid").and_then(|v| v.as_str()).unwrap_or("");
    let videos = schema::table_name("videos");

    // `exec_analyze_frame` guards `fps` and `duration` with COALESCE, i.e. they
    // can be NULL; decoding them as plain values would turn such a row into a
    // decode error, so every media column is read as Option.
    let row: Option<(String, Option<f64>, Option<i32>, Option<i32>, Option<f64>)> =
        sqlx::query_as(&format!(
            "SELECT file_name, duration, width, height, fps FROM {} WHERE file_uuid = $1",
            videos
        ))
        .bind(file_uuid)
        .fetch_optional(pool)
        .await
        .map_err(|e| e.to_string())?;

    Ok(serde_json::json!({"file_info": row.map(|(n, d, w, h, f)| serde_json::json!({"file_name": n, "duration_sec": d, "width": w, "height": h, "fps": f}))}).to_string())
}
pub async fn exec_get_representative_frame(
pool: &sqlx::PgPool,
args: &serde_json::Value,
) -> Result<String, String> {
let file_uuid = args.get("file_uuid").and_then(|v| v.as_str()).unwrap_or("");
match query_auto_representative_frame(pool, file_uuid).await {
Ok(r) => Ok(serde_json::json!({
"frame_number": r.frame_number,
"face_quality": r.face_quality,
"main_identities": r.main_identities,
"traces": r.traces,
})
.to_string()),
Err(e) => Ok(serde_json::json!({"error": e.to_string()}).to_string()),
}
}
/// Agent tool: extracts one frame from a video with ffmpeg and asks the vision
/// LLM a question about it.
///
/// Arguments (JSON):
/// * `file_uuid` - required; video to analyze.
/// * `question` - optional prompt for the vision model.
/// * `frame_number` - optional; when omitted, the representative frame is used
///   and, if that lookup fails, the middle of the video (or frame 0 when the
///   duration is unknown). Negative values are treated as 0.
///
/// Returns a JSON string `{frame_number, timestamp_secs, analysis}`. A missing
/// `file_uuid`, a failed ffmpeg run, or an ffmpeg run that produced no image
/// are reported in-band as `{"error": ...}`.
///
/// # Errors
/// Returns `Err` for database errors, an unknown video, failure to launch
/// ffmpeg, or a failed vision-model call.
pub async fn exec_analyze_frame(
    pool: &sqlx::PgPool,
    args: &serde_json::Value,
) -> Result<String, String> {
    let file_uuid = args.get("file_uuid").and_then(|v| v.as_str()).unwrap_or("");
    let question = args
        .get("question")
        .and_then(|v| v.as_str())
        .unwrap_or("請描述這個畫面中的內容");
    if file_uuid.is_empty() {
        return Ok(serde_json::json!({"error": "file_uuid is required"}).to_string());
    }

    let videos = schema::table_name("videos");
    let (video_path, fps): (String, f64) = sqlx::query_as(&format!(
        "SELECT file_path, COALESCE(fps, 25.0) FROM {} WHERE file_uuid = $1",
        videos
    ))
    .bind(file_uuid)
    .fetch_optional(pool)
    .await
    .map_err(|e| e.to_string())?
    .ok_or_else(|| "Video not found".to_string())?;

    // COALESCE only covers NULL. A stored fps of 0 (or a non-finite value)
    // would make the timestamp below a division by zero, so fall back to the
    // same 25 fps default in that case too.
    let fps = if fps.is_finite() && fps > 0.0 {
        fps
    } else {
        25.0
    };

    let frame_number = match args.get("frame_number").and_then(|v| v.as_i64()) {
        Some(f) => f,
        None => match query_auto_representative_frame(pool, file_uuid).await {
            Ok(r) => r.frame_number,
            Err(_) => {
                // No representative frame available: use the middle of the video.
                let duration: f64 = sqlx::query_scalar(&format!(
                    "SELECT COALESCE(duration, 0) FROM {} WHERE file_uuid = $1",
                    videos
                ))
                .bind(file_uuid)
                .fetch_optional(pool)
                .await
                .map_err(|e| e.to_string())?
                .unwrap_or(0.0);
                if duration > 0.0 {
                    ((duration / 2.0) * fps) as i64
                } else {
                    0
                }
            }
        },
    };
    // A negative frame would turn into a negative `-ss` seek position.
    let frame_number = frame_number.max(0);
    let timestamp_secs = frame_number as f64 / fps;

    let ffmpeg_path = std::env::var("MOMENTRY_FFMPEG").unwrap_or_else(|_| {
        let full = "/opt/homebrew/opt/ffmpeg-full/bin/ffmpeg";
        if std::path::Path::new(full).exists() {
            full.to_string()
        } else {
            "ffmpeg".to_string()
        }
    });

    // Seek, decode a single frame and write it to stdout as MJPEG.
    let output = tokio::process::Command::new(&ffmpeg_path)
        .args([
            "-ss",
            &format!("{:.3}", timestamp_secs),
            "-i",
            &video_path,
            "-vframes",
            "1",
            "-f",
            "image2pipe",
            "-vcodec",
            "mjpeg",
            "-",
        ])
        .output()
        .await
        .map_err(|e| format!("ffmpeg execution error: {}", e))?;

    if !output.status.success() {
        let stderr = String::from_utf8_lossy(&output.stderr);
        return Ok(serde_json::json!({"error": format!("ffmpeg failed: {}", stderr)}).to_string());
    }
    // ffmpeg can exit successfully without emitting a frame (for example when
    // seeking past the end); do not send an empty image to the vision model.
    if output.stdout.is_empty() {
        return Ok(serde_json::json!({
            "error": format!(
                "ffmpeg produced no image for frame {} ({:.3}s)",
                frame_number, timestamp_secs
            )
        })
        .to_string());
    }

    let base64_img = BASE64.encode(&output.stdout);
    let system_prompt =
        "你是一個專業的影片畫面分析助手。請根據提供的畫面以及用戶的問題,詳細描述畫面中的內容,包括場景、人物、動作、表情、物件等。請用繁體中文回答。";
    let vision_result = call_llm_vision(system_prompt, question, vec![base64_img], 1024, 120)
        .await
        .map_err(|e| e.to_string())?;

    Ok(serde_json::json!({
        "frame_number": frame_number,
        "timestamp_secs": timestamp_secs,
        "analysis": vision_result,
    })
    .to_string())
}
/// Agent tool: pages through the TKG nodes of a file.
///
/// Arguments (JSON):
/// * `file_uuid` - required.
/// * `node_type` - optional; restricts the result to one node type.
/// * `page` - 1-based page number (default 1; values below 1 are treated as 1).
/// * `page_size` - rows per page (default 20; negative values are treated
///   as 0).
///
/// Returns a JSON string `{file_uuid, total, page, page_size, nodes}` where
/// `total` counts all matching nodes and `page` / `page_size` echo the values
/// actually used.
///
/// # Errors
/// Returns `Err` when `file_uuid` is missing or the database query fails.
pub async fn exec_tkg_nodes_query(
    pool: &sqlx::PgPool,
    args: &serde_json::Value,
) -> Result<String, String> {
    let file_uuid = args
        .get("file_uuid")
        .and_then(|v| v.as_str())
        .ok_or("file_uuid is required")?;
    let node_type = args.get("node_type").and_then(|v| v.as_str());

    // PostgreSQL rejects negative LIMIT / OFFSET values, so normalize the
    // paging input instead of surfacing a SQL error for `page: 0` and the like.
    let page = args
        .get("page")
        .and_then(|v| v.as_i64())
        .unwrap_or(1)
        .max(1);
    let page_size = args
        .get("page_size")
        .and_then(|v| v.as_i64())
        .unwrap_or(20)
        .max(0);
    let offset = (page - 1).saturating_mul(page_size);

    let nodes_table = t("tkg_nodes");

    // A NULL `$2` disables the node_type filter, so one statement covers both
    // the filtered and the unfiltered case for the count and for the page.
    let total: i64 = sqlx::query_scalar(&format!(
        "SELECT COUNT(*) FROM {} \
         WHERE file_uuid = $1 AND ($2::text IS NULL OR node_type = $2)",
        nodes_table
    ))
    .bind(file_uuid)
    .bind(node_type)
    .fetch_one(pool)
    .await
    .map_err(|e| e.to_string())?;

    let rows: Vec<(i64, String, String, String, serde_json::Value)> = sqlx::query_as(&format!(
        "SELECT id, node_type, external_id, label, properties FROM {} \
         WHERE file_uuid = $1 AND ($2::text IS NULL OR node_type = $2) \
         ORDER BY id LIMIT $3 OFFSET $4",
        nodes_table
    ))
    .bind(file_uuid)
    .bind(node_type)
    .bind(page_size)
    .bind(offset)
    .fetch_all(pool)
    .await
    .map_err(|e| e.to_string())?;

    let nodes: Vec<serde_json::Value> = rows
        .into_iter()
        .map(|(id, node_type, external_id, label, properties)| {
            serde_json::json!({
                "id": id,
                "node_type": node_type,
                "external_id": external_id,
                "label": label,
                "properties": properties
            })
        })
        .collect();

    Ok(serde_json::json!({
        "file_uuid": file_uuid,
        "total": total,
        "page": page,
        "page_size": page_size,
        "nodes": nodes
    })
    .to_string())
}
/// Lists the TKG edges of one file as a paginated JSON document.
///
/// Arguments read from `args`:
/// - `file_uuid` (string, required): the file whose edges are listed.
/// - `edge_type` (string, optional): when present, only edges of that type
///   are counted and returned.
/// - `page` (integer, default 1): 1-based page number; values below 1 are
///   treated as 1.
/// - `page_size` (integer, default 20): rows per page; negative values are
///   treated as 0.
///
/// Returns a JSON string with `file_uuid`, `total` (row count matching the
/// filter), the effective `page` / `page_size`, and `edges` ordered by `id`.
///
/// # Errors
/// Returns `Err` when `file_uuid` is missing or when a database query fails
/// (the database error is stringified).
pub async fn exec_tkg_edges_query(
    pool: &sqlx::PgPool,
    args: &serde_json::Value,
) -> Result<String, String> {
    let file_uuid = args
        .get("file_uuid")
        .and_then(|v| v.as_str())
        .ok_or("file_uuid is required")?;
    let edge_type = args.get("edge_type").and_then(|v| v.as_str());

    // Clamp the paging inputs: a page below 1 or a negative page size would
    // turn into a negative OFFSET / LIMIT, which PostgreSQL rejects.
    let page = args
        .get("page")
        .and_then(|v| v.as_i64())
        .unwrap_or(1)
        .max(1);
    let page_size = args
        .get("page_size")
        .and_then(|v| v.as_i64())
        .unwrap_or(20)
        .max(0);
    // Saturate instead of overflowing on absurdly large caller-supplied values.
    let offset = (page - 1).saturating_mul(page_size);

    let edges_table = t("tkg_edges");

    // The count query and the page query share the same filter; only the
    // number of bound parameters differs between the two variants.
    let (where_clause, total) = if let Some(et) = edge_type {
        let total: i64 = sqlx::query_scalar(&format!(
            "SELECT COUNT(*) FROM {} WHERE file_uuid = $1 AND edge_type = $2",
            edges_table
        ))
        .bind(file_uuid)
        .bind(et)
        .fetch_one(pool)
        .await
        .map_err(|e| e.to_string())?;
        (
            "WHERE file_uuid = $1 AND edge_type = $2 ORDER BY id LIMIT $3 OFFSET $4".to_string(),
            total,
        )
    } else {
        let total: i64 = sqlx::query_scalar(&format!(
            "SELECT COUNT(*) FROM {} WHERE file_uuid = $1",
            edges_table
        ))
        .bind(file_uuid)
        .fetch_one(pool)
        .await
        .map_err(|e| e.to_string())?;
        (
            "WHERE file_uuid = $1 ORDER BY id LIMIT $2 OFFSET $3".to_string(),
            total,
        )
    };

    let query = format!(
        "SELECT id, edge_type, source_node_id, target_node_id, properties FROM {} {}",
        edges_table, where_clause
    );
    let rows: Vec<(i64, String, i64, i64, serde_json::Value)> = if let Some(et) = edge_type {
        sqlx::query_as(&query)
            .bind(file_uuid)
            .bind(et)
            .bind(page_size)
            .bind(offset)
            .fetch_all(pool)
            .await
            .map_err(|e| e.to_string())?
    } else {
        sqlx::query_as(&query)
            .bind(file_uuid)
            .bind(page_size)
            .bind(offset)
            .fetch_all(pool)
            .await
            .map_err(|e| e.to_string())?
    };

    let edges: Vec<serde_json::Value> = rows
        .into_iter()
        .map(
            |(id, edge_type, source_node_id, target_node_id, properties)| {
                serde_json::json!({
                    "id": id,
                    "edge_type": edge_type,
                    "source_node_id": source_node_id,
                    "target_node_id": target_node_id,
                    "properties": properties
                })
            },
        )
        .collect();

    Ok(serde_json::json!({
        "file_uuid": file_uuid,
        "total": total,
        "page": page,
        "page_size": page_size,
        "edges": edges
    })
    .to_string())
}
/// Returns one TKG node together with its incoming and outgoing edges.
///
/// Arguments read from `args`:
/// - `file_uuid` (string, required)
/// - `node_id` (integer, required)
///
/// Returns a JSON string with `node`, `incoming_edges` (edges whose
/// `target_node_id` is the node) and `outgoing_edges` (edges whose
/// `source_node_id` is the node).
///
/// # Errors
/// Returns `Err` when a required argument is missing, when no node with that
/// id exists for the file (`"Node not found"`), or when any of the three
/// database queries fails. Edge-query failures are reported rather than being
/// presented as "this node has no edges".
pub async fn exec_tkg_node_detail(
    pool: &sqlx::PgPool,
    args: &serde_json::Value,
) -> Result<String, String> {
    type NodeRow = (i64, String, String, String, serde_json::Value);
    type EdgeRow = (i64, String, i64, i64, serde_json::Value);

    /// Converts one edge row into the JSON object shape used in the response.
    fn edge_to_json(
        (id, edge_type, source_node_id, target_node_id, properties): EdgeRow,
    ) -> serde_json::Value {
        serde_json::json!({"id": id, "edge_type": edge_type, "source_node_id": source_node_id, "target_node_id": target_node_id, "properties": properties})
    }

    let file_uuid = args
        .get("file_uuid")
        .and_then(|v| v.as_str())
        .ok_or("file_uuid is required")?;
    let node_id = args
        .get("node_id")
        .and_then(|v| v.as_i64())
        .ok_or("node_id is required")?;
    let nodes_table = t("tkg_nodes");
    let edges_table = t("tkg_edges");

    let node: Option<NodeRow> = sqlx::query_as(
        &format!("SELECT id, node_type, external_id, label, properties FROM {} WHERE file_uuid = $1 AND id = $2", nodes_table)
    )
    .bind(file_uuid)
    .bind(node_id)
    .fetch_optional(pool)
    .await
    .map_err(|e| e.to_string())?;

    let Some((id, node_type, external_id, label, properties)) = node else {
        return Err("Node not found".to_string());
    };

    // Edges pointing at this node.
    let rows_in: Vec<EdgeRow> = sqlx::query_as(
        &format!("SELECT id, edge_type, source_node_id, target_node_id, properties FROM {} WHERE file_uuid = $1 AND target_node_id = $2", edges_table)
    )
    .bind(file_uuid)
    .bind(node_id)
    .fetch_all(pool)
    .await
    .map_err(|e| e.to_string())?;
    let incoming: Vec<serde_json::Value> = rows_in.into_iter().map(edge_to_json).collect();

    // Edges originating from this node.
    let rows_out: Vec<EdgeRow> = sqlx::query_as(
        &format!("SELECT id, edge_type, source_node_id, target_node_id, properties FROM {} WHERE file_uuid = $1 AND source_node_id = $2", edges_table)
    )
    .bind(file_uuid)
    .bind(node_id)
    .fetch_all(pool)
    .await
    .map_err(|e| e.to_string())?;
    let outgoing: Vec<serde_json::Value> = rows_out.into_iter().map(edge_to_json).collect();

    Ok(serde_json::json!({
        "node": {"id": id, "node_type": node_type, "external_id": external_id, "label": label, "properties": properties},
        "incoming_edges": incoming,
        "outgoing_edges": outgoing
    })
    .to_string())
}

335
src/core/checkin.rs Normal file
View File

@@ -0,0 +1,335 @@
use anyhow::{Context, Result};
use tracing::{info, warn};
use crate::core::db::{
workspace_sqlite::{SpeakerDetectionBatchItem, WorkspaceDb},
PostgresDb, QdrantDb, QdrantWorkspace,
};
/// Summary returned by `checkin` for one file.
#[derive(Debug)]
pub struct CheckinResult {
/// The `file_uuid` that was passed to `checkin`.
pub file_uuid: String,
/// Number of pre-chunk rows read from the workspace.
pub pre_chunks_moved: usize,
/// Number of speaker-detection rows read from the workspace.
pub speaker_detections_moved: usize,
/// Number of vectors whose upsert into a production collection succeeded.
pub vectors_moved: usize,
}
/// Summary returned by `checkout` for one file.
#[derive(Debug)]
pub struct CheckoutResult {
/// The `file_uuid` that was passed to `checkout`.
pub file_uuid: String,
/// Total rows removed by the PostgreSQL DELETE statements (Qdrant deletions are not counted).
pub rows_deleted: usize,
}
/// Derives the numeric production point id for a workspace point id.
///
/// Ids that already parse as `u64` are used as-is; anything else is hashed
/// with the standard library's `DefaultHasher`.
// NOTE(review): `DefaultHasher`'s algorithm is not guaranteed to stay the same
// across Rust releases, so hashed ids may change after a toolchain upgrade —
// confirm whether that matters for re-checkins.
fn checkin_point_id(raw: &str) -> u64 {
    raw.parse::<u64>().unwrap_or_else(|_| {
        use std::hash::{Hash, Hasher};
        let mut hasher = std::collections::hash_map::DefaultHasher::new();
        raw.hash(&mut hasher);
        hasher.finish()
    })
}

/// Moves everything staged in a file's workspace into the production stores.
///
/// Steps, in order:
/// 1. Open the file's `WorkspaceDb` and read all pre-chunks and speaker
///    detections.
/// 2. Store each pre-chunk through the `PostgresDb` batch method matching its
///    `processor_type` (`asr`, `cut`, `scene`, or the raw fallback).
/// 3. Store the speaker detections in one batch.
/// 4. Copy staged chunk and trace vectors from the Qdrant workspace into the
///    production collections; individual upsert failures are logged and
///    counted, not fatal.
/// 5. Clear the workspace database. The staged Qdrant vectors are deleted
///    only when every one of them was copied; otherwise they are kept so the
///    data is not lost.
///
/// # Errors
/// Fails when the workspace cannot be opened or read, or when any PostgreSQL
/// store call fails. Qdrant and cleanup failures are only logged.
pub async fn checkin(db: &PostgresDb, file_uuid: &str) -> Result<CheckinResult> {
    let schema = crate::core::config::DATABASE_SCHEMA.as_str();
    info!("Checkin starting for {} (schema={})", file_uuid, schema);

    let workspace = WorkspaceDb::open(file_uuid)
        .await
        .context("No workspace found for checkin")?;
    let qdrant_ws = QdrantWorkspace::new();
    let pre_chunks = workspace.get_all_pre_chunks().await?;
    let spk_dets = workspace.get_all_speaker_detections().await?;
    info!(
        "Checkin {} workspace: {} pre_chunks, {} spk_dets",
        file_uuid,
        pre_chunks.len(),
        spk_dets.len(),
    );

    // ── Pre-chunks ──
    for chunk in &pre_chunks {
        // Missing or unparsable JSON payloads are stored as JSON null.
        let data_value: serde_json::Value = chunk
            .data
            .as_ref()
            .and_then(|d| serde_json::from_str(d).ok())
            .unwrap_or(serde_json::Value::Null);
        match chunk.processor_type.as_str() {
            // These three processors share one row layout and differ only in
            // the store method.
            kind @ ("asr" | "cut" | "scene") => {
                let row = [(
                    chunk.id as i64,
                    chunk.start_frame.unwrap_or(0),
                    chunk.end_frame.unwrap_or(0),
                    chunk.start_time.unwrap_or(0.0),
                    chunk.end_time.unwrap_or(0.0),
                    data_value,
                )];
                match kind {
                    "asr" => {
                        db.store_asr_pre_chunks_batch(file_uuid, &row).await?;
                    }
                    "cut" => {
                        db.store_cut_pre_chunks_batch(file_uuid, &row).await?;
                    }
                    _ => {
                        db.store_scene_pre_chunks_batch(file_uuid, &row).await?;
                    }
                }
            }
            _ => {
                let frame = chunk.start_frame.unwrap_or(0);
                let ts = chunk.start_time;
                let text = chunk.text_content.clone();
                db.store_raw_pre_chunks_batch(
                    file_uuid,
                    &chunk.processor_type,
                    &[(frame, ts, data_value, text, None)],
                )
                .await?;
            }
        }
    }

    // ── Speaker detections ──
    if !spk_dets.is_empty() {
        let batch: Vec<(String, f64, f64, String, Option<String>, f32)> = spk_dets
            .iter()
            .map(|s| {
                (
                    s.speaker_id.clone().unwrap_or_default(),
                    s.start_time.unwrap_or(0.0),
                    s.end_time.unwrap_or(0.0),
                    s.text_content.clone().unwrap_or_default(),
                    s.chunk_id.clone(),
                    s.confidence.unwrap_or(0.0) as f32,
                )
            })
            .collect();
        db.store_speaker_detections_batch(file_uuid, &batch).await?;
    }

    // ── Qdrant vectors ──
    let mut vectors_moved = 0usize;
    let mut vectors_failed = 0usize;
    let scrolled = match qdrant_ws.scroll_by_file_uuid(file_uuid).await {
        Ok(ws_data) => {
            let qdrant = QdrantDb::new();

            // Chunks → production collection
            for point in &ws_data.chunks {
                if let Some(ref vector) = point.vector {
                    let payload_val: serde_json::Value =
                        serde_json::to_value(&point.payload).unwrap_or(serde_json::Value::Null);
                    let point_id = checkin_point_id(&point.id);
                    if let Err(e) = qdrant
                        .upsert_vector_to_collection(
                            &qdrant.collection_name,
                            point_id,
                            vector,
                            Some(payload_val),
                        )
                        .await
                    {
                        warn!("Failed to checkin chunk vector {}: {}", point.id, e);
                        vectors_failed += 1;
                    } else {
                        vectors_moved += 1;
                    }
                }
            }

            // Traces → production traces collection
            let traces_coll = format!(
                "{}_traces",
                crate::core::config::REDIS_KEY_PREFIX
                    .as_str()
                    .trim_end_matches(':')
            );
            for point in &ws_data.traces {
                if let Some(ref vector) = point.vector {
                    let payload_val: serde_json::Value =
                        serde_json::to_value(&point.payload).unwrap_or(serde_json::Value::Null);
                    let point_id = checkin_point_id(&point.id);
                    if let Err(e) = qdrant
                        .upsert_vector_to_collection(
                            &traces_coll,
                            point_id,
                            vector,
                            Some(payload_val),
                        )
                        .await
                    {
                        warn!("Failed to checkin trace vector {}: {}", point.id, e);
                        vectors_failed += 1;
                    } else {
                        vectors_moved += 1;
                    }
                }
            }
            true
        }
        Err(e) => {
            warn!("Failed to scroll Qdrant workspace for {}: {}", file_uuid, e);
            false
        }
    };

    // ── Cleanup workspace ──
    if let Err(e) = workspace.clear().await {
        warn!("Failed to clear workspace for {}: {}", file_uuid, e);
    }
    // Deleting the staged vectors after a failed copy would lose them for
    // good, so only clean up when every vector reached production.
    if scrolled && vectors_failed == 0 {
        if let Err(e) = qdrant_ws.delete_by_file_uuid(file_uuid).await {
            warn!(
                "Failed to delete workspace vectors for {}: {}",
                file_uuid, e
            );
        }
    } else {
        warn!(
            "Keeping workspace vectors for {}: copy to production was incomplete ({} failed)",
            file_uuid, vectors_failed
        );
    }

    info!(
        "Checkin complete for {}: {} pre_chunks, {} spk_dets, {} vectors",
        file_uuid,
        pre_chunks.len(),
        spk_dets.len(),
        vectors_moved,
    );
    Ok(CheckinResult {
        file_uuid: file_uuid.to_string(),
        pre_chunks_moved: pre_chunks.len(),
        speaker_detections_moved: spk_dets.len(),
        vectors_moved,
    })
}
/// Removes a file's processed data from the production stores.
///
/// Deletes the file's rows from the `face_detections`, `speaker_detections`,
/// `pre_chunks`, `chunk` and `processor_results` tables (in that order), then
/// asks Qdrant to delete the file's points from the main collection, the
/// `{prefix}_traces` collection and the `{file_uuid}_voice` collection.
///
/// # Errors
/// Fails on the first PostgreSQL DELETE that errors; earlier deletes are not
/// rolled back. Qdrant failures are only logged.
pub async fn checkout(db: &PostgresDb, file_uuid: &str) -> Result<CheckoutResult> {
    let schema = crate::core::config::DATABASE_SCHEMA.as_str();
    let table = crate::core::db::schema::table_name;
    info!("Checkout starting for {} (schema={})", file_uuid, schema);

    // PostgreSQL: one DELETE per table, accumulating the affected-row counts.
    let mut rows_deleted: u64 = 0;
    for logical_name in [
        "face_detections",
        "speaker_detections",
        "pre_chunks",
        "chunk",
        "processor_results",
    ] {
        let physical_name = table(logical_name);
        let statement = format!("DELETE FROM {} WHERE file_uuid = $1", physical_name);
        let outcome = sqlx::query(&statement)
            .bind(file_uuid)
            .execute(db.pool())
            .await?;
        info!(
            "Checkout {}: deleted {} rows from {}",
            file_uuid,
            outcome.rows_affected(),
            physical_name
        );
        rows_deleted += outcome.rows_affected();
    }

    // Qdrant: main production collection first.
    let qdrant = QdrantDb::new();
    if let Err(e) = qdrant.delete_by_uuid(file_uuid).await {
        warn!(
            "Failed to delete chunk vectors from Qdrant for {}: {}",
            file_uuid, e
        );
    }

    // Then the shared traces collection and the per-file voice collection.
    let prefix = crate::core::config::REDIS_KEY_PREFIX
        .as_str()
        .trim_end_matches(':');
    for coll in [
        format!("{}_traces", prefix),
        format!("{}_voice", file_uuid),
    ] {
        let deletion = QdrantDb::delete_by_uuid_from_collection(
            &qdrant.client,
            &qdrant.base_url,
            &qdrant.api_key,
            &coll,
            file_uuid,
        )
        .await;
        if let Err(e) = deletion {
            warn!(
                "Failed to delete vectors from {} for {}: {}",
                coll, file_uuid, e
            );
        }
    }

    info!(
        "Checkout complete for {}: {} PG rows deleted",
        file_uuid, rows_deleted,
    );
    Ok(CheckoutResult {
        file_uuid: file_uuid.to_string(),
        rows_deleted: rows_deleted as usize,
    })
}

View File

@@ -4,47 +4,47 @@ use crate::core::db::PostgresDb;
use anyhow::{Context, Result};
use serde_json::Value;
use sqlx::{PgPool, Row};
use tracing::info;
use std::collections::BTreeMap;
use tracing::{info, warn};
const OCR_CONFIDENCE_THRESHOLD: f64 = 0.5;
pub async fn execute_rule1(db: &PostgresDb, file_uuid: &str, fps: f64) -> Result<usize> {
let pool = db.pool();
let pre_chunks_table = schema::table_name("pre_chunks");
let asr_segments = fetch_asr_segments(pool, file_uuid, &pre_chunks_table).await?;
let asrx_segments = fetch_asrx_segments(pool, file_uuid, &pre_chunks_table).await?;
let yolo_frames = fetch_yolo_frames(pool, file_uuid, &pre_chunks_table).await?;
let face_frames = fetch_face_frames(pool, file_uuid).await?;
let ocr_map = fetch_ocr_texts(pool, file_uuid, &pre_chunks_table).await?;
let video = db
.get_video_by_uuid(file_uuid)
.await?
.context("Video not found")?;
let file_id = video.id;
if asr_segments.is_empty() {
info!("Rule 1: no ASR segments for video {}", file_uuid);
return Ok(0);
}
let file_id = video.id;
let mut count = 0;
let mut tx = pool.begin().await?;
for (idx, seg) in asr_segments.iter().enumerate() {
let speaker_id = find_best_speaker(&seg, &asrx_segments);
let yolo_objects = find_yolo_objects(seg.start_frame, seg.end_frame, &yolo_frames);
let face_ids = find_face_ids(seg.start_frame, seg.end_frame, &face_frames);
let ocr_text = collect_ocr_text(seg.start_frame, seg.end_frame, &ocr_map);
let combined_text = if ocr_text.is_empty() {
seg.text.clone()
} else {
format!("{} {}", seg.text, ocr_text)
};
let metadata = serde_json::json!({
"speaker_id": speaker_id,
"yolo_objects": yolo_objects,
"face_ids": face_ids,
"language": seg.language,
});
let content = serde_json::json!({
"text": seg.text,
"text_normalized": seg.text.to_lowercase(),
"ocr_text": ocr_text,
});
let chunk = Chunk::from_seconds(
@@ -59,7 +59,7 @@ pub async fn execute_rule1(db: &PostgresDb, file_uuid: &str, fps: f64) -> Result
content,
)
.with_metadata(metadata)
.with_text_content(seg.text.clone());
.with_text_content(combined_text);
db.store_chunk_in_tx(&chunk, &mut tx).await?;
@@ -93,40 +93,6 @@ struct AsrSegment {
language: String,
}
#[derive(Debug, Clone)]
struct AsrxSegment {
start_frame: i64,
end_frame: i64,
start_time: f64,
end_time: f64,
speaker: String,
}
#[derive(Debug, Clone)]
struct YoloFrame {
frame: i64,
detections: Vec<YoloDetection>,
}
#[derive(Debug, Clone)]
struct YoloDetection {
class_name: String,
confidence: f64,
}
#[derive(Debug, Clone)]
struct FaceFrame {
frame: i64,
faces: Vec<FaceDetection>,
}
#[derive(Debug, Clone)]
struct FaceDetection {
face_id: String,
confidence: f64,
identity_id: Option<i32>,
}
async fn fetch_asr_segments(
pool: &PgPool,
file_uuid: &str,
@@ -151,8 +117,19 @@ async fn fetch_asr_segments(
let start_frame: i64 = row.try_get("start_frame").unwrap_or(0);
let end_frame: i64 = row.try_get("end_frame").unwrap_or(0);
let start_time: f64 = row.try_get("start_time").unwrap_or(0.0);
let end_time: f64 = row.try_get("end_time").unwrap_or(0.0);
let end_time_raw: Option<f64> = row.try_get("end_time").ok();
let data: Value = row.try_get("data").unwrap_or(Value::Null);
let end_time = end_time_raw
.filter(|t| *t > 0.0)
.or_else(|| data.get("end_time").and_then(|v| v.as_f64()))
.unwrap_or(0.0);
if end_time <= 0.0 {
warn!(
"ASR segment end_time is 0.0 for file {} (frame {}..{})",
file_uuid, start_frame, end_frame
);
}
let text = data.get("text").and_then(|t| t.as_str()).unwrap_or("");
let language = data
@@ -174,58 +151,17 @@ async fn fetch_asr_segments(
Ok(segments)
}
async fn fetch_asrx_segments(
async fn fetch_ocr_texts(
pool: &PgPool,
file_uuid: &str,
table: &str,
) -> Result<Vec<AsrxSegment>> {
let query = format!(
r#"
SELECT
start_frame, end_frame, start_time, end_time, data
FROM {}
WHERE file_uuid = $1 AND processor_type = 'asrx'
ORDER BY start_frame
"#,
table
);
let rows = sqlx::query(&query).bind(file_uuid).fetch_all(pool).await?;
let segments: Vec<AsrxSegment> = rows
.iter()
.map(|row| {
let start_frame: i64 = row.try_get("start_frame").unwrap_or(0);
let end_frame: i64 = row.try_get("end_frame").unwrap_or(0);
let start_time: f64 = row.try_get("start_time").unwrap_or(0.0);
let end_time: f64 = row.try_get("end_time").unwrap_or(0.0);
let data: Value = row.try_get("data").unwrap_or(Value::Null);
let speaker = data
.get("speaker")
.and_then(|s| s.as_str())
.unwrap_or("UNKNOWN");
AsrxSegment {
start_frame,
end_frame,
start_time,
end_time,
speaker: speaker.to_string(),
}
})
.collect();
Ok(segments)
}
async fn fetch_yolo_frames(pool: &PgPool, file_uuid: &str, table: &str) -> Result<Vec<YoloFrame>> {
) -> Result<BTreeMap<i64, Vec<String>>> {
let query = format!(
r#"
SELECT
coordinate_index as frame, data
FROM {}
WHERE file_uuid = $1 AND processor_type = 'yolo'
WHERE file_uuid = $1 AND processor_type = 'ocr'
ORDER BY coordinate_index
"#,
table
@@ -233,136 +169,54 @@ async fn fetch_yolo_frames(pool: &PgPool, file_uuid: &str, table: &str) -> Resul
let rows = sqlx::query(&query).bind(file_uuid).fetch_all(pool).await?;
let frames: Vec<YoloFrame> = rows
.iter()
.map(|row| {
let frame: i64 = row.try_get("frame").unwrap_or(0);
let data: Value = row.try_get("data").unwrap_or(Value::Null);
let detections: Vec<YoloDetection> = data
.get("detections")
.and_then(|d| d.as_array())
.map(|arr| {
arr.iter()
.filter_map(|det| {
let class_name = det.get("class_name").and_then(|c| c.as_str());
let confidence = det.get("confidence").and_then(|c| c.as_f64());
if class_name.is_some() && confidence.is_some() {
Some(YoloDetection {
class_name: class_name.unwrap().to_string(),
confidence: confidence.unwrap(),
})
} else {
None
}
})
.collect()
})
.unwrap_or_default();
YoloFrame { frame, detections }
})
.collect();
Ok(frames)
}
async fn fetch_face_frames(pool: &PgPool, file_uuid: &str) -> Result<Vec<FaceFrame>> {
let face_detections_table = schema::table_name("face_detections");
let query = format!(
r#"
SELECT
frame_number as frame,
face_id,
confidence,
identity_id
FROM {}
WHERE file_uuid = $1
ORDER BY frame_number
"#,
face_detections_table
);
let rows = sqlx::query(&query).bind(file_uuid).fetch_all(pool).await?;
let mut frame_map: std::collections::HashMap<i64, FaceFrame> = std::collections::HashMap::new();
let mut map: BTreeMap<i64, Vec<String>> = BTreeMap::new();
for row in rows {
let frame: i64 = row.try_get("frame").unwrap_or(0);
let face_id: Option<String> = row.try_get("face_id").ok();
let confidence: f64 = row.try_get("confidence").unwrap_or(0.0);
let identity_id: Option<i32> = row.try_get("identity_id").ok();
let data: Value = row.try_get("data").unwrap_or(Value::Null);
if let Some(face_id) = face_id {
let detection = FaceDetection {
face_id: face_id.clone(),
confidence,
identity_id,
};
let texts: Vec<String> = data
.get("texts")
.and_then(|t| t.as_array())
.map(|arr| {
arr.iter()
.filter_map(|item| {
let text = item.get("text").and_then(|t| t.as_str())?;
let confidence = item
.get("confidence")
.and_then(|c| c.as_f64())
.unwrap_or(0.0);
if confidence > OCR_CONFIDENCE_THRESHOLD {
Some(text.to_string())
} else {
None
}
})
.collect()
})
.unwrap_or_default();
frame_map
.entry(frame)
.or_insert_with(|| FaceFrame {
frame,
faces: Vec::new(),
})
.faces
.push(detection);
}
map.insert(frame, texts);
}
let mut frames: Vec<FaceFrame> = frame_map.into_values().collect();
frames.sort_by_key(|f| f.frame);
Ok(frames)
Ok(map)
}
fn find_best_speaker(asr_seg: &AsrSegment, asrx_segments: &[AsrxSegment]) -> String {
let mut best_speaker = "UNKNOWN".to_string();
let mut max_overlap = 0.0f64;
fn collect_ocr_text(
start_frame: i64,
end_frame: i64,
ocr_map: &BTreeMap<i64, Vec<String>>,
) -> String {
let mut seen = std::collections::HashSet::new();
let mut parts = Vec::new();
for spk in asrx_segments {
let overlap =
(asr_seg.end_time.min(spk.end_time) - asr_seg.start_time.max(spk.start_time)).max(0.0);
if overlap > max_overlap {
max_overlap = overlap;
best_speaker = spk.speaker.clone();
}
}
best_speaker
}
fn find_yolo_objects(start_frame: i64, end_frame: i64, yolo_frames: &[YoloFrame]) -> Vec<String> {
let mut objects = Vec::new();
for frame in yolo_frames {
if frame.frame >= start_frame && frame.frame <= end_frame {
for det in &frame.detections {
if det.confidence > 0.5 && !objects.contains(&det.class_name) {
objects.push(det.class_name.clone());
}
for (_frame, texts) in ocr_map.range(start_frame..=end_frame) {
for t in texts {
let trimmed = t.trim();
if !trimmed.is_empty() && seen.insert(trimmed.to_string()) {
parts.push(trimmed.to_string());
}
}
}
objects
}
fn find_face_ids(start_frame: i64, end_frame: i64, face_frames: &[FaceFrame]) -> Vec<String> {
let mut face_ids = Vec::new();
for frame in face_frames {
if frame.frame >= start_frame && frame.frame <= end_frame {
for face in &frame.faces {
if face.confidence > 0.5 && !face_ids.contains(&face.face_id) {
face_ids.push(face.face_id.clone());
}
}
}
}
face_ids
parts.join(" ")
}

View File

@@ -123,11 +123,14 @@ pub async fn ingest_rule3(pool: &PgPool, file_uuid: &str) -> Result<usize> {
);
// 4. Insert into dev.chunks
let fps_query: Option<f64> =
sqlx::query_scalar("SELECT fps FROM videos WHERE file_uuid = $1")
.bind(file_uuid)
.fetch_optional(&mut *tx)
.await?;
let video_table = schema::table_name("videos");
let fps_query: Option<f64> = sqlx::query_scalar(&format!(
"SELECT fps FROM {} WHERE file_uuid = $1",
video_table
))
.bind(file_uuid)
.fetch_optional(&mut *tx)
.await?;
let fps = fps_query.unwrap_or(29.97);
// Prepare metadata JSON

View File

@@ -194,6 +194,9 @@ pub enum VideoStatus {
Registered,
Pending,
Processing,
Processed,
Indexed,
CheckedOut,
Completed,
Failed,
}
@@ -205,6 +208,9 @@ impl VideoStatus {
VideoStatus::Registered => "registered",
VideoStatus::Pending => "pending",
VideoStatus::Processing => "processing",
VideoStatus::Processed => "processed",
VideoStatus::Indexed => "indexed",
VideoStatus::CheckedOut => "checked_out",
VideoStatus::Completed => "completed",
VideoStatus::Failed => "failed",
}
@@ -216,6 +222,9 @@ impl VideoStatus {
"registered" => Some(VideoStatus::Registered),
"pending" => Some(VideoStatus::Pending),
"processing" => Some(VideoStatus::Processing),
"processed" => Some(VideoStatus::Processed),
"indexed" => Some(VideoStatus::Indexed),
"checked_out" => Some(VideoStatus::CheckedOut),
"completed" => Some(VideoStatus::Completed),
"failed" => Some(VideoStatus::Failed),
_ => None,
@@ -427,6 +436,7 @@ pub enum PipelineType {
#[serde(rename_all = "snake_case")]
pub enum ProcessorType {
Cut,
Asr,
Yolo,
Ocr,
Face,
@@ -435,6 +445,8 @@ pub enum ProcessorType {
Scene,
Story,
FiveW1H,
Appearance,
MediaPipe,
}
impl sqlx::Type<sqlx::Postgres> for ProcessorType {
@@ -462,6 +474,7 @@ impl ProcessorType {
pub fn as_str(&self) -> &'static str {
match self {
ProcessorType::Cut => "cut",
ProcessorType::Asr => "asr",
ProcessorType::Yolo => "yolo",
ProcessorType::Ocr => "ocr",
ProcessorType::Face => "face",
@@ -470,12 +483,15 @@ impl ProcessorType {
ProcessorType::Scene => "scene",
ProcessorType::Story => "story",
ProcessorType::FiveW1H => "5w1h",
ProcessorType::Appearance => "appearance",
ProcessorType::MediaPipe => "mediapipe",
}
}
pub fn from_db_str(s: &str) -> Option<Self> {
match s {
"cut" => Some(ProcessorType::Cut),
"asr" => Some(ProcessorType::Asr),
"yolo" => Some(ProcessorType::Yolo),
"ocr" => Some(ProcessorType::Ocr),
"face" => Some(ProcessorType::Face),
@@ -484,6 +500,8 @@ impl ProcessorType {
"scene" => Some(ProcessorType::Scene),
"story" => Some(ProcessorType::Story),
"5w1h" => Some(ProcessorType::FiveW1H),
"appearance" => Some(ProcessorType::Appearance),
"mediapipe" => Some(ProcessorType::MediaPipe),
_ => None,
}
}
@@ -491,6 +509,7 @@ impl ProcessorType {
pub fn estimated_cpu(&self) -> f64 {
match self {
ProcessorType::Cut => 0.5,
ProcessorType::Asr => 0.5,
ProcessorType::Yolo => 0.3,
ProcessorType::Ocr => 0.8,
ProcessorType::Face => 0.6,
@@ -499,12 +518,15 @@ impl ProcessorType {
ProcessorType::Scene => 0.3,
ProcessorType::Story => 0.1,
ProcessorType::FiveW1H => 0.1,
ProcessorType::Appearance => 0.3,
ProcessorType::MediaPipe => 0.3,
}
}
pub fn uses_gpu(&self) -> bool {
match self {
ProcessorType::Yolo | ProcessorType::Face | ProcessorType::Pose => true,
ProcessorType::MediaPipe => false,
_ => false,
}
}
@@ -512,6 +534,7 @@ impl ProcessorType {
pub fn estimated_memory_mb(&self) -> u64 {
match self {
ProcessorType::Cut => 512,
ProcessorType::Asr => 1024,
ProcessorType::Yolo => 1024,
ProcessorType::Ocr => 1024,
ProcessorType::Face => 1536,
@@ -520,12 +543,15 @@ impl ProcessorType {
ProcessorType::Scene => 512,
ProcessorType::Story => 256,
ProcessorType::FiveW1H => 256,
ProcessorType::Appearance => 512,
ProcessorType::MediaPipe => 1024,
}
}
pub fn model_name(&self) -> Option<&'static str> {
match self {
ProcessorType::Cut => None,
ProcessorType::Asr => Some("whisper-small"),
ProcessorType::Yolo => Some("yolov8n"),
ProcessorType::Ocr => Some("paddleocr"),
ProcessorType::Face => Some("insightface/buffalo_l"),
@@ -534,12 +560,14 @@ impl ProcessorType {
ProcessorType::Scene => Some("places365"),
ProcessorType::Story => None,
ProcessorType::FiveW1H => Some("gemma4"),
ProcessorType::Appearance => None,
ProcessorType::MediaPipe => Some("mediapipe/holistic"),
}
}
pub fn dependencies(&self) -> Vec<ProcessorType> {
match self {
ProcessorType::Asrx => vec![ProcessorType::Cut],
ProcessorType::Asrx => vec![ProcessorType::Cut, ProcessorType::Asr],
ProcessorType::Scene => vec![ProcessorType::Cut],
ProcessorType::Story => vec![
ProcessorType::Asrx,
@@ -548,6 +576,8 @@ impl ProcessorType {
ProcessorType::Face,
],
ProcessorType::FiveW1H => vec![ProcessorType::Story],
ProcessorType::Appearance => vec![ProcessorType::Pose],
ProcessorType::MediaPipe => vec![],
_ => vec![],
}
}
@@ -555,11 +585,14 @@ impl ProcessorType {
pub fn all() -> Vec<ProcessorType> {
vec![
ProcessorType::Cut,
ProcessorType::Asr,
ProcessorType::Asrx,
ProcessorType::Yolo,
ProcessorType::Ocr,
ProcessorType::Face,
ProcessorType::Pose,
ProcessorType::MediaPipe,
ProcessorType::Appearance,
ProcessorType::Story,
]
}
@@ -569,9 +602,12 @@ impl ProcessorType {
ProcessorType::Yolo
| ProcessorType::Ocr
| ProcessorType::Face
| ProcessorType::Pose => PipelineType::Frame,
| ProcessorType::Pose
| ProcessorType::Appearance
| ProcessorType::MediaPipe => PipelineType::Frame,
ProcessorType::Cut
| ProcessorType::Asr
| ProcessorType::Asrx
| ProcessorType::Scene
| ProcessorType::Story
@@ -1659,7 +1695,7 @@ impl PostgresDb {
MonitorJobStatus::from_db_str(&status_str).unwrap_or(MonitorJobStatus::Pending);
Ok(Some(MonitorJob {
id: r.get(0),
id: r.get::<i32, _>(0),
uuid: r.get(1),
video_path: r.get(2),
status,
@@ -1707,7 +1743,7 @@ impl PostgresDb {
MonitorJobStatus::from_db_str(&status_str).unwrap_or(MonitorJobStatus::Pending);
MonitorJob {
id: r.get(0),
id: r.get::<i32, _>(0),
uuid: r.get(1),
video_path: r.get(2),
status,
@@ -2444,15 +2480,15 @@ impl PostgresDb {
pub async fn store_face_detections_batch(
&self,
uuid: &str,
detections: &[(i64, f64, i32, i32, i32, i32, f32)],
detections: &[(i64, f64, Option<String>, i32, i32, i32, i32, f32)],
) -> Result<()> {
let table = schema::table_name("face_detections");
for (frame, ts, x, y, w, h, conf) in detections {
for (frame, ts, face_id, x, y, w, h, conf) in detections {
sqlx::query(&format!(
"INSERT INTO {} (file_uuid, frame_number, timestamp_secs, x, y, width, height, confidence) \
VALUES ($1, $2, $3, $4, $5, $6, $7, $8) ON CONFLICT DO NOTHING", table
"INSERT INTO {} (file_uuid, frame_number, timestamp_secs, face_id, x, y, width, height, confidence) \
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9) ON CONFLICT DO NOTHING", table
))
.bind(uuid).bind(frame).bind(ts).bind(x).bind(y).bind(w).bind(h).bind(conf)
.bind(uuid).bind(frame).bind(ts).bind(face_id).bind(x).bind(y).bind(w).bind(h).bind(conf)
.execute(&self.pool).await?;
}
Ok(())
@@ -2696,8 +2732,8 @@ impl PostgresDb {
let ptype: &str = r.get("processor");
let st: &str = r.get("status");
crate::core::db::ProcessorResult {
id: r.get("id"),
job_id: r.get("job_id"),
id: r.get::<i32, _>("id"),
job_id: r.get::<i32, _>("job_id"),
processor_type: crate::core::db::ProcessorType::from_db_str(ptype)
.unwrap_or(crate::core::db::ProcessorType::Cut),
status: crate::core::db::ProcessorJobStatus::from_db_str(st)
@@ -2742,8 +2778,8 @@ impl PostgresDb {
let ptype: &str = r.get("processor");
let st: &str = r.get("status");
crate::core::db::ProcessorResult {
id: r.get("id"),
job_id: r.get("job_id"),
id: r.get::<i32, _>("id"),
job_id: r.get::<i32, _>("job_id"),
processor_type: crate::core::db::ProcessorType::from_db_str(ptype)
.unwrap_or(crate::core::db::ProcessorType::Cut),
status: crate::core::db::ProcessorJobStatus::from_db_str(st)
@@ -2917,7 +2953,7 @@ impl PostgresDb {
Ok(rows
.into_iter()
.map(|r| crate::core::person_identity::Identity {
id: r.get(0),
id: r.get::<i32, _>(0),
name: r.get(1),
metadata: r.get(2),
created_at: r.get(3),
@@ -2943,7 +2979,7 @@ impl PostgresDb {
Ok(rows
.into_iter()
.map(|r| crate::core::person_identity::Identity {
id: r.get(0),
id: r.get::<i32, _>(0),
name: r.get(1),
metadata: r.get(2),
created_at: r.get(3),

View File

@@ -7,10 +7,10 @@ use std::collections::HashMap;
use super::{Database, SearchResult, VectorStore};
pub struct QdrantDb {
pub(crate) client: Client,
pub(crate) base_url: String,
pub(crate) api_key: String,
pub(crate) collection_name: String,
pub client: Client,
pub base_url: String,
pub api_key: String,
pub collection_name: String,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
@@ -693,10 +693,24 @@ impl QdrantDb {
}
pub async fn delete_by_uuid(&self, uuid: &str) -> Result<()> {
let url = format!(
"{}/collections/{}/points/delete",
self.base_url, self.collection_name
);
Self::delete_by_uuid_from_collection(
&self.client,
&self.base_url,
&self.api_key,
&self.collection_name,
uuid,
)
.await
}
pub async fn delete_by_uuid_from_collection(
client: &reqwest::Client,
base_url: &str,
api_key: &str,
collection: &str,
uuid: &str,
) -> Result<()> {
let url = format!("{}/collections/{}/points/delete", base_url, collection);
let body = serde_json::json!({
"filter": {
@@ -711,14 +725,17 @@ impl QdrantDb {
}
});
self.client
client
.post(&url)
.header("api-key", &self.api_key)
.header("api-key", api_key)
.header("Content-Type", "application/json")
.json(&body)
.send()
.await
.context("Failed to delete points from Qdrant")?;
.context(format!(
"Failed to delete points from Qdrant collection {}",
collection
))?;
Ok(())
}

View File

@@ -0,0 +1,336 @@
use anyhow::{Context, Result};
use reqwest::Client;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
/// Manages Qdrant workspace collections — staging area for vectors during processing.
/// At checkin, vectors are copied from workspace to their respective production collections.
///
/// Workspace collections:
/// - `{prefix}_workspace_chunks` (768d) — text chunk embeddings
/// - `{prefix}_workspace_traces` (512d) — face trace embeddings
///
/// Voice embeddings use per-file collections (`{file_uuid}_voice`) stored directly, no workspace staging.
pub struct QdrantWorkspace {
/// HTTP client used for every Qdrant REST call.
client: Client,
/// Base URL of the Qdrant server (taken from `QDRANT_URL` in `new`).
base_url: String,
/// Value sent in the `api-key` request header (taken from `QDRANT_API_KEY` in `new`).
api_key: String,
/// Collection-name prefix: `REDIS_KEY_PREFIX` with trailing `:` characters removed.
prefix: String,
}
impl QdrantWorkspace {
/// Builds a workspace handle from the process environment.
///
/// * `QDRANT_URL` — server base URL, defaults to `http://localhost:6333`.
/// * `QDRANT_API_KEY` — API key, defaults to a built-in value.
/// * The collection prefix is `REDIS_KEY_PREFIX` without its trailing `:`.
pub fn new() -> Self {
    // Reads an environment variable, substituting `fallback` when it is unset or not valid UTF-8.
    let env_or = |key: &str, fallback: &str| -> String {
        std::env::var(key).unwrap_or_else(|_| fallback.to_string())
    };
    let raw_prefix = crate::core::config::REDIS_KEY_PREFIX.as_str();

    Self {
        client: Client::new(),
        base_url: env_or("QDRANT_URL", "http://localhost:6333"),
        // NOTE(review): a credential-looking default is compiled into the binary; deployments
        // should always set QDRANT_API_KEY rather than rely on this fallback.
        api_key: env_or("QDRANT_API_KEY", "Test3200Test3200Test3200"),
        prefix: raw_prefix.trim_end_matches(':').to_string(),
    }
}
/// Name of the workspace collection that stages text-chunk embeddings:
/// `{prefix}_workspace_chunks`.
pub fn chunks_collection(&self) -> String {
    let mut name = self.prefix.clone();
    name.push_str("_workspace_chunks");
    name
}
/// Name of the workspace collection that stages face-trace embeddings:
/// `{prefix}_workspace_traces`.
pub fn traces_collection(&self) -> String {
    let mut name = self.prefix.clone();
    name.push_str("_workspace_traces");
    name
}
/// Makes sure both workspace collections exist, creating them when missing.
///
/// The chunks collection is ensured first (768 dimensions), then the traces
/// collection (512 dimensions); the first failure aborts and is returned.
pub async fn ensure_all(&self) -> Result<()> {
    let targets = [
        (self.chunks_collection(), 768usize),
        (self.traces_collection(), 512usize),
    ];
    for (name, dim) in &targets {
        self.ensure_collection(name, *dim).await?;
    }
    Ok(())
}
async fn ensure_collection(&self, name: &str, dim: usize) -> Result<()> {
let url = format!("{}/collections/{}", self.base_url, name);
let exists = self
.client
.get(&url)
.header("api-key", &self.api_key)
.send()
.await
.map(|r| r.status().is_success())
.unwrap_or(false);
if exists {
return Ok(());
}
let body = serde_json::json!({
"vectors": { "size": dim, "distance": "Cosine" }
});
self.client
.post(&format!("{}/collections", self.base_url))
.header("api-key", &self.api_key)
.header("Content-Type", "application/json")
.json(&body)
.send()
.await
.context(format!(
"Failed to create Qdrant workspace collection: {}",
name
))?;
tracing::info!(
"Created Qdrant workspace collection: {} (dim={})",
name,
dim
);
Ok(())
}
/// Upserts a single point into `collection` and waits for Qdrant to apply it (`wait=true`).
///
/// The `payload` key is only included in the request when `payload` is `Some`.
///
/// # Errors
/// Fails when the request cannot be sent or Qdrant answers with a non-success status
/// (the response body is included in the error message).
pub async fn upsert_vector(
    &self,
    collection: &str,
    point_id: u64,
    vector: &[f32],
    payload: Option<serde_json::Value>,
) -> Result<()> {
    let mut point = serde_json::json!({ "id": point_id, "vector": vector });
    if let Some(extra) = payload {
        point["payload"] = extra;
    }
    let request_body = serde_json::json!({ "points": [point] });

    let endpoint = format!(
        "{}/collections/{}/points?wait=true",
        self.base_url, collection
    );
    let response = self
        .client
        .put(&endpoint)
        .header("api-key", &self.api_key)
        .json(&request_body)
        .send()
        .await
        .context("Failed to upsert to Qdrant workspace")?;

    if response.status().is_success() {
        return Ok(());
    }
    let detail = response.text().await.unwrap_or_default();
    Err(anyhow::anyhow!("Qdrant workspace upsert failed: {}", detail))
}
/// Upserts many points into `collection` with one request and waits for completion.
///
/// Each tuple is `(point id, vector, optional payload)`. An empty `points` slice is a
/// no-op: no request is sent and `Ok(())` is returned.
///
/// # Errors
/// Fails when the request cannot be sent or Qdrant answers with a non-success status
/// (the response body is included in the error message).
pub async fn upsert_vectors_batch(
    &self,
    collection: &str,
    points: &[(u64, &[f32], Option<serde_json::Value>)],
) -> Result<()> {
    // Nothing to write: skip the round trip instead of sending an empty points list.
    if points.is_empty() {
        return Ok(());
    }

    let url = format!(
        "{}/collections/{}/points?wait=true",
        self.base_url, collection
    );

    let qdrant_points: Vec<serde_json::Value> = points
        .iter()
        .map(|(id, vec, payload)| {
            let mut p = serde_json::json!({ "id": id, "vector": vec });
            if let Some(pl) = payload {
                p["payload"] = pl.clone();
            }
            p
        })
        .collect();
    let body = serde_json::json!({ "points": qdrant_points });

    let resp = self
        .client
        .put(&url)
        .header("api-key", &self.api_key)
        .json(&body)
        .send()
        .await
        .context("Failed to batch upsert to Qdrant workspace")?;

    if !resp.status().is_success() {
        let text = resp.text().await.unwrap_or_default();
        anyhow::bail!("Qdrant workspace batch upsert failed: {}", text);
    }
    Ok(())
}
/// Stages one text-chunk embedding in the workspace chunks collection.
///
/// The payload carries `file_uuid`, `chunk_type`, `text`, `start_time`, `end_time` and,
/// when given, `parent_chunk_id`. The numeric Qdrant point id is derived by hashing the
/// string `point_id`.
pub async fn upsert_chunk_embedding(
    &self,
    point_id: &str,
    vector: &[f32],
    chunk_type: &str,
    file_uuid: &str,
    text: &str,
    parent_chunk_id: Option<&str>,
    start_time: f64,
    end_time: f64,
) -> Result<()> {
    use std::collections::hash_map::DefaultHasher;
    use std::hash::{Hash, Hasher};

    // NOTE(review): the standard library does not guarantee `DefaultHasher` output to be
    // stable across Rust releases, so ids derived here may change after a toolchain upgrade.
    let mut hasher = DefaultHasher::new();
    point_id.hash(&mut hasher);
    let numeric_id = hasher.finish();

    let mut fields = serde_json::json!({
        "file_uuid": file_uuid,
        "chunk_type": chunk_type,
        "text": text,
        "start_time": start_time,
        "end_time": end_time,
    });
    if let Some(parent) = parent_chunk_id {
        fields["parent_chunk_id"] = serde_json::json!(parent);
    }

    let target = self.chunks_collection();
    self.upsert_vector(&target, numeric_id, vector, Some(fields))
        .await
}
/// Stages one face embedding in the workspace traces collection.
///
/// The payload records `file_uuid`, `trace_id`, `frame_number` and a constant
/// `type` of `"face_embedding"`.
pub async fn upsert_face_embedding(
    &self,
    point_id: u64,
    vector: &[f32],
    file_uuid: &str,
    trace_id: i32,
    frame_number: i64,
) -> Result<()> {
    let mut fields = serde_json::Map::new();
    fields.insert("file_uuid".to_string(), serde_json::json!(file_uuid));
    fields.insert("trace_id".to_string(), serde_json::json!(trace_id));
    fields.insert("frame_number".to_string(), serde_json::json!(frame_number));
    fields.insert("type".to_string(), serde_json::json!("face_embedding"));

    let target = self.traces_collection();
    self.upsert_vector(
        &target,
        point_id,
        vector,
        Some(serde_json::Value::Object(fields)),
    )
    .await
}
/// Reads every staged point belonging to `file_uuid` from both workspace collections.
///
/// Checkin uses this to fetch vectors before they are moved to production. The chunks
/// collection is read first; an error from either read is returned immediately.
pub async fn scroll_by_file_uuid(&self, file_uuid: &str) -> Result<WorkspaceScrollResult> {
    let chunks_name = self.chunks_collection();
    let chunks = self.scroll_collection(&chunks_name, file_uuid).await?;

    let traces_name = self.traces_collection();
    let traces = self.scroll_collection(&traces_name, file_uuid).await?;

    Ok(WorkspaceScrollResult { chunks, traces })
}
async fn scroll_collection(
&self,
collection: &str,
file_uuid: &str,
) -> Result<Vec<ScrolledPoint>> {
use serde_json::json;
let url = format!("{}/collections/{}/points/scroll", self.base_url, collection);
let body = json!({
"filter": {
"must": [{ "key": "file_uuid", "match": { "value": file_uuid } }]
},
"limit": 10000,
"with_payload": true,
"with_vector": true,
});
let resp = self
.client
.post(&url)
.header("api-key", &self.api_key)
.header("Content-Type", "application/json")
.json(&body)
.send()
.await
.context(format!("Failed to scroll Qdrant workspace: {}", collection))?;
#[derive(Deserialize)]
struct ScrollResp {
result: ScrollResult,
}
#[derive(Deserialize)]
struct ScrollResult {
points: Vec<QdrantPoint>,
}
#[derive(Deserialize)]
struct QdrantPoint {
id: serde_json::Value,
payload: HashMap<String, serde_json::Value>,
vector: Option<Vec<f32>>,
}
let text = resp.text().await?;
let parsed: ScrollResp = serde_json::from_str(&text).context(format!(
"Failed to parse scroll response from {}",
collection
))?;
Ok(parsed
.result
.points
.into_iter()
.map(|p| ScrolledPoint {
id: p.id.to_string(),
vector: p.vector,
payload: p.payload,
})
.collect())
}
/// Delete all vectors for a file from all workspace collections.
/// Used after checkin to clean up.
pub async fn delete_by_file_uuid(&self, file_uuid: &str) -> Result<()> {
self.delete_from_collection(&self.chunks_collection(), file_uuid)
.await?;
let _ = self
.delete_from_collection(&self.traces_collection(), file_uuid)
.await;
Ok(())
}
/// Deletes every point in `collection` whose payload `file_uuid` matches.
///
/// Only a transport failure is returned as an error; a non-success HTTP status is
/// logged as a warning and the call still returns `Ok(())`.
async fn delete_from_collection(&self, collection: &str, file_uuid: &str) -> Result<()> {
    let filter = serde_json::json!({
        "must": [{ "key": "file_uuid", "match": { "value": file_uuid } }]
    });
    let request_body = serde_json::json!({ "filter": filter });
    let endpoint = format!("{}/collections/{}/points/delete", self.base_url, collection);

    let response = self
        .client
        .post(&endpoint)
        .header("api-key", &self.api_key)
        .header("Content-Type", "application/json")
        .json(&request_body)
        .send()
        .await
        .context(format!(
            "Failed to delete from Qdrant workspace: {}",
            collection
        ))?;

    let status = response.status();
    if !status.is_success() {
        tracing::warn!("Delete from workspace {} returned {}", collection, status);
    }
    Ok(())
}
}
/// `Default` delegates to [`QdrantWorkspace::new`], so the default value is configured
/// from the environment exactly like an explicitly constructed one.
impl Default for QdrantWorkspace {
fn default() -> Self {
Self::new()
}
}
/// One point read back from a workspace collection by a scroll request.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ScrolledPoint {
/// Point id rendered as JSON text (the scroll code calls `to_string()` on the JSON id value).
pub id: String,
/// The stored vector, when the response included one.
pub vector: Option<Vec<f32>>,
/// Payload fields keyed by name.
pub payload: HashMap<String, serde_json::Value>,
}
/// Points for one file gathered from both workspace collections.
#[derive(Debug, Clone)]
pub struct WorkspaceScrollResult {
/// Points read from the workspace chunks collection.
pub chunks: Vec<ScrolledPoint>,
/// Points read from the workspace traces collection.
pub traces: Vec<ScrolledPoint>,
}

View File

@@ -15,7 +15,7 @@ static SCHEMA_VERSION: AtomicU32 = AtomicU32::new(0);
impl SchemaContext {
/// Initialize schema context from environment
pub fn init() -> Self {
let schema = std::env::var("DATABASE_SCHEMA").unwrap_or_else(|_| "dev".to_string());
let schema = std::env::var("DATABASE_SCHEMA").unwrap_or_else(|_| "public".to_string());
let prefix = if schema == "public" {
String::new()
} else {

View File

@@ -0,0 +1,469 @@
use std::path::PathBuf;
use anyhow::{Context, Result};
use sqlx::sqlite::{SqlitePool, SqlitePoolOptions};
use sqlx::Row;
use crate::core::config::OUTPUT_DIR;
/// Handle to the per-file workspace SQLite database.
#[derive(Debug, Clone)]
pub struct WorkspaceDb {
/// Connection pool for the workspace database file (opened with at most one connection).
pool: SqlitePool,
/// UUID of the file this workspace belongs to; written into every row's `file_uuid` column.
pub file_uuid: String,
}
/// Location of the workspace database for `file_uuid`:
/// `{OUTPUT_DIR}/{file_uuid}.workspace.sqlite`.
pub fn workspace_path(file_uuid: &str) -> PathBuf {
    let mut location = PathBuf::from(OUTPUT_DIR.as_str());
    location.push(format!("{}.workspace.sqlite", file_uuid));
    location
}
impl WorkspaceDb {
pub async fn open(file_uuid: &str) -> Result<Self> {
let path = workspace_path(file_uuid);
let url = format!("sqlite:///{}?mode=rwc", path.display());
let pool = SqlitePoolOptions::new()
.max_connections(1)
.connect(&url)
.await
.context(format!(
"Failed to open workspace SQLite: {}",
path.display()
))?;
let db = Self {
pool,
file_uuid: file_uuid.to_string(),
};
db.init_tables().await?;
Ok(db)
}
/// Creates the five workspace tables if they do not exist yet.
///
/// Statements run one after another on the pool; the first failure aborts and is
/// returned. Every statement uses `CREATE TABLE IF NOT EXISTS`, so calling this on an
/// already initialised database changes nothing.
async fn init_tables(&self) -> Result<()> {
// Per-processor job bookkeeping: status, error, output and counters.
sqlx::query(
"CREATE TABLE IF NOT EXISTS processor_results (
id INTEGER PRIMARY KEY AUTOINCREMENT,
job_id INTEGER NOT NULL,
file_uuid TEXT NOT NULL,
processor_type TEXT NOT NULL,
processor TEXT,
status TEXT NOT NULL DEFAULT 'pending',
error_message TEXT,
output_data TEXT,
chunks_produced INTEGER DEFAULT 0,
frames_processed INTEGER DEFAULT 0,
started_at TEXT,
completed_at TEXT,
created_at TEXT DEFAULT (datetime('now'))
)",
)
.execute(&self.pool)
.await?;
// Intermediate chunks emitted by processors; frame/time bounds are optional.
sqlx::query(
"CREATE TABLE IF NOT EXISTS pre_chunks (
id INTEGER PRIMARY KEY AUTOINCREMENT,
file_uuid TEXT NOT NULL,
processor_type TEXT NOT NULL,
chunk_type TEXT NOT NULL,
start_frame INTEGER,
end_frame INTEGER,
start_time REAL,
end_time REAL,
data TEXT,
text_content TEXT
)",
)
.execute(&self.pool)
.await?;
// One row per detected face box (frame, timestamp, x/y/w/h, confidence).
sqlx::query(
"CREATE TABLE IF NOT EXISTS face_detections (
id INTEGER PRIMARY KEY AUTOINCREMENT,
file_uuid TEXT NOT NULL,
face_id TEXT,
frame_number INTEGER,
timestamp_secs REAL,
x REAL,
y REAL,
w REAL,
h REAL,
confidence REAL
)",
)
.execute(&self.pool)
.await?;
// One row per speaker segment (time range, text, optional chunk link).
sqlx::query(
"CREATE TABLE IF NOT EXISTS speaker_detections (
id INTEGER PRIMARY KEY AUTOINCREMENT,
file_uuid TEXT NOT NULL,
speaker_id TEXT,
start_time REAL,
end_time REAL,
text_content TEXT,
chunk_id TEXT,
confidence REAL
)",
)
.execute(&self.pool)
.await?;
// Chunk rows with frame/time bounds, text, content and fps.
sqlx::query(
"CREATE TABLE IF NOT EXISTS chunks (
id INTEGER PRIMARY KEY AUTOINCREMENT,
file_uuid TEXT NOT NULL,
chunk_id TEXT NOT NULL,
chunk_type TEXT NOT NULL,
start_frame INTEGER,
end_frame INTEGER,
start_time REAL,
end_time REAL,
text_content TEXT,
content TEXT,
fps REAL
)",
)
.execute(&self.pool)
.await?;
Ok(())
}
/// Closes the connection pool and deletes the workspace database file, if present.
///
/// # Errors
/// Fails when the file exists but cannot be removed.
pub async fn clear(&self) -> Result<()> {
    self.pool.close().await;

    let db_file = workspace_path(&self.file_uuid);
    if !db_file.exists() {
        return Ok(());
    }
    tokio::fs::remove_file(&db_file)
        .await
        .with_context(|| format!("Failed to delete workspace: {}", db_file.display()))
}
/// Reports whether a workspace database file is present on disk for `file_uuid`.
pub fn exists(file_uuid: &str) -> bool {
    let candidate = workspace_path(file_uuid);
    candidate.exists()
}
// ── Processor Results ──
/// Inserts a new `processor_results` row and returns its row id.
///
/// `processor_type` is written to both the `processor_type` and `processor` columns.
/// Despite the name, the statement is a plain `INSERT`: each call adds a row.
pub async fn upsert_processor_result(
    &self,
    job_id: i32,
    processor_type: &str,
    status: &str,
) -> Result<i64> {
    const INSERT_SQL: &str =
        "INSERT INTO processor_results (job_id, file_uuid, processor_type, processor, status) \
         VALUES (?1, ?2, ?3, ?3, ?4)";

    let outcome = sqlx::query(INSERT_SQL)
        .bind(job_id)
        .bind(&self.file_uuid)
        .bind(processor_type)
        .bind(status)
        .execute(&self.pool)
        .await?;

    let row_id = outcome.last_insert_rowid();
    Ok(row_id)
}
/// Sets the `status` of the `processor_results` row identified by `result_id`.
pub async fn update_processor_result(&self, result_id: i64, status: &str) -> Result<()> {
    const UPDATE_SQL: &str = "UPDATE processor_results SET status = ?1 WHERE id = ?2";

    let statement = sqlx::query(UPDATE_SQL).bind(status).bind(result_id);
    statement.execute(&self.pool).await?;
    Ok(())
}
/// Updates status, error message, output data and both counters of the
/// `processor_results` row identified by `result_id`.
///
/// `None` for `error_message` / `output_data` stores SQL `NULL` in that column.
pub async fn update_processor_result_with_stats(
    &self,
    result_id: i64,
    status: &str,
    error_message: Option<&str>,
    output_data: Option<&str>,
    chunks_produced: i32,
    frames_processed: i32,
) -> Result<()> {
    const UPDATE_SQL: &str =
        "UPDATE processor_results SET status=?1, error_message=?2, output_data=?3, \
         chunks_produced=?4, frames_processed=?5 WHERE id=?6";

    let statement = sqlx::query(UPDATE_SQL)
        .bind(status)
        .bind(error_message)
        .bind(output_data)
        .bind(chunks_produced)
        .bind(frames_processed)
        .bind(result_id);
    statement.execute(&self.pool).await?;
    Ok(())
}
/// Returns every `processor_results` row, ordered by `id`.
pub async fn get_processor_results(&self) -> Result<Vec<ProcessorResultRow>> {
    const SELECT_SQL: &str =
        "SELECT id, job_id, file_uuid, processor_type, processor, status, \
         error_message, output_data, chunks_produced, frames_processed, \
         started_at, completed_at, created_at \
         FROM processor_results ORDER BY id";

    let fetched = sqlx::query_as::<_, ProcessorResultRow>(SELECT_SQL)
        .fetch_all(&self.pool)
        .await?;
    Ok(fetched)
}
// ── Pre-chunks ──
/// Inserts one `pre_chunks` row for this workspace's file.
///
/// Every optional argument that is `None` is stored as SQL `NULL`.
pub async fn store_pre_chunk(
    &self,
    processor_type: &str,
    chunk_type: &str,
    start_frame: Option<i64>,
    end_frame: Option<i64>,
    start_time: Option<f64>,
    end_time: Option<f64>,
    data: Option<&str>,
    text_content: Option<&str>,
) -> Result<()> {
    const INSERT_SQL: &str = "INSERT INTO pre_chunks (file_uuid, processor_type, chunk_type, \
         start_frame, end_frame, start_time, end_time, data, text_content) \
         VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9)";

    let statement = sqlx::query(INSERT_SQL)
        .bind(&self.file_uuid)
        .bind(processor_type)
        .bind(chunk_type)
        .bind(start_frame)
        .bind(end_frame)
        .bind(start_time)
        .bind(end_time)
        .bind(data)
        .bind(text_content);
    statement.execute(&self.pool).await?;
    Ok(())
}
/// Returns every `pre_chunks` row, ordered by `id`.
pub async fn get_all_pre_chunks(&self) -> Result<Vec<PreChunkRow>> {
    const SELECT_SQL: &str = "SELECT id, file_uuid, processor_type, chunk_type, \
         start_frame, end_frame, start_time, end_time, data, text_content \
         FROM pre_chunks ORDER BY id";

    let fetched = sqlx::query_as::<_, PreChunkRow>(SELECT_SQL)
        .fetch_all(&self.pool)
        .await?;
    Ok(fetched)
}
// ── Face Detections ──
/// Inserts all `detections` into `face_detections` inside a single transaction.
///
/// The batch is atomic: if any insert fails, the transaction is dropped without being
/// committed, so no row of the batch is kept. Running the batch in one transaction also
/// avoids a separate commit per row. An empty slice is a no-op.
///
/// # Errors
/// Fails when the transaction cannot be started, an insert fails, or the commit fails.
pub async fn store_face_detections_batch(
    &self,
    detections: &[FaceDetectionBatchItem],
) -> Result<()> {
    if detections.is_empty() {
        return Ok(());
    }

    const INSERT_SQL: &str =
        "INSERT INTO face_detections (file_uuid, face_id, frame_number, timestamp_secs, \
         x, y, w, h, confidence) \
         VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9)";

    let mut tx = self.pool.begin().await?;
    for d in detections {
        sqlx::query(INSERT_SQL)
            .bind(&self.file_uuid)
            .bind(&d.face_id)
            .bind(d.frame)
            .bind(d.ts)
            .bind(d.x)
            .bind(d.y)
            .bind(d.w)
            .bind(d.h)
            .bind(d.confidence)
            .execute(&mut *tx)
            .await?;
    }
    tx.commit().await?;
    Ok(())
}
/// Returns every `face_detections` row, ordered by `id`.
pub async fn get_all_face_detections(&self) -> Result<Vec<FaceDetectionRow>> {
    const SELECT_SQL: &str = "SELECT id, file_uuid, face_id, frame_number, timestamp_secs, \
         x, y, w, h, confidence \
         FROM face_detections ORDER BY id";

    let fetched = sqlx::query_as::<_, FaceDetectionRow>(SELECT_SQL)
        .fetch_all(&self.pool)
        .await?;
    Ok(fetched)
}
// ── Speaker Detections ──
/// Inserts all `segments` into `speaker_detections` inside a single transaction.
///
/// The batch is atomic: if any insert fails, the transaction is dropped without being
/// committed, so no row of the batch is kept. Running the batch in one transaction also
/// avoids a separate commit per row. An empty slice is a no-op.
///
/// # Errors
/// Fails when the transaction cannot be started, an insert fails, or the commit fails.
pub async fn store_speaker_detections_batch(
    &self,
    segments: &[SpeakerDetectionBatchItem],
) -> Result<()> {
    if segments.is_empty() {
        return Ok(());
    }

    const INSERT_SQL: &str =
        "INSERT INTO speaker_detections (file_uuid, speaker_id, start_time, end_time, \
         text_content, chunk_id, confidence) \
         VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7)";

    let mut tx = self.pool.begin().await?;
    for s in segments {
        sqlx::query(INSERT_SQL)
            .bind(&self.file_uuid)
            .bind(&s.speaker_id)
            .bind(s.start_time)
            .bind(s.end_time)
            .bind(&s.text)
            .bind(&s.chunk_id)
            .bind(s.confidence)
            .execute(&mut *tx)
            .await?;
    }
    tx.commit().await?;
    Ok(())
}
/// Returns every `speaker_detections` row, ordered by `id`.
pub async fn get_all_speaker_detections(&self) -> Result<Vec<SpeakerDetectionRow>> {
    const SELECT_SQL: &str = "SELECT id, file_uuid, speaker_id, start_time, end_time, \
         text_content, chunk_id, confidence \
         FROM speaker_detections ORDER BY id";

    let fetched = sqlx::query_as::<_, SpeakerDetectionRow>(SELECT_SQL)
        .fetch_all(&self.pool)
        .await?;
    Ok(fetched)
}
// ── Chunks ──
/// Inserts one `chunks` row for this workspace's file.
///
/// `text_content` and `content` are stored as SQL `NULL` when `None`.
pub async fn store_chunk(
    &self,
    chunk_id: &str,
    chunk_type: &str,
    start_frame: i64,
    end_frame: i64,
    start_time: f64,
    end_time: f64,
    text_content: Option<&str>,
    content: Option<&str>,
    fps: f64,
) -> Result<()> {
    const INSERT_SQL: &str =
        "INSERT INTO chunks (file_uuid, chunk_id, chunk_type, start_frame, end_frame, \
         start_time, end_time, text_content, content, fps) \
         VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10)";

    let statement = sqlx::query(INSERT_SQL)
        .bind(&self.file_uuid)
        .bind(chunk_id)
        .bind(chunk_type)
        .bind(start_frame)
        .bind(end_frame)
        .bind(start_time)
        .bind(end_time)
        .bind(text_content)
        .bind(content)
        .bind(fps);
    statement.execute(&self.pool).await?;
    Ok(())
}
/// Returns every `chunks` row, ordered by `id`.
pub async fn get_all_chunks(&self) -> Result<Vec<ChunkRow>> {
    const SELECT_SQL: &str =
        "SELECT id, file_uuid, chunk_id, chunk_type, start_frame, end_frame, \
         start_time, end_time, text_content, content, fps \
         FROM chunks ORDER BY id";

    let fetched = sqlx::query_as::<_, ChunkRow>(SELECT_SQL)
        .fetch_all(&self.pool)
        .await?;
    Ok(fetched)
}
}
// ── Row types ──
/// One row of the `processor_results` table, as read by `get_processor_results`.
///
/// Field names match the selected column names. `Option` fields are read from columns
/// declared without `NOT NULL`.
// NOTE(review): `chunks_produced`, `frames_processed` and `created_at` are non-optional
// here although their columns are nullable with defaults; decoding presumably fails if a
// row ever stores NULL there — confirm no writer does that.
#[derive(Debug, Clone, sqlx::FromRow)]
pub struct ProcessorResultRow {
pub id: i64,
pub job_id: i32,
pub file_uuid: String,
pub processor_type: String,
pub processor: Option<String>,
pub status: String,
pub error_message: Option<String>,
pub output_data: Option<String>,
pub chunks_produced: i32,
pub frames_processed: i32,
pub started_at: Option<String>,
pub completed_at: Option<String>,
pub created_at: String,
}
/// One row of the `pre_chunks` table, as read by `get_all_pre_chunks`.
///
/// Frame bounds, time bounds, `data` and `text_content` are nullable columns.
#[derive(Debug, Clone, sqlx::FromRow)]
pub struct PreChunkRow {
pub id: i64,
pub file_uuid: String,
pub processor_type: String,
pub chunk_type: String,
pub start_frame: Option<i64>,
pub end_frame: Option<i64>,
pub start_time: Option<f64>,
pub end_time: Option<f64>,
pub data: Option<String>,
pub text_content: Option<String>,
}
/// One row of the `face_detections` table, as read by `get_all_face_detections`.
///
/// Everything except `id` and `file_uuid` is a nullable column; the box values
/// `x`, `y`, `w`, `h` and `confidence` are read back as `f64` (`REAL` columns).
#[derive(Debug, Clone, sqlx::FromRow)]
pub struct FaceDetectionRow {
pub id: i64,
pub file_uuid: String,
pub face_id: Option<String>,
pub frame_number: Option<i64>,
pub timestamp_secs: Option<f64>,
pub x: Option<f64>,
pub y: Option<f64>,
pub w: Option<f64>,
pub h: Option<f64>,
pub confidence: Option<f64>,
}
/// Input record for `store_face_detections_batch`.
///
/// `frame` is written to the `frame_number` column and `ts` to `timestamp_secs`;
/// the remaining fields are written to the columns of the same name.
#[derive(Debug, Clone)]
pub struct FaceDetectionBatchItem {
pub face_id: Option<String>,
pub frame: i64,
pub ts: f64,
// Box values are integers here but the table columns are declared REAL.
pub x: i32,
pub y: i32,
pub w: i32,
pub h: i32,
pub confidence: f32,
}
/// One row of the `speaker_detections` table, as read by `get_all_speaker_detections`.
///
/// Everything except `id` and `file_uuid` is a nullable column.
#[derive(Debug, Clone, sqlx::FromRow)]
pub struct SpeakerDetectionRow {
pub id: i64,
pub file_uuid: String,
pub speaker_id: Option<String>,
pub start_time: Option<f64>,
pub end_time: Option<f64>,
pub text_content: Option<String>,
pub chunk_id: Option<String>,
pub confidence: Option<f64>,
}
/// Input record for `store_speaker_detections_batch`.
///
/// `text` is written to the `text_content` column; the remaining fields are written to
/// the columns of the same name.
#[derive(Debug, Clone)]
pub struct SpeakerDetectionBatchItem {
pub speaker_id: String,
pub start_time: f64,
pub end_time: f64,
pub text: String,
pub chunk_id: Option<String>,
pub confidence: f32,
}
/// One row of the `chunks` table, as read by `get_all_chunks`.
///
/// Only `text_content` and `content` are optional here.
// NOTE(review): the frame, time and `fps` columns are declared nullable in the table but
// are non-optional in this struct; `store_chunk` always writes them — confirm there is no
// other writer that leaves them NULL.
#[derive(Debug, Clone, sqlx::FromRow)]
pub struct ChunkRow {
pub id: i64,
pub file_uuid: String,
pub chunk_id: String,
pub chunk_type: String,
pub start_frame: i64,
pub end_frame: i64,
pub start_time: f64,
pub end_time: f64,
pub text_content: Option<String>,
pub content: Option<String>,
pub fps: f64,
}

View File

@@ -140,7 +140,8 @@ pub async fn call_llm_vision(
.post(&llm_vision_url())
.json(&req)
.timeout(std::time::Duration::from_secs(timeout_secs))
.send().await?;
.send()
.await?;
if !res.status().is_success() {
let text = res.text().await.unwrap_or_default();
anyhow::bail!("Vision LLM API error: {}", text);
@@ -211,7 +212,8 @@ pub async fn call_llm(
.post(&llm_chat_url())
.json(&req)
.timeout(std::time::Duration::from_secs(timeout))
.send().await?;
.send()
.await?;
if !res.status().is_success() {
let text = res.text().await.unwrap_or_default();

View File

@@ -1,6 +1,8 @@
pub mod agent;
pub mod api_key;
pub mod auth;
pub mod cache;
pub mod checkin;
pub mod chunk;
pub mod config;
pub mod db;

View File

@@ -0,0 +1,100 @@
use anyhow::{Context, Result};
use serde::{Deserialize, Serialize};
use std::time::Duration;
use super::executor::PythonExecutor;
/// Timeout handed to the Python executor for one appearance run: 7200 seconds (2 hours).
const APPEARANCE_TIMEOUT: Duration = Duration::from_secs(7200);
/// Top-level shape of the JSON file that `process_appearance` reads from `output_path`.
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct AppearanceResult {
/// Frame count reported in the output file (logged by `process_appearance`).
pub frame_count: u64,
/// Frames-per-second value reported in the output file.
pub fps: f64,
/// Per-frame appearance entries.
pub frames: Vec<AppearanceFrame>,
}
/// Appearance data for a single frame.
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct AppearanceFrame {
/// Frame index — presumably relative to the source video; confirm against the script.
pub frame: u64,
/// Frame timestamp — presumably in seconds; confirm against the script.
pub timestamp: f64,
/// Persons described in this frame.
pub persons: Vec<AppearancePerson>,
}
/// Appearance features for one person within a frame.
// NOTE(review): the layout of the nested `Vec<Vec<f64>>` fields (rows, channels, colour
// space, value range) is defined by the Python script and is not visible here.
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct AppearancePerson {
pub person_id: u64,
/// Bounding box of the person.
pub bbox: BBox,
pub hsv_histogram: Vec<Vec<f64>>,
pub dominant_colors: Vec<Vec<f64>>,
/// Optional upper-body feature data; absent or `null` in the JSON maps to `None`.
pub upper_body: Option<Vec<Vec<f64>>>,
/// Optional lower-body feature data; absent or `null` in the JSON maps to `None`.
pub lower_body: Option<Vec<Vec<f64>>>,
}
/// Integer bounding box given by a corner position and a size.
// NOTE(review): presumably pixel units with (x, y) as the top-left corner — confirm
// against the producing script.
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct BBox {
pub x: i32,
pub y: i32,
pub width: i32,
pub height: i32,
}
/// Reads the appearance JSON at `output_path` and deserializes it.
fn load_appearance_output(output_path: &str) -> Result<AppearanceResult> {
    let raw = std::fs::read_to_string(output_path).context("Failed to read APPEARANCE output")?;
    serde_json::from_str(&raw).context("Failed to parse APPEARANCE output")
}

/// Produces appearance features for `video_path`, reusing an existing output file when
/// one is already present at `output_path`.
///
/// Steps, in order:
/// 1. If `output_path` exists, parse and return it without running anything.
/// 2. If `appearance_processor.py` is missing, return an empty result (0 frames, 0.0 fps).
/// 3. Otherwise run the script with `[video_path, pose_path, output_path]`, then parse
///    the file it wrote.
///
/// # Errors
/// Fails when the executor cannot be created, the script run fails, or the output file
/// cannot be read or parsed (including an already existing but invalid file).
pub async fn process_appearance(
    video_path: &str,
    pose_path: &str,
    output_path: &str,
    uuid: Option<&str>,
    frames: Option<&[i64]>,
) -> Result<AppearanceResult> {
    let already_done = std::path::Path::new(output_path).exists();
    if already_done {
        let cached = load_appearance_output(output_path)?;
        tracing::info!(
            "[APPEARANCE] Skipping (already exists): {} frames",
            cached.frame_count
        );
        return Ok(cached);
    }

    let script_name = "appearance_processor.py";
    let executor = PythonExecutor::new()?;
    let script_path = executor.script_path(script_name);

    tracing::info!(
        "[APPEARANCE] Starting appearance extraction: {}",
        video_path
    );

    if !script_path.exists() {
        tracing::warn!("[APPEARANCE] Script not found, returning empty result");
        let empty = AppearanceResult {
            frame_count: 0,
            fps: 0.0,
            frames: Vec::new(),
        };
        return Ok(empty);
    }

    let script_args = [video_path, pose_path, output_path];
    executor
        .run_with_output_idx_and_frames(
            script_name,
            &script_args,
            uuid,
            "APPEARANCE",
            Some(APPEARANCE_TIMEOUT),
            2,
            frames,
        )
        .await
        .with_context(|| format!("Failed to run {:?}", script_path))?;

    let fresh = load_appearance_output(output_path)?;
    tracing::info!("[APPEARANCE] Result: {} frames", fresh.frame_count);
    Ok(fresh)
}

View File

@@ -1,4 +1,5 @@
use anyhow::{Context, Result};
use libc;
use serde::{Deserialize, Serialize};
use std::time::Duration;
use tokio::process::Command;
@@ -51,6 +52,26 @@ pub async fn process_asrx(
anyhow::bail!("asrx_processor.py not found");
}
// Verify script integrity via SHA256 checksum
executor
.verify_script_integrity("asrx_processor.py")
.context("Pre-execution integrity check failed for asrx_processor.py")?;
// Pre-flight: check ffprobe and ffmpeg availability
for tool in &["ffprobe", "ffmpeg"] {
let check = Command::new("which").arg(tool).output().await?;
if !check.status.success() {
anyhow::bail!("{} not found on PATH — required by ASRX", tool);
}
}
// Stage existing output: .json → .tmp (will be renamed back on success)
let output_path_obj = std::path::Path::new(output_path);
let tmp_path = output_path_obj.with_extension("json.tmp");
if output_path_obj.exists() {
let _ = std::fs::rename(output_path_obj, &tmp_path);
}
tracing::info!(
"[ASRX] Running: {} {} {} {}",
executor.python_path().display(),
@@ -68,14 +89,30 @@ pub async fn process_asrx(
}
cmd.stdout(std::process::Stdio::piped())
.stderr(std::process::Stdio::piped());
.stderr(std::process::Stdio::piped())
.kill_on_drop(true)
.process_group(0);
let child = cmd.spawn().context("Failed to run ASRX processor")?;
let child_pid = child.id();
let output = match timeout(ASRX_TIMEOUT, child.wait_with_output()).await {
Ok(Ok(output)) => output,
Ok(Err(e)) => return Err(e).context("Failed to run ASRX processor"),
Err(_) => anyhow::bail!("ASRX processing timed out after {:?}", ASRX_TIMEOUT),
Ok(Err(e)) => {
let _ = std::fs::rename(&tmp_path, output_path_obj.with_extension("json.err"));
return Err(e).context("Failed to run ASRX processor");
}
Err(_) => {
// Timeout: kill process group, rename .tmp → .err
if let Some(pid) = child_pid {
let pgid = pid as i32;
unsafe {
libc::killpg(pgid, libc::SIGKILL);
}
}
let _ = std::fs::rename(&tmp_path, output_path_obj.with_extension("json.err"));
anyhow::bail!("ASRX processing timed out after {:?}", ASRX_TIMEOUT);
}
};
let stderr = String::from_utf8_lossy(&output.stderr);
@@ -98,9 +135,26 @@ pub async fn process_asrx(
tracing::info!("[ASRX] stderr output:\n{}", stderr);
if !output.status.success() {
// On failure: rename .tmp → .err (partial output preserved if valid JSON)
if tmp_path.exists() {
let is_valid = std::fs::read_to_string(&tmp_path)
.ok()
.and_then(|c| serde_json::from_str::<serde_json::Value>(&c).ok())
.is_some();
if is_valid {
let _ = std::fs::rename(&tmp_path, output_path_obj.with_extension("json.partial"));
} else {
let _ = std::fs::rename(&tmp_path, output_path_obj.with_extension("json.err"));
}
}
anyhow::bail!("ASRX failed: {}", stderr);
}
// Success: rename .tmp back to .json (if .tmp exists, otherwise use the direct output)
if tmp_path.exists() {
let _ = std::fs::rename(&tmp_path, output_path_obj);
}
let json_str = std::fs::read_to_string(output_path).context("Failed to read ASRX output")?;
let result: AsrxResult =

View File

@@ -4,7 +4,7 @@ use std::path::Path;
use std::time::Duration;
use tracing::{debug, info, warn};
use crate::core::processor::clip::{ClipPrediction, detect_objects};
use crate::core::processor::clip::{detect_objects, ClipPrediction};
use crate::core::vision::qwen_vl_manager::QwenVLManager;
const DEFAULT_CLIP_THRESHOLD: f32 = 0.7;
@@ -39,9 +39,13 @@ impl CascadeVisionProcessor {
}
}
pub async fn detect_objects(&self, image_path: &Path, objects: &[&str]) -> Result<CascadeDetectionResult> {
pub async fn detect_objects(
&self,
image_path: &Path,
objects: &[&str],
) -> Result<CascadeDetectionResult> {
let start_time = std::time::Instant::now();
info!(
"[Cascade] Starting detection for {:?} with {} object classes (threshold: {:.2})",
image_path,
@@ -50,7 +54,7 @@ impl CascadeVisionProcessor {
);
let clip_result = self.run_clip_detection(image_path, objects).await?;
let max_clip_confidence = clip_result
.iter()
.map(|p| p.confidence)
@@ -58,21 +62,19 @@ impl CascadeVisionProcessor {
debug!(
"[Cascade] CLIP max confidence: {:.3} (threshold: {:.2})",
max_clip_confidence,
self.clip_threshold
max_clip_confidence, self.clip_threshold
);
if max_clip_confidence > self.clip_threshold {
info!(
"[Cascade] High confidence ({:.3} > {:.2}) → triggering Qwen3-VL",
max_clip_confidence,
self.clip_threshold
max_clip_confidence, self.clip_threshold
);
let qwenvl_result = self.run_qwenvl_detection(image_path, objects).await?;
let processing_time = start_time.elapsed().as_millis() as u64;
return Ok(CascadeDetectionResult {
detections: qwenvl_result,
model_used: "qwen3vl".to_string(),
@@ -84,12 +86,11 @@ impl CascadeVisionProcessor {
info!(
"[Cascade] Low confidence ({:.3} <= {:.2}) → using CLIP results only",
max_clip_confidence,
self.clip_threshold
max_clip_confidence, self.clip_threshold
);
let processing_time = start_time.elapsed().as_millis() as u64;
Ok(CascadeDetectionResult {
detections: clip_result,
model_used: "clip".to_string(),
@@ -99,35 +100,43 @@ impl CascadeVisionProcessor {
})
}
async fn run_clip_detection(&self, image_path: &Path, objects: &[&str]) -> Result<Vec<ClipPrediction>> {
async fn run_clip_detection(
&self,
image_path: &Path,
objects: &[&str],
) -> Result<Vec<ClipPrediction>> {
let image_path_str = image_path.display().to_string();
debug!("[Cascade] Running CLIP detection for {:?}", image_path);
let predictions = detect_objects(&image_path_str, objects, None, None)
.await
.context("CLIP detection failed")?;
debug!(
"[Cascade] CLIP detected {} objects",
predictions.len()
);
debug!("[Cascade] CLIP detected {} objects", predictions.len());
Ok(predictions)
}
async fn run_qwenvl_detection(&self, image_path: &Path, objects: &[&str]) -> Result<Vec<ClipPrediction>> {
async fn run_qwenvl_detection(
&self,
image_path: &Path,
objects: &[&str],
) -> Result<Vec<ClipPrediction>> {
let image_path_str = image_path.display().to_string();
debug!("[Cascade] Running Qwen3-VL detection for {:?}", image_path);
self.qwen_vl_manager.ensure_running().await?;
let prompt = self.build_detection_prompt(objects);
let client = reqwest::Client::new();
let url = format!("http://localhost:{}/v1/chat/completions", self.qwen_vl_manager.get_port());
let url = format!(
"http://localhost:{}/v1/chat/completions",
self.qwen_vl_manager.get_port()
);
let request_body = serde_json::json!({
"model": "Qwen3VL-8B-Instruct-Q8_0",
"messages": [
@@ -150,7 +159,7 @@ impl CascadeVisionProcessor {
"max_tokens": 500,
"temperature": 0.1
});
let response = client
.post(&url)
.json(&request_body)
@@ -158,17 +167,17 @@ impl CascadeVisionProcessor {
.send()
.await
.context("Qwen3-VL API request failed")?;
if !response.status().is_success() {
warn!("[Cascade] Qwen3-VL API error: {}", response.status());
anyhow::bail!("Qwen3-VL API returned error: {}", response.status());
}
let response_json: serde_json::Value = response
.json()
.await
.context("Failed to parse Qwen3-VL response")?;
let content = response_json
.get("choices")
.and_then(|choices| choices.get(0))
@@ -176,24 +185,21 @@ impl CascadeVisionProcessor {
.and_then(|message| message.get("content"))
.and_then(|content| content.as_str())
.unwrap_or("");
debug!("[Cascade] Qwen3-VL response: {}", content);
let detections = self.parse_qwenvl_response(content, objects);
self.qwen_vl_manager.update_last_request_time().await;
info!(
"[Cascade] Qwen3-VL detected {} objects",
detections.len()
);
info!("[Cascade] Qwen3-VL detected {} objects", detections.len());
Ok(detections)
}
fn build_detection_prompt(&self, objects: &[&str]) -> String {
let object_list = objects.join(", ");
format!(
"Analyze this image and detect the following objects: {}.\n\
For each detected object, provide:\n\
@@ -214,29 +220,29 @@ impl CascadeVisionProcessor {
fn parse_qwenvl_response(&self, content: &str, _objects: &[&str]) -> Vec<ClipPrediction> {
let json_start = content.find('{');
let json_end = content.rfind('}');
if json_start.is_none() || json_end.is_none() {
debug!("[Cascade] No JSON found in Qwen3-VL response");
return Vec::new();
}
let json_str = &content[json_start.unwrap()..=json_end.unwrap()];
let parsed: serde_json::Value = serde_json::from_str(json_str)
.unwrap_or(serde_json::json!({"detections": []}));
let parsed: serde_json::Value =
serde_json::from_str(json_str).unwrap_or(serde_json::json!({"detections": []}));
let detections = parsed
.get("detections")
.and_then(|d| d.as_array())
.map(|arr| arr.clone())
.unwrap_or_else(|| Vec::new());
detections
.iter()
.filter_map(|d| {
let label = d.get("label").and_then(|l| l.as_str()).unwrap_or("");
let confidence = d.get("confidence").and_then(|c| c.as_f64()).unwrap_or(0.0) as f32;
if !label.is_empty() && confidence > 0.0 {
Some(ClipPrediction {
label: label.to_string(),
@@ -265,7 +271,7 @@ mod tests {
let processor = CascadeVisionProcessor::new();
let objects = vec!["gun", "weapon", "person"];
let prompt = processor.build_detection_prompt(&objects);
assert!(prompt.contains("gun, weapon, person"));
assert!(prompt.contains("confidence score"));
assert!(prompt.contains("JSON"));
@@ -276,9 +282,9 @@ mod tests {
let processor = CascadeVisionProcessor::new();
let response = "{\"detections\": [{\"label\": \"gun\", \"confidence\": 0.95, \"description\": \"a handgun\"}]}";
let objects = vec!["gun"];
let detections = processor.parse_qwenvl_response(response, &objects);
assert_eq!(detections.len(), 1);
assert_eq!(detections[0].label, "gun");
assert!((detections[0].confidence - 0.95).abs() < 0.001);
@@ -289,9 +295,9 @@ mod tests {
let processor = CascadeVisionProcessor::new();
let response = "{\"detections\": []}";
let objects = vec!["gun"];
let detections = processor.parse_qwenvl_response(response, &objects);
assert_eq!(detections.len(), 0);
}
@@ -300,9 +306,9 @@ mod tests {
let processor = CascadeVisionProcessor::new();
let response = "This is not JSON";
let objects = vec!["gun"];
let detections = processor.parse_qwenvl_response(response, &objects);
assert_eq!(detections.len(), 0);
}
}
}

View File

@@ -75,21 +75,19 @@ pub async fn classify_image(
.await
.context("Failed to run CLIP classifier")?;
let json_str = std::fs::read_to_string(&output_path)
.context("Failed to read CLIP output")?;
let json_str = std::fs::read_to_string(&output_path).context("Failed to read CLIP output")?;
let results: std::collections::HashMap<String, Vec<ClipPrediction>> =
serde_json::from_str(&json_str)
.context("Failed to parse CLIP output")?;
serde_json::from_str(&json_str).context("Failed to parse CLIP output")?;
let predictions = results
.get(image_path)
.cloned()
.unwrap_or_default();
let predictions = results.get(image_path).cloned().unwrap_or_default();
tracing::info!(
"[CLIP] Top prediction: {} ({:.3})",
predictions.first().map(|p| p.label.as_str()).unwrap_or("none"),
predictions
.first()
.map(|p| p.label.as_str())
.unwrap_or("none"),
predictions.first().map(|p| p.confidence).unwrap_or(0.0)
);
@@ -145,26 +143,28 @@ pub async fn detect_objects(
.await
.context("Failed to run CLIP object detection")?;
let json_str = std::fs::read_to_string(&output_path)
.context("Failed to read CLIP output")?;
let json_str = std::fs::read_to_string(&output_path).context("Failed to read CLIP output")?;
let results: std::collections::HashMap<String, Vec<ClipPrediction>> =
serde_json::from_str(&json_str)
.context("Failed to parse CLIP output")?;
serde_json::from_str(&json_str).context("Failed to parse CLIP output")?;
let detected = results
.get(image_path)
.cloned()
.unwrap_or_default();
let detected = results.get(image_path).cloned().unwrap_or_default();
if !detected.is_empty() {
tracing::info!(
"[CLIP] Detected {} objects: {}",
detected.len(),
detected.iter().map(|p| p.label.as_str()).collect::<Vec<_>>().join(", ")
detected
.iter()
.map(|p| p.label.as_str())
.collect::<Vec<_>>()
.join(", ")
);
} else {
tracing::info!("[CLIP] No objects detected above threshold {:.2}", threshold);
tracing::info!(
"[CLIP] No objects detected above threshold {:.2}",
threshold
);
}
Ok(detected)
@@ -189,8 +189,7 @@ pub async fn classify_images(
// Create temp file with image paths
let temp_file = format!("/tmp/clip_batch_{}.txt", uuid::Uuid::new_v4());
std::fs::write(&temp_file, image_paths.join("\n"))
.context("Failed to write batch file")?;
std::fs::write(&temp_file, image_paths.join("\n")).context("Failed to write batch file")?;
let mut args = vec![
temp_file.clone(),
@@ -224,12 +223,11 @@ pub async fn classify_images(
.await
.context("Failed to run batch CLIP classification")?;
let json_str = std::fs::read_to_string(&output_path)
.context("Failed to read CLIP batch output")?;
let json_str =
std::fs::read_to_string(&output_path).context("Failed to read CLIP batch output")?;
let results_map: std::collections::HashMap<String, Vec<ClipPrediction>> =
serde_json::from_str(&json_str)
.context("Failed to parse CLIP batch output")?;
serde_json::from_str(&json_str).context("Failed to parse CLIP batch output")?;
let results: Vec<ClipImageResult> = image_paths
.iter()
@@ -287,4 +285,4 @@ mod tests {
assert_eq!(result.predictions.len(), 2);
assert_eq!(result.predictions[0].label, "indoor");
}
}
}

View File

@@ -164,8 +164,34 @@ impl PythonExecutor {
})
}
/// Select the frame indices to process when sampling at roughly eight frames per second.
///
/// The stride between selected frames is `fps / 8` rounded to the nearest whole
/// frame, never less than 1. Frame 0 is always the first index; the result is
/// empty when `total_frames` is zero or negative.
pub fn compute_8hz_frames(total_frames: i64, fps: f64) -> Vec<i64> {
    // A stride below one frame (low or zero FPS) degrades to "take every frame".
    let stride = match (fps / 8.0).round() as i64 {
        whole if whole >= 1 => whole,
        _ => 1,
    };
    let sampled = (0..total_frames).step_by(stride as usize);
    sampled.collect()
}
/// Combine the base sample frames with extra refinement frames (adaptive sampling).
///
/// Returns the union of both inputs as an ascending list with duplicates removed.
pub fn merge_refine_frames(base: &[i64], refine: &std::collections::HashSet<i64>) -> Vec<i64> {
    let mut merged: Vec<i64> = Vec::with_capacity(base.len() + refine.len());
    merged.extend_from_slice(base);
    merged.extend(refine.iter().copied());
    // Sorting first makes equal values adjacent so `dedup` removes every duplicate.
    merged.sort_unstable();
    merged.dedup();
    merged
}
/// Render a frame list as the comma-separated value passed to a script's `--frames` argument.
///
/// An empty slice yields an empty string; no trailing comma is emitted.
pub fn format_frames_arg(frames: &[i64]) -> String {
    let mut joined = String::new();
    for (position, frame) in frames.iter().enumerate() {
        if position > 0 {
            joined.push(',');
        }
        joined.push_str(&frame.to_string());
    }
    joined
}
/// Verify a script's SHA256 against the checksums manifest before execution.
fn verify_script_integrity(&self, script_name: &str) -> Result<()> {
pub fn verify_script_integrity(&self, script_name: &str) -> Result<()> {
let script_path = self.scripts_dir.join(script_name);
let rel_path = format!("./{}", script_name);
@@ -226,6 +252,147 @@ impl PythonExecutor {
uuid: Option<&str>,
log_prefix: &str,
timeout_duration: Option<Duration>,
) -> Result<()> {
self.run_with_output_idx(script_name, args, uuid, log_prefix, timeout_duration, 1)
.await
}
/// Run a script with optional frame list for 8Hz sampling.
/// If frames is provided, passes --frames=0,4,8,... to the script.
pub async fn run_with_frames(
&self,
script_name: &str,
args: &[&str],
uuid: Option<&str>,
log_prefix: &str,
timeout_duration: Option<Duration>,
frames: Option<&[i64]>,
) -> Result<()> {
let script_path = self.scripts_dir.join(script_name);
if !script_path.exists() {
anyhow::bail!("Script not found: {:?}", script_path);
}
self.verify_script_integrity(script_name).context(
"Pre-execution integrity check failed — possible version mismatch or corruption",
)?;
let output_idx = 1;
let output_path = args.get(output_idx).map(|p| std::path::PathBuf::from(p));
let tmp_path = output_path.as_ref().map(|p| {
let mut tmp = p.to_path_buf();
tmp.set_extension("json.tmp");
tmp
});
if let (Some(src), Some(dst)) = (&output_path, &tmp_path) {
if src.exists() {
let _ = std::fs::rename(src, dst);
}
}
let mut cmd = Command::new(&self.python_path);
cmd.arg(&script_path);
for arg in args {
cmd.arg(arg);
}
if let Some(u) = uuid {
cmd.arg("--uuid").arg(u);
}
// Pass frame list for 8Hz sampling
if let Some(frames) = frames {
let frames_str = Self::format_frames_arg(frames);
cmd.arg("--frames").arg(&frames_str);
tracing::info!("[{}] 8Hz sampling: {} frames", log_prefix, frames.len());
}
cmd.stdout(Stdio::piped());
cmd.stderr(Stdio::piped());
cmd.kill_on_drop(true);
cmd.process_group(0);
tracing::info!("[{}] Starting: {:?}", log_prefix, script_name);
let mut child = cmd
.spawn()
.with_context(|| format!("Failed to run {}", script_name))?;
let child_pid = child.id();
let stdout = child.stdout.take().context("Failed to capture stdout")?;
let stderr = child.stderr.take().context("Failed to capture stderr")?;
let mut stdout_reader = BufReader::new(stdout).lines();
let mut stderr_reader = BufReader::new(stderr).lines();
let run_future = async {
let mut stdout_done = false;
let mut stderr_done = false;
loop {
if !stdout_done {
match stdout_reader
.next_line()
.await
.context("Failed to read stdout")?
{
Some(line) => tracing::info!("[{}] {}", log_prefix, line),
None => stdout_done = true,
}
}
if !stderr_done {
match stderr_reader
.next_line()
.await
.context("Failed to read stderr")?
{
Some(line) => tracing::warn!("[{}] {}", log_prefix, line),
None => stderr_done = true,
}
}
if stdout_done && stderr_done {
break;
}
}
let status = child.wait().await?;
Ok::<_, anyhow::Error>(status)
};
let timeout_duration = timeout_duration.unwrap_or(Duration::from_secs(3600));
let status = timeout(timeout_duration, run_future)
.await
.map_err(|e| anyhow::anyhow!("Timeout after {}s: {}", timeout_duration.as_secs(), e))?
.with_context(|| format!("Failed to run {}", script_name))?;
// Kill entire process group on failure
if !status.success() {
if let Some(pid) = child_pid {
unsafe {
libc::kill(-(pid as i32), libc::SIGTERM);
}
}
anyhow::bail!("Script {} failed with exit status: {}", script_name, status);
}
// Rename .json.tmp back to .json on success
if let (Some(src), Some(dst)) = (&tmp_path, &output_path) {
if src.exists() {
let _ = std::fs::rename(src, dst);
}
}
Ok(())
}
pub async fn run_with_output_idx(
&self,
script_name: &str,
args: &[&str],
uuid: Option<&str>,
log_prefix: &str,
timeout_duration: Option<Duration>,
output_idx: usize,
) -> Result<()> {
let script_path = self.scripts_dir.join(script_name);
@@ -239,7 +406,7 @@ impl PythonExecutor {
)?;
// Mark the output file as in-progress (add .tmp suffix)
let output_path = args.get(1).map(|p| std::path::PathBuf::from(p));
let output_path = args.get(output_idx).map(|p| std::path::PathBuf::from(p));
let tmp_path = output_path.as_ref().map(|p| {
let mut tmp = p.to_path_buf();
tmp.set_extension("json.tmp");
@@ -282,27 +449,29 @@ impl PythonExecutor {
let mut stderr_reader = BufReader::new(stderr).lines();
let run_future = async {
let mut stdout_done = false;
let mut stderr_done = false;
loop {
tokio::select! {
line = stdout_reader.next_line() => {
line = stdout_reader.next_line(), if !stdout_done => {
match line {
Ok(Some(line)) => {
if line.starts_with(&format!("{}_", log_prefix)) {
tracing::info!("[{}] {}", log_prefix, line);
}
}
Ok(None) => break,
Ok(None) => stdout_done = true,
Err(e) => tracing::warn!("[{}] stdout error: {}", log_prefix, e),
}
}
line = stderr_reader.next_line() => {
line = stderr_reader.next_line(), if !stderr_done => {
match line {
Ok(Some(line)) => {
if line.starts_with(&format!("{}_", log_prefix)) {
tracing::info!("[{}] {}", log_prefix, line);
}
}
Ok(None) => {}
Ok(None) => stderr_done = true,
Err(e) => tracing::warn!("[{}] stderr error: {}", log_prefix, e),
}
}
@@ -390,6 +559,183 @@ impl PythonExecutor {
Ok(())
}
/// Run a script with a caller-chosen output argument index and an optional
/// explicit frame list (8Hz sampling).
///
/// `args[output_idx]`, when present, is treated as the output path. An existing
/// output file is renamed to `*.json.tmp` before the run. After a successful
/// run a still-existing `.tmp` file is renamed back to the output path; after a
/// failure or timeout it is moved to `*.json.partial` (if it parses as JSON) or
/// `*.json.err` (otherwise).
///
/// Only stdout/stderr lines that start with `"{log_prefix}_"` are logged.
/// With `timeout_duration == None` the script runs without a time limit.
///
/// # Errors
///
/// Fails when the script is missing, the integrity check fails, spawning
/// fails, the script exits with a non-success status, or the timeout elapses.
pub async fn run_with_output_idx_and_frames(
&self,
script_name: &str,
args: &[&str],
uuid: Option<&str>,
log_prefix: &str,
timeout_duration: Option<Duration>,
output_idx: usize,
frames: Option<&[i64]>,
) -> Result<()> {
let script_path = self.scripts_dir.join(script_name);
if !script_path.exists() {
anyhow::bail!("Script not found: {:?}", script_path);
}
// Refuse to execute a script whose checksum does not match the manifest.
self.verify_script_integrity(script_name).context(
"Pre-execution integrity check failed — possible version mismatch or corruption",
)?;
// Derive the sibling "<output>.json.tmp" path used as the in-progress marker.
let output_path = args.get(output_idx).map(|p| std::path::PathBuf::from(p));
let tmp_path = output_path.as_ref().map(|p| {
let mut tmp = p.to_path_buf();
tmp.set_extension("json.tmp");
tmp
});
// Move any pre-existing output aside; a failed rename is deliberately ignored.
if let (Some(src), Some(dst)) = (&output_path, &tmp_path) {
if src.exists() {
let _ = std::fs::rename(src, dst);
}
}
let mut cmd = Command::new(&self.python_path);
cmd.arg(&script_path);
for arg in args {
cmd.arg(arg);
}
if let Some(u) = uuid {
cmd.arg("--uuid").arg(u);
}
// Pass frame list for 8Hz sampling
if let Some(frames) = frames {
let frames_str = Self::format_frames_arg(frames);
cmd.arg("--frames").arg(&frames_str);
tracing::info!("[{}] 8Hz sampling: {} frames", log_prefix, frames.len());
}
cmd.stdout(Stdio::piped());
cmd.stderr(Stdio::piped());
cmd.kill_on_drop(true);
// Put the child in its own process group so the timeout path can signal the group.
cmd.process_group(0);
tracing::info!("[{}] Starting: {:?}", log_prefix, script_name);
let mut child = cmd
.spawn()
.with_context(|| format!("Failed to run {}", script_name))?;
// Captured now because `child` is mutably borrowed by `run_future` below.
let child_pid = child.id();
let stdout = child.stdout.take().context("Failed to capture stdout")?;
let stderr = child.stderr.take().context("Failed to capture stderr")?;
let mut stdout_reader = BufReader::new(stdout).lines();
let mut stderr_reader = BufReader::new(stderr).lines();
// Drains both pipes and waits for exit concurrently; the loop ends as soon as
// `child.wait()` resolves, even if pipe output is still pending.
let run_future = async {
let mut stdout_done = false;
let mut stderr_done = false;
loop {
tokio::select! {
line = stdout_reader.next_line(), if !stdout_done => {
match line {
Ok(Some(line)) => {
// Only protocol lines ("<PREFIX>_...") are forwarded to the log.
if line.starts_with(&format!("{}_", log_prefix)) {
tracing::info!("[{}] {}", log_prefix, line);
}
}
Ok(None) => stdout_done = true,
Err(e) => tracing::warn!("[{}] stdout error: {}", log_prefix, e),
}
}
line = stderr_reader.next_line(), if !stderr_done => {
match line {
Ok(Some(line)) => {
if line.starts_with(&format!("{}_", log_prefix)) {
tracing::info!("[{}] {}", log_prefix, line);
}
}
Ok(None) => stderr_done = true,
Err(e) => tracing::warn!("[{}] stderr error: {}", log_prefix, e),
}
}
status = child.wait() => {
match status {
Ok(status) => {
if !status.success() {
tracing::error!("[{}] Process failed: {}", log_prefix, status);
return Err(anyhow::anyhow!("{} exited with: {}", script_name, status));
}
tracing::info!("[{}] Completed successfully", log_prefix);
}
// NOTE(review): a wait() error is only logged; the loop then breaks
// and the future resolves to Ok(()) — confirm this is intended.
Err(e) => tracing::error!("[{}] wait error: {}", log_prefix, e),
}
break;
}
}
}
Ok(())
};
// Failure cleanup: keep the .tmp file for inspection under a different name,
// ".json.partial" when it parses as JSON and ".json.err" otherwise.
let mark_failed = || {
if let Some(tmp) = &tmp_path {
if tmp.exists() {
if let Some(out) = &output_path {
let is_valid = std::fs::read_to_string(tmp)
.ok()
.and_then(|c| serde_json::from_str::<serde_json::Value>(&c).ok())
.is_some();
if is_valid {
let mut partial_path = out.to_path_buf();
partial_path.set_extension("json.partial");
let _ = std::fs::rename(tmp, &partial_path);
tracing::warn!(
"[Executor] Partial output preserved: {:?}",
partial_path
);
} else {
let mut err_path = out.to_path_buf();
err_path.set_extension("json.err");
let _ = std::fs::rename(tmp, &err_path);
}
}
}
}
};
if let Some(duration) = timeout_duration {
match timeout(duration, run_future).await {
Ok(Ok(())) => {}
Ok(Err(e)) => {
mark_failed();
return Err(e);
}
// Timeout elapsed: clean up, SIGKILL the process group, then kill the child.
Err(_) => {
mark_failed();
if let Some(pid) = child_pid {
let pgid = pid as i32;
unsafe {
libc::killpg(pgid, libc::SIGKILL);
}
}
// NOTE(review): if this kill fails, its error is returned instead of
// the timeout error below.
child.kill().await.context("Failed to kill process")?;
anyhow::bail!("{} timed out after {:?}", script_name, duration);
}
}
} else {
// No timeout requested: wait for the script indefinitely.
if let Err(e) = run_future.await {
mark_failed();
return Err(e);
}
}
// Success: if the .tmp marker still exists, move it back to the output path.
if let Some(tmp) = &tmp_path {
if tmp.exists() {
if let Some(out) = &output_path {
let _ = std::fs::rename(tmp, out);
}
}
}
Ok(())
}
pub async fn run_with_output(
&self,
script_name: &str,

View File

@@ -29,7 +29,7 @@ pub struct Face {
pub height: i32,
pub confidence: f32,
pub embedding: Option<Vec<f32>>,
pub landmarks: Option<Vec<Vec<f32>>>,
pub landmarks: Option<serde_json::Value>,
pub attributes: Option<FaceAttributes>,
}
@@ -43,6 +43,7 @@ pub async fn process_face(
video_path: &str,
output_path: &str,
uuid: Option<&str>,
frames: Option<&[i64]>,
) -> Result<FaceResult> {
let executor = PythonExecutor::new()?;
let script_path = executor.script_path("face_processor.py");
@@ -59,12 +60,13 @@ pub async fn process_face(
}
executor
.run(
.run_with_frames(
"face_processor.py",
&[video_path, output_path],
uuid,
"FACE",
Some(FACE_TIMEOUT),
frames,
)
.await
.with_context(|| format!("Failed to run {:?}", script_path))?;
@@ -99,7 +101,7 @@ mod tests {
height: 60,
confidence: 0.95,
embedding: Some(vec![0.1, 0.2, 0.3]),
landmarks: Some(vec![vec![10.0, 20.0], vec![30.0, 40.0]]),
landmarks: Some(serde_json::json!([[10.0, 20.0], [30.0, 40.0]])),
attributes: Some(FaceAttributes {
age: Some(30),
gender: Some("male".to_string()),

View File

@@ -0,0 +1,96 @@
use anyhow::{Context, Result};
use serde::{Deserialize, Serialize};
use std::time::Duration;
use super::executor::PythonExecutor;
const MEDIAPIPE_TIMEOUT: Duration = Duration::from_secs(7200);
/// Top-level shape of the JSON document that `process_mediapipe` reads back
/// from its output file.
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct MediaPipeResult {
/// NOTE(review): presumably the frame count reported by the script — confirm
/// whether this is total or processed frames.
pub frame_count: u64,
/// Frames per second as reported by the script (0.0 in the empty fallback result).
pub fps: f64,
/// Per-frame entries; `process_mediapipe` logs the length of this list.
pub frames: Vec<MediaPipeFrame>,
}
/// One entry of `MediaPipeResult::frames`.
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct MediaPipeFrame {
/// Frame index — NOTE(review): presumably zero-based within the video; confirm.
pub frame: u64,
/// NOTE(review): presumably seconds from the start of the video — confirm units.
pub timestamp: f64,
/// Persons reported for this frame.
pub persons: Vec<MediaPipePerson>,
}
/// One person within a `MediaPipeFrame`; every landmark group is optional.
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct MediaPipePerson {
/// Identifier of the person — NOTE(review): scope (per frame vs. per video) is
/// not visible here; confirm against the script.
pub person_id: u64,
/// Body pose landmarks, if present in the JSON.
pub pose: Option<MediaPipePose>,
/// Left-hand landmarks, if present in the JSON.
pub left_hand: Option<MediaPipeHand>,
/// Right-hand landmarks, if present in the JSON.
pub right_hand: Option<MediaPipeHand>,
/// Face-mesh landmarks, if present in the JSON.
pub face_mesh: Option<MediaPipeFaceMesh>,
}
/// Pose landmarks of one person.
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct MediaPipePose {
/// One inner vector per landmark — NOTE(review): component layout (e.g. x, y, z,
/// visibility) is not visible here; confirm against the script.
pub landmarks: Vec<Vec<f64>>,
/// Optional list of strings — NOTE(review): presumably the names of the 33 pose
/// keypoints; confirm.
pub keypoints_33: Option<Vec<String>>,
}
/// Landmarks of a single hand.
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct MediaPipeHand {
/// One inner vector per landmark — NOTE(review): component layout not visible
/// here; confirm against the script.
pub landmarks: Vec<Vec<f64>>,
}
/// Face-mesh landmarks of one person.
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct MediaPipeFaceMesh {
/// One inner vector per landmark — NOTE(review): component layout not visible
/// here; confirm against the script.
pub landmarks: Vec<Vec<f64>>,
}
/// Produce the MediaPipe result for a video, reusing an existing output file when there is one.
///
/// * If `output_path` already exists it is parsed and returned without running anything.
/// * If the processor script is missing, an empty result is returned (with a warning).
/// * Otherwise the script is run with `[video_path, output_path]` and the
///   resulting JSON file is parsed.
///
/// # Errors
///
/// Fails when the executor cannot be created, the script run fails, or the
/// output file cannot be read or parsed.
pub async fn process_mediapipe(
    video_path: &str,
    output_path: &str,
    uuid: Option<&str>,
) -> Result<MediaPipeResult> {
    // Shared by the "already exists" shortcut and the post-run path.
    let load_output = || -> Result<MediaPipeResult> {
        let raw =
            std::fs::read_to_string(output_path).context("Failed to read MEDIAPIPE output")?;
        serde_json::from_str(&raw).context("Failed to parse MEDIAPIPE output")
    };

    // If mediapipe.json already exists (written by face_processor), skip
    let already_written = std::path::Path::new(output_path).exists();
    if already_written {
        let cached = load_output()?;
        tracing::info!(
            "[MEDIAPIPE] Skipping (already exists): {} frames",
            cached.frames.len()
        );
        return Ok(cached);
    }

    let executor = PythonExecutor::new()?;
    let script_name = "mediapipe_processor_v1.11.py";
    let script_path = executor.script_path(script_name);

    tracing::info!("[MEDIAPIPE] Starting MediaPipe Holistic: {}", video_path);

    if !script_path.exists() {
        tracing::warn!("[MEDIAPIPE] Script not found, returning empty result");
        let empty = MediaPipeResult {
            frame_count: 0,
            fps: 0.0,
            frames: Vec::new(),
        };
        return Ok(empty);
    }

    let script_args = [video_path, output_path];
    executor
        .run(
            script_name,
            &script_args,
            uuid,
            "MEDIAPIPE",
            Some(MEDIAPIPE_TIMEOUT),
        )
        .await
        .with_context(|| format!("Failed to run {:?}", script_path))?;

    let parsed = load_output()?;
    tracing::info!("[MEDIAPIPE] Result: {} frames", parsed.frames.len());

    Ok(parsed)
}

View File

@@ -0,0 +1,203 @@
use anyhow::{Context, Result};
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::time::Duration;
use tokio::process::Command;
use tokio::time::timeout;
use super::executor::PythonExecutor;
const MEDIAPIPE_TIMEOUT: Duration = Duration::from_secs(7200);
/// Top-level shape of the JSON document that `process_mediapipe_v2` reads back
/// from its output file.
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct MediaPipeResult {
/// Run-level information about the video and the sampling that was used.
pub metadata: MediaPipeMetadata,
/// Per-frame entries keyed by a string — NOTE(review): presumably the frame
/// number rendered as text; confirm against the script.
pub frames: HashMap<String, MediaPipeDictEntry>,
}
/// Metadata block of a `MediaPipeResult`.
///
/// NOTE(review): field meanings below are inferred from their names only; the
/// producing script is not visible here — confirm before relying on them.
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct MediaPipeMetadata {
/// Frames per second of the source video (presumably).
pub fps: f64,
/// Number of frames in the source video (presumably).
pub total_frames: i64,
/// Number of frames the processor actually handled (presumably).
pub processed_frames: i64,
/// Stride between processed frames (presumably the `--sample-interval` value).
pub sample_interval: i64,
/// Frame width, presumably in pixels.
pub width: i64,
/// Frame height, presumably in pixels.
pub height: i64,
/// Free-form string naming the processor that produced the file (presumably).
pub processor: String,
}
/// One value of the `MediaPipeResult::frames` map.
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct MediaPipeDictEntry {
/// Frame index — NOTE(review): presumably matches the map key; confirm.
pub frame_number: i64,
/// NOTE(review): presumably seconds from the start of the video — confirm units.
pub timestamp: f64,
/// Persons reported for this frame.
pub persons: Vec<MediaPipePerson>,
}
/// One person within a `MediaPipeDictEntry`.
///
/// Landmark groups are kept as raw JSON values, so their inner layout is not
/// validated during deserialization.
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct MediaPipePerson {
/// Identifier of the person — NOTE(review): scope (per frame vs. per video) is
/// not visible here; confirm against the script.
pub person_id: i64,
/// Bounding box of the person; defaults to `None` when the key is absent.
#[serde(default)]
pub bbox: Option<MediaPipeBBox>,
/// Raw face-mesh JSON, if any.
pub face_mesh: Option<serde_json::Value>,
/// Raw pose JSON, if any.
pub pose: Option<serde_json::Value>,
/// Left/right hand data; this key is required in the JSON.
pub hands: MediaPipeHands,
}
/// Integer bounding box of a person.
///
/// NOTE(review): presumably pixel coordinates with (`x`, `y`) as the top-left
/// corner — confirm against the script.
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct MediaPipeBBox {
pub x: i64,
pub y: i64,
pub width: i64,
pub height: i64,
}
/// Hand data of one person, kept as raw JSON per side.
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct MediaPipeHands {
/// Raw left-hand JSON, if any.
pub left: Option<serde_json::Value>,
/// Raw right-hand JSON, if any.
pub right: Option<serde_json::Value>,
}
/// Run `mediapipe_holistic_processor.py` on a video and parse its JSON output.
///
/// When `frames` is `Some`, the explicit list is passed as `--frames`;
/// otherwise a `--sample-interval` is derived from the video's frame rate so
/// that roughly eight frames per second are processed. Progress markers
/// written by the script to stderr (`MEDIAPIPE_*` lines) are forwarded to the
/// log once the process has finished.
///
/// # Errors
///
/// Fails when the script is missing, the process cannot be spawned, it exceeds
/// `MEDIAPIPE_TIMEOUT`, it exits with a non-success status, or the output file
/// cannot be read or parsed.
pub async fn process_mediapipe_v2(
    video_path: &str,
    output_path: &str,
    uuid: Option<&str>,
    frames: Option<&[i64]>,
) -> Result<MediaPipeResult> {
    let executor = PythonExecutor::new()?;
    let script_path = executor.script_path("mediapipe_holistic_processor.py");

    tracing::info!("[MEDIAPIPE] Starting MediaPipe Holistic: {}", video_path);

    if !script_path.exists() {
        anyhow::bail!("mediapipe_holistic_processor.py not found");
    }

    let mut cmd = Command::new(executor.python_path());
    cmd.arg(&script_path).arg(video_path).arg(output_path);

    // Use explicit frame list if provided, otherwise calculate sample_interval for ~8Hz
    if let Some(frames) = frames {
        let frames_str = PythonExecutor::format_frames_arg(frames);
        cmd.arg("--frames").arg(&frames_str);
        tracing::info!("[MEDIAPIPE] 8Hz sampling: {} frames", frames.len());
    } else {
        let sample_interval = calculate_sample_interval(video_path).await;
        cmd.arg("--sample-interval")
            .arg(sample_interval.to_string());
    }

    if let Some(u) = uuid {
        cmd.arg("--uuid").arg(u);
    }

    // Without `kill_on_drop`, a timed-out run would leave the Python process
    // alive in the background: the timeout only drops the future that owns the
    // child, and a dropped child is not killed by default.
    cmd.stdout(std::process::Stdio::piped())
        .stderr(std::process::Stdio::piped())
        .kill_on_drop(true);

    let child = cmd.spawn().context("Failed to run MEDIAPIPE processor")?;

    let output = match timeout(MEDIAPIPE_TIMEOUT, child.wait_with_output()).await {
        Ok(Ok(output)) => output,
        Ok(Err(e)) => return Err(e).context("Failed to run MEDIAPIPE processor"),
        Err(_) => anyhow::bail!(
            "MEDIAPIPE processing timed out after {:?}",
            MEDIAPIPE_TIMEOUT
        ),
    };

    let stderr = String::from_utf8_lossy(&output.stderr);
    for line in stderr.lines() {
        let trimmed = line.trim();
        // `strip_prefix` removes the marker exactly once and yields the payload.
        if trimmed.starts_with("MEDIAPIPE_START") {
            tracing::info!("[MEDIAPIPE] Loading model...");
        } else if let Some(count) = trimmed.strip_prefix("MEDIAPIPE_FRAME:") {
            tracing::info!("[MEDIAPIPE] Processed {} frames...", count);
        } else if let Some(count) = trimmed.strip_prefix("MEDIAPIPE_COMPLETE:") {
            tracing::info!("[MEDIAPIPE] Completed! Total: {} frames", count);
        } else if let Some(info) = trimmed.strip_prefix("MEDIAPIPE_INFO:") {
            tracing::info!("[MEDIAPIPE] {}", info);
        } else if let Some(err) = trimmed.strip_prefix("MEDIAPIPE_ERROR:") {
            tracing::error!("[MEDIAPIPE] {}", err);
        }
    }

    tracing::info!("[MEDIAPIPE] stderr output:\n{}", stderr);

    if !output.status.success() {
        anyhow::bail!("MEDIAPIPE failed: {}", stderr);
    }

    let json_str =
        std::fs::read_to_string(output_path).context("Failed to read MEDIAPIPE output")?;
    let result: MediaPipeResult =
        serde_json::from_str(&json_str).context("Failed to parse MEDIAPIPE output")?;

    tracing::info!("[MEDIAPIPE] Result: {} frames", result.frames.len());

    Ok(result)
}
async fn calculate_sample_interval(video_path: &str) -> i64 {
// Try ffprobe to get FPS, calculate sample_interval for ~8Hz
let probe_cmd = Command::new("ffprobe")
.args([
"-v",
"quiet",
"-print_format",
"json",
"-show_streams",
video_path,
])
.output()
.await;
if let Ok(output) = probe_cmd {
if output.status.success() {
if let Ok(json_str) = String::from_utf8(output.stdout) {
if let Ok(probe_data) = serde_json::from_str::<serde_json::Value>(&json_str) {
if let Some(streams) = probe_data["streams"].as_array() {
for stream in streams {
if stream["codec_type"] == "video" {
if let Some(fps_str) = stream["r_frame_rate"].as_str() {
// Parse "30000/1001" style fps
if let Some(fps) = parse_fractional_fps(fps_str) {
let interval = (fps / 8.0).round() as i64;
return interval.max(1);
}
}
if let Some(fps_val) = stream["avg_frame_rate"].as_str() {
if let Some(fps) = parse_fractional_fps(fps_val) {
let interval = (fps / 8.0).round() as i64;
return interval.max(1);
}
}
}
}
}
}
}
}
}
4 // Default: assume 30fps / 8 = ~4
}
/// Parse a frame-rate string that is either a fraction (`"30000/1001"`) or a plain number (`"25"`).
///
/// Returns `None` when a fraction part is not numeric, when the denominator is
/// not strictly positive, or when the text is not a number at all.
fn parse_fractional_fps(s: &str) -> Option<f64> {
    if let Some((numerator, denominator)) = s.split_once('/') {
        // Only an exact "a/b" shape counts as a fraction; further slashes do not.
        if !denominator.contains('/') {
            let num = numerator.parse::<f64>().ok()?;
            let den = denominator.parse::<f64>().ok()?;
            if den > 0.0 {
                return Some(num / den);
            }
        }
    }
    s.parse::<f64>().ok()
}

View File

@@ -1,3 +1,4 @@
pub mod appearance;
pub mod asr;
pub mod asrx;
pub mod caption;
@@ -8,6 +9,7 @@ pub mod executor;
pub mod face;
pub mod face_recognition;
pub mod heuristic_scene;
pub mod mediapipe_v2;
pub mod ocr;
pub mod pose;
pub mod scene_classification;
@@ -15,11 +17,17 @@ pub mod story;
pub mod tkg;
pub mod yolo;
pub use appearance::{
process_appearance, AppearanceFrame, AppearancePerson, AppearanceResult, BBox,
};
pub use asr::{process_asr, AsrResult, AsrSegment};
pub use asrx::{process_asrx, AsrxResult, AsrxSegment};
pub use caption::{process_caption, CaptionResult, CaptionSummary, FrameCaption};
pub use cascade_vision::{CascadeDetectionResult, CascadeVisionProcessor};
pub use clip::{classify_image, classify_images, detect_objects, ClipDetectionResult, ClipImageResult, ClipPrediction};
pub use clip::{
classify_image, classify_images, detect_objects, ClipDetectionResult, ClipImageResult,
ClipPrediction,
};
pub use cut::{process_cut, CutResult, CutScene};
pub use executor::{validate_python_env, PythonExecutor, RetryConfig};
pub use face::{process_face, Face, FaceFrame, FaceResult};
@@ -32,6 +40,10 @@ pub use heuristic_scene::{
build_heuristic_scene_meta, generate_scene_meta, CrowdSize, HeuristicSceneMeta,
SceneSegmentMeta,
};
pub use mediapipe_v2::{
process_mediapipe_v2, MediaPipeBBox, MediaPipeDictEntry, MediaPipeHands, MediaPipeMetadata,
MediaPipePerson, MediaPipeResult,
};
pub use ocr::{process_ocr, OcrFrame, OcrResult, OcrText};
pub use pose::{process_pose, Bbox, Keypoint, PersonPose, PoseFrame, PoseResult};
pub use scene_classification::{

View File

@@ -34,6 +34,7 @@ pub async fn process_ocr(
video_path: &str,
output_path: &str,
uuid: Option<&str>,
frames: Option<&[i64]>,
) -> Result<OcrResult> {
let executor = PythonExecutor::new()?;
let script_path = executor.script_path("ocr_processor.py");
@@ -50,12 +51,13 @@ pub async fn process_ocr(
}
executor
.run(
.run_with_frames(
"ocr_processor.py",
&[video_path, output_path],
uuid,
"OCR",
Some(OCR_TIMEOUT),
frames,
)
.await
.with_context(|| format!("Failed to run {:?}", script_path))?;

View File

@@ -46,6 +46,7 @@ pub async fn process_pose(
video_path: &str,
output_path: &str,
uuid: Option<&str>,
frames: Option<&[i64]>,
) -> Result<PoseResult> {
let executor = PythonExecutor::new()?;
let script_path = executor.script_path("pose_processor.py");
@@ -62,12 +63,13 @@ pub async fn process_pose(
}
executor
.run(
.run_with_frames(
"pose_processor.py",
&[video_path, output_path],
uuid,
"POSE",
Some(POSE_TIMEOUT),
frames,
)
.await
.with_context(|| format!("Failed to run {:?}", script_path))?;

View File

@@ -933,6 +933,145 @@ async fn build_co_occurrence_edges(
pool: &PgPool,
file_uuid: &str,
output_dir: &str,
) -> Result<usize> {
use crate::core::db::face_embedding_db::FaceEmbeddingDb;
let face_db = FaceEmbeddingDb::new();
let qdrant_embeddings = face_db.get_all_embeddings_for_file(file_uuid).await?;
if !qdrant_embeddings.is_empty() {
tracing::info!(
"[TKG-Phase2.6.1] Building co_occurrence edges from Qdrant ({} embeddings)",
qdrant_embeddings.len()
);
return build_co_occurrence_edges_from_qdrant(pool, file_uuid, output_dir, qdrant_embeddings).await;
}
tracing::info!("[TKG-Phase2.6.1] No Qdrant embeddings, falling back to PostgreSQL");
build_co_occurrence_edges_from_pg(pool, file_uuid, output_dir).await
}
/// Build `CO_OCCURS_WITH` edges between face traces and YOLO objects that
/// appear in the same frame, using face data from Qdrant payloads.
///
/// Faces are grouped by `payload.frame`; for every frame that also has entries
/// in `<output_dir>/<file_uuid>.yolo.json`, each (face trace, detection) pair
/// whose nodes both exist in the nodes table is upserted as an edge.
///
/// Returns the number of successful upserts (a pair seen in several frames is
/// counted once per frame). Returns `Ok(0)` when the YOLO file does not exist.
///
/// # Errors
///
/// Fails when the YOLO file cannot be read or parsed, or when a node lookup
/// query fails. A failed edge insert is logged and skipped.
async fn build_co_occurrence_edges_from_qdrant(
    pool: &PgPool,
    file_uuid: &str,
    output_dir: &str,
    qdrant_embeddings: Vec<(String, Vec<f32>, crate::core::db::face_embedding_db::FaceEmbeddingPayload)>,
) -> Result<usize> {
    let yolo_path = Path::new(output_dir).join(format!("{}.yolo.json", file_uuid));
    if !yolo_path.exists() {
        return Ok(0);
    }

    let content = std::fs::read_to_string(&yolo_path)?;
    let yolo: YoloJson = serde_json::from_str(&content)?;

    let nodes_table = t("tkg_nodes");
    let edges_table = t("tkg_edges");

    // The statements do not vary per row, so build them once.
    let face_node_sql = format!(
        "SELECT id FROM {} WHERE file_uuid=$1 AND node_type='face_trace' AND external_id=$2",
        nodes_table
    );
    let object_node_sql = format!(
        "SELECT id FROM {} WHERE file_uuid=$1 AND node_type='object' AND external_id=$2",
        nodes_table
    );
    let insert_edge_sql = format!(
        r#"
        INSERT INTO {} (edge_type, source_node_id, target_node_id, file_uuid, properties)
        VALUES ($1, $2, $3, $4, $5::jsonb)
        ON CONFLICT (file_uuid, edge_type, source_node_id, target_node_id)
        DO UPDATE SET properties = COALESCE(EXCLUDED.properties, tkg_edges.properties)
        "#,
        edges_table
    );

    // frame -> trace ids of the faces seen in that frame
    let mut frame_traces: HashMap<i64, Vec<i64>> = HashMap::new();
    for (_, _, payload) in &qdrant_embeddings {
        frame_traces
            .entry(payload.frame)
            .or_default()
            .push(payload.trace_id as i64);
    }

    // Node ids cannot change while this function runs (it only writes edges),
    // so each trace / object class is looked up once instead of once per frame.
    // `None` is cached as well: a missing node stays missing.
    let mut face_node_ids: HashMap<i64, Option<i64>> = HashMap::new();
    let mut object_node_ids: HashMap<_, Option<i64>> = HashMap::new();

    let mut edge_count = 0;

    for (frame, trace_ids) in &frame_traces {
        let frame_str = frame.to_string();
        let yolo_frame = match yolo.frames.get(&frame_str) {
            Some(f) => f,
            None => continue,
        };

        // Prefer `detections`; fall back to `objects` when it is empty.
        let dets = if !yolo_frame.detections.is_empty() {
            &yolo_frame.detections
        } else {
            &yolo_frame.objects
        };

        if dets.is_empty() {
            continue;
        }

        for trace_id in trace_ids {
            let cached_face = face_node_ids.get(trace_id).copied();
            let face_lookup = match cached_face {
                Some(known) => known,
                None => {
                    let external_id = format!("trace_{}", trace_id);
                    let row: Option<(i64,)> = sqlx::query_as(&face_node_sql)
                        .bind(file_uuid)
                        .bind(&external_id)
                        .fetch_optional(pool)
                        .await?;
                    let resolved = row.map(|(id,)| id);
                    face_node_ids.insert(*trace_id, resolved);
                    resolved
                }
            };

            let face_node_id = match face_lookup {
                Some(id) => id,
                None => continue,
            };

            for det in dets {
                let cached_object = object_node_ids.get(&det.class_name).copied();
                let object_lookup = match cached_object {
                    Some(known) => known,
                    None => {
                        let row: Option<(i64,)> = sqlx::query_as(&object_node_sql)
                            .bind(file_uuid)
                            .bind(&det.class_name)
                            .fetch_optional(pool)
                            .await?;
                        let resolved = row.map(|(id,)| id);
                        object_node_ids.insert(det.class_name.clone(), resolved);
                        resolved
                    }
                };

                let obj_node_id = match object_lookup {
                    Some(id) => id,
                    None => continue,
                };

                let edge_props = serde_json::json!({
                    "frame": *frame,
                    "object_confidence": det.confidence,
                });

                if let Err(e) = sqlx::query(&insert_edge_sql)
                    .bind("CO_OCCURS_WITH")
                    .bind(face_node_id)
                    .bind(obj_node_id)
                    .bind(file_uuid)
                    .bind(serde_json::to_string(&edge_props)?)
                    .execute(pool)
                    .await
                {
                    tracing::warn!(
                        "[TKG-Phase2.6.1] Edge insert failed (trace={}, obj={}): {}",
                        trace_id,
                        det.class_name,
                        e
                    );
                    continue;
                }

                edge_count += 1;
            }
        }
    }

    Ok(edge_count)
}
async fn build_co_occurrence_edges_from_pg(
pool: &PgPool,
file_uuid: &str,
output_dir: &str,
) -> Result<usize> {
let yolo_path = Path::new(output_dir).join(format!("{}.yolo.json", file_uuid));
if !yolo_path.exists() {
@@ -1046,6 +1185,154 @@ async fn build_speaker_face_edges(
pool: &PgPool,
file_uuid: &str,
output_dir: &str,
) -> Result<usize> {
use crate::core::db::face_embedding_db::FaceEmbeddingDb;
let face_db = FaceEmbeddingDb::new();
let qdrant_embeddings = face_db.get_all_embeddings_for_file(file_uuid).await?;
if !qdrant_embeddings.is_empty() {
tracing::info!(
"[TKG-Phase2.6.3] Building speaker_face edges from Qdrant ({} embeddings)",
qdrant_embeddings.len()
);
return build_speaker_face_edges_from_qdrant(pool, file_uuid, output_dir, qdrant_embeddings).await;
}
tracing::info!("[TKG-Phase2.6.3] No Qdrant embeddings, falling back to PostgreSQL");
build_speaker_face_edges_from_pg(pool, file_uuid, output_dir).await
}
/// Build `SPEAKS_AS` edges between face traces and speakers whose time ranges
/// overlap, using face data from Qdrant payloads.
///
/// Each trace's frame range is the min/max `payload.frame` over its embeddings
/// and is converted to seconds with an fps estimated from the last ASRX
/// segment (`end_frame / end`, falling back to 30.0). An edge is upserted for
/// every segment that covers at least 30% of the trace's duration, provided
/// both the face-trace node and the speaker node exist.
///
/// Returns the number of upserts. Returns `Ok(0)` when the ASRX file does not
/// exist or has no segments.
///
/// # Errors
///
/// Fails when the ASRX file cannot be read or parsed, or when any query
/// (node lookup or edge upsert) fails.
async fn build_speaker_face_edges_from_qdrant(
    pool: &PgPool,
    file_uuid: &str,
    output_dir: &str,
    qdrant_embeddings: Vec<(String, Vec<f32>, crate::core::db::face_embedding_db::FaceEmbeddingPayload)>,
) -> Result<usize> {
    let asrx_path = Path::new(output_dir).join(format!("{}.asrx.json", file_uuid));
    if !asrx_path.exists() {
        return Ok(0);
    }

    let content = std::fs::read_to_string(&asrx_path)?;
    let asrx: AsrxJson = serde_json::from_str(&content)?;

    // An empty segment list means there is nothing to link (and no fps to derive).
    let last = match asrx.segments.last() {
        Some(segment) => segment,
        None => return Ok(0),
    };

    let nodes_table = t("tkg_nodes");
    let edges_table = t("tkg_edges");

    // The statements do not vary per row, so build them once.
    let face_node_sql = format!(
        "SELECT id FROM {} WHERE file_uuid=$1 AND node_type='face_trace' AND external_id=$2",
        nodes_table
    );
    let speaker_node_sql = format!(
        "SELECT id FROM {} WHERE file_uuid=$1 AND node_type='speaker' AND external_id=$2",
        nodes_table
    );
    let insert_edge_sql = format!(
        r#"
        INSERT INTO {} (edge_type, source_node_id, target_node_id, file_uuid, properties)
        VALUES ($1, $2, $3, $4, $5::jsonb)
        ON CONFLICT (file_uuid, edge_type, source_node_id, target_node_id)
        DO UPDATE SET properties = COALESCE(EXCLUDED.properties, tkg_edges.properties)
        "#,
        edges_table
    );

    // trace id -> (first frame, last frame)
    let mut trace_ranges: HashMap<i64, (i64, i64)> = HashMap::new();
    for (_, _, payload) in &qdrant_embeddings {
        let trace_id = payload.trace_id as i64;
        let frame = payload.frame;
        let entry = trace_ranges.entry(trace_id).or_insert((frame, frame));
        entry.0 = entry.0.min(frame);
        entry.1 = entry.1.max(frame);
    }

    // Estimate fps from the last segment. A zero `end_frame` would give fps = 0
    // and turn every face time into inf/NaN, which slips past the overlap
    // guards below, so any non-positive or non-finite estimate uses the default.
    let estimated_fps = if last.end > 0.0 {
        last.end_frame as f64 / last.end
    } else {
        30.0
    };
    let fps = if estimated_fps.is_finite() && estimated_fps > 0.0 {
        estimated_fps
    } else {
        30.0
    };

    // Speaker nodes cannot change while this function runs (it only writes
    // edges), so each speaker id is looked up once; `None` is cached too.
    let mut speaker_node_ids: HashMap<_, Option<i64>> = HashMap::new();

    let mut edge_count = 0;

    for (tid, (sf, ef)) in &trace_ranges {
        let face_start_sec = *sf as f64 / fps;
        let face_end_sec = *ef as f64 / fps;
        let face_dur = face_end_sec - face_start_sec;

        // A trace seen in a single frame has no duration and can never reach
        // the overlap threshold; skip it before touching the database.
        if face_dur <= 0.0 {
            continue;
        }

        let face_ext_id = format!("trace_{}", tid);
        let face_node: Option<(i64,)> = sqlx::query_as(&face_node_sql)
            .bind(file_uuid)
            .bind(&face_ext_id)
            .fetch_optional(pool)
            .await?;

        let face_node_id = match face_node {
            Some((id,)) => id,
            None => continue,
        };

        for seg in &asrx.segments {
            let seg_start = seg.start;
            let seg_end = seg.end;

            let overlap_start = face_start_sec.max(seg_start);
            let overlap_end = face_end_sec.min(seg_end);
            if overlap_start >= overlap_end {
                continue;
            }

            let overlap_dur = overlap_end - overlap_start;
            // Fraction of the face trace's lifetime covered by this segment.
            let overlap_ratio = overlap_dur / face_dur;
            if overlap_ratio < 0.3 {
                continue;
            }

            let cached_speaker = speaker_node_ids.get(&seg.speaker_id).copied();
            let speaker_lookup = match cached_speaker {
                Some(known) => known,
                None => {
                    let row: Option<(i64,)> = sqlx::query_as(&speaker_node_sql)
                        .bind(file_uuid)
                        .bind(&seg.speaker_id)
                        .fetch_optional(pool)
                        .await?;
                    let resolved = row.map(|(id,)| id);
                    speaker_node_ids.insert(seg.speaker_id.clone(), resolved);
                    resolved
                }
            };

            let speaker_node_id = match speaker_lookup {
                Some(id) => id,
                None => continue,
            };

            let edge_props = serde_json::json!({
                "overlap_ratio": (overlap_ratio * 1000.0).round() / 1000.0,
                "overlap_duration_s": (overlap_dur * 10.0).round() / 10.0,
                "face_time_range": format!("{:.1}-{:.1}s", face_start_sec, face_end_sec),
                "speaker_time_range": format!("{:.1}-{:.1}s", seg_start, seg_end),
            });

            sqlx::query(&insert_edge_sql)
                .bind("SPEAKS_AS")
                .bind(face_node_id)
                .bind(speaker_node_id)
                .bind(file_uuid)
                .bind(serde_json::to_string(&edge_props)?)
                .execute(pool)
                .await?;

            edge_count += 1;
        }
    }

    Ok(edge_count)
}
async fn build_speaker_face_edges_from_pg(
pool: &PgPool,
file_uuid: &str,
output_dir: &str,
) -> Result<usize> {
let asrx_path = Path::new(output_dir).join(format!("{}.asrx.json", file_uuid));
if !asrx_path.exists() {
@@ -1073,7 +1360,6 @@ async fn build_speaker_face_edges(
.fetch_all(pool)
.await?;
// Calculate fps from last segment
let last = asrx.segments.last().unwrap();
let fps = if last.end > 0.0 {
last.end_frame as f64 / last.end
@@ -1173,48 +1459,234 @@ async fn build_face_face_edges(
file_uuid: &str,
pose_data: &[FacePose],
) -> Result<usize> {
let face_table = t("face_detections");
use crate::core::db::face_embedding_db::FaceEmbeddingDb;
let face_db = FaceEmbeddingDb::new();
let qdrant_embeddings = face_db.get_all_embeddings_for_file(file_uuid).await?;
if !qdrant_embeddings.is_empty() {
tracing::info!(
"[TKG-Phase2.6.2] Building face_face edges from Qdrant ({} embeddings)",
qdrant_embeddings.len()
);
return build_face_face_edges_from_qdrant(pool, file_uuid, pose_data, qdrant_embeddings).await;
}
tracing::info!("[TKG-Phase2.6.2] No Qdrant embeddings, falling back to PostgreSQL");
build_face_face_edges_from_pg(pool, file_uuid, pose_data).await
}
async fn build_face_face_edges_from_qdrant(
pool: &PgPool,
file_uuid: &str,
pose_data: &[FacePose],
qdrant_embeddings: Vec<(String, Vec<f32>, crate::core::db::face_embedding_db::FaceEmbeddingPayload)>,
) -> Result<usize> {
use crate::core::db::face_embedding_db::FaceEmbeddingPayload;
let nodes_table = t("tkg_nodes");
let edges_table = t("tkg_edges");
// Use SQL JOIN for fast co-occurrence detection
let rows: Vec<(i64, i64, i64)> = sqlx::query_as(&format!(
r#"
SELECT a.trace_id::bigint AS tid_a, b.trace_id::bigint AS tid_b, a.frame_number::bigint
FROM {} a
JOIN {} b
ON a.file_uuid = b.file_uuid
AND a.frame_number = b.frame_number
AND a.trace_id < b.trace_id
WHERE a.file_uuid = $1
AND a.trace_id IS NOT NULL
AND b.trace_id IS NOT NULL
ORDER BY a.frame_number
"#,
face_table, face_table
))
.bind(file_uuid)
.fetch_all(pool)
.await?;
// Also load per-frame bbox for mutual_gaze lookups
let bbox_data: Vec<(i64, i64, f64, f64, f64, f64)> = sqlx::query_as(
&format!(
"SELECT trace_id::bigint, frame_number::bigint, x::float8, y::float8, width::float8, height::float8 \
FROM {} WHERE file_uuid = $1 AND trace_id IS NOT NULL ORDER BY trace_id, frame_number",
face_table
)
)
.bind(file_uuid)
.fetch_all(pool)
.await?;
let mut frame_map: HashMap<(i64, i64), (f64, f64, f64, f64)> = HashMap::new(); // (trace_id, frame) → (x, y, w, h)
for (tid, frame, x, y, w, h) in &bbox_data {
frame_map.insert((*tid, *frame), (*x, *y, *w, *h));
let mut frame_faces: HashMap<i64, Vec<FaceEmbeddingPayload>> = HashMap::new();
for (_, _, payload) in &qdrant_embeddings {
frame_faces.entry(payload.frame).or_default().push(payload.clone());
}
let mut frame_map: HashMap<(i64, i64), (f64, f64, f64, f64)> = HashMap::new();
for (_, _, payload) in &qdrant_embeddings {
let trace_id = payload.trace_id as i64;
let frame = payload.frame;
frame_map.insert((trace_id, frame), (payload.bbox_x, payload.bbox_y, payload.bbox_w, payload.bbox_h));
}
let mut rows: Vec<(i64, i64, i64)> = Vec::new();
for (frame, faces) in frame_faces.iter() {
for i in 0..faces.len() {
for j in (i+1)..faces.len() {
let tid_a = faces[i].trace_id as i64;
let tid_b = faces[j].trace_id as i64;
let min_tid = tid_a.min(tid_b);
let max_tid = tid_a.max(tid_b);
rows.push((min_tid, max_tid, *frame));
}
}
}
let mut pair_frames: HashMap<(i64, i64), Vec<(i64, bool)>> = HashMap::new();
for (tid_a, tid_b, frame) in &rows {
let key = (*tid_a.min(tid_b), *tid_a.max(tid_b));
let bbox_a = frame_map.get(&(*tid_a, *frame));
let bbox_b = frame_map.get(&(*tid_b, *frame));
let gaze = match (bbox_a, bbox_b) {
(Some(&(xa, ya, wa, ha)), Some(&(xb, yb, wb, hb))) => {
get_pose_for_face(*frame, xa, ya, wa, ha, pose_data)
.and_then(|(yaw_a, _, _)| {
get_pose_for_face(*frame, xb, yb, wb, hb, pose_data).map(|(yaw_b, _, _)| {
detect_mutual_gaze(xa, wa, yaw_a, xb, wb, yaw_b, 0.05)
})
})
.unwrap_or(false)
}
_ => false,
};
pair_frames.entry(key).or_default().push((*frame, gaze));
}
let mut edge_count = 0;
let mut node_id_cache: HashMap<i64, i64> = HashMap::new();
for ((tid_a, tid_b), frame_data) in &pair_frames {
let ext_a = format!("trace_{}", tid_a);
let ext_b = format!("trace_{}", tid_b);
let n_a_id = match node_id_cache.get(tid_a) {
Some(id) => *id,
None => {
if let Some((id,)) = sqlx::query_as::<_, (i64,)>(&format!(
"SELECT id FROM {} WHERE file_uuid=$1 AND node_type='face_trace' AND external_id=$2",
nodes_table
))
.bind(file_uuid).bind(&ext_a).fetch_optional(pool).await?
{
node_id_cache.insert(*tid_a, id);
id
} else { continue; }
}
};
let n_b_id = match node_id_cache.get(tid_b) {
Some(id) => *id,
None => {
if let Some((id,)) = sqlx::query_as::<_, (i64,)>(&format!(
"SELECT id FROM {} WHERE file_uuid=$1 AND node_type='face_trace' AND external_id=$2",
nodes_table
))
.bind(file_uuid).bind(&ext_b).fetch_optional(pool).await?
{
node_id_cache.insert(*tid_b, id);
id
} else { continue; }
}
};
let frames: Vec<i64> = frame_data.iter().map(|(f, _)| *f).collect();
let gaze_frames: Vec<i64> = frame_data
.iter()
.filter(|(_, g)| *g)
.map(|(f, _)| *f)
.collect();
let gaze_count = gaze_frames.len() as i64;
let has_gaze = gaze_count > 0;
let edge_props = if has_gaze {
let mut yaw_a_sum = 0.0f64;
let mut yaw_b_sum = 0.0f64;
let mut gaze_sample = 0i64;
for (frame, _) in frame_data.iter().filter(|(_, g)| *g) {
let bbox_a = frame_map.get(&(*tid_a, *frame));
let bbox_b = frame_map.get(&(*tid_b, *frame));
if let (Some(&(xa, ya, wa, ha)), Some(&(xb, yb, wb, hb))) = (bbox_a, bbox_b) {
let pose_a = get_pose_for_face(*frame, xa, ya, wa, ha, pose_data);
let pose_b = get_pose_for_face(*frame, xb, yb, wb, hb, pose_data);
if let (Some((ya, _, _)), Some((yb, _, _))) = (pose_a, pose_b) {
yaw_a_sum += ya;
yaw_b_sum += yb;
gaze_sample += 1;
}
}
}
let (avg_ya, avg_yb) = if gaze_sample > 0 {
(
yaw_a_sum / gaze_sample as f64,
yaw_b_sum / gaze_sample as f64,
)
} else {
(0.0, 0.0)
};
serde_json::json!({
"first_frame": frames[0],
"frame_count": frames.len() as i64,
"mutual_gaze": true,
"gaze_frame_count": gaze_count,
"yaw_a_avg": (avg_ya * 1000.0).round() / 1000.0,
"yaw_b_avg": (avg_yb * 1000.0).round() / 1000.0,
})
} else {
serde_json::json!({
"first_frame": frames[0],
"frame_count": frames.len() as i64,
"mutual_gaze": false,
})
};
sqlx::query(&format!(
r#"
INSERT INTO {} (edge_type, source_node_id, target_node_id, file_uuid, properties)
VALUES ($1, $2, $3, $4, $5::jsonb)
ON CONFLICT (file_uuid, edge_type, source_node_id, target_node_id)
DO UPDATE SET properties = COALESCE(EXCLUDED.properties, tkg_edges.properties)
"#,
edges_table
))
.bind("CO_OCCURS_WITH")
.bind(n_a_id)
.bind(n_b_id)
.bind(file_uuid)
.bind(serde_json::to_string(&edge_props)?)
.execute(pool)
.await?;
edge_count += 1;
}
Ok(edge_count)
}
async fn build_face_face_edges_from_pg(
pool: &PgPool,
file_uuid: &str,
pose_data: &[FacePose],
) -> Result<usize> {
let face_table = t("face_detections");
let nodes_table = t("tkg_nodes");
let edges_table = t("tkg_edges");
let rows: Vec<(i64, i64, i64)> = sqlx::query_as(&format!(
r#"
SELECT a.trace_id::bigint AS tid_a, b.trace_id::bigint AS tid_b, a.frame_number::bigint
FROM {} a
JOIN {} b
ON a.file_uuid = b.file_uuid
AND a.frame_number = b.frame_number
AND a.trace_id < b.trace_id
WHERE a.file_uuid = $1
AND a.trace_id IS NOT NULL
AND b.trace_id IS NOT NULL
ORDER BY a.frame_number
"#,
face_table, face_table
))
.bind(file_uuid)
.fetch_all(pool)
.await?;
let bbox_data: Vec<(i64, i64, f64, f64, f64, f64)> = sqlx::query_as(
&format!(
"SELECT trace_id::bigint, frame_number::bigint, x::float8, y::float8, width::float8, height::float8 \
FROM {} WHERE file_uuid = $1 AND trace_id IS NOT NULL ORDER BY trace_id, frame_number",
face_table
)
)
.bind(file_uuid)
.fetch_all(pool)
.await?;
let mut frame_map: HashMap<(i64, i64), (f64, f64, f64, f64)> = HashMap::new();
for (tid, frame, x, y, w, h) in &bbox_data {
frame_map.insert((*tid, *frame), (*x, *y, *w, *h));
}
// Group by pair
let mut pair_frames: HashMap<(i64, i64), Vec<(i64, bool)>> = HashMap::new();
for (tid_a, tid_b, frame) in &rows {
let key = (*tid_a.min(tid_b), *tid_a.max(tid_b));
@@ -1237,7 +1709,6 @@ async fn build_face_face_edges(
}
let mut edge_count = 0;
// Cache node IDs to avoid repeated queries
let mut node_id_cache: HashMap<i64, i64> = HashMap::new();
for ((tid_a, tid_b), frame_data) in &pair_frames {
let ext_a = format!("trace_{}", tid_a);
@@ -1283,7 +1754,6 @@ async fn build_face_face_edges(
let has_gaze = gaze_count > 0;
let edge_props = if has_gaze {
// Compute average yaw values for gaze frames
let mut yaw_a_sum = 0.0f64;
let mut yaw_b_sum = 0.0f64;
let mut gaze_sample = 0i64;

View File

@@ -120,6 +120,7 @@ pub async fn process_yolo(
video_path: &str,
output_path: &str,
uuid: Option<&str>,
frames: Option<&[i64]>,
) -> Result<YoloResult> {
let executor = PythonExecutor::new()?;
let script_path = executor.script_path("yolo_processor.py");
@@ -136,12 +137,13 @@ pub async fn process_yolo(
}
executor
.run(
.run_with_frames(
"yolo_processor.py",
&[video_path, output_path],
uuid,
"YOLO",
Some(YOLO_TIMEOUT),
frames,
)
.await
.with_context(|| format!("Failed to run {:?}", script_path))?;

View File

@@ -1 +1 @@
pub mod qwen_vl_manager;
pub mod qwen_vl_manager;

View File

@@ -47,7 +47,7 @@ impl QwenVLManager {
pub async fn is_running(&self) -> Result<bool> {
let health_url = format!("http://localhost:{}/health", self.port);
let client = reqwest::Client::new();
let response = client
.get(&health_url)
@@ -90,41 +90,57 @@ impl QwenVLManager {
}
pub async fn start_server(&self) -> Result<()> {
let script_path = self.start_script.canonicalize()
let script_path = self
.start_script
.canonicalize()
.context("Failed to resolve start script path")?;
debug!("Running start script: {}", script_path.display());
let output = Command::new("bash")
.arg(&script_path)
.output()
.context("Failed to execute start script")?;
if !output.status.success() {
error!("Start script failed: {}", String::from_utf8_lossy(&output.stderr));
error!(
"Start script failed: {}",
String::from_utf8_lossy(&output.stderr)
);
anyhow::bail!("Failed to start Qwen3-VL server");
}
debug!("Start script output: {}", String::from_utf8_lossy(&output.stdout));
debug!(
"Start script output: {}",
String::from_utf8_lossy(&output.stdout)
);
Ok(())
}
pub async fn stop_server(&self) -> Result<()> {
let script_path = self.stop_script.canonicalize()
let script_path = self
.stop_script
.canonicalize()
.context("Failed to resolve stop script path")?;
debug!("Running stop script: {}", script_path.display());
let output = Command::new("bash")
.arg(&script_path)
.output()
.context("Failed to execute stop script")?;
if !output.status.success() {
warn!("Stop script returned error: {}", String::from_utf8_lossy(&output.stderr));
warn!(
"Stop script returned error: {}",
String::from_utf8_lossy(&output.stderr)
);
}
debug!("Stop script output: {}", String::from_utf8_lossy(&output.stdout));
debug!(
"Stop script output: {}",
String::from_utf8_lossy(&output.stdout)
);
tokio::time::sleep(Duration::from_secs(2)).await;
@@ -141,7 +157,7 @@ impl QwenVLManager {
let client = reqwest::Client::new();
let start_time = Instant::now();
while start_time.elapsed() < self.max_startup_time {
let response = client
.get(&health_url)
@@ -154,7 +170,10 @@ impl QwenVLManager {
if resp.status().is_success() {
let body = resp.text().await?;
if body.contains("\"status\":\"ok\"") {
debug!("Qwen3-VL ready after {} seconds", start_time.elapsed().as_secs());
debug!(
"Qwen3-VL ready after {} seconds",
start_time.elapsed().as_secs()
);
return Ok(());
}
}
@@ -165,7 +184,10 @@ impl QwenVLManager {
tokio::time::sleep(Duration::from_secs(2)).await;
}
error!("Qwen3-VL failed to start within {} seconds", self.max_startup_time.as_secs());
error!(
"Qwen3-VL failed to start within {} seconds",
self.max_startup_time.as_secs()
);
anyhow::bail!("Qwen3-VL startup timeout");
}
@@ -178,7 +200,7 @@ impl QwenVLManager {
pub async fn get_status(&self) -> Result<QwenVLStatus> {
let is_running = self.is_running().await?;
let last_request = self.last_request_time.lock().await.clone();
Ok(QwenVLStatus {
running: is_running,
port: self.port,
@@ -191,9 +213,12 @@ impl QwenVLManager {
pub async fn auto_stop_if_idle(&self, idle_timeout: Duration) -> Result<()> {
let last_request = self.last_request_time.lock().await.clone();
if last_request.elapsed() > idle_timeout && self.is_running().await? {
info!("Qwen3-VL idle for {} seconds, stopping server", last_request.elapsed().as_secs());
info!(
"Qwen3-VL idle for {} seconds, stopping server",
last_request.elapsed().as_secs()
);
self.stop_server().await?;
}
@@ -215,4 +240,4 @@ impl Default for QwenVLManager {
fn default() -> Self {
Self::new()
}
}
}

View File

@@ -17,7 +17,7 @@ fn init_tracing() {
}
fn load_env() {
let _ = dotenv::from_filename("/Users/accusys/momentry_core_0.1/.env");
let _ = dotenv::from_filename("/Users/accusys/momentry_core/.env");
}
use cli::*;
@@ -116,13 +116,8 @@ async fn main() -> Result<()> {
} => {
handle_gitea(action, username, password, token_name, scopes).await?;
}
Commands::N8n {
action,
api_key,
label,
expires_in_days,
} => {
handle_n8n(action, api_key, label, expires_in_days).await?;
Commands::Agent { tool, args } => {
cli::agent::handle_agent(&tool, &args).await?;
}
Commands::Detect {
image,
@@ -360,19 +355,3 @@ async fn handle_gitea(
// TODO: Implement Gitea logic
Ok(())
}
/// Handle n8n command
async fn handle_n8n(
action: N8nAction,
api_key: Option<String>,
label: Option<String>,
expires_in_days: Option<i64>,
) -> Result<()> {
println!("n8n action: {:?}", action);
println!("API key: {:?}", api_key);
println!("Label: {:?}", label);
println!("Expires in days: {:?}", expires_in_days);
// TODO: Implement n8n logic
Ok(())
}

View File

@@ -426,6 +426,7 @@ async fn process_yolo_module(
video_path,
yolo_path.to_str().unwrap(),
Some(uuid),
None,
)
.await?;
let yolo_json = serde_json::to_string_pretty(&yolo_result)?;
@@ -460,6 +461,7 @@ async fn process_ocr_module(
video_path,
ocr_path.to_str().unwrap(),
Some(uuid),
None,
)
.await?;
let ocr_json = serde_json::to_string_pretty(&ocr_result)?;
@@ -497,6 +499,7 @@ async fn process_face_module(
video_path,
face_path.to_str().unwrap(),
Some(uuid),
None,
)
.await?;
let face_json = serde_json::to_string_pretty(&face_result)?;
@@ -531,6 +534,7 @@ async fn process_pose_module(
video_path,
pose_path.to_str().unwrap(),
Some(uuid),
None,
)
.await?;
let pose_json = serde_json::to_string_pretty(&pose_result)?;
@@ -551,6 +555,47 @@ async fn process_pose_module(
Ok(())
}
/// Runs the appearance processor for one video and writes its result to `appearance_path`.
///
/// Steps: mark the Appearance processor as started, call
/// `momentry_core::core::processor::process_appearance`, write the result as pretty JSON,
/// attempt a backup of `appearance.json`, mark the processor complete and re-render the
/// progress UI if one is attached.
///
/// # Arguments
/// * `appearance_path` - Destination of the appearance JSON.
/// * `video_path` - Source video.
/// * `pose_path` - Pose JSON handed to the appearance processor.
/// * `uuid` - File UUID, used for the processor call and the backup.
/// * `progress_state` / `ui` - Shared progress bookkeeping and optional terminal UI.
///
/// # Errors
/// Fails if either path is not valid UTF-8, if the processor fails, or if the result
/// cannot be serialized or written.
async fn process_appearance_module(
    appearance_path: &Path,
    video_path: &str,
    pose_path: &Path,
    uuid: &str,
    progress_state: &Arc<Mutex<ProgressState>>,
    ui: &Arc<Mutex<Option<ProgressUi>>>,
) -> anyhow::Result<()> {
    // The processor takes `&str` paths; report a non-UTF-8 path as an error rather
    // than panicking in the middle of a processing run.
    let pose_path_str = pose_path.to_str().ok_or_else(|| {
        anyhow::anyhow!("Pose path is not valid UTF-8: {}", pose_path.display())
    })?;
    let appearance_path_str = appearance_path.to_str().ok_or_else(|| {
        anyhow::anyhow!(
            "Appearance path is not valid UTF-8: {}",
            appearance_path.display()
        )
    })?;

    {
        let mut state = progress_state.lock().unwrap();
        state.get_processor(ProcessorType::Appearance).start(1);
    }

    let appearance_result = momentry_core::core::processor::process_appearance(
        video_path,
        pose_path_str,
        appearance_path_str,
        Some(uuid),
        None,
    )
    .await?;

    let appearance_json = serde_json::to_string_pretty(&appearance_result)?;
    std::fs::write(appearance_path, &appearance_json)?;

    // Backup is best-effort: a failed backup must not fail the processing step.
    let output_dir = OutputDir::new();
    let _ = output_dir.backup_file(uuid, "appearance.json");

    println!(
        "  ✓ Appearance saved: {} frames",
        appearance_result.frame_count
    );

    {
        let mut state = progress_state.lock().unwrap();
        state
            .get_processor(ProcessorType::Appearance)
            .complete(&format!("{} frames", appearance_result.frame_count));
        state.stop();
    }

    if let Some(ref mut ui) = *ui.lock().unwrap() {
        // Rendering errors are cosmetic only.
        let _ = ui.render();
    }
    Ok(())
}
async fn process_story_module(
story_path: &Path,
video_path: &str,
@@ -643,7 +688,7 @@ enum Commands {
Process {
/// UUID or path
target: String,
/// Modules to process (comma separated: asr,cut,asrx,yolo,ocr,face,pose,story,caption)
/// Modules to process (comma separated: appearance,asr,cut,asrx,yolo,ocr,face,pose,story,caption)
/// If not specified, processes all modules
#[arg(short, long, value_delimiter = ',')]
modules: Option<Vec<String>>,
@@ -826,9 +871,11 @@ enum N8nAction {
#[tokio::main]
async fn main() -> Result<()> {
// Load development environment — try absolute path first
if dotenv::from_filename("/Users/accusys/momentry_core_0.1/.env.development").is_err() {
// Fallback to relative path (for development)
let _ = dotenv::from_filename(".env.development");
let env_loaded = dotenv::from_filename("/Users/accusys/momentry_core_0.1/.env.development")
.is_ok()
|| dotenv::from_filename(".env.development").is_ok();
if !env_loaded {
eprintln!("[WARN] No .env.development found, using defaults or env vars");
}
tracing_subscriber::fmt::init();
@@ -839,6 +886,10 @@ async fn main() -> Result<()> {
"Redis prefix: {}",
*momentry_core::core::config::REDIS_KEY_PREFIX
);
tracing::info!(
"Database schema: {}",
*momentry_core::core::config::DATABASE_SCHEMA
);
let cli = Cli::parse();
@@ -1011,6 +1062,7 @@ async fn main() -> Result<()> {
.filter_map(|name| {
let name_lower = name.to_lowercase();
match name_lower.as_str() {
"appearance" => Some(ProcessorType::Appearance),
"asr" => Some(ProcessorType::Asr),
"cut" => Some(ProcessorType::Cut),
"asrx" => Some(ProcessorType::Asrx),
@@ -1037,6 +1089,7 @@ async fn main() -> Result<()> {
.filter_map(|name| {
let name_lower = name.to_lowercase();
match name_lower.as_str() {
"appearance" => Some(ProcessorType::Appearance),
"asr" => Some(ProcessorType::Asr),
"cut" => Some(ProcessorType::Cut),
"asrx" => Some(ProcessorType::Asrx),
@@ -1618,6 +1671,69 @@ async fn main() -> Result<()> {
}
}
// Process Appearance (color/histogram analysis, depends on Pose)
if should_process(ProcessorType::Appearance) {
let appearance_path = output_dir.get_output_path(&uuid, "appearance.json");
let pose_path = output_dir.get_output_path(&uuid, "pose.json");
let decision = decide_processing(&appearance_path, force, resume);
match decision {
ProcessingDecision::SkipComplete => {
println!("\nAppearance: ✓ Already complete, skipping");
}
ProcessingDecision::ForceReprocess => {
println!("\nAppearance: ⟳ Force reprocessing from scratch...");
std::fs::remove_file(&appearance_path).ok();
if is_cloud(ProcessorType::Appearance) {
println!(" [Cloud processing not implemented yet - run locally]");
} else {
process_appearance_module(
&appearance_path,
video_path,
&pose_path,
&uuid,
&progress_state,
&ui,
)
.await?;
}
}
ProcessingDecision::ResumePartial => {
println!("\nAppearance: ↻ Resuming from checkpoint...");
if is_cloud(ProcessorType::Appearance) {
println!(" [Cloud processing not implemented yet - run locally]");
} else {
process_appearance_module(
&appearance_path,
video_path,
&pose_path,
&uuid,
&progress_state,
&ui,
)
.await?;
}
}
ProcessingDecision::Process => {
if is_cloud(ProcessorType::Appearance) {
println!("\nAppearance: ☁️ Running via cloud...");
println!(" [Cloud processing not implemented yet - run locally]");
} else {
println!("\nAppearance: ⚙️ Processing...");
process_appearance_module(
&appearance_path,
video_path,
&pose_path,
&uuid,
&progress_state,
&ui,
)
.await?;
}
}
}
}
// Process Story (video narrative)
if should_process(ProcessorType::Story) {
let story_path = output_dir.get_output_path(&uuid, "story.json");
@@ -1770,6 +1886,10 @@ async fn main() -> Result<()> {
let path = output_dir.get_output_path(&uuid, "pose.json");
println!(" - Pose JSON: {}", path.display());
}
if should_process(ProcessorType::Appearance) {
let path = output_dir.get_output_path(&uuid, "appearance.json");
println!(" - Appearance JSON: {}", path.display());
}
if should_process(ProcessorType::Story) {
let path = output_dir.get_output_path(&uuid, "story.json");
println!(" - Story JSON: {}", path.display());

View File

@@ -390,35 +390,6 @@ pub async fn handle_gitea(
Ok(())
}
/// Handle n8n workflow command
pub async fn handle_n8n(
action: &crate::cli::args::N8nAction,
name: Option<String>,
url: Option<String>,
workflow_id: Option<String>,
) -> Result<()> {
match action {
crate::cli::args::N8nAction::Create => {
println!("Creating n8n workflow...");
// TODO: Implement n8n workflow creation
}
crate::cli::args::N8nAction::List => {
println!("Listing n8n workflows...");
// TODO: Implement n8n workflow listing
}
crate::cli::args::N8nAction::Delete => {
let workflow_id = workflow_id.expect("Workflow ID required for deletion");
println!("Deleting n8n workflow: {}", workflow_id);
// TODO: Implement n8n workflow deletion
}
crate::cli::args::N8nAction::Verify => {
println!("Verifying n8n workflow...");
// TODO: Implement n8n workflow verification
}
}
Ok(())
}
/// Handle store-asrx command
pub async fn handle_store_asrx(uuid: &str) -> Result<()> {

View File

@@ -0,0 +1,53 @@
use anyhow::Result;
use momentry_core::ui::progress::{ProcessorType, ProgressState, ProgressUi};
use momentry_core::OutputDir;
use std::path::Path;
use std::sync::{Arc, Mutex};
pub async fn process_appearance_module(
appearance_path: &Path,
video_path: &str,
pose_path: &str,
uuid: &str,
progress_state: &Arc<Mutex<ProgressState>>,
ui: &Arc<Mutex<Option<ProgressUi>>>,
) -> Result<()> {
{
let mut state = progress_state.lock().unwrap();
state.get_processor(ProcessorType::Appearance).start(1);
}
let appearance_result = momentry_core::core::processor::process_appearance(
video_path,
pose_path,
appearance_path.to_str().unwrap(),
Some(uuid),
None,
)
.await?;
let appearance_json = serde_json::to_string_pretty(&appearance_result)?;
std::fs::write(appearance_path, &appearance_json)?;
let output_dir = OutputDir::new();
let _ = output_dir.backup_file(uuid, "appearance.json");
println!(
" ✓ Appearance saved: {} frames",
appearance_result.frame_count
);
{
let mut state = progress_state.lock().unwrap();
state
.get_processor(ProcessorType::Appearance)
.complete(&format!("{} frames", appearance_result.frame_count));
state.stop();
}
if let Some(ref mut ui) = *ui.lock().unwrap() {
let _ = ui.render();
}
Ok(())
}

View File

@@ -23,6 +23,7 @@ pub async fn process_face_module(
video_path,
face_path.to_str().unwrap(),
Some(uuid),
None,
)
.await?;

View File

@@ -1,5 +1,6 @@
//! Video processing modules
pub mod appearance;
pub mod asr;
pub mod asrx;
pub mod caption;
@@ -10,6 +11,7 @@ pub mod pose;
pub mod story;
pub mod yolo;
pub use appearance::*;
pub use asr::*;
pub use asrx::*;
pub use caption::*;

View File

@@ -23,6 +23,7 @@ pub async fn process_ocr_module(
video_path,
ocr_path.to_str().unwrap(),
Some(uuid),
None,
)
.await?;

View File

@@ -23,6 +23,7 @@ pub async fn process_pose_module(
video_path,
pose_path.to_str().unwrap(),
Some(uuid),
None,
)
.await?;

View File

@@ -23,6 +23,7 @@ pub async fn process_yolo_module(
video_path,
yolo_path.to_str().unwrap(),
Some(uuid),
None,
)
.await?;

View File

@@ -12,6 +12,7 @@ use std::io;
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum ProcessorType {
Appearance,
Asr,
Cut,
Asrx,
@@ -26,6 +27,7 @@ pub enum ProcessorType {
impl ProcessorType {
pub fn as_str(&self) -> &'static str {
match self {
ProcessorType::Appearance => "Appearance",
ProcessorType::Asr => "ASR",
ProcessorType::Cut => "CUT",
ProcessorType::Asrx => "ASRX",
@@ -132,6 +134,7 @@ impl ProgressState {
pub fn new(video_name: &str) -> Self {
Self {
processors: vec![
ProcessorProgress::new(ProcessorType::Appearance),
ProcessorProgress::new(ProcessorType::Asr),
ProcessorProgress::new(ProcessorType::Cut),
ProcessorProgress::new(ProcessorType::Asrx),
@@ -187,6 +190,7 @@ impl ProgressState {
message: Option<&str>,
) {
let proc_type = match processor.to_uppercase().as_str() {
"APPEARANCE" => ProcessorType::Appearance,
"ASR" => ProcessorType::Asr,
"CUT" => ProcessorType::Cut,
"ASRX" => ProcessorType::Asrx,

View File

@@ -1,3 +1,7 @@
pub mod schema;
pub mod verifier;
pub use verifier::{verify_output, VerificationResult, VerifierError};
pub use schema::{FileVerificationReport, ProcessorVerification};
pub use verifier::{
cleanup_temp_files, verify_file, verify_output, VerificationResult, VerifierError,
};

355
src/verification/schema.rs Normal file
View File

@@ -0,0 +1,355 @@
use crate::core::db::ProcessorType;
use serde::{Deserialize, Serialize};
/// Required field definition for JSON schema validation
///
/// One entry describes a single value that a processor's output JSON is expected to
/// contain, together with the kind of value expected there.
#[derive(Debug, Clone)]
pub struct RequiredField {
/// Location of the value inside the JSON document. The schema table uses plain keys
/// (`"fps"`) as well as dotted paths (`"metadata.fps"`).
pub path: &'static str,
/// Kind of value expected at `path`.
pub field_type: FieldType,
/// Whether an empty array is acceptable at `path`; the schema table also sets it on
/// `OptionalNumber` entries that are allowed to be absent.
pub allow_empty: bool,
}
/// Kind of JSON value a [`RequiredField`] expects.
#[derive(Debug, Clone, PartialEq)]
pub enum FieldType {
/// Any JSON number.
Number,
/// A JSON number that is expected to be positive; a value of zero is reported as an issue.
PositiveNumber,
/// A JSON array.
Array,
/// A JSON array that is expected to contain at least one element.
NonEmptyArray,
/// A JSON object.
Object,
/// A JSON string.
String,
/// A value that is not type-checked and may be absent.
// NOTE(review): the ASR/ASRX schemas use this for `language`, which is presumably a
// string rather than a number - confirm whether the name is intentional.
OptionalNumber,
}
/// Processor JSON schema: defines required fields and their types
#[derive(Debug, Clone)]
pub struct ProcessorJsonSchema {
/// Processor whose output file this schema describes.
pub processor: ProcessorType,
/// Fields that must be present in the output JSON, with their expected types.
pub required_fields: &'static [RequiredField],
/// Minimum number of data items (scenes, frames, segments, ...) the output must
/// contain; `0` disables the threshold.
pub min_data_threshold: usize,
}
/// All processor schemas
pub const PROCESSOR_SCHEMAS: &[ProcessorJsonSchema] = &[
ProcessorJsonSchema {
processor: ProcessorType::Cut,
required_fields: &[
RequiredField {
path: "frame_count",
field_type: FieldType::PositiveNumber,
allow_empty: false,
},
RequiredField {
path: "fps",
field_type: FieldType::PositiveNumber,
allow_empty: false,
},
RequiredField {
path: "scenes",
field_type: FieldType::NonEmptyArray,
allow_empty: false,
},
],
min_data_threshold: 1,
},
ProcessorJsonSchema {
processor: ProcessorType::Yolo,
required_fields: &[
RequiredField {
path: "metadata.fps",
field_type: FieldType::PositiveNumber,
allow_empty: false,
},
RequiredField {
path: "metadata.total_frames",
field_type: FieldType::PositiveNumber,
allow_empty: false,
},
RequiredField {
path: "frames",
field_type: FieldType::Object,
allow_empty: true,
},
],
min_data_threshold: 0,
},
ProcessorJsonSchema {
processor: ProcessorType::Ocr,
required_fields: &[
RequiredField {
path: "frame_count",
field_type: FieldType::PositiveNumber,
allow_empty: false,
},
RequiredField {
path: "fps",
field_type: FieldType::PositiveNumber,
allow_empty: false,
},
RequiredField {
path: "frames",
field_type: FieldType::Array,
allow_empty: true,
},
],
min_data_threshold: 0,
},
ProcessorJsonSchema {
processor: ProcessorType::Face,
required_fields: &[
RequiredField {
path: "frame_count",
field_type: FieldType::PositiveNumber,
allow_empty: false,
},
RequiredField {
path: "fps",
field_type: FieldType::PositiveNumber,
allow_empty: false,
},
RequiredField {
path: "frames",
field_type: FieldType::Array,
allow_empty: true,
},
],
min_data_threshold: 0,
},
ProcessorJsonSchema {
processor: ProcessorType::Pose,
required_fields: &[
RequiredField {
path: "frame_count",
field_type: FieldType::PositiveNumber,
allow_empty: false,
},
RequiredField {
path: "fps",
field_type: FieldType::PositiveNumber,
allow_empty: false,
},
RequiredField {
path: "frames",
field_type: FieldType::Array,
allow_empty: true,
},
],
min_data_threshold: 0,
},
ProcessorJsonSchema {
processor: ProcessorType::Appearance,
required_fields: &[
RequiredField {
path: "frame_count",
field_type: FieldType::PositiveNumber,
allow_empty: false,
},
RequiredField {
path: "fps",
field_type: FieldType::PositiveNumber,
allow_empty: false,
},
RequiredField {
path: "frames",
field_type: FieldType::Array,
allow_empty: true,
},
],
min_data_threshold: 0,
},
ProcessorJsonSchema {
processor: ProcessorType::Asr,
required_fields: &[
RequiredField {
path: "language",
field_type: FieldType::OptionalNumber,
allow_empty: true,
},
RequiredField {
path: "segments",
field_type: FieldType::Array,
allow_empty: true,
},
],
min_data_threshold: 0,
},
ProcessorJsonSchema {
processor: ProcessorType::Asrx,
required_fields: &[
RequiredField {
path: "language",
field_type: FieldType::OptionalNumber,
allow_empty: true,
},
RequiredField {
path: "segments",
field_type: FieldType::Array,
allow_empty: true,
},
],
min_data_threshold: 0,
},
ProcessorJsonSchema {
processor: ProcessorType::Scene,
required_fields: &[
RequiredField {
path: "frame_count",
field_type: FieldType::PositiveNumber,
allow_empty: false,
},
RequiredField {
path: "fps",
field_type: FieldType::PositiveNumber,
allow_empty: false,
},
RequiredField {
path: "scenes",
field_type: FieldType::NonEmptyArray,
allow_empty: false,
},
],
min_data_threshold: 1,
},
ProcessorJsonSchema {
processor: ProcessorType::Story,
required_fields: &[
RequiredField {
path: "child_chunks",
field_type: FieldType::Array,
allow_empty: true,
},
RequiredField {
path: "parent_chunks",
field_type: FieldType::Array,
allow_empty: true,
},
RequiredField {
path: "stats",
field_type: FieldType::Object,
allow_empty: false,
},
],
min_data_threshold: 0,
},
ProcessorJsonSchema {
processor: ProcessorType::MediaPipe,
required_fields: &[
RequiredField {
path: "frame_count",
field_type: FieldType::PositiveNumber,
allow_empty: false,
},
RequiredField {
path: "fps",
field_type: FieldType::PositiveNumber,
allow_empty: false,
},
RequiredField {
path: "frames",
field_type: FieldType::Array,
allow_empty: true,
},
],
min_data_threshold: 0,
},
];
/// Looks up the JSON schema registered for `processor`.
///
/// Returns the first entry of [`PROCESSOR_SCHEMAS`] whose processor equals the argument,
/// or `None` when the table has no entry for it.
pub fn get_schema(processor: &ProcessorType) -> Option<&'static ProcessorJsonSchema> {
    for candidate in PROCESSOR_SCHEMAS {
        if candidate.processor == *processor {
            return Some(candidate);
        }
    }
    None
}
/// Verification result for a single processor
///
/// The boolean flags are combined into `trust_level` by `update_trust_level`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ProcessorVerification {
/// Name of the processor this result belongs to.
pub processor: String,
/// Check flag; starts as `false`.
pub file_exists: bool,
/// Check flag; starts as `false`.
pub valid_json: bool,
/// Check flag; starts as `false`.
pub completeness: bool,
/// Check flag; starts as `true`. When cleared, the result is at best "degraded".
pub dependency_ok: bool,
/// Check flag; starts as `true`. When cleared, the result is at best "suspicious".
pub reasonableness: bool,
/// One of "trusted", "degraded", "suspicious" or "untrusted".
pub trust_level: String,
/// Collected issue descriptions.
pub issues: Vec<String>,
/// Free-form JSON summary; starts as an empty object.
pub data_summary: serde_json::Value,
}
impl ProcessorVerification {
    /// Creates a pessimistic result for `processor`: nothing verified yet, no issues,
    /// trust level "untrusted". The dependency and reasonableness flags start as `true`.
    pub fn new(processor: &str) -> Self {
        Self {
            processor: processor.to_owned(),
            file_exists: false,
            valid_json: false,
            completeness: false,
            dependency_ok: true,
            reasonableness: true,
            trust_level: String::from("untrusted"),
            issues: vec![],
            data_summary: serde_json::json!({}),
        }
    }

    /// Recomputes `trust_level` from the individual check flags.
    ///
    /// * file missing, invalid JSON or incomplete -> "untrusted"
    /// * otherwise, dependency check failed -> "degraded" (regardless of reasonableness)
    /// * otherwise, reasonableness check failed -> "suspicious"
    /// * all checks passed -> "trusted"
    pub fn update_trust_level(&mut self) {
        let structurally_sound = self.file_exists && self.valid_json && self.completeness;
        let level = match (structurally_sound, self.dependency_ok, self.reasonableness) {
            (false, _, _) => "untrusted",
            (true, false, _) => "degraded",
            (true, true, false) => "suspicious",
            (true, true, true) => "trusted",
        };
        self.trust_level = level.to_string();
    }
}
/// Overall file verification report
///
/// Aggregates the per-processor results of one file; `trust_level` and `summary` are
/// derived from `processors` by `update_overall_trust`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileVerificationReport {
/// UUID of the verified file.
pub file_uuid: String,
/// Aggregated trust level: "trusted", "degraded", "suspicious" or "untrusted".
pub trust_level: String,
/// One verification result per processor.
pub processors: Vec<ProcessorVerification>,
/// JSON object with the total number of processors and the count per trust level.
pub summary: serde_json::Value,
}
impl FileVerificationReport {
    /// Recomputes the file-level `trust_level` and the per-level `summary` counts from
    /// the processor results.
    ///
    /// Aggregation rules, in order:
    /// 1. no processors -> "untrusted"
    /// 2. every processor "trusted" -> "trusted"
    /// 3. every processor "trusted" or "degraded" -> "degraded"
    /// 4. at least one "suspicious" -> "suspicious"
    /// 5. anything else -> "untrusted"
    pub fn update_overall_trust(&mut self) {
        let total = self.processors.len();
        let (mut trusted, mut degraded, mut suspicious, mut untrusted) =
            (0usize, 0usize, 0usize, 0usize);
        for verification in &self.processors {
            match verification.trust_level.as_str() {
                "trusted" => trusted += 1,
                "degraded" => degraded += 1,
                "suspicious" => suspicious += 1,
                "untrusted" => untrusted += 1,
                // Unknown labels are counted in `total` only.
                _ => {}
            }
        }

        // NOTE(review): rule 4 runs before rule 5, so a file with both "suspicious" and
        // "untrusted" processors is reported as "suspicious" - confirm this precedence
        // is intended.
        let overall = if total == 0 {
            "untrusted"
        } else if trusted == total {
            "trusted"
        } else if trusted + degraded == total {
            "degraded"
        } else if suspicious > 0 {
            "suspicious"
        } else {
            "untrusted"
        };
        self.trust_level = overall.to_string();

        self.summary = serde_json::json!({
            "total": total,
            "trusted": trusted,
            "degraded": degraded,
            "suspicious": suspicious,
            "untrusted": untrusted
        });
    }
}

View File

@@ -1,8 +1,13 @@
use crate::core::config::OUTPUT_DIR;
use crate::core::db::ProcessorType;
use anyhow::Result;
use std::collections::HashMap;
use std::path::PathBuf;
use tracing::info;
use super::schema::{
get_schema, FieldType, FileVerificationReport, ProcessorJsonSchema, ProcessorVerification,
RequiredField,
};
#[derive(Debug)]
pub struct VerificationResult {
@@ -37,6 +42,601 @@ pub struct VerifierError {
pub reason: String,
}
/// Expands a possibly abbreviated file UUID to the full one.
///
/// A 32-character input is returned unchanged. Anything else is treated as a prefix:
/// the output directory is scanned for a `*.probe.json` file whose name starts with it,
/// and the part of that file name before the first `.` is returned. If the directory
/// cannot be read or nothing matches, the input is returned as given.
fn resolve_uuid(file_uuid: &str) -> String {
    if file_uuid.len() == 32 {
        return file_uuid.to_string();
    }

    std::fs::read_dir(OUTPUT_DIR.as_str())
        .into_iter()
        .flatten() // entries of the directory, if it could be opened
        .flatten() // skip entries that failed to read
        .find_map(|entry| {
            let file_name = entry.file_name();
            let name = file_name.to_str()?;
            if name.starts_with(file_uuid) && name.ends_with(".probe.json") {
                Some(name.split('.').next().unwrap_or(file_uuid).to_string())
            } else {
                None
            }
        })
        .unwrap_or_else(|| file_uuid.to_string())
}
/// Layer 1: Check JSON structure and data completeness
///
/// Validates `value` against the schema registered for `processor`:
/// every required field must be present with the expected JSON type, and the number of
/// data items must reach the schema's minimum threshold.
///
/// # Returns
/// `(passed, issues)` where `passed` is `true` exactly when `issues` is empty.
/// A processor without a registered schema passes with no issues.
fn check_completeness(processor: &ProcessorType, value: &serde_json::Value) -> (bool, Vec<String>) {
    let schema = match get_schema(processor) {
        Some(s) => s,
        None => return (true, Vec::new()), // No schema = pass
    };

    let mut issues = Vec::new();

    for field in schema.required_fields {
        if let Some(val) = get_value_at_path(value, field.path) {
            match field.field_type {
                FieldType::Number | FieldType::PositiveNumber => match val.as_f64() {
                    None => issues.push(format!("'{}' is not a number", field.path)),
                    Some(n) if field.field_type == FieldType::PositiveNumber => {
                        if n == 0.0 {
                            issues.push(format!("'{}' is zero (expected positive)", field.path));
                        } else if n < 0.0 {
                            // Previously only an exact zero was rejected, so a negative
                            // fps or frame count passed as "positive".
                            issues.push(format!(
                                "'{}' is negative (expected positive)",
                                field.path
                            ));
                        }
                    }
                    Some(_) => {}
                },
                FieldType::Array | FieldType::NonEmptyArray => {
                    if let Some(arr) = val.as_array() {
                        if arr.is_empty() && !field.allow_empty {
                            issues.push(format!("'{}' is empty", field.path));
                        }
                    } else {
                        issues.push(format!("'{}' is not an array", field.path));
                    }
                }
                FieldType::Object => {
                    if val.as_object().is_none() {
                        issues.push(format!("'{}' is not an object", field.path));
                    }
                }
                FieldType::String => {
                    if val.as_str().is_none() {
                        issues.push(format!("'{}' is not a string", field.path));
                    }
                }
                FieldType::OptionalNumber => {
                    // Optional, skip if null/missing
                }
            }
        } else if !(field.allow_empty && field.field_type == FieldType::OptionalNumber) {
            // Only optional fields that also allow emptiness may be absent.
            issues.push(format!("'{}' is missing", field.path));
        }
    }

    // Check data threshold
    let data_count = count_data_items(processor, value);
    if data_count < schema.min_data_threshold {
        issues.push(format!(
            "data count {} below minimum threshold {}",
            data_count, schema.min_data_threshold
        ));
    }

    (issues.is_empty(), issues)
}
/// Count the primary data items in a processor's JSON output.
///
/// Which key is counted depends on the processor:
/// - `Cut` / `Scene`: length of the `scenes` array
/// - `Yolo`: number of entries in the `frames` object (dict-based frames)
/// - `Ocr` / `Face` / `Pose` / `Appearance` / `MediaPipe`: length of the `frames` array
/// - `Asr` / `Asrx`: length of the `segments` array
/// - `Story`: `child_chunks` length plus `parent_chunks` length
///
/// A missing key, a value of the wrong JSON type, or any other processor
/// yields `0`.
fn count_data_items(processor: &ProcessorType, value: &serde_json::Value) -> usize {
    // Length of the array stored under `key`, or 0 when absent / not an array.
    let array_len = |key: &str| {
        value
            .get(key)
            .and_then(|node| node.as_array())
            .map_or(0, |items| items.len())
    };

    match processor {
        ProcessorType::Cut | ProcessorType::Scene => array_len("scenes"),
        ProcessorType::Yolo => value
            .get("frames")
            .and_then(|node| node.as_object())
            .map_or(0, |frames| frames.len()),
        ProcessorType::Ocr
        | ProcessorType::Face
        | ProcessorType::Pose
        | ProcessorType::Appearance
        | ProcessorType::MediaPipe => array_len("frames"),
        ProcessorType::Asr | ProcessorType::Asrx => array_len("segments"),
        ProcessorType::Story => array_len("child_chunks") + array_len("parent_chunks"),
        _ => 0,
    }
}
/// Resolve a dot-separated path (e.g. `"stats.total_child_chunks"`) inside a
/// JSON value.
///
/// Each segment is looked up with `Value::get` on the result of the previous
/// one; the walk stops with `None` as soon as a segment cannot be resolved.
fn get_value_at_path<'a>(
    value: &'a serde_json::Value,
    path: &str,
) -> Option<&'a serde_json::Value> {
    path.split('.').try_fold(value, |node, key| node.get(key))
}
/// Layer 2: Verify that every dependency of `processor` produced usable output.
///
/// For each name returned by `processor.dependencies()` the corresponding
/// entry in `all_results` must exist, have `file_exists` and `valid_json` set,
/// and have passed the completeness check. A dependency's `trust_level` is
/// deliberately not consulted: it is only computed after this function runs.
///
/// Returns `(passed, issues)`; a processor without dependencies passes with no
/// issues.
fn check_dependencies(
    processor: &ProcessorType,
    all_results: &HashMap<String, &ProcessorVerification>,
) -> (bool, Vec<String>) {
    let deps = processor.dependencies();
    if deps.is_empty() {
        return (true, Vec::new());
    }

    let mut issues = Vec::new();
    for dep in &deps {
        let dep_name = dep.as_str();
        let issue = match all_results.get(dep_name) {
            None => Some(format!("dependency '{}' not found", dep_name)),
            Some(result) if !(result.file_exists && result.valid_json) => {
                Some(format!("dependency '{}' missing or invalid", dep_name))
            }
            Some(result) if !result.completeness => {
                Some(format!("dependency '{}' incomplete", dep_name))
            }
            Some(_) => None,
        };
        issues.extend(issue);
    }
    (issues.is_empty(), issues)
}
/// Layer 3: Cross-JSON reasonableness checks.
///
/// Compares a processor's JSON (`value`) with the probe output and with the
/// other processors' JSON (`all_values`, keyed by processor name):
/// - `fps` must be positive and within 0.5 of the probe's `r_frame_rate`
/// - `frame_count` must not fall below 10% of the expected frame count
/// - `Story` must have at least half as many chunks as `cut` has scenes
/// - `Asr` / `Asrx` must not be empty when `cut` found more than 5 scenes
/// - `Cut` / `Scene` scenes must not end before they start
///
/// Checks whose inputs are unavailable (no probe, missing key) are skipped.
/// Returns `(passed, issues)` where `passed` is `true` exactly when `issues`
/// is empty.
fn check_reasonableness(
    processor: &ProcessorType,
    value: &serde_json::Value,
    probe_value: Option<&serde_json::Value>,
    all_values: &HashMap<String, &serde_json::Value>,
) -> (bool, Vec<String>) {
    let mut issues = Vec::new();

    // First stream in the probe output whose codec_type is "video".
    let video_stream = probe_value
        .and_then(|probe| probe.get("streams"))
        .and_then(|streams| streams.as_array())
        .and_then(|streams| {
            streams.iter().find(|stream| {
                stream.get("codec_type").and_then(|kind| kind.as_str()) == Some("video")
            })
        });

    // r_frame_rate is a "num/den" string; a non-positive denominator is rejected.
    let probe_fps = video_stream
        .and_then(|stream| stream.get("r_frame_rate"))
        .and_then(|rate| rate.as_str())
        .and_then(|rate| rate.split_once('/'))
        .and_then(|(num, den)| {
            let n = num.parse::<f64>().ok()?;
            let d = den.parse::<f64>().ok()?;
            if d > 0.0 {
                Some(n / d)
            } else {
                None
            }
        });

    // nb_frames is read as a string and parsed.
    let probe_frames = video_stream
        .and_then(|stream| stream.get("nb_frames"))
        .and_then(|count| count.as_str())
        .and_then(|count| count.parse::<u64>().ok());

    // fps consistency with probe
    match value.get("fps").and_then(|v| v.as_f64()) {
        Some(json_fps) if json_fps <= 0.0 => {
            issues.push("fps is zero or negative".to_string());
        }
        Some(json_fps) => {
            if let Some(p_fps) = probe_fps {
                if (json_fps - p_fps).abs() > 0.5 {
                    issues.push(format!(
                        "fps mismatch: JSON={}, probe={:.2}",
                        json_fps, p_fps
                    ));
                }
            }
        }
        None => {}
    }

    // frame_count consistency with probe: only a count that is wildly off
    // (below 10% of the expected value) is reported.
    let json_frame_count = value.get("frame_count").and_then(|v| v.as_u64());
    if let (Some(json_frames), Some(p_frames)) = (json_frame_count, probe_frames) {
        let is_sampled = matches!(
            processor,
            ProcessorType::Cut
                | ProcessorType::Yolo
                | ProcessorType::Ocr
                | ProcessorType::Face
                | ProcessorType::Pose
                | ProcessorType::Appearance
                | ProcessorType::Scene
        );
        // NOTE(review): sampled processors are expected to yield about
        // total_frames / 8 frames; confirm this matches the actual sampling rate.
        let expected = if is_sampled {
            (p_frames as f64 / 8.0) as u64
        } else {
            p_frames
        };
        let floor = (expected as f64 * 0.1) as u64;
        if json_frames > 0 && floor > 0 && json_frames < floor {
            issues.push(format!(
                "frame_count {} much less than expected ~{} (probe={})",
                json_frames, expected, p_frames
            ));
        }
    }

    // Cross-checks against the CUT scene count.
    if matches!(
        processor,
        ProcessorType::Story | ProcessorType::Asr | ProcessorType::Asrx
    ) {
        if let Some(cut_value) = all_values.get("cut") {
            let own_count = count_data_items(processor, value);
            let cut_scenes = count_data_items(&ProcessorType::Cut, cut_value);
            match processor {
                ProcessorType::Story => {
                    // Reported only when both sides have data and the story has
                    // fewer than half as many chunks as there are scenes.
                    if own_count > 0 && cut_scenes > 0 && own_count < cut_scenes / 2 {
                        issues.push(format!(
                            "story chunk count ({}) much less than cut scene count ({})",
                            own_count, cut_scenes
                        ));
                    }
                }
                ProcessorType::Asr => {
                    if own_count == 0 && cut_scenes > 5 {
                        issues.push(format!(
                            "ASR has 0 segments but CUT has {} scenes",
                            cut_scenes
                        ));
                    }
                }
                ProcessorType::Asrx => {
                    // Many scenes but no segments at all hints at a processing issue.
                    if own_count == 0 && cut_scenes > 5 {
                        issues.push(format!(
                            "ASRX has 0 segments but CUT has {} scenes",
                            cut_scenes
                        ));
                    }
                }
                _ => {}
            }
        }
    }

    // Scene time ranges must not run backwards.
    if matches!(processor, ProcessorType::Cut | ProcessorType::Scene) {
        let scenes = value.get("scenes").and_then(|v| v.as_array());
        for (i, scene) in scenes.into_iter().flatten().enumerate() {
            let start = scene.get("start_time").and_then(|v| v.as_f64());
            let end = scene.get("end_time").and_then(|v| v.as_f64());
            if matches!((start, end), (Some(s), Some(e)) if e < s) {
                issues.push(format!("scene {}: end_time < start_time", i));
            }
        }
    }

    (issues.is_empty(), issues)
}
/// Build a small JSON summary describing a processor's output.
///
/// The summary always contains `data_count` (see `count_data_items`). Further
/// keys depend on the processor:
/// - `Cut`: `scene_count`, `first_scene_start`, `last_scene_end`
/// - `Face` / `Ocr` / `Pose` / `Appearance` / `Yolo`: `total_detections`,
///   `frames_with_data`
/// - `Asr`: `segment_count` and, when present, `language`
/// - `Asrx`: `segment_count`, `speaker_count` (distinct `speaker_id` strings)
/// - `Story`: a copy of the `stats` object
///
/// Extra keys are only added when the underlying JSON key exists with the
/// expected type.
fn build_data_summary(processor: &ProcessorType, value: &serde_json::Value) -> serde_json::Value {
    /// Per-frame keys that may hold detection arrays.
    const DETECTION_KEYS: [&str; 4] = ["faces", "objects", "texts", "persons"];

    /// Length of the array under `key`, or 0 when absent / not an array.
    fn array_len(node: &serde_json::Value, key: &str) -> usize {
        node.get(key)
            .and_then(|v| v.as_array())
            .map_or(0, |items| items.len())
    }

    /// Total number of detections in one frame across all detection keys.
    fn detections_in(frame: &serde_json::Value) -> usize {
        DETECTION_KEYS
            .iter()
            .map(|key| array_len(frame, key))
            .sum()
    }

    let data_count = count_data_items(processor, value);
    let mut summary = serde_json::json!({
        "data_count": data_count
    });

    match processor {
        ProcessorType::Cut => {
            if let Some(scenes) = value.get("scenes").and_then(|v| v.as_array()) {
                summary["scene_count"] = serde_json::json!(scenes.len());
                if let Some(first) = scenes.first() {
                    let start = first.get("start_time").and_then(|v| v.as_f64());
                    summary["first_scene_start"] = start.into();
                }
                if let Some(last) = scenes.last() {
                    let end = last.get("end_time").and_then(|v| v.as_f64());
                    summary["last_scene_end"] = end.into();
                }
            }
        }
        ProcessorType::Face
        | ProcessorType::Ocr
        | ProcessorType::Pose
        | ProcessorType::Appearance => {
            if let Some(frames) = value.get("frames").and_then(|v| v.as_array()) {
                let total_detections: usize = frames.iter().map(detections_in).sum();
                // A frame "has data" when at least one detection array is non-empty.
                let frames_with_data = frames
                    .iter()
                    .filter(|frame| detections_in(frame) > 0)
                    .count();
                summary["total_detections"] = serde_json::json!(total_detections);
                summary["frames_with_data"] = serde_json::json!(frames_with_data);
            }
        }
        ProcessorType::Yolo => {
            if let Some(frames) = value.get("frames").and_then(|v| v.as_object()) {
                let total_detections: usize = frames
                    .values()
                    .map(|frame| array_len(frame, "objects"))
                    .sum();
                let frames_with_data = frames
                    .values()
                    .filter(|frame| array_len(frame, "objects") > 0)
                    .count();
                summary["total_detections"] = serde_json::json!(total_detections);
                summary["frames_with_data"] = serde_json::json!(frames_with_data);
            }
        }
        ProcessorType::Asr => {
            if let Some(segments) = value.get("segments").and_then(|v| v.as_array()) {
                summary["segment_count"] = serde_json::json!(segments.len());
                if let Some(lang) = value.get("language").and_then(|v| v.as_str()) {
                    summary["language"] = serde_json::json!(lang);
                }
            }
        }
        ProcessorType::Asrx => {
            if let Some(segments) = value.get("segments").and_then(|v| v.as_array()) {
                // Segments without a string speaker_id do not contribute a speaker.
                let speakers: std::collections::HashSet<&str> = segments
                    .iter()
                    .filter_map(|segment| segment.get("speaker_id").and_then(|v| v.as_str()))
                    .collect();
                summary["segment_count"] = serde_json::json!(segments.len());
                summary["speaker_count"] = serde_json::json!(speakers.len());
            }
        }
        ProcessorType::Story => {
            if let Some(stats) = value.get("stats") {
                summary["stats"] = stats.clone();
            }
        }
        _ => {}
    }

    summary
}
/// Read and parse `<file_uuid>.probe.json` from the output directory.
///
/// Returns `None` when the file cannot be read or does not contain valid JSON.
fn load_probe_json(file_uuid: &str) -> Option<serde_json::Value> {
    let probe_path = PathBuf::from(OUTPUT_DIR.as_str()).join(format!("{}.probe.json", file_uuid));
    let raw = std::fs::read_to_string(&probe_path).ok()?;
    serde_json::from_str(&raw).ok()
}
/// Main verification function for a file
pub fn verify_file(file_uuid: &str) -> FileVerificationReport {
let full_uuid = resolve_uuid(file_uuid);
let processors = ProcessorType::all();
let mut report = FileVerificationReport {
file_uuid: full_uuid.clone(),
trust_level: "untrusted".to_string(),
processors: Vec::new(),
summary: serde_json::json!({}),
};
// Load probe.json once
let probe_value = load_probe_json(&full_uuid);
// Phase 1: Load all JSON values
let mut all_values: HashMap<String, serde_json::Value> = HashMap::new();
for processor in &processors {
let proc_name = processor.as_str();
let filename = match processor {
ProcessorType::Story => format!("{}.story_story.json", full_uuid),
_ => format!("{}.{}.json", full_uuid, proc_name),
};
let path = PathBuf::from(OUTPUT_DIR.as_str()).join(&filename);
if let Ok(content) = std::fs::read_to_string(&path) {
if let Ok(value) = serde_json::from_str(&content) {
all_values.insert(proc_name.to_string(), value);
}
}
}
// Phase 2: Verify each processor
let mut verifications: Vec<ProcessorVerification> = Vec::new();
let mut value_refs: HashMap<String, &serde_json::Value> = HashMap::new();
for processor in &processors {
let proc_name = processor.as_str();
let mut pv = ProcessorVerification::new(proc_name);
if let Some(value) = all_values.get(proc_name) {
pv.file_exists = true;
pv.valid_json = true;
value_refs.insert(proc_name.to_string(), value);
// Layer 1: Completeness
let (complete, issues) = check_completeness(processor, value);
pv.completeness = complete;
pv.issues.extend(issues);
// Data summary
pv.data_summary = build_data_summary(processor, value);
} else {
pv.issues.push("JSON file not found".to_string());
}
verifications.push(pv);
}
// Phase 3: Check dependencies and reasonableness
// Build references once outside the loop
let mut all_value_refs: HashMap<String, &serde_json::Value> = HashMap::new();
for (name, value) in &all_values {
all_value_refs.insert(name.clone(), value);
}
let probe_ref = probe_value.as_ref();
// Collect updates first, then apply
let updates: Vec<(String, bool, bool, Vec<String>)> = verifications
.iter()
.map(|pv| {
let processor = ProcessorType::all()
.iter()
.find(|p| p.as_str() == pv.processor)
.cloned();
if let Some(ref proc_type) = processor {
// Build verification refs for dependency checking
// Use completeness/valid_json/file_exists from Layer 1 results, not trust_level
let mut verif_refs: HashMap<String, &ProcessorVerification> = HashMap::new();
for v in &verifications {
verif_refs.insert(v.processor.clone(), v);
}
// Layer 2: Dependencies (check completeness, not trust_level)
let (deps_ok, dep_issues) = check_dependencies(proc_type, &verif_refs);
// Layer 3: Reasonableness
let (reasonable, reason_issues) = if let Some(val) = all_values.get(&pv.processor) {
check_reasonableness(proc_type, val, probe_ref, &all_value_refs)
} else {
(true, Vec::new())
};
let mut all_issues = dep_issues.clone();
all_issues.extend(reason_issues);
(pv.processor.clone(), deps_ok, reasonable, all_issues)
} else {
(pv.processor.clone(), true, true, Vec::new())
}
})
.collect();
// Apply updates
for (i, update) in updates.into_iter().enumerate() {
verifications[i].dependency_ok = update.1;
verifications[i].reasonableness = update.2;
verifications[i].issues.extend(update.3);
verifications[i].update_trust_level();
}
report.processors = verifications;
report.update_overall_trust();
report
}
/// Legacy verification function (backward compatible)
pub fn verify_output(processor: &ProcessorType, file_uuid: &str) -> VerificationResult {
let proc_name = processor.as_str();
let filename = match processor {
@@ -63,53 +663,16 @@ pub fn verify_output(processor: &ProcessorType, file_uuid: &str) -> Verification
}
};
match processor {
ProcessorType::Asrx => {
let segs = value.get("segments").and_then(|v| v.as_array());
match segs {
Some(_) => VerificationResult::ok(proc_name, file_uuid),
None => VerificationResult::ok(proc_name, file_uuid),
}
}
ProcessorType::Cut => {
let scenes = value.get("scenes").and_then(|v| v.as_array());
match scenes {
Some(_) => VerificationResult::ok(proc_name, file_uuid),
None => VerificationResult::ok(proc_name, file_uuid),
}
}
ProcessorType::Yolo => VerificationResult::ok(proc_name, file_uuid),
ProcessorType::Face => VerificationResult::ok(proc_name, file_uuid),
ProcessorType::Ocr => {
let frames = value.get("frames").and_then(|v| v.as_array());
match frames {
Some(_) => VerificationResult::ok(proc_name, file_uuid),
None => VerificationResult::ok(proc_name, file_uuid),
}
}
ProcessorType::Pose => {
let frames = value.get("frames").and_then(|v| v.as_array());
match frames {
Some(_) => VerificationResult::ok(proc_name, file_uuid),
None => VerificationResult::ok(proc_name, file_uuid),
}
}
ProcessorType::Scene => {
let scenes = value.get("scenes").and_then(|v| v.as_array());
match scenes {
Some(s) if s.is_empty() => {
VerificationResult::fail(proc_name, file_uuid, "0 scenes")
}
Some(_) => VerificationResult::ok(proc_name, file_uuid),
None => VerificationResult::ok(proc_name, file_uuid),
}
}
ProcessorType::Story => VerificationResult::ok(proc_name, file_uuid),
_ => VerificationResult::ok(proc_name, file_uuid),
// Use new completeness check
let (complete, issues) = check_completeness(processor, &value);
if !complete {
return VerificationResult::fail(proc_name, file_uuid, &issues.join("; "));
}
VerificationResult::ok(proc_name, file_uuid)
}
/// 清理通過驗收的 processor 暫存檔,只保留最終 .json
/// Clean up temp files for a processor
pub fn cleanup_temp_files(processor: &ProcessorType, file_uuid: &str) {
let proc_name = processor.as_str();
let prefix = format!("{}.{}.", file_uuid, proc_name);
@@ -133,9 +696,11 @@ pub fn cleanup_temp_files(processor: &ProcessorType, file_uuid: &str) {
}
}
if removed > 0 {
info!(
tracing::info!(
"Cleaned up {} temp files for {}.{}",
removed, file_uuid, proc_name
removed,
file_uuid,
proc_name
);
}
}

View File

@@ -12,7 +12,7 @@ use crate::core::config::OUTPUT_DIR;
use crate::core::db::qdrant_db::QdrantDb;
use crate::core::db::{
schema, MonitorJobStatus, PostgresDb, ProcessorJobStatus, RedisClient, VectorPayload,
VideoStatus,
VideoStatus, WorkspaceDb,
};
use crate::core::embedding::Embedder;
use crate::core::processor::heuristic_scene::generate_scene_meta;
@@ -376,15 +376,109 @@ impl JobWorker {
error!("Failed to create completed processor result: {}", e);
}
// Load output file and store to pre_chunks
// Also dual-write to workspace if available
let workspace = WorkspaceDb::open(&job.uuid).await.ok();
if let Ok(json_str) = std::fs::read_to_string(&output_path) {
let store_result = match processor_type {
let store_result: Result<()> = match processor_type {
crate::core::db::ProcessorType::Asr => {
if let Ok(result) =
serde_json::from_str::<crate::core::processor::AsrResult>(&json_str)
{
if let Err(e) =
ProcessorPool::store_asr_chunks(&self.db, &job.uuid, &result)
.await
{
error!("Failed to store ASR chunks: {}", e);
}
if let Some(ref ws) = workspace {
for segment in &result.segments {
let data = serde_json::json!({
"text": segment.text,
"timestamp": segment.start_time,
});
let _ = ws
.store_pre_chunk(
"asr",
"raw",
segment.start_frame,
segment.end_frame,
Some(segment.start_time),
Some(segment.end_time),
Some(&data.to_string()),
Some(&segment.text),
)
.await;
}
}
Ok(())
} else {
error!(
"Failed to parse ASR JSON for {}: {}",
job.uuid,
json_str.len()
);
Ok(())
}
}
crate::core::db::ProcessorType::Asrx => {
if let Ok(result) = serde_json::from_str::<
crate::core::processor::AsrxResult,
>(&json_str)
{
ProcessorPool::store_asrx_chunks(&self.db, &job.uuid, &result).await
if let Err(e) =
ProcessorPool::store_asrx_chunks(&self.db, &job.uuid, &result)
.await
{
error!("Failed to store ASRX chunks: {}", e);
}
if let Some(ref ws) = workspace {
for segment in &result.segments {
let data = serde_json::json!({"text": segment.text, "speaker_id": segment.speaker_id, "end_time": segment.end_time});
let _ = ws
.store_pre_chunk(
"asrx",
"raw",
None,
None,
Some(segment.start_time),
Some(segment.end_time),
Some(&data.to_string()),
Some(&segment.text),
)
.await;
// Also store asr pre_chunks (needed by Rule 1 after checkin)
let _ = ws
.store_pre_chunk(
"asr",
"raw",
None,
None,
Some(segment.start_time),
Some(segment.end_time),
Some(&data.to_string()),
Some(&segment.text),
)
.await;
}
let spk_dets: Vec<crate::core::db::workspace_sqlite::SpeakerDetectionBatchItem> = result.segments.iter().map(|s| {
crate::core::db::workspace_sqlite::SpeakerDetectionBatchItem {
speaker_id: s.speaker_id.clone().unwrap_or_default(),
start_time: s.start_time,
end_time: s.end_time,
text: s.text.clone(),
chunk_id: None,
confidence: 0.0,
}
}).collect();
let _ = ws.store_speaker_detections_batch(&spk_dets).await;
}
Ok(())
} else {
error!(
"Failed to parse ASRX JSON for {}: {}",
job.uuid,
json_str.len()
);
Ok(())
}
}
@@ -392,8 +486,35 @@ impl JobWorker {
if let Ok(result) =
serde_json::from_str::<crate::core::processor::CutResult>(&json_str)
{
ProcessorPool::store_cut_chunks(&self.db, &job.uuid, &result).await
if let Err(e) =
ProcessorPool::store_cut_chunks(&self.db, &job.uuid, &result)
.await
{
error!("Failed to store CUT chunks: {}", e);
}
if let Some(ref ws) = workspace {
for scene in &result.scenes {
let _ = ws
.store_pre_chunk(
"cut",
"cut",
Some(scene.start_frame as i64),
Some(scene.end_frame as i64),
Some(scene.start_time),
Some(scene.end_time),
None,
None,
)
.await;
}
}
Ok(())
} else {
error!(
"Failed to parse CUT JSON for {}: {} bytes",
job.uuid,
json_str.len()
);
Ok(())
}
}
@@ -402,8 +523,36 @@ impl JobWorker {
crate::core::processor::YoloResult,
>(&json_str)
{
ProcessorPool::store_yolo_chunks(&self.db, &job.uuid, &result).await
if let Err(e) =
ProcessorPool::store_yolo_chunks(&self.db, &job.uuid, &result)
.await
{
error!("Failed to store YOLO chunks: {}", e);
}
if let Some(ref ws) = workspace {
for frame in &result.frames {
let data = serde_json::json!({"objects": frame.objects});
let _ = ws
.store_pre_chunk(
"yolo",
"raw",
Some(frame.frame as i64),
None,
Some(frame.timestamp),
None,
Some(&data.to_string()),
None,
)
.await;
}
}
Ok(())
} else {
error!(
"Failed to parse YOLO JSON for {}: {} bytes",
job.uuid,
json_str.len()
);
Ok(())
}
}
@@ -411,8 +560,36 @@ impl JobWorker {
if let Ok(result) =
serde_json::from_str::<crate::core::processor::OcrResult>(&json_str)
{
ProcessorPool::store_ocr_chunks(&self.db, &job.uuid, &result).await
if let Err(e) =
ProcessorPool::store_ocr_chunks(&self.db, &job.uuid, &result)
.await
{
error!("Failed to store OCR chunks: {}", e);
}
if let Some(ref ws) = workspace {
for frame in &result.frames {
let data = serde_json::json!({"texts": frame.texts});
let _ = ws
.store_pre_chunk(
"ocr",
"raw",
Some(frame.frame as i64),
None,
Some(frame.timestamp),
None,
Some(&data.to_string()),
None,
)
.await;
}
}
Ok(())
} else {
error!(
"Failed to parse OCR JSON for {}: {} bytes",
job.uuid,
json_str.len()
);
Ok(())
}
}
@@ -421,8 +598,51 @@ impl JobWorker {
crate::core::processor::FaceResult,
>(&json_str)
{
ProcessorPool::store_face_chunks(&self.db, &job.uuid, &result).await
if let Err(e) =
ProcessorPool::store_face_chunks(&self.db, &job.uuid, &result)
.await
{
error!("Failed to store FACE chunks: {}", e);
}
if let Some(ref ws) = workspace {
let dets: Vec<crate::core::db::workspace_sqlite::FaceDetectionBatchItem> = result.frames.iter().flat_map(|frame| {
frame.faces.iter().map(|face| crate::core::db::workspace_sqlite::FaceDetectionBatchItem {
face_id: face.face_id.clone(),
frame: frame.frame as i64,
ts: frame.timestamp,
x: face.x,
y: face.y,
w: face.width,
h: face.height,
confidence: face.confidence,
})
}).collect();
if !dets.is_empty() {
let _ = ws.store_face_detections_batch(&dets).await;
}
for frame in &result.frames {
let data = serde_json::json!({"faces": frame.faces});
let _ = ws
.store_pre_chunk(
"face",
"raw",
Some(frame.frame as i64),
None,
Some(frame.timestamp),
None,
Some(&data.to_string()),
None,
)
.await;
}
}
Ok(())
} else {
error!(
"Failed to parse FACE JSON for {}: {} bytes",
job.uuid,
json_str.len()
);
Ok(())
}
}
@@ -431,11 +651,40 @@ impl JobWorker {
crate::core::processor::PoseResult,
>(&json_str)
{
ProcessorPool::store_pose_chunks(&self.db, &job.uuid, &result).await
if let Err(e) =
ProcessorPool::store_pose_chunks(&self.db, &job.uuid, &result)
.await
{
error!("Failed to store POSE chunks: {}", e);
}
if let Some(ref ws) = workspace {
for frame in &result.frames {
let data = serde_json::json!({"persons": frame.persons});
let _ = ws
.store_pre_chunk(
"pose",
"raw",
Some(frame.frame as i64),
None,
Some(frame.timestamp),
None,
Some(&data.to_string()),
None,
)
.await;
}
}
Ok(())
} else {
error!(
"Failed to parse POSE JSON for {}: {} bytes",
job.uuid,
json_str.len()
);
Ok(())
}
}
crate::core::db::ProcessorType::Appearance => Ok(()),
_ => Ok(()),
};
if let Err(e) = store_result {
@@ -741,7 +990,7 @@ impl JobWorker {
macro_rules! check {
($sql:expr) => {
sqlx::query_scalar::<_, i64>($sql)
sqlx::query_scalar::<_, i32>($sql)
.fetch_one(pool)
.await
.unwrap_or(0)
@@ -797,7 +1046,7 @@ impl JobWorker {
// 例如Rule 1 只需 ASR+ASRX 完成即可觸發,不須等 face/pose/story 完成
// 定義必要 processor必須完成的才算 job 成功)
let essential_processors = ["cut", "asrx", "yolo"];
let essential_processors = ["cut", "asr", "asrx", "yolo"];
let essential_completed = essential_processors.iter().all(|ep| {
results.iter().any(|r| {
@@ -864,7 +1113,7 @@ impl JobWorker {
if has_asrx {
// Guard: only spawn Rule 1 if sentence chunks don't exist yet
let chunk_t = schema::table_name("chunk");
let already_spawned: bool = sqlx::query_scalar::<_, i64>(&format!(
let already_spawned: bool = sqlx::query_scalar::<_, i32>(&format!(
"SELECT 1 FROM {chunk_t} WHERE file_uuid = $1 AND chunk_type = 'sentence' LIMIT 1"
))
.bind(uuid)
@@ -1256,6 +1505,84 @@ impl JobWorker {
);
Ok(())
}
/// Embed the relationship chunks of one file and store the vectors in
/// PostgreSQL and Qdrant.
///
/// Selects the file's `relationship` chunks that have non-empty text and no
/// embedding yet, embeds each text, writes the vector through
/// `db.store_vector`, then upserts it into Qdrant with a `VectorPayload`
/// describing the chunk. A failure at any step for a chunk is logged and that
/// chunk is skipped; only the initial query can make the function return an
/// error.
async fn vectorize_relationship_chunks(db: &PostgresDb, uuid: &str) -> anyhow::Result<()> {
    let embedder = Embedder::new("embeddinggemma-300m".to_string());
    let qdrant = QdrantDb::new();

    let chunk_table = schema::table_name("chunk");
    let query = format!(
        "SELECT chunk_id, text_content, start_frame, end_frame, start_time, end_time \
         FROM {} WHERE file_uuid = $1 AND chunk_type = 'relationship' \
         AND embedding IS NULL AND (text_content IS NOT NULL AND text_content != '') \
         ORDER BY id",
        chunk_table
    );
    let rows = sqlx::query_as::<_, (String, String, i64, i64, f64, f64)>(&query)
        .bind(uuid)
        .fetch_all(db.pool())
        .await?;

    if rows.is_empty() {
        info!("[Vectorize-R2] No relationship chunks to vectorize for {}", uuid);
        return Ok(());
    }

    let total = rows.len();
    info!(
        "[Vectorize-R2] Starting vectorize of {} relationship chunks for {}",
        total, uuid
    );

    let mut stored = 0usize;
    for (chunk_id, text, start_frame, end_frame, start_time, end_time) in &rows {
        if text.is_empty() {
            continue;
        }

        let vector = match embedder.embed_document(&text).await {
            Ok(vector) => vector,
            Err(e) => {
                error!("[Vectorize-R2] Embedding failed for {}: {}", chunk_id, e);
                continue;
            }
        };

        // PostgreSQL first; Qdrant is only attempted once that write succeeded.
        if let Err(e) = db.store_vector(&chunk_id, &vector, uuid).await {
            error!("[Vectorize-R2] PG store failed for {}: {}", chunk_id, e);
            continue;
        }

        let payload = VectorPayload {
            file_uuid: uuid.to_string(),
            chunk_id: chunk_id.clone(),
            chunk_type: "relationship".to_string(),
            start_frame: *start_frame,
            end_frame: *end_frame,
            start_time: *start_time,
            end_time: *end_time,
            text: Some(text.clone()),
        };
        if let Err(e) = qdrant.upsert_vector(&chunk_id, &vector, payload).await {
            error!("[Vectorize-R2] Qdrant upsert failed for {}: {}", chunk_id, e);
            continue;
        }

        stored += 1;
        // Progress line every 10 stored vectors.
        if stored % 10 == 0 {
            info!(
                "[Vectorize-R2] {}/{} vectors stored for {}",
                stored, total, uuid
            );
        }
    }

    info!(
        "[Vectorize-R2] Completed: {}/{} relationship vectors stored for {}",
        stored, total, uuid
    );
    Ok(())
}
}
#[cfg(test)]

View File

@@ -14,7 +14,9 @@ struct ProcessorCleanupGuard {
running_count: Arc<RwLock<usize>>,
frame_count: Arc<RwLock<usize>>,
time_count: Arc<RwLock<usize>>,
best_effort_count: Arc<RwLock<usize>>,
pipeline: PipelineType,
is_best_effort: bool,
}
impl Drop for ProcessorCleanupGuard {
@@ -30,22 +32,30 @@ impl Drop for ProcessorCleanupGuard {
*guard -= 1;
}
}
match self.pipeline {
PipelineType::Frame => {
if let Ok(mut guard) = self.frame_count.try_write() {
if *guard > 0 {
*guard -= 1;
}
if self.is_best_effort {
if let Ok(mut guard) = self.best_effort_count.try_write() {
if *guard > 0 {
*guard -= 1;
}
}
PipelineType::Time => {
if let Ok(mut guard) = self.time_count.try_write() {
if *guard > 0 {
*guard -= 1;
} else {
match self.pipeline {
PipelineType::Frame => {
if let Ok(mut guard) = self.frame_count.try_write() {
if *guard > 0 {
*guard -= 1;
}
}
}
PipelineType::Time => {
if let Ok(mut guard) = self.time_count.try_write() {
if *guard > 0 {
*guard -= 1;
}
}
}
PipelineType::Cross => {} // cross pipeline not tracked in slot counts
}
PipelineType::Cross => {} // cross pipeline not tracked in slot counts
}
}
}
@@ -61,6 +71,7 @@ struct ProcessorHandle {
use crate::core::config::{OUTPUT_DIR, PYTHON_PATH, SCRIPTS_DIR};
use crate::core::db::{
MonitorJob, PipelineType, PostgresDb, ProcessorJobStatus, ProcessorType, QdrantDb, RedisClient,
WorkspaceDb,
};
use crate::core::processor;
use crate::core::processor::asr::AsrResult;
@@ -95,6 +106,8 @@ pub struct ProcessorTask {
const FRAME_SLOT_MAX: usize = 2;
/// Time pipeline max concurrent processors (audio is heavy, run 1 at a time).
const TIME_SLOT_MAX: usize = 1;
/// Best-effort slot (used by low-priority processors like MediaPipe).
const BEST_EFFORT_SLOT_MAX: usize = 1;
pub struct ProcessorPool {
db: Arc<PostgresDb>,
@@ -104,6 +117,7 @@ pub struct ProcessorPool {
running_count: Arc<RwLock<usize>>,
running_frame_count: Arc<RwLock<usize>>,
running_time_count: Arc<RwLock<usize>>,
running_best_effort_count: Arc<RwLock<usize>>,
}
impl ProcessorPool {
@@ -116,6 +130,7 @@ impl ProcessorPool {
running_count: Arc::new(RwLock::new(0)),
running_frame_count: Arc::new(RwLock::new(0)),
running_time_count: Arc::new(RwLock::new(0)),
running_best_effort_count: Arc::new(RwLock::new(0)),
}
}
@@ -225,16 +240,22 @@ impl ProcessorPool {
*count += 1;
}
// 遞增產線專屬 slot
match pipeline {
PipelineType::Frame => *self.running_frame_count.write().await += 1,
PipelineType::Time => *self.running_time_count.write().await += 1,
PipelineType::Cross => {} // cross pipeline uses global slot
let is_best_effort = processor_type == ProcessorType::MediaPipe;
if is_best_effort {
*self.running_best_effort_count.write().await += 1;
} else {
match pipeline {
PipelineType::Frame => *self.running_frame_count.write().await += 1,
PipelineType::Time => *self.running_time_count.write().await += 1,
PipelineType::Cross => {} // cross pipeline uses global slot
}
}
let running = self.running.clone();
let running_count = self.running_count.clone();
let running_frame_count = self.running_frame_count.clone();
let running_time_count = self.running_time_count.clone();
let running_best_effort_count = self.running_best_effort_count.clone();
let child_pid: Arc<RwLock<Option<i32>>> = Arc::new(RwLock::new(None));
running.write().await.insert(
job_id,
@@ -266,7 +287,9 @@ impl ProcessorPool {
running_count: running_count.clone(),
frame_count: running_frame_count.clone(),
time_count: running_time_count.clone(),
best_effort_count: running_best_effort_count.clone(),
pipeline,
is_best_effort,
};
info!("Starting processor {} for job {}", processor_name, job.uuid);
@@ -519,6 +542,14 @@ impl ProcessorPool {
let uuid = Some(job.uuid.as_str());
let video = db.get_video_by_uuid(&job.uuid).await?;
let total_frames = video.as_ref().map(|v| v.total_frames as i32).unwrap_or(0);
let fps = video.as_ref().map(|v| v.fps).unwrap_or(29.97);
// Compute 8Hz sample frames for frame-based processors
let sample_frames =
crate::core::processor::PythonExecutor::compute_8hz_frames(total_frames as i64, fps);
// Open workspace for dual-write (best-effort)
let workspace = WorkspaceDb::open(&job.uuid).await.ok();
match processor_type {
ProcessorType::Cut => {
@@ -540,6 +571,22 @@ impl ProcessorPool {
if let Err(e) = Self::store_cut_chunks(db, &job.uuid, &result).await {
tracing::error!("Failed to store CUT chunks for {}: {}", job.uuid, e);
}
if let Some(ref ws) = workspace {
for scene in &result.scenes {
let _ = ws
.store_pre_chunk(
"cut",
"cut",
Some(scene.start_frame as i64),
Some(scene.end_frame as i64),
Some(scene.start_time),
Some(scene.end_time),
None,
None,
)
.await;
}
}
Ok(ProcessorOutput {
data: serde_json::to_value(result)?,
chunks_produced,
@@ -550,9 +597,13 @@ impl ProcessorPool {
})
}
ProcessorType::Yolo => {
let result =
processor::process_yolo(video_path, output_path.to_str().unwrap(), uuid)
.await?;
let result = processor::process_yolo(
video_path,
output_path.to_str().unwrap(),
uuid,
Some(&sample_frames),
)
.await?;
let chunks_produced = result.frames.len() as i32;
tracing::info!(
"YOLO completed, storing {} frames for {}",
@@ -562,6 +613,23 @@ impl ProcessorPool {
if let Err(e) = Self::store_yolo_chunks(db, &job.uuid, &result).await {
tracing::error!("Failed to store YOLO chunks for {}: {}", job.uuid, e);
}
if let Some(ref ws) = workspace {
for frame in &result.frames {
let data = serde_json::json!({"objects": frame.objects});
let _ = ws
.store_pre_chunk(
"yolo",
"raw",
Some(frame.frame as i64),
None,
Some(frame.timestamp),
None,
Some(&data.to_string()),
None,
)
.await;
}
}
Ok(ProcessorOutput {
data: serde_json::to_value(result)?,
chunks_produced,
@@ -572,8 +640,13 @@ impl ProcessorPool {
})
}
ProcessorType::Ocr => {
let result =
processor::process_ocr(video_path, output_path.to_str().unwrap(), uuid).await?;
let result = processor::process_ocr(
video_path,
output_path.to_str().unwrap(),
uuid,
Some(&sample_frames),
)
.await?;
let chunks_produced = result.frames.len() as i32;
tracing::info!(
"OCR completed, storing {} frames for {}",
@@ -583,6 +656,23 @@ impl ProcessorPool {
if let Err(e) = Self::store_ocr_chunks(db, &job.uuid, &result).await {
tracing::error!("Failed to store OCR chunks for {}: {}", job.uuid, e);
}
if let Some(ref ws) = workspace {
for frame in &result.frames {
let data = serde_json::json!({"texts": frame.texts});
let _ = ws
.store_pre_chunk(
"ocr",
"raw",
Some(frame.frame as i64),
None,
Some(frame.timestamp),
None,
Some(&data.to_string()),
None,
)
.await;
}
}
Ok(ProcessorOutput {
data: serde_json::to_value(result)?,
chunks_produced,
@@ -593,9 +683,13 @@ impl ProcessorPool {
})
}
ProcessorType::Face => {
let result =
processor::process_face(video_path, output_path.to_str().unwrap(), uuid)
.await?;
let result = processor::process_face(
video_path,
output_path.to_str().unwrap(),
uuid,
Some(&sample_frames),
)
.await?;
let chunks_produced = result.frames.len() as i32;
tracing::info!(
"FACE completed, storing {} frames for {}",
@@ -605,6 +699,45 @@ impl ProcessorPool {
if let Err(e) = Self::store_face_chunks(db, &job.uuid, &result).await {
tracing::error!("Failed to store FACE chunks for {}: {}", job.uuid, e);
}
if let Some(ref ws) = workspace {
let dets: Vec<crate::core::db::workspace_sqlite::FaceDetectionBatchItem> =
result
.frames
.iter()
.flat_map(|frame| {
frame.faces.iter().map(|face| {
crate::core::db::workspace_sqlite::FaceDetectionBatchItem {
face_id: face.face_id.clone(),
frame: frame.frame as i64,
ts: frame.timestamp,
x: face.x,
y: face.y,
w: face.width,
h: face.height,
confidence: face.confidence,
}
})
})
.collect();
if !dets.is_empty() {
let _ = ws.store_face_detections_batch(&dets).await;
}
for frame in &result.frames {
let data = serde_json::json!({"faces": frame.faces});
let _ = ws
.store_pre_chunk(
"face",
"raw",
Some(frame.frame as i64),
None,
Some(frame.timestamp),
None,
Some(&data.to_string()),
None,
)
.await;
}
}
Ok(ProcessorOutput {
data: serde_json::to_value(result)?,
chunks_produced,
@@ -615,9 +748,13 @@ impl ProcessorPool {
})
}
ProcessorType::Pose => {
let result =
processor::process_pose(video_path, output_path.to_str().unwrap(), uuid)
.await?;
let result = processor::process_pose(
video_path,
output_path.to_str().unwrap(),
uuid,
Some(&sample_frames),
)
.await?;
let chunks_produced = result.frames.len() as i32;
tracing::info!(
"POSE completed, storing {} frames for {}",
@@ -627,6 +764,91 @@ impl ProcessorPool {
if let Err(e) = Self::store_pose_chunks(db, &job.uuid, &result).await {
tracing::error!("Failed to store POSE chunks for {}: {}", job.uuid, e);
}
if let Some(ref ws) = workspace {
for frame in &result.frames {
let data = serde_json::json!({"persons": frame.persons});
let _ = ws
.store_pre_chunk(
"pose",
"raw",
Some(frame.frame as i64),
None,
Some(frame.timestamp),
None,
Some(&data.to_string()),
None,
)
.await;
}
}
Ok(ProcessorOutput {
data: serde_json::to_value(result)?,
chunks_produced,
frames_processed: total_frames,
total_frames,
retry_count: 0,
pid: 0,
})
}
ProcessorType::Appearance => {
    // The appearance processor takes a pose JSON path as its second argument;
    // it is built here as `<output_dir>/<job uuid>.pose.json`.
    // NOTE(review): presumably this file is written by the Pose processor — confirm.
    let pose_json =
        std::path::Path::new(&output_dir).join(format!("{}.pose.json", job.uuid));
    // Panics if the output path is not valid UTF-8.
    let out_path = output_path.to_str().unwrap();
    let appearance = processor::process_appearance(
        video_path,
        // A non-UTF-8 pose path degrades to an empty string instead of failing.
        pose_json.to_str().unwrap_or(""),
        out_path,
        uuid,
        Some(&sample_frames),
    )
    .await?;
    let frame_total = appearance.frame_count as i32;
    tracing::info!(
        "APPEARANCE completed, {} frames for {}",
        frame_total,
        job.uuid
    );
    // `frames_processed` is reported as the video's total frame count;
    // `retry_count` and `pid` are fixed at 0 here.
    Ok(ProcessorOutput {
        data: serde_json::to_value(appearance)?,
        chunks_produced: frame_total,
        frames_processed: total_frames,
        total_frames,
        retry_count: 0,
        pid: 0,
    })
}
ProcessorType::Asr => {
let result =
processor::process_asr(video_path, output_path.to_str().unwrap(), uuid).await?;
let chunks_produced = result.segments.len() as i32;
tracing::info!(
"ASR completed, storing {} segments for {}",
chunks_produced,
job.uuid
);
if let Err(e) = Self::store_asr_chunks(db, &job.uuid, &result).await {
tracing::error!("Failed to store ASR chunks for {}: {}", job.uuid, e);
}
if let Some(ref ws) = workspace {
for segment in &result.segments {
let data = serde_json::json!({
"text": segment.text,
"timestamp": segment.start_time,
});
let _ = ws
.store_pre_chunk(
"asr",
"raw",
segment.start_frame,
segment.end_frame,
Some(segment.start_time),
Some(segment.end_time),
Some(&data.to_string()),
Some(&segment.text),
)
.await;
}
}
Ok(ProcessorOutput {
data: serde_json::to_value(result)?,
chunks_produced,
@@ -653,6 +875,47 @@ impl ProcessorPool {
if let Err(e) = Self::store_voice_embeddings_to_qdrant(&job.uuid, &result).await {
tracing::error!("Failed to store voice embeddings to Qdrant: {}", e);
}
// Write to the workspace
if let Some(ref ws) = workspace {
for segment in &result.segments {
let data = serde_json::json!({
"text": segment.text,
"speaker_id": segment.speaker_id,
"timestamp": segment.start_time,
});
let _ = ws
.store_pre_chunk(
"asrx",
"raw",
None,
None,
Some(segment.start_time),
Some(segment.end_time),
Some(&data.to_string()),
Some(&segment.text),
)
.await;
}
let spk_dets: Vec<
crate::core::db::workspace_sqlite::SpeakerDetectionBatchItem,
> = result
.segments
.iter()
.map(
|s| crate::core::db::workspace_sqlite::SpeakerDetectionBatchItem {
speaker_id: s.speaker_id.clone().unwrap_or_default(),
start_time: s.start_time,
end_time: s.end_time,
text: s.text.clone(),
chunk_id: None,
confidence: 0.0,
},
)
.collect();
if !spk_dets.is_empty() {
let _ = ws.store_speaker_detections_batch(&spk_dets).await;
}
}
Ok(ProcessorOutput {
data: serde_json::to_value(result)?,
chunks_produced,
@@ -703,6 +966,22 @@ impl ProcessorPool {
if let Err(e) = Self::store_scene_chunks(db, &job.uuid, &result).await {
tracing::error!("Failed to store Scene chunks for {}: {}", job.uuid, e);
}
if let Some(ref ws) = workspace {
for scene in &result.scenes {
let _ = ws
.store_pre_chunk(
"scene",
"scene",
None,
None,
Some(scene.start_time),
Some(scene.end_time),
None,
None,
)
.await;
}
}
Ok(ProcessorOutput {
data: serde_json::to_value(result)?,
chunks_produced,
@@ -763,6 +1042,29 @@ impl ProcessorPool {
pid: 0,
})
}
ProcessorType::MediaPipe => {
    // Panics if the output path is not valid UTF-8.
    let out_path = output_path.to_str().unwrap();
    // Run the MediaPipe v2 processor, passing the precomputed sample frames.
    let mediapipe = processor::process_mediapipe_v2(
        video_path,
        out_path,
        uuid,
        Some(&sample_frames),
    )
    .await?;
    // One chunk is counted per frame entry in the result.
    let frame_total = mediapipe.frames.len() as i32;
    tracing::info!(
        "MEDIAPIPE completed, {} frames for {}",
        frame_total,
        job.uuid
    );
    // `frames_processed` is reported as the video's total frame count;
    // `retry_count` and `pid` are fixed at 0 here.
    Ok(ProcessorOutput {
        data: serde_json::to_value(mediapipe)?,
        chunks_produced: frame_total,
        frames_processed: total_frames,
        total_frames,
        retry_count: 0,
        pid: 0,
    })
}
}
}
@@ -944,6 +1246,7 @@ impl ProcessorPool {
detections_to_store.push((
frame.frame as i64,
frame.timestamp,
face.face_id.clone(),
face.x,
face.y,
face.width,
@@ -960,23 +1263,20 @@ impl ProcessorPool {
Ok(())
}
/// Write voice embeddings to the Qdrant momentry_dev_voice collection
/// Write voice embeddings to the per-file Qdrant {file_uuid}_voice collection
pub async fn store_voice_embeddings_to_qdrant(
uuid: &str,
asrx_result: &AsrxResult,
) -> Result<()> {
let qdrant = QdrantDb::new();
let collection = format!(
"{}{}",
crate::core::config::REDIS_KEY_PREFIX
.as_str()
.trim_end_matches(':'),
"_voice"
);
let collection = format!("{}_voice", uuid);
// Ensure the collection exists (dim=192 for ASRX voice)
if let Err(e) = qdrant.ensure_collection(&collection, 192).await {
tracing::error!("Failed to ensure Qdrant voice collection: {}", e);
tracing::error!(
"Failed to ensure Qdrant voice collection {}: {}",
collection,
e
);
return Ok(());
}
@@ -991,12 +1291,10 @@ impl ProcessorPool {
if emb.len() != 192 {
continue;
}
// Point ID: hash(file_uuid + speaker_id + index) for global uniqueness
// Point ID: hash(speaker_id + index) — file_uuid redundant in per-file collection
let point_id = {
use sha2::{Digest, Sha256};
let mut hasher = Sha256::new();
hasher.update(uuid.as_bytes());
hasher.update(b"_");
hasher.update(segment.speaker_id.clone().unwrap_or_default().as_bytes());
hasher.update(b"_");
hasher.update(i.to_string().as_bytes());
@@ -1012,6 +1310,7 @@ impl ProcessorPool {
"end_frame": segment.end_frame,
"start_time": segment.start_time,
"end_time": segment.end_time,
"event_type": "speaker",
});
if let Err(e) = qdrant
@@ -1026,7 +1325,12 @@ impl ProcessorPool {
}
if count > 0 {
tracing::info!("Stored {} voice embeddings to Qdrant for {}", count, uuid);
tracing::info!(
"Stored {} voice embeddings to Qdrant per-file collection {} for {}",
count,
collection,
uuid
);
}
Ok(())
}
@@ -1079,6 +1383,7 @@ impl ProcessorPool {
"text": segment.text,
"speaker_id": segment.speaker_id,
"timestamp": segment.start_time,
"end_time": segment.end_time,
});
// ASRX is time-based, so we use segment index or start time as coordinate.