feat: Rule2 TKG relationship chunks + Phase0-1 Qdrant integration
Phase 0: TKG builder: populate face_detections from face.json
- Fix face.json parser for pose_angle format
- Call store_traced_faces.py to set trace_id
- Skip if trace_id already populated

Phase 1: Qdrant face embeddings integration
- Add FaceEmbeddingDb module (src/core/db/face_embedding_db.rs)
- Create dev_face_embeddings collection (dim=512)
- Store 1122 face embeddings with pose metadata
- API: init_collection, batch_upsert, search_similar

Rule2: TKG edges → relationship chunks
- Design: RULE2_TKG_RELATIONSHIP_V1.0.md
- Implementation: rule2_ingest.rs
- ChunkType::Relationship added
- Edge types: SPEAKS_AS, MUTUAL_GAZE, CO_OCCURS_WITH, HAS_APPEARANCE, WEARS
- Auto-trigger on TKG rebuild

API:
- POST /api/v1/file/:file_uuid/rule2 (vectorization)
- POST /api/v1/file/:file_uuid/tkg/rebuild (auto Rule2)

Test: 75 relationship chunks created + vectorized
This commit is contained in:
@@ -38,10 +38,17 @@ pub fn trace_agent_routes() -> Router<crate::api::types::AppState> {
|
||||
get(get_cooccurrence),
|
||||
)
|
||||
.route("/api/v1/file/:file_uuid/tkg/rebuild", post(rebuild_tkg))
|
||||
.route("/api/v1/file/:file_uuid/rule2", post(ingest_rule2))
|
||||
.route(
|
||||
"/api/v1/file/:file_uuid/representative-frame",
|
||||
get(get_representative_frame),
|
||||
)
|
||||
.route("/api/v1/file/:file_uuid/tkg/nodes", post(query_tkg_nodes))
|
||||
.route("/api/v1/file/:file_uuid/tkg/edges", post(query_tkg_edges))
|
||||
.route(
|
||||
"/api/v1/file/:file_uuid/tkg/node/:node_id",
|
||||
get(get_tkg_node_detail),
|
||||
)
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
@@ -961,22 +968,52 @@ async fn rebuild_tkg(
|
||||
State(state): State<crate::api::types::AppState>,
|
||||
Path(file_uuid): Path<String>,
|
||||
) -> Json<TkgRebuildResponse> {
|
||||
use crate::core::chunk::rule2_ingest::ingest_rule2;
|
||||
use tracing::info;
|
||||
|
||||
let result = crate::core::processor::tkg::build_tkg(&state.db, &file_uuid, &OUTPUT_DIR).await;
|
||||
|
||||
match result {
|
||||
Ok(r) => Json(TkgRebuildResponse {
|
||||
success: true,
|
||||
file_uuid,
|
||||
result: Some(serde_json::json!({
|
||||
"face_trace_nodes": r.face_trace_nodes,
|
||||
"object_nodes": r.object_nodes,
|
||||
"speaker_nodes": r.speaker_nodes,
|
||||
"co_occurrence_edges": r.co_occurrence_edges,
|
||||
"speaker_face_edges": r.speaker_face_edges,
|
||||
"face_face_edges": r.face_face_edges,
|
||||
})),
|
||||
error: None,
|
||||
}),
|
||||
Ok(r) => {
|
||||
let total_edges = r.speaker_face_edges
|
||||
+ r.mutual_gaze_edges
|
||||
+ r.face_face_edges
|
||||
+ r.co_occurrence_edges
|
||||
+ r.has_appearance_edges
|
||||
+ r.wears_edges;
|
||||
|
||||
if total_edges > 0 {
|
||||
info!("[TKG] {} relationship edges found, triggering Rule 2 ingestion...", total_edges);
|
||||
match ingest_rule2(state.db.pool(), &file_uuid).await {
|
||||
Ok(count) => info!("[TKG] Rule 2 created {} relationship chunks", count),
|
||||
Err(e) => info!("[TKG] Rule 2 ingestion failed: {}", e),
|
||||
}
|
||||
}
|
||||
|
||||
Json(TkgRebuildResponse {
|
||||
success: true,
|
||||
file_uuid,
|
||||
result: Some(serde_json::json!({
|
||||
"face_trace_nodes": r.face_trace_nodes,
|
||||
"gaze_trace_nodes": r.gaze_trace_nodes,
|
||||
"lip_trace_nodes": r.lip_trace_nodes,
|
||||
"text_trace_nodes": r.text_trace_nodes,
|
||||
"appearance_trace_nodes": r.appearance_trace_nodes,
|
||||
"skin_tone_trace_nodes": r.skin_tone_trace_nodes,
|
||||
"accessory_nodes": r.accessory_nodes,
|
||||
"object_nodes": r.object_nodes,
|
||||
"speaker_nodes": r.speaker_nodes,
|
||||
"co_occurrence_edges": r.co_occurrence_edges,
|
||||
"speaker_face_edges": r.speaker_face_edges,
|
||||
"face_face_edges": r.face_face_edges,
|
||||
"mutual_gaze_edges": r.mutual_gaze_edges,
|
||||
"lip_sync_edges": r.lip_sync_edges,
|
||||
"has_appearance_edges": r.has_appearance_edges,
|
||||
"wears_edges": r.wears_edges,
|
||||
})),
|
||||
error: None,
|
||||
})
|
||||
}
|
||||
Err(e) => Json(TkgRebuildResponse {
|
||||
success: false,
|
||||
file_uuid,
|
||||
@@ -1097,3 +1134,463 @@ async fn get_stranger_thumbnail(
|
||||
|
||||
get_trace_thumbnail_inner(&state, &file_uuid, trace_id).await
|
||||
}
|
||||
|
||||
// ── TKG Node/Edge Query APIs ─────────────────────────────────────
|
||||
|
||||
/// Returns `name` qualified with the database schema, for splicing into SQL text.
///
/// The schema is read from the `DATABASE_SCHEMA` environment variable on every
/// call. When the variable is unset, not valid Unicode, or empty/whitespace-only,
/// the schema defaults to `dev`. The `public` schema is returned unqualified.
///
/// An empty value previously produced `.table`, which is not a valid table
/// reference, so it is treated the same as an unset variable.
fn t(name: &str) -> String {
    let schema = std::env::var("DATABASE_SCHEMA")
        .ok()
        .filter(|value| !value.trim().is_empty())
        .unwrap_or_else(|| "dev".to_string());

    if schema == "public" {
        name.to_string()
    } else {
        format!("{}.{}", schema, name)
    }
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct TkgNodesRequest {
|
||||
node_type: Option<String>,
|
||||
page: Option<i64>,
|
||||
page_size: Option<i64>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize)]
|
||||
struct TkgNodeInfo {
|
||||
id: i64,
|
||||
node_type: String,
|
||||
external_id: String,
|
||||
label: String,
|
||||
properties: serde_json::Value,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize)]
|
||||
struct TkgNodesResponse {
|
||||
success: bool,
|
||||
file_uuid: String,
|
||||
total: i64,
|
||||
page: i64,
|
||||
page_size: i64,
|
||||
nodes: Vec<TkgNodeInfo>,
|
||||
}
|
||||
|
||||
async fn query_tkg_nodes(
|
||||
State(state): State<crate::api::types::AppState>,
|
||||
Path(file_uuid): Path<String>,
|
||||
Json(req): Json<TkgNodesRequest>,
|
||||
) -> Result<Json<TkgNodesResponse>, (StatusCode, Json<serde_json::Value>)> {
|
||||
let nodes_table = t("tkg_nodes");
|
||||
let page = req.page.unwrap_or(1).max(1);
|
||||
let page_size = req.page_size.unwrap_or(100).max(1).min(500);
|
||||
let offset = (page - 1) * page_size;
|
||||
|
||||
let (where_clause, count_args, query_args) = if let Some(ref node_type) = req.node_type {
|
||||
(
|
||||
"WHERE file_uuid = $1 AND node_type = $2".to_string(),
|
||||
vec![serde_json::json!([&file_uuid, node_type])],
|
||||
vec![serde_json::json!([
|
||||
&file_uuid, node_type, page_size, offset
|
||||
])],
|
||||
)
|
||||
} else {
|
||||
(
|
||||
"WHERE file_uuid = $1".to_string(),
|
||||
vec![serde_json::json!([&file_uuid])],
|
||||
vec![serde_json::json!([&file_uuid, page_size, offset])],
|
||||
)
|
||||
};
|
||||
|
||||
let total: i64 = if let Some(ref node_type) = req.node_type {
|
||||
sqlx::query_scalar(&format!(
|
||||
"SELECT COUNT(*) FROM {} {}",
|
||||
nodes_table, where_clause
|
||||
))
|
||||
.bind(&file_uuid)
|
||||
.bind(node_type)
|
||||
.fetch_one(state.db.pool())
|
||||
.await
|
||||
.map_err(|e| {
|
||||
(
|
||||
StatusCode::INTERNAL_SERVER_ERROR,
|
||||
Json(serde_json::json!({"error": e.to_string()})),
|
||||
)
|
||||
})?
|
||||
} else {
|
||||
sqlx::query_scalar(&format!(
|
||||
"SELECT COUNT(*) FROM {} {}",
|
||||
nodes_table, where_clause
|
||||
))
|
||||
.bind(&file_uuid)
|
||||
.fetch_one(state.db.pool())
|
||||
.await
|
||||
.map_err(|e| {
|
||||
(
|
||||
StatusCode::INTERNAL_SERVER_ERROR,
|
||||
Json(serde_json::json!({"error": e.to_string()})),
|
||||
)
|
||||
})?
|
||||
};
|
||||
|
||||
let query = format!(
|
||||
"SELECT id, node_type, external_id, label, properties FROM {} {} ORDER BY id LIMIT ${} OFFSET ${}",
|
||||
nodes_table, where_clause,
|
||||
if req.node_type.is_some() { 3 } else { 2 },
|
||||
if req.node_type.is_some() { 4 } else { 3 }
|
||||
);
|
||||
|
||||
let rows: Vec<(i64, String, String, String, serde_json::Value)> =
|
||||
if let Some(ref node_type) = req.node_type {
|
||||
sqlx::query_as(&query)
|
||||
.bind(&file_uuid)
|
||||
.bind(node_type)
|
||||
.bind(page_size)
|
||||
.bind(offset)
|
||||
.fetch_all(state.db.pool())
|
||||
.await
|
||||
.map_err(|e| {
|
||||
(
|
||||
StatusCode::INTERNAL_SERVER_ERROR,
|
||||
Json(serde_json::json!({"error": e.to_string()})),
|
||||
)
|
||||
})?
|
||||
} else {
|
||||
sqlx::query_as(&query)
|
||||
.bind(&file_uuid)
|
||||
.bind(page_size)
|
||||
.bind(offset)
|
||||
.fetch_all(state.db.pool())
|
||||
.await
|
||||
.map_err(|e| {
|
||||
(
|
||||
StatusCode::INTERNAL_SERVER_ERROR,
|
||||
Json(serde_json::json!({"error": e.to_string()})),
|
||||
)
|
||||
})?
|
||||
};
|
||||
|
||||
let nodes = rows
|
||||
.into_iter()
|
||||
.map(
|
||||
|(id, node_type, external_id, label, properties)| TkgNodeInfo {
|
||||
id,
|
||||
node_type,
|
||||
external_id,
|
||||
label,
|
||||
properties,
|
||||
},
|
||||
)
|
||||
.collect();
|
||||
|
||||
Ok(Json(TkgNodesResponse {
|
||||
success: true,
|
||||
file_uuid,
|
||||
total,
|
||||
page,
|
||||
page_size,
|
||||
nodes,
|
||||
}))
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct TkgEdgesRequest {
|
||||
edge_type: Option<String>,
|
||||
source_type: Option<String>,
|
||||
target_type: Option<String>,
|
||||
page: Option<i64>,
|
||||
page_size: Option<i64>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize)]
|
||||
struct TkgEdgeInfo {
|
||||
id: i64,
|
||||
edge_type: String,
|
||||
source_node_id: i64,
|
||||
target_node_id: i64,
|
||||
properties: serde_json::Value,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize)]
|
||||
struct TkgEdgesResponse {
|
||||
success: bool,
|
||||
file_uuid: String,
|
||||
total: i64,
|
||||
page: i64,
|
||||
page_size: i64,
|
||||
edges: Vec<TkgEdgeInfo>,
|
||||
}
|
||||
|
||||
async fn query_tkg_edges(
|
||||
State(state): State<crate::api::types::AppState>,
|
||||
Path(file_uuid): Path<String>,
|
||||
Json(req): Json<TkgEdgesRequest>,
|
||||
) -> Result<Json<TkgEdgesResponse>, (StatusCode, Json<serde_json::Value>)> {
|
||||
let edges_table = t("tkg_edges");
|
||||
let nodes_table = t("tkg_nodes");
|
||||
let page = req.page.unwrap_or(1).max(1);
|
||||
let page_size = req.page_size.unwrap_or(100).max(1).min(500);
|
||||
let offset = (page - 1) * page_size;
|
||||
|
||||
let mut conditions = vec!["e.file_uuid = $1".to_string()];
|
||||
let mut param_idx = 2i32;
|
||||
let mut joins = String::new();
|
||||
|
||||
if let Some(ref edge_type) = req.edge_type {
|
||||
conditions.push(format!("e.edge_type = ${}", param_idx));
|
||||
param_idx += 1;
|
||||
}
|
||||
if req.source_type.is_some() || req.target_type.is_some() {
|
||||
joins = format!(
|
||||
" JOIN {} sn ON e.source_node_id = sn.id JOIN {} tn ON e.target_node_id = tn.id",
|
||||
nodes_table, nodes_table
|
||||
);
|
||||
}
|
||||
if let Some(ref source_type) = req.source_type {
|
||||
conditions.push(format!("sn.node_type = ${}", param_idx));
|
||||
param_idx += 1;
|
||||
}
|
||||
if let Some(ref target_type) = req.target_type {
|
||||
conditions.push(format!("tn.node_type = ${}", param_idx));
|
||||
param_idx += 1;
|
||||
}
|
||||
|
||||
let where_clause = conditions.join(" AND ");
|
||||
let count_query = format!(
|
||||
"SELECT COUNT(*) FROM {} e {} WHERE {}",
|
||||
edges_table, joins, where_clause
|
||||
);
|
||||
|
||||
let total: i64 = {
|
||||
let mut q = sqlx::query_scalar::<_, i64>(&count_query).bind(&file_uuid);
|
||||
if let Some(ref edge_type) = req.edge_type {
|
||||
q = q.bind(edge_type);
|
||||
}
|
||||
if let Some(ref source_type) = req.source_type {
|
||||
q = q.bind(source_type);
|
||||
}
|
||||
if let Some(ref target_type) = req.target_type {
|
||||
q = q.bind(target_type);
|
||||
}
|
||||
q.fetch_one(state.db.pool()).await.map_err(|e| {
|
||||
(
|
||||
StatusCode::INTERNAL_SERVER_ERROR,
|
||||
Json(serde_json::json!({"error": e.to_string()})),
|
||||
)
|
||||
})?
|
||||
};
|
||||
|
||||
let query = format!(
|
||||
"SELECT e.id, e.edge_type, e.source_node_id, e.target_node_id, e.properties FROM {} e {} WHERE {} ORDER BY e.id LIMIT ${} OFFSET ${}",
|
||||
edges_table, joins, where_clause, param_idx, param_idx + 1
|
||||
);
|
||||
|
||||
let rows: Vec<(i64, String, i64, i64, serde_json::Value)> = {
|
||||
let mut q = sqlx::query_as(&query).bind(&file_uuid);
|
||||
if let Some(ref edge_type) = req.edge_type {
|
||||
q = q.bind(edge_type);
|
||||
}
|
||||
if let Some(ref source_type) = req.source_type {
|
||||
q = q.bind(source_type);
|
||||
}
|
||||
if let Some(ref target_type) = req.target_type {
|
||||
q = q.bind(target_type);
|
||||
}
|
||||
q.bind(page_size)
|
||||
.bind(offset)
|
||||
.fetch_all(state.db.pool())
|
||||
.await
|
||||
.map_err(|e| {
|
||||
(
|
||||
StatusCode::INTERNAL_SERVER_ERROR,
|
||||
Json(serde_json::json!({"error": e.to_string()})),
|
||||
)
|
||||
})?
|
||||
};
|
||||
|
||||
let edges = rows
|
||||
.into_iter()
|
||||
.map(
|
||||
|(id, edge_type, source_node_id, target_node_id, properties)| TkgEdgeInfo {
|
||||
id,
|
||||
edge_type,
|
||||
source_node_id,
|
||||
target_node_id,
|
||||
properties,
|
||||
},
|
||||
)
|
||||
.collect();
|
||||
|
||||
Ok(Json(TkgEdgesResponse {
|
||||
success: true,
|
||||
file_uuid,
|
||||
total,
|
||||
page,
|
||||
page_size,
|
||||
edges,
|
||||
}))
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize)]
|
||||
struct TkgNodeWithEdges {
|
||||
node: TkgNodeInfo,
|
||||
incoming_edges: Vec<TkgEdgeInfo>,
|
||||
outgoing_edges: Vec<TkgEdgeInfo>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize)]
|
||||
struct TkgNodeDetailResponse {
|
||||
success: bool,
|
||||
file_uuid: String,
|
||||
node: Option<TkgNodeWithEdges>,
|
||||
error: Option<String>,
|
||||
}
|
||||
|
||||
async fn get_tkg_node_detail(
|
||||
State(state): State<crate::api::types::AppState>,
|
||||
Path((file_uuid, node_id)): Path<(String, i64)>,
|
||||
) -> Json<TkgNodeDetailResponse> {
|
||||
let nodes_table = t("tkg_nodes");
|
||||
let edges_table = t("tkg_edges");
|
||||
|
||||
let node: Option<(i64, String, String, String, serde_json::Value)> = sqlx::query_as(
|
||||
&format!("SELECT id, node_type, external_id, label, properties FROM {} WHERE file_uuid = $1 AND id = $2", nodes_table)
|
||||
)
|
||||
.bind(&file_uuid).bind(node_id)
|
||||
.fetch_optional(state.db.pool()).await.ok().flatten();
|
||||
|
||||
match node {
|
||||
Some((id, node_type, external_id, label, properties)) => {
|
||||
let incoming: Vec<TkgEdgeInfo> = sqlx::query_as(
|
||||
&format!("SELECT id, edge_type, source_node_id, target_node_id, properties FROM {} WHERE file_uuid = $1 AND target_node_id = $2", edges_table)
|
||||
)
|
||||
.bind(&file_uuid).bind(node_id)
|
||||
.fetch_all(state.db.pool()).await.unwrap_or_default()
|
||||
.into_iter().map(|(id, edge_type, source_node_id, target_node_id, properties)| {
|
||||
TkgEdgeInfo { id, edge_type, source_node_id, target_node_id, properties }
|
||||
}).collect();
|
||||
|
||||
let outgoing: Vec<TkgEdgeInfo> = sqlx::query_as(
|
||||
&format!("SELECT id, edge_type, source_node_id, target_node_id, properties FROM {} WHERE file_uuid = $1 AND source_node_id = $2", edges_table)
|
||||
)
|
||||
.bind(&file_uuid).bind(node_id)
|
||||
.fetch_all(state.db.pool()).await.unwrap_or_default()
|
||||
.into_iter().map(|(id, edge_type, source_node_id, target_node_id, properties)| {
|
||||
TkgEdgeInfo { id, edge_type, source_node_id, target_node_id, properties }
|
||||
}).collect();
|
||||
|
||||
Json(TkgNodeDetailResponse {
|
||||
success: true,
|
||||
file_uuid,
|
||||
node: Some(TkgNodeWithEdges {
|
||||
node: TkgNodeInfo {
|
||||
id,
|
||||
node_type,
|
||||
external_id,
|
||||
label,
|
||||
properties,
|
||||
},
|
||||
incoming_edges: incoming,
|
||||
outgoing_edges: outgoing,
|
||||
}),
|
||||
error: None,
|
||||
})
|
||||
}
|
||||
None => Json(TkgNodeDetailResponse {
|
||||
success: false,
|
||||
file_uuid,
|
||||
node: None,
|
||||
error: Some("Node not found".to_string()),
|
||||
}),
|
||||
}
|
||||
}
|
||||
|
||||
// ── Rule 2 Ingest ───────────────────────────────────────────────────
|
||||
|
||||
#[derive(Serialize)]
|
||||
struct IngestRule2Response {
|
||||
success: bool,
|
||||
file_uuid: String,
|
||||
rule2_chunks: i64,
|
||||
vectorized_chunks: Option<i64>,
|
||||
error: Option<String>,
|
||||
}
|
||||
|
||||
async fn ingest_rule2(
|
||||
State(state): State<crate::api::types::AppState>,
|
||||
Path(file_uuid): Path<String>,
|
||||
) -> Result<Json<IngestRule2Response>, (StatusCode, Json<serde_json::Value>)> {
|
||||
use crate::core::chunk::rule2_ingest::ingest_rule2;
|
||||
use crate::core::embedding::Embedder;
|
||||
use crate::core::db::schema;
|
||||
use crate::core::db::qdrant_db::{QdrantDb, VectorPayload};
|
||||
use tracing::info;
|
||||
|
||||
let result = ingest_rule2(state.db.pool(), &file_uuid).await;
|
||||
|
||||
match result {
|
||||
Ok(rule2_chunks) => {
|
||||
info!(
|
||||
"[Rule2API] {} relationship chunks created for {}",
|
||||
rule2_chunks, file_uuid
|
||||
);
|
||||
|
||||
// Auto-vectorize relationship chunks
|
||||
let embedder = Embedder::new("embeddinggemma-300m".to_string());
|
||||
let qdrant = QdrantDb::new();
|
||||
let pool = state.db.pool();
|
||||
let chunk_table = schema::table_name("chunk");
|
||||
|
||||
let rows: Vec<(String, String, i64, i64, f64, f64)> = sqlx::query_as(&format!(
|
||||
"SELECT chunk_id, text_content, start_frame, end_frame, start_time, end_time \
|
||||
FROM {} WHERE file_uuid = $1 AND chunk_type = 'relationship' \
|
||||
AND embedding IS NULL AND (text_content IS NOT NULL AND text_content != '')",
|
||||
chunk_table
|
||||
))
|
||||
.bind(&file_uuid)
|
||||
.fetch_all(pool)
|
||||
.await
|
||||
.map_err(|e| {
|
||||
(
|
||||
StatusCode::INTERNAL_SERVER_ERROR,
|
||||
Json(serde_json::json!({"error": e.to_string()})),
|
||||
)
|
||||
})?;
|
||||
|
||||
let mut vectorized = 0i64;
|
||||
for (chunk_id, text, start_frame, end_frame, start_time, end_time) in &rows {
|
||||
if text.is_empty() {
|
||||
continue;
|
||||
}
|
||||
if let Ok(vector) = embedder.embed_document(&text).await {
|
||||
if state.db.store_vector(&chunk_id, &vector, &file_uuid).await.is_ok() {
|
||||
let payload = VectorPayload {
|
||||
file_uuid: file_uuid.clone(),
|
||||
chunk_id: chunk_id.clone(),
|
||||
chunk_type: "relationship".to_string(),
|
||||
start_frame: *start_frame,
|
||||
end_frame: *end_frame,
|
||||
start_time: *start_time,
|
||||
end_time: *end_time,
|
||||
text: Some(text.clone()),
|
||||
};
|
||||
if qdrant.upsert_vector(&chunk_id, &vector, payload).await.is_ok() {
|
||||
vectorized += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(Json(IngestRule2Response {
|
||||
success: true,
|
||||
file_uuid,
|
||||
rule2_chunks: rule2_chunks as i64,
|
||||
vectorized_chunks: Some(vectorized),
|
||||
error: None,
|
||||
}))
|
||||
}
|
||||
Err(e) => Ok(Json(IngestRule2Response {
|
||||
success: false,
|
||||
file_uuid,
|
||||
rule2_chunks: 0,
|
||||
vectorized_chunks: None,
|
||||
error: Some(e.to_string()),
|
||||
})),
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user