feat: representative frame - auto-detect thumbnail + JSON endpoint

This commit is contained in:
Accusys
2026-05-22 09:22:15 +08:00
parent 2b025a014e
commit 2b950c985c
4 changed files with 266 additions and 4 deletions

View File

@@ -690,7 +690,7 @@ async fn stream_video(
#[derive(Debug, serde::Deserialize)]
struct ThumbQuery {
frame: i64,
frame: Option<i64>,
x: Option<i32>,
y: Option<i32>,
w: Option<i32>,
@@ -703,6 +703,20 @@ async fn face_thumbnail(
Query(q): Query<ThumbQuery>,
) -> Result<impl IntoResponse, StatusCode> {
let videos_table = schema::table_name("videos");
let frame = match q.frame {
Some(f) => f,
None => {
let result = crate::core::processor::tkg::query_auto_representative_frame(
state.db.pool(),
&file_uuid,
)
.await
.map_err(|_| StatusCode::NOT_FOUND)?;
result.frame_number
}
};
let row: Option<(String,)> = sqlx::query_as(&format!(
"SELECT file_path FROM {} WHERE file_uuid = $1",
videos_table
@@ -713,7 +727,7 @@ async fn face_thumbnail(
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
let (file_path,) = row.ok_or(StatusCode::NOT_FOUND)?;
let select = format!("select=eq(n\\,{})", q.frame);
let select = format!("select=eq(n\\,{})", frame);
let vf = if let (Some(x), Some(y), Some(w), Some(h)) = (q.x, q.y, q.w, q.h) {
format!("{},crop={}:{}:{}:{}", select, w, h, x, y)
} else {

View File

@@ -33,6 +33,10 @@ pub fn trace_agent_routes() -> Router<crate::api::types::AppState> {
"/api/v1/file/:file_uuid/tkg/rebuild",
post(rebuild_tkg),
)
.route(
"/api/v1/file/:file_uuid/representative-frame",
get(get_representative_frame),
)
}
#[derive(Debug, Deserialize)]
@@ -783,3 +787,59 @@ async fn rebuild_tkg(
}),
}
}
// ── Representative Frame (JSON) ───────────────────────────────────
use crate::core::processor::tkg;
/// JSON body returned by the representative-frame endpoint on success.
///
/// Built by `get_representative_frame` from the result of
/// `tkg::query_auto_representative_frame` plus the FPS looked up by `query_fps`.
#[derive(Serialize)]
struct RepFrameResponse {
// Set to `true` by the handler; failures use a separate `{"error": ...}` body.
success: bool,
// The file UUID taken from the request path, echoed back unchanged.
file_uuid: String,
// Frame number chosen as the representative frame.
frame_number: i64,
// `frame_number` divided by the FPS returned by `query_fps`.
timestamp_secs: f64,
// Quality score reported for the chosen frame (copied from the tkg result).
face_quality: f64,
// Main identities reported by the tkg query (copied from its result).
main_identities: Vec<tkg::MainIdentityInfo>,
// Face traces present in the chosen frame (copied from the tkg result).
traces: Vec<tkg::FrameTraceInfo>,
}
async fn get_representative_frame(
State(state): State<crate::api::types::AppState>,
Path(file_uuid): Path<String>,
) -> Result<Json<RepFrameResponse>, (StatusCode, Json<serde_json::Value>)> {
let result = tkg::query_auto_representative_frame(
state.db.pool(),
&file_uuid,
)
.await
.map_err(|e| {
(StatusCode::NOT_FOUND, Json(serde_json::json!({"error": e.to_string()})))
})?;
let fps = query_fps(state.db.pool(), &file_uuid).await;
Ok(Json(RepFrameResponse {
success: true,
file_uuid,
frame_number: result.frame_number,
timestamp_secs: result.frame_number as f64 / fps,
face_quality: result.face_quality,
main_identities: result.main_identities,
traces: result.traces,
}))
}
/// Looks up the frame rate stored for `file_uuid` in the videos table.
///
/// This is a best-effort lookup: it never fails. The default of 25.0 is
/// returned when the row is missing, the stored `fps` is NULL, the query
/// errors, or the stored value is not a usable frame rate (zero, negative or
/// non-finite). Callers divide by the result, so it is guaranteed to be a
/// finite, strictly positive number.
async fn query_fps(pool: &sqlx::PgPool, file_uuid: &str) -> f64 {
    use crate::core::db::schema;

    const DEFAULT_FPS: f64 = 25.0;

    let video_table = schema::table_name("videos");
    // The explicit `::float8` cast keeps the decode to `f64` valid whatever
    // numeric type the `fps` column has.
    sqlx::query_scalar::<_, f64>(&format!(
        "SELECT COALESCE(fps, 25.0)::float8 FROM {} WHERE file_uuid = $1",
        video_table
    ))
    .bind(file_uuid)
    .fetch_optional(pool)
    .await
    .ok()
    .flatten()
    // COALESCE only covers NULL; reject values that would make
    // `frame_number / fps` infinite, NaN or negative.
    .filter(|fps| fps.is_finite() && *fps > 0.0)
    .unwrap_or(DEFAULT_FPS)
}

View File

@@ -36,6 +36,9 @@ pub use scene_classification::{
SceneSegment,
};
pub use story::{process_story, StoryChildChunk, StoryParentChunk, StoryResult, StoryStats};
pub use tkg::{build_tkg, TkgResult};
pub use tkg::{
build_tkg, query_auto_representative_frame, FrameTraceInfo, MainIdentityInfo,
RepresentativeFrameResult, TkgResult,
};
pub use visual_chunk::{process_visual_chunk, process_visual_chunk_advanced, VisualChunkResult};
pub use yolo::{process_yolo, YoloFrame, YoloObject, YoloResult};

View File

@@ -1,5 +1,5 @@
use anyhow::{Context, Result};
use serde::Deserialize;
use serde::{Deserialize, Serialize};
use sqlx::PgPool;
use std::collections::HashMap;
use std::path::Path;
@@ -835,6 +835,191 @@ async fn build_face_face_edges(pool: &PgPool, file_uuid: &str, pose_data: &[Face
Ok(edge_count)
}
// ── TKG Bridge: Representative Frame ──────────────────────────────
/// One face trace visible in the representative frame.
///
/// Filled by `query_auto_representative_frame` from the face-detections table,
/// LEFT JOINed with the identities table, for rows of the chosen frame that
/// have a non-NULL `trace_id`.
#[derive(Debug, Serialize)]
pub struct FrameTraceInfo {
// `trace_id` of the face detection row.
pub trace_id: i32,
// UUID (as text) of the joined identity; `None` when no identity row matched.
pub identity_uuid: Option<String>,
// Name of the joined identity; `None` when no identity row matched.
pub name: Option<String>,
// Bounding box read from the detection's `x`, `y`, `width`, `height` columns.
// NOTE(review): presumably pixel coordinates in the source frame — confirm.
pub x: i32,
pub y: i32,
pub width: i32,
pub height: i32,
// Detection `confidence` column, cast to float8 in the query.
pub confidence: f64,
}
/// A main identity of a file, as detected by `query_auto_representative_frame`.
///
/// The query keeps at most two identities with `source = 'tmdb'`, ordered by
/// how many face detections they have in the file.
#[derive(Debug, Serialize)]
pub struct MainIdentityInfo {
// UUID of the identity, rendered as text.
pub identity_uuid: String,
// Name of the identity.
pub name: String,
// Number of face detections of this identity in the file.
pub face_count: i64,
}
/// Result of `query_auto_representative_frame`.
#[derive(Debug, Serialize)]
pub struct RepresentativeFrameResult {
// Frame number selected as representative for the file.
pub frame_number: i64,
// Highest `width * height * confidence` among the face detections of the
// selected frame (0 when the aggregate is NULL).
pub face_quality: f64,
// Main identities detected for the file (at most two).
pub main_identities: Vec<MainIdentityInfo>,
// Traced faces present in the selected frame, ordered by trace id.
pub traces: Vec<FrameTraceInfo>,
}
/// Picks a representative frame for `file_uuid` and describes the faces in it.
///
/// The frame is chosen by the first strategy that yields a result:
///
/// 1. With two main identities (the two `tmdb` identities with the most face
///    detections in the file): the `first_frame` of a TKG edge flagged
///    `mutual_gaze` between their dominant traces, or failing that the
///    earliest frame of this file in which both identities are detected.
/// 2. The best-quality detection (`width * height * confidence`) of the top
///    main identity.
/// 3. The best-quality detection of any identified face in the file.
///
/// Every query has a deterministic tie-breaker, so repeated calls (for example
/// from the thumbnail endpoint and the JSON endpoint) agree on the frame as
/// long as the underlying data is unchanged.
///
/// # Errors
///
/// Returns an error when a database query fails, or "No faces found in this
/// file" when none of the strategies finds a frame. Note that every strategy
/// only considers detections with an identity attached.
pub async fn query_auto_representative_frame(
    pool: &PgPool,
    file_uuid: &str,
) -> Result<RepresentativeFrameResult> {
    let id_table = t("identities");
    let fd_table = t("face_detections");

    let mains = sqlx::query_as::<_, (i32, String, String, i64)>(&format!(
        "SELECT i.id, i.uuid::text, i.name, COUNT(fd.id)::bigint \
         FROM {} fd \
         JOIN {} i ON i.id = fd.identity_id \
         WHERE fd.file_uuid = $1 AND fd.identity_id IS NOT NULL \
         AND i.source = 'tmdb' \
         GROUP BY i.id, i.uuid, i.name \
         ORDER BY COUNT(fd.id) DESC, i.id LIMIT 2",
        fd_table, id_table
    ))
    .bind(file_uuid)
    .fetch_all(pool)
    .await
    .context("Failed to detect main identities")?;

    let main_ids: Vec<i32> = mains.iter().map(|(id, _, _, _)| *id).collect();
    let main_identities: Vec<MainIdentityInfo> = mains
        .into_iter()
        .map(|(_, identity_uuid, name, face_count)| MainIdentityInfo {
            identity_uuid,
            name,
            face_count,
        })
        .collect();

    // Strategy 1: a frame relating the two main identities.
    let mut frame_number: Option<i64> = None;
    if let &[id_a, id_b] = main_ids.as_slice() {
        let trace_a = rep_frame_dominant_trace(pool, file_uuid, id_a).await?;
        let trace_b = rep_frame_dominant_trace(pool, file_uuid, id_b).await?;
        if let (Some(ta), Some(tb)) = (trace_a, trace_b) {
            frame_number = match rep_frame_mutual_gaze(pool, file_uuid, ta, tb).await? {
                Some(frame) => Some(frame),
                None => rep_frame_first_shared(pool, file_uuid, id_a, id_b).await?,
            };
        }
    }

    // Strategy 2: best-quality detection of the top main identity.
    if frame_number.is_none() {
        if let Some(&first_id) = main_ids.first() {
            frame_number = rep_frame_best_quality(pool, file_uuid, Some(first_id)).await?;
        }
    }

    // Strategy 3: best-quality detection of any identified face.
    if frame_number.is_none() {
        frame_number = rep_frame_best_quality(pool, file_uuid, None).await?;
    }

    let frame_number =
        frame_number.ok_or_else(|| anyhow::anyhow!("No faces found in this file"))?;

    let face_quality: f64 = sqlx::query_scalar::<_, f64>(&format!(
        "SELECT COALESCE(MAX((width::float8 * height::float8) * confidence::float8), 0) \
         FROM {} WHERE file_uuid = $1 AND frame_number = $2",
        fd_table
    ))
    .bind(file_uuid)
    .bind(frame_number)
    .fetch_one(pool)
    .await?;

    let traces: Vec<FrameTraceInfo> = sqlx::query_as::<
        _,
        (i32, Option<String>, Option<String>, i32, i32, i32, i32, f64),
    >(&format!(
        "SELECT fd.trace_id, i.uuid::text, i.name, fd.x, fd.y, fd.width, fd.height, fd.confidence::float8 \
         FROM {} fd \
         LEFT JOIN {} i ON i.id = fd.identity_id \
         WHERE fd.file_uuid = $1 AND fd.frame_number = $2 AND fd.trace_id IS NOT NULL \
         ORDER BY fd.trace_id",
        fd_table, id_table
    ))
    .bind(file_uuid)
    .bind(frame_number)
    .fetch_all(pool)
    .await?
    .into_iter()
    .map(
        |(trace_id, identity_uuid, name, x, y, width, height, confidence)| FrameTraceInfo {
            trace_id,
            identity_uuid,
            name,
            x,
            y,
            width,
            height,
            confidence,
        },
    )
    .collect();

    Ok(RepresentativeFrameResult {
        frame_number,
        face_quality,
        main_identities,
        traces,
    })
}

/// Returns the trace of `identity_id` with the most detections in the file, if any.
async fn rep_frame_dominant_trace(
    pool: &PgPool,
    file_uuid: &str,
    identity_id: i32,
) -> Result<Option<i32>> {
    let fd_table = t("face_detections");
    let trace = sqlx::query_scalar::<_, i32>(&format!(
        "SELECT trace_id FROM {} WHERE file_uuid = $1 AND identity_id = $2 \
         AND trace_id IS NOT NULL GROUP BY trace_id \
         ORDER BY COUNT(*) DESC, trace_id LIMIT 1",
        fd_table
    ))
    .bind(file_uuid)
    .bind(identity_id)
    .fetch_optional(pool)
    .await?;
    Ok(trace)
}

/// Returns the earliest `first_frame` of a mutual-gaze TKG edge from `trace_a` to `trace_b`.
async fn rep_frame_mutual_gaze(
    pool: &PgPool,
    file_uuid: &str,
    trace_a: i32,
    trace_b: i32,
) -> Result<Option<i64>> {
    let nodes_table = t("tkg_nodes");
    let edges_table = t("tkg_edges");
    // `first_frame` may be absent from the edge properties, so the column is
    // decoded as nullable; ascending order puts NULLs last.
    let first_frame = sqlx::query_scalar::<_, Option<i64>>(&format!(
        "SELECT (e.properties->>'first_frame')::bigint \
         FROM {} e \
         JOIN {} a ON a.id = e.source_node_id \
         JOIN {} b ON b.id = e.target_node_id \
         WHERE e.file_uuid = $1 \
         AND a.external_id = concat('trace_', $2) \
         AND b.external_id = concat('trace_', $3) \
         AND e.properties->>'mutual_gaze' = 'true' \
         ORDER BY 1 LIMIT 1",
        edges_table, nodes_table, nodes_table
    ))
    .bind(file_uuid)
    .bind(trace_a)
    .bind(trace_b)
    .fetch_optional(pool)
    .await?;
    Ok(first_frame.flatten())
}

/// Returns the earliest frame of the file in which both identities are detected.
async fn rep_frame_first_shared(
    pool: &PgPool,
    file_uuid: &str,
    id_a: i32,
    id_b: i32,
) -> Result<Option<i64>> {
    let fd_table = t("face_detections");
    // MIN() yields a single NULL row when nothing matches, so decode as
    // nullable. Both sides of the self-join are restricted to the same file.
    let first_frame = sqlx::query_scalar::<_, Option<i64>>(&format!(
        "SELECT MIN(fd_a.frame_number)::bigint \
         FROM {} fd_a \
         JOIN {} fd_b ON fd_a.frame_number = fd_b.frame_number \
         AND fd_b.file_uuid = fd_a.file_uuid \
         WHERE fd_a.file_uuid = $1 AND fd_a.identity_id = $2 \
         AND fd_b.identity_id = $3",
        fd_table, fd_table
    ))
    .bind(file_uuid)
    .bind(id_a)
    .bind(id_b)
    .fetch_optional(pool)
    .await?;
    Ok(first_frame.flatten())
}

/// Returns the frame holding the highest-quality detection of `identity_id`,
/// or of any identified face when `identity_id` is `None`.
async fn rep_frame_best_quality(
    pool: &PgPool,
    file_uuid: &str,
    identity_id: Option<i32>,
) -> Result<Option<i64>> {
    let fd_table = t("face_detections");
    let frame = match identity_id {
        Some(id) => {
            sqlx::query_scalar::<_, i64>(&format!(
                "SELECT frame_number::bigint FROM {} \
                 WHERE file_uuid = $1 AND identity_id = $2 \
                 ORDER BY (width::float8 * height::float8) * confidence::float8 DESC NULLS LAST, \
                 frame_number \
                 LIMIT 1",
                fd_table
            ))
            .bind(file_uuid)
            .bind(id)
            .fetch_optional(pool)
            .await?
        }
        None => {
            sqlx::query_scalar::<_, i64>(&format!(
                "SELECT frame_number::bigint FROM {} \
                 WHERE file_uuid = $1 AND identity_id IS NOT NULL \
                 ORDER BY (width::float8 * height::float8) * confidence::float8 DESC NULLS LAST, \
                 frame_number \
                 LIMIT 1",
                fd_table
            ))
            .bind(file_uuid)
            .fetch_optional(pool)
            .await?
        }
    };
    Ok(frame)
}
// ── Tests ─────────────────────────────────────────────────────────
#[cfg(test)]