Files
momentry_core/src/api/media_api.rs
Accusys 2cfcfdd1af feat: Phase 2.6 edges migration to Qdrant (TKG-only architecture)
Phase 2.6.1: co_occurrence_edges migration
- build_co_occurrence_edges_from_qdrant()
- Qdrant embeddings → frame grouping → YOLO objects
- Result: 6679 edges (vs 6701 PostgreSQL)

Phase 2.6.2: face_face_edges migration
- build_face_face_edges_from_qdrant()
- Qdrant embeddings → frame grouping → face pairs
- mutual_gaze detection preserved
- Result: 6 edges (exact match)

Phase 2.6.3: speaker_face_edges migration
- build_speaker_face_edges_from_qdrant()
- Qdrant embeddings → trace_id frame ranges
- SPEAKS_AS edge creation

Architecture:
- All edges use Qdrant payload (no face_detections queries)
- PostgreSQL fallback for empty Qdrant
- Estimated 3.6x performance improvement

Testing:
- Playground (3003): ✓ All Phase 2.6 logs verified
- Edge counts: ✓ Close match with PostgreSQL
- Fallback: ✓ Working

Docs:
- docs_v1.0/DESIGN/TKG_PHASE2_6_EDGES_MIGRATION.md
- docs_v1.0/M4_workspace/2026-06-21_phase2_6_test.md
2026-06-21 04:47:49 +08:00

1229 lines
42 KiB
Rust
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
use axum::{
body::Body,
extract::{Path, Query, State},
http::{header, StatusCode},
response::{IntoResponse, Response},
routing::get,
Router,
};
use once_cell::sync::Lazy;
use std::collections::HashMap;
use uuid::Uuid;
use crate::core::db::{schema, PostgresDb};
/// Extracts the shared video query options from `params`.
///
/// Returns `(mode, audio)` as owned strings. A missing `mode` key yields
/// `"normal"` and a missing `audio` key yields `"on"`; present values are
/// returned as-is, without validation.
fn parse_video_params(params: &std::collections::HashMap<String, String>) -> (String, String) {
    let lookup = |key: &str, fallback: &str| -> String {
        match params.get(key) {
            Some(value) => value.clone(),
            None => fallback.to_owned(),
        }
    };
    (lookup("mode", "normal"), lookup("audio", "on"))
}
/// Name or path of the ffmpeg executable, resolved once on first use.
///
/// Resolution order: the `MOMENTRY_FFMPEG` environment variable, then the
/// Homebrew `ffmpeg-full` binary if that file exists, then the bare name
/// `ffmpeg`.
static FFMPEG: Lazy<String> = Lazy::new(|| match std::env::var("MOMENTRY_FFMPEG") {
    Ok(configured) => configured,
    Err(_) => {
        const HOMEBREW_FULL: &str = "/opt/homebrew/opt/ffmpeg-full/bin/ffmpeg";
        let chosen = if std::path::Path::new(HOMEBREW_FULL).exists() {
            HOMEBREW_FULL
        } else {
            "ffmpeg"
        };
        chosen.to_string()
    }
});
/// Creates a `Command` for the executable named by `FFMPEG`.
///
/// When the Homebrew `ffmpeg-full` library directory exists,
/// `DYLD_LIBRARY_PATH` is set to it in the child's environment.
fn ffmpeg_cmd() -> std::process::Command {
    const FULL_LIB_DIR: &str = "/opt/homebrew/opt/ffmpeg-full/lib";
    let mut command = std::process::Command::new(FFMPEG.as_str());
    if std::path::Path::new(FULL_LIB_DIR).exists() {
        command.env("DYLD_LIBRARY_PATH", FULL_LIB_DIR);
    }
    command
}
/// Builds the router exposing the media endpoints of this module: bbox
/// overlay, trace and stranger clips, full-file streaming, thumbnails and
/// ad-hoc clips. All routes are registered for `GET`.
pub fn bbox_routes() -> Router<crate::api::types::AppState> {
    let mut router = Router::new();
    router = router.route(
        "/api/v1/file/:file_uuid/video/bbox",
        get(bbox_overlay_video),
    );
    router = router.route(
        "/api/v1/file/:file_uuid/trace/:trace_id/video",
        get(trace_video),
    );
    router = router.route(
        "/api/v1/file/:file_uuid/stranger/:stranger_id/video",
        get(stranger_video),
    );
    router = router.route("/api/v1/file/:file_uuid/video", get(stream_video));
    router = router.route("/api/v1/file/:file_uuid/thumbnail", get(face_thumbnail));
    router = router.route(
        "/api/v1/file/:file_uuid/chunk/:chunk_id/thumbnail",
        get(chunk_thumbnail),
    );
    router = router.route("/api/v1/file/:file_uuid/clip", get(video_clip));
    router
}
/// Returns the 5×7 bitmap glyph for `c` (case-insensitive).
///
/// Each of the seven bytes is one row, top to bottom. Within a row the
/// columns are encoded left to right as 0x10, 0x08, 0x04, 0x02, 0x01.
/// Glyphs exist for the hex digits `0-9a-f`, `t`, `_` and space; any other
/// character yields an all-zero (blank) glyph.
fn bitmap_char(c: char) -> [u8; 7] {
    const BLANK: [u8; 7] = [0x00; 7];
    const GLYPHS: &[(char, [u8; 7])] = &[
        ('0', [0x0E, 0x11, 0x13, 0x15, 0x19, 0x11, 0x0E]),
        ('1', [0x04, 0x0C, 0x04, 0x04, 0x04, 0x04, 0x0E]),
        ('2', [0x0E, 0x11, 0x01, 0x02, 0x04, 0x08, 0x1F]),
        ('3', [0x0E, 0x11, 0x01, 0x06, 0x01, 0x11, 0x0E]),
        ('4', [0x02, 0x06, 0x0A, 0x12, 0x1F, 0x02, 0x02]),
        ('5', [0x1F, 0x10, 0x1E, 0x01, 0x01, 0x11, 0x0E]),
        ('6', [0x0E, 0x10, 0x1E, 0x11, 0x11, 0x11, 0x0E]),
        ('7', [0x1F, 0x01, 0x02, 0x04, 0x08, 0x10, 0x10]),
        ('8', [0x0E, 0x11, 0x11, 0x0E, 0x11, 0x11, 0x0E]),
        ('9', [0x0E, 0x11, 0x11, 0x0F, 0x01, 0x11, 0x0E]),
        ('a', [0x0E, 0x11, 0x11, 0x1F, 0x11, 0x11, 0x11]),
        ('b', [0x1E, 0x11, 0x11, 0x1E, 0x11, 0x11, 0x1E]),
        ('c', [0x0E, 0x11, 0x10, 0x10, 0x10, 0x11, 0x0E]),
        ('d', [0x1C, 0x12, 0x11, 0x11, 0x11, 0x12, 0x1C]),
        ('e', [0x1F, 0x10, 0x10, 0x1E, 0x10, 0x10, 0x1F]),
        ('f', [0x1F, 0x10, 0x10, 0x1E, 0x10, 0x10, 0x10]),
        ('t', [0x04, 0x04, 0x1F, 0x04, 0x04, 0x04, 0x06]),
        ('_', [0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1F]),
        (' ', BLANK),
    ];
    let wanted = c.to_ascii_lowercase();
    GLYPHS
        .iter()
        .find(|(glyph_char, _)| *glyph_char == wanted)
        .map(|(_, rows)| *rows)
        .unwrap_or(BLANK)
}
/// Pixel width of one glyph: 5 columns, each drawn 3 px wide.
const CHAR_W: i32 = 3 * 5;
/// Horizontal gap, in pixels, left after each glyph.
const CHAR_GAP: i32 = 4;
/// Horizontal distance, in pixels, from one glyph origin to the next.
const CHAR_ADVANCE: i32 = CHAR_GAP + CHAR_W;
fn render_text(
parts: &mut Vec<String>,
text: &str,
origin_x: i32,
origin_y: i32,
enable: Option<i32>,
) -> i32 {
let mut px = origin_x;
for ch in text.chars() {
let bm = bitmap_char(ch);
for (row, bits) in bm.iter().enumerate() {
for col in 0..5 {
if bits & (1 << (4 - col)) != 0 {
let x = px + col as i32 * 3;
let y = origin_y + row as i32 * 3;
if let Some(offset) = enable {
parts.push(format!(
"drawbox=x={}:y={}:w=3:h=3:color=white@1.0:t=fill:enable='eq(n,{})'",
x, y, offset
));
} else {
parts.push(format!(
"drawbox=x={}:y={}:w=3:h=3:color=white@1.0:t=fill",
x, y
));
}
}
}
}
px += CHAR_ADVANCE;
}
px
}
/// Query-string parameters read by `bbox_overlay_video`.
#[derive(Debug, serde::Deserialize)]
struct BboxParams {
// Legacy (deprecated) frame bounds; consulted only when `start_frame` /
// `end_frame` are absent.
start: Option<i32>,
end: Option<i32>,
// Explicit bounds, given as frame numbers and/or seconds. They are combined
// by `resolve_frame_range`, which prefers frame values over time values.
start_frame: Option<i32>,
end_frame: Option<i32>,
start_time: Option<f64>,
end_time: Option<f64>,
// File UUID whose face detections are drawn; the handler falls back to the
// `file_uuid` from the URL path when this is absent.
face_uuid: Option<String>,
// Clip length in seconds passed to ffmpeg `-t`; the handler defaults it to 10.0.
duration: Option<f64>,
// "normal" (handler default) disables the overlay; any other value enables it.
mode: Option<String>,
// "off" drops the audio track (`-an`); the handler defaults to "on".
audio: Option<String>,
}
/// Resolves the `(start_frame, end_frame)` pair from the mixed frame/time inputs.
///
/// Each bound is resolved independently, using the first value present in
/// this priority order:
///
/// 1. `start_frame` / `end_frame`
/// 2. legacy `start` / `end` (frame numbers)
/// 3. `start_time` / `end_time` in seconds, converted with `fps` (truncating)
///
/// A bound with no input at all defaults to `0` (start) or `i32::MAX` (end).
/// Resolving the bounds separately means a request that supplies only a start
/// (for example `start_frame` together with a clip `duration`) keeps that
/// start instead of silently falling back to frame 0.
fn resolve_frame_range(
    start_frame: Option<i32>,
    end_frame: Option<i32>,
    start: Option<i32>,
    end: Option<i32>,
    start_time: Option<f64>,
    end_time: Option<f64>,
    fps: f64,
) -> (i32, i32) {
    /// Seconds → frame index. `as i32` saturates on overflow and maps NaN to 0.
    fn to_frame(seconds: f64, fps: f64) -> i32 {
        (seconds * fps) as i32
    }

    let first = start_frame
        .or(start)
        .or_else(|| start_time.map(|t| to_frame(t, fps)))
        .unwrap_or(0);
    let last = end_frame
        .or(end)
        .or_else(|| end_time.map(|t| to_frame(t, fps)))
        .unwrap_or(i32::MAX);
    (first, last)
}
async fn bbox_overlay_video(
State(state): State<crate::api::types::AppState>,
Path(file_uuid): Path<String>,
Query(p): Query<BboxParams>,
) -> Result<impl IntoResponse, StatusCode> {
let videos_table = schema::table_name("videos");
let row: Option<(String,)> = sqlx::query_as(&format!(
"SELECT file_path FROM {} WHERE file_uuid = $1",
videos_table
))
.bind(&file_uuid)
.fetch_optional(state.db.pool())
.await
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
let (video_path,) = row.ok_or(StatusCode::NOT_FOUND)?;
let face_fuid = p.face_uuid.as_deref().unwrap_or(&file_uuid);
let duration = p.duration.unwrap_or(10.0);
// Get FPS
let fps: f64 = sqlx::query_scalar(&format!(
"SELECT COALESCE(fps, 24.0) FROM {} WHERE file_uuid = $1",
videos_table
))
.bind(&file_uuid)
.fetch_optional(state.db.pool())
.await
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?
.unwrap_or(24.0);
let (start_f, end_f) = resolve_frame_range(
p.start_frame,
p.end_frame,
p.start,
p.end,
p.start_time,
p.end_time,
fps,
);
let start_sec = start_f as f64 / fps;
// Get face bboxes
// frame_number is BIGINT (i64) in database
let face_table = schema::table_name("face_detections");
let rows: Vec<(i64, i32, i32, i32, i32, Option<i32>, Option<String>)> = sqlx::query_as(
&format!("SELECT frame_number, x, y, width, height, trace_id, face_id FROM {} WHERE file_uuid = $1 AND frame_number BETWEEN $2 AND $3 ORDER BY frame_number", face_table)
)
.bind(face_fuid).bind(start_f).bind(end_f)
.fetch_all(state.db.pool()).await
.unwrap_or_else(|e| { tracing::error!("bbox query error: {}", e); vec![] });
// Build filters — each bbox enabled only on its frame
let mut parts: Vec<String> = Vec::new();
for (frame, x, y, w, h, trace_id, _) in &rows {
let text = format!("t{}", trace_id.unwrap_or(0));
let offset = (*frame as i32) - start_f;
parts.push(format!(
"drawbox=x={}:y={}:w={}:h={}:color=red@0.8:thickness=4:enable='eq(n,{})'",
x, y, w, h, offset
));
let tx = *x + 6;
let ty = *y + 6;
render_text(&mut parts, &text, tx, ty, Some(offset));
}
let bbox_mode = p.mode.as_deref().unwrap_or("normal");
let bbox_audio = p.audio.as_deref().unwrap_or("on");
let vf = if parts.is_empty() || bbox_mode == "normal" {
"null".to_string()
} else {
parts.join(",")
};
let tmp = std::env::temp_dir().join(format!("bbox_{}.mp4", Uuid::new_v4()));
let tmp_str = tmp.to_str().unwrap_or("").to_string();
let ss = start_sec.to_string();
let dur = duration.to_string();
let mut bbox_args = vec!["-ss", &ss, "-i", &video_path, "-t", &dur];
if vf != "null" {
bbox_args.extend_from_slice(&[
"-vf",
&vf,
"-c:v",
"libx264",
"-preset",
"ultrafast",
"-crf",
"28",
]);
} else {
bbox_args.extend_from_slice(&["-c", "copy"]);
}
if bbox_audio == "off" {
bbox_args.push("-an");
}
bbox_args.extend_from_slice(&["-movflags", "+faststart", "-y", &tmp_str]);
let status = ffmpeg_cmd()
.args(&bbox_args)
.status()
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
if !status.success() {
let _ = std::fs::remove_file(&tmp);
return Err(StatusCode::INTERNAL_SERVER_ERROR);
}
let data = tokio::fs::read(&tmp)
.await
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
let _ = std::fs::remove_file(&tmp);
Ok(Response::builder()
.header(header::CONTENT_TYPE, "video/mp4")
.header(header::CONTENT_LENGTH, data.len())
.body(Body::from(data))
.unwrap())
}
/// Parses an HTTP `Range` header value into an inclusive `(start, end)` byte
/// range clamped to a file of `file_size` bytes.
///
/// Supported forms (only the first range of a comma-separated list is used):
///
/// * `bytes=A-B` — bytes `A..=B`
/// * `bytes=A-`  — from `A` to the end of the file
/// * `bytes=-N`  — the last `N` bytes of the file (suffix range)
///
/// Unparsable numbers fall back to the start / end of the file. The result
/// always satisfies `start <= end <= file_size.saturating_sub(1)`, so callers
/// can compute `end - start + 1` without underflow; for an empty file the
/// result is `(0, 0)` and the caller must check `file_size` itself.
fn parse_range(range: &str, file_size: u64) -> (u64, u64) {
    // saturating_sub: an empty file must not underflow to u64::MAX (or panic).
    let last = file_size.saturating_sub(1);
    let spec = range
        .trim()
        .trim_start_matches("bytes=")
        .split(',')
        .next()
        .unwrap_or("")
        .trim();
    let (first, second) = match spec.split_once('-') {
        Some((a, b)) => (a.trim(), b.trim()),
        None => (spec, ""),
    };

    // Suffix form "-N": the final N bytes, not bytes 0..=N.
    if first.is_empty() && !second.is_empty() {
        if let Ok(suffix_len) = second.parse::<u64>() {
            let start = file_size.saturating_sub(suffix_len).min(last);
            return (start, last);
        }
        return (0, last);
    }

    let start = first.parse::<u64>().unwrap_or(0).min(last);
    let end = if second.is_empty() {
        last
    } else {
        second.parse::<u64>().unwrap_or(last).min(last)
    };
    // An inverted range would make `end - start + 1` underflow in the caller.
    (start, end.max(start))
}
/// Axum handler for `/api/v1/file/:file_uuid/trace/:trace_id/video`.
///
/// Extracts the shared state, the `(file_uuid, trace_id)` path pair and the
/// raw query map, then delegates all work to `trace_video_inner`.
async fn trace_video(
State(state): State<crate::api::types::AppState>,
Path((file_uuid, trace_id)): Path<(String, i32)>,
Query(params): Query<std::collections::HashMap<String, String>>,
) -> Result<impl IntoResponse, StatusCode> {
trace_video_inner(&state, &file_uuid, trace_id, &params).await
}
async fn trace_video_inner(
state: &crate::api::types::AppState,
file_uuid: &str,
trace_id: i32,
params: &std::collections::HashMap<String, String>,
) -> Result<impl IntoResponse, StatusCode> {
use axum::http::header;
let (mode, audio) = parse_video_params(&params);
let videos_table = schema::table_name("videos");
let row: Option<(String, f64, i32, i32)> = sqlx::query_as(&format!(
"SELECT file_path, COALESCE(fps, 24.0), COALESCE(width, 0), COALESCE(height, 0) FROM {} WHERE file_uuid = $1",
videos_table
))
.bind(&file_uuid)
.fetch_optional(state.db.pool())
.await
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
let (video_path, fps, _width, _height) = row.ok_or(StatusCode::NOT_FOUND)?;
// Query face detections to find frame range for target trace
// frame_number is BIGINT (i64) in database
let face_table = schema::table_name("face_detections");
let rows: Vec<(i64, i32, i32, i32, i32)> = sqlx::query_as(&format!(
"SELECT frame_number, x, y, width, height FROM {} WHERE file_uuid = $1 AND trace_id = $2 ORDER BY frame_number",
face_table
))
.bind(&file_uuid).bind(trace_id)
.fetch_all(state.db.pool()).await
.unwrap_or_else(|e| { tracing::error!("trace query error: {}", e); vec![] });
if rows.is_empty() {
return Err(StatusCode::NOT_FOUND);
}
let first_frame = rows[0].0;
let last_frame = rows[rows.len() - 1].0;
let start_sec = first_frame as f64 / fps;
let padding = params
.get("padding")
.and_then(|s| s.parse().ok())
.unwrap_or(2.0);
let duration = (last_frame - first_frame) as f64 / fps + padding * 2.0;
let seek = (start_sec - padding).max(0.0);
// === NORMAL MODE: raw video without overlays ===
if mode == "normal" {
let tmp = std::env::temp_dir().join(format!("trace_{}.mp4", Uuid::new_v4()));
let tmp_str = tmp.to_str().unwrap_or("").to_string();
let sk = seek.to_string();
let du = duration.to_string();
let mut cmd_args = vec!["-ss", &sk, "-i", &video_path, "-t", &du, "-c", "copy"];
if audio == "off" {
cmd_args.push("-an");
}
cmd_args.extend_from_slice(&["-y", &tmp_str]);
let result = ffmpeg_cmd()
.args(&cmd_args)
.output()
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
if !result.status.success() {
return Err(StatusCode::INTERNAL_SERVER_ERROR);
}
let data = tokio::fs::read(&tmp)
.await
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
let _ = std::fs::remove_file(&tmp);
return Ok(Response::builder()
.header(header::CONTENT_TYPE, "video/mp4")
.header(header::CONTENT_LENGTH, data.len())
.body(Body::from(data))
.unwrap());
}
// === DEBUG MODE: text overlay, list all traces in frame range ===
let start_fn = (start_sec * fps) as i32;
let end_fn = ((start_sec + duration) * fps) as i64;
// Query all traces with identity names and bbox positions in the visible frame range
// frame_number is BIGINT (i64) in database
let identities_table = schema::table_name("identities");
let all_rows: Vec<(i32, i64, i32, i32, i32, i32, Option<String>)> = sqlx::query_as(&format!(
"SELECT fd.trace_id, fd.frame_number, fd.x, fd.y, fd.width, fd.height, i.name \
FROM {} fd \
LEFT JOIN {} i ON fd.identity_id = i.id \
WHERE fd.file_uuid = $1 AND fd.frame_number BETWEEN $2 AND $3 AND fd.trace_id IS NOT NULL \
ORDER BY fd.trace_id, fd.frame_number",
face_table, identities_table
))
.bind(&file_uuid)
.bind(start_fn)
.bind(end_fn)
.fetch_all(state.db.pool())
.await
.unwrap_or_default();
// Group frames by trace_id, compute start_frame per trace; collect bbox per frame
// frame_number is i64 (BIGINT), so HashMaps need i64 for frame values
let mut trace_frames: HashMap<i32, Vec<i64>> = HashMap::new();
let mut trace_identity: HashMap<i32, String> = HashMap::new();
let mut bbox_per_frame: HashMap<(i32, i64), (i32, i32, i32, i32)> = HashMap::new(); // (tid, fn) -> (x, y, w, h)
for (tid, fn_, x, y, w, h, name_opt) in &all_rows {
trace_frames.entry(*tid).or_default().push(*fn_);
bbox_per_frame.insert((*tid, *fn_), (*x, *y, *w, *h));
if let Some(name) = name_opt {
trace_identity.entry(*tid).or_insert_with(|| name.clone());
} else {
trace_identity
.entry(*tid)
.or_insert_with(|| format!("Stranger_{:03}", tid));
}
}
// Query cut/scene info from chunk table (not a separate "cut" table)
let chunk_table = schema::table_name("chunk");
let cut_label: String = sqlx::query_scalar::<_, String>(
&format!("SELECT chunk_id FROM {} WHERE file_uuid = $1 AND chunk_type = 'cut' AND start_frame <= $2 AND end_frame >= $2 LIMIT 1", chunk_table)
)
.bind(&file_uuid).bind(first_frame)
.fetch_optional(state.db.pool()).await
.unwrap_or(None)
.unwrap_or_else(|| "-".to_string());
// Sort traces for consistent ordering
let mut sorted_traces: Vec<(i32, &Vec<i64>)> =
trace_frames.iter().map(|(k, v)| (*k, v)).collect();
sorted_traces.sort_by_key(|(tid, _)| *tid);
let frame_offset = first_frame as i64 - (padding * fps) as i64;
let fps_str = &fps.to_string();
// Build drawtext entries
let mut parts: Vec<String> = Vec::new();
// Top-left info panel
// Frame/time at the top
parts.push(format!(
"drawtext=text='Frame %{{n}} %{{pts}}':fontsize=28:fontcolor=white:box=1:boxcolor=black@0.6:x=10:y=12"
));
parts.push(format!(
"drawtext=text='Cut\\: {}':fontsize=28:fontcolor=white:box=1:boxcolor=black@0.6:x=10:y=56",
cut_label
));
parts.push(format!(
"drawtext=text='{}':fontsize=28:fontcolor=white:box=1:boxcolor=black@0.6:x=10:y=100",
file_uuid
));
// Per-trace entries: show trace_id, start_frame, identity name
// Position starts at y=144, increments by 44 per trace
let mut y_pos = 144;
for (tid, frames) in &sorted_traces {
let start = frames.iter().min().unwrap_or(&first_frame);
let identity = trace_identity
.get(tid)
.map(|s| s.as_str())
.unwrap_or("unknown");
let label = format!("Trace {}\\: start={} {}", tid, start, identity);
// Continuous range (interpolated): visible from first to last frame
let enable = format!(
"between(n,{},{})",
frames[0] as i64 - frame_offset,
frames[frames.len() - 1] as i64 - frame_offset
);
parts.push(format!(
"drawtext=text='{}':fontsize=24:fontcolor=white:box=1:boxcolor=black@0.6:x=10:y={}:enable='{}'",
label, y_pos, enable
));
y_pos += 44;
}
// Bounding boxes: interpolated (thickness=1) + actual (thickness=4) with trace_id label
for (tid, frames) in &sorted_traces {
let range_enable = format!(
"between(n,{},{})",
frames[0] as i64 - frame_offset,
frames[frames.len() - 1] as i64 - frame_offset
);
// Interpolated bbox at first known position across the whole trace range
if let Some((x, y, w, h)) = bbox_per_frame.get(&(*tid, frames[0])) {
parts.push(format!(
"drawbox=x={}:y={}:w={}:h={}:color=green@0.3:thickness=1:enable='{}'",
x, y, w, h, range_enable
));
}
// Actual detection bboxes with trace_id label
for fn_ in frames.iter() {
if let Some((x, y, w, h)) = bbox_per_frame.get(&(*tid, *fn_)) {
let n = *fn_ as i64 - frame_offset;
parts.push(format!(
"drawbox=x={}:y={}:w={}:h={}:color=green@0.5:thickness=4:enable='between(n,{},{})'",
x, y, w, h, n, n
));
parts.push(format!(
"drawtext=text='{}':x={}:y={}:fontsize=28:fontcolor=green:box=1:boxcolor=black@0.5:enable='between(n,{},{})'",
tid, x + 4, y + 4, n, n
));
}
}
}
let filter_text = parts.join(",");
let filter_file = std::env::temp_dir().join(format!("vf_{}.txt", Uuid::new_v4()));
let _ = std::fs::write(&filter_file, &filter_text);
let filter_path = filter_file.to_str().unwrap_or("");
let tmp = std::env::temp_dir().join(format!("trace_{}.mp4", Uuid::new_v4()));
let tmp_str = tmp.to_str().unwrap_or("").to_string();
let sk = seek.to_string();
let du = duration.to_string();
let mut debug_args = vec![
"-ss",
&sk,
"-i",
&video_path,
"-t",
&du,
"-/filter_complex",
&filter_path,
"-c:v",
"libx264",
"-preset",
"ultrafast",
"-crf",
"28",
];
if audio == "on" {
debug_args.extend_from_slice(&["-c:a", "aac"]);
}
debug_args.extend_from_slice(&["-movflags", "+faststart", "-y", &tmp_str]);
let result = ffmpeg_cmd()
.args(&debug_args)
.output()
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
if !result.status.success() {
let stderr = String::from_utf8_lossy(&result.stderr);
let _ = std::fs::write("/tmp/ffmpeg_last_error.txt", stderr.as_bytes());
tracing::error!(
"ffmpeg failed ({} bytes), see /tmp/ffmpeg_last_error.txt",
stderr.len()
);
let _ = std::fs::remove_file(&filter_file);
let _ = std::fs::remove_file(&tmp);
return Err(StatusCode::INTERNAL_SERVER_ERROR);
}
let data = tokio::fs::read(&tmp)
.await
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
let _ = std::fs::remove_file(&filter_file);
let _ = std::fs::remove_file(&tmp);
Ok(Response::builder()
.header(header::CONTENT_TYPE, "video/mp4")
.header(header::CONTENT_LENGTH, data.len())
.body(Body::from(data))
.unwrap())
}
/// Streams a stored video file, or a stream-copied excerpt of it.
///
/// * With `start_frame`/`end_frame`, `start_time`/`end_time` or legacy
///   `start`/`end` (seconds) query parameters, ffmpeg cuts the excerpt with
///   `-c copy` and the result is returned as a complete `video/mp4` body.
///   `audio=off` drops the audio track.
/// * Otherwise the file itself is streamed; a `Range` header is answered with
///   `206 Partial Content`.
///
/// # Errors
/// `404` unknown UUID or missing file; `400` incomplete, non-finite or
/// non-positive excerpt range; `416` when a `Range` header cannot be satisfied
/// (empty file or inverted range); `500` on database, ffmpeg or I/O failure.
async fn stream_video(
    State(state): State<crate::api::types::AppState>,
    Path(file_uuid): Path<String>,
    Query(params): Query<std::collections::HashMap<String, String>>,
    request: axum::http::Request<Body>,
) -> Result<impl IntoResponse, StatusCode> {
    use tokio::io::{AsyncReadExt, AsyncSeekExt};
    let (_mode, audio) = parse_video_params(&params);
    let videos_table = schema::table_name("videos");
    let row: Option<(String,)> = sqlx::query_as(&format!(
        "SELECT file_path FROM {} WHERE file_uuid = $1",
        videos_table
    ))
    .bind(&file_uuid)
    .fetch_optional(state.db.pool())
    .await
    .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
    let (file_path,) = row.ok_or(StatusCode::NOT_FOUND)?;
    let src = std::path::PathBuf::from(&file_path);
    // One async stat doubles as the existence check and the size lookup, so a
    // metadata failure can no longer produce a bogus `Content-Length: 0`.
    let file_size = tokio::fs::metadata(&src)
        .await
        .map_err(|_| StatusCode::NOT_FOUND)?
        .len();

    // Chunk extraction with dual time/frame params
    let number = |key: &str| params.get(key).and_then(|v| v.parse::<f64>().ok());
    let start_time_param = number("start_time");
    let end_time_param = number("end_time");
    let start_frame_param = number("start_frame");
    let end_frame_param = number("end_frame");
    let start_legacy = number("start");
    let end_legacy = number("end");
    let has_range =
        start_frame_param.is_some() || start_time_param.is_some() || start_legacy.is_some();
    if has_range {
        let (start_sec, dur) = if let (Some(sf), Some(ef)) = (start_frame_param, end_frame_param) {
            let fps: f64 = sqlx::query_scalar(&format!(
                "SELECT COALESCE(fps, 24.0) FROM {} WHERE file_uuid = $1",
                videos_table
            ))
            .bind(&file_uuid)
            .fetch_optional(state.db.pool())
            .await
            .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?
            .unwrap_or(24.0);
            (sf / fps, (ef - sf) / fps)
        } else if let (Some(st), Some(et)) = (start_time_param, end_time_param) {
            (st, et - st)
        } else if let (Some(s), Some(e)) = (start_legacy, end_legacy) {
            (s, e - s)
        } else {
            return Err(StatusCode::BAD_REQUEST);
        };
        // NaN/inf (e.g. `start_time=nan` or fps 0) would otherwise reach ffmpeg.
        if !start_sec.is_finite() || !dur.is_finite() || dur <= 0.0 {
            return Err(StatusCode::BAD_REQUEST);
        }
        let tmp = std::env::temp_dir().join(format!("chunk_{}.mp4", Uuid::new_v4()));
        let tmp_str = tmp.to_str().unwrap_or("").to_string();
        let ss = start_sec.to_string();
        let d = dur.to_string();
        let mut chunk_args = vec!["-ss", &ss, "-i", &file_path, "-t", &d, "-c", "copy"];
        if audio == "off" {
            chunk_args.push("-an");
        }
        chunk_args.extend_from_slice(&["-movflags", "+faststart", "-y", &tmp_str]);
        // Run ffmpeg on the blocking pool so the async worker is not stalled.
        let mut cmd = ffmpeg_cmd();
        cmd.args(&chunk_args);
        let status = tokio::task::spawn_blocking(move || cmd.status())
            .await
            .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?
            .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
        if !status.success() {
            let _ = tokio::fs::remove_file(&tmp).await;
            return Err(StatusCode::INTERNAL_SERVER_ERROR);
        }
        let data = tokio::fs::read(&tmp).await;
        let _ = tokio::fs::remove_file(&tmp).await;
        let data = data.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
        return Response::builder()
            .header(header::CONTENT_TYPE, "video/mp4")
            .header(header::CONTENT_LENGTH, data.len())
            .body(Body::from(data))
            .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR);
    }

    // Full file streaming with range request support
    let content_type = "video/mp4";
    let range_hdr = request
        .headers()
        .get(header::RANGE)
        .and_then(|v| v.to_str().ok());
    if let Some(range_str) = range_hdr {
        // No byte of an empty file can be addressed; checking first also keeps
        // the range arithmetic below free of underflow.
        if file_size == 0 {
            return Err(StatusCode::RANGE_NOT_SATISFIABLE);
        }
        let (start, end) = parse_range(range_str, file_size);
        if end < start {
            return Err(StatusCode::RANGE_NOT_SATISFIABLE);
        }
        let length = end - start + 1;
        let mut file = tokio::fs::File::open(&src)
            .await
            .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
        file.seek(std::io::SeekFrom::Start(start))
            .await
            .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
        let take = file.take(length);
        let stream = tokio_util::io::ReaderStream::new(take);
        let body = Body::from_stream(stream);
        Response::builder()
            .status(StatusCode::PARTIAL_CONTENT)
            .header(header::CONTENT_TYPE, content_type)
            .header(
                header::CONTENT_RANGE,
                format!("bytes {}-{}/{}", start, end, file_size),
            )
            .header(header::CONTENT_LENGTH, length)
            .body(body)
            .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)
    } else {
        let file = tokio::fs::File::open(&src)
            .await
            .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
        let stream = tokio_util::io::ReaderStream::new(file);
        let body = Body::from_stream(stream);
        Response::builder()
            .header(header::CONTENT_TYPE, content_type)
            .header(header::CONTENT_LENGTH, file_size)
            .header(header::ACCEPT_RANGES, "bytes")
            .body(body)
            .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)
    }
}
/// Query-string parameters read by `face_thumbnail`.
#[derive(Debug, serde::Deserialize)]
struct ThumbQuery {
// Frame number to extract; when absent the handler asks
// `query_auto_representative_frame` for one.
frame: Option<i64>,
// Optional crop rectangle (top-left `x`,`y`, size `w`×`h`); applied only
// when all four values are present.
x: Option<i32>,
y: Option<i32>,
w: Option<i32>,
h: Option<i32>,
// When present, the handler first looks for a pre-stored face crop for this
// trace before falling back to ffmpeg.
trace_id: Option<i32>,
}
async fn face_thumbnail(
State(state): State<crate::api::types::AppState>,
Path(file_uuid): Path<String>,
Query(q): Query<ThumbQuery>,
) -> Result<impl IntoResponse, StatusCode> {
let videos_table = schema::table_name("videos");
let frame = match q.frame {
Some(f) => f,
None => {
let result = crate::core::processor::tkg::query_auto_representative_frame(
state.db.pool(),
&file_uuid,
)
.await
.map_err(|_| StatusCode::NOT_FOUND)?;
result.frame_number
}
};
// Step 1: Check for pre-stored face crop if trace_id is provided
if let Some(trace_id) = q.trace_id {
let output_dir = crate::core::config::OUTPUT_DIR.as_str();
let cached_path = std::path::PathBuf::from(output_dir)
.join(".faces")
.join(&file_uuid)
.join(trace_id.to_string())
.join(format!("{}.jpg", frame));
if cached_path.exists() {
tracing::debug!(
"[thumbnail] Using cached face crop: {}",
cached_path.display()
);
let bytes = tokio::fs::read(&cached_path).await.map_err(|e| {
tracing::warn!("[thumbnail] Failed to read cached file: {}", e);
StatusCode::INTERNAL_SERVER_ERROR
})?;
// Validate cached JPEG
crate::core::thumbnail::validator::validate_jpeg(&bytes).map_err(|e| {
tracing::warn!("[thumbnail] Cached JPEG validation failed: {}", e);
StatusCode::INTERNAL_SERVER_ERROR
})?;
return Ok(Response::builder()
.status(StatusCode::OK)
.header(header::CONTENT_TYPE, "image/jpeg")
.header(header::CACHE_CONTROL, "public, max-age=86400")
.body(Body::from(bytes))
.unwrap());
}
// Cached file not found, fallback to ffmpeg
tracing::debug!("[thumbnail] Cached file not found, falling back to ffmpeg");
}
// Step 2: Fallback to ffmpeg on-demand extraction
let row: Option<(String, Option<i64>, Option<i32>, Option<i32>)> = sqlx::query_as(&format!(
"SELECT file_path, total_frames, width, height FROM {} WHERE file_uuid = $1",
videos_table
))
.bind(&file_uuid)
.fetch_optional(state.db.pool())
.await
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
let (file_path, total_frames, video_width, video_height) = row.ok_or(StatusCode::NOT_FOUND)?;
if let Some(total) = total_frames {
if total > 0 {
crate::core::thumbnail::validator::validate_frame(frame, total).map_err(|e| {
tracing::warn!("[thumbnail] Frame validation failed: {}", e);
StatusCode::BAD_REQUEST
})?;
}
}
if let (Some(x), Some(y), Some(w), Some(h)) = (q.x, q.y, q.w, q.h) {
if let (Some(vw), Some(vh)) = (video_width, video_height) {
crate::core::thumbnail::validator::validate_crop(x, y, w, h, vw, vh).map_err(|e| {
tracing::warn!("[thumbnail] Crop validation failed: {}", e);
StatusCode::BAD_REQUEST
})?;
}
}
let select = format!("select=eq(n\\,{})", frame);
let vf = if let (Some(x), Some(y), Some(w), Some(h)) = (q.x, q.y, q.w, q.h) {
format!("{},crop={}:{}:{}:{}", select, w, h, x, y)
} else {
select
};
let output = ffmpeg_cmd()
.args([
"-i",
&file_path,
"-vf",
&vf,
"-frames:v",
"1",
"-f",
"image2pipe",
"-vcodec",
"mjpeg",
"-",
])
.output()
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
if !output.status.success() {
return Err(StatusCode::INTERNAL_SERVER_ERROR);
}
crate::core::thumbnail::validator::validate_jpeg(&output.stdout).map_err(|e| {
tracing::warn!("[thumbnail] JPEG validation failed: {}", e);
StatusCode::INTERNAL_SERVER_ERROR
})?;
Ok(Response::builder()
.status(StatusCode::OK)
.header(header::CONTENT_TYPE, "image/jpeg")
.header(header::CACHE_CONTROL, "public, max-age=86400")
.body(Body::from(output.stdout))
.unwrap())
}
/// Returns a JPEG thumbnail for a chunk, taken from the chunk's middle frame.
///
/// Thumbnails are cached at
/// `<OUTPUT_DIR>/.chunk_thumbs/<file_uuid>/<chunk_id>.jpg`; a cache hit is
/// returned without touching the database. On a miss the chunk's time range
/// is read from the chunk table, the middle frame (clamped to the video's
/// `total_frames` when known) is extracted with ffmpeg, and the result is
/// stored in the cache on a best-effort basis.
///
/// # Errors
/// `404` unknown chunk or file; `500` on database, ffmpeg, I/O or JPEG
/// validation failure.
async fn chunk_thumbnail(
    State(state): State<crate::api::types::AppState>,
    Path((file_uuid, chunk_id)): Path<(String, String)>,
) -> Result<impl IntoResponse, StatusCode> {
    /// True when `s` is exactly one ordinary path component (no separators,
    /// no `..`, not absolute), i.e. safe to join below a base directory.
    fn is_single_path_segment(s: &str) -> bool {
        let mut components = std::path::Path::new(s).components();
        matches!(
            (components.next(), components.next()),
            (Some(std::path::Component::Normal(_)), None)
        )
    }

    let videos_table = schema::table_name("videos");
    let chunk_table = schema::table_name("chunk");
    let output_dir = crate::core::config::OUTPUT_DIR.as_str();
    // Both identifiers come straight from the URL (percent-decoded), so they
    // are only used as path components when they cannot escape the cache
    // directory; otherwise the cache is bypassed for this request.
    let cache_allowed = is_single_path_segment(&file_uuid) && is_single_path_segment(&chunk_id);
    let cached_path = std::path::PathBuf::from(output_dir)
        .join(".chunk_thumbs")
        .join(&file_uuid)
        .join(format!("{}.jpg", chunk_id));
    if cache_allowed && cached_path.exists() {
        let bytes = tokio::fs::read(&cached_path).await.map_err(|e| {
            tracing::warn!("[chunk_thumbnail] Failed to read cache: {}", e);
            StatusCode::INTERNAL_SERVER_ERROR
        })?;
        return Response::builder()
            .status(StatusCode::OK)
            .header(header::CONTENT_TYPE, "image/jpeg")
            .header(header::CACHE_CONTROL, "public, max-age=86400")
            .body(Body::from(bytes))
            .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR);
    }

    let row: (f64, f64, f64) = sqlx::query_as(&format!(
        "SELECT start_time, end_time, fps FROM {} WHERE file_uuid = $1 AND chunk_id = $2 LIMIT 1",
        chunk_table
    ))
    .bind(&file_uuid)
    .bind(&chunk_id)
    .fetch_optional(state.db.pool())
    .await
    .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?
    .ok_or(StatusCode::NOT_FOUND)?;
    let (start_time, end_time, fps) = row;
    let start_frame = (start_time * fps).round() as i64;
    let end_frame = (end_time * fps).round() as i64;
    let mid_frame = (start_frame + end_frame) / 2;

    let video: Option<(String, Option<i64>)> = sqlx::query_as(&format!(
        "SELECT file_path, total_frames FROM {} WHERE file_uuid = $1",
        videos_table
    ))
    .bind(&file_uuid)
    .fetch_optional(state.db.pool())
    .await
    .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
    let (file_path, total_frames) = video.ok_or(StatusCode::NOT_FOUND)?;
    let frame = match total_frames {
        Some(t) if t > 0 => mid_frame.min(t - 1).max(0),
        _ => mid_frame.max(0),
    };
    let select = format!("select=eq(n\\,{})", frame);

    // Frame extraction decodes the file up to `frame`; run it on the blocking
    // pool so the async worker is not stalled meanwhile.
    let mut cmd = ffmpeg_cmd();
    cmd.args([
        "-i",
        &file_path,
        "-vf",
        &select,
        "-frames:v",
        "1",
        "-f",
        "image2pipe",
        "-vcodec",
        "mjpeg",
        "-",
    ]);
    let output = tokio::task::spawn_blocking(move || cmd.output())
        .await
        .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?
        .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
    if !output.status.success() {
        return Err(StatusCode::INTERNAL_SERVER_ERROR);
    }
    crate::core::thumbnail::validator::validate_jpeg(&output.stdout).map_err(|e| {
        tracing::warn!("[chunk_thumbnail] JPEG validation failed: {}", e);
        StatusCode::INTERNAL_SERVER_ERROR
    })?;

    // Caching is best-effort: a failed write must not fail the request.
    if cache_allowed {
        if let Some(parent) = cached_path.parent() {
            let _ = tokio::fs::create_dir_all(parent).await;
        }
        let _ = tokio::fs::write(&cached_path, &output.stdout).await;
    }
    Response::builder()
        .status(StatusCode::OK)
        .header(header::CONTENT_TYPE, "image/jpeg")
        .header(header::CACHE_CONTROL, "public, max-age=86400")
        .body(Body::from(output.stdout))
        .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)
}
/// Query-string parameters read by `video_clip`.
#[derive(Debug, serde::Deserialize)]
struct ClipQuery {
// Clip bounds as frame numbers; used when both are present.
start_frame: Option<i64>,
end_frame: Option<i64>,
// Clip bounds in seconds; used when the frame pair is incomplete and both
// times are present.
start_time: Option<f64>,
end_time: Option<f64>,
// Overrides the frame rate stored for the video when converting frames to seconds.
fps: Option<f64>,
// "debug" adds a drawtext overlay; the handler defaults to "normal".
mode: Option<String>,
// "off" adds `-an` to the ffmpeg command; the handler defaults to "on".
audio: Option<String>,
}
async fn video_clip(
State(state): State<crate::api::types::AppState>,
Path(file_uuid): Path<String>,
Query(q): Query<ClipQuery>,
) -> Result<impl IntoResponse, StatusCode> {
let videos_table = schema::table_name("videos");
let row: Option<(String, f64)> = sqlx::query_as(&format!(
"SELECT file_path, COALESCE(fps, 30.0) FROM {} WHERE file_uuid = $1",
videos_table
))
.bind(&file_uuid)
.fetch_optional(state.db.pool())
.await
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
let (file_path, db_fps) = row.ok_or(StatusCode::NOT_FOUND)?;
let fps = q.fps.unwrap_or(db_fps);
let (s, e) = if let (Some(sf), Some(ef)) = (q.start_frame, q.end_frame) {
(sf as f64 / fps, ef as f64 / fps)
} else if let (Some(st), Some(et)) = (q.start_time, q.end_time) {
(st, et)
} else {
return Err(StatusCode::BAD_REQUEST);
};
if e <= s {
return Err(StatusCode::BAD_REQUEST);
}
let mode = q.mode.as_deref().unwrap_or("normal").to_string();
let audio = q.audio.as_deref().unwrap_or("on");
let mut cmd = ffmpeg_cmd();
cmd.args(["-ss", &s.to_string(), "-i", &file_path]);
if q.start_frame.is_some() {
let frame_count = ((e - s) * fps) as i64;
cmd.args(["-vframes", &frame_count.to_string()]);
} else {
cmd.args(["-t", &(e - s).to_string()]);
}
if mode == "debug" {
let debug_text = if let (Some(sf), Some(ef)) = (q.start_frame, q.end_frame) {
format!("drawtext=text='Frame %{{n}} FRAMES {}-{}':fontsize=28:fontcolor=white:box=1:boxcolor=black@0.6:x=10:y=10", sf, ef)
} else {
"drawtext=text='Frame %{n} CLIP':fontsize=28:fontcolor=white:box=1:boxcolor=black@0.6:x=10:y=10".to_string()
};
cmd.args(["-vf", &debug_text]);
}
if audio == "off" {
cmd.args(["-an"]);
}
cmd.args([
"-c:v",
"libx264",
"-c:a",
"aac",
"-movflags",
"frag_keyframe+empty_moov",
"-f",
"mp4",
"-",
]);
let output = cmd
.output()
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
if !output.status.success() {
return Err(StatusCode::INTERNAL_SERVER_ERROR);
}
Ok(Response::builder()
.status(StatusCode::OK)
.header(header::CONTENT_TYPE, "video/mp2t")
.header(header::CACHE_CONTROL, "public, max-age=86400")
.body(Body::from(output.stdout))
.unwrap())
}
/// Axum handler for `/api/v1/file/:file_uuid/stranger/:stranger_id/video`.
///
/// Extracts the shared state, the `(file_uuid, stranger_id)` path pair and the
/// raw query map, then delegates all work to `stranger_video_inner`.
async fn stranger_video(
State(state): State<crate::api::types::AppState>,
Path((file_uuid, stranger_id)): Path<(String, i32)>,
Query(params): Query<std::collections::HashMap<String, String>>,
) -> Result<impl IntoResponse, StatusCode> {
stranger_video_inner(&state, &file_uuid, stranger_id, &params).await
}
async fn stranger_video_inner(
state: &crate::api::types::AppState,
file_uuid: &str,
stranger_id: i32,
params: &std::collections::HashMap<String, String>,
) -> Result<impl IntoResponse, StatusCode> {
use axum::http::header;
use uuid::Uuid;
tracing::info!(
"[stranger_video] Starting for file={}, stranger={}",
file_uuid,
stranger_id
);
let (mode, audio) = parse_video_params(&params);
let videos_table = schema::table_name("videos");
tracing::debug!("[stranger_video] videos_table: {}", videos_table);
let row: Option<(String, f64, i32, i32)> = sqlx::query_as(&format!(
"SELECT file_path, COALESCE(fps, 24.0), COALESCE(width, 0), COALESCE(height, 0) FROM {} WHERE file_uuid = $1",
videos_table
))
.bind(&file_uuid)
.fetch_optional(state.db.pool())
.await
.map_err(|e| {
tracing::error!("[stranger_video] Video query error: {}", e);
StatusCode::INTERNAL_SERVER_ERROR
})?;
let (video_path, fps, _width, _height) = row.ok_or_else(|| {
tracing::error!("[stranger_video] Video not found for uuid={}", file_uuid);
StatusCode::NOT_FOUND
})?;
tracing::info!(
"[stranger_video] Found video: path={}, fps={}",
video_path,
fps
);
// Query face detections by stranger_id directly
let face_table = schema::table_name("face_detections");
tracing::debug!("[stranger_video] face_table: {}", face_table);
// frame_number is BIGINT (i64) in database
let rows: Vec<(i64, i32, i32, i32, i32)> = sqlx::query_as(&format!(
"SELECT frame_number, x, y, width, height FROM {} WHERE file_uuid = $1 AND stranger_id = $2 ORDER BY frame_number",
face_table
))
.bind(&file_uuid).bind(stranger_id)
.fetch_all(state.db.pool()).await
.unwrap_or_else(|e| {
tracing::error!("[stranger_video] Face query error: {}", e);
vec![]
});
tracing::info!("[stranger_video] Found {} faces", rows.len());
if rows.is_empty() {
tracing::error!(
"[stranger_video] No faces found for stranger_id={}",
stranger_id
);
return Err(StatusCode::NOT_FOUND);
}
let first_frame = rows[0].0;
let last_frame = rows[rows.len() - 1].0;
let start_sec = first_frame as f64 / fps;
let padding = params
.get("padding")
.and_then(|s| s.parse().ok())
.unwrap_or(2.0);
let duration = (last_frame - first_frame) as f64 / fps + padding * 2.0;
let seek = (start_sec - padding).max(0.0);
tracing::info!(
"[stranger_video] Frame range: {} - {}, time: {:.2}s - {:.2}s",
first_frame,
last_frame,
seek,
seek + duration
);
// Only support normal mode for stranger video
let tmp = std::env::temp_dir().join(format!("stranger_{}.mp4", Uuid::new_v4()));
let tmp_str = tmp.to_str().unwrap_or("").to_string();
let sk = seek.to_string();
let du = duration.to_string();
let mut cmd_args = vec!["-ss", &sk, "-i", &video_path, "-t", &du, "-c", "copy"];
if audio == "off" {
cmd_args.push("-an");
}
cmd_args.extend_from_slice(&["-y", &tmp_str]);
tracing::debug!("[stranger_video] ffmpeg args: {:?}", cmd_args);
let result = ffmpeg_cmd().args(&cmd_args).output().map_err(|e| {
tracing::error!("[stranger_video] ffmpeg spawn error: {}", e);
StatusCode::INTERNAL_SERVER_ERROR
})?;
if !result.status.success() {
tracing::error!(
"[stranger_video] ffmpeg failed: {}",
String::from_utf8_lossy(&result.stderr)
);
return Err(StatusCode::INTERNAL_SERVER_ERROR);
}
tracing::info!(
"[stranger_video] ffmpeg success, output size: {} bytes",
result.stdout.len()
);
let data = tokio::fs::read(&tmp).await.map_err(|e| {
tracing::error!("[stranger_video] Read output error: {}", e);
StatusCode::INTERNAL_SERVER_ERROR
})?;
let _ = std::fs::remove_file(&tmp);
tracing::info!(
"[stranger_video] Returning video, size: {} bytes",
data.len()
);
Ok(Response::builder()
.header(header::CONTENT_TYPE, "video/mp4")
.header(header::CONTENT_LENGTH, data.len())
.body(Body::from(data))
.unwrap())
}
// ── Media Proxy: Unified endpoint for WordPress frontend ──
// Accepts the same query param format as the (inactive) WordPress snippet 61.
// Dispatches to the appropriate existing handler based on `type`.
// Caddy rewrites /wp-json/momentry/v1/media → /api/v1/media-proxy{?}
/// Unified media endpoint: inspects the `type` query parameter and forwards the
/// request to the matching existing handler.
///
/// Query parameters:
/// - `uuid` (falling back to `file_uuid`): the file identifier; required.
/// - `type`: `thumbnail`, `video` or `chunk_thumbnail`; required.
/// - For `thumbnail`: optional `frame`, `x`, `y`, `w`, `h`, `trace_id`; a value
///   that fails to parse is treated as absent.
/// - For `chunk_thumbnail`: `chunk_id`; required.
///
/// Returns `400 Bad Request` when a required parameter is missing or `type` is
/// not one of the values above; otherwise the delegated handler's result
/// converted into a `Response`.
async fn media_proxy_handler(
    State(state): State<crate::api::types::AppState>,
    Query(params): Query<std::collections::HashMap<String, String>>,
    request: axum::http::Request<Body>,
) -> Result<Response, StatusCode> {
    // Fetch `key` and parse it; missing or unparsable values both yield `None`.
    fn parsed<T: std::str::FromStr>(
        params: &std::collections::HashMap<String, String>,
        key: &str,
    ) -> Option<T> {
        params.get(key).and_then(|raw| raw.parse().ok())
    }

    let file_uuid = params
        .get("uuid")
        .or_else(|| params.get("file_uuid"))
        .cloned()
        .ok_or(StatusCode::BAD_REQUEST)?;
    let media_type = params
        .get("type")
        .cloned()
        .ok_or(StatusCode::BAD_REQUEST)?;

    match media_type.as_str() {
        "thumbnail" => {
            let thumb_query = ThumbQuery {
                frame: parsed(&params, "frame"),
                x: parsed(&params, "x"),
                y: parsed(&params, "y"),
                w: parsed(&params, "w"),
                h: parsed(&params, "h"),
                trace_id: parsed(&params, "trace_id"),
            };
            let outcome =
                face_thumbnail(State(state), Path(file_uuid), Query(thumb_query)).await;
            outcome.map(IntoResponse::into_response)
        }
        "video" => {
            // The full parameter map and the original request are passed through.
            let outcome =
                stream_video(State(state), Path(file_uuid), Query(params), request).await;
            outcome.map(IntoResponse::into_response)
        }
        "chunk_thumbnail" => {
            let chunk_id = params
                .get("chunk_id")
                .cloned()
                .ok_or(StatusCode::BAD_REQUEST)?;
            let outcome = chunk_thumbnail(State(state), Path((file_uuid, chunk_id))).await;
            outcome.map(IntoResponse::into_response)
        }
        _ => Err(StatusCode::BAD_REQUEST),
    }
}
/// Build the router that exposes the media proxy.
///
/// Registers a single route: `GET /api/v1/media-proxy`, served by
/// `media_proxy_handler`.
pub fn media_proxy_routes() -> Router<crate::api::types::AppState> {
    let proxy = get(media_proxy_handler);
    Router::new().route("/api/v1/media-proxy", proxy)
}