feat: add Vision LLM integration (CLIP + Qwen3-VL cascade)
- Add Qwen3-VL dynamic management (start/stop/status CLI)
- Add CLIP + Qwen3-VL cascade detection strategy
- Add Vision CLI commands (vision start/stop/status, detect)
- Add cascade_vision processor module
- Add clip processor module
- Add qwen_vl_manager module

Changes:
- scripts/start_qwen3vl.sh, stop_qwen3vl.sh: Qwen3-VL management scripts
- src/core/vision/: Qwen3-VL manager module
- src/core/processor/cascade_vision.rs: CLIP + Qwen3-VL cascade logic
- src/core/processor/clip.rs: CLIP classification and detection
- src/api/clip_api.rs: CLIP API endpoints
- src/cli/vision.rs: Vision CLI implementation
- src/cli/args.rs: Add Vision and Detect commands
- src/main.rs: Integrate Vision CLI
- src/core/mod.rs: Add vision module
- src/core/processor/mod.rs: Add cascade_vision module
This commit is contained in:
@@ -63,6 +63,7 @@ pub fn bbox_routes() -> Router<crate::api::types::AppState> {
|
||||
)
|
||||
.route("/api/v1/file/:file_uuid/video", get(stream_video))
|
||||
.route("/api/v1/file/:file_uuid/thumbnail", get(face_thumbnail))
|
||||
.route("/api/v1/file/:file_uuid/chunk/:chunk_id/thumbnail", get(chunk_thumbnail))
|
||||
.route("/api/v1/file/:file_uuid/clip", get(video_clip))
|
||||
}
|
||||
|
||||
@@ -745,13 +746,14 @@ async fn face_thumbnail(
|
||||
.join(format!("{}.jpg", frame));
|
||||
|
||||
if cached_path.exists() {
|
||||
tracing::debug!("[thumbnail] Using cached face crop: {}", cached_path.display());
|
||||
let bytes = tokio::fs::read(&cached_path)
|
||||
.await
|
||||
.map_err(|e| {
|
||||
tracing::warn!("[thumbnail] Failed to read cached file: {}", e);
|
||||
StatusCode::INTERNAL_SERVER_ERROR
|
||||
})?;
|
||||
tracing::debug!(
|
||||
"[thumbnail] Using cached face crop: {}",
|
||||
cached_path.display()
|
||||
);
|
||||
let bytes = tokio::fs::read(&cached_path).await.map_err(|e| {
|
||||
tracing::warn!("[thumbnail] Failed to read cached file: {}", e);
|
||||
StatusCode::INTERNAL_SERVER_ERROR
|
||||
})?;
|
||||
|
||||
// Validate cached JPEG
|
||||
crate::core::thumbnail::validator::validate_jpeg(&bytes).map_err(|e| {
|
||||
@@ -766,7 +768,7 @@ async fn face_thumbnail(
|
||||
.body(Body::from(bytes))
|
||||
.unwrap());
|
||||
}
|
||||
|
||||
|
||||
// Cached file not found, fallback to ffmpeg
|
||||
tracing::debug!("[thumbnail] Cached file not found, falling back to ffmpeg");
|
||||
}
|
||||
@@ -841,6 +843,99 @@ async fn face_thumbnail(
|
||||
.unwrap())
|
||||
}
|
||||
|
||||
async fn chunk_thumbnail(
|
||||
State(state): State<crate::api::types::AppState>,
|
||||
Path((file_uuid, chunk_id)): Path<(String, String)>,
|
||||
) -> Result<impl IntoResponse, StatusCode> {
|
||||
let videos_table = schema::table_name("videos");
|
||||
let chunk_table = schema::table_name("chunk");
|
||||
|
||||
let output_dir = crate::core::config::OUTPUT_DIR.as_str();
|
||||
let cached_path = std::path::PathBuf::from(output_dir)
|
||||
.join(".chunk_thumbs")
|
||||
.join(&file_uuid)
|
||||
.join(format!("{}.jpg", chunk_id));
|
||||
|
||||
if cached_path.exists() {
|
||||
let bytes = tokio::fs::read(&cached_path).await.map_err(|e| {
|
||||
tracing::warn!("[chunk_thumbnail] Failed to read cache: {}", e);
|
||||
StatusCode::INTERNAL_SERVER_ERROR
|
||||
})?;
|
||||
return Ok(Response::builder()
|
||||
.status(StatusCode::OK)
|
||||
.header(header::CONTENT_TYPE, "image/jpeg")
|
||||
.header(header::CACHE_CONTROL, "public, max-age=86400")
|
||||
.body(Body::from(bytes))
|
||||
.unwrap());
|
||||
}
|
||||
|
||||
let row: (f64, f64, f64) = sqlx::query_as(&format!(
|
||||
"SELECT start_time, end_time, fps FROM {} WHERE file_uuid = $1 AND chunk_id = $2 LIMIT 1",
|
||||
chunk_table
|
||||
))
|
||||
.bind(&file_uuid)
|
||||
.bind(&chunk_id)
|
||||
.fetch_optional(state.db.pool())
|
||||
.await
|
||||
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?
|
||||
.ok_or(StatusCode::NOT_FOUND)?;
|
||||
|
||||
let (start_time, end_time, fps) = row;
|
||||
|
||||
let start_frame = (start_time * fps).round() as i64;
|
||||
let end_frame = (end_time * fps).round() as i64;
|
||||
let mid_frame = (start_frame + end_frame) / 2;
|
||||
|
||||
let video: Option<(String, Option<i64>)> = sqlx::query_as(&format!(
|
||||
"SELECT file_path, total_frames FROM {} WHERE file_uuid = $1",
|
||||
videos_table
|
||||
))
|
||||
.bind(&file_uuid)
|
||||
.fetch_optional(state.db.pool())
|
||||
.await
|
||||
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
|
||||
let (file_path, total_frames) = video.ok_or(StatusCode::NOT_FOUND)?;
|
||||
|
||||
let frame = match total_frames {
|
||||
Some(t) if t > 0 => mid_frame.min(t - 1).max(0),
|
||||
_ => mid_frame.max(0),
|
||||
};
|
||||
|
||||
let select = format!("select=eq(n\\,{})", frame);
|
||||
let output = ffmpeg_cmd()
|
||||
.args([
|
||||
"-i", &file_path,
|
||||
"-vf", &select,
|
||||
"-frames:v", "1",
|
||||
"-f", "image2pipe",
|
||||
"-vcodec", "mjpeg",
|
||||
"-",
|
||||
])
|
||||
.output()
|
||||
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
|
||||
|
||||
if !output.status.success() {
|
||||
return Err(StatusCode::INTERNAL_SERVER_ERROR);
|
||||
}
|
||||
|
||||
crate::core::thumbnail::validator::validate_jpeg(&output.stdout).map_err(|e| {
|
||||
tracing::warn!("[chunk_thumbnail] JPEG validation failed: {}", e);
|
||||
StatusCode::INTERNAL_SERVER_ERROR
|
||||
})?;
|
||||
|
||||
if let Some(parent) = cached_path.parent() {
|
||||
let _ = tokio::fs::create_dir_all(parent).await;
|
||||
}
|
||||
let _ = tokio::fs::write(&cached_path, &output.stdout).await;
|
||||
|
||||
Ok(Response::builder()
|
||||
.status(StatusCode::OK)
|
||||
.header(header::CONTENT_TYPE, "image/jpeg")
|
||||
.header(header::CACHE_CONTROL, "public, max-age=86400")
|
||||
.body(Body::from(output.stdout))
|
||||
.unwrap())
|
||||
}
|
||||
|
||||
#[derive(Debug, serde::Deserialize)]
|
||||
struct ClipQuery {
|
||||
start_frame: Option<i64>,
|
||||
@@ -945,13 +1040,17 @@ async fn stranger_video_inner(
|
||||
use axum::http::header;
|
||||
use uuid::Uuid;
|
||||
|
||||
tracing::info!("[stranger_video] Starting for file={}, stranger={}", file_uuid, stranger_id);
|
||||
tracing::info!(
|
||||
"[stranger_video] Starting for file={}, stranger={}",
|
||||
file_uuid,
|
||||
stranger_id
|
||||
);
|
||||
|
||||
let (mode, audio) = parse_video_params(&params);
|
||||
|
||||
let videos_table = schema::table_name("videos");
|
||||
tracing::debug!("[stranger_video] videos_table: {}", videos_table);
|
||||
|
||||
|
||||
let row: Option<(String, f64, i32, i32)> = sqlx::query_as(&format!(
|
||||
"SELECT file_path, COALESCE(fps, 24.0), COALESCE(width, 0), COALESCE(height, 0) FROM {} WHERE file_uuid = $1",
|
||||
videos_table
|
||||
@@ -963,18 +1062,22 @@ async fn stranger_video_inner(
|
||||
tracing::error!("[stranger_video] Video query error: {}", e);
|
||||
StatusCode::INTERNAL_SERVER_ERROR
|
||||
})?;
|
||||
|
||||
|
||||
let (video_path, fps, _width, _height) = row.ok_or_else(|| {
|
||||
tracing::error!("[stranger_video] Video not found for uuid={}", file_uuid);
|
||||
StatusCode::NOT_FOUND
|
||||
})?;
|
||||
|
||||
tracing::info!("[stranger_video] Found video: path={}, fps={}", video_path, fps);
|
||||
|
||||
tracing::info!(
|
||||
"[stranger_video] Found video: path={}, fps={}",
|
||||
video_path,
|
||||
fps
|
||||
);
|
||||
|
||||
// Query face detections by stranger_id directly
|
||||
let face_table = schema::table_name("face_detections");
|
||||
tracing::debug!("[stranger_video] face_table: {}", face_table);
|
||||
|
||||
|
||||
// frame_number is BIGINT (i64) in database
|
||||
let rows: Vec<(i64, i32, i32, i32, i32)> = sqlx::query_as(&format!(
|
||||
"SELECT frame_number, x, y, width, height FROM {} WHERE file_uuid = $1 AND stranger_id = $2 ORDER BY frame_number",
|
||||
@@ -982,15 +1085,18 @@ async fn stranger_video_inner(
|
||||
))
|
||||
.bind(&file_uuid).bind(stranger_id)
|
||||
.fetch_all(state.db.pool()).await
|
||||
.unwrap_or_else(|e| {
|
||||
tracing::error!("[stranger_video] Face query error: {}", e);
|
||||
vec![]
|
||||
.unwrap_or_else(|e| {
|
||||
tracing::error!("[stranger_video] Face query error: {}", e);
|
||||
vec![]
|
||||
});
|
||||
|
||||
tracing::info!("[stranger_video] Found {} faces", rows.len());
|
||||
|
||||
if rows.is_empty() {
|
||||
tracing::error!("[stranger_video] No faces found for stranger_id={}", stranger_id);
|
||||
tracing::error!(
|
||||
"[stranger_video] No faces found for stranger_id={}",
|
||||
stranger_id
|
||||
);
|
||||
return Err(StatusCode::NOT_FOUND);
|
||||
}
|
||||
|
||||
@@ -1004,8 +1110,13 @@ async fn stranger_video_inner(
|
||||
let duration = (last_frame - first_frame) as f64 / fps + padding * 2.0;
|
||||
let seek = (start_sec - padding).max(0.0);
|
||||
|
||||
tracing::info!("[stranger_video] Frame range: {} - {}, time: {:.2}s - {:.2}s",
|
||||
first_frame, last_frame, seek, seek + duration);
|
||||
tracing::info!(
|
||||
"[stranger_video] Frame range: {} - {}, time: {:.2}s - {:.2}s",
|
||||
first_frame,
|
||||
last_frame,
|
||||
seek,
|
||||
seek + duration
|
||||
);
|
||||
|
||||
// Only support normal mode for stranger video
|
||||
let tmp = std::env::temp_dir().join(format!("stranger_{}.mp4", Uuid::new_v4()));
|
||||
@@ -1017,37 +1128,98 @@ async fn stranger_video_inner(
|
||||
cmd_args.push("-an");
|
||||
}
|
||||
cmd_args.extend_from_slice(&["-y", &tmp_str]);
|
||||
|
||||
|
||||
tracing::debug!("[stranger_video] ffmpeg args: {:?}", cmd_args);
|
||||
|
||||
let result = ffmpeg_cmd()
|
||||
.args(&cmd_args)
|
||||
.output()
|
||||
.map_err(|e| {
|
||||
tracing::error!("[stranger_video] ffmpeg spawn error: {}", e);
|
||||
StatusCode::INTERNAL_SERVER_ERROR
|
||||
})?;
|
||||
|
||||
|
||||
let result = ffmpeg_cmd().args(&cmd_args).output().map_err(|e| {
|
||||
tracing::error!("[stranger_video] ffmpeg spawn error: {}", e);
|
||||
StatusCode::INTERNAL_SERVER_ERROR
|
||||
})?;
|
||||
|
||||
if !result.status.success() {
|
||||
tracing::error!("[stranger_video] ffmpeg failed: {}", String::from_utf8_lossy(&result.stderr));
|
||||
tracing::error!(
|
||||
"[stranger_video] ffmpeg failed: {}",
|
||||
String::from_utf8_lossy(&result.stderr)
|
||||
);
|
||||
return Err(StatusCode::INTERNAL_SERVER_ERROR);
|
||||
}
|
||||
|
||||
tracing::info!("[stranger_video] ffmpeg success, output size: {} bytes", result.stdout.len());
|
||||
|
||||
let data = tokio::fs::read(&tmp)
|
||||
.await
|
||||
.map_err(|e| {
|
||||
tracing::error!("[stranger_video] Read output error: {}", e);
|
||||
StatusCode::INTERNAL_SERVER_ERROR
|
||||
})?;
|
||||
|
||||
tracing::info!(
|
||||
"[stranger_video] ffmpeg success, output size: {} bytes",
|
||||
result.stdout.len()
|
||||
);
|
||||
|
||||
let data = tokio::fs::read(&tmp).await.map_err(|e| {
|
||||
tracing::error!("[stranger_video] Read output error: {}", e);
|
||||
StatusCode::INTERNAL_SERVER_ERROR
|
||||
})?;
|
||||
let _ = std::fs::remove_file(&tmp);
|
||||
|
||||
tracing::info!("[stranger_video] Returning video, size: {} bytes", data.len());
|
||||
|
||||
|
||||
tracing::info!(
|
||||
"[stranger_video] Returning video, size: {} bytes",
|
||||
data.len()
|
||||
);
|
||||
|
||||
Ok(Response::builder()
|
||||
.header(header::CONTENT_TYPE, "video/mp4")
|
||||
.header(header::CONTENT_LENGTH, data.len())
|
||||
.body(Body::from(data))
|
||||
.unwrap())
|
||||
}
|
||||
|
||||
// ── Media Proxy: Unified endpoint for WordPress frontend ──
|
||||
// Accepts the same query param format as the (inactive) WordPress snippet 61.
|
||||
// Dispatches to the appropriate existing handler based on `type`.
|
||||
// Caddy rewrites /wp-json/momentry/v1/media → /api/v1/media-proxy{?}
|
||||
|
||||
/// Dispatch query params to the appropriate handler
|
||||
async fn media_proxy_handler(
|
||||
State(state): State<crate::api::types::AppState>,
|
||||
Query(params): Query<std::collections::HashMap<String, String>>,
|
||||
request: axum::http::Request<Body>,
|
||||
) -> Result<Response, StatusCode> {
|
||||
let uuid = params
|
||||
.get("uuid")
|
||||
.or_else(|| params.get("file_uuid"))
|
||||
.ok_or(StatusCode::BAD_REQUEST)?;
|
||||
|
||||
let type_ = params
|
||||
.get("type")
|
||||
.map(String::as_str)
|
||||
.ok_or(StatusCode::BAD_REQUEST)?;
|
||||
|
||||
match type_ {
|
||||
"thumbnail" => {
|
||||
let thumb_query = ThumbQuery {
|
||||
frame: params.get("frame").and_then(|v| v.parse().ok()),
|
||||
x: params.get("x").and_then(|v| v.parse().ok()),
|
||||
y: params.get("y").and_then(|v| v.parse().ok()),
|
||||
w: params.get("w").and_then(|v| v.parse().ok()),
|
||||
h: params.get("h").and_then(|v| v.parse().ok()),
|
||||
trace_id: params.get("trace_id").and_then(|v| v.parse().ok()),
|
||||
};
|
||||
face_thumbnail(State(state), Path(uuid.clone()), Query(thumb_query))
|
||||
.await
|
||||
.map(IntoResponse::into_response)
|
||||
}
|
||||
"video" => stream_video(State(state), Path(uuid.clone()), Query(params), request)
|
||||
.await
|
||||
.map(IntoResponse::into_response),
|
||||
"chunk_thumbnail" => {
|
||||
let chunk_id = params
|
||||
.get("chunk_id")
|
||||
.ok_or(StatusCode::BAD_REQUEST)?;
|
||||
chunk_thumbnail(
|
||||
State(state),
|
||||
Path((uuid.clone(), chunk_id.clone())),
|
||||
)
|
||||
.await
|
||||
.map(IntoResponse::into_response)
|
||||
}
|
||||
_ => Err(StatusCode::BAD_REQUEST),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn media_proxy_routes() -> Router<crate::api::types::AppState> {
|
||||
Router::new().route("/api/v1/media-proxy", get(media_proxy_handler))
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user