Files
momentry_core/src/api/tmdb_api.rs
Accusys 074cdcdbed refactor: remove face embedding architecture - single Qdrant _faces collection
- Delete FaceEmbeddingDb module (face_embedding_db.rs)
- Stub match_faces_iterative, generate_seed_embeddings, tmdb_match_handler
- Remove sync_trace_embeddings, populate_face_embeddings_to_qdrant
- Remove embedding from face.json output (face_processor.py)
- Remove embedding from PG UPDATE (store_traced_faces.py)
- Remove workspace traces staging (checkin.rs, qdrant_workspace.rs)
- Fix tests: add pose_angle to Face, hand_nodes to TkgResult

Disabled functions (need to be reimplemented with _faces):
- match_faces_iterative (identity agent)
- generate_seed_embeddings (TMDb seeds)
- tmdb_match_handler (TMDb matching)
- cluster_face_embeddings, search_similar_faces
- merge_traces_within_cuts
2026-06-24 22:27:09 +08:00

625 lines
22 KiB
Rust

use axum::{
extract::{Path, State},
http::StatusCode,
response::Json,
routing::{get, post},
Router,
};
use serde::{Deserialize, Serialize};
use crate::api::types::AppState;
use crate::core::config;
use crate::core::db::{PostgresDb, QdrantDb};
use crate::core::tmdb;
/// JSON body returned by the `tmdb_prefetch` handler (`POST /api/v1/agents/tmdb/prefetch`).
#[derive(Debug, Serialize)]
struct TmdbPrefetchResponse {
/// `true` when a local cache was found or the prefetch agent script exited successfully.
success: bool,
/// The file UUID taken from the request, echoed back to the caller.
file_uuid: String,
/// Human-readable outcome: an offline notice, the last stdout line of the agent, or an error text.
message: String,
/// Path of the `<OUTPUT_DIR>/<file_uuid>.tmdb.json` cache file; `None` on every failure path.
cache_path: Option<String>,
}
/// JSON body returned by the `tmdb_probe_handler` handler (`POST /api/v1/file/:file_uuid/tmdb-probe`).
#[derive(Debug, Serialize)]
struct TmdbProbeResponse {
/// `true` when the probe from cache succeeded; `false` when no TMDb cache was found.
success: bool,
/// The file UUID taken from the request path.
file_uuid: String,
/// TMDb movie id reported by the probe; `None` when no cache was found.
tmdb_id: Option<u64>,
/// Movie title reported by the probe; `None` when no cache was found.
movie_title: Option<String>,
/// Cast count reported by the probe; `None` when no cache was found.
cast_count: Option<usize>,
/// Number of identities the probe reports as created/updated; `None` when no cache was found.
identities_created: Option<usize>,
/// Human-readable summary of the outcome.
message: String,
}
/// JSON body returned by the `tmdb_resource_status` handler (`GET /api/v1/resource/tmdb`).
#[derive(Debug, Serialize)]
struct TmdbResourceResponse {
/// Always `true` in the handler visible in this file.
success: bool,
/// Result of `tmdb::status::quick_status()`.
status: tmdb::status::TmdbResourceStatus,
/// Result of `tmdb::status::count_tmdb_identities`; reported as 0 when that call fails.
identities_seeded: i64,
/// Result of `tmdb::status::count_tmdb_identities_with_embedding`; reported as 0 when that call fails.
identities_with_embedding: i64,
/// Result of `tmdb::status::count_cache_files()`.
cache_files: usize,
/// Descriptions of TMDb-related endpoints, as listed by the handler.
operations: Vec<TmdbOperation>,
}
/// Describes one TMDb-related HTTP endpoint inside [`TmdbResourceResponse::operations`].
#[derive(Debug, Serialize)]
struct TmdbOperation {
/// HTTP method, e.g. `"GET"` or `"POST"`.
method: String,
/// Route path, e.g. `"/api/v1/resource/tmdb"`.
path: String,
/// Short human-readable description of the endpoint.
description: String,
}
/// JSON body returned by the `tmdb_resource_check` handler (`POST /api/v1/resource/tmdb/check`).
#[derive(Debug, Serialize)]
struct TmdbCheckResponse {
/// `true` only when the status reports the API as reachable and an API key as configured.
success: bool,
/// Result of `tmdb::status::check_tmdb_api()`.
status: tmdb::status::TmdbResourceStatus,
}
/// JSON request body accepted by the `tmdb_prefetch` handler.
#[derive(Debug, Deserialize)]
struct PrefetchRequest {
/// UUID of the file to prefetch TMDb data for; looked up in the `videos` table.
file_uuid: String,
}
/// Path parameters for routes containing a `:file_uuid` segment (probe and match handlers).
#[derive(Debug, Deserialize)]
struct FileUuidParam {
/// Value of the `:file_uuid` path segment.
file_uuid: String,
}
/// JSON request body accepted by the `tmdb_fetch` handler (`POST /api/v1/tmdb/fetch`).
#[derive(Debug, Deserialize)]
struct TmdbFetchRequest {
/// UUID of the file whose `file_name` is read from the `videos` table and probed against TMDb.
file_uuid: String,
}
/// Per-cast-member outcome reported by the `tmdb_fetch` handler.
#[derive(Debug, Serialize)]
struct TmdbFetchMemberResult {
/// Cast member name as stored in the TMDb cache.
name: String,
/// Character name; `None` when the cached character string is empty.
character: Option<String>,
/// Copy of the cached `also_known_as` list.
aliases: Vec<String>,
/// JSON object assembled by the handler from the cached cast-member fields.
metadata: serde_json::Value,
/// One of `"success"`, `"partial"`, `"skipped"` or `"error"`, as set by the handler.
status: String,
/// Whether saving the identity JSON file succeeded.
has_json: bool,
/// Whether `profile.jpg` already existed or was written during this request.
has_jpg: bool,
/// Failure detail for `"partial"` and `"error"` results; `None` otherwise.
error: Option<String>,
}
/// JSON body returned by the `tmdb_fetch` handler on success.
#[derive(Debug, Serialize)]
struct TmdbFetchResponse {
/// Always `true` when this body is produced; failures are returned as status-code errors instead.
success: bool,
/// Title reported by the movie probe.
movie_title: Option<String>,
/// TMDb id reported by the movie probe.
tmdb_id: Option<u64>,
/// One entry per cast member found in the TMDb cache.
results: Vec<TmdbFetchMemberResult>,
/// JSON object with the counters `total`, `success`, `with_json` and `with_jpg`.
summary: serde_json::Value,
}
/// Assembles the router that exposes every TMDb-related endpoint of this module.
///
/// Registered routes (method, path, handler):
/// - `POST /api/v1/agents/tmdb/prefetch` → `tmdb_prefetch`
/// - `POST /api/v1/file/:file_uuid/tmdb-probe` → `tmdb_probe_handler`
/// - `POST /api/v1/tmdb/fetch` → `tmdb_fetch`
/// - `POST /api/v1/agents/tmdb/match/:file_uuid` → `tmdb_match_handler`
/// - `GET  /api/v1/resource/tmdb` → `tmdb_resource_status`
/// - `POST /api/v1/resource/tmdb/check` → `tmdb_resource_check`
pub fn tmdb_routes() -> Router<AppState> {
    const PREFETCH_PATH: &str = "/api/v1/agents/tmdb/prefetch";
    const PROBE_PATH: &str = "/api/v1/file/:file_uuid/tmdb-probe";
    const FETCH_PATH: &str = "/api/v1/tmdb/fetch";
    const MATCH_PATH: &str = "/api/v1/agents/tmdb/match/:file_uuid";
    const RESOURCE_PATH: &str = "/api/v1/resource/tmdb";
    const RESOURCE_CHECK_PATH: &str = "/api/v1/resource/tmdb/check";

    // Routes are added in the same order as before; each step consumes and returns the router.
    let router = Router::new();
    let router = router.route(PREFETCH_PATH, post(tmdb_prefetch));
    let router = router.route(PROBE_PATH, post(tmdb_probe_handler));
    let router = router.route(FETCH_PATH, post(tmdb_fetch));
    let router = router.route(MATCH_PATH, post(tmdb_match_handler));
    let router = router.route(RESOURCE_PATH, get(tmdb_resource_status));
    router.route(RESOURCE_CHECK_PATH, post(tmdb_resource_check))
}
async fn tmdb_prefetch(
State(state): State<AppState>,
Json(req): Json<PrefetchRequest>,
) -> Json<TmdbPrefetchResponse> {
let file_uuid = req.file_uuid;
// Verify file exists in DB
let file_exists: bool = sqlx::query_scalar(&format!(
"SELECT COUNT(*) > 0 FROM {} WHERE file_uuid = $1",
crate::core::db::schema::table_name("videos")
))
.bind(&file_uuid)
.fetch_one(state.db.pool())
.await
.unwrap_or(false);
if !file_exists {
return Json(TmdbPrefetchResponse {
success: false,
file_uuid: file_uuid.clone(),
message: format!("File not found: {}", file_uuid),
cache_path: None,
});
}
// Offline-first: check if identity files already exist on disk (pre-prepared)
let identities_dir = std::path::Path::new(&*config::OUTPUT_DIR).join("identities");
let index_path = identities_dir.join("_index.json");
let cache_path = format!("{}/{}.tmdb.json", *config::OUTPUT_DIR, file_uuid);
let cache_file = std::path::Path::new(&cache_path);
if index_path.exists() && cache_file.exists() {
return Json(TmdbPrefetchResponse {
success: true,
file_uuid,
message: format!(
"Offline: using local identity files from {}.",
identities_dir.display()
),
cache_path: Some(cache_path),
});
}
if config::tmdb::API_KEY.is_none() {
return Json(TmdbPrefetchResponse {
success: false,
file_uuid: file_uuid.clone(),
message: "TMDB_API_KEY not configured and no local cache found.".to_string(),
cache_path: None,
});
}
let scripts_dir = config::SCRIPTS_DIR.clone();
let python_path = config::PYTHON_PATH.clone();
let agent_script = std::path::Path::new(&scripts_dir).join("tmdb_agent.py");
if !agent_script.exists() {
return Json(TmdbPrefetchResponse {
success: false,
file_uuid,
message: format!("tmdb_agent.py not found at {}", agent_script.display()),
cache_path: None,
});
}
let db_url = config::DATABASE_URL.clone();
let output = tokio::process::Command::new(&*python_path)
.arg(&agent_script)
.arg("--file-uuid")
.arg(&file_uuid)
.env("DATABASE_URL", &db_url)
.env("DATABASE_SCHEMA", &*config::DATABASE_SCHEMA)
.output()
.await;
match output {
Ok(o) => {
if o.status.success() {
let out = String::from_utf8_lossy(&o.stdout);
Json(TmdbPrefetchResponse {
success: true,
file_uuid,
message: out.lines().last().unwrap_or("OK").to_string(),
cache_path: Some(cache_path),
})
} else {
let stderr = String::from_utf8_lossy(&o.stderr);
Json(TmdbPrefetchResponse {
success: false,
file_uuid,
message: stderr.to_string(),
cache_path: None,
})
}
}
Err(e) => Json(TmdbPrefetchResponse {
success: false,
file_uuid,
message: format!("Failed to run tmdb_agent.py: {}", e),
cache_path: None,
}),
}
}
async fn tmdb_probe_handler(
Path(params): Path<FileUuidParam>,
State(state): State<AppState>,
) -> Result<Json<TmdbProbeResponse>, (StatusCode, Json<serde_json::Value>)> {
let file_uuid = params.file_uuid;
// Verify file exists
let file_exists: bool = sqlx::query_scalar(&format!(
"SELECT COUNT(*) > 0 FROM {} WHERE file_uuid = $1",
crate::core::db::schema::table_name("videos")
))
.bind(&file_uuid)
.fetch_one(state.db.pool())
.await
.unwrap_or(false);
if !file_exists {
return Err((
StatusCode::NOT_FOUND,
Json(serde_json::json!({
"error": "Video not found", "file_uuid": file_uuid
})),
));
}
match tmdb::probe::probe_from_cache(&state.db, &file_uuid).await {
Ok(result) => {
// Sync identity JSON files for newly created/updated identities
let pool = state.db.pool().clone();
let file_uuid_clone = file_uuid.clone();
tokio::spawn(async move {
// Query identities linked to this file
let fi_table = crate::core::db::schema::table_name("file_identities");
let query = format!(
"SELECT i.uuid::text FROM {} fi JOIN {} i ON fi.identity_id = i.id WHERE fi.file_uuid = $1",
fi_table, crate::core::db::schema::table_name("identities")
);
if let Ok(rows) = sqlx::query_scalar::<_, String>(&query)
.bind(&file_uuid_clone)
.fetch_all(&pool)
.await
{
for uuid in rows {
let _ = crate::core::identity::storage::save_identity_file_by_pool(
&pool, &uuid,
)
.await;
}
}
});
Ok(Json(TmdbProbeResponse {
success: true,
file_uuid,
tmdb_id: Some(result.tmdb_id),
movie_title: Some(result.title),
cast_count: Some(result.cast_count),
identities_created: Some(result.identities_created),
message: format!(
"Created/updated {} identities for movie ID {}",
result.identities_created, result.tmdb_id
),
}))
}
Err(e) => {
let msg = e.to_string();
if msg.contains("not found") {
Ok(Json(TmdbProbeResponse {
success: false,
file_uuid,
tmdb_id: None,
movie_title: None,
cast_count: None,
identities_created: None,
message: "No TMDb cache found. Run tmdb-prefetch first.".to_string(),
}))
} else {
Err((
StatusCode::INTERNAL_SERVER_ERROR,
Json(serde_json::json!({
"error": msg, "file_uuid": file_uuid
})),
))
}
}
}
}
async fn tmdb_resource_status(State(state): State<AppState>) -> Json<TmdbResourceResponse> {
let status = tmdb::status::quick_status();
let identities_seeded = tmdb::status::count_tmdb_identities(state.db.pool())
.await
.unwrap_or(0);
let identities_with_embedding =
tmdb::status::count_tmdb_identities_with_embedding(state.db.pool())
.await
.unwrap_or(0);
let cache_files = tmdb::status::count_cache_files();
Json(TmdbResourceResponse {
success: true,
status,
identities_seeded,
identities_with_embedding,
cache_files,
operations: vec![
TmdbOperation {
method: "GET".to_string(),
path: "/api/v1/resource/tmdb".to_string(),
description: "TMDb resource status".to_string(),
},
TmdbOperation {
method: "POST".to_string(),
path: "/api/v1/resource/tmdb/check".to_string(),
description: "Ping TMDb API health".to_string(),
},
TmdbOperation {
method: "POST".to_string(),
path: "/api/v1/agents/tmdb/prefetch".to_string(),
description: "Fetch TMDb data and cache locally".to_string(),
},
TmdbOperation {
method: "POST".to_string(),
path: "/api/v1/file/:file_uuid/tmdb-probe".to_string(),
description: "Read cache and create identities".to_string(),
},
],
})
}
/// Runs the TMDb API health check and wraps its status in a JSON response.
///
/// `success` is `true` only when the check reports the API as reachable (an unknown
/// reachability counts as unreachable) and an API key is configured.
async fn tmdb_resource_check() -> Json<TmdbCheckResponse> {
    let status = tmdb::status::check_tmdb_api().await;

    let reachable = status.api_reachable.unwrap_or(false);
    let success = reachable && status.api_key_configured;

    Json(TmdbCheckResponse { success, status })
}
async fn tmdb_fetch(
State(state): State<AppState>,
Json(req): Json<TmdbFetchRequest>,
) -> Result<Json<TmdbFetchResponse>, (StatusCode, Json<serde_json::Value>)> {
let file_uuid = req.file_uuid;
let filename: Option<String> = sqlx::query_scalar(&format!(
"SELECT file_name FROM {} WHERE file_uuid = $1",
crate::core::db::schema::table_name("videos")
))
.bind(&file_uuid)
.fetch_optional(state.db.pool())
.await
.map_err(|e| {
(
StatusCode::INTERNAL_SERVER_ERROR,
Json(serde_json::json!({"error": e.to_string()})),
)
})?
.flatten();
let filename = filename.ok_or_else(|| {
(
StatusCode::NOT_FOUND,
Json(serde_json::json!({"error": "File not found"})),
)
})?;
// Run probe to create identities
match tmdb::probe::probe_movie(&state.db, &filename, &file_uuid).await {
Ok(Some(probe_result)) => {
let mut member_results = Vec::new();
// Read the cache to get cast list with names and profile URLs
if let Ok(cache) = tmdb::cache::read_tmdb_cache(&file_uuid) {
for member in &cache.cast {
let name = member.name.clone();
let character = if member.character.is_empty() {
None
} else {
Some(member.character.clone())
};
let aliases = member.also_known_as.clone();
let profile_url = member
.profile_path
.as_ref()
.map(|p| format!("https://image.tmdb.org/t/p/w185{}", p));
let metadata = serde_json::json!({
"tmdb_id": member.id,
"name": member.name,
"character": member.character,
"aliases": member.also_known_as,
"profile_path": member.profile_path,
"order": member.order,
"biography": member.biography,
"birthday": member.birthday,
"place_of_birth": member.place_of_birth,
"imdb_id": member.imdb_id,
"known_for_department": member.known_for_department,
"popularity": member.popularity,
"deathday": member.deathday,
"gender": member.gender,
"homepage": member.homepage,
});
let identity_row = sqlx::query_as::<_, (i32, uuid::Uuid)>(&format!(
"SELECT id, uuid FROM {} WHERE name = $1 AND source = 'tmdb' LIMIT 1",
crate::core::db::schema::table_name("identities")
))
.bind(&name)
.fetch_optional(state.db.pool())
.await;
match identity_row {
Ok(Some((identity_id, uuid))) => {
let clean = uuid.to_string().replace('-', "");
let dir = crate::core::identity::storage::identity_dir(&clean);
std::fs::create_dir_all(&dir).ok();
let json_result = crate::core::identity::storage::save_identity_file(
&state.db, &clean,
)
.await;
let has_json = json_result.is_ok();
let has_jpg = if let Some(url) = &profile_url {
let jpg_path = dir.join("profile.jpg");
if jpg_path.exists() {
true
} else if let Ok(resp) = reqwest::get(url).await {
if let Ok(bytes) = resp.bytes().await {
std::fs::write(&jpg_path, &bytes).is_ok()
} else {
false
}
} else {
false
}
} else {
false
};
// Push face_embedding to Qdrant if available
let face_collection = format!(
"{}_faces",
crate::core::config::REDIS_KEY_PREFIX
.as_str()
.trim_end_matches(':')
);
let emb_row: Option<(Vec<f32>,)> = sqlx::query_as(
&format!(
"SELECT face_embedding::real[] FROM {} WHERE uuid = $1 AND face_embedding IS NOT NULL",
crate::core::db::schema::table_name("identities")
)
)
.bind(&uuid)
.fetch_optional(state.db.pool())
.await
.unwrap_or(None);
if let Some((embedding,)) = emb_row {
let qdrant = QdrantDb::new();
qdrant.ensure_collection(&face_collection, 512).await.ok();
let _ = qdrant
.upsert_vector_to_collection(
&face_collection,
identity_id as u64,
&embedding,
Some(serde_json::json!({
"identity_id": identity_id,
"name": name,
"source": "tmdb",
})),
)
.await;
}
let status = if has_json && has_jpg {
"success"
} else {
"partial"
};
let error = if !has_json {
Some(format!("{:?}", json_result.err()))
} else if !has_jpg {
Some("profile download failed".to_string())
} else {
None
};
member_results.push(TmdbFetchMemberResult {
name: name.clone(),
character: character.clone(),
aliases: aliases.clone(),
metadata: metadata.clone(),
status: status.to_string(),
has_json,
has_jpg,
error,
});
}
Ok(None) => {
member_results.push(TmdbFetchMemberResult {
name: name.clone(),
character: character.clone(),
aliases: aliases.clone(),
metadata: metadata.clone(),
status: "skipped".to_string(),
has_json: false,
has_jpg: false,
error: None,
});
}
Err(e) => {
member_results.push(TmdbFetchMemberResult {
name: name.clone(),
character: character.clone(),
aliases: aliases.clone(),
metadata: metadata.clone(),
status: "error".to_string(),
has_json: false,
has_jpg: false,
error: Some(format!("DB error: {}", e)),
});
}
}
}
}
let total = member_results.len();
let success_count = member_results
.iter()
.filter(|r| r.status == "success")
.count();
let json_count = member_results.iter().filter(|r| r.has_json).count();
let jpg_count = member_results.iter().filter(|r| r.has_jpg).count();
Ok(Json(TmdbFetchResponse {
success: true,
movie_title: Some(probe_result.title),
tmdb_id: Some(probe_result.tmdb_id),
results: member_results,
summary: serde_json::json!({
"total": total,
"success": success_count,
"with_json": json_count,
"with_jpg": jpg_count,
}),
}))
}
Ok(None) => Err((
StatusCode::NOT_FOUND,
Json(serde_json::json!({
"error": "No movie found for this filename"
})),
)),
Err(e) => Err((
StatusCode::INTERNAL_SERVER_ERROR,
Json(serde_json::json!({
"error": e.to_string()
})),
)),
}
}
/// JSON body returned by the `tmdb_match_handler` handler (`POST /api/v1/agents/tmdb/match/:file_uuid`).
#[derive(Debug, Serialize)]
struct TmdbMatchResponse {
/// Set to `true` by the current stub handler even though matching is disabled.
success: bool,
/// The file UUID taken from the request path.
file_uuid: String,
/// Number of bindings created; the current stub handler always reports 0.
bindings_created: usize,
/// Number of TMDb identities available; the current stub handler always reports 0.
tmdb_identities_available: usize,
/// Human-readable outcome text.
message: String,
}
/// Stub for TMDb face matching; matching itself is currently disabled.
///
/// The handler still validates the request:
/// - `404` when the file is not present in the `videos` table;
/// - `500` when the existence check itself fails (a database failure is no longer reported as
///   "Video not found").
///
/// Otherwise it logs a warning and answers `200` with zero bindings and a message stating that
/// matching needs to be reimplemented on top of the `_faces` collection.
async fn tmdb_match_handler(
    Path(params): Path<FileUuidParam>,
    State(state): State<AppState>,
) -> Result<Json<TmdbMatchResponse>, (StatusCode, Json<serde_json::Value>)> {
    let file_uuid = params.file_uuid;

    // Verify file exists
    let exists_sql = format!(
        "SELECT COUNT(*) > 0 FROM {} WHERE file_uuid = $1",
        crate::core::db::schema::table_name("videos")
    );
    let file_exists = sqlx::query_scalar::<_, bool>(&exists_sql)
        .bind(&file_uuid)
        .fetch_one(state.db.pool())
        .await
        .map_err(|e| {
            (
                StatusCode::INTERNAL_SERVER_ERROR,
                Json(serde_json::json!({
                    "error": e.to_string(), "file_uuid": file_uuid
                })),
            )
        })?;
    if !file_exists {
        return Err((
            StatusCode::NOT_FOUND,
            Json(serde_json::json!({
                "error": "Video not found", "file_uuid": file_uuid
            })),
        ));
    }

    tracing::warn!(
        "[TKG-MATCH] TMDb matching disabled - sync_trace_embeddings removed. \
         TODO: Reimplement with _faces collection for {}",
        file_uuid
    );

    Ok(Json(TmdbMatchResponse {
        success: true,
        file_uuid,
        bindings_created: 0,
        tmdb_identities_available: 0,
        message: "TMDb matching disabled - needs reimplementation with _faces collection"
            .to_string(),
    }))
}