update: pipeline, search, clip, embedding fixes

This commit is contained in:
Accusys
2026-05-17 19:46:35 +08:00
parent eec2eea880
commit 3164a65554
36 changed files with 4313 additions and 4061 deletions

53
src/core/auth/jwt.rs Normal file
View File

@@ -0,0 +1,53 @@
use anyhow::{Context, Result};
use jsonwebtoken::{decode, encode, DecodingKey, EncodingKey, Header, Validation};
use serde::{Deserialize, Serialize};
use uuid::Uuid;
use crate::core::config::JWT_SECRET;
/// Payload carried inside every JWT produced or accepted by this module.
///
/// `create_jwt` fills in all fields; `verify_jwt` returns the decoded value.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Claims {
/// Subject: `create_jwt` stores the numeric user id rendered as a string.
pub sub: String,
/// Expiry timestamp; `create_jwt` sets it to the issue time plus one hour.
pub exp: usize,
/// Issued-at timestamp taken from the clock when the token is created.
pub iat: usize,
/// Unique token id; `create_jwt` stores a freshly generated v4 UUID here.
pub jti: String,
/// Role string copied from the `role` argument of `create_jwt`.
pub role: String,
/// Username copied from the `username` argument of `create_jwt`.
pub name: String,
}
pub fn create_jwt(user_id: i32, username: &str, role: &str) -> Result<String> {
let now = chrono::Utc::now();
let exp = (now + chrono::Duration::hours(1)).timestamp() as usize;
let iat = now.timestamp() as usize;
let claims = Claims {
sub: user_id.to_string(),
exp,
iat,
jti: Uuid::new_v4().to_string(),
role: role.to_string(),
name: username.to_string(),
};
encode(
&Header::default(),
&claims,
&EncodingKey::from_secret(JWT_SECRET.as_bytes()),
)
.context("Failed to encode JWT")
}
/// Decodes `token` with the shared `JWT_SECRET` and returns its claims.
///
/// Validation uses the library's default `Validation` settings.
///
/// # Errors
///
/// Returns an error with the context "Failed to decode JWT" when decoding or
/// validation fails.
pub fn verify_jwt(token: &str) -> Result<Claims> {
    let key = DecodingKey::from_secret(JWT_SECRET.as_bytes());
    let rules = Validation::default();
    decode::<Claims>(token, &key, &rules)
        .map(|decoded| decoded.claims)
        .context("Failed to decode JWT")
}
/// Cheap shape check: does `token` look like a compact JWT?
///
/// Returns `true` when the string starts with `eyJ` and consists of exactly
/// three dot-separated segments. Nothing is decoded or verified here.
pub fn is_jwt(token: &str) -> bool {
    if !token.starts_with("eyJ") {
        return false;
    }
    // Exactly two separators means exactly three segments.
    token.matches('.').count() == 2
}

2
src/core/auth/mod.rs Normal file
View File

@@ -0,0 +1,2 @@
pub mod jwt;
pub mod password;

41
src/core/auth/password.rs Normal file
View File

@@ -0,0 +1,41 @@
use anyhow::Result;
use argon2::{
password_hash::{rand_core::OsRng, PasswordHash, PasswordHasher, PasswordVerifier, SaltString},
Argon2,
};
/// Hashes `password` with Argon2 (default parameters) and a freshly generated
/// random salt, returning the encoded hash string.
///
/// # Errors
///
/// Returns an error prefixed with "Failed to hash password" if hashing fails.
pub fn hash_password(password: &str) -> Result<String> {
    let salt = SaltString::generate(&mut OsRng);
    let hasher = Argon2::default();
    match hasher.hash_password(password.as_bytes(), &salt) {
        Ok(encoded) => Ok(encoded.to_string()),
        Err(e) => Err(anyhow::anyhow!("Failed to hash password: {}", e)),
    }
}
/// Checks `password` against an encoded Argon2 `hash`.
///
/// Returns `false` both when the password does not match and when `hash`
/// cannot be parsed as a password hash string.
pub fn verify_password(password: &str, hash: &str) -> bool {
    PasswordHash::new(hash)
        .map(|parsed| {
            Argon2::default()
                .verify_password(password.as_bytes(), &parsed)
                .is_ok()
        })
        .unwrap_or(false)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A fresh hash accepts the password it was made from and rejects another.
    #[test]
    fn test_hash_and_verify() {
        let hash = hash_password("test_password_123").unwrap();
        let accepts_original = verify_password("test_password_123", &hash);
        let accepts_other = verify_password("wrong_password", &hash);
        assert!(accepts_original);
        assert!(!accepts_other);
    }

    /// An unparseable hash string is treated as a failed verification.
    #[test]
    fn test_verify_fails_on_bad_hash() {
        let verified = verify_password("test", "not_a_valid_hash");
        assert!(!verified);
    }
}

View File

@@ -191,6 +191,14 @@ pub mod llm {
});
}
/// Lazily initialised base URL taken from the `SFTPGO_BASE_URL` environment
/// variable; falls back to `http://127.0.0.1:8080` when the variable cannot be read.
pub static SFTPGO_BASE_URL: Lazy<String> = Lazy::new(|| {
env::var("SFTPGO_BASE_URL").unwrap_or_else(|_| "http://127.0.0.1:8080".to_string())
});
/// Lazily initialised secret taken from the `JWT_SECRET` environment variable.
///
/// NOTE(review): when the variable cannot be read this silently falls back to a
/// fixed string that is visible in the source. Anything signed with the fallback
/// can be forged by anyone who has read this file — confirm that every
/// deployment sets `JWT_SECRET`, and consider failing fast instead of defaulting.
pub static JWT_SECRET: Lazy<String> = Lazy::new(|| {
env::var("JWT_SECRET").unwrap_or_else(|_| "momentry_default_jwt_secret_change_me".to_string())
});
pub mod tmdb {
use super::*;

File diff suppressed because it is too large Load Diff

View File

@@ -344,7 +344,7 @@ impl RedisClient {
) -> Result<()> {
let mut conn = self.get_conn_internal().await?;
let prefix = REDIS_KEY_PREFIX.as_str();
let key = format!("{}worker:job:{}", prefix, uuid);
let key = format!("{}job:{}", prefix, uuid);
let _: Option<String> = conn
.hset_multiple(
@@ -379,7 +379,7 @@ impl RedisClient {
) -> Result<()> {
let mut conn = self.get_conn_internal().await?;
let prefix = REDIS_KEY_PREFIX.as_str();
let key = format!("{}worker:job:{}:processor:{}", prefix, uuid, processor);
let key = format!("{}job:{}:processor:{}", prefix, uuid, processor);
let now = chrono::Utc::now().to_rfc3339();
@@ -409,7 +409,7 @@ impl RedisClient {
pub async fn get_worker_job_status(&self, uuid: &str) -> Result<Option<WorkerJobStatus>> {
let mut conn = self.get_conn_internal().await?;
let prefix = REDIS_KEY_PREFIX.as_str();
let key = format!("{}worker:job:{}", prefix, uuid);
let key = format!("{}job:{}", prefix, uuid);
let exists: bool = conn.exists(&key).await?;
if !exists {
@@ -438,12 +438,12 @@ impl RedisClient {
let mut conn = self.get_conn_internal().await?;
let prefix = REDIS_KEY_PREFIX.as_str();
let key = format!("{}worker:job:{}", prefix, uuid);
let key = format!("{}job:{}", prefix, uuid);
let _: i32 = conn.del(&key).await?;
let processor_types = ["asr", "cut", "yolo", "ocr", "face", "pose", "asrx"];
for ptype in processor_types {
let proc_key = format!("{}worker:job:{}:processor:{}", prefix, uuid, ptype);
let proc_key = format!("{}job:{}:processor:{}", prefix, uuid, ptype);
let _: i32 = conn.del(&proc_key).await?;
}
@@ -453,11 +453,11 @@ impl RedisClient {
pub async fn get_all_worker_jobs(&self) -> Result<Vec<WorkerJobInfo>> {
let mut conn = self.get_conn_internal().await?;
let prefix = REDIS_KEY_PREFIX.as_str();
let keys: Vec<String> = conn.keys(format!("{}worker:job:*", prefix)).await?;
let keys: Vec<String> = conn.keys(format!("{}job:*", prefix)).await?;
let mut jobs = Vec::new();
for key in keys {
let uuid = key.replace(&format!("{}worker:job:", prefix), "");
let uuid = key.replace(&format!("{}job:", prefix), "");
if let Some(status) = self.get_worker_job_status(&uuid).await? {
jobs.push(WorkerJobInfo {
uuid,
@@ -517,6 +517,10 @@ pub struct ProgressData {
pub message: Option<String>,
pub current: Option<i32>,
pub total: Option<i32>,
#[serde(default)]
pub output_count: Option<i32>,
#[serde(default)]
pub output_type: Option<String>,
}
#[derive(Debug, Clone, Serialize, Deserialize)]

View File

@@ -43,7 +43,7 @@ impl Embedder {
}
fn default_url() -> String {
std::env::var("MOMENTRY_EMBED_URL").unwrap_or_else(|_| "http://localhost:11434".to_string())
std::env::var("MOMENTRY_EMBED_URL").unwrap_or_else(|_| "http://localhost:11436".to_string())
}
pub async fn embed_text(&self, text: &str) -> Result<Vec<f32>> {

1
src/core/identity/mod.rs Normal file
View File

@@ -0,0 +1 @@
pub mod storage;

View File

@@ -0,0 +1,513 @@
use std::collections::HashMap;
use std::path::PathBuf;
use anyhow::{Context, Result};
use serde::{Deserialize, Serialize};
use tracing::warn;
use crate::core::config::OUTPUT_DIR;
use crate::core::db::PostgresDb;
/// On-disk representation of one identity, stored as `identity.json` inside the
/// identity's own directory (see `identity_file_path`).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IdentityFile {
/// Format version; the writers in this file always store `1`.
pub version: u32,
/// UUID string of the identity; also used as the directory name on disk.
pub identity_uuid: String,
/// Name of the identity; mirrored into the `_index.json` lookup table.
pub name: String,
pub identity_type: Option<String>,
pub source: Option<String>,
pub status: Option<String>,
pub tmdb_id: Option<i32>,
pub tmdb_profile: Option<String>,
/// Free-form JSON copied from the database record.
pub metadata: serde_json::Value,
/// One entry per file in which face detections are linked to this identity.
pub file_bindings: Vec<FileBinding>,
/// RFC 3339 timestamp string; the save functions substitute the current time
/// when the database value is absent.
pub created_at: String,
/// RFC 3339 timestamp string, with the same fallback as `created_at`.
pub updated_at: String,
}
/// Link between an identity and one file, aggregated from face-detection rows
/// by the save functions in this file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileBinding {
/// UUID of the file the detections belong to.
pub file_uuid: String,
/// Distinct non-null trace ids found among the detections for this file.
pub trace_ids: Vec<i32>,
/// Number of face-detection rows for this identity in this file.
pub face_count: i64,
}
/// Directory that holds all identity folders: `<OUTPUT_DIR>/identities`.
pub fn identities_root() -> PathBuf {
    let mut root = PathBuf::from(&*OUTPUT_DIR);
    root.push("identities");
    root
}
/// Directory of a single identity: `<identities root>/<uuid>`.
pub fn identity_dir(uuid: &str) -> PathBuf {
    let mut dir = identities_root();
    dir.push(uuid);
    dir
}
/// Path of the `identity.json` file inside the directory of identity `uuid`.
pub fn identity_file_path(uuid: &str) -> PathBuf {
    let mut path = identity_dir(uuid);
    path.push("identity.json");
    path
}
/// Path of the uuid-to-name lookup table: `<identities root>/_index.json`.
pub fn index_path() -> PathBuf {
    let mut path = identities_root();
    path.push("_index.json");
    path
}
pub fn read_identity_file(uuid: &str) -> Result<IdentityFile> {
let path = identity_file_path(uuid);
let content = std::fs::read_to_string(&path)
.with_context(|| format!("Identity file not found: {} ({})", uuid, path.display()))?;
serde_json::from_str(&content)
.with_context(|| format!("Invalid identity.json: {}", uuid))
}
/// Serialises `file` as pretty-printed JSON into its identity directory,
/// creating the directory first if necessary.
///
/// # Errors
///
/// Fails when the directory cannot be created, the value cannot be
/// serialised, or the file cannot be written.
pub fn write_identity_file(file: &IdentityFile) -> Result<()> {
    let target_dir = identity_dir(&file.identity_uuid);
    std::fs::create_dir_all(&target_dir)
        .with_context(|| format!("Failed to create identity dir: {}", target_dir.display()))?;

    let serialized = serde_json::to_string_pretty(file)
        .with_context(|| format!("Failed to serialize identity: {}", file.identity_uuid))?;

    let target = target_dir.join("identity.json");
    std::fs::write(&target, &serialized)
        .with_context(|| format!("Failed to write identity.json: {}", target.display()))
}
pub fn delete_identity_file(uuid: &str) -> Result<()> {
let path = identity_file_path(uuid);
if path.exists() {
std::fs::remove_file(&path)
.with_context(|| format!("Failed to delete identity.json: {}", path.display()))?;
}
let dir = identity_dir(uuid);
if dir.exists() {
std::fs::remove_dir(&dir).ok();
}
remove_from_index(uuid).ok();
Ok(())
}
/// Lists the identities present on disk, sorted ascending.
///
/// An entry counts as an identity when it is a directory directly under the
/// identities root and its name is exactly 32 ASCII hex digits. A missing root
/// directory yields an empty list.
///
/// # Errors
///
/// Fails when the root directory or one of its entries cannot be read.
pub fn list_identity_uuids() -> Result<Vec<String>> {
    let root = identities_root();
    if !root.is_dir() {
        return Ok(Vec::new());
    }

    let listing = std::fs::read_dir(&root)
        .with_context(|| format!("Failed to read identities dir: {}", root.display()))?;

    let mut found = Vec::new();
    for item in listing {
        let item = item?;
        let name = item.file_name().to_string_lossy().into_owned();
        // An unreadable file type is treated as "not a directory".
        let is_directory = matches!(item.file_type(), Ok(kind) if kind.is_dir());
        let is_hex32 = name.len() == 32 && name.chars().all(|c| c.is_ascii_hexdigit());
        if is_directory && is_hex32 {
            found.push(name);
        }
    }
    found.sort();
    Ok(found)
}
/// Number of identity directories on disk; `0` when listing them fails.
pub fn count_identity_files() -> usize {
    match list_identity_uuids() {
        Ok(uuids) => uuids.len(),
        Err(_) => 0,
    }
}
/// Serialised form of `_index.json`, the uuid-to-name lookup table.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct IndexFile {
/// Format version; the writers in this file always store `1`.
version: u32,
/// RFC 3339 timestamp, refreshed whenever this file builds or modifies the index.
updated_at: String,
/// Maps identity uuid to identity name.
entries: HashMap<String, String>,
}
/// Loads `_index.json`, or returns a fresh empty index (version 1, stamped
/// with the current time) when the file does not exist.
///
/// # Errors
///
/// Fails when the file exists but cannot be read or parsed.
fn read_index_inner() -> Result<IndexFile> {
    let path = index_path();
    if path.exists() {
        let raw = std::fs::read_to_string(&path)
            .with_context(|| format!("Failed to read index: {}", path.display()))?;
        return serde_json::from_str(&raw)
            .with_context(|| format!("Invalid _index.json: {}", path.display()));
    }

    Ok(IndexFile {
        version: 1,
        updated_at: chrono::Utc::now().to_rfc3339(),
        entries: HashMap::new(),
    })
}
/// Returns the uuid-to-name map stored in `_index.json` (empty when the file
/// does not exist).
pub fn read_index() -> Result<HashMap<String, String>> {
    let index = read_index_inner()?;
    Ok(index.entries)
}
/// Inserts or overwrites the `uuid -> name` entry in `_index.json`, refreshes
/// its `updated_at` stamp and writes the file back, creating the identities
/// root directory if it is missing.
pub fn update_index(uuid: &str, name: &str) -> Result<()> {
    let mut index = read_index_inner()?;
    index.updated_at = chrono::Utc::now().to_rfc3339();
    index.entries.insert(uuid.to_owned(), name.to_owned());

    std::fs::create_dir_all(identities_root())?;
    let serialized = serde_json::to_string_pretty(&index)?;
    std::fs::write(index_path(), serialized)?;
    Ok(())
}
pub fn remove_from_index(uuid: &str) -> Result<()> {
let mut idx = read_index_inner()?;
idx.entries.remove(uuid);
idx.updated_at = chrono::Utc::now().to_rfc3339();
let json = serde_json::to_string_pretty(&idx)?;
std::fs::write(index_path(), &json)?;
Ok(())
}
/// Regenerates `_index.json` from the identity directories found on disk.
///
/// Identities whose `identity.json` cannot be read are logged and left out of
/// the index. The return value is the number of identity directories scanned,
/// which includes any that were skipped.
pub fn rebuild_index() -> Result<usize> {
    let uuids = list_identity_uuids()?;

    let mut entries = HashMap::new();
    for uuid in &uuids {
        let file = match read_identity_file(uuid) {
            Ok(file) => file,
            Err(e) => {
                warn!("[identity-storage] Skipping {} in index rebuild: {}", uuid, e);
                continue;
            }
        };
        entries.insert(uuid.clone(), file.name);
    }

    let index = IndexFile {
        version: 1,
        updated_at: chrono::Utc::now().to_rfc3339(),
        entries,
    };
    std::fs::create_dir_all(identities_root())?;
    let serialized = serde_json::to_string_pretty(&index)?;
    std::fs::write(index_path(), serialized)?;
    Ok(uuids.len())
}
/// Exports one identity from the database to disk using a raw connection pool.
///
/// The identity row is looked up by `uuid` with dashes ignored, its per-file
/// face-detection bindings are aggregated, and the result is written to
/// `identity.json` and registered in `_index.json`. The query selects `NULL`
/// for `updated_at`, so that field falls back to the current time.
///
/// # Errors
///
/// Fails when a query fails, the identity is not found, or the files cannot
/// be written.
pub async fn save_identity_file_by_pool(pool: &sqlx::PgPool, uuid: &str) -> Result<()> {
    /// Formats a timestamp as RFC 3339, substituting "now" when it is absent.
    fn rfc3339_or_now(ts: Option<chrono::DateTime<chrono::Utc>>) -> String {
        match ts {
            Some(t) => t.to_rfc3339(),
            None => chrono::Utc::now().to_rfc3339(),
        }
    }

    let identity_table = crate::core::db::schema::table_name("identities");
    let fd_table = crate::core::db::schema::table_name("face_detections");
    let dashless = uuid.replace('-', "");

    let identity_sql = format!(
        "SELECT id, uuid::text, name, identity_type, source, status, metadata, reference_data, \
         NULL::real[] as voice_embedding, NULL::real[] as identity_embedding, \
         face_embedding::real[] as face_embedding, \
         tmdb_id, tmdb_profile, created_at::timestamptz as created_at, NULL::timestamptz as updated_at \
         FROM {} WHERE REPLACE(uuid::text, '-', '') = $1",
        identity_table
    );
    let record = sqlx::query_as::<_, crate::core::db::IdentityDetailRecord>(&identity_sql)
        .bind(&dashless)
        .fetch_optional(pool)
        .await?
        .with_context(|| format!("Identity not found in DB: {}", uuid))?;

    let bindings_sql = format!(
        "SELECT fd.file_uuid, COALESCE(array_agg(DISTINCT fd.trace_id) FILTER (WHERE fd.trace_id IS NOT NULL), '{{}}'::int[]), COUNT(*)::bigint \
         FROM {} fd WHERE fd.identity_id = $1 GROUP BY fd.file_uuid ORDER BY fd.file_uuid",
        fd_table
    );
    let rows = sqlx::query_as::<_, (String, Vec<i32>, i64)>(&bindings_sql)
        .bind(record.id)
        .fetch_all(pool)
        .await?;

    let mut file_bindings: Vec<FileBinding> = Vec::new();
    for (file_uuid, trace_ids, face_count) in rows {
        file_bindings.push(FileBinding {
            file_uuid,
            trace_ids,
            face_count,
        });
    }

    let file = IdentityFile {
        version: 1,
        identity_uuid: record.uuid,
        name: record.name,
        identity_type: record.identity_type,
        source: record.source,
        status: record.status,
        tmdb_id: record.tmdb_id,
        tmdb_profile: record.tmdb_profile,
        metadata: record.metadata,
        file_bindings,
        created_at: rfc3339_or_now(record.created_at),
        updated_at: rfc3339_or_now(record.updated_at),
    };

    write_identity_file(&file)?;
    update_index(&file.identity_uuid, &file.name)?;
    Ok(())
}
/// Test-only twin of `list_identity_uuids` that scans `<base>/identities`
/// instead of the configured output directory.
#[cfg(test)]
pub fn list_identity_uuids_at(base: &std::path::Path) -> Result<Vec<String>> {
    let root = base.join("identities");
    if !root.is_dir() {
        return Ok(Vec::new());
    }

    let mut found = Vec::new();
    for item in std::fs::read_dir(&root)? {
        let item = item?;
        let name = item.file_name().to_string_lossy().into_owned();
        // An unreadable file type is treated as "not a directory".
        let is_directory = matches!(item.file_type(), Ok(kind) if kind.is_dir());
        let is_hex32 = name.len() == 32 && name.chars().all(|c| c.is_ascii_hexdigit());
        if is_directory && is_hex32 {
            found.push(name);
        }
    }
    found.sort();
    Ok(found)
}
/// Test-only: directory of identity `uuid` under an explicit `base`.
#[cfg(test)]
pub fn identity_dir_at(base: &std::path::Path, uuid: &str) -> std::path::PathBuf {
    let mut dir = base.to_path_buf();
    dir.push("identities");
    dir.push(uuid);
    dir
}
/// Test-only: path of `identity.json` for `uuid` under an explicit `base`.
#[cfg(test)]
pub fn identity_file_path_at(base: &std::path::Path, uuid: &str) -> std::path::PathBuf {
    let mut path = identity_dir_at(base, uuid);
    path.push("identity.json");
    path
}
/// Test-only: path of `_index.json` under an explicit `base`.
#[cfg(test)]
pub fn index_path_at(base: &std::path::Path) -> std::path::PathBuf {
    let mut path = base.join("identities");
    path.push("_index.json");
    path
}
/// Test-only: reads and parses `identity.json` for `uuid` under `base`.
#[cfg(test)]
pub fn read_identity_file_at(base: &std::path::Path, uuid: &str) -> Result<IdentityFile> {
    let raw = std::fs::read_to_string(identity_file_path_at(base, uuid))?;
    let parsed = serde_json::from_str(&raw)?;
    Ok(parsed)
}
/// Test-only: writes `file` as pretty JSON to its identity directory under
/// `base`, creating the directory first.
#[cfg(test)]
pub fn write_identity_file_at(base: &std::path::Path, file: &IdentityFile) -> Result<()> {
    let target_dir = identity_dir_at(base, &file.identity_uuid);
    std::fs::create_dir_all(&target_dir)?;
    let serialized = serde_json::to_string_pretty(file)?;
    let target = target_dir.join("identity.json");
    std::fs::write(target, &serialized)?;
    Ok(())
}
/// Test-only: inserts or overwrites `uuid -> name` in the `_index.json` under
/// `base`.
///
/// Existing entries are carried over leniently: an unparseable index is
/// treated as empty and non-string values become empty strings.
#[cfg(test)]
pub fn update_index_at(base: &std::path::Path, uuid: &str, name: &str) -> Result<()> {
    use std::collections::HashMap;

    let index_file = index_path_at(base);
    let mut entries: HashMap<String, String> = HashMap::new();
    if index_file.exists() {
        let raw = std::fs::read_to_string(&index_file)?;
        let doc: serde_json::Value = serde_json::from_str(&raw).unwrap_or_default();
        if let Some(existing) = doc["entries"].as_object() {
            for (key, value) in existing {
                entries.insert(key.clone(), value.as_str().unwrap_or("").to_string());
            }
        }
    }
    entries.insert(uuid.to_string(), name.to_string());

    std::fs::create_dir_all(base.join("identities"))?;
    let document = serde_json::json!({
        "version": 1, "updated_at": chrono::Utc::now().to_rfc3339(), "entries": entries
    });
    let serialized = serde_json::to_string_pretty(&document)?;
    std::fs::write(&index_file, &serialized)?;
    Ok(())
}
pub async fn save_identity_file(db: &PostgresDb, uuid: &str) -> Result<()> {
let record = db.get_identity_by_uuid(uuid).await?
.with_context(|| format!("Identity not found in DB: {}", uuid))?;
let identity_uuid = record.uuid.clone();
let binding_rows = sqlx::query_as::<_, (String, Vec<i32>, i64)>(
"SELECT fd.file_uuid, COALESCE(array_agg(DISTINCT fd.trace_id) FILTER (WHERE fd.trace_id IS NOT NULL), '{}'::int[]), COUNT(*)::bigint \
FROM face_detections fd \
WHERE fd.identity_id = $1 \
GROUP BY fd.file_uuid \
ORDER BY fd.file_uuid"
)
.bind(record.id)
.fetch_all(db.pool())
.await
.with_context(|| format!("Failed to query bindings for identity: {}", identity_uuid))?;
let file_bindings: Vec<FileBinding> = binding_rows
.into_iter()
.map(|(fu, tids, cnt)| FileBinding {
file_uuid: fu,
trace_ids: tids,
face_count: cnt,
})
.collect();
let fmt_time = |dt: Option<chrono::DateTime<chrono::Utc>>| -> String {
dt.map(|d| d.to_rfc3339())
.unwrap_or_else(|| chrono::Utc::now().to_rfc3339())
};
let file = IdentityFile {
version: 1,
identity_uuid,
name: record.name,
identity_type: record.identity_type,
source: record.source,
status: record.status,
tmdb_id: record.tmdb_id,
tmdb_profile: record.tmdb_profile,
metadata: record.metadata,
file_bindings,
created_at: fmt_time(record.created_at),
updated_at: fmt_time(record.updated_at),
};
write_identity_file(&file)?;
update_index(&file.identity_uuid, &file.name)?;
Ok(())
}
// Unit tests for the identity storage helpers. Filesystem tests go through the
// `_at` variants with a scratch directory under the system temp dir, so they
// never touch the configured output directory.
#[cfg(test)]
mod tests {
use super::*;
use std::path::Path;
// Builds a fully populated identity with one file binding, shared by the tests.
fn sample_identity() -> IdentityFile {
IdentityFile {
version: 1,
identity_uuid: "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa".to_string(),
name: "Test Person".to_string(),
identity_type: Some("people".to_string()),
source: Some("tmdb".to_string()),
status: Some("confirmed".to_string()),
tmdb_id: Some(112),
tmdb_profile: Some("https://image.tmdb.org/t/p/w185/test.jpg".to_string()),
metadata: serde_json::json!({"tmdb_character": "Test Role"}),
file_bindings: vec![FileBinding {
file_uuid: "ffffffffffffffffffffffffffffffff".to_string(),
trace_ids: vec![1, 2, 3],
face_count: 5,
}],
created_at: "2026-05-16T00:00:00+00:00".to_string(),
updated_at: "2026-05-16T01:00:00+00:00".to_string(),
}
}
// Serialising and re-parsing an identity keeps its fields intact.
#[test]
fn test_serde_roundtrip() {
let file = sample_identity();
let json = serde_json::to_string_pretty(&file).unwrap();
let parsed: IdentityFile = serde_json::from_str(&json).unwrap();
assert_eq!(parsed.name, "Test Person");
assert_eq!(parsed.identity_uuid, "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa");
assert_eq!(parsed.tmdb_id, Some(112));
assert_eq!(parsed.file_bindings.len(), 1);
assert_eq!(parsed.file_bindings[0].face_count, 5);
}
// The identity directory ends in `identities/<uuid>`.
// NOTE(review): the expected suffix uses `/`, so this assumes a `/`-separated platform.
#[test]
fn test_identity_dir_path() {
let uuid = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
let p = identity_dir(uuid);
assert!(p.to_string_lossy().ends_with(&format!("identities/{}", uuid)));
}
// The identity file path ends in `identity.json`.
#[test]
fn test_identity_file_path() {
let uuid = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
let p = identity_file_path(uuid);
assert!(p.to_string_lossy().ends_with("identity.json"));
}
// The index path ends in `_index.json`.
#[test]
fn test_index_path() {
let p = index_path();
assert!(p.to_string_lossy().ends_with("_index.json"));
}
// `identity_dir_at` composes `<base>/identities/<uuid>`.
#[test]
fn test_identity_dir_at() {
let base = Path::new("/tmp/test_base");
let uuid = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb";
let p = identity_dir_at(base, uuid);
assert_eq!(p, Path::new("/tmp/test_base/identities/bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"));
}
// `identity_file_path_at` composes `<base>/identities/<uuid>/identity.json`.
#[test]
fn test_identity_file_path_at() {
let base = Path::new("/tmp/test_base");
let uuid = "cccccccccccccccccccccccccccccccc";
let p = identity_file_path_at(base, uuid);
assert_eq!(
p,
Path::new("/tmp/test_base/identities/cccccccccccccccccccccccccccccccc/identity.json")
);
}
// An identity written under a scratch base can be read back unchanged.
#[test]
fn test_write_then_read_identity_file_at() {
let tmp = std::env::temp_dir().join("momentry_test_write_read");
// Start from a clean slate in case an earlier run left files behind.
let _ = std::fs::remove_dir_all(&tmp);
let base = &tmp;
let file = sample_identity();
write_identity_file_at(base, &file).unwrap();
let read = read_identity_file_at(base, &file.identity_uuid).unwrap();
assert_eq!(read.name, file.name);
assert_eq!(read.source, file.source);
assert_eq!(read.tmdb_id, file.tmdb_id);
assert_eq!(read.file_bindings[0].face_count, file.file_bindings[0].face_count);
let _ = std::fs::remove_dir_all(&tmp);
}
// Two successive index updates both end up in the index file.
#[test]
fn test_update_and_read_index_at() {
let tmp = std::env::temp_dir().join("momentry_test_index");
let _ = std::fs::remove_dir_all(&tmp);
let base = &tmp;
update_index_at(base, "aaa", "Alice").unwrap();
update_index_at(base, "bbb", "Bob").unwrap();
let idx_path = index_path_at(base);
let content = std::fs::read_to_string(&idx_path).unwrap();
let parsed: serde_json::Value = serde_json::from_str(&content).unwrap();
let entries = parsed["entries"].as_object().unwrap();
assert_eq!(entries.len(), 2);
assert_eq!(entries["aaa"], "Alice");
assert_eq!(entries["bbb"], "Bob");
let _ = std::fs::remove_dir_all(&tmp);
}
// Only directories named with 32 hex digits are listed as identities.
#[test]
fn test_list_identity_uuids_at() {
let tmp = std::env::temp_dir().join("momentry_test_list");
let _ = std::fs::remove_dir_all(&tmp);
let base = &tmp;
std::fs::create_dir_all(base.join("identities").join("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")).unwrap();
std::fs::create_dir_all(base.join("identities").join("bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb")).unwrap();
std::fs::create_dir_all(base.join("identities").join("cccccccccccccccccccccccccccccccc")).unwrap();
// These two names fail the 32-hex-digit check and must be ignored.
std::fs::create_dir_all(base.join("identities").join("not_a_uuid")).unwrap();
std::fs::create_dir_all(base.join("identities").join("short")).unwrap();
let uuids = list_identity_uuids_at(base).unwrap();
assert_eq!(uuids.len(), 3);
assert!(uuids.contains(&"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa".to_string()));
assert!(uuids.contains(&"bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb".to_string()));
assert!(uuids.contains(&"cccccccccccccccccccccccccccccccc".to_string()));
let _ = std::fs::remove_dir_all(&tmp);
}
}

View File

@@ -1,10 +1,12 @@
pub mod api_key;
pub mod auth;
pub mod cache;
pub mod chunk;
pub mod config;
pub mod db;
pub mod embedding;
pub mod frame_cache;
pub mod identity;
pub mod ingestion;
pub mod llm;
pub mod overlay;

View File

@@ -84,9 +84,9 @@ fn load_checksums(scripts_dir: &PathBuf) -> HashMap<String, String> {
pub fn validate_python_env() -> Result<()> {
let python_path = std::env::var("MOMENTRY_PYTHON_PATH")
.unwrap_or_else(|_| "/opt/homebrew/bin/python3.11".to_string());
let venv_python = PathBuf::from(&python_path);
let python_bin = PathBuf::from(&python_path);
if !venv_python.exists() {
if !python_bin.exists() {
anyhow::bail!(
"Python not found at {} (set MOMENTRY_PYTHON_PATH env var)",
python_path
@@ -95,7 +95,7 @@ pub fn validate_python_env() -> Result<()> {
let rt = tokio::runtime::Runtime::new()?;
let output = rt
.block_on(async { Command::new(&venv_python).arg("--version").output().await })
.block_on(async { Command::new(&python_bin).arg("--version").output().await })
.context("Failed to run Python")?;
if !output.status.success() {
@@ -124,7 +124,7 @@ pub fn validate_python_env() -> Result<()> {
}
pub struct PythonExecutor {
venv_python: PathBuf,
python_path: PathBuf,
scripts_dir: PathBuf,
checksums: HashMap<String, String>,
}
@@ -139,10 +139,10 @@ impl PythonExecutor {
manifest.join("scripts").to_string_lossy().to_string()
});
let venv_python = PathBuf::from(&python_path);
let python_bin = PathBuf::from(&python_path);
let scripts_path = PathBuf::from(&scripts_dir);
if !venv_python.exists() {
if !python_bin.exists() {
anyhow::bail!(
"Python not found at {} (set MOMENTRY_PYTHON_PATH env var)",
python_path
@@ -160,7 +160,7 @@ impl PythonExecutor {
let checksums = load_checksums(&scripts_path);
Ok(Self {
venv_python,
python_path: python_bin,
scripts_dir: scripts_path,
checksums,
})
@@ -201,7 +201,7 @@ impl PythonExecutor {
let rt = tokio::runtime::Runtime::new()?;
let output = rt
.block_on(async {
Command::new(&self.venv_python)
Command::new(&self.python_path)
.arg("--version")
.output()
.await
@@ -251,7 +251,7 @@ impl PythonExecutor {
}
}
let mut cmd = Command::new(&self.venv_python);
let mut cmd = Command::new(&self.python_path);
cmd.arg(&script_path);
for arg in args {
@@ -467,7 +467,7 @@ impl PythonExecutor {
}
pub fn python_path(&self) -> &PathBuf {
&self.venv_python
&self.python_path
}
}
@@ -482,11 +482,11 @@ mod tests {
use super::*;
#[test]
fn test_python_executor_new_with_venv() {
fn test_python_executor_new() {
let executor = PythonExecutor::new();
assert!(
executor.is_ok(),
"PythonExecutor should create successfully with venv"
"PythonExecutor should create successfully"
);
}
@@ -499,10 +499,6 @@ mod tests {
"Python path should exist: {:?}",
python_path
);
assert!(
python_path.to_string_lossy().contains("venv"),
"Should be in venv"
);
}
#[test]

View File

@@ -284,10 +284,21 @@ pub async fn process_visual_chunk_advanced(
});
}
let yolo_path = uuid.map(|u| {
std::path::PathBuf::from(crate::core::config::OUTPUT_DIR.as_str())
.join(format!("{}.yolo.json", u))
.to_string_lossy()
.to_string()
});
let args: &[&str] = if let Some(ref yp) = yolo_path {
&[video_path, output_path, "--yolo-result", yp]
} else {
&[video_path, output_path]
};
let result = match executor
.run(
"visual_chunk_processor.py",
&[video_path, output_path],
args,
uuid,
"VisualChunk",
Some(VISUAL_CHUNK_TIMEOUT),

View File

@@ -25,13 +25,11 @@ impl ThumbnailExtractor {
.join("scripts")
.join("thumbnail_extractor.py");
// 使用 venv 中的 Python確保版本正確且隔離依賴
let venv_python = Path::new(env!("CARGO_MANIFEST_DIR"))
.join("venv")
.join("bin")
.join("python");
let python_path = std::env::var("MOMENTRY_PYTHON_PATH")
.unwrap_or_else(|_| "/opt/homebrew/bin/python3.11".to_string());
let python_bin = Path::new(&python_path);
let output = Command::new(venv_python)
let output = Command::new(python_bin)
.arg(script_path)
.arg(video_path)
.arg(uuid)

262
src/core/tmdb/cache.rs Normal file
View File

@@ -0,0 +1,262 @@
use std::path::PathBuf;
use anyhow::{Context, Result};
use serde::{Deserialize, Serialize};
use crate::core::config::OUTPUT_DIR;
/// One identity reference stored in the `identities` list of a `TmdbCache`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TmdbCacheIdentity {
/// UUID string of the identity; `upsert_identities_from_disk` uses it to
/// locate the identity's `identity.json`.
pub identity_uuid: String,
pub name: String,
pub tmdb_id: u64,
pub character: String,
pub order: u32,
}
/// Contents of a per-file TMDb cache document, stored as
/// `<file_uuid>.tmdb.json` (see `tmdb_cache_path`).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TmdbCache {
/// UUID of the file this cache belongs to; also determines the cache file name.
pub file_uuid: String,
pub fetched_at: String,
pub source: String,
pub movie: TmdbMovie,
pub cast_count: usize,
pub identities_created: usize,
/// Defaults to an empty list when the key is missing from the JSON.
#[serde(default)]
pub identities: Vec<TmdbCacheIdentity>,
/// Defaults to an empty list when the key is missing from the JSON.
#[serde(default)]
pub cast: Vec<TmdbCastMember>,
}
/// Movie-level fields stored in a `TmdbCache`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TmdbMovie {
pub tmdb_id: u64,
pub title: String,
pub release_date: Option<String>,
pub overview: Option<String>,
pub poster_path: Option<String>,
}
/// One cast entry stored in a `TmdbCache`: the credit fields followed by
/// optional person-detail fields.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TmdbCastMember {
pub name: String,
pub character: String,
pub profile_path: Option<String>,
pub order: u32,
pub id: u64,
// Person detail fields from /person/{id}
pub biography: Option<String>,
pub birthday: Option<String>,
pub place_of_birth: Option<String>,
/// Defaults to an empty list when the key is missing from the JSON.
#[serde(default)]
pub also_known_as: Vec<String>,
pub imdb_id: Option<String>,
pub known_for_department: Option<String>,
pub popularity: Option<f64>,
pub deathday: Option<String>,
pub gender: Option<i32>,
pub homepage: Option<String>,
}
/// Location of the TMDb cache for a file: `<OUTPUT_DIR>/<file_uuid>.tmdb.json`.
pub fn tmdb_cache_path(file_uuid: &str) -> PathBuf {
    let file_name = format!("{}.tmdb.json", file_uuid);
    let mut path = PathBuf::from(&*OUTPUT_DIR);
    path.push(file_name);
    path
}
/// Loads and parses the TMDb cache belonging to `file_uuid`.
///
/// # Errors
///
/// Fails when the cache file does not exist, cannot be read, or does not
/// contain a valid `TmdbCache`.
pub fn read_tmdb_cache(file_uuid: &str) -> Result<TmdbCache> {
    let path = tmdb_cache_path(file_uuid);
    if path.exists() {
        let raw = std::fs::read_to_string(&path)
            .with_context(|| format!("Failed to read TMDb cache: {}", path.display()))?;
        return match serde_json::from_str(&raw) {
            Ok(cache) => Ok(cache),
            Err(e) => Err(anyhow::anyhow!(
                "Invalid TMDb cache JSON {}: {}",
                path.display(),
                e
            )),
        };
    }
    Err(anyhow::anyhow!(
        "TMDb cache not found: {} (expected: {})",
        file_uuid,
        path.display()
    ))
}
/// Writes `cache` as pretty-printed JSON to its cache path.
///
/// The parent directory is created first if it is missing, matching
/// `write_tmdb_cache_at` and the identity writers, so a fresh output
/// directory no longer makes the write fail.
///
/// # Errors
///
/// Fails when the directory cannot be created, the cache cannot be
/// serialised, or the file cannot be written.
pub fn write_tmdb_cache(cache: &TmdbCache) -> Result<()> {
    let path = tmdb_cache_path(&cache.file_uuid);
    if let Some(parent) = path.parent() {
        // `create_dir_all` is a no-op for an existing directory and for "".
        std::fs::create_dir_all(parent)
            .with_context(|| format!("Failed to create TMDb cache dir: {}", parent.display()))?;
    }
    let json = serde_json::to_string_pretty(cache)
        .with_context(|| format!("Failed to serialize TMDb cache: {}", cache.file_uuid))?;
    std::fs::write(&path, &json)
        .with_context(|| format!("Failed to write TMDb cache: {}", path.display()))?;
    Ok(())
}
/// Deletes the TMDb cache of `file_uuid`; a missing cache file is not an error.
///
/// # Errors
///
/// Fails when the file exists but cannot be removed.
pub fn delete_tmdb_cache(file_uuid: &str) -> Result<()> {
    let path = tmdb_cache_path(file_uuid);
    if !path.exists() {
        return Ok(());
    }
    std::fs::remove_file(&path)
        .with_context(|| format!("Failed to delete TMDb cache: {}", path.display()))
}
/// Counts the entries in the output directory whose name ends in
/// `.tmdb.json`; returns `0` when the directory cannot be read.
pub fn count_cache_files() -> usize {
    let listing = match std::fs::read_dir(PathBuf::from(&*OUTPUT_DIR)) {
        Ok(listing) => listing,
        Err(_) => return 0,
    };

    let mut total = 0;
    // Entries that fail to read are skipped rather than counted.
    for entry in listing.flatten() {
        if entry.file_name().to_string_lossy().ends_with(".tmdb.json") {
            total += 1;
        }
    }
    total
}
/// Test-only: counts the entries directly under `base` whose name ends in
/// `.tmdb.json`; returns `0` when `base` cannot be read.
#[cfg(test)]
pub fn count_cache_files_at(base: &std::path::Path) -> usize {
    let listing = match std::fs::read_dir(base) {
        Ok(listing) => listing,
        Err(_) => return 0,
    };

    let mut total = 0;
    // Entries that fail to read are skipped rather than counted.
    for entry in listing.flatten() {
        if entry.file_name().to_string_lossy().ends_with(".tmdb.json") {
            total += 1;
        }
    }
    total
}
/// Test-only: writes `cache` to `<base>/<file_uuid>.tmdb.json`, creating
/// `base` first.
#[cfg(test)]
pub fn write_tmdb_cache_at(base: &std::path::Path, cache: &TmdbCache) -> Result<()> {
    std::fs::create_dir_all(base)?;
    let serialized = serde_json::to_string_pretty(cache)?;
    let target = base.join(format!("{}.tmdb.json", cache.file_uuid));
    std::fs::write(target, &serialized)?;
    Ok(())
}
/// Test-only: reads and parses `<base>/<file_uuid>.tmdb.json`; fails with
/// "Cache not found" when the file does not exist.
#[cfg(test)]
pub fn read_tmdb_cache_at(base: &std::path::Path, file_uuid: &str) -> Result<TmdbCache> {
    let source = base.join(format!("{}.tmdb.json", file_uuid));
    if source.exists() {
        let raw = std::fs::read_to_string(&source)?;
        let cache = serde_json::from_str(&raw)?;
        return Ok(cache);
    }
    Err(anyhow::anyhow!("Cache not found"))
}
// Unit tests for the TMDb cache helpers. Filesystem tests go through the `_at`
// variants with a scratch directory under the system temp dir, so they never
// touch the configured output directory.
#[cfg(test)]
mod tests {
use super::*;
// Builds a cache for `file_uuid` with one movie and two cast members.
fn sample_cache(file_uuid: &str) -> TmdbCache {
TmdbCache {
file_uuid: file_uuid.to_string(),
fetched_at: "2026-05-16T12:00:00+00:00".to_string(),
source: "agent".to_string(),
movie: TmdbMovie {
tmdb_id: 4808,
title: "Charade".to_string(),
release_date: Some("1963-12-05".to_string()),
overview: Some("A romantic thriller...".to_string()),
poster_path: Some("/abc.jpg".to_string()),
},
cast: vec![
TmdbCastMember {
name: "Cary Grant".to_string(),
character: "Peter Joshua".to_string(),
profile_path: Some("/cary.jpg".to_string()),
order: 0,
id: 112,
biography: Some("Archibald Alec Leach...".to_string()),
birthday: Some("1904-01-18".to_string()),
place_of_birth: Some("Bristol, England, UK".to_string()),
also_known_as: vec!["Archie Leach".to_string()],
imdb_id: Some("nm0000026".to_string()),
known_for_department: Some("Acting".to_string()),
popularity: Some(28.3),
deathday: Some("1986-11-29".to_string()),
gender: Some(2),
homepage: None,
},
TmdbCastMember {
name: "Audrey Hepburn".to_string(),
character: "Regina Lampert".to_string(),
profile_path: Some("/audrey.jpg".to_string()),
order: 1,
id: 113,
biography: Some("Audrey Kathleen Hepburn...".to_string()),
birthday: Some("1929-05-04".to_string()),
place_of_birth: Some("Ixelles, Belgium".to_string()),
also_known_as: vec!["Edda van Heemstra".to_string()],
imdb_id: Some("nm0000030".to_string()),
known_for_department: Some("Acting".to_string()),
popularity: Some(35.7),
deathday: Some("1993-01-20".to_string()),
gender: Some(1),
homepage: None,
},
],
cast_count: 20,
identities_created: 0,
identities: vec![],
}
}
// The cache path ends in `<file_uuid>.tmdb.json`.
#[test]
fn test_cache_path_format() {
let p = tmdb_cache_path("abcdef");
assert!(p.to_string_lossy().ends_with("abcdef.tmdb.json"));
}
// Serialising and re-parsing a cache keeps its fields intact.
#[test]
fn test_serde_roundtrip() {
let cache = sample_cache("aaaaaaaa");
let json = serde_json::to_string_pretty(&cache).unwrap();
let parsed: TmdbCache = serde_json::from_str(&json).unwrap();
assert_eq!(parsed.file_uuid, "aaaaaaaa");
assert_eq!(parsed.movie.title, "Charade");
assert_eq!(parsed.cast.len(), 2);
assert_eq!(parsed.cast[0].name, "Cary Grant");
assert_eq!(parsed.movie.tmdb_id, 4808);
}
// A cache written under a scratch base can be read back.
#[test]
fn test_write_then_read_cache_at() {
let tmp = std::env::temp_dir().join("momentry_test_cache");
// Start from a clean slate in case an earlier run left files behind.
let _ = std::fs::remove_dir_all(&tmp);
let base = &tmp;
let cache = sample_cache("bbbbbbbb");
write_tmdb_cache_at(base, &cache).unwrap();
let read = read_tmdb_cache_at(base, "bbbbbbbb").unwrap();
assert_eq!(read.movie.title, "Charade");
assert_eq!(read.cast[1].id, 113);
let _ = std::fs::remove_dir_all(&tmp);
}
// Reading a cache that was never written is an error.
#[test]
fn test_read_missing_cache_at_errors() {
let tmp = std::env::temp_dir().join("momentry_test_missing");
let _ = std::fs::remove_dir_all(&tmp);
let base = &tmp;
let result = read_tmdb_cache_at(base, "nonexistent");
assert!(result.is_err());
let _ = std::fs::remove_dir_all(&tmp);
}
// Only `.tmdb.json` files are counted; a missing directory counts as zero.
#[test]
fn test_count_cache_files_at() {
let tmp = std::env::temp_dir().join("momentry_test_count");
let _ = std::fs::remove_dir_all(&tmp);
let base = &tmp;
assert_eq!(count_cache_files_at(base), 0);
let c1 = sample_cache("aaa");
write_tmdb_cache_at(base, &c1).unwrap();
assert_eq!(count_cache_files_at(base), 1);
let c2 = sample_cache("bbb");
write_tmdb_cache_at(base, &c2).unwrap();
assert_eq!(count_cache_files_at(base), 2);
// A plain `.json` file must not be counted.
std::fs::write(base.join("other.json"), "{}").unwrap();
assert_eq!(count_cache_files_at(base), 2);
let _ = std::fs::remove_dir_all(&tmp);
}
}

View File

@@ -1,3 +1,5 @@
pub mod cache;
pub mod face_agent;
pub mod ingest;
pub mod probe;
pub mod status;

View File

@@ -1,6 +1,5 @@
use anyhow::{Context, Result};
use serde::Deserialize;
use std::collections::HashMap;
use tracing::{info, warn};
use crate::core::config;
@@ -8,11 +7,11 @@ use crate::core::db::PostgresDb;
#[derive(Debug, Deserialize)]
struct TmdbSearchResult {
results: Vec<TmdbMovie>,
results: Vec<TmdbApiMovie>,
}
#[derive(Debug, Deserialize)]
struct TmdbMovie {
struct TmdbApiMovie {
id: u64,
title: String,
release_date: Option<String>,
@@ -22,11 +21,11 @@ struct TmdbMovie {
#[derive(Debug, Deserialize)]
struct TmdbCredits {
cast: Vec<TmdbCastMember>,
cast: Vec<TmdbApiCastMember>,
}
#[derive(Debug, Deserialize)]
struct TmdbCastMember {
struct TmdbApiCastMember {
id: u64,
name: String,
character: String,
@@ -54,6 +53,271 @@ fn extract_movie_name(filename: &str) -> Option<String> {
Some(cleaned)
}
/// Restores TMDb identities for `file_uuid` from its on-disk TMDb cache.
///
/// The cache is loaded with `read_tmdb_cache`. When it lists cast members but
/// no identity entries, identities are created from the cached movie and cast
/// data; otherwise the identities referenced by the cache are upserted from
/// their identity files on disk.
///
/// # Errors
///
/// Returns an error when the cache cannot be read, or when the selected
/// follow-up step fails.
pub async fn probe_from_cache(
    db: &PostgresDb,
    file_uuid: &str,
) -> Result<TmdbProbeResult> {
    let cached = crate::core::tmdb::cache::read_tmdb_cache(file_uuid)?;

    let has_cast = !cached.cast.is_empty();
    let lacks_identities = cached.identities.is_empty();

    if lacks_identities && has_cast {
        create_identities_from_data(db, file_uuid, &cached.movie, &cached.cast).await
    } else {
        upsert_identities_from_disk(db, &cached, file_uuid).await
    }
}
/// Upserts into the database every identity referenced by a TMDb cache,
/// reading each identity from its identity file on disk.
///
/// Entries whose identity file is missing, unreadable or unparsable are
/// skipped with a warning, as are rows whose upsert fails; none of these abort
/// the run. After the loop the TMDb summary is stored on the video record.
///
/// Returns a [`TmdbProbeResult`] whose `identities_created` is the number of
/// successful upserts and whose `cast_count` is taken from the cache.
async fn upsert_identities_from_disk(
    db: &PostgresDb,
    cache: &crate::core::tmdb::cache::TmdbCache,
    file_uuid: &str,
) -> Result<TmdbProbeResult> {
    info!(
        "[TMDB] Upserting identities from disk for: {} (TMDB id={})",
        cache.movie.title, cache.movie.tmdb_id
    );

    // The statement text does not depend on the entry, so build it once.
    let identities_table = crate::core::db::schema::table_name("identities");
    let upsert_sql = format!(
        "INSERT INTO {} (uuid, name, identity_type, source, status, tmdb_id, tmdb_profile, metadata) \
         VALUES ($1::uuid, $2, 'people', 'tmdb', 'confirmed', $3, $4, $5::jsonb) \
         ON CONFLICT (name) DO UPDATE SET \
         uuid = COALESCE({}.uuid, $1::uuid), \
         tmdb_id = COALESCE(EXCLUDED.tmdb_id, {}.tmdb_id), \
         tmdb_profile = COALESCE(EXCLUDED.tmdb_profile, {}.tmdb_profile), \
         metadata = {}.metadata || $5::jsonb",
        identities_table, identities_table, identities_table, identities_table, identities_table
    );

    let mut identities_created = 0usize;
    for entry in &cache.identities {
        let path = crate::core::identity::storage::identity_file_path(&entry.identity_uuid);
        if !path.exists() {
            warn!("[TMDB] Identity file not found on disk: {}", path.display());
            continue;
        }

        let content = match std::fs::read_to_string(&path) {
            Ok(content) => content,
            Err(e) => {
                warn!("[TMDB] Failed to read identity file {}: {}", path.display(), e);
                continue;
            }
        };

        let identity_file = match serde_json::from_str::<crate::core::identity::storage::IdentityFile>(&content) {
            Ok(parsed) => parsed,
            Err(e) => {
                warn!("[TMDB] Failed to parse identity file {}: {}", path.display(), e);
                continue;
            }
        };

        let result = sqlx::query(&upsert_sql)
            .bind(&identity_file.identity_uuid)
            .bind(&identity_file.name)
            .bind(identity_file.tmdb_id)
            .bind(&identity_file.tmdb_profile)
            .bind(&identity_file.metadata)
            .execute(db.pool())
            .await;

        match result {
            Ok(_) => {
                info!("[TMDB] Upserted identity: {} (uuid={})", identity_file.name, identity_file.identity_uuid);
                identities_created += 1;
            }
            Err(e) => {
                warn!("[TMDB] Failed to upsert identity '{}': {}", identity_file.name, e);
            }
        }
    }

    drop_identities_cache(db, file_uuid, &cache.movie, cache.cast_count, identities_created).await;

    Ok(TmdbProbeResult {
        tmdb_id: cache.movie.tmdb_id,
        title: cache.movie.title.clone(),
        cast_count: cache.cast_count,
        identities_created,
    })
}

/// Stores the TMDb summary for a video under the `tmdb` key of its
/// `birth_registration` JSON column.
///
/// `cast_count` is the number of cast members known for the movie. It is
/// passed in explicitly because the movie record does not carry it
/// (previously the TMDb movie id was written in its place by mistake).
///
/// This is best-effort: a failed UPDATE is logged and otherwise ignored.
async fn drop_identities_cache(
    db: &PostgresDb,
    file_uuid: &str,
    movie: &crate::core::tmdb::cache::TmdbMovie,
    cast_count: usize,
    identities_created: usize,
) {
    let videos_table = crate::core::db::schema::table_name("videos");
    let tmdb_label = "tmdb";
    let update_sql = format!(
        "UPDATE {} SET birth_registration = \
         jsonb_set(COALESCE(birth_registration, '{{}}'::jsonb), '{{{}}}'::text[], $1::jsonb) \
         WHERE file_uuid = $2",
        videos_table, tmdb_label
    );

    let outcome = sqlx::query(&update_sql)
        .bind(serde_json::json!({
            "movie_id": movie.tmdb_id,
            "movie_title": movie.title,
            "release_date": movie.release_date,
            "poster": movie.poster_path,
            "cast_count": cast_count,
            "identities_created": identities_created,
        }))
        .bind(file_uuid)
        .execute(db.pool())
        .await;

    if let Err(e) = outcome {
        warn!("[TMDB] Failed to store TMDb info on video {}: {}", file_uuid, e);
    }
}
pub async fn create_identities_from_data(
db: &PostgresDb,
file_uuid: &str,
movie: &crate::core::tmdb::cache::TmdbMovie,
cast: &[crate::core::tmdb::cache::TmdbCastMember],
) -> Result<TmdbProbeResult> {
info!(
"[TMDB] Creating identities for: {} (TMDB id={})",
movie.title, movie.tmdb_id
);
let identities_table = crate::core::db::schema::table_name("identities");
let mut identities_created = 0usize;
for member in cast.iter() {
if member.name.trim().is_empty() {
continue;
}
let profile_url = member.profile_path.as_ref()
.map(|p| format!("https://image.tmdb.org/t/p/w185{}", p));
let metadata = serde_json::json!({
"tmdb_character": member.character,
"tmdb_cast_order": member.order,
"tmdb_movie_id": movie.tmdb_id,
"tmdb_movie_title": movie.title,
"tmdb_biography": member.biography,
"tmdb_birthday": member.birthday,
"tmdb_place_of_birth": member.place_of_birth,
"tmdb_aliases": member.also_known_as,
"tmdb_imdb_id": member.imdb_id,
"tmdb_department": member.known_for_department,
"tmdb_popularity": member.popularity,
"tmdb_deathday": member.deathday,
"tmdb_gender": member.gender,
"tmdb_homepage": member.homepage,
});
let result = sqlx::query_as::<_, (uuid::Uuid,)>(&format!(
"INSERT INTO {} (name, identity_type, source, status, tmdb_id, tmdb_profile, metadata) \
VALUES ($1, 'people', 'tmdb', 'confirmed', $2, $3, $4::jsonb) \
ON CONFLICT (name) DO UPDATE SET \
tmdb_id = COALESCE(EXCLUDED.tmdb_id, {}.tmdb_id), \
tmdb_profile = COALESCE(EXCLUDED.tmdb_profile, {}.tmdb_profile), \
metadata = {}.metadata || $4::jsonb \
RETURNING uuid",
identities_table, identities_table, identities_table, identities_table
))
.bind(&member.name)
.bind(member.id as i64)
.bind(&profile_url)
.bind(&metadata)
.fetch_optional(db.pool())
.await;
match result {
Ok(Some((identity_uuid,))) => {
let uuid_str = identity_uuid.to_string().replace('-', "");
info!(
"[TMDB] Created/updated identity: {} as {} (uuid={})",
member.name, member.character, uuid_str
);
identities_created += 1;
if let Err(e) = crate::core::identity::storage::save_identity_file(db, &uuid_str).await {
warn!("[TMDB] Failed to save identity file for {}: {}", member.name, e);
}
// Download and save TMDb profile image locally
if let Some(url) = &profile_url {
let dir = crate::core::identity::storage::identity_dir(&uuid_str);
std::fs::create_dir_all(&dir).ok();
let img_path = dir.join("profile.jpg");
if !img_path.exists() {
if let Ok(resp) = reqwest::get(url).await {
if let Ok(bytes) = resp.bytes().await {
std::fs::write(&img_path, &bytes).ok();
}
}
}
}
}
Ok(None) => {
warn!("[TMDB] INSERT returned no uuid for: {}", member.name);
}
Err(e) => {
warn!("[TMDB] Failed to create identity '{}': {}", member.name, e);
}
}
}
// Step 4: Trigger background embedding extraction
if identities_created > 0 {
let scripts_dir = std::env::var("MOMENTRY_SCRIPTS_DIR")
.unwrap_or_else(|_| "/Users/accusys/momentry_core_0.1/scripts".to_string());
let python_path = std::env::var("MOMENTRY_PYTHON_PATH")
.unwrap_or_else(|_| "/opt/homebrew/bin/python3.11".to_string());
let schema = crate::core::config::DATABASE_SCHEMA.clone();
tokio::spawn(async move {
let output = tokio::process::Command::new(&python_path)
.arg(&format!("{}/tmdb_embed_extractor.py", scripts_dir))
.arg("--schema")
.arg(&schema)
.output()
.await;
match output {
Ok(o) => {
if !o.status.success() {
let stderr = String::from_utf8_lossy(&o.stderr);
warn!("[TMDB] Embed extraction script failed: {}", stderr);
} else {
info!("[TMDB] Background face embedding extraction complete");
}
}
Err(e) => warn!("[TMDB] Failed to run embed extraction script: {}", e),
}
});
}
// Step 5: Store tmdb_id on the video record for later use
let videos_table = crate::core::db::schema::table_name("videos");
let tmdb_label = "tmdb";
let _ = sqlx::query(&format!(
"UPDATE {} SET birth_registration = \
jsonb_set(COALESCE(birth_registration, '{{}}'::jsonb), '{{{}}}'::text[], $1::jsonb) \
WHERE file_uuid = $2",
videos_table, tmdb_label
))
.bind(serde_json::json!({
"movie_id": movie.tmdb_id,
"movie_title": movie.title,
"release_date": movie.release_date,
"poster": movie.poster_path,
"cast_count": cast.len(),
"identities_created": identities_created,
}))
.bind(file_uuid)
.execute(db.pool())
.await
.ok();
info!(
"[TMDB] Probe complete: {} cast members, {} identities created/updated",
cast.len(),
identities_created
);
Ok(TmdbProbeResult {
tmdb_id: movie.tmdb_id,
title: movie.title.clone(),
cast_count: cast.len(),
identities_created,
})
}
pub async fn probe_movie(
db: &PostgresDb,
filename: &str,
@@ -120,119 +384,57 @@ pub async fn probe_movie(
.await
.context("Failed to parse TMDb credits response")?;
// Step 3: Create identities for top cast
let identities_table = crate::core::db::schema::table_name("identities");
let mut identities_created = 0usize;
for member in credits.cast.iter().take(20) {
if member.name.trim().is_empty() {
continue;
}
let profile_url = member
.profile_path
.as_ref()
.map(|p| format!("https://image.tmdb.org/t/p/w185{}", p));
let result = sqlx::query(&format!(
"INSERT INTO {} (name, identity_type, source, status, tmdb_id, tmdb_profile, metadata) \
VALUES ($1, 'people', 'tmdb', 'confirmed', $2, $3, \
jsonb_build_object('tmdb_character', $4, 'tmdb_cast_order', $5, 'tmdb_movie_id', $6, 'tmdb_movie_title', $7)) \
ON CONFLICT (name) DO UPDATE SET \
tmdb_id = COALESCE(EXCLUDED.tmdb_id, {}.tmdb_id), \
tmdb_profile = COALESCE(EXCLUDED.tmdb_profile, {}.tmdb_profile), \
metadata = {}.metadata || jsonb_build_object('tmdb_movie_id', $6, 'tmdb_movie_title', $7) \
RETURNING id",
identities_table, identities_table, identities_table, identities_table
))
.bind(&member.name)
.bind(member.id as i64)
.bind(&profile_url)
.bind(&member.character)
.bind(member.order as i32)
.bind(movie.id as i64)
.bind(&movie.title)
.execute(db.pool())
.await;
match result {
Ok(_) => {
info!(
"[TMDB] Created/updated identity: {} as {}",
member.name, member.character
);
identities_created += 1;
}
Err(e) => {
warn!("[TMDB] Failed to create identity '{}': {}", member.name, e);
}
}
}
// Step 4: Trigger background embedding extraction
if identities_created > 0 {
let scripts_dir = std::env::var("MOMENTRY_SCRIPTS_DIR")
.unwrap_or_else(|_| "/Users/accusys/momentry_core_0.1/scripts".to_string());
let python_path = std::env::var("MOMENTRY_PYTHON_PATH")
.unwrap_or_else(|_| "/opt/homebrew/bin/python3.11".to_string());
let schema = crate::core::config::DATABASE_SCHEMA.clone();
tokio::spawn(async move {
let output = tokio::process::Command::new(&python_path)
.arg(&format!("{}/tmdb_embed_extractor.py", scripts_dir))
.arg("--schema")
.arg(&schema)
.output()
.await;
match output {
Ok(o) => {
if !o.status.success() {
let stderr = String::from_utf8_lossy(&o.stderr);
warn!("[TMDB] Embed extraction script failed: {}", stderr);
} else {
info!("[TMDB] Background face embedding extraction complete");
}
}
Err(e) => warn!("[TMDB] Failed to run embed extraction script: {}", e),
}
});
}
// Step 5: Store tmdb_id on the video record for later use
let videos_table = crate::core::db::schema::table_name("videos");
let tmdb_label = "tmdb";
let _ = sqlx::query(&format!(
"UPDATE {} SET birth_registration = \
jsonb_set(COALESCE(birth_registration, '{{}}'::jsonb), '{{{}}}', $1::jsonb) \
WHERE file_uuid = $2",
videos_table, tmdb_label
))
.bind(serde_json::json!({
"movie_id": movie.id,
"movie_title": movie.title,
"release_date": movie.release_date,
"poster": movie.poster_path,
"cast_count": credits.cast.len(),
"identities_created": identities_created,
}))
.bind(file_uuid)
.execute(db.pool())
.await
.ok();
info!(
"[TMDB] Probe complete: {} cast members, {} identities created/updated",
credits.cast.len(),
identities_created
);
Ok(Some(TmdbProbeResult {
// Step 3: Convert API types to cache types and use shared logic
use crate::core::tmdb::cache;
let cache_movie = cache::TmdbMovie {
tmdb_id: movie.id,
title: movie.title,
title: movie.title.clone(),
release_date: movie.release_date.clone(),
overview: movie.overview.clone(),
poster_path: movie.poster_path.clone(),
};
let cache_cast: Vec<cache::TmdbCastMember> = credits.cast.iter().map(|m| {
cache::TmdbCastMember {
id: m.id,
name: m.name.clone(),
character: m.character.clone(),
profile_path: m.profile_path.clone(),
order: m.order,
biography: None,
birthday: None,
place_of_birth: None,
also_known_as: vec![],
imdb_id: None,
known_for_department: None,
popularity: None,
deathday: None,
gender: None,
homepage: None,
}
}).collect();
// Write TMDb cache so probe_from_cache can be used next time
let cache_obj = cache::TmdbCache {
file_uuid: file_uuid.to_string(),
fetched_at: chrono::Utc::now().to_rfc3339(),
source: "probe_movie".to_string(),
movie: cache_movie.clone(),
cast: cache_cast.clone(),
cast_count: credits.cast.len(),
identities_created,
}))
identities_created: 0,
identities: vec![],
};
cache::write_tmdb_cache(&cache_obj).ok();
let result = create_identities_from_data(db, file_uuid, &cache_movie, &cache_cast).await?;
// Update cache with actual identities_created count
if let Ok(mut cache_obj) = cache::read_tmdb_cache(file_uuid) {
cache_obj.identities_created = result.identities_created;
cache::write_tmdb_cache(&cache_obj).ok();
}
Ok(Some(result))
}
fn urlencoding(s: &str) -> String {

148
src/core/tmdb/status.rs Normal file
View File

@@ -0,0 +1,148 @@
use anyhow::Result;
use serde::{Deserialize, Serialize};
use tracing::info;
use crate::core::config;
/// Snapshot of the TMDb integration's configuration and API health.
///
/// Produced by `quick_status` (configuration only, no network access) and by
/// `check_tmdb_api` (configuration plus a live request).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TmdbResourceStatus {
    /// Whether a TMDb API key is present in the configuration.
    pub api_key_configured: bool,
    /// Value of the TMDb probe-enabled configuration flag.
    pub enabled: bool,
    /// Result of the last reachability check; `None` when no check was made.
    pub api_reachable: Option<bool>,
    /// Time the check request took, in milliseconds; `None` when no request was sent.
    pub api_latency_ms: Option<u64>,
    /// Description of why the check failed; `None` on success or when no check was made.
    pub api_error: Option<String>,
    /// RFC 3339 timestamp of the check; `None` when no check was made.
    pub last_check_at: Option<String>,
}
/// Builds a status from configuration alone, without contacting the TMDb API.
///
/// All check-related fields (`api_reachable`, `api_latency_ms`, `api_error`,
/// `last_check_at`) are `None`.
pub fn quick_status() -> TmdbResourceStatus {
    let api_key_configured = config::tmdb::API_KEY.is_some();
    let enabled = *config::tmdb::PROBE_ENABLED;

    TmdbResourceStatus {
        api_key_configured,
        enabled,
        api_reachable: None,
        api_latency_ms: None,
        api_error: None,
        last_check_at: None,
    }
}
/// Performs a live reachability check against the TMDb `/3/configuration`
/// endpoint and reports the outcome.
///
/// * No API key configured: no request is sent; the status reports
///   `api_reachable = Some(false)` with an explanatory `api_error`.
/// * Request completed: `api_reachable` reflects whether the HTTP status was a
///   success; a non-success status is reported as `HTTP <status>`.
/// * Request failed: `api_reachable = Some(false)` and `api_error` carries the
///   transport error.
///
/// `api_latency_ms` is the time the request took and `last_check_at` is the
/// current UTC time in RFC 3339 format.
pub async fn check_tmdb_api() -> TmdbResourceStatus {
    let enabled = *config::tmdb::PROBE_ENABLED;

    let api_key = match config::tmdb::API_KEY.as_ref() {
        Some(key) => key,
        None => {
            return TmdbResourceStatus {
                api_key_configured: false,
                enabled,
                api_reachable: Some(false),
                api_latency_ms: None,
                api_error: Some("API key not configured".to_string()),
                last_check_at: Some(chrono::Utc::now().to_rfc3339()),
            };
        }
    };

    let start = std::time::Instant::now();
    let url = format!(
        "https://api.themoviedb.org/3/configuration?api_key={}",
        api_key
    );
    let outcome = reqwest::get(&url).await;
    let latency = start.elapsed().as_millis() as u64;

    let (reachable, api_error) = match outcome {
        Ok(resp) => {
            let status = resp.status();
            let reachable = status.is_success();
            info!(
                "[TMDB-check] API {}reachable ({}ms)",
                if reachable { "" } else { "not " },
                latency
            );
            let error = if reachable {
                None
            } else {
                Some(format!("HTTP {}", status))
            };
            (reachable, error)
        }
        // The request URL embeds the API key and reqwest includes the URL in
        // its error text; strip it so the key cannot leak into the status.
        Err(e) => (false, Some(e.without_url().to_string())),
    };

    TmdbResourceStatus {
        api_key_configured: true,
        enabled,
        api_reachable: Some(reachable),
        api_latency_ms: Some(latency),
        api_error,
        last_check_at: Some(chrono::Utc::now().to_rfc3339()),
    }
}
/// Returns the number of TMDb cache files, as reported by
/// `crate::core::tmdb::cache::count_cache_files`.
pub fn count_cache_files() -> usize {
    crate::core::tmdb::cache::count_cache_files()
}
/// Counts the rows of the identities table whose `source` is `'tmdb'`.
///
/// # Errors
///
/// Returns the database error if the query fails.
pub async fn count_tmdb_identities(pool: &sqlx::PgPool) -> Result<i64> {
    let table = crate::core::db::schema::table_name("identities");
    let sql = format!("SELECT COUNT(*) FROM {} WHERE source = 'tmdb'", table);

    let total = sqlx::query_scalar::<_, i64>(&sql).fetch_one(pool).await?;
    Ok(total)
}
/// Counts the rows of the identities table whose `source` is `'tmdb'` and
/// whose `face_embedding` column is not NULL.
///
/// # Errors
///
/// Returns the database error if the query fails.
pub async fn count_tmdb_identities_with_embedding(pool: &sqlx::PgPool) -> Result<i64> {
    let table = crate::core::db::schema::table_name("identities");
    let sql = format!(
        "SELECT COUNT(*) FROM {} WHERE source = 'tmdb' AND face_embedding IS NOT NULL",
        table
    );

    let total = sqlx::query_scalar::<_, i64>(&sql).fetch_one(pool).await?;
    Ok(total)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// `quick_status` must leave every check-related field unset and mirror
    /// the configuration for the two boolean flags.
    #[test]
    fn test_quick_status_fields() {
        let s = quick_status();

        // No check has been run, so the check-related fields are all empty.
        assert_eq!(s.api_reachable, None);
        assert_eq!(s.api_latency_ms, None);
        assert_eq!(s.api_error, None);
        assert!(s.last_check_at.is_none());

        // The flag values depend on the environment, so compare them with the
        // configuration they are derived from instead of asserting a tautology.
        assert_eq!(
            s.api_key_configured,
            crate::core::config::tmdb::API_KEY.is_some()
        );
        assert_eq!(s.enabled, *crate::core::config::tmdb::PROBE_ENABLED);
    }

    /// Serialized JSON must contain the populated fields under their field names.
    #[test]
    fn test_status_serialization() {
        let s = TmdbResourceStatus {
            api_key_configured: true,
            enabled: false,
            api_reachable: Some(true),
            api_latency_ms: Some(120),
            api_error: None,
            last_check_at: Some("2026-05-16T12:00:00+00:00".to_string()),
        };
        let json = serde_json::to_string(&s).unwrap();
        assert!(json.contains("\"api_key_configured\":true"));
        assert!(json.contains("\"api_reachable\":true"));
        assert!(json.contains("\"api_latency_ms\":120"));
    }

    /// JSON `null` must deserialize to `None` and strings to `Some`.
    #[test]
    fn test_status_deserialization() {
        let json = r#"{"api_key_configured":false,"enabled":true,"api_reachable":null,"api_latency_ms":null,"api_error":"No key","last_check_at":null}"#;
        let s: TmdbResourceStatus = serde_json::from_str(json).unwrap();
        assert!(!s.api_key_configured);
        assert!(s.enabled);
        assert!(s.api_reachable.is_none());
        assert_eq!(s.api_error, Some("No key".to_string()));
    }
}