feat: Phase 3 API (Identity, Files, Candidates) and pre_chunks migration

This commit is contained in:
Warren
2026-04-25 22:19:12 +08:00
parent 1f84e5469f
commit e84982e7d9
5 changed files with 454 additions and 0 deletions

View File

@@ -15,6 +15,32 @@ use crate::core::text::{
tokenizer::{contains_chinese, tokenize_chinese_text},
};
/// One row of the `identities` table, as selected by the people listing and
/// people search queries (`id, uuid, name, metadata, created_at`).
///
/// Decoded from query rows by column name via `sqlx::FromRow`, and
/// serializable with serde.
#[derive(Debug, Clone, Serialize, Deserialize, sqlx::FromRow)]
pub struct IdentityRecord {
/// Value of the `id` column.
pub id: i32,
/// Value of the `uuid` column.
pub uuid: Uuid,
/// Value of the `name` column; the people search matches against it.
pub name: String,
/// JSON value of the `metadata` column.
pub metadata: serde_json::Value,
/// Value of the `created_at` column; `None` when the column is NULL.
pub created_at: Option<chrono::DateTime<chrono::Utc>>,
}
/// One row of the `videos` table, as selected by the file listing query
/// (`uuid, file_path, file_name, probe_json, created_at`).
///
/// Decoded from query rows by column name via `sqlx::FromRow`, and
/// serializable with serde.
#[derive(Debug, Clone, Serialize, Deserialize, sqlx::FromRow)]
pub struct FileRecord {
/// Value of the `uuid` column, kept as text.
pub uuid: String,
/// Value of the `file_path` column.
pub file_path: String,
/// Value of the `file_name` column.
pub file_name: String,
/// JSON value of the `probe_json` column; `None` when the column is NULL.
// NOTE(review): presumably media probe output — confirm against the writer.
pub probe_json: Option<serde_json::Value>,
/// Value of the `created_at` column; `None` when the column is NULL.
pub created_at: Option<chrono::DateTime<chrono::Utc>>,
}
/// One `pre_chunks` row returned by the people-candidates query, which selects
/// `id, file_uuid, data, created_at` for rows whose `processor_type` is
/// `'face'` and whose `identity_id` is NULL.
///
/// Decoded from query rows by column name via `sqlx::FromRow`, and
/// serializable with serde.
#[derive(Debug, Clone, Serialize, Deserialize, sqlx::FromRow)]
pub struct CandidateRecord {
/// Value of the `id` column.
pub id: i64,
/// Value of the `file_uuid` column.
// NOTE(review): the pre-chunk insert paths bind `file_uuid` as `&str`, while
// this field decodes it as `Uuid` — confirm the column type supports both.
pub file_uuid: Uuid,
/// JSON payload of the `data` column.
pub data: serde_json::Value, // Face data
/// Value of the `created_at` column; `None` when the column is NULL.
pub created_at: Option<chrono::DateTime<chrono::Utc>>,
}
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct StorageStatus {
pub fs_video: bool,
@@ -1704,6 +1730,153 @@ impl PostgresDb {
Ok(())
}
/// Persists a single raw pre-chunk emitted by a processor (for example a YOLO
/// frame or a face detection).
///
/// Trace data is written to the `pre_chunks` table through this method instead
/// of going directly into chunks. Every row is stored with its
/// `coordinate_type` column fixed to `'frame'`; the remaining columns come
/// straight from the arguments, with `None` values stored as NULL.
///
/// # Errors
///
/// Returns an error whose message starts with `Failed to store raw pre_chunk`
/// when the insert statement fails.
pub async fn store_raw_pre_chunk(
    &self,
    file_uuid: &str,
    processor_type: &str,
    coordinate_index: i64,
    timestamp: Option<f64>,
    data: &serde_json::Value,
    identity_id: Option<Uuid>,
    confidence: Option<f64>,
) -> Result<()> {
    // The table name is resolved at runtime, so the statement text is built
    // here; all values still travel as bound parameters.
    let insert_sql = format!(
        r#"
INSERT INTO {} (
file_uuid, processor_type, coordinate_type, coordinate_index,
timestamp, data, identity_id, confidence
) VALUES ($1, $2, 'frame', $3, $4, $5, $6, $7)
"#,
        schema::table_name("pre_chunks")
    );

    let outcome = sqlx::query(&insert_sql)
        .bind(file_uuid)
        .bind(processor_type)
        .bind(coordinate_index)
        .bind(timestamp)
        .bind(data)
        .bind(identity_id)
        .bind(confidence)
        .execute(self.pool())
        .await;

    match outcome {
        Ok(_) => Ok(()),
        Err(e) => Err(anyhow::anyhow!("Failed to store raw pre_chunk: {}", e)),
    }
}
/// Stores many raw pre-chunks for one file and processor inside a single
/// transaction (e.g. a bulk insert of frames).
///
/// Each element of `chunks` is
/// `(coordinate_index, timestamp, data, identity_id, confidence)`. Rows are
/// inserted one statement at a time, all with `coordinate_type` fixed to
/// `'frame'`, and become visible together when the transaction commits.
///
/// An empty `chunks` slice is a no-op: no transaction is opened and `Ok(())`
/// is returned immediately.
///
/// # Errors
///
/// Returns the underlying database error if the transaction cannot be started,
/// any insert fails, or the commit fails. On an insert failure the function
/// returns before committing, so the transaction is dropped uncommitted
/// (sqlx rolls back a transaction that is dropped without a commit).
pub async fn store_raw_pre_chunks_batch(
    &self,
    file_uuid: &str,
    processor_type: &str,
    chunks: &[(i64, Option<f64>, serde_json::Value, Option<Uuid>, Option<f64>)],
) -> Result<()> {
    // Nothing to write: avoid a pointless BEGIN/COMMIT round trip.
    if chunks.is_empty() {
        return Ok(());
    }

    // Build the statement once; it is reused for every row in the batch.
    let table = schema::table_name("pre_chunks");
    let query = format!(
        r#"
INSERT INTO {} (
file_uuid, processor_type, coordinate_type, coordinate_index,
timestamp, data, identity_id, confidence
) VALUES ($1, $2, 'frame', $3, $4, $5, $6, $7)
"#,
        table
    );

    let mut tx = self.pool().begin().await?;
    for (coord_idx, ts, data, id, conf) in chunks {
        sqlx::query(&query)
            .bind(file_uuid)
            .bind(processor_type)
            .bind(*coord_idx)
            .bind(*ts)
            .bind(data)
            .bind(*id)
            .bind(*conf)
            .execute(&mut *tx)
            .await?;
    }
    tx.commit().await?;
    Ok(())
}
/// Returns one page of identities, newest `created_at` first.
///
/// `limit` and `offset` are passed to the query's `LIMIT` / `OFFSET` clauses
/// as bound parameters.
///
/// # Errors
///
/// Propagates any error raised while running the query or decoding rows.
pub async fn list_people(&self, limit: i32, offset: i64) -> Result<Vec<IdentityRecord>> {
    // NOTE(review): the table name is hard-coded here, whereas other queries in
    // this file resolve names through `schema::table_name` — confirm intended.
    const LIST_PEOPLE_SQL: &str = r#"
SELECT id, uuid, name, metadata, created_at
FROM identities
ORDER BY created_at DESC
LIMIT $1 OFFSET $2
"#;

    let people = sqlx::query_as::<_, IdentityRecord>(LIST_PEOPLE_SQL)
        .bind(limit)
        .bind(offset)
        .fetch_all(&self.pool)
        .await?;

    Ok(people)
}
/// Returns one page of identities whose `name` contains `query`
/// (case-insensitive), ordered by name ascending.
///
/// `query` is treated as literal text: the LIKE metacharacters `%`, `_` and
/// `\` are escaped before the pattern is built, so a search for `50%` matches
/// names containing the characters `50%` rather than anything starting with
/// `50`. An empty `query` matches every row.
///
/// `limit` and `offset` are passed to the query's `LIMIT` / `OFFSET` clauses
/// as bound parameters.
///
/// # Errors
///
/// Propagates any error raised while running the query or decoding rows.
pub async fn search_people(&self, query: &str, limit: i32, offset: i64) -> Result<Vec<IdentityRecord>> {
    // Build `%<escaped query>%`. Backslash is PostgreSQL's default escape
    // character for LIKE/ILIKE, so prefixing each metacharacter with it makes
    // the character match itself.
    let mut pattern = String::with_capacity(query.len() + 2);
    pattern.push('%');
    for ch in query.chars() {
        if matches!(ch, '\\' | '%' | '_') {
            pattern.push('\\');
        }
        pattern.push(ch);
    }
    pattern.push('%');

    let sql = r#"
SELECT id, uuid, name, metadata, created_at
FROM identities
WHERE name ILIKE $1
ORDER BY name ASC
LIMIT $2 OFFSET $3
"#;
    let rows = sqlx::query_as(sql)
        .bind(pattern)
        .bind(limit)
        .bind(offset)
        .fetch_all(&self.pool)
        .await?;
    Ok(rows)
}
pub async fn get_people_candidates(&self, limit: i32, offset: i64) -> Result<Vec<CandidateRecord>> {
let query = r#"
SELECT id, file_uuid, data, created_at
FROM pre_chunks
WHERE processor_type = 'face' AND identity_id IS NULL
ORDER BY created_at DESC
LIMIT $1 OFFSET $2
"#;
let rows = sqlx::query_as(query)
.bind(limit)
.bind(offset)
.fetch_all(&self.pool)
.await?;
Ok(rows)
}
/// Returns one page of rows from the `videos` table, newest `created_at`
/// first.
///
/// `limit` and `offset` are passed to the query's `LIMIT` / `OFFSET` clauses
/// as bound parameters.
///
/// # Errors
///
/// Propagates any error raised while running the query or decoding rows.
pub async fn list_files(&self, limit: i32, offset: i64) -> Result<Vec<FileRecord>> {
    // NOTE(review): the table name is hard-coded here, whereas other queries in
    // this file resolve names through `schema::table_name` — confirm intended.
    const LIST_FILES_SQL: &str = r#"
SELECT uuid, file_path, file_name, probe_json, created_at
FROM videos
ORDER BY created_at DESC
LIMIT $1 OFFSET $2
"#;

    let files = sqlx::query_as::<_, FileRecord>(LIST_FILES_SQL)
        .bind(limit)
        .bind(offset)
        .fetch_all(&self.pool)
        .await?;

    Ok(files)
}
pub async fn store_chunk(&self, chunk: &Chunk) -> Result<()> {
let table = schema::table_name("chunks");
let content_with_rule = serde_json::json!({