feat: ASRX hybrid pipeline, identity history, worker fixes, checkpoint system
This commit is contained in:
604
src/core/db/identity_merge_history.rs
Normal file
604
src/core/db/identity_merge_history.rs
Normal file
@@ -0,0 +1,604 @@
|
||||
use anyhow::{Context, Result};
|
||||
use bson::{doc, oid::ObjectId, DateTime as BsonDateTime, Document};
|
||||
use chrono::{DateTime, Utc};
|
||||
use mongodb::{Client, Collection, Database, IndexModel};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_json::Value as JsonValue;
|
||||
use uuid::Uuid;
|
||||
|
||||
const COLLECTION_NAME: &str = "identity_merge_history";
|
||||
|
||||
fn bson_doc_to_json(doc: &Document) -> JsonValue {
|
||||
match bson::to_bson(doc) {
|
||||
Ok(bson) => bson.into_relaxed_extjson(),
|
||||
Err(_) => JsonValue::Null,
|
||||
}
|
||||
}
|
||||
|
||||
fn json_value_to_bson_doc(value: &JsonValue) -> Document {
|
||||
bson::to_document(value).unwrap_or_default()
|
||||
}
|
||||
|
||||
fn doc_field_to_json(doc: &Document, key: &str) -> JsonValue {
|
||||
doc.get(key)
|
||||
.map(|b| b.clone().into_relaxed_extjson())
|
||||
.unwrap_or(JsonValue::Null)
|
||||
}
|
||||
|
||||
fn json_to_bson(value: &JsonValue) -> bson::Bson {
|
||||
bson::to_bson(value).unwrap_or(bson::Bson::Null)
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct IdentityMergeHistory {
|
||||
pub id: Option<ObjectId>,
|
||||
pub merge_id: String,
|
||||
pub source_identity: IdentitySnapshot,
|
||||
pub target_identity: TargetIdentitySnapshot,
|
||||
pub aliases_added_to_target: Vec<AliasEntry>,
|
||||
pub metadata_fields_added: Vec<String>,
|
||||
pub faces_transferred: FacesTransferred,
|
||||
pub merge_params: MergeParams,
|
||||
pub merged_at: DateTime<Utc>,
|
||||
pub undo_deadline: DateTime<Utc>,
|
||||
pub undone: bool,
|
||||
pub undone_at: Option<DateTime<Utc>>,
|
||||
pub undone_by: Option<String>,
|
||||
pub undone_snapshot: Option<UndoneSnapshot>,
|
||||
pub undo_expired: bool,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct IdentitySnapshot {
|
||||
pub id: i64,
|
||||
pub uuid: String,
|
||||
pub name: String,
|
||||
pub identity_type: Option<String>,
|
||||
pub source: Option<String>,
|
||||
pub status: String,
|
||||
pub tmdb_id: Option<i64>,
|
||||
pub tmdb_profile: Option<String>,
|
||||
pub metadata: JsonValue,
|
||||
pub created_at: Option<DateTime<Utc>>,
|
||||
pub face_count: i64,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct TargetIdentitySnapshot {
|
||||
pub id: i64,
|
||||
pub uuid: String,
|
||||
pub name: String,
|
||||
pub metadata_before: JsonValue,
|
||||
pub metadata_after: Option<JsonValue>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct AliasEntry {
|
||||
pub name: String,
|
||||
pub locale: String,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub source: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct FacesTransferred {
|
||||
pub file_uuid: String,
|
||||
pub face_ids: Vec<String>,
|
||||
pub trace_ids: Vec<i32>,
|
||||
pub count: i64,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct UndoneSnapshot {
|
||||
pub source_identity_id: i64,
|
||||
pub source_uuid: String,
|
||||
pub source_name: String,
|
||||
pub target_metadata_at_undo: JsonValue,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct MergeParams {
|
||||
pub keep_history: bool,
|
||||
pub cleared_stranger_id: bool,
|
||||
pub performed_by_user: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct MergeHistoryQuery {
|
||||
pub source_uuid: Option<String>,
|
||||
pub target_uuid: Option<String>,
|
||||
pub merge_id: Option<String>,
|
||||
pub undone: Option<bool>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct MergeHistoryEntry {
|
||||
pub merge_id: String,
|
||||
pub source_name: String,
|
||||
pub target_name: String,
|
||||
pub faces_transferred: i64,
|
||||
pub merged_at: DateTime<Utc>,
|
||||
pub undo_deadline: DateTime<Utc>,
|
||||
pub undone: bool,
|
||||
pub undo_expired: bool,
|
||||
}
|
||||
|
||||
impl IdentityMergeHistory {
|
||||
pub fn from_document(doc: &Document) -> Result<Self> {
|
||||
let source = doc
|
||||
.get_document("source_identity")
|
||||
.context("Missing source_identity")?;
|
||||
let target = doc
|
||||
.get_document("target_identity")
|
||||
.context("Missing target_identity")?;
|
||||
let faces = doc
|
||||
.get_document("faces_transferred")
|
||||
.context("Missing faces_transferred")?;
|
||||
let aliases = doc
|
||||
.get_array("aliases_added_to_target")
|
||||
.unwrap_or(&vec![])
|
||||
.clone();
|
||||
let fields = doc
|
||||
.get_array("metadata_fields_added")
|
||||
.unwrap_or(&vec![])
|
||||
.clone();
|
||||
let merge_params_doc = doc
|
||||
.get_document("merge_params")
|
||||
.unwrap_or(&Document::new())
|
||||
.clone();
|
||||
|
||||
let mut parsed_aliases = Vec::new();
|
||||
for a in aliases {
|
||||
if let Some(d) = a.as_document() {
|
||||
parsed_aliases.push(AliasEntry {
|
||||
name: d.get_str("name").unwrap_or("").to_string(),
|
||||
locale: d.get_str("locale").unwrap_or("en").to_string(),
|
||||
source: d.get_str("source").ok().map(|s| s.to_string()),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
let mut parsed_fields = Vec::new();
|
||||
for f in fields {
|
||||
if let Some(s) = f.as_str() {
|
||||
parsed_fields.push(s.to_string());
|
||||
}
|
||||
}
|
||||
|
||||
let undone_snapshot = doc.get_document("undone_snapshot").ok().and_then(|d| {
|
||||
let sid = d.get_i64("source_identity_id").unwrap_or(0);
|
||||
let suuid = d.get_str("source_uuid").unwrap_or("").to_string();
|
||||
let sname = d.get_str("source_name").unwrap_or("").to_string();
|
||||
let meta = doc_field_to_json(d, "target_metadata_at_undo");
|
||||
Some(UndoneSnapshot {
|
||||
source_identity_id: sid,
|
||||
source_uuid: suuid,
|
||||
source_name: sname,
|
||||
target_metadata_at_undo: meta,
|
||||
})
|
||||
});
|
||||
|
||||
Ok(IdentityMergeHistory {
|
||||
id: doc.get_object_id("_id").ok(),
|
||||
merge_id: doc.get_str("merge_id").unwrap_or("").to_string(),
|
||||
source_identity: IdentitySnapshot {
|
||||
id: source.get_i64("id").unwrap_or(0),
|
||||
uuid: source.get_str("uuid").unwrap_or("").to_string(),
|
||||
name: source.get_str("name").unwrap_or("").to_string(),
|
||||
identity_type: source.get_str("identity_type").ok().map(|s| s.to_string()),
|
||||
source: source.get_str("source").ok().map(|s| s.to_string()),
|
||||
status: source.get_str("status").unwrap_or("").to_string(),
|
||||
tmdb_id: source.get_i64("tmdb_id").ok(),
|
||||
tmdb_profile: source.get_str("tmdb_profile").ok().map(|s| s.to_string()),
|
||||
metadata: doc_field_to_json(source, "metadata"),
|
||||
created_at: source
|
||||
.get_datetime("created_at")
|
||||
.map(|d| d.to_chrono())
|
||||
.ok(),
|
||||
face_count: source.get_i64("face_count").unwrap_or(0),
|
||||
},
|
||||
target_identity: TargetIdentitySnapshot {
|
||||
id: target.get_i64("id").unwrap_or(0),
|
||||
uuid: target.get_str("uuid").unwrap_or("").to_string(),
|
||||
name: target.get_str("name").unwrap_or("").to_string(),
|
||||
metadata_before: doc_field_to_json(target, "metadata_before"),
|
||||
metadata_after: target
|
||||
.get("metadata_after")
|
||||
.map(|b| b.clone().into_relaxed_extjson()),
|
||||
},
|
||||
aliases_added_to_target: parsed_aliases,
|
||||
metadata_fields_added: parsed_fields,
|
||||
faces_transferred: FacesTransferred {
|
||||
file_uuid: faces.get_str("file_uuid").unwrap_or("").to_string(),
|
||||
face_ids: faces
|
||||
.get_array("face_ids")
|
||||
.map(|arr| {
|
||||
arr.iter()
|
||||
.filter_map(|b| b.as_str().map(|s| s.to_string()))
|
||||
.collect()
|
||||
})
|
||||
.unwrap_or_default(),
|
||||
trace_ids: faces
|
||||
.get_array("trace_ids")
|
||||
.map(|arr| arr.iter().filter_map(|b| b.as_i32()).collect())
|
||||
.unwrap_or_default(),
|
||||
count: faces.get_i64("count").unwrap_or(0),
|
||||
},
|
||||
merge_params: MergeParams {
|
||||
keep_history: merge_params_doc.get_bool("keep_history").unwrap_or(true),
|
||||
cleared_stranger_id: merge_params_doc
|
||||
.get_bool("cleared_stranger_id")
|
||||
.unwrap_or(true),
|
||||
performed_by_user: merge_params_doc
|
||||
.get_str("performed_by_user")
|
||||
.ok()
|
||||
.map(|s| s.to_string()),
|
||||
},
|
||||
merged_at: doc
|
||||
.get_datetime("merged_at")
|
||||
.map(|d| d.to_chrono())
|
||||
.unwrap_or_default(),
|
||||
undo_deadline: doc
|
||||
.get_datetime("undo_deadline")
|
||||
.map(|d| d.to_chrono())
|
||||
.unwrap_or_default(),
|
||||
undone: doc.get_bool("undone").unwrap_or(false),
|
||||
undone_at: doc.get_datetime("undone_at").map(|d| d.to_chrono()).ok(),
|
||||
undone_by: doc.get_str("undone_by").ok().map(|s| s.to_string()),
|
||||
undone_snapshot,
|
||||
undo_expired: doc.get_bool("undo_expired").unwrap_or(false),
|
||||
})
|
||||
}
|
||||
|
||||
pub fn to_document(&self) -> Document {
|
||||
let mut doc = doc! {
|
||||
"merge_id": &self.merge_id,
|
||||
"source_identity": {
|
||||
"id": self.source_identity.id as i64,
|
||||
"uuid": &self.source_identity.uuid,
|
||||
"name": &self.source_identity.name,
|
||||
"identity_type": self.source_identity.identity_type.as_deref(),
|
||||
"source": self.source_identity.source.as_deref(),
|
||||
"status": &self.source_identity.status,
|
||||
"tmdb_id": self.source_identity.tmdb_id,
|
||||
"tmdb_profile": self.source_identity.tmdb_profile.as_deref(),
|
||||
"metadata": json_to_bson(&self.source_identity.metadata),
|
||||
"created_at": self.source_identity.created_at
|
||||
.map(|dt| BsonDateTime::from_chrono(dt)),
|
||||
"face_count": self.source_identity.face_count,
|
||||
},
|
||||
"target_identity": {
|
||||
"id": self.target_identity.id as i64,
|
||||
"uuid": &self.target_identity.uuid,
|
||||
"name": &self.target_identity.name,
|
||||
"metadata_before": json_to_bson(&self.target_identity.metadata_before),
|
||||
"metadata_after": self.target_identity.metadata_after.as_ref().map(json_to_bson),
|
||||
},
|
||||
"aliases_added_to_target": self.aliases_added_to_target.iter().map(|a| {
|
||||
doc! {
|
||||
"name": &a.name,
|
||||
"locale": &a.locale,
|
||||
"source": a.source.as_deref(),
|
||||
}
|
||||
}).collect::<Vec<Document>>(),
|
||||
"metadata_fields_added": &self.metadata_fields_added,
|
||||
"faces_transferred": {
|
||||
"file_uuid": &self.faces_transferred.file_uuid,
|
||||
"face_ids": &self.faces_transferred.face_ids,
|
||||
"trace_ids": &self.faces_transferred.trace_ids,
|
||||
"count": self.faces_transferred.count,
|
||||
},
|
||||
"merge_params": {
|
||||
"keep_history": self.merge_params.keep_history,
|
||||
"cleared_stranger_id": self.merge_params.cleared_stranger_id,
|
||||
"performed_by_user": self.merge_params.performed_by_user.as_deref(),
|
||||
},
|
||||
"merged_at": BsonDateTime::from_chrono(self.merged_at),
|
||||
"undo_deadline": BsonDateTime::from_chrono(self.undo_deadline),
|
||||
"undone": self.undone,
|
||||
"undone_at": self.undone_at.map(|dt| BsonDateTime::from_chrono(dt)),
|
||||
"undone_by": self.undone_by.as_deref(),
|
||||
"undone_snapshot": self.undone_snapshot.as_ref().map(|s| {
|
||||
doc! {
|
||||
"source_identity_id": s.source_identity_id,
|
||||
"source_uuid": &s.source_uuid,
|
||||
"source_name": &s.source_name,
|
||||
"target_metadata_at_undo": json_to_bson(&s.target_metadata_at_undo),
|
||||
}
|
||||
}),
|
||||
"undo_expired": self.undo_expired,
|
||||
};
|
||||
|
||||
if let Some(ref oid) = self.id {
|
||||
doc.insert("_id", oid.clone());
|
||||
}
|
||||
|
||||
doc
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct IdentityMergeHistoryStore {
|
||||
client: Client,
|
||||
db: Database,
|
||||
collection: Collection<Document>,
|
||||
}
|
||||
|
||||
impl IdentityMergeHistoryStore {
|
||||
pub async fn init() -> Result<Self> {
|
||||
let uri = crate::core::config::MONGODB_URL.as_str();
|
||||
let client = Client::with_uri_str(uri)
|
||||
.await
|
||||
.context("Failed to connect to MongoDB")?;
|
||||
let db_name = crate::core::config::MONGODB_DATABASE.as_str();
|
||||
let db = client.database(db_name);
|
||||
let collection: Collection<Document> = db.collection(COLLECTION_NAME);
|
||||
|
||||
let store = Self {
|
||||
client,
|
||||
db,
|
||||
collection,
|
||||
};
|
||||
|
||||
store.ensure_indexes().await?;
|
||||
Ok(store)
|
||||
}
|
||||
|
||||
async fn ensure_indexes(&self) -> Result<()> {
|
||||
let merge_id_index = IndexModel::builder()
|
||||
.keys(doc! { "merge_id": 1 })
|
||||
.options(
|
||||
mongodb::options::IndexOptions::builder()
|
||||
.unique(true)
|
||||
.build(),
|
||||
)
|
||||
.build();
|
||||
|
||||
let merged_at_index = IndexModel::builder().keys(doc! { "merged_at": -1 }).build();
|
||||
|
||||
let source_uuid_index = IndexModel::builder()
|
||||
.keys(doc! { "source_identity.uuid": 1 })
|
||||
.build();
|
||||
|
||||
let target_uuid_index = IndexModel::builder()
|
||||
.keys(doc! { "target_identity.uuid": 1 })
|
||||
.build();
|
||||
|
||||
self.collection
|
||||
.create_indexes(
|
||||
[
|
||||
merge_id_index,
|
||||
merged_at_index,
|
||||
source_uuid_index,
|
||||
target_uuid_index,
|
||||
],
|
||||
None,
|
||||
)
|
||||
.await
|
||||
.context("Failed to create identity_merge_history indexes")?;
|
||||
|
||||
tracing::info!("MongoDB identity_merge_history indexes ensured");
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn generate_merge_id() -> String {
|
||||
Uuid::new_v4().to_string()
|
||||
}
|
||||
|
||||
pub async fn store_merge_history(&self, history: &IdentityMergeHistory) -> Result<()> {
|
||||
let doc = history.to_document();
|
||||
self.collection
|
||||
.insert_one(doc, None)
|
||||
.await
|
||||
.context("Failed to store merge history in MongoDB")?;
|
||||
|
||||
tracing::info!(
|
||||
"Stored merge history: merge_id={}, source={}, target={}, faces={}",
|
||||
history.merge_id,
|
||||
history.source_identity.name,
|
||||
history.target_identity.name,
|
||||
history.faces_transferred.count
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn get_merge_history(&self, merge_id: &str) -> Result<Option<IdentityMergeHistory>> {
|
||||
let filter = doc! { "merge_id": merge_id };
|
||||
let result = self
|
||||
.collection
|
||||
.find_one(filter, None)
|
||||
.await
|
||||
.context("Failed to get merge history from MongoDB")?;
|
||||
|
||||
match result {
|
||||
Some(doc) => {
|
||||
let history = IdentityMergeHistory::from_document(&doc)
|
||||
.context("Failed to parse merge history from MongoDB")?;
|
||||
Ok(Some(history))
|
||||
}
|
||||
None => Ok(None),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn query_merge_history(
|
||||
&self,
|
||||
query: MergeHistoryQuery,
|
||||
page: u32,
|
||||
page_size: u32,
|
||||
) -> Result<(Vec<MergeHistoryEntry>, u64)> {
|
||||
let mut filter = doc! {};
|
||||
|
||||
if let Some(source_uuid) = query.source_uuid {
|
||||
filter.insert("source_identity.uuid", source_uuid);
|
||||
}
|
||||
if let Some(target_uuid) = query.target_uuid {
|
||||
filter.insert("target_identity.uuid", target_uuid);
|
||||
}
|
||||
if let Some(merge_id) = query.merge_id {
|
||||
filter.insert("merge_id", merge_id);
|
||||
}
|
||||
if let Some(undone) = query.undone {
|
||||
filter.insert("undone", undone);
|
||||
}
|
||||
|
||||
let skip = (page - 1) * page_size;
|
||||
let limit = page_size;
|
||||
|
||||
let mut cursor = self
|
||||
.collection
|
||||
.find(filter.clone(), None)
|
||||
.await
|
||||
.context("Failed to query merge history")?;
|
||||
|
||||
let total = self
|
||||
.collection
|
||||
.count_documents(filter, None)
|
||||
.await
|
||||
.context("Failed to count merge history")?;
|
||||
|
||||
let mut results: Vec<MergeHistoryEntry> = Vec::new();
|
||||
let mut count = 0;
|
||||
|
||||
while cursor.advance().await.context("Failed to advance cursor")? {
|
||||
if count >= skip && results.len() < limit as usize {
|
||||
let doc: Document = cursor
|
||||
.deserialize_current()
|
||||
.context("Failed to deserialize")?;
|
||||
|
||||
let merge_id = doc.get_str("merge_id").unwrap_or("").to_string();
|
||||
let source_name = doc
|
||||
.get_document("source_identity")
|
||||
.map(|d| d.get_str("name").unwrap_or("").to_string())
|
||||
.unwrap_or_default();
|
||||
let target_name = doc
|
||||
.get_document("target_identity")
|
||||
.map(|d| d.get_str("name").unwrap_or("").to_string())
|
||||
.unwrap_or_default();
|
||||
let faces_count = doc
|
||||
.get_document("faces_transferred")
|
||||
.map(|d| d.get_i64("count").unwrap_or(0))
|
||||
.unwrap_or(0);
|
||||
let merged_at = doc
|
||||
.get_datetime("merged_at")
|
||||
.map(|d| d.to_chrono())
|
||||
.unwrap_or_default();
|
||||
let undo_deadline = doc
|
||||
.get_datetime("undo_deadline")
|
||||
.map(|d| d.to_chrono())
|
||||
.unwrap_or_default();
|
||||
let undone = doc.get_bool("undone").unwrap_or(false);
|
||||
let undo_expired = doc.get_bool("undo_expired").unwrap_or(false);
|
||||
|
||||
results.push(MergeHistoryEntry {
|
||||
merge_id,
|
||||
source_name,
|
||||
target_name,
|
||||
faces_transferred: faces_count,
|
||||
merged_at,
|
||||
undo_deadline,
|
||||
undone,
|
||||
undo_expired,
|
||||
});
|
||||
}
|
||||
count += 1;
|
||||
}
|
||||
|
||||
Ok((results, total))
|
||||
}
|
||||
|
||||
pub async fn mark_as_undone(
|
||||
&self,
|
||||
merge_id: &str,
|
||||
undone_by: Option<&str>,
|
||||
undone_snapshot: UndoneSnapshot,
|
||||
) -> Result<()> {
|
||||
let filter = doc! { "merge_id": merge_id };
|
||||
let snapshot_doc = doc! {
|
||||
"source_identity_id": undone_snapshot.source_identity_id,
|
||||
"source_uuid": &undone_snapshot.source_uuid,
|
||||
"source_name": &undone_snapshot.source_name,
|
||||
"target_metadata_at_undo": json_to_bson(&undone_snapshot.target_metadata_at_undo),
|
||||
};
|
||||
let update = doc! {
|
||||
"$set": {
|
||||
"undone": true,
|
||||
"undone_at": BsonDateTime::from_chrono(Utc::now()),
|
||||
"undone_by": undone_by,
|
||||
"undone_snapshot": snapshot_doc,
|
||||
}
|
||||
};
|
||||
|
||||
self.collection
|
||||
.update_one(filter, update, None)
|
||||
.await
|
||||
.context("Failed to mark merge as undone")?;
|
||||
|
||||
tracing::info!("Marked merge {} as undone", merge_id);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn mark_as_redone(&self, merge_id: &str, redone_by: Option<&str>) -> Result<()> {
|
||||
let now = Utc::now();
|
||||
let new_deadline = now + chrono::Duration::hours(24);
|
||||
let filter = doc! { "merge_id": merge_id };
|
||||
let update = doc! {
|
||||
"$set": {
|
||||
"undone": false,
|
||||
"undone_at": bson::Bson::Null,
|
||||
"undone_by": redone_by,
|
||||
"undone_snapshot": bson::Bson::Null,
|
||||
"undo_deadline": BsonDateTime::from_chrono(new_deadline),
|
||||
"undo_expired": false
|
||||
}
|
||||
};
|
||||
|
||||
self.collection
|
||||
.update_one(filter, update, None)
|
||||
.await
|
||||
.context("Failed to mark merge as redone")?;
|
||||
|
||||
tracing::info!(
|
||||
"Marked merge {} as redone (new deadline: {})",
|
||||
merge_id,
|
||||
new_deadline
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn check_undo_deadline(&self, merge_id: &str) -> Result<bool> {
|
||||
let history = self
|
||||
.get_merge_history(merge_id)
|
||||
.await?
|
||||
.context("Merge history not found")?;
|
||||
|
||||
let now = Utc::now();
|
||||
if now > history.undo_deadline {
|
||||
return Ok(false);
|
||||
}
|
||||
|
||||
Ok(true)
|
||||
}
|
||||
|
||||
pub async fn mark_expired_merges(&self) -> Result<u64> {
|
||||
let now = BsonDateTime::from_chrono(Utc::now());
|
||||
let filter = doc! {
|
||||
"undo_deadline": { "$lt": now },
|
||||
"undone": false,
|
||||
"undo_expired": false
|
||||
};
|
||||
let update = doc! { "$set": { "undo_expired": true } };
|
||||
|
||||
let result = self
|
||||
.collection
|
||||
.update_many(filter, update, None)
|
||||
.await
|
||||
.context("Failed to mark expired merges")?;
|
||||
|
||||
let count = result.modified_count;
|
||||
if count > 0 {
|
||||
tracing::info!("Marked {} expired merges", count);
|
||||
}
|
||||
|
||||
Ok(count)
|
||||
}
|
||||
}
|
||||
@@ -32,17 +32,21 @@ pub trait VectorStore: Send + Sync {
|
||||
async fn search(&self, query_vector: &[f32], limit: usize) -> Result<Vec<SearchResult>>;
|
||||
}
|
||||
|
||||
pub mod identity_merge_history;
|
||||
pub mod mongodb_db;
|
||||
pub mod postgres_db;
|
||||
pub mod qdrant_db;
|
||||
pub mod redis_client;
|
||||
pub mod redis_db;
|
||||
pub mod sync_db;
|
||||
|
||||
pub use identity_merge_history::{
|
||||
AliasEntry, FacesTransferred, IdentityMergeHistory, IdentityMergeHistoryStore,
|
||||
IdentitySnapshot, MergeHistoryEntry, MergeHistoryQuery, MergeParams, TargetIdentitySnapshot,
|
||||
UndoneSnapshot,
|
||||
};
|
||||
pub use mongodb_db::MongoDb;
|
||||
pub use postgres_db::{
|
||||
Bm25Result, CandidateRecord, CreateApiKeyConfig, FileIdentityRecord, FileRecord,
|
||||
HybridSearchResult, IdentityChunkRecord, IdentityDetailRecord, IdentityFaceRecord,
|
||||
Bm25Result, CandidateRecord, CreateApiKeyConfig, FileFaceRecord, FileIdentityRecord,
|
||||
FileRecord, HybridSearchResult, IdentityChunkRecord, IdentityDetailRecord, IdentityFaceRecord,
|
||||
IdentityFileRecord, MonitorJob, MonitorJobStats, MonitorJobStatus, PipelineType, PostgresDb,
|
||||
ProcessorJobStatus, ProcessorResult, ProcessorType, ResourceRecord, VideoRecord, VideoStatus,
|
||||
};
|
||||
@@ -52,4 +56,3 @@ pub use redis_client::{
|
||||
ProgressMessage, RedisClient,
|
||||
};
|
||||
pub use redis_db::RedisDb;
|
||||
pub use sync_db::SyncDb;
|
||||
|
||||
@@ -131,7 +131,6 @@ impl MongoDb {
|
||||
pre_chunk_ids: vec![],
|
||||
parent_chunk_id: doc.parent_chunk_id,
|
||||
child_chunk_ids: doc.child_chunk_ids,
|
||||
visual_stats: None,
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
@@ -190,7 +189,6 @@ impl MongoDb {
|
||||
pre_chunk_ids: vec![],
|
||||
parent_chunk_id: doc.parent_chunk_id,
|
||||
child_chunk_ids: doc.child_chunk_ids,
|
||||
visual_stats: None,
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
@@ -246,7 +244,6 @@ impl MongoDb {
|
||||
pre_chunk_ids: vec![],
|
||||
parent_chunk_id: doc.parent_chunk_id,
|
||||
child_chunk_ids: doc.child_chunk_ids,
|
||||
visual_stats: None,
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
|
||||
@@ -70,7 +70,7 @@ impl QdrantDb {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let create_url = format!("{}/collections", self.base_url);
|
||||
let create_url = format!("{}/collections/{}", self.base_url, self.collection_name);
|
||||
let body = serde_json::json!({
|
||||
"vectors": {
|
||||
"size": vector_dim,
|
||||
@@ -79,7 +79,7 @@ impl QdrantDb {
|
||||
});
|
||||
|
||||
self.client
|
||||
.post(&create_url)
|
||||
.put(&create_url)
|
||||
.header("api-key", &self.api_key)
|
||||
.header("Content-Type", "application/json")
|
||||
.json(&body)
|
||||
@@ -867,50 +867,6 @@ impl VectorStore for QdrantDb {
|
||||
}
|
||||
}
|
||||
|
||||
/// Sync face embeddings from PostgreSQL to Qdrant for ANN search
|
||||
pub async fn sync_face_embeddings(file_uuid: &str) -> Result<()> {
|
||||
use crate::core::config::DATABASE_URL;
|
||||
use sqlx::Row;
|
||||
|
||||
let pool = sqlx::PgPool::connect(&DATABASE_URL).await?;
|
||||
let table = crate::core::db::schema::table_name("face_detections");
|
||||
|
||||
let qdrant: QdrantDb = QdrantDb::new();
|
||||
|
||||
let query = format!(
|
||||
"SELECT id, trace_id, frame_number, embedding FROM {} \
|
||||
WHERE file_uuid = $1 AND embedding IS NOT NULL \
|
||||
AND ((metadata->>'qc_ok')::boolean IS NULL OR (metadata->>'qc_ok')::boolean = true)",
|
||||
table
|
||||
);
|
||||
let rows = sqlx::query(&query).bind(file_uuid).fetch_all(&pool).await?;
|
||||
|
||||
let mut count = 0u64;
|
||||
for row in &rows {
|
||||
let id: i32 = row.get(0);
|
||||
let trace_id: Option<i32> = row.get(1);
|
||||
let frame_number: i64 = row.get(2);
|
||||
let embedding: Option<Vec<f32>> = row.get(3);
|
||||
|
||||
if let (Some(emb), Some(tid)) = (embedding, trace_id) {
|
||||
if let Err(e) = qdrant
|
||||
.upsert_face_embedding(id as u64, &emb, file_uuid, tid, frame_number)
|
||||
.await
|
||||
{
|
||||
tracing::warn!("Qdrant upsert failed for face {}: {}", id, e);
|
||||
continue;
|
||||
}
|
||||
count += 1;
|
||||
}
|
||||
}
|
||||
tracing::info!(
|
||||
"Synced {} face embeddings to Qdrant for {}",
|
||||
count,
|
||||
file_uuid
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn sync_trace_embeddings(file_uuid: &str) -> Result<()> {
|
||||
use crate::core::config::DATABASE_URL;
|
||||
use sqlx::Row;
|
||||
@@ -984,12 +940,22 @@ pub async fn sync_trace_embeddings(file_uuid: &str) -> Result<()> {
|
||||
}
|
||||
|
||||
// Push to Qdrant in batches
|
||||
// Point ID: hash(file_uuid + trace_id) for global uniqueness
|
||||
for chunk in trace_avgs.chunks(500) {
|
||||
let batch: Vec<(u64, &[f32], Option<serde_json::Value>)> = chunk
|
||||
.iter()
|
||||
.map(|t| {
|
||||
let point_id = {
|
||||
use sha2::{Digest, Sha256};
|
||||
let mut hasher = Sha256::new();
|
||||
hasher.update(file_uuid.as_bytes());
|
||||
hasher.update(b"_");
|
||||
hasher.update(t.tid.to_string().as_bytes());
|
||||
let hash = hasher.finalize();
|
||||
u64::from_be_bytes(hash[0..8].try_into().unwrap())
|
||||
};
|
||||
(
|
||||
t.tid as u64,
|
||||
point_id,
|
||||
t.avg_emb.as_slice(),
|
||||
Some(serde_json::json!({
|
||||
"trace_id": t.tid,
|
||||
|
||||
@@ -319,7 +319,9 @@ impl RedisClient {
|
||||
"timestamp": chrono::Utc::now().to_rfc3339(),
|
||||
});
|
||||
|
||||
let _: usize = conn.publish(&channel, serde_json::to_string(&alert_json)?).await?;
|
||||
let _: usize = conn
|
||||
.publish(&channel, serde_json::to_string(&alert_json)?)
|
||||
.await?;
|
||||
|
||||
tracing::warn!(
|
||||
"Processor alert: {} | {} | {} | {}",
|
||||
|
||||
@@ -78,7 +78,10 @@ impl SyncDb {
|
||||
pub async fn embed_text(&self, text: &str) -> Result<Vec<f32>> {
|
||||
let client = reqwest::Client::new();
|
||||
let response = client
|
||||
.post(&format!("{}/api/embeddings", crate::core::config::OLLAMA_URL.as_str()))
|
||||
.post(&format!(
|
||||
"{}/api/embeddings",
|
||||
crate::core::config::OLLAMA_URL.as_str()
|
||||
))
|
||||
.json(&serde_json::json!({
|
||||
"model": "all-minilm",
|
||||
"prompt": text,
|
||||
Reference in New Issue
Block a user