feat: Initial v0.9 release with API Key authentication
## v0.9.20260325_144654 ### Features - API Key Authentication System - Job Worker System - V2 Backup Versioning ### Bug Fixes - get_processor_results_by_job column mapping Co-authored-by: OpenCode
This commit is contained in:
@@ -32,9 +32,20 @@ pub trait VectorStore: Send + Sync {
|
||||
pub mod mongodb_db;
|
||||
pub mod postgres_db;
|
||||
pub mod qdrant_db;
|
||||
pub mod redis_client;
|
||||
pub mod redis_db;
|
||||
pub mod sync_db;
|
||||
|
||||
pub use mongodb_db::MongoDb;
|
||||
pub use postgres_db::{PostgresDb, VideoRecord};
|
||||
pub use qdrant_db::QdrantDb;
|
||||
pub use postgres_db::{
|
||||
Bm25Result, CreateApiKeyConfig, HybridSearchResult, MonitorJob, MonitorJobStats,
|
||||
MonitorJobStatus, PostgresDb, ProcessorJobStatus, ProcessorResult, ProcessorType, VideoRecord,
|
||||
VideoStatus,
|
||||
};
|
||||
pub use qdrant_db::{QdrantDb, VectorPayload};
|
||||
pub use redis_client::{
|
||||
JobErrorMessage, MonitorJobRedis, ProcessorStatus as RedisProcessorStatus, ProgressData,
|
||||
ProgressMessage, RedisClient,
|
||||
};
|
||||
pub use redis_db::RedisDb;
|
||||
pub use sync_db::SyncDb;
|
||||
|
||||
@@ -1,53 +1,269 @@
|
||||
use anyhow::Result;
|
||||
use anyhow::{Context, Result};
|
||||
use async_trait::async_trait;
|
||||
use std::sync::Arc;
|
||||
use tokio::sync::RwLock;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use super::Database;
|
||||
use crate::core::chunk::types::{Chunk, ChunkRule, ChunkType};
|
||||
|
||||
pub struct MongoDb {
|
||||
cache: Arc<RwLock<MongoCache>>,
|
||||
base_url: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Default)]
|
||||
pub struct MongoCache {
|
||||
documents: std::collections::HashMap<String, serde_json::Value>,
|
||||
}
|
||||
|
||||
#[derive(Debug, serde::Serialize, serde::Deserialize)]
|
||||
pub struct VideoDocument {
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct ChunkDocument {
|
||||
pub uuid: String,
|
||||
pub file_path: String,
|
||||
pub file_name: String,
|
||||
pub probe: serde_json::Value,
|
||||
pub asr: Option<serde_json::Value>,
|
||||
pub asrx: Option<serde_json::Value>,
|
||||
pub ocr: Option<serde_json::Value>,
|
||||
pub yolo: Option<serde_json::Value>,
|
||||
pub face: Option<serde_json::Value>,
|
||||
pub pose: Option<serde_json::Value>,
|
||||
pub created_at: String,
|
||||
pub updated_at: String,
|
||||
pub chunk_id: String,
|
||||
pub chunk_index: u32,
|
||||
pub chunk_type: String,
|
||||
pub start_time: f64,
|
||||
pub end_time: f64,
|
||||
pub fps: f64,
|
||||
pub start_frame: i64,
|
||||
pub end_frame: i64,
|
||||
pub content: serde_json::Value,
|
||||
pub metadata: Option<serde_json::Value>,
|
||||
pub vector_id: Option<String>,
|
||||
pub parent_chunk_id: Option<String>,
|
||||
pub child_chunk_ids: Vec<String>,
|
||||
}
|
||||
|
||||
impl From<Chunk> for ChunkDocument {
|
||||
fn from(chunk: Chunk) -> Self {
|
||||
Self {
|
||||
uuid: chunk.uuid,
|
||||
chunk_id: chunk.chunk_id,
|
||||
chunk_index: chunk.chunk_index,
|
||||
chunk_type: chunk.chunk_type.as_str().to_string(),
|
||||
start_time: chunk.start_time,
|
||||
end_time: chunk.end_time,
|
||||
fps: chunk.fps,
|
||||
start_frame: chunk.start_frame,
|
||||
end_frame: chunk.end_frame,
|
||||
content: chunk.content,
|
||||
metadata: chunk.metadata,
|
||||
vector_id: chunk.vector_id,
|
||||
parent_chunk_id: chunk.parent_chunk_id,
|
||||
child_chunk_ids: chunk.child_chunk_ids,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl MongoDb {
|
||||
pub async fn store_video(&self, _doc: &VideoDocument) -> Result<()> {
|
||||
// TODO: Implement MongoDB client
|
||||
pub fn new() -> Self {
|
||||
let base_url =
|
||||
std::env::var("MONGODB_URL").unwrap_or_else(|_| "http://localhost:27017".to_string());
|
||||
Self { base_url }
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for MongoDb {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
impl MongoDb {
|
||||
pub async fn store_chunk(&self, chunk: &Chunk) -> Result<()> {
|
||||
let doc: ChunkDocument = chunk.clone().into();
|
||||
let client = reqwest::Client::new();
|
||||
|
||||
let url = format!("{}/momentry/chunks", self.base_url);
|
||||
|
||||
client
|
||||
.post(&url)
|
||||
.json(&doc)
|
||||
.send()
|
||||
.await
|
||||
.context("Failed to store chunk in MongoDB")?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn get_video(&self, _uuid: &str) -> Result<Option<VideoDocument>> {
|
||||
// TODO: Implement MongoDB client
|
||||
Ok(None)
|
||||
pub async fn get_chunks_by_uuid(&self, uuid: &str) -> Result<Vec<Chunk>> {
|
||||
let client = reqwest::Client::new();
|
||||
let url = format!(
|
||||
"{}/momentry/chunks?filter={{\"uuid\":\"{}\"}}",
|
||||
self.base_url, uuid
|
||||
);
|
||||
|
||||
let response = client
|
||||
.get(&url)
|
||||
.send()
|
||||
.await
|
||||
.context("Failed to get chunks from MongoDB")?;
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct MongoResponse {
|
||||
documents: Vec<ChunkDocument>,
|
||||
}
|
||||
|
||||
let result: MongoResponse = response.json().await?;
|
||||
|
||||
let chunks: Vec<Chunk> = result
|
||||
.documents
|
||||
.into_iter()
|
||||
.map(|doc| {
|
||||
let chunk_type = match doc.chunk_type.as_str() {
|
||||
"sentence" => ChunkType::Sentence,
|
||||
"cut" => ChunkType::Cut,
|
||||
"time_based" => ChunkType::TimeBased,
|
||||
"trace" => ChunkType::Trace,
|
||||
"story" => ChunkType::Story,
|
||||
_ => ChunkType::Sentence,
|
||||
};
|
||||
|
||||
Chunk {
|
||||
file_id: 0,
|
||||
uuid: doc.uuid,
|
||||
chunk_id: doc.chunk_id,
|
||||
chunk_index: doc.chunk_index,
|
||||
chunk_type,
|
||||
rule: ChunkRule::Rule1,
|
||||
start_time: doc.start_time,
|
||||
end_time: doc.end_time,
|
||||
fps: doc.fps,
|
||||
start_frame: doc.start_frame,
|
||||
end_frame: doc.end_frame,
|
||||
text_content: None,
|
||||
content: doc.content,
|
||||
metadata: doc.metadata,
|
||||
vector_id: doc.vector_id,
|
||||
frame_count: 0,
|
||||
pre_chunk_ids: vec![],
|
||||
parent_chunk_id: doc.parent_chunk_id,
|
||||
child_chunk_ids: doc.child_chunk_ids,
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
|
||||
Ok(chunks)
|
||||
}
|
||||
|
||||
pub async fn search_text(&self, query: &str) -> Result<Vec<Chunk>> {
|
||||
let client = reqwest::Client::new();
|
||||
let url = format!(
|
||||
"{}/momentry/chunks?filter={{\"$text\":{{\"$search\":\"{}\"}}}}",
|
||||
self.base_url, query
|
||||
);
|
||||
|
||||
let response = client
|
||||
.get(&url)
|
||||
.send()
|
||||
.await
|
||||
.context("Failed to search text in MongoDB")?;
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct MongoResponse {
|
||||
documents: Vec<ChunkDocument>,
|
||||
}
|
||||
|
||||
let result: MongoResponse = response.json().await?;
|
||||
|
||||
let chunks: Vec<Chunk> = result
|
||||
.documents
|
||||
.into_iter()
|
||||
.map(|doc| {
|
||||
let chunk_type = match doc.chunk_type.as_str() {
|
||||
"sentence" => ChunkType::Sentence,
|
||||
"cut" => ChunkType::Cut,
|
||||
"time" => ChunkType::TimeBased,
|
||||
"trace" => ChunkType::Trace,
|
||||
"story" => ChunkType::Story,
|
||||
_ => ChunkType::Sentence,
|
||||
};
|
||||
|
||||
Chunk {
|
||||
file_id: 0,
|
||||
uuid: doc.uuid,
|
||||
chunk_id: doc.chunk_id,
|
||||
chunk_index: doc.chunk_index,
|
||||
chunk_type,
|
||||
rule: ChunkRule::Rule1,
|
||||
start_time: doc.start_time,
|
||||
end_time: doc.end_time,
|
||||
fps: doc.fps,
|
||||
start_frame: doc.start_frame,
|
||||
end_frame: doc.end_frame,
|
||||
text_content: None,
|
||||
content: doc.content,
|
||||
metadata: doc.metadata,
|
||||
vector_id: doc.vector_id,
|
||||
frame_count: 0,
|
||||
pre_chunk_ids: vec![],
|
||||
parent_chunk_id: doc.parent_chunk_id,
|
||||
child_chunk_ids: doc.child_chunk_ids,
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
|
||||
Ok(chunks)
|
||||
}
|
||||
|
||||
pub async fn get_all_chunks(&self) -> Result<Vec<Chunk>> {
|
||||
let client = reqwest::Client::new();
|
||||
let url = format!("{}/momentry/chunks", self.base_url);
|
||||
|
||||
let response = client
|
||||
.get(&url)
|
||||
.send()
|
||||
.await
|
||||
.context("Failed to get all chunks from MongoDB")?;
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct MongoResponse {
|
||||
documents: Vec<ChunkDocument>,
|
||||
}
|
||||
|
||||
let result: MongoResponse = response.json().await?;
|
||||
|
||||
let chunks: Vec<Chunk> = result
|
||||
.documents
|
||||
.into_iter()
|
||||
.map(|doc| {
|
||||
let chunk_type = match doc.chunk_type.as_str() {
|
||||
"sentence" => ChunkType::Sentence,
|
||||
"cut" => ChunkType::Cut,
|
||||
"time" => ChunkType::TimeBased,
|
||||
"trace" => ChunkType::Trace,
|
||||
"story" => ChunkType::Story,
|
||||
_ => ChunkType::Sentence,
|
||||
};
|
||||
|
||||
Chunk {
|
||||
file_id: 0,
|
||||
uuid: doc.uuid,
|
||||
chunk_id: doc.chunk_id,
|
||||
chunk_index: doc.chunk_index,
|
||||
chunk_type,
|
||||
rule: ChunkRule::Rule1,
|
||||
start_time: doc.start_time,
|
||||
end_time: doc.end_time,
|
||||
fps: doc.fps,
|
||||
start_frame: doc.start_frame,
|
||||
end_frame: doc.end_frame,
|
||||
text_content: None,
|
||||
content: doc.content,
|
||||
metadata: doc.metadata,
|
||||
vector_id: doc.vector_id,
|
||||
frame_count: 0,
|
||||
pre_chunk_ids: vec![],
|
||||
parent_chunk_id: doc.parent_chunk_id,
|
||||
child_chunk_ids: doc.child_chunk_ids,
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
|
||||
Ok(chunks)
|
||||
}
|
||||
|
||||
pub async fn get_chunk_count(&self) -> Result<i64> {
|
||||
let chunks = self.get_all_chunks().await?;
|
||||
Ok(chunks.len() as i64)
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl Database for MongoDb {
|
||||
impl super::Database for MongoDb {
|
||||
async fn init() -> Result<Self> {
|
||||
// TODO: Initialize MongoDB client
|
||||
Ok(Self {
|
||||
cache: Arc::new(RwLock::new(MongoCache::default())),
|
||||
})
|
||||
Ok(Self::new())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2669,8 +2669,9 @@ impl PostgresDb {
|
||||
pub async fn get_processor_results_by_job(&self, job_id: i32) -> Result<Vec<ProcessorResult>> {
|
||||
let rows = sqlx::query(
|
||||
r#"
|
||||
SELECT id, job_id, processor, status, started_at, completed_at, duration_secs,
|
||||
error_message, output_data, retry_count, created_at, updated_at
|
||||
SELECT id, job_id, processor, status, output_path, started_at, completed_at,
|
||||
error_message, progress_total, progress_current, last_checkpoint,
|
||||
created_at, updated_at, duration_secs
|
||||
FROM processor_results
|
||||
WHERE job_id = $1
|
||||
ORDER BY created_at ASC
|
||||
@@ -2685,6 +2686,10 @@ impl PostgresDb {
|
||||
.map(|r| {
|
||||
let status_str: String = r.get(3);
|
||||
let processor_type_str: String = r.get(2);
|
||||
let started_at: Option<chrono::NaiveDateTime> = r.get(5);
|
||||
let completed_at: Option<chrono::NaiveDateTime> = r.get(6);
|
||||
let created_at: chrono::NaiveDateTime = r.get(11);
|
||||
let updated_at: Option<chrono::NaiveDateTime> = r.get(12);
|
||||
ProcessorResult {
|
||||
id: r.get(0),
|
||||
job_id: r.get(1),
|
||||
@@ -2692,14 +2697,14 @@ impl PostgresDb {
|
||||
.unwrap_or(ProcessorType::Asr),
|
||||
status: ProcessorJobStatus::from_db_str(&status_str)
|
||||
.unwrap_or(ProcessorJobStatus::Pending),
|
||||
started_at: r.get(4),
|
||||
completed_at: r.get(5),
|
||||
duration_secs: r.get(6),
|
||||
started_at: started_at.map(|t| t.to_string()),
|
||||
completed_at: completed_at.map(|t| t.to_string()),
|
||||
duration_secs: r.get(13),
|
||||
error_message: r.get(7),
|
||||
output_data: r.get(8),
|
||||
retry_count: r.get(9),
|
||||
created_at: r.get(10),
|
||||
updated_at: r.get(11),
|
||||
output_data: None,
|
||||
retry_count: 0,
|
||||
created_at: created_at.to_string(),
|
||||
updated_at: updated_at.map(|t| t.to_string()).unwrap_or_default(),
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
|
||||
@@ -1,46 +1,330 @@
|
||||
use anyhow::Result;
|
||||
use anyhow::{Context, Result};
|
||||
use async_trait::async_trait;
|
||||
use std::sync::Arc;
|
||||
use tokio::sync::RwLock;
|
||||
use reqwest::Client;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::collections::HashMap;
|
||||
|
||||
use super::{Database, SearchResult, VectorStore};
|
||||
|
||||
pub struct QdrantDb {
|
||||
client: Client,
|
||||
base_url: String,
|
||||
api_key: String,
|
||||
collection_name: String,
|
||||
cache: Arc<RwLock<QdrantCache>>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Default)]
|
||||
pub struct QdrantCache {
|
||||
vectors: std::collections::HashMap<String, Vec<f32>>,
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct VectorPayload {
|
||||
pub uuid: String,
|
||||
pub chunk_id: String,
|
||||
pub chunk_type: String,
|
||||
pub start_time: f64,
|
||||
pub end_time: f64,
|
||||
pub text: Option<String>,
|
||||
}
|
||||
|
||||
impl QdrantDb {
|
||||
pub async fn init_collection(&self) -> Result<()> {
|
||||
// TODO: Implement actual Qdrant client
|
||||
// This is a placeholder
|
||||
pub fn new() -> Self {
|
||||
let base_url =
|
||||
std::env::var("QDRANT_URL").unwrap_or_else(|_| "http://localhost:6333".to_string());
|
||||
let api_key = std::env::var("QDRANT_API_KEY")
|
||||
.unwrap_or_else(|_| "Test3200Test3200Test3200".to_string());
|
||||
let collection_name =
|
||||
std::env::var("QDRANT_COLLECTION").unwrap_or_else(|_| "chunks_v3".to_string());
|
||||
|
||||
Self {
|
||||
client: Client::new(),
|
||||
base_url,
|
||||
api_key,
|
||||
collection_name,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for QdrantDb {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
impl QdrantDb {
|
||||
pub async fn init_collection(&self, vector_dim: usize) -> Result<()> {
|
||||
let url = format!("{}/collections/{}", self.base_url, self.collection_name);
|
||||
|
||||
let response = self
|
||||
.client
|
||||
.get(&url)
|
||||
.header("api-key", &self.api_key)
|
||||
.send()
|
||||
.await?;
|
||||
|
||||
if response.status().is_success() {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let create_url = format!("{}/collections", self.base_url);
|
||||
let body = serde_json::json!({
|
||||
"vectors": {
|
||||
"size": vector_dim,
|
||||
"distance": "Cosine"
|
||||
}
|
||||
});
|
||||
|
||||
self.client
|
||||
.post(&create_url)
|
||||
.header("api-key", &self.api_key)
|
||||
.header("Content-Type", "application/json")
|
||||
.json(&body)
|
||||
.send()
|
||||
.await
|
||||
.context("Failed to create Qdrant collection")?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn upsert_vector(&self, chunk_id: &str, vector: &[f32]) -> Result<()> {
|
||||
let mut cache = self.cache.write().await;
|
||||
cache.vectors.insert(chunk_id.to_string(), vector.to_vec());
|
||||
pub async fn upsert_vector(
|
||||
&self,
|
||||
_chunk_id: &str,
|
||||
vector: &[f32],
|
||||
payload: VectorPayload,
|
||||
) -> Result<()> {
|
||||
let url = format!(
|
||||
"{}/collections/{}/points",
|
||||
self.base_url, self.collection_name
|
||||
);
|
||||
|
||||
let mut payload_map = HashMap::new();
|
||||
payload_map.insert("uuid".to_string(), serde_json::json!(payload.uuid));
|
||||
payload_map.insert("chunk_id".to_string(), serde_json::json!(payload.chunk_id));
|
||||
payload_map.insert(
|
||||
"chunk_type".to_string(),
|
||||
serde_json::json!(payload.chunk_type),
|
||||
);
|
||||
payload_map.insert(
|
||||
"start_time".to_string(),
|
||||
serde_json::json!(payload.start_time),
|
||||
);
|
||||
payload_map.insert("end_time".to_string(), serde_json::json!(payload.end_time));
|
||||
if let Some(text) = payload.text {
|
||||
payload_map.insert("text".to_string(), serde_json::json!(text));
|
||||
}
|
||||
|
||||
let point_id = uuid::Uuid::new_v4().to_string();
|
||||
|
||||
let body = serde_json::json!({
|
||||
"points": [{
|
||||
"id": point_id,
|
||||
"vector": vector,
|
||||
"payload": payload_map
|
||||
}]
|
||||
});
|
||||
|
||||
self.client
|
||||
.put(&url)
|
||||
.header("api-key", &self.api_key)
|
||||
.header("Content-Type", "application/json")
|
||||
.json(&body)
|
||||
.send()
|
||||
.await
|
||||
.context("Failed to upsert vector in Qdrant")?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn search(&self, query_vector: &[f32], limit: usize) -> Result<Vec<SearchResult>> {
|
||||
let url = format!(
|
||||
"{}/collections/{}/points/search",
|
||||
self.base_url, self.collection_name
|
||||
);
|
||||
|
||||
let body = serde_json::json!({
|
||||
"vector": query_vector,
|
||||
"limit": limit,
|
||||
"with_payload": true
|
||||
});
|
||||
|
||||
let response = self
|
||||
.client
|
||||
.post(&url)
|
||||
.header("api-key", &self.api_key)
|
||||
.header("Content-Type", "application/json")
|
||||
.json(&body)
|
||||
.send()
|
||||
.await
|
||||
.context("Failed to search Qdrant")?;
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct QdrantSearchResult {
|
||||
result: Vec<QdrantPoint>,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct QdrantPoint {
|
||||
#[allow(dead_code)]
|
||||
id: serde_json::Value,
|
||||
score: f64,
|
||||
payload: HashMap<String, serde_json::Value>,
|
||||
}
|
||||
|
||||
let result: QdrantSearchResult = response.json().await?;
|
||||
|
||||
let search_results: Vec<SearchResult> = result
|
||||
.result
|
||||
.into_iter()
|
||||
.map(|r| {
|
||||
let chunk_id = r
|
||||
.payload
|
||||
.get("chunk_id")
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or("unknown")
|
||||
.to_string();
|
||||
SearchResult {
|
||||
chunk_id,
|
||||
score: r.score as f32,
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
|
||||
Ok(search_results)
|
||||
}
|
||||
|
||||
pub async fn search_in_uuid(
|
||||
&self,
|
||||
query_vector: &[f64],
|
||||
uuid: &str,
|
||||
limit: usize,
|
||||
) -> Result<Vec<SearchResult>> {
|
||||
let url = format!(
|
||||
"{}/collections/{}/points/search",
|
||||
self.base_url, self.collection_name
|
||||
);
|
||||
|
||||
let body = serde_json::json!({
|
||||
"vector": query_vector,
|
||||
"limit": limit,
|
||||
"with_payload": true,
|
||||
"filter": {
|
||||
"must": [
|
||||
{
|
||||
"key": "uuid",
|
||||
"match": {
|
||||
"value": uuid
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
});
|
||||
|
||||
let response = self
|
||||
.client
|
||||
.post(&url)
|
||||
.header("api-key", &self.api_key)
|
||||
.header("Content-Type", "application/json")
|
||||
.json(&body)
|
||||
.send()
|
||||
.await
|
||||
.context("Failed to search Qdrant")?;
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct QdrantSearchResult {
|
||||
result: Vec<QdrantPoint>,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct QdrantPoint {
|
||||
#[allow(dead_code)]
|
||||
id: serde_json::Value,
|
||||
score: f64,
|
||||
payload: HashMap<String, serde_json::Value>,
|
||||
}
|
||||
|
||||
let result: QdrantSearchResult = response.json().await?;
|
||||
|
||||
let search_results: Vec<SearchResult> = result
|
||||
.result
|
||||
.into_iter()
|
||||
.map(|r| {
|
||||
let chunk_id = r
|
||||
.payload
|
||||
.get("chunk_id")
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or("unknown")
|
||||
.to_string();
|
||||
SearchResult {
|
||||
chunk_id,
|
||||
score: r.score as f32,
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
|
||||
Ok(search_results)
|
||||
}
|
||||
|
||||
pub async fn delete_by_uuid(&self, uuid: &str) -> Result<()> {
|
||||
let url = format!(
|
||||
"{}/collections/{}/points/delete",
|
||||
self.base_url, self.collection_name
|
||||
);
|
||||
|
||||
let body = serde_json::json!({
|
||||
"filter": {
|
||||
"must": [
|
||||
{
|
||||
"key": "uuid",
|
||||
"match": {
|
||||
"value": uuid
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
});
|
||||
|
||||
self.client
|
||||
.post(&url)
|
||||
.header("api-key", &self.api_key)
|
||||
.header("Content-Type", "application/json")
|
||||
.json(&body)
|
||||
.send()
|
||||
.await
|
||||
.context("Failed to delete points from Qdrant")?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn get_point_count(&self) -> Result<usize> {
|
||||
let url = format!(
|
||||
"{}/collections/{}/info",
|
||||
self.base_url, self.collection_name
|
||||
);
|
||||
|
||||
let response = self
|
||||
.client
|
||||
.get(&url)
|
||||
.header("api-key", &self.api_key)
|
||||
.send()
|
||||
.await
|
||||
.context("Failed to get collection info")?;
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct CollectionInfo {
|
||||
result: CollectionStatus,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct CollectionStatus {
|
||||
points_count: usize,
|
||||
}
|
||||
|
||||
let result: CollectionInfo = response.json().await?;
|
||||
Ok(result.result.points_count)
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl Database for QdrantDb {
|
||||
async fn init() -> Result<Self> {
|
||||
let collection_name =
|
||||
std::env::var("QDRANT_COLLECTION").unwrap_or_else(|_| "momentry_chunks".to_string());
|
||||
|
||||
let db = Self {
|
||||
collection_name,
|
||||
cache: Arc::new(RwLock::new(QdrantCache::default())),
|
||||
};
|
||||
|
||||
db.init_collection().await?;
|
||||
let db = Self::new();
|
||||
db.init_collection(768).await?;
|
||||
Ok(db)
|
||||
}
|
||||
}
|
||||
@@ -48,41 +332,18 @@ impl Database for QdrantDb {
|
||||
#[async_trait]
|
||||
impl VectorStore for QdrantDb {
|
||||
async fn store_vector(&self, chunk_id: &str, vector: &[f32]) -> Result<()> {
|
||||
self.upsert_vector(chunk_id, vector).await
|
||||
let payload = VectorPayload {
|
||||
uuid: String::new(),
|
||||
chunk_id: chunk_id.to_string(),
|
||||
chunk_type: String::new(),
|
||||
start_time: 0.0,
|
||||
end_time: 0.0,
|
||||
text: None,
|
||||
};
|
||||
self.upsert_vector(chunk_id, vector, payload).await
|
||||
}
|
||||
|
||||
async fn search(&self, query_vector: &[f32], limit: usize) -> Result<Vec<SearchResult>> {
|
||||
// Simple cosine similarity search (placeholder)
|
||||
let cache = self.cache.read().await;
|
||||
let mut results: Vec<SearchResult> = Vec::new();
|
||||
|
||||
for (chunk_id, vector) in &cache.vectors {
|
||||
let similarity = cosine_similarity(query_vector, vector);
|
||||
results.push(SearchResult {
|
||||
chunk_id: chunk_id.clone(),
|
||||
score: similarity,
|
||||
});
|
||||
}
|
||||
|
||||
results.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap());
|
||||
results.truncate(limit);
|
||||
|
||||
Ok(results)
|
||||
self.search(query_vector, limit).await
|
||||
}
|
||||
}
|
||||
|
||||
fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
|
||||
if a.len() != b.len() || a.is_empty() {
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
let dot_product: f32 = a.iter().zip(b.iter()).map(|(x, y)| x * y).sum();
|
||||
let norm_a: f32 = a.iter().map(|x| x * x).sum::<f32>().sqrt();
|
||||
let norm_b: f32 = b.iter().map(|x| x * x).sum::<f32>().sqrt();
|
||||
|
||||
if norm_a == 0.0 || norm_b == 0.0 {
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
dot_product / (norm_a * norm_b)
|
||||
}
|
||||
|
||||
@@ -6,6 +6,7 @@ use tokio::sync::RwLock;
|
||||
use super::Database;
|
||||
|
||||
pub struct RedisDb {
|
||||
#[allow(dead_code)]
|
||||
state: Arc<RwLock<RedisState>>,
|
||||
}
|
||||
|
||||
|
||||
155
src/core/db/sync_db.rs
Normal file
155
src/core/db/sync_db.rs
Normal file
@@ -0,0 +1,155 @@
|
||||
use anyhow::{Context, Result};
|
||||
use serde_json::json;
|
||||
|
||||
use crate::core::chunk::types::{Chunk, ChunkRule, ChunkType};
|
||||
use crate::core::db::mongodb_db::MongoDb;
|
||||
use crate::core::db::postgres_db::PostgresDb;
|
||||
use crate::core::db::qdrant_db::{QdrantDb, VectorPayload};
|
||||
use crate::core::processor::asr::{AsrResult, AsrSegment};
|
||||
|
||||
pub struct SyncDb {
|
||||
postgres: PostgresDb,
|
||||
mongodb: MongoDb,
|
||||
qdrant: QdrantDb,
|
||||
}
|
||||
|
||||
impl SyncDb {
|
||||
pub async fn new(postgres: PostgresDb, mongodb: MongoDb, qdrant: QdrantDb) -> Result<Self> {
|
||||
Ok(Self {
|
||||
postgres,
|
||||
mongodb,
|
||||
qdrant,
|
||||
})
|
||||
}
|
||||
|
||||
pub async fn store_chunk_with_vector(&self, mut chunk: Chunk, text: &str) -> Result<Chunk> {
|
||||
let uuid = chunk.uuid.clone();
|
||||
let chunk_id = chunk.chunk_id.clone();
|
||||
let chunk_type = chunk.chunk_type.as_str().to_string();
|
||||
let start_time = chunk.start_time;
|
||||
let end_time = chunk.end_time;
|
||||
|
||||
let vector = self.embed_text(text).await?;
|
||||
|
||||
let vector_id = format!("vec_{}", chunk_id);
|
||||
chunk = chunk.with_vector_id(vector_id.clone());
|
||||
|
||||
let postgres_result = self.postgres.store_chunk(&chunk).await;
|
||||
if let Err(e) = &postgres_result {
|
||||
tracing::warn!("Failed to store chunk in PostgreSQL: {}", e);
|
||||
}
|
||||
|
||||
let mongo_result = self.mongodb.store_chunk(&chunk).await;
|
||||
if let Err(e) = &mongo_result {
|
||||
tracing::warn!("Failed to store chunk in MongoDB: {}", e);
|
||||
}
|
||||
|
||||
let payload = VectorPayload {
|
||||
uuid: uuid.clone(),
|
||||
chunk_id: chunk_id.clone(),
|
||||
chunk_type,
|
||||
start_time,
|
||||
end_time,
|
||||
text: Some(text.to_string()),
|
||||
};
|
||||
|
||||
let qdrant_result = self
|
||||
.qdrant
|
||||
.upsert_vector(&vector_id, &vector, payload)
|
||||
.await;
|
||||
if let Err(e) = &qdrant_result {
|
||||
tracing::warn!("Failed to store vector in Qdrant: {}", e);
|
||||
}
|
||||
|
||||
let pg_vector_result = self.postgres.store_vector(&vector_id, &vector, &uuid).await;
|
||||
if let Err(e) = &pg_vector_result {
|
||||
tracing::warn!("Failed to store vector in PostgreSQL: {}", e);
|
||||
}
|
||||
|
||||
postgres_result?;
|
||||
mongo_result?;
|
||||
qdrant_result?;
|
||||
|
||||
Ok(chunk)
|
||||
}
|
||||
|
||||
pub async fn embed_text(&self, text: &str) -> Result<Vec<f32>> {
|
||||
let client = reqwest::Client::new();
|
||||
let response = client
|
||||
.post("http://localhost:11434/api/embeddings")
|
||||
.json(&json!({
|
||||
"model": "nomic-embed-text",
|
||||
"prompt": text
|
||||
}))
|
||||
.send()
|
||||
.await
|
||||
.context("Failed to call Ollama embedding API")?;
|
||||
|
||||
#[derive(serde::Deserialize)]
|
||||
struct EmbeddingResponse {
|
||||
embedding: Vec<f32>,
|
||||
}
|
||||
|
||||
let embedding = response
|
||||
.json::<EmbeddingResponse>()
|
||||
.await
|
||||
.context("Failed to parse embedding response")?;
|
||||
|
||||
Ok(embedding.embedding)
|
||||
}
|
||||
|
||||
pub async fn process_asr_to_chunks(
|
||||
&self,
|
||||
uuid: &str,
|
||||
asr_result: &AsrResult,
|
||||
) -> Result<Vec<Chunk>> {
|
||||
let mut chunks = Vec::new();
|
||||
|
||||
for (i, segment) in asr_result.segments.iter().enumerate() {
|
||||
let segment: &AsrSegment = segment;
|
||||
let content = json!({
|
||||
"text": segment.text,
|
||||
"text_normalized": segment.text.to_lowercase(),
|
||||
});
|
||||
|
||||
let metadata = json!({
|
||||
"language": asr_result.language,
|
||||
"language_probability": asr_result.language_probability,
|
||||
});
|
||||
|
||||
let chunk = Chunk::new(
|
||||
0, // file_id - will be set later
|
||||
uuid.to_string(),
|
||||
i as u32,
|
||||
ChunkType::Sentence,
|
||||
ChunkRule::Rule1,
|
||||
segment.start,
|
||||
segment.end,
|
||||
24.0, // fps
|
||||
content,
|
||||
)
|
||||
.with_metadata(metadata);
|
||||
|
||||
chunks.push(chunk);
|
||||
}
|
||||
|
||||
let mut stored_chunks = Vec::new();
|
||||
for chunk in chunks {
|
||||
let text = chunk
|
||||
.content
|
||||
.get("text")
|
||||
.and_then(|t| t.as_str())
|
||||
.unwrap_or("")
|
||||
.to_string();
|
||||
|
||||
match self.store_chunk_with_vector(chunk, &text).await {
|
||||
Ok(stored) => stored_chunks.push(stored),
|
||||
Err(e) => {
|
||||
tracing::error!("Failed to store chunk: {}", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(stored_chunks)
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user