chore: backup before migration to new repo
This commit is contained in:
@@ -6,6 +6,7 @@ use crate::core::chunk::types::{Chunk, ChunkRule, ChunkType};
|
||||
|
||||
/// Handle used to build request URLs of the form `{base_url}/{database}/chunks`
/// for storing and querying chunks.
pub struct MongoDb {
|
||||
/// Base URL of the endpoint; `new()` reads it from the `MONGODB_URL`
/// environment variable and falls back to `http://localhost:27017`.
base_url: String,
|
||||
/// Database name used as the path segment after `base_url`; `new()` clones it
/// from `crate::core::config::MONGODB_DATABASE`.
database: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
@@ -53,7 +54,8 @@ impl MongoDb {
|
||||
pub fn new() -> Self {
|
||||
let base_url =
|
||||
std::env::var("MONGODB_URL").unwrap_or_else(|_| "http://localhost:27017".to_string());
|
||||
Self { base_url }
|
||||
let database = crate::core::config::MONGODB_DATABASE.clone();
|
||||
Self { base_url, database }
|
||||
}
|
||||
}
|
||||
|
||||
@@ -68,7 +70,7 @@ impl MongoDb {
|
||||
let doc: ChunkDocument = chunk.clone().into();
|
||||
let client = reqwest::Client::new();
|
||||
|
||||
let url = format!("{}/momentry/chunks", self.base_url);
|
||||
let url = format!("{}/{}/chunks", self.base_url, self.database);
|
||||
|
||||
client
|
||||
.post(&url)
|
||||
@@ -83,8 +85,8 @@ impl MongoDb {
|
||||
pub async fn get_chunks_by_uuid(&self, uuid: &str) -> Result<Vec<Chunk>> {
|
||||
let client = reqwest::Client::new();
|
||||
let url = format!(
|
||||
"{}/momentry/chunks?filter={{\"uuid\":\"{}\"}}",
|
||||
self.base_url, uuid
|
||||
"{}/{}/chunks?filter={{\"uuid\":\"{}\"}}",
|
||||
self.base_url, self.database, uuid
|
||||
);
|
||||
|
||||
let response = client
|
||||
@@ -131,6 +133,7 @@ impl MongoDb {
|
||||
pre_chunk_ids: vec![],
|
||||
parent_chunk_id: doc.parent_chunk_id,
|
||||
child_chunk_ids: doc.child_chunk_ids,
|
||||
visual_stats: None,
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
@@ -141,8 +144,8 @@ impl MongoDb {
|
||||
pub async fn search_text(&self, query: &str) -> Result<Vec<Chunk>> {
|
||||
let client = reqwest::Client::new();
|
||||
let url = format!(
|
||||
"{}/momentry/chunks?filter={{\"$text\":{{\"$search\":\"{}\"}}}}",
|
||||
self.base_url, query
|
||||
"{}/{}/chunks?filter={{\"$text\":{{\"$search\":\"{}\"}}}}",
|
||||
self.base_url, self.database, query
|
||||
);
|
||||
|
||||
let response = client
|
||||
@@ -189,6 +192,7 @@ impl MongoDb {
|
||||
pre_chunk_ids: vec![],
|
||||
parent_chunk_id: doc.parent_chunk_id,
|
||||
child_chunk_ids: doc.child_chunk_ids,
|
||||
visual_stats: None,
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
@@ -198,7 +202,7 @@ impl MongoDb {
|
||||
|
||||
pub async fn get_all_chunks(&self) -> Result<Vec<Chunk>> {
|
||||
let client = reqwest::Client::new();
|
||||
let url = format!("{}/momentry/chunks", self.base_url);
|
||||
let url = format!("{}/{}/chunks", self.base_url, self.database);
|
||||
|
||||
let response = client
|
||||
.get(&url)
|
||||
@@ -244,6 +248,7 @@ impl MongoDb {
|
||||
pre_chunk_ids: vec![],
|
||||
parent_chunk_id: doc.parent_chunk_id,
|
||||
child_chunk_ids: doc.child_chunk_ids,
|
||||
visual_stats: None,
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -128,7 +128,7 @@ impl QdrantDb {
|
||||
use std::hash::{Hash, Hasher};
|
||||
let mut hasher = DefaultHasher::new();
|
||||
point_id_str.hash(&mut hasher);
|
||||
let point_id = hasher.finish() as u64;
|
||||
let point_id = hasher.finish();
|
||||
|
||||
let body = serde_json::json!({
|
||||
"points": [{
|
||||
@@ -171,7 +171,7 @@ impl QdrantDb {
|
||||
));
|
||||
}
|
||||
|
||||
tracing::debug!("Qdrant response: {}", response_text);
|
||||
tracing::debug!("Qdrant upsert response status: {}", status);
|
||||
tracing::info!("Successfully upserted vector for chunk: {}", chunk_id);
|
||||
Ok(())
|
||||
}
|
||||
@@ -257,6 +257,101 @@ impl QdrantDb {
|
||||
Ok(search_results)
|
||||
}
|
||||
|
||||
pub async fn search_collections(
|
||||
&self,
|
||||
query_vector: &[f32],
|
||||
collections: &[&str],
|
||||
limit: usize,
|
||||
) -> Result<Vec<SearchResult>> {
|
||||
let mut handles = Vec::new();
|
||||
for &collection in collections {
|
||||
let url = format!("{}/collections/{}/points/search", self.base_url, collection);
|
||||
let client = self.client.clone();
|
||||
let api_key = self.api_key.clone();
|
||||
let query_vec = query_vector.to_vec();
|
||||
let body = serde_json::json!({
|
||||
"vector": query_vec,
|
||||
"limit": limit * 2, // Fetch more from each to account for overlaps
|
||||
"with_payload": true
|
||||
});
|
||||
handles.push(async move {
|
||||
let response = client
|
||||
.post(&url)
|
||||
.header("api-key", &api_key)
|
||||
.header("Content-Type", "application/json")
|
||||
.json(&body)
|
||||
.send()
|
||||
.await;
|
||||
|
||||
match response {
|
||||
Ok(resp) if resp.status().is_success() => {
|
||||
let resp_text = resp
|
||||
.text()
|
||||
.await
|
||||
.unwrap_or_else(|_| "Failed to read response".to_string());
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct QdrantSearchResult {
|
||||
result: Vec<QdrantPoint>,
|
||||
}
|
||||
#[derive(Deserialize)]
|
||||
struct QdrantPoint {
|
||||
#[allow(dead_code)]
|
||||
id: serde_json::Value,
|
||||
score: f64,
|
||||
payload: HashMap<String, serde_json::Value>,
|
||||
}
|
||||
if let Ok(result) = serde_json::from_str::<QdrantSearchResult>(&resp_text) {
|
||||
let results: Vec<SearchResult> = result
|
||||
.result
|
||||
.into_iter()
|
||||
.map(|r| {
|
||||
let uuid = r
|
||||
.payload
|
||||
.get("uuid")
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or("unknown")
|
||||
.to_string();
|
||||
let chunk_id = r
|
||||
.payload
|
||||
.get("chunk_id")
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or("unknown")
|
||||
.to_string();
|
||||
SearchResult {
|
||||
uuid,
|
||||
chunk_id,
|
||||
score: r.score as f32,
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
Ok::<Vec<SearchResult>, anyhow::Error>(results)
|
||||
} else {
|
||||
Ok::<Vec<SearchResult>, anyhow::Error>(Vec::new())
|
||||
}
|
||||
}
|
||||
_ => Ok::<Vec<SearchResult>, anyhow::Error>(Vec::new()),
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
let results = futures_util::future::join_all(handles).await;
|
||||
let mut merged: Vec<SearchResult> = results
|
||||
.into_iter()
|
||||
.filter_map(Result::ok)
|
||||
.flatten()
|
||||
.collect();
|
||||
|
||||
// Sort by score descending
|
||||
merged.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap());
|
||||
// Deduplicate by chunk_id + uuid
|
||||
merged.dedup_by_key(|r| (r.chunk_id.clone(), r.uuid.clone()));
|
||||
// Truncate to limit
|
||||
merged.truncate(limit);
|
||||
|
||||
Ok(merged)
|
||||
}
|
||||
|
||||
pub async fn search_in_uuid(
|
||||
&self,
|
||||
query_vector: &[f32],
|
||||
|
||||
Reference in New Issue
Block a user