feat: fix Chinese text search and duplicate chunk_id bug
- Add helper functions to extract text from nested content structure
- Update SearchResult to include uuid field
- Add PostgreSQL function get_chunk_by_chunk_id_and_uuid to handle duplicate chunk_ids
- Update Qdrant search functions to extract uuid from payload
- Change embedding model to nomic-embed-text-v2-moe:latest
- Update Qdrant collection name to momentry_rule1
- Fix MongoDB authentication and disable cache for development
- Improve error handling in processor.rs
- Update documentation with new embedding model
This commit is contained in:
@@ -17,7 +17,7 @@ const QDRANT_API_KEY: &str = "Test3200Test3200Test3200";
|
||||
#[allow(dead_code)]
|
||||
const OLLAMA_URL: &str = "http://localhost:11434";
|
||||
#[allow(dead_code)]
|
||||
const MODEL: &str = "nomic-embed-text-v2-moe";
|
||||
const MODEL: &str = "nomic-embed-text-v2-moe:latest";
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
#[allow(dead_code)]
|
||||
@@ -112,8 +112,8 @@ impl ChunkSelector {
|
||||
return Ok(Vec::new());
|
||||
}
|
||||
|
||||
// Search Qdrant - try both collections (chunks_v3 for multilingual, AccusysDB for others)
|
||||
let collections = ["chunks_v3", "AccusysDB"];
|
||||
// Search Qdrant - use momentry_rule1 collection (Rule1 specification)
|
||||
let collections = ["momentry_rule1"];
|
||||
|
||||
for collection in collections {
|
||||
let vector_str = serde_json::to_string(&embedding)
|
||||
|
||||
Reference in New Issue
Block a user