feat: fix Chinese text search and duplicate chunk_id bug

- Add helper functions to extract text from nested content structure
- Update SearchResult to include uuid field
- Add PostgreSQL function get_chunk_by_chunk_id_and_uuid to handle duplicate chunk_ids
- Update Qdrant search functions to extract uuid from payload
- Change embedding model to nomic-embed-text-v2-moe:latest
- Update Qdrant collection name to momentry_rule1
- Fix MongoDB authentication and disable cache for development
- Improve error handling in processor.rs
- Update documentation with new embedding model
2026-03-29 04:44:28 +08:00
parent 82955504f3
commit 2393d81a3f
13 changed files with 355 additions and 106 deletions
--- a/src/player/chunk_selector.rs
+++ b/src/player/chunk_selector.rs
@@ -17,7 +17,7 @@ const QDRANT_API_KEY: &str = "Test3200Test3200Test3200";
 #[allow(dead_code)]
 const OLLAMA_URL: &str = "http://localhost:11434";
 #[allow(dead_code)]
-const MODEL: &str = "nomic-embed-text-v2-moe";
+const MODEL: &str = "nomic-embed-text-v2-moe:latest";

 #[derive(Debug, Clone)]
 #[allow(dead_code)]
@@ -112,8 +112,8 @@ impl ChunkSelector {
            return Ok(Vec::new());
        }

-        // Search Qdrant - try both collections (chunks_v3 for multilingual, AccusysDB for others)
-        let collections = ["chunks_v3", "AccusysDB"];
+        // Search Qdrant - use momentry_rule1 collection (Rule1 specification)
+        let collections = ["momentry_rule1"];

        for collection in collections {
            let vector_str = serde_json::to_string(&embedding)