feat: add migrations, test scripts, and utility tools
- Add database migrations (006-028) for face recognition, identity, file_uuid
- Add test scripts for ASR, face, search, processing
- Add portal frontend (Tauri)
- Add config, benchmark, and monitoring utilities
- Add model checkpoints and pretrained model references
This commit is contained in:
227
test_search_comparison.rs
Normal file
227
test_search_comparison.rs
Normal file
@@ -0,0 +1,227 @@
|
||||
use anyhow::{Context, Result};
|
||||
use momentry_core::core::db::{Database, PostgresDb, QdrantDb};
|
||||
use momentry_core::Embedder;
|
||||
use std::env;
|
||||
use std::time::Instant;
|
||||
|
||||
/// Summary of one query executed against both search backends.
///
/// One value is recorded per test query and later rendered as a row of the
/// final comparison table.
#[derive(Debug)]
struct SearchComparison {
    /// The query text that was searched for.
    query: String,
    /// Number of hits returned by the BM25 search.
    bm25_results: usize,
    /// Number of hits returned by the Qdrant vector search.
    qdrant_results: usize,
    /// Score of the first BM25 hit, or `0.0` when there were no hits.
    bm25_top_score: f32,
    /// Score of the first Qdrant hit, or `0.0` when there were no hits.
    qdrant_top_score: f32,
    /// Wall-clock duration of the BM25 search, in milliseconds.
    bm25_time_ms: u128,
    /// Wall-clock duration of the Qdrant search (including query embedding), in milliseconds.
    qdrant_time_ms: u128,
    /// Number of BM25 hits whose `uuid`/`chunk_id` pair also appears in the Qdrant hits.
    overlap_count: usize,
}
|
||||
|
||||
fn print_results(
|
||||
query: &str,
|
||||
bm25_results: &[momentry_core::core::db::postgres_db::Bm25Result],
|
||||
qdrant_results: &[momentry_core::core::db::SearchResult],
|
||||
limit: usize,
|
||||
) {
|
||||
println!("\n=== 查詢: '{}' ===", query);
|
||||
println!(
|
||||
"BM25 結果 (共 {} 筆,顯示前 {} 筆):",
|
||||
bm25_results.len(),
|
||||
limit.min(bm25_results.len())
|
||||
);
|
||||
for (i, r) in bm25_results.iter().take(limit).enumerate() {
|
||||
println!(
|
||||
" {}. {} (uuid: {}, chunk_id: {})",
|
||||
i + 1,
|
||||
r.text.chars().take(60).collect::<String>(),
|
||||
r.uuid,
|
||||
r.chunk_id
|
||||
);
|
||||
println!(
|
||||
" 分數: {:.4}, 時間: {:.1}-{:.1}s, 類型: {}",
|
||||
r.bm25_score, r.start_time, r.end_time, r.chunk_type
|
||||
);
|
||||
}
|
||||
|
||||
println!(
|
||||
"\nQdrant 向量搜尋結果 (共 {} 筆,顯示前 {} 筆):",
|
||||
qdrant_results.len(),
|
||||
limit.min(qdrant_results.len())
|
||||
);
|
||||
for (i, r) in qdrant_results.iter().take(limit).enumerate() {
|
||||
println!(" {}. uuid: {}, chunk_id: {}", i + 1, r.uuid, r.chunk_id);
|
||||
println!(" 分數: {:.4}", r.score);
|
||||
}
|
||||
|
||||
// 計算重疊
|
||||
let bm25_ids: Vec<String> = bm25_results
|
||||
.iter()
|
||||
.map(|r| format!("{}-{}", r.uuid, r.chunk_id))
|
||||
.collect();
|
||||
let qdrant_ids: Vec<String> = qdrant_results
|
||||
.iter()
|
||||
.map(|r| format!("{}-{}", r.uuid, r.chunk_id))
|
||||
.collect();
|
||||
|
||||
let overlap: Vec<&String> = bm25_ids
|
||||
.iter()
|
||||
.filter(|id| qdrant_ids.contains(id))
|
||||
.collect();
|
||||
|
||||
println!(
|
||||
"\n結果重疊: {}/{} (BM25 與 Qdrant 共同返回)",
|
||||
overlap.len(),
|
||||
bm25_results.len().max(qdrant_results.len())
|
||||
);
|
||||
if !overlap.is_empty() {
|
||||
println!("重疊的 chunk IDs: {:?}", overlap);
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> Result<()> {
|
||||
// 設定環境變數
|
||||
env::set_var("RUST_LOG", "info");
|
||||
env::set_var("QDRANT_URL", "http://localhost:6333");
|
||||
env::set_var("QDRANT_API_KEY", "Test3200Test3200Test3200");
|
||||
env::set_var("QDRANT_COLLECTION", "momentry_rule1");
|
||||
|
||||
println!("=== BM25 與 Qdrant 搜尋比較測試 ===\n");
|
||||
|
||||
// 初始化元件
|
||||
println!("初始化元件...");
|
||||
let embedder = Embedder::new("nomic-embed-text-v2-moe:latest".to_string());
|
||||
let pg = PostgresDb::init()
|
||||
.await
|
||||
.context("Failed to initialize PostgreSQL database")?;
|
||||
let qdrant = QdrantDb::new();
|
||||
|
||||
// 測試查詢清單
|
||||
let test_queries = vec![
|
||||
// 英文查詢
|
||||
("telephone", Some("384b0ff44aaaa1f1")), // Charade 電影
|
||||
("money", Some("384b0ff44aaaa1f1")),
|
||||
("gold", Some("384b0ff44aaaa1f1")),
|
||||
// 中文查詢
|
||||
("工作", Some("9760d0820f0cf9a7")), // ExaSAN 影片
|
||||
("加快速度", Some("9760d0820f0cf9a7")),
|
||||
("聲音", Some("9760d0820f0cf9a7")),
|
||||
// 全域查詢(無 uuid 限制)
|
||||
("computer", None),
|
||||
("technology", None),
|
||||
];
|
||||
|
||||
let limit = 10;
|
||||
let mut comparisons = Vec::new();
|
||||
|
||||
for (query_str, uuid_opt) in test_queries {
|
||||
let query = query_str.to_string();
|
||||
let uuid = uuid_opt.map(|s| s.to_string());
|
||||
|
||||
println!(
|
||||
"\n🔍 測試查詢: '{}' {}",
|
||||
query,
|
||||
uuid_opt
|
||||
.map(|u| format!("(uuid: {})", u))
|
||||
.unwrap_or_default()
|
||||
);
|
||||
|
||||
// BM25 搜尋
|
||||
let bm25_start = Instant::now();
|
||||
let bm25_results = pg.search_bm25(&query, uuid_opt, limit).await?;
|
||||
let bm25_time = bm25_start.elapsed();
|
||||
|
||||
// Qdrant 向量搜尋
|
||||
let qdrant_start = Instant::now();
|
||||
let query_vector = embedder.embed_query(&query).await?;
|
||||
let qdrant_results = if let Some(ref uuid) = uuid {
|
||||
qdrant.search_in_uuid(&query_vector, uuid, limit).await?
|
||||
} else {
|
||||
qdrant.search(&query_vector, limit).await?
|
||||
};
|
||||
let qdrant_time = qdrant_start.elapsed();
|
||||
|
||||
// 計算重疊
|
||||
let bm25_ids: Vec<String> = bm25_results
|
||||
.iter()
|
||||
.map(|r| format!("{}-{}", r.uuid, r.chunk_id))
|
||||
.collect();
|
||||
let qdrant_ids: Vec<String> = qdrant_results
|
||||
.iter()
|
||||
.map(|r| format!("{}-{}", r.uuid, r.chunk_id))
|
||||
.collect();
|
||||
|
||||
let overlap_count = bm25_ids.iter().filter(|id| qdrant_ids.contains(id)).count();
|
||||
|
||||
// 儲存比較結果
|
||||
let comparison = SearchComparison {
|
||||
query: query.clone(),
|
||||
bm25_results: bm25_results.len(),
|
||||
qdrant_results: qdrant_results.len(),
|
||||
bm25_top_score: bm25_results.first().map(|r| r.bm25_score).unwrap_or(0.0),
|
||||
qdrant_top_score: qdrant_results.first().map(|r| r.score).unwrap_or(0.0),
|
||||
bm25_time_ms: bm25_time.as_millis(),
|
||||
qdrant_time_ms: qdrant_time.as_millis(),
|
||||
overlap_count,
|
||||
};
|
||||
comparisons.push(comparison);
|
||||
|
||||
// 顯示詳細結果
|
||||
print_results(&query, &bm25_results, &qdrant_results, 5);
|
||||
|
||||
// 顯示效能比較
|
||||
println!("\n⏱️ 效能比較:");
|
||||
println!(" BM25 搜尋時間: {}ms", bm25_time.as_millis());
|
||||
println!(
|
||||
" Qdrant 搜尋時間: {}ms (含向量嵌入時間)",
|
||||
qdrant_time.as_millis()
|
||||
);
|
||||
}
|
||||
|
||||
// 顯示總結比較表
|
||||
println!("\n📊 搜尋比較總結");
|
||||
println!(
|
||||
"{:<15} {:<6} {:<6} {:<8} {:<8} {:<6} {:<6} {:<6}",
|
||||
"查詢", "BM25數", "QD數", "BM25分", "QD分", "BM25ms", "QDms", "重疊"
|
||||
);
|
||||
println!("{}", "-".repeat(80));
|
||||
|
||||
for comp in &comparisons {
|
||||
println!(
|
||||
"{:<15} {:<6} {:<6} {:<8.4} {:<8.4} {:<6} {:<6} {:<6}/{}",
|
||||
&comp.query[..15.min(comp.query.len())],
|
||||
comp.bm25_results,
|
||||
comp.qdrant_results,
|
||||
comp.bm25_top_score,
|
||||
comp.qdrant_top_score,
|
||||
comp.bm25_time_ms,
|
||||
comp.qdrant_time_ms,
|
||||
comp.overlap_count,
|
||||
comp.bm25_results.max(comp.qdrant_results)
|
||||
);
|
||||
}
|
||||
|
||||
// 分析統計
|
||||
let total_queries = comparisons.len();
|
||||
let bm25_faster = comparisons
|
||||
.iter()
|
||||
.filter(|c| c.bm25_time_ms < c.qdrant_time_ms)
|
||||
.count();
|
||||
let avg_overlap = comparisons
|
||||
.iter()
|
||||
.map(|c| c.overlap_count as f32 / c.bm25_results.max(c.qdrant_results).max(1) as f32)
|
||||
.sum::<f32>()
|
||||
/ total_queries as f32
|
||||
* 100.0;
|
||||
|
||||
println!("\n📈 統計分析:");
|
||||
println!(" • 總測試查詢數: {}", total_queries);
|
||||
println!(
|
||||
" • BM25 較快的查詢: {}/{} ({:.1}%)",
|
||||
bm25_faster,
|
||||
total_queries,
|
||||
bm25_faster as f32 / total_queries as f32 * 100.0
|
||||
);
|
||||
println!(" • 平均結果重疊率: {:.1}%", avg_overlap);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
Reference in New Issue
Block a user