// Source viewer metadata: 28 lines, 958 B, Rust.
use momentry_core::core::text::tokenizer::{contains_chinese, tokenize_chinese_text};
|
|
|
|
fn main() {
|
|
let texts = ["電腦", "工作", "視頻", "分析", "檔案", "這是一個測試"];
|
|
for text in texts {
|
|
let tokens = tokenize_chinese_text(text);
|
|
println!("Text: '{}' -> Tokens: '{}'", text, tokens);
|
|
let split: Vec<&str> = tokens.split_whitespace().collect();
|
|
println!(" Split: {:?}", split);
|
|
}
|
|
|
|
println!("\n=== Testing complex queries ===");
|
|
let complex = [
|
|
"(電腦 | 計算機 | 微机)",
|
|
"(工作 | 任務 | 作業)",
|
|
"電腦 & 工作",
|
|
"(電腦:* | 計算機:* | 微机:*)",
|
|
];
|
|
|
|
for query in complex {
|
|
let tokens = tokenize_chinese_text(query);
|
|
println!("Query: '{}' -> Tokens: '{}'", query, tokens);
|
|
let split: Vec<&str> = tokens.split_whitespace().collect();
|
|
println!(" Split: {:?}", split);
|
|
println!("---");
|
|
}
|
|
}
|