2022-04-04 13:59:29 +02:00
|
|
|
use std::collections::BTreeSet;
|
|
|
|
|
|
|
|
use heed::EnvOpenOptions;
|
|
|
|
use milli::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings};
|
2022-08-18 17:36:08 +02:00
|
|
|
use milli::{Criterion, Index, Search, TermsMatchingStrategy};
|
2022-04-04 13:59:29 +02:00
|
|
|
use serde_json::json;
|
|
|
|
use tempfile::tempdir;
|
2022-04-01 10:50:01 +02:00
|
|
|
use Criterion::*;
|
|
|
|
|
|
|
|
/// Checks that the minimum word length required to tolerate one typo is configurable.
///
/// With the default typo settings the query `zeal` yields one document while the 4-letter
/// query `zean` yields none. Once the one-typo threshold is set to 4, `zean` yields one
/// document.
#[test]
fn test_typo_tolerance_one_typo() {
    let index = super::setup_search_index_with_criteria(&[Typo]);

    // Baseline: both searches run before any typo setting is touched.
    {
        let rtxn = index.read_txn().unwrap();

        let mut zeal_search = Search::new(&rtxn, &index);
        zeal_search
            .query("zeal")
            .limit(10)
            .terms_matching_strategy(TermsMatchingStrategy::default());
        let zeal_hits = zeal_search.execute().unwrap().documents_ids;
        assert_eq!(zeal_hits.len(), 1);

        let mut zean_search = Search::new(&rtxn, &index);
        zean_search
            .query("zean")
            .limit(10)
            .terms_matching_strategy(TermsMatchingStrategy::default());
        let zean_hits = zean_search.execute().unwrap().documents_ids;
        assert_eq!(zean_hits.len(), 0);
    }

    // Lower the one-typo threshold to 4 letters.
    let mut wtxn = index.write_txn().unwrap();
    let indexer_config = IndexerConfig::default();
    let mut settings = Settings::new(&mut wtxn, &index, &indexer_config);
    settings.set_min_word_len_one_typo(4);
    settings.execute(|_| (), || false).unwrap();

    // The same 4-letter query is searched again through the still uncommitted write
    // transaction and must now yield a document.
    let mut zean_search = Search::new(&wtxn, &index);
    zean_search
        .query("zean")
        .limit(10)
        .terms_matching_strategy(TermsMatchingStrategy::default());
    let zean_hits = zean_search.execute().unwrap().documents_ids;
    assert_eq!(zean_hits.len(), 1);
}
|
|
|
|
|
|
|
|
/// Checks that the minimum word length required to tolerate two typos is configurable.
///
/// With the default typo settings the query `zealand` yields one document while the
/// 7-letter query `zealemd` yields none. Once the two-typos threshold is set to 7,
/// `zealemd` yields one document.
#[test]
fn test_typo_tolerance_two_typo() {
    let index = super::setup_search_index_with_criteria(&[Typo]);

    // Baseline: both searches run before any typo setting is touched.
    {
        let rtxn = index.read_txn().unwrap();

        let mut zealand_search = Search::new(&rtxn, &index);
        zealand_search
            .query("zealand")
            .limit(10)
            .terms_matching_strategy(TermsMatchingStrategy::default());
        let zealand_hits = zealand_search.execute().unwrap().documents_ids;
        assert_eq!(zealand_hits.len(), 1);

        let mut zealemd_search = Search::new(&rtxn, &index);
        zealemd_search
            .query("zealemd")
            .limit(10)
            .terms_matching_strategy(TermsMatchingStrategy::default());
        let zealemd_hits = zealemd_search.execute().unwrap().documents_ids;
        assert_eq!(zealemd_hits.len(), 0);
    }

    // Lower the two-typos threshold to 7 letters.
    let mut wtxn = index.write_txn().unwrap();
    let indexer_config = IndexerConfig::default();
    let mut settings = Settings::new(&mut wtxn, &index, &indexer_config);
    settings.set_min_word_len_two_typos(7);
    settings.execute(|_| (), || false).unwrap();

    // Two typos are now accepted on 7-letter words: the same query is searched again
    // through the still uncommitted write transaction and must yield a document.
    let mut zealemd_search = Search::new(&wtxn, &index);
    zealemd_search
        .query("zealemd")
        .limit(10)
        .terms_matching_strategy(TermsMatchingStrategy::default());
    let zealemd_hits = zealemd_search.execute().unwrap().documents_ids;
    assert_eq!(zealemd_hits.len(), 1);
}
|
2022-04-04 13:59:29 +02:00
|
|
|
|
|
|
|
/// Checks that a word registered as an exact word no longer matches through typos.
///
/// A fresh index is filled with two documents whose `data` fields are `zealand` and
/// `zearand`. With the default typo settings the query `zealand` yields both documents;
/// once `zealand` is added to the exact words, the same query yields a single document.
#[test]
fn test_typo_disabled_on_word() {
    // The temporary directory is bound first so it outlives the index opened inside it.
    let dir = tempdir().unwrap();
    let mut env_options = EnvOpenOptions::new();
    env_options.map_size(4096 * 100);
    let index = Index::new(env_options, dir.path()).unwrap();

    // Serialize the two documents into an in-memory batch.
    let mut batch = milli::documents::DocumentsBatchBuilder::new(Vec::new());
    let docs = [
        json!({
            "id": 1usize,
            "data": "zealand",
        }),
        json!({
            "id": 2usize,
            "data": "zearand",
        }),
    ];
    for doc in docs.iter() {
        batch.append_json_object(doc.as_object().unwrap()).unwrap();
    }
    let bytes = batch.into_inner().unwrap();
    let cursor = std::io::Cursor::new(bytes);
    let documents = milli::documents::DocumentsBatchReader::from_reader(cursor).unwrap();

    // Index the batch and commit it.
    let mut wtxn = index.write_txn().unwrap();
    let indexer_config = IndexerConfig::default();
    let documents_config = IndexDocumentsConfig::default();
    let indexer = IndexDocuments::new(
        &mut wtxn,
        &index,
        &indexer_config,
        documents_config,
        |_| (),
        || false,
    )
    .unwrap();
    let (indexer, user_error) = indexer.add_documents(documents).unwrap();
    user_error.unwrap();
    indexer.execute().unwrap();
    wtxn.commit().unwrap();

    // Baseline with the default typo settings: both documents are returned.
    {
        let rtxn = index.read_txn().unwrap();

        let mut search = Search::new(&rtxn, &index);
        search
            .query("zealand")
            .limit(10)
            .terms_matching_strategy(TermsMatchingStrategy::default());
        let hits = search.execute().unwrap().documents_ids;
        assert_eq!(hits.len(), 2);
    }

    // `zealand` doesn't allow typos anymore
    let mut wtxn = index.write_txn().unwrap();
    let indexer_config = IndexerConfig::default();
    let mut settings = Settings::new(&mut wtxn, &index, &indexer_config);
    let exact_words: BTreeSet<String> = std::iter::once("zealand".to_string()).collect();
    settings.set_exact_words(exact_words);
    settings.execute(|_| (), || false).unwrap();

    // Searched through the still uncommitted write transaction.
    let mut search = Search::new(&wtxn, &index);
    search.query("zealand").limit(10).terms_matching_strategy(TermsMatchingStrategy::default());
    let hits = search.execute().unwrap().documents_ids;
    assert_eq!(hits.len(), 1);
}
|
2022-04-04 14:47:07 +02:00
|
|
|
|
|
|
|
/// Checks that declaring an attribute as exact disables typo matching for this query.
///
/// With the default typo settings the misspelled query `antebelum` yields one document;
/// once `description` is registered as an exact attribute, the same query yields none.
#[test]
fn test_disable_typo_on_attribute() {
    let index = super::setup_search_index_with_criteria(&[Typo]);

    // Baseline with the default typo settings.
    {
        let rtxn = index.read_txn().unwrap();

        let mut search = Search::new(&rtxn, &index);
        // typo in `antebel(l)um`
        search
            .query("antebelum")
            .limit(10)
            .terms_matching_strategy(TermsMatchingStrategy::default());
        let hits = search.execute().unwrap().documents_ids;
        assert_eq!(hits.len(), 1);
    }

    let mut wtxn = index.write_txn().unwrap();
    let indexer_config = IndexerConfig::default();
    let mut settings = Settings::new(&mut wtxn, &index, &indexer_config);
    // disable typos on `description`
    let exact_attributes = vec!["description".to_string()].into_iter().collect();
    settings.set_exact_attributes(exact_attributes);
    settings.execute(|_| (), || false).unwrap();

    // Same misspelled query, searched through the still uncommitted write transaction.
    let mut search = Search::new(&wtxn, &index);
    search.query("antebelum").limit(10).terms_matching_strategy(TermsMatchingStrategy::default());
    let hits = search.execute().unwrap().documents_ids;
    assert_eq!(hits.len(), 0);
}
|