mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-23 05:14:27 +01:00
Add a test to ensure that the indexer supports stop words
This commit is contained in:
parent
ff7dde7522
commit
e9dce3ce81
@ -170,20 +170,18 @@ fn index_token(
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if stop_words.contains(&token.word) {
|
if !stop_words.contains(&token.word) {
|
||||||
return false;
|
match token_to_docindex(id, attr, token) {
|
||||||
}
|
Some(docindex) => {
|
||||||
|
let word = Vec::from(token.word);
|
||||||
match token_to_docindex(id, attr, token) {
|
words_doc_indexes
|
||||||
Some(docindex) => {
|
.entry(word.clone())
|
||||||
let word = Vec::from(token.word);
|
.or_insert_with(Vec::new)
|
||||||
words_doc_indexes
|
.push(docindex);
|
||||||
.entry(word.clone())
|
docs_words.entry(id).or_insert_with(Vec::new).push(word);
|
||||||
.or_insert_with(Vec::new)
|
}
|
||||||
.push(docindex);
|
None => return false,
|
||||||
docs_words.entry(id).or_insert_with(Vec::new).push(word);
|
|
||||||
}
|
}
|
||||||
None => return false,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
true
|
true
|
||||||
@ -256,4 +254,33 @@ mod tests {
|
|||||||
.get(&"l’éteindre".to_owned().into_bytes())
|
.get(&"l’éteindre".to_owned().into_bytes())
|
||||||
.is_some());
|
.is_some());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn basic_stop_words() {
|
||||||
|
let stop_words = sdset::SetBuf::from_dirty(vec!["l", "j", "ai", "de"]);
|
||||||
|
let stop_words = fst::Set::from_iter(stop_words).unwrap();
|
||||||
|
|
||||||
|
let mut indexer = RawIndexer::new(stop_words);
|
||||||
|
|
||||||
|
let docid = DocumentId(0);
|
||||||
|
let attr = SchemaAttr(0);
|
||||||
|
let text = "Zut, l’aspirateur, j’ai oublié de l’éteindre !";
|
||||||
|
indexer.index_text(docid, attr, text);
|
||||||
|
|
||||||
|
let Indexed {
|
||||||
|
words_doc_indexes, ..
|
||||||
|
} = indexer.build();
|
||||||
|
|
||||||
|
assert!(words_doc_indexes.get(&b"l"[..]).is_none());
|
||||||
|
assert!(words_doc_indexes.get(&b"aspirateur"[..]).is_some());
|
||||||
|
assert!(words_doc_indexes.get(&b"j"[..]).is_none());
|
||||||
|
assert!(words_doc_indexes.get(&b"ai"[..]).is_none());
|
||||||
|
assert!(words_doc_indexes.get(&b"de"[..]).is_none());
|
||||||
|
assert!(words_doc_indexes.get(&b"eteindre"[..]).is_some());
|
||||||
|
|
||||||
|
// with the ugly apostrophe...
|
||||||
|
assert!(words_doc_indexes
|
||||||
|
.get(&"l’éteindre".to_owned().into_bytes())
|
||||||
|
.is_some());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user