mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-12-24 13:40:31 +01:00
fix: Always lowercase indexed tokens
This commit is contained in:
parent
7dbf5d6319
commit
f0268d49fe
@ -33,6 +33,10 @@ impl Indexer {
|
||||
pub fn index_text(&mut self, id: DocumentId, attr: SchemaAttr, text: &str) {
|
||||
for token in Tokenizer::new(text) {
|
||||
if token.word_index >= self.word_limit { break }
|
||||
|
||||
let lower = token.word.to_lowercase();
|
||||
let token = Token { word: &lower, ..token };
|
||||
|
||||
let docindex = match token_to_docindex(id, attr, token) {
|
||||
Some(docindex) => docindex,
|
||||
None => break,
|
||||
@ -49,6 +53,10 @@ impl Indexer {
|
||||
let iter = iter.into_iter();
|
||||
for token in SeqTokenizer::new(iter) {
|
||||
if token.word_index >= self.word_limit { break }
|
||||
|
||||
let lower = token.word.to_lowercase();
|
||||
let token = Token { word: &lower, ..token };
|
||||
|
||||
let docindex = match token_to_docindex(id, attr, token) {
|
||||
Some(docindex) => docindex,
|
||||
None => break,
|
||||
|
Loading…
x
Reference in New Issue
Block a user