From f0268d49fe3d84832b6408ccaeb6b3e142b683c6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Mon, 22 Apr 2019 18:43:00 +0200 Subject: [PATCH] fix: Always lowercase indexed tokens --- meilidb-data/src/indexer.rs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/meilidb-data/src/indexer.rs b/meilidb-data/src/indexer.rs index a1be35a93..350cc9e00 100644 --- a/meilidb-data/src/indexer.rs +++ b/meilidb-data/src/indexer.rs @@ -33,6 +33,10 @@ impl Indexer { pub fn index_text(&mut self, id: DocumentId, attr: SchemaAttr, text: &str) { for token in Tokenizer::new(text) { if token.word_index >= self.word_limit { break } + + let lower = token.word.to_lowercase(); + let token = Token { word: &lower, ..token }; + let docindex = match token_to_docindex(id, attr, token) { Some(docindex) => docindex, None => break, @@ -49,6 +53,10 @@ impl Indexer { let iter = iter.into_iter(); for token in SeqTokenizer::new(iter) { if token.word_index >= self.word_limit { break } + + let lower = token.word.to_lowercase(); + let token = Token { word: &lower, ..token }; + let docindex = match token_to_docindex(id, attr, token) { Some(docindex) => docindex, None => break,