mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-07-04 20:37:15 +02:00
fix: Always lowercase indexed tokens
This commit is contained in:
parent
7dbf5d6319
commit
f0268d49fe
1 changed file with 8 additions and 0 deletions
|
@@ -33,6 +33,10 @@ impl Indexer {
|
|||
pub fn index_text(&mut self, id: DocumentId, attr: SchemaAttr, text: &str) {
|
||||
for token in Tokenizer::new(text) {
|
||||
if token.word_index >= self.word_limit { break }
|
||||
|
||||
let lower = token.word.to_lowercase();
|
||||
let token = Token { word: &lower, ..token };
|
||||
|
||||
let docindex = match token_to_docindex(id, attr, token) {
|
||||
Some(docindex) => docindex,
|
||||
None => break,
|
||||
|
@@ -49,6 +53,10 @@ impl Indexer {
|
|||
let iter = iter.into_iter();
|
||||
for token in SeqTokenizer::new(iter) {
|
||||
if token.word_index >= self.word_limit { break }
|
||||
|
||||
let lower = token.word.to_lowercase();
|
||||
let token = Token { word: &lower, ..token };
|
||||
|
||||
let docindex = match token_to_docindex(id, attr, token) {
|
||||
Some(docindex) => docindex,
|
||||
None => break,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue