5062: Fix bugs for v1.12 r=Kerollmops a=ManyTheFish

# Pull Request

## Related issue
Fixes #4984
Fixes https://github.com/meilisearch/meilisearch/issues/4974
Fixes [SDK test](https://github.com/meilisearch/meilisearch/actions/runs/11886701996/job/33118278794)
## What does this PR do?
- add 3 tests
- fix bugs

Co-authored-by: ManyTheFish <many@meilisearch.com>
This commit is contained in:
meili-bors[bot] 2024-11-26 08:10:50 +00:00 committed by GitHub
commit e241f91285
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 180 additions and 7 deletions

View file

@ -58,9 +58,9 @@ pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
.map(|s| s.iter().map(String::as_str).collect());
let old_dictionary: Option<Vec<_>> =
settings_diff.old.dictionary.as_ref().map(|s| s.iter().map(String::as_str).collect());
let del_builder =
let mut del_builder =
tokenizer_builder(old_stop_words, old_separators.as_deref(), old_dictionary.as_deref());
let del_tokenizer = del_builder.into_tokenizer();
let del_tokenizer = del_builder.build();
let new_stop_words = settings_diff.new.stop_words.as_ref();
let new_separators: Option<Vec<_>> = settings_diff
@ -70,9 +70,9 @@ pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
.map(|s| s.iter().map(String::as_str).collect());
let new_dictionary: Option<Vec<_>> =
settings_diff.new.dictionary.as_ref().map(|s| s.iter().map(String::as_str).collect());
let add_builder =
let mut add_builder =
tokenizer_builder(new_stop_words, new_separators.as_deref(), new_dictionary.as_deref());
let add_tokenizer = add_builder.into_tokenizer();
let add_tokenizer = add_builder.build();
// iterate over documents.
let mut cursor = obkv_documents.into_cursor()?;