mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-07-03 20:07:09 +02:00
Merge #5062
5062: Fix bugs for v1.12 r=Kerollmops a=ManyTheFish

# Pull Request

## Related issue
Fixes #4984
Fixes https://github.com/meilisearch/meilisearch/issues/4974
Fixes [SDK test](https://github.com/meilisearch/meilisearch/actions/runs/11886701996/job/33118278794)

## What does this PR do?
- add 3 tests
- fix bugs

Co-authored-by: ManyTheFish <many@meilisearch.com>
This commit is contained in:
commit
e241f91285
6 changed files with 180 additions and 7 deletions
|
@ -274,7 +274,7 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> {
|
|||
last_match_last_token_position_plus_one
|
||||
} else {
|
||||
// we have matched the end of possible tokens, there's nothing to advance
|
||||
tokens.len() - 1
|
||||
tokens.len()
|
||||
}
|
||||
};
|
||||
|
||||
|
|
|
@ -193,15 +193,23 @@ pub fn compute_phrase_docids(
|
|||
if words.is_empty() {
|
||||
return Ok(RoaringBitmap::new());
|
||||
}
|
||||
let mut candidates = RoaringBitmap::new();
|
||||
let mut candidates = None;
|
||||
for word in words.iter().flatten().copied() {
|
||||
if let Some(word_docids) = ctx.word_docids(None, Word::Original(word))? {
|
||||
candidates |= word_docids;
|
||||
if let Some(candidates) = candidates.as_mut() {
|
||||
*candidates &= word_docids;
|
||||
} else {
|
||||
candidates = Some(word_docids);
|
||||
}
|
||||
} else {
|
||||
return Ok(RoaringBitmap::new());
|
||||
}
|
||||
}
|
||||
|
||||
let Some(mut candidates) = candidates else {
|
||||
return Ok(RoaringBitmap::new());
|
||||
};
|
||||
|
||||
let winsize = words.len().min(3);
|
||||
|
||||
for win in words.windows(winsize) {
|
||||
|
|
|
@ -58,9 +58,9 @@ pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
|
|||
.map(|s| s.iter().map(String::as_str).collect());
|
||||
let old_dictionary: Option<Vec<_>> =
|
||||
settings_diff.old.dictionary.as_ref().map(|s| s.iter().map(String::as_str).collect());
|
||||
let del_builder =
|
||||
let mut del_builder =
|
||||
tokenizer_builder(old_stop_words, old_separators.as_deref(), old_dictionary.as_deref());
|
||||
let del_tokenizer = del_builder.into_tokenizer();
|
||||
let del_tokenizer = del_builder.build();
|
||||
|
||||
let new_stop_words = settings_diff.new.stop_words.as_ref();
|
||||
let new_separators: Option<Vec<_>> = settings_diff
|
||||
|
@ -70,9 +70,9 @@ pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
|
|||
.map(|s| s.iter().map(String::as_str).collect());
|
||||
let new_dictionary: Option<Vec<_>> =
|
||||
settings_diff.new.dictionary.as_ref().map(|s| s.iter().map(String::as_str).collect());
|
||||
let add_builder =
|
||||
let mut add_builder =
|
||||
tokenizer_builder(new_stop_words, new_separators.as_deref(), new_dictionary.as_deref());
|
||||
let add_tokenizer = add_builder.into_tokenizer();
|
||||
let add_tokenizer = add_builder.build();
|
||||
|
||||
// iterate over documents.
|
||||
let mut cursor = obkv_documents.into_cursor()?;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue