mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-01-23 11:47:28 +01:00
Fix bug in prefix DB indexing
Fixes a bug where the batch's information was not properly updated when only the proximity changed between two consecutive word pair proximities. Closes https://github.com/meilisearch/meilisearch/issues/3043
This commit is contained in:
parent
a651397afc
commit
f7c8730d09
@ -238,4 +238,51 @@ mod tests {
|
||||
db_snap!(index, word_prefix_pair_proximity_docids, "update");
|
||||
db_snap!(index, prefix_word_pair_proximity_docids, "update");
|
||||
}
|
||||
// Regression test for the prefix-DB batching bug described in the issue linked
// below: it indexes two documents that both contain the words `x` and `y`, but
// separated by a different number of words, and snapshots the three
// word-pair-proximity databases after indexing.
// NOTE(review): the test name says 3034 while the linked issue is #3043 — the
// digits look transposed. Renaming may affect snapshot names if `db_snap!`
// derives them from the test name — verify before changing.
#[test]
|
||||
fn test_batch_bug_3034() {
|
||||
// https://github.com/meilisearch/meilisearch/issues/3043
|
||||
let mut index = TempIndex::new();
|
||||
// Set the prefix threshold to 50 and let the indexer generate document ids,
// since the documents pushed below carry no id field.
index.index_documents_config.words_prefix_threshold = Some(50);
|
||||
index.index_documents_config.autogenerate_docids = true;
|
||||
|
||||
// Restrict the searchable fields to `text`, the only field set explicitly on the
// documents pushed below.
index
|
||||
.update_settings(|settings| {
|
||||
settings.set_searchable_fields(vec!["text".to_owned()]);
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
// Helper closure: serialises a collection of JSON objects into an in-memory
// documents batch and returns a reader over it.
let batch_reader_from_documents = |documents| {
|
||||
let mut builder = DocumentsBatchBuilder::new(Vec::new());
|
||||
for object in documents {
|
||||
builder.append_json_object(&object).unwrap();
|
||||
}
|
||||
DocumentsBatchReader::from_reader(Cursor::new(builder.into_inner().unwrap())).unwrap()
|
||||
};
|
||||
|
||||
// Presumably generates enough distinct words starting with `y` for `y` to be
// treated as a prefix under the threshold configured above — confirm against
// the helper's definition.
let mut documents = documents_with_enough_different_words_for_prefixes(&["y"]);
|
||||
// now we add some documents where the text should populate the word_prefix_pair_proximity_docids database
|
||||
// First document: `x` immediately followed by `y`.
documents.push(
|
||||
serde_json::json!({
|
||||
"text": "x y"
|
||||
})
|
||||
.as_object()
|
||||
.unwrap()
|
||||
.clone(),
|
||||
);
|
||||
// Second document: the same two words with one extra word between them, so the
// pair (`x`, `y`) appears again but further apart.
documents.push(
|
||||
serde_json::json!({
|
||||
"text": "x a y"
|
||||
})
|
||||
.as_object()
|
||||
.unwrap()
|
||||
.clone(),
|
||||
);
|
||||
|
||||
// Index everything in a single `add_documents` call.
let documents = batch_reader_from_documents(documents);
|
||||
index.add_documents(documents).unwrap();
|
||||
|
||||
// Snapshot the plain word-pair database and both prefix variants.
db_snap!(index, word_pair_proximity_docids);
|
||||
db_snap!(index, word_prefix_pair_proximity_docids);
|
||||
db_snap!(index, prefix_word_pair_proximity_docids);
|
||||
}
|
||||
}
|
||||
|
@ -44,7 +44,7 @@ word2 : doggo
|
||||
2. **Inner loop:** Then, we iterate over all the prefixes of `word2` that are
|
||||
in the list of sorted prefixes. And we insert the key `prefix`
|
||||
and the value (`docids`) to a sorted map which we call the “batch”. For example,
|
||||
at the end of the first inner loop, we may have:
|
||||
at the end of the first outer loop, we may have:
|
||||
```text
|
||||
Outer loop 1:
|
||||
------------------------------
|
||||
@ -85,7 +85,7 @@ end of the batch.
|
||||
|
||||
4. On the third iteration of the outer loop, we have:
|
||||
```text
|
||||
Outer loop 4:
|
||||
Outer loop 3:
|
||||
------------------------------
|
||||
proximity: 1
|
||||
word1 : good
|
||||
@ -340,17 +340,16 @@ fn execute_on_word_pairs_and_prefixes<I>(
|
||||
if prox_different_than_prev || word1_different_than_prev || word2_start_different_than_prev
|
||||
{
|
||||
batch.flush(&mut merge_buffer, &mut insert)?;
|
||||
batch.proximity = proximity;
|
||||
// don't forget to reset the value of batch.word1 and prev_word2_start
|
||||
if word1_different_than_prev {
|
||||
prefix_search_start.0 = 0;
|
||||
batch.word1.clear();
|
||||
batch.word1.extend_from_slice(word1);
|
||||
batch.proximity = proximity;
|
||||
}
|
||||
if word2_start_different_than_prev {
|
||||
// word2_start_different_than_prev == true
|
||||
prev_word2_start = word2[0];
|
||||
}
|
||||
prefix_search_start.0 = 0;
|
||||
// Optimisation: find the search start in the prefix trie to iterate over the prefixes of word2
|
||||
empty_prefixes = !prefixes.set_search_start(word2, &mut prefix_search_start);
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user