mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-26 14:54:27 +01:00
Merge #697
697: Fix bug in prefix DB indexing r=loiclec a=loiclec
Where the batch's information was not properly updated in cases where only the proximity changed between two consecutive word pair proximities.
Closes partially https://github.com/meilisearch/meilisearch/issues/3043
Co-authored-by: Loïc Lecrenier <loic.lecrenier@me.com>
This commit is contained in:
commit
57c9f03e51
@ -238,4 +238,51 @@ mod tests {
|
|||||||
db_snap!(index, word_prefix_pair_proximity_docids, "update");
|
db_snap!(index, word_prefix_pair_proximity_docids, "update");
|
||||||
db_snap!(index, prefix_word_pair_proximity_docids, "update");
|
db_snap!(index, prefix_word_pair_proximity_docids, "update");
|
||||||
}
|
}
|
||||||
|
#[test]
|
||||||
|
fn test_batch_bug_3043() {
|
||||||
|
// https://github.com/meilisearch/meilisearch/issues/3043
|
||||||
|
let mut index = TempIndex::new();
|
||||||
|
index.index_documents_config.words_prefix_threshold = Some(50);
|
||||||
|
index.index_documents_config.autogenerate_docids = true;
|
||||||
|
|
||||||
|
index
|
||||||
|
.update_settings(|settings| {
|
||||||
|
settings.set_searchable_fields(vec!["text".to_owned()]);
|
||||||
|
})
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
let batch_reader_from_documents = |documents| {
|
||||||
|
let mut builder = DocumentsBatchBuilder::new(Vec::new());
|
||||||
|
for object in documents {
|
||||||
|
builder.append_json_object(&object).unwrap();
|
||||||
|
}
|
||||||
|
DocumentsBatchReader::from_reader(Cursor::new(builder.into_inner().unwrap())).unwrap()
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut documents = documents_with_enough_different_words_for_prefixes(&["y"]);
|
||||||
|
// now we add some documents where the text should populate the word_prefix_pair_proximity_docids database
|
||||||
|
documents.push(
|
||||||
|
serde_json::json!({
|
||||||
|
"text": "x y"
|
||||||
|
})
|
||||||
|
.as_object()
|
||||||
|
.unwrap()
|
||||||
|
.clone(),
|
||||||
|
);
|
||||||
|
documents.push(
|
||||||
|
serde_json::json!({
|
||||||
|
"text": "x a y"
|
||||||
|
})
|
||||||
|
.as_object()
|
||||||
|
.unwrap()
|
||||||
|
.clone(),
|
||||||
|
);
|
||||||
|
|
||||||
|
let documents = batch_reader_from_documents(documents);
|
||||||
|
index.add_documents(documents).unwrap();
|
||||||
|
|
||||||
|
db_snap!(index, word_pair_proximity_docids);
|
||||||
|
db_snap!(index, word_prefix_pair_proximity_docids);
|
||||||
|
db_snap!(index, prefix_word_pair_proximity_docids);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -0,0 +1,4 @@
|
|||||||
|
---
|
||||||
|
source: milli/src/update/prefix_word_pairs/mod.rs
|
||||||
|
---
|
||||||
|
|
@ -0,0 +1,8 @@
|
|||||||
|
---
|
||||||
|
source: milli/src/update/prefix_word_pairs/mod.rs
|
||||||
|
---
|
||||||
|
1 a y [51, ]
|
||||||
|
1 x a [51, ]
|
||||||
|
1 x y [50, ]
|
||||||
|
2 x y [51, ]
|
||||||
|
|
@ -0,0 +1,7 @@
|
|||||||
|
---
|
||||||
|
source: milli/src/update/prefix_word_pairs/mod.rs
|
||||||
|
---
|
||||||
|
1 a y [51, ]
|
||||||
|
1 x y [50, ]
|
||||||
|
2 x y [51, ]
|
||||||
|
|
@ -44,7 +44,7 @@ word2 : doggo
|
|||||||
2. **Inner loop:** Then, we iterate over all the prefixes of `word2` that are
|
2. **Inner loop:** Then, we iterate over all the prefixes of `word2` that are
|
||||||
in the list of sorted prefixes. And we insert the key `prefix`
|
in the list of sorted prefixes. And we insert the key `prefix`
|
||||||
and the value (`docids`) to a sorted map which we call the “batch”. For example,
|
and the value (`docids`) to a sorted map which we call the “batch”. For example,
|
||||||
at the end of the first inner loop, we may have:
|
at the end of the first outer loop, we may have:
|
||||||
```text
|
```text
|
||||||
Outer loop 1:
|
Outer loop 1:
|
||||||
------------------------------
|
------------------------------
|
||||||
@ -85,7 +85,7 @@ end of the batch.
|
|||||||
|
|
||||||
4. On the third iteration of the outer loop, we have:
|
4. On the third iteration of the outer loop, we have:
|
||||||
```text
|
```text
|
||||||
Outer loop 4:
|
Outer loop 3:
|
||||||
------------------------------
|
------------------------------
|
||||||
proximity: 1
|
proximity: 1
|
||||||
word1 : good
|
word1 : good
|
||||||
@ -340,17 +340,16 @@ fn execute_on_word_pairs_and_prefixes<I>(
|
|||||||
if prox_different_than_prev || word1_different_than_prev || word2_start_different_than_prev
|
if prox_different_than_prev || word1_different_than_prev || word2_start_different_than_prev
|
||||||
{
|
{
|
||||||
batch.flush(&mut merge_buffer, &mut insert)?;
|
batch.flush(&mut merge_buffer, &mut insert)?;
|
||||||
|
batch.proximity = proximity;
|
||||||
// don't forget to reset the value of batch.word1 and prev_word2_start
|
// don't forget to reset the value of batch.word1 and prev_word2_start
|
||||||
if word1_different_than_prev {
|
if word1_different_than_prev {
|
||||||
prefix_search_start.0 = 0;
|
|
||||||
batch.word1.clear();
|
batch.word1.clear();
|
||||||
batch.word1.extend_from_slice(word1);
|
batch.word1.extend_from_slice(word1);
|
||||||
batch.proximity = proximity;
|
|
||||||
}
|
}
|
||||||
if word2_start_different_than_prev {
|
if word2_start_different_than_prev {
|
||||||
// word2_start_different_than_prev == true
|
|
||||||
prev_word2_start = word2[0];
|
prev_word2_start = word2[0];
|
||||||
}
|
}
|
||||||
|
prefix_search_start.0 = 0;
|
||||||
// Optimisation: find the search start in the prefix trie to iterate over the prefixes of word2
|
// Optimisation: find the search start in the prefix trie to iterate over the prefixes of word2
|
||||||
empty_prefixes = !prefixes.set_search_start(word2, &mut prefix_search_start);
|
empty_prefixes = !prefixes.set_search_start(word2, &mut prefix_search_start);
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user