5142: Try merge optimisation r=dureuill a=ManyTheFish

![Capture_decran_2024-12-09_a_11 59 42](https://github.com/user-attachments/assets/0dfc7e30-a603-4546-98d2-791990bdfcce)

Co-authored-by: ManyTheFish <many@meilisearch.com>
This commit is contained in:
meili-bors[bot] 2024-12-09 14:48:26 +00:00 committed by GitHub
commit 1995040846
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 16 additions and 8 deletions

View File

@ -28,7 +28,7 @@ pub struct WordDocidsBalancedCaches<'extractor> {
exact_word_docids: BalancedCaches<'extractor>, exact_word_docids: BalancedCaches<'extractor>,
word_position_docids: BalancedCaches<'extractor>, word_position_docids: BalancedCaches<'extractor>,
fid_word_count_docids: BalancedCaches<'extractor>, fid_word_count_docids: BalancedCaches<'extractor>,
fid_word_count: HashMap<FieldId, (usize, usize)>, fid_word_count: HashMap<FieldId, (Option<usize>, Option<usize>)>,
current_docid: Option<DocumentId>, current_docid: Option<DocumentId>,
} }
@ -85,8 +85,8 @@ impl<'extractor> WordDocidsBalancedCaches<'extractor> {
self.fid_word_count self.fid_word_count
.entry(field_id) .entry(field_id)
.and_modify(|(_current_count, new_count)| *new_count += 1) .and_modify(|(_current_count, new_count)| *new_count.get_or_insert(0) += 1)
.or_insert((0, 1)); .or_insert((None, Some(1)));
self.current_docid = Some(docid); self.current_docid = Some(docid);
Ok(()) Ok(())
@ -130,8 +130,8 @@ impl<'extractor> WordDocidsBalancedCaches<'extractor> {
self.fid_word_count self.fid_word_count
.entry(field_id) .entry(field_id)
.and_modify(|(current_count, _new_count)| *current_count += 1) .and_modify(|(current_count, _new_count)| *current_count.get_or_insert(0) += 1)
.or_insert((1, 0)); .or_insert((Some(1), None));
self.current_docid = Some(docid); self.current_docid = Some(docid);
@ -141,14 +141,18 @@ impl<'extractor> WordDocidsBalancedCaches<'extractor> {
fn flush_fid_word_count(&mut self, buffer: &mut BumpVec<u8>) -> Result<()> { fn flush_fid_word_count(&mut self, buffer: &mut BumpVec<u8>) -> Result<()> {
for (fid, (current_count, new_count)) in self.fid_word_count.drain() { for (fid, (current_count, new_count)) in self.fid_word_count.drain() {
if current_count != new_count { if current_count != new_count {
if current_count <= MAX_COUNTED_WORDS { if let Some(current_count) =
current_count.filter(|current_count| *current_count <= MAX_COUNTED_WORDS)
{
buffer.clear(); buffer.clear();
buffer.extend_from_slice(&fid.to_be_bytes()); buffer.extend_from_slice(&fid.to_be_bytes());
buffer.push(current_count as u8); buffer.push(current_count as u8);
self.fid_word_count_docids self.fid_word_count_docids
.insert_del_u32(buffer, self.current_docid.unwrap())?; .insert_del_u32(buffer, self.current_docid.unwrap())?;
} }
if new_count <= MAX_COUNTED_WORDS { if let Some(new_count) =
new_count.filter(|new_count| *new_count <= MAX_COUNTED_WORDS)
{
buffer.clear(); buffer.clear();
buffer.extend_from_slice(&fid.to_be_bytes()); buffer.extend_from_slice(&fid.to_be_bytes());
buffer.push(new_count as u8); buffer.push(new_count as u8);

View File

@ -235,8 +235,12 @@ fn merge_cbo_bitmaps(
(Some(_current), None, None) => Ok(Operation::Ignore), // but it's strange (Some(_current), None, None) => Ok(Operation::Ignore), // but it's strange
(Some(current), None, Some(add)) => Ok(Operation::Write(current | add)), (Some(current), None, Some(add)) => Ok(Operation::Write(current | add)),
(Some(current), Some(del), add) => { (Some(current), Some(del), add) => {
debug_assert!(
del.is_subset(&current),
"del is not a subset of current, which must be impossible."
);
let output = match add { let output = match add {
Some(add) => (&current - del) | add, Some(add) => (&current - (&del - &add)) | (add - del),
None => &current - del, None => &current - del,
}; };
if output.is_empty() { if output.is_empty() {