mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-01-10 13:34:30 +01:00
Merge #5142
5142: Try merge optimisation r=dureuill a=ManyTheFish ![Capture_decran_2024-12-09_a_11 59 42](https://github.com/user-attachments/assets/0dfc7e30-a603-4546-98d2-791990bdfcce) Co-authored-by: ManyTheFish <many@meilisearch.com>
This commit is contained in:
commit
1995040846
@ -28,7 +28,7 @@ pub struct WordDocidsBalancedCaches<'extractor> {
|
||||
exact_word_docids: BalancedCaches<'extractor>,
|
||||
word_position_docids: BalancedCaches<'extractor>,
|
||||
fid_word_count_docids: BalancedCaches<'extractor>,
|
||||
fid_word_count: HashMap<FieldId, (usize, usize)>,
|
||||
fid_word_count: HashMap<FieldId, (Option<usize>, Option<usize>)>,
|
||||
current_docid: Option<DocumentId>,
|
||||
}
|
||||
|
||||
@ -85,8 +85,8 @@ impl<'extractor> WordDocidsBalancedCaches<'extractor> {
|
||||
|
||||
self.fid_word_count
|
||||
.entry(field_id)
|
||||
.and_modify(|(_current_count, new_count)| *new_count += 1)
|
||||
.or_insert((0, 1));
|
||||
.and_modify(|(_current_count, new_count)| *new_count.get_or_insert(0) += 1)
|
||||
.or_insert((None, Some(1)));
|
||||
self.current_docid = Some(docid);
|
||||
|
||||
Ok(())
|
||||
@ -130,8 +130,8 @@ impl<'extractor> WordDocidsBalancedCaches<'extractor> {
|
||||
|
||||
self.fid_word_count
|
||||
.entry(field_id)
|
||||
.and_modify(|(current_count, _new_count)| *current_count += 1)
|
||||
.or_insert((1, 0));
|
||||
.and_modify(|(current_count, _new_count)| *current_count.get_or_insert(0) += 1)
|
||||
.or_insert((Some(1), None));
|
||||
|
||||
self.current_docid = Some(docid);
|
||||
|
||||
@ -141,14 +141,18 @@ impl<'extractor> WordDocidsBalancedCaches<'extractor> {
|
||||
fn flush_fid_word_count(&mut self, buffer: &mut BumpVec<u8>) -> Result<()> {
|
||||
for (fid, (current_count, new_count)) in self.fid_word_count.drain() {
|
||||
if current_count != new_count {
|
||||
if current_count <= MAX_COUNTED_WORDS {
|
||||
if let Some(current_count) =
|
||||
current_count.filter(|current_count| *current_count <= MAX_COUNTED_WORDS)
|
||||
{
|
||||
buffer.clear();
|
||||
buffer.extend_from_slice(&fid.to_be_bytes());
|
||||
buffer.push(current_count as u8);
|
||||
self.fid_word_count_docids
|
||||
.insert_del_u32(buffer, self.current_docid.unwrap())?;
|
||||
}
|
||||
if new_count <= MAX_COUNTED_WORDS {
|
||||
if let Some(new_count) =
|
||||
new_count.filter(|new_count| *new_count <= MAX_COUNTED_WORDS)
|
||||
{
|
||||
buffer.clear();
|
||||
buffer.extend_from_slice(&fid.to_be_bytes());
|
||||
buffer.push(new_count as u8);
|
||||
|
@ -235,8 +235,12 @@ fn merge_cbo_bitmaps(
|
||||
(Some(_current), None, None) => Ok(Operation::Ignore), // but it's strange
|
||||
(Some(current), None, Some(add)) => Ok(Operation::Write(current | add)),
|
||||
(Some(current), Some(del), add) => {
|
||||
debug_assert!(
|
||||
del.is_subset(&current),
|
||||
"del is not a subset of current, which must be impossible."
|
||||
);
|
||||
let output = match add {
|
||||
Some(add) => (&current - del) | add,
|
||||
Some(add) => (&current - (&del - &add)) | (add - del),
|
||||
None => &current - del,
|
||||
};
|
||||
if output.is_empty() {
|
||||
|
Loading…
x
Reference in New Issue
Block a user