mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-01-10 21:44:34 +01:00
Merge #5142
5142: Try merge optimisation r=dureuill a=ManyTheFish ![Capture_decran_2024-12-09_a_11 59 42](https://github.com/user-attachments/assets/0dfc7e30-a603-4546-98d2-791990bdfcce) Co-authored-by: ManyTheFish <many@meilisearch.com>
This commit is contained in:
commit
1995040846
@ -28,7 +28,7 @@ pub struct WordDocidsBalancedCaches<'extractor> {
|
|||||||
exact_word_docids: BalancedCaches<'extractor>,
|
exact_word_docids: BalancedCaches<'extractor>,
|
||||||
word_position_docids: BalancedCaches<'extractor>,
|
word_position_docids: BalancedCaches<'extractor>,
|
||||||
fid_word_count_docids: BalancedCaches<'extractor>,
|
fid_word_count_docids: BalancedCaches<'extractor>,
|
||||||
fid_word_count: HashMap<FieldId, (usize, usize)>,
|
fid_word_count: HashMap<FieldId, (Option<usize>, Option<usize>)>,
|
||||||
current_docid: Option<DocumentId>,
|
current_docid: Option<DocumentId>,
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -85,8 +85,8 @@ impl<'extractor> WordDocidsBalancedCaches<'extractor> {
|
|||||||
|
|
||||||
self.fid_word_count
|
self.fid_word_count
|
||||||
.entry(field_id)
|
.entry(field_id)
|
||||||
.and_modify(|(_current_count, new_count)| *new_count += 1)
|
.and_modify(|(_current_count, new_count)| *new_count.get_or_insert(0) += 1)
|
||||||
.or_insert((0, 1));
|
.or_insert((None, Some(1)));
|
||||||
self.current_docid = Some(docid);
|
self.current_docid = Some(docid);
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
@ -130,8 +130,8 @@ impl<'extractor> WordDocidsBalancedCaches<'extractor> {
|
|||||||
|
|
||||||
self.fid_word_count
|
self.fid_word_count
|
||||||
.entry(field_id)
|
.entry(field_id)
|
||||||
.and_modify(|(current_count, _new_count)| *current_count += 1)
|
.and_modify(|(current_count, _new_count)| *current_count.get_or_insert(0) += 1)
|
||||||
.or_insert((1, 0));
|
.or_insert((Some(1), None));
|
||||||
|
|
||||||
self.current_docid = Some(docid);
|
self.current_docid = Some(docid);
|
||||||
|
|
||||||
@ -141,14 +141,18 @@ impl<'extractor> WordDocidsBalancedCaches<'extractor> {
|
|||||||
fn flush_fid_word_count(&mut self, buffer: &mut BumpVec<u8>) -> Result<()> {
|
fn flush_fid_word_count(&mut self, buffer: &mut BumpVec<u8>) -> Result<()> {
|
||||||
for (fid, (current_count, new_count)) in self.fid_word_count.drain() {
|
for (fid, (current_count, new_count)) in self.fid_word_count.drain() {
|
||||||
if current_count != new_count {
|
if current_count != new_count {
|
||||||
if current_count <= MAX_COUNTED_WORDS {
|
if let Some(current_count) =
|
||||||
|
current_count.filter(|current_count| *current_count <= MAX_COUNTED_WORDS)
|
||||||
|
{
|
||||||
buffer.clear();
|
buffer.clear();
|
||||||
buffer.extend_from_slice(&fid.to_be_bytes());
|
buffer.extend_from_slice(&fid.to_be_bytes());
|
||||||
buffer.push(current_count as u8);
|
buffer.push(current_count as u8);
|
||||||
self.fid_word_count_docids
|
self.fid_word_count_docids
|
||||||
.insert_del_u32(buffer, self.current_docid.unwrap())?;
|
.insert_del_u32(buffer, self.current_docid.unwrap())?;
|
||||||
}
|
}
|
||||||
if new_count <= MAX_COUNTED_WORDS {
|
if let Some(new_count) =
|
||||||
|
new_count.filter(|new_count| *new_count <= MAX_COUNTED_WORDS)
|
||||||
|
{
|
||||||
buffer.clear();
|
buffer.clear();
|
||||||
buffer.extend_from_slice(&fid.to_be_bytes());
|
buffer.extend_from_slice(&fid.to_be_bytes());
|
||||||
buffer.push(new_count as u8);
|
buffer.push(new_count as u8);
|
||||||
|
@ -235,8 +235,12 @@ fn merge_cbo_bitmaps(
|
|||||||
(Some(_current), None, None) => Ok(Operation::Ignore), // but it's strange
|
(Some(_current), None, None) => Ok(Operation::Ignore), // but it's strange
|
||||||
(Some(current), None, Some(add)) => Ok(Operation::Write(current | add)),
|
(Some(current), None, Some(add)) => Ok(Operation::Write(current | add)),
|
||||||
(Some(current), Some(del), add) => {
|
(Some(current), Some(del), add) => {
|
||||||
|
debug_assert!(
|
||||||
|
del.is_subset(&current),
|
||||||
|
"del is not a subset of current, which must be impossible."
|
||||||
|
);
|
||||||
let output = match add {
|
let output = match add {
|
||||||
Some(add) => (&current - del) | add,
|
Some(add) => (&current - (&del - &add)) | (add - del),
|
||||||
None => &current - del,
|
None => &current - del,
|
||||||
};
|
};
|
||||||
if output.is_empty() {
|
if output.is_empty() {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user