From 4e5e55c21a1574825882d41e81f3c413ba7351d0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9ment=20Renault?=
Date: Fri, 13 Nov 2020 14:33:53 +0100
Subject: [PATCH] Simplify the merge functions

---
Reviewer note (ignored by `git am`): the union-then-serialize loop that was
duplicated in word_docids_merge and words_pairs_proximities_docids_merge is
moved into two private helpers, roaring_bitmap_merge and
cbo_roaring_bitmap_merge. main_merge now calls roaring_bitmap_merge directly
for DOCUMENTS_IDS_KEY, and a new public facet_field_value_docids_merge
delegates to cbo_roaring_bitmap_merge. Both helpers still call
`values.split_first().unwrap()`, so they panic on an empty `values` slice,
exactly as the inlined code did.

 src/update/index_documents/merge_function.rs | 56 ++++++++++++--------
 1 file changed, 34 insertions(+), 22 deletions(-)

diff --git a/src/update/index_documents/merge_function.rs b/src/update/index_documents/merge_function.rs
index 7f91b6716..fb785fd11 100644
--- a/src/update/index_documents/merge_function.rs
+++ b/src/update/index_documents/merge_function.rs
@@ -29,23 +29,13 @@ pub fn main_merge(key: &[u8], values: &[Cow<[u8]>]) -> anyhow::Result<Vec<u8>> {
             ensure!(values.windows(2).all(|vs| vs[0] == vs[1]), "fields ids map doesn't match");
             Ok(values[0].to_vec())
         },
-        DOCUMENTS_IDS_KEY => word_docids_merge(&[], values),
+        DOCUMENTS_IDS_KEY => roaring_bitmap_merge(values),
         otherwise => bail!("wut {:?}", otherwise),
     }
 }
 
 pub fn word_docids_merge(_key: &[u8], values: &[Cow<[u8]>]) -> anyhow::Result<Vec<u8>> {
-    let (head, tail) = values.split_first().unwrap();
-    let mut head = RoaringBitmap::deserialize_from(&head[..])?;
-
-    for value in tail {
-        let bitmap = RoaringBitmap::deserialize_from(&value[..])?;
-        head.union_with(&bitmap);
-    }
-
-    let mut vec = Vec::with_capacity(head.serialized_size());
-    head.serialize_into(&mut vec)?;
-    Ok(vec)
+    roaring_bitmap_merge(values)
 }
 
 pub fn docid_word_positions_merge(key: &[u8], _values: &[Cow<[u8]>]) -> anyhow::Result<Vec<u8>> {
@@ -53,17 +43,11 @@ pub fn docid_word_positions_merge(key: &[u8], _values: &[Cow<[u8]>]) -> anyhow::
 }
 
 pub fn words_pairs_proximities_docids_merge(_key: &[u8], values: &[Cow<[u8]>]) -> anyhow::Result<Vec<u8>> {
-    let (head, tail) = values.split_first().unwrap();
-    let mut head = CboRoaringBitmapCodec::deserialize_from(&head[..])?;
+    cbo_roaring_bitmap_merge(values)
+}
 
-    for value in tail {
-        let bitmap = CboRoaringBitmapCodec::deserialize_from(&value[..])?;
-        head.union_with(&bitmap);
-    }
-
-    let mut vec = Vec::new();
-    CboRoaringBitmapCodec::serialize_into(&head, &mut vec)?;
-    Ok(vec)
+pub fn facet_field_value_docids_merge(_key: &[u8], values: &[Cow<[u8]>]) -> anyhow::Result<Vec<u8>> {
+    cbo_roaring_bitmap_merge(values)
 }
 
 pub fn documents_merge(key: &[u8], _values: &[Cow<[u8]>]) -> anyhow::Result<Vec<u8>> {
@@ -85,3 +69,31 @@ pub fn merge_two_obkvs(base: obkv::KvReader, update: obkv::KvReader, buffer: &mu
 
     writer.finish().unwrap();
 }
+
+fn roaring_bitmap_merge(values: &[Cow<[u8]>]) -> anyhow::Result<Vec<u8>> {
+    let (head, tail) = values.split_first().unwrap();
+    let mut head = RoaringBitmap::deserialize_from(&head[..])?;
+
+    for value in tail {
+        let bitmap = RoaringBitmap::deserialize_from(&value[..])?;
+        head.union_with(&bitmap);
+    }
+
+    let mut vec = Vec::with_capacity(head.serialized_size());
+    head.serialize_into(&mut vec)?;
+    Ok(vec)
+}
+
+fn cbo_roaring_bitmap_merge(values: &[Cow<[u8]>]) -> anyhow::Result<Vec<u8>> {
+    let (head, tail) = values.split_first().unwrap();
+    let mut head = CboRoaringBitmapCodec::deserialize_from(&head[..])?;
+
+    for value in tail {
+        let bitmap = CboRoaringBitmapCodec::deserialize_from(&value[..])?;
+        head.union_with(&bitmap);
+    }
+
+    let mut vec = Vec::new();
+    CboRoaringBitmapCodec::serialize_into(&head, &mut vec)?;
+    Ok(vec)
+}