From 19d7cdc20d144fdd62284c975e9be5ae370aae9a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Thu, 20 Jun 2024 12:57:08 +0200 Subject: [PATCH 1/3] Improve facet distribution speed in lexico mode --- .../search/facet/facet_distribution_iter.rs | 26 ++++++++++++------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/milli/src/search/facet/facet_distribution_iter.rs b/milli/src/search/facet/facet_distribution_iter.rs index a8aa1a006..26b4ae80e 100644 --- a/milli/src/search/facet/facet_distribution_iter.rs +++ b/milli/src/search/facet/facet_distribution_iter.rs @@ -1,14 +1,17 @@ use std::cmp::Reverse; use std::collections::BinaryHeap; +use std::io::Cursor; use std::ops::ControlFlow; use heed::Result; use roaring::RoaringBitmap; use super::{get_first_facet_value, get_highest_level}; -use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec}; +use crate::heed_codec::facet::{ + FacetGroupKey, FacetGroupKeyCodec, FacetGroupLazyValueCodec, FacetGroupValueCodec, +}; use crate::heed_codec::BytesRefCodec; -use crate::DocumentId; +use crate::{CboRoaringBitmapCodec, DocumentId}; /// Call the given closure on the facet distribution of the candidate documents. /// @@ -31,12 +34,9 @@ pub fn lexicographically_iterate_over_facet_distribution<'t, CB>( where CB: FnMut(&'t [u8], u64, DocumentId) -> Result>, { + let db = db.remap_data_type::(); let mut fd = LexicographicFacetDistribution { rtxn, db, field_id, callback }; - let highest_level = get_highest_level( - rtxn, - db.remap_key_type::>(), - field_id, - )?; + let highest_level = get_highest_level(rtxn, db, field_id)?; if let Some(first_bound) = get_first_facet_value::(rtxn, db, field_id)? { fd.iterate(candidates, highest_level, first_bound, usize::MAX)?; @@ -146,7 +146,7 @@ where CB: FnMut(&'t [u8], u64, DocumentId) -> Result>, { rtxn: &'t heed::RoTxn<'t>, - db: heed::Database, FacetGroupValueCodec>, + db: heed::Database, FacetGroupLazyValueCodec>, field_id: u16, callback: CB, } @@ -171,7 +171,10 @@ where if key.field_id != self.field_id { return Ok(ControlFlow::Break(())); } - let docids_in_common = value.bitmap & candidates; + let docids_in_common = CboRoaringBitmapCodec::intersection_with_serialized( + value.bitmap_bytes, + candidates, + )?; if !docids_in_common.is_empty() { let any_docid_in_common = docids_in_common.min().unwrap(); match (self.callback)(key.left_bound, docids_in_common.len(), any_docid_in_common)? @@ -205,7 +208,10 @@ where if key.field_id != self.field_id { return Ok(ControlFlow::Break(())); } - let docids_in_common = value.bitmap & candidates; + let docids_in_common = CboRoaringBitmapCodec::intersection_with_serialized( + value.bitmap_bytes, + candidates, + )?; if !docids_in_common.is_empty() { let cf = self.iterate( &docids_in_common, From 6fa4da8ae7cf7df6f78d9f11411a6a81b6625bc1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Thu, 20 Jun 2024 12:58:51 +0200 Subject: [PATCH 2/3] Improve facet distribution speed in count mode --- .../search/facet/facet_distribution_iter.rs | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/milli/src/search/facet/facet_distribution_iter.rs b/milli/src/search/facet/facet_distribution_iter.rs index 26b4ae80e..1e6ea8d88 100644 --- a/milli/src/search/facet/facet_distribution_iter.rs +++ b/milli/src/search/facet/facet_distribution_iter.rs @@ -1,6 +1,5 @@ use std::cmp::Reverse; use std::collections::BinaryHeap; -use std::io::Cursor; use std::ops::ControlFlow; use heed::Result; @@ -75,11 +74,8 @@ where // Represents the list of keys that we must explore. let mut heap = BinaryHeap::new(); - let highest_level = get_highest_level( - rtxn, - db.remap_key_type::>(), - field_id, - )?; + let db = db.remap_data_type::(); + let highest_level = get_highest_level(rtxn, db, field_id)?; if let Some(first_bound) = get_first_facet_value::(rtxn, db, field_id)? { // We first fill the heap with values from the highest level @@ -92,7 +88,10 @@ where if key.field_id != field_id { break; } - let intersection = value.bitmap & candidates; + let intersection = CboRoaringBitmapCodec::intersection_with_serialized( + value.bitmap_bytes, + candidates, + )?; let count = intersection.len(); if count != 0 { heap.push(LevelEntry { @@ -121,7 +120,10 @@ where if key.field_id != field_id { break; } - let intersection = value.bitmap & candidates; + let intersection = CboRoaringBitmapCodec::intersection_with_serialized( + value.bitmap_bytes, + candidates, + )?; let count = intersection.len(); if count != 0 { heap.push(LevelEntry { From 9736e16a88868f352eda3605a723f04ba60d1b7c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Thu, 20 Jun 2024 13:02:44 +0200 Subject: [PATCH 3/3] Make clippy happy --- .../update/index_documents/extract/extract_vector_points.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/milli/src/update/index_documents/extract/extract_vector_points.rs b/milli/src/update/index_documents/extract/extract_vector_points.rs index 736c21c9f..36fa346a5 100644 --- a/milli/src/update/index_documents/extract/extract_vector_points.rs +++ b/milli/src/update/index_documents/extract/extract_vector_points.rs @@ -290,7 +290,7 @@ pub fn extract_vector_points( regenerate_if_prompt_changed( obkv, (old_prompt, prompt), - (&old_fields_ids_map, &new_fields_ids_map), + (old_fields_ids_map, new_fields_ids_map), )? } else { // we can simply ignore user provided vectors as they are not regenerated and are @@ -306,7 +306,7 @@ pub fn extract_vector_points( prompt, (add_to_user_provided, remove_from_user_provided), (old, new), - (&old_fields_ids_map, &new_fields_ids_map), + (old_fields_ids_map, new_fields_ids_map), document_id, )?, };