From fca4577e233d943990757c7b4e1408f8bec7840f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Lecrenier?= Date: Wed, 7 Sep 2022 17:56:38 +0200 Subject: [PATCH] Return original string in facet distributions, work on facet tests --- milli/src/search/facet/facet_distribution.rs | 249 +++++++++++++++++- .../search/facet/facet_distribution_iter.rs | 104 +++----- milli/src/search/facet/facet_range_search.rs | 72 ++--- .../src/search/facet/facet_sort_ascending.rs | 41 +-- .../src/search/facet/facet_sort_descending.rs | 42 +-- milli/src/search/facet/filter.rs | 6 +- milli/src/search/facet/mod.rs | 37 +++ .../random_looking_index_snap.hash.snap | 4 - .../random_looking_index_snap.hash.snap | 4 - .../random_looking_index_snap.hash.snap | 4 - 10 files changed, 350 insertions(+), 213 deletions(-) delete mode 100644 milli/src/search/facet/snapshots/facet_distribution_iter.rs/random_looking_index_snap/random_looking_index_snap.hash.snap delete mode 100644 milli/src/search/facet/snapshots/facet_sort_ascending.rs/random_looking_index_snap/random_looking_index_snap.hash.snap delete mode 100644 milli/src/search/facet/snapshots/facet_sort_descending.rs/random_looking_index_snap/random_looking_index_snap.hash.snap diff --git a/milli/src/search/facet/facet_distribution.rs b/milli/src/search/facet/facet_distribution.rs index 7c554d368..7eb438a03 100644 --- a/milli/src/search/facet/facet_distribution.rs +++ b/milli/src/search/facet/facet_distribution.rs @@ -140,13 +140,13 @@ impl<'a> FacetDistribution<'a> { self.index.facet_id_f64_docids.remap_key_type::>(), field_id, candidates, - |facet_key, nbr_docids| { + |facet_key, nbr_docids, _| { let facet_key = OrderedF64Codec::bytes_decode(facet_key).unwrap(); distribution.insert(facet_key.to_string(), nbr_docids); if distribution.len() == self.max_values_per_facet { - ControlFlow::Break(()) + Ok(ControlFlow::Break(())) } else { - ControlFlow::Continue(()) + Ok(ControlFlow::Continue(())) } }, ) @@ -163,13 +163,22 @@ impl<'a> FacetDistribution<'a> { self.index.facet_id_string_docids.remap_key_type::>(), field_id, candidates, - |facet_key, nbr_docids| { + |facet_key, nbr_docids, any_docid| { let facet_key = StrRefCodec::bytes_decode(facet_key).unwrap(); - distribution.insert(facet_key.to_string(), nbr_docids); + + let key: (FieldId, _, &str) = (field_id, any_docid, facet_key); + let original_string = self + .index + .field_id_docid_facet_strings + .get(self.rtxn, &key)? + .unwrap() + .to_owned(); + + distribution.insert(original_string, nbr_docids); if distribution.len() == self.max_values_per_facet { - ControlFlow::Break(()) + Ok(ControlFlow::Break(())) } else { - ControlFlow::Continue(()) + Ok(ControlFlow::Continue(())) } }, ) @@ -186,7 +195,8 @@ impl<'a> FacetDistribution<'a> { let db = self.index.facet_id_f64_docids; let mut prefix = vec![]; prefix.extend_from_slice(&field_id.to_be_bytes()); - prefix.push(0); + prefix.push(0); // read values from level 0 only + let iter = db .as_polymorph() .prefix_iter::<_, ByteSlice, ByteSlice>(self.rtxn, prefix.as_slice())? @@ -207,10 +217,15 @@ impl<'a> FacetDistribution<'a> { .prefix_iter::<_, ByteSlice, ByteSlice>(self.rtxn, prefix.as_slice())? .remap_types::, FacetGroupValueCodec>(); - // TODO: get the original value of the facet somewhere (in the documents DB?) for result in iter { let (key, value) = result?; - distribution.insert(key.left_bound.to_owned(), value.bitmap.len()); + + let docid = value.bitmap.iter().next().unwrap(); + let key: (FieldId, _, &'a str) = (field_id, docid, key.left_bound); + let original_string = + self.index.field_id_docid_facet_strings.get(self.rtxn, &key)?.unwrap().to_owned(); + + distribution.insert(original_string, value.bitmap.len()); if distribution.len() == self.max_values_per_facet { break; } @@ -304,3 +319,217 @@ impl fmt::Debug for FacetDistribution<'_> { .finish() } } + +#[cfg(test)] +mod tests { + use big_s::S; + use maplit::hashset; + + use crate::{ + documents::documents_batch_reader_from_objects, index::tests::TempIndex, milli_snap, + FacetDistribution, + }; + + #[test] + fn few_candidates_few_facet_values() { + // All the tests here avoid using the code in `facet_distribution_iter` because there aren't + // enough candidates. + + let mut index = TempIndex::new(); + index.index_documents_config.autogenerate_docids = true; + + index + .update_settings(|settings| settings.set_filterable_fields(hashset! { S("colour") })) + .unwrap(); + + let documents = documents!([ + { "colour": "Blue" }, + { "colour": " blue" }, + { "colour": "RED" } + ]); + + index.add_documents(documents).unwrap(); + + let txn = index.read_txn().unwrap(); + + let map = FacetDistribution::new(&txn, &index) + .facets(std::iter::once("colour")) + .execute() + .unwrap(); + + milli_snap!(format!("{map:?}"), @r###"{"colour": {"Blue": 2, "RED": 1}}"###); + + let map = FacetDistribution::new(&txn, &index) + .facets(std::iter::once("colour")) + .candidates([0, 1, 2].iter().copied().collect()) + .execute() + .unwrap(); + + milli_snap!(format!("{map:?}"), @r###"{"colour": {"Blue": 2, "RED": 1}}"###); + + let map = FacetDistribution::new(&txn, &index) + .facets(std::iter::once("colour")) + .candidates([1, 2].iter().copied().collect()) + .execute() + .unwrap(); + + // I think it would be fine if " blue" was "Blue" instead. + // We just need to get any non-normalised string I think, even if it's not in + // the candidates + milli_snap!(format!("{map:?}"), @r###"{"colour": {" blue": 1, "RED": 1}}"###); + + let map = FacetDistribution::new(&txn, &index) + .facets(std::iter::once("colour")) + .candidates([2].iter().copied().collect()) + .execute() + .unwrap(); + + milli_snap!(format!("{map:?}"), @r###"{"colour": {"RED": 1}}"###); + + let map = FacetDistribution::new(&txn, &index) + .facets(std::iter::once("colour")) + .candidates([0, 1, 2].iter().copied().collect()) + .max_values_per_facet(1) + .execute() + .unwrap(); + + milli_snap!(format!("{map:?}"), @r###"{"colour": {"Blue": 2, "RED": 1}}"###); + } + + #[test] + fn many_candidates_few_facet_values() { + let mut index = TempIndex::new_with_map_size(4096 * 10_000); + index.index_documents_config.autogenerate_docids = true; + + index + .update_settings(|settings| settings.set_filterable_fields(hashset! { S("colour") })) + .unwrap(); + + let facet_values = ["Red", "RED", " red ", "Blue", "BLUE"]; + + let mut documents = vec![]; + for i in 0..10_000 { + let document = serde_json::json!({ + "colour": facet_values[i % 5], + }) + .as_object() + .unwrap() + .clone(); + documents.push(document); + } + + let documents = documents_batch_reader_from_objects(documents); + + index.add_documents(documents).unwrap(); + + let txn = index.read_txn().unwrap(); + + let map = FacetDistribution::new(&txn, &index) + .facets(std::iter::once("colour")) + .execute() + .unwrap(); + + milli_snap!(format!("{map:?}"), @r###"{"colour": {"Blue": 4000, "Red": 6000}}"###); + + let map = FacetDistribution::new(&txn, &index) + .facets(std::iter::once("colour")) + .max_values_per_facet(1) + .execute() + .unwrap(); + + milli_snap!(format!("{map:?}"), @r###"{"colour": {"Blue": 4000}}"###); + + let map = FacetDistribution::new(&txn, &index) + .facets(std::iter::once("colour")) + .candidates((0..10_000).into_iter().collect()) + .execute() + .unwrap(); + + milli_snap!(format!("{map:?}"), @r###"{"colour": {"Blue": 4000, "Red": 6000}}"###); + + let map = FacetDistribution::new(&txn, &index) + .facets(std::iter::once("colour")) + .candidates((0..5_000).into_iter().collect()) + .execute() + .unwrap(); + + milli_snap!(format!("{map:?}"), @r###"{"colour": {"Blue": 2000, "Red": 3000}}"###); + + let map = FacetDistribution::new(&txn, &index) + .facets(std::iter::once("colour")) + .candidates((0..5_000).into_iter().collect()) + .execute() + .unwrap(); + + milli_snap!(format!("{map:?}"), @r###"{"colour": {"Blue": 2000, "Red": 3000}}"###); + + let map = FacetDistribution::new(&txn, &index) + .facets(std::iter::once("colour")) + .candidates((0..5_000).into_iter().collect()) + .max_values_per_facet(1) + .execute() + .unwrap(); + + milli_snap!(format!("{map:?}"), @r###"{"colour": {"Blue": 2000}}"###); + } + + #[test] + fn many_candidates_many_facet_values() { + let mut index = TempIndex::new_with_map_size(4096 * 10_000); + index.index_documents_config.autogenerate_docids = true; + + index + .update_settings(|settings| settings.set_filterable_fields(hashset! { S("colour") })) + .unwrap(); + + let facet_values = (0..1000).into_iter().map(|x| format!("{x:x}")).collect::>(); + + let mut documents = vec![]; + for i in 0..10_000 { + let document = serde_json::json!({ + "colour": facet_values[i % 1000], + }) + .as_object() + .unwrap() + .clone(); + documents.push(document); + } + + let documents = documents_batch_reader_from_objects(documents); + + index.add_documents(documents).unwrap(); + + let txn = index.read_txn().unwrap(); + + let map = FacetDistribution::new(&txn, &index) + .facets(std::iter::once("colour")) + .execute() + .unwrap(); + + milli_snap!(format!("{map:?}"), "no_candidates", @"ac9229ed5964d893af96a7076e2f8af5"); + + let map = FacetDistribution::new(&txn, &index) + .facets(std::iter::once("colour")) + .max_values_per_facet(2) + .execute() + .unwrap(); + + milli_snap!(format!("{map:?}"), "no_candidates_with_max_2", @r###"{"colour": {"0": 10, "1": 10}}"###); + + let map = FacetDistribution::new(&txn, &index) + .facets(std::iter::once("colour")) + .candidates((0..10_000).into_iter().collect()) + .execute() + .unwrap(); + + milli_snap!(format!("{map:?}"), "candidates_0_10_000", @"ac9229ed5964d893af96a7076e2f8af5"); + + let map = FacetDistribution::new(&txn, &index) + .facets(std::iter::once("colour")) + .candidates((0..5_000).into_iter().collect()) + .execute() + .unwrap(); + + milli_snap!(format!("{map:?}"), "candidates_0_5_000", @"825f23a4090d05756f46176987b7d992"); + } +} diff --git a/milli/src/search/facet/facet_distribution_iter.rs b/milli/src/search/facet/facet_distribution_iter.rs index 3379d1abe..ad330b8db 100644 --- a/milli/src/search/facet/facet_distribution_iter.rs +++ b/milli/src/search/facet/facet_distribution_iter.rs @@ -4,8 +4,9 @@ use heed::Result; use roaring::RoaringBitmap; use super::{get_first_facet_value, get_highest_level}; -use crate::heed_codec::facet::{ - ByteSliceRef, FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec, +use crate::{ + heed_codec::facet::{ByteSliceRef, FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec}, + DocumentId, }; pub fn iterate_over_facet_distribution<'t, CB>( @@ -16,7 +17,7 @@ pub fn iterate_over_facet_distribution<'t, CB>( callback: CB, ) -> Result<()> where - CB: FnMut(&'t [u8], u64) -> ControlFlow<()>, + CB: FnMut(&'t [u8], u64, DocumentId) -> Result>, { let mut fd = FacetDistribution { rtxn, db, field_id, callback }; let highest_level = @@ -32,7 +33,7 @@ where struct FacetDistribution<'t, CB> where - CB: FnMut(&'t [u8], u64) -> ControlFlow<()>, + CB: FnMut(&'t [u8], u64, DocumentId) -> Result>, { rtxn: &'t heed::RoTxn<'t>, db: heed::Database, FacetGroupValueCodec>, @@ -42,7 +43,7 @@ where impl<'t, CB> FacetDistribution<'t, CB> where - CB: FnMut(&'t [u8], u64) -> ControlFlow<()>, + CB: FnMut(&'t [u8], u64, DocumentId) -> Result>, { fn iterate_level_0( &mut self, @@ -62,7 +63,8 @@ where } let docids_in_common = value.bitmap.intersection_len(candidates); if docids_in_common > 0 { - match (self.callback)(key.left_bound, docids_in_common) { + let any_docid = value.bitmap.iter().next().unwrap(); + match (self.callback)(key.left_bound, docids_in_common, any_docid)? { ControlFlow::Continue(_) => {} ControlFlow::Break(_) => return Ok(ControlFlow::Break(())), } @@ -112,50 +114,14 @@ where #[cfg(test)] mod tests { + use super::iterate_over_facet_distribution; + use crate::milli_snap; + use crate::search::facet::tests::get_random_looking_index; + use crate::{heed_codec::facet::OrderedF64Codec, search::facet::tests::get_simple_index}; + use heed::BytesDecode; + use roaring::RoaringBitmap; use std::ops::ControlFlow; - use super::iterate_over_facet_distribution; - use crate::heed_codec::facet::OrderedF64Codec; - use crate::milli_snap; - use crate::update::facet::tests::FacetIndex; - use heed::BytesDecode; - use rand::{Rng, SeedableRng}; - use roaring::RoaringBitmap; - - fn get_simple_index() -> FacetIndex { - let index = FacetIndex::::new(4, 8, 5); - let mut txn = index.env.write_txn().unwrap(); - for i in 0..256u16 { - let mut bitmap = RoaringBitmap::new(); - bitmap.insert(i as u32); - index.insert(&mut txn, 0, &(i as f64), &bitmap); - } - txn.commit().unwrap(); - index - } - fn get_random_looking_index() -> FacetIndex { - let index = FacetIndex::::new(4, 8, 5); - let mut txn = index.env.write_txn().unwrap(); - - let mut rng = rand::rngs::SmallRng::from_seed([0; 32]); - let keys = - std::iter::from_fn(|| Some(rng.gen_range(0..256))).take(128).collect::>(); - - for (_i, key) in keys.into_iter().enumerate() { - let mut bitmap = RoaringBitmap::new(); - bitmap.insert(key); - bitmap.insert(key + 100); - index.insert(&mut txn, 0, &(key as f64), &bitmap); - } - txn.commit().unwrap(); - index - } - - #[test] - fn random_looking_index_snap() { - let index = get_random_looking_index(); - milli_snap!(format!("{index}")); - } #[test] fn filter_distribution_all() { let indexes = [get_simple_index(), get_random_looking_index()]; @@ -163,11 +129,17 @@ mod tests { let txn = index.env.read_txn().unwrap(); let candidates = (0..=255).into_iter().collect::(); let mut results = String::new(); - iterate_over_facet_distribution(&txn, index.content, 0, &candidates, |facet, count| { - let facet = OrderedF64Codec::bytes_decode(facet).unwrap(); - results.push_str(&format!("{facet}: {count}\n")); - ControlFlow::Continue(()) - }) + iterate_over_facet_distribution( + &txn, + index.content, + 0, + &candidates, + |facet, count, _| { + let facet = OrderedF64Codec::bytes_decode(facet).unwrap(); + results.push_str(&format!("{facet}: {count}\n")); + Ok(ControlFlow::Continue(())) + }, + ) .unwrap(); milli_snap!(results, i); @@ -182,17 +154,23 @@ mod tests { let candidates = (0..=255).into_iter().collect::(); let mut results = String::new(); let mut nbr_facets = 0; - iterate_over_facet_distribution(&txn, index.content, 0, &candidates, |facet, count| { - let facet = OrderedF64Codec::bytes_decode(facet).unwrap(); - if nbr_facets == 100 { - return ControlFlow::Break(()); - } else { - nbr_facets += 1; - results.push_str(&format!("{facet}: {count}\n")); + iterate_over_facet_distribution( + &txn, + index.content, + 0, + &candidates, + |facet, count, _| { + let facet = OrderedF64Codec::bytes_decode(facet).unwrap(); + if nbr_facets == 100 { + return Ok(ControlFlow::Break(())); + } else { + nbr_facets += 1; + results.push_str(&format!("{facet}: {count}\n")); - ControlFlow::Continue(()) - } - }) + Ok(ControlFlow::Continue(())) + } + }, + ) .unwrap(); milli_snap!(results, i); diff --git a/milli/src/search/facet/facet_range_search.rs b/milli/src/search/facet/facet_range_search.rs index cb5fd14d2..c99ac8e92 100644 --- a/milli/src/search/facet/facet_range_search.rs +++ b/milli/src/search/facet/facet_range_search.rs @@ -15,7 +15,8 @@ pub fn find_docids_of_facet_within_bounds<'t, BoundCodec>( field_id: u16, left: &'t Bound<>::EItem>, right: &'t Bound<>::EItem>, -) -> Result + docids: &mut RoaringBitmap, +) -> Result<()> where BoundCodec: for<'a> BytesEncode<'a>, for<'a> >::EItem: Sized, @@ -45,16 +46,15 @@ where Bound::Unbounded => Bound::Unbounded, }; let db = db.remap_key_type::>(); - let mut docids = RoaringBitmap::new(); - let mut f = FacetRangeSearch { rtxn, db, field_id, left, right, docids: &mut docids }; + let mut f = FacetRangeSearch { rtxn, db, field_id, left, right, docids }; let highest_level = get_highest_level(rtxn, db, field_id)?; if let Some(first_bound) = get_first_facet_value::(rtxn, db, field_id)? { let last_bound = get_last_facet_value::(rtxn, db, field_id)?.unwrap(); f.run(highest_level, first_bound, Bound::Included(last_bound), usize::MAX)?; - Ok(docids) + Ok(()) } else { - return Ok(RoaringBitmap::new()); + return Ok(()); } } @@ -255,45 +255,13 @@ impl<'t, 'b, 'bitmap> FacetRangeSearch<'t, 'b, 'bitmap> { #[cfg(test)] mod tests { - use std::ops::Bound; - - use rand::{Rng, SeedableRng}; - use roaring::RoaringBitmap; - use super::find_docids_of_facet_within_bounds; use crate::heed_codec::facet::{FacetGroupKeyCodec, OrderedF64Codec}; use crate::milli_snap; + use crate::search::facet::tests::{get_random_looking_index, get_simple_index}; use crate::snapshot_tests::display_bitmap; - use crate::update::facet::tests::FacetIndex; - - fn get_simple_index() -> FacetIndex { - let index = FacetIndex::::new(4, 8, 5); - let mut txn = index.env.write_txn().unwrap(); - for i in 0..256u16 { - let mut bitmap = RoaringBitmap::new(); - bitmap.insert(i as u32); - index.insert(&mut txn, 0, &(i as f64), &bitmap); - } - txn.commit().unwrap(); - index - } - fn get_random_looking_index() -> FacetIndex { - let index = FacetIndex::::new(4, 8, 5); - let mut txn = index.env.write_txn().unwrap(); - - let mut rng = rand::rngs::SmallRng::from_seed([0; 32]); - let keys = - std::iter::from_fn(|| Some(rng.gen_range(0..256))).take(128).collect::>(); - - for (_i, key) in keys.into_iter().enumerate() { - let mut bitmap = RoaringBitmap::new(); - bitmap.insert(key); - bitmap.insert(key + 100); - index.insert(&mut txn, 0, &(key as f64), &bitmap); - } - txn.commit().unwrap(); - index - } + use roaring::RoaringBitmap; + use std::ops::Bound; #[test] fn random_looking_index_snap() { @@ -310,12 +278,14 @@ mod tests { let i = i as f64; let start = Bound::Included(0.); let end = Bound::Included(i); - let docids = find_docids_of_facet_within_bounds::( + let mut docids = RoaringBitmap::new(); + find_docids_of_facet_within_bounds::( &txn, index.content.remap_key_type::>(), 0, &start, &end, + &mut docids, ) .unwrap(); results.push_str(&format!("{}\n", display_bitmap(&docids))); @@ -326,12 +296,14 @@ mod tests { let i = i as f64; let start = Bound::Excluded(0.); let end = Bound::Excluded(i); - let docids = find_docids_of_facet_within_bounds::( + let mut docids = RoaringBitmap::new(); + find_docids_of_facet_within_bounds::( &txn, index.content.remap_key_type::>(), 0, &start, &end, + &mut docids, ) .unwrap(); results.push_str(&format!("{}\n", display_bitmap(&docids))); @@ -352,12 +324,14 @@ mod tests { let i = i as f64; let start = Bound::Included(i); let end = Bound::Included(255.); - let docids = find_docids_of_facet_within_bounds::( + let mut docids = RoaringBitmap::new(); + find_docids_of_facet_within_bounds::( &txn, index.content.remap_key_type::>(), 0, &start, &end, + &mut docids, ) .unwrap(); results.push_str(&format!("{}\n", display_bitmap(&docids))); @@ -371,12 +345,14 @@ mod tests { let i = i as f64; let start = Bound::Excluded(i); let end = Bound::Excluded(255.); - let docids = find_docids_of_facet_within_bounds::( + let mut docids = RoaringBitmap::new(); + find_docids_of_facet_within_bounds::( &txn, index.content.remap_key_type::>(), 0, &start, &end, + &mut docids, ) .unwrap(); results.push_str(&format!("{}\n", display_bitmap(&docids))); @@ -399,12 +375,14 @@ mod tests { let i = i as f64; let start = Bound::Included(i); let end = Bound::Included(255. - i); - let docids = find_docids_of_facet_within_bounds::( + let mut docids = RoaringBitmap::new(); + find_docids_of_facet_within_bounds::( &txn, index.content.remap_key_type::>(), 0, &start, &end, + &mut docids, ) .unwrap(); results.push_str(&format!("{}\n", display_bitmap(&docids))); @@ -418,12 +396,14 @@ mod tests { let i = i as f64; let start = Bound::Excluded(i); let end = Bound::Excluded(255. - i); - let docids = find_docids_of_facet_within_bounds::( + let mut docids = RoaringBitmap::new(); + find_docids_of_facet_within_bounds::( &txn, index.content.remap_key_type::>(), 0, &start, &end, + &mut docids, ) .unwrap(); results.push_str(&format!("{}\n", display_bitmap(&docids))); diff --git a/milli/src/search/facet/facet_sort_ascending.rs b/milli/src/search/facet/facet_sort_ascending.rs index f320f9e77..33ca7d1ce 100644 --- a/milli/src/search/facet/facet_sort_ascending.rs +++ b/milli/src/search/facet/facet_sort_ascending.rs @@ -83,49 +83,12 @@ impl<'t, 'e> Iterator for AscendingFacetSort<'t, 'e> { #[cfg(test)] mod tests { - use rand::{Rng, SeedableRng}; - use roaring::RoaringBitmap; - - use crate::heed_codec::facet::OrderedF64Codec; use crate::milli_snap; use crate::search::facet::facet_sort_ascending::ascending_facet_sort; + use crate::search::facet::tests::{get_random_looking_index, get_simple_index}; use crate::snapshot_tests::display_bitmap; - use crate::update::facet::tests::FacetIndex; + use roaring::RoaringBitmap; - fn get_simple_index() -> FacetIndex { - let index = FacetIndex::::new(4, 8, 5); - let mut txn = index.env.write_txn().unwrap(); - for i in 0..256u16 { - let mut bitmap = RoaringBitmap::new(); - bitmap.insert(i as u32); - index.insert(&mut txn, 0, &(i as f64), &bitmap); - } - txn.commit().unwrap(); - index - } - fn get_random_looking_index() -> FacetIndex { - let index = FacetIndex::::new(4, 8, 5); - let mut txn = index.env.write_txn().unwrap(); - - let mut rng = rand::rngs::SmallRng::from_seed([0; 32]); - let keys = - std::iter::from_fn(|| Some(rng.gen_range(0..256))).take(128).collect::>(); - - for (_i, key) in keys.into_iter().enumerate() { - let mut bitmap = RoaringBitmap::new(); - bitmap.insert(key); - bitmap.insert(key + 100); - index.insert(&mut txn, 0, &(key as f64), &bitmap); - } - txn.commit().unwrap(); - index - } - - #[test] - fn random_looking_index_snap() { - let index = get_random_looking_index(); - milli_snap!(format!("{index}")); - } #[test] fn filter_sort() { let indexes = [get_simple_index(), get_random_looking_index()]; diff --git a/milli/src/search/facet/facet_sort_descending.rs b/milli/src/search/facet/facet_sort_descending.rs index be5fe7841..69f286886 100644 --- a/milli/src/search/facet/facet_sort_descending.rs +++ b/milli/src/search/facet/facet_sort_descending.rs @@ -116,49 +116,13 @@ impl<'t> Iterator for DescendingFacetSort<'t> { #[cfg(test)] mod tests { - use rand::{Rng, SeedableRng}; - use roaring::RoaringBitmap; - - use crate::heed_codec::facet::{ByteSliceRef, FacetGroupKeyCodec, OrderedF64Codec}; + use crate::heed_codec::facet::{ByteSliceRef, FacetGroupKeyCodec}; use crate::milli_snap; use crate::search::facet::facet_sort_descending::descending_facet_sort; + use crate::search::facet::tests::{get_random_looking_index, get_simple_index}; use crate::snapshot_tests::display_bitmap; - use crate::update::facet::tests::FacetIndex; + use roaring::RoaringBitmap; - fn get_simple_index() -> FacetIndex { - let index = FacetIndex::::new(4, 8, 5); - let mut txn = index.env.write_txn().unwrap(); - for i in 0..256u16 { - let mut bitmap = RoaringBitmap::new(); - bitmap.insert(i as u32); - index.insert(&mut txn, 0, &(i as f64), &bitmap); - } - txn.commit().unwrap(); - index - } - fn get_random_looking_index() -> FacetIndex { - let index = FacetIndex::::new(4, 8, 5); - let mut txn = index.env.write_txn().unwrap(); - - let mut rng = rand::rngs::SmallRng::from_seed([0; 32]); - let keys = - std::iter::from_fn(|| Some(rng.gen_range(0..256))).take(128).collect::>(); - - for (_i, key) in keys.into_iter().enumerate() { - let mut bitmap = RoaringBitmap::new(); - bitmap.insert(key); - bitmap.insert(key + 100); - index.insert(&mut txn, 0, &(key as f64), &bitmap); - } - txn.commit().unwrap(); - index - } - - #[test] - fn random_looking_index_snap() { - let index = get_random_looking_index(); - milli_snap!(format!("{index}")); - } #[test] fn filter_sort_descending() { let indexes = [get_simple_index(), get_random_looking_index()]; diff --git a/milli/src/search/facet/filter.rs b/milli/src/search/facet/filter.rs index 15edafb03..4263eea7b 100644 --- a/milli/src/search/facet/filter.rs +++ b/milli/src/search/facet/filter.rs @@ -278,11 +278,9 @@ impl<'a> Filter<'a> { (Excluded(l), Included(r)) if l >= r => return Ok(()), (_, _) => (), } - let x = facet_range_search::find_docids_of_facet_within_bounds::( - rtxn, db, field_id, &left, &right, + facet_range_search::find_docids_of_facet_within_bounds::( + rtxn, db, field_id, &left, &right, output, )?; - // TODO: the facet range search should take a mutable roaring bitmap as argument - *output = x; Ok(()) } diff --git a/milli/src/search/facet/mod.rs b/milli/src/search/facet/mod.rs index fc71acf37..415c2b51a 100644 --- a/milli/src/search/facet/mod.rs +++ b/milli/src/search/facet/mod.rs @@ -74,3 +74,40 @@ pub(crate) fn get_highest_level<'t>( }) .unwrap_or(0)) } + +#[cfg(test)] +pub(crate) mod tests { + use rand::{Rng, SeedableRng}; + use roaring::RoaringBitmap; + + use crate::{heed_codec::facet::OrderedF64Codec, update::facet::tests::FacetIndex}; + + pub fn get_simple_index() -> FacetIndex { + let index = FacetIndex::::new(4, 8, 5); + let mut txn = index.env.write_txn().unwrap(); + for i in 0..256u16 { + let mut bitmap = RoaringBitmap::new(); + bitmap.insert(i as u32); + index.insert(&mut txn, 0, &(i as f64), &bitmap); + } + txn.commit().unwrap(); + index + } + pub fn get_random_looking_index() -> FacetIndex { + let index = FacetIndex::::new(4, 8, 5); + let mut txn = index.env.write_txn().unwrap(); + + let mut rng = rand::rngs::SmallRng::from_seed([0; 32]); + let keys = + std::iter::from_fn(|| Some(rng.gen_range(0..256))).take(128).collect::>(); + + for (_i, key) in keys.into_iter().enumerate() { + let mut bitmap = RoaringBitmap::new(); + bitmap.insert(key); + bitmap.insert(key + 100); + index.insert(&mut txn, 0, &(key as f64), &bitmap); + } + txn.commit().unwrap(); + index + } +} diff --git a/milli/src/search/facet/snapshots/facet_distribution_iter.rs/random_looking_index_snap/random_looking_index_snap.hash.snap b/milli/src/search/facet/snapshots/facet_distribution_iter.rs/random_looking_index_snap/random_looking_index_snap.hash.snap deleted file mode 100644 index 661e1a35b..000000000 --- a/milli/src/search/facet/snapshots/facet_distribution_iter.rs/random_looking_index_snap/random_looking_index_snap.hash.snap +++ /dev/null @@ -1,4 +0,0 @@ ---- -source: milli/src/search/facet/facet_distribution_iter.rs ---- -3256c76a7c1b768a013e78d5fa6e9ff9 diff --git a/milli/src/search/facet/snapshots/facet_sort_ascending.rs/random_looking_index_snap/random_looking_index_snap.hash.snap b/milli/src/search/facet/snapshots/facet_sort_ascending.rs/random_looking_index_snap/random_looking_index_snap.hash.snap deleted file mode 100644 index 64ff762db..000000000 --- a/milli/src/search/facet/snapshots/facet_sort_ascending.rs/random_looking_index_snap/random_looking_index_snap.hash.snap +++ /dev/null @@ -1,4 +0,0 @@ ---- -source: milli/src/search/facet/facet_sort_ascending.rs ---- -3256c76a7c1b768a013e78d5fa6e9ff9 diff --git a/milli/src/search/facet/snapshots/facet_sort_descending.rs/random_looking_index_snap/random_looking_index_snap.hash.snap b/milli/src/search/facet/snapshots/facet_sort_descending.rs/random_looking_index_snap/random_looking_index_snap.hash.snap deleted file mode 100644 index 0649e3c5d..000000000 --- a/milli/src/search/facet/snapshots/facet_sort_descending.rs/random_looking_index_snap/random_looking_index_snap.hash.snap +++ /dev/null @@ -1,4 +0,0 @@ ---- -source: milli/src/search/facet/facet_sort_descending.rs ---- -3256c76a7c1b768a013e78d5fa6e9ff9