diff --git a/milli/src/external_documents_ids.rs b/milli/src/external_documents_ids.rs index 0e4891649..ec419446c 100644 --- a/milli/src/external_documents_ids.rs +++ b/milli/src/external_documents_ids.rs @@ -2,7 +2,6 @@ use std::collections::HashMap; use heed::types::{OwnedType, Str}; use heed::{Database, RoIter, RoTxn, RwTxn}; -use roaring::RoaringBitmap; use crate::{DocumentId, BEU32}; @@ -44,23 +43,6 @@ impl ExternalDocumentsIds { Ok(map) } - /// Looks for the internal ids in the passed bitmap, and returns an iterator over the mapping between - /// these internal ids and their external id. - /// - /// The returned iterator has `Result<(String, DocumentId), RoaringBitmap>` as `Item`, - /// where the returned values can be: - /// - `Ok((external_id, internal_id))`: if a mapping was found - /// - `Err(remaining_ids)`: if the external ids for some of the requested internal ids weren't found. - /// In that case the returned bitmap contains the internal ids whose external ids were not found after traversing - /// the entire fst. - pub fn find_external_id_of<'t>( - &self, - rtxn: &'t RoTxn, - internal_ids: RoaringBitmap, - ) -> heed::Result> { - self.0.iter(rtxn).map(|iter| ExternalToInternalOwnedIterator { iter, internal_ids }) - } - /// Applies the list of operations passed as argument, modifying the current external to internal id mapping. /// /// If the list contains multiple operations on the same external id, then the result is unspecified. @@ -91,56 +73,3 @@ impl ExternalDocumentsIds { self.0.iter(rtxn) } } - -/// An iterator over mappings between requested internal ids and external ids. -/// -/// See [`ExternalDocumentsIds::find_external_id_of`] for details. -pub struct ExternalToInternalOwnedIterator<'t> { - iter: RoIter<'t, Str, OwnedType>, - internal_ids: RoaringBitmap, -} - -impl<'t> Iterator for ExternalToInternalOwnedIterator<'t> { - /// A result indicating if a mapping was found, or if the stream was exhausted without finding all internal ids. - type Item = Result<(&'t str, DocumentId), RoaringBitmap>; - - fn next(&mut self) -> Option { - // if all requested ids were found, we won't find any other, so short-circuit - if self.internal_ids.is_empty() { - return None; - } - loop { - let (external, internal) = match self.iter.next() { - Some(Ok((external, internal))) => (external, internal), - // TODO manage this better, remove panic - Some(Err(e)) => panic!("{}", e), - _ => { - // we exhausted the stream but we still have some internal ids to find - let remaining_ids = std::mem::take(&mut self.internal_ids); - return Some(Err(remaining_ids)); - // note: next calls to `next` will return `None` since we replaced the internal_ids - // with the default empty bitmap - } - }; - let internal = internal.get(); - let was_contained = self.internal_ids.remove(internal); - if was_contained { - return Some(Ok((external, internal))); - } - } - } -} - -impl<'t> ExternalToInternalOwnedIterator<'t> { - /// Returns the bitmap of internal ids whose external id are yet to be found - pub fn remaining_internal_ids(&self) -> &RoaringBitmap { - &self.internal_ids - } - - /// Consumes this iterator and returns an iterator over only the external ids, ignoring the internal ids. - /// - /// Use this when you don't need the mapping between the external and the internal ids. - pub fn only_external_ids(self) -> impl Iterator> + 't { - self.map(|res| res.map(|(external, _internal)| external.to_owned())) - } -} diff --git a/milli/src/update/index_documents/helpers/merge_functions.rs b/milli/src/update/index_documents/helpers/merge_functions.rs index 5d9ca7ef2..d355ead68 100644 --- a/milli/src/update/index_documents/helpers/merge_functions.rs +++ b/milli/src/update/index_documents/helpers/merge_functions.rs @@ -12,18 +12,6 @@ use crate::Result; pub type MergeFn = for<'a> fn(&[u8], &[Cow<'a, [u8]>]) -> Result>; -#[allow(unused)] -pub fn concat_u32s_array<'a>(_key: &[u8], values: &[Cow<'a, [u8]>]) -> Result> { - if values.len() == 1 { - Ok(values[0].clone()) - } else { - let capacity = values.iter().map(|v| v.len()).sum::(); - let mut output = Vec::with_capacity(capacity); - values.iter().for_each(|integers| output.extend_from_slice(integers)); - Ok(Cow::Owned(output)) - } -} - pub fn serialize_roaring_bitmap(bitmap: &RoaringBitmap, buffer: &mut Vec) -> io::Result<()> { buffer.clear(); buffer.reserve(bitmap.serialized_size()); diff --git a/milli/src/update/index_documents/helpers/mod.rs b/milli/src/update/index_documents/helpers/mod.rs index 841c09543..52638d6f6 100644 --- a/milli/src/update/index_documents/helpers/mod.rs +++ b/milli/src/update/index_documents/helpers/mod.rs @@ -13,11 +13,10 @@ pub use grenad_helpers::{ GrenadParameters, MergeableReader, }; pub use merge_functions::{ - concat_u32s_array, keep_first, keep_latest_obkv, merge_btreeset_string, - merge_cbo_roaring_bitmaps, merge_deladd_cbo_roaring_bitmaps, - merge_deladd_cbo_roaring_bitmaps_into_cbo_roaring_bitmap, merge_roaring_bitmaps, - obkvs_keep_last_addition_merge_deletions, obkvs_merge_additions_and_deletions, - serialize_roaring_bitmap, MergeFn, + keep_first, keep_latest_obkv, merge_btreeset_string, merge_cbo_roaring_bitmaps, + merge_deladd_cbo_roaring_bitmaps, merge_deladd_cbo_roaring_bitmaps_into_cbo_roaring_bitmap, + merge_roaring_bitmaps, obkvs_keep_last_addition_merge_deletions, + obkvs_merge_additions_and_deletions, serialize_roaring_bitmap, MergeFn, }; use crate::MAX_WORD_LENGTH; @@ -46,11 +45,6 @@ where Some((head, tail)) } -#[allow(unused)] -pub fn read_u32_ne_bytes(bytes: &[u8]) -> impl Iterator + '_ { - bytes.chunks_exact(4).flat_map(TryInto::try_into).map(u32::from_ne_bytes) -} - /// Converts an fst Stream into an HashSet of Strings. pub fn fst_stream_into_hashset<'f, I, S>(stream: I) -> HashSet> where