diff --git a/milli/src/external_documents_ids.rs b/milli/src/external_documents_ids.rs index 3dec638da..3dce18b00 100644 --- a/milli/src/external_documents_ids.rs +++ b/milli/src/external_documents_ids.rs @@ -1,8 +1,13 @@ use std::borrow::Cow; +use std::collections::HashMap; use std::convert::TryInto; +use std::{fmt, str}; +use fst::map::IndexedValue; use fst::{IntoStreamer, Streamer}; +const DELETED_ID: u64 = u64::MAX; + pub struct ExternalDocumentsIds<'a> { pub(crate) hard: fst::Map>, pub(crate) soft: fst::Map>, @@ -31,8 +36,7 @@ impl<'a> ExternalDocumentsIds<'a> { pub fn get>(&self, external_id: A) -> Option { let external_id = external_id.as_ref(); match self.soft.get(external_id).or_else(|| self.hard.get(external_id)) { - // u64 MAX means deleted in the soft fst map - Some(id) if id != u64::MAX => Some(id.try_into().unwrap()), + Some(id) if id != DELETED_ID => Some(id.try_into().unwrap()), _otherwise => None, } } @@ -47,9 +51,10 @@ impl<'a> ExternalDocumentsIds<'a> { if docids.iter().any(|v| v.index == 1) { // If the `other` set returns a value here it means // that it must be marked as deleted. - new_soft_builder.insert(external_id, u64::MAX)?; + new_soft_builder.insert(external_id, DELETED_ID)?; } else { - new_soft_builder.insert(external_id, docids[0].value)?; + let value = docids.iter().find(|v| v.index == 0).unwrap().value; + new_soft_builder.insert(external_id, value)?; } } @@ -65,8 +70,8 @@ impl<'a> ExternalDocumentsIds<'a> { let mut new_soft_builder = fst::MapBuilder::memory(); let mut iter = union_op.into_stream(); - while let Some((external_id, docids)) = iter.next() { - let id = docids.last().unwrap().value; + while let Some((external_id, marked_docids)) = iter.next() { + let id = indexed_last_value(marked_docids).unwrap(); new_soft_builder.insert(external_id, id)?; } @@ -77,19 +82,34 @@ impl<'a> ExternalDocumentsIds<'a> { self.merge_soft_into_hard() } + /// An helper function to debug this type, returns an `HashMap` of both, + /// soft and hard fst maps, combined. + pub fn to_hash_map(&self) -> HashMap { + let mut map = HashMap::new(); + + let union_op = self.hard.op().add(&self.soft).r#union(); + let mut iter = union_op.into_stream(); + while let Some((external_id, marked_docids)) = iter.next() { + let id = indexed_last_value(marked_docids).unwrap(); + if id != DELETED_ID { + let external_id = str::from_utf8(external_id).unwrap(); + map.insert(external_id.to_owned(), id.try_into().unwrap()); + } + } + + map + } + fn merge_soft_into_hard(&mut self) -> fst::Result<()> { if self.soft.len() >= self.hard.len() / 2 { let union_op = self.hard.op().add(&self.soft).r#union(); let mut iter = union_op.into_stream(); let mut new_hard_builder = fst::MapBuilder::memory(); - while let Some((external_id, docids)) = iter.next() { - if docids.len() == 2 { - if docids[1].value != u64::MAX { - new_hard_builder.insert(external_id, docids[1].value)?; - } - } else { - new_hard_builder.insert(external_id, docids[0].value)?; + while let Some((external_id, marked_docids)) = iter.next() { + let value = indexed_last_value(marked_docids).unwrap(); + if value != DELETED_ID { + new_hard_builder.insert(external_id, value)?; } } @@ -103,6 +123,12 @@ impl<'a> ExternalDocumentsIds<'a> { } } +impl fmt::Debug for ExternalDocumentsIds<'_> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.debug_tuple("ExternalDocumentsIds").field(&self.to_hash_map()).finish() + } +} + impl Default for ExternalDocumentsIds<'static> { fn default() -> Self { ExternalDocumentsIds { @@ -112,6 +138,11 @@ impl Default for ExternalDocumentsIds<'static> { } } +/// Returns the value of the `IndexedValue` with the highest _index_. +fn indexed_last_value(indexed_values: &[IndexedValue]) -> Option { + indexed_values.iter().copied().max_by_key(|iv| iv.index).map(|iv| iv.value) +} + #[cfg(test)] mod tests { use super::*; @@ -162,4 +193,25 @@ mod tests { assert_eq!(external_documents_ids.get("g"), Some(7)); assert_eq!(external_documents_ids.get("h"), Some(8)); } + + #[test] + fn strange_delete_insert_ids() { + let mut external_documents_ids = ExternalDocumentsIds::default(); + + let new_ids = + fst::Map::from_iter(vec![("1", 0), ("123", 1), ("30", 2), ("456", 3)]).unwrap(); + external_documents_ids.insert_ids(&new_ids).unwrap(); + assert_eq!(external_documents_ids.get("1"), Some(0)); + assert_eq!(external_documents_ids.get("123"), Some(1)); + assert_eq!(external_documents_ids.get("30"), Some(2)); + assert_eq!(external_documents_ids.get("456"), Some(3)); + + let deleted_ids = fst::Set::from_iter(vec!["30"]).unwrap(); + external_documents_ids.delete_ids(deleted_ids).unwrap(); + assert_eq!(external_documents_ids.get("30"), None); + + let new_ids = fst::Map::from_iter(vec![("30", 2)]).unwrap(); + external_documents_ids.insert_ids(&new_ids).unwrap(); + assert_eq!(external_documents_ids.get("30"), Some(2)); + } } diff --git a/milli/src/index.rs b/milli/src/index.rs index c8e5ab089..6bcb0aebd 100644 --- a/milli/src/index.rs +++ b/milli/src/index.rs @@ -218,7 +218,7 @@ impl Index { } /// Deletes the primary key of the documents, this can be done to reset indexes settings. - pub fn delete_primary_key(&self, wtxn: &mut RwTxn) -> heed::Result { + pub(crate) fn delete_primary_key(&self, wtxn: &mut RwTxn) -> heed::Result { self.main.delete::<_, Str>(wtxn, main_key::PRIMARY_KEY_KEY) } @@ -333,7 +333,7 @@ impl Index { /// Deletes the displayed fields ids, this will make the engine to display /// all the documents attributes in the order of the `FieldsIdsMap`. - pub fn delete_displayed_fields(&self, wtxn: &mut RwTxn) -> heed::Result { + pub(crate) fn delete_displayed_fields(&self, wtxn: &mut RwTxn) -> heed::Result { self.main.delete::<_, Str>(wtxn, main_key::DISPLAYED_FIELDS_KEY) } @@ -383,7 +383,7 @@ impl Index { } /// Deletes the searchable fields, when no fields are specified, all fields are indexed. - pub fn delete_searchable_fields(&self, wtxn: &mut RwTxn) -> heed::Result { + pub(crate) fn delete_searchable_fields(&self, wtxn: &mut RwTxn) -> heed::Result { self.main.delete::<_, Str>(wtxn, main_key::SEARCHABLE_FIELDS_KEY) } @@ -429,7 +429,7 @@ impl Index { } /// Deletes the filterable fields ids in the database. - pub fn delete_filterable_fields(&self, wtxn: &mut RwTxn) -> heed::Result { + pub(crate) fn delete_filterable_fields(&self, wtxn: &mut RwTxn) -> heed::Result { self.main.delete::<_, Str>(wtxn, main_key::FILTERABLE_FIELDS_KEY) } @@ -602,7 +602,7 @@ impl Index { self.main.put::<_, Str, SerdeJson<&[Criterion]>>(wtxn, main_key::CRITERIA_KEY, &criteria) } - pub fn delete_criteria(&self, wtxn: &mut RwTxn) -> heed::Result { + pub(crate) fn delete_criteria(&self, wtxn: &mut RwTxn) -> heed::Result { self.main.delete::<_, Str>(wtxn, main_key::CRITERIA_KEY) } @@ -642,7 +642,7 @@ impl Index { self.main.put::<_, Str, ByteSlice>(wtxn, main_key::STOP_WORDS_KEY, fst.as_fst().as_bytes()) } - pub fn delete_stop_words(&self, wtxn: &mut RwTxn) -> heed::Result { + pub(crate) fn delete_stop_words(&self, wtxn: &mut RwTxn) -> heed::Result { self.main.delete::<_, Str>(wtxn, main_key::STOP_WORDS_KEY) } @@ -663,7 +663,7 @@ impl Index { self.main.put::<_, Str, SerdeBincode<_>>(wtxn, main_key::SYNONYMS_KEY, synonyms) } - pub fn delete_synonyms(&self, wtxn: &mut RwTxn) -> heed::Result { + pub(crate) fn delete_synonyms(&self, wtxn: &mut RwTxn) -> heed::Result { self.main.delete::<_, Str>(wtxn, main_key::SYNONYMS_KEY) } diff --git a/milli/src/search/criteria/mod.rs b/milli/src/search/criteria/mod.rs index 228d48bd7..2ba3b388f 100644 --- a/milli/src/search/criteria/mod.rs +++ b/milli/src/search/criteria/mod.rs @@ -328,7 +328,7 @@ pub fn resolve_query_tree<'t>( candidates = docids; first_loop = false; } else { - candidates.intersect_with(&docids); + candidates &= &docids; } } Ok(candidates) @@ -358,7 +358,7 @@ pub fn resolve_query_tree<'t>( let mut candidates = RoaringBitmap::new(); for op in ops { let docids = resolve_operation(ctx, op, wdcache)?; - candidates.union_with(&docids); + candidates |= docids; } Ok(candidates) } @@ -381,7 +381,7 @@ fn all_word_pair_proximity_docids, U: AsRef>( let current_docids = ctx .word_pair_proximity_docids(left.as_ref(), right.as_ref(), proximity)? .unwrap_or_default(); - docids.union_with(¤t_docids); + docids |= current_docids; } } Ok(docids) @@ -401,7 +401,7 @@ fn query_docids( let mut docids = RoaringBitmap::new(); for (word, _typo) in words { let current_docids = ctx.word_docids(&word)?.unwrap_or_default(); - docids.union_with(¤t_docids); + docids |= current_docids; } Ok(docids) } else { @@ -413,7 +413,7 @@ fn query_docids( let mut docids = RoaringBitmap::new(); for (word, _typo) in words { let current_docids = ctx.word_docids(&word)?.unwrap_or_default(); - docids.union_with(¤t_docids); + docids |= current_docids; } Ok(docids) } @@ -430,7 +430,7 @@ fn query_pair_proximity_docids( if proximity >= 8 { let mut candidates = query_docids(ctx, left, wdcache)?; let right_candidates = query_docids(ctx, right, wdcache)?; - candidates.intersect_with(&right_candidates); + candidates &= right_candidates; return Ok(candidates); } @@ -463,7 +463,7 @@ fn query_pair_proximity_docids( proximity, )? .unwrap_or_default(); - docids.union_with(¤t_docids); + docids |= current_docids; } Ok(docids) } else if prefix { diff --git a/milli/src/search/criteria/proximity.rs b/milli/src/search/criteria/proximity.rs index 3e8196e93..f884de160 100644 --- a/milli/src/search/criteria/proximity.rs +++ b/milli/src/search/criteria/proximity.rs @@ -274,11 +274,11 @@ fn resolve_candidates<'t>( let mut candidates = query_pair_proximity_docids(ctx, lr, rl, pair_p + 1, wdcache)?; if lcandidates.len() < rcandidates.len() { - candidates.intersect_with(lcandidates); - candidates.intersect_with(rcandidates); + candidates &= lcandidates; + candidates &= rcandidates; } else { - candidates.intersect_with(rcandidates); - candidates.intersect_with(lcandidates); + candidates &= rcandidates; + candidates &= lcandidates; } if !candidates.is_empty() { output.push((ll.clone(), rr.clone(), candidates)); @@ -317,7 +317,7 @@ fn resolve_candidates<'t>( for (_, rtail, mut candidates) in mdfs(ctx, tail, proximity - p, cache, wdcache)? { - candidates.intersect_with(&head_candidates); + candidates &= &head_candidates; if !candidates.is_empty() { output.push((lhead.clone(), rtail, candidates)); } @@ -334,7 +334,7 @@ fn resolve_candidates<'t>( let mut candidates = RoaringBitmap::new(); for (_, _, cds) in resolve_operation(ctx, query_tree, proximity, cache, wdcache)? { - candidates.union_with(&cds); + candidates |= cds; } Ok(candidates) } diff --git a/milli/src/search/criteria/typo.rs b/milli/src/search/criteria/typo.rs index f4ae15f0a..97a9b4e4b 100644 --- a/milli/src/search/criteria/typo.rs +++ b/milli/src/search/criteria/typo.rs @@ -281,7 +281,7 @@ fn resolve_candidates<'t>( let mut candidates = RoaringBitmap::new(); for op in ops { let docids = resolve_operation(ctx, op, number_typos, cache, wdcache)?; - candidates.union_with(&docids); + candidates |= docids; } Ok(candidates) } @@ -329,8 +329,8 @@ fn resolve_candidates<'t>( }; if !head_candidates.is_empty() { let tail_candidates = mdfs(ctx, tail, mana - m, cache, wdcache)?; - head_candidates.intersect_with(&tail_candidates); - candidates.union_with(&head_candidates); + head_candidates &= tail_candidates; + candidates |= head_candidates; } } diff --git a/milli/src/search/distinct/facet_distinct.rs b/milli/src/search/distinct/facet_distinct.rs index 290a7602f..91620da2a 100644 --- a/milli/src/search/distinct/facet_distinct.rs +++ b/milli/src/search/distinct/facet_distinct.rs @@ -61,7 +61,7 @@ impl<'a> FacetDistinctIter<'a> { db_name: db_name::FACET_ID_STRING_DOCIDS, key: None, })?; - self.excluded.union_with(&facet_docids); + self.excluded |= facet_docids; } self.excluded.remove(id); @@ -79,7 +79,7 @@ impl<'a> FacetDistinctIter<'a> { db_name: db_name::FACET_ID_F64_DOCIDS, key: None, })?; - self.excluded.union_with(&facet_docids); + self.excluded |= facet_docids; } self.excluded.remove(id); @@ -92,7 +92,7 @@ impl<'a> FacetDistinctIter<'a> { /// handling easier. fn next_inner(&mut self) -> Result> { // The first step is to remove all the excluded documents from our candidates - self.candidates.difference_with(&self.excluded); + self.candidates -= &self.excluded; let mut candidates_iter = self.candidates.iter().skip(self.iter_offset); match candidates_iter.next() { diff --git a/milli/src/search/facet/facet_distribution.rs b/milli/src/search/facet/facet_distribution.rs index 71816cf5d..3f55006f2 100644 --- a/milli/src/search/facet/facet_distribution.rs +++ b/milli/src/search/facet/facet_distribution.rs @@ -122,7 +122,7 @@ impl<'a> FacetDistribution<'a> { for result in iter { let (value, mut docids) = result?; - docids.intersect_with(candidates); + docids &= candidates; if !docids.is_empty() { distribution.insert(value.to_string(), docids.len()); } diff --git a/milli/src/search/facet/filter_condition.rs b/milli/src/search/facet/filter_condition.rs index 31fc6018c..1b1eafcab 100644 --- a/milli/src/search/facet/filter_condition.rs +++ b/milli/src/search/facet/filter_condition.rs @@ -289,7 +289,7 @@ impl FilterCondition { for (i, result) in iter.enumerate() { let ((_fid, level, l, r), docids) = result?; debug!("{:?} to {:?} (level {}) found {} documents", l, r, level, docids.len()); - output.union_with(&docids); + *output |= docids; // We save the leftest and rightest bounds we actually found at this level. if i == 0 { left_found = Some(l); diff --git a/milli/src/search/facet/mod.rs b/milli/src/search/facet/mod.rs index 240d99ccc..4e900bff4 100644 --- a/milli/src/search/facet/mod.rs +++ b/milli/src/search/facet/mod.rs @@ -213,10 +213,10 @@ impl<'t> Iterator for FacetIter<'t> { match result { Ok(((_fid, level, left, right), mut docids)) => { - docids.intersect_with(&documents_ids); + docids &= &*documents_ids; if !docids.is_empty() { if self.must_reduce { - documents_ids.difference_with(&docids); + *documents_ids -= &docids; } if level == 0 { diff --git a/milli/src/search/mod.rs b/milli/src/search/mod.rs index 71d200e0c..f40a6aed6 100644 --- a/milli/src/search/mod.rs +++ b/milli/src/search/mod.rs @@ -173,7 +173,7 @@ impl<'a> Search<'a> { let mut candidates = distinct.distinct(candidates, excluded); - initial_candidates.union_with(&bucket_candidates); + initial_candidates |= bucket_candidates; if offset != 0 { let discarded = candidates.by_ref().take(offset).count(); diff --git a/milli/src/update/available_documents_ids.rs b/milli/src/update/available_documents_ids.rs index 9e3fce75d..653bc7dd2 100644 --- a/milli/src/update/available_documents_ids.rs +++ b/milli/src/update/available_documents_ids.rs @@ -12,7 +12,7 @@ impl AvailableDocumentsIds { match docids.max() { Some(last_id) => { let mut available = RoaringBitmap::from_iter(0..last_id); - available.difference_with(&docids); + available -= docids; let iter = match last_id.checked_add(1) { Some(id) => id..=u32::max_value(), diff --git a/milli/src/update/delete_documents.rs b/milli/src/update/delete_documents.rs index a0c1f48f5..313f8a909 100644 --- a/milli/src/update/delete_documents.rs +++ b/milli/src/update/delete_documents.rs @@ -43,7 +43,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { } pub fn delete_documents(&mut self, docids: &RoaringBitmap) { - self.documents_ids.union_with(docids); + self.documents_ids |= docids; } pub fn delete_external_id(&mut self, external_id: &str) -> Option { @@ -65,7 +65,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { // We remove the documents ids that we want to delete // from the documents in the database and write them back. let current_documents_ids_len = documents_ids.len(); - documents_ids.difference_with(&self.documents_ids); + documents_ids -= &self.documents_ids; self.index.put_documents_ids(self.wtxn, &documents_ids)?; // We can execute a ClearDocuments operation when the number of documents @@ -194,7 +194,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { if let Some((key, mut docids)) = iter.next().transpose()? { if key == word.as_ref() { let previous_len = docids.len(); - docids.difference_with(&self.documents_ids); + docids -= &self.documents_ids; if docids.is_empty() { // safety: we don't keep references from inside the LMDB database. unsafe { iter.del_current()? }; @@ -245,7 +245,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { let (prefix, mut docids) = result?; let prefix = prefix.to_owned(); let previous_len = docids.len(); - docids.difference_with(&self.documents_ids); + docids -= &self.documents_ids; if docids.is_empty() { // safety: we don't keep references from inside the LMDB database. unsafe { iter.del_current()? }; @@ -285,7 +285,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { while let Some(result) = iter.next() { let (key, mut docids) = result?; let previous_len = docids.len(); - docids.difference_with(&self.documents_ids); + docids -= &self.documents_ids; if docids.is_empty() { // safety: we don't keep references from inside the LMDB database. unsafe { iter.del_current()? }; @@ -306,7 +306,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { while let Some(result) = iter.next() { let (bytes, mut docids) = result?; let previous_len = docids.len(); - docids.difference_with(&self.documents_ids); + docids -= &self.documents_ids; if docids.is_empty() { // safety: we don't keep references from inside the LMDB database. unsafe { iter.del_current()? }; @@ -325,7 +325,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { while let Some(result) = iter.next() { let (bytes, mut docids) = result?; let previous_len = docids.len(); - docids.difference_with(&self.documents_ids); + docids -= &self.documents_ids; if docids.is_empty() { // safety: we don't keep references from inside the LMDB database. unsafe { iter.del_current()? }; @@ -344,7 +344,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { while let Some(result) = iter.next() { let (bytes, mut docids) = result?; let previous_len = docids.len(); - docids.difference_with(&self.documents_ids); + docids -= &self.documents_ids; if docids.is_empty() { // safety: we don't keep references from inside the LMDB database. unsafe { iter.del_current()? }; @@ -361,7 +361,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { let mut iter = field_id_word_count_docids.iter_mut(self.wtxn)?; while let Some((key, mut docids)) = iter.next().transpose()? { let previous_len = docids.len(); - docids.difference_with(&self.documents_ids); + docids -= &self.documents_ids; if docids.is_empty() { // safety: we don't keep references from inside the LMDB database. unsafe { iter.del_current()? }; @@ -390,7 +390,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { for field_id in self.index.faceted_fields_ids(self.wtxn)? { // Remove docids from the number faceted documents ids let mut docids = self.index.number_faceted_documents_ids(self.wtxn, field_id)?; - docids.difference_with(&self.documents_ids); + docids -= &self.documents_ids; self.index.put_number_faceted_documents_ids(self.wtxn, field_id, &docids)?; remove_docids_from_field_id_docid_facet_value( @@ -403,7 +403,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { // Remove docids from the string faceted documents ids let mut docids = self.index.string_faceted_documents_ids(self.wtxn, field_id)?; - docids.difference_with(&self.documents_ids); + docids -= &self.documents_ids; self.index.put_string_faceted_documents_ids(self.wtxn, field_id, &docids)?; remove_docids_from_field_id_docid_facet_value( @@ -456,7 +456,7 @@ where while let Some(result) = iter.next() { let (bytes, mut docids) = result?; let previous_len = docids.len(); - docids.difference_with(to_remove); + docids -= to_remove; if docids.is_empty() { // safety: we don't keep references from inside the LMDB database. unsafe { iter.del_current()? }; diff --git a/milli/src/update/facets.rs b/milli/src/update/facets.rs index 09f962bbc..0e2cad69d 100644 --- a/milli/src/update/facets.rs +++ b/milli/src/update/facets.rs @@ -181,7 +181,7 @@ fn compute_facet_number_levels<'t>( } // The right bound is always the bound we run through. - group_docids.union_with(&docids); + group_docids |= docids; right = value; } diff --git a/milli/src/update/index_documents/merge_function.rs b/milli/src/update/index_documents/merge_function.rs index 3d9ffda6a..17283b232 100644 --- a/milli/src/update/index_documents/merge_function.rs +++ b/milli/src/update/index_documents/merge_function.rs @@ -61,8 +61,7 @@ pub fn roaring_bitmap_merge(_key: &[u8], values: &[Cow<[u8]>]) -> Result let mut head = RoaringBitmap::deserialize_from(&head[..])?; for value in tail { - let bitmap = RoaringBitmap::deserialize_from(&value[..])?; - head.union_with(&bitmap); + head |= RoaringBitmap::deserialize_from(&value[..])?; } let mut vec = Vec::with_capacity(head.serialized_size()); @@ -75,8 +74,7 @@ pub fn cbo_roaring_bitmap_merge(_key: &[u8], values: &[Cow<[u8]>]) -> Result IndexDocuments<'t, 'u, 'i, 'a> { self.index.put_external_documents_ids(self.wtxn, &external_documents_ids)?; // We merge the new documents ids with the existing ones. - documents_ids.union_with(&new_documents_ids); - documents_ids.union_with(&replaced_documents_ids); + documents_ids |= new_documents_ids; + documents_ids |= replaced_documents_ids; self.index.put_documents_ids(self.wtxn, &documents_ids)?; let mut database_count = 0; @@ -845,6 +845,7 @@ mod tests { use heed::EnvOpenOptions; use super::*; + use crate::update::DeleteDocuments; #[test] fn simple_document_replacement() { @@ -1303,4 +1304,52 @@ mod tests { builder.execute(Cursor::new(documents), |_, _| ()).unwrap(); wtxn.commit().unwrap(); } + + #[test] + fn delete_documents_then_insert() { + let path = tempfile::tempdir().unwrap(); + let mut options = EnvOpenOptions::new(); + options.map_size(10 * 1024 * 1024); // 10 MB + let index = Index::new(options, &path).unwrap(); + + let mut wtxn = index.write_txn().unwrap(); + let content = &br#"[ + { "objectId": 123, "title": "Pride and Prejudice", "comment": "A great book" }, + { "objectId": 456, "title": "Le Petit Prince", "comment": "A french book" }, + { "objectId": 1, "title": "Alice In Wonderland", "comment": "A weird book" }, + { "objectId": 30, "title": "Hamlet" } + ]"#[..]; + let mut builder = IndexDocuments::new(&mut wtxn, &index, 0); + builder.update_format(UpdateFormat::Json); + builder.execute(content, |_, _| ()).unwrap(); + + assert_eq!(index.primary_key(&wtxn).unwrap(), Some("objectId")); + + // Delete not all of the documents but some of them. + let mut builder = DeleteDocuments::new(&mut wtxn, &index, 1).unwrap(); + builder.delete_external_id("30"); + builder.execute().unwrap(); + + let external_documents_ids = index.external_documents_ids(&wtxn).unwrap(); + assert!(external_documents_ids.get("30").is_none()); + + let content = &br#"[ + { "objectId": 30, "title": "Hamlet" } + ]"#[..]; + let mut builder = IndexDocuments::new(&mut wtxn, &index, 0); + builder.update_format(UpdateFormat::Json); + builder.execute(content, |_, _| ()).unwrap(); + + let external_documents_ids = index.external_documents_ids(&wtxn).unwrap(); + assert!(external_documents_ids.get("30").is_some()); + + let content = &br#"[ + { "objectId": 30, "title": "Hamlet" } + ]"#[..]; + let mut builder = IndexDocuments::new(&mut wtxn, &index, 0); + builder.update_format(UpdateFormat::Json); + builder.execute(content, |_, _| ()).unwrap(); + + wtxn.commit().unwrap(); + } } diff --git a/milli/src/update/words_level_positions.rs b/milli/src/update/words_level_positions.rs index d43cd19b8..c656d7105 100644 --- a/milli/src/update/words_level_positions.rs +++ b/milli/src/update/words_level_positions.rs @@ -236,7 +236,7 @@ fn compute_positions_levels( } // The right bound is always the bound we run through. - group_docids.union_with(&docids); + group_docids |= docids; } if !group_docids.is_empty() {