269: Fix bug when inserting previously deleted documents r=Kerollmops a=Kerollmops

This PR fixes #268.

The issue was in the `ExternalDocumentsIds` implementation in the specific case that an external document id was in the soft map marked as deleted.

The bug was due to a wrong assumption on my side about how the FST unions return the `IndexedValue`s: I thought the values returned in the array were in the same order as the FSTs given to the `OpBuilder`, but in fact [the `IndexedValue`'s `index` field is there to indicate which FST each value comes from](https://docs.rs/fst/0.4.7/fst/map/struct.IndexedValue.html).

271: Remove the roaring operation functions warnings r=Kerollmops a=Kerollmops

In this PR we are just replacing the usages of the roaring operation functions with the new operators. This removes a lot of warnings.

Co-authored-by: Kerollmops <clement@meilisearch.com>
This commit is contained in:
bors[bot] 2021-06-30 12:34:55 +00:00 committed by GitHub
commit b4dcdbf00d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
16 changed files with 164 additions and 65 deletions

View File

@ -1,8 +1,13 @@
use std::borrow::Cow; use std::borrow::Cow;
use std::collections::HashMap;
use std::convert::TryInto; use std::convert::TryInto;
use std::{fmt, str};
use fst::map::IndexedValue;
use fst::{IntoStreamer, Streamer}; use fst::{IntoStreamer, Streamer};
const DELETED_ID: u64 = u64::MAX;
pub struct ExternalDocumentsIds<'a> { pub struct ExternalDocumentsIds<'a> {
pub(crate) hard: fst::Map<Cow<'a, [u8]>>, pub(crate) hard: fst::Map<Cow<'a, [u8]>>,
pub(crate) soft: fst::Map<Cow<'a, [u8]>>, pub(crate) soft: fst::Map<Cow<'a, [u8]>>,
@ -31,8 +36,7 @@ impl<'a> ExternalDocumentsIds<'a> {
pub fn get<A: AsRef<[u8]>>(&self, external_id: A) -> Option<u32> { pub fn get<A: AsRef<[u8]>>(&self, external_id: A) -> Option<u32> {
let external_id = external_id.as_ref(); let external_id = external_id.as_ref();
match self.soft.get(external_id).or_else(|| self.hard.get(external_id)) { match self.soft.get(external_id).or_else(|| self.hard.get(external_id)) {
// u64 MAX means deleted in the soft fst map Some(id) if id != DELETED_ID => Some(id.try_into().unwrap()),
Some(id) if id != u64::MAX => Some(id.try_into().unwrap()),
_otherwise => None, _otherwise => None,
} }
} }
@ -47,9 +51,10 @@ impl<'a> ExternalDocumentsIds<'a> {
if docids.iter().any(|v| v.index == 1) { if docids.iter().any(|v| v.index == 1) {
// If the `other` set returns a value here it means // If the `other` set returns a value here it means
// that it must be marked as deleted. // that it must be marked as deleted.
new_soft_builder.insert(external_id, u64::MAX)?; new_soft_builder.insert(external_id, DELETED_ID)?;
} else { } else {
new_soft_builder.insert(external_id, docids[0].value)?; let value = docids.iter().find(|v| v.index == 0).unwrap().value;
new_soft_builder.insert(external_id, value)?;
} }
} }
@ -65,8 +70,8 @@ impl<'a> ExternalDocumentsIds<'a> {
let mut new_soft_builder = fst::MapBuilder::memory(); let mut new_soft_builder = fst::MapBuilder::memory();
let mut iter = union_op.into_stream(); let mut iter = union_op.into_stream();
while let Some((external_id, docids)) = iter.next() { while let Some((external_id, marked_docids)) = iter.next() {
let id = docids.last().unwrap().value; let id = indexed_last_value(marked_docids).unwrap();
new_soft_builder.insert(external_id, id)?; new_soft_builder.insert(external_id, id)?;
} }
@ -77,19 +82,34 @@ impl<'a> ExternalDocumentsIds<'a> {
self.merge_soft_into_hard() self.merge_soft_into_hard()
} }
/// A debugging helper that flattens the hard and soft fst maps into a single
/// `HashMap`, leaving out every external id whose resolved value is `DELETED_ID`.
///
/// # Panics
///
/// Panics if an external id is not valid UTF-8 or if a stored id cannot be
/// converted into a `u32`.
pub fn to_hash_map(&self) -> HashMap<String, u32> {
    let mut combined = HashMap::new();
    let merged = self.hard.op().add(&self.soft).r#union();
    let mut stream = merged.into_stream();
    while let Some((raw_key, indexed_values)) = stream.next() {
        // Keep the value reported by `indexed_last_value` for this key.
        match indexed_last_value(indexed_values).unwrap() {
            DELETED_ID => continue,
            docid => {
                let key = str::from_utf8(raw_key).unwrap();
                combined.insert(key.to_owned(), docid.try_into().unwrap());
            }
        }
    }
    combined
}
fn merge_soft_into_hard(&mut self) -> fst::Result<()> { fn merge_soft_into_hard(&mut self) -> fst::Result<()> {
if self.soft.len() >= self.hard.len() / 2 { if self.soft.len() >= self.hard.len() / 2 {
let union_op = self.hard.op().add(&self.soft).r#union(); let union_op = self.hard.op().add(&self.soft).r#union();
let mut iter = union_op.into_stream(); let mut iter = union_op.into_stream();
let mut new_hard_builder = fst::MapBuilder::memory(); let mut new_hard_builder = fst::MapBuilder::memory();
while let Some((external_id, docids)) = iter.next() { while let Some((external_id, marked_docids)) = iter.next() {
if docids.len() == 2 { let value = indexed_last_value(marked_docids).unwrap();
if docids[1].value != u64::MAX { if value != DELETED_ID {
new_hard_builder.insert(external_id, docids[1].value)?; new_hard_builder.insert(external_id, value)?;
}
} else {
new_hard_builder.insert(external_id, docids[0].value)?;
} }
} }
@ -103,6 +123,12 @@ impl<'a> ExternalDocumentsIds<'a> {
} }
} }
/// Formats the type as a tuple struct wrapping the map built by `to_hash_map`.
impl fmt::Debug for ExternalDocumentsIds<'_> {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let entries = self.to_hash_map();
        let mut tuple = f.debug_tuple("ExternalDocumentsIds");
        tuple.field(&entries);
        tuple.finish()
    }
}
impl Default for ExternalDocumentsIds<'static> { impl Default for ExternalDocumentsIds<'static> {
fn default() -> Self { fn default() -> Self {
ExternalDocumentsIds { ExternalDocumentsIds {
@ -112,6 +138,11 @@ impl Default for ExternalDocumentsIds<'static> {
} }
} }
/// Picks the `IndexedValue` carrying the greatest `index` and returns its `value`.
///
/// Returns `None` when the slice is empty.
fn indexed_last_value(indexed_values: &[IndexedValue]) -> Option<u64> {
    let newest = indexed_values.iter().max_by_key(|indexed| indexed.index)?;
    Some(newest.value)
}
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use super::*;
@ -162,4 +193,25 @@ mod tests {
assert_eq!(external_documents_ids.get("g"), Some(7)); assert_eq!(external_documents_ids.get("g"), Some(7));
assert_eq!(external_documents_ids.get("h"), Some(8)); assert_eq!(external_documents_ids.get("h"), Some(8));
} }
#[test]
fn strange_delete_insert_ids() {
    // Start from an empty structure and register four external ids.
    let mut ids = ExternalDocumentsIds::default();
    let inserted =
        fst::Map::from_iter(vec![("1", 0), ("123", 1), ("30", 2), ("456", 3)]).unwrap();
    ids.insert_ids(&inserted).unwrap();
    for &(external_id, internal_id) in &[("1", 0), ("123", 1), ("30", 2), ("456", 3)] {
        assert_eq!(ids.get(external_id), Some(internal_id));
    }

    // Deleting "30" must make it unreachable.
    let to_delete = fst::Set::from_iter(vec!["30"]).unwrap();
    ids.delete_ids(to_delete).unwrap();
    assert_eq!(ids.get("30"), None);

    // Inserting the previously deleted id again must make it visible again.
    let reinserted = fst::Map::from_iter(vec![("30", 2)]).unwrap();
    ids.insert_ids(&reinserted).unwrap();
    assert_eq!(ids.get("30"), Some(2));
}
} }

View File

@ -218,7 +218,7 @@ impl Index {
} }
/// Deletes the primary key of the documents, this can be done to reset indexes settings. /// Deletes the primary key of the documents, this can be done to reset indexes settings.
pub fn delete_primary_key(&self, wtxn: &mut RwTxn) -> heed::Result<bool> { pub(crate) fn delete_primary_key(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
self.main.delete::<_, Str>(wtxn, main_key::PRIMARY_KEY_KEY) self.main.delete::<_, Str>(wtxn, main_key::PRIMARY_KEY_KEY)
} }
@ -333,7 +333,7 @@ impl Index {
/// Deletes the displayed fields ids, this will make the engine to display /// Deletes the displayed fields ids, this will make the engine to display
/// all the documents attributes in the order of the `FieldsIdsMap`. /// all the documents attributes in the order of the `FieldsIdsMap`.
pub fn delete_displayed_fields(&self, wtxn: &mut RwTxn) -> heed::Result<bool> { pub(crate) fn delete_displayed_fields(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
self.main.delete::<_, Str>(wtxn, main_key::DISPLAYED_FIELDS_KEY) self.main.delete::<_, Str>(wtxn, main_key::DISPLAYED_FIELDS_KEY)
} }
@ -383,7 +383,7 @@ impl Index {
} }
/// Deletes the searchable fields, when no fields are specified, all fields are indexed. /// Deletes the searchable fields, when no fields are specified, all fields are indexed.
pub fn delete_searchable_fields(&self, wtxn: &mut RwTxn) -> heed::Result<bool> { pub(crate) fn delete_searchable_fields(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
self.main.delete::<_, Str>(wtxn, main_key::SEARCHABLE_FIELDS_KEY) self.main.delete::<_, Str>(wtxn, main_key::SEARCHABLE_FIELDS_KEY)
} }
@ -429,7 +429,7 @@ impl Index {
} }
/// Deletes the filterable fields ids in the database. /// Deletes the filterable fields ids in the database.
pub fn delete_filterable_fields(&self, wtxn: &mut RwTxn) -> heed::Result<bool> { pub(crate) fn delete_filterable_fields(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
self.main.delete::<_, Str>(wtxn, main_key::FILTERABLE_FIELDS_KEY) self.main.delete::<_, Str>(wtxn, main_key::FILTERABLE_FIELDS_KEY)
} }
@ -602,7 +602,7 @@ impl Index {
self.main.put::<_, Str, SerdeJson<&[Criterion]>>(wtxn, main_key::CRITERIA_KEY, &criteria) self.main.put::<_, Str, SerdeJson<&[Criterion]>>(wtxn, main_key::CRITERIA_KEY, &criteria)
} }
pub fn delete_criteria(&self, wtxn: &mut RwTxn) -> heed::Result<bool> { pub(crate) fn delete_criteria(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
self.main.delete::<_, Str>(wtxn, main_key::CRITERIA_KEY) self.main.delete::<_, Str>(wtxn, main_key::CRITERIA_KEY)
} }
@ -642,7 +642,7 @@ impl Index {
self.main.put::<_, Str, ByteSlice>(wtxn, main_key::STOP_WORDS_KEY, fst.as_fst().as_bytes()) self.main.put::<_, Str, ByteSlice>(wtxn, main_key::STOP_WORDS_KEY, fst.as_fst().as_bytes())
} }
pub fn delete_stop_words(&self, wtxn: &mut RwTxn) -> heed::Result<bool> { pub(crate) fn delete_stop_words(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
self.main.delete::<_, Str>(wtxn, main_key::STOP_WORDS_KEY) self.main.delete::<_, Str>(wtxn, main_key::STOP_WORDS_KEY)
} }
@ -663,7 +663,7 @@ impl Index {
self.main.put::<_, Str, SerdeBincode<_>>(wtxn, main_key::SYNONYMS_KEY, synonyms) self.main.put::<_, Str, SerdeBincode<_>>(wtxn, main_key::SYNONYMS_KEY, synonyms)
} }
pub fn delete_synonyms(&self, wtxn: &mut RwTxn) -> heed::Result<bool> { pub(crate) fn delete_synonyms(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
self.main.delete::<_, Str>(wtxn, main_key::SYNONYMS_KEY) self.main.delete::<_, Str>(wtxn, main_key::SYNONYMS_KEY)
} }

View File

@ -328,7 +328,7 @@ pub fn resolve_query_tree<'t>(
candidates = docids; candidates = docids;
first_loop = false; first_loop = false;
} else { } else {
candidates.intersect_with(&docids); candidates &= &docids;
} }
} }
Ok(candidates) Ok(candidates)
@ -358,7 +358,7 @@ pub fn resolve_query_tree<'t>(
let mut candidates = RoaringBitmap::new(); let mut candidates = RoaringBitmap::new();
for op in ops { for op in ops {
let docids = resolve_operation(ctx, op, wdcache)?; let docids = resolve_operation(ctx, op, wdcache)?;
candidates.union_with(&docids); candidates |= docids;
} }
Ok(candidates) Ok(candidates)
} }
@ -381,7 +381,7 @@ fn all_word_pair_proximity_docids<T: AsRef<str>, U: AsRef<str>>(
let current_docids = ctx let current_docids = ctx
.word_pair_proximity_docids(left.as_ref(), right.as_ref(), proximity)? .word_pair_proximity_docids(left.as_ref(), right.as_ref(), proximity)?
.unwrap_or_default(); .unwrap_or_default();
docids.union_with(&current_docids); docids |= current_docids;
} }
} }
Ok(docids) Ok(docids)
@ -401,7 +401,7 @@ fn query_docids(
let mut docids = RoaringBitmap::new(); let mut docids = RoaringBitmap::new();
for (word, _typo) in words { for (word, _typo) in words {
let current_docids = ctx.word_docids(&word)?.unwrap_or_default(); let current_docids = ctx.word_docids(&word)?.unwrap_or_default();
docids.union_with(&current_docids); docids |= current_docids;
} }
Ok(docids) Ok(docids)
} else { } else {
@ -413,7 +413,7 @@ fn query_docids(
let mut docids = RoaringBitmap::new(); let mut docids = RoaringBitmap::new();
for (word, _typo) in words { for (word, _typo) in words {
let current_docids = ctx.word_docids(&word)?.unwrap_or_default(); let current_docids = ctx.word_docids(&word)?.unwrap_or_default();
docids.union_with(&current_docids); docids |= current_docids;
} }
Ok(docids) Ok(docids)
} }
@ -430,7 +430,7 @@ fn query_pair_proximity_docids(
if proximity >= 8 { if proximity >= 8 {
let mut candidates = query_docids(ctx, left, wdcache)?; let mut candidates = query_docids(ctx, left, wdcache)?;
let right_candidates = query_docids(ctx, right, wdcache)?; let right_candidates = query_docids(ctx, right, wdcache)?;
candidates.intersect_with(&right_candidates); candidates &= right_candidates;
return Ok(candidates); return Ok(candidates);
} }
@ -463,7 +463,7 @@ fn query_pair_proximity_docids(
proximity, proximity,
)? )?
.unwrap_or_default(); .unwrap_or_default();
docids.union_with(&current_docids); docids |= current_docids;
} }
Ok(docids) Ok(docids)
} else if prefix { } else if prefix {

View File

@ -274,11 +274,11 @@ fn resolve_candidates<'t>(
let mut candidates = let mut candidates =
query_pair_proximity_docids(ctx, lr, rl, pair_p + 1, wdcache)?; query_pair_proximity_docids(ctx, lr, rl, pair_p + 1, wdcache)?;
if lcandidates.len() < rcandidates.len() { if lcandidates.len() < rcandidates.len() {
candidates.intersect_with(lcandidates); candidates &= lcandidates;
candidates.intersect_with(rcandidates); candidates &= rcandidates;
} else { } else {
candidates.intersect_with(rcandidates); candidates &= rcandidates;
candidates.intersect_with(lcandidates); candidates &= lcandidates;
} }
if !candidates.is_empty() { if !candidates.is_empty() {
output.push((ll.clone(), rr.clone(), candidates)); output.push((ll.clone(), rr.clone(), candidates));
@ -317,7 +317,7 @@ fn resolve_candidates<'t>(
for (_, rtail, mut candidates) in for (_, rtail, mut candidates) in
mdfs(ctx, tail, proximity - p, cache, wdcache)? mdfs(ctx, tail, proximity - p, cache, wdcache)?
{ {
candidates.intersect_with(&head_candidates); candidates &= &head_candidates;
if !candidates.is_empty() { if !candidates.is_empty() {
output.push((lhead.clone(), rtail, candidates)); output.push((lhead.clone(), rtail, candidates));
} }
@ -334,7 +334,7 @@ fn resolve_candidates<'t>(
let mut candidates = RoaringBitmap::new(); let mut candidates = RoaringBitmap::new();
for (_, _, cds) in resolve_operation(ctx, query_tree, proximity, cache, wdcache)? { for (_, _, cds) in resolve_operation(ctx, query_tree, proximity, cache, wdcache)? {
candidates.union_with(&cds); candidates |= cds;
} }
Ok(candidates) Ok(candidates)
} }

View File

@ -281,7 +281,7 @@ fn resolve_candidates<'t>(
let mut candidates = RoaringBitmap::new(); let mut candidates = RoaringBitmap::new();
for op in ops { for op in ops {
let docids = resolve_operation(ctx, op, number_typos, cache, wdcache)?; let docids = resolve_operation(ctx, op, number_typos, cache, wdcache)?;
candidates.union_with(&docids); candidates |= docids;
} }
Ok(candidates) Ok(candidates)
} }
@ -329,8 +329,8 @@ fn resolve_candidates<'t>(
}; };
if !head_candidates.is_empty() { if !head_candidates.is_empty() {
let tail_candidates = mdfs(ctx, tail, mana - m, cache, wdcache)?; let tail_candidates = mdfs(ctx, tail, mana - m, cache, wdcache)?;
head_candidates.intersect_with(&tail_candidates); head_candidates &= tail_candidates;
candidates.union_with(&head_candidates); candidates |= head_candidates;
} }
} }

View File

@ -61,7 +61,7 @@ impl<'a> FacetDistinctIter<'a> {
db_name: db_name::FACET_ID_STRING_DOCIDS, db_name: db_name::FACET_ID_STRING_DOCIDS,
key: None, key: None,
})?; })?;
self.excluded.union_with(&facet_docids); self.excluded |= facet_docids;
} }
self.excluded.remove(id); self.excluded.remove(id);
@ -79,7 +79,7 @@ impl<'a> FacetDistinctIter<'a> {
db_name: db_name::FACET_ID_F64_DOCIDS, db_name: db_name::FACET_ID_F64_DOCIDS,
key: None, key: None,
})?; })?;
self.excluded.union_with(&facet_docids); self.excluded |= facet_docids;
} }
self.excluded.remove(id); self.excluded.remove(id);
@ -92,7 +92,7 @@ impl<'a> FacetDistinctIter<'a> {
/// handling easier. /// handling easier.
fn next_inner(&mut self) -> Result<Option<DocumentId>> { fn next_inner(&mut self) -> Result<Option<DocumentId>> {
// The first step is to remove all the excluded documents from our candidates // The first step is to remove all the excluded documents from our candidates
self.candidates.difference_with(&self.excluded); self.candidates -= &self.excluded;
let mut candidates_iter = self.candidates.iter().skip(self.iter_offset); let mut candidates_iter = self.candidates.iter().skip(self.iter_offset);
match candidates_iter.next() { match candidates_iter.next() {

View File

@ -122,7 +122,7 @@ impl<'a> FacetDistribution<'a> {
for result in iter { for result in iter {
let (value, mut docids) = result?; let (value, mut docids) = result?;
docids.intersect_with(candidates); docids &= candidates;
if !docids.is_empty() { if !docids.is_empty() {
distribution.insert(value.to_string(), docids.len()); distribution.insert(value.to_string(), docids.len());
} }

View File

@ -289,7 +289,7 @@ impl FilterCondition {
for (i, result) in iter.enumerate() { for (i, result) in iter.enumerate() {
let ((_fid, level, l, r), docids) = result?; let ((_fid, level, l, r), docids) = result?;
debug!("{:?} to {:?} (level {}) found {} documents", l, r, level, docids.len()); debug!("{:?} to {:?} (level {}) found {} documents", l, r, level, docids.len());
output.union_with(&docids); *output |= docids;
// We save the leftest and rightest bounds we actually found at this level. // We save the leftest and rightest bounds we actually found at this level.
if i == 0 { if i == 0 {
left_found = Some(l); left_found = Some(l);

View File

@ -213,10 +213,10 @@ impl<'t> Iterator for FacetIter<'t> {
match result { match result {
Ok(((_fid, level, left, right), mut docids)) => { Ok(((_fid, level, left, right), mut docids)) => {
docids.intersect_with(&documents_ids); docids &= &*documents_ids;
if !docids.is_empty() { if !docids.is_empty() {
if self.must_reduce { if self.must_reduce {
documents_ids.difference_with(&docids); *documents_ids -= &docids;
} }
if level == 0 { if level == 0 {

View File

@ -173,7 +173,7 @@ impl<'a> Search<'a> {
let mut candidates = distinct.distinct(candidates, excluded); let mut candidates = distinct.distinct(candidates, excluded);
initial_candidates.union_with(&bucket_candidates); initial_candidates |= bucket_candidates;
if offset != 0 { if offset != 0 {
let discarded = candidates.by_ref().take(offset).count(); let discarded = candidates.by_ref().take(offset).count();

View File

@ -12,7 +12,7 @@ impl AvailableDocumentsIds {
match docids.max() { match docids.max() {
Some(last_id) => { Some(last_id) => {
let mut available = RoaringBitmap::from_iter(0..last_id); let mut available = RoaringBitmap::from_iter(0..last_id);
available.difference_with(&docids); available -= docids;
let iter = match last_id.checked_add(1) { let iter = match last_id.checked_add(1) {
Some(id) => id..=u32::max_value(), Some(id) => id..=u32::max_value(),

View File

@ -43,7 +43,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
} }
pub fn delete_documents(&mut self, docids: &RoaringBitmap) { pub fn delete_documents(&mut self, docids: &RoaringBitmap) {
self.documents_ids.union_with(docids); self.documents_ids |= docids;
} }
pub fn delete_external_id(&mut self, external_id: &str) -> Option<u32> { pub fn delete_external_id(&mut self, external_id: &str) -> Option<u32> {
@ -65,7 +65,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
// We remove the documents ids that we want to delete // We remove the documents ids that we want to delete
// from the documents in the database and write them back. // from the documents in the database and write them back.
let current_documents_ids_len = documents_ids.len(); let current_documents_ids_len = documents_ids.len();
documents_ids.difference_with(&self.documents_ids); documents_ids -= &self.documents_ids;
self.index.put_documents_ids(self.wtxn, &documents_ids)?; self.index.put_documents_ids(self.wtxn, &documents_ids)?;
// We can execute a ClearDocuments operation when the number of documents // We can execute a ClearDocuments operation when the number of documents
@ -194,7 +194,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
if let Some((key, mut docids)) = iter.next().transpose()? { if let Some((key, mut docids)) = iter.next().transpose()? {
if key == word.as_ref() { if key == word.as_ref() {
let previous_len = docids.len(); let previous_len = docids.len();
docids.difference_with(&self.documents_ids); docids -= &self.documents_ids;
if docids.is_empty() { if docids.is_empty() {
// safety: we don't keep references from inside the LMDB database. // safety: we don't keep references from inside the LMDB database.
unsafe { iter.del_current()? }; unsafe { iter.del_current()? };
@ -245,7 +245,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
let (prefix, mut docids) = result?; let (prefix, mut docids) = result?;
let prefix = prefix.to_owned(); let prefix = prefix.to_owned();
let previous_len = docids.len(); let previous_len = docids.len();
docids.difference_with(&self.documents_ids); docids -= &self.documents_ids;
if docids.is_empty() { if docids.is_empty() {
// safety: we don't keep references from inside the LMDB database. // safety: we don't keep references from inside the LMDB database.
unsafe { iter.del_current()? }; unsafe { iter.del_current()? };
@ -285,7 +285,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
while let Some(result) = iter.next() { while let Some(result) = iter.next() {
let (key, mut docids) = result?; let (key, mut docids) = result?;
let previous_len = docids.len(); let previous_len = docids.len();
docids.difference_with(&self.documents_ids); docids -= &self.documents_ids;
if docids.is_empty() { if docids.is_empty() {
// safety: we don't keep references from inside the LMDB database. // safety: we don't keep references from inside the LMDB database.
unsafe { iter.del_current()? }; unsafe { iter.del_current()? };
@ -306,7 +306,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
while let Some(result) = iter.next() { while let Some(result) = iter.next() {
let (bytes, mut docids) = result?; let (bytes, mut docids) = result?;
let previous_len = docids.len(); let previous_len = docids.len();
docids.difference_with(&self.documents_ids); docids -= &self.documents_ids;
if docids.is_empty() { if docids.is_empty() {
// safety: we don't keep references from inside the LMDB database. // safety: we don't keep references from inside the LMDB database.
unsafe { iter.del_current()? }; unsafe { iter.del_current()? };
@ -325,7 +325,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
while let Some(result) = iter.next() { while let Some(result) = iter.next() {
let (bytes, mut docids) = result?; let (bytes, mut docids) = result?;
let previous_len = docids.len(); let previous_len = docids.len();
docids.difference_with(&self.documents_ids); docids -= &self.documents_ids;
if docids.is_empty() { if docids.is_empty() {
// safety: we don't keep references from inside the LMDB database. // safety: we don't keep references from inside the LMDB database.
unsafe { iter.del_current()? }; unsafe { iter.del_current()? };
@ -344,7 +344,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
while let Some(result) = iter.next() { while let Some(result) = iter.next() {
let (bytes, mut docids) = result?; let (bytes, mut docids) = result?;
let previous_len = docids.len(); let previous_len = docids.len();
docids.difference_with(&self.documents_ids); docids -= &self.documents_ids;
if docids.is_empty() { if docids.is_empty() {
// safety: we don't keep references from inside the LMDB database. // safety: we don't keep references from inside the LMDB database.
unsafe { iter.del_current()? }; unsafe { iter.del_current()? };
@ -361,7 +361,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
let mut iter = field_id_word_count_docids.iter_mut(self.wtxn)?; let mut iter = field_id_word_count_docids.iter_mut(self.wtxn)?;
while let Some((key, mut docids)) = iter.next().transpose()? { while let Some((key, mut docids)) = iter.next().transpose()? {
let previous_len = docids.len(); let previous_len = docids.len();
docids.difference_with(&self.documents_ids); docids -= &self.documents_ids;
if docids.is_empty() { if docids.is_empty() {
// safety: we don't keep references from inside the LMDB database. // safety: we don't keep references from inside the LMDB database.
unsafe { iter.del_current()? }; unsafe { iter.del_current()? };
@ -390,7 +390,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
for field_id in self.index.faceted_fields_ids(self.wtxn)? { for field_id in self.index.faceted_fields_ids(self.wtxn)? {
// Remove docids from the number faceted documents ids // Remove docids from the number faceted documents ids
let mut docids = self.index.number_faceted_documents_ids(self.wtxn, field_id)?; let mut docids = self.index.number_faceted_documents_ids(self.wtxn, field_id)?;
docids.difference_with(&self.documents_ids); docids -= &self.documents_ids;
self.index.put_number_faceted_documents_ids(self.wtxn, field_id, &docids)?; self.index.put_number_faceted_documents_ids(self.wtxn, field_id, &docids)?;
remove_docids_from_field_id_docid_facet_value( remove_docids_from_field_id_docid_facet_value(
@ -403,7 +403,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
// Remove docids from the string faceted documents ids // Remove docids from the string faceted documents ids
let mut docids = self.index.string_faceted_documents_ids(self.wtxn, field_id)?; let mut docids = self.index.string_faceted_documents_ids(self.wtxn, field_id)?;
docids.difference_with(&self.documents_ids); docids -= &self.documents_ids;
self.index.put_string_faceted_documents_ids(self.wtxn, field_id, &docids)?; self.index.put_string_faceted_documents_ids(self.wtxn, field_id, &docids)?;
remove_docids_from_field_id_docid_facet_value( remove_docids_from_field_id_docid_facet_value(
@ -456,7 +456,7 @@ where
while let Some(result) = iter.next() { while let Some(result) = iter.next() {
let (bytes, mut docids) = result?; let (bytes, mut docids) = result?;
let previous_len = docids.len(); let previous_len = docids.len();
docids.difference_with(to_remove); docids -= to_remove;
if docids.is_empty() { if docids.is_empty() {
// safety: we don't keep references from inside the LMDB database. // safety: we don't keep references from inside the LMDB database.
unsafe { iter.del_current()? }; unsafe { iter.del_current()? };

View File

@ -181,7 +181,7 @@ fn compute_facet_number_levels<'t>(
} }
// The right bound is always the bound we run through. // The right bound is always the bound we run through.
group_docids.union_with(&docids); group_docids |= docids;
right = value; right = value;
} }

View File

@ -61,8 +61,7 @@ pub fn roaring_bitmap_merge(_key: &[u8], values: &[Cow<[u8]>]) -> Result<Vec<u8>
let mut head = RoaringBitmap::deserialize_from(&head[..])?; let mut head = RoaringBitmap::deserialize_from(&head[..])?;
for value in tail { for value in tail {
let bitmap = RoaringBitmap::deserialize_from(&value[..])?; head |= RoaringBitmap::deserialize_from(&value[..])?;
head.union_with(&bitmap);
} }
let mut vec = Vec::with_capacity(head.serialized_size()); let mut vec = Vec::with_capacity(head.serialized_size());
@ -75,8 +74,7 @@ pub fn cbo_roaring_bitmap_merge(_key: &[u8], values: &[Cow<[u8]>]) -> Result<Vec
let mut head = CboRoaringBitmapCodec::deserialize_from(&head[..])?; let mut head = CboRoaringBitmapCodec::deserialize_from(&head[..])?;
for value in tail { for value in tail {
let bitmap = CboRoaringBitmapCodec::deserialize_from(&value[..])?; head |= CboRoaringBitmapCodec::deserialize_from(&value[..])?;
head.union_with(&bitmap);
} }
let mut vec = Vec::new(); let mut vec = Vec::new();

View File

@ -608,8 +608,8 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
self.index.put_external_documents_ids(self.wtxn, &external_documents_ids)?; self.index.put_external_documents_ids(self.wtxn, &external_documents_ids)?;
// We merge the new documents ids with the existing ones. // We merge the new documents ids with the existing ones.
documents_ids.union_with(&new_documents_ids); documents_ids |= new_documents_ids;
documents_ids.union_with(&replaced_documents_ids); documents_ids |= replaced_documents_ids;
self.index.put_documents_ids(self.wtxn, &documents_ids)?; self.index.put_documents_ids(self.wtxn, &documents_ids)?;
let mut database_count = 0; let mut database_count = 0;
@ -845,6 +845,7 @@ mod tests {
use heed::EnvOpenOptions; use heed::EnvOpenOptions;
use super::*; use super::*;
use crate::update::DeleteDocuments;
#[test] #[test]
fn simple_document_replacement() { fn simple_document_replacement() {
@ -1303,4 +1304,52 @@ mod tests {
builder.execute(Cursor::new(documents), |_, _| ()).unwrap(); builder.execute(Cursor::new(documents), |_, _| ()).unwrap();
wtxn.commit().unwrap(); wtxn.commit().unwrap();
} }
// Checks that a document can be indexed again after having been deleted:
// four documents are indexed, the one with external id "30" is deleted,
// then the same document is indexed twice more within the same write transaction.
#[test]
fn delete_documents_then_insert() {
let path = tempfile::tempdir().unwrap();
let mut options = EnvOpenOptions::new();
options.map_size(10 * 1024 * 1024); // 10 MB
let index = Index::new(options, &path).unwrap();
// Every step below runs inside this single write transaction.
let mut wtxn = index.write_txn().unwrap();
let content = &br#"[
{ "objectId": 123, "title": "Pride and Prejudice", "comment": "A great book" },
{ "objectId": 456, "title": "Le Petit Prince", "comment": "A french book" },
{ "objectId": 1, "title": "Alice In Wonderland", "comment": "A weird book" },
{ "objectId": 30, "title": "Hamlet" }
]"#[..];
// Index the four initial documents from the JSON payload above.
let mut builder = IndexDocuments::new(&mut wtxn, &index, 0);
builder.update_format(UpdateFormat::Json);
builder.execute(content, |_, _| ()).unwrap();
// After indexing, the index must report "objectId" as its primary key.
assert_eq!(index.primary_key(&wtxn).unwrap(), Some("objectId"));
// Delete not all of the documents but some of them.
let mut builder = DeleteDocuments::new(&mut wtxn, &index, 1).unwrap();
builder.delete_external_id("30");
builder.execute().unwrap();
// The deleted external id must no longer resolve to an internal id.
let external_documents_ids = index.external_documents_ids(&wtxn).unwrap();
assert!(external_documents_ids.get("30").is_none());
let content = &br#"[
{ "objectId": 30, "title": "Hamlet" }
]"#[..];
// Index the deleted document again.
let mut builder = IndexDocuments::new(&mut wtxn, &index, 0);
builder.update_format(UpdateFormat::Json);
builder.execute(content, |_, _| ()).unwrap();
// The external id must be resolvable again after the re-insertion.
let external_documents_ids = index.external_documents_ids(&wtxn).unwrap();
assert!(external_documents_ids.get("30").is_some());
let content = &br#"[
{ "objectId": 30, "title": "Hamlet" }
]"#[..];
// Index the same document once more; this step only has to succeed.
// NOTE(review): presumably this is the sequence that failed in issue #268 — confirm.
let mut builder = IndexDocuments::new(&mut wtxn, &index, 0);
builder.update_format(UpdateFormat::Json);
builder.execute(content, |_, _| ()).unwrap();
wtxn.commit().unwrap();
}
} }

View File

@ -236,7 +236,7 @@ fn compute_positions_levels(
} }
// The right bound is always the bound we run through. // The right bound is always the bound we run through.
group_docids.union_with(&docids); group_docids |= docids;
} }
if !group_docids.is_empty() { if !group_docids.is_empty() {