mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-12 16:08:55 +01:00
269: Fix bug when inserting previously deleted documents r=Kerollmops a=Kerollmops This PR fixes #268. The issue was in the `ExternalDocumentsIds` implementation, in the specific case where an external document id was in the soft map and marked as deleted. The bug was due to a wrong assumption on my side about how the FST unions return the `IndexedValue`s: I thought the values returned in the array were in the same order as the FSTs given to the `OpBuilder`, but in fact [the `IndexedValue`'s `index` field is there to indicate which FST each value comes from](https://docs.rs/fst/0.4.7/fst/map/struct.IndexedValue.html). 271: Remove the roaring operation functions warnings r=Kerollmops a=Kerollmops In this PR we are just replacing the usages of the roaring operation functions with the new operators. This removes a lot of warnings. Co-authored-by: Kerollmops <clement@meilisearch.com>
This commit is contained in:
commit
b4dcdbf00d
@ -1,8 +1,13 @@
|
|||||||
use std::borrow::Cow;
|
use std::borrow::Cow;
|
||||||
|
use std::collections::HashMap;
|
||||||
use std::convert::TryInto;
|
use std::convert::TryInto;
|
||||||
|
use std::{fmt, str};
|
||||||
|
|
||||||
|
use fst::map::IndexedValue;
|
||||||
use fst::{IntoStreamer, Streamer};
|
use fst::{IntoStreamer, Streamer};
|
||||||
|
|
||||||
|
const DELETED_ID: u64 = u64::MAX;
|
||||||
|
|
||||||
pub struct ExternalDocumentsIds<'a> {
|
pub struct ExternalDocumentsIds<'a> {
|
||||||
pub(crate) hard: fst::Map<Cow<'a, [u8]>>,
|
pub(crate) hard: fst::Map<Cow<'a, [u8]>>,
|
||||||
pub(crate) soft: fst::Map<Cow<'a, [u8]>>,
|
pub(crate) soft: fst::Map<Cow<'a, [u8]>>,
|
||||||
@ -31,8 +36,7 @@ impl<'a> ExternalDocumentsIds<'a> {
|
|||||||
pub fn get<A: AsRef<[u8]>>(&self, external_id: A) -> Option<u32> {
|
pub fn get<A: AsRef<[u8]>>(&self, external_id: A) -> Option<u32> {
|
||||||
let external_id = external_id.as_ref();
|
let external_id = external_id.as_ref();
|
||||||
match self.soft.get(external_id).or_else(|| self.hard.get(external_id)) {
|
match self.soft.get(external_id).or_else(|| self.hard.get(external_id)) {
|
||||||
// u64 MAX means deleted in the soft fst map
|
Some(id) if id != DELETED_ID => Some(id.try_into().unwrap()),
|
||||||
Some(id) if id != u64::MAX => Some(id.try_into().unwrap()),
|
|
||||||
_otherwise => None,
|
_otherwise => None,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -47,9 +51,10 @@ impl<'a> ExternalDocumentsIds<'a> {
|
|||||||
if docids.iter().any(|v| v.index == 1) {
|
if docids.iter().any(|v| v.index == 1) {
|
||||||
// If the `other` set returns a value here it means
|
// If the `other` set returns a value here it means
|
||||||
// that it must be marked as deleted.
|
// that it must be marked as deleted.
|
||||||
new_soft_builder.insert(external_id, u64::MAX)?;
|
new_soft_builder.insert(external_id, DELETED_ID)?;
|
||||||
} else {
|
} else {
|
||||||
new_soft_builder.insert(external_id, docids[0].value)?;
|
let value = docids.iter().find(|v| v.index == 0).unwrap().value;
|
||||||
|
new_soft_builder.insert(external_id, value)?;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -65,8 +70,8 @@ impl<'a> ExternalDocumentsIds<'a> {
|
|||||||
|
|
||||||
let mut new_soft_builder = fst::MapBuilder::memory();
|
let mut new_soft_builder = fst::MapBuilder::memory();
|
||||||
let mut iter = union_op.into_stream();
|
let mut iter = union_op.into_stream();
|
||||||
while let Some((external_id, docids)) = iter.next() {
|
while let Some((external_id, marked_docids)) = iter.next() {
|
||||||
let id = docids.last().unwrap().value;
|
let id = indexed_last_value(marked_docids).unwrap();
|
||||||
new_soft_builder.insert(external_id, id)?;
|
new_soft_builder.insert(external_id, id)?;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -77,19 +82,34 @@ impl<'a> ExternalDocumentsIds<'a> {
|
|||||||
self.merge_soft_into_hard()
|
self.merge_soft_into_hard()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// A helper function to debug this type, returns a `HashMap` of both,
|
||||||
|
/// soft and hard fst maps, combined.
|
||||||
|
pub fn to_hash_map(&self) -> HashMap<String, u32> {
|
||||||
|
let mut map = HashMap::new();
|
||||||
|
|
||||||
|
let union_op = self.hard.op().add(&self.soft).r#union();
|
||||||
|
let mut iter = union_op.into_stream();
|
||||||
|
while let Some((external_id, marked_docids)) = iter.next() {
|
||||||
|
let id = indexed_last_value(marked_docids).unwrap();
|
||||||
|
if id != DELETED_ID {
|
||||||
|
let external_id = str::from_utf8(external_id).unwrap();
|
||||||
|
map.insert(external_id.to_owned(), id.try_into().unwrap());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
map
|
||||||
|
}
|
||||||
|
|
||||||
fn merge_soft_into_hard(&mut self) -> fst::Result<()> {
|
fn merge_soft_into_hard(&mut self) -> fst::Result<()> {
|
||||||
if self.soft.len() >= self.hard.len() / 2 {
|
if self.soft.len() >= self.hard.len() / 2 {
|
||||||
let union_op = self.hard.op().add(&self.soft).r#union();
|
let union_op = self.hard.op().add(&self.soft).r#union();
|
||||||
|
|
||||||
let mut iter = union_op.into_stream();
|
let mut iter = union_op.into_stream();
|
||||||
let mut new_hard_builder = fst::MapBuilder::memory();
|
let mut new_hard_builder = fst::MapBuilder::memory();
|
||||||
while let Some((external_id, docids)) = iter.next() {
|
while let Some((external_id, marked_docids)) = iter.next() {
|
||||||
if docids.len() == 2 {
|
let value = indexed_last_value(marked_docids).unwrap();
|
||||||
if docids[1].value != u64::MAX {
|
if value != DELETED_ID {
|
||||||
new_hard_builder.insert(external_id, docids[1].value)?;
|
new_hard_builder.insert(external_id, value)?;
|
||||||
}
|
|
||||||
} else {
|
|
||||||
new_hard_builder.insert(external_id, docids[0].value)?;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -103,6 +123,12 @@ impl<'a> ExternalDocumentsIds<'a> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl fmt::Debug for ExternalDocumentsIds<'_> {
|
||||||
|
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||||
|
f.debug_tuple("ExternalDocumentsIds").field(&self.to_hash_map()).finish()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
impl Default for ExternalDocumentsIds<'static> {
|
impl Default for ExternalDocumentsIds<'static> {
|
||||||
fn default() -> Self {
|
fn default() -> Self {
|
||||||
ExternalDocumentsIds {
|
ExternalDocumentsIds {
|
||||||
@ -112,6 +138,11 @@ impl Default for ExternalDocumentsIds<'static> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns the value of the `IndexedValue` with the highest _index_.
|
||||||
|
fn indexed_last_value(indexed_values: &[IndexedValue]) -> Option<u64> {
|
||||||
|
indexed_values.iter().copied().max_by_key(|iv| iv.index).map(|iv| iv.value)
|
||||||
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
@ -162,4 +193,25 @@ mod tests {
|
|||||||
assert_eq!(external_documents_ids.get("g"), Some(7));
|
assert_eq!(external_documents_ids.get("g"), Some(7));
|
||||||
assert_eq!(external_documents_ids.get("h"), Some(8));
|
assert_eq!(external_documents_ids.get("h"), Some(8));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn strange_delete_insert_ids() {
|
||||||
|
let mut external_documents_ids = ExternalDocumentsIds::default();
|
||||||
|
|
||||||
|
let new_ids =
|
||||||
|
fst::Map::from_iter(vec![("1", 0), ("123", 1), ("30", 2), ("456", 3)]).unwrap();
|
||||||
|
external_documents_ids.insert_ids(&new_ids).unwrap();
|
||||||
|
assert_eq!(external_documents_ids.get("1"), Some(0));
|
||||||
|
assert_eq!(external_documents_ids.get("123"), Some(1));
|
||||||
|
assert_eq!(external_documents_ids.get("30"), Some(2));
|
||||||
|
assert_eq!(external_documents_ids.get("456"), Some(3));
|
||||||
|
|
||||||
|
let deleted_ids = fst::Set::from_iter(vec!["30"]).unwrap();
|
||||||
|
external_documents_ids.delete_ids(deleted_ids).unwrap();
|
||||||
|
assert_eq!(external_documents_ids.get("30"), None);
|
||||||
|
|
||||||
|
let new_ids = fst::Map::from_iter(vec![("30", 2)]).unwrap();
|
||||||
|
external_documents_ids.insert_ids(&new_ids).unwrap();
|
||||||
|
assert_eq!(external_documents_ids.get("30"), Some(2));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -218,7 +218,7 @@ impl Index {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Deletes the primary key of the documents, this can be done to reset indexes settings.
|
/// Deletes the primary key of the documents, this can be done to reset indexes settings.
|
||||||
pub fn delete_primary_key(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
|
pub(crate) fn delete_primary_key(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
|
||||||
self.main.delete::<_, Str>(wtxn, main_key::PRIMARY_KEY_KEY)
|
self.main.delete::<_, Str>(wtxn, main_key::PRIMARY_KEY_KEY)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -333,7 +333,7 @@ impl Index {
|
|||||||
|
|
||||||
/// Deletes the displayed fields ids, this will make the engine to display
|
/// Deletes the displayed fields ids, this will make the engine to display
|
||||||
/// all the documents attributes in the order of the `FieldsIdsMap`.
|
/// all the documents attributes in the order of the `FieldsIdsMap`.
|
||||||
pub fn delete_displayed_fields(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
|
pub(crate) fn delete_displayed_fields(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
|
||||||
self.main.delete::<_, Str>(wtxn, main_key::DISPLAYED_FIELDS_KEY)
|
self.main.delete::<_, Str>(wtxn, main_key::DISPLAYED_FIELDS_KEY)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -383,7 +383,7 @@ impl Index {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Deletes the searchable fields, when no fields are specified, all fields are indexed.
|
/// Deletes the searchable fields, when no fields are specified, all fields are indexed.
|
||||||
pub fn delete_searchable_fields(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
|
pub(crate) fn delete_searchable_fields(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
|
||||||
self.main.delete::<_, Str>(wtxn, main_key::SEARCHABLE_FIELDS_KEY)
|
self.main.delete::<_, Str>(wtxn, main_key::SEARCHABLE_FIELDS_KEY)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -429,7 +429,7 @@ impl Index {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Deletes the filterable fields ids in the database.
|
/// Deletes the filterable fields ids in the database.
|
||||||
pub fn delete_filterable_fields(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
|
pub(crate) fn delete_filterable_fields(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
|
||||||
self.main.delete::<_, Str>(wtxn, main_key::FILTERABLE_FIELDS_KEY)
|
self.main.delete::<_, Str>(wtxn, main_key::FILTERABLE_FIELDS_KEY)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -602,7 +602,7 @@ impl Index {
|
|||||||
self.main.put::<_, Str, SerdeJson<&[Criterion]>>(wtxn, main_key::CRITERIA_KEY, &criteria)
|
self.main.put::<_, Str, SerdeJson<&[Criterion]>>(wtxn, main_key::CRITERIA_KEY, &criteria)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn delete_criteria(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
|
pub(crate) fn delete_criteria(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
|
||||||
self.main.delete::<_, Str>(wtxn, main_key::CRITERIA_KEY)
|
self.main.delete::<_, Str>(wtxn, main_key::CRITERIA_KEY)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -642,7 +642,7 @@ impl Index {
|
|||||||
self.main.put::<_, Str, ByteSlice>(wtxn, main_key::STOP_WORDS_KEY, fst.as_fst().as_bytes())
|
self.main.put::<_, Str, ByteSlice>(wtxn, main_key::STOP_WORDS_KEY, fst.as_fst().as_bytes())
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn delete_stop_words(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
|
pub(crate) fn delete_stop_words(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
|
||||||
self.main.delete::<_, Str>(wtxn, main_key::STOP_WORDS_KEY)
|
self.main.delete::<_, Str>(wtxn, main_key::STOP_WORDS_KEY)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -663,7 +663,7 @@ impl Index {
|
|||||||
self.main.put::<_, Str, SerdeBincode<_>>(wtxn, main_key::SYNONYMS_KEY, synonyms)
|
self.main.put::<_, Str, SerdeBincode<_>>(wtxn, main_key::SYNONYMS_KEY, synonyms)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn delete_synonyms(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
|
pub(crate) fn delete_synonyms(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
|
||||||
self.main.delete::<_, Str>(wtxn, main_key::SYNONYMS_KEY)
|
self.main.delete::<_, Str>(wtxn, main_key::SYNONYMS_KEY)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -328,7 +328,7 @@ pub fn resolve_query_tree<'t>(
|
|||||||
candidates = docids;
|
candidates = docids;
|
||||||
first_loop = false;
|
first_loop = false;
|
||||||
} else {
|
} else {
|
||||||
candidates.intersect_with(&docids);
|
candidates &= &docids;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Ok(candidates)
|
Ok(candidates)
|
||||||
@ -358,7 +358,7 @@ pub fn resolve_query_tree<'t>(
|
|||||||
let mut candidates = RoaringBitmap::new();
|
let mut candidates = RoaringBitmap::new();
|
||||||
for op in ops {
|
for op in ops {
|
||||||
let docids = resolve_operation(ctx, op, wdcache)?;
|
let docids = resolve_operation(ctx, op, wdcache)?;
|
||||||
candidates.union_with(&docids);
|
candidates |= docids;
|
||||||
}
|
}
|
||||||
Ok(candidates)
|
Ok(candidates)
|
||||||
}
|
}
|
||||||
@ -381,7 +381,7 @@ fn all_word_pair_proximity_docids<T: AsRef<str>, U: AsRef<str>>(
|
|||||||
let current_docids = ctx
|
let current_docids = ctx
|
||||||
.word_pair_proximity_docids(left.as_ref(), right.as_ref(), proximity)?
|
.word_pair_proximity_docids(left.as_ref(), right.as_ref(), proximity)?
|
||||||
.unwrap_or_default();
|
.unwrap_or_default();
|
||||||
docids.union_with(&current_docids);
|
docids |= current_docids;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Ok(docids)
|
Ok(docids)
|
||||||
@ -401,7 +401,7 @@ fn query_docids(
|
|||||||
let mut docids = RoaringBitmap::new();
|
let mut docids = RoaringBitmap::new();
|
||||||
for (word, _typo) in words {
|
for (word, _typo) in words {
|
||||||
let current_docids = ctx.word_docids(&word)?.unwrap_or_default();
|
let current_docids = ctx.word_docids(&word)?.unwrap_or_default();
|
||||||
docids.union_with(&current_docids);
|
docids |= current_docids;
|
||||||
}
|
}
|
||||||
Ok(docids)
|
Ok(docids)
|
||||||
} else {
|
} else {
|
||||||
@ -413,7 +413,7 @@ fn query_docids(
|
|||||||
let mut docids = RoaringBitmap::new();
|
let mut docids = RoaringBitmap::new();
|
||||||
for (word, _typo) in words {
|
for (word, _typo) in words {
|
||||||
let current_docids = ctx.word_docids(&word)?.unwrap_or_default();
|
let current_docids = ctx.word_docids(&word)?.unwrap_or_default();
|
||||||
docids.union_with(&current_docids);
|
docids |= current_docids;
|
||||||
}
|
}
|
||||||
Ok(docids)
|
Ok(docids)
|
||||||
}
|
}
|
||||||
@ -430,7 +430,7 @@ fn query_pair_proximity_docids(
|
|||||||
if proximity >= 8 {
|
if proximity >= 8 {
|
||||||
let mut candidates = query_docids(ctx, left, wdcache)?;
|
let mut candidates = query_docids(ctx, left, wdcache)?;
|
||||||
let right_candidates = query_docids(ctx, right, wdcache)?;
|
let right_candidates = query_docids(ctx, right, wdcache)?;
|
||||||
candidates.intersect_with(&right_candidates);
|
candidates &= right_candidates;
|
||||||
return Ok(candidates);
|
return Ok(candidates);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -463,7 +463,7 @@ fn query_pair_proximity_docids(
|
|||||||
proximity,
|
proximity,
|
||||||
)?
|
)?
|
||||||
.unwrap_or_default();
|
.unwrap_or_default();
|
||||||
docids.union_with(&current_docids);
|
docids |= current_docids;
|
||||||
}
|
}
|
||||||
Ok(docids)
|
Ok(docids)
|
||||||
} else if prefix {
|
} else if prefix {
|
||||||
|
@ -274,11 +274,11 @@ fn resolve_candidates<'t>(
|
|||||||
let mut candidates =
|
let mut candidates =
|
||||||
query_pair_proximity_docids(ctx, lr, rl, pair_p + 1, wdcache)?;
|
query_pair_proximity_docids(ctx, lr, rl, pair_p + 1, wdcache)?;
|
||||||
if lcandidates.len() < rcandidates.len() {
|
if lcandidates.len() < rcandidates.len() {
|
||||||
candidates.intersect_with(lcandidates);
|
candidates &= lcandidates;
|
||||||
candidates.intersect_with(rcandidates);
|
candidates &= rcandidates;
|
||||||
} else {
|
} else {
|
||||||
candidates.intersect_with(rcandidates);
|
candidates &= rcandidates;
|
||||||
candidates.intersect_with(lcandidates);
|
candidates &= lcandidates;
|
||||||
}
|
}
|
||||||
if !candidates.is_empty() {
|
if !candidates.is_empty() {
|
||||||
output.push((ll.clone(), rr.clone(), candidates));
|
output.push((ll.clone(), rr.clone(), candidates));
|
||||||
@ -317,7 +317,7 @@ fn resolve_candidates<'t>(
|
|||||||
for (_, rtail, mut candidates) in
|
for (_, rtail, mut candidates) in
|
||||||
mdfs(ctx, tail, proximity - p, cache, wdcache)?
|
mdfs(ctx, tail, proximity - p, cache, wdcache)?
|
||||||
{
|
{
|
||||||
candidates.intersect_with(&head_candidates);
|
candidates &= &head_candidates;
|
||||||
if !candidates.is_empty() {
|
if !candidates.is_empty() {
|
||||||
output.push((lhead.clone(), rtail, candidates));
|
output.push((lhead.clone(), rtail, candidates));
|
||||||
}
|
}
|
||||||
@ -334,7 +334,7 @@ fn resolve_candidates<'t>(
|
|||||||
|
|
||||||
let mut candidates = RoaringBitmap::new();
|
let mut candidates = RoaringBitmap::new();
|
||||||
for (_, _, cds) in resolve_operation(ctx, query_tree, proximity, cache, wdcache)? {
|
for (_, _, cds) in resolve_operation(ctx, query_tree, proximity, cache, wdcache)? {
|
||||||
candidates.union_with(&cds);
|
candidates |= cds;
|
||||||
}
|
}
|
||||||
Ok(candidates)
|
Ok(candidates)
|
||||||
}
|
}
|
||||||
|
@ -281,7 +281,7 @@ fn resolve_candidates<'t>(
|
|||||||
let mut candidates = RoaringBitmap::new();
|
let mut candidates = RoaringBitmap::new();
|
||||||
for op in ops {
|
for op in ops {
|
||||||
let docids = resolve_operation(ctx, op, number_typos, cache, wdcache)?;
|
let docids = resolve_operation(ctx, op, number_typos, cache, wdcache)?;
|
||||||
candidates.union_with(&docids);
|
candidates |= docids;
|
||||||
}
|
}
|
||||||
Ok(candidates)
|
Ok(candidates)
|
||||||
}
|
}
|
||||||
@ -329,8 +329,8 @@ fn resolve_candidates<'t>(
|
|||||||
};
|
};
|
||||||
if !head_candidates.is_empty() {
|
if !head_candidates.is_empty() {
|
||||||
let tail_candidates = mdfs(ctx, tail, mana - m, cache, wdcache)?;
|
let tail_candidates = mdfs(ctx, tail, mana - m, cache, wdcache)?;
|
||||||
head_candidates.intersect_with(&tail_candidates);
|
head_candidates &= tail_candidates;
|
||||||
candidates.union_with(&head_candidates);
|
candidates |= head_candidates;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -61,7 +61,7 @@ impl<'a> FacetDistinctIter<'a> {
|
|||||||
db_name: db_name::FACET_ID_STRING_DOCIDS,
|
db_name: db_name::FACET_ID_STRING_DOCIDS,
|
||||||
key: None,
|
key: None,
|
||||||
})?;
|
})?;
|
||||||
self.excluded.union_with(&facet_docids);
|
self.excluded |= facet_docids;
|
||||||
}
|
}
|
||||||
|
|
||||||
self.excluded.remove(id);
|
self.excluded.remove(id);
|
||||||
@ -79,7 +79,7 @@ impl<'a> FacetDistinctIter<'a> {
|
|||||||
db_name: db_name::FACET_ID_F64_DOCIDS,
|
db_name: db_name::FACET_ID_F64_DOCIDS,
|
||||||
key: None,
|
key: None,
|
||||||
})?;
|
})?;
|
||||||
self.excluded.union_with(&facet_docids);
|
self.excluded |= facet_docids;
|
||||||
}
|
}
|
||||||
|
|
||||||
self.excluded.remove(id);
|
self.excluded.remove(id);
|
||||||
@ -92,7 +92,7 @@ impl<'a> FacetDistinctIter<'a> {
|
|||||||
/// handling easier.
|
/// handling easier.
|
||||||
fn next_inner(&mut self) -> Result<Option<DocumentId>> {
|
fn next_inner(&mut self) -> Result<Option<DocumentId>> {
|
||||||
// The first step is to remove all the excluded documents from our candidates
|
// The first step is to remove all the excluded documents from our candidates
|
||||||
self.candidates.difference_with(&self.excluded);
|
self.candidates -= &self.excluded;
|
||||||
|
|
||||||
let mut candidates_iter = self.candidates.iter().skip(self.iter_offset);
|
let mut candidates_iter = self.candidates.iter().skip(self.iter_offset);
|
||||||
match candidates_iter.next() {
|
match candidates_iter.next() {
|
||||||
|
@ -122,7 +122,7 @@ impl<'a> FacetDistribution<'a> {
|
|||||||
|
|
||||||
for result in iter {
|
for result in iter {
|
||||||
let (value, mut docids) = result?;
|
let (value, mut docids) = result?;
|
||||||
docids.intersect_with(candidates);
|
docids &= candidates;
|
||||||
if !docids.is_empty() {
|
if !docids.is_empty() {
|
||||||
distribution.insert(value.to_string(), docids.len());
|
distribution.insert(value.to_string(), docids.len());
|
||||||
}
|
}
|
||||||
|
@ -289,7 +289,7 @@ impl FilterCondition {
|
|||||||
for (i, result) in iter.enumerate() {
|
for (i, result) in iter.enumerate() {
|
||||||
let ((_fid, level, l, r), docids) = result?;
|
let ((_fid, level, l, r), docids) = result?;
|
||||||
debug!("{:?} to {:?} (level {}) found {} documents", l, r, level, docids.len());
|
debug!("{:?} to {:?} (level {}) found {} documents", l, r, level, docids.len());
|
||||||
output.union_with(&docids);
|
*output |= docids;
|
||||||
// We save the leftest and rightest bounds we actually found at this level.
|
// We save the leftest and rightest bounds we actually found at this level.
|
||||||
if i == 0 {
|
if i == 0 {
|
||||||
left_found = Some(l);
|
left_found = Some(l);
|
||||||
|
@ -213,10 +213,10 @@ impl<'t> Iterator for FacetIter<'t> {
|
|||||||
|
|
||||||
match result {
|
match result {
|
||||||
Ok(((_fid, level, left, right), mut docids)) => {
|
Ok(((_fid, level, left, right), mut docids)) => {
|
||||||
docids.intersect_with(&documents_ids);
|
docids &= &*documents_ids;
|
||||||
if !docids.is_empty() {
|
if !docids.is_empty() {
|
||||||
if self.must_reduce {
|
if self.must_reduce {
|
||||||
documents_ids.difference_with(&docids);
|
*documents_ids -= &docids;
|
||||||
}
|
}
|
||||||
|
|
||||||
if level == 0 {
|
if level == 0 {
|
||||||
|
@ -173,7 +173,7 @@ impl<'a> Search<'a> {
|
|||||||
|
|
||||||
let mut candidates = distinct.distinct(candidates, excluded);
|
let mut candidates = distinct.distinct(candidates, excluded);
|
||||||
|
|
||||||
initial_candidates.union_with(&bucket_candidates);
|
initial_candidates |= bucket_candidates;
|
||||||
|
|
||||||
if offset != 0 {
|
if offset != 0 {
|
||||||
let discarded = candidates.by_ref().take(offset).count();
|
let discarded = candidates.by_ref().take(offset).count();
|
||||||
|
@ -12,7 +12,7 @@ impl AvailableDocumentsIds {
|
|||||||
match docids.max() {
|
match docids.max() {
|
||||||
Some(last_id) => {
|
Some(last_id) => {
|
||||||
let mut available = RoaringBitmap::from_iter(0..last_id);
|
let mut available = RoaringBitmap::from_iter(0..last_id);
|
||||||
available.difference_with(&docids);
|
available -= docids;
|
||||||
|
|
||||||
let iter = match last_id.checked_add(1) {
|
let iter = match last_id.checked_add(1) {
|
||||||
Some(id) => id..=u32::max_value(),
|
Some(id) => id..=u32::max_value(),
|
||||||
|
@ -43,7 +43,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub fn delete_documents(&mut self, docids: &RoaringBitmap) {
|
pub fn delete_documents(&mut self, docids: &RoaringBitmap) {
|
||||||
self.documents_ids.union_with(docids);
|
self.documents_ids |= docids;
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn delete_external_id(&mut self, external_id: &str) -> Option<u32> {
|
pub fn delete_external_id(&mut self, external_id: &str) -> Option<u32> {
|
||||||
@ -65,7 +65,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
|
|||||||
// We remove the documents ids that we want to delete
|
// We remove the documents ids that we want to delete
|
||||||
// from the documents in the database and write them back.
|
// from the documents in the database and write them back.
|
||||||
let current_documents_ids_len = documents_ids.len();
|
let current_documents_ids_len = documents_ids.len();
|
||||||
documents_ids.difference_with(&self.documents_ids);
|
documents_ids -= &self.documents_ids;
|
||||||
self.index.put_documents_ids(self.wtxn, &documents_ids)?;
|
self.index.put_documents_ids(self.wtxn, &documents_ids)?;
|
||||||
|
|
||||||
// We can execute a ClearDocuments operation when the number of documents
|
// We can execute a ClearDocuments operation when the number of documents
|
||||||
@ -194,7 +194,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
|
|||||||
if let Some((key, mut docids)) = iter.next().transpose()? {
|
if let Some((key, mut docids)) = iter.next().transpose()? {
|
||||||
if key == word.as_ref() {
|
if key == word.as_ref() {
|
||||||
let previous_len = docids.len();
|
let previous_len = docids.len();
|
||||||
docids.difference_with(&self.documents_ids);
|
docids -= &self.documents_ids;
|
||||||
if docids.is_empty() {
|
if docids.is_empty() {
|
||||||
// safety: we don't keep references from inside the LMDB database.
|
// safety: we don't keep references from inside the LMDB database.
|
||||||
unsafe { iter.del_current()? };
|
unsafe { iter.del_current()? };
|
||||||
@ -245,7 +245,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
|
|||||||
let (prefix, mut docids) = result?;
|
let (prefix, mut docids) = result?;
|
||||||
let prefix = prefix.to_owned();
|
let prefix = prefix.to_owned();
|
||||||
let previous_len = docids.len();
|
let previous_len = docids.len();
|
||||||
docids.difference_with(&self.documents_ids);
|
docids -= &self.documents_ids;
|
||||||
if docids.is_empty() {
|
if docids.is_empty() {
|
||||||
// safety: we don't keep references from inside the LMDB database.
|
// safety: we don't keep references from inside the LMDB database.
|
||||||
unsafe { iter.del_current()? };
|
unsafe { iter.del_current()? };
|
||||||
@ -285,7 +285,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
|
|||||||
while let Some(result) = iter.next() {
|
while let Some(result) = iter.next() {
|
||||||
let (key, mut docids) = result?;
|
let (key, mut docids) = result?;
|
||||||
let previous_len = docids.len();
|
let previous_len = docids.len();
|
||||||
docids.difference_with(&self.documents_ids);
|
docids -= &self.documents_ids;
|
||||||
if docids.is_empty() {
|
if docids.is_empty() {
|
||||||
// safety: we don't keep references from inside the LMDB database.
|
// safety: we don't keep references from inside the LMDB database.
|
||||||
unsafe { iter.del_current()? };
|
unsafe { iter.del_current()? };
|
||||||
@ -306,7 +306,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
|
|||||||
while let Some(result) = iter.next() {
|
while let Some(result) = iter.next() {
|
||||||
let (bytes, mut docids) = result?;
|
let (bytes, mut docids) = result?;
|
||||||
let previous_len = docids.len();
|
let previous_len = docids.len();
|
||||||
docids.difference_with(&self.documents_ids);
|
docids -= &self.documents_ids;
|
||||||
if docids.is_empty() {
|
if docids.is_empty() {
|
||||||
// safety: we don't keep references from inside the LMDB database.
|
// safety: we don't keep references from inside the LMDB database.
|
||||||
unsafe { iter.del_current()? };
|
unsafe { iter.del_current()? };
|
||||||
@ -325,7 +325,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
|
|||||||
while let Some(result) = iter.next() {
|
while let Some(result) = iter.next() {
|
||||||
let (bytes, mut docids) = result?;
|
let (bytes, mut docids) = result?;
|
||||||
let previous_len = docids.len();
|
let previous_len = docids.len();
|
||||||
docids.difference_with(&self.documents_ids);
|
docids -= &self.documents_ids;
|
||||||
if docids.is_empty() {
|
if docids.is_empty() {
|
||||||
// safety: we don't keep references from inside the LMDB database.
|
// safety: we don't keep references from inside the LMDB database.
|
||||||
unsafe { iter.del_current()? };
|
unsafe { iter.del_current()? };
|
||||||
@ -344,7 +344,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
|
|||||||
while let Some(result) = iter.next() {
|
while let Some(result) = iter.next() {
|
||||||
let (bytes, mut docids) = result?;
|
let (bytes, mut docids) = result?;
|
||||||
let previous_len = docids.len();
|
let previous_len = docids.len();
|
||||||
docids.difference_with(&self.documents_ids);
|
docids -= &self.documents_ids;
|
||||||
if docids.is_empty() {
|
if docids.is_empty() {
|
||||||
// safety: we don't keep references from inside the LMDB database.
|
// safety: we don't keep references from inside the LMDB database.
|
||||||
unsafe { iter.del_current()? };
|
unsafe { iter.del_current()? };
|
||||||
@ -361,7 +361,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
|
|||||||
let mut iter = field_id_word_count_docids.iter_mut(self.wtxn)?;
|
let mut iter = field_id_word_count_docids.iter_mut(self.wtxn)?;
|
||||||
while let Some((key, mut docids)) = iter.next().transpose()? {
|
while let Some((key, mut docids)) = iter.next().transpose()? {
|
||||||
let previous_len = docids.len();
|
let previous_len = docids.len();
|
||||||
docids.difference_with(&self.documents_ids);
|
docids -= &self.documents_ids;
|
||||||
if docids.is_empty() {
|
if docids.is_empty() {
|
||||||
// safety: we don't keep references from inside the LMDB database.
|
// safety: we don't keep references from inside the LMDB database.
|
||||||
unsafe { iter.del_current()? };
|
unsafe { iter.del_current()? };
|
||||||
@ -390,7 +390,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
|
|||||||
for field_id in self.index.faceted_fields_ids(self.wtxn)? {
|
for field_id in self.index.faceted_fields_ids(self.wtxn)? {
|
||||||
// Remove docids from the number faceted documents ids
|
// Remove docids from the number faceted documents ids
|
||||||
let mut docids = self.index.number_faceted_documents_ids(self.wtxn, field_id)?;
|
let mut docids = self.index.number_faceted_documents_ids(self.wtxn, field_id)?;
|
||||||
docids.difference_with(&self.documents_ids);
|
docids -= &self.documents_ids;
|
||||||
self.index.put_number_faceted_documents_ids(self.wtxn, field_id, &docids)?;
|
self.index.put_number_faceted_documents_ids(self.wtxn, field_id, &docids)?;
|
||||||
|
|
||||||
remove_docids_from_field_id_docid_facet_value(
|
remove_docids_from_field_id_docid_facet_value(
|
||||||
@ -403,7 +403,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
|
|||||||
|
|
||||||
// Remove docids from the string faceted documents ids
|
// Remove docids from the string faceted documents ids
|
||||||
let mut docids = self.index.string_faceted_documents_ids(self.wtxn, field_id)?;
|
let mut docids = self.index.string_faceted_documents_ids(self.wtxn, field_id)?;
|
||||||
docids.difference_with(&self.documents_ids);
|
docids -= &self.documents_ids;
|
||||||
self.index.put_string_faceted_documents_ids(self.wtxn, field_id, &docids)?;
|
self.index.put_string_faceted_documents_ids(self.wtxn, field_id, &docids)?;
|
||||||
|
|
||||||
remove_docids_from_field_id_docid_facet_value(
|
remove_docids_from_field_id_docid_facet_value(
|
||||||
@ -456,7 +456,7 @@ where
|
|||||||
while let Some(result) = iter.next() {
|
while let Some(result) = iter.next() {
|
||||||
let (bytes, mut docids) = result?;
|
let (bytes, mut docids) = result?;
|
||||||
let previous_len = docids.len();
|
let previous_len = docids.len();
|
||||||
docids.difference_with(to_remove);
|
docids -= to_remove;
|
||||||
if docids.is_empty() {
|
if docids.is_empty() {
|
||||||
// safety: we don't keep references from inside the LMDB database.
|
// safety: we don't keep references from inside the LMDB database.
|
||||||
unsafe { iter.del_current()? };
|
unsafe { iter.del_current()? };
|
||||||
|
@ -181,7 +181,7 @@ fn compute_facet_number_levels<'t>(
|
|||||||
}
|
}
|
||||||
|
|
||||||
// The right bound is always the bound we run through.
|
// The right bound is always the bound we run through.
|
||||||
group_docids.union_with(&docids);
|
group_docids |= docids;
|
||||||
right = value;
|
right = value;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -61,8 +61,7 @@ pub fn roaring_bitmap_merge(_key: &[u8], values: &[Cow<[u8]>]) -> Result<Vec<u8>
|
|||||||
let mut head = RoaringBitmap::deserialize_from(&head[..])?;
|
let mut head = RoaringBitmap::deserialize_from(&head[..])?;
|
||||||
|
|
||||||
for value in tail {
|
for value in tail {
|
||||||
let bitmap = RoaringBitmap::deserialize_from(&value[..])?;
|
head |= RoaringBitmap::deserialize_from(&value[..])?;
|
||||||
head.union_with(&bitmap);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
let mut vec = Vec::with_capacity(head.serialized_size());
|
let mut vec = Vec::with_capacity(head.serialized_size());
|
||||||
@ -75,8 +74,7 @@ pub fn cbo_roaring_bitmap_merge(_key: &[u8], values: &[Cow<[u8]>]) -> Result<Vec
|
|||||||
let mut head = CboRoaringBitmapCodec::deserialize_from(&head[..])?;
|
let mut head = CboRoaringBitmapCodec::deserialize_from(&head[..])?;
|
||||||
|
|
||||||
for value in tail {
|
for value in tail {
|
||||||
let bitmap = CboRoaringBitmapCodec::deserialize_from(&value[..])?;
|
head |= CboRoaringBitmapCodec::deserialize_from(&value[..])?;
|
||||||
head.union_with(&bitmap);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
let mut vec = Vec::new();
|
let mut vec = Vec::new();
|
||||||
|
@ -608,8 +608,8 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
|
|||||||
self.index.put_external_documents_ids(self.wtxn, &external_documents_ids)?;
|
self.index.put_external_documents_ids(self.wtxn, &external_documents_ids)?;
|
||||||
|
|
||||||
// We merge the new documents ids with the existing ones.
|
// We merge the new documents ids with the existing ones.
|
||||||
documents_ids.union_with(&new_documents_ids);
|
documents_ids |= new_documents_ids;
|
||||||
documents_ids.union_with(&replaced_documents_ids);
|
documents_ids |= replaced_documents_ids;
|
||||||
self.index.put_documents_ids(self.wtxn, &documents_ids)?;
|
self.index.put_documents_ids(self.wtxn, &documents_ids)?;
|
||||||
|
|
||||||
let mut database_count = 0;
|
let mut database_count = 0;
|
||||||
@ -845,6 +845,7 @@ mod tests {
|
|||||||
use heed::EnvOpenOptions;
|
use heed::EnvOpenOptions;
|
||||||
|
|
||||||
use super::*;
|
use super::*;
|
||||||
|
use crate::update::DeleteDocuments;
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn simple_document_replacement() {
|
fn simple_document_replacement() {
|
||||||
@ -1303,4 +1304,52 @@ mod tests {
|
|||||||
builder.execute(Cursor::new(documents), |_, _| ()).unwrap();
|
builder.execute(Cursor::new(documents), |_, _| ()).unwrap();
|
||||||
wtxn.commit().unwrap();
|
wtxn.commit().unwrap();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn delete_documents_then_insert() {
|
||||||
|
let path = tempfile::tempdir().unwrap();
|
||||||
|
let mut options = EnvOpenOptions::new();
|
||||||
|
options.map_size(10 * 1024 * 1024); // 10 MB
|
||||||
|
let index = Index::new(options, &path).unwrap();
|
||||||
|
|
||||||
|
let mut wtxn = index.write_txn().unwrap();
|
||||||
|
let content = &br#"[
|
||||||
|
{ "objectId": 123, "title": "Pride and Prejudice", "comment": "A great book" },
|
||||||
|
{ "objectId": 456, "title": "Le Petit Prince", "comment": "A french book" },
|
||||||
|
{ "objectId": 1, "title": "Alice In Wonderland", "comment": "A weird book" },
|
||||||
|
{ "objectId": 30, "title": "Hamlet" }
|
||||||
|
]"#[..];
|
||||||
|
let mut builder = IndexDocuments::new(&mut wtxn, &index, 0);
|
||||||
|
builder.update_format(UpdateFormat::Json);
|
||||||
|
builder.execute(content, |_, _| ()).unwrap();
|
||||||
|
|
||||||
|
assert_eq!(index.primary_key(&wtxn).unwrap(), Some("objectId"));
|
||||||
|
|
||||||
|
// Delete not all of the documents but some of them.
|
||||||
|
let mut builder = DeleteDocuments::new(&mut wtxn, &index, 1).unwrap();
|
||||||
|
builder.delete_external_id("30");
|
||||||
|
builder.execute().unwrap();
|
||||||
|
|
||||||
|
let external_documents_ids = index.external_documents_ids(&wtxn).unwrap();
|
||||||
|
assert!(external_documents_ids.get("30").is_none());
|
||||||
|
|
||||||
|
let content = &br#"[
|
||||||
|
{ "objectId": 30, "title": "Hamlet" }
|
||||||
|
]"#[..];
|
||||||
|
let mut builder = IndexDocuments::new(&mut wtxn, &index, 0);
|
||||||
|
builder.update_format(UpdateFormat::Json);
|
||||||
|
builder.execute(content, |_, _| ()).unwrap();
|
||||||
|
|
||||||
|
let external_documents_ids = index.external_documents_ids(&wtxn).unwrap();
|
||||||
|
assert!(external_documents_ids.get("30").is_some());
|
||||||
|
|
||||||
|
let content = &br#"[
|
||||||
|
{ "objectId": 30, "title": "Hamlet" }
|
||||||
|
]"#[..];
|
||||||
|
let mut builder = IndexDocuments::new(&mut wtxn, &index, 0);
|
||||||
|
builder.update_format(UpdateFormat::Json);
|
||||||
|
builder.execute(content, |_, _| ()).unwrap();
|
||||||
|
|
||||||
|
wtxn.commit().unwrap();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -236,7 +236,7 @@ fn compute_positions_levels(
|
|||||||
}
|
}
|
||||||
|
|
||||||
// The right bound is always the bound we run through.
|
// The right bound is always the bound we run through.
|
||||||
group_docids.union_with(&docids);
|
group_docids |= docids;
|
||||||
}
|
}
|
||||||
|
|
||||||
if !group_docids.is_empty() {
|
if !group_docids.is_empty() {
|
||||||
|
Loading…
Reference in New Issue
Block a user