Fix ExternalDocumentsIds struct when inserting previously deleted ids

This commit is contained in:
Kerollmops 2021-06-30 11:22:57 +02:00
parent 54889813ce
commit 28782ff99d
No known key found for this signature in database
GPG Key ID: 92ADA4E935E71FA4

View File

@ -3,6 +3,7 @@ use std::collections::HashMap;
use std::convert::TryInto; use std::convert::TryInto;
use std::{fmt, str}; use std::{fmt, str};
use fst::map::IndexedValue;
use fst::{IntoStreamer, Streamer}; use fst::{IntoStreamer, Streamer};
const DELETED_ID: u64 = u64::MAX; const DELETED_ID: u64 = u64::MAX;
@ -35,7 +36,6 @@ impl<'a> ExternalDocumentsIds<'a> {
pub fn get<A: AsRef<[u8]>>(&self, external_id: A) -> Option<u32> { pub fn get<A: AsRef<[u8]>>(&self, external_id: A) -> Option<u32> {
let external_id = external_id.as_ref(); let external_id = external_id.as_ref();
match self.soft.get(external_id).or_else(|| self.hard.get(external_id)) { match self.soft.get(external_id).or_else(|| self.hard.get(external_id)) {
// u64 MAX means deleted in the soft fst map
Some(id) if id != DELETED_ID => Some(id.try_into().unwrap()), Some(id) if id != DELETED_ID => Some(id.try_into().unwrap()),
_otherwise => None, _otherwise => None,
} }
@ -53,7 +53,8 @@ impl<'a> ExternalDocumentsIds<'a> {
// that it must be marked as deleted. // that it must be marked as deleted.
new_soft_builder.insert(external_id, DELETED_ID)?; new_soft_builder.insert(external_id, DELETED_ID)?;
} else { } else {
new_soft_builder.insert(external_id, docids[0].value)?; let value = docids.iter().find(|v| v.index == 0).unwrap().value;
new_soft_builder.insert(external_id, value)?;
} }
} }
@ -69,8 +70,8 @@ impl<'a> ExternalDocumentsIds<'a> {
let mut new_soft_builder = fst::MapBuilder::memory(); let mut new_soft_builder = fst::MapBuilder::memory();
let mut iter = union_op.into_stream(); let mut iter = union_op.into_stream();
while let Some((external_id, docids)) = iter.next() { while let Some((external_id, marked_docids)) = iter.next() {
let id = docids.last().unwrap().value; let id = indexed_last_value(marked_docids).unwrap();
new_soft_builder.insert(external_id, id)?; new_soft_builder.insert(external_id, id)?;
} }
@ -89,7 +90,7 @@ impl<'a> ExternalDocumentsIds<'a> {
let union_op = self.hard.op().add(&self.soft).r#union(); let union_op = self.hard.op().add(&self.soft).r#union();
let mut iter = union_op.into_stream(); let mut iter = union_op.into_stream();
while let Some((external_id, marked_docids)) = iter.next() { while let Some((external_id, marked_docids)) = iter.next() {
let id = marked_docids.last().unwrap().value; let id = indexed_last_value(marked_docids).unwrap();
if id != DELETED_ID { if id != DELETED_ID {
let external_id = str::from_utf8(external_id).unwrap(); let external_id = str::from_utf8(external_id).unwrap();
map.insert(external_id.to_owned(), id.try_into().unwrap()); map.insert(external_id.to_owned(), id.try_into().unwrap());
@ -105,13 +106,10 @@ impl<'a> ExternalDocumentsIds<'a> {
let mut iter = union_op.into_stream(); let mut iter = union_op.into_stream();
let mut new_hard_builder = fst::MapBuilder::memory(); let mut new_hard_builder = fst::MapBuilder::memory();
while let Some((external_id, docids)) = iter.next() { while let Some((external_id, marked_docids)) = iter.next() {
if docids.len() == 2 { let value = indexed_last_value(marked_docids).unwrap();
if docids[1].value != DELETED_ID { if value != DELETED_ID {
new_hard_builder.insert(external_id, docids[1].value)?; new_hard_builder.insert(external_id, value)?;
}
} else {
new_hard_builder.insert(external_id, docids[0].value)?;
} }
} }
@ -140,6 +138,11 @@ impl Default for ExternalDocumentsIds<'static> {
} }
} }
/// Returns the value of the `IndexedValue` with the highest _index_.
fn indexed_last_value(indexed_values: &[IndexedValue]) -> Option<u64> {
indexed_values.iter().copied().max_by_key(|iv| iv.index).map(|iv| iv.value)
}
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use super::*;
@ -190,4 +193,25 @@ mod tests {
assert_eq!(external_documents_ids.get("g"), Some(7)); assert_eq!(external_documents_ids.get("g"), Some(7));
assert_eq!(external_documents_ids.get("h"), Some(8)); assert_eq!(external_documents_ids.get("h"), Some(8));
} }
#[test]
fn strange_delete_insert_ids() {
let mut external_documents_ids = ExternalDocumentsIds::default();
let new_ids =
fst::Map::from_iter(vec![("1", 0), ("123", 1), ("30", 2), ("456", 3)]).unwrap();
external_documents_ids.insert_ids(&new_ids).unwrap();
assert_eq!(external_documents_ids.get("1"), Some(0));
assert_eq!(external_documents_ids.get("123"), Some(1));
assert_eq!(external_documents_ids.get("30"), Some(2));
assert_eq!(external_documents_ids.get("456"), Some(3));
let deleted_ids = fst::Set::from_iter(vec!["30"]).unwrap();
external_documents_ids.delete_ids(deleted_ids).unwrap();
assert_eq!(external_documents_ids.get("30"), None);
let new_ids = fst::Map::from_iter(vec![("30", 2)]).unwrap();
external_documents_ids.insert_ids(&new_ids).unwrap();
assert_eq!(external_documents_ids.get("30"), Some(2));
}
} }