Fix ExternalDocumentsIds struct when inserting previously deleted ids

This commit is contained in:
Kerollmops 2021-06-30 11:22:57 +02:00
parent 54889813ce
commit 28782ff99d
No known key found for this signature in database
GPG Key ID: 92ADA4E935E71FA4

View File

@ -3,6 +3,7 @@ use std::collections::HashMap;
use std::convert::TryInto;
use std::{fmt, str};
use fst::map::IndexedValue;
use fst::{IntoStreamer, Streamer};
const DELETED_ID: u64 = u64::MAX;
@ -35,7 +36,6 @@ impl<'a> ExternalDocumentsIds<'a> {
pub fn get<A: AsRef<[u8]>>(&self, external_id: A) -> Option<u32> {
let external_id = external_id.as_ref();
match self.soft.get(external_id).or_else(|| self.hard.get(external_id)) {
// u64 MAX means deleted in the soft fst map
Some(id) if id != DELETED_ID => Some(id.try_into().unwrap()),
_otherwise => None,
}
@ -53,7 +53,8 @@ impl<'a> ExternalDocumentsIds<'a> {
// that it must be marked as deleted.
new_soft_builder.insert(external_id, DELETED_ID)?;
} else {
new_soft_builder.insert(external_id, docids[0].value)?;
let value = docids.iter().find(|v| v.index == 0).unwrap().value;
new_soft_builder.insert(external_id, value)?;
}
}
@ -69,8 +70,8 @@ impl<'a> ExternalDocumentsIds<'a> {
let mut new_soft_builder = fst::MapBuilder::memory();
let mut iter = union_op.into_stream();
while let Some((external_id, docids)) = iter.next() {
let id = docids.last().unwrap().value;
while let Some((external_id, marked_docids)) = iter.next() {
let id = indexed_last_value(marked_docids).unwrap();
new_soft_builder.insert(external_id, id)?;
}
@ -89,7 +90,7 @@ impl<'a> ExternalDocumentsIds<'a> {
let union_op = self.hard.op().add(&self.soft).r#union();
let mut iter = union_op.into_stream();
while let Some((external_id, marked_docids)) = iter.next() {
let id = marked_docids.last().unwrap().value;
let id = indexed_last_value(marked_docids).unwrap();
if id != DELETED_ID {
let external_id = str::from_utf8(external_id).unwrap();
map.insert(external_id.to_owned(), id.try_into().unwrap());
@ -105,13 +106,10 @@ impl<'a> ExternalDocumentsIds<'a> {
let mut iter = union_op.into_stream();
let mut new_hard_builder = fst::MapBuilder::memory();
while let Some((external_id, docids)) = iter.next() {
if docids.len() == 2 {
if docids[1].value != DELETED_ID {
new_hard_builder.insert(external_id, docids[1].value)?;
}
} else {
new_hard_builder.insert(external_id, docids[0].value)?;
while let Some((external_id, marked_docids)) = iter.next() {
let value = indexed_last_value(marked_docids).unwrap();
if value != DELETED_ID {
new_hard_builder.insert(external_id, value)?;
}
}
@ -140,6 +138,11 @@ impl Default for ExternalDocumentsIds<'static> {
}
}
/// Returns the value of the `IndexedValue` with the highest _index_.
fn indexed_last_value(indexed_values: &[IndexedValue]) -> Option<u64> {
indexed_values.iter().copied().max_by_key(|iv| iv.index).map(|iv| iv.value)
}
#[cfg(test)]
mod tests {
use super::*;
@ -190,4 +193,25 @@ mod tests {
assert_eq!(external_documents_ids.get("g"), Some(7));
assert_eq!(external_documents_ids.get("h"), Some(8));
}
#[test]
fn strange_delete_insert_ids() {
let mut external_documents_ids = ExternalDocumentsIds::default();
let new_ids =
fst::Map::from_iter(vec![("1", 0), ("123", 1), ("30", 2), ("456", 3)]).unwrap();
external_documents_ids.insert_ids(&new_ids).unwrap();
assert_eq!(external_documents_ids.get("1"), Some(0));
assert_eq!(external_documents_ids.get("123"), Some(1));
assert_eq!(external_documents_ids.get("30"), Some(2));
assert_eq!(external_documents_ids.get("456"), Some(3));
let deleted_ids = fst::Set::from_iter(vec!["30"]).unwrap();
external_documents_ids.delete_ids(deleted_ids).unwrap();
assert_eq!(external_documents_ids.get("30"), None);
let new_ids = fst::Map::from_iter(vec![("30", 2)]).unwrap();
external_documents_ids.insert_ids(&new_ids).unwrap();
assert_eq!(external_documents_ids.get("30"), Some(2));
}
}