mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-01-12 06:24:29 +01:00
Introduce the ExternalDocumentsIds struct
This commit is contained in:
parent
eded5558b2
commit
415c0b86ba
149
src/external_documents_ids.rs
Normal file
149
src/external_documents_ids.rs
Normal file
@ -0,0 +1,149 @@
|
||||
use std::borrow::Cow;
|
||||
use std::convert::TryInto;
|
||||
use fst::{Streamer, IntoStreamer};
|
||||
|
||||
pub struct ExternalDocumentsIds<'a> {
|
||||
pub(crate) hard: fst::Map<Cow<'a, [u8]>>,
|
||||
pub(crate) soft: fst::Map<Cow<'a, [u8]>>,
|
||||
}
|
||||
|
||||
impl<'a> ExternalDocumentsIds<'a> {
|
||||
pub fn new(hard: fst::Map<Cow<'a, [u8]>>, soft: fst::Map<Cow<'a, [u8]>>) -> ExternalDocumentsIds<'a> {
|
||||
ExternalDocumentsIds { hard, soft }
|
||||
}
|
||||
|
||||
pub fn get<A: AsRef<str>>(&self, external_id: A) -> Option<u32> {
|
||||
let external_id = external_id.as_ref();
|
||||
match self.soft.get(external_id).or_else(|| self.hard.get(external_id)) {
|
||||
// u64 MAX means deleted in the soft fst map
|
||||
Some(id) if id != u64::MAX => Some(id.try_into().unwrap()),
|
||||
_otherwise => None
|
||||
}
|
||||
}
|
||||
|
||||
pub fn delete_ids<A: AsRef<[u8]>>(&mut self, other: fst::Set<A>) -> fst::Result<()> {
|
||||
let other = fst::Map::from(other.into_fst());
|
||||
let union_op = self.soft.op().add(&other).r#union();
|
||||
|
||||
let mut iter = union_op.into_stream();
|
||||
let mut new_soft_builder = fst::MapBuilder::memory();
|
||||
while let Some((external_id, docids)) = iter.next() {
|
||||
if docids.iter().any(|v| v.index == 1) {
|
||||
// If the `other` set returns a value here it means
|
||||
// that it must be marked as deleted.
|
||||
new_soft_builder.insert(external_id, u64::MAX)?;
|
||||
} else {
|
||||
new_soft_builder.insert(external_id, docids[0].value)?;
|
||||
}
|
||||
}
|
||||
|
||||
drop(iter);
|
||||
|
||||
// We save this new map as the new soft map.
|
||||
self.soft = new_soft_builder.into_map().map_data(Cow::Owned)?;
|
||||
self.merge_soft_into_hard()
|
||||
}
|
||||
|
||||
pub fn insert_ids<A: AsRef<[u8]>>(&mut self, other: &fst::Map<A>) -> fst::Result<()> {
|
||||
let union_op = self.soft.op().add(other).r#union();
|
||||
|
||||
let mut new_soft_builder = fst::MapBuilder::memory();
|
||||
let mut iter = union_op.into_stream();
|
||||
while let Some((external_id, docids)) = iter.next() {
|
||||
let id = docids.last().unwrap().value;
|
||||
new_soft_builder.insert(external_id, id)?;
|
||||
}
|
||||
|
||||
drop(iter);
|
||||
|
||||
// We save the new map as the new soft map.
|
||||
self.soft = new_soft_builder.into_map().map_data(Cow::Owned)?;
|
||||
self.merge_soft_into_hard()
|
||||
}
|
||||
|
||||
fn merge_soft_into_hard(&mut self) -> fst::Result<()> {
|
||||
if self.soft.len() >= self.hard.len() / 2 {
|
||||
let union_op = self.hard.op().add(&self.soft).r#union();
|
||||
|
||||
let mut iter = union_op.into_stream();
|
||||
let mut new_hard_builder = fst::MapBuilder::memory();
|
||||
while let Some((external_id, docids)) = iter.next() {
|
||||
if docids.len() == 2 {
|
||||
if docids[1].value != u64::MAX {
|
||||
new_hard_builder.insert(external_id, docids[1].value)?;
|
||||
}
|
||||
} else {
|
||||
new_hard_builder.insert(external_id, docids[0].value)?;
|
||||
}
|
||||
}
|
||||
|
||||
drop(iter);
|
||||
|
||||
self.hard = new_hard_builder.into_map().map_data(Cow::Owned)?;
|
||||
self.soft = fst::Map::default().map_data(Cow::Owned)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for ExternalDocumentsIds<'static> {
|
||||
fn default() -> Self {
|
||||
ExternalDocumentsIds {
|
||||
hard: fst::Map::default().map_data(Cow::Owned).unwrap(),
|
||||
soft: fst::Map::default().map_data(Cow::Owned).unwrap(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks that each external id resolves to the expected internal id.
    fn assert_lookups(ids: &ExternalDocumentsIds<'_>, expected: &[(&str, Option<u32>)]) {
        for &(external_id, internal_id) in expected {
            assert_eq!(ids.get(external_id), internal_id);
        }
    }

    /// Inserts, deletes and re-inserts ids, checking the lookups after each step.
    #[test]
    fn simple_insert_delete_ids() {
        let mut ids = ExternalDocumentsIds::default();

        // First batch of insertions.
        let first_batch = fst::Map::from_iter(vec![("a", 1), ("b", 2), ("c", 3), ("d", 4)]).unwrap();
        ids.insert_ids(&first_batch).unwrap();
        assert_lookups(&ids, &[("a", Some(1)), ("b", Some(2)), ("c", Some(3)), ("d", Some(4))]);

        // A second batch must not disturb the first one.
        let second_batch = fst::Map::from_iter(vec![("e", 5), ("f", 6), ("g", 7)]).unwrap();
        ids.insert_ids(&second_batch).unwrap();
        assert_lookups(
            &ids,
            &[
                ("a", Some(1)),
                ("b", Some(2)),
                ("c", Some(3)),
                ("d", Some(4)),
                ("e", Some(5)),
                ("f", Some(6)),
                ("g", Some(7)),
            ],
        );

        // Deleted ids must no longer resolve.
        let deleted = fst::Set::from_iter(vec!["a", "c", "f"]).unwrap();
        ids.delete_ids(deleted).unwrap();
        assert_lookups(
            &ids,
            &[
                ("a", None),
                ("b", Some(2)),
                ("c", None),
                ("d", Some(4)),
                ("e", Some(5)),
                ("f", None),
                ("g", Some(7)),
            ],
        );

        // Re-inserting a deleted id, overriding a live one and adding a new one.
        let third_batch = fst::Map::from_iter(vec![("a", 5), ("b", 6), ("h", 8)]).unwrap();
        ids.insert_ids(&third_batch).unwrap();
        assert_lookups(
            &ids,
            &[
                ("a", Some(5)),
                ("b", Some(6)),
                ("c", None),
                ("d", Some(4)),
                ("e", Some(5)),
                ("f", None),
                ("g", Some(7)),
                ("h", Some(8)),
            ],
        );
    }
}
|
@ -1,4 +1,5 @@
|
||||
mod criterion;
|
||||
mod external_documents_ids;
|
||||
mod fields_ids_map;
|
||||
mod index;
|
||||
mod mdfs;
|
||||
|
Loading…
x
Reference in New Issue
Block a user