mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-01-22 19:27:27 +01:00
Use an LMDB database to store the external documents ids
This commit is contained in:
parent
fdf3f7f627
commit
dfab6293c9
@ -1575,11 +1575,14 @@ fn delete_document_by_filter<'a>(
|
|||||||
}
|
}
|
||||||
e => e.into(),
|
e => e.into(),
|
||||||
})?;
|
})?;
|
||||||
let external_documents_ids = index.external_documents_ids(wtxn)?;
|
let external_documents_ids = index.external_documents_ids();
|
||||||
// FIXME: for filters matching a lot of documents, this will allocate a huge vec of external docids (strings).
|
// FIXME: for filters matching a lot of documents, this will allocate a huge vec of external docids (strings).
|
||||||
// Since what we have is an iterator, it would be better to delete in chunks
|
// Since what we have is an iterator, it would be better to delete in chunks
|
||||||
let external_to_internal: std::result::Result<Vec<_>, RoaringBitmap> =
|
let external_to_internal: std::result::Result<Vec<_>, RoaringBitmap> =
|
||||||
external_documents_ids.find_external_id_of(candidates).only_external_ids().collect();
|
external_documents_ids
|
||||||
|
.find_external_id_of(wtxn, candidates)?
|
||||||
|
.only_external_ids()
|
||||||
|
.collect();
|
||||||
let document_ids = match external_to_internal {
|
let document_ids = match external_to_internal {
|
||||||
Ok(external_ids) => external_ids,
|
Ok(external_ids) => external_ids,
|
||||||
Err(remaining_ids) => panic!("Couldn't find some external ids {:?}", remaining_ids),
|
Err(remaining_ids) => panic!("Couldn't find some external ids {:?}", remaining_ids),
|
||||||
|
@ -612,8 +612,8 @@ fn retrieve_document<S: AsRef<str>>(
|
|||||||
let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
|
let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
|
||||||
|
|
||||||
let internal_id = index
|
let internal_id = index
|
||||||
.external_documents_ids(&txn)?
|
.external_documents_ids()
|
||||||
.get(doc_id.as_bytes())
|
.get(&txn, doc_id)?
|
||||||
.ok_or_else(|| MeilisearchHttpError::DocumentNotFound(doc_id.to_string()))?;
|
.ok_or_else(|| MeilisearchHttpError::DocumentNotFound(doc_id.to_string()))?;
|
||||||
|
|
||||||
let document = index
|
let document = index
|
||||||
|
@ -1,12 +1,11 @@
|
|||||||
use std::borrow::Cow;
|
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
use std::convert::TryInto;
|
use std::convert::TryInto;
|
||||||
use std::fmt;
|
|
||||||
|
|
||||||
use fst::Streamer;
|
use heed::types::{OwnedType, Str};
|
||||||
|
use heed::{Database, RoIter, RoTxn, RwTxn};
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
use crate::DocumentId;
|
use crate::{DocumentId, BEU32};
|
||||||
|
|
||||||
pub enum DocumentOperationKind {
|
pub enum DocumentOperationKind {
|
||||||
Create,
|
Create,
|
||||||
@ -19,41 +18,31 @@ pub struct DocumentOperation {
|
|||||||
pub kind: DocumentOperationKind,
|
pub kind: DocumentOperationKind,
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct ExternalDocumentsIds<'a>(fst::Map<Cow<'a, [u8]>>);
|
pub struct ExternalDocumentsIds(Database<Str, OwnedType<BEU32>>);
|
||||||
|
|
||||||
impl<'a> ExternalDocumentsIds<'a> {
|
impl ExternalDocumentsIds {
|
||||||
pub fn new(fst: fst::Map<Cow<'a, [u8]>>) -> ExternalDocumentsIds<'a> {
|
pub fn new(db: Database<Str, OwnedType<BEU32>>) -> ExternalDocumentsIds {
|
||||||
ExternalDocumentsIds(fst)
|
ExternalDocumentsIds(db)
|
||||||
}
|
|
||||||
|
|
||||||
pub fn into_static(self) -> ExternalDocumentsIds<'static> {
|
|
||||||
ExternalDocumentsIds(self.0.map_data(|c| Cow::Owned(c.into_owned())).unwrap())
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns `true` if hard and soft external documents lists are empty.
|
/// Returns `true` if hard and soft external documents lists are empty.
|
||||||
pub fn is_empty(&self) -> bool {
|
pub fn is_empty(&self, rtxn: &RoTxn) -> heed::Result<bool> {
|
||||||
self.0.is_empty()
|
self.0.is_empty(rtxn).map_err(Into::into)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn get<A: AsRef<[u8]>>(&self, external_id: A) -> Option<u32> {
|
pub fn get<A: AsRef<str>>(&self, rtxn: &RoTxn, external_id: A) -> heed::Result<Option<u32>> {
|
||||||
let external_id = external_id.as_ref();
|
Ok(self.0.get(rtxn, external_id.as_ref())?.map(|x| x.get().try_into().unwrap()))
|
||||||
self.0.get(external_id).map(|x| x.try_into().unwrap())
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// An helper function to debug this type, returns an `HashMap` of both,
|
/// An helper function to debug this type, returns an `HashMap` of both,
|
||||||
/// soft and hard fst maps, combined.
|
/// soft and hard fst maps, combined.
|
||||||
pub fn to_hash_map(&self) -> HashMap<String, u32> {
|
pub fn to_hash_map(&self, rtxn: &RoTxn) -> heed::Result<HashMap<String, u32>> {
|
||||||
let mut map = HashMap::default();
|
let mut map = HashMap::default();
|
||||||
let mut stream = self.0.stream();
|
for result in self.0.iter(rtxn)? {
|
||||||
while let Some((k, v)) = stream.next() {
|
let (external, internal) = result?;
|
||||||
let k = String::from_utf8(k.to_vec()).unwrap();
|
map.insert(external.to_owned(), internal.get().try_into().unwrap());
|
||||||
map.insert(k, v.try_into().unwrap());
|
|
||||||
}
|
}
|
||||||
map
|
Ok(map)
|
||||||
}
|
|
||||||
|
|
||||||
pub fn as_bytes(&self) -> &[u8] {
|
|
||||||
self.0.as_fst().as_bytes()
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Looks for the internal ids in the passed bitmap, and returns an iterator over the mapping between
|
/// Looks for the internal ids in the passed bitmap, and returns an iterator over the mapping between
|
||||||
@ -65,12 +54,12 @@ impl<'a> ExternalDocumentsIds<'a> {
|
|||||||
/// - `Err(remaining_ids)`: if the external ids for some of the requested internal ids weren't found.
|
/// - `Err(remaining_ids)`: if the external ids for some of the requested internal ids weren't found.
|
||||||
/// In that case the returned bitmap contains the internal ids whose external ids were not found after traversing
|
/// In that case the returned bitmap contains the internal ids whose external ids were not found after traversing
|
||||||
/// the entire fst.
|
/// the entire fst.
|
||||||
pub fn find_external_id_of(
|
pub fn find_external_id_of<'t>(
|
||||||
&self,
|
&self,
|
||||||
|
rtxn: &'t RoTxn,
|
||||||
internal_ids: RoaringBitmap,
|
internal_ids: RoaringBitmap,
|
||||||
) -> ExternalToInternalOwnedIterator<'_> {
|
) -> heed::Result<ExternalToInternalOwnedIterator<'t>> {
|
||||||
let it = ExternalToInternalOwnedIterator { stream: self.0.stream(), internal_ids };
|
self.0.iter(rtxn).map(|iter| ExternalToInternalOwnedIterator { iter, internal_ids })
|
||||||
it
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Applies the list of operations passed as argument, modifying the current external to internal id mapping.
|
/// Applies the list of operations passed as argument, modifying the current external to internal id mapping.
|
||||||
@ -81,84 +70,39 @@ impl<'a> ExternalDocumentsIds<'a> {
|
|||||||
///
|
///
|
||||||
/// - If attempting to delete a document that doesn't exist
|
/// - If attempting to delete a document that doesn't exist
|
||||||
/// - If attempting to create a document that already exists
|
/// - If attempting to create a document that already exists
|
||||||
pub fn apply(&mut self, mut operations: Vec<DocumentOperation>) {
|
pub fn apply(&self, wtxn: &mut RwTxn, operations: Vec<DocumentOperation>) -> heed::Result<()> {
|
||||||
operations.sort_unstable_by(|left, right| left.external_id.cmp(&right.external_id));
|
for DocumentOperation { external_id, internal_id, kind } in operations {
|
||||||
operations.dedup_by(|left, right| left.external_id == right.external_id);
|
match kind {
|
||||||
|
DocumentOperationKind::Create => {
|
||||||
let mut builder = fst::MapBuilder::memory();
|
// TODO should we get before insert to be able to detect bugs?
|
||||||
|
// if matches!(kind, DocumentOperationKind::Create) {
|
||||||
let mut stream = self.0.stream();
|
// panic!("Attempting to create an already-existing document");
|
||||||
let mut next_stream = stream.next();
|
// }
|
||||||
let mut operations = operations.iter();
|
self.0.put(wtxn, &external_id, &BEU32::new(internal_id))?;
|
||||||
let mut next_operation = operations.next();
|
}
|
||||||
|
DocumentOperationKind::Delete => {
|
||||||
loop {
|
if !self.0.delete(wtxn, &external_id)? {
|
||||||
(next_stream, next_operation) = match (next_stream.take(), next_operation.take()) {
|
|
||||||
(None, None) => break,
|
|
||||||
(None, Some(DocumentOperation { external_id, internal_id, kind })) => {
|
|
||||||
if matches!(kind, DocumentOperationKind::Delete) {
|
|
||||||
panic!("Attempting to delete a non-existing document")
|
panic!("Attempting to delete a non-existing document")
|
||||||
}
|
}
|
||||||
builder.insert(external_id, (*internal_id).into()).unwrap();
|
|
||||||
(None, operations.next())
|
|
||||||
}
|
}
|
||||||
(Some((k, v)), None) => {
|
|
||||||
builder.insert(k, v).unwrap();
|
|
||||||
(stream.next(), None)
|
|
||||||
}
|
|
||||||
(
|
|
||||||
current_stream @ Some((left_external_id, left_internal_id)),
|
|
||||||
current_operation @ Some(DocumentOperation {
|
|
||||||
external_id: right_external_id,
|
|
||||||
internal_id: right_internal_id,
|
|
||||||
kind,
|
|
||||||
}),
|
|
||||||
) => match left_external_id.cmp(right_external_id.as_bytes()) {
|
|
||||||
std::cmp::Ordering::Less => {
|
|
||||||
builder.insert(left_external_id, left_internal_id).unwrap();
|
|
||||||
(stream.next(), current_operation)
|
|
||||||
}
|
|
||||||
std::cmp::Ordering::Greater => {
|
|
||||||
builder.insert(right_external_id, (*right_internal_id).into()).unwrap();
|
|
||||||
(current_stream, operations.next())
|
|
||||||
}
|
|
||||||
std::cmp::Ordering::Equal => {
|
|
||||||
if matches!(kind, DocumentOperationKind::Create) {
|
|
||||||
panic!("Attempting to create an already-existing document");
|
|
||||||
}
|
|
||||||
// we delete the document, so we just advance both iterators to skip in stream
|
|
||||||
(stream.next(), operations.next())
|
|
||||||
}
|
|
||||||
},
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
self.0 = builder.into_map().map_data(Cow::Owned).unwrap();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl fmt::Debug for ExternalDocumentsIds<'_> {
|
Ok(())
|
||||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
|
||||||
f.debug_tuple("ExternalDocumentsIds").field(&self.to_hash_map()).finish()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Default for ExternalDocumentsIds<'static> {
|
|
||||||
fn default() -> Self {
|
|
||||||
ExternalDocumentsIds(fst::Map::default().map_data(Cow::Owned).unwrap())
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// An iterator over mappings between requested internal ids and external ids.
|
/// An iterator over mappings between requested internal ids and external ids.
|
||||||
///
|
///
|
||||||
/// See [`ExternalDocumentsIds::find_external_id_of`] for details.
|
/// See [`ExternalDocumentsIds::find_external_id_of`] for details.
|
||||||
pub struct ExternalToInternalOwnedIterator<'it> {
|
pub struct ExternalToInternalOwnedIterator<'t> {
|
||||||
stream: fst::map::Stream<'it>,
|
iter: RoIter<'t, Str, OwnedType<BEU32>>,
|
||||||
internal_ids: RoaringBitmap,
|
internal_ids: RoaringBitmap,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'it> Iterator for ExternalToInternalOwnedIterator<'it> {
|
impl<'t> Iterator for ExternalToInternalOwnedIterator<'t> {
|
||||||
/// A result indicating if a mapping was found, or if the stream was exhausted without finding all internal ids.
|
/// A result indicating if a mapping was found, or if the stream was exhausted without finding all internal ids.
|
||||||
type Item = Result<(String, DocumentId), RoaringBitmap>;
|
type Item = Result<(&'t str, DocumentId), RoaringBitmap>;
|
||||||
|
|
||||||
fn next(&mut self) -> Option<Self::Item> {
|
fn next(&mut self) -> Option<Self::Item> {
|
||||||
// if all requested ids were found, we won't find any other, so short-circuit
|
// if all requested ids were found, we won't find any other, so short-circuit
|
||||||
@ -166,23 +110,28 @@ impl<'it> Iterator for ExternalToInternalOwnedIterator<'it> {
|
|||||||
return None;
|
return None;
|
||||||
}
|
}
|
||||||
loop {
|
loop {
|
||||||
let Some((external, internal)) = self.stream.next() else {
|
let (external, internal) = match self.iter.next() {
|
||||||
// we exhausted the stream but we still have some internal ids to find
|
Some(Ok((external, internal))) => (external, internal),
|
||||||
let remaining_ids = std::mem::take(&mut self.internal_ids);
|
// TODO manage this better, remove panic
|
||||||
return Some(Err(remaining_ids));
|
Some(Err(e)) => panic!("{}", e),
|
||||||
// note: next calls to `next` will return `None` since we replaced the internal_ids
|
_ => {
|
||||||
// with the default empty bitmap
|
// we exhausted the stream but we still have some internal ids to find
|
||||||
|
let remaining_ids = std::mem::take(&mut self.internal_ids);
|
||||||
|
return Some(Err(remaining_ids));
|
||||||
|
// note: next calls to `next` will return `None` since we replaced the internal_ids
|
||||||
|
// with the default empty bitmap
|
||||||
|
}
|
||||||
};
|
};
|
||||||
let internal = internal.try_into().unwrap();
|
let internal = internal.get();
|
||||||
let was_contained = self.internal_ids.remove(internal);
|
let was_contained = self.internal_ids.remove(internal);
|
||||||
if was_contained {
|
if was_contained {
|
||||||
return Some(Ok((std::str::from_utf8(external).unwrap().to_owned(), internal)));
|
return Some(Ok((external, internal)));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'it> ExternalToInternalOwnedIterator<'it> {
|
impl<'t> ExternalToInternalOwnedIterator<'t> {
|
||||||
/// Returns the bitmap of internal ids whose external id are yet to be found
|
/// Returns the bitmap of internal ids whose external id are yet to be found
|
||||||
pub fn remaining_internal_ids(&self) -> &RoaringBitmap {
|
pub fn remaining_internal_ids(&self) -> &RoaringBitmap {
|
||||||
&self.internal_ids
|
&self.internal_ids
|
||||||
@ -191,7 +140,7 @@ impl<'it> ExternalToInternalOwnedIterator<'it> {
|
|||||||
/// Consumes this iterator and returns an iterator over only the external ids, ignoring the internal ids.
|
/// Consumes this iterator and returns an iterator over only the external ids, ignoring the internal ids.
|
||||||
///
|
///
|
||||||
/// Use this when you don't need the mapping between the external and the internal ids.
|
/// Use this when you don't need the mapping between the external and the internal ids.
|
||||||
pub fn only_external_ids(self) -> impl Iterator<Item = Result<String, RoaringBitmap>> + 'it {
|
pub fn only_external_ids(self) -> impl Iterator<Item = Result<String, RoaringBitmap>> + 't {
|
||||||
self.map(|res| res.map(|(external, _internal)| external))
|
self.map(|res| res.map(|(external, _internal)| external.to_owned()))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -51,7 +51,6 @@ pub mod main_key {
|
|||||||
/// It is concatenated with a big-endian encoded number (non-human readable).
|
/// It is concatenated with a big-endian encoded number (non-human readable).
|
||||||
/// e.g. vector-hnsw0x0032.
|
/// e.g. vector-hnsw0x0032.
|
||||||
pub const VECTOR_HNSW_KEY_PREFIX: &str = "vector-hnsw";
|
pub const VECTOR_HNSW_KEY_PREFIX: &str = "vector-hnsw";
|
||||||
pub const EXTERNAL_DOCUMENTS_IDS_KEY: &str = "external-documents-ids";
|
|
||||||
pub const PRIMARY_KEY_KEY: &str = "primary-key";
|
pub const PRIMARY_KEY_KEY: &str = "primary-key";
|
||||||
pub const SEARCHABLE_FIELDS_KEY: &str = "searchable-fields";
|
pub const SEARCHABLE_FIELDS_KEY: &str = "searchable-fields";
|
||||||
pub const USER_DEFINED_SEARCHABLE_FIELDS_KEY: &str = "user-defined-searchable-fields";
|
pub const USER_DEFINED_SEARCHABLE_FIELDS_KEY: &str = "user-defined-searchable-fields";
|
||||||
@ -81,6 +80,7 @@ pub mod db_name {
|
|||||||
pub const EXACT_WORD_DOCIDS: &str = "exact-word-docids";
|
pub const EXACT_WORD_DOCIDS: &str = "exact-word-docids";
|
||||||
pub const WORD_PREFIX_DOCIDS: &str = "word-prefix-docids";
|
pub const WORD_PREFIX_DOCIDS: &str = "word-prefix-docids";
|
||||||
pub const EXACT_WORD_PREFIX_DOCIDS: &str = "exact-word-prefix-docids";
|
pub const EXACT_WORD_PREFIX_DOCIDS: &str = "exact-word-prefix-docids";
|
||||||
|
pub const EXTERNAL_DOCUMENTS_IDS: &str = "external-documents-ids";
|
||||||
pub const DOCID_WORD_POSITIONS: &str = "docid-word-positions";
|
pub const DOCID_WORD_POSITIONS: &str = "docid-word-positions";
|
||||||
pub const WORD_PAIR_PROXIMITY_DOCIDS: &str = "word-pair-proximity-docids";
|
pub const WORD_PAIR_PROXIMITY_DOCIDS: &str = "word-pair-proximity-docids";
|
||||||
pub const WORD_PREFIX_PAIR_PROXIMITY_DOCIDS: &str = "word-prefix-pair-proximity-docids";
|
pub const WORD_PREFIX_PAIR_PROXIMITY_DOCIDS: &str = "word-prefix-pair-proximity-docids";
|
||||||
@ -112,6 +112,9 @@ pub struct Index {
|
|||||||
/// Contains many different types (e.g. the fields ids map).
|
/// Contains many different types (e.g. the fields ids map).
|
||||||
pub(crate) main: PolyDatabase,
|
pub(crate) main: PolyDatabase,
|
||||||
|
|
||||||
|
/// Maps the external documents ids with the internal document id.
|
||||||
|
pub external_documents_ids: Database<Str, OwnedType<BEU32>>,
|
||||||
|
|
||||||
/// A word and all the documents ids containing the word.
|
/// A word and all the documents ids containing the word.
|
||||||
pub word_docids: Database<Str, CboRoaringBitmapCodec>,
|
pub word_docids: Database<Str, CboRoaringBitmapCodec>,
|
||||||
|
|
||||||
@ -183,13 +186,15 @@ impl Index {
|
|||||||
) -> Result<Index> {
|
) -> Result<Index> {
|
||||||
use db_name::*;
|
use db_name::*;
|
||||||
|
|
||||||
options.max_dbs(25);
|
options.max_dbs(26);
|
||||||
unsafe { options.flag(Flags::MdbAlwaysFreePages) };
|
unsafe { options.flag(Flags::MdbAlwaysFreePages) };
|
||||||
|
|
||||||
let env = options.open(path)?;
|
let env = options.open(path)?;
|
||||||
let mut wtxn = env.write_txn()?;
|
let mut wtxn = env.write_txn()?;
|
||||||
let main = env.create_poly_database(&mut wtxn, Some(MAIN))?;
|
let main = env.create_poly_database(&mut wtxn, Some(MAIN))?;
|
||||||
let word_docids = env.create_database(&mut wtxn, Some(WORD_DOCIDS))?;
|
let word_docids = env.create_database(&mut wtxn, Some(WORD_DOCIDS))?;
|
||||||
|
let external_documents_ids =
|
||||||
|
env.create_database(&mut wtxn, Some(EXTERNAL_DOCUMENTS_IDS))?;
|
||||||
let exact_word_docids = env.create_database(&mut wtxn, Some(EXACT_WORD_DOCIDS))?;
|
let exact_word_docids = env.create_database(&mut wtxn, Some(EXACT_WORD_DOCIDS))?;
|
||||||
let word_prefix_docids = env.create_database(&mut wtxn, Some(WORD_PREFIX_DOCIDS))?;
|
let word_prefix_docids = env.create_database(&mut wtxn, Some(WORD_PREFIX_DOCIDS))?;
|
||||||
let exact_word_prefix_docids =
|
let exact_word_prefix_docids =
|
||||||
@ -235,6 +240,7 @@ impl Index {
|
|||||||
Ok(Index {
|
Ok(Index {
|
||||||
env,
|
env,
|
||||||
main,
|
main,
|
||||||
|
external_documents_ids,
|
||||||
word_docids,
|
word_docids,
|
||||||
exact_word_docids,
|
exact_word_docids,
|
||||||
word_prefix_docids,
|
word_prefix_docids,
|
||||||
@ -386,29 +392,10 @@ impl Index {
|
|||||||
|
|
||||||
/* external documents ids */
|
/* external documents ids */
|
||||||
|
|
||||||
/// Writes the external documents ids and internal ids (i.e. `u32`).
|
|
||||||
pub(crate) fn put_external_documents_ids(
|
|
||||||
&self,
|
|
||||||
wtxn: &mut RwTxn,
|
|
||||||
external_documents_ids: &ExternalDocumentsIds<'_>,
|
|
||||||
) -> heed::Result<()> {
|
|
||||||
self.main.put::<_, Str, ByteSlice>(
|
|
||||||
wtxn,
|
|
||||||
main_key::EXTERNAL_DOCUMENTS_IDS_KEY,
|
|
||||||
external_documents_ids.as_bytes(),
|
|
||||||
)?;
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Returns the external documents ids map which associate the external ids
|
/// Returns the external documents ids map which associate the external ids
|
||||||
/// with the internal ids (i.e. `u32`).
|
/// with the internal ids (i.e. `u32`).
|
||||||
pub fn external_documents_ids<'t>(&self, rtxn: &'t RoTxn) -> Result<ExternalDocumentsIds<'t>> {
|
pub fn external_documents_ids(&self) -> ExternalDocumentsIds {
|
||||||
let fst = self.main.get::<_, Str, ByteSlice>(rtxn, main_key::EXTERNAL_DOCUMENTS_IDS_KEY)?;
|
ExternalDocumentsIds::new(self.external_documents_ids)
|
||||||
let fst = match fst {
|
|
||||||
Some(fst) => fst::Map::new(fst)?.map_data(Cow::Borrowed)?,
|
|
||||||
None => fst::Map::default().map_data(Cow::Owned)?,
|
|
||||||
};
|
|
||||||
Ok(ExternalDocumentsIds::new(fst))
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* fields ids map */
|
/* fields ids map */
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
use time::OffsetDateTime;
|
use time::OffsetDateTime;
|
||||||
|
|
||||||
use crate::{ExternalDocumentsIds, FieldDistribution, Index, Result};
|
use crate::{FieldDistribution, Index, Result};
|
||||||
|
|
||||||
pub struct ClearDocuments<'t, 'u, 'i> {
|
pub struct ClearDocuments<'t, 'u, 'i> {
|
||||||
wtxn: &'t mut heed::RwTxn<'i, 'u>,
|
wtxn: &'t mut heed::RwTxn<'i, 'u>,
|
||||||
@ -20,6 +20,7 @@ impl<'t, 'u, 'i> ClearDocuments<'t, 'u, 'i> {
|
|||||||
let Index {
|
let Index {
|
||||||
env: _env,
|
env: _env,
|
||||||
main: _main,
|
main: _main,
|
||||||
|
external_documents_ids,
|
||||||
word_docids,
|
word_docids,
|
||||||
exact_word_docids,
|
exact_word_docids,
|
||||||
word_prefix_docids,
|
word_prefix_docids,
|
||||||
@ -54,7 +55,6 @@ impl<'t, 'u, 'i> ClearDocuments<'t, 'u, 'i> {
|
|||||||
// We clean some of the main engine datastructures.
|
// We clean some of the main engine datastructures.
|
||||||
self.index.put_words_fst(self.wtxn, &fst::Set::default())?;
|
self.index.put_words_fst(self.wtxn, &fst::Set::default())?;
|
||||||
self.index.put_words_prefixes_fst(self.wtxn, &fst::Set::default())?;
|
self.index.put_words_prefixes_fst(self.wtxn, &fst::Set::default())?;
|
||||||
self.index.put_external_documents_ids(self.wtxn, &ExternalDocumentsIds::default())?;
|
|
||||||
self.index.put_documents_ids(self.wtxn, &empty_roaring)?;
|
self.index.put_documents_ids(self.wtxn, &empty_roaring)?;
|
||||||
self.index.put_field_distribution(self.wtxn, &FieldDistribution::default())?;
|
self.index.put_field_distribution(self.wtxn, &FieldDistribution::default())?;
|
||||||
self.index.delete_geo_rtree(self.wtxn)?;
|
self.index.delete_geo_rtree(self.wtxn)?;
|
||||||
@ -62,6 +62,7 @@ impl<'t, 'u, 'i> ClearDocuments<'t, 'u, 'i> {
|
|||||||
self.index.delete_vector_hnsw(self.wtxn)?;
|
self.index.delete_vector_hnsw(self.wtxn)?;
|
||||||
|
|
||||||
// Clear the other databases.
|
// Clear the other databases.
|
||||||
|
external_documents_ids.clear(self.wtxn)?;
|
||||||
word_docids.clear(self.wtxn)?;
|
word_docids.clear(self.wtxn)?;
|
||||||
exact_word_docids.clear(self.wtxn)?;
|
exact_word_docids.clear(self.wtxn)?;
|
||||||
word_prefix_docids.clear(self.wtxn)?;
|
word_prefix_docids.clear(self.wtxn)?;
|
||||||
|
@ -162,7 +162,7 @@ impl<'a, 'i> Transform<'a, 'i> {
|
|||||||
FA: Fn() -> bool + Sync,
|
FA: Fn() -> bool + Sync,
|
||||||
{
|
{
|
||||||
let (mut cursor, fields_index) = reader.into_cursor_and_fields_index();
|
let (mut cursor, fields_index) = reader.into_cursor_and_fields_index();
|
||||||
let external_documents_ids = self.index.external_documents_ids(wtxn)?;
|
let external_documents_ids = self.index.external_documents_ids();
|
||||||
let mapping = create_fields_mapping(&mut self.fields_ids_map, &fields_index)?;
|
let mapping = create_fields_mapping(&mut self.fields_ids_map, &fields_index)?;
|
||||||
|
|
||||||
let primary_key = cursor.primary_key().to_string();
|
let primary_key = cursor.primary_key().to_string();
|
||||||
@ -221,7 +221,7 @@ impl<'a, 'i> Transform<'a, 'i> {
|
|||||||
let docid = match self.new_external_documents_ids_builder.entry((*external_id).into()) {
|
let docid = match self.new_external_documents_ids_builder.entry((*external_id).into()) {
|
||||||
Entry::Occupied(entry) => *entry.get() as u32,
|
Entry::Occupied(entry) => *entry.get() as u32,
|
||||||
Entry::Vacant(entry) => {
|
Entry::Vacant(entry) => {
|
||||||
let docid = match external_documents_ids.get(entry.key()) {
|
let docid = match external_documents_ids.get(wtxn, entry.key())? {
|
||||||
Some(docid) => {
|
Some(docid) => {
|
||||||
// If it was already in the list of replaced documents it means it was deleted
|
// If it was already in the list of replaced documents it means it was deleted
|
||||||
// by the remove_document method. We should starts as if it never existed.
|
// by the remove_document method. We should starts as if it never existed.
|
||||||
@ -373,7 +373,7 @@ impl<'a, 'i> Transform<'a, 'i> {
|
|||||||
to_remove.sort_unstable();
|
to_remove.sort_unstable();
|
||||||
to_remove.dedup();
|
to_remove.dedup();
|
||||||
|
|
||||||
let external_documents_ids = self.index.external_documents_ids(wtxn)?;
|
let external_documents_ids = self.index.external_documents_ids();
|
||||||
|
|
||||||
let mut documents_deleted = 0;
|
let mut documents_deleted = 0;
|
||||||
let mut document_sorter_buffer = Vec::new();
|
let mut document_sorter_buffer = Vec::new();
|
||||||
@ -410,7 +410,7 @@ impl<'a, 'i> Transform<'a, 'i> {
|
|||||||
|
|
||||||
// If the document was already in the db we mark it as a `to_delete` document.
|
// If the document was already in the db we mark it as a `to_delete` document.
|
||||||
// Then we push the document in sorters in deletion mode.
|
// Then we push the document in sorters in deletion mode.
|
||||||
let deleted_from_db = match external_documents_ids.get(&to_remove) {
|
let deleted_from_db = match external_documents_ids.get(wtxn, &to_remove)? {
|
||||||
Some(docid) => {
|
Some(docid) => {
|
||||||
self.replaced_documents_ids.insert(docid);
|
self.replaced_documents_ids.insert(docid);
|
||||||
|
|
||||||
|
@ -194,10 +194,8 @@ pub(crate) fn write_typed_chunk_into_index(
|
|||||||
db.delete(wtxn, &BEU32::new(docid))?;
|
db.delete(wtxn, &BEU32::new(docid))?;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
let mut external_documents_docids = index.external_documents_ids(wtxn)?.into_static();
|
let external_documents_docids = index.external_documents_ids();
|
||||||
external_documents_docids.apply(operations);
|
external_documents_docids.apply(wtxn, operations)?;
|
||||||
index.put_external_documents_ids(wtxn, &external_documents_docids)?;
|
|
||||||
|
|
||||||
index.put_documents_ids(wtxn, &docids)?;
|
index.put_documents_ids(wtxn, &docids)?;
|
||||||
}
|
}
|
||||||
TypedChunk::FieldIdWordCountDocids(fid_word_count_docids_iter) => {
|
TypedChunk::FieldIdWordCountDocids(fid_word_count_docids_iter) => {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user