mirror of https://github.com/meilisearch/MeiliSearch
synced 2024-11-26 14:54:27 +01:00
WIP: reset documents in TypedChunk::Documents
This commit is contained in:
parent cda6ca1ee6
commit 946c762d28
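
The change below keeps, for every document field, only the DelAdd::Addition side of its del/add value before the document is written back. As a minimal sketch of that idea using plain std types (DelAddValue and additions_only are illustrative names for this sketch, not milli's real types):

    // Minimal sketch of the per-field del/add idea the change relies on
    // (KvReaderDelAdd / DelAdd::Addition in milli). `DelAddValue` and
    // `additions_only` are illustrative names, not milli's API.
    use std::collections::BTreeMap;

    type FieldId = u16;

    struct DelAddValue {
        // The value being removed from the field, if any (kept here only to show the shape).
        deletion: Option<Vec<u8>>,
        // The value being (re)written to the field, if any.
        addition: Option<Vec<u8>>,
    }

    // Keep only the addition side of every field; an empty result means the
    // document has no remaining content and should be deleted, not rewritten.
    fn additions_only(fields: &BTreeMap<FieldId, DelAddValue>) -> BTreeMap<FieldId, Vec<u8>> {
        fields
            .iter()
            .filter_map(|(id, v)| v.addition.clone().map(|value| (*id, value)))
            .collect()
    }

    fn main() {
        let mut fields = BTreeMap::new();
        // Field 0 is replaced: the old value is deleted and a new one is added.
        fields.insert(0, DelAddValue { deletion: Some(b"old".to_vec()), addition: Some(b"new".to_vec()) });
        // Field 1 is removed entirely: no addition side survives.
        fields.insert(1, DelAddValue { deletion: Some(b"gone".to_vec()), addition: None });

        let written = additions_only(&fields);
        assert_eq!(written.len(), 1);
        assert_eq!(written[&0], b"new".to_vec());
    }
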
@@ -35,7 +35,7 @@ use crate::documents::{obkv_to_object, DocumentsBatchReader};
 use crate::error::{Error, InternalError, UserError};
 pub use crate::update::index_documents::helpers::CursorClonableMmap;
 use crate::update::{
-    self, DeletionStrategy, IndexerConfig, PrefixWordPairsProximityDocids, UpdateIndexingStep,
+    DeletionStrategy, IndexerConfig, PrefixWordPairsProximityDocids, UpdateIndexingStep,
     WordPrefixDocids, WordPrefixIntegerDocids, WordsPrefixesFst,
 };
 use crate::{CboRoaringBitmapCodec, Index, Result};
@@ -374,17 +374,6 @@ where
             drop(lmdb_writer_sx)
         });
-
-        // We delete the documents that this document addition replaces. This way we are
-        // able to simply insert all the documents even if they already exist in the database.
-        if !replaced_documents_ids.is_empty() {
-            let mut deletion_builder = update::DeleteDocuments::new(self.wtxn, self.index)?;
-            deletion_builder.strategy(self.config.deletion_strategy);
-            debug!("documents to delete {:?}", replaced_documents_ids);
-            deletion_builder.delete_documents(&replaced_documents_ids);
-            let deleted_documents_result = deletion_builder.execute_inner()?;
-            debug!("{} documents actually deleted", deleted_documents_result.deleted_documents);
-        }

         let index_documents_ids = self.index.documents_ids(self.wtxn)?;
         let index_is_empty = index_documents_ids.is_empty();
         let mut final_documents_ids = RoaringBitmap::new();
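
The surrounding code tracks document ids with RoaringBitmap (index_documents_ids, final_documents_ids, and later index_documents_ids | new_documents_ids). As a small self-contained sketch of that bitmap bookkeeping, using the roaring crate directly with illustrative variable names:

    // Sketch of the RoaringBitmap bookkeeping used above; the variable
    // names are illustrative, not milli's.
    use roaring::RoaringBitmap;

    fn main() {
        let mut index_documents_ids = RoaringBitmap::new();
        index_documents_ids.insert(0);
        index_documents_ids.insert(1);

        let mut new_documents_ids = RoaringBitmap::new();
        new_documents_ids.insert(2);

        // `|` is a set union, like `index_documents_ids | new_documents_ids` later in the diff.
        let all_documents_ids = &index_documents_ids | &new_documents_ids;
        assert_eq!(all_documents_ids.len(), 3);

        // `remove` reports whether the id was present, which the new
        // TypedChunk::Documents code turns into a panic when it is not.
        let mut docids = all_documents_ids;
        assert!(docids.remove(2));
        assert!(!docids.remove(42));
    }
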
@@ -437,6 +426,7 @@ where
                 otherwise => otherwise,
             };

+            // FIXME: return newly added as well as newly deleted documents
             let (docids, is_merged_database) =
                 write_typed_chunk_into_index(typed_chunk, self.index, self.wtxn, index_is_empty)?;
             if !docids.is_empty() {
@@ -472,8 +462,9 @@ where
         let external_documents_ids = external_documents_ids.into_static();
         self.index.put_external_documents_ids(self.wtxn, &external_documents_ids)?;

+        // FIXME: remove `new_documents_ids` entirely and `replaced_documents_ids`
         let all_documents_ids = index_documents_ids | new_documents_ids;
-        self.index.put_documents_ids(self.wtxn, &all_documents_ids)?;
+        //self.index.put_documents_ids(self.wtxn, &all_documents_ids)?;

         // TODO: reactivate prefix DB with diff-indexing
         // self.execute_prefix_databases(
@@ -118,23 +118,39 @@ pub(crate) fn write_typed_chunk_into_index(
     let mut is_merged_database = false;
     match typed_chunk {
         TypedChunk::Documents(obkv_documents_iter) => {
+            let mut docids = index.documents_ids(wtxn)?;
+
             let mut cursor = obkv_documents_iter.into_cursor()?;
             while let Some((docid, reader)) = cursor.move_on_next()? {
                 let mut writer: KvWriter<_, FieldId> = KvWriter::memory();
                 let reader: KvReader<FieldId> = KvReader::new(reader);
+                let mut written = false;
                 for (field_id, value) in reader.iter() {
                     let Some(value) = KvReaderDelAdd::new(value).get(DelAdd::Addition) else {
                         continue;
                     };
+                    // TODO: writer.is_empty
+                    written = true;
                     writer.insert(field_id, value)?;
                 }
-                index.documents.remap_types::<ByteSlice, ByteSlice>().put(
-                    wtxn,
-                    docid,
-                    &writer.into_inner().unwrap(),
-                )?;
+
+                let db = index.documents.remap_data_type::<ByteSlice>();
+                let docid = docid.try_into().map(DocumentId::from_be_bytes).unwrap();
+
+                if written {
+                    db.put(wtxn, &BEU32::new(docid), &writer.into_inner().unwrap())?;
+                    docids.insert(docid);
+                } else {
+                    db.delete(wtxn, &BEU32::new(docid))?;
+                    // FIXME: unwrap
+                    if !docids.remove(docid) {
+                        panic!("Attempt to remove a document id that doesn't exist")
+                    }
+                }
             }
+
+            index.put_documents_ids(wtxn, &docids)?;
         }
         TypedChunk::FieldIdWordCountDocids(fid_word_count_docids_iter) => {
             append_entries_into_database(
                 fid_word_count_docids_iter,
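
Putting the pieces together, here is a simplified, self-contained model of what the new TypedChunk::Documents arm does: write only the fields that still have an addition side, and when nothing is left, delete the document and drop its id. Database and apply_document are made-up stand-ins for the LMDB side, not milli's API:

    // Simplified model of the new TypedChunk::Documents behaviour: keep only the
    // fields that still have a DelAdd::Addition value, and when a document ends up
    // with no fields at all, delete it and drop its docid.
    use std::collections::{BTreeMap, HashMap};

    use roaring::RoaringBitmap;

    type DocumentId = u32;
    type FieldId = u16;

    #[derive(Default)]
    struct Database {
        documents: HashMap<DocumentId, BTreeMap<FieldId, Vec<u8>>>,
        documents_ids: RoaringBitmap,
    }

    fn apply_document(
        db: &mut Database,
        docid: DocumentId,
        // `None` plays the role of a field without a DelAdd::Addition side.
        fields: &[(FieldId, Option<Vec<u8>>)],
    ) {
        let mut written = BTreeMap::new();
        for (field_id, addition) in fields {
            if let Some(value) = addition {
                written.insert(*field_id, value.clone());
            }
        }

        if !written.is_empty() {
            db.documents.insert(docid, written);
            db.documents_ids.insert(docid);
        } else {
            db.documents.remove(&docid);
            // Mirrors the FIXME'd panic in the diff when the id is unknown.
            if !db.documents_ids.remove(docid) {
                panic!("Attempt to remove a document id that doesn't exist")
            }
        }
    }

    fn main() {
        let mut db = Database::default();

        // Every field has an addition side: the document is written and its id recorded.
        apply_document(&mut db, 7, &[(0, Some(b"value".to_vec()))]);
        assert!(db.documents_ids.contains(7));

        // Every field is a pure deletion: the document is removed again.
        apply_document(&mut db, 7, &[(0, None)]);
        assert!(!db.documents_ids.contains(7));
        assert!(db.documents.is_empty());
    }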