2020-05-22 15:00:50 +02:00
|
|
|
use std::borrow::Cow;
|
2020-05-19 11:45:46 +02:00
|
|
|
use std::collections::{HashMap, BTreeMap};
|
2019-10-03 15:04:11 +02:00
|
|
|
|
2020-01-16 16:19:04 +01:00
|
|
|
use fst::{set::OpBuilder, SetBuilder};
|
2020-03-03 19:36:46 +01:00
|
|
|
use indexmap::IndexMap;
|
2020-05-18 15:29:58 +02:00
|
|
|
use meilisearch_schema::{Schema, FieldId};
|
|
|
|
use meilisearch_types::DocumentId;
|
2020-01-16 16:19:04 +01:00
|
|
|
use sdset::{duo::Union, SetOperation};
|
2020-05-18 12:22:41 +02:00
|
|
|
use serde::Deserialize;
|
2020-05-18 10:56:24 +02:00
|
|
|
use serde_json::Value;
|
2020-05-18 12:22:06 +02:00
|
|
|
|
2019-11-26 16:12:06 +01:00
|
|
|
use crate::database::{MainT, UpdateT};
|
2019-11-06 10:49:13 +01:00
|
|
|
use crate::database::{UpdateEvent, UpdateEventsEmitter};
|
2020-05-05 22:28:46 +02:00
|
|
|
use crate::facets;
|
2019-10-03 15:04:11 +02:00
|
|
|
use crate::raw_indexer::RawIndexer;
|
2020-05-19 14:37:13 +02:00
|
|
|
use crate::serde::Deserializer;
|
2020-05-19 11:45:46 +02:00
|
|
|
use crate::store::{self, DocumentsFields, DocumentsFieldsCounts, DiscoverIds};
|
2020-05-19 14:37:13 +02:00
|
|
|
use crate::update::helpers::{index_value, value_to_number, extract_document_id};
|
2020-01-16 16:19:04 +01:00
|
|
|
use crate::update::{apply_documents_deletion, compute_short_prefixes, next_update_id, Update};
|
2020-05-19 14:37:13 +02:00
|
|
|
use crate::{Error, MResult, RankedMap};
|
2019-10-03 15:04:11 +02:00
|
|
|
|
|
|
|
pub struct DocumentsAddition<D> {
|
|
|
|
updates_store: store::Updates,
|
2019-10-07 16:16:04 +02:00
|
|
|
updates_results_store: store::UpdatesResults,
|
2019-11-06 10:49:13 +01:00
|
|
|
updates_notifier: UpdateEventsEmitter,
|
2019-10-03 15:04:11 +02:00
|
|
|
documents: Vec<D>,
|
2019-11-05 15:23:41 +01:00
|
|
|
is_partial: bool,
|
2019-10-03 15:04:11 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
impl<D> DocumentsAddition<D> {
|
2019-10-07 16:16:04 +02:00
|
|
|
pub fn new(
|
|
|
|
updates_store: store::Updates,
|
|
|
|
updates_results_store: store::UpdatesResults,
|
2019-11-06 10:49:13 +01:00
|
|
|
updates_notifier: UpdateEventsEmitter,
|
2019-10-18 13:05:28 +02:00
|
|
|
) -> DocumentsAddition<D> {
|
2019-10-07 16:16:04 +02:00
|
|
|
DocumentsAddition {
|
|
|
|
updates_store,
|
|
|
|
updates_results_store,
|
|
|
|
updates_notifier,
|
|
|
|
documents: Vec::new(),
|
2019-11-05 15:23:41 +01:00
|
|
|
is_partial: false,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
pub fn new_partial(
|
|
|
|
updates_store: store::Updates,
|
|
|
|
updates_results_store: store::UpdatesResults,
|
2019-11-06 10:49:13 +01:00
|
|
|
updates_notifier: UpdateEventsEmitter,
|
2019-11-05 15:23:41 +01:00
|
|
|
) -> DocumentsAddition<D> {
|
|
|
|
DocumentsAddition {
|
|
|
|
updates_store,
|
|
|
|
updates_results_store,
|
|
|
|
updates_notifier,
|
|
|
|
documents: Vec::new(),
|
|
|
|
is_partial: true,
|
2019-10-07 16:16:04 +02:00
|
|
|
}
|
2019-10-03 15:04:11 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
pub fn update_document(&mut self, document: D) {
|
|
|
|
self.documents.push(document);
|
|
|
|
}
|
|
|
|
|
2019-11-26 16:12:06 +01:00
|
|
|
pub fn finalize(self, writer: &mut heed::RwTxn<UpdateT>) -> MResult<u64>
|
2019-10-18 13:05:28 +02:00
|
|
|
where
|
|
|
|
D: serde::Serialize,
|
2019-10-03 15:04:11 +02:00
|
|
|
{
|
2019-11-06 10:49:13 +01:00
|
|
|
let _ = self.updates_notifier.send(UpdateEvent::NewUpdate);
|
2019-10-07 16:16:04 +02:00
|
|
|
let update_id = push_documents_addition(
|
2019-10-11 11:29:47 +02:00
|
|
|
writer,
|
2019-10-07 16:16:04 +02:00
|
|
|
self.updates_store,
|
|
|
|
self.updates_results_store,
|
|
|
|
self.documents,
|
2019-11-05 15:23:41 +01:00
|
|
|
self.is_partial,
|
2019-10-07 16:16:04 +02:00
|
|
|
)?;
|
|
|
|
Ok(update_id)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl<D> Extend<D> for DocumentsAddition<D> {
|
2019-10-18 13:05:28 +02:00
|
|
|
fn extend<T: IntoIterator<Item = D>>(&mut self, iter: T) {
|
2019-10-07 16:16:04 +02:00
|
|
|
self.documents.extend(iter)
|
2019-10-03 15:04:11 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-10-08 17:24:11 +02:00
|
|
|
pub fn push_documents_addition<D: serde::Serialize>(
|
2019-11-26 16:12:06 +01:00
|
|
|
writer: &mut heed::RwTxn<UpdateT>,
|
2019-10-08 17:24:11 +02:00
|
|
|
updates_store: store::Updates,
|
|
|
|
updates_results_store: store::UpdatesResults,
|
|
|
|
addition: Vec<D>,
|
2019-11-05 15:23:41 +01:00
|
|
|
is_partial: bool,
|
2019-10-18 13:05:28 +02:00
|
|
|
) -> MResult<u64> {
|
2019-10-08 17:24:11 +02:00
|
|
|
let mut values = Vec::with_capacity(addition.len());
|
|
|
|
for add in addition {
|
2019-10-11 16:16:21 +02:00
|
|
|
let vec = serde_json::to_vec(&add)?;
|
|
|
|
let add = serde_json::from_slice(&vec)?;
|
2019-10-08 17:24:11 +02:00
|
|
|
values.push(add);
|
|
|
|
}
|
|
|
|
|
|
|
|
let last_update_id = next_update_id(writer, updates_store, updates_results_store)?;
|
|
|
|
|
2019-11-05 15:23:41 +01:00
|
|
|
let update = if is_partial {
|
2019-11-12 18:00:47 +01:00
|
|
|
Update::documents_partial(values)
|
2019-11-05 15:23:41 +01:00
|
|
|
} else {
|
2019-11-12 18:00:47 +01:00
|
|
|
Update::documents_addition(values)
|
2019-11-05 15:23:41 +01:00
|
|
|
};
|
|
|
|
|
2019-10-08 17:31:07 +02:00
|
|
|
updates_store.put_update(writer, last_update_id, &update)?;
|
2019-10-08 17:24:11 +02:00
|
|
|
|
|
|
|
Ok(last_update_id)
|
|
|
|
}
|
|
|
|
|
2020-06-27 15:10:39 +02:00
|
|
|
#[allow(clippy::too_many_arguments)]
|
2020-05-22 15:00:50 +02:00
|
|
|
fn index_document<A>(
|
2020-05-18 15:29:58 +02:00
|
|
|
writer: &mut heed::RwTxn<MainT>,
|
|
|
|
documents_fields: DocumentsFields,
|
|
|
|
documents_fields_counts: DocumentsFieldsCounts,
|
|
|
|
ranked_map: &mut RankedMap,
|
2020-05-22 15:00:50 +02:00
|
|
|
indexer: &mut RawIndexer<A>,
|
2020-05-18 15:29:58 +02:00
|
|
|
schema: &Schema,
|
|
|
|
field_id: FieldId,
|
|
|
|
document_id: DocumentId,
|
|
|
|
value: &Value,
|
|
|
|
) -> MResult<()>
|
2020-05-22 15:00:50 +02:00
|
|
|
where A: AsRef<[u8]>,
|
2020-05-18 15:29:58 +02:00
|
|
|
{
|
|
|
|
let serialized = serde_json::to_vec(value)?;
|
|
|
|
documents_fields.put_document_field(writer, document_id, field_id, &serialized)?;
|
|
|
|
|
|
|
|
if let Some(indexed_pos) = schema.is_indexed(field_id) {
|
|
|
|
let number_of_words = index_value(indexer, document_id, *indexed_pos, value);
|
|
|
|
if let Some(number_of_words) = number_of_words {
|
|
|
|
documents_fields_counts.put_document_field_count(
|
|
|
|
writer,
|
|
|
|
document_id,
|
|
|
|
*indexed_pos,
|
|
|
|
number_of_words as u16,
|
|
|
|
)?;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if schema.is_ranked(field_id) {
|
|
|
|
let number = value_to_number(value).unwrap_or_default();
|
|
|
|
ranked_map.insert(document_id, field_id, number);
|
|
|
|
}
|
|
|
|
|
|
|
|
Ok(())
|
|
|
|
}
|
|
|
|
|
2020-05-05 22:28:46 +02:00
|
|
|
pub fn apply_addition<'a, 'b>(
|
2019-11-26 16:12:06 +01:00
|
|
|
writer: &'a mut heed::RwTxn<'b, MainT>,
|
2020-01-16 16:29:50 +01:00
|
|
|
index: &store::Index,
|
2020-05-19 14:11:48 +02:00
|
|
|
new_documents: Vec<IndexMap<String, Value>>,
|
2020-05-05 22:28:46 +02:00
|
|
|
partial: bool
|
2020-05-20 15:21:08 +02:00
|
|
|
) -> MResult<()>
|
|
|
|
{
|
2020-02-02 22:59:19 +01:00
|
|
|
let mut schema = match index.main.schema(writer)? {
|
2019-10-07 17:48:26 +02:00
|
|
|
Some(schema) => schema,
|
|
|
|
None => return Err(Error::SchemaMissing),
|
|
|
|
};
|
|
|
|
|
2020-05-19 11:45:46 +02:00
|
|
|
// Retrieve the documents ids related structures
|
2020-05-20 14:49:41 +02:00
|
|
|
let external_docids = index.main.external_docids(writer)?;
|
|
|
|
let internal_docids = index.main.internal_docids(writer)?;
|
|
|
|
let mut available_ids = DiscoverIds::new(&internal_docids);
|
2020-05-19 11:45:46 +02:00
|
|
|
|
2020-03-09 18:40:49 +01:00
|
|
|
let primary_key = schema.primary_key().ok_or(Error::MissingPrimaryKey)?;
|
2019-10-03 15:04:11 +02:00
|
|
|
|
2019-10-21 17:33:52 +02:00
|
|
|
// 1. store documents ids for future deletion
|
2020-05-20 15:21:08 +02:00
|
|
|
let mut documents_additions = HashMap::new();
|
|
|
|
let mut new_external_docids = BTreeMap::new();
|
|
|
|
let mut new_internal_docids = Vec::with_capacity(new_documents.len());
|
|
|
|
|
2020-05-19 14:11:48 +02:00
|
|
|
for mut document in new_documents {
|
2020-07-07 14:52:49 +02:00
|
|
|
let external_docids_get = |docid: &str| {
|
|
|
|
match (external_docids.get(docid), new_external_docids.get(docid)) {
|
|
|
|
(_, Some(&id))
|
|
|
|
| (Some(id), _) => Some(id as u32),
|
|
|
|
(None, None) => None,
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
2020-05-20 15:21:08 +02:00
|
|
|
let (internal_docid, external_docid) =
|
|
|
|
extract_document_id(
|
|
|
|
&primary_key,
|
|
|
|
&document,
|
2020-07-07 14:52:49 +02:00
|
|
|
&external_docids_get,
|
2020-05-20 15:21:08 +02:00
|
|
|
&mut available_ids,
|
|
|
|
)?;
|
|
|
|
|
2020-07-07 14:52:49 +02:00
|
|
|
new_external_docids.insert(external_docid, internal_docid.0 as u64);
|
2020-05-20 15:21:08 +02:00
|
|
|
new_internal_docids.push(internal_docid);
|
2019-10-03 15:04:11 +02:00
|
|
|
|
2020-05-05 22:28:46 +02:00
|
|
|
if partial {
|
|
|
|
let mut deserializer = Deserializer {
|
2020-05-20 15:21:08 +02:00
|
|
|
document_id: internal_docid,
|
2020-05-05 22:28:46 +02:00
|
|
|
reader: writer,
|
|
|
|
documents_fields: index.documents_fields,
|
|
|
|
schema: &schema,
|
|
|
|
fields: None,
|
|
|
|
};
|
|
|
|
|
2020-05-19 14:11:48 +02:00
|
|
|
let old_document = Option::<HashMap<String, Value>>::deserialize(&mut deserializer)?;
|
|
|
|
if let Some(old_document) = old_document {
|
2020-05-05 22:28:46 +02:00
|
|
|
for (key, value) in old_document {
|
|
|
|
document.entry(key).or_insert(value);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2020-05-20 15:21:08 +02:00
|
|
|
documents_additions.insert(internal_docid, document);
|
2019-10-21 17:33:52 +02:00
|
|
|
}
|
|
|
|
|
2020-05-19 13:12:02 +02:00
|
|
|
// 2. remove the documents postings lists
|
2019-10-28 20:40:33 +01:00
|
|
|
let number_of_inserted_documents = documents_additions.len();
|
2020-05-20 14:49:41 +02:00
|
|
|
let documents_ids = new_external_docids.iter().map(|(id, _)| id.clone()).collect();
|
2020-01-16 16:29:50 +01:00
|
|
|
apply_documents_deletion(writer, index, documents_ids)?;
|
2019-10-21 17:33:52 +02:00
|
|
|
|
2020-01-16 16:29:50 +01:00
|
|
|
let mut ranked_map = match index.main.ranked_map(writer)? {
|
2019-10-21 17:33:52 +02:00
|
|
|
Some(ranked_map) => ranked_map,
|
|
|
|
None => RankedMap::default(),
|
|
|
|
};
|
|
|
|
|
2020-05-22 15:00:50 +02:00
|
|
|
let stop_words = index.main.stop_words_fst(writer)?.map_data(Cow::into_owned)?;
|
2019-11-05 15:23:41 +01:00
|
|
|
|
2020-05-05 22:28:46 +02:00
|
|
|
|
2019-11-05 15:23:41 +01:00
|
|
|
let mut indexer = RawIndexer::new(stop_words);
|
|
|
|
|
2020-05-18 10:56:24 +02:00
|
|
|
// For each document in this update
|
2020-05-28 19:35:34 +02:00
|
|
|
for (document_id, document) in &documents_additions {
|
2020-05-18 10:56:24 +02:00
|
|
|
// For each key-value pair in the document.
|
|
|
|
for (attribute, value) in document {
|
2020-10-30 11:30:18 +01:00
|
|
|
let field_id = schema.insert_and_index(&attribute)?;
|
2020-05-18 15:29:58 +02:00
|
|
|
index_document(
|
|
|
|
writer,
|
|
|
|
index.documents_fields,
|
|
|
|
index.documents_fields_counts,
|
|
|
|
&mut ranked_map,
|
|
|
|
&mut indexer,
|
|
|
|
&schema,
|
|
|
|
field_id,
|
2020-05-28 19:35:34 +02:00
|
|
|
*document_id,
|
2020-05-18 15:29:58 +02:00
|
|
|
&value,
|
|
|
|
)?;
|
2020-05-18 10:56:24 +02:00
|
|
|
}
|
2019-11-05 15:23:41 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
write_documents_addition_index(
|
|
|
|
writer,
|
2020-01-16 16:29:50 +01:00
|
|
|
index,
|
2019-11-05 15:23:41 +01:00
|
|
|
&ranked_map,
|
|
|
|
number_of_inserted_documents,
|
|
|
|
indexer,
|
2019-12-30 12:27:24 +01:00
|
|
|
)?;
|
|
|
|
|
2020-02-02 22:59:19 +01:00
|
|
|
index.main.put_schema(writer, &schema)?;
|
2019-12-30 14:37:31 +01:00
|
|
|
|
2020-05-20 15:21:08 +02:00
|
|
|
let new_external_docids = fst::Map::from_iter(new_external_docids.iter().map(|(ext, id)| (ext, *id as u64)))?;
|
2020-05-20 14:49:41 +02:00
|
|
|
let new_internal_docids = sdset::SetBuf::from_dirty(new_internal_docids);
|
|
|
|
index.main.merge_external_docids(writer, &new_external_docids)?;
|
|
|
|
index.main.merge_internal_docids(writer, &new_internal_docids)?;
|
2020-05-19 11:45:46 +02:00
|
|
|
|
2020-06-19 15:31:35 +02:00
|
|
|
// recompute all facet attributes after document update.
|
|
|
|
if let Some(attributes_for_facetting) = index.main.attributes_for_faceting(writer)? {
|
|
|
|
let docids = index.main.internal_docids(writer)?;
|
|
|
|
let facet_map = facets::facet_map_from_docids(writer, index, &docids, attributes_for_facetting.as_ref())?;
|
|
|
|
index.facets.add(writer, facet_map)?;
|
|
|
|
}
|
|
|
|
|
2020-05-28 19:35:34 +02:00
|
|
|
// update is finished; update sorted document id cache with new state
|
|
|
|
let mut document_ids = index.main.internal_docids(writer)?.to_vec();
|
|
|
|
super::cache_document_ids_sorted(writer, &ranked_map, index, &mut document_ids)?;
|
|
|
|
|
2019-12-30 12:27:24 +01:00
|
|
|
Ok(())
|
2019-11-05 15:23:41 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
pub fn apply_documents_partial_addition<'a, 'b>(
|
2019-11-26 16:12:06 +01:00
|
|
|
writer: &'a mut heed::RwTxn<'b, MainT>,
|
2020-01-16 16:29:50 +01:00
|
|
|
index: &store::Index,
|
2020-05-19 14:11:48 +02:00
|
|
|
new_documents: Vec<IndexMap<String, Value>>,
|
2019-11-05 15:23:41 +01:00
|
|
|
) -> MResult<()> {
|
2020-05-19 14:11:48 +02:00
|
|
|
apply_addition(writer, index, new_documents, true)
|
2020-05-05 22:28:46 +02:00
|
|
|
}
|
2020-01-16 16:19:04 +01:00
|
|
|
|
2020-05-05 22:28:46 +02:00
|
|
|
pub fn apply_documents_addition<'a, 'b>(
|
|
|
|
writer: &'a mut heed::RwTxn<'b, MainT>,
|
|
|
|
index: &store::Index,
|
2020-05-19 14:11:48 +02:00
|
|
|
new_documents: Vec<IndexMap<String, Value>>,
|
2020-05-05 22:28:46 +02:00
|
|
|
) -> MResult<()> {
|
2020-05-19 14:11:48 +02:00
|
|
|
apply_addition(writer, index, new_documents, false)
|
2019-10-21 17:33:52 +02:00
|
|
|
}
|
2019-10-03 15:04:11 +02:00
|
|
|
|
2020-01-16 16:29:50 +01:00
|
|
|
pub fn reindex_all_documents(writer: &mut heed::RwTxn<MainT>, index: &store::Index) -> MResult<()> {
|
|
|
|
let schema = match index.main.schema(writer)? {
|
2019-10-21 17:33:52 +02:00
|
|
|
Some(schema) => schema,
|
|
|
|
None => return Err(Error::SchemaMissing),
|
|
|
|
};
|
|
|
|
|
|
|
|
let mut ranked_map = RankedMap::default();
|
|
|
|
|
|
|
|
// 1. retrieve all documents ids
|
|
|
|
let mut documents_ids_to_reindex = Vec::new();
|
2020-01-16 16:29:50 +01:00
|
|
|
for result in index.documents_fields_counts.documents_ids(writer)? {
|
2019-10-21 17:33:52 +02:00
|
|
|
let document_id = result?;
|
|
|
|
documents_ids_to_reindex.push(document_id);
|
2019-10-03 15:04:11 +02:00
|
|
|
}
|
|
|
|
|
2019-10-21 17:33:52 +02:00
|
|
|
// 2. remove the documents posting lists
|
2020-01-16 16:29:50 +01:00
|
|
|
index.main.put_words_fst(writer, &fst::Set::default())?;
|
|
|
|
index.main.put_ranked_map(writer, &ranked_map)?;
|
|
|
|
index.main.put_number_of_documents(writer, |_| 0)?;
|
2020-05-05 22:28:46 +02:00
|
|
|
index.facets.clear(writer)?;
|
2020-01-16 16:29:50 +01:00
|
|
|
index.postings_lists.clear(writer)?;
|
|
|
|
index.docs_words.clear(writer)?;
|
2019-10-21 17:33:52 +02:00
|
|
|
|
2020-05-22 15:00:50 +02:00
|
|
|
let stop_words = index.main
|
|
|
|
.stop_words_fst(writer)?
|
|
|
|
.map_data(Cow::into_owned)
|
|
|
|
.unwrap();
|
2019-10-29 17:46:23 +01:00
|
|
|
|
2020-02-27 14:51:29 +01:00
|
|
|
let number_of_inserted_documents = documents_ids_to_reindex.len();
|
|
|
|
let mut indexer = RawIndexer::new(stop_words);
|
|
|
|
let mut ram_store = HashMap::new();
|
2019-10-29 17:46:23 +01:00
|
|
|
|
2020-05-05 22:28:46 +02:00
|
|
|
if let Some(ref attributes_for_facetting) = index.main.attributes_for_faceting(writer)? {
|
|
|
|
let facet_map = facets::facet_map_from_docids(writer, &index, &documents_ids_to_reindex, &attributes_for_facetting)?;
|
|
|
|
index.facets.add(writer, facet_map)?;
|
|
|
|
}
|
|
|
|
// ^-- https://github.com/meilisearch/MeiliSearch/pull/631#issuecomment-626624470 --v
|
2020-05-28 19:35:34 +02:00
|
|
|
for document_id in &documents_ids_to_reindex {
|
|
|
|
for result in index.documents_fields.document_fields(writer, *document_id)? {
|
2020-02-27 14:51:29 +01:00
|
|
|
let (field_id, bytes) = result?;
|
2020-05-18 10:56:24 +02:00
|
|
|
let value: Value = serde_json::from_slice(bytes)?;
|
2020-02-27 14:51:29 +01:00
|
|
|
ram_store.insert((document_id, field_id), value);
|
2019-10-21 17:33:52 +02:00
|
|
|
}
|
|
|
|
|
2020-05-18 10:56:24 +02:00
|
|
|
// For each key-value pair in the document.
|
|
|
|
for ((document_id, field_id), value) in ram_store.drain() {
|
2020-05-18 15:29:58 +02:00
|
|
|
index_document(
|
|
|
|
writer,
|
|
|
|
index.documents_fields,
|
|
|
|
index.documents_fields_counts,
|
|
|
|
&mut ranked_map,
|
|
|
|
&mut indexer,
|
|
|
|
&schema,
|
|
|
|
field_id,
|
2020-05-28 19:35:34 +02:00
|
|
|
*document_id,
|
2020-05-18 15:29:58 +02:00
|
|
|
&value,
|
|
|
|
)?;
|
2020-02-27 14:51:29 +01:00
|
|
|
}
|
2019-10-14 14:07:10 +02:00
|
|
|
}
|
|
|
|
|
2020-02-27 14:51:29 +01:00
|
|
|
// 4. write the new index in the main store
|
|
|
|
write_documents_addition_index(
|
|
|
|
writer,
|
|
|
|
index,
|
|
|
|
&ranked_map,
|
|
|
|
number_of_inserted_documents,
|
|
|
|
indexer,
|
|
|
|
)?;
|
|
|
|
|
2020-02-02 22:59:19 +01:00
|
|
|
index.main.put_schema(writer, &schema)?;
|
2020-01-16 16:19:04 +01:00
|
|
|
|
2020-06-19 15:31:35 +02:00
|
|
|
// recompute all facet attributes after document update.
|
|
|
|
if let Some(attributes_for_facetting) = index.main.attributes_for_faceting(writer)? {
|
|
|
|
let docids = index.main.internal_docids(writer)?;
|
|
|
|
let facet_map = facets::facet_map_from_docids(writer, index, &docids, attributes_for_facetting.as_ref())?;
|
|
|
|
index.facets.add(writer, facet_map)?;
|
|
|
|
}
|
|
|
|
|
2020-05-28 19:35:34 +02:00
|
|
|
// update is finished; update sorted document id cache with new state
|
|
|
|
let mut document_ids = index.main.internal_docids(writer)?.to_vec();
|
|
|
|
super::cache_document_ids_sorted(writer, &ranked_map, index, &mut document_ids)?;
|
|
|
|
|
2019-10-29 17:46:23 +01:00
|
|
|
Ok(())
|
2019-10-21 17:33:52 +02:00
|
|
|
}
|
|
|
|
|
2020-05-22 15:00:50 +02:00
|
|
|
pub fn write_documents_addition_index<A>(
|
2019-11-26 16:12:06 +01:00
|
|
|
writer: &mut heed::RwTxn<MainT>,
|
2020-01-16 16:29:50 +01:00
|
|
|
index: &store::Index,
|
2019-10-29 17:46:23 +01:00
|
|
|
ranked_map: &RankedMap,
|
2019-10-21 17:33:52 +02:00
|
|
|
number_of_inserted_documents: usize,
|
2020-05-22 15:00:50 +02:00
|
|
|
indexer: RawIndexer<A>,
|
|
|
|
) -> MResult<()>
|
|
|
|
where A: AsRef<[u8]>,
|
|
|
|
{
|
2019-10-03 15:04:11 +02:00
|
|
|
let indexed = indexer.build();
|
|
|
|
let mut delta_words_builder = SetBuilder::memory();
|
|
|
|
|
|
|
|
for (word, delta_set) in indexed.words_doc_indexes {
|
|
|
|
delta_words_builder.insert(&word).unwrap();
|
|
|
|
|
2020-01-16 16:29:50 +01:00
|
|
|
let set = match index.postings_lists.postings_list(writer, &word)? {
|
2020-01-08 15:30:43 +01:00
|
|
|
Some(postings) => Union::new(&postings.matches, &delta_set).into_set_buf(),
|
2019-10-03 15:04:11 +02:00
|
|
|
None => delta_set,
|
|
|
|
};
|
|
|
|
|
2020-01-16 16:29:50 +01:00
|
|
|
index.postings_lists.put_postings_list(writer, &word, &set)?;
|
2019-10-03 15:04:11 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
for (id, words) in indexed.docs_words {
|
2020-01-16 16:29:50 +01:00
|
|
|
index.docs_words.put_doc_words(writer, id, &words)?;
|
2019-10-03 15:04:11 +02:00
|
|
|
}
|
|
|
|
|
2020-05-22 15:00:50 +02:00
|
|
|
let delta_words = delta_words_builder.into_set();
|
2019-10-03 15:04:11 +02:00
|
|
|
|
2020-05-22 15:00:50 +02:00
|
|
|
let words_fst = index.main.words_fst(writer)?;
|
|
|
|
let words = if !words_fst.is_empty() {
|
|
|
|
let op = OpBuilder::new()
|
|
|
|
.add(words_fst.stream())
|
|
|
|
.add(delta_words.stream())
|
|
|
|
.r#union();
|
|
|
|
|
|
|
|
let mut words_builder = SetBuilder::memory();
|
|
|
|
words_builder.extend_stream(op).unwrap();
|
|
|
|
words_builder.into_set()
|
|
|
|
} else {
|
|
|
|
delta_words
|
2019-10-03 15:04:11 +02:00
|
|
|
};
|
|
|
|
|
2020-01-16 16:29:50 +01:00
|
|
|
index.main.put_words_fst(writer, &words)?;
|
|
|
|
index.main.put_ranked_map(writer, ranked_map)?;
|
|
|
|
index.main.put_number_of_documents(writer, |old| old + number_of_inserted_documents as u64)?;
|
2019-10-03 15:04:11 +02:00
|
|
|
|
2020-05-22 15:00:50 +02:00
|
|
|
compute_short_prefixes(writer, &words, index)?;
|
2020-01-16 16:19:04 +01:00
|
|
|
|
2019-10-03 15:04:11 +02:00
|
|
|
Ok(())
|
|
|
|
}
|