From e07fe017c1cd7142f188c0cb7190807e192d4f7a Mon Sep 17 00:00:00 2001 From: mpostma Date: Tue, 5 May 2020 22:28:46 +0200 Subject: [PATCH] document update --- Cargo.lock | 1 + meilisearch-core/src/error.rs | 2 +- meilisearch-core/src/facets.rs | 2 +- meilisearch-core/src/store/main.rs | 1 + .../src/update/documents_addition.rs | 121 ++++++------------ .../src/update/documents_deletion.rs | 10 +- meilisearch-http/src/error.rs | 2 +- meilisearch-schema/Cargo.toml | 1 + meilisearch-schema/src/lib.rs | 1 + 9 files changed, 56 insertions(+), 85 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 1eb95126d..4b718210a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1685,6 +1685,7 @@ dependencies = [ "serde", "serde_json", "toml", + "zerocopy", ] [[package]] diff --git a/meilisearch-core/src/error.rs b/meilisearch-core/src/error.rs index d871d782e..7990f691f 100644 --- a/meilisearch-core/src/error.rs +++ b/meilisearch-core/src/error.rs @@ -197,7 +197,7 @@ impl fmt::Display for FacetError { InvalidFormat(found) => write!(f, "invalid facet: {}, facets should be \"facetName:facetValue\"", found), AttributeNotFound(attr) => write!(f, "unknown {:?} attribute", attr), AttributeNotSet { found, expected } => write!(f, "`{}` is not set as a faceted attribute. available facet attributes: {}", found, expected.join(", ")), - InvalidDocumentAttribute(attr) => write!(f, "invalid document attribute {}, accepted types: string and [string]", attr), + InvalidDocumentAttribute(attr) => write!(f, "invalid document attribute {}, accepted types: String and [String]", attr), } } } diff --git a/meilisearch-core/src/facets.rs b/meilisearch-core/src/facets.rs index a71991468..dc8654915 100644 --- a/meilisearch-core/src/facets.rs +++ b/meilisearch-core/src/facets.rs @@ -70,7 +70,7 @@ impl FacetFilter { } return Ok(Self(filter)); } - bad_value => Err(FacetError::unexpected_token(&["String"], bad_value)), + bad_value => Err(FacetError::unexpected_token(&["Array"], bad_value)), } } } diff --git a/meilisearch-core/src/store/main.rs b/meilisearch-core/src/store/main.rs index 33737a002..34a88afcd 100644 --- a/meilisearch-core/src/store/main.rs +++ b/meilisearch-core/src/store/main.rs @@ -1,3 +1,4 @@ +use std::borrow::Cow; use std::sync::Arc; use std::collections::HashMap; diff --git a/meilisearch-core/src/update/documents_addition.rs b/meilisearch-core/src/update/documents_addition.rs index 464e330ad..d8f1f53f1 100644 --- a/meilisearch-core/src/update/documents_addition.rs +++ b/meilisearch-core/src/update/documents_addition.rs @@ -7,6 +7,7 @@ use serde::{Deserialize, Serialize}; use crate::database::{MainT, UpdateT}; use crate::database::{UpdateEvent, UpdateEventsEmitter}; +use crate::facets; use crate::raw_indexer::RawIndexer; use crate::serde::{extract_document_id, serialize_value_with_id, Deserializer, Serializer}; use crate::store; @@ -103,10 +104,11 @@ pub fn push_documents_addition( Ok(last_update_id) } -pub fn apply_documents_addition<'a, 'b>( +pub fn apply_addition<'a, 'b>( writer: &'a mut heed::RwTxn<'b, MainT>, index: &store::Index, addition: Vec>, + partial: bool ) -> MResult<()> { let mut documents_additions = HashMap::new(); @@ -118,12 +120,30 @@ pub fn apply_documents_addition<'a, 'b>( let primary_key = schema.primary_key().ok_or(Error::MissingPrimaryKey)?; // 1. store documents ids for future deletion - for document in addition { + for mut document in addition { let document_id = match extract_document_id(&primary_key, &document)? { Some(id) => id, None => return Err(Error::MissingDocumentId), }; + if partial { + let mut deserializer = Deserializer { + document_id, + reader: writer, + documents_fields: index.documents_fields, + schema: &schema, + fields: None, + }; + + // retrieve the old document and + // update the new one with missing keys found in the old one + let result = Option::>::deserialize(&mut deserializer)?; + if let Some(old_document) = result { + for (key, value) in old_document { + document.entry(key).or_insert(value); + } + } + } documents_additions.insert(document_id, document); } @@ -143,6 +163,11 @@ pub fn apply_documents_addition<'a, 'b>( }; // 3. index the documents fields in the stores + if let Some(attributes_for_facetting) = index.main.attributes_for_faceting(writer)? { + let facet_map = facets::facet_map_from_docs(&schema, &documents_additions, attributes_for_facetting.as_ref())?; + index.facets.add(writer, facet_map)?; + } + let mut indexer = RawIndexer::new(stop_words); for (document_id, document) in documents_additions { @@ -177,85 +202,15 @@ pub fn apply_documents_partial_addition<'a, 'b>( index: &store::Index, addition: Vec>, ) -> MResult<()> { - let mut documents_additions = HashMap::new(); + apply_addition(writer, index, addition, true) +} - let mut schema = match index.main.schema(writer)? { - Some(schema) => schema, - None => return Err(Error::SchemaMissing), - }; - - let primary_key = schema.primary_key().ok_or(Error::MissingPrimaryKey)?; - - // 1. store documents ids for future deletion - for mut document in addition { - let document_id = match extract_document_id(&primary_key, &document)? { - Some(id) => id, - None => return Err(Error::MissingDocumentId), - }; - - let mut deserializer = Deserializer { - document_id, - reader: writer, - documents_fields: index.documents_fields, - schema: &schema, - fields: None, - }; - - // retrieve the old document and - // update the new one with missing keys found in the old one - let result = Option::>::deserialize(&mut deserializer)?; - if let Some(old_document) = result { - for (key, value) in old_document { - document.entry(key).or_insert(value); - } - } - - documents_additions.insert(document_id, document); - } - - // 2. remove the documents posting lists - let number_of_inserted_documents = documents_additions.len(); - let documents_ids = documents_additions.iter().map(|(id, _)| *id).collect(); - apply_documents_deletion(writer, index, documents_ids)?; - - let mut ranked_map = match index.main.ranked_map(writer)? { - Some(ranked_map) => ranked_map, - None => RankedMap::default(), - }; - - let stop_words = match index.main.stop_words_fst(writer)? { - Some(stop_words) => stop_words, - None => fst::Set::default(), - }; - - // 3. index the documents fields in the stores - let mut indexer = RawIndexer::new(stop_words); - - for (document_id, document) in documents_additions { - let serializer = Serializer { - txn: writer, - schema: &mut schema, - document_store: index.documents_fields, - document_fields_counts: index.documents_fields_counts, - indexer: &mut indexer, - ranked_map: &mut ranked_map, - document_id, - }; - - document.serialize(serializer)?; - } - - write_documents_addition_index( - writer, - index, - &ranked_map, - number_of_inserted_documents, - indexer, - )?; - - index.main.put_schema(writer, &schema)?; - - Ok(()) +pub fn apply_documents_addition<'a, 'b>( + writer: &'a mut heed::RwTxn<'b, MainT>, + index: &store::Index, + addition: Vec>, +) -> MResult<()> { + apply_addition(writer, index, addition, false) } pub fn reindex_all_documents(writer: &mut heed::RwTxn, index: &store::Index) -> MResult<()> { @@ -277,6 +232,7 @@ pub fn reindex_all_documents(writer: &mut heed::RwTxn, index: &store::Ind index.main.put_words_fst(writer, &fst::Set::default())?; index.main.put_ranked_map(writer, &ranked_map)?; index.main.put_number_of_documents(writer, |_| 0)?; + index.facets.clear(writer)?; index.postings_lists.clear(writer)?; index.docs_words.clear(writer)?; @@ -289,6 +245,11 @@ pub fn reindex_all_documents(writer: &mut heed::RwTxn, index: &store::Ind let mut indexer = RawIndexer::new(stop_words); let mut ram_store = HashMap::new(); + if let Some(ref attributes_for_facetting) = index.main.attributes_for_faceting(writer)? { + let facet_map = facets::facet_map_from_docids(writer, &index, &documents_ids_to_reindex, &attributes_for_facetting)?; + index.facets.add(writer, facet_map)?; + } + // ^-- https://github.com/meilisearch/MeiliSearch/pull/631#issuecomment-626624470 --v for document_id in documents_ids_to_reindex { for result in index.documents_fields.document_fields(writer, document_id)? { let (field_id, bytes) = result?; diff --git a/meilisearch-core/src/update/documents_deletion.rs b/meilisearch-core/src/update/documents_deletion.rs index f28709ad9..30d563efb 100644 --- a/meilisearch-core/src/update/documents_deletion.rs +++ b/meilisearch-core/src/update/documents_deletion.rs @@ -6,6 +6,7 @@ use sdset::{duo::DifferenceByKey, SetBuf, SetOperation}; use crate::database::{MainT, UpdateT}; use crate::database::{UpdateEvent, UpdateEventsEmitter}; +use crate::facets; use crate::serde::extract_document_id; use crate::store; use crate::update::{next_update_id, compute_short_prefixes, Update}; @@ -88,8 +89,6 @@ pub fn apply_documents_deletion( index: &store::Index, deletion: Vec, ) -> MResult<()> { - let idset = SetBuf::from_dirty(deletion); - let schema = match index.main.schema(writer)? { Some(schema) => schema, None => return Err(Error::SchemaMissing), @@ -100,9 +99,16 @@ pub fn apply_documents_deletion( None => RankedMap::default(), }; + // facet filters deletion + if let Some(attributes_for_facetting) = index.main.attributes_for_faceting(writer)? { + let facet_map = facets::facet_map_from_docids(writer, &index, &deletion, &attributes_for_facetting)?; + index.facets.remove(writer, facet_map)?; + } + // collect the ranked attributes according to the schema let ranked_fields = schema.ranked(); + let idset = SetBuf::from_dirty(deletion); let mut words_document_ids = HashMap::new(); for id in idset { // remove all the ranked attributes from the ranked_map diff --git a/meilisearch-http/src/error.rs b/meilisearch-http/src/error.rs index 273694fb6..e24a57bb8 100644 --- a/meilisearch-http/src/error.rs +++ b/meilisearch-http/src/error.rs @@ -120,7 +120,7 @@ impl aweb::error::ResponseError for ResponseError { } fn status_code(&self) -> StatusCode { - match *self { + match *self { Self::BadParameter(_, _) | Self::BadRequest(_) | Self::CreateIndex(_) diff --git a/meilisearch-schema/Cargo.toml b/meilisearch-schema/Cargo.toml index 126fe04bb..712fdb008 100644 --- a/meilisearch-schema/Cargo.toml +++ b/meilisearch-schema/Cargo.toml @@ -11,3 +11,4 @@ indexmap = { version = "1.3.2", features = ["serde-1"] } serde = { version = "1.0.105", features = ["derive"] } serde_json = { version = "1.0.50", features = ["preserve_order"] } toml = { version = "0.5.6", features = ["preserve_order"] } +zerocopy = "0.3.0" diff --git a/meilisearch-schema/src/lib.rs b/meilisearch-schema/src/lib.rs index 7f17d5a89..a35c30c03 100644 --- a/meilisearch-schema/src/lib.rs +++ b/meilisearch-schema/src/lib.rs @@ -6,6 +6,7 @@ pub use error::{Error, SResult}; pub use fields_map::FieldsMap; pub use schema::Schema; use serde::{Deserialize, Serialize}; +use zerocopy::{AsBytes, FromBytes}; #[derive(Serialize, Deserialize, Debug, Copy, Clone, Default, PartialOrd, Ord, PartialEq, Eq, Hash)] pub struct IndexedPos(pub u16);