mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-22 21:04:27 +01:00
document update
This commit is contained in:
parent
270c7b0288
commit
e07fe017c1
1
Cargo.lock
generated
1
Cargo.lock
generated
@ -1685,6 +1685,7 @@ dependencies = [
|
|||||||
"serde",
|
"serde",
|
||||||
"serde_json",
|
"serde_json",
|
||||||
"toml",
|
"toml",
|
||||||
|
"zerocopy",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
@ -197,7 +197,7 @@ impl fmt::Display for FacetError {
|
|||||||
InvalidFormat(found) => write!(f, "invalid facet: {}, facets should be \"facetName:facetValue\"", found),
|
InvalidFormat(found) => write!(f, "invalid facet: {}, facets should be \"facetName:facetValue\"", found),
|
||||||
AttributeNotFound(attr) => write!(f, "unknown {:?} attribute", attr),
|
AttributeNotFound(attr) => write!(f, "unknown {:?} attribute", attr),
|
||||||
AttributeNotSet { found, expected } => write!(f, "`{}` is not set as a faceted attribute. available facet attributes: {}", found, expected.join(", ")),
|
AttributeNotSet { found, expected } => write!(f, "`{}` is not set as a faceted attribute. available facet attributes: {}", found, expected.join(", ")),
|
||||||
InvalidDocumentAttribute(attr) => write!(f, "invalid document attribute {}, accepted types: string and [string]", attr),
|
InvalidDocumentAttribute(attr) => write!(f, "invalid document attribute {}, accepted types: String and [String]", attr),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -70,7 +70,7 @@ impl FacetFilter {
|
|||||||
}
|
}
|
||||||
return Ok(Self(filter));
|
return Ok(Self(filter));
|
||||||
}
|
}
|
||||||
bad_value => Err(FacetError::unexpected_token(&["String"], bad_value)),
|
bad_value => Err(FacetError::unexpected_token(&["Array"], bad_value)),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1,3 +1,4 @@
|
|||||||
|
use std::borrow::Cow;
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
|
|
||||||
|
@ -7,6 +7,7 @@ use serde::{Deserialize, Serialize};
|
|||||||
|
|
||||||
use crate::database::{MainT, UpdateT};
|
use crate::database::{MainT, UpdateT};
|
||||||
use crate::database::{UpdateEvent, UpdateEventsEmitter};
|
use crate::database::{UpdateEvent, UpdateEventsEmitter};
|
||||||
|
use crate::facets;
|
||||||
use crate::raw_indexer::RawIndexer;
|
use crate::raw_indexer::RawIndexer;
|
||||||
use crate::serde::{extract_document_id, serialize_value_with_id, Deserializer, Serializer};
|
use crate::serde::{extract_document_id, serialize_value_with_id, Deserializer, Serializer};
|
||||||
use crate::store;
|
use crate::store;
|
||||||
@ -103,10 +104,11 @@ pub fn push_documents_addition<D: serde::Serialize>(
|
|||||||
Ok(last_update_id)
|
Ok(last_update_id)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn apply_documents_addition<'a, 'b>(
|
pub fn apply_addition<'a, 'b>(
|
||||||
writer: &'a mut heed::RwTxn<'b, MainT>,
|
writer: &'a mut heed::RwTxn<'b, MainT>,
|
||||||
index: &store::Index,
|
index: &store::Index,
|
||||||
addition: Vec<IndexMap<String, serde_json::Value>>,
|
addition: Vec<IndexMap<String, serde_json::Value>>,
|
||||||
|
partial: bool
|
||||||
) -> MResult<()> {
|
) -> MResult<()> {
|
||||||
let mut documents_additions = HashMap::new();
|
let mut documents_additions = HashMap::new();
|
||||||
|
|
||||||
@ -118,12 +120,30 @@ pub fn apply_documents_addition<'a, 'b>(
|
|||||||
let primary_key = schema.primary_key().ok_or(Error::MissingPrimaryKey)?;
|
let primary_key = schema.primary_key().ok_or(Error::MissingPrimaryKey)?;
|
||||||
|
|
||||||
// 1. store documents ids for future deletion
|
// 1. store documents ids for future deletion
|
||||||
for document in addition {
|
for mut document in addition {
|
||||||
let document_id = match extract_document_id(&primary_key, &document)? {
|
let document_id = match extract_document_id(&primary_key, &document)? {
|
||||||
Some(id) => id,
|
Some(id) => id,
|
||||||
None => return Err(Error::MissingDocumentId),
|
None => return Err(Error::MissingDocumentId),
|
||||||
};
|
};
|
||||||
|
|
||||||
|
if partial {
|
||||||
|
let mut deserializer = Deserializer {
|
||||||
|
document_id,
|
||||||
|
reader: writer,
|
||||||
|
documents_fields: index.documents_fields,
|
||||||
|
schema: &schema,
|
||||||
|
fields: None,
|
||||||
|
};
|
||||||
|
|
||||||
|
// retrieve the old document and
|
||||||
|
// update the new one with missing keys found in the old one
|
||||||
|
let result = Option::<HashMap<String, serde_json::Value>>::deserialize(&mut deserializer)?;
|
||||||
|
if let Some(old_document) = result {
|
||||||
|
for (key, value) in old_document {
|
||||||
|
document.entry(key).or_insert(value);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
documents_additions.insert(document_id, document);
|
documents_additions.insert(document_id, document);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -143,6 +163,11 @@ pub fn apply_documents_addition<'a, 'b>(
|
|||||||
};
|
};
|
||||||
|
|
||||||
// 3. index the documents fields in the stores
|
// 3. index the documents fields in the stores
|
||||||
|
if let Some(attributes_for_facetting) = index.main.attributes_for_faceting(writer)? {
|
||||||
|
let facet_map = facets::facet_map_from_docs(&schema, &documents_additions, attributes_for_facetting.as_ref())?;
|
||||||
|
index.facets.add(writer, facet_map)?;
|
||||||
|
}
|
||||||
|
|
||||||
let mut indexer = RawIndexer::new(stop_words);
|
let mut indexer = RawIndexer::new(stop_words);
|
||||||
|
|
||||||
for (document_id, document) in documents_additions {
|
for (document_id, document) in documents_additions {
|
||||||
@ -177,85 +202,15 @@ pub fn apply_documents_partial_addition<'a, 'b>(
|
|||||||
index: &store::Index,
|
index: &store::Index,
|
||||||
addition: Vec<IndexMap<String, serde_json::Value>>,
|
addition: Vec<IndexMap<String, serde_json::Value>>,
|
||||||
) -> MResult<()> {
|
) -> MResult<()> {
|
||||||
let mut documents_additions = HashMap::new();
|
apply_addition(writer, index, addition, true)
|
||||||
|
|
||||||
let mut schema = match index.main.schema(writer)? {
|
|
||||||
Some(schema) => schema,
|
|
||||||
None => return Err(Error::SchemaMissing),
|
|
||||||
};
|
|
||||||
|
|
||||||
let primary_key = schema.primary_key().ok_or(Error::MissingPrimaryKey)?;
|
|
||||||
|
|
||||||
// 1. store documents ids for future deletion
|
|
||||||
for mut document in addition {
|
|
||||||
let document_id = match extract_document_id(&primary_key, &document)? {
|
|
||||||
Some(id) => id,
|
|
||||||
None => return Err(Error::MissingDocumentId),
|
|
||||||
};
|
|
||||||
|
|
||||||
let mut deserializer = Deserializer {
|
|
||||||
document_id,
|
|
||||||
reader: writer,
|
|
||||||
documents_fields: index.documents_fields,
|
|
||||||
schema: &schema,
|
|
||||||
fields: None,
|
|
||||||
};
|
|
||||||
|
|
||||||
// retrieve the old document and
|
|
||||||
// update the new one with missing keys found in the old one
|
|
||||||
let result = Option::<HashMap<String, serde_json::Value>>::deserialize(&mut deserializer)?;
|
|
||||||
if let Some(old_document) = result {
|
|
||||||
for (key, value) in old_document {
|
|
||||||
document.entry(key).or_insert(value);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
documents_additions.insert(document_id, document);
|
pub fn apply_documents_addition<'a, 'b>(
|
||||||
}
|
writer: &'a mut heed::RwTxn<'b, MainT>,
|
||||||
|
index: &store::Index,
|
||||||
// 2. remove the documents posting lists
|
addition: Vec<IndexMap<String, serde_json::Value>>,
|
||||||
let number_of_inserted_documents = documents_additions.len();
|
) -> MResult<()> {
|
||||||
let documents_ids = documents_additions.iter().map(|(id, _)| *id).collect();
|
apply_addition(writer, index, addition, false)
|
||||||
apply_documents_deletion(writer, index, documents_ids)?;
|
|
||||||
|
|
||||||
let mut ranked_map = match index.main.ranked_map(writer)? {
|
|
||||||
Some(ranked_map) => ranked_map,
|
|
||||||
None => RankedMap::default(),
|
|
||||||
};
|
|
||||||
|
|
||||||
let stop_words = match index.main.stop_words_fst(writer)? {
|
|
||||||
Some(stop_words) => stop_words,
|
|
||||||
None => fst::Set::default(),
|
|
||||||
};
|
|
||||||
|
|
||||||
// 3. index the documents fields in the stores
|
|
||||||
let mut indexer = RawIndexer::new(stop_words);
|
|
||||||
|
|
||||||
for (document_id, document) in documents_additions {
|
|
||||||
let serializer = Serializer {
|
|
||||||
txn: writer,
|
|
||||||
schema: &mut schema,
|
|
||||||
document_store: index.documents_fields,
|
|
||||||
document_fields_counts: index.documents_fields_counts,
|
|
||||||
indexer: &mut indexer,
|
|
||||||
ranked_map: &mut ranked_map,
|
|
||||||
document_id,
|
|
||||||
};
|
|
||||||
|
|
||||||
document.serialize(serializer)?;
|
|
||||||
}
|
|
||||||
|
|
||||||
write_documents_addition_index(
|
|
||||||
writer,
|
|
||||||
index,
|
|
||||||
&ranked_map,
|
|
||||||
number_of_inserted_documents,
|
|
||||||
indexer,
|
|
||||||
)?;
|
|
||||||
|
|
||||||
index.main.put_schema(writer, &schema)?;
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn reindex_all_documents(writer: &mut heed::RwTxn<MainT>, index: &store::Index) -> MResult<()> {
|
pub fn reindex_all_documents(writer: &mut heed::RwTxn<MainT>, index: &store::Index) -> MResult<()> {
|
||||||
@ -277,6 +232,7 @@ pub fn reindex_all_documents(writer: &mut heed::RwTxn<MainT>, index: &store::Ind
|
|||||||
index.main.put_words_fst(writer, &fst::Set::default())?;
|
index.main.put_words_fst(writer, &fst::Set::default())?;
|
||||||
index.main.put_ranked_map(writer, &ranked_map)?;
|
index.main.put_ranked_map(writer, &ranked_map)?;
|
||||||
index.main.put_number_of_documents(writer, |_| 0)?;
|
index.main.put_number_of_documents(writer, |_| 0)?;
|
||||||
|
index.facets.clear(writer)?;
|
||||||
index.postings_lists.clear(writer)?;
|
index.postings_lists.clear(writer)?;
|
||||||
index.docs_words.clear(writer)?;
|
index.docs_words.clear(writer)?;
|
||||||
|
|
||||||
@ -289,6 +245,11 @@ pub fn reindex_all_documents(writer: &mut heed::RwTxn<MainT>, index: &store::Ind
|
|||||||
let mut indexer = RawIndexer::new(stop_words);
|
let mut indexer = RawIndexer::new(stop_words);
|
||||||
let mut ram_store = HashMap::new();
|
let mut ram_store = HashMap::new();
|
||||||
|
|
||||||
|
if let Some(ref attributes_for_facetting) = index.main.attributes_for_faceting(writer)? {
|
||||||
|
let facet_map = facets::facet_map_from_docids(writer, &index, &documents_ids_to_reindex, &attributes_for_facetting)?;
|
||||||
|
index.facets.add(writer, facet_map)?;
|
||||||
|
}
|
||||||
|
// ^-- https://github.com/meilisearch/MeiliSearch/pull/631#issuecomment-626624470 --v
|
||||||
for document_id in documents_ids_to_reindex {
|
for document_id in documents_ids_to_reindex {
|
||||||
for result in index.documents_fields.document_fields(writer, document_id)? {
|
for result in index.documents_fields.document_fields(writer, document_id)? {
|
||||||
let (field_id, bytes) = result?;
|
let (field_id, bytes) = result?;
|
||||||
|
@ -6,6 +6,7 @@ use sdset::{duo::DifferenceByKey, SetBuf, SetOperation};
|
|||||||
|
|
||||||
use crate::database::{MainT, UpdateT};
|
use crate::database::{MainT, UpdateT};
|
||||||
use crate::database::{UpdateEvent, UpdateEventsEmitter};
|
use crate::database::{UpdateEvent, UpdateEventsEmitter};
|
||||||
|
use crate::facets;
|
||||||
use crate::serde::extract_document_id;
|
use crate::serde::extract_document_id;
|
||||||
use crate::store;
|
use crate::store;
|
||||||
use crate::update::{next_update_id, compute_short_prefixes, Update};
|
use crate::update::{next_update_id, compute_short_prefixes, Update};
|
||||||
@ -88,8 +89,6 @@ pub fn apply_documents_deletion(
|
|||||||
index: &store::Index,
|
index: &store::Index,
|
||||||
deletion: Vec<DocumentId>,
|
deletion: Vec<DocumentId>,
|
||||||
) -> MResult<()> {
|
) -> MResult<()> {
|
||||||
let idset = SetBuf::from_dirty(deletion);
|
|
||||||
|
|
||||||
let schema = match index.main.schema(writer)? {
|
let schema = match index.main.schema(writer)? {
|
||||||
Some(schema) => schema,
|
Some(schema) => schema,
|
||||||
None => return Err(Error::SchemaMissing),
|
None => return Err(Error::SchemaMissing),
|
||||||
@ -100,9 +99,16 @@ pub fn apply_documents_deletion(
|
|||||||
None => RankedMap::default(),
|
None => RankedMap::default(),
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// facet filters deletion
|
||||||
|
if let Some(attributes_for_facetting) = index.main.attributes_for_faceting(writer)? {
|
||||||
|
let facet_map = facets::facet_map_from_docids(writer, &index, &deletion, &attributes_for_facetting)?;
|
||||||
|
index.facets.remove(writer, facet_map)?;
|
||||||
|
}
|
||||||
|
|
||||||
// collect the ranked attributes according to the schema
|
// collect the ranked attributes according to the schema
|
||||||
let ranked_fields = schema.ranked();
|
let ranked_fields = schema.ranked();
|
||||||
|
|
||||||
|
let idset = SetBuf::from_dirty(deletion);
|
||||||
let mut words_document_ids = HashMap::new();
|
let mut words_document_ids = HashMap::new();
|
||||||
for id in idset {
|
for id in idset {
|
||||||
// remove all the ranked attributes from the ranked_map
|
// remove all the ranked attributes from the ranked_map
|
||||||
|
@ -11,3 +11,4 @@ indexmap = { version = "1.3.2", features = ["serde-1"] }
|
|||||||
serde = { version = "1.0.105", features = ["derive"] }
|
serde = { version = "1.0.105", features = ["derive"] }
|
||||||
serde_json = { version = "1.0.50", features = ["preserve_order"] }
|
serde_json = { version = "1.0.50", features = ["preserve_order"] }
|
||||||
toml = { version = "0.5.6", features = ["preserve_order"] }
|
toml = { version = "0.5.6", features = ["preserve_order"] }
|
||||||
|
zerocopy = "0.3.0"
|
||||||
|
@ -6,6 +6,7 @@ pub use error::{Error, SResult};
|
|||||||
pub use fields_map::FieldsMap;
|
pub use fields_map::FieldsMap;
|
||||||
pub use schema::Schema;
|
pub use schema::Schema;
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
|
use zerocopy::{AsBytes, FromBytes};
|
||||||
|
|
||||||
#[derive(Serialize, Deserialize, Debug, Copy, Clone, Default, PartialOrd, Ord, PartialEq, Eq, Hash)]
|
#[derive(Serialize, Deserialize, Debug, Copy, Clone, Default, PartialOrd, Ord, PartialEq, Eq, Hash)]
|
||||||
pub struct IndexedPos(pub u16);
|
pub struct IndexedPos(pub u16);
|
||||||
|
Loading…
Reference in New Issue
Block a user