mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-22 12:54:26 +01:00
document update
This commit is contained in:
parent
270c7b0288
commit
e07fe017c1
1
Cargo.lock
generated
1
Cargo.lock
generated
@ -1685,6 +1685,7 @@ dependencies = [
|
||||
"serde",
|
||||
"serde_json",
|
||||
"toml",
|
||||
"zerocopy",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -197,7 +197,7 @@ impl fmt::Display for FacetError {
|
||||
InvalidFormat(found) => write!(f, "invalid facet: {}, facets should be \"facetName:facetValue\"", found),
|
||||
AttributeNotFound(attr) => write!(f, "unknown {:?} attribute", attr),
|
||||
AttributeNotSet { found, expected } => write!(f, "`{}` is not set as a faceted attribute. available facet attributes: {}", found, expected.join(", ")),
|
||||
InvalidDocumentAttribute(attr) => write!(f, "invalid document attribute {}, accepted types: string and [string]", attr),
|
||||
InvalidDocumentAttribute(attr) => write!(f, "invalid document attribute {}, accepted types: String and [String]", attr),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -70,7 +70,7 @@ impl FacetFilter {
|
||||
}
|
||||
return Ok(Self(filter));
|
||||
}
|
||||
bad_value => Err(FacetError::unexpected_token(&["String"], bad_value)),
|
||||
bad_value => Err(FacetError::unexpected_token(&["Array"], bad_value)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1,3 +1,4 @@
|
||||
use std::borrow::Cow;
|
||||
use std::sync::Arc;
|
||||
use std::collections::HashMap;
|
||||
|
||||
|
@ -7,6 +7,7 @@ use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::database::{MainT, UpdateT};
|
||||
use crate::database::{UpdateEvent, UpdateEventsEmitter};
|
||||
use crate::facets;
|
||||
use crate::raw_indexer::RawIndexer;
|
||||
use crate::serde::{extract_document_id, serialize_value_with_id, Deserializer, Serializer};
|
||||
use crate::store;
|
||||
@ -103,10 +104,11 @@ pub fn push_documents_addition<D: serde::Serialize>(
|
||||
Ok(last_update_id)
|
||||
}
|
||||
|
||||
pub fn apply_documents_addition<'a, 'b>(
|
||||
pub fn apply_addition<'a, 'b>(
|
||||
writer: &'a mut heed::RwTxn<'b, MainT>,
|
||||
index: &store::Index,
|
||||
addition: Vec<IndexMap<String, serde_json::Value>>,
|
||||
partial: bool
|
||||
) -> MResult<()> {
|
||||
let mut documents_additions = HashMap::new();
|
||||
|
||||
@ -118,12 +120,30 @@ pub fn apply_documents_addition<'a, 'b>(
|
||||
let primary_key = schema.primary_key().ok_or(Error::MissingPrimaryKey)?;
|
||||
|
||||
// 1. store documents ids for future deletion
|
||||
for document in addition {
|
||||
for mut document in addition {
|
||||
let document_id = match extract_document_id(&primary_key, &document)? {
|
||||
Some(id) => id,
|
||||
None => return Err(Error::MissingDocumentId),
|
||||
};
|
||||
|
||||
if partial {
|
||||
let mut deserializer = Deserializer {
|
||||
document_id,
|
||||
reader: writer,
|
||||
documents_fields: index.documents_fields,
|
||||
schema: &schema,
|
||||
fields: None,
|
||||
};
|
||||
|
||||
// retrieve the old document and
|
||||
// update the new one with missing keys found in the old one
|
||||
let result = Option::<HashMap<String, serde_json::Value>>::deserialize(&mut deserializer)?;
|
||||
if let Some(old_document) = result {
|
||||
for (key, value) in old_document {
|
||||
document.entry(key).or_insert(value);
|
||||
}
|
||||
}
|
||||
}
|
||||
documents_additions.insert(document_id, document);
|
||||
}
|
||||
|
||||
@ -143,6 +163,11 @@ pub fn apply_documents_addition<'a, 'b>(
|
||||
};
|
||||
|
||||
// 3. index the documents fields in the stores
|
||||
if let Some(attributes_for_facetting) = index.main.attributes_for_faceting(writer)? {
|
||||
let facet_map = facets::facet_map_from_docs(&schema, &documents_additions, attributes_for_facetting.as_ref())?;
|
||||
index.facets.add(writer, facet_map)?;
|
||||
}
|
||||
|
||||
let mut indexer = RawIndexer::new(stop_words);
|
||||
|
||||
for (document_id, document) in documents_additions {
|
||||
@ -177,85 +202,15 @@ pub fn apply_documents_partial_addition<'a, 'b>(
|
||||
index: &store::Index,
|
||||
addition: Vec<IndexMap<String, serde_json::Value>>,
|
||||
) -> MResult<()> {
|
||||
let mut documents_additions = HashMap::new();
|
||||
|
||||
let mut schema = match index.main.schema(writer)? {
|
||||
Some(schema) => schema,
|
||||
None => return Err(Error::SchemaMissing),
|
||||
};
|
||||
|
||||
let primary_key = schema.primary_key().ok_or(Error::MissingPrimaryKey)?;
|
||||
|
||||
// 1. store documents ids for future deletion
|
||||
for mut document in addition {
|
||||
let document_id = match extract_document_id(&primary_key, &document)? {
|
||||
Some(id) => id,
|
||||
None => return Err(Error::MissingDocumentId),
|
||||
};
|
||||
|
||||
let mut deserializer = Deserializer {
|
||||
document_id,
|
||||
reader: writer,
|
||||
documents_fields: index.documents_fields,
|
||||
schema: &schema,
|
||||
fields: None,
|
||||
};
|
||||
|
||||
// retrieve the old document and
|
||||
// update the new one with missing keys found in the old one
|
||||
let result = Option::<HashMap<String, serde_json::Value>>::deserialize(&mut deserializer)?;
|
||||
if let Some(old_document) = result {
|
||||
for (key, value) in old_document {
|
||||
document.entry(key).or_insert(value);
|
||||
}
|
||||
apply_addition(writer, index, addition, true)
|
||||
}
|
||||
|
||||
documents_additions.insert(document_id, document);
|
||||
}
|
||||
|
||||
// 2. remove the documents posting lists
|
||||
let number_of_inserted_documents = documents_additions.len();
|
||||
let documents_ids = documents_additions.iter().map(|(id, _)| *id).collect();
|
||||
apply_documents_deletion(writer, index, documents_ids)?;
|
||||
|
||||
let mut ranked_map = match index.main.ranked_map(writer)? {
|
||||
Some(ranked_map) => ranked_map,
|
||||
None => RankedMap::default(),
|
||||
};
|
||||
|
||||
let stop_words = match index.main.stop_words_fst(writer)? {
|
||||
Some(stop_words) => stop_words,
|
||||
None => fst::Set::default(),
|
||||
};
|
||||
|
||||
// 3. index the documents fields in the stores
|
||||
let mut indexer = RawIndexer::new(stop_words);
|
||||
|
||||
for (document_id, document) in documents_additions {
|
||||
let serializer = Serializer {
|
||||
txn: writer,
|
||||
schema: &mut schema,
|
||||
document_store: index.documents_fields,
|
||||
document_fields_counts: index.documents_fields_counts,
|
||||
indexer: &mut indexer,
|
||||
ranked_map: &mut ranked_map,
|
||||
document_id,
|
||||
};
|
||||
|
||||
document.serialize(serializer)?;
|
||||
}
|
||||
|
||||
write_documents_addition_index(
|
||||
writer,
|
||||
index,
|
||||
&ranked_map,
|
||||
number_of_inserted_documents,
|
||||
indexer,
|
||||
)?;
|
||||
|
||||
index.main.put_schema(writer, &schema)?;
|
||||
|
||||
Ok(())
|
||||
pub fn apply_documents_addition<'a, 'b>(
|
||||
writer: &'a mut heed::RwTxn<'b, MainT>,
|
||||
index: &store::Index,
|
||||
addition: Vec<IndexMap<String, serde_json::Value>>,
|
||||
) -> MResult<()> {
|
||||
apply_addition(writer, index, addition, false)
|
||||
}
|
||||
|
||||
pub fn reindex_all_documents(writer: &mut heed::RwTxn<MainT>, index: &store::Index) -> MResult<()> {
|
||||
@ -277,6 +232,7 @@ pub fn reindex_all_documents(writer: &mut heed::RwTxn<MainT>, index: &store::Ind
|
||||
index.main.put_words_fst(writer, &fst::Set::default())?;
|
||||
index.main.put_ranked_map(writer, &ranked_map)?;
|
||||
index.main.put_number_of_documents(writer, |_| 0)?;
|
||||
index.facets.clear(writer)?;
|
||||
index.postings_lists.clear(writer)?;
|
||||
index.docs_words.clear(writer)?;
|
||||
|
||||
@ -289,6 +245,11 @@ pub fn reindex_all_documents(writer: &mut heed::RwTxn<MainT>, index: &store::Ind
|
||||
let mut indexer = RawIndexer::new(stop_words);
|
||||
let mut ram_store = HashMap::new();
|
||||
|
||||
if let Some(ref attributes_for_facetting) = index.main.attributes_for_faceting(writer)? {
|
||||
let facet_map = facets::facet_map_from_docids(writer, &index, &documents_ids_to_reindex, &attributes_for_facetting)?;
|
||||
index.facets.add(writer, facet_map)?;
|
||||
}
|
||||
// ^-- https://github.com/meilisearch/MeiliSearch/pull/631#issuecomment-626624470 --v
|
||||
for document_id in documents_ids_to_reindex {
|
||||
for result in index.documents_fields.document_fields(writer, document_id)? {
|
||||
let (field_id, bytes) = result?;
|
||||
|
@ -6,6 +6,7 @@ use sdset::{duo::DifferenceByKey, SetBuf, SetOperation};
|
||||
|
||||
use crate::database::{MainT, UpdateT};
|
||||
use crate::database::{UpdateEvent, UpdateEventsEmitter};
|
||||
use crate::facets;
|
||||
use crate::serde::extract_document_id;
|
||||
use crate::store;
|
||||
use crate::update::{next_update_id, compute_short_prefixes, Update};
|
||||
@ -88,8 +89,6 @@ pub fn apply_documents_deletion(
|
||||
index: &store::Index,
|
||||
deletion: Vec<DocumentId>,
|
||||
) -> MResult<()> {
|
||||
let idset = SetBuf::from_dirty(deletion);
|
||||
|
||||
let schema = match index.main.schema(writer)? {
|
||||
Some(schema) => schema,
|
||||
None => return Err(Error::SchemaMissing),
|
||||
@ -100,9 +99,16 @@ pub fn apply_documents_deletion(
|
||||
None => RankedMap::default(),
|
||||
};
|
||||
|
||||
// facet filters deletion
|
||||
if let Some(attributes_for_facetting) = index.main.attributes_for_faceting(writer)? {
|
||||
let facet_map = facets::facet_map_from_docids(writer, &index, &deletion, &attributes_for_facetting)?;
|
||||
index.facets.remove(writer, facet_map)?;
|
||||
}
|
||||
|
||||
// collect the ranked attributes according to the schema
|
||||
let ranked_fields = schema.ranked();
|
||||
|
||||
let idset = SetBuf::from_dirty(deletion);
|
||||
let mut words_document_ids = HashMap::new();
|
||||
for id in idset {
|
||||
// remove all the ranked attributes from the ranked_map
|
||||
|
@ -11,3 +11,4 @@ indexmap = { version = "1.3.2", features = ["serde-1"] }
|
||||
serde = { version = "1.0.105", features = ["derive"] }
|
||||
serde_json = { version = "1.0.50", features = ["preserve_order"] }
|
||||
toml = { version = "0.5.6", features = ["preserve_order"] }
|
||||
zerocopy = "0.3.0"
|
||||
|
@ -6,6 +6,7 @@ pub use error::{Error, SResult};
|
||||
pub use fields_map::FieldsMap;
|
||||
pub use schema::Schema;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use zerocopy::{AsBytes, FromBytes};
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Copy, Clone, Default, PartialOrd, Ord, PartialEq, Eq, Hash)]
|
||||
pub struct IndexedPos(pub u16);
|
||||
|
Loading…
Reference in New Issue
Block a user