mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-30 08:44:27 +01:00
Merge pull request #242 from meilisearch/accept-dup-documents
Make documents additions accept only the last duplicate document
This commit is contained in:
commit
9602d7a960
@ -12,7 +12,6 @@ pub enum Error {
|
|||||||
SchemaMissing,
|
SchemaMissing,
|
||||||
WordIndexMissing,
|
WordIndexMissing,
|
||||||
MissingDocumentId,
|
MissingDocumentId,
|
||||||
DuplicateDocument,
|
|
||||||
Zlmdb(heed::Error),
|
Zlmdb(heed::Error),
|
||||||
Fst(fst::Error),
|
Fst(fst::Error),
|
||||||
SerdeJson(SerdeJsonError),
|
SerdeJson(SerdeJsonError),
|
||||||
@ -80,7 +79,6 @@ impl fmt::Display for Error {
|
|||||||
SchemaMissing => write!(f, "this index does not have a schema"),
|
SchemaMissing => write!(f, "this index does not have a schema"),
|
||||||
WordIndexMissing => write!(f, "this index does not have a word index"),
|
WordIndexMissing => write!(f, "this index does not have a word index"),
|
||||||
MissingDocumentId => write!(f, "document id is missing"),
|
MissingDocumentId => write!(f, "document id is missing"),
|
||||||
DuplicateDocument => write!(f, "update contains documents with the same id"),
|
|
||||||
Zlmdb(e) => write!(f, "heed error; {}", e),
|
Zlmdb(e) => write!(f, "heed error; {}", e),
|
||||||
Fst(e) => write!(f, "fst error; {}", e),
|
Fst(e) => write!(f, "fst error; {}", e),
|
||||||
SerdeJson(e) => write!(f, "serde json error; {}", e),
|
SerdeJson(e) => write!(f, "serde json error; {}", e),
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
use std::collections::{HashMap, HashSet};
|
use std::collections::HashMap;
|
||||||
|
|
||||||
use fst::{set::OpBuilder, SetBuilder};
|
use fst::{set::OpBuilder, SetBuilder};
|
||||||
use sdset::{duo::Union, SetOperation};
|
use sdset::{duo::Union, SetOperation};
|
||||||
@ -86,7 +86,7 @@ pub fn apply_documents_addition(
|
|||||||
docs_words_store: store::DocsWords,
|
docs_words_store: store::DocsWords,
|
||||||
addition: Vec<serde_json::Value>,
|
addition: Vec<serde_json::Value>,
|
||||||
) -> MResult<()> {
|
) -> MResult<()> {
|
||||||
let mut documents_ids = HashSet::new();
|
let mut documents_additions = HashMap::new();
|
||||||
let mut indexer = RawIndexer::new();
|
let mut indexer = RawIndexer::new();
|
||||||
|
|
||||||
let schema = match main_store.schema(writer)? {
|
let schema = match main_store.schema(writer)? {
|
||||||
@ -97,19 +97,18 @@ pub fn apply_documents_addition(
|
|||||||
let identifier = schema.identifier_name();
|
let identifier = schema.identifier_name();
|
||||||
|
|
||||||
// 1. store documents ids for future deletion
|
// 1. store documents ids for future deletion
|
||||||
for document in addition.iter() {
|
for document in addition {
|
||||||
let document_id = match extract_document_id(identifier, &document)? {
|
let document_id = match extract_document_id(identifier, &document)? {
|
||||||
Some(id) => id,
|
Some(id) => id,
|
||||||
None => return Err(Error::MissingDocumentId),
|
None => return Err(Error::MissingDocumentId),
|
||||||
};
|
};
|
||||||
|
|
||||||
if !documents_ids.insert(document_id) {
|
documents_additions.insert(document_id, document);
|
||||||
return Err(Error::DuplicateDocument);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// 2. remove the documents posting lists
|
// 2. remove the documents posting lists
|
||||||
let number_of_inserted_documents = documents_ids.len();
|
let number_of_inserted_documents = documents_additions.len();
|
||||||
|
let documents_ids = documents_additions.iter().map(|(id, _)| *id).collect();
|
||||||
apply_documents_deletion(
|
apply_documents_deletion(
|
||||||
writer,
|
writer,
|
||||||
main_store,
|
main_store,
|
||||||
@ -117,7 +116,7 @@ pub fn apply_documents_addition(
|
|||||||
documents_fields_counts_store,
|
documents_fields_counts_store,
|
||||||
postings_lists_store,
|
postings_lists_store,
|
||||||
docs_words_store,
|
docs_words_store,
|
||||||
documents_ids.into_iter().collect(),
|
documents_ids,
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
let mut ranked_map = match main_store.ranked_map(writer)? {
|
let mut ranked_map = match main_store.ranked_map(writer)? {
|
||||||
@ -126,12 +125,7 @@ pub fn apply_documents_addition(
|
|||||||
};
|
};
|
||||||
|
|
||||||
// 3. index the documents fields in the stores
|
// 3. index the documents fields in the stores
|
||||||
for document in addition {
|
for (document_id, document) in documents_additions {
|
||||||
let document_id = match extract_document_id(identifier, &document)? {
|
|
||||||
Some(id) => id,
|
|
||||||
None => return Err(Error::MissingDocumentId),
|
|
||||||
};
|
|
||||||
|
|
||||||
let serializer = Serializer {
|
let serializer = Serializer {
|
||||||
txn: writer,
|
txn: writer,
|
||||||
schema: &schema,
|
schema: &schema,
|
||||||
|
Loading…
Reference in New Issue
Block a user