convert the field_distribution to a BTreeMap and avoid counting twice the same documents

commit d08cfda796 (parent 969adaefdf)
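The commit makes two related changes: the FieldsDistribution alias moves from HashMap<String, u64> to BTreeMap<String, u64>, and the document transform now subtracts the fields of a document's previous version before counting the new one, so re-indexing a document no longer counts its fields twice. One practical effect of the map change, shown in a minimal standalone sketch (std only, not the crate's API; that this was the motivation is an assumption): a BTreeMap iterates in sorted key order, so the reported distribution is deterministic across runs.

    use std::collections::{BTreeMap, HashMap};

    fn main() {
        // HashMap iteration order depends on the hasher state, so printing
        // or serializing the distribution is not stable across runs.
        let hash: HashMap<String, u64> = HashMap::from([
            ("name".to_string(), 2),
            ("id".to_string(), 2),
            ("age".to_string(), 1),
        ]);

        // A BTreeMap always iterates in sorted key order.
        let btree: BTreeMap<String, u64> = hash.into_iter().collect();

        for (field, count) in &btree {
            // Prints age, id, name: the same order on every run.
            println!("{}: {}", field, count);
        }
    }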
@@ -791,7 +791,7 @@ pub(crate) mod tests {
     use std::ops::Deref;

     use heed::EnvOpenOptions;
-    use maplit::hashmap;
+    use maplit::btreemap;
     use tempfile::TempDir;

     use crate::update::{IndexDocuments, UpdateFormat};
@@ -845,11 +845,54 @@ pub(crate) mod tests {
         let field_distribution = index.field_distribution(&rtxn).unwrap();
         assert_eq!(
             field_distribution,
-            hashmap! {
+            btreemap! {
                 "id".to_string() => 2,
                 "name".to_string() => 2,
                 "age".to_string() => 1,
             }
         );
+
+        // we add all the documents a second time. we are supposed to get the same
+        // field_distribution in the end
+        let mut wtxn = index.write_txn().unwrap();
+        let mut builder = IndexDocuments::new(&mut wtxn, &index, 0);
+        builder.update_format(UpdateFormat::Json);
+        builder.execute(content, |_, _| ()).unwrap();
+        wtxn.commit().unwrap();
+
+        let rtxn = index.read_txn().unwrap();
+
+        let field_distribution = index.field_distribution(&rtxn).unwrap();
+        assert_eq!(
+            field_distribution,
+            btreemap! {
+                "id".to_string() => 2,
+                "name".to_string() => 2,
+                "age".to_string() => 1,
+            }
+        );
+
+        // then we update a document by removing one field and another by adding one field
+        let content = &br#"[
+            { "id": 1, "name": "kevin", "has_dog": true },
+            { "id": 2, "name": "bob" }
+        ]"#[..];
+        let mut wtxn = index.write_txn().unwrap();
+        let mut builder = IndexDocuments::new(&mut wtxn, &index, 0);
+        builder.update_format(UpdateFormat::Json);
+        builder.execute(content, |_, _| ()).unwrap();
+        wtxn.commit().unwrap();
+
+        let rtxn = index.read_txn().unwrap();
+
+        let field_distribution = index.field_distribution(&rtxn).unwrap();
+        assert_eq!(
+            field_distribution,
+            btreemap! {
+                "id".to_string() => 2,
+                "name".to_string() => 2,
+                "has_dog".to_string() => 1,
+            }
+        );
     }
 }
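The assertions above encode the invariant from the commit message: indexing the same payload twice must leave the field distribution unchanged, because a replaced document's old fields are subtracted before its new fields are counted. A standalone model of that bookkeeping (hypothetical helper names, not milli's API):

    use std::collections::BTreeMap;

    type FieldsDistribution = BTreeMap<String, u64>;

    // Hypothetical helper: forget the fields of the previous version of a
    // document, dropping any field whose count reaches zero.
    fn remove_fields(dist: &mut FieldsDistribution, fields: &[&str]) {
        for &field in fields {
            if let Some(count) = dist.get_mut(field) {
                *count -= 1;
                if *count == 0 {
                    dist.remove(field);
                }
            }
        }
    }

    // Hypothetical helper: count the fields of the new version of a document.
    fn add_fields(dist: &mut FieldsDistribution, fields: &[&str]) {
        for &field in fields {
            *dist.entry(field.to_string()).or_insert(0) += 1;
        }
    }

    fn main() {
        let mut dist = FieldsDistribution::new();
        add_fields(&mut dist, &["id", "name", "age"]);
        add_fields(&mut dist, &["id", "name"]);

        // Re-indexing the first document: subtract its old fields, then
        // count the new ones. The replay is a no-op.
        remove_fields(&mut dist, &["id", "name", "age"]);
        add_fields(&mut dist, &["id", "name", "age"]);

        assert_eq!(dist.get("id"), Some(&2)); // not counted twice
        assert_eq!(dist.get("age"), Some(&1));
    }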

@@ -14,7 +14,7 @@ pub mod tree_level;
 pub mod update;

 use std::borrow::Cow;
-use std::collections::HashMap;
+use std::collections::{BTreeMap, HashMap};
 use std::hash::BuildHasherDefault;
 use std::result::Result as StdResult;

@@ -50,7 +50,7 @@ pub type Attribute = u32;
 pub type DocumentId = u32;
 pub type FieldId = u8;
 pub type Position = u32;
-pub type FieldsDistribution = HashMap<String, u64>;
+pub type FieldsDistribution = BTreeMap<String, u64>;

 type MergeFn<E> = for<'a> fn(&[u8], &[Cow<'a, [u8]>]) -> StdResult<Vec<u8>, E>;

@@ -1,4 +1,4 @@
-use std::collections::hash_map::Entry;
+use std::collections::btree_map::Entry;
 use std::collections::HashMap;

 use chrono::Utc;

@@ -1,4 +1,5 @@
 use std::borrow::Cow;
+use std::collections::btree_map::Entry;
 use std::fs::File;
 use std::io::{Read, Seek, SeekFrom};
 use std::iter::Peekable;

@@ -419,11 +420,6 @@ impl Transform<'_, '_> {
             // we use it and insert it in the list of replaced documents.
             replaced_documents_ids.insert(docid);

-            // Depending on the update indexing method we will merge
-            // the document update with the current document or not.
-            match self.index_documents_method {
-                IndexDocumentsMethod::ReplaceDocuments => (docid, update_obkv),
-                IndexDocumentsMethod::UpdateDocuments => {
             let key = BEU32::new(docid);
             let base_obkv = self.index.documents.get(&self.rtxn, &key)?.ok_or(
                 InternalError::DatabaseMissingEntry {

@@ -431,6 +427,25 @@ impl Transform<'_, '_> {
                     key: None,
                 },
             )?;
+
+            // we remove all the fields that were already counted
+            for (field_id, _) in base_obkv.iter() {
+                let field_name = fields_ids_map.name(field_id).unwrap();
+                if let Entry::Occupied(mut entry) =
+                    field_distribution.entry(field_name.to_string())
+                {
+                    match entry.get().checked_sub(1) {
+                        Some(0) | None => entry.remove(),
+                        Some(count) => entry.insert(count),
+                    };
+                }
+            }
+
+            // Depending on the update indexing method we will merge
+            // the document update with the current document or not.
+            match self.index_documents_method {
+                IndexDocumentsMethod::ReplaceDocuments => (docid, update_obkv),
+                IndexDocumentsMethod::UpdateDocuments => {
                     let update_obkv = obkv::KvReader::new(update_obkv);
                     merge_two_obkvs(base_obkv, update_obkv, &mut obkv_buffer);
                     (docid, obkv_buffer.as_slice())
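The decrement loop added above pairs btree_map::Entry with checked_sub so that a field whose count reaches zero is removed from the map rather than left behind as a zero entry. The same pattern in isolation (plain BTreeMap, detached from Transform):

    use std::collections::btree_map::Entry;
    use std::collections::BTreeMap;

    fn decrement(dist: &mut BTreeMap<String, u64>, field: &str) {
        if let Entry::Occupied(mut entry) = dist.entry(field.to_string()) {
            match entry.get().checked_sub(1) {
                // A count of 1 (or an impossible 0) drops the entry entirely,
                // so the map never stores zero counts.
                Some(0) | None => entry.remove(),
                Some(count) => entry.insert(count),
            };
        }
    }

    fn main() {
        let mut dist = BTreeMap::new();
        dist.insert("age".to_string(), 1u64);
        dist.insert("name".to_string(), 2);

        decrement(&mut dist, "age");
        decrement(&mut dist, "name");

        assert!(!dist.contains_key("age")); // count hit zero: entry removed
        assert_eq!(dist.get("name"), Some(&1)); // decremented in place
    }

Using checked_sub also means a count that was somehow already zero collapses into the removal case instead of underflowing.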