mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-27 07:14:26 +01:00
Add a test to check that merging works correctly with CSVs
This commit is contained in:
parent
955302fd95
commit
0d01e4854b
@ -70,7 +70,7 @@ pub fn documents_merge(key: &[u8], _values: &[Cow<[u8]>]) -> anyhow::Result<Vec<
|
||||
bail!("merging documents is an error ({:?})", key.as_bstr())
|
||||
}
|
||||
|
||||
pub fn merge_two_obkv(base: obkv::KvReader, update: obkv::KvReader, buffer: &mut Vec<u8>) {
|
||||
pub fn merge_two_obkvs(base: obkv::KvReader, update: obkv::KvReader, buffer: &mut Vec<u8>) {
|
||||
use itertools::merge_join_by;
|
||||
use itertools::EitherOrBoth::{Both, Left, Right};
|
||||
|
||||
|
@ -491,7 +491,7 @@ mod tests {
|
||||
use heed::EnvOpenOptions;
|
||||
|
||||
#[test]
|
||||
fn simple_replacement() {
|
||||
fn simple_document_replacement() {
|
||||
let path = tempfile::tempdir().unwrap();
|
||||
let mut options = EnvOpenOptions::new();
|
||||
options.map_size(10 * 1024 * 1024); // 10 MB
|
||||
@ -516,7 +516,7 @@ mod tests {
|
||||
IndexDocuments::new(&mut wtxn, &index).execute(content, |_, _| ()).unwrap();
|
||||
wtxn.commit().unwrap();
|
||||
|
||||
// Check that there is **always*** 3 documents.
|
||||
// Check that there is **always** 3 documents.
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
let count = index.number_of_documents(&rtxn).unwrap();
|
||||
assert_eq!(count, 3);
|
||||
@ -528,10 +528,73 @@ mod tests {
|
||||
IndexDocuments::new(&mut wtxn, &index).execute(content, |_, _| ()).unwrap();
|
||||
wtxn.commit().unwrap();
|
||||
|
||||
// Check that there is **always*** 3 documents.
|
||||
// Check that there is **always** 3 documents.
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
let count = index.number_of_documents(&rtxn).unwrap();
|
||||
assert_eq!(count, 3);
|
||||
drop(rtxn);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn simple_document_merge() {
|
||||
let path = tempfile::tempdir().unwrap();
|
||||
let mut options = EnvOpenOptions::new();
|
||||
options.map_size(10 * 1024 * 1024); // 10 MB
|
||||
|
||||
let index = Index::new(options, &path).unwrap();
|
||||
|
||||
// First we send 3 documents with duplicate ids and
|
||||
// change the index method to merge documents.
|
||||
let mut wtxn = index.write_txn().unwrap();
|
||||
let content = &b"id,name\n1,kevin\n1,kevina\n1,benoit\n"[..];
|
||||
let mut builder = IndexDocuments::new(&mut wtxn, &index);
|
||||
builder.index_documents_method(IndexDocumentsMethod::UpdateDocuments);
|
||||
builder.execute(content, |_, _| ()).unwrap();
|
||||
wtxn.commit().unwrap();
|
||||
|
||||
// Check that there is only 1 document now.
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
let count = index.number_of_documents(&rtxn).unwrap();
|
||||
assert_eq!(count, 1);
|
||||
|
||||
// Check that we get only one document from the database.
|
||||
let docs = index.documents(&rtxn, Some(0)).unwrap();
|
||||
assert_eq!(docs.len(), 1);
|
||||
let (id, doc) = docs[0];
|
||||
assert_eq!(id, 0);
|
||||
|
||||
// Check that this document is equal to the last one sent.
|
||||
let mut doc_iter = doc.iter();
|
||||
assert_eq!(doc_iter.next(), Some((0, &br#""1""#[..])));
|
||||
assert_eq!(doc_iter.next(), Some((1, &br#""benoit""#[..])));
|
||||
assert_eq!(doc_iter.next(), None);
|
||||
drop(rtxn);
|
||||
|
||||
// Second we send 1 document with id 1, to force it to be merged with the previous one.
|
||||
let mut wtxn = index.write_txn().unwrap();
|
||||
let content = &b"id,age\n1,25\n"[..];
|
||||
let mut builder = IndexDocuments::new(&mut wtxn, &index);
|
||||
builder.index_documents_method(IndexDocumentsMethod::UpdateDocuments);
|
||||
builder.execute(content, |_, _| ()).unwrap();
|
||||
wtxn.commit().unwrap();
|
||||
|
||||
// Check that there is **always** 1 document.
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
let count = index.number_of_documents(&rtxn).unwrap();
|
||||
assert_eq!(count, 1);
|
||||
|
||||
// Check that we get only one document from the database.
|
||||
let docs = index.documents(&rtxn, Some(0)).unwrap();
|
||||
assert_eq!(docs.len(), 1);
|
||||
let (id, doc) = docs[0];
|
||||
assert_eq!(id, 0);
|
||||
|
||||
// Check that this document is equal to the last one sent.
|
||||
let mut doc_iter = doc.iter();
|
||||
assert_eq!(doc_iter.next(), Some((0, &br#""1""#[..])));
|
||||
assert_eq!(doc_iter.next(), Some((1, &br#""benoit""#[..])));
|
||||
assert_eq!(doc_iter.next(), Some((2, &br#""25""#[..])));
|
||||
assert_eq!(doc_iter.next(), None);
|
||||
drop(rtxn);
|
||||
}
|
||||
}
|
||||
|
@ -10,7 +10,7 @@ use roaring::RoaringBitmap;
|
||||
|
||||
use crate::{BEU32, Index, FieldsIdsMap};
|
||||
use crate::update::AvailableDocumentsIds;
|
||||
use super::merge_function::merge_two_obkv;
|
||||
use super::merge_function::merge_two_obkvs;
|
||||
use super::{create_writer, create_sorter, IndexDocumentsMethod};
|
||||
|
||||
pub struct TransformOutput {
|
||||
@ -58,14 +58,14 @@ impl Transform<'_, '_> {
|
||||
fields_ids.push(id);
|
||||
}
|
||||
|
||||
/// The last value associated with an id is kept.
|
||||
fn merge_last_win(_key: &[u8], vals: &[Cow<[u8]>]) -> anyhow::Result<Vec<u8>> {
|
||||
vals.last().context("no last value").map(|last| last.clone().into_owned())
|
||||
/// Only the last value associated with an id is kept.
|
||||
fn keep_latest_obkv(_key: &[u8], obkvs: &[Cow<[u8]>]) -> anyhow::Result<Vec<u8>> {
|
||||
obkvs.last().context("no last value").map(|last| last.clone().into_owned())
|
||||
}
|
||||
|
||||
// We initialize the sorter with the user indexing settings.
|
||||
let mut sorter = create_sorter(
|
||||
merge_last_win,
|
||||
keep_latest_obkv,
|
||||
self.chunk_compression_type,
|
||||
self.chunk_compression_level,
|
||||
self.chunk_fusing_shrink_size,
|
||||
@ -132,7 +132,7 @@ impl Transform<'_, '_> {
|
||||
let base_obkv = self.index.documents.get(&self.rtxn, &key)?
|
||||
.context("document not found")?;
|
||||
let update_obkv = obkv::KvReader::new(update_obkv);
|
||||
merge_two_obkv(base_obkv, update_obkv, &mut obkv_buffer);
|
||||
merge_two_obkvs(base_obkv, update_obkv, &mut obkv_buffer);
|
||||
(docid, obkv_buffer.as_slice())
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user