mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-01-11 22:14:32 +01:00
Merge #454
454: Reintroduce appending sorted entries when possible r=Kerollmops a=Kerollmops

This PR modifies the `sorter_into_lmdb_database` function to append values into the database instead of get-put-merging them. This should improve indexing speed when the database is empty.

```txt
group                                             indexing_main_25123af3     indexing_reintroduce-appending-sorted-values_c05e42a8
-----                                             ----------------------     -----------------------------------------------------
indexing/Indexing movies with default settings    1.07 17.8±0.99s ? ?/sec    1.00 16.6±1.04s ? ?/sec
indexing/Indexing songs with default settings     1.00 57.0±6.01s ? ?/sec    1.05 60.1±7.07s ? ?/sec
indexing/Indexing songs without any facets        1.10 51.8±5.36s ? ?/sec    1.00 47.3±3.30s ? ?/sec
```

Co-authored-by: Clément Renault <clement@meilisearch.com>
This commit is contained in:
commit
21898ffc60
@ -3,7 +3,7 @@ use std::fs::File;
|
|||||||
use std::io::{self, Seek, SeekFrom};
|
use std::io::{self, Seek, SeekFrom};
|
||||||
use std::time::Instant;
|
use std::time::Instant;
|
||||||
|
|
||||||
use grenad::{CompressionType, MergerIter, Reader, Sorter};
|
use grenad::{CompressionType, Reader, Sorter};
|
||||||
use heed::types::ByteSlice;
|
use heed::types::ByteSlice;
|
||||||
use log::debug;
|
use log::debug;
|
||||||
|
|
||||||
@ -209,36 +209,34 @@ pub fn sorter_into_lmdb_database(
|
|||||||
debug!("Writing MTBL sorter...");
|
debug!("Writing MTBL sorter...");
|
||||||
let before = Instant::now();
|
let before = Instant::now();
|
||||||
|
|
||||||
merger_iter_into_lmdb_database(wtxn, database, sorter.into_stream_merger_iter()?, merge)?;
|
let mut merger_iter = sorter.into_stream_merger_iter()?;
|
||||||
|
if database.is_empty(wtxn)? {
|
||||||
debug!("MTBL sorter writen in {:.02?}!", before.elapsed());
|
let mut out_iter = database.iter_mut::<_, ByteSlice, ByteSlice>(wtxn)?;
|
||||||
Ok(())
|
while let Some((k, v)) = merger_iter.next()? {
|
||||||
}
|
// safety: we don't keep references from inside the LMDB database.
|
||||||
|
unsafe { out_iter.append(k, v)? };
|
||||||
fn merger_iter_into_lmdb_database<R: io::Read + io::Seek>(
|
}
|
||||||
wtxn: &mut heed::RwTxn,
|
} else {
|
||||||
database: heed::PolyDatabase,
|
while let Some((k, v)) = merger_iter.next()? {
|
||||||
mut merger_iter: MergerIter<R, MergeFn>,
|
let mut iter = database.prefix_iter_mut::<_, ByteSlice, ByteSlice>(wtxn, k)?;
|
||||||
merge: MergeFn,
|
match iter.next().transpose()? {
|
||||||
) -> Result<()> {
|
Some((key, old_val)) if key == k => {
|
||||||
while let Some((k, v)) = merger_iter.next()? {
|
let vals = vec![Cow::Borrowed(old_val), Cow::Borrowed(v)];
|
||||||
let mut iter = database.prefix_iter_mut::<_, ByteSlice, ByteSlice>(wtxn, k)?;
|
let val = merge(k, &vals).map_err(|_| {
|
||||||
match iter.next().transpose()? {
|
// TODO just wrap this error?
|
||||||
Some((key, old_val)) if key == k => {
|
InternalError::IndexingMergingKeys { process: "get-put-merge" }
|
||||||
let vals = vec![Cow::Borrowed(old_val), Cow::Borrowed(v)];
|
})?;
|
||||||
let val = merge(k, &vals).map_err(|_| {
|
// safety: we don't keep references from inside the LMDB database.
|
||||||
// TODO just wrap this error?
|
unsafe { iter.put_current(k, &val)? };
|
||||||
InternalError::IndexingMergingKeys { process: "get-put-merge" }
|
}
|
||||||
})?;
|
_ => {
|
||||||
// safety: we don't keep references from inside the LMDB database.
|
drop(iter);
|
||||||
unsafe { iter.put_current(k, &val)? };
|
database.put::<_, ByteSlice, ByteSlice>(wtxn, k, v)?;
|
||||||
}
|
}
|
||||||
_ => {
|
|
||||||
drop(iter);
|
|
||||||
database.put::<_, ByteSlice, ByteSlice>(wtxn, k, v)?;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
debug!("MTBL sorter writen in {:.02?}!", before.elapsed());
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user