mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-07-04 20:37:15 +02:00
Optimise the merge functions to avoid allocations
This commit is contained in:
parent
cde8478388
commit
f948a03be2
4 changed files with 17 additions and 16 deletions
|
@ -1,3 +1,5 @@
|
|||
use std::borrow::Cow;
|
||||
|
||||
use anyhow::bail;
|
||||
use bstr::ByteSlice as _;
|
||||
use fst::IntoStreamer;
|
||||
|
@ -9,7 +11,7 @@ const WORDS_FST_KEY: &[u8] = crate::WORDS_FST_KEY.as_bytes();
|
|||
const HEADERS_KEY: &[u8] = crate::HEADERS_KEY.as_bytes();
|
||||
const DOCUMENTS_IDS_KEY: &[u8] = crate::DOCUMENTS_IDS_KEY.as_bytes();
|
||||
|
||||
pub fn main_merge(key: &[u8], values: &[Vec<u8>]) -> anyhow::Result<Vec<u8>> {
|
||||
pub fn main_merge(key: &[u8], values: &[Cow<[u8]>]) -> anyhow::Result<Vec<u8>> {
|
||||
match key {
|
||||
WORDS_FST_KEY => {
|
||||
let fsts: Vec<_> = values.iter().map(|v| fst::Set::new(v).unwrap()).collect();
|
||||
|
@ -32,12 +34,12 @@ pub fn main_merge(key: &[u8], values: &[Vec<u8>]) -> anyhow::Result<Vec<u8>> {
|
|||
}
|
||||
}
|
||||
|
||||
pub fn word_docids_merge(_key: &[u8], values: &[Vec<u8>]) -> anyhow::Result<Vec<u8>> {
|
||||
pub fn word_docids_merge(_key: &[u8], values: &[Cow<[u8]>]) -> anyhow::Result<Vec<u8>> {
|
||||
let (head, tail) = values.split_first().unwrap();
|
||||
let mut head = RoaringBitmap::deserialize_from(head.as_slice())?;
|
||||
let mut head = RoaringBitmap::deserialize_from(&head[..])?;
|
||||
|
||||
for value in tail {
|
||||
let bitmap = RoaringBitmap::deserialize_from(value.as_slice())?;
|
||||
let bitmap = RoaringBitmap::deserialize_from(&value[..])?;
|
||||
head.union_with(&bitmap);
|
||||
}
|
||||
|
||||
|
@ -46,16 +48,16 @@ pub fn word_docids_merge(_key: &[u8], values: &[Vec<u8>]) -> anyhow::Result<Vec<
|
|||
Ok(vec)
|
||||
}
|
||||
|
||||
pub fn docid_word_positions_merge(key: &[u8], _values: &[Vec<u8>]) -> anyhow::Result<Vec<u8>> {
|
||||
pub fn docid_word_positions_merge(key: &[u8], _values: &[Cow<[u8]>]) -> anyhow::Result<Vec<u8>> {
|
||||
bail!("merging docid word positions is an error ({:?})", key.as_bstr())
|
||||
}
|
||||
|
||||
pub fn words_pairs_proximities_docids_merge(_key: &[u8], values: &[Vec<u8>]) -> anyhow::Result<Vec<u8>> {
|
||||
pub fn words_pairs_proximities_docids_merge(_key: &[u8], values: &[Cow<[u8]>]) -> anyhow::Result<Vec<u8>> {
|
||||
let (head, tail) = values.split_first().unwrap();
|
||||
let mut head = CboRoaringBitmapCodec::deserialize_from(head.as_slice())?;
|
||||
let mut head = CboRoaringBitmapCodec::deserialize_from(&head[..])?;
|
||||
|
||||
for value in tail {
|
||||
let bitmap = CboRoaringBitmapCodec::deserialize_from(value.as_slice())?;
|
||||
let bitmap = CboRoaringBitmapCodec::deserialize_from(&value[..])?;
|
||||
head.union_with(&bitmap);
|
||||
}
|
||||
|
||||
|
@ -64,6 +66,6 @@ pub fn words_pairs_proximities_docids_merge(_key: &[u8], values: &[Vec<u8>]) ->
|
|||
Ok(vec)
|
||||
}
|
||||
|
||||
pub fn documents_merge(key: &[u8], _values: &[Vec<u8>]) -> anyhow::Result<Vec<u8>> {
|
||||
pub fn documents_merge(key: &[u8], _values: &[Cow<[u8]>]) -> anyhow::Result<Vec<u8>> {
|
||||
bail!("merging documents is an error ({:?})", key.as_bstr())
|
||||
}
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
use std::borrow::Cow;
|
||||
use std::fs::File;
|
||||
use std::io::{self, Read, Seek, SeekFrom};
|
||||
use std::sync::mpsc::sync_channel;
|
||||
|
@ -81,7 +82,7 @@ enum WriteMethod {
|
|||
GetMergePut,
|
||||
}
|
||||
|
||||
type MergeFn = fn(&[u8], &[Vec<u8>]) -> anyhow::Result<Vec<u8>>;
|
||||
type MergeFn = for<'a> fn(&[u8], &[Cow<'a, [u8]>]) -> anyhow::Result<Vec<u8>>;
|
||||
|
||||
fn create_writer(typ: CompressionType, level: Option<u32>, file: File) -> io::Result<Writer<File>> {
|
||||
let mut builder = Writer::builder();
|
||||
|
@ -159,8 +160,7 @@ fn merge_into_lmdb_database(
|
|||
while let Some((k, v)) = in_iter.next()? {
|
||||
match database.get::<_, ByteSlice, ByteSlice>(wtxn, k)? {
|
||||
Some(old_val) => {
|
||||
// TODO improve the function signature and avoid allocating here!
|
||||
let vals = vec![old_val.to_vec(), v.to_vec()];
|
||||
let vals = vec![Cow::Borrowed(old_val), Cow::Borrowed(v)];
|
||||
let val = merge(k, &vals).expect("merge failed");
|
||||
database.put::<_, ByteSlice, ByteSlice>(wtxn, k, &val)?
|
||||
},
|
||||
|
@ -195,8 +195,7 @@ fn write_into_lmdb_database(
|
|||
while let Some((k, v)) = reader.next()? {
|
||||
match database.get::<_, ByteSlice, ByteSlice>(wtxn, k)? {
|
||||
Some(old_val) => {
|
||||
// TODO improve the function signature and avoid alocating here!
|
||||
let vals = vec![old_val.to_vec(), v.to_vec()];
|
||||
let vals = vec![Cow::Borrowed(old_val), Cow::Borrowed(v)];
|
||||
let val = merge(k, &vals).expect("merge failed");
|
||||
database.put::<_, ByteSlice, ByteSlice>(wtxn, k, &val)?
|
||||
},
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue