mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-22 21:04:27 +01:00
Remove cached Allocations struct from wpppd indexing
This commit is contained in:
parent
ef75a77464
commit
1bc4788e59
@ -265,9 +265,6 @@ impl<'t, 'u, 'i> WordPrefixPairProximityDocids<'t, 'u, 'i> {
|
|||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
debug!("Computing and writing the word prefix pair proximity docids into LMDB on disk...");
|
debug!("Computing and writing the word prefix pair proximity docids into LMDB on disk...");
|
||||||
|
|
||||||
// This is an optimisation, to reuse allocations between loop iterations
|
|
||||||
let mut allocations = Allocations::default();
|
|
||||||
|
|
||||||
// Make a prefix trie from the common prefixes that are shorter than self.max_prefix_length
|
// Make a prefix trie from the common prefixes that are shorter than self.max_prefix_length
|
||||||
let prefixes = PrefixTrieNode::from_sorted_prefixes(
|
let prefixes = PrefixTrieNode::from_sorted_prefixes(
|
||||||
common_prefix_fst_words
|
common_prefix_fst_words
|
||||||
@ -297,7 +294,6 @@ impl<'t, 'u, 'i> WordPrefixPairProximityDocids<'t, 'u, 'i> {
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
&prefixes,
|
&prefixes,
|
||||||
&mut allocations,
|
|
||||||
self.max_proximity,
|
self.max_proximity,
|
||||||
// and this argument tells what to do with each new key (word1, prefix, proximity) and value (roaring bitmap)
|
// and this argument tells what to do with each new key (word1, prefix, proximity) and value (roaring bitmap)
|
||||||
|key, value| {
|
|key, value| {
|
||||||
@ -340,7 +336,6 @@ impl<'t, 'u, 'i> WordPrefixPairProximityDocids<'t, 'u, 'i> {
|
|||||||
&mut db_iter,
|
&mut db_iter,
|
||||||
|db_iter| db_iter.next().transpose().map_err(|e| e.into()),
|
|db_iter| db_iter.next().transpose().map_err(|e| e.into()),
|
||||||
&prefixes,
|
&prefixes,
|
||||||
&mut allocations,
|
|
||||||
self.max_proximity,
|
self.max_proximity,
|
||||||
|key, value| writer.insert(key, value).map_err(|e| e.into()),
|
|key, value| writer.insert(key, value).map_err(|e| e.into()),
|
||||||
)?;
|
)?;
|
||||||
@ -393,7 +388,6 @@ fn execute_on_word_pairs_and_prefixes<Iter>(
|
|||||||
Option<((&'a [u8], &'a [u8], u8), &'a [u8])>,
|
Option<((&'a [u8], &'a [u8], u8), &'a [u8])>,
|
||||||
>,
|
>,
|
||||||
prefixes: &PrefixTrieNode,
|
prefixes: &PrefixTrieNode,
|
||||||
allocations: &mut Allocations,
|
|
||||||
max_proximity: u8,
|
max_proximity: u8,
|
||||||
mut insert: impl for<'a> FnMut(&'a [u8], &'a [u8]) -> Result<()>,
|
mut insert: impl for<'a> FnMut(&'a [u8], &'a [u8]) -> Result<()>,
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
@ -406,8 +400,8 @@ fn execute_on_word_pairs_and_prefixes<Iter>(
|
|||||||
// Optimisation: true if there are no potential prefixes for the current word2 based on its first letter
|
// Optimisation: true if there are no potential prefixes for the current word2 based on its first letter
|
||||||
let mut empty_prefixes = false;
|
let mut empty_prefixes = false;
|
||||||
|
|
||||||
let mut prefix_buffer = allocations.take_byte_vector();
|
let mut prefix_buffer = Vec::with_capacity(8);
|
||||||
let mut merge_buffer = allocations.take_byte_vector();
|
let mut merge_buffer = Vec::with_capacity(65_536);
|
||||||
|
|
||||||
while let Some(((word1, word2, proximity), data)) = next_word_pair_proximity(iter)? {
|
while let Some(((word1, word2, proximity), data)) = next_word_pair_proximity(iter)? {
|
||||||
// skip this iteration if the proximity is over the threshold
|
// skip this iteration if the proximity is over the threshold
|
||||||
@ -426,7 +420,7 @@ fn execute_on_word_pairs_and_prefixes<Iter>(
|
|||||||
// than the previous start of word2, then we'll need to flush the batch
|
// than the previous start of word2, then we'll need to flush the batch
|
||||||
let word1_different_than_prev = word1 != batch.word1;
|
let word1_different_than_prev = word1 != batch.word1;
|
||||||
if word1_different_than_prev || word2_start_different_than_prev {
|
if word1_different_than_prev || word2_start_different_than_prev {
|
||||||
batch.flush(allocations, &mut merge_buffer, &mut insert)?;
|
batch.flush(&mut merge_buffer, &mut insert)?;
|
||||||
// don't forget to reset the value of batch.word1 and prev_word2_start
|
// don't forget to reset the value of batch.word1 and prev_word2_start
|
||||||
if word1_different_than_prev {
|
if word1_different_than_prev {
|
||||||
prefix_search_start.0 = 0;
|
prefix_search_start.0 = 0;
|
||||||
@ -448,19 +442,17 @@ fn execute_on_word_pairs_and_prefixes<Iter>(
|
|||||||
&mut prefix_buffer,
|
&mut prefix_buffer,
|
||||||
&prefix_search_start,
|
&prefix_search_start,
|
||||||
|prefix_buffer| {
|
|prefix_buffer| {
|
||||||
let mut value = allocations.take_byte_vector();
|
|
||||||
value.extend_from_slice(&data);
|
|
||||||
let prefix_len = prefix_buffer.len();
|
let prefix_len = prefix_buffer.len();
|
||||||
prefix_buffer.push(0);
|
prefix_buffer.push(0);
|
||||||
prefix_buffer.push(proximity);
|
prefix_buffer.push(proximity);
|
||||||
batch.insert(&prefix_buffer, value, allocations);
|
batch.insert(&prefix_buffer, data.to_vec());
|
||||||
prefix_buffer.truncate(prefix_len);
|
prefix_buffer.truncate(prefix_len);
|
||||||
},
|
},
|
||||||
);
|
);
|
||||||
prefix_buffer.clear();
|
prefix_buffer.clear();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
batch.flush(allocations, &mut merge_buffer, &mut insert)?;
|
batch.flush(&mut merge_buffer, &mut insert)?;
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
/**
|
/**
|
||||||
@ -482,17 +474,13 @@ struct PrefixAndProximityBatch {
|
|||||||
|
|
||||||
impl PrefixAndProximityBatch {
|
impl PrefixAndProximityBatch {
|
||||||
/// Insert the new key and value into the batch
|
/// Insert the new key and value into the batch
|
||||||
fn insert(&mut self, new_key: &[u8], new_value: Vec<u8>, allocations: &mut Allocations) {
|
fn insert(&mut self, new_key: &[u8], new_value: Vec<u8>) {
|
||||||
match self.batch.binary_search_by_key(&new_key, |(k, _)| k.as_slice()) {
|
match self.batch.binary_search_by_key(&new_key, |(k, _)| k.as_slice()) {
|
||||||
Ok(position) => {
|
Ok(position) => {
|
||||||
self.batch[position].1.push(Cow::Owned(new_value));
|
self.batch[position].1.push(Cow::Owned(new_value));
|
||||||
}
|
}
|
||||||
Err(position) => {
|
Err(position) => {
|
||||||
let mut key = allocations.take_byte_vector();
|
self.batch.insert(position, (new_key.to_vec(), vec![Cow::Owned(new_value)]));
|
||||||
key.extend_from_slice(new_key);
|
|
||||||
let mut mergeable_data = allocations.take_mergeable_data_vector();
|
|
||||||
mergeable_data.push(Cow::Owned(new_value));
|
|
||||||
self.batch.insert(position, (key, mergeable_data));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -502,7 +490,6 @@ impl PrefixAndProximityBatch {
|
|||||||
/// The key given to `insert` is `(word1, prefix, proximity)` and the value is the associated merged roaring bitmap.
|
/// The key given to `insert` is `(word1, prefix, proximity)` and the value is the associated merged roaring bitmap.
|
||||||
fn flush(
|
fn flush(
|
||||||
&mut self,
|
&mut self,
|
||||||
allocations: &mut Allocations,
|
|
||||||
merge_buffer: &mut Vec<u8>,
|
merge_buffer: &mut Vec<u8>,
|
||||||
insert: &mut impl for<'buffer> FnMut(&'buffer [u8], &'buffer [u8]) -> Result<()>,
|
insert: &mut impl for<'buffer> FnMut(&'buffer [u8], &'buffer [u8]) -> Result<()>,
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
@ -512,7 +499,7 @@ impl PrefixAndProximityBatch {
|
|||||||
}
|
}
|
||||||
merge_buffer.clear();
|
merge_buffer.clear();
|
||||||
|
|
||||||
let mut buffer = allocations.take_byte_vector();
|
let mut buffer = Vec::with_capacity(word1.len() + 1 + 6 + 1);
|
||||||
buffer.extend_from_slice(word1);
|
buffer.extend_from_slice(word1);
|
||||||
buffer.push(0);
|
buffer.push(0);
|
||||||
|
|
||||||
@ -528,8 +515,6 @@ impl PrefixAndProximityBatch {
|
|||||||
};
|
};
|
||||||
insert(buffer.as_slice(), data)?;
|
insert(buffer.as_slice(), data)?;
|
||||||
merge_buffer.clear();
|
merge_buffer.clear();
|
||||||
allocations.reclaim_byte_vector(key);
|
|
||||||
allocations.reclaim_mergeable_data_vector(mergeable_data);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
@ -591,36 +576,6 @@ pub fn write_into_lmdb_database_without_merging(
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
struct Allocations {
|
|
||||||
byte_vectors: Vec<Vec<u8>>,
|
|
||||||
mergeable_data_vectors: Vec<Vec<Cow<'static, [u8]>>>,
|
|
||||||
}
|
|
||||||
impl Default for Allocations {
|
|
||||||
fn default() -> Self {
|
|
||||||
Self {
|
|
||||||
byte_vectors: Vec::with_capacity(65_536),
|
|
||||||
mergeable_data_vectors: Vec::with_capacity(4096),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
impl Allocations {
|
|
||||||
fn take_byte_vector(&mut self) -> Vec<u8> {
|
|
||||||
self.byte_vectors.pop().unwrap_or_else(|| Vec::with_capacity(16))
|
|
||||||
}
|
|
||||||
fn take_mergeable_data_vector(&mut self) -> Vec<Cow<'static, [u8]>> {
|
|
||||||
self.mergeable_data_vectors.pop().unwrap_or_else(|| Vec::with_capacity(8))
|
|
||||||
}
|
|
||||||
|
|
||||||
fn reclaim_byte_vector(&mut self, mut data: Vec<u8>) {
|
|
||||||
data.clear();
|
|
||||||
self.byte_vectors.push(data);
|
|
||||||
}
|
|
||||||
fn reclaim_mergeable_data_vector(&mut self, mut data: Vec<Cow<'static, [u8]>>) {
|
|
||||||
data.clear();
|
|
||||||
self.mergeable_data_vectors.push(data);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Default, Debug)]
|
#[derive(Default, Debug)]
|
||||||
struct PrefixTrieNode {
|
struct PrefixTrieNode {
|
||||||
children: Vec<(PrefixTrieNode, u8)>,
|
children: Vec<(PrefixTrieNode, u8)>,
|
||||||
@ -970,7 +925,6 @@ mod tests {
|
|||||||
|
|
||||||
let mut result = vec![];
|
let mut result = vec![];
|
||||||
|
|
||||||
let mut allocations = Allocations::default();
|
|
||||||
let mut iter =
|
let mut iter =
|
||||||
IntoIterator::into_iter(word_pairs).map(|((word1, word2, proximity), data)| {
|
IntoIterator::into_iter(word_pairs).map(|((word1, word2, proximity), data)| {
|
||||||
((word1.as_bytes(), word2.as_bytes(), proximity), data.as_slice())
|
((word1.as_bytes(), word2.as_bytes(), proximity), data.as_slice())
|
||||||
@ -979,7 +933,6 @@ mod tests {
|
|||||||
&mut iter,
|
&mut iter,
|
||||||
|iter| Ok(iter.next()),
|
|iter| Ok(iter.next()),
|
||||||
&prefixes,
|
&prefixes,
|
||||||
&mut allocations,
|
|
||||||
2,
|
2,
|
||||||
|k, v| {
|
|k, v| {
|
||||||
let (word1, prefix, proximity) = StrStrU8Codec::bytes_decode(k).unwrap();
|
let (word1, prefix, proximity) = StrStrU8Codec::bytes_decode(k).unwrap();
|
||||||
|
Loading…
Reference in New Issue
Block a user