mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-26 23:04:26 +01:00
Apply suggestions from code review
This commit is contained in:
parent
176ffd23f5
commit
a983129613
@ -448,9 +448,8 @@ pub fn resolve_phrase(ctx: &dyn Context, phrase: &[String]) -> Result<RoaringBit
|
|||||||
} else {
|
} else {
|
||||||
let mut bitmap = RoaringBitmap::new();
|
let mut bitmap = RoaringBitmap::new();
|
||||||
for dist in 0..=dist {
|
for dist in 0..=dist {
|
||||||
match ctx.word_pair_proximity_docids(s1, s2, dist as u8 + 1)? {
|
if let Some(m) = ctx.word_pair_proximity_docids(s1, s2, dist as u8 + 1)? {
|
||||||
Some(m) => bitmap |= m,
|
bitmap |= m
|
||||||
None => {}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if bitmap.is_empty() {
|
if bitmap.is_empty() {
|
||||||
|
@ -528,7 +528,13 @@ where
|
|||||||
|
|
||||||
if let Some(word_pair_proximity_docids) = word_pair_proximity_docids {
|
if let Some(word_pair_proximity_docids) = word_pair_proximity_docids {
|
||||||
// Run the word prefix pair proximity docids update operation.
|
// Run the word prefix pair proximity docids update operation.
|
||||||
PrefixWordPairsProximityDocids::new(self.wtxn, self.index).execute(
|
PrefixWordPairsProximityDocids::new(
|
||||||
|
self.wtxn,
|
||||||
|
self.index,
|
||||||
|
self.indexer_config.chunk_compression_type,
|
||||||
|
self.indexer_config.chunk_compression_level,
|
||||||
|
)
|
||||||
|
.execute(
|
||||||
word_pair_proximity_docids,
|
word_pair_proximity_docids,
|
||||||
&new_prefix_fst_words,
|
&new_prefix_fst_words,
|
||||||
&common_prefix_fst_words,
|
&common_prefix_fst_words,
|
||||||
|
@ -2,6 +2,7 @@ use std::borrow::Cow;
|
|||||||
use std::collections::HashSet;
|
use std::collections::HashSet;
|
||||||
use std::io::BufReader;
|
use std::io::BufReader;
|
||||||
|
|
||||||
|
use grenad::CompressionType;
|
||||||
use heed::types::ByteSlice;
|
use heed::types::ByteSlice;
|
||||||
|
|
||||||
use super::index_documents::{merge_cbo_roaring_bitmaps, CursorClonableMmap};
|
use super::index_documents::{merge_cbo_roaring_bitmaps, CursorClonableMmap};
|
||||||
@ -18,10 +19,24 @@ pub struct PrefixWordPairsProximityDocids<'t, 'u, 'i> {
|
|||||||
index: &'i Index,
|
index: &'i Index,
|
||||||
max_proximity: u8,
|
max_proximity: u8,
|
||||||
max_prefix_length: usize,
|
max_prefix_length: usize,
|
||||||
|
chunk_compression_type: CompressionType,
|
||||||
|
chunk_compression_level: Option<u32>,
|
||||||
}
|
}
|
||||||
impl<'t, 'u, 'i> PrefixWordPairsProximityDocids<'t, 'u, 'i> {
|
impl<'t, 'u, 'i> PrefixWordPairsProximityDocids<'t, 'u, 'i> {
|
||||||
pub fn new(wtxn: &'t mut heed::RwTxn<'i, 'u>, index: &'i Index) -> Self {
|
pub fn new(
|
||||||
Self { wtxn, index, max_proximity: 4, max_prefix_length: 2 }
|
wtxn: &'t mut heed::RwTxn<'i, 'u>,
|
||||||
|
index: &'i Index,
|
||||||
|
chunk_compression_type: CompressionType,
|
||||||
|
chunk_compression_level: Option<u32>,
|
||||||
|
) -> Self {
|
||||||
|
Self {
|
||||||
|
wtxn,
|
||||||
|
index,
|
||||||
|
max_proximity: 4,
|
||||||
|
max_prefix_length: 2,
|
||||||
|
chunk_compression_type,
|
||||||
|
chunk_compression_level,
|
||||||
|
}
|
||||||
}
|
}
|
||||||
/// Set the maximum proximity required to make a prefix be part of the words prefixes
|
/// Set the maximum proximity required to make a prefix be part of the words prefixes
|
||||||
/// database. If two words are too far from the threshold the associated documents will
|
/// database. If two words are too far from the threshold the associated documents will
|
||||||
@ -42,6 +57,7 @@ impl<'t, 'u, 'i> PrefixWordPairsProximityDocids<'t, 'u, 'i> {
|
|||||||
self.max_prefix_length = value;
|
self.max_prefix_length = value;
|
||||||
self
|
self
|
||||||
}
|
}
|
||||||
|
|
||||||
#[logging_timer::time("WordPrefixPairProximityDocids::{}")]
|
#[logging_timer::time("WordPrefixPairProximityDocids::{}")]
|
||||||
pub fn execute<'a>(
|
pub fn execute<'a>(
|
||||||
self,
|
self,
|
||||||
@ -60,6 +76,8 @@ impl<'t, 'u, 'i> PrefixWordPairsProximityDocids<'t, 'u, 'i> {
|
|||||||
new_prefix_fst_words,
|
new_prefix_fst_words,
|
||||||
common_prefix_fst_words,
|
common_prefix_fst_words,
|
||||||
del_prefix_fst_words,
|
del_prefix_fst_words,
|
||||||
|
self.chunk_compression_type,
|
||||||
|
self.chunk_compression_level,
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
index_prefix_word_database(
|
index_prefix_word_database(
|
||||||
@ -72,6 +90,8 @@ impl<'t, 'u, 'i> PrefixWordPairsProximityDocids<'t, 'u, 'i> {
|
|||||||
new_prefix_fst_words,
|
new_prefix_fst_words,
|
||||||
common_prefix_fst_words,
|
common_prefix_fst_words,
|
||||||
del_prefix_fst_words,
|
del_prefix_fst_words,
|
||||||
|
self.chunk_compression_type,
|
||||||
|
self.chunk_compression_level,
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
|
@ -23,6 +23,8 @@ pub fn index_prefix_word_database(
|
|||||||
new_prefix_fst_words: &[String],
|
new_prefix_fst_words: &[String],
|
||||||
common_prefix_fst_words: &[&[String]],
|
common_prefix_fst_words: &[&[String]],
|
||||||
del_prefix_fst_words: &HashSet<Vec<u8>>,
|
del_prefix_fst_words: &HashSet<Vec<u8>>,
|
||||||
|
chunk_compression_type: CompressionType,
|
||||||
|
chunk_compression_level: Option<u32>,
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
let max_proximity = max_proximity - 1;
|
let max_proximity = max_proximity - 1;
|
||||||
debug!("Computing and writing the word prefix pair proximity docids into LMDB on disk...");
|
debug!("Computing and writing the word prefix pair proximity docids into LMDB on disk...");
|
||||||
@ -35,7 +37,7 @@ pub fn index_prefix_word_database(
|
|||||||
.filter(|s| s.len() <= max_prefix_length)
|
.filter(|s| s.len() <= max_prefix_length)
|
||||||
.collect();
|
.collect();
|
||||||
|
|
||||||
for proximity in 1..=max_proximity - 1 {
|
for proximity in 1..max_proximity {
|
||||||
for prefix in common_prefixes.iter() {
|
for prefix in common_prefixes.iter() {
|
||||||
let mut prefix_key = vec![];
|
let mut prefix_key = vec![];
|
||||||
prefix_key.push(proximity);
|
prefix_key.push(proximity);
|
||||||
@ -78,7 +80,8 @@ pub fn index_prefix_word_database(
|
|||||||
|
|
||||||
// Since we read the DB, we can't write to it directly, so we add each new (word1, prefix, proximity)
|
// Since we read the DB, we can't write to it directly, so we add each new (word1, prefix, proximity)
|
||||||
// element in an intermediary grenad
|
// element in an intermediary grenad
|
||||||
let mut writer = create_writer(CompressionType::None, None, tempfile::tempfile()?);
|
let mut writer =
|
||||||
|
create_writer(chunk_compression_type, chunk_compression_level, tempfile::tempfile()?);
|
||||||
|
|
||||||
for proximity in 1..=max_proximity - 1 {
|
for proximity in 1..=max_proximity - 1 {
|
||||||
for prefix in new_prefixes.iter() {
|
for prefix in new_prefixes.iter() {
|
||||||
@ -144,7 +147,7 @@ fn execute_on_word_pairs_and_prefixes<I>(
|
|||||||
mut next_word2_and_docids: impl for<'a> FnMut(&'a mut I) -> Result<Option<(&'a [u8], &'a [u8])>>,
|
mut next_word2_and_docids: impl for<'a> FnMut(&'a mut I) -> Result<Option<(&'a [u8], &'a [u8])>>,
|
||||||
mut insert: impl for<'a> FnMut(&'a [u8], &'a [u8]) -> Result<()>,
|
mut insert: impl for<'a> FnMut(&'a [u8], &'a [u8]) -> Result<()>,
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
let mut batch: BTreeMap<Vec<u8>, Vec<Cow<'static, [u8]>>> = <_>::default();
|
let mut batch: BTreeMap<Vec<u8>, Vec<Cow<'static, [u8]>>> = BTreeMap::default();
|
||||||
|
|
||||||
// Memory usage check:
|
// Memory usage check:
|
||||||
// The content of the loop will be called for each `word2` that follows a word beginning
|
// The content of the loop will be called for each `word2` that follows a word beginning
|
||||||
|
@ -187,6 +187,8 @@ pub fn index_word_prefix_database(
|
|||||||
new_prefix_fst_words: &[String],
|
new_prefix_fst_words: &[String],
|
||||||
common_prefix_fst_words: &[&[String]],
|
common_prefix_fst_words: &[&[String]],
|
||||||
del_prefix_fst_words: &HashSet<Vec<u8>>,
|
del_prefix_fst_words: &HashSet<Vec<u8>>,
|
||||||
|
chunk_compression_type: CompressionType,
|
||||||
|
chunk_compression_level: Option<u32>,
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
debug!("Computing and writing the word prefix pair proximity docids into LMDB on disk...");
|
debug!("Computing and writing the word prefix pair proximity docids into LMDB on disk...");
|
||||||
|
|
||||||
@ -249,7 +251,8 @@ pub fn index_word_prefix_database(
|
|||||||
|
|
||||||
// Since we read the DB, we can't write to it directly, so we add each new (proximity, word1, prefix)
|
// Since we read the DB, we can't write to it directly, so we add each new (proximity, word1, prefix)
|
||||||
// element in an intermediary grenad
|
// element in an intermediary grenad
|
||||||
let mut writer = create_writer(CompressionType::None, None, tempfile::tempfile()?);
|
let mut writer =
|
||||||
|
create_writer(chunk_compression_type, chunk_compression_level, tempfile::tempfile()?);
|
||||||
|
|
||||||
execute_on_word_pairs_and_prefixes(
|
execute_on_word_pairs_and_prefixes(
|
||||||
&mut db_iter,
|
&mut db_iter,
|
||||||
@ -325,7 +328,7 @@ fn execute_on_word_pairs_and_prefixes<I>(
|
|||||||
};
|
};
|
||||||
let word2_start_different_than_prev = word2[0] != prev_word2_start;
|
let word2_start_different_than_prev = word2[0] != prev_word2_start;
|
||||||
// if there were no potential prefixes for the previous word2 based on its first letter,
|
// if there were no potential prefixes for the previous word2 based on its first letter,
|
||||||
// and if the current word2 starts with the s`ame letter, then there is also no potential
|
// and if the current word2 starts with the same letter, then there is also no potential
|
||||||
// prefixes for the current word2, and we can skip to the next iteration
|
// prefixes for the current word2, and we can skip to the next iteration
|
||||||
if empty_prefixes && !word2_start_different_than_prev {
|
if empty_prefixes && !word2_start_different_than_prev {
|
||||||
continue;
|
continue;
|
||||||
|
Loading…
Reference in New Issue
Block a user