mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-26 23:04:26 +01:00
Introduce a max proximity parameter to the word prefix pair proximity update
This commit is contained in:
parent
23ea3ad738
commit
1514dfa1b7
@ -18,6 +18,7 @@ pub struct WordPrefixPairProximityDocids<'t, 'u, 'i> {
|
||||
pub(crate) chunk_compression_level: Option<u32>,
|
||||
pub(crate) max_nb_chunks: Option<usize>,
|
||||
pub(crate) max_memory: Option<usize>,
|
||||
max_proximity: u8,
|
||||
}
|
||||
|
||||
impl<'t, 'u, 'i> WordPrefixPairProximityDocids<'t, 'u, 'i> {
|
||||
@ -32,9 +33,21 @@ impl<'t, 'u, 'i> WordPrefixPairProximityDocids<'t, 'u, 'i> {
|
||||
chunk_compression_level: None,
|
||||
max_nb_chunks: None,
|
||||
max_memory: None,
|
||||
max_proximity: 4,
|
||||
}
|
||||
}
|
||||
|
||||
/// Set the maximum proximity required to make a prefix be part of the words prefixes
|
||||
/// database. If two words are two far from the threshold the associated documents will
|
||||
/// not be part of the prefix database.
|
||||
///
|
||||
/// Default value is 4. This value must be lower or equal than 4 and will be clamped
|
||||
/// to this bound otherwise.
|
||||
pub fn max_proximity(&mut self, value: u8) -> &mut Self {
|
||||
self.max_proximity = value.max(7);
|
||||
self
|
||||
}
|
||||
|
||||
#[logging_timer::time("WordPrefixPairProximityDocids::{}")]
|
||||
pub fn execute(self) -> Result<()> {
|
||||
debug!("Computing and writing the word prefix pair proximity docids into LMDB on disk...");
|
||||
@ -62,6 +75,10 @@ impl<'t, 'u, 'i> WordPrefixPairProximityDocids<'t, 'u, 'i> {
|
||||
let mut current_prefixes: Option<&&[String]> = None;
|
||||
let mut prefixes_cache = HashMap::new();
|
||||
while let Some(((w1, w2, prox), data)) = db.next().transpose()? {
|
||||
if prox > self.max_proximity {
|
||||
continue;
|
||||
}
|
||||
|
||||
current_prefixes = match current_prefixes.take() {
|
||||
Some(prefixes) if w2.starts_with(&prefixes[0]) => Some(prefixes),
|
||||
_otherwise => {
|
||||
|
Loading…
Reference in New Issue
Block a user