mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-30 08:44:27 +01:00
Introduce a max proximity parameter to the word prefix pair proximity update
This commit is contained in:
parent
23ea3ad738
commit
1514dfa1b7
@ -18,6 +18,7 @@ pub struct WordPrefixPairProximityDocids<'t, 'u, 'i> {
|
|||||||
pub(crate) chunk_compression_level: Option<u32>,
|
pub(crate) chunk_compression_level: Option<u32>,
|
||||||
pub(crate) max_nb_chunks: Option<usize>,
|
pub(crate) max_nb_chunks: Option<usize>,
|
||||||
pub(crate) max_memory: Option<usize>,
|
pub(crate) max_memory: Option<usize>,
|
||||||
|
max_proximity: u8,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'t, 'u, 'i> WordPrefixPairProximityDocids<'t, 'u, 'i> {
|
impl<'t, 'u, 'i> WordPrefixPairProximityDocids<'t, 'u, 'i> {
|
||||||
@ -32,9 +33,21 @@ impl<'t, 'u, 'i> WordPrefixPairProximityDocids<'t, 'u, 'i> {
|
|||||||
chunk_compression_level: None,
|
chunk_compression_level: None,
|
||||||
max_nb_chunks: None,
|
max_nb_chunks: None,
|
||||||
max_memory: None,
|
max_memory: None,
|
||||||
|
max_proximity: 4,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Set the maximum proximity required to make a prefix be part of the words prefixes
|
||||||
|
/// database. If two words are two far from the threshold the associated documents will
|
||||||
|
/// not be part of the prefix database.
|
||||||
|
///
|
||||||
|
/// Default value is 4. This value must be lower or equal than 4 and will be clamped
|
||||||
|
/// to this bound otherwise.
|
||||||
|
pub fn max_proximity(&mut self, value: u8) -> &mut Self {
|
||||||
|
self.max_proximity = value.max(7);
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
#[logging_timer::time("WordPrefixPairProximityDocids::{}")]
|
#[logging_timer::time("WordPrefixPairProximityDocids::{}")]
|
||||||
pub fn execute(self) -> Result<()> {
|
pub fn execute(self) -> Result<()> {
|
||||||
debug!("Computing and writing the word prefix pair proximity docids into LMDB on disk...");
|
debug!("Computing and writing the word prefix pair proximity docids into LMDB on disk...");
|
||||||
@ -62,6 +75,10 @@ impl<'t, 'u, 'i> WordPrefixPairProximityDocids<'t, 'u, 'i> {
|
|||||||
let mut current_prefixes: Option<&&[String]> = None;
|
let mut current_prefixes: Option<&&[String]> = None;
|
||||||
let mut prefixes_cache = HashMap::new();
|
let mut prefixes_cache = HashMap::new();
|
||||||
while let Some(((w1, w2, prox), data)) = db.next().transpose()? {
|
while let Some(((w1, w2, prox), data)) = db.next().transpose()? {
|
||||||
|
if prox > self.max_proximity {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
current_prefixes = match current_prefixes.take() {
|
current_prefixes = match current_prefixes.take() {
|
||||||
Some(prefixes) if w2.starts_with(&prefixes[0]) => Some(prefixes),
|
Some(prefixes) if w2.starts_with(&prefixes[0]) => Some(prefixes),
|
||||||
_otherwise => {
|
_otherwise => {
|
||||||
|
Loading…
Reference in New Issue
Block a user