mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-03-29 11:00:39 +01:00
Introduce a max prefix length parameter to the word prefix pair proximity update
This commit is contained in:
parent
1514dfa1b7
commit
f04cd19886
@ -19,6 +19,7 @@ pub struct WordPrefixPairProximityDocids<'t, 'u, 'i> {
|
|||||||
pub(crate) max_nb_chunks: Option<usize>,
|
pub(crate) max_nb_chunks: Option<usize>,
|
||||||
pub(crate) max_memory: Option<usize>,
|
pub(crate) max_memory: Option<usize>,
|
||||||
max_proximity: u8,
|
max_proximity: u8,
|
||||||
|
max_prefix_length: usize,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'t, 'u, 'i> WordPrefixPairProximityDocids<'t, 'u, 'i> {
|
impl<'t, 'u, 'i> WordPrefixPairProximityDocids<'t, 'u, 'i> {
|
||||||
@ -34,6 +35,7 @@ impl<'t, 'u, 'i> WordPrefixPairProximityDocids<'t, 'u, 'i> {
|
|||||||
max_nb_chunks: None,
|
max_nb_chunks: None,
|
||||||
max_memory: None,
|
max_memory: None,
|
||||||
max_proximity: 4,
|
max_proximity: 4,
|
||||||
|
max_prefix_length: 2,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -48,6 +50,17 @@ impl<'t, 'u, 'i> WordPrefixPairProximityDocids<'t, 'u, 'i> {
|
|||||||
self
|
self
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Set the maximum length the prefix of a word pair is allowed to have to be part of the words
|
||||||
|
/// prefixes database. If the prefix is longer than this threshold, the associated documents
|
||||||
|
/// will not be part of the prefix database.
|
||||||
|
///
|
||||||
|
/// Default value is 2. The value is stored as given: this setter does not
|
||||||
|
/// clamp it to any upper bound.
|
||||||
|
pub fn max_prefix_length(&mut self, value: usize) -> &mut Self {
|
||||||
|
self.max_prefix_length = value;
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
#[logging_timer::time("WordPrefixPairProximityDocids::{}")]
|
#[logging_timer::time("WordPrefixPairProximityDocids::{}")]
|
||||||
pub fn execute(self) -> Result<()> {
|
pub fn execute(self) -> Result<()> {
|
||||||
debug!("Computing and writing the word prefix pair proximity docids into LMDB on disk...");
|
debug!("Computing and writing the word prefix pair proximity docids into LMDB on disk...");
|
||||||
@ -94,15 +107,17 @@ impl<'t, 'u, 'i> WordPrefixPairProximityDocids<'t, 'u, 'i> {
|
|||||||
buffer.clear();
|
buffer.clear();
|
||||||
buffer.extend_from_slice(w1.as_bytes());
|
buffer.extend_from_slice(w1.as_bytes());
|
||||||
buffer.push(0);
|
buffer.push(0);
|
||||||
for prefix in prefixes.iter().filter(|prefix| w2.starts_with(prefix.as_str())) {
|
for prefix in prefixes.iter() {
|
||||||
buffer.truncate(w1.len() + 1);
|
if prefix.len() <= self.max_prefix_length && w2.starts_with(prefix) {
|
||||||
buffer.extend_from_slice(prefix.as_bytes());
|
buffer.truncate(w1.len() + 1);
|
||||||
buffer.push(prox);
|
buffer.extend_from_slice(prefix.as_bytes());
|
||||||
|
buffer.push(prox);
|
||||||
|
|
||||||
match prefixes_cache.get_mut(&buffer) {
|
match prefixes_cache.get_mut(&buffer) {
|
||||||
Some(value) => value.push(data),
|
Some(value) => value.push(data),
|
||||||
None => {
|
None => {
|
||||||
prefixes_cache.insert(buffer.clone(), vec![data]);
|
prefixes_cache.insert(buffer.clone(), vec![data]);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user