mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-01-23 03:37:28 +01:00
Introduce a max prefix length parameter to the word prefix pair proximity update
This commit is contained in:
parent
1514dfa1b7
commit
f04cd19886
@ -19,6 +19,7 @@ pub struct WordPrefixPairProximityDocids<'t, 'u, 'i> {
|
||||
pub(crate) max_nb_chunks: Option<usize>,
|
||||
pub(crate) max_memory: Option<usize>,
|
||||
max_proximity: u8,
|
||||
max_prefix_length: usize,
|
||||
}
|
||||
|
||||
impl<'t, 'u, 'i> WordPrefixPairProximityDocids<'t, 'u, 'i> {
|
||||
@ -34,6 +35,7 @@ impl<'t, 'u, 'i> WordPrefixPairProximityDocids<'t, 'u, 'i> {
|
||||
max_nb_chunks: None,
|
||||
max_memory: None,
|
||||
max_proximity: 4,
|
||||
max_prefix_length: 2,
|
||||
}
|
||||
}
|
||||
|
||||
@ -48,6 +50,17 @@ impl<'t, 'u, 'i> WordPrefixPairProximityDocids<'t, 'u, 'i> {
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the maximum length, in bytes, that a prefix may have for the word pairs it forms
|
||||
/// to be taken into account by this update. Prefixes longer than this value are skipped
|
||||
/// when the word prefix pair proximity entries are computed.
|
||||
///
|
||||
/// Default value is 2. The value is stored as given: no clamping or validation is
|
||||
/// applied by this setter.
|
||||
pub fn max_prefix_length(&mut self, value: usize) -> &mut Self {
|
||||
self.max_prefix_length = value;
|
||||
|
||||
self
|
||||
}
|
||||
|
||||
#[logging_timer::time("WordPrefixPairProximityDocids::{}")]
|
||||
pub fn execute(self) -> Result<()> {
|
||||
debug!("Computing and writing the word prefix pair proximity docids into LMDB on disk...");
|
||||
@ -94,15 +107,17 @@ impl<'t, 'u, 'i> WordPrefixPairProximityDocids<'t, 'u, 'i> {
|
||||
buffer.clear();
|
||||
buffer.extend_from_slice(w1.as_bytes());
|
||||
buffer.push(0);
|
||||
for prefix in prefixes.iter().filter(|prefix| w2.starts_with(prefix.as_str())) {
|
||||
buffer.truncate(w1.len() + 1);
|
||||
buffer.extend_from_slice(prefix.as_bytes());
|
||||
buffer.push(prox);
|
||||
for prefix in prefixes.iter() {
|
||||
if prefix.len() <= self.max_prefix_length && w2.starts_with(prefix) {
|
||||
buffer.truncate(w1.len() + 1);
|
||||
buffer.extend_from_slice(prefix.as_bytes());
|
||||
buffer.push(prox);
|
||||
|
||||
match prefixes_cache.get_mut(&buffer) {
|
||||
Some(value) => value.push(data),
|
||||
None => {
|
||||
prefixes_cache.insert(buffer.clone(), vec![data]);
|
||||
match prefixes_cache.get_mut(&buffer) {
|
||||
Some(value) => value.push(data),
|
||||
None => {
|
||||
prefixes_cache.insert(buffer.clone(), vec![data]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user