mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-07-04 20:37:15 +02:00
Implement proximityPrecision setting on milli side
This commit is contained in:
parent
0c3fa8cbc4
commit
467b49153d
6 changed files with 224 additions and 66 deletions
|
@ -32,6 +32,7 @@ use super::helpers::{
|
|||
MergeFn, MergeableReader,
|
||||
};
|
||||
use super::{helpers, TypedChunk};
|
||||
use crate::proximity::ProximityPrecision;
|
||||
use crate::{FieldId, Result};
|
||||
|
||||
/// Extract data for each databases from obkv documents in parallel.
|
||||
|
@ -52,7 +53,7 @@ pub(crate) fn data_from_obkv_documents(
|
|||
dictionary: Option<&[&str]>,
|
||||
max_positions_per_attributes: Option<u32>,
|
||||
exact_attributes: HashSet<FieldId>,
|
||||
// TODO: add a proximity database deactivation parameter.
|
||||
proximity_precision: ProximityPrecision,
|
||||
) -> Result<()> {
|
||||
puffin::profile_function!();
|
||||
|
||||
|
@ -151,16 +152,17 @@ pub(crate) fn data_from_obkv_documents(
|
|||
});
|
||||
}
|
||||
|
||||
// TODO: Skip this part if deactivated
|
||||
spawn_extraction_task::<_, _, Vec<grenad::Reader<BufReader<File>>>>(
|
||||
docid_word_positions_chunks.clone(),
|
||||
indexer,
|
||||
lmdb_writer_sx.clone(),
|
||||
extract_word_pair_proximity_docids,
|
||||
merge_deladd_cbo_roaring_bitmaps,
|
||||
TypedChunk::WordPairProximityDocids,
|
||||
"word-pair-proximity-docids",
|
||||
);
|
||||
if proximity_precision == ProximityPrecision::WordScale {
|
||||
spawn_extraction_task::<_, _, Vec<grenad::Reader<BufReader<File>>>>(
|
||||
docid_word_positions_chunks.clone(),
|
||||
indexer,
|
||||
lmdb_writer_sx.clone(),
|
||||
extract_word_pair_proximity_docids,
|
||||
merge_deladd_cbo_roaring_bitmaps,
|
||||
TypedChunk::WordPairProximityDocids,
|
||||
"word-pair-proximity-docids",
|
||||
);
|
||||
}
|
||||
|
||||
spawn_extraction_task::<_, _, Vec<grenad::Reader<BufReader<File>>>>(
|
||||
docid_word_positions_chunks.clone(),
|
||||
|
|
|
@ -352,6 +352,7 @@ where
|
|||
let dictionary: Option<Vec<_>> =
|
||||
dictionary.as_ref().map(|x| x.iter().map(String::as_str).collect());
|
||||
let exact_attributes = self.index.exact_attributes_ids(self.wtxn)?;
|
||||
let proximity_precision = self.index.proximity_precision(self.wtxn)?.unwrap_or_default();
|
||||
|
||||
let pool_params = GrenadParameters {
|
||||
chunk_compression_type: self.indexer_config.chunk_compression_type,
|
||||
|
@ -392,6 +393,7 @@ where
|
|||
dictionary.as_deref(),
|
||||
max_positions_per_attributes,
|
||||
exact_attributes,
|
||||
proximity_precision,
|
||||
)
|
||||
});
|
||||
|
||||
|
|
|
@ -12,6 +12,7 @@ use super::IndexerConfig;
|
|||
use crate::criterion::Criterion;
|
||||
use crate::error::UserError;
|
||||
use crate::index::{DEFAULT_MIN_WORD_LEN_ONE_TYPO, DEFAULT_MIN_WORD_LEN_TWO_TYPOS};
|
||||
use crate::proximity::ProximityPrecision;
|
||||
use crate::update::index_documents::IndexDocumentsMethod;
|
||||
use crate::update::{IndexDocuments, UpdateIndexingStep};
|
||||
use crate::{FieldsIdsMap, Index, OrderBy, Result};
|
||||
|
@ -127,7 +128,7 @@ pub struct Settings<'a, 't, 'i> {
|
|||
max_values_per_facet: Setting<usize>,
|
||||
sort_facet_values_by: Setting<HashMap<String, OrderBy>>,
|
||||
pagination_max_total_hits: Setting<usize>,
|
||||
// TODO: add a proximity database deactivation attribute.
|
||||
proximity_precision: Setting<ProximityPrecision>,
|
||||
}
|
||||
|
||||
impl<'a, 't, 'i> Settings<'a, 't, 'i> {
|
||||
|
@ -159,6 +160,7 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
|
|||
max_values_per_facet: Setting::NotSet,
|
||||
sort_facet_values_by: Setting::NotSet,
|
||||
pagination_max_total_hits: Setting::NotSet,
|
||||
proximity_precision: Setting::NotSet,
|
||||
indexer_config,
|
||||
}
|
||||
}
|
||||
|
@ -333,6 +335,14 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
|
|||
self.pagination_max_total_hits = Setting::Reset;
|
||||
}
|
||||
|
||||
pub fn set_proximity_precision(&mut self, value: ProximityPrecision) {
|
||||
self.proximity_precision = Setting::Set(value);
|
||||
}
|
||||
|
||||
pub fn reset_proximity_precision(&mut self) {
|
||||
self.proximity_precision = Setting::Reset;
|
||||
}
|
||||
|
||||
fn reindex<FP, FA>(
|
||||
&mut self,
|
||||
progress_callback: &FP,
|
||||
|
@ -862,6 +872,24 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
|
|||
Ok(())
|
||||
}
|
||||
|
||||
fn update_proximity_precision(&mut self) -> Result<bool> {
|
||||
let changed = match self.proximity_precision {
|
||||
Setting::Set(new) => {
|
||||
let old = self.index.proximity_precision(self.wtxn)?;
|
||||
if old == Some(new) {
|
||||
false
|
||||
} else {
|
||||
self.index.put_proximity_precision(self.wtxn, new)?;
|
||||
true
|
||||
}
|
||||
}
|
||||
Setting::Reset => self.index.delete_proximity_precision(self.wtxn)?,
|
||||
Setting::NotSet => false,
|
||||
};
|
||||
|
||||
Ok(changed)
|
||||
}
|
||||
|
||||
pub fn execute<FP, FA>(mut self, progress_callback: FP, should_abort: FA) -> Result<()>
|
||||
where
|
||||
FP: Fn(UpdateIndexingStep) + Sync,
|
||||
|
@ -898,6 +926,7 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
|
|||
let synonyms_updated = self.update_synonyms()?;
|
||||
let searchable_updated = self.update_searchable()?;
|
||||
let exact_attributes_updated = self.update_exact_attributes()?;
|
||||
let proximity_precision = self.update_proximity_precision()?;
|
||||
|
||||
if stop_words_updated
|
||||
|| non_separator_tokens_updated
|
||||
|
@ -907,7 +936,7 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
|
|||
|| synonyms_updated
|
||||
|| searchable_updated
|
||||
|| exact_attributes_updated
|
||||
// TODO: reindex if proximity database is activated
|
||||
|| proximity_precision
|
||||
{
|
||||
self.reindex(&progress_callback, &should_abort, old_fields_ids_map)?;
|
||||
}
|
||||
|
@ -1733,6 +1762,7 @@ mod tests {
|
|||
max_values_per_facet,
|
||||
sort_facet_values_by,
|
||||
pagination_max_total_hits,
|
||||
proximity_precision,
|
||||
} = settings;
|
||||
assert!(matches!(searchable_fields, Setting::NotSet));
|
||||
assert!(matches!(displayed_fields, Setting::NotSet));
|
||||
|
@ -1754,6 +1784,7 @@ mod tests {
|
|||
assert!(matches!(max_values_per_facet, Setting::NotSet));
|
||||
assert!(matches!(sort_facet_values_by, Setting::NotSet));
|
||||
assert!(matches!(pagination_max_total_hits, Setting::NotSet));
|
||||
assert!(matches!(proximity_precision, Setting::NotSet));
|
||||
})
|
||||
.unwrap();
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue