mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-01-25 12:47:28 +01:00
Merge #5131
5131: Ignore documents whose selected fields didn't change r=dureuill a=dureuill

Attempts to improve the new indexer performance by ignoring documents whose selected fields didn't change:

- Add `Update::has_changed_for_fields` function
- Ignore documents whose searchable attributes didn't change for word docids and word pair proximity extraction
- Ignore documents whose faceted attributes didn't change for facet extraction

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
This commit is contained in:
commit
4a082683df
@ -1,7 +1,10 @@
|
|||||||
use bumpalo::Bump;
|
use bumpalo::Bump;
|
||||||
use heed::RoTxn;
|
use heed::RoTxn;
|
||||||
|
|
||||||
use super::document::{DocumentFromDb, DocumentFromVersions, MergedDocument, Versions};
|
use super::document::{
|
||||||
|
Document as _, DocumentFromDb, DocumentFromVersions, MergedDocument, Versions,
|
||||||
|
};
|
||||||
|
use super::extract::perm_json_p;
|
||||||
use super::vector_document::{
|
use super::vector_document::{
|
||||||
MergedVectorDocument, VectorDocumentFromDb, VectorDocumentFromVersions,
|
MergedVectorDocument, VectorDocumentFromDb, VectorDocumentFromVersions,
|
||||||
};
|
};
|
||||||
@ -164,6 +167,80 @@ impl<'doc> Update<'doc> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns whether the updated version of the document is different from the current version for the passed subset of fields.
|
||||||
|
///
|
||||||
|
/// `true` if at least one top-level-field that is a exactly a member of field or a parent of a member of field changed.
|
||||||
|
/// Otherwise `false`.
|
||||||
|
pub fn has_changed_for_fields<'t, Mapper: FieldIdMapper>(
|
||||||
|
&self,
|
||||||
|
fields: Option<&[&str]>,
|
||||||
|
rtxn: &'t RoTxn,
|
||||||
|
index: &'t Index,
|
||||||
|
mapper: &'t Mapper,
|
||||||
|
) -> Result<bool> {
|
||||||
|
let mut changed = false;
|
||||||
|
let mut cached_current = None;
|
||||||
|
let mut updated_selected_field_count = 0;
|
||||||
|
|
||||||
|
for entry in self.updated().iter_top_level_fields() {
|
||||||
|
let (key, updated_value) = entry?;
|
||||||
|
|
||||||
|
if perm_json_p::select_field(key, fields, &[]) == perm_json_p::Selection::Skip {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
updated_selected_field_count += 1;
|
||||||
|
let current = match cached_current {
|
||||||
|
Some(current) => current,
|
||||||
|
None => self.current(rtxn, index, mapper)?,
|
||||||
|
};
|
||||||
|
let current_value = current.top_level_field(key)?;
|
||||||
|
let Some(current_value) = current_value else {
|
||||||
|
changed = true;
|
||||||
|
break;
|
||||||
|
};
|
||||||
|
|
||||||
|
if current_value.get() != updated_value.get() {
|
||||||
|
changed = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
cached_current = Some(current);
|
||||||
|
}
|
||||||
|
|
||||||
|
if !self.has_deletion {
|
||||||
|
// no field deletion, so fields that don't appear in `updated` cannot have changed
|
||||||
|
return Ok(changed);
|
||||||
|
}
|
||||||
|
|
||||||
|
if changed {
|
||||||
|
return Ok(true);
|
||||||
|
}
|
||||||
|
|
||||||
|
// we saw all updated fields, and set `changed` if any field wasn't in `current`.
|
||||||
|
// so if there are as many fields in `current` as in `updated`, then nothing changed.
|
||||||
|
// If there is any more fields in `current`, then they are missing in `updated`.
|
||||||
|
let has_deleted_fields = {
|
||||||
|
let current = match cached_current {
|
||||||
|
Some(current) => current,
|
||||||
|
None => self.current(rtxn, index, mapper)?,
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut current_selected_field_count = 0;
|
||||||
|
for entry in current.iter_top_level_fields() {
|
||||||
|
let (key, _) = entry?;
|
||||||
|
|
||||||
|
if perm_json_p::select_field(key, fields, &[]) == perm_json_p::Selection::Skip {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
current_selected_field_count += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
current_selected_field_count != updated_selected_field_count
|
||||||
|
};
|
||||||
|
|
||||||
|
Ok(has_deleted_fields)
|
||||||
|
}
|
||||||
|
|
||||||
pub fn updated_vectors(
|
pub fn updated_vectors(
|
||||||
&self,
|
&self,
|
||||||
doc_alloc: &'doc Bump,
|
doc_alloc: &'doc Bump,
|
||||||
|
@ -97,6 +97,15 @@ impl FacetedDocidsExtractor {
|
|||||||
},
|
},
|
||||||
),
|
),
|
||||||
DocumentChange::Update(inner) => {
|
DocumentChange::Update(inner) => {
|
||||||
|
if !inner.has_changed_for_fields(
|
||||||
|
Some(attributes_to_extract),
|
||||||
|
rtxn,
|
||||||
|
index,
|
||||||
|
context.db_fields_ids_map,
|
||||||
|
)? {
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
|
||||||
extract_document_facets(
|
extract_document_facets(
|
||||||
attributes_to_extract,
|
attributes_to_extract,
|
||||||
inner.current(rtxn, index, context.db_fields_ids_map)?,
|
inner.current(rtxn, index, context.db_fields_ids_map)?,
|
||||||
|
@ -351,6 +351,15 @@ impl WordDocidsExtractors {
|
|||||||
)?;
|
)?;
|
||||||
}
|
}
|
||||||
DocumentChange::Update(inner) => {
|
DocumentChange::Update(inner) => {
|
||||||
|
if !inner.has_changed_for_fields(
|
||||||
|
document_tokenizer.attribute_to_extract,
|
||||||
|
&context.rtxn,
|
||||||
|
context.index,
|
||||||
|
context.db_fields_ids_map,
|
||||||
|
)? {
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
|
||||||
let mut token_fn = |fname: &str, fid, pos, word: &str| {
|
let mut token_fn = |fname: &str, fid, pos, word: &str| {
|
||||||
cached_sorter.insert_del_u32(
|
cached_sorter.insert_del_u32(
|
||||||
fid,
|
fid,
|
||||||
|
@ -70,6 +70,15 @@ impl SearchableExtractor for WordPairProximityDocidsExtractor {
|
|||||||
)?;
|
)?;
|
||||||
}
|
}
|
||||||
DocumentChange::Update(inner) => {
|
DocumentChange::Update(inner) => {
|
||||||
|
if !inner.has_changed_for_fields(
|
||||||
|
document_tokenizer.attribute_to_extract,
|
||||||
|
rtxn,
|
||||||
|
index,
|
||||||
|
context.db_fields_ids_map,
|
||||||
|
)? {
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
|
||||||
let document = inner.current(rtxn, index, context.db_fields_ids_map)?;
|
let document = inner.current(rtxn, index, context.db_fields_ids_map)?;
|
||||||
process_document_tokens(
|
process_document_tokens(
|
||||||
document,
|
document,
|
||||||
|
Loading…
x
Reference in New Issue
Block a user