mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-07-04 20:37:15 +02:00
Merge branch 'main' into merge-release-v1.8.1-in-main
This commit is contained in:
commit
e1fbfde6c4
105 changed files with 5863 additions and 1031 deletions
|
@ -398,8 +398,6 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
|
|||
FP: Fn(UpdateIndexingStep) + Sync,
|
||||
FA: Fn() -> bool + Sync,
|
||||
{
|
||||
puffin::profile_function!();
|
||||
|
||||
// if the settings are set before any document update, we don't need to do anything, and
|
||||
// will set the primary key during the first document addition.
|
||||
if self.index.number_of_documents(self.wtxn)? == 0 {
|
||||
|
@ -461,50 +459,39 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
|
|||
Ok(true)
|
||||
}
|
||||
|
||||
/// Updates the index's searchable attributes. This causes the field map to be recomputed to
|
||||
/// reflect the order of the searchable attributes.
|
||||
/// Updates the index's searchable attributes.
|
||||
fn update_searchable(&mut self) -> Result<bool> {
|
||||
match self.searchable_fields {
|
||||
Setting::Set(ref fields) => {
|
||||
// Check to see if the searchable fields changed before doing anything else
|
||||
let old_fields = self.index.searchable_fields(self.wtxn)?;
|
||||
let did_change = match old_fields {
|
||||
// If old_fields is Some, let's check to see if the fields actually changed
|
||||
Some(old_fields) => {
|
||||
let new_fields = fields.iter().map(String::as_str).collect::<Vec<_>>();
|
||||
new_fields != old_fields
|
||||
}
|
||||
// If old_fields is None, the fields have changed (because they are being set)
|
||||
None => true,
|
||||
let did_change = {
|
||||
let new_fields = fields.iter().map(String::as_str).collect::<Vec<_>>();
|
||||
new_fields != old_fields
|
||||
};
|
||||
if !did_change {
|
||||
return Ok(false);
|
||||
}
|
||||
|
||||
// every time the searchable attributes are updated, we need to update the
|
||||
// ids for any settings that uses the facets. (distinct_fields, filterable_fields).
|
||||
let old_fields_ids_map = self.index.fields_ids_map(self.wtxn)?;
|
||||
|
||||
let mut new_fields_ids_map = FieldsIdsMap::new();
|
||||
// Since we're updating the settings we can only add new fields at the end of the field id map
|
||||
let mut fields_ids_map = self.index.fields_ids_map(self.wtxn)?;
|
||||
// fields are deduplicated, only the first occurrence is taken into account
|
||||
let names = fields.iter().unique().map(String::as_str).collect::<Vec<_>>();
|
||||
|
||||
// Add all the searchable attributes to the field map, and then add the
|
||||
// remaining fields from the old field map to the new one
|
||||
for name in names.iter() {
|
||||
new_fields_ids_map.insert(name).ok_or(UserError::AttributeLimitReached)?;
|
||||
}
|
||||
|
||||
for (_, name) in old_fields_ids_map.iter() {
|
||||
new_fields_ids_map.insert(name).ok_or(UserError::AttributeLimitReached)?;
|
||||
// The fields ids map won't change the field id of already present elements thus only the
|
||||
// new fields will be inserted.
|
||||
fields_ids_map.insert(name).ok_or(UserError::AttributeLimitReached)?;
|
||||
}
|
||||
|
||||
self.index.put_all_searchable_fields_from_fields_ids_map(
|
||||
self.wtxn,
|
||||
&names,
|
||||
&new_fields_ids_map,
|
||||
&fields_ids_map,
|
||||
)?;
|
||||
self.index.put_fields_ids_map(self.wtxn, &new_fields_ids_map)?;
|
||||
self.index.put_fields_ids_map(self.wtxn, &fields_ids_map)?;
|
||||
Ok(true)
|
||||
}
|
||||
Setting::Reset => Ok(self.index.delete_all_searchable_fields(self.wtxn)?),
|
||||
|
@ -1078,10 +1065,17 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
|
|||
// 3. Keep the old vectors but reattempt indexing on a prompt change: only actually changed prompt will need embedding + storage
|
||||
let embedding_configs_updated = self.update_embedding_configs()?;
|
||||
|
||||
let new_inner_settings = InnerIndexSettings::from_index(self.index, self.wtxn)?;
|
||||
let mut new_inner_settings = InnerIndexSettings::from_index(self.index, self.wtxn)?;
|
||||
new_inner_settings.recompute_facets(self.wtxn, self.index)?;
|
||||
|
||||
let primary_key_id = self
|
||||
.index
|
||||
.primary_key(self.wtxn)?
|
||||
.and_then(|name| new_inner_settings.fields_ids_map.id(name));
|
||||
let inner_settings_diff = InnerIndexSettingsDiff {
|
||||
old: old_inner_settings,
|
||||
new: new_inner_settings,
|
||||
primary_key_id,
|
||||
embedding_configs_updated,
|
||||
settings_update_only: true,
|
||||
};
|
||||
|
@ -1097,10 +1091,9 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
|
|||
pub struct InnerIndexSettingsDiff {
|
||||
pub(crate) old: InnerIndexSettings,
|
||||
pub(crate) new: InnerIndexSettings,
|
||||
|
||||
pub(crate) primary_key_id: Option<FieldId>,
|
||||
// TODO: compare directly the embedders.
|
||||
pub(crate) embedding_configs_updated: bool,
|
||||
|
||||
pub(crate) settings_update_only: bool,
|
||||
}
|
||||
|
||||
|
@ -1110,13 +1103,8 @@ impl InnerIndexSettingsDiff {
|
|||
}
|
||||
|
||||
pub fn reindex_searchable(&self) -> bool {
|
||||
self.old
|
||||
.fields_ids_map
|
||||
.iter()
|
||||
.zip(self.new.fields_ids_map.iter())
|
||||
.any(|(old, new)| old != new)
|
||||
|| self.old.stop_words.as_ref().map(|set| set.as_fst().as_bytes())
|
||||
!= self.new.stop_words.as_ref().map(|set| set.as_fst().as_bytes())
|
||||
self.old.stop_words.as_ref().map(|set| set.as_fst().as_bytes())
|
||||
!= self.new.stop_words.as_ref().map(|set| set.as_fst().as_bytes())
|
||||
|| self.old.allowed_separators != self.new.allowed_separators
|
||||
|| self.old.dictionary != self.new.dictionary
|
||||
|| self.old.user_defined_searchable_fields != self.new.user_defined_searchable_fields
|
||||
|
@ -1143,15 +1131,7 @@ impl InnerIndexSettingsDiff {
|
|||
return true;
|
||||
}
|
||||
|
||||
let faceted_updated =
|
||||
(existing_fields - old_faceted_fields) != (existing_fields - new_faceted_fields);
|
||||
|
||||
self.old
|
||||
.fields_ids_map
|
||||
.iter()
|
||||
.zip(self.new.fields_ids_map.iter())
|
||||
.any(|(old, new)| old != new)
|
||||
|| faceted_updated
|
||||
(existing_fields - old_faceted_fields) != (existing_fields - new_faceted_fields)
|
||||
}
|
||||
|
||||
pub fn reindex_vectors(&self) -> bool {
|
||||
|
@ -1181,7 +1161,7 @@ pub(crate) struct InnerIndexSettings {
|
|||
pub user_defined_faceted_fields: HashSet<String>,
|
||||
pub user_defined_searchable_fields: Option<Vec<String>>,
|
||||
pub faceted_fields_ids: HashSet<FieldId>,
|
||||
pub searchable_fields_ids: Option<Vec<FieldId>>,
|
||||
pub searchable_fields_ids: Vec<FieldId>,
|
||||
pub exact_attributes: HashSet<FieldId>,
|
||||
pub proximity_precision: ProximityPrecision,
|
||||
pub embedding_configs: EmbeddingConfigs,
|
||||
|
@ -1262,18 +1242,21 @@ impl InnerIndexSettings {
|
|||
|
||||
// find and insert the new field ids
|
||||
pub fn recompute_searchables(&mut self, wtxn: &mut heed::RwTxn, index: &Index) -> Result<()> {
|
||||
let searchable_fields = self
|
||||
.user_defined_searchable_fields
|
||||
.as_ref()
|
||||
.map(|searchable| searchable.iter().map(|s| s.as_str()).collect::<Vec<_>>());
|
||||
|
||||
// in case new fields were introduced we're going to recreate the searchable fields.
|
||||
if let Some(searchable_fields) = self.user_defined_searchable_fields.as_ref() {
|
||||
let searchable_fields =
|
||||
searchable_fields.iter().map(String::as_ref).collect::<Vec<_>>();
|
||||
if let Some(searchable_fields) = searchable_fields {
|
||||
index.put_all_searchable_fields_from_fields_ids_map(
|
||||
wtxn,
|
||||
&searchable_fields,
|
||||
&self.fields_ids_map,
|
||||
)?;
|
||||
let searchable_fields_ids = index.searchable_fields_ids(wtxn)?;
|
||||
self.searchable_fields_ids = searchable_fields_ids;
|
||||
}
|
||||
let searchable_fields_ids = index.searchable_fields_ids(wtxn)?;
|
||||
self.searchable_fields_ids = searchable_fields_ids;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
@ -1546,12 +1529,13 @@ mod tests {
|
|||
use big_s::S;
|
||||
use heed::types::Bytes;
|
||||
use maplit::{btreemap, btreeset, hashset};
|
||||
use meili_snap::snapshot;
|
||||
|
||||
use super::*;
|
||||
use crate::error::Error;
|
||||
use crate::index::tests::TempIndex;
|
||||
use crate::update::ClearDocuments;
|
||||
use crate::{Criterion, Filter, SearchResult};
|
||||
use crate::{db_snap, Criterion, Filter, SearchResult};
|
||||
|
||||
#[test]
|
||||
fn set_and_reset_searchable_fields() {
|
||||
|
@ -1580,6 +1564,17 @@ mod tests {
|
|||
|
||||
wtxn.commit().unwrap();
|
||||
|
||||
db_snap!(index, fields_ids_map, @r###"
|
||||
0 id |
|
||||
1 name |
|
||||
2 age |
|
||||
"###);
|
||||
db_snap!(index, searchable_fields, @r###"["name"]"###);
|
||||
db_snap!(index, fieldids_weights_map, @r###"
|
||||
fid weight
|
||||
1 0 |
|
||||
"###);
|
||||
|
||||
// Check that the searchable field is correctly set to "name" only.
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
// When we search for something that is not in
|
||||
|
@ -1591,8 +1586,9 @@ mod tests {
|
|||
// we must find the appropriate document.
|
||||
let result = index.search(&rtxn).query(r#""kevin""#).execute().unwrap();
|
||||
let documents = index.documents(&rtxn, result.documents_ids).unwrap();
|
||||
let fid_map = index.fields_ids_map(&rtxn).unwrap();
|
||||
assert_eq!(documents.len(), 1);
|
||||
assert_eq!(documents[0].1.get(0), Some(&br#""kevin""#[..]));
|
||||
assert_eq!(documents[0].1.get(fid_map.id("name").unwrap()), Some(&br#""kevin""#[..]));
|
||||
drop(rtxn);
|
||||
|
||||
// We change the searchable fields to be the "name" field only.
|
||||
|
@ -1602,14 +1598,31 @@ mod tests {
|
|||
})
|
||||
.unwrap();
|
||||
|
||||
db_snap!(index, fields_ids_map, @r###"
|
||||
0 id |
|
||||
1 name |
|
||||
2 age |
|
||||
"###);
|
||||
db_snap!(index, searchable_fields, @r###"["id", "name", "age"]"###);
|
||||
db_snap!(index, fieldids_weights_map, @r###"
|
||||
fid weight
|
||||
0 0 |
|
||||
1 0 |
|
||||
2 0 |
|
||||
"###);
|
||||
|
||||
// Check that the searchable field have been reset and documents are found now.
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
let fid_map = index.fields_ids_map(&rtxn).unwrap();
|
||||
let user_defined_searchable_fields = index.user_defined_searchable_fields(&rtxn).unwrap();
|
||||
snapshot!(format!("{user_defined_searchable_fields:?}"), @"None");
|
||||
// the searchable fields should contain all the fields
|
||||
let searchable_fields = index.searchable_fields(&rtxn).unwrap();
|
||||
assert_eq!(searchable_fields, None);
|
||||
snapshot!(format!("{searchable_fields:?}"), @r###"["id", "name", "age"]"###);
|
||||
let result = index.search(&rtxn).query("23").execute().unwrap();
|
||||
assert_eq!(result.documents_ids.len(), 1);
|
||||
let documents = index.documents(&rtxn, result.documents_ids).unwrap();
|
||||
assert_eq!(documents[0].1.get(0), Some(&br#""kevin""#[..]));
|
||||
assert_eq!(documents[0].1.get(fid_map.id("name").unwrap()), Some(&br#""kevin""#[..]));
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue