Merge branch 'main' into merge-release-v1.8.1-in-main

This commit is contained in:
Many the fish 2024-05-29 11:31:03 +02:00 committed by GitHub
commit e1fbfde6c4
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
105 changed files with 5863 additions and 1031 deletions

View file

@ -398,8 +398,6 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
FP: Fn(UpdateIndexingStep) + Sync,
FA: Fn() -> bool + Sync,
{
puffin::profile_function!();
// if the settings are set before any document update, we don't need to do anything, and
// will set the primary key during the first document addition.
if self.index.number_of_documents(self.wtxn)? == 0 {
@ -461,50 +459,39 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
Ok(true)
}
/// Updates the index's searchable attributes. This causes the field map to be recomputed to
/// reflect the order of the searchable attributes.
/// Updates the index's searchable attributes.
fn update_searchable(&mut self) -> Result<bool> {
match self.searchable_fields {
Setting::Set(ref fields) => {
// Check to see if the searchable fields changed before doing anything else
let old_fields = self.index.searchable_fields(self.wtxn)?;
let did_change = match old_fields {
// If old_fields is Some, let's check to see if the fields actually changed
Some(old_fields) => {
let new_fields = fields.iter().map(String::as_str).collect::<Vec<_>>();
new_fields != old_fields
}
// If old_fields is None, the fields have changed (because they are being set)
None => true,
let did_change = {
let new_fields = fields.iter().map(String::as_str).collect::<Vec<_>>();
new_fields != old_fields
};
if !did_change {
return Ok(false);
}
// every time the searchable attributes are updated, we need to update the
// ids for any settings that uses the facets. (distinct_fields, filterable_fields).
let old_fields_ids_map = self.index.fields_ids_map(self.wtxn)?;
let mut new_fields_ids_map = FieldsIdsMap::new();
// Since we're updating the settings we can only add new fields at the end of the field id map
let mut fields_ids_map = self.index.fields_ids_map(self.wtxn)?;
// fields are deduplicated, only the first occurrence is taken into account
let names = fields.iter().unique().map(String::as_str).collect::<Vec<_>>();
// Add all the searchable attributes to the field map, and then add the
// remaining fields from the old field map to the new one
for name in names.iter() {
new_fields_ids_map.insert(name).ok_or(UserError::AttributeLimitReached)?;
}
for (_, name) in old_fields_ids_map.iter() {
new_fields_ids_map.insert(name).ok_or(UserError::AttributeLimitReached)?;
// The fields ids map won't change the field id of already present elements thus only the
// new fields will be inserted.
fields_ids_map.insert(name).ok_or(UserError::AttributeLimitReached)?;
}
self.index.put_all_searchable_fields_from_fields_ids_map(
self.wtxn,
&names,
&new_fields_ids_map,
&fields_ids_map,
)?;
self.index.put_fields_ids_map(self.wtxn, &new_fields_ids_map)?;
self.index.put_fields_ids_map(self.wtxn, &fields_ids_map)?;
Ok(true)
}
Setting::Reset => Ok(self.index.delete_all_searchable_fields(self.wtxn)?),
@ -1078,10 +1065,17 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
// 3. Keep the old vectors but reattempt indexing on a prompt change: only actually changed prompt will need embedding + storage
let embedding_configs_updated = self.update_embedding_configs()?;
let new_inner_settings = InnerIndexSettings::from_index(self.index, self.wtxn)?;
let mut new_inner_settings = InnerIndexSettings::from_index(self.index, self.wtxn)?;
new_inner_settings.recompute_facets(self.wtxn, self.index)?;
let primary_key_id = self
.index
.primary_key(self.wtxn)?
.and_then(|name| new_inner_settings.fields_ids_map.id(name));
let inner_settings_diff = InnerIndexSettingsDiff {
old: old_inner_settings,
new: new_inner_settings,
primary_key_id,
embedding_configs_updated,
settings_update_only: true,
};
@ -1097,10 +1091,9 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
pub struct InnerIndexSettingsDiff {
pub(crate) old: InnerIndexSettings,
pub(crate) new: InnerIndexSettings,
pub(crate) primary_key_id: Option<FieldId>,
// TODO: compare directly the embedders.
pub(crate) embedding_configs_updated: bool,
pub(crate) settings_update_only: bool,
}
@ -1110,13 +1103,8 @@ impl InnerIndexSettingsDiff {
}
pub fn reindex_searchable(&self) -> bool {
self.old
.fields_ids_map
.iter()
.zip(self.new.fields_ids_map.iter())
.any(|(old, new)| old != new)
|| self.old.stop_words.as_ref().map(|set| set.as_fst().as_bytes())
!= self.new.stop_words.as_ref().map(|set| set.as_fst().as_bytes())
self.old.stop_words.as_ref().map(|set| set.as_fst().as_bytes())
!= self.new.stop_words.as_ref().map(|set| set.as_fst().as_bytes())
|| self.old.allowed_separators != self.new.allowed_separators
|| self.old.dictionary != self.new.dictionary
|| self.old.user_defined_searchable_fields != self.new.user_defined_searchable_fields
@ -1143,15 +1131,7 @@ impl InnerIndexSettingsDiff {
return true;
}
let faceted_updated =
(existing_fields - old_faceted_fields) != (existing_fields - new_faceted_fields);
self.old
.fields_ids_map
.iter()
.zip(self.new.fields_ids_map.iter())
.any(|(old, new)| old != new)
|| faceted_updated
(existing_fields - old_faceted_fields) != (existing_fields - new_faceted_fields)
}
pub fn reindex_vectors(&self) -> bool {
@ -1181,7 +1161,7 @@ pub(crate) struct InnerIndexSettings {
pub user_defined_faceted_fields: HashSet<String>,
pub user_defined_searchable_fields: Option<Vec<String>>,
pub faceted_fields_ids: HashSet<FieldId>,
pub searchable_fields_ids: Option<Vec<FieldId>>,
pub searchable_fields_ids: Vec<FieldId>,
pub exact_attributes: HashSet<FieldId>,
pub proximity_precision: ProximityPrecision,
pub embedding_configs: EmbeddingConfigs,
@ -1262,18 +1242,21 @@ impl InnerIndexSettings {
// find and insert the new field ids
pub fn recompute_searchables(&mut self, wtxn: &mut heed::RwTxn, index: &Index) -> Result<()> {
let searchable_fields = self
.user_defined_searchable_fields
.as_ref()
.map(|searchable| searchable.iter().map(|s| s.as_str()).collect::<Vec<_>>());
// in case new fields were introduced we're going to recreate the searchable fields.
if let Some(searchable_fields) = self.user_defined_searchable_fields.as_ref() {
let searchable_fields =
searchable_fields.iter().map(String::as_ref).collect::<Vec<_>>();
if let Some(searchable_fields) = searchable_fields {
index.put_all_searchable_fields_from_fields_ids_map(
wtxn,
&searchable_fields,
&self.fields_ids_map,
)?;
let searchable_fields_ids = index.searchable_fields_ids(wtxn)?;
self.searchable_fields_ids = searchable_fields_ids;
}
let searchable_fields_ids = index.searchable_fields_ids(wtxn)?;
self.searchable_fields_ids = searchable_fields_ids;
Ok(())
}
@ -1546,12 +1529,13 @@ mod tests {
use big_s::S;
use heed::types::Bytes;
use maplit::{btreemap, btreeset, hashset};
use meili_snap::snapshot;
use super::*;
use crate::error::Error;
use crate::index::tests::TempIndex;
use crate::update::ClearDocuments;
use crate::{Criterion, Filter, SearchResult};
use crate::{db_snap, Criterion, Filter, SearchResult};
#[test]
fn set_and_reset_searchable_fields() {
@ -1580,6 +1564,17 @@ mod tests {
wtxn.commit().unwrap();
db_snap!(index, fields_ids_map, @r###"
0 id |
1 name |
2 age |
"###);
db_snap!(index, searchable_fields, @r###"["name"]"###);
db_snap!(index, fieldids_weights_map, @r###"
fid weight
1 0 |
"###);
// Check that the searchable field is correctly set to "name" only.
let rtxn = index.read_txn().unwrap();
// When we search for something that is not in
@ -1591,8 +1586,9 @@ mod tests {
// we must find the appropriate document.
let result = index.search(&rtxn).query(r#""kevin""#).execute().unwrap();
let documents = index.documents(&rtxn, result.documents_ids).unwrap();
let fid_map = index.fields_ids_map(&rtxn).unwrap();
assert_eq!(documents.len(), 1);
assert_eq!(documents[0].1.get(0), Some(&br#""kevin""#[..]));
assert_eq!(documents[0].1.get(fid_map.id("name").unwrap()), Some(&br#""kevin""#[..]));
drop(rtxn);
// We change the searchable fields to be the "name" field only.
@ -1602,14 +1598,31 @@ mod tests {
})
.unwrap();
db_snap!(index, fields_ids_map, @r###"
0 id |
1 name |
2 age |
"###);
db_snap!(index, searchable_fields, @r###"["id", "name", "age"]"###);
db_snap!(index, fieldids_weights_map, @r###"
fid weight
0 0 |
1 0 |
2 0 |
"###);
// Check that the searchable field have been reset and documents are found now.
let rtxn = index.read_txn().unwrap();
let fid_map = index.fields_ids_map(&rtxn).unwrap();
let user_defined_searchable_fields = index.user_defined_searchable_fields(&rtxn).unwrap();
snapshot!(format!("{user_defined_searchable_fields:?}"), @"None");
// the searchable fields should contain all the fields
let searchable_fields = index.searchable_fields(&rtxn).unwrap();
assert_eq!(searchable_fields, None);
snapshot!(format!("{searchable_fields:?}"), @r###"["id", "name", "age"]"###);
let result = index.search(&rtxn).query("23").execute().unwrap();
assert_eq!(result.documents_ids.len(), 1);
let documents = index.documents(&rtxn, result.documents_ids).unwrap();
assert_eq!(documents[0].1.get(0), Some(&br#""kevin""#[..]));
assert_eq!(documents[0].1.get(fid_map.id("name").unwrap()), Some(&br#""kevin""#[..]));
}
#[test]