fix the searchable fields bug when a field is nested

Update milli/src/index.rs

Co-authored-by: Clément Renault <clement@meilisearch.com>
This commit is contained in:
Tamo 2022-05-16 15:22:52 +02:00
parent 65e6aa0de2
commit f586028f9a
No known key found for this signature in database
GPG Key ID: 20CD8020AFA88D69
3 changed files with 176 additions and 7 deletions

View File

@ -42,6 +42,7 @@ pub mod main_key {
pub const NUMBER_FACETED_DOCUMENTS_IDS_PREFIX: &str = "number-faceted-documents-ids"; pub const NUMBER_FACETED_DOCUMENTS_IDS_PREFIX: &str = "number-faceted-documents-ids";
pub const PRIMARY_KEY_KEY: &str = "primary-key"; pub const PRIMARY_KEY_KEY: &str = "primary-key";
pub const SEARCHABLE_FIELDS_KEY: &str = "searchable-fields"; pub const SEARCHABLE_FIELDS_KEY: &str = "searchable-fields";
pub const USER_DEFINED_SEARCHABLE_FIELDS_KEY: &str = "user-defined-searchable-fields";
pub const SOFT_EXTERNAL_DOCUMENTS_IDS_KEY: &str = "soft-external-documents-ids"; pub const SOFT_EXTERNAL_DOCUMENTS_IDS_KEY: &str = "soft-external-documents-ids";
pub const STOP_WORDS_KEY: &str = "stop-words"; pub const STOP_WORDS_KEY: &str = "stop-words";
pub const STRING_FACETED_DOCUMENTS_IDS_PREFIX: &str = "string-faceted-documents-ids"; pub const STRING_FACETED_DOCUMENTS_IDS_PREFIX: &str = "string-faceted-documents-ids";
@ -457,12 +458,43 @@ impl Index {
/* searchable fields */ /* searchable fields */
/// Writes the searchable fields, when this list is specified, only these are indexed. /// Write the user defined searchable fields and generate the real searchable fields from the specified fields ids map.
pub(crate) fn put_searchable_fields( pub(crate) fn put_all_searchable_fields_from_fields_ids_map(
&self, &self,
wtxn: &mut RwTxn, wtxn: &mut RwTxn,
fields: &[&str], user_fields: &[&str],
fields_ids_map: &FieldsIdsMap,
) -> heed::Result<()> { ) -> heed::Result<()> {
// We can write the user defined searchable fields as-is.
self.put_user_defined_searchable_fields(wtxn, user_fields)?;
// Now we generate the real searchable fields:
// 1. Take the user defined searchable fields as-is to keep the priority defined by the attributes criterion.
// 2. Iterate over the user defined searchable fields.
// 3. If a user defined field is a subset of a field defined in the fields_ids_map
// (ie doggo.name is a subset of doggo) then we push it at the end of the fields.
let mut real_fields = user_fields.to_vec();
for field_from_map in fields_ids_map.names() {
for user_field in user_fields {
if crate::is_faceted_by(field_from_map, user_field)
&& !user_fields.contains(&field_from_map)
{
real_fields.push(field_from_map);
}
}
}
self.put_searchable_fields(wtxn, &real_fields)
}
pub(crate) fn delete_all_searchable_fields(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
self.delete_searchable_fields(wtxn)?;
self.delete_user_defined_searchable_fields(wtxn)
}
/// Writes the searchable fields, when this list is specified, only these are indexed.
fn put_searchable_fields(&self, wtxn: &mut RwTxn, fields: &[&str]) -> heed::Result<()> {
self.main.put::<_, Str, SerdeBincode<&[&str]>>( self.main.put::<_, Str, SerdeBincode<&[&str]>>(
wtxn, wtxn,
main_key::SEARCHABLE_FIELDS_KEY, main_key::SEARCHABLE_FIELDS_KEY,
@ -471,7 +503,7 @@ impl Index {
} }
/// Deletes the searchable fields, when no fields are specified, all fields are indexed. /// Deletes the searchable fields, when no fields are specified, all fields are indexed.
pub(crate) fn delete_searchable_fields(&self, wtxn: &mut RwTxn) -> heed::Result<bool> { fn delete_searchable_fields(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
self.main.delete::<_, Str>(wtxn, main_key::SEARCHABLE_FIELDS_KEY) self.main.delete::<_, Str>(wtxn, main_key::SEARCHABLE_FIELDS_KEY)
} }
@ -498,6 +530,36 @@ impl Index {
} }
} }
/// Writes the searchable fields, when this list is specified, only these are indexed.
pub(crate) fn put_user_defined_searchable_fields(
&self,
wtxn: &mut RwTxn,
fields: &[&str],
) -> heed::Result<()> {
self.main.put::<_, Str, SerdeBincode<_>>(
wtxn,
main_key::USER_DEFINED_SEARCHABLE_FIELDS_KEY,
&fields,
)
}
/// Deletes the searchable fields, when no fields are specified, all fields are indexed.
pub(crate) fn delete_user_defined_searchable_fields(
&self,
wtxn: &mut RwTxn,
) -> heed::Result<bool> {
self.main.delete::<_, Str>(wtxn, main_key::USER_DEFINED_SEARCHABLE_FIELDS_KEY)
}
/// Returns the user defined searchable fields.
pub fn user_defined_searchable_fields<'t>(
&self,
rtxn: &'t RoTxn,
) -> heed::Result<Option<Vec<&'t str>>> {
self.main
.get::<_, Str, SerdeBincode<Vec<_>>>(rtxn, main_key::USER_DEFINED_SEARCHABLE_FIELDS_KEY)
}
/* filterable fields */ /* filterable fields */
/// Writes the filterable fields names in the database. /// Writes the filterable fields names in the database.
@ -1031,12 +1093,13 @@ impl Index {
pub(crate) mod tests { pub(crate) mod tests {
use std::ops::Deref; use std::ops::Deref;
use big_s::S;
use heed::EnvOpenOptions; use heed::EnvOpenOptions;
use maplit::btreemap; use maplit::btreemap;
use tempfile::TempDir; use tempfile::TempDir;
use crate::index::{DEFAULT_MIN_WORD_LEN_ONE_TYPO, DEFAULT_MIN_WORD_LEN_TWO_TYPOS}; use crate::index::{DEFAULT_MIN_WORD_LEN_ONE_TYPO, DEFAULT_MIN_WORD_LEN_TWO_TYPOS};
use crate::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig}; use crate::update::{self, IndexDocuments, IndexDocumentsConfig, IndexerConfig};
use crate::Index; use crate::Index;
pub(crate) struct TempIndex { pub(crate) struct TempIndex {
@ -1184,4 +1247,94 @@ pub(crate) mod tests {
assert_eq!(index.min_word_len_one_typo(&txn).unwrap(), 3); assert_eq!(index.min_word_len_one_typo(&txn).unwrap(), 3);
assert_eq!(index.min_word_len_two_typos(&txn).unwrap(), 15); assert_eq!(index.min_word_len_two_typos(&txn).unwrap(), 15);
} }
#[test]
fn add_documents_and_set_searchable_fields() {
let path = tempfile::tempdir().unwrap();
let mut options = EnvOpenOptions::new();
options.map_size(10 * 1024 * 1024); // 10 MB
let index = Index::new(options, &path).unwrap();
let mut wtxn = index.write_txn().unwrap();
let content = documents!([
{ "id": 1, "doggo": "kevin" },
{ "id": 2, "doggo": { "name": "bob", "age": 20 } },
{ "id": 3, "name": "jean", "age": 25 },
]);
let config = IndexerConfig::default();
let indexing_config = IndexDocumentsConfig::default();
let mut builder =
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config.clone(), |_| ())
.unwrap();
builder.add_documents(content).unwrap();
builder.execute().unwrap();
wtxn.commit().unwrap();
// set searchable fields
let mut wtxn = index.write_txn().unwrap();
let mut builder = update::Settings::new(&mut wtxn, &index, &config);
builder.set_searchable_fields(vec![S("doggo"), S("name")]);
builder.execute(drop).unwrap();
wtxn.commit().unwrap();
// ensure we get the right real searchable fields + user defined searchable fields
let rtxn = index.read_txn().unwrap();
let real = index.searchable_fields(&rtxn).unwrap().unwrap();
assert_eq!(real, &["doggo", "name", "doggo.name", "doggo.age"]);
let user_defined = index.user_defined_searchable_fields(&rtxn).unwrap().unwrap();
assert_eq!(user_defined, &["doggo", "name"]);
}
#[test]
fn set_searchable_fields_and_add_documents() {
let path = tempfile::tempdir().unwrap();
let mut options = EnvOpenOptions::new();
options.map_size(10 * 1024 * 1024); // 10 MB
let index = Index::new(options, &path).unwrap();
let config = IndexerConfig::default();
// set searchable fields
let mut wtxn = index.write_txn().unwrap();
let mut builder = update::Settings::new(&mut wtxn, &index, &config);
builder.set_searchable_fields(vec![S("doggo"), S("name")]);
builder.execute(drop).unwrap();
wtxn.commit().unwrap();
// ensure we get the right real searchable fields + user defined searchable fields
let rtxn = index.read_txn().unwrap();
let real = index.searchable_fields(&rtxn).unwrap().unwrap();
assert_eq!(real, &["doggo", "name"]);
let user_defined = index.user_defined_searchable_fields(&rtxn).unwrap().unwrap();
assert_eq!(user_defined, &["doggo", "name"]);
let mut wtxn = index.write_txn().unwrap();
let content = documents!([
{ "id": 1, "doggo": "kevin" },
{ "id": 2, "doggo": { "name": "bob", "age": 20 } },
{ "id": 3, "name": "jean", "age": 25 },
]);
let indexing_config = IndexDocumentsConfig::default();
let mut builder =
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config.clone(), |_| ())
.unwrap();
builder.add_documents(content).unwrap();
builder.execute().unwrap();
wtxn.commit().unwrap();
// ensure we get the right real searchable fields + user defined searchable fields
let rtxn = index.read_txn().unwrap();
let real = index.searchable_fields(&rtxn).unwrap().unwrap();
assert_eq!(real, &["doggo", "name", "doggo.name", "doggo.age"]);
let user_defined = index.user_defined_searchable_fields(&rtxn).unwrap().unwrap();
assert_eq!(user_defined, &["doggo", "name"]);
}
} }

View File

@ -157,6 +157,18 @@ where
let new_facets = output.compute_real_facets(self.wtxn, self.index)?; let new_facets = output.compute_real_facets(self.wtxn, self.index)?;
self.index.put_faceted_fields(self.wtxn, &new_facets)?; self.index.put_faceted_fields(self.wtxn, &new_facets)?;
// in case new fields were introduced we're going to recreate the searchable fields.
if let Some(faceted_fields) = self.index.user_defined_searchable_fields(self.wtxn)? {
// we can't keep references on the faceted fields while we update the index thus we need to own it.
let faceted_fields: Vec<String> =
faceted_fields.into_iter().map(str::to_string).collect();
self.index.put_all_searchable_fields_from_fields_ids_map(
self.wtxn,
&faceted_fields.iter().map(String::as_ref).collect::<Vec<_>>(),
&output.fields_ids_map,
)?;
}
let indexed_documents = output.documents_count as u64; let indexed_documents = output.documents_count as u64;
let number_of_documents = self.execute_raw(output)?; let number_of_documents = self.execute_raw(output)?;

View File

@ -343,11 +343,15 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
new_fields_ids_map.insert(&name).ok_or(UserError::AttributeLimitReached)?; new_fields_ids_map.insert(&name).ok_or(UserError::AttributeLimitReached)?;
} }
self.index.put_searchable_fields(self.wtxn, &names)?; self.index.put_all_searchable_fields_from_fields_ids_map(
self.wtxn,
&names,
&new_fields_ids_map,
)?;
self.index.put_fields_ids_map(self.wtxn, &new_fields_ids_map)?; self.index.put_fields_ids_map(self.wtxn, &new_fields_ids_map)?;
} }
Setting::Reset => { Setting::Reset => {
self.index.delete_searchable_fields(self.wtxn)?; self.index.delete_all_searchable_fields(self.wtxn)?;
} }
Setting::NotSet => return Ok(false), Setting::NotSet => return Ok(false),
} }