Make sure that we generate the faceted database when required

parent b0c0490e85
commit 3c304c89d4
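
This change renames the distinct setting from `distinct_attribute` to `distinct_field` across the index, search, and settings code, makes the facet-levels update (`Facets::execute`) iterate over every faceted field instead of only the filterable ones, and makes `Settings::execute` compare the set of faceted fields before and after applying an update, so that the faceted database is (re)generated whenever a filterable field, the distinct field, or an Asc/Desc criterion field is added or removed.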
@@ -23,7 +23,7 @@ use crate::fields_ids_map::FieldsIdsMap;
 pub const CRITERIA_KEY: &str = "criteria";
 pub const DISPLAYED_FIELDS_KEY: &str = "displayed-fields";
-pub const DISTINCT_ATTRIBUTE_KEY: &str = "distinct-attribute-key";
+pub const DISTINCT_FIELD_KEY: &str = "distinct-field-key";
 pub const DOCUMENTS_IDS_KEY: &str = "documents-ids";
 pub const FILTERABLE_FIELDS_KEY: &str = "filterable-fields";
 pub const FIELDS_DISTRIBUTION_KEY: &str = "fields-distribution";
@@ -365,7 +365,7 @@ impl Index {
     /// Faceted fields are the union of all the filterable, distinct, and Asc/Desc fields.
     pub fn faceted_fields(&self, rtxn: &RoTxn) -> heed::Result<HashSet<String>> {
         let filterable_fields = self.filterable_fields(rtxn)?;
-        let distinct_field = self.distinct_attribute(rtxn)?;
+        let distinct_field = self.distinct_field(rtxn)?;
         let asc_desc_fields = self.criteria(rtxn)?
             .into_iter()
             .filter_map(|criterion| match criterion {
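
For context on this hunk: the doc comment describes `faceted_fields` as a union of three sources. Below is a minimal standalone sketch of that union, assuming `Criterion::Asc`/`Criterion::Desc` carry the field name (the enum itself is not shown in this diff); the real method lives on `Index` and reads these values from LMDB through `rtxn`.

    use std::collections::HashSet;

    // Assumed shape: only the Asc/Desc variants matter for this sketch.
    enum Criterion {
        Asc(String),
        Desc(String),
        Words,
    }

    fn faceted_fields(
        filterable_fields: HashSet<String>,
        distinct_field: Option<String>,
        criteria: Vec<Criterion>,
    ) -> HashSet<String> {
        // Start from the filterable fields...
        let mut faceted = filterable_fields;
        // ...add the distinct field if one is set...
        faceted.extend(distinct_field);
        // ...and add every field referenced by an Asc/Desc criterion.
        faceted.extend(criteria.into_iter().filter_map(|criterion| match criterion {
            Criterion::Asc(field) | Criterion::Desc(field) => Some(field),
            _ => None, // other criteria carry no field
        }));
        faceted
    }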
@@ -465,18 +465,18 @@ impl Index {
         }
     }

-    /* Distinct attribute */
+    /* distinct field */

-    pub(crate) fn put_distinct_attribute(&self, wtxn: &mut RwTxn, distinct_attribute: &str) -> heed::Result<()> {
-        self.main.put::<_, Str, Str>(wtxn, DISTINCT_ATTRIBUTE_KEY, distinct_attribute)
+    pub(crate) fn put_distinct_field(&self, wtxn: &mut RwTxn, distinct_field: &str) -> heed::Result<()> {
+        self.main.put::<_, Str, Str>(wtxn, DISTINCT_FIELD_KEY, distinct_field)
     }

-    pub fn distinct_attribute<'a>(&self, rtxn: &'a RoTxn) -> heed::Result<Option<&'a str>> {
-        self.main.get::<_, Str, Str>(rtxn, DISTINCT_ATTRIBUTE_KEY)
+    pub fn distinct_field<'a>(&self, rtxn: &'a RoTxn) -> heed::Result<Option<&'a str>> {
+        self.main.get::<_, Str, Str>(rtxn, DISTINCT_FIELD_KEY)
     }

-    pub(crate) fn delete_distinct_attribute(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
-        self.main.delete::<_, Str>(wtxn, DISTINCT_ATTRIBUTE_KEY)
+    pub(crate) fn delete_distinct_field(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
+        self.main.delete::<_, Str>(wtxn, DISTINCT_FIELD_KEY)
     }

     /* criteria */
@@ -79,7 +79,7 @@ mod test {
         // set distinct and faceted attributes for the index.
         let builder = UpdateBuilder::new(0);
         let mut update = builder.settings(&mut txn, &index);
-        update.set_distinct_attribute(distinct.to_string());
+        update.set_distinct_field(distinct.to_string());
         update.execute(|_, _| ()).unwrap();

         // add documents to the index
@@ -136,7 +136,7 @@ impl<'a> Search<'a> {
         let criteria_builder = criteria::CriteriaBuilder::new(self.rtxn, self.index)?;
         let criteria = criteria_builder.build(query_tree, primitive_query, filtered_candidates)?;

-        match self.index.distinct_attribute(self.rtxn)? {
+        match self.index.distinct_field(self.rtxn)? {
             None => self.perform_sort(NoopDistinct, matching_words, criteria),
             Some(name) => {
                 let field_ids_map = self.index.fields_ids_map(self.rtxn)?;
@@ -57,14 +57,14 @@ impl<'t, 'u, 'i> Facets<'t, 'u, 'i> {

     pub fn execute(self) -> anyhow::Result<()> {
         self.index.set_updated_at(self.wtxn, &Utc::now())?;
-        // We get the filterable fields to be able to create the facet levels.
-        let filterable_fields = self.index.filterable_fields_ids(self.wtxn)?;
+        // We get the faceted fields to be able to create the facet levels.
+        let faceted_fields = self.index.faceted_fields_ids(self.wtxn)?;

         debug!("Computing and writing the facet values levels docids into LMDB on disk...");

-        for field_id in filterable_fields {
-            // Compute and store the filterable strings documents ids.
-            let string_documents_ids = compute_filterable_documents_ids(
+        for field_id in faceted_fields {
+            // Compute and store the faceted strings documents ids.
+            let string_documents_ids = compute_faceted_documents_ids(
                 self.wtxn,
                 self.index.facet_id_string_docids.remap_key_type::<ByteSlice>(),
                 field_id,
@@ -77,8 +77,8 @@ impl<'t, 'u, 'i> Facets<'t, 'u, 'i> {
                 field_id,
             )?;

-            // Compute and store the filterable numbers documents ids.
-            let number_documents_ids = compute_filterable_documents_ids(
+            // Compute and store the faceted numbers documents ids.
+            let number_documents_ids = compute_faceted_documents_ids(
                 self.wtxn,
                 self.index.facet_id_f64_docids.remap_key_type::<ByteSlice>(),
                 field_id,
@@ -191,7 +191,7 @@ fn compute_facet_number_levels<'t>(
     writer_into_reader(writer, shrink_size)
 }

-fn compute_filterable_documents_ids(
+fn compute_faceted_documents_ids(
     rtxn: &heed::RoTxn,
     db: heed::Database<ByteSlice, CboRoaringBitmapCodec>,
     field_id: u8,
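
The body of `compute_faceted_documents_ids` falls outside this hunk; judging from its signature and call sites, it presumably unions the documents ids stored under every facet key of the given field. A rough standalone sketch of that idea follows, with an in-memory map standing in for the LMDB prefix iteration (`CboRoaringBitmapCodec` decodes to a `roaring::RoaringBitmap`); this is an assumption, not the actual implementation.

    use std::collections::BTreeMap;
    use roaring::RoaringBitmap;

    // A BTreeMap keyed by (field_id, facet value bytes) stands in for the
    // LMDB database iterated with a `field_id` prefix.
    fn compute_faceted_documents_ids(
        db: &BTreeMap<(u8, Vec<u8>), RoaringBitmap>,
        field_id: u8,
    ) -> RoaringBitmap {
        let mut documents_ids = RoaringBitmap::new();
        // Union the docids of every facet value belonging to this field.
        for ((fid, _value), docids) in db.iter() {
            if *fid == field_id {
                documents_ids |= docids;
            }
        }
        documents_ids
    }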
@@ -68,7 +68,7 @@ pub struct Settings<'a, 't, 'u, 'i> {
     filterable_fields: Setting<HashSet<String>>,
     criteria: Setting<Vec<String>>,
     stop_words: Setting<BTreeSet<String>>,
-    distinct_attribute: Setting<String>,
+    distinct_field: Setting<String>,
     synonyms: Setting<HashMap<String, Vec<String>>>,
 }

@@ -94,7 +94,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
             filterable_fields: Setting::NotSet,
             criteria: Setting::NotSet,
             stop_words: Setting::NotSet,
-            distinct_attribute: Setting::NotSet,
+            distinct_field: Setting::NotSet,
             synonyms: Setting::NotSet,
             update_id,
         }
@@ -144,12 +144,12 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
         }
     }

-    pub fn reset_distinct_attribute(&mut self) {
-        self.distinct_attribute = Setting::Reset;
+    pub fn reset_distinct_field(&mut self) {
+        self.distinct_field = Setting::Reset;
     }

-    pub fn set_distinct_attribute(&mut self, distinct_attribute: String) {
-        self.distinct_attribute = Setting::Set(distinct_attribute);
+    pub fn set_distinct_field(&mut self, distinct_field: String) {
+        self.distinct_field = Setting::Set(distinct_field);
     }

     pub fn reset_synonyms(&mut self) {
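
The `Setting` states used by these builder methods follow a three-way pattern; the shape below is assumed from usage in this diff, not shown in it.

    // Assumed from usage: a builder field can be newly set, explicitly reset, or untouched.
    enum Setting<T> {
        Set(T),  // the update carries a new value
        Reset,   // the update asks to delete the stored value
        NotSet,  // the update does not touch this setting
    }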
@@ -197,7 +197,8 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
         let output = transform.remap_index_documents(
             primary_key.to_string(),
             old_fields_ids_map,
-            fields_ids_map.clone())?;
+            fields_ids_map.clone(),
+        )?;

         // We clear the full database (words-fst, documents ids and documents content).
         ClearDocuments::new(self.wtxn, self.index, self.update_id).execute()?;
@@ -214,6 +215,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
         indexing_builder.chunk_fusing_shrink_size = self.chunk_fusing_shrink_size;
         indexing_builder.thread_pool = self.thread_pool;
         indexing_builder.execute_raw(output, &cb)?;

         Ok(())
     }
@@ -242,18 +244,18 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
         Ok(true)
     }

-    fn update_distinct_attribute(&mut self) -> anyhow::Result<bool> {
-        match self.distinct_attribute {
+    fn update_distinct_field(&mut self) -> anyhow::Result<bool> {
+        match self.distinct_field {
             Setting::Set(ref attr) => {
                 let mut fields_ids_map = self.index.fields_ids_map(self.wtxn)?;
                 fields_ids_map
                     .insert(attr)
                     .context("field id limit exceeded")?;

-                self.index.put_distinct_attribute(self.wtxn, &attr)?;
+                self.index.put_distinct_field(self.wtxn, &attr)?;
                 self.index.put_fields_ids_map(self.wtxn, &fields_ids_map)?;
             }
-            Setting::Reset => { self.index.delete_distinct_attribute(self.wtxn)?; },
+            Setting::Reset => { self.index.delete_distinct_field(self.wtxn)?; },
             Setting::NotSet => return Ok(false),
         }
         Ok(true)
@@ -380,7 +382,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
         }
     }

-    fn update_facets(&mut self) -> anyhow::Result<bool> {
+    fn update_filterable(&mut self) -> anyhow::Result<()> {
         match self.filterable_fields {
             Setting::Set(ref fields) => {
                 let mut fields_ids_map = self.index.fields_ids_map(self.wtxn)?;
@@ -393,9 +395,9 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
                 self.index.put_fields_ids_map(self.wtxn, &fields_ids_map)?;
             }
             Setting::Reset => { self.index.delete_filterable_fields(self.wtxn)?; }
-            Setting::NotSet => return Ok(false)
+            Setting::NotSet => (),
         }
-        Ok(true)
+        Ok(())
     }

     fn update_criteria(&mut self) -> anyhow::Result<()> {
@@ -419,20 +421,29 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
         F: Fn(UpdateIndexingStep, u64) + Sync
     {
         self.index.set_updated_at(self.wtxn, &Utc::now())?;

+        let old_faceted_fields = self.index.faceted_fields(&self.wtxn)?;
         let old_fields_ids_map = self.index.fields_ids_map(&self.wtxn)?;

         self.update_displayed()?;
-        let stop_words_updated = self.update_stop_words()?;
-        let facets_updated = self.update_facets()?;
-        self.update_distinct_attribute()?;
-        // update_criteria MUST be called after update_facets, since criterion fields must be set
-        // as facets.
+        self.update_filterable()?;
+        self.update_distinct_field()?;
         self.update_criteria()?;

+        // If there are new faceted fields, we indicate that we must reindex as we must
+        // index new fields as facets. It means that the distinct field,
+        // an Asc/Desc criterion or a filterable attribute has been added or removed.
+        let new_faceted_fields = self.index.faceted_fields(&self.wtxn)?;
+        let faceted_updated = old_faceted_fields != new_faceted_fields;
+
+        let stop_words_updated = self.update_stop_words()?;
         let synonyms_updated = self.update_synonyms()?;
         let searchable_updated = self.update_searchable()?;

-        if stop_words_updated || facets_updated || synonyms_updated || searchable_updated {
+        if stop_words_updated || faceted_updated || synonyms_updated || searchable_updated {
             self.reindex(&progress_callback, old_fields_ids_map)?;
         }

         Ok(())
     }
 }
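
This hunk carries the point of the commit: `update_filterable` and `update_distinct_field` no longer decide on their own whether a reindex is needed; instead `execute` snapshots the faceted fields before applying the updates and compares them afterwards, so any change to a filterable field, the distinct field, or an Asc/Desc criterion field triggers regeneration of the faceted database. A standalone sketch of that decision, with plain sets standing in for the index reads:

    use std::collections::HashSet;

    fn needs_reindex(
        old_faceted_fields: &HashSet<String>,
        new_faceted_fields: &HashSet<String>,
        stop_words_updated: bool,
        synonyms_updated: bool,
        searchable_updated: bool,
    ) -> bool {
        // Any change to the faceted set (a filterable field, the distinct field,
        // or an Asc/Desc criterion field added or removed) requires regenerating
        // the faceted databases, hence a full reindex.
        let faceted_updated = old_faceted_fields != new_faceted_fields;
        stop_words_updated || faceted_updated || synonyms_updated || searchable_updated
    }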
@@ -444,7 +455,7 @@ mod tests {
     use maplit::{btreeset, hashmap, hashset};
     use big_s::S;

-    use crate::{Criterion, FilterCondition};
+    use crate::{Criterion, FilterCondition, SearchResult};
     use crate::update::{IndexDocuments, UpdateFormat};

     use super::*;
@@ -669,6 +680,88 @@ mod tests {
         assert_eq!(count, 4);
     }

+    #[test]
+    fn set_asc_desc_field() {
+        let path = tempfile::tempdir().unwrap();
+        let mut options = EnvOpenOptions::new();
+        options.map_size(10 * 1024 * 1024); // 10 MB
+        let index = Index::new(options, &path).unwrap();
+
+        // Set an Asc ranking criterion on the "age" field.
+        let mut wtxn = index.write_txn().unwrap();
+        let mut builder = Settings::new(&mut wtxn, &index, 0);
+        // Don't display the generated `id` field.
+        builder.set_displayed_fields(vec![S("name"), S("age")]);
+        builder.set_criteria(vec![S("asc(age)")]);
+        builder.execute(|_, _| ()).unwrap();
+
+        // Then index some documents.
+        let content = &br#"[
+            { "name": "kevin", "age": 23 },
+            { "name": "kevina", "age": 21 },
+            { "name": "benoit", "age": 34 }
+        ]"#[..];
+        let mut builder = IndexDocuments::new(&mut wtxn, &index, 1);
+        builder.update_format(UpdateFormat::Json);
+        builder.enable_autogenerate_docids();
+        builder.execute(content, |_, _| ()).unwrap();
+        wtxn.commit().unwrap();
+
+        // Run an empty query just to ensure that the search results are ordered.
+        let rtxn = index.read_txn().unwrap();
+        let SearchResult { documents_ids, .. } = index.search(&rtxn).execute().unwrap();
+        let documents = index.documents(&rtxn, documents_ids).unwrap();
+
+        // Fetch the documents' "age" field in the order in which the documents appear.
+        let age_field_id = index.fields_ids_map(&rtxn).unwrap().id("age").unwrap();
+        let iter = documents.into_iter().map(|(_, doc)| {
+            let bytes = doc.get(age_field_id).unwrap();
+            let string = std::str::from_utf8(bytes).unwrap();
+            string.parse::<u32>().unwrap()
+        });
+
+        assert_eq!(iter.collect::<Vec<_>>(), vec![21, 23, 34]);
+    }
+
+    #[test]
+    fn set_distinct_field() {
+        let path = tempfile::tempdir().unwrap();
+        let mut options = EnvOpenOptions::new();
+        options.map_size(10 * 1024 * 1024); // 10 MB
+        let index = Index::new(options, &path).unwrap();
+
+        // Set "age" as the distinct field of the index.
+        let mut wtxn = index.write_txn().unwrap();
+        let mut builder = Settings::new(&mut wtxn, &index, 0);
+        // Don't display the generated `id` field.
+        builder.set_displayed_fields(vec![S("name"), S("age")]);
+        builder.set_distinct_field(S("age"));
+        builder.execute(|_, _| ()).unwrap();
+
+        // Then index some documents.
+        let content = &br#"[
+            { "name": "kevin", "age": 23 },
+            { "name": "kevina", "age": 21 },
+            { "name": "benoit", "age": 34 },
+            { "name": "bernard", "age": 34 },
+            { "name": "bertrand", "age": 34 },
+            { "name": "bernie", "age": 34 },
+            { "name": "ben", "age": 34 }
+        ]"#[..];
+        let mut builder = IndexDocuments::new(&mut wtxn, &index, 1);
+        builder.update_format(UpdateFormat::Json);
+        builder.enable_autogenerate_docids();
+        builder.execute(content, |_, _| ()).unwrap();
+        wtxn.commit().unwrap();
+
+        // Run an empty query just to ensure that the search results are deduplicated.
+        let rtxn = index.read_txn().unwrap();
+        let SearchResult { documents_ids, .. } = index.search(&rtxn).execute().unwrap();
+
+        // Only one document per distinct age value must be returned: 3 documents in total.
+        assert_eq!(documents_ids.len(), 3);
+    }
+
     #[test]
     fn default_stop_words() {
         let path = tempfile::tempdir().unwrap();