mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-26 23:04:26 +01:00
Make sure that we generate the faceted database when required
This commit is contained in:
parent
b0c0490e85
commit
3c304c89d4
@ -23,7 +23,7 @@ use crate::fields_ids_map::FieldsIdsMap;
|
|||||||
|
|
||||||
pub const CRITERIA_KEY: &str = "criteria";
|
pub const CRITERIA_KEY: &str = "criteria";
|
||||||
pub const DISPLAYED_FIELDS_KEY: &str = "displayed-fields";
|
pub const DISPLAYED_FIELDS_KEY: &str = "displayed-fields";
|
||||||
pub const DISTINCT_ATTRIBUTE_KEY: &str = "distinct-attribute-key";
|
pub const DISTINCT_FIELD_KEY: &str = "distinct-field-key";
|
||||||
pub const DOCUMENTS_IDS_KEY: &str = "documents-ids";
|
pub const DOCUMENTS_IDS_KEY: &str = "documents-ids";
|
||||||
pub const FILTERABLE_FIELDS_KEY: &str = "filterable-fields";
|
pub const FILTERABLE_FIELDS_KEY: &str = "filterable-fields";
|
||||||
pub const FIELDS_DISTRIBUTION_KEY: &str = "fields-distribution";
|
pub const FIELDS_DISTRIBUTION_KEY: &str = "fields-distribution";
|
||||||
@ -365,7 +365,7 @@ impl Index {
|
|||||||
/// Faceted fields are the union of all the filterable, distinct, and Asc/Desc fields.
|
/// Faceted fields are the union of all the filterable, distinct, and Asc/Desc fields.
|
||||||
pub fn faceted_fields(&self, rtxn: &RoTxn) -> heed::Result<HashSet<String>> {
|
pub fn faceted_fields(&self, rtxn: &RoTxn) -> heed::Result<HashSet<String>> {
|
||||||
let filterable_fields = self.filterable_fields(rtxn)?;
|
let filterable_fields = self.filterable_fields(rtxn)?;
|
||||||
let distinct_field = self.distinct_attribute(rtxn)?;
|
let distinct_field = self.distinct_field(rtxn)?;
|
||||||
let asc_desc_fields = self.criteria(rtxn)?
|
let asc_desc_fields = self.criteria(rtxn)?
|
||||||
.into_iter()
|
.into_iter()
|
||||||
.filter_map(|criterion| match criterion {
|
.filter_map(|criterion| match criterion {
|
||||||
@ -465,18 +465,18 @@ impl Index {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Distinct attribute */
|
/* distinct field */
|
||||||
|
|
||||||
pub(crate) fn put_distinct_attribute(&self, wtxn: &mut RwTxn, distinct_attribute: &str) -> heed::Result<()> {
|
pub(crate) fn put_distinct_field(&self, wtxn: &mut RwTxn, distinct_field: &str) -> heed::Result<()> {
|
||||||
self.main.put::<_, Str, Str>(wtxn, DISTINCT_ATTRIBUTE_KEY, distinct_attribute)
|
self.main.put::<_, Str, Str>(wtxn, DISTINCT_FIELD_KEY, distinct_field)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn distinct_attribute<'a>(&self, rtxn: &'a RoTxn) -> heed::Result<Option<&'a str>> {
|
pub fn distinct_field<'a>(&self, rtxn: &'a RoTxn) -> heed::Result<Option<&'a str>> {
|
||||||
self.main.get::<_, Str, Str>(rtxn, DISTINCT_ATTRIBUTE_KEY)
|
self.main.get::<_, Str, Str>(rtxn, DISTINCT_FIELD_KEY)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn delete_distinct_attribute(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
|
pub(crate) fn delete_distinct_field(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
|
||||||
self.main.delete::<_, Str>(wtxn, DISTINCT_ATTRIBUTE_KEY)
|
self.main.delete::<_, Str>(wtxn, DISTINCT_FIELD_KEY)
|
||||||
}
|
}
|
||||||
|
|
||||||
/* criteria */
|
/* criteria */
|
||||||
|
@ -79,7 +79,7 @@ mod test {
|
|||||||
// set distinct and faceted attributes for the index.
|
// set distinct and faceted attributes for the index.
|
||||||
let builder = UpdateBuilder::new(0);
|
let builder = UpdateBuilder::new(0);
|
||||||
let mut update = builder.settings(&mut txn, &index);
|
let mut update = builder.settings(&mut txn, &index);
|
||||||
update.set_distinct_attribute(distinct.to_string());
|
update.set_distinct_field(distinct.to_string());
|
||||||
update.execute(|_, _| ()).unwrap();
|
update.execute(|_, _| ()).unwrap();
|
||||||
|
|
||||||
// add documents to the index
|
// add documents to the index
|
||||||
|
@ -136,7 +136,7 @@ impl<'a> Search<'a> {
|
|||||||
let criteria_builder = criteria::CriteriaBuilder::new(self.rtxn, self.index)?;
|
let criteria_builder = criteria::CriteriaBuilder::new(self.rtxn, self.index)?;
|
||||||
let criteria = criteria_builder.build(query_tree, primitive_query, filtered_candidates)?;
|
let criteria = criteria_builder.build(query_tree, primitive_query, filtered_candidates)?;
|
||||||
|
|
||||||
match self.index.distinct_attribute(self.rtxn)? {
|
match self.index.distinct_field(self.rtxn)? {
|
||||||
None => self.perform_sort(NoopDistinct, matching_words, criteria),
|
None => self.perform_sort(NoopDistinct, matching_words, criteria),
|
||||||
Some(name) => {
|
Some(name) => {
|
||||||
let field_ids_map = self.index.fields_ids_map(self.rtxn)?;
|
let field_ids_map = self.index.fields_ids_map(self.rtxn)?;
|
||||||
|
@ -57,14 +57,14 @@ impl<'t, 'u, 'i> Facets<'t, 'u, 'i> {
|
|||||||
|
|
||||||
pub fn execute(self) -> anyhow::Result<()> {
|
pub fn execute(self) -> anyhow::Result<()> {
|
||||||
self.index.set_updated_at(self.wtxn, &Utc::now())?;
|
self.index.set_updated_at(self.wtxn, &Utc::now())?;
|
||||||
// We get the filterable fields to be able to create the facet levels.
|
// We get the faceted fields to be able to create the facet levels.
|
||||||
let filterable_fields = self.index.filterable_fields_ids(self.wtxn)?;
|
let faceted_fields = self.index.faceted_fields_ids(self.wtxn)?;
|
||||||
|
|
||||||
debug!("Computing and writing the facet values levels docids into LMDB on disk...");
|
debug!("Computing and writing the facet values levels docids into LMDB on disk...");
|
||||||
|
|
||||||
for field_id in filterable_fields {
|
for field_id in faceted_fields {
|
||||||
// Compute and store the filterable strings documents ids.
|
// Compute and store the faceted strings documents ids.
|
||||||
let string_documents_ids = compute_filterable_documents_ids(
|
let string_documents_ids = compute_faceted_documents_ids(
|
||||||
self.wtxn,
|
self.wtxn,
|
||||||
self.index.facet_id_string_docids.remap_key_type::<ByteSlice>(),
|
self.index.facet_id_string_docids.remap_key_type::<ByteSlice>(),
|
||||||
field_id,
|
field_id,
|
||||||
@ -77,8 +77,8 @@ impl<'t, 'u, 'i> Facets<'t, 'u, 'i> {
|
|||||||
field_id,
|
field_id,
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
// Compute and store the filterable numbers documents ids.
|
// Compute and store the faceted numbers documents ids.
|
||||||
let number_documents_ids = compute_filterable_documents_ids(
|
let number_documents_ids = compute_faceted_documents_ids(
|
||||||
self.wtxn,
|
self.wtxn,
|
||||||
self.index.facet_id_f64_docids.remap_key_type::<ByteSlice>(),
|
self.index.facet_id_f64_docids.remap_key_type::<ByteSlice>(),
|
||||||
field_id,
|
field_id,
|
||||||
@ -191,7 +191,7 @@ fn compute_facet_number_levels<'t>(
|
|||||||
writer_into_reader(writer, shrink_size)
|
writer_into_reader(writer, shrink_size)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn compute_filterable_documents_ids(
|
fn compute_faceted_documents_ids(
|
||||||
rtxn: &heed::RoTxn,
|
rtxn: &heed::RoTxn,
|
||||||
db: heed::Database<ByteSlice, CboRoaringBitmapCodec>,
|
db: heed::Database<ByteSlice, CboRoaringBitmapCodec>,
|
||||||
field_id: u8,
|
field_id: u8,
|
||||||
|
@ -68,7 +68,7 @@ pub struct Settings<'a, 't, 'u, 'i> {
|
|||||||
filterable_fields: Setting<HashSet<String>>,
|
filterable_fields: Setting<HashSet<String>>,
|
||||||
criteria: Setting<Vec<String>>,
|
criteria: Setting<Vec<String>>,
|
||||||
stop_words: Setting<BTreeSet<String>>,
|
stop_words: Setting<BTreeSet<String>>,
|
||||||
distinct_attribute: Setting<String>,
|
distinct_field: Setting<String>,
|
||||||
synonyms: Setting<HashMap<String, Vec<String>>>,
|
synonyms: Setting<HashMap<String, Vec<String>>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -94,7 +94,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
|||||||
filterable_fields: Setting::NotSet,
|
filterable_fields: Setting::NotSet,
|
||||||
criteria: Setting::NotSet,
|
criteria: Setting::NotSet,
|
||||||
stop_words: Setting::NotSet,
|
stop_words: Setting::NotSet,
|
||||||
distinct_attribute: Setting::NotSet,
|
distinct_field: Setting::NotSet,
|
||||||
synonyms: Setting::NotSet,
|
synonyms: Setting::NotSet,
|
||||||
update_id,
|
update_id,
|
||||||
}
|
}
|
||||||
@ -144,12 +144,12 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn reset_distinct_attribute(&mut self) {
|
pub fn reset_distinct_field(&mut self) {
|
||||||
self.distinct_attribute = Setting::Reset;
|
self.distinct_field = Setting::Reset;
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn set_distinct_attribute(&mut self, distinct_attribute: String) {
|
pub fn set_distinct_field(&mut self, distinct_field: String) {
|
||||||
self.distinct_attribute = Setting::Set(distinct_attribute);
|
self.distinct_field = Setting::Set(distinct_field);
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn reset_synonyms(&mut self) {
|
pub fn reset_synonyms(&mut self) {
|
||||||
@ -197,7 +197,8 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
|||||||
let output = transform.remap_index_documents(
|
let output = transform.remap_index_documents(
|
||||||
primary_key.to_string(),
|
primary_key.to_string(),
|
||||||
old_fields_ids_map,
|
old_fields_ids_map,
|
||||||
fields_ids_map.clone())?;
|
fields_ids_map.clone(),
|
||||||
|
)?;
|
||||||
|
|
||||||
// We clear the full database (words-fst, documents ids and documents content).
|
// We clear the full database (words-fst, documents ids and documents content).
|
||||||
ClearDocuments::new(self.wtxn, self.index, self.update_id).execute()?;
|
ClearDocuments::new(self.wtxn, self.index, self.update_id).execute()?;
|
||||||
@ -214,6 +215,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
|||||||
indexing_builder.chunk_fusing_shrink_size = self.chunk_fusing_shrink_size;
|
indexing_builder.chunk_fusing_shrink_size = self.chunk_fusing_shrink_size;
|
||||||
indexing_builder.thread_pool = self.thread_pool;
|
indexing_builder.thread_pool = self.thread_pool;
|
||||||
indexing_builder.execute_raw(output, &cb)?;
|
indexing_builder.execute_raw(output, &cb)?;
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -242,18 +244,18 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
|||||||
Ok(true)
|
Ok(true)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn update_distinct_attribute(&mut self) -> anyhow::Result<bool> {
|
fn update_distinct_field(&mut self) -> anyhow::Result<bool> {
|
||||||
match self.distinct_attribute {
|
match self.distinct_field {
|
||||||
Setting::Set(ref attr) => {
|
Setting::Set(ref attr) => {
|
||||||
let mut fields_ids_map = self.index.fields_ids_map(self.wtxn)?;
|
let mut fields_ids_map = self.index.fields_ids_map(self.wtxn)?;
|
||||||
fields_ids_map
|
fields_ids_map
|
||||||
.insert(attr)
|
.insert(attr)
|
||||||
.context("field id limit exceeded")?;
|
.context("field id limit exceeded")?;
|
||||||
|
|
||||||
self.index.put_distinct_attribute(self.wtxn, &attr)?;
|
self.index.put_distinct_field(self.wtxn, &attr)?;
|
||||||
self.index.put_fields_ids_map(self.wtxn, &fields_ids_map)?;
|
self.index.put_fields_ids_map(self.wtxn, &fields_ids_map)?;
|
||||||
}
|
}
|
||||||
Setting::Reset => { self.index.delete_distinct_attribute(self.wtxn)?; },
|
Setting::Reset => { self.index.delete_distinct_field(self.wtxn)?; },
|
||||||
Setting::NotSet => return Ok(false),
|
Setting::NotSet => return Ok(false),
|
||||||
}
|
}
|
||||||
Ok(true)
|
Ok(true)
|
||||||
@ -380,7 +382,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn update_facets(&mut self) -> anyhow::Result<bool> {
|
fn update_filterable(&mut self) -> anyhow::Result<()> {
|
||||||
match self.filterable_fields {
|
match self.filterable_fields {
|
||||||
Setting::Set(ref fields) => {
|
Setting::Set(ref fields) => {
|
||||||
let mut fields_ids_map = self.index.fields_ids_map(self.wtxn)?;
|
let mut fields_ids_map = self.index.fields_ids_map(self.wtxn)?;
|
||||||
@ -393,9 +395,9 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
|||||||
self.index.put_fields_ids_map(self.wtxn, &fields_ids_map)?;
|
self.index.put_fields_ids_map(self.wtxn, &fields_ids_map)?;
|
||||||
}
|
}
|
||||||
Setting::Reset => { self.index.delete_filterable_fields(self.wtxn)?; }
|
Setting::Reset => { self.index.delete_filterable_fields(self.wtxn)?; }
|
||||||
Setting::NotSet => return Ok(false)
|
Setting::NotSet => (),
|
||||||
}
|
}
|
||||||
Ok(true)
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn update_criteria(&mut self) -> anyhow::Result<()> {
|
fn update_criteria(&mut self) -> anyhow::Result<()> {
|
||||||
@ -419,20 +421,29 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
|||||||
F: Fn(UpdateIndexingStep, u64) + Sync
|
F: Fn(UpdateIndexingStep, u64) + Sync
|
||||||
{
|
{
|
||||||
self.index.set_updated_at(self.wtxn, &Utc::now())?;
|
self.index.set_updated_at(self.wtxn, &Utc::now())?;
|
||||||
|
|
||||||
|
let old_faceted_fields = self.index.faceted_fields(&self.wtxn)?;
|
||||||
let old_fields_ids_map = self.index.fields_ids_map(&self.wtxn)?;
|
let old_fields_ids_map = self.index.fields_ids_map(&self.wtxn)?;
|
||||||
|
|
||||||
self.update_displayed()?;
|
self.update_displayed()?;
|
||||||
let stop_words_updated = self.update_stop_words()?;
|
self.update_filterable()?;
|
||||||
let facets_updated = self.update_facets()?;
|
self.update_distinct_field()?;
|
||||||
self.update_distinct_attribute()?;
|
|
||||||
// update_criteria MUST be called after update_facets, since criterion fields must be set
|
|
||||||
// as facets.
|
|
||||||
self.update_criteria()?;
|
self.update_criteria()?;
|
||||||
|
|
||||||
|
// If there are new faceted fields we indicate that we must reindex as we must
|
||||||
|
// index new fields as facets. It means that the distinct attribute,
|
||||||
|
// an Asc/Desc criterion or a filterable attribute has been added or removed.
|
||||||
|
let new_faceted_fields = self.index.faceted_fields(&self.wtxn)?;
|
||||||
|
let faceted_updated = old_faceted_fields != new_faceted_fields;
|
||||||
|
|
||||||
|
let stop_words_updated = self.update_stop_words()?;
|
||||||
let synonyms_updated = self.update_synonyms()?;
|
let synonyms_updated = self.update_synonyms()?;
|
||||||
let searchable_updated = self.update_searchable()?;
|
let searchable_updated = self.update_searchable()?;
|
||||||
|
|
||||||
if stop_words_updated || facets_updated || synonyms_updated || searchable_updated {
|
if stop_words_updated || faceted_updated || synonyms_updated || searchable_updated {
|
||||||
self.reindex(&progress_callback, old_fields_ids_map)?;
|
self.reindex(&progress_callback, old_fields_ids_map)?;
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -444,7 +455,7 @@ mod tests {
|
|||||||
use maplit::{btreeset, hashmap, hashset};
|
use maplit::{btreeset, hashmap, hashset};
|
||||||
use big_s::S;
|
use big_s::S;
|
||||||
|
|
||||||
use crate::{Criterion, FilterCondition};
|
use crate::{Criterion, FilterCondition, SearchResult};
|
||||||
use crate::update::{IndexDocuments, UpdateFormat};
|
use crate::update::{IndexDocuments, UpdateFormat};
|
||||||
|
|
||||||
use super::*;
|
use super::*;
|
||||||
@ -669,6 +680,88 @@ mod tests {
|
|||||||
assert_eq!(count, 4);
|
assert_eq!(count, 4);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn set_asc_desc_field() {
|
||||||
|
let path = tempfile::tempdir().unwrap();
|
||||||
|
let mut options = EnvOpenOptions::new();
|
||||||
|
options.map_size(10 * 1024 * 1024); // 10 MB
|
||||||
|
let index = Index::new(options, &path).unwrap();
|
||||||
|
|
||||||
|
// Set an ascending criterion on the age field.
|
||||||
|
let mut wtxn = index.write_txn().unwrap();
|
||||||
|
let mut builder = Settings::new(&mut wtxn, &index, 0);
|
||||||
|
// Don't display the generated `id` field.
|
||||||
|
builder.set_displayed_fields(vec![S("name"), S("age")]);
|
||||||
|
builder.set_criteria(vec![S("asc(age)")]);
|
||||||
|
builder.execute(|_, _| ()).unwrap();
|
||||||
|
|
||||||
|
// Then index some documents.
|
||||||
|
let content = &br#"[
|
||||||
|
{ "name": "kevin", "age": 23 },
|
||||||
|
{ "name": "kevina", "age": 21 },
|
||||||
|
{ "name": "benoit", "age": 34 }
|
||||||
|
]"#[..];
|
||||||
|
let mut builder = IndexDocuments::new(&mut wtxn, &index, 1);
|
||||||
|
builder.update_format(UpdateFormat::Json);
|
||||||
|
builder.enable_autogenerate_docids();
|
||||||
|
builder.execute(content, |_, _| ()).unwrap();
|
||||||
|
wtxn.commit().unwrap();
|
||||||
|
|
||||||
|
// Run an empty query just to ensure that the search results are ordered.
|
||||||
|
let rtxn = index.read_txn().unwrap();
|
||||||
|
let SearchResult { documents_ids, .. } = index.search(&rtxn).execute().unwrap();
|
||||||
|
let documents = index.documents(&rtxn, documents_ids).unwrap();
|
||||||
|
|
||||||
|
// Fetch the documents "age" field in the order in which the documents appear.
|
||||||
|
let age_field_id = index.fields_ids_map(&rtxn).unwrap().id("age").unwrap();
|
||||||
|
let iter = documents.into_iter().map(|(_, doc)| {
|
||||||
|
let bytes = doc.get(age_field_id).unwrap();
|
||||||
|
let string = std::str::from_utf8(bytes).unwrap();
|
||||||
|
string.parse::<u32>().unwrap()
|
||||||
|
});
|
||||||
|
|
||||||
|
assert_eq!(iter.collect::<Vec<_>>(), vec![21, 23, 34]);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn set_distinct_field() {
|
||||||
|
let path = tempfile::tempdir().unwrap();
|
||||||
|
let mut options = EnvOpenOptions::new();
|
||||||
|
options.map_size(10 * 1024 * 1024); // 10 MB
|
||||||
|
let index = Index::new(options, &path).unwrap();
|
||||||
|
|
||||||
|
// Set the distinct field to be the age.
|
||||||
|
let mut wtxn = index.write_txn().unwrap();
|
||||||
|
let mut builder = Settings::new(&mut wtxn, &index, 0);
|
||||||
|
// Don't display the generated `id` field.
|
||||||
|
builder.set_displayed_fields(vec![S("name"), S("age")]);
|
||||||
|
builder.set_distinct_field(S("age"));
|
||||||
|
builder.execute(|_, _| ()).unwrap();
|
||||||
|
|
||||||
|
// Then index some documents.
|
||||||
|
let content = &br#"[
|
||||||
|
{ "name": "kevin", "age": 23 },
|
||||||
|
{ "name": "kevina", "age": 21 },
|
||||||
|
{ "name": "benoit", "age": 34 },
|
||||||
|
{ "name": "bernard", "age": 34 },
|
||||||
|
{ "name": "bertrand", "age": 34 },
|
||||||
|
{ "name": "bernie", "age": 34 },
|
||||||
|
{ "name": "ben", "age": 34 }
|
||||||
|
]"#[..];
|
||||||
|
let mut builder = IndexDocuments::new(&mut wtxn, &index, 1);
|
||||||
|
builder.update_format(UpdateFormat::Json);
|
||||||
|
builder.enable_autogenerate_docids();
|
||||||
|
builder.execute(content, |_, _| ()).unwrap();
|
||||||
|
wtxn.commit().unwrap();
|
||||||
|
|
||||||
|
// Run an empty query just to ensure that the search results are deduplicated by age.
|
||||||
|
let rtxn = index.read_txn().unwrap();
|
||||||
|
let SearchResult { documents_ids, .. } = index.search(&rtxn).execute().unwrap();
|
||||||
|
|
||||||
|
// There must be exactly one document per distinct age (21, 23 and 34).
|
||||||
|
assert_eq!(documents_ids.len(), 3);
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn default_stop_words() {
|
fn default_stop_words() {
|
||||||
let path = tempfile::tempdir().unwrap();
|
let path = tempfile::tempdir().unwrap();
|
||||||
|
Loading…
Reference in New Issue
Block a user