MeiliSearch/meilisearch-core/src/update/settings_update.rs

314 lines
9.2 KiB
Rust
Raw Normal View History

2020-01-13 19:10:58 +01:00
use std::collections::{BTreeMap, BTreeSet};
2020-01-08 14:17:38 +01:00
use heed::Result as ZResult;
use fst::{set::OpBuilder, SetBuilder};
use sdset::SetBuf;
2020-01-13 19:10:58 +01:00
use meilisearch_schema::Schema;
2020-01-08 14:17:38 +01:00
use crate::database::{MainT, UpdateT};
use crate::settings::{UpdateState, SettingsUpdate, RankingRule};
2020-01-08 14:17:38 +01:00
use crate::update::documents_addition::reindex_all_documents;
use crate::update::{next_update_id, Update};
2020-01-13 19:10:58 +01:00
use crate::{store, MResult, Error};
2020-01-08 14:17:38 +01:00
/// Queues a settings update in the updates store.
///
/// A fresh update id is obtained from `next_update_id`, the settings are
/// wrapped with `Update::settings` and stored under that id.
///
/// Returns the id under which the update was stored.
///
/// # Errors
///
/// Propagates any error returned while computing the id or writing the update.
pub fn push_settings_update(
    writer: &mut heed::RwTxn<UpdateT>,
    updates_store: store::Updates,
    updates_results_store: store::UpdatesResults,
    settings: SettingsUpdate,
) -> ZResult<u64> {
    let update_id = next_update_id(writer, updates_store, updates_results_store)?;
    updates_store.put_update(writer, update_id, &Update::settings(settings))?;

    Ok(update_id)
}
pub fn apply_settings_update(
writer: &mut heed::RwTxn<MainT>,
index: &store::Index,
settings: SettingsUpdate,
) -> MResult<()> {
let mut must_reindex = false;
2020-01-13 19:10:58 +01:00
let mut schema = match index.main.schema(writer)? {
Some(schema) => schema,
None => {
match settings.primary_key.clone() {
UpdateState::Update(id) => Schema::with_primary_key(&id),
_ => return Err(Error::MissingPrimaryKey)
2020-01-13 19:10:58 +01:00
}
}
};
2020-01-08 14:17:38 +01:00
match settings.ranking_rules {
UpdateState::Update(v) => {
2020-02-11 15:16:02 +01:00
let ranked_field: Vec<&str> = v.iter().filter_map(RankingRule::field).collect();
schema.update_ranked(&ranked_field)?;
2020-02-11 15:16:02 +01:00
index.main.put_ranking_rules(writer, &v)?;
2020-01-22 14:29:39 +01:00
must_reindex = true;
2020-01-08 14:17:38 +01:00
},
UpdateState::Clear => {
index.main.delete_ranking_rules(writer)?;
2020-03-02 14:34:29 +01:00
schema.clear_ranked();
2020-01-22 14:29:39 +01:00
must_reindex = true;
2020-01-08 14:17:38 +01:00
},
2020-01-29 18:30:21 +01:00
UpdateState::Nothing => (),
2020-01-08 14:17:38 +01:00
}
2020-01-27 08:52:36 +01:00
match settings.distinct_attribute {
2020-01-08 14:17:38 +01:00
UpdateState::Update(v) => {
2020-06-16 10:45:17 +02:00
let field_id = schema.insert(&v)?;
index.main.put_distinct_attribute(writer, field_id)?;
2020-01-08 14:17:38 +01:00
},
UpdateState::Clear => {
index.main.delete_distinct_attribute(writer)?;
2020-01-08 14:17:38 +01:00
},
2020-01-29 18:30:21 +01:00
UpdateState::Nothing => (),
2020-01-08 14:17:38 +01:00
}
2020-01-13 19:10:58 +01:00
2020-01-29 18:30:21 +01:00
match settings.searchable_attributes.clone() {
2020-01-22 14:29:39 +01:00
UpdateState::Update(v) => {
2020-07-16 16:12:23 +02:00
if v.iter().any(|e| e == "*") || v.is_empty() {
2020-07-08 21:01:26 +02:00
schema.set_all_fields_as_indexed();
} else {
schema.update_indexed(v)?;
}
2020-01-22 14:29:39 +01:00
must_reindex = true;
},
2020-01-13 19:10:58 +01:00
UpdateState::Clear => {
2020-02-26 18:49:17 +01:00
schema.set_all_fields_as_indexed();
2020-01-22 14:29:39 +01:00
must_reindex = true;
2020-01-08 14:17:38 +01:00
},
2020-01-13 19:10:58 +01:00
UpdateState::Nothing => (),
}
2020-01-29 18:30:21 +01:00
match settings.displayed_attributes.clone() {
2020-07-08 21:01:26 +02:00
UpdateState::Update(v) => {
2020-07-16 16:12:23 +02:00
if v.contains("*") || v.is_empty() {
2020-07-08 21:01:26 +02:00
schema.set_all_fields_as_displayed();
} else {
schema.update_displayed(v)?
}
},
2020-01-13 19:10:58 +01:00
UpdateState::Clear => {
2020-02-26 18:49:17 +01:00
schema.set_all_fields_as_displayed();
2020-01-08 14:17:38 +01:00
},
2020-01-13 19:10:58 +01:00
UpdateState::Nothing => (),
2020-02-11 15:16:02 +01:00
}
2020-05-05 22:27:06 +02:00
match settings.attributes_for_faceting {
UpdateState::Update(attrs) => {
apply_attributes_for_faceting_update(writer, index, &mut schema, &attrs)?;
must_reindex = true;
},
UpdateState::Clear => {
index.main.delete_attributes_for_faceting(writer)?;
index.facets.clear(writer)?;
},
UpdateState::Nothing => (),
}
2020-02-11 15:16:02 +01:00
index.main.put_schema(writer, &schema)?;
2020-01-08 14:17:38 +01:00
match settings.stop_words {
UpdateState::Update(stop_words) => {
if apply_stop_words_update(writer, index, stop_words)? {
must_reindex = true;
}
},
UpdateState::Clear => {
if apply_stop_words_update(writer, index, BTreeSet::new())? {
must_reindex = true;
}
},
2020-02-11 15:16:02 +01:00
UpdateState::Nothing => (),
2020-01-08 14:17:38 +01:00
}
match settings.synonyms {
UpdateState::Update(synonyms) => apply_synonyms_update(writer, index, synonyms)?,
UpdateState::Clear => apply_synonyms_update(writer, index, BTreeMap::new())?,
2020-02-11 15:16:02 +01:00
UpdateState::Nothing => (),
2020-01-08 14:17:38 +01:00
}
if must_reindex {
2020-02-02 22:59:19 +01:00
reindex_all_documents(writer, index)?;
2020-01-08 14:17:38 +01:00
}
2020-02-02 22:59:19 +01:00
2020-01-08 14:17:38 +01:00
Ok(())
}
2020-05-05 22:27:06 +02:00
/// Stores `attributes` as the index's attributes for faceting.
///
/// Each attribute name is inserted into `schema` to obtain its field id; the
/// ids are then turned into a `SetBuf` with `SetBuf::from_dirty` and written
/// to the main store.
///
/// # Errors
///
/// Stops at, and propagates, the first schema insertion error, as well as any
/// error from the main store.
fn apply_attributes_for_faceting_update(
    writer: &mut heed::RwTxn<MainT>,
    index: &store::Index,
    schema: &mut Schema,
    attributes: &[String]
) -> MResult<()> {
    let field_ids = attributes
        .iter()
        .map(|name| schema.insert(name))
        .collect::<Result<Vec<_>, _>>()?;

    let faceted_fields = SetBuf::from_dirty(field_ids);
    index.main.put_attributes_for_faceting(writer, &faceted_fields)?;

    Ok(())
}
2020-01-08 14:17:38 +01:00
pub fn apply_stop_words_update(
writer: &mut heed::RwTxn<MainT>,
index: &store::Index,
stop_words: BTreeSet<String>,
) -> MResult<bool>
{
let mut must_reindex = false;
2020-01-08 14:17:38 +01:00
2020-02-02 22:59:19 +01:00
let old_stop_words: BTreeSet<String> = index.main
2020-01-08 14:17:38 +01:00
.stop_words_fst(writer)?
.stream()
.into_strs()?
.into_iter()
.collect();
2020-01-08 14:17:38 +01:00
2020-01-29 18:30:21 +01:00
let deletion: BTreeSet<String> = old_stop_words.difference(&stop_words).cloned().collect();
let addition: BTreeSet<String> = stop_words.difference(&old_stop_words).cloned().collect();
2020-01-08 14:17:38 +01:00
if !addition.is_empty() {
apply_stop_words_addition(writer, index, addition)?;
2020-01-08 14:17:38 +01:00
}
if !deletion.is_empty() {
must_reindex = true;
apply_stop_words_deletion(writer, index, deletion)?;
2020-01-08 14:17:38 +01:00
}
2020-02-10 16:50:55 +01:00
2020-05-22 15:00:50 +02:00
let words_fst = index.main.words_fst(writer)?;
if !words_fst.is_empty() {
let stop_words = fst::Set::from_iter(stop_words)?;
let op = OpBuilder::new()
.add(&words_fst)
.add(&stop_words)
.difference();
let mut builder = fst::SetBuilder::memory();
builder.extend_stream(op)?;
2020-05-22 15:00:50 +02:00
let words_fst = builder.into_set();
index.main.put_words_fst(writer, &words_fst)?;
index.main.put_stop_words_fst(writer, &stop_words)?;
}
Ok(must_reindex)
2020-01-08 14:17:38 +01:00
}
fn apply_stop_words_addition(
writer: &mut heed::RwTxn<MainT>,
index: &store::Index,
addition: BTreeSet<String>,
) -> MResult<()>
{
2020-01-08 14:17:38 +01:00
let main_store = index.main;
let postings_lists_store = index.postings_lists;
let mut stop_words_builder = SetBuilder::memory();
for word in addition {
stop_words_builder.insert(&word)?;
2020-01-08 14:17:38 +01:00
// we remove every posting list associated to a new stop word
postings_lists_store.del_postings_list(writer, word.as_bytes())?;
}
// create the new delta stop words fst
2020-05-22 15:00:50 +02:00
let delta_stop_words = stop_words_builder.into_set();
2020-01-08 14:17:38 +01:00
// we also need to remove all the stop words from the main fst
2020-05-22 15:00:50 +02:00
let words_fst = main_store.words_fst(writer)?;
if !words_fst.is_empty() {
2020-01-08 14:17:38 +01:00
let op = OpBuilder::new()
2020-05-22 15:00:50 +02:00
.add(&words_fst)
2020-01-08 14:17:38 +01:00
.add(&delta_stop_words)
.difference();
let mut word_fst_builder = SetBuilder::memory();
word_fst_builder.extend_stream(op)?;
2020-05-22 15:00:50 +02:00
let word_fst = word_fst_builder.into_set();
2020-01-08 14:17:38 +01:00
main_store.put_words_fst(writer, &word_fst)?;
}
// now we add all of these stop words from the main store
2020-05-22 15:00:50 +02:00
let stop_words_fst = main_store.stop_words_fst(writer)?;
2020-01-08 14:17:38 +01:00
let op = OpBuilder::new()
.add(&stop_words_fst)
.add(&delta_stop_words)
.r#union();
let mut stop_words_builder = SetBuilder::memory();
stop_words_builder.extend_stream(op)?;
2020-05-22 15:00:50 +02:00
let stop_words_fst = stop_words_builder.into_set();
2020-01-08 14:17:38 +01:00
main_store.put_stop_words_fst(writer, &stop_words_fst)?;
Ok(())
}
fn apply_stop_words_deletion(
writer: &mut heed::RwTxn<MainT>,
index: &store::Index,
deletion: BTreeSet<String>,
2020-01-29 18:30:21 +01:00
) -> MResult<()> {
2020-01-08 14:17:38 +01:00
let mut stop_words_builder = SetBuilder::memory();
for word in deletion {
stop_words_builder.insert(&word)?;
2020-01-08 14:17:38 +01:00
}
// create the new delta stop words fst
2020-05-22 15:00:50 +02:00
let delta_stop_words = stop_words_builder.into_set();
2020-01-08 14:17:38 +01:00
// now we delete all of these stop words from the main store
2020-05-22 15:00:50 +02:00
let stop_words_fst = index.main.stop_words_fst(writer)?;
2020-01-08 14:17:38 +01:00
let op = OpBuilder::new()
.add(&stop_words_fst)
.add(&delta_stop_words)
.difference();
let mut stop_words_builder = SetBuilder::memory();
stop_words_builder.extend_stream(op)?;
2020-05-22 15:00:50 +02:00
let stop_words_fst = stop_words_builder.into_set();
2020-01-08 14:17:38 +01:00
2020-02-02 22:59:19 +01:00
Ok(index.main.put_stop_words_fst(writer, &stop_words_fst)?)
2020-01-08 14:17:38 +01:00
}
pub fn apply_synonyms_update(
writer: &mut heed::RwTxn<MainT>,
index: &store::Index,
synonyms: BTreeMap<String, Vec<String>>,
) -> MResult<()> {
let main_store = index.main;
let synonyms_store = index.synonyms;
let mut synonyms_builder = SetBuilder::memory();
synonyms_store.clear(writer)?;
for (word, alternatives) in synonyms.clone() {
synonyms_builder.insert(&word)?;
2020-01-08 14:17:38 +01:00
let alternatives = {
let alternatives = SetBuf::from_dirty(alternatives);
let mut alternatives_builder = SetBuilder::memory();
alternatives_builder.extend_iter(alternatives)?;
2020-05-22 15:00:50 +02:00
alternatives_builder.into_set()
2020-01-08 14:17:38 +01:00
};
synonyms_store.put_synonyms(writer, word.as_bytes(), &alternatives)?;
}
2020-05-22 15:00:50 +02:00
let synonyms_set = synonyms_builder.into_set();
2020-01-08 14:17:38 +01:00
main_store.put_synonyms_fst(writer, &synonyms_set)?;
Ok(())
}