MeiliSearch/meilisearch-http/src/index/updates.rs

271 lines
8.4 KiB
Rust
Raw Normal View History

use std::collections::{BTreeSet, HashMap};
2021-03-04 11:56:32 +01:00
use std::io;
use std::num::NonZeroUsize;
2021-05-10 17:30:09 +02:00
use std::marker::PhantomData;
2021-03-04 11:56:32 +01:00
use flate2::read::GzDecoder;
use log::info;
2021-04-22 10:14:29 +02:00
use milli::update::{IndexDocumentsMethod, UpdateBuilder, UpdateFormat};
2021-03-15 18:11:10 +01:00
use serde::{de::Deserializer, Deserialize, Serialize};
2021-03-04 11:56:32 +01:00
use super::Index;
2021-04-22 10:14:29 +02:00
use crate::index_controller::UpdateResult;
2021-03-04 11:56:32 +01:00
2021-05-10 17:30:09 +02:00
/// Type-level marker for [`Settings`] that have gone through `Settings::check`.
///
/// It carries no data; it only appears as the `T` parameter of `Settings<T>`.
#[derive(Debug, Default, Clone)]
pub struct Checked;
/// Type-level marker for [`Settings`] that have not been validated yet.
///
/// It carries no data; it only appears as the `T` parameter of `Settings<T>`.
#[derive(Debug, Default, Clone)]
pub struct Unchecked;
2021-03-04 11:56:32 +01:00
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
#[serde(deny_unknown_fields)]
#[serde(rename_all = "camelCase")]
2021-05-10 17:30:09 +02:00
pub struct Settings<T> {
2021-03-04 11:56:32 +01:00
#[serde(
default,
deserialize_with = "deserialize_some",
2021-03-15 18:11:10 +01:00
skip_serializing_if = "Option::is_none"
2021-03-04 11:56:32 +01:00
)]
pub displayed_attributes: Option<Option<Vec<String>>>,
#[serde(
default,
deserialize_with = "deserialize_some",
2021-03-15 18:11:10 +01:00
skip_serializing_if = "Option::is_none"
2021-03-04 11:56:32 +01:00
)]
pub searchable_attributes: Option<Option<Vec<String>>>,
#[serde(default)]
pub attributes_for_faceting: Option<Option<HashMap<String, String>>>,
2021-03-04 11:56:32 +01:00
#[serde(
default,
deserialize_with = "deserialize_some",
2021-03-15 18:11:10 +01:00
skip_serializing_if = "Option::is_none"
2021-03-04 11:56:32 +01:00
)]
2021-03-11 22:39:16 +01:00
pub ranking_rules: Option<Option<Vec<String>>>,
#[serde(
default,
deserialize_with = "deserialize_some",
skip_serializing_if = "Option::is_none"
)]
pub stop_words: Option<Option<BTreeSet<String>>>,
2021-03-29 09:22:36 +02:00
#[serde(
default,
deserialize_with = "deserialize_some",
skip_serializing_if = "Option::is_none"
)]
pub distinct_attribute: Option<Option<String>>,
2021-05-10 17:30:09 +02:00
#[serde(skip)]
pub _kind: PhantomData<T>,
2021-03-04 11:56:32 +01:00
}
2021-05-10 17:30:09 +02:00
impl Settings<Checked> {
pub fn cleared() -> Settings<Checked> {
Settings {
2021-03-04 11:56:32 +01:00
displayed_attributes: Some(None),
searchable_attributes: Some(None),
attributes_for_faceting: Some(None),
2021-03-11 22:39:16 +01:00
ranking_rules: Some(None),
stop_words: Some(None),
2021-03-29 09:22:36 +02:00
distinct_attribute: Some(None),
2021-05-10 17:30:09 +02:00
_kind: PhantomData,
2021-03-04 11:56:32 +01:00
}
}
}
2021-05-10 17:30:09 +02:00
impl Settings<Unchecked> {
2021-05-10 18:22:41 +02:00
pub fn check(mut self) -> Settings<Checked> {
let displayed_attributes = match self.displayed_attributes.take() {
Some(Some(fields)) => {
if fields.iter().any(|f| f == "*") {
Some(None)
} else {
Some(Some(fields))
}
}
otherwise => otherwise,
};
let searchable_attributes = match self.searchable_attributes.take() {
Some(Some(fields)) => {
if fields.iter().any(|f| f == "*") {
Some(None)
} else {
Some(Some(fields))
}
}
otherwise => otherwise,
};
Settings {
displayed_attributes,
searchable_attributes,
attributes_for_faceting: self.attributes_for_faceting,
ranking_rules: self.ranking_rules,
stop_words: self.stop_words,
distinct_attribute: self.distinct_attribute,
_kind: PhantomData,
}
2021-05-10 17:30:09 +02:00
}
}
2021-03-04 11:56:32 +01:00
/// Facet sizing options, (de)serialized with camelCase keys; unknown keys are
/// rejected on deserialization.
///
/// NOTE(review): this type is not used anywhere in the visible part of the
/// file; the exact meaning of both sizes is presumably defined by the
/// indexing engine — confirm against its documentation.
#[derive(Clone, Debug, Serialize, Deserialize)]
#[serde(deny_unknown_fields, rename_all = "camelCase")]
pub struct Facets {
    /// Optional, strictly positive level group size.
    pub level_group_size: Option<NonZeroUsize>,
    /// Optional, strictly positive minimum level size.
    pub min_level_size: Option<NonZeroUsize>,
}
fn deserialize_some<'de, T, D>(deserializer: D) -> Result<Option<T>, D::Error>
2021-03-15 18:11:10 +01:00
where
T: Deserialize<'de>,
D: Deserializer<'de>,
2021-03-04 11:56:32 +01:00
{
Deserialize::deserialize(deserializer).map(Some)
}
impl Index {
pub fn update_documents(
&self,
format: UpdateFormat,
method: IndexDocumentsMethod,
2021-04-22 10:14:29 +02:00
content: Option<impl io::Read>,
2021-03-04 11:56:32 +01:00
update_builder: UpdateBuilder,
primary_key: Option<&str>,
) -> anyhow::Result<UpdateResult> {
info!("performing document addition");
// We must use the write transaction of the update here.
let mut wtxn = self.write_txn()?;
// Set the primary key if not set already, ignore if already set.
2021-03-15 18:11:10 +01:00
if let (None, Some(ref primary_key)) = (self.primary_key(&wtxn)?, primary_key) {
2021-03-15 16:52:05 +01:00
self.put_primary_key(&mut wtxn, primary_key)?;
2021-03-04 11:56:32 +01:00
}
let mut builder = update_builder.index_documents(&mut wtxn, self);
builder.update_format(format);
builder.index_documents_method(method);
2021-04-22 10:14:29 +02:00
let indexing_callback =
|indexing_step, update_id| info!("update {}: {:?}", update_id, indexing_step);
2021-03-04 11:56:32 +01:00
let gzipped = false;
2021-04-22 10:14:29 +02:00
let result = match content {
Some(content) if gzipped => builder.execute(GzDecoder::new(content), indexing_callback),
Some(content) => builder.execute(content, indexing_callback),
None => builder.execute(std::io::empty(), indexing_callback),
2021-03-04 11:56:32 +01:00
};
info!("document addition done: {:?}", result);
2021-03-15 18:11:10 +01:00
result.and_then(|addition_result| {
wtxn.commit()
.and(Ok(UpdateResult::DocumentsAddition(addition_result)))
.map_err(Into::into)
})
2021-03-04 11:56:32 +01:00
}
pub fn clear_documents(&self, update_builder: UpdateBuilder) -> anyhow::Result<UpdateResult> {
// We must use the write transaction of the update here.
let mut wtxn = self.write_txn()?;
let builder = update_builder.clear_documents(&mut wtxn, self);
match builder.execute() {
Ok(_count) => wtxn
.commit()
.and(Ok(UpdateResult::Other))
.map_err(Into::into),
2021-03-15 16:52:05 +01:00
Err(e) => Err(e),
2021-03-04 11:56:32 +01:00
}
}
pub fn update_settings(
&self,
2021-05-10 17:30:09 +02:00
settings: &Settings<Checked>,
2021-03-04 11:56:32 +01:00
update_builder: UpdateBuilder,
) -> anyhow::Result<UpdateResult> {
// We must use the write transaction of the update here.
let mut wtxn = self.write_txn()?;
let mut builder = update_builder.settings(&mut wtxn, self);
if let Some(ref names) = settings.searchable_attributes {
match names {
Some(names) => builder.set_searchable_fields(names.clone()),
None => builder.reset_searchable_fields(),
}
}
if let Some(ref names) = settings.displayed_attributes {
match names {
Some(names) => builder.set_displayed_fields(names.clone()),
None => builder.reset_displayed_fields(),
}
}
if let Some(ref facet_types) = settings.attributes_for_faceting {
2021-03-15 16:52:05 +01:00
let facet_types = facet_types.clone().unwrap_or_else(HashMap::new);
2021-03-04 11:56:32 +01:00
builder.set_faceted_fields(facet_types);
}
2021-03-11 22:39:16 +01:00
if let Some(ref criteria) = settings.ranking_rules {
2021-03-04 11:56:32 +01:00
match criteria {
Some(criteria) => builder.set_criteria(criteria.clone()),
None => builder.reset_criteria(),
}
}
if let Some(ref stop_words) = settings.stop_words {
match stop_words {
Some(stop_words) => builder.set_stop_words(stop_words.clone()),
_ => builder.reset_stop_words(),
}
}
2021-03-29 09:22:36 +02:00
if let Some(ref distinct_attribute) = settings.distinct_attribute {
match distinct_attribute {
Some(attr) => builder.set_distinct_attribute(attr.clone()),
None => builder.reset_distinct_attribute(),
}
}
2021-03-04 11:56:32 +01:00
let result = builder
.execute(|indexing_step, update_id| info!("update {}: {:?}", update_id, indexing_step));
match result {
Ok(()) => wtxn
.commit()
.and(Ok(UpdateResult::Other))
.map_err(Into::into),
2021-03-15 16:52:05 +01:00
Err(e) => Err(e),
2021-03-04 11:56:32 +01:00
}
}
pub fn delete_documents(
&self,
2021-04-22 10:14:29 +02:00
document_ids: Option<impl io::Read>,
2021-03-04 11:56:32 +01:00
update_builder: UpdateBuilder,
) -> anyhow::Result<UpdateResult> {
2021-04-22 10:14:29 +02:00
let ids = match document_ids {
Some(reader) => serde_json::from_reader(reader)?,
None => Vec::<String>::new(),
};
2021-03-04 11:56:32 +01:00
let mut txn = self.write_txn()?;
let mut builder = update_builder.delete_documents(&mut txn, self)?;
// We ignore unexisting document ids
2021-03-15 18:11:10 +01:00
ids.iter().for_each(|id| {
builder.delete_external_id(id);
});
2021-03-04 11:56:32 +01:00
match builder.execute() {
Ok(deleted) => txn
.commit()
.and(Ok(UpdateResult::DocumentDeletion { deleted }))
.map_err(Into::into),
2021-03-15 18:11:10 +01:00
Err(e) => Err(e),
2021-03-04 11:56:32 +01:00
}
}
}