2021-04-06 15:41:03 +02:00
|
|
|
use std::collections::{BTreeSet, HashMap};
|
2021-03-04 11:56:32 +01:00
|
|
|
use std::io;
|
|
|
|
use std::num::NonZeroUsize;
|
|
|
|
|
|
|
|
use flate2::read::GzDecoder;
|
|
|
|
use log::info;
|
2021-03-15 18:11:10 +01:00
|
|
|
use milli::update::{DocumentAdditionResult, IndexDocumentsMethod, UpdateBuilder, UpdateFormat};
|
|
|
|
use serde::{de::Deserializer, Deserialize, Serialize};
|
2021-03-04 11:56:32 +01:00
|
|
|
|
|
|
|
use super::Index;
|
|
|
|
|
|
|
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
|
|
|
pub enum UpdateResult {
|
|
|
|
DocumentsAddition(DocumentAdditionResult),
|
2021-03-11 19:40:18 +01:00
|
|
|
DocumentDeletion { deleted: u64 },
|
2021-03-04 11:56:32 +01:00
|
|
|
Other,
|
|
|
|
}
|
|
|
|
|
|
|
|
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
|
|
|
|
#[serde(deny_unknown_fields)]
|
|
|
|
#[serde(rename_all = "camelCase")]
|
|
|
|
pub struct Settings {
|
|
|
|
#[serde(
|
|
|
|
default,
|
|
|
|
deserialize_with = "deserialize_some",
|
2021-03-15 18:11:10 +01:00
|
|
|
skip_serializing_if = "Option::is_none"
|
2021-03-04 11:56:32 +01:00
|
|
|
)]
|
|
|
|
pub displayed_attributes: Option<Option<Vec<String>>>,
|
|
|
|
|
|
|
|
#[serde(
|
|
|
|
default,
|
|
|
|
deserialize_with = "deserialize_some",
|
2021-03-15 18:11:10 +01:00
|
|
|
skip_serializing_if = "Option::is_none"
|
2021-03-04 11:56:32 +01:00
|
|
|
)]
|
|
|
|
pub searchable_attributes: Option<Option<Vec<String>>>,
|
|
|
|
|
|
|
|
#[serde(default)]
|
2021-03-15 13:53:50 +01:00
|
|
|
pub attributes_for_faceting: Option<Option<HashMap<String, String>>>,
|
2021-03-04 11:56:32 +01:00
|
|
|
|
|
|
|
#[serde(
|
|
|
|
default,
|
|
|
|
deserialize_with = "deserialize_some",
|
2021-03-15 18:11:10 +01:00
|
|
|
skip_serializing_if = "Option::is_none"
|
2021-03-04 11:56:32 +01:00
|
|
|
)]
|
2021-03-11 22:39:16 +01:00
|
|
|
pub ranking_rules: Option<Option<Vec<String>>>,
|
2021-04-06 15:41:03 +02:00
|
|
|
#[serde(
|
|
|
|
default,
|
|
|
|
deserialize_with = "deserialize_some",
|
|
|
|
skip_serializing_if = "Option::is_none"
|
|
|
|
)]
|
|
|
|
pub stop_words: Option<Option<BTreeSet<String>>>,
|
2021-03-29 09:22:36 +02:00
|
|
|
#[serde(
|
|
|
|
default,
|
|
|
|
deserialize_with = "deserialize_some",
|
|
|
|
skip_serializing_if = "Option::is_none"
|
|
|
|
)]
|
|
|
|
pub distinct_attribute: Option<Option<String>>,
|
2021-03-04 11:56:32 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
impl Settings {
|
|
|
|
pub fn cleared() -> Self {
|
|
|
|
Self {
|
|
|
|
displayed_attributes: Some(None),
|
|
|
|
searchable_attributes: Some(None),
|
2021-03-15 13:53:50 +01:00
|
|
|
attributes_for_faceting: Some(None),
|
2021-03-11 22:39:16 +01:00
|
|
|
ranking_rules: Some(None),
|
2021-04-06 15:41:03 +02:00
|
|
|
stop_words: Some(None),
|
2021-03-29 09:22:36 +02:00
|
|
|
distinct_attribute: Some(None),
|
2021-03-04 11:56:32 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
|
|
|
#[serde(deny_unknown_fields)]
|
|
|
|
#[serde(rename_all = "camelCase")]
|
|
|
|
pub struct Facets {
|
|
|
|
pub level_group_size: Option<NonZeroUsize>,
|
|
|
|
pub min_level_size: Option<NonZeroUsize>,
|
|
|
|
}
|
|
|
|
|
|
|
|
fn deserialize_some<'de, T, D>(deserializer: D) -> Result<Option<T>, D::Error>
|
2021-03-15 18:11:10 +01:00
|
|
|
where
|
|
|
|
T: Deserialize<'de>,
|
|
|
|
D: Deserializer<'de>,
|
2021-03-04 11:56:32 +01:00
|
|
|
{
|
|
|
|
Deserialize::deserialize(deserializer).map(Some)
|
|
|
|
}
|
|
|
|
|
|
|
|
impl Index {
|
|
|
|
pub fn update_documents(
|
|
|
|
&self,
|
|
|
|
format: UpdateFormat,
|
|
|
|
method: IndexDocumentsMethod,
|
|
|
|
content: impl io::Read,
|
|
|
|
update_builder: UpdateBuilder,
|
|
|
|
primary_key: Option<&str>,
|
|
|
|
) -> anyhow::Result<UpdateResult> {
|
|
|
|
info!("performing document addition");
|
|
|
|
// We must use the write transaction of the update here.
|
|
|
|
let mut wtxn = self.write_txn()?;
|
|
|
|
|
|
|
|
// Set the primary key if not set already, ignore if already set.
|
2021-03-15 18:11:10 +01:00
|
|
|
if let (None, Some(ref primary_key)) = (self.primary_key(&wtxn)?, primary_key) {
|
2021-03-15 16:52:05 +01:00
|
|
|
self.put_primary_key(&mut wtxn, primary_key)?;
|
2021-03-04 11:56:32 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
let mut builder = update_builder.index_documents(&mut wtxn, self);
|
|
|
|
builder.update_format(format);
|
|
|
|
builder.index_documents_method(method);
|
|
|
|
|
|
|
|
let gzipped = false;
|
|
|
|
let reader = if gzipped {
|
|
|
|
Box::new(GzDecoder::new(content))
|
|
|
|
} else {
|
|
|
|
Box::new(content) as Box<dyn io::Read>
|
|
|
|
};
|
|
|
|
|
|
|
|
let result = builder.execute(reader, |indexing_step, update_id| {
|
|
|
|
info!("update {}: {:?}", update_id, indexing_step)
|
|
|
|
});
|
|
|
|
|
|
|
|
info!("document addition done: {:?}", result);
|
|
|
|
|
2021-03-15 18:11:10 +01:00
|
|
|
result.and_then(|addition_result| {
|
|
|
|
wtxn.commit()
|
|
|
|
.and(Ok(UpdateResult::DocumentsAddition(addition_result)))
|
|
|
|
.map_err(Into::into)
|
|
|
|
})
|
2021-03-04 11:56:32 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
pub fn clear_documents(&self, update_builder: UpdateBuilder) -> anyhow::Result<UpdateResult> {
|
|
|
|
// We must use the write transaction of the update here.
|
|
|
|
let mut wtxn = self.write_txn()?;
|
|
|
|
let builder = update_builder.clear_documents(&mut wtxn, self);
|
|
|
|
|
|
|
|
match builder.execute() {
|
|
|
|
Ok(_count) => wtxn
|
|
|
|
.commit()
|
|
|
|
.and(Ok(UpdateResult::Other))
|
|
|
|
.map_err(Into::into),
|
2021-03-15 16:52:05 +01:00
|
|
|
Err(e) => Err(e),
|
2021-03-04 11:56:32 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
pub fn update_settings(
|
|
|
|
&self,
|
|
|
|
settings: &Settings,
|
|
|
|
update_builder: UpdateBuilder,
|
|
|
|
) -> anyhow::Result<UpdateResult> {
|
|
|
|
// We must use the write transaction of the update here.
|
|
|
|
let mut wtxn = self.write_txn()?;
|
|
|
|
let mut builder = update_builder.settings(&mut wtxn, self);
|
|
|
|
|
|
|
|
if let Some(ref names) = settings.searchable_attributes {
|
|
|
|
match names {
|
|
|
|
Some(names) => builder.set_searchable_fields(names.clone()),
|
|
|
|
None => builder.reset_searchable_fields(),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if let Some(ref names) = settings.displayed_attributes {
|
|
|
|
match names {
|
|
|
|
Some(names) => builder.set_displayed_fields(names.clone()),
|
|
|
|
None => builder.reset_displayed_fields(),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-03-15 13:53:50 +01:00
|
|
|
if let Some(ref facet_types) = settings.attributes_for_faceting {
|
2021-03-15 16:52:05 +01:00
|
|
|
let facet_types = facet_types.clone().unwrap_or_else(HashMap::new);
|
2021-03-04 11:56:32 +01:00
|
|
|
builder.set_faceted_fields(facet_types);
|
|
|
|
}
|
|
|
|
|
2021-03-11 22:39:16 +01:00
|
|
|
if let Some(ref criteria) = settings.ranking_rules {
|
2021-03-04 11:56:32 +01:00
|
|
|
match criteria {
|
|
|
|
Some(criteria) => builder.set_criteria(criteria.clone()),
|
|
|
|
None => builder.reset_criteria(),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-04-06 15:41:03 +02:00
|
|
|
if let Some(ref stop_words) = settings.stop_words {
|
|
|
|
match stop_words {
|
|
|
|
Some(stop_words) => builder.set_stop_words(stop_words.clone()),
|
|
|
|
_ => builder.reset_stop_words(),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-03-29 09:22:36 +02:00
|
|
|
if let Some(ref distinct_attribute) = settings.distinct_attribute {
|
|
|
|
match distinct_attribute {
|
|
|
|
Some(attr) => builder.set_distinct_attribute(attr.clone()),
|
|
|
|
None => builder.reset_distinct_attribute(),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-03-04 11:56:32 +01:00
|
|
|
let result = builder
|
|
|
|
.execute(|indexing_step, update_id| info!("update {}: {:?}", update_id, indexing_step));
|
|
|
|
|
|
|
|
match result {
|
|
|
|
Ok(()) => wtxn
|
|
|
|
.commit()
|
|
|
|
.and(Ok(UpdateResult::Other))
|
|
|
|
.map_err(Into::into),
|
2021-03-15 16:52:05 +01:00
|
|
|
Err(e) => Err(e),
|
2021-03-04 11:56:32 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
pub fn update_facets(
|
|
|
|
&self,
|
|
|
|
levels: &Facets,
|
|
|
|
update_builder: UpdateBuilder,
|
|
|
|
) -> anyhow::Result<UpdateResult> {
|
|
|
|
// We must use the write transaction of the update here.
|
|
|
|
let mut wtxn = self.write_txn()?;
|
|
|
|
let mut builder = update_builder.facets(&mut wtxn, self);
|
|
|
|
if let Some(value) = levels.level_group_size {
|
|
|
|
builder.level_group_size(value);
|
|
|
|
}
|
|
|
|
if let Some(value) = levels.min_level_size {
|
|
|
|
builder.min_level_size(value);
|
|
|
|
}
|
|
|
|
match builder.execute() {
|
|
|
|
Ok(()) => wtxn
|
|
|
|
.commit()
|
|
|
|
.and(Ok(UpdateResult::Other))
|
|
|
|
.map_err(Into::into),
|
2021-03-15 16:52:05 +01:00
|
|
|
Err(e) => Err(e),
|
2021-03-04 11:56:32 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
pub fn delete_documents(
|
|
|
|
&self,
|
|
|
|
document_ids: impl io::Read,
|
|
|
|
update_builder: UpdateBuilder,
|
|
|
|
) -> anyhow::Result<UpdateResult> {
|
|
|
|
let ids: Vec<String> = serde_json::from_reader(document_ids)?;
|
|
|
|
let mut txn = self.write_txn()?;
|
|
|
|
let mut builder = update_builder.delete_documents(&mut txn, self)?;
|
|
|
|
|
|
|
|
// We ignore unexisting document ids
|
2021-03-15 18:11:10 +01:00
|
|
|
ids.iter().for_each(|id| {
|
|
|
|
builder.delete_external_id(id);
|
|
|
|
});
|
2021-03-04 11:56:32 +01:00
|
|
|
|
|
|
|
match builder.execute() {
|
|
|
|
Ok(deleted) => txn
|
|
|
|
.commit()
|
|
|
|
.and(Ok(UpdateResult::DocumentDeletion { deleted }))
|
|
|
|
.map_err(Into::into),
|
2021-03-15 18:11:10 +01:00
|
|
|
Err(e) => Err(e),
|
2021-03-04 11:56:32 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|