mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-07-04 04:17:10 +02:00
split meilisearch-http and meilisearch-lib
parent 09d4e37044
commit 60518449fc
63 changed files with 608 additions and 324 deletions
166
meilisearch-lib/src/index/dump.rs
Normal file
@@ -0,0 +1,166 @@
use std::fs::File;
use std::io::Write;
use std::path::Path;

use heed::RoTxn;
use indexmap::IndexMap;
use serde::{Deserialize, Serialize};

use crate::options::IndexerOpts;

use super::error::Result;
use super::{Index, Settings, Unchecked};

#[derive(Serialize, Deserialize)]
struct DumpMeta {
    settings: Settings<Unchecked>,
    primary_key: Option<String>,
}

const META_FILE_NAME: &str = "meta.json";
const DATA_FILE_NAME: &str = "documents.jsonl";

impl Index {
    pub fn dump(&self, path: impl AsRef<Path>) -> Result<()> {
        // Acquire a write txn to make sure any ongoing write is finished before we start.
        let txn = self.env.write_txn()?;

        self.dump_documents(&txn, &path)?;
        self.dump_meta(&txn, &path)?;

        Ok(())
    }

    fn dump_documents(&self, txn: &RoTxn, path: impl AsRef<Path>) -> Result<()> {
        let document_file_path = path.as_ref().join(DATA_FILE_NAME);
        let mut document_file = File::create(&document_file_path)?;

        let documents = self.all_documents(txn)?;
        let fields_ids_map = self.fields_ids_map(txn)?;

        // Dump the documents, one JSON object per line.
        let mut json_map = IndexMap::new();
        for document in documents {
            let (_, reader) = document?;

            for (fid, bytes) in reader.iter() {
                if let Some(name) = fields_ids_map.name(fid) {
                    json_map.insert(name, serde_json::from_slice::<serde_json::Value>(bytes)?);
                }
            }

            serde_json::to_writer(&mut document_file, &json_map)?;
            document_file.write_all(b"\n")?;

            json_map.clear();
        }

        Ok(())
    }

    fn dump_meta(&self, txn: &RoTxn, path: impl AsRef<Path>) -> Result<()> {
        let meta_file_path = path.as_ref().join(META_FILE_NAME);
        let mut meta_file = File::create(&meta_file_path)?;

        let settings = self.settings_txn(txn)?.into_unchecked();
        let primary_key = self.primary_key(txn)?.map(String::from);
        let meta = DumpMeta {
            settings,
            primary_key,
        };

        serde_json::to_writer(&mut meta_file, &meta)?;

        Ok(())
    }

    pub fn load_dump(
        _src: impl AsRef<Path>,
        _dst: impl AsRef<Path>,
        _size: usize,
        _indexing_options: &IndexerOpts,
    ) -> anyhow::Result<()> {
        //let dir_name = src
        //.as_ref()
        //.file_name()
        //.with_context(|| format!("invalid dump index: {}", src.as_ref().display()))?;

        //let dst_dir_path = dst.as_ref().join("indexes").join(dir_name);
        //create_dir_all(&dst_dir_path)?;

        //let meta_path = src.as_ref().join(META_FILE_NAME);
        //let mut meta_file = File::open(meta_path)?;

        //// We first deserialize the dump meta into a serde_json::Value and change
        //// the custom ranking rules settings from the old format to the new format.
        //let mut meta: Value = serde_json::from_reader(&mut meta_file)?;
        //if let Some(ranking_rules) = meta.pointer_mut("/settings/rankingRules") {
        //convert_custom_ranking_rules(ranking_rules);
        //}

        //// Then we serialize it back into a vec to deserialize it
        //// into a `DumpMeta` struct with the newly patched `rankingRules` format.
        //let patched_meta = serde_json::to_vec(&meta)?;

        //let DumpMeta {
        //settings,
        //primary_key,
        //} = serde_json::from_slice(&patched_meta)?;
        //let settings = settings.check();
        //let index = Self::open(&dst_dir_path, size)?;
        //let mut txn = index.write_txn()?;

        //let handler = UpdateHandler::new(indexing_options)?;

        //index.update_settings_txn(&mut txn, &settings, handler.update_builder(0))?;

        //let document_file_path = src.as_ref().join(DATA_FILE_NAME);
        //let reader = File::open(&document_file_path)?;
        //let mut reader = BufReader::new(reader);
        //reader.fill_buf()?;
        // If the document file is empty, we don't perform the document addition, to prevent
        // a primary key error from being thrown.

        todo!("fix obk document dumps")
        //if !reader.buffer().is_empty() {
        //index.update_documents_txn(
        //&mut txn,
        //IndexDocumentsMethod::UpdateDocuments,
        //Some(reader),
        //handler.update_builder(0),
        //primary_key.as_deref(),
        //)?;
        //}

        //txn.commit()?;

        //match Arc::try_unwrap(index.0) {
        //Ok(inner) => inner.prepare_for_closing().wait(),
        //Err(_) => bail!("Could not close index properly."),
        //}

        //Ok(())
    }
}

// /// Converts the ranking rules from the format `asc(_)`, `desc(_)` to the format `_:asc`, `_:desc`.
// ///
// /// This is done for compatibility reasons, and to avoid a new dump version,
// /// since the new syntax was introduced soon after the new dump version.
//fn convert_custom_ranking_rules(ranking_rules: &mut Value) {
//*ranking_rules = match ranking_rules.take() {
//Value::Array(values) => values
//.into_iter()
//.filter_map(|value| match value {
//Value::String(s) if s.starts_with("asc") => asc_ranking_rule(&s)
//.map(|f| format!("{}:asc", f))
//.map(Value::String),
//Value::String(s) if s.starts_with("desc") => desc_ranking_rule(&s)
//.map(|f| format!("{}:desc", f))
//.map(Value::String),
//otherwise => Some(otherwise),
//})
//.collect(),
//otherwise => otherwise,
//}
//}
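For orientation, here is a minimal sketch of reading back the on-disk layout that `dump` produces: a `meta.json` holding the serialized `DumpMeta` (plain field names, so `primary_key`; camelCase applies only inside `settings`) next to a `documents.jsonl` with one JSON document per line. The `read_dump` helper below is illustrative only and not part of this commit.

use std::fs::File;
use std::io::{BufRead, BufReader};
use std::path::Path;

// Hypothetical reader for the dump layout produced above.
fn read_dump(dir: &Path) -> anyhow::Result<()> {
    // meta.json: the serialized DumpMeta { settings, primary_key }.
    let meta: serde_json::Value = serde_json::from_reader(File::open(dir.join("meta.json"))?)?;
    println!("primary key: {:?}", meta.get("primary_key"));

    // documents.jsonl: one JSON object per line, as written by dump_documents.
    for line in BufReader::new(File::open(dir.join("documents.jsonl"))?).lines() {
        let document: serde_json::Value = serde_json::from_str(&line?)?;
        println!("{}", document);
    }
    Ok(())
}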
52
meilisearch-lib/src/index/error.rs
Normal file
@@ -0,0 +1,52 @@
use std::error::Error;

use meilisearch_error::{Code, ErrorCode};
use serde_json::Value;

use crate::error::MilliError;

pub type Result<T> = std::result::Result<T, IndexError>;

#[derive(Debug, thiserror::Error)]
pub enum IndexError {
    #[error("Internal error: {0}")]
    Internal(Box<dyn Error + Send + Sync + 'static>),
    #[error("Document with id {0} not found.")]
    DocumentNotFound(String),
    #[error("{0}")]
    Facet(#[from] FacetError),
    #[error("{0}")]
    Milli(#[from] milli::Error),
}

internal_error!(
    IndexError: std::io::Error,
    heed::Error,
    fst::Error,
    serde_json::Error
);

impl ErrorCode for IndexError {
    fn error_code(&self) -> Code {
        match self {
            IndexError::Internal(_) => Code::Internal,
            IndexError::DocumentNotFound(_) => Code::DocumentNotFound,
            IndexError::Facet(e) => e.error_code(),
            IndexError::Milli(e) => MilliError(e).error_code(),
        }
    }
}

#[derive(Debug, thiserror::Error)]
pub enum FacetError {
    #[error("Invalid facet expression, expected {}, found: {1}", .0.join(", "))]
    InvalidExpression(&'static [&'static str], Value),
}

impl ErrorCode for FacetError {
    fn error_code(&self) -> Code {
        match self {
            FacetError::InvalidExpression(_, _) => Code::Facet,
        }
    }
}
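A quick sketch of how these types compose for callers; it assumes, as the invocation above suggests, that the crate-local `internal_error!` macro expands to `From` impls wrapping each listed type into `IndexError::Internal`, so `?` converts I/O and serde errors directly. The helper below is hypothetical.

// Hypothetical helper: `?` lifts std::io::Error and serde_json::Error into
// IndexError::Internal through the From impls internal_error! generates.
fn read_json_file(path: &std::path::Path) -> Result<serde_json::Value> {
    let file = std::fs::File::open(path)?; // io::Error -> IndexError::Internal
    Ok(serde_json::from_reader(file)?)     // serde_json::Error -> IndexError::Internal
}

// The HTTP layer can then map any IndexError to its public error code, e.g.
// IndexError::DocumentNotFound("42".into()).error_code() == Code::DocumentNotFound.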
202
meilisearch-lib/src/index/mod.rs
Normal file
@@ -0,0 +1,202 @@
use std::collections::{BTreeSet, HashSet};
use std::fs::create_dir_all;
use std::marker::PhantomData;
use std::ops::Deref;
use std::path::Path;
use std::sync::Arc;

use heed::{EnvOpenOptions, RoTxn};
use milli::update::Setting;
use milli::{obkv_to_json, FieldId};
use serde_json::{Map, Value};

use error::Result;
pub use search::{default_crop_length, SearchQuery, SearchResult, DEFAULT_SEARCH_LIMIT};
pub use updates::{Checked, Facets, Settings, Unchecked};

use crate::EnvSizer;
use crate::index_controller::update_file_store::UpdateFileStore;

use self::error::IndexError;

pub mod error;
pub mod update_handler;

mod dump;
mod search;
mod updates;

pub type Document = Map<String, Value>;

#[derive(Clone)]
pub struct Index {
    pub inner: Arc<milli::Index>,
    update_file_store: Arc<UpdateFileStore>,
}

impl Deref for Index {
    type Target = milli::Index;

    fn deref(&self) -> &Self::Target {
        self.inner.as_ref()
    }
}

impl Index {
    pub fn open(path: impl AsRef<Path>, size: usize, update_file_store: Arc<UpdateFileStore>) -> Result<Self> {
        create_dir_all(&path)?;
        let mut options = EnvOpenOptions::new();
        options.map_size(size);
        let inner = Arc::new(milli::Index::new(options, &path)?);
        Ok(Index { inner, update_file_store })
    }

    pub fn settings(&self) -> Result<Settings<Checked>> {
        let txn = self.read_txn()?;
        self.settings_txn(&txn)
    }

    pub fn settings_txn(&self, txn: &RoTxn) -> Result<Settings<Checked>> {
        let displayed_attributes = self
            .displayed_fields(txn)?
            .map(|fields| fields.into_iter().map(String::from).collect());

        let searchable_attributes = self
            .searchable_fields(txn)?
            .map(|fields| fields.into_iter().map(String::from).collect());

        let filterable_attributes = self.filterable_fields(txn)?.into_iter().collect();

        let sortable_attributes = self.sortable_fields(txn)?.into_iter().collect();

        let criteria = self
            .criteria(txn)?
            .into_iter()
            .map(|c| c.to_string())
            .collect();

        let stop_words = self
            .stop_words(txn)?
            .map(|stop_words| -> Result<BTreeSet<_>> {
                Ok(stop_words.stream().into_strs()?.into_iter().collect())
            })
            .transpose()?
            .unwrap_or_else(BTreeSet::new);
        let distinct_field = self.distinct_field(txn)?.map(String::from);

        // In milli, each word in the synonyms map was split on its separators. Since we
        // lost this information, we put a space between words.
        let synonyms = self
            .synonyms(txn)?
            .iter()
            .map(|(key, values)| {
                (
                    key.join(" "),
                    values.iter().map(|value| value.join(" ")).collect(),
                )
            })
            .collect();

        Ok(Settings {
            displayed_attributes: match displayed_attributes {
                Some(attrs) => Setting::Set(attrs),
                None => Setting::Reset,
            },
            searchable_attributes: match searchable_attributes {
                Some(attrs) => Setting::Set(attrs),
                None => Setting::Reset,
            },
            filterable_attributes: Setting::Set(filterable_attributes),
            sortable_attributes: Setting::Set(sortable_attributes),
            ranking_rules: Setting::Set(criteria),
            stop_words: Setting::Set(stop_words),
            distinct_attribute: match distinct_field {
                Some(field) => Setting::Set(field),
                None => Setting::Reset,
            },
            synonyms: Setting::Set(synonyms),
            _kind: PhantomData,
        })
    }

    pub fn retrieve_documents<S: AsRef<str>>(
        &self,
        offset: usize,
        limit: usize,
        attributes_to_retrieve: Option<Vec<S>>,
    ) -> Result<Vec<Map<String, Value>>> {
        let txn = self.read_txn()?;

        let fields_ids_map = self.fields_ids_map(&txn)?;
        let fields_to_display =
            self.fields_to_display(&txn, &attributes_to_retrieve, &fields_ids_map)?;

        let iter = self.documents.range(&txn, &(..))?.skip(offset).take(limit);

        let mut documents = Vec::new();

        for entry in iter {
            let (_id, obkv) = entry?;
            let object = obkv_to_json(&fields_to_display, &fields_ids_map, obkv)?;
            documents.push(object);
        }

        Ok(documents)
    }

    pub fn retrieve_document<S: AsRef<str>>(
        &self,
        doc_id: String,
        attributes_to_retrieve: Option<Vec<S>>,
    ) -> Result<Map<String, Value>> {
        let txn = self.read_txn()?;

        let fields_ids_map = self.fields_ids_map(&txn)?;

        let fields_to_display =
            self.fields_to_display(&txn, &attributes_to_retrieve, &fields_ids_map)?;

        let internal_id = self
            .external_documents_ids(&txn)?
            .get(doc_id.as_bytes())
            .ok_or_else(|| IndexError::DocumentNotFound(doc_id.clone()))?;

        let document = self
            .documents(&txn, std::iter::once(internal_id))?
            .into_iter()
            .next()
            .map(|(_, d)| d)
            .ok_or(IndexError::DocumentNotFound(doc_id))?;

        let document = obkv_to_json(&fields_to_display, &fields_ids_map, document)?;

        Ok(document)
    }

    pub fn size(&self) -> u64 {
        self.env.size()
    }

    fn fields_to_display<S: AsRef<str>>(
        &self,
        txn: &heed::RoTxn,
        attributes_to_retrieve: &Option<Vec<S>>,
        fields_ids_map: &milli::FieldsIdsMap,
    ) -> Result<Vec<FieldId>> {
        let mut displayed_fields_ids = match self.displayed_fields_ids(txn)? {
            Some(ids) => ids.into_iter().collect::<Vec<_>>(),
            None => fields_ids_map.iter().map(|(id, _)| id).collect(),
        };

        let attributes_to_retrieve_ids = match attributes_to_retrieve {
            Some(attrs) => attrs
                .iter()
                .filter_map(|f| fields_ids_map.id(f.as_ref()))
                .collect::<HashSet<_>>(),
            None => fields_ids_map.iter().map(|(id, _)| id).collect(),
        };

        displayed_fields_ids.retain(|fid| attributes_to_retrieve_ids.contains(fid));
        Ok(displayed_fields_ids)
    }
}
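The projection rule in `fields_to_display` is easy to miss: the result is the displayed fields, kept in their configured order, restricted to the attributes the caller asked for (unknown attribute names are silently dropped by the `filter_map` on `fields_ids_map.id`). A standalone sketch of the same rule with plain strings instead of `FieldId`s:

use std::collections::HashSet;

// Standalone illustration of the fields_to_display rule.
fn project_fields(displayed: &[&str], requested: Option<&[&str]>) -> Vec<String> {
    let requested: HashSet<&str> = match requested {
        Some(attrs) => attrs.iter().copied().collect(),
        // No explicit selection: every displayed field is kept.
        None => displayed.iter().copied().collect(),
    };
    displayed
        .iter()
        .filter(|field| requested.contains(*field))
        .map(|field| field.to_string())
        .collect()
}

// project_fields(&["title", "overview", "id"], Some(&["id", "title"]))
// returns ["title", "id"]: displayed order wins and "overview" is dropped.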
1335
meilisearch-lib/src/index/search.rs
Normal file
File diff suppressed because it is too large
72
meilisearch-lib/src/index/update_handler.rs
Normal file
@@ -0,0 +1,72 @@
use crate::index::Index;
use milli::update::UpdateBuilder;
use milli::CompressionType;
use rayon::ThreadPool;

use crate::index_controller::update_actor::RegisterUpdate;
use crate::index_controller::{Failed, Processed, Processing};
use crate::options::IndexerOpts;

pub struct UpdateHandler {
    max_nb_chunks: Option<usize>,
    chunk_compression_level: Option<u32>,
    thread_pool: ThreadPool,
    log_frequency: usize,
    max_memory: Option<usize>,
    chunk_compression_type: CompressionType,
}

impl UpdateHandler {
    pub fn new(opt: &IndexerOpts) -> anyhow::Result<Self> {
        let thread_pool = rayon::ThreadPoolBuilder::new()
            .num_threads(opt.indexing_jobs.unwrap_or(num_cpus::get() / 2))
            .build()?;

        Ok(Self {
            max_nb_chunks: opt.max_nb_chunks,
            chunk_compression_level: opt.chunk_compression_level,
            thread_pool,
            log_frequency: opt.log_every_n,
            max_memory: opt.max_memory.map(|m| m.get_bytes() as usize),
            chunk_compression_type: opt.chunk_compression_type,
        })
    }

    pub fn update_builder(&self, update_id: u64) -> UpdateBuilder {
        // We prepare the update by using the update builder.
        let mut update_builder = UpdateBuilder::new(update_id);
        if let Some(max_nb_chunks) = self.max_nb_chunks {
            update_builder.max_nb_chunks(max_nb_chunks);
        }
        if let Some(chunk_compression_level) = self.chunk_compression_level {
            update_builder.chunk_compression_level(chunk_compression_level);
        }
        update_builder.thread_pool(&self.thread_pool);
        update_builder.log_every_n(self.log_frequency);
        if let Some(max_memory) = self.max_memory {
            update_builder.max_memory(max_memory);
        }
        update_builder.chunk_compression_type(self.chunk_compression_type);
        update_builder
    }

    pub fn handle_update(
        &self,
        index: Index,
        meta: Processing,
    ) -> Result<Processed, Failed> {
        let update_id = meta.id();
        let update_builder = self.update_builder(update_id);

        let result = match meta.meta() {
            RegisterUpdate::DocumentAddition { primary_key, content_uuid, method } => {
                index.update_documents(*method, *content_uuid, update_builder, primary_key.as_deref())
            }
        };

        match result {
            Ok(result) => Ok(meta.process(result)),
            Err(e) => Err(meta.fail(e)),
        }
    }
}
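As a usage sketch: an `UpdateHandler` is built once from the indexer options and then hands out a pre-configured `UpdateBuilder` per update. The snippet assumes `IndexerOpts` implements `Default`; only the fields read in `new` above matter here.

use crate::options::IndexerOpts;

// Assumes IndexerOpts: Default (only the fields read by UpdateHandler::new matter).
fn make_handler() -> anyhow::Result<UpdateHandler> {
    let opts = IndexerOpts::default();
    let handler = UpdateHandler::new(&opts)?;

    // Every update gets a builder carrying the shared rayon pool plus the
    // chunking, compression, and memory limits configured above.
    let _builder = handler.update_builder(0);
    Ok(handler)
}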
366
meilisearch-lib/src/index/updates.rs
Normal file
@@ -0,0 +1,366 @@
use std::collections::{BTreeMap, BTreeSet};
use std::marker::PhantomData;
use std::num::NonZeroUsize;

use log::{debug, info, trace};
use milli::documents::DocumentBatchReader;
use milli::update::{IndexDocumentsMethod, Setting, UpdateBuilder};
use serde::{Deserialize, Serialize, Serializer};
use uuid::Uuid;

use crate::index_controller::UpdateResult;

use super::Index;
use super::error::Result;

fn serialize_with_wildcard<S>(
    field: &Setting<Vec<String>>,
    s: S,
) -> std::result::Result<S::Ok, S::Error>
where
    S: Serializer,
{
    let wildcard = vec!["*".to_string()];
    match field {
        Setting::Set(value) => Some(value),
        Setting::Reset => Some(&wildcard),
        Setting::NotSet => None,
    }
    .serialize(s)
}

#[derive(Clone, Default, Debug, Serialize)]
pub struct Checked;

#[derive(Clone, Default, Debug, Serialize, Deserialize)]
pub struct Unchecked;

#[derive(Debug, Clone, Default, Serialize, Deserialize)]
#[serde(deny_unknown_fields)]
#[serde(rename_all = "camelCase")]
#[serde(bound(serialize = "T: Serialize", deserialize = "T: Deserialize<'static>"))]
pub struct Settings<T> {
    #[serde(
        default,
        serialize_with = "serialize_with_wildcard",
        skip_serializing_if = "Setting::is_not_set"
    )]
    pub displayed_attributes: Setting<Vec<String>>,

    #[serde(
        default,
        serialize_with = "serialize_with_wildcard",
        skip_serializing_if = "Setting::is_not_set"
    )]
    pub searchable_attributes: Setting<Vec<String>>,

    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
    pub filterable_attributes: Setting<BTreeSet<String>>,
    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
    pub sortable_attributes: Setting<BTreeSet<String>>,
    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
    pub ranking_rules: Setting<Vec<String>>,
    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
    pub stop_words: Setting<BTreeSet<String>>,
    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
    pub synonyms: Setting<BTreeMap<String, Vec<String>>>,
    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
    pub distinct_attribute: Setting<String>,

    #[serde(skip)]
    pub _kind: PhantomData<T>,
}

impl Settings<Checked> {
    pub fn cleared() -> Settings<Checked> {
        Settings {
            displayed_attributes: Setting::Reset,
            searchable_attributes: Setting::Reset,
            filterable_attributes: Setting::Reset,
            sortable_attributes: Setting::Reset,
            ranking_rules: Setting::Reset,
            stop_words: Setting::Reset,
            synonyms: Setting::Reset,
            distinct_attribute: Setting::Reset,
            _kind: PhantomData,
        }
    }

    pub fn into_unchecked(self) -> Settings<Unchecked> {
        let Self {
            displayed_attributes,
            searchable_attributes,
            filterable_attributes,
            sortable_attributes,
            ranking_rules,
            stop_words,
            synonyms,
            distinct_attribute,
            ..
        } = self;

        Settings {
            displayed_attributes,
            searchable_attributes,
            filterable_attributes,
            sortable_attributes,
            ranking_rules,
            stop_words,
            synonyms,
            distinct_attribute,
            _kind: PhantomData,
        }
    }
}

impl Settings<Unchecked> {
    pub fn check(self) -> Settings<Checked> {
        let displayed_attributes = match self.displayed_attributes {
            Setting::Set(fields) => {
                if fields.iter().any(|f| f == "*") {
                    Setting::Reset
                } else {
                    Setting::Set(fields)
                }
            }
            otherwise => otherwise,
        };

        let searchable_attributes = match self.searchable_attributes {
            Setting::Set(fields) => {
                if fields.iter().any(|f| f == "*") {
                    Setting::Reset
                } else {
                    Setting::Set(fields)
                }
            }
            otherwise => otherwise,
        };

        Settings {
            displayed_attributes,
            searchable_attributes,
            filterable_attributes: self.filterable_attributes,
            sortable_attributes: self.sortable_attributes,
            ranking_rules: self.ranking_rules,
            stop_words: self.stop_words,
            synonyms: self.synonyms,
            distinct_attribute: self.distinct_attribute,
            _kind: PhantomData,
        }
    }
}

#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(deny_unknown_fields)]
#[serde(rename_all = "camelCase")]
pub struct Facets {
    pub level_group_size: Option<NonZeroUsize>,
    pub min_level_size: Option<NonZeroUsize>,
}

impl Index {
    pub fn update_documents(
        &self,
        method: IndexDocumentsMethod,
        content_uuid: Uuid,
        update_builder: UpdateBuilder,
        primary_key: Option<&str>,
    ) -> Result<UpdateResult> {
        let mut txn = self.write_txn()?;
        let result = self.update_documents_txn(&mut txn, method, content_uuid, update_builder, primary_key)?;
        txn.commit()?;

        Ok(result)
    }

    pub fn update_documents_txn<'a, 'b>(
        &'a self,
        txn: &mut heed::RwTxn<'a, 'b>,
        method: IndexDocumentsMethod,
        content_uuid: Uuid,
        update_builder: UpdateBuilder,
        primary_key: Option<&str>,
    ) -> Result<UpdateResult> {
        trace!("performing document addition");

        // Set the primary key if it is not set already; ignore it otherwise.
        if let (None, Some(primary_key)) = (self.primary_key(txn)?, primary_key) {
            let mut builder = UpdateBuilder::new(0).settings(txn, self);
            builder.set_primary_key(primary_key.to_string());
            builder.execute(|_, _| ())?;
        }

        let indexing_callback =
            |indexing_step, update_id| debug!("update {}: {:?}", update_id, indexing_step);

        let content_file = self.update_file_store.get_update(content_uuid).unwrap();
        let reader = DocumentBatchReader::from_reader(content_file).unwrap();

        let mut builder = update_builder.index_documents(txn, self);
        builder.index_documents_method(method);
        let addition = builder.execute(reader, indexing_callback)?;

        info!("document addition done: {:?}", addition);

        Ok(UpdateResult::DocumentsAddition(addition))
    }

    //pub fn clear_documents(&self, update_builder: UpdateBuilder) -> Result<UpdateResult> {
    //// We must use the write transaction of the update here.
    //let mut wtxn = self.write_txn()?;
    //let builder = update_builder.clear_documents(&mut wtxn, self);

    //let _count = builder.execute()?;

    //wtxn.commit()
    //.and(Ok(UpdateResult::Other))
    //.map_err(Into::into)
    //}

    //pub fn update_settings_txn<'a, 'b>(
    //&'a self,
    //txn: &mut heed::RwTxn<'a, 'b>,
    //settings: &Settings<Checked>,
    //update_builder: UpdateBuilder,
    //) -> Result<UpdateResult> {
    //// We must use the write transaction of the update here.
    //let mut builder = update_builder.settings(txn, self);

    //match settings.searchable_attributes {
    //Setting::Set(ref names) => builder.set_searchable_fields(names.clone()),
    //Setting::Reset => builder.reset_searchable_fields(),
    //Setting::NotSet => (),
    //}

    //match settings.displayed_attributes {
    //Setting::Set(ref names) => builder.set_displayed_fields(names.clone()),
    //Setting::Reset => builder.reset_displayed_fields(),
    //Setting::NotSet => (),
    //}

    //match settings.filterable_attributes {
    //Setting::Set(ref facets) => {
    //builder.set_filterable_fields(facets.clone().into_iter().collect())
    //}
    //Setting::Reset => builder.reset_filterable_fields(),
    //Setting::NotSet => (),
    //}

    //match settings.sortable_attributes {
    //Setting::Set(ref fields) => {
    //builder.set_sortable_fields(fields.iter().cloned().collect())
    //}
    //Setting::Reset => builder.reset_sortable_fields(),
    //Setting::NotSet => (),
    //}

    //match settings.ranking_rules {
    //Setting::Set(ref criteria) => builder.set_criteria(criteria.clone()),
    //Setting::Reset => builder.reset_criteria(),
    //Setting::NotSet => (),
    //}

    //match settings.stop_words {
    //Setting::Set(ref stop_words) => builder.set_stop_words(stop_words.clone()),
    //Setting::Reset => builder.reset_stop_words(),
    //Setting::NotSet => (),
    //}

    //match settings.synonyms {
    //Setting::Set(ref synonyms) => {
    //builder.set_synonyms(synonyms.clone().into_iter().collect())
    //}
    //Setting::Reset => builder.reset_synonyms(),
    //Setting::NotSet => (),
    //}

    //match settings.distinct_attribute {
    //Setting::Set(ref attr) => builder.set_distinct_field(attr.clone()),
    //Setting::Reset => builder.reset_distinct_field(),
    //Setting::NotSet => (),
    //}

    //builder.execute(|indexing_step, update_id| {
    //debug!("update {}: {:?}", update_id, indexing_step)
    //})?;

    //Ok(UpdateResult::Other)
    //}

    //pub fn update_settings(
    //&self,
    //settings: &Settings<Checked>,
    //update_builder: UpdateBuilder,
    //) -> Result<UpdateResult> {
    //let mut txn = self.write_txn()?;
    //let result = self.update_settings_txn(&mut txn, settings, update_builder)?;
    //txn.commit()?;
    //Ok(result)
    //}

    //pub fn delete_documents(
    //&self,
    //document_ids: &[String],
    //update_builder: UpdateBuilder,
    //) -> Result<UpdateResult> {
    //let mut txn = self.write_txn()?;
    //let mut builder = update_builder.delete_documents(&mut txn, self)?;

    //// We ignore nonexistent document ids.
    //document_ids.iter().for_each(|id| {
    //builder.delete_external_id(id);
    //});

    //let deleted = builder.execute()?;
    //txn.commit()
    //.and(Ok(UpdateResult::DocumentDeletion { deleted }))
    //.map_err(Into::into)
    //}
}

#[cfg(test)]
mod test {
    use super::*;

    #[test]
    fn test_setting_check() {
        // test no changes
        let settings = Settings {
            displayed_attributes: Setting::Set(vec![String::from("hello")]),
            searchable_attributes: Setting::Set(vec![String::from("hello")]),
            filterable_attributes: Setting::NotSet,
            sortable_attributes: Setting::NotSet,
            ranking_rules: Setting::NotSet,
            stop_words: Setting::NotSet,
            synonyms: Setting::NotSet,
            distinct_attribute: Setting::NotSet,
            _kind: PhantomData::<Unchecked>,
        };

        let checked = settings.clone().check();
        assert_eq!(settings.displayed_attributes, checked.displayed_attributes);
        assert_eq!(
            settings.searchable_attributes,
            checked.searchable_attributes
        );

        // test wildcard
        let settings = Settings {
            displayed_attributes: Setting::Set(vec![String::from("*")]),
            searchable_attributes: Setting::Set(vec![String::from("hello"), String::from("*")]),
            filterable_attributes: Setting::NotSet,
            sortable_attributes: Setting::NotSet,
            ranking_rules: Setting::NotSet,
            stop_words: Setting::NotSet,
            synonyms: Setting::NotSet,
            distinct_attribute: Setting::NotSet,
            _kind: PhantomData::<Unchecked>,
        };

        let checked = settings.check();
        assert_eq!(checked.displayed_attributes, Setting::Reset);
        assert_eq!(checked.searchable_attributes, Setting::Reset);
    }
}
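One behaviour worth calling out: `check()` treats any `"*"` entry in `displayedAttributes` or `searchableAttributes` as a reset, and `serialize_with_wildcard` renders `Setting::Reset` back as `["*"]`, so the wildcard survives a round trip. A small sketch of the `check()` half, reusing the types above (it assumes milli's `Setting` defaults to `NotSet`, which the `Default` derive on `Settings` relies on):

// Sketch: "*" anywhere in the list collapses the whole setting to Reset.
fn wildcard_example() {
    let settings = Settings::<Unchecked> {
        displayed_attributes: Setting::Set(vec!["*".to_string()]),
        searchable_attributes: Setting::Set(vec!["title".to_string(), "*".to_string()]),
        ..Default::default() // remaining fields stay Setting::NotSet
    };

    let checked = settings.check();
    assert!(matches!(checked.displayed_attributes, Setting::Reset));
    assert!(matches!(checked.searchable_attributes, Setting::Reset));
}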