diff --git a/meilisearch-http/src/routes/indexes/search.rs b/meilisearch-http/src/routes/indexes/search.rs
index 0550cb09f..8c901035d 100644
--- a/meilisearch-http/src/routes/indexes/search.rs
+++ b/meilisearch-http/src/routes/indexes/search.rs
@@ -15,7 +15,7 @@ use crate::extractors::sequential_extractor::SeqHandler;
 use crate::search::{
     perform_search, MatchingStrategy, SearchQuery, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER,
     DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT,
-    DEFAULT_SEARCH_OFFSET
+    DEFAULT_SEARCH_OFFSET,
 };
 
 pub fn configure(cfg: &mut web::ServiceConfig) {
@@ -72,9 +72,7 @@ impl From<SearchQueryGet> for SearchQuery {
             limit: other.limit,
             page: other.page,
             hits_per_page: other.hits_per_page,
-            attributes_to_retrieve: other
-                .attributes_to_retrieve
-                .map(|o| o.into_iter().collect()),
+            attributes_to_retrieve: other.attributes_to_retrieve.map(|o| o.into_iter().collect()),
             attributes_to_crop: other.attributes_to_crop.map(|o| o.into_iter().collect()),
             crop_length: other.crop_length,
             attributes_to_highlight: other.attributes_to_highlight.map(|o| o.into_iter().collect()),
diff --git a/meilisearch-http/src/search.rs b/meilisearch-http/src/search.rs
index adcfdb825..7310e7914 100644
--- a/meilisearch-http/src/search.rs
+++ b/meilisearch-http/src/search.rs
@@ -19,6 +19,7 @@ use crate::error::MeilisearchHttpError;
 
 type MatchesPosition = BTreeMap<String, Vec<MatchBounds>>;
 
+pub const DEFAULT_SEARCH_OFFSET: fn() -> usize = || 0;
 pub const DEFAULT_SEARCH_LIMIT: fn() -> usize = || 20;
 pub const DEFAULT_CROP_LENGTH: fn() -> usize = || 10;
 pub const DEFAULT_CROP_MARKER: fn() -> String = || "…".to_string();
@@ -29,9 +30,12 @@ pub const DEFAULT_HIGHLIGHT_POST_TAG: fn() -> String = || "</em>".to_string();
 #[serde(rename_all = "camelCase", deny_unknown_fields)]
 pub struct SearchQuery {
     pub q: Option<String>,
-    pub offset: Option<usize>,
+    #[serde(default = "DEFAULT_SEARCH_OFFSET")]
+    pub offset: usize,
     #[serde(default = "DEFAULT_SEARCH_LIMIT")]
     pub limit: usize,
+    pub page: Option<usize>,
+    pub hits_per_page: Option<usize>,
     pub attributes_to_retrieve: Option<BTreeSet<String>>,
     pub attributes_to_crop: Option<Vec<String>>,
     #[serde(default = "DEFAULT_CROP_LENGTH")]
@@ -53,6 +57,12 @@ pub struct SearchQuery {
     pub matching_strategy: MatchingStrategy,
 }
 
+impl SearchQuery {
+    pub fn is_finite_pagination(&self) -> bool {
+        self.page.or(self.hits_per_page).is_some()
+    }
+}
+
 #[derive(Deserialize, Debug, Clone, PartialEq, Eq)]
 #[serde(rename_all = "camelCase")]
 pub enum MatchingStrategy {
@@ -91,15 +101,23 @@ pub struct SearchHit {
 #[serde(rename_all = "camelCase")]
 pub struct SearchResult {
     pub hits: Vec<SearchHit>,
-    pub estimated_total_hits: u64,
     pub query: String,
-    pub limit: usize,
-    pub offset: usize,
     pub processing_time_ms: u128,
+    #[serde(flatten)]
+    pub hits_info: HitsInfo,
     #[serde(skip_serializing_if = "Option::is_none")]
     pub facet_distribution: Option<BTreeMap<String, BTreeMap<String, u64>>>,
 }
 
+#[derive(Serialize, Debug, Clone, PartialEq, Eq)]
+#[serde(untagged)]
+pub enum HitsInfo {
+    #[serde(rename_all = "camelCase")]
+    Pagination { hits_per_page: usize, page: usize, total_pages: usize, total_hits: usize },
+    #[serde(rename_all = "camelCase")]
+    OffsetLimit { limit: usize, offset: usize, estimated_total_hits: usize },
+}
+
 pub fn perform_search(
     index: &Index,
     query: SearchQuery,
@@ -113,6 +131,7 @@
         search.query(query);
     }
 
+    let is_finite_pagination = query.is_finite_pagination();
     search.terms_matching_strategy(query.matching_strategy.into());
 
     let max_total_hits = index
@@ -120,10 +139,23 @@
         .map_err(milli::Error::from)?
         .unwrap_or(DEFAULT_PAGINATION_MAX_TOTAL_HITS);
 
+    search.exhaustive_number_hits(is_finite_pagination);
+
+    // Compute the offset and the limit depending on the pagination mode.
+    let (offset, limit) = if is_finite_pagination {
+        let limit = query.hits_per_page.unwrap_or_else(DEFAULT_SEARCH_LIMIT);
+        let page = query.page.unwrap_or(1);
+
+        // Page 0 gives a limit of 0, forcing Meilisearch to return no documents.
+        page.checked_sub(1).map_or((0, 0), |p| (limit * p, limit))
+    } else {
+        (query.offset, query.limit)
+    };
+
     // Make sure that a user can't get more documents than the hard limit,
     // we align that on the offset too.
-    let offset = min(query.offset.unwrap_or(0), max_total_hits);
-    let limit = min(query.limit, max_total_hits.saturating_sub(offset));
+    let offset = min(offset, max_total_hits);
+    let limit = min(limit, max_total_hits.saturating_sub(offset));
 
     search.offset(offset);
     search.limit(limit);
@@ -239,7 +271,23 @@ pub fn perform_search(
         documents.push(hit);
     }
 
-    let estimated_total_hits = candidates.len();
+    let number_of_hits = min(candidates.len() as usize, max_total_hits);
+    let hits_info = if is_finite_pagination {
+        let hits_per_page = query.hits_per_page.unwrap_or_else(DEFAULT_SEARCH_LIMIT);
+        // If hits_per_page is 0, then pages can't be computed, so we respond with 0.
+        let total_pages = (number_of_hits + hits_per_page.saturating_sub(1))
+            .checked_div(hits_per_page)
+            .unwrap_or(0);
+
+        HitsInfo::Pagination {
+            hits_per_page,
+            page: query.page.unwrap_or(1),
+            total_pages,
+            total_hits: number_of_hits,
+        }
+    } else {
+        HitsInfo::OffsetLimit { limit: query.limit, offset, estimated_total_hits: number_of_hits }
+    };
 
     let facet_distribution = match query.facets {
         Some(ref fields) => {
@@ -263,10 +311,8 @@ pub fn perform_search(
 
     let result = SearchResult {
         hits: documents,
-        estimated_total_hits,
+        hits_info,
         query: query.q.clone().unwrap_or_default(),
-        limit: query.limit,
-        offset: query.offset.unwrap_or_default(),
         processing_time_ms: before_search.elapsed().as_millis(),
         facet_distribution,
     };
diff --git a/meilisearch-lib/Cargo.toml b/meilisearch-lib/Cargo.toml
deleted file mode 100644
index 7883b6490..000000000
--- a/meilisearch-lib/Cargo.toml
+++ /dev/null
@@ -1,85 +0,0 @@
-[package]
-name = "meilisearch-lib"
-version = "0.29.1"
-edition = "2021"
-
-[dependencies]
-actix-web = { version = "4.2.1", default-features = false }
-anyhow = { version = "1.0.65", features = ["backtrace"] }
-async-stream = "0.3.3"
-async-trait = "0.1.57"
-atomic_refcell = "0.1.8"
-byte-unit = { version = "4.0.14", default-features = false, features = ["std", "serde"] }
-bytes = "1.2.1"
-clap = { version = "4.0.9", features = ["derive", "env"] }
-crossbeam-channel = "0.5.6"
-csv = "1.1.6"
-derivative = "2.2.0"
-either = { version = "1.8.0", features = ["serde"] }
-flate2 = "1.0.24"
-fs_extra = "1.2.0"
-fst = "0.4.7"
-futures = "0.3.24"
-futures-util = "0.3.24"
-http = "0.2.8"
-indexmap = { version = "1.9.1", features = ["serde-1"] }
-itertools = "0.10.5"
-lazy_static = "1.4.0"
-log = "0.4.17"
-meilisearch-auth = { path = "../meilisearch-auth" }
-meilisearch-types = { path = "../meilisearch-types" }
-milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.34.0", default-features = false }
-mime = "0.3.16"
-num_cpus = "1.13.1"
-obkv = "0.2.0"
-once_cell = "1.15.0"
-page_size = "0.4.2"
-parking_lot = "0.12.1"
-permissive-json-pointer = { path = "../permissive-json-pointer" }
-rand = "0.8.5"
-rayon = "1.5.3"
-regex = "1.6.0"
-reqwest = { version = "0.11.12", features = ["json", "rustls-tls"], default-features = false, optional = true }
-roaring = "0.10.1"
-rustls = "0.20.6"
-serde = { version = "1.0.145", features = ["derive"] }
-serde_json = { version = "1.0.85", features = ["preserve_order"] }
-siphasher = "0.3.10"
-slice-group-by = "0.3.0"
-sysinfo = "0.26.4"
-tar = "0.4.38"
-tempfile = "3.3.0"
-thiserror = "1.0.37"
-time = { version = "0.3.15", features = ["serde-well-known", "formatting", "parsing", "macros"] }
-tokio = { version = "1.21.2", features = ["full"] }
-uuid = { version = "1.1.2", features = ["serde", "v4"] }
-walkdir = "2.3.2"
-whoami = { version = "1.2.3", optional = true }
-index-scheduler = { path = "../index-scheduler" }
-index = { path = "../index" }
-file-store = { path = "../file-store" }
-
-[dev-dependencies]
-actix-rt = "2.7.0"
-meilisearch-types = { path = "../meilisearch-types", features = ["test-traits"] }
-mockall = "0.11.2"
-nelson = { git = "https://github.com/meilisearch/nelson.git", rev = "675f13885548fb415ead8fbb447e9e6d9314000a"}
-paste = "1.0.9"
-proptest = "1.0.0"
-proptest-derive = "0.3.0"
-
-[features]
-# all specialized tokenizations
-default = ["milli/default"]
-
-# chinese specialized tokenization
-chinese = ["milli/chinese"]
-
-# hebrew specialized tokenization
-hebrew = ["milli/hebrew"]
-
-# japanese specialized tokenization
-japanese = ["milli/japanese"]
-
-# thai specialized tokenization
-thai = ["milli/thai"]
diff --git a/meilisearch-lib/src/dump/compat/v4.rs b/meilisearch-lib/src/dump/compat/v4.rs
deleted file mode 100644
index 89e9ee1ab..000000000
--- a/meilisearch-lib/src/dump/compat/v4.rs
+++ /dev/null
@@ -1,145 +0,0 @@
-use meilisearch_types::error::ResponseError;
-use meilisearch_types::index_uid::IndexUid;
-use milli::update::IndexDocumentsMethod;
-use serde::{Deserialize, Serialize};
-use time::OffsetDateTime;
-use uuid::Uuid;
-
-use crate::index::{Settings, Unchecked};
-use crate::tasks::batch::BatchId;
-use crate::tasks::task::{
-    DocumentDeletion, TaskContent as NewTaskContent, TaskEvent as NewTaskEvent, TaskId, TaskResult,
-};
-
-#[derive(Debug, Serialize, Deserialize)]
-pub struct Task {
-    pub id: TaskId,
-    pub index_uid: IndexUid,
-    pub content: TaskContent,
-    pub events: Vec<TaskEvent>,
-}
-
-impl From<Task> for crate::tasks::task::Task {
-    fn from(other: Task) -> Self {
-        Self {
-            id: other.id,
-            content: NewTaskContent::from((other.index_uid, other.content)),
-            events: other.events.into_iter().map(Into::into).collect(),
-        }
-    }
-}
-
-#[derive(Debug, Serialize, Deserialize)]
-pub enum TaskEvent {
-    Created(#[serde(with = "time::serde::rfc3339")] OffsetDateTime),
-    Batched {
-        #[serde(with = "time::serde::rfc3339")]
-        timestamp: OffsetDateTime,
-        batch_id: BatchId,
-    },
-    Processing(#[serde(with = "time::serde::rfc3339")] OffsetDateTime),
-    Succeded {
-        result: TaskResult,
-        #[serde(with = "time::serde::rfc3339")]
-        timestamp: OffsetDateTime,
-    },
-    Failed {
-        error: ResponseError,
-        #[serde(with = "time::serde::rfc3339")]
-        timestamp: OffsetDateTime,
-    },
-}
-
-impl From<TaskEvent> for NewTaskEvent {
-    fn from(other: TaskEvent) -> Self {
-        match other {
-            TaskEvent::Created(x) => NewTaskEvent::Created(x),
-            TaskEvent::Batched {
-                timestamp,
-                batch_id,
-            } => NewTaskEvent::Batched {
-                timestamp,
-                batch_id,
-            },
-            TaskEvent::Processing(x) => NewTaskEvent::Processing(x),
-            TaskEvent::Succeded { result, timestamp } => {
-                NewTaskEvent::Succeeded { result, timestamp }
-            }
-            TaskEvent::Failed { error, timestamp } => NewTaskEvent::Failed { error, timestamp },
-        }
-    }
-}
-
-#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
-#[allow(clippy::large_enum_variant)]
-pub enum TaskContent {
-    DocumentAddition {
-        content_uuid: Uuid,
-        merge_strategy: IndexDocumentsMethod,
-        primary_key: Option<String>,
-        documents_count: usize,
-        allow_index_creation: bool,
-    },
-    DocumentDeletion(DocumentDeletion),
-    SettingsUpdate {
-        settings: Settings<Unchecked>,
-        /// Indicates whether the task was a deletion
-        is_deletion: bool,
-        allow_index_creation: bool,
-    },
-    IndexDeletion,
-    IndexCreation {
-        primary_key: Option<String>,
-    },
-    IndexUpdate {
-        primary_key: Option<String>,
-    },
-    Dump {
-        uid: String,
-    },
-}
-
-impl From<(IndexUid, TaskContent)> for NewTaskContent {
-    fn from((index_uid, content): (IndexUid, TaskContent)) -> Self {
-        match content {
-            TaskContent::DocumentAddition {
-                content_uuid,
-                merge_strategy,
-                primary_key,
-                documents_count,
-                allow_index_creation,
-            } => NewTaskContent::DocumentAddition {
-                index_uid,
-                content_uuid,
-                merge_strategy,
-                primary_key,
-                documents_count,
-                allow_index_creation,
-            },
-            TaskContent::DocumentDeletion(deletion) => NewTaskContent::DocumentDeletion {
-                index_uid,
-                deletion,
-            },
-            TaskContent::SettingsUpdate {
-                settings,
-                is_deletion,
-                allow_index_creation,
-            } => NewTaskContent::SettingsUpdate {
-                index_uid,
-                settings,
-                is_deletion,
-                allow_index_creation,
-            },
-            TaskContent::IndexDeletion => NewTaskContent::IndexDeletion { index_uid },
-            TaskContent::IndexCreation { primary_key } => NewTaskContent::IndexCreation {
-                index_uid,
-                primary_key,
-            },
-            TaskContent::IndexUpdate { primary_key } => NewTaskContent::IndexUpdate {
-                index_uid,
-                primary_key,
-            },
-            TaskContent::Dump { uid } => NewTaskContent::Dump { uid },
-        }
-    }
-}
diff --git a/meilisearch-lib/src/index/mod.rs b/meilisearch-lib/src/index/mod.rs
deleted file mode 100644
index 0aeaba14e..000000000
--- a/meilisearch-lib/src/index/mod.rs
+++ /dev/null
@@ -1,250 +0,0 @@
-pub use search::{
-    HitsInfo, MatchingStrategy, SearchQuery, SearchResult, DEFAULT_CROP_LENGTH,
-    DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG,
-    DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET,
-};
-pub use updates::{apply_settings_to_builder, Checked, Facets, Settings, Unchecked};
-
-mod dump;
-pub mod error;
-mod search;
-pub mod updates;
-
-#[allow(clippy::module_inception)]
-mod index;
-
-pub use index::{Document, IndexMeta, IndexStats};
-
-#[cfg(not(test))]
-pub use index::Index;
-
-#[cfg(test)]
-pub use test::MockIndex as Index;
-
-/// The index::test module provides means of mocking an index instance. I can be used throughout the
-/// code for unit testing, in places where an index would normally be used.
-#[cfg(test)] -pub mod test { - use std::path::{Path, PathBuf}; - use std::sync::Arc; - - use milli::update::{ - DocumentAdditionResult, DocumentDeletionResult, IndexDocumentsMethod, IndexerConfig, - }; - use nelson::Mocker; - use uuid::Uuid; - - use super::error::Result; - use super::index::Index; - use super::Document; - use super::{Checked, IndexMeta, IndexStats, SearchQuery, SearchResult, Settings}; - use crate::update_file_store::UpdateFileStore; - - #[derive(Clone)] - pub enum MockIndex { - Real(Index), - Mock(Arc), - } - - impl MockIndex { - pub fn mock(mocker: Mocker) -> Self { - Self::Mock(Arc::new(mocker)) - } - - pub fn open( - path: impl AsRef, - size: usize, - uuid: Uuid, - update_handler: Arc, - ) -> Result { - let index = Index::open(path, size, uuid, update_handler)?; - Ok(Self::Real(index)) - } - - pub fn load_dump( - src: impl AsRef, - dst: impl AsRef, - size: usize, - update_handler: &IndexerConfig, - ) -> anyhow::Result<()> { - Index::load_dump(src, dst, size, update_handler) - } - - pub fn uuid(&self) -> Uuid { - match self { - MockIndex::Real(index) => index.uuid(), - MockIndex::Mock(m) => unsafe { m.get("uuid").call(()) }, - } - } - - pub fn stats(&self) -> Result { - match self { - MockIndex::Real(index) => index.stats(), - MockIndex::Mock(m) => unsafe { m.get("stats").call(()) }, - } - } - - pub fn meta(&self) -> Result { - match self { - MockIndex::Real(index) => index.meta(), - MockIndex::Mock(_) => todo!(), - } - } - pub fn settings(&self) -> Result> { - match self { - MockIndex::Real(index) => index.settings(), - MockIndex::Mock(_) => todo!(), - } - } - - pub fn retrieve_documents>( - &self, - offset: usize, - limit: usize, - attributes_to_retrieve: Option>, - ) -> Result<(u64, Vec)> { - match self { - MockIndex::Real(index) => { - index.retrieve_documents(offset, limit, attributes_to_retrieve) - } - MockIndex::Mock(_) => todo!(), - } - } - - pub fn retrieve_document>( - &self, - doc_id: String, - attributes_to_retrieve: Option>, - ) -> Result { - match self { - MockIndex::Real(index) => index.retrieve_document(doc_id, attributes_to_retrieve), - MockIndex::Mock(_) => todo!(), - } - } - - pub fn size(&self) -> u64 { - match self { - MockIndex::Real(index) => index.size(), - MockIndex::Mock(_) => todo!(), - } - } - - pub fn snapshot(&self, path: impl AsRef) -> Result<()> { - match self { - MockIndex::Real(index) => index.snapshot(path), - MockIndex::Mock(m) => unsafe { m.get("snapshot").call(path.as_ref()) }, - } - } - - pub fn close(self) { - match self { - MockIndex::Real(index) => index.close(), - MockIndex::Mock(m) => unsafe { m.get("close").call(()) }, - } - } - - pub fn perform_search(&self, query: SearchQuery) -> Result { - match self { - MockIndex::Real(index) => index.perform_search(query), - MockIndex::Mock(m) => unsafe { m.get("perform_search").call(query) }, - } - } - - pub fn dump(&self, path: impl AsRef) -> Result<()> { - match self { - MockIndex::Real(index) => index.dump(path), - MockIndex::Mock(m) => unsafe { m.get("dump").call(path.as_ref()) }, - } - } - - pub fn update_documents( - &self, - method: IndexDocumentsMethod, - primary_key: Option, - file_store: UpdateFileStore, - contents: impl Iterator, - ) -> Result>> { - match self { - MockIndex::Real(index) => { - index.update_documents(method, primary_key, file_store, contents) - } - MockIndex::Mock(mocker) => unsafe { - mocker - .get("update_documents") - .call((method, primary_key, file_store, contents)) - }, - } - } - - pub fn update_settings(&self, settings: &Settings) -> Result<()> { - 
match self { - MockIndex::Real(index) => index.update_settings(settings), - MockIndex::Mock(m) => unsafe { m.get("update_settings").call(settings) }, - } - } - - pub fn update_primary_key(&self, primary_key: String) -> Result { - match self { - MockIndex::Real(index) => index.update_primary_key(primary_key), - MockIndex::Mock(m) => unsafe { m.get("update_primary_key").call(primary_key) }, - } - } - - pub fn delete_documents(&self, ids: &[String]) -> Result { - match self { - MockIndex::Real(index) => index.delete_documents(ids), - MockIndex::Mock(m) => unsafe { m.get("delete_documents").call(ids) }, - } - } - - pub fn clear_documents(&self) -> Result<()> { - match self { - MockIndex::Real(index) => index.clear_documents(), - MockIndex::Mock(m) => unsafe { m.get("clear_documents").call(()) }, - } - } - } - - #[test] - fn test_faux_index() { - let faux = Mocker::default(); - faux.when("snapshot") - .times(2) - .then(|_: &Path| -> Result<()> { Ok(()) }); - - let index = MockIndex::mock(faux); - - let path = PathBuf::from("hello"); - index.snapshot(&path).unwrap(); - index.snapshot(&path).unwrap(); - } - - #[test] - #[should_panic] - fn test_faux_unexisting_method_stub() { - let faux = Mocker::default(); - - let index = MockIndex::mock(faux); - - let path = PathBuf::from("hello"); - index.snapshot(&path).unwrap(); - index.snapshot(&path).unwrap(); - } - - #[test] - #[should_panic] - fn test_faux_panic() { - let faux = Mocker::default(); - faux.when("snapshot") - .times(2) - .then(|_: &Path| -> Result<()> { - panic!(); - }); - - let index = MockIndex::mock(faux); - - let path = PathBuf::from("hello"); - index.snapshot(&path).unwrap(); - index.snapshot(&path).unwrap(); - } -} diff --git a/meilisearch-lib/src/index/search.rs b/meilisearch-lib/src/index/search.rs deleted file mode 100644 index 558a530c0..000000000 --- a/meilisearch-lib/src/index/search.rs +++ /dev/null @@ -1,747 +0,0 @@ -use std::cmp::min; -use std::collections::{BTreeMap, BTreeSet, HashSet}; -use std::str::FromStr; -use std::time::Instant; - -use either::Either; -use milli::tokenizer::TokenizerBuilder; -use milli::{ - AscDesc, FieldId, FieldsIdsMap, Filter, FormatOptions, MatchBounds, MatcherBuilder, SortError, - TermsMatchingStrategy, DEFAULT_VALUES_PER_FACET, -}; -use regex::Regex; -use serde::{Deserialize, Serialize}; -use serde_json::{json, Value}; - -use crate::index::error::FacetError; - -use super::error::{IndexError, Result}; -use super::index::Index; - -pub type Document = serde_json::Map; -type MatchesPosition = BTreeMap>; - -pub const DEFAULT_SEARCH_OFFSET: fn() -> usize = || 0; -pub const DEFAULT_SEARCH_LIMIT: fn() -> usize = || 20; -pub const DEFAULT_CROP_LENGTH: fn() -> usize = || 10; -pub const DEFAULT_CROP_MARKER: fn() -> String = || "…".to_string(); -pub const DEFAULT_HIGHLIGHT_PRE_TAG: fn() -> String = || "".to_string(); -pub const DEFAULT_HIGHLIGHT_POST_TAG: fn() -> String = || "".to_string(); - -/// The maximum number of results that the engine -/// will be able to return in one search call. 
-pub const DEFAULT_PAGINATION_MAX_TOTAL_HITS: usize = 1000; - -#[derive(Deserialize, Debug, Clone, PartialEq, Eq)] -#[serde(rename_all = "camelCase", deny_unknown_fields)] -pub struct SearchQuery { - pub q: Option, - #[serde(default = "DEFAULT_SEARCH_OFFSET")] - pub offset: usize, - #[serde(default = "DEFAULT_SEARCH_LIMIT")] - pub limit: usize, - pub page: Option, - pub hits_per_page: Option, - pub attributes_to_retrieve: Option>, - pub attributes_to_crop: Option>, - #[serde(default = "DEFAULT_CROP_LENGTH")] - pub crop_length: usize, - pub attributes_to_highlight: Option>, - // Default to false - #[serde(default = "Default::default")] - pub show_matches_position: bool, - pub filter: Option, - pub sort: Option>, - pub facets: Option>, - #[serde(default = "DEFAULT_HIGHLIGHT_PRE_TAG")] - pub highlight_pre_tag: String, - #[serde(default = "DEFAULT_HIGHLIGHT_POST_TAG")] - pub highlight_post_tag: String, - #[serde(default = "DEFAULT_CROP_MARKER")] - pub crop_marker: String, - #[serde(default)] - pub matching_strategy: MatchingStrategy, -} - -impl SearchQuery { - pub fn is_finite_pagination(&self) -> bool { - self.page.or(self.hits_per_page).is_some() - } -} - -#[derive(Deserialize, Debug, Clone, PartialEq, Eq)] -#[serde(rename_all = "camelCase")] -pub enum MatchingStrategy { - /// Remove query words from last to first - Last, - /// All query words are mandatory - All, -} - -impl Default for MatchingStrategy { - fn default() -> Self { - Self::Last - } -} - -impl From for TermsMatchingStrategy { - fn from(other: MatchingStrategy) -> Self { - match other { - MatchingStrategy::Last => Self::Last, - MatchingStrategy::All => Self::All, - } - } -} - -#[derive(Debug, Clone, Serialize, PartialEq)] -pub struct SearchHit { - #[serde(flatten)] - pub document: Document, - #[serde(rename = "_formatted", skip_serializing_if = "Document::is_empty")] - pub formatted: Document, - #[serde(rename = "_matchesPosition", skip_serializing_if = "Option::is_none")] - pub matches_position: Option, -} - -#[derive(Serialize, Debug, Clone, PartialEq)] -#[serde(rename_all = "camelCase")] -pub struct SearchResult { - pub hits: Vec, - pub query: String, - pub processing_time_ms: u128, - #[serde(flatten)] - pub hits_info: HitsInfo, - #[serde(skip_serializing_if = "Option::is_none")] - pub facet_distribution: Option>>, -} - -#[derive(Serialize, Debug, Clone, PartialEq, Eq)] -#[serde(untagged)] -pub enum HitsInfo { - #[serde(rename_all = "camelCase")] - Pagination { - hits_per_page: usize, - page: usize, - total_pages: usize, - total_hits: usize, - }, - #[serde(rename_all = "camelCase")] - OffsetLimit { - limit: usize, - offset: usize, - estimated_total_hits: usize, - }, -} - -impl Index { - pub fn perform_search(&self, query: SearchQuery) -> Result { - let before_search = Instant::now(); - let rtxn = self.read_txn()?; - - let mut search = self.search(&rtxn); - - if let Some(ref query) = query.q { - search.query(query); - } - - let is_finite_pagination = query.is_finite_pagination(); - search.terms_matching_strategy(query.matching_strategy.into()); - - let max_total_hits = self - .pagination_max_total_hits(&rtxn)? - .unwrap_or(DEFAULT_PAGINATION_MAX_TOTAL_HITS); - - search.exhaustive_number_hits(is_finite_pagination); - - // compute the offset on the limit depending on the pagination mode. - let (offset, limit) = if is_finite_pagination { - let limit = query.hits_per_page.unwrap_or_else(DEFAULT_SEARCH_LIMIT); - let page = query.page.unwrap_or(1); - - // page 0 gives a limit of 0 forcing Meilisearch to return no document. 
- page.checked_sub(1).map_or((0, 0), |p| (limit * p, limit)) - } else { - (query.offset, query.limit) - }; - - // Make sure that a user can't get more documents than the hard limit, - // we align that on the offset too. - let offset = min(offset, max_total_hits); - let limit = min(limit, max_total_hits.saturating_sub(offset)); - - search.offset(offset); - search.limit(limit); - - if let Some(ref filter) = query.filter { - if let Some(facets) = parse_filter(filter)? { - search.filter(facets); - } - } - - if let Some(ref sort) = query.sort { - let sort = match sort.iter().map(|s| AscDesc::from_str(s)).collect() { - Ok(sorts) => sorts, - Err(asc_desc_error) => { - return Err(IndexError::Milli(SortError::from(asc_desc_error).into())) - } - }; - - search.sort_criteria(sort); - } - - let milli::SearchResult { - documents_ids, - matching_words, - candidates, - .. - } = search.execute()?; - - let fields_ids_map = self.fields_ids_map(&rtxn).unwrap(); - - let displayed_ids = self - .displayed_fields_ids(&rtxn)? - .map(|fields| fields.into_iter().collect::>()) - .unwrap_or_else(|| fields_ids_map.iter().map(|(id, _)| id).collect()); - - let fids = |attrs: &BTreeSet| { - let mut ids = BTreeSet::new(); - for attr in attrs { - if attr == "*" { - ids = displayed_ids.clone(); - break; - } - - if let Some(id) = fields_ids_map.id(attr) { - ids.insert(id); - } - } - ids - }; - - // The attributes to retrieve are the ones explicitly marked as to retrieve (all by default), - // but these attributes must be also be present - // - in the fields_ids_map - // - in the the displayed attributes - let to_retrieve_ids: BTreeSet<_> = query - .attributes_to_retrieve - .as_ref() - .map(fids) - .unwrap_or_else(|| displayed_ids.clone()) - .intersection(&displayed_ids) - .cloned() - .collect(); - - let attr_to_highlight = query.attributes_to_highlight.unwrap_or_default(); - - let attr_to_crop = query.attributes_to_crop.unwrap_or_default(); - - // Attributes in `formatted_options` correspond to the attributes that will be in `_formatted` - // These attributes are: - // - the attributes asked to be highlighted or cropped (with `attributesToCrop` or `attributesToHighlight`) - // - the attributes asked to be retrieved: these attributes will not be highlighted/cropped - // But these attributes must be also present in displayed attributes - let formatted_options = compute_formatted_options( - &attr_to_highlight, - &attr_to_crop, - query.crop_length, - &to_retrieve_ids, - &fields_ids_map, - &displayed_ids, - ); - - let tokenizer = TokenizerBuilder::default().build(); - - let mut formatter_builder = MatcherBuilder::new(matching_words, tokenizer); - formatter_builder.crop_marker(query.crop_marker); - formatter_builder.highlight_prefix(query.highlight_pre_tag); - formatter_builder.highlight_suffix(query.highlight_post_tag); - - let mut documents = Vec::new(); - - let documents_iter = self.documents(&rtxn, documents_ids)?; - - for (_id, obkv) in documents_iter { - // First generate a document with all the displayed fields - let displayed_document = make_document(&displayed_ids, &fields_ids_map, obkv)?; - - // select the attributes to retrieve - let attributes_to_retrieve = to_retrieve_ids - .iter() - .map(|&fid| fields_ids_map.name(fid).expect("Missing field name")); - let mut document = - permissive_json_pointer::select_values(&displayed_document, attributes_to_retrieve); - - let (matches_position, formatted) = format_fields( - &displayed_document, - &fields_ids_map, - &formatter_builder, - &formatted_options, - 
query.show_matches_position, - &displayed_ids, - )?; - - if let Some(sort) = query.sort.as_ref() { - insert_geo_distance(sort, &mut document); - } - - let hit = SearchHit { - document, - formatted, - matches_position, - }; - documents.push(hit); - } - - let number_of_hits = min(candidates.len() as usize, max_total_hits); - let hits_info = if is_finite_pagination { - let hits_per_page = query.hits_per_page.unwrap_or_else(DEFAULT_SEARCH_LIMIT); - // If hit_per_page is 0, then pages can't be computed and so we respond 0. - let total_pages = (number_of_hits + hits_per_page.saturating_sub(1)) - .checked_div(hits_per_page) - .unwrap_or(0); - - HitsInfo::Pagination { - hits_per_page, - page: query.page.unwrap_or(1), - total_pages, - total_hits: number_of_hits, - } - } else { - HitsInfo::OffsetLimit { - limit: query.limit, - offset, - estimated_total_hits: number_of_hits, - } - }; - - let facet_distribution = match query.facets { - Some(ref fields) => { - let mut facet_distribution = self.facets_distribution(&rtxn); - - let max_values_by_facet = self - .max_values_per_facet(&rtxn)? - .unwrap_or(DEFAULT_VALUES_PER_FACET); - facet_distribution.max_values_per_facet(max_values_by_facet); - - if fields.iter().all(|f| f != "*") { - facet_distribution.facets(fields); - } - let distribution = facet_distribution.candidates(candidates).execute()?; - - Some(distribution) - } - None => None, - }; - - let result = SearchResult { - hits: documents, - hits_info, - query: query.q.clone().unwrap_or_default(), - processing_time_ms: before_search.elapsed().as_millis(), - facet_distribution, - }; - Ok(result) - } -} - -fn insert_geo_distance(sorts: &[String], document: &mut Document) { - lazy_static::lazy_static! { - static ref GEO_REGEX: Regex = - Regex::new(r"_geoPoint\(\s*([[:digit:].\-]+)\s*,\s*([[:digit:].\-]+)\s*\)").unwrap(); - }; - if let Some(capture_group) = sorts.iter().find_map(|sort| GEO_REGEX.captures(sort)) { - // TODO: TAMO: milli encountered an internal error, what do we want to do? 
- let base = [ - capture_group[1].parse().unwrap(), - capture_group[2].parse().unwrap(), - ]; - let geo_point = &document.get("_geo").unwrap_or(&json!(null)); - if let Some((lat, lng)) = geo_point["lat"].as_f64().zip(geo_point["lng"].as_f64()) { - let distance = milli::distance_between_two_points(&base, &[lat, lng]); - document.insert("_geoDistance".to_string(), json!(distance.round() as usize)); - } - } -} - -fn compute_formatted_options( - attr_to_highlight: &HashSet, - attr_to_crop: &[String], - query_crop_length: usize, - to_retrieve_ids: &BTreeSet, - fields_ids_map: &FieldsIdsMap, - displayed_ids: &BTreeSet, -) -> BTreeMap { - let mut formatted_options = BTreeMap::new(); - - add_highlight_to_formatted_options( - &mut formatted_options, - attr_to_highlight, - fields_ids_map, - displayed_ids, - ); - - add_crop_to_formatted_options( - &mut formatted_options, - attr_to_crop, - query_crop_length, - fields_ids_map, - displayed_ids, - ); - - // Should not return `_formatted` if no valid attributes to highlight/crop - if !formatted_options.is_empty() { - add_non_formatted_ids_to_formatted_options(&mut formatted_options, to_retrieve_ids); - } - - formatted_options -} - -fn add_highlight_to_formatted_options( - formatted_options: &mut BTreeMap, - attr_to_highlight: &HashSet, - fields_ids_map: &FieldsIdsMap, - displayed_ids: &BTreeSet, -) { - for attr in attr_to_highlight { - let new_format = FormatOptions { - highlight: true, - crop: None, - }; - - if attr == "*" { - for id in displayed_ids { - formatted_options.insert(*id, new_format); - } - break; - } - - if let Some(id) = fields_ids_map.id(attr) { - if displayed_ids.contains(&id) { - formatted_options.insert(id, new_format); - } - } - } -} - -fn add_crop_to_formatted_options( - formatted_options: &mut BTreeMap, - attr_to_crop: &[String], - crop_length: usize, - fields_ids_map: &FieldsIdsMap, - displayed_ids: &BTreeSet, -) { - for attr in attr_to_crop { - let mut split = attr.rsplitn(2, ':'); - let (attr_name, attr_len) = match split.next().zip(split.next()) { - Some((len, name)) => { - let crop_len = len.parse::().unwrap_or(crop_length); - (name, crop_len) - } - None => (attr.as_str(), crop_length), - }; - - if attr_name == "*" { - for id in displayed_ids { - formatted_options - .entry(*id) - .and_modify(|f| f.crop = Some(attr_len)) - .or_insert(FormatOptions { - highlight: false, - crop: Some(attr_len), - }); - } - } - - if let Some(id) = fields_ids_map.id(attr_name) { - if displayed_ids.contains(&id) { - formatted_options - .entry(id) - .and_modify(|f| f.crop = Some(attr_len)) - .or_insert(FormatOptions { - highlight: false, - crop: Some(attr_len), - }); - } - } - } -} - -fn add_non_formatted_ids_to_formatted_options( - formatted_options: &mut BTreeMap, - to_retrieve_ids: &BTreeSet, -) { - for id in to_retrieve_ids { - formatted_options.entry(*id).or_insert(FormatOptions { - highlight: false, - crop: None, - }); - } -} - -fn make_document( - displayed_attributes: &BTreeSet, - field_ids_map: &FieldsIdsMap, - obkv: obkv::KvReaderU16, -) -> Result { - let mut document = serde_json::Map::new(); - - // recreate the original json - for (key, value) in obkv.iter() { - let value = serde_json::from_slice(value)?; - let key = field_ids_map - .name(key) - .expect("Missing field name") - .to_string(); - - document.insert(key, value); - } - - // select the attributes to retrieve - let displayed_attributes = displayed_attributes - .iter() - .map(|&fid| field_ids_map.name(fid).expect("Missing field name")); - - let document = 
permissive_json_pointer::select_values(&document, displayed_attributes); - Ok(document) -} - -fn format_fields<'a, A: AsRef<[u8]>>( - document: &Document, - field_ids_map: &FieldsIdsMap, - builder: &MatcherBuilder<'a, A>, - formatted_options: &BTreeMap, - compute_matches: bool, - displayable_ids: &BTreeSet, -) -> Result<(Option, Document)> { - let mut matches_position = compute_matches.then(BTreeMap::new); - let mut document = document.clone(); - - // select the attributes to retrieve - let displayable_names = displayable_ids - .iter() - .map(|&fid| field_ids_map.name(fid).expect("Missing field name")); - permissive_json_pointer::map_leaf_values(&mut document, displayable_names, |key, value| { - // To get the formatting option of each key we need to see all the rules that applies - // to the value and merge them together. eg. If a user said he wanted to highlight `doggo` - // and crop `doggo.name`. `doggo.name` needs to be highlighted + cropped while `doggo.age` is only - // highlighted. - let format = formatted_options - .iter() - .filter(|(field, _option)| { - let name = field_ids_map.name(**field).unwrap(); - milli::is_faceted_by(name, key) || milli::is_faceted_by(key, name) - }) - .map(|(_, option)| *option) - .reduce(|acc, option| acc.merge(option)); - let mut infos = Vec::new(); - - *value = format_value( - std::mem::take(value), - builder, - format, - &mut infos, - compute_matches, - ); - - if let Some(matches) = matches_position.as_mut() { - if !infos.is_empty() { - matches.insert(key.to_owned(), infos); - } - } - }); - - let selectors = formatted_options - .keys() - // This unwrap must be safe since we got the ids from the fields_ids_map just - // before. - .map(|&fid| field_ids_map.name(fid).unwrap()); - let document = permissive_json_pointer::select_values(&document, selectors); - - Ok((matches_position, document)) -} - -fn format_value<'a, A: AsRef<[u8]>>( - value: Value, - builder: &MatcherBuilder<'a, A>, - format_options: Option, - infos: &mut Vec, - compute_matches: bool, -) -> Value { - match value { - Value::String(old_string) => { - let mut matcher = builder.build(&old_string); - if compute_matches { - let matches = matcher.matches(); - infos.extend_from_slice(&matches[..]); - } - - match format_options { - Some(format_options) => { - let value = matcher.format(format_options); - Value::String(value.into_owned()) - } - None => Value::String(old_string), - } - } - Value::Array(values) => Value::Array( - values - .into_iter() - .map(|v| { - format_value( - v, - builder, - format_options.map(|format_options| FormatOptions { - highlight: format_options.highlight, - crop: None, - }), - infos, - compute_matches, - ) - }) - .collect(), - ), - Value::Object(object) => Value::Object( - object - .into_iter() - .map(|(k, v)| { - ( - k, - format_value( - v, - builder, - format_options.map(|format_options| FormatOptions { - highlight: format_options.highlight, - crop: None, - }), - infos, - compute_matches, - ), - ) - }) - .collect(), - ), - Value::Number(number) => { - let s = number.to_string(); - - let mut matcher = builder.build(&s); - if compute_matches { - let matches = matcher.matches(); - infos.extend_from_slice(&matches[..]); - } - - match format_options { - Some(format_options) => { - let value = matcher.format(format_options); - Value::String(value.into_owned()) - } - None => Value::Number(number), - } - } - value => value, - } -} - -fn parse_filter(facets: &Value) -> Result> { - match facets { - Value::String(expr) => { - let condition = Filter::from_str(expr)?; - 
Ok(condition) - } - Value::Array(arr) => parse_filter_array(arr), - v => Err(FacetError::InvalidExpression(&["Array"], v.clone()).into()), - } -} - -fn parse_filter_array(arr: &[Value]) -> Result> { - let mut ands = Vec::new(); - for value in arr { - match value { - Value::String(s) => ands.push(Either::Right(s.as_str())), - Value::Array(arr) => { - let mut ors = Vec::new(); - for value in arr { - match value { - Value::String(s) => ors.push(s.as_str()), - v => { - return Err(FacetError::InvalidExpression(&["String"], v.clone()).into()) - } - } - } - ands.push(Either::Left(ors)); - } - v => { - return Err( - FacetError::InvalidExpression(&["String", "[String]"], v.clone()).into(), - ) - } - } - } - - Ok(Filter::from_array(ands)?) -} - -#[cfg(test)] -mod test { - use super::*; - - #[test] - fn test_insert_geo_distance() { - let value: Document = serde_json::from_str( - r#"{ - "_geo": { - "lat": 50.629973371633746, - "lng": 3.0569447399419567 - }, - "city": "Lille", - "id": "1" - }"#, - ) - .unwrap(); - - let sorters = &["_geoPoint(50.629973371633746,3.0569447399419567):desc".to_string()]; - let mut document = value.clone(); - insert_geo_distance(sorters, &mut document); - assert_eq!(document.get("_geoDistance"), Some(&json!(0))); - - let sorters = &["_geoPoint(50.629973371633746, 3.0569447399419567):asc".to_string()]; - let mut document = value.clone(); - insert_geo_distance(sorters, &mut document); - assert_eq!(document.get("_geoDistance"), Some(&json!(0))); - - let sorters = - &["_geoPoint( 50.629973371633746 , 3.0569447399419567 ):desc".to_string()]; - let mut document = value.clone(); - insert_geo_distance(sorters, &mut document); - assert_eq!(document.get("_geoDistance"), Some(&json!(0))); - - let sorters = &[ - "prix:asc", - "villeneuve:desc", - "_geoPoint(50.629973371633746, 3.0569447399419567):asc", - "ubu:asc", - ] - .map(|s| s.to_string()); - let mut document = value.clone(); - insert_geo_distance(sorters, &mut document); - assert_eq!(document.get("_geoDistance"), Some(&json!(0))); - - // only the first geoPoint is used to compute the distance - let sorters = &[ - "chien:desc", - "_geoPoint(50.629973371633746, 3.0569447399419567):asc", - "pangolin:desc", - "_geoPoint(100.0, -80.0):asc", - "chat:asc", - ] - .map(|s| s.to_string()); - let mut document = value.clone(); - insert_geo_distance(sorters, &mut document); - assert_eq!(document.get("_geoDistance"), Some(&json!(0))); - - // there was no _geoPoint so nothing is inserted in the document - let sorters = &["chien:asc".to_string()]; - let mut document = value; - insert_geo_distance(sorters, &mut document); - assert_eq!(document.get("_geoDistance"), None); - } -} diff --git a/meilisearch-lib/src/index/updates.rs b/meilisearch-lib/src/index/updates.rs deleted file mode 100644 index 7058d65c3..000000000 --- a/meilisearch-lib/src/index/updates.rs +++ /dev/null @@ -1,559 +0,0 @@ -use std::collections::{BTreeMap, BTreeSet}; -use std::marker::PhantomData; -use std::num::NonZeroUsize; - -use log::{debug, info, trace}; -use milli::documents::DocumentsBatchReader; -use milli::update::{ - DocumentAdditionResult, DocumentDeletionResult, IndexDocumentsConfig, IndexDocumentsMethod, - Setting, -}; -use serde::{Deserialize, Serialize, Serializer}; -use uuid::Uuid; - -use super::error::{IndexError, Result}; -use super::index::{Index, IndexMeta}; -use crate::update_file_store::UpdateFileStore; - -fn serialize_with_wildcard( - field: &Setting>, - s: S, -) -> std::result::Result -where - S: Serializer, -{ - let wildcard = vec!["*".to_string()]; - 
match field { - Setting::Set(value) => Some(value), - Setting::Reset => Some(&wildcard), - Setting::NotSet => None, - } - .serialize(s) -} - -#[derive(Clone, Default, Debug, Serialize, PartialEq, Eq)] -pub struct Checked; - -#[derive(Clone, Default, Debug, Serialize, Deserialize, PartialEq, Eq)] -pub struct Unchecked; - -#[cfg_attr(test, derive(proptest_derive::Arbitrary))] -#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq)] -#[serde(deny_unknown_fields)] -#[serde(rename_all = "camelCase")] -pub struct MinWordSizeTyposSetting { - #[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))] - #[serde(default, skip_serializing_if = "Setting::is_not_set")] - pub one_typo: Setting, - #[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))] - #[serde(default, skip_serializing_if = "Setting::is_not_set")] - pub two_typos: Setting, -} - -#[cfg_attr(test, derive(proptest_derive::Arbitrary))] -#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq)] -#[serde(deny_unknown_fields)] -#[serde(rename_all = "camelCase")] -pub struct TypoSettings { - #[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))] - #[serde(default, skip_serializing_if = "Setting::is_not_set")] - pub enabled: Setting, - #[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))] - #[serde(default, skip_serializing_if = "Setting::is_not_set")] - pub min_word_size_for_typos: Setting, - #[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))] - #[serde(default, skip_serializing_if = "Setting::is_not_set")] - pub disable_on_words: Setting>, - #[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))] - #[serde(default, skip_serializing_if = "Setting::is_not_set")] - pub disable_on_attributes: Setting>, -} - -#[cfg_attr(test, derive(proptest_derive::Arbitrary))] -#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq)] -#[serde(deny_unknown_fields)] -#[serde(rename_all = "camelCase")] -pub struct FacetingSettings { - #[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))] - #[serde(default, skip_serializing_if = "Setting::is_not_set")] - pub max_values_per_facet: Setting, -} - -#[cfg_attr(test, derive(proptest_derive::Arbitrary))] -#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq)] -#[serde(deny_unknown_fields)] -#[serde(rename_all = "camelCase")] -pub struct PaginationSettings { - #[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))] - #[serde(default, skip_serializing_if = "Setting::is_not_set")] - pub max_total_hits: Setting, -} - -/// Holds all the settings for an index. `T` can either be `Checked` if they represents settings -/// whose validity is guaranteed, or `Unchecked` if they need to be validated. In the later case, a -/// call to `check` will return a `Settings` from a `Settings`. 
-#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq)] -#[serde(deny_unknown_fields)] -#[serde(rename_all = "camelCase")] -#[serde(bound(serialize = "T: Serialize", deserialize = "T: Deserialize<'static>"))] -#[cfg_attr(test, derive(proptest_derive::Arbitrary))] -pub struct Settings { - #[serde( - default, - serialize_with = "serialize_with_wildcard", - skip_serializing_if = "Setting::is_not_set" - )] - #[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))] - pub displayed_attributes: Setting>, - - #[serde( - default, - serialize_with = "serialize_with_wildcard", - skip_serializing_if = "Setting::is_not_set" - )] - #[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))] - pub searchable_attributes: Setting>, - - #[serde(default, skip_serializing_if = "Setting::is_not_set")] - #[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))] - pub filterable_attributes: Setting>, - #[serde(default, skip_serializing_if = "Setting::is_not_set")] - #[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))] - pub sortable_attributes: Setting>, - #[serde(default, skip_serializing_if = "Setting::is_not_set")] - #[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))] - pub ranking_rules: Setting>, - #[serde(default, skip_serializing_if = "Setting::is_not_set")] - #[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))] - pub stop_words: Setting>, - #[serde(default, skip_serializing_if = "Setting::is_not_set")] - #[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))] - pub synonyms: Setting>>, - #[serde(default, skip_serializing_if = "Setting::is_not_set")] - #[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))] - pub distinct_attribute: Setting, - #[serde(default, skip_serializing_if = "Setting::is_not_set")] - #[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))] - pub typo_tolerance: Setting, - #[serde(default, skip_serializing_if = "Setting::is_not_set")] - #[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))] - pub faceting: Setting, - #[serde(default, skip_serializing_if = "Setting::is_not_set")] - #[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))] - pub pagination: Setting, - - #[serde(skip)] - pub _kind: PhantomData, -} - -impl Settings { - pub fn cleared() -> Settings { - Settings { - displayed_attributes: Setting::Reset, - searchable_attributes: Setting::Reset, - filterable_attributes: Setting::Reset, - sortable_attributes: Setting::Reset, - ranking_rules: Setting::Reset, - stop_words: Setting::Reset, - synonyms: Setting::Reset, - distinct_attribute: Setting::Reset, - typo_tolerance: Setting::Reset, - faceting: Setting::Reset, - pagination: Setting::Reset, - _kind: PhantomData, - } - } - - pub fn into_unchecked(self) -> Settings { - let Self { - displayed_attributes, - searchable_attributes, - filterable_attributes, - sortable_attributes, - ranking_rules, - stop_words, - synonyms, - distinct_attribute, - typo_tolerance, - faceting, - pagination, - .. 
- } = self; - - Settings { - displayed_attributes, - searchable_attributes, - filterable_attributes, - sortable_attributes, - ranking_rules, - stop_words, - synonyms, - distinct_attribute, - typo_tolerance, - faceting, - pagination, - _kind: PhantomData, - } - } -} - -impl Settings { - pub fn check(self) -> Settings { - let displayed_attributes = match self.displayed_attributes { - Setting::Set(fields) => { - if fields.iter().any(|f| f == "*") { - Setting::Reset - } else { - Setting::Set(fields) - } - } - otherwise => otherwise, - }; - - let searchable_attributes = match self.searchable_attributes { - Setting::Set(fields) => { - if fields.iter().any(|f| f == "*") { - Setting::Reset - } else { - Setting::Set(fields) - } - } - otherwise => otherwise, - }; - - Settings { - displayed_attributes, - searchable_attributes, - filterable_attributes: self.filterable_attributes, - sortable_attributes: self.sortable_attributes, - ranking_rules: self.ranking_rules, - stop_words: self.stop_words, - synonyms: self.synonyms, - distinct_attribute: self.distinct_attribute, - typo_tolerance: self.typo_tolerance, - faceting: self.faceting, - pagination: self.pagination, - _kind: PhantomData, - } - } -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -#[serde(deny_unknown_fields)] -#[serde(rename_all = "camelCase")] -pub struct Facets { - pub level_group_size: Option, - pub min_level_size: Option, -} - -impl Index { - fn update_primary_key_txn<'a, 'b>( - &'a self, - txn: &mut milli::heed::RwTxn<'a, 'b>, - primary_key: String, - ) -> Result { - let mut builder = milli::update::Settings::new(txn, self, self.indexer_config.as_ref()); - builder.set_primary_key(primary_key); - builder.execute(|_| ())?; - let meta = IndexMeta::new_txn(self, txn)?; - - Ok(meta) - } - - pub fn update_primary_key(&self, primary_key: String) -> Result { - let mut txn = self.write_txn()?; - let res = self.update_primary_key_txn(&mut txn, primary_key)?; - txn.commit()?; - - Ok(res) - } - - /// Deletes `ids` from the index, and returns how many documents were deleted. 
- pub fn delete_documents(&self, ids: &[String]) -> Result { - let mut txn = self.write_txn()?; - let mut builder = milli::update::DeleteDocuments::new(&mut txn, self)?; - - // We ignore unexisting document ids - ids.iter().for_each(|id| { - builder.delete_external_id(id); - }); - - let deleted = builder.execute()?; - - txn.commit()?; - - Ok(deleted) - } - - pub fn clear_documents(&self) -> Result<()> { - let mut txn = self.write_txn()?; - milli::update::ClearDocuments::new(&mut txn, self).execute()?; - txn.commit()?; - - Ok(()) - } - - pub fn update_documents( - &self, - method: IndexDocumentsMethod, - primary_key: Option, - file_store: UpdateFileStore, - contents: impl IntoIterator, - ) -> Result>> { - trace!("performing document addition"); - let mut txn = self.write_txn()?; - - if let Some(primary_key) = primary_key { - if self.primary_key(&txn)?.is_none() { - self.update_primary_key_txn(&mut txn, primary_key)?; - } - } - - let config = IndexDocumentsConfig { - update_method: method, - ..Default::default() - }; - - let indexing_callback = |indexing_step| debug!("update: {:?}", indexing_step); - let mut builder = milli::update::IndexDocuments::new( - &mut txn, - self, - self.indexer_config.as_ref(), - config, - indexing_callback, - )?; - - let mut results = Vec::new(); - for content_uuid in contents.into_iter() { - let content_file = file_store.get_update(content_uuid)?; - let reader = DocumentsBatchReader::from_reader(content_file)?; - let (new_builder, user_result) = builder.add_documents(reader)?; - builder = new_builder; - - let user_result = match user_result { - Ok(count) => Ok(DocumentAdditionResult { - indexed_documents: count, - number_of_documents: count, - }), - Err(e) => Err(IndexError::from(e)), - }; - - results.push(user_result); - } - - if results.iter().any(Result::is_ok) { - let addition = builder.execute()?; - txn.commit()?; - info!("document addition done: {:?}", addition); - } - - Ok(results) - } - - pub fn update_settings(&self, settings: &Settings) -> Result<()> { - // We must use the write transaction of the update here. 
- let mut txn = self.write_txn()?; - let mut builder = - milli::update::Settings::new(&mut txn, self, self.indexer_config.as_ref()); - - apply_settings_to_builder(settings, &mut builder); - - builder.execute(|indexing_step| debug!("update: {:?}", indexing_step))?; - - txn.commit()?; - - Ok(()) - } -} - -pub fn apply_settings_to_builder( - settings: &Settings, - builder: &mut milli::update::Settings, -) { - match settings.searchable_attributes { - Setting::Set(ref names) => builder.set_searchable_fields(names.clone()), - Setting::Reset => builder.reset_searchable_fields(), - Setting::NotSet => (), - } - - match settings.displayed_attributes { - Setting::Set(ref names) => builder.set_displayed_fields(names.clone()), - Setting::Reset => builder.reset_displayed_fields(), - Setting::NotSet => (), - } - - match settings.filterable_attributes { - Setting::Set(ref facets) => { - builder.set_filterable_fields(facets.clone().into_iter().collect()) - } - Setting::Reset => builder.reset_filterable_fields(), - Setting::NotSet => (), - } - - match settings.sortable_attributes { - Setting::Set(ref fields) => builder.set_sortable_fields(fields.iter().cloned().collect()), - Setting::Reset => builder.reset_sortable_fields(), - Setting::NotSet => (), - } - - match settings.ranking_rules { - Setting::Set(ref criteria) => builder.set_criteria(criteria.clone()), - Setting::Reset => builder.reset_criteria(), - Setting::NotSet => (), - } - - match settings.stop_words { - Setting::Set(ref stop_words) => builder.set_stop_words(stop_words.clone()), - Setting::Reset => builder.reset_stop_words(), - Setting::NotSet => (), - } - - match settings.synonyms { - Setting::Set(ref synonyms) => builder.set_synonyms(synonyms.clone().into_iter().collect()), - Setting::Reset => builder.reset_synonyms(), - Setting::NotSet => (), - } - - match settings.distinct_attribute { - Setting::Set(ref attr) => builder.set_distinct_field(attr.clone()), - Setting::Reset => builder.reset_distinct_field(), - Setting::NotSet => (), - } - - match settings.typo_tolerance { - Setting::Set(ref value) => { - match value.enabled { - Setting::Set(val) => builder.set_autorize_typos(val), - Setting::Reset => builder.reset_authorize_typos(), - Setting::NotSet => (), - } - - match value.min_word_size_for_typos { - Setting::Set(ref setting) => { - match setting.one_typo { - Setting::Set(val) => builder.set_min_word_len_one_typo(val), - Setting::Reset => builder.reset_min_word_len_one_typo(), - Setting::NotSet => (), - } - match setting.two_typos { - Setting::Set(val) => builder.set_min_word_len_two_typos(val), - Setting::Reset => builder.reset_min_word_len_two_typos(), - Setting::NotSet => (), - } - } - Setting::Reset => { - builder.reset_min_word_len_one_typo(); - builder.reset_min_word_len_two_typos(); - } - Setting::NotSet => (), - } - - match value.disable_on_words { - Setting::Set(ref words) => { - builder.set_exact_words(words.clone()); - } - Setting::Reset => builder.reset_exact_words(), - Setting::NotSet => (), - } - - match value.disable_on_attributes { - Setting::Set(ref words) => { - builder.set_exact_attributes(words.iter().cloned().collect()) - } - Setting::Reset => builder.reset_exact_attributes(), - Setting::NotSet => (), - } - } - Setting::Reset => { - // all typo settings need to be reset here. 
- builder.reset_authorize_typos(); - builder.reset_min_word_len_one_typo(); - builder.reset_min_word_len_two_typos(); - builder.reset_exact_words(); - builder.reset_exact_attributes(); - } - Setting::NotSet => (), - } - - match settings.faceting { - Setting::Set(ref value) => match value.max_values_per_facet { - Setting::Set(val) => builder.set_max_values_per_facet(val), - Setting::Reset => builder.reset_max_values_per_facet(), - Setting::NotSet => (), - }, - Setting::Reset => builder.reset_max_values_per_facet(), - Setting::NotSet => (), - } - - match settings.pagination { - Setting::Set(ref value) => match value.max_total_hits { - Setting::Set(val) => builder.set_pagination_max_total_hits(val), - Setting::Reset => builder.reset_pagination_max_total_hits(), - Setting::NotSet => (), - }, - Setting::Reset => builder.reset_pagination_max_total_hits(), - Setting::NotSet => (), - } -} - -#[cfg(test)] -pub(crate) mod test { - use proptest::prelude::*; - - use super::*; - - pub(super) fn setting_strategy() -> impl Strategy> { - prop_oneof![ - Just(Setting::NotSet), - Just(Setting::Reset), - any::().prop_map(Setting::Set) - ] - } - - #[test] - fn test_setting_check() { - // test no changes - let settings = Settings { - displayed_attributes: Setting::Set(vec![String::from("hello")]), - searchable_attributes: Setting::Set(vec![String::from("hello")]), - filterable_attributes: Setting::NotSet, - sortable_attributes: Setting::NotSet, - ranking_rules: Setting::NotSet, - stop_words: Setting::NotSet, - synonyms: Setting::NotSet, - distinct_attribute: Setting::NotSet, - typo_tolerance: Setting::NotSet, - faceting: Setting::NotSet, - pagination: Setting::NotSet, - _kind: PhantomData::, - }; - - let checked = settings.clone().check(); - assert_eq!(settings.displayed_attributes, checked.displayed_attributes); - assert_eq!( - settings.searchable_attributes, - checked.searchable_attributes - ); - - // test wildcard - // test no changes - let settings = Settings { - displayed_attributes: Setting::Set(vec![String::from("*")]), - searchable_attributes: Setting::Set(vec![String::from("hello"), String::from("*")]), - filterable_attributes: Setting::NotSet, - sortable_attributes: Setting::NotSet, - ranking_rules: Setting::NotSet, - stop_words: Setting::NotSet, - synonyms: Setting::NotSet, - distinct_attribute: Setting::NotSet, - typo_tolerance: Setting::NotSet, - faceting: Setting::NotSet, - pagination: Setting::NotSet, - _kind: PhantomData::, - }; - - let checked = settings.check(); - assert_eq!(checked.displayed_attributes, Setting::Reset); - assert_eq!(checked.searchable_attributes, Setting::Reset); - } -} diff --git a/meilisearch-lib/src/index_controller/mod.rs b/meilisearch-lib/src/index_controller/mod.rs deleted file mode 100644 index f4dcf3c94..000000000 --- a/meilisearch-lib/src/index_controller/mod.rs +++ /dev/null @@ -1,574 +0,0 @@ -use std::collections::BTreeMap; -use std::fmt; -use std::path::{Path, PathBuf}; -use std::sync::Arc; -use std::time::Duration; - -use actix_web::error::PayloadError; -use bytes::Bytes; -use futures::Stream; -use index_scheduler::task::{Status, Task}; -use index_scheduler::{IndexScheduler, KindWithContent, TaskId, TaskView}; -use meilisearch_auth::SearchRules; -use milli::update::{IndexDocumentsMethod, IndexerConfig}; -use serde::{Deserialize, Serialize}; -use time::OffsetDateTime; -use tokio::task::spawn_blocking; -use tokio::time::sleep; -use uuid::Uuid; - -// use crate::dump::{self, load_dump, DumpHandler}; -use crate::options::{IndexerOpts, SchedulerConfig}; -// use 
crate::snapshot::{load_snapshot, SnapshotService}; -use error::Result; -use index::{ - Checked, Document, Index, IndexMeta, IndexStats, SearchQuery, SearchResult, Settings, Unchecked, -}; - -pub mod error; -pub mod versioning; - -pub type Payload = Box< - dyn Stream> + Send + Sync + 'static + Unpin, ->; - -pub fn open_meta_env(path: &Path, size: usize) -> milli::heed::Result { - let mut options = milli::heed::EnvOpenOptions::new(); - options.map_size(size); - options.max_dbs(20); - options.open(path) -} - -#[derive(Debug, Serialize, Deserialize, Clone)] -#[serde(rename_all = "camelCase")] -pub struct IndexMetadata { - #[serde(skip)] - pub uuid: Uuid, - pub uid: String, - #[serde(flatten)] - pub meta: IndexMeta, -} - -#[derive(Clone, Debug)] -pub struct IndexSettings { - pub uid: Option, - pub primary_key: Option, -} - -#[derive(Clone)] -pub struct Meilisearch { - index_scheduler: IndexScheduler, -} - -impl std::ops::Deref for Meilisearch { - type Target = IndexScheduler; - - fn deref(&self) -> &Self::Target { - &self.index_scheduler - } -} - -#[derive(Debug)] -pub enum DocumentAdditionFormat { - Json, - Csv, - Ndjson, -} - -impl fmt::Display for DocumentAdditionFormat { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - DocumentAdditionFormat::Json => write!(f, "json"), - DocumentAdditionFormat::Ndjson => write!(f, "ndjson"), - DocumentAdditionFormat::Csv => write!(f, "csv"), - } - } -} - -#[derive(Serialize, Debug)] -#[serde(rename_all = "camelCase")] -pub struct Stats { - pub database_size: u64, - #[serde(serialize_with = "time::serde::rfc3339::option::serialize")] - pub last_update: Option, - pub indexes: BTreeMap, -} - -#[allow(clippy::large_enum_variant)] -#[derive(derivative::Derivative)] -#[derivative(Debug)] -pub enum Update { - DeleteDocuments(Vec), - ClearDocuments, - Settings { - settings: Settings, - /// Indicates whether the update was a deletion - is_deletion: bool, - allow_index_creation: bool, - }, - DocumentAddition { - #[derivative(Debug = "ignore")] - payload: Payload, - primary_key: Option, - method: IndexDocumentsMethod, - format: DocumentAdditionFormat, - allow_index_creation: bool, - }, - DeleteIndex, - CreateIndex { - primary_key: Option, - }, - UpdateIndex { - primary_key: Option, - }, -} - -#[derive(Default, Debug)] -pub struct IndexControllerBuilder { - max_index_size: Option, - max_task_store_size: Option, - snapshot_dir: Option, - import_snapshot: Option, - snapshot_interval: Option, - ignore_snapshot_if_db_exists: bool, - ignore_missing_snapshot: bool, - schedule_snapshot: bool, - dump_src: Option, - dump_dst: Option, - ignore_dump_if_db_exists: bool, - ignore_missing_dump: bool, -} - -impl IndexControllerBuilder { - pub fn build( - self, - db_path: impl AsRef, - indexer_options: IndexerOpts, - scheduler_config: SchedulerConfig, - ) -> anyhow::Result { - let index_size = self - .max_index_size - .ok_or_else(|| anyhow::anyhow!("Missing index size"))?; - let task_store_size = self - .max_task_store_size - .ok_or_else(|| anyhow::anyhow!("Missing update database size"))?; - - /* - TODO: TAMO: enable dumps and snapshots to happens - if let Some(ref path) = self.import_snapshot { - log::info!("Loading from snapshot {:?}", path); - load_snapshot( - db_path.as_ref(), - path, - self.ignore_snapshot_if_db_exists, - self.ignore_missing_snapshot, - )?; - } else if let Some(ref src_path) = self.dump_src { - load_dump( - db_path.as_ref(), - src_path, - self.ignore_dump_if_db_exists, - self.ignore_missing_dump, - index_size, - task_store_size, - 
-#[derive(Debug, Serialize, Deserialize, Clone)]
-#[serde(rename_all = "camelCase")]
-pub struct IndexMetadata {
-    #[serde(skip)]
-    pub uuid: Uuid,
-    pub uid: String,
-    #[serde(flatten)]
-    pub meta: IndexMeta,
-}
-
-#[derive(Clone, Debug)]
-pub struct IndexSettings {
-    pub uid: Option<String>,
-    pub primary_key: Option<String>,
-}
-
-#[derive(Clone)]
-pub struct Meilisearch {
-    index_scheduler: IndexScheduler,
-}
-
-impl std::ops::Deref for Meilisearch {
-    type Target = IndexScheduler;
-
-    fn deref(&self) -> &Self::Target {
-        &self.index_scheduler
-    }
-}
-
-#[derive(Debug)]
-pub enum DocumentAdditionFormat {
-    Json,
-    Csv,
-    Ndjson,
-}
-
-impl fmt::Display for DocumentAdditionFormat {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        match self {
-            DocumentAdditionFormat::Json => write!(f, "json"),
-            DocumentAdditionFormat::Ndjson => write!(f, "ndjson"),
-            DocumentAdditionFormat::Csv => write!(f, "csv"),
-        }
-    }
-}
-
-#[derive(Serialize, Debug)]
-#[serde(rename_all = "camelCase")]
-pub struct Stats {
-    pub database_size: u64,
-    #[serde(serialize_with = "time::serde::rfc3339::option::serialize")]
-    pub last_update: Option<OffsetDateTime>,
-    pub indexes: BTreeMap<String, IndexStats>,
-}
-
-#[allow(clippy::large_enum_variant)]
-#[derive(derivative::Derivative)]
-#[derivative(Debug)]
-pub enum Update {
-    DeleteDocuments(Vec<String>),
-    ClearDocuments,
-    Settings {
-        settings: Settings<Unchecked>,
-        /// Indicates whether the update was a deletion
-        is_deletion: bool,
-        allow_index_creation: bool,
-    },
-    DocumentAddition {
-        #[derivative(Debug = "ignore")]
-        payload: Payload,
-        primary_key: Option<String>,
-        method: IndexDocumentsMethod,
-        format: DocumentAdditionFormat,
-        allow_index_creation: bool,
-    },
-    DeleteIndex,
-    CreateIndex {
-        primary_key: Option<String>,
-    },
-    UpdateIndex {
-        primary_key: Option<String>,
-    },
-}
-
-#[derive(Default, Debug)]
-pub struct IndexControllerBuilder {
-    max_index_size: Option<usize>,
-    max_task_store_size: Option<usize>,
-    snapshot_dir: Option<PathBuf>,
-    import_snapshot: Option<PathBuf>,
-    snapshot_interval: Option<Duration>,
-    ignore_snapshot_if_db_exists: bool,
-    ignore_missing_snapshot: bool,
-    schedule_snapshot: bool,
-    dump_src: Option<PathBuf>,
-    dump_dst: Option<PathBuf>,
-    ignore_dump_if_db_exists: bool,
-    ignore_missing_dump: bool,
-}
-
-impl IndexControllerBuilder {
-    pub fn build(
-        self,
-        db_path: impl AsRef<Path>,
-        indexer_options: IndexerOpts,
-        scheduler_config: SchedulerConfig,
-    ) -> anyhow::Result<Meilisearch> {
-        let index_size = self
-            .max_index_size
-            .ok_or_else(|| anyhow::anyhow!("Missing index size"))?;
-        let task_store_size = self
-            .max_task_store_size
-            .ok_or_else(|| anyhow::anyhow!("Missing update database size"))?;
-
-        /*
-        TODO: TAMO: enable dumps and snapshots to happen
-        if let Some(ref path) = self.import_snapshot {
-            log::info!("Loading from snapshot {:?}", path);
-            load_snapshot(
-                db_path.as_ref(),
-                path,
-                self.ignore_snapshot_if_db_exists,
-                self.ignore_missing_snapshot,
-            )?;
-        } else if let Some(ref src_path) = self.dump_src {
-            load_dump(
-                db_path.as_ref(),
-                src_path,
-                self.ignore_dump_if_db_exists,
-                self.ignore_missing_dump,
-                index_size,
-                task_store_size,
-                &indexer_options,
-            )?;
-        } else if db_path.as_ref().exists() {
-            // Directory could be pre-created without any database in it.
-            let db_is_empty = db_path.as_ref().read_dir()?.next().is_none();
-            if !db_is_empty {
-                versioning::check_version_file(db_path.as_ref())?;
-            }
-        }
-        */
-
-        std::fs::create_dir_all(db_path.as_ref())?;
-
-        let meta_env = Arc::new(open_meta_env(db_path.as_ref(), task_store_size)?);
-
-        // Create or overwrite the version file for this DB
-        versioning::create_version_file(db_path.as_ref())?;
-
-        let indexer_config = IndexerConfig {
-            log_every_n: Some(indexer_options.log_every_n),
-            max_nb_chunks: indexer_options.max_nb_chunks,
-            documents_chunk_size: None,
-            // TODO: TAMO: Fix this thing
-            max_memory: None, // Some(indexer_options.max_indexing_memory.into()),
-            chunk_compression_type: milli::CompressionType::None,
-            chunk_compression_level: None,
-            // TODO: TAMO: do something with indexer_options.max_indexing_threads
-            thread_pool: None,
-            max_positions_per_attributes: None,
-        };
-
-        let index_scheduler = IndexScheduler::new(
-            db_path.as_ref().join("tasks"),
-            db_path.as_ref().join("update_files"),
-            db_path.as_ref().join("indexes"),
-            index_size,
-            indexer_config,
-        )?;
-
-        /*
-        if self.schedule_snapshot {
-            let snapshot_period = self
-                .snapshot_interval
-                .ok_or_else(|| anyhow::anyhow!("Snapshot interval not provided."))?;
-            let snapshot_path = self
-                .snapshot_dir
-                .ok_or_else(|| anyhow::anyhow!("Snapshot path not provided."))?;
-
-            let snapshot_service = SnapshotService {
-                db_path: db_path.as_ref().to_path_buf(),
-                snapshot_period,
-                snapshot_path,
-                index_size,
-                meta_env_size: task_store_size,
-                scheduler: scheduler.clone(),
-            };
-
-            tokio::task::spawn_local(snapshot_service.run());
-        }
-        */
-
-        Ok(Meilisearch { index_scheduler })
-    }
-
-    /// Set the index controller builder's max update store size.
-    pub fn set_max_task_store_size(&mut self, max_update_store_size: usize) -> &mut Self {
-        let max_update_store_size = clamp_to_page_size(max_update_store_size);
-        self.max_task_store_size.replace(max_update_store_size);
-        self
-    }
-
-    pub fn set_max_index_size(&mut self, size: usize) -> &mut Self {
-        let size = clamp_to_page_size(size);
-        self.max_index_size.replace(size);
-        self
-    }
-
-    /// Set the index controller builder's snapshot path.
-    pub fn set_snapshot_dir(&mut self, snapshot_dir: PathBuf) -> &mut Self {
-        self.snapshot_dir.replace(snapshot_dir);
-        self
-    }
-
-    /// Set the index controller builder's ignore snapshot if db exists.
-    pub fn set_ignore_snapshot_if_db_exists(
-        &mut self,
-        ignore_snapshot_if_db_exists: bool,
-    ) -> &mut Self {
-        self.ignore_snapshot_if_db_exists = ignore_snapshot_if_db_exists;
-        self
-    }
-
-    /// Set the index controller builder's ignore missing snapshot.
-    pub fn set_ignore_missing_snapshot(&mut self, ignore_missing_snapshot: bool) -> &mut Self {
-        self.ignore_missing_snapshot = ignore_missing_snapshot;
-        self
-    }
-
-    /// Set the index controller builder's import snapshot.
-    pub fn set_import_snapshot(&mut self, import_snapshot: PathBuf) -> &mut Self {
-        self.import_snapshot.replace(import_snapshot);
-        self
-    }
-
-    /// Set the index controller builder's snapshot interval in seconds.
-    pub fn set_snapshot_interval(&mut self, snapshot_interval: Duration) -> &mut Self {
-        self.snapshot_interval = Some(snapshot_interval);
-        self
-    }
-
-    /// Set the index controller builder's schedule snapshot.
-    pub fn set_schedule_snapshot(&mut self) -> &mut Self {
-        self.schedule_snapshot = true;
-        self
-    }
-
-    /// Set the index controller builder's dump source.
-    pub fn set_dump_src(&mut self, dump_src: PathBuf) -> &mut Self {
-        self.dump_src.replace(dump_src);
-        self
-    }
-
-    /// Set the index controller builder's dump destination.
-    pub fn set_dump_dst(&mut self, dump_dst: PathBuf) -> &mut Self {
-        self.dump_dst.replace(dump_dst);
-        self
-    }
-
-    /// Set the index controller builder's ignore dump if db exists.
-    pub fn set_ignore_dump_if_db_exists(&mut self, ignore_dump_if_db_exists: bool) -> &mut Self {
-        self.ignore_dump_if_db_exists = ignore_dump_if_db_exists;
-        self
-    }
-
-    /// Set the index controller builder's ignore missing dump.
-    pub fn set_ignore_missing_dump(&mut self, ignore_missing_dump: bool) -> &mut Self {
-        self.ignore_missing_dump = ignore_missing_dump;
-        self
-    }
-}
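Taken together this is a conventional Rust builder: setters return `&mut Self` and `build` consumes the builder, failing if a mandatory size was never provided. A hypothetical call site, using the `Meilisearch::builder()` constructor that appears just below; all values here are invented, and `Default` impls for `IndexerOpts` and `SchedulerConfig` are assumed:

```rust
// Hypothetical call site for the builder above; nothing here is a
// recommended configuration.
use std::time::Duration;

fn open_controller() -> anyhow::Result<Meilisearch> {
    let mut builder = Meilisearch::builder();
    builder
        .set_max_index_size(100 * 1024 * 1024 * 1024) // clamped to a page-size multiple
        .set_max_task_store_size(10 * 1024 * 1024 * 1024)
        .set_snapshot_dir("./snapshots".into())
        .set_snapshot_interval(Duration::from_secs(86_400));

    // `build` errors out if either size above was never set.
    builder.build("./data.ms", IndexerOpts::default(), SchedulerConfig::default())
}
```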
-impl Meilisearch {
-    pub fn builder() -> IndexControllerBuilder {
-        IndexControllerBuilder::default()
-    }
-
-    pub async fn register_task(&self, task: KindWithContent) -> Result<Task> {
-        let this = self.clone();
-        Ok(
-            tokio::task::spawn_blocking(move || this.clone().index_scheduler.register(task))
-                .await??,
-        )
-    }
-
-    pub async fn list_tasks(&self, filter: index_scheduler::Query) -> Result<Vec<TaskView>> {
-        Ok(self.index_scheduler.get_tasks(filter)?)
-    }
-
-    pub async fn list_indexes(&self) -> Result<Vec<Index>> {
-        let this = self.clone();
-        Ok(spawn_blocking(move || this.index_scheduler.indexes()).await??)
-    }
-
-    /// Return the total number of documents in the index, along with the selected documents.
-    pub async fn documents(
-        &self,
-        uid: String,
-        offset: usize,
-        limit: usize,
-        attributes_to_retrieve: Option<Vec<String>>,
-    ) -> Result<(u64, Vec<Document>)> {
-        let this = self.clone();
-        spawn_blocking(move || -> Result<_> {
-            let index = this.index_scheduler.index(&uid)?;
-            Ok(index.retrieve_documents(offset, limit, attributes_to_retrieve)?)
-        })
-        .await?
-    }
-
-    pub async fn document(
-        &self,
-        uid: String,
-        doc_id: String,
-        attributes_to_retrieve: Option<Vec<String>>,
-    ) -> Result<Document> {
-        let this = self.clone();
-        spawn_blocking(move || -> Result<_> {
-            let index = this.index_scheduler.index(&uid)?;
-            Ok(index.retrieve_document(doc_id, attributes_to_retrieve)?)
-        })
-        .await?
-    }
-
-    pub async fn search(&self, uid: String, query: SearchQuery) -> Result<SearchResult> {
-        let this = self.clone();
-        spawn_blocking(move || -> Result<_> {
-            let index = this.index_scheduler.index(&uid)?;
-            Ok(index.perform_search(query)?)
-        })
-        .await?
-    }
-
-    pub async fn get_index(&self, uid: String) -> Result<Index> {
-        let this = self.clone();
-        Ok(spawn_blocking(move || this.index_scheduler.index(&uid)).await??)
-    }
-
-    pub async fn get_index_stats(&self, uid: String) -> Result<IndexStats> {
-        let processing_tasks = self
-            .index_scheduler
-            .get_tasks(index_scheduler::Query::default().with_status(Status::Processing))?;
-        // Check if the currently indexing update is from our index.
-        let is_indexing = processing_tasks.first().map_or(false, |task| {
-            task.index_uid.as_ref().map_or(false, |u| u == &uid)
-        });
-
-        let index = self.get_index(uid).await?;
-        let mut stats = spawn_blocking(move || index.stats()).await??;
-        stats.is_indexing = Some(is_indexing);
-
-        Ok(stats)
-    }
-
-    pub async fn get_all_stats(&self, search_rules: &SearchRules) -> Result<Stats> {
-        let mut last_task: Option<OffsetDateTime> = None;
-        let mut indexes = BTreeMap::new();
-        let mut database_size = 0;
-        let processing_tasks = self
-            .index_scheduler
-            .get_tasks(index_scheduler::Query::default().with_status(Status::Processing))?;
-
-        for index in self.list_indexes().await? {
-            if !search_rules.is_index_authorized(&index.name) {
-                continue;
-            }
-            let index_name = index.name.clone();
-
-            let (mut stats, meta) =
-                spawn_blocking::<_, Result<(IndexStats, IndexMeta)>>(move || {
-                    Ok((index.stats()?, index.meta()?))
-                })
-                .await??;
-
-            database_size += stats.size;
-
-            last_task = last_task.map_or(Some(meta.updated_at), |last| {
-                Some(last.max(meta.updated_at))
-            });
-
-            // Check if the currently indexing update is from our index.
-            stats.is_indexing = processing_tasks
-                .first()
-                .and_then(|p| p.index_uid.as_ref().map(|u| u == &index_name))
-                .or(Some(false));
-
-            indexes.insert(index_name, stats);
-        }
-
-        Ok(Stats {
-            database_size,
-            last_update: last_task,
-            indexes,
-        })
-    }
-}
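Every async method above follows the same bridge pattern: clone the cheaply cloneable handle, move the clone into `tokio::task::spawn_blocking`, and flatten two error layers with `.await??` (the outer `JoinError` from the blocking pool, then the inner `Result`). A stripped-down sketch of that pattern with invented stand-in types:

```rust
// Minimal sketch of the clone + spawn_blocking pattern used above.
// `Handle` and `lookup` are invented stand-ins for the real types.
use tokio::task::spawn_blocking;

#[derive(Clone)]
struct Handle;

impl Handle {
    // Stand-in for a synchronous, disk-touching call such as
    // `index_scheduler.index(&uid)`.
    fn lookup(&self, uid: &str) -> Result<String, String> {
        Ok(format!("index `{uid}`"))
    }
}

async fn get(handle: &Handle, uid: String) -> Result<String, String> {
    // Clone so the closure owns its handle ('static requirement).
    let this = handle.clone();
    spawn_blocking(move || this.lookup(&uid))
        .await
        // First error layer: the blocking task may panic or be cancelled...
        .map_err(|join_err| join_err.to_string())?
    // ...and the inner Result is the function's return value.
}

#[tokio::main]
async fn main() {
    let handle = Handle;
    let name = get(&handle, "movies".to_string()).await.unwrap();
    assert_eq!(name, "index `movies`");
}
```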
-pub async fn get_arc_ownership_blocking<T>(mut item: Arc<T>) -> T {
-    loop {
-        match Arc::try_unwrap(item) {
-            Ok(item) => return item,
-            Err(item_arc) => {
-                item = item_arc;
-                sleep(Duration::from_millis(100)).await;
-                continue;
-            }
-        }
-    }
-}
-
-// Round the provided value down to the nearest multiple of the system page size.
-fn clamp_to_page_size(size: usize) -> usize {
-    size / page_size::get() * page_size::get()
-}
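The integer division in `clamp_to_page_size` truncates, so the result is always rounded down to the previous page boundary; this is what keeps the sizes fed to `open_meta_env` page-aligned. A quick worked example, assuming the common 4096-byte page size:

```rust
// Same rounding logic as `clamp_to_page_size`, with the page size fixed
// at 4096 bytes so the example is reproducible.
fn clamp_to(size: usize, page: usize) -> usize {
    size / page * page
}

fn main() {
    let page = 4096;
    // Already aligned values pass through unchanged...
    assert_eq!(clamp_to(8192, page), 8192);
    // ...anything else is rounded *down* to the previous multiple:
    // 10_000 / 4096 = 2 (truncated), and 2 * 4096 = 8192.
    assert_eq!(clamp_to(10_000, page), 8192);
    // Sizes smaller than one page collapse to zero.
    assert_eq!(clamp_to(100, page), 0);
}
```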
-/*
-TODO: TAMO: uncomment this test
-
-#[cfg(test)]
-mod test {
-    use futures::future::ok;
-    use mockall::predicate::eq;
-    use nelson::Mocker;
-
-    use crate::index::error::Result as IndexResult;
-    use crate::index::{HitsInfo, Index};
-    use crate::index::{
-        DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG,
-    };
-    use crate::index_resolver::index_store::MockIndexStore;
-    use crate::index_resolver::meta_store::MockIndexMetaStore;
-    use crate::index_resolver::IndexResolver;
-
-    use super::*;
-
-    #[actix_rt::test]
-    async fn test_search_simple() {
-        let index_uid = "test";
-        let index_uuid = Uuid::new_v4();
-        let query = SearchQuery {
-            q: Some(String::from("hello world")),
-            offset: 10,
-            limit: 0,
-            page: Some(1),
-            hits_per_page: Some(10),
-            attributes_to_retrieve: Some(vec!["string".to_owned()].into_iter().collect()),
-            attributes_to_crop: None,
-            crop_length: 18,
-            attributes_to_highlight: None,
-            show_matches_position: true,
-            filter: None,
-            sort: None,
-            facets: None,
-            highlight_pre_tag: DEFAULT_HIGHLIGHT_PRE_TAG(),
-            highlight_post_tag: DEFAULT_HIGHLIGHT_POST_TAG(),
-            crop_marker: DEFAULT_CROP_MARKER(),
-            matching_strategy: Default::default(),
-        };
-
-        let result = SearchResult {
-            hits: vec![],
-            query: "hello world".to_string(),
-            hits_info: HitsInfo::OffsetLimit {
-                limit: 24,
-                offset: 0,
-                estimated_total_hits: 29,
-            },
-            processing_time_ms: 50,
-            facet_distribution: None,
-        };
-
-        let mut uuid_store = MockIndexMetaStore::new();
-        uuid_store
-            .expect_get()
-            .with(eq(index_uid.to_owned()))
-            .returning(move |s| {
-                Box::pin(ok((
-                    s,
-                    Some(crate::index_resolver::meta_store::IndexMeta {
-                        uuid: index_uuid,
-                        creation_task_id: 0,
-                    }),
-                )))
-            });
-
-        let mut index_store = MockIndexStore::new();
-        let result_clone = result.clone();
-        let query_clone = query.clone();
-        index_store
-            .expect_get()
-            .with(eq(index_uuid))
-            .returning(move |_uuid| {
-                let result = result_clone.clone();
-                let query = query_clone.clone();
-                let mocker = Mocker::default();
-                mocker
-                    .when::<SearchQuery, IndexResult<SearchResult>>("perform_search")
-                    .once()
-                    .then(move |q| {
-                        assert_eq!(&q, &query);
-                        Ok(result.clone())
-                    });
-                let index = Index::mock(mocker);
-                Box::pin(ok(Some(index)))
-            });
-
-        let task_store_mocker = nelson::Mocker::default();
-        let mocker = Mocker::default();
-        let update_file_store = UpdateFileStore::mock(mocker);
-        let index_resolver = Arc::new(IndexResolver::new(
-            uuid_store,
-            index_store,
-            update_file_store.clone(),
-        ));
-        let task_store = TaskStore::mock(task_store_mocker);
-        let scheduler = Scheduler::new(
-            task_store.clone(),
-            vec![index_resolver.clone()],
-            SchedulerConfig::default(),
-        )
-        .unwrap();
-        let index_controller =
-            IndexController::mock(index_resolver, task_store, update_file_store, scheduler);
-
-        let r = index_controller
-            .search(index_uid.to_owned(), query.clone())
-            .await
-            .unwrap();
-        assert_eq!(r, result);
-    }
-}
-*/
diff --git a/meilisearch-lib/src/tasks/task.rs b/meilisearch-lib/src/tasks/task.rs
deleted file mode 100644
index e0a18895b..000000000
--- a/meilisearch-lib/src/tasks/task.rs
+++ /dev/null
@@ -1,195 +0,0 @@
-use meilisearch_types::error::ResponseError;
-use meilisearch_types::index_uid::IndexUid;
-use milli::update::{DocumentAdditionResult, IndexDocumentsMethod};
-use serde::{Deserialize, Serialize};
-use time::OffsetDateTime;
-use uuid::Uuid;
-
-use super::batch::BatchId;
-use crate::index::{Settings, Unchecked};
-
-pub type TaskId = u32;
-
-#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
-#[cfg_attr(test, derive(proptest_derive::Arbitrary))]
-pub enum TaskResult {
-    DocumentAddition { indexed_documents: u64 },
-    DocumentDeletion { deleted_documents: u64 },
-    ClearAll { deleted_documents: u64 },
-    Other,
-}
-
-impl From<DocumentAdditionResult> for TaskResult {
-    fn from(other: DocumentAdditionResult) -> Self {
-        Self::DocumentAddition {
-            indexed_documents: other.indexed_documents,
-        }
-    }
-}
-
-#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
-#[cfg_attr(test, derive(proptest_derive::Arbitrary))]
-pub enum TaskEvent {
-    Created(
-        #[cfg_attr(test, proptest(strategy = "test::datetime_strategy()"))]
-        #[serde(with = "time::serde::rfc3339")]
-        OffsetDateTime,
-    ),
-    Batched {
-        #[cfg_attr(test, proptest(strategy = "test::datetime_strategy()"))]
-        #[serde(with = "time::serde::rfc3339")]
-        timestamp: OffsetDateTime,
-        batch_id: BatchId,
-    },
-    Processing(
-        #[cfg_attr(test, proptest(strategy = "test::datetime_strategy()"))]
-        #[serde(with = "time::serde::rfc3339")]
-        OffsetDateTime,
-    ),
-    Succeeded {
-        result: TaskResult,
-        #[cfg_attr(test, proptest(strategy = "test::datetime_strategy()"))]
-        #[serde(with = "time::serde::rfc3339")]
-        timestamp: OffsetDateTime,
-    },
-    Failed {
-        error: ResponseError,
-        #[cfg_attr(test, proptest(strategy = "test::datetime_strategy()"))]
-        #[serde(with = "time::serde::rfc3339")]
-        timestamp: OffsetDateTime,
-    },
-}
-
-impl TaskEvent {
-    pub fn succeeded(result: TaskResult) -> Self {
-        Self::Succeeded {
-            result,
-            timestamp: OffsetDateTime::now_utc(),
-        }
-    }
-
-    pub fn failed(error: impl Into<ResponseError>) -> Self {
-        Self::Failed {
-            error: error.into(),
-            timestamp: OffsetDateTime::now_utc(),
-        }
-    }
-}
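The `events` vector on a task doubles as its state machine: `Created`, optionally `Batched`, then `Processing`, and finally `Succeeded` or `Failed`, each stamped with an RFC 3339 timestamp. A simplified sketch of the happy path (timestamps and payloads trimmed; the free `is_finished` helper mirrors the `Task::is_finished` method that follows):

```rust
// Simplified stand-ins to illustrate the Created -> Processing -> Succeeded
// event trail; the real enum also carries timestamps and a `Batched` state.
#[derive(Debug, PartialEq)]
enum TaskEvent {
    Created,
    Processing,
    Succeeded { indexed_documents: u64 },
    Failed { error: String },
}

// A task is finished once its *last* event is terminal.
fn is_finished(events: &[TaskEvent]) -> bool {
    matches!(
        events.last(),
        Some(TaskEvent::Succeeded { .. } | TaskEvent::Failed { .. })
    )
}

fn main() {
    let mut events = vec![TaskEvent::Created];
    assert!(!is_finished(&events));

    events.push(TaskEvent::Processing);
    assert!(!is_finished(&events));

    events.push(TaskEvent::Succeeded { indexed_documents: 42 });
    assert!(is_finished(&events));
}
```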
-/// A task represents an operation that Meilisearch must do.
-/// It's stored on disk and executed from the lowest to highest Task id.
-/// Every time a new task is created, it has a higher Task id than the previous one.
-/// See also `Job`.
-#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
-#[cfg_attr(test, derive(proptest_derive::Arbitrary))]
-pub struct Task {
-    pub id: TaskId,
-    /// The name of the index the task is targeting. If it isn't targeting any index (i.e. a
-    /// Dump task), then this is `None`.
-    // TODO: the next time we introduce a breaking change in the dumps, it would be a good idea
-    // to move this field inside of the TaskContent.
-    pub content: TaskContent,
-    pub events: Vec<TaskEvent>,
-}
-
-impl Task {
-    /// Return true when a task is finished.
-    /// A task is finished when its last state is either `Succeeded` or `Failed`.
-    pub fn is_finished(&self) -> bool {
-        self.events.last().map_or(false, |event| {
-            matches!(
-                event,
-                TaskEvent::Succeeded { .. } | TaskEvent::Failed { .. }
-            )
-        })
-    }
-
-    /// Return the content_uuid of the `Task` if there is one.
-    pub fn get_content_uuid(&self) -> Option<Uuid> {
-        match self {
-            Task {
-                content: TaskContent::DocumentAddition { content_uuid, .. },
-                ..
-            } => Some(*content_uuid),
-            _ => None,
-        }
-    }
-
-    pub fn index_uid(&self) -> Option<&str> {
-        match &self.content {
-            TaskContent::DocumentAddition { index_uid, .. }
-            | TaskContent::DocumentDeletion { index_uid, .. }
-            | TaskContent::SettingsUpdate { index_uid, .. }
-            | TaskContent::IndexDeletion { index_uid }
-            | TaskContent::IndexCreation { index_uid, .. }
-            | TaskContent::IndexUpdate { index_uid, .. } => Some(index_uid.as_str()),
-            TaskContent::Dump { .. } => None,
-        }
-    }
-}
-
-#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
-#[cfg_attr(test, derive(proptest_derive::Arbitrary))]
-pub enum DocumentDeletion {
-    Clear,
-    Ids(Vec<String>),
-}
-
-#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
-#[cfg_attr(test, derive(proptest_derive::Arbitrary))]
-#[allow(clippy::large_enum_variant)]
-pub enum TaskContent {
-    DocumentAddition {
-        index_uid: IndexUid,
-        #[cfg_attr(test, proptest(value = "Uuid::new_v4()"))]
-        content_uuid: Uuid,
-        #[cfg_attr(test, proptest(strategy = "test::index_document_method_strategy()"))]
-        merge_strategy: IndexDocumentsMethod,
-        primary_key: Option<String>,
-        documents_count: usize,
-        allow_index_creation: bool,
-    },
-    DocumentDeletion {
-        index_uid: IndexUid,
-        deletion: DocumentDeletion,
-    },
-    SettingsUpdate {
-        index_uid: IndexUid,
-        settings: Settings<Unchecked>,
-        /// Indicates whether the task was a deletion
-        is_deletion: bool,
-        allow_index_creation: bool,
-    },
-    IndexDeletion {
-        index_uid: IndexUid,
-    },
-    IndexCreation {
-        index_uid: IndexUid,
-        primary_key: Option<String>,
-    },
-    IndexUpdate {
-        index_uid: IndexUid,
-        primary_key: Option<String>,
-    },
-    Dump {
-        uid: String,
-    },
-}
-
-#[cfg(test)]
-mod test {
-    use proptest::prelude::*;
-
-    use super::*;
-
-    pub(super) fn index_document_method_strategy() -> impl Strategy<Value = IndexDocumentsMethod> {
-        prop_oneof![
-            Just(IndexDocumentsMethod::ReplaceDocuments),
-            Just(IndexDocumentsMethod::UpdateDocuments),
-        ]
-    }
-
-    pub(super) fn datetime_strategy() -> impl Strategy<Value = OffsetDateTime> {
-        Just(OffsetDateTime::now_utc())
-    }
-}
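One consequence of the `index_uid` dispatch above is that `Dump` is the only content variant not tied to an index; every other variant can name its target. A simplified sketch of that dispatch, with the variants trimmed to three and stand-in types rather than the deleted ones:

```rust
// Simplified sketch of `Task::index_uid`: every content variant names its
// target index except `Dump`.
enum TaskContent {
    IndexCreation { index_uid: String },
    IndexDeletion { index_uid: String },
    Dump { uid: String },
}

fn index_uid(content: &TaskContent) -> Option<&str> {
    match content {
        TaskContent::IndexCreation { index_uid }
        | TaskContent::IndexDeletion { index_uid } => Some(index_uid),
        TaskContent::Dump { .. } => None,
    }
}

fn main() {
    let create = TaskContent::IndexCreation { index_uid: "movies".into() };
    assert_eq!(index_uid(&create), Some("movies"));

    // Dumps span the whole instance, so they carry no index uid.
    let dump = TaskContent::Dump { uid: "20221104-173822".into() };
    assert_eq!(index_uid(&dump), None);
}
```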