diff --git a/Cargo.lock b/Cargo.lock index bd17cdc0b..57ac0d98b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1117,17 +1117,13 @@ checksum = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574" [[package]] name = "grenad" -version = "0.3.0" +version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7824d499230110f4e4a8d4fd3fd4dc15c1347fce5082e4bba82eef17f43e1ed8" +checksum = "1a7a9cc43b28a20f791b17863f34a36654fdfa50be6d0a67bb18c1e34d145f18" dependencies = [ "bytemuck", "byteorder", - "flate2", - "lz4_flex", - "snap", "tempfile", - "zstd", ] [[package]] @@ -1352,15 +1348,6 @@ version = "2.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "68f2d64f2edebec4ce84ad108148e67e1064789bee435edc5b60ad398714a3a9" -[[package]] -name = "itertools" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "284f18f85651fe11e8a991b2adb42cb078325c996ed026d994719efcfca1d54b" -dependencies = [ - "either", -] - [[package]] name = "itertools" version = "0.10.1" @@ -1555,15 +1542,6 @@ dependencies = [ "syn 0.15.44", ] -[[package]] -name = "lz4_flex" -version = "0.8.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5827b976d911b5d2e42b2ccfc7c0d2461a1414e8280436885218762fc529b3f8" -dependencies = [ - "twox-hash", -] - [[package]] name = "main_error" version = "0.1.1" @@ -1619,7 +1597,7 @@ dependencies = [ "hex", "http", "indexmap", - "itertools 0.10.1", + "itertools", "jemallocator", "log", "main_error", @@ -1706,8 +1684,8 @@ dependencies = [ [[package]] name = "milli" -version = "0.12.0" -source = "git+https://github.com/meilisearch/milli.git?tag=v0.12.0#5cbe8793251bbf143434c8a4c4e7195ca6c5f2ac" +version = "0.13.1" +source = "git+https://github.com/meilisearch/milli.git?tag=v0.13.1#90d64d257fa944ab2ee1572193e501bb231627c7" dependencies = [ "bstr", "byteorder", @@ -1722,7 +1700,7 @@ dependencies = [ "grenad", "heed", "human_format", - 
"itertools 0.10.1", + "itertools", "levenshtein_automata", "linked-hash-map", "log", @@ -2679,12 +2657,6 @@ version = "1.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fe0f37c9e8f3c5a4a66ad655a93c74daac4ad00c441533bf5c6e7990bb42604e" -[[package]] -name = "snap" -version = "1.0.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "45456094d1983e2ee2a18fdfebce3189fa451699d0502cb8e3b49dba5ba41451" - [[package]] name = "socket2" version = "0.4.0" @@ -2710,12 +2682,6 @@ dependencies = [ "version_check", ] -[[package]] -name = "static_assertions" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" - [[package]] name = "stdweb" version = "0.4.20" @@ -3093,16 +3059,6 @@ version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "59547bce71d9c38b83d9c0e92b6066c4253371f15005def0c30d9657f50c7642" -[[package]] -name = "twox-hash" -version = "1.6.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1f559b464de2e2bdabcac6a210d12e9b5a5973c251e102c44c585c71d51bd78e" -dependencies = [ - "cfg-if 1.0.0", - "static_assertions", -] - [[package]] name = "typenum" version = "1.13.0" @@ -3472,34 +3428,3 @@ dependencies = [ "thiserror", "time 0.1.44", ] - -[[package]] -name = "zstd" -version = "0.5.4+zstd.1.4.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "69996ebdb1ba8b1517f61387a883857818a66c8a295f487b1ffd8fd9d2c82910" -dependencies = [ - "zstd-safe", -] - -[[package]] -name = "zstd-safe" -version = "2.0.6+zstd.1.4.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "98aa931fb69ecee256d44589d19754e61851ae4769bf963b385119b1cc37a49e" -dependencies = [ - "libc", - "zstd-sys", -] - -[[package]] -name = "zstd-sys" -version = "1.4.18+zstd.1.4.7" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1e6e8778706838f43f771d80d37787cb2fe06dafe89dd3aebaf6721b9eaec81" -dependencies = [ - "cc", - "glob", - "itertools 0.9.0", - "libc", -] diff --git a/meilisearch-http/Cargo.toml b/meilisearch-http/Cargo.toml index aa56ad69f..50976b29a 100644 --- a/meilisearch-http/Cargo.toml +++ b/meilisearch-http/Cargo.toml @@ -50,7 +50,7 @@ main_error = "0.1.0" meilisearch-error = { path = "../meilisearch-error" } meilisearch-tokenizer = { git = "https://github.com/meilisearch/tokenizer.git", tag = "v0.2.5" } memmap = "0.7.0" -milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.12.0" } +milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.13.1" } mime = "0.3.16" num_cpus = "1.13.0" once_cell = "1.5.2" diff --git a/meilisearch-http/src/error.rs b/meilisearch-http/src/error.rs index 085bceabf..95a503ea4 100644 --- a/meilisearch-http/src/error.rs +++ b/meilisearch-http/src/error.rs @@ -93,14 +93,17 @@ impl ErrorCode for MilliError<'_> { | UserError::InvalidDocumentId { .. } | UserError::InvalidStoreFile | UserError::NoSpaceLeftOnDevice + | UserError::InvalidAscDescSyntax { .. } | UserError::DocumentLimitReached => Code::Internal, UserError::AttributeLimitReached => Code::MaxFieldsLimitExceeded, UserError::InvalidFilter(_) => Code::Filter, UserError::InvalidFilterAttribute(_) => Code::Filter, + UserError::InvalidSortName { .. } => Code::Sort, UserError::MissingDocumentId { .. } => Code::MissingDocumentId, UserError::MissingPrimaryKey => Code::MissingPrimaryKey, UserError::PrimaryKeyCannotBeChanged => Code::PrimaryKeyAlreadyPresent, UserError::PrimaryKeyCannotBeReset => Code::PrimaryKeyAlreadyPresent, + UserError::SortRankingRuleMissing => Code::Sort, UserError::UnknownInternalDocumentId { .. } => Code::DocumentNotFound, UserError::InvalidFacetsDistribution { .. } => Code::BadRequest, UserError::InvalidSortableAttribute { .. 
} => Code::Sort, diff --git a/meilisearch-http/src/index/dump.rs b/meilisearch-http/src/index/dump.rs index 3b10a1562..7df704339 100644 --- a/meilisearch-http/src/index/dump.rs +++ b/meilisearch-http/src/index/dump.rs @@ -8,7 +8,9 @@ use heed::RoTxn; use indexmap::IndexMap; use milli::update::{IndexDocumentsMethod, UpdateFormat::JsonStream}; use serde::{Deserialize, Serialize}; +use serde_json::Value; +use crate::index_controller::{asc_ranking_rule, desc_ranking_rule}; use crate::option::IndexerOpts; use super::error::Result; @@ -93,10 +95,22 @@ impl Index { let meta_path = src.as_ref().join(META_FILE_NAME); let mut meta_file = File::open(meta_path)?; + + // We first deserialize the dump meta into a serde_json::Value and change + // the custom ranking rules settings from the old format to the new format. + let mut meta: Value = serde_json::from_reader(&mut meta_file)?; + if let Some(ranking_rules) = meta.pointer_mut("/settings/rankingRules") { + convert_custom_ranking_rules(ranking_rules); + } + + // Then we serialize it back into a vec to deserialize it + // into a `DumpMeta` struct with the newly patched `rankingRules` format. + let patched_meta = serde_json::to_vec(&meta)?; + let DumpMeta { settings, primary_key, - } = serde_json::from_reader(&mut meta_file)?; + } = serde_json::from_slice(&patched_meta)?; let settings = settings.check(); let index = Self::open(&dst_dir_path, size)?; let mut txn = index.write_txn()?; @@ -132,3 +146,25 @@ impl Index { Ok(()) } } + +/// Converts the ranking rules from the format `asc(_)`, `desc(_)` to the format `_:asc`, `_:desc`. +/// +/// This is done for compatibility reasons, and to avoid a new dump version, +/// since the new syntax was introduced soon after the new dump version. 
+fn convert_custom_ranking_rules(ranking_rules: &mut Value) { + *ranking_rules = match ranking_rules.take() { + Value::Array(values) => values + .into_iter() + .filter_map(|value| match value { + Value::String(s) if s.starts_with("asc") => asc_ranking_rule(&s) + .map(|f| format!("{}:asc", f)) + .map(Value::String), + Value::String(s) if s.starts_with("desc") => desc_ranking_rule(&s) + .map(|f| format!("{}:desc", f)) + .map(Value::String), + otherwise => Some(otherwise), + }) + .collect(), + otherwise => otherwise, + } +} diff --git a/meilisearch-http/src/index/search.rs b/meilisearch-http/src/index/search.rs index 4fd7f8cd1..26eb816a0 100644 --- a/meilisearch-http/src/index/search.rs +++ b/meilisearch-http/src/index/search.rs @@ -6,7 +6,7 @@ use either::Either; use heed::RoTxn; use indexmap::IndexMap; use meilisearch_tokenizer::{Analyzer, AnalyzerConfig, Token}; -use milli::{AscDesc, FieldId, FieldsIdsMap, FilterCondition, MatchingWords}; +use milli::{AscDesc, FieldId, FieldsIdsMap, FilterCondition, MatchingWords, UserError}; use serde::{Deserialize, Serialize}; use serde_json::Value; @@ -110,6 +110,11 @@ impl Index { if let Some(ref sort) = query.sort { let sort = match sort.iter().map(|s| AscDesc::from_str(s)).collect() { Ok(sorts) => sorts, + Err(UserError::InvalidAscDescSyntax { name }) => { + return Err(IndexError::Milli( + UserError::InvalidSortName { name }.into(), + )) + } Err(err) => return Err(IndexError::Milli(err.into())), }; diff --git a/meilisearch-http/src/index_controller/dump_actor/loaders/v1.rs b/meilisearch-http/src/index_controller/dump_actor/loaders/v1.rs index 6a505c077..997fd2801 100644 --- a/meilisearch-http/src/index_controller/dump_actor/loaders/v1.rs +++ b/meilisearch-http/src/index_controller/dump_actor/loaders/v1.rs @@ -12,6 +12,7 @@ use serde::{Deserialize, Deserializer, Serialize}; use uuid::Uuid; use crate::index_controller::{self, uuid_resolver::HeedUuidStore, IndexMetadata}; +use crate::index_controller::{asc_ranking_rule, 
desc_ranking_rule}; use crate::{ index::{update_handler::UpdateHandler, Index, Unchecked}, option::IndexerOpts, @@ -164,19 +165,21 @@ impl From for index_controller::Settings { None => Setting::NotSet }, sortable_attributes: Setting::NotSet, - // we need to convert the old `Vec` into a `BTreeSet` ranking_rules: match settings.ranking_rules { - Some(Some(ranking_rules)) => Setting::Set(ranking_rules.into_iter().filter(|criterion| { + Some(Some(ranking_rules)) => Setting::Set(ranking_rules.into_iter().filter_map(|criterion| { match criterion.as_str() { - "words" | "typo" | "proximity" | "attribute" | "exactness" => true, - s if s.starts_with("asc") || s.starts_with("desc") => true, + "words" | "typo" | "proximity" | "attribute" | "exactness" => Some(criterion), + s if s.starts_with("asc") => asc_ranking_rule(s).map(|f| format!("{}:asc", f)), + s if s.starts_with("desc") => desc_ranking_rule(s).map(|f| format!("{}:desc", f)), "wordsPosition" => { - warn!("The criteria `attribute` and `wordsPosition` have been merged into a single criterion `attribute` so `wordsPositon` will be ignored"); - false + warn!("The criteria `attribute` and `wordsPosition` have been merged \ + into a single criterion `attribute` so `wordsPositon` will be \ + ignored"); + None } s => { error!("Unknown criterion found in the dump: `{}`, it will be ignored", s); - false + None } } }).collect()), diff --git a/meilisearch-http/src/index_controller/index_actor/actor.rs b/meilisearch-http/src/index_controller/index_actor/actor.rs index 15d96b7ad..fc40a5090 100644 --- a/meilisearch-http/src/index_controller/index_actor/actor.rs +++ b/meilisearch-http/src/index_controller/index_actor/actor.rs @@ -31,9 +31,12 @@ pub struct IndexActor { } impl IndexActor { - pub fn new(receiver: mpsc::Receiver, store: S) -> anyhow::Result { - let options = IndexerOpts::default(); - let update_handler = UpdateHandler::new(&options)?; + pub fn new( + receiver: mpsc::Receiver, + store: S, + options: &IndexerOpts, + ) -> 
anyhow::Result { + let update_handler = UpdateHandler::new(options)?; let update_handler = Arc::new(update_handler); let receiver = Some(receiver); Ok(Self { diff --git a/meilisearch-http/src/index_controller/index_actor/handle_impl.rs b/meilisearch-http/src/index_controller/index_actor/handle_impl.rs index 231a3a44b..ceb2a8226 100644 --- a/meilisearch-http/src/index_controller/index_actor/handle_impl.rs +++ b/meilisearch-http/src/index_controller/index_actor/handle_impl.rs @@ -1,3 +1,4 @@ +use crate::option::IndexerOpts; use std::path::{Path, PathBuf}; use tokio::sync::{mpsc, oneshot}; @@ -148,11 +149,15 @@ impl IndexActorHandle for IndexActorHandleImpl { } impl IndexActorHandleImpl { - pub fn new(path: impl AsRef, index_size: usize) -> anyhow::Result { + pub fn new( + path: impl AsRef, + index_size: usize, + options: &IndexerOpts, + ) -> anyhow::Result { let (sender, receiver) = mpsc::channel(100); let store = MapIndexStore::new(path, index_size); - let actor = IndexActor::new(receiver, store)?; + let actor = IndexActor::new(receiver, store, options)?; tokio::task::spawn(actor.run()); Ok(Self { sender }) } diff --git a/meilisearch-http/src/index_controller/mod.rs b/meilisearch-http/src/index_controller/mod.rs index a90498b9c..4565a1dd0 100644 --- a/meilisearch-http/src/index_controller/mod.rs +++ b/meilisearch-http/src/index_controller/mod.rs @@ -110,7 +110,8 @@ impl IndexController { std::fs::create_dir_all(&path)?; let uuid_resolver = uuid_resolver::UuidResolverHandleImpl::new(&path)?; - let index_handle = index_actor::IndexActorHandleImpl::new(&path, index_size)?; + let index_handle = + index_actor::IndexActorHandleImpl::new(&path, index_size, &options.indexer_options)?; let update_handle = update_actor::UpdateActorHandleImpl::new( index_handle.clone(), &path, @@ -439,3 +440,17 @@ pub async fn get_arc_ownership_blocking(mut item: Arc) -> T { } } } + +/// Parses the v1 version of the Asc ranking rules `asc(price)` and returns the field name. 
+pub fn asc_ranking_rule(text: &str) -> Option<&str> { + text.split_once("asc(") + .and_then(|(_, tail)| tail.rsplit_once(")")) + .map(|(field, _)| field) +} + +/// Parses the v1 version of the Desc ranking rules `desc(price)` and returns the field name. +pub fn desc_ranking_rule(text: &str) -> Option<&str> { + text.split_once("desc(") + .and_then(|(_, tail)| tail.rsplit_once(")")) + .map(|(field, _)| field) +} diff --git a/meilisearch-http/src/option.rs b/meilisearch-http/src/option.rs index f3c077c05..39966092e 100644 --- a/meilisearch-http/src/option.rs +++ b/meilisearch-http/src/option.rs @@ -38,11 +38,6 @@ pub struct IndexerOpts { #[structopt(long, default_value)] pub max_memory: MaxMemory, - /// Size of the linked hash map cache when indexing. - /// The bigger it is, the faster the indexing is but the more memory it takes. - #[structopt(long, default_value = "500")] - pub linked_hash_map_size: usize, - /// The name of the compression algorithm to use when compressing intermediate /// Grenad chunks while indexing documents. /// @@ -54,18 +49,6 @@ pub struct IndexerOpts { #[structopt(long, requires = "chunk-compression-type")] pub chunk_compression_level: Option, - /// The number of bytes to remove from the begining of the chunks while reading/sorting - /// or merging them. - /// - /// File fusing must only be enable on file systems that support the `FALLOC_FL_COLLAPSE_RANGE`, - /// (i.e. ext4 and XFS). File fusing will only work if the `enable-chunk-fusing` is set. - #[structopt(long, default_value = "4 GiB")] - pub chunk_fusing_shrink_size: Byte, - - /// Enable the chunk fusing or not, this reduces the amount of disk space used. - #[structopt(long)] - pub enable_chunk_fusing: bool, - /// Number of parallel jobs for indexing, defaults to # of CPUs. 
#[structopt(long)] pub indexing_jobs: Option, @@ -77,11 +60,8 @@ impl Default for IndexerOpts { log_every_n: 100_000, max_nb_chunks: None, max_memory: MaxMemory::default(), - linked_hash_map_size: 500, chunk_compression_type: CompressionType::None, chunk_compression_level: None, - chunk_fusing_shrink_size: Byte::from_str("4GiB").unwrap(), - enable_chunk_fusing: false, indexing_jobs: None, } } @@ -286,6 +266,12 @@ impl Deref for MaxMemory { } } +impl MaxMemory { + pub fn unlimited() -> Self { + Self(None) + } +} + /// Returns the total amount of bytes available or `None` if this system isn't supported. fn total_memory_bytes() -> Option { if System::IS_SUPPORTED { diff --git a/meilisearch-http/tests/common/server.rs b/meilisearch-http/tests/common/server.rs index 6cf1acb6a..52c0e30ea 100644 --- a/meilisearch-http/tests/common/server.rs +++ b/meilisearch-http/tests/common/server.rs @@ -7,7 +7,7 @@ use tempdir::TempDir; use urlencoding::encode; use meilisearch_http::data::Data; -use meilisearch_http::option::{IndexerOpts, Opt}; +use meilisearch_http::option::{IndexerOpts, MaxMemory, Opt}; use super::index::Index; use super::service::Service; @@ -90,7 +90,11 @@ pub fn default_settings(dir: impl AsRef) -> Opt { schedule_snapshot: false, snapshot_interval_sec: 0, import_dump: None, - indexer_options: IndexerOpts::default(), + indexer_options: IndexerOpts { + // Memory has to be unlimited because several Meilisearch instances run concurrently in the test context.
+ max_memory: MaxMemory::unlimited(), + ..Default::default() + }, log_level: "off".into(), } } diff --git a/meilisearch-http/tests/settings/get_settings.rs b/meilisearch-http/tests/settings/get_settings.rs index 3b3ab5735..c7bcd4376 100644 --- a/meilisearch-http/tests/settings/get_settings.rs +++ b/meilisearch-http/tests/settings/get_settings.rs @@ -16,9 +16,9 @@ static DEFAULT_SETTINGS_VALUES: Lazy> = Lazy::new(| json!([ "words", "typo", - "sort", "proximity", "attribute", + "sort", "exactness" ]), ); @@ -53,9 +53,9 @@ async fn get_settings() { json!([ "words", "typo", - "sort", "proximity", "attribute", + "sort", "exactness" ]) );