1700: `stable` into `main` after v0.22.0 r=MarinPostma a=curquiza



Co-authored-by: many <maxime@meilisearch.com>
Co-authored-by: Kerollmops <clement@meilisearch.com>
Co-authored-by: bors[bot] <26634292+bors[bot]@users.noreply.github.com>
Co-authored-by: Tamo <tamo@meilisearch.com>
Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
This commit is contained in:
bors[bot] 2021-09-13 12:32:23 +00:00 committed by GitHub
commit 6e59da26d4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 106 additions and 121 deletions

87
Cargo.lock generated
View File

@ -1117,17 +1117,13 @@ checksum = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574"
[[package]]
name = "grenad"
version = "0.3.0"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7824d499230110f4e4a8d4fd3fd4dc15c1347fce5082e4bba82eef17f43e1ed8"
checksum = "1a7a9cc43b28a20f791b17863f34a36654fdfa50be6d0a67bb18c1e34d145f18"
dependencies = [
"bytemuck",
"byteorder",
"flate2",
"lz4_flex",
"snap",
"tempfile",
"zstd",
]
[[package]]
@ -1352,15 +1348,6 @@ version = "2.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "68f2d64f2edebec4ce84ad108148e67e1064789bee435edc5b60ad398714a3a9"
[[package]]
name = "itertools"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "284f18f85651fe11e8a991b2adb42cb078325c996ed026d994719efcfca1d54b"
dependencies = [
"either",
]
[[package]]
name = "itertools"
version = "0.10.1"
@ -1555,15 +1542,6 @@ dependencies = [
"syn 0.15.44",
]
[[package]]
name = "lz4_flex"
version = "0.8.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5827b976d911b5d2e42b2ccfc7c0d2461a1414e8280436885218762fc529b3f8"
dependencies = [
"twox-hash",
]
[[package]]
name = "main_error"
version = "0.1.1"
@ -1619,7 +1597,7 @@ dependencies = [
"hex",
"http",
"indexmap",
"itertools 0.10.1",
"itertools",
"jemallocator",
"log",
"main_error",
@ -1706,8 +1684,8 @@ dependencies = [
[[package]]
name = "milli"
version = "0.12.0"
source = "git+https://github.com/meilisearch/milli.git?tag=v0.12.0#5cbe8793251bbf143434c8a4c4e7195ca6c5f2ac"
version = "0.13.1"
source = "git+https://github.com/meilisearch/milli.git?tag=v0.13.1#90d64d257fa944ab2ee1572193e501bb231627c7"
dependencies = [
"bstr",
"byteorder",
@ -1722,7 +1700,7 @@ dependencies = [
"grenad",
"heed",
"human_format",
"itertools 0.10.1",
"itertools",
"levenshtein_automata",
"linked-hash-map",
"log",
@ -2679,12 +2657,6 @@ version = "1.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fe0f37c9e8f3c5a4a66ad655a93c74daac4ad00c441533bf5c6e7990bb42604e"
[[package]]
name = "snap"
version = "1.0.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "45456094d1983e2ee2a18fdfebce3189fa451699d0502cb8e3b49dba5ba41451"
[[package]]
name = "socket2"
version = "0.4.0"
@ -2710,12 +2682,6 @@ dependencies = [
"version_check",
]
[[package]]
name = "static_assertions"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f"
[[package]]
name = "stdweb"
version = "0.4.20"
@ -3093,16 +3059,6 @@ version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "59547bce71d9c38b83d9c0e92b6066c4253371f15005def0c30d9657f50c7642"
[[package]]
name = "twox-hash"
version = "1.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1f559b464de2e2bdabcac6a210d12e9b5a5973c251e102c44c585c71d51bd78e"
dependencies = [
"cfg-if 1.0.0",
"static_assertions",
]
[[package]]
name = "typenum"
version = "1.13.0"
@ -3472,34 +3428,3 @@ dependencies = [
"thiserror",
"time 0.1.44",
]
[[package]]
name = "zstd"
version = "0.5.4+zstd.1.4.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "69996ebdb1ba8b1517f61387a883857818a66c8a295f487b1ffd8fd9d2c82910"
dependencies = [
"zstd-safe",
]
[[package]]
name = "zstd-safe"
version = "2.0.6+zstd.1.4.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "98aa931fb69ecee256d44589d19754e61851ae4769bf963b385119b1cc37a49e"
dependencies = [
"libc",
"zstd-sys",
]
[[package]]
name = "zstd-sys"
version = "1.4.18+zstd.1.4.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a1e6e8778706838f43f771d80d37787cb2fe06dafe89dd3aebaf6721b9eaec81"
dependencies = [
"cc",
"glob",
"itertools 0.9.0",
"libc",
]

View File

@ -50,7 +50,7 @@ main_error = "0.1.0"
meilisearch-error = { path = "../meilisearch-error" }
meilisearch-tokenizer = { git = "https://github.com/meilisearch/tokenizer.git", tag = "v0.2.5" }
memmap = "0.7.0"
milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.12.0" }
milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.13.1" }
mime = "0.3.16"
num_cpus = "1.13.0"
once_cell = "1.5.2"

View File

@ -93,14 +93,17 @@ impl ErrorCode for MilliError<'_> {
| UserError::InvalidDocumentId { .. }
| UserError::InvalidStoreFile
| UserError::NoSpaceLeftOnDevice
| UserError::InvalidAscDescSyntax { .. }
| UserError::DocumentLimitReached => Code::Internal,
UserError::AttributeLimitReached => Code::MaxFieldsLimitExceeded,
UserError::InvalidFilter(_) => Code::Filter,
UserError::InvalidFilterAttribute(_) => Code::Filter,
UserError::InvalidSortName { .. } => Code::Sort,
UserError::MissingDocumentId { .. } => Code::MissingDocumentId,
UserError::MissingPrimaryKey => Code::MissingPrimaryKey,
UserError::PrimaryKeyCannotBeChanged => Code::PrimaryKeyAlreadyPresent,
UserError::PrimaryKeyCannotBeReset => Code::PrimaryKeyAlreadyPresent,
UserError::SortRankingRuleMissing => Code::Sort,
UserError::UnknownInternalDocumentId { .. } => Code::DocumentNotFound,
UserError::InvalidFacetsDistribution { .. } => Code::BadRequest,
UserError::InvalidSortableAttribute { .. } => Code::Sort,

View File

@ -8,7 +8,9 @@ use heed::RoTxn;
use indexmap::IndexMap;
use milli::update::{IndexDocumentsMethod, UpdateFormat::JsonStream};
use serde::{Deserialize, Serialize};
use serde_json::Value;
use crate::index_controller::{asc_ranking_rule, desc_ranking_rule};
use crate::option::IndexerOpts;
use super::error::Result;
@ -93,10 +95,22 @@ impl Index {
let meta_path = src.as_ref().join(META_FILE_NAME);
let mut meta_file = File::open(meta_path)?;
// We first deserialize the dump meta into a serde_json::Value and change
// the custom ranking rules settings from the old format to the new format.
let mut meta: Value = serde_json::from_reader(&mut meta_file)?;
if let Some(ranking_rules) = meta.pointer_mut("/settings/rankingRules") {
convert_custom_ranking_rules(ranking_rules);
}
// Then we serialize it back into a vec to deserialize it
// into a `DumpMeta` struct with the newly patched `rankingRules` format.
let patched_meta = serde_json::to_vec(&meta)?;
let DumpMeta {
settings,
primary_key,
} = serde_json::from_reader(&mut meta_file)?;
} = serde_json::from_slice(&patched_meta)?;
let settings = settings.check();
let index = Self::open(&dst_dir_path, size)?;
let mut txn = index.write_txn()?;
@ -132,3 +146,25 @@ impl Index {
Ok(())
}
}
/// Converts the ranking rules from the format `asc(_)`, `desc(_)` to the format `_:asc`, `_:desc`.
///
/// This is done for compatibility reasons, and to avoid a new dump version,
/// since the new syntax was introduced soon after the new dump version.
fn convert_custom_ranking_rules(ranking_rules: &mut Value) {
*ranking_rules = match ranking_rules.take() {
Value::Array(values) => values
.into_iter()
.filter_map(|value| match value {
Value::String(s) if s.starts_with("asc") => asc_ranking_rule(&s)
.map(|f| format!("{}:asc", f))
.map(Value::String),
Value::String(s) if s.starts_with("desc") => desc_ranking_rule(&s)
.map(|f| format!("{}:desc", f))
.map(Value::String),
otherwise => Some(otherwise),
})
.collect(),
otherwise => otherwise,
}
}

View File

@ -6,7 +6,7 @@ use either::Either;
use heed::RoTxn;
use indexmap::IndexMap;
use meilisearch_tokenizer::{Analyzer, AnalyzerConfig, Token};
use milli::{AscDesc, FieldId, FieldsIdsMap, FilterCondition, MatchingWords};
use milli::{AscDesc, FieldId, FieldsIdsMap, FilterCondition, MatchingWords, UserError};
use serde::{Deserialize, Serialize};
use serde_json::Value;
@ -110,6 +110,11 @@ impl Index {
if let Some(ref sort) = query.sort {
let sort = match sort.iter().map(|s| AscDesc::from_str(s)).collect() {
Ok(sorts) => sorts,
Err(UserError::InvalidAscDescSyntax { name }) => {
return Err(IndexError::Milli(
UserError::InvalidSortName { name }.into(),
))
}
Err(err) => return Err(IndexError::Milli(err.into())),
};

View File

@ -12,6 +12,7 @@ use serde::{Deserialize, Deserializer, Serialize};
use uuid::Uuid;
use crate::index_controller::{self, uuid_resolver::HeedUuidStore, IndexMetadata};
use crate::index_controller::{asc_ranking_rule, desc_ranking_rule};
use crate::{
index::{update_handler::UpdateHandler, Index, Unchecked},
option::IndexerOpts,
@ -164,19 +165,21 @@ impl From<Settings> for index_controller::Settings<Unchecked> {
None => Setting::NotSet
},
sortable_attributes: Setting::NotSet,
// we need to convert the old `Vec<String>` into a `BTreeSet<String>`
ranking_rules: match settings.ranking_rules {
Some(Some(ranking_rules)) => Setting::Set(ranking_rules.into_iter().filter(|criterion| {
Some(Some(ranking_rules)) => Setting::Set(ranking_rules.into_iter().filter_map(|criterion| {
match criterion.as_str() {
"words" | "typo" | "proximity" | "attribute" | "exactness" => true,
s if s.starts_with("asc") || s.starts_with("desc") => true,
"words" | "typo" | "proximity" | "attribute" | "exactness" => Some(criterion),
s if s.starts_with("asc") => asc_ranking_rule(s).map(|f| format!("{}:asc", f)),
s if s.starts_with("desc") => desc_ranking_rule(s).map(|f| format!("{}:desc", f)),
"wordsPosition" => {
warn!("The criteria `attribute` and `wordsPosition` have been merged into a single criterion `attribute` so `wordsPositon` will be ignored");
false
warn!("The criteria `attribute` and `wordsPosition` have been merged \
into a single criterion `attribute` so `wordsPositon` will be \
ignored");
None
}
s => {
error!("Unknown criterion found in the dump: `{}`, it will be ignored", s);
false
None
}
}
}).collect()),

View File

@ -31,9 +31,12 @@ pub struct IndexActor<S> {
}
impl<S: IndexStore + Sync + Send> IndexActor<S> {
pub fn new(receiver: mpsc::Receiver<IndexMsg>, store: S) -> anyhow::Result<Self> {
let options = IndexerOpts::default();
let update_handler = UpdateHandler::new(&options)?;
pub fn new(
receiver: mpsc::Receiver<IndexMsg>,
store: S,
options: &IndexerOpts,
) -> anyhow::Result<Self> {
let update_handler = UpdateHandler::new(options)?;
let update_handler = Arc::new(update_handler);
let receiver = Some(receiver);
Ok(Self {

View File

@ -1,3 +1,4 @@
use crate::option::IndexerOpts;
use std::path::{Path, PathBuf};
use tokio::sync::{mpsc, oneshot};
@ -148,11 +149,15 @@ impl IndexActorHandle for IndexActorHandleImpl {
}
impl IndexActorHandleImpl {
pub fn new(path: impl AsRef<Path>, index_size: usize) -> anyhow::Result<Self> {
pub fn new(
path: impl AsRef<Path>,
index_size: usize,
options: &IndexerOpts,
) -> anyhow::Result<Self> {
let (sender, receiver) = mpsc::channel(100);
let store = MapIndexStore::new(path, index_size);
let actor = IndexActor::new(receiver, store)?;
let actor = IndexActor::new(receiver, store, options)?;
tokio::task::spawn(actor.run());
Ok(Self { sender })
}

View File

@ -110,7 +110,8 @@ impl IndexController {
std::fs::create_dir_all(&path)?;
let uuid_resolver = uuid_resolver::UuidResolverHandleImpl::new(&path)?;
let index_handle = index_actor::IndexActorHandleImpl::new(&path, index_size)?;
let index_handle =
index_actor::IndexActorHandleImpl::new(&path, index_size, &options.indexer_options)?;
let update_handle = update_actor::UpdateActorHandleImpl::new(
index_handle.clone(),
&path,
@ -439,3 +440,17 @@ pub async fn get_arc_ownership_blocking<T>(mut item: Arc<T>) -> T {
}
}
}
/// Parses the v1 version of the Asc ranking rules `asc(price)`and returns the field name.
pub fn asc_ranking_rule(text: &str) -> Option<&str> {
text.split_once("asc(")
.and_then(|(_, tail)| tail.rsplit_once(")"))
.map(|(field, _)| field)
}
/// Parses the v1 version of the Desc ranking rules `asc(price)`and returns the field name.
pub fn desc_ranking_rule(text: &str) -> Option<&str> {
text.split_once("desc(")
.and_then(|(_, tail)| tail.rsplit_once(")"))
.map(|(field, _)| field)
}

View File

@ -38,11 +38,6 @@ pub struct IndexerOpts {
#[structopt(long, default_value)]
pub max_memory: MaxMemory,
/// Size of the linked hash map cache when indexing.
/// The bigger it is, the faster the indexing is but the more memory it takes.
#[structopt(long, default_value = "500")]
pub linked_hash_map_size: usize,
/// The name of the compression algorithm to use when compressing intermediate
/// Grenad chunks while indexing documents.
///
@ -54,18 +49,6 @@ pub struct IndexerOpts {
#[structopt(long, requires = "chunk-compression-type")]
pub chunk_compression_level: Option<u32>,
/// The number of bytes to remove from the begining of the chunks while reading/sorting
/// or merging them.
///
/// File fusing must only be enable on file systems that support the `FALLOC_FL_COLLAPSE_RANGE`,
/// (i.e. ext4 and XFS). File fusing will only work if the `enable-chunk-fusing` is set.
#[structopt(long, default_value = "4 GiB")]
pub chunk_fusing_shrink_size: Byte,
/// Enable the chunk fusing or not, this reduces the amount of disk space used.
#[structopt(long)]
pub enable_chunk_fusing: bool,
/// Number of parallel jobs for indexing, defaults to # of CPUs.
#[structopt(long)]
pub indexing_jobs: Option<usize>,
@ -77,11 +60,8 @@ impl Default for IndexerOpts {
log_every_n: 100_000,
max_nb_chunks: None,
max_memory: MaxMemory::default(),
linked_hash_map_size: 500,
chunk_compression_type: CompressionType::None,
chunk_compression_level: None,
chunk_fusing_shrink_size: Byte::from_str("4GiB").unwrap(),
enable_chunk_fusing: false,
indexing_jobs: None,
}
}
@ -286,6 +266,12 @@ impl Deref for MaxMemory {
}
}
impl MaxMemory {
pub fn unlimited() -> Self {
Self(None)
}
}
/// Returns the total amount of bytes available or `None` if this system isn't supported.
fn total_memory_bytes() -> Option<u64> {
if System::IS_SUPPORTED {

View File

@ -7,7 +7,7 @@ use tempdir::TempDir;
use urlencoding::encode;
use meilisearch_http::data::Data;
use meilisearch_http::option::{IndexerOpts, Opt};
use meilisearch_http::option::{IndexerOpts, MaxMemory, Opt};
use super::index::Index;
use super::service::Service;
@ -90,7 +90,11 @@ pub fn default_settings(dir: impl AsRef<Path>) -> Opt {
schedule_snapshot: false,
snapshot_interval_sec: 0,
import_dump: None,
indexer_options: IndexerOpts::default(),
indexer_options: IndexerOpts {
// memory has to be unlimited because several meilisearch are running in test context.
max_memory: MaxMemory::unlimited(),
..Default::default()
},
log_level: "off".into(),
}
}

View File

@ -16,9 +16,9 @@ static DEFAULT_SETTINGS_VALUES: Lazy<HashMap<&'static str, Value>> = Lazy::new(|
json!([
"words",
"typo",
"sort",
"proximity",
"attribute",
"sort",
"exactness"
]),
);
@ -53,9 +53,9 @@ async fn get_settings() {
json!([
"words",
"typo",
"sort",
"proximity",
"attribute",
"sort",
"exactness"
])
);