mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-01-23 19:57:30 +01:00
Merge #1700
1700: `stable` into `main` after v0.22.0 r=MarinPostma a=curquiza Co-authored-by: many <maxime@meilisearch.com> Co-authored-by: Kerollmops <clement@meilisearch.com> Co-authored-by: bors[bot] <26634292+bors[bot]@users.noreply.github.com> Co-authored-by: Tamo <tamo@meilisearch.com> Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
This commit is contained in:
commit
6e59da26d4
87
Cargo.lock
generated
87
Cargo.lock
generated
@ -1117,17 +1117,13 @@ checksum = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574"
|
||||
|
||||
[[package]]
|
||||
name = "grenad"
|
||||
version = "0.3.0"
|
||||
version = "0.3.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7824d499230110f4e4a8d4fd3fd4dc15c1347fce5082e4bba82eef17f43e1ed8"
|
||||
checksum = "1a7a9cc43b28a20f791b17863f34a36654fdfa50be6d0a67bb18c1e34d145f18"
|
||||
dependencies = [
|
||||
"bytemuck",
|
||||
"byteorder",
|
||||
"flate2",
|
||||
"lz4_flex",
|
||||
"snap",
|
||||
"tempfile",
|
||||
"zstd",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@ -1352,15 +1348,6 @@ version = "2.3.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "68f2d64f2edebec4ce84ad108148e67e1064789bee435edc5b60ad398714a3a9"
|
||||
|
||||
[[package]]
|
||||
name = "itertools"
|
||||
version = "0.9.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "284f18f85651fe11e8a991b2adb42cb078325c996ed026d994719efcfca1d54b"
|
||||
dependencies = [
|
||||
"either",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "itertools"
|
||||
version = "0.10.1"
|
||||
@ -1555,15 +1542,6 @@ dependencies = [
|
||||
"syn 0.15.44",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "lz4_flex"
|
||||
version = "0.8.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5827b976d911b5d2e42b2ccfc7c0d2461a1414e8280436885218762fc529b3f8"
|
||||
dependencies = [
|
||||
"twox-hash",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "main_error"
|
||||
version = "0.1.1"
|
||||
@ -1619,7 +1597,7 @@ dependencies = [
|
||||
"hex",
|
||||
"http",
|
||||
"indexmap",
|
||||
"itertools 0.10.1",
|
||||
"itertools",
|
||||
"jemallocator",
|
||||
"log",
|
||||
"main_error",
|
||||
@ -1706,8 +1684,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "milli"
|
||||
version = "0.12.0"
|
||||
source = "git+https://github.com/meilisearch/milli.git?tag=v0.12.0#5cbe8793251bbf143434c8a4c4e7195ca6c5f2ac"
|
||||
version = "0.13.1"
|
||||
source = "git+https://github.com/meilisearch/milli.git?tag=v0.13.1#90d64d257fa944ab2ee1572193e501bb231627c7"
|
||||
dependencies = [
|
||||
"bstr",
|
||||
"byteorder",
|
||||
@ -1722,7 +1700,7 @@ dependencies = [
|
||||
"grenad",
|
||||
"heed",
|
||||
"human_format",
|
||||
"itertools 0.10.1",
|
||||
"itertools",
|
||||
"levenshtein_automata",
|
||||
"linked-hash-map",
|
||||
"log",
|
||||
@ -2679,12 +2657,6 @@ version = "1.6.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fe0f37c9e8f3c5a4a66ad655a93c74daac4ad00c441533bf5c6e7990bb42604e"
|
||||
|
||||
[[package]]
|
||||
name = "snap"
|
||||
version = "1.0.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "45456094d1983e2ee2a18fdfebce3189fa451699d0502cb8e3b49dba5ba41451"
|
||||
|
||||
[[package]]
|
||||
name = "socket2"
|
||||
version = "0.4.0"
|
||||
@ -2710,12 +2682,6 @@ dependencies = [
|
||||
"version_check",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "static_assertions"
|
||||
version = "1.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f"
|
||||
|
||||
[[package]]
|
||||
name = "stdweb"
|
||||
version = "0.4.20"
|
||||
@ -3093,16 +3059,6 @@ version = "0.2.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "59547bce71d9c38b83d9c0e92b6066c4253371f15005def0c30d9657f50c7642"
|
||||
|
||||
[[package]]
|
||||
name = "twox-hash"
|
||||
version = "1.6.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1f559b464de2e2bdabcac6a210d12e9b5a5973c251e102c44c585c71d51bd78e"
|
||||
dependencies = [
|
||||
"cfg-if 1.0.0",
|
||||
"static_assertions",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "typenum"
|
||||
version = "1.13.0"
|
||||
@ -3472,34 +3428,3 @@ dependencies = [
|
||||
"thiserror",
|
||||
"time 0.1.44",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "zstd"
|
||||
version = "0.5.4+zstd.1.4.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "69996ebdb1ba8b1517f61387a883857818a66c8a295f487b1ffd8fd9d2c82910"
|
||||
dependencies = [
|
||||
"zstd-safe",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "zstd-safe"
|
||||
version = "2.0.6+zstd.1.4.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "98aa931fb69ecee256d44589d19754e61851ae4769bf963b385119b1cc37a49e"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"zstd-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "zstd-sys"
|
||||
version = "1.4.18+zstd.1.4.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a1e6e8778706838f43f771d80d37787cb2fe06dafe89dd3aebaf6721b9eaec81"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"glob",
|
||||
"itertools 0.9.0",
|
||||
"libc",
|
||||
]
|
||||
|
@ -50,7 +50,7 @@ main_error = "0.1.0"
|
||||
meilisearch-error = { path = "../meilisearch-error" }
|
||||
meilisearch-tokenizer = { git = "https://github.com/meilisearch/tokenizer.git", tag = "v0.2.5" }
|
||||
memmap = "0.7.0"
|
||||
milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.12.0" }
|
||||
milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.13.1" }
|
||||
mime = "0.3.16"
|
||||
num_cpus = "1.13.0"
|
||||
once_cell = "1.5.2"
|
||||
|
@ -93,14 +93,17 @@ impl ErrorCode for MilliError<'_> {
|
||||
| UserError::InvalidDocumentId { .. }
|
||||
| UserError::InvalidStoreFile
|
||||
| UserError::NoSpaceLeftOnDevice
|
||||
| UserError::InvalidAscDescSyntax { .. }
|
||||
| UserError::DocumentLimitReached => Code::Internal,
|
||||
UserError::AttributeLimitReached => Code::MaxFieldsLimitExceeded,
|
||||
UserError::InvalidFilter(_) => Code::Filter,
|
||||
UserError::InvalidFilterAttribute(_) => Code::Filter,
|
||||
UserError::InvalidSortName { .. } => Code::Sort,
|
||||
UserError::MissingDocumentId { .. } => Code::MissingDocumentId,
|
||||
UserError::MissingPrimaryKey => Code::MissingPrimaryKey,
|
||||
UserError::PrimaryKeyCannotBeChanged => Code::PrimaryKeyAlreadyPresent,
|
||||
UserError::PrimaryKeyCannotBeReset => Code::PrimaryKeyAlreadyPresent,
|
||||
UserError::SortRankingRuleMissing => Code::Sort,
|
||||
UserError::UnknownInternalDocumentId { .. } => Code::DocumentNotFound,
|
||||
UserError::InvalidFacetsDistribution { .. } => Code::BadRequest,
|
||||
UserError::InvalidSortableAttribute { .. } => Code::Sort,
|
||||
|
@ -8,7 +8,9 @@ use heed::RoTxn;
|
||||
use indexmap::IndexMap;
|
||||
use milli::update::{IndexDocumentsMethod, UpdateFormat::JsonStream};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_json::Value;
|
||||
|
||||
use crate::index_controller::{asc_ranking_rule, desc_ranking_rule};
|
||||
use crate::option::IndexerOpts;
|
||||
|
||||
use super::error::Result;
|
||||
@ -93,10 +95,22 @@ impl Index {
|
||||
|
||||
let meta_path = src.as_ref().join(META_FILE_NAME);
|
||||
let mut meta_file = File::open(meta_path)?;
|
||||
|
||||
// We first deserialize the dump meta into a serde_json::Value and change
|
||||
// the custom ranking rules settings from the old format to the new format.
|
||||
let mut meta: Value = serde_json::from_reader(&mut meta_file)?;
|
||||
if let Some(ranking_rules) = meta.pointer_mut("/settings/rankingRules") {
|
||||
convert_custom_ranking_rules(ranking_rules);
|
||||
}
|
||||
|
||||
// Then we serialize it back into a vec to deserialize it
|
||||
// into a `DumpMeta` struct with the newly patched `rankingRules` format.
|
||||
let patched_meta = serde_json::to_vec(&meta)?;
|
||||
|
||||
let DumpMeta {
|
||||
settings,
|
||||
primary_key,
|
||||
} = serde_json::from_reader(&mut meta_file)?;
|
||||
} = serde_json::from_slice(&patched_meta)?;
|
||||
let settings = settings.check();
|
||||
let index = Self::open(&dst_dir_path, size)?;
|
||||
let mut txn = index.write_txn()?;
|
||||
@ -132,3 +146,25 @@ impl Index {
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// Converts the ranking rules from the format `asc(_)`, `desc(_)` to the format `_:asc`, `_:desc`.
|
||||
///
|
||||
/// This is done for compatibility reasons, and to avoid a new dump version,
|
||||
/// since the new syntax was introduced soon after the new dump version.
|
||||
fn convert_custom_ranking_rules(ranking_rules: &mut Value) {
|
||||
*ranking_rules = match ranking_rules.take() {
|
||||
Value::Array(values) => values
|
||||
.into_iter()
|
||||
.filter_map(|value| match value {
|
||||
Value::String(s) if s.starts_with("asc") => asc_ranking_rule(&s)
|
||||
.map(|f| format!("{}:asc", f))
|
||||
.map(Value::String),
|
||||
Value::String(s) if s.starts_with("desc") => desc_ranking_rule(&s)
|
||||
.map(|f| format!("{}:desc", f))
|
||||
.map(Value::String),
|
||||
otherwise => Some(otherwise),
|
||||
})
|
||||
.collect(),
|
||||
otherwise => otherwise,
|
||||
}
|
||||
}
|
||||
|
@ -6,7 +6,7 @@ use either::Either;
|
||||
use heed::RoTxn;
|
||||
use indexmap::IndexMap;
|
||||
use meilisearch_tokenizer::{Analyzer, AnalyzerConfig, Token};
|
||||
use milli::{AscDesc, FieldId, FieldsIdsMap, FilterCondition, MatchingWords};
|
||||
use milli::{AscDesc, FieldId, FieldsIdsMap, FilterCondition, MatchingWords, UserError};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_json::Value;
|
||||
|
||||
@ -110,6 +110,11 @@ impl Index {
|
||||
if let Some(ref sort) = query.sort {
|
||||
let sort = match sort.iter().map(|s| AscDesc::from_str(s)).collect() {
|
||||
Ok(sorts) => sorts,
|
||||
Err(UserError::InvalidAscDescSyntax { name }) => {
|
||||
return Err(IndexError::Milli(
|
||||
UserError::InvalidSortName { name }.into(),
|
||||
))
|
||||
}
|
||||
Err(err) => return Err(IndexError::Milli(err.into())),
|
||||
};
|
||||
|
||||
|
@ -12,6 +12,7 @@ use serde::{Deserialize, Deserializer, Serialize};
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::index_controller::{self, uuid_resolver::HeedUuidStore, IndexMetadata};
|
||||
use crate::index_controller::{asc_ranking_rule, desc_ranking_rule};
|
||||
use crate::{
|
||||
index::{update_handler::UpdateHandler, Index, Unchecked},
|
||||
option::IndexerOpts,
|
||||
@ -164,19 +165,21 @@ impl From<Settings> for index_controller::Settings<Unchecked> {
|
||||
None => Setting::NotSet
|
||||
},
|
||||
sortable_attributes: Setting::NotSet,
|
||||
// we need to convert the old `Vec<String>` into a `BTreeSet<String>`
|
||||
ranking_rules: match settings.ranking_rules {
|
||||
Some(Some(ranking_rules)) => Setting::Set(ranking_rules.into_iter().filter(|criterion| {
|
||||
Some(Some(ranking_rules)) => Setting::Set(ranking_rules.into_iter().filter_map(|criterion| {
|
||||
match criterion.as_str() {
|
||||
"words" | "typo" | "proximity" | "attribute" | "exactness" => true,
|
||||
s if s.starts_with("asc") || s.starts_with("desc") => true,
|
||||
"words" | "typo" | "proximity" | "attribute" | "exactness" => Some(criterion),
|
||||
s if s.starts_with("asc") => asc_ranking_rule(s).map(|f| format!("{}:asc", f)),
|
||||
s if s.starts_with("desc") => desc_ranking_rule(s).map(|f| format!("{}:desc", f)),
|
||||
"wordsPosition" => {
|
||||
warn!("The criteria `attribute` and `wordsPosition` have been merged into a single criterion `attribute` so `wordsPositon` will be ignored");
|
||||
false
|
||||
warn!("The criteria `attribute` and `wordsPosition` have been merged \
|
||||
into a single criterion `attribute` so `wordsPositon` will be \
|
||||
ignored");
|
||||
None
|
||||
}
|
||||
s => {
|
||||
error!("Unknown criterion found in the dump: `{}`, it will be ignored", s);
|
||||
false
|
||||
None
|
||||
}
|
||||
}
|
||||
}).collect()),
|
||||
|
@ -31,9 +31,12 @@ pub struct IndexActor<S> {
|
||||
}
|
||||
|
||||
impl<S: IndexStore + Sync + Send> IndexActor<S> {
|
||||
pub fn new(receiver: mpsc::Receiver<IndexMsg>, store: S) -> anyhow::Result<Self> {
|
||||
let options = IndexerOpts::default();
|
||||
let update_handler = UpdateHandler::new(&options)?;
|
||||
pub fn new(
|
||||
receiver: mpsc::Receiver<IndexMsg>,
|
||||
store: S,
|
||||
options: &IndexerOpts,
|
||||
) -> anyhow::Result<Self> {
|
||||
let update_handler = UpdateHandler::new(options)?;
|
||||
let update_handler = Arc::new(update_handler);
|
||||
let receiver = Some(receiver);
|
||||
Ok(Self {
|
||||
|
@ -1,3 +1,4 @@
|
||||
use crate::option::IndexerOpts;
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
use tokio::sync::{mpsc, oneshot};
|
||||
@ -148,11 +149,15 @@ impl IndexActorHandle for IndexActorHandleImpl {
|
||||
}
|
||||
|
||||
impl IndexActorHandleImpl {
|
||||
pub fn new(path: impl AsRef<Path>, index_size: usize) -> anyhow::Result<Self> {
|
||||
pub fn new(
|
||||
path: impl AsRef<Path>,
|
||||
index_size: usize,
|
||||
options: &IndexerOpts,
|
||||
) -> anyhow::Result<Self> {
|
||||
let (sender, receiver) = mpsc::channel(100);
|
||||
|
||||
let store = MapIndexStore::new(path, index_size);
|
||||
let actor = IndexActor::new(receiver, store)?;
|
||||
let actor = IndexActor::new(receiver, store, options)?;
|
||||
tokio::task::spawn(actor.run());
|
||||
Ok(Self { sender })
|
||||
}
|
||||
|
@ -110,7 +110,8 @@ impl IndexController {
|
||||
std::fs::create_dir_all(&path)?;
|
||||
|
||||
let uuid_resolver = uuid_resolver::UuidResolverHandleImpl::new(&path)?;
|
||||
let index_handle = index_actor::IndexActorHandleImpl::new(&path, index_size)?;
|
||||
let index_handle =
|
||||
index_actor::IndexActorHandleImpl::new(&path, index_size, &options.indexer_options)?;
|
||||
let update_handle = update_actor::UpdateActorHandleImpl::new(
|
||||
index_handle.clone(),
|
||||
&path,
|
||||
@ -439,3 +440,17 @@ pub async fn get_arc_ownership_blocking<T>(mut item: Arc<T>) -> T {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Parses the v1 version of the Asc ranking rules `asc(price)`and returns the field name.
|
||||
pub fn asc_ranking_rule(text: &str) -> Option<&str> {
|
||||
text.split_once("asc(")
|
||||
.and_then(|(_, tail)| tail.rsplit_once(")"))
|
||||
.map(|(field, _)| field)
|
||||
}
|
||||
|
||||
/// Parses the v1 version of the Desc ranking rules `asc(price)`and returns the field name.
|
||||
pub fn desc_ranking_rule(text: &str) -> Option<&str> {
|
||||
text.split_once("desc(")
|
||||
.and_then(|(_, tail)| tail.rsplit_once(")"))
|
||||
.map(|(field, _)| field)
|
||||
}
|
||||
|
@ -38,11 +38,6 @@ pub struct IndexerOpts {
|
||||
#[structopt(long, default_value)]
|
||||
pub max_memory: MaxMemory,
|
||||
|
||||
/// Size of the linked hash map cache when indexing.
|
||||
/// The bigger it is, the faster the indexing is but the more memory it takes.
|
||||
#[structopt(long, default_value = "500")]
|
||||
pub linked_hash_map_size: usize,
|
||||
|
||||
/// The name of the compression algorithm to use when compressing intermediate
|
||||
/// Grenad chunks while indexing documents.
|
||||
///
|
||||
@ -54,18 +49,6 @@ pub struct IndexerOpts {
|
||||
#[structopt(long, requires = "chunk-compression-type")]
|
||||
pub chunk_compression_level: Option<u32>,
|
||||
|
||||
/// The number of bytes to remove from the begining of the chunks while reading/sorting
|
||||
/// or merging them.
|
||||
///
|
||||
/// File fusing must only be enable on file systems that support the `FALLOC_FL_COLLAPSE_RANGE`,
|
||||
/// (i.e. ext4 and XFS). File fusing will only work if the `enable-chunk-fusing` is set.
|
||||
#[structopt(long, default_value = "4 GiB")]
|
||||
pub chunk_fusing_shrink_size: Byte,
|
||||
|
||||
/// Enable the chunk fusing or not, this reduces the amount of disk space used.
|
||||
#[structopt(long)]
|
||||
pub enable_chunk_fusing: bool,
|
||||
|
||||
/// Number of parallel jobs for indexing, defaults to # of CPUs.
|
||||
#[structopt(long)]
|
||||
pub indexing_jobs: Option<usize>,
|
||||
@ -77,11 +60,8 @@ impl Default for IndexerOpts {
|
||||
log_every_n: 100_000,
|
||||
max_nb_chunks: None,
|
||||
max_memory: MaxMemory::default(),
|
||||
linked_hash_map_size: 500,
|
||||
chunk_compression_type: CompressionType::None,
|
||||
chunk_compression_level: None,
|
||||
chunk_fusing_shrink_size: Byte::from_str("4GiB").unwrap(),
|
||||
enable_chunk_fusing: false,
|
||||
indexing_jobs: None,
|
||||
}
|
||||
}
|
||||
@ -286,6 +266,12 @@ impl Deref for MaxMemory {
|
||||
}
|
||||
}
|
||||
|
||||
impl MaxMemory {
|
||||
pub fn unlimited() -> Self {
|
||||
Self(None)
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the total amount of bytes available or `None` if this system isn't supported.
|
||||
fn total_memory_bytes() -> Option<u64> {
|
||||
if System::IS_SUPPORTED {
|
||||
|
@ -7,7 +7,7 @@ use tempdir::TempDir;
|
||||
use urlencoding::encode;
|
||||
|
||||
use meilisearch_http::data::Data;
|
||||
use meilisearch_http::option::{IndexerOpts, Opt};
|
||||
use meilisearch_http::option::{IndexerOpts, MaxMemory, Opt};
|
||||
|
||||
use super::index::Index;
|
||||
use super::service::Service;
|
||||
@ -90,7 +90,11 @@ pub fn default_settings(dir: impl AsRef<Path>) -> Opt {
|
||||
schedule_snapshot: false,
|
||||
snapshot_interval_sec: 0,
|
||||
import_dump: None,
|
||||
indexer_options: IndexerOpts::default(),
|
||||
indexer_options: IndexerOpts {
|
||||
// memory has to be unlimited because several meilisearch are running in test context.
|
||||
max_memory: MaxMemory::unlimited(),
|
||||
..Default::default()
|
||||
},
|
||||
log_level: "off".into(),
|
||||
}
|
||||
}
|
||||
|
@ -16,9 +16,9 @@ static DEFAULT_SETTINGS_VALUES: Lazy<HashMap<&'static str, Value>> = Lazy::new(|
|
||||
json!([
|
||||
"words",
|
||||
"typo",
|
||||
"sort",
|
||||
"proximity",
|
||||
"attribute",
|
||||
"sort",
|
||||
"exactness"
|
||||
]),
|
||||
);
|
||||
@ -53,9 +53,9 @@ async fn get_settings() {
|
||||
json!([
|
||||
"words",
|
||||
"typo",
|
||||
"sort",
|
||||
"proximity",
|
||||
"attribute",
|
||||
"sort",
|
||||
"exactness"
|
||||
])
|
||||
);
|
||||
|
Loading…
x
Reference in New Issue
Block a user