Merge branch 'release-v0.27.0' into stable

Commit cd2239eb2d by Clémentine Urquizar, 2022-05-09 10:43:51 +02:00
No known key found for this signature in database
GPG Key ID: D8E7CC7422E77E1A
51 changed files with 3597 additions and 1231 deletions

View File

@ -1,7 +1,7 @@
contact_links:
- name: Feature request
- name: Feature request & feedback
url: https://github.com/meilisearch/product/discussions/categories/feedback-feature-proposal
about: The feature requests are not managed in this repository, please open a discussion in our dedicated product repository
about: The feature requests and feedback regarding the already existing features are not managed in this repository. Please open a discussion in our dedicated product repository
- name: Documentation issue
url: https://github.com/meilisearch/documentation/issues/new
about: For documentation issues, open an issue or a PR in the documentation repository

View File

@ -6,7 +6,7 @@ name: Publish binaries to release
jobs:
publish:
name: Publish for ${{ matrix.os }}
name: Publish binary for ${{ matrix.os }}
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
@ -39,7 +39,7 @@ jobs:
tag: ${{ github.ref }}
publish-aarch64:
name: Publish to GitHub
name: Publish binary for aarch64
runs-on: ${{ matrix.os }}
continue-on-error: false
strategy:

View File

@ -36,6 +36,25 @@ jobs:
command: test
args: --locked --release
# We run tests in debug also, to make sure that the debug_assertions are hit
test-debug:
name: Run tests in debug
runs-on: ubuntu-18.04
steps:
- uses: actions/checkout@v2
- uses: actions-rs/toolchain@v1
with:
profile: minimal
toolchain: stable
override: true
- name: Cache dependencies
uses: Swatinem/rust-cache@v1.3.0
- name: Run tests in debug
uses: actions-rs/cargo@v1
with:
command: test
args: --locked
clippy:
name: Run Clippy
runs-on: ubuntu-18.04
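
Note: the new test-debug job exists because debug_assertions are disabled in release builds, so a `--release` test run never exercises `debug_assert!` checks. A minimal standalone Rust illustration (not taken from the repository):

fn checked_ratio(num: f64, den: f64) -> f64 {
    // Compiled out when built with --release, so only a debug-profile test run hits it.
    debug_assert!(den != 0.0, "denominator must be non-zero");
    num / den
}

fn main() {
    // Panics in a debug build, silently prints "inf" in a release build.
    println!("{}", checked_ratio(1.0, 0.0));
}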

Cargo.lock (generated, 1444 changes): file diff suppressed because it is too large.

View File

@ -1,7 +1,9 @@
[workspace]
resolver = "2"
members = [
"meilisearch-http",
"meilisearch-error",
"meilisearch-lib",
"meilisearch-auth",
"permissive-json-pointer",
]

View File

@ -1,42 +1,22 @@
# Compile
FROM alpine:3.14 AS compiler
FROM rust:alpine3.14 AS compiler
RUN apk update --quiet \
&& apk add -q --no-cache curl build-base
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
RUN apk add -q --update-cache --no-cache build-base openssl-dev
WORKDIR /meilisearch
COPY Cargo.lock .
COPY Cargo.toml .
COPY meilisearch-auth/Cargo.toml meilisearch-auth/
COPY meilisearch-error/Cargo.toml meilisearch-error/
COPY meilisearch-http/Cargo.toml meilisearch-http/
COPY meilisearch-lib/Cargo.toml meilisearch-lib/
ENV RUSTFLAGS="-C target-feature=-crt-static"
# Create dummy main.rs files for each workspace member to be able to compile all the dependencies
RUN find . -type d -name "meilisearch-*" | xargs -I{} sh -c 'mkdir {}/src; echo "fn main() { }" > {}/src/main.rs;'
# Use `cargo build` instead of `cargo vendor` because we need to not only download but compile dependencies too
RUN if [ "$TARGETPLATFORM" = "linux/arm64" ]; then \
export JEMALLOC_SYS_WITH_LG_PAGE=16; \
fi && \
$HOME/.cargo/bin/cargo build --release
# Cleanup dummy main.rs files
RUN find . -path "*/src/main.rs" -delete
ARG COMMIT_SHA
ARG COMMIT_DATE
ENV COMMIT_SHA=${COMMIT_SHA} COMMIT_DATE=${COMMIT_DATE}
ENV RUSTFLAGS="-C target-feature=-crt-static"
COPY . .
RUN if [ "$TARGETPLATFORM" = "linux/arm64" ]; then \
RUN set -eux; \
apkArch="$(apk --print-arch)"; \
if [ "$apkArch" = "aarch64" ]; then \
export JEMALLOC_SYS_WITH_LG_PAGE=16; \
fi && \
$HOME/.cargo/bin/cargo build --release
cargo build --release
# Run
FROM alpine:3.14
@ -47,9 +27,20 @@ ENV MEILI_SERVER_PROVIDER docker
RUN apk update --quiet \
&& apk add -q --no-cache libgcc tini curl
COPY --from=compiler /meilisearch/target/release/meilisearch .
# add meilisearch to the `/bin` so you can run it from anywhere and it's easy
# to find.
COPY --from=compiler /meilisearch/target/release/meilisearch /bin/meilisearch
# To stay compatible with the older version of the container (pre v0.27.0) we're
# going to symlink the meilisearch binary in the path to `/meilisearch`
RUN ln -s /bin/meilisearch /meilisearch
# This directory should hold all the data related to meilisearch so we're going
# to move our PWD in there.
# We don't want to put the meilisearch binary
WORKDIR /meili_data
EXPOSE 7700/tcp
ENTRYPOINT ["tini", "--"]
CMD ./meilisearch
CMD /bin/meilisearch

View File

@ -1,6 +1,6 @@
MIT License
Copyright (c) 2019-2022 Meilisearch
Copyright (c) 2019-2022 Meili SAS
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal

View File

@ -2,8 +2,9 @@ status = [
'Tests on ubuntu-18.04',
'Tests on macos-latest',
'Tests on windows-latest',
'Run Clippy',
'Run Rustfmt'
# 'Run Clippy',
'Run Rustfmt',
'Run tests in debug',
]
pr_status = ['Milestone Check']
# 3 hours timeout

View File

@ -1,15 +1,15 @@
[package]
name = "meilisearch-auth"
version = "0.26.1"
version = "0.27.0"
edition = "2021"
[dependencies]
enum-iterator = "0.7.0"
heed = { git = "https://github.com/Kerollmops/heed", tag = "v0.12.1" }
sha2 = "0.9.6"
meilisearch-error = { path = "../meilisearch-error" }
serde_json = { version = "1.0.67", features = ["preserve_order"] }
time = { version = "0.3.7", features = ["serde-well-known", "formatting", "parsing", "macros"] }
milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.26.4" }
rand = "0.8.4"
serde = { version = "1.0.130", features = ["derive"] }
thiserror = "1.0.28"
serde = { version = "1.0.136", features = ["derive"] }
serde_json = { version = "1.0.79", features = ["preserve_order"] }
sha2 = "0.10.2"
thiserror = "1.0.30"
time = { version = "0.3.7", features = ["serde-well-known", "formatting", "parsing", "macros"] }

View File

@ -25,7 +25,7 @@ pub enum AuthControllerError {
}
internal_error!(
AuthControllerError: heed::Error,
AuthControllerError: milli::heed::Error,
std::io::Error,
serde_json::Error,
std::str::Utf8Error

View File

@ -8,8 +8,8 @@ use std::path::Path;
use std::str;
use std::sync::Arc;
use heed::types::{ByteSlice, DecodeIgnore, SerdeJson};
use heed::{Database, Env, EnvOpenOptions, RwTxn};
use milli::heed::types::{ByteSlice, DecodeIgnore, SerdeJson};
use milli::heed::{Database, Env, EnvOpenOptions, RwTxn};
use time::OffsetDateTime;
use super::error::Result;
@ -39,7 +39,7 @@ impl Drop for HeedAuthStore {
}
}
pub fn open_auth_store_env(path: &Path) -> heed::Result<heed::Env> {
pub fn open_auth_store_env(path: &Path) -> milli::heed::Result<milli::heed::Env> {
let mut options = EnvOpenOptions::new();
options.map_size(AUTH_STORE_SIZE); // 1GB
options.max_dbs(2);
@ -203,7 +203,7 @@ impl HeedAuthStore {
/// optionally on a specific index, for a given key.
pub struct KeyIdActionCodec;
impl<'a> heed::BytesDecode<'a> for KeyIdActionCodec {
impl<'a> milli::heed::BytesDecode<'a> for KeyIdActionCodec {
type DItem = (KeyId, Action, Option<&'a [u8]>);
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
@ -218,7 +218,7 @@ impl<'a> heed::BytesDecode<'a> for KeyIdActionCodec {
}
}
impl<'a> heed::BytesEncode<'a> for KeyIdActionCodec {
impl<'a> milli::heed::BytesEncode<'a> for KeyIdActionCodec {
type EItem = (&'a KeyId, &'a Action, Option<&'a [u8]>);
fn bytes_encode((key_id, action, index): &Self::EItem) -> Option<Cow<[u8]>> {

View File

@ -1,15 +1,15 @@
[package]
name = "meilisearch-error"
version = "0.26.1"
version = "0.27.0"
authors = ["marin <postma.marin@protonmail.com>"]
edition = "2021"
[dependencies]
actix-web = { version = "4", default-features = false }
actix-web = { version = "4.0.1", default-features = false }
proptest = { version = "1.0.0", optional = true }
proptest-derive = { version = "0.3.0", optional = true }
serde = { version = "1.0.130", features = ["derive"] }
serde_json = "1.0.69"
serde = { version = "1.0.136", features = ["derive"] }
serde_json = "1.0.79"
[features]
test-traits = ["proptest", "proptest-derive"]

View File

@ -120,6 +120,7 @@ pub enum Code {
IndexAlreadyExists,
IndexNotFound,
InvalidIndexUid,
InvalidMinWordLengthForTypo,
// invalid state error
InvalidState,
@ -271,6 +272,9 @@ impl Code {
InvalidApiKeyDescription => {
ErrCode::invalid("invalid_api_key_description", StatusCode::BAD_REQUEST)
}
InvalidMinWordLengthForTypo => {
ErrCode::invalid("invalid_min_word_length_for_typo", StatusCode::BAD_REQUEST)
}
}
}
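
For illustration, an error response carrying the new code would presumably follow the same message/code/type/link shape asserted by the tests later in this diff. Only the "code" and "type" values below are confirmed by the diff; the message text and link slug are extrapolations:

use serde_json::json;

fn main() {
    // Hypothetical payload shape for the new error code.
    let error = json!({
        "message": "The minimum word size for one/two typos is invalid.",
        "code": "invalid_min_word_length_for_typo",
        "type": "invalid_request",
        "link": "https://docs.meilisearch.com/errors#invalid_min_word_length_for_typo"
    });
    println!("{}", error);
}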

View File

@ -4,90 +4,89 @@ description = "Meilisearch HTTP server"
edition = "2021"
license = "MIT"
name = "meilisearch-http"
version = "0.26.1"
version = "0.27.0"
[[bin]]
name = "meilisearch"
path = "src/main.rs"
[build-dependencies]
static-files = { version = "0.2.1", optional = true }
anyhow = { version = "1.0.43", optional = true }
cargo_toml = { version = "0.9", optional = true }
anyhow = { version = "1.0.56", optional = true }
cargo_toml = { version = "0.11.4", optional = true }
hex = { version = "0.4.3", optional = true }
reqwest = { version = "0.11.4", features = ["blocking", "rustls-tls"], default-features = false, optional = true }
sha-1 = { version = "0.9.8", optional = true }
tempfile = { version = "3.2.0", optional = true }
vergen = { version = "5.1.15", default-features = false, features = ["git"] }
reqwest = { version = "0.11.9", features = ["blocking", "rustls-tls"], default-features = false, optional = true }
sha-1 = { version = "0.10.0", optional = true }
static-files = { version = "0.2.3", optional = true }
tempfile = { version = "3.3.0", optional = true }
vergen = { version = "7.0.0", default-features = false, features = ["git"] }
zip = { version = "0.5.13", optional = true }
[dependencies]
actix-cors = "0.6"
actix-web = { version = "4", features = ["rustls"] }
actix-cors = "0.6.1"
actix-web = { version = "4.0.1", default-features = false, features = ["macros", "compress-brotli", "compress-gzip", "cookies", "rustls"] }
actix-web-static-files = { git = "https://github.com/kilork/actix-web-static-files.git", rev = "2d3b6160", optional = true }
anyhow = { version = "1.0.43", features = ["backtrace"] }
arc-swap = "1.3.2"
async-stream = "0.3.2"
async-trait = "0.1.51"
anyhow = { version = "1.0.56", features = ["backtrace"] }
async-stream = "0.3.3"
async-trait = "0.1.52"
bstr = "0.2.17"
byte-unit = { version = "4.0.12", default-features = false, features = ["std", "serde"] }
byte-unit = { version = "4.0.14", default-features = false, features = ["std", "serde"] }
bytes = "1.1.0"
crossbeam-channel = "0.5.1"
clap = { version = "3.1.6", features = ["derive", "env"] }
crossbeam-channel = "0.5.2"
either = "1.6.1"
env_logger = "0.9.0"
flate2 = "1.0.21"
flate2 = "1.0.22"
fst = "0.4.7"
futures = "0.3.17"
futures-util = "0.3.17"
heed = { git = "https://github.com/Kerollmops/heed", tag = "v0.12.1" }
http = "0.2.4"
indexmap = { version = "1.7.0", features = ["serde-1"] }
iso8601-duration = "0.1.0"
itertools = "0.10.1"
jsonwebtoken = "7"
futures = "0.3.21"
futures-util = "0.3.21"
http = "0.2.6"
indexmap = { version = "1.8.0", features = ["serde-1"] }
itertools = "0.10.3"
jsonwebtoken = "8.0.1"
log = "0.4.14"
meilisearch-auth = { path = "../meilisearch-auth" }
meilisearch-error = { path = "../meilisearch-error" }
meilisearch-lib = { path = "../meilisearch-lib" }
mime = "0.3.16"
num_cpus = "1.13.0"
num_cpus = "1.13.1"
obkv = "0.2.0"
once_cell = "1.8.0"
parking_lot = "0.11.2"
once_cell = "1.10.0"
parking_lot = "0.12.0"
pin-project-lite = "0.2.8"
platform-dirs = "0.3.0"
rand = "0.8.4"
rand = "0.8.5"
rayon = "1.5.1"
regex = "1.5.4"
rustls = "0.20.2"
rustls-pemfile = "0.2"
regex = "1.5.5"
rustls = "0.20.4"
rustls-pemfile = "0.3.0"
segment = { version = "0.2.0", optional = true }
serde = { version = "1.0.130", features = ["derive"] }
serde_json = { version = "1.0.67", features = ["preserve_order"] }
sha2 = "0.9.6"
siphasher = "0.3.7"
slice-group-by = "0.2.6"
static-files = { version = "0.2.1", optional = true }
clap = { version = "3.0", features = ["derive", "env"] }
sysinfo = "0.20.2"
tar = "0.4.37"
tempfile = "3.2.0"
thiserror = "1.0.28"
serde = { version = "1.0.136", features = ["derive"] }
serde_json = { version = "1.0.79", features = ["preserve_order"] }
sha2 = "0.10.2"
siphasher = "0.3.10"
slice-group-by = "0.3.0"
static-files = { version = "0.2.3", optional = true }
sysinfo = "0.23.5"
tar = "0.4.38"
tempfile = "3.3.0"
thiserror = "1.0.30"
time = { version = "0.3.7", features = ["serde-well-known", "formatting", "parsing", "macros"] }
tokio = { version = "1.11.0", features = ["full"] }
tokio-stream = "0.1.7"
tokio = { version = "1.17.0", features = ["full"] }
tokio-stream = "0.1.8"
uuid = { version = "0.8.2", features = ["serde"] }
walkdir = "2.3.2"
pin-project-lite = "0.2.8"
[dev-dependencies]
actix-rt = "2.2.0"
actix-rt = "2.7.0"
assert-json-diff = "2.0.1"
maplit = "1.0.2"
paste = "1.0.5"
paste = "1.0.6"
serde_url_params = "0.2.1"
urlencoding = "2.1.0"
[features]
default = ["analytics", "mini-dashboard"]
analytics = ["segment"]
mini-dashboard = [
"actix-web-static-files",
"static-files",
@ -99,12 +98,10 @@ mini-dashboard = [
"tempfile",
"zip",
]
analytics = ["segment"]
default = ["analytics", "mini-dashboard"]
[target.'cfg(target_os = "linux")'.dependencies]
tikv-jemallocator = "0.4.1"
tikv-jemallocator = "0.4.3"
[package.metadata.mini-dashboard]
assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.1.9/build.zip"
sha1 = "b1833c3e5dc6b5d9d519ae4834935ae6c8a47024"
assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.1.10/build.zip"
sha1 = "1adf96592c267425c110bfefc36b7fc6bfb0f93d"

View File

@ -8,7 +8,10 @@ use actix_web::http::header::USER_AGENT;
use actix_web::HttpRequest;
use http::header::CONTENT_TYPE;
use meilisearch_auth::SearchRules;
use meilisearch_lib::index::{SearchQuery, SearchResult};
use meilisearch_lib::index::{
SearchQuery, SearchResult, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER,
DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG,
};
use meilisearch_lib::index_controller::Stats;
use meilisearch_lib::MeiliSearch;
use once_cell::sync::Lazy;
@ -355,6 +358,13 @@ pub struct SearchAggregator {
// pagination
max_limit: usize,
max_offset: usize,
// formatting
highlight_pre_tag: bool,
highlight_post_tag: bool,
crop_marker: bool,
matches: bool,
crop_length: bool,
}
impl SearchAggregator {
@ -405,6 +415,12 @@ impl SearchAggregator {
ret.max_limit = query.limit;
ret.max_offset = query.offset.unwrap_or_default();
ret.highlight_pre_tag = query.highlight_pre_tag != DEFAULT_HIGHLIGHT_PRE_TAG;
ret.highlight_post_tag = query.highlight_post_tag != DEFAULT_HIGHLIGHT_POST_TAG;
ret.crop_marker = query.crop_marker != DEFAULT_CROP_MARKER;
ret.crop_length = query.crop_length != DEFAULT_CROP_LENGTH;
ret.matches = query.matches;
ret
}
@ -452,6 +468,12 @@ impl SearchAggregator {
// pagination
self.max_limit = self.max_limit.max(other.max_limit);
self.max_offset = self.max_offset.max(other.max_offset);
self.highlight_pre_tag |= other.highlight_pre_tag;
self.highlight_post_tag |= other.highlight_post_tag;
self.crop_marker |= other.crop_marker;
self.matches |= other.matches;
self.crop_length |= other.crop_length;
}
pub fn into_event(self, user: &User, event_name: &str) -> Option<Track> {
@ -489,6 +511,13 @@ impl SearchAggregator {
"max_limit": self.max_limit,
"max_offset": self.max_offset,
},
"formatting": {
"highlight_pre_tag": self.highlight_pre_tag,
"highlight_post_tag": self.highlight_post_tag,
"crop_marker": self.crop_marker,
"matches": self.matches,
"crop_length": self.crop_length,
},
});
Some(Track {
@ -535,7 +564,7 @@ impl DocumentsAggregator {
.headers()
.get(CONTENT_TYPE)
.map(|s| s.to_str().unwrap_or("unkown"))
.unwrap()
.unwrap_or("unkown")
.to_string();
ret.content_types.insert(content_type);
ret.index_creation = index_creation;
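
The five new formatting fields are recorded as booleans ("did this request override the default?") and folded together with |= when aggregators are merged. A toy model of that fold (the struct subset and request values are invented):

#[derive(Default)]
struct FormattingStats {
    highlight_pre_tag: bool,
    crop_marker: bool,
}

fn main() {
    // Once any request in the batch overrides a default, the aggregated flag stays true.
    let requests = [(false, false), (true, false), (false, true)];
    let mut agg = FormattingStats::default();
    for (pre, marker) in requests {
        agg.highlight_pre_tag |= pre;
        agg.crop_marker |= marker;
    }
    assert!(agg.highlight_pre_tag && agg.crop_marker);
}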

View File

@ -70,11 +70,9 @@ impl<P, D> GuardedData<P, D> {
where
P: Policy + 'static,
{
Ok(tokio::task::spawn_blocking(move || {
P::authenticate(auth, token.as_ref(), index.as_deref())
})
.await
.map_err(|e| ResponseError::from_msg(e.to_string(), Code::Internal))?)
tokio::task::spawn_blocking(move || P::authenticate(auth, token.as_ref(), index.as_deref()))
.await
.map_err(|e| ResponseError::from_msg(e.to_string(), Code::Internal))
}
}
@ -131,8 +129,7 @@ pub trait Policy {
}
pub mod policies {
use jsonwebtoken::{dangerous_insecure_decode, decode, Algorithm, DecodingKey, Validation};
use once_cell::sync::Lazy;
use jsonwebtoken::{decode, Algorithm, DecodingKey, Validation};
use serde::{Deserialize, Serialize};
use time::OffsetDateTime;
@ -141,11 +138,25 @@ pub mod policies {
// reexport actions in policies in order to be used in routes configuration.
pub use meilisearch_auth::actions;
pub static TENANT_TOKEN_VALIDATION: Lazy<Validation> = Lazy::new(|| Validation {
validate_exp: false,
algorithms: vec![Algorithm::HS256, Algorithm::HS384, Algorithm::HS512],
..Default::default()
});
fn tenant_token_validation() -> Validation {
let mut validation = Validation::default();
validation.validate_exp = false;
validation.required_spec_claims.remove("exp");
validation.algorithms = vec![Algorithm::HS256, Algorithm::HS384, Algorithm::HS512];
validation
}
/// Extracts the key prefix used to sign the payload from the payload, without performing any validation.
fn extract_key_prefix(token: &str) -> Option<String> {
let mut validation = tenant_token_validation();
validation.insecure_disable_signature_validation();
let dummy_key = DecodingKey::from_secret(b"secret");
let token_data = decode::<Claims>(token, &dummy_key, &validation).ok()?;
// get token fields without validating it.
let Claims { api_key_prefix, .. } = token_data.claims;
Some(api_key_prefix)
}
pub struct MasterPolicy;
@ -204,27 +215,7 @@ pub mod policies {
return None;
}
// get token fields without validating it.
let Claims {
search_rules,
exp,
api_key_prefix,
} = dangerous_insecure_decode::<Claims>(token).ok()?.claims;
// Check index access if an index restriction is provided.
if let Some(index) = index {
if !search_rules.is_index_authorized(index) {
return None;
}
}
// Check if token is expired.
if let Some(exp) = exp {
if OffsetDateTime::now_utc().unix_timestamp() > exp {
return None;
}
}
let api_key_prefix = extract_key_prefix(token)?;
// check if parent key is authorized to do the action.
if auth
.is_key_authorized(api_key_prefix.as_bytes(), Action::Search, index)
@ -232,15 +223,29 @@ pub mod policies {
{
// Check if tenant token is valid.
let key = auth.generate_key(&api_key_prefix)?;
decode::<Claims>(
let data = decode::<Claims>(
token,
&DecodingKey::from_secret(key.as_bytes()),
&TENANT_TOKEN_VALIDATION,
&tenant_token_validation(),
)
.ok()?;
// Check index access if an index restriction is provided.
if let Some(index) = index {
if !data.claims.search_rules.is_index_authorized(index) {
return None;
}
}
// Check if token is expired.
if let Some(exp) = data.claims.exp {
if OffsetDateTime::now_utc().unix_timestamp() > exp {
return None;
}
}
return auth
.get_key_filters(api_key_prefix, Some(search_rules))
.get_key_filters(api_key_prefix, Some(data.claims.search_rules))
.ok();
}
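
A self-contained sketch of the jsonwebtoken 8 pattern used here (the Claims struct, the keys, and the prefix value are stand-ins): the claims are first decoded with signature validation disabled to learn which API key prefix signed the tenant token, and only then is the token verified with the real key derived from that prefix.

use jsonwebtoken::{decode, encode, Algorithm, DecodingKey, EncodingKey, Header, Validation};
use serde::{Deserialize, Serialize};

#[derive(Serialize, Deserialize)]
struct Claims {
    api_key_prefix: String,
}

fn tenant_token_validation() -> Validation {
    let mut validation = Validation::default();
    validation.validate_exp = false;
    validation.required_spec_claims.remove("exp");
    validation.algorithms = vec![Algorithm::HS256, Algorithm::HS384, Algorithm::HS512];
    validation
}

fn main() -> Result<(), jsonwebtoken::errors::Error> {
    let real_key = b"key-derived-from-the-api-key";
    let token = encode(
        &Header::default(),
        &Claims { api_key_prefix: "ak_1234".into() },
        &EncodingKey::from_secret(real_key),
    )?;

    // Step 1: read the prefix without checking the signature (any dummy key works).
    let mut insecure = tenant_token_validation();
    insecure.insecure_disable_signature_validation();
    let prefix = decode::<Claims>(&token, &DecodingKey::from_secret(b"secret"), &insecure)?
        .claims
        .api_key_prefix;
    assert_eq!(prefix, "ak_1234");

    // Step 2: verify the token for real with the key looked up from that prefix.
    decode::<Claims>(&token, &DecodingKey::from_secret(real_key), &tenant_token_validation())?;
    Ok(())
}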

View File

@ -1,10 +1,11 @@
use meilisearch_lib::heed::Env;
use walkdir::WalkDir;
pub trait EnvSizer {
fn size(&self) -> u64;
}
impl EnvSizer for heed::Env {
impl EnvSizer for Env {
fn size(&self) -> u64 {
WalkDir::new(self.path())
.into_iter()

View File

@ -19,6 +19,7 @@ use serde::Serialize;
const POSSIBLE_ENV: [&str; 2] = ["development", "production"];
#[derive(Debug, Clone, Parser, Serialize)]
#[clap(version)]
pub struct Opt {
/// The destination where the database must be created.
#[clap(long, env = "MEILI_DB_PATH", default_value = "./data.ms")]
@ -145,8 +146,8 @@ pub struct Opt {
#[clap(long, env = "MEILI_LOG_LEVEL", default_value = "info")]
pub log_level: String,
#[serde(skip)]
#[clap(skip)]
#[serde(flatten)]
#[clap(flatten)]
pub indexer_options: IndexerOpts,
#[serde(flatten)]
@ -258,3 +259,13 @@ fn load_ocsp(filename: &Option<PathBuf>) -> anyhow::Result<Vec<u8>> {
Ok(ret)
}
#[cfg(test)]
mod test {
use super::*;
#[test]
fn test_valid_opt() {
assert!(Opt::try_parse_from(Some("")).is_ok());
}
}

View File

@ -2,7 +2,10 @@ use actix_web::{web, HttpRequest, HttpResponse};
use log::debug;
use meilisearch_auth::IndexSearchRules;
use meilisearch_error::ResponseError;
use meilisearch_lib::index::{default_crop_length, SearchQuery, DEFAULT_SEARCH_LIMIT};
use meilisearch_lib::index::{
default_crop_length, default_crop_marker, default_highlight_post_tag,
default_highlight_pre_tag, SearchQuery, DEFAULT_SEARCH_LIMIT,
};
use meilisearch_lib::MeiliSearch;
use serde::Deserialize;
use serde_json::Value;
@ -35,6 +38,12 @@ pub struct SearchQueryGet {
#[serde(default = "Default::default")]
matches: bool,
facets_distribution: Option<String>,
#[serde(default = "default_highlight_pre_tag")]
highlight_pre_tag: String,
#[serde(default = "default_highlight_post_tag")]
highlight_post_tag: String,
#[serde(default = "default_crop_marker")]
crop_marker: String,
}
impl From<SearchQueryGet> for SearchQuery {
@ -77,6 +86,9 @@ impl From<SearchQueryGet> for SearchQuery {
sort,
matches: other.matches,
facets_distribution,
highlight_pre_tag: other.highlight_pre_tag,
highlight_post_tag: other.highlight_post_tag,
crop_marker: other.crop_marker,
}
}
}
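
The same three formatting parameters are accepted in a JSON search body as well, since they end up on SearchQuery. A hedged example (index, query, and attribute values invented), written with the json! macro used throughout the test suite:

use serde_json::json;

fn main() {
    // Hedged example: only the parameter names come from this diff.
    let payload = json!({
        "q": "pesti",
        "attributesToHighlight": ["cattos"],
        "attributesToCrop": ["doggos.name"],
        "highlightPreTag": "<mark>",
        "highlightPostTag": "</mark>",
        "cropMarker": " … ",
    });
    println!("{}", payload);
}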

View File

@ -21,11 +21,11 @@ macro_rules! make_setting_route {
use meilisearch_lib::milli::update::Setting;
use meilisearch_lib::{index::Settings, index_controller::Update, MeiliSearch};
use crate::analytics::Analytics;
use crate::extractors::authentication::{policies::*, GuardedData};
use crate::extractors::sequential_extractor::SeqHandler;
use crate::task::SummarizedTaskView;
use meilisearch_error::ResponseError;
use $crate::analytics::Analytics;
use $crate::extractors::authentication::{policies::*, GuardedData};
use $crate::extractors::sequential_extractor::SeqHandler;
use $crate::task::SummarizedTaskView;
pub async fn delete(
meilisearch: GuardedData<ActionPolicy<{ actions::SETTINGS_UPDATE }>, MeiliSearch>,
@ -145,8 +145,8 @@ make_setting_route!(
"SortableAttributes Updated".to_string(),
json!({
"sortable_attributes": {
"total": setting.as_ref().map(|sort| sort.len()).unwrap_or(0),
"has_geo": setting.as_ref().map(|sort| sort.contains("_geo")).unwrap_or(false),
"total": setting.as_ref().map(|sort| sort.len()),
"has_geo": setting.as_ref().map(|sort| sort.contains("_geo")),
},
}),
Some(req),
@ -161,6 +161,47 @@ make_setting_route!(
"displayedAttributes"
);
make_setting_route!(
"/typo-tolerance",
meilisearch_lib::index::updates::TypoSettings,
typo_tolerance,
"typoTolerance",
analytics,
|setting: &Option<meilisearch_lib::index::updates::TypoSettings>, req: &HttpRequest| {
use serde_json::json;
analytics.publish(
"TypoTolerance Updated".to_string(),
json!({
"typo_tolerance": {
"enabled": setting.as_ref().map(|s| !matches!(s.enabled, Setting::Set(false))),
"disable_on_attributes": setting
.as_ref()
.and_then(|s| s.disable_on_attributes.as_ref().set().map(|m| !m.is_empty())),
"disable_on_words": setting
.as_ref()
.and_then(|s| s.disable_on_words.as_ref().set().map(|m| !m.is_empty())),
"min_word_size_for_one_typo": setting
.as_ref()
.and_then(|s| s.min_word_size_for_typos
.as_ref()
.set()
.map(|s| s.one_typo.set()))
.flatten(),
"min_word_size_for_two_typos": setting
.as_ref()
.and_then(|s| s.min_word_size_for_typos
.as_ref()
.set()
.map(|s| s.two_typos.set()))
.flatten(),
},
}),
Some(req),
);
}
);
make_setting_route!(
"/searchable-attributes",
Vec<String>,
@ -174,7 +215,7 @@ make_setting_route!(
"SearchableAttributes Updated".to_string(),
json!({
"searchable_attributes": {
"total": setting.as_ref().map(|searchable| searchable.len()).unwrap_or(0),
"total": setting.as_ref().map(|searchable| searchable.len()),
},
}),
Some(req),
@ -246,7 +287,8 @@ generate_configure!(
distinct_attribute,
stop_words,
synonyms,
ranking_rules
ranking_rules,
typo_tolerance
);
pub async fn update_all(
@ -265,15 +307,46 @@ pub async fn update_all(
"sort_position": settings.ranking_rules.as_ref().set().map(|sort| sort.iter().position(|s| s == "sort")),
},
"searchable_attributes": {
"total": settings.searchable_attributes.as_ref().set().map(|searchable| searchable.len()).unwrap_or(0),
"total": settings.searchable_attributes.as_ref().set().map(|searchable| searchable.len()),
},
"sortable_attributes": {
"total": settings.sortable_attributes.as_ref().set().map(|sort| sort.len()).unwrap_or(0),
"has_geo": settings.sortable_attributes.as_ref().set().map(|sort| sort.iter().any(|s| s == "_geo")).unwrap_or(false),
"total": settings.sortable_attributes.as_ref().set().map(|sort| sort.len()),
"has_geo": settings.sortable_attributes.as_ref().set().map(|sort| sort.iter().any(|s| s == "_geo")),
},
"filterable_attributes": {
"total": settings.filterable_attributes.as_ref().set().map(|filter| filter.len()).unwrap_or(0),
"has_geo": settings.filterable_attributes.as_ref().set().map(|filter| filter.iter().any(|s| s == "_geo")).unwrap_or(false),
"total": settings.filterable_attributes.as_ref().set().map(|filter| filter.len()),
"has_geo": settings.filterable_attributes.as_ref().set().map(|filter| filter.iter().any(|s| s == "_geo")),
},
"typo_tolerance": {
"enabled": settings.typo_tolerance
.as_ref()
.set()
.and_then(|s| s.enabled.as_ref().set())
.copied(),
"disable_on_attributes": settings.typo_tolerance
.as_ref()
.set()
.and_then(|s| s.disable_on_attributes.as_ref().set().map(|m| !m.is_empty())),
"disable_on_words": settings.typo_tolerance
.as_ref()
.set()
.and_then(|s| s.disable_on_words.as_ref().set().map(|m| !m.is_empty())),
"min_word_size_for_one_typo": settings.typo_tolerance
.as_ref()
.set()
.and_then(|s| s.min_word_size_for_typos
.as_ref()
.set()
.map(|s| s.one_typo.set()))
.flatten(),
"min_word_size_for_two_typos": settings.typo_tolerance
.as_ref()
.set()
.and_then(|s| s.min_word_size_for_typos
.as_ref()
.set()
.map(|s| s.two_typos.set()))
.flatten(),
},
}),
Some(&req),
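
A hedged sketch of what a payload for the new /typo-tolerance route could look like; the field names follow the camelCase serialization of the TypoSettings and MinWordSizeTyposSetting structs added later in this diff, while the concrete values are invented:

use serde_json::json;

fn main() {
    let typo_tolerance = json!({
        "enabled": true,
        "minWordSizeForTypos": { "oneTypo": 5, "twoTypos": 9 },
        "disableOnWords": ["skyscraper"],
        "disableOnAttributes": ["serial_number"]
    });
    println!("{}", typo_tolerance);
}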

View File

@ -1,4 +1,6 @@
#![allow(dead_code)]
use clap::Parser;
use std::path::Path;
use actix_web::http::StatusCode;
@ -126,36 +128,18 @@ pub fn default_settings(dir: impl AsRef<Path>) -> Opt {
Opt {
db_path: dir.as_ref().join("db"),
dumps_dir: dir.as_ref().join("dump"),
http_addr: "127.0.0.1:7700".to_owned(),
master_key: None,
env: "development".to_owned(),
#[cfg(all(not(debug_assertions), feature = "analytics"))]
no_analytics: true,
max_index_size: Byte::from_unit(4.0, ByteUnit::GiB).unwrap(),
max_task_db_size: Byte::from_unit(4.0, ByteUnit::GiB).unwrap(),
http_payload_size_limit: Byte::from_unit(10.0, ByteUnit::MiB).unwrap(),
ssl_cert_path: None,
ssl_key_path: None,
ssl_auth_path: None,
ssl_ocsp_path: None,
ssl_require_auth: false,
ssl_resumption: false,
ssl_tickets: false,
import_snapshot: None,
ignore_missing_snapshot: false,
ignore_snapshot_if_db_exists: false,
snapshot_dir: ".".into(),
schedule_snapshot: false,
snapshot_interval_sec: 0,
import_dump: None,
ignore_missing_dump: false,
ignore_dump_if_db_exists: false,
indexer_options: IndexerOpts {
// memory has to be unlimited because several meilisearch are running in test context.
max_memory: MaxMemory::unlimited(),
..Default::default()
max_indexing_memory: MaxMemory::unlimited(),
..Parser::parse_from(None as Option<&str>)
},
log_level: "off".into(),
scheduler_options: meilisearch_lib::options::SchedulerConfig::default(),
..Parser::parse_from(None as Option<&str>)
}
}
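
The rewritten helper leans on clap's ability to produce a fully defaulted Opt by parsing an empty argument list (Parser::parse_from(None as Option<&str>)), then overrides only the fields the tests care about via struct-update syntax. A minimal standalone sketch of that idiom with a hypothetical options struct:

use clap::Parser;

#[derive(Parser, Debug)]
struct DemoOpt {
    // Same default as the real --db-path option.
    #[clap(long, default_value = "./data.ms")]
    db_path: String,
    #[clap(long, default_value = "info")]
    log_level: String,
}

fn main() {
    // Parse from an empty argv to get every default, then override selected fields.
    let opt = DemoOpt {
        log_level: "off".into(),
        ..Parser::parse_from(None as Option<&str>)
    };
    assert_eq!(opt.db_path, "./data.ms");
    assert_eq!(opt.log_level, "off");
}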

View File

@ -212,7 +212,7 @@ async fn error_add_malformed_csv_documents() {
assert_eq!(
response["message"],
json!(
r#"The `csv` payload provided is malformed. `CSV error: record 1 (line: 2, byte: 12): found record with 3 fields, but the previous record has 2 fields`."#
r#"The `csv` payload provided is malformed: `CSV error: record 1 (line: 2, byte: 12): found record with 3 fields, but the previous record has 2 fields`."#
)
);
assert_eq!(response["code"], json!("malformed_payload"));
@ -236,7 +236,7 @@ async fn error_add_malformed_csv_documents() {
assert_eq!(
response["message"],
json!(
r#"The `csv` payload provided is malformed. `CSV error: record 1 (line: 2, byte: 12): found record with 3 fields, but the previous record has 2 fields`."#
r#"The `csv` payload provided is malformed: `CSV error: record 1 (line: 2, byte: 12): found record with 3 fields, but the previous record has 2 fields`."#
)
);
assert_eq!(response["code"], json!("malformed_payload"));
@ -307,6 +307,58 @@ async fn error_add_malformed_json_documents() {
response["link"],
json!("https://docs.meilisearch.com/errors#malformed_payload")
);
// truncate
// length = 100
let long = "0123456789".repeat(10);
let document = format!("\"{}\"", long);
let req = test::TestRequest::put()
.uri("/indexes/dog/documents")
.set_payload(document)
.insert_header(("content-type", "application/json"))
.to_request();
let res = test::call_service(&app, req).await;
let body = test::read_body(res).await;
let response: Value = serde_json::from_slice(&body).unwrap_or_default();
assert_eq!(status_code, 400);
assert_eq!(
response["message"],
json!(
r#"The `json` payload provided is malformed. `Couldn't serialize document value: invalid type: string "0123456789012345678901234567...890123456789", expected a documents, or a sequence of documents. at line 1 column 102`."#
)
);
assert_eq!(response["code"], json!("malformed_payload"));
assert_eq!(response["type"], json!("invalid_request"));
assert_eq!(
response["link"],
json!("https://docs.meilisearch.com/errors#malformed_payload")
);
// add one more char to the long string to test if the truncating works.
let document = format!("\"{}m\"", long);
let req = test::TestRequest::put()
.uri("/indexes/dog/documents")
.set_payload(document)
.insert_header(("content-type", "application/json"))
.to_request();
let res = test::call_service(&app, req).await;
let body = test::read_body(res).await;
let response: Value = serde_json::from_slice(&body).unwrap_or_default();
assert_eq!(status_code, 400);
assert_eq!(
response["message"],
json!(
r#"The `json` payload provided is malformed. `Couldn't serialize document value: invalid type: string "0123456789012345678901234567...90123456789m", expected a documents, or a sequence of documents. at line 1 column 103`."#
)
);
assert_eq!(response["code"], json!("malformed_payload"));
assert_eq!(response["type"], json!("invalid_request"));
assert_eq!(
response["link"],
json!("https://docs.meilisearch.com/errors#malformed_payload")
);
}
#[actix_rt::test]
@ -961,7 +1013,7 @@ async fn error_add_documents_invalid_geo_field() {
assert_eq!(response["status"], "failed");
let expected_error = json!({
"message": r#"The document with the id: `11` contains an invalid _geo field: `foobar`."#,
"message": r#"The document with the id: `11` contains an invalid `_geo` field."#,
"code": "invalid_geo_field",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_geo_field"

View File

@ -155,7 +155,7 @@ async fn test_get_all_documents_offset() {
.await;
assert_eq!(code, 200);
assert_eq!(response.as_array().unwrap().len(), 20);
assert_eq!(response.as_array().unwrap()[0]["id"], 13);
assert_eq!(response.as_array().unwrap()[0]["id"], 5);
}
#[actix_rt::test]

View File

@ -36,6 +36,38 @@ async fn search_unexisting_parameter() {
.await;
}
#[actix_rt::test]
async fn search_invalid_highlight_and_crop_tags() {
let server = Server::new().await;
let index = server.index("test");
let fields = &["cropMarker", "highlightPreTag", "highlightPostTag"];
for field in fields {
// object
index
.search(
json!({field.to_string(): {"marker": "<crop>"}}),
|response, code| {
assert_eq!(code, 400, "field {} passing object: {}", &field, response);
assert_eq!(response["code"], "bad_request");
},
)
.await;
// array
index
.search(
json!({field.to_string(): ["marker", "<crop>"]}),
|response, code| {
assert_eq!(code, 400, "field {} passing array: {}", &field, response);
assert_eq!(response["code"], "bad_request");
},
)
.await;
}
}
#[actix_rt::test]
async fn filter_invalid_syntax_object() {
let server = Server::new().await;

View File

@ -0,0 +1,376 @@
use super::*;
use crate::common::Server;
use serde_json::json;
#[actix_rt::test]
async fn formatted_contain_wildcard() {
let server = Server::new().await;
let index = server.index("test");
index
.update_settings(json!({ "displayedAttributes": ["id", "cattos"] }))
.await;
let documents = NESTED_DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.wait_task(1).await;
let (response, code) = index
.search_post(json!({ "q": "pesti", "attributesToRetrieve": ["father", "mother"], "attributesToHighlight": ["father", "mother", "*"], "attributesToCrop": ["doggos"] }))
.await;
assert_eq!(code, 200, "{}", response);
assert_eq!(
response["hits"][0],
json!({
"_formatted": {
"id": "852",
"cattos": "<em>pesti</em>",
}
})
);
let (response, code) = index
.search_post(json!({ "q": "pesti", "attributesToRetrieve": ["*"] }))
.await;
assert_eq!(code, 200, "{}", response);
assert_eq!(
response["hits"][0],
json!({
"id": 852,
"cattos": "pesti",
})
);
let (response, code) = index
.search_post(
json!({ "q": "pesti", "attributesToRetrieve": ["*"], "attributesToHighlight": ["id"] }),
)
.await;
assert_eq!(code, 200, "{}", response);
assert_eq!(
response["hits"][0],
json!({
"id": 852,
"cattos": "pesti",
"_formatted": {
"id": "852",
"cattos": "pesti",
}
})
);
let (response, code) = index
.search_post(
json!({ "q": "pesti", "attributesToRetrieve": ["*"], "attributesToCrop": ["*"] }),
)
.await;
assert_eq!(code, 200, "{}", response);
assert_eq!(
response["hits"][0],
json!({
"id": 852,
"cattos": "pesti",
"_formatted": {
"id": "852",
"cattos": "pesti",
}
})
);
let (response, code) = index
.search_post(json!({ "q": "pesti", "attributesToCrop": ["*"] }))
.await;
assert_eq!(code, 200, "{}", response);
assert_eq!(
response["hits"][0],
json!({
"id": 852,
"cattos": "pesti",
"_formatted": {
"id": "852",
"cattos": "pesti",
}
})
);
}
#[actix_rt::test]
async fn format_nested() {
let server = Server::new().await;
let index = server.index("test");
let documents = NESTED_DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.wait_task(0).await;
let (response, code) = index
.search_post(json!({ "q": "pesti", "attributesToRetrieve": ["doggos"] }))
.await;
assert_eq!(code, 200, "{}", response);
assert_eq!(
response["hits"][0],
json!({
"doggos": [
{
"name": "bobby",
"age": 2,
},
{
"name": "buddy",
"age": 4,
},
],
})
);
let (response, code) = index
.search_post(json!({ "q": "pesti", "attributesToRetrieve": ["doggos.name"] }))
.await;
assert_eq!(code, 200, "{}", response);
assert_eq!(
response["hits"][0],
json!({
"doggos": [
{
"name": "bobby",
},
{
"name": "buddy",
},
],
})
);
let (response, code) = index
.search_post(json!({ "q": "pesti", "attributesToRetrieve": [], "attributesToHighlight": ["doggos.name"] }))
.await;
assert_eq!(code, 200, "{}", response);
assert_eq!(
response["hits"][0],
json!({
"_formatted": {
"doggos": [
{
"name": "bobby",
},
{
"name": "buddy",
},
],
},
})
);
let (response, code) = index
.search_post(json!({ "q": "pesti", "attributesToRetrieve": [], "attributesToCrop": ["doggos.name"] }))
.await;
assert_eq!(code, 200, "{}", response);
assert_eq!(
response["hits"][0],
json!({
"_formatted": {
"doggos": [
{
"name": "bobby",
},
{
"name": "buddy",
},
],
},
})
);
let (response, code) = index
.search_post(json!({ "q": "pesti", "attributesToRetrieve": ["doggos.name"], "attributesToHighlight": ["doggos.age"] }))
.await;
assert_eq!(code, 200, "{}", response);
assert_eq!(
response["hits"][0],
json!({
"doggos": [
{
"name": "bobby",
},
{
"name": "buddy",
},
],
"_formatted": {
"doggos": [
{
"name": "bobby",
"age": "2",
},
{
"name": "buddy",
"age": "4",
},
],
},
})
);
let (response, code) = index
.search_post(json!({ "q": "pesti", "attributesToRetrieve": [], "attributesToHighlight": ["doggos.age"], "attributesToCrop": ["doggos.name"] }))
.await;
assert_eq!(code, 200, "{}", response);
assert_eq!(
response["hits"][0],
json!({
"_formatted": {
"doggos": [
{
"name": "bobby",
"age": "2",
},
{
"name": "buddy",
"age": "4",
},
],
},
})
);
}
#[actix_rt::test]
async fn displayedattr_2_smol() {
let server = Server::new().await;
let index = server.index("test");
// not enough displayed for the other settings
index
.update_settings(json!({ "displayedAttributes": ["id"] }))
.await;
let documents = NESTED_DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.wait_task(1).await;
let (response, code) = index
.search_post(json!({ "attributesToRetrieve": ["father", "id"], "attributesToHighlight": ["mother"], "attributesToCrop": ["cattos"] }))
.await;
assert_eq!(code, 200, "{}", response);
assert_eq!(
response["hits"][0],
json!({
"id": 852,
})
);
let (response, code) = index
.search_post(json!({ "attributesToRetrieve": ["id"] }))
.await;
assert_eq!(code, 200, "{}", response);
assert_eq!(
response["hits"][0],
json!({
"id": 852,
})
);
let (response, code) = index
.search_post(json!({ "attributesToHighlight": ["id"] }))
.await;
assert_eq!(code, 200, "{}", response);
assert_eq!(
response["hits"][0],
json!({
"id": 852,
"_formatted": {
"id": "852",
}
})
);
let (response, code) = index
.search_post(json!({ "attributesToCrop": ["id"] }))
.await;
assert_eq!(code, 200, "{}", response);
assert_eq!(
response["hits"][0],
json!({
"id": 852,
"_formatted": {
"id": "852",
}
})
);
let (response, code) = index
.search_post(json!({ "attributesToHighlight": ["id"], "attributesToCrop": ["id"] }))
.await;
assert_eq!(code, 200, "{}", response);
assert_eq!(
response["hits"][0],
json!({
"id": 852,
"_formatted": {
"id": "852",
}
})
);
let (response, code) = index
.search_post(json!({ "attributesToHighlight": ["cattos"] }))
.await;
assert_eq!(code, 200, "{}", response);
assert_eq!(
response["hits"][0],
json!({
"id": 852,
})
);
let (response, code) = index
.search_post(json!({ "attributesToCrop": ["cattos"] }))
.await;
assert_eq!(code, 200, "{}", response);
assert_eq!(
response["hits"][0],
json!({
"id": 852,
})
);
let (response, code) = index
.search_post(json!({ "attributesToRetrieve": ["cattos"] }))
.await;
assert_eq!(code, 200, "{}", response);
assert_eq!(response["hits"][0], json!({}));
let (response, code) = index
.search_post(
json!({ "attributesToRetrieve": ["cattos"], "attributesToHighlight": ["cattos"], "attributesToCrop": ["cattos"] }),
)
.await;
assert_eq!(code, 200, "{}", response);
assert_eq!(response["hits"][0], json!({}));
let (response, code) = index
.search_post(json!({ "attributesToRetrieve": ["cattos"], "attributesToHighlight": ["id"] }))
.await;
assert_eq!(code, 200, "{}", response);
assert_eq!(
response["hits"][0],
json!({
"_formatted": {
"id": "852",
}
})
);
let (response, code) = index
.search_post(json!({ "attributesToRetrieve": ["cattos"], "attributesToCrop": ["id"] }))
.await;
assert_eq!(code, 200, "{}", response);
assert_eq!(
response["hits"][0],
json!({
"_formatted": {
"id": "852",
}
})
);
}

View File

@ -2,38 +2,96 @@
// should be tested in its own module to isolate tests and keep the tests readable.
mod errors;
mod formatted;
use crate::common::Server;
use once_cell::sync::Lazy;
use serde_json::{json, Value};
static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
pub(self) static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
json!([
{
"title": "Shazam!",
"id": "287947"
"id": "287947",
},
{
"title": "Captain Marvel",
"id": "299537"
"id": "299537",
},
{
"title": "Escape Room",
"id": "522681"
"id": "522681",
},
{ "title": "How to Train Your Dragon: The Hidden World", "id": "166428"
{
"title": "How to Train Your Dragon: The Hidden World",
"id": "166428",
},
{
"title": "Glass",
"id": "450465"
"id": "450465",
}
])
});
pub(self) static NESTED_DOCUMENTS: Lazy<Value> = Lazy::new(|| {
json!([
{
"id": 852,
"father": "jean",
"mother": "michelle",
"doggos": [
{
"name": "bobby",
"age": 2,
},
{
"name": "buddy",
"age": 4,
},
],
"cattos": "pesti",
},
{
"id": 654,
"father": "pierre",
"mother": "sabine",
"doggos": [
{
"name": "gros bill",
"age": 8,
},
],
"cattos": ["simba", "pestiféré"],
},
{
"id": 750,
"father": "romain",
"mother": "michelle",
"cattos": ["enigma"],
},
{
"id": 951,
"father": "jean-baptiste",
"mother": "sophie",
"doggos": [
{
"name": "turbo",
"age": 5,
},
{
"name": "fast",
"age": 6,
},
],
"cattos": ["moumoute", "gomez"],
},
])
});
#[actix_rt::test]
async fn simple_placeholder_search() {
let server = Server::new().await;
let index = server.index("test");
let index = server.index("basic");
let documents = DOCUMENTS.clone();
index.add_documents(documents, None).await;
@ -45,6 +103,18 @@ async fn simple_placeholder_search() {
assert_eq!(response["hits"].as_array().unwrap().len(), 5);
})
.await;
let index = server.index("nested");
let documents = NESTED_DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.wait_task(1).await;
index
.search(json!({}), |response, code| {
assert_eq!(code, 200, "{}", response);
assert_eq!(response["hits"].as_array().unwrap().len(), 4);
})
.await;
}
#[actix_rt::test]
@ -62,6 +132,18 @@ async fn simple_search() {
assert_eq!(response["hits"].as_array().unwrap().len(), 1);
})
.await;
let index = server.index("nested");
let documents = NESTED_DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.wait_task(1).await;
index
.search(json!({"q": "pesti"}), |response, code| {
assert_eq!(code, 200, "{}", response);
assert_eq!(response["hits"].as_array().unwrap().len(), 2);
})
.await;
}
#[actix_rt::test]
@ -88,6 +170,27 @@ async fn search_multiple_params() {
},
)
.await;
let index = server.index("nested");
let documents = NESTED_DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.wait_task(1).await;
index
.search(
json!({
"q": "pesti",
"attributesToCrop": ["catto:2"],
"attributesToHighlight": ["catto"],
"limit": 2,
"offset": 0,
}),
|response, code| {
assert_eq!(code, 200, "{}", response);
assert_eq!(response["hits"].as_array().unwrap().len(), 2);
},
)
.await;
}
#[actix_rt::test]
@ -114,6 +217,43 @@ async fn search_with_filter_string_notation() {
},
)
.await;
let index = server.index("nested");
index
.update_settings(json!({"filterableAttributes": ["cattos", "doggos.age"]}))
.await;
let documents = NESTED_DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.wait_task(3).await;
index
.search(
json!({
"filter": "cattos = pesti"
}),
|response, code| {
assert_eq!(code, 200, "{}", response);
assert_eq!(response["hits"].as_array().unwrap().len(), 1);
assert_eq!(response["hits"][0]["id"], json!(852));
},
)
.await;
index
.search(
json!({
"filter": "doggos.age > 5"
}),
|response, code| {
assert_eq!(code, 200, "{}", response);
assert_eq!(response["hits"].as_array().unwrap().len(), 2);
assert_eq!(response["hits"][0]["id"], json!(654));
assert_eq!(response["hits"][1]["id"], json!(951));
},
)
.await;
}
#[actix_rt::test]
@ -170,6 +310,28 @@ async fn search_with_sort_on_numbers() {
},
)
.await;
let index = server.index("nested");
index
.update_settings(json!({"sortableAttributes": ["doggos.age"]}))
.await;
let documents = NESTED_DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.wait_task(3).await;
index
.search(
json!({
"sort": ["doggos.age:asc"]
}),
|response, code| {
assert_eq!(code, 200, "{}", response);
assert_eq!(response["hits"].as_array().unwrap().len(), 4);
},
)
.await;
}
#[actix_rt::test]
@ -196,6 +358,28 @@ async fn search_with_sort_on_strings() {
},
)
.await;
let index = server.index("nested");
index
.update_settings(json!({"sortableAttributes": ["doggos.name"]}))
.await;
let documents = NESTED_DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.wait_task(3).await;
index
.search(
json!({
"sort": ["doggos.name:asc"]
}),
|response, code| {
assert_eq!(code, 200, "{}", response);
assert_eq!(response["hits"].as_array().unwrap().len(), 4);
},
)
.await;
}
#[actix_rt::test]
@ -246,6 +430,85 @@ async fn search_facet_distribution() {
},
)
.await;
let index = server.index("nested");
index
.update_settings(json!({"filterableAttributes": ["father", "doggos.name"]}))
.await;
let documents = NESTED_DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.wait_task(3).await;
// TODO: TAMO: fix the test
index
.search(
json!({
// "facetsDistribution": ["father", "doggos.name"]
"facetsDistribution": ["father"]
}),
|response, code| {
assert_eq!(code, 200, "{}", response);
let dist = response["facetsDistribution"].as_object().unwrap();
assert_eq!(dist.len(), 1);
assert_eq!(
dist["father"],
json!({ "jean": 1, "pierre": 1, "romain": 1, "jean-baptiste": 1})
);
/*
assert_eq!(
dist["doggos.name"],
json!({ "bobby": 1, "buddy": 1, "gros bill": 1, "turbo": 1, "fast": 1})
);
*/
},
)
.await;
index
.update_settings(json!({"filterableAttributes": ["doggos"]}))
.await;
index.wait_task(4).await;
index
.search(
json!({
"facetsDistribution": ["doggos.name"]
}),
|response, code| {
assert_eq!(code, 200, "{}", response);
let dist = response["facetsDistribution"].as_object().unwrap();
assert_eq!(dist.len(), 1);
assert_eq!(
dist["doggos.name"],
json!({ "bobby": 1, "buddy": 1, "gros bill": 1, "turbo": 1, "fast": 1})
);
},
)
.await;
index
.search(
json!({
"facetsDistribution": ["doggos"]
}),
|response, code| {
assert_eq!(code, 200, "{}", response);
let dist = response["facetsDistribution"].as_object().unwrap();
dbg!(&dist);
assert_eq!(dist.len(), 3);
assert_eq!(
dist["doggos.name"],
json!({ "bobby": 1, "buddy": 1, "gros bill": 1, "turbo": 1, "fast": 1})
);
assert_eq!(
dist["doggos.age"],
json!({ "2": 1, "4": 1, "5": 1, "6": 1, "8": 1})
);
},
)
.await;
}
#[actix_rt::test]
@ -265,5 +528,81 @@ async fn displayed_attributes() {
.search_post(json!({ "attributesToRetrieve": ["title", "id"] }))
.await;
assert_eq!(code, 200, "{}", response);
assert!(response["hits"].get("title").is_none());
assert!(response["hits"][0].get("title").is_some());
}
#[actix_rt::test]
async fn placeholder_search_is_hard_limited() {
let server = Server::new().await;
let index = server.index("test");
let documents: Vec<_> = (0..1200)
.map(|i| json!({ "id": i, "text": "I am unique!" }))
.collect();
index.add_documents(documents.into(), None).await;
index.wait_task(0).await;
index
.search(
json!({
"limit": 1500,
}),
|response, code| {
assert_eq!(code, 200, "{}", response);
assert_eq!(response["hits"].as_array().unwrap().len(), 1000);
},
)
.await;
index
.search(
json!({
"offset": 800,
"limit": 400,
}),
|response, code| {
assert_eq!(code, 200, "{}", response);
assert_eq!(response["hits"].as_array().unwrap().len(), 200);
},
)
.await;
}
#[actix_rt::test]
async fn search_is_hard_limited() {
let server = Server::new().await;
let index = server.index("test");
let documents: Vec<_> = (0..1200)
.map(|i| json!({ "id": i, "text": "I am unique!" }))
.collect();
index.add_documents(documents.into(), None).await;
index.wait_task(0).await;
index
.search(
json!({
"q": "unique",
"limit": 1500,
}),
|response, code| {
assert_eq!(code, 200, "{}", response);
assert_eq!(response["hits"].as_array().unwrap().len(), 1000);
},
)
.await;
index
.search(
json!({
"q": "unique",
"offset": 800,
"limit": 400,
}),
|response, code| {
assert_eq!(code, 200, "{}", response);
assert_eq!(response["hits"].as_array().unwrap().len(), 200);
},
)
.await;
}
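
The two tests above pin down the arithmetic of the 1000-hit hard cap: a limit of 1500 returns 1000 hits, and offset 800 with limit 400 returns only the 200 hits left under the cap. A small sketch of that window arithmetic (the constant name and helper are assumptions; the tests only assert the resulting counts):

fn main() {
    const HARD_RESULT_LIMIT: usize = 1000;

    fn returned_hits(offset: usize, limit: usize, matching: usize) -> usize {
        let end = (offset + limit).min(HARD_RESULT_LIMIT).min(matching);
        end.saturating_sub(offset)
    }

    assert_eq!(returned_hits(0, 1500, 1200), 1000); // limit 1500 is capped to 1000
    assert_eq!(returned_hits(800, 400, 1200), 200); // 800 + 400 overshoots the cap by 200
}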

View File

@ -43,7 +43,7 @@ async fn get_settings() {
let (response, code) = index.settings().await;
assert_eq!(code, 200);
let settings = response.as_object().unwrap();
assert_eq!(settings.keys().len(), 8);
assert_eq!(settings.keys().len(), 9);
assert_eq!(settings["displayedAttributes"], json!(["*"]));
assert_eq!(settings["searchableAttributes"], json!(["*"]));
assert_eq!(settings["filterableAttributes"], json!([]));

View File

@ -1,67 +1,66 @@
[package]
name = "meilisearch-lib"
version = "0.26.1"
version = "0.27.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
actix-web = { version = "4", default-features = false }
anyhow = { version = "1.0.43", features = ["backtrace"] }
async-stream = "0.3.2"
async-trait = "0.1.51"
byte-unit = { version = "4.0.12", default-features = false, features = ["std"] }
actix-web = { version = "4.0.1", default-features = false }
anyhow = { version = "1.0.56", features = ["backtrace"] }
async-stream = "0.3.3"
async-trait = "0.1.52"
atomic_refcell = "0.1.8"
byte-unit = { version = "4.0.14", default-features = false, features = ["std"] }
bytes = "1.1.0"
clap = { version = "3.1.6", features = ["derive", "env"] }
crossbeam-channel = "0.5.2"
csv = "1.1.6"
crossbeam-channel = "0.5.1"
derivative = "2.2.0"
either = "1.6.1"
flate2 = "1.0.21"
flate2 = "1.0.22"
fs_extra = "1.2.0"
fst = "0.4.7"
futures = "0.3.17"
futures-util = "0.3.17"
heed = { git = "https://github.com/Kerollmops/heed", tag = "v0.12.1" }
http = "0.2.4"
indexmap = { version = "1.7.0", features = ["serde-1"] }
itertools = "0.10.1"
futures = "0.3.21"
futures-util = "0.3.21"
http = "0.2.6"
indexmap = { version = "1.8.0", features = ["serde-1"] }
itertools = "0.10.3"
lazy_static = "1.4.0"
log = "0.4.14"
meilisearch-error = { path = "../meilisearch-error" }
meilisearch-auth = { path = "../meilisearch-auth" }
milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.22.2" }
meilisearch-error = { path = "../meilisearch-error" }
milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.26.4" }
mime = "0.3.16"
num_cpus = "1.13.0"
once_cell = "1.8.0"
parking_lot = "0.11.2"
rand = "0.8.4"
num_cpus = "1.13.1"
obkv = "0.2.0"
once_cell = "1.10.0"
parking_lot = "0.12.0"
permissive-json-pointer = { path = "../permissive-json-pointer" }
rand = "0.8.5"
rayon = "1.5.1"
regex = "1.5.4"
rustls = "0.19.1"
serde = { version = "1.0.130", features = ["derive"] }
serde_json = { version = "1.0.67", features = ["preserve_order"] }
siphasher = "0.3.7"
slice-group-by = "0.2.6"
clap = { version = "3.0", features = ["derive", "env"] }
tar = "0.4.37"
tempfile = "3.2.0"
thiserror = "1.0.28"
regex = "1.5.5"
reqwest = { version = "0.11.9", features = ["json", "rustls-tls"], default-features = false, optional = true }
rustls = "0.20.4"
serde = { version = "1.0.136", features = ["derive"] }
serde_json = { version = "1.0.79", features = ["preserve_order"] }
siphasher = "0.3.10"
slice-group-by = "0.3.0"
sysinfo = "0.23.5"
tar = "0.4.38"
tempfile = "3.3.0"
thiserror = "1.0.30"
time = { version = "0.3.7", features = ["serde-well-known", "formatting", "parsing", "macros"] }
tokio = { version = "1.11.0", features = ["full"] }
tokio = { version = "1.17.0", features = ["full"] }
uuid = { version = "0.8.2", features = ["serde"] }
walkdir = "2.3.2"
obkv = "0.2.0"
pin-project = "1.0.8"
whoami = { version = "1.1.3", optional = true }
reqwest = { version = "0.11.4", features = ["json", "rustls-tls"], default-features = false, optional = true }
sysinfo = "0.20.2"
derivative = "2.2.0"
fs_extra = "1.2.0"
atomic_refcell = "0.1.8"
whoami = { version = "1.2.1", optional = true }
[dev-dependencies]
actix-rt = "2.2.0"
mockall = "0.10.2"
paste = "1.0.5"
nelson = { git = "https://github.com/MarinPostma/nelson.git", rev = "675f13885548fb415ead8fbb447e9e6d9314000a"}
actix-rt = "2.7.0"
meilisearch-error = { path = "../meilisearch-error", features = ["test-traits"] }
mockall = "0.11.0"
nelson = { git = "https://github.com/MarinPostma/nelson.git", rev = "675f13885548fb415ead8fbb447e9e6d9314000a"}
paste = "1.0.6"
proptest = "1.0.0"
proptest-derive = "0.3.0"

View File

@ -1,4 +1,5 @@
use std::fmt;
use std::borrow::Borrow;
use std::fmt::{self, Debug, Display};
use std::io::{self, BufRead, BufReader, BufWriter, Cursor, Read, Seek, Write};
use meilisearch_error::{internal_error, Code, ErrorCode};
@ -23,17 +24,40 @@ impl fmt::Display for PayloadType {
}
}
#[derive(thiserror::Error, Debug)]
#[derive(Debug)]
pub enum DocumentFormatError {
#[error("An internal error has occurred. `{0}`.")]
Internal(Box<dyn std::error::Error + Send + Sync + 'static>),
#[error("The `{1}` payload provided is malformed. `{0}`.")]
MalformedPayload(
Box<dyn std::error::Error + Send + Sync + 'static>,
PayloadType,
),
MalformedPayload(Box<milli::documents::Error>, PayloadType),
}
impl Display for DocumentFormatError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Self::Internal(e) => write!(f, "An internal error has occurred: `{}`.", e),
Self::MalformedPayload(me, b) => match me.borrow() {
milli::documents::Error::JsonError(se) => {
// https://github.com/meilisearch/meilisearch/issues/2107
// The user input may be insanely long. We need to truncate it.
let mut serde_msg = se.to_string();
let ellipsis = "...";
if serde_msg.len() > 100 + ellipsis.len() {
serde_msg.replace_range(50..serde_msg.len() - 85, ellipsis);
}
write!(
f,
"The `{}` payload provided is malformed. `Couldn't serialize document value: {}`.",
b, serde_msg
)
}
_ => write!(f, "The `{}` payload provided is malformed: `{}`.", b, me),
},
}
}
}
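
Restated standalone, the truncation keeps the first 50 and the last 85 bytes of an over-long serde message and collapses the middle into "..." (the helper-free form below mirrors the branch above; the sample message is invented):

fn main() {
    let long_value = "0123456789".repeat(20);
    let mut serde_msg = format!(
        "invalid type: string \"{}\", expected a documents, or a sequence of documents.",
        long_value
    );
    let ellipsis = "...";
    if serde_msg.len() > 100 + ellipsis.len() {
        // Keep the first 50 and the last 85 bytes, drop the middle.
        serde_msg.replace_range(50..serde_msg.len() - 85, ellipsis);
    }
    assert_eq!(serde_msg.len(), 50 + ellipsis.len() + 85);
    println!("{}", serde_msg);
}
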
impl std::error::Error for DocumentFormatError {}
impl From<(PayloadType, milli::documents::Error)> for DocumentFormatError {
fn from((ty, error): (PayloadType, milli::documents::Error)) -> Self {
match error {

View File

@ -41,6 +41,9 @@ impl ErrorCode for MilliError<'_> {
UserError::CriterionError(_) => Code::InvalidRankingRule,
UserError::InvalidGeoField { .. } => Code::InvalidGeoField,
UserError::SortError(_) => Code::Sort,
UserError::InvalidMinTypoWordLenSetting(_, _) => {
Code::InvalidMinWordLengthForTypo
}
}
}
}

View File

@ -3,9 +3,9 @@ use std::io::{BufReader, Seek, SeekFrom, Write};
use std::path::Path;
use anyhow::Context;
use heed::{EnvOpenOptions, RoTxn};
use indexmap::IndexMap;
use milli::documents::DocumentBatchReader;
use milli::heed::{EnvOpenOptions, RoTxn};
use milli::update::{IndexDocumentsConfig, IndexerConfig};
use serde::{Deserialize, Serialize};
@ -146,7 +146,7 @@ impl Index {
indexer_config,
config,
|_| (),
);
)?;
builder.add_documents(documents_reader)?;
builder.execute()?;
}

View File

@ -21,7 +21,7 @@ pub enum IndexError {
internal_error!(
IndexError: std::io::Error,
heed::Error,
milli::heed::Error,
fst::Error,
serde_json::Error,
update_file_store::UpdateFileStoreError,

View File

@ -5,7 +5,8 @@ use std::ops::Deref;
use std::path::Path;
use std::sync::Arc;
use heed::{EnvOpenOptions, RoTxn};
use fst::IntoStreamer;
use milli::heed::{EnvOpenOptions, RoTxn};
use milli::update::{IndexerConfig, Setting};
use milli::{obkv_to_json, FieldDistribution, FieldId};
use serde::{Deserialize, Serialize};
@ -17,6 +18,7 @@ use crate::EnvSizer;
use super::error::IndexError;
use super::error::Result;
use super::updates::{MinWordSizeTyposSetting, TypoSettings};
use super::{Checked, Settings};
pub type Document = Map<String, Value>;
@ -37,7 +39,7 @@ impl IndexMeta {
Self::new_txn(index, &txn)
}
pub fn new_txn(index: &Index, txn: &heed::RoTxn) -> Result<Self> {
pub fn new_txn(index: &Index, txn: &milli::heed::RoTxn) -> Result<Self> {
let created_at = index.created_at(txn)?;
let updated_at = index.updated_at(txn)?;
let primary_key = index.primary_key(txn)?.map(String::from);
@ -168,6 +170,31 @@ impl Index {
})
.collect();
let min_typo_word_len = MinWordSizeTyposSetting {
one_typo: Setting::Set(self.min_word_len_one_typo(txn)?),
two_typos: Setting::Set(self.min_word_len_two_typos(txn)?),
};
let disabled_words = self
.exact_words(txn)?
.into_stream()
.into_strs()?
.into_iter()
.collect();
let disabled_attributes = self
.exact_attributes(txn)?
.into_iter()
.map(String::from)
.collect();
let typo_tolerance = TypoSettings {
enabled: Setting::Set(self.authorize_typos(txn)?),
min_word_size_for_typos: Setting::Set(min_typo_word_len),
disable_on_words: Setting::Set(disabled_words),
disable_on_attributes: Setting::Set(disabled_attributes),
};
Ok(Settings {
displayed_attributes: match displayed_attributes {
Some(attrs) => Setting::Set(attrs),
@ -186,6 +213,7 @@ impl Index {
None => Setting::Reset,
},
synonyms: Setting::Set(synonyms),
typo_tolerance: Setting::Set(typo_tolerance),
_kind: PhantomData,
})
}
@ -250,7 +278,7 @@ impl Index {
fn fields_to_display<S: AsRef<str>>(
&self,
txn: &heed::RoTxn,
txn: &milli::heed::RoTxn,
attributes_to_retrieve: &Option<Vec<S>>,
fields_ids_map: &milli::FieldsIdsMap,
) -> Result<Vec<FieldId>> {
@ -278,7 +306,7 @@ impl Index {
let _txn = self.write_txn()?;
self.inner
.env
.copy_to_path(dst, heed::CompactionOption::Enabled)?;
.copy_to_path(dst, milli::heed::CompactionOption::Enabled)?;
Ok(())
}
}

View File

@ -1,4 +1,8 @@
pub use search::{default_crop_length, SearchQuery, SearchResult, DEFAULT_SEARCH_LIMIT};
pub use search::{
default_crop_length, default_crop_marker, default_highlight_post_tag,
default_highlight_pre_tag, SearchQuery, SearchResult, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER,
DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT,
};
pub use updates::{apply_settings_to_builder, Checked, Facets, Settings, Unchecked};
mod dump;

File diff suppressed because it is too large.

View File

@ -37,6 +37,37 @@ pub struct Checked;
#[derive(Clone, Default, Debug, Serialize, Deserialize, PartialEq)]
pub struct Unchecked;
#[cfg_attr(test, derive(proptest_derive::Arbitrary))]
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq)]
#[serde(deny_unknown_fields)]
#[serde(rename_all = "camelCase")]
pub struct MinWordSizeTyposSetting {
#[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
pub one_typo: Setting<u8>,
#[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
pub two_typos: Setting<u8>,
}
#[cfg_attr(test, derive(proptest_derive::Arbitrary))]
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq)]
#[serde(deny_unknown_fields)]
#[serde(rename_all = "camelCase")]
pub struct TypoSettings {
#[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
pub enabled: Setting<bool>,
#[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
pub min_word_size_for_typos: Setting<MinWordSizeTyposSetting>,
#[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
pub disable_on_words: Setting<BTreeSet<String>>,
#[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
pub disable_on_attributes: Setting<BTreeSet<String>>,
}
/// Holds all the settings for an index. `T` can either be `Checked` if they represent settings
whose validity is guaranteed, or `Unchecked` if they need to be validated. In the latter case, a
/// call to `check` will return a `Settings<Checked>` from a `Settings<Unchecked>`.
@ -80,6 +111,9 @@ pub struct Settings<T> {
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
#[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
pub distinct_attribute: Setting<String>,
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
#[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
pub typo_tolerance: Setting<TypoSettings>,
#[serde(skip)]
pub _kind: PhantomData<T>,
@ -96,6 +130,7 @@ impl Settings<Checked> {
stop_words: Setting::Reset,
synonyms: Setting::Reset,
distinct_attribute: Setting::Reset,
typo_tolerance: Setting::Reset,
_kind: PhantomData,
}
}
@ -110,6 +145,7 @@ impl Settings<Checked> {
stop_words,
synonyms,
distinct_attribute,
typo_tolerance,
..
} = self;
@ -122,6 +158,7 @@ impl Settings<Checked> {
stop_words,
synonyms,
distinct_attribute,
typo_tolerance,
_kind: PhantomData,
}
}
@ -160,6 +197,7 @@ impl Settings<Unchecked> {
stop_words: self.stop_words,
synonyms: self.synonyms,
distinct_attribute: self.distinct_attribute,
typo_tolerance: self.typo_tolerance,
_kind: PhantomData,
}
}
@ -176,7 +214,7 @@ pub struct Facets {
impl Index {
fn update_primary_key_txn<'a, 'b>(
&'a self,
txn: &mut heed::RwTxn<'a, 'b>,
txn: &mut milli::heed::RwTxn<'a, 'b>,
primary_key: String,
) -> Result<IndexMeta> {
let mut builder = milli::update::Settings::new(txn, self, self.indexer_config.as_ref());
@ -248,7 +286,7 @@ impl Index {
self.indexer_config.as_ref(),
config,
indexing_callback,
);
)?;
for content_uuid in contents.into_iter() {
let content_file = file_store.get_update(content_uuid)?;
@ -334,6 +372,61 @@ pub fn apply_settings_to_builder(
Setting::Reset => builder.reset_distinct_field(),
Setting::NotSet => (),
}
match settings.typo_tolerance {
Setting::Set(ref value) => {
match value.enabled {
Setting::Set(val) => builder.set_autorize_typos(val),
Setting::Reset => builder.reset_authorize_typos(),
Setting::NotSet => (),
}
match value.min_word_size_for_typos {
Setting::Set(ref setting) => {
match setting.one_typo {
Setting::Set(val) => builder.set_min_word_len_one_typo(val),
Setting::Reset => builder.reset_min_word_len_one_typo(),
Setting::NotSet => (),
}
match setting.two_typos {
Setting::Set(val) => builder.set_min_word_len_two_typos(val),
Setting::Reset => builder.reset_min_word_len_two_typos(),
Setting::NotSet => (),
}
}
Setting::Reset => {
builder.reset_min_word_len_one_typo();
builder.reset_min_word_len_two_typos();
}
Setting::NotSet => (),
}
match value.disable_on_words {
Setting::Set(ref words) => {
builder.set_exact_words(words.clone());
}
Setting::Reset => builder.reset_exact_words(),
Setting::NotSet => (),
}
match value.disable_on_attributes {
Setting::Set(ref words) => {
builder.set_exact_attributes(words.iter().cloned().collect())
}
Setting::Reset => builder.reset_exact_attributes(),
Setting::NotSet => (),
}
}
Setting::Reset => {
// all typo settings need to be reset here.
builder.reset_authorize_typos();
builder.reset_min_word_len_one_typo();
builder.reset_min_word_len_two_typos();
builder.reset_exact_words();
builder.reset_exact_attributes();
}
Setting::NotSet => (),
}
}
#[cfg(test)]
@ -362,6 +455,7 @@ pub(crate) mod test {
stop_words: Setting::NotSet,
synonyms: Setting::NotSet,
distinct_attribute: Setting::NotSet,
typo_tolerance: Setting::NotSet,
_kind: PhantomData::<Unchecked>,
};
@ -383,6 +477,7 @@ pub(crate) mod test {
stop_words: Setting::NotSet,
synonyms: Setting::NotSet,
distinct_attribute: Setting::NotSet,
typo_tolerance: Setting::NotSet,
_kind: PhantomData::<Unchecked>,
};

View File

@ -18,7 +18,7 @@ pub enum DumpActorError {
}
internal_error!(
DumpActorError: heed::Error,
DumpActorError: milli::heed::Error,
std::io::Error,
tokio::task::JoinError,
tokio::sync::oneshot::error::RecvError,

View File

@ -1,9 +1,9 @@
use std::path::Path;
use std::sync::Arc;
use heed::EnvOpenOptions;
use log::info;
use meilisearch_auth::AuthController;
use milli::heed::EnvOpenOptions;
use crate::analytics;
use crate::index_controller::dump_actor::Metadata;

View File

@ -48,8 +48,8 @@ pub type Payload = Box<
dyn Stream<Item = std::result::Result<Bytes, PayloadError>> + Send + Sync + 'static + Unpin,
>;
pub fn open_meta_env(path: &Path, size: usize) -> heed::Result<heed::Env> {
let mut options = heed::EnvOpenOptions::new();
pub fn open_meta_env(path: &Path, size: usize) -> milli::heed::Result<milli::heed::Env> {
let mut options = milli::heed::EnvOpenOptions::new();
options.map_size(size);
options.max_dbs(20);
options.open(path)
@ -178,15 +178,6 @@ impl IndexControllerBuilder {
.max_task_store_size
.ok_or_else(|| anyhow::anyhow!("Missing update database size"))?;
let db_exists = db_path.as_ref().exists();
if db_exists {
// Directory could be pre-created without any database in.
let db_is_empty = db_path.as_ref().read_dir()?.next().is_none();
if !db_is_empty {
versioning::check_version_file(db_path.as_ref())?;
}
}
if let Some(ref path) = self.import_snapshot {
log::info!("Loading from snapshot {:?}", path);
load_snapshot(
@ -207,6 +198,15 @@ impl IndexControllerBuilder {
)?;
}
let db_exists = db_path.as_ref().exists();
if db_exists {
// Directory could be pre-created without any database in.
let db_is_empty = db_path.as_ref().read_dir()?.next().is_none();
if !db_is_empty {
versioning::check_version_file(db_path.as_ref())?;
}
}
std::fs::create_dir_all(db_path.as_ref())?;
let meta_env = Arc::new(open_meta_env(db_path.as_ref(), task_store_size)?);
@ -651,6 +651,9 @@ mod test {
use crate::index::error::Result as IndexResult;
use crate::index::Index;
use crate::index::{
default_crop_marker, default_highlight_post_tag, default_highlight_pre_tag,
};
use crate::index_resolver::index_store::MockIndexStore;
use crate::index_resolver::meta_store::MockIndexMetaStore;
use crate::index_resolver::IndexResolver;
@ -691,6 +694,9 @@ mod test {
filter: None,
sort: None,
facets_distribution: None,
highlight_pre_tag: default_highlight_pre_tag(),
highlight_post_tag: default_highlight_post_tag(),
crop_marker: default_crop_marker(),
};
let result = SearchResult {

View File

@ -45,7 +45,7 @@ impl From<OneshotRecvError> for IndexResolverError {
}
internal_error!(
IndexResolverError: heed::Error,
IndexResolverError: milli::heed::Error,
uuid::Error,
std::io::Error,
tokio::task::JoinError,

View File

@ -4,8 +4,8 @@ use std::io::{BufRead, BufReader, Write};
use std::path::{Path, PathBuf};
use std::sync::Arc;
use heed::types::{SerdeBincode, Str};
use heed::{CompactionOption, Database, Env};
use milli::heed::types::{SerdeBincode, Str};
use milli::heed::{CompactionOption, Database, Env};
use serde::{Deserialize, Serialize};
use uuid::Uuid;
@ -56,7 +56,7 @@ impl Drop for HeedMetaStore {
}
impl HeedMetaStore {
pub fn new(env: Arc<heed::Env>) -> Result<Self> {
pub fn new(env: Arc<milli::heed::Env>) -> Result<Self> {
let db = env.create_database(Some("uuids"))?;
Ok(Self { env, db })
}
@ -153,7 +153,7 @@ impl HeedMetaStore {
Ok(())
}
pub fn load_dump(src: impl AsRef<Path>, env: Arc<heed::Env>) -> Result<()> {
pub fn load_dump(src: impl AsRef<Path>, env: Arc<milli::heed::Env>) -> Result<()> {
let src_indexes = src.as_ref().join(UUIDS_DB_PATH).join("data.jsonl");
let indexes = File::open(&src_indexes)?;
let mut indexes = BufReader::new(indexes);

View File

@ -7,10 +7,10 @@ use std::path::Path;
use std::sync::Arc;
use error::{IndexResolverError, Result};
use heed::Env;
use index_store::{IndexStore, MapIndexStore};
use meilisearch_error::ResponseError;
use meta_store::{HeedMetaStore, IndexMetaStore};
use milli::heed::Env;
use milli::update::{DocumentDeletionResult, IndexerConfig};
use serde::{Deserialize, Serialize};
use time::OffsetDateTime;
@ -39,7 +39,7 @@ pub fn create_index_resolver(
path: impl AsRef<Path>,
index_size: usize,
indexer_opts: &IndexerOpts,
meta_env: Arc<heed::Env>,
meta_env: Arc<milli::heed::Env>,
file_store: UpdateFileStore,
) -> anyhow::Result<HardStateIndexResolver> {
let uuid_store = HeedMetaStore::new(meta_env)?;

View File

@ -13,8 +13,8 @@ mod update_file_store;
use std::path::Path;
pub use index_controller::MeiliSearch;
pub use milli;
pub use milli::heed;
mod compression;
pub mod document_formats;
@ -25,7 +25,7 @@ pub trait EnvSizer {
fn size(&self) -> u64;
}
impl EnvSizer for heed::Env {
impl EnvSizer for milli::heed::Env {
fn size(&self) -> u64 {
WalkDir::new(self.path())
.into_iter()

View File

@ -1,21 +1,23 @@
use core::fmt;
use std::{convert::TryFrom, ops::Deref, str::FromStr};
use std::{convert::TryFrom, num::ParseIntError, ops::Deref, str::FromStr};
use byte_unit::{Byte, ByteError};
use clap::Parser;
use milli::{update::IndexerConfig, CompressionType};
use milli::update::IndexerConfig;
use serde::Serialize;
use sysinfo::{RefreshKind, System, SystemExt};
#[derive(Debug, Clone, Parser)]
#[derive(Debug, Clone, Parser, Serialize)]
pub struct IndexerOpts {
/// The amount of documents to skip before printing
/// a log regarding the indexing advancement.
#[clap(long, default_value = "100000")] // 100k
#[serde(skip)]
#[clap(long, default_value = "100000", hide = true)] // 100k
pub log_every_n: usize,
/// Grenad max number of chunks in bytes.
#[clap(long)]
#[serde(skip)]
#[clap(long, hide = true)]
pub max_nb_chunks: Option<usize>,
/// The maximum amount of memory the indexer will use. It defaults to 2/3
@ -25,23 +27,16 @@ pub struct IndexerOpts {
/// In case the engine is unable to retrieve the available memory, the engine will
try to use the memory it needs but without a real limit; this can lead to
Out-Of-Memory issues, and it is recommended to specify the amount of memory to use.
#[clap(long, default_value_t)]
pub max_memory: MaxMemory,
#[clap(long, env = "MEILI_MAX_INDEXING_MEMORY", default_value_t)]
pub max_indexing_memory: MaxMemory,
/// The name of the compression algorithm to use when compressing intermediate
/// Grenad chunks while indexing documents.
/// The maximum number of threads the indexer will use.
/// If the number set is higher than the real number of cores available in the machine,
/// it will use the maximum number of available cores.
///
/// Choosing a fast algorithm will make the indexing faster but may consume more memory.
#[clap(long, default_value = "snappy", possible_values = &["snappy", "zlib", "lz4", "lz4hc", "zstd"])]
pub chunk_compression_type: CompressionType,
/// The level of compression of the chosen algorithm.
#[clap(long, requires = "chunk-compression-type")]
pub chunk_compression_level: Option<u32>,
/// Number of parallel jobs for indexing, defaults to # of CPUs.
#[clap(long)]
pub indexing_jobs: Option<usize>,
/// It defaults to half of the available threads.
#[clap(long, env = "MEILI_MAX_INDEXING_THREADS", default_value_t)]
pub max_indexing_threads: MaxThreads,
}
#[derive(Debug, Clone, Parser, Default, Serialize)]
@ -74,15 +69,13 @@ impl TryFrom<&IndexerOpts> for IndexerConfig {
fn try_from(other: &IndexerOpts) -> Result<Self, Self::Error> {
let thread_pool = rayon::ThreadPoolBuilder::new()
.num_threads(other.indexing_jobs.unwrap_or(num_cpus::get() / 2))
.num_threads(*other.max_indexing_threads)
.build()?;
Ok(Self {
log_every_n: Some(other.log_every_n),
max_nb_chunks: other.max_nb_chunks,
max_memory: (*other.max_memory).map(|b| b.get_bytes() as usize),
chunk_compression_type: other.chunk_compression_type,
chunk_compression_level: other.chunk_compression_level,
max_memory: other.max_indexing_memory.map(|b| b.get_bytes() as usize),
thread_pool: Some(thread_pool),
max_positions_per_attributes: None,
..Default::default()
@ -95,16 +88,14 @@ impl Default for IndexerOpts {
Self {
log_every_n: 100_000,
max_nb_chunks: None,
max_memory: MaxMemory::default(),
chunk_compression_type: CompressionType::None,
chunk_compression_level: None,
indexing_jobs: None,
max_indexing_memory: MaxMemory::default(),
max_indexing_threads: MaxThreads::default(),
}
}
}
/// A type used to detect the max memory available and use 2/3 of it.
#[derive(Debug, Clone, Copy)]
#[derive(Debug, Clone, Copy, Serialize)]
pub struct MaxMemory(Option<Byte>);
impl FromStr for MaxMemory {
@ -159,3 +150,34 @@ fn total_memory_bytes() -> Option<u64> {
None
}
}
#[derive(Debug, Clone, Copy, Serialize)]
pub struct MaxThreads(usize);
impl FromStr for MaxThreads {
type Err = ParseIntError;
fn from_str(s: &str) -> Result<Self, Self::Err> {
usize::from_str(s).map(Self)
}
}
impl Default for MaxThreads {
fn default() -> Self {
MaxThreads(num_cpus::get() / 2)
}
}
impl fmt::Display for MaxThreads {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{}", self.0)
}
}
impl Deref for MaxThreads {
type Target = usize;
fn deref(&self) -> &Self::Target {
&self.0
}
}

View File

@ -149,7 +149,7 @@ impl SnapshotJob {
let env = open_meta_env(&self.src_path, self.meta_env_size)?;
let dst = path.join("data.mdb");
env.copy_to_path(dst, heed::CompactionOption::Enabled)?;
env.copy_to_path(dst, milli::heed::CompactionOption::Enabled)?;
Ok(())
}
@ -180,12 +180,12 @@ impl SnapshotJob {
let dst = dst.join("data.mdb");
let mut options = heed::EnvOpenOptions::new();
let mut options = milli::heed::EnvOpenOptions::new();
options.map_size(self.index_size);
let index = milli::Index::new(options, entry.path())?;
index
.env
.copy_to_path(dst, heed::CompactionOption::Enabled)?;
.copy_to_path(dst, milli::heed::CompactionOption::Enabled)?;
}
Ok(())
@ -198,7 +198,7 @@ impl SnapshotJob {
let dst = dst.join("data.mdb");
let env = open_auth_store_env(&auth_path)?;
env.copy_to_path(dst, heed::CompactionOption::Enabled)?;
env.copy_to_path(dst, milli::heed::CompactionOption::Enabled)?;
Ok(())
}

View File

@ -16,7 +16,7 @@ pub enum TaskError {
}
internal_error!(
TaskError: heed::Error,
TaskError: milli::heed::Error,
JoinError,
std::io::Error,
serde_json::Error,

View File

@ -5,8 +5,8 @@ use std::io::{BufWriter, Write};
use std::path::Path;
use std::sync::Arc;
use heed::{Env, RwTxn};
use log::debug;
use milli::heed::{Env, RwTxn};
use time::OffsetDateTime;
use super::error::TaskError;
@ -61,7 +61,7 @@ impl Clone for TaskStore {
}
impl TaskStore {
pub fn new(env: Arc<heed::Env>) -> Result<Self> {
pub fn new(env: Arc<milli::heed::Env>) -> Result<Self> {
let store = Arc::new(Store::new(env)?);
Ok(Self { store })
}
@ -248,7 +248,7 @@ pub mod test {
}
impl MockTaskStore {
pub fn new(env: Arc<heed::Env>) -> Result<Self> {
pub fn new(env: Arc<milli::heed::Env>) -> Result<Self> {
Ok(Self::Real(TaskStore::new(env)?))
}

View File

@ -1,5 +1,5 @@
#[allow(clippy::upper_case_acronyms)]
type BEU64 = heed::zerocopy::U64<heed::byteorder::BE>;
type BEU64 = milli::heed::zerocopy::U64<milli::heed::byteorder::BE>;
const UID_TASK_IDS: &str = "uid_task_id";
const TASKS: &str = "tasks";
@ -12,8 +12,8 @@ use std::ops::Range;
use std::result::Result as StdResult;
use std::sync::Arc;
use heed::types::{ByteSlice, OwnedType, SerdeJson, Unit};
use heed::{BytesDecode, BytesEncode, Database, Env, RoTxn, RwTxn};
use milli::heed::types::{ByteSlice, OwnedType, SerdeJson, Unit};
use milli::heed::{BytesDecode, BytesEncode, Database, Env, RoTxn, RwTxn};
use crate::tasks::task::{Task, TaskId};
@ -73,7 +73,7 @@ impl Store {
/// be in an invalid state, with dangling processing tasks.
/// You want to patch all un-finished tasks and put them in your pending
/// queue with the `reset_and_return_unfinished_update` method.
pub fn new(env: Arc<heed::Env>) -> Result<Self> {
pub fn new(env: Arc<milli::heed::Env>) -> Result<Self> {
let uids_task_ids = env.create_database(Some(UID_TASK_IDS))?;
let tasks = env.create_database(Some(TASKS))?;
@ -130,7 +130,7 @@ impl Store {
let range = from..limit
.map(|limit| (limit as u64).saturating_add(from))
.unwrap_or(u64::MAX);
let iter: Box<dyn Iterator<Item = StdResult<_, heed::Error>>> = match filter {
let iter: Box<dyn Iterator<Item = StdResult<_, milli::heed::Error>>> = match filter {
Some(
ref filter @ TaskFilter {
indexes: Some(_), ..
@ -150,7 +150,7 @@ impl Store {
),
};
let apply_fitler = |task: &StdResult<_, heed::Error>| match task {
let apply_fitler = |task: &StdResult<_, milli::heed::Error>| match task {
Ok(ref t) => filter
.as_ref()
.and_then(|filter| filter.filter_fn.as_ref())
@ -162,7 +162,7 @@ impl Store {
let tasks = iter
.filter(apply_fitler)
.take(limit.unwrap_or(usize::MAX))
.try_fold::<_, _, StdResult<_, heed::Error>>(Vec::new(), |mut v, task| {
.try_fold::<_, _, StdResult<_, milli::heed::Error>>(Vec::new(), |mut v, task| {
v.push(task?);
Ok(v)
})?;
@ -172,7 +172,7 @@ impl Store {
fn compute_candidates(
&self,
txn: &heed::RoTxn,
txn: &milli::heed::RoTxn,
filter: &TaskFilter,
range: Range<TaskId>,
) -> Result<BinaryHeap<TaskId>> {
@ -188,10 +188,10 @@ impl Store {
self.uids_task_ids
.remap_key_type::<ByteSlice>()
.rev_prefix_iter(txn, &index_uid)?
.map(|entry| -> StdResult<_, heed::Error> {
.map(|entry| -> StdResult<_, milli::heed::Error> {
let (key, _) = entry?;
let (_, id) =
IndexUidTaskIdCodec::bytes_decode(key).ok_or(heed::Error::Decoding)?;
let (_, id) = IndexUidTaskIdCodec::bytes_decode(key)
.ok_or(milli::heed::Error::Decoding)?;
Ok(id)
})
.skip_while(|entry| {
@ -212,7 +212,7 @@ impl Store {
// if we encounter an error we return true to collect it later
.unwrap_or(true)
})
.try_for_each::<_, StdResult<(), heed::Error>>(|id| {
.try_for_each::<_, StdResult<(), milli::heed::Error>>(|id| {
candidates.push(id?);
Ok(())
})?;
@ -225,8 +225,8 @@ impl Store {
#[cfg(test)]
pub mod test {
use heed::EnvOpenOptions;
use itertools::Itertools;
use milli::heed::EnvOpenOptions;
use nelson::Mocker;
use proptest::collection::vec;
use proptest::prelude::*;
@ -244,10 +244,10 @@ pub mod test {
Fake(Mocker),
}
pub struct TmpEnv(TempDir, Arc<heed::Env>);
pub struct TmpEnv(TempDir, Arc<milli::heed::Env>);
impl TmpEnv {
pub fn env(&self) -> Arc<heed::Env> {
pub fn env(&self) -> Arc<milli::heed::Env> {
self.1.clone()
}
}
@ -264,7 +264,7 @@ pub mod test {
}
impl MockStore {
pub fn new(env: Arc<heed::Env>) -> Result<Self> {
pub fn new(env: Arc<milli::heed::Env>) -> Result<Self> {
Ok(Self::Real(Store::new(env)?))
}

View File

@ -0,0 +1,12 @@
[package]
name = "permissive-json-pointer"
version = "0.2.0"
edition = "2021"
description = "A permissive json pointer"
readme = "README.md"
[dependencies]
serde_json = "1.0"
[dev-dependencies]
big_s = "1.0"

View File

@ -0,0 +1,134 @@
# Permissive json pointer
This crate provides an interface a little bit similar to what you know as “json pointer”.
But it’s actually doing something quite different.
## The API
The crate’s main function is [`select_values`].
It takes an object and a list of selectors as parameters.
It then returns a new object containing only the fields you selected.
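To make this concrete, here is a minimal sketch of calling [`select_values`] from Rust (it assumes `permissive-json-pointer` and `serde_json` are declared as dependencies):
```rust
use permissive_json_pointer::select_values;
use serde_json::{json, Map, Value};

fn main() {
    let value: Value = json!({
        "dog": "bob",
        "cat": "michel"
    });
    let object: &Map<String, Value> = value.as_object().unwrap();

    // Keep only the fields matched by the selectors.
    let selected: Value = select_values(object, vec!["dog"]).into();
    assert_eq!(selected, json!({ "dog": "bob" }));
}
```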
## The selectors
The syntax for the selectors is simpler than with other APIs.
There is only ONE special symbol, and it’s the `.`.
If you write `dog` and provide the following object:
```json
{
"dog": "bob",
"cat": "michel"
}
```
You’ll get back:
```json
{
"dog": "bob",
}
```
Easy, right?
Now, the dot can either be part of a field name or denote a nested object.
For example, if you have the following json:
```json
{
"dog.name": "jean",
"dog": {
"name": "bob",
"age": 6
}
}
```
What a crappy json! But never underestimate your users: they [_WILL_](https://xkcd.com/1172/)
somehow base their entire workflow on this kind of json.
Here, with the `dog.name` selector, both fields will be
selected and the following json will be returned:
```json
{
"dog.name": "jean",
"dog": {
"name": "bob",
}
}
```
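For reference, here is the same behaviour expressed as a small Rust sketch (assuming `permissive-json-pointer` and `serde_json` are available as dependencies):
```rust
use permissive_json_pointer::select_values;
use serde_json::{json, Map, Value};

fn main() {
    let value: Value = json!({
        "dog.name": "jean",
        "dog": {
            "name": "bob",
            "age": 6
        }
    });
    let object: &Map<String, Value> = value.as_object().unwrap();

    // `dog.name` matches both the literal `dog.name` field
    // and the `name` field nested inside `dog`.
    let selected: Value = select_values(object, vec!["dog.name"]).into();
    assert_eq!(
        selected,
        json!({
            "dog.name": "jean",
            "dog": { "name": "bob" }
        })
    );
}
```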
And as you can guess, this crate is as permissive as possible.
It’ll match everything it can!
Consider this even more crappy json:
```json
{
"pet.dog.name": "jean",
"pet.dog": {
"name": "bob"
},
"pet": {
"dog.name": "michel"
},
"pet": {
"dog": {
"name": "milan"
}
}
}
```
If you write `pet.dog.name` everything will be selected.
## Matching arrays
With this kind of selector you can’t match a specific element in an array.
Your selector will be applied to all the elements _in_ the array.
Consider the following json:
```json
{
"pets": [
{
"animal": "dog",
"race": "bernese mountain",
},
{
"animal": "dog",
"race": "golden retriever",
},
{
"animal": "cat",
"age": 8,
}
]
}
```
With the selector `pets.animal` you’ll get:
```json
{
"pets": [
{
"animal": "dog",
},
{
"animal": "dog",
},
{
"animal": "cat",
}
]
}
```
Empty elements in an array are removed. So if you were to select
`pets.age`, you would only get:
```json
{
"pets": [
{
"age": 8,
}
]
}
```
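The same array behaviour can be checked with a short Rust sketch (again assuming `permissive-json-pointer` and `serde_json` as dependencies):
```rust
use permissive_json_pointer::select_values;
use serde_json::{json, Map, Value};

fn main() {
    let value: Value = json!({
        "pets": [
            { "animal": "dog", "race": "bernese mountain" },
            { "animal": "dog", "race": "golden retriever" },
            { "animal": "cat", "age": 8 }
        ]
    });
    let object: &Map<String, Value> = value.as_object().unwrap();

    // The selector is applied to every element of the array.
    let animals: Value = select_values(object, vec!["pets.animal"]).into();
    assert_eq!(
        animals,
        json!({ "pets": [{ "animal": "dog" }, { "animal": "dog" }, { "animal": "cat" }] })
    );

    // Elements that end up empty are removed from the result.
    let ages: Value = select_values(object, vec!["pets.age"]).into();
    assert_eq!(ages, json!({ "pets": [{ "age": 8 }] }));
}
```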
And I think that’s all you need to know 🎉

View File

@ -0,0 +1,786 @@
#![doc = include_str!("../README.md")]
use std::collections::HashSet;
use serde_json::*;
type Document = Map<String, Value>;
const SPLIT_SYMBOL: char = '.';
/// Returns `true` if the `selector` match the `key`.
///
/// ```text
/// Example:
/// `animaux` match `animaux`
/// `animaux.chien` match `animaux`
/// `animaux.chien.nom` match `animaux`
/// `animaux.chien.nom` match `animaux.chien`
/// -----------------------------------------
/// `animaux` doesn't match `animaux.chien`
/// `animaux.` doesn't match `animaux`
/// `animaux.ch` doesn't match `animaux.chien`
/// `animau` doesn't match `animaux`
/// ```
fn contained_in(selector: &str, key: &str) -> bool {
selector.starts_with(key)
&& selector[key.len()..]
.chars()
.next()
.map(|c| c == SPLIT_SYMBOL)
.unwrap_or(true)
}
/// Map the selected leaf values of a json allowing you to update only the fields that were selected.
/// ```
/// use serde_json::{Value, json};
/// use permissive_json_pointer::map_leaf_values;
///
/// let mut value: Value = json!({
/// "jean": {
/// "age": 8,
/// "race": {
/// "name": "bernese mountain",
/// "size": "80cm",
/// }
/// }
/// });
/// map_leaf_values(
/// value.as_object_mut().unwrap(),
/// ["jean.race.name"],
/// |key, value| match (value, dbg!(key)) {
/// (Value::String(name), "jean.race.name") => *name = "patou".to_string(),
/// _ => unreachable!(),
/// },
/// );
/// assert_eq!(
/// value,
/// json!({
/// "jean": {
/// "age": 8,
/// "race": {
/// "name": "patou",
/// "size": "80cm",
/// }
/// }
/// })
/// );
/// ```
pub fn map_leaf_values<'a>(
value: &mut Map<String, Value>,
selectors: impl IntoIterator<Item = &'a str>,
mut mapper: impl FnMut(&str, &mut Value),
) {
let selectors: Vec<_> = selectors.into_iter().collect();
map_leaf_values_in_object(value, &selectors, "", &mut mapper);
}
pub fn map_leaf_values_in_object<'a>(
value: &mut Map<String, Value>,
selectors: &[&'a str],
base_key: &str,
mapper: &mut impl FnMut(&str, &mut Value),
) {
for (key, value) in value.iter_mut() {
let base_key = if base_key.is_empty() {
key.to_string()
} else {
format!("{}{}{}", base_key, SPLIT_SYMBOL, key)
};
// here, if the user only specified `doggo`, we need to iterate over all the fields of `doggo`
// so we check the contained_in on both sides
let should_continue = selectors
.iter()
.any(|selector| contained_in(selector, &base_key) || contained_in(&base_key, selector));
if should_continue {
match value {
Value::Object(object) => {
map_leaf_values_in_object(object, selectors, &base_key, mapper)
}
Value::Array(array) => {
map_leaf_values_in_array(array, selectors, &base_key, mapper)
}
value => mapper(&base_key, value),
}
}
}
}
pub fn map_leaf_values_in_array(
values: &mut [Value],
selectors: &[&str],
base_key: &str,
mapper: &mut impl FnMut(&str, &mut Value),
) {
for value in values.iter_mut() {
match value {
Value::Object(object) => map_leaf_values_in_object(object, selectors, base_key, mapper),
Value::Array(array) => map_leaf_values_in_array(array, selectors, base_key, mapper),
value => mapper(base_key, value),
}
}
}
/// Permissively selects values in a json with a list of selectors.
/// Returns a new json containing all the selected fields.
/// ```
/// use serde_json::*;
/// use permissive_json_pointer::select_values;
///
/// let value: Value = json!({
/// "name": "peanut",
/// "age": 8,
/// "race": {
/// "name": "bernese mountain",
/// "avg_age": 12,
/// "size": "80cm",
/// },
/// });
/// let value: &Map<String, Value> = value.as_object().unwrap();
///
/// let res: Value = select_values(value, vec!["name", "race.name"]).into();
/// assert_eq!(
/// res,
/// json!({
/// "name": "peanut",
/// "race": {
/// "name": "bernese mountain",
/// },
/// })
/// );
/// ```
pub fn select_values<'a>(
value: &Map<String, Value>,
selectors: impl IntoIterator<Item = &'a str>,
) -> Map<String, Value> {
let selectors = selectors.into_iter().collect();
create_value(value, selectors)
}
fn create_value(value: &Document, mut selectors: HashSet<&str>) -> Document {
let mut new_value: Document = Map::new();
for (key, value) in value.iter() {
// first we insert all the keys at the root level
if selectors.contains(key as &str) {
new_value.insert(key.to_string(), value.clone());
// if the key was simple we can delete it and move to
// the next key
if is_simple(key) {
selectors.remove(key as &str);
continue;
}
}
// we extract all the sub selectors matching the current field
// if there were [person.name, person.age] and we are on the field
// `person`, then we generate the following sub selectors: [name, age].
let sub_selectors: HashSet<&str> = selectors
.iter()
.filter(|s| contained_in(s, key))
.filter_map(|s| s.trim_start_matches(key).get(SPLIT_SYMBOL.len_utf8()..))
.collect();
if !sub_selectors.is_empty() {
match value {
Value::Array(array) => {
let array = create_array(array, &sub_selectors);
if !array.is_empty() {
new_value.insert(key.to_string(), array.into());
}
}
Value::Object(object) => {
let object = create_value(object, sub_selectors);
if !object.is_empty() {
new_value.insert(key.to_string(), object.into());
}
}
_ => (),
}
}
}
new_value
}
fn create_array(array: &Vec<Value>, selectors: &HashSet<&str>) -> Vec<Value> {
let mut res = Vec::new();
for value in array {
match value {
Value::Array(array) => {
let array = create_array(array, selectors);
if !array.is_empty() {
res.push(array.into());
}
}
Value::Object(object) => {
let object = create_value(object, selectors.clone());
if !object.is_empty() {
res.push(object.into());
}
}
_ => (),
}
}
res
}
fn is_simple(key: impl AsRef<str>) -> bool {
!key.as_ref().contains(SPLIT_SYMBOL)
}
#[cfg(test)]
mod tests {
use big_s::S;
use super::*;
#[test]
fn test_contained_in() {
assert!(contained_in("animaux", "animaux"));
assert!(contained_in("animaux.chien", "animaux"));
assert!(contained_in(
"animaux.chien.race.bouvier bernois.fourrure.couleur",
"animaux"
));
assert!(contained_in(
"animaux.chien.race.bouvier bernois.fourrure.couleur",
"animaux.chien"
));
assert!(contained_in(
"animaux.chien.race.bouvier bernois.fourrure.couleur",
"animaux.chien.race.bouvier bernois"
));
assert!(contained_in(
"animaux.chien.race.bouvier bernois.fourrure.couleur",
"animaux.chien.race.bouvier bernois.fourrure"
));
assert!(contained_in(
"animaux.chien.race.bouvier bernois.fourrure.couleur",
"animaux.chien.race.bouvier bernois.fourrure.couleur"
));
// -- the wrongs
assert!(!contained_in("chien", "chat"));
assert!(!contained_in("animaux", "animaux.chien"));
assert!(!contained_in("animaux.chien", "animaux.chat"));
// -- the strange edge cases
assert!(!contained_in("animaux.chien", "anima"));
assert!(!contained_in("animaux.chien", "animau"));
assert!(!contained_in("animaux.chien", "animaux."));
assert!(!contained_in("animaux.chien", "animaux.c"));
assert!(!contained_in("animaux.chien", "animaux.ch"));
assert!(!contained_in("animaux.chien", "animaux.chi"));
assert!(!contained_in("animaux.chien", "animaux.chie"));
}
#[test]
fn simple_key() {
let value: Value = json!({
"name": "peanut",
"age": 8,
"race": {
"name": "bernese mountain",
"avg_age": 12,
"size": "80cm",
}
});
let value: &Document = value.as_object().unwrap();
let res: Value = select_values(value, vec!["name"]).into();
assert_eq!(
res,
json!({
"name": "peanut",
})
);
let res: Value = select_values(value, vec!["age"]).into();
assert_eq!(
res,
json!({
"age": 8,
})
);
let res: Value = select_values(value, vec!["name", "age"]).into();
assert_eq!(
res,
json!({
"name": "peanut",
"age": 8,
})
);
let res: Value = select_values(value, vec!["race"]).into();
assert_eq!(
res,
json!({
"race": {
"name": "bernese mountain",
"avg_age": 12,
"size": "80cm",
}
})
);
let res: Value = select_values(value, vec!["name", "age", "race"]).into();
assert_eq!(
res,
json!({
"name": "peanut",
"age": 8,
"race": {
"name": "bernese mountain",
"avg_age": 12,
"size": "80cm",
}
})
);
}
#[test]
fn complex_key() {
let value: Value = json!({
"name": "peanut",
"age": 8,
"race": {
"name": "bernese mountain",
"avg_age": 12,
"size": "80cm",
}
});
let value: &Document = value.as_object().unwrap();
let res: Value = select_values(value, vec!["race"]).into();
assert_eq!(
res,
json!({
"race": {
"name": "bernese mountain",
"avg_age": 12,
"size": "80cm",
}
})
);
println!("RIGHTBEFORE");
let res: Value = select_values(value, vec!["race.name"]).into();
assert_eq!(
res,
json!({
"race": {
"name": "bernese mountain",
}
})
);
let res: Value = select_values(value, vec!["race.name", "race.size"]).into();
assert_eq!(
res,
json!({
"race": {
"name": "bernese mountain",
"size": "80cm",
}
})
);
let res: Value = select_values(
value,
vec!["race.name", "race.size", "race.avg_age", "race.size", "age"],
)
.into();
assert_eq!(
res,
json!({
"age": 8,
"race": {
"name": "bernese mountain",
"avg_age": 12,
"size": "80cm",
}
})
);
let res: Value = select_values(value, vec!["race.name", "race"]).into();
assert_eq!(
res,
json!({
"race": {
"name": "bernese mountain",
"avg_age": 12,
"size": "80cm",
}
})
);
let res: Value = select_values(value, vec!["race", "race.name"]).into();
assert_eq!(
res,
json!({
"race": {
"name": "bernese mountain",
"avg_age": 12,
"size": "80cm",
}
})
);
}
#[test]
fn multi_level_nested() {
let value: Value = json!({
"jean": {
"age": 8,
"race": {
"name": "bernese mountain",
"size": "80cm",
}
}
});
let value: &Document = value.as_object().unwrap();
let res: Value = select_values(value, vec!["jean"]).into();
assert_eq!(
res,
json!({
"jean": {
"age": 8,
"race": {
"name": "bernese mountain",
"size": "80cm",
}
}
})
);
let res: Value = select_values(value, vec!["jean.age"]).into();
assert_eq!(
res,
json!({
"jean": {
"age": 8,
}
})
);
let res: Value = select_values(value, vec!["jean.race.size"]).into();
assert_eq!(
res,
json!({
"jean": {
"race": {
"size": "80cm",
}
}
})
);
let res: Value = select_values(value, vec!["jean.race.name", "jean.age"]).into();
assert_eq!(
res,
json!({
"jean": {
"age": 8,
"race": {
"name": "bernese mountain",
}
}
})
);
let res: Value = select_values(value, vec!["jean.race"]).into();
assert_eq!(
res,
json!({
"jean": {
"race": {
"name": "bernese mountain",
"size": "80cm",
}
}
})
);
}
#[test]
fn array_and_deep_nested() {
let value: Value = json!({
"doggos": [
{
"jean": {
"age": 8,
"race": {
"name": "bernese mountain",
"size": "80cm",
}
}
},
{
"marc": {
"age": 4,
"race": {
"name": "golden retriever",
"size": "60cm",
}
}
},
]
});
let value: &Document = value.as_object().unwrap();
let res: Value = select_values(value, vec!["doggos.jean"]).into();
assert_eq!(
res,
json!({
"doggos": [
{
"jean": {
"age": 8,
"race": {
"name": "bernese mountain",
"size": "80cm",
}
}
}
]
})
);
let res: Value = select_values(value, vec!["doggos.marc"]).into();
assert_eq!(
res,
json!({
"doggos": [
{
"marc": {
"age": 4,
"race": {
"name": "golden retriever",
"size": "60cm",
}
}
}
]
})
);
let res: Value = select_values(value, vec!["doggos.marc.race"]).into();
assert_eq!(
res,
json!({
"doggos": [
{
"marc": {
"race": {
"name": "golden retriever",
"size": "60cm",
}
}
}
]
})
);
let res: Value =
select_values(value, vec!["doggos.marc.race.name", "doggos.marc.age"]).into();
assert_eq!(
res,
json!({
"doggos": [
{
"marc": {
"age": 4,
"race": {
"name": "golden retriever",
}
}
}
]
})
);
let res: Value = select_values(
value,
vec![
"doggos.marc.race.name",
"doggos.marc.age",
"doggos.jean.race.name",
"other.field",
],
)
.into();
assert_eq!(
res,
json!({
"doggos": [
{
"jean": {
"race": {
"name": "bernese mountain",
}
}
},
{
"marc": {
"age": 4,
"race": {
"name": "golden retriever",
}
}
}
]
})
);
}
#[test]
fn all_conflict_variation() {
let value: Value = json!({
"pet.dog.name": "jean",
"pet.dog": {
"name": "bob"
},
"pet": {
"dog.name": "michel"
},
"pet": {
"dog": {
"name": "milan"
}
}
});
let value: &Document = value.as_object().unwrap();
let res: Value = select_values(value, vec!["pet.dog.name"]).into();
assert_eq!(
res,
json!({
"pet.dog.name": "jean",
"pet.dog": {
"name": "bob"
},
"pet": {
"dog.name": "michel"
},
"pet": {
"dog": {
"name": "milan"
}
}
})
);
let value: Value = json!({
"pet.dog.name": "jean",
"pet.dog": {
"name": "bob",
},
"pet": {
"dog.name": "michel",
"dog": {
"name": "milan",
}
}
});
let value: &Document = value.as_object().unwrap();
let res: Value = select_values(value, vec!["pet.dog.name", "pet.dog", "pet"]).into();
assert_eq!(
res,
json!({
"pet.dog.name": "jean",
"pet.dog": {
"name": "bob",
},
"pet": {
"dog.name": "michel",
"dog": {
"name": "milan",
}
}
})
);
}
#[test]
fn map_object() {
let mut value: Value = json!({
"jean": {
"age": 8,
"race": {
"name": "bernese mountain",
"size": "80cm",
}
}
});
map_leaf_values(
value.as_object_mut().unwrap(),
["jean.race.name"],
|key, value| match (value, dbg!(key)) {
(Value::String(name), "jean.race.name") => *name = S("patou"),
_ => unreachable!(),
},
);
assert_eq!(
value,
json!({
"jean": {
"age": 8,
"race": {
"name": "patou",
"size": "80cm",
}
}
})
);
let mut value: Value = json!({
"jean": {
"age": 8,
"race": {
"name": "bernese mountain",
"size": "80cm",
}
},
"bob": "lolpied",
});
let mut calls = 0;
map_leaf_values(value.as_object_mut().unwrap(), ["jean"], |key, value| {
calls += 1;
match (value, key) {
(Value::String(name), "jean.race.name") => *name = S("patou"),
_ => println!("Called with {key}"),
}
});
assert_eq!(calls, 3);
assert_eq!(
value,
json!({
"jean": {
"age": 8,
"race": {
"name": "patou",
"size": "80cm",
}
},
"bob": "lolpied",
})
);
}
}