Merge branch 'main' into stable

Clémentine Urquizar - curqui 2022-10-04 14:20:10 +02:00 committed by GitHub
commit a7d2c9572e
35 changed files with 824 additions and 139 deletions


@ -1,13 +1,13 @@
contact_links:
- name: Support questions & other
url: https://github.com/meilisearch/meilisearch/discussions/new
about: For any other question, open a discussion in this repository
- name: Language support request & feedback
url: https://github.com/meilisearch/product/discussions/categories/feedback-feature-proposal?discussions_q=label%3Aproduct%3Acore%3Atokenizer+category%3A%22Feedback+%26+Feature+Proposal%22
about: The requests and feedback regarding Language support are not managed in this repository. Please upvote the related discussion in our dedicated product repository or open a new one if it doesn't exist.
- name: Feature request & feedback
- name: Any other feature request & feedback
url: https://github.com/meilisearch/product/discussions/categories/feedback-feature-proposal
about: The feature requests and feedback regarding the already existing features are not managed in this repository. Please open a discussion in our dedicated product repository
- name: Documentation issue
url: https://github.com/meilisearch/documentation/issues/new
about: For documentation issues, open an issue or a PR in the documentation repository
- name: Support questions & other
url: https://github.com/meilisearch/meilisearch/discussions/new
about: For any other question, open a discussion in this repository


@ -85,7 +85,7 @@ get_latest() {
latest=""
current_tag=""
for release_info in $releases; do
if [ $i -eq 0 ]; then # Cheking tag_name
if [ $i -eq 0 ]; then # Checking tag_name
if echo "$release_info" | grep -q "$GREP_SEMVER_REGEXP"; then # If it's not an alpha or beta release
current_tag=$release_info
else


@ -3,7 +3,7 @@ on:
schedule:
- cron: '0 0 1 */3 *'
workflow_dispatch:
jobs:
create-issue:
runs-on: ubuntu-latest
@ -12,12 +12,12 @@ jobs:
- name: Create an issue
uses: actions-ecosystem/action-create-issue@v1
with:
github_token: ${{ secrets.GITHUB_TOKEN }}
github_token: ${{ secrets.MEILI_BOT_GH_PAT }}
title: Upgrade dependencies
body: |
We need to update the dependencies of the Meilisearch repository, and, if possible, the dependencies of all the core-team repositories that Meilisearch depends on (milli, charabia, heed...).
⚠️ This issue should only be done at the beginning of the sprint!
⚠️ This issue should only be done at the beginning of the sprint!
labels: |
dependencies
maintenance

.github/workflows/milestone-workflow.yml (new file, 156 lines)

@ -0,0 +1,156 @@
name: Milestone's workflow
# /!\ No git flow is handled here
# For each Milestone created (not opened!), and if the release is NOT a patch release (only the patch changed)
# - the roadmap issue is created, see https://github.com/meilisearch/core-team/blob/main/issue-templates/roadmap-issue.md
# - the changelog issue is created, see https://github.com/meilisearch/core-team/blob/main/issue-templates/changelog-issue.md
# For each Milestone closed
# - the `release_version` label is created
# - this label is applied to all issues/PRs in the Milestone
on:
milestone:
types: [created, closed]
env:
MILESTONE_VERSION: ${{ github.event.milestone.title }}
MILESTONE_URL: ${{ github.event.milestone.html_url }}
MILESTONE_DUE_ON: ${{ github.event.milestone.due_on }}
GH_TOKEN: ${{ secrets.MEILI_BOT_GH_PAT }}
jobs:
# -----------------
# MILESTONE CREATED
# -----------------
get-release-version:
if: github.event.action == 'created'
runs-on: ubuntu-latest
outputs:
is-patch: ${{ steps.check-patch.outputs.is-patch }}
env:
MILESTONE_VERSION: ${{ github.event.milestone.title }}
steps:
- uses: actions/checkout@v3
- name: Check if this release is a patch release only
id: check-patch
run: |
echo version: $MILESTONE_VERSION
if [[ $MILESTONE_VERSION =~ ^v[0-9]+\.[0-9]+\.0$ ]]; then
echo 'This is NOT a patch release'
echo ::set-output name=is-patch::false
elif [[ $MILESTONE_VERSION =~ ^v[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
echo 'This is a patch release'
echo ::set-output name=is-patch::true
else
echo "Not a valid format of release, check the Milestone's title."
echo 'Should be vX.Y.Z'
exit 1
fi
create-roadmap-issue:
needs: get-release-version
# Create the roadmap issue if the release is not only a patch release
if: github.event.action == 'created' && needs.get-release-version.outputs.is-patch == 'false'
runs-on: ubuntu-latest
env:
ISSUE_TEMPLATE: issue-template.md
steps:
- uses: actions/checkout@v3
- name: Download the issue template
run: curl -s https://raw.githubusercontent.com/meilisearch/core-team/main/issue-templates/roadmap-issue.md > $ISSUE_TEMPLATE
- name: Replace all empty occurrences in the templates
run: |
# Replace all <<version>> occurrences
sed -i "s/<<version>>/$MILESTONE_VERSION/g" $ISSUE_TEMPLATE
# Replace all <<milestone_id>> occurrences
milestone_id=$(echo $MILESTONE_URL | cut -d '/' -f 7)
sed -i "s/<<milestone_id>>/$milestone_id/g" $ISSUE_TEMPLATE
# Replace release date if exists
if [[ ! -z $MILESTONE_DUE_ON ]]; then
date=$(echo $MILESTONE_DUE_ON | cut -d 'T' -f 1)
sed -i "s/Release date\: 20XX-XX-XX/Release date\: $date/g" $ISSUE_TEMPLATE
fi
- name: Create the issue
run: |
gh issue create \
--title "$MILESTONE_VERSION ROADMAP" \
--label 'epic,impacts docs,impacts integrations,impacts cloud' \
--body-file $ISSUE_TEMPLATE \
--milestone $MILESTONE_VERSION
create-changelog-issue:
needs: get-release-version
# Create the changelog issue if the release is not only a patch release
if: github.event.action == 'created' && needs.get-release-version.outputs.is-patch == 'false'
runs-on: ubuntu-latest
env:
ISSUE_TEMPLATE: issue-template.md
steps:
- uses: actions/checkout@v3
- name: Download the issue template
run: curl -s https://raw.githubusercontent.com/meilisearch/core-team/main/issue-templates/changelog-issue.md > $ISSUE_TEMPLATE
- name: Replace all empty occurrences in the templates
run: |
# Replace all <<version>> occurrences
sed -i "s/<<version>>/$MILESTONE_VERSION/g" $ISSUE_TEMPLATE
# Replace all <<milestone_id>> occurrences
milestone_id=$(echo $MILESTONE_URL | cut -d '/' -f 7)
sed -i "s/<<milestone_id>>/$milestone_id/g" $ISSUE_TEMPLATE
- name: Create the issue
run: |
gh issue create \
--title "Create release changelogs for $MILESTONE_VERSION" \
--label 'impacts docs,documentation' \
--body-file $ISSUE_TEMPLATE \
--milestone $MILESTONE_VERSION \
--assignee curquiza
# ----------------
# MILESTONE CLOSED
# ----------------
create-release-label:
if: github.event.action == 'closed'
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- name: Create the ${{ env.MILESTONE_VERSION }} label
run: |
label_description="PRs/issues solved in $MILESTONE_VERSION"
if [[ ! -z $MILESTONE_DUE_ON ]]; then
date=$(echo $MILESTONE_DUE_ON | cut -d 'T' -f 1)
label_description="$label_description released on $date"
fi
gh api repos/curquiza/meilisearch/labels \
--method POST \
-H "Accept: application/vnd.github+json" \
-f name="$MILESTONE_VERSION" \
-f description="$label_description" \
-f color='ff5ba3'
labelize-all-milestone-content:
if: github.event.action == 'closed'
needs: create-release-label
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- name: Add label ${{ env.MILESTONE_VERSION }} to all PRs in the Milestone
run: |
prs=$(gh pr list --search milestone:"$MILESTONE_VERSION" --limit 1000 --state all --json number --template '{{range .}}{{tablerow (printf "%v" .number)}}{{end}}')
for pr in $prs; do
gh pr $pr edit --add-label $MILESTONE_VERSION
done
- name: Add label ${{ env.MILESTONE_VERSION }} to all issues in the Milestone
run: |
issues=$(gh issue list --search milestone:"$MILESTONE_VERSION" --limit 1000 --state all --json number --template '{{range .}}{{tablerow (printf "%v" .number)}}{{end}}')
for issue in $issues; do
gh issue edit $issue --add-label $MILESTONE_VERSION
done
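
A hedged aside, not part of the commit: the `check-patch` step above encodes the release rule with bash regexes: `vX.Y.0` is a new minor/major release, `vX.Y.Z` with Z > 0 is a patch, and anything else is an invalid milestone title. The same rule sketched in Rust (the function name is hypothetical):

// Mirrors the milestone title check above: Some(false) for vX.Y.0,
// Some(true) for a patch release vX.Y.Z (Z > 0), None for invalid titles.
fn is_patch_release(title: &str) -> Option<bool> {
    let parts: Vec<u64> = title
        .strip_prefix('v')?
        .split('.')
        .map(str::parse)
        .collect::<Result<_, _>>()
        .ok()?;
    if parts.len() != 3 {
        return None;
    }
    Some(parts[2] != 0)
}

fn main() {
    assert_eq!(is_patch_release("v0.29.0"), Some(false)); // minor/major release
    assert_eq!(is_patch_release("v0.29.1"), Some(true)); // patch release
    assert_eq!(is_patch_release("0.29.1"), None); // invalid title
}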


@ -1,4 +1,6 @@
on:
schedule:
- cron: '0 2 * * *' # Every day at 2:00am
release:
types: [published]
@ -8,8 +10,9 @@ jobs:
check-version:
name: Check the version validity
runs-on: ubuntu-latest
# No need to check the version for dry run (cron)
steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v3
# Check if the tag has the v<number>.<number>.<number> format.
# If yes, it means we are publishing an official release.
# If no, we are releasing a RC, so no need to check the version.
@ -25,7 +28,7 @@ jobs:
echo ::set-output name=stable::false
fi
- name: Check release validity
if: steps.check-tag-format.outputs.stable == 'true'
if: github.event_name != 'schedule' && steps.check-tag-format.outputs.stable == 'true'
run: bash .github/scripts/check-release.sh
publish:
@ -54,10 +57,12 @@ jobs:
- uses: actions/checkout@v3
- name: Build
run: cargo build --release --locked
# No need to upload binaries for dry run (cron)
- name: Upload binaries to release
if: github.event_name != 'schedule'
uses: svenstaro/upload-release-action@v1-release
with:
repo_token: ${{ secrets.PUBLISH_TOKEN }}
repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
file: target/release/${{ matrix.artifact_name }}
asset_name: ${{ matrix.asset_name }}
tag: ${{ github.ref }}
@ -123,9 +128,11 @@ jobs:
run: ls -lR ./target
- name: Upload the binary to release
# No need to upload binaries for dry run (cron)
if: github.event_name != 'schedule'
uses: svenstaro/upload-release-action@v1-release
with:
repo_token: ${{ secrets.PUBLISH_TOKEN }}
repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
file: target/${{ matrix.target }}/release/meilisearch
asset_name: ${{ matrix.asset_name }}
tag: ${{ github.ref }}


@ -9,7 +9,7 @@ jobs:
name: Check the version validity
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v3
- name: Check release validity
run: bash .github/scripts/check-release.sh
@ -29,7 +29,7 @@ jobs:
- name: Upload debian pkg to release
uses: svenstaro/upload-release-action@v1-release
with:
repo_token: ${{ secrets.GITHUB_TOKEN }}
repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
file: target/debian/meilisearch.deb
asset_name: meilisearch.deb
tag: ${{ github.ref }}


@ -12,7 +12,7 @@ jobs:
docker:
runs-on: docker
steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v3
# Check if the tag has the v<number>.<number>.<number> format. If yes, it means we are publishing an official release.
# In this situation, we need to set `output.stable` to create/update the following tags (additionally to the `vX.Y.Z` Docker tag):
@ -53,7 +53,7 @@ jobs:
uses: docker/metadata-action@v4
with:
images: getmeili/meilisearch
# The lastest and `vX.Y` tags are only pushed for the official Meilisearch releases
# The latest and `vX.Y` tags are only pushed for the official Meilisearch releases
# See https://github.com/docker/metadata-action#latest-tag
flavor: latest=false
tags: |
@ -62,10 +62,19 @@ jobs:
type=raw,value=latest,enable=${{ steps.check-tag-format.outputs.stable == 'true' }}
- name: Build and push
id: docker_build
uses: docker/build-push-action@v3
with:
# We do not push tags for the cron jobs, this is only for test purposes
push: ${{ github.event_name != 'schedule' }}
platforms: linux/amd64,linux/arm64
tags: ${{ steps.meta.outputs.tags }}
# /!\ Don't touch this without checking with Cloud team
- name: Send CI information to Cloud team
if: github.event_name != 'schedule'
uses: peter-evans/repository-dispatch@v2
with:
token: ${{ secrets.MEILI_BOT_GH_PAT }}
repository: meilisearch/meilisearch-cloud
event-type: cloud-docker-build
client-payload: '{ "meilisearch_version": "${{ steps.meta.outputs.tags }}", "stable": "${{ steps.check-tag-format.outputs.stable }}" }'


@ -0,0 +1,47 @@
name: Update Meilisearch version in all Cargo.toml files
on:
workflow_dispatch:
inputs:
new_version:
description: 'The new version (vX.Y.Z)'
required: true
env:
NEW_VERSION: ${{ github.event.inputs.new_version }}
NEW_BRANCH: update-version-${{ github.event.inputs.new_version }}
GH_TOKEN: ${{ secrets.MEILI_BOT_GH_PAT }}
jobs:
update-version-cargo-toml:
name: Update version in Cargo.toml files
runs-on: ubuntu-20.04
steps:
- uses: actions/checkout@v3
- uses: actions-rs/toolchain@v1
with:
profile: minimal
toolchain: stable
override: true
- name: Install sd
run: cargo install sd
- name: Update Cargo.toml files
run: |
raw_new_version=$(echo $NEW_VERSION | cut -d 'v' -f 2)
new_string="version = \"$raw_new_version\""
sd '^version = "\d+.\d+.\w+"$' "$new_string" */Cargo.toml
- name: Build Meilisearch to update Cargo.lock
run: cargo build
- name: Commit and push the changes to the ${{ env.NEW_BRANCH }} branch
uses: EndBug/add-and-commit@v9
with:
message: "Update version for the next release (${{ env.NEW_VERSION }}) in Cargo.toml files"
new_branch: ${{ env.NEW_BRANCH }}
- name: Create the PR pointing to ${{ github.ref_name }}
run: |
gh pr create \
--title "Update version for the next release ($NEW_VERSION) in Cargo.toml files" \
--body '⚠️ This PR is automatically generated. Check the new version is the expected one before merging.' \
--label 'skip changelog' \
--milestone $NEW_VERSION


@ -10,12 +10,24 @@ If Meilisearch does not offer optimized support for your language, please consid
## Table of Contents
- [Hacktoberfest 2022](#hacktoberfest-2022)
- [Assumptions](#assumptions)
- [How to Contribute](#how-to-contribute)
- [Development Workflow](#development-workflow)
- [Git Guidelines](#git-guidelines)
- [Release Process (for internal team only)](#release-process-for-internal-team-only)
## Hacktoberfest 2022
It's [Hacktoberfest month](https://hacktoberfest.com)! 🥳
Thanks so much for participating with Meilisearch this year!
1. We will follow the quality standards set by the organizers of Hacktoberfest (see detail on their [website](https://hacktoberfest.com/participation/#spam)). Our reviewers will not consider any PR that doesn't match that standard.
2. PRs reviews will take place from Monday to Thursday, during usual working hours, CEST time. If you submit outside of these hours, there's no need to panic; we will get around to your contribution.
3. There will be no issue assignment as we don't want people to ask to be assigned specific issues and never return, discouraging the volunteer contributors from opening a PR to fix this issue. We take the liberty to choose the PR that best fixes the issue, so we encourage you to get to it as soon as possible and do your best!
You can check out the longer, more complete guideline documentation [here](https://github.com/meilisearch/.github/blob/main/Hacktoberfest_2022_contributors_guidelines.md).
## Assumptions
1. **You're familiar with [GitHub](https://github.com) and the [Pull Requests](https://help.github.com/en/github/collaborating-with-issues-and-pull-requests/about-pull-requests)(PR) workflow.**
@ -102,7 +114,7 @@ The full Meilisearch release process is described in [this guide](https://github
### Release assets
For each release, the following assets are created:
- Binaries for differents platforms (Linux, MacOS, Windows and ARM architectures) are attached to the GitHub release
- Binaries for different platforms (Linux, MacOS, Windows and ARM architectures) are attached to the GitHub release
- Binaries are pushed to HomeBrew and APT (not published for RC)
- Docker tags are created/updated:
- `vX.Y.Z`

Cargo.lock (generated, 1 addition)

@ -2091,6 +2091,7 @@ dependencies = [
"time 0.3.14",
"tokio",
"tokio-stream",
"toml",
"urlencoding",
"uuid",
"vergen",


@ -1,5 +1,5 @@
# Compile
FROM rust:alpine3.14 AS compiler
FROM rust:alpine3.16 AS compiler
RUN apk add -q --update-cache --no-cache build-base openssl-dev
@ -19,7 +19,7 @@ RUN set -eux; \
cargo build --release
# Run
FROM alpine:3.14
FROM alpine:3.16
ENV MEILI_HTTP_ADDR 0.0.0.0:7700
ENV MEILI_SERVER_PROVIDER docker


@ -34,6 +34,14 @@ Meilisearch helps you shape a delightful search experience in a snap, offering f
🔥 [**Try it!**](https://where2watch.meilisearch.com/) 🔥
## 🎃 Hacktoberfest
It's Hacktoberfest 2022 @Meilisearch
[Hacktoberfest](https://hacktoberfest.com/) is a celebration of the open-source community. This year, and for the third time in a row, Meilisearch is participating in this fantastic event.
You'd like to contribute? Don't hesitate to check out our [contributing guidelines](./CONTRIBUTING.md).
## ✨ Features
- **Search-as-you-type:** find search results in less than 50 milliseconds

config.toml (new file, 129 lines)

@ -0,0 +1,129 @@
# This file shows the default configuration of Meilisearch.
# All variables are defined here https://docs.meilisearch.com/learn/configuration/instance_options.html#environment-variables
db_path = "./data.ms"
# The destination where the database must be created.
env = "development" # Possible values: [development, production]
# This environment variable must be set to `production` if you are running in production.
# More logs will be displayed if the server is running in development mode. Setting the master
# key is optional; hence no security on the updates routes. This
# is useful to debug when integrating the engine with another service.
http_addr = "127.0.0.1:7700"
# The address on which the HTTP server will listen.
# master_key = "MASTER_KEY"
# Sets the instance's master key, automatically protecting all routes except GET /health.
# no_analytics = false
# Do not send analytics to Meilisearch.
disable_auto_batching = false
# The engine will disable task auto-batching, and will sequentially compute each task one by one.
### DUMP
dumps_dir = "dumps/"
# Folder where dumps are created when the dump route is called.
# import_dump = "./path/to/my/file.dump"
# Import a dump from the specified path, must be a `.dump` file.
ignore_missing_dump = false
# If the dump doesn't exist, load or create the database specified by `db_path` instead.
ignore_dump_if_db_exists = false
# Ignore the dump if a database already exists, and load that database instead.
###
log_level = "INFO" # Possible values: [ERROR, WARN, INFO, DEBUG, TRACE]
# Set the log level.
### INDEX
max_index_size = "100 GiB"
# The maximum size, in bytes, of the main LMDB database directory.
# max_indexing_memory = "2 GiB"
# The maximum amount of memory the indexer will use.
#
# In case the engine is unable to retrieve the available memory the engine will try to use
# the memory it needs but without real limit, this can lead to Out-Of-Memory issues and it
# is recommended to specify the amount of memory to use.
#
# /!\ The default value is system dependent /!\
# max_indexing_threads = 4
# The maximum number of threads the indexer will use. If the number set is higher than the
# real number of cores available in the machine, it will use the maximum number of
# available cores.
#
# It defaults to half of the available threads.
###
max_task_db_size = "100 GiB"
# The maximum size, in bytes, of the update LMDB database directory.
http_payload_size_limit = "100 MB"
# The maximum size, in bytes, of accepted JSON payloads.
### SNAPSHOT
schedule_snapshot = false
# Activate snapshot scheduling.
snapshot_dir = "snapshots/"
# Defines the directory path where Meilisearch will create a snapshot each snapshot_interval_sec.
snapshot_interval_sec = 86400
# Defines time interval, in seconds, between each snapshot creation.
# import_snapshot = "./path/to/my/snapshot"
# Defines the path of the snapshot file to import. This option will, by default, stop the
# process if a database already exists, or if no snapshot exists at the given path. If this
# option is not specified, no snapshot is imported.
ignore_missing_snapshot = false
# The engine will ignore a missing snapshot and not return an error in such a case.
ignore_snapshot_if_db_exists = false
# The engine will skip snapshot importation and not return an error in such a case.
###
### SSL
# ssl_auth_path = "./path/to/root"
# Enable client authentication, and accept certificates signed by those roots provided in CERTFILE.
# ssl_cert_path = "./path/to/CERTFILE"
# Read server certificates from CERTFILE. This should contain PEM-format certificates in
# the right order (the first certificate should certify KEYFILE, the last should be a root
# CA).
# ssl_key_path = "./path/to/private-key"
# Read the private key from KEYFILE. This should be an RSA private key or PKCS8-encoded
# private key, in PEM format.
# ssl_ocsp_path = "./path/to/OCSPFILE"
# Read DER-encoded OCSP response from OCSPFILE and staple to certificate. Optional.
ssl_require_auth = false
# Send a fatal alert if the client does not complete client authentication.
ssl_resumption = false
# SSL support session resumption.
ssl_tickets = false
# SSL support tickets.
###
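
As a sketch of how a file like this is consumed (hypothetical, not from the commit): the `Opt::try_build` function added later in this commit deserializes it with the `toml` crate via `toml::from_slice::<Opt>`. A minimal standalone version, with `PartialConfig` standing in for the real `Opt` struct:

use serde::Deserialize;
use std::path::PathBuf;

// Hypothetical two-field subset of the options, for illustration only.
#[derive(Debug, Deserialize)]
#[serde(deny_unknown_fields)]
struct PartialConfig {
    db_path: PathBuf,
    env: String,
}

fn main() -> Result<(), toml::de::Error> {
    let raw = "db_path = \"./data.ms\"\nenv = \"development\"\n";
    let config: PartialConfig = toml::from_str(raw)?;
    assert_eq!(config.db_path, PathBuf::from("./data.ms"));
    assert_eq!(config.env, "development");
    Ok(())
}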


@ -7,7 +7,7 @@ edition = "2021"
enum-iterator = "0.7.0"
hmac = "0.12.1"
meilisearch-types = { path = "../meilisearch-types" }
milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.33.4" }
milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.33.4", default-features = false }
rand = "0.8.4"
serde = { version = "1.0.136", features = ["derive"] }
serde_json = { version = "1.0.85", features = ["preserve_order"] }


@ -46,7 +46,7 @@ jsonwebtoken = "8.0.1"
log = "0.4.14"
meilisearch-auth = { path = "../meilisearch-auth" }
meilisearch-types = { path = "../meilisearch-types" }
meilisearch-lib = { path = "../meilisearch-lib" }
meilisearch-lib = { path = "../meilisearch-lib", default-features = false }
mimalloc = { version = "0.1.29", default-features = false }
mime = "0.3.16"
num_cpus = "1.13.1"
@ -76,6 +76,7 @@ thiserror = "1.0.30"
time = { version = "0.3.7", features = ["serde-well-known", "formatting", "parsing", "macros"] }
tokio = { version = "1.17.0", features = ["full"] }
tokio-stream = "0.1.8"
toml = "0.5.9"
uuid = { version = "1.1.2", features = ["serde", "v4"] }
walkdir = "2.3.2"
prometheus = { version = "0.13.0", features = ["process"], optional = true }
@ -90,7 +91,7 @@ urlencoding = "2.1.0"
yaup = "0.2.0"
[features]
default = ["analytics", "mini-dashboard"]
default = ["analytics", "meilisearch-lib/default", "mini-dashboard"]
metrics = ["prometheus"]
analytics = ["segment"]
mini-dashboard = [
@ -104,6 +105,10 @@ mini-dashboard = [
"tempfile",
"zip",
]
chinese = ["meilisearch-lib/chinese"]
hebrew = ["meilisearch-lib/hebrew"]
japanese = ["meilisearch-lib/japanese"]
thai = ["meilisearch-lib/thai"]
[package.metadata.mini-dashboard]
assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.2/build.zip"


@ -349,16 +349,16 @@ pub struct SearchAggregator {
// sort
sort_with_geo_point: bool,
// everytime a request has a filter, this field must be incremented by the number of terms it contains
// every time a request has a filter, this field must be incremented by the number of terms it contains
sort_sum_of_criteria_terms: usize,
// everytime a request has a filter, this field must be incremented by one
// every time a request has a filter, this field must be incremented by one
sort_total_number_of_criteria: usize,
// filter
filter_with_geo_radius: bool,
// everytime a request has a filter, this field must be incremented by the number of terms it contains
// every time a request has a filter, this field must be incremented by the number of terms it contains
filter_sum_of_criteria_terms: usize,
// everytime a request has a filter, this field must be incremented by one
// every time a request has a filter, this field must be incremented by one
filter_total_number_of_criteria: usize,
used_syntax: HashMap<String, usize>,
@ -366,7 +366,7 @@ pub struct SearchAggregator {
// The maximum number of terms in a q request
max_terms_number: usize,
// everytime a search is done, we increment the counter linked to the used settings
// every time a search is done, we increment the counter linked to the used settings
matching_strategy: HashMap<String, usize>,
// pagination


@ -1,9 +1,9 @@
use std::env;
use std::path::PathBuf;
use std::sync::Arc;
use actix_web::http::KeepAlive;
use actix_web::HttpServer;
use clap::Parser;
use meilisearch_auth::AuthController;
use meilisearch_http::analytics;
use meilisearch_http::analytics::Analytics;
@ -29,7 +29,7 @@ fn setup(opt: &Opt) -> anyhow::Result<()> {
#[actix_web::main]
async fn main() -> anyhow::Result<()> {
let opt = Opt::parse();
let (opt, config_read_from) = Opt::try_build()?;
setup(&opt)?;
@ -58,7 +58,7 @@ async fn main() -> anyhow::Result<()> {
#[cfg(any(debug_assertions, not(feature = "analytics")))]
let (analytics, user) = analytics::MockAnalytics::new(&opt);
print_launch_resume(&opt, &user);
print_launch_resume(&opt, &user, config_read_from);
run_http(meilisearch, auth_controller, opt, analytics).await?;
@ -97,7 +97,7 @@ async fn run_http(
Ok(())
}
pub fn print_launch_resume(opt: &Opt, user: &str) {
pub fn print_launch_resume(opt: &Opt, user: &str, config_read_from: Option<PathBuf>) {
let commit_sha = option_env!("VERGEN_GIT_SHA").unwrap_or("unknown");
let commit_date = option_env!("VERGEN_GIT_COMMIT_TIMESTAMP").unwrap_or("unknown");
let protocol = if opt.ssl_cert_path.is_some() && opt.ssl_key_path.is_some() {
@ -118,6 +118,12 @@ pub fn print_launch_resume(opt: &Opt, user: &str) {
eprintln!("{}", ascii_name);
eprintln!(
"Config file path:\t{:?}",
config_read_from
.map(|config_file_path| config_file_path.display().to_string())
.unwrap_or_else(|| "none".to_string())
);
eprintln!("Database path:\t\t{:?}", opt.db_path);
eprintln!("Server listening on:\t\"{}://{}\"", protocol, opt.http_addr);
eprintln!("Environment:\t\t{:?}", opt.env);


@ -5,7 +5,10 @@ use std::sync::Arc;
use byte_unit::Byte;
use clap::Parser;
use meilisearch_lib::options::{IndexerOpts, SchedulerConfig};
use meilisearch_lib::{
export_to_env_if_not_present,
options::{IndexerOpts, SchedulerConfig},
};
use rustls::{
server::{
AllowAnyAnonymousOrAuthenticatedClient, AllowAnyAuthenticatedClient,
@ -14,141 +17,208 @@ use rustls::{
RootCertStore,
};
use rustls_pemfile::{certs, pkcs8_private_keys, rsa_private_keys};
use serde::Serialize;
use serde::{Deserialize, Serialize};
const POSSIBLE_ENV: [&str; 2] = ["development", "production"];
#[derive(Debug, Clone, Parser, Serialize)]
const MEILI_DB_PATH: &str = "MEILI_DB_PATH";
const MEILI_HTTP_ADDR: &str = "MEILI_HTTP_ADDR";
const MEILI_MASTER_KEY: &str = "MEILI_MASTER_KEY";
const MEILI_ENV: &str = "MEILI_ENV";
#[cfg(all(not(debug_assertions), feature = "analytics"))]
const MEILI_NO_ANALYTICS: &str = "MEILI_NO_ANALYTICS";
const MEILI_MAX_INDEX_SIZE: &str = "MEILI_MAX_INDEX_SIZE";
const MEILI_MAX_TASK_DB_SIZE: &str = "MEILI_MAX_TASK_DB_SIZE";
const MEILI_HTTP_PAYLOAD_SIZE_LIMIT: &str = "MEILI_HTTP_PAYLOAD_SIZE_LIMIT";
const MEILI_SSL_CERT_PATH: &str = "MEILI_SSL_CERT_PATH";
const MEILI_SSL_KEY_PATH: &str = "MEILI_SSL_KEY_PATH";
const MEILI_SSL_AUTH_PATH: &str = "MEILI_SSL_AUTH_PATH";
const MEILI_SSL_OCSP_PATH: &str = "MEILI_SSL_OCSP_PATH";
const MEILI_SSL_REQUIRE_AUTH: &str = "MEILI_SSL_REQUIRE_AUTH";
const MEILI_SSL_RESUMPTION: &str = "MEILI_SSL_RESUMPTION";
const MEILI_SSL_TICKETS: &str = "MEILI_SSL_TICKETS";
const MEILI_IMPORT_SNAPSHOT: &str = "MEILI_IMPORT_SNAPSHOT";
const MEILI_IGNORE_MISSING_SNAPSHOT: &str = "MEILI_IGNORE_MISSING_SNAPSHOT";
const MEILI_IGNORE_SNAPSHOT_IF_DB_EXISTS: &str = "MEILI_IGNORE_SNAPSHOT_IF_DB_EXISTS";
const MEILI_SNAPSHOT_DIR: &str = "MEILI_SNAPSHOT_DIR";
const MEILI_SCHEDULE_SNAPSHOT: &str = "MEILI_SCHEDULE_SNAPSHOT";
const MEILI_SNAPSHOT_INTERVAL_SEC: &str = "MEILI_SNAPSHOT_INTERVAL_SEC";
const MEILI_IMPORT_DUMP: &str = "MEILI_IMPORT_DUMP";
const MEILI_IGNORE_MISSING_DUMP: &str = "MEILI_IGNORE_MISSING_DUMP";
const MEILI_IGNORE_DUMP_IF_DB_EXISTS: &str = "MEILI_IGNORE_DUMP_IF_DB_EXISTS";
const MEILI_DUMPS_DIR: &str = "MEILI_DUMPS_DIR";
const MEILI_LOG_LEVEL: &str = "MEILI_LOG_LEVEL";
#[cfg(feature = "metrics")]
const MEILI_ENABLE_METRICS_ROUTE: &str = "MEILI_ENABLE_METRICS_ROUTE";
const DEFAULT_DB_PATH: &str = "./data.ms";
const DEFAULT_HTTP_ADDR: &str = "127.0.0.1:7700";
const DEFAULT_ENV: &str = "development";
const DEFAULT_MAX_INDEX_SIZE: &str = "100 GiB";
const DEFAULT_MAX_TASK_DB_SIZE: &str = "100 GiB";
const DEFAULT_HTTP_PAYLOAD_SIZE_LIMIT: &str = "100 MB";
const DEFAULT_SNAPSHOT_DIR: &str = "snapshots/";
const DEFAULT_SNAPSHOT_INTERVAL_SEC: u64 = 86400;
const DEFAULT_DUMPS_DIR: &str = "dumps/";
const DEFAULT_LOG_LEVEL: &str = "INFO";
#[derive(Debug, Clone, Parser, Serialize, Deserialize)]
#[clap(version)]
#[serde(rename_all = "snake_case", deny_unknown_fields)]
pub struct Opt {
/// The destination where the database must be created.
#[clap(long, env = "MEILI_DB_PATH", default_value = "./data.ms")]
#[clap(long, env = MEILI_DB_PATH, default_value_os_t = default_db_path())]
#[serde(default = "default_db_path")]
pub db_path: PathBuf,
/// The address on which the http server will listen.
#[clap(long, env = "MEILI_HTTP_ADDR", default_value = "127.0.0.1:7700")]
#[clap(long, env = MEILI_HTTP_ADDR, default_value_t = default_http_addr())]
#[serde(default = "default_http_addr")]
pub http_addr: String,
/// The master key allowing you to do everything on the server.
#[serde(skip)]
#[clap(long, env = "MEILI_MASTER_KEY")]
/// Sets the instance's master key, automatically protecting all routes except GET /health
#[serde(skip_serializing)]
#[clap(long, env = MEILI_MASTER_KEY)]
pub master_key: Option<String>,
/// This environment variable must be set to `production` if you are running in production.
/// If the server is running in development mode more logs will be displayed,
/// and the master key can be avoided which implies that there is no security on the updates routes.
/// This is useful to debug when integrating the engine with another service.
#[clap(long, env = "MEILI_ENV", default_value = "development", possible_values = &POSSIBLE_ENV)]
/// More logs will be displayed if the server is running in development mode. Setting the master
/// key is optional; hence no security on the updates routes. This
/// is useful to debug when integrating the engine with another service
#[clap(long, env = MEILI_ENV, default_value_t = default_env(), possible_values = &POSSIBLE_ENV)]
#[serde(default = "default_env")]
pub env: String,
/// Do not send analytics to Meili.
#[cfg(all(not(debug_assertions), feature = "analytics"))]
#[serde(skip)] // we can't send true
#[clap(long, env = "MEILI_NO_ANALYTICS")]
#[serde(skip_serializing, default)] // we can't send true
#[clap(long, env = MEILI_NO_ANALYTICS)]
pub no_analytics: bool,
/// The maximum size, in bytes, of the main lmdb database directory
#[clap(long, env = "MEILI_MAX_INDEX_SIZE", default_value = "100 GiB")]
/// The maximum size, in bytes, of the main LMDB database directory
#[clap(long, env = MEILI_MAX_INDEX_SIZE, default_value_t = default_max_index_size())]
#[serde(default = "default_max_index_size")]
pub max_index_size: Byte,
/// The maximum size, in bytes, of the update lmdb database directory
#[clap(long, env = "MEILI_MAX_TASK_DB_SIZE", default_value = "100 GiB")]
/// The maximum size, in bytes, of the update LMDB database directory
#[clap(long, env = MEILI_MAX_TASK_DB_SIZE, default_value_t = default_max_task_db_size())]
#[serde(default = "default_max_task_db_size")]
pub max_task_db_size: Byte,
/// The maximum size, in bytes, of accepted JSON payloads
#[clap(long, env = "MEILI_HTTP_PAYLOAD_SIZE_LIMIT", default_value = "100 MB")]
#[clap(long, env = MEILI_HTTP_PAYLOAD_SIZE_LIMIT, default_value_t = default_http_payload_size_limit())]
#[serde(default = "default_http_payload_size_limit")]
pub http_payload_size_limit: Byte,
/// Read server certificates from CERTFILE.
/// This should contain PEM-format certificates
/// in the right order (the first certificate should
/// certify KEYFILE, the last should be a root CA).
#[serde(skip)]
#[clap(long, env = "MEILI_SSL_CERT_PATH", parse(from_os_str))]
#[serde(skip_serializing)]
#[clap(long, env = MEILI_SSL_CERT_PATH, parse(from_os_str))]
pub ssl_cert_path: Option<PathBuf>,
/// Read private key from KEYFILE. This should be a RSA
/// Read the private key from KEYFILE. This should be an RSA
/// private key or PKCS8-encoded private key, in PEM format.
#[serde(skip)]
#[clap(long, env = "MEILI_SSL_KEY_PATH", parse(from_os_str))]
#[serde(skip_serializing)]
#[clap(long, env = MEILI_SSL_KEY_PATH, parse(from_os_str))]
pub ssl_key_path: Option<PathBuf>,
/// Enable client authentication, and accept certificates
/// signed by those roots provided in CERTFILE.
#[clap(long, env = "MEILI_SSL_AUTH_PATH", parse(from_os_str))]
#[serde(skip)]
#[serde(skip_serializing)]
#[clap(long, env = MEILI_SSL_AUTH_PATH, parse(from_os_str))]
pub ssl_auth_path: Option<PathBuf>,
/// Read DER-encoded OCSP response from OCSPFILE and staple to certificate.
/// Optional
#[serde(skip)]
#[clap(long, env = "MEILI_SSL_OCSP_PATH", parse(from_os_str))]
#[serde(skip_serializing)]
#[clap(long, env = MEILI_SSL_OCSP_PATH, parse(from_os_str))]
pub ssl_ocsp_path: Option<PathBuf>,
/// Send a fatal alert if the client does not complete client authentication.
#[serde(skip)]
#[clap(long, env = "MEILI_SSL_REQUIRE_AUTH")]
#[serde(skip_serializing, default)]
#[clap(long, env = MEILI_SSL_REQUIRE_AUTH)]
pub ssl_require_auth: bool,
/// SSL support session resumption
#[serde(skip)]
#[clap(long, env = "MEILI_SSL_RESUMPTION")]
#[serde(skip_serializing, default)]
#[clap(long, env = MEILI_SSL_RESUMPTION)]
pub ssl_resumption: bool,
/// SSL support tickets.
#[serde(skip)]
#[clap(long, env = "MEILI_SSL_TICKETS")]
#[serde(skip_serializing, default)]
#[clap(long, env = MEILI_SSL_TICKETS)]
pub ssl_tickets: bool,
/// Defines the path of the snapshot file to import.
/// This option will, by default, stop the process if a database already exist or if no snapshot exists at
/// the given path. If this option is not specified no snapshot is imported.
#[clap(long)]
/// This option will, by default, stop the process if a database already exists, or if no snapshot exists at
/// the given path. If this option is not specified, no snapshot is imported.
#[clap(long, env = MEILI_IMPORT_SNAPSHOT)]
pub import_snapshot: Option<PathBuf>,
/// The engine will ignore a missing snapshot and not return an error in such case.
#[clap(long, requires = "import-snapshot")]
/// The engine will ignore a missing snapshot and not return an error in such a case.
#[clap(
long,
env = MEILI_IGNORE_MISSING_SNAPSHOT,
requires = "import-snapshot"
)]
#[serde(default)]
pub ignore_missing_snapshot: bool,
/// The engine will skip snapshot importation and not return an error in such case.
#[clap(long, requires = "import-snapshot")]
#[clap(
long,
env = MEILI_IGNORE_SNAPSHOT_IF_DB_EXISTS,
requires = "import-snapshot"
)]
#[serde(default)]
pub ignore_snapshot_if_db_exists: bool,
/// Defines the directory path where meilisearch will create snapshot each snapshot_time_gap.
#[clap(long, env = "MEILI_SNAPSHOT_DIR", default_value = "snapshots/")]
/// Defines the directory path where Meilisearch will create a snapshot each snapshot-interval-sec.
#[clap(long, env = MEILI_SNAPSHOT_DIR, default_value_os_t = default_snapshot_dir())]
#[serde(default = "default_snapshot_dir")]
pub snapshot_dir: PathBuf,
/// Activate snapshot scheduling.
#[clap(long, env = "MEILI_SCHEDULE_SNAPSHOT")]
#[clap(long, env = MEILI_SCHEDULE_SNAPSHOT)]
#[serde(default)]
pub schedule_snapshot: bool,
/// Defines time interval, in seconds, between each snapshot creation.
#[clap(long, env = "MEILI_SNAPSHOT_INTERVAL_SEC", default_value = "86400")] // 24h
#[clap(long, env = MEILI_SNAPSHOT_INTERVAL_SEC, default_value_t = default_snapshot_interval_sec())]
#[serde(default = "default_snapshot_interval_sec")]
// 24h
pub snapshot_interval_sec: u64,
/// Import a dump from the specified path, must be a `.dump` file.
#[clap(long, conflicts_with = "import-snapshot")]
#[clap(long, env = MEILI_IMPORT_DUMP, conflicts_with = "import-snapshot")]
pub import_dump: Option<PathBuf>,
/// If the dump doesn't exists, load or create the database specified by `db-path` instead.
#[clap(long, requires = "import-dump")]
/// If the dump doesn't exist, load or create the database specified by `db-path` instead.
#[clap(long, env = MEILI_IGNORE_MISSING_DUMP, requires = "import-dump")]
#[serde(default)]
pub ignore_missing_dump: bool,
/// Ignore the dump if a database already exists, and load that database instead.
#[clap(long, requires = "import-dump")]
#[clap(long, env = MEILI_IGNORE_DUMP_IF_DB_EXISTS, requires = "import-dump")]
#[serde(default)]
pub ignore_dump_if_db_exists: bool,
/// Folder where dumps are created when the dump route is called.
#[clap(long, env = "MEILI_DUMPS_DIR", default_value = "dumps/")]
#[clap(long, env = MEILI_DUMPS_DIR, default_value_os_t = default_dumps_dir())]
#[serde(default = "default_dumps_dir")]
pub dumps_dir: PathBuf,
/// Set the log level
#[clap(long, env = "MEILI_LOG_LEVEL", default_value = "info")]
/// Set the log level. # Possible values: [ERROR, WARN, INFO, DEBUG, TRACE]
#[clap(long, env = MEILI_LOG_LEVEL, default_value_t = default_log_level())]
#[serde(default = "default_log_level")]
pub log_level: String,
/// Enables Prometheus metrics and /metrics route.
#[cfg(feature = "metrics")]
#[clap(long, env = "MEILI_ENABLE_METRICS_ROUTE")]
#[clap(long, env = MEILI_ENABLE_METRICS_ROUTE)]
#[serde(default)]
pub enable_metrics_route: bool,
#[serde(flatten)]
@ -158,15 +228,139 @@ pub struct Opt {
#[serde(flatten)]
#[clap(flatten)]
pub scheduler_options: SchedulerConfig,
/// The path to a configuration file that should be used to setup the engine.
/// Format must be TOML.
#[serde(skip_serializing)]
#[clap(long)]
pub config_file_path: Option<PathBuf>,
}
impl Opt {
/// Wether analytics should be enabled or not.
/// Whether analytics should be enabled or not.
#[cfg(all(not(debug_assertions), feature = "analytics"))]
pub fn analytics(&self) -> bool {
!self.no_analytics
}
/// Build a new Opt from config file, env vars and cli args.
pub fn try_build() -> anyhow::Result<(Self, Option<PathBuf>)> {
// Parse the args to get the config_file_path.
let mut opts = Opt::parse();
let mut config_read_from = None;
if let Some(config_file_path) = opts
.config_file_path
.clone()
.or_else(|| Some(PathBuf::from("./config.toml")))
{
match std::fs::read(&config_file_path) {
Ok(config) => {
// If the file is successfully read, we deserialize it with `toml`.
let opt_from_config = toml::from_slice::<Opt>(&config)?;
// We inject the values from the toml in the corresponding env vars if needs be. Doing so, we respect the priority toml < env vars < cli args.
opt_from_config.export_to_env();
// Once injected we parse the cli args once again to take the new env vars into scope.
opts = Opt::parse();
config_read_from = Some(config_file_path);
}
// If we have an error while reading the file defined by the user.
Err(_) if opts.config_file_path.is_some() => anyhow::bail!(
"unable to open or read the {:?} configuration file.",
opts.config_file_path.unwrap().display().to_string()
),
_ => (),
}
}
Ok((opts, config_read_from))
}
/// Exports the opts values to their corresponding env vars if they are not set.
fn export_to_env(self) {
let Opt {
db_path,
http_addr,
master_key,
env,
max_index_size,
max_task_db_size,
http_payload_size_limit,
ssl_cert_path,
ssl_key_path,
ssl_auth_path,
ssl_ocsp_path,
ssl_require_auth,
ssl_resumption,
ssl_tickets,
snapshot_dir,
schedule_snapshot,
snapshot_interval_sec,
dumps_dir,
log_level,
indexer_options,
scheduler_options,
import_snapshot: _,
ignore_missing_snapshot: _,
ignore_snapshot_if_db_exists: _,
import_dump: _,
ignore_missing_dump: _,
ignore_dump_if_db_exists: _,
config_file_path: _,
#[cfg(all(not(debug_assertions), feature = "analytics"))]
no_analytics,
#[cfg(feature = "metrics")]
enable_metrics_route,
} = self;
export_to_env_if_not_present(MEILI_DB_PATH, db_path);
export_to_env_if_not_present(MEILI_HTTP_ADDR, http_addr);
if let Some(master_key) = master_key {
export_to_env_if_not_present(MEILI_MASTER_KEY, master_key);
}
export_to_env_if_not_present(MEILI_ENV, env);
#[cfg(all(not(debug_assertions), feature = "analytics"))]
{
export_to_env_if_not_present(MEILI_NO_ANALYTICS, no_analytics.to_string());
}
export_to_env_if_not_present(MEILI_MAX_INDEX_SIZE, max_index_size.to_string());
export_to_env_if_not_present(MEILI_MAX_TASK_DB_SIZE, max_task_db_size.to_string());
export_to_env_if_not_present(
MEILI_HTTP_PAYLOAD_SIZE_LIMIT,
http_payload_size_limit.to_string(),
);
if let Some(ssl_cert_path) = ssl_cert_path {
export_to_env_if_not_present(MEILI_SSL_CERT_PATH, ssl_cert_path);
}
if let Some(ssl_key_path) = ssl_key_path {
export_to_env_if_not_present(MEILI_SSL_KEY_PATH, ssl_key_path);
}
if let Some(ssl_auth_path) = ssl_auth_path {
export_to_env_if_not_present(MEILI_SSL_AUTH_PATH, ssl_auth_path);
}
if let Some(ssl_ocsp_path) = ssl_ocsp_path {
export_to_env_if_not_present(MEILI_SSL_OCSP_PATH, ssl_ocsp_path);
}
export_to_env_if_not_present(MEILI_SSL_REQUIRE_AUTH, ssl_require_auth.to_string());
export_to_env_if_not_present(MEILI_SSL_RESUMPTION, ssl_resumption.to_string());
export_to_env_if_not_present(MEILI_SSL_TICKETS, ssl_tickets.to_string());
export_to_env_if_not_present(MEILI_SNAPSHOT_DIR, snapshot_dir);
export_to_env_if_not_present(MEILI_SCHEDULE_SNAPSHOT, schedule_snapshot.to_string());
export_to_env_if_not_present(
MEILI_SNAPSHOT_INTERVAL_SEC,
snapshot_interval_sec.to_string(),
);
export_to_env_if_not_present(MEILI_DUMPS_DIR, dumps_dir);
export_to_env_if_not_present(MEILI_LOG_LEVEL, log_level);
#[cfg(feature = "metrics")]
{
export_to_env_if_not_present(
MEILI_ENABLE_METRICS_ROUTE,
enable_metrics_route.to_string(),
);
}
indexer_options.export_to_env();
scheduler_options.export_to_env();
}
pub fn get_ssl_config(&self) -> anyhow::Result<Option<rustls::ServerConfig>> {
if let (Some(cert_path), Some(key_path)) = (&self.ssl_cert_path, &self.ssl_key_path) {
let config = rustls::ServerConfig::builder().with_safe_defaults();
@ -265,6 +459,48 @@ fn load_ocsp(filename: &Option<PathBuf>) -> anyhow::Result<Vec<u8>> {
Ok(ret)
}
/// Functions used to get default value for `Opt` fields, needs to be function because of serde's default attribute.
fn default_db_path() -> PathBuf {
PathBuf::from(DEFAULT_DB_PATH)
}
fn default_http_addr() -> String {
DEFAULT_HTTP_ADDR.to_string()
}
fn default_env() -> String {
DEFAULT_ENV.to_string()
}
fn default_max_index_size() -> Byte {
Byte::from_str(DEFAULT_MAX_INDEX_SIZE).unwrap()
}
fn default_max_task_db_size() -> Byte {
Byte::from_str(DEFAULT_MAX_TASK_DB_SIZE).unwrap()
}
fn default_http_payload_size_limit() -> Byte {
Byte::from_str(DEFAULT_HTTP_PAYLOAD_SIZE_LIMIT).unwrap()
}
fn default_snapshot_dir() -> PathBuf {
PathBuf::from(DEFAULT_SNAPSHOT_DIR)
}
fn default_snapshot_interval_sec() -> u64 {
DEFAULT_SNAPSHOT_INTERVAL_SEC
}
fn default_dumps_dir() -> PathBuf {
PathBuf::from(DEFAULT_DUMPS_DIR)
}
fn default_log_level() -> String {
DEFAULT_LOG_LEVEL.to_string()
}
#[cfg(test)]
mod test {
use super::*;

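The comments in `Opt::try_build` above describe the precedence `toml < env vars < cli args`: config-file values are exported to the corresponding environment variables only when those variables are unset, then clap is parsed a second time so command-line arguments still win. A minimal self-contained sketch of that mechanism (hypothetical; `export_if_not_present` mirrors the `export_to_env_if_not_present` helper from meilisearch-lib shown further down):

use std::env::{self, VarError};

// Insert the config-file value only when the variable is not already set.
fn export_if_not_present(key: &str, value: &str) {
    if let Err(VarError::NotPresent) = env::var(key) {
        env::set_var(key, value);
    }
}

fn main() {
    env::set_var("MEILI_HTTP_ADDR", "127.0.0.1:7701"); // set by the user
    export_if_not_present("MEILI_HTTP_ADDR", "0.0.0.0:7700"); // value from config.toml
    assert_eq!(env::var("MEILI_HTTP_ADDR").unwrap(), "127.0.0.1:7701"); // env var wins
    // clap then re-parses the CLI, so an explicit --http-addr flag would override both.
}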

@ -147,7 +147,7 @@ enum TaskDetails {
IndexInfo { primary_key: Option<String> },
#[serde(rename_all = "camelCase")]
DocumentDeletion {
received_document_ids: usize,
matched_documents: usize,
deleted_documents: Option<u64>,
},
#[serde(rename_all = "camelCase")]
@ -255,7 +255,7 @@ impl From<Task> for TaskView {
} => (
TaskType::DocumentDeletion,
Some(TaskDetails::DocumentDeletion {
received_document_ids: ids.len(),
matched_documents: ids.len(),
deleted_documents: None,
}),
),


@ -1,5 +1,6 @@
use crate::common::Server;
#[cfg(feature = "mini-dashboard")]
#[actix_rt::test]
async fn dashboard_assets_load() {
let server = Server::new().await;


@ -372,7 +372,7 @@ async fn error_add_malformed_json_documents() {
assert_eq!(
response["message"],
json!(
r#"The `json` payload provided is malformed. `Couldn't serialize document value: data did not match any variant of untagged enum Either`."#
r#"The `json` payload provided is malformed. `Couldn't serialize document value: data are neither an object nor a list of objects`."#
)
);
assert_eq!(response["code"], json!("malformed_payload"));
@ -395,7 +395,7 @@ async fn error_add_malformed_json_documents() {
assert_eq!(status_code, 400);
assert_eq!(
response["message"],
json!("The `json` payload provided is malformed. `Couldn't serialize document value: data did not match any variant of untagged enum Either`.")
json!("The `json` payload provided is malformed. `Couldn't serialize document value: data are neither an object nor a list of objects`.")
);
assert_eq!(response["code"], json!("malformed_payload"));
assert_eq!(response["type"], json!("invalid_request"));


@ -28,7 +28,7 @@ lazy_static = "1.4.0"
log = "0.4.14"
meilisearch-auth = { path = "../meilisearch-auth" }
meilisearch-types = { path = "../meilisearch-types" }
milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.33.4" }
milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.33.4", default-features = false }
mime = "0.3.16"
num_cpus = "1.13.1"
obkv = "0.2.0"
@ -64,3 +64,19 @@ nelson = { git = "https://github.com/meilisearch/nelson.git", rev = "675f1388554
paste = "1.0.6"
proptest = "1.0.0"
proptest-derive = "0.3.0"
[features]
# all specialized tokenizations
default = ["milli/default"]
# chinese specialized tokenization
chinese = ["milli/chinese"]
# hebrew specialized tokenization
hebrew = ["milli/hebrew"]
# japanese specialized tokenization
japanese = ["milli/japanese"]
# thai specialized tokenization
thai = ["milli/thai"]


@ -8,6 +8,7 @@ use meilisearch_types::internal_error;
use milli::documents::{DocumentsBatchBuilder, Error};
use milli::Object;
use serde::Deserialize;
use serde_json::error::Category;
type Result<T> = std::result::Result<T, DocumentFormatError>;
@ -40,18 +41,32 @@ impl Display for DocumentFormatError {
Self::Internal(e) => write!(f, "An internal error has occurred: `{}`.", e),
Self::MalformedPayload(me, b) => match me.borrow() {
Error::Json(se) => {
let mut message = match se.classify() {
Category::Data => {
"data are neither an object nor a list of objects".to_string()
}
_ => se.to_string(),
};
// https://github.com/meilisearch/meilisearch/issues/2107
// The user input may be insanely long. We need to truncate it.
let mut serde_msg = se.to_string();
let ellipsis = "...";
if serde_msg.len() > 100 + ellipsis.len() {
serde_msg.replace_range(50..serde_msg.len() - 85, ellipsis);
let trim_input_prefix_len = 50;
let trim_input_suffix_len = 85;
if message.len()
> trim_input_prefix_len + trim_input_suffix_len + ellipsis.len()
{
message.replace_range(
trim_input_prefix_len..message.len() - trim_input_suffix_len,
ellipsis,
);
}
write!(
f,
"The `{}` payload provided is malformed. `Couldn't serialize document value: {}`.",
b, serde_msg
b, message
)
}
_ => write!(f, "The `{}` payload provided is malformed: `{}`.", b, me),

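A worked example of the truncation rule above (a sketch assuming ASCII input, so `replace_range` stays on char boundaries): any message longer than 50 + 85 + 3 bytes keeps its first 50 and last 85 characters around an ellipsis.

// Standalone version of the trimming logic shown above.
fn truncate_error_message(mut message: String) -> String {
    let (prefix, suffix, ellipsis) = (50, 85, "...");
    if message.len() > prefix + suffix + ellipsis.len() {
        // Keep the first `prefix` and last `suffix` characters, elide the middle.
        message.replace_range(prefix..message.len() - suffix, ellipsis);
    }
    message
}

// e.g. a 1000-character serde message comes back as 50 + 3 + 85 = 138 characters.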

@ -145,7 +145,7 @@ pub fn error_code_from_str(s: &str) -> anyhow::Result<Code> {
"unsupported_media_type" => Code::UnsupportedMediaType,
"dump_already_in_progress" => Code::DumpAlreadyInProgress,
"dump_process_failed" => Code::DumpProcessFailed,
_ => bail!("unknow error code."),
_ => bail!("unknown error code."),
};
Ok(code)


@ -1,24 +0,0 @@
use std::path::Path;
use serde::{Deserialize, Serialize};
use crate::index_controller::IndexMetadata;
#[derive(Serialize, Deserialize, Debug)]
#[serde(rename_all = "camelCase")]
pub struct MetadataV1 {
pub db_version: String,
indexes: Vec<IndexMetadata>,
}
impl MetadataV1 {
#[allow(dead_code, unreachable_code, unused_variables)]
pub fn load_dump(
self,
src: impl AsRef<Path>,
dst: impl AsRef<Path>,
size: usize,
indexer_options: &IndexerOpts,
) -> anyhow::Result<()> {
anyhow::bail!("The version 1 of the dumps is not supported anymore. You can re-export your dump from a version between 0.21 and 0.24, or start fresh from a version 0.25 onwards.")
}


@ -57,10 +57,10 @@ fn patch_updates(src: impl AsRef<Path>, dst: impl AsRef<Path>) -> anyhow::Result
let updates_path = src.as_ref().join("updates/data.jsonl");
let output_updates_path = dst.as_ref().join("updates/data.jsonl");
create_dir_all(output_updates_path.parent().unwrap())?;
let udpates_file = File::open(updates_path)?;
let updates_file = File::open(updates_path)?;
let mut output_update_file = File::create(output_updates_path)?;
serde_json::Deserializer::from_reader(udpates_file)
serde_json::Deserializer::from_reader(updates_file)
.into_iter::<compat::v4::Task>()
.try_for_each(|task| -> anyhow::Result<()> {
let task: Task = task?.into();


@ -105,6 +105,7 @@ impl Index {
let mut options = EnvOpenOptions::new();
options.map_size(size);
options.max_readers(1024);
let index = milli::Index::new(options, &dst_dir_path)?;
let mut txn = index.write_txn()?;


@ -94,6 +94,7 @@ impl Index {
create_dir_all(&path)?;
let mut options = EnvOpenOptions::new();
options.map_size(size);
options.max_readers(1024);
let inner = Arc::new(milli::Index::new(options, &path)?);
Ok(Index {
inner,


@ -27,7 +27,7 @@ pub const DEFAULT_CROP_MARKER: fn() -> String = || "…".to_string();
pub const DEFAULT_HIGHLIGHT_PRE_TAG: fn() -> String = || "<em>".to_string();
pub const DEFAULT_HIGHLIGHT_POST_TAG: fn() -> String = || "</em>".to_string();
/// The maximimum number of results that the engine
/// The maximum number of results that the engine
/// will be able to return in one search call.
pub const DEFAULT_PAGINATION_MAX_TOTAL_HITS: usize = 1000;


@ -51,7 +51,7 @@ impl MapIndexStore {
#[async_trait::async_trait]
impl IndexStore for MapIndexStore {
async fn create(&self, uuid: Uuid) -> Result<Index> {
// We need to keep the lock until we are sure the db file has been opened correclty, to
// We need to keep the lock until we are sure the db file has been opened correctly, to
// ensure that another db is not created at the same time.
let mut lock = self.index_store.write().await;


@ -11,6 +11,8 @@ mod snapshot;
pub mod tasks;
mod update_file_store;
use std::env::VarError;
use std::ffi::OsStr;
use std::path::Path;
pub use index_controller::MeiliSearch;
@ -35,3 +37,14 @@ pub fn is_empty_db(db_path: impl AsRef<Path>) -> bool {
true
}
}
/// Checks if the key is defined in the environment variables.
/// If not, inserts it with the given value.
pub fn export_to_env_if_not_present<T>(key: &str, value: T)
where
T: AsRef<OsStr>,
{
if let Err(VarError::NotPresent) = std::env::var(key) {
std::env::set_var(key, value);
}
}
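
A short usage sketch of this helper (the variable values are hypothetical):

std::env::remove_var("MEILI_DUMPS_DIR");
export_to_env_if_not_present("MEILI_DUMPS_DIR", "dumps/"); // unset: value is inserted
assert_eq!(std::env::var("MEILI_DUMPS_DIR").unwrap(), "dumps/");

std::env::set_var("MEILI_LOG_LEVEL", "DEBUG");
export_to_env_if_not_present("MEILI_LOG_LEVEL", "INFO"); // already set: kept as-is
assert_eq!(std::env::var("MEILI_LOG_LEVEL").unwrap(), "DEBUG");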


@ -1,33 +1,40 @@
use crate::export_to_env_if_not_present;
use core::fmt;
use std::{convert::TryFrom, num::ParseIntError, ops::Deref, str::FromStr};
use byte_unit::{Byte, ByteError};
use clap::Parser;
use milli::update::IndexerConfig;
use serde::Serialize;
use serde::{Deserialize, Serialize};
use sysinfo::{RefreshKind, System, SystemExt};
#[derive(Debug, Clone, Parser, Serialize)]
const MEILI_MAX_INDEXING_MEMORY: &str = "MEILI_MAX_INDEXING_MEMORY";
const MEILI_MAX_INDEXING_THREADS: &str = "MEILI_MAX_INDEXING_THREADS";
const DISABLE_AUTO_BATCHING: &str = "DISABLE_AUTO_BATCHING";
const DEFAULT_LOG_EVERY_N: usize = 100000;
#[derive(Debug, Clone, Parser, Serialize, Deserialize)]
#[serde(rename_all = "snake_case", deny_unknown_fields)]
pub struct IndexerOpts {
/// The amount of documents to skip before printing
/// a log regarding the indexing advancement.
#[serde(skip)]
#[clap(long, default_value = "100000", hide = true)] // 100k
#[serde(skip_serializing, default = "default_log_every_n")]
#[clap(long, default_value_t = default_log_every_n(), hide = true)] // 100k
pub log_every_n: usize,
/// Grenad max number of chunks in bytes.
#[serde(skip)]
#[serde(skip_serializing)]
#[clap(long, hide = true)]
pub max_nb_chunks: Option<usize>,
/// The maximum amount of memory the indexer will use. It defaults to 2/3
/// of the available memory. It is recommended to use something like 80%-90%
/// of the available memory, no more.
/// The maximum amount of memory the indexer will use.
///
/// In case the engine is unable to retrieve the available memory the engine will
/// try to use the memory it needs but without real limit, this can lead to
/// Out-Of-Memory issues and it is recommended to specify the amount of memory to use.
#[clap(long, env = "MEILI_MAX_INDEXING_MEMORY", default_value_t)]
#[clap(long, env = MEILI_MAX_INDEXING_MEMORY, default_value_t)]
#[serde(default)]
pub max_indexing_memory: MaxMemory,
/// The maximum number of threads the indexer will use.
@ -35,18 +42,43 @@ pub struct IndexerOpts {
/// it will use the maximum number of available cores.
///
/// It defaults to half of the available threads.
#[clap(long, env = "MEILI_MAX_INDEXING_THREADS", default_value_t)]
#[clap(long, env = MEILI_MAX_INDEXING_THREADS, default_value_t)]
#[serde(default)]
pub max_indexing_threads: MaxThreads,
}
#[derive(Debug, Clone, Parser, Default, Serialize)]
#[derive(Debug, Clone, Parser, Default, Serialize, Deserialize)]
#[serde(rename_all = "snake_case", deny_unknown_fields)]
pub struct SchedulerConfig {
/// The engine will disable task auto-batching,
/// and will sequentially compute each task one by one.
#[clap(long, env = "DISABLE_AUTO_BATCHING")]
#[clap(long, env = DISABLE_AUTO_BATCHING)]
#[serde(default)]
pub disable_auto_batching: bool,
}
impl IndexerOpts {
/// Exports the values to their corresponding env vars if they are not set.
pub fn export_to_env(self) {
let IndexerOpts {
max_indexing_memory,
max_indexing_threads,
log_every_n: _,
max_nb_chunks: _,
} = self;
if let Some(max_indexing_memory) = max_indexing_memory.0 {
export_to_env_if_not_present(
MEILI_MAX_INDEXING_MEMORY,
max_indexing_memory.to_string(),
);
}
export_to_env_if_not_present(
MEILI_MAX_INDEXING_THREADS,
max_indexing_threads.0.to_string(),
);
}
}
impl TryFrom<&IndexerOpts> for IndexerConfig {
type Error = anyhow::Error;
@ -77,8 +109,17 @@ impl Default for IndexerOpts {
}
}
impl SchedulerConfig {
pub fn export_to_env(self) {
let SchedulerConfig {
disable_auto_batching,
} = self;
export_to_env_if_not_present(DISABLE_AUTO_BATCHING, disable_auto_batching.to_string());
}
}
/// A type used to detect the max memory available and use 2/3 of it.
#[derive(Debug, Clone, Copy, Serialize)]
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
pub struct MaxMemory(Option<Byte>);
impl FromStr for MaxMemory {
@ -134,7 +175,7 @@ fn total_memory_bytes() -> Option<u64> {
}
}
#[derive(Debug, Clone, Copy, Serialize)]
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
pub struct MaxThreads(usize);
impl FromStr for MaxThreads {
@ -164,3 +205,7 @@ impl Deref for MaxThreads {
&self.0
}
}
fn default_log_every_n() -> usize {
DEFAULT_LOG_EVERY_N
}
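
`MaxMemory` is documented above as detecting the available memory and using 2/3 of it. A hedged sketch of that default (a hypothetical free function, reusing the `total_memory_bytes` helper defined in this file):

// Two thirds of the detected total memory, or no limit when the amount
// cannot be retrieved.
fn default_max_memory_bytes() -> Option<u64> {
    total_memory_bytes().map(|total| total * 2 / 3)
}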


@ -181,6 +181,7 @@ impl SnapshotJob {
let mut options = milli::heed::EnvOpenOptions::new();
options.map_size(self.index_size);
options.max_readers(1024);
let index = milli::Index::new(options, entry.path())?;
index.copy_to_path(dst, CompactionOption::Enabled)?;
}


@ -117,7 +117,7 @@ impl TaskStore {
match filter {
Some(filter) => filter
.pass(&task)
.then(|| task)
.then_some(task)
.ok_or(TaskError::UnexistingTask(id)),
None => Ok(task),
}
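
Context for this one-line change (a note, not part of the commit): `bool::then_some(t)`, stabilized in Rust 1.62, takes the value directly, while `then` takes a closure; both return `Some(t)` only when the bool is `true`.

assert_eq!(true.then_some("task"), true.then(|| "task"));
assert_eq!(false.then_some("task"), None);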


@ -63,7 +63,7 @@ impl Store {
/// Returns the id for the next task.
///
/// The required `mut txn` acts as a reservation system. It guarantees that as long as you commit
/// the task to the store in the same transaction, no one else will hav this task id.
/// the task to the store in the same transaction, no one else will have this task id.
pub fn next_task_id(&self, txn: &mut RwTxn) -> Result<TaskId> {
let id = self
.tasks