diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml index 1006a064d..3f6cb9462 100644 --- a/.github/ISSUE_TEMPLATE/config.yml +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -1,13 +1,13 @@ contact_links: + - name: Support questions & other + url: https://github.com/meilisearch/meilisearch/discussions/new + about: For any other question, open a discussion in this repository - name: Language support request & feedback url: https://github.com/meilisearch/product/discussions/categories/feedback-feature-proposal?discussions_q=label%3Aproduct%3Acore%3Atokenizer+category%3A%22Feedback+%26+Feature+Proposal%22 about: The requests and feedback regarding Language support are not managed in this repository. Please upvote the related discussion in our dedicated product repository or open a new one if it doesn't exist. - - name: Feature request & feedback + - name: Any other feature request & feedback url: https://github.com/meilisearch/product/discussions/categories/feedback-feature-proposal about: The feature requests and feedback regarding the already existing features are not managed in this repository. 
Please open a discussion in our dedicated product repository - name: Documentation issue url: https://github.com/meilisearch/documentation/issues/new about: For documentation issues, open an issue or a PR in the documentation repository - - name: Support questions & other - url: https://github.com/meilisearch/meilisearch/discussions/new - about: For any other question, open a discussion in this repository diff --git a/.github/scripts/is-latest-release.sh b/.github/scripts/is-latest-release.sh index 81534a2f7..54f0a9d3a 100644 --- a/.github/scripts/is-latest-release.sh +++ b/.github/scripts/is-latest-release.sh @@ -85,7 +85,7 @@ get_latest() { latest="" current_tag="" for release_info in $releases; do - if [ $i -eq 0 ]; then # Cheking tag_name + if [ $i -eq 0 ]; then # Checking tag_name if echo "$release_info" | grep -q "$GREP_SEMVER_REGEXP"; then # If it's not an alpha or beta release current_tag=$release_info else diff --git a/.github/workflows/create-issue-dependencies.yml b/.github/workflows/create-issue-dependencies.yml index 638088c2e..e3deebe2a 100644 --- a/.github/workflows/create-issue-dependencies.yml +++ b/.github/workflows/create-issue-dependencies.yml @@ -3,7 +3,7 @@ on: schedule: - cron: '0 0 1 */3 *' workflow_dispatch: - + jobs: create-issue: runs-on: ubuntu-latest @@ -12,12 +12,12 @@ jobs: - name: Create an issue uses: actions-ecosystem/action-create-issue@v1 with: - github_token: ${{ secrets.GITHUB_TOKEN }} + github_token: ${{ secrets.MEILI_BOT_GH_PAT }} title: Upgrade dependencies body: | We need to update the dependencies of the Meilisearch repository, and, if possible, the dependencies of all the core-team repositories that Meilisearch depends on (milli, charabia, heed...). - ⚠️ This issue should only be done at the beginning of the sprint! + ⚠️ This issue should only be done at the beginning of the sprint! 
labels: | dependencies maintenance diff --git a/.github/workflows/milestone-workflow.yml b/.github/workflows/milestone-workflow.yml new file mode 100644 index 000000000..4cb87684d --- /dev/null +++ b/.github/workflows/milestone-workflow.yml @@ -0,0 +1,156 @@ +name: Milestone's workflow + +# /!\ No git flow are handled here + +# For each Milestone created (not opened!), and if the release is NOT a patch release (only the patch changed) +# - the roadmap issue is created, see https://github.com/meilisearch/core-team/blob/main/issue-templates/roadmap-issue.md +# - the changelog issue is created, see https://github.com/meilisearch/core-team/blob/main/issue-templates/changelog-issue.md + +# For each Milestone closed +# - the `release_version` label is created +# - this label is applied to all issues/PRs in the Milestone + +on: + milestone: + types: [created, closed] + +env: + MILESTONE_VERSION: ${{ github.event.milestone.title }} + MILESTONE_URL: ${{ github.event.milestone.html_url }} + MILESTONE_DUE_ON: ${{ github.event.milestone.due_on }} + GH_TOKEN: ${{ secrets.MEILI_BOT_GH_PAT }} + +jobs: + +# ----------------- +# MILESTONE CREATED +# ----------------- + + get-release-version: + if: github.event.action == 'created' + runs-on: ubuntu-latest + outputs: + is-patch: ${{ steps.check-patch.outputs.is-patch }} + env: + MILESTONE_VERSION: ${{ github.event.milestone.title }} + steps: + - uses: actions/checkout@v3 + - name: Check if this release is a patch release only + id: check-patch + run: | + echo version: $MILESTONE_VERSION + if [[ $MILESTONE_VERSION =~ ^v[0-9]+\.[0-9]+\.0$ ]]; then + echo 'This is NOT a patch release' + echo ::set-output name=is-patch::false + elif [[ $MILESTONE_VERSION =~ ^v[0-9]+\.[0-9]+\.[0-9]+$ ]]; then + echo 'This is a patch release' + echo ::set-output name=is-patch::true + else + echo "Not a valid format of release, check the Milestone's title." 
+ echo 'Should be vX.Y.Z' + exit 1 + fi + + create-roadmap-issue: + needs: get-release-version + # Create the roadmap issue if the release is not only a patch release + if: github.event.action == 'created' && needs.get-release-version.outputs.is-patch == 'false' + runs-on: ubuntu-latest + env: + ISSUE_TEMPLATE: issue-template.md + steps: + - uses: actions/checkout@v3 + - name: Download the issue template + run: curl -s https://raw.githubusercontent.com/meilisearch/core-team/main/issue-templates/roadmap-issue.md > $ISSUE_TEMPLATE + - name: Replace all empty occurrences in the templates + run: | + # Replace all <> occurrences + sed -i "s/<>/$MILESTONE_VERSION/g" $ISSUE_TEMPLATE + + # Replace all <> occurrences + milestone_id=$(echo $MILESTONE_URL | cut -d '/' -f 7) + sed -i "s/<>/$milestone_id/g" $ISSUE_TEMPLATE + + # Replace release date if exists + if [[ ! -z $MILESTONE_DUE_ON ]]; then + date=$(echo $MILESTONE_DUE_ON | cut -d 'T' -f 1) + sed -i "s/Release date\: 20XX-XX-XX/Release date\: $date/g" $ISSUE_TEMPLATE + fi + - name: Create the issue + run: | + gh issue create \ + --title "$MILESTONE_VERSION ROADMAP" \ + --label 'epic,impacts docs,impacts integrations,impacts cloud' \ + --body-file $ISSUE_TEMPLATE \ + --milestone $MILESTONE_VERSION + + create-changelog-issue: + needs: get-release-version + # Create the changelog issue if the release is not only a patch release + if: github.event.action == 'created' && needs.get-release-version.outputs.is-patch == 'false' + runs-on: ubuntu-latest + env: + ISSUE_TEMPLATE: issue-template.md + steps: + - uses: actions/checkout@v3 + - name: Download the issue template + run: curl -s https://raw.githubusercontent.com/meilisearch/core-team/main/issue-templates/changelog-issue.md > $ISSUE_TEMPLATE + - name: Replace all empty occurrences in the templates + run: | + # Replace all <> occurrences + sed -i "s/<>/$MILESTONE_VERSION/g" $ISSUE_TEMPLATE + + # Replace all <> occurrences + milestone_id=$(echo $MILESTONE_URL | cut -d '/' -f 
7) + sed -i "s/<>/$milestone_id/g" $ISSUE_TEMPLATE + - name: Create the issue + run: | + gh issue create \ + --title "Create release changelogs for $MILESTONE_VERSION" \ + --label 'impacts docs,documentation' \ + --body-file $ISSUE_TEMPLATE \ + --milestone $MILESTONE_VERSION \ + --assignee curquiza + +# ---------------- +# MILESTONE CLOSED +# ---------------- + + create-release-label: + if: github.event.action == 'closed' + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - name: Create the ${{ env.MILESTONE_VERSION }} label + run: | + label_description="PRs/issues solved in $MILESTONE_VERSION" + if [[ ! -z $MILESTONE_DUE_ON ]]; then + date=$(echo $MILESTONE_DUE_ON | cut -d 'T' -f 1) + label_description="$label_description released on $date" + fi + + gh api repos/$GITHUB_REPOSITORY/labels \ + --method POST \ + -H "Accept: application/vnd.github+json" \ + -f name="$MILESTONE_VERSION" \ + -f description="$label_description" \ + -f color='ff5ba3' + + labelize-all-milestone-content: + if: github.event.action == 'closed' + needs: create-release-label + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - name: Add label ${{ env.MILESTONE_VERSION }} to all PRs in the Milestone + run: | + prs=$(gh pr list --search milestone:"$MILESTONE_VERSION" --limit 1000 --state all --json number --template '{{range .}}{{tablerow (printf "%v" .number)}}{{end}}') + for pr in $prs; do + gh pr edit $pr --add-label $MILESTONE_VERSION + done + - name: Add label ${{ env.MILESTONE_VERSION }} to all issues in the Milestone + run: | + issues=$(gh issue list --search milestone:"$MILESTONE_VERSION" --limit 1000 --state all --json number --template '{{range .}}{{tablerow (printf "%v" .number)}}{{end}}') + for issue in $issues; do + gh issue edit $issue --add-label $MILESTONE_VERSION + done diff --git a/.github/workflows/publish-binaries.yml b/.github/workflows/publish-binaries.yml index a2f43d867..95088b1ef 100644 --- a/.github/workflows/publish-binaries.yml +++ 
b/.github/workflows/publish-binaries.yml @@ -1,4 +1,6 @@ on: + schedule: + - cron: '0 2 * * *' # Every day at 2:00am release: types: [published] @@ -8,8 +10,9 @@ jobs: check-version: name: Check the version validity runs-on: ubuntu-latest + # No need to check the version for dry run (cron) steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 # Check if the tag has the v.. format. # If yes, it means we are publishing an official release. # If no, we are releasing a RC, so no need to check the version. @@ -25,7 +28,7 @@ jobs: echo ::set-output name=stable::false fi - name: Check release validity - if: steps.check-tag-format.outputs.stable == 'true' + if: github.event_name != 'schedule' && steps.check-tag-format.outputs.stable == 'true' run: bash .github/scripts/check-release.sh publish: @@ -54,10 +57,12 @@ jobs: - uses: actions/checkout@v3 - name: Build run: cargo build --release --locked + # No need to upload binaries for dry run (cron) - name: Upload binaries to release + if: github.event_name != 'schedule' uses: svenstaro/upload-release-action@v1-release with: - repo_token: ${{ secrets.PUBLISH_TOKEN }} + repo_token: ${{ secrets.MEILI_BOT_GH_PAT }} file: target/release/${{ matrix.artifact_name }} asset_name: ${{ matrix.asset_name }} tag: ${{ github.ref }} @@ -123,9 +128,11 @@ jobs: run: ls -lR ./target - name: Upload the binary to release + # No need to upload binaries for dry run (cron) + if: github.event_name != 'schedule' uses: svenstaro/upload-release-action@v1-release with: - repo_token: ${{ secrets.PUBLISH_TOKEN }} + repo_token: ${{ secrets.MEILI_BOT_GH_PAT }} file: target/${{ matrix.target }}/release/meilisearch asset_name: ${{ matrix.asset_name }} tag: ${{ github.ref }} diff --git a/.github/workflows/publish-deb-brew-pkg.yml b/.github/workflows/publish-deb-brew-pkg.yml index b5fc330bf..a135ddafb 100644 --- a/.github/workflows/publish-deb-brew-pkg.yml +++ b/.github/workflows/publish-deb-brew-pkg.yml @@ -9,7 +9,7 @@ jobs: name: Check the version 
validity runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Check release validity run: bash .github/scripts/check-release.sh @@ -29,7 +29,7 @@ jobs: - name: Upload debian pkg to release uses: svenstaro/upload-release-action@v1-release with: - repo_token: ${{ secrets.GITHUB_TOKEN }} + repo_token: ${{ secrets.MEILI_BOT_GH_PAT }} file: target/debian/meilisearch.deb asset_name: meilisearch.deb tag: ${{ github.ref }} diff --git a/.github/workflows/publish-docker-images.yml b/.github/workflows/publish-docker-images.yml index 72234fc01..f2e119a6d 100644 --- a/.github/workflows/publish-docker-images.yml +++ b/.github/workflows/publish-docker-images.yml @@ -12,7 +12,7 @@ jobs: docker: runs-on: docker steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 # Check if the tag has the v.. format. If yes, it means we are publishing an official release. # In this situation, we need to set `output.stable` to create/update the following tags (additionally to the `vX.Y.Z` Docker tag): @@ -53,7 +53,7 @@ jobs: uses: docker/metadata-action@v4 with: images: getmeili/meilisearch - # The lastest and `vX.Y` tags are only pushed for the official Meilisearch releases + # The latest and `vX.Y` tags are only pushed for the official Meilisearch releases # See https://github.com/docker/metadata-action#latest-tag flavor: latest=false tags: | @@ -62,10 +62,19 @@ jobs: type=raw,value=latest,enable=${{ steps.check-tag-format.outputs.stable == 'true' }} - name: Build and push - id: docker_build uses: docker/build-push-action@v3 with: # We do not push tags for the cron jobs, this is only for test purposes push: ${{ github.event_name != 'schedule' }} platforms: linux/amd64,linux/arm64 tags: ${{ steps.meta.outputs.tags }} + + # /!\ Don't touch this without checking with Cloud team + - name: Send CI information to Cloud team + if: github.event_name != 'schedule' + uses: peter-evans/repository-dispatch@v2 + with: + token: ${{ 
secrets.MEILI_BOT_GH_PAT }} + repository: meilisearch/meilisearch-cloud + event-type: cloud-docker-build + client-payload: '{ "meilisearch_version": "${{ steps.meta.outputs.tags }}", "stable": "${{ steps.check-tag-format.outputs.stable }}" }' diff --git a/.github/workflows/update-cargo-toml-version.yml b/.github/workflows/update-cargo-toml-version.yml new file mode 100644 index 000000000..968b5f050 --- /dev/null +++ b/.github/workflows/update-cargo-toml-version.yml @@ -0,0 +1,47 @@ +name: Update Meilisearch version in all Cargo.toml files + +on: + workflow_dispatch: + inputs: + new_version: + description: 'The new version (vX.Y.Z)' + required: true + +env: + NEW_VERSION: ${{ github.event.inputs.new_version }} + NEW_BRANCH: update-version-${{ github.event.inputs.new_version }} + GH_TOKEN: ${{ secrets.MEILI_BOT_GH_PAT }} + +jobs: + + update-version-cargo-toml: + name: Update version in Cargo.toml files + runs-on: ubuntu-20.04 + steps: + - uses: actions/checkout@v3 + - uses: actions-rs/toolchain@v1 + with: + profile: minimal + toolchain: stable + override: true + - name: Install sd + run: cargo install sd + - name: Update Cargo.toml files + run: | + raw_new_version=$(echo $NEW_VERSION | cut -d 'v' -f 2) + new_string="version = \"$raw_new_version\"" + sd '^version = "\d+.\d+.\w+"$' "$new_string" */Cargo.toml + - name: Build Meilisearch to update Cargo.lock + run: cargo build + - name: Commit and push the changes to the ${{ env.NEW_BRANCH }} branch + uses: EndBug/add-and-commit@v9 + with: + message: "Update version for the next release (${{ env.NEW_VERSION }}) in Cargo.toml files" + new_branch: ${{ env.NEW_BRANCH }} + - name: Create the PR pointing to ${{ github.ref_name }} + run: | + gh pr create \ + --title "Update version for the next release ($NEW_VERSION) in Cargo.toml files" \ + --body '⚠️ This PR is automatically generated. Check the new version is the expected one before merging.' 
\ + --label 'skip changelog' \ + --milestone $NEW_VERSION diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 1c40c7dac..a335460ab 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -10,12 +10,24 @@ If Meilisearch does not offer optimized support for your language, please consid ## Table of Contents +- [Hacktoberfest 2022](#hacktoberfest-2022) - [Assumptions](#assumptions) - [How to Contribute](#how-to-contribute) - [Development Workflow](#development-workflow) - [Git Guidelines](#git-guidelines) - [Release Process (for internal team only)](#release-process-for-internal-team-only) +## Hacktoberfest 2022 + +It's [Hacktoberfest month](https://hacktoberfest.com)! 🥳 + +Thanks so much for participating with Meilisearch this year! +1. We will follow the quality standards set by the organizers of Hacktoberfest (see detail on their [website](https://hacktoberfest.com/participation/#spam)). Our reviewers will not consider any PR that doesn’t match that standard. +2. PRs reviews will take place from Monday to Thursday, during usual working hours, CEST time. If you submit outside of these hours, there’s no need to panic; we will get around to your contribution. +3. There will be no issue assignment as we don’t want people to ask to be assigned specific issues and never return, discouraging the volunteer contributors from opening a PR to fix this issue. We take the liberty to choose the PR that best fixes the issue, so we encourage you to get to it as soon as possible and do your best! + +You can check out the longer, more complete guideline documentation [here](https://github.com/meilisearch/.github/blob/main/Hacktoberfest_2022_contributors_guidelines.md). + ## Assumptions 1. 
**You're familiar with [GitHub](https://github.com) and the [Pull Requests](https://help.github.com/en/github/collaborating-with-issues-and-pull-requests/about-pull-requests)(PR) workflow.** @@ -102,7 +114,7 @@ The full Meilisearch release process is described in [this guide](https://github ### Release assets For each release, the following assets are created: -- Binaries for differents platforms (Linux, MacOS, Windows and ARM architectures) are attached to the GitHub release +- Binaries for different platforms (Linux, MacOS, Windows and ARM architectures) are attached to the GitHub release - Binaries are pushed to HomeBrew and APT (not published for RC) - Docker tags are created/updated: - `vX.Y.Z` diff --git a/Cargo.lock b/Cargo.lock index e9ee22378..c3b835d27 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2091,6 +2091,7 @@ dependencies = [ "time 0.3.14", "tokio", "tokio-stream", + "toml", "urlencoding", "uuid", "vergen", diff --git a/Dockerfile b/Dockerfile index ad21329fc..0e54fcdae 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,5 +1,5 @@ # Compile -FROM rust:alpine3.14 AS compiler +FROM rust:alpine3.16 AS compiler RUN apk add -q --update-cache --no-cache build-base openssl-dev @@ -19,7 +19,7 @@ RUN set -eux; \ cargo build --release # Run -FROM alpine:3.14 +FROM alpine:3.16 ENV MEILI_HTTP_ADDR 0.0.0.0:7700 ENV MEILI_SERVER_PROVIDER docker diff --git a/README.md b/README.md index f728d8a6b..2bbc3dfe1 100644 --- a/README.md +++ b/README.md @@ -34,6 +34,14 @@ Meilisearch helps you shape a delightful search experience in a snap, offering f 🔥 [**Try it!**](https://where2watch.meilisearch.com/) 🔥 +## 🎃 Hacktoberfest + +It’s Hacktoberfest 2022 @Meilisearch + +[Hacktoberfest](https://hacktoberfest.com/) is a celebration of the open-source community. This year, and for the third time in a row, Meilisearch is participating in this fantastic event. + +You’d like to contribute? Don’t hesitate to check out our [contributing guidelines](./CONTRIBUTING.md). 
+ ## ✨ Features - **Search-as-you-type:** find search results in less than 50 milliseconds diff --git a/config.toml b/config.toml new file mode 100644 index 000000000..8da71c70a --- /dev/null +++ b/config.toml @@ -0,0 +1,129 @@ +# This file shows the default configuration of Meilisearch. +# All variables are defined here https://docs.meilisearch.com/learn/configuration/instance_options.html#environment-variables + +db_path = "./data.ms" +# The destination where the database must be created. + +env = "development" # Possible values: [development, production] +# This environment variable must be set to `production` if you are running in production. +# More logs will be displayed if the server is running in development mode. Setting the master +# key is optional; hence no security on the updates routes. This +# is useful to debug when integrating the engine with another service. + +http_addr = "127.0.0.1:7700" +# The address on which the HTTP server will listen. + +# master_key = "MASTER_KEY" +# Sets the instance's master key, automatically protecting all routes except GET /health. + +# no_analytics = false +# Do not send analytics to Meilisearch. + +disable_auto_batching = false +# The engine will disable task auto-batching, and will sequentially compute each task one by one. + + +### DUMP + +dumps_dir = "dumps/" +# Folder where dumps are created when the dump route is called. + +# import_dump = "./path/to/my/file.dump" +# Import a dump from the specified path, must be a `.dump` file. + +ignore_missing_dump = false +# If the dump doesn't exist, load or create the database specified by `db_path` instead. + +ignore_dump_if_db_exists = false +# Ignore the dump if a database already exists, and load that database instead. + +### + + +log_level = "INFO" # Possible values: [ERROR, WARN, INFO, DEBUG, TRACE] +# Set the log level. + + +### INDEX + +max_index_size = "100 GiB" +# The maximum size, in bytes, of the main LMDB database directory. 
+ +# max_indexing_memory = "2 GiB" +# The maximum amount of memory the indexer will use. +# +# In case the engine is unable to retrieve the available memory the engine will try to use +# the memory it needs but without real limit, this can lead to Out-Of-Memory issues and it +# is recommended to specify the amount of memory to use. +# +# /!\ The default value is system dependent /!\ + +# max_indexing_threads = 4 +# The maximum number of threads the indexer will use. If the number set is higher than the +# real number of cores available in the machine, it will use the maximum number of +# available cores. +# +# It defaults to half of the available threads. + +### + + +max_task_db_size = "100 GiB" +# The maximum size, in bytes, of the update LMDB database directory. + +http_payload_size_limit = "100 MB" +# The maximum size, in bytes, of accepted JSON payloads. + + +### SNAPSHOT + +schedule_snapshot = false +# Activate snapshot scheduling. + +snapshot_dir = "snapshots/" +# Defines the directory path where Meilisearch will create a snapshot each snapshot_interval_sec. + +snapshot_interval_sec = 86400 +# Defines time interval, in seconds, between each snapshot creation. + +# import_snapshot = "./path/to/my/snapshot" +# Defines the path of the snapshot file to import. This option will, by default, stop the +# process if a database already exists, or if no snapshot exists at the given path. If this +# option is not specified, no snapshot is imported. + +ignore_missing_snapshot = false +# The engine will ignore a missing snapshot and not return an error in such a case. + +ignore_snapshot_if_db_exists = false +# The engine will skip snapshot importation and not return an error in such a case. + +### + + +### SSL + +# ssl_auth_path = "./path/to/root" +# Enable client authentication, and accept certificates signed by those roots provided in CERTFILE. + +# ssl_cert_path = "./path/to/CERTFILE" +# Read server certificates from CERTFILE. 
This should contain PEM-format certificates in +# the right order (the first certificate should certify KEYFILE, the last should be a root +# CA). + +# ssl_key_path = "./path/to/private-key" +# Read the private key from KEYFILE. This should be an RSA private key or PKCS8-encoded +# private key, in PEM format. + +# ssl_ocsp_path = "./path/to/OCSPFILE" +# Read DER-encoded OCSP response from OCSPFILE and staple to certificate. Optional. + +ssl_require_auth = false +# Send a fatal alert if the client does not complete client authentication. + +ssl_resumption = false +# SSL support session resumption. + +ssl_tickets = false +# SSL support tickets. + +### diff --git a/meilisearch-auth/Cargo.toml b/meilisearch-auth/Cargo.toml index 470d5b8d1..3bbc09c4a 100644 --- a/meilisearch-auth/Cargo.toml +++ b/meilisearch-auth/Cargo.toml @@ -7,7 +7,7 @@ edition = "2021" enum-iterator = "0.7.0" hmac = "0.12.1" meilisearch-types = { path = "../meilisearch-types" } -milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.33.4" } +milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.33.4", default-features = false } rand = "0.8.4" serde = { version = "1.0.136", features = ["derive"] } serde_json = { version = "1.0.85", features = ["preserve_order"] } diff --git a/meilisearch-http/Cargo.toml b/meilisearch-http/Cargo.toml index d69e7c21d..b26934f2b 100644 --- a/meilisearch-http/Cargo.toml +++ b/meilisearch-http/Cargo.toml @@ -46,7 +46,7 @@ jsonwebtoken = "8.0.1" log = "0.4.14" meilisearch-auth = { path = "../meilisearch-auth" } meilisearch-types = { path = "../meilisearch-types" } -meilisearch-lib = { path = "../meilisearch-lib" } +meilisearch-lib = { path = "../meilisearch-lib", default-features = false } mimalloc = { version = "0.1.29", default-features = false } mime = "0.3.16" num_cpus = "1.13.1" @@ -76,6 +76,7 @@ thiserror = "1.0.30" time = { version = "0.3.7", features = ["serde-well-known", "formatting", "parsing", "macros"] } tokio = { version = 
"1.17.0", features = ["full"] } tokio-stream = "0.1.8" +toml = "0.5.9" uuid = { version = "1.1.2", features = ["serde", "v4"] } walkdir = "2.3.2" prometheus = { version = "0.13.0", features = ["process"], optional = true } @@ -90,7 +91,7 @@ urlencoding = "2.1.0" yaup = "0.2.0" [features] -default = ["analytics", "mini-dashboard"] +default = ["analytics", "meilisearch-lib/default", "mini-dashboard"] metrics = ["prometheus"] analytics = ["segment"] mini-dashboard = [ @@ -104,6 +105,10 @@ mini-dashboard = [ "tempfile", "zip", ] +chinese = ["meilisearch-lib/chinese"] +hebrew = ["meilisearch-lib/hebrew"] +japanese = ["meilisearch-lib/japanese"] +thai = ["meilisearch-lib/thai"] [package.metadata.mini-dashboard] assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.2/build.zip" diff --git a/meilisearch-http/src/analytics/segment_analytics.rs b/meilisearch-http/src/analytics/segment_analytics.rs index f0dfd0fab..7b76cdd80 100644 --- a/meilisearch-http/src/analytics/segment_analytics.rs +++ b/meilisearch-http/src/analytics/segment_analytics.rs @@ -349,16 +349,16 @@ pub struct SearchAggregator { // sort sort_with_geo_point: bool, - // everytime a request has a filter, this field must be incremented by the number of terms it contains + // every time a request has a filter, this field must be incremented by the number of terms it contains sort_sum_of_criteria_terms: usize, - // everytime a request has a filter, this field must be incremented by one + // every time a request has a filter, this field must be incremented by one sort_total_number_of_criteria: usize, // filter filter_with_geo_radius: bool, - // everytime a request has a filter, this field must be incremented by the number of terms it contains + // every time a request has a filter, this field must be incremented by the number of terms it contains filter_sum_of_criteria_terms: usize, - // everytime a request has a filter, this field must be incremented by one + // every time a request 
has a filter, this field must be incremented by one filter_total_number_of_criteria: usize, used_syntax: HashMap, @@ -366,7 +366,7 @@ pub struct SearchAggregator { // The maximum number of terms in a q request max_terms_number: usize, - // everytime a search is done, we increment the counter linked to the used settings + // every time a search is done, we increment the counter linked to the used settings matching_strategy: HashMap, // pagination diff --git a/meilisearch-http/src/main.rs b/meilisearch-http/src/main.rs index 9627aeef8..b6f92ae28 100644 --- a/meilisearch-http/src/main.rs +++ b/meilisearch-http/src/main.rs @@ -1,9 +1,9 @@ use std::env; +use std::path::PathBuf; use std::sync::Arc; use actix_web::http::KeepAlive; use actix_web::HttpServer; -use clap::Parser; use meilisearch_auth::AuthController; use meilisearch_http::analytics; use meilisearch_http::analytics::Analytics; @@ -29,7 +29,7 @@ fn setup(opt: &Opt) -> anyhow::Result<()> { #[actix_web::main] async fn main() -> anyhow::Result<()> { - let opt = Opt::parse(); + let (opt, config_read_from) = Opt::try_build()?; setup(&opt)?; @@ -58,7 +58,7 @@ async fn main() -> anyhow::Result<()> { #[cfg(any(debug_assertions, not(feature = "analytics")))] let (analytics, user) = analytics::MockAnalytics::new(&opt); - print_launch_resume(&opt, &user); + print_launch_resume(&opt, &user, config_read_from); run_http(meilisearch, auth_controller, opt, analytics).await?; @@ -97,7 +97,7 @@ async fn run_http( Ok(()) } -pub fn print_launch_resume(opt: &Opt, user: &str) { +pub fn print_launch_resume(opt: &Opt, user: &str, config_read_from: Option) { let commit_sha = option_env!("VERGEN_GIT_SHA").unwrap_or("unknown"); let commit_date = option_env!("VERGEN_GIT_COMMIT_TIMESTAMP").unwrap_or("unknown"); let protocol = if opt.ssl_cert_path.is_some() && opt.ssl_key_path.is_some() { @@ -118,6 +118,12 @@ pub fn print_launch_resume(opt: &Opt, user: &str) { eprintln!("{}", ascii_name); + eprintln!( + "Config file path:\t{:?}", + 
config_read_from + .map(|config_file_path| config_file_path.display().to_string()) + .unwrap_or_else(|| "none".to_string()) + ); eprintln!("Database path:\t\t{:?}", opt.db_path); eprintln!("Server listening on:\t\"{}://{}\"", protocol, opt.http_addr); eprintln!("Environment:\t\t{:?}", opt.env); diff --git a/meilisearch-http/src/option.rs b/meilisearch-http/src/option.rs index 6848e693d..ae12f0cc6 100644 --- a/meilisearch-http/src/option.rs +++ b/meilisearch-http/src/option.rs @@ -5,7 +5,10 @@ use std::sync::Arc; use byte_unit::Byte; use clap::Parser; -use meilisearch_lib::options::{IndexerOpts, SchedulerConfig}; +use meilisearch_lib::{ + export_to_env_if_not_present, + options::{IndexerOpts, SchedulerConfig}, +}; use rustls::{ server::{ AllowAnyAnonymousOrAuthenticatedClient, AllowAnyAuthenticatedClient, @@ -14,141 +17,208 @@ use rustls::{ RootCertStore, }; use rustls_pemfile::{certs, pkcs8_private_keys, rsa_private_keys}; -use serde::Serialize; +use serde::{Deserialize, Serialize}; const POSSIBLE_ENV: [&str; 2] = ["development", "production"]; -#[derive(Debug, Clone, Parser, Serialize)] +const MEILI_DB_PATH: &str = "MEILI_DB_PATH"; +const MEILI_HTTP_ADDR: &str = "MEILI_HTTP_ADDR"; +const MEILI_MASTER_KEY: &str = "MEILI_MASTER_KEY"; +const MEILI_ENV: &str = "MEILI_ENV"; +#[cfg(all(not(debug_assertions), feature = "analytics"))] +const MEILI_NO_ANALYTICS: &str = "MEILI_NO_ANALYTICS"; +const MEILI_MAX_INDEX_SIZE: &str = "MEILI_MAX_INDEX_SIZE"; +const MEILI_MAX_TASK_DB_SIZE: &str = "MEILI_MAX_TASK_DB_SIZE"; +const MEILI_HTTP_PAYLOAD_SIZE_LIMIT: &str = "MEILI_HTTP_PAYLOAD_SIZE_LIMIT"; +const MEILI_SSL_CERT_PATH: &str = "MEILI_SSL_CERT_PATH"; +const MEILI_SSL_KEY_PATH: &str = "MEILI_SSL_KEY_PATH"; +const MEILI_SSL_AUTH_PATH: &str = "MEILI_SSL_AUTH_PATH"; +const MEILI_SSL_OCSP_PATH: &str = "MEILI_SSL_OCSP_PATH"; +const MEILI_SSL_REQUIRE_AUTH: &str = "MEILI_SSL_REQUIRE_AUTH"; +const MEILI_SSL_RESUMPTION: &str = "MEILI_SSL_RESUMPTION"; +const MEILI_SSL_TICKETS: &str = 
"MEILI_SSL_TICKETS"; +const MEILI_IMPORT_SNAPSHOT: &str = "MEILI_IMPORT_SNAPSHOT"; +const MEILI_IGNORE_MISSING_SNAPSHOT: &str = "MEILI_IGNORE_MISSING_SNAPSHOT"; +const MEILI_IGNORE_SNAPSHOT_IF_DB_EXISTS: &str = "MEILI_IGNORE_SNAPSHOT_IF_DB_EXISTS"; +const MEILI_SNAPSHOT_DIR: &str = "MEILI_SNAPSHOT_DIR"; +const MEILI_SCHEDULE_SNAPSHOT: &str = "MEILI_SCHEDULE_SNAPSHOT"; +const MEILI_SNAPSHOT_INTERVAL_SEC: &str = "MEILI_SNAPSHOT_INTERVAL_SEC"; +const MEILI_IMPORT_DUMP: &str = "MEILI_IMPORT_DUMP"; +const MEILI_IGNORE_MISSING_DUMP: &str = "MEILI_IGNORE_MISSING_DUMP"; +const MEILI_IGNORE_DUMP_IF_DB_EXISTS: &str = "MEILI_IGNORE_DUMP_IF_DB_EXISTS"; +const MEILI_DUMPS_DIR: &str = "MEILI_DUMPS_DIR"; +const MEILI_LOG_LEVEL: &str = "MEILI_LOG_LEVEL"; +#[cfg(feature = "metrics")] +const MEILI_ENABLE_METRICS_ROUTE: &str = "MEILI_ENABLE_METRICS_ROUTE"; + +const DEFAULT_DB_PATH: &str = "./data.ms"; +const DEFAULT_HTTP_ADDR: &str = "127.0.0.1:7700"; +const DEFAULT_ENV: &str = "development"; +const DEFAULT_MAX_INDEX_SIZE: &str = "100 GiB"; +const DEFAULT_MAX_TASK_DB_SIZE: &str = "100 GiB"; +const DEFAULT_HTTP_PAYLOAD_SIZE_LIMIT: &str = "100 MB"; +const DEFAULT_SNAPSHOT_DIR: &str = "snapshots/"; +const DEFAULT_SNAPSHOT_INTERVAL_SEC: u64 = 86400; +const DEFAULT_DUMPS_DIR: &str = "dumps/"; +const DEFAULT_LOG_LEVEL: &str = "INFO"; + +#[derive(Debug, Clone, Parser, Serialize, Deserialize)] #[clap(version)] +#[serde(rename_all = "snake_case", deny_unknown_fields)] pub struct Opt { /// The destination where the database must be created. - #[clap(long, env = "MEILI_DB_PATH", default_value = "./data.ms")] + #[clap(long, env = MEILI_DB_PATH, default_value_os_t = default_db_path())] + #[serde(default = "default_db_path")] pub db_path: PathBuf, /// The address on which the http server will listen. 
- #[clap(long, env = "MEILI_HTTP_ADDR", default_value = "127.0.0.1:7700")] + #[clap(long, env = MEILI_HTTP_ADDR, default_value_t = default_http_addr())] + #[serde(default = "default_http_addr")] pub http_addr: String, - /// The master key allowing you to do everything on the server. - #[serde(skip)] - #[clap(long, env = "MEILI_MASTER_KEY")] + /// Sets the instance's master key, automatically protecting all routes except GET /health + #[serde(skip_serializing)] + #[clap(long, env = MEILI_MASTER_KEY)] pub master_key: Option, /// This environment variable must be set to `production` if you are running in production. - /// If the server is running in development mode more logs will be displayed, - /// and the master key can be avoided which implies that there is no security on the updates routes. - /// This is useful to debug when integrating the engine with another service. - #[clap(long, env = "MEILI_ENV", default_value = "development", possible_values = &POSSIBLE_ENV)] + /// More logs will be displayed if the server is running in development mode. Setting the master + /// key is optional; hence no security on the updates routes. This + /// is useful to debug when integrating the engine with another service + #[clap(long, env = MEILI_ENV, default_value_t = default_env(), possible_values = &POSSIBLE_ENV)] + #[serde(default = "default_env")] pub env: String, /// Do not send analytics to Meili. 
#[cfg(all(not(debug_assertions), feature = "analytics"))] - #[serde(skip)] // we can't send true - #[clap(long, env = "MEILI_NO_ANALYTICS")] + #[serde(skip_serializing, default)] // we can't send true + #[clap(long, env = MEILI_NO_ANALYTICS)] pub no_analytics: bool, - /// The maximum size, in bytes, of the main lmdb database directory - #[clap(long, env = "MEILI_MAX_INDEX_SIZE", default_value = "100 GiB")] + /// The maximum size, in bytes, of the main LMDB database directory + #[clap(long, env = MEILI_MAX_INDEX_SIZE, default_value_t = default_max_index_size())] + #[serde(default = "default_max_index_size")] pub max_index_size: Byte, - /// The maximum size, in bytes, of the update lmdb database directory - #[clap(long, env = "MEILI_MAX_TASK_DB_SIZE", default_value = "100 GiB")] + /// The maximum size, in bytes, of the update LMDB database directory + #[clap(long, env = MEILI_MAX_TASK_DB_SIZE, default_value_t = default_max_task_db_size())] + #[serde(default = "default_max_task_db_size")] pub max_task_db_size: Byte, /// The maximum size, in bytes, of accepted JSON payloads - #[clap(long, env = "MEILI_HTTP_PAYLOAD_SIZE_LIMIT", default_value = "100 MB")] + #[clap(long, env = MEILI_HTTP_PAYLOAD_SIZE_LIMIT, default_value_t = default_http_payload_size_limit())] + #[serde(default = "default_http_payload_size_limit")] pub http_payload_size_limit: Byte, /// Read server certificates from CERTFILE. /// This should contain PEM-format certificates /// in the right order (the first certificate should /// certify KEYFILE, the last should be a root CA). - #[serde(skip)] - #[clap(long, env = "MEILI_SSL_CERT_PATH", parse(from_os_str))] + #[serde(skip_serializing)] + #[clap(long, env = MEILI_SSL_CERT_PATH, parse(from_os_str))] pub ssl_cert_path: Option, - /// Read private key from KEYFILE. This should be a RSA + /// Read the private key from KEYFILE. This should be an RSA /// private key or PKCS8-encoded private key, in PEM format. 
- #[serde(skip)] - #[clap(long, env = "MEILI_SSL_KEY_PATH", parse(from_os_str))] + #[serde(skip_serializing)] + #[clap(long, env = MEILI_SSL_KEY_PATH, parse(from_os_str))] pub ssl_key_path: Option, /// Enable client authentication, and accept certificates /// signed by those roots provided in CERTFILE. - #[clap(long, env = "MEILI_SSL_AUTH_PATH", parse(from_os_str))] - #[serde(skip)] + #[serde(skip_serializing)] + #[clap(long, env = MEILI_SSL_AUTH_PATH, parse(from_os_str))] pub ssl_auth_path: Option, /// Read DER-encoded OCSP response from OCSPFILE and staple to certificate. /// Optional - #[serde(skip)] - #[clap(long, env = "MEILI_SSL_OCSP_PATH", parse(from_os_str))] + #[serde(skip_serializing)] + #[clap(long, env = MEILI_SSL_OCSP_PATH, parse(from_os_str))] pub ssl_ocsp_path: Option, /// Send a fatal alert if the client does not complete client authentication. - #[serde(skip)] - #[clap(long, env = "MEILI_SSL_REQUIRE_AUTH")] + #[serde(skip_serializing, default)] + #[clap(long, env = MEILI_SSL_REQUIRE_AUTH)] pub ssl_require_auth: bool, /// SSL support session resumption - #[serde(skip)] - #[clap(long, env = "MEILI_SSL_RESUMPTION")] + #[serde(skip_serializing, default)] + #[clap(long, env = MEILI_SSL_RESUMPTION)] pub ssl_resumption: bool, /// SSL support tickets. - #[serde(skip)] - #[clap(long, env = "MEILI_SSL_TICKETS")] + #[serde(skip_serializing, default)] + #[clap(long, env = MEILI_SSL_TICKETS)] pub ssl_tickets: bool, /// Defines the path of the snapshot file to import. - /// This option will, by default, stop the process if a database already exist or if no snapshot exists at - /// the given path. If this option is not specified no snapshot is imported. - #[clap(long)] + /// This option will, by default, stop the process if a database already exists, or if no snapshot exists at + /// the given path. If this option is not specified, no snapshot is imported. 
+ #[clap(long, env = MEILI_IMPORT_SNAPSHOT)] pub import_snapshot: Option, - /// The engine will ignore a missing snapshot and not return an error in such case. - #[clap(long, requires = "import-snapshot")] + /// The engine will ignore a missing snapshot and not return an error in such a case. + #[clap( + long, + env = MEILI_IGNORE_MISSING_SNAPSHOT, + requires = "import-snapshot" + )] + #[serde(default)] pub ignore_missing_snapshot: bool, /// The engine will skip snapshot importation and not return an error in such case. - #[clap(long, requires = "import-snapshot")] + #[clap( + long, + env = MEILI_IGNORE_SNAPSHOT_IF_DB_EXISTS, + requires = "import-snapshot" + )] + #[serde(default)] pub ignore_snapshot_if_db_exists: bool, - /// Defines the directory path where meilisearch will create snapshot each snapshot_time_gap. - #[clap(long, env = "MEILI_SNAPSHOT_DIR", default_value = "snapshots/")] + /// Defines the directory path where Meilisearch will create a snapshot each snapshot-interval-sec. + #[clap(long, env = MEILI_SNAPSHOT_DIR, default_value_os_t = default_snapshot_dir())] + #[serde(default = "default_snapshot_dir")] pub snapshot_dir: PathBuf, /// Activate snapshot scheduling. - #[clap(long, env = "MEILI_SCHEDULE_SNAPSHOT")] + #[clap(long, env = MEILI_SCHEDULE_SNAPSHOT)] + #[serde(default)] pub schedule_snapshot: bool, /// Defines time interval, in seconds, between each snapshot creation. - #[clap(long, env = "MEILI_SNAPSHOT_INTERVAL_SEC", default_value = "86400")] // 24h + #[clap(long, env = MEILI_SNAPSHOT_INTERVAL_SEC, default_value_t = default_snapshot_interval_sec())] + #[serde(default = "default_snapshot_interval_sec")] + // 24h pub snapshot_interval_sec: u64, /// Import a dump from the specified path, must be a `.dump` file. 
- #[clap(long, conflicts_with = "import-snapshot")] + #[clap(long, env = MEILI_IMPORT_DUMP, conflicts_with = "import-snapshot")] pub import_dump: Option, - /// If the dump doesn't exists, load or create the database specified by `db-path` instead. - #[clap(long, requires = "import-dump")] + /// If the dump doesn't exist, load or create the database specified by `db-path` instead. + #[clap(long, env = MEILI_IGNORE_MISSING_DUMP, requires = "import-dump")] + #[serde(default)] pub ignore_missing_dump: bool, /// Ignore the dump if a database already exists, and load that database instead. - #[clap(long, requires = "import-dump")] + #[clap(long, env = MEILI_IGNORE_DUMP_IF_DB_EXISTS, requires = "import-dump")] + #[serde(default)] pub ignore_dump_if_db_exists: bool, /// Folder where dumps are created when the dump route is called. - #[clap(long, env = "MEILI_DUMPS_DIR", default_value = "dumps/")] + #[clap(long, env = MEILI_DUMPS_DIR, default_value_os_t = default_dumps_dir())] + #[serde(default = "default_dumps_dir")] pub dumps_dir: PathBuf, - /// Set the log level - #[clap(long, env = "MEILI_LOG_LEVEL", default_value = "info")] + /// Set the log level. # Possible values: [ERROR, WARN, INFO, DEBUG, TRACE] + #[clap(long, env = MEILI_LOG_LEVEL, default_value_t = default_log_level())] + #[serde(default = "default_log_level")] pub log_level: String, /// Enables Prometheus metrics and /metrics route. #[cfg(feature = "metrics")] - #[clap(long, env = "MEILI_ENABLE_METRICS_ROUTE")] + #[clap(long, env = MEILI_ENABLE_METRICS_ROUTE)] + #[serde(default)] pub enable_metrics_route: bool, #[serde(flatten)] @@ -158,15 +228,139 @@ pub struct Opt { #[serde(flatten)] #[clap(flatten)] pub scheduler_options: SchedulerConfig, + + /// The path to a configuration file that should be used to setup the engine. + /// Format must be TOML. + #[serde(skip_serializing)] + #[clap(long)] + pub config_file_path: Option, } impl Opt { - /// Wether analytics should be enabled or not. 
+ /// Whether analytics should be enabled or not. #[cfg(all(not(debug_assertions), feature = "analytics"))] pub fn analytics(&self) -> bool { !self.no_analytics } + /// Build a new Opt from config file, env vars and cli args. + pub fn try_build() -> anyhow::Result<(Self, Option)> { + // Parse the args to get the config_file_path. + let mut opts = Opt::parse(); + let mut config_read_from = None; + if let Some(config_file_path) = opts + .config_file_path + .clone() + .or_else(|| Some(PathBuf::from("./config.toml"))) + { + match std::fs::read(&config_file_path) { + Ok(config) => { + // If the file is successfully read, we deserialize it with `toml`. + let opt_from_config = toml::from_slice::(&config)?; + // We inject the values from the toml in the corresponding env vars if needs be. Doing so, we respect the priority toml < env vars < cli args. + opt_from_config.export_to_env(); + // Once injected we parse the cli args once again to take the new env vars into scope. + opts = Opt::parse(); + config_read_from = Some(config_file_path); + } + // If we have an error while reading the file defined by the user. + Err(_) if opts.config_file_path.is_some() => anyhow::bail!( + "unable to open or read the {:?} configuration file.", + opts.config_file_path.unwrap().display().to_string() + ), + _ => (), + } + } + + Ok((opts, config_read_from)) + } + + /// Exports the opts values to their corresponding env vars if they are not set. 
+ fn export_to_env(self) { + let Opt { + db_path, + http_addr, + master_key, + env, + max_index_size, + max_task_db_size, + http_payload_size_limit, + ssl_cert_path, + ssl_key_path, + ssl_auth_path, + ssl_ocsp_path, + ssl_require_auth, + ssl_resumption, + ssl_tickets, + snapshot_dir, + schedule_snapshot, + snapshot_interval_sec, + dumps_dir, + log_level, + indexer_options, + scheduler_options, + import_snapshot: _, + ignore_missing_snapshot: _, + ignore_snapshot_if_db_exists: _, + import_dump: _, + ignore_missing_dump: _, + ignore_dump_if_db_exists: _, + config_file_path: _, + #[cfg(all(not(debug_assertions), feature = "analytics"))] + no_analytics, + #[cfg(feature = "metrics")] + enable_metrics_route, + } = self; + export_to_env_if_not_present(MEILI_DB_PATH, db_path); + export_to_env_if_not_present(MEILI_HTTP_ADDR, http_addr); + if let Some(master_key) = master_key { + export_to_env_if_not_present(MEILI_MASTER_KEY, master_key); + } + export_to_env_if_not_present(MEILI_ENV, env); + #[cfg(all(not(debug_assertions), feature = "analytics"))] + { + export_to_env_if_not_present(MEILI_NO_ANALYTICS, no_analytics.to_string()); + } + export_to_env_if_not_present(MEILI_MAX_INDEX_SIZE, max_index_size.to_string()); + export_to_env_if_not_present(MEILI_MAX_TASK_DB_SIZE, max_task_db_size.to_string()); + export_to_env_if_not_present( + MEILI_HTTP_PAYLOAD_SIZE_LIMIT, + http_payload_size_limit.to_string(), + ); + if let Some(ssl_cert_path) = ssl_cert_path { + export_to_env_if_not_present(MEILI_SSL_CERT_PATH, ssl_cert_path); + } + if let Some(ssl_key_path) = ssl_key_path { + export_to_env_if_not_present(MEILI_SSL_KEY_PATH, ssl_key_path); + } + if let Some(ssl_auth_path) = ssl_auth_path { + export_to_env_if_not_present(MEILI_SSL_AUTH_PATH, ssl_auth_path); + } + if let Some(ssl_ocsp_path) = ssl_ocsp_path { + export_to_env_if_not_present(MEILI_SSL_OCSP_PATH, ssl_ocsp_path); + } + export_to_env_if_not_present(MEILI_SSL_REQUIRE_AUTH, ssl_require_auth.to_string()); + 
+ export_to_env_if_not_present(MEILI_SSL_RESUMPTION, ssl_resumption.to_string()); + export_to_env_if_not_present(MEILI_SSL_TICKETS, ssl_tickets.to_string()); + export_to_env_if_not_present(MEILI_SNAPSHOT_DIR, snapshot_dir); + export_to_env_if_not_present(MEILI_SCHEDULE_SNAPSHOT, schedule_snapshot.to_string()); + export_to_env_if_not_present( + MEILI_SNAPSHOT_INTERVAL_SEC, + snapshot_interval_sec.to_string(), + ); + export_to_env_if_not_present(MEILI_DUMPS_DIR, dumps_dir); + export_to_env_if_not_present(MEILI_LOG_LEVEL, log_level); + #[cfg(feature = "metrics")] + { + export_to_env_if_not_present( + MEILI_ENABLE_METRICS_ROUTE, + enable_metrics_route.to_string(), + ); + } + indexer_options.export_to_env(); + scheduler_options.export_to_env(); + } + pub fn get_ssl_config(&self) -> anyhow::Result> { if let (Some(cert_path), Some(key_path)) = (&self.ssl_cert_path, &self.ssl_key_path) { let config = rustls::ServerConfig::builder().with_safe_defaults(); @@ -265,6 +459,48 @@ fn load_ocsp(filename: &Option) -> anyhow::Result> { Ok(ret) } +/// Functions used to get the default values for `Opt` fields; they need to be functions because of serde's `default` attribute. 
+ +fn default_db_path() -> PathBuf { + PathBuf::from(DEFAULT_DB_PATH) +} + +fn default_http_addr() -> String { + DEFAULT_HTTP_ADDR.to_string() +} + +fn default_env() -> String { + DEFAULT_ENV.to_string() +} + +fn default_max_index_size() -> Byte { + Byte::from_str(DEFAULT_MAX_INDEX_SIZE).unwrap() +} + +fn default_max_task_db_size() -> Byte { + Byte::from_str(DEFAULT_MAX_TASK_DB_SIZE).unwrap() +} + +fn default_http_payload_size_limit() -> Byte { + Byte::from_str(DEFAULT_HTTP_PAYLOAD_SIZE_LIMIT).unwrap() +} + +fn default_snapshot_dir() -> PathBuf { + PathBuf::from(DEFAULT_SNAPSHOT_DIR) +} + +fn default_snapshot_interval_sec() -> u64 { + DEFAULT_SNAPSHOT_INTERVAL_SEC +} + +fn default_dumps_dir() -> PathBuf { + PathBuf::from(DEFAULT_DUMPS_DIR) +} + +fn default_log_level() -> String { + DEFAULT_LOG_LEVEL.to_string() +} + #[cfg(test)] mod test { use super::*; diff --git a/meilisearch-http/src/task.rs b/meilisearch-http/src/task.rs index fe23720aa..786d318f8 100644 --- a/meilisearch-http/src/task.rs +++ b/meilisearch-http/src/task.rs @@ -147,7 +147,7 @@ enum TaskDetails { IndexInfo { primary_key: Option }, #[serde(rename_all = "camelCase")] DocumentDeletion { - received_document_ids: usize, + matched_documents: usize, deleted_documents: Option, }, #[serde(rename_all = "camelCase")] @@ -255,7 +255,7 @@ impl From for TaskView { } => ( TaskType::DocumentDeletion, Some(TaskDetails::DocumentDeletion { - received_document_ids: ids.len(), + matched_documents: ids.len(), deleted_documents: None, }), ), diff --git a/meilisearch-http/tests/dashboard/mod.rs b/meilisearch-http/tests/dashboard/mod.rs index d097cfd4b..2699cd16f 100644 --- a/meilisearch-http/tests/dashboard/mod.rs +++ b/meilisearch-http/tests/dashboard/mod.rs @@ -1,5 +1,6 @@ use crate::common::Server; +#[cfg(feature = "mini-dashboard")] #[actix_rt::test] async fn dashboard_assets_load() { let server = Server::new().await; diff --git a/meilisearch-http/tests/documents/add_documents.rs 
b/meilisearch-http/tests/documents/add_documents.rs index 685428784..8e6ba44a9 100644 --- a/meilisearch-http/tests/documents/add_documents.rs +++ b/meilisearch-http/tests/documents/add_documents.rs @@ -372,7 +372,7 @@ async fn error_add_malformed_json_documents() { assert_eq!( response["message"], json!( - r#"The `json` payload provided is malformed. `Couldn't serialize document value: data did not match any variant of untagged enum Either`."# + r#"The `json` payload provided is malformed. `Couldn't serialize document value: data are neither an object nor a list of objects`."# ) ); assert_eq!(response["code"], json!("malformed_payload")); @@ -395,7 +395,7 @@ async fn error_add_malformed_json_documents() { assert_eq!(status_code, 400); assert_eq!( response["message"], - json!("The `json` payload provided is malformed. `Couldn't serialize document value: data did not match any variant of untagged enum Either`.") + json!("The `json` payload provided is malformed. `Couldn't serialize document value: data are neither an object nor a list of objects`.") ); assert_eq!(response["code"], json!("malformed_payload")); assert_eq!(response["type"], json!("invalid_request")); diff --git a/meilisearch-lib/Cargo.toml b/meilisearch-lib/Cargo.toml index de967286c..bda3ecbc7 100644 --- a/meilisearch-lib/Cargo.toml +++ b/meilisearch-lib/Cargo.toml @@ -28,7 +28,7 @@ lazy_static = "1.4.0" log = "0.4.14" meilisearch-auth = { path = "../meilisearch-auth" } meilisearch-types = { path = "../meilisearch-types" } -milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.33.4" } +milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.33.4", default-features = false } mime = "0.3.16" num_cpus = "1.13.1" obkv = "0.2.0" @@ -64,3 +64,19 @@ nelson = { git = "https://github.com/meilisearch/nelson.git", rev = "675f1388554 paste = "1.0.6" proptest = "1.0.0" proptest-derive = "0.3.0" + +[features] +# all specialized tokenizations +default = ["milli/default"] + +# chinese 
specialized tokenization +chinese = ["milli/chinese"] + +# hebrew specialized tokenization +hebrew = ["milli/hebrew"] + +# japanese specialized tokenization +japanese = ["milli/japanese"] + +# thai specialized tokenization +thai = ["milli/thai"] diff --git a/meilisearch-lib/src/document_formats.rs b/meilisearch-lib/src/document_formats.rs index ebc98f3fb..cfc200019 100644 --- a/meilisearch-lib/src/document_formats.rs +++ b/meilisearch-lib/src/document_formats.rs @@ -8,6 +8,7 @@ use meilisearch_types::internal_error; use milli::documents::{DocumentsBatchBuilder, Error}; use milli::Object; use serde::Deserialize; +use serde_json::error::Category; type Result = std::result::Result; @@ -40,18 +41,32 @@ impl Display for DocumentFormatError { Self::Internal(e) => write!(f, "An internal error has occurred: `{}`.", e), Self::MalformedPayload(me, b) => match me.borrow() { Error::Json(se) => { + let mut message = match se.classify() { + Category::Data => { + "data are neither an object nor a list of objects".to_string() + } + _ => se.to_string(), + }; + // https://github.com/meilisearch/meilisearch/issues/2107 // The user input maybe insanely long. We need to truncate it. - let mut serde_msg = se.to_string(); let ellipsis = "..."; - if serde_msg.len() > 100 + ellipsis.len() { - serde_msg.replace_range(50..serde_msg.len() - 85, ellipsis); + let trim_input_prefix_len = 50; + let trim_input_suffix_len = 85; + + if message.len() + > trim_input_prefix_len + trim_input_suffix_len + ellipsis.len() + { + message.replace_range( + trim_input_prefix_len..message.len() - trim_input_suffix_len, + ellipsis, + ); } write!( f, "The `{}` payload provided is malformed. 
`Couldn't serialize document value: {}`.", - b, serde_msg + b, message ) } _ => write!(f, "The `{}` payload provided is malformed: `{}`.", b, me), diff --git a/meilisearch-lib/src/dump/compat/v2.rs b/meilisearch-lib/src/dump/compat/v2.rs index 364d894c4..ba3b8e3a6 100644 --- a/meilisearch-lib/src/dump/compat/v2.rs +++ b/meilisearch-lib/src/dump/compat/v2.rs @@ -145,7 +145,7 @@ pub fn error_code_from_str(s: &str) -> anyhow::Result { "unsupported_media_type" => Code::UnsupportedMediaType, "dump_already_in_progress" => Code::DumpAlreadyInProgress, "dump_process_failed" => Code::DumpProcessFailed, - _ => bail!("unknow error code."), + _ => bail!("unknown error code."), }; Ok(code) diff --git a/meilisearch-lib/src/dump/loaders/v1.rs b/meilisearch-lib/src/dump/loaders/v1.rs deleted file mode 100644 index a07475b56..000000000 --- a/meilisearch-lib/src/dump/loaders/v1.rs +++ /dev/null @@ -1,24 +0,0 @@ -use std::path::Path; - -use serde::{Deserialize, Serialize}; - -use crate::index_controller::IndexMetadata; - -#[derive(Serialize, Deserialize, Debug)] -#[serde(rename_all = "camelCase")] -pub struct MetadataV1 { - pub db_version: String, - indexes: Vec, -} - -impl MetadataV1 { - #[allow(dead_code, unreachable_code, unused_variables)] - pub fn load_dump( - self, - src: impl AsRef, - dst: impl AsRef, - size: usize, - indexer_options: &IndexerOpts, - ) -> anyhow::Result<()> { - anyhow::bail!("The version 1 of the dumps is not supported anymore. 
You can re-export your dump from a version between 0.21 and 0.24, or start fresh from a version 0.25 onwards.") -} diff --git a/meilisearch-lib/src/dump/loaders/v4.rs b/meilisearch-lib/src/dump/loaders/v4.rs index 0744df7ea..44ec23517 100644 --- a/meilisearch-lib/src/dump/loaders/v4.rs +++ b/meilisearch-lib/src/dump/loaders/v4.rs @@ -57,10 +57,10 @@ fn patch_updates(src: impl AsRef, dst: impl AsRef) -> anyhow::Result let updates_path = src.as_ref().join("updates/data.jsonl"); let output_updates_path = dst.as_ref().join("updates/data.jsonl"); create_dir_all(output_updates_path.parent().unwrap())?; - let udpates_file = File::open(updates_path)?; + let updates_file = File::open(updates_path)?; let mut output_update_file = File::create(output_updates_path)?; - serde_json::Deserializer::from_reader(udpates_file) + serde_json::Deserializer::from_reader(updates_file) .into_iter::() .try_for_each(|task| -> anyhow::Result<()> { let task: Task = task?.into(); diff --git a/meilisearch-lib/src/index/dump.rs b/meilisearch-lib/src/index/dump.rs index 6a41fa7a0..9cc3c033f 100644 --- a/meilisearch-lib/src/index/dump.rs +++ b/meilisearch-lib/src/index/dump.rs @@ -105,6 +105,7 @@ impl Index { let mut options = EnvOpenOptions::new(); options.map_size(size); + options.max_readers(1024); let index = milli::Index::new(options, &dst_dir_path)?; let mut txn = index.write_txn()?; diff --git a/meilisearch-lib/src/index/index.rs b/meilisearch-lib/src/index/index.rs index 02425d0bf..3d6c47949 100644 --- a/meilisearch-lib/src/index/index.rs +++ b/meilisearch-lib/src/index/index.rs @@ -94,6 +94,7 @@ impl Index { create_dir_all(&path)?; let mut options = EnvOpenOptions::new(); options.map_size(size); + options.max_readers(1024); let inner = Arc::new(milli::Index::new(options, &path)?); Ok(Index { inner, diff --git a/meilisearch-lib/src/index/search.rs b/meilisearch-lib/src/index/search.rs index 57171d529..1a9aa1d0d 100644 --- a/meilisearch-lib/src/index/search.rs +++ 
b/meilisearch-lib/src/index/search.rs @@ -27,7 +27,7 @@ pub const DEFAULT_CROP_MARKER: fn() -> String = || "…".to_string(); pub const DEFAULT_HIGHLIGHT_PRE_TAG: fn() -> String = || "".to_string(); pub const DEFAULT_HIGHLIGHT_POST_TAG: fn() -> String = || "".to_string(); -/// The maximimum number of results that the engine +/// The maximum number of results that the engine /// will be able to return in one search call. pub const DEFAULT_PAGINATION_MAX_TOTAL_HITS: usize = 1000; diff --git a/meilisearch-lib/src/index_resolver/index_store.rs b/meilisearch-lib/src/index_resolver/index_store.rs index e4f58f130..ea3c7125a 100644 --- a/meilisearch-lib/src/index_resolver/index_store.rs +++ b/meilisearch-lib/src/index_resolver/index_store.rs @@ -51,7 +51,7 @@ impl MapIndexStore { #[async_trait::async_trait] impl IndexStore for MapIndexStore { async fn create(&self, uuid: Uuid) -> Result { - // We need to keep the lock until we are sure the db file has been opened correclty, to + // We need to keep the lock until we are sure the db file has been opened correctly, to // ensure that another db is not created at the same time. let mut lock = self.index_store.write().await; diff --git a/meilisearch-lib/src/lib.rs b/meilisearch-lib/src/lib.rs index 70fd2ba51..7fe0984dc 100644 --- a/meilisearch-lib/src/lib.rs +++ b/meilisearch-lib/src/lib.rs @@ -11,6 +11,8 @@ mod snapshot; pub mod tasks; mod update_file_store; +use std::env::VarError; +use std::ffi::OsStr; use std::path::Path; pub use index_controller::MeiliSearch; @@ -35,3 +37,14 @@ pub fn is_empty_db(db_path: impl AsRef) -> bool { true } } + +/// Checks if the key is defined in the environment variables. +/// If not, inserts it with the given value. 
+pub fn export_to_env_if_not_present(key: &str, value: T) +where + T: AsRef, +{ + if let Err(VarError::NotPresent) = std::env::var(key) { + std::env::set_var(key, value); + } +} diff --git a/meilisearch-lib/src/options.rs b/meilisearch-lib/src/options.rs index ea810b9b7..bd406fbdd 100644 --- a/meilisearch-lib/src/options.rs +++ b/meilisearch-lib/src/options.rs @@ -1,33 +1,40 @@ +use crate::export_to_env_if_not_present; + use core::fmt; use std::{convert::TryFrom, num::ParseIntError, ops::Deref, str::FromStr}; use byte_unit::{Byte, ByteError}; use clap::Parser; use milli::update::IndexerConfig; -use serde::Serialize; +use serde::{Deserialize, Serialize}; use sysinfo::{RefreshKind, System, SystemExt}; -#[derive(Debug, Clone, Parser, Serialize)] +const MEILI_MAX_INDEXING_MEMORY: &str = "MEILI_MAX_INDEXING_MEMORY"; +const MEILI_MAX_INDEXING_THREADS: &str = "MEILI_MAX_INDEXING_THREADS"; +const DISABLE_AUTO_BATCHING: &str = "DISABLE_AUTO_BATCHING"; +const DEFAULT_LOG_EVERY_N: usize = 100000; + +#[derive(Debug, Clone, Parser, Serialize, Deserialize)] +#[serde(rename_all = "snake_case", deny_unknown_fields)] pub struct IndexerOpts { /// The amount of documents to skip before printing /// a log regarding the indexing advancement. - #[serde(skip)] - #[clap(long, default_value = "100000", hide = true)] // 100k + #[serde(skip_serializing, default = "default_log_every_n")] + #[clap(long, default_value_t = default_log_every_n(), hide = true)] // 100k pub log_every_n: usize, /// Grenad max number of chunks in bytes. - #[serde(skip)] + #[serde(skip_serializing)] #[clap(long, hide = true)] pub max_nb_chunks: Option, - /// The maximum amount of memory the indexer will use. It defaults to 2/3 - /// of the available memory. It is recommended to use something like 80%-90% - /// of the available memory, no more. + /// The maximum amount of memory the indexer will use. 
/// /// In case the engine is unable to retrieve the available memory the engine will /// try to use the memory it needs but without real limit, this can lead to /// Out-Of-Memory issues and it is recommended to specify the amount of memory to use. - #[clap(long, env = "MEILI_MAX_INDEXING_MEMORY", default_value_t)] + #[clap(long, env = MEILI_MAX_INDEXING_MEMORY, default_value_t)] + #[serde(default)] pub max_indexing_memory: MaxMemory, /// The maximum number of threads the indexer will use. @@ -35,18 +42,43 @@ pub struct IndexerOpts { /// it will use the maximum number of available cores. /// /// It defaults to half of the available threads. - #[clap(long, env = "MEILI_MAX_INDEXING_THREADS", default_value_t)] + #[clap(long, env = MEILI_MAX_INDEXING_THREADS, default_value_t)] + #[serde(default)] pub max_indexing_threads: MaxThreads, } -#[derive(Debug, Clone, Parser, Default, Serialize)] +#[derive(Debug, Clone, Parser, Default, Serialize, Deserialize)] +#[serde(rename_all = "snake_case", deny_unknown_fields)] pub struct SchedulerConfig { /// The engine will disable task auto-batching, /// and will sequencialy compute each task one by one. - #[clap(long, env = "DISABLE_AUTO_BATCHING")] + #[clap(long, env = DISABLE_AUTO_BATCHING)] + #[serde(default)] pub disable_auto_batching: bool, } +impl IndexerOpts { + /// Exports the values to their corresponding env vars if they are not set. 
+ pub fn export_to_env(self) { + let IndexerOpts { + max_indexing_memory, + max_indexing_threads, + log_every_n: _, + max_nb_chunks: _, + } = self; + if let Some(max_indexing_memory) = max_indexing_memory.0 { + export_to_env_if_not_present( + MEILI_MAX_INDEXING_MEMORY, + max_indexing_memory.to_string(), + ); + } + export_to_env_if_not_present( + MEILI_MAX_INDEXING_THREADS, + max_indexing_threads.0.to_string(), + ); + } +} + impl TryFrom<&IndexerOpts> for IndexerConfig { type Error = anyhow::Error; @@ -77,8 +109,17 @@ impl Default for IndexerOpts { } } +impl SchedulerConfig { + pub fn export_to_env(self) { + let SchedulerConfig { + disable_auto_batching, + } = self; + export_to_env_if_not_present(DISABLE_AUTO_BATCHING, disable_auto_batching.to_string()); + } +} + /// A type used to detect the max memory available and use 2/3 of it. -#[derive(Debug, Clone, Copy, Serialize)] +#[derive(Debug, Clone, Copy, Serialize, Deserialize)] pub struct MaxMemory(Option); impl FromStr for MaxMemory { @@ -134,7 +175,7 @@ fn total_memory_bytes() -> Option { } } -#[derive(Debug, Clone, Copy, Serialize)] +#[derive(Debug, Clone, Copy, Serialize, Deserialize)] pub struct MaxThreads(usize); impl FromStr for MaxThreads { @@ -164,3 +205,7 @@ impl Deref for MaxThreads { &self.0 } } + +fn default_log_every_n() -> usize { + DEFAULT_LOG_EVERY_N +} diff --git a/meilisearch-lib/src/snapshot.rs b/meilisearch-lib/src/snapshot.rs index da4907939..4566a627e 100644 --- a/meilisearch-lib/src/snapshot.rs +++ b/meilisearch-lib/src/snapshot.rs @@ -181,6 +181,7 @@ impl SnapshotJob { let mut options = milli::heed::EnvOpenOptions::new(); options.map_size(self.index_size); + options.max_readers(1024); let index = milli::Index::new(options, entry.path())?; index.copy_to_path(dst, CompactionOption::Enabled)?; } diff --git a/meilisearch-lib/src/tasks/task_store/mod.rs b/meilisearch-lib/src/tasks/task_store/mod.rs index 621d66dd3..55dfe17d3 100644 --- a/meilisearch-lib/src/tasks/task_store/mod.rs +++ 
b/meilisearch-lib/src/tasks/task_store/mod.rs @@ -117,7 +117,7 @@ impl TaskStore { match filter { Some(filter) => filter .pass(&task) - .then(|| task) + .then_some(task) .ok_or(TaskError::UnexistingTask(id)), None => Ok(task), } diff --git a/meilisearch-lib/src/tasks/task_store/store.rs b/meilisearch-lib/src/tasks/task_store/store.rs index 24d0d3a65..32b20aeb8 100644 --- a/meilisearch-lib/src/tasks/task_store/store.rs +++ b/meilisearch-lib/src/tasks/task_store/store.rs @@ -63,7 +63,7 @@ impl Store { /// Returns the id for the next task. /// /// The required `mut txn` acts as a reservation system. It guarantees that as long as you commit - /// the task to the store in the same transaction, no one else will hav this task id. + /// the task to the store in the same transaction, no one else will have this task id. pub fn next_task_id(&self, txn: &mut RwTxn) -> Result { let id = self .tasks