mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-12-26 22:50:07 +01:00
Merge #3346
3346: Import milli 🎉 r=Kerollmops a=Kerollmops
Fixes https://github.com/meilisearch/meilisearch/issues/2901
Main work
- integrate the milli repository as an internal crate into this repo
- Update the Cargo.toml accordingly
- Ensure `meilisearch-types` now uses the internal milli crate and not the remote repository
- Update the milli's version to follow the meilisearch one
Also
- Removed the beta tests in test CI (will be re-integrated later if needed)
- Move and modify milli's README into the `milli` folder
- remove the script folder from `milli`
- Removed useless CI (release-drafter and enforce-label)
⚠️ Also, import all of `release-v1.0.0` up to and including commit a5c4fbbcea (the merge of PR https://github.com/meilisearch/meilisearch/pull/3334)
Co-authored-by: Loïc Lecrenier <loic.lecrenier@me.com>
Co-authored-by: bors[bot] <26634292+bors[bot]@users.noreply.github.com>
Co-authored-by: Samyak S Sarnayak <samyak201@gmail.com>
Co-authored-by: unvalley <kirohi.code@gmail.com>
Co-authored-by: Samyak Sarnayak <samyak201@gmail.com>
This commit is contained in:
commit
6b3da8a6de
1
.github/dependabot.yml
vendored
1
.github/dependabot.yml
vendored
@ -2,7 +2,6 @@
|
||||
|
||||
version: 2
|
||||
updates:
|
||||
|
||||
- package-ecosystem: "github-actions"
|
||||
directory: "/"
|
||||
schedule:
|
||||
|
3
.github/workflows/latest-git-tag.yml
vendored
3
.github/workflows/latest-git-tag.yml
vendored
@ -3,7 +3,7 @@ name: Update latest git tag
|
||||
on:
|
||||
workflow_dispatch:
|
||||
release:
|
||||
types: [published]
|
||||
types: [released]
|
||||
|
||||
jobs:
|
||||
check-version:
|
||||
@ -17,6 +17,7 @@ jobs:
|
||||
|
||||
update-latest-tag:
|
||||
runs-on: ubuntu-latest
|
||||
needs: check-version
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: rickstaa/action-create-tag@v1
|
||||
|
77
.github/workflows/manual_benchmarks.yml
vendored
Normal file
77
.github/workflows/manual_benchmarks.yml
vendored
Normal file
@ -0,0 +1,77 @@
|
||||
name: Benchmarks
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
dataset_name:
|
||||
description: 'The name of the dataset used to benchmark (search_songs, search_wiki, search_geo or indexing)'
|
||||
required: false
|
||||
default: 'search_songs'
|
||||
|
||||
env:
|
||||
BENCH_NAME: ${{ github.event.inputs.dataset_name }}
|
||||
|
||||
jobs:
|
||||
benchmarks:
|
||||
name: Run and upload benchmarks
|
||||
runs-on: benchmarks
|
||||
timeout-minutes: 4320 # 72h
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions-rs/toolchain@v1
|
||||
with:
|
||||
profile: minimal
|
||||
toolchain: stable
|
||||
override: true
|
||||
|
||||
# Set variables
|
||||
- name: Set current branch name
|
||||
shell: bash
|
||||
run: echo "name=$(echo ${GITHUB_REF#refs/heads/})" >> $GITHUB_OUTPUT
|
||||
id: current_branch
|
||||
- name: Set normalized current branch name # Replace `/` by `_` in branch name to avoid issues when pushing to S3
|
||||
shell: bash
|
||||
run: echo "name=$(echo ${GITHUB_REF#refs/heads/} | tr '/' '_')" >> $GITHUB_OUTPUT
|
||||
id: normalized_current_branch
|
||||
- name: Set shorter commit SHA
|
||||
shell: bash
|
||||
run: echo "short=$(echo $GITHUB_SHA | cut -c1-8)" >> $GITHUB_OUTPUT
|
||||
id: commit_sha
|
||||
- name: Set file basename with format "dataset_branch_commitSHA"
|
||||
shell: bash
|
||||
run: echo "basename=$(echo ${BENCH_NAME}_${{ steps.normalized_current_branch.outputs.name }}_${{ steps.commit_sha.outputs.short }})" >> $GITHUB_OUTPUT
|
||||
id: file
|
||||
|
||||
# Run benchmarks
|
||||
- name: Run benchmarks - Dataset ${BENCH_NAME} - Branch ${{ steps.current_branch.outputs.name }} - Commit ${{ steps.commit_sha.outputs.short }}
|
||||
run: |
|
||||
cd benchmarks
|
||||
cargo bench --bench ${BENCH_NAME} -- --save-baseline ${{ steps.file.outputs.basename }}
|
||||
|
||||
# Generate critcmp files
|
||||
- name: Install critcmp
|
||||
uses: taiki-e/install-action@v2
|
||||
with:
|
||||
tool: critcmp
|
||||
- name: Export critcmp file
|
||||
run: |
|
||||
critcmp --export ${{ steps.file.outputs.basename }} > ${{ steps.file.outputs.basename }}.json
|
||||
|
||||
# Upload benchmarks
|
||||
- name: Upload ${{ steps.file.outputs.basename }}.json to DO Spaces # DigitalOcean Spaces = S3
|
||||
uses: BetaHuhn/do-spaces-action@v2
|
||||
with:
|
||||
access_key: ${{ secrets.DO_SPACES_ACCESS_KEY }}
|
||||
secret_key: ${{ secrets.DO_SPACES_SECRET_KEY }}
|
||||
space_name: ${{ secrets.DO_SPACES_SPACE_NAME }}
|
||||
space_region: ${{ secrets.DO_SPACES_SPACE_REGION }}
|
||||
source: ${{ steps.file.outputs.basename }}.json
|
||||
out_dir: critcmp_results
|
||||
|
||||
# Helper
|
||||
- name: 'README: compare with another benchmark'
|
||||
run: |
|
||||
echo "${{ steps.file.outputs.basename }}.json has just been pushed."
|
||||
echo 'How to compare this benchmark with another one?'
|
||||
echo ' - Check the available files with: ./benchmarks/scripts/list.sh'
|
||||
echo " - Run the following command: ./benchmarks/scripts/compare.sh <file-to-compare-with> ${{ steps.file.outputs.basename }}.json"
|
2
.github/workflows/publish-deb-brew-pkg.yml
vendored
2
.github/workflows/publish-deb-brew-pkg.yml
vendored
@ -2,7 +2,7 @@ name: Publish to APT repository & Homebrew
|
||||
|
||||
on:
|
||||
release:
|
||||
types: [published]
|
||||
types: [released]
|
||||
|
||||
jobs:
|
||||
check-version:
|
||||
|
79
.github/workflows/push_benchmarks_indexing.yml
vendored
Normal file
79
.github/workflows/push_benchmarks_indexing.yml
vendored
Normal file
@ -0,0 +1,79 @@
|
||||
name: Benchmarks indexing (push)
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
|
||||
env:
|
||||
INFLUX_TOKEN: ${{ secrets.INFLUX_TOKEN }}
|
||||
BENCH_NAME: "indexing"
|
||||
|
||||
jobs:
|
||||
benchmarks:
|
||||
name: Run and upload benchmarks
|
||||
runs-on: benchmarks
|
||||
timeout-minutes: 4320 # 72h
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions-rs/toolchain@v1
|
||||
with:
|
||||
profile: minimal
|
||||
toolchain: stable
|
||||
override: true
|
||||
|
||||
# Set variables
|
||||
- name: Set current branch name
|
||||
shell: bash
|
||||
run: echo "name=$(echo ${GITHUB_REF#refs/heads/})" >> $GITHUB_OUTPUT
|
||||
id: current_branch
|
||||
- name: Set normalized current branch name # Replace `/` by `_` in branch name to avoid issues when pushing to S3
|
||||
shell: bash
|
||||
run: echo "name=$(echo ${GITHUB_REF#refs/heads/} | tr '/' '_')" >> $GITHUB_OUTPUT
|
||||
id: normalized_current_branch
|
||||
- name: Set shorter commit SHA
|
||||
shell: bash
|
||||
run: echo "short=$(echo $GITHUB_SHA | cut -c1-8)" >> $GITHUB_OUTPUT
|
||||
id: commit_sha
|
||||
- name: Set file basename with format "dataset_branch_commitSHA"
|
||||
shell: bash
|
||||
run: echo "basename=$(echo ${BENCH_NAME}_${{ steps.normalized_current_branch.outputs.name }}_${{ steps.commit_sha.outputs.short }})" >> $GITHUB_OUTPUT
|
||||
id: file
|
||||
|
||||
# Run benchmarks
|
||||
- name: Run benchmarks - Dataset ${BENCH_NAME} - Branch ${{ steps.current_branch.outputs.name }} - Commit ${{ steps.commit_sha.outputs.short }}
|
||||
run: |
|
||||
cd benchmarks
|
||||
cargo bench --bench ${BENCH_NAME} -- --save-baseline ${{ steps.file.outputs.basename }}
|
||||
|
||||
# Generate critcmp files
|
||||
- name: Install critcmp
|
||||
uses: taiki-e/install-action@v2
|
||||
with:
|
||||
tool: critcmp
|
||||
- name: Export critcmp file
|
||||
run: |
|
||||
critcmp --export ${{ steps.file.outputs.basename }} > ${{ steps.file.outputs.basename }}.json
|
||||
|
||||
# Upload benchmarks
|
||||
- name: Upload ${{ steps.file.outputs.basename }}.json to DO Spaces # DigitalOcean Spaces = S3
|
||||
uses: BetaHuhn/do-spaces-action@v2
|
||||
with:
|
||||
access_key: ${{ secrets.DO_SPACES_ACCESS_KEY }}
|
||||
secret_key: ${{ secrets.DO_SPACES_SECRET_KEY }}
|
||||
space_name: ${{ secrets.DO_SPACES_SPACE_NAME }}
|
||||
space_region: ${{ secrets.DO_SPACES_SPACE_REGION }}
|
||||
source: ${{ steps.file.outputs.basename }}.json
|
||||
out_dir: critcmp_results
|
||||
|
||||
# Upload benchmarks to influxdb
|
||||
- name: Upload ${{ steps.file.outputs.basename }}.json to influxDB
|
||||
run: telegraf --config https://eu-central-1-1.aws.cloud2.influxdata.com/api/v2/telegrafs/08b52e34a370b000 --once --debug
|
||||
|
||||
# Helper
|
||||
- name: 'README: compare with another benchmark'
|
||||
run: |
|
||||
echo "${{ steps.file.outputs.basename }}.json has just been pushed."
|
||||
echo 'How to compare this benchmark with another one?'
|
||||
echo ' - Check the available files with: ./benchmarks/scripts/list.sh'
|
||||
echo " - Run the following command: ./benchmarks/scripts/compare.sh <file-to-compare-with> ${{ steps.file.outputs.basename }}.json"
|
78
.github/workflows/push_benchmarks_search_geo.yml
vendored
Normal file
78
.github/workflows/push_benchmarks_search_geo.yml
vendored
Normal file
@ -0,0 +1,78 @@
|
||||
name: Benchmarks search geo (push)
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
|
||||
env:
|
||||
BENCH_NAME: "search_geo"
|
||||
INFLUX_TOKEN: ${{ secrets.INFLUX_TOKEN }}
|
||||
|
||||
jobs:
|
||||
benchmarks:
|
||||
name: Run and upload benchmarks
|
||||
runs-on: benchmarks
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions-rs/toolchain@v1
|
||||
with:
|
||||
profile: minimal
|
||||
toolchain: stable
|
||||
override: true
|
||||
|
||||
# Set variables
|
||||
- name: Set current branch name
|
||||
shell: bash
|
||||
run: echo "name=$(echo ${GITHUB_REF#refs/heads/})" >> $GITHUB_OUTPUT
|
||||
id: current_branch
|
||||
- name: Set normalized current branch name # Replace `/` by `_` in branch name to avoid issues when pushing to S3
|
||||
shell: bash
|
||||
run: echo "name=$(echo ${GITHUB_REF#refs/heads/} | tr '/' '_')" >> $GITHUB_OUTPUT
|
||||
id: normalized_current_branch
|
||||
- name: Set shorter commit SHA
|
||||
shell: bash
|
||||
run: echo "short=$(echo $GITHUB_SHA | cut -c1-8)" >> $GITHUB_OUTPUT
|
||||
id: commit_sha
|
||||
- name: Set file basename with format "dataset_branch_commitSHA"
|
||||
shell: bash
|
||||
run: echo "basename=$(echo ${BENCH_NAME}_${{ steps.normalized_current_branch.outputs.name }}_${{ steps.commit_sha.outputs.short }})" >> $GITHUB_OUTPUT
|
||||
id: file
|
||||
|
||||
# Run benchmarks
|
||||
- name: Run benchmarks - Dataset ${BENCH_NAME} - Branch ${{ steps.current_branch.outputs.name }} - Commit ${{ steps.commit_sha.outputs.short }}
|
||||
run: |
|
||||
cd benchmarks
|
||||
cargo bench --bench ${BENCH_NAME} -- --save-baseline ${{ steps.file.outputs.basename }}
|
||||
|
||||
# Generate critcmp files
|
||||
- name: Install critcmp
|
||||
uses: taiki-e/install-action@v2
|
||||
with:
|
||||
tool: critcmp
|
||||
- name: Export critcmp file
|
||||
run: |
|
||||
critcmp --export ${{ steps.file.outputs.basename }} > ${{ steps.file.outputs.basename }}.json
|
||||
|
||||
# Upload benchmarks
|
||||
- name: Upload ${{ steps.file.outputs.basename }}.json to DO Spaces # DigitalOcean Spaces = S3
|
||||
uses: BetaHuhn/do-spaces-action@v2
|
||||
with:
|
||||
access_key: ${{ secrets.DO_SPACES_ACCESS_KEY }}
|
||||
secret_key: ${{ secrets.DO_SPACES_SECRET_KEY }}
|
||||
space_name: ${{ secrets.DO_SPACES_SPACE_NAME }}
|
||||
space_region: ${{ secrets.DO_SPACES_SPACE_REGION }}
|
||||
source: ${{ steps.file.outputs.basename }}.json
|
||||
out_dir: critcmp_results
|
||||
|
||||
# Upload benchmarks to influxdb
|
||||
- name: Upload ${{ steps.file.outputs.basename }}.json to influxDB
|
||||
run: telegraf --config https://eu-central-1-1.aws.cloud2.influxdata.com/api/v2/telegrafs/08b52e34a370b000 --once --debug
|
||||
|
||||
# Helper
|
||||
- name: 'README: compare with another benchmark'
|
||||
run: |
|
||||
echo "${{ steps.file.outputs.basename }}.json has just been pushed."
|
||||
echo 'How to compare this benchmark with another one?'
|
||||
echo ' - Check the available files with: ./benchmarks/scripts/list.sh'
|
||||
echo " - Run the following command: ./benchmarks/scripts/compare.sh <file-to-compare-with> ${{ steps.file.outputs.basename }}.json"
|
78
.github/workflows/push_benchmarks_search_songs.yml
vendored
Normal file
78
.github/workflows/push_benchmarks_search_songs.yml
vendored
Normal file
@ -0,0 +1,78 @@
|
||||
name: Benchmarks search songs (push)
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
|
||||
env:
|
||||
BENCH_NAME: "search_songs"
|
||||
INFLUX_TOKEN: ${{ secrets.INFLUX_TOKEN }}
|
||||
|
||||
jobs:
|
||||
benchmarks:
|
||||
name: Run and upload benchmarks
|
||||
runs-on: benchmarks
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions-rs/toolchain@v1
|
||||
with:
|
||||
profile: minimal
|
||||
toolchain: stable
|
||||
override: true
|
||||
|
||||
# Set variables
|
||||
- name: Set current branch name
|
||||
shell: bash
|
||||
run: echo "name=$(echo ${GITHUB_REF#refs/heads/})" >> $GITHUB_OUTPUT
|
||||
id: current_branch
|
||||
- name: Set normalized current branch name # Replace `/` by `_` in branch name to avoid issues when pushing to S3
|
||||
shell: bash
|
||||
run: echo "name=$(echo ${GITHUB_REF#refs/heads/} | tr '/' '_')" >> $GITHUB_OUTPUT
|
||||
id: normalized_current_branch
|
||||
- name: Set shorter commit SHA
|
||||
shell: bash
|
||||
run: echo "short=$(echo $GITHUB_SHA | cut -c1-8)" >> $GITHUB_OUTPUT
|
||||
id: commit_sha
|
||||
- name: Set file basename with format "dataset_branch_commitSHA"
|
||||
shell: bash
|
||||
run: echo "basename=$(echo ${BENCH_NAME}_${{ steps.normalized_current_branch.outputs.name }}_${{ steps.commit_sha.outputs.short }})" >> $GITHUB_OUTPUT
|
||||
id: file
|
||||
|
||||
# Run benchmarks
|
||||
- name: Run benchmarks - Dataset ${BENCH_NAME} - Branch ${{ steps.current_branch.outputs.name }} - Commit ${{ steps.commit_sha.outputs.short }}
|
||||
run: |
|
||||
cd benchmarks
|
||||
cargo bench --bench ${BENCH_NAME} -- --save-baseline ${{ steps.file.outputs.basename }}
|
||||
|
||||
# Generate critcmp files
|
||||
- name: Install critcmp
|
||||
uses: taiki-e/install-action@v2
|
||||
with:
|
||||
tool: critcmp
|
||||
- name: Export critcmp file
|
||||
run: |
|
||||
critcmp --export ${{ steps.file.outputs.basename }} > ${{ steps.file.outputs.basename }}.json
|
||||
|
||||
# Upload benchmarks
|
||||
- name: Upload ${{ steps.file.outputs.basename }}.json to DO Spaces # DigitalOcean Spaces = S3
|
||||
uses: BetaHuhn/do-spaces-action@v2
|
||||
with:
|
||||
access_key: ${{ secrets.DO_SPACES_ACCESS_KEY }}
|
||||
secret_key: ${{ secrets.DO_SPACES_SECRET_KEY }}
|
||||
space_name: ${{ secrets.DO_SPACES_SPACE_NAME }}
|
||||
space_region: ${{ secrets.DO_SPACES_SPACE_REGION }}
|
||||
source: ${{ steps.file.outputs.basename }}.json
|
||||
out_dir: critcmp_results
|
||||
|
||||
# Upload benchmarks to influxdb
|
||||
- name: Upload ${{ steps.file.outputs.basename }}.json to influxDB
|
||||
run: telegraf --config https://eu-central-1-1.aws.cloud2.influxdata.com/api/v2/telegrafs/08b52e34a370b000 --once --debug
|
||||
|
||||
# Helper
|
||||
- name: 'README: compare with another benchmark'
|
||||
run: |
|
||||
echo "${{ steps.file.outputs.basename }}.json has just been pushed."
|
||||
echo 'How to compare this benchmark with another one?'
|
||||
echo ' - Check the available files with: ./benchmarks/scripts/list.sh'
|
||||
echo " - Run the following command: ./benchmarks/scripts/compare.sh <file-to-compare-with> ${{ steps.file.outputs.basename }}.json"
|
78
.github/workflows/push_benchmarks_search_wiki.yml
vendored
Normal file
78
.github/workflows/push_benchmarks_search_wiki.yml
vendored
Normal file
@ -0,0 +1,78 @@
|
||||
name: Benchmarks search wikipedia articles (push)
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
|
||||
env:
|
||||
BENCH_NAME: "search_wiki"
|
||||
INFLUX_TOKEN: ${{ secrets.INFLUX_TOKEN }}
|
||||
|
||||
jobs:
|
||||
benchmarks:
|
||||
name: Run and upload benchmarks
|
||||
runs-on: benchmarks
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions-rs/toolchain@v1
|
||||
with:
|
||||
profile: minimal
|
||||
toolchain: stable
|
||||
override: true
|
||||
|
||||
# Set variables
|
||||
- name: Set current branch name
|
||||
shell: bash
|
||||
run: echo "name=$(echo ${GITHUB_REF#refs/heads/})" >> $GITHUB_OUTPUT
|
||||
id: current_branch
|
||||
- name: Set normalized current branch name # Replace `/` by `_` in branch name to avoid issues when pushing to S3
|
||||
shell: bash
|
||||
run: echo "name=$(echo ${GITHUB_REF#refs/heads/} | tr '/' '_')" >> $GITHUB_OUTPUT
|
||||
id: normalized_current_branch
|
||||
- name: Set shorter commit SHA
|
||||
shell: bash
|
||||
run: echo "short=$(echo $GITHUB_SHA | cut -c1-8)" >> $GITHUB_OUTPUT
|
||||
id: commit_sha
|
||||
- name: Set file basename with format "dataset_branch_commitSHA"
|
||||
shell: bash
|
||||
run: echo "basename=$(echo ${BENCH_NAME}_${{ steps.normalized_current_branch.outputs.name }}_${{ steps.commit_sha.outputs.short }})" >> $GITHUB_OUTPUT
|
||||
id: file
|
||||
|
||||
# Run benchmarks
|
||||
- name: Run benchmarks - Dataset ${BENCH_NAME} - Branch ${{ steps.current_branch.outputs.name }} - Commit ${{ steps.commit_sha.outputs.short }}
|
||||
run: |
|
||||
cd benchmarks
|
||||
cargo bench --bench ${BENCH_NAME} -- --save-baseline ${{ steps.file.outputs.basename }}
|
||||
|
||||
# Generate critcmp files
|
||||
- name: Install critcmp
|
||||
uses: taiki-e/install-action@v2
|
||||
with:
|
||||
tool: critcmp
|
||||
- name: Export critcmp file
|
||||
run: |
|
||||
critcmp --export ${{ steps.file.outputs.basename }} > ${{ steps.file.outputs.basename }}.json
|
||||
|
||||
# Upload benchmarks
|
||||
- name: Upload ${{ steps.file.outputs.basename }}.json to DO Spaces # DigitalOcean Spaces = S3
|
||||
uses: BetaHuhn/do-spaces-action@v2
|
||||
with:
|
||||
access_key: ${{ secrets.DO_SPACES_ACCESS_KEY }}
|
||||
secret_key: ${{ secrets.DO_SPACES_SECRET_KEY }}
|
||||
space_name: ${{ secrets.DO_SPACES_SPACE_NAME }}
|
||||
space_region: ${{ secrets.DO_SPACES_SPACE_REGION }}
|
||||
source: ${{ steps.file.outputs.basename }}.json
|
||||
out_dir: critcmp_results
|
||||
|
||||
# Upload benchmarks to influxdb
|
||||
- name: Upload ${{ steps.file.outputs.basename }}.json to influxDB
|
||||
run: telegraf --config https://eu-central-1-1.aws.cloud2.influxdata.com/api/v2/telegrafs/08b52e34a370b000 --once --debug
|
||||
|
||||
# Helper
|
||||
- name: 'README: compare with another benchmark'
|
||||
run: |
|
||||
echo "${{ steps.file.outputs.basename }}.json has just been pushed."
|
||||
echo 'How to compare this benchmark with another one?'
|
||||
echo ' - Check the available files with: ./benchmarks/scripts/list.sh'
|
||||
echo " - Run the following command: ./benchmarks/scripts/compare.sh <file-to-compare-with> ${{ steps.file.outputs.basename }}.json"
|
7
.github/workflows/rust.yml
vendored
7
.github/workflows/rust.yml
vendored
@ -124,4 +124,9 @@ jobs:
|
||||
- name: Cache dependencies
|
||||
uses: Swatinem/rust-cache@v2.2.0
|
||||
- name: Run cargo fmt
|
||||
run: cargo fmt --all -- --check
|
||||
# Since we never ran the `build.rs` script in the benchmark directory we are missing one auto-generated import file.
|
||||
# Since we want to trigger (and fail) this action as fast as possible, instead of building the benchmark crate
|
||||
# we are going to create an empty file where rustfmt expects it.
|
||||
run: |
|
||||
echo -ne "\n" > benchmarks/benches/datasets_paths.rs
|
||||
cargo fmt --all -- --check
|
||||
|
@ -13,7 +13,6 @@ env:
|
||||
GH_TOKEN: ${{ secrets.MEILI_BOT_GH_PAT }}
|
||||
|
||||
jobs:
|
||||
|
||||
update-version-cargo-toml:
|
||||
name: Update version in Cargo.toml files
|
||||
runs-on: ubuntu-latest
|
||||
|
4
.gitignore
vendored
4
.gitignore
vendored
@ -8,9 +8,11 @@
|
||||
/snapshots
|
||||
/dumps
|
||||
|
||||
|
||||
# Snapshots
|
||||
## ... large
|
||||
*.full.snap
|
||||
## ... unreviewed
|
||||
*.snap.new
|
||||
|
||||
# Fuzzcheck data for the facet indexing fuzz test
|
||||
milli/fuzz/update::facet::incremental::fuzz::fuzz/
|
||||
|
370
Cargo.lock
generated
370
Cargo.lock
generated
@ -46,7 +46,7 @@ dependencies = [
|
||||
"actix-tls",
|
||||
"actix-utils",
|
||||
"ahash",
|
||||
"base64",
|
||||
"base64 0.13.1",
|
||||
"bitflags",
|
||||
"brotli",
|
||||
"bytes",
|
||||
@ -289,6 +289,12 @@ dependencies = [
|
||||
"alloc-no-stdlib",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "anes"
|
||||
version = "0.1.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299"
|
||||
|
||||
[[package]]
|
||||
name = "anyhow"
|
||||
version = "1.0.68"
|
||||
@ -331,9 +337,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "async-trait"
|
||||
version = "0.1.60"
|
||||
version = "0.1.61"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "677d1d8ab452a3936018a687b20e6f7cf5363d713b732b8884001317b0e48aa3"
|
||||
checksum = "705339e0e4a9690e2908d2b3d049d85682cf19fbd5782494498fbf7003a6a282"
|
||||
dependencies = [
|
||||
"proc-macro2 1.0.49",
|
||||
"quote 1.0.23",
|
||||
@ -377,7 +383,7 @@ dependencies = [
|
||||
"cfg-if",
|
||||
"libc",
|
||||
"miniz_oxide",
|
||||
"object",
|
||||
"object 0.30.2",
|
||||
"rustc-demangle",
|
||||
]
|
||||
|
||||
@ -387,12 +393,37 @@ version = "0.13.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9e1b586273c5702936fe7b7d6896644d8be71e6314cfe09d3167c95f712589e8"
|
||||
|
||||
[[package]]
|
||||
name = "base64"
|
||||
version = "0.21.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a4a4ddaa51a5bc52a6948f74c06d20aaaddb71924eab79b8c97a8c556e942d6a"
|
||||
|
||||
[[package]]
|
||||
name = "base64ct"
|
||||
version = "1.5.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b645a089122eccb6111b4f81cbc1a49f5900ac4666bb93ac027feaecf15607bf"
|
||||
|
||||
[[package]]
|
||||
name = "benchmarks"
|
||||
version = "1.0.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"bytes",
|
||||
"convert_case 0.6.0",
|
||||
"criterion",
|
||||
"csv",
|
||||
"flate2",
|
||||
"milli",
|
||||
"mimalloc",
|
||||
"rand",
|
||||
"rand_chacha",
|
||||
"reqwest",
|
||||
"roaring",
|
||||
"serde_json",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "big_s"
|
||||
version = "1.0.2"
|
||||
@ -592,6 +623,12 @@ dependencies = [
|
||||
"toml",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cast"
|
||||
version = "0.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5"
|
||||
|
||||
[[package]]
|
||||
name = "cc"
|
||||
version = "1.0.78"
|
||||
@ -644,9 +681,37 @@ dependencies = [
|
||||
"slice-group-by",
|
||||
"unicode-normalization",
|
||||
"unicode-segmentation",
|
||||
"wana_kana",
|
||||
"whatlang",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ciborium"
|
||||
version = "0.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b0c137568cc60b904a7724001b35ce2630fd00d5d84805fbb608ab89509d788f"
|
||||
dependencies = [
|
||||
"ciborium-io",
|
||||
"ciborium-ll",
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ciborium-io"
|
||||
version = "0.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "346de753af073cc87b52b2083a506b38ac176a44cfb05497b622e27be899b369"
|
||||
|
||||
[[package]]
|
||||
name = "ciborium-ll"
|
||||
version = "0.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "213030a2b5a4e0c0892b6652260cf6ccac84827b83a85a534e178e3906c4cf1b"
|
||||
dependencies = [
|
||||
"ciborium-io",
|
||||
"half",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cipher"
|
||||
version = "0.3.0"
|
||||
@ -832,6 +897,42 @@ dependencies = [
|
||||
"cfg-if",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "criterion"
|
||||
version = "0.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e7c76e09c1aae2bc52b3d2f29e13c6572553b30c4aa1b8a49fd70de6412654cb"
|
||||
dependencies = [
|
||||
"anes",
|
||||
"atty",
|
||||
"cast",
|
||||
"ciborium",
|
||||
"clap 3.2.23",
|
||||
"criterion-plot",
|
||||
"itertools",
|
||||
"lazy_static",
|
||||
"num-traits",
|
||||
"oorandom",
|
||||
"plotters",
|
||||
"rayon",
|
||||
"regex",
|
||||
"serde",
|
||||
"serde_derive",
|
||||
"serde_json",
|
||||
"tinytemplate",
|
||||
"walkdir",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "criterion-plot"
|
||||
version = "0.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1"
|
||||
dependencies = [
|
||||
"cast",
|
||||
"itertools",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "critical-section"
|
||||
version = "1.1.1"
|
||||
@ -1018,9 +1119,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "deserr"
|
||||
version = "0.1.2"
|
||||
version = "0.1.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bb73133def0ebeb6f2e911a8ea3495cde53b00a5337dddc49bdb4b6c450ac8c7"
|
||||
checksum = "86290491a2b5c21a1a5083da8dae831006761258fabd5617309c3eebc5f89468"
|
||||
dependencies = [
|
||||
"deserr-internal",
|
||||
"serde-cs",
|
||||
@ -1029,9 +1130,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "deserr-internal"
|
||||
version = "0.1.2"
|
||||
version = "0.1.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f290f1f52fbf7d0afe91e6f71f3c831ae5b223a55cc396e819c5748ba73a7bfa"
|
||||
checksum = "7131de1c27581bc376a22166c9f570be91b76cb096be2f6aecf224c27bf7c49a"
|
||||
dependencies = [
|
||||
"convert_case 0.5.0",
|
||||
"proc-macro2 1.0.49",
|
||||
@ -1309,9 +1410,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "filter-parser"
|
||||
version = "0.38.0"
|
||||
source = "git+https://github.com/meilisearch/milli.git?tag=v0.38.0#c3f4835e8e102586bd6d5eb1e55c4bba5e92f994"
|
||||
version = "1.0.0"
|
||||
dependencies = [
|
||||
"insta",
|
||||
"nom",
|
||||
"nom_locate",
|
||||
]
|
||||
@ -1323,14 +1424,15 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a8a2db397cb1c8772f31494cb8917e48cd1e64f0fa7efac59fbd741a0a8ce841"
|
||||
dependencies = [
|
||||
"crc32fast",
|
||||
"libz-sys",
|
||||
"miniz_oxide",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "flatten-serde-json"
|
||||
version = "0.38.0"
|
||||
source = "git+https://github.com/meilisearch/milli.git?tag=v0.38.0#c3f4835e8e102586bd6d5eb1e55c4bba5e92f994"
|
||||
version = "1.0.0"
|
||||
dependencies = [
|
||||
"criterion",
|
||||
"serde_json",
|
||||
]
|
||||
|
||||
@ -1444,6 +1546,50 @@ dependencies = [
|
||||
"slab",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "fuzzcheck"
|
||||
version = "0.12.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ee76e8096c3fcd82ab23177edddcc9b81b72c123caab54bb1e2dc19fd09d2dec"
|
||||
dependencies = [
|
||||
"ahash",
|
||||
"bit-vec",
|
||||
"cc",
|
||||
"cfg-if",
|
||||
"fastrand",
|
||||
"flate2",
|
||||
"fuzzcheck_common",
|
||||
"fuzzcheck_mutators_derive",
|
||||
"getopts",
|
||||
"libc",
|
||||
"md5",
|
||||
"nu-ansi-term",
|
||||
"object 0.27.1",
|
||||
"regex-syntax",
|
||||
"serde",
|
||||
"serde_json",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "fuzzcheck_common"
|
||||
version = "0.12.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "dde06f8d25b14a35d43eb2d3dbace3b9193424460b10ad4ccf1b3d542d48f06f"
|
||||
dependencies = [
|
||||
"getopts",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "fuzzcheck_mutators_derive"
|
||||
version = "0.12.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "30ce01e8bbb3e7e0758dcf907fe799f5998a54368963f766ae94b84624ba60c8"
|
||||
dependencies = [
|
||||
"proc-macro2 1.0.49",
|
||||
"quote 1.0.23",
|
||||
"syn 1.0.107",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "fxhash"
|
||||
version = "0.2.1"
|
||||
@ -1469,6 +1615,15 @@ version = "0.5.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "36d244a08113319b5ebcabad2b8b7925732d15eec46d7e7ac3c11734f3b7a6ad"
|
||||
|
||||
[[package]]
|
||||
name = "getopts"
|
||||
version = "0.2.21"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "14dbbfd5c71d70241ecf9e6f13737f7b5ce823821063188d7e46c41d371eebd5"
|
||||
dependencies = [
|
||||
"unicode-width",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "getrandom"
|
||||
version = "0.2.8"
|
||||
@ -1513,9 +1668,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "glob"
|
||||
version = "0.3.0"
|
||||
version = "0.3.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574"
|
||||
checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b"
|
||||
|
||||
[[package]]
|
||||
name = "grenad"
|
||||
@ -1547,6 +1702,12 @@ dependencies = [
|
||||
"tracing",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "half"
|
||||
version = "1.8.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "eabb4a44450da02c90444cf74558da904edde8fb4e9035a9a6a4e15445af0bd7"
|
||||
|
||||
[[package]]
|
||||
name = "hash32"
|
||||
version = "0.2.1"
|
||||
@ -1786,9 +1947,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "insta"
|
||||
version = "1.24.1"
|
||||
version = "1.26.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "eb5686bd8e9239eabe90bb30a0c341bffd6fdc177fb556708f2cb792bf00352d"
|
||||
checksum = "f6f0f08b46e4379744de2ab67aa8f7de3ffd1da3e275adc41fcc82053ede46ff"
|
||||
dependencies = [
|
||||
"console",
|
||||
"lazy_static",
|
||||
@ -1821,9 +1982,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "ipnet"
|
||||
version = "2.7.0"
|
||||
version = "2.7.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "11b0d96e660696543b251e58030cf9787df56da39dab19ad60eae7353040917e"
|
||||
checksum = "30e22bd8629359895450b59ea7a776c850561b96a3b1d31321c1949d9e6c9146"
|
||||
|
||||
[[package]]
|
||||
name = "is-terminal"
|
||||
@ -1893,9 +2054,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "json-depth-checker"
|
||||
version = "0.38.0"
|
||||
source = "git+https://github.com/meilisearch/milli.git?tag=v0.38.0#c3f4835e8e102586bd6d5eb1e55c4bba5e92f994"
|
||||
version = "1.0.0"
|
||||
dependencies = [
|
||||
"criterion",
|
||||
"serde_json",
|
||||
]
|
||||
|
||||
@ -1905,7 +2066,7 @@ version = "8.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "09f4f04699947111ec1733e71778d763555737579e44b85844cae8e1940a1828"
|
||||
dependencies = [
|
||||
"base64",
|
||||
"base64 0.13.1",
|
||||
"pem",
|
||||
"ring",
|
||||
"serde",
|
||||
@ -1942,9 +2103,9 @@ checksum = "201de327520df007757c1f0adce6e827fe8562fbc28bfd9c15571c66ca1f5f79"
|
||||
|
||||
[[package]]
|
||||
name = "libgit2-sys"
|
||||
version = "0.14.0+1.5.0"
|
||||
version = "0.14.1+1.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "47a00859c70c8a4f7218e6d1cc32875c4b55f6799445b842b0d8ed5e4c3d959b"
|
||||
checksum = "4a07fb2692bc3593bda59de45a502bb3071659f2c515e28c71e728306b038e17"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"libc",
|
||||
@ -1960,9 +2121,9 @@ checksum = "348108ab3fba42ec82ff6e9564fc4ca0247bdccdc68dd8af9764bbc79c3c8ffb"
|
||||
|
||||
[[package]]
|
||||
name = "libmimalloc-sys"
|
||||
version = "0.1.28"
|
||||
version = "0.1.30"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "04d1c67deb83e6b75fa4fe3309e09cfeade12e7721d95322af500d3814ea60c9"
|
||||
checksum = "dd8c7cbf8b89019683667e347572e6d55a7df7ea36b0c4ce69961b0cde67b174"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"libc",
|
||||
@ -2371,7 +2532,7 @@ dependencies = [
|
||||
name = "meilisearch-auth"
|
||||
version = "1.0.0"
|
||||
dependencies = [
|
||||
"base64",
|
||||
"base64 0.13.1",
|
||||
"enum-iterator",
|
||||
"hmac",
|
||||
"meilisearch-types",
|
||||
@ -2442,9 +2603,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "milli"
|
||||
version = "0.38.0"
|
||||
source = "git+https://github.com/meilisearch/milli.git?tag=v0.38.0#c3f4835e8e102586bd6d5eb1e55c4bba5e92f994"
|
||||
version = "1.0.0"
|
||||
dependencies = [
|
||||
"big_s",
|
||||
"bimap",
|
||||
"bincode",
|
||||
"bstr 1.1.0",
|
||||
@ -2453,23 +2614,29 @@ dependencies = [
|
||||
"concat-arrays",
|
||||
"crossbeam-channel",
|
||||
"csv",
|
||||
"deserr",
|
||||
"either",
|
||||
"filter-parser",
|
||||
"flatten-serde-json",
|
||||
"fst",
|
||||
"fuzzcheck",
|
||||
"fxhash",
|
||||
"geoutils",
|
||||
"grenad",
|
||||
"heed",
|
||||
"insta",
|
||||
"itertools",
|
||||
"json-depth-checker",
|
||||
"levenshtein_automata",
|
||||
"log",
|
||||
"logging_timer",
|
||||
"maplit",
|
||||
"md5",
|
||||
"memmap2",
|
||||
"obkv",
|
||||
"once_cell",
|
||||
"ordered-float",
|
||||
"rand",
|
||||
"rayon",
|
||||
"roaring",
|
||||
"rstar",
|
||||
@ -2487,9 +2654,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "mimalloc"
|
||||
version = "0.1.32"
|
||||
version = "0.1.34"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9b2374e2999959a7b583e1811a1ddbf1d3a4b9496eceb9746f1192a59d871eca"
|
||||
checksum = "9dcb174b18635f7561a0c6c9fc2ce57218ac7523cf72c50af80e2d79ab8f3ba1"
|
||||
dependencies = [
|
||||
"libmimalloc-sys",
|
||||
]
|
||||
@ -2572,6 +2739,16 @@ dependencies = [
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "nu-ansi-term"
|
||||
version = "0.39.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e62e2187cbceeafee9fb7b5e5e182623e0628ebf430a479df4487beb8f92fd7a"
|
||||
dependencies = [
|
||||
"overload",
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "num-bigint"
|
||||
version = "0.4.3"
|
||||
@ -2615,9 +2792,18 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "object"
|
||||
version = "0.30.1"
|
||||
version = "0.27.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8d864c91689fdc196779b98dba0aceac6118594c2df6ee5d943eb6a8df4d107a"
|
||||
checksum = "67ac1d3f9a1d3616fd9a60c8d74296f22406a238b6a72f5cc1e6f314df4ffbf9"
|
||||
dependencies = [
|
||||
"memchr",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "object"
|
||||
version = "0.30.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2b8c786513eb403643f2a88c244c2aaa270ef2153f55094587d0c48a3cf22a83"
|
||||
dependencies = [
|
||||
"memchr",
|
||||
]
|
||||
@ -2634,6 +2820,12 @@ version = "1.17.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6f61fba1741ea2b3d6a1e3178721804bb716a68a6aeba1149b5d52e3d464ea66"
|
||||
|
||||
[[package]]
|
||||
name = "oorandom"
|
||||
version = "11.1.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0ab1bc2a289d34bd04a330323ac98a1b4bc82c9d9fcb1e66b63caa84da26b575"
|
||||
|
||||
[[package]]
|
||||
name = "opaque-debug"
|
||||
version = "0.3.0"
|
||||
@ -2655,6 +2847,12 @@ version = "6.4.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9b7820b9daea5457c9f21c69448905d723fbd21136ccf521748f23fd49e723ee"
|
||||
|
||||
[[package]]
|
||||
name = "overload"
|
||||
version = "0.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39"
|
||||
|
||||
[[package]]
|
||||
name = "page_size"
|
||||
version = "0.4.2"
|
||||
@ -2748,7 +2946,7 @@ version = "1.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "03c64931a1a212348ec4f3b4362585eca7159d0d09cbdf4a7f74f02173596fd4"
|
||||
dependencies = [
|
||||
"base64",
|
||||
"base64 0.13.1",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@ -2767,9 +2965,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "pest"
|
||||
version = "2.5.2"
|
||||
version = "2.5.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0f6e86fb9e7026527a0d46bc308b841d73170ef8f443e1807f6ef88526a816d4"
|
||||
checksum = "4257b4a04d91f7e9e6290be5d3da4804dd5784fafde3a497d73eb2b4a158c30a"
|
||||
dependencies = [
|
||||
"thiserror",
|
||||
"ucd-trie",
|
||||
@ -2777,9 +2975,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "pest_derive"
|
||||
version = "2.5.2"
|
||||
version = "2.5.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "96504449aa860c8dcde14f9fba5c58dc6658688ca1fe363589d6327b8662c603"
|
||||
checksum = "241cda393b0cdd65e62e07e12454f1f25d57017dcc514b1514cd3c4645e3a0a6"
|
||||
dependencies = [
|
||||
"pest",
|
||||
"pest_generator",
|
||||
@ -2787,9 +2985,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "pest_generator"
|
||||
version = "2.5.2"
|
||||
version = "2.5.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "798e0220d1111ae63d66cb66a5dcb3fc2d986d520b98e49e1852bfdb11d7c5e7"
|
||||
checksum = "46b53634d8c8196302953c74d5352f33d0c512a9499bd2ce468fc9f4128fa27c"
|
||||
dependencies = [
|
||||
"pest",
|
||||
"pest_meta",
|
||||
@ -2800,13 +2998,13 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "pest_meta"
|
||||
version = "2.5.2"
|
||||
version = "2.5.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "984298b75898e30a843e278a9f2452c31e349a073a0ce6fd950a12a74464e065"
|
||||
checksum = "0ef4f1332a8d4678b41966bb4cc1d0676880e84183a1ecc3f4b69f03e99c7a51"
|
||||
dependencies = [
|
||||
"once_cell",
|
||||
"pest",
|
||||
"sha1",
|
||||
"sha2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@ -2880,6 +3078,34 @@ dependencies = [
|
||||
"dirs-next",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "plotters"
|
||||
version = "0.3.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2538b639e642295546c50fcd545198c9d64ee2a38620a628724a3b266d5fbf97"
|
||||
dependencies = [
|
||||
"num-traits",
|
||||
"plotters-backend",
|
||||
"plotters-svg",
|
||||
"wasm-bindgen",
|
||||
"web-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "plotters-backend"
|
||||
version = "0.3.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "193228616381fecdc1224c62e96946dfbc73ff4384fba576e052ff8c1bea8142"
|
||||
|
||||
[[package]]
|
||||
name = "plotters-svg"
|
||||
version = "0.3.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f9a81d2759aae1dae668f783c308bc5c8ebd191ff4184aaa1b37f65a6ae5a56f"
|
||||
dependencies = [
|
||||
"plotters-backend",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ppv-lite86"
|
||||
version = "0.2.17"
|
||||
@ -3108,9 +3334,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "regex"
|
||||
version = "1.7.0"
|
||||
version = "1.7.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e076559ef8e241f2ae3479e36f97bd5741c0330689e217ad51ce2c76808b868a"
|
||||
checksum = "48aaa5748ba571fb95cd2c85c09f629215d3a6ece942baa100950af03a34f733"
|
||||
dependencies = [
|
||||
"aho-corasick",
|
||||
"memchr",
|
||||
@ -3144,7 +3370,7 @@ version = "0.11.13"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "68cc60575865c7831548863cc02356512e3f1dc2f3f82cb837d7fc4cc8f3c97c"
|
||||
dependencies = [
|
||||
"base64",
|
||||
"base64 0.13.1",
|
||||
"bytes",
|
||||
"encoding_rs",
|
||||
"futures-core",
|
||||
@ -3265,11 +3491,11 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "rustls-pemfile"
|
||||
version = "1.0.1"
|
||||
version = "1.0.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0864aeff53f8c05aa08d86e5ef839d3dfcf07aeba2db32f12db0ef716e87bd55"
|
||||
checksum = "d194b56d58803a43635bdc398cd17e383d6f71f9182b9a192c127ca42494a59b"
|
||||
dependencies = [
|
||||
"base64",
|
||||
"base64 0.21.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@ -3323,9 +3549,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "segment"
|
||||
version = "0.2.1"
|
||||
version = "0.2.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "24fc91c898e0487ff3e471d0849bbaf7d38a00ff5e3531009d386b0bab9b6b12"
|
||||
checksum = "2bb93f3f738322ce8f33c4e80c251fb1560ca81f3a241355271fcb912eeb48e3"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"reqwest",
|
||||
@ -3607,9 +3833,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "sysinfo"
|
||||
version = "0.26.8"
|
||||
version = "0.26.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "29ddf41e393a9133c81d5f0974195366bd57082deac6e0eb02ed39b8341c2bb6"
|
||||
checksum = "5c18a6156d1f27a9592ee18c1a846ca8dd5c258b7179fc193ae87c74ebb666f5"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"core-foundation-sys",
|
||||
@ -3716,6 +3942,16 @@ dependencies = [
|
||||
"time-core",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tinytemplate"
|
||||
version = "1.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc"
|
||||
dependencies = [
|
||||
"serde",
|
||||
"serde_json",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tinyvec"
|
||||
version = "1.6.0"
|
||||
@ -3733,9 +3969,9 @@ checksum = "cda74da7e1a664f795bb1f8a87ec406fb89a02522cf6e50620d016add6dbbf5c"
|
||||
|
||||
[[package]]
|
||||
name = "tokio"
|
||||
version = "1.24.0"
|
||||
version = "1.24.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7125661431c26622a80ca5051a2f936c9a678318e0351007b0cc313143024e5c"
|
||||
checksum = "1d9f76183f91ecfb55e1d7d5602bd1d979e38a3a522fe900241cf195624d67ae"
|
||||
dependencies = [
|
||||
"autocfg",
|
||||
"bytes",
|
||||
@ -3836,9 +4072,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "try-lock"
|
||||
version = "0.2.3"
|
||||
version = "0.2.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "59547bce71d9c38b83d9c0e92b6066c4253371f15005def0c30d9657f50c7642"
|
||||
checksum = "3528ecfd12c466c6f163363caf2d02a71161dd5e1cc6ae7b34207ea2d42d81ed"
|
||||
|
||||
[[package]]
|
||||
name = "typenum"
|
||||
@ -3888,6 +4124,12 @@ version = "1.10.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0fdbf052a0783de01e944a6ce7a8cb939e295b1e7be835a1112c3b9a7f047a5a"
|
||||
|
||||
[[package]]
|
||||
name = "unicode-width"
|
||||
version = "0.1.10"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c0edd1e5b14653f783770bce4a4dabb4a5108a5370a5f5d8cfe8710c361f6c8b"
|
||||
|
||||
[[package]]
|
||||
name = "unicode-xid"
|
||||
version = "0.1.0"
|
||||
@ -3996,6 +4238,17 @@ dependencies = [
|
||||
"winapi-util",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "wana_kana"
|
||||
version = "2.1.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5770f395a1c9d6d64bee602f0a36763d7861ef5715f9d4f707cb0086f82dba54"
|
||||
dependencies = [
|
||||
"fnv",
|
||||
"itertools",
|
||||
"lazy_static",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "want"
|
||||
version = "0.3.0"
|
||||
@ -4310,10 +4563,11 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "zstd-sys"
|
||||
version = "2.0.4+zstd.1.5.2"
|
||||
version = "2.0.5+zstd.1.5.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4fa202f2ef00074143e219d15b62ffc317d17cc33909feac471c044087cad7b0"
|
||||
checksum = "edc50ffce891ad571e9f9afe5039c4837bede781ac4bb13052ed7ae695518596"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"libc",
|
||||
"pkg-config",
|
||||
]
|
||||
|
30
Cargo.toml
30
Cargo.toml
@ -9,6 +9,11 @@ members = [
|
||||
"dump",
|
||||
"file-store",
|
||||
"permissive-json-pointer",
|
||||
"milli",
|
||||
"filter-parser",
|
||||
"flatten-serde-json",
|
||||
"json-depth-checker",
|
||||
"benchmarks"
|
||||
]
|
||||
|
||||
[profile.release]
|
||||
@ -17,5 +22,28 @@ codegen-units = 1
|
||||
[profile.dev.package.flate2]
|
||||
opt-level = 3
|
||||
|
||||
[profile.dev.package.milli]
|
||||
[profile.dev.package.grenad]
|
||||
opt-level = 3
|
||||
[profile.dev.package.roaring]
|
||||
opt-level = 3
|
||||
|
||||
[profile.dev.package.lindera-ipadic-builder]
|
||||
opt-level = 3
|
||||
[profile.dev.package.encoding]
|
||||
opt-level = 3
|
||||
[profile.dev.package.yada]
|
||||
opt-level = 3
|
||||
|
||||
[profile.release.package.lindera-ipadic-builder]
|
||||
opt-level = 3
|
||||
[profile.release.package.encoding]
|
||||
opt-level = 3
|
||||
[profile.release.package.yada]
|
||||
opt-level = 3
|
||||
|
||||
[profile.bench.package.lindera-ipadic-builder]
|
||||
opt-level = 3
|
||||
[profile.bench.package.encoding]
|
||||
opt-level = 3
|
||||
[profile.bench.package.yada]
|
||||
opt-level = 3
|
||||
|
@ -101,3 +101,7 @@ Meilisearch is a search engine created by [Meili](https://www.welcometothejungle
|
||||
- For everything else, please check [this page listing some of the other places where you can find us](https://docs.meilisearch.com/learn/what_is_meilisearch/contact.html)
|
||||
|
||||
Thank you for your support!
|
||||
|
||||
## 📦 Internal crates and their versioning
|
||||
|
||||
The crates in this repository are not currently available on crates.io and do not follow [semver conventions](https://semver.org). However, the Meilisearch search engine is well versioned, and releases follow the semver conventions.
|
||||
|
6
assets/milli-logo.svg
Normal file
6
assets/milli-logo.svg
Normal file
@ -0,0 +1,6 @@
|
||||
<svg width="277" height="236" viewBox="0 0 277 236" fill="none" xmlns="http://www.w3.org/2000/svg">
|
||||
<path fill-rule="evenodd" clip-rule="evenodd" d="M213.085 190L242.907 86H276.196L246.375 190H213.085Z" fill="#494949"/>
|
||||
<path fill-rule="evenodd" clip-rule="evenodd" d="M0 190L29.8215 86H63.1111L33.2896 190H0Z" fill="#494949"/>
|
||||
<path fill-rule="evenodd" clip-rule="evenodd" d="M124.986 0L57.5772 235.083L60.7752 236H90.6038L158.276 0H124.986Z" fill="#494949"/>
|
||||
<path fill-rule="evenodd" clip-rule="evenodd" d="M195.273 0L127.601 236H160.891L228.563 0H195.273Z" fill="#494949"/>
|
||||
</svg>
|
After Width: | Height: | Size: 585 B |
1
benchmarks/.gitignore
vendored
Normal file
1
benchmarks/.gitignore
vendored
Normal file
@ -0,0 +1 @@
|
||||
benches/datasets_paths.rs
|
48
benchmarks/Cargo.toml
Normal file
48
benchmarks/Cargo.toml
Normal file
@ -0,0 +1,48 @@
|
||||
[package]
|
||||
name = "benchmarks"
|
||||
version = "1.0.0"
|
||||
edition = "2018"
|
||||
publish = false
|
||||
|
||||
[dependencies]
|
||||
anyhow = "1.0.65"
|
||||
csv = "1.1.6"
|
||||
milli = { path = "../milli", default-features = false }
|
||||
mimalloc = { version = "0.1.29", default-features = false }
|
||||
serde_json = { version = "1.0.85", features = ["preserve_order"] }
|
||||
|
||||
[dev-dependencies]
|
||||
criterion = { version = "0.4.0", features = ["html_reports"] }
|
||||
rand = "0.8.5"
|
||||
rand_chacha = "0.3.1"
|
||||
roaring = "0.10.1"
|
||||
|
||||
[build-dependencies]
|
||||
anyhow = "1.0.65"
|
||||
bytes = "1.2.1"
|
||||
convert_case = "0.6.0"
|
||||
flate2 = "1.0.24"
|
||||
reqwest = { version = "0.11.12", features = ["blocking", "rustls-tls"], default-features = false }
|
||||
|
||||
[features]
|
||||
default = ["milli/default"]
|
||||
|
||||
[[bench]]
|
||||
name = "search_songs"
|
||||
harness = false
|
||||
|
||||
[[bench]]
|
||||
name = "search_wiki"
|
||||
harness = false
|
||||
|
||||
[[bench]]
|
||||
name = "search_geo"
|
||||
harness = false
|
||||
|
||||
[[bench]]
|
||||
name = "indexing"
|
||||
harness = false
|
||||
|
||||
[[bench]]
|
||||
name = "formatting"
|
||||
harness = false
|
138
benchmarks/README.md
Normal file
138
benchmarks/README.md
Normal file
@ -0,0 +1,138 @@
|
||||
Benchmarks
|
||||
==========
|
||||
|
||||
## TOC
|
||||
|
||||
- [Run the benchmarks](#run-the-benchmarks)
|
||||
- [Comparison between benchmarks](#comparison-between-benchmarks)
|
||||
- [Datasets](#datasets)
|
||||
|
||||
## Run the benchmarks
|
||||
|
||||
### On our private server
|
||||
|
||||
The Meili team has self-hosted its own GitHub runner to run benchmarks on our dedicated bare metal server.
|
||||
|
||||
To trigger the benchmark workflow:
|
||||
- Go to the `Actions` tab of this repository.
|
||||
- Select the `Benchmarks` workflow on the left.
|
||||
- Click on `Run workflow` in the blue banner.
|
||||
- Select the branch on which you want to run the benchmarks and select the dataset you want (default: `songs`).
|
||||
- Finally, click on `Run workflow`.
|
||||
|
||||
This GitHub workflow will run the benchmarks and push the `critcmp` report to a DigitalOcean Space (= S3).
|
||||
|
||||
The name of the uploaded file is displayed in the workflow.
|
||||
|
||||
_[More about critcmp](https://github.com/BurntSushi/critcmp)._
|
||||
|
||||
💡 To compare the just-uploaded benchmark with another one, check out the [next section](#comparison-between-benchmarks).
|
||||
|
||||
### On your machine
|
||||
|
||||
To run all the benchmarks (~5h):
|
||||
|
||||
```bash
|
||||
cargo bench
|
||||
```
|
||||
|
||||
To run only the `search_songs` (~1h), `search_wiki` (~3h), `search_geo` (~20m) or `indexing` (~2h) benchmark:
|
||||
|
||||
```bash
|
||||
cargo bench --bench <dataset name>
|
||||
```
|
||||
|
||||
By default, the datasets will be downloaded and uncompressed automatically in the target directory.<br>
|
||||
If you don't want to download the datasets every time you update something on the code, you can specify a custom directory with the environment variable `MILLI_BENCH_DATASETS_PATH`:
|
||||
|
||||
```bash
|
||||
mkdir ~/datasets
|
||||
MILLI_BENCH_DATASETS_PATH=~/datasets cargo bench --bench search_songs # the four datasets are downloaded
|
||||
touch build.rs
|
||||
MILLI_BENCH_DATASETS_PATH=~/datasets cargo bench --bench search_songs # the code is compiled again but the datasets are not downloaded
|
||||
```
|
||||
|
||||
## Comparison between benchmarks
|
||||
|
||||
The benchmark reports we push are generated with `critcmp`. Thus, we use `critcmp` to show the result of a benchmark, or compare results between multiple benchmarks.
|
||||
|
||||
We provide a script to download and display the comparison report.
|
||||
|
||||
Requirements:
|
||||
- `grep`
|
||||
- `curl`
|
||||
- [`critcmp`](https://github.com/BurntSushi/critcmp)
|
||||
|
||||
List the available files in the DO Space:
|
||||
|
||||
```bash
|
||||
./benchmarks/scripts/list.sh
|
||||
```
|
||||
```bash
|
||||
songs_main_09a4321.json
|
||||
songs_geosearch_24ec456.json
|
||||
search_songs_main_cb45a10b.json
|
||||
```
|
||||
|
||||
Run the comparison script:
|
||||
|
||||
```bash
|
||||
# we get the result of ONE benchmark, this gives you an idea of how much time an operation took
|
||||
./benchmarks/scripts/compare.sh songs_geosearch_24ec456.json
|
||||
# we compare two benchmarks
|
||||
./benchmarks/scripts/compare.sh songs_main_09a4321.json songs_geosearch_24ec456.json
|
||||
# we compare three benchmarks
|
||||
./benchmarks/scripts/compare.sh songs_main_09a4321.json songs_geosearch_24ec456.json search_songs_main_cb45a10b.json
|
||||
```
|
||||
|
||||
## Datasets
|
||||
|
||||
The benchmarks use the following datasets:
|
||||
- `smol-songs`
|
||||
- `smol-wiki`
|
||||
- `movies`
|
||||
- `smol-all-countries`
|
||||
|
||||
### Songs
|
||||
|
||||
`smol-songs` is a subset of the [`songs.csv` dataset](https://milli-benchmarks.fra1.digitaloceanspaces.com/datasets/songs.csv.gz).
|
||||
|
||||
It was generated with this command:
|
||||
|
||||
```bash
|
||||
xsv sample --seed 42 1000000 songs.csv -o smol-songs.csv
|
||||
```
|
||||
|
||||
_[Download the generated `smol-songs` dataset](https://milli-benchmarks.fra1.digitaloceanspaces.com/datasets/smol-songs.csv.gz)._
|
||||
|
||||
### Wiki
|
||||
|
||||
`smol-wiki` is a subset of the [`wiki-articles.csv` dataset](https://milli-benchmarks.fra1.digitaloceanspaces.com/datasets/wiki-articles.csv.gz).
|
||||
|
||||
It was generated with the following command:
|
||||
|
||||
```bash
|
||||
xsv sample --seed 42 500000 wiki-articles.csv -o smol-wiki-articles.csv
|
||||
```
|
||||
|
||||
_[Download the `smol-wiki` dataset](https://milli-benchmarks.fra1.digitaloceanspaces.com/datasets/smol-wiki-articles.csv.gz)._
|
||||
|
||||
### Movies
|
||||
|
||||
`movies` is a really small dataset we use as our example in the [getting started guide](https://docs.meilisearch.com/learn/getting_started/).
|
||||
|
||||
_[Download the `movies` dataset](https://docs.meilisearch.com/movies.json)._
|
||||
|
||||
|
||||
### All Countries
|
||||
|
||||
`smol-all-countries` is a subset of the [`all-countries.csv` dataset](https://milli-benchmarks.fra1.digitaloceanspaces.com/datasets/all-countries.csv.gz)
|
||||
It has been converted to jsonlines and then edited so it matches our format for the `_geo` field.
|
||||
|
||||
It was generated with the following command:
|
||||
```bash
|
||||
bat all-countries.csv.gz | gunzip | xsv sample --seed 42 1000000 | csv2json-lite | sd '"latitude":"(.*?)","longitude":"(.*?)"' '"_geo": { "lat": $1, "lng": $2 }' | sd '\[|\]|,$' '' | gzip > smol-all-countries.jsonl.gz
|
||||
```
|
||||
|
||||
_[Download the `smol-all-countries` dataset](https://milli-benchmarks.fra1.digitaloceanspaces.com/datasets/smol-all-countries.jsonl.gz)._
|
||||
|
67
benchmarks/benches/formatting.rs
Normal file
67
benchmarks/benches/formatting.rs
Normal file
@ -0,0 +1,67 @@
|
||||
use std::rc::Rc;
|
||||
|
||||
use criterion::{criterion_group, criterion_main};
|
||||
use milli::tokenizer::TokenizerBuilder;
|
||||
use milli::{FormatOptions, MatcherBuilder, MatchingWord, MatchingWords};
|
||||
|
||||
#[global_allocator]
|
||||
static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;
|
||||
|
||||
struct Conf<'a> {
|
||||
name: &'a str,
|
||||
text: &'a str,
|
||||
matching_words: MatcherBuilder<'a, Vec<u8>>,
|
||||
}
|
||||
|
||||
fn bench_formatting(c: &mut criterion::Criterion) {
|
||||
#[rustfmt::skip]
|
||||
let confs = &[
|
||||
Conf {
|
||||
name: "'the door d'",
|
||||
text: r#"He used to do the door sounds in "Star Trek" with his mouth, phssst, phssst. The MD-11 passenger and cargo doors also tend to behave like electromagnetic apertures, because the doors do not have continuous electrical contact with the door frames around the door perimeter. But Theodor said that the doors don't work."#,
|
||||
matching_words: MatcherBuilder::new(MatchingWords::new(vec![
|
||||
(vec![Rc::new(MatchingWord::new("t".to_string(), 0, false).unwrap()), Rc::new(MatchingWord::new("he".to_string(), 0, false).unwrap())], vec![0]),
|
||||
(vec![Rc::new(MatchingWord::new("the".to_string(), 0, false).unwrap())], vec![0]),
|
||||
(vec![Rc::new(MatchingWord::new("door".to_string(), 1, false).unwrap())], vec![1]),
|
||||
(vec![Rc::new(MatchingWord::new("do".to_string(), 0, false).unwrap()), Rc::new(MatchingWord::new("or".to_string(), 0, false).unwrap())], vec![0]),
|
||||
(vec![Rc::new(MatchingWord::new("thedoor".to_string(), 1, false).unwrap())], vec![0, 1]),
|
||||
(vec![Rc::new(MatchingWord::new("d".to_string(), 0, true).unwrap())], vec![2]),
|
||||
(vec![Rc::new(MatchingWord::new("thedoord".to_string(), 1, true).unwrap())], vec![0, 1, 2]),
|
||||
(vec![Rc::new(MatchingWord::new("doord".to_string(), 1, true).unwrap())], vec![1, 2]),
|
||||
]
|
||||
), TokenizerBuilder::default().build()),
|
||||
},
|
||||
];
|
||||
|
||||
let format_options = &[
|
||||
FormatOptions { highlight: false, crop: None },
|
||||
FormatOptions { highlight: true, crop: None },
|
||||
FormatOptions { highlight: false, crop: Some(10) },
|
||||
FormatOptions { highlight: true, crop: Some(10) },
|
||||
FormatOptions { highlight: false, crop: Some(20) },
|
||||
FormatOptions { highlight: true, crop: Some(20) },
|
||||
];
|
||||
|
||||
for option in format_options {
|
||||
let highlight = if option.highlight { "highlight" } else { "no-highlight" };
|
||||
|
||||
let name = match option.crop {
|
||||
Some(size) => format!("{}-crop({})", highlight, size),
|
||||
None => format!("{}-no-crop", highlight),
|
||||
};
|
||||
|
||||
let mut group = c.benchmark_group(&name);
|
||||
for conf in confs {
|
||||
group.bench_function(conf.name, |b| {
|
||||
b.iter(|| {
|
||||
let mut matcher = conf.matching_words.build(conf.text);
|
||||
matcher.format(*option);
|
||||
})
|
||||
});
|
||||
}
|
||||
group.finish();
|
||||
}
|
||||
}
|
||||
|
||||
criterion_group!(benches, bench_formatting);
|
||||
criterion_main!(benches);
|
1380
benchmarks/benches/indexing.rs
Normal file
1380
benchmarks/benches/indexing.rs
Normal file
File diff suppressed because it is too large
Load Diff
122
benchmarks/benches/search_geo.rs
Normal file
122
benchmarks/benches/search_geo.rs
Normal file
@ -0,0 +1,122 @@
|
||||
mod datasets_paths;
|
||||
mod utils;
|
||||
|
||||
use criterion::{criterion_group, criterion_main};
|
||||
use milli::update::Settings;
|
||||
use utils::Conf;
|
||||
|
||||
#[global_allocator]
|
||||
static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;
|
||||
|
||||
fn base_conf(builder: &mut Settings) {
|
||||
let displayed_fields =
|
||||
["geonameid", "name", "asciiname", "alternatenames", "_geo", "population"]
|
||||
.iter()
|
||||
.map(|s| s.to_string())
|
||||
.collect();
|
||||
builder.set_displayed_fields(displayed_fields);
|
||||
|
||||
let searchable_fields =
|
||||
["name", "alternatenames", "elevation"].iter().map(|s| s.to_string()).collect();
|
||||
builder.set_searchable_fields(searchable_fields);
|
||||
|
||||
let filterable_fields =
|
||||
["_geo", "population", "elevation"].iter().map(|s| s.to_string()).collect();
|
||||
builder.set_filterable_fields(filterable_fields);
|
||||
|
||||
let sortable_fields =
|
||||
["_geo", "population", "elevation"].iter().map(|s| s.to_string()).collect();
|
||||
builder.set_sortable_fields(sortable_fields);
|
||||
}
|
||||
|
||||
#[rustfmt::skip]
|
||||
const BASE_CONF: Conf = Conf {
|
||||
dataset: datasets_paths::SMOL_ALL_COUNTRIES,
|
||||
dataset_format: "jsonl",
|
||||
queries: &[
|
||||
"",
|
||||
],
|
||||
configure: base_conf,
|
||||
primary_key: Some("geonameid"),
|
||||
..Conf::BASE
|
||||
};
|
||||
|
||||
fn bench_geo(c: &mut criterion::Criterion) {
|
||||
#[rustfmt::skip]
|
||||
let confs = &[
|
||||
// A basic placeholder with no geo
|
||||
utils::Conf {
|
||||
group_name: "placeholder with no geo",
|
||||
..BASE_CONF
|
||||
},
|
||||
// Medium aglomeration: probably the most common usecase
|
||||
utils::Conf {
|
||||
group_name: "asc sort from Lille",
|
||||
sort: Some(vec!["_geoPoint(50.62999333378238, 3.086269263384099):asc"]),
|
||||
..BASE_CONF
|
||||
},
|
||||
utils::Conf {
|
||||
group_name: "desc sort from Lille",
|
||||
sort: Some(vec!["_geoPoint(50.62999333378238, 3.086269263384099):desc"]),
|
||||
..BASE_CONF
|
||||
},
|
||||
// Big agglomeration: a lot of documents close to our point
|
||||
utils::Conf {
|
||||
group_name: "asc sort from Tokyo",
|
||||
sort: Some(vec!["_geoPoint(35.749512532692144, 139.61664952543356):asc"]),
|
||||
..BASE_CONF
|
||||
},
|
||||
utils::Conf {
|
||||
group_name: "desc sort from Tokyo",
|
||||
sort: Some(vec!["_geoPoint(35.749512532692144, 139.61664952543356):desc"]),
|
||||
..BASE_CONF
|
||||
},
|
||||
// The furthest point from any civilization
|
||||
utils::Conf {
|
||||
group_name: "asc sort from Point Nemo",
|
||||
sort: Some(vec!["_geoPoint(-48.87561645055408, -123.39275749319793):asc"]),
|
||||
..BASE_CONF
|
||||
},
|
||||
utils::Conf {
|
||||
group_name: "desc sort from Point Nemo",
|
||||
sort: Some(vec!["_geoPoint(-48.87561645055408, -123.39275749319793):desc"]),
|
||||
..BASE_CONF
|
||||
},
|
||||
// Filters
|
||||
utils::Conf {
|
||||
group_name: "filter of 100km from Lille",
|
||||
filter: Some("_geoRadius(50.62999333378238, 3.086269263384099, 100000)"),
|
||||
..BASE_CONF
|
||||
},
|
||||
utils::Conf {
|
||||
group_name: "filter of 1km from Lille",
|
||||
filter: Some("_geoRadius(50.62999333378238, 3.086269263384099, 1000)"),
|
||||
..BASE_CONF
|
||||
},
|
||||
utils::Conf {
|
||||
group_name: "filter of 100km from Tokyo",
|
||||
filter: Some("_geoRadius(35.749512532692144, 139.61664952543356, 100000)"),
|
||||
..BASE_CONF
|
||||
},
|
||||
utils::Conf {
|
||||
group_name: "filter of 1km from Tokyo",
|
||||
filter: Some("_geoRadius(35.749512532692144, 139.61664952543356, 1000)"),
|
||||
..BASE_CONF
|
||||
},
|
||||
utils::Conf {
|
||||
group_name: "filter of 100km from Point Nemo",
|
||||
filter: Some("_geoRadius(-48.87561645055408, -123.39275749319793, 100000)"),
|
||||
..BASE_CONF
|
||||
},
|
||||
utils::Conf {
|
||||
group_name: "filter of 1km from Point Nemo",
|
||||
filter: Some("_geoRadius(-48.87561645055408, -123.39275749319793, 1000)"),
|
||||
..BASE_CONF
|
||||
},
|
||||
];
|
||||
|
||||
utils::run_benches(c, confs);
|
||||
}
|
||||
|
||||
criterion_group!(benches, bench_geo);
|
||||
criterion_main!(benches);
|
196
benchmarks/benches/search_songs.rs
Normal file
196
benchmarks/benches/search_songs.rs
Normal file
@ -0,0 +1,196 @@
|
||||
mod datasets_paths;
|
||||
mod utils;
|
||||
|
||||
use criterion::{criterion_group, criterion_main};
|
||||
use milli::update::Settings;
|
||||
use utils::Conf;
|
||||
|
||||
#[global_allocator]
|
||||
static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;
|
||||
|
||||
fn base_conf(builder: &mut Settings) {
|
||||
let displayed_fields =
|
||||
["id", "title", "album", "artist", "genre", "country", "released", "duration"]
|
||||
.iter()
|
||||
.map(|s| s.to_string())
|
||||
.collect();
|
||||
builder.set_displayed_fields(displayed_fields);
|
||||
|
||||
let searchable_fields = ["title", "album", "artist"].iter().map(|s| s.to_string()).collect();
|
||||
builder.set_searchable_fields(searchable_fields);
|
||||
|
||||
let faceted_fields = ["released-timestamp", "duration-float", "genre", "country", "artist"]
|
||||
.iter()
|
||||
.map(|s| s.to_string())
|
||||
.collect();
|
||||
builder.set_filterable_fields(faceted_fields);
|
||||
}
|
||||
|
||||
#[rustfmt::skip]
|
||||
const BASE_CONF: Conf = Conf {
|
||||
dataset: datasets_paths::SMOL_SONGS,
|
||||
queries: &[
|
||||
"john ", // 9097
|
||||
"david ", // 4794
|
||||
"charles ", // 1957
|
||||
"david bowie ", // 1200
|
||||
"michael jackson ", // 600
|
||||
"thelonious monk ", // 303
|
||||
"charles mingus ", // 142
|
||||
"marcus miller ", // 60
|
||||
"tamo ", // 13
|
||||
"Notstandskomitee ", // 4
|
||||
],
|
||||
configure: base_conf,
|
||||
primary_key: Some("id"),
|
||||
..Conf::BASE
|
||||
};
|
||||
|
||||
fn bench_songs(c: &mut criterion::Criterion) {
|
||||
let default_criterion: Vec<String> =
|
||||
milli::default_criteria().iter().map(|criteria| criteria.to_string()).collect();
|
||||
let default_criterion = default_criterion.iter().map(|s| s.as_str());
|
||||
let asc_default: Vec<&str> =
|
||||
std::iter::once("released-timestamp:asc").chain(default_criterion.clone()).collect();
|
||||
let desc_default: Vec<&str> =
|
||||
std::iter::once("released-timestamp:desc").chain(default_criterion.clone()).collect();
|
||||
|
||||
let basic_with_quote: Vec<String> = BASE_CONF
|
||||
.queries
|
||||
.iter()
|
||||
.map(|s| {
|
||||
s.trim().split(' ').map(|s| format!(r#""{}""#, s)).collect::<Vec<String>>().join(" ")
|
||||
})
|
||||
.collect();
|
||||
let basic_with_quote: &[&str] =
|
||||
&basic_with_quote.iter().map(|s| s.as_str()).collect::<Vec<&str>>();
|
||||
|
||||
#[rustfmt::skip]
|
||||
let confs = &[
|
||||
/* first we bench each criterion alone */
|
||||
utils::Conf {
|
||||
group_name: "proximity",
|
||||
queries: &[
|
||||
"black saint sinner lady ",
|
||||
"les dangeureuses 1960 ",
|
||||
"The Disneyland Sing-Along Chorus ",
|
||||
"Under Great Northern Lights ",
|
||||
"7000 Danses Un Jour Dans Notre Vie ",
|
||||
],
|
||||
criterion: Some(&["proximity"]),
|
||||
optional_words: false,
|
||||
..BASE_CONF
|
||||
},
|
||||
utils::Conf {
|
||||
group_name: "typo",
|
||||
queries: &[
|
||||
"mongus ",
|
||||
"thelonius monk ",
|
||||
"Disnaylande ",
|
||||
"the white striper ",
|
||||
"indochie ",
|
||||
"indochien ",
|
||||
"klub des loopers ",
|
||||
"fear of the duck ",
|
||||
"michel depech ",
|
||||
"stromal ",
|
||||
"dire straights ",
|
||||
"Arethla Franklin ",
|
||||
],
|
||||
criterion: Some(&["typo"]),
|
||||
optional_words: false,
|
||||
..BASE_CONF
|
||||
},
|
||||
utils::Conf {
|
||||
group_name: "words",
|
||||
queries: &[
|
||||
"the black saint and the sinner lady and the good doggo ", // four words to pop
|
||||
"les liaisons dangeureuses 1793 ", // one word to pop
|
||||
"The Disneyland Children's Sing-Alone song ", // two words to pop
|
||||
"seven nation mummy ", // one word to pop
|
||||
"7000 Danses / Le Baiser / je me trompe de mots ", // four words to pop
|
||||
"Bring Your Daughter To The Slaughter but now this is not part of the title ", // nine words to pop
|
||||
"whathavenotnsuchforth and a good amount of words to pop to match the first one ", // 13
|
||||
],
|
||||
criterion: Some(&["words"]),
|
||||
..BASE_CONF
|
||||
},
|
||||
utils::Conf {
|
||||
group_name: "asc",
|
||||
criterion: Some(&["released-timestamp:desc"]),
|
||||
..BASE_CONF
|
||||
},
|
||||
utils::Conf {
|
||||
group_name: "desc",
|
||||
criterion: Some(&["released-timestamp:desc"]),
|
||||
..BASE_CONF
|
||||
},
|
||||
|
||||
/* then we bench the asc and desc criterion on top of the default criterion */
|
||||
utils::Conf {
|
||||
group_name: "asc + default",
|
||||
criterion: Some(&asc_default[..]),
|
||||
..BASE_CONF
|
||||
},
|
||||
utils::Conf {
|
||||
group_name: "desc + default",
|
||||
criterion: Some(&desc_default[..]),
|
||||
..BASE_CONF
|
||||
},
|
||||
|
||||
/* we bench the filters with the default request */
|
||||
utils::Conf {
|
||||
group_name: "basic filter: <=",
|
||||
filter: Some("released-timestamp <= 946728000"), // year 2000
|
||||
..BASE_CONF
|
||||
},
|
||||
utils::Conf {
|
||||
group_name: "basic filter: TO",
|
||||
filter: Some("released-timestamp 946728000 TO 1262347200"), // year 2000 to 2010
|
||||
..BASE_CONF
|
||||
},
|
||||
utils::Conf {
|
||||
group_name: "big filter",
|
||||
filter: Some("released-timestamp != 1262347200 AND (NOT (released-timestamp = 946728000)) AND (duration-float = 1 OR (duration-float 1.1 TO 1.5 AND released-timestamp > 315576000))"),
|
||||
..BASE_CONF
|
||||
},
|
||||
|
||||
/* the we bench some global / normal search with all the default criterion in the default
|
||||
* order */
|
||||
utils::Conf {
|
||||
group_name: "basic placeholder",
|
||||
queries: &[""],
|
||||
..BASE_CONF
|
||||
},
|
||||
utils::Conf {
|
||||
group_name: "basic without quote",
|
||||
queries: &BASE_CONF
|
||||
.queries
|
||||
.iter()
|
||||
.map(|s| s.trim()) // we remove the space at the end of each request
|
||||
.collect::<Vec<&str>>(),
|
||||
..BASE_CONF
|
||||
},
|
||||
utils::Conf {
|
||||
group_name: "basic with quote",
|
||||
queries: basic_with_quote,
|
||||
..BASE_CONF
|
||||
},
|
||||
utils::Conf {
|
||||
group_name: "prefix search",
|
||||
queries: &[
|
||||
"s", // 500k+ results
|
||||
"a", //
|
||||
"b", //
|
||||
"i", //
|
||||
"x", // only 7k results
|
||||
],
|
||||
..BASE_CONF
|
||||
},
|
||||
];
|
||||
|
||||
utils::run_benches(c, confs);
|
||||
}
|
||||
|
||||
criterion_group!(benches, bench_songs);
|
||||
criterion_main!(benches);
|
129
benchmarks/benches/search_wiki.rs
Normal file
129
benchmarks/benches/search_wiki.rs
Normal file
@ -0,0 +1,129 @@
|
||||
mod datasets_paths;
|
||||
mod utils;
|
||||
|
||||
use criterion::{criterion_group, criterion_main};
|
||||
use milli::update::Settings;
|
||||
use utils::Conf;
|
||||
|
||||
#[global_allocator]
|
||||
static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;
|
||||
|
||||
fn base_conf(builder: &mut Settings) {
|
||||
let displayed_fields = ["title", "body", "url"].iter().map(|s| s.to_string()).collect();
|
||||
builder.set_displayed_fields(displayed_fields);
|
||||
|
||||
let searchable_fields = ["title", "body"].iter().map(|s| s.to_string()).collect();
|
||||
builder.set_searchable_fields(searchable_fields);
|
||||
}
|
||||
|
||||
#[rustfmt::skip]
|
||||
const BASE_CONF: Conf = Conf {
|
||||
dataset: datasets_paths::SMOL_WIKI_ARTICLES,
|
||||
queries: &[
|
||||
"mingus ", // 46 candidates
|
||||
"miles davis ", // 159
|
||||
"rock and roll ", // 1007
|
||||
"machine ", // 3448
|
||||
"spain ", // 7002
|
||||
"japan ", // 10.593
|
||||
"france ", // 17.616
|
||||
"film ", // 24.959
|
||||
],
|
||||
configure: base_conf,
|
||||
..Conf::BASE
|
||||
};
|
||||
|
||||
fn bench_songs(c: &mut criterion::Criterion) {
|
||||
let basic_with_quote: Vec<String> = BASE_CONF
|
||||
.queries
|
||||
.iter()
|
||||
.map(|s| {
|
||||
s.trim().split(' ').map(|s| format!(r#""{}""#, s)).collect::<Vec<String>>().join(" ")
|
||||
})
|
||||
.collect();
|
||||
let basic_with_quote: &[&str] =
|
||||
&basic_with_quote.iter().map(|s| s.as_str()).collect::<Vec<&str>>();
|
||||
|
||||
#[rustfmt::skip]
|
||||
let confs = &[
|
||||
/* first we bench each criterion alone */
|
||||
utils::Conf {
|
||||
group_name: "proximity",
|
||||
queries: &[
|
||||
"herald sings ",
|
||||
"april paris ",
|
||||
"tea two ",
|
||||
"diesel engine ",
|
||||
],
|
||||
criterion: Some(&["proximity"]),
|
||||
optional_words: false,
|
||||
..BASE_CONF
|
||||
},
|
||||
utils::Conf {
|
||||
group_name: "typo",
|
||||
queries: &[
|
||||
"migrosoft ",
|
||||
"linax ",
|
||||
"Disnaylande ",
|
||||
"phytogropher ",
|
||||
"nympalidea ",
|
||||
"aritmetric ",
|
||||
"the fronce ",
|
||||
"sisan ",
|
||||
],
|
||||
criterion: Some(&["typo"]),
|
||||
optional_words: false,
|
||||
..BASE_CONF
|
||||
},
|
||||
utils::Conf {
|
||||
group_name: "words",
|
||||
queries: &[
|
||||
"the black saint and the sinner lady and the good doggo ", // four words to pop, 27 results
|
||||
"Kameya Tokujirō mingus monk ", // two words to pop, 55
|
||||
"Ulrich Hensel meilisearch milli ", // two words to pop, 306
|
||||
"Idaho Bellevue pizza ", // one word to pop, 800
|
||||
"Abraham machin ", // one word to pop, 1141
|
||||
],
|
||||
criterion: Some(&["words"]),
|
||||
..BASE_CONF
|
||||
},
|
||||
/* the we bench some global / normal search with all the default criterion in the default
|
||||
* order */
|
||||
utils::Conf {
|
||||
group_name: "basic placeholder",
|
||||
queries: &[""],
|
||||
..BASE_CONF
|
||||
},
|
||||
utils::Conf {
|
||||
group_name: "basic without quote",
|
||||
queries: &BASE_CONF
|
||||
.queries
|
||||
.iter()
|
||||
.map(|s| s.trim()) // we remove the space at the end of each request
|
||||
.collect::<Vec<&str>>(),
|
||||
..BASE_CONF
|
||||
},
|
||||
utils::Conf {
|
||||
group_name: "basic with quote",
|
||||
queries: basic_with_quote,
|
||||
..BASE_CONF
|
||||
},
|
||||
utils::Conf {
|
||||
group_name: "prefix search",
|
||||
queries: &[
|
||||
"t", // 453k results
|
||||
"c", // 405k
|
||||
"g", // 318k
|
||||
"j", // 227k
|
||||
"q", // 71k
|
||||
"x", // 17k
|
||||
],
|
||||
..BASE_CONF
|
||||
},
|
||||
];
|
||||
|
||||
utils::run_benches(c, confs);
|
||||
}
|
||||
|
||||
criterion_group!(benches, bench_songs);
|
||||
criterion_main!(benches);
|
256
benchmarks/benches/utils.rs
Normal file
256
benchmarks/benches/utils.rs
Normal file
@ -0,0 +1,256 @@
|
||||
#![allow(dead_code)]
|
||||
|
||||
use std::fs::{create_dir_all, remove_dir_all, File};
|
||||
use std::io::{self, BufRead, BufReader, Cursor, Read, Seek};
|
||||
use std::num::ParseFloatError;
|
||||
use std::path::Path;
|
||||
use std::str::FromStr;
|
||||
|
||||
use criterion::BenchmarkId;
|
||||
use milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
|
||||
use milli::heed::EnvOpenOptions;
|
||||
use milli::update::{
|
||||
IndexDocuments, IndexDocumentsConfig, IndexDocumentsMethod, IndexerConfig, Settings,
|
||||
};
|
||||
use milli::{Criterion, Filter, Index, Object, TermsMatchingStrategy};
|
||||
use serde_json::Value;
|
||||
|
||||
pub struct Conf<'a> {
|
||||
/// where we are going to create our database.mmdb directory
|
||||
/// each benchmark will first try to delete it and then recreate it
|
||||
pub database_name: &'a str,
|
||||
/// the dataset to be used, it must be an uncompressed csv
|
||||
pub dataset: &'a str,
|
||||
/// The format of the dataset
|
||||
pub dataset_format: &'a str,
|
||||
pub group_name: &'a str,
|
||||
pub queries: &'a [&'a str],
|
||||
/// here you can change which criterion are used and in which order.
|
||||
/// - if you specify something all the base configuration will be thrown out
|
||||
/// - if you don't specify anything (None) the default configuration will be kept
|
||||
pub criterion: Option<&'a [&'a str]>,
|
||||
/// the last chance to configure your database as you want
|
||||
pub configure: fn(&mut Settings),
|
||||
pub filter: Option<&'a str>,
|
||||
pub sort: Option<Vec<&'a str>>,
|
||||
/// enable or disable the optional words on the query
|
||||
pub optional_words: bool,
|
||||
/// primary key, if there is None we'll auto-generate docids for every documents
|
||||
pub primary_key: Option<&'a str>,
|
||||
}
|
||||
|
||||
impl Conf<'_> {
|
||||
pub const BASE: Self = Conf {
|
||||
database_name: "benches.mmdb",
|
||||
dataset_format: "csv",
|
||||
dataset: "",
|
||||
group_name: "",
|
||||
queries: &[],
|
||||
criterion: None,
|
||||
configure: |_| (),
|
||||
filter: None,
|
||||
sort: None,
|
||||
optional_words: true,
|
||||
primary_key: None,
|
||||
};
|
||||
}
|
||||
|
||||
pub fn base_setup(conf: &Conf) -> Index {
|
||||
match remove_dir_all(conf.database_name) {
|
||||
Ok(_) => (),
|
||||
Err(e) if e.kind() == std::io::ErrorKind::NotFound => (),
|
||||
Err(e) => panic!("{}", e),
|
||||
}
|
||||
create_dir_all(conf.database_name).unwrap();
|
||||
|
||||
let mut options = EnvOpenOptions::new();
|
||||
options.map_size(100 * 1024 * 1024 * 1024); // 100 GB
|
||||
options.max_readers(10);
|
||||
let index = Index::new(options, conf.database_name).unwrap();
|
||||
|
||||
let config = IndexerConfig::default();
|
||||
let mut wtxn = index.write_txn().unwrap();
|
||||
let mut builder = Settings::new(&mut wtxn, &index, &config);
|
||||
|
||||
if let Some(primary_key) = conf.primary_key {
|
||||
builder.set_primary_key(primary_key.to_string());
|
||||
}
|
||||
|
||||
if let Some(criterion) = conf.criterion {
|
||||
builder.reset_filterable_fields();
|
||||
builder.reset_criteria();
|
||||
builder.reset_stop_words();
|
||||
|
||||
let criterion = criterion.iter().map(|s| Criterion::from_str(s).unwrap()).collect();
|
||||
builder.set_criteria(criterion);
|
||||
}
|
||||
|
||||
(conf.configure)(&mut builder);
|
||||
|
||||
builder.execute(|_| (), || false).unwrap();
|
||||
wtxn.commit().unwrap();
|
||||
|
||||
let config = IndexerConfig::default();
|
||||
let mut wtxn = index.write_txn().unwrap();
|
||||
let indexing_config = IndexDocumentsConfig {
|
||||
autogenerate_docids: conf.primary_key.is_none(),
|
||||
update_method: IndexDocumentsMethod::ReplaceDocuments,
|
||||
..Default::default()
|
||||
};
|
||||
let builder =
|
||||
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| (), || false).unwrap();
|
||||
let documents = documents_from(conf.dataset, conf.dataset_format);
|
||||
let (builder, user_error) = builder.add_documents(documents).unwrap();
|
||||
user_error.unwrap();
|
||||
builder.execute().unwrap();
|
||||
wtxn.commit().unwrap();
|
||||
|
||||
index
|
||||
}
|
||||
|
||||
pub fn run_benches(c: &mut criterion::Criterion, confs: &[Conf]) {
|
||||
for conf in confs {
|
||||
let index = base_setup(conf);
|
||||
|
||||
let file_name = Path::new(conf.dataset).file_name().and_then(|f| f.to_str()).unwrap();
|
||||
let name = format!("{}: {}", file_name, conf.group_name);
|
||||
let mut group = c.benchmark_group(&name);
|
||||
|
||||
for &query in conf.queries {
|
||||
group.bench_with_input(BenchmarkId::from_parameter(query), &query, |b, &query| {
|
||||
b.iter(|| {
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
let mut search = index.search(&rtxn);
|
||||
search.query(query).terms_matching_strategy(TermsMatchingStrategy::default());
|
||||
if let Some(filter) = conf.filter {
|
||||
let filter = Filter::from_str(filter).unwrap().unwrap();
|
||||
search.filter(filter);
|
||||
}
|
||||
if let Some(sort) = &conf.sort {
|
||||
let sort = sort.iter().map(|sort| sort.parse().unwrap()).collect();
|
||||
search.sort_criteria(sort);
|
||||
}
|
||||
let _ids = search.execute().unwrap();
|
||||
});
|
||||
});
|
||||
}
|
||||
group.finish();
|
||||
|
||||
index.prepare_for_closing().wait();
|
||||
}
|
||||
}
|
||||
|
||||
pub fn documents_from(filename: &str, filetype: &str) -> DocumentsBatchReader<impl BufRead + Seek> {
|
||||
let reader = File::open(filename)
|
||||
.unwrap_or_else(|_| panic!("could not find the dataset in: {}", filename));
|
||||
let reader = BufReader::new(reader);
|
||||
let documents = match filetype {
|
||||
"csv" => documents_from_csv(reader).unwrap(),
|
||||
"json" => documents_from_json(reader).unwrap(),
|
||||
"jsonl" => documents_from_jsonl(reader).unwrap(),
|
||||
otherwise => panic!("invalid update format {:?}", otherwise),
|
||||
};
|
||||
DocumentsBatchReader::from_reader(Cursor::new(documents)).unwrap()
|
||||
}
|
||||
|
||||
fn documents_from_jsonl(reader: impl BufRead) -> anyhow::Result<Vec<u8>> {
|
||||
let mut documents = DocumentsBatchBuilder::new(Vec::new());
|
||||
|
||||
for result in serde_json::Deserializer::from_reader(reader).into_iter::<Object>() {
|
||||
let object = result?;
|
||||
documents.append_json_object(&object)?;
|
||||
}
|
||||
|
||||
documents.into_inner().map_err(Into::into)
|
||||
}
|
||||
|
||||
fn documents_from_json(reader: impl BufRead) -> anyhow::Result<Vec<u8>> {
|
||||
let mut documents = DocumentsBatchBuilder::new(Vec::new());
|
||||
|
||||
documents.append_json_array(reader)?;
|
||||
|
||||
documents.into_inner().map_err(Into::into)
|
||||
}
|
||||
|
||||
fn documents_from_csv(reader: impl BufRead) -> anyhow::Result<Vec<u8>> {
|
||||
let csv = csv::Reader::from_reader(reader);
|
||||
|
||||
let mut documents = DocumentsBatchBuilder::new(Vec::new());
|
||||
documents.append_csv(csv)?;
|
||||
|
||||
documents.into_inner().map_err(Into::into)
|
||||
}
|
||||
|
||||
/// Value type a CSV column can be annotated with in its header.
enum AllowedType {
    /// The column holds plain strings (also the fallback when untyped).
    String,
    /// The column holds numbers.
    Number,
}

/// Splits a CSV header of the form `name:type` into its field name and type.
///
/// Only the last `:` is treated as the separator. When the header has no
/// separator, or the suffix is neither `string` nor `number`, the whole header
/// is used as the field name and the type falls back to `String`.
fn parse_csv_header(header: &str) -> (String, AllowedType) {
    if let Some((field_name, field_type)) = header.rsplit_once(':') {
        let recognized = match field_type {
            "string" => Some(AllowedType::String),
            "number" => Some(AllowedType::Number),
            // we may return an error in this case.
            _ => None,
        };
        if let Some(allowed_type) = recognized {
            return (field_name.to_string(), allowed_type);
        }
    }

    (header.to_string(), AllowedType::String)
}
|
||||
|
||||
struct CSVDocumentDeserializer<R>
|
||||
where
|
||||
R: Read,
|
||||
{
|
||||
documents: csv::StringRecordsIntoIter<R>,
|
||||
headers: Vec<(String, AllowedType)>,
|
||||
}
|
||||
|
||||
impl<R: Read> CSVDocumentDeserializer<R> {
|
||||
fn from_reader(reader: R) -> io::Result<Self> {
|
||||
let mut records = csv::Reader::from_reader(reader);
|
||||
|
||||
let headers = records.headers()?.into_iter().map(parse_csv_header).collect();
|
||||
|
||||
Ok(Self { documents: records.into_records(), headers })
|
||||
}
|
||||
}
|
||||
|
||||
impl<R: Read> Iterator for CSVDocumentDeserializer<R> {
|
||||
type Item = anyhow::Result<Object>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
let csv_document = self.documents.next()?;
|
||||
|
||||
match csv_document {
|
||||
Ok(csv_document) => {
|
||||
let mut document = Object::new();
|
||||
|
||||
for ((field_name, field_type), value) in
|
||||
self.headers.iter().zip(csv_document.into_iter())
|
||||
{
|
||||
let parsed_value: Result<Value, ParseFloatError> = match field_type {
|
||||
AllowedType::Number => {
|
||||
value.parse::<f64>().map(Value::from).map_err(Into::into)
|
||||
}
|
||||
AllowedType::String => Ok(Value::String(value.to_string())),
|
||||
};
|
||||
|
||||
match parsed_value {
|
||||
Ok(value) => drop(document.insert(field_name.to_string(), value)),
|
||||
Err(_e) => {
|
||||
return Some(Err(anyhow::anyhow!(
|
||||
"Value '{}' is not a valid number",
|
||||
value
|
||||
)))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Some(Ok(document))
|
||||
}
|
||||
Err(e) => Some(Err(anyhow::anyhow!("Error parsing csv document: {}", e))),
|
||||
}
|
||||
}
|
||||
}
|
115
benchmarks/build.rs
Normal file
115
benchmarks/build.rs
Normal file
@ -0,0 +1,115 @@
|
||||
use std::fs::File;
|
||||
use std::io::{Cursor, Read, Seek, Write};
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::{env, fs};
|
||||
|
||||
use bytes::Bytes;
|
||||
use convert_case::{Case, Casing};
|
||||
use flate2::read::GzDecoder;
|
||||
use reqwest::IntoUrl;
|
||||
|
||||
/// Base URL under which every dataset is published as `<name>.<extension>.gz`.
const BASE_URL: &str = "https://milli-benchmarks.fra1.digitaloceanspaces.com/datasets";

// Each dataset is described by its (name, file extension) pair.

// Songs datasets.
const DATASET_SONGS: (&str, &str) = ("smol-songs", "csv");
const DATASET_SONGS_1_2: (&str, &str) = ("smol-songs-1_2", "csv");
const DATASET_SONGS_3_4: (&str, &str) = ("smol-songs-3_4", "csv");
const DATASET_SONGS_4_4: (&str, &str) = ("smol-songs-4_4", "csv");

// Wiki articles datasets.
const DATASET_WIKI: (&str, &str) = ("smol-wiki-articles", "csv");
const DATASET_WIKI_1_2: (&str, &str) = ("smol-wiki-articles-1_2", "csv");
const DATASET_WIKI_3_4: (&str, &str) = ("smol-wiki-articles-3_4", "csv");
const DATASET_WIKI_4_4: (&str, &str) = ("smol-wiki-articles-4_4", "csv");

// Movies datasets.
const DATASET_MOVIES: (&str, &str) = ("movies", "json");
const DATASET_MOVIES_1_2: (&str, &str) = ("movies-1_2", "json");
const DATASET_MOVIES_3_4: (&str, &str) = ("movies-3_4", "json");
const DATASET_MOVIES_4_4: (&str, &str) = ("movies-4_4", "json");
const DATASET_NESTED_MOVIES: (&str, &str) = ("nested_movies", "json");

// Geo dataset.
const DATASET_GEO: (&str, &str) = ("smol-all-countries", "jsonl");

/// Every dataset the build script downloads and exposes as a path constant.
const ALL_DATASETS: &[(&str, &str)] = &[
    DATASET_SONGS,
    DATASET_SONGS_1_2,
    DATASET_SONGS_3_4,
    DATASET_SONGS_4_4,
    DATASET_WIKI,
    DATASET_WIKI_1_2,
    DATASET_WIKI_3_4,
    DATASET_WIKI_4_4,
    DATASET_MOVIES,
    DATASET_MOVIES_1_2,
    DATASET_MOVIES_3_4,
    DATASET_MOVIES_4_4,
    DATASET_NESTED_MOVIES,
    DATASET_GEO,
];

/// The name of the environment variable used to select the path
/// of the directory containing the datasets
const BASE_DATASETS_PATH_KEY: &str = "MILLI_BENCH_DATASETS_PATH";
|
||||
|
||||
fn main() -> anyhow::Result<()> {
|
||||
let out_dir = PathBuf::from(env::var(BASE_DATASETS_PATH_KEY).unwrap_or(env::var("OUT_DIR")?));
|
||||
|
||||
let benches_dir = PathBuf::from(env::var("CARGO_MANIFEST_DIR")?).join("benches");
|
||||
let mut manifest_paths_file = File::create(benches_dir.join("datasets_paths.rs"))?;
|
||||
write!(
|
||||
manifest_paths_file,
|
||||
r#"//! This file is generated by the build script.
|
||||
//! Do not modify by hand, use the build.rs file.
|
||||
#![allow(dead_code)]
|
||||
"#
|
||||
)?;
|
||||
writeln!(manifest_paths_file)?;
|
||||
|
||||
for (dataset, extension) in ALL_DATASETS {
|
||||
let out_path = out_dir.join(dataset);
|
||||
let out_file = out_path.with_extension(extension);
|
||||
|
||||
writeln!(
|
||||
&mut manifest_paths_file,
|
||||
r#"pub const {}: &str = {:?};"#,
|
||||
dataset.to_case(Case::ScreamingSnake),
|
||||
out_file.display(),
|
||||
)?;
|
||||
|
||||
if out_file.exists() {
|
||||
eprintln!(
|
||||
"The dataset {} already exists on the file system and will not be downloaded again",
|
||||
out_path.display(),
|
||||
);
|
||||
continue;
|
||||
}
|
||||
let url = format!("{}/{}.{}.gz", BASE_URL, dataset, extension);
|
||||
eprintln!("downloading: {}", url);
|
||||
let bytes = retry(|| download_dataset(url.clone()), 10)?;
|
||||
eprintln!("{} downloaded successfully", url);
|
||||
eprintln!("uncompressing in {}", out_file.display());
|
||||
uncompress_in_file(bytes, &out_file)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Calls `fun` until it succeeds, making at most `times + 1` attempts.
///
/// The first `times` failures are discarded; the result of the final attempt
/// is returned as-is, whether it succeeded or failed.
fn retry<Ok, Err>(fun: impl Fn() -> Result<Ok, Err>, times: usize) -> Result<Ok, Err> {
    let mut attempts_left = times;
    while attempts_left > 0 {
        let outcome = fun();
        if outcome.is_ok() {
            return outcome;
        }
        attempts_left -= 1;
    }

    // Last chance: whatever happens now is what the caller gets.
    fun()
}
|
||||
|
||||
fn download_dataset<U: IntoUrl>(url: U) -> anyhow::Result<Cursor<Bytes>> {
|
||||
let bytes =
|
||||
reqwest::blocking::Client::builder().timeout(None).build()?.get(url).send()?.bytes()?;
|
||||
Ok(Cursor::new(bytes))
|
||||
}
|
||||
|
||||
fn uncompress_in_file<R: Read + Seek, P: AsRef<Path>>(bytes: R, path: P) -> anyhow::Result<()> {
|
||||
let path = path.as_ref();
|
||||
let mut gz = GzDecoder::new(bytes);
|
||||
let mut dataset = Vec::new();
|
||||
gz.read_to_end(&mut dataset)?;
|
||||
|
||||
fs::write(path, dataset)?;
|
||||
Ok(())
|
||||
}
|
38
benchmarks/scripts/compare.sh
Executable file
38
benchmarks/scripts/compare.sh
Executable file
@ -0,0 +1,38 @@
|
||||
#!/usr/bin/env bash

# Downloads the given critcmp result files (unless already cached in /tmp)
# and compares them with critcmp.

# Requirements:
# - critcmp. See: https://github.com/BurntSushi/critcmp
# - curl

# Usage
# $ bash compare.sh json_file1 json_file2
# ex: bash compare.sh songs_main_09a4321.json songs_geosearch_24ec456.json

# Checking that critcmp is installed
if ! command -v critcmp > /dev/null 2>&1; then
    echo 'You must install critcmp to make this script work.'
    echo 'See: https://github.com/BurntSushi/critcmp'
    echo ' $ cargo install critcmp'
    exit 1
fi

s3_url='https://milli-benchmarks.fra1.digitaloceanspaces.com/critcmp_results'

# Local paths of the files to compare, in the order they were given.
path_list=()

for file in "$@"
do
    file_s3_url="$s3_url/$file"
    file_local_path="/tmp/$file"

    if [[ ! -f "$file_local_path" ]]; then
        # --fail makes curl exit with an error on HTTP failures (e.g. 404)
        # instead of saving the error page as if it were the result file.
        if ! curl "$file_s3_url" --output "$file_local_path" --silent --fail; then
            # Do not leave a partial file behind: it would be reused next time.
            rm -f "$file_local_path"
            echo 'curl command failed.'
            exit 1
        fi
    fi

    path_list+=("$file_local_path")
done

critcmp "${path_list[@]}"
|
14
benchmarks/scripts/list.sh
Executable file
14
benchmarks/scripts/list.sh
Executable file
@ -0,0 +1,14 @@
|
||||
#!/usr/bin/env bash

# Lists the critcmp result files available in the benchmarks bucket.
# Every argument is a grep pattern; only the files matching all of them
# are printed.

# Requirements:
# - curl
# - grep

res=$(curl -s https://milli-benchmarks.fra1.digitaloceanspaces.com | grep -o '<Key>[^<]\+' | cut -c 5- | grep critcmp_results/ | cut -c 18-)

for pattern in "$@"
do
    # Quote the pattern so it is not word-split or glob-expanded by the shell,
    # and use `--` so a pattern starting with `-` is not taken as an option.
    res=$(echo "$res" | grep -- "$pattern")
done

echo "$res"
|
5
benchmarks/src/lib.rs
Normal file
5
benchmarks/src/lib.rs
Normal file
@ -0,0 +1,5 @@
|
||||
//! This library is only used to isolate the benchmarks
|
||||
//! from the original milli library.
|
||||
//!
|
||||
//! It does not include anything of interest to users of the milli library;
//! it is only useful to milli contributors.
|
@ -249,17 +249,17 @@ pub(crate) mod test {
|
||||
|
||||
pub fn create_test_settings() -> Settings<Checked> {
|
||||
let settings = Settings {
|
||||
displayed_attributes: Setting::Set(vec![S("race"), S("name")]).into(),
|
||||
searchable_attributes: Setting::Set(vec![S("name"), S("race")]).into(),
|
||||
filterable_attributes: Setting::Set(btreeset! { S("race"), S("age") }).into(),
|
||||
sortable_attributes: Setting::Set(btreeset! { S("age") }).into(),
|
||||
ranking_rules: Setting::NotSet.into(),
|
||||
stop_words: Setting::NotSet.into(),
|
||||
synonyms: Setting::NotSet.into(),
|
||||
distinct_attribute: Setting::NotSet.into(),
|
||||
typo_tolerance: Setting::NotSet.into(),
|
||||
faceting: Setting::NotSet.into(),
|
||||
pagination: Setting::NotSet.into(),
|
||||
displayed_attributes: Setting::Set(vec![S("race"), S("name")]),
|
||||
searchable_attributes: Setting::Set(vec![S("name"), S("race")]),
|
||||
filterable_attributes: Setting::Set(btreeset! { S("race"), S("age") }),
|
||||
sortable_attributes: Setting::Set(btreeset! { S("age") }),
|
||||
ranking_rules: Setting::NotSet,
|
||||
stop_words: Setting::NotSet,
|
||||
synonyms: Setting::NotSet,
|
||||
distinct_attribute: Setting::NotSet,
|
||||
typo_tolerance: Setting::NotSet,
|
||||
faceting: Setting::NotSet,
|
||||
pagination: Setting::NotSet,
|
||||
_kind: std::marker::PhantomData,
|
||||
};
|
||||
settings.check()
|
||||
|
@ -1,3 +1,5 @@
|
||||
use std::str::FromStr;
|
||||
|
||||
use super::v4_to_v5::{CompatIndexV4ToV5, CompatV4ToV5};
|
||||
use crate::reader::{v5, v6, Document, UpdateFile};
|
||||
use crate::Result;
|
||||
@ -254,51 +256,50 @@ impl<T> From<v5::Setting<T>> for v6::Setting<T> {
|
||||
impl From<v5::ResponseError> for v6::ResponseError {
|
||||
fn from(error: v5::ResponseError) -> Self {
|
||||
let code = match error.error_code.as_ref() {
|
||||
"index_creation_failed" => v6::Code::CreateIndex,
|
||||
"index_creation_failed" => v6::Code::IndexCreationFailed,
|
||||
"index_already_exists" => v6::Code::IndexAlreadyExists,
|
||||
"index_not_found" => v6::Code::IndexNotFound,
|
||||
"invalid_index_uid" => v6::Code::InvalidIndexUid,
|
||||
"invalid_min_word_length_for_typo" => v6::Code::InvalidMinWordLengthForTypo,
|
||||
"invalid_state" => v6::Code::InvalidState,
|
||||
"primary_key_inference_failed" => v6::Code::NoPrimaryKeyCandidateFound,
|
||||
"index_primary_key_already_exists" => v6::Code::PrimaryKeyAlreadyPresent,
|
||||
"primary_key_inference_failed" => v6::Code::IndexPrimaryKeyNoCandidateFound,
|
||||
"index_primary_key_already_exists" => v6::Code::IndexPrimaryKeyAlreadyExists,
|
||||
"max_fields_limit_exceeded" => v6::Code::MaxFieldsLimitExceeded,
|
||||
"missing_document_id" => v6::Code::MissingDocumentId,
|
||||
"invalid_document_id" => v6::Code::InvalidDocumentId,
|
||||
"invalid_filter" => v6::Code::Filter,
|
||||
"invalid_sort" => v6::Code::Sort,
|
||||
"invalid_filter" => v6::Code::InvalidSettingsFilterableAttributes,
|
||||
"invalid_sort" => v6::Code::InvalidSettingsSortableAttributes,
|
||||
"bad_parameter" => v6::Code::BadParameter,
|
||||
"bad_request" => v6::Code::BadRequest,
|
||||
"database_size_limit_reached" => v6::Code::DatabaseSizeLimitReached,
|
||||
"document_not_found" => v6::Code::DocumentNotFound,
|
||||
"internal" => v6::Code::Internal,
|
||||
"invalid_geo_field" => v6::Code::InvalidDocumentGeoField,
|
||||
"invalid_ranking_rule" => v6::Code::InvalidRankingRule,
|
||||
"invalid_store_file" => v6::Code::InvalidStore,
|
||||
"invalid_api_key" => v6::Code::InvalidToken,
|
||||
"invalid_ranking_rule" => v6::Code::InvalidSettingsRankingRules,
|
||||
"invalid_store_file" => v6::Code::InvalidStoreFile,
|
||||
"invalid_api_key" => v6::Code::InvalidApiKey,
|
||||
"missing_authorization_header" => v6::Code::MissingAuthorizationHeader,
|
||||
"no_space_left_on_device" => v6::Code::NoSpaceLeftOnDevice,
|
||||
"dump_not_found" => v6::Code::DumpNotFound,
|
||||
"task_not_found" => v6::Code::TaskNotFound,
|
||||
"payload_too_large" => v6::Code::PayloadTooLarge,
|
||||
"unretrievable_document" => v6::Code::RetrieveDocument,
|
||||
"search_error" => v6::Code::SearchDocuments,
|
||||
"unretrievable_document" => v6::Code::UnretrievableDocument,
|
||||
"unsupported_media_type" => v6::Code::UnsupportedMediaType,
|
||||
"dump_already_processing" => v6::Code::DumpAlreadyInProgress,
|
||||
"dump_already_processing" => v6::Code::DumpAlreadyProcessing,
|
||||
"dump_process_failed" => v6::Code::DumpProcessFailed,
|
||||
"invalid_content_type" => v6::Code::InvalidContentType,
|
||||
"missing_content_type" => v6::Code::MissingContentType,
|
||||
"malformed_payload" => v6::Code::MalformedPayload,
|
||||
"missing_payload" => v6::Code::MissingPayload,
|
||||
"api_key_not_found" => v6::Code::ApiKeyNotFound,
|
||||
"missing_parameter" => v6::Code::UnretrievableErrorCode,
|
||||
"missing_parameter" => v6::Code::BadRequest,
|
||||
"invalid_api_key_actions" => v6::Code::InvalidApiKeyActions,
|
||||
"invalid_api_key_indexes" => v6::Code::InvalidApiKeyIndexes,
|
||||
"invalid_api_key_expires_at" => v6::Code::InvalidApiKeyExpiresAt,
|
||||
"invalid_api_key_description" => v6::Code::InvalidApiKeyDescription,
|
||||
"invalid_api_key_name" => v6::Code::InvalidApiKeyName,
|
||||
"invalid_api_key_uid" => v6::Code::InvalidApiKeyUid,
|
||||
"immutable_field" => v6::Code::ImmutableField,
|
||||
"immutable_field" => v6::Code::BadRequest,
|
||||
"api_key_already_exists" => v6::Code::ApiKeyAlreadyExists,
|
||||
other => {
|
||||
log::warn!("Unknown error code {}", other);
|
||||
@ -316,7 +317,26 @@ impl<T> From<v5::Settings<T>> for v6::Settings<v6::Unchecked> {
|
||||
searchable_attributes: settings.searchable_attributes.into(),
|
||||
filterable_attributes: settings.filterable_attributes.into(),
|
||||
sortable_attributes: settings.sortable_attributes.into(),
|
||||
ranking_rules: settings.ranking_rules.into(),
|
||||
ranking_rules: {
|
||||
match settings.ranking_rules {
|
||||
v5::settings::Setting::Set(ranking_rules) => {
|
||||
let mut new_ranking_rules = vec![];
|
||||
for rule in ranking_rules {
|
||||
match v6::RankingRuleView::from_str(&rule) {
|
||||
Ok(new_rule) => {
|
||||
new_ranking_rules.push(new_rule);
|
||||
}
|
||||
Err(_) => {
|
||||
log::warn!("Error while importing settings. The ranking rule `{rule}` does not exist anymore.")
|
||||
}
|
||||
}
|
||||
}
|
||||
v6::Setting::Set(new_ranking_rules)
|
||||
}
|
||||
v5::settings::Setting::Reset => v6::Setting::Reset,
|
||||
v5::settings::Setting::NotSet => v6::Setting::NotSet,
|
||||
}
|
||||
},
|
||||
stop_words: settings.stop_words.into(),
|
||||
synonyms: settings.synonyms.into(),
|
||||
distinct_attribute: settings.distinct_attribute.into(),
|
||||
|
@ -26,7 +26,7 @@ pub type Kind = crate::KindDump;
|
||||
pub type Details = meilisearch_types::tasks::Details;
|
||||
|
||||
// everything related to the settings
|
||||
pub type Setting<T> = meilisearch_types::settings::Setting<T>;
|
||||
pub type Setting<T> = meilisearch_types::milli::update::Setting<T>;
|
||||
pub type TypoTolerance = meilisearch_types::settings::TypoSettings;
|
||||
pub type MinWordSizeForTypos = meilisearch_types::settings::MinWordSizeTyposSetting;
|
||||
pub type FacetingSettings = meilisearch_types::settings::FacetingSettings;
|
||||
@ -40,6 +40,7 @@ pub type IndexUid = meilisearch_types::index_uid::IndexUid;
|
||||
// everything related to the errors
|
||||
pub type ResponseError = meilisearch_types::error::ResponseError;
|
||||
pub type Code = meilisearch_types::error::Code;
|
||||
pub type RankingRuleView = meilisearch_types::settings::RankingRuleView;
|
||||
|
||||
pub struct V6Reader {
|
||||
dump: TempDir,
|
||||
|
13
filter-parser/Cargo.toml
Normal file
13
filter-parser/Cargo.toml
Normal file
@ -0,0 +1,13 @@
|
||||
[package]
|
||||
name = "filter-parser"
|
||||
version = "1.0.0"
|
||||
edition = "2021"
|
||||
description = "The parser for the Meilisearch filter syntax"
|
||||
publish = false
|
||||
|
||||
[dependencies]
|
||||
nom = "7.1.1"
|
||||
nom_locate = "4.0.0"
|
||||
|
||||
[dev-dependencies]
|
||||
insta = "1.21.0"
|
36
filter-parser/README.md
Normal file
36
filter-parser/README.md
Normal file
@ -0,0 +1,36 @@
|
||||
# Filter parser
|
||||
|
||||
This workspace is dedicated to the parsing of the Meilisearch filters.
|
||||
|
||||
Most of the code and explanations are in [`lib.rs`](./src/lib.rs), in particular the BNF grammar of the filters at the top of that file.
|
||||
|
||||
The parser uses [nom](https://docs.rs/nom/) to do most of its work and [nom-locate](https://docs.rs/nom_locate/) to keep track of what it was doing when it encountered an error.
|
||||
|
||||
## Cli
|
||||
A simple main is provided to quickly test whether a filter can be parsed, without bringing in milli.
|
||||
It takes one argument and tries to parse it.
|
||||
```
|
||||
cargo run -- 'field = value' # success
|
||||
cargo run -- 'field = "doggo' # error => missing closing delimiter "
|
||||
```
|
||||
|
||||
## Fuzz
|
||||
The workspace has been fuzzed with [cargo-fuzz](https://rust-fuzz.github.io/book/cargo-fuzz.html).
|
||||
|
||||
### Setup
|
||||
You'll need rust-nightly to execute the fuzzer.
|
||||
|
||||
```
|
||||
cargo install cargo-fuzz
|
||||
```
|
||||
|
||||
### Run
|
||||
When the filter parser is executed by the fuzzer, it triggers a stack overflow really fast. We can avoid this problem by limiting the `max_len` of [libfuzzer](https://llvm.org/docs/LibFuzzer.html) to 500 characters.
|
||||
```
|
||||
cargo fuzz run parse -- -max_len=500
|
||||
```
|
||||
|
||||
## What to do if you find a bug in the parser
|
||||
|
||||
- Write a test at the end of the [`lib.rs`](./src/lib.rs) to ensure it never happens again.
|
||||
- Add a file in [the corpus directory](./fuzz/corpus/parse/) with your filter to help the fuzzer find new bugs. Since this directory gets heavily polluted when the fuzzer runs, it is listed in the `.gitignore`, so you'll need to force-add your new test (`git add -f`).
|
3
filter-parser/fuzz/.gitignore
vendored
Normal file
3
filter-parser/fuzz/.gitignore
vendored
Normal file
@ -0,0 +1,3 @@
|
||||
/corpus/
|
||||
/artifacts/
|
||||
/target/
|
25
filter-parser/fuzz/Cargo.toml
Normal file
25
filter-parser/fuzz/Cargo.toml
Normal file
@ -0,0 +1,25 @@
|
||||
[package]
|
||||
name = "filter-parser-fuzz"
|
||||
version = "0.0.0"
|
||||
authors = ["Automatically generated"]
|
||||
publish = false
|
||||
edition = "2018"
|
||||
|
||||
[package.metadata]
|
||||
cargo-fuzz = true
|
||||
|
||||
[dependencies]
|
||||
libfuzzer-sys = "0.4"
|
||||
|
||||
[dependencies.filter-parser]
|
||||
path = ".."
|
||||
|
||||
# Prevent this from interfering with workspaces
|
||||
[workspace]
|
||||
members = ["."]
|
||||
|
||||
[[bin]]
|
||||
name = "parse"
|
||||
path = "fuzz_targets/parse.rs"
|
||||
test = false
|
||||
doc = false
|
1
filter-parser/fuzz/corpus/parse/test_1
Normal file
1
filter-parser/fuzz/corpus/parse/test_1
Normal file
@ -0,0 +1 @@
|
||||
channel = Ponce
|
1
filter-parser/fuzz/corpus/parse/test_10
Normal file
1
filter-parser/fuzz/corpus/parse/test_10
Normal file
@ -0,0 +1 @@
|
||||
channel != ponce
|
1
filter-parser/fuzz/corpus/parse/test_11
Normal file
1
filter-parser/fuzz/corpus/parse/test_11
Normal file
@ -0,0 +1 @@
|
||||
NOT channel = ponce
|
1
filter-parser/fuzz/corpus/parse/test_12
Normal file
1
filter-parser/fuzz/corpus/parse/test_12
Normal file
@ -0,0 +1 @@
|
||||
subscribers < 1000
|
1
filter-parser/fuzz/corpus/parse/test_13
Normal file
1
filter-parser/fuzz/corpus/parse/test_13
Normal file
@ -0,0 +1 @@
|
||||
subscribers > 1000
|
1
filter-parser/fuzz/corpus/parse/test_14
Normal file
1
filter-parser/fuzz/corpus/parse/test_14
Normal file
@ -0,0 +1 @@
|
||||
subscribers <= 1000
|
1
filter-parser/fuzz/corpus/parse/test_15
Normal file
1
filter-parser/fuzz/corpus/parse/test_15
Normal file
@ -0,0 +1 @@
|
||||
subscribers >= 1000
|
1
filter-parser/fuzz/corpus/parse/test_16
Normal file
1
filter-parser/fuzz/corpus/parse/test_16
Normal file
@ -0,0 +1 @@
|
||||
NOT subscribers < 1000
|
1
filter-parser/fuzz/corpus/parse/test_17
Normal file
1
filter-parser/fuzz/corpus/parse/test_17
Normal file
@ -0,0 +1 @@
|
||||
NOT subscribers > 1000
|
1
filter-parser/fuzz/corpus/parse/test_18
Normal file
1
filter-parser/fuzz/corpus/parse/test_18
Normal file
@ -0,0 +1 @@
|
||||
NOT subscribers <= 1000
|
1
filter-parser/fuzz/corpus/parse/test_19
Normal file
1
filter-parser/fuzz/corpus/parse/test_19
Normal file
@ -0,0 +1 @@
|
||||
NOT subscribers >= 1000
|
1
filter-parser/fuzz/corpus/parse/test_2
Normal file
1
filter-parser/fuzz/corpus/parse/test_2
Normal file
@ -0,0 +1 @@
|
||||
subscribers = 12
|
1
filter-parser/fuzz/corpus/parse/test_20
Normal file
1
filter-parser/fuzz/corpus/parse/test_20
Normal file
@ -0,0 +1 @@
|
||||
subscribers 100 TO 1000
|
1
filter-parser/fuzz/corpus/parse/test_21
Normal file
1
filter-parser/fuzz/corpus/parse/test_21
Normal file
@ -0,0 +1 @@
|
||||
NOT subscribers 100 TO 1000
|
1
filter-parser/fuzz/corpus/parse/test_22
Normal file
1
filter-parser/fuzz/corpus/parse/test_22
Normal file
@ -0,0 +1 @@
|
||||
_geoRadius(12, 13, 14)
|
1
filter-parser/fuzz/corpus/parse/test_23
Normal file
1
filter-parser/fuzz/corpus/parse/test_23
Normal file
@ -0,0 +1 @@
|
||||
NOT _geoRadius(12, 13, 14)
|
1
filter-parser/fuzz/corpus/parse/test_24
Normal file
1
filter-parser/fuzz/corpus/parse/test_24
Normal file
@ -0,0 +1 @@
|
||||
channel = ponce AND 'dog race' != 'bernese mountain'
|
1
filter-parser/fuzz/corpus/parse/test_25
Normal file
1
filter-parser/fuzz/corpus/parse/test_25
Normal file
@ -0,0 +1 @@
|
||||
channel = ponce OR 'dog race' != 'bernese mountain'
|
1
filter-parser/fuzz/corpus/parse/test_26
Normal file
1
filter-parser/fuzz/corpus/parse/test_26
Normal file
@ -0,0 +1 @@
|
||||
channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000
|
1
filter-parser/fuzz/corpus/parse/test_27
Normal file
1
filter-parser/fuzz/corpus/parse/test_27
Normal file
@ -0,0 +1 @@
|
||||
channel = ponce AND ( 'dog race' != 'bernese mountain' OR subscribers > 1000 )
|
1
filter-parser/fuzz/corpus/parse/test_28
Normal file
1
filter-parser/fuzz/corpus/parse/test_28
Normal file
@ -0,0 +1 @@
|
||||
(channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000) AND _geoRadius(12, 13, 14)
|
1
filter-parser/fuzz/corpus/parse/test_29
Normal file
1
filter-parser/fuzz/corpus/parse/test_29
Normal file
@ -0,0 +1 @@
|
||||
channel = Ponce = 12
|
1
filter-parser/fuzz/corpus/parse/test_3
Normal file
1
filter-parser/fuzz/corpus/parse/test_3
Normal file
@ -0,0 +1 @@
|
||||
channel = 'Mister Mv'
|
1
filter-parser/fuzz/corpus/parse/test_30
Normal file
1
filter-parser/fuzz/corpus/parse/test_30
Normal file
@ -0,0 +1 @@
|
||||
channel =
|
1
filter-parser/fuzz/corpus/parse/test_31
Normal file
1
filter-parser/fuzz/corpus/parse/test_31
Normal file
@ -0,0 +1 @@
|
||||
channel = 🐻
|
1
filter-parser/fuzz/corpus/parse/test_32
Normal file
1
filter-parser/fuzz/corpus/parse/test_32
Normal file
@ -0,0 +1 @@
|
||||
OR
|
1
filter-parser/fuzz/corpus/parse/test_33
Normal file
1
filter-parser/fuzz/corpus/parse/test_33
Normal file
@ -0,0 +1 @@
|
||||
AND
|
1
filter-parser/fuzz/corpus/parse/test_34
Normal file
1
filter-parser/fuzz/corpus/parse/test_34
Normal file
@ -0,0 +1 @@
|
||||
channel Ponce
|
1
filter-parser/fuzz/corpus/parse/test_35
Normal file
1
filter-parser/fuzz/corpus/parse/test_35
Normal file
@ -0,0 +1 @@
|
||||
channel = Ponce OR
|
1
filter-parser/fuzz/corpus/parse/test_36
Normal file
1
filter-parser/fuzz/corpus/parse/test_36
Normal file
@ -0,0 +1 @@
|
||||
_geoRadius
|
1
filter-parser/fuzz/corpus/parse/test_37
Normal file
1
filter-parser/fuzz/corpus/parse/test_37
Normal file
@ -0,0 +1 @@
|
||||
_geoRadius = 12
|
1
filter-parser/fuzz/corpus/parse/test_38
Normal file
1
filter-parser/fuzz/corpus/parse/test_38
Normal file
@ -0,0 +1 @@
|
||||
_geoPoint(12, 13, 14)
|
1
filter-parser/fuzz/corpus/parse/test_39
Normal file
1
filter-parser/fuzz/corpus/parse/test_39
Normal file
@ -0,0 +1 @@
|
||||
position <= _geoPoint(12, 13, 14)
|
1
filter-parser/fuzz/corpus/parse/test_4
Normal file
1
filter-parser/fuzz/corpus/parse/test_4
Normal file
@ -0,0 +1 @@
|
||||
channel = "Mister Mv"
|
1
filter-parser/fuzz/corpus/parse/test_40
Normal file
1
filter-parser/fuzz/corpus/parse/test_40
Normal file
@ -0,0 +1 @@
|
||||
position <= _geoRadius(12, 13, 14)
|
1
filter-parser/fuzz/corpus/parse/test_41
Normal file
1
filter-parser/fuzz/corpus/parse/test_41
Normal file
@ -0,0 +1 @@
|
||||
channel = 'ponce
|
1
filter-parser/fuzz/corpus/parse/test_42
Normal file
1
filter-parser/fuzz/corpus/parse/test_42
Normal file
@ -0,0 +1 @@
|
||||
channel = "ponce
|
1
filter-parser/fuzz/corpus/parse/test_43
Normal file
1
filter-parser/fuzz/corpus/parse/test_43
Normal file
@ -0,0 +1 @@
|
||||
channel = mv OR (followers >= 1000
|
1
filter-parser/fuzz/corpus/parse/test_5
Normal file
1
filter-parser/fuzz/corpus/parse/test_5
Normal file
@ -0,0 +1 @@
|
||||
'dog race' = Borzoi
|
1
filter-parser/fuzz/corpus/parse/test_6
Normal file
1
filter-parser/fuzz/corpus/parse/test_6
Normal file
@ -0,0 +1 @@
|
||||
"dog race" = Chusky
|
1
filter-parser/fuzz/corpus/parse/test_7
Normal file
1
filter-parser/fuzz/corpus/parse/test_7
Normal file
@ -0,0 +1 @@
|
||||
"dog race" = "Bernese Mountain"
|
1
filter-parser/fuzz/corpus/parse/test_8
Normal file
1
filter-parser/fuzz/corpus/parse/test_8
Normal file
@ -0,0 +1 @@
|
||||
'dog race' = 'Bernese Mountain'
|
1
filter-parser/fuzz/corpus/parse/test_9
Normal file
1
filter-parser/fuzz/corpus/parse/test_9
Normal file
@ -0,0 +1 @@
|
||||
"dog race" = 'Bernese Mountain'
|
18
filter-parser/fuzz/fuzz_targets/parse.rs
Normal file
18
filter-parser/fuzz/fuzz_targets/parse.rs
Normal file
@ -0,0 +1,18 @@
|
||||
#![no_main]
|
||||
use filter_parser::{ErrorKind, FilterCondition};
|
||||
use libfuzzer_sys::fuzz_target;
|
||||
|
||||
fuzz_target!(|data: &[u8]| {
    // Only valid UTF-8 can be handed to the parser.
    // When we are fuzzing the parser we can get a stack overflow very easily.
    // But since this doesn't happen with a normal build we just limit the fuzzer
    // to inputs shorter than 500 bytes.
    let candidate = std::str::from_utf8(data).ok().filter(|s| s.len() < 500);

    if let Some(s) = candidate {
        // Regular parse errors are expected; only an internal error reveals a parser bug.
        if let Err(e) = FilterCondition::parse(s) {
            if matches!(e.kind(), ErrorKind::InternalError(_)) {
                panic!("Found an internal error: `{:?}`", e)
            }
        }
    }
});
|
67
filter-parser/src/condition.rs
Normal file
67
filter-parser/src/condition.rs
Normal file
@ -0,0 +1,67 @@
|
||||
//! BNF grammar:
|
||||
//!
|
||||
//! ```text
|
||||
//! condition      = value ("=" | "!=" | ">" | ">=" | "<" | "<=") value
|
||||
//! to             = value value "TO" WS+ value
|
||||
//! ```
|
||||
|
||||
use nom::branch::alt;
|
||||
use nom::bytes::complete::tag;
|
||||
use nom::character::complete::multispace1;
|
||||
use nom::combinator::cut;
|
||||
use nom::sequence::{terminated, tuple};
|
||||
use Condition::*;
|
||||
|
||||
use crate::{parse_value, FilterCondition, IResult, Span, Token};
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub enum Condition<'a> {
|
||||
GreaterThan(Token<'a>),
|
||||
GreaterThanOrEqual(Token<'a>),
|
||||
Equal(Token<'a>),
|
||||
NotEqual(Token<'a>),
|
||||
Exists,
|
||||
LowerThan(Token<'a>),
|
||||
LowerThanOrEqual(Token<'a>),
|
||||
Between { from: Token<'a>, to: Token<'a> },
|
||||
}
|
||||
|
||||
/// condition = value ("==" | ">" ...) value
|
||||
pub fn parse_condition(input: Span) -> IResult<FilterCondition> {
|
||||
let operator = alt((tag("<="), tag(">="), tag("!="), tag("<"), tag(">"), tag("=")));
|
||||
let (input, (fid, op, value)) = tuple((parse_value, operator, cut(parse_value)))(input)?;
|
||||
|
||||
let condition = match *op.fragment() {
|
||||
"<=" => FilterCondition::Condition { fid, op: LowerThanOrEqual(value) },
|
||||
">=" => FilterCondition::Condition { fid, op: GreaterThanOrEqual(value) },
|
||||
"!=" => FilterCondition::Condition { fid, op: NotEqual(value) },
|
||||
"<" => FilterCondition::Condition { fid, op: LowerThan(value) },
|
||||
">" => FilterCondition::Condition { fid, op: GreaterThan(value) },
|
||||
"=" => FilterCondition::Condition { fid, op: Equal(value) },
|
||||
_ => unreachable!(),
|
||||
};
|
||||
|
||||
Ok((input, condition))
|
||||
}
|
||||
|
||||
/// exist = value "EXISTS"
|
||||
pub fn parse_exists(input: Span) -> IResult<FilterCondition> {
|
||||
let (input, key) = terminated(parse_value, tag("EXISTS"))(input)?;
|
||||
|
||||
Ok((input, FilterCondition::Condition { fid: key, op: Exists }))
|
||||
}
|
||||
/// exist = value "NOT" WS+ "EXISTS"
|
||||
pub fn parse_not_exists(input: Span) -> IResult<FilterCondition> {
|
||||
let (input, key) = parse_value(input)?;
|
||||
|
||||
let (input, _) = tuple((tag("NOT"), multispace1, tag("EXISTS")))(input)?;
|
||||
Ok((input, FilterCondition::Not(Box::new(FilterCondition::Condition { fid: key, op: Exists }))))
|
||||
}
|
||||
|
||||
/// to = value value "TO" WS+ value
|
||||
pub fn parse_to(input: Span) -> IResult<FilterCondition> {
|
||||
let (input, (key, from, _, _, to)) =
|
||||
tuple((parse_value, parse_value, tag("TO"), multispace1, cut(parse_value)))(input)?;
|
||||
|
||||
Ok((input, FilterCondition::Condition { fid: key, op: Between { from, to } }))
|
||||
}
|
198
filter-parser/src/error.rs
Normal file
198
filter-parser/src/error.rs
Normal file
@ -0,0 +1,198 @@
|
||||
use std::fmt::Display;
|
||||
|
||||
use nom::error::{self, ParseError};
|
||||
use nom::Parser;
|
||||
|
||||
use crate::{IResult, Span};
|
||||
|
||||
pub trait NomErrorExt<E> {
|
||||
fn is_failure(&self) -> bool;
|
||||
fn map_err<O: FnOnce(E) -> E>(self, op: O) -> nom::Err<E>;
|
||||
fn map_fail<O: FnOnce(E) -> E>(self, op: O) -> nom::Err<E>;
|
||||
}
|
||||
|
||||
impl<E> NomErrorExt<E> for nom::Err<E> {
    /// Returns `true` when `self` is the `Failure` variant.
    fn is_failure(&self) -> bool {
        match self {
            Self::Failure(_) => true,
            _ => false,
        }
    }

    /// Leaves a `Failure` untouched and forwards every other variant to `nom::Err::map`.
    fn map_err<O: FnOnce(E) -> E>(self, op: O) -> nom::Err<E> {
        if matches!(self, Self::Failure(_)) {
            self
        } else {
            self.map(op)
        }
    }

    /// Leaves an `Error` untouched and forwards every other variant to `nom::Err::map`.
    fn map_fail<O: FnOnce(E) -> E>(self, op: O) -> nom::Err<E> {
        if matches!(self, Self::Error(_)) {
            self
        } else {
            self.map(op)
        }
    }
}
|
||||
|
||||
/// cut a parser and map the error
|
||||
pub fn cut_with_err<'a, O>(
|
||||
mut parser: impl FnMut(Span<'a>) -> IResult<'a, O>,
|
||||
mut with: impl FnMut(Error<'a>) -> Error<'a>,
|
||||
) -> impl FnMut(Span<'a>) -> IResult<O> {
|
||||
move |input| match parser.parse(input) {
|
||||
Err(nom::Err::Error(e)) => Err(nom::Err::Failure(with(e))),
|
||||
rest => rest,
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct Error<'a> {
|
||||
context: Span<'a>,
|
||||
kind: ErrorKind<'a>,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum ExpectedValueKind {
|
||||
ReservedKeyword,
|
||||
Other,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum ErrorKind<'a> {
|
||||
ReservedGeo(&'a str),
|
||||
Geo,
|
||||
MisusedGeo,
|
||||
InvalidPrimary,
|
||||
ExpectedEof,
|
||||
ExpectedValue(ExpectedValueKind),
|
||||
MalformedValue,
|
||||
InOpeningBracket,
|
||||
InClosingBracket,
|
||||
NonFiniteFloat,
|
||||
InExpectedValue(ExpectedValueKind),
|
||||
ReservedKeyword(String),
|
||||
MissingClosingDelimiter(char),
|
||||
Char(char),
|
||||
InternalError(error::ErrorKind),
|
||||
DepthLimitReached,
|
||||
External(String),
|
||||
}
|
||||
|
||||
impl<'a> Error<'a> {
|
||||
pub fn kind(&self) -> &ErrorKind<'a> {
|
||||
&self.kind
|
||||
}
|
||||
|
||||
pub fn context(&self) -> &Span<'a> {
|
||||
&self.context
|
||||
}
|
||||
|
||||
pub fn new_from_kind(context: Span<'a>, kind: ErrorKind<'a>) -> Self {
|
||||
Self { context, kind }
|
||||
}
|
||||
|
||||
pub fn new_from_external(context: Span<'a>, error: impl std::error::Error) -> Self {
|
||||
Self::new_from_kind(context, ErrorKind::External(error.to_string()))
|
||||
}
|
||||
|
||||
pub fn char(self) -> char {
|
||||
match self.kind {
|
||||
ErrorKind::Char(c) => c,
|
||||
error => panic!("Internal filter parser error: {:?}", error),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> ParseError<Span<'a>> for Error<'a> {
|
||||
fn from_error_kind(input: Span<'a>, kind: error::ErrorKind) -> Self {
|
||||
let kind = match kind {
|
||||
error::ErrorKind::Eof => ErrorKind::ExpectedEof,
|
||||
kind => ErrorKind::InternalError(kind),
|
||||
};
|
||||
Self { context: input, kind }
|
||||
}
|
||||
|
||||
fn append(_input: Span<'a>, _kind: error::ErrorKind, other: Self) -> Self {
|
||||
other
|
||||
}
|
||||
|
||||
fn from_char(input: Span<'a>, c: char) -> Self {
|
||||
Self { context: input, kind: ErrorKind::Char(c) }
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Display for Error<'a> {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
let input = self.context.fragment();
|
||||
// When printing our error message we want to escape all `\n` to be sure we keep our format with the
|
||||
// first line being the diagnostic and the second line being the incriminated filter.
|
||||
let escaped_input = input.escape_debug();
|
||||
|
||||
match &self.kind {
|
||||
ErrorKind::ExpectedValue(_) if input.trim().is_empty() => {
|
||||
writeln!(f, "Was expecting a value but instead got nothing.")?
|
||||
}
|
||||
ErrorKind::ExpectedValue(ExpectedValueKind::ReservedKeyword) => {
|
||||
writeln!(f, "Was expecting a value but instead got `{escaped_input}`, which is a reserved keyword. To use `{escaped_input}` as a field name or a value, surround it by quotes.")?
|
||||
}
|
||||
ErrorKind::ExpectedValue(ExpectedValueKind::Other) => {
|
||||
writeln!(f, "Was expecting a value but instead got `{}`.", escaped_input)?
|
||||
}
|
||||
ErrorKind::MalformedValue => {
|
||||
writeln!(f, "Malformed value: `{}`.", escaped_input)?
|
||||
}
|
||||
ErrorKind::MissingClosingDelimiter(c) => {
|
||||
writeln!(f, "Expression `{}` is missing the following closing delimiter: `{}`.", escaped_input, c)?
|
||||
}
|
||||
ErrorKind::InvalidPrimary if input.trim().is_empty() => {
|
||||
writeln!(f, "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, or `_geoRadius` but instead got nothing.")?
|
||||
}
|
||||
ErrorKind::InvalidPrimary => {
|
||||
writeln!(f, "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, or `_geoRadius` at `{}`.", escaped_input)?
|
||||
}
|
||||
ErrorKind::ExpectedEof => {
|
||||
writeln!(f, "Found unexpected characters at the end of the filter: `{}`. You probably forgot an `OR` or an `AND` rule.", escaped_input)?
|
||||
}
|
||||
ErrorKind::Geo => {
|
||||
writeln!(f, "The `_geoRadius` filter expects three arguments: `_geoRadius(latitude, longitude, radius)`.")?
|
||||
}
|
||||
ErrorKind::ReservedGeo(name) => {
|
||||
writeln!(f, "`{}` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance) built-in rule to filter on `_geo` coordinates.", name.escape_debug())?
|
||||
}
|
||||
ErrorKind::MisusedGeo => {
|
||||
writeln!(f, "The `_geoRadius` filter is an operation and can't be used as a value.")?
|
||||
}
|
||||
ErrorKind::ReservedKeyword(word) => {
|
||||
writeln!(f, "`{word}` is a reserved keyword and thus cannot be used as a field name unless it is put inside quotes. Use \"{word}\" or \'{word}\' instead.")?
|
||||
}
|
||||
ErrorKind::InOpeningBracket => {
|
||||
writeln!(f, "Expected `[` after `IN` keyword.")?
|
||||
}
|
||||
ErrorKind::InClosingBracket => {
|
||||
writeln!(f, "Expected matching `]` after the list of field names given to `IN[`")?
|
||||
}
|
||||
ErrorKind::NonFiniteFloat => {
|
||||
writeln!(f, "Non finite floats are not supported")?
|
||||
}
|
||||
ErrorKind::InExpectedValue(ExpectedValueKind::ReservedKeyword) => {
|
||||
writeln!(f, "Expected only comma-separated field names inside `IN[..]` but instead found `{escaped_input}`, which is a keyword. To use `{escaped_input}` as a field name or a value, surround it by quotes.")?
|
||||
}
|
||||
ErrorKind::InExpectedValue(ExpectedValueKind::Other) => {
|
||||
writeln!(f, "Expected only comma-separated field names inside `IN[..]` but instead found `{escaped_input}`.")?
|
||||
}
|
||||
ErrorKind::Char(c) => {
|
||||
panic!("Tried to display a char error with `{}`", c)
|
||||
}
|
||||
ErrorKind::DepthLimitReached => writeln!(
|
||||
f,
|
||||
"The filter exceeded the maximum depth limit. Try rewriting the filter so that it contains fewer nested conditions."
|
||||
)?,
|
||||
ErrorKind::InternalError(kind) => writeln!(
|
||||
f,
|
||||
"Encountered an internal `{:?}` error while parsing your filter. Please fill an issue", kind
|
||||
)?,
|
||||
ErrorKind::External(ref error) => writeln!(f, "{}", error)?,
|
||||
}
|
||||
let base_column = self.context.get_utf8_column();
|
||||
let size = self.context.fragment().chars().count();
|
||||
|
||||
write!(f, "{}:{} {}", base_column, base_column + size, self.context.extra)
|
||||
}
|
||||
}
|
739
filter-parser/src/lib.rs
Normal file
739
filter-parser/src/lib.rs
Normal file
@ -0,0 +1,739 @@
|
||||
//! BNF grammar:
|
||||
//!
|
||||
//! ```text
|
||||
//! filter = expression EOF
|
||||
//! expression = or
|
||||
//! or = and ("OR" WS+ and)*
|
||||
//! and = not ("AND" WS+ not)*
|
||||
//! not = ("NOT" WS+ not) | primary
|
||||
//! primary = (WS* "(" WS* expression WS* ")" WS*) | geoRadius | in | condition | exists | not_exists | to
|
||||
//! in = value "IN" WS* "[" value_list "]"
|
||||
//! condition = value ("=" | "!=" | ">" | ">=" | "<" | "<=") value
|
||||
//! exists = value "EXISTS"
|
||||
//! not_exists = value "NOT" WS+ "EXISTS"
|
||||
//! to = value value "TO" WS+ value
|
||||
//! value = WS* ( word | singleQuoted | doubleQuoted) WS+
|
||||
//! value_list = (value ("," value)* ","?)?
|
||||
//! singleQuoted = "'" .* all but quotes "'"
|
||||
//! doubleQuoted = "\"" .* all but double quotes "\""
|
||||
//! word = (alphanumeric | _ | - | .)+
|
||||
//! geoRadius      = "_geoRadius(" WS* float WS* "," WS* float WS* "," WS* float WS* ")"
|
||||
//! ```
|
||||
//!
|
||||
//! Other BNF grammar used to handle some specific errors:
|
||||
//! ```text
|
||||
//! geoPoint = WS* "_geoPoint(" (float ",")* ")"
|
||||
//! ```
|
||||
//!
|
||||
//! Specific errors:
|
||||
//! ================
|
||||
//! - If a user tries to use a geoPoint, as a primary OR as a value, we must throw an error.
|
||||
//! ```text
|
||||
//! field = _geoPoint(12, 13, 14)
|
||||
//! field < 12 AND _geoPoint(1, 2)
|
||||
//! ```
|
||||
//!
|
||||
//! - If a user tries to use a geoRadius as a value, we must throw an error.
|
||||
//! ```text
|
||||
//! field = _geoRadius(12, 13, 14)
|
||||
//! ```
|
||||
//!
|
||||
|
||||
mod condition;
|
||||
mod error;
|
||||
mod value;
|
||||
|
||||
use std::fmt::Debug;
|
||||
|
||||
pub use condition::{parse_condition, parse_to, Condition};
|
||||
use condition::{parse_exists, parse_not_exists};
|
||||
use error::{cut_with_err, ExpectedValueKind, NomErrorExt};
|
||||
pub use error::{Error, ErrorKind};
|
||||
use nom::branch::alt;
|
||||
use nom::bytes::complete::tag;
|
||||
use nom::character::complete::{char, multispace0};
|
||||
use nom::combinator::{cut, eof, map, opt};
|
||||
use nom::multi::{many0, separated_list1};
|
||||
use nom::number::complete::recognize_float;
|
||||
use nom::sequence::{delimited, preceded, terminated, tuple};
|
||||
use nom::Finish;
|
||||
use nom_locate::LocatedSpan;
|
||||
pub(crate) use value::parse_value;
|
||||
use value::word_exact;
|
||||
|
||||
pub type Span<'a> = LocatedSpan<&'a str, &'a str>;
|
||||
|
||||
type IResult<'a, Ret> = nom::IResult<Span<'a>, Ret, Error<'a>>;
|
||||
|
||||
const MAX_FILTER_DEPTH: usize = 200;
|
||||
|
||||
#[derive(Debug, Clone, Eq)]
|
||||
pub struct Token<'a> {
|
||||
/// The token in the original input, it should be used when possible.
|
||||
span: Span<'a>,
|
||||
/// If you need to modify the original input you can use the `value` field
|
||||
/// to store your modified input.
|
||||
value: Option<String>,
|
||||
}
|
||||
|
||||
impl<'a> PartialEq for Token<'a> {
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
self.span.fragment() == other.span.fragment()
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Token<'a> {
|
||||
pub fn new(span: Span<'a>, value: Option<String>) -> Self {
|
||||
Self { span, value }
|
||||
}
|
||||
|
||||
pub fn lexeme(&self) -> &str {
|
||||
&self.span
|
||||
}
|
||||
|
||||
pub fn value(&self) -> &str {
|
||||
self.value.as_ref().map_or(&self.span, |value| value)
|
||||
}
|
||||
|
||||
pub fn as_external_error(&self, error: impl std::error::Error) -> Error<'a> {
|
||||
Error::new_from_external(self.span, error)
|
||||
}
|
||||
|
||||
pub fn parse_finite_float(&self) -> Result<f64, Error> {
|
||||
let value: f64 = self.span.parse().map_err(|e| self.as_external_error(e))?;
|
||||
if value.is_finite() {
|
||||
Ok(value)
|
||||
} else {
|
||||
Err(Error::new_from_kind(self.span, ErrorKind::NonFiniteFloat))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> From<Span<'a>> for Token<'a> {
|
||||
fn from(span: Span<'a>) -> Self {
|
||||
Self { span, value: None }
|
||||
}
|
||||
}
|
||||
|
||||
/// Allow [Token] to be constructed from &[str]
|
||||
impl<'a> From<&'a str> for Token<'a> {
|
||||
fn from(s: &'a str) -> Self {
|
||||
Token::from(Span::new_extra(s, s))
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub enum FilterCondition<'a> {
|
||||
Not(Box<Self>),
|
||||
Condition { fid: Token<'a>, op: Condition<'a> },
|
||||
In { fid: Token<'a>, els: Vec<Token<'a>> },
|
||||
Or(Vec<Self>),
|
||||
And(Vec<Self>),
|
||||
GeoLowerThan { point: [Token<'a>; 2], radius: Token<'a> },
|
||||
}
|
||||
|
||||
impl<'a> FilterCondition<'a> {
|
||||
/// Returns the first token found at the specified depth, `None` if no token at this depth.
|
||||
pub fn token_at_depth(&self, depth: usize) -> Option<&Token> {
|
||||
match self {
|
||||
FilterCondition::Condition { fid, .. } if depth == 0 => Some(fid),
|
||||
FilterCondition::Or(subfilters) => {
|
||||
let depth = depth.saturating_sub(1);
|
||||
for f in subfilters.iter() {
|
||||
if let Some(t) = f.token_at_depth(depth) {
|
||||
return Some(t);
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
FilterCondition::And(subfilters) => {
|
||||
let depth = depth.saturating_sub(1);
|
||||
for f in subfilters.iter() {
|
||||
if let Some(t) = f.token_at_depth(depth) {
|
||||
return Some(t);
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
FilterCondition::GeoLowerThan { point: [point, _], .. } if depth == 0 => Some(point),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn parse(input: &'a str) -> Result<Option<Self>, Error> {
|
||||
if input.trim().is_empty() {
|
||||
return Ok(None);
|
||||
}
|
||||
let span = Span::new_extra(input, input);
|
||||
parse_filter(span).finish().map(|(_rem, output)| Some(output))
|
||||
}
|
||||
}
|
||||
|
||||
/// remove OPTIONAL whitespaces before AND after the provided parser.
|
||||
fn ws<'a, O>(
|
||||
inner: impl FnMut(Span<'a>) -> IResult<'a, O>,
|
||||
) -> impl FnMut(Span<'a>) -> IResult<'a, O> {
|
||||
delimited(multispace0, inner, multispace0)
|
||||
}
|
||||
|
||||
/// value_list = (value ("," value)* ","?)?
|
||||
fn parse_value_list(input: Span) -> IResult<Vec<Token>> {
|
||||
let (input, first_value) = opt(parse_value)(input)?;
|
||||
if let Some(first_value) = first_value {
|
||||
let value_list_el_parser = preceded(ws(tag(",")), parse_value);
|
||||
|
||||
let (input, mut values) = many0(value_list_el_parser)(input)?;
|
||||
let (input, _) = opt(ws(tag(",")))(input)?;
|
||||
values.insert(0, first_value);
|
||||
|
||||
Ok((input, values))
|
||||
} else {
|
||||
Ok((input, vec![]))
|
||||
}
|
||||
}
|
||||
|
||||
/// "IN" WS* "[" value_list "]"
|
||||
fn parse_in_body(input: Span) -> IResult<Vec<Token>> {
|
||||
let (input, _) = ws(word_exact("IN"))(input)?;
|
||||
|
||||
// everything after `IN` can be a failure
|
||||
let (input, _) =
|
||||
cut_with_err(tag("["), |_| Error::new_from_kind(input, ErrorKind::InOpeningBracket))(
|
||||
input,
|
||||
)?;
|
||||
|
||||
let (input, content) = cut(parse_value_list)(input)?;
|
||||
|
||||
// everything after `IN` can be a failure
|
||||
let (input, _) = cut_with_err(ws(tag("]")), |_| {
|
||||
if eof::<_, ()>(input).is_ok() {
|
||||
Error::new_from_kind(input, ErrorKind::InClosingBracket)
|
||||
} else {
|
||||
let expected_value_kind = match parse_value(input) {
|
||||
Err(nom::Err::Error(e)) => match e.kind() {
|
||||
ErrorKind::ReservedKeyword(_) => ExpectedValueKind::ReservedKeyword,
|
||||
_ => ExpectedValueKind::Other,
|
||||
},
|
||||
_ => ExpectedValueKind::Other,
|
||||
};
|
||||
Error::new_from_kind(input, ErrorKind::InExpectedValue(expected_value_kind))
|
||||
}
|
||||
})(input)?;
|
||||
|
||||
Ok((input, content))
|
||||
}
|
||||
|
||||
/// in = value "IN" "[" value_list "]"
|
||||
fn parse_in(input: Span) -> IResult<FilterCondition> {
|
||||
let (input, value) = parse_value(input)?;
|
||||
let (input, content) = parse_in_body(input)?;
|
||||
|
||||
let filter = FilterCondition::In { fid: value, els: content };
|
||||
Ok((input, filter))
|
||||
}
|
||||
|
||||
/// in = value "NOT" WS* "IN" "[" value_list "]"
|
||||
fn parse_not_in(input: Span) -> IResult<FilterCondition> {
|
||||
let (input, value) = parse_value(input)?;
|
||||
let (input, _) = word_exact("NOT")(input)?;
|
||||
let (input, content) = parse_in_body(input)?;
|
||||
|
||||
let filter = FilterCondition::Not(Box::new(FilterCondition::In { fid: value, els: content }));
|
||||
Ok((input, filter))
|
||||
}
|
||||
|
||||
/// or = and ("OR" and)
|
||||
fn parse_or(input: Span, depth: usize) -> IResult<FilterCondition> {
|
||||
if depth > MAX_FILTER_DEPTH {
|
||||
return Err(nom::Err::Error(Error::new_from_kind(input, ErrorKind::DepthLimitReached)));
|
||||
}
|
||||
let (input, first_filter) = parse_and(input, depth + 1)?;
|
||||
// if we found a `OR` then we MUST find something next
|
||||
let (input, mut ors) =
|
||||
many0(preceded(ws(word_exact("OR")), cut(|input| parse_and(input, depth + 1))))(input)?;
|
||||
|
||||
let filter = if ors.is_empty() {
|
||||
first_filter
|
||||
} else {
|
||||
ors.insert(0, first_filter);
|
||||
FilterCondition::Or(ors)
|
||||
};
|
||||
|
||||
Ok((input, filter))
|
||||
}
|
||||
|
||||
/// and = not ("AND" not)*
|
||||
fn parse_and(input: Span, depth: usize) -> IResult<FilterCondition> {
|
||||
if depth > MAX_FILTER_DEPTH {
|
||||
return Err(nom::Err::Error(Error::new_from_kind(input, ErrorKind::DepthLimitReached)));
|
||||
}
|
||||
let (input, first_filter) = parse_not(input, depth + 1)?;
|
||||
// if we found a `AND` then we MUST find something next
|
||||
let (input, mut ands) =
|
||||
many0(preceded(ws(word_exact("AND")), cut(|input| parse_not(input, depth + 1))))(input)?;
|
||||
|
||||
let filter = if ands.is_empty() {
|
||||
first_filter
|
||||
} else {
|
||||
ands.insert(0, first_filter);
|
||||
FilterCondition::And(ands)
|
||||
};
|
||||
|
||||
Ok((input, filter))
|
||||
}
|
||||
|
||||
/// not = ("NOT" WS+ not) | primary
|
||||
/// We can have multiple consecutive not, eg: `NOT NOT channel = mv`.
|
||||
/// If we parse a `NOT` we MUST parse something behind.
|
||||
fn parse_not(input: Span, depth: usize) -> IResult<FilterCondition> {
|
||||
if depth > MAX_FILTER_DEPTH {
|
||||
return Err(nom::Err::Error(Error::new_from_kind(input, ErrorKind::DepthLimitReached)));
|
||||
}
|
||||
alt((
|
||||
map(
|
||||
preceded(ws(word_exact("NOT")), cut(|input| parse_not(input, depth + 1))),
|
||||
|e| match e {
|
||||
FilterCondition::Not(e) => *e,
|
||||
_ => FilterCondition::Not(Box::new(e)),
|
||||
},
|
||||
),
|
||||
|input| parse_primary(input, depth + 1),
|
||||
))(input)
|
||||
}
|
||||
|
||||
/// geoRadius = WS* "_geoRadius(float WS* "," WS* float WS* "," WS* float)
|
||||
/// If we parse `_geoRadius` we MUST parse the rest of the expression.
|
||||
fn parse_geo_radius(input: Span) -> IResult<FilterCondition> {
|
||||
// we want to allow space BEFORE the _geoRadius but not after
|
||||
let parsed = preceded(
|
||||
tuple((multispace0, word_exact("_geoRadius"))),
|
||||
// if we were able to parse `_geoRadius` and can't parse the rest of the input we return a failure
|
||||
cut(delimited(char('('), separated_list1(tag(","), ws(recognize_float)), char(')'))),
|
||||
)(input)
|
||||
.map_err(|e| e.map(|_| Error::new_from_kind(input, ErrorKind::Geo)));
|
||||
|
||||
let (input, args) = parsed?;
|
||||
|
||||
if args.len() != 3 {
|
||||
return Err(nom::Err::Failure(Error::new_from_kind(input, ErrorKind::Geo)));
|
||||
}
|
||||
|
||||
let res = FilterCondition::GeoLowerThan {
|
||||
point: [args[0].into(), args[1].into()],
|
||||
radius: args[2].into(),
|
||||
};
|
||||
Ok((input, res))
|
||||
}
|
||||
|
||||
/// geoPoint = WS* "_geoPoint(float WS* "," WS* float WS* "," WS* float)
|
||||
fn parse_geo_point(input: Span) -> IResult<FilterCondition> {
|
||||
// we want to forbid space BEFORE the _geoPoint but not after
|
||||
tuple((
|
||||
multispace0,
|
||||
tag("_geoPoint"),
|
||||
// if we were able to parse `_geoPoint` we are going to return a Failure whatever happens next.
|
||||
cut(delimited(char('('), separated_list1(tag(","), ws(recognize_float)), char(')'))),
|
||||
))(input)
|
||||
.map_err(|e| e.map(|_| Error::new_from_kind(input, ErrorKind::ReservedGeo("_geoPoint"))))?;
|
||||
// if we succeeded we still return a `Failure` because geoPoints are not allowed
|
||||
Err(nom::Err::Failure(Error::new_from_kind(input, ErrorKind::ReservedGeo("_geoPoint"))))
|
||||
}
|
||||
|
||||
fn parse_error_reserved_keyword(input: Span) -> IResult<FilterCondition> {
|
||||
match parse_condition(input) {
|
||||
Ok(result) => Ok(result),
|
||||
Err(nom::Err::Error(inner) | nom::Err::Failure(inner)) => match inner.kind() {
|
||||
ErrorKind::ExpectedValue(ExpectedValueKind::ReservedKeyword) => {
|
||||
Err(nom::Err::Failure(inner))
|
||||
}
|
||||
_ => Err(nom::Err::Error(inner)),
|
||||
},
|
||||
Err(e) => Err(e),
|
||||
}
|
||||
}
|
||||
|
||||
/// primary = (WS* "(" WS* expression WS* ")" WS*) | geoRadius | condition | exists | not_exists | to
|
||||
fn parse_primary(input: Span, depth: usize) -> IResult<FilterCondition> {
|
||||
if depth > MAX_FILTER_DEPTH {
|
||||
return Err(nom::Err::Error(Error::new_from_kind(input, ErrorKind::DepthLimitReached)));
|
||||
}
|
||||
alt((
|
||||
// if we find a first parenthesis, then we must parse an expression and find the closing parenthesis
|
||||
delimited(
|
||||
ws(char('(')),
|
||||
cut(|input| parse_expression(input, depth + 1)),
|
||||
cut_with_err(ws(char(')')), |c| {
|
||||
Error::new_from_kind(input, ErrorKind::MissingClosingDelimiter(c.char()))
|
||||
}),
|
||||
),
|
||||
parse_geo_radius,
|
||||
parse_in,
|
||||
parse_not_in,
|
||||
parse_condition,
|
||||
parse_exists,
|
||||
parse_not_exists,
|
||||
parse_to,
|
||||
// the next lines are only for error handling and are written at the end to have the less possible performance impact
|
||||
parse_geo_point,
|
||||
parse_error_reserved_keyword,
|
||||
))(input)
|
||||
// if the inner parsers did not match enough information to return an accurate error
|
||||
.map_err(|e| e.map_err(|_| Error::new_from_kind(input, ErrorKind::InvalidPrimary)))
|
||||
}
|
||||
|
||||
/// expression = or
///
/// Entry point for parsing a (possibly nested) expression: it forwards `input`
/// and the current nesting `depth` unchanged to `parse_or`.
|
||||
pub fn parse_expression(input: Span, depth: usize) -> IResult<FilterCondition> {
|
||||
parse_or(input, depth)
|
||||
}
|
||||
|
||||
/// filter = expression EOF
///
/// Parses a whole filter: one expression, started at depth 0, that must be
/// followed by the end of the input (`eof`).
|
||||
pub fn parse_filter(input: Span) -> IResult<FilterCondition> {
|
||||
terminated(|input| parse_expression(input, 0), eof)(input)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub mod tests {
|
||||
use super::*;
|
||||
|
||||
/// Create a raw [Token]. You must specify the string that appear BEFORE your element followed by your element
|
||||
pub fn rtok<'a>(before: &'a str, value: &'a str) -> Token<'a> {
|
||||
// if the string is empty we still need to return 1 for the line number
|
||||
let lines = before.is_empty().then_some(1).unwrap_or_else(|| before.lines().count());
|
||||
let offset = before.chars().count();
|
||||
// the extra field is not checked in the tests so we can set it to nothing
|
||||
unsafe { Span::new_from_raw_offset(offset, lines as u32, value, "") }.into()
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parse() {
|
||||
use FilterCondition as Fc;
|
||||
|
||||
fn p(s: &str) -> impl std::fmt::Display + '_ {
|
||||
Fc::parse(s).unwrap().unwrap()
|
||||
}
|
||||
|
||||
// Test equal
|
||||
insta::assert_display_snapshot!(p("channel = Ponce"), @"{channel} = {Ponce}");
|
||||
insta::assert_display_snapshot!(p("subscribers = 12"), @"{subscribers} = {12}");
|
||||
insta::assert_display_snapshot!(p("channel = 'Mister Mv'"), @"{channel} = {Mister Mv}");
|
||||
insta::assert_display_snapshot!(p("channel = \"Mister Mv\""), @"{channel} = {Mister Mv}");
|
||||
insta::assert_display_snapshot!(p("'dog race' = Borzoi"), @"{dog race} = {Borzoi}");
|
||||
insta::assert_display_snapshot!(p("\"dog race\" = Chusky"), @"{dog race} = {Chusky}");
|
||||
insta::assert_display_snapshot!(p("\"dog race\" = \"Bernese Mountain\""), @"{dog race} = {Bernese Mountain}");
|
||||
insta::assert_display_snapshot!(p("'dog race' = 'Bernese Mountain'"), @"{dog race} = {Bernese Mountain}");
|
||||
insta::assert_display_snapshot!(p("\"dog race\" = 'Bernese Mountain'"), @"{dog race} = {Bernese Mountain}");
|
||||
|
||||
// Test IN
|
||||
insta::assert_display_snapshot!(p("colour IN[]"), @"{colour} IN[]");
|
||||
insta::assert_display_snapshot!(p("colour IN[green]"), @"{colour} IN[{green}, ]");
|
||||
insta::assert_display_snapshot!(p("colour IN[green,]"), @"{colour} IN[{green}, ]");
|
||||
insta::assert_display_snapshot!(p("colour NOT IN[green,blue]"), @"NOT ({colour} IN[{green}, {blue}, ])");
|
||||
insta::assert_display_snapshot!(p(" colour IN [ green , blue , ]"), @"{colour} IN[{green}, {blue}, ]");
|
||||
|
||||
// Test IN + OR/AND/()
|
||||
insta::assert_display_snapshot!(p(" colour IN [green, blue] AND color = green "), @"AND[{colour} IN[{green}, {blue}, ], {color} = {green}, ]");
|
||||
insta::assert_display_snapshot!(p("NOT (colour IN [green, blue]) AND color = green "), @"AND[NOT ({colour} IN[{green}, {blue}, ]), {color} = {green}, ]");
|
||||
insta::assert_display_snapshot!(p("x = 1 OR NOT (colour IN [green, blue] OR color = green) "), @"OR[{x} = {1}, NOT (OR[{colour} IN[{green}, {blue}, ], {color} = {green}, ]), ]");
|
||||
|
||||
// Test whitespace start/end
|
||||
insta::assert_display_snapshot!(p(" colour = green "), @"{colour} = {green}");
|
||||
insta::assert_display_snapshot!(p(" (colour = green OR colour = red) "), @"OR[{colour} = {green}, {colour} = {red}, ]");
|
||||
insta::assert_display_snapshot!(p(" colour IN [green, blue] AND color = green "), @"AND[{colour} IN[{green}, {blue}, ], {color} = {green}, ]");
|
||||
insta::assert_display_snapshot!(p(" colour NOT IN [green, blue] "), @"NOT ({colour} IN[{green}, {blue}, ])");
|
||||
insta::assert_display_snapshot!(p(" colour IN [green, blue] "), @"{colour} IN[{green}, {blue}, ]");
|
||||
|
||||
// Test conditions
|
||||
insta::assert_display_snapshot!(p("channel != ponce"), @"{channel} != {ponce}");
|
||||
insta::assert_display_snapshot!(p("NOT channel = ponce"), @"NOT ({channel} = {ponce})");
|
||||
insta::assert_display_snapshot!(p("subscribers < 1000"), @"{subscribers} < {1000}");
|
||||
insta::assert_display_snapshot!(p("subscribers > 1000"), @"{subscribers} > {1000}");
|
||||
insta::assert_display_snapshot!(p("subscribers <= 1000"), @"{subscribers} <= {1000}");
|
||||
insta::assert_display_snapshot!(p("subscribers >= 1000"), @"{subscribers} >= {1000}");
|
||||
insta::assert_display_snapshot!(p("subscribers <= 1000"), @"{subscribers} <= {1000}");
|
||||
insta::assert_display_snapshot!(p("subscribers 100 TO 1000"), @"{subscribers} {100} TO {1000}");
|
||||
|
||||
// Test NOT + EXISTS
|
||||
insta::assert_display_snapshot!(p("subscribers EXISTS"), @"{subscribers} EXISTS");
|
||||
insta::assert_display_snapshot!(p("NOT subscribers < 1000"), @"NOT ({subscribers} < {1000})");
|
||||
insta::assert_display_snapshot!(p("NOT subscribers EXISTS"), @"NOT ({subscribers} EXISTS)");
|
||||
insta::assert_display_snapshot!(p("subscribers NOT EXISTS"), @"NOT ({subscribers} EXISTS)");
|
||||
insta::assert_display_snapshot!(p("NOT subscribers NOT EXISTS"), @"{subscribers} EXISTS");
|
||||
insta::assert_display_snapshot!(p("subscribers NOT EXISTS"), @"NOT ({subscribers} EXISTS)");
|
||||
insta::assert_display_snapshot!(p("NOT subscribers 100 TO 1000"), @"NOT ({subscribers} {100} TO {1000})");
|
||||
|
||||
// Test nested NOT
|
||||
insta::assert_display_snapshot!(p("NOT NOT NOT NOT x = 5"), @"{x} = {5}");
|
||||
insta::assert_display_snapshot!(p("NOT NOT (NOT NOT x = 5)"), @"{x} = {5}");
|
||||
|
||||
// Test geo radius
|
||||
insta::assert_display_snapshot!(p("_geoRadius(12, 13, 14)"), @"_geoRadius({12}, {13}, {14})");
|
||||
insta::assert_display_snapshot!(p("NOT _geoRadius(12, 13, 14)"), @"NOT (_geoRadius({12}, {13}, {14}))");
|
||||
|
||||
// Test OR + AND
|
||||
insta::assert_display_snapshot!(p("channel = ponce AND 'dog race' != 'bernese mountain'"), @"AND[{channel} = {ponce}, {dog race} != {bernese mountain}, ]");
|
||||
insta::assert_display_snapshot!(p("channel = ponce OR 'dog race' != 'bernese mountain'"), @"OR[{channel} = {ponce}, {dog race} != {bernese mountain}, ]");
|
||||
insta::assert_display_snapshot!(p("channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000"), @"OR[AND[{channel} = {ponce}, {dog race} != {bernese mountain}, ], {subscribers} > {1000}, ]");
|
||||
insta::assert_display_snapshot!(
|
||||
p("channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000 OR colour = red OR colour = blue AND size = 7"),
|
||||
@"OR[AND[{channel} = {ponce}, {dog race} != {bernese mountain}, ], {subscribers} > {1000}, {colour} = {red}, AND[{colour} = {blue}, {size} = {7}, ], ]"
|
||||
);
|
||||
|
||||
// Test parentheses
|
||||
insta::assert_display_snapshot!(p("channel = ponce AND ( 'dog race' != 'bernese mountain' OR subscribers > 1000 )"), @"AND[{channel} = {ponce}, OR[{dog race} != {bernese mountain}, {subscribers} > {1000}, ], ]");
|
||||
insta::assert_display_snapshot!(p("(channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000) AND _geoRadius(12, 13, 14)"), @"AND[OR[AND[{channel} = {ponce}, {dog race} != {bernese mountain}, ], {subscribers} > {1000}, ], _geoRadius({12}, {13}, {14}), ]");
|
||||
|
||||
// Test recursion
|
||||
// This is the most that is allowed
|
||||
insta::assert_display_snapshot!(
|
||||
p("(((((((((((((((((((((((((((((((((((((((((((((((((x = 1)))))))))))))))))))))))))))))))))))))))))))))))))"),
|
||||
@"{x} = {1}"
|
||||
);
|
||||
insta::assert_display_snapshot!(
|
||||
p("NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT x = 1"),
|
||||
@"NOT ({x} = {1})"
|
||||
);
|
||||
|
||||
// Confusing keywords
|
||||
insta::assert_display_snapshot!(p(r#"NOT "OR" EXISTS AND "EXISTS" NOT EXISTS"#), @"AND[NOT ({OR} EXISTS), NOT ({EXISTS} EXISTS), ]");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn error() {
|
||||
use FilterCondition as Fc;
|
||||
|
||||
fn p(s: &str) -> impl std::fmt::Display + '_ {
|
||||
Fc::parse(s).unwrap_err().to_string()
|
||||
}
|
||||
|
||||
insta::assert_display_snapshot!(p("channel = Ponce = 12"), @r###"
|
||||
Found unexpected characters at the end of the filter: `= 12`. You probably forgot an `OR` or an `AND` rule.
|
||||
17:21 channel = Ponce = 12
|
||||
"###);
|
||||
|
||||
insta::assert_display_snapshot!(p("channel = "), @r###"
|
||||
Was expecting a value but instead got nothing.
|
||||
14:14 channel =
|
||||
"###);
|
||||
|
||||
insta::assert_display_snapshot!(p("channel = 🐻"), @r###"
|
||||
Was expecting a value but instead got `🐻`.
|
||||
11:12 channel = 🐻
|
||||
"###);
|
||||
|
||||
insta::assert_display_snapshot!(p("channel = 🐻 AND followers < 100"), @r###"
|
||||
Was expecting a value but instead got `🐻`.
|
||||
11:12 channel = 🐻 AND followers < 100
|
||||
"###);
|
||||
|
||||
insta::assert_display_snapshot!(p("'OR'"), @r###"
|
||||
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, or `_geoRadius` at `\'OR\'`.
|
||||
1:5 'OR'
|
||||
"###);
|
||||
|
||||
insta::assert_display_snapshot!(p("OR"), @r###"
|
||||
Was expecting a value but instead got `OR`, which is a reserved keyword. To use `OR` as a field name or a value, surround it by quotes.
|
||||
1:3 OR
|
||||
"###);
|
||||
|
||||
insta::assert_display_snapshot!(p("channel Ponce"), @r###"
|
||||
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, or `_geoRadius` at `channel Ponce`.
|
||||
1:14 channel Ponce
|
||||
"###);
|
||||
|
||||
insta::assert_display_snapshot!(p("channel = Ponce OR"), @r###"
|
||||
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, or `_geoRadius` but instead got nothing.
|
||||
19:19 channel = Ponce OR
|
||||
"###);
|
||||
|
||||
insta::assert_display_snapshot!(p("_geoRadius"), @r###"
|
||||
The `_geoRadius` filter expects three arguments: `_geoRadius(latitude, longitude, radius)`.
|
||||
1:11 _geoRadius
|
||||
"###);
|
||||
|
||||
insta::assert_display_snapshot!(p("_geoRadius = 12"), @r###"
|
||||
The `_geoRadius` filter expects three arguments: `_geoRadius(latitude, longitude, radius)`.
|
||||
1:16 _geoRadius = 12
|
||||
"###);
|
||||
|
||||
insta::assert_display_snapshot!(p("_geoPoint(12, 13, 14)"), @r###"
|
||||
`_geoPoint` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance) built-in rule to filter on `_geo` coordinates.
|
||||
1:22 _geoPoint(12, 13, 14)
|
||||
"###);
|
||||
|
||||
insta::assert_display_snapshot!(p("position <= _geoPoint(12, 13, 14)"), @r###"
|
||||
`_geoPoint` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance) built-in rule to filter on `_geo` coordinates.
|
||||
13:34 position <= _geoPoint(12, 13, 14)
|
||||
"###);
|
||||
|
||||
insta::assert_display_snapshot!(p("position <= _geoRadius(12, 13, 14)"), @r###"
|
||||
The `_geoRadius` filter is an operation and can't be used as a value.
|
||||
13:35 position <= _geoRadius(12, 13, 14)
|
||||
"###);
|
||||
|
||||
insta::assert_display_snapshot!(p("channel = 'ponce"), @r###"
|
||||
Expression `\'ponce` is missing the following closing delimiter: `'`.
|
||||
11:17 channel = 'ponce
|
||||
"###);
|
||||
|
||||
insta::assert_display_snapshot!(p("channel = \"ponce"), @r###"
|
||||
Expression `\"ponce` is missing the following closing delimiter: `"`.
|
||||
11:17 channel = "ponce
|
||||
"###);
|
||||
|
||||
insta::assert_display_snapshot!(p("channel = mv OR (followers >= 1000"), @r###"
|
||||
Expression `(followers >= 1000` is missing the following closing delimiter: `)`.
|
||||
17:35 channel = mv OR (followers >= 1000
|
||||
"###);
|
||||
|
||||
insta::assert_display_snapshot!(p("channel = mv OR followers >= 1000)"), @r###"
|
||||
Found unexpected characters at the end of the filter: `)`. You probably forgot an `OR` or an `AND` rule.
|
||||
34:35 channel = mv OR followers >= 1000)
|
||||
"###);
|
||||
|
||||
insta::assert_display_snapshot!(p("colour NOT EXIST"), @r###"
|
||||
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, or `_geoRadius` at `colour NOT EXIST`.
|
||||
1:17 colour NOT EXIST
|
||||
"###);
|
||||
|
||||
insta::assert_display_snapshot!(p("subscribers 100 TO1000"), @r###"
|
||||
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, or `_geoRadius` at `subscribers 100 TO1000`.
|
||||
1:23 subscribers 100 TO1000
|
||||
"###);
|
||||
|
||||
insta::assert_display_snapshot!(p("channel = ponce ORdog != 'bernese mountain'"), @r###"
|
||||
Found unexpected characters at the end of the filter: `ORdog != \'bernese mountain\'`. You probably forgot an `OR` or an `AND` rule.
|
||||
17:44 channel = ponce ORdog != 'bernese mountain'
|
||||
"###);
|
||||
|
||||
insta::assert_display_snapshot!(p("colour IN blue, green]"), @r###"
|
||||
Expected `[` after `IN` keyword.
|
||||
11:23 colour IN blue, green]
|
||||
"###);
|
||||
|
||||
insta::assert_display_snapshot!(p("colour IN [blue, green, 'blue' > 2]"), @r###"
|
||||
Expected only comma-separated field names inside `IN[..]` but instead found `> 2]`.
|
||||
32:36 colour IN [blue, green, 'blue' > 2]
|
||||
"###);
|
||||
|
||||
insta::assert_display_snapshot!(p("colour IN [blue, green, AND]"), @r###"
|
||||
Expected only comma-separated field names inside `IN[..]` but instead found `AND]`.
|
||||
25:29 colour IN [blue, green, AND]
|
||||
"###);
|
||||
|
||||
insta::assert_display_snapshot!(p("colour IN [blue, green"), @r###"
|
||||
Expected matching `]` after the list of field names given to `IN[`
|
||||
23:23 colour IN [blue, green
|
||||
"###);
|
||||
|
||||
insta::assert_display_snapshot!(p("colour IN ['blue, green"), @r###"
|
||||
Expression `\'blue, green` is missing the following closing delimiter: `'`.
|
||||
12:24 colour IN ['blue, green
|
||||
"###);
|
||||
|
||||
insta::assert_display_snapshot!(p("x = EXISTS"), @r###"
|
||||
Was expecting a value but instead got `EXISTS`, which is a reserved keyword. To use `EXISTS` as a field name or a value, surround it by quotes.
|
||||
5:11 x = EXISTS
|
||||
"###);
|
||||
|
||||
insta::assert_display_snapshot!(p("AND = 8"), @r###"
|
||||
Was expecting a value but instead got `AND`, which is a reserved keyword. To use `AND` as a field name or a value, surround it by quotes.
|
||||
1:4 AND = 8
|
||||
"###);
|
||||
|
||||
insta::assert_display_snapshot!(p("((((((((((((((((((((((((((((((((((((((((((((((((((x = 1))))))))))))))))))))))))))))))))))))))))))))))))))"), @r###"
|
||||
The filter exceeded the maximum depth limit. Try rewriting the filter so that it contains fewer nested conditions.
|
||||
51:106 ((((((((((((((((((((((((((((((((((((((((((((((((((x = 1))))))))))))))))))))))))))))))))))))))))))))))))))
|
||||
"###);
|
||||
|
||||
insta::assert_display_snapshot!(
|
||||
p("NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT x = 1"),
|
||||
@r###"
|
||||
The filter exceeded the maximum depth limit. Try rewriting the filter so that it contains fewer nested conditions.
|
||||
797:802 NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT x = 1
|
||||
"###
|
||||
);
|
||||
|
||||
insta::assert_display_snapshot!(p(r#"NOT OR EXISTS AND EXISTS NOT EXISTS"#), @r###"
|
||||
Was expecting a value but instead got `OR`, which is a reserved keyword. To use `OR` as a field name or a value, surround it by quotes.
|
||||
5:7 NOT OR EXISTS AND EXISTS NOT EXISTS
|
||||
"###);
|
||||
}
|
||||
|
||||
/// For each filter, a token must exist at the given depth and none one level deeper.
#[test]
fn depth() {
    // (filter, deepest level at which a token is expected)
    let cases = [
        (
            "account_ids=1 OR account_ids=2 OR account_ids=3 OR account_ids=4 OR account_ids=5 OR account_ids=6",
            1,
        ),
        (
            "(account_ids=1 OR (account_ids=2 AND account_ids=3) OR (account_ids=4 AND account_ids=5) OR account_ids=6)",
            2,
        ),
        (
            "account_ids=1 OR account_ids=2 AND account_ids=3 OR account_ids=4 AND account_ids=5 OR account_ids=6",
            2,
        ),
    ];

    for (raw, deepest) in cases {
        let filter = FilterCondition::parse(raw).unwrap().unwrap();
        assert!(filter.token_at_depth(deepest).is_some());
        assert!(filter.token_at_depth(deepest + 1).is_none());
    }
}
|
||||
|
||||
/// Converting a plain `&str` into a [Token] must keep the text as the token's value.
#[test]
fn token_from_str() {
    let raw = "test string that should not be parsed";
    let converted: Token = raw.into();
    assert_eq!(converted.value(), raw);
}
|
||||
}
|
||||
|
||||
impl<'a> std::fmt::Display for FilterCondition<'a> {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
FilterCondition::Not(filter) => {
|
||||
write!(f, "NOT ({filter})")
|
||||
}
|
||||
FilterCondition::Condition { fid, op } => {
|
||||
write!(f, "{fid} {op}")
|
||||
}
|
||||
FilterCondition::In { fid, els } => {
|
||||
write!(f, "{fid} IN[")?;
|
||||
for el in els {
|
||||
write!(f, "{el}, ")?;
|
||||
}
|
||||
write!(f, "]")
|
||||
}
|
||||
FilterCondition::Or(els) => {
|
||||
write!(f, "OR[")?;
|
||||
for el in els {
|
||||
write!(f, "{el}, ")?;
|
||||
}
|
||||
write!(f, "]")
|
||||
}
|
||||
FilterCondition::And(els) => {
|
||||
write!(f, "AND[")?;
|
||||
for el in els {
|
||||
write!(f, "{el}, ")?;
|
||||
}
|
||||
write!(f, "]")
|
||||
}
|
||||
FilterCondition::GeoLowerThan { point, radius } => {
|
||||
write!(f, "_geoRadius({}, {}, {})", point[0], point[1], radius)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
impl<'a> std::fmt::Display for Condition<'a> {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
Condition::GreaterThan(token) => write!(f, "> {token}"),
|
||||
Condition::GreaterThanOrEqual(token) => write!(f, ">= {token}"),
|
||||
Condition::Equal(token) => write!(f, "= {token}"),
|
||||
Condition::NotEqual(token) => write!(f, "!= {token}"),
|
||||
Condition::Exists => write!(f, "EXISTS"),
|
||||
Condition::LowerThan(token) => write!(f, "< {token}"),
|
||||
Condition::LowerThanOrEqual(token) => write!(f, "<= {token}"),
|
||||
Condition::Between { from, to } => write!(f, "{from} TO {to}"),
|
||||
}
|
||||
}
|
||||
}
|
||||
impl<'a> std::fmt::Display for Token<'a> {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, "{{{}}}", self.value())
|
||||
}
|
||||
}
|
16
filter-parser/src/main.rs
Normal file
16
filter-parser/src/main.rs
Normal file
@ -0,0 +1,16 @@
|
||||
fn main() {
|
||||
let input = std::env::args().nth(1).expect("You must provide a filter to test");
|
||||
|
||||
println!("Trying to execute the following filter:\n{}\n", input);
|
||||
|
||||
match filter_parser::FilterCondition::parse(&input) {
|
||||
Ok(filter) => {
|
||||
println!("✅ Valid filter");
|
||||
println!("{:#?}", filter);
|
||||
}
|
||||
Err(e) => {
|
||||
println!("❎ Invalid filter");
|
||||
println!("{}", e);
|
||||
}
|
||||
}
|
||||
}
|
341
filter-parser/src/value.rs
Normal file
341
filter-parser/src/value.rs
Normal file
@ -0,0 +1,341 @@
|
||||
use nom::branch::alt;
|
||||
use nom::bytes::complete::{take_till, take_while, take_while1};
|
||||
use nom::character::complete::{char, multispace0};
|
||||
use nom::combinator::cut;
|
||||
use nom::sequence::{delimited, terminated};
|
||||
use nom::{InputIter, InputLength, InputTake, Slice};
|
||||
|
||||
use crate::error::{ExpectedValueKind, NomErrorExt};
|
||||
use crate::{parse_geo_point, parse_geo_radius, Error, ErrorKind, IResult, Span, Token};
|
||||
|
||||
/// This function goes through all characters in the [Span] if it finds any escaped character (`\`).
|
||||
/// It generates a new string with all `\` removed from the [Span].
|
||||
fn unescape(buf: Span, char_to_escape: char) -> String {
|
||||
let to_escape = format!("\\{}", char_to_escape);
|
||||
buf.replace(&to_escape, &char_to_escape.to_string())
|
||||
}
|
||||
|
||||
/// Parse a value in quote. If it encounter an escaped quote it'll unescape it.
|
||||
fn quoted_by(quote: char, input: Span) -> IResult<Token> {
|
||||
// empty fields / values are valid in json
|
||||
if input.is_empty() {
|
||||
return Ok((input.slice(input.input_len()..), input.into()));
|
||||
}
|
||||
|
||||
let mut escaped = false;
|
||||
let mut i = input.iter_indices();
|
||||
|
||||
while let Some((idx, c)) = i.next() {
|
||||
if c == quote {
|
||||
let (rem, output) = input.take_split(idx);
|
||||
return Ok((rem, Token::new(output, escaped.then(|| unescape(output, quote)))));
|
||||
} else if c == '\\' {
|
||||
if let Some((_, c)) = i.next() {
|
||||
escaped |= c == quote;
|
||||
} else {
|
||||
return Err(nom::Err::Error(Error::new_from_kind(
|
||||
input,
|
||||
ErrorKind::MalformedValue,
|
||||
)));
|
||||
}
|
||||
}
|
||||
// if it was preceeded by a `\` or if it was anything else we can continue to advance
|
||||
}
|
||||
|
||||
Ok((
|
||||
input.slice(input.input_len()..),
|
||||
Token::new(input, escaped.then(|| unescape(input, quote))),
|
||||
))
|
||||
}
|
||||
|
||||
// word = (alphanumeric | _ | - | .)+ except for reserved keywords
|
||||
pub fn word_not_keyword<'a>(input: Span<'a>) -> IResult<Token<'a>> {
|
||||
let (input, word): (_, Token<'a>) =
|
||||
take_while1(is_value_component)(input).map(|(s, t)| (s, t.into()))?;
|
||||
if is_keyword(word.value()) {
|
||||
return Err(nom::Err::Error(Error::new_from_kind(
|
||||
input,
|
||||
ErrorKind::ReservedKeyword(word.value().to_owned()),
|
||||
)));
|
||||
}
|
||||
Ok((input, word))
|
||||
}
|
||||
|
||||
// word = {tag}
|
||||
pub fn word_exact<'a, 'b: 'a>(tag: &'b str) -> impl Fn(Span<'a>) -> IResult<'a, Token<'a>> {
|
||||
move |input| {
|
||||
let (input, word): (_, Token<'a>) =
|
||||
take_while1(is_value_component)(input).map(|(s, t)| (s, t.into()))?;
|
||||
if word.value() == tag {
|
||||
Ok((input, word))
|
||||
} else {
|
||||
Err(nom::Err::Error(Error::new_from_kind(
|
||||
input,
|
||||
ErrorKind::InternalError(nom::error::ErrorKind::Tag),
|
||||
)))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// value = WS* ( word | singleQuoted | doubleQuoted) WS+
|
||||
pub fn parse_value(input: Span) -> IResult<Token> {
|
||||
// to get better diagnostic message we are going to strip the left whitespaces from the input right now
|
||||
let (input, _) = take_while(char::is_whitespace)(input)?;
|
||||
|
||||
// then, we want to check if the user is misusing a geo expression
|
||||
// This expression can’t finish without error.
|
||||
// We want to return an error in case of failure.
|
||||
if let Err(err) = parse_geo_point(input) {
|
||||
if err.is_failure() {
|
||||
return Err(err);
|
||||
}
|
||||
}
|
||||
match parse_geo_radius(input) {
|
||||
Ok(_) => return Err(nom::Err::Failure(Error::new_from_kind(input, ErrorKind::MisusedGeo))),
|
||||
// if we encountered a failure it means the user badly wrote a _geoRadius filter.
|
||||
// But instead of showing him how to fix his syntax we are going to tell him he should not use this filter as a value.
|
||||
Err(e) if e.is_failure() => {
|
||||
return Err(nom::Err::Failure(Error::new_from_kind(input, ErrorKind::MisusedGeo)))
|
||||
}
|
||||
_ => (),
|
||||
}
|
||||
|
||||
// this parser is only used when an error is encountered and it parse the
|
||||
// largest string possible that do not contain any “language” syntax.
|
||||
// If we try to parse `name = 🦀 AND language = rust` we want to return an
|
||||
// error saying we could not parse `🦀`. Not that no value were found or that
|
||||
// we could note parse `🦀 AND language = rust`.
|
||||
// we want to remove the space before entering the alt because if we don't,
|
||||
// when we create the errors from the output of the alt we have spaces everywhere
|
||||
let error_word = take_till::<_, _, Error>(is_syntax_component);
|
||||
|
||||
let (input, value) = terminated(
|
||||
alt((
|
||||
delimited(char('\''), cut(|input| quoted_by('\'', input)), cut(char('\''))),
|
||||
delimited(char('"'), cut(|input| quoted_by('"', input)), cut(char('"'))),
|
||||
word_not_keyword,
|
||||
)),
|
||||
multispace0,
|
||||
)(input)
|
||||
// if we found nothing in the alt it means the user specified something that was not recognized as a value
|
||||
.map_err(|e: nom::Err<Error>| {
|
||||
e.map_err(|error| {
|
||||
let expected_value_kind = if matches!(error.kind(), ErrorKind::ReservedKeyword(_)) {
|
||||
ExpectedValueKind::ReservedKeyword
|
||||
} else {
|
||||
ExpectedValueKind::Other
|
||||
};
|
||||
Error::new_from_kind(
|
||||
error_word(input).unwrap().1,
|
||||
ErrorKind::ExpectedValue(expected_value_kind),
|
||||
)
|
||||
})
|
||||
})
|
||||
.map_err(|e| {
|
||||
e.map_fail(|failure| {
|
||||
// if we found encountered a char failure it means the user had an unmatched quote
|
||||
if matches!(failure.kind(), ErrorKind::Char(_)) {
|
||||
Error::new_from_kind(input, ErrorKind::MissingClosingDelimiter(failure.char()))
|
||||
} else {
|
||||
// else we let the failure untouched
|
||||
failure
|
||||
}
|
||||
})
|
||||
})?;
|
||||
|
||||
Ok((input, value))
|
||||
}
|
||||
|
||||
/// Returns `true` for the characters allowed in an unquoted word:
/// alphanumeric characters, `_`, `-` and `.`.
fn is_value_component(c: char) -> bool {
    matches!(c, '_' | '-' | '.') || c.is_alphanumeric()
}
|
||||
|
||||
/// Returns `true` for whitespace and for the characters `(`, `)`, `=`, `<`, `>`
/// and `!`.
fn is_syntax_component(c: char) -> bool {
    matches!(c, '(' | ')' | '=' | '<' | '>' | '!') || c.is_whitespace()
}
|
||||
|
||||
/// Returns `true` when `s` is exactly (case-sensitively) one of the reserved
/// keywords listed below.
fn is_keyword(s: &str) -> bool {
    const KEYWORDS: [&str; 7] = ["AND", "OR", "IN", "NOT", "TO", "EXISTS", "_geoRadius"];
    KEYWORDS.contains(&s)
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub mod test {
|
||||
use nom::Finish;
|
||||
|
||||
use super::*;
|
||||
use crate::tests::rtok;
|
||||
|
||||
#[test]
|
||||
fn test_span() {
|
||||
let test_case = [
|
||||
("channel", rtok("", "channel")),
|
||||
(".private", rtok("", ".private")),
|
||||
("I-love-kebab", rtok("", "I-love-kebab")),
|
||||
("but_snakes_is_also_good", rtok("", "but_snakes_is_also_good")),
|
||||
("parens(", rtok("", "parens")),
|
||||
("parens)", rtok("", "parens")),
|
||||
("not!", rtok("", "not")),
|
||||
(" channel", rtok(" ", "channel")),
|
||||
("channel ", rtok("", "channel")),
|
||||
(" channel ", rtok(" ", "channel")),
|
||||
("'channel'", rtok("'", "channel")),
|
||||
("\"channel\"", rtok("\"", "channel")),
|
||||
("'cha)nnel'", rtok("'", "cha)nnel")),
|
||||
("'cha\"nnel'", rtok("'", "cha\"nnel")),
|
||||
("\"cha'nnel\"", rtok("\"", "cha'nnel")),
|
||||
("\" some spaces \"", rtok("\"", " some spaces ")),
|
||||
("\"cha'nnel\"", rtok("'", "cha'nnel")),
|
||||
("\"cha'nnel\"", rtok("'", "cha'nnel")),
|
||||
("I'm tamo", rtok("'m tamo", "I")),
|
||||
("\"I'm \\\"super\\\" tamo\"", rtok("\"", "I'm \\\"super\\\" tamo")),
|
||||
];
|
||||
|
||||
for (input, expected) in test_case {
|
||||
let input = Span::new_extra(input, input);
|
||||
let result = parse_value(input);
|
||||
|
||||
assert!(
|
||||
result.is_ok(),
|
||||
"Filter `{:?}` was supposed to be parsed but failed with the following error: `{}`",
|
||||
expected,
|
||||
result.unwrap_err()
|
||||
);
|
||||
let token = result.unwrap().1;
|
||||
assert_eq!(token, expected, "Filter `{}` failed.", input);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_escape_inside_double_quote() {
|
||||
// (input, remaining, expected output token, output value)
|
||||
let test_case = [
|
||||
("aaaa", "", rtok("", "aaaa"), "aaaa"),
|
||||
(r#"aa"aa"#, r#""aa"#, rtok("", "aa"), "aa"),
|
||||
(r#"aa\"aa"#, r#""#, rtok("", r#"aa\"aa"#), r#"aa"aa"#),
|
||||
(r#"aa\\\aa"#, r#""#, rtok("", r#"aa\\\aa"#), r#"aa\\\aa"#),
|
||||
(r#"aa\\"\aa"#, r#""\aa"#, rtok("", r#"aa\\"#), r#"aa\\"#),
|
||||
(r#"aa\\\"\aa"#, r#""#, rtok("", r#"aa\\\"\aa"#), r#"aa\\"\aa"#),
|
||||
(r#"\"\""#, r#""#, rtok("", r#"\"\""#), r#""""#),
|
||||
];
|
||||
|
||||
for (input, remaining, expected_tok, expected_val) in test_case {
|
||||
let span = Span::new_extra(input, "");
|
||||
let result = quoted_by('"', span);
|
||||
assert!(result.is_ok());
|
||||
|
||||
let (rem, output) = result.unwrap();
|
||||
assert_eq!(rem.to_string(), remaining);
|
||||
assert_eq!(output, expected_tok);
|
||||
assert_eq!(output.value(), expected_val.to_string());
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_unescape() {
|
||||
// double quote
|
||||
assert_eq!(
|
||||
unescape(Span::new_extra(r#"Hello \"World\""#, ""), '"'),
|
||||
r#"Hello "World""#.to_string()
|
||||
);
|
||||
assert_eq!(
|
||||
unescape(Span::new_extra(r#"Hello \\\"World\\\""#, ""), '"'),
|
||||
r#"Hello \\"World\\""#.to_string()
|
||||
);
|
||||
// simple quote
|
||||
assert_eq!(
|
||||
unescape(Span::new_extra(r#"Hello \'World\'"#, ""), '\''),
|
||||
r#"Hello 'World'"#.to_string()
|
||||
);
|
||||
assert_eq!(
|
||||
unescape(Span::new_extra(r#"Hello \\\'World\\\'"#, ""), '\''),
|
||||
r#"Hello \\'World\\'"#.to_string()
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_value() {
|
||||
let test_case = [
|
||||
// (input, expected value, if a string was generated to hold the new value)
|
||||
("channel", "channel", false),
|
||||
// All the base test, no escaped string should be generated
|
||||
(".private", ".private", false),
|
||||
("I-love-kebab", "I-love-kebab", false),
|
||||
("but_snakes_is_also_good", "but_snakes_is_also_good", false),
|
||||
("parens(", "parens", false),
|
||||
("parens)", "parens", false),
|
||||
("not!", "not", false),
|
||||
(" channel", "channel", false),
|
||||
("channel ", "channel", false),
|
||||
(" channel ", "channel", false),
|
||||
("'channel'", "channel", false),
|
||||
("\"channel\"", "channel", false),
|
||||
("'cha)nnel'", "cha)nnel", false),
|
||||
("'cha\"nnel'", "cha\"nnel", false),
|
||||
("\"cha'nnel\"", "cha'nnel", false),
|
||||
("\" some spaces \"", " some spaces ", false),
|
||||
("\"cha'nnel\"", "cha'nnel", false),
|
||||
("\"cha'nnel\"", "cha'nnel", false),
|
||||
("I'm tamo", "I", false),
|
||||
// escaped thing but not quote
|
||||
(r#""\\""#, r#"\\"#, false),
|
||||
(r#""\\\\\\""#, r#"\\\\\\"#, false),
|
||||
(r#""aa\\aa""#, r#"aa\\aa"#, false),
|
||||
// with double quote
|
||||
(r#""Hello \"world\"""#, r#"Hello "world""#, true),
|
||||
(r#""Hello \\\"world\\\"""#, r#"Hello \\"world\\""#, true),
|
||||
(r#""I'm \"super\" tamo""#, r#"I'm "super" tamo"#, true),
|
||||
(r#""\"\"""#, r#""""#, true),
|
||||
// with simple quote
|
||||
(r#"'Hello \'world\''"#, r#"Hello 'world'"#, true),
|
||||
(r#"'Hello \\\'world\\\''"#, r#"Hello \\'world\\'"#, true),
|
||||
(r#"'I\'m "super" tamo'"#, r#"I'm "super" tamo"#, true),
|
||||
(r#"'\'\''"#, r#"''"#, true),
|
||||
];
|
||||
|
||||
for (input, expected, escaped) in test_case {
|
||||
let input = Span::new_extra(input, input);
|
||||
let result = parse_value(input);
|
||||
|
||||
assert!(
|
||||
result.is_ok(),
|
||||
"Filter `{:?}` was supposed to be parsed but failed with the following error: `{}`",
|
||||
expected,
|
||||
result.unwrap_err()
|
||||
);
|
||||
let token = result.unwrap().1;
|
||||
assert_eq!(
|
||||
token.value.is_some(),
|
||||
escaped,
|
||||
"Filter `{}` was not supposed to be escaped",
|
||||
input
|
||||
);
|
||||
assert_eq!(token.value(), expected, "Filter `{}` failed.", input);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn diagnostic() {
|
||||
let test_case = [
|
||||
("🦀", "🦀"),
|
||||
(" 🦀", "🦀"),
|
||||
("🦀 AND crab = truc", "🦀"),
|
||||
("🦀_in_name", "🦀_in_name"),
|
||||
(" (name = ...", ""),
|
||||
];
|
||||
|
||||
for (input, expected) in test_case {
|
||||
let input = Span::new_extra(input, input);
|
||||
let result = parse_value(input);
|
||||
|
||||
assert!(
|
||||
result.is_err(),
|
||||
"Filter `{}` wasn’t supposed to be parsed but it did with the following result: `{:?}`",
|
||||
expected,
|
||||
result.unwrap()
|
||||
);
|
||||
// get the inner string referenced in the error
|
||||
let value = *result.finish().unwrap_err().context().fragment();
|
||||
assert_eq!(value, expected, "Filter `{}` was supposed to fail with the following value: `{}`, but it failed with: `{}`.", input, expected, value);
|
||||
}
|
||||
}
|
||||
}
|
17
flatten-serde-json/Cargo.toml
Normal file
17
flatten-serde-json/Cargo.toml
Normal file
@ -0,0 +1,17 @@
|
||||
[package]
|
||||
name = "flatten-serde-json"
|
||||
version = "1.0.0"
|
||||
edition = "2021"
|
||||
description = "Flatten serde-json objects like elastic search"
|
||||
readme = "README.md"
|
||||
publish = false
|
||||
|
||||
[dependencies]
|
||||
serde_json = "1.0"
|
||||
|
||||
[dev-dependencies]
|
||||
criterion = { version = "0.4.0", features = ["html_reports"] }
|
||||
|
||||
[[bench]]
|
||||
name = "benchmarks"
|
||||
harness = false
|
153
flatten-serde-json/README.md
Normal file
153
flatten-serde-json/README.md
Normal file
@ -0,0 +1,153 @@
|
||||
# Flatten serde Json
|
||||
|
||||
This crate flattens [`serde_json`](https://docs.rs/serde_json/latest/serde_json/) `Object`s into a format
|
||||
similar to [Elasticsearch](https://www.elastic.co/guide/en/elasticsearch/reference/current/nested.html).
|
||||
|
||||
## Examples
|
||||
|
||||
### There is nothing to do
|
||||
|
||||
```json
|
||||
{
|
||||
"id": "287947",
|
||||
"title": "Shazam!",
|
||||
"release_date": 1553299200,
|
||||
"genres": [
|
||||
"Action",
|
||||
"Comedy",
|
||||
"Fantasy"
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
Flattens to:
|
||||
```json
|
||||
{
|
||||
"id": "287947",
|
||||
"title": "Shazam!",
|
||||
"release_date": 1553299200,
|
||||
"genres": [
|
||||
"Action",
|
||||
"Comedy",
|
||||
"Fantasy"
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
------------
|
||||
|
||||
### Objects
|
||||
|
||||
```json
|
||||
{
|
||||
"a": {
|
||||
"b": "c",
|
||||
"d": "e",
|
||||
"f": "g"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Flattens to:
|
||||
```json
|
||||
{
|
||||
"a": [],
"a.b": "c",
|
||||
"a.d": "e",
|
||||
"a.f": "g"
|
||||
}
|
||||
```
|
||||
|
||||
------------
|
||||
|
||||
### Array of objects
|
||||
|
||||
```json
|
||||
{
|
||||
"a": [
|
||||
{ "b": "c" },
|
||||
{ "b": "d" },
|
||||
{ "b": "e" }
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
Flattens to:
|
||||
```json
|
||||
{
|
||||
"a": [],
"a.b": ["c", "d", "e"]
|
||||
}
|
||||
```
|
||||
|
||||
------------
|
||||
|
||||
### Array of objects with normal value in the array
|
||||
|
||||
```json
|
||||
{
|
||||
"a": [
|
||||
42,
|
||||
{ "b": "c" },
|
||||
{ "b": "d" },
|
||||
{ "b": "e" }
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
Flattens to:
|
||||
```json
|
||||
{
|
||||
"a": 42,
|
||||
"a.b": ["c", "d", "e"]
|
||||
}
|
||||
```
|
||||
|
||||
------------
|
||||
|
||||
### Array of objects of array of objects of ...
|
||||
|
||||
```json
|
||||
{
|
||||
"a": [
|
||||
"b",
|
||||
["c", "d"],
|
||||
{ "e": ["f", "g"] },
|
||||
[
|
||||
{ "h": "i" },
|
||||
{ "e": ["j", { "z": "y" }] },
|
||||
],
|
||||
["l"],
|
||||
"m"
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
Flattens to:
|
||||
```json
|
||||
{
|
||||
"a": ["b", "c", "d", "l", "m"],
|
||||
"a.e": ["f", "g", "j"],
|
||||
"a.h": "i",
|
||||
"a.e.z": "y"
|
||||
}
|
||||
```
|
||||
|
||||
------------
|
||||
|
||||
### Collision between a generated field name and an already existing field
|
||||
|
||||
```json
|
||||
{
|
||||
"a": {
|
||||
"b": "c"
|
||||
},
|
||||
"a.b": "d"
|
||||
}
|
||||
```
|
||||
|
||||
Flattens to:
|
||||
```json
|
||||
{
|
||||
"a": [],
"a.b": ["c", "d"]
|
||||
}
|
||||
```
|
||||
|
42
flatten-serde-json/benches/benchmarks.rs
Normal file
42
flatten-serde-json/benches/benchmarks.rs
Normal file
@ -0,0 +1,42 @@
|
||||
use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion};
|
||||
use flatten_serde_json::flatten;
|
||||
use serde_json::json;
|
||||
|
||||
pub fn flatten_simple(c: &mut Criterion) {
|
||||
let mut input = json!({
|
||||
"a": {
|
||||
"b": "c",
|
||||
"d": "e",
|
||||
"f": "g"
|
||||
}
|
||||
});
|
||||
let object = input.as_object_mut().unwrap();
|
||||
|
||||
c.bench_with_input(BenchmarkId::new("flatten", "simple"), &object, |b, input| {
|
||||
b.iter(|| flatten(input))
|
||||
});
|
||||
}
|
||||
|
||||
pub fn flatten_complex(c: &mut Criterion) {
|
||||
let mut input = json!({
|
||||
"a": [
|
||||
"b",
|
||||
["c", "d"],
|
||||
{ "e": ["f", "g"] },
|
||||
[
|
||||
{ "h": "i" },
|
||||
{ "e": ["j", { "z": "y" }] },
|
||||
],
|
||||
["l"],
|
||||
"m",
|
||||
]
|
||||
});
|
||||
let object = input.as_object_mut().unwrap();
|
||||
|
||||
c.bench_with_input(BenchmarkId::new("flatten", "complex"), &object, |b, input| {
|
||||
b.iter(|| flatten(input))
|
||||
});
|
||||
}
|
||||
|
||||
criterion_group!(benches, flatten_simple, flatten_complex);
|
||||
criterion_main!(benches);
|
27
flatten-serde-json/fuzz/Cargo.toml
Normal file
27
flatten-serde-json/fuzz/Cargo.toml
Normal file
@ -0,0 +1,27 @@
|
||||
[package]
|
||||
name = "flatten-serde-json-fuzz"
|
||||
version = "0.0.0"
|
||||
authors = ["Automatically generated"]
|
||||
publish = false
|
||||
edition = "2018"
|
||||
|
||||
[package.metadata]
|
||||
cargo-fuzz = true
|
||||
|
||||
[dependencies]
|
||||
libfuzzer-sys = "0.4"
|
||||
arbitrary-json = "0.1.1"
|
||||
json-depth-checker = { path = "../../json-depth-checker" }
|
||||
|
||||
[dependencies.flatten-serde-json]
|
||||
path = ".."
|
||||
|
||||
# Prevent this from interfering with workspaces
|
||||
[workspace]
|
||||
members = ["."]
|
||||
|
||||
[[bin]]
|
||||
name = "flatten"
|
||||
path = "fuzz_targets/flatten.rs"
|
||||
test = false
|
||||
doc = false
|
12
flatten-serde-json/fuzz/fuzz_targets/flatten.rs
Normal file
12
flatten-serde-json/fuzz/fuzz_targets/flatten.rs
Normal file
@ -0,0 +1,12 @@
|
||||
#![no_main]
|
||||
use arbitrary_json::ArbitraryObject;
|
||||
use flatten_serde_json::flatten;
|
||||
use json_depth_checker::should_flatten_from_value;
|
||||
use libfuzzer_sys::fuzz_target;
|
||||
|
||||
fuzz_target!(|object: ArbitraryObject| {
    // Flattening must leave no nested structure behind: every value of the
    // output has to be one that `should_flatten_from_value` rejects.
    // `all` is vacuously true for an empty output, so no emptiness guard
    // is needed.
    let flattened = flatten(&object);
    assert!(flattened.values().all(|value| !should_flatten_from_value(value)));
});
|
305
flatten-serde-json/src/lib.rs
Normal file
305
flatten-serde-json/src/lib.rs
Normal file
@ -0,0 +1,305 @@
|
||||
#![doc = include_str!("../README.md")]
|
||||
|
||||
use serde_json::{Map, Value};
|
||||
|
||||
pub fn flatten(json: &Map<String, Value>) -> Map<String, Value> {
|
||||
let mut obj = Map::new();
|
||||
let mut all_keys = vec![];
|
||||
insert_object(&mut obj, None, json, &mut all_keys);
|
||||
for key in all_keys {
|
||||
obj.entry(key).or_insert(Value::Array(vec![]));
|
||||
}
|
||||
obj
|
||||
}
|
||||
|
||||
fn insert_object(
|
||||
base_json: &mut Map<String, Value>,
|
||||
base_key: Option<&str>,
|
||||
object: &Map<String, Value>,
|
||||
all_keys: &mut Vec<String>,
|
||||
) {
|
||||
for (key, value) in object {
|
||||
let new_key = base_key.map_or_else(|| key.clone(), |base_key| format!("{base_key}.{key}"));
|
||||
all_keys.push(new_key.clone());
|
||||
if let Some(array) = value.as_array() {
|
||||
insert_array(base_json, &new_key, array, all_keys);
|
||||
} else if let Some(object) = value.as_object() {
|
||||
insert_object(base_json, Some(&new_key), object, all_keys);
|
||||
} else {
|
||||
insert_value(base_json, &new_key, value.clone());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn insert_array(
|
||||
base_json: &mut Map<String, Value>,
|
||||
base_key: &str,
|
||||
array: &Vec<Value>,
|
||||
all_keys: &mut Vec<String>,
|
||||
) {
|
||||
for value in array {
|
||||
if let Some(object) = value.as_object() {
|
||||
insert_object(base_json, Some(base_key), object, all_keys);
|
||||
} else if let Some(sub_array) = value.as_array() {
|
||||
insert_array(base_json, base_key, sub_array, all_keys);
|
||||
} else {
|
||||
insert_value(base_json, base_key, value.clone());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn insert_value(base_json: &mut Map<String, Value>, key: &str, to_insert: Value) {
|
||||
debug_assert!(!to_insert.is_object());
|
||||
debug_assert!(!to_insert.is_array());
|
||||
|
||||
// does the field already exists?
|
||||
if let Some(value) = base_json.get_mut(key) {
|
||||
// is it already an array
|
||||
if let Some(array) = value.as_array_mut() {
|
||||
array.push(to_insert);
|
||||
// or is there a collision
|
||||
} else {
|
||||
let value = std::mem::take(value);
|
||||
base_json[key] = Value::Array(vec![value, to_insert]);
|
||||
}
|
||||
// if it does not exist we can push the value untouched
|
||||
} else {
|
||||
base_json.insert(key.to_string(), to_insert);
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use serde_json::json;
|
||||
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn no_flattening() {
|
||||
let mut base: Value = json!({
|
||||
"id": "287947",
|
||||
"title": "Shazam!",
|
||||
"release_date": 1553299200,
|
||||
"genres": [
|
||||
"Action",
|
||||
"Comedy",
|
||||
"Fantasy"
|
||||
]
|
||||
});
|
||||
let json = std::mem::take(base.as_object_mut().unwrap());
|
||||
let flat = flatten(&json);
|
||||
|
||||
println!(
|
||||
"got:\n{}\nexpected:\n{}\n",
|
||||
serde_json::to_string_pretty(&flat).unwrap(),
|
||||
serde_json::to_string_pretty(&json).unwrap()
|
||||
);
|
||||
|
||||
assert_eq!(flat, json);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn flatten_object() {
|
||||
let mut base: Value = json!({
|
||||
"a": {
|
||||
"b": "c",
|
||||
"d": "e",
|
||||
"f": "g"
|
||||
}
|
||||
});
|
||||
let json = std::mem::take(base.as_object_mut().unwrap());
|
||||
let flat = flatten(&json);
|
||||
|
||||
assert_eq!(
|
||||
&flat,
|
||||
json!({
|
||||
"a": [],
|
||||
"a.b": "c",
|
||||
"a.d": "e",
|
||||
"a.f": "g"
|
||||
})
|
||||
.as_object()
|
||||
.unwrap()
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn flatten_array() {
|
||||
let mut base: Value = json!({
|
||||
"a": [
|
||||
1,
|
||||
"b",
|
||||
[],
|
||||
[{}],
|
||||
{ "b": "c" },
|
||||
{ "b": "d" },
|
||||
{ "b": "e" },
|
||||
]
|
||||
});
|
||||
let json = std::mem::take(base.as_object_mut().unwrap());
|
||||
let flat = flatten(&json);
|
||||
|
||||
assert_eq!(
|
||||
&flat,
|
||||
json!({
|
||||
"a": [1, "b"],
|
||||
"a.b": ["c", "d", "e"],
|
||||
})
|
||||
.as_object()
|
||||
.unwrap()
|
||||
);
|
||||
|
||||
// here we must keep 42 in "a"
|
||||
let mut base: Value = json!({
|
||||
"a": [
|
||||
42,
|
||||
{ "b": "c" },
|
||||
{ "b": "d" },
|
||||
{ "b": "e" },
|
||||
]
|
||||
});
|
||||
let json = std::mem::take(base.as_object_mut().unwrap());
|
||||
let flat = flatten(&json);
|
||||
|
||||
assert_eq!(
|
||||
&flat,
|
||||
json!({
|
||||
"a": 42,
|
||||
"a.b": ["c", "d", "e"],
|
||||
})
|
||||
.as_object()
|
||||
.unwrap()
|
||||
);
|
||||
|
||||
// here we must keep null in "a"
|
||||
let mut base: Value = json!({
|
||||
"a": [
|
||||
{ "b": "c" },
|
||||
{ "b": "d" },
|
||||
{ "b": "e" },
|
||||
null,
|
||||
]
|
||||
});
|
||||
let json = std::mem::take(base.as_object_mut().unwrap());
|
||||
let flat = flatten(&json);
|
||||
|
||||
assert_eq!(
|
||||
&flat,
|
||||
json!({
|
||||
"a": null,
|
||||
"a.b": ["c", "d", "e"],
|
||||
})
|
||||
.as_object()
|
||||
.unwrap()
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn collision_with_object() {
|
||||
let mut base: Value = json!({
|
||||
"a": {
|
||||
"b": "c",
|
||||
},
|
||||
"a.b": "d",
|
||||
});
|
||||
let json = std::mem::take(base.as_object_mut().unwrap());
|
||||
let flat = flatten(&json);
|
||||
|
||||
assert_eq!(
|
||||
&flat,
|
||||
json!({
|
||||
"a": [],
|
||||
"a.b": ["c", "d"],
|
||||
})
|
||||
.as_object()
|
||||
.unwrap()
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn collision_with_array() {
|
||||
let mut base: Value = json!({
|
||||
"a": [
|
||||
{ "b": "c" },
|
||||
{ "b": "d", "c": "e" },
|
||||
[35],
|
||||
],
|
||||
"a.b": "f",
|
||||
});
|
||||
let json = std::mem::take(base.as_object_mut().unwrap());
|
||||
let flat = flatten(&json);
|
||||
|
||||
assert_eq!(
|
||||
&flat,
|
||||
json!({
|
||||
"a.b": ["c", "d", "f"],
|
||||
"a.c": "e",
|
||||
"a": 35,
|
||||
})
|
||||
.as_object()
|
||||
.unwrap()
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn flatten_nested_arrays() {
|
||||
let mut base: Value = json!({
|
||||
"a": [
|
||||
["b", "c"],
|
||||
{ "d": "e" },
|
||||
["f", "g"],
|
||||
[
|
||||
{ "h": "i" },
|
||||
{ "d": "j" },
|
||||
],
|
||||
["k", "l"],
|
||||
]
|
||||
});
|
||||
let json = std::mem::take(base.as_object_mut().unwrap());
|
||||
let flat = flatten(&json);
|
||||
|
||||
assert_eq!(
|
||||
&flat,
|
||||
json!({
|
||||
"a": ["b", "c", "f", "g", "k", "l"],
|
||||
"a.d": ["e", "j"],
|
||||
"a.h": "i",
|
||||
})
|
||||
.as_object()
|
||||
.unwrap()
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn flatten_nested_arrays_and_objects() {
|
||||
let mut base: Value = json!({
|
||||
"a": [
|
||||
"b",
|
||||
["c", "d"],
|
||||
{ "e": ["f", "g"] },
|
||||
[
|
||||
{ "h": "i" },
|
||||
{ "e": ["j", { "z": "y" }] },
|
||||
],
|
||||
["l"],
|
||||
"m",
|
||||
]
|
||||
});
|
||||
let json = std::mem::take(base.as_object_mut().unwrap());
|
||||
let flat = flatten(&json);
|
||||
|
||||
println!("{}", serde_json::to_string_pretty(&flat).unwrap());
|
||||
|
||||
assert_eq!(
|
||||
&flat,
|
||||
json!({
|
||||
"a": ["b", "c", "d", "l", "m"],
|
||||
"a.e": ["f", "g", "j"],
|
||||
"a.h": "i",
|
||||
"a.e.z": "y",
|
||||
})
|
||||
.as_object()
|
||||
.unwrap()
|
||||
);
|
||||
}
|
||||
}
|
11
flatten-serde-json/src/main.rs
Normal file
11
flatten-serde-json/src/main.rs
Normal file
@ -0,0 +1,11 @@
|
||||
use std::io::stdin;
|
||||
|
||||
use flatten_serde_json::flatten;
|
||||
use serde_json::{Map, Value};
|
||||
|
||||
fn main() {
|
||||
let json: Map<String, Value> = serde_json::from_reader(stdin()).unwrap();
|
||||
|
||||
let result = flatten(&json);
|
||||
println!("{}", serde_json::to_string_pretty(&result).unwrap());
|
||||
}
|
@ -139,8 +139,8 @@ impl ErrorCode for Error {
|
||||
match self {
|
||||
Error::IndexNotFound(_) => Code::IndexNotFound,
|
||||
Error::IndexAlreadyExists(_) => Code::IndexAlreadyExists,
|
||||
Error::SwapDuplicateIndexesFound(_) => Code::InvalidDuplicateIndexesFound,
|
||||
Error::SwapDuplicateIndexFound(_) => Code::InvalidDuplicateIndexesFound,
|
||||
Error::SwapDuplicateIndexesFound(_) => Code::InvalidSwapDuplicateIndexFound,
|
||||
Error::SwapDuplicateIndexFound(_) => Code::InvalidSwapDuplicateIndexFound,
|
||||
Error::SwapIndexNotFound(_) => Code::InvalidSwapIndexes,
|
||||
Error::SwapIndexesNotFound(_) => Code::InvalidSwapIndexes,
|
||||
Error::InvalidTaskDate { field, .. } => (*field).into(),
|
||||
@ -150,8 +150,8 @@ impl ErrorCode for Error {
|
||||
Error::InvalidTaskCanceledBy { .. } => Code::InvalidTaskCanceledBy,
|
||||
Error::InvalidIndexUid { .. } => Code::InvalidIndexUid,
|
||||
Error::TaskNotFound(_) => Code::TaskNotFound,
|
||||
Error::TaskDeletionWithEmptyQuery => Code::TaskDeletionWithEmptyQuery,
|
||||
Error::TaskCancelationWithEmptyQuery => Code::TaskCancelationWithEmptyQuery,
|
||||
Error::TaskDeletionWithEmptyQuery => Code::MissingTaskFilters,
|
||||
Error::TaskCancelationWithEmptyQuery => Code::MissingTaskFilters,
|
||||
Error::Dump(e) => e.error_code(),
|
||||
Error::Milli(e) => e.error_code(),
|
||||
Error::ProcessBatchPanicked => Code::Internal,
|
||||
|
16
json-depth-checker/Cargo.toml
Normal file
16
json-depth-checker/Cargo.toml
Normal file
@ -0,0 +1,16 @@
|
||||
[package]
|
||||
name = "json-depth-checker"
|
||||
version = "1.0.0"
|
||||
edition = "2021"
|
||||
description = "A library that indicates if a JSON must be flattened"
|
||||
publish = false
|
||||
|
||||
[dependencies]
|
||||
serde_json = "1.0"
|
||||
|
||||
[dev-dependencies]
|
||||
criterion = "0.4.0"
|
||||
|
||||
[[bench]]
|
||||
name = "depth"
|
||||
harness = false
|
59
json-depth-checker/benches/depth.rs
Normal file
59
json-depth-checker/benches/depth.rs
Normal file
@ -0,0 +1,59 @@
|
||||
use criterion::{criterion_group, criterion_main, Criterion};
|
||||
use json_depth_checker::should_flatten_from_unchecked_slice;
|
||||
use serde_json::json;
|
||||
|
||||
fn criterion_benchmark(c: &mut Criterion) {
|
||||
let null = serde_json::to_vec(&json!(null)).unwrap();
|
||||
let bool_true = serde_json::to_vec(&json!(true)).unwrap();
|
||||
let bool_false = serde_json::to_vec(&json!(false)).unwrap();
|
||||
let integer = serde_json::to_vec(&json!(42)).unwrap();
|
||||
let float = serde_json::to_vec(&json!(1456.258)).unwrap();
|
||||
let string = serde_json::to_vec(&json!("hello world")).unwrap();
|
||||
let object = serde_json::to_vec(&json!({ "hello": "world",})).unwrap();
|
||||
let complex_object = serde_json::to_vec(&json!({
|
||||
"doggos": [
|
||||
{ "bernard": true },
|
||||
{ "michel": 42 },
|
||||
false,
|
||||
],
|
||||
"bouvier": true,
|
||||
"caniche": null,
|
||||
}))
|
||||
.unwrap();
|
||||
let simple_array = serde_json::to_vec(&json!([
|
||||
1,
|
||||
2,
|
||||
3,
|
||||
"viva",
|
||||
"l\"algeria",
|
||||
true,
|
||||
"[array]",
|
||||
"escaped string \""
|
||||
]))
|
||||
.unwrap();
|
||||
let array_of_array = serde_json::to_vec(&json!([1, [2, [3]]])).unwrap();
|
||||
let array_of_object = serde_json::to_vec(&json!([1, [2, [3]], {}])).unwrap();
|
||||
|
||||
c.bench_function("null", |b| b.iter(|| should_flatten_from_unchecked_slice(&null)));
|
||||
c.bench_function("true", |b| b.iter(|| should_flatten_from_unchecked_slice(&bool_true)));
|
||||
c.bench_function("false", |b| b.iter(|| should_flatten_from_unchecked_slice(&bool_false)));
|
||||
c.bench_function("integer", |b| b.iter(|| should_flatten_from_unchecked_slice(&integer)));
|
||||
c.bench_function("float", |b| b.iter(|| should_flatten_from_unchecked_slice(&float)));
|
||||
c.bench_function("string", |b| b.iter(|| should_flatten_from_unchecked_slice(&string)));
|
||||
c.bench_function("object", |b| b.iter(|| should_flatten_from_unchecked_slice(&object)));
|
||||
c.bench_function("complex object", |b| {
|
||||
b.iter(|| should_flatten_from_unchecked_slice(&complex_object))
|
||||
});
|
||||
c.bench_function("simple array", |b| {
|
||||
b.iter(|| should_flatten_from_unchecked_slice(&simple_array))
|
||||
});
|
||||
c.bench_function("array of array", |b| {
|
||||
b.iter(|| should_flatten_from_unchecked_slice(&array_of_array))
|
||||
});
|
||||
c.bench_function("array of object", |b| {
|
||||
b.iter(|| should_flatten_from_unchecked_slice(&array_of_object))
|
||||
});
|
||||
}
|
||||
|
||||
criterion_group!(benches, criterion_benchmark);
|
||||
criterion_main!(benches);
|
27
json-depth-checker/fuzz/Cargo.toml
Normal file
27
json-depth-checker/fuzz/Cargo.toml
Normal file
@ -0,0 +1,27 @@
|
||||
[package]
|
||||
name = "json-depth-checker"
|
||||
version = "0.0.0"
|
||||
authors = ["Automatically generated"]
|
||||
publish = false
|
||||
edition = "2018"
|
||||
|
||||
[package.metadata]
|
||||
cargo-fuzz = true
|
||||
|
||||
[dependencies]
|
||||
libfuzzer-sys = "0.4"
|
||||
arbitrary-json = "0.1.1"
|
||||
serde_json = "1.0.79"
|
||||
|
||||
[dependencies.json-depth-checker]
|
||||
path = ".."
|
||||
|
||||
# Prevent this from interfering with workspaces
|
||||
[workspace]
|
||||
members = ["."]
|
||||
|
||||
[[bin]]
|
||||
name = "depth"
|
||||
path = "fuzz_targets/depth.rs"
|
||||
test = false
|
||||
doc = false
|
13
json-depth-checker/fuzz/fuzz_targets/depth.rs
Normal file
13
json-depth-checker/fuzz/fuzz_targets/depth.rs
Normal file
@ -0,0 +1,13 @@
|
||||
#![no_main]
|
||||
use arbitrary_json::ArbitraryValue;
|
||||
use json_depth_checker::*;
|
||||
use libfuzzer_sys::fuzz_target;
|
||||
|
||||
fuzz_target!(|value: ArbitraryValue| {
    // The `Value`-based check and the byte-slice check must always agree.
    let value = serde_json::Value::from(value);
    let from_value = should_flatten_from_value(&value);

    let bytes = serde_json::to_vec(&value).unwrap();
    let from_slice = should_flatten_from_unchecked_slice(&bytes);

    assert_eq!(from_value, from_slice);
});
|
114
json-depth-checker/src/lib.rs
Normal file
114
json-depth-checker/src/lib.rs
Normal file
@ -0,0 +1,114 @@
|
||||
use serde_json::Value;
|
||||
|
||||
/// Tells whether a serialized JSON value needs to be flattened.
///
/// The input MUST be valid JSON produced by `serde_json::to_vec`. This
/// function is deliberately dumb and fast: it makes a lot of assumptions
/// about the way `serde_json` writes its output and performs no validation.
///
/// Returns `true` if the JSON is an object, an array of arrays, or an array
/// containing an object. Returns `false` for everything else, including an
/// empty slice.
pub fn should_flatten_from_unchecked_slice(json: &[u8]) -> bool {
    // `serde_json` emits no leading whitespace, so the first byte alone tells
    // an object from an array from a simple value.
    let rest = match json.split_first() {
        Some((b'{', _)) => return true,
        Some((b'[', rest)) => rest,
        // empty input or a simple value
        _ => return false,
    };

    // Inside the array we look for a nested `[` or `{`, ignoring the ones that
    // appear within strings. The input is trusted to be valid, so there is no
    // need to check that the array is correctly closed.
    let mut escaped = false; // set by a `\`, makes the next byte be skipped
    let mut inside_string = false;

    for &byte in rest {
        if escaped {
            escaped = false;
            continue;
        }
        match (byte, inside_string) {
            (b'\\', _) => escaped = true,
            // an unescaped quote either opens or closes a string
            (b'"', _) => inside_string = !inside_string,
            (b'[' | b'{', false) => return true,
            _ => {}
        }
    }

    false
}
|
||||
|
||||
/// Consider using [`should_flatten_from_unchecked_slice`] when you can.
|
||||
/// Will returns `true` if the json contains an object, an array of array
|
||||
/// or an array containing an object.
|
||||
/// Returns `false` for everything else.
|
||||
/// This function has been written to test the [`should_flatten_from_unchecked_slice`].
|
||||
pub fn should_flatten_from_value(json: &Value) -> bool {
|
||||
match json {
|
||||
Value::Object(..) => true,
|
||||
Value::Array(array) => array.iter().any(|value| value.is_array() || value.is_object()),
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use serde_json::*;
|
||||
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_shouldnt_flatten() {
|
||||
let shouldnt_flatten = vec![
|
||||
json!(null),
|
||||
json!(true),
|
||||
json!(false),
|
||||
json!("a superb string"),
|
||||
json!("a string escaping other \"string\""),
|
||||
json!([null, true, false]),
|
||||
json!(["hello", "world", "!"]),
|
||||
json!(["a \"string\" escaping 'an other'", "\"[\"", "\"{\""]),
|
||||
];
|
||||
for value in shouldnt_flatten {
|
||||
assert!(!should_flatten_from_value(&value));
|
||||
let value = serde_json::to_vec(&value).unwrap();
|
||||
assert!(!should_flatten_from_unchecked_slice(&value));
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_should_flatten() {
|
||||
let should_flatten = vec![
|
||||
json!({}),
|
||||
json!({ "hello": "world" }),
|
||||
json!(["hello", ["world"]]),
|
||||
json!([true, true, true, true, true, true, true, true, true, {}]),
|
||||
];
|
||||
for value in should_flatten {
|
||||
assert!(should_flatten_from_value(&value));
|
||||
let value = serde_json::to_vec(&value).unwrap();
|
||||
assert!(should_flatten_from_unchecked_slice(&value));
|
||||
}
|
||||
}
|
||||
}
|
@ -1,7 +1,7 @@
|
||||
use std::error::Error;
|
||||
|
||||
use meilisearch_types::error::{Code, ErrorCode};
|
||||
use meilisearch_types::{internal_error, keys};
|
||||
use meilisearch_types::internal_error;
|
||||
|
||||
pub type Result<T> = std::result::Result<T, AuthControllerError>;
|
||||
|
||||
@ -11,8 +11,6 @@ pub enum AuthControllerError {
|
||||
ApiKeyNotFound(String),
|
||||
#[error("`uid` field value `{0}` is already an existing API key.")]
|
||||
ApiKeyAlreadyExists(String),
|
||||
#[error(transparent)]
|
||||
ApiKey(#[from] keys::Error),
|
||||
#[error("Internal error: {0}")]
|
||||
Internal(Box<dyn Error + Send + Sync + 'static>),
|
||||
}
|
||||
@ -27,7 +25,6 @@ internal_error!(
|
||||
impl ErrorCode for AuthControllerError {
|
||||
fn error_code(&self) -> Code {
|
||||
match self {
|
||||
Self::ApiKey(e) => e.error_code(),
|
||||
Self::ApiKeyNotFound(_) => Code::ApiKeyNotFound,
|
||||
Self::ApiKeyAlreadyExists(_) => Code::ApiKeyAlreadyExists,
|
||||
Self::Internal(_) => Code::Internal,
|
||||
|
@ -8,10 +8,9 @@ use std::path::Path;
|
||||
use std::sync::Arc;
|
||||
|
||||
use error::{AuthControllerError, Result};
|
||||
use meilisearch_types::keys::{Action, Key};
|
||||
use meilisearch_types::keys::{Action, CreateApiKey, Key, PatchApiKey};
|
||||
use meilisearch_types::star_or::StarOr;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_json::Value;
|
||||
pub use store::open_auth_store_env;
|
||||
use store::{generate_key_as_hexa, HeedAuthStore};
|
||||
use time::OffsetDateTime;
|
||||
@ -34,17 +33,18 @@ impl AuthController {
|
||||
Ok(Self { store: Arc::new(store), master_key: master_key.clone() })
|
||||
}
|
||||
|
||||
pub fn create_key(&self, value: Value) -> Result<Key> {
|
||||
let key = Key::create_from_value(value)?;
|
||||
match self.store.get_api_key(key.uid)? {
|
||||
Some(_) => Err(AuthControllerError::ApiKeyAlreadyExists(key.uid.to_string())),
|
||||
None => self.store.put_api_key(key),
|
||||
pub fn create_key(&self, create_key: CreateApiKey) -> Result<Key> {
|
||||
match self.store.get_api_key(create_key.uid)? {
|
||||
Some(_) => Err(AuthControllerError::ApiKeyAlreadyExists(create_key.uid.to_string())),
|
||||
None => self.store.put_api_key(create_key.to_key()),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn update_key(&self, uid: Uuid, value: Value) -> Result<Key> {
|
||||
pub fn update_key(&self, uid: Uuid, patch: PatchApiKey) -> Result<Key> {
|
||||
let mut key = self.get_key(uid)?;
|
||||
key.update_from_value(value)?;
|
||||
key.description = patch.description;
|
||||
key.name = patch.name;
|
||||
key.updated_at = OffsetDateTime::now_utc();
|
||||
self.store.put_api_key(key)
|
||||
}
|
||||
|
||||
|
@ -9,14 +9,14 @@ actix-web = { version = "4.2.1", default-features = false }
|
||||
anyhow = "1.0.65"
|
||||
convert_case = "0.6.0"
|
||||
csv = "1.1.6"
|
||||
deserr = { version = "0.1.2", features = ["serde-json"] }
|
||||
deserr = "0.1.4"
|
||||
either = { version = "1.6.1", features = ["serde"] }
|
||||
enum-iterator = "1.1.3"
|
||||
file-store = { path = "../file-store" }
|
||||
flate2 = "1.0.24"
|
||||
fst = "0.4.7"
|
||||
memmap2 = "0.5.7"
|
||||
milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.38.0", default-features = false }
|
||||
milli = { path = "../milli", default-features = false }
|
||||
proptest = { version = "1.0.0", optional = true }
|
||||
proptest-derive = { version = "0.3.0", optional = true }
|
||||
roaring = { version = "0.10.0", features = ["serde"] }
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user