mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-12-23 21:20:24 +01:00
Merge #207
207: Benchmarks r=Kerollmops a=irevoire Co-authored-by: tamo <tamo@meilisearch.com> Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com> Co-authored-by: Tamo <irevoire@hotmail.fr> Co-authored-by: Irevoire <tamo@meilisearch.com>
This commit is contained in:
commit
28962bce99
71
.github/workflows/benchmarks.yml
vendored
Normal file
71
.github/workflows/benchmarks.yml
vendored
Normal file
@ -0,0 +1,71 @@
|
||||
name: Benchmarks
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
dataset_name:
|
||||
description: 'The name of the dataset used to benchmark (songs or wiki)'
|
||||
required: false
|
||||
default: 'songs'
|
||||
|
||||
jobs:
|
||||
benchmarks:
|
||||
name: Run and upload benchmarks
|
||||
runs-on: self-hosted
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- uses: actions-rs/toolchain@v1
|
||||
with:
|
||||
profile: minimal
|
||||
toolchain: stable
|
||||
override: true
|
||||
|
||||
# Set variables
|
||||
- name: Set current branch name
|
||||
shell: bash
|
||||
run: echo "##[set-output name=name;]$(echo ${GITHUB_REF#refs/heads/})"
|
||||
id: current_branch
|
||||
- name: Set normalized current branch name # Replace `/` by `_` in branch name to avoid issues when pushing to S3
|
||||
shell: bash
|
||||
run: echo "##[set-output name=name;]$(echo ${GITHUB_REF#refs/heads/} | tr '/' '_')"
|
||||
id: normalized_current_branch
|
||||
- name: Set shorter commit SHA
|
||||
shell: bash
|
||||
run: echo "##[set-output name=short;]$(echo $GITHUB_SHA | cut -c1-8)"
|
||||
id: commit_sha
|
||||
- name: Set file basename with format "dataset_branch_commitSHA"
|
||||
shell: bash
|
||||
run: echo "##[set-output name=basename;]$(echo ${{ github.event.inputs.dataset_name }}_${{ steps.normalized_current_branch.outputs.name }}_${{ steps.commit_sha.outputs.short }})"
|
||||
id: file
|
||||
|
||||
# Run benchmarks
|
||||
- name: Run benchmarks - Dataset ${{ github.event.inputs.dataset_name }} - Branch ${{ steps.current_branch.outputs.name }} - Commit ${{ steps.commit_sha.outputs.short }}
|
||||
run: |
|
||||
cd benchmarks
|
||||
cargo bench --bench ${{ github.event.inputs.dataset_name }} -- --save-baseline ${{ steps.file.outputs.basename }}
|
||||
|
||||
# Generate critcmp files
|
||||
- name: Install critcmp
|
||||
run: cargo install critcmp
|
||||
- name: Export cripcmp file
|
||||
run: |
|
||||
critcmp --export ${{ steps.file.outputs.basename }} > ${{ steps.file.outputs.basename }}.json
|
||||
|
||||
# Upload benchmarks
|
||||
- name: Upload ${{ steps.file.outputs.basename }}.json to DO Spaces # DigitalOcean Spaces = S3
|
||||
uses: BetaHuhn/do-spaces-action@v2
|
||||
with:
|
||||
access_key: ${{ secrets.DO_SPACES_ACCESS_KEY }}
|
||||
secret_key: ${{ secrets.DO_SPACES_SECRET_KEY }}
|
||||
space_name: ${{ secrets.DO_SPACES_SPACE_NAME }}
|
||||
space_region: ${{ secrets.DO_SPACES_SPACE_REGION }}
|
||||
source: ${{ steps.file.outputs.basename }}.json
|
||||
out_dir: critcmp_results
|
||||
|
||||
# Helper
|
||||
- name: 'README: compare with another benchmark'
|
||||
run: |
|
||||
echo "${{ steps.file.outputs.basename }}.json has just been pushed."
|
||||
echo 'How to compare this benchmark with another one?'
|
||||
echo ' - Check the available files with: ./benchmarks/scripts/list.sh'
|
||||
echo " - Run the following command: ./benchmaks/scipts/compare.sh ${{ steps.file.outputs.basename }}.json <file-to-compare-with>"
|
346
Cargo.lock
generated
346
Cargo.lock
generated
@ -122,6 +122,20 @@ version = "0.13.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "904dfeac50f3cdaba28fc6f57fdcddb75f49ed61346676a78c4ffe55877802fd"
|
||||
|
||||
[[package]]
|
||||
name = "benchmarks"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"bytes 1.0.1",
|
||||
"convert_case",
|
||||
"criterion",
|
||||
"flate2",
|
||||
"heed",
|
||||
"milli",
|
||||
"reqwest",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "big_s"
|
||||
version = "1.0.2"
|
||||
@ -327,6 +341,12 @@ dependencies = [
|
||||
"unicode-width",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "convert_case"
|
||||
version = "0.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6245d59a3e82a7fc217c5828a6692dbc6dfb63a0c8c90495621f7b9d79704a0e"
|
||||
|
||||
[[package]]
|
||||
name = "cow-utils"
|
||||
version = "0.1.2"
|
||||
@ -506,6 +526,15 @@ version = "1.6.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e78d4f1cc4ae33bbfc157ed5d5a5ef3bc29227303d595861deb238fcec4e9457"
|
||||
|
||||
[[package]]
|
||||
name = "encoding_rs"
|
||||
version = "0.8.28"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "80df024fbc5ac80f87dfef0d9f5209a252f2a497f7f42944cff24d8253cac065"
|
||||
dependencies = [
|
||||
"cfg-if 1.0.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "fake-simd"
|
||||
version = "0.1.2"
|
||||
@ -750,12 +779,31 @@ dependencies = [
|
||||
"http",
|
||||
"indexmap",
|
||||
"slab",
|
||||
"tokio",
|
||||
"tokio-util",
|
||||
"tokio 0.2.25",
|
||||
"tokio-util 0.3.1",
|
||||
"tracing",
|
||||
"tracing-futures",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "h2"
|
||||
version = "0.3.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "825343c4eef0b63f541f8903f395dc5beb362a979b5799a84062527ef1e37726"
|
||||
dependencies = [
|
||||
"bytes 1.0.1",
|
||||
"fnv",
|
||||
"futures-core",
|
||||
"futures-sink",
|
||||
"futures-util",
|
||||
"http",
|
||||
"indexmap",
|
||||
"slab",
|
||||
"tokio 1.6.0",
|
||||
"tokio-util 0.6.7",
|
||||
"tracing",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "half"
|
||||
version = "1.7.1"
|
||||
@ -893,6 +941,17 @@ dependencies = [
|
||||
"http",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "http-body"
|
||||
version = "0.4.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "60daa14be0e0786db0f03a9e57cb404c9d756eed2b6c62b9ea98ec5743ec75a9"
|
||||
dependencies = [
|
||||
"bytes 1.0.1",
|
||||
"http",
|
||||
"pin-project-lite 0.2.6",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "http-ui"
|
||||
version = "0.2.1"
|
||||
@ -922,7 +981,7 @@ dependencies = [
|
||||
"stderrlog",
|
||||
"structopt",
|
||||
"tempfile",
|
||||
"tokio",
|
||||
"tokio 0.2.25",
|
||||
"warp",
|
||||
]
|
||||
|
||||
@ -960,20 +1019,59 @@ dependencies = [
|
||||
"futures-channel",
|
||||
"futures-core",
|
||||
"futures-util",
|
||||
"h2",
|
||||
"h2 0.2.7",
|
||||
"http",
|
||||
"http-body",
|
||||
"http-body 0.3.1",
|
||||
"httparse",
|
||||
"httpdate",
|
||||
"itoa",
|
||||
"pin-project 1.0.5",
|
||||
"socket2",
|
||||
"tokio",
|
||||
"socket2 0.3.19",
|
||||
"tokio 0.2.25",
|
||||
"tower-service",
|
||||
"tracing",
|
||||
"want",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "hyper"
|
||||
version = "0.14.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8bf09f61b52cfcf4c00de50df88ae423d6c02354e385a86341133b5338630ad1"
|
||||
dependencies = [
|
||||
"bytes 1.0.1",
|
||||
"futures-channel",
|
||||
"futures-core",
|
||||
"futures-util",
|
||||
"h2 0.3.3",
|
||||
"http",
|
||||
"http-body 0.4.2",
|
||||
"httparse",
|
||||
"httpdate",
|
||||
"itoa",
|
||||
"pin-project 1.0.5",
|
||||
"socket2 0.4.0",
|
||||
"tokio 1.6.0",
|
||||
"tower-service",
|
||||
"tracing",
|
||||
"want",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "hyper-rustls"
|
||||
version = "0.22.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5f9f7a97316d44c0af9b0301e65010573a853a9fc97046d7331d7f6bc0fd5a64"
|
||||
dependencies = [
|
||||
"futures-util",
|
||||
"hyper 0.14.5",
|
||||
"log",
|
||||
"rustls",
|
||||
"tokio 1.6.0",
|
||||
"tokio-rustls",
|
||||
"webpki",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "idna"
|
||||
version = "0.2.2"
|
||||
@ -1029,6 +1127,12 @@ dependencies = [
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ipnet"
|
||||
version = "2.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "47be2f14c678be2fdcab04ab1171db51b2762ce6f0a8ee87c8dd4a04ed216135"
|
||||
|
||||
[[package]]
|
||||
name = "itertools"
|
||||
version = "0.9.0"
|
||||
@ -1261,7 +1365,6 @@ dependencies = [
|
||||
"bstr",
|
||||
"byteorder",
|
||||
"chrono",
|
||||
"criterion",
|
||||
"crossbeam-channel",
|
||||
"csv",
|
||||
"either",
|
||||
@ -1343,6 +1446,19 @@ dependencies = [
|
||||
"winapi 0.2.8",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "mio"
|
||||
version = "0.7.11"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "cf80d3e903b34e0bd7282b218398aec54e082c840d9baf8339e0080a0c542956"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"log",
|
||||
"miow 0.3.7",
|
||||
"ntapi",
|
||||
"winapi 0.3.9",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "mio-named-pipes"
|
||||
version = "0.1.7"
|
||||
@ -1350,7 +1466,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0840c1c50fd55e521b247f949c241c9997709f23bd7f023b9762cd561e935656"
|
||||
dependencies = [
|
||||
"log",
|
||||
"mio",
|
||||
"mio 0.6.23",
|
||||
"miow 0.3.7",
|
||||
"winapi 0.3.9",
|
||||
]
|
||||
@ -1363,7 +1479,7 @@ checksum = "afcb699eb26d4332647cc848492bbc15eafb26f08d0304550d5aa1f612e066f0"
|
||||
dependencies = [
|
||||
"iovec",
|
||||
"libc",
|
||||
"mio",
|
||||
"mio 0.6.23",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@ -1441,6 +1557,15 @@ dependencies = [
|
||||
"version_check",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ntapi"
|
||||
version = "0.3.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3f6bb902e437b6d86e03cce10a7e2af662292c5dfef23b65899ea3ac9354ad44"
|
||||
dependencies = [
|
||||
"winapi 0.3.9",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "num-integer"
|
||||
version = "0.1.44"
|
||||
@ -1956,12 +2081,62 @@ dependencies = [
|
||||
"winapi 0.3.9",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "reqwest"
|
||||
version = "0.11.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2296f2fac53979e8ccbc4a1136b25dcefd37be9ed7e4a1f6b05a6029c84ff124"
|
||||
dependencies = [
|
||||
"base64 0.13.0",
|
||||
"bytes 1.0.1",
|
||||
"encoding_rs",
|
||||
"futures-core",
|
||||
"futures-util",
|
||||
"http",
|
||||
"http-body 0.4.2",
|
||||
"hyper 0.14.5",
|
||||
"hyper-rustls",
|
||||
"ipnet",
|
||||
"js-sys",
|
||||
"lazy_static",
|
||||
"log",
|
||||
"mime",
|
||||
"percent-encoding",
|
||||
"pin-project-lite 0.2.6",
|
||||
"rustls",
|
||||
"serde",
|
||||
"serde_urlencoded 0.7.0",
|
||||
"tokio 1.6.0",
|
||||
"tokio-rustls",
|
||||
"url",
|
||||
"wasm-bindgen",
|
||||
"wasm-bindgen-futures",
|
||||
"web-sys",
|
||||
"webpki-roots",
|
||||
"winreg",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "retain_mut"
|
||||
version = "0.1.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "53552c6c49e1e13f1a203ef0080ab3bbef0beb570a528993e83df057a9d9bba1"
|
||||
|
||||
[[package]]
|
||||
name = "ring"
|
||||
version = "0.16.20"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3053cf52e236a3ed746dfc745aa9cacf1b791d846bdaf412f60a8d7d6e17c8fc"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"libc",
|
||||
"once_cell",
|
||||
"spin",
|
||||
"untrusted",
|
||||
"web-sys",
|
||||
"winapi 0.3.9",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "roaring"
|
||||
version = "0.6.6"
|
||||
@ -1982,6 +2157,19 @@ dependencies = [
|
||||
"semver",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rustls"
|
||||
version = "0.19.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "35edb675feee39aec9c99fa5ff985081995a06d594114ae14cbe797ad7b7a6d7"
|
||||
dependencies = [
|
||||
"base64 0.13.0",
|
||||
"log",
|
||||
"ring",
|
||||
"sct",
|
||||
"webpki",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ryu"
|
||||
version = "1.0.5"
|
||||
@ -2015,6 +2203,16 @@ version = "1.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd"
|
||||
|
||||
[[package]]
|
||||
name = "sct"
|
||||
version = "0.6.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b362b83898e0e69f38515b82ee15aa80636befe47c3b6d3d89a911e78fc228ce"
|
||||
dependencies = [
|
||||
"ring",
|
||||
"untrusted",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "search"
|
||||
version = "0.2.1"
|
||||
@ -2108,6 +2306,18 @@ dependencies = [
|
||||
"url",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde_urlencoded"
|
||||
version = "0.7.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "edfa57a7f8d9c1d260a549e7224100f6c43d43f9103e06dd8b4095a9b2b43ce9"
|
||||
dependencies = [
|
||||
"form_urlencoded",
|
||||
"itoa",
|
||||
"ryu",
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "sha-1"
|
||||
version = "0.8.2"
|
||||
@ -2193,6 +2403,22 @@ dependencies = [
|
||||
"winapi 0.3.9",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "socket2"
|
||||
version = "0.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9e3dfc207c526015c632472a77be09cf1b6e46866581aecae5cc38fb4235dea2"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"winapi 0.3.9",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "spin"
|
||||
version = "0.5.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d"
|
||||
|
||||
[[package]]
|
||||
name = "static_assertions"
|
||||
version = "1.1.0"
|
||||
@ -2386,7 +2612,7 @@ dependencies = [
|
||||
"lazy_static",
|
||||
"libc",
|
||||
"memchr",
|
||||
"mio",
|
||||
"mio 0.6.23",
|
||||
"mio-named-pipes",
|
||||
"mio-uds",
|
||||
"num_cpus",
|
||||
@ -2397,6 +2623,21 @@ dependencies = [
|
||||
"winapi 0.3.9",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tokio"
|
||||
version = "1.6.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bd3076b5c8cc18138b8f8814895c11eb4de37114a5d127bafdc5e55798ceef37"
|
||||
dependencies = [
|
||||
"autocfg",
|
||||
"bytes 1.0.1",
|
||||
"libc",
|
||||
"memchr",
|
||||
"mio 0.7.11",
|
||||
"num_cpus",
|
||||
"pin-project-lite 0.2.6",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tokio-macros"
|
||||
version = "0.2.6"
|
||||
@ -2408,6 +2649,17 @@ dependencies = [
|
||||
"syn 1.0.64",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tokio-rustls"
|
||||
version = "0.22.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bc6844de72e57df1980054b38be3a9f4702aba4858be64dd700181a8a6d0e1b6"
|
||||
dependencies = [
|
||||
"rustls",
|
||||
"tokio 1.6.0",
|
||||
"webpki",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tokio-tungstenite"
|
||||
version = "0.11.0"
|
||||
@ -2417,7 +2669,7 @@ dependencies = [
|
||||
"futures-util",
|
||||
"log",
|
||||
"pin-project 0.4.27",
|
||||
"tokio",
|
||||
"tokio 0.2.25",
|
||||
"tungstenite",
|
||||
]
|
||||
|
||||
@ -2432,7 +2684,21 @@ dependencies = [
|
||||
"futures-sink",
|
||||
"log",
|
||||
"pin-project-lite 0.1.12",
|
||||
"tokio",
|
||||
"tokio 0.2.25",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tokio-util"
|
||||
version = "0.6.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1caa0b0c8d94a049db56b5acf8cba99dc0623aab1b26d5b5f5e2d945846b3592"
|
||||
dependencies = [
|
||||
"bytes 1.0.1",
|
||||
"futures-core",
|
||||
"futures-sink",
|
||||
"log",
|
||||
"pin-project-lite 0.2.6",
|
||||
"tokio 1.6.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@ -2578,6 +2844,12 @@ version = "0.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f7fe0bb3479651439c9112f72b6c505038574c9fbb575ed1bf3b797fa39dd564"
|
||||
|
||||
[[package]]
|
||||
name = "untrusted"
|
||||
version = "0.7.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a156c684c91ea7d62626509bce3cb4e1d9ed5c4d978f7b4352658f96a4c26b4a"
|
||||
|
||||
[[package]]
|
||||
name = "url"
|
||||
version = "2.2.1"
|
||||
@ -2654,7 +2926,7 @@ dependencies = [
|
||||
"futures",
|
||||
"headers",
|
||||
"http",
|
||||
"hyper",
|
||||
"hyper 0.13.10",
|
||||
"log",
|
||||
"mime",
|
||||
"mime_guess",
|
||||
@ -2663,8 +2935,8 @@ dependencies = [
|
||||
"scoped-tls",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"serde_urlencoded",
|
||||
"tokio",
|
||||
"serde_urlencoded 0.6.1",
|
||||
"tokio 0.2.25",
|
||||
"tokio-tungstenite",
|
||||
"tower-service",
|
||||
"tracing",
|
||||
@ -2691,6 +2963,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8fe8f61dba8e5d645a4d8132dc7a0a66861ed5e1045d2c0ed940fab33bac0fbe"
|
||||
dependencies = [
|
||||
"cfg-if 1.0.0",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"wasm-bindgen-macro",
|
||||
]
|
||||
|
||||
@ -2709,6 +2983,18 @@ dependencies = [
|
||||
"wasm-bindgen-shared",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "wasm-bindgen-futures"
|
||||
version = "0.4.22"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "73157efb9af26fb564bb59a009afd1c7c334a44db171d280690d0c3faaec3468"
|
||||
dependencies = [
|
||||
"cfg-if 1.0.0",
|
||||
"js-sys",
|
||||
"wasm-bindgen",
|
||||
"web-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "wasm-bindgen-macro"
|
||||
version = "0.2.72"
|
||||
@ -2748,6 +3034,25 @@ dependencies = [
|
||||
"wasm-bindgen",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "webpki"
|
||||
version = "0.21.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b8e38c0608262c46d4a56202ebabdeb094cef7e560ca7a226c6bf055188aa4ea"
|
||||
dependencies = [
|
||||
"ring",
|
||||
"untrusted",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "webpki-roots"
|
||||
version = "0.21.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "aabe153544e473b775453675851ecc86863d2a81d786d741f6b76778f2a48940"
|
||||
dependencies = [
|
||||
"webpki",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "whatlang"
|
||||
version = "0.9.0"
|
||||
@ -2800,6 +3105,15 @@ version = "0.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
|
||||
|
||||
[[package]]
|
||||
name = "winreg"
|
||||
version = "0.7.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0120db82e8a1e0b9fb3345a539c478767c0048d842860994d96113d5b667bd69"
|
||||
dependencies = [
|
||||
"winapi 0.3.9",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ws2_32-sys"
|
||||
version = "0.2.1"
|
||||
|
@ -1,5 +1,5 @@
|
||||
[workspace]
|
||||
members = ["milli", "http-ui", "infos", "helpers", "search"]
|
||||
members = ["milli", "http-ui", "benchmarks", "infos", "helpers", "search"]
|
||||
default-members = ["milli"]
|
||||
|
||||
[profile.release]
|
||||
|
1
benchmarks/.gitignore
vendored
Normal file
1
benchmarks/.gitignore
vendored
Normal file
@ -0,0 +1 @@
|
||||
benches/datasets_paths.rs
|
28
benchmarks/Cargo.toml
Normal file
28
benchmarks/Cargo.toml
Normal file
@ -0,0 +1,28 @@
|
||||
[package]
|
||||
name = "benchmarks"
|
||||
version = "0.1.0"
|
||||
edition = "2018"
|
||||
publish = false
|
||||
|
||||
|
||||
[dependencies]
|
||||
milli = { path = "../milli" }
|
||||
|
||||
[dev-dependencies]
|
||||
heed = "*" # we want to use the version milli uses
|
||||
criterion = "0.3.4"
|
||||
|
||||
[build-dependencies]
|
||||
anyhow = "1.0"
|
||||
bytes = "1.0"
|
||||
flate2 = "1.0.20"
|
||||
convert_case = "0.4"
|
||||
reqwest = { version = "0.11.3", features = ["blocking", "rustls-tls"], default-features = false }
|
||||
|
||||
[[bench]]
|
||||
name = "songs"
|
||||
harness = false
|
||||
|
||||
[[bench]]
|
||||
name = "wiki"
|
||||
harness = false
|
110
benchmarks/README.md
Normal file
110
benchmarks/README.md
Normal file
@ -0,0 +1,110 @@
|
||||
Benchmarks
|
||||
==========
|
||||
|
||||
## TOC
|
||||
|
||||
- [Datasets](#datasets)
|
||||
- [Run the benchmarks](#run-the-benchmarks)
|
||||
- [Comparison between benchmarks](#comparison-between-benchmarks)
|
||||
|
||||
## Datasets
|
||||
|
||||
The benchmarks are available for the following datasets:
|
||||
- `songs`
|
||||
- `wiki`
|
||||
|
||||
### Songs
|
||||
|
||||
`songs` is a subset of the [`songs.csv` dataset](https://meili-datasets.s3.fr-par.scw.cloud/songs.csv.gz).
|
||||
|
||||
It was generated with this command:
|
||||
|
||||
```bash
|
||||
xsv sample --seed 42 1000000 songs.csv -o smol-songs.csv
|
||||
```
|
||||
|
||||
_[Download the generated `songs` dataset](https://meili-datasets.s3.fr-par.scw.cloud/benchmarks/smol-songs.csv.gz)._
|
||||
|
||||
### Wiki
|
||||
|
||||
`wiki` is a subset of the [`wikipedia-articles.csv` dataset](https://meili-datasets.s3.fr-par.scw.cloud/wikipedia-articles.csv.gz).
|
||||
|
||||
It was generated with the following command:
|
||||
|
||||
```bash
|
||||
xsv sample --seed 42 500000 wikipedia-articles.csv -o smol-wikipedia-articles.csv
|
||||
```
|
||||
|
||||
_[Download the generated `wiki` dataset](https://meili-datasets.s3.fr-par.scw.cloud/benchmarks/smol-wikipedia-articles.csv.gz)._
|
||||
|
||||
## Run the benchmarks
|
||||
|
||||
### On our private server
|
||||
|
||||
The Meili team has self-hosted his own GitHub runner to run benchmarks on our dedicated bare metal server.
|
||||
|
||||
To trigger the benchmark workflow:
|
||||
- Go to the `Actions` tab of this repository.
|
||||
- Select the `Benchmarks` workflow on the left.
|
||||
- Click on `Run workflow` in the blue banner.
|
||||
- Select the branch on which you want to run the benchmarks and select the dataset you want (default: `songs`).
|
||||
- Finally, click on `Run workflow`.
|
||||
|
||||
This GitHub workflow will run the benchmarks and push the `critcmp` report to a DigitalOcean Space (= S3).
|
||||
|
||||
The name of the uploaded file is displayed in the workflow.
|
||||
|
||||
_[More about critcmp](https://github.com/BurntSushi/critcmp)._
|
||||
|
||||
💡 To compare the just-uploaded benchmark with another one, check out the [next section](#comparison-between-benchmarks).
|
||||
|
||||
### On your machine
|
||||
|
||||
To run all the benchmarks (~4h):
|
||||
|
||||
```bash
|
||||
cargo bench
|
||||
```
|
||||
|
||||
To run only the `songs` (~1h) or `wiki` (~3h) benchmark:
|
||||
|
||||
```bash
|
||||
cargo bench --bench <dataset name>
|
||||
```
|
||||
|
||||
By default, the benchmarks will be downloaded and uncompressed automatically in the target directory.<br>
|
||||
If you don't want to download the datasets every time you update something on the code, you can specify a custom directory with the environment variable `MILLI_BENCH_DATASETS_PATH`:
|
||||
|
||||
```bash
|
||||
mkdir ~/datasets
|
||||
MILLI_BENCH_DATASETS_PATH=~/datasets cargo bench --bench songs # the two datasets are downloaded
|
||||
touch build.rs
|
||||
MILLI_BENCH_DATASETS_PATH=~/datasets cargo bench --bench songs # the code is compiled again but the datasets are not downloaded
|
||||
```
|
||||
|
||||
## Comparison between benchmarks
|
||||
|
||||
The benchmark reports we push are generated with `critcmp`. Thus, we use `critcmp` to generate comparison results between 2 benchmarks.
|
||||
|
||||
We provide a script to download and display the comparison report.
|
||||
|
||||
Requirements:
|
||||
- `grep`
|
||||
- `curl`
|
||||
- [`critcmp`](https://github.com/BurntSushi/critcmp)
|
||||
|
||||
List the available file in the DO Space:
|
||||
|
||||
```bash
|
||||
./benchmarks/script/list.sh
|
||||
```
|
||||
```bash
|
||||
songs_main_09a4321.json
|
||||
songs_geosearch_24ec456.json
|
||||
```
|
||||
|
||||
Run the comparison script:
|
||||
|
||||
```bash
|
||||
./benchmarks/scripts/compare.sh songs_main_09a4321.json songs_geosearch_24ec456.json
|
||||
```
|
211
benchmarks/benches/songs.rs
Normal file
211
benchmarks/benches/songs.rs
Normal file
@ -0,0 +1,211 @@
|
||||
mod datasets_paths;
|
||||
mod utils;
|
||||
|
||||
use criterion::{criterion_group, criterion_main};
|
||||
use milli::update::Settings;
|
||||
use utils::Conf;
|
||||
|
||||
fn base_conf(builder: &mut Settings) {
|
||||
let displayed_fields = [
|
||||
"id", "title", "album", "artist", "genre", "country", "released", "duration",
|
||||
]
|
||||
.iter()
|
||||
.map(|s| s.to_string())
|
||||
.collect();
|
||||
builder.set_displayed_fields(displayed_fields);
|
||||
|
||||
let searchable_fields = ["title", "album", "artist"]
|
||||
.iter()
|
||||
.map(|s| s.to_string())
|
||||
.collect();
|
||||
builder.set_searchable_fields(searchable_fields);
|
||||
|
||||
let faceted_fields = [
|
||||
"released-timestamp",
|
||||
"duration-float",
|
||||
"genre",
|
||||
"country",
|
||||
"artist",
|
||||
]
|
||||
.iter()
|
||||
.map(|s| s.to_string())
|
||||
.collect();
|
||||
builder.set_faceted_fields(faceted_fields);
|
||||
}
|
||||
|
||||
const BASE_CONF: Conf = Conf {
|
||||
dataset: datasets_paths::SMOL_SONGS,
|
||||
queries: &[
|
||||
"john ", // 9097
|
||||
"david ", // 4794
|
||||
"charles ", // 1957
|
||||
"david bowie ", // 1200
|
||||
"michael jackson ", // 600
|
||||
"thelonious monk ", // 303
|
||||
"charles mingus ", // 142
|
||||
"marcus miller ", // 60
|
||||
"tamo ", // 13
|
||||
"Notstandskomitee ", // 4
|
||||
],
|
||||
configure: base_conf,
|
||||
primary_key: Some("id"),
|
||||
..Conf::BASE
|
||||
};
|
||||
|
||||
fn bench_songs(c: &mut criterion::Criterion) {
|
||||
let default_criterion: Vec<String> = milli::default_criteria()
|
||||
.iter()
|
||||
.map(|criteria| criteria.to_string())
|
||||
.collect();
|
||||
let default_criterion = default_criterion.iter().map(|s| s.as_str());
|
||||
let asc_default: Vec<&str> = std::iter::once("asc(released-timestamp)")
|
||||
.chain(default_criterion.clone())
|
||||
.collect();
|
||||
let desc_default: Vec<&str> = std::iter::once("desc(released-timestamp)")
|
||||
.chain(default_criterion.clone())
|
||||
.collect();
|
||||
|
||||
let basic_with_quote: Vec<String> = BASE_CONF
|
||||
.queries
|
||||
.iter()
|
||||
.map(|s| {
|
||||
s.trim()
|
||||
.split(' ')
|
||||
.map(|s| format!(r#""{}""#, s))
|
||||
.collect::<Vec<String>>()
|
||||
.join(" ")
|
||||
})
|
||||
.collect();
|
||||
let basic_with_quote: &[&str] = &basic_with_quote
|
||||
.iter()
|
||||
.map(|s| s.as_str())
|
||||
.collect::<Vec<&str>>();
|
||||
|
||||
let confs = &[
|
||||
/* first we bench each criterion alone */
|
||||
utils::Conf {
|
||||
group_name: "proximity",
|
||||
queries: &[
|
||||
"black saint sinner lady ",
|
||||
"les dangeureuses 1960 ",
|
||||
"The Disneyland Sing-Along Chorus ",
|
||||
"Under Great Northern Lights ",
|
||||
"7000 Danses Un Jour Dans Notre Vie ",
|
||||
],
|
||||
criterion: Some(&["proximity"]),
|
||||
optional_words: false,
|
||||
..BASE_CONF
|
||||
},
|
||||
utils::Conf {
|
||||
group_name: "typo",
|
||||
queries: &[
|
||||
"mongus ",
|
||||
"thelonius monk ",
|
||||
"Disnaylande ",
|
||||
"the white striper ",
|
||||
"indochie ",
|
||||
"indochien ",
|
||||
"klub des loopers ",
|
||||
"fear of the duck ",
|
||||
"michel depech ",
|
||||
"stromal ",
|
||||
"dire straights ",
|
||||
"Arethla Franklin ",
|
||||
],
|
||||
criterion: Some(&["typo"]),
|
||||
optional_words: false,
|
||||
..BASE_CONF
|
||||
},
|
||||
utils::Conf {
|
||||
group_name: "words",
|
||||
queries: &[
|
||||
"the black saint and the sinner lady and the good doggo ", // four words to pop
|
||||
"les liaisons dangeureuses 1793 ", // one word to pop
|
||||
"The Disneyland Children's Sing-Alone song ", // two words to pop
|
||||
"seven nation mummy ", // one word to pop
|
||||
"7000 Danses / Le Baiser / je me trompe de mots ", // four words to pop
|
||||
"Bring Your Daughter To The Slaughter but now this is not part of the title ", // nine words to pop
|
||||
"whathavenotnsuchforth and a good amount of words to pop to match the first one ", // 13
|
||||
],
|
||||
criterion: Some(&["words"]),
|
||||
..BASE_CONF
|
||||
},
|
||||
utils::Conf {
|
||||
group_name: "asc",
|
||||
criterion: Some(&["asc(released-timestamp)"]),
|
||||
..BASE_CONF
|
||||
},
|
||||
utils::Conf {
|
||||
group_name: "desc",
|
||||
criterion: Some(&["desc(released-timestamp)"]),
|
||||
..BASE_CONF
|
||||
},
|
||||
|
||||
/* then we bench the asc and desc criterion on top of the default criterion */
|
||||
utils::Conf {
|
||||
group_name: "asc + default",
|
||||
criterion: Some(&asc_default[..]),
|
||||
..BASE_CONF
|
||||
},
|
||||
utils::Conf {
|
||||
group_name: "desc + default",
|
||||
criterion: Some(&desc_default[..]),
|
||||
..BASE_CONF
|
||||
},
|
||||
|
||||
/* we bench the filters with the default request */
|
||||
utils::Conf {
|
||||
group_name: "basic filter: <=",
|
||||
facet_condition: Some("released-timestamp <= 946728000"), // year 2000
|
||||
..BASE_CONF
|
||||
},
|
||||
utils::Conf {
|
||||
group_name: "basic filter: TO",
|
||||
facet_condition: Some("released-timestamp 946728000 TO 1262347200"), // year 2000 to 2010
|
||||
..BASE_CONF
|
||||
},
|
||||
utils::Conf {
|
||||
group_name: "big filter",
|
||||
facet_condition: Some("released-timestamp != 1262347200 AND (NOT (released-timestamp = 946728000)) AND (duration-float = 1 OR (duration-float 1.1 TO 1.5 AND released-timestamp > 315576000))"),
|
||||
..BASE_CONF
|
||||
},
|
||||
|
||||
/* the we bench some global / normal search with all the default criterion in the default
|
||||
* order */
|
||||
utils::Conf {
|
||||
group_name: "basic placeholder",
|
||||
queries: &[""],
|
||||
..BASE_CONF
|
||||
},
|
||||
utils::Conf {
|
||||
group_name: "basic without quote",
|
||||
queries: &BASE_CONF
|
||||
.queries
|
||||
.iter()
|
||||
.map(|s| s.trim()) // we remove the space at the end of each request
|
||||
.collect::<Vec<&str>>(),
|
||||
..BASE_CONF
|
||||
},
|
||||
utils::Conf {
|
||||
group_name: "basic with quote",
|
||||
queries: basic_with_quote,
|
||||
..BASE_CONF
|
||||
},
|
||||
utils::Conf {
|
||||
group_name: "prefix search",
|
||||
queries: &[
|
||||
"s", // 500k+ results
|
||||
"a", //
|
||||
"b", //
|
||||
"i", //
|
||||
"x", // only 7k results
|
||||
],
|
||||
..BASE_CONF
|
||||
},
|
||||
];
|
||||
|
||||
utils::run_benches(c, confs);
|
||||
}
|
||||
|
||||
criterion_group!(benches, bench_songs);
|
||||
criterion_main!(benches);
|
119
benchmarks/benches/utils.rs
Normal file
119
benchmarks/benches/utils.rs
Normal file
@ -0,0 +1,119 @@
|
||||
use std::fs::{create_dir_all, remove_dir_all, File};
|
||||
|
||||
use criterion::BenchmarkId;
|
||||
use heed::EnvOpenOptions;
|
||||
use milli::{
|
||||
update::{IndexDocumentsMethod, Settings, UpdateBuilder, UpdateFormat},
|
||||
FacetCondition, Index,
|
||||
};
|
||||
|
||||
pub struct Conf<'a> {
|
||||
/// where we are going to create our database.mmdb directory
|
||||
/// each benchmark will first try to delete it and then recreate it
|
||||
pub database_name: &'a str,
|
||||
/// the dataset to be used, it must be an uncompressed csv
|
||||
pub dataset: &'a str,
|
||||
pub group_name: &'a str,
|
||||
pub queries: &'a [&'a str],
|
||||
/// here you can change which criterion are used and in which order.
|
||||
/// - if you specify something all the base configuration will be thrown out
|
||||
/// - if you don't specify anything (None) the default configuration will be kept
|
||||
pub criterion: Option<&'a [&'a str]>,
|
||||
/// the last chance to configure your database as you want
|
||||
pub configure: fn(&mut Settings),
|
||||
pub facet_condition: Option<&'a str>,
|
||||
/// enable or disable the optional words on the query
|
||||
pub optional_words: bool,
|
||||
/// primary key, if there is None we'll auto-generate docids for every documents
|
||||
pub primary_key: Option<&'a str>,
|
||||
}
|
||||
|
||||
impl Conf<'_> {
|
||||
pub const BASE: Self = Conf {
|
||||
database_name: "benches.mmdb",
|
||||
dataset: "",
|
||||
group_name: "",
|
||||
queries: &[],
|
||||
criterion: None,
|
||||
configure: |_| (),
|
||||
facet_condition: None,
|
||||
optional_words: true,
|
||||
primary_key: None,
|
||||
};
|
||||
}
|
||||
|
||||
pub fn base_setup(conf: &Conf) -> Index {
|
||||
match remove_dir_all(&conf.database_name) {
|
||||
Ok(_) => (),
|
||||
Err(e) if e.kind() == std::io::ErrorKind::NotFound => (),
|
||||
Err(e) => panic!("{}", e),
|
||||
}
|
||||
create_dir_all(&conf.database_name).unwrap();
|
||||
|
||||
let mut options = EnvOpenOptions::new();
|
||||
options.map_size(100 * 1024 * 1024 * 1024); // 100 GB
|
||||
options.max_readers(10);
|
||||
let index = Index::new(options, conf.database_name).unwrap();
|
||||
if let Some(primary_key) = conf.primary_key {
|
||||
let mut wtxn = index.write_txn().unwrap();
|
||||
index.put_primary_key(&mut wtxn, primary_key).unwrap();
|
||||
}
|
||||
|
||||
let update_builder = UpdateBuilder::new(0);
|
||||
let mut wtxn = index.write_txn().unwrap();
|
||||
let mut builder = update_builder.settings(&mut wtxn, &index);
|
||||
|
||||
if let Some(criterion) = conf.criterion {
|
||||
builder.reset_faceted_fields();
|
||||
builder.reset_criteria();
|
||||
builder.reset_stop_words();
|
||||
|
||||
let criterion = criterion.iter().map(|s| s.to_string()).collect();
|
||||
builder.set_criteria(criterion);
|
||||
}
|
||||
|
||||
(conf.configure)(&mut builder);
|
||||
|
||||
builder.execute(|_, _| ()).unwrap();
|
||||
wtxn.commit().unwrap();
|
||||
|
||||
let update_builder = UpdateBuilder::new(0);
|
||||
let mut wtxn = index.write_txn().unwrap();
|
||||
let mut builder = update_builder.index_documents(&mut wtxn, &index);
|
||||
if let None = conf.primary_key {
|
||||
builder.enable_autogenerate_docids();
|
||||
}
|
||||
builder.update_format(UpdateFormat::Csv);
|
||||
builder.index_documents_method(IndexDocumentsMethod::ReplaceDocuments);
|
||||
let reader = File::open(conf.dataset)
|
||||
.expect(&format!("could not find the dataset in: {}", conf.dataset));
|
||||
builder.execute(reader, |_, _| ()).unwrap();
|
||||
wtxn.commit().unwrap();
|
||||
|
||||
index
|
||||
}
|
||||
|
||||
pub fn run_benches(c: &mut criterion::Criterion, confs: &[Conf]) {
|
||||
for conf in confs {
|
||||
let index = base_setup(conf);
|
||||
|
||||
let mut group = c.benchmark_group(&format!("{}: {}", conf.dataset, conf.group_name));
|
||||
|
||||
for &query in conf.queries {
|
||||
group.bench_with_input(BenchmarkId::from_parameter(query), &query, |b, &query| {
|
||||
b.iter(|| {
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
let mut search = index.search(&rtxn);
|
||||
search.query(query).optional_words(conf.optional_words);
|
||||
if let Some(facet_condition) = conf.facet_condition {
|
||||
let facet_condition =
|
||||
FacetCondition::from_str(&rtxn, &index, facet_condition).unwrap();
|
||||
search.facet_condition(facet_condition);
|
||||
}
|
||||
let _ids = search.execute().unwrap();
|
||||
});
|
||||
});
|
||||
}
|
||||
group.finish();
|
||||
}
|
||||
}
|
133
benchmarks/benches/wiki.rs
Normal file
133
benchmarks/benches/wiki.rs
Normal file
@ -0,0 +1,133 @@
|
||||
mod datasets_paths;
|
||||
mod utils;
|
||||
|
||||
use criterion::{criterion_group, criterion_main};
|
||||
use milli::update::Settings;
|
||||
use utils::Conf;
|
||||
|
||||
fn base_conf(builder: &mut Settings) {
|
||||
let displayed_fields = ["title", "body", "url"]
|
||||
.iter()
|
||||
.map(|s| s.to_string())
|
||||
.collect();
|
||||
builder.set_displayed_fields(displayed_fields);
|
||||
|
||||
let searchable_fields = ["title", "body"].iter().map(|s| s.to_string()).collect();
|
||||
builder.set_searchable_fields(searchable_fields);
|
||||
}
|
||||
|
||||
const BASE_CONF: Conf = Conf {
|
||||
dataset: datasets_paths::SMOL_WIKI_ARTICLES,
|
||||
queries: &[
|
||||
"mingus ", // 46 candidates
|
||||
"miles davis ", // 159
|
||||
"rock and roll ", // 1007
|
||||
"machine ", // 3448
|
||||
"spain ", // 7002
|
||||
"japan ", // 10.593
|
||||
"france ", // 17.616
|
||||
"film ", // 24.959
|
||||
],
|
||||
configure: base_conf,
|
||||
..Conf::BASE
|
||||
};
|
||||
|
||||
fn bench_songs(c: &mut criterion::Criterion) {
|
||||
let basic_with_quote: Vec<String> = BASE_CONF
|
||||
.queries
|
||||
.iter()
|
||||
.map(|s| {
|
||||
s.trim()
|
||||
.split(' ')
|
||||
.map(|s| format!(r#""{}""#, s))
|
||||
.collect::<Vec<String>>()
|
||||
.join(" ")
|
||||
})
|
||||
.collect();
|
||||
let basic_with_quote: &[&str] = &basic_with_quote
|
||||
.iter()
|
||||
.map(|s| s.as_str())
|
||||
.collect::<Vec<&str>>();
|
||||
|
||||
let confs = &[
|
||||
/* first we bench each criterion alone */
|
||||
utils::Conf {
|
||||
group_name: "proximity",
|
||||
queries: &[
|
||||
"herald sings ",
|
||||
"april paris ",
|
||||
"tea two ",
|
||||
"diesel engine ",
|
||||
],
|
||||
criterion: Some(&["proximity"]),
|
||||
optional_words: false,
|
||||
..BASE_CONF
|
||||
},
|
||||
utils::Conf {
|
||||
group_name: "typo",
|
||||
queries: &[
|
||||
"migrosoft ",
|
||||
"linax ",
|
||||
"Disnaylande ",
|
||||
"phytogropher ",
|
||||
"nympalidea ",
|
||||
"aritmetric ",
|
||||
"the fronce ",
|
||||
"sisan ",
|
||||
],
|
||||
criterion: Some(&["typo"]),
|
||||
optional_words: false,
|
||||
..BASE_CONF
|
||||
},
|
||||
utils::Conf {
|
||||
group_name: "words",
|
||||
queries: &[
|
||||
"the black saint and the sinner lady and the good doggo ", // four words to pop, 27 results
|
||||
"Kameya Tokujirō mingus monk ", // two words to pop, 55
|
||||
"Ulrich Hensel meilisearch milli ", // two words to pop, 306
|
||||
"Idaho Bellevue pizza ", // one word to pop, 800
|
||||
"Abraham machin ", // one word to pop, 1141
|
||||
],
|
||||
criterion: Some(&["words"]),
|
||||
..BASE_CONF
|
||||
},
|
||||
/* the we bench some global / normal search with all the default criterion in the default
|
||||
* order */
|
||||
utils::Conf {
|
||||
group_name: "basic placeholder",
|
||||
queries: &[""],
|
||||
..BASE_CONF
|
||||
},
|
||||
utils::Conf {
|
||||
group_name: "basic without quote",
|
||||
queries: &BASE_CONF
|
||||
.queries
|
||||
.iter()
|
||||
.map(|s| s.trim()) // we remove the space at the end of each request
|
||||
.collect::<Vec<&str>>(),
|
||||
..BASE_CONF
|
||||
},
|
||||
utils::Conf {
|
||||
group_name: "basic with quote",
|
||||
queries: basic_with_quote,
|
||||
..BASE_CONF
|
||||
},
|
||||
utils::Conf {
|
||||
group_name: "prefix search",
|
||||
queries: &[
|
||||
"t", // 453k results
|
||||
"c", // 405k
|
||||
"g", // 318k
|
||||
"j", // 227k
|
||||
"q", // 71k
|
||||
"x", // 17k
|
||||
],
|
||||
..BASE_CONF
|
||||
},
|
||||
];
|
||||
|
||||
utils::run_benches(c, confs);
|
||||
}
|
||||
|
||||
criterion_group!(benches, bench_songs);
|
||||
criterion_main!(benches);
|
80
benchmarks/build.rs
Normal file
80
benchmarks/build.rs
Normal file
@ -0,0 +1,80 @@
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::{env, fs};
|
||||
use std::{
|
||||
fs::File,
|
||||
io::{Cursor, Read, Seek, Write},
|
||||
};
|
||||
|
||||
use bytes::Bytes;
|
||||
use convert_case::{Case, Casing};
|
||||
use flate2::read::GzDecoder;
|
||||
use reqwest::IntoUrl;
|
||||
|
||||
const BASE_URL: &str = "https://meili-datasets.s3.fr-par.scw.cloud/benchmarks";
|
||||
|
||||
const DATASET_SONGS: &str = "smol-songs";
|
||||
const DATASET_WIKI: &str = "smol-wiki-articles";
|
||||
|
||||
/// The name of the environment variable used to select the path
|
||||
/// of the directory containing the datasets
|
||||
const BASE_DATASETS_PATH_KEY: &str = "MILLI_BENCH_DATASETS_PATH";
|
||||
|
||||
fn main() -> anyhow::Result<()> {
|
||||
let out_dir = PathBuf::from(env::var(BASE_DATASETS_PATH_KEY).unwrap_or(env::var("OUT_DIR")?));
|
||||
|
||||
let benches_dir = PathBuf::from(env::var("CARGO_MANIFEST_DIR")?).join("benches");
|
||||
let mut manifest_paths_file = File::create(benches_dir.join("datasets_paths.rs"))?;
|
||||
writeln!(
|
||||
manifest_paths_file,
|
||||
r#"//! This file is generated by the build script.
|
||||
//! Do not modify by hand, use the build.rs file.
|
||||
#![allow(dead_code)]
|
||||
"#
|
||||
)?;
|
||||
writeln!(manifest_paths_file)?;
|
||||
|
||||
for dataset in &[DATASET_SONGS, DATASET_WIKI] {
|
||||
let out_path = out_dir.join(dataset);
|
||||
let out_file = out_path.with_extension("csv");
|
||||
|
||||
writeln!(
|
||||
&mut manifest_paths_file,
|
||||
r#"pub const {}: &str = {:?};"#,
|
||||
dataset.to_case(Case::ScreamingSnake),
|
||||
out_file.display(),
|
||||
)?;
|
||||
|
||||
if out_file.exists() {
|
||||
eprintln!("The dataset {} already exists on the file system and will not be downloaded again", dataset);
|
||||
continue;
|
||||
}
|
||||
let url = format!("{}/{}.csv.gz", BASE_URL, dataset);
|
||||
eprintln!("downloading: {}", url);
|
||||
let bytes = download_dataset(url.clone())?;
|
||||
eprintln!("{} downloaded successfully", url);
|
||||
eprintln!("uncompressing in {}", out_path.display());
|
||||
uncompress_in_file(bytes, &out_file)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn download_dataset<U: IntoUrl>(url: U) -> anyhow::Result<Cursor<Bytes>> {
|
||||
let bytes = reqwest::blocking::Client::builder()
|
||||
.timeout(None)
|
||||
.build()?
|
||||
.get(url)
|
||||
.send()?
|
||||
.bytes()?;
|
||||
Ok(Cursor::new(bytes))
|
||||
}
|
||||
|
||||
fn uncompress_in_file<R: Read + Seek, P: AsRef<Path>>(bytes: R, path: P) -> anyhow::Result<()> {
|
||||
let path = path.as_ref();
|
||||
let mut gz = GzDecoder::new(bytes);
|
||||
let mut dataset = Vec::new();
|
||||
gz.read_to_end(&mut dataset)?;
|
||||
|
||||
fs::write(path, dataset)?;
|
||||
Ok(())
|
||||
}
|
58
benchmarks/scripts/compare.sh
Executable file
58
benchmarks/scripts/compare.sh
Executable file
@ -0,0 +1,58 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
# Requirements:
|
||||
# - critcmp. See: https://github.com/BurntSushi/critcmp
|
||||
# - curl
|
||||
|
||||
# Usage
|
||||
# $ bash compare.sh json_file1 json_file1
|
||||
# ex: bash compare.sh songs_main_09a4321.json songs_geosearch_24ec456.json
|
||||
|
||||
# Checking that critcmp is installed
|
||||
command -v critcmp > /dev/null 2>&1
|
||||
if [[ "$?" -ne 0 ]]; then
|
||||
echo 'You must install critcmp to make this script working.'
|
||||
echo '$ cargo install critcmp'
|
||||
echo 'See: https://github.com/BurntSushi/critcmp'
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [[ $# -ne 2 ]]
|
||||
then
|
||||
echo 'Need 2 arguments.'
|
||||
echo 'Usage: '
|
||||
echo ' $ ./compare.sh file_to_download1 file_to_download2'
|
||||
echo 'Ex:'
|
||||
echo ' $ ./compare.sh songs_main_09a4321.json songs_geosearch_24ec456.json'
|
||||
exit 1
|
||||
fi
|
||||
|
||||
file1="$1"
|
||||
file2="$2"
|
||||
s3_url='https://milli-benchmarks.fra1.digitaloceanspaces.com/critcmp_results'
|
||||
file1_s3_url="$s3_url/$file1"
|
||||
file2_s3_url="$s3_url/$file2"
|
||||
file1_local_path="/tmp/$file1"
|
||||
file2_local_path="/tmp/$file2"
|
||||
|
||||
if [[ ! -f "$file1_local_path" ]]; then
|
||||
curl "$file1_s3_url" -O "$file1_local_path"
|
||||
if [[ "$?" -ne 0 ]]; then
|
||||
echo 'curl command failed.'
|
||||
exit 1
|
||||
fi
|
||||
else
|
||||
echo "$file1 already present in /tmp, no need to download."
|
||||
fi
|
||||
|
||||
if [[ ! -f "$file2_local_path" ]]; then
|
||||
curl "$file2_s3_url" -O "$file2_local_path"
|
||||
if [[ "$?" -ne 0 ]]; then
|
||||
echo 'curl command failed.'
|
||||
exit 1
|
||||
fi
|
||||
else
|
||||
echo "$file2 already present in /tmp, no need to download."
|
||||
fi
|
||||
|
||||
critcmp --color always "$file1_local_path" "$file2_local_path"
|
14
benchmarks/scripts/list.sh
Executable file
14
benchmarks/scripts/list.sh
Executable file
@ -0,0 +1,14 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
# Requirements:
|
||||
# - curl
|
||||
# - grep
|
||||
|
||||
res=$(curl -s https://milli-benchmarks.fra1.digitaloceanspaces.com | grep -o '<Key>[^<]\+' | cut -c 5- | grep critcmp_results/ | cut -c 18-)
|
||||
|
||||
for pattern in "$@"
|
||||
do
|
||||
res=$(echo "$res" | grep $pattern)
|
||||
done
|
||||
|
||||
echo "$res"
|
5
benchmarks/src/lib.rs
Normal file
5
benchmarks/src/lib.rs
Normal file
@ -0,0 +1,5 @@
|
||||
//! This library is only used to isolate the benchmarks
|
||||
//! from the original milli library.
|
||||
//!
|
||||
//! It does not include interesting functions for milli library
|
||||
//! users only for milli contributors.
|
@ -53,13 +53,8 @@ tinytemplate = "=1.1.0"
|
||||
|
||||
[dev-dependencies]
|
||||
big_s = "1.0.2"
|
||||
criterion = "0.3.4"
|
||||
maplit = "1.0.2"
|
||||
rand = "0.8.3"
|
||||
|
||||
[features]
|
||||
default = []
|
||||
|
||||
[[bench]]
|
||||
name = "search"
|
||||
harness = false
|
||||
|
@ -1,36 +0,0 @@
|
||||
use std::time::Duration;
|
||||
|
||||
use heed::EnvOpenOptions;
|
||||
use milli::Index;
|
||||
use criterion::{criterion_group, criterion_main, BenchmarkId};
|
||||
|
||||
fn bench_search(c: &mut criterion::Criterion) {
|
||||
let database = "books-4cpu.mmdb";
|
||||
let queries = [
|
||||
"minogue kylie",
|
||||
"minogue kylie live",
|
||||
];
|
||||
|
||||
let mut options = EnvOpenOptions::new();
|
||||
options.map_size(100 * 1024 * 1024 * 1024); // 100 GB
|
||||
options.max_readers(10);
|
||||
let index = Index::new(options, database).unwrap();
|
||||
|
||||
let mut group = c.benchmark_group("search");
|
||||
group.sample_size(10);
|
||||
group.measurement_time(Duration::from_secs(12));
|
||||
|
||||
for query in &queries {
|
||||
group.bench_with_input(BenchmarkId::from_parameter(query), &query, |b, &query| {
|
||||
b.iter(|| {
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
let _documents_ids = index.search(&rtxn).query(*query).execute().unwrap();
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
criterion_group!(benches, bench_search);
|
||||
criterion_main!(benches);
|
Loading…
x
Reference in New Issue
Block a user