mirror of https://github.com/meilisearch/MeiliSearch
synced 2025-01-10 13:34:30 +01:00

Merge pull request #5218 from meilisearch/upgrade-dependencies

Upgrade dependencies

Commit: 377fa09cb7
.github/workflows/bench-manual.yml (vendored, 2 changes)
@@ -18,7 +18,7 @@ jobs:
     timeout-minutes: 180 # 3h
     steps:
       - uses: actions/checkout@v3
-      - uses: dtolnay/rust-toolchain@1.79
+      - uses: dtolnay/rust-toolchain@1.81
         with:
           profile: minimal
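The same two-line change repeats in every workflow hunk below: the dtolnay/rust-toolchain action encodes the toolchain version in the action ref itself, so upgrading Rust from 1.79 to 1.81 means editing the `@` suffix of each `uses:` entry. A minimal sketch of the resulting step, reusing only the inputs that appear in these diffs:

```yaml
steps:
  # The ref after `@` selects the Rust toolchain to install.
  - uses: dtolnay/rust-toolchain@1.81
    with:
      profile: minimal
```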
.github/workflows/bench-pr.yml (vendored, 2 changes)
@@ -66,7 +66,7 @@ jobs:
           fetch-depth: 0 # fetch full history to be able to get main commit sha
           ref: ${{ steps.comment-branch.outputs.head_ref }}

-      - uses: dtolnay/rust-toolchain@1.79
+      - uses: dtolnay/rust-toolchain@1.81
         with:
           profile: minimal
.github/workflows/bench-push-indexing.yml (vendored, 2 changes)
@@ -12,7 +12,7 @@ jobs:
     timeout-minutes: 180 # 3h
     steps:
       - uses: actions/checkout@v3
-      - uses: dtolnay/rust-toolchain@1.79
+      - uses: dtolnay/rust-toolchain@1.81
         with:
           profile: minimal
.github/workflows/benchmarks-manual.yml (vendored, 2 changes)
@@ -18,7 +18,7 @@ jobs:
     timeout-minutes: 4320 # 72h
     steps:
      - uses: actions/checkout@v3
-      - uses: dtolnay/rust-toolchain@1.79
+      - uses: dtolnay/rust-toolchain@1.81
         with:
           profile: minimal
.github/workflows/benchmarks-pr.yml (vendored, 2 changes)
@@ -44,7 +44,7 @@ jobs:
           exit 1
         fi

-      - uses: dtolnay/rust-toolchain@1.79
+      - uses: dtolnay/rust-toolchain@1.81
         with:
           profile: minimal
@@ -16,7 +16,7 @@ jobs:
     timeout-minutes: 4320 # 72h
     steps:
       - uses: actions/checkout@v3
-      - uses: dtolnay/rust-toolchain@1.79
+      - uses: dtolnay/rust-toolchain@1.81
         with:
           profile: minimal
@@ -15,7 +15,7 @@ jobs:
     runs-on: benchmarks
     steps:
       - uses: actions/checkout@v3
-      - uses: dtolnay/rust-toolchain@1.79
+      - uses: dtolnay/rust-toolchain@1.81
         with:
           profile: minimal
@@ -15,7 +15,7 @@ jobs:
     runs-on: benchmarks
     steps:
       - uses: actions/checkout@v3
-      - uses: dtolnay/rust-toolchain@1.79
+      - uses: dtolnay/rust-toolchain@1.81
         with:
           profile: minimal
@@ -15,7 +15,7 @@ jobs:
     runs-on: benchmarks
     steps:
       - uses: actions/checkout@v3
-      - uses: dtolnay/rust-toolchain@1.79
+      - uses: dtolnay/rust-toolchain@1.81
         with:
           profile: minimal
.github/workflows/flaky-tests.yml (vendored, 2 changes)
@@ -17,7 +17,7 @@ jobs:
         run: |
           apt-get update && apt-get install -y curl
           apt-get install build-essential -y
-      - uses: dtolnay/rust-toolchain@1.79
+      - uses: dtolnay/rust-toolchain@1.81
       - name: Install cargo-flaky
         run: cargo install cargo-flaky
       - name: Run cargo flaky in the dumps
.github/workflows/fuzzer-indexing.yml (vendored, 2 changes)
@@ -12,7 +12,7 @@ jobs:
     timeout-minutes: 4320 # 72h
     steps:
       - uses: actions/checkout@v3
-      - uses: dtolnay/rust-toolchain@1.79
+      - uses: dtolnay/rust-toolchain@1.81
         with:
           profile: minimal
.github/workflows/publish-apt-brew-pkg.yml (vendored, 2 changes)
@@ -25,7 +25,7 @@ jobs:
         run: |
           apt-get update && apt-get install -y curl
           apt-get install build-essential -y
-      - uses: dtolnay/rust-toolchain@1.79
+      - uses: dtolnay/rust-toolchain@1.81
       - name: Install cargo-deb
         run: cargo install cargo-deb
       - uses: actions/checkout@v3
.github/workflows/publish-binaries.yml (vendored, 8 changes)
@@ -45,7 +45,7 @@ jobs:
         run: |
           apt-get update && apt-get install -y curl
           apt-get install build-essential -y
-      - uses: dtolnay/rust-toolchain@1.79
+      - uses: dtolnay/rust-toolchain@1.81
       - name: Build
         run: cargo build --release --locked
       # No need to upload binaries for dry run (cron)
@@ -75,7 +75,7 @@ jobs:
           asset_name: meilisearch-windows-amd64.exe
     steps:
       - uses: actions/checkout@v3
-      - uses: dtolnay/rust-toolchain@1.79
+      - uses: dtolnay/rust-toolchain@1.81
       - name: Build
         run: cargo build --release --locked
       # No need to upload binaries for dry run (cron)
@@ -101,7 +101,7 @@ jobs:
       - name: Checkout repository
         uses: actions/checkout@v3
       - name: Installing Rust toolchain
-        uses: dtolnay/rust-toolchain@1.79
+        uses: dtolnay/rust-toolchain@1.81
         with:
           profile: minimal
           target: ${{ matrix.target }}
@@ -148,7 +148,7 @@ jobs:
           add-apt-repository "deb [arch=$(dpkg --print-architecture)] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable"
           apt-get update -y && apt-get install -y docker-ce
       - name: Installing Rust toolchain
-        uses: dtolnay/rust-toolchain@1.79
+        uses: dtolnay/rust-toolchain@1.81
         with:
           profile: minimal
           target: ${{ matrix.target }}
.github/workflows/test-suite.yml (vendored, 14 changes)
@@ -31,7 +31,7 @@ jobs:
           apt-get update && apt-get install -y curl
           apt-get install build-essential -y
       - name: Setup test with Rust stable
-        uses: dtolnay/rust-toolchain@1.79
+        uses: dtolnay/rust-toolchain@1.81
       - name: Cache dependencies
         uses: Swatinem/rust-cache@v2.7.7
       - name: Run cargo check without any default features
@@ -56,7 +56,7 @@ jobs:
       - uses: actions/checkout@v3
       - name: Cache dependencies
         uses: Swatinem/rust-cache@v2.7.7
-      - uses: dtolnay/rust-toolchain@1.79
+      - uses: dtolnay/rust-toolchain@1.81
       - name: Run cargo check without any default features
         uses: actions-rs/cargo@v1
         with:
@@ -81,7 +81,7 @@ jobs:
         run: |
           apt-get update
           apt-get install --assume-yes build-essential curl
-      - uses: dtolnay/rust-toolchain@1.79
+      - uses: dtolnay/rust-toolchain@1.81
       - name: Run cargo build with almost all features
         run: |
           cargo build --workspace --locked --release --features "$(cargo xtask list-features --exclude-feature cuda)"
@@ -101,7 +101,7 @@ jobs:
         run: |
           apt-get update
           apt-get install --assume-yes build-essential curl
-      - uses: dtolnay/rust-toolchain@1.79
+      - uses: dtolnay/rust-toolchain@1.81
       - name: Run cargo tree without default features and check lindera is not present
         run: |
           if cargo tree -f '{p} {f}' -e normal --no-default-features | grep -qz lindera; then
@@ -125,7 +125,7 @@ jobs:
         run: |
           apt-get update && apt-get install -y curl
           apt-get install build-essential -y
-      - uses: dtolnay/rust-toolchain@1.79
+      - uses: dtolnay/rust-toolchain@1.81
       - name: Cache dependencies
         uses: Swatinem/rust-cache@v2.7.7
       - name: Run tests in debug
@@ -139,7 +139,7 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v3
-      - uses: dtolnay/rust-toolchain@1.79
+      - uses: dtolnay/rust-toolchain@1.81
         with:
           profile: minimal
           components: clippy
@@ -156,7 +156,7 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v3
-      - uses: dtolnay/rust-toolchain@1.79
+      - uses: dtolnay/rust-toolchain@1.81
         with:
           profile: minimal
           toolchain: nightly-2024-07-09
@@ -18,7 +18,7 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v3
-      - uses: dtolnay/rust-toolchain@1.79
+      - uses: dtolnay/rust-toolchain@1.81
         with:
           profile: minimal
       - name: Install sd
.gitignore (vendored, 1 change)
@@ -10,6 +10,7 @@
 /dumps
 /bench
 /_xtask_benchmark.ms
+/benchmarks

 # Snapshots
 ## ... large
Cargo.lock (generated, 1169 changes)
File diff suppressed because it is too large
@@ -1,5 +1,5 @@
 # Compile
-FROM rust:1.79.0-alpine3.20 AS compiler
+FROM rust:1.81.0-alpine3.20 AS compiler

 RUN apk add -q --no-cache build-base openssl-dev
@@ -11,27 +11,27 @@ edition.workspace = true
 license.workspace = true

 [dependencies]
-anyhow = "1.0.86"
+anyhow = "1.0.95"
 bumpalo = "3.16.0"
-csv = "1.3.0"
+csv = "1.3.1"
 memmap2 = "0.9.5"
 milli = { path = "../milli" }
 mimalloc = { version = "0.1.43", default-features = false }
-serde_json = { version = "1.0.120", features = ["preserve_order"] }
-tempfile = "3.14.0"
+serde_json = { version = "1.0.135", features = ["preserve_order"] }
+tempfile = "3.15.0"

 [dev-dependencies]
 criterion = { version = "0.5.1", features = ["html_reports"] }
 rand = "0.8.5"
 rand_chacha = "0.3.1"
-roaring = "0.10.7"
+roaring = "0.10.10"

 [build-dependencies]
-anyhow = "1.0.86"
-bytes = "1.6.0"
+anyhow = "1.0.95"
+bytes = "1.9.0"
 convert_case = "0.6.0"
-flate2 = "1.0.30"
-reqwest = { version = "0.12.5", features = ["blocking", "rustls-tls"], default-features = false }
+flate2 = "1.0.35"
+reqwest = { version = "0.12.12", features = ["blocking", "rustls-tls"], default-features = false }

 [features]
 default = ["milli/all-tokenizations"]
|
@ -11,8 +11,8 @@ license.workspace = true
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
time = { version = "0.3.36", features = ["parsing"] }
|
||||
time = { version = "0.3.37", features = ["parsing"] }
|
||||
|
||||
[build-dependencies]
|
||||
anyhow = "1.0.86"
|
||||
vergen-git2 = "1.0.0"
|
||||
anyhow = "1.0.95"
|
||||
vergen-git2 = "1.0.2"
|
||||
|
@ -11,21 +11,21 @@ readme.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
anyhow = "1.0.86"
|
||||
flate2 = "1.0.30"
|
||||
http = "1.1.0"
|
||||
anyhow = "1.0.95"
|
||||
flate2 = "1.0.35"
|
||||
http = "1.2.0"
|
||||
meilisearch-types = { path = "../meilisearch-types" }
|
||||
once_cell = "1.19.0"
|
||||
regex = "1.10.5"
|
||||
roaring = { version = "0.10.7", features = ["serde"] }
|
||||
serde = { version = "1.0.204", features = ["derive"] }
|
||||
serde_json = { version = "1.0.120", features = ["preserve_order"] }
|
||||
tar = "0.4.41"
|
||||
tempfile = "3.10.1"
|
||||
thiserror = "1.0.61"
|
||||
time = { version = "0.3.36", features = ["serde-well-known", "formatting", "parsing", "macros"] }
|
||||
tracing = "0.1.40"
|
||||
uuid = { version = "1.10.0", features = ["serde", "v4"] }
|
||||
once_cell = "1.20.2"
|
||||
regex = "1.11.1"
|
||||
roaring = { version = "0.10.10", features = ["serde"] }
|
||||
serde = { version = "1.0.217", features = ["derive"] }
|
||||
serde_json = { version = "1.0.135", features = ["preserve_order"] }
|
||||
tar = "0.4.43"
|
||||
tempfile = "3.15.0"
|
||||
thiserror = "2.0.9"
|
||||
time = { version = "0.3.37", features = ["serde-well-known", "formatting", "parsing", "macros"] }
|
||||
tracing = "0.1.41"
|
||||
uuid = { version = "1.11.0", features = ["serde", "v4"] }
|
||||
|
||||
[dev-dependencies]
|
||||
big_s = "1.0.2"
|
||||
|
@@ -11,7 +11,7 @@ edition.workspace = true
 license.workspace = true

 [dependencies]
-tempfile = "3.10.1"
-thiserror = "1.0.61"
-tracing = "0.1.40"
-uuid = { version = "1.10.0", features = ["serde", "v4"] }
+tempfile = "3.15.0"
+thiserror = "2.0.9"
+tracing = "0.1.41"
+uuid = { version = "1.11.0", features = ["serde", "v4"] }
@@ -17,4 +17,5 @@ nom_locate = "4.2.0"
 unescaper = "0.1.5"

 [dev-dependencies]
-insta = "1.39.0"
+# fixed version due to format breakages in v1.40
+insta = "=1.39.0"
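For context on the `=` pins introduced throughout this PR: a bare requirement like `insta = "1.39.0"` is a caret requirement in Cargo and allows any semver-compatible upgrade (for example 1.40.x), whereas `=1.39.0` freezes the exact version; the in-diff comment attributes the pin to snapshot-format breakages in insta 1.40. A sketch of the two forms:

```toml
[dev-dependencies]
# Caret requirement: Cargo may resolve this to 1.40.x or later.
insta = "1.39.0"
# Exact requirement: only 1.39.0 will ever be selected.
# insta = "=1.39.0"
```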
@@ -11,12 +11,12 @@ edition.workspace = true
 license.workspace = true

 [dependencies]
-arbitrary = { version = "1.3.2", features = ["derive"] }
+arbitrary = { version = "1.4.1", features = ["derive"] }
 bumpalo = "3.16.0"
-clap = { version = "4.5.9", features = ["derive"] }
+clap = { version = "4.5.24", features = ["derive"] }
 either = "1.13.0"
-fastrand = "2.1.0"
+fastrand = "2.3.0"
 milli = { path = "../milli" }
-serde = { version = "1.0.204", features = ["derive"] }
-serde_json = { version = "1.0.120", features = ["preserve_order"] }
-tempfile = "3.10.1"
+serde = { version = "1.0.217", features = ["derive"] }
+serde_json = { version = "1.0.135", features = ["preserve_order"] }
+tempfile = "3.15.0"
@@ -11,42 +11,43 @@ edition.workspace = true
 license.workspace = true

 [dependencies]
-anyhow = "1.0.86"
+anyhow = "1.0.95"
 bincode = "1.3.3"
 bumpalo = "3.16.0"
-bumparaw-collections = "0.1.2"
+bumparaw-collections = "0.1.4"
 convert_case = "0.6.0"
-csv = "1.3.0"
-derive_builder = "0.20.0"
+csv = "1.3.1"
+derive_builder = "0.20.2"
 dump = { path = "../dump" }
 enum-iterator = "2.1.0"
 file-store = { path = "../file-store" }
-flate2 = "1.0.30"
+flate2 = "1.0.35"
 meilisearch-auth = { path = "../meilisearch-auth" }
 meilisearch-types = { path = "../meilisearch-types" }
-memmap2 = "0.9.4"
+memmap2 = "0.9.5"
 page_size = "0.6.0"
 rayon = "1.10.0"
-roaring = { version = "0.10.7", features = ["serde"] }
-serde = { version = "1.0.204", features = ["derive"] }
-serde_json = { version = "1.0.120", features = ["preserve_order"] }
+roaring = { version = "0.10.10", features = ["serde"] }
+serde = { version = "1.0.217", features = ["derive"] }
+serde_json = { version = "1.0.135", features = ["preserve_order"] }
 synchronoise = "1.0.1"
-tempfile = "3.10.1"
-thiserror = "1.0.61"
-time = { version = "0.3.36", features = [
+tempfile = "3.15.0"
+thiserror = "2.0.9"
+time = { version = "0.3.37", features = [
     "serde-well-known",
     "formatting",
     "parsing",
     "macros",
 ] }
-tracing = "0.1.40"
-ureq = "2.10.0"
-uuid = { version = "1.10.0", features = ["serde", "v4"] }
+tracing = "0.1.41"
+ureq = "2.12.1"
+uuid = { version = "1.11.0", features = ["serde", "v4"] }

 [dev-dependencies]
 arroy = "0.5.0"
 big_s = "1.0.2"
-crossbeam-channel = "0.5.13"
-insta = { version = "1.39.0", features = ["json", "redactions"] }
+crossbeam-channel = "0.5.14"
+# fixed version due to format breakages in v1.40
+insta = { version = "=1.39.0", features = ["json", "redactions"] }
 maplit = "1.0.2"
 meili-snap = { path = "../meili-snap" }
@@ -1,7 +1,7 @@
 /*!
 This crate defines the index scheduler, which is responsible for:
 1. Keeping references to meilisearch's indexes and mapping them to their
-user-defined names.
+   user-defined names.
 2. Scheduling tasks given by the user and executing them, in batch if possible.

 When an `IndexScheduler` is created, a new thread containing a reference to the
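This and the many similar doc-comment hunks below change only indentation: continuation lines of numbered or bulleted items gain enough leading spaces to align under the item. This avoids Markdown "lazy continuations" in rustdoc (plausibly prompted by clippy's `doc_lazy_continuation` lint, which landed around this toolchain bump — an inference, since the diffs show only the whitespace). A sketch of the convention:

```rust
/// 1. Keeping references to indexes and mapping them to their
///    user-defined names: the extra indent keeps this line attached
///    to item 1 instead of starting a lazy continuation.
/// 2. Scheduling tasks given by the user.
pub struct Example;
```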
@@ -513,7 +513,7 @@ impl IndexScheduler {
     /// the user.
     ///
     /// 1. IndexSwap tasks are not publicly associated with any index, but they are associated
-    /// with many indexes internally.
+    ///    with many indexes internally.
     /// 2. The user may not have the rights to access the tasks (internally) associated with all indexes.
     pub fn get_tasks_from_authorized_indexes(
         &self,
@@ -532,7 +532,7 @@ impl IndexScheduler {
     /// the user.
     ///
     /// 1. IndexSwap tasks are not publicly associated with any index, but they are associated
-    /// with many indexes internally.
+    ///    with many indexes internally.
     /// 2. The user may not have the rights to access the tasks (internally) associated with all indexes.
     pub fn get_task_ids_from_authorized_indexes(
         &self,
@@ -551,7 +551,7 @@ impl IndexScheduler {
     /// the user.
     ///
     /// 1. IndexSwap tasks are not publicly associated with any index, but they are associated
-    /// with many indexes internally.
+    ///    with many indexes internally.
     /// 2. The user may not have the rights to access the tasks (internally) associated with all indexes.
     pub fn get_batches_from_authorized_indexes(
         &self,
@@ -570,7 +570,7 @@ impl IndexScheduler {
     /// the user.
     ///
     /// 1. IndexSwap tasks are not publicly associated with any index, but they are associated
-    /// with many indexes internally.
+    ///    with many indexes internally.
     /// 2. The user may not have the rights to access the tasks (internally) associated with all indexes.
     pub fn get_batch_ids_from_authorized_indexes(
         &self,
@@ -444,7 +444,7 @@ impl Queue {
     /// the user.
     ///
     /// 1. IndexSwap tasks are not publicly associated with any index, but they are associated
-    /// with many indexes internally.
+    ///    with many indexes internally.
     /// 2. The user may not have the rights to access the tasks (internally) associated with all indexes.
     pub(crate) fn get_batch_ids_from_authorized_indexes(
         &self,
@@ -106,7 +106,7 @@ impl IndexScheduler {
         progress.update_progress(DumpCreationProgress::DumpTheIndexes);
         let nb_indexes = self.index_mapper.index_mapping.len(&rtxn)? as u32;
         let mut count = 0;
-        self.index_mapper.try_for_each_index(&rtxn, |uid, index| -> Result<()> {
+        let () = self.index_mapper.try_for_each_index(&rtxn, |uid, index| -> Result<()> {
             progress.update_progress(VariableNameStep::new(uid.to_string(), count, nb_indexes));
             count += 1;
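The `let () = …` pattern above binds an expression against the unit pattern, so the statement only compiles while the expression really evaluates to `()`, making the discarded result explicit. A runnable sketch of the pattern (the function is a hypothetical stand-in for `try_for_each_index`):

```rust
fn visit_all() -> Result<(), String> {
    Ok(()) // stand-in for the real iteration
}

fn main() -> Result<(), String> {
    // If `visit_all` ever returns something other than `Result<(), _>`,
    // this unit binding stops compiling instead of silently dropping a value.
    let () = visit_all()?;
    Ok(())
}
```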
@@ -14,4 +14,4 @@ license.workspace = true
 # fixed version due to format breakages in v1.40
 insta = { version = "=1.39.0", features = ["json", "redactions"] }
 md5 = "0.7.0"
-once_cell = "1.19"
+once_cell = "1.20"
@@ -17,10 +17,10 @@ hmac = "0.12.1"
 maplit = "1.0.2"
 meilisearch-types = { path = "../meilisearch-types" }
 rand = "0.8.5"
-roaring = { version = "0.10.7", features = ["serde"] }
-serde = { version = "1.0.204", features = ["derive"] }
-serde_json = { version = "1.0.120", features = ["preserve_order"] }
+roaring = { version = "0.10.10", features = ["serde"] }
+serde = { version = "1.0.217", features = ["derive"] }
+serde_json = { version = "1.0.135", features = ["preserve_order"] }
 sha2 = "0.10.8"
-thiserror = "1.0.61"
-time = { version = "0.3.36", features = ["serde-well-known", "formatting", "parsing", "macros"] }
-uuid = { version = "1.10.0", features = ["serde", "v4"] }
+thiserror = "2.0.9"
+time = { version = "0.3.37", features = ["serde-well-known", "formatting", "parsing", "macros"] }
+uuid = { version = "1.11.0", features = ["serde", "v4"] }
@@ -11,40 +11,41 @@ edition.workspace = true
 license.workspace = true

 [dependencies]
-actix-web = { version = "4.8.0", default-features = false }
-anyhow = "1.0.86"
+actix-web = { version = "4.9.0", default-features = false }
+anyhow = "1.0.95"
 bumpalo = "3.16.0"
 convert_case = "0.6.0"
-csv = "1.3.0"
-deserr = { version = "0.6.2", features = ["actix-web"] }
+csv = "1.3.1"
+deserr = { version = "0.6.3", features = ["actix-web"] }
 either = { version = "1.13.0", features = ["serde"] }
 enum-iterator = "2.1.0"
 file-store = { path = "../file-store" }
-flate2 = "1.0.30"
+flate2 = "1.0.35"
 fst = "0.4.7"
-memmap2 = "0.9.4"
+memmap2 = "0.9.5"
 milli = { path = "../milli" }
-bumparaw-collections = "0.1.2"
-roaring = { version = "0.10.7", features = ["serde"] }
+bumparaw-collections = "0.1.4"
+roaring = { version = "0.10.10", features = ["serde"] }
 rustc-hash = "2.1.0"
-serde = { version = "1.0.204", features = ["derive"] }
+serde = { version = "1.0.217", features = ["derive"] }
 serde-cs = "0.2.4"
-serde_json = "1.0.120"
-tar = "0.4.41"
-tempfile = "3.10.1"
-thiserror = "1.0.61"
-time = { version = "0.3.36", features = [
+serde_json = "1.0.135"
+tar = "0.4.43"
+tempfile = "3.15.0"
+thiserror = "2.0.9"
+time = { version = "0.3.37", features = [
     "serde-well-known",
     "formatting",
     "parsing",
     "macros",
 ] }
-tokio = "1.38"
-utoipa = { version = "5.2.0", features = ["macros"] }
-uuid = { version = "1.10.0", features = ["serde", "v4"] }
+tokio = "1.42"
+utoipa = { version = "5.3.1", features = ["macros"] }
+uuid = { version = "1.11.0", features = ["serde", "v4"] }

 [dev-dependencies]
-insta = "1.39.0"
+# fixed version due to format breakages in v1.40
+insta = "=1.39.0"
 meili-snap = { path = "../meili-snap" }

 [features]
@@ -14,42 +14,42 @@ default-run = "meilisearch"

 [dependencies]
 actix-cors = "0.7.0"
-actix-http = { version = "3.8.0", default-features = false, features = [
+actix-http = { version = "3.9.0", default-features = false, features = [
     "compress-brotli",
     "compress-gzip",
     "rustls-0_23",
 ] }
 actix-utils = "3.0.1"
-actix-web = { version = "4.8.0", default-features = false, features = [
+actix-web = { version = "4.9.0", default-features = false, features = [
     "macros",
     "compress-brotli",
     "compress-gzip",
     "cookies",
     "rustls-0_23",
 ] }
-anyhow = { version = "1.0.86", features = ["backtrace"] }
-async-trait = "0.1.81"
-bstr = "1.9.1"
-byte-unit = { version = "5.1.4", default-features = false, features = [
+anyhow = { version = "1.0.95", features = ["backtrace"] }
+async-trait = "0.1.85"
+bstr = "1.11.3"
+byte-unit = { version = "5.1.6", default-features = false, features = [
     "std",
     "byte",
     "serde",
 ] }
-bytes = "1.6.0"
-clap = { version = "4.5.9", features = ["derive", "env"] }
-crossbeam-channel = "0.5.13"
-deserr = { version = "0.6.2", features = ["actix-web"] }
+bytes = "1.9.0"
+clap = { version = "4.5.24", features = ["derive", "env"] }
+crossbeam-channel = "0.5.14"
+deserr = { version = "0.6.3", features = ["actix-web"] }
 dump = { path = "../dump" }
 either = "1.13.0"
 file-store = { path = "../file-store" }
-flate2 = "1.0.30"
+flate2 = "1.0.35"
 fst = "0.4.7"
-futures = "0.3.30"
-futures-util = "0.3.30"
+futures = "0.3.31"
+futures-util = "0.3.31"
 index-scheduler = { path = "../index-scheduler" }
-indexmap = { version = "2.2.6", features = ["serde"] }
-is-terminal = "0.4.12"
-itertools = "0.13.0"
+indexmap = { version = "2.7.0", features = ["serde"] }
+is-terminal = "0.4.13"
+itertools = "0.14.0"
 jsonwebtoken = "9.3.0"
 lazy_static = "1.5.0"
 meilisearch-auth = { path = "../meilisearch-auth" }
@@ -58,80 +58,81 @@ mimalloc = { version = "0.1.43", default-features = false }
 mime = "0.3.17"
 num_cpus = "1.16.0"
 obkv = "0.3.0"
-once_cell = "1.19.0"
-ordered-float = "4.2.1"
+once_cell = "1.20.2"
+ordered-float = "4.6.0"
 parking_lot = "0.12.3"
 permissive-json-pointer = { path = "../permissive-json-pointer" }
-pin-project-lite = "0.2.14"
+pin-project-lite = "0.2.16"
 platform-dirs = "0.3.0"
 prometheus = { version = "0.13.4", features = ["process"] }
 rand = "0.8.5"
 rayon = "1.10.0"
-regex = "1.10.5"
-reqwest = { version = "0.12.5", features = [
+regex = "1.11.1"
+reqwest = { version = "0.12.12", features = [
     "rustls-tls",
     "json",
 ], default-features = false }
-rustls = { version = "0.23.11", features = ["ring"], default-features = false }
-rustls-pki-types = { version = "1.7.0", features = ["alloc"] }
-rustls-pemfile = "2.1.2"
-segment = { version = "0.2.4" }
-serde = { version = "1.0.204", features = ["derive"] }
-serde_json = { version = "1.0.120", features = ["preserve_order"] }
+rustls = { version = "0.23.20", features = ["ring"], default-features = false }
+rustls-pki-types = { version = "1.10.1", features = ["alloc"] }
+rustls-pemfile = "2.2.0"
+segment = { version = "0.2.5" }
+serde = { version = "1.0.217", features = ["derive"] }
+serde_json = { version = "1.0.135", features = ["preserve_order"] }
 sha2 = "0.10.8"
 siphasher = "1.0.1"
 slice-group-by = "0.3.1"
 static-files = { version = "0.2.4", optional = true }
-sysinfo = "0.30.13"
-tar = "0.4.41"
-tempfile = "3.10.1"
-thiserror = "1.0.61"
-time = { version = "0.3.36", features = [
+sysinfo = "0.33.1"
+tar = "0.4.43"
+tempfile = "3.15.0"
+thiserror = "2.0.9"
+time = { version = "0.3.37", features = [
     "serde-well-known",
     "formatting",
     "parsing",
     "macros",
 ] }
-tokio = { version = "1.38.0", features = ["full"] }
-toml = "0.8.14"
-uuid = { version = "1.10.0", features = ["serde", "v4"] }
+tokio = { version = "1.42.0", features = ["full"] }
+toml = "0.8.19"
+uuid = { version = "1.11.0", features = ["serde", "v4"] }
 serde_urlencoded = "0.7.1"
 termcolor = "1.4.1"
-url = { version = "2.5.2", features = ["serde"] }
-tracing = "0.1.40"
-tracing-subscriber = { version = "0.3.18", features = ["json"] }
+url = { version = "2.5.4", features = ["serde"] }
+tracing = "0.1.41"
+tracing-subscriber = { version = "0.3.19", features = ["json"] }
 tracing-trace = { version = "0.1.0", path = "../tracing-trace" }
-tracing-actix-web = "0.7.11"
+tracing-actix-web = "0.7.15"
 build-info = { version = "1.7.0", path = "../build-info" }
-roaring = "0.10.7"
+roaring = "0.10.10"
 mopa-maintained = "0.2.3"
-utoipa = { version = "5.2.0", features = ["actix_extras", "macros", "non_strict_integers", "preserve_order", "uuid", "time", "openapi_extensions"] }
-utoipa-scalar = { version = "0.2.0", optional = true, features = ["actix-web"] }
+utoipa = { version = "5.3.1", features = ["actix_extras", "macros", "non_strict_integers", "preserve_order", "uuid", "time", "openapi_extensions"] }
+utoipa-scalar = { version = "0.2.1", optional = true, features = ["actix-web"] }

 [dev-dependencies]
 actix-rt = "2.10.0"
 brotli = "6.0.0"
-insta = "1.39.0"
+# fixed version due to format breakages in v1.40
+insta = "=1.39.0"
 manifest-dir-macros = "0.1.18"
 maplit = "1.0.2"
 meili-snap = { path = "../meili-snap" }
 temp-env = "0.3.6"
 urlencoding = "2.1.3"
-wiremock = "0.6.0"
+wiremock = "0.6.2"
 yaup = "0.3.1"

 [build-dependencies]
-anyhow = { version = "1.0.86", optional = true }
-cargo_toml = { version = "0.20.3", optional = true }
+anyhow = { version = "1.0.95", optional = true }
+cargo_toml = { version = "0.21.0", optional = true }
 hex = { version = "0.4.3", optional = true }
-reqwest = { version = "0.12.5", features = [
+reqwest = { version = "0.12.12", features = [
     "blocking",
     "rustls-tls",
 ], default-features = false, optional = true }
 sha-1 = { version = "0.10.1", optional = true }
 static-files = { version = "0.2.4", optional = true }
-tempfile = { version = "3.10.1", optional = true }
-zip = { version = "2.1.3", optional = true }
+tempfile = { version = "3.15.0", optional = true }
+zip = { version = "2.2.2", optional = true }

 [features]
 default = ["meilisearch-types/all-tokenizations", "mini-dashboard"]
@@ -426,13 +426,9 @@ impl Segment {
             &AuthFilter::default(),
         ) {
             // Replace the version number with the prototype name if any.
-            let version = if let Some(prototype) = build_info::DescribeResult::from_build()
+            let version = build_info::DescribeResult::from_build()
                 .and_then(|describe| describe.as_prototype())
-            {
-                prototype
-            } else {
-                env!("CARGO_PKG_VERSION")
-            };
+                .unwrap_or(env!("CARGO_PKG_VERSION"));

             let _ = self
                 .batcher
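The rewrite above is behavior-preserving: `if let Some(x) = opt { x } else { default }` is exactly `opt.unwrap_or(default)`, and `env!("CARGO_PKG_VERSION")` is a compile-time constant, so eager evaluation by `unwrap_or` costs nothing. A standalone sketch (the helper is hypothetical):

```rust
fn prototype_name() -> Option<&'static str> {
    None // stand-in for DescribeResult::from_build().and_then(..)
}

fn main() {
    // Before: explicit branching on the Option.
    let version = if let Some(prototype) = prototype_name() {
        prototype
    } else {
        env!("CARGO_PKG_VERSION")
    };

    // After: the equivalent combinator form used in the diff.
    let version2 = prototype_name().unwrap_or(env!("CARGO_PKG_VERSION"));

    assert_eq!(version, version2);
    println!("{version}");
}
```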
@@ -19,15 +19,15 @@ pub enum MeilisearchHttpError {
     #[error("The Content-Type `{0}` does not support the use of a csv delimiter. The csv delimiter can only be used with the Content-Type `text/csv`.")]
     CsvDelimiterWithWrongContentType(String),
     #[error(
-        "The Content-Type `{0}` is invalid. Accepted values for the Content-Type header are: {}",
-        .1.iter().map(|s| format!("`{}`", s)).collect::<Vec<_>>().join(", ")
+        "The Content-Type `{}` is invalid. Accepted values for the Content-Type header are: {}",
+        .0, .1.iter().map(|s| format!("`{}`", s)).collect::<Vec<_>>().join(", ")
     )]
     InvalidContentType(String, Vec<String>),
     #[error("Document `{0}` not found.")]
     DocumentNotFound(String),
     #[error("Sending an empty filter is forbidden.")]
     EmptyFilter,
-    #[error("Invalid syntax for the filter parameter: `expected {}, found: {1}`.", .0.join(", "))]
+    #[error("Invalid syntax for the filter parameter: `expected {}, found: {}`.", .0.join(", "), .1)]
     InvalidExpression(&'static [&'static str], Value),
     #[error("Using `federationOptions` is not allowed in a non-federated search.\n - Hint: remove `federationOptions` from query #{0} or add `federation` to the request.")]
     FederationOptionsInNonFederatedRequest(usize),
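These format-string rewrites match a breaking change in thiserror 2.0 (an inference from the `thiserror = "2.0.9"` bumps elsewhere in this PR, not stated in the diff): once explicit format arguments are supplied, braces are resolved like `format!` against those arguments, so fields must be passed explicitly as `.0`, `.1` instead of being referenced positionally inside the string. A sketch with a hypothetical error type:

```rust
use thiserror::Error;

#[derive(Debug, Error)]
pub enum ExampleError {
    // Fields are passed as explicit `.0` / `.1` arguments because the
    // attribute also carries a computed argument (`.1.join(", ")`).
    #[error("invalid value `{}`; accepted values are: {}", .0, .1.join(", "))]
    Invalid(String, Vec<String>),
}

fn main() {
    let err = ExampleError::Invalid("xml".into(), vec!["json".into(), "csv".into()]);
    println!("{err}");
}
```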
@@ -188,13 +188,13 @@ impl tracing_actix_web::RootSpanBuilder for AwebTracingLogger {

                 if let Some(error) = response.response().error() {
                     // use the status code already constructed for the outgoing HTTP response
-                    span.record("error", &tracing::field::display(error.as_response_error()));
+                    span.record("error", tracing::field::display(error.as_response_error()));
                 }
             }
             Err(error) => {
                 let code: i32 = error.error_response().status().as_u16().into();
                 span.record("status_code", code);
-                span.record("error", &tracing::field::display(error.as_response_error()));
+                span.record("error", tracing::field::display(error.as_response_error()));
             }
         };
     }
@@ -760,8 +760,8 @@ impl MaxMemory {
     /// Returns the total amount of bytes available or `None` if this system isn't supported.
     fn total_memory_bytes() -> Option<u64> {
         if sysinfo::IS_SUPPORTED_SYSTEM {
-            let memory_kind = RefreshKind::new().with_memory(MemoryRefreshKind::new().with_ram());
-            let mut system = System::new_with_specifics(memory_kind);
+            let mem_kind = RefreshKind::nothing().with_memory(MemoryRefreshKind::nothing().with_ram());
+            let mut system = System::new_with_specifics(mem_kind);
             system.refresh_memory();
             Some(system.total_memory())
         } else {
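The `RefreshKind::new()` → `RefreshKind::nothing()` rename tracks the sysinfo API in use here (the bump to sysinfo 0.33.1 appears in the Cargo.toml hunks above). A standalone sketch of the new call sequence, mirroring only calls visible in this diff:

```rust
use sysinfo::{MemoryRefreshKind, RefreshKind, System};

/// Total RAM in bytes, or `None` on unsupported systems.
fn total_memory_bytes() -> Option<u64> {
    if sysinfo::IS_SUPPORTED_SYSTEM {
        // `nothing()` starts from an empty refresh set; only RAM is opted in.
        let kind = RefreshKind::nothing().with_memory(MemoryRefreshKind::nothing().with_ram());
        let mut system = System::new_with_specifics(kind);
        system.refresh_memory();
        Some(system.total_memory())
    } else {
        None
    }
}

fn main() {
    println!("{:?}", total_memory_bytes());
}
```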
@@ -1,7 +1,3 @@
-use crate::extractors::authentication::policies::ActionPolicy;
-use crate::extractors::authentication::{AuthenticationError, GuardedData};
-use crate::routes::create_all_stats;
-use crate::search_queue::SearchQueue;
 use actix_web::http::header;
 use actix_web::web::{self, Data};
 use actix_web::HttpResponse;
@@ -14,6 +10,11 @@ use prometheus::{Encoder, TextEncoder};
 use time::OffsetDateTime;
 use utoipa::OpenApi;

+use crate::extractors::authentication::policies::ActionPolicy;
+use crate::extractors::authentication::{AuthenticationError, GuardedData};
+use crate::routes::create_all_stats;
+use crate::search_queue::SearchQueue;
+
 #[derive(OpenApi)]
 #[openapi(paths(get_metrics))]
 pub struct MetricApi;
@@ -545,5 +545,5 @@ pub async fn get_health(
     index_scheduler.health().unwrap();
     auth_controller.health().unwrap();

-    Ok(HttpResponse::Ok().json(&HealthResponse::default()))
+    Ok(HttpResponse::Ok().json(HealthResponse::default()))
 }
@@ -73,8 +73,8 @@ async fn get_and_paginate_indexes() {
     let server = Server::new().await;
     const NB_INDEXES: usize = 50;
     for i in 0..NB_INDEXES {
-        server.index(&format!("test_{i:02}")).create(None).await;
-        server.index(&format!("test_{i:02}")).wait_task(i as u64).await;
+        server.index(format!("test_{i:02}")).create(None).await;
+        server.index(format!("test_{i:02}")).wait_task(i as u64).await;
     }

     // basic
@@ -9,16 +9,16 @@ edition.workspace = true
 license.workspace = true

 [dependencies]
-anyhow = "1.0.86"
+anyhow = "1.0.95"
 arroy_v04_to_v05 = { package = "arroy", git = "https://github.com/meilisearch/arroy/", tag = "DO-NOT-DELETE-upgrade-v04-to-v05" }
-clap = { version = "4.5.9", features = ["derive"] }
+clap = { version = "4.5.24", features = ["derive"] }
 dump = { path = "../dump" }
 file-store = { path = "../file-store" }
 indexmap = {version = "2.7.0", features = ["serde"]}
 meilisearch-auth = { path = "../meilisearch-auth" }
 meilisearch-types = { path = "../meilisearch-types" }
-serde = { version = "1.0.209", features = ["derive"] }
-serde_json = {version = "1.0.133", features = ["preserve_order"]}
-tempfile = "3.14.0"
-time = { version = "0.3.36", features = ["formatting", "parsing", "alloc"] }
-uuid = { version = "1.10.0", features = ["v4"], default-features = false }
+serde = { version = "1.0.217", features = ["derive"] }
+serde_json = {version = "1.0.135", features = ["preserve_order"]}
+tempfile = "3.15.0"
+time = { version = "0.3.37", features = ["formatting", "parsing", "alloc"] }
+uuid = { version = "1.11.0", features = ["v4"], default-features = false }
@@ -15,68 +15,68 @@ license.workspace = true
 big_s = "1.0.2"
 bimap = { version = "0.6.3", features = ["serde"] }
 bincode = "1.3.3"
-bstr = "1.9.1"
-bytemuck = { version = "1.18.0", features = ["extern_crate_alloc"] }
+bstr = "1.11.3"
+bytemuck = { version = "1.21.0", features = ["extern_crate_alloc"] }
 byteorder = "1.5.0"
 charabia = { version = "0.9.2", default-features = false }
 concat-arrays = "0.1.2"
-crossbeam-channel = "0.5.13"
-deserr = "0.6.2"
+crossbeam-channel = "0.5.14"
+deserr = "0.6.3"
 either = { version = "1.13.0", features = ["serde"] }
 flatten-serde-json = { path = "../flatten-serde-json" }
 fst = "0.4.7"
 fxhash = "0.2.1"
 geoutils = "0.5.1"
 grenad = { version = "0.5.0", default-features = false, features = ["rayon", "tempfile"] }
-heed = { version = "0.20.3", default-features = false, features = [
+heed = { version = "0.20.5", default-features = false, features = [
     "serde-json",
     "serde-bincode",
     "read-txn-no-tls",
 ] }
-indexmap = { version = "2.2.6", features = ["serde"] }
+indexmap = { version = "2.7.0", features = ["serde"] }
 json-depth-checker = { path = "../json-depth-checker" }
 levenshtein_automata = { version = "0.2.1", features = ["fst_automaton"] }
-memchr = "2.5.0"
-memmap2 = "0.9.4"
+memchr = "2.7.4"
+memmap2 = "0.9.5"
 obkv = "0.3.0"
-once_cell = "1.19.0"
-ordered-float = "4.2.1"
+once_cell = "1.20.2"
+ordered-float = "4.6.0"
 rayon = "1.10.0"
-roaring = { version = "0.10.7", features = ["serde"] }
-rstar = { version = "0.12.0", features = ["serde"] }
-serde = { version = "1.0.204", features = ["derive"] }
-serde_json = { version = "1.0.120", features = ["preserve_order", "raw_value"] }
+roaring = { version = "0.10.10", features = ["serde"] }
+rstar = { version = "0.12.2", features = ["serde"] }
+serde = { version = "1.0.217", features = ["derive"] }
+serde_json = { version = "1.0.135", features = ["preserve_order", "raw_value"] }
 slice-group-by = "0.3.1"
 smallstr = { version = "0.3.0", features = ["serde"] }
 smallvec = "1.13.2"
 smartstring = "1.0.1"
-tempfile = "3.10.1"
-thiserror = "1.0.61"
-time = { version = "0.3.36", features = [
+tempfile = "3.15.0"
+thiserror = "2.0.9"
+time = { version = "0.3.37", features = [
     "serde-well-known",
     "formatting",
     "parsing",
     "macros",
 ] }
-uuid = { version = "1.10.0", features = ["v4"] }
+uuid = { version = "1.11.0", features = ["v4"] }

 filter-parser = { path = "../filter-parser" }

 # documents words self-join
-itertools = "0.13.0"
+itertools = "0.14.0"

-csv = "1.3.0"
-candle-core = { version = "0.6.0" }
-candle-transformers = { version = "0.6.0" }
-candle-nn = { version = "0.6.0" }
+csv = "1.3.1"
+candle-core = { version = "0.8.2" }
+candle-transformers = { version = "0.8.2" }
+candle-nn = { version = "0.8.2" }
 tokenizers = { git = "https://github.com/huggingface/tokenizers.git", tag = "v0.15.2", version = "0.15.2", default-features = false, features = [
     "onig",
 ] }
 hf-hub = { git = "https://github.com/dureuill/hf-hub.git", branch = "rust_tls", default-features = false, features = [
     "online",
 ] }
-tiktoken-rs = "0.5.9"
-liquid = "0.26.6"
+tiktoken-rs = "0.6.0"
+liquid = "0.26.9"
 rhai = { git = "https://github.com/rhaiscript/rhai", rev = "ef3df63121d27aacd838f366f2b83fd65f20a1e4", features = [
     "serde",
     "no_module",
@@ -86,25 +86,26 @@ rhai = { git = "https://github.com/rhaiscript/rhai", rev = "ef3df63121d27aacd838
 ] }
 arroy = "0.5.0"
 rand = "0.8.5"
-tracing = "0.1.40"
-ureq = { version = "2.10.0", features = ["json"] }
-url = "2.5.2"
+tracing = "0.1.41"
+ureq = { version = "2.12.1", features = ["json"] }
+url = "2.5.4"
 rayon-par-bridge = "0.1.0"
-hashbrown = "0.15.0"
+hashbrown = "0.15.2"
 bumpalo = "3.16.0"
-bumparaw-collections = "0.1.2"
+bumparaw-collections = "0.1.4"
 thread_local = "1.1.8"
-allocator-api2 = "0.2.18"
-rustc-hash = "2.0.0"
+allocator-api2 = "0.2.21"
+rustc-hash = "2.1.0"
 uell = "0.1.0"
 enum-iterator = "2.1.0"
 bbqueue = { git = "https://github.com/meilisearch/bbqueue" }
 flume = { version = "0.11.1", default-features = false }
-utoipa = { version = "5.0.2", features = ["non_strict_integers", "preserve_order", "uuid", "time", "openapi_extensions"] }
+utoipa = { version = "5.3.1", features = ["non_strict_integers", "preserve_order", "uuid", "time", "openapi_extensions"] }

 [dev-dependencies]
 mimalloc = { version = "0.1.43", default-features = false }
-insta = "1.39.0"
+# fixed version due to format breakages in v1.40
+insta = "=1.39.0"
 maplit = "1.0.2"
 md5 = "0.7.0"
 meili-snap = { path = "../meili-snap" }
@@ -134,7 +134,7 @@ and can not be more than 511 bytes.", .document_id.to_string()
     InvalidVectorsEmbedderConf { document_id: String, error: String },
     #[error("{0}")]
     InvalidFilter(String),
-    #[error("Invalid type for filter subexpression: expected: {}, found: {1}.", .0.join(", "))]
+    #[error("Invalid type for filter subexpression: expected: {}, found: {}.", .0.join(", "), .1)]
     InvalidFilterExpression(&'static [&'static str], Value),
     #[error("Attribute `{}` is not sortable. {}",
         .field,
@@ -1,4 +1,3 @@
-#![cfg_attr(all(test, fuzzing), feature(no_coverage))]
 #![allow(clippy::type_complexity)]

 #[cfg(not(windows))]
@@ -132,12 +132,12 @@ impl<'t, 'b, 'bitmap> FacetRangeSearch<'t, 'b, 'bitmap> {
     ///
     /// 1. So long as the element's range is less than the left bound, we do nothing and keep iterating
     /// 2. If the element's range is fully contained by the bounds, then all of its docids are added to
-    /// the roaring bitmap.
+    ///    the roaring bitmap.
     /// 3. If the element's range merely intersects the bounds, then we call the algorithm recursively
-    /// on the children of the element from the level below.
+    ///    on the children of the element from the level below.
     /// 4. If the element's range is greater than the right bound, we do nothing and stop iterating.
-    /// Note that the right bound is found through either the `left_bound` of the *next* element,
-    /// or from the `rightmost_bound` argument
+    ///    Note that the right bound is found through either the `left_bound` of the *next* element,
+    ///    or from the `rightmost_bound` argument
     ///
     /// ## Arguments
     /// - `level`: the level being visited
@@ -18,10 +18,10 @@ pub struct DistinctOutput {

 /// Return a [`DistinctOutput`] containing:
 /// - `remaining`: a set of docids built such that exactly one element from `candidates`
-/// is kept for each distinct value inside the given field. If the field does not exist, it
-/// is considered unique.
+///   is kept for each distinct value inside the given field. If the field does not exist, it
+///   is considered unique.
 /// - `excluded`: the set of document ids that contain a value for the given field that occurs
-/// in the given candidates.
+///   in the given candidates.
 pub fn apply_distinct_rule(
     ctx: &mut SearchContext<'_>,
     field_id: u16,
@@ -149,7 +149,7 @@ pub type WordId = u16;
 /// A given token can partially match a query word for several reasons:
 /// - split words
 /// - multi-word synonyms
-/// In these cases we need to match consecutively several tokens to consider that the match is full.
+///   In these cases we need to match consecutively several tokens to consider that the match is full.
 #[derive(Debug, PartialEq)]
 pub enum MatchType<'a> {
     Full { char_count: usize, byte_len: usize, ids: &'a RangeInclusive<WordId> },
@@ -21,9 +21,9 @@ use crate::Result;
 /// 1. `Start` : unique, represents the start of the query
 /// 2. `End` : unique, represents the end of a query
 /// 3. `Deleted` : represents a node that was deleted.
-/// All deleted nodes are unreachable from the start node.
+///    All deleted nodes are unreachable from the start node.
 /// 4. `Term` is a regular node representing a word or combination of words
-/// from the user query.
+///    from the user query.
 #[derive(Clone)]
 pub struct QueryNode {
     pub data: QueryNodeData,
@@ -8,7 +8,7 @@ with them, they are "unconditional". These kinds of edges are used to "skip" a n
 The algorithm uses a depth-first search. It benefits from two main optimisations:
 - The list of all possible costs to go from any node to the END node is precomputed
 - The `DeadEndsCache` reduces the number of valid paths drastically, by making some edges
-untraversable depending on what other edges were selected.
+  untraversable depending on what other edges were selected.

 These two optimisations are meant to avoid traversing edges that wouldn't lead
 to a valid path. In practically all cases, we avoid the exponential complexity
@@ -24,6 +24,7 @@ For example, the DeadEndsCache could say the following:
 - if we take `g`, then `[f]` is also forbidden
-- etc.
+- etc.
+
 As we traverse the graph, we also traverse the `DeadEndsCache` and keep a list of forbidden
 conditions in memory. Then, we know to avoid all edges which have a condition that is forbidden.
@@ -58,7 +58,7 @@ pub struct ComputedCondition {
 /// 2. The cost of traversing this edge
 /// 3. The condition associated with it
 /// 4. The list of nodes that have to be skipped
-/// if this edge is traversed.
+///    if this edge is traversed.
 #[derive(Clone)]
 pub struct Edge<E> {
     pub source_node: Interned<QueryNode>,
@@ -14,7 +14,7 @@ This module tests the following properties about the exactness ranking rule:
 3. those that contain the most exact words from the remaining query

 - if it is followed by other graph-based ranking rules (`typo`, `proximity`, `attribute`).
-Then these rules will only work with
+  Then these rules will only work with
 1. the exact terms selected by `exactness
 2. the full query term otherwise
 */
@@ -4,15 +4,14 @@ This module tests the Proximity ranking rule:
 1. A proximity of >7 always has the same cost.

 2. Phrase terms can be in sprximity to other terms via their start and end words,
-but we need to make sure that the phrase exists in the document that meets this
-proximity condition. This is especially relevant with split words and synonyms.
+   but we need to make sure that the phrase exists in the document that meets this
+   proximity condition. This is especially relevant with split words and synonyms.

 3. An ngram has the same sprximity cost as its component words being consecutive.
-e.g. `sunflower` equivalent to `sun flower`.
+   e.g. `sunflower` equivalent to `sun flower`.

 4. The prefix databases can be used to find the sprximity between two words, but
-they store fewer sprximities than the regular word sprximity DB.
-
+   they store fewer sprximities than the regular word sprximity DB.
 */

 use std::collections::BTreeMap;
@@ -11,7 +11,7 @@ This module tests the following properties:
 8. 2grams can have 1 typo if they are larger than `min_word_len_two_typos`
 9. 3grams are not typo tolerant (but they can be split into two words)
 10. The `typo` ranking rule assumes the role of the `words` ranking rule implicitly
-if `words` doesn't exist before it.
+    if `words` doesn't exist before it.
 11. The `typo` ranking rule places documents with the same number of typos in the same bucket
 12. Prefix tolerance costs nothing according to the typo ranking rule
 13. Split words cost 1 typo according to the typo ranking rule
@@ -2,11 +2,11 @@
 This module tests the following properties:

 1. The `last` term matching strategy starts removing terms from the query
-starting from the end if no more results match it.
+   starting from the end if no more results match it.
 2. Phrases are never deleted by the `last` term matching strategy
 3. Duplicate words don't affect the ranking of a document according to the `words` ranking rule
 4. The proximity of the first and last word of a phrase to its adjacent terms is taken into
-account by the proximity ranking rule.
+   account by the proximity ranking rule.
 5. Unclosed double quotes still make a phrase
 6. The `all` term matching strategy does not remove any term from the query
 7. The search is capable of returning no results if no documents match the query
@@ -21,29 +21,30 @@ use crate::{CboRoaringBitmapCodec, Index, Result};
 /// Enum used as a return value for the facet incremental indexing.
 ///
 /// - `ModificationResult::InPlace` means that modifying the `facet_value` into the `level` did not have
-/// an effect on the number of keys in that level. Therefore, it did not increase the number of children
-/// of the parent node.
+///   an effect on the number of keys in that level. Therefore, it did not increase the number of children
+///   of the parent node.
 ///
 /// - `ModificationResult::Insert` means that modifying the `facet_value` into the `level` resulted
-/// in the addition of a new key in that level, and that therefore the number of children
-/// of the parent node should be incremented.
+///   in the addition of a new key in that level, and that therefore the number of children
+///   of the parent node should be incremented.
 ///
 /// - `ModificationResult::Remove` means that modifying the `facet_value` into the `level` resulted in a change in the
-/// number of keys in the level. For example, removing a document id from the facet value `3` could
-/// cause it to have no corresponding document in level 0 anymore, and therefore the key was deleted
-/// entirely. In that case, `ModificationResult::Remove` is returned. The parent of the deleted key must
-/// then adjust its group size. If its group size falls to 0, then it will need to be deleted as well.
+///   number of keys in the level. For example, removing a document id from the facet value `3` could
+///   cause it to have no corresponding document in level 0 anymore, and therefore the key was deleted
+///   entirely. In that case, `ModificationResult::Remove` is returned. The parent of the deleted key must
+///   then adjust its group size. If its group size falls to 0, then it will need to be deleted as well.
 ///
 /// - `ModificationResult::Reduce/Expand` means that modifying the `facet_value` into the `level` resulted in a change in the
-/// bounds of the keys of the level. For example, removing a document id from the facet value
-/// `3` might have caused the facet value `3` to have no corresponding document in level 0. Therefore,
-/// in level 1, the key with the left bound `3` had to be changed to the next facet value (e.g. 4).
-/// In that case `ModificationResult::Reduce` is returned. The parent of the reduced key may need to adjust
-/// its left bound as well.
+///   bounds of the keys of the level. For example, removing a document id from the facet value
+///   `3` might have caused the facet value `3` to have no corresponding document in level 0. Therefore,
+///   in level 1, the key with the left bound `3` had to be changed to the next facet value (e.g. 4).
+///   In that case `ModificationResult::Reduce` is returned. The parent of the reduced key may need to adjust
+///   its left bound as well.
 ///
 /// - `ModificationResult::Nothing` means that modifying the `facet_value` didn't have any impact into the `level`.
-/// This case is reachable when a document id is removed from a sub-level node but is still present in another one.
-/// For example, removing `2` from a document containing `2` and `3`, the document id will removed form the `level 0` but should remain in the group node [1..4] in `level 1`.
+///   This case is reachable when a document id is removed from a sub-level node but is still present in another one.
+///   For example, removing `2` from a document containing `2` and `3`, the document id will removed form the `level 0`
+///   but should remain in the group node [1..4] in `level 1`.
 enum ModificationResult {
     InPlace,
     Expand,
@ -1059,208 +1060,3 @@ mod tests {
|
||||
milli_snap!(format!("{index}"), "after_delete");
|
||||
}
|
||||
}
|
||||
|
||||
// fuzz tests
|
||||
#[cfg(all(test, fuzzing))]
|
||||
/**
|
||||
Fuzz test for the incremental indxer.
|
||||
|
||||
The fuzz test uses fuzzcheck, a coverage-guided fuzzer.
|
||||
See https://github.com/loiclec/fuzzcheck-rs and https://fuzzcheck.neocities.org
|
||||
for more information.
|
||||
|
||||
It is only run when using the `cargo fuzzcheck` command line tool, which can be installed with:
|
||||
```sh
|
||||
cargo install cargo-fuzzcheck
|
||||
```
|
||||
To start the fuzz test, run (from the base folder or from milli/):
|
||||
```sh
|
||||
cargo fuzzcheck update::facet::incremental::fuzz::fuzz
|
||||
```
|
||||
and wait a couple minutes to make sure the code was thoroughly tested, then
|
||||
hit `Ctrl-C` to stop the fuzzer. The corpus generated by the fuzzer is located in milli/fuzz.
|
||||
|
||||
To work on this module with rust-analyzer working properly, add the following to your .cargo/config.toml file:
|
||||
```toml
|
||||
[build]
|
||||
rustflags = ["--cfg", "fuzzing"]
|
||||
```
|
||||
|
||||
The fuzz test generates sequences of additions and deletions to the facet database and
|
||||
ensures that:
|
||||
1. its structure is still internally valid
|
||||
2. its content is the same as a trivially correct implementation of the same database
|
||||
*/
|
||||
mod fuzz {
|
||||
use std::collections::{BTreeMap, HashMap};
|
||||
use std::iter::FromIterator;
|
||||
use std::rc::Rc;
|
||||
|
||||
use fuzzcheck::mutators::integer::U8Mutator;
|
||||
use fuzzcheck::mutators::integer_within_range::{U16WithinRangeMutator, U8WithinRangeMutator};
|
||||
use fuzzcheck::mutators::vector::VecMutator;
|
||||
use fuzzcheck::DefaultMutator;
|
||||
use roaring::RoaringBitmap;
|
||||
use tempfile::TempDir;
|
||||
|
||||
use super::*;
|
||||
use crate::update::facet::test_helpers::FacetIndex;
|
||||
#[derive(Default)]
|
||||
pub struct TrivialDatabase<T> {
|
||||
pub elements: BTreeMap<u16, BTreeMap<T, RoaringBitmap>>,
|
||||
}
|
||||
impl<T> TrivialDatabase<T>
|
||||
where
|
||||
T: Ord + Clone + Eq + std::fmt::Debug,
|
||||
{
|
||||
#[no_coverage]
|
||||
pub fn insert(&mut self, field_id: u16, new_key: &T, new_values: &RoaringBitmap) {
|
||||
if new_values.is_empty() {
|
||||
return;
|
||||
}
|
||||
let values_field_id = self.elements.entry(field_id).or_default();
|
||||
let values = values_field_id.entry(new_key.clone()).or_default();
|
||||
*values |= new_values;
|
||||
}
|
||||
#[no_coverage]
|
||||
pub fn delete(&mut self, field_id: u16, key: &T, values_to_remove: &RoaringBitmap) {
|
||||
if let Some(values_field_id) = self.elements.get_mut(&field_id) {
|
||||
if let Some(values) = values_field_id.get_mut(&key) {
|
||||
*values -= values_to_remove;
|
||||
if values.is_empty() {
|
||||
values_field_id.remove(&key);
|
||||
}
|
||||
}
|
||||
if values_field_id.is_empty() {
|
||||
self.elements.remove(&field_id);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
#[derive(Clone, DefaultMutator, serde::Serialize, serde::Deserialize)]
|
||||
struct Operation {
|
||||
#[field_mutator(VecMutator<u8, U8Mutator> = { VecMutator::new(u8::default_mutator(), 0 ..= 5) })]
|
||||
key: Vec<u8>,
|
||||
#[field_mutator(U8WithinRangeMutator = { U8WithinRangeMutator::new(..32) })]
|
||||
group_size: u8,
|
||||
#[field_mutator(U8WithinRangeMutator = { U8WithinRangeMutator::new(..32) })]
|
||||
max_group_size: u8,
|
||||
#[field_mutator(U8WithinRangeMutator = { U8WithinRangeMutator::new(..32) })]
|
||||
min_level_size: u8,
|
||||
#[field_mutator(U16WithinRangeMutator = { U16WithinRangeMutator::new(..=3) })]
|
||||
field_id: u16,
|
||||
kind: OperationKind,
|
||||
}
|
||||
#[derive(Clone, DefaultMutator, serde::Serialize, serde::Deserialize)]
|
||||
enum OperationKind {
|
||||
Insert(
|
||||
#[field_mutator(VecMutator<u8, U8Mutator> = { VecMutator::new(U8Mutator::default(), 0 ..= 10) })]
|
||||
Vec<u8>,
|
||||
),
|
||||
Delete(
|
||||
#[field_mutator(VecMutator<u8, U8Mutator> = { VecMutator::new(U8Mutator::default(), 0 ..= 10) })]
|
||||
Vec<u8>,
|
||||
),
|
||||
}
    #[no_coverage]
    fn compare_with_trivial_database(tempdir: Rc<TempDir>, operations: &[Operation]) {
        let index = FacetIndex::<BytesRefCodec>::open_from_tempdir(tempdir, 4, 8, 5); // dummy params, they'll be overwritten
        let mut txn = index.env.write_txn().unwrap();

        let mut trivial_db = TrivialDatabase::<Vec<u8>>::default();
        let mut value_to_keys = HashMap::<u8, Vec<Vec<u8>>>::new();
        for Operation { key, group_size, max_group_size, min_level_size, field_id, kind } in
            operations
        {
            index.set_group_size(*group_size);
            index.set_max_group_size(*max_group_size);
            index.set_min_level_size(*min_level_size);
            match kind {
                OperationKind::Insert(values) => {
                    let mut bitmap = RoaringBitmap::new();
                    for value in values {
                        bitmap.insert(*value as u32);
                        value_to_keys.entry(*value).or_default().push(key.clone());
                    }
                    index.insert(&mut txn, *field_id, &key.as_slice(), &bitmap);
                    trivial_db.insert(*field_id, &key, &bitmap);
                }
                OperationKind::Delete(values) => {
                    let values = RoaringBitmap::from_iter(values.iter().copied().map(|x| x as u32));
                    let mut values_per_key = HashMap::new();

                    for value in values {
                        if let Some(keys) = value_to_keys.get(&(value as u8)) {
                            for key in keys {
                                let values: &mut RoaringBitmap =
                                    values_per_key.entry(key).or_default();
                                values.insert(value);
                            }
                        }
                    }
                    for (key, values) in values_per_key {
                        index.delete(&mut txn, *field_id, &key.as_slice(), &values);
                        trivial_db.delete(*field_id, &key, &values);
                    }
                }
            }
        }

        for (field_id, values_field_id) in trivial_db.elements.iter() {
            let level0iter = index
                .content
                .as_polymorph()
                .prefix_iter::<_, Bytes, FacetGroupValueCodec>(&mut txn, &field_id.to_be_bytes())
                .unwrap();

            for ((key, values), group) in values_field_id.iter().zip(level0iter) {
                let (group_key, group_values) = group.unwrap();
                let group_key =
                    FacetGroupKeyCodec::<BytesRefCodec>::bytes_decode(group_key).unwrap();
                assert_eq!(key, &group_key.left_bound);
                assert_eq!(values, &group_values.bitmap);
            }
        }

        for (field_id, values_field_id) in trivial_db.elements.iter() {
            let level0iter = index
                .content
                .as_polymorph()
                .prefix_iter::<_, Bytes, FacetGroupValueCodec>(&txn, &field_id.to_be_bytes())
                .unwrap();

            for ((key, values), group) in values_field_id.iter().zip(level0iter) {
                let (group_key, group_values) = group.unwrap();
                let group_key =
                    FacetGroupKeyCodec::<BytesRefCodec>::bytes_decode(group_key).unwrap();
                assert_eq!(key, &group_key.left_bound);
                assert_eq!(values, &group_values.bitmap);
            }
            index.verify_structure_validity(&txn, *field_id);
        }
        txn.abort().unwrap();
    }
    #[test]
    #[no_coverage]
    fn fuzz() {
        let tempdir = Rc::new(TempDir::new().unwrap());
        let tempdir_cloned = tempdir.clone();
        let result = fuzzcheck::fuzz_test(move |operations: &[Operation]| {
            compare_with_trivial_database(tempdir_cloned.clone(), operations)
        })
        .default_mutator()
        .serde_serializer()
        .default_sensor_and_pool_with_custom_filter(|file, function| {
            file == std::path::Path::new("milli/src/update/facet/incremental.rs")
                && !function.contains("serde")
                && !function.contains("tests::")
                && !function.contains("fuzz::")
                && !function.contains("display_bitmap")
        })
        .arguments_from_cargo_fuzzcheck()
        .launch();
        assert!(!result.found_test_failure);
    }
}

@ -346,35 +346,6 @@ pub(crate) mod test_helpers {
    for<'a> BoundCodec:
        BytesEncode<'a> + BytesDecode<'a, DItem = <BoundCodec as BytesEncode<'a>>::EItem>,
{
    #[cfg(all(test, fuzzing))]
    pub fn open_from_tempdir(
        tempdir: Rc<tempfile::TempDir>,
        group_size: u8,
        max_group_size: u8,
        min_level_size: u8,
    ) -> FacetIndex<BoundCodec> {
        let group_size = std::cmp::min(16, std::cmp::max(group_size, 2)); // 2 <= x <= 16
        let max_group_size = std::cmp::min(16, std::cmp::max(group_size * 2, max_group_size)); // 2*group_size <= x <= 16
        let min_level_size = std::cmp::min(17, std::cmp::max(1, min_level_size)); // 1 <= x <= 17

        let mut options = heed::EnvOpenOptions::new();
        let options = options.map_size(4096 * 4 * 10 * 1000);
        unsafe {
            options.flag(heed::flags::Flags::MdbAlwaysFreePages);
        }
        let env = options.open(tempdir.path()).unwrap();
        let content = env.open_database(None).unwrap().unwrap();

        FacetIndex {
            content,
            group_size: Cell::new(group_size),
            max_group_size: Cell::new(max_group_size),
            min_level_size: Cell::new(min_level_size),
            _tempdir: tempdir,
            env,
            _phantom: PhantomData,
        }
    }
    pub fn new(
        group_size: u8,
        max_group_size: u8,
@ -402,26 +373,6 @@ pub(crate) mod test_helpers {
        }
    }

    #[cfg(all(test, fuzzing))]
    pub fn set_group_size(&self, group_size: u8) {
        // 2 <= x <= 64
        self.group_size.set(std::cmp::min(64, std::cmp::max(group_size, 2)));
    }
    #[cfg(all(test, fuzzing))]
    pub fn set_max_group_size(&self, max_group_size: u8) {
        // 2*group_size <= x <= 128
        let max_group_size = std::cmp::max(4, std::cmp::min(128, max_group_size));
        self.max_group_size.set(max_group_size);
        if self.group_size.get() < max_group_size / 2 {
            self.group_size.set(max_group_size / 2);
        }
    }
    #[cfg(all(test, fuzzing))]
    pub fn set_min_level_size(&self, min_level_size: u8) {
        // 1 <= x <= inf
        self.min_level_size.set(std::cmp::max(1, min_level_size));
    }

    pub fn insert<'a>(
        &self,
        wtxn: &'a mut RwTxn<'_>,
@ -2093,33 +2093,6 @@ mod tests {
        index.add_documents(doc1).unwrap();
    }

    #[cfg(feature = "default")]
    #[test]
    fn store_detected_script_and_language_per_document_during_indexing() {
        use charabia::{Language, Script};
        let index = TempIndex::new();
        index
            .add_documents(documents!([
                { "id": 1, "title": "The quick (\"brown\") fox can't jump 32.3 feet, right? Brr, it's 29.3°F!" },
                { "id": 2, "title": "人人生而自由﹐在尊嚴和權利上一律平等。他們賦有理性和良心﹐並應以兄弟關係的精神互相對待。" },
                { "id": 3, "title": "הַשּׁוּעָל הַמָּהִיר (״הַחוּם״) לֹא יָכוֹל לִקְפֹּץ 9.94 מֶטְרִים, נָכוֹן? ברר, 1.5°C- בַּחוּץ!" },
                { "id": 4, "title": "関西国際空港限定トートバッグ すもももももももものうち" },
                { "id": 5, "title": "ภาษาไทยง่ายนิดเดียว" },
                { "id": 6, "title": "The quick 在尊嚴和權利上一律平等。" },
            ]))
            .unwrap();

        let rtxn = index.read_txn().unwrap();
        let key_jpn = (Script::Cj, Language::Jpn);
        let key_cmn = (Script::Cj, Language::Cmn);
        let cj_jpn_docs = index.script_language_documents_ids(&rtxn, &key_jpn).unwrap().unwrap();
        let cj_cmn_docs = index.script_language_documents_ids(&rtxn, &key_cmn).unwrap().unwrap();
        let expected_cj_jpn_docids = [3].iter().collect();
        assert_eq!(cj_jpn_docs, expected_cj_jpn_docids);
        let expected_cj_cmn_docids = [1, 5].iter().collect();
        assert_eq!(cj_cmn_docs, expected_cj_cmn_docids);
    }

    #[test]
    fn add_and_delete_documents_in_single_transform() {
        let mut index = TempIndex::new();
@ -29,9 +29,9 @@ use std::cell::RefCell;

/// - An example of a type that verifies (1) and (2) is [`std::rc::Rc`] (when `T` is `Send` and `Sync`).
/// - An example of a type that doesn't verify (1) is thread-local data.
/// - An example of a type that doesn't verify (2) is [`std::sync::MutexGuard`]: a lot of mutex implementations require that
///   a lock is returned to the operating system on the same thread that initially locked the mutex; failing to uphold this
///   invariant will cause Undefined Behavior
///   (see last § in [the nomicon](https://doc.rust-lang.org/nomicon/send-and-sync.html)).
///
/// It is **always safe** to implement this trait on a type that is `Send`, but no placeholder impl is provided due to limitations in
/// coherency. Use the [`FullySend`] wrapper in this situation.
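For context, a hedged sketch of what such a wrapper can look like (the actual `FullySend` definition is not part of this diff): when `T: Send`, the contract described above holds trivially, so the impl is sound.

```rust
// Sketch, assuming MostlySend is an unsafe marker trait as the safety
// wording above suggests; the real definition may differ.
struct FullySend<T>(pub T);

// Sound by definition: a Send value may be sent to, and used from, any thread.
unsafe impl<T: Send> MostlySend for FullySend<T> {}
```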
@ -86,7 +86,7 @@ impl<T: MostlySend> MostlySendWrapper<T> {

/// # Safety
///
/// 1. `T` is [`MostlySend`], so by its safety contract it can be accessed by any thread and all of its operations are available
///    from any thread.
/// 2. (P1) of `MostlySendWrapper::new` forces the user to never access the value from multiple threads concurrently.
unsafe impl<T: MostlySend> Send for MostlySendWrapper<T> {}

@ -86,9 +86,9 @@ pub enum EmbedErrorKind {
    },
    option_info(.0.as_deref(), "server replied with "))]
    RestBadRequest(Option<String>, ConfigurationSource),
    #[error("received internal error HTTP {0} from embedding server{}", option_info(.1.as_deref(), "server replied with "))]
    #[error("received internal error HTTP {} from embedding server{}", .0, option_info(.1.as_deref(), "server replied with "))]
    RestInternalServerError(u16, Option<String>),
    #[error("received unexpected HTTP {0} from embedding server{}", option_info(.1.as_deref(), "server replied with "))]
    #[error("received unexpected HTTP {} from embedding server{}", .0, option_info(.1.as_deref(), "server replied with "))]
    RestOtherStatusCode(u16, Option<String>),
    #[error("could not reach embedding server:\n - {0}")]
    RestNetwork(ureq::Transport),
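Note that the rewritten attributes only move the status code out of the format string: with thiserror, an inline `{0}` and an explicit `{}` argument fed by `.0` render the same text. A minimal sketch with a hypothetical error type:

```rust
use thiserror::Error;

#[derive(Debug, Error)]
enum StatusError {
    // Positional field referenced inline in the format string.
    #[error("received HTTP {0}")]
    Inline(u16),
    // Equivalent: an explicit format argument referring to field `.0`.
    #[error("received HTTP {}", .0)]
    Explicit(u16),
}

fn main() {
    assert_eq!(StatusError::Inline(500).to_string(), StatusError::Explicit(500).to_string());
}
```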
@ -163,8 +163,10 @@ impl Embedder {

        let token_ids = Tensor::stack(&token_ids, 0).map_err(EmbedError::tensor_shape)?;
        let token_type_ids = token_ids.zeros_like().map_err(EmbedError::tensor_shape)?;
        let embeddings =
            self.model.forward(&token_ids, &token_type_ids).map_err(EmbedError::model_forward)?;
        let embeddings = self
            .model
            .forward(&token_ids, &token_type_ids, None)
            .map_err(EmbedError::model_forward)?;

        // Apply some avg-pooling by taking the mean embedding value for all tokens (including padding)
        let (_n_sentence, n_tokens, _hidden_size) =
@ -185,8 +187,10 @@ impl Embedder {
            Tensor::new(token_ids, &self.model.device).map_err(EmbedError::tensor_shape)?;
        let token_ids = Tensor::stack(&[token_ids], 0).map_err(EmbedError::tensor_shape)?;
        let token_type_ids = token_ids.zeros_like().map_err(EmbedError::tensor_shape)?;
        let embeddings =
            self.model.forward(&token_ids, &token_type_ids).map_err(EmbedError::model_forward)?;
        let embeddings = self
            .model
            .forward(&token_ids, &token_type_ids, None)
            .map_err(EmbedError::model_forward)?;

        // Apply some avg-pooling by taking the mean embedding value for all tokens (including padding)
        let (_n_sentence, n_tokens, _hidden_size) =
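The avg-pooling step mentioned in the comment amounts to summing over the token axis and dividing by the token count. A hedged standalone sketch, assuming the `candle_core` crate and the `(n_sentence, n_tokens, hidden_size)` layout implied above:

```rust
use candle_core::{Result, Tensor};

// Illustrative mean-pooling over the token axis; not the verbatim
// implementation from this file.
fn mean_pool(embeddings: &Tensor) -> Result<Tensor> {
    // embeddings: (n_sentence, n_tokens, hidden_size)
    let (_n_sentence, n_tokens, _hidden_size) = embeddings.dims3()?;
    // Sum over dim 1 (tokens), then divide by the token count.
    embeddings.sum(1)? / (n_tokens as f64)
}
```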
@ -1,3 +1,4 @@
use std::fmt;
use std::time::Instant;

use ordered_float::OrderedFloat;
@ -168,7 +169,6 @@ fn infer_api_key() -> String {
        .unwrap_or_default()
}

#[derive(Debug)]
pub struct Embedder {
    tokenizer: tiktoken_rs::CoreBPE,
    rest_embedder: RestEmbedder,
@ -302,3 +302,13 @@ impl Embedder {
        self.options.distribution()
    }
}

impl fmt::Debug for Embedder {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.debug_struct("Embedder")
            .field("tokenizer", &"CoreBPE")
            .field("rest_embedder", &self.rest_embedder)
            .field("options", &self.options)
            .finish()
    }
}
@ -175,7 +175,7 @@ impl Embedder {

    pub fn embed_tokens(
        &self,
        tokens: &[usize],
        tokens: &[u32],
        deadline: Option<Instant>,
    ) -> Result<Embedding, EmbedError> {
        let mut embeddings = embed(&self.data, tokens, 1, Some(self.dimensions), deadline)?;
@ -8,17 +8,17 @@ edition = "2021"

[dependencies]
color-spantrace = "0.2.1"
fxprof-processed-profile = "0.7.0"
serde = { version = "1.0.204", features = ["derive"] }
serde_json = "1.0.120"
tracing = "0.1.40"
tracing-error = "0.2.0"
tracing-subscriber = "0.3.18"
byte-unit = { version = "5.1.4", default-features = false, features = [
serde = { version = "1.0.217", features = ["derive"] }
serde_json = "1.0.135"
tracing = "0.1.41"
tracing-error = "0.2.1"
tracing-subscriber = "0.3.19"
byte-unit = { version = "5.1.6", default-features = false, features = [
    "std",
    "byte",
    "serde",
] }
tokio = { version = "1.38.0", features = ["sync"] }
tokio = { version = "1.42.0", features = ["sync"] }

[target.'cfg(any(target_os = "linux", target_os = "macos"))'.dependencies]
libproc = "0.14.8"
libproc = "0.14.10"
@ -11,34 +11,34 @@ license.workspace = true
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
anyhow = "1.0.86"
anyhow = "1.0.95"
build-info = { version = "1.7.0", path = "../build-info" }
cargo_metadata = "0.18.1"
clap = { version = "4.5.9", features = ["derive"] }
futures-core = "0.3.30"
futures-util = "0.3.30"
reqwest = { version = "0.12.5", features = [
cargo_metadata = "0.19.1"
clap = { version = "4.5.24", features = ["derive"] }
futures-core = "0.3.31"
futures-util = "0.3.31"
reqwest = { version = "0.12.12", features = [
    "stream",
    "json",
    "rustls-tls",
], default-features = false }
serde = { version = "1.0.204", features = ["derive"] }
serde_json = "1.0.120"
serde = { version = "1.0.217", features = ["derive"] }
serde_json = "1.0.135"
sha2 = "0.10.8"
sysinfo = "0.30.13"
time = { version = "0.3.36", features = [
sysinfo = "0.33.1"
time = { version = "0.3.37", features = [
    "serde",
    "serde-human-readable",
    "macros",
] }
tokio = { version = "1.38.0", features = [
tokio = { version = "1.42.0", features = [
    "rt",
    "net",
    "time",
    "process",
    "signal",
] }
tracing = "0.1.40"
tracing-subscriber = "0.3.18"
tracing = "0.1.41"
tracing-subscriber = "0.3.19"
tracing-trace = { version = "0.1.0", path = "../tracing-trace" }
uuid = { version = "1.10.0", features = ["v7", "serde"] }
uuid = { version = "1.11.0", features = ["v7", "serde"] }
@ -27,8 +27,7 @@ impl Environment {

        let unknown_string = String::from("Unknown");
        let mut system = System::new();
        system.refresh_cpu();
        system.refresh_cpu_frequency();
        system.refresh_cpu_all();
        system.refresh_memory();

        let (cpu, frequency) = match system.cpus().first() {
@ -50,9 +49,7 @@ impl Environment {
        if let Some(os) = System::os_version() {
            software.push(VersionInfo { name: os, version: String::from("kernel-release") });
        }
        if let Some(arch) = System::cpu_arch() {
            software.push(VersionInfo { name: arch, version: String::from("arch") });
        }
        software.push(VersionInfo { name: System::cpu_arch(), version: String::from("arch") });

        Self {
            hostname: System::host_name(),
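This hunk tracks two sysinfo API changes, presumably from the 0.30 → 0.33 bump in the manifests above: the per-aspect CPU refresh calls are merged into `refresh_cpu_all()`, and `System::cpu_arch()` now returns a `String` rather than an `Option<String>`. A hedged sketch of the updated API surface:

```rust
use sysinfo::System;

// Minimal probe using the sysinfo API as it appears after this change.
fn probe() {
    let mut system = System::new();
    system.refresh_cpu_all(); // replaces refresh_cpu() + refresh_cpu_frequency()
    system.refresh_memory();

    if let Some(cpu) = system.cpus().first() {
        println!("cpu: {} @ {} MHz", cpu.brand(), cpu.frequency());
    }
    // cpu_arch() returns a plain String here, no longer an Option.
    println!("arch: {}", System::cpu_arch());
}
```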
@ -1,3 +1,3 @@
[toolchain]
channel = "1.79.0"
channel = "1.81.0"
components = ["clippy"]