Merge pull request #5218 from meilisearch/upgrade-dependencies

Upgrade dependencies
This commit is contained in:
Clément Renault 2025-01-09 11:46:44 +01:00 committed by GitHub
commit 377fa09cb7
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
65 changed files with 1068 additions and 994 deletions

View File

@ -18,7 +18,7 @@ jobs:
timeout-minutes: 180 # 3h
steps:
- uses: actions/checkout@v3
- uses: dtolnay/rust-toolchain@1.79
- uses: dtolnay/rust-toolchain@1.81
with:
profile: minimal

View File

@ -66,7 +66,7 @@ jobs:
fetch-depth: 0 # fetch full history to be able to get main commit sha
ref: ${{ steps.comment-branch.outputs.head_ref }}
- uses: dtolnay/rust-toolchain@1.79
- uses: dtolnay/rust-toolchain@1.81
with:
profile: minimal

View File

@ -12,7 +12,7 @@ jobs:
timeout-minutes: 180 # 3h
steps:
- uses: actions/checkout@v3
- uses: dtolnay/rust-toolchain@1.79
- uses: dtolnay/rust-toolchain@1.81
with:
profile: minimal

View File

@ -18,7 +18,7 @@ jobs:
timeout-minutes: 4320 # 72h
steps:
- uses: actions/checkout@v3
- uses: dtolnay/rust-toolchain@1.79
- uses: dtolnay/rust-toolchain@1.81
with:
profile: minimal

View File

@ -44,7 +44,7 @@ jobs:
exit 1
fi
- uses: dtolnay/rust-toolchain@1.79
- uses: dtolnay/rust-toolchain@1.81
with:
profile: minimal

View File

@ -16,7 +16,7 @@ jobs:
timeout-minutes: 4320 # 72h
steps:
- uses: actions/checkout@v3
- uses: dtolnay/rust-toolchain@1.79
- uses: dtolnay/rust-toolchain@1.81
with:
profile: minimal

View File

@ -15,7 +15,7 @@ jobs:
runs-on: benchmarks
steps:
- uses: actions/checkout@v3
- uses: dtolnay/rust-toolchain@1.79
- uses: dtolnay/rust-toolchain@1.81
with:
profile: minimal

View File

@ -15,7 +15,7 @@ jobs:
runs-on: benchmarks
steps:
- uses: actions/checkout@v3
- uses: dtolnay/rust-toolchain@1.79
- uses: dtolnay/rust-toolchain@1.81
with:
profile: minimal

View File

@ -15,7 +15,7 @@ jobs:
runs-on: benchmarks
steps:
- uses: actions/checkout@v3
- uses: dtolnay/rust-toolchain@1.79
- uses: dtolnay/rust-toolchain@1.81
with:
profile: minimal

View File

@ -17,7 +17,7 @@ jobs:
run: |
apt-get update && apt-get install -y curl
apt-get install build-essential -y
- uses: dtolnay/rust-toolchain@1.79
- uses: dtolnay/rust-toolchain@1.81
- name: Install cargo-flaky
run: cargo install cargo-flaky
- name: Run cargo flaky in the dumps

View File

@ -12,7 +12,7 @@ jobs:
timeout-minutes: 4320 # 72h
steps:
- uses: actions/checkout@v3
- uses: dtolnay/rust-toolchain@1.79
- uses: dtolnay/rust-toolchain@1.81
with:
profile: minimal

View File

@ -25,7 +25,7 @@ jobs:
run: |
apt-get update && apt-get install -y curl
apt-get install build-essential -y
- uses: dtolnay/rust-toolchain@1.79
- uses: dtolnay/rust-toolchain@1.81
- name: Install cargo-deb
run: cargo install cargo-deb
- uses: actions/checkout@v3

View File

@ -45,7 +45,7 @@ jobs:
run: |
apt-get update && apt-get install -y curl
apt-get install build-essential -y
- uses: dtolnay/rust-toolchain@1.79
- uses: dtolnay/rust-toolchain@1.81
- name: Build
run: cargo build --release --locked
# No need to upload binaries for dry run (cron)
@ -75,7 +75,7 @@ jobs:
asset_name: meilisearch-windows-amd64.exe
steps:
- uses: actions/checkout@v3
- uses: dtolnay/rust-toolchain@1.79
- uses: dtolnay/rust-toolchain@1.81
- name: Build
run: cargo build --release --locked
# No need to upload binaries for dry run (cron)
@ -101,7 +101,7 @@ jobs:
- name: Checkout repository
uses: actions/checkout@v3
- name: Installing Rust toolchain
uses: dtolnay/rust-toolchain@1.79
uses: dtolnay/rust-toolchain@1.81
with:
profile: minimal
target: ${{ matrix.target }}
@ -148,7 +148,7 @@ jobs:
add-apt-repository "deb [arch=$(dpkg --print-architecture)] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable"
apt-get update -y && apt-get install -y docker-ce
- name: Installing Rust toolchain
uses: dtolnay/rust-toolchain@1.79
uses: dtolnay/rust-toolchain@1.81
with:
profile: minimal
target: ${{ matrix.target }}

View File

@ -31,7 +31,7 @@ jobs:
apt-get update && apt-get install -y curl
apt-get install build-essential -y
- name: Setup test with Rust stable
uses: dtolnay/rust-toolchain@1.79
uses: dtolnay/rust-toolchain@1.81
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.7.7
- name: Run cargo check without any default features
@ -56,7 +56,7 @@ jobs:
- uses: actions/checkout@v3
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.7.7
- uses: dtolnay/rust-toolchain@1.79
- uses: dtolnay/rust-toolchain@1.81
- name: Run cargo check without any default features
uses: actions-rs/cargo@v1
with:
@ -81,7 +81,7 @@ jobs:
run: |
apt-get update
apt-get install --assume-yes build-essential curl
- uses: dtolnay/rust-toolchain@1.79
- uses: dtolnay/rust-toolchain@1.81
- name: Run cargo build with almost all features
run: |
cargo build --workspace --locked --release --features "$(cargo xtask list-features --exclude-feature cuda)"
@ -101,7 +101,7 @@ jobs:
run: |
apt-get update
apt-get install --assume-yes build-essential curl
- uses: dtolnay/rust-toolchain@1.79
- uses: dtolnay/rust-toolchain@1.81
- name: Run cargo tree without default features and check lindera is not present
run: |
if cargo tree -f '{p} {f}' -e normal --no-default-features | grep -qz lindera; then
@ -125,7 +125,7 @@ jobs:
run: |
apt-get update && apt-get install -y curl
apt-get install build-essential -y
- uses: dtolnay/rust-toolchain@1.79
- uses: dtolnay/rust-toolchain@1.81
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.7.7
- name: Run tests in debug
@ -139,7 +139,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: dtolnay/rust-toolchain@1.79
- uses: dtolnay/rust-toolchain@1.81
with:
profile: minimal
components: clippy
@ -156,7 +156,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: dtolnay/rust-toolchain@1.79
- uses: dtolnay/rust-toolchain@1.81
with:
profile: minimal
toolchain: nightly-2024-07-09

View File

@ -18,7 +18,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: dtolnay/rust-toolchain@1.79
- uses: dtolnay/rust-toolchain@1.81
with:
profile: minimal
- name: Install sd

1
.gitignore vendored
View File

@ -10,6 +10,7 @@
/dumps
/bench
/_xtask_benchmark.ms
/benchmarks
# Snapshots
## ... large

1169
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@ -1,5 +1,5 @@
# Compile
FROM rust:1.79.0-alpine3.20 AS compiler
FROM rust:1.81.0-alpine3.20 AS compiler
RUN apk add -q --no-cache build-base openssl-dev

View File

@ -11,27 +11,27 @@ edition.workspace = true
license.workspace = true
[dependencies]
anyhow = "1.0.86"
anyhow = "1.0.95"
bumpalo = "3.16.0"
csv = "1.3.0"
csv = "1.3.1"
memmap2 = "0.9.5"
milli = { path = "../milli" }
mimalloc = { version = "0.1.43", default-features = false }
serde_json = { version = "1.0.120", features = ["preserve_order"] }
tempfile = "3.14.0"
serde_json = { version = "1.0.135", features = ["preserve_order"] }
tempfile = "3.15.0"
[dev-dependencies]
criterion = { version = "0.5.1", features = ["html_reports"] }
rand = "0.8.5"
rand_chacha = "0.3.1"
roaring = "0.10.7"
roaring = "0.10.10"
[build-dependencies]
anyhow = "1.0.86"
bytes = "1.6.0"
anyhow = "1.0.95"
bytes = "1.9.0"
convert_case = "0.6.0"
flate2 = "1.0.30"
reqwest = { version = "0.12.5", features = ["blocking", "rustls-tls"], default-features = false }
flate2 = "1.0.35"
reqwest = { version = "0.12.12", features = ["blocking", "rustls-tls"], default-features = false }
[features]
default = ["milli/all-tokenizations"]

View File

@ -11,8 +11,8 @@ license.workspace = true
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
time = { version = "0.3.36", features = ["parsing"] }
time = { version = "0.3.37", features = ["parsing"] }
[build-dependencies]
anyhow = "1.0.86"
vergen-git2 = "1.0.0"
anyhow = "1.0.95"
vergen-git2 = "1.0.2"

View File

@ -11,21 +11,21 @@ readme.workspace = true
license.workspace = true
[dependencies]
anyhow = "1.0.86"
flate2 = "1.0.30"
http = "1.1.0"
anyhow = "1.0.95"
flate2 = "1.0.35"
http = "1.2.0"
meilisearch-types = { path = "../meilisearch-types" }
once_cell = "1.19.0"
regex = "1.10.5"
roaring = { version = "0.10.7", features = ["serde"] }
serde = { version = "1.0.204", features = ["derive"] }
serde_json = { version = "1.0.120", features = ["preserve_order"] }
tar = "0.4.41"
tempfile = "3.10.1"
thiserror = "1.0.61"
time = { version = "0.3.36", features = ["serde-well-known", "formatting", "parsing", "macros"] }
tracing = "0.1.40"
uuid = { version = "1.10.0", features = ["serde", "v4"] }
once_cell = "1.20.2"
regex = "1.11.1"
roaring = { version = "0.10.10", features = ["serde"] }
serde = { version = "1.0.217", features = ["derive"] }
serde_json = { version = "1.0.135", features = ["preserve_order"] }
tar = "0.4.43"
tempfile = "3.15.0"
thiserror = "2.0.9"
time = { version = "0.3.37", features = ["serde-well-known", "formatting", "parsing", "macros"] }
tracing = "0.1.41"
uuid = { version = "1.11.0", features = ["serde", "v4"] }
[dev-dependencies]
big_s = "1.0.2"

View File

@ -11,7 +11,7 @@ edition.workspace = true
license.workspace = true
[dependencies]
tempfile = "3.10.1"
thiserror = "1.0.61"
tracing = "0.1.40"
uuid = { version = "1.10.0", features = ["serde", "v4"] }
tempfile = "3.15.0"
thiserror = "2.0.9"
tracing = "0.1.41"
uuid = { version = "1.11.0", features = ["serde", "v4"] }

View File

@ -17,4 +17,5 @@ nom_locate = "4.2.0"
unescaper = "0.1.5"
[dev-dependencies]
insta = "1.39.0"
# fixed version due to format breakages in v1.40
insta = "=1.39.0"

View File

@ -11,12 +11,12 @@ edition.workspace = true
license.workspace = true
[dependencies]
arbitrary = { version = "1.3.2", features = ["derive"] }
arbitrary = { version = "1.4.1", features = ["derive"] }
bumpalo = "3.16.0"
clap = { version = "4.5.9", features = ["derive"] }
clap = { version = "4.5.24", features = ["derive"] }
either = "1.13.0"
fastrand = "2.1.0"
fastrand = "2.3.0"
milli = { path = "../milli" }
serde = { version = "1.0.204", features = ["derive"] }
serde_json = { version = "1.0.120", features = ["preserve_order"] }
tempfile = "3.10.1"
serde = { version = "1.0.217", features = ["derive"] }
serde_json = { version = "1.0.135", features = ["preserve_order"] }
tempfile = "3.15.0"

View File

@ -11,42 +11,43 @@ edition.workspace = true
license.workspace = true
[dependencies]
anyhow = "1.0.86"
anyhow = "1.0.95"
bincode = "1.3.3"
bumpalo = "3.16.0"
bumparaw-collections = "0.1.2"
bumparaw-collections = "0.1.4"
convert_case = "0.6.0"
csv = "1.3.0"
derive_builder = "0.20.0"
csv = "1.3.1"
derive_builder = "0.20.2"
dump = { path = "../dump" }
enum-iterator = "2.1.0"
file-store = { path = "../file-store" }
flate2 = "1.0.30"
flate2 = "1.0.35"
meilisearch-auth = { path = "../meilisearch-auth" }
meilisearch-types = { path = "../meilisearch-types" }
memmap2 = "0.9.4"
memmap2 = "0.9.5"
page_size = "0.6.0"
rayon = "1.10.0"
roaring = { version = "0.10.7", features = ["serde"] }
serde = { version = "1.0.204", features = ["derive"] }
serde_json = { version = "1.0.120", features = ["preserve_order"] }
roaring = { version = "0.10.10", features = ["serde"] }
serde = { version = "1.0.217", features = ["derive"] }
serde_json = { version = "1.0.135", features = ["preserve_order"] }
synchronoise = "1.0.1"
tempfile = "3.10.1"
thiserror = "1.0.61"
time = { version = "0.3.36", features = [
tempfile = "3.15.0"
thiserror = "2.0.9"
time = { version = "0.3.37", features = [
"serde-well-known",
"formatting",
"parsing",
"macros",
] }
tracing = "0.1.40"
ureq = "2.10.0"
uuid = { version = "1.10.0", features = ["serde", "v4"] }
tracing = "0.1.41"
ureq = "2.12.1"
uuid = { version = "1.11.0", features = ["serde", "v4"] }
[dev-dependencies]
arroy = "0.5.0"
big_s = "1.0.2"
crossbeam-channel = "0.5.13"
insta = { version = "1.39.0", features = ["json", "redactions"] }
crossbeam-channel = "0.5.14"
# fixed version due to format breakages in v1.40
insta = { version = "=1.39.0", features = ["json", "redactions"] }
maplit = "1.0.2"
meili-snap = { path = "../meili-snap" }

View File

@ -106,7 +106,7 @@ impl IndexScheduler {
progress.update_progress(DumpCreationProgress::DumpTheIndexes);
let nb_indexes = self.index_mapper.index_mapping.len(&rtxn)? as u32;
let mut count = 0;
self.index_mapper.try_for_each_index(&rtxn, |uid, index| -> Result<()> {
let () = self.index_mapper.try_for_each_index(&rtxn, |uid, index| -> Result<()> {
progress.update_progress(VariableNameStep::new(uid.to_string(), count, nb_indexes));
count += 1;

View File

@ -14,4 +14,4 @@ license.workspace = true
# fixed version due to format breakages in v1.40
insta = { version = "=1.39.0", features = ["json", "redactions"] }
md5 = "0.7.0"
once_cell = "1.19"
once_cell = "1.20"

View File

@ -17,10 +17,10 @@ hmac = "0.12.1"
maplit = "1.0.2"
meilisearch-types = { path = "../meilisearch-types" }
rand = "0.8.5"
roaring = { version = "0.10.7", features = ["serde"] }
serde = { version = "1.0.204", features = ["derive"] }
serde_json = { version = "1.0.120", features = ["preserve_order"] }
roaring = { version = "0.10.10", features = ["serde"] }
serde = { version = "1.0.217", features = ["derive"] }
serde_json = { version = "1.0.135", features = ["preserve_order"] }
sha2 = "0.10.8"
thiserror = "1.0.61"
time = { version = "0.3.36", features = ["serde-well-known", "formatting", "parsing", "macros"] }
uuid = { version = "1.10.0", features = ["serde", "v4"] }
thiserror = "2.0.9"
time = { version = "0.3.37", features = ["serde-well-known", "formatting", "parsing", "macros"] }
uuid = { version = "1.11.0", features = ["serde", "v4"] }

View File

@ -11,40 +11,41 @@ edition.workspace = true
license.workspace = true
[dependencies]
actix-web = { version = "4.8.0", default-features = false }
anyhow = "1.0.86"
actix-web = { version = "4.9.0", default-features = false }
anyhow = "1.0.95"
bumpalo = "3.16.0"
convert_case = "0.6.0"
csv = "1.3.0"
deserr = { version = "0.6.2", features = ["actix-web"] }
csv = "1.3.1"
deserr = { version = "0.6.3", features = ["actix-web"] }
either = { version = "1.13.0", features = ["serde"] }
enum-iterator = "2.1.0"
file-store = { path = "../file-store" }
flate2 = "1.0.30"
flate2 = "1.0.35"
fst = "0.4.7"
memmap2 = "0.9.4"
memmap2 = "0.9.5"
milli = { path = "../milli" }
bumparaw-collections = "0.1.2"
roaring = { version = "0.10.7", features = ["serde"] }
bumparaw-collections = "0.1.4"
roaring = { version = "0.10.10", features = ["serde"] }
rustc-hash = "2.1.0"
serde = { version = "1.0.204", features = ["derive"] }
serde = { version = "1.0.217", features = ["derive"] }
serde-cs = "0.2.4"
serde_json = "1.0.120"
tar = "0.4.41"
tempfile = "3.10.1"
thiserror = "1.0.61"
time = { version = "0.3.36", features = [
serde_json = "1.0.135"
tar = "0.4.43"
tempfile = "3.15.0"
thiserror = "2.0.9"
time = { version = "0.3.37", features = [
"serde-well-known",
"formatting",
"parsing",
"macros",
] }
tokio = "1.38"
utoipa = { version = "5.2.0", features = ["macros"] }
uuid = { version = "1.10.0", features = ["serde", "v4"] }
tokio = "1.42"
utoipa = { version = "5.3.1", features = ["macros"] }
uuid = { version = "1.11.0", features = ["serde", "v4"] }
[dev-dependencies]
insta = "1.39.0"
# fixed version due to format breakages in v1.40
insta = "=1.39.0"
meili-snap = { path = "../meili-snap" }
[features]

View File

@ -14,42 +14,42 @@ default-run = "meilisearch"
[dependencies]
actix-cors = "0.7.0"
actix-http = { version = "3.8.0", default-features = false, features = [
actix-http = { version = "3.9.0", default-features = false, features = [
"compress-brotli",
"compress-gzip",
"rustls-0_23",
] }
actix-utils = "3.0.1"
actix-web = { version = "4.8.0", default-features = false, features = [
actix-web = { version = "4.9.0", default-features = false, features = [
"macros",
"compress-brotli",
"compress-gzip",
"cookies",
"rustls-0_23",
] }
anyhow = { version = "1.0.86", features = ["backtrace"] }
async-trait = "0.1.81"
bstr = "1.9.1"
byte-unit = { version = "5.1.4", default-features = false, features = [
anyhow = { version = "1.0.95", features = ["backtrace"] }
async-trait = "0.1.85"
bstr = "1.11.3"
byte-unit = { version = "5.1.6", default-features = false, features = [
"std",
"byte",
"serde",
] }
bytes = "1.6.0"
clap = { version = "4.5.9", features = ["derive", "env"] }
crossbeam-channel = "0.5.13"
deserr = { version = "0.6.2", features = ["actix-web"] }
bytes = "1.9.0"
clap = { version = "4.5.24", features = ["derive", "env"] }
crossbeam-channel = "0.5.14"
deserr = { version = "0.6.3", features = ["actix-web"] }
dump = { path = "../dump" }
either = "1.13.0"
file-store = { path = "../file-store" }
flate2 = "1.0.30"
flate2 = "1.0.35"
fst = "0.4.7"
futures = "0.3.30"
futures-util = "0.3.30"
futures = "0.3.31"
futures-util = "0.3.31"
index-scheduler = { path = "../index-scheduler" }
indexmap = { version = "2.2.6", features = ["serde"] }
is-terminal = "0.4.12"
itertools = "0.13.0"
indexmap = { version = "2.7.0", features = ["serde"] }
is-terminal = "0.4.13"
itertools = "0.14.0"
jsonwebtoken = "9.3.0"
lazy_static = "1.5.0"
meilisearch-auth = { path = "../meilisearch-auth" }
@ -58,80 +58,81 @@ mimalloc = { version = "0.1.43", default-features = false }
mime = "0.3.17"
num_cpus = "1.16.0"
obkv = "0.3.0"
once_cell = "1.19.0"
ordered-float = "4.2.1"
once_cell = "1.20.2"
ordered-float = "4.6.0"
parking_lot = "0.12.3"
permissive-json-pointer = { path = "../permissive-json-pointer" }
pin-project-lite = "0.2.14"
pin-project-lite = "0.2.16"
platform-dirs = "0.3.0"
prometheus = { version = "0.13.4", features = ["process"] }
rand = "0.8.5"
rayon = "1.10.0"
regex = "1.10.5"
reqwest = { version = "0.12.5", features = [
regex = "1.11.1"
reqwest = { version = "0.12.12", features = [
"rustls-tls",
"json",
], default-features = false }
rustls = { version = "0.23.11", features = ["ring"], default-features = false }
rustls-pki-types = { version = "1.7.0", features = ["alloc"] }
rustls-pemfile = "2.1.2"
segment = { version = "0.2.4" }
serde = { version = "1.0.204", features = ["derive"] }
serde_json = { version = "1.0.120", features = ["preserve_order"] }
rustls = { version = "0.23.20", features = ["ring"], default-features = false }
rustls-pki-types = { version = "1.10.1", features = ["alloc"] }
rustls-pemfile = "2.2.0"
segment = { version = "0.2.5" }
serde = { version = "1.0.217", features = ["derive"] }
serde_json = { version = "1.0.135", features = ["preserve_order"] }
sha2 = "0.10.8"
siphasher = "1.0.1"
slice-group-by = "0.3.1"
static-files = { version = "0.2.4", optional = true }
sysinfo = "0.30.13"
tar = "0.4.41"
tempfile = "3.10.1"
thiserror = "1.0.61"
time = { version = "0.3.36", features = [
sysinfo = "0.33.1"
tar = "0.4.43"
tempfile = "3.15.0"
thiserror = "2.0.9"
time = { version = "0.3.37", features = [
"serde-well-known",
"formatting",
"parsing",
"macros",
] }
tokio = { version = "1.38.0", features = ["full"] }
toml = "0.8.14"
uuid = { version = "1.10.0", features = ["serde", "v4"] }
tokio = { version = "1.42.0", features = ["full"] }
toml = "0.8.19"
uuid = { version = "1.11.0", features = ["serde", "v4"] }
serde_urlencoded = "0.7.1"
termcolor = "1.4.1"
url = { version = "2.5.2", features = ["serde"] }
tracing = "0.1.40"
tracing-subscriber = { version = "0.3.18", features = ["json"] }
url = { version = "2.5.4", features = ["serde"] }
tracing = "0.1.41"
tracing-subscriber = { version = "0.3.19", features = ["json"] }
tracing-trace = { version = "0.1.0", path = "../tracing-trace" }
tracing-actix-web = "0.7.11"
tracing-actix-web = "0.7.15"
build-info = { version = "1.7.0", path = "../build-info" }
roaring = "0.10.7"
roaring = "0.10.10"
mopa-maintained = "0.2.3"
utoipa = { version = "5.2.0", features = ["actix_extras", "macros", "non_strict_integers", "preserve_order", "uuid", "time", "openapi_extensions"] }
utoipa-scalar = { version = "0.2.0", optional = true, features = ["actix-web"] }
utoipa = { version = "5.3.1", features = ["actix_extras", "macros", "non_strict_integers", "preserve_order", "uuid", "time", "openapi_extensions"] }
utoipa-scalar = { version = "0.2.1", optional = true, features = ["actix-web"] }
[dev-dependencies]
actix-rt = "2.10.0"
brotli = "6.0.0"
insta = "1.39.0"
# fixed version due to format breakages in v1.40
insta = "=1.39.0"
manifest-dir-macros = "0.1.18"
maplit = "1.0.2"
meili-snap = { path = "../meili-snap" }
temp-env = "0.3.6"
urlencoding = "2.1.3"
wiremock = "0.6.0"
wiremock = "0.6.2"
yaup = "0.3.1"
[build-dependencies]
anyhow = { version = "1.0.86", optional = true }
cargo_toml = { version = "0.20.3", optional = true }
anyhow = { version = "1.0.95", optional = true }
cargo_toml = { version = "0.21.0", optional = true }
hex = { version = "0.4.3", optional = true }
reqwest = { version = "0.12.5", features = [
reqwest = { version = "0.12.12", features = [
"blocking",
"rustls-tls",
], default-features = false, optional = true }
sha-1 = { version = "0.10.1", optional = true }
static-files = { version = "0.2.4", optional = true }
tempfile = { version = "3.10.1", optional = true }
zip = { version = "2.1.3", optional = true }
tempfile = { version = "3.15.0", optional = true }
zip = { version = "2.2.2", optional = true }
[features]
default = ["meilisearch-types/all-tokenizations", "mini-dashboard"]

View File

@ -426,13 +426,9 @@ impl Segment {
&AuthFilter::default(),
) {
// Replace the version number with the prototype name if any.
let version = if let Some(prototype) = build_info::DescribeResult::from_build()
let version = build_info::DescribeResult::from_build()
.and_then(|describe| describe.as_prototype())
{
prototype
} else {
env!("CARGO_PKG_VERSION")
};
.unwrap_or(env!("CARGO_PKG_VERSION"));
let _ = self
.batcher

View File

@ -19,15 +19,15 @@ pub enum MeilisearchHttpError {
#[error("The Content-Type `{0}` does not support the use of a csv delimiter. The csv delimiter can only be used with the Content-Type `text/csv`.")]
CsvDelimiterWithWrongContentType(String),
#[error(
"The Content-Type `{0}` is invalid. Accepted values for the Content-Type header are: {}",
.1.iter().map(|s| format!("`{}`", s)).collect::<Vec<_>>().join(", ")
"The Content-Type `{}` is invalid. Accepted values for the Content-Type header are: {}",
.0, .1.iter().map(|s| format!("`{}`", s)).collect::<Vec<_>>().join(", ")
)]
InvalidContentType(String, Vec<String>),
#[error("Document `{0}` not found.")]
DocumentNotFound(String),
#[error("Sending an empty filter is forbidden.")]
EmptyFilter,
#[error("Invalid syntax for the filter parameter: `expected {}, found: {1}`.", .0.join(", "))]
#[error("Invalid syntax for the filter parameter: `expected {}, found: {}`.", .0.join(", "), .1)]
InvalidExpression(&'static [&'static str], Value),
#[error("Using `federationOptions` is not allowed in a non-federated search.\n - Hint: remove `federationOptions` from query #{0} or add `federation` to the request.")]
FederationOptionsInNonFederatedRequest(usize),

View File

@ -188,13 +188,13 @@ impl tracing_actix_web::RootSpanBuilder for AwebTracingLogger {
if let Some(error) = response.response().error() {
// use the status code already constructed for the outgoing HTTP response
span.record("error", &tracing::field::display(error.as_response_error()));
span.record("error", tracing::field::display(error.as_response_error()));
}
}
Err(error) => {
let code: i32 = error.error_response().status().as_u16().into();
span.record("status_code", code);
span.record("error", &tracing::field::display(error.as_response_error()));
span.record("error", tracing::field::display(error.as_response_error()));
}
};
}

View File

@ -760,8 +760,8 @@ impl MaxMemory {
/// Returns the total amount of bytes available or `None` if this system isn't supported.
fn total_memory_bytes() -> Option<u64> {
if sysinfo::IS_SUPPORTED_SYSTEM {
let memory_kind = RefreshKind::new().with_memory(MemoryRefreshKind::new().with_ram());
let mut system = System::new_with_specifics(memory_kind);
let mem_kind = RefreshKind::nothing().with_memory(MemoryRefreshKind::nothing().with_ram());
let mut system = System::new_with_specifics(mem_kind);
system.refresh_memory();
Some(system.total_memory())
} else {

View File

@ -1,7 +1,3 @@
use crate::extractors::authentication::policies::ActionPolicy;
use crate::extractors::authentication::{AuthenticationError, GuardedData};
use crate::routes::create_all_stats;
use crate::search_queue::SearchQueue;
use actix_web::http::header;
use actix_web::web::{self, Data};
use actix_web::HttpResponse;
@ -14,6 +10,11 @@ use prometheus::{Encoder, TextEncoder};
use time::OffsetDateTime;
use utoipa::OpenApi;
use crate::extractors::authentication::policies::ActionPolicy;
use crate::extractors::authentication::{AuthenticationError, GuardedData};
use crate::routes::create_all_stats;
use crate::search_queue::SearchQueue;
#[derive(OpenApi)]
#[openapi(paths(get_metrics))]
pub struct MetricApi;

View File

@ -545,5 +545,5 @@ pub async fn get_health(
index_scheduler.health().unwrap();
auth_controller.health().unwrap();
Ok(HttpResponse::Ok().json(&HealthResponse::default()))
Ok(HttpResponse::Ok().json(HealthResponse::default()))
}

View File

@ -73,8 +73,8 @@ async fn get_and_paginate_indexes() {
let server = Server::new().await;
const NB_INDEXES: usize = 50;
for i in 0..NB_INDEXES {
server.index(&format!("test_{i:02}")).create(None).await;
server.index(&format!("test_{i:02}")).wait_task(i as u64).await;
server.index(format!("test_{i:02}")).create(None).await;
server.index(format!("test_{i:02}")).wait_task(i as u64).await;
}
// basic

View File

@ -9,16 +9,16 @@ edition.workspace = true
license.workspace = true
[dependencies]
anyhow = "1.0.86"
anyhow = "1.0.95"
arroy_v04_to_v05 = { package = "arroy", git = "https://github.com/meilisearch/arroy/", tag = "DO-NOT-DELETE-upgrade-v04-to-v05" }
clap = { version = "4.5.9", features = ["derive"] }
clap = { version = "4.5.24", features = ["derive"] }
dump = { path = "../dump" }
file-store = { path = "../file-store" }
indexmap = {version = "2.7.0", features = ["serde"]}
meilisearch-auth = { path = "../meilisearch-auth" }
meilisearch-types = { path = "../meilisearch-types" }
serde = { version = "1.0.209", features = ["derive"] }
serde_json = {version = "1.0.133", features = ["preserve_order"]}
tempfile = "3.14.0"
time = { version = "0.3.36", features = ["formatting", "parsing", "alloc"] }
uuid = { version = "1.10.0", features = ["v4"], default-features = false }
serde = { version = "1.0.217", features = ["derive"] }
serde_json = {version = "1.0.135", features = ["preserve_order"]}
tempfile = "3.15.0"
time = { version = "0.3.37", features = ["formatting", "parsing", "alloc"] }
uuid = { version = "1.11.0", features = ["v4"], default-features = false }

View File

@ -15,68 +15,68 @@ license.workspace = true
big_s = "1.0.2"
bimap = { version = "0.6.3", features = ["serde"] }
bincode = "1.3.3"
bstr = "1.9.1"
bytemuck = { version = "1.18.0", features = ["extern_crate_alloc"] }
bstr = "1.11.3"
bytemuck = { version = "1.21.0", features = ["extern_crate_alloc"] }
byteorder = "1.5.0"
charabia = { version = "0.9.2", default-features = false }
concat-arrays = "0.1.2"
crossbeam-channel = "0.5.13"
deserr = "0.6.2"
crossbeam-channel = "0.5.14"
deserr = "0.6.3"
either = { version = "1.13.0", features = ["serde"] }
flatten-serde-json = { path = "../flatten-serde-json" }
fst = "0.4.7"
fxhash = "0.2.1"
geoutils = "0.5.1"
grenad = { version = "0.5.0", default-features = false, features = ["rayon", "tempfile"] }
heed = { version = "0.20.3", default-features = false, features = [
heed = { version = "0.20.5", default-features = false, features = [
"serde-json",
"serde-bincode",
"read-txn-no-tls",
] }
indexmap = { version = "2.2.6", features = ["serde"] }
indexmap = { version = "2.7.0", features = ["serde"] }
json-depth-checker = { path = "../json-depth-checker" }
levenshtein_automata = { version = "0.2.1", features = ["fst_automaton"] }
memchr = "2.5.0"
memmap2 = "0.9.4"
memchr = "2.7.4"
memmap2 = "0.9.5"
obkv = "0.3.0"
once_cell = "1.19.0"
ordered-float = "4.2.1"
once_cell = "1.20.2"
ordered-float = "4.6.0"
rayon = "1.10.0"
roaring = { version = "0.10.7", features = ["serde"] }
rstar = { version = "0.12.0", features = ["serde"] }
serde = { version = "1.0.204", features = ["derive"] }
serde_json = { version = "1.0.120", features = ["preserve_order", "raw_value"] }
roaring = { version = "0.10.10", features = ["serde"] }
rstar = { version = "0.12.2", features = ["serde"] }
serde = { version = "1.0.217", features = ["derive"] }
serde_json = { version = "1.0.135", features = ["preserve_order", "raw_value"] }
slice-group-by = "0.3.1"
smallstr = { version = "0.3.0", features = ["serde"] }
smallvec = "1.13.2"
smartstring = "1.0.1"
tempfile = "3.10.1"
thiserror = "1.0.61"
time = { version = "0.3.36", features = [
tempfile = "3.15.0"
thiserror = "2.0.9"
time = { version = "0.3.37", features = [
"serde-well-known",
"formatting",
"parsing",
"macros",
] }
uuid = { version = "1.10.0", features = ["v4"] }
uuid = { version = "1.11.0", features = ["v4"] }
filter-parser = { path = "../filter-parser" }
# documents words self-join
itertools = "0.13.0"
itertools = "0.14.0"
csv = "1.3.0"
candle-core = { version = "0.6.0" }
candle-transformers = { version = "0.6.0" }
candle-nn = { version = "0.6.0" }
csv = "1.3.1"
candle-core = { version = "0.8.2" }
candle-transformers = { version = "0.8.2" }
candle-nn = { version = "0.8.2" }
tokenizers = { git = "https://github.com/huggingface/tokenizers.git", tag = "v0.15.2", version = "0.15.2", default-features = false, features = [
"onig",
] }
hf-hub = { git = "https://github.com/dureuill/hf-hub.git", branch = "rust_tls", default-features = false, features = [
"online",
] }
tiktoken-rs = "0.5.9"
liquid = "0.26.6"
tiktoken-rs = "0.6.0"
liquid = "0.26.9"
rhai = { git = "https://github.com/rhaiscript/rhai", rev = "ef3df63121d27aacd838f366f2b83fd65f20a1e4", features = [
"serde",
"no_module",
@ -86,25 +86,26 @@ rhai = { git = "https://github.com/rhaiscript/rhai", rev = "ef3df63121d27aacd838
] }
arroy = "0.5.0"
rand = "0.8.5"
tracing = "0.1.40"
ureq = { version = "2.10.0", features = ["json"] }
url = "2.5.2"
tracing = "0.1.41"
ureq = { version = "2.12.1", features = ["json"] }
url = "2.5.4"
rayon-par-bridge = "0.1.0"
hashbrown = "0.15.0"
hashbrown = "0.15.2"
bumpalo = "3.16.0"
bumparaw-collections = "0.1.2"
bumparaw-collections = "0.1.4"
thread_local = "1.1.8"
allocator-api2 = "0.2.18"
rustc-hash = "2.0.0"
allocator-api2 = "0.2.21"
rustc-hash = "2.1.0"
uell = "0.1.0"
enum-iterator = "2.1.0"
bbqueue = { git = "https://github.com/meilisearch/bbqueue" }
flume = { version = "0.11.1", default-features = false }
utoipa = { version = "5.0.2", features = ["non_strict_integers", "preserve_order", "uuid", "time", "openapi_extensions"] }
utoipa = { version = "5.3.1", features = ["non_strict_integers", "preserve_order", "uuid", "time", "openapi_extensions"] }
[dev-dependencies]
mimalloc = { version = "0.1.43", default-features = false }
insta = "1.39.0"
# fixed version due to format breakages in v1.40
insta = "=1.39.0"
maplit = "1.0.2"
md5 = "0.7.0"
meili-snap = { path = "../meili-snap" }

View File

@ -134,7 +134,7 @@ and can not be more than 511 bytes.", .document_id.to_string()
InvalidVectorsEmbedderConf { document_id: String, error: String },
#[error("{0}")]
InvalidFilter(String),
#[error("Invalid type for filter subexpression: expected: {}, found: {1}.", .0.join(", "))]
#[error("Invalid type for filter subexpression: expected: {}, found: {}.", .0.join(", "), .1)]
InvalidFilterExpression(&'static [&'static str], Value),
#[error("Attribute `{}` is not sortable. {}",
.field,

View File

@ -1,4 +1,3 @@
#![cfg_attr(all(test, fuzzing), feature(no_coverage))]
#![allow(clippy::type_complexity)]
#[cfg(not(windows))]

View File

@ -24,6 +24,7 @@ For example, the DeadEndsCache could say the following:
- if we take `g`, then `[f]` is also forbidden
- etc.
- etc.
As we traverse the graph, we also traverse the `DeadEndsCache` and keep a list of forbidden
conditions in memory. Then, we know to avoid all edges which have a condition that is forbidden.

View File

@ -12,7 +12,6 @@ e.g. `sunflower` equivalent to `sun flower`.
4. The prefix databases can be used to find the proximity between two words, but
they store fewer proximities than the regular word proximity DB.
*/
use std::collections::BTreeMap;

View File

@ -43,7 +43,8 @@ use crate::{CboRoaringBitmapCodec, Index, Result};
///
/// - `ModificationResult::Nothing` means that modifying the `facet_value` didn't have any impact on the `level`.
/// This case is reachable when a document id is removed from a sub-level node but is still present in another one.
/// For example, removing `2` from a document containing `2` and `3`, the document id will be removed from the `level 0` but should remain in the group node [1..4] in `level 1`.
/// For example, removing `2` from a document containing `2` and `3`, the document id will be removed from the `level 0`
/// but should remain in the group node [1..4] in `level 1`.
enum ModificationResult {
InPlace,
Expand,
@ -1059,208 +1060,3 @@ mod tests {
milli_snap!(format!("{index}"), "after_delete");
}
}
// fuzz tests
#[cfg(all(test, fuzzing))]
/**
Fuzz test for the incremental indexer.
The fuzz test uses fuzzcheck, a coverage-guided fuzzer.
See https://github.com/loiclec/fuzzcheck-rs and https://fuzzcheck.neocities.org
for more information.
It is only run when using the `cargo fuzzcheck` command line tool, which can be installed with:
```sh
cargo install cargo-fuzzcheck
```
To start the fuzz test, run (from the base folder or from milli/):
```sh
cargo fuzzcheck update::facet::incremental::fuzz::fuzz
```
and wait a couple minutes to make sure the code was thoroughly tested, then
hit `Ctrl-C` to stop the fuzzer. The corpus generated by the fuzzer is located in milli/fuzz.
To work on this module with rust-analyzer working properly, add the following to your .cargo/config.toml file:
```toml
[build]
rustflags = ["--cfg", "fuzzing"]
```
The fuzz test generates sequences of additions and deletions to the facet database and
ensures that:
1. its structure is still internally valid
2. its content is the same as a trivially correct implementation of the same database
*/
mod fuzz {
use std::collections::{BTreeMap, HashMap};
use std::iter::FromIterator;
use std::rc::Rc;
use fuzzcheck::mutators::integer::U8Mutator;
use fuzzcheck::mutators::integer_within_range::{U16WithinRangeMutator, U8WithinRangeMutator};
use fuzzcheck::mutators::vector::VecMutator;
use fuzzcheck::DefaultMutator;
use roaring::RoaringBitmap;
use tempfile::TempDir;
use super::*;
use crate::update::facet::test_helpers::FacetIndex;
/// In-memory reference model of a facet database: for every field id, an ordered map
/// from facet key to the bitmap of values stored under that key.
///
/// Empty bitmaps and empty per-field maps are never kept, so two databases that went
/// through the same logical operations compare equal entry by entry.
#[derive(Default)]
pub struct TrivialDatabase<T> {
    pub elements: BTreeMap<u16, BTreeMap<T, RoaringBitmap>>,
}

impl<T> TrivialDatabase<T>
where
    T: Ord + Clone + Eq + std::fmt::Debug,
{
    /// Unions `new_values` into the bitmap stored under `(field_id, new_key)`,
    /// creating the field and key entries on demand.
    ///
    /// An empty `new_values` is a no-op so that no empty bitmap is ever created.
    #[no_coverage]
    pub fn insert(&mut self, field_id: u16, new_key: &T, new_values: &RoaringBitmap) {
        if new_values.is_empty() {
            return;
        }
        let values_field_id = self.elements.entry(field_id).or_default();
        let values = values_field_id.entry(new_key.clone()).or_default();
        *values |= new_values;
    }

    /// Removes `values_to_remove` from the bitmap stored under `(field_id, key)`.
    ///
    /// Does nothing when the field or the key is absent. A key whose bitmap becomes
    /// empty is dropped, and a field left without any key is dropped as well.
    #[no_coverage]
    pub fn delete(&mut self, field_id: u16, key: &T, values_to_remove: &RoaringBitmap) {
        if let Some(values_field_id) = self.elements.get_mut(&field_id) {
            // `key` is already a `&T`: pass it as-is instead of borrowing it a second time.
            if let Some(values) = values_field_id.get_mut(key) {
                *values -= values_to_remove;
                if values.is_empty() {
                    values_field_id.remove(key);
                }
            }
            if values_field_id.is_empty() {
                self.elements.remove(&field_id);
            }
        }
    }
}
/// One fuzz-generated step: the facet key and field id to act on, the index tuning
/// parameters to apply for this step, and the kind of action (insert or delete).
///
/// Each `field_mutator` attribute selects the fuzzcheck mutator used to generate that field.
#[derive(Clone, DefaultMutator, serde::Serialize, serde::Deserialize)]
struct Operation {
// Facet key bytes; the vector mutator is configured with the range `0 ..= 5`
// (presumably the allowed length — confirm against the fuzzcheck `VecMutator` docs).
#[field_mutator(VecMutator<u8, U8Mutator> = { VecMutator::new(u8::default_mutator(), 0 ..= 5) })]
key: Vec<u8>,
// The three tuning parameters below are generated within `..32`.
#[field_mutator(U8WithinRangeMutator = { U8WithinRangeMutator::new(..32) })]
group_size: u8,
#[field_mutator(U8WithinRangeMutator = { U8WithinRangeMutator::new(..32) })]
max_group_size: u8,
#[field_mutator(U8WithinRangeMutator = { U8WithinRangeMutator::new(..32) })]
min_level_size: u8,
// Field id generated within `..=3`.
#[field_mutator(U16WithinRangeMutator = { U16WithinRangeMutator::new(..=3) })]
field_id: u16,
// Whether this step inserts or deletes values.
kind: OperationKind,
}
/// The action carried by an `Operation`, together with the `u8` values it applies to.
///
/// Both variants use a vector mutator configured with the range `0 ..= 10`
/// (presumably the allowed number of values — confirm against the fuzzcheck docs).
#[derive(Clone, DefaultMutator, serde::Serialize, serde::Deserialize)]
enum OperationKind {
// Values to add under the operation's key.
Insert(
#[field_mutator(VecMutator<u8, U8Mutator> = { VecMutator::new(U8Mutator::default(), 0 ..= 10) })]
Vec<u8>,
),
// Values to remove.
Delete(
#[field_mutator(VecMutator<u8, U8Mutator> = { VecMutator::new(U8Mutator::default(), 0 ..= 10) })]
Vec<u8>,
),
}
/// Replays `operations` against both a `FacetIndex` opened in `tempdir` and an in-memory
/// `TrivialDatabase`, then asserts that the entries read back from the index match the
/// trivial database and that the index structure is valid for every field id.
///
/// Everything runs inside a single write transaction that is aborted at the end.
/// Panics (through `unwrap`/`assert_eq!`) on any I/O error or mismatch.
#[no_coverage]
fn compare_with_trivial_database(tempdir: Rc<TempDir>, operations: &[Operation]) {
let index = FacetIndex::<BytesRefCodec>::open_from_tempdir(tempdir, 4, 8, 5); // dummy params, they'll be overwritten
let mut txn = index.env.write_txn().unwrap();
let mut trivial_db = TrivialDatabase::<Vec<u8>>::default();
// For each value, every key it was ever inserted under. The map is not keyed by
// field id, so it accumulates keys across all fields. Deletions use it to find
// the keys that may hold a given value.
let mut value_to_keys = HashMap::<u8, Vec<Vec<u8>>>::new();
for Operation { key, group_size, max_group_size, min_level_size, field_id, kind } in
operations
{
// Every operation carries its own tuning parameters; apply them before touching the index.
index.set_group_size(*group_size);
index.set_max_group_size(*max_group_size);
index.set_min_level_size(*min_level_size);
match kind {
OperationKind::Insert(values) => {
// Collect the values into one bitmap and record which key they went under.
let mut bitmap = RoaringBitmap::new();
for value in values {
bitmap.insert(*value as u32);
value_to_keys.entry(*value).or_default().push(key.clone());
}
// Apply the same insertion to the real index and to the reference model.
index.insert(&mut txn, *field_id, &key.as_slice(), &bitmap);
trivial_db.insert(*field_id, &key, &bitmap);
}
OperationKind::Delete(values) => {
let values = RoaringBitmap::from_iter(values.iter().copied().map(|x| x as u32));
// Group the values to delete by the keys they were previously inserted under.
// The operation's own `key` is not used here.
let mut values_per_key = HashMap::new();
for value in values {
// The cast back to `u8` is lossless: the bitmap was built from `u8` values.
if let Some(keys) = value_to_keys.get(&(value as u8)) {
for key in keys {
let values: &mut RoaringBitmap =
values_per_key.entry(key).or_default();
values.insert(value);
}
}
}
// Apply one deletion per key, on the operation's field id, to both databases.
for (key, values) in values_per_key {
index.delete(&mut txn, *field_id, &key.as_slice(), &values);
trivial_db.delete(*field_id, &key, &values);
}
}
}
}
// First pass: compare the index entries under each field-id prefix with the reference model.
// NOTE(review): `zip` stops at the shorter iterator, so only as many index entries
// as there are reference entries get compared.
for (field_id, values_field_id) in trivial_db.elements.iter() {
let level0iter = index
.content
.as_polymorph()
.prefix_iter::<_, Bytes, FacetGroupValueCodec>(&mut txn, &field_id.to_be_bytes())
.unwrap();
for ((key, values), group) in values_field_id.iter().zip(level0iter) {
let (group_key, group_values) = group.unwrap();
let group_key =
FacetGroupKeyCodec::<BytesRefCodec>::bytes_decode(group_key).unwrap();
assert_eq!(key, &group_key.left_bound);
assert_eq!(values, &group_values.bitmap);
}
}
// Second pass: the same comparison through a shared borrow of the transaction,
// followed by a structural validity check of the index for each field id.
for (field_id, values_field_id) in trivial_db.elements.iter() {
let level0iter = index
.content
.as_polymorph()
.prefix_iter::<_, Bytes, FacetGroupValueCodec>(&txn, &field_id.to_be_bytes())
.unwrap();
for ((key, values), group) in values_field_id.iter().zip(level0iter) {
let (group_key, group_values) = group.unwrap();
let group_key =
FacetGroupKeyCodec::<BytesRefCodec>::bytes_decode(group_key).unwrap();
assert_eq!(key, &group_key.left_bound);
assert_eq!(values, &group_values.bitmap);
}
index.verify_structure_validity(&txn, *field_id);
}
// Discard every write so nothing from this run is committed.
txn.abort().unwrap();
}
/// Fuzz entry point: feeds fuzzcheck-generated `Operation` sequences to
/// `compare_with_trivial_database` and fails if fuzzcheck reports a test failure.
#[test]
#[no_coverage]
fn fuzz() {
// A single temporary directory, shared through an `Rc`, is handed to every run of the closure.
let tempdir = Rc::new(TempDir::new().unwrap());
let tempdir_cloned = tempdir.clone();
let result = fuzzcheck::fuzz_test(move |operations: &[Operation]| {
compare_with_trivial_database(tempdir_cloned.clone(), operations)
})
.default_mutator()
.serde_serializer()
// The filter accepts only functions located in `incremental.rs` whose name does not
// contain `serde`, `tests::`, `fuzz::` or `display_bitmap` (presumably this selects
// the code whose coverage guides the fuzzer — confirm against the fuzzcheck docs).
.default_sensor_and_pool_with_custom_filter(|file, function| {
file == std::path::Path::new("milli/src/update/facet/incremental.rs")
&& !function.contains("serde")
&& !function.contains("tests::")
&& !function.contains("fuzz::")
&& !function.contains("display_bitmap")
})
.arguments_from_cargo_fuzzcheck()
.launch();
assert!(!result.found_test_failure);
}
}

View File

@ -346,35 +346,6 @@ pub(crate) mod test_helpers {
for<'a> BoundCodec:
BytesEncode<'a> + BytesDecode<'a, DItem = <BoundCodec as BytesEncode<'a>>::EItem>,
{
/// Opens a `FacetIndex` backed by an environment located in `tempdir`.
///
/// The three size parameters are forced into range before use:
/// `group_size` into `2..=16`, `max_group_size` to at least `2 * group_size` and then
/// capped at 16, and `min_level_size` into `1..=17`. The index keeps `tempdir` alive.
///
/// Panics if the environment or its unnamed database cannot be opened.
#[cfg(all(test, fuzzing))]
pub fn open_from_tempdir(
    tempdir: Rc<tempfile::TempDir>,
    group_size: u8,
    max_group_size: u8,
    min_level_size: u8,
) -> FacetIndex<BoundCodec> {
    // 2 <= x <= 16
    let group_size = group_size.clamp(2, 16);
    // raised to 2*group_size first, then capped at 16
    let max_group_size = max_group_size.max(group_size * 2).min(16);
    // 1 <= x <= 17
    let min_level_size = min_level_size.clamp(1, 17);

    let mut env_options = heed::EnvOpenOptions::new();
    let env_options = env_options.map_size(4096 * 4 * 10 * 1000);
    unsafe {
        env_options.flag(heed::flags::Flags::MdbAlwaysFreePages);
    }
    let env = env_options.open(tempdir.path()).unwrap();
    // The unnamed database must already exist in the environment, hence the double unwrap.
    let content = env.open_database(None).unwrap().unwrap();

    FacetIndex {
        content,
        group_size: Cell::new(group_size),
        max_group_size: Cell::new(max_group_size),
        min_level_size: Cell::new(min_level_size),
        _tempdir: tempdir,
        env,
        _phantom: PhantomData,
    }
}
pub fn new(
group_size: u8,
max_group_size: u8,
@ -402,26 +373,6 @@ pub(crate) mod test_helpers {
}
}
/// Stores `group_size` after forcing it into the range `2..=64`.
#[cfg(all(test, fuzzing))]
pub fn set_group_size(&self, group_size: u8) {
    let bounded = group_size.clamp(2, 64);
    self.group_size.set(bounded);
}
/// Stores `max_group_size` after forcing it into the range `4..=128`, then raises the
/// stored group size to half of that value when it is currently smaller.
///
/// NOTE(review): the adjustment guarantees `group_size >= max_group_size / 2`, whereas the
/// invariant written next to the original code was `2*group_size <= max_group_size`;
/// confirm which of the two is intended.
#[cfg(all(test, fuzzing))]
pub fn set_max_group_size(&self, max_group_size: u8) {
    let bounded = max_group_size.clamp(4, 128);
    self.max_group_size.set(bounded);

    let half = bounded / 2;
    if self.group_size.get() < half {
        self.group_size.set(half);
    }
}
/// Stores `min_level_size`, raising it to 1 when 0 is given; there is no upper bound.
#[cfg(all(test, fuzzing))]
pub fn set_min_level_size(&self, min_level_size: u8) {
    let at_least_one = min_level_size.max(1);
    self.min_level_size.set(at_least_one);
}
pub fn insert<'a>(
&self,
wtxn: &'a mut RwTxn<'_>,

View File

@ -2093,33 +2093,6 @@ mod tests {
index.add_documents(doc1).unwrap();
}
#[cfg(feature = "default")]
#[test]
fn store_detected_script_and_language_per_document_during_indexing() {
use charabia::{Language, Script};
let index = TempIndex::new();
index
.add_documents(documents!([
{ "id": 1, "title": "The quick (\"brown\") fox can't jump 32.3 feet, right? Brr, it's 29.3°F!" },
{ "id": 2, "title": "人人生而自由﹐在尊嚴和權利上一律平等。他們賦有理性和良心﹐並應以兄弟關係的精神互相對待。" },
{ "id": 3, "title": "הַשּׁוּעָל הַמָּהִיר (״הַחוּם״) לֹא יָכוֹל לִקְפֹּץ 9.94 מֶטְרִים, נָכוֹן? ברר, 1.5°C- בַּחוּץ!" },
{ "id": 4, "title": "関西国際空港限定トートバッグ すもももももももものうち" },
{ "id": 5, "title": "ภาษาไทยง่ายนิดเดียว" },
{ "id": 6, "title": "The quick 在尊嚴和權利上一律平等。" },
]))
.unwrap();
let rtxn = index.read_txn().unwrap();
let key_jpn = (Script::Cj, Language::Jpn);
let key_cmn = (Script::Cj, Language::Cmn);
let cj_jpn_docs = index.script_language_documents_ids(&rtxn, &key_jpn).unwrap().unwrap();
let cj_cmn_docs = index.script_language_documents_ids(&rtxn, &key_cmn).unwrap().unwrap();
let expected_cj_jpn_docids = [3].iter().collect();
assert_eq!(cj_jpn_docs, expected_cj_jpn_docids);
let expected_cj_cmn_docids = [1, 5].iter().collect();
assert_eq!(cj_cmn_docs, expected_cj_cmn_docids);
}
#[test]
fn add_and_delete_documents_in_single_transform() {
let mut index = TempIndex::new();

View File

@ -86,9 +86,9 @@ pub enum EmbedErrorKind {
},
option_info(.0.as_deref(), "server replied with "))]
RestBadRequest(Option<String>, ConfigurationSource),
#[error("received internal error HTTP {0} from embedding server{}", option_info(.1.as_deref(), "server replied with "))]
#[error("received internal error HTTP {} from embedding server{}", .0, option_info(.1.as_deref(), "server replied with "))]
RestInternalServerError(u16, Option<String>),
#[error("received unexpected HTTP {0} from embedding server{}", option_info(.1.as_deref(), "server replied with "))]
#[error("received unexpected HTTP {} from embedding server{}", .0, option_info(.1.as_deref(), "server replied with "))]
RestOtherStatusCode(u16, Option<String>),
#[error("could not reach embedding server:\n - {0}")]
RestNetwork(ureq::Transport),

View File

@ -163,8 +163,10 @@ impl Embedder {
let token_ids = Tensor::stack(&token_ids, 0).map_err(EmbedError::tensor_shape)?;
let token_type_ids = token_ids.zeros_like().map_err(EmbedError::tensor_shape)?;
let embeddings =
self.model.forward(&token_ids, &token_type_ids).map_err(EmbedError::model_forward)?;
let embeddings = self
.model
.forward(&token_ids, &token_type_ids, None)
.map_err(EmbedError::model_forward)?;
// Apply some avg-pooling by taking the mean embedding value for all tokens (including padding)
let (_n_sentence, n_tokens, _hidden_size) =
@ -185,8 +187,10 @@ impl Embedder {
Tensor::new(token_ids, &self.model.device).map_err(EmbedError::tensor_shape)?;
let token_ids = Tensor::stack(&[token_ids], 0).map_err(EmbedError::tensor_shape)?;
let token_type_ids = token_ids.zeros_like().map_err(EmbedError::tensor_shape)?;
let embeddings =
self.model.forward(&token_ids, &token_type_ids).map_err(EmbedError::model_forward)?;
let embeddings = self
.model
.forward(&token_ids, &token_type_ids, None)
.map_err(EmbedError::model_forward)?;
// Apply some avg-pooling by taking the mean embedding value for all tokens (including padding)
let (_n_sentence, n_tokens, _hidden_size) =

View File

@ -1,3 +1,4 @@
use std::fmt;
use std::time::Instant;
use ordered_float::OrderedFloat;
@ -168,7 +169,6 @@ fn infer_api_key() -> String {
.unwrap_or_default()
}
#[derive(Debug)]
pub struct Embedder {
tokenizer: tiktoken_rs::CoreBPE,
rest_embedder: RestEmbedder,
@ -302,3 +302,13 @@ impl Embedder {
self.options.distribution()
}
}
impl fmt::Debug for Embedder {
    /// Formats the embedder as a struct with its three fields. The tokenizer is rendered
    /// as the fixed placeholder string `"CoreBPE"` instead of its actual contents.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let mut repr = f.debug_struct("Embedder");
        repr.field("tokenizer", &"CoreBPE");
        repr.field("rest_embedder", &self.rest_embedder);
        repr.field("options", &self.options);
        repr.finish()
    }
}

View File

@ -175,7 +175,7 @@ impl Embedder {
pub fn embed_tokens(
&self,
tokens: &[usize],
tokens: &[u32],
deadline: Option<Instant>,
) -> Result<Embedding, EmbedError> {
let mut embeddings = embed(&self.data, tokens, 1, Some(self.dimensions), deadline)?;

View File

@ -8,17 +8,17 @@ edition = "2021"
[dependencies]
color-spantrace = "0.2.1"
fxprof-processed-profile = "0.7.0"
serde = { version = "1.0.204", features = ["derive"] }
serde_json = "1.0.120"
tracing = "0.1.40"
tracing-error = "0.2.0"
tracing-subscriber = "0.3.18"
byte-unit = { version = "5.1.4", default-features = false, features = [
serde = { version = "1.0.217", features = ["derive"] }
serde_json = "1.0.135"
tracing = "0.1.41"
tracing-error = "0.2.1"
tracing-subscriber = "0.3.19"
byte-unit = { version = "5.1.6", default-features = false, features = [
"std",
"byte",
"serde",
] }
tokio = { version = "1.38.0", features = ["sync"] }
tokio = { version = "1.42.0", features = ["sync"] }
[target.'cfg(any(target_os = "linux", target_os = "macos"))'.dependencies]
libproc = "0.14.8"
libproc = "0.14.10"

View File

@ -11,34 +11,34 @@ license.workspace = true
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
anyhow = "1.0.86"
anyhow = "1.0.95"
build-info = { version = "1.7.0", path = "../build-info" }
cargo_metadata = "0.18.1"
clap = { version = "4.5.9", features = ["derive"] }
futures-core = "0.3.30"
futures-util = "0.3.30"
reqwest = { version = "0.12.5", features = [
cargo_metadata = "0.19.1"
clap = { version = "4.5.24", features = ["derive"] }
futures-core = "0.3.31"
futures-util = "0.3.31"
reqwest = { version = "0.12.12", features = [
"stream",
"json",
"rustls-tls",
], default-features = false }
serde = { version = "1.0.204", features = ["derive"] }
serde_json = "1.0.120"
serde = { version = "1.0.217", features = ["derive"] }
serde_json = "1.0.135"
sha2 = "0.10.8"
sysinfo = "0.30.13"
time = { version = "0.3.36", features = [
sysinfo = "0.33.1"
time = { version = "0.3.37", features = [
"serde",
"serde-human-readable",
"macros",
] }
tokio = { version = "1.38.0", features = [
tokio = { version = "1.42.0", features = [
"rt",
"net",
"time",
"process",
"signal",
] }
tracing = "0.1.40"
tracing-subscriber = "0.3.18"
tracing = "0.1.41"
tracing-subscriber = "0.3.19"
tracing-trace = { version = "0.1.0", path = "../tracing-trace" }
uuid = { version = "1.10.0", features = ["v7", "serde"] }
uuid = { version = "1.11.0", features = ["v7", "serde"] }

View File

@ -27,8 +27,7 @@ impl Environment {
let unknown_string = String::from("Unknown");
let mut system = System::new();
system.refresh_cpu();
system.refresh_cpu_frequency();
system.refresh_cpu_all();
system.refresh_memory();
let (cpu, frequency) = match system.cpus().first() {
@ -50,9 +49,7 @@ impl Environment {
if let Some(os) = System::os_version() {
software.push(VersionInfo { name: os, version: String::from("kernel-release") });
}
if let Some(arch) = System::cpu_arch() {
software.push(VersionInfo { name: arch, version: String::from("arch") });
}
software.push(VersionInfo { name: System::cpu_arch(), version: String::from("arch") });
Self {
hostname: System::host_name(),

View File

@ -1,3 +1,3 @@
[toolchain]
channel = "1.79.0"
channel = "1.81.0"
components = ["clippy"]