Merge pull request #5406 from meilisearch/bump-heed

Bump heed to v0.22 and arroy to v0.6
This commit is contained in:
Clément Renault 2025-03-13 16:52:45 +01:00 committed by GitHub
commit 7df5715d39
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
51 changed files with 370 additions and 274 deletions

92
Cargo.lock generated
View File

@ -47,7 +47,7 @@ dependencies = [
"actix-utils", "actix-utils",
"ahash 0.8.11", "ahash 0.8.11",
"base64 0.22.1", "base64 0.22.1",
"bitflags 2.6.0", "bitflags 2.9.0",
"brotli", "brotli",
"bytes", "bytes",
"bytestring", "bytestring",
@ -393,41 +393,23 @@ checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711"
[[package]] [[package]]
name = "arroy" name = "arroy"
version = "0.5.0" version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dfc5f272f38fa063bbff0a7ab5219404e221493de005e2b4078c62d626ef567e" checksum = "4a885313dfac15b64fd61a39d1970a2befa076c69a763434117c5b6163f9fecb"
dependencies = [ dependencies = [
"bytemuck", "bytemuck",
"byteorder", "byteorder",
"heed", "heed",
"log",
"memmap2", "memmap2",
"nohash", "nohash",
"ordered-float", "ordered-float",
"page_size",
"rand", "rand",
"rayon", "rayon",
"roaring", "roaring",
"tempfile", "tempfile",
"thiserror 1.0.69", "thiserror 2.0.9",
] "tracing",
[[package]]
name = "arroy"
version = "0.5.0"
source = "git+https://github.com/meilisearch/arroy/?tag=DO-NOT-DELETE-upgrade-v04-to-v05#053807bf38dc079f25b003f19fc30fbf3613f6e7"
dependencies = [
"bytemuck",
"byteorder",
"heed",
"log",
"memmap2",
"nohash",
"ordered-float",
"rand",
"rayon",
"roaring",
"tempfile",
"thiserror 1.0.69",
] ]
[[package]] [[package]]
@ -553,7 +535,7 @@ version = "0.70.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f49d8fed880d473ea71efb9bf597651e77201bdd4893efe54c9e5d65ae04ce6f" checksum = "f49d8fed880d473ea71efb9bf597651e77201bdd4893efe54c9e5d65ae04ce6f"
dependencies = [ dependencies = [
"bitflags 2.6.0", "bitflags 2.9.0",
"cexpr", "cexpr",
"clang-sys", "clang-sys",
"itertools 0.13.0", "itertools 0.13.0",
@ -599,9 +581,9 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
[[package]] [[package]]
name = "bitflags" name = "bitflags"
version = "2.6.0" version = "2.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de" checksum = "5c8214115b7bf84099f1309324e63141d4c5d7cc26862f97a0a857dbefe165bd"
dependencies = [ dependencies = [
"serde", "serde",
] ]
@ -2082,7 +2064,7 @@ version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ce20bbb48248608ba4908b45fe36e17e40f56f8c6bb385ecf5d3c4a1e8b05a22" checksum = "ce20bbb48248608ba4908b45fe36e17e40f56f8c6bb385ecf5d3c4a1e8b05a22"
dependencies = [ dependencies = [
"bitflags 2.6.0", "bitflags 2.9.0",
"debugid", "debugid",
"fxhash", "fxhash",
"serde", "serde",
@ -2249,7 +2231,7 @@ version = "0.19.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b903b73e45dc0c6c596f2d37eccece7c1c8bb6e4407b001096387c63d0d93724" checksum = "b903b73e45dc0c6c596f2d37eccece7c1c8bb6e4407b001096387c63d0d93724"
dependencies = [ dependencies = [
"bitflags 2.6.0", "bitflags 2.9.0",
"libc", "libc",
"libgit2-sys", "libgit2-sys",
"log", "log",
@ -2397,11 +2379,11 @@ checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
[[package]] [[package]]
name = "heed" name = "heed"
version = "0.20.5" version = "0.22.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7d4f449bab7320c56003d37732a917e18798e2f1709d80263face2b4f9436ddb" checksum = "6a56c94661ddfb51aa9cdfbf102cfcc340aa69267f95ebccc4af08d7c530d393"
dependencies = [ dependencies = [
"bitflags 2.6.0", "bitflags 2.9.0",
"byteorder", "byteorder",
"heed-traits", "heed-traits",
"heed-types", "heed-types",
@ -2421,9 +2403,9 @@ checksum = "eb3130048d404c57ce5a1ac61a903696e8fcde7e8c2991e9fcfc1f27c3ef74ff"
[[package]] [[package]]
name = "heed-types" name = "heed-types"
version = "0.20.1" version = "0.21.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9d3f528b053a6d700b2734eabcd0fd49cb8230647aa72958467527b0b7917114" checksum = "13c255bdf46e07fb840d120a36dcc81f385140d7191c76a7391672675c01a55d"
dependencies = [ dependencies = [
"bincode", "bincode",
"byteorder", "byteorder",
@ -2746,7 +2728,6 @@ name = "index-scheduler"
version = "1.13.3" version = "1.13.3"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"arroy 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)",
"big_s", "big_s",
"bincode", "bincode",
"bumpalo", "bumpalo",
@ -3013,9 +2994,9 @@ dependencies = [
[[package]] [[package]]
name = "libc" name = "libc"
version = "0.2.169" version = "0.2.171"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b5aba8db14291edd000dfcc4d620c7ebfb122c613afb886ca8803fa4e128a20a" checksum = "c19937216e9d3aa9956d9bb8dfc0b0c8beb6058fc4f7a4dc4d850edf86a237d6"
[[package]] [[package]]
name = "libgit2-sys" name = "libgit2-sys"
@ -3468,9 +3449,9 @@ checksum = "4ee93343901ab17bd981295f2cf0026d4ad018c7c31ba84549a4ddbb47a45104"
[[package]] [[package]]
name = "lmdb-master-sys" name = "lmdb-master-sys"
version = "0.2.4" version = "0.2.5"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "472c3760e2a8d0f61f322fb36788021bb36d573c502b50fa3e2bcaac3ec326c9" checksum = "864808e0b19fb6dd3b70ba94ee671b82fce17554cf80aeb0a155c65bb08027df"
dependencies = [ dependencies = [
"cc", "cc",
"doxygen-rs", "doxygen-rs",
@ -3513,9 +3494,9 @@ checksum = "9374ef4228402d4b7e403e5838cb880d9ee663314b0a900d5a6aabf0c213552e"
[[package]] [[package]]
name = "log" name = "log"
version = "0.4.21" version = "0.4.26"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c" checksum = "30bde2b3dc3671ae49d8e2e9f044c7c005836e7a023ee57cffa25ab82764bb9e"
[[package]] [[package]]
name = "lzma-rs" name = "lzma-rs"
@ -3726,7 +3707,6 @@ name = "meilitool"
version = "1.13.3" version = "1.13.3"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"arroy 0.5.0 (git+https://github.com/meilisearch/arroy/?tag=DO-NOT-DELETE-upgrade-v04-to-v05)",
"clap", "clap",
"dump", "dump",
"file-store", "file-store",
@ -3761,7 +3741,7 @@ name = "milli"
version = "1.13.3" version = "1.13.3"
dependencies = [ dependencies = [
"allocator-api2", "allocator-api2",
"arroy 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)", "arroy",
"bbqueue", "bbqueue",
"big_s", "big_s",
"bimap", "bimap",
@ -4129,9 +4109,9 @@ checksum = "ae4512a8f418ac322335255a72361b9ac927e106f4d7fe6ab4d8ac59cb01f7a9"
[[package]] [[package]]
name = "once_cell" name = "once_cell"
version = "1.20.2" version = "1.21.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775" checksum = "cde51589ab56b20a6f686b2c68f7a0bd6add753d697abf720d63f8db3ab7b1ad"
[[package]] [[package]]
name = "onig" name = "onig"
@ -4518,7 +4498,7 @@ version = "0.16.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "731e0d9356b0c25f16f33b5be79b1c57b562f141ebfcdb0ad8ac2c13a24293b4" checksum = "731e0d9356b0c25f16f33b5be79b1c57b562f141ebfcdb0ad8ac2c13a24293b4"
dependencies = [ dependencies = [
"bitflags 2.6.0", "bitflags 2.9.0",
"hex", "hex",
"lazy_static", "lazy_static",
"procfs-core", "procfs-core",
@ -4531,7 +4511,7 @@ version = "0.16.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2d3554923a69f4ce04c4a754260c338f505ce22642d3830e049a399fc2059a29" checksum = "2d3554923a69f4ce04c4a754260c338f505ce22642d3830e049a399fc2059a29"
dependencies = [ dependencies = [
"bitflags 2.6.0", "bitflags 2.9.0",
"hex", "hex",
] ]
@ -4872,7 +4852,7 @@ version = "1.20.0"
source = "git+https://github.com/rhaiscript/rhai?rev=ef3df63121d27aacd838f366f2b83fd65f20a1e4#ef3df63121d27aacd838f366f2b83fd65f20a1e4" source = "git+https://github.com/rhaiscript/rhai?rev=ef3df63121d27aacd838f366f2b83fd65f20a1e4#ef3df63121d27aacd838f366f2b83fd65f20a1e4"
dependencies = [ dependencies = [
"ahash 0.8.11", "ahash 0.8.11",
"bitflags 2.6.0", "bitflags 2.9.0",
"instant", "instant",
"num-traits", "num-traits",
"once_cell", "once_cell",
@ -5008,7 +4988,7 @@ version = "0.38.41"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d7f649912bc1495e167a6edee79151c84b1bad49748cb4f1f1167f459f6224f6" checksum = "d7f649912bc1495e167a6edee79151c84b1bad49748cb4f1f1167f459f6224f6"
dependencies = [ dependencies = [
"bitflags 2.6.0", "bitflags 2.9.0",
"errno", "errno",
"libc", "libc",
"linux-raw-sys", "linux-raw-sys",
@ -5130,9 +5110,9 @@ checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4"
[[package]] [[package]]
name = "serde" name = "serde"
version = "1.0.217" version = "1.0.219"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "02fc4265df13d6fa1d00ecff087228cc0a2b5f3c0e87e258d8b94a156e984c70" checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6"
dependencies = [ dependencies = [
"serde_derive", "serde_derive",
] ]
@ -5148,9 +5128,9 @@ dependencies = [
[[package]] [[package]]
name = "serde_derive" name = "serde_derive"
version = "1.0.217" version = "1.0.219"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5a9bf7cf98d04a2b28aead066b7496853d4779c9cc183c440dbac457641e19a0" checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",
@ -5159,9 +5139,9 @@ dependencies = [
[[package]] [[package]]
name = "serde_json" name = "serde_json"
version = "1.0.138" version = "1.0.140"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d434192e7da787e94a6ea7e9670b26a036d0ca41e0b7efb2676dd32bae872949" checksum = "20068b6e96dc6c9bd23e01df8827e6c7e1f2fddd43c21810382803c136b99373"
dependencies = [ dependencies = [
"indexmap", "indexmap",
"itoa", "itoa",
@ -5529,7 +5509,7 @@ version = "0.5.5"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ec7dddc5f0fee506baf8b9fdb989e242f17e4b11c61dfbb0635b705217199eea" checksum = "ec7dddc5f0fee506baf8b9fdb989e242f17e4b11c61dfbb0635b705217199eea"
dependencies = [ dependencies = [
"bitflags 2.6.0", "bitflags 2.9.0",
"byteorder", "byteorder",
"enum-as-inner", "enum-as-inner",
"libc", "libc",

View File

@ -36,6 +36,12 @@ license = "MIT"
[profile.release] [profile.release]
codegen-units = 1 codegen-units = 1
# We now compile heed without the NDEBUG define for better performance.
# However, we still enable debug assertions for a better detection of
# disk corruption on the cloud or in OSS.
[profile.release.package.heed]
debug-assertions = true
[profile.dev.package.flate2] [profile.dev.package.flate2]
opt-level = 3 opt-level = 3

View File

@ -35,7 +35,8 @@ fn setup_dir(path: impl AsRef<Path>) {
fn setup_index() -> Index { fn setup_index() -> Index {
let path = "benches.mmdb"; let path = "benches.mmdb";
setup_dir(path); setup_dir(path);
let mut options = EnvOpenOptions::new(); let options = EnvOpenOptions::new();
let mut options = options.read_txn_without_tls();
options.map_size(100 * 1024 * 1024 * 1024); // 100 GB options.map_size(100 * 1024 * 1024 * 1024); // 100 GB
options.max_readers(100); options.max_readers(100);
Index::new(options, path, true).unwrap() Index::new(options, path, true).unwrap()

View File

@ -65,7 +65,8 @@ pub fn base_setup(conf: &Conf) -> Index {
} }
create_dir_all(conf.database_name).unwrap(); create_dir_all(conf.database_name).unwrap();
let mut options = EnvOpenOptions::new(); let options = EnvOpenOptions::new();
let mut options = options.read_txn_without_tls();
options.map_size(100 * 1024 * 1024 * 1024); // 100 GB options.map_size(100 * 1024 * 1024 * 1024); // 100 GB
options.max_readers(100); options.max_readers(100);
let index = Index::new(options, conf.database_name, true).unwrap(); let index = Index::new(options, conf.database_name, true).unwrap();

View File

@ -57,7 +57,8 @@ fn main() {
let opt = opt.clone(); let opt = opt.clone();
let handle = std::thread::spawn(move || { let handle = std::thread::spawn(move || {
let mut options = EnvOpenOptions::new(); let options = EnvOpenOptions::new();
let mut options = options.read_txn_without_tls();
options.map_size(1024 * 1024 * 1024 * 1024); options.map_size(1024 * 1024 * 1024 * 1024);
let tempdir = match opt.path { let tempdir = match opt.path {
Some(path) => TempDir::new_in(path).unwrap(), Some(path) => TempDir::new_in(path).unwrap(),

View File

@ -44,7 +44,6 @@ ureq = "2.12.1"
uuid = { version = "1.11.0", features = ["serde", "v4"] } uuid = { version = "1.11.0", features = ["serde", "v4"] }
[dev-dependencies] [dev-dependencies]
arroy = "0.5.0"
big_s = "1.0.2" big_s = "1.0.2"
crossbeam-channel = "0.5.14" crossbeam-channel = "0.5.14"
# fixed version due to format breakages in v1.40 # fixed version due to format breakages in v1.40

View File

@ -2,7 +2,7 @@ use std::sync::{Arc, RwLock};
use meilisearch_types::features::{InstanceTogglableFeatures, Network, RuntimeTogglableFeatures}; use meilisearch_types::features::{InstanceTogglableFeatures, Network, RuntimeTogglableFeatures};
use meilisearch_types::heed::types::{SerdeJson, Str}; use meilisearch_types::heed::types::{SerdeJson, Str};
use meilisearch_types::heed::{Database, Env, RwTxn}; use meilisearch_types::heed::{Database, Env, RwTxn, WithoutTls};
use crate::error::FeatureNotEnabledError; use crate::error::FeatureNotEnabledError;
use crate::Result; use crate::Result;
@ -139,7 +139,7 @@ impl FeatureData {
} }
pub fn new( pub fn new(
env: &Env, env: &Env<WithoutTls>,
wtxn: &mut RwTxn, wtxn: &mut RwTxn,
instance_features: InstanceTogglableFeatures, instance_features: InstanceTogglableFeatures,
) -> Result<Self> { ) -> Result<Self> {

View File

@ -304,7 +304,8 @@ fn create_or_open_index(
map_size: usize, map_size: usize,
creation: bool, creation: bool,
) -> Result<Index> { ) -> Result<Index> {
let mut options = EnvOpenOptions::new(); let options = EnvOpenOptions::new();
let mut options = options.read_txn_without_tls();
options.map_size(clamp_to_page_size(map_size)); options.map_size(clamp_to_page_size(map_size));
// You can find more details about this experimental // You can find more details about this experimental
@ -333,7 +334,7 @@ fn create_or_open_index(
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use meilisearch_types::heed::Env; use meilisearch_types::heed::{Env, WithoutTls};
use meilisearch_types::Index; use meilisearch_types::Index;
use uuid::Uuid; use uuid::Uuid;
@ -343,7 +344,7 @@ mod tests {
use crate::IndexScheduler; use crate::IndexScheduler;
impl IndexMapper { impl IndexMapper {
fn test() -> (Self, Env, IndexSchedulerHandle) { fn test() -> (Self, Env<WithoutTls>, IndexSchedulerHandle) {
let (index_scheduler, handle) = IndexScheduler::test(true, vec![]); let (index_scheduler, handle) = IndexScheduler::test(true, vec![]);
(index_scheduler.index_mapper, index_scheduler.env, handle) (index_scheduler.index_mapper, index_scheduler.env, handle)
} }

View File

@ -4,7 +4,7 @@ use std::time::Duration;
use std::{fs, thread}; use std::{fs, thread};
use meilisearch_types::heed::types::{SerdeJson, Str}; use meilisearch_types::heed::types::{SerdeJson, Str};
use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn}; use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn, WithoutTls};
use meilisearch_types::milli; use meilisearch_types::milli;
use meilisearch_types::milli::database_stats::DatabaseStats; use meilisearch_types::milli::database_stats::DatabaseStats;
use meilisearch_types::milli::update::IndexerConfig; use meilisearch_types::milli::update::IndexerConfig;
@ -164,7 +164,7 @@ impl IndexMapper {
} }
pub fn new( pub fn new(
env: &Env, env: &Env<WithoutTls>,
wtxn: &mut RwTxn, wtxn: &mut RwTxn,
options: &IndexSchedulerOptions, options: &IndexSchedulerOptions,
budget: IndexBudget, budget: IndexBudget,

View File

@ -54,7 +54,7 @@ use meilisearch_types::batches::Batch;
use meilisearch_types::features::{InstanceTogglableFeatures, Network, RuntimeTogglableFeatures}; use meilisearch_types::features::{InstanceTogglableFeatures, Network, RuntimeTogglableFeatures};
use meilisearch_types::heed::byteorder::BE; use meilisearch_types::heed::byteorder::BE;
use meilisearch_types::heed::types::I128; use meilisearch_types::heed::types::I128;
use meilisearch_types::heed::{self, Env, RoTxn}; use meilisearch_types::heed::{self, Env, RoTxn, WithoutTls};
use meilisearch_types::milli::index::IndexEmbeddingConfig; use meilisearch_types::milli::index::IndexEmbeddingConfig;
use meilisearch_types::milli::update::IndexerConfig; use meilisearch_types::milli::update::IndexerConfig;
use meilisearch_types::milli::vector::{Embedder, EmbedderOptions, EmbeddingConfigs}; use meilisearch_types::milli::vector::{Embedder, EmbedderOptions, EmbeddingConfigs};
@ -131,7 +131,7 @@ pub struct IndexSchedulerOptions {
/// to be performed on them. /// to be performed on them.
pub struct IndexScheduler { pub struct IndexScheduler {
/// The LMDB environment which the DBs are associated with. /// The LMDB environment which the DBs are associated with.
pub(crate) env: Env, pub(crate) env: Env<WithoutTls>,
/// The list of tasks currently processing /// The list of tasks currently processing
pub(crate) processing_tasks: Arc<RwLock<ProcessingTasks>>, pub(crate) processing_tasks: Arc<RwLock<ProcessingTasks>>,
@ -209,6 +209,7 @@ impl IndexScheduler {
#[allow(private_interfaces)] // because test_utils is private #[allow(private_interfaces)] // because test_utils is private
pub fn new( pub fn new(
options: IndexSchedulerOptions, options: IndexSchedulerOptions,
auth_env: Env<WithoutTls>,
from_db_version: (u32, u32, u32), from_db_version: (u32, u32, u32),
#[cfg(test)] test_breakpoint_sdr: crossbeam_channel::Sender<(test_utils::Breakpoint, bool)>, #[cfg(test)] test_breakpoint_sdr: crossbeam_channel::Sender<(test_utils::Breakpoint, bool)>,
#[cfg(test)] planned_failures: Vec<(usize, test_utils::FailureLocation)>, #[cfg(test)] planned_failures: Vec<(usize, test_utils::FailureLocation)>,
@ -240,7 +241,9 @@ impl IndexScheduler {
}; };
let env = unsafe { let env = unsafe {
heed::EnvOpenOptions::new() let env_options = heed::EnvOpenOptions::new();
let mut env_options = env_options.read_txn_without_tls();
env_options
.max_dbs(Self::nb_db()) .max_dbs(Self::nb_db())
.map_size(budget.task_db_size) .map_size(budget.task_db_size)
.open(&options.tasks_path) .open(&options.tasks_path)
@ -260,7 +263,7 @@ impl IndexScheduler {
processing_tasks: Arc::new(RwLock::new(ProcessingTasks::new())), processing_tasks: Arc::new(RwLock::new(ProcessingTasks::new())),
version, version,
queue, queue,
scheduler: Scheduler::new(&options), scheduler: Scheduler::new(&options, auth_env),
index_mapper, index_mapper,
env, env,
@ -358,7 +361,7 @@ impl IndexScheduler {
} }
} }
pub fn read_txn(&self) -> Result<RoTxn> { pub fn read_txn(&self) -> Result<RoTxn<WithoutTls>> {
self.env.read_txn().map_err(|e| e.into()) self.env.read_txn().map_err(|e| e.into())
} }
@ -427,12 +430,14 @@ impl IndexScheduler {
/// If you need to fetch information from or perform an action on all indexes, /// If you need to fetch information from or perform an action on all indexes,
/// see the `try_for_each_index` function. /// see the `try_for_each_index` function.
pub fn index(&self, name: &str) -> Result<Index> { pub fn index(&self, name: &str) -> Result<Index> {
self.index_mapper.index(&self.env.read_txn()?, name) let rtxn = self.env.read_txn()?;
self.index_mapper.index(&rtxn, name)
} }
/// Return the boolean referring if index exists. /// Return the boolean referring if index exists.
pub fn index_exists(&self, name: &str) -> Result<bool> { pub fn index_exists(&self, name: &str) -> Result<bool> {
self.index_mapper.index_exists(&self.env.read_txn()?, name) let rtxn = self.env.read_txn()?;
self.index_mapper.index_exists(&rtxn, name)
} }
/// Return the name of all indexes without opening them. /// Return the name of all indexes without opening them.
@ -507,7 +512,8 @@ impl IndexScheduler {
/// 2. The name of the specific data related to the property can be `enqueued` for the `statuses`, `settingsUpdate` for the `types`, or the name of the index for the `indexes`, for example. /// 2. The name of the specific data related to the property can be `enqueued` for the `statuses`, `settingsUpdate` for the `types`, or the name of the index for the `indexes`, for example.
/// 3. The number of times the properties appeared. /// 3. The number of times the properties appeared.
pub fn get_stats(&self) -> Result<BTreeMap<String, BTreeMap<String, u64>>> { pub fn get_stats(&self) -> Result<BTreeMap<String, BTreeMap<String, u64>>> {
self.queue.get_stats(&self.read_txn()?, &self.processing_tasks.read().unwrap()) let rtxn = self.read_txn()?;
self.queue.get_stats(&rtxn, &self.processing_tasks.read().unwrap())
} }
// Return true if there is at least one task that is processing. // Return true if there is at least one task that is processing.

View File

@ -3,7 +3,7 @@ use std::ops::{Bound, RangeBounds};
use meilisearch_types::batches::{Batch, BatchId}; use meilisearch_types::batches::{Batch, BatchId};
use meilisearch_types::heed::types::{DecodeIgnore, SerdeBincode, SerdeJson, Str}; use meilisearch_types::heed::types::{DecodeIgnore, SerdeBincode, SerdeJson, Str};
use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn}; use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn, WithoutTls};
use meilisearch_types::milli::{CboRoaringBitmapCodec, RoaringBitmapCodec, BEU32}; use meilisearch_types::milli::{CboRoaringBitmapCodec, RoaringBitmapCodec, BEU32};
use meilisearch_types::tasks::{Kind, Status}; use meilisearch_types::tasks::{Kind, Status};
use roaring::{MultiOps, RoaringBitmap}; use roaring::{MultiOps, RoaringBitmap};
@ -66,7 +66,7 @@ impl BatchQueue {
NUMBER_OF_DATABASES NUMBER_OF_DATABASES
} }
pub(super) fn new(env: &Env, wtxn: &mut RwTxn) -> Result<Self> { pub(super) fn new(env: &Env<WithoutTls>, wtxn: &mut RwTxn) -> Result<Self> {
Ok(Self { Ok(Self {
all_batches: env.create_database(wtxn, Some(db_name::ALL_BATCHES))?, all_batches: env.create_database(wtxn, Some(db_name::ALL_BATCHES))?,
status: env.create_database(wtxn, Some(db_name::BATCH_STATUS))?, status: env.create_database(wtxn, Some(db_name::BATCH_STATUS))?,

View File

@ -13,7 +13,7 @@ use std::time::Duration;
use file_store::FileStore; use file_store::FileStore;
use meilisearch_types::batches::BatchId; use meilisearch_types::batches::BatchId;
use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn}; use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn, WithoutTls};
use meilisearch_types::milli::{CboRoaringBitmapCodec, BEU32}; use meilisearch_types::milli::{CboRoaringBitmapCodec, BEU32};
use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task}; use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task};
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
@ -157,7 +157,7 @@ impl Queue {
/// Create an index scheduler and start its run loop. /// Create an index scheduler and start its run loop.
pub(crate) fn new( pub(crate) fn new(
env: &Env, env: &Env<WithoutTls>,
wtxn: &mut RwTxn, wtxn: &mut RwTxn,
options: &IndexSchedulerOptions, options: &IndexSchedulerOptions,
) -> Result<Self> { ) -> Result<Self> {

View File

@ -1,7 +1,7 @@
use std::ops::{Bound, RangeBounds}; use std::ops::{Bound, RangeBounds};
use meilisearch_types::heed::types::{DecodeIgnore, SerdeBincode, SerdeJson, Str}; use meilisearch_types::heed::types::{DecodeIgnore, SerdeBincode, SerdeJson, Str};
use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn}; use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn, WithoutTls};
use meilisearch_types::milli::{CboRoaringBitmapCodec, RoaringBitmapCodec, BEU32}; use meilisearch_types::milli::{CboRoaringBitmapCodec, RoaringBitmapCodec, BEU32};
use meilisearch_types::tasks::{Kind, Status, Task}; use meilisearch_types::tasks::{Kind, Status, Task};
use roaring::{MultiOps, RoaringBitmap}; use roaring::{MultiOps, RoaringBitmap};
@ -68,7 +68,7 @@ impl TaskQueue {
NUMBER_OF_DATABASES NUMBER_OF_DATABASES
} }
pub(crate) fn new(env: &Env, wtxn: &mut RwTxn) -> Result<Self> { pub(crate) fn new(env: &Env<WithoutTls>, wtxn: &mut RwTxn) -> Result<Self> {
Ok(Self { Ok(Self {
all_tasks: env.create_database(wtxn, Some(db_name::ALL_TASKS))?, all_tasks: env.create_database(wtxn, Some(db_name::ALL_TASKS))?,
status: env.create_database(wtxn, Some(db_name::STATUS))?, status: env.create_database(wtxn, Some(db_name::STATUS))?,

View File

@ -21,6 +21,7 @@ use std::sync::atomic::{AtomicBool, AtomicU32, Ordering};
use std::sync::Arc; use std::sync::Arc;
use meilisearch_types::error::ResponseError; use meilisearch_types::error::ResponseError;
use meilisearch_types::heed::{Env, WithoutTls};
use meilisearch_types::milli; use meilisearch_types::milli;
use meilisearch_types::tasks::Status; use meilisearch_types::tasks::Status;
use rayon::current_num_threads; use rayon::current_num_threads;
@ -71,7 +72,7 @@ pub struct Scheduler {
pub(crate) snapshots_path: PathBuf, pub(crate) snapshots_path: PathBuf,
/// The path to the folder containing the auth LMDB env. /// The path to the folder containing the auth LMDB env.
pub(crate) auth_path: PathBuf, pub(crate) auth_env: Env<WithoutTls>,
/// The path to the version file of Meilisearch. /// The path to the version file of Meilisearch.
pub(crate) version_file_path: PathBuf, pub(crate) version_file_path: PathBuf,
@ -87,12 +88,12 @@ impl Scheduler {
batched_tasks_size_limit: self.batched_tasks_size_limit, batched_tasks_size_limit: self.batched_tasks_size_limit,
dumps_path: self.dumps_path.clone(), dumps_path: self.dumps_path.clone(),
snapshots_path: self.snapshots_path.clone(), snapshots_path: self.snapshots_path.clone(),
auth_path: self.auth_path.clone(), auth_env: self.auth_env.clone(),
version_file_path: self.version_file_path.clone(), version_file_path: self.version_file_path.clone(),
} }
} }
pub fn new(options: &IndexSchedulerOptions) -> Scheduler { pub fn new(options: &IndexSchedulerOptions, auth_env: Env<WithoutTls>) -> Scheduler {
Scheduler { Scheduler {
must_stop_processing: MustStopProcessing::default(), must_stop_processing: MustStopProcessing::default(),
// we want to start the loop right away in case meilisearch was ctrl+Ced while processing things // we want to start the loop right away in case meilisearch was ctrl+Ced while processing things
@ -102,7 +103,7 @@ impl Scheduler {
batched_tasks_size_limit: options.batched_tasks_size_limit, batched_tasks_size_limit: options.batched_tasks_size_limit,
dumps_path: options.dumps_path.clone(), dumps_path: options.dumps_path.clone(),
snapshots_path: options.snapshots_path.clone(), snapshots_path: options.snapshots_path.clone(),
auth_path: options.auth_path.clone(), auth_env,
version_file_path: options.version_file_path.clone(), version_file_path: options.version_file_path.clone(),
} }
} }

View File

@ -4,7 +4,6 @@ use std::sync::atomic::Ordering;
use meilisearch_types::heed::CompactionOption; use meilisearch_types::heed::CompactionOption;
use meilisearch_types::milli::progress::{Progress, VariableNameStep}; use meilisearch_types::milli::progress::{Progress, VariableNameStep};
use meilisearch_types::milli::{self};
use meilisearch_types::tasks::{Status, Task}; use meilisearch_types::tasks::{Status, Task};
use meilisearch_types::{compression, VERSION_FILE_NAME}; use meilisearch_types::{compression, VERSION_FILE_NAME};
@ -28,7 +27,7 @@ impl IndexScheduler {
// 2. Snapshot the index-scheduler LMDB env // 2. Snapshot the index-scheduler LMDB env
// //
// When we call copy_to_file, LMDB opens a read transaction by itself, // When we call copy_to_path, LMDB opens a read transaction by itself,
// we can't provide our own. It is an issue as we would like to know // we can't provide our own. It is an issue as we would like to know
// the update files to copy but new ones can be enqueued between the copy // the update files to copy but new ones can be enqueued between the copy
// of the env and the new transaction we open to retrieve the enqueued tasks. // of the env and the new transaction we open to retrieve the enqueued tasks.
@ -42,7 +41,7 @@ impl IndexScheduler {
progress.update_progress(SnapshotCreationProgress::SnapshotTheIndexScheduler); progress.update_progress(SnapshotCreationProgress::SnapshotTheIndexScheduler);
let dst = temp_snapshot_dir.path().join("tasks"); let dst = temp_snapshot_dir.path().join("tasks");
fs::create_dir_all(&dst)?; fs::create_dir_all(&dst)?;
self.env.copy_to_file(dst.join("data.mdb"), CompactionOption::Enabled)?; self.env.copy_to_path(dst.join("data.mdb"), CompactionOption::Enabled)?;
// 2.2 Create a read transaction on the index-scheduler // 2.2 Create a read transaction on the index-scheduler
let rtxn = self.env.read_txn()?; let rtxn = self.env.read_txn()?;
@ -81,7 +80,7 @@ impl IndexScheduler {
let dst = temp_snapshot_dir.path().join("indexes").join(uuid.to_string()); let dst = temp_snapshot_dir.path().join("indexes").join(uuid.to_string());
fs::create_dir_all(&dst)?; fs::create_dir_all(&dst)?;
index index
.copy_to_file(dst.join("data.mdb"), CompactionOption::Enabled) .copy_to_path(dst.join("data.mdb"), CompactionOption::Enabled)
.map_err(|e| Error::from_milli(e, Some(name.to_string())))?; .map_err(|e| Error::from_milli(e, Some(name.to_string())))?;
} }
@ -91,14 +90,7 @@ impl IndexScheduler {
progress.update_progress(SnapshotCreationProgress::SnapshotTheApiKeys); progress.update_progress(SnapshotCreationProgress::SnapshotTheApiKeys);
let dst = temp_snapshot_dir.path().join("auth"); let dst = temp_snapshot_dir.path().join("auth");
fs::create_dir_all(&dst)?; fs::create_dir_all(&dst)?;
// TODO We can't use the open_auth_store_env function here but we should self.scheduler.auth_env.copy_to_path(dst.join("data.mdb"), CompactionOption::Enabled)?;
let auth = unsafe {
milli::heed::EnvOpenOptions::new()
.map_size(1024 * 1024 * 1024) // 1 GiB
.max_dbs(2)
.open(&self.scheduler.auth_path)
}?;
auth.copy_to_file(dst.join("data.mdb"), CompactionOption::Enabled)?;
// 5. Copy and tarball the flat snapshot // 5. Copy and tarball the flat snapshot
progress.update_progress(SnapshotCreationProgress::CreateTheTarball); progress.update_progress(SnapshotCreationProgress::CreateTheTarball);

View File

@ -5,6 +5,7 @@ use std::time::Duration;
use big_s::S; use big_s::S;
use crossbeam_channel::RecvTimeoutError; use crossbeam_channel::RecvTimeoutError;
use file_store::File; use file_store::File;
use meilisearch_auth::open_auth_store_env;
use meilisearch_types::document_formats::DocumentFormatError; use meilisearch_types::document_formats::DocumentFormatError;
use meilisearch_types::milli::update::IndexDocumentsMethod::ReplaceDocuments; use meilisearch_types::milli::update::IndexDocumentsMethod::ReplaceDocuments;
use meilisearch_types::milli::update::IndexerConfig; use meilisearch_types::milli::update::IndexerConfig;
@ -120,7 +121,10 @@ impl IndexScheduler {
) )
}); });
let index_scheduler = Self::new(options, version, sender, planned_failures).unwrap(); std::fs::create_dir_all(&options.auth_path).unwrap();
let auth_env = open_auth_store_env(&options.auth_path).unwrap();
let index_scheduler =
Self::new(options, auth_env, version, sender, planned_failures).unwrap();
// To be 100% consistent between all test we're going to start the scheduler right now // To be 100% consistent between all test we're going to start the scheduler right now
// and ensure it's in the expected starting state. // and ensure it's in the expected starting state.

View File

@ -1,5 +1,5 @@
use anyhow::bail; use anyhow::bail;
use meilisearch_types::heed::{Env, RwTxn}; use meilisearch_types::heed::{Env, RwTxn, WithoutTls};
use meilisearch_types::tasks::{Details, KindWithContent, Status, Task}; use meilisearch_types::tasks::{Details, KindWithContent, Status, Task};
use meilisearch_types::versioning::{VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH}; use meilisearch_types::versioning::{VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH};
use time::OffsetDateTime; use time::OffsetDateTime;
@ -9,13 +9,17 @@ use crate::queue::TaskQueue;
use crate::versioning::Versioning; use crate::versioning::Versioning;
trait UpgradeIndexScheduler { trait UpgradeIndexScheduler {
fn upgrade(&self, env: &Env, wtxn: &mut RwTxn, original: (u32, u32, u32)) fn upgrade(
-> anyhow::Result<()>; &self,
env: &Env<WithoutTls>,
wtxn: &mut RwTxn,
original: (u32, u32, u32),
) -> anyhow::Result<()>;
fn target_version(&self) -> (u32, u32, u32); fn target_version(&self) -> (u32, u32, u32);
} }
pub fn upgrade_index_scheduler( pub fn upgrade_index_scheduler(
env: &Env, env: &Env<WithoutTls>,
versioning: &Versioning, versioning: &Versioning,
from: (u32, u32, u32), from: (u32, u32, u32),
to: (u32, u32, u32), to: (u32, u32, u32),
@ -91,7 +95,7 @@ struct ToCurrentNoOp {}
impl UpgradeIndexScheduler for ToCurrentNoOp { impl UpgradeIndexScheduler for ToCurrentNoOp {
fn upgrade( fn upgrade(
&self, &self,
_env: &Env, _env: &Env<WithoutTls>,
_wtxn: &mut RwTxn, _wtxn: &mut RwTxn,
_original: (u32, u32, u32), _original: (u32, u32, u32),
) -> anyhow::Result<()> { ) -> anyhow::Result<()> {

View File

@ -1,5 +1,5 @@
use meilisearch_types::heed::types::Str; use meilisearch_types::heed::types::Str;
use meilisearch_types::heed::{self, Database, Env, RoTxn, RwTxn}; use meilisearch_types::heed::{self, Database, Env, RoTxn, RwTxn, WithoutTls};
use meilisearch_types::milli::heed_codec::version::VersionCodec; use meilisearch_types::milli::heed_codec::version::VersionCodec;
use meilisearch_types::versioning; use meilisearch_types::versioning;
@ -46,12 +46,12 @@ impl Versioning {
} }
/// Return `Self` without checking anything about the version /// Return `Self` without checking anything about the version
pub fn raw_new(env: &Env, wtxn: &mut RwTxn) -> Result<Self, heed::Error> { pub fn raw_new(env: &Env<WithoutTls>, wtxn: &mut RwTxn) -> Result<Self, heed::Error> {
let version = env.create_database(wtxn, Some(db_name::VERSION))?; let version = env.create_database(wtxn, Some(db_name::VERSION))?;
Ok(Self { version }) Ok(Self { version })
} }
pub(crate) fn new(env: &Env, db_version: (u32, u32, u32)) -> Result<Self> { pub(crate) fn new(env: &Env<WithoutTls>, db_version: (u32, u32, u32)) -> Result<Self> {
let mut wtxn = env.write_txn()?; let mut wtxn = env.write_txn()?;
let this = Self::raw_new(env, &mut wtxn)?; let this = Self::raw_new(env, &mut wtxn)?;
let from = match this.get_version(&wtxn)? { let from = match this.get_version(&wtxn)? {

View File

@ -2,6 +2,7 @@ use std::fs::File;
use std::io::{BufReader, Write}; use std::io::{BufReader, Write};
use std::path::Path; use std::path::Path;
use meilisearch_types::heed::{Env, WithoutTls};
use serde_json::Deserializer; use serde_json::Deserializer;
use crate::{AuthController, HeedAuthStore, Result}; use crate::{AuthController, HeedAuthStore, Result};
@ -9,11 +10,8 @@ use crate::{AuthController, HeedAuthStore, Result};
const KEYS_PATH: &str = "keys"; const KEYS_PATH: &str = "keys";
impl AuthController { impl AuthController {
pub fn dump(src: impl AsRef<Path>, dst: impl AsRef<Path>) -> Result<()> { pub fn dump(auth_env: Env<WithoutTls>, dst: impl AsRef<Path>) -> Result<()> {
let mut store = HeedAuthStore::new(&src)?; let store = HeedAuthStore::new(auth_env)?;
// do not attempt to close the database on drop!
store.set_drop_on_close(false);
let keys_file_path = dst.as_ref().join(KEYS_PATH); let keys_file_path = dst.as_ref().join(KEYS_PATH);
@ -27,8 +25,8 @@ impl AuthController {
Ok(()) Ok(())
} }
pub fn load_dump(src: impl AsRef<Path>, dst: impl AsRef<Path>) -> Result<()> { pub fn load_dump(src: impl AsRef<Path>, auth_env: Env<WithoutTls>) -> Result<()> {
let store = HeedAuthStore::new(&dst)?; let store = HeedAuthStore::new(auth_env)?;
let keys_file_path = src.as_ref().join(KEYS_PATH); let keys_file_path = src.as_ref().join(KEYS_PATH);

View File

@ -3,11 +3,10 @@ pub mod error;
mod store; mod store;
use std::collections::{HashMap, HashSet}; use std::collections::{HashMap, HashSet};
use std::path::Path;
use std::sync::Arc;
use error::{AuthControllerError, Result}; use error::{AuthControllerError, Result};
use maplit::hashset; use maplit::hashset;
use meilisearch_types::heed::{Env, WithoutTls};
use meilisearch_types::index_uid_pattern::IndexUidPattern; use meilisearch_types::index_uid_pattern::IndexUidPattern;
use meilisearch_types::keys::{Action, CreateApiKey, Key, PatchApiKey}; use meilisearch_types::keys::{Action, CreateApiKey, Key, PatchApiKey};
use meilisearch_types::milli::update::Setting; use meilisearch_types::milli::update::Setting;
@ -19,19 +18,19 @@ use uuid::Uuid;
#[derive(Clone)] #[derive(Clone)]
pub struct AuthController { pub struct AuthController {
store: Arc<HeedAuthStore>, store: HeedAuthStore,
master_key: Option<String>, master_key: Option<String>,
} }
impl AuthController { impl AuthController {
pub fn new(db_path: impl AsRef<Path>, master_key: &Option<String>) -> Result<Self> { pub fn new(auth_env: Env<WithoutTls>, master_key: &Option<String>) -> Result<Self> {
let store = HeedAuthStore::new(db_path)?; let store = HeedAuthStore::new(auth_env)?;
if store.is_empty()? { if store.is_empty()? {
generate_default_keys(&store)?; generate_default_keys(&store)?;
} }
Ok(Self { store: Arc::new(store), master_key: master_key.clone() }) Ok(Self { store, master_key: master_key.clone() })
} }
/// Return `Ok(())` if the auth controller is able to access one of its database. /// Return `Ok(())` if the auth controller is able to access one of its database.

View File

@ -1,18 +1,16 @@
use std::borrow::Cow; use std::borrow::Cow;
use std::cmp::Reverse; use std::cmp::Reverse;
use std::collections::HashSet; use std::collections::HashSet;
use std::fs::create_dir_all;
use std::path::Path; use std::path::Path;
use std::result::Result as StdResult; use std::result::Result as StdResult;
use std::str; use std::str;
use std::str::FromStr; use std::str::FromStr;
use std::sync::Arc;
use hmac::{Hmac, Mac}; use hmac::{Hmac, Mac};
use meilisearch_types::heed::BoxedError; use meilisearch_types::heed::{BoxedError, WithoutTls};
use meilisearch_types::index_uid_pattern::IndexUidPattern; use meilisearch_types::index_uid_pattern::IndexUidPattern;
use meilisearch_types::keys::KeyId; use meilisearch_types::keys::KeyId;
use meilisearch_types::milli; use meilisearch_types::milli::heed;
use meilisearch_types::milli::heed::types::{Bytes, DecodeIgnore, SerdeJson}; use meilisearch_types::milli::heed::types::{Bytes, DecodeIgnore, SerdeJson};
use meilisearch_types::milli::heed::{Database, Env, EnvOpenOptions, RwTxn}; use meilisearch_types::milli::heed::{Database, Env, EnvOpenOptions, RwTxn};
use sha2::Sha256; use sha2::Sha256;
@ -25,44 +23,32 @@ use super::error::{AuthControllerError, Result};
use super::{Action, Key}; use super::{Action, Key};
const AUTH_STORE_SIZE: usize = 1_073_741_824; //1GiB const AUTH_STORE_SIZE: usize = 1_073_741_824; //1GiB
const AUTH_DB_PATH: &str = "auth";
const KEY_DB_NAME: &str = "api-keys"; const KEY_DB_NAME: &str = "api-keys";
const KEY_ID_ACTION_INDEX_EXPIRATION_DB_NAME: &str = "keyid-action-index-expiration"; const KEY_ID_ACTION_INDEX_EXPIRATION_DB_NAME: &str = "keyid-action-index-expiration";
#[derive(Clone)] #[derive(Clone)]
pub struct HeedAuthStore { pub struct HeedAuthStore {
env: Arc<Env>, env: Env<WithoutTls>,
keys: Database<Bytes, SerdeJson<Key>>, keys: Database<Bytes, SerdeJson<Key>>,
action_keyid_index_expiration: Database<KeyIdActionCodec, SerdeJson<Option<OffsetDateTime>>>, action_keyid_index_expiration: Database<KeyIdActionCodec, SerdeJson<Option<OffsetDateTime>>>,
should_close_on_drop: bool,
} }
impl Drop for HeedAuthStore { pub fn open_auth_store_env(path: &Path) -> heed::Result<Env<WithoutTls>> {
fn drop(&mut self) { let options = EnvOpenOptions::new();
if self.should_close_on_drop && Arc::strong_count(&self.env) == 1 { let mut options = options.read_txn_without_tls();
self.env.as_ref().clone().prepare_for_closing();
}
}
}
pub fn open_auth_store_env(path: &Path) -> milli::heed::Result<milli::heed::Env> {
let mut options = EnvOpenOptions::new();
options.map_size(AUTH_STORE_SIZE); // 1GB options.map_size(AUTH_STORE_SIZE); // 1GB
options.max_dbs(2); options.max_dbs(2);
unsafe { options.open(path) } unsafe { options.open(path) }
} }
impl HeedAuthStore { impl HeedAuthStore {
pub fn new(path: impl AsRef<Path>) -> Result<Self> { pub fn new(env: Env<WithoutTls>) -> Result<Self> {
let path = path.as_ref().join(AUTH_DB_PATH);
create_dir_all(&path)?;
let env = Arc::new(open_auth_store_env(path.as_ref())?);
let mut wtxn = env.write_txn()?; let mut wtxn = env.write_txn()?;
let keys = env.create_database(&mut wtxn, Some(KEY_DB_NAME))?; let keys = env.create_database(&mut wtxn, Some(KEY_DB_NAME))?;
let action_keyid_index_expiration = let action_keyid_index_expiration =
env.create_database(&mut wtxn, Some(KEY_ID_ACTION_INDEX_EXPIRATION_DB_NAME))?; env.create_database(&mut wtxn, Some(KEY_ID_ACTION_INDEX_EXPIRATION_DB_NAME))?;
wtxn.commit()?; wtxn.commit()?;
Ok(Self { env, keys, action_keyid_index_expiration, should_close_on_drop: true }) Ok(Self { env, keys, action_keyid_index_expiration })
} }
/// Return `Ok(())` if the auth store is able to access one of its database. /// Return `Ok(())` if the auth store is able to access one of its database.
@ -82,10 +68,6 @@ impl HeedAuthStore {
Ok(self.env.non_free_pages_size()?) Ok(self.env.non_free_pages_size()?)
} }
pub fn set_drop_on_close(&mut self, v: bool) {
self.should_close_on_drop = v;
}
pub fn is_empty(&self) -> Result<bool> { pub fn is_empty(&self) -> Result<bool> {
let rtxn = self.env.read_txn()?; let rtxn = self.env.read_txn()?;
@ -293,7 +275,7 @@ impl HeedAuthStore {
/// optionally on a specific index, for a given key. /// optionally on a specific index, for a given key.
pub struct KeyIdActionCodec; pub struct KeyIdActionCodec;
impl<'a> milli::heed::BytesDecode<'a> for KeyIdActionCodec { impl<'a> heed::BytesDecode<'a> for KeyIdActionCodec {
type DItem = (KeyId, Action, Option<&'a [u8]>); type DItem = (KeyId, Action, Option<&'a [u8]>);
fn bytes_decode(bytes: &'a [u8]) -> StdResult<Self::DItem, BoxedError> { fn bytes_decode(bytes: &'a [u8]) -> StdResult<Self::DItem, BoxedError> {
@ -310,7 +292,7 @@ impl<'a> milli::heed::BytesDecode<'a> for KeyIdActionCodec {
} }
} }
impl<'a> milli::heed::BytesEncode<'a> for KeyIdActionCodec { impl<'a> heed::BytesEncode<'a> for KeyIdActionCodec {
type EItem = (&'a KeyId, &'a Action, Option<&'a [u8]>); type EItem = (&'a KeyId, &'a Action, Option<&'a [u8]>);
fn bytes_encode((key_id, action, index): &Self::EItem) -> StdResult<Cow<[u8]>, BoxedError> { fn bytes_encode((key_id, action, index): &Self::EItem) -> StdResult<Cow<[u8]>, BoxedError> {

View File

@ -407,7 +407,7 @@ impl ErrorCode for milli::Error {
match error { match error {
// TODO: wait for spec for new error codes. // TODO: wait for spec for new error codes.
UserError::SerdeJson(_) UserError::SerdeJson(_)
| UserError::InvalidLmdbOpenOptions | UserError::EnvAlreadyOpened
| UserError::DocumentLimitReached | UserError::DocumentLimitReached
| UserError::UnknownInternalDocumentId { .. } => Code::Internal, | UserError::UnknownInternalDocumentId { .. } => Code::Internal,
UserError::InvalidStoreFile => Code::InvalidStoreFile, UserError::InvalidStoreFile => Code::InvalidStoreFile,
@ -504,8 +504,7 @@ impl ErrorCode for HeedError {
HeedError::Mdb(_) HeedError::Mdb(_)
| HeedError::Encoding(_) | HeedError::Encoding(_)
| HeedError::Decoding(_) | HeedError::Decoding(_)
| HeedError::DatabaseClosing | HeedError::EnvAlreadyOpened => Code::Internal,
| HeedError::BadOpenOptions { .. } => Code::Internal,
} }
} }
} }

View File

@ -34,7 +34,7 @@ use error::PayloadError;
use extractors::payload::PayloadConfig; use extractors::payload::PayloadConfig;
use index_scheduler::versioning::Versioning; use index_scheduler::versioning::Versioning;
use index_scheduler::{IndexScheduler, IndexSchedulerOptions}; use index_scheduler::{IndexScheduler, IndexSchedulerOptions};
use meilisearch_auth::AuthController; use meilisearch_auth::{open_auth_store_env, AuthController};
use meilisearch_types::milli::constants::VERSION_MAJOR; use meilisearch_types::milli::constants::VERSION_MAJOR;
use meilisearch_types::milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader}; use meilisearch_types::milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
use meilisearch_types::milli::update::{IndexDocumentsConfig, IndexDocumentsMethod}; use meilisearch_types::milli::update::{IndexDocumentsConfig, IndexDocumentsMethod};
@ -335,9 +335,12 @@ fn open_or_create_database_unchecked(
) -> anyhow::Result<(IndexScheduler, AuthController)> { ) -> anyhow::Result<(IndexScheduler, AuthController)> {
// we don't want to create anything in the data.ms yet, thus we // we don't want to create anything in the data.ms yet, thus we
// wrap our two builders in a closure that'll be executed later. // wrap our two builders in a closure that'll be executed later.
let auth_controller = AuthController::new(&opt.db_path, &opt.master_key); std::fs::create_dir_all(&index_scheduler_opt.auth_path)?;
let index_scheduler_builder = let auth_env = open_auth_store_env(&index_scheduler_opt.auth_path).unwrap();
|| -> anyhow::Result<_> { Ok(IndexScheduler::new(index_scheduler_opt, version)?) }; let auth_controller = AuthController::new(auth_env.clone(), &opt.master_key);
let index_scheduler_builder = || -> anyhow::Result<_> {
Ok(IndexScheduler::new(index_scheduler_opt, auth_env, version)?)
};
match ( match (
index_scheduler_builder(), index_scheduler_builder(),
@ -420,6 +423,7 @@ pub fn update_version_file_for_dumpless_upgrade(
if from_major == 1 && from_minor == 12 { if from_major == 1 && from_minor == 12 {
let env = unsafe { let env = unsafe {
heed::EnvOpenOptions::new() heed::EnvOpenOptions::new()
.read_txn_without_tls()
.max_dbs(Versioning::nb_db()) .max_dbs(Versioning::nb_db())
.map_size(index_scheduler_opt.task_db_size) .map_size(index_scheduler_opt.task_db_size)
.open(&index_scheduler_opt.tasks_path) .open(&index_scheduler_opt.tasks_path)

View File

@ -340,7 +340,8 @@ impl SearchKind {
vector_len: Option<usize>, vector_len: Option<usize>,
route: Route, route: Route,
) -> Result<(String, Arc<Embedder>, bool), ResponseError> { ) -> Result<(String, Arc<Embedder>, bool), ResponseError> {
let embedder_configs = index.embedding_configs(&index.read_txn()?)?; let rtxn = index.read_txn()?;
let embedder_configs = index.embedding_configs(&rtxn)?;
let embedders = index_scheduler.embedders(index_uid, embedder_configs)?; let embedders = index_scheduler.embedders(index_uid, embedder_configs)?;
let (embedder, _, quantized) = embedders let (embedder, _, quantized) = embedders

View File

@ -144,14 +144,6 @@ async fn experimental_feature_metrics() {
let (response, code) = server.get_metrics().await; let (response, code) = server.get_metrics().await;
meili_snap::snapshot!(code, @"200 OK"); meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(response, @"null"); meili_snap::snapshot!(response, @"null");
// startup without flag respects persisted metrics value
let disable_metrics =
Opt { experimental_enable_metrics: false, ..default_settings(dir.path()) };
let server_no_flag = Server::new_with_options(disable_metrics).await.unwrap();
let (response, code) = server_no_flag.get_metrics().await;
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(response, @"null");
} }
#[actix_rt::test] #[actix_rt::test]

View File

@ -64,9 +64,6 @@ async fn version_requires_downgrade() {
#[actix_rt::test] #[actix_rt::test]
async fn upgrade_to_the_current_version() { async fn upgrade_to_the_current_version() {
let temp = tempfile::tempdir().unwrap(); let temp = tempfile::tempdir().unwrap();
let server = Server::new_with_options(default_settings(temp.path())).await.unwrap();
drop(server);
let server = Server::new_with_options(Opt { let server = Server::new_with_options(Opt {
experimental_dumpless_upgrade: true, experimental_dumpless_upgrade: true,
..default_settings(temp.path()) ..default_settings(temp.path())

View File

@ -108,6 +108,10 @@ async fn check_the_keys(server: &Server) {
/// 5.2. Enqueue a new task /// 5.2. Enqueue a new task
/// 5.3. Create an index /// 5.3. Create an index
async fn check_the_index_scheduler(server: &Server) { async fn check_the_index_scheduler(server: &Server) {
// Wait until the upgrade has been applied to all indexes to avoid flakyness
let (tasks, _) = server.tasks_filter("types=upgradeDatabase&limit=1").await;
server.wait_task(Value(tasks["results"][0].clone()).uid()).await.succeeded();
// All the indexes are still present // All the indexes are still present
let (indexes, _) = server.list_indexes(None, None).await; let (indexes, _) = server.list_indexes(None, None).await;
snapshot!(indexes, @r#" snapshot!(indexes, @r#"
@ -156,10 +160,6 @@ async fn check_the_index_scheduler(server: &Server) {
} }
"###); "###);
// Wait until the upgrade has been applied to all indexes to avoid flakyness
let (tasks, _) = server.tasks_filter("types=upgradeDatabase&limit=1").await;
server.wait_task(Value(tasks["results"][0].clone()).uid()).await.succeeded();
// Tasks and batches should still work // Tasks and batches should still work
// We rewrite the first task for all calls because it may be the upgrade database with unknown dates and duration. // We rewrite the first task for all calls because it may be the upgrade database with unknown dates and duration.
// The other tasks should NOT change // The other tasks should NOT change

View File

@ -10,7 +10,6 @@ license.workspace = true
[dependencies] [dependencies]
anyhow = "1.0.95" anyhow = "1.0.95"
arroy_v04_to_v05 = { package = "arroy", git = "https://github.com/meilisearch/arroy/", tag = "DO-NOT-DELETE-upgrade-v04-to-v05" }
clap = { version = "4.5.24", features = ["derive"] } clap = { version = "4.5.24", features = ["derive"] }
dump = { path = "../dump" } dump = { path = "../dump" }
file-store = { path = "../file-store" } file-store = { path = "../file-store" }

View File

@ -7,11 +7,11 @@ use anyhow::{bail, Context};
use clap::{Parser, Subcommand, ValueEnum}; use clap::{Parser, Subcommand, ValueEnum};
use dump::{DumpWriter, IndexMetadata}; use dump::{DumpWriter, IndexMetadata};
use file_store::FileStore; use file_store::FileStore;
use meilisearch_auth::AuthController; use meilisearch_auth::{open_auth_store_env, AuthController};
use meilisearch_types::batches::Batch; use meilisearch_types::batches::Batch;
use meilisearch_types::heed::types::{Bytes, SerdeJson, Str}; use meilisearch_types::heed::types::{Bytes, SerdeJson, Str};
use meilisearch_types::heed::{ use meilisearch_types::heed::{
CompactionOption, Database, Env, EnvOpenOptions, RoTxn, RwTxn, Unspecified, CompactionOption, Database, Env, EnvOpenOptions, RoTxn, RwTxn, Unspecified, WithoutTls,
}; };
use meilisearch_types::milli::constants::RESERVED_VECTORS_FIELD_NAME; use meilisearch_types::milli::constants::RESERVED_VECTORS_FIELD_NAME;
use meilisearch_types::milli::documents::{obkv_to_object, DocumentsBatchReader}; use meilisearch_types::milli::documents::{obkv_to_object, DocumentsBatchReader};
@ -172,7 +172,7 @@ fn main() -> anyhow::Result<()> {
/// Clears the task queue located at `db_path`. /// Clears the task queue located at `db_path`.
fn clear_task_queue(db_path: PathBuf) -> anyhow::Result<()> { fn clear_task_queue(db_path: PathBuf) -> anyhow::Result<()> {
let path = db_path.join("tasks"); let path = db_path.join("tasks");
let env = unsafe { EnvOpenOptions::new().max_dbs(100).open(&path) } let env = unsafe { EnvOpenOptions::new().read_txn_without_tls().max_dbs(100).open(&path) }
.with_context(|| format!("While trying to open {:?}", path.display()))?; .with_context(|| format!("While trying to open {:?}", path.display()))?;
eprintln!("Deleting tasks from the database..."); eprintln!("Deleting tasks from the database...");
@ -225,7 +225,7 @@ fn clear_task_queue(db_path: PathBuf) -> anyhow::Result<()> {
} }
fn try_opening_database<KC: 'static, DC: 'static>( fn try_opening_database<KC: 'static, DC: 'static>(
env: &Env, env: &Env<WithoutTls>,
rtxn: &RoTxn, rtxn: &RoTxn,
db_name: &str, db_name: &str,
) -> anyhow::Result<Database<KC, DC>> { ) -> anyhow::Result<Database<KC, DC>> {
@ -235,7 +235,7 @@ fn try_opening_database<KC: 'static, DC: 'static>(
} }
fn try_opening_poly_database( fn try_opening_poly_database(
env: &Env, env: &Env<WithoutTls>,
rtxn: &RoTxn, rtxn: &RoTxn,
db_name: &str, db_name: &str,
) -> anyhow::Result<Database<Unspecified, Unspecified>> { ) -> anyhow::Result<Database<Unspecified, Unspecified>> {
@ -284,13 +284,18 @@ fn export_a_dump(
FileStore::new(db_path.join("update_files")).context("While opening the FileStore")?; FileStore::new(db_path.join("update_files")).context("While opening the FileStore")?;
let index_scheduler_path = db_path.join("tasks"); let index_scheduler_path = db_path.join("tasks");
let env = unsafe { EnvOpenOptions::new().max_dbs(100).open(&index_scheduler_path) } let env = unsafe {
EnvOpenOptions::new().read_txn_without_tls().max_dbs(100).open(&index_scheduler_path)
}
.with_context(|| format!("While trying to open {:?}", index_scheduler_path.display()))?; .with_context(|| format!("While trying to open {:?}", index_scheduler_path.display()))?;
eprintln!("Dumping the keys..."); eprintln!("Dumping the keys...");
// 2. dump the keys // 2. dump the keys
let auth_store = AuthController::new(&db_path, &None) let auth_path = db_path.join("auth");
std::fs::create_dir_all(&auth_path).context("While creating the auth directory")?;
let auth_env = open_auth_store_env(&auth_path).context("While opening the auth store")?;
let auth_store = AuthController::new(auth_env, &None)
.with_context(|| format!("While opening the auth store at {}", db_path.display()))?; .with_context(|| format!("While opening the auth store at {}", db_path.display()))?;
let mut dump_keys = dump.create_keys()?; let mut dump_keys = dump.create_keys()?;
let mut count = 0; let mut count = 0;
@ -386,7 +391,8 @@ fn export_a_dump(
for result in index_mapping.iter(&rtxn)? { for result in index_mapping.iter(&rtxn)? {
let (uid, uuid) = result?; let (uid, uuid) = result?;
let index_path = db_path.join("indexes").join(uuid.to_string()); let index_path = db_path.join("indexes").join(uuid.to_string());
let index = Index::new(EnvOpenOptions::new(), &index_path, false).with_context(|| { let index = Index::new(EnvOpenOptions::new().read_txn_without_tls(), &index_path, false)
.with_context(|| {
format!("While trying to open the index at path {:?}", index_path.display()) format!("While trying to open the index at path {:?}", index_path.display())
})?; })?;
@ -438,7 +444,9 @@ fn export_a_dump(
fn compact_index(db_path: PathBuf, index_name: &str) -> anyhow::Result<()> { fn compact_index(db_path: PathBuf, index_name: &str) -> anyhow::Result<()> {
let index_scheduler_path = db_path.join("tasks"); let index_scheduler_path = db_path.join("tasks");
let env = unsafe { EnvOpenOptions::new().max_dbs(100).open(&index_scheduler_path) } let env = unsafe {
EnvOpenOptions::new().read_txn_without_tls().max_dbs(100).open(&index_scheduler_path)
}
.with_context(|| format!("While trying to open {:?}", index_scheduler_path.display()))?; .with_context(|| format!("While trying to open {:?}", index_scheduler_path.display()))?;
let rtxn = env.read_txn()?; let rtxn = env.read_txn()?;
@ -456,7 +464,8 @@ fn compact_index(db_path: PathBuf, index_name: &str) -> anyhow::Result<()> {
} }
let index_path = db_path.join("indexes").join(uuid.to_string()); let index_path = db_path.join("indexes").join(uuid.to_string());
let index = Index::new(EnvOpenOptions::new(), &index_path, false).with_context(|| { let index = Index::new(EnvOpenOptions::new().read_txn_without_tls(), &index_path, false)
.with_context(|| {
format!("While trying to open the index at path {:?}", index_path.display()) format!("While trying to open the index at path {:?}", index_path.display())
})?; })?;
@ -470,7 +479,7 @@ fn compact_index(db_path: PathBuf, index_name: &str) -> anyhow::Result<()> {
eprintln!("Compacting the index..."); eprintln!("Compacting the index...");
let before_compaction = Instant::now(); let before_compaction = Instant::now();
let new_file = index let new_file = index
.copy_to_file(&compacted_index_file_path, CompactionOption::Enabled) .copy_to_path(&compacted_index_file_path, CompactionOption::Enabled)
.with_context(|| format!("While compacting {}", compacted_index_file_path.display()))?; .with_context(|| format!("While compacting {}", compacted_index_file_path.display()))?;
let after_size = new_file.metadata()?.len(); let after_size = new_file.metadata()?.len();
@ -514,7 +523,9 @@ fn export_documents(
offset: Option<usize>, offset: Option<usize>,
) -> anyhow::Result<()> { ) -> anyhow::Result<()> {
let index_scheduler_path = db_path.join("tasks"); let index_scheduler_path = db_path.join("tasks");
let env = unsafe { EnvOpenOptions::new().max_dbs(100).open(&index_scheduler_path) } let env = unsafe {
EnvOpenOptions::new().read_txn_without_tls().max_dbs(100).open(&index_scheduler_path)
}
.with_context(|| format!("While trying to open {:?}", index_scheduler_path.display()))?; .with_context(|| format!("While trying to open {:?}", index_scheduler_path.display()))?;
let rtxn = env.read_txn()?; let rtxn = env.read_txn()?;
@ -526,7 +537,8 @@ fn export_documents(
if uid == index_name { if uid == index_name {
let index_path = db_path.join("indexes").join(uuid.to_string()); let index_path = db_path.join("indexes").join(uuid.to_string());
let index = let index =
Index::new(EnvOpenOptions::new(), &index_path, false).with_context(|| { Index::new(EnvOpenOptions::new().read_txn_without_tls(), &index_path, false)
.with_context(|| {
format!("While trying to open the index at path {:?}", index_path.display()) format!("While trying to open the index at path {:?}", index_path.display())
})?; })?;
@ -616,7 +628,9 @@ fn hair_dryer(
index_parts: &[IndexPart], index_parts: &[IndexPart],
) -> anyhow::Result<()> { ) -> anyhow::Result<()> {
let index_scheduler_path = db_path.join("tasks"); let index_scheduler_path = db_path.join("tasks");
let env = unsafe { EnvOpenOptions::new().max_dbs(100).open(&index_scheduler_path) } let env = unsafe {
EnvOpenOptions::new().read_txn_without_tls().max_dbs(100).open(&index_scheduler_path)
}
.with_context(|| format!("While trying to open {:?}", index_scheduler_path.display()))?; .with_context(|| format!("While trying to open {:?}", index_scheduler_path.display()))?;
eprintln!("Trying to get a read transaction on the index scheduler..."); eprintln!("Trying to get a read transaction on the index scheduler...");
@ -630,7 +644,8 @@ fn hair_dryer(
if index_names.iter().any(|i| i == uid) { if index_names.iter().any(|i| i == uid) {
let index_path = db_path.join("indexes").join(uuid.to_string()); let index_path = db_path.join("indexes").join(uuid.to_string());
let index = let index =
Index::new(EnvOpenOptions::new(), &index_path, false).with_context(|| { Index::new(EnvOpenOptions::new().read_txn_without_tls(), &index_path, false)
.with_context(|| {
format!("While trying to open the index at path {:?}", index_path.display()) format!("While trying to open the index at path {:?}", index_path.display())
})?; })?;

View File

@ -2,7 +2,9 @@ use std::path::Path;
use anyhow::{bail, Context}; use anyhow::{bail, Context};
use meilisearch_types::heed::types::{SerdeJson, Str}; use meilisearch_types::heed::types::{SerdeJson, Str};
use meilisearch_types::heed::{Database, Env, EnvOpenOptions, RoTxn, RwTxn, Unspecified}; use meilisearch_types::heed::{
Database, Env, EnvOpenOptions, RoTxn, RwTxn, Unspecified, WithoutTls,
};
use meilisearch_types::milli::index::{db_name, main_key}; use meilisearch_types::milli::index::{db_name, main_key};
use super::v1_9; use super::v1_9;
@ -92,7 +94,7 @@ fn update_index_stats(
fn update_date_format( fn update_date_format(
index_uid: &str, index_uid: &str,
index_env: &Env, index_env: &Env<WithoutTls>,
index_wtxn: &mut RwTxn, index_wtxn: &mut RwTxn,
) -> anyhow::Result<()> { ) -> anyhow::Result<()> {
let main = try_opening_poly_database(index_env, index_wtxn, db_name::MAIN) let main = try_opening_poly_database(index_env, index_wtxn, db_name::MAIN)
@ -106,7 +108,7 @@ fn update_date_format(
fn find_rest_embedders( fn find_rest_embedders(
index_uid: &str, index_uid: &str,
index_env: &Env, index_env: &Env<WithoutTls>,
index_txn: &RoTxn, index_txn: &RoTxn,
) -> anyhow::Result<Vec<String>> { ) -> anyhow::Result<Vec<String>> {
let main = try_opening_poly_database(index_env, index_txn, db_name::MAIN) let main = try_opening_poly_database(index_env, index_txn, db_name::MAIN)
@ -164,7 +166,9 @@ pub fn v1_9_to_v1_10(
// 2. REST embedders. We don't support this case right now, so bail // 2. REST embedders. We don't support this case right now, so bail
let index_scheduler_path = db_path.join("tasks"); let index_scheduler_path = db_path.join("tasks");
let env = unsafe { EnvOpenOptions::new().max_dbs(100).open(&index_scheduler_path) } let env = unsafe {
EnvOpenOptions::new().read_txn_without_tls().max_dbs(100).open(&index_scheduler_path)
}
.with_context(|| format!("While trying to open {:?}", index_scheduler_path.display()))?; .with_context(|| format!("While trying to open {:?}", index_scheduler_path.display()))?;
let mut sched_wtxn = env.write_txn()?; let mut sched_wtxn = env.write_txn()?;
@ -205,7 +209,11 @@ pub fn v1_9_to_v1_10(
let index_env = unsafe { let index_env = unsafe {
// FIXME: fetch the 25 magic number from the index file // FIXME: fetch the 25 magic number from the index file
EnvOpenOptions::new().max_dbs(25).open(&index_path).with_context(|| { EnvOpenOptions::new()
.read_txn_without_tls()
.max_dbs(25)
.open(&index_path)
.with_context(|| {
format!("while opening index {uid} at '{}'", index_path.display()) format!("while opening index {uid} at '{}'", index_path.display())
})? })?
}; };
@ -252,7 +260,11 @@ pub fn v1_9_to_v1_10(
let index_env = unsafe { let index_env = unsafe {
// FIXME: fetch the 25 magic number from the index file // FIXME: fetch the 25 magic number from the index file
EnvOpenOptions::new().max_dbs(25).open(&index_path).with_context(|| { EnvOpenOptions::new()
.read_txn_without_tls()
.max_dbs(25)
.open(&index_path)
.with_context(|| {
format!("while opening index {uid} at '{}'", index_path.display()) format!("while opening index {uid} at '{}'", index_path.display())
})? })?
}; };

View File

@ -23,7 +23,9 @@ pub fn v1_10_to_v1_11(
println!("Upgrading from v1.10.0 to v1.11.0"); println!("Upgrading from v1.10.0 to v1.11.0");
let index_scheduler_path = db_path.join("tasks"); let index_scheduler_path = db_path.join("tasks");
let env = unsafe { EnvOpenOptions::new().max_dbs(100).open(&index_scheduler_path) } let env = unsafe {
EnvOpenOptions::new().read_txn_without_tls().max_dbs(100).open(&index_scheduler_path)
}
.with_context(|| format!("While trying to open {:?}", index_scheduler_path.display()))?; .with_context(|| format!("While trying to open {:?}", index_scheduler_path.display()))?;
let sched_rtxn = env.read_txn()?; let sched_rtxn = env.read_txn()?;
@ -50,7 +52,11 @@ pub fn v1_10_to_v1_11(
); );
let index_env = unsafe { let index_env = unsafe {
EnvOpenOptions::new().max_dbs(25).open(&index_path).with_context(|| { EnvOpenOptions::new()
.read_txn_without_tls()
.max_dbs(25)
.open(&index_path)
.with_context(|| {
format!("while opening index {uid} at '{}'", index_path.display()) format!("while opening index {uid} at '{}'", index_path.display())
})? })?
}; };
@ -76,11 +82,11 @@ pub fn v1_10_to_v1_11(
try_opening_poly_database(&index_env, &index_wtxn, db_name::VECTOR_ARROY) try_opening_poly_database(&index_env, &index_wtxn, db_name::VECTOR_ARROY)
.with_context(|| format!("while updating date format for index `{uid}`"))?; .with_context(|| format!("while updating date format for index `{uid}`"))?;
arroy_v04_to_v05::ugrade_from_prev_version( meilisearch_types::milli::arroy::upgrade::cosine_from_0_4_to_0_5(
&index_rtxn, &index_rtxn,
index_read_database, index_read_database.remap_types(),
&mut index_wtxn, &mut index_wtxn,
index_write_database, index_write_database.remap_types(),
)?; )?;
index_wtxn.commit()?; index_wtxn.commit()?;

View File

@ -115,7 +115,9 @@ fn convert_update_files(db_path: &Path) -> anyhow::Result<()> {
/// Rebuild field distribution as it was wrongly computed in v1.12.x if x < 3 /// Rebuild field distribution as it was wrongly computed in v1.12.x if x < 3
fn rebuild_field_distribution(db_path: &Path) -> anyhow::Result<()> { fn rebuild_field_distribution(db_path: &Path) -> anyhow::Result<()> {
let index_scheduler_path = db_path.join("tasks"); let index_scheduler_path = db_path.join("tasks");
let env = unsafe { EnvOpenOptions::new().max_dbs(100).open(&index_scheduler_path) } let env = unsafe {
EnvOpenOptions::new().read_txn_without_tls().max_dbs(100).open(&index_scheduler_path)
}
.with_context(|| format!("While trying to open {:?}", index_scheduler_path.display()))?; .with_context(|| format!("While trying to open {:?}", index_scheduler_path.display()))?;
let mut sched_wtxn = env.write_txn()?; let mut sched_wtxn = env.write_txn()?;
@ -173,11 +175,12 @@ fn rebuild_field_distribution(db_path: &Path) -> anyhow::Result<()> {
println!("\t- Rebuilding field distribution"); println!("\t- Rebuilding field distribution");
let index = let index = meilisearch_types::milli::Index::new(
meilisearch_types::milli::Index::new(EnvOpenOptions::new(), &index_path, false) EnvOpenOptions::new().read_txn_without_tls(),
.with_context(|| { &index_path,
format!("while opening index {uid} at '{}'", index_path.display()) false,
})?; )
.with_context(|| format!("while opening index {uid} at '{}'", index_path.display()))?;
let mut index_txn = index.write_txn()?; let mut index_txn = index.write_txn()?;

View File

@ -28,11 +28,13 @@ flatten-serde-json = { path = "../flatten-serde-json" }
fst = "0.4.7" fst = "0.4.7"
fxhash = "0.2.1" fxhash = "0.2.1"
geoutils = "0.5.1" geoutils = "0.5.1"
grenad = { version = "0.5.0", default-features = false, features = ["rayon", "tempfile"] } grenad = { version = "0.5.0", default-features = false, features = [
heed = { version = "0.20.5", default-features = false, features = [ "rayon",
"tempfile",
] }
heed = { version = "0.22.0", default-features = false, features = [
"serde-json", "serde-json",
"serde-bincode", "serde-bincode",
"read-txn-no-tls",
] } ] }
indexmap = { version = "2.7.0", features = ["serde"] } indexmap = { version = "2.7.0", features = ["serde"] }
json-depth-checker = { path = "../json-depth-checker" } json-depth-checker = { path = "../json-depth-checker" }
@ -85,7 +87,7 @@ rhai = { git = "https://github.com/rhaiscript/rhai", rev = "ef3df63121d27aacd838
"no_time", "no_time",
"sync", "sync",
] } ] }
arroy = "0.5.0" arroy = "0.6.0"
rand = "0.8.5" rand = "0.8.5"
tracing = "0.1.41" tracing = "0.1.41"
ureq = { version = "2.12.1", features = ["json"] } ureq = { version = "2.12.1", features = ["json"] }
@ -101,7 +103,13 @@ uell = "0.1.0"
enum-iterator = "2.1.0" enum-iterator = "2.1.0"
bbqueue = { git = "https://github.com/meilisearch/bbqueue" } bbqueue = { git = "https://github.com/meilisearch/bbqueue" }
flume = { version = "0.11.1", default-features = false } flume = { version = "0.11.1", default-features = false }
utoipa = { version = "5.3.1", features = ["non_strict_integers", "preserve_order", "uuid", "time", "openapi_extensions"] } utoipa = { version = "5.3.1", features = [
"non_strict_integers",
"preserve_order",
"uuid",
"time",
"openapi_extensions",
] }
[dev-dependencies] [dev-dependencies]
mimalloc = { version = "0.1.43", default-features = false } mimalloc = { version = "0.1.43", default-features = false }
@ -113,9 +121,7 @@ meili-snap = { path = "../meili-snap" }
rand = { version = "0.8.5", features = ["small_rng"] } rand = { version = "0.8.5", features = ["small_rng"] }
[features] [features]
all-tokenizations = [ all-tokenizations = ["charabia/default"]
"charabia/default",
]
# Use POSIX semaphores instead of SysV semaphores in LMDB # Use POSIX semaphores instead of SysV semaphores in LMDB
# For more information on this feature, see heed's Cargo.toml # For more information on this feature, see heed's Cargo.toml

View File

@ -80,9 +80,13 @@ impl DocumentsBatchIndex {
let mut map = Object::new(); let mut map = Object::new();
for (k, v) in document.iter() { for (k, v) in document.iter() {
// TODO: TAMO: update the error type let key = self
let key = .0
self.0.get_by_left(&k).ok_or(crate::error::InternalError::DatabaseClosing)?.clone(); .get_by_left(&k)
.ok_or(crate::error::InternalError::FieldIdMapMissingEntry(
FieldIdMapMissingEntry::FieldId { field_id: k, process: "recreate_json" },
))?
.clone();
let value = serde_json::from_slice::<serde_json::Value>(v) let value = serde_json::from_slice::<serde_json::Value>(v)
.map_err(crate::error::InternalError::SerdeJson)?; .map_err(crate::error::InternalError::SerdeJson)?;
map.insert(key, value); map.insert(key, value);

View File

@ -33,8 +33,6 @@ pub enum Error {
#[derive(Error, Debug)] #[derive(Error, Debug)]
pub enum InternalError { pub enum InternalError {
#[error("{}", HeedError::DatabaseClosing)]
DatabaseClosing,
#[error("missing {} in the {db_name} database", key.unwrap_or("key"))] #[error("missing {} in the {db_name} database", key.unwrap_or("key"))]
DatabaseMissingEntry { db_name: &'static str, key: Option<&'static str> }, DatabaseMissingEntry { db_name: &'static str, key: Option<&'static str> },
#[error("missing {key} in the fieldids weights mapping")] #[error("missing {key} in the fieldids weights mapping")]
@ -197,8 +195,8 @@ and can not be more than 511 bytes.", .document_id.to_string()
valid_fields: BTreeSet<String>, valid_fields: BTreeSet<String>,
hidden_fields: bool, hidden_fields: bool,
}, },
#[error("an environment is already opened with different options")] #[error("An LMDB environment is already opened")]
InvalidLmdbOpenOptions, EnvAlreadyOpened,
#[error("You must specify where `sort` is listed in the rankingRules setting to use the sort parameter at search time.")] #[error("You must specify where `sort` is listed in the rankingRules setting to use the sort parameter at search time.")]
SortRankingRuleMissing, SortRankingRuleMissing,
#[error("The database file is in an invalid state.")] #[error("The database file is in an invalid state.")]
@ -362,7 +360,8 @@ impl From<arroy::Error> for Error {
| arroy::Error::UnmatchingDistance { .. } | arroy::Error::UnmatchingDistance { .. }
| arroy::Error::NeedBuild(_) | arroy::Error::NeedBuild(_)
| arroy::Error::MissingKey { .. } | arroy::Error::MissingKey { .. }
| arroy::Error::MissingMetadata(_) => { | arroy::Error::MissingMetadata(_)
| arroy::Error::CannotDecodeKeyMode { .. } => {
Error::InternalError(InternalError::ArroyError(value)) Error::InternalError(InternalError::ArroyError(value))
} }
} }
@ -516,8 +515,7 @@ impl From<HeedError> for Error {
// TODO use the encoding // TODO use the encoding
HeedError::Encoding(_) => InternalError(Serialization(Encoding { db_name: None })), HeedError::Encoding(_) => InternalError(Serialization(Encoding { db_name: None })),
HeedError::Decoding(_) => InternalError(Serialization(Decoding { db_name: None })), HeedError::Decoding(_) => InternalError(Serialization(Decoding { db_name: None })),
HeedError::DatabaseClosing => InternalError(DatabaseClosing), HeedError::EnvAlreadyOpened { .. } => UserError(EnvAlreadyOpened),
HeedError::BadOpenOptions { .. } => UserError(InvalidLmdbOpenOptions),
} }
} }
} }

View File

@ -3,7 +3,7 @@ use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};
use std::fs::File; use std::fs::File;
use std::path::Path; use std::path::Path;
use heed::types::*; use heed::{types::*, WithoutTls};
use heed::{CompactionOption, Database, RoTxn, RwTxn, Unspecified}; use heed::{CompactionOption, Database, RoTxn, RwTxn, Unspecified};
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use rstar::RTree; use rstar::RTree;
@ -110,7 +110,7 @@ pub mod db_name {
#[derive(Clone)] #[derive(Clone)]
pub struct Index { pub struct Index {
/// The LMDB environment which this index is associated with. /// The LMDB environment which this index is associated with.
pub(crate) env: heed::Env, pub(crate) env: heed::Env<WithoutTls>,
/// Contains many different types (e.g. the fields ids map). /// Contains many different types (e.g. the fields ids map).
pub(crate) main: Database<Unspecified, Unspecified>, pub(crate) main: Database<Unspecified, Unspecified>,
@ -177,7 +177,7 @@ pub struct Index {
impl Index { impl Index {
pub fn new_with_creation_dates<P: AsRef<Path>>( pub fn new_with_creation_dates<P: AsRef<Path>>(
mut options: heed::EnvOpenOptions, mut options: heed::EnvOpenOptions<WithoutTls>,
path: P, path: P,
created_at: time::OffsetDateTime, created_at: time::OffsetDateTime,
updated_at: time::OffsetDateTime, updated_at: time::OffsetDateTime,
@ -275,7 +275,7 @@ impl Index {
} }
pub fn new<P: AsRef<Path>>( pub fn new<P: AsRef<Path>>(
options: heed::EnvOpenOptions, options: heed::EnvOpenOptions<WithoutTls>,
path: P, path: P,
creation: bool, creation: bool,
) -> Result<Index> { ) -> Result<Index> {
@ -284,7 +284,7 @@ impl Index {
} }
fn set_creation_dates( fn set_creation_dates(
env: &heed::Env, env: &heed::Env<WithoutTls>,
main: Database<Unspecified, Unspecified>, main: Database<Unspecified, Unspecified>,
created_at: time::OffsetDateTime, created_at: time::OffsetDateTime,
updated_at: time::OffsetDateTime, updated_at: time::OffsetDateTime,
@ -306,12 +306,12 @@ impl Index {
} }
/// Create a read transaction to be able to read the index. /// Create a read transaction to be able to read the index.
pub fn read_txn(&self) -> heed::Result<RoTxn<'_>> { pub fn read_txn(&self) -> heed::Result<RoTxn<'_, WithoutTls>> {
self.env.read_txn() self.env.read_txn()
} }
/// Create a static read transaction to be able to read the index without keeping a reference to it. /// Create a static read transaction to be able to read the index without keeping a reference to it.
pub fn static_read_txn(&self) -> heed::Result<RoTxn<'static>> { pub fn static_read_txn(&self) -> heed::Result<RoTxn<'static, WithoutTls>> {
self.env.clone().static_read_txn() self.env.clone().static_read_txn()
} }
@ -340,8 +340,12 @@ impl Index {
self.env.info().map_size self.env.info().map_size
} }
pub fn copy_to_file<P: AsRef<Path>>(&self, path: P, option: CompactionOption) -> Result<File> { pub fn copy_to_file(&self, file: &mut File, option: CompactionOption) -> Result<()> {
self.env.copy_to_file(path, option).map_err(Into::into) self.env.copy_to_file(file, option).map_err(Into::into)
}
pub fn copy_to_path<P: AsRef<Path>>(&self, path: P, option: CompactionOption) -> Result<File> {
self.env.copy_to_path(path, option).map_err(Into::into)
} }
/// Returns an `EnvClosingEvent` that can be used to wait for the closing event, /// Returns an `EnvClosingEvent` that can be used to wait for the closing event,
@ -1825,7 +1829,8 @@ pub(crate) mod tests {
impl TempIndex { impl TempIndex {
/// Creates a temporary index /// Creates a temporary index
pub fn new_with_map_size(size: usize) -> Self { pub fn new_with_map_size(size: usize) -> Self {
let mut options = EnvOpenOptions::new(); let options = EnvOpenOptions::new();
let mut options = options.read_txn_without_tls();
options.map_size(size); options.map_size(size);
let _tempdir = TempDir::new_in(".").unwrap(); let _tempdir = TempDir::new_in(".").unwrap();
let inner = Index::new(options, _tempdir.path(), true).unwrap(); let inner = Index::new(options, _tempdir.path(), true).unwrap();

View File

@ -83,6 +83,8 @@ pub use self::search::{
}; };
pub use self::update::ChannelCongestion; pub use self::update::ChannelCongestion;
pub use arroy;
pub type Result<T> = std::result::Result<T, error::Error>; pub type Result<T> = std::result::Result<T, error::Error>;
pub type Attribute = u32; pub type Attribute = u32;

View File

@ -15,7 +15,8 @@ use crate::constants::RESERVED_GEO_FIELD_NAME;
pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index { pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {
let path = tempfile::tempdir().unwrap(); let path = tempfile::tempdir().unwrap();
let mut options = EnvOpenOptions::new(); let options = EnvOpenOptions::new();
let mut options = options.read_txn_without_tls();
options.map_size(10 * 1024 * 1024); // 10 MB options.map_size(10 * 1024 * 1024); // 10 MB
let index = Index::new(options, &path, true).unwrap(); let index = Index::new(options, &path, true).unwrap();

View File

@ -352,7 +352,7 @@ pub(crate) mod test_helpers {
use grenad::MergerBuilder; use grenad::MergerBuilder;
use heed::types::Bytes; use heed::types::Bytes;
use heed::{BytesDecode, BytesEncode, Env, RoTxn, RwTxn}; use heed::{BytesDecode, BytesEncode, Env, RoTxn, RwTxn, WithoutTls};
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use super::bulk::FacetsUpdateBulkInner; use super::bulk::FacetsUpdateBulkInner;
@ -390,7 +390,7 @@ pub(crate) mod test_helpers {
for<'a> BoundCodec: for<'a> BoundCodec:
BytesEncode<'a> + BytesDecode<'a, DItem = <BoundCodec as BytesEncode<'a>>::EItem>, BytesEncode<'a> + BytesDecode<'a, DItem = <BoundCodec as BytesEncode<'a>>::EItem>,
{ {
pub env: Env, pub env: Env<WithoutTls>,
pub content: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>, pub content: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
pub group_size: Cell<u8>, pub group_size: Cell<u8>,
pub min_level_size: Cell<u8>, pub min_level_size: Cell<u8>,
@ -412,7 +412,8 @@ pub(crate) mod test_helpers {
let group_size = group_size.clamp(2, 127); let group_size = group_size.clamp(2, 127);
let max_group_size = std::cmp::min(127, std::cmp::max(group_size * 2, max_group_size)); // 2*group_size <= x <= 127 let max_group_size = std::cmp::min(127, std::cmp::max(group_size * 2, max_group_size)); // 2*group_size <= x <= 127
let min_level_size = std::cmp::max(1, min_level_size); // 1 <= x <= inf let min_level_size = std::cmp::max(1, min_level_size); // 1 <= x <= inf
let mut options = heed::EnvOpenOptions::new(); let options = heed::EnvOpenOptions::new();
let mut options = options.read_txn_without_tls();
let options = options.map_size(4096 * 4 * 1000 * 100); let options = options.map_size(4096 * 4 * 1000 * 100);
let tempdir = tempfile::TempDir::new().unwrap(); let tempdir = tempfile::TempDir::new().unwrap();
let env = unsafe { options.open(tempdir.path()) }.unwrap(); let env = unsafe { options.open(tempdir.path()) }.unwrap();

View File

@ -520,7 +520,14 @@ where
pool.install(|| { pool.install(|| {
let mut writer = ArroyWrapper::new(vector_arroy, embedder_index, was_quantized); let mut writer = ArroyWrapper::new(vector_arroy, embedder_index, was_quantized);
writer.build_and_quantize(wtxn, &mut rng, dimension, is_quantizing, cancel)?; writer.build_and_quantize(
wtxn,
&mut rng,
dimension,
is_quantizing,
self.indexer_config.max_memory,
cancel,
)?;
Result::Ok(()) Result::Ok(())
}) })
.map_err(InternalError::from)??; .map_err(InternalError::from)??;

View File

@ -3,7 +3,7 @@ use std::sync::atomic::Ordering;
use std::sync::{Arc, RwLock}; use std::sync::{Arc, RwLock};
use bumpalo::Bump; use bumpalo::Bump;
use heed::RoTxn; use heed::{RoTxn, WithoutTls};
use rayon::iter::IndexedParallelIterator; use rayon::iter::IndexedParallelIterator;
use super::super::document_change::DocumentChange; use super::super::document_change::DocumentChange;
@ -28,7 +28,7 @@ pub struct DocumentChangeContext<
/// inside of the DB. /// inside of the DB.
pub db_fields_ids_map: &'indexer FieldsIdsMap, pub db_fields_ids_map: &'indexer FieldsIdsMap,
/// A transaction providing data from the DB before all indexing operations /// A transaction providing data from the DB before all indexing operations
pub rtxn: RoTxn<'indexer>, pub rtxn: RoTxn<'indexer, WithoutTls>,
/// Global field id map that is up to date with the current state of the indexing process. /// Global field id map that is up to date with the current state of the indexing process.
/// ///

View File

@ -62,6 +62,8 @@ where
let mut bbbuffers = Vec::new(); let mut bbbuffers = Vec::new();
let finished_extraction = AtomicBool::new(false); let finished_extraction = AtomicBool::new(false);
let arroy_memory = grenad_parameters.max_memory;
// We reduce the actual memory used to 5%. The reason we do this here and not in Meilisearch // We reduce the actual memory used to 5%. The reason we do this here and not in Meilisearch
// is because we still use the old indexer for the settings and it is highly impacted by the // is because we still use the old indexer for the settings and it is highly impacted by the
// max memory. So we keep the changes here and will remove these changes once we use the new // max memory. So we keep the changes here and will remove these changes once we use the new
@ -200,6 +202,7 @@ where
index, index,
wtxn, wtxn,
index_embeddings, index_embeddings,
arroy_memory,
&mut arroy_writers, &mut arroy_writers,
&indexing_context.must_stop_processing, &indexing_context.must_stop_processing,
) )

View File

@ -101,6 +101,7 @@ pub fn build_vectors<MSP>(
index: &Index, index: &Index,
wtxn: &mut RwTxn<'_>, wtxn: &mut RwTxn<'_>,
index_embeddings: Vec<IndexEmbeddingConfig>, index_embeddings: Vec<IndexEmbeddingConfig>,
arroy_memory: Option<usize>,
arroy_writers: &mut HashMap<u8, (&str, &Embedder, ArroyWrapper, usize)>, arroy_writers: &mut HashMap<u8, (&str, &Embedder, ArroyWrapper, usize)>,
must_stop_processing: &MSP, must_stop_processing: &MSP,
) -> Result<()> ) -> Result<()>
@ -111,10 +112,18 @@ where
return Ok(()); return Ok(());
} }
let mut rng = rand::rngs::StdRng::seed_from_u64(42); let seed = rand::random();
let mut rng = rand::rngs::StdRng::seed_from_u64(seed);
for (_index, (_embedder_name, _embedder, writer, dimensions)) in arroy_writers { for (_index, (_embedder_name, _embedder, writer, dimensions)) in arroy_writers {
let dimensions = *dimensions; let dimensions = *dimensions;
writer.build_and_quantize(wtxn, &mut rng, dimensions, false, must_stop_processing)?; writer.build_and_quantize(
wtxn,
&mut rng,
dimensions,
false,
arroy_memory,
must_stop_processing,
)?;
} }
index.put_embedding_configs(wtxn, index_embeddings)?; index.put_embedding_configs(wtxn, index_embeddings)?;

View File

@ -1,15 +1,17 @@
mod v1_12; mod v1_12;
mod v1_13; mod v1_13;
mod v1_14;
use heed::RwTxn; use heed::RwTxn;
use v1_12::{V1_12_3_To_V1_13_0, V1_12_To_V1_12_3}; use v1_12::{V1_12_3_To_V1_13_0, V1_12_To_V1_12_3};
use v1_13::{V1_13_0_To_V1_13_1, V1_13_1_To_Current}; use v1_13::{V1_13_0_To_V1_13_1, V1_13_1_To_Latest_V1_13};
use v1_14::Latest_V1_13_To_Latest_V1_14;
use crate::progress::{Progress, VariableNameStep}; use crate::progress::{Progress, VariableNameStep};
use crate::{Index, InternalError, Result}; use crate::{Index, InternalError, Result};
trait UpgradeIndex { trait UpgradeIndex {
/// Returns true if the index scheduler must regenerate its cached stats /// Returns `true` if the index scheduler must regenerate its cached stats.
fn upgrade( fn upgrade(
&self, &self,
wtxn: &mut RwTxn, wtxn: &mut RwTxn,
@ -32,15 +34,17 @@ pub fn upgrade(
&V1_12_To_V1_12_3 {}, &V1_12_To_V1_12_3 {},
&V1_12_3_To_V1_13_0 {}, &V1_12_3_To_V1_13_0 {},
&V1_13_0_To_V1_13_1 {}, &V1_13_0_To_V1_13_1 {},
&V1_13_1_To_Current {}, &V1_13_1_To_Latest_V1_13 {},
&Latest_V1_13_To_Latest_V1_14 {},
]; ];
let start = match from { let start = match from {
(1, 12, 0..=2) => 0, (1, 12, 0..=2) => 0,
(1, 12, 3..) => 1, (1, 12, 3..) => 1,
(1, 13, 0) => 2, (1, 13, 0) => 2,
(1, 13, _) => 4,
// We must handle the current version in the match because in case of a failure some index may have been upgraded but not other. // We must handle the current version in the match because in case of a failure some index may have been upgraded but not other.
(1, 13, _) => 3, (1, 14, _) => 4,
(major, minor, patch) => { (major, minor, patch) => {
return Err(InternalError::CannotUpgradeToVersion(major, minor, patch).into()) return Err(InternalError::CannotUpgradeToVersion(major, minor, patch).into())
} }
@ -50,7 +54,6 @@ pub fn upgrade(
let upgrade_path = &upgrade_functions[start..]; let upgrade_path = &upgrade_functions[start..];
let mut current_version = from; let mut current_version = from;
let mut regenerate_stats = false; let mut regenerate_stats = false;
for (i, upgrade) in upgrade_path.iter().enumerate() { for (i, upgrade) in upgrade_path.iter().enumerate() {
let target = upgrade.target_version(); let target = upgrade.target_version();

View File

@ -37,9 +37,9 @@ impl UpgradeIndex for V1_13_0_To_V1_13_1 {
} }
#[allow(non_camel_case_types)] #[allow(non_camel_case_types)]
pub(super) struct V1_13_1_To_Current(); pub(super) struct V1_13_1_To_Latest_V1_13();
impl UpgradeIndex for V1_13_1_To_Current { impl UpgradeIndex for V1_13_1_To_Latest_V1_13 {
fn upgrade( fn upgrade(
&self, &self,
_wtxn: &mut RwTxn, _wtxn: &mut RwTxn,

View File

@ -0,0 +1,41 @@
use arroy::distances::Cosine;
use heed::RwTxn;
use super::UpgradeIndex;
use crate::progress::Progress;
use crate::{make_enum_progress, Index, Result};
#[allow(non_camel_case_types)]
// Upgrade step migrating an index from the latest v1.13 layout to the latest
// v1.14 layout. The only change is the on-disk format of the arroy vector
// store, which must be rewritten from the arroy 0.5 format to the 0.6 format.
pub(super) struct Latest_V1_13_To_Latest_V1_14();

impl UpgradeIndex for Latest_V1_13_To_Latest_V1_14 {
    /// Rewrites the internal version/metadata entries of the arroy vector
    /// database in place so that arroy v0.6 can read an index built by v0.5.
    ///
    /// Returns `Ok(false)`: this migration does not change documents or field
    /// distributions, so the index scheduler does not need to regenerate its
    /// cached stats (see the `UpgradeIndex::upgrade` contract).
    fn upgrade(
        &self,
        wtxn: &mut RwTxn,
        index: &Index,
        _original: (u32, u32, u32),
        progress: Progress,
    ) -> Result<bool> {
        // Single-variant progress enum: the whole step is one unit of work
        // reported to the caller's progress view.
        make_enum_progress! {
            enum VectorStore {
                UpdateInternalVersions,
            }
        };

        progress.update_progress(VectorStore::UpdateInternalVersions);

        // NOTE(review): a read txn is opened on the same env that `wtxn` writes
        // to; arroy's upgrade helper takes the source as a read txn and the
        // destination as a write txn even when they address the same database —
        // presumably supported by LMDB nested/parallel txn semantics; confirm
        // against arroy's `upgrade::from_0_5_to_0_6` documentation.
        let rtxn = index.read_txn()?;
        // `remap_data_type()` erases the typed codec so arroy can reinterpret
        // the raw entries; `Cosine` is the distance the store was built with.
        // TODO(review): confirm all embedder indexes in this database use the
        // Cosine distance — the upgrade is parameterized on a single distance.
        arroy::upgrade::from_0_5_to_0_6::<Cosine>(
            &rtxn,
            index.vector_arroy.remap_data_type(),
            wtxn,
            index.vector_arroy.remap_data_type(),
        )?;

        Ok(false)
    }

    /// Version reached after this step runs: v1.14.0.
    fn target_version(&self) -> (u32, u32, u32) {
        (1, 14, 0)
    }
}

View File

@ -86,6 +86,7 @@ impl ArroyWrapper {
rng: &mut R, rng: &mut R,
dimension: usize, dimension: usize,
quantizing: bool, quantizing: bool,
arroy_memory: Option<usize>,
cancel: &(impl Fn() -> bool + Sync + Send), cancel: &(impl Fn() -> bool + Sync + Send),
) -> Result<(), arroy::Error> { ) -> Result<(), arroy::Error> {
for index in arroy_db_range_for_embedder(self.embedder_index) { for index in arroy_db_range_for_embedder(self.embedder_index) {
@ -105,9 +106,17 @@ impl ArroyWrapper {
// sensitive. // sensitive.
if quantizing && !self.quantized { if quantizing && !self.quantized {
let writer = writer.prepare_changing_distance::<BinaryQuantizedCosine>(wtxn)?; let writer = writer.prepare_changing_distance::<BinaryQuantizedCosine>(wtxn)?;
writer.builder(rng).cancel(cancel).build(wtxn)?; writer
.builder(rng)
.available_memory(arroy_memory.unwrap_or(usize::MAX))
.cancel(cancel)
.build(wtxn)?;
} else if writer.need_build(wtxn)? { } else if writer.need_build(wtxn)? {
writer.builder(rng).cancel(cancel).build(wtxn)?; writer
.builder(rng)
.available_memory(arroy_memory.unwrap_or(usize::MAX))
.cancel(cancel)
.build(wtxn)?;
} else if writer.is_empty(wtxn)? { } else if writer.is_empty(wtxn)? {
break; break;
} }

View File

@ -12,7 +12,8 @@ use serde_json::{from_value, json};
#[test] #[test]
fn test_facet_distribution_with_no_facet_values() { fn test_facet_distribution_with_no_facet_values() {
let path = tempfile::tempdir().unwrap(); let path = tempfile::tempdir().unwrap();
let mut options = EnvOpenOptions::new(); let options = EnvOpenOptions::new();
let mut options = options.read_txn_without_tls();
options.map_size(10 * 1024 * 1024); // 10 MB options.map_size(10 * 1024 * 1024); // 10 MB
let index = Index::new(options, &path, true).unwrap(); let index = Index::new(options, &path, true).unwrap();

View File

@ -34,7 +34,8 @@ pub const CONTENT: &str = include_str!("../assets/test_set.ndjson");
pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index { pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {
let path = tempfile::tempdir().unwrap(); let path = tempfile::tempdir().unwrap();
let mut options = EnvOpenOptions::new(); let options = EnvOpenOptions::new();
let mut options = options.read_txn_without_tls();
options.map_size(10 * 1024 * 1024); // 10 MB options.map_size(10 * 1024 * 1024); // 10 MB
let index = Index::new(options, &path, true).unwrap(); let index = Index::new(options, &path, true).unwrap();

View File

@ -262,7 +262,8 @@ fn criteria_mixup() {
#[test] #[test]
fn criteria_ascdesc() { fn criteria_ascdesc() {
let path = tempfile::tempdir().unwrap(); let path = tempfile::tempdir().unwrap();
let mut options = EnvOpenOptions::new(); let options = EnvOpenOptions::new();
let mut options = options.read_txn_without_tls();
options.map_size(12 * 1024 * 1024); // 10 MB options.map_size(12 * 1024 * 1024); // 10 MB
let index = Index::new(options, &path, true).unwrap(); let index = Index::new(options, &path, true).unwrap();

View File

@ -108,7 +108,8 @@ fn test_typo_tolerance_two_typo() {
#[test] #[test]
fn test_typo_disabled_on_word() { fn test_typo_disabled_on_word() {
let tmp = tempdir().unwrap(); let tmp = tempdir().unwrap();
let mut options = EnvOpenOptions::new(); let options = EnvOpenOptions::new();
let mut options = options.read_txn_without_tls();
options.map_size(4096 * 100); options.map_size(4096 * 100);
let index = Index::new(options, tmp.path(), true).unwrap(); let index = Index::new(options, tmp.path(), true).unwrap();