diff --git a/Cargo.lock b/Cargo.lock index 5d87830a5..7df0e7e86 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -378,9 +378,7 @@ dependencies = [ [[package]] name = "arroy" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "efddeb1e7c32a551cc07ef4c3e181e3cd5478fdaf4f0bd799983171c1f6efe57" +version = "0.3.0" dependencies = [ "bytemuck", "byteorder", @@ -1536,9 +1534,9 @@ checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10" [[package]] name = "doxygen-rs" -version = "0.2.2" +version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bff670ea0c9bbb8414e7efa6e23ebde2b8f520a7eef78273a3918cf1903e7505" +checksum = "415b6ec780d34dcf624666747194393603d0373b7141eef01d12ee58881507d9" dependencies = [ "phf", ] @@ -2262,12 +2260,9 @@ checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" [[package]] name = "heed" -version = "0.20.0-alpha.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9648a50991c86df7d00c56c268c27754fcf4c80be2ba57fc4a00dc928c6fe934" +version = "0.20.0" dependencies = [ "bitflags 2.5.0", - "bytemuck", "byteorder", "heed-traits", "heed-types", @@ -2281,15 +2276,11 @@ dependencies = [ [[package]] name = "heed-traits" -version = "0.20.0-alpha.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ab0b7d9cde969ad36dde692e487dc89d97f7168bf6a7bd3b894ad4bf7278298" +version = "0.20.0" [[package]] name = "heed-types" -version = "0.20.0-alpha.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f0cb3567a7363f28b597bf6e9897b9466397951dd0e52df2c8196dd8a71af44a" +version = "0.20.0" dependencies = [ "bincode", "byteorder", @@ -3189,14 +3180,11 @@ checksum = "f9d642685b028806386b2b6e75685faadd3eb65a85fff7df711ce18446a422da" [[package]] name = "lmdb-master-sys" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "629c123f5321b48fa4f8f4d3b868165b748d9ba79c7103fb58e3a94f736bcedd" +version = "0.2.0" dependencies = [ "cc", "doxygen-rs", "libc", - "pkg-config", ] [[package]] diff --git a/index-scheduler/src/batch.rs b/index-scheduler/src/batch.rs index bc9823a01..582497c15 100644 --- a/index-scheduler/src/batch.rs +++ b/index-scheduler/src/batch.rs @@ -785,10 +785,12 @@ impl IndexScheduler { let dst = temp_snapshot_dir.path().join("auth"); fs::create_dir_all(&dst)?; // TODO We can't use the open_auth_store_env function here but we should - let auth = milli::heed::EnvOpenOptions::new() - .map_size(1024 * 1024 * 1024) // 1 GiB - .max_dbs(2) - .open(&self.auth_path)?; + let auth = unsafe { + milli::heed::EnvOpenOptions::new() + .map_size(1024 * 1024 * 1024) // 1 GiB + .max_dbs(2) + .open(&self.auth_path) + }?; auth.copy_to_file(dst.join("data.mdb"), CompactionOption::Enabled)?; // 5. Copy and tarball the flat snapshot diff --git a/index-scheduler/src/lib.rs b/index-scheduler/src/lib.rs index 5704f5354..dd2b296f6 100644 --- a/index-scheduler/src/lib.rs +++ b/index-scheduler/src/lib.rs @@ -453,10 +453,12 @@ impl IndexScheduler { ) }; - let env = heed::EnvOpenOptions::new() - .max_dbs(11) - .map_size(budget.task_db_size) - .open(options.tasks_path)?; + let env = unsafe { + heed::EnvOpenOptions::new() + .max_dbs(11) + .map_size(budget.task_db_size) + .open(options.tasks_path) + }?; let features = features::FeatureData::new(&env, options.instance_features)?; @@ -585,9 +587,9 @@ impl IndexScheduler { } fn is_good_heed(tasks_path: &Path, map_size: usize) -> bool { - if let Ok(env) = + if let Ok(env) = unsafe { heed::EnvOpenOptions::new().map_size(clamp_to_page_size(map_size)).open(tasks_path) - { + } { env.prepare_for_closing().wait(); true } else { diff --git a/meilisearch-auth/src/store.rs b/meilisearch-auth/src/store.rs index 1eebd3fe9..ef992e836 100644 --- a/meilisearch-auth/src/store.rs +++ b/meilisearch-auth/src/store.rs @@ -49,7 +49,7 @@ pub fn open_auth_store_env(path: &Path) -> milli::heed::Result let mut options = EnvOpenOptions::new(); options.map_size(AUTH_STORE_SIZE); // 1GB options.max_dbs(2); - options.open(path) + unsafe { options.open(path) } } impl HeedAuthStore { diff --git a/meilisearch-types/src/error.rs b/meilisearch-types/src/error.rs index eea012331..158dfae92 100644 --- a/meilisearch-types/src/error.rs +++ b/meilisearch-types/src/error.rs @@ -423,7 +423,6 @@ impl ErrorCode for HeedError { HeedError::Mdb(_) | HeedError::Encoding(_) | HeedError::Decoding(_) - | HeedError::InvalidDatabaseTyping | HeedError::DatabaseClosing | HeedError::BadOpenOptions { .. } => Code::Internal, } diff --git a/meilitool/src/main.rs b/meilitool/src/main.rs index bfcbfdd6d..06c4890a5 100644 --- a/meilitool/src/main.rs +++ b/meilitool/src/main.rs @@ -80,9 +80,7 @@ fn main() -> anyhow::Result<()> { /// Clears the task queue located at `db_path`. fn clear_task_queue(db_path: PathBuf) -> anyhow::Result<()> { let path = db_path.join("tasks"); - let env = EnvOpenOptions::new() - .max_dbs(100) - .open(&path) + let env = unsafe { EnvOpenOptions::new().max_dbs(100).open(&path) } .with_context(|| format!("While trying to open {:?}", path.display()))?; eprintln!("Deleting tasks from the database..."); @@ -193,9 +191,7 @@ fn export_a_dump( FileStore::new(db_path.join("update_files")).context("While opening the FileStore")?; let index_scheduler_path = db_path.join("tasks"); - let env = EnvOpenOptions::new() - .max_dbs(100) - .open(&index_scheduler_path) + let env = unsafe { EnvOpenOptions::new().max_dbs(100).open(&index_scheduler_path) } .with_context(|| format!("While trying to open {:?}", index_scheduler_path.display()))?; eprintln!("Dumping the keys..."); diff --git a/milli/Cargo.toml b/milli/Cargo.toml index 7d903178b..ab63a1fa7 100644 --- a/milli/Cargo.toml +++ b/milli/Cargo.toml @@ -30,7 +30,12 @@ grenad = { version = "0.4.6", default-features = false, features = [ "rayon", "tempfile", ] } -heed = { version = "0.20.0-alpha.9", default-features = false, features = [ +# heed = { version = "0.20.0", default-features = false, features = [ +# "serde-json", +# "serde-bincode", +# "read-txn-no-tls", +# ] } +heed = { path = "../../heed/heed", default-features = false, features = [ "serde-json", "serde-bincode", "read-txn-no-tls", @@ -82,7 +87,8 @@ hf-hub = { git = "https://github.com/dureuill/hf-hub.git", branch = "rust_tls", ] } tiktoken-rs = "0.5.8" liquid = "0.26.4" -arroy = "0.2.0" +# arroy = "0.2.0" +arroy = { path = "../../arroy" } rand = "0.8.5" tracing = "0.1.40" ureq = { version = "2.9.7", features = ["json"] } diff --git a/milli/fuzz/.gitignore b/milli/fuzz/.gitignore new file mode 100644 index 000000000..a0925114d --- /dev/null +++ b/milli/fuzz/.gitignore @@ -0,0 +1,3 @@ +target +corpus +artifacts diff --git a/milli/src/error.rs b/milli/src/error.rs index 009781fcf..6db0dcac1 100644 --- a/milli/src/error.rs +++ b/milli/src/error.rs @@ -48,8 +48,6 @@ pub enum InternalError { GrenadInvalidFormatVersion, #[error("Invalid merge while processing {process}")] IndexingMergingKeys { process: &'static str }, - #[error("{}", HeedError::InvalidDatabaseTyping)] - InvalidDatabaseTyping, #[error(transparent)] RayonThreadPool(#[from] ThreadPoolBuildError), #[error(transparent)] @@ -429,7 +427,6 @@ impl From for Error { // TODO use the encoding HeedError::Encoding(_) => InternalError(Serialization(Encoding { db_name: None })), HeedError::Decoding(_) => InternalError(Serialization(Decoding { db_name: None })), - HeedError::InvalidDatabaseTyping => InternalError(InvalidDatabaseTyping), HeedError::DatabaseClosing => InternalError(DatabaseClosing), HeedError::BadOpenOptions { .. } => UserError(InvalidLmdbOpenOptions), } diff --git a/milli/src/index.rs b/milli/src/index.rs index 42b9cb111..739a7f202 100644 --- a/milli/src/index.rs +++ b/milli/src/index.rs @@ -184,7 +184,7 @@ impl Index { options.max_dbs(25); - let env = options.open(path)?; + let env = unsafe { options.open(path) }?; let mut wtxn = env.write_txn()?; let main = env.database_options().name(MAIN).create(&mut wtxn)?; let word_docids = env.create_database(&mut wtxn, Some(WORD_DOCIDS))?; @@ -294,6 +294,11 @@ impl Index { self.env.read_txn() } + /// Create a static read transaction to be able to read the index without keeping a reference to it. + pub fn static_read_txn(&self) -> heed::Result> { + self.env.clone().static_read_txn() + } + /// Returns the canonicalized path where the heed `Env` of this `Index` lives. pub fn path(&self) -> &Path { self.env.path() diff --git a/milli/src/update/facet/mod.rs b/milli/src/update/facet/mod.rs index 0af64c4c5..42994551f 100644 --- a/milli/src/update/facet/mod.rs +++ b/milli/src/update/facet/mod.rs @@ -379,7 +379,7 @@ pub(crate) mod test_helpers { let mut options = heed::EnvOpenOptions::new(); let options = options.map_size(4096 * 4 * 1000 * 100); let tempdir = tempfile::TempDir::new().unwrap(); - let env = options.open(tempdir.path()).unwrap(); + let env = unsafe { options.open(tempdir.path()) }.unwrap(); let mut wtxn = env.write_txn().unwrap(); let content = env.create_database(&mut wtxn, None).unwrap(); wtxn.commit().unwrap(); diff --git a/milli/src/update/index_documents/mod.rs b/milli/src/update/index_documents/mod.rs index 936ce1efc..4d2fac7cb 100644 --- a/milli/src/update/index_documents/mod.rs +++ b/milli/src/update/index_documents/mod.rs @@ -556,7 +556,7 @@ where let writer_index = (embedder_index as u16) << 8; for k in 0..=u8::MAX { let writer = - arroy::Writer::new(vector_arroy, writer_index | (k as u16), dimension)?; + arroy::Writer::new(vector_arroy, writer_index | (k as u16), dimension); if writer.is_empty(wtxn)? { break; } diff --git a/milli/src/update/index_documents/typed_chunk.rs b/milli/src/update/index_documents/typed_chunk.rs index 6aad290e5..e0de2d5a1 100644 --- a/milli/src/update/index_documents/typed_chunk.rs +++ b/milli/src/update/index_documents/typed_chunk.rs @@ -661,7 +661,7 @@ pub(crate) fn write_typed_chunk_into_index( )?; let writer_index = (embedder_index as u16) << 8; // FIXME: allow customizing distance - let writers: std::result::Result, _> = (0..=u8::MAX) + let writers: Vec<_> = (0..=u8::MAX) .map(|k| { arroy::Writer::new( index.vector_arroy, @@ -670,7 +670,6 @@ pub(crate) fn write_typed_chunk_into_index( ) }) .collect(); - let writers = writers?; // remove vectors for docids we want them removed let merger = remove_vectors_builder.build();