diff --git a/Cargo.lock b/Cargo.lock
index 75d8463e7..8a0220c3e 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -520,6 +520,9 @@ name = "bitflags"
 version = "2.3.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "630be753d4e58660abd17930c71b647fe46c27ea6b63cc59e1e3851406972e42"
+dependencies = [
+ "serde",
+]
 
 [[package]]
 name = "block-buffer"
@@ -1255,6 +1258,15 @@ dependencies = [
  "syn 2.0.28",
 ]
 
+[[package]]
+name = "doxygen-rs"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bff670ea0c9bbb8414e7efa6e23ebde2b8f520a7eef78273a3918cf1903e7505"
+dependencies = [
+ "phf",
+]
+
 [[package]]
 name = "dump"
 version = "1.5.0"
@@ -1811,36 +1823,40 @@ checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8"
 
 [[package]]
 name = "heed"
-version = "0.12.7"
-source = "git+https://github.com/meilisearch/heed?tag=v0.12.7#061a5276b1f336f5f3302bee291e336041d88632"
+version = "0.20.0-alpha.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9648a50991c86df7d00c56c268c27754fcf4c80be2ba57fc4a00dc928c6fe934"
 dependencies = [
+ "bitflags 2.3.3",
+ "bytemuck",
  "byteorder",
  "heed-traits",
  "heed-types",
  "libc",
- "lmdb-rkv-sys",
+ "lmdb-master-sys",
  "once_cell",
- "page_size 0.4.2",
+ "page_size 0.6.0",
  "synchronoise",
  "url",
- "zerocopy",
 ]
 
 [[package]]
 name = "heed-traits"
-version = "0.7.0"
-source = "git+https://github.com/meilisearch/heed?tag=v0.12.7#061a5276b1f336f5f3302bee291e336041d88632"
+version = "0.20.0-alpha.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5ab0b7d9cde969ad36dde692e487dc89d97f7168bf6a7bd3b894ad4bf7278298"
 
 [[package]]
 name = "heed-types"
-version = "0.7.2"
-source = "git+https://github.com/meilisearch/heed?tag=v0.12.7#061a5276b1f336f5f3302bee291e336041d88632"
+version = "0.20.0-alpha.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f0cb3567a7363f28b597bf6e9897b9466397951dd0e52df2c8196dd8a71af44a"
 dependencies = [
  "bincode",
+ "byteorder",
  "heed-traits",
  "serde",
  "serde_json",
- "zerocopy",
 ]
 
 [[package]]
@@ -2968,11 +2984,13 @@ dependencies = [
 ]
 
 [[package]]
-name = "lmdb-rkv-sys"
-version = "0.15.1"
-source = "git+https://github.com/meilisearch/lmdb-rs#501aa34a1ab7f092e3ff54a6c22ff6c55931a2d8"
+name = "lmdb-master-sys"
+version = "0.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "629c123f5321b48fa4f8f4d3b868165b748d9ba79c7103fb58e3a94f736bcedd"
 dependencies = [
  "cc",
+ "doxygen-rs",
  "libc",
  "pkg-config",
 ]
@@ -3472,9 +3490,9 @@ dependencies = [
 
 [[package]]
 name = "page_size"
-version = "0.4.2"
+version = "0.5.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "eebde548fbbf1ea81a99b128872779c437752fb99f217c45245e1a61dcd9edcd"
+checksum = "1b7663cbd190cfd818d08efa8497f6cd383076688c49a391ef7c0d03cd12b561"
 dependencies = [
  "libc",
  "winapi",
@@ -3482,9 +3500,9 @@
 [[package]]
 name = "page_size"
-version = "0.5.0"
+version = "0.6.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1b7663cbd190cfd818d08efa8497f6cd383076688c49a391ef7c0d03cd12b561"
+checksum = "30d5b2194ed13191c1999ae0704b7839fb18384fa22e49b57eeaa97d79ce40da"
 dependencies = [
  "libc",
  "winapi",
@@ -3630,6 +3648,7 @@ version = "0.11.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "ade2d8b8f33c7333b51bcf0428d37e217e9f32192ae4772156f65063b8ce03dc"
 dependencies = [
+ "phf_macros",
 "phf_shared",
 ]
 
@@ -3653,6 +3672,19 @@ dependencies = [
  "rand",
 ]
 
+[[package]]
+name = "phf_macros"
+version = "0.11.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3444646e286606587e49f3bcf1679b8cef1dc2c5ecc29ddacaffc305180d464b"
+dependencies = [
+ "phf_generator",
+ "phf_shared",
+ "proc-macro2",
+ "quote",
+ "syn 2.0.28",
+]
+
 [[package]]
 name = "phf_shared"
 version = "0.11.2"
@@ -4479,18 +4511,6 @@ dependencies = [
  "crossbeam-queue",
 ]
 
-[[package]]
-name = "synstructure"
-version = "0.12.6"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f36bdaa60a83aca3921b5259d5400cbf5e90fc51931376a9bd4a0eb79aa7210f"
-dependencies = [
- "proc-macro2",
- "quote",
- "syn 1.0.109",
- "unicode-xid",
-]
-
 [[package]]
 name = "synstructure"
 version = "0.13.0"
@@ -5359,28 +5379,7 @@ dependencies = [
  "proc-macro2",
  "quote",
  "syn 2.0.28",
- "synstructure 0.13.0",
-]
-
-[[package]]
-name = "zerocopy"
-version = "0.3.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6580539ad917b7c026220c4b3f2c08d52ce54d6ce0dc491e66002e35388fab46"
-dependencies = [
- "byteorder",
- "zerocopy-derive",
-]
-
-[[package]]
-name = "zerocopy-derive"
-version = "0.2.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d498dbd1fd7beb83c86709ae1c33ca50942889473473d287d56ce4770a18edfb"
-dependencies = [
- "proc-macro2",
- "syn 1.0.109",
- "synstructure 0.12.6",
+ "synstructure",
 ]
 
 [[package]]
@@ -5401,7 +5400,7 @@ dependencies = [
  "proc-macro2",
  "quote",
  "syn 2.0.28",
- "synstructure 0.13.0",
+ "synstructure",
 ]
 
 [[package]]
diff --git a/benchmarks/benches/indexing.rs b/benchmarks/benches/indexing.rs
index 65f581b93..0c19b89cf 100644
--- a/benchmarks/benches/indexing.rs
+++ b/benchmarks/benches/indexing.rs
@@ -36,7 +36,7 @@ fn setup_index() -> Index {
 }
 
 fn setup_settings<'t>(
-    wtxn: &mut RwTxn<'t, '_>,
+    wtxn: &mut RwTxn<'t>,
     index: &'t Index,
     primary_key: &str,
    searchable_fields: &[&str],
diff --git a/dump/src/reader/mod.rs b/dump/src/reader/mod.rs
index 603c557d6..5bbf4ec4d 100644
--- a/dump/src/reader/mod.rs
+++ b/dump/src/reader/mod.rs
@@ -13,12 +13,12 @@ use crate::{Result, Version};
 
 mod compat;
 
-pub(self) mod v1;
-pub(self) mod v2;
-pub(self) mod v3;
-pub(self) mod v4;
-pub(self) mod v5;
-pub(self) mod v6;
+mod v1;
+mod v2;
+mod v3;
+mod v4;
+mod v5;
+mod v6;
 
 pub type Document = serde_json::Map<String, serde_json::Value>;
 pub type UpdateFile = dyn Iterator<Item = Result<Document>>;
diff --git a/dump/src/reader/v1/settings.rs b/dump/src/reader/v1/settings.rs
index 2f7976534..94343d150 100644
--- a/dump/src/reader/v1/settings.rs
+++ b/dump/src/reader/v1/settings.rs
@@ -56,8 +56,7 @@ pub enum RankingRule {
     Desc(String),
 }
 
-static ASC_DESC_REGEX: Lazy<Regex> =
-    Lazy::new(|| Regex::new(r#"(asc|desc)\(([\w_-]+)\)"#).unwrap());
+static ASC_DESC_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r"(asc|desc)\(([\w_-]+)\)").unwrap());
 
 impl FromStr for RankingRule {
     type Err = ();
diff --git a/filter-parser/src/lib.rs b/filter-parser/src/lib.rs
index 5760c8865..fa5b70606 100644
--- a/filter-parser/src/lib.rs
+++ b/filter-parser/src/lib.rs
@@ -564,10 +564,10 @@ pub mod tests {
 
     #[test]
     fn parse_escaped() {
-        insta::assert_display_snapshot!(p(r#"title = 'foo\\'"#), @r#"{title} = {foo\}"#);
-        insta::assert_display_snapshot!(p(r#"title = 'foo\\\\'"#), @r#"{title} = {foo\\}"#);
-        insta::assert_display_snapshot!(p(r#"title = 'foo\\\\\\'"#), @r#"{title} = {foo\\\}"#);
-        insta::assert_display_snapshot!(p(r#"title = 'foo\\\\\\\\'"#), @r#"{title} = {foo\\\\}"#);
+        insta::assert_display_snapshot!(p(r"title = 'foo\\'"), @r#"{title} = {foo\}"#);
+        insta::assert_display_snapshot!(p(r"title = 'foo\\\\'"), @r#"{title} = {foo\\}"#);
+        insta::assert_display_snapshot!(p(r"title = 'foo\\\\\\'"), @r#"{title} = {foo\\\}"#);
+        insta::assert_display_snapshot!(p(r"title = 'foo\\\\\\\\'"), @r#"{title} = {foo\\\\}"#);
         // but it also works with other sequencies
         insta::assert_display_snapshot!(p(r#"title = 'foo\x20\n\t\"\'"'"#), @"{title} = {foo \n\t\"\'\"}");
     }
diff --git a/filter-parser/src/value.rs b/filter-parser/src/value.rs
index 63d5ac384..1d70cb025 100644
--- a/filter-parser/src/value.rs
+++ b/filter-parser/src/value.rs
@@ -270,8 +270,8 @@ pub mod test {
         ("aaaa", "", rtok("", "aaaa"), "aaaa"),
         (r#"aa"aa"#, r#""aa"#, rtok("", "aa"), "aa"),
         (r#"aa\"aa"#, r#""#, rtok("", r#"aa\"aa"#), r#"aa"aa"#),
-        (r#"aa\\\aa"#, r#""#, rtok("", r#"aa\\\aa"#), r#"aa\\\aa"#),
-        (r#"aa\\"\aa"#, r#""\aa"#, rtok("", r#"aa\\"#), r#"aa\\"#),
+        (r"aa\\\aa", r#""#, rtok("", r"aa\\\aa"), r"aa\\\aa"),
+        (r#"aa\\"\aa"#, r#""\aa"#, rtok("", r"aa\\"), r"aa\\"),
         (r#"aa\\\"\aa"#, r#""#, rtok("", r#"aa\\\"\aa"#), r#"aa\\"\aa"#),
         (r#"\"\""#, r#""#, rtok("", r#"\"\""#), r#""""#),
     ];
@@ -301,12 +301,12 @@ pub mod test {
         );
         // simple quote
         assert_eq!(
-            unescape(Span::new_extra(r#"Hello \'World\'"#, ""), '\''),
+            unescape(Span::new_extra(r"Hello \'World\'", ""), '\''),
             r#"Hello 'World'"#.to_string()
         );
         assert_eq!(
-            unescape(Span::new_extra(r#"Hello \\\'World\\\'"#, ""), '\''),
-            r#"Hello \\'World\\'"#.to_string()
+            unescape(Span::new_extra(r"Hello \\\'World\\\'", ""), '\''),
+            r"Hello \\'World\\'".to_string()
         );
     }
 
@@ -335,19 +335,19 @@ pub mod test {
             ("\"cha'nnel\"", "cha'nnel", false),
             ("I'm tamo", "I", false),
             // escaped thing but not quote
-            (r#""\\""#, r#"\"#, true),
-            (r#""\\\\\\""#, r#"\\\"#, true),
-            (r#""aa\\aa""#, r#"aa\aa"#, true),
+            (r#""\\""#, r"\", true),
+            (r#""\\\\\\""#, r"\\\", true),
+            (r#""aa\\aa""#, r"aa\aa", true),
             // with double quote
             (r#""Hello \"world\"""#, r#"Hello "world""#, true),
             (r#""Hello \\\"world\\\"""#, r#"Hello \"world\""#, true),
             (r#""I'm \"super\" tamo""#, r#"I'm "super" tamo"#, true),
             (r#""\"\"""#, r#""""#, true),
             // with simple quote
-            (r#"'Hello \'world\''"#, r#"Hello 'world'"#, true),
-            (r#"'Hello \\\'world\\\''"#, r#"Hello \'world\'"#, true),
+            (r"'Hello \'world\''", r#"Hello 'world'"#, true),
+            (r"'Hello \\\'world\\\''", r"Hello \'world\'", true),
             (r#"'I\'m "super" tamo'"#, r#"I'm "super" tamo"#, true),
-            (r#"'\'\''"#, r#"''"#, true),
+            (r"'\'\''", r#"''"#, true),
         ];
 
         for (input, expected, escaped) in test_case {
diff --git a/fuzzers/src/bin/fuzz-indexing.rs b/fuzzers/src/bin/fuzz-indexing.rs
index 1d53e069c..baf705709 100644
--- a/fuzzers/src/bin/fuzz-indexing.rs
+++ b/fuzzers/src/bin/fuzz-indexing.rs
@@ -113,7 +113,7 @@ fn main() {
                 index.documents(&wtxn, res.documents_ids).unwrap();
                 progression.fetch_add(1, Ordering::Relaxed);
             }
-            wtxn.abort().unwrap();
+            wtxn.abort();
         });
         if let err @ Err(_) = handle.join() {
             stop.store(true, Ordering::Relaxed);
diff --git a/index-scheduler/src/batch.rs b/index-scheduler/src/batch.rs
index 661285325..5a886acf8 100644
--- a/index-scheduler/src/batch.rs
+++ b/index-scheduler/src/batch.rs
@@ -32,7 +32,7 @@ use meilisearch_types::milli::heed::CompactionOption;
 use meilisearch_types::milli::update::{
     IndexDocumentsConfig, IndexDocumentsMethod, IndexerConfig, Settings as MilliSettings,
 };
-use meilisearch_types::milli::{self, Filter, BEU32};
+use meilisearch_types::milli::{self, Filter};
 use meilisearch_types::settings::{apply_settings_to_builder, Settings, Unchecked};
 use meilisearch_types::tasks::{Details, IndexSwap, Kind, KindWithContent, Status, Task};
 use meilisearch_types::{compression, Index, VERSION_FILE_NAME};
@@ -715,7 +715,7 @@ impl IndexScheduler {
 
         // 2. Snapshot the index-scheduler LMDB env
         //
-        // When we call copy_to_path, LMDB opens a read transaction by itself,
+        // When we call copy_to_file, LMDB opens a read transaction by itself,
         // we can't provide our own. It is an issue as we would like to know
         // the update files to copy but new ones can be enqueued between the copy
         // of the env and the new transaction we open to retrieve the enqueued tasks.
@@ -728,7 +728,7 @@ impl IndexScheduler {
         // 2.1 First copy the LMDB env of the index-scheduler
         let dst = temp_snapshot_dir.path().join("tasks");
         fs::create_dir_all(&dst)?;
-        self.env.copy_to_path(dst.join("data.mdb"), CompactionOption::Enabled)?;
+        self.env.copy_to_file(dst.join("data.mdb"), CompactionOption::Enabled)?;
 
         // 2.2 Create a read transaction on the index-scheduler
         let rtxn = self.env.read_txn()?;
@@ -753,7 +753,7 @@ impl IndexScheduler {
             let index = self.index_mapper.index(&rtxn, name)?;
             let dst = temp_snapshot_dir.path().join("indexes").join(uuid.to_string());
             fs::create_dir_all(&dst)?;
-            index.copy_to_path(dst.join("data.mdb"), CompactionOption::Enabled)?;
+            index.copy_to_file(dst.join("data.mdb"), CompactionOption::Enabled)?;
         }
 
         drop(rtxn);
@@ -766,7 +766,7 @@ impl IndexScheduler {
             .map_size(1024 * 1024 * 1024) // 1 GiB
             .max_dbs(2)
             .open(&self.auth_path)?;
-        auth.copy_to_path(dst.join("data.mdb"), CompactionOption::Enabled)?;
+        auth.copy_to_file(dst.join("data.mdb"), CompactionOption::Enabled)?;
 
         // 5. Copy and tarball the flat snapshot
         // 5.1 Find the original name of the database
@@ -1106,7 +1106,7 @@ impl IndexScheduler {
         for task_id in &index_lhs_task_ids | &index_rhs_task_ids {
             let mut task = self.get_task(wtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?;
             swap_index_uid_in_task(&mut task, (lhs, rhs));
-            self.all_tasks.put(wtxn, &BEU32::new(task_id), &task)?;
+            self.all_tasks.put(wtxn, &task_id, &task)?;
         }
 
         // 4. remove the task from indexuid = before_name
@@ -1132,7 +1132,7 @@ impl IndexScheduler {
     /// The list of processed tasks.
     fn apply_index_operation<'i>(
         &self,
-        index_wtxn: &mut RwTxn<'i, '_>,
+        index_wtxn: &mut RwTxn<'i>,
         index: &'i Index,
         operation: IndexOperation,
     ) -> Result<Vec<Task>> {
@@ -1479,10 +1479,9 @@ impl IndexScheduler {
         }
 
         for task in to_delete_tasks.iter() {
-            self.all_tasks.delete(wtxn, &BEU32::new(task))?;
+            self.all_tasks.delete(wtxn, &task)?;
         }
         for canceled_by in affected_canceled_by {
-            let canceled_by = BEU32::new(canceled_by);
             if let Some(mut tasks) = self.canceled_by.get(wtxn, &canceled_by)? {
                 tasks -= &to_delete_tasks;
                 if tasks.is_empty() {
@@ -1530,14 +1529,14 @@ impl IndexScheduler {
             task.details = task.details.map(|d| d.to_failed());
             self.update_task(wtxn, &task)?;
         }
-        self.canceled_by.put(wtxn, &BEU32::new(cancel_task_id), &tasks_to_cancel)?;
+        self.canceled_by.put(wtxn, &cancel_task_id, &tasks_to_cancel)?;
 
         Ok(content_files_to_delete)
     }
 }
 
 fn delete_document_by_filter<'a>(
-    wtxn: &mut RwTxn<'a, '_>,
+    wtxn: &mut RwTxn<'a>,
     filter: &serde_json::Value,
     indexer_config: &IndexerConfig,
     must_stop_processing: MustStopProcessing,
diff --git a/index-scheduler/src/index_mapper/index_map.rs b/index-scheduler/src/index_mapper/index_map.rs
index a24213558..f8080d23b 100644
--- a/index-scheduler/src/index_mapper/index_map.rs
+++ b/index-scheduler/src/index_mapper/index_map.rs
@@ -1,12 +1,8 @@
-/// the map size to use when we don't succeed in reading it in indexes.
-const DEFAULT_MAP_SIZE: usize = 10 * 1024 * 1024 * 1024; // 10 GiB
-
 use std::collections::BTreeMap;
 use std::path::Path;
 use std::time::Duration;
 
-use meilisearch_types::heed::flags::Flags;
-use meilisearch_types::heed::{EnvClosingEvent, EnvOpenOptions};
+use meilisearch_types::heed::{EnvClosingEvent, EnvFlags, EnvOpenOptions};
 use meilisearch_types::milli::Index;
 use time::OffsetDateTime;
 use uuid::Uuid;
@@ -236,7 +232,7 @@ impl IndexMap {
         enable_mdb_writemap: bool,
         map_size_growth: usize,
     ) {
-        let map_size = index.map_size().unwrap_or(DEFAULT_MAP_SIZE) + map_size_growth;
+        let map_size = index.map_size() + map_size_growth;
         let closing_event = index.prepare_for_closing();
         let generation = self.next_generation();
         self.unavailable.insert(
@@ -309,7 +305,7 @@ fn create_or_open_index(
     options.map_size(clamp_to_page_size(map_size));
     options.max_readers(1024);
     if enable_mdb_writemap {
-        unsafe { options.flag(Flags::MdbWriteMap) };
+        unsafe { options.flags(EnvFlags::WRITE_MAP) };
     }
 
     if let Some((created, updated)) = date {
@@ -388,7 +384,7 @@ mod tests {
 
     fn assert_index_size(index: Index, expected: usize) {
         let expected = clamp_to_page_size(expected);
-        let index_map_size = index.map_size().unwrap();
+        let index_map_size = index.map_size();
         assert_eq!(index_map_size, expected);
     }
 }
diff --git a/index-scheduler/src/insta_snapshot.rs b/index-scheduler/src/insta_snapshot.rs
index 6096bad38..885a66f49 100644
--- a/index-scheduler/src/insta_snapshot.rs
+++ b/index-scheduler/src/insta_snapshot.rs
@@ -1,7 +1,7 @@
 use std::collections::BTreeSet;
 use std::fmt::Write;
 
-use meilisearch_types::heed::types::{OwnedType, SerdeBincode, SerdeJson, Str};
+use meilisearch_types::heed::types::{SerdeBincode, SerdeJson, Str};
 use meilisearch_types::heed::{Database, RoTxn};
 use meilisearch_types::milli::{CboRoaringBitmapCodec, RoaringBitmapCodec, BEU32};
 use meilisearch_types::tasks::{Details, Task};
@@ -115,7 +115,7 @@ pub fn snapshot_bitmap(r: &RoaringBitmap) -> String {
     snap
 }
 
-pub fn snapshot_all_tasks(rtxn: &RoTxn, db: Database<OwnedType<BEU32>, SerdeJson<Task>>) -> String {
+pub fn snapshot_all_tasks(rtxn: &RoTxn, db: Database<BEU32, SerdeJson<Task>>) -> String {
     let mut snap = String::new();
     let iter = db.iter(rtxn).unwrap();
     for next in iter {
@@ -125,10 +125,7 @@ pub fn snapshot_all_tasks(rtxn: &RoTxn, db: Database<BEU32, SerdeJson<Task>>) -
     snap
 }
 
-pub fn snapshot_date_db(
-    rtxn: &RoTxn,
-    db: Database<OwnedType<BEI128>, CboRoaringBitmapCodec>,
-) -> String {
+pub fn snapshot_date_db(rtxn: &RoTxn, db: Database<BEI128, CboRoaringBitmapCodec>) -> String {
     let mut snap = String::new();
     let iter = db.iter(rtxn).unwrap();
     for next in iter {
@@ -248,10 +245,7 @@ pub fn snapshot_index_tasks(rtxn: &RoTxn, db: Database<Str, RoaringBitmapCodec>
     }
     snap
 }
-pub fn snapshot_canceled_by(
-    rtxn: &RoTxn,
-    db: Database<OwnedType<BEU32>, RoaringBitmapCodec>,
-) -> String {
+pub fn snapshot_canceled_by(rtxn: &RoTxn, db: Database<BEU32, RoaringBitmapCodec>) -> String {
     let mut snap = String::new();
     let iter = db.iter(rtxn).unwrap();
     for next in iter {
diff --git a/index-scheduler/src/lib.rs b/index-scheduler/src/lib.rs
index 896c06c99..446db8eae 100644
--- a/index-scheduler/src/lib.rs
+++ b/index-scheduler/src/lib.rs
@@ -47,8 +47,9 @@ pub use features::RoFeatures;
 use file_store::FileStore;
 use meilisearch_types::error::ResponseError;
 use meilisearch_types::features::{InstanceTogglableFeatures, RuntimeTogglableFeatures};
-use meilisearch_types::heed::types::{OwnedType, SerdeBincode, SerdeJson, Str};
-use meilisearch_types::heed::{self, Database, Env, RoTxn, RwTxn};
+use meilisearch_types::heed::byteorder::BE;
+use meilisearch_types::heed::types::{SerdeBincode, SerdeJson, Str, I128};
+use meilisearch_types::heed::{self, Database, Env, PutFlags, RoTxn, RwTxn};
 use meilisearch_types::milli::documents::DocumentsBatchBuilder;
 use meilisearch_types::milli::update::IndexerConfig;
 use meilisearch_types::milli::{self, CboRoaringBitmapCodec, Index, RoaringBitmapCodec, BEU32};
@@ -64,8 +65,7 @@ use uuid::Uuid;
 use crate::index_mapper::IndexMapper;
 use crate::utils::{check_index_swap_validity, clamp_to_page_size};
 
-pub(crate) type BEI128 =
-    meilisearch_types::heed::zerocopy::I128<meilisearch_types::heed::byteorder::BE>;
+pub(crate) type BEI128 = I128<BE>;
 
 /// Defines a subset of tasks to be retrieved from the [`IndexScheduler`].
 ///
@@ -278,7 +278,7 @@ pub struct IndexScheduler {
     pub(crate) file_store: FileStore,
 
     // The main database, it contains all the tasks accessible by their Id.
-    pub(crate) all_tasks: Database<OwnedType<BEU32>, SerdeJson<Task>>,
+    pub(crate) all_tasks: Database<BEU32, SerdeJson<Task>>,
 
     /// All the tasks ids grouped by their status.
     // TODO we should not be able to serialize a `Status::Processing` in this database.
@@ -289,16 +289,16 @@ pub struct IndexScheduler {
     pub(crate) index_tasks: Database<Str, RoaringBitmapCodec>,
 
     /// Store the tasks that were canceled by a task uid
-    pub(crate) canceled_by: Database<OwnedType<BEU32>, RoaringBitmapCodec>,
+    pub(crate) canceled_by: Database<BEU32, RoaringBitmapCodec>,
 
     /// Store the task ids of tasks which were enqueued at a specific date
-    pub(crate) enqueued_at: Database<OwnedType<BEI128>, CboRoaringBitmapCodec>,
+    pub(crate) enqueued_at: Database<BEI128, CboRoaringBitmapCodec>,
 
     /// Store the task ids of finished tasks which started being processed at a specific date
-    pub(crate) started_at: Database<OwnedType<BEI128>, CboRoaringBitmapCodec>,
+    pub(crate) started_at: Database<BEI128, CboRoaringBitmapCodec>,
 
     /// Store the task ids of tasks which finished at a specific date
-    pub(crate) finished_at: Database<OwnedType<BEI128>, CboRoaringBitmapCodec>,
+    pub(crate) finished_at: Database<BEI128, CboRoaringBitmapCodec>,
 
     /// In charge of creating, opening, storing and returning indexes.
     pub(crate) index_mapper: IndexMapper,
@@ -730,9 +730,7 @@ impl IndexScheduler {
         if let Some(canceled_by) = &query.canceled_by {
             let mut all_canceled_tasks = RoaringBitmap::new();
             for cancel_task_uid in canceled_by {
-                if let Some(canceled_by_uid) =
-                    self.canceled_by.get(rtxn, &BEU32::new(*cancel_task_uid))?
-                {
+                if let Some(canceled_by_uid) = self.canceled_by.get(rtxn, cancel_task_uid)? {
                     all_canceled_tasks |= canceled_by_uid;
                 }
             }
@@ -983,7 +981,7 @@ impl IndexScheduler {
         // if the task doesn't delete anything and 50% of the task queue is full, we must refuse to enqueue the incomming task
         if !matches!(&kind, KindWithContent::TaskDeletion { tasks, .. } if !tasks.is_empty())
-            && (self.env.non_free_pages_size()? * 100) / self.env.map_size()? as u64 > 50
+            && (self.env.non_free_pages_size()? * 100) / self.env.info().map_size as u64 > 50
         {
             return Err(Error::NoSpaceLeftInTaskQueue);
         }
@@ -1009,7 +1007,7 @@ impl IndexScheduler {
 
         // Get rid of the mutability.
         let task = task;
-        self.all_tasks.append(&mut wtxn, &BEU32::new(task.uid), &task)?;
+        self.all_tasks.put_with_flags(&mut wtxn, PutFlags::APPEND, &task.uid, &task)?;
 
         for index in task.indexes() {
             self.update_index(&mut wtxn, index, |bitmap| {
@@ -1187,7 +1185,7 @@ impl IndexScheduler {
                 | Err(Error::AbortedTask) => {
                     #[cfg(test)]
                     self.breakpoint(Breakpoint::AbortedIndexation);
-                    wtxn.abort().map_err(Error::HeedTransaction)?;
+                    wtxn.abort();
 
                     // We make sure that we don't call `stop_processing` on the `processing_tasks`,
                     // this is because we want to let the next tick call `create_next_batch` and keep
@@ -1208,7 +1206,7 @@ impl IndexScheduler {
                     let index_uid = index_uid.unwrap();
                     // fixme: handle error more gracefully? not sure when this could happen
                     self.index_mapper.resize_index(&wtxn, &index_uid)?;
-                    wtxn.abort().map_err(Error::HeedTransaction)?;
+                    wtxn.abort();
 
                     return Ok(TickOutcome::TickAgain(0));
                 }
@@ -1354,7 +1352,7 @@ impl IndexScheduler {
 
 pub struct Dump<'a> {
     index_scheduler: &'a IndexScheduler,
-    wtxn: RwTxn<'a, 'a>,
+    wtxn: RwTxn<'a>,
 
     indexes: HashMap<String, RoaringBitmap>,
     statuses: HashMap<Status, RoaringBitmap>,
@@ -1469,7 +1467,7 @@ impl<'a> Dump<'a> {
             },
         };
 
-        self.index_scheduler.all_tasks.put(&mut self.wtxn, &BEU32::new(task.uid), &task)?;
+        self.index_scheduler.all_tasks.put(&mut self.wtxn, &task.uid, &task)?;
 
         for index in task.indexes() {
             match self.indexes.get_mut(index) {
@@ -1511,8 +1509,8 @@ impl<'a> Dump<'a> {
             }
         }
 
-        self.statuses.entry(task.status).or_insert(RoaringBitmap::new()).insert(task.uid);
-        self.kinds.entry(task.kind.as_kind()).or_insert(RoaringBitmap::new()).insert(task.uid);
+        self.statuses.entry(task.status).or_default().insert(task.uid);
+        self.kinds.entry(task.kind.as_kind()).or_default().insert(task.uid);
 
         Ok(task)
     }
diff --git a/index-scheduler/src/utils.rs b/index-scheduler/src/utils.rs
index 3971d9116..9f6f90db2 100644
--- a/index-scheduler/src/utils.rs
+++ b/index-scheduler/src/utils.rs
@@ -3,9 +3,9 @@
 use std::collections::{BTreeSet, HashSet};
 use std::ops::Bound;
 
-use meilisearch_types::heed::types::{DecodeIgnore, OwnedType};
+use meilisearch_types::heed::types::DecodeIgnore;
 use meilisearch_types::heed::{Database, RoTxn, RwTxn};
-use meilisearch_types::milli::{CboRoaringBitmapCodec, BEU32};
+use meilisearch_types::milli::CboRoaringBitmapCodec;
 use meilisearch_types::tasks::{Details, IndexSwap, Kind, KindWithContent, Status};
 use roaring::{MultiOps, RoaringBitmap};
 use time::OffsetDateTime;
@@ -18,7 +18,7 @@ impl IndexScheduler {
     }
 
     pub(crate) fn last_task_id(&self, rtxn: &RoTxn) -> Result<Option<TaskId>> {
-        Ok(self.all_tasks.remap_data_type::<DecodeIgnore>().last(rtxn)?.map(|(k, _)| k.get() + 1))
+        Ok(self.all_tasks.remap_data_type::<DecodeIgnore>().last(rtxn)?.map(|(k, _)| k + 1))
     }
 
     pub(crate) fn next_task_id(&self, rtxn: &RoTxn) -> Result<TaskId> {
@@ -26,7 +26,7 @@ impl IndexScheduler {
     }
 
     pub(crate) fn get_task(&self, rtxn: &RoTxn, task_id: TaskId) -> Result<Option<Task>> {
-        Ok(self.all_tasks.get(rtxn, &BEU32::new(task_id))?)
+        Ok(self.all_tasks.get(rtxn, &task_id)?)
     }
 
     /// Convert an iterator to a `Vec` of tasks. The tasks MUST exist or a
@@ -88,7 +88,7 @@ impl IndexScheduler {
             }
         }
 
-        self.all_tasks.put(wtxn, &BEU32::new(task.uid), task)?;
+        self.all_tasks.put(wtxn, &task.uid, task)?;
         Ok(())
     }
 
@@ -169,11 +169,11 @@ impl IndexScheduler {
 
 pub(crate) fn insert_task_datetime(
     wtxn: &mut RwTxn,
-    database: Database<OwnedType<BEI128>, CboRoaringBitmapCodec>,
+    database: Database<BEI128, CboRoaringBitmapCodec>,
     time: OffsetDateTime,
     task_id: TaskId,
 ) -> Result<()> {
-    let timestamp = BEI128::new(time.unix_timestamp_nanos());
+    let timestamp = time.unix_timestamp_nanos();
     let mut task_ids = database.get(wtxn, &timestamp)?.unwrap_or_default();
     task_ids.insert(task_id);
     database.put(wtxn, &timestamp, &RoaringBitmap::from_iter(task_ids))?;
@@ -182,11 +182,11 @@ pub(crate) fn insert_task_datetime(
 
 pub(crate) fn remove_task_datetime(
     wtxn: &mut RwTxn,
-    database: Database<OwnedType<BEI128>, CboRoaringBitmapCodec>,
+    database: Database<BEI128, CboRoaringBitmapCodec>,
     time: OffsetDateTime,
     task_id: TaskId,
 ) -> Result<()> {
-    let timestamp = BEI128::new(time.unix_timestamp_nanos());
+    let timestamp = time.unix_timestamp_nanos();
     if let Some(mut existing) = database.get(wtxn, &timestamp)? {
         existing.remove(task_id);
         if existing.is_empty() {
@@ -202,7 +202,7 @@ pub(crate) fn remove_task_datetime(
 pub(crate) fn keep_tasks_within_datetimes(
     rtxn: &RoTxn,
     tasks: &mut RoaringBitmap,
-    database: Database<OwnedType<BEI128>, CboRoaringBitmapCodec>,
+    database: Database<BEI128, CboRoaringBitmapCodec>,
     after: Option<OffsetDateTime>,
     before: Option<OffsetDateTime>,
 ) -> Result<()> {
@@ -213,8 +213,8 @@ pub(crate) fn keep_tasks_within_datetimes(
         (Some(after), Some(before)) => (Bound::Excluded(*after), Bound::Excluded(*before)),
     };
     let mut collected_task_ids = RoaringBitmap::new();
-    let start = map_bound(start, |b| BEI128::new(b.unix_timestamp_nanos()));
-    let end = map_bound(end, |b| BEI128::new(b.unix_timestamp_nanos()));
+    let start = map_bound(start, |b| b.unix_timestamp_nanos());
+    let end = map_bound(end, |b| b.unix_timestamp_nanos());
     let iter = database.range(rtxn, &(start, end))?;
     for r in iter {
         let (_timestamp, task_ids) = r?;
@@ -337,8 +337,6 @@ impl IndexScheduler {
         let rtxn = self.env.read_txn().unwrap();
         for task in self.all_tasks.iter(&rtxn).unwrap() {
             let (task_id, task) = task.unwrap();
-            let task_id = task_id.get();
-
             let task_index_uid = task.index_uid().map(ToOwned::to_owned);
 
             let Task {
@@ -361,16 +359,13 @@ impl IndexScheduler {
                     .unwrap()
                     .contains(task.uid));
             }
-            let db_enqueued_at = self
-                .enqueued_at
-                .get(&rtxn, &BEI128::new(enqueued_at.unix_timestamp_nanos()))
-                .unwrap()
-                .unwrap();
+            let db_enqueued_at =
+                self.enqueued_at.get(&rtxn, &enqueued_at.unix_timestamp_nanos()).unwrap().unwrap();
             assert!(db_enqueued_at.contains(task_id));
             if let Some(started_at) = started_at {
                 let db_started_at = self
                     .started_at
-                    .get(&rtxn, &BEI128::new(started_at.unix_timestamp_nanos()))
+                    .get(&rtxn, &started_at.unix_timestamp_nanos())
                     .unwrap()
                     .unwrap();
                 assert!(db_started_at.contains(task_id));
@@ -378,7 +373,7 @@ impl IndexScheduler {
             if let Some(finished_at) = finished_at {
                 let db_finished_at = self
                     .finished_at
-                    .get(&rtxn, &BEI128::new(finished_at.unix_timestamp_nanos()))
+                    .get(&rtxn, &finished_at.unix_timestamp_nanos())
                     .unwrap()
                     .unwrap();
                 assert!(db_finished_at.contains(task_id));
diff --git a/index-scheduler/src/uuid_codec.rs b/index-scheduler/src/uuid_codec.rs
index 70a92ca94..54020fa3c 100644
--- a/index-scheduler/src/uuid_codec.rs
+++ b/index-scheduler/src/uuid_codec.rs
@@ -1,7 +1,7 @@
 use std::borrow::Cow;
 use std::convert::TryInto;
 
-use meilisearch_types::heed::{BytesDecode, BytesEncode};
+use meilisearch_types::heed::{BoxedError, BytesDecode, BytesEncode};
 use uuid::Uuid;
 
 /// A heed codec for value of struct Uuid.
@@ -10,15 +10,15 @@ pub struct UuidCodec;
 impl<'a> BytesDecode<'a> for UuidCodec {
     type DItem = Uuid;
 
-    fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
-        bytes.try_into().ok().map(Uuid::from_bytes)
+    fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
+        bytes.try_into().map(Uuid::from_bytes).map_err(Into::into)
     }
 }
 
 impl BytesEncode<'_> for UuidCodec {
     type EItem = Uuid;
 
-    fn bytes_encode(item: &Self::EItem) -> Option<Cow<[u8]>> {
-        Some(Cow::Borrowed(item.as_bytes()))
+    fn bytes_encode(item: &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
+        Ok(Cow::Borrowed(item.as_bytes()))
     }
 }
diff --git a/meilisearch-auth/src/store.rs b/meilisearch-auth/src/store.rs
index 28ec8b5e4..276c035b0 100644
--- a/meilisearch-auth/src/store.rs
+++ b/meilisearch-auth/src/store.rs
@@ -4,17 +4,20 @@ use std::collections::HashSet;
 use std::convert::{TryFrom, TryInto};
 use std::fs::create_dir_all;
 use std::path::Path;
+use std::result::Result as StdResult;
 use std::str;
 use std::str::FromStr;
 use std::sync::Arc;
 
 use hmac::{Hmac, Mac};
+use meilisearch_types::heed::BoxedError;
 use meilisearch_types::index_uid_pattern::IndexUidPattern;
 use meilisearch_types::keys::KeyId;
 use meilisearch_types::milli;
-use meilisearch_types::milli::heed::types::{ByteSlice, DecodeIgnore, SerdeJson};
+use meilisearch_types::milli::heed::types::{Bytes, DecodeIgnore, SerdeJson};
 use meilisearch_types::milli::heed::{Database, Env, EnvOpenOptions, RwTxn};
 use sha2::Sha256;
+use thiserror::Error;
 use time::OffsetDateTime;
 use uuid::fmt::Hyphenated;
 use uuid::Uuid;
@@ -30,7 +33,7 @@ const KEY_ID_ACTION_INDEX_EXPIRATION_DB_NAME: &str = "keyid-action-index-expirat
 #[derive(Clone)]
 pub struct HeedAuthStore {
     env: Arc<Env>,
-    keys: Database<ByteSlice, SerdeJson<Key>>,
+    keys: Database<Bytes, SerdeJson<Key>>,
     action_keyid_index_expiration: Database<KeyIdActionCodec, SerdeJson<Option<OffsetDateTime>>>,
     should_close_on_drop: bool,
 }
@@ -276,7 +279,7 @@ impl HeedAuthStore {
     fn delete_key_from_inverted_db(&self, wtxn: &mut RwTxn, key: &KeyId) -> Result<()> {
         let mut iter = self
             .action_keyid_index_expiration
-            .remap_types::<ByteSlice, DecodeIgnore>()
+            .remap_types::<Bytes, DecodeIgnore>()
             .prefix_iter_mut(wtxn, key.as_bytes())?;
         while iter.next().transpose()?.is_some() {
             // safety: we don't keep references from inside the LMDB database.
@@ -294,23 +297,24 @@ pub struct KeyIdActionCodec;
 impl<'a> milli::heed::BytesDecode<'a> for KeyIdActionCodec {
     type DItem = (KeyId, Action, Option<&'a [u8]>);
 
-    fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
-        let (key_id_bytes, action_bytes) = try_split_array_at(bytes)?;
-        let (action_bytes, index) = match try_split_array_at(action_bytes)? {
-            (action, []) => (action, None),
-            (action, index) => (action, Some(index)),
-        };
+    fn bytes_decode(bytes: &'a [u8]) -> StdResult<Self::DItem, BoxedError> {
+        let (key_id_bytes, action_bytes) = try_split_array_at(bytes).ok_or(SliceTooShortError)?;
+        let (&action_byte, index) =
+            match try_split_array_at(action_bytes).ok_or(SliceTooShortError)? {
+                ([action], []) => (action, None),
+                ([action], index) => (action, Some(index)),
+            };
         let key_id = Uuid::from_bytes(*key_id_bytes);
-        let action = Action::from_repr(u8::from_be_bytes(*action_bytes))?;
+        let action = Action::from_repr(action_byte).ok_or(InvalidActionError { action_byte })?;
 
-        Some((key_id, action, index))
+        Ok((key_id, action, index))
     }
 }
 
 impl<'a> milli::heed::BytesEncode<'a> for KeyIdActionCodec {
     type EItem = (&'a KeyId, &'a Action, Option<&'a [u8]>);
 
-    fn bytes_encode((key_id, action, index): &Self::EItem) -> Option<Cow<[u8]>> {
+    fn bytes_encode((key_id, action, index): &Self::EItem) -> StdResult<Cow<[u8]>, BoxedError> {
         let mut bytes = Vec::new();
 
         bytes.extend_from_slice(key_id.as_bytes());
@@ -320,10 +324,20 @@ impl<'a> milli::heed::BytesEncode<'a> for KeyIdActionCodec {
             bytes.extend_from_slice(index);
         }
 
-        Some(Cow::Owned(bytes))
+        Ok(Cow::Owned(bytes))
     }
 }
 
+#[derive(Error, Debug)]
+#[error("the slice is too short")]
+pub struct SliceTooShortError;
+
+#[derive(Error, Debug)]
+#[error("cannot construct a valid Action from {action_byte}")]
+pub struct InvalidActionError {
+    pub action_byte: u8,
+}
+
 pub fn generate_key_as_hexa(uid: Uuid, master_key: &[u8]) -> String {
     // format uid as hyphenated allowing user to generate their own keys.
     let mut uid_buffer = [0; Hyphenated::LENGTH];
diff --git a/meilisearch-types/src/error.rs b/meilisearch-types/src/error.rs
index afe9c5189..653cb108b 100644
--- a/meilisearch-types/src/error.rs
+++ b/meilisearch-types/src/error.rs
@@ -386,11 +386,11 @@ impl ErrorCode for HeedError {
             HeedError::Mdb(MdbError::Invalid) => Code::InvalidStoreFile,
             HeedError::Io(e) => e.error_code(),
             HeedError::Mdb(_)
-            | HeedError::Encoding
-            | HeedError::Decoding
+            | HeedError::Encoding(_)
+            | HeedError::Decoding(_)
             | HeedError::InvalidDatabaseTyping
             | HeedError::DatabaseClosing
-            | HeedError::BadOpenOptions => Code::Internal,
+            | HeedError::BadOpenOptions { .. } => Code::Internal,
         }
     }
 }
diff --git a/meilisearch-types/src/settings.rs b/meilisearch-types/src/settings.rs
index 7bef64d4b..a57dc0e9a 100644
--- a/meilisearch-types/src/settings.rs
+++ b/meilisearch-types/src/settings.rs
@@ -532,7 +532,10 @@ pub fn settings(
 
     let faceting = FacetingSettings {
         max_values_per_facet: Setting::Set(
-            index.max_values_per_facet(rtxn)?.unwrap_or(DEFAULT_VALUES_PER_FACET),
+            index
+                .max_values_per_facet(rtxn)?
+                .map(|x| x as usize)
+                .unwrap_or(DEFAULT_VALUES_PER_FACET),
         ),
         sort_facet_values_by: Setting::Set(
             index
@@ -545,7 +548,10 @@ pub fn settings(
 
     let pagination = PaginationSettings {
         max_total_hits: Setting::Set(
-            index.pagination_max_total_hits(rtxn)?.unwrap_or(DEFAULT_PAGINATION_MAX_TOTAL_HITS),
+            index
+                .pagination_max_total_hits(rtxn)?
+                .map(|x| x as usize)
+                .unwrap_or(DEFAULT_PAGINATION_MAX_TOTAL_HITS),
         ),
     };
 
diff --git a/meilisearch/src/routes/indexes/documents.rs b/meilisearch/src/routes/indexes/documents.rs
index b6950ae6e..6d59f60dd 100644
--- a/meilisearch/src/routes/indexes/documents.rs
+++ b/meilisearch/src/routes/indexes/documents.rs
@@ -3,7 +3,7 @@ use std::io::ErrorKind;
 
 use actix_web::http::header::CONTENT_TYPE;
 use actix_web::web::Data;
 use actix_web::{web, HttpMessage, HttpRequest, HttpResponse};
-use bstr::ByteSlice;
+use bstr::ByteSlice as _;
 use deserr::actix_web::{AwebJson, AwebQueryParameter};
 use deserr::Deserr;
 use futures::StreamExt;
diff --git a/meilisearch/src/routes/indexes/settings.rs b/meilisearch/src/routes/indexes/settings.rs
index 3921b535e..f5d98761d 100644
--- a/meilisearch/src/routes/indexes/settings.rs
+++ b/meilisearch/src/routes/indexes/settings.rs
@@ -78,6 +78,7 @@ macro_rules! make_setting_route {
 
                 let body = body.into_inner();
 
+                #[allow(clippy::redundant_closure_call)]
                 $analytics(&body, &req);
 
                 let new_settings = Settings {
diff --git a/meilisearch/src/routes/multi_search.rs b/meilisearch/src/routes/multi_search.rs
index 3a028022a..bcb8bb2a1 100644
--- a/meilisearch/src/routes/multi_search.rs
+++ b/meilisearch/src/routes/multi_search.rs
@@ -46,49 +46,46 @@ pub async fn multi_search_with_post(
     // Explicitly expect a `(ResponseError, usize)` for the error type rather than `ResponseError` only,
     // so that `?` doesn't work if it doesn't use `with_index`, ensuring that it is not forgotten in case of code
     // changes.
-    let search_results: Result<_, (ResponseError, usize)> = (|| {
-        async {
-            let mut search_results = Vec::with_capacity(queries.len());
-            for (query_index, (index_uid, mut query)) in
-                queries.into_iter().map(SearchQueryWithIndex::into_index_query).enumerate()
+    let search_results: Result<_, (ResponseError, usize)> = async {
+        let mut search_results = Vec::with_capacity(queries.len());
+        for (query_index, (index_uid, mut query)) in
+            queries.into_iter().map(SearchQueryWithIndex::into_index_query).enumerate()
+        {
+            debug!("multi-search #{query_index}: called with params: {:?}", query);
+
+            // Check index from API key
+            if !index_scheduler.filters().is_index_authorized(&index_uid) {
+                return Err(AuthenticationError::InvalidToken).with_index(query_index);
+            }
+            // Apply search rules from tenant token
+            if let Some(search_rules) = index_scheduler.filters().get_index_search_rules(&index_uid)
             {
-                debug!("multi-search #{query_index}: called with params: {:?}", query);
+                add_search_rules(&mut query, search_rules);
+            }
 
-                // Check index from API key
-                if !index_scheduler.filters().is_index_authorized(&index_uid) {
-                    return Err(AuthenticationError::InvalidToken).with_index(query_index);
-                }
-                // Apply search rules from tenant token
-                if let Some(search_rules) =
-                    index_scheduler.filters().get_index_search_rules(&index_uid)
-                {
-                    add_search_rules(&mut query, search_rules);
-                }
+            let index = index_scheduler
+                .index(&index_uid)
+                .map_err(|err| {
+                    let mut err = ResponseError::from(err);
+                    // Patch the HTTP status code to 400 as it defaults to 404 for `index_not_found`, but
+                    // here the resource not found is not part of the URL.
+                    err.code = StatusCode::BAD_REQUEST;
+                    err
+                })
+                .with_index(query_index)?;
 
-                let index = index_scheduler
-                    .index(&index_uid)
-                    .map_err(|err| {
-                        let mut err = ResponseError::from(err);
-                        // Patch the HTTP status code to 400 as it defaults to 404 for `index_not_found`, but
-                        // here the resource not found is not part of the URL.
-                        err.code = StatusCode::BAD_REQUEST;
-                        err
-                    })
+            let search_result =
+                tokio::task::spawn_blocking(move || perform_search(&index, query, features))
+                    .await
                     .with_index(query_index)?;
 
-                let search_result =
-                    tokio::task::spawn_blocking(move || perform_search(&index, query, features))
-                        .await
-                        .with_index(query_index)?;
-
-                search_results.push(SearchResultWithIndex {
-                    index_uid: index_uid.into_inner(),
-                    result: search_result.with_index(query_index)?,
-                });
-            }
-            Ok(search_results)
+            search_results.push(SearchResultWithIndex {
+                index_uid: index_uid.into_inner(),
+                result: search_result.with_index(query_index)?,
+            });
         }
-    })()
+        Ok(search_results)
+    }
     .await;
 
     if search_results.is_ok() {
diff --git a/meilisearch/src/search.rs b/meilisearch/src/search.rs
index c9ebed80e..41f073b48 100644
--- a/meilisearch/src/search.rs
+++ b/meilisearch/src/search.rs
@@ -360,6 +360,7 @@ fn prepare_search<'t>(
     let max_total_hits = index
         .pagination_max_total_hits(rtxn)
         .map_err(milli::Error::from)?
+        .map(|x| x as usize)
         .unwrap_or(DEFAULT_PAGINATION_MAX_TOTAL_HITS);
 
     search.exhaustive_number_hits(is_finite_pagination);
@@ -586,6 +587,7 @@ pub fn perform_search(
     let max_values_by_facet = index
         .max_values_per_facet(&rtxn)
         .map_err(milli::Error::from)?
+        .map(|x| x as usize)
         .unwrap_or(DEFAULT_VALUES_PER_FACET);
     facet_distribution.max_values_per_facet(max_values_by_facet);
 
diff --git a/meilisearch/tests/search/distinct.rs b/meilisearch/tests/search/distinct.rs
index 14ce88da2..aea98215d 100644
--- a/meilisearch/tests/search/distinct.rs
+++ b/meilisearch/tests/search/distinct.rs
@@ -4,7 +4,7 @@ use once_cell::sync::Lazy;
 use crate::common::{Server, Value};
 use crate::json;
 
-pub(self) static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
+static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
     json!([
         {
             "id": 1,
@@ -107,8 +107,8 @@ pub(self) static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
     ])
 });
 
-pub(self) static DOCUMENT_PRIMARY_KEY: &str = "id";
-pub(self) static DOCUMENT_DISTINCT_KEY: &str = "product_id";
+static DOCUMENT_PRIMARY_KEY: &str = "id";
+static DOCUMENT_DISTINCT_KEY: &str = "product_id";
 
 /// testing: https://github.com/meilisearch/meilisearch/issues/4078
 #[actix_rt::test]
diff --git a/meilisearch/tests/search/facet_search.rs b/meilisearch/tests/search/facet_search.rs
index 1b06f1b98..8c1229f1a 100644
--- a/meilisearch/tests/search/facet_search.rs
+++ b/meilisearch/tests/search/facet_search.rs
@@ -4,7 +4,7 @@ use once_cell::sync::Lazy;
 use crate::common::{Server, Value};
 use crate::json;
 
-pub(self) static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
+static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
     json!([
         {
             "title": "Shazam!",
diff --git a/meilisearch/tests/search/geo.rs b/meilisearch/tests/search/geo.rs
index 67a4ca7df..5c6bb78a1 100644
--- a/meilisearch/tests/search/geo.rs
+++ b/meilisearch/tests/search/geo.rs
@@ -4,7 +4,7 @@ use once_cell::sync::Lazy;
 use crate::common::{Server, Value};
 use crate::json;
 
-pub(self) static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
+static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
     json!([
         {
             "id": 1,
diff --git a/meilisearch/tests/search/mod.rs b/meilisearch/tests/search/mod.rs
index 0cf322401..00678f7d4 100644
--- a/meilisearch/tests/search/mod.rs
+++ b/meilisearch/tests/search/mod.rs
@@ -15,7 +15,7 @@ use once_cell::sync::Lazy;
 use crate::common::{Server, Value};
 use crate::json;
 
-pub(self) static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
+static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
     json!([
         {
             "title": "Shazam!",
@@ -40,7 +40,7 @@ pub(self) static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
     ])
 });
 
-pub(self) static NESTED_DOCUMENTS: Lazy<Value> = Lazy::new(|| {
+static NESTED_DOCUMENTS: Lazy<Value> = Lazy::new(|| {
     json!([
         {
             "id": 852,
diff --git a/meilitool/src/main.rs b/meilitool/src/main.rs
index 2b40e42c2..f199df216 100644
--- a/meilitool/src/main.rs
+++ b/meilitool/src/main.rs
@@ -7,8 +7,8 @@ use clap::{Parser, Subcommand};
 use dump::{DumpWriter, IndexMetadata};
 use file_store::FileStore;
 use meilisearch_auth::AuthController;
-use meilisearch_types::heed::types::{OwnedType, SerdeJson, Str};
-use meilisearch_types::heed::{Database, Env, EnvOpenOptions, PolyDatabase, RoTxn, RwTxn};
+use meilisearch_types::heed::types::{SerdeJson, Str};
+use meilisearch_types::heed::{Database, Env, EnvOpenOptions, RoTxn, RwTxn, Unspecified};
 use meilisearch_types::milli::documents::{obkv_to_object, DocumentsBatchReader};
 use meilisearch_types::milli::{obkv_to_json, BEU32};
 use meilisearch_types::tasks::{Status, Task};
@@ -148,15 +148,17 @@ fn try_opening_poly_database(
     env: &Env,
     rtxn: &RoTxn,
     db_name: &str,
-) -> anyhow::Result<PolyDatabase> {
-    env.open_poly_database(rtxn, Some(db_name))
+) -> anyhow::Result<Database<Unspecified, Unspecified>> {
+    env.database_options()
+        .name(db_name)
+        .open(rtxn)
         .with_context(|| format!("While opening the {db_name:?} poly database"))?
         .with_context(|| format!("Missing the {db_name:?} poly database"))
 }
 
 fn try_clearing_poly_database(
     wtxn: &mut RwTxn,
-    database: PolyDatabase,
+    database: Database<Unspecified, Unspecified>,
     db_name: &str,
 ) -> anyhow::Result<()> {
     database.clear(wtxn).with_context(|| format!("While clearing the {db_name:?} database"))
@@ -212,7 +214,7 @@ fn export_a_dump(
     eprintln!("Successfully dumped {count} keys!");
 
     let rtxn = env.read_txn()?;
-    let all_tasks: Database<OwnedType<BEU32>, SerdeJson<Task>> =
+    let all_tasks: Database<BEU32, SerdeJson<Task>> =
         try_opening_database(&env, &rtxn, "all-tasks")?;
     let index_mapping: Database<Str, UuidCodec> =
         try_opening_database(&env, &rtxn, "index-mapping")?;
diff --git a/meilitool/src/uuid_codec.rs b/meilitool/src/uuid_codec.rs
index 70a92ca94..54020fa3c 100644
--- a/meilitool/src/uuid_codec.rs
+++ b/meilitool/src/uuid_codec.rs
@@ -1,7 +1,7 @@
 use std::borrow::Cow;
 use std::convert::TryInto;
 
-use meilisearch_types::heed::{BytesDecode, BytesEncode};
+use meilisearch_types::heed::{BoxedError, BytesDecode, BytesEncode};
 use uuid::Uuid;
 
 /// A heed codec for value of struct Uuid.
@@ -10,15 +10,15 @@ pub struct UuidCodec;
 impl<'a> BytesDecode<'a> for UuidCodec {
     type DItem = Uuid;
 
-    fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
-        bytes.try_into().ok().map(Uuid::from_bytes)
+    fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
+        bytes.try_into().map(Uuid::from_bytes).map_err(Into::into)
     }
 }
 
 impl BytesEncode<'_> for UuidCodec {
     type EItem = Uuid;
 
-    fn bytes_encode(item: &Self::EItem) -> Option<Cow<[u8]>> {
-        Some(Cow::Borrowed(item.as_bytes()))
+    fn bytes_encode(item: &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
+        Ok(Cow::Borrowed(item.as_bytes()))
     }
 }
diff --git a/milli/Cargo.toml b/milli/Cargo.toml
index 1d8517e73..d21ec172e 100644
--- a/milli/Cargo.toml
+++ b/milli/Cargo.toml
@@ -29,8 +29,8 @@ geoutils = "0.5.1"
 grenad = { version = "0.4.5", default-features = false, features = [
     "rayon", "tempfile"
 ] }
-heed = { git = "https://github.com/meilisearch/heed", tag = "v0.12.7", default-features = false, features = [
-    "lmdb", "read-txn-no-tls"
+heed = { version = "0.20.0-alpha.9", default-features = false, features = [
+    "serde-json", "serde-bincode", "read-txn-no-tls"
 ] }
 indexmap = { version = "2.0.0", features = ["serde"] }
 instant-distance = { version = "0.6.1", features = ["with-serde"] }
diff --git a/milli/src/error.rs b/milli/src/error.rs
index b249f2977..cbbd8a3e5 100644
--- a/milli/src/error.rs
+++ b/milli/src/error.rs
@@ -152,7 +152,7 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco
         valid_fields: BTreeSet<String>,
         hidden_fields: bool,
     },
-    #[error("{}", HeedError::BadOpenOptions)]
+    #[error("an environment is already opened with different options")]
     InvalidLmdbOpenOptions,
     #[error("You must specify where `sort` is listed in the rankingRules setting to use the sort parameter at search time.")]
     SortRankingRuleMissing,
@@ -326,11 +326,12 @@ impl From<HeedError> for Error {
             HeedError::Mdb(MdbError::MapFull) => UserError(MaxDatabaseSizeReached),
             HeedError::Mdb(MdbError::Invalid) => UserError(InvalidStoreFile),
             HeedError::Mdb(error) => InternalError(Store(error)),
-            HeedError::Encoding => InternalError(Serialization(Encoding { db_name: None })),
-            HeedError::Decoding => InternalError(Serialization(Decoding { db_name: None })),
+            // TODO use the encoding
+            HeedError::Encoding(_) => InternalError(Serialization(Encoding { db_name: None })),
+            HeedError::Decoding(_) => InternalError(Serialization(Decoding { db_name: None })),
             HeedError::InvalidDatabaseTyping => InternalError(InvalidDatabaseTyping),
             HeedError::DatabaseClosing => InternalError(DatabaseClosing),
-            HeedError::BadOpenOptions => UserError(InvalidLmdbOpenOptions),
+            HeedError::BadOpenOptions { .. } => UserError(InvalidLmdbOpenOptions),
         }
     }
 }
diff --git a/milli/src/external_documents_ids.rs b/milli/src/external_documents_ids.rs
index ec419446c..361617826 100644
--- a/milli/src/external_documents_ids.rs
+++ b/milli/src/external_documents_ids.rs
@@ -1,6 +1,6 @@
 use std::collections::HashMap;
 
-use heed::types::{OwnedType, Str};
+use heed::types::Str;
 use heed::{Database, RoIter, RoTxn, RwTxn};
 
 use crate::{DocumentId, BEU32};
@@ -16,10 +16,10 @@ pub struct DocumentOperation {
     pub kind: DocumentOperationKind,
 }
 
-pub struct ExternalDocumentsIds(Database<Str, OwnedType<BEU32>>);
+pub struct ExternalDocumentsIds(Database<Str, BEU32>);
 
 impl ExternalDocumentsIds {
-    pub fn new(db: Database<Str, OwnedType<BEU32>>) -> ExternalDocumentsIds {
+    pub fn new(db: Database<Str, BEU32>) -> ExternalDocumentsIds {
         ExternalDocumentsIds(db)
     }
 
@@ -29,7 +29,7 @@ impl ExternalDocumentsIds {
     }
 
     pub fn get<A: AsRef<str>>(&self, rtxn: &RoTxn, external_id: A) -> heed::Result<Option<u32>> {
-        Ok(self.0.get(rtxn, external_id.as_ref())?.map(|x| x.get()))
+        self.0.get(rtxn, external_id.as_ref())
    }
 
     /// An helper function to debug this type, returns an `HashMap` of both,
@@ -38,7 +38,7 @@ impl ExternalDocumentsIds {
         let mut map = HashMap::default();
         for result in self.0.iter(rtxn)? {
             let (external, internal) = result?;
-            map.insert(external.to_owned(), internal.get());
+            map.insert(external.to_owned(), internal);
         }
         Ok(map)
     }
@@ -55,7 +55,7 @@ impl ExternalDocumentsIds {
         for DocumentOperation { external_id, internal_id, kind } in operations {
             match kind {
                 DocumentOperationKind::Create => {
-                    self.0.put(wtxn, &external_id, &BEU32::new(internal_id))?;
+                    self.0.put(wtxn, &external_id, &internal_id)?;
                 }
                 DocumentOperationKind::Delete => {
                     if !self.0.delete(wtxn, &external_id)? {
@@ -69,7 +69,7 @@ impl ExternalDocumentsIds {
     }
 
     /// Returns an iterator over all the external ids.
-    pub fn iter<'t>(&self, rtxn: &'t RoTxn) -> heed::Result<RoIter<'t, Str, OwnedType<BEU32>>> {
+    pub fn iter<'t>(&self, rtxn: &'t RoTxn) -> heed::Result<RoIter<'t, Str, BEU32>> {
         self.0.iter(rtxn)
     }
 }
diff --git a/milli/src/heed_codec/beu16_str_codec.rs b/milli/src/heed_codec/beu16_str_codec.rs
index d1b85d47f..ba04f0900 100644
--- a/milli/src/heed_codec/beu16_str_codec.rs
+++ b/milli/src/heed_codec/beu16_str_codec.rs
@@ -2,26 +2,28 @@ use std::borrow::Cow;
 use std::convert::TryInto;
 use std::str;
 
+use heed::BoxedError;
+
 pub struct BEU16StrCodec;
 
 impl<'a> heed::BytesDecode<'a> for BEU16StrCodec {
     type DItem = (u16, &'a str);
 
-    fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
+    fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
         let (n_bytes, str_bytes) = bytes.split_at(2);
-        let n = n_bytes.try_into().map(u16::from_be_bytes).ok()?;
-        let s = str::from_utf8(str_bytes).ok()?;
-        Some((n, s))
+        let n = n_bytes.try_into().map(u16::from_be_bytes)?;
+        let s = str::from_utf8(str_bytes)?;
+        Ok((n, s))
     }
 }
 
 impl<'a> heed::BytesEncode<'a> for BEU16StrCodec {
     type EItem = (u16, &'a str);
 
-    fn bytes_encode((n, s): &Self::EItem) -> Option<Cow<[u8]>> {
+    fn bytes_encode((n, s): &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
         let mut bytes = Vec::with_capacity(s.len() + 2);
         bytes.extend_from_slice(&n.to_be_bytes());
         bytes.extend_from_slice(s.as_bytes());
-        Some(Cow::Owned(bytes))
+        Ok(Cow::Owned(bytes))
     }
 }
diff --git a/milli/src/heed_codec/beu32_str_codec.rs b/milli/src/heed_codec/beu32_str_codec.rs
index c525d6b5b..762e31ca4 100644
--- a/milli/src/heed_codec/beu32_str_codec.rs
+++ b/milli/src/heed_codec/beu32_str_codec.rs
@@ -2,26 +2,28 @@ use std::borrow::Cow;
 use std::convert::TryInto;
 use std::str;
 
+use heed::BoxedError;
+
 pub struct BEU32StrCodec;
 
 impl<'a> heed::BytesDecode<'a> for BEU32StrCodec {
     type DItem = (u32, &'a str);
 
-    fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
+    fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
         let (n_bytes, str_bytes) = bytes.split_at(4);
-        let n = n_bytes.try_into().map(u32::from_be_bytes).ok()?;
-        let s = str::from_utf8(str_bytes).ok()?;
-        Some((n, s))
+        let n = n_bytes.try_into().map(u32::from_be_bytes)?;
+        let s = str::from_utf8(str_bytes)?;
+        Ok((n, s))
     }
 }
 
 impl<'a> heed::BytesEncode<'a> for BEU32StrCodec {
     type EItem = (u32, &'a str);
 
-    fn bytes_encode((n, s): &Self::EItem) -> Option<Cow<[u8]>> {
+    fn bytes_encode((n, s): &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
         let mut bytes = Vec::with_capacity(s.len() + 4);
         bytes.extend_from_slice(&n.to_be_bytes());
         bytes.extend_from_slice(s.as_bytes());
-        Some(Cow::Owned(bytes))
+        Ok(Cow::Owned(bytes))
     }
 }
diff --git a/milli/src/heed_codec/byte_slice_ref.rs b/milli/src/heed_codec/byte_slice_ref.rs
index 48eda63c5..a4b5748f1 100644
--- a/milli/src/heed_codec/byte_slice_ref.rs
+++ b/milli/src/heed_codec/byte_slice_ref.rs
@@ -1,23 +1,23 @@
 use std::borrow::Cow;
 
-use heed::{BytesDecode, BytesEncode};
+use heed::{BoxedError, BytesDecode, BytesEncode};
 
-/// A codec for values of type `&[u8]`. Unlike `ByteSlice`, its `EItem` and `DItem` associated
+/// A codec for values of type `&[u8]`. Unlike `Bytes`, its `EItem` and `DItem` associated
 /// types are equivalent (= `&'a [u8]`) and these values can reside within another structure.
-pub struct ByteSliceRefCodec;
+pub struct BytesRefCodec;
 
-impl<'a> BytesEncode<'a> for ByteSliceRefCodec {
+impl<'a> BytesEncode<'a> for BytesRefCodec {
     type EItem = &'a [u8];
 
-    fn bytes_encode(item: &'a Self::EItem) -> Option<Cow<[u8]>> {
-        Some(Cow::Borrowed(item))
+    fn bytes_encode(item: &'a Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
+        Ok(Cow::Borrowed(item))
     }
 }
 
-impl<'a> BytesDecode<'a> for ByteSliceRefCodec {
+impl<'a> BytesDecode<'a> for BytesRefCodec {
     type DItem = &'a [u8];
 
-    fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
-        Some(bytes)
+    fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
+        Ok(bytes)
     }
 }
diff --git a/milli/src/heed_codec/facet/field_doc_id_facet_codec.rs b/milli/src/heed_codec/facet/field_doc_id_facet_codec.rs
index cc9919ad2..7e281adfa 100644
--- a/milli/src/heed_codec/facet/field_doc_id_facet_codec.rs
+++ b/milli/src/heed_codec/facet/field_doc_id_facet_codec.rs
@@ -1,8 +1,9 @@
 use std::borrow::Cow;
 use std::marker::PhantomData;
 
-use heed::{BytesDecode, BytesEncode};
+use heed::{BoxedError, BytesDecode, BytesEncode};
 
+use crate::heed_codec::SliceTooShortError;
 use crate::{try_split_array_at, DocumentId, FieldId};
 
 pub struct FieldDocIdFacetCodec<C>(PhantomData<C>);
@@ -13,16 +14,16 @@ where
 {
     type DItem = (FieldId, DocumentId, C::DItem);
 
-    fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
-        let (field_id_bytes, bytes) = try_split_array_at(bytes)?;
+    fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
+        let (field_id_bytes, bytes) = try_split_array_at(bytes).ok_or(SliceTooShortError)?;
         let field_id = u16::from_be_bytes(field_id_bytes);
 
-        let (document_id_bytes, bytes) = try_split_array_at(bytes)?;
+        let (document_id_bytes, bytes) = try_split_array_at(bytes).ok_or(SliceTooShortError)?;
         let document_id = u32::from_be_bytes(document_id_bytes);
 
         let value = C::bytes_decode(bytes)?;
 
-        Some((field_id, document_id, value))
+        Ok((field_id, document_id, value))
     }
 }
 
@@ -32,13 +33,15 @@ where
 {
     type EItem = (FieldId, DocumentId, C::EItem);
 
-    fn bytes_encode((field_id, document_id, value): &'a Self::EItem) -> Option<Cow<[u8]>> {
+    fn bytes_encode(
+        (field_id, document_id, value): &'a Self::EItem,
+    ) -> Result<Cow<[u8]>, BoxedError> {
         let mut bytes = Vec::with_capacity(32);
         bytes.extend_from_slice(&field_id.to_be_bytes()); // 2 bytes
         bytes.extend_from_slice(&document_id.to_be_bytes()); // 4 bytes
         let value_bytes = C::bytes_encode(value)?; // variable length, if f64 -> 16 bytes, if string -> large, potentially
         bytes.extend_from_slice(&value_bytes);
 
-        Some(Cow::Owned(bytes))
+        Ok(Cow::Owned(bytes))
     }
 }
diff --git a/milli/src/heed_codec/facet/mod.rs b/milli/src/heed_codec/facet/mod.rs
index d36ec8434..7bb874060 100644
--- a/milli/src/heed_codec/facet/mod.rs
+++ b/milli/src/heed_codec/facet/mod.rs
@@ -5,8 +5,8 @@ use std::borrow::Cow;
 use std::convert::TryFrom;
 use std::marker::PhantomData;
 
-use heed::types::{DecodeIgnore, OwnedType};
-use heed::{BytesDecode, BytesEncode};
+use heed::types::DecodeIgnore;
+use heed::{BoxedError, BytesDecode, BytesEncode};
 use roaring::RoaringBitmap;
 
 pub use self::field_doc_id_facet_codec::FieldDocIdFacetCodec;
@@ -18,7 +18,7 @@ pub type FieldDocIdFacetF64Codec = FieldDocIdFacetCodec<OrderedF64Codec>;
 pub type FieldDocIdFacetStringCodec = FieldDocIdFacetCodec<StrRefCodec>;
 pub type FieldDocIdFacetIgnoreCodec = FieldDocIdFacetCodec<DecodeIgnore>;
 
-pub type FieldIdCodec = OwnedType<BEU16>;
+pub type FieldIdCodec = BEU16;
 
 /// Tries to split a slice in half at the given middle point,
 /// `None` if the slice is too short.
@@ -58,7 +58,7 @@ where
 {
     type EItem = FacetGroupKey<T::EItem>;
 
-    fn bytes_encode(value: &'a Self::EItem) -> Option<Cow<[u8]>> {
+    fn bytes_encode(value: &'a Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
         let mut v = vec![];
         v.extend_from_slice(&value.field_id.to_be_bytes());
         v.extend_from_slice(&[value.level]);
@@ -66,7 +66,7 @@ where
         let bound = T::bytes_encode(&value.left_bound)?;
         v.extend_from_slice(&bound);
 
-        Some(Cow::Owned(v))
+        Ok(Cow::Owned(v))
     }
 }
 impl<'a, T> heed::BytesDecode<'a> for FacetGroupKeyCodec<T>
 where
     T: BytesDecode<'a>,
 {
     type DItem = FacetGroupKey<T::DItem>;
 
-    fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
-        let fid = u16::from_be_bytes(<[u8; 2]>::try_from(&bytes[0..=1]).ok()?);
+    fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
+        let fid = u16::from_be_bytes(<[u8; 2]>::try_from(&bytes[0..=1])?);
         let level = bytes[2];
         let bound = T::bytes_decode(&bytes[3..])?;
-        Some(FacetGroupKey { field_id: fid, level, left_bound: bound })
+        Ok(FacetGroupKey { field_id: fid, level, left_bound: bound })
     }
 }
 
@@ -87,17 +87,17 @@ pub struct FacetGroupValueCodec;
 impl<'a> heed::BytesEncode<'a> for FacetGroupValueCodec {
     type EItem = FacetGroupValue;
 
-    fn bytes_encode(value: &'a Self::EItem) -> Option<Cow<[u8]>> {
+    fn bytes_encode(value: &'a Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
         let mut v = vec![value.size];
         CboRoaringBitmapCodec::serialize_into(&value.bitmap, &mut v);
-        Some(Cow::Owned(v))
+        Ok(Cow::Owned(v))
     }
 }
 impl<'a> heed::BytesDecode<'a> for FacetGroupValueCodec {
     type DItem = FacetGroupValue;
 
-    fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
+    fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
         let size = bytes[0];
-        let bitmap = CboRoaringBitmapCodec::deserialize_from(&bytes[1..]).ok()?;
-        Some(FacetGroupValue { size, bitmap })
+        let bitmap = CboRoaringBitmapCodec::deserialize_from(&bytes[1..])?;
+        Ok(FacetGroupValue { size, bitmap })
     }
 }
diff --git a/milli/src/heed_codec/facet/ordered_f64_codec.rs b/milli/src/heed_codec/facet/ordered_f64_codec.rs
index 5ac9ffcfc..b692b2363 100644
--- a/milli/src/heed_codec/facet/ordered_f64_codec.rs
+++ b/milli/src/heed_codec/facet/ordered_f64_codec.rs
@@ -1,37 +1,45 @@
 use std::borrow::Cow;
 use std::convert::TryInto;
 
-use heed::BytesDecode;
+use heed::{BoxedError, BytesDecode};
+use thiserror::Error;
 
 use crate::facet::value_encoding::f64_into_bytes;
+use crate::heed_codec::SliceTooShortError;
 
 pub struct OrderedF64Codec;
 
 impl<'a> BytesDecode<'a> for OrderedF64Codec {
     type DItem = f64;
 
-    fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
+    fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
         if bytes.len() < 16 {
-            return None;
+            Err(SliceTooShortError.into())
+        } else {
+            bytes[8..].try_into().map(f64::from_be_bytes).map_err(Into::into)
         }
-        let f = bytes[8..].try_into().ok().map(f64::from_be_bytes)?;
-        Some(f)
     }
 }
 
 impl heed::BytesEncode<'_> for OrderedF64Codec {
     type EItem = f64;
 
-    fn bytes_encode(f: &Self::EItem) -> Option<Cow<[u8]>> {
+    fn bytes_encode(f: &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
         let mut buffer = [0u8; 16];
 
         // write the globally ordered float
-        let bytes = f64_into_bytes(*f)?;
+        let bytes = f64_into_bytes(*f).ok_or(InvalidGloballyOrderedFloatError { float: *f })?;
         buffer[..8].copy_from_slice(&bytes[..]);
         // Then the f64 value just to be able to read it back
         let bytes = f.to_be_bytes();
         buffer[8..16].copy_from_slice(&bytes[..]);
 
-        Some(Cow::Owned(buffer.to_vec()))
+        Ok(Cow::Owned(buffer.to_vec()))
     }
 }
+
+#[derive(Error, Debug)]
+#[error("the float {float} cannot be converted to a globally ordered representation")]
+pub struct InvalidGloballyOrderedFloatError {
+    float: f64,
+}
diff --git a/milli/src/heed_codec/field_id_word_count_codec.rs b/milli/src/heed_codec/field_id_word_count_codec.rs
index aca7a80c4..19d8d63c6 100644
--- a/milli/src/heed_codec/field_id_word_count_codec.rs
+++ b/milli/src/heed_codec/field_id_word_count_codec.rs
@@ -1,5 +1,8 @@
 use std::borrow::Cow;
 
+use heed::BoxedError;
+
+use super::SliceTooShortError;
 use crate::{try_split_array_at, FieldId};
 
 pub struct FieldIdWordCountCodec;
@@ -7,21 +10,21 @@ pub struct FieldIdWordCountCodec;
 impl<'a> heed::BytesDecode<'a> for FieldIdWordCountCodec {
     type DItem = (FieldId, u8);
 
-    fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
-        let (field_id_bytes, bytes) = try_split_array_at(bytes)?;
+    fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
+        let (field_id_bytes, bytes) = try_split_array_at(bytes).ok_or(SliceTooShortError)?;
         let field_id = u16::from_be_bytes(field_id_bytes);
-        let ([word_count], _nothing) = try_split_array_at(bytes)?;
-        Some((field_id, word_count))
+        let ([word_count], _nothing) = try_split_array_at(bytes).ok_or(SliceTooShortError)?;
+        Ok((field_id, word_count))
     }
 }
 
 impl<'a> heed::BytesEncode<'a> for FieldIdWordCountCodec {
     type EItem = (FieldId, u8);
 
-    fn bytes_encode((field_id, word_count): &Self::EItem) -> Option<Cow<[u8]>> {
+    fn bytes_encode((field_id, word_count): &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
         let mut bytes = Vec::with_capacity(2 + 1);
         bytes.extend_from_slice(&field_id.to_be_bytes());
         bytes.push(*word_count);
-        Some(Cow::Owned(bytes))
+        Ok(Cow::Owned(bytes))
     }
 }
diff --git a/milli/src/heed_codec/fst_set_codec.rs b/milli/src/heed_codec/fst_set_codec.rs
index fc79acf29..b402c8ff3 100644
--- a/milli/src/heed_codec/fst_set_codec.rs
+++ b/milli/src/heed_codec/fst_set_codec.rs
@@ -1,7 +1,7 @@
 use std::borrow::Cow;
 
 use fst::Set;
-use heed::{BytesDecode, BytesEncode};
+use heed::{BoxedError, BytesDecode, BytesEncode};
 
 /// A codec for values of type `Set<&[u8]>`.
pub struct FstSetCodec; @@ -9,15 +9,15 @@ pub struct FstSetCodec; impl<'a> BytesEncode<'a> for FstSetCodec { type EItem = Set>; - fn bytes_encode(item: &'a Self::EItem) -> Option> { - Some(Cow::Borrowed(item.as_fst().as_bytes())) + fn bytes_encode(item: &'a Self::EItem) -> Result, BoxedError> { + Ok(Cow::Borrowed(item.as_fst().as_bytes())) } } impl<'a> BytesDecode<'a> for FstSetCodec { type DItem = Set<&'a [u8]>; - fn bytes_decode(bytes: &'a [u8]) -> Option { - Set::new(bytes).ok() + fn bytes_decode(bytes: &'a [u8]) -> Result { + Set::new(bytes).map_err(Into::into) } } diff --git a/milli/src/heed_codec/mod.rs b/milli/src/heed_codec/mod.rs index d04eaa644..449d1955c 100644 --- a/milli/src/heed_codec/mod.rs +++ b/milli/src/heed_codec/mod.rs @@ -12,8 +12,10 @@ mod str_beu32_codec; mod str_ref; mod str_str_u8_codec; -pub use byte_slice_ref::ByteSliceRefCodec; +pub use byte_slice_ref::BytesRefCodec; +use heed::BoxedError; pub use str_ref::StrRefCodec; +use thiserror::Error; pub use self::beu16_str_codec::BEU16StrCodec; pub use self::beu32_str_codec::BEU32StrCodec; @@ -31,5 +33,9 @@ pub use self::str_str_u8_codec::{U8StrStrCodec, UncheckedU8StrStrCodec}; pub trait BytesDecodeOwned { type DItem; - fn bytes_decode_owned(bytes: &[u8]) -> Option; + fn bytes_decode_owned(bytes: &[u8]) -> Result; } + +#[derive(Error, Debug)] +#[error("the slice is too short")] +pub struct SliceTooShortError; diff --git a/milli/src/heed_codec/obkv_codec.rs b/milli/src/heed_codec/obkv_codec.rs index 6dad771a8..d2408c87d 100644 --- a/milli/src/heed_codec/obkv_codec.rs +++ b/milli/src/heed_codec/obkv_codec.rs @@ -1,5 +1,6 @@ use std::borrow::Cow; +use heed::BoxedError; use obkv::{KvReaderU16, KvWriterU16}; pub struct ObkvCodec; @@ -7,15 +8,15 @@ pub struct ObkvCodec; impl<'a> heed::BytesDecode<'a> for ObkvCodec { type DItem = KvReaderU16<'a>; - fn bytes_decode(bytes: &'a [u8]) -> Option { - Some(KvReaderU16::new(bytes)) + fn bytes_decode(bytes: &'a [u8]) -> Result { + Ok(KvReaderU16::new(bytes)) } } impl heed::BytesEncode<'_> for ObkvCodec { type EItem = KvWriterU16>; - fn bytes_encode(item: &Self::EItem) -> Option> { - item.clone().into_inner().map(Cow::Owned).ok() + fn bytes_encode(item: &Self::EItem) -> Result, BoxedError> { + item.clone().into_inner().map(Cow::Owned).map_err(Into::into) } } diff --git a/milli/src/heed_codec/roaring_bitmap/bo_roaring_bitmap_codec.rs b/milli/src/heed_codec/roaring_bitmap/bo_roaring_bitmap_codec.rs index 9ad2e9707..c5e7e3e89 100644 --- a/milli/src/heed_codec/roaring_bitmap/bo_roaring_bitmap_codec.rs +++ b/milli/src/heed_codec/roaring_bitmap/bo_roaring_bitmap_codec.rs @@ -2,7 +2,7 @@ use std::borrow::Cow; use std::convert::TryInto; use std::mem::size_of; -use heed::BytesDecode; +use heed::{BoxedError, BytesDecode}; use roaring::RoaringBitmap; use crate::heed_codec::BytesDecodeOwned; @@ -19,22 +19,22 @@ impl BoRoaringBitmapCodec { impl BytesDecode<'_> for BoRoaringBitmapCodec { type DItem = RoaringBitmap; - fn bytes_decode(bytes: &[u8]) -> Option { + fn bytes_decode(bytes: &[u8]) -> Result { let mut bitmap = RoaringBitmap::new(); for chunk in bytes.chunks(size_of::()) { - let bytes = chunk.try_into().ok()?; + let bytes = chunk.try_into()?; bitmap.push(u32::from_ne_bytes(bytes)); } - Some(bitmap) + Ok(bitmap) } } impl BytesDecodeOwned for BoRoaringBitmapCodec { type DItem = RoaringBitmap; - fn bytes_decode_owned(bytes: &[u8]) -> Option { + fn bytes_decode_owned(bytes: &[u8]) -> Result { Self::bytes_decode(bytes) } } @@ -42,9 +42,9 @@ impl BytesDecodeOwned for BoRoaringBitmapCodec { impl 
heed::BytesEncode<'_> for BoRoaringBitmapCodec { type EItem = RoaringBitmap; - fn bytes_encode(item: &Self::EItem) -> Option> { + fn bytes_encode(item: &Self::EItem) -> Result, BoxedError> { let mut out = Vec::new(); BoRoaringBitmapCodec::serialize_into(item, &mut out); - Some(Cow::Owned(out)) + Ok(Cow::Owned(out)) } } diff --git a/milli/src/heed_codec/roaring_bitmap/cbo_roaring_bitmap_codec.rs b/milli/src/heed_codec/roaring_bitmap/cbo_roaring_bitmap_codec.rs index f635e55af..dcab42c0a 100644 --- a/milli/src/heed_codec/roaring_bitmap/cbo_roaring_bitmap_codec.rs +++ b/milli/src/heed_codec/roaring_bitmap/cbo_roaring_bitmap_codec.rs @@ -3,6 +3,7 @@ use std::io; use std::mem::size_of; use byteorder::{NativeEndian, ReadBytesExt, WriteBytesExt}; +use heed::BoxedError; use roaring::RoaringBitmap; use crate::heed_codec::BytesDecodeOwned; @@ -132,26 +133,26 @@ impl CboRoaringBitmapCodec { impl heed::BytesDecode<'_> for CboRoaringBitmapCodec { type DItem = RoaringBitmap; - fn bytes_decode(bytes: &[u8]) -> Option { - Self::deserialize_from(bytes).ok() + fn bytes_decode(bytes: &[u8]) -> Result { + Self::deserialize_from(bytes).map_err(Into::into) } } impl BytesDecodeOwned for CboRoaringBitmapCodec { type DItem = RoaringBitmap; - fn bytes_decode_owned(bytes: &[u8]) -> Option { - Self::deserialize_from(bytes).ok() + fn bytes_decode_owned(bytes: &[u8]) -> Result { + Self::deserialize_from(bytes).map_err(Into::into) } } impl heed::BytesEncode<'_> for CboRoaringBitmapCodec { type EItem = RoaringBitmap; - fn bytes_encode(item: &Self::EItem) -> Option> { + fn bytes_encode(item: &Self::EItem) -> Result, BoxedError> { let mut vec = Vec::with_capacity(Self::serialized_size(item)); Self::serialize_into(item, &mut vec); - Some(Cow::Owned(vec)) + Ok(Cow::Owned(vec)) } } diff --git a/milli/src/heed_codec/roaring_bitmap/roaring_bitmap_codec.rs b/milli/src/heed_codec/roaring_bitmap/roaring_bitmap_codec.rs index f982cc105..aa532ffdd 100644 --- a/milli/src/heed_codec/roaring_bitmap/roaring_bitmap_codec.rs +++ b/milli/src/heed_codec/roaring_bitmap/roaring_bitmap_codec.rs @@ -1,5 +1,6 @@ use std::borrow::Cow; +use heed::BoxedError; use roaring::RoaringBitmap; use crate::heed_codec::BytesDecodeOwned; @@ -9,25 +10,25 @@ pub struct RoaringBitmapCodec; impl heed::BytesDecode<'_> for RoaringBitmapCodec { type DItem = RoaringBitmap; - fn bytes_decode(bytes: &[u8]) -> Option { - RoaringBitmap::deserialize_unchecked_from(bytes).ok() + fn bytes_decode(bytes: &[u8]) -> Result { + RoaringBitmap::deserialize_unchecked_from(bytes).map_err(Into::into) } } impl BytesDecodeOwned for RoaringBitmapCodec { type DItem = RoaringBitmap; - fn bytes_decode_owned(bytes: &[u8]) -> Option { - RoaringBitmap::deserialize_from(bytes).ok() + fn bytes_decode_owned(bytes: &[u8]) -> Result { + RoaringBitmap::deserialize_from(bytes).map_err(Into::into) } } impl heed::BytesEncode<'_> for RoaringBitmapCodec { type EItem = RoaringBitmap; - fn bytes_encode(item: &Self::EItem) -> Option> { + fn bytes_encode(item: &Self::EItem) -> Result, BoxedError> { let mut bytes = Vec::with_capacity(item.serialized_size()); - item.serialize_into(&mut bytes).ok()?; - Some(Cow::Owned(bytes)) + item.serialize_into(&mut bytes)?; + Ok(Cow::Owned(bytes)) } } diff --git a/milli/src/heed_codec/roaring_bitmap_length/bo_roaring_bitmap_len_codec.rs b/milli/src/heed_codec/roaring_bitmap_length/bo_roaring_bitmap_len_codec.rs index 8fae60df7..cf4997d26 100644 --- a/milli/src/heed_codec/roaring_bitmap_length/bo_roaring_bitmap_len_codec.rs +++ 
b/milli/src/heed_codec/roaring_bitmap_length/bo_roaring_bitmap_len_codec.rs @@ -1,6 +1,6 @@ use std::mem; -use heed::BytesDecode; +use heed::{BoxedError, BytesDecode}; use crate::heed_codec::BytesDecodeOwned; @@ -9,15 +9,15 @@ pub struct BoRoaringBitmapLenCodec; impl BytesDecode<'_> for BoRoaringBitmapLenCodec { type DItem = u64; - fn bytes_decode(bytes: &[u8]) -> Option { - Some((bytes.len() / mem::size_of::()) as u64) + fn bytes_decode(bytes: &[u8]) -> Result { + Ok((bytes.len() / mem::size_of::()) as u64) } } impl BytesDecodeOwned for BoRoaringBitmapLenCodec { type DItem = u64; - fn bytes_decode_owned(bytes: &[u8]) -> Option { + fn bytes_decode_owned(bytes: &[u8]) -> Result { Self::bytes_decode(bytes) } } diff --git a/milli/src/heed_codec/roaring_bitmap_length/cbo_roaring_bitmap_len_codec.rs b/milli/src/heed_codec/roaring_bitmap_length/cbo_roaring_bitmap_len_codec.rs index 5719a538a..c2565c939 100644 --- a/milli/src/heed_codec/roaring_bitmap_length/cbo_roaring_bitmap_len_codec.rs +++ b/milli/src/heed_codec/roaring_bitmap_length/cbo_roaring_bitmap_len_codec.rs @@ -1,6 +1,6 @@ use std::mem; -use heed::BytesDecode; +use heed::{BoxedError, BytesDecode}; use super::{BoRoaringBitmapLenCodec, RoaringBitmapLenCodec}; use crate::heed_codec::roaring_bitmap::cbo_roaring_bitmap_codec::THRESHOLD; @@ -11,7 +11,7 @@ pub struct CboRoaringBitmapLenCodec; impl BytesDecode<'_> for CboRoaringBitmapLenCodec { type DItem = u64; - fn bytes_decode(bytes: &[u8]) -> Option { + fn bytes_decode(bytes: &[u8]) -> Result { if bytes.len() <= THRESHOLD * mem::size_of::() { // If there is threshold or less than threshold integers that can fit into this array // of bytes it means that we used the ByteOrder codec serializer. @@ -27,7 +27,7 @@ impl BytesDecode<'_> for CboRoaringBitmapLenCodec { impl BytesDecodeOwned for CboRoaringBitmapLenCodec { type DItem = u64; - fn bytes_decode_owned(bytes: &[u8]) -> Option { + fn bytes_decode_owned(bytes: &[u8]) -> Result { Self::bytes_decode(bytes) } } diff --git a/milli/src/heed_codec/roaring_bitmap_length/roaring_bitmap_len_codec.rs b/milli/src/heed_codec/roaring_bitmap_length/roaring_bitmap_len_codec.rs index a9b0506ff..578cb31e2 100644 --- a/milli/src/heed_codec/roaring_bitmap_length/roaring_bitmap_len_codec.rs +++ b/milli/src/heed_codec/roaring_bitmap_length/roaring_bitmap_len_codec.rs @@ -2,6 +2,7 @@ use std::io::{self, BufRead, Read}; use std::mem; use byteorder::{LittleEndian, ReadBytesExt}; +use heed::BoxedError; use crate::heed_codec::BytesDecodeOwned; @@ -56,16 +57,16 @@ impl RoaringBitmapLenCodec { impl heed::BytesDecode<'_> for RoaringBitmapLenCodec { type DItem = u64; - fn bytes_decode(bytes: &[u8]) -> Option { - RoaringBitmapLenCodec::deserialize_from_slice(bytes).ok() + fn bytes_decode(bytes: &[u8]) -> Result { + RoaringBitmapLenCodec::deserialize_from_slice(bytes).map_err(Into::into) } } impl BytesDecodeOwned for RoaringBitmapLenCodec { type DItem = u64; - fn bytes_decode_owned(bytes: &[u8]) -> Option { - RoaringBitmapLenCodec::deserialize_from_slice(bytes).ok() + fn bytes_decode_owned(bytes: &[u8]) -> Result { + RoaringBitmapLenCodec::deserialize_from_slice(bytes).map_err(Into::into) } } diff --git a/milli/src/heed_codec/script_language_codec.rs b/milli/src/heed_codec/script_language_codec.rs index 83e8a7241..ef2ad4bec 100644 --- a/milli/src/heed_codec/script_language_codec.rs +++ b/milli/src/heed_codec/script_language_codec.rs @@ -1,30 +1,31 @@ use std::borrow::Cow; +use std::ffi::CStr; use std::str; use charabia::{Language, Script}; +use heed::BoxedError; pub 
struct ScriptLanguageCodec; impl<'a> heed::BytesDecode<'a> for ScriptLanguageCodec { type DItem = (Script, Language); - fn bytes_decode(bytes: &'a [u8]) -> Option { - let sep = bytes.iter().position(|b| *b == 0)?; - let (s_bytes, l_bytes) = bytes.split_at(sep); - let script = str::from_utf8(s_bytes).ok()?; + fn bytes_decode(bytes: &'a [u8]) -> Result { + let cstr = CStr::from_bytes_until_nul(bytes)?; + let script = cstr.to_str()?; let script_name = Script::from_name(script); - let lan = str::from_utf8(l_bytes).ok()?; // skip '\0' byte between the two strings. - let lan_name = Language::from_name(&lan[1..]); + let lan = str::from_utf8(&bytes[script.len() + 1..])?; + let lan_name = Language::from_name(lan); - Some((script_name, lan_name)) + Ok((script_name, lan_name)) } } impl<'a> heed::BytesEncode<'a> for ScriptLanguageCodec { type EItem = (Script, Language); - fn bytes_encode((script, lan): &Self::EItem) -> Option> { + fn bytes_encode((script, lan): &Self::EItem) -> Result, BoxedError> { let script_name = script.name().as_bytes(); let lan_name = lan.name().as_bytes(); @@ -33,6 +34,6 @@ impl<'a> heed::BytesEncode<'a> for ScriptLanguageCodec { bytes.push(0); bytes.extend_from_slice(lan_name); - Some(Cow::Owned(bytes)) + Ok(Cow::Owned(bytes)) } } diff --git a/milli/src/heed_codec/str_beu32_codec.rs b/milli/src/heed_codec/str_beu32_codec.rs index cce849e37..e3ffddcd6 100644 --- a/milli/src/heed_codec/str_beu32_codec.rs +++ b/milli/src/heed_codec/str_beu32_codec.rs @@ -3,37 +3,41 @@ use std::convert::TryInto; use std::mem::size_of; use std::str; +use heed::BoxedError; + +use super::SliceTooShortError; + pub struct StrBEU32Codec; impl<'a> heed::BytesDecode<'a> for StrBEU32Codec { type DItem = (&'a str, u32); - fn bytes_decode(bytes: &'a [u8]) -> Option { + fn bytes_decode(bytes: &'a [u8]) -> Result { let footer_len = size_of::(); if bytes.len() < footer_len { - return None; + return Err(SliceTooShortError.into()); } let (word, bytes) = bytes.split_at(bytes.len() - footer_len); - let word = str::from_utf8(word).ok()?; - let pos = bytes.try_into().map(u32::from_be_bytes).ok()?; + let word = str::from_utf8(word)?; + let pos = bytes.try_into().map(u32::from_be_bytes)?; - Some((word, pos)) + Ok((word, pos)) } } impl<'a> heed::BytesEncode<'a> for StrBEU32Codec { type EItem = (&'a str, u32); - fn bytes_encode((word, pos): &Self::EItem) -> Option> { + fn bytes_encode((word, pos): &Self::EItem) -> Result, BoxedError> { let pos = pos.to_be_bytes(); let mut bytes = Vec::with_capacity(word.len() + pos.len()); bytes.extend_from_slice(word.as_bytes()); bytes.extend_from_slice(&pos[..]); - Some(Cow::Owned(bytes)) + Ok(Cow::Owned(bytes)) } } @@ -42,26 +46,27 @@ pub struct StrBEU16Codec; impl<'a> heed::BytesDecode<'a> for StrBEU16Codec { type DItem = (&'a str, u16); - fn bytes_decode(bytes: &'a [u8]) -> Option { + fn bytes_decode(bytes: &'a [u8]) -> Result { let footer_len = size_of::(); if bytes.len() < footer_len + 1 { - return None; + return Err(SliceTooShortError.into()); } let (word_plus_nul_byte, bytes) = bytes.split_at(bytes.len() - footer_len); - let (_, word) = word_plus_nul_byte.split_last()?; - let word = str::from_utf8(word).ok()?; - let pos = bytes.try_into().map(u16::from_be_bytes).ok()?; + // unwrap: we just checked the footer + 1 above. 
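// --- Editor's note: a minimal sketch, not part of the patch, of the bounds-check
// pattern these str+integer codecs now use: instead of bailing out with `None`,
// a typed error is boxed into heed::BoxedError and surfaced to the caller (a
// plain &str stands in here for milli's SliceTooShortError).
use std::convert::TryInto;

fn decode_be_u16_footer(bytes: &[u8]) -> Result<(&[u8], u16), heed::BoxedError> {
    // checked_sub fails on slices shorter than the two-byte footer.
    let split = bytes.len().checked_sub(2).ok_or("slice too short")?;
    let (head, footer) = bytes.split_at(split);
    let pos = u16::from_be_bytes(footer.try_into()?);
    Ok((head, pos))
}
// ---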
+ let (_, word) = word_plus_nul_byte.split_last().unwrap(); + let word = str::from_utf8(word)?; + let pos = bytes.try_into().map(u16::from_be_bytes)?; - Some((word, pos)) + Ok((word, pos)) } } impl<'a> heed::BytesEncode<'a> for StrBEU16Codec { type EItem = (&'a str, u16); - fn bytes_encode((word, pos): &Self::EItem) -> Option> { + fn bytes_encode((word, pos): &Self::EItem) -> Result, BoxedError> { let pos = pos.to_be_bytes(); let mut bytes = Vec::with_capacity(word.len() + 1 + pos.len()); @@ -69,6 +74,6 @@ impl<'a> heed::BytesEncode<'a> for StrBEU16Codec { bytes.push(0); bytes.extend_from_slice(&pos[..]); - Some(Cow::Owned(bytes)) + Ok(Cow::Owned(bytes)) } } diff --git a/milli/src/heed_codec/str_ref.rs b/milli/src/heed_codec/str_ref.rs index ced5cc65e..bdf262a46 100644 --- a/milli/src/heed_codec/str_ref.rs +++ b/milli/src/heed_codec/str_ref.rs @@ -1,6 +1,6 @@ use std::borrow::Cow; -use heed::{BytesDecode, BytesEncode}; +use heed::{BoxedError, BytesDecode, BytesEncode}; /// A codec for values of type `&str`. Unlike `Str`, its `EItem` and `DItem` associated /// types are equivalent (= `&'a str`) and these values can reside within another structure. @@ -8,15 +8,14 @@ pub struct StrRefCodec; impl<'a> BytesEncode<'a> for StrRefCodec { type EItem = &'a str; - fn bytes_encode(item: &'a &'a str) -> Option> { - Some(Cow::Borrowed(item.as_bytes())) + fn bytes_encode(item: &'a &'a str) -> Result, BoxedError> { + Ok(Cow::Borrowed(item.as_bytes())) } } impl<'a> BytesDecode<'a> for StrRefCodec { type DItem = &'a str; - fn bytes_decode(bytes: &'a [u8]) -> Option { - let s = std::str::from_utf8(bytes).ok()?; - Some(s) + fn bytes_decode(bytes: &'a [u8]) -> Result { + std::str::from_utf8(bytes).map_err(Into::into) } } diff --git a/milli/src/heed_codec/str_str_u8_codec.rs b/milli/src/heed_codec/str_str_u8_codec.rs index 60be8ddc7..0aedf0c94 100644 --- a/milli/src/heed_codec/str_str_u8_codec.rs +++ b/milli/src/heed_codec/str_str_u8_codec.rs @@ -1,32 +1,36 @@ use std::borrow::Cow; +use std::ffi::CStr; use std::str; +use heed::BoxedError; + +use super::SliceTooShortError; + pub struct U8StrStrCodec; impl<'a> heed::BytesDecode<'a> for U8StrStrCodec { type DItem = (u8, &'a str, &'a str); - fn bytes_decode(bytes: &'a [u8]) -> Option { - let (n, bytes) = bytes.split_first()?; - let s1_end = bytes.iter().position(|b| *b == 0)?; - let (s1_bytes, rest) = bytes.split_at(s1_end); - let s2_bytes = &rest[1..]; - let s1 = str::from_utf8(s1_bytes).ok()?; - let s2 = str::from_utf8(s2_bytes).ok()?; - Some((*n, s1, s2)) + fn bytes_decode(bytes: &'a [u8]) -> Result { + let (n, bytes) = bytes.split_first().ok_or(SliceTooShortError)?; + let cstr = CStr::from_bytes_until_nul(bytes)?; + let s1 = cstr.to_str()?; + // skip '\0' byte between the two strings. 
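// --- Editor's note: a minimal sketch, not part of the patch, of the CStr-based
// parsing adopted in these codecs: `CStr::from_bytes_until_nul` locates the
// first NUL separator and returns an error (instead of `None`) when missing.
use std::ffi::CStr;
use std::str;

fn split_nul_separated(bytes: &[u8]) -> Result<(&str, &str), heed::BoxedError> {
    let first = CStr::from_bytes_until_nul(bytes)?.to_str()?;
    // Everything after the NUL byte belongs to the second string.
    let second = str::from_utf8(&bytes[first.len() + 1..])?;
    Ok((first, second))
}
// ---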
+ let s2 = str::from_utf8(&bytes[s1.len() + 1..])?; + Ok((*n, s1, s2)) } } impl<'a> heed::BytesEncode<'a> for U8StrStrCodec { type EItem = (u8, &'a str, &'a str); - fn bytes_encode((n, s1, s2): &Self::EItem) -> Option> { + fn bytes_encode((n, s1, s2): &Self::EItem) -> Result, BoxedError> { let mut bytes = Vec::with_capacity(s1.len() + s2.len() + 1); bytes.push(*n); bytes.extend_from_slice(s1.as_bytes()); bytes.push(0); bytes.extend_from_slice(s2.as_bytes()); - Some(Cow::Owned(bytes)) + Ok(Cow::Owned(bytes)) } } pub struct UncheckedU8StrStrCodec; @@ -34,24 +38,25 @@ pub struct UncheckedU8StrStrCodec; impl<'a> heed::BytesDecode<'a> for UncheckedU8StrStrCodec { type DItem = (u8, &'a [u8], &'a [u8]); - fn bytes_decode(bytes: &'a [u8]) -> Option { - let (n, bytes) = bytes.split_first()?; - let s1_end = bytes.iter().position(|b| *b == 0)?; - let (s1_bytes, rest) = bytes.split_at(s1_end); - let s2_bytes = &rest[1..]; - Some((*n, s1_bytes, s2_bytes)) + fn bytes_decode(bytes: &'a [u8]) -> Result { + let (n, bytes) = bytes.split_first().ok_or(SliceTooShortError)?; + let cstr = CStr::from_bytes_until_nul(bytes)?; + let s1_bytes = cstr.to_bytes(); + // skip '\0' byte between the two strings. + let s2_bytes = &bytes[s1_bytes.len() + 1..]; + Ok((*n, s1_bytes, s2_bytes)) } } impl<'a> heed::BytesEncode<'a> for UncheckedU8StrStrCodec { type EItem = (u8, &'a [u8], &'a [u8]); - fn bytes_encode((n, s1, s2): &Self::EItem) -> Option> { + fn bytes_encode((n, s1, s2): &Self::EItem) -> Result, BoxedError> { let mut bytes = Vec::with_capacity(s1.len() + s2.len() + 1); bytes.push(*n); bytes.extend_from_slice(s1); bytes.push(0); bytes.extend_from_slice(s2); - Some(Cow::Owned(bytes)) + Ok(Cow::Owned(bytes)) } } diff --git a/milli/src/index.rs b/milli/src/index.rs index f8cceb0ef..800edcbfc 100644 --- a/milli/src/index.rs +++ b/milli/src/index.rs @@ -4,9 +4,8 @@ use std::fs::File; use std::path::Path; use charabia::{Language, Script}; -use heed::flags::Flags; use heed::types::*; -use heed::{CompactionOption, Database, PolyDatabase, RoTxn, RwTxn}; +use heed::{CompactionOption, Database, RoTxn, RwTxn, Unspecified}; use roaring::RoaringBitmap; use rstar::RTree; use time::OffsetDateTime; @@ -27,7 +26,7 @@ use crate::{ default_criteria, CboRoaringBitmapCodec, Criterion, DocumentId, ExternalDocumentsIds, FacetDistribution, FieldDistribution, FieldId, FieldIdWordCountCodec, GeoPoint, ObkvCodec, OrderBy, Result, RoaringBitmapCodec, RoaringBitmapLenCodec, Search, U8StrStrCodec, BEU16, - BEU32, + BEU32, BEU64, }; /// The HNSW data-structure that we serialize, fill and search in. @@ -109,10 +108,10 @@ pub struct Index { pub(crate) env: heed::Env, /// Contains many different types (e.g. the fields ids map). - pub(crate) main: PolyDatabase, + pub(crate) main: Database, /// Maps the external documents ids with the internal document id. - pub external_documents_ids: Database>, + pub external_documents_ids: Database, /// A word and all the documents ids containing the word. pub word_docids: Database, @@ -158,7 +157,7 @@ pub struct Index { /// Maps the facet field id of the normalized-for-search string facets with their original versions. pub facet_id_normalized_string_strings: Database>>, /// Maps the facet field id of the string facets with an FST containing all the facets values. - pub facet_id_string_fst: Database, FstSetCodec>, + pub facet_id_string_fst: Database, /// Maps the document id, the facet field id and the numbers. 
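// --- Editor's note: sketch, not part of the patch. With heed 0.20 the zerocopy
// wrappers are gone: aliases like BEU16/BEU32 now point at heed's own integer
// codecs, so typed databases such as the fields below are declared directly
// with them. `IndexSketch` is a hypothetical standalone equivalent.
use heed::byteorder::BE;
use heed::types::U16;
use heed::Database;

struct IndexSketch {
    // A big-endian u16 facet field id mapped to a roaring bitmap of documents.
    facet_id_exists_docids: Database<U16<BE>, milli::CboRoaringBitmapCodec>,
}
// ---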
pub field_id_docid_facet_f64s: Database, @@ -166,10 +165,10 @@ pub struct Index { pub field_id_docid_facet_strings: Database, /// Maps a vector id to the document id that have it. - pub vector_id_docid: Database, OwnedType>, + pub vector_id_docid: Database, /// Maps the document id to the document as an obkv store. - pub(crate) documents: Database, ObkvCodec>, + pub(crate) documents: Database, } impl Index { @@ -182,11 +181,10 @@ impl Index { use db_name::*; options.max_dbs(24); - unsafe { options.flag(Flags::MdbAlwaysFreePages) }; let env = options.open(path)?; let mut wtxn = env.write_txn()?; - let main = env.create_poly_database(&mut wtxn, Some(MAIN))?; + let main = env.database_options().name(MAIN).create(&mut wtxn)?; let word_docids = env.create_database(&mut wtxn, Some(WORD_DOCIDS))?; let external_documents_ids = env.create_database(&mut wtxn, Some(EXTERNAL_DOCUMENTS_IDS))?; @@ -264,24 +262,16 @@ impl Index { fn set_creation_dates( env: &heed::Env, - main: PolyDatabase, + main: Database, created_at: OffsetDateTime, updated_at: OffsetDateTime, ) -> heed::Result<()> { let mut txn = env.write_txn()?; // The db was just created, we update its metadata with the relevant information. - if main.get::<_, Str, SerdeJson>(&txn, main_key::CREATED_AT_KEY)?.is_none() - { - main.put::<_, Str, SerdeJson>( - &mut txn, - main_key::UPDATED_AT_KEY, - &updated_at, - )?; - main.put::<_, Str, SerdeJson>( - &mut txn, - main_key::CREATED_AT_KEY, - &created_at, - )?; + let main = main.remap_types::>(); + if main.get(&txn, main_key::CREATED_AT_KEY)?.is_none() { + main.put(&mut txn, main_key::UPDATED_AT_KEY, &updated_at)?; + main.put(&mut txn, main_key::CREATED_AT_KEY, &created_at)?; txn.commit()?; } Ok(()) @@ -318,12 +308,12 @@ impl Index { /// /// This value is the maximum between the map size passed during the opening of the index /// and the on-disk size of the index at the time of opening. - pub fn map_size(&self) -> Result { - Ok(self.env.map_size()?) + pub fn map_size(&self) -> usize { + self.env.info().map_size } - pub fn copy_to_path>(&self, path: P, option: CompactionOption) -> Result { - self.env.copy_to_path(path, option).map_err(Into::into) + pub fn copy_to_file>(&self, path: P, option: CompactionOption) -> Result { + self.env.copy_to_file(path, option).map_err(Into::into) } /// Returns an `EnvClosingEvent` that can be used to wait for the closing event, @@ -343,21 +333,28 @@ impl Index { wtxn: &mut RwTxn, docids: &RoaringBitmap, ) -> heed::Result<()> { - self.main.put::<_, Str, RoaringBitmapCodec>(wtxn, main_key::DOCUMENTS_IDS_KEY, docids) + self.main.remap_types::().put( + wtxn, + main_key::DOCUMENTS_IDS_KEY, + docids, + ) } /// Returns the internal documents ids. pub fn documents_ids(&self, rtxn: &RoTxn) -> heed::Result { Ok(self .main - .get::<_, Str, RoaringBitmapCodec>(rtxn, main_key::DOCUMENTS_IDS_KEY)? + .remap_types::() + .get(rtxn, main_key::DOCUMENTS_IDS_KEY)? .unwrap_or_default()) } /// Returns the number of documents indexed in the database. pub fn number_of_documents(&self, rtxn: &RoTxn) -> Result { - let count = - self.main.get::<_, Str, RoaringBitmapLenCodec>(rtxn, main_key::DOCUMENTS_IDS_KEY)?; + let count = self + .main + .remap_types::() + .get(rtxn, main_key::DOCUMENTS_IDS_KEY)?; Ok(count.unwrap_or_default()) } @@ -366,17 +363,17 @@ impl Index { /// Writes the documents primary key, this is the field name that is used to store the id. 
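// --- Editor's note: sketch, not part of the patch. The former PolyDatabase is
// now a `Database<Unspecified, Unspecified>` that each call site remaps to the
// codecs it needs; the key name below is illustrative.
use heed::types::Str;
use heed::{Database, RwTxn, Unspecified};

fn put_primary_key_sketch(
    main: Database<Unspecified, Unspecified>,
    wtxn: &mut RwTxn,
    primary_key: &str,
) -> heed::Result<()> {
    // remap_types is zero-cost: it only changes the compile-time codec types.
    main.remap_types::<Str, Str>().put(wtxn, "primary-key", primary_key)
}
// ---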
pub(crate) fn put_primary_key(&self, wtxn: &mut RwTxn, primary_key: &str) -> heed::Result<()> { self.set_updated_at(wtxn, &OffsetDateTime::now_utc())?; - self.main.put::<_, Str, Str>(wtxn, main_key::PRIMARY_KEY_KEY, primary_key) + self.main.remap_types::().put(wtxn, main_key::PRIMARY_KEY_KEY, primary_key) } /// Deletes the primary key of the documents, this can be done to reset indexes settings. pub(crate) fn delete_primary_key(&self, wtxn: &mut RwTxn) -> heed::Result { - self.main.delete::<_, Str>(wtxn, main_key::PRIMARY_KEY_KEY) + self.main.remap_key_type::().delete(wtxn, main_key::PRIMARY_KEY_KEY) } /// Returns the documents primary key, `None` if it hasn't been defined. pub fn primary_key<'t>(&self, rtxn: &'t RoTxn) -> heed::Result> { - self.main.get::<_, Str, Str>(rtxn, main_key::PRIMARY_KEY_KEY) + self.main.remap_types::().get(rtxn, main_key::PRIMARY_KEY_KEY) } /* external documents ids */ @@ -396,7 +393,11 @@ impl Index { wtxn: &mut RwTxn, map: &FieldsIdsMap, ) -> heed::Result<()> { - self.main.put::<_, Str, SerdeJson>(wtxn, main_key::FIELDS_IDS_MAP_KEY, map) + self.main.remap_types::>().put( + wtxn, + main_key::FIELDS_IDS_MAP_KEY, + map, + ) } /// Returns the fields ids map which associate the documents keys with an internal field id @@ -404,7 +405,8 @@ impl Index { pub fn fields_ids_map(&self, rtxn: &RoTxn) -> heed::Result { Ok(self .main - .get::<_, Str, SerdeJson>(rtxn, main_key::FIELDS_IDS_MAP_KEY)? + .remap_types::>() + .get(rtxn, main_key::FIELDS_IDS_MAP_KEY)? .unwrap_or_default()) } @@ -416,19 +418,24 @@ impl Index { wtxn: &mut RwTxn, rtree: &RTree, ) -> heed::Result<()> { - self.main.put::<_, Str, SerdeBincode>>(wtxn, main_key::GEO_RTREE_KEY, rtree) + self.main.remap_types::>>().put( + wtxn, + main_key::GEO_RTREE_KEY, + rtree, + ) } /// Delete the `rtree` which associates coordinates to documents ids. pub(crate) fn delete_geo_rtree(&self, wtxn: &mut RwTxn) -> heed::Result { - self.main.delete::<_, Str>(wtxn, main_key::GEO_RTREE_KEY) + self.main.remap_key_type::().delete(wtxn, main_key::GEO_RTREE_KEY) } /// Returns the `rtree` which associates coordinates to documents ids. pub fn geo_rtree(&self, rtxn: &RoTxn) -> Result>> { match self .main - .get::<_, Str, SerdeBincode>>(rtxn, main_key::GEO_RTREE_KEY)? + .remap_types::>>() + .get(rtxn, main_key::GEO_RTREE_KEY)? { Some(rtree) => Ok(Some(rtree)), None => Ok(None), @@ -443,7 +450,7 @@ impl Index { wtxn: &mut RwTxn, docids: &RoaringBitmap, ) -> heed::Result<()> { - self.main.put::<_, Str, RoaringBitmapCodec>( + self.main.remap_types::().put( wtxn, main_key::GEO_FACETED_DOCUMENTS_IDS_KEY, docids, @@ -452,14 +459,15 @@ impl Index { /// Delete the documents ids that are faceted with a _geo field. pub(crate) fn delete_geo_faceted_documents_ids(&self, wtxn: &mut RwTxn) -> heed::Result { - self.main.delete::<_, Str>(wtxn, main_key::GEO_FACETED_DOCUMENTS_IDS_KEY) + self.main.remap_key_type::().delete(wtxn, main_key::GEO_FACETED_DOCUMENTS_IDS_KEY) } /// Retrieve all the documents ids that are faceted with a _geo field. pub fn geo_faceted_documents_ids(&self, rtxn: &RoTxn) -> heed::Result { match self .main - .get::<_, Str, RoaringBitmapCodec>(rtxn, main_key::GEO_FACETED_DOCUMENTS_IDS_KEY)? + .remap_types::() + .get(rtxn, main_key::GEO_FACETED_DOCUMENTS_IDS_KEY)? 
{ Some(docids) => Ok(docids), None => Ok(RoaringBitmap::new()), @@ -474,22 +482,22 @@ impl Index { self.delete_vector_hnsw(wtxn)?; let chunk_size = 1024 * 1024 * (1024 + 512); // 1.5 GiB - let bytes = bincode::serialize(hnsw).map_err(|_| heed::Error::Encoding)?; + let bytes = bincode::serialize(hnsw).map_err(Into::into).map_err(heed::Error::Encoding)?; for (i, chunk) in bytes.chunks(chunk_size).enumerate() { let i = i as u32; let mut key = main_key::VECTOR_HNSW_KEY_PREFIX.as_bytes().to_vec(); key.extend_from_slice(&i.to_be_bytes()); - self.main.put::<_, ByteSlice, ByteSlice>(wtxn, &key, chunk)?; + self.main.remap_types::().put(wtxn, &key, chunk)?; } Ok(()) } /// Delete the `hnsw`. pub(crate) fn delete_vector_hnsw(&self, wtxn: &mut RwTxn) -> heed::Result { - let mut iter = self.main.prefix_iter_mut::<_, ByteSlice, DecodeIgnore>( - wtxn, - main_key::VECTOR_HNSW_KEY_PREFIX.as_bytes(), - )?; + let mut iter = self + .main + .remap_types::() + .prefix_iter_mut(wtxn, main_key::VECTOR_HNSW_KEY_PREFIX.as_bytes())?; let mut deleted = false; while iter.next().transpose()?.is_some() { // We do not keep a reference to the key or the value. @@ -501,8 +509,10 @@ impl Index { /// Returns the `hnsw`. pub fn vector_hnsw(&self, rtxn: &RoTxn) -> Result> { let mut slices = Vec::new(); - for result in - self.main.prefix_iter::<_, Str, ByteSlice>(rtxn, main_key::VECTOR_HNSW_KEY_PREFIX)? + for result in self + .main + .remap_types::() + .prefix_iter(rtxn, main_key::VECTOR_HNSW_KEY_PREFIX)? { let (_, slice) = result?; slices.push(slice); @@ -512,7 +522,11 @@ impl Index { Ok(None) } else { let readable_slices: ReadableSlices<_> = slices.into_iter().collect(); - Ok(Some(bincode::deserialize_from(readable_slices).map_err(|_| heed::Error::Decoding)?)) + Ok(Some( + bincode::deserialize_from(readable_slices) + .map_err(Into::into) + .map_err(heed::Error::Decoding)?, + )) } } @@ -525,7 +539,7 @@ impl Index { wtxn: &mut RwTxn, distribution: &FieldDistribution, ) -> heed::Result<()> { - self.main.put::<_, Str, SerdeJson>( + self.main.remap_types::>().put( wtxn, main_key::FIELD_DISTRIBUTION_KEY, distribution, @@ -537,7 +551,8 @@ impl Index { pub fn field_distribution(&self, rtxn: &RoTxn) -> heed::Result { Ok(self .main - .get::<_, Str, SerdeJson>(rtxn, main_key::FIELD_DISTRIBUTION_KEY)? + .remap_types::>() + .get(rtxn, main_key::FIELD_DISTRIBUTION_KEY)? .unwrap_or_default()) } @@ -550,7 +565,7 @@ impl Index { wtxn: &mut RwTxn, fields: &[&str], ) -> heed::Result<()> { - self.main.put::<_, Str, SerdeBincode<&[&str]>>( + self.main.remap_types::>().put( wtxn, main_key::DISPLAYED_FIELDS_KEY, &fields, @@ -560,13 +575,15 @@ impl Index { /// Deletes the displayed fields ids, this will make the engine to display /// all the documents attributes in the order of the `FieldsIdsMap`. pub(crate) fn delete_displayed_fields(&self, wtxn: &mut RwTxn) -> heed::Result { - self.main.delete::<_, Str>(wtxn, main_key::DISPLAYED_FIELDS_KEY) + self.main.remap_key_type::().delete(wtxn, main_key::DISPLAYED_FIELDS_KEY) } /// Returns the displayed fields in the order they were set by the user. If it returns /// `None` it means that all the attributes are set as displayed in the order of the `FieldsIdsMap`. pub fn displayed_fields<'t>(&self, rtxn: &'t RoTxn) -> heed::Result>> { - self.main.get::<_, Str, SerdeBincode>>(rtxn, main_key::DISPLAYED_FIELDS_KEY) + self.main + .remap_types::>>() + .get(rtxn, main_key::DISPLAYED_FIELDS_KEY) } /// Identical to `displayed_fields`, but returns the ids instead. 
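// --- Editor's note: sketch, not part of the patch, of the read side of the
// same remapping pattern: a bincode-encoded, optional setting fetched from the
// untyped main database (key name illustrative).
use heed::types::{SerdeBincode, Str};
use heed::{Database, RoTxn, Unspecified};

fn displayed_fields_sketch<'t>(
    main: Database<Unspecified, Unspecified>,
    rtxn: &'t RoTxn,
) -> heed::Result<Option<Vec<&'t str>>> {
    main.remap_types::<Str, SerdeBincode<Vec<&'t str>>>().get(rtxn, "displayed-fields")
}
// ---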
@@ -646,7 +663,7 @@ impl Index { /// Writes the searchable fields, when this list is specified, only these are indexed. fn put_searchable_fields(&self, wtxn: &mut RwTxn, fields: &[&str]) -> heed::Result<()> { - self.main.put::<_, Str, SerdeBincode<&[&str]>>( + self.main.remap_types::>().put( wtxn, main_key::SEARCHABLE_FIELDS_KEY, &fields, @@ -655,13 +672,15 @@ impl Index { /// Deletes the searchable fields, when no fields are specified, all fields are indexed. fn delete_searchable_fields(&self, wtxn: &mut RwTxn) -> heed::Result { - self.main.delete::<_, Str>(wtxn, main_key::SEARCHABLE_FIELDS_KEY) + self.main.remap_key_type::().delete(wtxn, main_key::SEARCHABLE_FIELDS_KEY) } /// Returns the searchable fields, those are the fields that are indexed, /// if the searchable fields aren't there it means that **all** the fields are indexed. pub fn searchable_fields<'t>(&self, rtxn: &'t RoTxn) -> heed::Result>> { - self.main.get::<_, Str, SerdeBincode>>(rtxn, main_key::SEARCHABLE_FIELDS_KEY) + self.main + .remap_types::>>() + .get(rtxn, main_key::SEARCHABLE_FIELDS_KEY) } /// Identical to `searchable_fields`, but returns the ids instead. @@ -687,7 +706,7 @@ impl Index { wtxn: &mut RwTxn, fields: &[&str], ) -> heed::Result<()> { - self.main.put::<_, Str, SerdeBincode<_>>( + self.main.remap_types::>().put( wtxn, main_key::USER_DEFINED_SEARCHABLE_FIELDS_KEY, &fields, @@ -699,7 +718,7 @@ impl Index { &self, wtxn: &mut RwTxn, ) -> heed::Result { - self.main.delete::<_, Str>(wtxn, main_key::USER_DEFINED_SEARCHABLE_FIELDS_KEY) + self.main.remap_key_type::().delete(wtxn, main_key::USER_DEFINED_SEARCHABLE_FIELDS_KEY) } /// Returns the user defined searchable fields. @@ -708,7 +727,8 @@ impl Index { rtxn: &'t RoTxn, ) -> heed::Result>> { self.main - .get::<_, Str, SerdeBincode>>(rtxn, main_key::USER_DEFINED_SEARCHABLE_FIELDS_KEY) + .remap_types::>>() + .get(rtxn, main_key::USER_DEFINED_SEARCHABLE_FIELDS_KEY) } /* filterable fields */ @@ -719,19 +739,24 @@ impl Index { wtxn: &mut RwTxn, fields: &HashSet, ) -> heed::Result<()> { - self.main.put::<_, Str, SerdeJson<_>>(wtxn, main_key::FILTERABLE_FIELDS_KEY, fields) + self.main.remap_types::>().put( + wtxn, + main_key::FILTERABLE_FIELDS_KEY, + fields, + ) } /// Deletes the filterable fields ids in the database. pub(crate) fn delete_filterable_fields(&self, wtxn: &mut RwTxn) -> heed::Result { - self.main.delete::<_, Str>(wtxn, main_key::FILTERABLE_FIELDS_KEY) + self.main.remap_key_type::().delete(wtxn, main_key::FILTERABLE_FIELDS_KEY) } /// Returns the filterable fields names. pub fn filterable_fields(&self, rtxn: &RoTxn) -> heed::Result> { Ok(self .main - .get::<_, Str, SerdeJson<_>>(rtxn, main_key::FILTERABLE_FIELDS_KEY)? + .remap_types::>() + .get(rtxn, main_key::FILTERABLE_FIELDS_KEY)? .unwrap_or_default()) } @@ -758,19 +783,24 @@ impl Index { wtxn: &mut RwTxn, fields: &HashSet, ) -> heed::Result<()> { - self.main.put::<_, Str, SerdeJson<_>>(wtxn, main_key::SORTABLE_FIELDS_KEY, fields) + self.main.remap_types::>().put( + wtxn, + main_key::SORTABLE_FIELDS_KEY, + fields, + ) } /// Deletes the sortable fields ids in the database. pub(crate) fn delete_sortable_fields(&self, wtxn: &mut RwTxn) -> heed::Result { - self.main.delete::<_, Str>(wtxn, main_key::SORTABLE_FIELDS_KEY) + self.main.remap_key_type::().delete(wtxn, main_key::SORTABLE_FIELDS_KEY) } /// Returns the sortable fields names. pub fn sortable_fields(&self, rtxn: &RoTxn) -> heed::Result> { Ok(self .main - .get::<_, Str, SerdeJson<_>>(rtxn, main_key::SORTABLE_FIELDS_KEY)? 
+ .remap_types::>() + .get(rtxn, main_key::SORTABLE_FIELDS_KEY)? .unwrap_or_default()) } @@ -789,14 +819,19 @@ impl Index { wtxn: &mut RwTxn, fields: &HashSet, ) -> heed::Result<()> { - self.main.put::<_, Str, SerdeJson<_>>(wtxn, main_key::HIDDEN_FACETED_FIELDS_KEY, fields) + self.main.remap_types::>().put( + wtxn, + main_key::HIDDEN_FACETED_FIELDS_KEY, + fields, + ) } /// Returns the faceted fields names. pub fn faceted_fields(&self, rtxn: &RoTxn) -> heed::Result> { Ok(self .main - .get::<_, Str, SerdeJson<_>>(rtxn, main_key::HIDDEN_FACETED_FIELDS_KEY)? + .remap_types::>() + .get(rtxn, main_key::HIDDEN_FACETED_FIELDS_KEY)? .unwrap_or_default()) } @@ -863,7 +898,7 @@ impl Index { rtxn: &RoTxn, field_id: FieldId, ) -> heed::Result { - match self.facet_id_is_null_docids.get(rtxn, &BEU16::new(field_id))? { + match self.facet_id_is_null_docids.get(rtxn, &field_id)? { Some(docids) => Ok(docids), None => Ok(RoaringBitmap::new()), } @@ -875,7 +910,7 @@ impl Index { rtxn: &RoTxn, field_id: FieldId, ) -> heed::Result { - match self.facet_id_is_empty_docids.get(rtxn, &BEU16::new(field_id))? { + match self.facet_id_is_empty_docids.get(rtxn, &field_id)? { Some(docids) => Ok(docids), None => Ok(RoaringBitmap::new()), } @@ -887,7 +922,7 @@ impl Index { rtxn: &RoTxn, field_id: FieldId, ) -> heed::Result { - match self.facet_id_exists_docids.get(rtxn, &BEU16::new(field_id))? { + match self.facet_id_exists_docids.get(rtxn, &field_id)? { Some(docids) => Ok(docids), None => Ok(RoaringBitmap::new()), } @@ -900,15 +935,15 @@ impl Index { wtxn: &mut RwTxn, distinct_field: &str, ) -> heed::Result<()> { - self.main.put::<_, Str, Str>(wtxn, main_key::DISTINCT_FIELD_KEY, distinct_field) + self.main.remap_types::().put(wtxn, main_key::DISTINCT_FIELD_KEY, distinct_field) } pub fn distinct_field<'a>(&self, rtxn: &'a RoTxn) -> heed::Result> { - self.main.get::<_, Str, Str>(rtxn, main_key::DISTINCT_FIELD_KEY) + self.main.remap_types::().get(rtxn, main_key::DISTINCT_FIELD_KEY) } pub(crate) fn delete_distinct_field(&self, wtxn: &mut RwTxn) -> heed::Result { - self.main.delete::<_, Str>(wtxn, main_key::DISTINCT_FIELD_KEY) + self.main.remap_key_type::().delete(wtxn, main_key::DISTINCT_FIELD_KEY) } /* criteria */ @@ -918,15 +953,23 @@ impl Index { wtxn: &mut RwTxn, criteria: &[Criterion], ) -> heed::Result<()> { - self.main.put::<_, Str, SerdeJson<&[Criterion]>>(wtxn, main_key::CRITERIA_KEY, &criteria) + self.main.remap_types::>().put( + wtxn, + main_key::CRITERIA_KEY, + &criteria, + ) } pub(crate) fn delete_criteria(&self, wtxn: &mut RwTxn) -> heed::Result { - self.main.delete::<_, Str>(wtxn, main_key::CRITERIA_KEY) + self.main.remap_key_type::().delete(wtxn, main_key::CRITERIA_KEY) } pub fn criteria(&self, rtxn: &RoTxn) -> heed::Result> { - match self.main.get::<_, Str, SerdeJson>>(rtxn, main_key::CRITERIA_KEY)? { + match self + .main + .remap_types::>>() + .get(rtxn, main_key::CRITERIA_KEY)? + { Some(criteria) => Ok(criteria), None => Ok(default_criteria()), } @@ -940,12 +983,16 @@ impl Index { wtxn: &mut RwTxn, fst: &fst::Set, ) -> heed::Result<()> { - self.main.put::<_, Str, ByteSlice>(wtxn, main_key::WORDS_FST_KEY, fst.as_fst().as_bytes()) + self.main.remap_types::().put( + wtxn, + main_key::WORDS_FST_KEY, + fst.as_fst().as_bytes(), + ) } /// Returns the FST which is the words dictionary of the engine. pub fn words_fst<'t>(&self, rtxn: &'t RoTxn) -> Result>> { - match self.main.get::<_, Str, ByteSlice>(rtxn, main_key::WORDS_FST_KEY)? { + match self.main.remap_types::().get(rtxn, main_key::WORDS_FST_KEY)? 
{ Some(bytes) => Ok(fst::Set::new(bytes)?.map_data(Cow::Borrowed)?), None => Ok(fst::Set::default().map_data(Cow::Owned)?), } @@ -958,15 +1005,19 @@ impl Index { wtxn: &mut RwTxn, fst: &fst::Set, ) -> heed::Result<()> { - self.main.put::<_, Str, ByteSlice>(wtxn, main_key::STOP_WORDS_KEY, fst.as_fst().as_bytes()) + self.main.remap_types::().put( + wtxn, + main_key::STOP_WORDS_KEY, + fst.as_fst().as_bytes(), + ) } pub(crate) fn delete_stop_words(&self, wtxn: &mut RwTxn) -> heed::Result { - self.main.delete::<_, Str>(wtxn, main_key::STOP_WORDS_KEY) + self.main.remap_key_type::().delete(wtxn, main_key::STOP_WORDS_KEY) } pub fn stop_words<'t>(&self, rtxn: &'t RoTxn) -> Result>> { - match self.main.get::<_, Str, ByteSlice>(rtxn, main_key::STOP_WORDS_KEY)? { + match self.main.remap_types::().get(rtxn, main_key::STOP_WORDS_KEY)? { Some(bytes) => Ok(Some(fst::Set::new(bytes)?)), None => Ok(None), } @@ -979,18 +1030,22 @@ impl Index { wtxn: &mut RwTxn, set: &BTreeSet, ) -> heed::Result<()> { - self.main.put::<_, Str, SerdeBincode<_>>(wtxn, main_key::NON_SEPARATOR_TOKENS_KEY, set) + self.main.remap_types::>().put( + wtxn, + main_key::NON_SEPARATOR_TOKENS_KEY, + set, + ) } pub(crate) fn delete_non_separator_tokens(&self, wtxn: &mut RwTxn) -> heed::Result { - self.main.delete::<_, Str>(wtxn, main_key::NON_SEPARATOR_TOKENS_KEY) + self.main.remap_key_type::().delete(wtxn, main_key::NON_SEPARATOR_TOKENS_KEY) } pub fn non_separator_tokens(&self, rtxn: &RoTxn) -> Result>> { - Ok(self.main.get::<_, Str, SerdeBincode>>( - rtxn, - main_key::NON_SEPARATOR_TOKENS_KEY, - )?) + Ok(self + .main + .remap_types::>>() + .get(rtxn, main_key::NON_SEPARATOR_TOKENS_KEY)?) } /* separator tokens */ @@ -1000,17 +1055,22 @@ impl Index { wtxn: &mut RwTxn, set: &BTreeSet, ) -> heed::Result<()> { - self.main.put::<_, Str, SerdeBincode<_>>(wtxn, main_key::SEPARATOR_TOKENS_KEY, set) + self.main.remap_types::>().put( + wtxn, + main_key::SEPARATOR_TOKENS_KEY, + set, + ) } pub(crate) fn delete_separator_tokens(&self, wtxn: &mut RwTxn) -> heed::Result { - self.main.delete::<_, Str>(wtxn, main_key::SEPARATOR_TOKENS_KEY) + self.main.remap_key_type::().delete(wtxn, main_key::SEPARATOR_TOKENS_KEY) } pub fn separator_tokens(&self, rtxn: &RoTxn) -> Result>> { Ok(self .main - .get::<_, Str, SerdeBincode>>(rtxn, main_key::SEPARATOR_TOKENS_KEY)?) + .remap_types::>>() + .get(rtxn, main_key::SEPARATOR_TOKENS_KEY)?) } /* separators easing method */ @@ -1040,17 +1100,18 @@ impl Index { wtxn: &mut RwTxn, set: &BTreeSet, ) -> heed::Result<()> { - self.main.put::<_, Str, SerdeBincode<_>>(wtxn, main_key::DICTIONARY_KEY, set) + self.main.remap_types::>().put(wtxn, main_key::DICTIONARY_KEY, set) } pub(crate) fn delete_dictionary(&self, wtxn: &mut RwTxn) -> heed::Result { - self.main.delete::<_, Str>(wtxn, main_key::DICTIONARY_KEY) + self.main.remap_key_type::().delete(wtxn, main_key::DICTIONARY_KEY) } pub fn dictionary(&self, rtxn: &RoTxn) -> Result>> { Ok(self .main - .get::<_, Str, SerdeBincode>>(rtxn, main_key::DICTIONARY_KEY)?) + .remap_types::>>() + .get(rtxn, main_key::DICTIONARY_KEY)?) 
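// --- Editor's note: sketch, not part of the patch. FSTs (words, stop words,
// prefixes) are stored as raw bytes, so the main database is remapped to the
// renamed `Bytes` codec (formerly `ByteSlice`); the key is illustrative.
use heed::types::{Bytes, Str};
use heed::{Database, RwTxn, Unspecified};

fn put_stop_words_sketch<A: AsRef<[u8]>>(
    main: Database<Unspecified, Unspecified>,
    wtxn: &mut RwTxn,
    fst: &fst::Set<A>,
) -> heed::Result<()> {
    main.remap_types::<Str, Bytes>().put(wtxn, "stop-words", fst.as_fst().as_bytes())
}
// ---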
} /* synonyms */ @@ -1061,8 +1122,12 @@ impl Index { synonyms: &HashMap, Vec>>, user_defined_synonyms: &BTreeMap>, ) -> heed::Result<()> { - self.main.put::<_, Str, SerdeBincode<_>>(wtxn, main_key::SYNONYMS_KEY, synonyms)?; - self.main.put::<_, Str, SerdeBincode<_>>( + self.main.remap_types::>().put( + wtxn, + main_key::SYNONYMS_KEY, + synonyms, + )?; + self.main.remap_types::>().put( wtxn, main_key::USER_DEFINED_SYNONYMS_KEY, user_defined_synonyms, @@ -1070,8 +1135,8 @@ impl Index { } pub(crate) fn delete_synonyms(&self, wtxn: &mut RwTxn) -> heed::Result { - self.main.delete::<_, Str>(wtxn, main_key::SYNONYMS_KEY)?; - self.main.delete::<_, Str>(wtxn, main_key::USER_DEFINED_SYNONYMS_KEY) + self.main.remap_key_type::().delete(wtxn, main_key::SYNONYMS_KEY)?; + self.main.remap_key_type::().delete(wtxn, main_key::USER_DEFINED_SYNONYMS_KEY) } pub fn user_defined_synonyms( @@ -1080,14 +1145,16 @@ impl Index { ) -> heed::Result>> { Ok(self .main - .get::<_, Str, SerdeBincode<_>>(rtxn, main_key::USER_DEFINED_SYNONYMS_KEY)? + .remap_types::>() + .get(rtxn, main_key::USER_DEFINED_SYNONYMS_KEY)? .unwrap_or_default()) } pub fn synonyms(&self, rtxn: &RoTxn) -> heed::Result, Vec>>> { Ok(self .main - .get::<_, Str, SerdeBincode<_>>(rtxn, main_key::SYNONYMS_KEY)? + .remap_types::>() + .get(rtxn, main_key::SYNONYMS_KEY)? .unwrap_or_default()) } @@ -1108,7 +1175,7 @@ impl Index { wtxn: &mut RwTxn, fst: &fst::Set, ) -> heed::Result<()> { - self.main.put::<_, Str, ByteSlice>( + self.main.remap_types::().put( wtxn, main_key::WORDS_PREFIXES_FST_KEY, fst.as_fst().as_bytes(), @@ -1117,7 +1184,7 @@ impl Index { /// Returns the FST which is the words prefixes dictionnary of the engine. pub fn words_prefixes_fst<'t>(&self, rtxn: &'t RoTxn) -> Result>> { - match self.main.get::<_, Str, ByteSlice>(rtxn, main_key::WORDS_PREFIXES_FST_KEY)? { + match self.main.remap_types::().get(rtxn, main_key::WORDS_PREFIXES_FST_KEY)? { Some(bytes) => Ok(fst::Set::new(bytes)?.map_data(Cow::Borrowed)?), None => Ok(fst::Set::default().map_data(Cow::Owned)?), } @@ -1142,7 +1209,7 @@ impl Index { Ok(ids.into_iter().map(move |id| { let kv = self .documents - .get(rtxn, &BEU32::new(id))? + .get(rtxn, &id)? .ok_or(UserError::UnknownInternalDocumentId { document_id: id })?; Ok((id, kv)) })) @@ -1207,7 +1274,8 @@ impl Index { pub fn created_at(&self, rtxn: &RoTxn) -> Result { Ok(self .main - .get::<_, Str, SerdeJson>(rtxn, main_key::CREATED_AT_KEY)? + .remap_types::>() + .get(rtxn, main_key::CREATED_AT_KEY)? .ok_or(InternalError::DatabaseMissingEntry { db_name: db_name::MAIN, key: Some(main_key::CREATED_AT_KEY), @@ -1218,7 +1286,8 @@ impl Index { pub fn updated_at(&self, rtxn: &RoTxn) -> Result { Ok(self .main - .get::<_, Str, SerdeJson>(rtxn, main_key::UPDATED_AT_KEY)? + .remap_types::>() + .get(rtxn, main_key::UPDATED_AT_KEY)? .ok_or(InternalError::DatabaseMissingEntry { db_name: db_name::MAIN, key: Some(main_key::UPDATED_AT_KEY), @@ -1230,14 +1299,18 @@ impl Index { wtxn: &mut RwTxn, time: &OffsetDateTime, ) -> heed::Result<()> { - self.main.put::<_, Str, SerdeJson>(wtxn, main_key::UPDATED_AT_KEY, time) + self.main.remap_types::>().put( + wtxn, + main_key::UPDATED_AT_KEY, + time, + ) } pub fn authorize_typos(&self, txn: &RoTxn) -> heed::Result { // It is not possible to put a bool in heed with OwnedType, so we put a u8 instead. We // identify 0 as being false, and anything else as true. The absence of a value is true, // because by default, we authorize typos. - match self.main.get::<_, Str, OwnedType>(txn, main_key::AUTHORIZE_TYPOS)? 
{ + match self.main.remap_types::().get(txn, main_key::AUTHORIZE_TYPOS)? { Some(0) => Ok(false), _ => Ok(true), } @@ -1247,7 +1320,7 @@ impl Index { // It is not possible to put a bool in heed with OwnedType, so we put a u8 instead. We // identify 0 as being false, and anything else as true. The absence of a value is true, // because by default, we authorize typos. - self.main.put::<_, Str, OwnedType>(txn, main_key::AUTHORIZE_TYPOS, &(flag as u8))?; + self.main.remap_types::().put(txn, main_key::AUTHORIZE_TYPOS, &(flag as u8))?; Ok(()) } @@ -1258,7 +1331,8 @@ impl Index { // because by default, we authorize typos. Ok(self .main - .get::<_, Str, OwnedType>(txn, main_key::ONE_TYPO_WORD_LEN)? + .remap_types::() + .get(txn, main_key::ONE_TYPO_WORD_LEN)? .unwrap_or(DEFAULT_MIN_WORD_LEN_ONE_TYPO)) } @@ -1266,7 +1340,7 @@ impl Index { // It is not possible to put a bool in heed with OwnedType, so we put a u8 instead. We // identify 0 as being false, and anything else as true. The absence of a value is true, // because by default, we authorize typos. - self.main.put::<_, Str, OwnedType>(txn, main_key::ONE_TYPO_WORD_LEN, &val)?; + self.main.remap_types::().put(txn, main_key::ONE_TYPO_WORD_LEN, &val)?; Ok(()) } @@ -1276,7 +1350,8 @@ impl Index { // because by default, we authorize typos. Ok(self .main - .get::<_, Str, OwnedType>(txn, main_key::TWO_TYPOS_WORD_LEN)? + .remap_types::() + .get(txn, main_key::TWO_TYPOS_WORD_LEN)? .unwrap_or(DEFAULT_MIN_WORD_LEN_TWO_TYPOS)) } @@ -1284,13 +1359,13 @@ impl Index { // It is not possible to put a bool in heed with OwnedType, so we put a u8 instead. We // identify 0 as being false, and anything else as true. The absence of a value is true, // because by default, we authorize typos. - self.main.put::<_, Str, OwnedType>(txn, main_key::TWO_TYPOS_WORD_LEN, &val)?; + self.main.remap_types::().put(txn, main_key::TWO_TYPOS_WORD_LEN, &val)?; Ok(()) } /// List the words on which typo are not allowed pub fn exact_words<'t>(&self, txn: &'t RoTxn) -> Result>>> { - match self.main.get::<_, Str, ByteSlice>(txn, main_key::EXACT_WORDS)? { + match self.main.remap_types::().get(txn, main_key::EXACT_WORDS)? { Some(bytes) => Ok(Some(fst::Set::new(bytes)?.map_data(Cow::Borrowed)?)), None => Ok(None), } @@ -1301,7 +1376,7 @@ impl Index { txn: &mut RwTxn, words: &fst::Set, ) -> Result<()> { - self.main.put::<_, Str, ByteSlice>( + self.main.remap_types::().put( txn, main_key::EXACT_WORDS, words.as_fst().as_bytes(), @@ -1313,7 +1388,8 @@ impl Index { pub fn exact_attributes<'t>(&self, txn: &'t RoTxn) -> Result> { Ok(self .main - .get::<_, Str, SerdeBincode>>(txn, main_key::EXACT_ATTRIBUTES)? + .remap_types::>>() + .get(txn, main_key::EXACT_ATTRIBUTES)? .unwrap_or_default()) } @@ -1326,34 +1402,36 @@ impl Index { /// Writes the exact attributes to the database. pub(crate) fn put_exact_attributes(&self, txn: &mut RwTxn, attrs: &[&str]) -> Result<()> { - self.main.put::<_, Str, SerdeBincode<&[&str]>>(txn, main_key::EXACT_ATTRIBUTES, &attrs)?; + self.main.remap_types::>().put( + txn, + main_key::EXACT_ATTRIBUTES, + &attrs, + )?; Ok(()) } /// Clears the exact attributes from the store. 
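// --- Editor's note: sketch, not part of the patch, of the usize -> u64 switch
// visible below: LMDB values need a platform-independent width, so counters are
// stored as big-endian u64 through heed's integer codec instead of OwnedType.
use heed::byteorder::BE;
use heed::types::{Str, U64};
use heed::{Database, RoTxn, Unspecified};

fn max_values_per_facet_sketch(
    main: Database<Unspecified, Unspecified>,
    rtxn: &RoTxn,
) -> heed::Result<Option<u64>> {
    main.remap_types::<Str, U64<BE>>().get(rtxn, "max-values-per-facet")
}
// ---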
pub(crate) fn delete_exact_attributes(&self, txn: &mut RwTxn) -> heed::Result { - self.main.delete::<_, Str>(txn, main_key::EXACT_ATTRIBUTES) + self.main.remap_key_type::().delete(txn, main_key::EXACT_ATTRIBUTES) } - pub fn max_values_per_facet(&self, txn: &RoTxn) -> heed::Result> { - self.main.get::<_, Str, OwnedType>(txn, main_key::MAX_VALUES_PER_FACET) + pub fn max_values_per_facet(&self, txn: &RoTxn) -> heed::Result> { + self.main.remap_types::().get(txn, main_key::MAX_VALUES_PER_FACET) } - pub(crate) fn put_max_values_per_facet(&self, txn: &mut RwTxn, val: usize) -> heed::Result<()> { - self.main.put::<_, Str, OwnedType>(txn, main_key::MAX_VALUES_PER_FACET, &val) + pub(crate) fn put_max_values_per_facet(&self, txn: &mut RwTxn, val: u64) -> heed::Result<()> { + self.main.remap_types::().put(txn, main_key::MAX_VALUES_PER_FACET, &val) } pub(crate) fn delete_max_values_per_facet(&self, txn: &mut RwTxn) -> heed::Result { - self.main.delete::<_, Str>(txn, main_key::MAX_VALUES_PER_FACET) + self.main.remap_key_type::().delete(txn, main_key::MAX_VALUES_PER_FACET) } pub fn sort_facet_values_by(&self, txn: &RoTxn) -> heed::Result> { let mut orders = self .main - .get::<_, Str, SerdeJson>>( - txn, - main_key::SORT_FACET_VALUES_BY, - )? + .remap_types::>>() + .get(txn, main_key::SORT_FACET_VALUES_BY)? .unwrap_or_default(); // Insert the default ordering if it is not already overwritten by the user. orders.entry("*".to_string()).or_insert(OrderBy::Lexicographic); @@ -1365,27 +1443,27 @@ impl Index { txn: &mut RwTxn, val: &HashMap, ) -> heed::Result<()> { - self.main.put::<_, Str, SerdeJson<_>>(txn, main_key::SORT_FACET_VALUES_BY, &val) + self.main.remap_types::>().put(txn, main_key::SORT_FACET_VALUES_BY, &val) } pub(crate) fn delete_sort_facet_values_by(&self, txn: &mut RwTxn) -> heed::Result { - self.main.delete::<_, Str>(txn, main_key::SORT_FACET_VALUES_BY) + self.main.remap_key_type::().delete(txn, main_key::SORT_FACET_VALUES_BY) } - pub fn pagination_max_total_hits(&self, txn: &RoTxn) -> heed::Result> { - self.main.get::<_, Str, OwnedType>(txn, main_key::PAGINATION_MAX_TOTAL_HITS) + pub fn pagination_max_total_hits(&self, txn: &RoTxn) -> heed::Result> { + self.main.remap_types::().get(txn, main_key::PAGINATION_MAX_TOTAL_HITS) } pub(crate) fn put_pagination_max_total_hits( &self, txn: &mut RwTxn, - val: usize, + val: u64, ) -> heed::Result<()> { - self.main.put::<_, Str, OwnedType>(txn, main_key::PAGINATION_MAX_TOTAL_HITS, &val) + self.main.remap_types::().put(txn, main_key::PAGINATION_MAX_TOTAL_HITS, &val) } pub(crate) fn delete_pagination_max_total_hits(&self, txn: &mut RwTxn) -> heed::Result { - self.main.delete::<_, Str>(txn, main_key::PAGINATION_MAX_TOTAL_HITS) + self.main.remap_key_type::().delete(txn, main_key::PAGINATION_MAX_TOTAL_HITS) } /* script language docids */ @@ -1479,7 +1557,7 @@ pub(crate) mod tests { } pub fn add_documents_using_wtxn<'t, R>( &'t self, - wtxn: &mut RwTxn<'t, '_>, + wtxn: &mut RwTxn<'t>, documents: DocumentsBatchReader, ) -> Result<(), crate::error::Error> where @@ -1523,7 +1601,7 @@ pub(crate) mod tests { } pub fn update_settings_using_wtxn<'t>( &'t self, - wtxn: &mut RwTxn<'t, '_>, + wtxn: &mut RwTxn<'t>, update: impl Fn(&mut Settings), ) -> Result<(), crate::error::Error> { let mut builder = update::Settings::new(wtxn, &self.inner, &self.indexer_config); @@ -1534,7 +1612,7 @@ pub(crate) mod tests { pub fn delete_documents_using_wtxn<'t>( &'t self, - wtxn: &mut RwTxn<'t, '_>, + wtxn: &mut RwTxn<'t>, external_document_ids: Vec, ) { let builder = 
IndexDocuments::new( diff --git a/milli/src/lib.rs b/milli/src/lib.rs index cfa438609..acea72c41 100644 --- a/milli/src/lib.rs +++ b/milli/src/lib.rs @@ -66,9 +66,9 @@ pub use self::search::{ pub type Result = std::result::Result; pub type Attribute = u32; -pub type BEU16 = heed::zerocopy::U16; -pub type BEU32 = heed::zerocopy::U32; -pub type BEU64 = heed::zerocopy::U64; +pub type BEU16 = heed::types::U16; +pub type BEU32 = heed::types::U32; +pub type BEU64 = heed::types::U64; pub type DocumentId = u32; pub type FastMap4 = HashMap>; pub type FastMap8 = HashMap>; diff --git a/milli/src/search/facet/facet_distribution.rs b/milli/src/search/facet/facet_distribution.rs index acf117ef6..90da16797 100644 --- a/milli/src/search/facet/facet_distribution.rs +++ b/milli/src/search/facet/facet_distribution.rs @@ -2,7 +2,7 @@ use std::collections::{BTreeMap, HashMap, HashSet}; use std::ops::ControlFlow; use std::{fmt, mem}; -use heed::types::ByteSlice; +use heed::types::Bytes; use heed::BytesDecode; use indexmap::IndexMap; use roaring::RoaringBitmap; @@ -13,7 +13,7 @@ use crate::facet::FacetType; use crate::heed_codec::facet::{ FacetGroupKeyCodec, FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec, OrderedF64Codec, }; -use crate::heed_codec::{ByteSliceRefCodec, StrRefCodec}; +use crate::heed_codec::{BytesRefCodec, StrRefCodec}; use crate::search::facet::facet_distribution_iter::{ count_iterate_over_facet_distribution, lexicographically_iterate_over_facet_distribution, }; @@ -105,7 +105,7 @@ impl<'a> FacetDistribution<'a> { key_buffer.truncate(mem::size_of::()); key_buffer.extend_from_slice(&docid.to_be_bytes()); let iter = db - .remap_key_type::() + .remap_key_type::() .prefix_iter(self.rtxn, &key_buffer)? .remap_key_type::(); @@ -129,7 +129,7 @@ impl<'a> FacetDistribution<'a> { key_buffer.truncate(mem::size_of::()); key_buffer.extend_from_slice(&docid.to_be_bytes()); let iter = db - .remap_key_type::() + .remap_key_type::() .prefix_iter(self.rtxn, &key_buffer)? .remap_key_type::(); @@ -172,9 +172,7 @@ impl<'a> FacetDistribution<'a> { search_function( self.rtxn, - self.index - .facet_id_f64_docids - .remap_key_type::>(), + self.index.facet_id_f64_docids.remap_key_type::>(), field_id, candidates, |facet_key, nbr_docids, _| { @@ -203,9 +201,7 @@ impl<'a> FacetDistribution<'a> { search_function( self.rtxn, - self.index - .facet_id_string_docids - .remap_key_type::>(), + self.index.facet_id_string_docids.remap_key_type::>(), field_id, candidates, |facet_key, nbr_docids, any_docid| { diff --git a/milli/src/search/facet/facet_distribution_iter.rs b/milli/src/search/facet/facet_distribution_iter.rs index 722a30e6d..d993ef2dc 100644 --- a/milli/src/search/facet/facet_distribution_iter.rs +++ b/milli/src/search/facet/facet_distribution_iter.rs @@ -7,7 +7,7 @@ use roaring::RoaringBitmap; use super::{get_first_facet_value, get_highest_level}; use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec}; -use crate::heed_codec::ByteSliceRefCodec; +use crate::heed_codec::BytesRefCodec; use crate::DocumentId; /// Call the given closure on the facet distribution of the candidate documents. @@ -23,7 +23,7 @@ use crate::DocumentId; /// keep iterating over the different facet values or stop. 
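// --- Editor's note: sketch, not part of the patch, of the callback contract
// described in the doc comment below: it receives the raw facet bytes, the
// number of matching documents, and one sample docid, and steers the traversal
// through ControlFlow (milli's real callback additionally wraps this in its
// own Result type).
use std::ops::ControlFlow;

fn distribution_callback(facet_bytes: &[u8], nbr_docids: u64, _any_docid: u32) -> ControlFlow<()> {
    println!("facet {:?} -> {} documents", facet_bytes, nbr_docids);
    // Returning Break stops the iteration early, e.g. once enough values are seen.
    if nbr_docids == 0 { ControlFlow::Break(()) } else { ControlFlow::Continue(()) }
}
// ---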
pub fn lexicographically_iterate_over_facet_distribution<'t, CB>( rtxn: &'t heed::RoTxn<'t>, - db: heed::Database, FacetGroupValueCodec>, + db: heed::Database, FacetGroupValueCodec>, field_id: u16, candidates: &RoaringBitmap, callback: CB, @@ -34,11 +34,11 @@ where let mut fd = LexicographicFacetDistribution { rtxn, db, field_id, callback }; let highest_level = get_highest_level( rtxn, - db.remap_key_type::>(), + db.remap_key_type::>(), field_id, )?; - if let Some(first_bound) = get_first_facet_value::(rtxn, db, field_id)? { + if let Some(first_bound) = get_first_facet_value::(rtxn, db, field_id)? { fd.iterate(candidates, highest_level, first_bound, usize::MAX)?; Ok(()) } else { @@ -48,7 +48,7 @@ where pub fn count_iterate_over_facet_distribution<'t, CB>( rtxn: &'t heed::RoTxn<'t>, - db: heed::Database, FacetGroupValueCodec>, + db: heed::Database, FacetGroupValueCodec>, field_id: u16, candidates: &RoaringBitmap, mut callback: CB, @@ -77,11 +77,11 @@ where let mut heap = BinaryHeap::new(); let highest_level = get_highest_level( rtxn, - db.remap_key_type::>(), + db.remap_key_type::>(), field_id, )?; - if let Some(first_bound) = get_first_facet_value::(rtxn, db, field_id)? { + if let Some(first_bound) = get_first_facet_value::(rtxn, db, field_id)? { // We first fill the heap with values from the highest level let starting_key = FacetGroupKey { field_id, level: highest_level, left_bound: first_bound }; @@ -146,7 +146,7 @@ where CB: FnMut(&'t [u8], u64, DocumentId) -> Result>, { rtxn: &'t heed::RoTxn<'t>, - db: heed::Database, FacetGroupValueCodec>, + db: heed::Database, FacetGroupValueCodec>, field_id: u16, callback: CB, } diff --git a/milli/src/search/facet/facet_range_search.rs b/milli/src/search/facet/facet_range_search.rs index 26854bc1a..f1a26ded5 100644 --- a/milli/src/search/facet/facet_range_search.rs +++ b/milli/src/search/facet/facet_range_search.rs @@ -5,7 +5,7 @@ use roaring::RoaringBitmap; use super::{get_first_facet_value, get_highest_level, get_last_facet_value}; use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec}; -use crate::heed_codec::ByteSliceRefCodec; +use crate::heed_codec::BytesRefCodec; use crate::Result; /// Find all the document ids for which the given field contains a value contained within @@ -25,11 +25,11 @@ where let inner; let left = match left { Bound::Included(left) => { - inner = BoundCodec::bytes_encode(left).ok_or(heed::Error::Encoding)?; + inner = BoundCodec::bytes_encode(left).map_err(heed::Error::Encoding)?; Bound::Included(inner.as_ref()) } Bound::Excluded(left) => { - inner = BoundCodec::bytes_encode(left).ok_or(heed::Error::Encoding)?; + inner = BoundCodec::bytes_encode(left).map_err(heed::Error::Encoding)?; Bound::Excluded(inner.as_ref()) } Bound::Unbounded => Bound::Unbounded, @@ -37,25 +37,22 @@ where let inner; let right = match right { Bound::Included(right) => { - inner = BoundCodec::bytes_encode(right).ok_or(heed::Error::Encoding)?; + inner = BoundCodec::bytes_encode(right).map_err(heed::Error::Encoding)?; Bound::Included(inner.as_ref()) } Bound::Excluded(right) => { - inner = BoundCodec::bytes_encode(right).ok_or(heed::Error::Encoding)?; + inner = BoundCodec::bytes_encode(right).map_err(heed::Error::Encoding)?; Bound::Excluded(inner.as_ref()) } Bound::Unbounded => Bound::Unbounded, }; - let db = db.remap_key_type::>(); + let db = db.remap_key_type::>(); let mut f = FacetRangeSearch { rtxn, db, field_id, left, right, docids }; let highest_level = get_highest_level(rtxn, db, field_id)?; - if let 
Some(starting_left_bound) = - get_first_facet_value::(rtxn, db, field_id)? - { - let rightmost_bound = Bound::Included( - get_last_facet_value::(rtxn, db, field_id)?.unwrap(), - ); // will not fail because get_first_facet_value succeeded + if let Some(starting_left_bound) = get_first_facet_value::(rtxn, db, field_id)? { + let rightmost_bound = + Bound::Included(get_last_facet_value::(rtxn, db, field_id)?.unwrap()); // will not fail because get_first_facet_value succeeded let group_size = usize::MAX; f.run(highest_level, starting_left_bound, rightmost_bound, group_size)?; Ok(()) @@ -67,7 +64,7 @@ where /// Fetch the document ids that have a facet with a value between the two given bounds struct FacetRangeSearch<'t, 'b, 'bitmap> { rtxn: &'t heed::RoTxn<'t>, - db: heed::Database, FacetGroupValueCodec>, + db: heed::Database, FacetGroupValueCodec>, field_id: u16, left: Bound<&'b [u8]>, right: Bound<&'b [u8]>, diff --git a/milli/src/search/facet/facet_sort_ascending.rs b/milli/src/search/facet/facet_sort_ascending.rs index 0197639e4..20c277c63 100644 --- a/milli/src/search/facet/facet_sort_ascending.rs +++ b/milli/src/search/facet/facet_sort_ascending.rs @@ -5,7 +5,7 @@ use super::{get_first_facet_value, get_highest_level}; use crate::heed_codec::facet::{ FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec, }; -use crate::heed_codec::ByteSliceRefCodec; +use crate::heed_codec::BytesRefCodec; /// Return an iterator which iterates over the given candidate documents in /// ascending order of their facet value for the given field id. @@ -31,12 +31,12 @@ use crate::heed_codec::ByteSliceRefCodec; /// Note that once a document id is returned by the iterator, it is never returned again. pub fn ascending_facet_sort<'t>( rtxn: &'t heed::RoTxn<'t>, - db: heed::Database, FacetGroupValueCodec>, + db: heed::Database, FacetGroupValueCodec>, field_id: u16, candidates: RoaringBitmap, ) -> Result> + 't> { let highest_level = get_highest_level(rtxn, db, field_id)?; - if let Some(first_bound) = get_first_facet_value::(rtxn, db, field_id)? { + if let Some(first_bound) = get_first_facet_value::(rtxn, db, field_id)? { let first_key = FacetGroupKey { field_id, level: highest_level, left_bound: first_bound }; let iter = db.range(rtxn, &(first_key..)).unwrap().take(usize::MAX); @@ -53,14 +53,12 @@ pub fn ascending_facet_sort<'t>( struct AscendingFacetSort<'t, 'e> { rtxn: &'t heed::RoTxn<'e>, - db: heed::Database, FacetGroupValueCodec>, + db: heed::Database, FacetGroupValueCodec>, field_id: u16, #[allow(clippy::type_complexity)] stack: Vec<( RoaringBitmap, - std::iter::Take< - heed::RoRange<'t, FacetGroupKeyCodec, FacetGroupValueCodec>, - >, + std::iter::Take, FacetGroupValueCodec>>, )>, } diff --git a/milli/src/search/facet/facet_sort_descending.rs b/milli/src/search/facet/facet_sort_descending.rs index 549f50f0a..ae6eb60d0 100644 --- a/milli/src/search/facet/facet_sort_descending.rs +++ b/milli/src/search/facet/facet_sort_descending.rs @@ -7,21 +7,21 @@ use super::{get_first_facet_value, get_highest_level, get_last_facet_value}; use crate::heed_codec::facet::{ FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec, }; -use crate::heed_codec::ByteSliceRefCodec; +use crate::heed_codec::BytesRefCodec; /// See documentationg for [`ascending_facet_sort`](super::ascending_facet_sort). /// /// This function does the same thing, but in the opposite order. 
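Across these facet files the renames are mechanical: heed::types::ByteSlice is now Bytes, and milli's ByteSliceRefCodec follows as BytesRefCodec; both still decode to &[u8] lazily. A small sketch of remapping a typed database to raw bytes for a scan (the Str codecs are placeholders):

    use heed::types::{Bytes, Str};
    use heed::{Database, RoTxn};

    // Walk a `Str -> Str` database as raw key/value bytes by remapping
    // both codecs to `Bytes`; nothing is copied, decoding stays lazy.
    fn count_raw_entries(rtxn: &RoTxn, db: Database<Str, Str>) -> heed::Result<usize> {
        let mut entries = 0;
        for result in db.remap_types::<Bytes, Bytes>().iter(rtxn)? {
            let (_key, _value): (&[u8], &[u8]) = result?;
            entries += 1;
        }
        Ok(entries)
    }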
pub fn descending_facet_sort<'t>( rtxn: &'t heed::RoTxn<'t>, - db: heed::Database, FacetGroupValueCodec>, + db: heed::Database, FacetGroupValueCodec>, field_id: u16, candidates: RoaringBitmap, ) -> Result> + 't> { let highest_level = get_highest_level(rtxn, db, field_id)?; - if let Some(first_bound) = get_first_facet_value::(rtxn, db, field_id)? { + if let Some(first_bound) = get_first_facet_value::(rtxn, db, field_id)? { let first_key = FacetGroupKey { field_id, level: highest_level, left_bound: first_bound }; - let last_bound = get_last_facet_value::(rtxn, db, field_id)?.unwrap(); + let last_bound = get_last_facet_value::(rtxn, db, field_id)?.unwrap(); let last_key = FacetGroupKey { field_id, level: highest_level, left_bound: last_bound }; let iter = db.rev_range(rtxn, &(first_key..=last_key))?.take(usize::MAX); Ok(itertools::Either::Left(DescendingFacetSort { @@ -37,13 +37,13 @@ pub fn descending_facet_sort<'t>( struct DescendingFacetSort<'t> { rtxn: &'t heed::RoTxn<'t>, - db: heed::Database, FacetGroupValueCodec>, + db: heed::Database, FacetGroupValueCodec>, field_id: u16, #[allow(clippy::type_complexity)] stack: Vec<( RoaringBitmap, std::iter::Take< - heed::RoRevRange<'t, FacetGroupKeyCodec, FacetGroupValueCodec>, + heed::RoRevRange<'t, FacetGroupKeyCodec, FacetGroupValueCodec>, >, Bound<&'t [u8]>, )>, @@ -100,7 +100,7 @@ impl<'t> Iterator for DescendingFacetSort<'t> { *right_bound = Bound::Excluded(left_bound); let iter = match self .db - .remap_key_type::>() + .remap_key_type::>() .rev_range(self.rtxn, &(Bound::Included(starting_key_below), end_key_kelow)) { Ok(iter) => iter, @@ -123,7 +123,7 @@ mod tests { use roaring::RoaringBitmap; use crate::heed_codec::facet::FacetGroupKeyCodec; - use crate::heed_codec::ByteSliceRefCodec; + use crate::heed_codec::BytesRefCodec; use crate::milli_snap; use crate::search::facet::facet_sort_descending::descending_facet_sort; use crate::search::facet::tests::{ @@ -144,7 +144,7 @@ mod tests { let txn = index.env.read_txn().unwrap(); let candidates = (200..=300).collect::(); let mut results = String::new(); - let db = index.content.remap_key_type::>(); + let db = index.content.remap_key_type::>(); let iter = descending_facet_sort(&txn, db, 0, candidates).unwrap(); for el in iter { let (docids, _) = el.unwrap(); @@ -167,7 +167,7 @@ mod tests { let txn = index.env.read_txn().unwrap(); let candidates = (200..=300).collect::(); let mut results = String::new(); - let db = index.content.remap_key_type::>(); + let db = index.content.remap_key_type::>(); let iter = descending_facet_sort(&txn, db, 0, candidates.clone()).unwrap(); for el in iter { let (docids, _) = el.unwrap(); diff --git a/milli/src/search/facet/mod.rs b/milli/src/search/facet/mod.rs index ebc9e1da0..f44d6a153 100644 --- a/milli/src/search/facet/mod.rs +++ b/milli/src/search/facet/mod.rs @@ -1,13 +1,13 @@ pub use facet_sort_ascending::ascending_facet_sort; pub use facet_sort_descending::descending_facet_sort; -use heed::types::{ByteSlice, DecodeIgnore}; +use heed::types::{Bytes, DecodeIgnore}; use heed::{BytesDecode, RoTxn}; use roaring::RoaringBitmap; pub use self::facet_distribution::{FacetDistribution, OrderBy, DEFAULT_VALUES_PER_FACET}; pub use self::filter::{BadGeoError, Filter}; use crate::heed_codec::facet::{FacetGroupKeyCodec, FacetGroupValueCodec, OrderedF64Codec}; -use crate::heed_codec::ByteSliceRefCodec; +use crate::heed_codec::BytesRefCodec; use crate::{Index, Result}; mod facet_distribution; mod facet_distribution_iter; @@ -22,8 +22,10 @@ fn facet_extreme_value<'t>( let 
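The decode side changed symmetrically: BytesDecode::bytes_decode returns Result<DItem, BoxedError>, and heed::Error::Decoding wraps the boxed error, which is what the facet_extreme_value rewrite just below relies on. A generic sketch of the conversion:

    use heed::{BytesDecode, Error};

    // Decode a stored value, mapping the codec's boxed error into
    // heed's `Error::Decoding`, mirroring the hunk below.
    fn decode_value<'a, C: BytesDecode<'a>>(bytes: &'a [u8]) -> Result<C::DItem, Error> {
        C::bytes_decode(bytes).map_err(Error::Decoding)
    }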
extreme_value = if let Some(extreme_value) = extreme_it.next() { extreme_value } else { return Ok(None) }; let (_, extreme_value) = extreme_value?; - - Ok(OrderedF64Codec::bytes_decode(extreme_value)) + OrderedF64Codec::bytes_decode(extreme_value) + .map(Some) + .map_err(heed::Error::Decoding) + .map_err(Into::into) } pub fn facet_min_value<'t>( @@ -32,7 +34,7 @@ pub fn facet_min_value<'t>( field_id: u16, candidates: RoaringBitmap, ) -> Result> { - let db = index.facet_id_f64_docids.remap_key_type::>(); + let db = index.facet_id_f64_docids.remap_key_type::>(); let it = ascending_facet_sort(rtxn, db, field_id, candidates)?; facet_extreme_value(it) } @@ -43,7 +45,7 @@ pub fn facet_max_value<'t>( field_id: u16, candidates: RoaringBitmap, ) -> Result> { - let db = index.facet_id_f64_docids.remap_key_type::>(); + let db = index.facet_id_f64_docids.remap_key_type::>(); let it = descending_facet_sort(rtxn, db, field_id, candidates)?; facet_extreme_value(it) } @@ -51,7 +53,7 @@ pub fn facet_max_value<'t>( /// Get the first facet value in the facet database pub(crate) fn get_first_facet_value<'t, BoundCodec>( txn: &'t RoTxn, - db: heed::Database, FacetGroupValueCodec>, + db: heed::Database, FacetGroupValueCodec>, field_id: u16, ) -> heed::Result> where @@ -60,13 +62,12 @@ where let mut level0prefix = vec![]; level0prefix.extend_from_slice(&field_id.to_be_bytes()); level0prefix.push(0); - let mut level0_iter_forward = db - .as_polymorph() - .prefix_iter::<_, ByteSlice, DecodeIgnore>(txn, level0prefix.as_slice())?; + let mut level0_iter_forward = + db.remap_types::().prefix_iter(txn, level0prefix.as_slice())?; if let Some(first) = level0_iter_forward.next() { let (first_key, _) = first?; let first_key = FacetGroupKeyCodec::::bytes_decode(first_key) - .ok_or(heed::Error::Encoding)?; + .map_err(heed::Error::Decoding)?; Ok(Some(first_key.left_bound)) } else { Ok(None) @@ -76,7 +77,7 @@ where /// Get the last facet value in the facet database pub(crate) fn get_last_facet_value<'t, BoundCodec>( txn: &'t RoTxn, - db: heed::Database, FacetGroupValueCodec>, + db: heed::Database, FacetGroupValueCodec>, field_id: u16, ) -> heed::Result> where @@ -85,13 +86,12 @@ where let mut level0prefix = vec![]; level0prefix.extend_from_slice(&field_id.to_be_bytes()); level0prefix.push(0); - let mut level0_iter_backward = db - .as_polymorph() - .rev_prefix_iter::<_, ByteSlice, DecodeIgnore>(txn, level0prefix.as_slice())?; + let mut level0_iter_backward = + db.remap_types::().rev_prefix_iter(txn, level0prefix.as_slice())?; if let Some(last) = level0_iter_backward.next() { let (last_key, _) = last?; let last_key = FacetGroupKeyCodec::::bytes_decode(last_key) - .ok_or(heed::Error::Encoding)?; + .map_err(heed::Error::Decoding)?; Ok(Some(last_key.left_bound)) } else { Ok(None) @@ -101,17 +101,17 @@ where /// Get the height of the highest level in the facet database pub(crate) fn get_highest_level<'t>( txn: &'t RoTxn<'t>, - db: heed::Database, FacetGroupValueCodec>, + db: heed::Database, FacetGroupValueCodec>, field_id: u16, ) -> heed::Result { let field_id_prefix = &field_id.to_be_bytes(); Ok(db - .as_polymorph() - .rev_prefix_iter::<_, ByteSlice, DecodeIgnore>(txn, field_id_prefix)? + .remap_types::() + .rev_prefix_iter(txn, field_id_prefix)? 
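heed 0.20 drops as_polymorph(); untyped access is now spelled as a remap_types call on the typed database, as the get_first_facet_value / get_highest_level helpers around this hunk show. A sketch of the key-only prefix scan, assuming the same field-id-prefixed key layout as the facet databases:

    use heed::types::{Bytes, DecodeIgnore, Str};
    use heed::{Database, RoTxn};

    // Find the first key under a field-id prefix without decoding values:
    // remap keys to `Bytes` and skip value decoding with `DecodeIgnore`.
    fn first_key_for_field(rtxn: &RoTxn, db: Database<Str, Str>, field_id: u16) -> heed::Result<Option<Vec<u8>>> {
        let prefix = field_id.to_be_bytes();
        let mut iter = db.remap_types::<Bytes, DecodeIgnore>().prefix_iter(rtxn, &prefix)?;
        Ok(iter.next().transpose()?.map(|(key, ())| key.to_vec()))
    }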
.next() .map(|el| { let (key, _) = el.unwrap(); - let key = FacetGroupKeyCodec::::bytes_decode(key).unwrap(); + let key = FacetGroupKeyCodec::::bytes_decode(key).unwrap(); key.level }) .unwrap_or(0)) diff --git a/milli/src/search/mod.rs b/milli/src/search/mod.rs index 786b565ae..ee8cd1faf 100644 --- a/milli/src/search/mod.rs +++ b/milli/src/search/mod.rs @@ -17,8 +17,7 @@ use crate::error::UserError; use crate::heed_codec::facet::{FacetGroupKey, FacetGroupValue}; use crate::score_details::{ScoreDetails, ScoringStrategy}; use crate::{ - execute_search, AscDesc, DefaultSearchLogger, DocumentId, FieldId, Index, Result, - SearchContext, BEU16, + execute_search, AscDesc, DefaultSearchLogger, DocumentId, FieldId, Index, Result, SearchContext, }; // Building these factories is not free. @@ -299,7 +298,7 @@ impl<'a> SearchForFacetValues<'a> { None => return Ok(Vec::new()), }; - let fst = match self.search_query.index.facet_id_string_fst.get(rtxn, &BEU16::new(fid))? { + let fst = match self.search_query.index.facet_id_string_fst.get(rtxn, &fid)? { Some(fst) => fst, None => return Ok(vec![]), }; diff --git a/milli/src/search/new/db_cache.rs b/milli/src/search/new/db_cache.rs index 2c670658d..24c7d5076 100644 --- a/milli/src/search/new/db_cache.rs +++ b/milli/src/search/new/db_cache.rs @@ -3,7 +3,7 @@ use std::collections::hash_map::Entry; use std::hash::Hash; use fxhash::FxHashMap; -use heed::types::ByteSlice; +use heed::types::Bytes; use heed::{BytesEncode, Database, RoTxn}; use roaring::RoaringBitmap; @@ -50,7 +50,7 @@ impl<'ctx> DatabaseCache<'ctx> { cache_key: K1, db_key: &'v KC::EItem, cache: &mut FxHashMap>>, - db: Database, + db: Database, ) -> Result> where K1: Copy + Eq + Hash, @@ -63,12 +63,14 @@ impl<'ctx> DatabaseCache<'ctx> { } match cache.get(&cache_key).unwrap() { - Some(Cow::Borrowed(bytes)) => { - DC::bytes_decode_owned(bytes).ok_or(heed::Error::Decoding.into()).map(Some) - } - Some(Cow::Owned(bytes)) => { - DC::bytes_decode_owned(bytes).ok_or(heed::Error::Decoding.into()).map(Some) - } + Some(Cow::Borrowed(bytes)) => DC::bytes_decode_owned(bytes) + .map(Some) + .map_err(heed::Error::Decoding) + .map_err(Into::into), + Some(Cow::Owned(bytes)) => DC::bytes_decode_owned(bytes) + .map(Some) + .map_err(heed::Error::Decoding) + .map_err(Into::into), None => Ok(None), } } @@ -78,7 +80,7 @@ impl<'ctx> DatabaseCache<'ctx> { cache_key: K1, db_keys: &'v [KC::EItem], cache: &mut FxHashMap>>, - db: Database, + db: Database, merger: MergeFn, ) -> Result> where @@ -110,12 +112,14 @@ impl<'ctx> DatabaseCache<'ctx> { } match cache.get(&cache_key).unwrap() { - Some(Cow::Borrowed(bytes)) => { - DC::bytes_decode_owned(bytes).ok_or(heed::Error::Decoding.into()).map(Some) - } - Some(Cow::Owned(bytes)) => { - DC::bytes_decode_owned(bytes).ok_or(heed::Error::Decoding.into()).map(Some) - } + Some(Cow::Borrowed(bytes)) => DC::bytes_decode_owned(bytes) + .map(Some) + .map_err(heed::Error::Decoding) + .map_err(Into::into), + Some(Cow::Owned(bytes)) => DC::bytes_decode_owned(bytes) + .map(Some) + .map_err(heed::Error::Decoding) + .map_err(Into::into), None => Ok(None), } } @@ -164,7 +168,7 @@ impl<'ctx> SearchContext<'ctx> { word, &keys[..], &mut self.db_cache.word_docids, - self.index.word_fid_docids.remap_data_type::(), + self.index.word_fid_docids.remap_data_type::(), merge_cbo_roaring_bitmaps, ) } @@ -173,7 +177,7 @@ impl<'ctx> SearchContext<'ctx> { word, self.word_interner.get(word).as_str(), &mut self.db_cache.word_docids, - self.index.word_docids.remap_data_type::(), + 
self.index.word_docids.remap_data_type::(), ), } } @@ -187,7 +191,7 @@ impl<'ctx> SearchContext<'ctx> { word, self.word_interner.get(word).as_str(), &mut self.db_cache.exact_word_docids, - self.index.exact_word_docids.remap_data_type::(), + self.index.exact_word_docids.remap_data_type::(), ) } @@ -226,7 +230,7 @@ impl<'ctx> SearchContext<'ctx> { prefix, &keys[..], &mut self.db_cache.word_prefix_docids, - self.index.word_prefix_fid_docids.remap_data_type::(), + self.index.word_prefix_fid_docids.remap_data_type::(), merge_cbo_roaring_bitmaps, ) } @@ -235,7 +239,7 @@ impl<'ctx> SearchContext<'ctx> { prefix, self.word_interner.get(prefix).as_str(), &mut self.db_cache.word_prefix_docids, - self.index.word_prefix_docids.remap_data_type::(), + self.index.word_prefix_docids.remap_data_type::(), ), } } @@ -249,7 +253,7 @@ impl<'ctx> SearchContext<'ctx> { prefix, self.word_interner.get(prefix).as_str(), &mut self.db_cache.exact_word_prefix_docids, - self.index.exact_word_prefix_docids.remap_data_type::(), + self.index.exact_word_prefix_docids.remap_data_type::(), ) } @@ -268,7 +272,7 @@ impl<'ctx> SearchContext<'ctx> { self.word_interner.get(word2).as_str(), ), &mut self.db_cache.word_pair_proximity_docids, - self.index.word_pair_proximity_docids.remap_data_type::(), + self.index.word_pair_proximity_docids.remap_data_type::(), ) } @@ -287,7 +291,7 @@ impl<'ctx> SearchContext<'ctx> { self.word_interner.get(word2).as_str(), ), &mut self.db_cache.word_pair_proximity_docids, - self.index.word_pair_proximity_docids.remap_data_type::(), + self.index.word_pair_proximity_docids.remap_data_type::(), ) } @@ -316,7 +320,7 @@ impl<'ctx> SearchContext<'ctx> { let remap_key_type = self .index .word_pair_proximity_docids - .remap_key_type::() + .remap_key_type::() .prefix_iter(self.txn, &key)?; for result in remap_key_type { let (_, docids) = result?; @@ -355,7 +359,7 @@ impl<'ctx> SearchContext<'ctx> { (word, fid), &(self.word_interner.get(word).as_str(), fid), &mut self.db_cache.word_fid_docids, - self.index.word_fid_docids.remap_data_type::(), + self.index.word_fid_docids.remap_data_type::(), ) } @@ -374,7 +378,7 @@ impl<'ctx> SearchContext<'ctx> { (word_prefix, fid), &(self.word_interner.get(word_prefix).as_str(), fid), &mut self.db_cache.word_prefix_fid_docids, - self.index.word_prefix_fid_docids.remap_data_type::(), + self.index.word_prefix_fid_docids.remap_data_type::(), ) } @@ -388,7 +392,7 @@ impl<'ctx> SearchContext<'ctx> { let remap_key_type = self .index .word_fid_docids - .remap_types::() + .remap_types::() .prefix_iter(self.txn, &key)? .remap_key_type::(); for result in remap_key_type { @@ -414,7 +418,7 @@ impl<'ctx> SearchContext<'ctx> { let remap_key_type = self .index .word_prefix_fid_docids - .remap_types::() + .remap_types::() .prefix_iter(self.txn, &key)? 
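milli's DatabaseCache stores raw entries and decodes them on the way out, which is why every database handed to it above is remapped with remap_data_type::<Bytes>(); the deferred decode then maps its error through heed::Error::Decoding just like the encode side. A reduced sketch of the fetch-raw-then-decode flow, without the cache map:

    use heed::types::{Bytes, Str};
    use heed::{BytesDecode, Database, Error, RoTxn};

    // Fetch the raw bytes once (as the cache stores them), then decode
    // lazily when the value is actually needed.
    fn get_then_decode<'txn>(rtxn: &'txn RoTxn, db: Database<Str, Str>, key: &str) -> Result<Option<&'txn str>, Error> {
        match db.remap_data_type::<Bytes>().get(rtxn, key)? {
            Some(raw) => Str::bytes_decode(raw).map(Some).map_err(Error::Decoding),
            None => Ok(None),
        }
    }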
.remap_key_type::(); for result in remap_key_type { @@ -442,7 +446,7 @@ impl<'ctx> SearchContext<'ctx> { (word, position), &(self.word_interner.get(word).as_str(), position), &mut self.db_cache.word_position_docids, - self.index.word_position_docids.remap_data_type::(), + self.index.word_position_docids.remap_data_type::(), ) } @@ -456,7 +460,7 @@ impl<'ctx> SearchContext<'ctx> { (word_prefix, position), &(self.word_interner.get(word_prefix).as_str(), position), &mut self.db_cache.word_prefix_position_docids, - self.index.word_prefix_position_docids.remap_data_type::(), + self.index.word_prefix_position_docids.remap_data_type::(), ) } @@ -470,7 +474,7 @@ impl<'ctx> SearchContext<'ctx> { let remap_key_type = self .index .word_position_docids - .remap_types::() + .remap_types::() .prefix_iter(self.txn, &key)? .remap_key_type::(); for result in remap_key_type { @@ -501,7 +505,7 @@ impl<'ctx> SearchContext<'ctx> { let remap_key_type = self .index .word_prefix_position_docids - .remap_types::() + .remap_types::() .prefix_iter(self.txn, &key)? .remap_key_type::(); for result in remap_key_type { diff --git a/milli/src/search/new/distinct.rs b/milli/src/search/new/distinct.rs index e90ffe878..25ea0b0a3 100644 --- a/milli/src/search/new/distinct.rs +++ b/milli/src/search/new/distinct.rs @@ -1,4 +1,4 @@ -use heed::types::{ByteSlice, Str, Unit}; +use heed::types::{Bytes, Str, Unit}; use heed::{Database, RoPrefix, RoTxn}; use roaring::RoaringBitmap; @@ -8,7 +8,7 @@ const DOCID_SIZE: usize = 4; use crate::heed_codec::facet::{ FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec, FieldDocIdFacetCodec, }; -use crate::heed_codec::ByteSliceRefCodec; +use crate::heed_codec::BytesRefCodec; use crate::{Index, Result, SearchContext}; pub struct DistinctOutput { @@ -71,7 +71,7 @@ pub fn distinct_single_docid( /// Return all the docids containing the given value in the given field fn facet_value_docids( - database: Database, FacetGroupValueCodec>, + database: Database, FacetGroupValueCodec>, txn: &RoTxn, field_id: u16, facet_value: &[u8], @@ -87,12 +87,12 @@ fn facet_number_values<'a>( field_id: u16, index: &Index, txn: &'a RoTxn, -) -> Result, Unit>> { +) -> Result, Unit>> { let key = facet_values_prefix_key(field_id, docid); let iter = index .field_id_docid_facet_f64s - .remap_key_type::() + .remap_key_type::() .prefix_iter(txn, &key)? .remap_key_type(); @@ -105,12 +105,12 @@ pub fn facet_string_values<'a>( field_id: u16, index: &Index, txn: &'a RoTxn, -) -> Result, Str>> { +) -> Result, Str>> { let key = facet_values_prefix_key(field_id, docid); let iter = index .field_id_docid_facet_strings - .remap_key_type::() + .remap_key_type::() .prefix_iter(txn, &key)? .remap_types(); diff --git a/milli/src/search/new/geo_sort.rs b/milli/src/search/new/geo_sort.rs index bd9546048..b2e3a2f3d 100644 --- a/milli/src/search/new/geo_sort.rs +++ b/milli/src/search/new/geo_sort.rs @@ -1,7 +1,7 @@ use std::collections::VecDeque; use std::iter::FromIterator; -use heed::types::{ByteSlice, Unit}; +use heed::types::{Bytes, Unit}; use heed::{RoPrefix, RoTxn}; use roaring::RoaringBitmap; use rstar::RTree; @@ -34,7 +34,7 @@ fn facet_number_values<'a>( let iter = index .field_id_docid_facet_f64s - .remap_key_type::() + .remap_key_type::() .prefix_iter(txn, &key)? 
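distinct.rs and geo_sort.rs around this point share one shape: build a binary prefix by hand, scan with the key codec remapped to Bytes, then remap the iterator back to a typed codec. A condensed sketch assuming the same field-id-then-docid big-endian prefix layout:

    use heed::types::{Bytes, Str, Unit};
    use heed::{Database, RoPrefix, RoTxn};

    // Iterate every entry whose key starts with `field_id ++ docid`,
    // restoring the typed key codec once the range is positioned.
    fn entries_for<'t>(
        rtxn: &'t RoTxn,
        db: Database<Str, Unit>,
        field_id: u16,
        docid: u32,
    ) -> heed::Result<RoPrefix<'t, Str, Unit>> {
        let mut prefix = Vec::with_capacity(6);
        prefix.extend_from_slice(&field_id.to_be_bytes());
        prefix.extend_from_slice(&docid.to_be_bytes());
        Ok(db.remap_key_type::<Bytes>().prefix_iter(rtxn, &prefix)?.remap_key_type::<Str>())
    }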
.remap_key_type(); @@ -163,7 +163,7 @@ impl GeoSort { // computing the distance between two points is expensive thus we cache the result documents .sort_by_cached_key(|(_, p)| distance_between_two_points(&self.point, p) as usize); - self.cached_sorted_docids.extend(documents.into_iter()); + self.cached_sorted_docids.extend(documents); }; Ok(()) diff --git a/milli/src/search/new/interner.rs b/milli/src/search/new/interner.rs index c2d325a86..e94be2e77 100644 --- a/milli/src/search/new/interner.rs +++ b/milli/src/search/new/interner.rs @@ -228,7 +228,7 @@ impl Ord for Interned { impl PartialOrd for Interned { fn partial_cmp(&self, other: &Self) -> Option { - self.idx.partial_cmp(&other.idx) + Some(self.cmp(other)) } } @@ -241,7 +241,7 @@ impl PartialEq for Interned { } impl Clone for Interned { fn clone(&self) -> Self { - Self { idx: self.idx, _phantom: PhantomData } + *self } } diff --git a/milli/src/search/new/mod.rs b/milli/src/search/new/mod.rs index 6ceb78223..eaf55ccbb 100644 --- a/milli/src/search/new/mod.rs +++ b/milli/src/search/new/mod.rs @@ -50,9 +50,7 @@ use crate::distance::NDotProductPoint; use crate::error::FieldIdMapMissingEntry; use crate::score_details::{ScoreDetails, ScoringStrategy}; use crate::search::new::distinct::apply_distinct_rule; -use crate::{ - AscDesc, DocumentId, Filter, Index, Member, Result, TermsMatchingStrategy, UserError, BEU32, -}; +use crate::{AscDesc, DocumentId, Filter, Index, Member, Result, TermsMatchingStrategy, UserError}; /// A structure used throughout the execution of a search query. pub struct SearchContext<'ctx> { @@ -451,8 +449,8 @@ pub fn execute_search( let mut docids = Vec::new(); let mut uniq_docids = RoaringBitmap::new(); for instant_distance::Item { distance: _, pid, point: _ } in neighbors { - let index = BEU32::new(pid.into_inner()); - let docid = ctx.index.vector_id_docid.get(ctx.txn, &index)?.unwrap().get(); + let index = pid.into_inner(); + let docid = ctx.index.vector_id_docid.get(ctx.txn, &index)?.unwrap(); if universe.contains(docid) && uniq_docids.insert(docid) { docids.push(docid); if docids.len() == (from + length) { @@ -609,7 +607,8 @@ fn check_sort_criteria(ctx: &SearchContext, sort_criteria: Option<&Vec> field: field.to_string(), valid_fields, hidden_fields, - })?; + } + .into()); } Member::Geo(_) if !sortable_fields.contains("_geo") => { let (valid_fields, hidden_fields) = @@ -619,7 +618,8 @@ fn check_sort_criteria(ctx: &SearchContext, sort_criteria: Option<&Vec> field: "_geo".to_string(), valid_fields, hidden_fields, - })?; + } + .into()); } _ => (), } diff --git a/milli/src/search/new/query_term/mod.rs b/milli/src/search/new/query_term/mod.rs index 72a427379..6760c8be7 100644 --- a/milli/src/search/new/query_term/mod.rs +++ b/milli/src/search/new/query_term/mod.rs @@ -175,7 +175,7 @@ impl QueryTermSubset { pub fn use_prefix_db(&self, ctx: &SearchContext) -> Option { let original = ctx.term_interner.get(self.original); - let Some(use_prefix_db) = original.zero_typo.use_prefix_db else { return None }; + let use_prefix_db = original.zero_typo.use_prefix_db?; let word = match &self.zero_typo_subset { NTypoTermSubset::All => Some(use_prefix_db), NTypoTermSubset::Subset { words, phrases: _ } => { diff --git a/milli/src/search/new/sort.rs b/milli/src/search/new/sort.rs index 6f7321e7b..fb234b293 100644 --- a/milli/src/search/new/sort.rs +++ b/milli/src/search/new/sort.rs @@ -4,7 +4,7 @@ use roaring::RoaringBitmap; use super::logger::SearchLogger; use super::{RankingRule, RankingRuleOutput, RankingRuleQueryTrait, 
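The interner.rs hunks are clippy hygiene: PartialOrd must agree with Ord, so partial_cmp delegates to cmp, and a Copy type's clone is just *self. The same pattern on a simplified, illustrative handle type:

    use std::cmp::Ordering;
    use std::marker::PhantomData;

    // Illustrative stand-in for an interned index: ordering is defined
    // once on `Ord`, and the other impls defer to it.
    struct Interned<T> { idx: u16, _phantom: PhantomData<T> }

    impl<T> PartialEq for Interned<T> {
        fn eq(&self, other: &Self) -> bool { self.idx == other.idx }
    }
    impl<T> Eq for Interned<T> {}
    impl<T> Ord for Interned<T> {
        fn cmp(&self, other: &Self) -> Ordering { self.idx.cmp(&other.idx) }
    }
    impl<T> PartialOrd for Interned<T> {
        // Delegating keeps `partial_cmp` trivially consistent with `cmp`.
        fn partial_cmp(&self, other: &Self) -> Option<Ordering> { Some(self.cmp(other)) }
    }
    impl<T> Clone for Interned<T> {
        fn clone(&self) -> Self { *self } // sound because the type is `Copy`
    }
    impl<T> Copy for Interned<T> {}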
SearchContext}; use crate::heed_codec::facet::{FacetGroupKeyCodec, OrderedF64Codec}; -use crate::heed_codec::{ByteSliceRefCodec, StrRefCodec}; +use crate::heed_codec::{BytesRefCodec, StrRefCodec}; use crate::score_details::{self, ScoreDetails}; use crate::search::facet::{ascending_facet_sort, descending_facet_sort}; use crate::{FieldId, Index, Result}; @@ -100,11 +100,11 @@ impl<'ctx, Query: RankingRuleQueryTrait> RankingRule<'ctx, Query> for Sort<'ctx, let number_db = ctx .index .facet_id_f64_docids - .remap_key_type::>(); + .remap_key_type::>(); let string_db = ctx .index .facet_id_string_docids - .remap_key_type::>(); + .remap_key_type::>(); let (number_iter, string_iter) = if self.is_ascending { let number_iter = ascending_facet_sort( diff --git a/milli/src/search/new/tests/attribute_fid.rs b/milli/src/search/new/tests/attribute_fid.rs index 09e52a394..38225404c 100644 --- a/milli/src/search/new/tests/attribute_fid.rs +++ b/milli/src/search/new/tests/attribute_fid.rs @@ -124,8 +124,7 @@ fn test_attribute_fid_simple() { s.query("the quick brown fox jumps over the lazy dog"); s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed); let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap(); - let document_ids_scores: Vec<_> = - documents_ids.iter().zip(document_scores.into_iter()).collect(); + let document_ids_scores: Vec<_> = documents_ids.iter().zip(document_scores).collect(); insta::assert_snapshot!(format!("{document_ids_scores:#?}")); } @@ -142,7 +141,6 @@ fn test_attribute_fid_ngrams() { let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap(); - let document_ids_scores: Vec<_> = - documents_ids.iter().zip(document_scores.into_iter()).collect(); + let document_ids_scores: Vec<_> = documents_ids.iter().zip(document_scores).collect(); insta::assert_snapshot!(format!("{document_ids_scores:#?}")); } diff --git a/milli/src/search/new/tests/attribute_position.rs b/milli/src/search/new/tests/attribute_position.rs index 1513528ec..68c5de540 100644 --- a/milli/src/search/new/tests/attribute_position.rs +++ b/milli/src/search/new/tests/attribute_position.rs @@ -141,8 +141,7 @@ fn test_attribute_position_simple() { let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap(); - let document_ids_scores: Vec<_> = - documents_ids.iter().zip(document_scores.into_iter()).collect(); + let document_ids_scores: Vec<_> = documents_ids.iter().zip(document_scores).collect(); insta::assert_snapshot!(format!("{document_ids_scores:#?}")); } #[test] @@ -158,8 +157,7 @@ fn test_attribute_position_repeated() { let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap(); - let document_ids_scores: Vec<_> = - documents_ids.iter().zip(document_scores.into_iter()).collect(); + let document_ids_scores: Vec<_> = documents_ids.iter().zip(document_scores).collect(); insta::assert_snapshot!(format!("{document_ids_scores:#?}")); } @@ -176,8 +174,7 @@ fn test_attribute_position_different_fields() { let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap(); - let document_ids_scores: Vec<_> = - documents_ids.iter().zip(document_scores.into_iter()).collect(); + let document_ids_scores: Vec<_> = documents_ids.iter().zip(document_scores).collect(); insta::assert_snapshot!(format!("{document_ids_scores:#?}")); } @@ -194,7 +191,6 @@ fn test_attribute_position_ngrams() { let SearchResult { documents_ids, document_scores, .. 
} = s.execute().unwrap(); - let document_ids_scores: Vec<_> = - documents_ids.iter().zip(document_scores.into_iter()).collect(); + let document_ids_scores: Vec<_> = documents_ids.iter().zip(document_scores).collect(); insta::assert_snapshot!(format!("{document_ids_scores:#?}")); } diff --git a/milli/src/search/new/tests/exactness.rs b/milli/src/search/new/tests/exactness.rs index a486342c1..c52006e3d 100644 --- a/milli/src/search/new/tests/exactness.rs +++ b/milli/src/search/new/tests/exactness.rs @@ -478,8 +478,7 @@ fn test_exactness_simple_ordered() { let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap(); - let document_ids_scores: Vec<_> = - documents_ids.iter().zip(document_scores.into_iter()).collect(); + let document_ids_scores: Vec<_> = documents_ids.iter().zip(document_scores).collect(); insta::assert_snapshot!(format!("{document_ids_scores:#?}")); let texts = collect_field_values(&index, &txn, "text", &documents_ids); @@ -511,8 +510,7 @@ fn test_exactness_simple_reversed() { let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap(); - let document_ids_scores: Vec<_> = - documents_ids.iter().zip(document_scores.into_iter()).collect(); + let document_ids_scores: Vec<_> = documents_ids.iter().zip(document_scores).collect(); insta::assert_snapshot!(format!("{document_ids_scores:#?}")); let texts = collect_field_values(&index, &txn, "text", &documents_ids); @@ -535,8 +533,7 @@ fn test_exactness_simple_reversed() { let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap(); - let document_ids_scores: Vec<_> = - documents_ids.iter().zip(document_scores.into_iter()).collect(); + let document_ids_scores: Vec<_> = documents_ids.iter().zip(document_scores).collect(); insta::assert_snapshot!(format!("{document_ids_scores:#?}")); let texts = collect_field_values(&index, &txn, "text", &documents_ids); @@ -566,8 +563,7 @@ fn test_exactness_simple_random() { let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap(); - let document_ids_scores: Vec<_> = - documents_ids.iter().zip(document_scores.into_iter()).collect(); + let document_ids_scores: Vec<_> = documents_ids.iter().zip(document_scores).collect(); insta::assert_snapshot!(format!("{document_ids_scores:#?}")); let texts = collect_field_values(&index, &txn, "text", &documents_ids); @@ -596,8 +592,7 @@ fn test_exactness_attribute_starts_with_simple() { let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap(); - let document_ids_scores: Vec<_> = - documents_ids.iter().zip(document_scores.into_iter()).collect(); + let document_ids_scores: Vec<_> = documents_ids.iter().zip(document_scores).collect(); insta::assert_snapshot!(format!("{document_ids_scores:#?}")); let texts = collect_field_values(&index, &txn, "text", &documents_ids); @@ -623,8 +618,7 @@ fn test_exactness_attribute_starts_with_phrase() { let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap(); - let document_ids_scores: Vec<_> = - documents_ids.iter().zip(document_scores.into_iter()).collect(); + let document_ids_scores: Vec<_> = documents_ids.iter().zip(document_scores).collect(); insta::assert_snapshot!(format!("{document_ids_scores:#?}")); let texts = collect_field_values(&index, &txn, "text", &documents_ids); @@ -644,8 +638,7 @@ fn test_exactness_attribute_starts_with_phrase() { let SearchResult { documents_ids, document_scores, .. 
} = s.execute().unwrap(); - let document_ids_scores: Vec<_> = - documents_ids.iter().zip(document_scores.into_iter()).collect(); + let document_ids_scores: Vec<_> = documents_ids.iter().zip(document_scores).collect(); insta::assert_snapshot!(format!("{document_ids_scores:#?}")); let texts = collect_field_values(&index, &txn, "text", &documents_ids); @@ -674,8 +667,7 @@ fn test_exactness_all_candidates_with_typo() { let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap(); - let document_ids_scores: Vec<_> = - documents_ids.iter().zip(document_scores.into_iter()).collect(); + let document_ids_scores: Vec<_> = documents_ids.iter().zip(document_scores).collect(); insta::assert_snapshot!(format!("{document_ids_scores:#?}")); let texts = collect_field_values(&index, &txn, "text", &documents_ids); @@ -711,8 +703,7 @@ fn test_exactness_after_words() { let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap(); - let document_ids_scores: Vec<_> = - documents_ids.iter().zip(document_scores.into_iter()).collect(); + let document_ids_scores: Vec<_> = documents_ids.iter().zip(document_scores).collect(); insta::assert_snapshot!(format!("{document_ids_scores:#?}")); let texts = collect_field_values(&index, &txn, "text", &documents_ids); @@ -760,8 +751,7 @@ fn test_words_after_exactness() { let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap(); - let document_ids_scores: Vec<_> = - documents_ids.iter().zip(document_scores.into_iter()).collect(); + let document_ids_scores: Vec<_> = documents_ids.iter().zip(document_scores).collect(); insta::assert_snapshot!(format!("{document_ids_scores:#?}")); insta::assert_snapshot!(format!("{documents_ids:?}"), @"[19, 9, 18, 8, 17, 16, 6, 7, 15, 5, 14, 4, 13, 3, 12, 2, 1, 11]"); let texts = collect_field_values(&index, &txn, "text", &documents_ids); @@ -809,8 +799,7 @@ fn test_proximity_after_exactness() { let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap(); - let document_ids_scores: Vec<_> = - documents_ids.iter().zip(document_scores.into_iter()).collect(); + let document_ids_scores: Vec<_> = documents_ids.iter().zip(document_scores).collect(); insta::assert_snapshot!(format!("{document_ids_scores:#?}")); insta::assert_snapshot!(format!("{documents_ids:?}"), @"[2, 1, 0, 4, 5, 8, 7, 3, 6]"); @@ -847,8 +836,7 @@ fn test_proximity_after_exactness() { let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap(); - let document_ids_scores: Vec<_> = - documents_ids.iter().zip(document_scores.into_iter()).collect(); + let document_ids_scores: Vec<_> = documents_ids.iter().zip(document_scores).collect(); insta::assert_snapshot!(format!("{document_ids_scores:#?}")); insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0, 1, 2]"); let texts = collect_field_values(&index, &txn, "text", &documents_ids); @@ -881,8 +869,7 @@ fn test_exactness_followed_by_typo_prefer_no_typo_prefix() { let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap(); - let document_ids_scores: Vec<_> = - documents_ids.iter().zip(document_scores.into_iter()).collect(); + let document_ids_scores: Vec<_> = documents_ids.iter().zip(document_scores).collect(); insta::assert_snapshot!(format!("{document_ids_scores:#?}")); insta::assert_snapshot!(format!("{documents_ids:?}"), @"[2, 1, 0, 4, 3]"); let texts = collect_field_values(&index, &txn, "text", &documents_ids); @@ -917,8 +904,7 @@ fn test_typo_followed_by_exactness() { let SearchResult { documents_ids, document_scores, .. 
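The long run of test hunks above and just below is a single mechanical fix repeated: Iterator::zip takes any IntoIterator, so zip(scores.into_iter()) says nothing that zip(scores) doesn't. For instance:

    // `zip` accepts any `IntoIterator`, making the explicit `into_iter()`
    // redundant; this pairs each id with its owned score.
    fn pair_up(ids: &[u32], scores: Vec<f64>) -> Vec<(&u32, f64)> {
        ids.iter().zip(scores).collect()
    }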
} = s.execute().unwrap(); - let document_ids_scores: Vec<_> = - documents_ids.iter().zip(document_scores.into_iter()).collect(); + let document_ids_scores: Vec<_> = documents_ids.iter().zip(document_scores).collect(); insta::assert_snapshot!(format!("{document_ids_scores:#?}")); insta::assert_snapshot!(format!("{documents_ids:?}"), @"[1, 0, 4, 3]"); let texts = collect_field_values(&index, &txn, "text", &documents_ids); diff --git a/milli/src/update/clear_documents.rs b/milli/src/update/clear_documents.rs index afe0191b1..59adda3e8 100644 --- a/milli/src/update/clear_documents.rs +++ b/milli/src/update/clear_documents.rs @@ -1,15 +1,16 @@ +use heed::RwTxn; use roaring::RoaringBitmap; use time::OffsetDateTime; use crate::{FieldDistribution, Index, Result}; -pub struct ClearDocuments<'t, 'u, 'i> { - wtxn: &'t mut heed::RwTxn<'i, 'u>, +pub struct ClearDocuments<'t, 'i> { + wtxn: &'t mut RwTxn<'i>, index: &'i Index, } -impl<'t, 'u, 'i> ClearDocuments<'t, 'u, 'i> { - pub fn new(wtxn: &'t mut heed::RwTxn<'i, 'u>, index: &'i Index) -> ClearDocuments<'t, 'u, 'i> { +impl<'t, 'i> ClearDocuments<'t, 'i> { + pub fn new(wtxn: &'t mut RwTxn<'i>, index: &'i Index) -> ClearDocuments<'t, 'i> { ClearDocuments { wtxn, index } } diff --git a/milli/src/update/facet/bulk.rs b/milli/src/update/facet/bulk.rs index 5626a4aae..3bd4cf5f5 100644 --- a/milli/src/update/facet/bulk.rs +++ b/milli/src/update/facet/bulk.rs @@ -2,8 +2,8 @@ use std::fs::File; use std::io::BufReader; use grenad::CompressionType; -use heed::types::ByteSlice; -use heed::{BytesDecode, BytesEncode, Error, RoTxn, RwTxn}; +use heed::types::Bytes; +use heed::{BytesDecode, BytesEncode, Error, PutFlags, RoTxn, RwTxn}; use roaring::RoaringBitmap; use super::{FACET_GROUP_SIZE, FACET_MIN_LEVEL_SIZE}; @@ -11,7 +11,7 @@ use crate::facet::FacetType; use crate::heed_codec::facet::{ FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec, }; -use crate::heed_codec::ByteSliceRefCodec; +use crate::heed_codec::BytesRefCodec; use crate::update::del_add::{DelAdd, KvReaderDelAdd}; use crate::update::index_documents::{create_writer, valid_lmdb_key, writer_into_reader}; use crate::{CboRoaringBitmapCodec, CboRoaringBitmapLenCodec, FieldId, Index, Result}; @@ -70,11 +70,11 @@ impl<'i> FacetsUpdateBulk<'i> { let Self { index, field_ids, group_size, min_level_size, facet_type, delta_data } = self; let db = match facet_type { - FacetType::String => index - .facet_id_string_docids - .remap_key_type::>(), + FacetType::String => { + index.facet_id_string_docids.remap_key_type::>() + } FacetType::Number => { - index.facet_id_f64_docids.remap_key_type::>() + index.facet_id_f64_docids.remap_key_type::>() } }; @@ -88,7 +88,7 @@ impl<'i> FacetsUpdateBulk<'i> { /// Implementation of `FacetsUpdateBulk` that is independent of milli's `Index` type pub(crate) struct FacetsUpdateBulkInner { - pub db: heed::Database, FacetGroupValueCodec>, + pub db: heed::Database, FacetGroupValueCodec>, pub delta_data: Option>, pub group_size: u8, pub min_level_size: u8, @@ -106,7 +106,7 @@ impl FacetsUpdateBulkInner { for level_reader in level_readers { let mut cursor = level_reader.into_cursor()?; while let Some((k, v)) = cursor.move_on_next()? { - self.db.remap_types::().put(wtxn, k, v)?; + self.db.remap_types::().put(wtxn, k, v)?; } } } @@ -128,7 +128,7 @@ impl FacetsUpdateBulkInner { }; if self.db.is_empty(wtxn)? 
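heed 0.20's RwTxn carries a single lifetime (that of the environment), so wrapper types such as ClearDocuments above shed their second lifetime parameter. A sketch of the new borrow shape on a placeholder updater:

    use heed::RwTxn;

    // Hypothetical updater: `'t` is the borrow of the transaction,
    // `'i` the environment borrow held by the `RwTxn` itself.
    struct ClearAll<'t, 'i> {
        wtxn: &'t mut RwTxn<'i>,
    }

    impl<'t, 'i> ClearAll<'t, 'i> {
        fn new(wtxn: &'t mut RwTxn<'i>) -> Self {
            ClearAll { wtxn }
        }

        fn execute(self) -> heed::Result<()> {
            // ... clear the databases through `self.wtxn` here ...
            let _ = self.wtxn;
            Ok(())
        }
    }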
{ let mut buffer = Vec::new(); - let mut database = self.db.iter_mut(wtxn)?.remap_types::(); + let mut database = self.db.iter_mut(wtxn)?.remap_types::(); let mut cursor = delta_data.into_cursor()?; while let Some((key, value)) = cursor.move_on_next()? { if !valid_lmdb_key(key) { @@ -146,11 +146,13 @@ impl FacetsUpdateBulkInner { buffer.push(1); // then we extend the buffer with the docids bitmap buffer.extend_from_slice(value); - unsafe { database.append(key, &buffer)? }; + unsafe { + database.put_current_with_options::(PutFlags::APPEND, key, &buffer)? + }; } } else { let mut buffer = Vec::new(); - let database = self.db.remap_types::(); + let database = self.db.remap_types::(); let mut cursor = delta_data.into_cursor()?; while let Some((key, value)) = cursor.move_on_next()? { @@ -219,9 +221,9 @@ impl FacetsUpdateBulkInner { let level_0_iter = self .db - .as_polymorph() - .prefix_iter::<_, ByteSlice, ByteSlice>(rtxn, level_0_prefix.as_slice())? - .remap_types::, FacetGroupValueCodec>(); + .remap_types::() + .prefix_iter(rtxn, level_0_prefix.as_slice())? + .remap_types::, FacetGroupValueCodec>(); let mut left_bound: &[u8] = &[]; let mut first_iteration_for_new_group = true; @@ -307,11 +309,11 @@ impl FacetsUpdateBulkInner { bitmaps.drain(..).zip(left_bounds.drain(..)).zip(group_sizes.drain(..)) { let key = FacetGroupKey { field_id, level, left_bound }; - let key = FacetGroupKeyCodec::::bytes_encode(&key) - .ok_or(Error::Encoding)?; + let key = FacetGroupKeyCodec::::bytes_encode(&key) + .map_err(Error::Encoding)?; let value = FacetGroupValue { size: group_size, bitmap }; let value = - FacetGroupValueCodec::bytes_encode(&value).ok_or(Error::Encoding)?; + FacetGroupValueCodec::bytes_encode(&value).map_err(Error::Encoding)?; cur_writer.insert(key, value)?; cur_writer_len += 1; } @@ -336,10 +338,10 @@ impl FacetsUpdateBulkInner { bitmaps.drain(..).zip(left_bounds.drain(..)).zip(group_sizes.drain(..)) { let key = FacetGroupKey { field_id, level, left_bound }; - let key = FacetGroupKeyCodec::::bytes_encode(&key) - .ok_or(Error::Encoding)?; + let key = FacetGroupKeyCodec::::bytes_encode(&key) + .map_err(Error::Encoding)?; let value = FacetGroupValue { size: group_size, bitmap }; - let value = FacetGroupValueCodec::bytes_encode(&value).ok_or(Error::Encoding)?; + let value = FacetGroupValueCodec::bytes_encode(&value).map_err(Error::Encoding)?; cur_writer.insert(key, value)?; cur_writer_len += 1; } diff --git a/milli/src/update/facet/incremental.rs b/milli/src/update/facet/incremental.rs index e241c499c..9d8ee08f4 100644 --- a/milli/src/update/facet/incremental.rs +++ b/milli/src/update/facet/incremental.rs @@ -1,7 +1,7 @@ use std::fs::File; use std::io::BufReader; -use heed::types::{ByteSlice, DecodeIgnore}; +use heed::types::{Bytes, DecodeIgnore}; use heed::{BytesDecode, Error, RoTxn, RwTxn}; use obkv::KvReader; use roaring::RoaringBitmap; @@ -10,7 +10,7 @@ use crate::facet::FacetType; use crate::heed_codec::facet::{ FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec, }; -use crate::heed_codec::ByteSliceRefCodec; +use crate::heed_codec::BytesRefCodec; use crate::search::facet::get_highest_level; use crate::update::del_add::DelAdd; use crate::update::index_documents::valid_lmdb_key; @@ -48,10 +48,10 @@ impl FacetsUpdateIncremental { db: match facet_type { FacetType::String => index .facet_id_string_docids - .remap_key_type::>(), + .remap_key_type::>(), FacetType::Number => index .facet_id_f64_docids - .remap_key_type::>(), + .remap_key_type::>(), }, group_size, max_group_size, 
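The bulk writer's fast path still appends keys in sorted order, but the dedicated append method is gone: the same MDB_APPEND behaviour is requested through PutFlags on a write cursor, and it stays unsafe because an out-of-order key corrupts the tree. A sketch under that assumption, mirroring the cursor call used above:

    use heed::types::Bytes;
    use heed::{Database, PutFlags, RwTxn};

    // Bulk-load already-sorted raw entries at the end of a database.
    // Safety: every key must be strictly greater than the previous one;
    // LMDB's APPEND mode does not re-check the ordering.
    fn append_sorted(wtxn: &mut RwTxn, db: Database<Bytes, Bytes>, entries: &[(&[u8], &[u8])]) -> heed::Result<()> {
        let mut cursor = db.iter_mut(wtxn)?;
        for &(key, value) in entries {
            unsafe { cursor.put_current_with_options::<Bytes>(PutFlags::APPEND, key, value)? };
        }
        Ok(())
    }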
@@ -67,19 +67,19 @@ impl FacetsUpdateIncremental { if !valid_lmdb_key(key) { continue; } - let key = FacetGroupKeyCodec::::bytes_decode(key) - .ok_or(heed::Error::Encoding)?; + let key = FacetGroupKeyCodec::::bytes_decode(key) + .map_err(heed::Error::Encoding)?; let value = KvReader::new(value); let docids_to_delete = value .get(DelAdd::Deletion) .map(CboRoaringBitmapCodec::bytes_decode) - .map(|o| o.ok_or(heed::Error::Encoding)); + .map(|o| o.map_err(heed::Error::Encoding)); let docids_to_add = value .get(DelAdd::Addition) .map(CboRoaringBitmapCodec::bytes_decode) - .map(|o| o.ok_or(heed::Error::Encoding)); + .map(|o| o.map_err(heed::Error::Encoding)); if let Some(docids_to_delete) = docids_to_delete { let docids_to_delete = docids_to_delete?; @@ -98,7 +98,7 @@ impl FacetsUpdateIncremental { /// Implementation of `FacetsUpdateIncremental` that is independent of milli's `Index` type pub struct FacetsUpdateIncrementalInner { - pub db: heed::Database, FacetGroupValueCodec>, + pub db: heed::Database, FacetGroupValueCodec>, pub group_size: u8, pub min_level_size: u8, pub max_group_size: u8, @@ -134,15 +134,14 @@ impl FacetsUpdateIncrementalInner { prefix.extend_from_slice(&field_id.to_be_bytes()); prefix.push(level); - let mut iter = - self.db.as_polymorph().prefix_iter::<_, ByteSlice, FacetGroupValueCodec>( - txn, - prefix.as_slice(), - )?; + let mut iter = self + .db + .remap_types::() + .prefix_iter(txn, prefix.as_slice())?; let (key_bytes, value) = iter.next().unwrap()?; Ok(( - FacetGroupKeyCodec::::bytes_decode(key_bytes) - .ok_or(Error::Encoding)? + FacetGroupKeyCodec::::bytes_decode(key_bytes) + .map_err(Error::Encoding)? .into_owned(), value, )) @@ -177,10 +176,8 @@ impl FacetsUpdateIncrementalInner { level0_prefix.extend_from_slice(&field_id.to_be_bytes()); level0_prefix.push(0); - let mut iter = self - .db - .as_polymorph() - .prefix_iter::<_, ByteSlice, DecodeIgnore>(txn, &level0_prefix)?; + let mut iter = + self.db.remap_types::().prefix_iter(txn, &level0_prefix)?; if iter.next().is_none() { drop(iter); @@ -382,11 +379,8 @@ impl FacetsUpdateIncrementalInner { highest_level_prefix.extend_from_slice(&field_id.to_be_bytes()); highest_level_prefix.push(highest_level); - let size_highest_level = self - .db - .as_polymorph() - .prefix_iter::<_, ByteSlice, ByteSlice>(txn, &highest_level_prefix)? 
- .count(); + let size_highest_level = + self.db.remap_types::().prefix_iter(txn, &highest_level_prefix)?.count(); if size_highest_level < self.group_size as usize * self.min_level_size as usize { return Ok(()); @@ -394,8 +388,8 @@ impl FacetsUpdateIncrementalInner { let mut groups_iter = self .db - .as_polymorph() - .prefix_iter::<_, ByteSlice, FacetGroupValueCodec>(txn, &highest_level_prefix)?; + .remap_types::() + .prefix_iter(txn, &highest_level_prefix)?; let nbr_new_groups = size_highest_level / self.group_size as usize; let nbr_leftover_elements = size_highest_level % self.group_size as usize; @@ -406,8 +400,8 @@ impl FacetsUpdateIncrementalInner { let mut values = RoaringBitmap::new(); for _ in 0..group_size { let (key_bytes, value_i) = groups_iter.next().unwrap()?; - let key_i = FacetGroupKeyCodec::::bytes_decode(key_bytes) - .ok_or(Error::Encoding)?; + let key_i = FacetGroupKeyCodec::::bytes_decode(key_bytes) + .map_err(Error::Encoding)?; if first_key.is_none() { first_key = Some(key_i); @@ -429,8 +423,8 @@ impl FacetsUpdateIncrementalInner { let mut values = RoaringBitmap::new(); for _ in 0..nbr_leftover_elements { let (key_bytes, value_i) = groups_iter.next().unwrap()?; - let key_i = FacetGroupKeyCodec::::bytes_decode(key_bytes) - .ok_or(Error::Encoding)?; + let key_i = FacetGroupKeyCodec::::bytes_decode(key_bytes) + .map_err(Error::Encoding)?; if first_key.is_none() { first_key = Some(key_i); @@ -597,23 +591,21 @@ impl FacetsUpdateIncrementalInner { if highest_level == 0 || self .db - .as_polymorph() - .prefix_iter::<_, ByteSlice, ByteSlice>(txn, &highest_level_prefix)? + .remap_types::() + .prefix_iter(txn, &highest_level_prefix)? .count() >= self.min_level_size as usize { return Ok(()); } let mut to_delete = vec![]; - let mut iter = self - .db - .as_polymorph() - .prefix_iter::<_, ByteSlice, ByteSlice>(txn, &highest_level_prefix)?; + let mut iter = + self.db.remap_types::().prefix_iter(txn, &highest_level_prefix)?; for el in iter.by_ref() { let (k, _) = el?; to_delete.push( - FacetGroupKeyCodec::::bytes_decode(k) - .ok_or(Error::Encoding)? + FacetGroupKeyCodec::::bytes_decode(k) + .map_err(Error::Encoding)? 
.into_owned(), ); } @@ -1121,7 +1113,7 @@ mod fuzz { #[no_coverage] fn compare_with_trivial_database(tempdir: Rc, operations: &[Operation]) { - let index = FacetIndex::::open_from_tempdir(tempdir, 4, 8, 5); // dummy params, they'll be overwritten + let index = FacetIndex::::open_from_tempdir(tempdir, 4, 8, 5); // dummy params, they'll be overwritten let mut txn = index.env.write_txn().unwrap(); let mut trivial_db = TrivialDatabase::>::default(); @@ -1167,16 +1159,13 @@ mod fuzz { let level0iter = index .content .as_polymorph() - .prefix_iter::<_, ByteSlice, FacetGroupValueCodec>( - &mut txn, - &field_id.to_be_bytes(), - ) + .prefix_iter::<_, Bytes, FacetGroupValueCodec>(&mut txn, &field_id.to_be_bytes()) .unwrap(); for ((key, values), group) in values_field_id.iter().zip(level0iter) { let (group_key, group_values) = group.unwrap(); let group_key = - FacetGroupKeyCodec::::bytes_decode(group_key).unwrap(); + FacetGroupKeyCodec::::bytes_decode(group_key).unwrap(); assert_eq!(key, &group_key.left_bound); assert_eq!(values, &group_values.bitmap); } @@ -1186,13 +1175,13 @@ mod fuzz { let level0iter = index .content .as_polymorph() - .prefix_iter::<_, ByteSlice, FacetGroupValueCodec>(&txn, &field_id.to_be_bytes()) + .prefix_iter::<_, Bytes, FacetGroupValueCodec>(&txn, &field_id.to_be_bytes()) .unwrap(); for ((key, values), group) in values_field_id.iter().zip(level0iter) { let (group_key, group_values) = group.unwrap(); let group_key = - FacetGroupKeyCodec::::bytes_decode(group_key).unwrap(); + FacetGroupKeyCodec::::bytes_decode(group_key).unwrap(); assert_eq!(key, &group_key.left_bound); assert_eq!(values, &group_values.bitmap); } diff --git a/milli/src/update/facet/mod.rs b/milli/src/update/facet/mod.rs index 52fea0f5f..ad8a838c8 100644 --- a/milli/src/update/facet/mod.rs +++ b/milli/src/update/facet/mod.rs @@ -83,7 +83,7 @@ use std::iter::FromIterator; use charabia::normalizer::{Normalize, NormalizerOption}; use grenad::{CompressionType, SortAlgorithm}; -use heed::types::{ByteSlice, DecodeIgnore, SerdeJson}; +use heed::types::{Bytes, DecodeIgnore, SerdeJson}; use heed::BytesEncode; use log::debug; use time::OffsetDateTime; @@ -92,10 +92,10 @@ use self::incremental::FacetsUpdateIncremental; use super::FacetsUpdateBulk; use crate::facet::FacetType; use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec}; -use crate::heed_codec::ByteSliceRefCodec; +use crate::heed_codec::BytesRefCodec; use crate::update::index_documents::create_sorter; use crate::update::merge_btreeset_string; -use crate::{BEU16StrCodec, Index, Result, BEU16, MAX_FACET_VALUE_LENGTH}; +use crate::{BEU16StrCodec, Index, Result, MAX_FACET_VALUE_LENGTH}; pub mod bulk; pub mod incremental; @@ -106,7 +106,7 @@ pub mod incremental; /// a bulk update method or an incremental update method. 
pub struct FacetsUpdate<'i> { index: &'i Index, - database: heed::Database, FacetGroupValueCodec>, + database: heed::Database, FacetGroupValueCodec>, facet_type: FacetType, delta_data: grenad::Reader>, group_size: u8, @@ -120,11 +120,11 @@ impl<'i> FacetsUpdate<'i> { delta_data: grenad::Reader>, ) -> Self { let database = match facet_type { - FacetType::String => index - .facet_id_string_docids - .remap_key_type::>(), + FacetType::String => { + index.facet_id_string_docids.remap_key_type::>() + } FacetType::Number => { - index.facet_id_f64_docids.remap_key_type::>() + index.facet_id_f64_docids.remap_key_type::>() } }; Self { @@ -146,7 +146,7 @@ impl<'i> FacetsUpdate<'i> { self.index.set_updated_at(wtxn, &OffsetDateTime::now_utc())?; // See self::comparison_bench::benchmark_facet_indexing - if self.delta_data.len() >= (self.database.len(wtxn)? as u64 / 50) { + if self.delta_data.len() >= (self.database.len(wtxn)? / 50) { let field_ids = self.index.faceted_fields_ids(wtxn)?.iter().copied().collect::>(); let bulk_update = FacetsUpdateBulk::new( @@ -207,8 +207,8 @@ impl<'i> FacetsUpdate<'i> { } let set = BTreeSet::from_iter(std::iter::once(left_bound)); let key = (field_id, normalized_facet.as_ref()); - let key = BEU16StrCodec::bytes_encode(&key).ok_or(heed::Error::Encoding)?; - let val = SerdeJson::bytes_encode(&set).ok_or(heed::Error::Encoding)?; + let key = BEU16StrCodec::bytes_encode(&key).map_err(heed::Error::Encoding)?; + let val = SerdeJson::bytes_encode(&set).map_err(heed::Error::Encoding)?; sorter.insert(key, val)?; } } @@ -217,10 +217,11 @@ impl<'i> FacetsUpdate<'i> { // as the grenad sorter already merged them for us. let mut merger_iter = sorter.into_stream_merger_iter()?; while let Some((key_bytes, btreeset_bytes)) = merger_iter.next()? { - self.index - .facet_id_normalized_string_strings - .remap_types::() - .put(wtxn, key_bytes, btreeset_bytes)?; + self.index.facet_id_normalized_string_strings.remap_types::().put( + wtxn, + key_bytes, + btreeset_bytes, + )?; } // We compute one FST by string facet @@ -252,7 +253,7 @@ impl<'i> FacetsUpdate<'i> { // We write those FSTs in LMDB now for (field_id, fst) in text_fsts { - self.index.facet_id_string_fst.put(wtxn, &BEU16::new(field_id), &fst)?; + self.index.facet_id_string_fst.put(wtxn, &field_id, &fst)?; } Ok(()) @@ -267,7 +268,7 @@ pub(crate) mod test_helpers { use std::marker::PhantomData; use std::rc::Rc; - use heed::types::ByteSlice; + use heed::types::Bytes; use heed::{BytesDecode, BytesEncode, Env, RoTxn, RwTxn}; use roaring::RoaringBitmap; @@ -275,7 +276,7 @@ pub(crate) mod test_helpers { use crate::heed_codec::facet::{ FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec, }; - use crate::heed_codec::ByteSliceRefCodec; + use crate::heed_codec::BytesRefCodec; use crate::search::facet::get_highest_level; use crate::snapshot_tests::display_bitmap; use crate::update::del_add::{DelAdd, KvWriterDelAdd}; @@ -306,7 +307,7 @@ pub(crate) mod test_helpers { BytesEncode<'a> + BytesDecode<'a, DItem = >::EItem>, { pub env: Env, - pub content: heed::Database, FacetGroupValueCodec>, + pub content: heed::Database, FacetGroupValueCodec>, pub group_size: Cell, pub min_level_size: Cell, pub max_group_size: Cell, @@ -454,7 +455,7 @@ pub(crate) mod test_helpers { let left_bound_bytes = BoundCodec::bytes_encode(left_bound).unwrap().into_owned(); let key: FacetGroupKey<&[u8]> = FacetGroupKey { field_id: *field_id, level: 0, left_bound: &left_bound_bytes }; - let key = FacetGroupKeyCodec::::bytes_encode(&key).unwrap(); + let key = 
FacetGroupKeyCodec::::bytes_encode(&key).unwrap(); let mut inner_writer = KvWriterDelAdd::memory(); let value = CboRoaringBitmapCodec::bytes_encode(docids).unwrap(); inner_writer.insert(DelAdd::Addition, value).unwrap(); @@ -486,12 +487,12 @@ pub(crate) mod test_helpers { let iter = self .content - .as_polymorph() - .prefix_iter::<_, ByteSlice, FacetGroupValueCodec>(txn, &level_no_prefix) + .remap_types::() + .prefix_iter(txn, &level_no_prefix) .unwrap(); for el in iter { let (key, value) = el.unwrap(); - let key = FacetGroupKeyCodec::::bytes_decode(key).unwrap(); + let key = FacetGroupKeyCodec::::bytes_decode(key).unwrap(); let mut prefix_start_below = vec![]; prefix_start_below.extend_from_slice(&field_id.to_be_bytes()); @@ -501,14 +502,11 @@ pub(crate) mod test_helpers { let start_below = { let mut start_below_iter = self .content - .as_polymorph() - .prefix_iter::<_, ByteSlice, FacetGroupValueCodec>( - txn, - &prefix_start_below, - ) + .remap_types::() + .prefix_iter(txn, &prefix_start_below) .unwrap(); let (key_bytes, _) = start_below_iter.next().unwrap().unwrap(); - FacetGroupKeyCodec::::bytes_decode(key_bytes).unwrap() + FacetGroupKeyCodec::::bytes_decode(key_bytes).unwrap() }; assert!(value.size > 0); @@ -612,7 +610,7 @@ mod comparison_bench { } let time_spent = timer.elapsed().as_millis(); println!(" add {nbr_doc} : {time_spent}ms"); - txn.abort().unwrap(); + txn.abort(); } } } diff --git a/milli/src/update/index_documents/extract/extract_docid_word_positions.rs b/milli/src/update/index_documents/extract/extract_docid_word_positions.rs index 303b64271..a6bbf939a 100644 --- a/milli/src/update/index_documents/extract/extract_docid_word_positions.rs +++ b/milli/src/update/index_documents/extract/extract_docid_word_positions.rs @@ -309,8 +309,7 @@ fn tokens_from_document<'a>( // if a language has been detected for the token, we update the counter. if let Some(language) = token.language { let script = token.script; - let entry = - script_language_word_count.entry(script).or_insert_with(Vec::new); + let entry = script_language_word_count.entry(script).or_default(); match entry.iter_mut().find(|(l, _)| *l == language) { Some((_, n)) => *n += 1, None => entry.push((language, 1)), diff --git a/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs b/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs index 3fcec3e79..b7de1e621 100644 --- a/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs +++ b/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs @@ -6,8 +6,8 @@ use std::io::{self, BufReader}; use std::mem::size_of; use std::result::Result as StdResult; +use bytemuck::bytes_of; use grenad::Sorter; -use heed::zerocopy::AsBytes; use heed::BytesEncode; use itertools::EitherOrBoth; use ordered_float::OrderedFloat; @@ -20,9 +20,7 @@ use crate::error::InternalError; use crate::facet::value_encoding::f64_into_bytes; use crate::update::del_add::{DelAdd, KvWriterDelAdd}; use crate::update::index_documents::{create_writer, writer_into_reader}; -use crate::{ - CboRoaringBitmapCodec, DocumentId, Error, FieldId, Result, BEU32, MAX_FACET_VALUE_LENGTH, -}; +use crate::{CboRoaringBitmapCodec, DocumentId, Error, FieldId, Result, MAX_FACET_VALUE_LENGTH}; /// The length of the elements that are always in the buffer when inserting new values. 
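With heed no longer re-exporting zerocopy, the extractor's empty-marker trick switches to bytemuck: bytes_of(&()) yields the same zero-length slice that ().as_bytes() used to. (The same hunk group also shows that RwTxn::abort now returns () instead of a Result.) A tiny illustration:

    use bytemuck::bytes_of;

    // The unit value occupies no bytes, so the marker slice is empty.
    fn unit_marker_is_empty() {
        let marker: &[u8] = bytes_of(&());
        assert!(marker.is_empty());
    }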
const TRUNCATE_SIZE: usize = size_of::() + size_of::(); @@ -94,7 +92,7 @@ pub fn extract_fid_docid_facet_values( strings_key_buffer.extend_from_slice(&field_id.to_be_bytes()); let document: [u8; 4] = docid_bytes[..4].try_into().ok().unwrap(); - let document = BEU32::from(document).get(); + let document = DocumentId::from_be_bytes(document); // For the other extraction tasks, prefix the key with the field_id and the document_id numbers_key_buffer.extend_from_slice(docid_bytes); @@ -323,7 +321,7 @@ where // We insert only the Del part of the Obkv to inform // that we only want to remove all those numbers. let mut obkv = KvWriterDelAdd::memory(); - obkv.insert(DelAdd::Deletion, ().as_bytes())?; + obkv.insert(DelAdd::Deletion, bytes_of(&()))?; let bytes = obkv.into_inner()?; fid_docid_facet_numbers_sorter.insert(&key_buffer, bytes)?; } @@ -336,7 +334,7 @@ where // We insert only the Add part of the Obkv to inform // that we only want to remove all those numbers. let mut obkv = KvWriterDelAdd::memory(); - obkv.insert(DelAdd::Addition, ().as_bytes())?; + obkv.insert(DelAdd::Addition, bytes_of(&()))?; let bytes = obkv.into_inner()?; fid_docid_facet_numbers_sorter.insert(&key_buffer, bytes)?; } diff --git a/milli/src/update/index_documents/extract/extract_word_docids.rs b/milli/src/update/index_documents/extract/extract_word_docids.rs index f278012c7..66092821f 100644 --- a/milli/src/update/index_documents/extract/extract_word_docids.rs +++ b/milli/src/update/index_documents/extract/extract_word_docids.rs @@ -118,7 +118,7 @@ pub fn extract_word_docids( } let (word, fid) = StrBEU16Codec::bytes_decode(key) - .ok_or(SerializationError::Decoding { db_name: Some(DOCID_WORD_POSITIONS) })?; + .map_err(|_| SerializationError::Decoding { db_name: Some(DOCID_WORD_POSITIONS) })?; // every words contained in an attribute set to exact must be pushed in the exact_words list. if exact_attributes.contains(&fid) { diff --git a/milli/src/update/index_documents/helpers/grenad_helpers.rs b/milli/src/update/index_documents/helpers/grenad_helpers.rs index 061cbe5a0..e1b27baa2 100644 --- a/milli/src/update/index_documents/helpers/grenad_helpers.rs +++ b/milli/src/update/index_documents/helpers/grenad_helpers.rs @@ -3,7 +3,7 @@ use std::fs::File; use std::io::{self, BufReader, BufWriter, Seek}; use grenad::{CompressionType, Sorter}; -use heed::types::ByteSlice; +use heed::types::Bytes; use super::{ClonableMmap, MergeFn}; use crate::update::index_documents::valid_lmdb_key; @@ -255,7 +255,7 @@ where puffin::profile_function!(); let mut buffer = Vec::new(); - let database = database.remap_types::(); + let database = database.remap_types::(); let mut merger_iter = sorter.into_stream_merger_iter()?; while let Some((key, value)) = merger_iter.next()? 
diff --git a/milli/src/update/index_documents/mod.rs b/milli/src/update/index_documents/mod.rs
index 113114681..be2fbb25e 100644
--- a/milli/src/update/index_documents/mod.rs
+++ b/milli/src/update/index_documents/mod.rs
@@ -68,8 +68,8 @@ impl Default for IndexDocumentsMethod {
     }
 }

-pub struct IndexDocuments<'t, 'u, 'i, 'a, FP, FA> {
-    wtxn: &'t mut heed::RwTxn<'i, 'u>,
+pub struct IndexDocuments<'t, 'i, 'a, FP, FA> {
+    wtxn: &'t mut heed::RwTxn<'i>,
     index: &'i Index,
     config: IndexDocumentsConfig,
     indexer_config: &'a IndexerConfig,
@@ -90,19 +90,19 @@ pub struct IndexDocumentsConfig {
     pub autogenerate_docids: bool,
 }

-impl<'t, 'u, 'i, 'a, FP, FA> IndexDocuments<'t, 'u, 'i, 'a, FP, FA>
+impl<'t, 'i, 'a, FP, FA> IndexDocuments<'t, 'i, 'a, FP, FA>
 where
     FP: Fn(UpdateIndexingStep) + Sync,
     FA: Fn() -> bool + Sync,
 {
     pub fn new(
-        wtxn: &'t mut heed::RwTxn<'i, 'u>,
+        wtxn: &'t mut heed::RwTxn<'i>,
         index: &'i Index,
         indexer_config: &'a IndexerConfig,
         config: IndexDocumentsConfig,
         progress: FP,
         should_abort: FA,
-    ) -> Result<IndexDocuments<'t, 'u, 'i, 'a, FP, FA>> {
+    ) -> Result<IndexDocuments<'t, 'i, 'a, FP, FA>> {
         let transform = Some(Transform::new(
             wtxn,
             index,
@@ -701,7 +701,7 @@ mod tests {
     use crate::documents::documents_batch_reader_from_objects;
     use crate::index::tests::TempIndex;
     use crate::search::TermsMatchingStrategy;
-    use crate::{db_snap, Filter, Search, BEU16};
+    use crate::{db_snap, Filter, Search};

     #[test]
     fn simple_document_replacement() {
@@ -1743,14 +1743,11 @@ mod tests {
        let colour_green_id = index.fields_ids_map(&rtxn).unwrap().id("colour.green").unwrap();

        let bitmap_colour =
-            index.facet_id_exists_docids.get(&rtxn, &BEU16::new(colour_id)).unwrap().unwrap();
+            index.facet_id_exists_docids.get(&rtxn, &colour_id).unwrap().unwrap();
        assert_eq!(bitmap_colour.into_iter().collect::<Vec<_>>(), vec![0, 1, 2, 3, 4, 6, 7]);

-        let bitmap_colour_green = index
-            .facet_id_exists_docids
-            .get(&rtxn, &BEU16::new(colour_green_id))
-            .unwrap()
-            .unwrap();
+        let bitmap_colour_green =
+            index.facet_id_exists_docids.get(&rtxn, &colour_green_id).unwrap().unwrap();
        assert_eq!(bitmap_colour_green.into_iter().collect::<Vec<_>>(), vec![6, 7]);
    };
@@ -1848,21 +1845,15 @@ mod tests {
            index.fields_ids_map(&rtxn).unwrap().id("colour.green.blue").unwrap();

        let bitmap_null_colour =
-            index.facet_id_is_null_docids.get(&rtxn, &BEU16::new(colour_id)).unwrap().unwrap();
+            index.facet_id_is_null_docids.get(&rtxn, &colour_id).unwrap().unwrap();
        assert_eq!(bitmap_null_colour.into_iter().collect::<Vec<_>>(), vec![0]);

-        let bitmap_colour_green = index
-            .facet_id_is_null_docids
-            .get(&rtxn, &BEU16::new(colour_green_id))
-            .unwrap()
-            .unwrap();
+        let bitmap_colour_green =
+            index.facet_id_is_null_docids.get(&rtxn, &colour_green_id).unwrap().unwrap();
        assert_eq!(bitmap_colour_green.into_iter().collect::<Vec<_>>(), vec![2]);

-        let bitmap_colour_blue = index
-            .facet_id_is_null_docids
-            .get(&rtxn, &BEU16::new(colour_blue_id))
-            .unwrap()
-            .unwrap();
+        let bitmap_colour_blue =
+            index.facet_id_is_null_docids.get(&rtxn, &colour_blue_id).unwrap().unwrap();
        assert_eq!(bitmap_colour_blue.into_iter().collect::<Vec<_>>(), vec![3]);
    };
@@ -1917,21 +1908,15 @@ mod tests {
        let tags_blue_id = index.fields_ids_map(&rtxn).unwrap().id("tags.green.blue").unwrap();

        let bitmap_empty_tags =
-            index.facet_id_is_empty_docids.get(&rtxn, &BEU16::new(tags_id)).unwrap().unwrap();
+            index.facet_id_is_empty_docids.get(&rtxn, &tags_id).unwrap().unwrap();
        assert_eq!(bitmap_empty_tags.into_iter().collect::<Vec<_>>(), vec![2, 6, 9]);

-        let bitmap_tags_green = index
-            .facet_id_is_empty_docids
-            .get(&rtxn, &BEU16::new(tags_green_id))
-            .unwrap()
-            .unwrap();
+        let bitmap_tags_green =
+            index.facet_id_is_empty_docids.get(&rtxn, &tags_green_id).unwrap().unwrap();
        assert_eq!(bitmap_tags_green.into_iter().collect::<Vec<_>>(), vec![8]);

-        let bitmap_tags_blue = index
-            .facet_id_is_empty_docids
-            .get(&rtxn, &BEU16::new(tags_blue_id))
-            .unwrap()
-            .unwrap();
+        let bitmap_tags_blue =
+            index.facet_id_is_empty_docids.get(&rtxn, &tags_blue_id).unwrap().unwrap();
        assert_eq!(bitmap_tags_blue.into_iter().collect::<Vec<_>>(), vec![12]);
    };
@@ -2684,7 +2669,7 @@ mod tests {
     }

     fn delete_documents<'t>(
-        wtxn: &mut RwTxn<'t, '_>,
+        wtxn: &mut RwTxn<'t>,
         index: &'t TempIndex,
         external_ids: &[&str],
     ) -> Vec<u32> {
diff --git a/milli/src/update/index_documents/transform.rs b/milli/src/update/index_documents/transform.rs
index 323bc3da7..ab8e27edb 100644
--- a/milli/src/update/index_documents/transform.rs
+++ b/milli/src/update/index_documents/transform.rs
@@ -24,9 +24,7 @@ use crate::index::{db_name, main_key};
 use crate::update::del_add::{into_del_add_obkv, DelAdd, DelAddOperation, KvReaderDelAdd};
 use crate::update::index_documents::GrenadParameters;
 use crate::update::{AvailableDocumentsIds, ClearDocuments, UpdateIndexingStep};
-use crate::{
-    FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldsIdsMap, Index, Result, BEU32,
-};
+use crate::{FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldsIdsMap, Index, Result};

 pub struct TransformOutput {
     pub primary_key: String,
@@ -245,11 +243,11 @@ impl<'a, 'i> Transform<'a, 'i> {
                 let mut skip_insertion = false;
                 if let Some(original_docid) = original_docid {
-                    let original_key = BEU32::new(original_docid);
+                    let original_key = original_docid;
                     let base_obkv = self
                         .index
                         .documents
-                        .remap_data_type::<heed::types::ByteSlice>()
+                        .remap_data_type::<heed::types::Bytes>()
                         .get(wtxn, &original_key)?
                         .ok_or(InternalError::DatabaseMissingEntry {
                             db_name: db_name::DOCUMENTS,
@@ -499,11 +497,11 @@ impl<'a, 'i> Transform<'a, 'i> {
             self.replaced_documents_ids.insert(internal_docid);

             // fetch the obkv document
-            let original_key = BEU32::new(internal_docid);
+            let original_key = internal_docid;
             let base_obkv = self
                 .index
                 .documents
-                .remap_data_type::<heed::types::ByteSlice>()
+                .remap_data_type::<heed::types::Bytes>()
                 .get(txn, &original_key)?
                 .ok_or(InternalError::DatabaseMissingEntry {
                     db_name: db_name::DOCUMENTS,
@@ -811,7 +809,7 @@ impl<'a, 'i> Transform<'a, 'i> {
     // TODO this can be done in parallel by using the rayon `ThreadPool`.
     pub fn prepare_for_documents_reindexing(
         self,
-        wtxn: &mut heed::RwTxn<'i, '_>,
+        wtxn: &mut heed::RwTxn<'i>,
         old_fields_ids_map: FieldsIdsMap,
         mut new_fields_ids_map: FieldsIdsMap,
     ) -> Result<TransformOutput> {
@@ -857,7 +855,6 @@ impl<'a, 'i> Transform<'a, 'i> {
             let obkv = self.index.documents.get(wtxn, &docid)?.ok_or(
                 InternalError::DatabaseMissingEntry { db_name: db_name::DOCUMENTS, key: None },
             )?;
-            let docid = docid.get();

             obkv_buffer.clear();
             let mut obkv_writer = KvWriter::<_, FieldId>::new(&mut obkv_buffer);
diff --git a/milli/src/update/index_documents/typed_chunk.rs b/milli/src/update/index_documents/typed_chunk.rs
index 4f9f0ef6f..49e36b87e 100644
--- a/milli/src/update/index_documents/typed_chunk.rs
+++ b/milli/src/update/index_documents/typed_chunk.rs
@@ -6,8 +6,8 @@ use std::io::{self, BufReader};
 use bytemuck::allocation::pod_collect_to_vec;
 use charabia::{Language, Script};
 use grenad::MergerBuilder;
-use heed::types::ByteSlice;
-use heed::RwTxn;
+use heed::types::Bytes;
+use heed::{PutFlags, RwTxn};
 use log::error;
 use obkv::{KvReader, KvWriter};
 use ordered_float::OrderedFloat;
@@ -27,9 +27,7 @@ use crate::index::Hnsw;
 use crate::update::del_add::{deladd_serialize_add_side, DelAdd, KvReaderDelAdd};
 use crate::update::facet::FacetsUpdate;
 use crate::update::index_documents::helpers::{as_cloneable_grenad, try_split_array_at};
-use crate::{
-    lat_lng_to_xyz, DocumentId, FieldId, GeoPoint, Index, Result, SerializationError, BEU32,
-};
+use crate::{lat_lng_to_xyz, DocumentId, FieldId, GeoPoint, Index, Result, SerializationError};

 pub(crate) enum TypedChunk {
     FieldIdDocidFacetStrings(grenad::Reader<CursorClonableMmap>),
@@ -146,10 +144,10 @@ pub(crate) fn write_typed_chunk_into_index(
                 }
             }

-            let db = index.documents.remap_data_type::<ByteSlice>();
+            let db = index.documents.remap_data_type::<Bytes>();

             if !writer.is_empty() {
-                db.put(wtxn, &BEU32::new(docid), &writer.into_inner().unwrap())?;
+                db.put(wtxn, &docid, &writer.into_inner().unwrap())?;
                 operations.push(DocumentOperation {
                     external_id: external_id.to_string(),
                     internal_id: docid,
@@ -157,7 +155,7 @@ pub(crate) fn write_typed_chunk_into_index(
                 });
                 docids.insert(docid);
             } else {
-                db.delete(wtxn, &BEU32::new(docid))?;
+                db.delete(wtxn, &docid)?;
                 operations.push(DocumentOperation {
                     external_id: external_id.to_string(),
                     internal_id: docid,
@@ -295,7 +293,7 @@ pub(crate) fn write_typed_chunk_into_index(
         }
         TypedChunk::FieldIdDocidFacetNumbers(fid_docid_facet_number) => {
            let index_fid_docid_facet_numbers =
-                index.field_id_docid_facet_f64s.remap_types::<ByteSlice, ByteSlice>();
+                index.field_id_docid_facet_f64s.remap_types::<Bytes, Bytes>();
            let mut cursor = fid_docid_facet_number.into_cursor()?;
            while let Some((key, value)) = cursor.move_on_next()? {
                let reader = KvReaderDelAdd::new(value);
@@ -315,7 +313,7 @@ pub(crate) fn write_typed_chunk_into_index(
         }
         TypedChunk::FieldIdDocidFacetStrings(fid_docid_facet_string) => {
            let index_fid_docid_facet_strings =
-                index.field_id_docid_facet_strings.remap_types::<ByteSlice, ByteSlice>();
+                index.field_id_docid_facet_strings.remap_types::<Bytes, Bytes>();
            let mut cursor = fid_docid_facet_string.into_cursor()?;
            while let Some((key, value)) = cursor.move_on_next()? {
                let reader = KvReaderDelAdd::new(value);
@@ -362,8 +360,8 @@ pub(crate) fn write_typed_chunk_into_index(
             // We extract and store the previous vectors
             if let Some(hnsw) = index.vector_hnsw(wtxn)? {
                 for (pid, point) in hnsw.iter() {
-                    let pid_key = BEU32::new(pid.into_inner());
-                    let docid = index.vector_id_docid.get(wtxn, &pid_key)?.unwrap().get();
+                    let pid_key = pid.into_inner();
+                    let docid = index.vector_id_docid.get(wtxn, &pid_key)?.unwrap();
                     let vector: Vec<_> = point.iter().copied().map(OrderedFloat).collect();
                     vectors_set.insert((docid, vector));
                 }
@@ -424,11 +422,7 @@ pub(crate) fn write_typed_chunk_into_index(
             // Store the vectors in the point-docid relation database
             index.vector_id_docid.clear(wtxn)?;
             for (docid, pid) in docids.into_iter().zip(pids) {
-                index.vector_id_docid.put(
-                    wtxn,
-                    &BEU32::new(pid.into_inner()),
-                    &BEU32::new(docid),
-                )?;
+                index.vector_id_docid.put(wtxn, &pid.into_inner(), &docid)?;
             }

             log::debug!("There are {} entries in the HNSW so far", hnsw_length);
@@ -504,7 +498,7 @@ where
     puffin::profile_function!(format!("number of entries: {}", data.len()));

     let mut buffer = Vec::new();
-    let database = database.remap_types::<ByteSlice, ByteSlice>();
+    let database = database.remap_types::<Bytes, Bytes>();

     let mut cursor = data.into_cursor()?;
     while let Some((key, value)) = cursor.move_on_next()? {
@@ -562,20 +556,23 @@ where
     }

     let mut buffer = Vec::new();
-    let mut database = database.iter_mut(wtxn)?.remap_types::<ByteSlice, ByteSlice>();
+    let mut database = database.iter_mut(wtxn)?.remap_types::<Bytes, Bytes>();

     let mut cursor = data.into_cursor()?;
     while let Some((key, value)) = cursor.move_on_next()? {
         if valid_lmdb_key(key) {
             debug_assert!(
-                K::bytes_decode(key).is_some(),
+                K::bytes_decode(key).is_ok(),
                 "Couldn't decode key with the database decoder, key length: {} - key bytes: {:x?}",
                 key.len(),
                 &key
             );
             buffer.clear();
             let value = serialize_value(value, &mut buffer)?;
-            unsafe { database.append(key, value)? };
+            unsafe {
+                // safety: We do not keep a reference to anything that lives inside the database
+                database.put_current_with_options::<Bytes>(PutFlags::APPEND, key, value)?
+            };
         }
     }
diff --git a/milli/src/update/settings.rs b/milli/src/update/settings.rs
index fd7ffa760..98697325e 100644
--- a/milli/src/update/settings.rs
+++ b/milli/src/update/settings.rs
@@ -100,8 +100,8 @@ impl<'de, T: Deserialize<'de>> Deserialize<'de> for Setting<T> {
     }
 }

-pub struct Settings<'a, 't, 'u, 'i> {
-    wtxn: &'t mut heed::RwTxn<'i, 'u>,
+pub struct Settings<'a, 't, 'i> {
+    wtxn: &'t mut heed::RwTxn<'i>,
     index: &'i Index,

     indexer_config: &'a IndexerConfig,
@@ -129,12 +129,12 @@ pub struct Settings<'a, 't, 'u, 'i> {
     pagination_max_total_hits: Setting<usize>,
 }

-impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
+impl<'a, 't, 'i> Settings<'a, 't, 'i> {
     pub fn new(
-        wtxn: &'t mut heed::RwTxn<'i, 'u>,
+        wtxn: &'t mut heed::RwTxn<'i>,
         index: &'i Index,
         indexer_config: &'a IndexerConfig,
-    ) -> Settings<'a, 't, 'u, 'i> {
+    ) -> Settings<'a, 't, 'i> {
         Settings {
             wtxn,
             index,
@@ -822,7 +822,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
     fn update_max_values_per_facet(&mut self) -> Result<()> {
         match self.max_values_per_facet {
             Setting::Set(max) => {
-                self.index.put_max_values_per_facet(self.wtxn, max)?;
+                self.index.put_max_values_per_facet(self.wtxn, max as u64)?;
             }
             Setting::Reset => {
                 self.index.delete_max_values_per_facet(self.wtxn)?;
@@ -850,7 +850,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
     fn update_pagination_max_total_hits(&mut self) -> Result<()> {
         match self.pagination_max_total_hits {
             Setting::Set(max) => {
-                self.index.put_pagination_max_total_hits(self.wtxn, max)?;
+                self.index.put_pagination_max_total_hits(self.wtxn, max as u64)?;
             }
             Setting::Reset => {
                 self.index.delete_pagination_max_total_hits(self.wtxn)?;
@@ -917,7 +917,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
 #[cfg(test)]
 mod tests {
     use big_s::S;
-    use heed::types::ByteSlice;
+    use heed::types::Bytes;
     use maplit::{btreemap, btreeset, hashset};

     use super::*;
@@ -1130,7 +1130,7 @@ mod tests {
     }
     let count = index
         .facet_id_f64_docids
-        .remap_key_type::<ByteSlice>()
+        .remap_key_type::<Bytes>()
         // The faceted field id is 1u16
         .prefix_iter(&rtxn, &[0, 1, 0])
         .unwrap()
         .count();
@@ -1151,7 +1151,7 @@ mod tests {
     // Only count the field_id 0 and level 0 facet values.
     let count = index
         .facet_id_f64_docids
-        .remap_key_type::<ByteSlice>()
+        .remap_key_type::<Bytes>()
         .prefix_iter(&rtxn, &[0, 1, 0])
         .unwrap()
         .count();
@@ -1565,7 +1565,7 @@ mod tests {
         })
         .unwrap_err();
     assert!(matches!(error, Error::UserError(UserError::PrimaryKeyCannotBeChanged(_))));
-    wtxn.abort().unwrap();
+    wtxn.abort();

     // But if we clear the database...
     let mut wtxn = index.write_txn().unwrap();
diff --git a/milli/src/update/word_prefix_docids.rs b/milli/src/update/word_prefix_docids.rs
index 618f451dc..544bea224 100644
--- a/milli/src/update/word_prefix_docids.rs
+++ b/milli/src/update/word_prefix_docids.rs
@@ -1,7 +1,7 @@
 use std::collections::{HashMap, HashSet};

 use grenad::CompressionType;
-use heed::types::{ByteSlice, Str};
+use heed::types::{Bytes, Str};
 use heed::Database;

 use crate::update::del_add::{deladd_serialize_add_side, DelAdd, KvWriterDelAdd};
@@ -12,8 +12,8 @@ use crate::update::index_documents::{
 };
 use crate::{CboRoaringBitmapCodec, Result};

-pub struct WordPrefixDocids<'t, 'u, 'i> {
-    wtxn: &'t mut heed::RwTxn<'i, 'u>,
+pub struct WordPrefixDocids<'t, 'i> {
+    wtxn: &'t mut heed::RwTxn<'i>,
     word_docids: Database<Str, CboRoaringBitmapCodec>,
     word_prefix_docids: Database<Str, CboRoaringBitmapCodec>,
     pub(crate) chunk_compression_type: CompressionType,
@@ -22,12 +22,12 @@ pub struct WordPrefixDocids<'t, 'u, 'i> {
     pub(crate) max_memory: Option<usize>,
 }

-impl<'t, 'u, 'i> WordPrefixDocids<'t, 'u, 'i> {
+impl<'t, 'i> WordPrefixDocids<'t, 'i> {
     pub fn new(
-        wtxn: &'t mut heed::RwTxn<'i, 'u>,
+        wtxn: &'t mut heed::RwTxn<'i>,
         word_docids: Database<Str, CboRoaringBitmapCodec>,
         word_prefix_docids: Database<Str, CboRoaringBitmapCodec>,
-    ) -> WordPrefixDocids<'t, 'u, 'i> {
+    ) -> WordPrefixDocids<'t, 'i> {
         WordPrefixDocids {
             wtxn,
             word_docids,
@@ -93,7 +93,7 @@ impl<'t, 'u, 'i> WordPrefixDocids<'t, 'u, 'i> {
         }

         // We fetch the docids associated to the newly added word prefix fst only.
-        let db = self.word_docids.remap_data_type::<ByteSlice>();
+        let db = self.word_docids.remap_data_type::<Bytes>();
         let mut buffer = Vec::new();
         for prefix in new_prefix_fst_words {
             let prefix = std::str::from_utf8(prefix.as_bytes())?;
diff --git a/milli/src/update/words_prefix_integer_docids.rs b/milli/src/update/words_prefix_integer_docids.rs
index e083f510a..819cc097b 100644
--- a/milli/src/update/words_prefix_integer_docids.rs
+++ b/milli/src/update/words_prefix_integer_docids.rs
@@ -2,7 +2,7 @@ use std::collections::{HashMap, HashSet};
 use std::str;

 use grenad::CompressionType;
-use heed::types::ByteSlice;
+use heed::types::Bytes;
 use heed::{BytesDecode, BytesEncode, Database};
 use log::debug;
@@ -17,8 +17,8 @@ use crate::update::index_documents::{
 };
 use crate::{CboRoaringBitmapCodec, Result};

-pub struct WordPrefixIntegerDocids<'t, 'u, 'i> {
-    wtxn: &'t mut heed::RwTxn<'i, 'u>,
+pub struct WordPrefixIntegerDocids<'t, 'i> {
+    wtxn: &'t mut heed::RwTxn<'i>,
     prefix_database: Database<StrBEU16Codec, CboRoaringBitmapCodec>,
     word_database: Database<StrBEU16Codec, CboRoaringBitmapCodec>,
     pub(crate) chunk_compression_type: CompressionType,
@@ -27,12 +27,12 @@ pub struct WordPrefixIntegerDocids<'t, 'u, 'i> {
     pub(crate) max_memory: Option<usize>,
 }

-impl<'t, 'u, 'i> WordPrefixIntegerDocids<'t, 'u, 'i> {
+impl<'t, 'i> WordPrefixIntegerDocids<'t, 'i> {
     pub fn new(
-        wtxn: &'t mut heed::RwTxn<'i, 'u>,
+        wtxn: &'t mut heed::RwTxn<'i>,
         prefix_database: Database<StrBEU16Codec, CboRoaringBitmapCodec>,
         word_database: Database<StrBEU16Codec, CboRoaringBitmapCodec>,
-    ) -> WordPrefixIntegerDocids<'t, 'u, 'i> {
+    ) -> WordPrefixIntegerDocids<'t, 'i> {
         WordPrefixIntegerDocids {
             wtxn,
             prefix_database,
@@ -72,7 +72,8 @@ impl<'t, 'u, 'i> WordPrefixIntegerDocids<'t, 'u, 'i> {
         let mut current_prefixes: Option<&&[String]> = None;
         let mut prefixes_cache = HashMap::new();
         while let Some((key, data)) = new_word_integer_docids_iter.move_on_next()? {
-            let (word, pos) = StrBEU16Codec::bytes_decode(key).ok_or(heed::Error::Decoding)?;
+            let (word, pos) =
+                StrBEU16Codec::bytes_decode(key).map_err(heed::Error::Decoding)?;

             current_prefixes = match current_prefixes.take() {
                 Some(prefixes) if word.starts_with(&prefixes[0]) => Some(prefixes),
@@ -109,7 +110,7 @@ impl<'t, 'u, 'i> WordPrefixIntegerDocids<'t, 'u, 'i> {
         }

         // We fetch the docids associated to the newly added word prefix fst only.
-        let db = self.word_database.remap_data_type::<ByteSlice>();
+        let db = self.word_database.remap_data_type::<Bytes>();
         let mut buffer = Vec::new();
         for prefix_bytes in new_prefix_fst_words {
             let prefix = str::from_utf8(prefix_bytes.as_bytes()).map_err(|_| {
@@ -118,7 +119,7 @@ impl<'t, 'u, 'i> WordPrefixIntegerDocids<'t, 'u, 'i> {

             // iter over all lines of the DB where the key is prefixed by the current prefix.
             let iter = db
-                .remap_key_type::<ByteSlice>()
+                .remap_key_type::<Bytes>()
                 .prefix_iter(self.wtxn, prefix_bytes.as_bytes())?
                 .remap_key_type::<StrBEU16Codec>();
             for result in iter {
diff --git a/milli/src/update/words_prefixes_fst.rs b/milli/src/update/words_prefixes_fst.rs
index 121b45c4a..f26bf93e5 100644
--- a/milli/src/update/words_prefixes_fst.rs
+++ b/milli/src/update/words_prefixes_fst.rs
@@ -2,21 +2,19 @@ use std::iter::{repeat_with, FromIterator};
 use std::str;

 use fst::{SetBuilder, Streamer};
+use heed::RwTxn;

 use crate::{Index, Result, SmallString32};

-pub struct WordsPrefixesFst<'t, 'u, 'i> {
-    wtxn: &'t mut heed::RwTxn<'i, 'u>,
+pub struct WordsPrefixesFst<'t, 'i> {
+    wtxn: &'t mut RwTxn<'i>,
     index: &'i Index,
     threshold: u32,
     max_prefix_length: usize,
 }

-impl<'t, 'u, 'i> WordsPrefixesFst<'t, 'u, 'i> {
-    pub fn new(
-        wtxn: &'t mut heed::RwTxn<'i, 'u>,
-        index: &'i Index,
-    ) -> WordsPrefixesFst<'t, 'u, 'i> {
+impl<'t, 'i> WordsPrefixesFst<'t, 'i> {
+    pub fn new(wtxn: &'t mut RwTxn<'i>, index: &'i Index) -> WordsPrefixesFst<'t, 'i> {
         WordsPrefixesFst { wtxn, index, threshold: 100, max_prefix_length: 4 }
     }