diff --git a/Cargo.lock b/Cargo.lock index 21ae2b17f..8a0220c3e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1260,9 +1260,9 @@ dependencies = [ [[package]] name = "doxygen-rs" -version = "0.4.2" +version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "415b6ec780d34dcf624666747194393603d0373b7141eef01d12ee58881507d9" +checksum = "bff670ea0c9bbb8414e7efa6e23ebde2b8f520a7eef78273a3918cf1903e7505" dependencies = [ "phf", ] @@ -1823,8 +1823,9 @@ checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" [[package]] name = "heed" -version = "0.20.0-alpha.6" -source = "git+https://github.com/meilisearch/heed?branch=main#321b0d4f2d26ceb504e70b4cb4eac0e0bdb3d796" +version = "0.20.0-alpha.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9648a50991c86df7d00c56c268c27754fcf4c80be2ba57fc4a00dc928c6fe934" dependencies = [ "bitflags 2.3.3", "bytemuck", @@ -1834,23 +1835,24 @@ dependencies = [ "libc", "lmdb-master-sys", "once_cell", - "page_size", + "page_size 0.6.0", "synchronoise", "url", ] [[package]] name = "heed-traits" -version = "0.20.0-alpha.6" -source = "git+https://github.com/meilisearch/heed?branch=main#321b0d4f2d26ceb504e70b4cb4eac0e0bdb3d796" +version = "0.20.0-alpha.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ab0b7d9cde969ad36dde692e487dc89d97f7168bf6a7bd3b894ad4bf7278298" [[package]] name = "heed-types" -version = "0.20.0-alpha.6" -source = "git+https://github.com/meilisearch/heed?branch=main#321b0d4f2d26ceb504e70b4cb4eac0e0bdb3d796" +version = "0.20.0-alpha.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0cb3567a7363f28b597bf6e9897b9466397951dd0e52df2c8196dd8a71af44a" dependencies = [ "bincode", - "bytemuck", "byteorder", "heed-traits", "serde", @@ -2472,7 +2474,7 @@ dependencies = [ "meilisearch-auth", "meilisearch-types", "nelson", - "page_size", + "page_size 0.5.0", "puffin", "roaring", "serde", @@ -2984,11 +2986,13 @@ dependencies = [ [[package]] name = "lmdb-master-sys" version = "0.1.0" -source = "git+https://github.com/meilisearch/heed?branch=main#321b0d4f2d26ceb504e70b4cb4eac0e0bdb3d796" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "629c123f5321b48fa4f8f4d3b868165b748d9ba79c7103fb58e3a94f736bcedd" dependencies = [ "cc", "doxygen-rs", "libc", + "pkg-config", ] [[package]] @@ -3494,6 +3498,16 @@ dependencies = [ "winapi", ] +[[package]] +name = "page_size" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "30d5b2194ed13191c1999ae0704b7839fb18384fa22e49b57eeaa97d79ce40da" +dependencies = [ + "libc", + "winapi", +] + [[package]] name = "parking_lot" version = "0.12.1" diff --git a/meilisearch-auth/src/store.rs b/meilisearch-auth/src/store.rs index 944082285..742fd8d4e 100644 --- a/meilisearch-auth/src/store.rs +++ b/meilisearch-auth/src/store.rs @@ -14,7 +14,7 @@ use meilisearch_types::heed::BoxedError; use meilisearch_types::index_uid_pattern::IndexUidPattern; use meilisearch_types::keys::KeyId; use meilisearch_types::milli; -use meilisearch_types::milli::heed::types::{ByteSlice, DecodeIgnore, SerdeJson}; +use meilisearch_types::milli::heed::types::{Bytes, DecodeIgnore, SerdeJson}; use meilisearch_types::milli::heed::{Database, Env, EnvOpenOptions, RwTxn}; use sha2::Sha256; use time::OffsetDateTime; @@ -32,7 +32,7 @@ const KEY_ID_ACTION_INDEX_EXPIRATION_DB_NAME: &str = "keyid-action-index-expirat #[derive(Clone)] pub struct HeedAuthStore { env: Arc, - keys: Database>, + keys: Database>, action_keyid_index_expiration: Database>>, should_close_on_drop: bool, } @@ -278,7 +278,7 @@ impl HeedAuthStore { fn delete_key_from_inverted_db(&self, wtxn: &mut RwTxn, key: &KeyId) -> Result<()> { let mut iter = self .action_keyid_index_expiration - .remap_types::() + .remap_types::() .prefix_iter_mut(wtxn, key.as_bytes())?; while iter.next().transpose()?.is_some() { // safety: we don't keep references from inside the LMDB database. diff --git a/meilisearch-types/src/settings.rs b/meilisearch-types/src/settings.rs index 7bef64d4b..a57dc0e9a 100644 --- a/meilisearch-types/src/settings.rs +++ b/meilisearch-types/src/settings.rs @@ -532,7 +532,10 @@ pub fn settings( let faceting = FacetingSettings { max_values_per_facet: Setting::Set( - index.max_values_per_facet(rtxn)?.unwrap_or(DEFAULT_VALUES_PER_FACET), + index + .max_values_per_facet(rtxn)? + .map(|x| x as usize) + .unwrap_or(DEFAULT_VALUES_PER_FACET), ), sort_facet_values_by: Setting::Set( index @@ -545,7 +548,10 @@ pub fn settings( let pagination = PaginationSettings { max_total_hits: Setting::Set( - index.pagination_max_total_hits(rtxn)?.unwrap_or(DEFAULT_PAGINATION_MAX_TOTAL_HITS), + index + .pagination_max_total_hits(rtxn)? + .map(|x| x as usize) + .unwrap_or(DEFAULT_PAGINATION_MAX_TOTAL_HITS), ), }; diff --git a/meilisearch/src/routes/indexes/documents.rs b/meilisearch/src/routes/indexes/documents.rs index b6950ae6e..6d59f60dd 100644 --- a/meilisearch/src/routes/indexes/documents.rs +++ b/meilisearch/src/routes/indexes/documents.rs @@ -3,7 +3,7 @@ use std::io::ErrorKind; use actix_web::http::header::CONTENT_TYPE; use actix_web::web::Data; use actix_web::{web, HttpMessage, HttpRequest, HttpResponse}; -use bstr::ByteSlice; +use bstr::ByteSlice as _; use deserr::actix_web::{AwebJson, AwebQueryParameter}; use deserr::Deserr; use futures::StreamExt; diff --git a/meilisearch/src/search.rs b/meilisearch/src/search.rs index c9ebed80e..41f073b48 100644 --- a/meilisearch/src/search.rs +++ b/meilisearch/src/search.rs @@ -360,6 +360,7 @@ fn prepare_search<'t>( let max_total_hits = index .pagination_max_total_hits(rtxn) .map_err(milli::Error::from)? + .map(|x| x as usize) .unwrap_or(DEFAULT_PAGINATION_MAX_TOTAL_HITS); search.exhaustive_number_hits(is_finite_pagination); @@ -586,6 +587,7 @@ pub fn perform_search( let max_values_by_facet = index .max_values_per_facet(&rtxn) .map_err(milli::Error::from)? + .map(|x| x as usize) .unwrap_or(DEFAULT_VALUES_PER_FACET); facet_distribution.max_values_per_facet(max_values_by_facet); diff --git a/milli/Cargo.toml b/milli/Cargo.toml index e563f5d82..d21ec172e 100644 --- a/milli/Cargo.toml +++ b/milli/Cargo.toml @@ -29,7 +29,7 @@ geoutils = "0.5.1" grenad = { version = "0.4.5", default-features = false, features = [ "rayon", "tempfile" ] } -heed = { git = "https://github.com/meilisearch/heed", branch = "main", default-features = false, features = [ +heed = { version = "0.20.0-alpha.9", default-features = false, features = [ "serde-json", "serde-bincode", "read-txn-no-tls" ] } indexmap = { version = "2.0.0", features = ["serde"] } diff --git a/milli/src/heed_codec/byte_slice_ref.rs b/milli/src/heed_codec/byte_slice_ref.rs index b027c7ebc..a4b5748f1 100644 --- a/milli/src/heed_codec/byte_slice_ref.rs +++ b/milli/src/heed_codec/byte_slice_ref.rs @@ -2,11 +2,11 @@ use std::borrow::Cow; use heed::{BoxedError, BytesDecode, BytesEncode}; -/// A codec for values of type `&[u8]`. Unlike `ByteSlice`, its `EItem` and `DItem` associated +/// A codec for values of type `&[u8]`. Unlike `Bytes`, its `EItem` and `DItem` associated /// types are equivalent (= `&'a [u8]`) and these values can reside within another structure. -pub struct ByteSliceRefCodec; +pub struct BytesRefCodec; -impl<'a> BytesEncode<'a> for ByteSliceRefCodec { +impl<'a> BytesEncode<'a> for BytesRefCodec { type EItem = &'a [u8]; fn bytes_encode(item: &'a Self::EItem) -> Result, BoxedError> { @@ -14,7 +14,7 @@ impl<'a> BytesEncode<'a> for ByteSliceRefCodec { } } -impl<'a> BytesDecode<'a> for ByteSliceRefCodec { +impl<'a> BytesDecode<'a> for BytesRefCodec { type DItem = &'a [u8]; fn bytes_decode(bytes: &'a [u8]) -> Result { diff --git a/milli/src/heed_codec/mod.rs b/milli/src/heed_codec/mod.rs index 4b2b0fb6f..dde77a5f3 100644 --- a/milli/src/heed_codec/mod.rs +++ b/milli/src/heed_codec/mod.rs @@ -12,7 +12,7 @@ mod str_beu32_codec; mod str_ref; mod str_str_u8_codec; -pub use byte_slice_ref::ByteSliceRefCodec; +pub use byte_slice_ref::BytesRefCodec; use heed::BoxedError; pub use str_ref::StrRefCodec; diff --git a/milli/src/index.rs b/milli/src/index.rs index 13030fc20..b64aafbb3 100644 --- a/milli/src/index.rs +++ b/milli/src/index.rs @@ -26,7 +26,7 @@ use crate::{ default_criteria, CboRoaringBitmapCodec, Criterion, DocumentId, ExternalDocumentsIds, FacetDistribution, FieldDistribution, FieldId, FieldIdWordCountCodec, GeoPoint, ObkvCodec, OrderBy, Result, RoaringBitmapCodec, RoaringBitmapLenCodec, Search, U8StrStrCodec, BEU16, - BEU32, + BEU32, BEU64, }; /// The HNSW data-structure that we serialize, fill and search in. @@ -498,7 +498,7 @@ impl Index { let i = i as u32; let mut key = main_key::VECTOR_HNSW_KEY_PREFIX.as_bytes().to_vec(); key.extend_from_slice(&i.to_be_bytes()); - self.main.remap_types::().put(wtxn, &key, chunk)?; + self.main.remap_types::().put(wtxn, &key, chunk)?; } Ok(()) } @@ -507,7 +507,7 @@ impl Index { pub(crate) fn delete_vector_hnsw(&self, wtxn: &mut RwTxn) -> heed::Result { let mut iter = self .main - .remap_types::() + .remap_types::() .prefix_iter_mut(wtxn, main_key::VECTOR_HNSW_KEY_PREFIX.as_bytes())?; let mut deleted = false; while iter.next().transpose()?.is_some() { @@ -522,7 +522,7 @@ impl Index { let mut slices = Vec::new(); for result in self .main - .remap_types::() + .remap_types::() .prefix_iter(rtxn, main_key::VECTOR_HNSW_KEY_PREFIX)? { let (_, slice) = result?; @@ -994,7 +994,7 @@ impl Index { wtxn: &mut RwTxn, fst: &fst::Set, ) -> heed::Result<()> { - self.main.remap_types::().put( + self.main.remap_types::().put( wtxn, main_key::WORDS_FST_KEY, fst.as_fst().as_bytes(), @@ -1003,7 +1003,7 @@ impl Index { /// Returns the FST which is the words dictionary of the engine. pub fn words_fst<'t>(&self, rtxn: &'t RoTxn) -> Result>> { - match self.main.remap_types::().get(rtxn, main_key::WORDS_FST_KEY)? { + match self.main.remap_types::().get(rtxn, main_key::WORDS_FST_KEY)? { Some(bytes) => Ok(fst::Set::new(bytes)?.map_data(Cow::Borrowed)?), None => Ok(fst::Set::default().map_data(Cow::Owned)?), } @@ -1016,7 +1016,7 @@ impl Index { wtxn: &mut RwTxn, fst: &fst::Set, ) -> heed::Result<()> { - self.main.remap_types::().put( + self.main.remap_types::().put( wtxn, main_key::STOP_WORDS_KEY, fst.as_fst().as_bytes(), @@ -1028,7 +1028,7 @@ impl Index { } pub fn stop_words<'t>(&self, rtxn: &'t RoTxn) -> Result>> { - match self.main.remap_types::().get(rtxn, main_key::STOP_WORDS_KEY)? { + match self.main.remap_types::().get(rtxn, main_key::STOP_WORDS_KEY)? { Some(bytes) => Ok(Some(fst::Set::new(bytes)?)), None => Ok(None), } @@ -1186,7 +1186,7 @@ impl Index { wtxn: &mut RwTxn, fst: &fst::Set, ) -> heed::Result<()> { - self.main.remap_types::().put( + self.main.remap_types::().put( wtxn, main_key::WORDS_PREFIXES_FST_KEY, fst.as_fst().as_bytes(), @@ -1195,11 +1195,7 @@ impl Index { /// Returns the FST which is the words prefixes dictionnary of the engine. pub fn words_prefixes_fst<'t>(&self, rtxn: &'t RoTxn) -> Result>> { - match self - .main - .remap_types::() - .get(rtxn, main_key::WORDS_PREFIXES_FST_KEY)? - { + match self.main.remap_types::().get(rtxn, main_key::WORDS_PREFIXES_FST_KEY)? { Some(bytes) => Ok(fst::Set::new(bytes)?.map_data(Cow::Borrowed)?), None => Ok(fst::Set::default().map_data(Cow::Owned)?), } @@ -1325,7 +1321,7 @@ impl Index { // It is not possible to put a bool in heed with OwnedType, so we put a u8 instead. We // identify 0 as being false, and anything else as true. The absence of a value is true, // because by default, we authorize typos. - match self.main.remap_types::>().get(txn, main_key::AUTHORIZE_TYPOS)? { + match self.main.remap_types::().get(txn, main_key::AUTHORIZE_TYPOS)? { Some(0) => Ok(false), _ => Ok(true), } @@ -1335,11 +1331,7 @@ impl Index { // It is not possible to put a bool in heed with OwnedType, so we put a u8 instead. We // identify 0 as being false, and anything else as true. The absence of a value is true, // because by default, we authorize typos. - self.main.remap_types::>().put( - txn, - main_key::AUTHORIZE_TYPOS, - &(flag as u8), - )?; + self.main.remap_types::().put(txn, main_key::AUTHORIZE_TYPOS, &(flag as u8))?; Ok(()) } @@ -1350,7 +1342,7 @@ impl Index { // because by default, we authorize typos. Ok(self .main - .remap_types::>() + .remap_types::() .get(txn, main_key::ONE_TYPO_WORD_LEN)? .unwrap_or(DEFAULT_MIN_WORD_LEN_ONE_TYPO)) } @@ -1359,11 +1351,7 @@ impl Index { // It is not possible to put a bool in heed with OwnedType, so we put a u8 instead. We // identify 0 as being false, and anything else as true. The absence of a value is true, // because by default, we authorize typos. - self.main.remap_types::>().put( - txn, - main_key::ONE_TYPO_WORD_LEN, - &val, - )?; + self.main.remap_types::().put(txn, main_key::ONE_TYPO_WORD_LEN, &val)?; Ok(()) } @@ -1373,7 +1361,7 @@ impl Index { // because by default, we authorize typos. Ok(self .main - .remap_types::>() + .remap_types::() .get(txn, main_key::TWO_TYPOS_WORD_LEN)? .unwrap_or(DEFAULT_MIN_WORD_LEN_TWO_TYPOS)) } @@ -1382,17 +1370,13 @@ impl Index { // It is not possible to put a bool in heed with OwnedType, so we put a u8 instead. We // identify 0 as being false, and anything else as true. The absence of a value is true, // because by default, we authorize typos. - self.main.remap_types::>().put( - txn, - main_key::TWO_TYPOS_WORD_LEN, - &val, - )?; + self.main.remap_types::().put(txn, main_key::TWO_TYPOS_WORD_LEN, &val)?; Ok(()) } /// List the words on which typo are not allowed pub fn exact_words<'t>(&self, txn: &'t RoTxn) -> Result>>> { - match self.main.remap_types::().get(txn, main_key::EXACT_WORDS)? { + match self.main.remap_types::().get(txn, main_key::EXACT_WORDS)? { Some(bytes) => Ok(Some(fst::Set::new(bytes)?.map_data(Cow::Borrowed)?)), None => Ok(None), } @@ -1403,7 +1387,7 @@ impl Index { txn: &mut RwTxn, words: &fst::Set, ) -> Result<()> { - self.main.remap_types::().put( + self.main.remap_types::().put( txn, main_key::EXACT_WORDS, words.as_fst().as_bytes(), @@ -1442,16 +1426,12 @@ impl Index { self.main.remap_key_type::().delete(txn, main_key::EXACT_ATTRIBUTES) } - pub fn max_values_per_facet(&self, txn: &RoTxn) -> heed::Result> { - self.main.remap_types::>().get(txn, main_key::MAX_VALUES_PER_FACET) + pub fn max_values_per_facet(&self, txn: &RoTxn) -> heed::Result> { + self.main.remap_types::().get(txn, main_key::MAX_VALUES_PER_FACET) } - pub(crate) fn put_max_values_per_facet(&self, txn: &mut RwTxn, val: usize) -> heed::Result<()> { - self.main.remap_types::>().put( - txn, - main_key::MAX_VALUES_PER_FACET, - &val, - ) + pub(crate) fn put_max_values_per_facet(&self, txn: &mut RwTxn, val: u64) -> heed::Result<()> { + self.main.remap_types::().put(txn, main_key::MAX_VALUES_PER_FACET, &val) } pub(crate) fn delete_max_values_per_facet(&self, txn: &mut RwTxn) -> heed::Result { @@ -1481,22 +1461,16 @@ impl Index { self.main.remap_key_type::().delete(txn, main_key::SORT_FACET_VALUES_BY) } - pub fn pagination_max_total_hits(&self, txn: &RoTxn) -> heed::Result> { - self.main - .remap_types::>() - .get(txn, main_key::PAGINATION_MAX_TOTAL_HITS) + pub fn pagination_max_total_hits(&self, txn: &RoTxn) -> heed::Result> { + self.main.remap_types::().get(txn, main_key::PAGINATION_MAX_TOTAL_HITS) } pub(crate) fn put_pagination_max_total_hits( &self, txn: &mut RwTxn, - val: usize, + val: u64, ) -> heed::Result<()> { - self.main.remap_types::>().put( - txn, - main_key::PAGINATION_MAX_TOTAL_HITS, - &val, - ) + self.main.remap_types::().put(txn, main_key::PAGINATION_MAX_TOTAL_HITS, &val) } pub(crate) fn delete_pagination_max_total_hits(&self, txn: &mut RwTxn) -> heed::Result { diff --git a/milli/src/search/facet/facet_distribution.rs b/milli/src/search/facet/facet_distribution.rs index acf117ef6..90da16797 100644 --- a/milli/src/search/facet/facet_distribution.rs +++ b/milli/src/search/facet/facet_distribution.rs @@ -2,7 +2,7 @@ use std::collections::{BTreeMap, HashMap, HashSet}; use std::ops::ControlFlow; use std::{fmt, mem}; -use heed::types::ByteSlice; +use heed::types::Bytes; use heed::BytesDecode; use indexmap::IndexMap; use roaring::RoaringBitmap; @@ -13,7 +13,7 @@ use crate::facet::FacetType; use crate::heed_codec::facet::{ FacetGroupKeyCodec, FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec, OrderedF64Codec, }; -use crate::heed_codec::{ByteSliceRefCodec, StrRefCodec}; +use crate::heed_codec::{BytesRefCodec, StrRefCodec}; use crate::search::facet::facet_distribution_iter::{ count_iterate_over_facet_distribution, lexicographically_iterate_over_facet_distribution, }; @@ -105,7 +105,7 @@ impl<'a> FacetDistribution<'a> { key_buffer.truncate(mem::size_of::()); key_buffer.extend_from_slice(&docid.to_be_bytes()); let iter = db - .remap_key_type::() + .remap_key_type::() .prefix_iter(self.rtxn, &key_buffer)? .remap_key_type::(); @@ -129,7 +129,7 @@ impl<'a> FacetDistribution<'a> { key_buffer.truncate(mem::size_of::()); key_buffer.extend_from_slice(&docid.to_be_bytes()); let iter = db - .remap_key_type::() + .remap_key_type::() .prefix_iter(self.rtxn, &key_buffer)? .remap_key_type::(); @@ -172,9 +172,7 @@ impl<'a> FacetDistribution<'a> { search_function( self.rtxn, - self.index - .facet_id_f64_docids - .remap_key_type::>(), + self.index.facet_id_f64_docids.remap_key_type::>(), field_id, candidates, |facet_key, nbr_docids, _| { @@ -203,9 +201,7 @@ impl<'a> FacetDistribution<'a> { search_function( self.rtxn, - self.index - .facet_id_string_docids - .remap_key_type::>(), + self.index.facet_id_string_docids.remap_key_type::>(), field_id, candidates, |facet_key, nbr_docids, any_docid| { diff --git a/milli/src/search/facet/facet_distribution_iter.rs b/milli/src/search/facet/facet_distribution_iter.rs index 722a30e6d..d993ef2dc 100644 --- a/milli/src/search/facet/facet_distribution_iter.rs +++ b/milli/src/search/facet/facet_distribution_iter.rs @@ -7,7 +7,7 @@ use roaring::RoaringBitmap; use super::{get_first_facet_value, get_highest_level}; use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec}; -use crate::heed_codec::ByteSliceRefCodec; +use crate::heed_codec::BytesRefCodec; use crate::DocumentId; /// Call the given closure on the facet distribution of the candidate documents. @@ -23,7 +23,7 @@ use crate::DocumentId; /// keep iterating over the different facet values or stop. pub fn lexicographically_iterate_over_facet_distribution<'t, CB>( rtxn: &'t heed::RoTxn<'t>, - db: heed::Database, FacetGroupValueCodec>, + db: heed::Database, FacetGroupValueCodec>, field_id: u16, candidates: &RoaringBitmap, callback: CB, @@ -34,11 +34,11 @@ where let mut fd = LexicographicFacetDistribution { rtxn, db, field_id, callback }; let highest_level = get_highest_level( rtxn, - db.remap_key_type::>(), + db.remap_key_type::>(), field_id, )?; - if let Some(first_bound) = get_first_facet_value::(rtxn, db, field_id)? { + if let Some(first_bound) = get_first_facet_value::(rtxn, db, field_id)? { fd.iterate(candidates, highest_level, first_bound, usize::MAX)?; Ok(()) } else { @@ -48,7 +48,7 @@ where pub fn count_iterate_over_facet_distribution<'t, CB>( rtxn: &'t heed::RoTxn<'t>, - db: heed::Database, FacetGroupValueCodec>, + db: heed::Database, FacetGroupValueCodec>, field_id: u16, candidates: &RoaringBitmap, mut callback: CB, @@ -77,11 +77,11 @@ where let mut heap = BinaryHeap::new(); let highest_level = get_highest_level( rtxn, - db.remap_key_type::>(), + db.remap_key_type::>(), field_id, )?; - if let Some(first_bound) = get_first_facet_value::(rtxn, db, field_id)? { + if let Some(first_bound) = get_first_facet_value::(rtxn, db, field_id)? { // We first fill the heap with values from the highest level let starting_key = FacetGroupKey { field_id, level: highest_level, left_bound: first_bound }; @@ -146,7 +146,7 @@ where CB: FnMut(&'t [u8], u64, DocumentId) -> Result>, { rtxn: &'t heed::RoTxn<'t>, - db: heed::Database, FacetGroupValueCodec>, + db: heed::Database, FacetGroupValueCodec>, field_id: u16, callback: CB, } diff --git a/milli/src/search/facet/facet_range_search.rs b/milli/src/search/facet/facet_range_search.rs index f90c331d7..f1a26ded5 100644 --- a/milli/src/search/facet/facet_range_search.rs +++ b/milli/src/search/facet/facet_range_search.rs @@ -5,7 +5,7 @@ use roaring::RoaringBitmap; use super::{get_first_facet_value, get_highest_level, get_last_facet_value}; use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec}; -use crate::heed_codec::ByteSliceRefCodec; +use crate::heed_codec::BytesRefCodec; use crate::Result; /// Find all the document ids for which the given field contains a value contained within @@ -46,16 +46,13 @@ where } Bound::Unbounded => Bound::Unbounded, }; - let db = db.remap_key_type::>(); + let db = db.remap_key_type::>(); let mut f = FacetRangeSearch { rtxn, db, field_id, left, right, docids }; let highest_level = get_highest_level(rtxn, db, field_id)?; - if let Some(starting_left_bound) = - get_first_facet_value::(rtxn, db, field_id)? - { - let rightmost_bound = Bound::Included( - get_last_facet_value::(rtxn, db, field_id)?.unwrap(), - ); // will not fail because get_first_facet_value succeeded + if let Some(starting_left_bound) = get_first_facet_value::(rtxn, db, field_id)? { + let rightmost_bound = + Bound::Included(get_last_facet_value::(rtxn, db, field_id)?.unwrap()); // will not fail because get_first_facet_value succeeded let group_size = usize::MAX; f.run(highest_level, starting_left_bound, rightmost_bound, group_size)?; Ok(()) @@ -67,7 +64,7 @@ where /// Fetch the document ids that have a facet with a value between the two given bounds struct FacetRangeSearch<'t, 'b, 'bitmap> { rtxn: &'t heed::RoTxn<'t>, - db: heed::Database, FacetGroupValueCodec>, + db: heed::Database, FacetGroupValueCodec>, field_id: u16, left: Bound<&'b [u8]>, right: Bound<&'b [u8]>, diff --git a/milli/src/search/facet/facet_sort_ascending.rs b/milli/src/search/facet/facet_sort_ascending.rs index 0197639e4..20c277c63 100644 --- a/milli/src/search/facet/facet_sort_ascending.rs +++ b/milli/src/search/facet/facet_sort_ascending.rs @@ -5,7 +5,7 @@ use super::{get_first_facet_value, get_highest_level}; use crate::heed_codec::facet::{ FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec, }; -use crate::heed_codec::ByteSliceRefCodec; +use crate::heed_codec::BytesRefCodec; /// Return an iterator which iterates over the given candidate documents in /// ascending order of their facet value for the given field id. @@ -31,12 +31,12 @@ use crate::heed_codec::ByteSliceRefCodec; /// Note that once a document id is returned by the iterator, it is never returned again. pub fn ascending_facet_sort<'t>( rtxn: &'t heed::RoTxn<'t>, - db: heed::Database, FacetGroupValueCodec>, + db: heed::Database, FacetGroupValueCodec>, field_id: u16, candidates: RoaringBitmap, ) -> Result> + 't> { let highest_level = get_highest_level(rtxn, db, field_id)?; - if let Some(first_bound) = get_first_facet_value::(rtxn, db, field_id)? { + if let Some(first_bound) = get_first_facet_value::(rtxn, db, field_id)? { let first_key = FacetGroupKey { field_id, level: highest_level, left_bound: first_bound }; let iter = db.range(rtxn, &(first_key..)).unwrap().take(usize::MAX); @@ -53,14 +53,12 @@ pub fn ascending_facet_sort<'t>( struct AscendingFacetSort<'t, 'e> { rtxn: &'t heed::RoTxn<'e>, - db: heed::Database, FacetGroupValueCodec>, + db: heed::Database, FacetGroupValueCodec>, field_id: u16, #[allow(clippy::type_complexity)] stack: Vec<( RoaringBitmap, - std::iter::Take< - heed::RoRange<'t, FacetGroupKeyCodec, FacetGroupValueCodec>, - >, + std::iter::Take, FacetGroupValueCodec>>, )>, } diff --git a/milli/src/search/facet/facet_sort_descending.rs b/milli/src/search/facet/facet_sort_descending.rs index 549f50f0a..ae6eb60d0 100644 --- a/milli/src/search/facet/facet_sort_descending.rs +++ b/milli/src/search/facet/facet_sort_descending.rs @@ -7,21 +7,21 @@ use super::{get_first_facet_value, get_highest_level, get_last_facet_value}; use crate::heed_codec::facet::{ FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec, }; -use crate::heed_codec::ByteSliceRefCodec; +use crate::heed_codec::BytesRefCodec; /// See documentationg for [`ascending_facet_sort`](super::ascending_facet_sort). /// /// This function does the same thing, but in the opposite order. pub fn descending_facet_sort<'t>( rtxn: &'t heed::RoTxn<'t>, - db: heed::Database, FacetGroupValueCodec>, + db: heed::Database, FacetGroupValueCodec>, field_id: u16, candidates: RoaringBitmap, ) -> Result> + 't> { let highest_level = get_highest_level(rtxn, db, field_id)?; - if let Some(first_bound) = get_first_facet_value::(rtxn, db, field_id)? { + if let Some(first_bound) = get_first_facet_value::(rtxn, db, field_id)? { let first_key = FacetGroupKey { field_id, level: highest_level, left_bound: first_bound }; - let last_bound = get_last_facet_value::(rtxn, db, field_id)?.unwrap(); + let last_bound = get_last_facet_value::(rtxn, db, field_id)?.unwrap(); let last_key = FacetGroupKey { field_id, level: highest_level, left_bound: last_bound }; let iter = db.rev_range(rtxn, &(first_key..=last_key))?.take(usize::MAX); Ok(itertools::Either::Left(DescendingFacetSort { @@ -37,13 +37,13 @@ pub fn descending_facet_sort<'t>( struct DescendingFacetSort<'t> { rtxn: &'t heed::RoTxn<'t>, - db: heed::Database, FacetGroupValueCodec>, + db: heed::Database, FacetGroupValueCodec>, field_id: u16, #[allow(clippy::type_complexity)] stack: Vec<( RoaringBitmap, std::iter::Take< - heed::RoRevRange<'t, FacetGroupKeyCodec, FacetGroupValueCodec>, + heed::RoRevRange<'t, FacetGroupKeyCodec, FacetGroupValueCodec>, >, Bound<&'t [u8]>, )>, @@ -100,7 +100,7 @@ impl<'t> Iterator for DescendingFacetSort<'t> { *right_bound = Bound::Excluded(left_bound); let iter = match self .db - .remap_key_type::>() + .remap_key_type::>() .rev_range(self.rtxn, &(Bound::Included(starting_key_below), end_key_kelow)) { Ok(iter) => iter, @@ -123,7 +123,7 @@ mod tests { use roaring::RoaringBitmap; use crate::heed_codec::facet::FacetGroupKeyCodec; - use crate::heed_codec::ByteSliceRefCodec; + use crate::heed_codec::BytesRefCodec; use crate::milli_snap; use crate::search::facet::facet_sort_descending::descending_facet_sort; use crate::search::facet::tests::{ @@ -144,7 +144,7 @@ mod tests { let txn = index.env.read_txn().unwrap(); let candidates = (200..=300).collect::(); let mut results = String::new(); - let db = index.content.remap_key_type::>(); + let db = index.content.remap_key_type::>(); let iter = descending_facet_sort(&txn, db, 0, candidates).unwrap(); for el in iter { let (docids, _) = el.unwrap(); @@ -167,7 +167,7 @@ mod tests { let txn = index.env.read_txn().unwrap(); let candidates = (200..=300).collect::(); let mut results = String::new(); - let db = index.content.remap_key_type::>(); + let db = index.content.remap_key_type::>(); let iter = descending_facet_sort(&txn, db, 0, candidates.clone()).unwrap(); for el in iter { let (docids, _) = el.unwrap(); diff --git a/milli/src/search/facet/mod.rs b/milli/src/search/facet/mod.rs index 0572d7297..f44d6a153 100644 --- a/milli/src/search/facet/mod.rs +++ b/milli/src/search/facet/mod.rs @@ -1,13 +1,13 @@ pub use facet_sort_ascending::ascending_facet_sort; pub use facet_sort_descending::descending_facet_sort; -use heed::types::{ByteSlice, DecodeIgnore}; +use heed::types::{Bytes, DecodeIgnore}; use heed::{BytesDecode, RoTxn}; use roaring::RoaringBitmap; pub use self::facet_distribution::{FacetDistribution, OrderBy, DEFAULT_VALUES_PER_FACET}; pub use self::filter::{BadGeoError, Filter}; use crate::heed_codec::facet::{FacetGroupKeyCodec, FacetGroupValueCodec, OrderedF64Codec}; -use crate::heed_codec::ByteSliceRefCodec; +use crate::heed_codec::BytesRefCodec; use crate::{Index, Result}; mod facet_distribution; mod facet_distribution_iter; @@ -34,7 +34,7 @@ pub fn facet_min_value<'t>( field_id: u16, candidates: RoaringBitmap, ) -> Result> { - let db = index.facet_id_f64_docids.remap_key_type::>(); + let db = index.facet_id_f64_docids.remap_key_type::>(); let it = ascending_facet_sort(rtxn, db, field_id, candidates)?; facet_extreme_value(it) } @@ -45,7 +45,7 @@ pub fn facet_max_value<'t>( field_id: u16, candidates: RoaringBitmap, ) -> Result> { - let db = index.facet_id_f64_docids.remap_key_type::>(); + let db = index.facet_id_f64_docids.remap_key_type::>(); let it = descending_facet_sort(rtxn, db, field_id, candidates)?; facet_extreme_value(it) } @@ -53,7 +53,7 @@ pub fn facet_max_value<'t>( /// Get the first facet value in the facet database pub(crate) fn get_first_facet_value<'t, BoundCodec>( txn: &'t RoTxn, - db: heed::Database, FacetGroupValueCodec>, + db: heed::Database, FacetGroupValueCodec>, field_id: u16, ) -> heed::Result> where @@ -63,7 +63,7 @@ where level0prefix.extend_from_slice(&field_id.to_be_bytes()); level0prefix.push(0); let mut level0_iter_forward = - db.remap_types::().prefix_iter(txn, level0prefix.as_slice())?; + db.remap_types::().prefix_iter(txn, level0prefix.as_slice())?; if let Some(first) = level0_iter_forward.next() { let (first_key, _) = first?; let first_key = FacetGroupKeyCodec::::bytes_decode(first_key) @@ -77,7 +77,7 @@ where /// Get the last facet value in the facet database pub(crate) fn get_last_facet_value<'t, BoundCodec>( txn: &'t RoTxn, - db: heed::Database, FacetGroupValueCodec>, + db: heed::Database, FacetGroupValueCodec>, field_id: u16, ) -> heed::Result> where @@ -86,9 +86,8 @@ where let mut level0prefix = vec![]; level0prefix.extend_from_slice(&field_id.to_be_bytes()); level0prefix.push(0); - let mut level0_iter_backward = db - .remap_types::() - .rev_prefix_iter(txn, level0prefix.as_slice())?; + let mut level0_iter_backward = + db.remap_types::().rev_prefix_iter(txn, level0prefix.as_slice())?; if let Some(last) = level0_iter_backward.next() { let (last_key, _) = last?; let last_key = FacetGroupKeyCodec::::bytes_decode(last_key) @@ -102,17 +101,17 @@ where /// Get the height of the highest level in the facet database pub(crate) fn get_highest_level<'t>( txn: &'t RoTxn<'t>, - db: heed::Database, FacetGroupValueCodec>, + db: heed::Database, FacetGroupValueCodec>, field_id: u16, ) -> heed::Result { let field_id_prefix = &field_id.to_be_bytes(); Ok(db - .remap_types::() + .remap_types::() .rev_prefix_iter(txn, field_id_prefix)? .next() .map(|el| { let (key, _) = el.unwrap(); - let key = FacetGroupKeyCodec::::bytes_decode(key).unwrap(); + let key = FacetGroupKeyCodec::::bytes_decode(key).unwrap(); key.level }) .unwrap_or(0)) diff --git a/milli/src/search/new/db_cache.rs b/milli/src/search/new/db_cache.rs index 1dd701553..24c7d5076 100644 --- a/milli/src/search/new/db_cache.rs +++ b/milli/src/search/new/db_cache.rs @@ -3,7 +3,7 @@ use std::collections::hash_map::Entry; use std::hash::Hash; use fxhash::FxHashMap; -use heed::types::ByteSlice; +use heed::types::Bytes; use heed::{BytesEncode, Database, RoTxn}; use roaring::RoaringBitmap; @@ -50,7 +50,7 @@ impl<'ctx> DatabaseCache<'ctx> { cache_key: K1, db_key: &'v KC::EItem, cache: &mut FxHashMap>>, - db: Database, + db: Database, ) -> Result> where K1: Copy + Eq + Hash, @@ -80,7 +80,7 @@ impl<'ctx> DatabaseCache<'ctx> { cache_key: K1, db_keys: &'v [KC::EItem], cache: &mut FxHashMap>>, - db: Database, + db: Database, merger: MergeFn, ) -> Result> where @@ -168,7 +168,7 @@ impl<'ctx> SearchContext<'ctx> { word, &keys[..], &mut self.db_cache.word_docids, - self.index.word_fid_docids.remap_data_type::(), + self.index.word_fid_docids.remap_data_type::(), merge_cbo_roaring_bitmaps, ) } @@ -177,7 +177,7 @@ impl<'ctx> SearchContext<'ctx> { word, self.word_interner.get(word).as_str(), &mut self.db_cache.word_docids, - self.index.word_docids.remap_data_type::(), + self.index.word_docids.remap_data_type::(), ), } } @@ -191,7 +191,7 @@ impl<'ctx> SearchContext<'ctx> { word, self.word_interner.get(word).as_str(), &mut self.db_cache.exact_word_docids, - self.index.exact_word_docids.remap_data_type::(), + self.index.exact_word_docids.remap_data_type::(), ) } @@ -230,7 +230,7 @@ impl<'ctx> SearchContext<'ctx> { prefix, &keys[..], &mut self.db_cache.word_prefix_docids, - self.index.word_prefix_fid_docids.remap_data_type::(), + self.index.word_prefix_fid_docids.remap_data_type::(), merge_cbo_roaring_bitmaps, ) } @@ -239,7 +239,7 @@ impl<'ctx> SearchContext<'ctx> { prefix, self.word_interner.get(prefix).as_str(), &mut self.db_cache.word_prefix_docids, - self.index.word_prefix_docids.remap_data_type::(), + self.index.word_prefix_docids.remap_data_type::(), ), } } @@ -253,7 +253,7 @@ impl<'ctx> SearchContext<'ctx> { prefix, self.word_interner.get(prefix).as_str(), &mut self.db_cache.exact_word_prefix_docids, - self.index.exact_word_prefix_docids.remap_data_type::(), + self.index.exact_word_prefix_docids.remap_data_type::(), ) } @@ -272,7 +272,7 @@ impl<'ctx> SearchContext<'ctx> { self.word_interner.get(word2).as_str(), ), &mut self.db_cache.word_pair_proximity_docids, - self.index.word_pair_proximity_docids.remap_data_type::(), + self.index.word_pair_proximity_docids.remap_data_type::(), ) } @@ -291,7 +291,7 @@ impl<'ctx> SearchContext<'ctx> { self.word_interner.get(word2).as_str(), ), &mut self.db_cache.word_pair_proximity_docids, - self.index.word_pair_proximity_docids.remap_data_type::(), + self.index.word_pair_proximity_docids.remap_data_type::(), ) } @@ -320,7 +320,7 @@ impl<'ctx> SearchContext<'ctx> { let remap_key_type = self .index .word_pair_proximity_docids - .remap_key_type::() + .remap_key_type::() .prefix_iter(self.txn, &key)?; for result in remap_key_type { let (_, docids) = result?; @@ -359,7 +359,7 @@ impl<'ctx> SearchContext<'ctx> { (word, fid), &(self.word_interner.get(word).as_str(), fid), &mut self.db_cache.word_fid_docids, - self.index.word_fid_docids.remap_data_type::(), + self.index.word_fid_docids.remap_data_type::(), ) } @@ -378,7 +378,7 @@ impl<'ctx> SearchContext<'ctx> { (word_prefix, fid), &(self.word_interner.get(word_prefix).as_str(), fid), &mut self.db_cache.word_prefix_fid_docids, - self.index.word_prefix_fid_docids.remap_data_type::(), + self.index.word_prefix_fid_docids.remap_data_type::(), ) } @@ -392,7 +392,7 @@ impl<'ctx> SearchContext<'ctx> { let remap_key_type = self .index .word_fid_docids - .remap_types::() + .remap_types::() .prefix_iter(self.txn, &key)? .remap_key_type::(); for result in remap_key_type { @@ -418,7 +418,7 @@ impl<'ctx> SearchContext<'ctx> { let remap_key_type = self .index .word_prefix_fid_docids - .remap_types::() + .remap_types::() .prefix_iter(self.txn, &key)? .remap_key_type::(); for result in remap_key_type { @@ -446,7 +446,7 @@ impl<'ctx> SearchContext<'ctx> { (word, position), &(self.word_interner.get(word).as_str(), position), &mut self.db_cache.word_position_docids, - self.index.word_position_docids.remap_data_type::(), + self.index.word_position_docids.remap_data_type::(), ) } @@ -460,7 +460,7 @@ impl<'ctx> SearchContext<'ctx> { (word_prefix, position), &(self.word_interner.get(word_prefix).as_str(), position), &mut self.db_cache.word_prefix_position_docids, - self.index.word_prefix_position_docids.remap_data_type::(), + self.index.word_prefix_position_docids.remap_data_type::(), ) } @@ -474,7 +474,7 @@ impl<'ctx> SearchContext<'ctx> { let remap_key_type = self .index .word_position_docids - .remap_types::() + .remap_types::() .prefix_iter(self.txn, &key)? .remap_key_type::(); for result in remap_key_type { @@ -505,7 +505,7 @@ impl<'ctx> SearchContext<'ctx> { let remap_key_type = self .index .word_prefix_position_docids - .remap_types::() + .remap_types::() .prefix_iter(self.txn, &key)? .remap_key_type::(); for result in remap_key_type { diff --git a/milli/src/search/new/distinct.rs b/milli/src/search/new/distinct.rs index e90ffe878..25ea0b0a3 100644 --- a/milli/src/search/new/distinct.rs +++ b/milli/src/search/new/distinct.rs @@ -1,4 +1,4 @@ -use heed::types::{ByteSlice, Str, Unit}; +use heed::types::{Bytes, Str, Unit}; use heed::{Database, RoPrefix, RoTxn}; use roaring::RoaringBitmap; @@ -8,7 +8,7 @@ const DOCID_SIZE: usize = 4; use crate::heed_codec::facet::{ FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec, FieldDocIdFacetCodec, }; -use crate::heed_codec::ByteSliceRefCodec; +use crate::heed_codec::BytesRefCodec; use crate::{Index, Result, SearchContext}; pub struct DistinctOutput { @@ -71,7 +71,7 @@ pub fn distinct_single_docid( /// Return all the docids containing the given value in the given field fn facet_value_docids( - database: Database, FacetGroupValueCodec>, + database: Database, FacetGroupValueCodec>, txn: &RoTxn, field_id: u16, facet_value: &[u8], @@ -87,12 +87,12 @@ fn facet_number_values<'a>( field_id: u16, index: &Index, txn: &'a RoTxn, -) -> Result, Unit>> { +) -> Result, Unit>> { let key = facet_values_prefix_key(field_id, docid); let iter = index .field_id_docid_facet_f64s - .remap_key_type::() + .remap_key_type::() .prefix_iter(txn, &key)? .remap_key_type(); @@ -105,12 +105,12 @@ pub fn facet_string_values<'a>( field_id: u16, index: &Index, txn: &'a RoTxn, -) -> Result, Str>> { +) -> Result, Str>> { let key = facet_values_prefix_key(field_id, docid); let iter = index .field_id_docid_facet_strings - .remap_key_type::() + .remap_key_type::() .prefix_iter(txn, &key)? .remap_types(); diff --git a/milli/src/search/new/geo_sort.rs b/milli/src/search/new/geo_sort.rs index 0cefc19c2..b2e3a2f3d 100644 --- a/milli/src/search/new/geo_sort.rs +++ b/milli/src/search/new/geo_sort.rs @@ -1,7 +1,7 @@ use std::collections::VecDeque; use std::iter::FromIterator; -use heed::types::{ByteSlice, Unit}; +use heed::types::{Bytes, Unit}; use heed::{RoPrefix, RoTxn}; use roaring::RoaringBitmap; use rstar::RTree; @@ -34,7 +34,7 @@ fn facet_number_values<'a>( let iter = index .field_id_docid_facet_f64s - .remap_key_type::() + .remap_key_type::() .prefix_iter(txn, &key)? .remap_key_type(); diff --git a/milli/src/search/new/sort.rs b/milli/src/search/new/sort.rs index 6f7321e7b..fb234b293 100644 --- a/milli/src/search/new/sort.rs +++ b/milli/src/search/new/sort.rs @@ -4,7 +4,7 @@ use roaring::RoaringBitmap; use super::logger::SearchLogger; use super::{RankingRule, RankingRuleOutput, RankingRuleQueryTrait, SearchContext}; use crate::heed_codec::facet::{FacetGroupKeyCodec, OrderedF64Codec}; -use crate::heed_codec::{ByteSliceRefCodec, StrRefCodec}; +use crate::heed_codec::{BytesRefCodec, StrRefCodec}; use crate::score_details::{self, ScoreDetails}; use crate::search::facet::{ascending_facet_sort, descending_facet_sort}; use crate::{FieldId, Index, Result}; @@ -100,11 +100,11 @@ impl<'ctx, Query: RankingRuleQueryTrait> RankingRule<'ctx, Query> for Sort<'ctx, let number_db = ctx .index .facet_id_f64_docids - .remap_key_type::>(); + .remap_key_type::>(); let string_db = ctx .index .facet_id_string_docids - .remap_key_type::>(); + .remap_key_type::>(); let (number_iter, string_iter) = if self.is_ascending { let number_iter = ascending_facet_sort( diff --git a/milli/src/update/facet/bulk.rs b/milli/src/update/facet/bulk.rs index bfee002fd..3bd4cf5f5 100644 --- a/milli/src/update/facet/bulk.rs +++ b/milli/src/update/facet/bulk.rs @@ -2,7 +2,7 @@ use std::fs::File; use std::io::BufReader; use grenad::CompressionType; -use heed::types::ByteSlice; +use heed::types::Bytes; use heed::{BytesDecode, BytesEncode, Error, PutFlags, RoTxn, RwTxn}; use roaring::RoaringBitmap; @@ -11,7 +11,7 @@ use crate::facet::FacetType; use crate::heed_codec::facet::{ FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec, }; -use crate::heed_codec::ByteSliceRefCodec; +use crate::heed_codec::BytesRefCodec; use crate::update::del_add::{DelAdd, KvReaderDelAdd}; use crate::update::index_documents::{create_writer, valid_lmdb_key, writer_into_reader}; use crate::{CboRoaringBitmapCodec, CboRoaringBitmapLenCodec, FieldId, Index, Result}; @@ -70,11 +70,11 @@ impl<'i> FacetsUpdateBulk<'i> { let Self { index, field_ids, group_size, min_level_size, facet_type, delta_data } = self; let db = match facet_type { - FacetType::String => index - .facet_id_string_docids - .remap_key_type::>(), + FacetType::String => { + index.facet_id_string_docids.remap_key_type::>() + } FacetType::Number => { - index.facet_id_f64_docids.remap_key_type::>() + index.facet_id_f64_docids.remap_key_type::>() } }; @@ -88,7 +88,7 @@ impl<'i> FacetsUpdateBulk<'i> { /// Implementation of `FacetsUpdateBulk` that is independent of milli's `Index` type pub(crate) struct FacetsUpdateBulkInner { - pub db: heed::Database, FacetGroupValueCodec>, + pub db: heed::Database, FacetGroupValueCodec>, pub delta_data: Option>, pub group_size: u8, pub min_level_size: u8, @@ -106,7 +106,7 @@ impl FacetsUpdateBulkInner { for level_reader in level_readers { let mut cursor = level_reader.into_cursor()?; while let Some((k, v)) = cursor.move_on_next()? { - self.db.remap_types::().put(wtxn, k, v)?; + self.db.remap_types::().put(wtxn, k, v)?; } } } @@ -128,7 +128,7 @@ impl FacetsUpdateBulkInner { }; if self.db.is_empty(wtxn)? { let mut buffer = Vec::new(); - let mut database = self.db.iter_mut(wtxn)?.remap_types::(); + let mut database = self.db.iter_mut(wtxn)?.remap_types::(); let mut cursor = delta_data.into_cursor()?; while let Some((key, value)) = cursor.move_on_next()? { if !valid_lmdb_key(key) { @@ -147,16 +147,12 @@ impl FacetsUpdateBulkInner { // then we extend the buffer with the docids bitmap buffer.extend_from_slice(value); unsafe { - database.put_current_with_options::( - PutFlags::APPEND, - key, - &buffer, - )? + database.put_current_with_options::(PutFlags::APPEND, key, &buffer)? }; } } else { let mut buffer = Vec::new(); - let database = self.db.remap_types::(); + let database = self.db.remap_types::(); let mut cursor = delta_data.into_cursor()?; while let Some((key, value)) = cursor.move_on_next()? { @@ -225,9 +221,9 @@ impl FacetsUpdateBulkInner { let level_0_iter = self .db - .remap_types::() + .remap_types::() .prefix_iter(rtxn, level_0_prefix.as_slice())? - .remap_types::, FacetGroupValueCodec>(); + .remap_types::, FacetGroupValueCodec>(); let mut left_bound: &[u8] = &[]; let mut first_iteration_for_new_group = true; @@ -313,7 +309,7 @@ impl FacetsUpdateBulkInner { bitmaps.drain(..).zip(left_bounds.drain(..)).zip(group_sizes.drain(..)) { let key = FacetGroupKey { field_id, level, left_bound }; - let key = FacetGroupKeyCodec::::bytes_encode(&key) + let key = FacetGroupKeyCodec::::bytes_encode(&key) .map_err(Error::Encoding)?; let value = FacetGroupValue { size: group_size, bitmap }; let value = @@ -342,7 +338,7 @@ impl FacetsUpdateBulkInner { bitmaps.drain(..).zip(left_bounds.drain(..)).zip(group_sizes.drain(..)) { let key = FacetGroupKey { field_id, level, left_bound }; - let key = FacetGroupKeyCodec::::bytes_encode(&key) + let key = FacetGroupKeyCodec::::bytes_encode(&key) .map_err(Error::Encoding)?; let value = FacetGroupValue { size: group_size, bitmap }; let value = FacetGroupValueCodec::bytes_encode(&value).map_err(Error::Encoding)?; diff --git a/milli/src/update/facet/incremental.rs b/milli/src/update/facet/incremental.rs index dc47ca126..9d8ee08f4 100644 --- a/milli/src/update/facet/incremental.rs +++ b/milli/src/update/facet/incremental.rs @@ -1,7 +1,7 @@ use std::fs::File; use std::io::BufReader; -use heed::types::{ByteSlice, DecodeIgnore}; +use heed::types::{Bytes, DecodeIgnore}; use heed::{BytesDecode, Error, RoTxn, RwTxn}; use obkv::KvReader; use roaring::RoaringBitmap; @@ -10,7 +10,7 @@ use crate::facet::FacetType; use crate::heed_codec::facet::{ FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec, }; -use crate::heed_codec::ByteSliceRefCodec; +use crate::heed_codec::BytesRefCodec; use crate::search::facet::get_highest_level; use crate::update::del_add::DelAdd; use crate::update::index_documents::valid_lmdb_key; @@ -48,10 +48,10 @@ impl FacetsUpdateIncremental { db: match facet_type { FacetType::String => index .facet_id_string_docids - .remap_key_type::>(), + .remap_key_type::>(), FacetType::Number => index .facet_id_f64_docids - .remap_key_type::>(), + .remap_key_type::>(), }, group_size, max_group_size, @@ -67,7 +67,7 @@ impl FacetsUpdateIncremental { if !valid_lmdb_key(key) { continue; } - let key = FacetGroupKeyCodec::::bytes_decode(key) + let key = FacetGroupKeyCodec::::bytes_decode(key) .map_err(heed::Error::Encoding)?; let value = KvReader::new(value); @@ -98,7 +98,7 @@ impl FacetsUpdateIncremental { /// Implementation of `FacetsUpdateIncremental` that is independent of milli's `Index` type pub struct FacetsUpdateIncrementalInner { - pub db: heed::Database, FacetGroupValueCodec>, + pub db: heed::Database, FacetGroupValueCodec>, pub group_size: u8, pub min_level_size: u8, pub max_group_size: u8, @@ -136,11 +136,11 @@ impl FacetsUpdateIncrementalInner { let mut iter = self .db - .remap_types::() + .remap_types::() .prefix_iter(txn, prefix.as_slice())?; let (key_bytes, value) = iter.next().unwrap()?; Ok(( - FacetGroupKeyCodec::::bytes_decode(key_bytes) + FacetGroupKeyCodec::::bytes_decode(key_bytes) .map_err(Error::Encoding)? .into_owned(), value, @@ -177,7 +177,7 @@ impl FacetsUpdateIncrementalInner { level0_prefix.push(0); let mut iter = - self.db.remap_types::().prefix_iter(txn, &level0_prefix)?; + self.db.remap_types::().prefix_iter(txn, &level0_prefix)?; if iter.next().is_none() { drop(iter); @@ -379,11 +379,8 @@ impl FacetsUpdateIncrementalInner { highest_level_prefix.extend_from_slice(&field_id.to_be_bytes()); highest_level_prefix.push(highest_level); - let size_highest_level = self - .db - .remap_types::() - .prefix_iter(txn, &highest_level_prefix)? - .count(); + let size_highest_level = + self.db.remap_types::().prefix_iter(txn, &highest_level_prefix)?.count(); if size_highest_level < self.group_size as usize * self.min_level_size as usize { return Ok(()); @@ -391,7 +388,7 @@ impl FacetsUpdateIncrementalInner { let mut groups_iter = self .db - .remap_types::() + .remap_types::() .prefix_iter(txn, &highest_level_prefix)?; let nbr_new_groups = size_highest_level / self.group_size as usize; @@ -403,7 +400,7 @@ impl FacetsUpdateIncrementalInner { let mut values = RoaringBitmap::new(); for _ in 0..group_size { let (key_bytes, value_i) = groups_iter.next().unwrap()?; - let key_i = FacetGroupKeyCodec::::bytes_decode(key_bytes) + let key_i = FacetGroupKeyCodec::::bytes_decode(key_bytes) .map_err(Error::Encoding)?; if first_key.is_none() { @@ -426,7 +423,7 @@ impl FacetsUpdateIncrementalInner { let mut values = RoaringBitmap::new(); for _ in 0..nbr_leftover_elements { let (key_bytes, value_i) = groups_iter.next().unwrap()?; - let key_i = FacetGroupKeyCodec::::bytes_decode(key_bytes) + let key_i = FacetGroupKeyCodec::::bytes_decode(key_bytes) .map_err(Error::Encoding)?; if first_key.is_none() { @@ -594,7 +591,7 @@ impl FacetsUpdateIncrementalInner { if highest_level == 0 || self .db - .remap_types::() + .remap_types::() .prefix_iter(txn, &highest_level_prefix)? .count() >= self.min_level_size as usize @@ -602,14 +599,12 @@ impl FacetsUpdateIncrementalInner { return Ok(()); } let mut to_delete = vec![]; - let mut iter = self - .db - .remap_types::() - .prefix_iter(txn, &highest_level_prefix)?; + let mut iter = + self.db.remap_types::().prefix_iter(txn, &highest_level_prefix)?; for el in iter.by_ref() { let (k, _) = el?; to_delete.push( - FacetGroupKeyCodec::::bytes_decode(k) + FacetGroupKeyCodec::::bytes_decode(k) .map_err(Error::Encoding)? .into_owned(), ); @@ -1118,7 +1113,7 @@ mod fuzz { #[no_coverage] fn compare_with_trivial_database(tempdir: Rc, operations: &[Operation]) { - let index = FacetIndex::::open_from_tempdir(tempdir, 4, 8, 5); // dummy params, they'll be overwritten + let index = FacetIndex::::open_from_tempdir(tempdir, 4, 8, 5); // dummy params, they'll be overwritten let mut txn = index.env.write_txn().unwrap(); let mut trivial_db = TrivialDatabase::>::default(); @@ -1164,16 +1159,13 @@ mod fuzz { let level0iter = index .content .as_polymorph() - .prefix_iter::<_, ByteSlice, FacetGroupValueCodec>( - &mut txn, - &field_id.to_be_bytes(), - ) + .prefix_iter::<_, Bytes, FacetGroupValueCodec>(&mut txn, &field_id.to_be_bytes()) .unwrap(); for ((key, values), group) in values_field_id.iter().zip(level0iter) { let (group_key, group_values) = group.unwrap(); let group_key = - FacetGroupKeyCodec::::bytes_decode(group_key).unwrap(); + FacetGroupKeyCodec::::bytes_decode(group_key).unwrap(); assert_eq!(key, &group_key.left_bound); assert_eq!(values, &group_values.bitmap); } @@ -1183,13 +1175,13 @@ mod fuzz { let level0iter = index .content .as_polymorph() - .prefix_iter::<_, ByteSlice, FacetGroupValueCodec>(&txn, &field_id.to_be_bytes()) + .prefix_iter::<_, Bytes, FacetGroupValueCodec>(&txn, &field_id.to_be_bytes()) .unwrap(); for ((key, values), group) in values_field_id.iter().zip(level0iter) { let (group_key, group_values) = group.unwrap(); let group_key = - FacetGroupKeyCodec::::bytes_decode(group_key).unwrap(); + FacetGroupKeyCodec::::bytes_decode(group_key).unwrap(); assert_eq!(key, &group_key.left_bound); assert_eq!(values, &group_values.bitmap); } diff --git a/milli/src/update/facet/mod.rs b/milli/src/update/facet/mod.rs index 991178903..ad8a838c8 100644 --- a/milli/src/update/facet/mod.rs +++ b/milli/src/update/facet/mod.rs @@ -83,7 +83,7 @@ use std::iter::FromIterator; use charabia::normalizer::{Normalize, NormalizerOption}; use grenad::{CompressionType, SortAlgorithm}; -use heed::types::{ByteSlice, DecodeIgnore, SerdeJson}; +use heed::types::{Bytes, DecodeIgnore, SerdeJson}; use heed::BytesEncode; use log::debug; use time::OffsetDateTime; @@ -92,7 +92,7 @@ use self::incremental::FacetsUpdateIncremental; use super::FacetsUpdateBulk; use crate::facet::FacetType; use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec}; -use crate::heed_codec::ByteSliceRefCodec; +use crate::heed_codec::BytesRefCodec; use crate::update::index_documents::create_sorter; use crate::update::merge_btreeset_string; use crate::{BEU16StrCodec, Index, Result, MAX_FACET_VALUE_LENGTH}; @@ -106,7 +106,7 @@ pub mod incremental; /// a bulk update method or an incremental update method. pub struct FacetsUpdate<'i> { index: &'i Index, - database: heed::Database, FacetGroupValueCodec>, + database: heed::Database, FacetGroupValueCodec>, facet_type: FacetType, delta_data: grenad::Reader>, group_size: u8, @@ -120,11 +120,11 @@ impl<'i> FacetsUpdate<'i> { delta_data: grenad::Reader>, ) -> Self { let database = match facet_type { - FacetType::String => index - .facet_id_string_docids - .remap_key_type::>(), + FacetType::String => { + index.facet_id_string_docids.remap_key_type::>() + } FacetType::Number => { - index.facet_id_f64_docids.remap_key_type::>() + index.facet_id_f64_docids.remap_key_type::>() } }; Self { @@ -217,10 +217,11 @@ impl<'i> FacetsUpdate<'i> { // as the grenad sorter already merged them for us. let mut merger_iter = sorter.into_stream_merger_iter()?; while let Some((key_bytes, btreeset_bytes)) = merger_iter.next()? { - self.index - .facet_id_normalized_string_strings - .remap_types::() - .put(wtxn, key_bytes, btreeset_bytes)?; + self.index.facet_id_normalized_string_strings.remap_types::().put( + wtxn, + key_bytes, + btreeset_bytes, + )?; } // We compute one FST by string facet @@ -267,7 +268,7 @@ pub(crate) mod test_helpers { use std::marker::PhantomData; use std::rc::Rc; - use heed::types::ByteSlice; + use heed::types::Bytes; use heed::{BytesDecode, BytesEncode, Env, RoTxn, RwTxn}; use roaring::RoaringBitmap; @@ -275,7 +276,7 @@ pub(crate) mod test_helpers { use crate::heed_codec::facet::{ FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec, }; - use crate::heed_codec::ByteSliceRefCodec; + use crate::heed_codec::BytesRefCodec; use crate::search::facet::get_highest_level; use crate::snapshot_tests::display_bitmap; use crate::update::del_add::{DelAdd, KvWriterDelAdd}; @@ -306,7 +307,7 @@ pub(crate) mod test_helpers { BytesEncode<'a> + BytesDecode<'a, DItem = >::EItem>, { pub env: Env, - pub content: heed::Database, FacetGroupValueCodec>, + pub content: heed::Database, FacetGroupValueCodec>, pub group_size: Cell, pub min_level_size: Cell, pub max_group_size: Cell, @@ -454,7 +455,7 @@ pub(crate) mod test_helpers { let left_bound_bytes = BoundCodec::bytes_encode(left_bound).unwrap().into_owned(); let key: FacetGroupKey<&[u8]> = FacetGroupKey { field_id: *field_id, level: 0, left_bound: &left_bound_bytes }; - let key = FacetGroupKeyCodec::::bytes_encode(&key).unwrap(); + let key = FacetGroupKeyCodec::::bytes_encode(&key).unwrap(); let mut inner_writer = KvWriterDelAdd::memory(); let value = CboRoaringBitmapCodec::bytes_encode(docids).unwrap(); inner_writer.insert(DelAdd::Addition, value).unwrap(); @@ -486,12 +487,12 @@ pub(crate) mod test_helpers { let iter = self .content - .remap_types::() + .remap_types::() .prefix_iter(txn, &level_no_prefix) .unwrap(); for el in iter { let (key, value) = el.unwrap(); - let key = FacetGroupKeyCodec::::bytes_decode(key).unwrap(); + let key = FacetGroupKeyCodec::::bytes_decode(key).unwrap(); let mut prefix_start_below = vec![]; prefix_start_below.extend_from_slice(&field_id.to_be_bytes()); @@ -501,11 +502,11 @@ pub(crate) mod test_helpers { let start_below = { let mut start_below_iter = self .content - .remap_types::() + .remap_types::() .prefix_iter(txn, &prefix_start_below) .unwrap(); let (key_bytes, _) = start_below_iter.next().unwrap().unwrap(); - FacetGroupKeyCodec::::bytes_decode(key_bytes).unwrap() + FacetGroupKeyCodec::::bytes_decode(key_bytes).unwrap() }; assert!(value.size > 0); diff --git a/milli/src/update/index_documents/helpers/grenad_helpers.rs b/milli/src/update/index_documents/helpers/grenad_helpers.rs index 061cbe5a0..e1b27baa2 100644 --- a/milli/src/update/index_documents/helpers/grenad_helpers.rs +++ b/milli/src/update/index_documents/helpers/grenad_helpers.rs @@ -3,7 +3,7 @@ use std::fs::File; use std::io::{self, BufReader, BufWriter, Seek}; use grenad::{CompressionType, Sorter}; -use heed::types::ByteSlice; +use heed::types::Bytes; use super::{ClonableMmap, MergeFn}; use crate::update::index_documents::valid_lmdb_key; @@ -255,7 +255,7 @@ where puffin::profile_function!(); let mut buffer = Vec::new(); - let database = database.remap_types::(); + let database = database.remap_types::(); let mut merger_iter = sorter.into_stream_merger_iter()?; while let Some((key, value)) = merger_iter.next()? { diff --git a/milli/src/update/index_documents/transform.rs b/milli/src/update/index_documents/transform.rs index 0c70b592f..ab8e27edb 100644 --- a/milli/src/update/index_documents/transform.rs +++ b/milli/src/update/index_documents/transform.rs @@ -247,7 +247,7 @@ impl<'a, 'i> Transform<'a, 'i> { let base_obkv = self .index .documents - .remap_data_type::() + .remap_data_type::() .get(wtxn, &original_key)? .ok_or(InternalError::DatabaseMissingEntry { db_name: db_name::DOCUMENTS, @@ -501,7 +501,7 @@ impl<'a, 'i> Transform<'a, 'i> { let base_obkv = self .index .documents - .remap_data_type::() + .remap_data_type::() .get(txn, &original_key)? .ok_or(InternalError::DatabaseMissingEntry { db_name: db_name::DOCUMENTS, diff --git a/milli/src/update/index_documents/typed_chunk.rs b/milli/src/update/index_documents/typed_chunk.rs index 6ec00caae..49e36b87e 100644 --- a/milli/src/update/index_documents/typed_chunk.rs +++ b/milli/src/update/index_documents/typed_chunk.rs @@ -6,7 +6,7 @@ use std::io::{self, BufReader}; use bytemuck::allocation::pod_collect_to_vec; use charabia::{Language, Script}; use grenad::MergerBuilder; -use heed::types::ByteSlice; +use heed::types::Bytes; use heed::{PutFlags, RwTxn}; use log::error; use obkv::{KvReader, KvWriter}; @@ -144,7 +144,7 @@ pub(crate) fn write_typed_chunk_into_index( } } - let db = index.documents.remap_data_type::(); + let db = index.documents.remap_data_type::(); if !writer.is_empty() { db.put(wtxn, &docid, &writer.into_inner().unwrap())?; @@ -293,7 +293,7 @@ pub(crate) fn write_typed_chunk_into_index( } TypedChunk::FieldIdDocidFacetNumbers(fid_docid_facet_number) => { let index_fid_docid_facet_numbers = - index.field_id_docid_facet_f64s.remap_types::(); + index.field_id_docid_facet_f64s.remap_types::(); let mut cursor = fid_docid_facet_number.into_cursor()?; while let Some((key, value)) = cursor.move_on_next()? { let reader = KvReaderDelAdd::new(value); @@ -313,7 +313,7 @@ pub(crate) fn write_typed_chunk_into_index( } TypedChunk::FieldIdDocidFacetStrings(fid_docid_facet_string) => { let index_fid_docid_facet_strings = - index.field_id_docid_facet_strings.remap_types::(); + index.field_id_docid_facet_strings.remap_types::(); let mut cursor = fid_docid_facet_string.into_cursor()?; while let Some((key, value)) = cursor.move_on_next()? { let reader = KvReaderDelAdd::new(value); @@ -498,7 +498,7 @@ where puffin::profile_function!(format!("number of entries: {}", data.len())); let mut buffer = Vec::new(); - let database = database.remap_types::(); + let database = database.remap_types::(); let mut cursor = data.into_cursor()?; while let Some((key, value)) = cursor.move_on_next()? { @@ -556,7 +556,7 @@ where } let mut buffer = Vec::new(); - let mut database = database.iter_mut(wtxn)?.remap_types::(); + let mut database = database.iter_mut(wtxn)?.remap_types::(); let mut cursor = data.into_cursor()?; while let Some((key, value)) = cursor.move_on_next()? { @@ -571,7 +571,7 @@ where let value = serialize_value(value, &mut buffer)?; unsafe { // safety: We do not keep a reference to anything that lives inside the database - database.put_current_with_options::(PutFlags::APPEND, key, value)? + database.put_current_with_options::(PutFlags::APPEND, key, value)? }; } } diff --git a/milli/src/update/settings.rs b/milli/src/update/settings.rs index e8c6e2c10..98697325e 100644 --- a/milli/src/update/settings.rs +++ b/milli/src/update/settings.rs @@ -822,7 +822,7 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { fn update_max_values_per_facet(&mut self) -> Result<()> { match self.max_values_per_facet { Setting::Set(max) => { - self.index.put_max_values_per_facet(self.wtxn, max)?; + self.index.put_max_values_per_facet(self.wtxn, max as u64)?; } Setting::Reset => { self.index.delete_max_values_per_facet(self.wtxn)?; @@ -850,7 +850,7 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { fn update_pagination_max_total_hits(&mut self) -> Result<()> { match self.pagination_max_total_hits { Setting::Set(max) => { - self.index.put_pagination_max_total_hits(self.wtxn, max)?; + self.index.put_pagination_max_total_hits(self.wtxn, max as u64)?; } Setting::Reset => { self.index.delete_pagination_max_total_hits(self.wtxn)?; @@ -917,7 +917,7 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { #[cfg(test)] mod tests { use big_s::S; - use heed::types::ByteSlice; + use heed::types::Bytes; use maplit::{btreemap, btreeset, hashset}; use super::*; @@ -1130,7 +1130,7 @@ mod tests { } let count = index .facet_id_f64_docids - .remap_key_type::() + .remap_key_type::() // The faceted field id is 1u16 .prefix_iter(&rtxn, &[0, 1, 0]) .unwrap() @@ -1151,7 +1151,7 @@ mod tests { // Only count the field_id 0 and level 0 facet values. let count = index .facet_id_f64_docids - .remap_key_type::() + .remap_key_type::() .prefix_iter(&rtxn, &[0, 1, 0]) .unwrap() .count(); diff --git a/milli/src/update/word_prefix_docids.rs b/milli/src/update/word_prefix_docids.rs index 5d60c3765..544bea224 100644 --- a/milli/src/update/word_prefix_docids.rs +++ b/milli/src/update/word_prefix_docids.rs @@ -1,7 +1,7 @@ use std::collections::{HashMap, HashSet}; use grenad::CompressionType; -use heed::types::{ByteSlice, Str}; +use heed::types::{Bytes, Str}; use heed::Database; use crate::update::del_add::{deladd_serialize_add_side, DelAdd, KvWriterDelAdd}; @@ -93,7 +93,7 @@ impl<'t, 'i> WordPrefixDocids<'t, 'i> { } // We fetch the docids associated to the newly added word prefix fst only. - let db = self.word_docids.remap_data_type::(); + let db = self.word_docids.remap_data_type::(); let mut buffer = Vec::new(); for prefix in new_prefix_fst_words { let prefix = std::str::from_utf8(prefix.as_bytes())?; diff --git a/milli/src/update/words_prefix_integer_docids.rs b/milli/src/update/words_prefix_integer_docids.rs index a763ecaeb..819cc097b 100644 --- a/milli/src/update/words_prefix_integer_docids.rs +++ b/milli/src/update/words_prefix_integer_docids.rs @@ -2,7 +2,7 @@ use std::collections::{HashMap, HashSet}; use std::str; use grenad::CompressionType; -use heed::types::ByteSlice; +use heed::types::Bytes; use heed::{BytesDecode, BytesEncode, Database}; use log::debug; @@ -110,7 +110,7 @@ impl<'t, 'i> WordPrefixIntegerDocids<'t, 'i> { } // We fetch the docids associated to the newly added word prefix fst only. - let db = self.word_database.remap_data_type::(); + let db = self.word_database.remap_data_type::(); let mut buffer = Vec::new(); for prefix_bytes in new_prefix_fst_words { let prefix = str::from_utf8(prefix_bytes.as_bytes()).map_err(|_| { @@ -119,7 +119,7 @@ impl<'t, 'i> WordPrefixIntegerDocids<'t, 'i> { // iter over all lines of the DB where the key is prefixed by the current prefix. let iter = db - .remap_key_type::() + .remap_key_type::() .prefix_iter(self.wtxn, prefix_bytes.as_bytes())? .remap_key_type::(); for result in iter {