diff --git a/src/fields_ids_map.rs b/src/fields_ids_map.rs index 82d06e818..ce79e6e04 100644 --- a/src/fields_ids_map.rs +++ b/src/fields_ids_map.rs @@ -1,11 +1,12 @@ use std::collections::BTreeMap; use serde::{Serialize, Deserialize}; +use crate::FieldId; #[derive(Debug, Clone, Serialize, Deserialize)] pub struct FieldsIdsMap { - names_ids: BTreeMap, - ids_names: BTreeMap, - next_id: Option, + names_ids: BTreeMap, + ids_names: BTreeMap, + next_id: Option, } impl FieldsIdsMap { @@ -29,7 +30,7 @@ impl FieldsIdsMap { /// Returns the field id related to a field name, it will create a new field id if the /// name is not already known. Returns `None` if the maximum field id as been reached. - pub fn insert(&mut self, name: &str) -> Option { + pub fn insert(&mut self, name: &str) -> Option { match self.names_ids.get(name) { Some(id) => Some(*id), None => { @@ -43,17 +44,17 @@ impl FieldsIdsMap { } /// Get the id of a field based on its name. - pub fn id(&self, name: &str) -> Option { + pub fn id(&self, name: &str) -> Option { self.names_ids.get(name).copied() } /// Get the name of a field based on its id. - pub fn name(&self, id: u8) -> Option<&str> { + pub fn name(&self, id: FieldId) -> Option<&str> { self.ids_names.get(&id).map(String::as_str) } /// Remove a field name and id based on its name. - pub fn remove(&mut self, name: &str) -> Option { + pub fn remove(&mut self, name: &str) -> Option { match self.names_ids.remove(name) { Some(id) => self.ids_names.remove_entry(&id).map(|(id, _)| id), None => None, @@ -61,7 +62,7 @@ impl FieldsIdsMap { } /// Iterate over the ids and names in the ids order. - pub fn iter(&self) -> impl Iterator { + pub fn iter(&self) -> impl Iterator { self.ids_names.iter().map(|(id, name)| (*id, name.as_str())) } } diff --git a/src/heed_codec/facet/facet_level_value_f64_codec.rs b/src/heed_codec/facet/facet_level_value_f64_codec.rs index 1ee8e6bf3..a4642f961 100644 --- a/src/heed_codec/facet/facet_level_value_f64_codec.rs +++ b/src/heed_codec/facet/facet_level_value_f64_codec.rs @@ -2,12 +2,13 @@ use std::borrow::Cow; use std::convert::TryInto; use crate::facet::value_encoding::f64_into_bytes; +use crate::FieldId; // TODO do not de/serialize right bound when level = 0 pub struct FacetLevelValueF64Codec; impl<'a> heed::BytesDecode<'a> for FacetLevelValueF64Codec { - type DItem = (u8, u8, f64, f64); + type DItem = (FieldId, u8, f64, f64); fn bytes_decode(bytes: &'a [u8]) -> Option { let (field_id, bytes) = bytes.split_first()?; @@ -27,7 +28,7 @@ impl<'a> heed::BytesDecode<'a> for FacetLevelValueF64Codec { } impl heed::BytesEncode<'_> for FacetLevelValueF64Codec { - type EItem = (u8, u8, f64, f64); + type EItem = (FieldId, u8, f64, f64); fn bytes_encode((field_id, level, left, right): &Self::EItem) -> Option> { let mut buffer = [0u8; 32]; diff --git a/src/heed_codec/facet/facet_level_value_i64_codec.rs b/src/heed_codec/facet/facet_level_value_i64_codec.rs index 7cf9a714b..cc0d3120d 100644 --- a/src/heed_codec/facet/facet_level_value_i64_codec.rs +++ b/src/heed_codec/facet/facet_level_value_i64_codec.rs @@ -2,11 +2,12 @@ use std::borrow::Cow; use std::convert::TryInto; use crate::facet::value_encoding::{i64_from_bytes, i64_into_bytes}; +use crate::FieldId; pub struct FacetLevelValueI64Codec; impl<'a> heed::BytesDecode<'a> for FacetLevelValueI64Codec { - type DItem = (u8, u8, i64, i64); + type DItem = (FieldId, u8, i64, i64); fn bytes_decode(bytes: &'a [u8]) -> Option { let (field_id, bytes) = bytes.split_first()?; @@ -24,7 +25,7 @@ impl<'a> heed::BytesDecode<'a> for FacetLevelValueI64Codec { } impl heed::BytesEncode<'_> for FacetLevelValueI64Codec { - type EItem = (u8, u8, i64, i64); + type EItem = (FieldId, u8, i64, i64); fn bytes_encode((field_id, level, left, right): &Self::EItem) -> Option> { let left = i64_into_bytes(*left); diff --git a/src/heed_codec/facet/facet_value_string_codec.rs b/src/heed_codec/facet/facet_value_string_codec.rs index faa8b407b..350efc450 100644 --- a/src/heed_codec/facet/facet_value_string_codec.rs +++ b/src/heed_codec/facet/facet_value_string_codec.rs @@ -1,10 +1,12 @@ use std::borrow::Cow; use std::str; +use crate::FieldId; + pub struct FacetValueStringCodec; impl<'a> heed::BytesDecode<'a> for FacetValueStringCodec { - type DItem = (u8, &'a str); + type DItem = (FieldId, &'a str); fn bytes_decode(bytes: &'a [u8]) -> Option { let (field_id, bytes) = bytes.split_first()?; @@ -14,7 +16,7 @@ impl<'a> heed::BytesDecode<'a> for FacetValueStringCodec { } impl<'a> heed::BytesEncode<'a> for FacetValueStringCodec { - type EItem = (u8, &'a str); + type EItem = (FieldId, &'a str); fn bytes_encode((field_id, value): &Self::EItem) -> Option> { let mut bytes = Vec::with_capacity(value.len() + 1); diff --git a/src/index.rs b/src/index.rs index b21c7d39b..b0c2b1a3f 100644 --- a/src/index.rs +++ b/src/index.rs @@ -10,7 +10,7 @@ use roaring::RoaringBitmap; use crate::facet::FacetType; use crate::fields_ids_map::FieldsIdsMap; use crate::Search; -use crate::{BEU32, DocumentId, ExternalDocumentsIds}; +use crate::{BEU32, DocumentId, FieldId, ExternalDocumentsIds}; use crate::{ RoaringBitmapCodec, BEU32StrCodec, StrStrU8Codec, ObkvCodec, BoRoaringBitmapCodec, CboRoaringBitmapCodec, @@ -107,8 +107,8 @@ impl Index { /* primary key */ /// Writes the documents primary key, this is the field name that is used to store the id. - pub fn put_primary_key(&self, wtxn: &mut RwTxn, primary_key: u8) -> heed::Result<()> { - self.main.put::<_, Str, OwnedType>(wtxn, PRIMARY_KEY_KEY, &primary_key) + pub fn put_primary_key(&self, wtxn: &mut RwTxn, primary_key: FieldId) -> heed::Result<()> { + self.main.put::<_, Str, OwnedType>(wtxn, PRIMARY_KEY_KEY, &primary_key) } /// Deletes the primary key of the documents, this can be done to reset indexes settings. @@ -117,8 +117,8 @@ impl Index { } /// Returns the documents primary key, `None` if it hasn't been defined. - pub fn primary_key(&self, rtxn: &RoTxn) -> heed::Result> { - self.main.get::<_, Str, OwnedType>(rtxn, PRIMARY_KEY_KEY) + pub fn primary_key(&self, rtxn: &RoTxn) -> heed::Result> { + self.main.get::<_, Str, OwnedType>(rtxn, PRIMARY_KEY_KEY) } /* external documents ids */ @@ -172,7 +172,7 @@ impl Index { /// Writes the fields ids that must be displayed in the defined order. /// There must be not be any duplicate field id. - pub fn put_displayed_fields(&self, wtxn: &mut RwTxn, fields: &[u8]) -> heed::Result<()> { + pub fn put_displayed_fields(&self, wtxn: &mut RwTxn, fields: &[FieldId]) -> heed::Result<()> { self.main.put::<_, Str, ByteSlice>(wtxn, DISPLAYED_FIELDS_KEY, fields) } @@ -184,14 +184,14 @@ impl Index { /// Returns the displayed fields ids in the order they must be returned. If it returns /// `None` it means that all the attributes are displayed in the order of the `FieldsIdsMap`. - pub fn displayed_fields<'t>(&self, rtxn: &'t RoTxn) -> heed::Result> { + pub fn displayed_fields<'t>(&self, rtxn: &'t RoTxn) -> heed::Result> { self.main.get::<_, Str, ByteSlice>(rtxn, DISPLAYED_FIELDS_KEY) } /* searchable fields */ /// Writes the searchable fields, when this list is specified, only these are indexed. - pub fn put_searchable_fields(&self, wtxn: &mut RwTxn, fields: &[u8]) -> heed::Result<()> { + pub fn put_searchable_fields(&self, wtxn: &mut RwTxn, fields: &[FieldId]) -> heed::Result<()> { assert!(fields.windows(2).all(|win| win[0] < win[1])); // is sorted self.main.put::<_, Str, ByteSlice>(wtxn, SEARCHABLE_FIELDS_KEY, fields) } @@ -203,7 +203,7 @@ impl Index { /// Returns the searchable fields ids, those are the fields that are indexed, /// if the searchable fields aren't there it means that **all** the fields are indexed. - pub fn searchable_fields<'t>(&self, rtxn: &'t RoTxn) -> heed::Result> { + pub fn searchable_fields<'t>(&self, rtxn: &'t RoTxn) -> heed::Result> { self.main.get::<_, Str, ByteSlice>(rtxn, SEARCHABLE_FIELDS_KEY) } @@ -211,7 +211,7 @@ impl Index { /// Writes the facet fields ids associated with their facet type or `None` if /// the facet type is currently unknown. - pub fn put_faceted_fields(&self, wtxn: &mut RwTxn, fields_types: &HashMap) -> heed::Result<()> { + pub fn put_faceted_fields(&self, wtxn: &mut RwTxn, fields_types: &HashMap) -> heed::Result<()> { self.main.put::<_, Str, SerdeJson<_>>(wtxn, FACETED_FIELDS_KEY, fields_types) } @@ -221,14 +221,14 @@ impl Index { } /// Returns the facet fields ids associated with their facet type. - pub fn faceted_fields(&self, wtxn: &RoTxn) -> heed::Result> { + pub fn faceted_fields(&self, wtxn: &RoTxn) -> heed::Result> { Ok(self.main.get::<_, Str, SerdeJson<_>>(wtxn, FACETED_FIELDS_KEY)?.unwrap_or_default()) } /* faceted documents ids */ /// Writes the documents ids that are faceted under this field id. - pub fn put_faceted_documents_ids(&self, wtxn: &mut RwTxn, field_id: u8, docids: &RoaringBitmap) -> heed::Result<()> { + pub fn put_faceted_documents_ids(&self, wtxn: &mut RwTxn, field_id: FieldId, docids: &RoaringBitmap) -> heed::Result<()> { let mut buffer = [0u8; FACETED_DOCUMENTS_IDS_PREFIX.len() + 1]; buffer[..FACETED_DOCUMENTS_IDS_PREFIX.len()].clone_from_slice(FACETED_DOCUMENTS_IDS_PREFIX.as_bytes()); *buffer.last_mut().unwrap() = field_id; @@ -236,7 +236,7 @@ impl Index { } /// Retrieve all the documents ids that faceted under this field id. - pub fn faceted_documents_ids(&self, rtxn: &RoTxn, field_id: u8) -> heed::Result { + pub fn faceted_documents_ids(&self, rtxn: &RoTxn, field_id: FieldId) -> heed::Result { let mut buffer = [0u8; FACETED_DOCUMENTS_IDS_PREFIX.len() + 1]; buffer[..FACETED_DOCUMENTS_IDS_PREFIX.len()].clone_from_slice(FACETED_DOCUMENTS_IDS_PREFIX.as_bytes()); *buffer.last_mut().unwrap() = field_id; diff --git a/src/lib.rs b/src/lib.rs index 93f9cc0df..9fa19c68c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -40,15 +40,16 @@ pub type SmallVec16 = smallvec::SmallVec<[T; 16]>; pub type SmallVec8 = smallvec::SmallVec<[T; 8]>; pub type BEU32 = heed::zerocopy::U32; pub type BEU64 = heed::zerocopy::U64; -pub type DocumentId = u32; pub type Attribute = u32; +pub type DocumentId = u32; +pub type FieldId = u8; pub type Position = u32; type MergeFn = for<'a> fn(&[u8], &[Cow<'a, [u8]>]) -> anyhow::Result>; /// Transform a raw obkv store into a JSON Object. pub fn obkv_to_json( - displayed_fields: &[u8], + displayed_fields: &[FieldId], fields_ids_map: &FieldsIdsMap, obkv: obkv::KvReader, ) -> anyhow::Result> diff --git a/src/search/facet/mod.rs b/src/search/facet/mod.rs index c47f290e0..283c95e0d 100644 --- a/src/search/facet/mod.rs +++ b/src/search/facet/mod.rs @@ -15,7 +15,7 @@ use roaring::RoaringBitmap; use crate::facet::FacetType; use crate::heed_codec::facet::FacetValueStringCodec; use crate::heed_codec::facet::{FacetLevelValueI64Codec, FacetLevelValueF64Codec}; -use crate::{Index, FieldsIdsMap, CboRoaringBitmapCodec}; +use crate::{Index, FieldId, FieldsIdsMap, CboRoaringBitmapCodec}; use self::FacetCondition::*; use self::FacetNumberOperator::*; @@ -75,18 +75,18 @@ impl FacetStringOperator { #[derive(Debug, Clone, PartialEq)] pub enum FacetCondition { - OperatorI64(u8, FacetNumberOperator), - OperatorF64(u8, FacetNumberOperator), - OperatorString(u8, FacetStringOperator), + OperatorI64(FieldId, FacetNumberOperator), + OperatorF64(FieldId, FacetNumberOperator), + OperatorString(FieldId, FacetStringOperator), Or(Box, Box), And(Box, Box), } fn get_field_id_facet_type<'a>( fields_ids_map: &FieldsIdsMap, - faceted_fields: &HashMap, + faceted_fields: &HashMap, items: &mut Pairs<'a, Rule>, -) -> Result<(u8, FacetType), PestError> +) -> Result<(FieldId, FacetType), PestError> { // lexing ensures that we at least have a key let key = items.next().unwrap(); @@ -154,7 +154,7 @@ impl FacetCondition { fn from_pairs( fim: &FieldsIdsMap, - ff: &HashMap, + ff: &HashMap, expression: Pairs, ) -> anyhow::Result { @@ -201,7 +201,7 @@ impl FacetCondition { fn between( fields_ids_map: &FieldsIdsMap, - faceted_fields: &HashMap, + faceted_fields: &HashMap, item: Pair, ) -> anyhow::Result { @@ -234,7 +234,7 @@ impl FacetCondition { fn equal( fields_ids_map: &FieldsIdsMap, - faceted_fields: &HashMap, + faceted_fields: &HashMap, item: Pair, ) -> anyhow::Result { @@ -250,7 +250,7 @@ impl FacetCondition { fn greater_than( fields_ids_map: &FieldsIdsMap, - faceted_fields: &HashMap, + faceted_fields: &HashMap, item: Pair, ) -> anyhow::Result { @@ -274,7 +274,7 @@ impl FacetCondition { fn greater_than_or_equal( fields_ids_map: &FieldsIdsMap, - faceted_fields: &HashMap, + faceted_fields: &HashMap, item: Pair, ) -> anyhow::Result { @@ -298,7 +298,7 @@ impl FacetCondition { fn lower_than( fields_ids_map: &FieldsIdsMap, - faceted_fields: &HashMap, + faceted_fields: &HashMap, item: Pair, ) -> anyhow::Result { @@ -322,7 +322,7 @@ impl FacetCondition { fn lower_than_or_equal( fields_ids_map: &FieldsIdsMap, - faceted_fields: &HashMap, + faceted_fields: &HashMap, item: Pair, ) -> anyhow::Result { @@ -351,7 +351,7 @@ impl FacetCondition { fn explore_facet_levels<'t, T: 't, KC>( rtxn: &'t heed::RoTxn, db: heed::Database, - field_id: u8, + field_id: FieldId, level: u8, left: Bound, right: Bound, @@ -447,7 +447,7 @@ impl FacetCondition { rtxn: &'t heed::RoTxn, index: &Index, db: heed::Database, - field_id: u8, + field_id: FieldId, operator: FacetNumberOperator, ) -> anyhow::Result where @@ -493,7 +493,7 @@ impl FacetCondition { rtxn: &heed::RoTxn, index: &Index, db: heed::Database, - field_id: u8, + field_id: FieldId, operator: &FacetStringOperator, ) -> anyhow::Result { diff --git a/src/update/index_documents/store.rs b/src/update/index_documents/store.rs index 57f99c908..c56b8b09e 100644 --- a/src/update/index_documents/store.rs +++ b/src/update/index_documents/store.rs @@ -22,7 +22,7 @@ use crate::heed_codec::{BoRoaringBitmapCodec, CboRoaringBitmapCodec}; use crate::heed_codec::facet::{FacetValueStringCodec, FacetLevelValueF64Codec, FacetLevelValueI64Codec}; use crate::tokenizer::{simple_tokenizer, only_token}; use crate::update::UpdateIndexingStep; -use crate::{json_to_string, SmallVec8, SmallVec32, SmallString32, Position, DocumentId}; +use crate::{json_to_string, SmallVec8, SmallVec32, SmallString32, Position, DocumentId, FieldId}; use super::{MergeFn, create_writer, create_sorter, writer_into_reader}; use super::merge_function::{ @@ -47,8 +47,8 @@ pub struct Readers { pub struct Store { // Indexing parameters - searchable_fields: HashSet, - faceted_fields: HashMap, + searchable_fields: HashSet, + faceted_fields: HashMap, // Caches word_docids: LinkedHashMap, RoaringBitmap>, word_docids_limit: usize, @@ -72,8 +72,8 @@ pub struct Store { impl Store { pub fn new( - searchable_fields: HashSet, - faceted_fields: HashMap, + searchable_fields: HashSet, + faceted_fields: HashMap, linked_hash_map_size: Option, max_nb_chunks: Option, max_memory: Option, @@ -176,7 +176,7 @@ impl Store { // Save the documents ids under the facet field id and value we have seen it. fn insert_facet_values_docid( &mut self, - field_id: u8, + field_id: FieldId, field_value: FacetValue, id: DocumentId, ) -> anyhow::Result<()> @@ -243,7 +243,7 @@ impl Store { &mut self, document_id: DocumentId, words_positions: &mut HashMap>, - facet_values: &mut HashMap>, + facet_values: &mut HashMap>, record: &[u8], ) -> anyhow::Result<()> { diff --git a/src/update/index_documents/transform.rs b/src/update/index_documents/transform.rs index a42da45f1..f44593c05 100644 --- a/src/update/index_documents/transform.rs +++ b/src/update/index_documents/transform.rs @@ -10,13 +10,13 @@ use log::info; use roaring::RoaringBitmap; use serde_json::{Map, Value}; -use crate::{BEU32, MergeFn, Index, FieldsIdsMap, ExternalDocumentsIds}; +use crate::{BEU32, MergeFn, Index, FieldId, FieldsIdsMap, ExternalDocumentsIds}; use crate::update::{AvailableDocumentsIds, UpdateIndexingStep}; use super::merge_function::merge_two_obkvs; use super::{create_writer, create_sorter, IndexDocumentsMethod}; pub struct TransformOutput { - pub primary_key: u8, + pub primary_key: FieldId, pub fields_ids_map: FieldsIdsMap, pub external_documents_ids: ExternalDocumentsIds<'static>, pub new_documents_ids: RoaringBitmap, @@ -365,7 +365,7 @@ impl Transform<'_, '_> { fn output_from_sorter( self, sorter: grenad::Sorter, - primary_key: u8, + primary_key: FieldId, fields_ids_map: FieldsIdsMap, approximate_number_of_documents: usize, mut external_documents_ids: ExternalDocumentsIds<'_>, @@ -477,7 +477,7 @@ impl Transform<'_, '_> { // TODO this can be done in parallel by using the rayon `ThreadPool`. pub fn remap_index_documents( self, - primary_key: u8, + primary_key: FieldId, fields_ids_map: FieldsIdsMap, ) -> anyhow::Result {