Introduce the FieldId u8 alias type

This commit is contained in:
Clément Renault 2020-11-26 17:38:08 +01:00
parent 0a63e69e04
commit ecc8bc8910
No known key found for this signature in database
GPG Key ID: 92ADA4E935E71FA4
9 changed files with 62 additions and 56 deletions

View File

@ -1,11 +1,12 @@
use std::collections::BTreeMap;
use serde::{Serialize, Deserialize};
use crate::FieldId;
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FieldsIdsMap {
names_ids: BTreeMap<String, u8>,
ids_names: BTreeMap<u8, String>,
next_id: Option<u8>,
names_ids: BTreeMap<String, FieldId>,
ids_names: BTreeMap<FieldId, String>,
next_id: Option<FieldId>,
}
impl FieldsIdsMap {
@ -29,7 +30,7 @@ impl FieldsIdsMap {
/// Returns the field id related to a field name, it will create a new field id if the
/// name is not already known. Returns `None` if the maximum field id has been reached.
pub fn insert(&mut self, name: &str) -> Option<u8> {
pub fn insert(&mut self, name: &str) -> Option<FieldId> {
match self.names_ids.get(name) {
Some(id) => Some(*id),
None => {
@ -43,17 +44,17 @@ impl FieldsIdsMap {
}
/// Get the id of a field based on its name.
pub fn id(&self, name: &str) -> Option<u8> {
pub fn id(&self, name: &str) -> Option<FieldId> {
self.names_ids.get(name).copied()
}
/// Get the name of a field based on its id.
pub fn name(&self, id: u8) -> Option<&str> {
pub fn name(&self, id: FieldId) -> Option<&str> {
self.ids_names.get(&id).map(String::as_str)
}
/// Remove a field name and id based on its name.
pub fn remove(&mut self, name: &str) -> Option<u8> {
pub fn remove(&mut self, name: &str) -> Option<FieldId> {
match self.names_ids.remove(name) {
Some(id) => self.ids_names.remove_entry(&id).map(|(id, _)| id),
None => None,
@ -61,7 +62,7 @@ impl FieldsIdsMap {
}
/// Iterate over the ids and names in the ids order.
pub fn iter(&self) -> impl Iterator<Item=(u8, &str)> {
pub fn iter(&self) -> impl Iterator<Item=(FieldId, &str)> {
self.ids_names.iter().map(|(id, name)| (*id, name.as_str()))
}
}

View File

@ -2,12 +2,13 @@ use std::borrow::Cow;
use std::convert::TryInto;
use crate::facet::value_encoding::f64_into_bytes;
use crate::FieldId;
// TODO do not de/serialize right bound when level = 0
pub struct FacetLevelValueF64Codec;
impl<'a> heed::BytesDecode<'a> for FacetLevelValueF64Codec {
type DItem = (u8, u8, f64, f64);
type DItem = (FieldId, u8, f64, f64);
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
let (field_id, bytes) = bytes.split_first()?;
@ -27,7 +28,7 @@ impl<'a> heed::BytesDecode<'a> for FacetLevelValueF64Codec {
}
impl heed::BytesEncode<'_> for FacetLevelValueF64Codec {
type EItem = (u8, u8, f64, f64);
type EItem = (FieldId, u8, f64, f64);
fn bytes_encode((field_id, level, left, right): &Self::EItem) -> Option<Cow<[u8]>> {
let mut buffer = [0u8; 32];

View File

@ -2,11 +2,12 @@ use std::borrow::Cow;
use std::convert::TryInto;
use crate::facet::value_encoding::{i64_from_bytes, i64_into_bytes};
use crate::FieldId;
pub struct FacetLevelValueI64Codec;
impl<'a> heed::BytesDecode<'a> for FacetLevelValueI64Codec {
type DItem = (u8, u8, i64, i64);
type DItem = (FieldId, u8, i64, i64);
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
let (field_id, bytes) = bytes.split_first()?;
@ -24,7 +25,7 @@ impl<'a> heed::BytesDecode<'a> for FacetLevelValueI64Codec {
}
impl heed::BytesEncode<'_> for FacetLevelValueI64Codec {
type EItem = (u8, u8, i64, i64);
type EItem = (FieldId, u8, i64, i64);
fn bytes_encode((field_id, level, left, right): &Self::EItem) -> Option<Cow<[u8]>> {
let left = i64_into_bytes(*left);

View File

@ -1,10 +1,12 @@
use std::borrow::Cow;
use std::str;
use crate::FieldId;
pub struct FacetValueStringCodec;
impl<'a> heed::BytesDecode<'a> for FacetValueStringCodec {
type DItem = (u8, &'a str);
type DItem = (FieldId, &'a str);
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
let (field_id, bytes) = bytes.split_first()?;
@ -14,7 +16,7 @@ impl<'a> heed::BytesDecode<'a> for FacetValueStringCodec {
}
impl<'a> heed::BytesEncode<'a> for FacetValueStringCodec {
type EItem = (u8, &'a str);
type EItem = (FieldId, &'a str);
fn bytes_encode((field_id, value): &Self::EItem) -> Option<Cow<[u8]>> {
let mut bytes = Vec::with_capacity(value.len() + 1);

View File

@ -10,7 +10,7 @@ use roaring::RoaringBitmap;
use crate::facet::FacetType;
use crate::fields_ids_map::FieldsIdsMap;
use crate::Search;
use crate::{BEU32, DocumentId, ExternalDocumentsIds};
use crate::{BEU32, DocumentId, FieldId, ExternalDocumentsIds};
use crate::{
RoaringBitmapCodec, BEU32StrCodec, StrStrU8Codec, ObkvCodec,
BoRoaringBitmapCodec, CboRoaringBitmapCodec,
@ -107,8 +107,8 @@ impl Index {
/* primary key */
/// Writes the documents primary key, this is the field name that is used to store the id.
pub fn put_primary_key(&self, wtxn: &mut RwTxn, primary_key: u8) -> heed::Result<()> {
self.main.put::<_, Str, OwnedType<u8>>(wtxn, PRIMARY_KEY_KEY, &primary_key)
pub fn put_primary_key(&self, wtxn: &mut RwTxn, primary_key: FieldId) -> heed::Result<()> {
self.main.put::<_, Str, OwnedType<FieldId>>(wtxn, PRIMARY_KEY_KEY, &primary_key)
}
/// Deletes the primary key of the documents, this can be done to reset indexes settings.
@ -117,8 +117,8 @@ impl Index {
}
/// Returns the documents primary key, `None` if it hasn't been defined.
pub fn primary_key(&self, rtxn: &RoTxn) -> heed::Result<Option<u8>> {
self.main.get::<_, Str, OwnedType<u8>>(rtxn, PRIMARY_KEY_KEY)
pub fn primary_key(&self, rtxn: &RoTxn) -> heed::Result<Option<FieldId>> {
self.main.get::<_, Str, OwnedType<FieldId>>(rtxn, PRIMARY_KEY_KEY)
}
/* external documents ids */
@ -172,7 +172,7 @@ impl Index {
/// Writes the fields ids that must be displayed in the defined order.
/// There must not be any duplicate field id.
pub fn put_displayed_fields(&self, wtxn: &mut RwTxn, fields: &[u8]) -> heed::Result<()> {
pub fn put_displayed_fields(&self, wtxn: &mut RwTxn, fields: &[FieldId]) -> heed::Result<()> {
self.main.put::<_, Str, ByteSlice>(wtxn, DISPLAYED_FIELDS_KEY, fields)
}
@ -184,14 +184,14 @@ impl Index {
/// Returns the displayed fields ids in the order they must be returned. If it returns
/// `None` it means that all the attributes are displayed in the order of the `FieldsIdsMap`.
pub fn displayed_fields<'t>(&self, rtxn: &'t RoTxn) -> heed::Result<Option<&'t [u8]>> {
pub fn displayed_fields<'t>(&self, rtxn: &'t RoTxn) -> heed::Result<Option<&'t [FieldId]>> {
self.main.get::<_, Str, ByteSlice>(rtxn, DISPLAYED_FIELDS_KEY)
}
/* searchable fields */
/// Writes the searchable fields, when this list is specified, only these are indexed.
pub fn put_searchable_fields(&self, wtxn: &mut RwTxn, fields: &[u8]) -> heed::Result<()> {
pub fn put_searchable_fields(&self, wtxn: &mut RwTxn, fields: &[FieldId]) -> heed::Result<()> {
assert!(fields.windows(2).all(|win| win[0] < win[1])); // is sorted
self.main.put::<_, Str, ByteSlice>(wtxn, SEARCHABLE_FIELDS_KEY, fields)
}
@ -203,7 +203,7 @@ impl Index {
/// Returns the searchable fields ids, those are the fields that are indexed,
/// if the searchable fields aren't there it means that **all** the fields are indexed.
pub fn searchable_fields<'t>(&self, rtxn: &'t RoTxn) -> heed::Result<Option<&'t [u8]>> {
pub fn searchable_fields<'t>(&self, rtxn: &'t RoTxn) -> heed::Result<Option<&'t [FieldId]>> {
self.main.get::<_, Str, ByteSlice>(rtxn, SEARCHABLE_FIELDS_KEY)
}
@ -211,7 +211,7 @@ impl Index {
/// Writes the facet fields ids associated with their facet type or `None` if
/// the facet type is currently unknown.
pub fn put_faceted_fields(&self, wtxn: &mut RwTxn, fields_types: &HashMap<u8, FacetType>) -> heed::Result<()> {
pub fn put_faceted_fields(&self, wtxn: &mut RwTxn, fields_types: &HashMap<FieldId, FacetType>) -> heed::Result<()> {
self.main.put::<_, Str, SerdeJson<_>>(wtxn, FACETED_FIELDS_KEY, fields_types)
}
@ -221,14 +221,14 @@ impl Index {
}
/// Returns the facet fields ids associated with their facet type.
pub fn faceted_fields(&self, wtxn: &RoTxn) -> heed::Result<HashMap<u8, FacetType>> {
pub fn faceted_fields(&self, wtxn: &RoTxn) -> heed::Result<HashMap<FieldId, FacetType>> {
Ok(self.main.get::<_, Str, SerdeJson<_>>(wtxn, FACETED_FIELDS_KEY)?.unwrap_or_default())
}
/* faceted documents ids */
/// Writes the documents ids that are faceted under this field id.
pub fn put_faceted_documents_ids(&self, wtxn: &mut RwTxn, field_id: u8, docids: &RoaringBitmap) -> heed::Result<()> {
pub fn put_faceted_documents_ids(&self, wtxn: &mut RwTxn, field_id: FieldId, docids: &RoaringBitmap) -> heed::Result<()> {
let mut buffer = [0u8; FACETED_DOCUMENTS_IDS_PREFIX.len() + 1];
buffer[..FACETED_DOCUMENTS_IDS_PREFIX.len()].clone_from_slice(FACETED_DOCUMENTS_IDS_PREFIX.as_bytes());
*buffer.last_mut().unwrap() = field_id;
@ -236,7 +236,7 @@ impl Index {
}
/// Retrieve all the documents ids that are faceted under this field id.
pub fn faceted_documents_ids(&self, rtxn: &RoTxn, field_id: u8) -> heed::Result<RoaringBitmap> {
pub fn faceted_documents_ids(&self, rtxn: &RoTxn, field_id: FieldId) -> heed::Result<RoaringBitmap> {
let mut buffer = [0u8; FACETED_DOCUMENTS_IDS_PREFIX.len() + 1];
buffer[..FACETED_DOCUMENTS_IDS_PREFIX.len()].clone_from_slice(FACETED_DOCUMENTS_IDS_PREFIX.as_bytes());
*buffer.last_mut().unwrap() = field_id;

View File

@ -40,15 +40,16 @@ pub type SmallVec16<T> = smallvec::SmallVec<[T; 16]>;
pub type SmallVec8<T> = smallvec::SmallVec<[T; 8]>;
pub type BEU32 = heed::zerocopy::U32<heed::byteorder::BE>;
pub type BEU64 = heed::zerocopy::U64<heed::byteorder::BE>;
pub type DocumentId = u32;
pub type Attribute = u32;
pub type DocumentId = u32;
pub type FieldId = u8;
pub type Position = u32;
type MergeFn = for<'a> fn(&[u8], &[Cow<'a, [u8]>]) -> anyhow::Result<Vec<u8>>;
/// Transform a raw obkv store into a JSON Object.
pub fn obkv_to_json(
displayed_fields: &[u8],
displayed_fields: &[FieldId],
fields_ids_map: &FieldsIdsMap,
obkv: obkv::KvReader,
) -> anyhow::Result<Map<String, Value>>

View File

@ -15,7 +15,7 @@ use roaring::RoaringBitmap;
use crate::facet::FacetType;
use crate::heed_codec::facet::FacetValueStringCodec;
use crate::heed_codec::facet::{FacetLevelValueI64Codec, FacetLevelValueF64Codec};
use crate::{Index, FieldsIdsMap, CboRoaringBitmapCodec};
use crate::{Index, FieldId, FieldsIdsMap, CboRoaringBitmapCodec};
use self::FacetCondition::*;
use self::FacetNumberOperator::*;
@ -75,18 +75,18 @@ impl FacetStringOperator {
#[derive(Debug, Clone, PartialEq)]
pub enum FacetCondition {
OperatorI64(u8, FacetNumberOperator<i64>),
OperatorF64(u8, FacetNumberOperator<f64>),
OperatorString(u8, FacetStringOperator),
OperatorI64(FieldId, FacetNumberOperator<i64>),
OperatorF64(FieldId, FacetNumberOperator<f64>),
OperatorString(FieldId, FacetStringOperator),
Or(Box<Self>, Box<Self>),
And(Box<Self>, Box<Self>),
}
fn get_field_id_facet_type<'a>(
fields_ids_map: &FieldsIdsMap,
faceted_fields: &HashMap<u8, FacetType>,
faceted_fields: &HashMap<FieldId, FacetType>,
items: &mut Pairs<'a, Rule>,
) -> Result<(u8, FacetType), PestError<Rule>>
) -> Result<(FieldId, FacetType), PestError<Rule>>
{
// lexing ensures that we at least have a key
let key = items.next().unwrap();
@ -154,7 +154,7 @@ impl FacetCondition {
fn from_pairs(
fim: &FieldsIdsMap,
ff: &HashMap<u8, FacetType>,
ff: &HashMap<FieldId, FacetType>,
expression: Pairs<Rule>,
) -> anyhow::Result<Self>
{
@ -201,7 +201,7 @@ impl FacetCondition {
fn between(
fields_ids_map: &FieldsIdsMap,
faceted_fields: &HashMap<u8, FacetType>,
faceted_fields: &HashMap<FieldId, FacetType>,
item: Pair<Rule>,
) -> anyhow::Result<FacetCondition>
{
@ -234,7 +234,7 @@ impl FacetCondition {
fn equal(
fields_ids_map: &FieldsIdsMap,
faceted_fields: &HashMap<u8, FacetType>,
faceted_fields: &HashMap<FieldId, FacetType>,
item: Pair<Rule>,
) -> anyhow::Result<FacetCondition>
{
@ -250,7 +250,7 @@ impl FacetCondition {
fn greater_than(
fields_ids_map: &FieldsIdsMap,
faceted_fields: &HashMap<u8, FacetType>,
faceted_fields: &HashMap<FieldId, FacetType>,
item: Pair<Rule>,
) -> anyhow::Result<FacetCondition>
{
@ -274,7 +274,7 @@ impl FacetCondition {
fn greater_than_or_equal(
fields_ids_map: &FieldsIdsMap,
faceted_fields: &HashMap<u8, FacetType>,
faceted_fields: &HashMap<FieldId, FacetType>,
item: Pair<Rule>,
) -> anyhow::Result<FacetCondition>
{
@ -298,7 +298,7 @@ impl FacetCondition {
fn lower_than(
fields_ids_map: &FieldsIdsMap,
faceted_fields: &HashMap<u8, FacetType>,
faceted_fields: &HashMap<FieldId, FacetType>,
item: Pair<Rule>,
) -> anyhow::Result<FacetCondition>
{
@ -322,7 +322,7 @@ impl FacetCondition {
fn lower_than_or_equal(
fields_ids_map: &FieldsIdsMap,
faceted_fields: &HashMap<u8, FacetType>,
faceted_fields: &HashMap<FieldId, FacetType>,
item: Pair<Rule>,
) -> anyhow::Result<FacetCondition>
{
@ -351,7 +351,7 @@ impl FacetCondition {
fn explore_facet_levels<'t, T: 't, KC>(
rtxn: &'t heed::RoTxn,
db: heed::Database<ByteSlice, CboRoaringBitmapCodec>,
field_id: u8,
field_id: FieldId,
level: u8,
left: Bound<T>,
right: Bound<T>,
@ -447,7 +447,7 @@ impl FacetCondition {
rtxn: &'t heed::RoTxn,
index: &Index,
db: heed::Database<ByteSlice, CboRoaringBitmapCodec>,
field_id: u8,
field_id: FieldId,
operator: FacetNumberOperator<T>,
) -> anyhow::Result<RoaringBitmap>
where
@ -493,7 +493,7 @@ impl FacetCondition {
rtxn: &heed::RoTxn,
index: &Index,
db: heed::Database<FacetValueStringCodec, CboRoaringBitmapCodec>,
field_id: u8,
field_id: FieldId,
operator: &FacetStringOperator,
) -> anyhow::Result<RoaringBitmap>
{

View File

@ -22,7 +22,7 @@ use crate::heed_codec::{BoRoaringBitmapCodec, CboRoaringBitmapCodec};
use crate::heed_codec::facet::{FacetValueStringCodec, FacetLevelValueF64Codec, FacetLevelValueI64Codec};
use crate::tokenizer::{simple_tokenizer, only_token};
use crate::update::UpdateIndexingStep;
use crate::{json_to_string, SmallVec8, SmallVec32, SmallString32, Position, DocumentId};
use crate::{json_to_string, SmallVec8, SmallVec32, SmallString32, Position, DocumentId, FieldId};
use super::{MergeFn, create_writer, create_sorter, writer_into_reader};
use super::merge_function::{
@ -47,8 +47,8 @@ pub struct Readers {
pub struct Store {
// Indexing parameters
searchable_fields: HashSet<u8>,
faceted_fields: HashMap<u8, FacetType>,
searchable_fields: HashSet<FieldId>,
faceted_fields: HashMap<FieldId, FacetType>,
// Caches
word_docids: LinkedHashMap<SmallVec32<u8>, RoaringBitmap>,
word_docids_limit: usize,
@ -72,8 +72,8 @@ pub struct Store {
impl Store {
pub fn new(
searchable_fields: HashSet<u8>,
faceted_fields: HashMap<u8, FacetType>,
searchable_fields: HashSet<FieldId>,
faceted_fields: HashMap<FieldId, FacetType>,
linked_hash_map_size: Option<usize>,
max_nb_chunks: Option<usize>,
max_memory: Option<usize>,
@ -176,7 +176,7 @@ impl Store {
// Save the document id under the facet field id and value it has been seen with.
fn insert_facet_values_docid(
&mut self,
field_id: u8,
field_id: FieldId,
field_value: FacetValue,
id: DocumentId,
) -> anyhow::Result<()>
@ -243,7 +243,7 @@ impl Store {
&mut self,
document_id: DocumentId,
words_positions: &mut HashMap<String, SmallVec32<Position>>,
facet_values: &mut HashMap<u8, SmallVec8<FacetValue>>,
facet_values: &mut HashMap<FieldId, SmallVec8<FacetValue>>,
record: &[u8],
) -> anyhow::Result<()>
{

View File

@ -10,13 +10,13 @@ use log::info;
use roaring::RoaringBitmap;
use serde_json::{Map, Value};
use crate::{BEU32, MergeFn, Index, FieldsIdsMap, ExternalDocumentsIds};
use crate::{BEU32, MergeFn, Index, FieldId, FieldsIdsMap, ExternalDocumentsIds};
use crate::update::{AvailableDocumentsIds, UpdateIndexingStep};
use super::merge_function::merge_two_obkvs;
use super::{create_writer, create_sorter, IndexDocumentsMethod};
pub struct TransformOutput {
pub primary_key: u8,
pub primary_key: FieldId,
pub fields_ids_map: FieldsIdsMap,
pub external_documents_ids: ExternalDocumentsIds<'static>,
pub new_documents_ids: RoaringBitmap,
@ -365,7 +365,7 @@ impl Transform<'_, '_> {
fn output_from_sorter<F>(
self,
sorter: grenad::Sorter<MergeFn>,
primary_key: u8,
primary_key: FieldId,
fields_ids_map: FieldsIdsMap,
approximate_number_of_documents: usize,
mut external_documents_ids: ExternalDocumentsIds<'_>,
@ -477,7 +477,7 @@ impl Transform<'_, '_> {
// TODO this can be done in parallel by using the rayon `ThreadPool`.
pub fn remap_index_documents(
self,
primary_key: u8,
primary_key: FieldId,
fields_ids_map: FieldsIdsMap,
) -> anyhow::Result<TransformOutput>
{