mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-07-15 13:58:36 +02:00
Simplify integer and float functions trait bounds
This commit is contained in:
parent
efbfa81fa7
commit
51767725b2
17 changed files with 217 additions and 521 deletions
|
@ -10,7 +10,7 @@ use serde_json::Value;
|
|||
|
||||
use crate::facet::FacetType;
|
||||
use crate::{Index, BEU32, SmallString32, ExternalDocumentsIds};
|
||||
use crate::heed_codec::facet::{FieldDocIdFacetStringCodec, FieldDocIdFacetF64Codec, FieldDocIdFacetI64Codec};
|
||||
use crate::heed_codec::facet::{FieldDocIdFacetStringCodec, FieldDocIdFacetF64Codec};
|
||||
use super::ClearDocuments;
|
||||
|
||||
pub struct DeleteDocuments<'t, 'u, 'i> {
|
||||
|
@ -302,7 +302,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
|
|||
}
|
||||
}
|
||||
},
|
||||
FacetType::Float => {
|
||||
FacetType::Number => {
|
||||
let mut iter = iter.remap_key_type::<FieldDocIdFacetF64Codec>();
|
||||
while let Some(result) = iter.next() {
|
||||
let ((_fid, docid, _value), ()) = result?;
|
||||
|
@ -311,15 +311,6 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
|
|||
}
|
||||
}
|
||||
},
|
||||
FacetType::Integer => {
|
||||
let mut iter = iter.remap_key_type::<FieldDocIdFacetI64Codec>();
|
||||
while let Some(result) = iter.next() {
|
||||
let ((_fid, docid, _value), ()) = result?;
|
||||
if self.documents_ids.contains(docid) {
|
||||
iter.del_current()?;
|
||||
}
|
||||
}
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -7,12 +7,11 @@ use grenad::{CompressionType, Reader, Writer, FileFuse};
|
|||
use heed::types::{ByteSlice, DecodeIgnore};
|
||||
use heed::{BytesEncode, Error};
|
||||
use log::debug;
|
||||
use num_traits::{Bounded, Zero};
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use crate::facet::FacetType;
|
||||
use crate::heed_codec::CboRoaringBitmapCodec;
|
||||
use crate::heed_codec::facet::{FacetLevelValueI64Codec, FacetLevelValueF64Codec};
|
||||
use crate::heed_codec::facet::FacetLevelValueF64Codec;
|
||||
use crate::Index;
|
||||
use crate::update::index_documents::WriteMethod;
|
||||
use crate::update::index_documents::{create_writer, writer_into_reader, write_into_lmdb_database};
|
||||
|
@ -65,58 +64,6 @@ impl<'t, 'u, 'i> Facets<'t, 'u, 'i> {
|
|||
debug!("Computing and writing the facet values levels docids into LMDB on disk...");
|
||||
for (field_id, facet_type) in faceted_fields {
|
||||
let (content, documents_ids) = match facet_type {
|
||||
FacetType::Integer => {
|
||||
clear_field_levels::<i64, FacetLevelValueI64Codec>(
|
||||
self.wtxn,
|
||||
self.index.facet_field_id_value_docids,
|
||||
field_id,
|
||||
)?;
|
||||
|
||||
let documents_ids = compute_faceted_documents_ids(
|
||||
self.wtxn,
|
||||
self.index.facet_field_id_value_docids,
|
||||
field_id,
|
||||
)?;
|
||||
|
||||
let content = compute_facet_levels::<i64, FacetLevelValueI64Codec>(
|
||||
self.wtxn,
|
||||
self.index.facet_field_id_value_docids,
|
||||
self.chunk_compression_type,
|
||||
self.chunk_compression_level,
|
||||
self.chunk_fusing_shrink_size,
|
||||
self.level_group_size,
|
||||
self.min_level_size,
|
||||
field_id,
|
||||
)?;
|
||||
|
||||
(Some(content), documents_ids)
|
||||
},
|
||||
FacetType::Float => {
|
||||
clear_field_levels::<f64, FacetLevelValueF64Codec>(
|
||||
self.wtxn,
|
||||
self.index.facet_field_id_value_docids,
|
||||
field_id,
|
||||
)?;
|
||||
|
||||
let documents_ids = compute_faceted_documents_ids(
|
||||
self.wtxn,
|
||||
self.index.facet_field_id_value_docids,
|
||||
field_id,
|
||||
)?;
|
||||
|
||||
let content = compute_facet_levels::<f64, FacetLevelValueF64Codec>(
|
||||
self.wtxn,
|
||||
self.index.facet_field_id_value_docids,
|
||||
self.chunk_compression_type,
|
||||
self.chunk_compression_level,
|
||||
self.chunk_fusing_shrink_size,
|
||||
self.level_group_size,
|
||||
self.min_level_size,
|
||||
field_id,
|
||||
)?;
|
||||
|
||||
(Some(content), documents_ids)
|
||||
},
|
||||
FacetType::String => {
|
||||
let documents_ids = compute_faceted_documents_ids(
|
||||
self.wtxn,
|
||||
|
@ -126,6 +73,32 @@ impl<'t, 'u, 'i> Facets<'t, 'u, 'i> {
|
|||
|
||||
(None, documents_ids)
|
||||
},
|
||||
FacetType::Number => {
|
||||
clear_field_number_levels(
|
||||
self.wtxn,
|
||||
self.index.facet_field_id_value_docids.remap_key_type::<FacetLevelValueF64Codec>(),
|
||||
field_id,
|
||||
)?;
|
||||
|
||||
let documents_ids = compute_faceted_documents_ids(
|
||||
self.wtxn,
|
||||
self.index.facet_field_id_value_docids,
|
||||
field_id,
|
||||
)?;
|
||||
|
||||
let content = compute_facet_number_levels(
|
||||
self.wtxn,
|
||||
self.index.facet_field_id_value_docids.remap_key_type::<FacetLevelValueF64Codec>(),
|
||||
self.chunk_compression_type,
|
||||
self.chunk_compression_level,
|
||||
self.chunk_fusing_shrink_size,
|
||||
self.level_group_size,
|
||||
self.min_level_size,
|
||||
field_id,
|
||||
)?;
|
||||
|
||||
(Some(content), documents_ids)
|
||||
},
|
||||
};
|
||||
|
||||
if let Some(content) = content {
|
||||
|
@ -145,25 +118,21 @@ impl<'t, 'u, 'i> Facets<'t, 'u, 'i> {
|
|||
}
|
||||
}
|
||||
|
||||
fn clear_field_levels<'t, T: 't, KC>(
|
||||
fn clear_field_number_levels<'t, >(
|
||||
wtxn: &'t mut heed::RwTxn,
|
||||
db: heed::Database<ByteSlice, CboRoaringBitmapCodec>,
|
||||
db: heed::Database<FacetLevelValueF64Codec, CboRoaringBitmapCodec>,
|
||||
field_id: u8,
|
||||
) -> heed::Result<()>
|
||||
where
|
||||
T: Copy + Bounded,
|
||||
KC: heed::BytesDecode<'t, DItem = (u8, u8, T, T)>,
|
||||
KC: for<'x> heed::BytesEncode<'x, EItem = (u8, u8, T, T)>,
|
||||
{
|
||||
let left = (field_id, 1, T::min_value(), T::min_value());
|
||||
let right = (field_id, u8::MAX, T::max_value(), T::max_value());
|
||||
let left = (field_id, 1, f64::MIN, f64::MIN);
|
||||
let right = (field_id, u8::MAX, f64::MAX, f64::MAX);
|
||||
let range = left..=right;
|
||||
db.remap_key_type::<KC>().delete_range(wtxn, &range).map(drop)
|
||||
db.delete_range(wtxn, &range).map(drop)
|
||||
}
|
||||
|
||||
fn compute_facet_levels<'t, T: 't, KC>(
|
||||
fn compute_facet_number_levels<'t>(
|
||||
rtxn: &'t heed::RoTxn,
|
||||
db: heed::Database<ByteSlice, CboRoaringBitmapCodec>,
|
||||
db: heed::Database<FacetLevelValueF64Codec, CboRoaringBitmapCodec>,
|
||||
compression_type: CompressionType,
|
||||
compression_level: Option<u32>,
|
||||
shrink_size: Option<u64>,
|
||||
|
@ -171,12 +140,10 @@ fn compute_facet_levels<'t, T: 't, KC>(
|
|||
min_level_size: NonZeroUsize,
|
||||
field_id: u8,
|
||||
) -> anyhow::Result<Reader<FileFuse>>
|
||||
where
|
||||
T: Copy + PartialEq + PartialOrd + Bounded + Zero,
|
||||
KC: heed::BytesDecode<'t, DItem = (u8, u8, T, T)>,
|
||||
KC: for<'x> heed::BytesEncode<'x, EItem = (u8, u8, T, T)>,
|
||||
{
|
||||
let first_level_size = db.prefix_iter(rtxn, &[field_id])?
|
||||
let first_level_size = db
|
||||
.remap_key_type::<ByteSlice>()
|
||||
.prefix_iter(rtxn, &[field_id])?
|
||||
.remap_types::<DecodeIgnore, DecodeIgnore>()
|
||||
.fold(Ok(0usize), |count, result| result.and(count).map(|c| c + 1))?;
|
||||
|
||||
|
@ -187,8 +154,8 @@ where
|
|||
})?;
|
||||
|
||||
let level_0_range = {
|
||||
let left = (field_id, 0, T::min_value(), T::min_value());
|
||||
let right = (field_id, 0, T::max_value(), T::max_value());
|
||||
let left = (field_id, 0, f64::MIN, f64::MIN);
|
||||
let right = (field_id, 0, f64::MAX, f64::MAX);
|
||||
left..=right
|
||||
};
|
||||
|
||||
|
@ -199,11 +166,10 @@ where
|
|||
.take_while(|(_, s)| first_level_size / *s >= min_level_size.get());
|
||||
|
||||
for (level, group_size) in group_size_iter {
|
||||
let mut left = T::zero();
|
||||
let mut right = T::zero();
|
||||
let mut left = 0.0;
|
||||
let mut right = 0.0;
|
||||
let mut group_docids = RoaringBitmap::new();
|
||||
|
||||
let db = db.remap_key_type::<KC>();
|
||||
for (i, result) in db.range(rtxn, &level_0_range)?.enumerate() {
|
||||
let ((_field_id, _level, value, _right), docids) = result?;
|
||||
|
||||
|
@ -212,7 +178,7 @@ where
|
|||
} else if i % group_size == 0 {
|
||||
// we found the first bound of the next group, we must store the left
|
||||
// and right bounds associated with the docids.
|
||||
write_entry::<T, KC>(&mut writer, field_id, level, left, right, &group_docids)?;
|
||||
write_number_entry(&mut writer, field_id, level, left, right, &group_docids)?;
|
||||
|
||||
// We save the left bound for the new group and also reset the docids.
|
||||
group_docids = RoaringBitmap::new();
|
||||
|
@ -225,7 +191,7 @@ where
|
|||
}
|
||||
|
||||
if !group_docids.is_empty() {
|
||||
write_entry::<T, KC>(&mut writer, field_id, level, left, right, &group_docids)?;
|
||||
write_number_entry(&mut writer, field_id, level, left, right, &group_docids)?;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -246,19 +212,17 @@ fn compute_faceted_documents_ids(
|
|||
Ok(documents_ids)
|
||||
}
|
||||
|
||||
fn write_entry<T, KC>(
|
||||
fn write_number_entry(
|
||||
writer: &mut Writer<File>,
|
||||
field_id: u8,
|
||||
level: u8,
|
||||
left: T,
|
||||
right: T,
|
||||
left: f64,
|
||||
right: f64,
|
||||
ids: &RoaringBitmap,
|
||||
) -> anyhow::Result<()>
|
||||
where
|
||||
KC: for<'x> heed::BytesEncode<'x, EItem = (u8, u8, T, T)>,
|
||||
{
|
||||
let key = (field_id, level, left, right);
|
||||
let key = KC::bytes_encode(&key).ok_or(Error::Encoding)?;
|
||||
let key = FacetLevelValueF64Codec::bytes_encode(&key).ok_or(Error::Encoding)?;
|
||||
let data = CboRoaringBitmapCodec::bytes_encode(&ids).ok_or(Error::Encoding)?;
|
||||
writer.insert(&key, &data)?;
|
||||
Ok(())
|
||||
|
|
|
@ -19,12 +19,12 @@ use roaring::RoaringBitmap;
|
|||
use serde_json::Value;
|
||||
use tempfile::tempfile;
|
||||
|
||||
use crate::facet::FacetType;
|
||||
use crate::heed_codec::facet::{FacetValueStringCodec, FacetLevelValueF64Codec, FacetLevelValueI64Codec};
|
||||
use crate::heed_codec::facet::{FieldDocIdFacetStringCodec, FieldDocIdFacetF64Codec, FieldDocIdFacetI64Codec};
|
||||
use crate::facet::{FacetType, FacetValue};
|
||||
use crate::heed_codec::facet::{FacetValueStringCodec, FacetLevelValueF64Codec};
|
||||
use crate::heed_codec::facet::{FieldDocIdFacetStringCodec, FieldDocIdFacetF64Codec};
|
||||
use crate::heed_codec::{BoRoaringBitmapCodec, CboRoaringBitmapCodec};
|
||||
use crate::update::UpdateIndexingStep;
|
||||
use crate::{json_to_string, SmallVec8, SmallVec32, SmallString32, Position, DocumentId, FieldId};
|
||||
use crate::{json_to_string, SmallVec8, SmallVec32, Position, DocumentId, FieldId};
|
||||
|
||||
use super::{MergeFn, create_writer, create_sorter, writer_into_reader};
|
||||
use super::merge_function::{
|
||||
|
@ -365,8 +365,7 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> {
|
|||
for ((field_id, value), docids) in iter {
|
||||
let result = match value {
|
||||
String(s) => FacetValueStringCodec::bytes_encode(&(field_id, &s)).map(Cow::into_owned),
|
||||
Float(f) => FacetLevelValueF64Codec::bytes_encode(&(field_id, 0, *f, *f)).map(Cow::into_owned),
|
||||
Integer(i) => FacetLevelValueI64Codec::bytes_encode(&(field_id, 0, i, i)).map(Cow::into_owned),
|
||||
Number(f) => FacetLevelValueF64Codec::bytes_encode(&(field_id, 0, *f, *f)).map(Cow::into_owned),
|
||||
};
|
||||
let key = result.context("could not serialize facet key")?;
|
||||
let bytes = CboRoaringBitmapCodec::bytes_encode(&docids)
|
||||
|
@ -390,8 +389,7 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> {
|
|||
|
||||
let result = match value {
|
||||
String(s) => FieldDocIdFacetStringCodec::bytes_encode(&(field_id, document_id, s)).map(Cow::into_owned),
|
||||
Float(f) => FieldDocIdFacetF64Codec::bytes_encode(&(field_id, document_id, **f)).map(Cow::into_owned),
|
||||
Integer(i) => FieldDocIdFacetI64Codec::bytes_encode(&(field_id, document_id, *i)).map(Cow::into_owned),
|
||||
Number(f) => FieldDocIdFacetF64Codec::bytes_encode(&(field_id, document_id, **f)).map(Cow::into_owned),
|
||||
};
|
||||
|
||||
let key = result.context("could not serialize facet key")?;
|
||||
|
@ -605,13 +603,6 @@ fn lmdb_key_valid_size(key: &[u8]) -> bool {
|
|||
!key.is_empty() && key.len() <= LMDB_MAX_KEY_LENGTH
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
|
||||
enum FacetValue {
|
||||
String(SmallString32),
|
||||
Float(OrderedFloat<f64>),
|
||||
Integer(i64),
|
||||
}
|
||||
|
||||
/// take an iterator on tokens and compute their relative position depending on separator kinds
|
||||
/// if it's a `Hard` separator we add an additional relative proximity of 8 between words,
|
||||
/// else we keep the standard proximity of 1 between words.
|
||||
|
@ -654,54 +645,40 @@ fn parse_facet_value(ftype: FacetType, value: &Value) -> anyhow::Result<SmallVec
|
|||
{
|
||||
match value {
|
||||
Value::Null => Ok(()),
|
||||
Value::Bool(b) => {
|
||||
output.push(Integer(*b as i64));
|
||||
Ok(())
|
||||
Value::Bool(b) => match ftype {
|
||||
FacetType::String => {
|
||||
output.push(String(b.to_string()));
|
||||
Ok(())
|
||||
},
|
||||
FacetType::Number => {
|
||||
output.push(Number(OrderedFloat(if *b { 1.0 } else { 0.0 })));
|
||||
Ok(())
|
||||
},
|
||||
},
|
||||
Value::Number(number) => match ftype {
|
||||
FacetType::String => {
|
||||
let string = SmallString32::from(number.to_string());
|
||||
output.push(String(string));
|
||||
output.push(String(number.to_string()));
|
||||
Ok(())
|
||||
},
|
||||
FacetType::Float => match number.as_f64() {
|
||||
FacetType::Number => match number.as_f64() {
|
||||
Some(float) => {
|
||||
output.push(Float(OrderedFloat(float)));
|
||||
output.push(Number(OrderedFloat(float)));
|
||||
Ok(())
|
||||
},
|
||||
None => bail!("invalid facet type, expecting {} found integer", ftype),
|
||||
},
|
||||
FacetType::Integer => match number.as_i64() {
|
||||
Some(integer) => {
|
||||
output.push(Integer(integer));
|
||||
Ok(())
|
||||
},
|
||||
None => if number.is_f64() {
|
||||
bail!("invalid facet type, expecting {} found float", ftype)
|
||||
} else {
|
||||
bail!("invalid facet type, expecting {} found out-of-bound integer (64bit)", ftype)
|
||||
},
|
||||
None => bail!("invalid facet type, expecting {} found number", ftype),
|
||||
},
|
||||
},
|
||||
Value::String(string) => {
|
||||
// TODO must be normalized and not only lowercased.
|
||||
let string = string.trim().to_lowercase();
|
||||
if string.is_empty() { return Ok(()) }
|
||||
match ftype {
|
||||
FacetType::String => {
|
||||
let string = SmallString32::from(string);
|
||||
output.push(String(string));
|
||||
Ok(())
|
||||
},
|
||||
FacetType::Float => match string.parse() {
|
||||
FacetType::Number => match string.parse() {
|
||||
Ok(float) => {
|
||||
output.push(Float(OrderedFloat(float)));
|
||||
Ok(())
|
||||
},
|
||||
Err(_err) => bail!("invalid facet type, expecting {} found string", ftype),
|
||||
},
|
||||
FacetType::Integer => match string.parse() {
|
||||
Ok(integer) => {
|
||||
output.push(Integer(integer));
|
||||
output.push(Number(OrderedFloat(float)));
|
||||
Ok(())
|
||||
},
|
||||
Err(_err) => bail!("invalid facet type, expecting {} found string", ftype),
|
||||
|
@ -711,7 +688,10 @@ fn parse_facet_value(ftype: FacetType, value: &Value) -> anyhow::Result<SmallVec
|
|||
Value::Array(values) => if can_recurse {
|
||||
values.iter().map(|v| inner_parse_facet_value(ftype, v, false, output)).collect()
|
||||
} else {
|
||||
bail!("invalid facet type, expecting {} found sub-array ()", ftype)
|
||||
bail!(
|
||||
"invalid facet type, expecting {} found array (recursive arrays are not supported)",
|
||||
ftype,
|
||||
);
|
||||
},
|
||||
Value::Object(_) => bail!("invalid facet type, expecting {} found object", ftype),
|
||||
}
|
||||
|
|
|
@ -619,7 +619,7 @@ mod tests {
|
|||
// Set the faceted fields to be the age.
|
||||
let mut wtxn = index.write_txn().unwrap();
|
||||
let mut builder = Settings::new(&mut wtxn, &index, 0);
|
||||
builder.set_faceted_fields(hashmap! { "age".into() => "integer".into() });
|
||||
builder.set_faceted_fields(hashmap!{ "age".into() => "number".into() });
|
||||
builder.execute(|_, _| ()).unwrap();
|
||||
|
||||
// Then index some documents.
|
||||
|
@ -632,7 +632,7 @@ mod tests {
|
|||
// Check that the displayed fields are correctly set.
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
let fields_ids = index.faceted_fields(&rtxn).unwrap();
|
||||
assert_eq!(fields_ids, hashmap! { "age".to_string() => FacetType::Integer });
|
||||
assert_eq!(fields_ids, hashmap!{ "age".to_string() => FacetType::Number });
|
||||
// Only count the field_id 0 and level 0 facet values.
|
||||
let count = index.facet_field_id_value_docids.prefix_iter(&rtxn, &[0, 0]).unwrap().count();
|
||||
assert_eq!(count, 3);
|
||||
|
@ -812,9 +812,9 @@ mod tests {
|
|||
let mut wtxn = index.write_txn().unwrap();
|
||||
let mut builder = Settings::new(&mut wtxn, &index, 0);
|
||||
builder.set_displayed_fields(vec!["hello".to_string()]);
|
||||
builder.set_faceted_fields(hashmap! {
|
||||
"age".into() => "integer".into(),
|
||||
"toto".into() => "integer".into(),
|
||||
builder.set_faceted_fields(hashmap!{
|
||||
"age".into() => "number".into(),
|
||||
"toto".into() => "number".into(),
|
||||
});
|
||||
builder.set_criteria(vec!["asc(toto)".to_string()]);
|
||||
builder.execute(|_, _| ()).unwrap();
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue