mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-12-23 21:20:24 +01:00
Further unify facet databases of f64s and strings
This commit is contained in:
parent
3baa34d842
commit
cb8442a119
@ -1,13 +1,15 @@
|
||||
use std::borrow::Cow;
|
||||
use std::convert::TryInto;
|
||||
|
||||
use crate::facet::value_encoding::f64_into_bytes;
|
||||
use crate::{try_split_array_at, DocumentId, FieldId};
|
||||
use heed::{BytesDecode, BytesEncode};
|
||||
use std::borrow::Cow;
|
||||
use std::marker::PhantomData;
|
||||
|
||||
pub struct FieldDocIdFacetF64Codec;
|
||||
pub struct FieldDocIdFacetCodec<C>(PhantomData<C>);
|
||||
|
||||
impl<'a> heed::BytesDecode<'a> for FieldDocIdFacetF64Codec {
|
||||
type DItem = (FieldId, DocumentId, f64);
|
||||
impl<'a, C> BytesDecode<'a> for FieldDocIdFacetCodec<C>
|
||||
where
|
||||
C: BytesDecode<'a>,
|
||||
{
|
||||
type DItem = (FieldId, DocumentId, C::DItem);
|
||||
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
||||
let (field_id_bytes, bytes) = try_split_array_at(bytes)?;
|
||||
@ -16,22 +18,24 @@ impl<'a> heed::BytesDecode<'a> for FieldDocIdFacetF64Codec {
|
||||
let (document_id_bytes, bytes) = try_split_array_at(bytes)?;
|
||||
let document_id = u32::from_be_bytes(document_id_bytes);
|
||||
|
||||
let value = bytes[8..16].try_into().map(f64::from_be_bytes).ok()?;
|
||||
let value = C::bytes_decode(&bytes[8..])?;
|
||||
|
||||
Some((field_id, document_id, value))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> heed::BytesEncode<'a> for FieldDocIdFacetF64Codec {
|
||||
type EItem = (FieldId, DocumentId, f64);
|
||||
impl<'a, C> BytesEncode<'a> for FieldDocIdFacetCodec<C>
|
||||
where
|
||||
C: BytesEncode<'a>,
|
||||
{
|
||||
type EItem = (FieldId, DocumentId, C::EItem);
|
||||
|
||||
fn bytes_encode((field_id, document_id, value): &Self::EItem) -> Option<Cow<[u8]>> {
|
||||
fn bytes_encode((field_id, document_id, value): &'a Self::EItem) -> Option<Cow<[u8]>> {
|
||||
let mut bytes = Vec::with_capacity(2 + 4 + 8 + 8);
|
||||
bytes.extend_from_slice(&field_id.to_be_bytes());
|
||||
bytes.extend_from_slice(&document_id.to_be_bytes());
|
||||
let value_bytes = f64_into_bytes(*value)?;
|
||||
let value_bytes = C::bytes_encode(value)?;
|
||||
bytes.extend_from_slice(&value_bytes);
|
||||
bytes.extend_from_slice(&value.to_be_bytes());
|
||||
Some(Cow::Owned(bytes))
|
||||
}
|
||||
}
|
@ -1,50 +0,0 @@
|
||||
use std::borrow::Cow;
|
||||
use std::str;
|
||||
|
||||
use crate::{try_split_array_at, DocumentId, FieldId};
|
||||
|
||||
pub struct FieldDocIdFacetStringCodec;
|
||||
|
||||
impl FieldDocIdFacetStringCodec {
|
||||
pub fn serialize_into(
|
||||
field_id: FieldId,
|
||||
document_id: DocumentId,
|
||||
normalized_value: &str,
|
||||
out: &mut Vec<u8>,
|
||||
) {
|
||||
out.reserve(2 + 4 + normalized_value.len());
|
||||
out.extend_from_slice(&field_id.to_be_bytes());
|
||||
out.extend_from_slice(&document_id.to_be_bytes());
|
||||
out.extend_from_slice(normalized_value.as_bytes());
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> heed::BytesDecode<'a> for FieldDocIdFacetStringCodec {
|
||||
type DItem = (FieldId, DocumentId, &'a str);
|
||||
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
||||
let (field_id_bytes, bytes) = try_split_array_at(bytes)?;
|
||||
let field_id = u16::from_be_bytes(field_id_bytes);
|
||||
|
||||
let (document_id_bytes, bytes) = try_split_array_at(bytes)?;
|
||||
let document_id = u32::from_be_bytes(document_id_bytes);
|
||||
|
||||
let normalized_value = str::from_utf8(bytes).ok()?;
|
||||
Some((field_id, document_id, normalized_value))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> heed::BytesEncode<'a> for FieldDocIdFacetStringCodec {
|
||||
type EItem = (FieldId, DocumentId, &'a str);
|
||||
|
||||
fn bytes_encode((field_id, document_id, normalized_value): &Self::EItem) -> Option<Cow<[u8]>> {
|
||||
let mut bytes = Vec::new();
|
||||
FieldDocIdFacetStringCodec::serialize_into(
|
||||
*field_id,
|
||||
*document_id,
|
||||
normalized_value,
|
||||
&mut bytes,
|
||||
);
|
||||
Some(Cow::Owned(bytes))
|
||||
}
|
||||
}
|
@ -1,5 +1,4 @@
|
||||
mod field_doc_id_facet_f64_codec;
|
||||
mod field_doc_id_facet_string_codec;
|
||||
mod field_doc_id_facet_codec;
|
||||
mod ordered_f64_codec;
|
||||
mod str_ref;
|
||||
|
||||
@ -7,16 +6,19 @@ use std::borrow::Cow;
|
||||
use std::convert::TryFrom;
|
||||
use std::marker::PhantomData;
|
||||
|
||||
use heed::types::OwnedType;
|
||||
use heed::types::{DecodeIgnore, OwnedType};
|
||||
use heed::{BytesDecode, BytesEncode};
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
pub use self::field_doc_id_facet_f64_codec::FieldDocIdFacetF64Codec;
|
||||
pub use self::field_doc_id_facet_string_codec::FieldDocIdFacetStringCodec;
|
||||
pub use self::field_doc_id_facet_codec::FieldDocIdFacetCodec;
|
||||
pub use self::ordered_f64_codec::OrderedF64Codec;
|
||||
pub use self::str_ref::StrRefCodec;
|
||||
use crate::{CboRoaringBitmapCodec, BEU16};
|
||||
|
||||
pub type FieldDocIdFacetF64Codec = FieldDocIdFacetCodec<OrderedF64Codec>;
|
||||
pub type FieldDocIdFacetStringCodec = FieldDocIdFacetCodec<StrRefCodec>;
|
||||
pub type FieldDocIdFacetIgnoreCodec = FieldDocIdFacetCodec<DecodeIgnore>;
|
||||
|
||||
pub type FieldIdCodec = OwnedType<BEU16>;
|
||||
|
||||
/// Tries to split a slice in half at the given middle point,
|
||||
|
@ -15,7 +15,7 @@ use log::debug;
|
||||
use once_cell::sync::Lazy;
|
||||
use roaring::bitmap::RoaringBitmap;
|
||||
|
||||
pub use self::facet::{FacetDistribution, /* FacetNumberIter,*/ Filter, DEFAULT_VALUES_PER_FACET,};
|
||||
pub use self::facet::{FacetDistribution, Filter, DEFAULT_VALUES_PER_FACET};
|
||||
use self::fst_utils::{Complement, Intersection, StartsWith, Union};
|
||||
pub use self::matches::{
|
||||
FormatOptions, MatchBounds, Matcher, MatcherBuilder, MatchingWord, MatchingWords,
|
||||
|
@ -1,7 +1,7 @@
|
||||
use std::collections::btree_map::Entry;
|
||||
|
||||
use fst::IntoStreamer;
|
||||
use heed::types::{ByteSlice, Str};
|
||||
use heed::types::{ByteSlice, DecodeIgnore, Str};
|
||||
use heed::Database;
|
||||
use roaring::RoaringBitmap;
|
||||
use serde::{Deserialize, Serialize};
|
||||
@ -11,11 +11,13 @@ use time::OffsetDateTime;
|
||||
use super::{ClearDocuments, FacetsUpdateBulk};
|
||||
use crate::error::{InternalError, UserError};
|
||||
use crate::facet::FacetType;
|
||||
use crate::heed_codec::facet::{ByteSliceRef, FacetGroupKeyCodec, FacetGroupValueCodec};
|
||||
use crate::heed_codec::facet::{
|
||||
ByteSliceRef, FacetGroupKeyCodec, FacetGroupValueCodec, FieldDocIdFacetIgnoreCodec,
|
||||
};
|
||||
use crate::heed_codec::CboRoaringBitmapCodec;
|
||||
use crate::index::{db_name, main_key};
|
||||
use crate::{
|
||||
DocumentId, ExternalDocumentsIds, FieldId, FieldIdMapMissingEntry, FieldsIdsMap, Index, Result,
|
||||
ExternalDocumentsIds, FieldId, FieldIdMapMissingEntry, FieldsIdsMap, Index, Result,
|
||||
RoaringBitmapCodec, SmallString32, BEU32,
|
||||
};
|
||||
|
||||
@ -187,10 +189,10 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
|
||||
word_position_docids,
|
||||
word_prefix_position_docids,
|
||||
facet_id_f64_docids: _,
|
||||
facet_id_exists_docids,
|
||||
facet_id_string_docids: _,
|
||||
field_id_docid_facet_f64s,
|
||||
field_id_docid_facet_strings,
|
||||
field_id_docid_facet_f64s: _,
|
||||
field_id_docid_facet_strings: _,
|
||||
facet_id_exists_docids,
|
||||
documents,
|
||||
} = self.index;
|
||||
|
||||
@ -449,6 +451,21 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
|
||||
fields_ids_map.clone(),
|
||||
facet_type,
|
||||
)?;
|
||||
for field_id in self.index.faceted_fields_ids(self.wtxn)? {
|
||||
// Remove docids from the number faceted documents ids
|
||||
let mut docids =
|
||||
self.index.faceted_documents_ids(self.wtxn, field_id, facet_type)?;
|
||||
docids -= &self.to_delete_docids;
|
||||
self.index.put_faceted_documents_ids(self.wtxn, field_id, facet_type, &docids)?;
|
||||
|
||||
remove_docids_from_field_id_docid_facet_value(
|
||||
&self.index,
|
||||
self.wtxn,
|
||||
facet_type,
|
||||
field_id,
|
||||
&self.to_delete_docids,
|
||||
)?;
|
||||
}
|
||||
}
|
||||
|
||||
// We delete the documents ids that are under the facet field id values.
|
||||
@ -458,47 +475,6 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
|
||||
&self.to_delete_docids,
|
||||
)?;
|
||||
|
||||
// Remove the documents ids from the faceted documents ids.
|
||||
for field_id in self.index.faceted_fields_ids(self.wtxn)? {
|
||||
// Remove docids from the number faceted documents ids
|
||||
let mut docids =
|
||||
self.index.faceted_documents_ids(self.wtxn, field_id, FacetType::Number)?;
|
||||
docids -= &self.to_delete_docids;
|
||||
self.index.put_faceted_documents_ids(
|
||||
self.wtxn,
|
||||
field_id,
|
||||
FacetType::Number,
|
||||
&docids,
|
||||
)?;
|
||||
|
||||
remove_docids_from_field_id_docid_facet_value(
|
||||
self.wtxn,
|
||||
field_id_docid_facet_f64s,
|
||||
field_id,
|
||||
&self.to_delete_docids,
|
||||
|(_fid, docid, _value)| docid,
|
||||
)?;
|
||||
|
||||
// Remove docids from the string faceted documents ids
|
||||
let mut docids =
|
||||
self.index.faceted_documents_ids(self.wtxn, field_id, FacetType::String)?;
|
||||
docids -= &self.to_delete_docids;
|
||||
self.index.put_faceted_documents_ids(
|
||||
self.wtxn,
|
||||
field_id,
|
||||
FacetType::String,
|
||||
&docids,
|
||||
)?;
|
||||
|
||||
remove_docids_from_field_id_docid_facet_value(
|
||||
self.wtxn,
|
||||
field_id_docid_facet_strings,
|
||||
field_id,
|
||||
&self.to_delete_docids,
|
||||
|(_fid, docid, _value)| docid,
|
||||
)?;
|
||||
}
|
||||
|
||||
Ok(DocumentDeletionResult {
|
||||
deleted_documents: self.to_delete_docids.len(),
|
||||
remaining_documents: documents_ids.len(),
|
||||
@ -564,26 +540,28 @@ fn remove_from_word_docids(
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn remove_docids_from_field_id_docid_facet_value<'a, C, K, F, DC, V>(
|
||||
fn remove_docids_from_field_id_docid_facet_value<'i, 'a>(
|
||||
index: &'i Index,
|
||||
wtxn: &'a mut heed::RwTxn,
|
||||
db: &heed::Database<C, DC>,
|
||||
facet_type: FacetType,
|
||||
field_id: FieldId,
|
||||
to_remove: &RoaringBitmap,
|
||||
convert: F,
|
||||
) -> heed::Result<()>
|
||||
where
|
||||
C: heed::BytesDecode<'a, DItem = K>,
|
||||
DC: heed::BytesDecode<'a, DItem = V>,
|
||||
F: Fn(K) -> DocumentId,
|
||||
{
|
||||
) -> heed::Result<()> {
|
||||
let db = match facet_type {
|
||||
FacetType::String => {
|
||||
index.field_id_docid_facet_strings.remap_types::<ByteSlice, DecodeIgnore>()
|
||||
}
|
||||
FacetType::Number => {
|
||||
index.field_id_docid_facet_f64s.remap_types::<ByteSlice, DecodeIgnore>()
|
||||
}
|
||||
};
|
||||
let mut iter = db
|
||||
.remap_key_type::<ByteSlice>()
|
||||
.prefix_iter_mut(wtxn, &field_id.to_be_bytes())?
|
||||
.remap_key_type::<C>();
|
||||
.remap_key_type::<FieldDocIdFacetIgnoreCodec>();
|
||||
|
||||
while let Some(result) = iter.next() {
|
||||
let (key, _) = result?;
|
||||
if to_remove.contains(convert(key)) {
|
||||
let ((_, docid, _), _) = result?;
|
||||
if to_remove.contains(docid) {
|
||||
// safety: we don't keep references from inside the LMDB database.
|
||||
unsafe { iter.del_current()? };
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user