mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-01-12 06:24:29 +01:00
Further unify facet databases of f64s and strings
This commit is contained in:
parent
3baa34d842
commit
cb8442a119
@ -1,13 +1,15 @@
|
|||||||
use std::borrow::Cow;
|
|
||||||
use std::convert::TryInto;
|
|
||||||
|
|
||||||
use crate::facet::value_encoding::f64_into_bytes;
|
|
||||||
use crate::{try_split_array_at, DocumentId, FieldId};
|
use crate::{try_split_array_at, DocumentId, FieldId};
|
||||||
|
use heed::{BytesDecode, BytesEncode};
|
||||||
|
use std::borrow::Cow;
|
||||||
|
use std::marker::PhantomData;
|
||||||
|
|
||||||
pub struct FieldDocIdFacetF64Codec;
|
pub struct FieldDocIdFacetCodec<C>(PhantomData<C>);
|
||||||
|
|
||||||
impl<'a> heed::BytesDecode<'a> for FieldDocIdFacetF64Codec {
|
impl<'a, C> BytesDecode<'a> for FieldDocIdFacetCodec<C>
|
||||||
type DItem = (FieldId, DocumentId, f64);
|
where
|
||||||
|
C: BytesDecode<'a>,
|
||||||
|
{
|
||||||
|
type DItem = (FieldId, DocumentId, C::DItem);
|
||||||
|
|
||||||
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
||||||
let (field_id_bytes, bytes) = try_split_array_at(bytes)?;
|
let (field_id_bytes, bytes) = try_split_array_at(bytes)?;
|
||||||
@ -16,22 +18,24 @@ impl<'a> heed::BytesDecode<'a> for FieldDocIdFacetF64Codec {
|
|||||||
let (document_id_bytes, bytes) = try_split_array_at(bytes)?;
|
let (document_id_bytes, bytes) = try_split_array_at(bytes)?;
|
||||||
let document_id = u32::from_be_bytes(document_id_bytes);
|
let document_id = u32::from_be_bytes(document_id_bytes);
|
||||||
|
|
||||||
let value = bytes[8..16].try_into().map(f64::from_be_bytes).ok()?;
|
let value = C::bytes_decode(&bytes[8..])?;
|
||||||
|
|
||||||
Some((field_id, document_id, value))
|
Some((field_id, document_id, value))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a> heed::BytesEncode<'a> for FieldDocIdFacetF64Codec {
|
impl<'a, C> BytesEncode<'a> for FieldDocIdFacetCodec<C>
|
||||||
type EItem = (FieldId, DocumentId, f64);
|
where
|
||||||
|
C: BytesEncode<'a>,
|
||||||
|
{
|
||||||
|
type EItem = (FieldId, DocumentId, C::EItem);
|
||||||
|
|
||||||
fn bytes_encode((field_id, document_id, value): &Self::EItem) -> Option<Cow<[u8]>> {
|
fn bytes_encode((field_id, document_id, value): &'a Self::EItem) -> Option<Cow<[u8]>> {
|
||||||
let mut bytes = Vec::with_capacity(2 + 4 + 8 + 8);
|
let mut bytes = Vec::with_capacity(2 + 4 + 8 + 8);
|
||||||
bytes.extend_from_slice(&field_id.to_be_bytes());
|
bytes.extend_from_slice(&field_id.to_be_bytes());
|
||||||
bytes.extend_from_slice(&document_id.to_be_bytes());
|
bytes.extend_from_slice(&document_id.to_be_bytes());
|
||||||
let value_bytes = f64_into_bytes(*value)?;
|
let value_bytes = C::bytes_encode(value)?;
|
||||||
bytes.extend_from_slice(&value_bytes);
|
bytes.extend_from_slice(&value_bytes);
|
||||||
bytes.extend_from_slice(&value.to_be_bytes());
|
|
||||||
Some(Cow::Owned(bytes))
|
Some(Cow::Owned(bytes))
|
||||||
}
|
}
|
||||||
}
|
}
|
@ -1,50 +0,0 @@
|
|||||||
use std::borrow::Cow;
|
|
||||||
use std::str;
|
|
||||||
|
|
||||||
use crate::{try_split_array_at, DocumentId, FieldId};
|
|
||||||
|
|
||||||
pub struct FieldDocIdFacetStringCodec;
|
|
||||||
|
|
||||||
impl FieldDocIdFacetStringCodec {
|
|
||||||
pub fn serialize_into(
|
|
||||||
field_id: FieldId,
|
|
||||||
document_id: DocumentId,
|
|
||||||
normalized_value: &str,
|
|
||||||
out: &mut Vec<u8>,
|
|
||||||
) {
|
|
||||||
out.reserve(2 + 4 + normalized_value.len());
|
|
||||||
out.extend_from_slice(&field_id.to_be_bytes());
|
|
||||||
out.extend_from_slice(&document_id.to_be_bytes());
|
|
||||||
out.extend_from_slice(normalized_value.as_bytes());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<'a> heed::BytesDecode<'a> for FieldDocIdFacetStringCodec {
|
|
||||||
type DItem = (FieldId, DocumentId, &'a str);
|
|
||||||
|
|
||||||
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
|
||||||
let (field_id_bytes, bytes) = try_split_array_at(bytes)?;
|
|
||||||
let field_id = u16::from_be_bytes(field_id_bytes);
|
|
||||||
|
|
||||||
let (document_id_bytes, bytes) = try_split_array_at(bytes)?;
|
|
||||||
let document_id = u32::from_be_bytes(document_id_bytes);
|
|
||||||
|
|
||||||
let normalized_value = str::from_utf8(bytes).ok()?;
|
|
||||||
Some((field_id, document_id, normalized_value))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<'a> heed::BytesEncode<'a> for FieldDocIdFacetStringCodec {
|
|
||||||
type EItem = (FieldId, DocumentId, &'a str);
|
|
||||||
|
|
||||||
fn bytes_encode((field_id, document_id, normalized_value): &Self::EItem) -> Option<Cow<[u8]>> {
|
|
||||||
let mut bytes = Vec::new();
|
|
||||||
FieldDocIdFacetStringCodec::serialize_into(
|
|
||||||
*field_id,
|
|
||||||
*document_id,
|
|
||||||
normalized_value,
|
|
||||||
&mut bytes,
|
|
||||||
);
|
|
||||||
Some(Cow::Owned(bytes))
|
|
||||||
}
|
|
||||||
}
|
|
@ -1,5 +1,4 @@
|
|||||||
mod field_doc_id_facet_f64_codec;
|
mod field_doc_id_facet_codec;
|
||||||
mod field_doc_id_facet_string_codec;
|
|
||||||
mod ordered_f64_codec;
|
mod ordered_f64_codec;
|
||||||
mod str_ref;
|
mod str_ref;
|
||||||
|
|
||||||
@ -7,16 +6,19 @@ use std::borrow::Cow;
|
|||||||
use std::convert::TryFrom;
|
use std::convert::TryFrom;
|
||||||
use std::marker::PhantomData;
|
use std::marker::PhantomData;
|
||||||
|
|
||||||
use heed::types::OwnedType;
|
use heed::types::{DecodeIgnore, OwnedType};
|
||||||
use heed::{BytesDecode, BytesEncode};
|
use heed::{BytesDecode, BytesEncode};
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
pub use self::field_doc_id_facet_f64_codec::FieldDocIdFacetF64Codec;
|
pub use self::field_doc_id_facet_codec::FieldDocIdFacetCodec;
|
||||||
pub use self::field_doc_id_facet_string_codec::FieldDocIdFacetStringCodec;
|
|
||||||
pub use self::ordered_f64_codec::OrderedF64Codec;
|
pub use self::ordered_f64_codec::OrderedF64Codec;
|
||||||
pub use self::str_ref::StrRefCodec;
|
pub use self::str_ref::StrRefCodec;
|
||||||
use crate::{CboRoaringBitmapCodec, BEU16};
|
use crate::{CboRoaringBitmapCodec, BEU16};
|
||||||
|
|
||||||
|
pub type FieldDocIdFacetF64Codec = FieldDocIdFacetCodec<OrderedF64Codec>;
|
||||||
|
pub type FieldDocIdFacetStringCodec = FieldDocIdFacetCodec<StrRefCodec>;
|
||||||
|
pub type FieldDocIdFacetIgnoreCodec = FieldDocIdFacetCodec<DecodeIgnore>;
|
||||||
|
|
||||||
pub type FieldIdCodec = OwnedType<BEU16>;
|
pub type FieldIdCodec = OwnedType<BEU16>;
|
||||||
|
|
||||||
/// Tries to split a slice in half at the given middle point,
|
/// Tries to split a slice in half at the given middle point,
|
||||||
|
@ -15,7 +15,7 @@ use log::debug;
|
|||||||
use once_cell::sync::Lazy;
|
use once_cell::sync::Lazy;
|
||||||
use roaring::bitmap::RoaringBitmap;
|
use roaring::bitmap::RoaringBitmap;
|
||||||
|
|
||||||
pub use self::facet::{FacetDistribution, /* FacetNumberIter,*/ Filter, DEFAULT_VALUES_PER_FACET,};
|
pub use self::facet::{FacetDistribution, Filter, DEFAULT_VALUES_PER_FACET};
|
||||||
use self::fst_utils::{Complement, Intersection, StartsWith, Union};
|
use self::fst_utils::{Complement, Intersection, StartsWith, Union};
|
||||||
pub use self::matches::{
|
pub use self::matches::{
|
||||||
FormatOptions, MatchBounds, Matcher, MatcherBuilder, MatchingWord, MatchingWords,
|
FormatOptions, MatchBounds, Matcher, MatcherBuilder, MatchingWord, MatchingWords,
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
use std::collections::btree_map::Entry;
|
use std::collections::btree_map::Entry;
|
||||||
|
|
||||||
use fst::IntoStreamer;
|
use fst::IntoStreamer;
|
||||||
use heed::types::{ByteSlice, Str};
|
use heed::types::{ByteSlice, DecodeIgnore, Str};
|
||||||
use heed::Database;
|
use heed::Database;
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
@ -11,11 +11,13 @@ use time::OffsetDateTime;
|
|||||||
use super::{ClearDocuments, FacetsUpdateBulk};
|
use super::{ClearDocuments, FacetsUpdateBulk};
|
||||||
use crate::error::{InternalError, UserError};
|
use crate::error::{InternalError, UserError};
|
||||||
use crate::facet::FacetType;
|
use crate::facet::FacetType;
|
||||||
use crate::heed_codec::facet::{ByteSliceRef, FacetGroupKeyCodec, FacetGroupValueCodec};
|
use crate::heed_codec::facet::{
|
||||||
|
ByteSliceRef, FacetGroupKeyCodec, FacetGroupValueCodec, FieldDocIdFacetIgnoreCodec,
|
||||||
|
};
|
||||||
use crate::heed_codec::CboRoaringBitmapCodec;
|
use crate::heed_codec::CboRoaringBitmapCodec;
|
||||||
use crate::index::{db_name, main_key};
|
use crate::index::{db_name, main_key};
|
||||||
use crate::{
|
use crate::{
|
||||||
DocumentId, ExternalDocumentsIds, FieldId, FieldIdMapMissingEntry, FieldsIdsMap, Index, Result,
|
ExternalDocumentsIds, FieldId, FieldIdMapMissingEntry, FieldsIdsMap, Index, Result,
|
||||||
RoaringBitmapCodec, SmallString32, BEU32,
|
RoaringBitmapCodec, SmallString32, BEU32,
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -187,10 +189,10 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
|
|||||||
word_position_docids,
|
word_position_docids,
|
||||||
word_prefix_position_docids,
|
word_prefix_position_docids,
|
||||||
facet_id_f64_docids: _,
|
facet_id_f64_docids: _,
|
||||||
facet_id_exists_docids,
|
|
||||||
facet_id_string_docids: _,
|
facet_id_string_docids: _,
|
||||||
field_id_docid_facet_f64s,
|
field_id_docid_facet_f64s: _,
|
||||||
field_id_docid_facet_strings,
|
field_id_docid_facet_strings: _,
|
||||||
|
facet_id_exists_docids,
|
||||||
documents,
|
documents,
|
||||||
} = self.index;
|
} = self.index;
|
||||||
|
|
||||||
@ -449,6 +451,21 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
|
|||||||
fields_ids_map.clone(),
|
fields_ids_map.clone(),
|
||||||
facet_type,
|
facet_type,
|
||||||
)?;
|
)?;
|
||||||
|
for field_id in self.index.faceted_fields_ids(self.wtxn)? {
|
||||||
|
// Remove docids from the number faceted documents ids
|
||||||
|
let mut docids =
|
||||||
|
self.index.faceted_documents_ids(self.wtxn, field_id, facet_type)?;
|
||||||
|
docids -= &self.to_delete_docids;
|
||||||
|
self.index.put_faceted_documents_ids(self.wtxn, field_id, facet_type, &docids)?;
|
||||||
|
|
||||||
|
remove_docids_from_field_id_docid_facet_value(
|
||||||
|
&self.index,
|
||||||
|
self.wtxn,
|
||||||
|
facet_type,
|
||||||
|
field_id,
|
||||||
|
&self.to_delete_docids,
|
||||||
|
)?;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// We delete the documents ids that are under the facet field id values.
|
// We delete the documents ids that are under the facet field id values.
|
||||||
@ -458,47 +475,6 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
|
|||||||
&self.to_delete_docids,
|
&self.to_delete_docids,
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
// Remove the documents ids from the faceted documents ids.
|
|
||||||
for field_id in self.index.faceted_fields_ids(self.wtxn)? {
|
|
||||||
// Remove docids from the number faceted documents ids
|
|
||||||
let mut docids =
|
|
||||||
self.index.faceted_documents_ids(self.wtxn, field_id, FacetType::Number)?;
|
|
||||||
docids -= &self.to_delete_docids;
|
|
||||||
self.index.put_faceted_documents_ids(
|
|
||||||
self.wtxn,
|
|
||||||
field_id,
|
|
||||||
FacetType::Number,
|
|
||||||
&docids,
|
|
||||||
)?;
|
|
||||||
|
|
||||||
remove_docids_from_field_id_docid_facet_value(
|
|
||||||
self.wtxn,
|
|
||||||
field_id_docid_facet_f64s,
|
|
||||||
field_id,
|
|
||||||
&self.to_delete_docids,
|
|
||||||
|(_fid, docid, _value)| docid,
|
|
||||||
)?;
|
|
||||||
|
|
||||||
// Remove docids from the string faceted documents ids
|
|
||||||
let mut docids =
|
|
||||||
self.index.faceted_documents_ids(self.wtxn, field_id, FacetType::String)?;
|
|
||||||
docids -= &self.to_delete_docids;
|
|
||||||
self.index.put_faceted_documents_ids(
|
|
||||||
self.wtxn,
|
|
||||||
field_id,
|
|
||||||
FacetType::String,
|
|
||||||
&docids,
|
|
||||||
)?;
|
|
||||||
|
|
||||||
remove_docids_from_field_id_docid_facet_value(
|
|
||||||
self.wtxn,
|
|
||||||
field_id_docid_facet_strings,
|
|
||||||
field_id,
|
|
||||||
&self.to_delete_docids,
|
|
||||||
|(_fid, docid, _value)| docid,
|
|
||||||
)?;
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(DocumentDeletionResult {
|
Ok(DocumentDeletionResult {
|
||||||
deleted_documents: self.to_delete_docids.len(),
|
deleted_documents: self.to_delete_docids.len(),
|
||||||
remaining_documents: documents_ids.len(),
|
remaining_documents: documents_ids.len(),
|
||||||
@ -564,26 +540,28 @@ fn remove_from_word_docids(
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn remove_docids_from_field_id_docid_facet_value<'a, C, K, F, DC, V>(
|
fn remove_docids_from_field_id_docid_facet_value<'i, 'a>(
|
||||||
|
index: &'i Index,
|
||||||
wtxn: &'a mut heed::RwTxn,
|
wtxn: &'a mut heed::RwTxn,
|
||||||
db: &heed::Database<C, DC>,
|
facet_type: FacetType,
|
||||||
field_id: FieldId,
|
field_id: FieldId,
|
||||||
to_remove: &RoaringBitmap,
|
to_remove: &RoaringBitmap,
|
||||||
convert: F,
|
) -> heed::Result<()> {
|
||||||
) -> heed::Result<()>
|
let db = match facet_type {
|
||||||
where
|
FacetType::String => {
|
||||||
C: heed::BytesDecode<'a, DItem = K>,
|
index.field_id_docid_facet_strings.remap_types::<ByteSlice, DecodeIgnore>()
|
||||||
DC: heed::BytesDecode<'a, DItem = V>,
|
}
|
||||||
F: Fn(K) -> DocumentId,
|
FacetType::Number => {
|
||||||
{
|
index.field_id_docid_facet_f64s.remap_types::<ByteSlice, DecodeIgnore>()
|
||||||
|
}
|
||||||
|
};
|
||||||
let mut iter = db
|
let mut iter = db
|
||||||
.remap_key_type::<ByteSlice>()
|
|
||||||
.prefix_iter_mut(wtxn, &field_id.to_be_bytes())?
|
.prefix_iter_mut(wtxn, &field_id.to_be_bytes())?
|
||||||
.remap_key_type::<C>();
|
.remap_key_type::<FieldDocIdFacetIgnoreCodec>();
|
||||||
|
|
||||||
while let Some(result) = iter.next() {
|
while let Some(result) = iter.next() {
|
||||||
let (key, _) = result?;
|
let ((_, docid, _), _) = result?;
|
||||||
if to_remove.contains(convert(key)) {
|
if to_remove.contains(docid) {
|
||||||
// safety: we don't keep references from inside the LMDB database.
|
// safety: we don't keep references from inside the LMDB database.
|
||||||
unsafe { iter.del_current()? };
|
unsafe { iter.del_current()? };
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user