mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-01-11 14:04:31 +01:00
Reintroduce filter range search and facet extractors
This commit is contained in:
parent
22d80eeaf9
commit
39a4a0a362
@ -15,7 +15,7 @@ use super::get_last_facet_value;
|
||||
|
||||
pub fn find_docids_of_facet_within_bounds<'t, BoundCodec>(
|
||||
rtxn: &'t heed::RoTxn<'t>,
|
||||
db: &'t heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>,
|
||||
db: &'t heed::Database<FacetKeyCodec<BoundCodec>, FacetGroupValueCodec>,
|
||||
field_id: u16,
|
||||
left: &'t Bound<<BoundCodec as BytesEncode<'t>>::EItem>,
|
||||
right: &'t Bound<<BoundCodec as BytesEncode<'t>>::EItem>,
|
||||
@ -48,13 +48,13 @@ where
|
||||
}
|
||||
Bound::Unbounded => Bound::Unbounded,
|
||||
};
|
||||
|
||||
let db = db.remap_key_type::<FacetKeyCodec<MyByteSlice>>();
|
||||
let mut docids = RoaringBitmap::new();
|
||||
let mut f = FacetRangeSearch { rtxn, db, field_id, left, right, docids: &mut docids };
|
||||
let highest_level = get_highest_level(rtxn, db, field_id)?;
|
||||
let mut f = FacetRangeSearch { rtxn, db: &db, field_id, left, right, docids: &mut docids };
|
||||
let highest_level = get_highest_level(rtxn, &db, field_id)?;
|
||||
|
||||
if let Some(first_bound) = get_first_facet_value::<MyByteSlice>(rtxn, db, field_id)? {
|
||||
let last_bound = get_last_facet_value::<MyByteSlice>(rtxn, db, field_id)?.unwrap();
|
||||
if let Some(first_bound) = get_first_facet_value::<MyByteSlice>(rtxn, &db, field_id)? {
|
||||
let last_bound = get_last_facet_value::<MyByteSlice>(rtxn, &db, field_id)?.unwrap();
|
||||
f.run(highest_level, first_bound, Bound::Included(last_bound), usize::MAX)?;
|
||||
Ok(docids)
|
||||
} else {
|
||||
|
@ -1,22 +1,17 @@
|
||||
use std::collections::HashSet;
|
||||
use std::fmt::{Debug, Display};
|
||||
use std::ops::Bound::{self, Excluded, Included};
|
||||
use std::ops::RangeBounds;
|
||||
|
||||
use either::Either;
|
||||
pub use filter_parser::{Condition, Error as FPError, FilterCondition, Span, Token};
|
||||
use heed::types::DecodeIgnore;
|
||||
use heed::LazyDecode;
|
||||
use roaring::RoaringBitmap;
|
||||
use std::collections::HashSet;
|
||||
use std::fmt::{Debug, Display};
|
||||
use std::ops::Bound::{self, Excluded, Included};
|
||||
|
||||
// use super::FacetNumberRange;
|
||||
use crate::error::{Error, UserError};
|
||||
use crate::heed_codec::facet::new::ordered_f64_codec::OrderedF64Codec;
|
||||
use crate::heed_codec::facet::new::{FacetGroupValueCodec, FacetKey, FacetKeyCodec};
|
||||
// use crate::heed_codec::facet::FacetLevelValueF64Codec;
|
||||
use crate::{
|
||||
distance_between_two_points, lat_lng_to_xyz, CboRoaringBitmapCodec, FieldId, Index, Result,
|
||||
};
|
||||
use crate::{distance_between_two_points, lat_lng_to_xyz, FieldId, Index, Result};
|
||||
|
||||
use super::facet_range_search;
|
||||
|
||||
/// The maximum number of filters the filter AST can process.
|
||||
const MAX_FILTER_DEPTH: usize = 2000;
|
||||
@ -147,158 +142,15 @@ impl<'a> Filter<'a> {
|
||||
}
|
||||
}
|
||||
|
||||
fn explore_facet_number_levels(
|
||||
rtxn: &heed::RoTxn,
|
||||
db: heed::Database<FacetKeyCodec<OrderedF64Codec>, FacetGroupValueCodec>,
|
||||
field_id: FieldId,
|
||||
) {
|
||||
}
|
||||
|
||||
impl<'a> Filter<'a> {
|
||||
/// Aggregates the documents ids that are part of the specified range automatically
|
||||
/// going deeper through the levels.
|
||||
fn explore_facet_number_levels(
|
||||
rtxn: &heed::RoTxn,
|
||||
db: heed::Database<FacetKeyCodec<OrderedF64Codec>, CboRoaringBitmapCodec>,
|
||||
field_id: FieldId,
|
||||
level: u8,
|
||||
left: Bound<f64>,
|
||||
right: Bound<f64>,
|
||||
output: &mut RoaringBitmap,
|
||||
) -> Result<()> {
|
||||
// level must be > 0, I'll create a separate function for level 0
|
||||
// if level == 0 {
|
||||
// call that function
|
||||
//}
|
||||
match (left, right) {
|
||||
// If the request is an exact value we must go directly to the deepest level.
|
||||
(Included(l), Included(r)) if l == r && level > 0 => {
|
||||
return Self::explore_facet_number_levels(
|
||||
rtxn, db, field_id, 0, left, right, output,
|
||||
);
|
||||
}
|
||||
// lower TO upper when lower > upper must return no result
|
||||
(Included(l), Included(r)) if l > r => return Ok(()),
|
||||
(Included(l), Excluded(r)) if l >= r => return Ok(()),
|
||||
(Excluded(l), Excluded(r)) if l >= r => return Ok(()),
|
||||
(Excluded(l), Included(r)) if l >= r => return Ok(()),
|
||||
(_, _) => (),
|
||||
}
|
||||
let range_start_key = FacetKey {
|
||||
field_id,
|
||||
level,
|
||||
left_bound: match left {
|
||||
Included(l) => l,
|
||||
Excluded(l) => l,
|
||||
Bound::Unbounded => f64::MIN,
|
||||
},
|
||||
};
|
||||
let mut range_iter = db
|
||||
.remap_data_type::<LazyDecode<FacetGroupValueCodec>>()
|
||||
.range(rtxn, &(range_start_key..))?;
|
||||
pub fn evaluate(&self, rtxn: &heed::RoTxn, index: &Index) -> Result<RoaringBitmap> {
|
||||
// to avoid doing this for each recursive call we're going to do it ONCE ahead of time
|
||||
let soft_deleted_documents = index.soft_deleted_documents_ids(rtxn)?;
|
||||
let filterable_fields = index.filterable_fields(rtxn)?;
|
||||
|
||||
let (mut previous_facet_key, mut previous_value) = range_iter.next().unwrap()?;
|
||||
while let Some(el) = range_iter.next() {
|
||||
let (facet_key, value) = el?;
|
||||
let range = (Included(previous_facet_key.left_bound), Excluded(facet_key.left_bound));
|
||||
// if the current range intersects with the query range, then go deeper
|
||||
// what does it mean for two ranges to intersect?
|
||||
let gte_left = match left {
|
||||
Included(l) => previous_facet_key.left_bound >= l,
|
||||
Excluded(l) => previous_facet_key.left_bound > l, // TODO: not true?
|
||||
Bound::Unbounded => true,
|
||||
};
|
||||
let lte_right = match right {
|
||||
Included(r) => facet_key.left_bound <= r,
|
||||
Excluded(r) => facet_key.left_bound < r,
|
||||
Bound::Unbounded => true,
|
||||
};
|
||||
}
|
||||
// at this point, previous_facet_key and previous_value are the last groups in the level
|
||||
// we must also check whether we should visit this group
|
||||
|
||||
todo!();
|
||||
|
||||
// let mut left_found = None;
|
||||
// let mut right_found = None;
|
||||
|
||||
// // We must create a custom iterator to be able to iterate over the
|
||||
// // requested range as the range iterator cannot express some conditions.
|
||||
// let iter = FacetNumberRange::new(rtxn, db, field_id, level, left, right)?;
|
||||
|
||||
// debug!("Iterating between {:?} and {:?} (level {})", left, right, level);
|
||||
|
||||
// for (i, result) in iter.enumerate() {
|
||||
// let ((_fid, level, l, r), docids) = result?;
|
||||
// debug!("{:?} to {:?} (level {}) found {} documents", l, r, level, docids.len());
|
||||
// *output |= docids;
|
||||
// // We save the leftest and rightest bounds we actually found at this level.
|
||||
// if i == 0 {
|
||||
// left_found = Some(l);
|
||||
// }
|
||||
// right_found = Some(r);
|
||||
// }
|
||||
|
||||
// // Can we go deeper?
|
||||
// let deeper_level = match level.checked_sub(1) {
|
||||
// Some(level) => level,
|
||||
// None => return Ok(()),
|
||||
// };
|
||||
|
||||
// // We must refine the left and right bounds of this range by retrieving the
|
||||
// // missing part in a deeper level.
|
||||
// match left_found.zip(right_found) {
|
||||
// Some((left_found, right_found)) => {
|
||||
// // If the bound is satisfied we avoid calling this function again.
|
||||
// if !matches!(left, Included(l) if l == left_found) {
|
||||
// let sub_right = Excluded(left_found);
|
||||
// debug!(
|
||||
// "calling left with {:?} to {:?} (level {})",
|
||||
// left, sub_right, deeper_level
|
||||
// );
|
||||
// Self::explore_facet_number_levels(
|
||||
// rtxn,
|
||||
// db,
|
||||
// field_id,
|
||||
// deeper_level,
|
||||
// left,
|
||||
// sub_right,
|
||||
// output,
|
||||
// )?;
|
||||
// }
|
||||
// if !matches!(right, Included(r) if r == right_found) {
|
||||
// let sub_left = Excluded(right_found);
|
||||
// debug!(
|
||||
// "calling right with {:?} to {:?} (level {})",
|
||||
// sub_left, right, deeper_level
|
||||
// );
|
||||
// Self::explore_facet_number_levels(
|
||||
// rtxn,
|
||||
// db,
|
||||
// field_id,
|
||||
// deeper_level,
|
||||
// sub_left,
|
||||
// right,
|
||||
// output,
|
||||
// )?;
|
||||
// }
|
||||
// }
|
||||
// None => {
|
||||
// // If we found nothing at this level it means that we must find
|
||||
// // the same bounds but at a deeper, more precise level.
|
||||
// Self::explore_facet_number_levels(
|
||||
// rtxn,
|
||||
// db,
|
||||
// field_id,
|
||||
// deeper_level,
|
||||
// left,
|
||||
// right,
|
||||
// output,
|
||||
// )?;
|
||||
// }
|
||||
// }
|
||||
|
||||
// Ok(())
|
||||
// and finally we delete all the soft_deleted_documents, again, only once at the very end
|
||||
self.inner_evaluate(rtxn, index, &filterable_fields)
|
||||
.map(|result| result - soft_deleted_documents)
|
||||
}
|
||||
|
||||
fn evaluate_operator(
|
||||
@ -337,15 +189,15 @@ impl<'a> Filter<'a> {
|
||||
Some(n) => {
|
||||
let n = Included(n);
|
||||
let mut output = RoaringBitmap::new();
|
||||
// Self::explore_facet_number_levels(
|
||||
// rtxn,
|
||||
// numbers_db,
|
||||
// field_id,
|
||||
// 0,
|
||||
// n,
|
||||
// n,
|
||||
// &mut output,
|
||||
// )?;
|
||||
Self::explore_facet_number_levels(
|
||||
rtxn,
|
||||
numbers_db,
|
||||
field_id,
|
||||
0,
|
||||
n,
|
||||
n,
|
||||
&mut output,
|
||||
)?;
|
||||
output
|
||||
}
|
||||
None => RoaringBitmap::new(),
|
||||
@ -381,29 +233,53 @@ impl<'a> Filter<'a> {
|
||||
match biggest_level {
|
||||
Some(level) => {
|
||||
let mut output = RoaringBitmap::new();
|
||||
// Self::explore_facet_number_levels(
|
||||
// rtxn,
|
||||
// numbers_db,
|
||||
// field_id,
|
||||
// level,
|
||||
// left,
|
||||
// right,
|
||||
// &mut output,
|
||||
// )?;
|
||||
Self::explore_facet_number_levels(
|
||||
rtxn,
|
||||
numbers_db,
|
||||
field_id,
|
||||
level,
|
||||
left,
|
||||
right,
|
||||
&mut output,
|
||||
)?;
|
||||
Ok(output)
|
||||
}
|
||||
None => Ok(RoaringBitmap::new()),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn evaluate(&self, rtxn: &heed::RoTxn, index: &Index) -> Result<RoaringBitmap> {
|
||||
// to avoid doing this for each recursive call we're going to do it ONCE ahead of time
|
||||
let soft_deleted_documents = index.soft_deleted_documents_ids(rtxn)?;
|
||||
let filterable_fields = index.filterable_fields(rtxn)?;
|
||||
/// Aggregates the documents ids that are part of the specified range automatically
|
||||
/// going deeper through the levels.
|
||||
fn explore_facet_number_levels(
|
||||
rtxn: &heed::RoTxn,
|
||||
db: heed::Database<FacetKeyCodec<OrderedF64Codec>, FacetGroupValueCodec>,
|
||||
field_id: FieldId,
|
||||
level: u8,
|
||||
left: Bound<f64>,
|
||||
right: Bound<f64>,
|
||||
output: &mut RoaringBitmap,
|
||||
) -> Result<()> {
|
||||
match (left, right) {
|
||||
// If the request is an exact value we must go directly to the deepest level.
|
||||
(Included(l), Included(r)) if l == r && level > 0 => {
|
||||
return Self::explore_facet_number_levels(
|
||||
rtxn, db, field_id, 0, left, right, output,
|
||||
);
|
||||
}
|
||||
// lower TO upper when lower > upper must return no result
|
||||
(Included(l), Included(r)) if l > r => return Ok(()),
|
||||
(Included(l), Excluded(r)) if l >= r => return Ok(()),
|
||||
(Excluded(l), Excluded(r)) if l >= r => return Ok(()),
|
||||
(Excluded(l), Included(r)) if l >= r => return Ok(()),
|
||||
(_, _) => (),
|
||||
}
|
||||
let x = facet_range_search::find_docids_of_facet_within_bounds::<OrderedF64Codec>(
|
||||
rtxn, &db, field_id, &left, &right,
|
||||
)?;
|
||||
// TODO: the facet range search should take a mutable roaring bitmap as argument
|
||||
*output = x;
|
||||
|
||||
// and finally we delete all the soft_deleted_documents, again, only once at the very end
|
||||
self.inner_evaluate(rtxn, index, &filterable_fields)
|
||||
.map(|result| result - soft_deleted_documents)
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn inner_evaluate(
|
||||
|
@ -2,22 +2,20 @@ use std::collections::btree_map::Entry;
|
||||
|
||||
use fst::IntoStreamer;
|
||||
use heed::types::{ByteSlice, Str};
|
||||
use heed::{BytesDecode, BytesEncode, Database};
|
||||
use obkv::Key;
|
||||
use heed::Database;
|
||||
use roaring::RoaringBitmap;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_json::Value;
|
||||
use time::OffsetDateTime;
|
||||
|
||||
use super::{ClearDocuments, Facets};
|
||||
use crate::error::{InternalError, SerializationError, UserError};
|
||||
// use crate::heed_codec::facet::FacetStringZeroBoundsValueCodec;
|
||||
use crate::error::{InternalError, UserError};
|
||||
use crate::heed_codec::facet::new::{FacetGroupValueCodec, FacetKeyCodec, MyByteSlice};
|
||||
use crate::heed_codec::CboRoaringBitmapCodec;
|
||||
use crate::index::{db_name, main_key};
|
||||
use crate::{
|
||||
fields_ids_map, DocumentId, ExternalDocumentsIds, FieldId, FieldIdMapMissingEntry,
|
||||
FieldsIdsMap, Index, Result, RoaringBitmapCodec, SmallString32, BEU32,
|
||||
DocumentId, ExternalDocumentsIds, FieldId, FieldIdMapMissingEntry, FieldsIdsMap, Index, Result,
|
||||
RoaringBitmapCodec, SmallString32, BEU32,
|
||||
};
|
||||
|
||||
pub struct DeleteDocuments<'t, 'u, 'i> {
|
||||
|
@ -6,6 +6,8 @@ use heed::{BytesDecode, BytesEncode};
|
||||
use super::helpers::{
|
||||
create_sorter, merge_cbo_roaring_bitmaps, sorter_into_reader, GrenadParameters,
|
||||
};
|
||||
use crate::heed_codec::facet::new::ordered_f64_codec::OrderedF64Codec;
|
||||
use crate::heed_codec::facet::new::{FacetKey, FacetKeyCodec};
|
||||
use crate::heed_codec::facet::FieldDocIdFacetF64Codec;
|
||||
use crate::Result;
|
||||
|
||||
@ -31,14 +33,13 @@ pub fn extract_facet_number_docids<R: io::Read + io::Seek>(
|
||||
|
||||
let mut cursor = docid_fid_facet_number.into_cursor()?;
|
||||
while let Some((key_bytes, _)) = cursor.move_on_next()? {
|
||||
todo!()
|
||||
// let (field_id, document_id, number) =
|
||||
// FieldDocIdFacetF64Codec::bytes_decode(key_bytes).unwrap();
|
||||
let (field_id, document_id, number) =
|
||||
FieldDocIdFacetF64Codec::bytes_decode(key_bytes).unwrap();
|
||||
|
||||
// let key = (field_id, 0, number, number);
|
||||
// // let key_bytes = FacetLevelValueF64Codec::bytes_encode(&key).unwrap();
|
||||
let key = FacetKey { field_id, level: 0, left_bound: number };
|
||||
let key_bytes = FacetKeyCodec::<OrderedF64Codec>::bytes_encode(&key).unwrap();
|
||||
|
||||
// facet_number_docids_sorter.insert(key_bytes, document_id.to_ne_bytes())?;
|
||||
facet_number_docids_sorter.insert(key_bytes, document_id.to_ne_bytes())?;
|
||||
}
|
||||
|
||||
sorter_into_reader(facet_number_docids_sorter, indexer)
|
||||
|
@ -1,13 +1,11 @@
|
||||
use std::fs::File;
|
||||
use std::iter::FromIterator;
|
||||
use std::{io, str};
|
||||
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use super::helpers::{create_sorter, sorter_into_reader, try_split_array_at, GrenadParameters};
|
||||
use crate::heed_codec::facet::new::str_ref::StrRefCodec;
|
||||
use crate::heed_codec::facet::new::{FacetKey, FacetKeyCodec};
|
||||
use crate::update::index_documents::merge_cbo_roaring_bitmaps;
|
||||
// use crate::heed_codec::facet::{encode_prefix_string, FacetStringLevelZeroCodec};
|
||||
use crate::{FieldId, Result};
|
||||
use heed::BytesEncode;
|
||||
use std::fs::File;
|
||||
use std::io;
|
||||
|
||||
/// Extracts the facet string and the documents ids where this facet string appear.
|
||||
///
|
||||
@ -22,38 +20,26 @@ pub fn extract_facet_string_docids<R: io::Read + io::Seek>(
|
||||
|
||||
let mut facet_string_docids_sorter = create_sorter(
|
||||
grenad::SortAlgorithm::Stable,
|
||||
merge_cbo_roaring_bitmaps, // TODO: check
|
||||
merge_cbo_roaring_bitmaps, // TODO: check that it is correct
|
||||
indexer.chunk_compression_type,
|
||||
indexer.chunk_compression_level,
|
||||
indexer.max_nb_chunks,
|
||||
max_memory,
|
||||
);
|
||||
|
||||
let mut key_buffer = Vec::new();
|
||||
let mut value_buffer = Vec::new();
|
||||
let mut cursor = docid_fid_facet_string.into_cursor()?;
|
||||
while let Some((key, original_value_bytes)) = cursor.move_on_next()? {
|
||||
while let Some((key, _original_value_bytes)) = cursor.move_on_next()? {
|
||||
let (field_id_bytes, bytes) = try_split_array_at(key).unwrap();
|
||||
let field_id = FieldId::from_be_bytes(field_id_bytes);
|
||||
let (document_id_bytes, normalized_value_bytes) = try_split_array_at(bytes).unwrap();
|
||||
let document_id = u32::from_be_bytes(document_id_bytes);
|
||||
let original_value = str::from_utf8(original_value_bytes)?;
|
||||
|
||||
key_buffer.clear();
|
||||
// TODO
|
||||
// FacetStringLevelZeroCodec::serialize_into(
|
||||
// field_id,
|
||||
// str::from_utf8(normalized_value_bytes)?,
|
||||
// &mut key_buffer,
|
||||
// );
|
||||
let (document_id_bytes, normalized_value_bytes) =
|
||||
try_split_array_at::<_, 4>(bytes).unwrap();
|
||||
|
||||
value_buffer.clear();
|
||||
// TODO
|
||||
// encode_prefix_string(original_value, &mut value_buffer)?;
|
||||
let bitmap = RoaringBitmap::from_iter(Some(document_id));
|
||||
bitmap.serialize_into(&mut value_buffer)?;
|
||||
let normalised_value = std::str::from_utf8(normalized_value_bytes)?;
|
||||
let key = FacetKey { field_id, level: 0, left_bound: normalised_value };
|
||||
let key_bytes = FacetKeyCodec::<StrRefCodec>::bytes_encode(&key).unwrap();
|
||||
|
||||
facet_string_docids_sorter.insert(&key_buffer, &value_buffer)?;
|
||||
facet_string_docids_sorter.insert(&key_bytes, &document_id_bytes)?;
|
||||
}
|
||||
|
||||
sorter_into_reader(facet_string_docids_sorter, indexer)
|
||||
|
Loading…
x
Reference in New Issue
Block a user