mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-07-04 04:17:10 +02:00
Revert "Revert "Sort at query time""
This commit is contained in:
parent
879d5e8799
commit
89d0758713
17 changed files with 701 additions and 148 deletions
|
@ -1,15 +1,10 @@
|
|||
use std::fmt;
|
||||
use std::str::FromStr;
|
||||
|
||||
use once_cell::sync::Lazy;
|
||||
use regex::Regex;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::error::{Error, UserError};
|
||||
|
||||
static ASC_DESC_REGEX: Lazy<Regex> =
|
||||
Lazy::new(|| Regex::new(r#"(asc|desc)\(([\w_-]+)\)"#).unwrap());
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
|
||||
pub enum Criterion {
|
||||
/// Sorted by decreasing number of matched query terms.
|
||||
|
@ -17,10 +12,13 @@ pub enum Criterion {
|
|||
Words,
|
||||
/// Sorted by increasing number of typos.
|
||||
Typo,
|
||||
/// Dynamically sort at query time the documents. None, one or multiple Asc/Desc sortable
|
||||
/// attributes can be used in place of this criterion at query time.
|
||||
Sort,
|
||||
/// Sorted by increasing distance between matched query terms.
|
||||
Proximity,
|
||||
/// Documents with quey words contained in more important
|
||||
/// attributes are considred better.
|
||||
/// attributes are considered better.
|
||||
Attribute,
|
||||
/// Sorted by the similarity of the matched words with the query words.
|
||||
Exactness,
|
||||
|
@ -43,29 +41,46 @@ impl Criterion {
|
|||
impl FromStr for Criterion {
|
||||
type Err = Error;
|
||||
|
||||
fn from_str(txt: &str) -> Result<Criterion, Self::Err> {
|
||||
match txt {
|
||||
fn from_str(text: &str) -> Result<Criterion, Self::Err> {
|
||||
match text {
|
||||
"words" => Ok(Criterion::Words),
|
||||
"typo" => Ok(Criterion::Typo),
|
||||
"sort" => Ok(Criterion::Sort),
|
||||
"proximity" => Ok(Criterion::Proximity),
|
||||
"attribute" => Ok(Criterion::Attribute),
|
||||
"exactness" => Ok(Criterion::Exactness),
|
||||
text => {
|
||||
let caps = ASC_DESC_REGEX
|
||||
.captures(text)
|
||||
.ok_or_else(|| UserError::InvalidCriterionName { name: text.to_string() })?;
|
||||
let order = caps.get(1).unwrap().as_str();
|
||||
let field_name = caps.get(2).unwrap().as_str();
|
||||
match order {
|
||||
"asc" => Ok(Criterion::Asc(field_name.to_string())),
|
||||
"desc" => Ok(Criterion::Desc(field_name.to_string())),
|
||||
text => {
|
||||
return Err(
|
||||
UserError::InvalidCriterionName { name: text.to_string() }.into()
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
text => match AscDesc::from_str(text) {
|
||||
Ok(AscDesc::Asc(field)) => Ok(Criterion::Asc(field)),
|
||||
Ok(AscDesc::Desc(field)) => Ok(Criterion::Desc(field)),
|
||||
Err(error) => Err(error.into()),
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
|
||||
pub enum AscDesc {
|
||||
Asc(String),
|
||||
Desc(String),
|
||||
}
|
||||
|
||||
impl AscDesc {
|
||||
pub fn field(&self) -> &str {
|
||||
match self {
|
||||
AscDesc::Asc(field) => field,
|
||||
AscDesc::Desc(field) => field,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl FromStr for AscDesc {
|
||||
type Err = UserError;
|
||||
|
||||
fn from_str(text: &str) -> Result<AscDesc, Self::Err> {
|
||||
match text.rsplit_once(':') {
|
||||
Some((field_name, "asc")) => Ok(AscDesc::Asc(field_name.to_string())),
|
||||
Some((field_name, "desc")) => Ok(AscDesc::Desc(field_name.to_string())),
|
||||
_ => Err(UserError::InvalidCriterionName { name: text.to_string() }),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -74,6 +89,7 @@ pub fn default_criteria() -> Vec<Criterion> {
|
|||
vec![
|
||||
Criterion::Words,
|
||||
Criterion::Typo,
|
||||
Criterion::Sort,
|
||||
Criterion::Proximity,
|
||||
Criterion::Attribute,
|
||||
Criterion::Exactness,
|
||||
|
@ -87,11 +103,12 @@ impl fmt::Display for Criterion {
|
|||
match self {
|
||||
Words => f.write_str("words"),
|
||||
Typo => f.write_str("typo"),
|
||||
Sort => f.write_str("sort"),
|
||||
Proximity => f.write_str("proximity"),
|
||||
Attribute => f.write_str("attribute"),
|
||||
Exactness => f.write_str("exactness"),
|
||||
Asc(attr) => write!(f, "asc({})", attr),
|
||||
Desc(attr) => write!(f, "desc({})", attr),
|
||||
Asc(attr) => write!(f, "{}:asc", attr),
|
||||
Desc(attr) => write!(f, "{}:desc", attr),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -58,6 +58,7 @@ pub enum UserError {
|
|||
InvalidFacetsDistribution { invalid_facets_name: HashSet<String> },
|
||||
InvalidFilter(pest::error::Error<ParserRule>),
|
||||
InvalidFilterAttribute(pest::error::Error<ParserRule>),
|
||||
InvalidSortableAttribute { field: String, valid_fields: HashSet<String> },
|
||||
InvalidStoreFile,
|
||||
MaxDatabaseSizeReached,
|
||||
MissingDocumentId { document: Object },
|
||||
|
@ -226,6 +227,15 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco
|
|||
)
|
||||
}
|
||||
Self::InvalidFilterAttribute(error) => error.fmt(f),
|
||||
Self::InvalidSortableAttribute { field, valid_fields } => {
|
||||
let valid_names =
|
||||
valid_fields.iter().map(AsRef::as_ref).collect::<Vec<_>>().join(", ");
|
||||
write!(
|
||||
f,
|
||||
"Attribute {} is not sortable, available sortable attributes are: {}",
|
||||
field, valid_names
|
||||
)
|
||||
}
|
||||
Self::MissingDocumentId { document } => {
|
||||
let json = serde_json::to_string(document).unwrap();
|
||||
write!(f, "document doesn't have an identifier {}", json)
|
||||
|
|
|
@ -28,6 +28,7 @@ pub mod main_key {
|
|||
pub const DISTINCT_FIELD_KEY: &str = "distinct-field-key";
|
||||
pub const DOCUMENTS_IDS_KEY: &str = "documents-ids";
|
||||
pub const FILTERABLE_FIELDS_KEY: &str = "filterable-fields";
|
||||
pub const SORTABLE_FIELDS_KEY: &str = "sortable-fields";
|
||||
pub const FIELD_DISTRIBUTION_KEY: &str = "fields-distribution";
|
||||
pub const FIELDS_IDS_MAP_KEY: &str = "fields-ids-map";
|
||||
pub const HARD_EXTERNAL_DOCUMENTS_IDS_KEY: &str = "hard-external-documents-ids";
|
||||
|
@ -446,13 +447,45 @@ impl Index {
|
|||
Ok(fields_ids)
|
||||
}
|
||||
|
||||
/* sortable fields */
|
||||
|
||||
/// Writes the sortable fields names in the database.
|
||||
pub(crate) fn put_sortable_fields(
|
||||
&self,
|
||||
wtxn: &mut RwTxn,
|
||||
fields: &HashSet<String>,
|
||||
) -> heed::Result<()> {
|
||||
self.main.put::<_, Str, SerdeJson<_>>(wtxn, main_key::SORTABLE_FIELDS_KEY, fields)
|
||||
}
|
||||
|
||||
/// Deletes the sortable fields ids in the database.
|
||||
pub(crate) fn delete_sortable_fields(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
|
||||
self.main.delete::<_, Str>(wtxn, main_key::SORTABLE_FIELDS_KEY)
|
||||
}
|
||||
|
||||
/// Returns the sortable fields names.
|
||||
pub fn sortable_fields(&self, rtxn: &RoTxn) -> heed::Result<HashSet<String>> {
|
||||
Ok(self
|
||||
.main
|
||||
.get::<_, Str, SerdeJson<_>>(rtxn, main_key::SORTABLE_FIELDS_KEY)?
|
||||
.unwrap_or_default())
|
||||
}
|
||||
|
||||
/// Identical to `sortable_fields`, but returns ids instead.
|
||||
pub fn sortable_fields_ids(&self, rtxn: &RoTxn) -> Result<HashSet<FieldId>> {
|
||||
let fields = self.sortable_fields(rtxn)?;
|
||||
let fields_ids_map = self.fields_ids_map(rtxn)?;
|
||||
Ok(fields.into_iter().filter_map(|name| fields_ids_map.id(&name)).collect())
|
||||
}
|
||||
|
||||
/* faceted documents ids */
|
||||
|
||||
/// Returns the faceted fields names.
|
||||
///
|
||||
/// Faceted fields are the union of all the filterable, distinct, and Asc/Desc fields.
|
||||
/// Faceted fields are the union of all the filterable, sortable, distinct, and Asc/Desc fields.
|
||||
pub fn faceted_fields(&self, rtxn: &RoTxn) -> Result<HashSet<String>> {
|
||||
let filterable_fields = self.filterable_fields(rtxn)?;
|
||||
let sortable_fields = self.sortable_fields(rtxn)?;
|
||||
let distinct_field = self.distinct_field(rtxn)?;
|
||||
let asc_desc_fields =
|
||||
self.criteria(rtxn)?.into_iter().filter_map(|criterion| match criterion {
|
||||
|
@ -461,6 +494,7 @@ impl Index {
|
|||
});
|
||||
|
||||
let mut faceted_fields = filterable_fields;
|
||||
faceted_fields.extend(sortable_fields);
|
||||
faceted_fields.extend(asc_desc_fields);
|
||||
if let Some(field) = distinct_field {
|
||||
faceted_fields.insert(field.to_owned());
|
||||
|
|
|
@ -22,7 +22,7 @@ use std::result::Result as StdResult;
|
|||
use fxhash::{FxHasher32, FxHasher64};
|
||||
use serde_json::{Map, Value};
|
||||
|
||||
pub use self::criterion::{default_criteria, Criterion};
|
||||
pub use self::criterion::{default_criteria, AscDesc, Criterion};
|
||||
pub use self::error::{
|
||||
Error, FieldIdMapMissingEntry, InternalError, SerializationError, UserError,
|
||||
};
|
||||
|
|
|
@ -7,7 +7,7 @@ use roaring::RoaringBitmap;
|
|||
|
||||
use super::{Criterion, CriterionParameters, CriterionResult};
|
||||
use crate::search::criteria::{resolve_query_tree, CriteriaBuilder};
|
||||
use crate::search::facet::FacetNumberIter;
|
||||
use crate::search::facet::{FacetNumberIter, FacetStringIter};
|
||||
use crate::search::query_tree::Operation;
|
||||
use crate::{FieldId, Index, Result};
|
||||
|
||||
|
@ -20,7 +20,7 @@ pub struct AscDesc<'t> {
|
|||
rtxn: &'t heed::RoTxn<'t>,
|
||||
field_name: String,
|
||||
field_id: Option<FieldId>,
|
||||
ascending: bool,
|
||||
is_ascending: bool,
|
||||
query_tree: Option<Operation>,
|
||||
candidates: Box<dyn Iterator<Item = heed::Result<RoaringBitmap>> + 't>,
|
||||
allowed_candidates: RoaringBitmap,
|
||||
|
@ -53,12 +53,16 @@ impl<'t> AscDesc<'t> {
|
|||
rtxn: &'t heed::RoTxn,
|
||||
parent: Box<dyn Criterion + 't>,
|
||||
field_name: String,
|
||||
ascending: bool,
|
||||
is_ascending: bool,
|
||||
) -> Result<Self> {
|
||||
let fields_ids_map = index.fields_ids_map(rtxn)?;
|
||||
let field_id = fields_ids_map.id(&field_name);
|
||||
let faceted_candidates = match field_id {
|
||||
Some(field_id) => index.number_faceted_documents_ids(rtxn, field_id)?,
|
||||
Some(field_id) => {
|
||||
let number_faceted = index.number_faceted_documents_ids(rtxn, field_id)?;
|
||||
let string_faceted = index.string_faceted_documents_ids(rtxn, field_id)?;
|
||||
number_faceted | string_faceted
|
||||
}
|
||||
None => RoaringBitmap::default(),
|
||||
};
|
||||
|
||||
|
@ -67,7 +71,7 @@ impl<'t> AscDesc<'t> {
|
|||
rtxn,
|
||||
field_name,
|
||||
field_id,
|
||||
ascending,
|
||||
is_ascending,
|
||||
query_tree: None,
|
||||
candidates: Box::new(std::iter::empty()),
|
||||
allowed_candidates: RoaringBitmap::new(),
|
||||
|
@ -87,7 +91,7 @@ impl<'t> Criterion for AscDesc<'t> {
|
|||
loop {
|
||||
debug!(
|
||||
"Facet {}({}) iteration",
|
||||
if self.ascending { "Asc" } else { "Desc" },
|
||||
if self.is_ascending { "Asc" } else { "Desc" },
|
||||
self.field_name
|
||||
);
|
||||
|
||||
|
@ -136,7 +140,7 @@ impl<'t> Criterion for AscDesc<'t> {
|
|||
self.index,
|
||||
self.rtxn,
|
||||
field_id,
|
||||
self.ascending,
|
||||
self.is_ascending,
|
||||
candidates & &self.faceted_candidates,
|
||||
)?,
|
||||
None => Box::new(std::iter::empty()),
|
||||
|
@ -167,31 +171,49 @@ fn facet_ordered<'t>(
|
|||
index: &'t Index,
|
||||
rtxn: &'t heed::RoTxn,
|
||||
field_id: FieldId,
|
||||
ascending: bool,
|
||||
is_ascending: bool,
|
||||
candidates: RoaringBitmap,
|
||||
) -> Result<Box<dyn Iterator<Item = heed::Result<RoaringBitmap>> + 't>> {
|
||||
if candidates.len() <= CANDIDATES_THRESHOLD {
|
||||
let iter = iterative_facet_ordered_iter(index, rtxn, field_id, ascending, candidates)?;
|
||||
Ok(Box::new(iter.map(Ok)) as Box<dyn Iterator<Item = _>>)
|
||||
let number_iter = iterative_facet_number_ordered_iter(
|
||||
index,
|
||||
rtxn,
|
||||
field_id,
|
||||
is_ascending,
|
||||
candidates.clone(),
|
||||
)?;
|
||||
let string_iter =
|
||||
iterative_facet_string_ordered_iter(index, rtxn, field_id, is_ascending, candidates)?;
|
||||
Ok(Box::new(number_iter.chain(string_iter).map(Ok)) as Box<dyn Iterator<Item = _>>)
|
||||
} else {
|
||||
let facet_fn = if ascending {
|
||||
let facet_number_fn = if is_ascending {
|
||||
FacetNumberIter::new_reducing
|
||||
} else {
|
||||
FacetNumberIter::new_reverse_reducing
|
||||
};
|
||||
let iter = facet_fn(rtxn, index, field_id, candidates)?;
|
||||
Ok(Box::new(iter.map(|res| res.map(|(_, docids)| docids))))
|
||||
let number_iter = facet_number_fn(rtxn, index, field_id, candidates.clone())?
|
||||
.map(|res| res.map(|(_, docids)| docids));
|
||||
|
||||
let facet_string_fn = if is_ascending {
|
||||
FacetStringIter::new_reducing
|
||||
} else {
|
||||
FacetStringIter::new_reverse_reducing
|
||||
};
|
||||
let string_iter = facet_string_fn(rtxn, index, field_id, candidates)?
|
||||
.map(|res| res.map(|(_, _, docids)| docids));
|
||||
|
||||
Ok(Box::new(number_iter.chain(string_iter)))
|
||||
}
|
||||
}
|
||||
|
||||
/// Fetch the whole list of candidates facet values one by one and order them by it.
|
||||
/// Fetch the whole list of candidates facet number values one by one and order them by it.
|
||||
///
|
||||
/// This function is fast when the amount of candidates to rank is small.
|
||||
fn iterative_facet_ordered_iter<'t>(
|
||||
fn iterative_facet_number_ordered_iter<'t>(
|
||||
index: &'t Index,
|
||||
rtxn: &'t heed::RoTxn,
|
||||
field_id: FieldId,
|
||||
ascending: bool,
|
||||
is_ascending: bool,
|
||||
candidates: RoaringBitmap,
|
||||
) -> Result<impl Iterator<Item = RoaringBitmap> + 't> {
|
||||
let mut docids_values = Vec::with_capacity(candidates.len() as usize);
|
||||
|
@ -199,14 +221,14 @@ fn iterative_facet_ordered_iter<'t>(
|
|||
let left = (field_id, docid, f64::MIN);
|
||||
let right = (field_id, docid, f64::MAX);
|
||||
let mut iter = index.field_id_docid_facet_f64s.range(rtxn, &(left..=right))?;
|
||||
let entry = if ascending { iter.next() } else { iter.last() };
|
||||
let entry = if is_ascending { iter.next() } else { iter.last() };
|
||||
if let Some(((_, _, value), ())) = entry.transpose()? {
|
||||
docids_values.push((docid, OrderedFloat(value)));
|
||||
}
|
||||
}
|
||||
docids_values.sort_unstable_by_key(|(_, v)| *v);
|
||||
let iter = docids_values.into_iter();
|
||||
let iter = if ascending {
|
||||
let iter = if is_ascending {
|
||||
Box::new(iter) as Box<dyn Iterator<Item = _>>
|
||||
} else {
|
||||
Box::new(iter.rev())
|
||||
|
@ -216,7 +238,49 @@ fn iterative_facet_ordered_iter<'t>(
|
|||
// required to collect the result into an owned collection (a Vec).
|
||||
// https://github.com/rust-itertools/itertools/issues/499
|
||||
let vec: Vec<_> = iter
|
||||
.group_by(|(_, v)| v.clone())
|
||||
.group_by(|(_, v)| *v)
|
||||
.into_iter()
|
||||
.map(|(_, ids)| ids.map(|(id, _)| id).collect())
|
||||
.collect();
|
||||
|
||||
Ok(vec.into_iter())
|
||||
}
|
||||
|
||||
/// Fetch the whole list of candidates facet string values one by one and order them by it.
|
||||
///
|
||||
/// This function is fast when the amount of candidates to rank is small.
|
||||
fn iterative_facet_string_ordered_iter<'t>(
|
||||
index: &'t Index,
|
||||
rtxn: &'t heed::RoTxn,
|
||||
field_id: FieldId,
|
||||
is_ascending: bool,
|
||||
candidates: RoaringBitmap,
|
||||
) -> Result<impl Iterator<Item = RoaringBitmap> + 't> {
|
||||
let mut docids_values = Vec::with_capacity(candidates.len() as usize);
|
||||
for docid in candidates.iter() {
|
||||
let left = (field_id, docid, "");
|
||||
let right = (field_id, docid.saturating_add(1), "");
|
||||
// FIXME Doing this means that it will never be possible to retrieve
|
||||
// the document with id 2^32, not sure this is a real problem.
|
||||
let mut iter = index.field_id_docid_facet_strings.range(rtxn, &(left..right))?;
|
||||
let entry = if is_ascending { iter.next() } else { iter.last() };
|
||||
if let Some(((_, _, value), _)) = entry.transpose()? {
|
||||
docids_values.push((docid, value));
|
||||
}
|
||||
}
|
||||
docids_values.sort_unstable_by_key(|(_, v)| *v);
|
||||
let iter = docids_values.into_iter();
|
||||
let iter = if is_ascending {
|
||||
Box::new(iter) as Box<dyn Iterator<Item = _>>
|
||||
} else {
|
||||
Box::new(iter.rev())
|
||||
};
|
||||
|
||||
// The itertools GroupBy iterator doesn't provide an owned version, we are therefore
|
||||
// required to collect the result into an owned collection (a Vec).
|
||||
// https://github.com/rust-itertools/itertools/issues/499
|
||||
let vec: Vec<_> = iter
|
||||
.group_by(|(_, v)| *v)
|
||||
.into_iter()
|
||||
.map(|(_, ids)| ids.map(|(id, _)| id).collect())
|
||||
.collect();
|
||||
|
|
|
@ -12,6 +12,7 @@ use self::r#final::Final;
|
|||
use self::typo::Typo;
|
||||
use self::words::Words;
|
||||
use super::query_tree::{Operation, PrimitiveQueryPart, Query, QueryKind};
|
||||
use crate::criterion::AscDesc as AscDescName;
|
||||
use crate::search::{word_derivations, WordDerivationsCache};
|
||||
use crate::{DocumentId, FieldId, Index, Result, TreeLevel};
|
||||
|
||||
|
@ -273,6 +274,7 @@ impl<'t> CriteriaBuilder<'t> {
|
|||
query_tree: Option<Operation>,
|
||||
primitive_query: Option<Vec<PrimitiveQueryPart>>,
|
||||
filtered_candidates: Option<RoaringBitmap>,
|
||||
sort_criteria: Option<Vec<AscDescName>>,
|
||||
) -> Result<Final<'t>> {
|
||||
use crate::criterion::Criterion as Name;
|
||||
|
||||
|
@ -282,8 +284,30 @@ impl<'t> CriteriaBuilder<'t> {
|
|||
Box::new(Initial::new(query_tree, filtered_candidates)) as Box<dyn Criterion>;
|
||||
for name in self.index.criteria(&self.rtxn)? {
|
||||
criterion = match name {
|
||||
Name::Typo => Box::new(Typo::new(self, criterion)),
|
||||
Name::Words => Box::new(Words::new(self, criterion)),
|
||||
Name::Typo => Box::new(Typo::new(self, criterion)),
|
||||
Name::Sort => match sort_criteria {
|
||||
Some(ref sort_criteria) => {
|
||||
for asc_desc in sort_criteria {
|
||||
criterion = match asc_desc {
|
||||
AscDescName::Asc(field) => Box::new(AscDesc::asc(
|
||||
&self.index,
|
||||
&self.rtxn,
|
||||
criterion,
|
||||
field.to_string(),
|
||||
)?),
|
||||
AscDescName::Desc(field) => Box::new(AscDesc::desc(
|
||||
&self.index,
|
||||
&self.rtxn,
|
||||
criterion,
|
||||
field.to_string(),
|
||||
)?),
|
||||
};
|
||||
}
|
||||
criterion
|
||||
}
|
||||
None => criterion,
|
||||
},
|
||||
Name::Proximity => Box::new(Proximity::new(self, criterion)),
|
||||
Name::Attribute => Box::new(Attribute::new(self, criterion)),
|
||||
Name::Exactness => Box::new(Exactness::new(self, criterion, &primitive_query)?),
|
||||
|
|
|
@ -131,7 +131,7 @@ use std::ops::Bound::{Excluded, Included, Unbounded};
|
|||
|
||||
use either::{Either, Left, Right};
|
||||
use heed::types::{ByteSlice, DecodeIgnore};
|
||||
use heed::{Database, LazyDecode, RoRange};
|
||||
use heed::{Database, LazyDecode, RoRange, RoRevRange};
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use crate::heed_codec::facet::{
|
||||
|
@ -206,6 +206,65 @@ impl<'t> Iterator for FacetStringGroupRange<'t> {
|
|||
}
|
||||
}
|
||||
|
||||
pub struct FacetStringGroupRevRange<'t> {
|
||||
iter: RoRevRange<
|
||||
't,
|
||||
FacetLevelValueU32Codec,
|
||||
LazyDecode<FacetStringZeroBoundsValueCodec<CboRoaringBitmapCodec>>,
|
||||
>,
|
||||
end: Bound<u32>,
|
||||
}
|
||||
|
||||
impl<'t> FacetStringGroupRevRange<'t> {
|
||||
pub fn new<X, Y>(
|
||||
rtxn: &'t heed::RoTxn,
|
||||
db: Database<X, Y>,
|
||||
field_id: FieldId,
|
||||
level: NonZeroU8,
|
||||
left: Bound<u32>,
|
||||
right: Bound<u32>,
|
||||
) -> heed::Result<FacetStringGroupRevRange<'t>> {
|
||||
let db = db.remap_types::<
|
||||
FacetLevelValueU32Codec,
|
||||
FacetStringZeroBoundsValueCodec<CboRoaringBitmapCodec>,
|
||||
>();
|
||||
let left_bound = match left {
|
||||
Included(left) => Included((field_id, level, left, u32::MIN)),
|
||||
Excluded(left) => Excluded((field_id, level, left, u32::MIN)),
|
||||
Unbounded => Included((field_id, level, u32::MIN, u32::MIN)),
|
||||
};
|
||||
let right_bound = Included((field_id, level, u32::MAX, u32::MAX));
|
||||
let iter = db.lazily_decode_data().rev_range(rtxn, &(left_bound, right_bound))?;
|
||||
Ok(FacetStringGroupRevRange { iter, end: right })
|
||||
}
|
||||
}
|
||||
|
||||
impl<'t> Iterator for FacetStringGroupRevRange<'t> {
|
||||
type Item = heed::Result<((NonZeroU8, u32, u32), (Option<(&'t str, &'t str)>, RoaringBitmap))>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
match self.iter.next() {
|
||||
Some(Ok(((_fid, level, left, right), docids))) => {
|
||||
let must_be_returned = match self.end {
|
||||
Included(end) => right <= end,
|
||||
Excluded(end) => right < end,
|
||||
Unbounded => true,
|
||||
};
|
||||
if must_be_returned {
|
||||
match docids.decode() {
|
||||
Ok((bounds, docids)) => Some(Ok(((level, left, right), (bounds, docids)))),
|
||||
Err(e) => Some(Err(e)),
|
||||
}
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
Some(Err(e)) => Some(Err(e)),
|
||||
None => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// An iterator that is used to explore the level 0 of the facets string database.
|
||||
///
|
||||
/// It yields the facet string and the roaring bitmap associated with it.
|
||||
|
@ -280,6 +339,81 @@ impl<'t> Iterator for FacetStringLevelZeroRange<'t> {
|
|||
}
|
||||
}
|
||||
|
||||
pub struct FacetStringLevelZeroRevRange<'t> {
|
||||
iter: RoRevRange<
|
||||
't,
|
||||
FacetStringLevelZeroCodec,
|
||||
FacetStringLevelZeroValueCodec<CboRoaringBitmapCodec>,
|
||||
>,
|
||||
}
|
||||
|
||||
impl<'t> FacetStringLevelZeroRevRange<'t> {
|
||||
pub fn new<X, Y>(
|
||||
rtxn: &'t heed::RoTxn,
|
||||
db: Database<X, Y>,
|
||||
field_id: FieldId,
|
||||
left: Bound<&str>,
|
||||
right: Bound<&str>,
|
||||
) -> heed::Result<FacetStringLevelZeroRevRange<'t>> {
|
||||
fn encode_value<'a>(buffer: &'a mut Vec<u8>, field_id: FieldId, value: &str) -> &'a [u8] {
|
||||
buffer.extend_from_slice(&field_id.to_be_bytes());
|
||||
buffer.push(0);
|
||||
buffer.extend_from_slice(value.as_bytes());
|
||||
&buffer[..]
|
||||
}
|
||||
|
||||
let mut left_buffer = Vec::new();
|
||||
let left_bound = match left {
|
||||
Included(value) => Included(encode_value(&mut left_buffer, field_id, value)),
|
||||
Excluded(value) => Excluded(encode_value(&mut left_buffer, field_id, value)),
|
||||
Unbounded => {
|
||||
left_buffer.extend_from_slice(&field_id.to_be_bytes());
|
||||
left_buffer.push(0);
|
||||
Included(&left_buffer[..])
|
||||
}
|
||||
};
|
||||
|
||||
let mut right_buffer = Vec::new();
|
||||
let right_bound = match right {
|
||||
Included(value) => Included(encode_value(&mut right_buffer, field_id, value)),
|
||||
Excluded(value) => Excluded(encode_value(&mut right_buffer, field_id, value)),
|
||||
Unbounded => {
|
||||
right_buffer.extend_from_slice(&field_id.to_be_bytes());
|
||||
right_buffer.push(1); // we must only get the level 0
|
||||
Excluded(&right_buffer[..])
|
||||
}
|
||||
};
|
||||
|
||||
let iter = db
|
||||
.remap_key_type::<ByteSlice>()
|
||||
.rev_range(rtxn, &(left_bound, right_bound))?
|
||||
.remap_types::<
|
||||
FacetStringLevelZeroCodec,
|
||||
FacetStringLevelZeroValueCodec<CboRoaringBitmapCodec>
|
||||
>();
|
||||
|
||||
Ok(FacetStringLevelZeroRevRange { iter })
|
||||
}
|
||||
}
|
||||
|
||||
impl<'t> Iterator for FacetStringLevelZeroRevRange<'t> {
|
||||
type Item = heed::Result<(&'t str, &'t str, RoaringBitmap)>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
match self.iter.next() {
|
||||
Some(Ok(((_fid, normalized), (original, docids)))) => {
|
||||
Some(Ok((normalized, original, docids)))
|
||||
}
|
||||
Some(Err(e)) => Some(Err(e)),
|
||||
None => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
type EitherStringRange<'t> = Either<FacetStringGroupRange<'t>, FacetStringLevelZeroRange<'t>>;
|
||||
type EitherStringRevRange<'t> =
|
||||
Either<FacetStringGroupRevRange<'t>, FacetStringLevelZeroRevRange<'t>>;
|
||||
|
||||
/// An iterator that is used to explore the facet strings level by level,
|
||||
/// it will only return facets strings that are associated with the
|
||||
/// candidates documents ids given.
|
||||
|
@ -287,12 +421,45 @@ pub struct FacetStringIter<'t> {
|
|||
rtxn: &'t heed::RoTxn<'t>,
|
||||
db: Database<ByteSlice, ByteSlice>,
|
||||
field_id: FieldId,
|
||||
level_iters:
|
||||
Vec<(RoaringBitmap, Either<FacetStringGroupRange<'t>, FacetStringLevelZeroRange<'t>>)>,
|
||||
level_iters: Vec<(RoaringBitmap, Either<EitherStringRange<'t>, EitherStringRevRange<'t>>)>,
|
||||
must_reduce: bool,
|
||||
}
|
||||
|
||||
impl<'t> FacetStringIter<'t> {
|
||||
pub fn new_reducing(
|
||||
rtxn: &'t heed::RoTxn,
|
||||
index: &'t Index,
|
||||
field_id: FieldId,
|
||||
documents_ids: RoaringBitmap,
|
||||
) -> heed::Result<FacetStringIter<'t>> {
|
||||
let db = index.facet_id_string_docids.remap_types::<ByteSlice, ByteSlice>();
|
||||
let highest_iter = Self::highest_iter(rtxn, index, db, field_id)?;
|
||||
Ok(FacetStringIter {
|
||||
rtxn,
|
||||
db,
|
||||
field_id,
|
||||
level_iters: vec![(documents_ids, Left(highest_iter))],
|
||||
must_reduce: true,
|
||||
})
|
||||
}
|
||||
|
||||
pub fn new_reverse_reducing(
|
||||
rtxn: &'t heed::RoTxn,
|
||||
index: &'t Index,
|
||||
field_id: FieldId,
|
||||
documents_ids: RoaringBitmap,
|
||||
) -> heed::Result<FacetStringIter<'t>> {
|
||||
let db = index.facet_id_string_docids.remap_types::<ByteSlice, ByteSlice>();
|
||||
let highest_reverse_iter = Self::highest_reverse_iter(rtxn, index, db, field_id)?;
|
||||
Ok(FacetStringIter {
|
||||
rtxn,
|
||||
db,
|
||||
field_id,
|
||||
level_iters: vec![(documents_ids, Right(highest_reverse_iter))],
|
||||
must_reduce: true,
|
||||
})
|
||||
}
|
||||
|
||||
pub fn new_non_reducing(
|
||||
rtxn: &'t heed::RoTxn,
|
||||
index: &'t Index,
|
||||
|
@ -300,30 +467,12 @@ impl<'t> FacetStringIter<'t> {
|
|||
documents_ids: RoaringBitmap,
|
||||
) -> heed::Result<FacetStringIter<'t>> {
|
||||
let db = index.facet_id_string_docids.remap_types::<ByteSlice, ByteSlice>();
|
||||
let highest_level = Self::highest_level(rtxn, db, field_id)?.unwrap_or(0);
|
||||
let highest_iter = match NonZeroU8::new(highest_level) {
|
||||
Some(highest_level) => Left(FacetStringGroupRange::new(
|
||||
rtxn,
|
||||
index.facet_id_string_docids,
|
||||
field_id,
|
||||
highest_level,
|
||||
Unbounded,
|
||||
Unbounded,
|
||||
)?),
|
||||
None => Right(FacetStringLevelZeroRange::new(
|
||||
rtxn,
|
||||
index.facet_id_string_docids,
|
||||
field_id,
|
||||
Unbounded,
|
||||
Unbounded,
|
||||
)?),
|
||||
};
|
||||
|
||||
let highest_iter = Self::highest_iter(rtxn, index, db, field_id)?;
|
||||
Ok(FacetStringIter {
|
||||
rtxn,
|
||||
db,
|
||||
field_id,
|
||||
level_iters: vec![(documents_ids, highest_iter)],
|
||||
level_iters: vec![(documents_ids, Left(highest_iter))],
|
||||
must_reduce: false,
|
||||
})
|
||||
}
|
||||
|
@ -340,6 +489,62 @@ impl<'t> FacetStringIter<'t> {
|
|||
.transpose()?
|
||||
.map(|(key_bytes, _)| key_bytes[2])) // the level is the third bit
|
||||
}
|
||||
|
||||
fn highest_iter<X, Y>(
|
||||
rtxn: &'t heed::RoTxn,
|
||||
index: &'t Index,
|
||||
db: Database<X, Y>,
|
||||
field_id: FieldId,
|
||||
) -> heed::Result<Either<FacetStringGroupRange<'t>, FacetStringLevelZeroRange<'t>>> {
|
||||
let highest_level = Self::highest_level(rtxn, db, field_id)?.unwrap_or(0);
|
||||
match NonZeroU8::new(highest_level) {
|
||||
Some(highest_level) => FacetStringGroupRange::new(
|
||||
rtxn,
|
||||
index.facet_id_string_docids,
|
||||
field_id,
|
||||
highest_level,
|
||||
Unbounded,
|
||||
Unbounded,
|
||||
)
|
||||
.map(Left),
|
||||
None => FacetStringLevelZeroRange::new(
|
||||
rtxn,
|
||||
index.facet_id_string_docids,
|
||||
field_id,
|
||||
Unbounded,
|
||||
Unbounded,
|
||||
)
|
||||
.map(Right),
|
||||
}
|
||||
}
|
||||
|
||||
fn highest_reverse_iter<X, Y>(
|
||||
rtxn: &'t heed::RoTxn,
|
||||
index: &'t Index,
|
||||
db: Database<X, Y>,
|
||||
field_id: FieldId,
|
||||
) -> heed::Result<Either<FacetStringGroupRevRange<'t>, FacetStringLevelZeroRevRange<'t>>> {
|
||||
let highest_level = Self::highest_level(rtxn, db, field_id)?.unwrap_or(0);
|
||||
match NonZeroU8::new(highest_level) {
|
||||
Some(highest_level) => FacetStringGroupRevRange::new(
|
||||
rtxn,
|
||||
index.facet_id_string_docids,
|
||||
field_id,
|
||||
highest_level,
|
||||
Unbounded,
|
||||
Unbounded,
|
||||
)
|
||||
.map(Left),
|
||||
None => FacetStringLevelZeroRevRange::new(
|
||||
rtxn,
|
||||
index.facet_id_string_docids,
|
||||
field_id,
|
||||
Unbounded,
|
||||
Unbounded,
|
||||
)
|
||||
.map(Right),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'t> Iterator for FacetStringIter<'t> {
|
||||
|
@ -348,6 +553,21 @@ impl<'t> Iterator for FacetStringIter<'t> {
|
|||
fn next(&mut self) -> Option<Self::Item> {
|
||||
'outer: loop {
|
||||
let (documents_ids, last) = self.level_iters.last_mut()?;
|
||||
let is_ascending = last.is_left();
|
||||
|
||||
// We remap the different iterator types to make
|
||||
// the algorithm less complex to understand.
|
||||
let last = match last {
|
||||
Left(ascending) => match ascending {
|
||||
Left(last) => Left(Left(last)),
|
||||
Right(last) => Right(Left(last)),
|
||||
},
|
||||
Right(descending) => match descending {
|
||||
Left(last) => Left(Right(last)),
|
||||
Right(last) => Right(Right(last)),
|
||||
},
|
||||
};
|
||||
|
||||
match last {
|
||||
Left(last) => {
|
||||
for result in last {
|
||||
|
@ -359,24 +579,50 @@ impl<'t> Iterator for FacetStringIter<'t> {
|
|||
*documents_ids -= &docids;
|
||||
}
|
||||
|
||||
let result = match string_bounds {
|
||||
Some((left, right)) => FacetStringLevelZeroRange::new(
|
||||
self.rtxn,
|
||||
self.db,
|
||||
self.field_id,
|
||||
Included(left),
|
||||
Included(right),
|
||||
)
|
||||
.map(Right),
|
||||
None => FacetStringGroupRange::new(
|
||||
self.rtxn,
|
||||
self.db,
|
||||
self.field_id,
|
||||
NonZeroU8::new(level.get() - 1).unwrap(),
|
||||
Included(left),
|
||||
Included(right),
|
||||
)
|
||||
.map(Left),
|
||||
let result = if is_ascending {
|
||||
match string_bounds {
|
||||
Some((left, right)) => {
|
||||
FacetStringLevelZeroRevRange::new(
|
||||
self.rtxn,
|
||||
self.db,
|
||||
self.field_id,
|
||||
Included(left),
|
||||
Included(right),
|
||||
)
|
||||
.map(Right)
|
||||
}
|
||||
None => FacetStringGroupRevRange::new(
|
||||
self.rtxn,
|
||||
self.db,
|
||||
self.field_id,
|
||||
NonZeroU8::new(level.get() - 1).unwrap(),
|
||||
Included(left),
|
||||
Included(right),
|
||||
)
|
||||
.map(Left),
|
||||
}
|
||||
.map(Right)
|
||||
} else {
|
||||
match string_bounds {
|
||||
Some((left, right)) => FacetStringLevelZeroRange::new(
|
||||
self.rtxn,
|
||||
self.db,
|
||||
self.field_id,
|
||||
Included(left),
|
||||
Included(right),
|
||||
)
|
||||
.map(Right),
|
||||
None => FacetStringGroupRange::new(
|
||||
self.rtxn,
|
||||
self.db,
|
||||
self.field_id,
|
||||
NonZeroU8::new(level.get() - 1).unwrap(),
|
||||
Included(left),
|
||||
Included(right),
|
||||
)
|
||||
.map(Left),
|
||||
}
|
||||
.map(Left)
|
||||
};
|
||||
|
||||
match result {
|
||||
|
|
|
@ -18,6 +18,8 @@ pub(crate) use self::facet::ParserRule;
|
|||
pub use self::facet::{FacetDistribution, FacetNumberIter, FilterCondition, Operator};
|
||||
pub use self::matching_words::MatchingWords;
|
||||
use self::query_tree::QueryTreeBuilder;
|
||||
use crate::criterion::AscDesc;
|
||||
use crate::error::UserError;
|
||||
use crate::search::criteria::r#final::{Final, FinalResult};
|
||||
use crate::{DocumentId, Index, Result};
|
||||
|
||||
|
@ -37,6 +39,7 @@ pub struct Search<'a> {
|
|||
filter: Option<FilterCondition>,
|
||||
offset: usize,
|
||||
limit: usize,
|
||||
sort_criteria: Option<Vec<AscDesc>>,
|
||||
optional_words: bool,
|
||||
authorize_typos: bool,
|
||||
words_limit: usize,
|
||||
|
@ -51,6 +54,7 @@ impl<'a> Search<'a> {
|
|||
filter: None,
|
||||
offset: 0,
|
||||
limit: 20,
|
||||
sort_criteria: None,
|
||||
optional_words: true,
|
||||
authorize_typos: true,
|
||||
words_limit: 10,
|
||||
|
@ -74,6 +78,11 @@ impl<'a> Search<'a> {
|
|||
self
|
||||
}
|
||||
|
||||
pub fn sort_criteria(&mut self, criteria: Vec<AscDesc>) -> &mut Search<'a> {
|
||||
self.sort_criteria = Some(criteria);
|
||||
self
|
||||
}
|
||||
|
||||
pub fn optional_words(&mut self, value: bool) -> &mut Search<'a> {
|
||||
self.optional_words = value;
|
||||
self
|
||||
|
@ -134,8 +143,29 @@ impl<'a> Search<'a> {
|
|||
None => MatchingWords::default(),
|
||||
};
|
||||
|
||||
// We check that we are allowed to use the sort criteria, we check
|
||||
// that they are declared in the sortable fields.
|
||||
let sortable_fields = self.index.sortable_fields(self.rtxn)?;
|
||||
if let Some(sort_criteria) = &self.sort_criteria {
|
||||
for asc_desc in sort_criteria {
|
||||
let field = asc_desc.field();
|
||||
if !sortable_fields.contains(field) {
|
||||
return Err(UserError::InvalidSortableAttribute {
|
||||
field: field.to_string(),
|
||||
valid_fields: sortable_fields,
|
||||
}
|
||||
.into());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let criteria_builder = criteria::CriteriaBuilder::new(self.rtxn, self.index)?;
|
||||
let criteria = criteria_builder.build(query_tree, primitive_query, filtered_candidates)?;
|
||||
let criteria = criteria_builder.build(
|
||||
query_tree,
|
||||
primitive_query,
|
||||
filtered_candidates,
|
||||
self.sort_criteria.clone(),
|
||||
)?;
|
||||
|
||||
match self.index.distinct_field(self.rtxn)? {
|
||||
None => self.perform_sort(NoopDistinct, matching_words, criteria),
|
||||
|
@ -199,6 +229,7 @@ impl fmt::Debug for Search<'_> {
|
|||
filter,
|
||||
offset,
|
||||
limit,
|
||||
sort_criteria,
|
||||
optional_words,
|
||||
authorize_typos,
|
||||
words_limit,
|
||||
|
@ -210,6 +241,7 @@ impl fmt::Debug for Search<'_> {
|
|||
.field("filter", filter)
|
||||
.field("offset", offset)
|
||||
.field("limit", limit)
|
||||
.field("sort_criteria", sort_criteria)
|
||||
.field("optional_words", optional_words)
|
||||
.field("authorize_typos", authorize_typos)
|
||||
.field("words_limit", words_limit)
|
||||
|
|
|
@ -75,6 +75,7 @@ pub struct Settings<'a, 't, 'u, 'i> {
|
|||
searchable_fields: Setting<Vec<String>>,
|
||||
displayed_fields: Setting<Vec<String>>,
|
||||
filterable_fields: Setting<HashSet<String>>,
|
||||
sortable_fields: Setting<HashSet<String>>,
|
||||
criteria: Setting<Vec<String>>,
|
||||
stop_words: Setting<BTreeSet<String>>,
|
||||
distinct_field: Setting<String>,
|
||||
|
@ -102,6 +103,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
|||
searchable_fields: Setting::NotSet,
|
||||
displayed_fields: Setting::NotSet,
|
||||
filterable_fields: Setting::NotSet,
|
||||
sortable_fields: Setting::NotSet,
|
||||
criteria: Setting::NotSet,
|
||||
stop_words: Setting::NotSet,
|
||||
distinct_field: Setting::NotSet,
|
||||
|
@ -135,6 +137,10 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
|||
self.filterable_fields = Setting::Set(names);
|
||||
}
|
||||
|
||||
pub fn set_sortable_fields(&mut self, names: HashSet<String>) {
|
||||
self.sortable_fields = Setting::Set(names);
|
||||
}
|
||||
|
||||
pub fn reset_criteria(&mut self) {
|
||||
self.criteria = Setting::Reset;
|
||||
}
|
||||
|
@ -392,6 +398,23 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
|||
Ok(())
|
||||
}
|
||||
|
||||
fn update_sortable(&mut self) -> Result<()> {
|
||||
match self.sortable_fields {
|
||||
Setting::Set(ref fields) => {
|
||||
let mut new_fields = HashSet::new();
|
||||
for name in fields {
|
||||
new_fields.insert(name.clone());
|
||||
}
|
||||
self.index.put_sortable_fields(self.wtxn, &new_fields)?;
|
||||
}
|
||||
Setting::Reset => {
|
||||
self.index.delete_sortable_fields(self.wtxn)?;
|
||||
}
|
||||
Setting::NotSet => (),
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn update_criteria(&mut self) -> Result<()> {
|
||||
match self.criteria {
|
||||
Setting::Set(ref fields) => {
|
||||
|
@ -446,6 +469,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
|||
|
||||
self.update_displayed()?;
|
||||
self.update_filterable()?;
|
||||
self.update_sortable()?;
|
||||
self.update_distinct_field()?;
|
||||
self.update_criteria()?;
|
||||
self.update_primary_key()?;
|
||||
|
@ -719,7 +743,7 @@ mod tests {
|
|||
let mut builder = Settings::new(&mut wtxn, &index, 0);
|
||||
// Don't display the generated `id` field.
|
||||
builder.set_displayed_fields(vec![S("name")]);
|
||||
builder.set_criteria(vec![S("asc(age)")]);
|
||||
builder.set_criteria(vec![S("age:asc")]);
|
||||
builder.execute(|_, _| ()).unwrap();
|
||||
|
||||
// Then index some documents.
|
||||
|
@ -953,7 +977,7 @@ mod tests {
|
|||
let mut builder = Settings::new(&mut wtxn, &index, 0);
|
||||
builder.set_displayed_fields(vec!["hello".to_string()]);
|
||||
builder.set_filterable_fields(hashset! { S("age"), S("toto") });
|
||||
builder.set_criteria(vec!["asc(toto)".to_string()]);
|
||||
builder.set_criteria(vec!["toto:asc".to_string()]);
|
||||
builder.execute(|_, _| ()).unwrap();
|
||||
wtxn.commit().unwrap();
|
||||
|
||||
|
@ -990,7 +1014,7 @@ mod tests {
|
|||
let mut builder = Settings::new(&mut wtxn, &index, 0);
|
||||
builder.set_displayed_fields(vec!["hello".to_string()]);
|
||||
// It is only Asc(toto), there is a facet database but it is denied to filter with toto.
|
||||
builder.set_criteria(vec!["asc(toto)".to_string()]);
|
||||
builder.set_criteria(vec!["toto:asc".to_string()]);
|
||||
builder.execute(|_, _| ()).unwrap();
|
||||
wtxn.commit().unwrap();
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue