255: Fix facet distribution error r=Kerollmops a=Kerollmops

This PR fixes two invalid behaviors and fixes #253:
 - We were ignoring the list of fields for which the user wanted a facet distribution.
 - We were not raising any error for when a non-filterable field was requested a facet distribution.

~For the latter behavior I need the help of @curquiza to help me choose the right error type.~

Co-authored-by: Kerollmops <clement@meilisearch.com>
This commit is contained in:
bors[bot] 2021-06-23 12:03:05 +00:00 committed by GitHub
commit 66f55e3e6a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 63 additions and 37 deletions

View File

@ -1,3 +1,4 @@
use std::collections::HashSet;
use std::convert::Infallible; use std::convert::Infallible;
use std::error::Error as StdError; use std::error::Error as StdError;
use std::{fmt, io, str}; use std::{fmt, io, str};
@ -51,13 +52,14 @@ pub enum FieldIdMapMissingEntry {
pub enum UserError { pub enum UserError {
AttributeLimitReached, AttributeLimitReached,
Csv(csv::Error), Csv(csv::Error),
MaxDatabaseSizeReached,
DocumentLimitReached, DocumentLimitReached,
InvalidFilter(pest::error::Error<ParserRule>),
InvalidCriterionName { name: String }, InvalidCriterionName { name: String },
InvalidDocumentId { document_id: Value }, InvalidDocumentId { document_id: Value },
InvalidFacetsDistribution { invalid_facets_name: HashSet<String> },
InvalidFilter(pest::error::Error<ParserRule>),
InvalidFilterAttribute(pest::error::Error<ParserRule>), InvalidFilterAttribute(pest::error::Error<ParserRule>),
InvalidStoreFile, InvalidStoreFile,
MaxDatabaseSizeReached,
MissingDocumentId { document: Object }, MissingDocumentId { document: Object },
MissingPrimaryKey, MissingPrimaryKey,
NoSpaceLeftOnDevice, NoSpaceLeftOnDevice,
@ -202,6 +204,15 @@ impl fmt::Display for UserError {
Self::AttributeLimitReached => f.write_str("maximum number of attributes reached"), Self::AttributeLimitReached => f.write_str("maximum number of attributes reached"),
Self::Csv(error) => error.fmt(f), Self::Csv(error) => error.fmt(f),
Self::DocumentLimitReached => f.write_str("maximum number of documents reached"), Self::DocumentLimitReached => f.write_str("maximum number of documents reached"),
Self::InvalidFacetsDistribution { invalid_facets_name } => {
let name_list =
invalid_facets_name.iter().map(AsRef::as_ref).collect::<Vec<_>>().join(", ");
write!(
f,
"invalid facet distribution, the fields {} are not set as filterable",
name_list
)
}
Self::InvalidFilter(error) => error.fmt(f), Self::InvalidFilter(error) => error.fmt(f),
Self::InvalidCriterionName { name } => write!(f, "invalid criterion {}", name), Self::InvalidCriterionName { name } => write!(f, "invalid criterion {}", name),
Self::InvalidDocumentId { document_id } => { Self::InvalidDocumentId { document_id } => {

View File

@ -6,7 +6,7 @@ use heed::types::{ByteSlice, Unit};
use heed::{BytesDecode, Database}; use heed::{BytesDecode, Database};
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use crate::error::FieldIdMapMissingEntry; use crate::error::{FieldIdMapMissingEntry, UserError};
use crate::facet::FacetType; use crate::facet::FacetType;
use crate::heed_codec::facet::FacetValueStringCodec; use crate::heed_codec::facet::FacetValueStringCodec;
use crate::search::facet::{FacetIter, FacetRange}; use crate::search::facet::{FacetIter, FacetRange};
@ -174,49 +174,64 @@ impl<'a> FacetDistribution<'a> {
fn facet_values(&self, field_id: FieldId) -> heed::Result<BTreeMap<String, u64>> { fn facet_values(&self, field_id: FieldId) -> heed::Result<BTreeMap<String, u64>> {
use FacetType::{Number, String}; use FacetType::{Number, String};
if let Some(candidates) = self.candidates.as_ref() { match self.candidates {
// Classic search, candidates were specified, we must return facet values only related Some(ref candidates) => {
// to those candidates. We also enter here for facet strings for performance reasons. // Classic search, candidates were specified, we must return facet values only related
let mut distribution = BTreeMap::new(); // to those candidates. We also enter here for facet strings for performance reasons.
if candidates.len() <= CANDIDATES_THRESHOLD { let mut distribution = BTreeMap::new();
self.facet_distribution_from_documents( if candidates.len() <= CANDIDATES_THRESHOLD {
field_id, self.facet_distribution_from_documents(
Number, field_id,
candidates, Number,
&mut distribution, candidates,
)?; &mut distribution,
self.facet_distribution_from_documents( )?;
field_id, self.facet_distribution_from_documents(
String, field_id,
candidates, String,
&mut distribution, candidates,
)?; &mut distribution,
} else { )?;
self.facet_numbers_distribution_from_facet_levels( } else {
field_id, self.facet_numbers_distribution_from_facet_levels(
candidates, field_id,
&mut distribution, candidates,
)?; &mut distribution,
self.facet_distribution_from_documents( )?;
field_id, self.facet_distribution_from_documents(
String, field_id,
candidates, String,
&mut distribution, candidates,
)?; &mut distribution,
} )?;
}
Ok(distribution) Ok(distribution)
} else { }
self.facet_values_from_raw_facet_database(field_id) None => self.facet_values_from_raw_facet_database(field_id),
} }
} }
pub fn execute(&self) -> Result<BTreeMap<String, BTreeMap<String, u64>>> { pub fn execute(&self) -> Result<BTreeMap<String, BTreeMap<String, u64>>> {
let fields_ids_map = self.index.fields_ids_map(self.rtxn)?; let fields_ids_map = self.index.fields_ids_map(self.rtxn)?;
let filterable_fields = self.index.filterable_fields(self.rtxn)?; let filterable_fields = self.index.filterable_fields(self.rtxn)?;
let fields = match self.facets {
Some(ref facets) => {
let invalid_fields: HashSet<_> = facets.difference(&filterable_fields).collect();
if !invalid_fields.is_empty() {
return Err(UserError::InvalidFacetsDistribution {
invalid_facets_name: invalid_fields.into_iter().cloned().collect(),
}
.into());
} else {
facets.clone()
}
}
None => filterable_fields,
};
let mut distribution = BTreeMap::new(); let mut distribution = BTreeMap::new();
for name in filterable_fields { for name in fields {
let fid = let fid =
fields_ids_map.id(&name).ok_or_else(|| FieldIdMapMissingEntry::FieldName { fields_ids_map.id(&name).ok_or_else(|| FieldIdMapMissingEntry::FieldName {
field_name: name.clone(), field_name: name.clone(),