mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-22 04:44:26 +01:00
enables facet count
This commit is contained in:
parent
effbb7f7f1
commit
e5126af458
2
Cargo.lock
generated
2
Cargo.lock
generated
@ -1664,8 +1664,8 @@ dependencies = [
|
||||
"mime",
|
||||
"pretty-bytes",
|
||||
"rand 0.7.3",
|
||||
"sentry",
|
||||
"regex",
|
||||
"sentry",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"serde_qs",
|
||||
|
@ -17,7 +17,6 @@ use slice_group_by::{GroupBy, GroupByMut};
|
||||
use crate::error::Error;
|
||||
use crate::criterion::{Criteria, Context, ContextMut};
|
||||
use crate::distinct_map::{BufferedDistinctMap, DistinctMap};
|
||||
use crate::facets::FacetKey;
|
||||
use crate::raw_document::RawDocument;
|
||||
use crate::{database::MainT, reordered_attrs::ReorderedAttrs};
|
||||
use crate::{store, Document, DocumentId, MResult};
|
||||
@ -30,7 +29,8 @@ pub struct SortResult {
|
||||
pub documents: Vec<Document>,
|
||||
pub nb_hits: usize,
|
||||
pub is_exhaustive: bool,
|
||||
pub facets: Option<HashMap<FacetKey, usize>>,
|
||||
pub facets: Option<HashMap<String, HashMap<String, usize>>>,
|
||||
pub exhaustive_facet_count: Option<bool>,
|
||||
}
|
||||
|
||||
pub fn bucket_sort<'c, FI>(
|
||||
@ -38,7 +38,7 @@ pub fn bucket_sort<'c, FI>(
|
||||
query: &str,
|
||||
range: Range<usize>,
|
||||
facets_docids: Option<SetBuf<DocumentId>>,
|
||||
facet_count_docids: Option<HashMap<FacetKey, Cow<Set<DocumentId>>>>,
|
||||
facet_count_docids: Option<HashMap<String, HashMap<String, Cow<Set<DocumentId>>>>>,
|
||||
filter: Option<FI>,
|
||||
criteria: Criteria<'c>,
|
||||
searchable_attrs: Option<ReorderedAttrs>,
|
||||
@ -120,15 +120,10 @@ where
|
||||
docids = Cow::Owned(intersection);
|
||||
}
|
||||
|
||||
if let Some(facet_count_docids) = facet_count_docids {
|
||||
let mut facets = HashMap::new();
|
||||
for (key, document_ids) in facet_count_docids {
|
||||
let mut counter = Counter::new();
|
||||
let op = OpBuilder::new(document_ids.as_ref(), document_ids.as_ref()).intersection();
|
||||
SetOperation::<DocumentId>::extend_collection(op, &mut counter);
|
||||
facets.insert(key, counter.0);
|
||||
}
|
||||
result.facets = Some(facets);
|
||||
if let Some(f) = facet_count_docids {
|
||||
// hardcoded value, until approximation optimization
|
||||
result.exhaustive_facet_count = Some(true);
|
||||
result.facets = Some(facet_count(f, &docids));
|
||||
}
|
||||
|
||||
let before = Instant::now();
|
||||
@ -216,7 +211,7 @@ pub fn bucket_sort_with_distinct<'c, FI, FD>(
|
||||
query: &str,
|
||||
range: Range<usize>,
|
||||
facets_docids: Option<SetBuf<DocumentId>>,
|
||||
facet_count_docids: Option<HashMap<FacetKey, Cow<Set<DocumentId>>>>,
|
||||
facet_count_docids: Option<HashMap<String, HashMap<String, Cow<Set<DocumentId>>>>>,
|
||||
filter: Option<FI>,
|
||||
distinct: FD,
|
||||
distinct_size: usize,
|
||||
@ -276,15 +271,10 @@ where
|
||||
docids = Cow::Owned(intersection);
|
||||
}
|
||||
|
||||
if let Some(facet_count_docids) = facet_count_docids {
|
||||
let mut facets = HashMap::new();
|
||||
for (key, document_ids) in facet_count_docids {
|
||||
let mut counter = Counter::new();
|
||||
let op = OpBuilder::new(document_ids.as_ref(), document_ids.as_ref()).intersection();
|
||||
SetOperation::<DocumentId>::extend_collection(op, &mut counter);
|
||||
facets.insert(key, counter.0);
|
||||
}
|
||||
result.facets = Some(facets);
|
||||
if let Some(f) = facet_count_docids {
|
||||
// hardcoded value, until approximation optimization
|
||||
result.exhaustive_facet_count = Some(true);
|
||||
result.facets = Some(facet_count(f, &docids));
|
||||
}
|
||||
|
||||
let before = Instant::now();
|
||||
@ -618,3 +608,22 @@ impl Deref for PostingsListView<'_> {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// For each entry in facet_docids, calculates the number of documents in the intersection with candidate_docids.
|
||||
fn facet_count(
|
||||
facet_docids: HashMap<String, HashMap<String, Cow<Set<DocumentId>>>>,
|
||||
candidate_docids: &Set<DocumentId>,
|
||||
) -> HashMap<String, HashMap<String, usize>> {
|
||||
let mut facets_counts = HashMap::with_capacity(facet_docids.len());
|
||||
for (key, doc_map) in facet_docids {
|
||||
let mut count_map = HashMap::with_capacity(doc_map.len());
|
||||
for (value, docids) in doc_map {
|
||||
let mut counter = Counter::new();
|
||||
let op = OpBuilder::new(docids.as_ref(), candidate_docids).intersection();
|
||||
SetOperation::<DocumentId>::extend_collection(op, &mut counter);
|
||||
count_map.insert(value, counter.0);
|
||||
}
|
||||
facets_counts.insert(key, count_map);
|
||||
}
|
||||
facets_counts
|
||||
}
|
||||
|
@ -12,7 +12,7 @@ use crate::facets::FacetFilter;
|
||||
use either::Either;
|
||||
use sdset::SetOperation;
|
||||
|
||||
use meilisearch_schema::FieldId;
|
||||
use meilisearch_schema::{Schema, FieldId};
|
||||
|
||||
pub struct QueryBuilder<'c, 'f, 'd, 'i, 'q> {
|
||||
criteria: Criteria<'c>,
|
||||
@ -21,8 +21,8 @@ pub struct QueryBuilder<'c, 'f, 'd, 'i, 'q> {
|
||||
distinct: Option<(Box<dyn Fn(DocumentId) -> Option<u64> + 'd>, usize)>,
|
||||
timeout: Option<Duration>,
|
||||
index: &'i store::Index,
|
||||
facet_fitlers: Option<&'q FacetFilter>,
|
||||
facets: Option<&'q [FieldId]>,
|
||||
facet_filter: Option<FacetFilter>,
|
||||
facets: Option<Vec<(FieldId, String)>>,
|
||||
}
|
||||
|
||||
impl<'c, 'f, 'd, 'i, 'q> QueryBuilder<'c, 'f, 'd, 'i, 'q> {
|
||||
@ -34,8 +34,8 @@ impl<'c, 'f, 'd, 'i, 'q> QueryBuilder<'c, 'f, 'd, 'i, 'q> {
|
||||
}
|
||||
|
||||
/// sets facet attributes to filter on
|
||||
pub fn set_facet_filters(&mut self, facets: Option<&'q FacetFilter>) {
|
||||
self.facet_fitlers = facets;
|
||||
pub fn set_facet_filter(&mut self, facets: Option<FacetFilter>) {
|
||||
self.facet_filter = facets;
|
||||
}
|
||||
|
||||
/// sets facet attributes for which to return the count
|
||||
@ -54,7 +54,7 @@ impl<'c, 'f, 'd, 'i, 'q> QueryBuilder<'c, 'f, 'd, 'i, 'q> {
|
||||
distinct: None,
|
||||
timeout: None,
|
||||
index,
|
||||
facet_fitlers: None,
|
||||
facet_filter: None,
|
||||
facets: None,
|
||||
}
|
||||
}
|
||||
@ -87,8 +87,9 @@ impl<'c, 'f, 'd, 'i, 'q> QueryBuilder<'c, 'f, 'd, 'i, 'q> {
|
||||
reader: &heed::RoTxn<MainT>,
|
||||
query: &str,
|
||||
range: Range<usize>,
|
||||
schema: &Schema,
|
||||
) -> MResult<SortResult> {
|
||||
let facets_docids = match self.facet_fitlers {
|
||||
let facets_docids = match self.facet_filter {
|
||||
Some(facets) => {
|
||||
let mut ands = Vec::with_capacity(facets.len());
|
||||
let mut ors = Vec::new();
|
||||
@ -120,14 +121,21 @@ impl<'c, 'f, 'd, 'i, 'q> QueryBuilder<'c, 'f, 'd, 'i, 'q> {
|
||||
None => None
|
||||
};
|
||||
|
||||
// for each field to retrieve the count for, create an HashMap associating the attribute
|
||||
// value to a set of matching documents. The HashMaps are them collected in another
|
||||
// HashMap, associating each HashMap to it's field.
|
||||
let facet_count_docids = match self.facets {
|
||||
Some(field_ids) => {
|
||||
let mut facet_count_map = HashMap::new();
|
||||
for field_id in field_ids {
|
||||
for pair in self.index.facets.field_document_ids(reader, *field_id)? {
|
||||
let (facet_key, document_ids) = pair?;
|
||||
let facet_key_string = facet_key.to_parts(schema)?;
|
||||
facet_count_map.insert(facet_key, document_ids);
|
||||
if let Some(field_name) = schema.name(*field_id) {
|
||||
let mut key_map = HashMap::new();
|
||||
for pair in self.index.facets.field_document_ids(reader, *field_id)? {
|
||||
let (facet_key, document_ids) = pair?;
|
||||
let value = facet_key.value();
|
||||
key_map.insert(value.to_string(), document_ids);
|
||||
}
|
||||
facet_count_map.insert(field_name.to_string(), key_map);
|
||||
}
|
||||
}
|
||||
Some(facet_count_map)
|
||||
|
@ -24,7 +24,7 @@ impl Facets {
|
||||
}
|
||||
|
||||
pub fn field_document_ids<'txn>(&self, reader: &'txn RoTxn<MainT>, field_id: FieldId) -> ZResult<RoRange<'txn, FacetKey, CowSet<DocumentId>>> {
|
||||
self.facets.prefix_iter(reader, &FacetKey::new(field_id, "".to_string()))
|
||||
self.facets.prefix_iter(reader, &FacetKey::new(field_id, String::new()))
|
||||
}
|
||||
|
||||
pub fn facet_document_ids<'txn>(&self, reader: &'txn RoTxn<MainT>, facet_key: &FacetKey) -> ZResult<Option<Cow<'txn, Set<DocumentId>>>> {
|
||||
|
@ -363,7 +363,7 @@ impl Index {
|
||||
QueryBuilder::new(self)
|
||||
}
|
||||
|
||||
pub fn query_builder_with_criteria<'c, 'f, 'd, 'fa, 'i, 'q>(
|
||||
pub fn query_builder_with_criteria<'c, 'f, 'd, 'i>(
|
||||
&'i self,
|
||||
criteria: Criteria<'c>,
|
||||
) -> QueryBuilder<'c, 'f, 'd, 'i, 'q> {
|
||||
|
@ -14,7 +14,7 @@ name = "meilisearch"
|
||||
path = "src/main.rs"
|
||||
|
||||
[features]
|
||||
default = ["sentry"]
|
||||
#default = ["sentry"]
|
||||
|
||||
[dependencies]
|
||||
actix-cors = "0.2.0"
|
||||
|
@ -157,7 +157,7 @@ impl<'a> SearchBuilder<'a> {
|
||||
query_builder.set_facets(self.facets.as_deref());
|
||||
|
||||
let start = Instant::now();
|
||||
let result = query_builder.query(reader, &self.query, self.offset..(self.offset + self.limit));
|
||||
let result = query_builder.query(reader, &self.query, self.offset..(self.offset + self.limit), &schema);
|
||||
let search_result = result.map_err(ResponseError::search_documents)?;
|
||||
let time_ms = start.elapsed().as_millis() as usize;
|
||||
|
||||
@ -247,7 +247,7 @@ impl<'a> SearchBuilder<'a> {
|
||||
exhaustive_nb_hits: search_result.is_exhaustive,
|
||||
processing_time_ms: time_ms,
|
||||
query: self.query.to_string(),
|
||||
facets: search_result.facets
|
||||
facets: search_result.facets,
|
||||
};
|
||||
|
||||
Ok(results)
|
||||
@ -332,6 +332,7 @@ pub struct SearchResult {
|
||||
pub exhaustive_nb_hits: bool,
|
||||
pub processing_time_ms: usize,
|
||||
pub query: String,
|
||||
pub facets: Option<HashMap<String, HashMap<String, usize>>>,
|
||||
}
|
||||
|
||||
/// returns the start index and the length on the crop.
|
||||
|
@ -33,7 +33,7 @@ struct SearchQuery {
|
||||
filters: Option<String>,
|
||||
matches: Option<bool>,
|
||||
facet_filters: Option<String>,
|
||||
facets: Option<String>
|
||||
facets: Option<String>,
|
||||
}
|
||||
|
||||
#[get("/indexes/{index_uid}/search", wrap = "Authentication::Public")]
|
||||
@ -94,9 +94,12 @@ async fn search_with_url_query(
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(ref facets) = params.facets {
|
||||
if let Some(facets) = ¶ms.facets {
|
||||
match index.main.attributes_for_faceting(&reader)? {
|
||||
Some(ref attrs) => { search_builder.add_facets(prepare_facet_list(facets, &schema, attrs)?); },
|
||||
Some(ref attrs) => {
|
||||
let field_ids = prepare_facet_list(&facets, &schema, attrs)?;
|
||||
search_builder.add_facets(field_ids);
|
||||
},
|
||||
None => return Err(ResponseError::FacetExpression("can't return facets count, as no facet is set".to_string()))
|
||||
}
|
||||
|
||||
@ -162,26 +165,35 @@ async fn search_with_url_query(
|
||||
Ok(HttpResponse::Ok().json(search_builder.search(&reader)?))
|
||||
}
|
||||
|
||||
fn prepare_facet_list<'fa>(facets: &str, schema: &Schema, facet_attrs: &'fa [FieldId]) -> Result<Vec<FieldId>, ResponseError> {
|
||||
let facet_array = serde_json::from_str(facets).expect("do error handling"); // TODO
|
||||
match facet_array {
|
||||
Value::Array(facet_array) => {
|
||||
let wild_card = Value::String("*".to_string());
|
||||
if facet_array.iter().any(|it| it == &wild_card) {
|
||||
return Ok(Vec::from(facet_attrs)); // TODO can make cow?
|
||||
/// Parses the incoming string into an array of attributes for which to return a count. It returns
|
||||
/// a Vec of attribute names ascociated with their id.
|
||||
///
|
||||
/// An error is returned if the array is malformed, or if it contains attributes that are
|
||||
/// unexisting, or not set as facets.
|
||||
fn prepare_facet_list(facets: &str, schema: &Schema, facet_attrs: &[FieldId]) -> Result<Vec<(FieldId, String)>, FacetCountError> {
|
||||
let json_array = serde_json::from_str(facets)?;
|
||||
match json_array {
|
||||
Value::Array(vals) => {
|
||||
let wildcard = Value::String("*".to_string());
|
||||
if vals.iter().any(|f| f == &wildcard) {
|
||||
return Ok(Vec::from(facet_attrs));
|
||||
}
|
||||
let mut fields = Vec::with_capacity(facet_attrs.len());
|
||||
for v in facet_array {
|
||||
match v {
|
||||
Value::String(name) => {
|
||||
let id = schema.id(&name).expect("not found error"); // TODO
|
||||
fields.push(id);
|
||||
let mut field_ids = Vec::new();
|
||||
for facet in vals {
|
||||
match facet {
|
||||
Value::String(facet) => {
|
||||
if let Some(id) = schema.id(&facet) {
|
||||
if !facet_attrs.contains(&id) {
|
||||
return Err(ResponseError::FacetExpression("Only attributes set as facet can be counted".to_string())); // TODO make special error
|
||||
}
|
||||
field_ids.push(id);
|
||||
}
|
||||
}
|
||||
_ => todo!("expected string, found {}", v),
|
||||
bad_val => return Err(ResponseError::FacetExpression(format!("expected String found {}", bad_val)))
|
||||
}
|
||||
}
|
||||
return Ok(fields);
|
||||
Ok(field_ids)
|
||||
}
|
||||
_ => todo!("error, bad syntax, expected array")
|
||||
bad_val => return Err(ResponseError::FacetExpression(format!("expected Array found {}", bad_val)))
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user