mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-22 12:54:26 +01:00
enables facet count
This commit is contained in:
parent
effbb7f7f1
commit
e5126af458
2
Cargo.lock
generated
2
Cargo.lock
generated
@ -1664,8 +1664,8 @@ dependencies = [
|
|||||||
"mime",
|
"mime",
|
||||||
"pretty-bytes",
|
"pretty-bytes",
|
||||||
"rand 0.7.3",
|
"rand 0.7.3",
|
||||||
"sentry",
|
|
||||||
"regex",
|
"regex",
|
||||||
|
"sentry",
|
||||||
"serde",
|
"serde",
|
||||||
"serde_json",
|
"serde_json",
|
||||||
"serde_qs",
|
"serde_qs",
|
||||||
|
@ -17,7 +17,6 @@ use slice_group_by::{GroupBy, GroupByMut};
|
|||||||
use crate::error::Error;
|
use crate::error::Error;
|
||||||
use crate::criterion::{Criteria, Context, ContextMut};
|
use crate::criterion::{Criteria, Context, ContextMut};
|
||||||
use crate::distinct_map::{BufferedDistinctMap, DistinctMap};
|
use crate::distinct_map::{BufferedDistinctMap, DistinctMap};
|
||||||
use crate::facets::FacetKey;
|
|
||||||
use crate::raw_document::RawDocument;
|
use crate::raw_document::RawDocument;
|
||||||
use crate::{database::MainT, reordered_attrs::ReorderedAttrs};
|
use crate::{database::MainT, reordered_attrs::ReorderedAttrs};
|
||||||
use crate::{store, Document, DocumentId, MResult};
|
use crate::{store, Document, DocumentId, MResult};
|
||||||
@ -30,7 +29,8 @@ pub struct SortResult {
|
|||||||
pub documents: Vec<Document>,
|
pub documents: Vec<Document>,
|
||||||
pub nb_hits: usize,
|
pub nb_hits: usize,
|
||||||
pub is_exhaustive: bool,
|
pub is_exhaustive: bool,
|
||||||
pub facets: Option<HashMap<FacetKey, usize>>,
|
pub facets: Option<HashMap<String, HashMap<String, usize>>>,
|
||||||
|
pub exhaustive_facet_count: Option<bool>,
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn bucket_sort<'c, FI>(
|
pub fn bucket_sort<'c, FI>(
|
||||||
@ -38,7 +38,7 @@ pub fn bucket_sort<'c, FI>(
|
|||||||
query: &str,
|
query: &str,
|
||||||
range: Range<usize>,
|
range: Range<usize>,
|
||||||
facets_docids: Option<SetBuf<DocumentId>>,
|
facets_docids: Option<SetBuf<DocumentId>>,
|
||||||
facet_count_docids: Option<HashMap<FacetKey, Cow<Set<DocumentId>>>>,
|
facet_count_docids: Option<HashMap<String, HashMap<String, Cow<Set<DocumentId>>>>>,
|
||||||
filter: Option<FI>,
|
filter: Option<FI>,
|
||||||
criteria: Criteria<'c>,
|
criteria: Criteria<'c>,
|
||||||
searchable_attrs: Option<ReorderedAttrs>,
|
searchable_attrs: Option<ReorderedAttrs>,
|
||||||
@ -120,15 +120,10 @@ where
|
|||||||
docids = Cow::Owned(intersection);
|
docids = Cow::Owned(intersection);
|
||||||
}
|
}
|
||||||
|
|
||||||
if let Some(facet_count_docids) = facet_count_docids {
|
if let Some(f) = facet_count_docids {
|
||||||
let mut facets = HashMap::new();
|
// hardcoded value, until approximation optimization
|
||||||
for (key, document_ids) in facet_count_docids {
|
result.exhaustive_facet_count = Some(true);
|
||||||
let mut counter = Counter::new();
|
result.facets = Some(facet_count(f, &docids));
|
||||||
let op = OpBuilder::new(document_ids.as_ref(), document_ids.as_ref()).intersection();
|
|
||||||
SetOperation::<DocumentId>::extend_collection(op, &mut counter);
|
|
||||||
facets.insert(key, counter.0);
|
|
||||||
}
|
|
||||||
result.facets = Some(facets);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
let before = Instant::now();
|
let before = Instant::now();
|
||||||
@ -216,7 +211,7 @@ pub fn bucket_sort_with_distinct<'c, FI, FD>(
|
|||||||
query: &str,
|
query: &str,
|
||||||
range: Range<usize>,
|
range: Range<usize>,
|
||||||
facets_docids: Option<SetBuf<DocumentId>>,
|
facets_docids: Option<SetBuf<DocumentId>>,
|
||||||
facet_count_docids: Option<HashMap<FacetKey, Cow<Set<DocumentId>>>>,
|
facet_count_docids: Option<HashMap<String, HashMap<String, Cow<Set<DocumentId>>>>>,
|
||||||
filter: Option<FI>,
|
filter: Option<FI>,
|
||||||
distinct: FD,
|
distinct: FD,
|
||||||
distinct_size: usize,
|
distinct_size: usize,
|
||||||
@ -276,15 +271,10 @@ where
|
|||||||
docids = Cow::Owned(intersection);
|
docids = Cow::Owned(intersection);
|
||||||
}
|
}
|
||||||
|
|
||||||
if let Some(facet_count_docids) = facet_count_docids {
|
if let Some(f) = facet_count_docids {
|
||||||
let mut facets = HashMap::new();
|
// hardcoded value, until approximation optimization
|
||||||
for (key, document_ids) in facet_count_docids {
|
result.exhaustive_facet_count = Some(true);
|
||||||
let mut counter = Counter::new();
|
result.facets = Some(facet_count(f, &docids));
|
||||||
let op = OpBuilder::new(document_ids.as_ref(), document_ids.as_ref()).intersection();
|
|
||||||
SetOperation::<DocumentId>::extend_collection(op, &mut counter);
|
|
||||||
facets.insert(key, counter.0);
|
|
||||||
}
|
|
||||||
result.facets = Some(facets);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
let before = Instant::now();
|
let before = Instant::now();
|
||||||
@ -618,3 +608,22 @@ impl Deref for PostingsListView<'_> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// For each entry in facet_docids, calculates the number of documents in the intersection with candidate_docids.
|
||||||
|
fn facet_count(
|
||||||
|
facet_docids: HashMap<String, HashMap<String, Cow<Set<DocumentId>>>>,
|
||||||
|
candidate_docids: &Set<DocumentId>,
|
||||||
|
) -> HashMap<String, HashMap<String, usize>> {
|
||||||
|
let mut facets_counts = HashMap::with_capacity(facet_docids.len());
|
||||||
|
for (key, doc_map) in facet_docids {
|
||||||
|
let mut count_map = HashMap::with_capacity(doc_map.len());
|
||||||
|
for (value, docids) in doc_map {
|
||||||
|
let mut counter = Counter::new();
|
||||||
|
let op = OpBuilder::new(docids.as_ref(), candidate_docids).intersection();
|
||||||
|
SetOperation::<DocumentId>::extend_collection(op, &mut counter);
|
||||||
|
count_map.insert(value, counter.0);
|
||||||
|
}
|
||||||
|
facets_counts.insert(key, count_map);
|
||||||
|
}
|
||||||
|
facets_counts
|
||||||
|
}
|
||||||
|
@ -12,7 +12,7 @@ use crate::facets::FacetFilter;
|
|||||||
use either::Either;
|
use either::Either;
|
||||||
use sdset::SetOperation;
|
use sdset::SetOperation;
|
||||||
|
|
||||||
use meilisearch_schema::FieldId;
|
use meilisearch_schema::{Schema, FieldId};
|
||||||
|
|
||||||
pub struct QueryBuilder<'c, 'f, 'd, 'i, 'q> {
|
pub struct QueryBuilder<'c, 'f, 'd, 'i, 'q> {
|
||||||
criteria: Criteria<'c>,
|
criteria: Criteria<'c>,
|
||||||
@ -21,8 +21,8 @@ pub struct QueryBuilder<'c, 'f, 'd, 'i, 'q> {
|
|||||||
distinct: Option<(Box<dyn Fn(DocumentId) -> Option<u64> + 'd>, usize)>,
|
distinct: Option<(Box<dyn Fn(DocumentId) -> Option<u64> + 'd>, usize)>,
|
||||||
timeout: Option<Duration>,
|
timeout: Option<Duration>,
|
||||||
index: &'i store::Index,
|
index: &'i store::Index,
|
||||||
facet_fitlers: Option<&'q FacetFilter>,
|
facet_filter: Option<FacetFilter>,
|
||||||
facets: Option<&'q [FieldId]>,
|
facets: Option<Vec<(FieldId, String)>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'c, 'f, 'd, 'i, 'q> QueryBuilder<'c, 'f, 'd, 'i, 'q> {
|
impl<'c, 'f, 'd, 'i, 'q> QueryBuilder<'c, 'f, 'd, 'i, 'q> {
|
||||||
@ -34,8 +34,8 @@ impl<'c, 'f, 'd, 'i, 'q> QueryBuilder<'c, 'f, 'd, 'i, 'q> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// sets facet attributes to filter on
|
/// sets facet attributes to filter on
|
||||||
pub fn set_facet_filters(&mut self, facets: Option<&'q FacetFilter>) {
|
pub fn set_facet_filter(&mut self, facets: Option<FacetFilter>) {
|
||||||
self.facet_fitlers = facets;
|
self.facet_filter = facets;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// sets facet attributes for which to return the count
|
/// sets facet attributes for which to return the count
|
||||||
@ -54,7 +54,7 @@ impl<'c, 'f, 'd, 'i, 'q> QueryBuilder<'c, 'f, 'd, 'i, 'q> {
|
|||||||
distinct: None,
|
distinct: None,
|
||||||
timeout: None,
|
timeout: None,
|
||||||
index,
|
index,
|
||||||
facet_fitlers: None,
|
facet_filter: None,
|
||||||
facets: None,
|
facets: None,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -87,8 +87,9 @@ impl<'c, 'f, 'd, 'i, 'q> QueryBuilder<'c, 'f, 'd, 'i, 'q> {
|
|||||||
reader: &heed::RoTxn<MainT>,
|
reader: &heed::RoTxn<MainT>,
|
||||||
query: &str,
|
query: &str,
|
||||||
range: Range<usize>,
|
range: Range<usize>,
|
||||||
|
schema: &Schema,
|
||||||
) -> MResult<SortResult> {
|
) -> MResult<SortResult> {
|
||||||
let facets_docids = match self.facet_fitlers {
|
let facets_docids = match self.facet_filter {
|
||||||
Some(facets) => {
|
Some(facets) => {
|
||||||
let mut ands = Vec::with_capacity(facets.len());
|
let mut ands = Vec::with_capacity(facets.len());
|
||||||
let mut ors = Vec::new();
|
let mut ors = Vec::new();
|
||||||
@ -120,14 +121,21 @@ impl<'c, 'f, 'd, 'i, 'q> QueryBuilder<'c, 'f, 'd, 'i, 'q> {
|
|||||||
None => None
|
None => None
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// for each field to retrieve the count for, create a HashMap associating the attribute
|
||||||
|
// value to a set of matching documents. The HashMaps are then collected in another
|
||||||
|
// HashMap, associating each HashMap to its field.
|
||||||
let facet_count_docids = match self.facets {
|
let facet_count_docids = match self.facets {
|
||||||
Some(field_ids) => {
|
Some(field_ids) => {
|
||||||
let mut facet_count_map = HashMap::new();
|
let mut facet_count_map = HashMap::new();
|
||||||
for field_id in field_ids {
|
for field_id in field_ids {
|
||||||
for pair in self.index.facets.field_document_ids(reader, *field_id)? {
|
if let Some(field_name) = schema.name(*field_id) {
|
||||||
let (facet_key, document_ids) = pair?;
|
let mut key_map = HashMap::new();
|
||||||
let facet_key_string = facet_key.to_parts(schema)?;
|
for pair in self.index.facets.field_document_ids(reader, *field_id)? {
|
||||||
facet_count_map.insert(facet_key, document_ids);
|
let (facet_key, document_ids) = pair?;
|
||||||
|
let value = facet_key.value();
|
||||||
|
key_map.insert(value.to_string(), document_ids);
|
||||||
|
}
|
||||||
|
facet_count_map.insert(field_name.to_string(), key_map);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Some(facet_count_map)
|
Some(facet_count_map)
|
||||||
|
@ -24,7 +24,7 @@ impl Facets {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub fn field_document_ids<'txn>(&self, reader: &'txn RoTxn<MainT>, field_id: FieldId) -> ZResult<RoRange<'txn, FacetKey, CowSet<DocumentId>>> {
|
pub fn field_document_ids<'txn>(&self, reader: &'txn RoTxn<MainT>, field_id: FieldId) -> ZResult<RoRange<'txn, FacetKey, CowSet<DocumentId>>> {
|
||||||
self.facets.prefix_iter(reader, &FacetKey::new(field_id, "".to_string()))
|
self.facets.prefix_iter(reader, &FacetKey::new(field_id, String::new()))
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn facet_document_ids<'txn>(&self, reader: &'txn RoTxn<MainT>, facet_key: &FacetKey) -> ZResult<Option<Cow<'txn, Set<DocumentId>>>> {
|
pub fn facet_document_ids<'txn>(&self, reader: &'txn RoTxn<MainT>, facet_key: &FacetKey) -> ZResult<Option<Cow<'txn, Set<DocumentId>>>> {
|
||||||
|
@ -363,7 +363,7 @@ impl Index {
|
|||||||
QueryBuilder::new(self)
|
QueryBuilder::new(self)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn query_builder_with_criteria<'c, 'f, 'd, 'fa, 'i, 'q>(
|
pub fn query_builder_with_criteria<'c, 'f, 'd, 'i>(
|
||||||
&'i self,
|
&'i self,
|
||||||
criteria: Criteria<'c>,
|
criteria: Criteria<'c>,
|
||||||
) -> QueryBuilder<'c, 'f, 'd, 'i, 'q> {
|
) -> QueryBuilder<'c, 'f, 'd, 'i, 'q> {
|
||||||
|
@ -14,7 +14,7 @@ name = "meilisearch"
|
|||||||
path = "src/main.rs"
|
path = "src/main.rs"
|
||||||
|
|
||||||
[features]
|
[features]
|
||||||
default = ["sentry"]
|
#default = ["sentry"]
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
actix-cors = "0.2.0"
|
actix-cors = "0.2.0"
|
||||||
|
@ -157,7 +157,7 @@ impl<'a> SearchBuilder<'a> {
|
|||||||
query_builder.set_facets(self.facets.as_deref());
|
query_builder.set_facets(self.facets.as_deref());
|
||||||
|
|
||||||
let start = Instant::now();
|
let start = Instant::now();
|
||||||
let result = query_builder.query(reader, &self.query, self.offset..(self.offset + self.limit));
|
let result = query_builder.query(reader, &self.query, self.offset..(self.offset + self.limit), &schema);
|
||||||
let search_result = result.map_err(ResponseError::search_documents)?;
|
let search_result = result.map_err(ResponseError::search_documents)?;
|
||||||
let time_ms = start.elapsed().as_millis() as usize;
|
let time_ms = start.elapsed().as_millis() as usize;
|
||||||
|
|
||||||
@ -247,7 +247,7 @@ impl<'a> SearchBuilder<'a> {
|
|||||||
exhaustive_nb_hits: search_result.is_exhaustive,
|
exhaustive_nb_hits: search_result.is_exhaustive,
|
||||||
processing_time_ms: time_ms,
|
processing_time_ms: time_ms,
|
||||||
query: self.query.to_string(),
|
query: self.query.to_string(),
|
||||||
facets: search_result.facets
|
facets: search_result.facets,
|
||||||
};
|
};
|
||||||
|
|
||||||
Ok(results)
|
Ok(results)
|
||||||
@ -332,6 +332,7 @@ pub struct SearchResult {
|
|||||||
pub exhaustive_nb_hits: bool,
|
pub exhaustive_nb_hits: bool,
|
||||||
pub processing_time_ms: usize,
|
pub processing_time_ms: usize,
|
||||||
pub query: String,
|
pub query: String,
|
||||||
|
pub facets: Option<HashMap<String, HashMap<String, usize>>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// returns the start index and the length on the crop.
|
/// returns the start index and the length on the crop.
|
||||||
|
@ -33,7 +33,7 @@ struct SearchQuery {
|
|||||||
filters: Option<String>,
|
filters: Option<String>,
|
||||||
matches: Option<bool>,
|
matches: Option<bool>,
|
||||||
facet_filters: Option<String>,
|
facet_filters: Option<String>,
|
||||||
facets: Option<String>
|
facets: Option<String>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[get("/indexes/{index_uid}/search", wrap = "Authentication::Public")]
|
#[get("/indexes/{index_uid}/search", wrap = "Authentication::Public")]
|
||||||
@ -94,9 +94,12 @@ async fn search_with_url_query(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if let Some(ref facets) = params.facets {
|
if let Some(facets) = &params.facets {
|
||||||
match index.main.attributes_for_faceting(&reader)? {
|
match index.main.attributes_for_faceting(&reader)? {
|
||||||
Some(ref attrs) => { search_builder.add_facets(prepare_facet_list(facets, &schema, attrs)?); },
|
Some(ref attrs) => {
|
||||||
|
let field_ids = prepare_facet_list(&facets, &schema, attrs)?;
|
||||||
|
search_builder.add_facets(field_ids);
|
||||||
|
},
|
||||||
None => return Err(ResponseError::FacetExpression("can't return facets count, as no facet is set".to_string()))
|
None => return Err(ResponseError::FacetExpression("can't return facets count, as no facet is set".to_string()))
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -162,26 +165,35 @@ async fn search_with_url_query(
|
|||||||
Ok(HttpResponse::Ok().json(search_builder.search(&reader)?))
|
Ok(HttpResponse::Ok().json(search_builder.search(&reader)?))
|
||||||
}
|
}
|
||||||
|
|
||||||
fn prepare_facet_list<'fa>(facets: &str, schema: &Schema, facet_attrs: &'fa [FieldId]) -> Result<Vec<FieldId>, ResponseError> {
|
/// Parses the incoming string into an array of attributes for which to return a count. It returns
|
||||||
let facet_array = serde_json::from_str(facets).expect("do error handling"); // TODO
|
/// a Vec of attribute names associated with their id.
|
||||||
match facet_array {
|
///
|
||||||
Value::Array(facet_array) => {
|
/// An error is returned if the array is malformed, or if it contains attributes that are
|
||||||
let wild_card = Value::String("*".to_string());
|
/// nonexistent, or not set as facets.
|
||||||
if facet_array.iter().any(|it| it == &wild_card) {
|
fn prepare_facet_list(facets: &str, schema: &Schema, facet_attrs: &[FieldId]) -> Result<Vec<(FieldId, String)>, FacetCountError> {
|
||||||
return Ok(Vec::from(facet_attrs)); // TODO can make cow?
|
let json_array = serde_json::from_str(facets)?;
|
||||||
|
match json_array {
|
||||||
|
Value::Array(vals) => {
|
||||||
|
let wildcard = Value::String("*".to_string());
|
||||||
|
if vals.iter().any(|f| f == &wildcard) {
|
||||||
|
return Ok(Vec::from(facet_attrs));
|
||||||
}
|
}
|
||||||
let mut fields = Vec::with_capacity(facet_attrs.len());
|
let mut field_ids = Vec::new();
|
||||||
for v in facet_array {
|
for facet in vals {
|
||||||
match v {
|
match facet {
|
||||||
Value::String(name) => {
|
Value::String(facet) => {
|
||||||
let id = schema.id(&name).expect("not found error"); // TODO
|
if let Some(id) = schema.id(&facet) {
|
||||||
fields.push(id);
|
if !facet_attrs.contains(&id) {
|
||||||
|
return Err(ResponseError::FacetExpression("Only attributes set as facet can be counted".to_string())); // TODO make special error
|
||||||
|
}
|
||||||
|
field_ids.push(id);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
_ => todo!("expected string, found {}", v),
|
bad_val => return Err(ResponseError::FacetExpression(format!("expected String found {}", bad_val)))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return Ok(fields);
|
Ok(field_ids)
|
||||||
}
|
}
|
||||||
_ => todo!("error, bad syntax, expected array")
|
bad_val => return Err(ResponseError::FacetExpression(format!("expected Array found {}", bad_val)))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user