mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-07-04 04:17:10 +02:00
add sort result struct
This commit is contained in:
parent
a88f6c3241
commit
effbb7f7f1
7 changed files with 194 additions and 69 deletions
|
@ -11,12 +11,13 @@ use std::fmt;
|
|||
use compact_arena::{SmallArena, Idx32, mk_arena};
|
||||
use log::debug;
|
||||
use meilisearch_types::DocIndex;
|
||||
use sdset::{Set, SetBuf, exponential_search, SetOperation};
|
||||
use sdset::{Set, SetBuf, exponential_search, SetOperation, Counter, duo::OpBuilder};
|
||||
use slice_group_by::{GroupBy, GroupByMut};
|
||||
|
||||
use crate::error::Error;
|
||||
use crate::criterion::{Criteria, Context, ContextMut};
|
||||
use crate::distinct_map::{BufferedDistinctMap, DistinctMap};
|
||||
use crate::facets::FacetKey;
|
||||
use crate::raw_document::RawDocument;
|
||||
use crate::{database::MainT, reordered_attrs::ReorderedAttrs};
|
||||
use crate::{store, Document, DocumentId, MResult};
|
||||
|
@ -24,11 +25,20 @@ use crate::query_tree::{create_query_tree, traverse_query_tree};
|
|||
use crate::query_tree::{Operation, QueryResult, QueryKind, QueryId, PostingsKey};
|
||||
use crate::query_tree::Context as QTContext;
|
||||
|
||||
/// Aggregated outcome returned by `bucket_sort` and `bucket_sort_with_distinct`.
///
/// `Default` yields an empty result (no documents, zero hits, no facet counts),
/// which the sort functions return early when there is nothing to search.
#[derive(Debug, Default)]
|
||||
pub struct SortResult {
|
||||
/// Documents produced by the sort.
pub documents: Vec<Document>,
|
||||
/// Set by the sort functions to the number of candidate document ids (`docids.len()`).
pub nb_hits: usize,
|
||||
/// NOTE(review): never assigned in the visible code, so it keeps its default
/// value (`false`); confirm the intended semantics.
pub is_exhaustive: bool,
|
||||
/// Number of documents counted per facet key; `None` when no facet count was requested.
pub facets: Option<HashMap<FacetKey, usize>>,
|
||||
}
|
||||
|
||||
pub fn bucket_sort<'c, FI>(
|
||||
reader: &heed::RoTxn<MainT>,
|
||||
query: &str,
|
||||
range: Range<usize>,
|
||||
facets_docids: Option<SetBuf<DocumentId>>,
|
||||
facet_count_docids: Option<HashMap<FacetKey, Cow<Set<DocumentId>>>>,
|
||||
filter: Option<FI>,
|
||||
criteria: Criteria<'c>,
|
||||
searchable_attrs: Option<ReorderedAttrs>,
|
||||
|
@ -38,7 +48,7 @@ pub fn bucket_sort<'c, FI>(
|
|||
synonyms_store: store::Synonyms,
|
||||
prefix_documents_cache_store: store::PrefixDocumentsCache,
|
||||
prefix_postings_lists_cache_store: store::PrefixPostingsListsCache,
|
||||
) -> MResult<(Vec<Document>, usize)>
|
||||
) -> MResult<SortResult>
|
||||
where
|
||||
FI: Fn(DocumentId) -> bool,
|
||||
{
|
||||
|
@ -52,6 +62,7 @@ where
|
|||
query,
|
||||
range,
|
||||
facets_docids,
|
||||
facet_count_docids,
|
||||
filter,
|
||||
distinct,
|
||||
distinct_size,
|
||||
|
@ -66,9 +77,11 @@ where
|
|||
);
|
||||
}
|
||||
|
||||
let mut result = SortResult::default();
|
||||
|
||||
let words_set = match unsafe { main_store.static_words_fst(reader)? } {
|
||||
Some(words) => words,
|
||||
None => return Ok((Vec::new(), 0)),
|
||||
None => return Ok(SortResult::default()),
|
||||
};
|
||||
|
||||
let stop_words = main_store.stop_words_fst(reader)?.unwrap_or_default();
|
||||
|
@ -107,6 +120,17 @@ where
|
|||
docids = Cow::Owned(intersection);
|
||||
}
|
||||
|
||||
// When facet counts were requested, count for every facet key how many of the
// candidate documents of this query carry it.
if let Some(facet_count_docids) = facet_count_docids {
    let mut facets = HashMap::new();
    for (key, document_ids) in facet_count_docids {
        let mut counter = Counter::new();
        // Intersect the facet's documents with the query candidates (`docids`).
        // Intersecting the facet set with itself would merely return its own
        // size and ignore the query entirely.
        let op = OpBuilder::new(docids.as_ref(), document_ids.as_ref()).intersection();
        // `Counter` only tallies the elements yielded by the operation; no
        // intermediate set is materialised.
        SetOperation::<DocumentId>::extend_collection(op, &mut counter);
        facets.insert(key, counter.0);
    }
    result.facets = Some(facets);
}
|
||||
|
||||
let before = Instant::now();
|
||||
mk_arena!(arena);
|
||||
let mut bare_matches = cleanup_bare_matches(&mut arena, &docids, queries);
|
||||
|
@ -181,7 +205,10 @@ where
|
|||
|
||||
debug!("bucket sort took {:.02?}", before_bucket_sort.elapsed());
|
||||
|
||||
Ok((documents, docids.len()))
|
||||
result.documents = documents;
|
||||
result.nb_hits = docids.len();
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
pub fn bucket_sort_with_distinct<'c, FI, FD>(
|
||||
|
@ -189,6 +216,7 @@ pub fn bucket_sort_with_distinct<'c, FI, FD>(
|
|||
query: &str,
|
||||
range: Range<usize>,
|
||||
facets_docids: Option<SetBuf<DocumentId>>,
|
||||
facet_count_docids: Option<HashMap<FacetKey, Cow<Set<DocumentId>>>>,
|
||||
filter: Option<FI>,
|
||||
distinct: FD,
|
||||
distinct_size: usize,
|
||||
|
@ -200,14 +228,16 @@ pub fn bucket_sort_with_distinct<'c, FI, FD>(
|
|||
synonyms_store: store::Synonyms,
|
||||
_prefix_documents_cache_store: store::PrefixDocumentsCache,
|
||||
prefix_postings_lists_cache_store: store::PrefixPostingsListsCache,
|
||||
) -> MResult<(Vec<Document>, usize)>
|
||||
) -> MResult<SortResult>
|
||||
where
|
||||
FI: Fn(DocumentId) -> bool,
|
||||
FD: Fn(DocumentId) -> Option<u64>,
|
||||
{
|
||||
let mut result = SortResult::default();
|
||||
|
||||
let words_set = match unsafe { main_store.static_words_fst(reader)? } {
|
||||
Some(words) => words,
|
||||
None => return Ok((Vec::new(), 0)),
|
||||
None => return Ok(SortResult::default()),
|
||||
};
|
||||
|
||||
let stop_words = main_store.stop_words_fst(reader)?.unwrap_or_default();
|
||||
|
@ -240,12 +270,23 @@ where
|
|||
debug!("number of postings {:?}", queries.len());
|
||||
|
||||
if let Some(facets_docids) = facets_docids {
|
||||
let intersection = sdset::duo::OpBuilder::new(docids.as_ref(), facets_docids.as_set())
|
||||
let intersection = OpBuilder::new(docids.as_ref(), facets_docids.as_set())
|
||||
.intersection()
|
||||
.into_set_buf();
|
||||
docids = Cow::Owned(intersection);
|
||||
}
|
||||
|
||||
// When facet counts were requested, count for every facet key how many of the
// candidate documents of this query carry it.
if let Some(facet_count_docids) = facet_count_docids {
    let mut facets = HashMap::new();
    for (key, document_ids) in facet_count_docids {
        let mut counter = Counter::new();
        // Intersect the facet's documents with the query candidates (`docids`).
        // Intersecting the facet set with itself would merely return its own
        // size and ignore the query entirely.
        let op = OpBuilder::new(docids.as_ref(), document_ids.as_ref()).intersection();
        // `Counter` only tallies the elements yielded by the operation; no
        // intermediate set is materialised.
        SetOperation::<DocumentId>::extend_collection(op, &mut counter);
        facets.insert(key, counter.0);
    }
    result.facets = Some(facets);
}
|
||||
|
||||
let before = Instant::now();
|
||||
mk_arena!(arena);
|
||||
let mut bare_matches = cleanup_bare_matches(&mut arena, &docids, queries);
|
||||
|
@ -379,8 +420,10 @@ where
|
|||
}
|
||||
}
|
||||
}
|
||||
result.documents = documents;
|
||||
result.nb_hits = docids.len();
|
||||
|
||||
Ok((documents, docids.len()))
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
fn cleanup_bare_matches<'tag, 'txn>(
|
||||
|
|
|
@ -1,27 +1,31 @@
|
|||
use std::borrow::Cow;
|
||||
use std::collections::HashMap;
|
||||
use std::ops::{Range, Deref};
|
||||
use std::time::Duration;
|
||||
|
||||
use crate::database::MainT;
|
||||
use crate::bucket_sort::{bucket_sort, bucket_sort_with_distinct};
|
||||
use crate::{criterion::Criteria, Document, DocumentId};
|
||||
use crate::bucket_sort::{bucket_sort, bucket_sort_with_distinct, SortResult};
|
||||
use crate::{criterion::Criteria, DocumentId};
|
||||
use crate::{reordered_attrs::ReorderedAttrs, store, MResult};
|
||||
use crate::facets::FacetFilter;
|
||||
|
||||
use either::Either;
|
||||
use sdset::SetOperation;
|
||||
|
||||
pub struct QueryBuilder<'c, 'f, 'd, 'fa, 'i> {
|
||||
use meilisearch_schema::FieldId;
|
||||
|
||||
pub struct QueryBuilder<'c, 'f, 'd, 'i, 'q> {
|
||||
criteria: Criteria<'c>,
|
||||
searchable_attrs: Option<ReorderedAttrs>,
|
||||
filter: Option<Box<dyn Fn(DocumentId) -> bool + 'f>>,
|
||||
distinct: Option<(Box<dyn Fn(DocumentId) -> Option<u64> + 'd>, usize)>,
|
||||
timeout: Option<Duration>,
|
||||
index: &'i store::Index,
|
||||
facets: Option<&'fa FacetFilter>,
|
||||
facet_fitlers: Option<&'q FacetFilter>,
|
||||
facets: Option<&'q [FieldId]>,
|
||||
}
|
||||
|
||||
impl<'c, 'f, 'd, 'fa, 'i> QueryBuilder<'c, 'f, 'd, 'fa, 'i> {
|
||||
impl<'c, 'f, 'd, 'i, 'q> QueryBuilder<'c, 'f, 'd, 'i, 'q> {
|
||||
pub fn new(index: &'i store::Index) -> Self {
|
||||
QueryBuilder::with_criteria(
|
||||
index,
|
||||
|
@ -29,7 +33,13 @@ impl<'c, 'f, 'd, 'fa, 'i> QueryBuilder<'c, 'f, 'd, 'fa, 'i> {
|
|||
)
|
||||
}
|
||||
|
||||
pub fn set_facets(&mut self, facets: Option<&'fa FacetFilter>) {
|
||||
/// Sets the facet filter to apply to the query.
///
/// Passing `None` removes any previously set filter.
///
/// NOTE(review): the backing field is spelled `facet_fitlers` (sic); renaming it
/// would have to touch every use of the field at once.
|
||||
pub fn set_facet_filters(&mut self, facets: Option<&'q FacetFilter>) {
|
||||
self.facet_fitlers = facets;
|
||||
}
|
||||
|
||||
/// Sets the facet attributes (as field ids) for which a count should be returned.
///
/// Passing `None` disables facet counting.
|
||||
pub fn set_facets(&mut self, facets: Option<&'q [FieldId]>) {
|
||||
self.facets = facets;
|
||||
}
|
||||
|
||||
|
@ -44,6 +54,7 @@ impl<'c, 'f, 'd, 'fa, 'i> QueryBuilder<'c, 'f, 'd, 'fa, 'i> {
|
|||
distinct: None,
|
||||
timeout: None,
|
||||
index,
|
||||
facet_fitlers: None,
|
||||
facets: None,
|
||||
}
|
||||
}
|
||||
|
@ -76,8 +87,8 @@ impl<'c, 'f, 'd, 'fa, 'i> QueryBuilder<'c, 'f, 'd, 'fa, 'i> {
|
|||
reader: &heed::RoTxn<MainT>,
|
||||
query: &str,
|
||||
range: Range<usize>,
|
||||
) -> MResult<(Vec<Document>, usize)> {
|
||||
let facets_docids = match self.facets {
|
||||
) -> MResult<SortResult> {
|
||||
let facets_docids = match self.facet_fitlers {
|
||||
Some(facets) => {
|
||||
let mut ands = Vec::with_capacity(facets.len());
|
||||
let mut ors = Vec::new();
|
||||
|
@ -98,7 +109,7 @@ impl<'c, 'f, 'd, 'fa, 'i> QueryBuilder<'c, 'f, 'd, 'fa, 'i> {
|
|||
match self.index.facets.facet_document_ids(reader, &key)? {
|
||||
Some(docids) => ands.push(docids),
|
||||
// no candidates for search, early return.
|
||||
None => return Ok((vec![], 0)),
|
||||
None => return Ok(SortResult::default()),
|
||||
}
|
||||
}
|
||||
};
|
||||
|
@ -109,12 +120,29 @@ impl<'c, 'f, 'd, 'fa, 'i> QueryBuilder<'c, 'f, 'd, 'fa, 'i> {
|
|||
None => None
|
||||
};
|
||||
|
||||
let facet_count_docids = match self.facets {
|
||||
Some(field_ids) => {
|
||||
let mut facet_count_map = HashMap::new();
|
||||
for field_id in field_ids {
|
||||
for pair in self.index.facets.field_document_ids(reader, *field_id)? {
|
||||
let (facet_key, document_ids) = pair?;
|
||||
let facet_key_string = facet_key.to_parts(schema)?;
|
||||
facet_count_map.insert(facet_key, document_ids);
|
||||
}
|
||||
}
|
||||
Some(facet_count_map)
|
||||
}
|
||||
None => None,
|
||||
};
|
||||
|
||||
|
||||
match self.distinct {
|
||||
Some((distinct, distinct_size)) => bucket_sort_with_distinct(
|
||||
reader,
|
||||
query,
|
||||
range,
|
||||
facets_docids,
|
||||
facet_count_docids,
|
||||
self.filter,
|
||||
distinct,
|
||||
distinct_size,
|
||||
|
@ -132,6 +160,7 @@ impl<'c, 'f, 'd, 'fa, 'i> QueryBuilder<'c, 'f, 'd, 'fa, 'i> {
|
|||
query,
|
||||
range,
|
||||
facets_docids,
|
||||
facet_count_docids,
|
||||
self.filter,
|
||||
self.criteria,
|
||||
self.searchable_attrs,
|
||||
|
|
|
@ -1,10 +1,11 @@
|
|||
use std::borrow::Cow;
|
||||
use std::collections::HashMap;
|
||||
|
||||
use heed::{RwTxn, RoTxn, Result as ZResult};
|
||||
use heed::{RwTxn, RoTxn, Result as ZResult, RoRange};
|
||||
use sdset::{SetBuf, Set, SetOperation};
|
||||
|
||||
use meilisearch_types::DocumentId;
|
||||
use meilisearch_schema::FieldId;
|
||||
|
||||
use crate::database::MainT;
|
||||
use crate::facets::FacetKey;
|
||||
|
@ -22,6 +23,10 @@ impl Facets {
|
|||
self.facets.put(writer, &facet_key, doc_ids)
|
||||
}
|
||||
|
||||
pub fn field_document_ids<'txn>(&self, reader: &'txn RoTxn<MainT>, field_id: FieldId) -> ZResult<RoRange<'txn, FacetKey, CowSet<DocumentId>>> {
|
||||
self.facets.prefix_iter(reader, &FacetKey::new(field_id, "".to_string()))
|
||||
}
|
||||
|
||||
/// Looks up the set of document ids stored under `facet_key`.
///
/// Returns `Ok(None)` when the store holds no entry for that key, and an error
/// if the underlying read fails.
pub fn facet_document_ids<'txn>(
    &self,
    reader: &'txn RoTxn<MainT>,
    facet_key: &FacetKey,
) -> ZResult<Option<Cow<'txn, Set<DocumentId>>>> {
    // `facet_key` is already a reference, so it is passed through as-is
    // instead of being borrowed a second time.
    self.facets.get(reader, facet_key)
}
|
||||
|
|
|
@ -363,10 +363,10 @@ impl Index {
|
|||
QueryBuilder::new(self)
|
||||
}
|
||||
|
||||
pub fn query_builder_with_criteria<'c, 'f, 'd, 'fa, 'i>(
|
||||
pub fn query_builder_with_criteria<'c, 'f, 'd, 'fa, 'i, 'q>(
|
||||
&'i self,
|
||||
criteria: Criteria<'c>,
|
||||
) -> QueryBuilder<'c, 'f, 'd, 'fa, 'i> {
|
||||
) -> QueryBuilder<'c, 'f, 'd, 'i, 'q> {
|
||||
QueryBuilder::with_criteria(self, criteria)
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue