mirror of https://github.com/meilisearch/MeiliSearch (synced 2024-11-23 05:14:27 +01:00)

Add TODO notes

This commit is contained in:
  parent 00bad8c716
  commit 16fefd364e

@@ -11,6 +11,7 @@ mod resolve_query_graph;
 // TODO: documentation + comments
 mod small_bitmap;
 // TODO: documentation + comments
+// implementation is currently an adaptation of the previous implementation to fit with the new model
 mod sort;
 // TODO: documentation + comments
 mod words;

@@ -42,6 +43,7 @@ pub struct SearchContext<'ctx> {
     pub word_interner: DedupInterner<String>,
     pub phrase_interner: DedupInterner<Phrase>,
     pub term_interner: DedupInterner<QueryTerm>,
+    // think about memory usage of that field (roaring bitmaps in a hashmap)
     pub term_docids: QueryTermDocIdsCache,
 }
 impl<'ctx> SearchContext<'ctx> {

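For context on that note: `term_docids` caches one Roaring bitmap of document ids per interned query term. A minimal sketch of that shape, with hypothetical names (`TermId`, `TermDocIdsCache`, `get_or_compute`) standing in for milli's real types, shows why the memory footprint deserves a TODO:

use std::collections::HashMap;
use roaring::RoaringBitmap;

// Hypothetical stand-in for Interned<QueryTerm>: a small integer handle.
type TermId = u32;

#[derive(Default)]
struct TermDocIdsCache {
    // One bitmap per interned term; a bitmap over a large, dense docid
    // range can take kilobytes, so an unbounded map grows quickly.
    docids: HashMap<TermId, RoaringBitmap>,
}

impl TermDocIdsCache {
    /// Return the cached docids for `term`, computing them on a miss.
    fn get_or_compute(
        &mut self,
        term: TermId,
        compute: impl FnOnce(TermId) -> RoaringBitmap,
    ) -> &RoaringBitmap {
        self.docids.entry(term).or_insert_with(|| compute(term))
    }
}

Each entry owns a full bitmap, so with many distinct terms (and their typo/ngram derivations) the map can dominate per-search memory; bounding or reusing entries is the kind of fix the note gestures at.
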
@@ -119,6 +119,8 @@ impl QueryGraph {
 
 impl QueryGraph {
     /// Build the query graph from the parsed user search query.
+    ///
+    /// The ngrams are made at this point.
     pub fn from_query(ctx: &mut SearchContext, terms: Vec<LocatedQueryTerm>) -> Result<QueryGraph> {
         let nbr_typos = number_of_typos_allowed(ctx)?;
 

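As a rough illustration of "the ngrams are made at this point": while the graph is built, adjacent query words are also considered as merged ngram terms. A hedged sketch with an illustrative helper, not milli's actual code (the real cap on n is a config detail, taken here as a parameter):

// Merge adjacent words into candidate ngram terms, e.g. "new york"
// is also tried as the single word "newyork".
fn ngram_candidates(words: &[&str], max_n: usize) -> Vec<String> {
    let mut out = Vec::new();
    for n in 2..=max_n {
        for window in words.windows(n) {
            out.push(window.concat());
        }
    }
    out
}

fn main() {
    assert_eq!(
        ngram_candidates(&["new", "york", "city"], 3),
        vec!["newyork", "yorkcity", "newyorkcity"]
    );
}
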
@@ -219,6 +221,7 @@ impl QueryGraph {
     }
 
     /// Remove the given nodes and all their edges from the query graph.
+    /// TODO: need to check where this is used, and if this is correct.
     pub fn remove_nodes(&mut self, nodes: &[Interned<QueryNode>]) {
         for &node_id in nodes {
             let node = &self.nodes.get(node_id);

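The operation itself is easy to picture. A minimal sketch over a plain adjacency-list graph (milli's graph uses interned nodes and predecessor/successor bitmaps, so this is illustrative only): removing a node means clearing its own edges and dropping every edge that points to it:

use std::collections::HashSet;

struct Graph {
    // edges[i] holds the successors of node i
    edges: Vec<HashSet<usize>>,
}

impl Graph {
    fn remove_nodes(&mut self, nodes: &[usize]) {
        let removed: HashSet<usize> = nodes.iter().copied().collect();
        for (id, successors) in self.edges.iter_mut().enumerate() {
            if removed.contains(&id) {
                // drop all outgoing edges of a removed node
                successors.clear();
            } else {
                // drop incoming edges that point to a removed node
                successors.retain(|succ| !removed.contains(succ));
            }
        }
    }
}
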
@@ -414,6 +414,7 @@ impl QueryTerm {
 #[derive(Clone)]
 pub struct LocatedQueryTerm {
     pub value: Interned<QueryTerm>,
+    // TODO: consider changing to u8, or even a u16
     pub positions: RangeInclusive<i8>,
 }
 

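The TODO exists because `RangeInclusive<i8>` caps token positions at `i8::MAX` (127). A hypothetical constructor, for illustration only, makes the failure mode concrete:

use std::ops::RangeInclusive;

// Building the range fails as soon as a position exceeds i8::MAX.
fn positions_i8(start: usize, end: usize) -> Option<RangeInclusive<i8>> {
    Some(i8::try_from(start).ok()?..=i8::try_from(end).ok()?)
}

fn main() {
    assert!(positions_i8(0, 5).is_some());
    assert!(positions_i8(120, 130).is_none()); // 130 > 127: long queries overflow
}
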
@@ -425,6 +426,8 @@ impl LocatedQueryTerm {
     }
 }
 
 /// Convert the tokenised search query into a list of located query terms.
+// TODO: checking if the positions are correct for phrases, separators, ngrams
+// hard-limit the number of tokens that are considered
 pub fn located_query_terms_from_string(
     ctx: &mut SearchContext,
     query: NormalizedTokenIter<&[u8]>,

@@ -484,6 +487,7 @@ pub fn located_query_terms_from_string(
             }
         } else {
             let word = token.lemma();
+            // eagerly compute all derivations
             let term = query_term_from_word(ctx, word, nbr_typos(word), true)?;
             let located_term = LocatedQueryTerm {
                 value: ctx.term_interner.insert(term),

@@ -507,6 +511,7 @@ pub fn located_query_terms_from_string(
                 quoted = !quoted;
             }
             // if there is a quote or a hard separator we close the phrase.
+            // TODO: limit phrase size?
             if !phrase.is_empty() && (quote_count > 0 || separator_kind == SeparatorKind::Hard)
             {
                 let located_query_term = LocatedQueryTerm {

@@ -16,6 +16,9 @@ impl<T> SmallBitmap<T> {
     pub fn for_interned_values_in(interner: &FixedSizeInterner<T>) -> Self {
         Self::new(interner.len())
     }
+    // universe_length not stored anywhere, only used to decide between tiny/small
+    // universe_length: passed 63, actual length will be rounded up to 64
+    // passed 66, actual 64 * xs.len() as u16 = 128, passed size rounded up to the next 64
     pub fn new(universe_length: u16) -> Self {
         if universe_length <= 64 {
             Self { internal: SmallBitmapInternal::Tiny(0), _phantom: PhantomData }

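The three comments describe a rounding rule that is easy to state in code. A sketch of the implied capacity computation (not the real `SmallBitmap`, just the arithmetic it documents):

// A universe of up to 64 values fits in one u64 ("tiny"); anything larger
// allocates ceil(universe_length / 64) words, so the effective capacity is
// the requested length rounded up to the next multiple of 64.
fn effective_capacity(universe_length: u16) -> u16 {
    if universe_length <= 64 {
        64
    } else {
        universe_length.div_ceil(64) * 64
    }
}

fn main() {
    assert_eq!(effective_capacity(63), 64);  // rounded up to 64
    assert_eq!(effective_capacity(66), 128); // 2 words of 64 bits
}
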
@@ -28,6 +28,21 @@ impl<'ctx, Query> RankingRuleOutputIter<'ctx, Query> for RankingRuleOutputIterWr
     }
 }
 
+// `Query` type parameter: the same as the type parameter to bucket_sort
+// implements RankingRuleQuery trait, either querygraph or placeholdersearch
+// The sort ranking rule doesn't need the query parameter, it is doing the same thing
+// whether we're doing a querygraph or placeholder search.
+//
+// Query stored anyway because every ranking rule must return a query from next_bucket
+// ---
+// "Mismatch" between new/old impl.:
+// - old impl: roaring bitmap as input, ranking rule iterates over all the buckets
+// - new impl: still works like that but it shouldn't, because the universe may change for every call to next_bucket, itself due to:
+//   1. elements that were already returned by the ranking rule are subtracted from the universe, also done in the old impl (subtracted from the candidates)
+//   2. NEW in the new impl.: distinct rule might have been applied btwn calls to next_bucket
+// new impl ignores docs removed in (2), which is a missed perf opt issue, see `next_bucket`
+// this perf problem is P2
+// mostly happens when many documents map to the same distinct attribute value.
 pub struct Sort<'ctx, Query> {
     field_name: String,
     field_id: Option<FieldId>,

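A stripped-down sketch of the pattern the first comments describe, with simplified signatures rather than milli's actual `RankingRule` trait: the rule is generic over `Query` only because `next_bucket` must hand a query back, and `Sort` itself never inspects it:

struct RankingRuleOutput<Q> {
    query: Q,
    candidates: u64, // stand-in for a RoaringBitmap of docids
}

struct Sort<Q> {
    original_query: Option<Q>,
}

impl<Q: Clone> Sort<Q> {
    fn start_iteration(&mut self, query: Q) {
        // remember the query only so it can be echoed back later
        self.original_query = Some(query);
    }

    fn next_bucket(&mut self, candidates: u64) -> Option<RankingRuleOutput<Q>> {
        // Sort never looks inside Q; it returns it untouched with each bucket
        let query = self.original_query.clone()?;
        Some(RankingRuleOutput { query, candidates })
    }
}
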
@@ -127,6 +142,9 @@ impl<'ctx, Query: RankingRuleQueryTrait> RankingRule<'ctx, Query> for Sort<'ctx,
     ) -> Result<Option<RankingRuleOutput<Query>>> {
         let iter = self.iter.as_mut().unwrap();
         // TODO: we should make use of the universe in the function below
+        // good for correctness, but ideally iter.next_bucket would take the current universe into account,
+        // as right now it could return buckets that don't intersect with the universe, meaning we will make many
+        // unneeded calls.
         if let Some(mut bucket) = iter.next_bucket()? {
             bucket.candidates &= universe;
             Ok(Some(bucket))

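To make the "unneeded calls" concern concrete: a hedged sketch of a caller-side loop over a bucket source, assuming only the `roaring` crate (the closure stands in for `iter.next_bucket()`, minus its error handling). Buckets disjoint from the universe are fetched, intersected to empty, and thrown away:

use roaring::RoaringBitmap;

fn next_nonempty_bucket(
    mut next_bucket: impl FnMut() -> Option<RoaringBitmap>,
    universe: &RoaringBitmap,
) -> Option<RoaringBitmap> {
    while let Some(mut bucket) = next_bucket() {
        bucket &= universe; // same as `bucket.candidates &= universe` above
        if !bucket.is_empty() {
            return Some(bucket);
        }
        // otherwise this call was wasted work: the bucket lay entirely
        // outside the universe
    }
    None
}

If `next_bucket` took the universe as an argument, these empty rounds would disappear, which is exactly the optimisation the comment asks for.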