mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-12-23 05:00:06 +01:00
feat: Pre-compute matches query index groups
This commit is contained in:
parent
40ab9e7a55
commit
039a9a4cc7
@ -2,9 +2,8 @@ use std::cmp::Ordering;
|
||||
use std::ops::Deref;
|
||||
|
||||
use rocksdb::DB;
|
||||
use group_by::GroupBy;
|
||||
|
||||
use crate::rank::{match_query_index, Document};
|
||||
use crate::rank::{Document, Matches};
|
||||
use crate::rank::criterion::Criterion;
|
||||
use crate::database::DatabaseView;
|
||||
use crate::Match;
|
||||
@ -15,8 +14,8 @@ fn contains_exact(matches: &[Match]) -> bool {
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn number_exact_matches(matches: &[Match]) -> usize {
|
||||
GroupBy::new(matches, match_query_index).map(contains_exact).count()
|
||||
fn number_exact_matches(matches: &Matches) -> usize {
|
||||
matches.query_index_groups().map(contains_exact).count()
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
|
@ -2,16 +2,14 @@ use std::cmp::Ordering;
|
||||
use std::ops::Deref;
|
||||
|
||||
use rocksdb::DB;
|
||||
use group_by::GroupBy;
|
||||
|
||||
use crate::rank::{match_query_index, Document};
|
||||
use crate::rank::{Document, Matches};
|
||||
use crate::rank::criterion::Criterion;
|
||||
use crate::database::DatabaseView;
|
||||
use crate::Match;
|
||||
|
||||
#[inline]
|
||||
fn number_of_query_words(matches: &[Match]) -> usize {
|
||||
GroupBy::new(matches, match_query_index).count()
|
||||
fn number_of_query_words(matches: &Matches) -> usize {
|
||||
matches.query_index_groups().count()
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
|
@ -3,21 +3,18 @@ use std::ops::Deref;
|
||||
|
||||
use rocksdb::DB;
|
||||
|
||||
use group_by::GroupBy;
|
||||
|
||||
use crate::rank::{match_query_index, Document};
|
||||
use crate::rank::{Document, Matches};
|
||||
use crate::rank::criterion::Criterion;
|
||||
use crate::database::DatabaseView;
|
||||
use crate::Match;
|
||||
|
||||
#[inline]
|
||||
fn sum_matches_typos(matches: &[Match]) -> i8 {
|
||||
fn sum_matches_typos(matches: &Matches) -> i8 {
|
||||
let mut sum_typos = 0;
|
||||
let mut number_words = 0;
|
||||
|
||||
// note that GroupBy will never return an empty group
|
||||
// so we can do this assumption safely
|
||||
for group in GroupBy::new(matches, match_query_index) {
|
||||
for group in matches.query_index_groups() {
|
||||
sum_typos += unsafe { group.get_unchecked(0).distance } as i8;
|
||||
number_words += 1;
|
||||
}
|
||||
@ -44,7 +41,7 @@ where D: Deref<Target=DB>
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
use crate::{DocumentId, Attribute, WordArea};
|
||||
use crate::{Match, DocumentId, Attribute, WordArea};
|
||||
|
||||
// typing: "Geox CEO"
|
||||
//
|
||||
|
@ -2,19 +2,17 @@ use std::cmp::Ordering;
|
||||
use std::ops::Deref;
|
||||
|
||||
use rocksdb::DB;
|
||||
use group_by::GroupBy;
|
||||
|
||||
use crate::rank::{Document, Matches};
|
||||
use crate::database::DatabaseView;
|
||||
use crate::rank::{match_query_index, Document};
|
||||
use crate::rank::criterion::Criterion;
|
||||
use crate::Match;
|
||||
|
||||
#[inline]
|
||||
fn sum_matches_attributes(matches: &[Match]) -> u16 {
|
||||
fn sum_matches_attributes(matches: &Matches) -> u16 {
|
||||
// note that GroupBy will never return an empty group
|
||||
// so we can do this assumption safely
|
||||
GroupBy::new(matches, match_query_index).map(|group| unsafe {
|
||||
group.get_unchecked(0).attribute.attribute()
|
||||
matches.query_index_groups().map(|group| {
|
||||
unsafe { group.get_unchecked(0).attribute.attribute() }
|
||||
}).sum()
|
||||
}
|
||||
|
||||
|
@ -2,19 +2,17 @@ use std::cmp::Ordering;
|
||||
use std::ops::Deref;
|
||||
|
||||
use rocksdb::DB;
|
||||
use group_by::GroupBy;
|
||||
|
||||
use crate::database::DatabaseView;
|
||||
use crate::rank::{match_query_index, Document};
|
||||
use crate::rank::{Document, Matches};
|
||||
use crate::rank::criterion::Criterion;
|
||||
use crate::Match;
|
||||
use crate::database::DatabaseView;
|
||||
|
||||
#[inline]
|
||||
fn sum_matches_attribute_index(matches: &[Match]) -> u32 {
|
||||
fn sum_matches_attribute_index(matches: &Matches) -> u32 {
|
||||
// note that GroupBy will never return an empty group
|
||||
// so we can do this assumption safely
|
||||
GroupBy::new(matches, match_query_index).map(|group| unsafe {
|
||||
group.get_unchecked(0).attribute.word_index()
|
||||
matches.query_index_groups().map(|group| {
|
||||
unsafe { group.get_unchecked(0).attribute.word_index() }
|
||||
}).sum()
|
||||
}
|
||||
|
||||
|
@ -2,9 +2,8 @@ use std::cmp::{self, Ordering};
|
||||
use std::ops::Deref;
|
||||
|
||||
use rocksdb::DB;
|
||||
use group_by::GroupBy;
|
||||
|
||||
use crate::rank::{match_query_index, Document};
|
||||
use crate::rank::{Document, Matches};
|
||||
use crate::rank::criterion::Criterion;
|
||||
use crate::database::DatabaseView;
|
||||
use crate::Match;
|
||||
@ -34,9 +33,9 @@ fn min_proximity(lhs: &[Match], rhs: &[Match]) -> u32 {
|
||||
min_prox
|
||||
}
|
||||
|
||||
fn matches_proximity(matches: &[Match]) -> u32 {
|
||||
fn matches_proximity(matches: &Matches) -> u32 {
|
||||
let mut proximity = 0;
|
||||
let mut iter = GroupBy::new(matches, match_query_index);
|
||||
let mut iter = matches.query_index_groups();
|
||||
|
||||
// iterate over groups by windows of size 2
|
||||
let mut last = iter.next();
|
||||
@ -91,6 +90,7 @@ mod tests {
|
||||
// soup -> of = 8
|
||||
// + of -> the = 1
|
||||
// + the -> day = 8 (not 1)
|
||||
let matches = Matches::from_unsorted_matches(matches.to_vec());
|
||||
assert_eq!(matches_proximity(matches), 17);
|
||||
}
|
||||
|
||||
|
@ -2,6 +2,11 @@ pub mod criterion;
|
||||
mod query_builder;
|
||||
mod distinct_map;
|
||||
|
||||
use std::slice::Windows;
|
||||
|
||||
use sdset::SetBuf;
|
||||
use group_by::GroupBy;
|
||||
|
||||
use crate::{Match, DocumentId};
|
||||
|
||||
pub use self::query_builder::{FilterFunc, QueryBuilder, DistinctQueryBuilder};
|
||||
@ -14,20 +19,70 @@ fn match_query_index(a: &Match, b: &Match) -> bool {
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Document {
|
||||
pub id: DocumentId,
|
||||
pub matches: Vec<Match>,
|
||||
pub matches: Matches,
|
||||
}
|
||||
|
||||
impl Document {
|
||||
pub fn new(doc: DocumentId, match_: Match) -> Self {
|
||||
unsafe { Self::from_sorted_matches(doc, vec![match_]) }
|
||||
let matches = SetBuf::new_unchecked(vec![match_]);
|
||||
Self::from_matches(doc, matches)
|
||||
}
|
||||
|
||||
pub fn from_matches(doc: DocumentId, mut matches: Vec<Match>) -> Self {
|
||||
matches.sort_unstable();
|
||||
unsafe { Self::from_sorted_matches(doc, matches) }
|
||||
}
|
||||
pub fn from_matches(id: DocumentId, matches: SetBuf<Match>) -> Self {
|
||||
let mut last = 0;
|
||||
let mut slices = vec![0];
|
||||
for group in GroupBy::new(&matches, match_query_index) {
|
||||
let index = last + group.len();
|
||||
slices.push(index);
|
||||
last = index;
|
||||
}
|
||||
|
||||
pub unsafe fn from_sorted_matches(id: DocumentId, matches: Vec<Match>) -> Self {
|
||||
let matches = Matches { matches, slices };
|
||||
Self { id, matches }
|
||||
}
|
||||
|
||||
pub fn from_unsorted_matches(doc: DocumentId, mut matches: Vec<Match>) -> Self {
|
||||
matches.sort_unstable();
|
||||
let matches = SetBuf::new_unchecked(matches);
|
||||
Self::from_matches(doc, matches)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Matches {
|
||||
matches: SetBuf<Match>,
|
||||
slices: Vec<usize>,
|
||||
}
|
||||
|
||||
impl Matches {
|
||||
pub fn query_index_groups(&self) -> QueryIndexGroups {
|
||||
QueryIndexGroups {
|
||||
matches: &self.matches,
|
||||
windows: self.slices.windows(2),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub struct QueryIndexGroups<'a, 'b> {
|
||||
matches: &'a [Match],
|
||||
windows: Windows<'b, usize>,
|
||||
}
|
||||
|
||||
impl<'a, 'b> Iterator for QueryIndexGroups<'a, 'b> {
|
||||
type Item = &'a [Match];
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
self.windows.next().map(|range| {
|
||||
match *range {
|
||||
[left, right] => &self.matches[left..right],
|
||||
_ => unreachable!()
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// impl ExactSizeIterator for QueryIndexGroups<'_, '_> {
|
||||
// fn len(&self) -> usize {
|
||||
// self.windows.len() // FIXME (+1) ?
|
||||
// }
|
||||
// }
|
||||
|
@ -116,7 +116,7 @@ where D: Deref<Target=DB>,
|
||||
}
|
||||
}
|
||||
|
||||
matches.into_iter().map(|(id, matches)| Document::from_matches(id, matches)).collect()
|
||||
matches.into_iter().map(|(id, m)| Document::from_unsorted_matches(id, m)).collect()
|
||||
}
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user