mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-26 23:04:26 +01:00
feat: Use the GroupBy/Mut Traits of the slice-group-by library
This commit is contained in:
parent
820f1f9ac6
commit
c2f0df3f73
@ -19,6 +19,7 @@ sdset = "0.3"
|
|||||||
serde = "1.0"
|
serde = "1.0"
|
||||||
serde_derive = "1.0"
|
serde_derive = "1.0"
|
||||||
serde_json = { version = "1.0", features = ["preserve_order"] }
|
serde_json = { version = "1.0", features = ["preserve_order"] }
|
||||||
|
slice-group-by = "0.2"
|
||||||
unidecode = "0.3"
|
unidecode = "0.3"
|
||||||
|
|
||||||
[dependencies.toml]
|
[dependencies.toml]
|
||||||
@ -30,16 +31,12 @@ rev = "0372ba6"
|
|||||||
git = "https://github.com/pingcap/rust-rocksdb.git"
|
git = "https://github.com/pingcap/rust-rocksdb.git"
|
||||||
rev = "306e201"
|
rev = "306e201"
|
||||||
|
|
||||||
[dependencies.group-by]
|
|
||||||
git = "https://github.com/Kerollmops/group-by.git"
|
|
||||||
rev = "5a113fe"
|
|
||||||
|
|
||||||
[features]
|
[features]
|
||||||
default = ["simd"]
|
default = ["simd"]
|
||||||
i128 = ["bincode/i128", "byteorder/i128"]
|
i128 = ["bincode/i128", "byteorder/i128"]
|
||||||
portable = ["rocksdb/portable"]
|
portable = ["rocksdb/portable"]
|
||||||
simd = ["rocksdb/sse"]
|
simd = ["rocksdb/sse"]
|
||||||
nightly = ["hashbrown/nightly", "group-by/nightly"]
|
nightly = ["hashbrown/nightly", "slice-group-by/nightly"]
|
||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
csv = "1.0"
|
csv = "1.0"
|
||||||
|
@ -2,7 +2,7 @@ use std::cmp::Ordering;
|
|||||||
use std::ops::Deref;
|
use std::ops::Deref;
|
||||||
|
|
||||||
use rocksdb::DB;
|
use rocksdb::DB;
|
||||||
use group_by::GroupBy;
|
use slice_group_by::GroupBy;
|
||||||
|
|
||||||
use crate::rank::{match_query_index, Document};
|
use crate::rank::{match_query_index, Document};
|
||||||
use crate::rank::criterion::Criterion;
|
use crate::rank::criterion::Criterion;
|
||||||
@ -16,7 +16,7 @@ fn contains_exact(matches: &&[Match]) -> bool {
|
|||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
fn number_exact_matches(matches: &[Match]) -> usize {
|
fn number_exact_matches(matches: &[Match]) -> usize {
|
||||||
GroupBy::new(matches, match_query_index).filter(contains_exact).count()
|
matches.linear_group_by(match_query_index).filter(contains_exact).count()
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, Copy)]
|
#[derive(Debug, Clone, Copy)]
|
||||||
|
@ -2,7 +2,7 @@ use std::cmp::Ordering;
|
|||||||
use std::ops::Deref;
|
use std::ops::Deref;
|
||||||
|
|
||||||
use rocksdb::DB;
|
use rocksdb::DB;
|
||||||
use group_by::GroupBy;
|
use slice_group_by::GroupBy;
|
||||||
|
|
||||||
use crate::rank::{match_query_index, Document};
|
use crate::rank::{match_query_index, Document};
|
||||||
use crate::rank::criterion::Criterion;
|
use crate::rank::criterion::Criterion;
|
||||||
@ -11,7 +11,7 @@ use crate::Match;
|
|||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
fn number_of_query_words(matches: &[Match]) -> usize {
|
fn number_of_query_words(matches: &[Match]) -> usize {
|
||||||
GroupBy::new(matches, match_query_index).count()
|
matches.linear_group_by(match_query_index).count()
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, Copy)]
|
#[derive(Debug, Clone, Copy)]
|
||||||
|
@ -2,8 +2,7 @@ use std::cmp::Ordering;
|
|||||||
use std::ops::Deref;
|
use std::ops::Deref;
|
||||||
|
|
||||||
use rocksdb::DB;
|
use rocksdb::DB;
|
||||||
|
use slice_group_by::GroupBy;
|
||||||
use group_by::GroupBy;
|
|
||||||
|
|
||||||
use crate::rank::{match_query_index, Document};
|
use crate::rank::{match_query_index, Document};
|
||||||
use crate::rank::criterion::Criterion;
|
use crate::rank::criterion::Criterion;
|
||||||
@ -17,7 +16,7 @@ fn sum_matches_typos(matches: &[Match]) -> isize {
|
|||||||
|
|
||||||
// note that GroupBy will never return an empty group
|
// note that GroupBy will never return an empty group
|
||||||
// so we can do this assumption safely
|
// so we can do this assumption safely
|
||||||
for group in GroupBy::new(matches, match_query_index) {
|
for group in matches.linear_group_by(match_query_index) {
|
||||||
sum_typos += unsafe { group.get_unchecked(0).distance as isize };
|
sum_typos += unsafe { group.get_unchecked(0).distance as isize };
|
||||||
number_words += 1;
|
number_words += 1;
|
||||||
}
|
}
|
||||||
|
@ -2,7 +2,7 @@ use std::cmp::Ordering;
|
|||||||
use std::ops::Deref;
|
use std::ops::Deref;
|
||||||
|
|
||||||
use rocksdb::DB;
|
use rocksdb::DB;
|
||||||
use group_by::GroupBy;
|
use slice_group_by::GroupBy;
|
||||||
|
|
||||||
use crate::database::DatabaseView;
|
use crate::database::DatabaseView;
|
||||||
use crate::rank::{match_query_index, Document};
|
use crate::rank::{match_query_index, Document};
|
||||||
@ -13,7 +13,7 @@ use crate::Match;
|
|||||||
fn sum_matches_attributes(matches: &[Match]) -> usize {
|
fn sum_matches_attributes(matches: &[Match]) -> usize {
|
||||||
// note that GroupBy will never return an empty group
|
// note that GroupBy will never return an empty group
|
||||||
// so we can do this assumption safely
|
// so we can do this assumption safely
|
||||||
GroupBy::new(matches, match_query_index).map(|group| {
|
matches.linear_group_by(match_query_index).map(|group| {
|
||||||
unsafe { group.get_unchecked(0).attribute.attribute() as usize }
|
unsafe { group.get_unchecked(0).attribute.attribute() as usize }
|
||||||
}).sum()
|
}).sum()
|
||||||
}
|
}
|
||||||
|
@ -2,7 +2,7 @@ use std::cmp::Ordering;
|
|||||||
use std::ops::Deref;
|
use std::ops::Deref;
|
||||||
|
|
||||||
use rocksdb::DB;
|
use rocksdb::DB;
|
||||||
use group_by::GroupBy;
|
use slice_group_by::GroupBy;
|
||||||
|
|
||||||
use crate::database::DatabaseView;
|
use crate::database::DatabaseView;
|
||||||
use crate::rank::{match_query_index, Document};
|
use crate::rank::{match_query_index, Document};
|
||||||
@ -13,7 +13,7 @@ use crate::Match;
|
|||||||
fn sum_matches_attribute_index(matches: &[Match]) -> usize {
|
fn sum_matches_attribute_index(matches: &[Match]) -> usize {
|
||||||
// note that GroupBy will never return an empty group
|
// note that GroupBy will never return an empty group
|
||||||
// so we can do this assumption safely
|
// so we can do this assumption safely
|
||||||
GroupBy::new(matches, match_query_index).map(|group| {
|
matches.linear_group_by(match_query_index).map(|group| {
|
||||||
unsafe { group.get_unchecked(0).attribute.word_index() as usize }
|
unsafe { group.get_unchecked(0).attribute.word_index() as usize }
|
||||||
}).sum()
|
}).sum()
|
||||||
}
|
}
|
||||||
|
@ -2,7 +2,7 @@ use std::cmp::{self, Ordering};
|
|||||||
use std::ops::Deref;
|
use std::ops::Deref;
|
||||||
|
|
||||||
use rocksdb::DB;
|
use rocksdb::DB;
|
||||||
use group_by::GroupBy;
|
use slice_group_by::GroupBy;
|
||||||
|
|
||||||
use crate::rank::{match_query_index, Document};
|
use crate::rank::{match_query_index, Document};
|
||||||
use crate::rank::criterion::Criterion;
|
use crate::rank::criterion::Criterion;
|
||||||
@ -36,7 +36,7 @@ fn min_proximity(lhs: &[Match], rhs: &[Match]) -> u32 {
|
|||||||
|
|
||||||
fn matches_proximity(matches: &[Match]) -> u32 {
|
fn matches_proximity(matches: &[Match]) -> u32 {
|
||||||
let mut proximity = 0;
|
let mut proximity = 0;
|
||||||
let mut iter = GroupBy::new(matches, match_query_index);
|
let mut iter = matches.linear_group_by(match_query_index);
|
||||||
|
|
||||||
// iterate over groups by windows of size 2
|
// iterate over groups by windows of size 2
|
||||||
let mut last = iter.next();
|
let mut last = iter.next();
|
||||||
|
@ -4,7 +4,7 @@ use std::error::Error;
|
|||||||
use std::hash::Hash;
|
use std::hash::Hash;
|
||||||
use std::rc::Rc;
|
use std::rc::Rc;
|
||||||
|
|
||||||
use group_by::BinaryGroupByMut;
|
use slice_group_by::GroupByMut;
|
||||||
use hashbrown::HashMap;
|
use hashbrown::HashMap;
|
||||||
use fst::Streamer;
|
use fst::Streamer;
|
||||||
use rocksdb::DB;
|
use rocksdb::DB;
|
||||||
@ -164,7 +164,7 @@ where D: Deref<Target=DB>,
|
|||||||
});
|
});
|
||||||
info!("criterion {} sort took {}", ci, elapsed);
|
info!("criterion {} sort took {}", ci, elapsed);
|
||||||
|
|
||||||
for group in BinaryGroupByMut::new(group, |a, b| criterion.eq(a, b, view)) {
|
for group in group.binary_group_by_mut(|a, b| criterion.eq(a, b, view)) {
|
||||||
documents_seen += group.len();
|
documents_seen += group.len();
|
||||||
groups.push(group);
|
groups.push(group);
|
||||||
|
|
||||||
@ -241,7 +241,7 @@ where D: Deref<Target=DB>,
|
|||||||
|
|
||||||
group.sort_unstable_by(|a, b| criterion.evaluate(a, b, view));
|
group.sort_unstable_by(|a, b| criterion.evaluate(a, b, view));
|
||||||
|
|
||||||
for group in BinaryGroupByMut::new(group, |a, b| criterion.eq(a, b, view)) {
|
for group in group.binary_group_by_mut(|a, b| criterion.eq(a, b, view)) {
|
||||||
// we must compute the real distinguished len of this sub-group
|
// we must compute the real distinguished len of this sub-group
|
||||||
for document in group.iter() {
|
for document in group.iter() {
|
||||||
let filter_accepted = match &self.inner.filter {
|
let filter_accepted = match &self.inner.filter {
|
||||||
|
Loading…
Reference in New Issue
Block a user