diff --git a/Cargo.toml b/Cargo.toml index 1cbbe0aa6..5e7bba1fb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -19,6 +19,7 @@ sdset = "0.3" serde = "1.0" serde_derive = "1.0" serde_json = { version = "1.0", features = ["preserve_order"] } +slice-group-by = "0.2" unidecode = "0.3" [dependencies.toml] @@ -30,16 +31,12 @@ rev = "0372ba6" git = "https://github.com/pingcap/rust-rocksdb.git" rev = "306e201" -[dependencies.group-by] -git = "https://github.com/Kerollmops/group-by.git" -rev = "5a113fe" - [features] default = ["simd"] i128 = ["bincode/i128", "byteorder/i128"] portable = ["rocksdb/portable"] simd = ["rocksdb/sse"] -nightly = ["hashbrown/nightly", "group-by/nightly"] +nightly = ["hashbrown/nightly", "slice-group-by/nightly"] [dev-dependencies] csv = "1.0" diff --git a/src/rank/criterion/exact.rs b/src/rank/criterion/exact.rs index df670161f..574649ed6 100644 --- a/src/rank/criterion/exact.rs +++ b/src/rank/criterion/exact.rs @@ -2,7 +2,7 @@ use std::cmp::Ordering; use std::ops::Deref; use rocksdb::DB; -use group_by::GroupBy; +use slice_group_by::GroupBy; use crate::rank::{match_query_index, Document}; use crate::rank::criterion::Criterion; @@ -16,7 +16,7 @@ fn contains_exact(matches: &&[Match]) -> bool { #[inline] fn number_exact_matches(matches: &[Match]) -> usize { - GroupBy::new(matches, match_query_index).filter(contains_exact).count() + matches.linear_group_by(match_query_index).filter(contains_exact).count() } #[derive(Debug, Clone, Copy)] diff --git a/src/rank/criterion/number_of_words.rs b/src/rank/criterion/number_of_words.rs index 855d997ba..ac9ef9858 100644 --- a/src/rank/criterion/number_of_words.rs +++ b/src/rank/criterion/number_of_words.rs @@ -2,7 +2,7 @@ use std::cmp::Ordering; use std::ops::Deref; use rocksdb::DB; -use group_by::GroupBy; +use slice_group_by::GroupBy; use crate::rank::{match_query_index, Document}; use crate::rank::criterion::Criterion; @@ -11,7 +11,7 @@ use crate::Match; #[inline] fn number_of_query_words(matches: &[Match]) -> usize { - GroupBy::new(matches, match_query_index).count() + matches.linear_group_by(match_query_index).count() } #[derive(Debug, Clone, Copy)] diff --git a/src/rank/criterion/sum_of_typos.rs b/src/rank/criterion/sum_of_typos.rs index ac6340591..be742e787 100644 --- a/src/rank/criterion/sum_of_typos.rs +++ b/src/rank/criterion/sum_of_typos.rs @@ -2,8 +2,7 @@ use std::cmp::Ordering; use std::ops::Deref; use rocksdb::DB; - -use group_by::GroupBy; +use slice_group_by::GroupBy; use crate::rank::{match_query_index, Document}; use crate::rank::criterion::Criterion; @@ -17,7 +16,7 @@ fn sum_matches_typos(matches: &[Match]) -> isize { // note that GroupBy will never return an empty group // so we can do this assumption safely - for group in GroupBy::new(matches, match_query_index) { + for group in matches.linear_group_by(match_query_index) { sum_typos += unsafe { group.get_unchecked(0).distance as isize }; number_words += 1; } diff --git a/src/rank/criterion/sum_of_words_attribute.rs b/src/rank/criterion/sum_of_words_attribute.rs index 90ee9240e..fb4910c51 100644 --- a/src/rank/criterion/sum_of_words_attribute.rs +++ b/src/rank/criterion/sum_of_words_attribute.rs @@ -2,7 +2,7 @@ use std::cmp::Ordering; use std::ops::Deref; use rocksdb::DB; -use group_by::GroupBy; +use slice_group_by::GroupBy; use crate::database::DatabaseView; use crate::rank::{match_query_index, Document}; @@ -13,7 +13,7 @@ use crate::Match; fn sum_matches_attributes(matches: &[Match]) -> usize { // note that GroupBy will never return an empty group // so we can do this assumption safely - GroupBy::new(matches, match_query_index).map(|group| { + matches.linear_group_by(match_query_index).map(|group| { unsafe { group.get_unchecked(0).attribute.attribute() as usize } }).sum() } diff --git a/src/rank/criterion/sum_of_words_position.rs b/src/rank/criterion/sum_of_words_position.rs index 253f9e267..0978ac5fd 100644 --- a/src/rank/criterion/sum_of_words_position.rs +++ b/src/rank/criterion/sum_of_words_position.rs @@ -2,7 +2,7 @@ use std::cmp::Ordering; use std::ops::Deref; use rocksdb::DB; -use group_by::GroupBy; +use slice_group_by::GroupBy; use crate::database::DatabaseView; use crate::rank::{match_query_index, Document}; @@ -13,7 +13,7 @@ use crate::Match; fn sum_matches_attribute_index(matches: &[Match]) -> usize { // note that GroupBy will never return an empty group // so we can do this assumption safely - GroupBy::new(matches, match_query_index).map(|group| { + matches.linear_group_by(match_query_index).map(|group| { unsafe { group.get_unchecked(0).attribute.word_index() as usize } }).sum() } diff --git a/src/rank/criterion/words_proximity.rs b/src/rank/criterion/words_proximity.rs index fc80dfaec..a61de6b62 100644 --- a/src/rank/criterion/words_proximity.rs +++ b/src/rank/criterion/words_proximity.rs @@ -2,7 +2,7 @@ use std::cmp::{self, Ordering}; use std::ops::Deref; use rocksdb::DB; -use group_by::GroupBy; +use slice_group_by::GroupBy; use crate::rank::{match_query_index, Document}; use crate::rank::criterion::Criterion; @@ -36,7 +36,7 @@ fn min_proximity(lhs: &[Match], rhs: &[Match]) -> u32 { fn matches_proximity(matches: &[Match]) -> u32 { let mut proximity = 0; - let mut iter = GroupBy::new(matches, match_query_index); + let mut iter = matches.linear_group_by(match_query_index); // iterate over groups by windows of size 2 let mut last = iter.next(); diff --git a/src/rank/query_builder.rs b/src/rank/query_builder.rs index b38a1dae8..8146fc7fa 100644 --- a/src/rank/query_builder.rs +++ b/src/rank/query_builder.rs @@ -4,7 +4,7 @@ use std::error::Error; use std::hash::Hash; use std::rc::Rc; -use group_by::BinaryGroupByMut; +use slice_group_by::GroupByMut; use hashbrown::HashMap; use fst::Streamer; use rocksdb::DB; @@ -164,7 +164,7 @@ where D: Deref, }); info!("criterion {} sort took {}", ci, elapsed); - for group in BinaryGroupByMut::new(group, |a, b| criterion.eq(a, b, view)) { + for group in group.binary_group_by_mut(|a, b| criterion.eq(a, b, view)) { documents_seen += group.len(); groups.push(group); @@ -241,7 +241,7 @@ where D: Deref, group.sort_unstable_by(|a, b| criterion.evaluate(a, b, view)); - for group in BinaryGroupByMut::new(group, |a, b| criterion.eq(a, b, view)) { + for group in group.binary_group_by_mut(|a, b| criterion.eq(a, b, view)) { // we must compute the real distinguished len of this sub-group for document in group.iter() { let filter_accepted = match &self.inner.filter {