feat: Use the GroupBy/Mut Traits of the slice-group-by library

2025-06-14 20:11:38 +02:00 · 2019-01-30 16:30:27 +01:00 · 2019-01-30 16:30:27 +01:00 · c2f0df3f73
commit c2f0df3f73
parent 820f1f9ac6
8 changed files with 17 additions and 21 deletions
--- a/Cargo.toml
+++ b/Cargo.toml
@ -19,6 +19,7 @@ sdset = "0.3"
 serde = "1.0"
 serde_derive = "1.0"
 serde_json = { version = "1.0", features = ["preserve_order"] }
+slice-group-by = "0.2"
 unidecode = "0.3"
 [dependencies.toml]
@ -30,16 +31,12 @@ rev = "0372ba6"
 git = "https://github.com/pingcap/rust-rocksdb.git"
 rev = "306e201"
-[dependencies.group-by]
-git = "https://github.com/Kerollmops/group-by.git"
-rev = "5a113fe"
 [features]
 default = ["simd"]
 i128 = ["bincode/i128", "byteorder/i128"]
 portable = ["rocksdb/portable"]
 simd = ["rocksdb/sse"]
-nightly = ["hashbrown/nightly", "group-by/nightly"]
+nightly = ["hashbrown/nightly", "slice-group-by/nightly"]
 [dev-dependencies]
 csv = "1.0"
--- a/src/rank/criterion/exact.rs
+++ b/src/rank/criterion/exact.rs
@ -2,7 +2,7 @@ use std::cmp::Ordering;
 use std::ops::Deref;
 use rocksdb::DB;
-use group_by::GroupBy;
+use slice_group_by::GroupBy;
 use crate::rank::{match_query_index, Document};
 use crate::rank::criterion::Criterion;
@ -16,7 +16,7 @@ fn contains_exact(matches: &&[Match]) -> bool {
 #[inline]
 fn number_exact_matches(matches: &[Match]) -> usize {
-    GroupBy::new(matches, match_query_index).filter(contains_exact).count()
+    matches.linear_group_by(match_query_index).filter(contains_exact).count()
 }
 #[derive(Debug, Clone, Copy)]
--- a/src/rank/criterion/number_of_words.rs
+++ b/src/rank/criterion/number_of_words.rs
@ -2,7 +2,7 @@ use std::cmp::Ordering;
 use std::ops::Deref;
 use rocksdb::DB;
-use group_by::GroupBy;
+use slice_group_by::GroupBy;
 use crate::rank::{match_query_index, Document};
 use crate::rank::criterion::Criterion;
@ -11,7 +11,7 @@ use crate::Match;
 #[inline]
 fn number_of_query_words(matches: &[Match]) -> usize {
-    GroupBy::new(matches, match_query_index).count()
+    matches.linear_group_by(match_query_index).count()
 }
 #[derive(Debug, Clone, Copy)]
--- a/src/rank/criterion/sum_of_typos.rs
+++ b/src/rank/criterion/sum_of_typos.rs
@ -2,8 +2,7 @@ use std::cmp::Ordering;
 use std::ops::Deref;
 use rocksdb::DB;
-
+use slice_group_by::GroupBy;
-use group_by::GroupBy;
 use crate::rank::{match_query_index, Document};
 use crate::rank::criterion::Criterion;
@ -17,7 +16,7 @@ fn sum_matches_typos(matches: &[Match]) -> isize {
    // note that GroupBy will never return an empty group
    // so we can do this assumption safely
-    for group in GroupBy::new(matches, match_query_index) {
+    for group in matches.linear_group_by(match_query_index) {
        sum_typos += unsafe { group.get_unchecked(0).distance as isize };
        number_words += 1;
    }
--- a/src/rank/criterion/sum_of_words_attribute.rs
+++ b/src/rank/criterion/sum_of_words_attribute.rs
@ -2,7 +2,7 @@ use std::cmp::Ordering;
 use std::ops::Deref;
 use rocksdb::DB;
-use group_by::GroupBy;
+use slice_group_by::GroupBy;
 use crate::database::DatabaseView;
 use crate::rank::{match_query_index, Document};
@ -13,7 +13,7 @@ use crate::Match;
 fn sum_matches_attributes(matches: &[Match]) -> usize {
    // note that GroupBy will never return an empty group
    // so we can do this assumption safely
-    GroupBy::new(matches, match_query_index).map(|group| {
+    matches.linear_group_by(match_query_index).map(|group| {
        unsafe { group.get_unchecked(0).attribute.attribute() as usize }
    }).sum()
 }
--- a/src/rank/criterion/sum_of_words_position.rs
+++ b/src/rank/criterion/sum_of_words_position.rs
@ -2,7 +2,7 @@ use std::cmp::Ordering;
 use std::ops::Deref;
 use rocksdb::DB;
-use group_by::GroupBy;
+use slice_group_by::GroupBy;
 use crate::database::DatabaseView;
 use crate::rank::{match_query_index, Document};
@ -13,7 +13,7 @@ use crate::Match;
 fn sum_matches_attribute_index(matches: &[Match]) -> usize {
    // note that GroupBy will never return an empty group
    // so we can do this assumption safely
-    GroupBy::new(matches, match_query_index).map(|group| {
+    matches.linear_group_by(match_query_index).map(|group| {
        unsafe { group.get_unchecked(0).attribute.word_index() as usize }
    }).sum()
 }
--- a/src/rank/criterion/words_proximity.rs
+++ b/src/rank/criterion/words_proximity.rs
@ -2,7 +2,7 @@ use std::cmp::{self, Ordering};
 use std::ops::Deref;
 use rocksdb::DB;
-use group_by::GroupBy;
+use slice_group_by::GroupBy;
 use crate::rank::{match_query_index, Document};
 use crate::rank::criterion::Criterion;
@ -36,7 +36,7 @@ fn min_proximity(lhs: &[Match], rhs: &[Match]) -> u32 {
 fn matches_proximity(matches: &[Match]) -> u32 {
    let mut proximity = 0;
-    let mut iter = GroupBy::new(matches, match_query_index);
+    let mut iter = matches.linear_group_by(match_query_index);
    // iterate over groups by windows of size 2
    let mut last = iter.next();
--- a/src/rank/query_builder.rs
+++ b/src/rank/query_builder.rs
@ -4,7 +4,7 @@ use std::error::Error;
 use std::hash::Hash;
 use std::rc::Rc;
-use group_by::BinaryGroupByMut;
+use slice_group_by::GroupByMut;
 use hashbrown::HashMap;
 use fst::Streamer;
 use rocksdb::DB;
@ -164,7 +164,7 @@ where D: Deref<Target=DB>,
                });
                info!("criterion {} sort took {}", ci, elapsed);
-                for group in BinaryGroupByMut::new(group, |a, b| criterion.eq(a, b, view)) {
+                for group in group.binary_group_by_mut(|a, b| criterion.eq(a, b, view)) {
                    documents_seen += group.len();
                    groups.push(group);
@ -241,7 +241,7 @@ where D: Deref<Target=DB>,
                group.sort_unstable_by(|a, b| criterion.evaluate(a, b, view));
-                for group in BinaryGroupByMut::new(group, |a, b| criterion.eq(a, b, view)) {
+                for group in group.binary_group_by_mut(|a, b| criterion.eq(a, b, view)) {
                    // we must compute the real distinguished len of this sub-group
                    for document in group.iter() {
                        let filter_accepted = match &self.inner.filter {