diff --git a/examples/query-database.rs b/examples/query-database.rs index 20c04aca9..e61e2d0ab 100644 --- a/examples/query-database.rs +++ b/examples/query-database.rs @@ -116,7 +116,7 @@ fn main() -> Result<(), Box> { }; print!("{}: ", name); - let areas = create_highlight_areas(&text, doc.matches.as_matches(), attr); + let areas = create_highlight_areas(&text, &doc.matches, attr); display_highlights(&text, &areas)?; println!(); } @@ -125,7 +125,7 @@ fn main() -> Result<(), Box> { } let mut matching_attributes = HashSet::new(); - for _match in doc.matches.as_matches() { + for _match in doc.matches { let attr = SchemaAttr::new(_match.attribute.attribute()); let name = schema.attribute_name(attr); matching_attributes.insert(name); diff --git a/src/rank/criterion/exact.rs b/src/rank/criterion/exact.rs index 759bd951e..df670161f 100644 --- a/src/rank/criterion/exact.rs +++ b/src/rank/criterion/exact.rs @@ -2,8 +2,9 @@ use std::cmp::Ordering; use std::ops::Deref; use rocksdb::DB; +use group_by::GroupBy; -use crate::rank::{Document, Matches}; +use crate::rank::{match_query_index, Document}; use crate::rank::criterion::Criterion; use crate::database::DatabaseView; use crate::Match; @@ -14,8 +15,8 @@ fn contains_exact(matches: &&[Match]) -> bool { } #[inline] -fn number_exact_matches(matches: &Matches) -> usize { - matches.query_index_groups().filter(contains_exact).count() +fn number_exact_matches(matches: &[Match]) -> usize { + GroupBy::new(matches, match_query_index).filter(contains_exact).count() } #[derive(Debug, Clone, Copy)] diff --git a/src/rank/criterion/number_of_words.rs b/src/rank/criterion/number_of_words.rs index 23cf36a2c..855d997ba 100644 --- a/src/rank/criterion/number_of_words.rs +++ b/src/rank/criterion/number_of_words.rs @@ -2,14 +2,16 @@ use std::cmp::Ordering; use std::ops::Deref; use rocksdb::DB; +use group_by::GroupBy; -use crate::rank::{Document, Matches}; +use crate::rank::{match_query_index, Document}; use crate::rank::criterion::Criterion; use crate::database::DatabaseView; +use crate::Match; #[inline] -fn number_of_query_words(matches: &Matches) -> usize { - matches.query_index_groups().count() +fn number_of_query_words(matches: &[Match]) -> usize { + GroupBy::new(matches, match_query_index).count() } #[derive(Debug, Clone, Copy)] diff --git a/src/rank/criterion/sum_of_typos.rs b/src/rank/criterion/sum_of_typos.rs index 400650ad2..085ad19cc 100644 --- a/src/rank/criterion/sum_of_typos.rs +++ b/src/rank/criterion/sum_of_typos.rs @@ -3,19 +3,22 @@ use std::ops::Deref; use rocksdb::DB; -use crate::rank::{Document, Matches}; +use group_by::GroupBy; + +use crate::rank::{match_query_index, Document}; use crate::rank::criterion::Criterion; use crate::database::DatabaseView; +use crate::Match; #[inline] -fn sum_matches_typos(matches: &Matches) -> i8 { +fn sum_matches_typos(matches: &[Match]) -> isize { let mut sum_typos = 0; let mut number_words = 0; // note that GroupBy will never return an empty group // so we can do this assumption safely - for group in matches.query_index_groups() { - sum_typos += unsafe { group.get_unchecked(0).distance } as i8; + for group in GroupBy::new(matches, match_query_index) { + sum_typos += unsafe { group.get_unchecked(0).distance } as isize; number_words += 1; } @@ -41,7 +44,7 @@ where D: Deref mod tests { use super::*; - use crate::{Match, DocumentId, Attribute, WordArea}; + use crate::{DocumentId, Attribute, WordArea}; // typing: "Geox CEO" // @@ -66,7 +69,10 @@ mod tests { word_area: WordArea::new_faillible(0, 6) }, ]; - Document::from_unsorted_matches(DocumentId(0), matches) + Document { + id: DocumentId(0), + matches: matches, + } }; let doc1 = { @@ -86,7 +92,10 @@ mod tests { word_area: WordArea::new_faillible(0, 6) }, ]; - Document::from_unsorted_matches(DocumentId(1), matches) + Document { + id: DocumentId(1), + matches: matches, + } }; let lhs = sum_matches_typos(&doc0.matches); @@ -117,7 +126,10 @@ mod tests { word_area: WordArea::new_faillible(0, 6) }, ]; - Document::from_unsorted_matches(DocumentId(0), matches) + Document { + id: DocumentId(0), + matches: matches, + } }; let doc1 = { @@ -130,7 +142,10 @@ mod tests { word_area: WordArea::new_faillible(0, 6) }, ]; - Document::from_unsorted_matches(DocumentId(1), matches) + Document { + id: DocumentId(1), + matches: matches, + } }; let lhs = sum_matches_typos(&doc0.matches); @@ -161,7 +176,10 @@ mod tests { word_area: WordArea::new_faillible(0, 6) }, ]; - Document::from_unsorted_matches(DocumentId(0), matches) + Document { + id: DocumentId(0), + matches: matches, + } }; let doc1 = { @@ -174,7 +192,10 @@ mod tests { word_area: WordArea::new_faillible(0, 6) }, ]; - Document::from_unsorted_matches(DocumentId(1), matches) + Document { + id: DocumentId(1), + matches: matches, + } }; let lhs = sum_matches_typos(&doc0.matches); diff --git a/src/rank/criterion/sum_of_words_attribute.rs b/src/rank/criterion/sum_of_words_attribute.rs index aea21c35f..90ee9240e 100644 --- a/src/rank/criterion/sum_of_words_attribute.rs +++ b/src/rank/criterion/sum_of_words_attribute.rs @@ -2,16 +2,18 @@ use std::cmp::Ordering; use std::ops::Deref; use rocksdb::DB; +use group_by::GroupBy; -use crate::rank::{Document, Matches}; use crate::database::DatabaseView; +use crate::rank::{match_query_index, Document}; use crate::rank::criterion::Criterion; +use crate::Match; #[inline] -fn sum_matches_attributes(matches: &Matches) -> usize { +fn sum_matches_attributes(matches: &[Match]) -> usize { // note that GroupBy will never return an empty group // so we can do this assumption safely - matches.query_index_groups().map(|group| { + GroupBy::new(matches, match_query_index).map(|group| { unsafe { group.get_unchecked(0).attribute.attribute() as usize } }).sum() } diff --git a/src/rank/criterion/sum_of_words_position.rs b/src/rank/criterion/sum_of_words_position.rs index 0b27184ba..253f9e267 100644 --- a/src/rank/criterion/sum_of_words_position.rs +++ b/src/rank/criterion/sum_of_words_position.rs @@ -2,16 +2,18 @@ use std::cmp::Ordering; use std::ops::Deref; use rocksdb::DB; +use group_by::GroupBy; -use crate::rank::{Document, Matches}; -use crate::rank::criterion::Criterion; use crate::database::DatabaseView; +use crate::rank::{match_query_index, Document}; +use crate::rank::criterion::Criterion; +use crate::Match; #[inline] -fn sum_matches_attribute_index(matches: &Matches) -> usize { +fn sum_matches_attribute_index(matches: &[Match]) -> usize { // note that GroupBy will never return an empty group // so we can do this assumption safely - matches.query_index_groups().map(|group| { + GroupBy::new(matches, match_query_index).map(|group| { unsafe { group.get_unchecked(0).attribute.word_index() as usize } }).sum() } diff --git a/src/rank/criterion/words_proximity.rs b/src/rank/criterion/words_proximity.rs index 7fe3102d3..fc80dfaec 100644 --- a/src/rank/criterion/words_proximity.rs +++ b/src/rank/criterion/words_proximity.rs @@ -2,8 +2,9 @@ use std::cmp::{self, Ordering}; use std::ops::Deref; use rocksdb::DB; +use group_by::GroupBy; -use crate::rank::{Document, Matches}; +use crate::rank::{match_query_index, Document}; use crate::rank::criterion::Criterion; use crate::database::DatabaseView; use crate::Match; @@ -33,9 +34,9 @@ fn min_proximity(lhs: &[Match], rhs: &[Match]) -> u32 { min_prox } -fn matches_proximity(matches: &Matches) -> u32 { +fn matches_proximity(matches: &[Match]) -> u32 { let mut proximity = 0; - let mut iter = matches.query_index_groups(); + let mut iter = GroupBy::new(matches, match_query_index); // iterate over groups by windows of size 2 let mut last = iter.next(); @@ -90,8 +91,7 @@ mod tests { // soup -> of = 8 // + of -> the = 1 // + the -> day = 8 (not 1) - let matches = Matches::from_unsorted(matches.to_vec()); - assert_eq!(matches_proximity(&matches), 17); + assert_eq!(matches_proximity(matches), 17); } #[test] @@ -118,8 +118,7 @@ mod tests { // soup -> of = 1 // + of -> the = 1 // + the -> day = 1 - let matches = Matches::from_unsorted(matches.to_vec()); - assert_eq!(matches_proximity(&matches), 3); + assert_eq!(matches_proximity(matches), 3); } } @@ -153,8 +152,6 @@ mod bench { matches.push(match_); } - let matches = Matches::from_unsorted(matches.to_vec()); - bench.iter(|| { let proximity = matches_proximity(&matches); test::black_box(move || proximity) diff --git a/src/rank/mod.rs b/src/rank/mod.rs index 91392cbee..4d1b6b1ea 100644 --- a/src/rank/mod.rs +++ b/src/rank/mod.rs @@ -2,13 +2,6 @@ pub mod criterion; mod query_builder; mod distinct_map; -use std::iter::FusedIterator; -use std::slice::Iter; -use std::ops::Range; - -use sdset::SetBuf; -use group_by::GroupBy; - use crate::{Match, DocumentId}; pub use self::query_builder::{FilterFunc, QueryBuilder, DistinctQueryBuilder}; @@ -21,120 +14,20 @@ fn match_query_index(a: &Match, b: &Match) -> bool { #[derive(Debug, Clone)] pub struct Document { pub id: DocumentId, - pub matches: Matches, + pub matches: Vec, } impl Document { - pub fn new(id: DocumentId, match_: Match) -> Self { - let matches = SetBuf::new_unchecked(vec![match_]); - Self::from_matches(id, matches) + pub fn new(doc: DocumentId, match_: Match) -> Self { + unsafe { Self::from_sorted_matches(doc, vec![match_]) } } - pub fn from_matches(id: DocumentId, matches: SetBuf) -> Self { - let matches = Matches::new(matches); - Self { id, matches } - } - - pub fn from_unsorted_matches(id: DocumentId, matches: Vec) -> Self { - let matches = Matches::from_unsorted(matches); - Self { id, matches } - } -} - -#[derive(Debug, Clone)] -pub struct Matches { - matches: SetBuf, - slices: Vec>, -} - -impl Matches { - pub fn new(matches: SetBuf) -> Matches { - let mut last_end = 0; - let mut slices = Vec::new(); - - for group in GroupBy::new(&matches, match_query_index) { - let start = last_end; - let end = last_end + group.len(); - slices.push(Range { start, end }); - last_end = end; - } - - Matches { matches, slices } - } - - pub fn from_unsorted(mut matches: Vec) -> Matches { + pub fn from_matches(doc: DocumentId, mut matches: Vec) -> Self { matches.sort_unstable(); - let matches = SetBuf::new_unchecked(matches); - Matches::new(matches) + unsafe { Self::from_sorted_matches(doc, matches) } } - pub fn query_index_groups(&self) -> QueryIndexGroups { - QueryIndexGroups { - matches: &self.matches, - slices: self.slices.iter(), - } - } - - pub fn as_matches(&self) -> &[Match] { - &self.matches - } -} - -pub struct QueryIndexGroups<'a, 'b> { - matches: &'a [Match], - slices: Iter<'b, Range>, -} - -impl<'a> Iterator for QueryIndexGroups<'a, '_> { - type Item = &'a [Match]; - - #[inline] - fn next(&mut self) -> Option { - self.slices.next().cloned().map(|range| { - unsafe { self.matches.get_unchecked(range) } - }) - } - - #[inline] - fn size_hint(&self) -> (usize, Option) { - self.slices.size_hint() - } - - #[inline] - fn count(self) -> usize { - self.len() - } - - #[inline] - fn nth(&mut self, n: usize) -> Option { - self.slices.nth(n).cloned().map(|range| { - unsafe { self.matches.get_unchecked(range) } - }) - } - - #[inline] - fn last(self) -> Option { - let (matches, slices) = (self.matches, self.slices); - slices.last().cloned().map(|range| { - unsafe { matches.get_unchecked(range) } - }) - } -} - -impl ExactSizeIterator for QueryIndexGroups<'_, '_> { - #[inline] - fn len(&self) -> usize { - self.slices.len() - } -} - -impl FusedIterator for QueryIndexGroups<'_, '_> { } - -impl DoubleEndedIterator for QueryIndexGroups<'_, '_> { - #[inline] - fn next_back(&mut self) -> Option { - self.slices.next_back().cloned().map(|range| { - unsafe { self.matches.get_unchecked(range) } - }) + pub unsafe fn from_sorted_matches(id: DocumentId, matches: Vec) -> Self { + Self { id, matches } } } diff --git a/src/rank/query_builder.rs b/src/rank/query_builder.rs index eb8f21582..6b03bf4e9 100644 --- a/src/rank/query_builder.rs +++ b/src/rank/query_builder.rs @@ -119,7 +119,7 @@ where D: Deref, info!("{} documents to classify", matches.len()); - matches.into_iter().map(|(i, m)| Document::from_unsorted_matches(i, m)).collect() + matches.into_iter().map(|(i, m)| Document::from_matches(i, m)).collect() } }