Revert "feat: Pre-compute matches query index groups"

This reverts commit 039a9a4cc7.
This commit is contained in:
Clément Renault 2019-01-06 21:27:41 +01:00
parent a78b5d225f
commit f00b978801
No known key found for this signature in database
GPG Key ID: 0151CDAB43460DAE
8 changed files with 40 additions and 85 deletions

View File

@@ -2,8 +2,9 @@ use std::cmp::Ordering;
use std::ops::Deref; use std::ops::Deref;
use rocksdb::DB; use rocksdb::DB;
use group_by::GroupBy;
use crate::rank::{Document, Matches}; use crate::rank::{match_query_index, Document};
use crate::rank::criterion::Criterion; use crate::rank::criterion::Criterion;
use crate::database::DatabaseView; use crate::database::DatabaseView;
use crate::Match; use crate::Match;
@@ -14,8 +15,8 @@ fn contains_exact(matches: &&[Match]) -> bool {
} }
#[inline] #[inline]
fn number_exact_matches(matches: &Matches) -> usize { fn number_exact_matches(matches: &[Match]) -> usize {
matches.query_index_groups().filter(contains_exact).count() GroupBy::new(matches, match_query_index).filter(contains_exact).count()
} }
#[derive(Debug, Clone, Copy)] #[derive(Debug, Clone, Copy)]

View File

@@ -2,14 +2,16 @@ use std::cmp::Ordering;
use std::ops::Deref; use std::ops::Deref;
use rocksdb::DB; use rocksdb::DB;
use group_by::GroupBy;
use crate::rank::{Document, Matches}; use crate::rank::{match_query_index, Document};
use crate::rank::criterion::Criterion; use crate::rank::criterion::Criterion;
use crate::database::DatabaseView; use crate::database::DatabaseView;
use crate::Match;
#[inline] #[inline]
fn number_of_query_words(matches: &Matches) -> usize { fn number_of_query_words(matches: &[Match]) -> usize {
matches.query_index_groups().count() GroupBy::new(matches, match_query_index).count()
} }
#[derive(Debug, Clone, Copy)] #[derive(Debug, Clone, Copy)]

View File

@@ -3,19 +3,22 @@ use std::ops::Deref;
use rocksdb::DB; use rocksdb::DB;
use crate::rank::{Document, Matches}; use group_by::GroupBy;
use crate::rank::{match_query_index, Document};
use crate::rank::criterion::Criterion; use crate::rank::criterion::Criterion;
use crate::database::DatabaseView; use crate::database::DatabaseView;
use crate::Match;
#[inline] #[inline]
fn sum_matches_typos(matches: &Matches) -> i8 { fn sum_matches_typos(matches: &[Match]) -> isize {
let mut sum_typos = 0; let mut sum_typos = 0;
let mut number_words = 0; let mut number_words = 0;
// note that GroupBy will never return an empty group // note that GroupBy will never return an empty group
// so we can do this assumption safely // so we can do this assumption safely
for group in matches.query_index_groups() { for group in GroupBy::new(matches, match_query_index) {
sum_typos += unsafe { group.get_unchecked(0).distance } as i8; sum_typos += unsafe { group.get_unchecked(0).distance } as isize;
number_words += 1; number_words += 1;
} }
@@ -41,7 +44,7 @@ where D: Deref<Target=DB>
mod tests { mod tests {
use super::*; use super::*;
use crate::{Match, DocumentId, Attribute, WordArea}; use crate::{DocumentId, Attribute, WordArea};
// typing: "Geox CEO" // typing: "Geox CEO"
// //

View File

@@ -2,16 +2,18 @@ use std::cmp::Ordering;
use std::ops::Deref; use std::ops::Deref;
use rocksdb::DB; use rocksdb::DB;
use group_by::GroupBy;
use crate::rank::{Document, Matches};
use crate::database::DatabaseView; use crate::database::DatabaseView;
use crate::rank::{match_query_index, Document};
use crate::rank::criterion::Criterion; use crate::rank::criterion::Criterion;
use crate::Match;
#[inline] #[inline]
fn sum_matches_attributes(matches: &Matches) -> usize { fn sum_matches_attributes(matches: &[Match]) -> usize {
// note that GroupBy will never return an empty group // note that GroupBy will never return an empty group
// so we can do this assumption safely // so we can do this assumption safely
matches.query_index_groups().map(|group| { GroupBy::new(matches, match_query_index).map(|group| {
unsafe { group.get_unchecked(0).attribute.attribute() as usize } unsafe { group.get_unchecked(0).attribute.attribute() as usize }
}).sum() }).sum()
} }

View File

@@ -2,16 +2,18 @@ use std::cmp::Ordering;
use std::ops::Deref; use std::ops::Deref;
use rocksdb::DB; use rocksdb::DB;
use group_by::GroupBy;
use crate::rank::{Document, Matches};
use crate::rank::criterion::Criterion;
use crate::database::DatabaseView; use crate::database::DatabaseView;
use crate::rank::{match_query_index, Document};
use crate::rank::criterion::Criterion;
use crate::Match;
#[inline] #[inline]
fn sum_matches_attribute_index(matches: &Matches) -> usize { fn sum_matches_attribute_index(matches: &[Match]) -> usize {
// note that GroupBy will never return an empty group // note that GroupBy will never return an empty group
// so we can do this assumption safely // so we can do this assumption safely
matches.query_index_groups().map(|group| { GroupBy::new(matches, match_query_index).map(|group| {
unsafe { group.get_unchecked(0).attribute.word_index() as usize } unsafe { group.get_unchecked(0).attribute.word_index() as usize }
}).sum() }).sum()
} }

View File

@@ -2,8 +2,9 @@ use std::cmp::{self, Ordering};
use std::ops::Deref; use std::ops::Deref;
use rocksdb::DB; use rocksdb::DB;
use group_by::GroupBy;
use crate::rank::{Document, Matches}; use crate::rank::{match_query_index, Document};
use crate::rank::criterion::Criterion; use crate::rank::criterion::Criterion;
use crate::database::DatabaseView; use crate::database::DatabaseView;
use crate::Match; use crate::Match;
@@ -33,9 +34,9 @@ fn min_proximity(lhs: &[Match], rhs: &[Match]) -> u32 {
min_prox min_prox
} }
fn matches_proximity(matches: &Matches) -> u32 { fn matches_proximity(matches: &[Match]) -> u32 {
let mut proximity = 0; let mut proximity = 0;
let mut iter = matches.query_index_groups(); let mut iter = GroupBy::new(matches, match_query_index);
// iterate over groups by windows of size 2 // iterate over groups by windows of size 2
let mut last = iter.next(); let mut last = iter.next();
@@ -90,7 +91,6 @@ mod tests {
// soup -> of = 8 // soup -> of = 8
// + of -> the = 1 // + of -> the = 1
// + the -> day = 8 (not 1) // + the -> day = 8 (not 1)
let matches = Matches::from_unsorted_matches(matches.to_vec());
assert_eq!(matches_proximity(matches), 17); assert_eq!(matches_proximity(matches), 17);
} }

View File

@@ -2,11 +2,6 @@ pub mod criterion;
mod query_builder; mod query_builder;
mod distinct_map; mod distinct_map;
use std::slice::Windows;
use sdset::SetBuf;
use group_by::GroupBy;
use crate::{Match, DocumentId}; use crate::{Match, DocumentId};
pub use self::query_builder::{FilterFunc, QueryBuilder, DistinctQueryBuilder}; pub use self::query_builder::{FilterFunc, QueryBuilder, DistinctQueryBuilder};
@@ -19,70 +14,20 @@ fn match_query_index(a: &Match, b: &Match) -> bool {
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
pub struct Document { pub struct Document {
pub id: DocumentId, pub id: DocumentId,
pub matches: Matches, pub matches: Vec<Match>,
} }
impl Document { impl Document {
pub fn new(doc: DocumentId, match_: Match) -> Self { pub fn new(doc: DocumentId, match_: Match) -> Self {
let matches = SetBuf::new_unchecked(vec![match_]); unsafe { Self::from_sorted_matches(doc, vec![match_]) }
Self::from_matches(doc, matches)
} }
pub fn from_matches(id: DocumentId, matches: SetBuf<Match>) -> Self { pub fn from_matches(doc: DocumentId, mut matches: Vec<Match>) -> Self {
let mut last = 0; matches.sort_unstable();
let mut slices = vec![0]; unsafe { Self::from_sorted_matches(doc, matches) }
for group in GroupBy::new(&matches, match_query_index) {
let index = last + group.len();
slices.push(index);
last = index;
} }
let matches = Matches { matches, slices }; pub unsafe fn from_sorted_matches(id: DocumentId, matches: Vec<Match>) -> Self {
Self { id, matches } Self { id, matches }
} }
pub fn from_unsorted_matches(doc: DocumentId, mut matches: Vec<Match>) -> Self {
matches.sort_unstable();
let matches = SetBuf::new_unchecked(matches);
Self::from_matches(doc, matches)
} }
}
#[derive(Debug, Clone)]
pub struct Matches {
matches: SetBuf<Match>,
slices: Vec<usize>,
}
impl Matches {
pub fn query_index_groups(&self) -> QueryIndexGroups {
QueryIndexGroups {
matches: &self.matches,
windows: self.slices.windows(2),
}
}
}
pub struct QueryIndexGroups<'a, 'b> {
matches: &'a [Match],
windows: Windows<'b, usize>,
}
impl<'a, 'b> Iterator for QueryIndexGroups<'a, 'b> {
type Item = &'a [Match];
fn next(&mut self) -> Option<Self::Item> {
self.windows.next().map(|range| {
match *range {
[left, right] => &self.matches[left..right],
_ => unreachable!()
}
})
}
}
// impl ExactSizeIterator for QueryIndexGroups<'_, '_> {
// fn len(&self) -> usize {
// self.windows.len() // FIXME (+1) ?
// }
// }

View File

@@ -119,7 +119,7 @@ where D: Deref<Target=DB>,
info!("{} documents to classify", matches.len()); info!("{} documents to classify", matches.len());
matches.into_iter().map(|(i, m)| Document::from_unsorted_matches(i, m)).collect() matches.into_iter().map(|(i, m)| Document::from_matches(i, m)).collect()
} }
} }