mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-12-24 13:40:31 +01:00
Use the cache when retrieving the documents at the end
This commit is contained in:
parent
1628a31efa
commit
8148210860
@ -119,32 +119,29 @@ impl Node {
|
||||
}
|
||||
}
|
||||
|
||||
pub struct BestProximity<F> {
|
||||
pub struct BestProximity {
|
||||
positions: Vec<Vec<u32>>,
|
||||
best_proximity: u32,
|
||||
contains_documents: F,
|
||||
}
|
||||
|
||||
impl<F> BestProximity<F> {
|
||||
pub fn new(positions: Vec<Vec<u32>>, contains_documents: F) -> BestProximity<F> {
|
||||
impl BestProximity {
|
||||
pub fn new(positions: Vec<Vec<u32>>) -> BestProximity {
|
||||
let best_proximity = (positions.len() as u32).saturating_sub(1);
|
||||
BestProximity { positions, best_proximity, contains_documents }
|
||||
BestProximity { positions, best_proximity }
|
||||
}
|
||||
}
|
||||
|
||||
impl<F> Iterator for BestProximity<F>
|
||||
impl BestProximity {
|
||||
pub fn next<F>(&mut self, mut contains_documents: F) -> Option<(u32, Vec<Vec<u32>>)>
|
||||
where F: FnMut((usize, u32), (usize, u32)) -> bool,
|
||||
{
|
||||
type Item = (u32, Vec<Vec<u32>>);
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
let before = Instant::now();
|
||||
|
||||
if self.best_proximity == self.positions.len() as u32 * (MAX_DISTANCE - 1) {
|
||||
return None;
|
||||
}
|
||||
|
||||
let BestProximity { positions, best_proximity, contains_documents } = self;
|
||||
let BestProximity { positions, best_proximity } = self;
|
||||
|
||||
let result = astar_bag(
|
||||
&Node::Uninit, // start
|
||||
@ -152,7 +149,7 @@ where F: FnMut((usize, u32), (usize, u32)) -> bool,
|
||||
|_| 0, // heuristic
|
||||
|n| { // success
|
||||
let c = n.is_complete(&positions) && n.proximity() >= *best_proximity;
|
||||
if n.is_reachable(contains_documents) { Some(c) } else { None }
|
||||
if n.is_reachable(&mut contains_documents) { Some(c) } else { None }
|
||||
},
|
||||
);
|
||||
|
||||
@ -186,16 +183,17 @@ mod tests {
|
||||
vec![ 1, ],
|
||||
vec![ 3, 6],
|
||||
];
|
||||
let mut iter = BestProximity::new(positions, |_, _| true);
|
||||
let mut iter = BestProximity::new(positions);
|
||||
let f = |_, _| true;
|
||||
|
||||
assert_eq!(iter.next(), Some((1+2, vec![vec![0, 1, 3]]))); // 3
|
||||
assert_eq!(iter.next(), Some((2+2, vec![vec![2, 1, 3]]))); // 4
|
||||
assert_eq!(iter.next(), Some((3+2, vec![vec![3, 1, 3]]))); // 5
|
||||
assert_eq!(iter.next(), Some((1+5, vec![vec![0, 1, 6], vec![4, 1, 3]]))); // 6
|
||||
assert_eq!(iter.next(), Some((2+5, vec![vec![2, 1, 6]]))); // 7
|
||||
assert_eq!(iter.next(), Some((3+5, vec![vec![3, 1, 6]]))); // 8
|
||||
assert_eq!(iter.next(), Some((4+5, vec![vec![4, 1, 6]]))); // 9
|
||||
assert_eq!(iter.next(), None);
|
||||
assert_eq!(iter.next(f), Some((1+2, vec![vec![0, 1, 3]]))); // 3
|
||||
assert_eq!(iter.next(f), Some((2+2, vec![vec![2, 1, 3]]))); // 4
|
||||
assert_eq!(iter.next(f), Some((3+2, vec![vec![3, 1, 3]]))); // 5
|
||||
assert_eq!(iter.next(f), Some((1+5, vec![vec![0, 1, 6], vec![4, 1, 3]]))); // 6
|
||||
assert_eq!(iter.next(f), Some((2+5, vec![vec![2, 1, 6]]))); // 7
|
||||
assert_eq!(iter.next(f), Some((3+5, vec![vec![3, 1, 6]]))); // 8
|
||||
assert_eq!(iter.next(f), Some((4+5, vec![vec![4, 1, 6]]))); // 9
|
||||
assert_eq!(iter.next(f), None);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@ -205,12 +203,13 @@ mod tests {
|
||||
vec![ 1, 1000, 2001 ],
|
||||
vec![ 3, 6, 2002, 3000],
|
||||
];
|
||||
let mut iter = BestProximity::new(positions, |_, _| true);
|
||||
let mut iter = BestProximity::new(positions);
|
||||
let f = |_, _| true;
|
||||
|
||||
assert_eq!(iter.next(), Some((1+1, vec![vec![2000, 2001, 2002]]))); // 2
|
||||
assert_eq!(iter.next(), Some((1+2, vec![vec![0, 1, 3]]))); // 3
|
||||
assert_eq!(iter.next(), Some((2+2, vec![vec![2, 1, 3]]))); // 4
|
||||
assert_eq!(iter.next(), Some((1+5, vec![vec![0, 1, 6]]))); // 6
|
||||
assert_eq!(iter.next(f), Some((1+1, vec![vec![2000, 2001, 2002]]))); // 2
|
||||
assert_eq!(iter.next(f), Some((1+2, vec![vec![0, 1, 3]]))); // 3
|
||||
assert_eq!(iter.next(f), Some((2+2, vec![vec![2, 1, 3]]))); // 4
|
||||
assert_eq!(iter.next(f), Some((1+5, vec![vec![0, 1, 6]]))); // 6
|
||||
// We ignore others here...
|
||||
}
|
||||
|
||||
|
31
src/lib.rs
31
src/lib.rs
@ -143,7 +143,7 @@ impl Index {
|
||||
let mut union_cache = HashMap::new();
|
||||
let mut intersect_cache = HashMap::new();
|
||||
// Returns `true` if there are documents in common between the two words and positions given.
|
||||
let contains_documents = |(lword, lpos): (usize, u32), (rword, rpos): (usize, u32)| {
|
||||
let mut contains_documents = |(lword, lpos), (rword, rpos), union_cache: &mut HashMap<_, _>| {
|
||||
let proximity = best_proximity::positions_proximity(lpos, rpos);
|
||||
|
||||
if proximity == 0 { return false }
|
||||
@ -162,7 +162,8 @@ impl Index {
|
||||
})
|
||||
};
|
||||
|
||||
for (proximity, mut positions) in BestProximity::new(positions, contains_documents) {
|
||||
let mut iter = BestProximity::new(positions);
|
||||
while let Some((proximity, mut positions)) = iter.next(|l, r| contains_documents(l, r, &mut union_cache)) {
|
||||
positions.sort_unstable();
|
||||
|
||||
let same_prox_before = Instant::now();
|
||||
@ -172,34 +173,18 @@ impl Index {
|
||||
let before = Instant::now();
|
||||
|
||||
let mut intersect_docids: Option<RoaringBitmap> = None;
|
||||
for (derived_words, pos) in words.iter().zip(positions.clone()) {
|
||||
let mut count = 0;
|
||||
let mut union_docids = RoaringBitmap::default();
|
||||
|
||||
for (word, pos) in positions.iter().enumerate() {
|
||||
let before = Instant::now();
|
||||
|
||||
// TODO re-enable the prefixes system
|
||||
for (word, attrs) in derived_words.iter() {
|
||||
if attrs.contains(pos) {
|
||||
let mut key = word.clone();
|
||||
key.extend_from_slice(&pos.to_be_bytes());
|
||||
if let Some(attrs) = self.postings_ids.get(rtxn, &key)? {
|
||||
let right = RoaringBitmap::deserialize_from_slice(attrs)?;
|
||||
union_docids.union_with(&right);
|
||||
count += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
let union_docids = union_cache.entry((word, *pos)).or_insert_with(|| unions_word_pos(word, *pos));
|
||||
|
||||
let before_intersect = Instant::now();
|
||||
|
||||
match &mut intersect_docids {
|
||||
Some(left) => left.intersect_with(&union_docids),
|
||||
None => intersect_docids = Some(union_docids),
|
||||
None => intersect_docids = Some(union_docids.clone()),
|
||||
}
|
||||
|
||||
eprintln!("retrieving {} word took {:.02?} and took {:.02?} to intersect",
|
||||
count, before.elapsed(), before_intersect.elapsed());
|
||||
eprintln!("retrieving words took {:.02?} and took {:.02?} to intersect",
|
||||
before.elapsed(), before_intersect.elapsed());
|
||||
}
|
||||
|
||||
eprintln!("for proximity {:?} {:?} we took {:.02?} to find {} documents",
|
||||
|
Loading…
x
Reference in New Issue
Block a user