mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-12-25 06:00:08 +01:00
Use the cache when retrieving the documents at the end
This commit is contained in:
parent
1628a31efa
commit
8148210860
@ -119,32 +119,29 @@ impl Node {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct BestProximity<F> {
|
pub struct BestProximity {
|
||||||
positions: Vec<Vec<u32>>,
|
positions: Vec<Vec<u32>>,
|
||||||
best_proximity: u32,
|
best_proximity: u32,
|
||||||
contains_documents: F,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<F> BestProximity<F> {
|
impl BestProximity {
|
||||||
pub fn new(positions: Vec<Vec<u32>>, contains_documents: F) -> BestProximity<F> {
|
pub fn new(positions: Vec<Vec<u32>>) -> BestProximity {
|
||||||
let best_proximity = (positions.len() as u32).saturating_sub(1);
|
let best_proximity = (positions.len() as u32).saturating_sub(1);
|
||||||
BestProximity { positions, best_proximity, contains_documents }
|
BestProximity { positions, best_proximity }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<F> Iterator for BestProximity<F>
|
impl BestProximity {
|
||||||
|
pub fn next<F>(&mut self, mut contains_documents: F) -> Option<(u32, Vec<Vec<u32>>)>
|
||||||
where F: FnMut((usize, u32), (usize, u32)) -> bool,
|
where F: FnMut((usize, u32), (usize, u32)) -> bool,
|
||||||
{
|
{
|
||||||
type Item = (u32, Vec<Vec<u32>>);
|
|
||||||
|
|
||||||
fn next(&mut self) -> Option<Self::Item> {
|
|
||||||
let before = Instant::now();
|
let before = Instant::now();
|
||||||
|
|
||||||
if self.best_proximity == self.positions.len() as u32 * (MAX_DISTANCE - 1) {
|
if self.best_proximity == self.positions.len() as u32 * (MAX_DISTANCE - 1) {
|
||||||
return None;
|
return None;
|
||||||
}
|
}
|
||||||
|
|
||||||
let BestProximity { positions, best_proximity, contains_documents } = self;
|
let BestProximity { positions, best_proximity } = self;
|
||||||
|
|
||||||
let result = astar_bag(
|
let result = astar_bag(
|
||||||
&Node::Uninit, // start
|
&Node::Uninit, // start
|
||||||
@ -152,7 +149,7 @@ where F: FnMut((usize, u32), (usize, u32)) -> bool,
|
|||||||
|_| 0, // heuristic
|
|_| 0, // heuristic
|
||||||
|n| { // success
|
|n| { // success
|
||||||
let c = n.is_complete(&positions) && n.proximity() >= *best_proximity;
|
let c = n.is_complete(&positions) && n.proximity() >= *best_proximity;
|
||||||
if n.is_reachable(contains_documents) { Some(c) } else { None }
|
if n.is_reachable(&mut contains_documents) { Some(c) } else { None }
|
||||||
},
|
},
|
||||||
);
|
);
|
||||||
|
|
||||||
@ -186,16 +183,17 @@ mod tests {
|
|||||||
vec![ 1, ],
|
vec![ 1, ],
|
||||||
vec![ 3, 6],
|
vec![ 3, 6],
|
||||||
];
|
];
|
||||||
let mut iter = BestProximity::new(positions, |_, _| true);
|
let mut iter = BestProximity::new(positions);
|
||||||
|
let f = |_, _| true;
|
||||||
|
|
||||||
assert_eq!(iter.next(), Some((1+2, vec![vec![0, 1, 3]]))); // 3
|
assert_eq!(iter.next(f), Some((1+2, vec![vec![0, 1, 3]]))); // 3
|
||||||
assert_eq!(iter.next(), Some((2+2, vec![vec![2, 1, 3]]))); // 4
|
assert_eq!(iter.next(f), Some((2+2, vec![vec![2, 1, 3]]))); // 4
|
||||||
assert_eq!(iter.next(), Some((3+2, vec![vec![3, 1, 3]]))); // 5
|
assert_eq!(iter.next(f), Some((3+2, vec![vec![3, 1, 3]]))); // 5
|
||||||
assert_eq!(iter.next(), Some((1+5, vec![vec![0, 1, 6], vec![4, 1, 3]]))); // 6
|
assert_eq!(iter.next(f), Some((1+5, vec![vec![0, 1, 6], vec![4, 1, 3]]))); // 6
|
||||||
assert_eq!(iter.next(), Some((2+5, vec![vec![2, 1, 6]]))); // 7
|
assert_eq!(iter.next(f), Some((2+5, vec![vec![2, 1, 6]]))); // 7
|
||||||
assert_eq!(iter.next(), Some((3+5, vec![vec![3, 1, 6]]))); // 8
|
assert_eq!(iter.next(f), Some((3+5, vec![vec![3, 1, 6]]))); // 8
|
||||||
assert_eq!(iter.next(), Some((4+5, vec![vec![4, 1, 6]]))); // 9
|
assert_eq!(iter.next(f), Some((4+5, vec![vec![4, 1, 6]]))); // 9
|
||||||
assert_eq!(iter.next(), None);
|
assert_eq!(iter.next(f), None);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
@ -205,12 +203,13 @@ mod tests {
|
|||||||
vec![ 1, 1000, 2001 ],
|
vec![ 1, 1000, 2001 ],
|
||||||
vec![ 3, 6, 2002, 3000],
|
vec![ 3, 6, 2002, 3000],
|
||||||
];
|
];
|
||||||
let mut iter = BestProximity::new(positions, |_, _| true);
|
let mut iter = BestProximity::new(positions);
|
||||||
|
let f = |_, _| true;
|
||||||
|
|
||||||
assert_eq!(iter.next(), Some((1+1, vec![vec![2000, 2001, 2002]]))); // 2
|
assert_eq!(iter.next(f), Some((1+1, vec![vec![2000, 2001, 2002]]))); // 2
|
||||||
assert_eq!(iter.next(), Some((1+2, vec![vec![0, 1, 3]]))); // 3
|
assert_eq!(iter.next(f), Some((1+2, vec![vec![0, 1, 3]]))); // 3
|
||||||
assert_eq!(iter.next(), Some((2+2, vec![vec![2, 1, 3]]))); // 4
|
assert_eq!(iter.next(f), Some((2+2, vec![vec![2, 1, 3]]))); // 4
|
||||||
assert_eq!(iter.next(), Some((1+5, vec![vec![0, 1, 6]]))); // 6
|
assert_eq!(iter.next(f), Some((1+5, vec![vec![0, 1, 6]]))); // 6
|
||||||
// We ignore others here...
|
// We ignore others here...
|
||||||
}
|
}
|
||||||
|
|
||||||
|
31
src/lib.rs
31
src/lib.rs
@ -143,7 +143,7 @@ impl Index {
|
|||||||
let mut union_cache = HashMap::new();
|
let mut union_cache = HashMap::new();
|
||||||
let mut intersect_cache = HashMap::new();
|
let mut intersect_cache = HashMap::new();
|
||||||
// Returns `true` if there are documents in common between the two words and positions given.
|
// Returns `true` if there are documents in common between the two words and positions given.
|
||||||
let contains_documents = |(lword, lpos): (usize, u32), (rword, rpos): (usize, u32)| {
|
let mut contains_documents = |(lword, lpos), (rword, rpos), union_cache: &mut HashMap<_, _>| {
|
||||||
let proximity = best_proximity::positions_proximity(lpos, rpos);
|
let proximity = best_proximity::positions_proximity(lpos, rpos);
|
||||||
|
|
||||||
if proximity == 0 { return false }
|
if proximity == 0 { return false }
|
||||||
@ -162,7 +162,8 @@ impl Index {
|
|||||||
})
|
})
|
||||||
};
|
};
|
||||||
|
|
||||||
for (proximity, mut positions) in BestProximity::new(positions, contains_documents) {
|
let mut iter = BestProximity::new(positions);
|
||||||
|
while let Some((proximity, mut positions)) = iter.next(|l, r| contains_documents(l, r, &mut union_cache)) {
|
||||||
positions.sort_unstable();
|
positions.sort_unstable();
|
||||||
|
|
||||||
let same_prox_before = Instant::now();
|
let same_prox_before = Instant::now();
|
||||||
@ -172,34 +173,18 @@ impl Index {
|
|||||||
let before = Instant::now();
|
let before = Instant::now();
|
||||||
|
|
||||||
let mut intersect_docids: Option<RoaringBitmap> = None;
|
let mut intersect_docids: Option<RoaringBitmap> = None;
|
||||||
for (derived_words, pos) in words.iter().zip(positions.clone()) {
|
for (word, pos) in positions.iter().enumerate() {
|
||||||
let mut count = 0;
|
|
||||||
let mut union_docids = RoaringBitmap::default();
|
|
||||||
|
|
||||||
let before = Instant::now();
|
let before = Instant::now();
|
||||||
|
let union_docids = union_cache.entry((word, *pos)).or_insert_with(|| unions_word_pos(word, *pos));
|
||||||
// TODO re-enable the prefixes system
|
|
||||||
for (word, attrs) in derived_words.iter() {
|
|
||||||
if attrs.contains(pos) {
|
|
||||||
let mut key = word.clone();
|
|
||||||
key.extend_from_slice(&pos.to_be_bytes());
|
|
||||||
if let Some(attrs) = self.postings_ids.get(rtxn, &key)? {
|
|
||||||
let right = RoaringBitmap::deserialize_from_slice(attrs)?;
|
|
||||||
union_docids.union_with(&right);
|
|
||||||
count += 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
let before_intersect = Instant::now();
|
let before_intersect = Instant::now();
|
||||||
|
|
||||||
match &mut intersect_docids {
|
match &mut intersect_docids {
|
||||||
Some(left) => left.intersect_with(&union_docids),
|
Some(left) => left.intersect_with(&union_docids),
|
||||||
None => intersect_docids = Some(union_docids),
|
None => intersect_docids = Some(union_docids.clone()),
|
||||||
}
|
}
|
||||||
|
|
||||||
eprintln!("retrieving {} word took {:.02?} and took {:.02?} to intersect",
|
eprintln!("retrieving words took {:.02?} and took {:.02?} to intersect",
|
||||||
count, before.elapsed(), before_intersect.elapsed());
|
before.elapsed(), before_intersect.elapsed());
|
||||||
}
|
}
|
||||||
|
|
||||||
eprintln!("for proximity {:?} {:?} we took {:.02?} to find {} documents",
|
eprintln!("for proximity {:?} {:?} we took {:.02?} to find {} documents",
|
||||||
|
Loading…
x
Reference in New Issue
Block a user