mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-26 14:54:27 +01:00
Search for best proximities in multiple attributes
This commit is contained in:
parent
576dd011a1
commit
3d144e62c4
@ -23,7 +23,7 @@ pub fn positions_proximity(lhs: u32, rhs: u32) -> u32 {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Returns the attribute and index parts.
|
// Returns the attribute and index parts.
|
||||||
fn extract_position(position: u32) -> (u32, u32) {
|
pub fn extract_position(position: u32) -> (u32, u32) {
|
||||||
(position / ONE_ATTRIBUTE, position % ONE_ATTRIBUTE)
|
(position / ONE_ATTRIBUTE, position % ONE_ATTRIBUTE)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -66,7 +66,7 @@ impl Node {
|
|||||||
parent_position: *position,
|
parent_position: *position,
|
||||||
};
|
};
|
||||||
// We do not produce the nodes we have already seen in previous iterations.
|
// We do not produce the nodes we have already seen in previous iterations.
|
||||||
if proximity > 7 || (node.is_complete(positions) && acc_proximity + proximity < best_proximity) {
|
if node.is_complete(positions) && acc_proximity + proximity < best_proximity {
|
||||||
None
|
None
|
||||||
} else {
|
} else {
|
||||||
Some((node, proximity))
|
Some((node, proximity))
|
||||||
@ -138,7 +138,7 @@ impl BestProximity {
|
|||||||
{
|
{
|
||||||
let before = Instant::now();
|
let before = Instant::now();
|
||||||
|
|
||||||
if self.best_proximity == self.positions.len() as u32 * (MAX_DISTANCE - 1) {
|
if self.best_proximity == self.positions.len() as u32 * MAX_DISTANCE {
|
||||||
return None;
|
return None;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -177,6 +177,11 @@ impl BestProximity {
|
|||||||
mod tests {
|
mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
|
|
||||||
|
fn sort<T: Ord>(mut val: (u32, Vec<T>)) -> (u32, Vec<T>) {
|
||||||
|
val.1.sort_unstable();
|
||||||
|
val
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn same_attribute() {
|
fn same_attribute() {
|
||||||
let positions = vec![
|
let positions = vec![
|
||||||
@ -190,7 +195,7 @@ mod tests {
|
|||||||
assert_eq!(iter.next(f), Some((1+2, vec![vec![0, 1, 3]]))); // 3
|
assert_eq!(iter.next(f), Some((1+2, vec![vec![0, 1, 3]]))); // 3
|
||||||
assert_eq!(iter.next(f), Some((2+2, vec![vec![2, 1, 3]]))); // 4
|
assert_eq!(iter.next(f), Some((2+2, vec![vec![2, 1, 3]]))); // 4
|
||||||
assert_eq!(iter.next(f), Some((3+2, vec![vec![3, 1, 3]]))); // 5
|
assert_eq!(iter.next(f), Some((3+2, vec![vec![3, 1, 3]]))); // 5
|
||||||
assert_eq!(iter.next(f), Some((1+5, vec![vec![0, 1, 6], vec![4, 1, 3]]))); // 6
|
assert_eq!(iter.next(f).map(sort), Some((1+5, vec![vec![0, 1, 6], vec![4, 1, 3]]))); // 6
|
||||||
assert_eq!(iter.next(f), Some((2+5, vec![vec![2, 1, 6]]))); // 7
|
assert_eq!(iter.next(f), Some((2+5, vec![vec![2, 1, 6]]))); // 7
|
||||||
assert_eq!(iter.next(f), Some((3+5, vec![vec![3, 1, 6]]))); // 8
|
assert_eq!(iter.next(f), Some((3+5, vec![vec![3, 1, 6]]))); // 8
|
||||||
assert_eq!(iter.next(f), Some((4+5, vec![vec![4, 1, 6]]))); // 9
|
assert_eq!(iter.next(f), Some((4+5, vec![vec![4, 1, 6]]))); // 9
|
||||||
|
66
src/lib.rs
66
src/lib.rs
@ -198,30 +198,66 @@ impl Index {
|
|||||||
union_docids
|
union_docids
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Returns the union of the same attribute for all the derived words.
|
||||||
|
let unions_word_attr = |word: usize, attr: u32| {
|
||||||
|
let mut union_docids = RoaringBitmap::new();
|
||||||
|
for (word, _) in &words[word] {
|
||||||
|
let mut key = word.clone();
|
||||||
|
key.extend_from_slice(&attr.to_be_bytes());
|
||||||
|
if let Some(right) = self.word_attribute_docids.get(rtxn, &key).unwrap() {
|
||||||
|
union_docids.union_with(&right);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
union_docids
|
||||||
|
};
|
||||||
|
|
||||||
let mut union_cache = HashMap::new();
|
let mut union_cache = HashMap::new();
|
||||||
let mut intersect_cache = HashMap::new();
|
let mut intersect_cache = HashMap::new();
|
||||||
|
|
||||||
|
let mut attribute_union_cache = HashMap::new();
|
||||||
|
let mut attribute_intersect_cache = HashMap::new();
|
||||||
|
|
||||||
// Returns `true` if there are documents in common between the two given words and positions.
|
// Returns `true` if there are documents in common between the two given words and positions.
|
||||||
let mut contains_documents = |(lword, lpos), (rword, rpos), union_cache: &mut HashMap<_, _>, candidates: &RoaringBitmap| {
|
let mut contains_documents = |(lword, lpos), (rword, rpos), union_cache: &mut HashMap<_, _>, candidates: &RoaringBitmap| {
|
||||||
let proximity = best_proximity::positions_proximity(lpos, rpos);
|
if lpos == rpos { return false }
|
||||||
|
|
||||||
if proximity == 0 { return false }
|
let (lattr, _) = best_proximity::extract_position(lpos);
|
||||||
|
let (rattr, _) = best_proximity::extract_position(rpos);
|
||||||
|
|
||||||
// We retrieve or compute the intersection between the two given words and positions.
|
if lattr == rattr {
|
||||||
*intersect_cache.entry(((lword, lpos), (rword, rpos))).or_insert_with(|| {
|
// We retrieve or compute the intersection between the two given words and positions.
|
||||||
// We retrieve or compute the unions for the two words and positions.
|
*intersect_cache.entry(((lword, lpos), (rword, rpos))).or_insert_with(|| {
|
||||||
union_cache.entry((lword, lpos)).or_insert_with(|| unions_word_pos(lword, lpos));
|
// We retrieve or compute the unions for the two words and positions.
|
||||||
union_cache.entry((rword, rpos)).or_insert_with(|| unions_word_pos(rword, rpos));
|
union_cache.entry((lword, lpos)).or_insert_with(|| unions_word_pos(lword, lpos));
|
||||||
|
union_cache.entry((rword, rpos)).or_insert_with(|| unions_word_pos(rword, rpos));
|
||||||
|
|
||||||
// TODO is there a way to avoid this double gets?
|
// TODO is there a way to avoid this double gets?
|
||||||
let lunion_docids = union_cache.get(&(lword, lpos)).unwrap();
|
let lunion_docids = union_cache.get(&(lword, lpos)).unwrap();
|
||||||
let runion_docids = union_cache.get(&(rword, rpos)).unwrap();
|
let runion_docids = union_cache.get(&(rword, rpos)).unwrap();
|
||||||
|
|
||||||
// We first check that the docids of these unions are part of the candidates.
|
// We first check that the docids of these unions are part of the candidates.
|
||||||
if lunion_docids.is_disjoint(candidates) { return false }
|
if lunion_docids.is_disjoint(candidates) { return false }
|
||||||
if runion_docids.is_disjoint(candidates) { return false }
|
if runion_docids.is_disjoint(candidates) { return false }
|
||||||
|
|
||||||
!lunion_docids.is_disjoint(&runion_docids)
|
!lunion_docids.is_disjoint(&runion_docids)
|
||||||
})
|
})
|
||||||
|
} else {
|
||||||
|
*attribute_intersect_cache.entry(((lword, lattr), (rword, rattr))).or_insert_with(|| {
|
||||||
|
// We retrieve or compute the unions for the two words and attributes.
|
||||||
|
attribute_union_cache.entry((lword, lattr)).or_insert_with(|| unions_word_attr(lword, lattr));
|
||||||
|
attribute_union_cache.entry((rword, rattr)).or_insert_with(|| unions_word_attr(rword, rattr));
|
||||||
|
|
||||||
|
// TODO is there a way to avoid this double gets?
|
||||||
|
let lunion_docids = attribute_union_cache.get(&(lword, lattr)).unwrap();
|
||||||
|
let runion_docids = attribute_union_cache.get(&(rword, rattr)).unwrap();
|
||||||
|
|
||||||
|
// We first check that the docids of these unions are part of the candidates.
|
||||||
|
if lunion_docids.is_disjoint(candidates) { return false }
|
||||||
|
if runion_docids.is_disjoint(candidates) { return false }
|
||||||
|
|
||||||
|
!lunion_docids.is_disjoint(&runion_docids)
|
||||||
|
})
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
let mut documents = Vec::new();
|
let mut documents = Vec::new();
|
||||||
|
Loading…
Reference in New Issue
Block a user