From 7e16afbdce224a9c9f656b4101bc38c77a10e7a4 Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Wed, 24 Jun 2020 15:06:45 +0200 Subject: [PATCH] Ignore documents which are not part of the candidates when exploring with A* --- src/lib.rs | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 9f9fcbedd..274ae1e54 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -182,7 +182,7 @@ impl Index { let mut union_cache = HashMap::new(); let mut intersect_cache = HashMap::new(); // Returns `true` if there is documents in common between the two words and positions given. - let mut contains_documents = |(lword, lpos), (rword, rpos), union_cache: &mut HashMap<_, _>| { + let mut contains_documents = |(lword, lpos), (rword, rpos), union_cache: &mut HashMap<_, _>, words_attributes_docids: &HashMap<_, _>| { let proximity = best_proximity::positions_proximity(lpos, rpos); if proximity == 0 { return false } @@ -197,13 +197,22 @@ impl Index { let lunion_docids = union_cache.get(&(lword, lpos)).unwrap(); let runion_docids = union_cache.get(&(rword, rpos)).unwrap(); + let lattr = lpos / 1000; + let rattr = rpos / 1000; + if lattr == rattr { + if let Some(docids) = words_attributes_docids.get(&lattr) { + if lunion_docids.is_disjoint(&docids) { return false } + if runion_docids.is_disjoint(&docids) { return false } + } + } + !lunion_docids.is_disjoint(&runion_docids) }) }; let mut documents = Vec::new(); let mut iter = BestProximity::new(positions); - while let Some((proximity, mut positions)) = iter.next(|l, r| contains_documents(l, r, &mut union_cache)) { + while let Some((proximity, mut positions)) = iter.next(|l, r| contains_documents(l, r, &mut union_cache, &words_attributes_docids)) { positions.sort_unstable(); let same_prox_before = Instant::now();