mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-22 21:04:27 +01:00
Introduce the plane-sweep algorithm
This commit is contained in:
parent
dc88a86259
commit
1c504471d3
24
Cargo.lock
generated
24
Cargo.lock
generated
@ -997,6 +997,7 @@ dependencies = [
|
||||
"levenshtein_automata",
|
||||
"log 0.4.11",
|
||||
"memmap",
|
||||
"near-proximity",
|
||||
"once_cell",
|
||||
"oxidized-mtbl",
|
||||
"rayon",
|
||||
@ -1141,6 +1142,14 @@ dependencies = [
|
||||
"twoway",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "near-proximity"
|
||||
version = "0.1.0"
|
||||
source = "git+https://github.com/Kerollmops/plane-sweep-proximity#f6c9e7e5fc1f7b456d080981e877d0e7943f82bd"
|
||||
dependencies = [
|
||||
"tinyvec",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "net2"
|
||||
version = "0.2.34"
|
||||
@ -1964,6 +1973,21 @@ dependencies = [
|
||||
"serde_json",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tinyvec"
|
||||
version = "1.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f331a553cacb14e99d183e5573c86044dd177b5a5277b21e562fd1bd5e1076e1"
|
||||
dependencies = [
|
||||
"tinyvec_macros",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tinyvec_macros"
|
||||
version = "0.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "cda74da7e1a664f795bb1f8a87ec406fb89a02522cf6e50620d016add6dbbf5c"
|
||||
|
||||
[[package]]
|
||||
name = "tokio"
|
||||
version = "0.2.21"
|
||||
|
@ -22,6 +22,7 @@ levenshtein_automata = { version = "0.2.0", features = ["fst_automaton"] }
|
||||
memmap = "0.7.0"
|
||||
once_cell = "1.4.0"
|
||||
oxidized-mtbl = { git = "https://github.com/Kerollmops/oxidized-mtbl.git", rev = "5426182" }
|
||||
near-proximity = { git = "https://github.com/Kerollmops/plane-sweep-proximity" }
|
||||
rayon = "1.3.1"
|
||||
ringtail = "0.3.0"
|
||||
roaring = { git = "https://github.com/Kerollmops/roaring-rs.git", branch = "mem-usage" }
|
||||
|
@ -5,7 +5,9 @@ use levenshtein_automata::DFA;
|
||||
use levenshtein_automata::LevenshteinAutomatonBuilder as LevBuilder;
|
||||
use log::debug;
|
||||
use once_cell::sync::Lazy;
|
||||
use roaring::RoaringBitmap;
|
||||
use roaring::bitmap::{IntoIter, RoaringBitmap};
|
||||
|
||||
use near_proximity::near_proximity;
|
||||
|
||||
use crate::query_tokens::{QueryTokens, QueryToken};
|
||||
use crate::{Index, DocumentId, Position, Attribute};
|
||||
@ -136,6 +138,31 @@ impl<'a> Search<'a> {
|
||||
Ok(candidates)
|
||||
}
|
||||
|
||||
fn fecth_keywords(
|
||||
rtxn: &heed::RoTxn,
|
||||
index: &Index,
|
||||
derived_words: &[(HashMap<String, (u8, RoaringBitmap)>, RoaringBitmap)],
|
||||
candidate: DocumentId,
|
||||
) -> anyhow::Result<Vec<IntoIter>>
|
||||
{
|
||||
let mut keywords = Vec::with_capacity(derived_words.len());
|
||||
|
||||
for (words, _) in derived_words {
|
||||
|
||||
let mut union_positions = RoaringBitmap::new();
|
||||
for (word, (_distance, docids)) in words {
|
||||
|
||||
if docids.contains(candidate) {
|
||||
let positions = index.word_docid_positions.get(rtxn, &(word, candidate))?.unwrap();
|
||||
union_positions.union_with(&positions);
|
||||
}
|
||||
}
|
||||
keywords.push(union_positions.into_iter());
|
||||
}
|
||||
|
||||
Ok(keywords)
|
||||
}
|
||||
|
||||
pub fn execute(&self) -> anyhow::Result<SearchResult> {
|
||||
let rtxn = self.rtxn;
|
||||
let index = self.index;
|
||||
@ -162,10 +189,28 @@ impl<'a> Search<'a> {
|
||||
|
||||
debug!("candidates: {:?}", candidates);
|
||||
|
||||
let documents = vec![candidates];
|
||||
let mut documents = Vec::new();
|
||||
|
||||
let min_proximity = derived_words.len() as u32;
|
||||
let mut number_min_proximity = 0;
|
||||
|
||||
let mut paths = Vec::new();
|
||||
for candidate in candidates {
|
||||
let keywords = Self::fecth_keywords(rtxn, index, &derived_words, candidate)?;
|
||||
near_proximity(keywords, &mut paths);
|
||||
if let Some((prox, _path)) = paths.first() {
|
||||
documents.push((*prox, candidate));
|
||||
if *prox == min_proximity {
|
||||
number_min_proximity += 1;
|
||||
if number_min_proximity >= limit { break }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
documents.sort_unstable_by_key(|(prox, _)| *prox);
|
||||
|
||||
let found_words = derived_words.into_iter().flat_map(|(w, _)| w).map(|(w, _)| w).collect();
|
||||
let documents_ids = documents.iter().flatten().take(limit).collect();
|
||||
let documents_ids = documents.into_iter().map(|(_, id)| id).take(limit).collect();
|
||||
|
||||
Ok(SearchResult { found_words, documents_ids })
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user