mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-01-12 06:24:29 +01:00
Remove old query_tree code and make clippy happy
This commit is contained in:
parent
f5f5f03ec0
commit
7169d85115
@ -73,7 +73,7 @@ impl<'a> FacetDistribution<'a> {
|
|||||||
|
|
||||||
let distribution_prelength = distribution.len();
|
let distribution_prelength = distribution.len();
|
||||||
let db = self.index.field_id_docid_facet_f64s;
|
let db = self.index.field_id_docid_facet_f64s;
|
||||||
for docid in candidates.into_iter() {
|
for docid in candidates {
|
||||||
key_buffer.truncate(mem::size_of::<FieldId>());
|
key_buffer.truncate(mem::size_of::<FieldId>());
|
||||||
key_buffer.extend_from_slice(&docid.to_be_bytes());
|
key_buffer.extend_from_slice(&docid.to_be_bytes());
|
||||||
let iter = db
|
let iter = db
|
||||||
@ -97,7 +97,7 @@ impl<'a> FacetDistribution<'a> {
|
|||||||
let mut key_buffer: Vec<_> = field_id.to_be_bytes().to_vec();
|
let mut key_buffer: Vec<_> = field_id.to_be_bytes().to_vec();
|
||||||
|
|
||||||
let db = self.index.field_id_docid_facet_strings;
|
let db = self.index.field_id_docid_facet_strings;
|
||||||
'outer: for docid in candidates.into_iter() {
|
'outer: for docid in candidates {
|
||||||
key_buffer.truncate(mem::size_of::<FieldId>());
|
key_buffer.truncate(mem::size_of::<FieldId>());
|
||||||
key_buffer.extend_from_slice(&docid.to_be_bytes());
|
key_buffer.extend_from_slice(&docid.to_be_bytes());
|
||||||
let iter = db
|
let iter = db
|
||||||
@ -505,7 +505,7 @@ mod tests {
|
|||||||
|
|
||||||
let map = FacetDistribution::new(&txn, &index)
|
let map = FacetDistribution::new(&txn, &index)
|
||||||
.facets(std::iter::once("colour"))
|
.facets(std::iter::once("colour"))
|
||||||
.candidates((0..10_000).into_iter().collect())
|
.candidates((0..10_000).collect())
|
||||||
.execute()
|
.execute()
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -513,7 +513,7 @@ mod tests {
|
|||||||
|
|
||||||
let map = FacetDistribution::new(&txn, &index)
|
let map = FacetDistribution::new(&txn, &index)
|
||||||
.facets(std::iter::once("colour"))
|
.facets(std::iter::once("colour"))
|
||||||
.candidates((0..5_000).into_iter().collect())
|
.candidates((0..5_000).collect())
|
||||||
.execute()
|
.execute()
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -521,7 +521,7 @@ mod tests {
|
|||||||
|
|
||||||
let map = FacetDistribution::new(&txn, &index)
|
let map = FacetDistribution::new(&txn, &index)
|
||||||
.facets(std::iter::once("colour"))
|
.facets(std::iter::once("colour"))
|
||||||
.candidates((0..5_000).into_iter().collect())
|
.candidates((0..5_000).collect())
|
||||||
.execute()
|
.execute()
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -529,7 +529,7 @@ mod tests {
|
|||||||
|
|
||||||
let map = FacetDistribution::new(&txn, &index)
|
let map = FacetDistribution::new(&txn, &index)
|
||||||
.facets(std::iter::once("colour"))
|
.facets(std::iter::once("colour"))
|
||||||
.candidates((0..5_000).into_iter().collect())
|
.candidates((0..5_000).collect())
|
||||||
.max_values_per_facet(1)
|
.max_values_per_facet(1)
|
||||||
.execute()
|
.execute()
|
||||||
.unwrap();
|
.unwrap();
|
||||||
@ -546,7 +546,7 @@ mod tests {
|
|||||||
.update_settings(|settings| settings.set_filterable_fields(hashset! { S("colour") }))
|
.update_settings(|settings| settings.set_filterable_fields(hashset! { S("colour") }))
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
let facet_values = (0..1000).into_iter().map(|x| format!("{x:x}")).collect::<Vec<_>>();
|
let facet_values = (0..1000).map(|x| format!("{x:x}")).collect::<Vec<_>>();
|
||||||
|
|
||||||
let mut documents = vec![];
|
let mut documents = vec![];
|
||||||
for i in 0..10_000 {
|
for i in 0..10_000 {
|
||||||
@ -582,7 +582,7 @@ mod tests {
|
|||||||
|
|
||||||
let map = FacetDistribution::new(&txn, &index)
|
let map = FacetDistribution::new(&txn, &index)
|
||||||
.facets(std::iter::once("colour"))
|
.facets(std::iter::once("colour"))
|
||||||
.candidates((0..10_000).into_iter().collect())
|
.candidates((0..10_000).collect())
|
||||||
.execute()
|
.execute()
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -590,7 +590,7 @@ mod tests {
|
|||||||
|
|
||||||
let map = FacetDistribution::new(&txn, &index)
|
let map = FacetDistribution::new(&txn, &index)
|
||||||
.facets(std::iter::once("colour"))
|
.facets(std::iter::once("colour"))
|
||||||
.candidates((0..5_000).into_iter().collect())
|
.candidates((0..5_000).collect())
|
||||||
.execute()
|
.execute()
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -606,7 +606,7 @@ mod tests {
|
|||||||
.update_settings(|settings| settings.set_filterable_fields(hashset! { S("colour") }))
|
.update_settings(|settings| settings.set_filterable_fields(hashset! { S("colour") }))
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
let facet_values = (0..1000).into_iter().collect::<Vec<_>>();
|
let facet_values = (0..1000).collect::<Vec<_>>();
|
||||||
|
|
||||||
let mut documents = vec![];
|
let mut documents = vec![];
|
||||||
for i in 0..1000 {
|
for i in 0..1000 {
|
||||||
@ -634,7 +634,7 @@ mod tests {
|
|||||||
|
|
||||||
let map = FacetDistribution::new(&txn, &index)
|
let map = FacetDistribution::new(&txn, &index)
|
||||||
.facets(std::iter::once("colour"))
|
.facets(std::iter::once("colour"))
|
||||||
.candidates((0..1000).into_iter().collect())
|
.candidates((0..1000).collect())
|
||||||
.compute_stats()
|
.compute_stats()
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -642,7 +642,7 @@ mod tests {
|
|||||||
|
|
||||||
let map = FacetDistribution::new(&txn, &index)
|
let map = FacetDistribution::new(&txn, &index)
|
||||||
.facets(std::iter::once("colour"))
|
.facets(std::iter::once("colour"))
|
||||||
.candidates((217..777).into_iter().collect())
|
.candidates((217..777).collect())
|
||||||
.compute_stats()
|
.compute_stats()
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -658,7 +658,7 @@ mod tests {
|
|||||||
.update_settings(|settings| settings.set_filterable_fields(hashset! { S("colour") }))
|
.update_settings(|settings| settings.set_filterable_fields(hashset! { S("colour") }))
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
let facet_values = (0..1000).into_iter().collect::<Vec<_>>();
|
let facet_values = (0..1000).collect::<Vec<_>>();
|
||||||
|
|
||||||
let mut documents = vec![];
|
let mut documents = vec![];
|
||||||
for i in 0..1000 {
|
for i in 0..1000 {
|
||||||
@ -686,7 +686,7 @@ mod tests {
|
|||||||
|
|
||||||
let map = FacetDistribution::new(&txn, &index)
|
let map = FacetDistribution::new(&txn, &index)
|
||||||
.facets(std::iter::once("colour"))
|
.facets(std::iter::once("colour"))
|
||||||
.candidates((0..1000).into_iter().collect())
|
.candidates((0..1000).collect())
|
||||||
.compute_stats()
|
.compute_stats()
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -694,7 +694,7 @@ mod tests {
|
|||||||
|
|
||||||
let map = FacetDistribution::new(&txn, &index)
|
let map = FacetDistribution::new(&txn, &index)
|
||||||
.facets(std::iter::once("colour"))
|
.facets(std::iter::once("colour"))
|
||||||
.candidates((217..777).into_iter().collect())
|
.candidates((217..777).collect())
|
||||||
.compute_stats()
|
.compute_stats()
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -710,7 +710,7 @@ mod tests {
|
|||||||
.update_settings(|settings| settings.set_filterable_fields(hashset! { S("colour") }))
|
.update_settings(|settings| settings.set_filterable_fields(hashset! { S("colour") }))
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
let facet_values = (0..1000).into_iter().collect::<Vec<_>>();
|
let facet_values = (0..1000).collect::<Vec<_>>();
|
||||||
|
|
||||||
let mut documents = vec![];
|
let mut documents = vec![];
|
||||||
for i in 0..1000 {
|
for i in 0..1000 {
|
||||||
@ -738,7 +738,7 @@ mod tests {
|
|||||||
|
|
||||||
let map = FacetDistribution::new(&txn, &index)
|
let map = FacetDistribution::new(&txn, &index)
|
||||||
.facets(std::iter::once("colour"))
|
.facets(std::iter::once("colour"))
|
||||||
.candidates((0..1000).into_iter().collect())
|
.candidates((0..1000).collect())
|
||||||
.compute_stats()
|
.compute_stats()
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -746,7 +746,7 @@ mod tests {
|
|||||||
|
|
||||||
let map = FacetDistribution::new(&txn, &index)
|
let map = FacetDistribution::new(&txn, &index)
|
||||||
.facets(std::iter::once("colour"))
|
.facets(std::iter::once("colour"))
|
||||||
.candidates((217..777).into_iter().collect())
|
.candidates((217..777).collect())
|
||||||
.compute_stats()
|
.compute_stats()
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -762,7 +762,7 @@ mod tests {
|
|||||||
.update_settings(|settings| settings.set_filterable_fields(hashset! { S("colour") }))
|
.update_settings(|settings| settings.set_filterable_fields(hashset! { S("colour") }))
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
let facet_values = (0..1000).into_iter().collect::<Vec<_>>();
|
let facet_values = (0..1000).collect::<Vec<_>>();
|
||||||
|
|
||||||
let mut documents = vec![];
|
let mut documents = vec![];
|
||||||
for i in 0..1000 {
|
for i in 0..1000 {
|
||||||
@ -794,7 +794,7 @@ mod tests {
|
|||||||
|
|
||||||
let map = FacetDistribution::new(&txn, &index)
|
let map = FacetDistribution::new(&txn, &index)
|
||||||
.facets(std::iter::once("colour"))
|
.facets(std::iter::once("colour"))
|
||||||
.candidates((0..1000).into_iter().collect())
|
.candidates((0..1000).collect())
|
||||||
.compute_stats()
|
.compute_stats()
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -802,7 +802,7 @@ mod tests {
|
|||||||
|
|
||||||
let map = FacetDistribution::new(&txn, &index)
|
let map = FacetDistribution::new(&txn, &index)
|
||||||
.facets(std::iter::once("colour"))
|
.facets(std::iter::once("colour"))
|
||||||
.candidates((217..777).into_iter().collect())
|
.candidates((217..777).collect())
|
||||||
.compute_stats()
|
.compute_stats()
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
|
@ -142,7 +142,7 @@ mod tests {
|
|||||||
let indexes = [get_simple_index(), get_random_looking_index()];
|
let indexes = [get_simple_index(), get_random_looking_index()];
|
||||||
for (i, index) in indexes.iter().enumerate() {
|
for (i, index) in indexes.iter().enumerate() {
|
||||||
let txn = index.env.read_txn().unwrap();
|
let txn = index.env.read_txn().unwrap();
|
||||||
let candidates = (0..=255).into_iter().collect::<RoaringBitmap>();
|
let candidates = (0..=255).collect::<RoaringBitmap>();
|
||||||
let mut results = String::new();
|
let mut results = String::new();
|
||||||
iterate_over_facet_distribution(
|
iterate_over_facet_distribution(
|
||||||
&txn,
|
&txn,
|
||||||
@ -166,7 +166,7 @@ mod tests {
|
|||||||
let indexes = [get_simple_index(), get_random_looking_index()];
|
let indexes = [get_simple_index(), get_random_looking_index()];
|
||||||
for (i, index) in indexes.iter().enumerate() {
|
for (i, index) in indexes.iter().enumerate() {
|
||||||
let txn = index.env.read_txn().unwrap();
|
let txn = index.env.read_txn().unwrap();
|
||||||
let candidates = (0..=255).into_iter().collect::<RoaringBitmap>();
|
let candidates = (0..=255).collect::<RoaringBitmap>();
|
||||||
let mut results = String::new();
|
let mut results = String::new();
|
||||||
let mut nbr_facets = 0;
|
let mut nbr_facets = 0;
|
||||||
iterate_over_facet_distribution(
|
iterate_over_facet_distribution(
|
||||||
|
@ -410,7 +410,7 @@ mod tests {
|
|||||||
|
|
||||||
let mut results = String::new();
|
let mut results = String::new();
|
||||||
|
|
||||||
for i in (0..=255).into_iter().rev() {
|
for i in (0..=255).rev() {
|
||||||
let i = i as f64;
|
let i = i as f64;
|
||||||
let start = Bound::Included(i);
|
let start = Bound::Included(i);
|
||||||
let end = Bound::Included(255.);
|
let end = Bound::Included(255.);
|
||||||
@ -431,7 +431,7 @@ mod tests {
|
|||||||
|
|
||||||
let mut results = String::new();
|
let mut results = String::new();
|
||||||
|
|
||||||
for i in (0..=255).into_iter().rev() {
|
for i in (0..=255).rev() {
|
||||||
let i = i as f64;
|
let i = i as f64;
|
||||||
let start = Bound::Excluded(i);
|
let start = Bound::Excluded(i);
|
||||||
let end = Bound::Excluded(255.);
|
let end = Bound::Excluded(255.);
|
||||||
@ -466,7 +466,7 @@ mod tests {
|
|||||||
|
|
||||||
let mut results = String::new();
|
let mut results = String::new();
|
||||||
|
|
||||||
for i in (0..=128).into_iter().rev() {
|
for i in (0..=128).rev() {
|
||||||
let i = i as f64;
|
let i = i as f64;
|
||||||
let start = Bound::Included(i);
|
let start = Bound::Included(i);
|
||||||
let end = Bound::Included(255. - i);
|
let end = Bound::Included(255. - i);
|
||||||
@ -491,7 +491,7 @@ mod tests {
|
|||||||
|
|
||||||
let mut results = String::new();
|
let mut results = String::new();
|
||||||
|
|
||||||
for i in (0..=128).into_iter().rev() {
|
for i in (0..=128).rev() {
|
||||||
let i = i as f64;
|
let i = i as f64;
|
||||||
let start = Bound::Excluded(i);
|
let start = Bound::Excluded(i);
|
||||||
let end = Bound::Excluded(255. - i);
|
let end = Bound::Excluded(255. - i);
|
||||||
|
@ -132,7 +132,7 @@ mod tests {
|
|||||||
let indexes = [get_simple_index(), get_random_looking_index()];
|
let indexes = [get_simple_index(), get_random_looking_index()];
|
||||||
for (i, index) in indexes.iter().enumerate() {
|
for (i, index) in indexes.iter().enumerate() {
|
||||||
let txn = index.env.read_txn().unwrap();
|
let txn = index.env.read_txn().unwrap();
|
||||||
let candidates = (200..=300).into_iter().collect::<RoaringBitmap>();
|
let candidates = (200..=300).collect::<RoaringBitmap>();
|
||||||
let mut results = String::new();
|
let mut results = String::new();
|
||||||
let iter = ascending_facet_sort(&txn, index.content, 0, candidates).unwrap();
|
let iter = ascending_facet_sort(&txn, index.content, 0, candidates).unwrap();
|
||||||
for el in iter {
|
for el in iter {
|
||||||
@ -154,7 +154,7 @@ mod tests {
|
|||||||
];
|
];
|
||||||
for (i, index) in indexes.iter().enumerate() {
|
for (i, index) in indexes.iter().enumerate() {
|
||||||
let txn = index.env.read_txn().unwrap();
|
let txn = index.env.read_txn().unwrap();
|
||||||
let candidates = (200..=300).into_iter().collect::<RoaringBitmap>();
|
let candidates = (200..=300).collect::<RoaringBitmap>();
|
||||||
let mut results = String::new();
|
let mut results = String::new();
|
||||||
let iter = ascending_facet_sort(&txn, index.content, 0, candidates.clone()).unwrap();
|
let iter = ascending_facet_sort(&txn, index.content, 0, candidates.clone()).unwrap();
|
||||||
for el in iter {
|
for el in iter {
|
||||||
|
@ -142,7 +142,7 @@ mod tests {
|
|||||||
];
|
];
|
||||||
for (i, index) in indexes.iter().enumerate() {
|
for (i, index) in indexes.iter().enumerate() {
|
||||||
let txn = index.env.read_txn().unwrap();
|
let txn = index.env.read_txn().unwrap();
|
||||||
let candidates = (200..=300).into_iter().collect::<RoaringBitmap>();
|
let candidates = (200..=300).collect::<RoaringBitmap>();
|
||||||
let mut results = String::new();
|
let mut results = String::new();
|
||||||
let db = index.content.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>();
|
let db = index.content.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>();
|
||||||
let iter = descending_facet_sort(&txn, db, 0, candidates).unwrap();
|
let iter = descending_facet_sort(&txn, db, 0, candidates).unwrap();
|
||||||
@ -165,7 +165,7 @@ mod tests {
|
|||||||
];
|
];
|
||||||
for (i, index) in indexes.iter().enumerate() {
|
for (i, index) in indexes.iter().enumerate() {
|
||||||
let txn = index.env.read_txn().unwrap();
|
let txn = index.env.read_txn().unwrap();
|
||||||
let candidates = (200..=300).into_iter().collect::<RoaringBitmap>();
|
let candidates = (200..=300).collect::<RoaringBitmap>();
|
||||||
let mut results = String::new();
|
let mut results = String::new();
|
||||||
let db = index.content.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>();
|
let db = index.content.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>();
|
||||||
let iter = descending_facet_sort(&txn, db, 0, candidates.clone()).unwrap();
|
let iter = descending_facet_sort(&txn, db, 0, candidates.clone()).unwrap();
|
||||||
|
@ -1,21 +1,14 @@
|
|||||||
pub use self::facet::{FacetDistribution, Filter, DEFAULT_VALUES_PER_FACET};
|
pub use self::facet::{FacetDistribution, Filter, DEFAULT_VALUES_PER_FACET};
|
||||||
use self::fst_utils::{Complement, Intersection, StartsWith, Union};
|
|
||||||
pub use self::matches::{
|
pub use self::matches::{
|
||||||
FormatOptions, MatchBounds, Matcher, MatcherBuilder, MatchingWord, MatchingWords,
|
FormatOptions, MatchBounds, Matcher, MatcherBuilder, MatchingWord, MatchingWords,
|
||||||
};
|
};
|
||||||
use crate::{
|
use crate::{
|
||||||
execute_search, AscDesc, DefaultSearchLogger, DocumentId, Index, Result, SearchContext,
|
execute_search, AscDesc, DefaultSearchLogger, DocumentId, Index, Result, SearchContext,
|
||||||
};
|
};
|
||||||
use fst::automaton::Str;
|
|
||||||
use fst::{Automaton, IntoStreamer, Streamer};
|
|
||||||
use levenshtein_automata::{LevenshteinAutomatonBuilder as LevBuilder, DFA};
|
use levenshtein_automata::{LevenshteinAutomatonBuilder as LevBuilder, DFA};
|
||||||
use once_cell::sync::Lazy;
|
use once_cell::sync::Lazy;
|
||||||
use roaring::bitmap::RoaringBitmap;
|
use roaring::bitmap::RoaringBitmap;
|
||||||
use std::borrow::Cow;
|
|
||||||
use std::collections::hash_map::{Entry, HashMap};
|
|
||||||
use std::fmt;
|
use std::fmt;
|
||||||
use std::result::Result as StdResult;
|
|
||||||
use std::str::Utf8Error;
|
|
||||||
|
|
||||||
// Building these factories is not free.
|
// Building these factories is not free.
|
||||||
static LEVDIST0: Lazy<LevBuilder> = Lazy::new(|| LevBuilder::new(0, true));
|
static LEVDIST0: Lazy<LevBuilder> = Lazy::new(|| LevBuilder::new(0, true));
|
||||||
@ -26,7 +19,6 @@ pub mod facet;
|
|||||||
mod fst_utils;
|
mod fst_utils;
|
||||||
mod matches;
|
mod matches;
|
||||||
pub mod new;
|
pub mod new;
|
||||||
mod query_tree;
|
|
||||||
|
|
||||||
pub struct Search<'a> {
|
pub struct Search<'a> {
|
||||||
query: Option<String>,
|
query: Option<String>,
|
||||||
@ -200,70 +192,6 @@ impl Default for TermsMatchingStrategy {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub type WordDerivationsCache = HashMap<(String, bool, u8), Vec<(String, u8)>>;
|
|
||||||
|
|
||||||
pub fn word_derivations<'c>(
|
|
||||||
word: &str,
|
|
||||||
is_prefix: bool,
|
|
||||||
max_typo: u8,
|
|
||||||
fst: &fst::Set<Cow<[u8]>>,
|
|
||||||
cache: &'c mut WordDerivationsCache,
|
|
||||||
) -> StdResult<&'c [(String, u8)], Utf8Error> {
|
|
||||||
match cache.entry((word.to_string(), is_prefix, max_typo)) {
|
|
||||||
Entry::Occupied(entry) => Ok(entry.into_mut()),
|
|
||||||
Entry::Vacant(entry) => {
|
|
||||||
// println!("word derivations {word} {is_prefix} {max_typo}");
|
|
||||||
let mut derived_words = Vec::new();
|
|
||||||
if max_typo == 0 {
|
|
||||||
if is_prefix {
|
|
||||||
let prefix = Str::new(word).starts_with();
|
|
||||||
let mut stream = fst.search(prefix).into_stream();
|
|
||||||
|
|
||||||
while let Some(word) = stream.next() {
|
|
||||||
let word = std::str::from_utf8(word)?;
|
|
||||||
derived_words.push((word.to_string(), 0));
|
|
||||||
}
|
|
||||||
} else if fst.contains(word) {
|
|
||||||
derived_words.push((word.to_string(), 0));
|
|
||||||
}
|
|
||||||
} else if max_typo == 1 {
|
|
||||||
let dfa = build_dfa(word, 1, is_prefix);
|
|
||||||
let starts = StartsWith(Str::new(get_first(word)));
|
|
||||||
let mut stream = fst.search_with_state(Intersection(starts, &dfa)).into_stream();
|
|
||||||
|
|
||||||
while let Some((word, state)) = stream.next() {
|
|
||||||
let word = std::str::from_utf8(word)?;
|
|
||||||
let d = dfa.distance(state.1);
|
|
||||||
derived_words.push((word.to_string(), d.to_u8()));
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
let starts = StartsWith(Str::new(get_first(word)));
|
|
||||||
let first = Intersection(build_dfa(word, 1, is_prefix), Complement(&starts));
|
|
||||||
let second_dfa = build_dfa(word, 2, is_prefix);
|
|
||||||
let second = Intersection(&second_dfa, &starts);
|
|
||||||
let automaton = Union(first, &second);
|
|
||||||
|
|
||||||
let mut stream = fst.search_with_state(automaton).into_stream();
|
|
||||||
|
|
||||||
while let Some((found_word, state)) = stream.next() {
|
|
||||||
let found_word = std::str::from_utf8(found_word)?;
|
|
||||||
// in the case the typo is on the first letter, we know the number of typo
|
|
||||||
// is two
|
|
||||||
if get_first(found_word) != get_first(word) {
|
|
||||||
derived_words.push((found_word.to_string(), 2));
|
|
||||||
} else {
|
|
||||||
// Else, we know that it is the second dfa that matched and compute the
|
|
||||||
// correct distance
|
|
||||||
let d = second_dfa.distance((state.1).0);
|
|
||||||
derived_words.push((found_word.to_string(), d.to_u8()));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Ok(entry.insert(derived_words))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn get_first(s: &str) -> &str {
|
fn get_first(s: &str) -> &str {
|
||||||
match s.chars().next() {
|
match s.chars().next() {
|
||||||
Some(c) => &s[..c.len_utf8()],
|
Some(c) => &s[..c.len_utf8()],
|
||||||
@ -337,66 +265,66 @@ mod test {
|
|||||||
assert!(!search.is_typo_authorized().unwrap());
|
assert!(!search.is_typo_authorized().unwrap());
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
// #[test]
|
||||||
fn test_one_typos_tolerance() {
|
// fn test_one_typos_tolerance() {
|
||||||
let fst = fst::Set::from_iter(["zealand"].iter()).unwrap().map_data(Cow::Owned).unwrap();
|
// let fst = fst::Set::from_iter(["zealand"].iter()).unwrap().map_data(Cow::Owned).unwrap();
|
||||||
let mut cache = HashMap::new();
|
// let mut cache = HashMap::new();
|
||||||
let found = word_derivations("zealend", false, 1, &fst, &mut cache).unwrap();
|
// let found = word_derivations("zealend", false, 1, &fst, &mut cache).unwrap();
|
||||||
|
|
||||||
assert_eq!(found, &[("zealand".to_string(), 1)]);
|
// assert_eq!(found, &[("zealand".to_string(), 1)]);
|
||||||
}
|
// }
|
||||||
|
|
||||||
#[test]
|
// #[test]
|
||||||
fn test_one_typos_first_letter() {
|
// fn test_one_typos_first_letter() {
|
||||||
let fst = fst::Set::from_iter(["zealand"].iter()).unwrap().map_data(Cow::Owned).unwrap();
|
// let fst = fst::Set::from_iter(["zealand"].iter()).unwrap().map_data(Cow::Owned).unwrap();
|
||||||
let mut cache = HashMap::new();
|
// let mut cache = HashMap::new();
|
||||||
let found = word_derivations("sealand", false, 1, &fst, &mut cache).unwrap();
|
// let found = word_derivations("sealand", false, 1, &fst, &mut cache).unwrap();
|
||||||
|
|
||||||
assert_eq!(found, &[]);
|
// assert_eq!(found, &[]);
|
||||||
}
|
// }
|
||||||
|
|
||||||
#[test]
|
// #[test]
|
||||||
fn test_two_typos_tolerance() {
|
// fn test_two_typos_tolerance() {
|
||||||
let fst = fst::Set::from_iter(["zealand"].iter()).unwrap().map_data(Cow::Owned).unwrap();
|
// let fst = fst::Set::from_iter(["zealand"].iter()).unwrap().map_data(Cow::Owned).unwrap();
|
||||||
let mut cache = HashMap::new();
|
// let mut cache = HashMap::new();
|
||||||
let found = word_derivations("zealemd", false, 2, &fst, &mut cache).unwrap();
|
// let found = word_derivations("zealemd", false, 2, &fst, &mut cache).unwrap();
|
||||||
|
|
||||||
assert_eq!(found, &[("zealand".to_string(), 2)]);
|
// assert_eq!(found, &[("zealand".to_string(), 2)]);
|
||||||
}
|
// }
|
||||||
|
|
||||||
#[test]
|
// #[test]
|
||||||
fn test_two_typos_first_letter() {
|
// fn test_two_typos_first_letter() {
|
||||||
let fst = fst::Set::from_iter(["zealand"].iter()).unwrap().map_data(Cow::Owned).unwrap();
|
// let fst = fst::Set::from_iter(["zealand"].iter()).unwrap().map_data(Cow::Owned).unwrap();
|
||||||
let mut cache = HashMap::new();
|
// let mut cache = HashMap::new();
|
||||||
let found = word_derivations("sealand", false, 2, &fst, &mut cache).unwrap();
|
// let found = word_derivations("sealand", false, 2, &fst, &mut cache).unwrap();
|
||||||
|
|
||||||
assert_eq!(found, &[("zealand".to_string(), 2)]);
|
// assert_eq!(found, &[("zealand".to_string(), 2)]);
|
||||||
}
|
// }
|
||||||
|
|
||||||
#[test]
|
// #[test]
|
||||||
fn test_prefix() {
|
// fn test_prefix() {
|
||||||
let fst = fst::Set::from_iter(["zealand"].iter()).unwrap().map_data(Cow::Owned).unwrap();
|
// let fst = fst::Set::from_iter(["zealand"].iter()).unwrap().map_data(Cow::Owned).unwrap();
|
||||||
let mut cache = HashMap::new();
|
// let mut cache = HashMap::new();
|
||||||
let found = word_derivations("ze", true, 0, &fst, &mut cache).unwrap();
|
// let found = word_derivations("ze", true, 0, &fst, &mut cache).unwrap();
|
||||||
|
|
||||||
assert_eq!(found, &[("zealand".to_string(), 0)]);
|
// assert_eq!(found, &[("zealand".to_string(), 0)]);
|
||||||
}
|
// }
|
||||||
|
|
||||||
#[test]
|
// #[test]
|
||||||
fn test_bad_prefix() {
|
// fn test_bad_prefix() {
|
||||||
let fst = fst::Set::from_iter(["zealand"].iter()).unwrap().map_data(Cow::Owned).unwrap();
|
// let fst = fst::Set::from_iter(["zealand"].iter()).unwrap().map_data(Cow::Owned).unwrap();
|
||||||
let mut cache = HashMap::new();
|
// let mut cache = HashMap::new();
|
||||||
let found = word_derivations("se", true, 0, &fst, &mut cache).unwrap();
|
// let found = word_derivations("se", true, 0, &fst, &mut cache).unwrap();
|
||||||
|
|
||||||
assert_eq!(found, &[]);
|
// assert_eq!(found, &[]);
|
||||||
}
|
// }
|
||||||
|
|
||||||
#[test]
|
// #[test]
|
||||||
fn test_prefix_with_typo() {
|
// fn test_prefix_with_typo() {
|
||||||
let fst = fst::Set::from_iter(["zealand"].iter()).unwrap().map_data(Cow::Owned).unwrap();
|
// let fst = fst::Set::from_iter(["zealand"].iter()).unwrap().map_data(Cow::Owned).unwrap();
|
||||||
let mut cache = HashMap::new();
|
// let mut cache = HashMap::new();
|
||||||
let found = word_derivations("zae", true, 1, &fst, &mut cache).unwrap();
|
// let found = word_derivations("zae", true, 1, &fst, &mut cache).unwrap();
|
||||||
|
|
||||||
assert_eq!(found, &[("zealand".to_string(), 1)]);
|
// assert_eq!(found, &[("zealand".to_string(), 1)]);
|
||||||
}
|
// }
|
||||||
}
|
}
|
||||||
|
@ -15,10 +15,9 @@ mod sort;
|
|||||||
// TODO: documentation + comments
|
// TODO: documentation + comments
|
||||||
mod words;
|
mod words;
|
||||||
|
|
||||||
// #[cfg(test)]
|
|
||||||
use std::collections::{BTreeSet, HashSet};
|
use std::collections::{BTreeSet, HashSet};
|
||||||
|
|
||||||
use charabia::{Tokenize, TokenizerBuilder};
|
use charabia::TokenizerBuilder;
|
||||||
use db_cache::DatabaseCache;
|
use db_cache::DatabaseCache;
|
||||||
use graph_based_ranking_rule::{Proximity, Typo};
|
use graph_based_ranking_rule::{Proximity, Typo};
|
||||||
use heed::RoTxn;
|
use heed::RoTxn;
|
||||||
@ -254,7 +253,7 @@ pub fn execute_search(
|
|||||||
}
|
}
|
||||||
|
|
||||||
let tokenizer = tokbuilder.build();
|
let tokenizer = tokbuilder.build();
|
||||||
let tokens = tokenizer.tokenize(&query);
|
let tokens = tokenizer.tokenize(query);
|
||||||
|
|
||||||
let query_terms = located_query_terms_from_string(ctx, tokens, words_limit)?;
|
let query_terms = located_query_terms_from_string(ctx, tokens, words_limit)?;
|
||||||
let graph = QueryGraph::from_query(ctx, query_terms)?;
|
let graph = QueryGraph::from_query(ctx, query_terms)?;
|
||||||
|
File diff suppressed because it is too large
Load Diff
Loading…
x
Reference in New Issue
Block a user