mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-01-12 06:24:29 +01:00
Differentiate short words as prefix or exact matches
This commit is contained in:
parent
9809ded23d
commit
70d4f47f37
@ -380,7 +380,7 @@ pub fn traverse_query_tree<'o, 'txn>(
|
|||||||
let before = Instant::now();
|
let before = Instant::now();
|
||||||
|
|
||||||
let Query { id, prefix, kind } = query;
|
let Query { id, prefix, kind } = query;
|
||||||
let docids = match kind {
|
let docids: Cow<Set<_>> = match kind {
|
||||||
QueryKind::Tolerant(word) => {
|
QueryKind::Tolerant(word) => {
|
||||||
if *prefix && word.len() <= 2 {
|
if *prefix && word.len() <= 2 {
|
||||||
let prefix = {
|
let prefix = {
|
||||||
@ -390,10 +390,29 @@ pub fn traverse_query_tree<'o, 'txn>(
|
|||||||
array
|
array
|
||||||
};
|
};
|
||||||
|
|
||||||
|
let mut docids = Vec::new();
|
||||||
|
|
||||||
|
// We retrieve the cached postings list for all
|
||||||
|
// the words that start with this short prefix.
|
||||||
let result = ctx.prefix_postings_lists.prefix_postings_list(reader, prefix)?.unwrap_or_default();
|
let result = ctx.prefix_postings_lists.prefix_postings_list(reader, prefix)?.unwrap_or_default();
|
||||||
let distance = 0;
|
let distance = 0;
|
||||||
postings.insert((query, word.clone().into_bytes(), distance), result.matches);
|
postings.insert((query, word.clone().into_bytes(), distance), result.matches);
|
||||||
result.docids
|
docids.extend_from_slice(&result.docids);
|
||||||
|
|
||||||
|
// We retrieve the exact postings list for the prefix,
|
||||||
|
// because we must consider these matches as exact.
|
||||||
|
if let Some(result) = ctx.postings_lists.postings_list(reader, word.as_bytes())? {
|
||||||
|
let distance = 0;
|
||||||
|
postings.insert((query, word.clone().into_bytes(), distance), result.matches);
|
||||||
|
docids.extend_from_slice(&result.docids);
|
||||||
|
}
|
||||||
|
|
||||||
|
let before = Instant::now();
|
||||||
|
let docids = SetBuf::from_dirty(docids);
|
||||||
|
println!("{:2$}prefix docids construction took {:.02?}", "", before.elapsed(), depth * 2);
|
||||||
|
|
||||||
|
Cow::Owned(docids)
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
let dfa = if *prefix { build_prefix_dfa(word) } else { build_dfa(word) };
|
let dfa = if *prefix { build_prefix_dfa(word) } else { build_dfa(word) };
|
||||||
|
|
||||||
@ -442,7 +461,11 @@ pub fn traverse_query_tree<'o, 'txn>(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Cow::Owned(SetBuf::from_dirty(docids))
|
let before = Instant::now();
|
||||||
|
let docids = SetBuf::from_dirty(docids);
|
||||||
|
println!("{:2$}docids construction took {:.02?}", "", before.elapsed(), depth * 2);
|
||||||
|
|
||||||
|
Cow::Owned(docids)
|
||||||
},
|
},
|
||||||
QueryKind::Phrase(words) => {
|
QueryKind::Phrase(words) => {
|
||||||
// TODO support prefix and non-prefix exact DFA
|
// TODO support prefix and non-prefix exact DFA
|
||||||
|
@ -195,11 +195,16 @@ pub fn apply_documents_addition<'a, 'b>(
|
|||||||
pplc_store.clear(writer)?;
|
pplc_store.clear(writer)?;
|
||||||
|
|
||||||
for prefix_len in 1..=2 {
|
for prefix_len in 1..=2 {
|
||||||
// compute prefixes and store those in the PrefixPostingsListsCache.
|
// compute prefixes and store those in the PrefixPostingsListsCache store.
|
||||||
let mut previous_prefix: Option<([u8; 4], Vec<_>)> = None;
|
let mut previous_prefix: Option<([u8; 4], Vec<_>)> = None;
|
||||||
let mut stream = words_fst.into_stream();
|
let mut stream = words_fst.into_stream();
|
||||||
while let Some(input) = stream.next() {
|
while let Some(input) = stream.next() {
|
||||||
if input.len() < prefix_len { continue }
|
|
||||||
|
// We skip the prefixes that are shorter than the current length
|
||||||
|
// we want to cache (<). We must ignore the input when it is exactly the
|
||||||
|
// same word as the prefix because if we match exactly on it we need
|
||||||
|
// to consider it as an exact match and not as a prefix (=).
|
||||||
|
if input.len() <= prefix_len { continue }
|
||||||
|
|
||||||
if let Some(postings_list) = postings_lists_store.postings_list(writer, input)?.map(|p| p.matches.into_owned()) {
|
if let Some(postings_list) = postings_lists_store.postings_list(writer, input)?.map(|p| p.matches.into_owned()) {
|
||||||
let prefix = &input[..prefix_len];
|
let prefix = &input[..prefix_len];
|
||||||
@ -208,38 +213,33 @@ pub fn apply_documents_addition<'a, 'b>(
|
|||||||
arr_prefix[..prefix_len].copy_from_slice(prefix);
|
arr_prefix[..prefix_len].copy_from_slice(prefix);
|
||||||
|
|
||||||
match previous_prefix {
|
match previous_prefix {
|
||||||
Some((ref mut prev_prefix, ref mut prev_postings_list)) if *prev_prefix != arr_prefix => {
|
Some((ref mut prev_prefix, ref mut prev_pl)) if *prev_prefix != arr_prefix => {
|
||||||
prev_postings_list.sort_unstable();
|
prev_pl.sort_unstable();
|
||||||
prev_postings_list.dedup();
|
prev_pl.dedup();
|
||||||
|
|
||||||
if let Ok(prefix) = std::str::from_utf8(&prev_prefix[..prefix_len]) {
|
if let Ok(prefix) = std::str::from_utf8(&prev_prefix[..prefix_len]) {
|
||||||
debug!("writing the prefix of {:?} of length {}",
|
debug!("writing the prefix of {:?} of length {}", prefix, prev_pl.len());
|
||||||
prefix, prev_postings_list.len());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
let pls = Set::new_unchecked(&prev_postings_list);
|
let pls = Set::new_unchecked(&prev_pl);
|
||||||
pplc_store.put_prefix_postings_list(writer, *prev_prefix, &pls)?;
|
pplc_store.put_prefix_postings_list(writer, *prev_prefix, &pls)?;
|
||||||
|
|
||||||
*prev_prefix = arr_prefix;
|
*prev_prefix = arr_prefix;
|
||||||
prev_postings_list.clear();
|
prev_pl.clear();
|
||||||
prev_postings_list.extend_from_slice(&postings_list);
|
prev_pl.extend_from_slice(&postings_list);
|
||||||
},
|
|
||||||
Some((_, ref mut prev_postings_list)) => {
|
|
||||||
prev_postings_list.extend_from_slice(&postings_list);
|
|
||||||
},
|
|
||||||
None => {
|
|
||||||
previous_prefix = Some((arr_prefix, postings_list.to_vec()));
|
|
||||||
},
|
},
|
||||||
|
Some((_, ref mut prev_pl)) => prev_pl.extend_from_slice(&postings_list),
|
||||||
|
None => previous_prefix = Some((arr_prefix, postings_list.to_vec())),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// write the last prefix postings lists
|
// write the last prefix postings lists
|
||||||
if let Some((prev_prefix, mut prev_postings_list)) = previous_prefix.take() {
|
if let Some((prev_prefix, mut prev_pl)) = previous_prefix.take() {
|
||||||
prev_postings_list.sort_unstable();
|
prev_pl.sort_unstable();
|
||||||
prev_postings_list.dedup();
|
prev_pl.dedup();
|
||||||
|
|
||||||
let pls = Set::new_unchecked(&prev_postings_list);
|
let pls = Set::new_unchecked(&prev_pl);
|
||||||
pplc_store.put_prefix_postings_list(writer, prev_prefix, &pls)?;
|
pplc_store.put_prefix_postings_list(writer, prev_prefix, &pls)?;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user