mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-26 14:54:27 +01:00
Use the cached postings lists in the query system
This commit is contained in:
parent
eed07c724f
commit
670e80c151
@ -41,6 +41,7 @@ pub fn bucket_sort<'c, FI>(
|
|||||||
documents_fields_counts_store: store::DocumentsFieldsCounts,
|
documents_fields_counts_store: store::DocumentsFieldsCounts,
|
||||||
synonyms_store: store::Synonyms,
|
synonyms_store: store::Synonyms,
|
||||||
prefix_documents_cache_store: store::PrefixDocumentsCache,
|
prefix_documents_cache_store: store::PrefixDocumentsCache,
|
||||||
|
prefix_postings_lists_cache_store: store::PrefixPostingsListsCache,
|
||||||
) -> MResult<Vec<Document>>
|
) -> MResult<Vec<Document>>
|
||||||
where
|
where
|
||||||
FI: Fn(DocumentId) -> bool,
|
FI: Fn(DocumentId) -> bool,
|
||||||
@ -64,6 +65,7 @@ where
|
|||||||
documents_fields_counts_store,
|
documents_fields_counts_store,
|
||||||
synonyms_store,
|
synonyms_store,
|
||||||
prefix_documents_cache_store,
|
prefix_documents_cache_store,
|
||||||
|
prefix_postings_lists_cache_store,
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -96,7 +98,14 @@ where
|
|||||||
let before_postings_lists_fetching = Instant::now();
|
let before_postings_lists_fetching = Instant::now();
|
||||||
mk_arena!(arena);
|
mk_arena!(arena);
|
||||||
let mut bare_matches =
|
let mut bare_matches =
|
||||||
fetch_matches(reader, &automatons, &mut arena, main_store, postings_lists_store)?;
|
fetch_matches(
|
||||||
|
reader,
|
||||||
|
&automatons,
|
||||||
|
&mut arena,
|
||||||
|
main_store,
|
||||||
|
postings_lists_store,
|
||||||
|
prefix_postings_lists_cache_store,
|
||||||
|
)?;
|
||||||
debug!("bare matches ({}) retrieved in {:.02?}",
|
debug!("bare matches ({}) retrieved in {:.02?}",
|
||||||
bare_matches.len(),
|
bare_matches.len(),
|
||||||
before_postings_lists_fetching.elapsed(),
|
before_postings_lists_fetching.elapsed(),
|
||||||
@ -203,6 +212,7 @@ pub fn bucket_sort_with_distinct<'c, FI, FD>(
|
|||||||
documents_fields_counts_store: store::DocumentsFieldsCounts,
|
documents_fields_counts_store: store::DocumentsFieldsCounts,
|
||||||
synonyms_store: store::Synonyms,
|
synonyms_store: store::Synonyms,
|
||||||
prefix_documents_cache_store: store::PrefixDocumentsCache,
|
prefix_documents_cache_store: store::PrefixDocumentsCache,
|
||||||
|
prefix_postings_lists_cache_store: store::PrefixPostingsListsCache,
|
||||||
) -> MResult<Vec<Document>>
|
) -> MResult<Vec<Document>>
|
||||||
where
|
where
|
||||||
FI: Fn(DocumentId) -> bool,
|
FI: Fn(DocumentId) -> bool,
|
||||||
@ -213,7 +223,14 @@ where
|
|||||||
|
|
||||||
let before_postings_lists_fetching = Instant::now();
|
let before_postings_lists_fetching = Instant::now();
|
||||||
mk_arena!(arena);
|
mk_arena!(arena);
|
||||||
let mut bare_matches = fetch_matches(reader, &automatons, &mut arena, main_store, postings_lists_store)?;
|
let mut bare_matches = fetch_matches(
|
||||||
|
reader,
|
||||||
|
&automatons,
|
||||||
|
&mut arena,
|
||||||
|
main_store,
|
||||||
|
postings_lists_store,
|
||||||
|
prefix_postings_lists_cache_store,
|
||||||
|
)?;
|
||||||
debug!("bare matches ({}) retrieved in {:.02?}",
|
debug!("bare matches ({}) retrieved in {:.02?}",
|
||||||
bare_matches.len(),
|
bare_matches.len(),
|
||||||
before_postings_lists_fetching.elapsed(),
|
before_postings_lists_fetching.elapsed(),
|
||||||
@ -486,6 +503,7 @@ fn fetch_matches<'txn, 'tag>(
|
|||||||
arena: &mut SmallArena<'tag, PostingsListView<'txn>>,
|
arena: &mut SmallArena<'tag, PostingsListView<'txn>>,
|
||||||
main_store: store::Main,
|
main_store: store::Main,
|
||||||
postings_lists_store: store::PostingsLists,
|
postings_lists_store: store::PostingsLists,
|
||||||
|
pplc_store: store::PrefixPostingsListsCache,
|
||||||
) -> MResult<Vec<BareMatch<'tag>>>
|
) -> MResult<Vec<BareMatch<'tag>>>
|
||||||
{
|
{
|
||||||
let before_words_fst = Instant::now();
|
let before_words_fst = Instant::now();
|
||||||
@ -504,10 +522,7 @@ fn fetch_matches<'txn, 'tag>(
|
|||||||
let automatons_loop = Instant::now();
|
let automatons_loop = Instant::now();
|
||||||
|
|
||||||
for (query_index, automaton) in automatons.iter().enumerate() {
|
for (query_index, automaton) in automatons.iter().enumerate() {
|
||||||
let before_dfa = Instant::now();
|
let QueryWordAutomaton { query, is_exact, is_prefix, .. } = automaton;
|
||||||
let dfa = automaton.dfa();
|
|
||||||
let QueryWordAutomaton { query, is_exact, .. } = automaton;
|
|
||||||
dfa_time += before_dfa.elapsed();
|
|
||||||
|
|
||||||
let before_word_postings_lists_fetching = Instant::now();
|
let before_word_postings_lists_fetching = Instant::now();
|
||||||
let mut stream_next_time = Duration::default();
|
let mut stream_next_time = Duration::default();
|
||||||
@ -515,34 +530,17 @@ fn fetch_matches<'txn, 'tag>(
|
|||||||
let mut postings_lists_original_length = 0;
|
let mut postings_lists_original_length = 0;
|
||||||
let mut postings_lists_length = 0;
|
let mut postings_lists_length = 0;
|
||||||
|
|
||||||
let byte = query.as_bytes()[0];
|
if *is_prefix && query.len() == 1 {
|
||||||
let mut stream = if byte == u8::max_value() {
|
let prefix = [query.as_bytes()[0], 0, 0, 0];
|
||||||
words.search(&dfa).ge(&[byte]).into_stream()
|
|
||||||
} else {
|
|
||||||
words.search(&dfa).ge(&[byte]).lt(&[byte + 1]).into_stream()
|
|
||||||
};
|
|
||||||
|
|
||||||
// while let Some(input) = stream.next() {
|
|
||||||
loop {
|
|
||||||
let before_stream_next = Instant::now();
|
|
||||||
let value = stream.next();
|
|
||||||
stream_next_time += before_stream_next.elapsed();
|
|
||||||
|
|
||||||
let input = match value {
|
|
||||||
Some(input) => input,
|
|
||||||
None => break,
|
|
||||||
};
|
|
||||||
|
|
||||||
number_of_words += 1;
|
number_of_words += 1;
|
||||||
|
|
||||||
let distance = dfa.eval(input).to_u8();
|
|
||||||
let is_exact = *is_exact && distance == 0 && input.len() == query.len();
|
|
||||||
|
|
||||||
let before_postings_lists_fetching = Instant::now();
|
let before_postings_lists_fetching = Instant::now();
|
||||||
if let Some(postings_list) = postings_lists_store.postings_list(reader, input)? {
|
if let Some(postings_list) = pplc_store.prefix_postings_list(reader, prefix)? {
|
||||||
|
debug!("Found cached postings list for {:?}", query);
|
||||||
postings_lists_original_length += postings_list.len();
|
postings_lists_original_length += postings_list.len();
|
||||||
|
|
||||||
let input = Rc::from(input);
|
let input = Rc::from(&prefix[..]);
|
||||||
let postings_list = Rc::new(postings_list);
|
let postings_list = Rc::new(postings_list);
|
||||||
let postings_list_view = PostingsListView::original(input, postings_list);
|
let postings_list_view = PostingsListView::original(input, postings_list);
|
||||||
|
|
||||||
@ -550,8 +548,11 @@ fn fetch_matches<'txn, 'tag>(
|
|||||||
for group in postings_list_view.linear_group_by_key(|di| di.document_id) {
|
for group in postings_list_view.linear_group_by_key(|di| di.document_id) {
|
||||||
let document_id = group[0].document_id;
|
let document_id = group[0].document_id;
|
||||||
|
|
||||||
if query_index != 0 && !documents_ids.contains(&document_id) { continue }
|
if query_index != 0 {
|
||||||
documents_ids.insert(document_id);
|
if !documents_ids.contains(&document_id) { continue }
|
||||||
|
} else {
|
||||||
|
documents_ids.insert(document_id);
|
||||||
|
}
|
||||||
|
|
||||||
postings_lists_length += group.len();
|
postings_lists_length += group.len();
|
||||||
|
|
||||||
@ -559,8 +560,8 @@ fn fetch_matches<'txn, 'tag>(
|
|||||||
let bare_match = BareMatch {
|
let bare_match = BareMatch {
|
||||||
document_id,
|
document_id,
|
||||||
query_index: query_index as u16,
|
query_index: query_index as u16,
|
||||||
distance,
|
distance: 0,
|
||||||
is_exact,
|
is_exact: *is_exact,
|
||||||
postings_list: posting_list_index,
|
postings_list: posting_list_index,
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -570,6 +571,70 @@ fn fetch_matches<'txn, 'tag>(
|
|||||||
}
|
}
|
||||||
postings_lists_fetching_time += before_postings_lists_fetching.elapsed();
|
postings_lists_fetching_time += before_postings_lists_fetching.elapsed();
|
||||||
}
|
}
|
||||||
|
else {
|
||||||
|
let before_dfa = Instant::now();
|
||||||
|
let dfa = automaton.dfa();
|
||||||
|
dfa_time += before_dfa.elapsed();
|
||||||
|
|
||||||
|
let byte = query.as_bytes()[0];
|
||||||
|
let mut stream = if byte == u8::max_value() {
|
||||||
|
words.search(&dfa).ge(&[byte]).into_stream()
|
||||||
|
} else {
|
||||||
|
words.search(&dfa).ge(&[byte]).lt(&[byte + 1]).into_stream()
|
||||||
|
};
|
||||||
|
|
||||||
|
// while let Some(input) = stream.next() {
|
||||||
|
loop {
|
||||||
|
let before_stream_next = Instant::now();
|
||||||
|
let value = stream.next();
|
||||||
|
stream_next_time += before_stream_next.elapsed();
|
||||||
|
|
||||||
|
let input = match value {
|
||||||
|
Some(input) => input,
|
||||||
|
None => break,
|
||||||
|
};
|
||||||
|
|
||||||
|
number_of_words += 1;
|
||||||
|
|
||||||
|
let distance = dfa.eval(input).to_u8();
|
||||||
|
let is_exact = *is_exact && distance == 0 && input.len() == query.len();
|
||||||
|
|
||||||
|
let before_postings_lists_fetching = Instant::now();
|
||||||
|
if let Some(postings_list) = postings_lists_store.postings_list(reader, input)? {
|
||||||
|
postings_lists_original_length += postings_list.len();
|
||||||
|
|
||||||
|
let input = Rc::from(input);
|
||||||
|
let postings_list = Rc::new(postings_list);
|
||||||
|
let postings_list_view = PostingsListView::original(input, postings_list);
|
||||||
|
|
||||||
|
let mut offset = 0;
|
||||||
|
for group in postings_list_view.linear_group_by_key(|di| di.document_id) {
|
||||||
|
let document_id = group[0].document_id;
|
||||||
|
|
||||||
|
if query_index != 0 {
|
||||||
|
if !documents_ids.contains(&document_id) { continue }
|
||||||
|
} else {
|
||||||
|
documents_ids.insert(document_id);
|
||||||
|
}
|
||||||
|
|
||||||
|
postings_lists_length += group.len();
|
||||||
|
|
||||||
|
let posting_list_index = arena.add(postings_list_view.range(offset, group.len()));
|
||||||
|
let bare_match = BareMatch {
|
||||||
|
document_id,
|
||||||
|
query_index: query_index as u16,
|
||||||
|
distance,
|
||||||
|
is_exact,
|
||||||
|
postings_list: posting_list_index,
|
||||||
|
};
|
||||||
|
|
||||||
|
total_postings_lists.push(bare_match);
|
||||||
|
offset += group.len();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
postings_lists_fetching_time += before_postings_lists_fetching.elapsed();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
debug!("{:?} gives {} words", query, number_of_words);
|
debug!("{:?} gives {} words", query, number_of_words);
|
||||||
debug!("{:?} gives postings lists of length {} (original was {})",
|
debug!("{:?} gives postings lists of length {} (original was {})",
|
||||||
|
@ -16,7 +16,8 @@ pub struct QueryBuilder<'c, 'f, 'd> {
|
|||||||
postings_lists_store: store::PostingsLists,
|
postings_lists_store: store::PostingsLists,
|
||||||
documents_fields_counts_store: store::DocumentsFieldsCounts,
|
documents_fields_counts_store: store::DocumentsFieldsCounts,
|
||||||
synonyms_store: store::Synonyms,
|
synonyms_store: store::Synonyms,
|
||||||
prefix_cache_store: store::PrefixDocumentsCache,
|
prefix_documents_cache_store: store::PrefixDocumentsCache,
|
||||||
|
prefix_postings_lists_cache_store: store::PrefixPostingsListsCache,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'c, 'f, 'd> QueryBuilder<'c, 'f, 'd> {
|
impl<'c, 'f, 'd> QueryBuilder<'c, 'f, 'd> {
|
||||||
@ -25,14 +26,16 @@ impl<'c, 'f, 'd> QueryBuilder<'c, 'f, 'd> {
|
|||||||
postings_lists: store::PostingsLists,
|
postings_lists: store::PostingsLists,
|
||||||
documents_fields_counts: store::DocumentsFieldsCounts,
|
documents_fields_counts: store::DocumentsFieldsCounts,
|
||||||
synonyms: store::Synonyms,
|
synonyms: store::Synonyms,
|
||||||
prefix_cache: store::PrefixDocumentsCache,
|
prefix_documents_cache: store::PrefixDocumentsCache,
|
||||||
|
prefix_postings_lists_cache: store::PrefixPostingsListsCache,
|
||||||
) -> QueryBuilder<'c, 'f, 'd> {
|
) -> QueryBuilder<'c, 'f, 'd> {
|
||||||
QueryBuilder::with_criteria(
|
QueryBuilder::with_criteria(
|
||||||
main,
|
main,
|
||||||
postings_lists,
|
postings_lists,
|
||||||
documents_fields_counts,
|
documents_fields_counts,
|
||||||
synonyms,
|
synonyms,
|
||||||
prefix_cache,
|
prefix_documents_cache,
|
||||||
|
prefix_postings_lists_cache,
|
||||||
Criteria::default(),
|
Criteria::default(),
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
@ -42,7 +45,8 @@ impl<'c, 'f, 'd> QueryBuilder<'c, 'f, 'd> {
|
|||||||
postings_lists: store::PostingsLists,
|
postings_lists: store::PostingsLists,
|
||||||
documents_fields_counts: store::DocumentsFieldsCounts,
|
documents_fields_counts: store::DocumentsFieldsCounts,
|
||||||
synonyms: store::Synonyms,
|
synonyms: store::Synonyms,
|
||||||
prefix_cache: store::PrefixDocumentsCache,
|
prefix_documents_cache: store::PrefixDocumentsCache,
|
||||||
|
prefix_postings_lists_cache: store::PrefixPostingsListsCache,
|
||||||
criteria: Criteria<'c>,
|
criteria: Criteria<'c>,
|
||||||
) -> QueryBuilder<'c, 'f, 'd> {
|
) -> QueryBuilder<'c, 'f, 'd> {
|
||||||
QueryBuilder {
|
QueryBuilder {
|
||||||
@ -55,7 +59,8 @@ impl<'c, 'f, 'd> QueryBuilder<'c, 'f, 'd> {
|
|||||||
postings_lists_store: postings_lists,
|
postings_lists_store: postings_lists,
|
||||||
documents_fields_counts_store: documents_fields_counts,
|
documents_fields_counts_store: documents_fields_counts,
|
||||||
synonyms_store: synonyms,
|
synonyms_store: synonyms,
|
||||||
prefix_cache_store: prefix_cache,
|
prefix_documents_cache_store: prefix_documents_cache,
|
||||||
|
prefix_postings_lists_cache_store: prefix_postings_lists_cache,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -102,7 +107,8 @@ impl<'c, 'f, 'd> QueryBuilder<'c, 'f, 'd> {
|
|||||||
self.postings_lists_store,
|
self.postings_lists_store,
|
||||||
self.documents_fields_counts_store,
|
self.documents_fields_counts_store,
|
||||||
self.synonyms_store,
|
self.synonyms_store,
|
||||||
self.prefix_cache_store,
|
self.prefix_documents_cache_store,
|
||||||
|
self.prefix_postings_lists_cache_store,
|
||||||
),
|
),
|
||||||
None => bucket_sort(
|
None => bucket_sort(
|
||||||
reader,
|
reader,
|
||||||
@ -115,7 +121,8 @@ impl<'c, 'f, 'd> QueryBuilder<'c, 'f, 'd> {
|
|||||||
self.postings_lists_store,
|
self.postings_lists_store,
|
||||||
self.documents_fields_counts_store,
|
self.documents_fields_counts_store,
|
||||||
self.synonyms_store,
|
self.synonyms_store,
|
||||||
self.prefix_cache_store,
|
self.prefix_documents_cache_store,
|
||||||
|
self.prefix_postings_lists_cache_store,
|
||||||
),
|
),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -267,6 +267,7 @@ impl Index {
|
|||||||
self.documents_fields_counts,
|
self.documents_fields_counts,
|
||||||
self.synonyms,
|
self.synonyms,
|
||||||
self.prefix_documents_cache,
|
self.prefix_documents_cache,
|
||||||
|
self.prefix_postings_lists_cache,
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -280,6 +281,7 @@ impl Index {
|
|||||||
self.documents_fields_counts,
|
self.documents_fields_counts,
|
||||||
self.synonyms,
|
self.synonyms,
|
||||||
self.prefix_documents_cache,
|
self.prefix_documents_cache,
|
||||||
|
self.prefix_postings_lists_cache,
|
||||||
criteria,
|
criteria,
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user