mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-26 14:54:27 +01:00
Cache the prefix postings lists
This commit is contained in:
parent
928876b553
commit
106b886873
@ -1,8 +1,10 @@
|
|||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
|
use std::borrow::Cow;
|
||||||
|
|
||||||
use fst::{set::OpBuilder, SetBuilder};
|
use fst::{set::OpBuilder, SetBuilder, IntoStreamer, Streamer};
|
||||||
use sdset::{duo::Union, SetOperation};
|
use sdset::{duo::Union, SetOperation, SetBuf};
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
|
use log::debug;
|
||||||
|
|
||||||
use crate::database::{MainT, UpdateT};
|
use crate::database::{MainT, UpdateT};
|
||||||
use crate::database::{UpdateEvent, UpdateEventsEmitter};
|
use crate::database::{UpdateEvent, UpdateEventsEmitter};
|
||||||
@ -110,6 +112,7 @@ pub fn apply_documents_addition<'a, 'b>(
|
|||||||
postings_lists_store: store::PostingsLists,
|
postings_lists_store: store::PostingsLists,
|
||||||
docs_words_store: store::DocsWords,
|
docs_words_store: store::DocsWords,
|
||||||
prefix_documents_cache_store: store::PrefixDocumentsCache,
|
prefix_documents_cache_store: store::PrefixDocumentsCache,
|
||||||
|
prefix_postings_lists_cache_store: store::PrefixPostingsListsCache,
|
||||||
addition: Vec<HashMap<String, serde_json::Value>>,
|
addition: Vec<HashMap<String, serde_json::Value>>,
|
||||||
) -> MResult<()> {
|
) -> MResult<()> {
|
||||||
let mut documents_additions = HashMap::new();
|
let mut documents_additions = HashMap::new();
|
||||||
@ -180,7 +183,50 @@ pub fn apply_documents_addition<'a, 'b>(
|
|||||||
&ranked_map,
|
&ranked_map,
|
||||||
number_of_inserted_documents,
|
number_of_inserted_documents,
|
||||||
indexer,
|
indexer,
|
||||||
)
|
)?;
|
||||||
|
|
||||||
|
|
||||||
|
// retrieve the words fst to compute all those prefixes
|
||||||
|
let words_fst = match main_store.words_fst(writer)? {
|
||||||
|
Some(fst) => fst,
|
||||||
|
None => return Ok(()),
|
||||||
|
};
|
||||||
|
|
||||||
|
// clear the prefixes
|
||||||
|
let pplc_store = prefix_postings_lists_cache_store;
|
||||||
|
pplc_store.clear(writer)?;
|
||||||
|
|
||||||
|
const MAX_PREFIX_LENGTH: usize = 1;
|
||||||
|
|
||||||
|
// compute prefixes and store those in the PrefixPostingsListsCache.
|
||||||
|
let mut stream = words_fst.into_stream();
|
||||||
|
while let Some(input) = stream.next() {
|
||||||
|
for i in 1..=MAX_PREFIX_LENGTH {
|
||||||
|
let prefix = &input[..i];
|
||||||
|
if let Some(postings_list) = postings_lists_store.postings_list(writer, prefix)? {
|
||||||
|
if let (Ok(input), Ok(prefix)) = (std::str::from_utf8(input), std::str::from_utf8(prefix)) {
|
||||||
|
debug!("{:?} postings list (prefix {:?}) length {}", input, prefix, postings_list.len());
|
||||||
|
}
|
||||||
|
|
||||||
|
// compute the new prefix postings lists
|
||||||
|
let mut p = [0; 4];
|
||||||
|
let len = std::cmp::min(4, prefix.len());
|
||||||
|
p[..len].copy_from_slice(&prefix[..len]);
|
||||||
|
|
||||||
|
let previous = match pplc_store.prefix_postings_list(writer, p)? {
|
||||||
|
Some(previous) => previous,
|
||||||
|
None => Cow::Owned(SetBuf::default()),
|
||||||
|
};
|
||||||
|
|
||||||
|
let new_postings_list = Union::new(&postings_list, &previous).into_set_buf();
|
||||||
|
pplc_store.put_prefix_postings_list(writer, p, &new_postings_list)?;
|
||||||
|
|
||||||
|
debug!("new length {}", new_postings_list.len());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn apply_documents_partial_addition<'a, 'b>(
|
pub fn apply_documents_partial_addition<'a, 'b>(
|
||||||
|
@ -309,62 +309,10 @@ pub fn update_task<'a, 'b>(
|
|||||||
index.postings_lists,
|
index.postings_lists,
|
||||||
index.docs_words,
|
index.docs_words,
|
||||||
index.prefix_documents_cache,
|
index.prefix_documents_cache,
|
||||||
|
index.prefix_postings_lists_cache,
|
||||||
documents,
|
documents,
|
||||||
);
|
);
|
||||||
|
|
||||||
let words_fst = index.main.words_fst(writer)?.unwrap();
|
|
||||||
let mut stream = words_fst.into_stream();
|
|
||||||
let mut previous_char = None;
|
|
||||||
while let Some(input) = stream.next() {
|
|
||||||
let (s, c) = match std::str::from_utf8(input) {
|
|
||||||
Ok(s) => {
|
|
||||||
let c = s.chars().next().unwrap();
|
|
||||||
(&s[..c.len_utf8()], c)
|
|
||||||
},
|
|
||||||
Err(_) => continue,
|
|
||||||
};
|
|
||||||
|
|
||||||
match previous_char {
|
|
||||||
Some(pc) if pc != c => {
|
|
||||||
debug!("searching and caching {:?}", s);
|
|
||||||
|
|
||||||
let documents = bucket_sort(
|
|
||||||
writer,
|
|
||||||
s,
|
|
||||||
0..20,
|
|
||||||
None as Option<fn(DocumentId) -> bool>,
|
|
||||||
Criteria::default(),
|
|
||||||
None,
|
|
||||||
index.main,
|
|
||||||
index.postings_lists,
|
|
||||||
index.documents_fields_counts,
|
|
||||||
index.synonyms,
|
|
||||||
index.prefix_documents_cache,
|
|
||||||
).unwrap();
|
|
||||||
|
|
||||||
let mut prefix = [0; 4];
|
|
||||||
let len = cmp::min(4, s.len());
|
|
||||||
prefix[..len].copy_from_slice(&s.as_bytes()[..len]);
|
|
||||||
|
|
||||||
for (i, document) in documents.into_iter().enumerate() {
|
|
||||||
index.prefix_documents_cache.put_prefix_document(
|
|
||||||
writer,
|
|
||||||
prefix,
|
|
||||||
i,
|
|
||||||
document.id,
|
|
||||||
&document.highlights,
|
|
||||||
).unwrap();
|
|
||||||
}
|
|
||||||
|
|
||||||
previous_char = Some(c)
|
|
||||||
},
|
|
||||||
Some(_) => (),
|
|
||||||
None => previous_char = Some(c),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// TODO we forget to do it for the last prefix char
|
|
||||||
|
|
||||||
(update_type, result, start.elapsed())
|
(update_type, result, start.elapsed())
|
||||||
}
|
}
|
||||||
UpdateData::DocumentsPartial(documents) => {
|
UpdateData::DocumentsPartial(documents) => {
|
||||||
|
Loading…
Reference in New Issue
Block a user