mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-29 16:24:26 +01:00
Introduce a basic cache system for first letters
This commit is contained in:
parent
d21352a109
commit
1e1f0fcaf5
@ -38,6 +38,7 @@ pub fn bucket_sort<'c, FI>(
|
|||||||
postings_lists_store: store::PostingsLists,
|
postings_lists_store: store::PostingsLists,
|
||||||
documents_fields_counts_store: store::DocumentsFieldsCounts,
|
documents_fields_counts_store: store::DocumentsFieldsCounts,
|
||||||
synonyms_store: store::Synonyms,
|
synonyms_store: store::Synonyms,
|
||||||
|
prefix_cache_store: store::PrefixCache,
|
||||||
) -> MResult<Vec<Document>>
|
) -> MResult<Vec<Document>>
|
||||||
where
|
where
|
||||||
FI: Fn(DocumentId) -> bool,
|
FI: Fn(DocumentId) -> bool,
|
||||||
@ -60,12 +61,32 @@ where
|
|||||||
postings_lists_store,
|
postings_lists_store,
|
||||||
documents_fields_counts_store,
|
documents_fields_counts_store,
|
||||||
synonyms_store,
|
synonyms_store,
|
||||||
|
prefix_cache_store,
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
let (mut automatons, mut query_enhancer) =
|
let (mut automatons, mut query_enhancer) =
|
||||||
construct_automatons(reader, query, main_store, postings_lists_store, synonyms_store)?;
|
construct_automatons(reader, query, main_store, postings_lists_store, synonyms_store)?;
|
||||||
|
|
||||||
|
if let [automaton] = &automatons[..] {
|
||||||
|
if automaton.is_prefix && automaton.query.len() <= 4 {
|
||||||
|
let mut prefix = [0; 4];
|
||||||
|
let len = cmp::min(4, automaton.query.len());
|
||||||
|
prefix[..len].copy_from_slice(&automaton.query.as_bytes()[..len]);
|
||||||
|
|
||||||
|
let mut documents = Vec::new();
|
||||||
|
let iter = prefix_cache_store.prefix_documents(reader, prefix)?;
|
||||||
|
for result in iter.skip(range.start).take(range.len()) {
|
||||||
|
let (docid, highlights) = result?;
|
||||||
|
documents.push(Document::from_highlights(docid, &highlights));
|
||||||
|
}
|
||||||
|
|
||||||
|
if !documents.is_empty() {
|
||||||
|
return Ok(documents);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
debug!("{:?}", query_enhancer);
|
debug!("{:?}", query_enhancer);
|
||||||
|
|
||||||
let before_postings_lists_fetching = Instant::now();
|
let before_postings_lists_fetching = Instant::now();
|
||||||
@ -160,6 +181,7 @@ pub fn bucket_sort_with_distinct<'c, FI, FD>(
|
|||||||
postings_lists_store: store::PostingsLists,
|
postings_lists_store: store::PostingsLists,
|
||||||
documents_fields_counts_store: store::DocumentsFieldsCounts,
|
documents_fields_counts_store: store::DocumentsFieldsCounts,
|
||||||
synonyms_store: store::Synonyms,
|
synonyms_store: store::Synonyms,
|
||||||
|
prefix_cache_store: store::PrefixCache,
|
||||||
) -> MResult<Vec<Document>>
|
) -> MResult<Vec<Document>>
|
||||||
where
|
where
|
||||||
FI: Fn(DocumentId) -> bool,
|
FI: Fn(DocumentId) -> bool,
|
||||||
|
@ -81,6 +81,16 @@ fn highlights_from_raw_document<'a, 'tag, 'txn>(
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl Document {
|
impl Document {
|
||||||
|
#[cfg(not(test))]
|
||||||
|
pub fn from_highlights(id: DocumentId, highlights: &[Highlight]) -> Document {
|
||||||
|
Document { id, highlights: highlights.to_owned() }
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
pub fn from_highlights(id: DocumentId, highlights: &[Highlight]) -> Document {
|
||||||
|
Document { id, highlights: highlights.to_owned(), matches: Vec::new() }
|
||||||
|
}
|
||||||
|
|
||||||
#[cfg(not(test))]
|
#[cfg(not(test))]
|
||||||
pub fn from_raw<'a, 'tag, 'txn>(
|
pub fn from_raw<'a, 'tag, 'txn>(
|
||||||
raw_document: RawDocument<'a, 'tag>,
|
raw_document: RawDocument<'a, 'tag>,
|
||||||
|
@ -16,6 +16,7 @@ pub struct QueryBuilder<'c, 'f, 'd> {
|
|||||||
postings_lists_store: store::PostingsLists,
|
postings_lists_store: store::PostingsLists,
|
||||||
documents_fields_counts_store: store::DocumentsFieldsCounts,
|
documents_fields_counts_store: store::DocumentsFieldsCounts,
|
||||||
synonyms_store: store::Synonyms,
|
synonyms_store: store::Synonyms,
|
||||||
|
prefix_cache_store: store::PrefixCache,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'c, 'f, 'd> QueryBuilder<'c, 'f, 'd> {
|
impl<'c, 'f, 'd> QueryBuilder<'c, 'f, 'd> {
|
||||||
@ -24,12 +25,14 @@ impl<'c, 'f, 'd> QueryBuilder<'c, 'f, 'd> {
|
|||||||
postings_lists: store::PostingsLists,
|
postings_lists: store::PostingsLists,
|
||||||
documents_fields_counts: store::DocumentsFieldsCounts,
|
documents_fields_counts: store::DocumentsFieldsCounts,
|
||||||
synonyms: store::Synonyms,
|
synonyms: store::Synonyms,
|
||||||
|
prefix_cache: store::PrefixCache,
|
||||||
) -> QueryBuilder<'c, 'f, 'd> {
|
) -> QueryBuilder<'c, 'f, 'd> {
|
||||||
QueryBuilder::with_criteria(
|
QueryBuilder::with_criteria(
|
||||||
main,
|
main,
|
||||||
postings_lists,
|
postings_lists,
|
||||||
documents_fields_counts,
|
documents_fields_counts,
|
||||||
synonyms,
|
synonyms,
|
||||||
|
prefix_cache,
|
||||||
Criteria::default(),
|
Criteria::default(),
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
@ -39,6 +42,7 @@ impl<'c, 'f, 'd> QueryBuilder<'c, 'f, 'd> {
|
|||||||
postings_lists: store::PostingsLists,
|
postings_lists: store::PostingsLists,
|
||||||
documents_fields_counts: store::DocumentsFieldsCounts,
|
documents_fields_counts: store::DocumentsFieldsCounts,
|
||||||
synonyms: store::Synonyms,
|
synonyms: store::Synonyms,
|
||||||
|
prefix_cache: store::PrefixCache,
|
||||||
criteria: Criteria<'c>,
|
criteria: Criteria<'c>,
|
||||||
) -> QueryBuilder<'c, 'f, 'd> {
|
) -> QueryBuilder<'c, 'f, 'd> {
|
||||||
QueryBuilder {
|
QueryBuilder {
|
||||||
@ -51,6 +55,7 @@ impl<'c, 'f, 'd> QueryBuilder<'c, 'f, 'd> {
|
|||||||
postings_lists_store: postings_lists,
|
postings_lists_store: postings_lists,
|
||||||
documents_fields_counts_store: documents_fields_counts,
|
documents_fields_counts_store: documents_fields_counts,
|
||||||
synonyms_store: synonyms,
|
synonyms_store: synonyms,
|
||||||
|
prefix_cache_store: prefix_cache,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -97,6 +102,7 @@ impl<'c, 'f, 'd> QueryBuilder<'c, 'f, 'd> {
|
|||||||
self.postings_lists_store,
|
self.postings_lists_store,
|
||||||
self.documents_fields_counts_store,
|
self.documents_fields_counts_store,
|
||||||
self.synonyms_store,
|
self.synonyms_store,
|
||||||
|
self.prefix_cache_store,
|
||||||
),
|
),
|
||||||
None => bucket_sort(
|
None => bucket_sort(
|
||||||
reader,
|
reader,
|
||||||
@ -109,6 +115,7 @@ impl<'c, 'f, 'd> QueryBuilder<'c, 'f, 'd> {
|
|||||||
self.postings_lists_store,
|
self.postings_lists_store,
|
||||||
self.documents_fields_counts_store,
|
self.documents_fields_counts_store,
|
||||||
self.synonyms_store,
|
self.synonyms_store,
|
||||||
|
self.prefix_cache_store,
|
||||||
),
|
),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
mod docs_words;
|
mod docs_words;
|
||||||
|
mod prefix_cache;
|
||||||
mod documents_fields;
|
mod documents_fields;
|
||||||
mod documents_fields_counts;
|
mod documents_fields_counts;
|
||||||
mod main;
|
mod main;
|
||||||
@ -8,6 +9,7 @@ mod updates;
|
|||||||
mod updates_results;
|
mod updates_results;
|
||||||
|
|
||||||
pub use self::docs_words::DocsWords;
|
pub use self::docs_words::DocsWords;
|
||||||
|
pub use self::prefix_cache::PrefixCache;
|
||||||
pub use self::documents_fields::{DocumentFieldsIter, DocumentsFields};
|
pub use self::documents_fields::{DocumentFieldsIter, DocumentsFields};
|
||||||
pub use self::documents_fields_counts::{
|
pub use self::documents_fields_counts::{
|
||||||
DocumentFieldsCountsIter, DocumentsFieldsCounts, DocumentsIdsIter,
|
DocumentFieldsCountsIter, DocumentsFieldsCounts, DocumentsIdsIter,
|
||||||
@ -74,6 +76,10 @@ fn docs_words_name(name: &str) -> String {
|
|||||||
format!("store-{}-docs-words", name)
|
format!("store-{}-docs-words", name)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn prefix_cache_name(name: &str) -> String {
|
||||||
|
format!("store-{}-prefix-cache", name)
|
||||||
|
}
|
||||||
|
|
||||||
fn updates_name(name: &str) -> String {
|
fn updates_name(name: &str) -> String {
|
||||||
format!("store-{}-updates", name)
|
format!("store-{}-updates", name)
|
||||||
}
|
}
|
||||||
@ -90,6 +96,7 @@ pub struct Index {
|
|||||||
pub documents_fields_counts: DocumentsFieldsCounts,
|
pub documents_fields_counts: DocumentsFieldsCounts,
|
||||||
pub synonyms: Synonyms,
|
pub synonyms: Synonyms,
|
||||||
pub docs_words: DocsWords,
|
pub docs_words: DocsWords,
|
||||||
|
pub prefix_cache: PrefixCache,
|
||||||
|
|
||||||
pub updates: Updates,
|
pub updates: Updates,
|
||||||
pub updates_results: UpdatesResults,
|
pub updates_results: UpdatesResults,
|
||||||
@ -252,6 +259,7 @@ impl Index {
|
|||||||
self.postings_lists,
|
self.postings_lists,
|
||||||
self.documents_fields_counts,
|
self.documents_fields_counts,
|
||||||
self.synonyms,
|
self.synonyms,
|
||||||
|
self.prefix_cache,
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -264,6 +272,7 @@ impl Index {
|
|||||||
self.postings_lists,
|
self.postings_lists,
|
||||||
self.documents_fields_counts,
|
self.documents_fields_counts,
|
||||||
self.synonyms,
|
self.synonyms,
|
||||||
|
self.prefix_cache,
|
||||||
criteria,
|
criteria,
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
@ -282,6 +291,7 @@ pub fn create(
|
|||||||
let documents_fields_counts_name = documents_fields_counts_name(name);
|
let documents_fields_counts_name = documents_fields_counts_name(name);
|
||||||
let synonyms_name = synonyms_name(name);
|
let synonyms_name = synonyms_name(name);
|
||||||
let docs_words_name = docs_words_name(name);
|
let docs_words_name = docs_words_name(name);
|
||||||
|
let prefix_cache_name = prefix_cache_name(name);
|
||||||
let updates_name = updates_name(name);
|
let updates_name = updates_name(name);
|
||||||
let updates_results_name = updates_results_name(name);
|
let updates_results_name = updates_results_name(name);
|
||||||
|
|
||||||
@ -292,6 +302,7 @@ pub fn create(
|
|||||||
let documents_fields_counts = env.create_database(Some(&documents_fields_counts_name))?;
|
let documents_fields_counts = env.create_database(Some(&documents_fields_counts_name))?;
|
||||||
let synonyms = env.create_database(Some(&synonyms_name))?;
|
let synonyms = env.create_database(Some(&synonyms_name))?;
|
||||||
let docs_words = env.create_database(Some(&docs_words_name))?;
|
let docs_words = env.create_database(Some(&docs_words_name))?;
|
||||||
|
let prefix_cache = env.create_database(Some(&prefix_cache_name))?;
|
||||||
let updates = update_env.create_database(Some(&updates_name))?;
|
let updates = update_env.create_database(Some(&updates_name))?;
|
||||||
let updates_results = update_env.create_database(Some(&updates_results_name))?;
|
let updates_results = update_env.create_database(Some(&updates_results_name))?;
|
||||||
|
|
||||||
@ -304,6 +315,7 @@ pub fn create(
|
|||||||
},
|
},
|
||||||
synonyms: Synonyms { synonyms },
|
synonyms: Synonyms { synonyms },
|
||||||
docs_words: DocsWords { docs_words },
|
docs_words: DocsWords { docs_words },
|
||||||
|
prefix_cache: PrefixCache { prefix_cache },
|
||||||
updates: Updates { updates },
|
updates: Updates { updates },
|
||||||
updates_results: UpdatesResults { updates_results },
|
updates_results: UpdatesResults { updates_results },
|
||||||
updates_notifier,
|
updates_notifier,
|
||||||
@ -323,6 +335,7 @@ pub fn open(
|
|||||||
let documents_fields_counts_name = documents_fields_counts_name(name);
|
let documents_fields_counts_name = documents_fields_counts_name(name);
|
||||||
let synonyms_name = synonyms_name(name);
|
let synonyms_name = synonyms_name(name);
|
||||||
let docs_words_name = docs_words_name(name);
|
let docs_words_name = docs_words_name(name);
|
||||||
|
let prefix_cache_name = prefix_cache_name(name);
|
||||||
let updates_name = updates_name(name);
|
let updates_name = updates_name(name);
|
||||||
let updates_results_name = updates_results_name(name);
|
let updates_results_name = updates_results_name(name);
|
||||||
|
|
||||||
@ -351,6 +364,10 @@ pub fn open(
|
|||||||
Some(docs_words) => docs_words,
|
Some(docs_words) => docs_words,
|
||||||
None => return Ok(None),
|
None => return Ok(None),
|
||||||
};
|
};
|
||||||
|
let prefix_cache = match env.open_database(Some(&prefix_cache_name))? {
|
||||||
|
Some(prefix_cache) => prefix_cache,
|
||||||
|
None => return Ok(None),
|
||||||
|
};
|
||||||
let updates = match update_env.open_database(Some(&updates_name))? {
|
let updates = match update_env.open_database(Some(&updates_name))? {
|
||||||
Some(updates) => updates,
|
Some(updates) => updates,
|
||||||
None => return Ok(None),
|
None => return Ok(None),
|
||||||
@ -369,6 +386,7 @@ pub fn open(
|
|||||||
},
|
},
|
||||||
synonyms: Synonyms { synonyms },
|
synonyms: Synonyms { synonyms },
|
||||||
docs_words: DocsWords { docs_words },
|
docs_words: DocsWords { docs_words },
|
||||||
|
prefix_cache: PrefixCache { prefix_cache },
|
||||||
updates: Updates { updates },
|
updates: Updates { updates },
|
||||||
updates_results: UpdatesResults { updates_results },
|
updates_results: UpdatesResults { updates_results },
|
||||||
updates_notifier,
|
updates_notifier,
|
||||||
@ -387,6 +405,7 @@ pub fn clear(
|
|||||||
index.documents_fields_counts.clear(writer)?;
|
index.documents_fields_counts.clear(writer)?;
|
||||||
index.synonyms.clear(writer)?;
|
index.synonyms.clear(writer)?;
|
||||||
index.docs_words.clear(writer)?;
|
index.docs_words.clear(writer)?;
|
||||||
|
index.prefix_cache.clear(writer)?;
|
||||||
index.updates.clear(update_writer)?;
|
index.updates.clear(update_writer)?;
|
||||||
index.updates_results.clear(update_writer)?;
|
index.updates_results.clear(update_writer)?;
|
||||||
Ok(())
|
Ok(())
|
||||||
|
80
meilisearch-core/src/store/prefix_cache.rs
Normal file
80
meilisearch-core/src/store/prefix_cache.rs
Normal file
@ -0,0 +1,80 @@
|
|||||||
|
use std::borrow::Cow;
|
||||||
|
|
||||||
|
use heed::types::{OwnedType, CowSlice};
|
||||||
|
use heed::Result as ZResult;
|
||||||
|
use zerocopy::{AsBytes, FromBytes};
|
||||||
|
|
||||||
|
use super::BEU64;
|
||||||
|
use crate::{DocumentId, Highlight};
|
||||||
|
use crate::database::MainT;
|
||||||
|
|
||||||
|
#[derive(Debug, Copy, Clone, AsBytes, FromBytes)]
|
||||||
|
#[repr(C)]
|
||||||
|
pub struct PrefixKey {
|
||||||
|
prefix: [u8; 4],
|
||||||
|
index: BEU64,
|
||||||
|
docid: BEU64,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl PrefixKey {
|
||||||
|
pub fn new(prefix: [u8; 4], index: u64, docid: u64) -> PrefixKey {
|
||||||
|
PrefixKey {
|
||||||
|
prefix: prefix,
|
||||||
|
index: BEU64::new(index),
|
||||||
|
docid: BEU64::new(docid),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Handle on the LMDB database caching, for each 4-byte word prefix, a
/// pre-computed list of documents (id + highlights) keyed by `PrefixKey`.
/// `Copy` is cheap because this only wraps a heed database handle.
#[derive(Copy, Clone)]
pub struct PrefixCache {
    pub(crate) prefix_cache: heed::Database<OwnedType<PrefixKey>, CowSlice<Highlight>>,
}
|
||||||
|
|
||||||
|
impl PrefixCache {
|
||||||
|
pub fn put_prefix_document(
|
||||||
|
self,
|
||||||
|
writer: &mut heed::RwTxn<MainT>,
|
||||||
|
prefix: [u8; 4],
|
||||||
|
index: usize,
|
||||||
|
docid: DocumentId,
|
||||||
|
highlights: &[Highlight],
|
||||||
|
) -> ZResult<()> {
|
||||||
|
let key = PrefixKey::new(prefix, index as u64, docid.0);
|
||||||
|
self.prefix_cache.put(writer, &key, highlights)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn clear(self, writer: &mut heed::RwTxn<MainT>) -> ZResult<()> {
|
||||||
|
self.prefix_cache.clear(writer)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn prefix_documents<'txn>(
|
||||||
|
self,
|
||||||
|
reader: &'txn heed::RoTxn<MainT>,
|
||||||
|
prefix: [u8; 4],
|
||||||
|
) -> ZResult<PrefixDocumentsIter<'txn>> {
|
||||||
|
let start = PrefixKey::new(prefix, 0, 0);
|
||||||
|
let end = PrefixKey::new(prefix, u64::max_value(), u64::max_value());
|
||||||
|
let iter = self.prefix_cache.range(reader, &(start..=end))?;
|
||||||
|
Ok(PrefixDocumentsIter { iter })
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Iterator over the cached documents of a single prefix, yielding each
/// document id together with its associated highlights (or a database error).
pub struct PrefixDocumentsIter<'txn> {
    iter: heed::RoRange<'txn, OwnedType<PrefixKey>, CowSlice<Highlight>>,
}
|
||||||
|
|
||||||
|
impl<'txn> Iterator for PrefixDocumentsIter<'txn> {
|
||||||
|
type Item = ZResult<(DocumentId, Cow<'txn, [Highlight]>)>;
|
||||||
|
|
||||||
|
fn next(&mut self) -> Option<Self::Item> {
|
||||||
|
match self.iter.next() {
|
||||||
|
Some(Ok((key, highlights))) => {
|
||||||
|
let docid = DocumentId(key.docid.get());
|
||||||
|
Some(Ok((docid, highlights)))
|
||||||
|
}
|
||||||
|
Some(Err(e)) => Some(Err(e)),
|
||||||
|
None => None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
@ -109,6 +109,7 @@ pub fn apply_documents_addition<'a, 'b>(
|
|||||||
documents_fields_counts_store: store::DocumentsFieldsCounts,
|
documents_fields_counts_store: store::DocumentsFieldsCounts,
|
||||||
postings_lists_store: store::PostingsLists,
|
postings_lists_store: store::PostingsLists,
|
||||||
docs_words_store: store::DocsWords,
|
docs_words_store: store::DocsWords,
|
||||||
|
prefix_cache_store: store::PrefixCache,
|
||||||
addition: Vec<HashMap<String, serde_json::Value>>,
|
addition: Vec<HashMap<String, serde_json::Value>>,
|
||||||
) -> MResult<()> {
|
) -> MResult<()> {
|
||||||
let mut documents_additions = HashMap::new();
|
let mut documents_additions = HashMap::new();
|
||||||
@ -175,6 +176,7 @@ pub fn apply_documents_addition<'a, 'b>(
|
|||||||
main_store,
|
main_store,
|
||||||
postings_lists_store,
|
postings_lists_store,
|
||||||
docs_words_store,
|
docs_words_store,
|
||||||
|
prefix_cache_store,
|
||||||
&ranked_map,
|
&ranked_map,
|
||||||
number_of_inserted_documents,
|
number_of_inserted_documents,
|
||||||
indexer,
|
indexer,
|
||||||
@ -188,6 +190,7 @@ pub fn apply_documents_partial_addition<'a, 'b>(
|
|||||||
documents_fields_counts_store: store::DocumentsFieldsCounts,
|
documents_fields_counts_store: store::DocumentsFieldsCounts,
|
||||||
postings_lists_store: store::PostingsLists,
|
postings_lists_store: store::PostingsLists,
|
||||||
docs_words_store: store::DocsWords,
|
docs_words_store: store::DocsWords,
|
||||||
|
prefix_cache_store: store::PrefixCache,
|
||||||
addition: Vec<HashMap<String, serde_json::Value>>,
|
addition: Vec<HashMap<String, serde_json::Value>>,
|
||||||
) -> MResult<()> {
|
) -> MResult<()> {
|
||||||
let mut documents_additions = HashMap::new();
|
let mut documents_additions = HashMap::new();
|
||||||
@ -271,6 +274,7 @@ pub fn apply_documents_partial_addition<'a, 'b>(
|
|||||||
main_store,
|
main_store,
|
||||||
postings_lists_store,
|
postings_lists_store,
|
||||||
docs_words_store,
|
docs_words_store,
|
||||||
|
prefix_cache_store,
|
||||||
&ranked_map,
|
&ranked_map,
|
||||||
number_of_inserted_documents,
|
number_of_inserted_documents,
|
||||||
indexer,
|
indexer,
|
||||||
@ -284,6 +288,7 @@ pub fn reindex_all_documents(
|
|||||||
documents_fields_counts_store: store::DocumentsFieldsCounts,
|
documents_fields_counts_store: store::DocumentsFieldsCounts,
|
||||||
postings_lists_store: store::PostingsLists,
|
postings_lists_store: store::PostingsLists,
|
||||||
docs_words_store: store::DocsWords,
|
docs_words_store: store::DocsWords,
|
||||||
|
prefix_cache_store: store::PrefixCache,
|
||||||
) -> MResult<()> {
|
) -> MResult<()> {
|
||||||
let schema = match main_store.schema(writer)? {
|
let schema = match main_store.schema(writer)? {
|
||||||
Some(schema) => schema,
|
Some(schema) => schema,
|
||||||
@ -345,6 +350,7 @@ pub fn reindex_all_documents(
|
|||||||
main_store,
|
main_store,
|
||||||
postings_lists_store,
|
postings_lists_store,
|
||||||
docs_words_store,
|
docs_words_store,
|
||||||
|
prefix_cache_store,
|
||||||
&ranked_map,
|
&ranked_map,
|
||||||
number_of_inserted_documents,
|
number_of_inserted_documents,
|
||||||
indexer,
|
indexer,
|
||||||
@ -359,6 +365,7 @@ pub fn write_documents_addition_index(
|
|||||||
main_store: store::Main,
|
main_store: store::Main,
|
||||||
postings_lists_store: store::PostingsLists,
|
postings_lists_store: store::PostingsLists,
|
||||||
docs_words_store: store::DocsWords,
|
docs_words_store: store::DocsWords,
|
||||||
|
prefix_cache_store: store::PrefixCache,
|
||||||
ranked_map: &RankedMap,
|
ranked_map: &RankedMap,
|
||||||
number_of_inserted_documents: usize,
|
number_of_inserted_documents: usize,
|
||||||
indexer: RawIndexer,
|
indexer: RawIndexer,
|
||||||
|
@ -23,12 +23,15 @@ use std::collections::{BTreeMap, BTreeSet, HashMap};
|
|||||||
use std::time::Instant;
|
use std::time::Instant;
|
||||||
|
|
||||||
use chrono::{DateTime, Utc};
|
use chrono::{DateTime, Utc};
|
||||||
|
use fst::{IntoStreamer, Streamer};
|
||||||
use heed::Result as ZResult;
|
use heed::Result as ZResult;
|
||||||
use log::debug;
|
use log::debug;
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
use crate::{store, DocumentId, MResult};
|
use crate::{store, DocumentId, MResult};
|
||||||
use crate::database::{MainT, UpdateT};
|
use crate::database::{MainT, UpdateT};
|
||||||
|
use crate::bucket_sort::bucket_sort;
|
||||||
|
use crate::criterion::Criteria;
|
||||||
use meilisearch_schema::Schema;
|
use meilisearch_schema::Schema;
|
||||||
|
|
||||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
@ -278,6 +281,7 @@ pub fn update_task<'a, 'b>(
|
|||||||
index.documents_fields_counts,
|
index.documents_fields_counts,
|
||||||
index.postings_lists,
|
index.postings_lists,
|
||||||
index.docs_words,
|
index.docs_words,
|
||||||
|
index.prefix_cache,
|
||||||
);
|
);
|
||||||
|
|
||||||
(update_type, result, start.elapsed())
|
(update_type, result, start.elapsed())
|
||||||
@ -304,9 +308,63 @@ pub fn update_task<'a, 'b>(
|
|||||||
index.documents_fields_counts,
|
index.documents_fields_counts,
|
||||||
index.postings_lists,
|
index.postings_lists,
|
||||||
index.docs_words,
|
index.docs_words,
|
||||||
|
index.prefix_cache,
|
||||||
documents,
|
documents,
|
||||||
);
|
);
|
||||||
|
|
||||||
|
let words_fst = index.main.words_fst(writer)?.unwrap();
|
||||||
|
let mut stream = words_fst.into_stream();
|
||||||
|
let mut previous_char = None;
|
||||||
|
while let Some(input) = stream.next() {
|
||||||
|
let (s, c) = match std::str::from_utf8(input) {
|
||||||
|
Ok(s) => {
|
||||||
|
let c = s.chars().next().unwrap();
|
||||||
|
(&s[..c.len_utf8()], c)
|
||||||
|
},
|
||||||
|
Err(_) => continue,
|
||||||
|
};
|
||||||
|
|
||||||
|
match previous_char {
|
||||||
|
Some(pc) if pc != c => {
|
||||||
|
debug!("searching and caching {:?}", s);
|
||||||
|
|
||||||
|
let documents = bucket_sort(
|
||||||
|
writer,
|
||||||
|
s,
|
||||||
|
0..20,
|
||||||
|
None as Option<fn(DocumentId) -> bool>,
|
||||||
|
Criteria::default(),
|
||||||
|
None,
|
||||||
|
index.main,
|
||||||
|
index.postings_lists,
|
||||||
|
index.documents_fields_counts,
|
||||||
|
index.synonyms,
|
||||||
|
index.prefix_cache,
|
||||||
|
).unwrap();
|
||||||
|
|
||||||
|
let mut prefix = [0; 4];
|
||||||
|
let len = cmp::min(4, s.len());
|
||||||
|
prefix[..len].copy_from_slice(&s.as_bytes()[..len]);
|
||||||
|
|
||||||
|
for (i, document) in documents.into_iter().enumerate() {
|
||||||
|
index.prefix_cache.put_prefix_document(
|
||||||
|
writer,
|
||||||
|
prefix,
|
||||||
|
i,
|
||||||
|
document.id,
|
||||||
|
&document.highlights,
|
||||||
|
).unwrap();
|
||||||
|
}
|
||||||
|
|
||||||
|
previous_char = Some(c)
|
||||||
|
},
|
||||||
|
Some(_) => (),
|
||||||
|
None => previous_char = Some(c),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO we forget to do it for the last prefix char
|
||||||
|
|
||||||
(update_type, result, start.elapsed())
|
(update_type, result, start.elapsed())
|
||||||
}
|
}
|
||||||
UpdateData::DocumentsPartial(documents) => {
|
UpdateData::DocumentsPartial(documents) => {
|
||||||
@ -323,6 +381,7 @@ pub fn update_task<'a, 'b>(
|
|||||||
index.documents_fields_counts,
|
index.documents_fields_counts,
|
||||||
index.postings_lists,
|
index.postings_lists,
|
||||||
index.docs_words,
|
index.docs_words,
|
||||||
|
index.prefix_cache,
|
||||||
documents,
|
documents,
|
||||||
);
|
);
|
||||||
|
|
||||||
@ -384,6 +443,7 @@ pub fn update_task<'a, 'b>(
|
|||||||
index.documents_fields_counts,
|
index.documents_fields_counts,
|
||||||
index.postings_lists,
|
index.postings_lists,
|
||||||
index.docs_words,
|
index.docs_words,
|
||||||
|
index.prefix_cache,
|
||||||
stop_words,
|
stop_words,
|
||||||
);
|
);
|
||||||
|
|
||||||
|
@ -13,6 +13,7 @@ pub fn apply_schema_update(
|
|||||||
documents_fields_counts_store: store::DocumentsFieldsCounts,
|
documents_fields_counts_store: store::DocumentsFieldsCounts,
|
||||||
postings_lists_store: store::PostingsLists,
|
postings_lists_store: store::PostingsLists,
|
||||||
docs_words_store: store::DocsWords,
|
docs_words_store: store::DocsWords,
|
||||||
|
prefix_cache_store: store::PrefixCache,
|
||||||
) -> MResult<()> {
|
) -> MResult<()> {
|
||||||
use UnsupportedOperation::{
|
use UnsupportedOperation::{
|
||||||
CanOnlyIntroduceNewSchemaAttributesAtEnd, CannotRemoveSchemaAttribute,
|
CanOnlyIntroduceNewSchemaAttributesAtEnd, CannotRemoveSchemaAttribute,
|
||||||
@ -55,6 +56,7 @@ pub fn apply_schema_update(
|
|||||||
documents_fields_counts_store,
|
documents_fields_counts_store,
|
||||||
postings_lists_store,
|
postings_lists_store,
|
||||||
docs_words_store,
|
docs_words_store,
|
||||||
|
prefix_cache_store,
|
||||||
)?
|
)?
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -68,6 +68,7 @@ pub fn apply_stop_words_deletion(
|
|||||||
documents_fields_counts_store: store::DocumentsFieldsCounts,
|
documents_fields_counts_store: store::DocumentsFieldsCounts,
|
||||||
postings_lists_store: store::PostingsLists,
|
postings_lists_store: store::PostingsLists,
|
||||||
docs_words_store: store::DocsWords,
|
docs_words_store: store::DocsWords,
|
||||||
|
prefix_cache_store: store::PrefixCache,
|
||||||
deletion: BTreeSet<String>,
|
deletion: BTreeSet<String>,
|
||||||
) -> MResult<()> {
|
) -> MResult<()> {
|
||||||
let mut stop_words_builder = SetBuilder::memory();
|
let mut stop_words_builder = SetBuilder::memory();
|
||||||
@ -110,6 +111,7 @@ pub fn apply_stop_words_deletion(
|
|||||||
documents_fields_counts_store,
|
documents_fields_counts_store,
|
||||||
postings_lists_store,
|
postings_lists_store,
|
||||||
docs_words_store,
|
docs_words_store,
|
||||||
|
prefix_cache_store,
|
||||||
)?;
|
)?;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -46,6 +46,8 @@ pub struct DocIndex {
|
|||||||
/// The order of the field is important because it defines
|
/// The order of the field is important because it defines
|
||||||
/// the way these structures are ordered between themselves.
|
/// the way these structures are ordered between themselves.
|
||||||
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||||
|
#[cfg_attr(feature = "zerocopy", derive(AsBytes, FromBytes))]
|
||||||
|
#[repr(C)]
|
||||||
pub struct Highlight {
|
pub struct Highlight {
|
||||||
/// The attribute in the document where the word was found
|
/// The attribute in the document where the word was found
|
||||||
/// along with the index in it.
|
/// along with the index in it.
|
||||||
|
Loading…
Reference in New Issue
Block a user