mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-07-03 03:47:02 +02:00
Introduce a basic cache system for first letters
This commit is contained in:
parent
d21352a109
commit
1e1f0fcaf5
10 changed files with 211 additions and 0 deletions
|
@ -109,6 +109,7 @@ pub fn apply_documents_addition<'a, 'b>(
|
|||
documents_fields_counts_store: store::DocumentsFieldsCounts,
|
||||
postings_lists_store: store::PostingsLists,
|
||||
docs_words_store: store::DocsWords,
|
||||
prefix_cache_store: store::PrefixCache,
|
||||
addition: Vec<HashMap<String, serde_json::Value>>,
|
||||
) -> MResult<()> {
|
||||
let mut documents_additions = HashMap::new();
|
||||
|
@ -175,6 +176,7 @@ pub fn apply_documents_addition<'a, 'b>(
|
|||
main_store,
|
||||
postings_lists_store,
|
||||
docs_words_store,
|
||||
prefix_cache_store,
|
||||
&ranked_map,
|
||||
number_of_inserted_documents,
|
||||
indexer,
|
||||
|
@ -188,6 +190,7 @@ pub fn apply_documents_partial_addition<'a, 'b>(
|
|||
documents_fields_counts_store: store::DocumentsFieldsCounts,
|
||||
postings_lists_store: store::PostingsLists,
|
||||
docs_words_store: store::DocsWords,
|
||||
prefix_cache_store: store::PrefixCache,
|
||||
addition: Vec<HashMap<String, serde_json::Value>>,
|
||||
) -> MResult<()> {
|
||||
let mut documents_additions = HashMap::new();
|
||||
|
@ -271,6 +274,7 @@ pub fn apply_documents_partial_addition<'a, 'b>(
|
|||
main_store,
|
||||
postings_lists_store,
|
||||
docs_words_store,
|
||||
prefix_cache_store,
|
||||
&ranked_map,
|
||||
number_of_inserted_documents,
|
||||
indexer,
|
||||
|
@ -284,6 +288,7 @@ pub fn reindex_all_documents(
|
|||
documents_fields_counts_store: store::DocumentsFieldsCounts,
|
||||
postings_lists_store: store::PostingsLists,
|
||||
docs_words_store: store::DocsWords,
|
||||
prefix_cache_store: store::PrefixCache,
|
||||
) -> MResult<()> {
|
||||
let schema = match main_store.schema(writer)? {
|
||||
Some(schema) => schema,
|
||||
|
@ -345,6 +350,7 @@ pub fn reindex_all_documents(
|
|||
main_store,
|
||||
postings_lists_store,
|
||||
docs_words_store,
|
||||
prefix_cache_store,
|
||||
&ranked_map,
|
||||
number_of_inserted_documents,
|
||||
indexer,
|
||||
|
@ -359,6 +365,7 @@ pub fn write_documents_addition_index(
|
|||
main_store: store::Main,
|
||||
postings_lists_store: store::PostingsLists,
|
||||
docs_words_store: store::DocsWords,
|
||||
prefix_cache_store: store::PrefixCache,
|
||||
ranked_map: &RankedMap,
|
||||
number_of_inserted_documents: usize,
|
||||
indexer: RawIndexer,
|
||||
|
|
|
@ -23,12 +23,15 @@ use std::collections::{BTreeMap, BTreeSet, HashMap};
|
|||
use std::time::Instant;
|
||||
|
||||
use chrono::{DateTime, Utc};
|
||||
use fst::{IntoStreamer, Streamer};
|
||||
use heed::Result as ZResult;
|
||||
use log::debug;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::{store, DocumentId, MResult};
|
||||
use crate::database::{MainT, UpdateT};
|
||||
use crate::bucket_sort::bucket_sort;
|
||||
use crate::criterion::Criteria;
|
||||
use meilisearch_schema::Schema;
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
|
@ -278,6 +281,7 @@ pub fn update_task<'a, 'b>(
|
|||
index.documents_fields_counts,
|
||||
index.postings_lists,
|
||||
index.docs_words,
|
||||
index.prefix_cache,
|
||||
);
|
||||
|
||||
(update_type, result, start.elapsed())
|
||||
|
@ -304,9 +308,63 @@ pub fn update_task<'a, 'b>(
|
|||
index.documents_fields_counts,
|
||||
index.postings_lists,
|
||||
index.docs_words,
|
||||
index.prefix_cache,
|
||||
documents,
|
||||
);
|
||||
|
||||
let words_fst = index.main.words_fst(writer)?.unwrap();
|
||||
let mut stream = words_fst.into_stream();
|
||||
let mut previous_char = None;
|
||||
while let Some(input) = stream.next() {
|
||||
let (s, c) = match std::str::from_utf8(input) {
|
||||
Ok(s) => {
|
||||
let c = s.chars().next().unwrap();
|
||||
(&s[..c.len_utf8()], c)
|
||||
},
|
||||
Err(_) => continue,
|
||||
};
|
||||
|
||||
match previous_char {
|
||||
Some(pc) if pc != c => {
|
||||
debug!("searching and caching {:?}", s);
|
||||
|
||||
let documents = bucket_sort(
|
||||
writer,
|
||||
s,
|
||||
0..20,
|
||||
None as Option<fn(DocumentId) -> bool>,
|
||||
Criteria::default(),
|
||||
None,
|
||||
index.main,
|
||||
index.postings_lists,
|
||||
index.documents_fields_counts,
|
||||
index.synonyms,
|
||||
index.prefix_cache,
|
||||
).unwrap();
|
||||
|
||||
let mut prefix = [0; 4];
|
||||
let len = cmp::min(4, s.len());
|
||||
prefix[..len].copy_from_slice(&s.as_bytes()[..len]);
|
||||
|
||||
for (i, document) in documents.into_iter().enumerate() {
|
||||
index.prefix_cache.put_prefix_document(
|
||||
writer,
|
||||
prefix,
|
||||
i,
|
||||
document.id,
|
||||
&document.highlights,
|
||||
).unwrap();
|
||||
}
|
||||
|
||||
previous_char = Some(c)
|
||||
},
|
||||
Some(_) => (),
|
||||
None => previous_char = Some(c),
|
||||
}
|
||||
}
|
||||
|
||||
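// TODO the first prefix char is never cached: the `None` arm above only records it,
// and caching runs only when the first char changes, using the *new* char's prefix
// (so the last prefix char is covered, but the very first one is skipped).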
|
||||
|
||||
(update_type, result, start.elapsed())
|
||||
}
|
||||
UpdateData::DocumentsPartial(documents) => {
|
||||
|
@ -323,6 +381,7 @@ pub fn update_task<'a, 'b>(
|
|||
index.documents_fields_counts,
|
||||
index.postings_lists,
|
||||
index.docs_words,
|
||||
index.prefix_cache,
|
||||
documents,
|
||||
);
|
||||
|
||||
|
@ -384,6 +443,7 @@ pub fn update_task<'a, 'b>(
|
|||
index.documents_fields_counts,
|
||||
index.postings_lists,
|
||||
index.docs_words,
|
||||
index.prefix_cache,
|
||||
stop_words,
|
||||
);
|
||||
|
||||
|
|
|
@ -13,6 +13,7 @@ pub fn apply_schema_update(
|
|||
documents_fields_counts_store: store::DocumentsFieldsCounts,
|
||||
postings_lists_store: store::PostingsLists,
|
||||
docs_words_store: store::DocsWords,
|
||||
prefix_cache_store: store::PrefixCache,
|
||||
) -> MResult<()> {
|
||||
use UnsupportedOperation::{
|
||||
CanOnlyIntroduceNewSchemaAttributesAtEnd, CannotRemoveSchemaAttribute,
|
||||
|
@ -55,6 +56,7 @@ pub fn apply_schema_update(
|
|||
documents_fields_counts_store,
|
||||
postings_lists_store,
|
||||
docs_words_store,
|
||||
prefix_cache_store,
|
||||
)?
|
||||
}
|
||||
|
||||
|
|
|
@ -68,6 +68,7 @@ pub fn apply_stop_words_deletion(
|
|||
documents_fields_counts_store: store::DocumentsFieldsCounts,
|
||||
postings_lists_store: store::PostingsLists,
|
||||
docs_words_store: store::DocsWords,
|
||||
prefix_cache_store: store::PrefixCache,
|
||||
deletion: BTreeSet<String>,
|
||||
) -> MResult<()> {
|
||||
let mut stop_words_builder = SetBuilder::memory();
|
||||
|
@ -110,6 +111,7 @@ pub fn apply_stop_words_deletion(
|
|||
documents_fields_counts_store,
|
||||
postings_lists_store,
|
||||
docs_words_store,
|
||||
prefix_cache_store,
|
||||
)?;
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue