mirror of https://github.com/meilisearch/MeiliSearch
synced 2025-07-03 20:07:09 +02:00

rebase from master

parent 2143226f04
commit dc6907e748

29 changed files with 92 additions and 105 deletions
@@ -14,6 +14,7 @@ use meilisearch_types::DocIndex;
 use sdset::{Set, SetBuf, exponential_search};
 use slice_group_by::{GroupBy, GroupByMut};
 
+use crate::error::Error;
 use crate::criterion::{Criteria, Context, ContextMut};
 use crate::distinct_map::{BufferedDistinctMap, DistinctMap};
 use crate::raw_document::RawDocument;
@@ -163,11 +164,11 @@ where
 
     let schema = main_store.schema(reader)?.ok_or(Error::SchemaMissing)?;
     let iter = raw_documents.into_iter().skip(range.start).take(range.len());
-    let iter = iter.map(|rd| Document::from_raw(rd, &automatons, &arena, searchable_attrs.as_ref(), &schema));
+    let iter = iter.map(|rd| Document::from_raw(rd, &queries_kinds, &arena, searchable_attrs.as_ref(), &schema));
     let documents = iter.collect();
 
     debug!("bucket sort took {:.02?}", before_bucket_sort.elapsed());
 
 
     Ok(documents)
 }
@@ -349,7 +350,7 @@ where
            };
 
            if distinct_accepted && seen.len() > range.start {
-                documents.push(Document::from_raw(raw_document, &queries_kinds, &arena, searchable_attrs.as_ref()));
+                documents.push(Document::from_raw(raw_document, &queries_kinds, &arena, searchable_attrs.as_ref(), &schema));
                if documents.len() == range.len() {
                    break;
                }
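
Note: with this rebase, both `Document::from_raw` call sites (the plain path in the first hunk and the distinct path above) receive the schema, fetched once via `main_store.schema(reader)?.ok_or(Error::SchemaMissing)?`. A minimal sketch of the shape of this change; the stand-in types, the `best_pos` field, and the `name_of` helper are illustrative assumptions, not the real API:

    // Stand-in types to show the shape of the change; only the extra
    // `&Schema` parameter mirrors the diff, the rest is illustrative.
    struct Schema;

    impl Schema {
        // Hypothetical lookup: resolve an indexed position to a field name.
        fn name_of(&self, _pos: u16) -> Option<&str> {
            Some("title")
        }
    }

    struct RawDocument {
        best_pos: u16,
    }

    struct Document {
        field: String,
    }

    impl Document {
        // After the rebase, every call site passes `&schema` so the
        // conversion can resolve attribute names itself.
        fn from_raw(raw: RawDocument, schema: &Schema) -> Document {
            let field = schema.name_of(raw.best_pos).unwrap_or("unknown").to_string();
            Document { field }
        }
    }

    fn main() {
        let schema = Schema;
        let doc = Document::from_raw(RawDocument { best_pos: 0 }, &schema);
        println!("{}", doc.field);
    }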
@@ -743,12 +743,12 @@ mod tests {
        assert!(document.is_none());
 
        let document: Option<IgnoredAny> = index
-            .document(&reader, None, DocumentId(7900334843754999545))
+            .document(&reader, None, DocumentId(7_900_334_843_754_999_545))
            .unwrap();
        assert!(document.is_some());
 
        let document: Option<IgnoredAny> = index
-            .document(&reader, None, DocumentId(8367468610878465872))
+            .document(&reader, None, DocumentId(8_367_468_610_878_465_872))
            .unwrap();
        assert!(document.is_some());
    }
@@ -820,12 +820,12 @@ mod tests {
        assert!(document.is_none());
 
        let document: Option<IgnoredAny> = index
-            .document(&reader, None, DocumentId(7900334843754999545))
+            .document(&reader, None, DocumentId(7_900_334_843_754_999_545))
            .unwrap();
        assert!(document.is_some());
 
        let document: Option<IgnoredAny> = index
-            .document(&reader, None, DocumentId(8367468610878465872))
+            .document(&reader, None, DocumentId(8_367_468_610_878_465_872))
            .unwrap();
        assert!(document.is_some());
 
@@ -862,7 +862,7 @@ mod tests {
 
        let reader = db.main_read_txn().unwrap();
        let document: Option<serde_json::Value> = index
-            .document(&reader, None, DocumentId(7900334843754999545))
+            .document(&reader, None, DocumentId(7_900_334_843_754_999_545))
            .unwrap();
 
        let new_doc1 = serde_json::json!({
@@ -873,7 +873,7 @@ mod tests {
        assert_eq!(document, Some(new_doc1));
 
        let document: Option<serde_json::Value> = index
-            .document(&reader, None, DocumentId(8367468610878465872))
+            .document(&reader, None, DocumentId(8_367_468_610_878_465_872))
            .unwrap();
 
        let new_doc2 = serde_json::json!({
@@ -1039,14 +1039,14 @@ mod tests {
        assert_matches!(
            iter.next(),
            Some(Document {
-                id: DocumentId(7900334843754999545),
+                id: DocumentId(7_900_334_843_754_999_545),
                ..
            })
        );
        assert_matches!(
            iter.next(),
            Some(Document {
-                id: DocumentId(8367468610878465872),
+                id: DocumentId(8_367_468_610_878_465_872),
                ..
            })
        );
@@ -2,6 +2,10 @@ use crate::serde::{DeserializerError, SerializerError};
 use serde_json::Error as SerdeJsonError;
 use std::{error, fmt, io};
 
+pub use heed::Error as HeedError;
+pub use fst::Error as FstError;
+pub use bincode::Error as BincodeError;
+
 pub type MResult<T> = Result<T, Error>;
 
 #[derive(Debug)]
@@ -35,14 +39,14 @@ impl From<meilisearch_schema::Error> for Error {
     }
 }
 
-impl From<heed::Error> for Error {
-    fn from(error: heed::Error) -> Error {
+impl From<HeedError> for Error {
+    fn from(error: HeedError) -> Error {
         Error::Zlmdb(error)
     }
 }
 
-impl From<fst::Error> for Error {
-    fn from(error: fst::Error) -> Error {
+impl From<FstError> for Error {
+    fn from(error: FstError) -> Error {
         Error::Fst(error)
     }
 }
@@ -53,8 +57,8 @@ impl From<SerdeJsonError> for Error {
     }
 }
 
-impl From<bincode::Error> for Error {
-    fn from(error: bincode::Error) -> Error {
+impl From<BincodeError> for Error {
+    fn from(error: BincodeError) -> Error {
         Error::Bincode(error)
     }
 }
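
Note: the three hunks above alias the `heed`, `fst`, and `bincode` error types once at the top of error.rs and rewrite the `From` impls against the aliases, so downstream code can name these types without depending on the underlying crates directly. A sketch of the same alias-and-convert pattern using a std error type as a stand-in (the `Demo` names are illustrative):

    // `ParseIntError` stands in for `heed::Error`; the alias + From pattern
    // is the same one the hunks above introduce.
    pub use std::num::ParseIntError as DemoError;

    #[derive(Debug)]
    pub enum Error {
        Demo(DemoError),
    }

    impl From<DemoError> for Error {
        // With the alias in scope the impl names it directly, exactly as
        // `impl From<HeedError> for Error` does above.
        fn from(error: DemoError) -> Error {
            Error::Demo(error)
        }
    }

    fn main() {
        let err: Error = "not a number".parse::<u32>().unwrap_err().into();
        println!("{:?}", err);
    }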
@@ -23,18 +23,20 @@ pub mod serde;
 pub mod store;
 
 pub use self::database::{BoxUpdateFn, Database, MainT, UpdateT};
-pub use self::error::{Error, MResult};
+pub use self::error::{Error, HeedError, FstError, MResult};
 pub use self::number::{Number, ParseNumberError};
 pub use self::ranked_map::RankedMap;
 pub use self::raw_document::RawDocument;
 pub use self::store::Index;
 pub use self::update::{EnqueuedUpdateResult, ProcessedUpdateResult, UpdateStatus, UpdateType};
 pub use meilisearch_types::{DocIndex, DocumentId, Highlight};
 pub use meilisearch_schema::Schema;
 pub use query_words_mapper::QueryWordsMapper;
 
 use std::convert::TryFrom;
 use std::collections::HashMap;
 use compact_arena::SmallArena;
 use log::{error, trace};
 
 use crate::bucket_sort::PostingsListView;
 use crate::levenshtein::prefix_damerau_levenshtein;
@@ -92,7 +94,7 @@ fn highlights_from_raw_document<'a, 'tag, 'txn>(
        };
 
        let highlight = Highlight {
-            attribute: attribute,
+            attribute,
            char_index: di.char_index,
            char_length: covered_area,
        };
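
Note: this hunk (and the matching `ranking_rules` and `prefix` hunks further down) only applies Rust's struct field init shorthand; behavior is unchanged. For reference:

    struct Highlight {
        attribute: u16,
        char_index: u16,
    }

    fn build(attribute: u16, char_index: u16) -> Highlight {
        // Shorthand: when the binding and the field share a name,
        // `attribute` means the same as `attribute: attribute`.
        Highlight { attribute, char_index }
    }

    fn main() {
        let h = build(3, 42);
        println!("{} {}", h.attribute, h.char_index);
    }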
@@ -312,7 +312,7 @@ mod tests {
        for ((docid, attr, _), count) in fields_counts {
            let prev = index
                .documents_fields_counts
-                .document_field_count(&mut writer, docid, IndexedPos(attr))
+                .document_field_count(&writer, docid, IndexedPos(attr))
                .unwrap();
 
            let prev = prev.unwrap_or(0);
@@ -180,7 +180,7 @@ pub fn create_query_tree(
 ) -> MResult<(Operation, HashMap<QueryId, Range<usize>>)>
 {
     let words = split_query_string(query).map(str::to_lowercase);
-    let words: Vec<_> = words.into_iter().enumerate().collect();
+    let words: Vec<_> = words.enumerate().collect();
 
     let mut mapper = QueryWordsMapper::new(words.iter().map(|(_, w)| w));
 
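
Note: the map adapter returned by `split_query_string(query).map(str::to_lowercase)` already implements `Iterator`, so the dropped `.into_iter()` was an identity conversion. A self-contained equivalent, using `split_whitespace` as a stand-in for `split_query_string`:

    fn main() {
        // `words` is already an Iterator, so `.into_iter()` on it would be
        // the identity conversion the diff removes.
        let words = "The Dark Knight".split_whitespace().map(str::to_lowercase);
        let words: Vec<(usize, String)> = words.enumerate().collect();
        println!("{:?}", words);
    }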
@@ -22,10 +22,10 @@ fn validate_number(value: &Number) -> Option<String> {
     if value.is_f64() {
         return None
     }
-    return Some(value.to_string())
+    Some(value.to_string())
 }
 
-fn validate_string(value: &String) -> Option<String> {
+fn validate_string(value: &str) -> Option<String> {
     if value.chars().all(|x| x.is_ascii_alphanumeric() || x == '-' || x == '_') {
         Some(value.to_string())
     } else {
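
Note: taking `&str` instead of `&String` is the idiomatic signature; existing `&String` arguments still work through deref coercion, and string literals become accepted too. A self-contained version of the validator for reference:

    // Same body as the diff's `validate_string`, shown with the `&str`
    // parameter the new version uses.
    fn validate_string(value: &str) -> Option<String> {
        if value.chars().all(|x| x.is_ascii_alphanumeric() || x == '-' || x == '_') {
            Some(value.to_string())
        } else {
            None
        }
    }

    fn main() {
        let owned = String::from("movie_01");
        assert!(validate_string(&owned).is_some()); // &String coerces to &str
        assert!(validate_string("movie 01!").is_none()); // literals work directly
        println!("ok");
    }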
@@ -306,7 +306,6 @@ where
     T: ser::Serialize,
 {
     let field_id = schema.get_or_create(&attribute)?;
 
     serialize_value_with_id(
         txn,
         field_id,
@@ -49,7 +49,7 @@ impl Settings {
        };
 
        Ok(SettingsUpdate {
-            ranking_rules: ranking_rules,
+            ranking_rules,
            ranking_distinct: settings.ranking_distinct.into(),
            identifier: settings.identifier.into(),
            searchable_attributes: settings.searchable_attributes.into(),
@@ -29,7 +29,7 @@ use std::{mem, ptr};
 
 use heed::Result as ZResult;
 use heed::{BytesEncode, BytesDecode};
-use meilisearch_schema::{Schema, SchemaAttr};
+use meilisearch_schema::{IndexedPos, FieldId};
 use sdset::{Set, SetBuf};
 use serde::de::{self, Deserialize};
 use zerocopy::{AsBytes, FromBytes};
@@ -38,6 +38,7 @@ use crate::criterion::Criteria;
 use crate::database::{MainT, UpdateT};
 use crate::database::{UpdateEvent, UpdateEventsEmitter};
 use crate::serde::Deserializer;
+use crate::settings::SettingsUpdate;
 use crate::{query_builder::QueryBuilder, update, DocIndex, DocumentId, Error, MResult};
 
 type BEU64 = zerocopy::U64<byteorder::BigEndian>;
@@ -19,7 +19,7 @@ pub struct PrefixKey {
 impl PrefixKey {
     pub fn new(prefix: [u8; 4], index: u64, docid: u64) -> PrefixKey {
         PrefixKey {
-            prefix: prefix,
+            prefix,
             index: BEU64::new(index),
             docid: BEU64::new(docid),
         }
@@ -109,7 +109,7 @@ pub fn apply_documents_addition<'a, 'b>(
 ) -> MResult<()> {
     let mut documents_additions = HashMap::new();
 
-    let schema = match index.main.schema(writer)? {
+    let mut schema = match index.main.schema(writer)? {
         Some(schema) => schema,
         None => return Err(Error::SchemaMissing),
     };
@@ -147,7 +147,7 @@ pub fn apply_documents_addition<'a, 'b>(
     for (document_id, document) in documents_additions {
         let serializer = Serializer {
             txn: writer,
-            schema: &schema,
+            schema: &mut schema,
             document_store: index.documents_fields,
             document_fields_counts: index.documents_fields_counts,
             indexer: &mut indexer,
@@ -166,7 +166,7 @@ pub fn apply_documents_addition<'a, 'b>(
         indexer,
     )?;
 
     compute_short_prefixes(writer, index)?;
+    index.main.put_schema(writer, &schema)?;
 
     Ok(())
 }
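
Note: `schema` becomes mutable in the hunks above because the `Serializer` now borrows it as `&mut schema` and may register fields it has never seen while indexing (compare the `schema.get_or_create(&attribute)?` hunk earlier); `put_schema` then persists whatever was added. A toy sketch of that flow; only the `get_or_create` and `put_schema` names come from the diff, the `fields` map and id scheme are illustrative:

    use std::collections::HashMap;

    // Toy schema: only the `get_or_create` name comes from the diff; the
    // storage and id scheme here are illustrative.
    #[derive(Default, Debug)]
    struct Schema {
        fields: HashMap<String, u16>,
    }

    impl Schema {
        fn get_or_create(&mut self, attribute: &str) -> u16 {
            let next = self.fields.len() as u16;
            *self.fields.entry(attribute.to_string()).or_insert(next)
        }
    }

    fn main() {
        // Was `let schema = ...` before the rebase; serialization may now
        // register new fields as documents are indexed.
        let mut schema = Schema::default();
        for attr in ["title", "overview", "title"] {
            schema.get_or_create(attr);
        }
        // ...after indexing, the updated schema is written back, as in
        // `index.main.put_schema(writer, &schema)?;`
        println!("{:?}", schema);
    }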
@@ -178,7 +178,7 @@ pub fn apply_documents_partial_addition<'a, 'b>(
 ) -> MResult<()> {
     let mut documents_additions = HashMap::new();
 
-    let schema = match index.main.schema(writer)? {
+    let mut schema = match index.main.schema(writer)? {
         Some(schema) => schema,
         None => return Err(Error::SchemaMissing),
     };
@@ -233,7 +233,7 @@ pub fn apply_documents_partial_addition<'a, 'b>(
     for (document_id, document) in documents_additions {
         let serializer = Serializer {
             txn: writer,
-            schema: &schema,
+            schema: &mut schema,
             document_store: index.documents_fields,
             document_fields_counts: index.documents_fields_counts,
             indexer: &mut indexer,
@@ -252,7 +252,7 @@ pub fn apply_documents_partial_addition<'a, 'b>(
         indexer,
     )?;
 
     compute_short_prefixes(writer, index)?;
+    index.main.put_schema(writer, &schema)?;
 
     Ok(())
 }
@@ -292,7 +292,7 @@ pub fn reindex_all_documents(writer: &mut heed::RwTxn<MainT>, index: &store::Index
 
     for document_id in documents_ids {
         for result in index.documents_fields.document_fields(writer, *document_id)? {
-            let (attr, bytes) = result?;
+            let (field_id, bytes) = result?;
             let value: serde_json::Value = serde_json::from_slice(bytes)?;
             ram_store.insert((document_id, field_id), value);
         }
@@ -322,7 +322,7 @@ pub fn reindex_all_documents(writer: &mut heed::RwTxn<MainT>, index: &store::Index
         )?;
     }
 
     compute_short_prefixes(writer, index)?;
+    index.main.put_schema(writer, &schema)?;
 
     Ok(())
 }
@@ -1 +0,0 @@
@@ -130,22 +130,10 @@ pub fn apply_settings_update(
        _ => (),
    }
 
-    let main_store = index.main;
-    let documents_fields_store = index.documents_fields;
-    let documents_fields_counts_store = index.documents_fields_counts;
-    let postings_lists_store = index.postings_lists;
-    let docs_words_store = index.docs_words;
-
    if must_reindex {
-        reindex_all_documents(
-            writer,
-            main_store,
-            documents_fields_store,
-            documents_fields_counts_store,
-            postings_lists_store,
-            docs_words_store,
-        )?;
+        reindex_all_documents(writer, index)?;
    }
 
    if let UpdateState::Clear = settings.identifier {
        index.main.delete_schema(writer)?;
    }
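
Note: `reindex_all_documents` now receives the `index` handle instead of five separate store handles, which deletes the whole block of `let *_store = index.*;` bindings above. A minimal sketch of the narrowed signature; the real `Index` in store/mod.rs has more fields, but the five names below are the ones the old call threaded through:

    // One handle replaces five parameters; `reindex_all_documents` reaches
    // the stores through `index.*` internally.
    struct Index {
        main: (),
        documents_fields: (),
        documents_fields_counts: (),
        postings_lists: (),
        docs_words: (),
    }

    fn reindex_all_documents(index: &Index) {
        let _ = (
            &index.main,
            &index.documents_fields,
            &index.documents_fields_counts,
            &index.postings_lists,
            &index.docs_words,
        );
    }

    fn main() {
        let index = Index {
            main: (),
            documents_fields: (),
            documents_fields_counts: (),
            postings_lists: (),
            docs_words: (),
        };
        reindex_all_documents(&index);
        println!("ok");
    }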
@@ -158,10 +146,7 @@ pub fn apply_stop_words_update(
     stop_words: BTreeSet<String>,
 ) -> MResult<bool> {
 
-    let main_store = index.main;
-    let mut must_reindex = false;
-
-    let old_stop_words: BTreeSet<String> = main_store
+    let old_stop_words: BTreeSet<String> = index.main
         .stop_words_fst(writer)?
         .unwrap_or_default()
         .stream()
@@ -184,10 +169,9 @@ pub fn apply_stop_words_update(
            index,
            deletion
        )?;
-        must_reindex = true;
+        return Ok(true)
    }
 
-    Ok(must_reindex)
+    Ok(false)
 }
 
 fn apply_stop_words_addition(
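
Note: the `must_reindex` flag is gone; the deletion branch returns `Ok(true)` as soon as a reindex is known to be required, and the fall-through returns `Ok(false)`. Sketch of the resulting control flow, with the real deletion call reduced to a comment:

    // The flag-threading version becomes early returns: the deletion
    // branch reports `Ok(true)` immediately, otherwise `Ok(false)`.
    fn apply_stop_words_update(has_deletions: bool) -> Result<bool, ()> {
        if has_deletions {
            // ...apply_stop_words_deletion(...)?;
            return Ok(true); // caller knows a reindex is needed
        }
        Ok(false)
    }

    fn main() {
        assert_eq!(apply_stop_words_update(true), Ok(true));
        assert_eq!(apply_stop_words_update(false), Ok(false));
    }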
@@ -256,8 +240,6 @@ fn apply_stop_words_deletion(
     deletion: BTreeSet<String>,
 ) -> MResult<()> {
 
-    let main_store = index.main;
-
     let mut stop_words_builder = SetBuilder::memory();
 
     for word in deletion {
@@ -271,7 +253,7 @@ fn apply_stop_words_deletion(
        .unwrap();
 
    // now we delete all of these stop words from the main store
-    let stop_words_fst = main_store.stop_words_fst(writer)?.unwrap_or_default();
+    let stop_words_fst = index.main.stop_words_fst(writer)?.unwrap_or_default();
 
    let op = OpBuilder::new()
        .add(&stop_words_fst)
@@ -285,7 +267,7 @@ fn apply_stop_words_deletion(
        .and_then(fst::Set::from_bytes)
        .unwrap();
 
-    Ok(main_store.put_stop_words_fst(writer, &stop_words_fst)?)
+    Ok(index.main.put_stop_words_fst(writer, &stop_words_fst)?)
 }
 
 pub fn apply_synonyms_update(
@@ -1 +0,0 @@