mirror of https://github.com/meilisearch/MeiliSearch
synced 2025-07-03 20:07:09 +02:00

rebase from master

parent 2143226f04
commit dc6907e748

29 changed files with 92 additions and 105 deletions
@@ -14,6 +14,7 @@ use meilisearch_types::DocIndex;
 use sdset::{Set, SetBuf, exponential_search};
 use slice_group_by::{GroupBy, GroupByMut};
 
+use crate::error::Error;
 use crate::criterion::{Criteria, Context, ContextMut};
 use crate::distinct_map::{BufferedDistinctMap, DistinctMap};
 use crate::raw_document::RawDocument;
@@ -163,11 +164,11 @@ where
 
     let schema = main_store.schema(reader)?.ok_or(Error::SchemaMissing)?;
     let iter = raw_documents.into_iter().skip(range.start).take(range.len());
-    let iter = iter.map(|rd| Document::from_raw(rd, &automatons, &arena, searchable_attrs.as_ref(), &schema));
+    let iter = iter.map(|rd| Document::from_raw(rd, &queries_kinds, &arena, searchable_attrs.as_ref(), &schema));
     let documents = iter.collect();
 
     debug!("bucket sort took {:.02?}", before_bucket_sort.elapsed());
 
 
     Ok(documents)
 }
@@ -349,7 +350,7 @@ where
            };
 
            if distinct_accepted && seen.len() > range.start {
-                documents.push(Document::from_raw(raw_document, &queries_kinds, &arena, searchable_attrs.as_ref()));
+                documents.push(Document::from_raw(raw_document, &queries_kinds, &arena, searchable_attrs.as_ref(), &schema));
                if documents.len() == range.len() {
                    break;
                }
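
Note: with this rebase, both `Document::from_raw` call sites (the plain path in the first hunk and the distinct path above) receive the schema, fetched once via `main_store.schema(reader)?.ok_or(Error::SchemaMissing)?`. A minimal sketch of the shape of this change; the stand-in types, the `best_pos` field, and the `name_of` helper are illustrative assumptions, not the real API:

    // Stand-in types to show the shape of the change; only the extra
    // `&Schema` parameter mirrors the diff, the rest is illustrative.
    struct Schema;

    impl Schema {
        // Hypothetical lookup: resolve an indexed position to a field name.
        fn name_of(&self, _pos: u16) -> Option<&str> {
            Some("title")
        }
    }

    struct RawDocument {
        best_pos: u16,
    }

    struct Document {
        field: String,
    }

    impl Document {
        // After the rebase, every call site passes `&schema` so the
        // conversion can resolve attribute names itself.
        fn from_raw(raw: RawDocument, schema: &Schema) -> Document {
            let field = schema.name_of(raw.best_pos).unwrap_or("unknown").to_string();
            Document { field }
        }
    }

    fn main() {
        let schema = Schema;
        let doc = Document::from_raw(RawDocument { best_pos: 0 }, &schema);
        println!("{}", doc.field);
    }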
@@ -743,12 +743,12 @@ mod tests {
        assert!(document.is_none());
 
        let document: Option<IgnoredAny> = index
-            .document(&reader, None, DocumentId(7900334843754999545))
+            .document(&reader, None, DocumentId(7_900_334_843_754_999_545))
            .unwrap();
        assert!(document.is_some());
 
        let document: Option<IgnoredAny> = index
-            .document(&reader, None, DocumentId(8367468610878465872))
+            .document(&reader, None, DocumentId(8_367_468_610_878_465_872))
            .unwrap();
        assert!(document.is_some());
    }
@@ -820,12 +820,12 @@ mod tests {
        assert!(document.is_none());
 
        let document: Option<IgnoredAny> = index
-            .document(&reader, None, DocumentId(7900334843754999545))
+            .document(&reader, None, DocumentId(7_900_334_843_754_999_545))
            .unwrap();
        assert!(document.is_some());
 
        let document: Option<IgnoredAny> = index
-            .document(&reader, None, DocumentId(8367468610878465872))
+            .document(&reader, None, DocumentId(8_367_468_610_878_465_872))
            .unwrap();
        assert!(document.is_some());
 
@@ -862,7 +862,7 @@ mod tests {
 
        let reader = db.main_read_txn().unwrap();
        let document: Option<serde_json::Value> = index
-            .document(&reader, None, DocumentId(7900334843754999545))
+            .document(&reader, None, DocumentId(7_900_334_843_754_999_545))
            .unwrap();
 
        let new_doc1 = serde_json::json!({
@@ -873,7 +873,7 @@ mod tests {
        assert_eq!(document, Some(new_doc1));
 
        let document: Option<serde_json::Value> = index
-            .document(&reader, None, DocumentId(8367468610878465872))
+            .document(&reader, None, DocumentId(8_367_468_610_878_465_872))
            .unwrap();
 
        let new_doc2 = serde_json::json!({
@@ -1039,14 +1039,14 @@ mod tests {
        assert_matches!(
            iter.next(),
            Some(Document {
-                id: DocumentId(7900334843754999545),
+                id: DocumentId(7_900_334_843_754_999_545),
                ..
            })
        );
        assert_matches!(
            iter.next(),
            Some(Document {
-                id: DocumentId(8367468610878465872),
+                id: DocumentId(8_367_468_610_878_465_872),
                ..
            })
        );
@@ -2,6 +2,10 @@ use crate::serde::{DeserializerError, SerializerError};
 use serde_json::Error as SerdeJsonError;
 use std::{error, fmt, io};
 
+pub use heed::Error as HeedError;
+pub use fst::Error as FstError;
+pub use bincode::Error as BincodeError;
+
 pub type MResult<T> = Result<T, Error>;
 
 #[derive(Debug)]
@@ -35,14 +39,14 @@ impl From<meilisearch_schema::Error> for Error {
     }
 }
 
-impl From<heed::Error> for Error {
-    fn from(error: heed::Error) -> Error {
+impl From<HeedError> for Error {
+    fn from(error: HeedError) -> Error {
         Error::Zlmdb(error)
     }
 }
 
-impl From<fst::Error> for Error {
-    fn from(error: fst::Error) -> Error {
+impl From<FstError> for Error {
+    fn from(error: FstError) -> Error {
         Error::Fst(error)
     }
 }
@@ -53,8 +57,8 @@ impl From<SerdeJsonError> for Error {
     }
 }
 
-impl From<bincode::Error> for Error {
-    fn from(error: bincode::Error) -> Error {
+impl From<BincodeError> for Error {
+    fn from(error: BincodeError) -> Error {
         Error::Bincode(error)
     }
 }
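
Note: the three hunks above alias the `heed`, `fst`, and `bincode` error types once at the top of error.rs and rewrite the `From` impls against the aliases, so downstream code can name these types without depending on the underlying crates directly. A sketch of the same alias-and-convert pattern using a std error type as a stand-in (the `Demo` names are illustrative):

    // `ParseIntError` stands in for `heed::Error`; the alias + From pattern
    // is the same one the hunks above introduce.
    pub use std::num::ParseIntError as DemoError;

    #[derive(Debug)]
    pub enum Error {
        Demo(DemoError),
    }

    impl From<DemoError> for Error {
        // With the alias in scope the impl names it directly, exactly as
        // `impl From<HeedError> for Error` does above.
        fn from(error: DemoError) -> Error {
            Error::Demo(error)
        }
    }

    fn main() {
        let err: Error = "not a number".parse::<u32>().unwrap_err().into();
        println!("{:?}", err);
    }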
@@ -23,18 +23,20 @@ pub mod serde;
 pub mod store;
 
 pub use self::database::{BoxUpdateFn, Database, MainT, UpdateT};
-pub use self::error::{Error, MResult};
+pub use self::error::{Error, HeedError, FstError, MResult};
 pub use self::number::{Number, ParseNumberError};
 pub use self::ranked_map::RankedMap;
 pub use self::raw_document::RawDocument;
 pub use self::store::Index;
 pub use self::update::{EnqueuedUpdateResult, ProcessedUpdateResult, UpdateStatus, UpdateType};
 pub use meilisearch_types::{DocIndex, DocumentId, Highlight};
 pub use meilisearch_schema::Schema;
 pub use query_words_mapper::QueryWordsMapper;
 
 use std::convert::TryFrom;
 use std::collections::HashMap;
 use compact_arena::SmallArena;
 use log::{error, trace};
 
 use crate::bucket_sort::PostingsListView;
 use crate::levenshtein::prefix_damerau_levenshtein;
@@ -92,7 +94,7 @@ fn highlights_from_raw_document<'a, 'tag, 'txn>(
        };
 
        let highlight = Highlight {
-            attribute: attribute,
+            attribute,
            char_index: di.char_index,
            char_length: covered_area,
        };
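
Note: this hunk (and the matching `ranking_rules` and `prefix` hunks further down) only applies Rust's struct field init shorthand; behavior is unchanged. For reference:

    struct Highlight {
        attribute: u16,
        char_index: u16,
    }

    fn build(attribute: u16, char_index: u16) -> Highlight {
        // Shorthand: when the binding and the field share a name,
        // `attribute` means the same as `attribute: attribute`.
        Highlight { attribute, char_index }
    }

    fn main() {
        let h = build(3, 42);
        println!("{} {}", h.attribute, h.char_index);
    }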
@@ -312,7 +312,7 @@ mod tests {
        for ((docid, attr, _), count) in fields_counts {
            let prev = index
                .documents_fields_counts
-                .document_field_count(&mut writer, docid, IndexedPos(attr))
+                .document_field_count(&writer, docid, IndexedPos(attr))
                .unwrap();
 
            let prev = prev.unwrap_or(0);
@@ -180,7 +180,7 @@ pub fn create_query_tree(
 ) -> MResult<(Operation, HashMap<QueryId, Range<usize>>)>
 {
     let words = split_query_string(query).map(str::to_lowercase);
-    let words: Vec<_> = words.into_iter().enumerate().collect();
+    let words: Vec<_> = words.enumerate().collect();
 
     let mut mapper = QueryWordsMapper::new(words.iter().map(|(_, w)| w));
 
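
Note: the map adapter returned by `split_query_string(query).map(str::to_lowercase)` already implements `Iterator`, so the dropped `.into_iter()` was an identity conversion. A self-contained equivalent, using `split_whitespace` as a stand-in for `split_query_string`:

    fn main() {
        // `words` is already an Iterator, so `.into_iter()` on it would be
        // the identity conversion the diff removes.
        let words = "The Dark Knight".split_whitespace().map(str::to_lowercase);
        let words: Vec<(usize, String)> = words.enumerate().collect();
        println!("{:?}", words);
    }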
@@ -22,10 +22,10 @@ fn validate_number(value: &Number) -> Option<String> {
     if value.is_f64() {
         return None
     }
-    return Some(value.to_string())
+    Some(value.to_string())
 }
 
-fn validate_string(value: &String) -> Option<String> {
+fn validate_string(value: &str) -> Option<String> {
     if value.chars().all(|x| x.is_ascii_alphanumeric() || x == '-' || x == '_') {
         Some(value.to_string())
     } else {
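
Note: taking `&str` instead of `&String` is the idiomatic signature; existing `&String` arguments still work through deref coercion, and string literals become accepted too. A self-contained version of the validator for reference:

    // Same body as the diff's `validate_string`, shown with the `&str`
    // parameter the new version uses.
    fn validate_string(value: &str) -> Option<String> {
        if value.chars().all(|x| x.is_ascii_alphanumeric() || x == '-' || x == '_') {
            Some(value.to_string())
        } else {
            None
        }
    }

    fn main() {
        let owned = String::from("movie_01");
        assert!(validate_string(&owned).is_some()); // &String coerces to &str
        assert!(validate_string("movie 01!").is_none()); // literals work directly
        println!("ok");
    }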
@@ -306,7 +306,6 @@ where
     T: ser::Serialize,
 {
     let field_id = schema.get_or_create(&attribute)?;
 
     serialize_value_with_id(
         txn,
         field_id,
@@ -49,7 +49,7 @@ impl Settings {
        };
 
        Ok(SettingsUpdate {
-            ranking_rules: ranking_rules,
+            ranking_rules,
            ranking_distinct: settings.ranking_distinct.into(),
            identifier: settings.identifier.into(),
            searchable_attributes: settings.searchable_attributes.into(),
@@ -29,7 +29,7 @@ use std::{mem, ptr};
 
 use heed::Result as ZResult;
 use heed::{BytesEncode, BytesDecode};
-use meilisearch_schema::{Schema, SchemaAttr};
+use meilisearch_schema::{IndexedPos, FieldId};
 use sdset::{Set, SetBuf};
 use serde::de::{self, Deserialize};
 use zerocopy::{AsBytes, FromBytes};
@@ -38,6 +38,7 @@ use crate::criterion::Criteria;
 use crate::database::{MainT, UpdateT};
 use crate::database::{UpdateEvent, UpdateEventsEmitter};
 use crate::serde::Deserializer;
+use crate::settings::SettingsUpdate;
 use crate::{query_builder::QueryBuilder, update, DocIndex, DocumentId, Error, MResult};
 
 type BEU64 = zerocopy::U64<byteorder::BigEndian>;
@@ -19,7 +19,7 @@ pub struct PrefixKey {
 impl PrefixKey {
     pub fn new(prefix: [u8; 4], index: u64, docid: u64) -> PrefixKey {
         PrefixKey {
-            prefix: prefix,
+            prefix,
             index: BEU64::new(index),
             docid: BEU64::new(docid),
         }
@@ -109,7 +109,7 @@ pub fn apply_documents_addition<'a, 'b>(
 ) -> MResult<()> {
     let mut documents_additions = HashMap::new();
 
-    let schema = match index.main.schema(writer)? {
+    let mut schema = match index.main.schema(writer)? {
         Some(schema) => schema,
         None => return Err(Error::SchemaMissing),
     };
@@ -147,7 +147,7 @@ pub fn apply_documents_addition<'a, 'b>(
     for (document_id, document) in documents_additions {
         let serializer = Serializer {
             txn: writer,
-            schema: &schema,
+            schema: &mut schema,
             document_store: index.documents_fields,
             document_fields_counts: index.documents_fields_counts,
             indexer: &mut indexer,
@@ -166,7 +166,7 @@ pub fn apply_documents_addition<'a, 'b>(
         indexer,
     )?;
 
     compute_short_prefixes(writer, index)?;
+    index.main.put_schema(writer, &schema)?;
 
     Ok(())
 }
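
Note: `schema` becomes mutable in the hunks above because the `Serializer` now borrows it as `&mut schema` and may register fields it has never seen while indexing (compare the `schema.get_or_create(&attribute)?` hunk earlier); `put_schema` then persists whatever was added. A toy sketch of that flow; only the `get_or_create` and `put_schema` names come from the diff, the `fields` map and id scheme are illustrative:

    use std::collections::HashMap;

    // Toy schema: only the `get_or_create` name comes from the diff; the
    // storage and id scheme here are illustrative.
    #[derive(Default, Debug)]
    struct Schema {
        fields: HashMap<String, u16>,
    }

    impl Schema {
        fn get_or_create(&mut self, attribute: &str) -> u16 {
            let next = self.fields.len() as u16;
            *self.fields.entry(attribute.to_string()).or_insert(next)
        }
    }

    fn main() {
        // Was `let schema = ...` before the rebase; serialization may now
        // register new fields as documents are indexed.
        let mut schema = Schema::default();
        for attr in ["title", "overview", "title"] {
            schema.get_or_create(attr);
        }
        // ...after indexing, the updated schema is written back, as in
        // `index.main.put_schema(writer, &schema)?;`
        println!("{:?}", schema);
    }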
@@ -178,7 +178,7 @@ pub fn apply_documents_partial_addition<'a, 'b>(
 ) -> MResult<()> {
     let mut documents_additions = HashMap::new();
 
-    let schema = match index.main.schema(writer)? {
+    let mut schema = match index.main.schema(writer)? {
         Some(schema) => schema,
         None => return Err(Error::SchemaMissing),
     };
@@ -233,7 +233,7 @@ pub fn apply_documents_partial_addition<'a, 'b>(
     for (document_id, document) in documents_additions {
         let serializer = Serializer {
             txn: writer,
-            schema: &schema,
+            schema: &mut schema,
             document_store: index.documents_fields,
             document_fields_counts: index.documents_fields_counts,
             indexer: &mut indexer,
@@ -252,7 +252,7 @@ pub fn apply_documents_partial_addition<'a, 'b>(
         indexer,
     )?;
 
     compute_short_prefixes(writer, index)?;
+    index.main.put_schema(writer, &schema)?;
 
     Ok(())
 }
@@ -292,7 +292,7 @@ pub fn reindex_all_documents(writer: &mut heed::RwTxn<MainT>, index: &store::Index
 
     for document_id in documents_ids {
         for result in index.documents_fields.document_fields(writer, *document_id)? {
-            let (attr, bytes) = result?;
+            let (field_id, bytes) = result?;
             let value: serde_json::Value = serde_json::from_slice(bytes)?;
             ram_store.insert((document_id, field_id), value);
         }
@@ -322,7 +322,7 @@ pub fn reindex_all_documents(writer: &mut heed::RwTxn<MainT>, index: &store::Index
         )?;
     }
 
     compute_short_prefixes(writer, index)?;
+    index.main.put_schema(writer, &schema)?;
 
     Ok(())
 }
@@ -1 +0,0 @@
@@ -130,22 +130,10 @@ pub fn apply_settings_update(
        _ => (),
    }
 
-    let main_store = index.main;
-    let documents_fields_store = index.documents_fields;
-    let documents_fields_counts_store = index.documents_fields_counts;
-    let postings_lists_store = index.postings_lists;
-    let docs_words_store = index.docs_words;
-
    if must_reindex {
-        reindex_all_documents(
-            writer,
-            main_store,
-            documents_fields_store,
-            documents_fields_counts_store,
-            postings_lists_store,
-            docs_words_store,
-        )?;
+        reindex_all_documents(writer, index)?;
    }
 
    if let UpdateState::Clear = settings.identifier {
        index.main.delete_schema(writer)?;
    }
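
Note: `reindex_all_documents` now receives the `index` handle instead of five separate store handles, which deletes the whole block of `let *_store = index.*;` bindings above. A minimal sketch of the narrowed signature; the real `Index` in store/mod.rs has more fields, but the five names below are the ones the old call threaded through:

    // One handle replaces five parameters; `reindex_all_documents` reaches
    // the stores through `index.*` internally.
    struct Index {
        main: (),
        documents_fields: (),
        documents_fields_counts: (),
        postings_lists: (),
        docs_words: (),
    }

    fn reindex_all_documents(index: &Index) {
        let _ = (
            &index.main,
            &index.documents_fields,
            &index.documents_fields_counts,
            &index.postings_lists,
            &index.docs_words,
        );
    }

    fn main() {
        let index = Index {
            main: (),
            documents_fields: (),
            documents_fields_counts: (),
            postings_lists: (),
            docs_words: (),
        };
        reindex_all_documents(&index);
        println!("ok");
    }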
@@ -158,10 +146,7 @@ pub fn apply_stop_words_update(
     stop_words: BTreeSet<String>,
 ) -> MResult<bool> {
 
-    let main_store = index.main;
-    let mut must_reindex = false;
-
-    let old_stop_words: BTreeSet<String> = main_store
+    let old_stop_words: BTreeSet<String> = index.main
         .stop_words_fst(writer)?
         .unwrap_or_default()
         .stream()
@@ -184,10 +169,9 @@ pub fn apply_stop_words_update(
            index,
            deletion
        )?;
-        must_reindex = true;
+        return Ok(true)
    }
 
-    Ok(must_reindex)
+    Ok(false)
 }
 
 fn apply_stop_words_addition(
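
Note: the `must_reindex` flag is gone; the deletion branch returns `Ok(true)` as soon as a reindex is known to be required, and the fall-through returns `Ok(false)`. Sketch of the resulting control flow, with the real deletion call reduced to a comment:

    // The flag-threading version becomes early returns: the deletion
    // branch reports `Ok(true)` immediately, otherwise `Ok(false)`.
    fn apply_stop_words_update(has_deletions: bool) -> Result<bool, ()> {
        if has_deletions {
            // ...apply_stop_words_deletion(...)?;
            return Ok(true); // caller knows a reindex is needed
        }
        Ok(false)
    }

    fn main() {
        assert_eq!(apply_stop_words_update(true), Ok(true));
        assert_eq!(apply_stop_words_update(false), Ok(false));
    }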
@@ -256,8 +240,6 @@ fn apply_stop_words_deletion(
     deletion: BTreeSet<String>,
 ) -> MResult<()> {
 
-    let main_store = index.main;
-
     let mut stop_words_builder = SetBuilder::memory();
 
     for word in deletion {
@@ -271,7 +253,7 @@ fn apply_stop_words_deletion(
        .unwrap();
 
    // now we delete all of these stop words from the main store
-    let stop_words_fst = main_store.stop_words_fst(writer)?.unwrap_or_default();
+    let stop_words_fst = index.main.stop_words_fst(writer)?.unwrap_or_default();
 
    let op = OpBuilder::new()
        .add(&stop_words_fst)
@@ -285,7 +267,7 @@ fn apply_stop_words_deletion(
        .and_then(fst::Set::from_bytes)
        .unwrap();
 
-    Ok(main_store.put_stop_words_fst(writer, &stop_words_fst)?)
+    Ok(index.main.put_stop_words_fst(writer, &stop_words_fst)?)
 }
 
 pub fn apply_synonyms_update(
@@ -1 +0,0 @@