mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-12-23 05:00:06 +01:00
introduce a new schemaless way
This commit is contained in:
parent
bbe1845f66
commit
130fb74928
@ -7,6 +7,5 @@
|
||||
"overview",
|
||||
"release_date",
|
||||
"poster"
|
||||
],
|
||||
"attributes_ranked": ["release_date"]
|
||||
]
|
||||
}
|
||||
|
@ -13,7 +13,8 @@ use structopt::StructOpt;
|
||||
use termcolor::{Color, ColorChoice, ColorSpec, StandardStream, WriteColor};
|
||||
|
||||
use meilisearch_core::{Database, Highlight, ProcessedUpdateResult};
|
||||
use meilisearch_schema::SchemaAttr;
|
||||
use meilisearch_core::settings::Settings;
|
||||
use meilisearch_schema::FieldId;
|
||||
|
||||
// #[cfg(target_os = "linux")]
|
||||
#[global_allocator]
|
||||
@ -121,7 +122,8 @@ fn index_command(command: IndexCommand, database: Database) -> Result<(), Box<dy
|
||||
|
||||
let settings = {
|
||||
let string = fs::read_to_string(&command.settings)?;
|
||||
serde_json::from_str(&string).unwrap()
|
||||
let settings: Settings = serde_json::from_str(&string).unwrap();
|
||||
settings.into()
|
||||
};
|
||||
|
||||
let mut update_writer = db.update_write_txn().unwrap();
|
||||
@ -357,7 +359,7 @@ fn search_command(command: SearchCommand, database: Database) -> Result<(), Box<
|
||||
};
|
||||
|
||||
let attr = schema
|
||||
.attribute(&filter)
|
||||
.get_id(filter)
|
||||
.expect("Could not find filtered attribute");
|
||||
|
||||
builder.with_filter(move |document_id| {
|
||||
@ -388,11 +390,11 @@ fn search_command(command: SearchCommand, database: Database) -> Result<(), Box<
|
||||
for (name, text) in document.0 {
|
||||
print!("{}: ", name);
|
||||
|
||||
let attr = schema.attribute(&name).unwrap();
|
||||
let attr = schema.get_id(&name).unwrap();
|
||||
let highlights = doc
|
||||
.highlights
|
||||
.iter()
|
||||
.filter(|m| SchemaAttr::new(m.attribute) == attr)
|
||||
.filter(|m| FieldId::new(m.attribute) == attr)
|
||||
.cloned();
|
||||
let (text, highlights) =
|
||||
crop_text(&text, highlights, command.char_context);
|
||||
@ -407,8 +409,8 @@ fn search_command(command: SearchCommand, database: Database) -> Result<(), Box<
|
||||
|
||||
let mut matching_attributes = HashSet::new();
|
||||
for highlight in doc.highlights {
|
||||
let attr = SchemaAttr::new(highlight.attribute);
|
||||
let name = schema.attribute_name(attr);
|
||||
let attr = FieldId::new(highlight.attribute);
|
||||
let name = schema.get_name(attr);
|
||||
matching_attributes.insert(name);
|
||||
}
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
use std::cmp::{Ordering, Reverse};
|
||||
use std::collections::hash_map::{HashMap, Entry};
|
||||
use meilisearch_schema::SchemaAttr;
|
||||
use meilisearch_schema::IndexedPos;
|
||||
use slice_group_by::GroupBy;
|
||||
use crate::{RawDocument, MResult};
|
||||
use crate::bucket_sort::BareMatch;
|
||||
@ -32,7 +32,7 @@ impl Criterion for Exact {
|
||||
for bm in group {
|
||||
for di in ctx.postings_lists[bm.postings_list].as_ref() {
|
||||
|
||||
let attr = SchemaAttr(di.attribute);
|
||||
let attr = IndexedPos(di.attribute);
|
||||
let count = match fields_counts.entry(attr) {
|
||||
Entry::Occupied(entry) => *entry.get(),
|
||||
Entry::Vacant(entry) => {
|
||||
|
@ -69,7 +69,7 @@ impl<'a> SortByAttr<'a> {
|
||||
reversed: bool,
|
||||
) -> Result<SortByAttr<'a>, SortByAttrError> {
|
||||
let field_id = match schema.get_id(attr_name) {
|
||||
Some(field_id) => *field_id,
|
||||
Some(field_id) => field_id,
|
||||
None => return Err(SortByAttrError::AttributeNotFound),
|
||||
};
|
||||
|
||||
|
@ -8,11 +8,12 @@ pub type MResult<T> = Result<T, Error>;
|
||||
pub enum Error {
|
||||
Io(io::Error),
|
||||
IndexAlreadyExists,
|
||||
SchemaDiffer,
|
||||
MissingSchemaIdentifier,
|
||||
SchemaMissing,
|
||||
WordIndexMissing,
|
||||
MissingDocumentId,
|
||||
MaxFieldsLimitExceeded,
|
||||
Schema(meilisearch_schema::Error),
|
||||
Zlmdb(heed::Error),
|
||||
Fst(fst::Error),
|
||||
SerdeJson(SerdeJsonError),
|
||||
@ -28,6 +29,12 @@ impl From<io::Error> for Error {
|
||||
}
|
||||
}
|
||||
|
||||
impl From<meilisearch_schema::Error> for Error {
|
||||
fn from(error: meilisearch_schema::Error) -> Error {
|
||||
Error::Schema(error)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<heed::Error> for Error {
|
||||
fn from(error: heed::Error) -> Error {
|
||||
Error::Zlmdb(error)
|
||||
@ -76,10 +83,12 @@ impl fmt::Display for Error {
|
||||
match self {
|
||||
Io(e) => write!(f, "{}", e),
|
||||
IndexAlreadyExists => write!(f, "index already exists"),
|
||||
SchemaDiffer => write!(f, "schemas differ"),
|
||||
MissingSchemaIdentifier => write!(f, "schema cannot be build without identifier"),
|
||||
SchemaMissing => write!(f, "this index does not have a schema"),
|
||||
WordIndexMissing => write!(f, "this index does not have a word index"),
|
||||
MissingDocumentId => write!(f, "document id is missing"),
|
||||
MaxFieldsLimitExceeded => write!(f, "maximum field in a document is exceeded"),
|
||||
Schema(e) => write!(f, "schemas error; {}", e),
|
||||
Zlmdb(e) => write!(f, "heed error; {}", e),
|
||||
Fst(e) => write!(f, "fst error; {}", e),
|
||||
SerdeJson(e) => write!(f, "serde json error; {}", e),
|
||||
|
@ -136,7 +136,7 @@ mod tests {
|
||||
use std::iter::FromIterator;
|
||||
|
||||
use fst::{IntoStreamer, Set};
|
||||
use meilisearch_schema::SchemaAttr;
|
||||
use meilisearch_schema::IndexedPos;
|
||||
use sdset::SetBuf;
|
||||
use tempfile::TempDir;
|
||||
|
||||
@ -295,14 +295,14 @@ mod tests {
|
||||
for ((docid, attr, _), count) in fields_counts {
|
||||
let prev = index
|
||||
.documents_fields_counts
|
||||
.document_field_count(&mut writer, docid, SchemaAttr(attr))
|
||||
.document_field_count(&mut writer, docid, IndexedPos(attr))
|
||||
.unwrap();
|
||||
|
||||
let prev = prev.unwrap_or(0);
|
||||
|
||||
index
|
||||
.documents_fields_counts
|
||||
.put_document_field_count(&mut writer, docid, SchemaAttr(attr), prev + count)
|
||||
.put_document_field_count(&mut writer, docid, IndexedPos(attr), prev + count)
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
|
@ -180,16 +180,16 @@ fn token_to_docindex(id: DocumentId, indexed_pos: IndexedPos, token: Token) -> O
|
||||
mod tests {
|
||||
|
||||
use super::*;
|
||||
use meilisearch_schema::SchemaAttr;
|
||||
use meilisearch_schema::IndexedPos;
|
||||
|
||||
#[test]
|
||||
fn strange_apostrophe() {
|
||||
let mut indexer = RawIndexer::new(fst::Set::default());
|
||||
|
||||
let docid = DocumentId(0);
|
||||
let attr = SchemaAttr(0);
|
||||
let indexed_pos = IndexedPos(0);
|
||||
let text = "Zut, l’aspirateur, j’ai oublié de l’éteindre !";
|
||||
indexer.index_text(docid, attr, text);
|
||||
indexer.index_text(docid, indexed_pos, text);
|
||||
|
||||
let Indexed {
|
||||
words_doc_indexes, ..
|
||||
@ -209,9 +209,9 @@ mod tests {
|
||||
let mut indexer = RawIndexer::new(fst::Set::default());
|
||||
|
||||
let docid = DocumentId(0);
|
||||
let attr = SchemaAttr(0);
|
||||
let indexed_pos = IndexedPos(0);
|
||||
let text = vec!["Zut, l’aspirateur, j’ai oublié de l’éteindre !"];
|
||||
indexer.index_text_seq(docid, attr, text);
|
||||
indexer.index_text_seq(docid, indexed_pos, text);
|
||||
|
||||
let Indexed {
|
||||
words_doc_indexes, ..
|
||||
@ -234,9 +234,9 @@ mod tests {
|
||||
let mut indexer = RawIndexer::new(stop_words);
|
||||
|
||||
let docid = DocumentId(0);
|
||||
let attr = SchemaAttr(0);
|
||||
let indexed_pos = IndexedPos(0);
|
||||
let text = "Zut, l’aspirateur, j’ai oublié de l’éteindre !";
|
||||
indexer.index_text(docid, attr, text);
|
||||
indexer.index_text(docid, indexed_pos, text);
|
||||
|
||||
let Indexed {
|
||||
words_doc_indexes, ..
|
||||
@ -258,9 +258,9 @@ mod tests {
|
||||
let mut indexer = RawIndexer::new(fst::Set::default());
|
||||
|
||||
let docid = DocumentId(0);
|
||||
let attr = SchemaAttr(0);
|
||||
let indexed_pos = IndexedPos(0);
|
||||
let text = "🇯🇵";
|
||||
indexer.index_text(docid, attr, text);
|
||||
indexer.index_text(docid, indexed_pos, text);
|
||||
|
||||
let Indexed {
|
||||
words_doc_indexes, ..
|
||||
|
@ -99,7 +99,7 @@ impl<'de, 'a, 'b> de::Deserializer<'de> for &'b mut Deserializer<'a> {
|
||||
let ioread = SerdeJsonIoRead::new(cursor);
|
||||
let value = Value(SerdeJsonDeserializer::new(ioread));
|
||||
|
||||
Some((*attribute_name, value))
|
||||
Some((attribute_name, value))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
|
@ -20,7 +20,7 @@ pub use self::convert_to_string::ConvertToString;
|
||||
pub use self::deserializer::{Deserializer, DeserializerError};
|
||||
pub use self::extract_document_id::{compute_document_id, extract_document_id, value_to_string};
|
||||
pub use self::indexer::Indexer;
|
||||
pub use self::serializer::{serialize_value, Serializer};
|
||||
pub use self::serializer::{serialize_value, serialize_value_with_id, Serializer};
|
||||
|
||||
use std::{error::Error, fmt};
|
||||
|
||||
|
@ -1,4 +1,4 @@
|
||||
use meilisearch_schema::{Schema, FieldsMap};
|
||||
use meilisearch_schema::{Schema, FieldId};
|
||||
use serde::ser;
|
||||
|
||||
use crate::database::MainT;
|
||||
@ -10,12 +10,11 @@ use super::{ConvertToNumber, ConvertToString, Indexer, SerializerError};
|
||||
|
||||
pub struct Serializer<'a, 'b> {
|
||||
pub txn: &'a mut heed::RwTxn<'b, MainT>,
|
||||
pub schema: &'a Schema,
|
||||
pub schema: &'a mut Schema,
|
||||
pub document_store: DocumentsFields,
|
||||
pub document_fields_counts: DocumentsFieldsCounts,
|
||||
pub indexer: &'a mut RawIndexer,
|
||||
pub ranked_map: &'a mut RankedMap,
|
||||
pub fields_map: &'a mut FieldsMap,
|
||||
pub document_id: DocumentId,
|
||||
}
|
||||
|
||||
@ -159,7 +158,6 @@ impl<'a, 'b> ser::Serializer for Serializer<'a, 'b> {
|
||||
document_fields_counts: self.document_fields_counts,
|
||||
indexer: self.indexer,
|
||||
ranked_map: self.ranked_map,
|
||||
fields_map: self.fields_map,
|
||||
current_key_name: None,
|
||||
})
|
||||
}
|
||||
@ -177,7 +175,6 @@ impl<'a, 'b> ser::Serializer for Serializer<'a, 'b> {
|
||||
document_fields_counts: self.document_fields_counts,
|
||||
indexer: self.indexer,
|
||||
ranked_map: self.ranked_map,
|
||||
fields_map: self.fields_map,
|
||||
})
|
||||
}
|
||||
|
||||
@ -196,13 +193,12 @@ impl<'a, 'b> ser::Serializer for Serializer<'a, 'b> {
|
||||
|
||||
pub struct MapSerializer<'a, 'b> {
|
||||
txn: &'a mut heed::RwTxn<'b, MainT>,
|
||||
schema: &'a Schema,
|
||||
schema: &'a mut Schema,
|
||||
document_id: DocumentId,
|
||||
document_store: DocumentsFields,
|
||||
document_fields_counts: DocumentsFieldsCounts,
|
||||
indexer: &'a mut RawIndexer,
|
||||
ranked_map: &'a mut RankedMap,
|
||||
fields_map: &'a mut FieldsMap,
|
||||
current_key_name: Option<String>,
|
||||
}
|
||||
|
||||
@ -237,21 +233,17 @@ impl<'a, 'b> ser::SerializeMap for MapSerializer<'a, 'b> {
|
||||
V: ser::Serialize,
|
||||
{
|
||||
let key = key.serialize(ConvertToString)?;
|
||||
match self.schema.attribute(&key) {
|
||||
Some(attribute) => serialize_value(
|
||||
self.txn,
|
||||
attribute,
|
||||
self.schema.props(attribute),
|
||||
self.document_id,
|
||||
self.document_store,
|
||||
self.document_fields_counts,
|
||||
self.indexer,
|
||||
self.ranked_map,
|
||||
self.fields_map,
|
||||
value,
|
||||
),
|
||||
None => Ok(()),
|
||||
}
|
||||
serialize_value(
|
||||
self.txn,
|
||||
key,
|
||||
self.schema,
|
||||
self.document_id,
|
||||
self.document_store,
|
||||
self.document_fields_counts,
|
||||
self.indexer,
|
||||
self.ranked_map,
|
||||
value,
|
||||
)
|
||||
}
|
||||
|
||||
fn end(self) -> Result<Self::Ok, Self::Error> {
|
||||
@ -261,13 +253,12 @@ impl<'a, 'b> ser::SerializeMap for MapSerializer<'a, 'b> {
|
||||
|
||||
pub struct StructSerializer<'a, 'b> {
|
||||
txn: &'a mut heed::RwTxn<'b, MainT>,
|
||||
schema: &'a Schema,
|
||||
schema: &'a mut Schema,
|
||||
document_id: DocumentId,
|
||||
document_store: DocumentsFields,
|
||||
document_fields_counts: DocumentsFieldsCounts,
|
||||
indexer: &'a mut RawIndexer,
|
||||
ranked_map: &'a mut RankedMap,
|
||||
fields_map: &'a mut FieldsMap,
|
||||
}
|
||||
|
||||
impl<'a, 'b> ser::SerializeStruct for StructSerializer<'a, 'b> {
|
||||
@ -282,19 +273,10 @@ impl<'a, 'b> ser::SerializeStruct for StructSerializer<'a, 'b> {
|
||||
where
|
||||
T: ser::Serialize,
|
||||
{
|
||||
// let id = fields_map.insert(key)?;
|
||||
|
||||
// let attribute = match self.schema.attribute(id) {
|
||||
// Some(attribute) => attribute,
|
||||
// None => {
|
||||
|
||||
// },
|
||||
// }
|
||||
|
||||
serialize_value(
|
||||
self.txn,
|
||||
attribute,
|
||||
self.schema.props(attribute),
|
||||
key.to_string(),
|
||||
self.schema,
|
||||
self.document_id,
|
||||
self.document_store,
|
||||
self.document_fields_counts,
|
||||
@ -311,7 +293,36 @@ impl<'a, 'b> ser::SerializeStruct for StructSerializer<'a, 'b> {
|
||||
|
||||
pub fn serialize_value<'a, T: ?Sized>(
|
||||
txn: &mut heed::RwTxn<MainT>,
|
||||
attribute: &'static str,
|
||||
attribute: String,
|
||||
schema: &'a mut Schema,
|
||||
document_id: DocumentId,
|
||||
document_store: DocumentsFields,
|
||||
documents_fields_counts: DocumentsFieldsCounts,
|
||||
indexer: &mut RawIndexer,
|
||||
ranked_map: &mut RankedMap,
|
||||
value: &T,
|
||||
) -> Result<(), SerializerError>
|
||||
where
|
||||
T: ser::Serialize,
|
||||
{
|
||||
let field_id = schema.get_or_create(attribute)?;
|
||||
|
||||
serialize_value_with_id(
|
||||
txn,
|
||||
field_id,
|
||||
schema,
|
||||
document_id,
|
||||
document_store,
|
||||
documents_fields_counts,
|
||||
indexer,
|
||||
ranked_map,
|
||||
value
|
||||
)
|
||||
}
|
||||
|
||||
pub fn serialize_value_with_id<'a, T: ?Sized>(
|
||||
txn: &mut heed::RwTxn<MainT>,
|
||||
field_id: FieldId,
|
||||
schema: &'a Schema,
|
||||
document_id: DocumentId,
|
||||
document_store: DocumentsFields,
|
||||
@ -324,12 +335,11 @@ where
|
||||
T: ser::Serialize,
|
||||
{
|
||||
let serialized = serde_json::to_vec(value)?;
|
||||
let field_id = schema.get_or_create(attribute)?;
|
||||
document_store.put_document_field(txn, document_id, field_id, &serialized)?;
|
||||
|
||||
if let Some(indexed_pos) = schema.id_is_indexed(field_id) {
|
||||
let indexer = Indexer {
|
||||
field_id,
|
||||
pos: *indexed_pos,
|
||||
indexer,
|
||||
document_id,
|
||||
};
|
||||
@ -337,13 +347,13 @@ where
|
||||
documents_fields_counts.put_document_field_count(
|
||||
txn,
|
||||
document_id,
|
||||
field_id,
|
||||
*indexed_pos,
|
||||
number_of_words as u16,
|
||||
)?;
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(field_id) = schema.id_is_ranked(field_id) {
|
||||
if schema.id_is_ranked(field_id) {
|
||||
let number = value.serialize(ConvertToNumber)?;
|
||||
ranked_map.insert(document_id, field_id, number);
|
||||
}
|
||||
|
@ -1,14 +1,14 @@
|
||||
use heed::types::{ByteSlice, OwnedType};
|
||||
use crate::database::MainT;
|
||||
use heed::Result as ZResult;
|
||||
use meilisearch_schema::SchemaAttr;
|
||||
use meilisearch_schema::FieldId;
|
||||
|
||||
use super::DocumentAttrKey;
|
||||
use super::DocumentFieldStoredKey;
|
||||
use crate::DocumentId;
|
||||
|
||||
#[derive(Copy, Clone)]
|
||||
pub struct DocumentsFields {
|
||||
pub(crate) documents_fields: heed::Database<OwnedType<DocumentAttrKey>, ByteSlice>,
|
||||
pub(crate) documents_fields: heed::Database<OwnedType<DocumentFieldStoredKey>, ByteSlice>,
|
||||
}
|
||||
|
||||
impl DocumentsFields {
|
||||
@ -16,10 +16,10 @@ impl DocumentsFields {
|
||||
self,
|
||||
writer: &mut heed::RwTxn<MainT>,
|
||||
document_id: DocumentId,
|
||||
attribute: SchemaAttr,
|
||||
attribute: FieldId,
|
||||
value: &[u8],
|
||||
) -> ZResult<()> {
|
||||
let key = DocumentAttrKey::new(document_id, attribute);
|
||||
let key = DocumentFieldStoredKey::new(document_id, attribute);
|
||||
self.documents_fields.put(writer, &key, value)
|
||||
}
|
||||
|
||||
@ -28,8 +28,8 @@ impl DocumentsFields {
|
||||
writer: &mut heed::RwTxn<MainT>,
|
||||
document_id: DocumentId,
|
||||
) -> ZResult<usize> {
|
||||
let start = DocumentAttrKey::new(document_id, SchemaAttr::min());
|
||||
let end = DocumentAttrKey::new(document_id, SchemaAttr::max());
|
||||
let start = DocumentFieldStoredKey::new(document_id, FieldId::min());
|
||||
let end = DocumentFieldStoredKey::new(document_id, FieldId::max());
|
||||
self.documents_fields.delete_range(writer, &(start..=end))
|
||||
}
|
||||
|
||||
@ -41,9 +41,9 @@ impl DocumentsFields {
|
||||
self,
|
||||
reader: &'txn heed::RoTxn<MainT>,
|
||||
document_id: DocumentId,
|
||||
attribute: SchemaAttr,
|
||||
attribute: FieldId,
|
||||
) -> ZResult<Option<&'txn [u8]>> {
|
||||
let key = DocumentAttrKey::new(document_id, attribute);
|
||||
let key = DocumentFieldStoredKey::new(document_id, attribute);
|
||||
self.documents_fields.get(reader, &key)
|
||||
}
|
||||
|
||||
@ -52,25 +52,25 @@ impl DocumentsFields {
|
||||
reader: &'txn heed::RoTxn<MainT>,
|
||||
document_id: DocumentId,
|
||||
) -> ZResult<DocumentFieldsIter<'txn>> {
|
||||
let start = DocumentAttrKey::new(document_id, SchemaAttr::min());
|
||||
let end = DocumentAttrKey::new(document_id, SchemaAttr::max());
|
||||
let start = DocumentFieldStoredKey::new(document_id, FieldId::min());
|
||||
let end = DocumentFieldStoredKey::new(document_id, FieldId::max());
|
||||
let iter = self.documents_fields.range(reader, &(start..=end))?;
|
||||
Ok(DocumentFieldsIter { iter })
|
||||
}
|
||||
}
|
||||
|
||||
pub struct DocumentFieldsIter<'txn> {
|
||||
iter: heed::RoRange<'txn, OwnedType<DocumentAttrKey>, ByteSlice>,
|
||||
iter: heed::RoRange<'txn, OwnedType<DocumentFieldStoredKey>, ByteSlice>,
|
||||
}
|
||||
|
||||
impl<'txn> Iterator for DocumentFieldsIter<'txn> {
|
||||
type Item = ZResult<(SchemaAttr, &'txn [u8])>;
|
||||
type Item = ZResult<(FieldId, &'txn [u8])>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
match self.iter.next() {
|
||||
Some(Ok((key, bytes))) => {
|
||||
let attr = SchemaAttr(key.attr.get());
|
||||
Some(Ok((attr, bytes)))
|
||||
let field_id = FieldId(key.field_id.get());
|
||||
Some(Ok((field_id, bytes)))
|
||||
}
|
||||
Some(Err(e)) => Some(Err(e)),
|
||||
None => None,
|
||||
|
@ -1,13 +1,13 @@
|
||||
use super::DocumentAttrKey;
|
||||
use super::DocumentFieldIndexedKey;
|
||||
use crate::database::MainT;
|
||||
use crate::DocumentId;
|
||||
use heed::types::OwnedType;
|
||||
use heed::Result as ZResult;
|
||||
use meilisearch_schema::FieldId;
|
||||
use meilisearch_schema::IndexedPos;
|
||||
|
||||
#[derive(Copy, Clone)]
|
||||
pub struct DocumentsFieldsCounts {
|
||||
pub(crate) documents_fields_counts: heed::Database<OwnedType<DocumentAttrKey>, OwnedType<u16>>,
|
||||
pub(crate) documents_fields_counts: heed::Database<OwnedType<DocumentFieldIndexedKey>, OwnedType<u16>>,
|
||||
}
|
||||
|
||||
impl DocumentsFieldsCounts {
|
||||
@ -15,10 +15,10 @@ impl DocumentsFieldsCounts {
|
||||
self,
|
||||
writer: &mut heed::RwTxn<MainT>,
|
||||
document_id: DocumentId,
|
||||
attribute: FieldId,
|
||||
attribute: IndexedPos,
|
||||
value: u16,
|
||||
) -> ZResult<()> {
|
||||
let key = DocumentAttrKey::new(document_id, attribute);
|
||||
let key = DocumentFieldIndexedKey::new(document_id, attribute);
|
||||
self.documents_fields_counts.put(writer, &key, &value)
|
||||
}
|
||||
|
||||
@ -27,10 +27,9 @@ impl DocumentsFieldsCounts {
|
||||
writer: &mut heed::RwTxn<MainT>,
|
||||
document_id: DocumentId,
|
||||
) -> ZResult<usize> {
|
||||
let start = DocumentAttrKey::new(document_id, FieldId::min());
|
||||
let end = DocumentAttrKey::new(document_id, FieldId::max());
|
||||
self.documents_fields_counts
|
||||
.delete_range(writer, &(start..=end))
|
||||
let start = DocumentFieldIndexedKey::new(document_id, IndexedPos::min());
|
||||
let end = DocumentFieldIndexedKey::new(document_id, IndexedPos::max());
|
||||
self.documents_fields_counts.delete_range(writer, &(start..=end))
|
||||
}
|
||||
|
||||
pub fn clear(self, writer: &mut heed::RwTxn<MainT>) -> ZResult<()> {
|
||||
@ -41,9 +40,9 @@ impl DocumentsFieldsCounts {
|
||||
self,
|
||||
reader: &heed::RoTxn<MainT>,
|
||||
document_id: DocumentId,
|
||||
attribute: FieldId,
|
||||
attribute: IndexedPos,
|
||||
) -> ZResult<Option<u16>> {
|
||||
let key = DocumentAttrKey::new(document_id, attribute);
|
||||
let key = DocumentFieldIndexedKey::new(document_id, attribute);
|
||||
match self.documents_fields_counts.get(reader, &key)? {
|
||||
Some(count) => Ok(Some(count)),
|
||||
None => Ok(None),
|
||||
@ -55,8 +54,8 @@ impl DocumentsFieldsCounts {
|
||||
reader: &'txn heed::RoTxn<MainT>,
|
||||
document_id: DocumentId,
|
||||
) -> ZResult<DocumentFieldsCountsIter<'txn>> {
|
||||
let start = DocumentAttrKey::new(document_id, FieldId::min());
|
||||
let end = DocumentAttrKey::new(document_id, FieldId::max());
|
||||
let start = DocumentFieldIndexedKey::new(document_id, IndexedPos::min());
|
||||
let end = DocumentFieldIndexedKey::new(document_id, IndexedPos::max());
|
||||
let iter = self.documents_fields_counts.range(reader, &(start..=end))?;
|
||||
Ok(DocumentFieldsCountsIter { iter })
|
||||
}
|
||||
@ -79,17 +78,17 @@ impl DocumentsFieldsCounts {
|
||||
}
|
||||
|
||||
pub struct DocumentFieldsCountsIter<'txn> {
|
||||
iter: heed::RoRange<'txn, OwnedType<DocumentAttrKey>, OwnedType<u16>>,
|
||||
iter: heed::RoRange<'txn, OwnedType<DocumentFieldIndexedKey>, OwnedType<u16>>,
|
||||
}
|
||||
|
||||
impl Iterator for DocumentFieldsCountsIter<'_> {
|
||||
type Item = ZResult<(FieldId, u16)>;
|
||||
type Item = ZResult<(IndexedPos, u16)>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
match self.iter.next() {
|
||||
Some(Ok((key, count))) => {
|
||||
let attr = FieldId(key.attr.get());
|
||||
Some(Ok((attr, count)))
|
||||
let indexed_pos = IndexedPos(key.indexed_pos.get());
|
||||
Some(Ok((indexed_pos, count)))
|
||||
}
|
||||
Some(Err(e)) => Some(Err(e)),
|
||||
None => None,
|
||||
@ -99,7 +98,7 @@ impl Iterator for DocumentFieldsCountsIter<'_> {
|
||||
|
||||
pub struct DocumentsIdsIter<'txn> {
|
||||
last_seen_id: Option<DocumentId>,
|
||||
iter: heed::RoIter<'txn, OwnedType<DocumentAttrKey>, OwnedType<u16>>,
|
||||
iter: heed::RoIter<'txn, OwnedType<DocumentFieldIndexedKey>, OwnedType<u16>>,
|
||||
}
|
||||
|
||||
impl Iterator for DocumentsIdsIter<'_> {
|
||||
@ -123,18 +122,18 @@ impl Iterator for DocumentsIdsIter<'_> {
|
||||
}
|
||||
|
||||
pub struct AllDocumentsFieldsCountsIter<'txn> {
|
||||
iter: heed::RoIter<'txn, OwnedType<DocumentAttrKey>, OwnedType<u16>>,
|
||||
iter: heed::RoIter<'txn, OwnedType<DocumentFieldIndexedKey>, OwnedType<u16>>,
|
||||
}
|
||||
|
||||
impl Iterator for AllDocumentsFieldsCountsIter<'_> {
|
||||
type Item = ZResult<(DocumentId, FieldId, u16)>;
|
||||
type Item = ZResult<(DocumentId, IndexedPos, u16)>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
match self.iter.next() {
|
||||
Some(Ok((key, count))) => {
|
||||
let docid = DocumentId(key.docid.get());
|
||||
let attr = FieldId(key.attr.get());
|
||||
Some(Ok((docid, attr, count)))
|
||||
let indexed_pos = IndexedPos(key.indexed_pos.get());
|
||||
Some(Ok((docid, indexed_pos, count)))
|
||||
}
|
||||
Some(Err(e)) => Some(Err(e)),
|
||||
None => None,
|
||||
|
@ -1,12 +1,13 @@
|
||||
use crate::fields_map::FieldsMap;
|
||||
use crate::database::MainT;
|
||||
use crate::RankedMap;
|
||||
use std::sync::Arc;
|
||||
use std::collections::{HashMap, BTreeMap, BTreeSet};
|
||||
|
||||
use chrono::{DateTime, Utc};
|
||||
use heed::types::{ByteSlice, OwnedType, SerdeBincode, Str};
|
||||
use heed::Result as ZResult;
|
||||
use meilisearch_schema::Schema;
|
||||
use std::collections::{HashMap, BTreeMap, BTreeSet};
|
||||
use std::sync::Arc;
|
||||
|
||||
use crate::database::MainT;
|
||||
use crate::RankedMap;
|
||||
|
||||
const CREATED_AT_KEY: &str = "created-at";
|
||||
const RANKING_RULES_KEY: &str = "ranking-rules-key";
|
||||
@ -18,7 +19,6 @@ const FIELDS_FREQUENCY_KEY: &str = "fields-frequency";
|
||||
const NAME_KEY: &str = "name";
|
||||
const NUMBER_OF_DOCUMENTS_KEY: &str = "number-of-documents";
|
||||
const RANKED_MAP_KEY: &str = "ranked-map";
|
||||
const FIELDS_MAP_KEY: &str = "fields-map";
|
||||
const SCHEMA_KEY: &str = "schema";
|
||||
const UPDATED_AT_KEY: &str = "updated-at";
|
||||
const WORDS_KEY: &str = "words";
|
||||
@ -114,16 +114,6 @@ impl Main {
|
||||
.get::<_, Str, SerdeBincode<RankedMap>>(reader, RANKED_MAP_KEY)
|
||||
}
|
||||
|
||||
pub fn put_fields_map(self, writer: &mut heed::RwTxn<MainT>, fields_map: &FieldsMap) -> ZResult<()> {
|
||||
self.main
|
||||
.put::<_, Str, SerdeBincode<FieldsMap>>(writer, FIELDS_MAP_KEY, &fields_map)
|
||||
}
|
||||
|
||||
pub fn fields_map(self, reader: &heed::RoTxn<MainT>) -> ZResult<Option<FieldsMap>> {
|
||||
self.main
|
||||
.get::<_, Str, SerdeBincode<FieldsMap>>(reader, FIELDS_MAP_KEY)
|
||||
}
|
||||
|
||||
pub fn put_synonyms_fst(self, writer: &mut heed::RwTxn<MainT>, fst: &fst::Set) -> ZResult<()> {
|
||||
let bytes = fst.as_fst().as_bytes();
|
||||
self.main.put::<_, Str, ByteSlice>(writer, SYNONYMS_KEY, bytes)
|
||||
|
@ -43,18 +43,50 @@ use crate::{query_builder::QueryBuilder, update, DocIndex, DocumentId, Error, MR
|
||||
type BEU64 = zerocopy::U64<byteorder::BigEndian>;
|
||||
type BEU16 = zerocopy::U16<byteorder::BigEndian>;
|
||||
|
||||
// #[derive(Debug, Copy, Clone, AsBytes, FromBytes)]
|
||||
// #[repr(C)]
|
||||
// pub struct DocumentAttrKey {
|
||||
// docid: BEU64,
|
||||
// indexed_pos: BEU16,
|
||||
// }
|
||||
|
||||
// impl DocumentAttrKey {
|
||||
// fn new(docid: DocumentId, indexed_pos: IndexedPos) -> DocumentAttrKey {
|
||||
// DocumentAttrKey {
|
||||
// docid: BEU64::new(docid.0),
|
||||
// indexed_pos: BEU16::new(indexed_pos.0),
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
|
||||
#[derive(Debug, Copy, Clone, AsBytes, FromBytes)]
|
||||
#[repr(C)]
|
||||
pub struct DocumentAttrKey {
|
||||
pub struct DocumentFieldIndexedKey {
|
||||
docid: BEU64,
|
||||
attr: BEU16,
|
||||
indexed_pos: BEU16,
|
||||
}
|
||||
|
||||
impl DocumentAttrKey {
|
||||
fn new(docid: DocumentId, attr: SchemaAttr) -> DocumentAttrKey {
|
||||
DocumentAttrKey {
|
||||
impl DocumentFieldIndexedKey {
|
||||
fn new(docid: DocumentId, indexed_pos: IndexedPos) -> DocumentFieldIndexedKey {
|
||||
DocumentFieldIndexedKey {
|
||||
docid: BEU64::new(docid.0),
|
||||
attr: BEU16::new(attr.0),
|
||||
indexed_pos: BEU16::new(indexed_pos.0),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Copy, Clone, AsBytes, FromBytes)]
|
||||
#[repr(C)]
|
||||
pub struct DocumentFieldStoredKey {
|
||||
docid: BEU64,
|
||||
field_id: BEU16,
|
||||
}
|
||||
|
||||
impl DocumentFieldStoredKey {
|
||||
fn new(docid: DocumentId, field_id: FieldId) -> DocumentFieldStoredKey {
|
||||
DocumentFieldStoredKey {
|
||||
docid: BEU64::new(docid.0),
|
||||
field_id: BEU16::new(field_id.0),
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -228,7 +260,7 @@ impl Index {
|
||||
&self,
|
||||
reader: &heed::RoTxn<MainT>,
|
||||
document_id: DocumentId,
|
||||
attribute: SchemaAttr,
|
||||
attribute: FieldId,
|
||||
) -> MResult<Option<T>> {
|
||||
let bytes = self
|
||||
.documents_fields
|
||||
|
@ -1,14 +1,13 @@
|
||||
use std::collections::{HashMap, BTreeSet};
|
||||
use std::collections::HashMap;
|
||||
|
||||
use fst::{set::OpBuilder, SetBuilder};
|
||||
use sdset::{duo::Union, SetOperation};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use meilisearch_schema::{Schema, DISPLAYED, INDEXED};
|
||||
|
||||
use crate::database::{MainT, UpdateT};
|
||||
use crate::database::{UpdateEvent, UpdateEventsEmitter};
|
||||
use crate::raw_indexer::RawIndexer;
|
||||
use crate::serde::{extract_document_id, serialize_value, Deserializer, Serializer};
|
||||
use crate::serde::{extract_document_id, serialize_value_with_id, Deserializer, Serializer};
|
||||
use crate::store;
|
||||
use crate::update::{apply_documents_deletion, compute_short_prefixes, next_update_id, Update};
|
||||
use crate::{Error, MResult, RankedMap};
|
||||
@ -115,16 +114,11 @@ pub fn apply_documents_addition<'a, 'b>(
|
||||
None => return Err(Error::SchemaMissing),
|
||||
};
|
||||
|
||||
if let Some(new_schema) = lazy_new_schema(&schema, &addition) {
|
||||
main_store.put_schema(writer, &new_schema)?;
|
||||
schema = new_schema;
|
||||
}
|
||||
|
||||
let identifier = schema.identifier_name();
|
||||
let identifier = schema.identifier();
|
||||
|
||||
// 1. store documents ids for future deletion
|
||||
for document in addition {
|
||||
let document_id = match extract_document_id(identifier, &document)? {
|
||||
let document_id = match extract_document_id(&identifier, &document)? {
|
||||
Some(id) => id,
|
||||
None => return Err(Error::MissingDocumentId),
|
||||
};
|
||||
@ -147,8 +141,6 @@ pub fn apply_documents_addition<'a, 'b>(
|
||||
None => fst::Set::default(),
|
||||
};
|
||||
|
||||
let mut fields_map = main_store.fields_map(writer)?.unwrap_or_default();
|
||||
|
||||
// 3. index the documents fields in the stores
|
||||
let mut indexer = RawIndexer::new(stop_words);
|
||||
|
||||
@ -160,7 +152,6 @@ pub fn apply_documents_addition<'a, 'b>(
|
||||
document_fields_counts: index.documents_fields_counts,
|
||||
indexer: &mut indexer,
|
||||
ranked_map: &mut ranked_map,
|
||||
fields_map: &mut fields_map,
|
||||
document_id,
|
||||
};
|
||||
|
||||
@ -192,16 +183,11 @@ pub fn apply_documents_partial_addition<'a, 'b>(
|
||||
None => return Err(Error::SchemaMissing),
|
||||
};
|
||||
|
||||
if let Some(new_schema) = lazy_new_schema(&schema, &addition) {
|
||||
main_store.put_schema(writer, &new_schema)?;
|
||||
schema = new_schema;
|
||||
}
|
||||
|
||||
let identifier = schema.identifier_name();
|
||||
let identifier = schema.identifier();
|
||||
|
||||
// 1. store documents ids for future deletion
|
||||
for mut document in addition {
|
||||
let document_id = match extract_document_id(identifier, &document)? {
|
||||
let document_id = match extract_document_id(&identifier, &document)? {
|
||||
Some(id) => id,
|
||||
None => return Err(Error::MissingDocumentId),
|
||||
};
|
||||
@ -241,8 +227,6 @@ pub fn apply_documents_partial_addition<'a, 'b>(
|
||||
None => fst::Set::default(),
|
||||
};
|
||||
|
||||
let mut fields_map = main_store.fields_map(writer)?.unwrap_or_default();
|
||||
|
||||
// 3. index the documents fields in the stores
|
||||
let mut indexer = RawIndexer::new(stop_words);
|
||||
|
||||
@ -254,7 +238,6 @@ pub fn apply_documents_partial_addition<'a, 'b>(
|
||||
document_fields_counts: index.documents_fields_counts,
|
||||
indexer: &mut indexer,
|
||||
ranked_map: &mut ranked_map,
|
||||
fields_map: &mut fields_map,
|
||||
document_id,
|
||||
};
|
||||
|
||||
@ -281,7 +264,6 @@ pub fn reindex_all_documents(writer: &mut heed::RwTxn<MainT>, index: &store::Ind
|
||||
};
|
||||
|
||||
let mut ranked_map = RankedMap::default();
|
||||
let mut fields_map = main_store.fields_map(writer)?.unwrap_or_default();
|
||||
|
||||
// 1. retrieve all documents ids
|
||||
let mut documents_ids_to_reindex = Vec::new();
|
||||
@ -312,21 +294,20 @@ pub fn reindex_all_documents(writer: &mut heed::RwTxn<MainT>, index: &store::Ind
|
||||
for result in index.documents_fields.document_fields(writer, *document_id)? {
|
||||
let (attr, bytes) = result?;
|
||||
let value: serde_json::Value = serde_json::from_slice(bytes)?;
|
||||
ram_store.insert((document_id, attr), value);
|
||||
ram_store.insert((document_id, field_id), value);
|
||||
}
|
||||
|
||||
for ((docid, attr), value) in ram_store.drain() {
|
||||
serialize_value(
|
||||
for ((docid, field_id), value) in ram_store.drain() {
|
||||
serialize_value_with_id(
|
||||
writer,
|
||||
attr,
|
||||
schema.props(attr),
|
||||
field_id,
|
||||
&schema,
|
||||
*docid,
|
||||
index.documents_fields,
|
||||
index.documents_fields_counts,
|
||||
&mut indexer,
|
||||
&mut ranked_map,
|
||||
&mut fields_map,
|
||||
&value,
|
||||
&value
|
||||
)?;
|
||||
}
|
||||
}
|
||||
@ -401,30 +382,3 @@ pub fn write_documents_addition_index(
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn lazy_new_schema(
|
||||
schema: &Schema,
|
||||
documents: &[HashMap<String, serde_json::Value>],
|
||||
) -> Option<Schema> {
|
||||
let mut attributes_to_add = BTreeSet::new();
|
||||
|
||||
for document in documents {
|
||||
for (key, _) in document {
|
||||
if schema.attribute(key).is_none() {
|
||||
attributes_to_add.insert(key);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if attributes_to_add.is_empty() {
|
||||
return None
|
||||
}
|
||||
|
||||
let mut schema_builder = schema.to_builder();
|
||||
for attribute in attributes_to_add {
|
||||
schema_builder.new_attribute(attribute, DISPLAYED | INDEXED);
|
||||
}
|
||||
let schema = schema_builder.build();
|
||||
|
||||
Some(schema)
|
||||
}
|
||||
|
@ -40,8 +40,8 @@ impl DocumentsDeletion {
|
||||
where
|
||||
D: serde::Serialize,
|
||||
{
|
||||
let identifier = schema.identifier_name();
|
||||
let document_id = match extract_document_id(identifier, &document)? {
|
||||
let identifier = schema.identifier();
|
||||
let document_id = match extract_document_id(&identifier, &document)? {
|
||||
Some(id) => id,
|
||||
None => return Err(Error::MissingDocumentId),
|
||||
};
|
||||
@ -101,18 +101,7 @@ pub fn apply_documents_deletion(
|
||||
};
|
||||
|
||||
// collect the ranked attributes according to the schema
|
||||
let ranked_attrs: Vec<_> = schema
|
||||
.iter()
|
||||
.filter_map(
|
||||
|(_, attr, prop)| {
|
||||
if prop.is_ranked() {
|
||||
Some(attr)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
},
|
||||
)
|
||||
.collect();
|
||||
let ranked_attrs = schema.get_ranked();
|
||||
|
||||
let mut words_document_ids = HashMap::new();
|
||||
for id in idset {
|
||||
|
@ -1,16 +1,15 @@
|
||||
use std::collections::{HashMap, BTreeMap, BTreeSet};
|
||||
use std::collections::{BTreeMap, BTreeSet};
|
||||
|
||||
use heed::Result as ZResult;
|
||||
use fst::{set::OpBuilder, SetBuilder};
|
||||
use sdset::SetBuf;
|
||||
|
||||
use meilisearch_schema::{Schema, SchemaAttr, diff_transposition, generate_schema};
|
||||
use meilisearch_schema::Schema;
|
||||
|
||||
use crate::database::{MainT, UpdateT};
|
||||
use crate::settings::{UpdateState, SettingsUpdate};
|
||||
use crate::update::documents_addition::reindex_all_documents;
|
||||
use crate::update::{next_update_id, Update};
|
||||
use crate::{store, MResult};
|
||||
use crate::{store, MResult, Error};
|
||||
|
||||
pub fn push_settings_update(
|
||||
writer: &mut heed::RwTxn<UpdateT>,
|
||||
@ -35,7 +34,17 @@ pub fn apply_settings_update(
|
||||
|
||||
let mut must_reindex = false;
|
||||
|
||||
let old_schema = index.main.schema(writer)?;
|
||||
let mut schema = match index.main.schema(writer)? {
|
||||
Some(schema) => schema,
|
||||
None => {
|
||||
match settings.attribute_identifier.clone() {
|
||||
UpdateState::Update(id) => Schema::with_identifier(id),
|
||||
_ => return Err(Error::MissingSchemaIdentifier)
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
println!("settings: {:?}", settings);
|
||||
|
||||
match settings.ranking_rules {
|
||||
UpdateState::Update(v) => {
|
||||
@ -55,157 +64,69 @@ pub fn apply_settings_update(
|
||||
},
|
||||
_ => (),
|
||||
}
|
||||
let identifier = match settings.attribute_identifier.clone() {
|
||||
UpdateState::Update(v) => v,
|
||||
_ => {
|
||||
old_schema.clone().unwrap().identifier_name().to_owned()
|
||||
},
|
||||
|
||||
if let UpdateState::Update(id) = settings.attribute_identifier {
|
||||
schema.set_identifier(id)?;
|
||||
};
|
||||
let attributes_searchable: Vec<String> = match settings.attributes_searchable.clone() {
|
||||
UpdateState::Update(v) => v,
|
||||
UpdateState::Clear => Vec::new(),
|
||||
UpdateState::Nothing => {
|
||||
match old_schema.clone() {
|
||||
Some(schema) => {
|
||||
schema.into_iter()
|
||||
.filter(|(_, props)| props.is_indexed())
|
||||
.map(|(name, _)| name)
|
||||
.collect()
|
||||
},
|
||||
None => Vec::new(),
|
||||
}
|
||||
|
||||
match settings.attributes_searchable.clone() {
|
||||
UpdateState::Update(v) => schema.update_indexed(v)?,
|
||||
UpdateState::Clear => {
|
||||
let clear: Vec<String> = Vec::new();
|
||||
schema.update_indexed(clear)?;
|
||||
},
|
||||
UpdateState::Nothing => (),
|
||||
UpdateState::Add(attrs) => {
|
||||
let mut old_attrs = match old_schema.clone() {
|
||||
Some(schema) => {
|
||||
schema.into_iter()
|
||||
.filter(|(_, props)| props.is_indexed())
|
||||
.map(|(name, _)| name)
|
||||
.collect()
|
||||
},
|
||||
None => Vec::new(),
|
||||
};
|
||||
for attr in attrs {
|
||||
if !old_attrs.contains(&attr) {
|
||||
old_attrs.push(attr);
|
||||
}
|
||||
schema.set_indexed(attr)?;
|
||||
}
|
||||
old_attrs
|
||||
},
|
||||
UpdateState::Delete(attrs) => {
|
||||
let mut old_attrs = match old_schema.clone() {
|
||||
Some(schema) => {
|
||||
schema.into_iter()
|
||||
.filter(|(_, props)| props.is_indexed())
|
||||
.map(|(name, _)| name)
|
||||
.collect()
|
||||
},
|
||||
None => Vec::new(),
|
||||
};
|
||||
for attr in attrs {
|
||||
old_attrs.retain(|x| *x == attr)
|
||||
schema.remove_indexed(attr);
|
||||
}
|
||||
old_attrs
|
||||
}
|
||||
};
|
||||
let attributes_displayed: Vec<String> = match settings.attributes_displayed.clone() {
|
||||
UpdateState::Update(v) => v,
|
||||
UpdateState::Clear => Vec::new(),
|
||||
UpdateState::Nothing => {
|
||||
match old_schema.clone() {
|
||||
Some(schema) => {
|
||||
schema.into_iter()
|
||||
.filter(|(_, props)| props.is_displayed())
|
||||
.map(|(name, _)| name)
|
||||
.collect()
|
||||
},
|
||||
None => Vec::new(),
|
||||
}
|
||||
match settings.attributes_displayed.clone() {
|
||||
UpdateState::Update(v) => schema.update_displayed(v)?,
|
||||
UpdateState::Clear => {
|
||||
let clear: Vec<String> = Vec::new();
|
||||
schema.update_displayed(clear)?;
|
||||
},
|
||||
UpdateState::Nothing => (),
|
||||
UpdateState::Add(attrs) => {
|
||||
let mut old_attrs = match old_schema.clone() {
|
||||
Some(schema) => {
|
||||
schema.into_iter()
|
||||
.filter(|(_, props)| props.is_displayed())
|
||||
.map(|(name, _)| name)
|
||||
.collect()
|
||||
},
|
||||
None => Vec::new(),
|
||||
};
|
||||
for attr in attrs {
|
||||
if !old_attrs.contains(&attr) {
|
||||
old_attrs.push(attr);
|
||||
}
|
||||
schema.set_displayed(attr)?;
|
||||
}
|
||||
old_attrs
|
||||
},
|
||||
UpdateState::Delete(attrs) => {
|
||||
let mut old_attrs = match old_schema.clone() {
|
||||
Some(schema) => {
|
||||
schema.into_iter()
|
||||
.filter(|(_, props)| props.is_displayed())
|
||||
.map(|(name, _)| name)
|
||||
.collect()
|
||||
},
|
||||
None => Vec::new(),
|
||||
};
|
||||
for attr in attrs {
|
||||
old_attrs.retain(|x| *x == attr)
|
||||
schema.remove_displayed(attr);
|
||||
}
|
||||
old_attrs
|
||||
}
|
||||
};
|
||||
let attributes_ranked: Vec<String> = match settings.attributes_ranked.clone() {
|
||||
UpdateState::Update(v) => v,
|
||||
UpdateState::Clear => Vec::new(),
|
||||
UpdateState::Nothing => {
|
||||
match old_schema.clone() {
|
||||
Some(schema) => {
|
||||
schema.into_iter()
|
||||
.filter(|(_, props)| props.is_ranked())
|
||||
.map(|(name, _)| name)
|
||||
.collect()
|
||||
},
|
||||
None => Vec::new(),
|
||||
}
|
||||
match settings.attributes_ranked.clone() {
|
||||
UpdateState::Update(v) => schema.update_ranked(v)?,
|
||||
UpdateState::Clear => {
|
||||
let clear: Vec<String> = Vec::new();
|
||||
schema.update_ranked(clear)?;
|
||||
},
|
||||
UpdateState::Nothing => (),
|
||||
UpdateState::Add(attrs) => {
|
||||
let mut old_attrs = match old_schema.clone() {
|
||||
Some(schema) => {
|
||||
schema.into_iter()
|
||||
.filter(|(_, props)| props.is_ranked())
|
||||
.map(|(name, _)| name)
|
||||
.collect()
|
||||
},
|
||||
None => Vec::new(),
|
||||
};
|
||||
for attr in attrs {
|
||||
if !old_attrs.contains(&attr) {
|
||||
old_attrs.push(attr);
|
||||
}
|
||||
schema.set_ranked(attr)?;
|
||||
}
|
||||
old_attrs
|
||||
},
|
||||
UpdateState::Delete(attrs) => {
|
||||
let mut old_attrs = match old_schema.clone() {
|
||||
Some(schema) => {
|
||||
schema.into_iter()
|
||||
.filter(|(_, props)| props.is_ranked())
|
||||
.map(|(name, _)| name)
|
||||
.collect()
|
||||
},
|
||||
None => Vec::new(),
|
||||
};
|
||||
for attr in attrs {
|
||||
old_attrs.retain(|x| *x == attr)
|
||||
schema.remove_ranked(attr);
|
||||
}
|
||||
old_attrs
|
||||
}
|
||||
};
|
||||
|
||||
let new_schema = generate_schema(identifier, attributes_searchable, attributes_displayed, attributes_ranked);
|
||||
index.main.put_schema(writer, &schema)?;
|
||||
|
||||
index.main.put_schema(writer, &new_schema)?;
|
||||
println!("schema: {:?}", schema);
|
||||
|
||||
match settings.stop_words {
|
||||
UpdateState::Update(stop_words) => {
|
||||
@ -233,16 +154,6 @@ pub fn apply_settings_update(
|
||||
let postings_lists_store = index.postings_lists;
|
||||
let docs_words_store = index.docs_words;
|
||||
|
||||
if settings.attribute_identifier.is_changed() ||
|
||||
settings.attributes_ranked.is_changed() ||
|
||||
settings.attributes_searchable.is_changed() ||
|
||||
settings.attributes_displayed.is_changed()
|
||||
{
|
||||
if let Some(old_schema) = old_schema {
|
||||
rewrite_all_documents(writer, index, &old_schema, &new_schema)?;
|
||||
must_reindex = true;
|
||||
}
|
||||
}
|
||||
if must_reindex {
|
||||
reindex_all_documents(
|
||||
writer,
|
||||
@ -438,46 +349,3 @@ pub fn apply_synonyms_update(
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn rewrite_all_documents(
|
||||
writer: &mut heed::RwTxn<MainT>,
|
||||
index: &store::Index,
|
||||
old_schema: &Schema,
|
||||
new_schema: &Schema,
|
||||
) -> MResult<()> {
|
||||
|
||||
let mut documents_ids_to_reindex = Vec::new();
|
||||
|
||||
// Retrieve all documents present on the database
|
||||
for result in index.documents_fields_counts.documents_ids(writer)? {
|
||||
let document_id = result?;
|
||||
documents_ids_to_reindex.push(document_id);
|
||||
}
|
||||
|
||||
let transpotition = diff_transposition(old_schema, new_schema);
|
||||
|
||||
// Rewrite all documents one by one
|
||||
for id in documents_ids_to_reindex {
|
||||
let mut document: HashMap<SchemaAttr, Vec<u8>> = HashMap::new();
|
||||
|
||||
// Retrieve the old document
|
||||
for item in index.documents_fields.document_fields(writer, id)? {
|
||||
if let Ok(item) = item {
|
||||
if let Some(pos) = transpotition[(item.0).0 as usize] {
|
||||
// Save the current document with the new SchemaAttr
|
||||
document.insert(SchemaAttr::new(pos), item.1.to_vec());
|
||||
}
|
||||
}
|
||||
}
|
||||
// Remove the current document
|
||||
index.documents_fields.del_all_document_fields(writer, id)?;
|
||||
|
||||
// Rewrite the new document
|
||||
// TODO: use cursor to not do memory jump at each call
|
||||
for (key, value) in document {
|
||||
index.documents_fields.put_document_field(writer, id, key, &value)?;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
@ -5,6 +5,7 @@ pub type SResult<T> = Result<T, Error>;
|
||||
|
||||
/// Errors produced by the schema types.
#[derive(Debug)]
pub enum Error {
    /// No field is registered under the given name; carries that name.
    FieldNameNotFound(String),
    /// A `u16`-backed identifier could not be incremented or decremented
    /// without leaving its range.
    MaxFieldsLimitExceeded,
}
|
||||
|
||||
@ -12,6 +13,7 @@ impl fmt::Display for Error {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
use self::Error::*;
|
||||
match self {
|
||||
FieldNameNotFound(field) => write!(f, "The field {} doesn't exist", field),
|
||||
MaxFieldsLimitExceeded => write!(f, "The maximum of possible reatributed field id has been reached"),
|
||||
}
|
||||
}
|
||||
|
@ -3,9 +3,8 @@ use std::collections::HashMap;
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::{SResult, SchemaAttr};
|
||||
use crate::{SResult, FieldId};
|
||||
|
||||
pub type FieldId = SchemaAttr;
|
||||
|
||||
#[derive(Debug, Default, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
pub struct FieldsMap {
|
||||
@ -43,13 +42,13 @@ impl FieldsMap {
|
||||
self.name_map.remove(&name);
|
||||
}
|
||||
|
||||
pub fn get_id<S: Into<String>>(&self, name: S) -> Option<&FieldId> {
|
||||
pub fn get_id<S: Into<String>>(&self, name: S) -> Option<FieldId> {
|
||||
let name = name.into();
|
||||
self.name_map.get(&name)
|
||||
self.name_map.get(&name).map(|s| *s)
|
||||
}
|
||||
|
||||
pub fn get_name<I: Into<SchemaAttr>>(&self, id: I) -> Option<&String> {
|
||||
self.id_map.get(&id.into())
|
||||
pub fn get_name<I: Into<FieldId>>(&self, id: I) -> Option<String> {
|
||||
self.id_map.get(&id.into()).map(|s| s.to_string())
|
||||
}
|
||||
|
||||
pub fn read_from_bin<R: Read>(reader: R) -> bincode::Result<FieldsMap> {
|
||||
@ -74,14 +73,14 @@ mod tests {
|
||||
assert_eq!(fields_map.insert("id").unwrap(), 0.into());
|
||||
assert_eq!(fields_map.insert("title").unwrap(), 1.into());
|
||||
assert_eq!(fields_map.insert("descritpion").unwrap(), 2.into());
|
||||
assert_eq!(fields_map.get_id("id"), Some(&0.into()));
|
||||
assert_eq!(fields_map.get_id("title"), Some(&1.into()));
|
||||
assert_eq!(fields_map.get_id("descritpion"), Some(&2.into()));
|
||||
assert_eq!(fields_map.get_id("id"), Some(0.into()));
|
||||
assert_eq!(fields_map.get_id("title"), Some(1.into()));
|
||||
assert_eq!(fields_map.get_id("descritpion"), Some(2.into()));
|
||||
assert_eq!(fields_map.get_id("date"), None);
|
||||
assert_eq!(fields_map.len(), 3);
|
||||
assert_eq!(fields_map.get_name(0), Some(&"id".to_owned()));
|
||||
assert_eq!(fields_map.get_name(1), Some(&"title".to_owned()));
|
||||
assert_eq!(fields_map.get_name(2), Some(&"descritpion".to_owned()));
|
||||
assert_eq!(fields_map.get_name(0), Some("id".to_owned()));
|
||||
assert_eq!(fields_map.get_name(1), Some("title".to_owned()));
|
||||
assert_eq!(fields_map.get_name(2), Some("descritpion".to_owned()));
|
||||
assert_eq!(fields_map.get_name(4), None);
|
||||
fields_map.remove("title");
|
||||
assert_eq!(fields_map.get_id("title"), None);
|
||||
|
@ -3,48 +3,88 @@ mod fields_map;
|
||||
mod schema;
|
||||
|
||||
pub use error::{Error, SResult};
|
||||
pub use fields_map::{FieldsMap, FieldId};
|
||||
pub use schema::{Schema, IndexedPos};
|
||||
pub use fields_map::FieldsMap;
|
||||
pub use schema::Schema;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Copy, Clone, Default, PartialOrd, Ord, PartialEq, Eq, Hash)]
|
||||
pub struct SchemaAttr(pub u16);
|
||||
pub struct IndexedPos(pub u16);
|
||||
|
||||
impl SchemaAttr {
|
||||
pub const fn new(value: u16) -> SchemaAttr {
|
||||
SchemaAttr(value)
|
||||
impl IndexedPos {
|
||||
pub const fn new(value: u16) -> IndexedPos {
|
||||
IndexedPos(value)
|
||||
}
|
||||
|
||||
pub const fn min() -> SchemaAttr {
|
||||
SchemaAttr(u16::min_value())
|
||||
pub const fn min() -> IndexedPos {
|
||||
IndexedPos(u16::min_value())
|
||||
}
|
||||
|
||||
pub const fn max() -> SchemaAttr {
|
||||
SchemaAttr(u16::max_value())
|
||||
pub const fn max() -> IndexedPos {
|
||||
IndexedPos(u16::max_value())
|
||||
}
|
||||
|
||||
pub fn next(self) -> SResult<SchemaAttr> {
|
||||
self.0.checked_add(1).map(SchemaAttr).ok_or(Error::MaxFieldsLimitExceeded)
|
||||
pub fn next(self) -> SResult<IndexedPos> {
|
||||
self.0.checked_add(1).map(IndexedPos).ok_or(Error::MaxFieldsLimitExceeded)
|
||||
}
|
||||
|
||||
pub fn prev(self) -> SResult<SchemaAttr> {
|
||||
self.0.checked_sub(1).map(SchemaAttr).ok_or(Error::MaxFieldsLimitExceeded)
|
||||
pub fn prev(self) -> SResult<IndexedPos> {
|
||||
self.0.checked_sub(1).map(IndexedPos).ok_or(Error::MaxFieldsLimitExceeded)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<u16> for SchemaAttr {
|
||||
fn from(value: u16) -> SchemaAttr {
|
||||
SchemaAttr(value)
|
||||
impl From<u16> for IndexedPos {
|
||||
fn from(value: u16) -> IndexedPos {
|
||||
IndexedPos(value)
|
||||
}
|
||||
}
|
||||
|
||||
impl Into<u16> for SchemaAttr {
|
||||
impl Into<u16> for IndexedPos {
|
||||
fn into(self) -> u16 {
|
||||
self.0
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Copy, Clone, Default, PartialOrd, Ord, PartialEq, Eq, Hash)]
|
||||
pub struct FieldId(pub u16);
|
||||
|
||||
impl FieldId {
|
||||
pub const fn new(value: u16) -> FieldId {
|
||||
FieldId(value)
|
||||
}
|
||||
|
||||
pub const fn min() -> FieldId {
|
||||
FieldId(u16::min_value())
|
||||
}
|
||||
|
||||
pub const fn max() -> FieldId {
|
||||
FieldId(u16::max_value())
|
||||
}
|
||||
|
||||
pub fn next(self) -> SResult<FieldId> {
|
||||
self.0.checked_add(1).map(FieldId).ok_or(Error::MaxFieldsLimitExceeded)
|
||||
}
|
||||
|
||||
pub fn prev(self) -> SResult<FieldId> {
|
||||
self.0.checked_sub(1).map(FieldId).ok_or(Error::MaxFieldsLimitExceeded)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<u16> for FieldId {
|
||||
fn from(value: u16) -> FieldId {
|
||||
FieldId(value)
|
||||
}
|
||||
}
|
||||
|
||||
/// Unwraps a `FieldId` back into its raw `u16`.
///
/// Implemented as `From<FieldId> for u16` rather than a hand-written `Into`:
/// the standard blanket impl still provides `Into<u16> for FieldId`, so
/// existing `.into()` call sites keep working, and `u16::from(id)` becomes
/// available as well.
impl From<FieldId> for u16 {
    fn from(id: FieldId) -> u16 {
        id.0
    }
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
// use std::collections::{BTreeMap, HashMap};
|
||||
// use std::ops::BitOr;
|
||||
|
@ -1,10 +1,10 @@
|
||||
use std::collections::{HashMap, HashSet};
|
||||
|
||||
use crate::{FieldsMap, FieldId, SResult, SchemaAttr};
|
||||
use serde::{Serialize, Deserialize};
|
||||
|
||||
pub type IndexedPos = SchemaAttr;
|
||||
use crate::{FieldsMap, FieldId, SResult, Error, IndexedPos};
|
||||
|
||||
#[derive(Default)]
|
||||
#[derive(Clone, Debug, Default, Serialize, Deserialize)]
|
||||
pub struct Schema {
|
||||
fields_map: FieldsMap,
|
||||
|
||||
@ -30,11 +30,21 @@ impl Schema {
|
||||
self.fields_map.get_name(self.identifier).unwrap().to_string()
|
||||
}
|
||||
|
||||
pub fn get_id<S: Into<String>>(&self, name: S) -> Option<&FieldId> {
|
||||
pub fn set_identifier(&mut self, id: String) -> SResult<()> {
|
||||
match self.get_id(id.clone()) {
|
||||
Some(id) => {
|
||||
self.identifier = id;
|
||||
Ok(())
|
||||
},
|
||||
None => Err(Error::FieldNameNotFound(id))
|
||||
}
|
||||
}
|
||||
|
||||
/// Looks up the id registered for `name` in the fields map.
///
/// Returns `None` when the name is unknown.
pub fn get_id<S: Into<String>>(&self, name: S) -> Option<FieldId> {
    self.fields_map.get_id(name)
}
|
||||
|
||||
pub fn get_name<I: Into<SchemaAttr>>(&self, id: I) -> Option<&String> {
|
||||
/// Looks up the name registered for `id` in the fields map.
///
/// Returns an owned copy of the name, or `None` when the id is unknown.
pub fn get_name<I: Into<FieldId>>(&self, id: I) -> Option<String> {
    self.fields_map.get_name(id)
}
|
||||
|
||||
@ -52,7 +62,7 @@ impl Schema {
|
||||
pub fn get_or_create<S: Into<String> + std::clone::Clone>(&mut self, name: S) -> SResult<FieldId> {
|
||||
match self.fields_map.get_id(name.clone()) {
|
||||
Some(id) => {
|
||||
Ok(*id)
|
||||
Ok(id)
|
||||
}
|
||||
None => {
|
||||
self.set_indexed(name.clone())?;
|
||||
@ -61,6 +71,30 @@ impl Schema {
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns a copy of the set of ranked field ids.
pub fn get_ranked(&self) -> HashSet<FieldId> {
    self.ranked.clone()
}
|
||||
|
||||
pub fn get_ranked_name(&self) -> HashSet<String> {
|
||||
self.ranked.iter().filter_map(|a| self.get_name(*a)).collect()
|
||||
}
|
||||
|
||||
/// Returns a copy of the set of displayed field ids.
pub fn get_displayed(&self) -> HashSet<FieldId> {
    self.displayed.clone()
}
|
||||
|
||||
pub fn get_displayed_name(&self) -> HashSet<String> {
|
||||
self.displayed.iter().filter_map(|a| self.get_name(*a)).collect()
|
||||
}
|
||||
|
||||
/// Returns a copy of the list of indexed field ids, in stored order.
pub fn get_indexed(&self) -> Vec<FieldId> {
    self.indexed.clone()
}
|
||||
|
||||
pub fn get_indexed_name(&self) -> Vec<String> {
|
||||
self.indexed.iter().filter_map(|a| self.get_name(*a)).collect()
|
||||
}
|
||||
|
||||
pub fn set_ranked<S: Into<String>>(&mut self, name: S) -> SResult<FieldId> {
|
||||
let id = self.fields_map.insert(name.into())?;
|
||||
self.ranked.insert(id);
|
||||
@ -81,23 +115,42 @@ impl Schema {
|
||||
Ok((id, pos.into()))
|
||||
}
|
||||
|
||||
pub fn is_ranked<S: Into<String>>(&self, name: S) -> Option<&FieldId> {
|
||||
pub fn remove_ranked<S: Into<String>>(&mut self, name: S) {
|
||||
if let Some(id) = self.fields_map.get_id(name.into()) {
|
||||
self.ranked.remove(&id);
|
||||
}
|
||||
}
|
||||
|
||||
pub fn remove_displayed<S: Into<String>>(&mut self, name: S) {
|
||||
if let Some(id) = self.fields_map.get_id(name.into()) {
|
||||
self.displayed.remove(&id);
|
||||
}
|
||||
}
|
||||
|
||||
pub fn remove_indexed<S: Into<String>>(&mut self, name: S) {
|
||||
if let Some(id) = self.fields_map.get_id(name.into()) {
|
||||
self.indexed_map.remove(&id);
|
||||
self.indexed.retain(|x| *x != id);
|
||||
}
|
||||
}
|
||||
|
||||
pub fn is_ranked<S: Into<String>>(&self, name: S) -> Option<FieldId> {
|
||||
match self.fields_map.get_id(name.into()) {
|
||||
Some(id) => self.ranked.get(id),
|
||||
Some(id) => self.ranked.get(&id).map(|s| *s),
|
||||
None => None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn is_displayed<S: Into<String>>(&self, name: S) -> Option<&FieldId> {
|
||||
pub fn is_displayed<S: Into<String>>(&self, name: S) -> Option<FieldId> {
|
||||
match self.fields_map.get_id(name.into()) {
|
||||
Some(id) => self.displayed.get(id),
|
||||
Some(id) => self.displayed.get(&id).map(|s| *s),
|
||||
None => None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn is_indexed<S: Into<String>>(&self, name: S) -> Option<&IndexedPos> {
|
||||
pub fn is_indexed<S: Into<String>>(&self, name: S) -> Option<IndexedPos> {
|
||||
match self.fields_map.get_id(name.into()) {
|
||||
Some(id) => self.indexed_map.get(id),
|
||||
Some(id) => self.indexed_map.get(&id).map(|s| *s),
|
||||
None => None,
|
||||
}
|
||||
}
|
||||
|
@ -28,6 +28,7 @@ pub struct DocIndex {
|
||||
|
||||
/// The attribute in the document where the word was found
|
||||
/// along with the index in it.
|
||||
/// This is an `IndexedPos`, not a `FieldId`, and must be converted each time.
|
||||
pub attribute: u16,
|
||||
pub word_index: u16,
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user