introduce a new schemaless way

This commit is contained in:
qdequele 2020-01-13 19:10:58 +01:00
parent bbe1845f66
commit 130fb74928
No known key found for this signature in database
GPG Key ID: B3F0A000EBF11745
22 changed files with 365 additions and 418 deletions

View File

@ -7,6 +7,5 @@
"overview",
"release_date",
"poster"
],
"attributes_ranked": ["release_date"]
]
}

View File

@ -13,7 +13,8 @@ use structopt::StructOpt;
use termcolor::{Color, ColorChoice, ColorSpec, StandardStream, WriteColor};
use meilisearch_core::{Database, Highlight, ProcessedUpdateResult};
use meilisearch_schema::SchemaAttr;
use meilisearch_core::settings::Settings;
use meilisearch_schema::FieldId;
// #[cfg(target_os = "linux")]
#[global_allocator]
@ -121,7 +122,8 @@ fn index_command(command: IndexCommand, database: Database) -> Result<(), Box<dy
let settings = {
let string = fs::read_to_string(&command.settings)?;
serde_json::from_str(&string).unwrap()
let settings: Settings = serde_json::from_str(&string).unwrap();
settings.into()
};
let mut update_writer = db.update_write_txn().unwrap();
@ -357,7 +359,7 @@ fn search_command(command: SearchCommand, database: Database) -> Result<(), Box<
};
let attr = schema
.attribute(&filter)
.get_id(filter)
.expect("Could not find filtered attribute");
builder.with_filter(move |document_id| {
@ -388,11 +390,11 @@ fn search_command(command: SearchCommand, database: Database) -> Result<(), Box<
for (name, text) in document.0 {
print!("{}: ", name);
let attr = schema.attribute(&name).unwrap();
let attr = schema.get_id(&name).unwrap();
let highlights = doc
.highlights
.iter()
.filter(|m| SchemaAttr::new(m.attribute) == attr)
.filter(|m| FieldId::new(m.attribute) == attr)
.cloned();
let (text, highlights) =
crop_text(&text, highlights, command.char_context);
@ -407,8 +409,8 @@ fn search_command(command: SearchCommand, database: Database) -> Result<(), Box<
let mut matching_attributes = HashSet::new();
for highlight in doc.highlights {
let attr = SchemaAttr::new(highlight.attribute);
let name = schema.attribute_name(attr);
let attr = FieldId::new(highlight.attribute);
let name = schema.get_name(attr);
matching_attributes.insert(name);
}

View File

@ -1,6 +1,6 @@
use std::cmp::{Ordering, Reverse};
use std::collections::hash_map::{HashMap, Entry};
use meilisearch_schema::SchemaAttr;
use meilisearch_schema::IndexedPos;
use slice_group_by::GroupBy;
use crate::{RawDocument, MResult};
use crate::bucket_sort::BareMatch;
@ -32,7 +32,7 @@ impl Criterion for Exact {
for bm in group {
for di in ctx.postings_lists[bm.postings_list].as_ref() {
let attr = SchemaAttr(di.attribute);
let attr = IndexedPos(di.attribute);
let count = match fields_counts.entry(attr) {
Entry::Occupied(entry) => *entry.get(),
Entry::Vacant(entry) => {

View File

@ -69,7 +69,7 @@ impl<'a> SortByAttr<'a> {
reversed: bool,
) -> Result<SortByAttr<'a>, SortByAttrError> {
let field_id = match schema.get_id(attr_name) {
Some(field_id) => *field_id,
Some(field_id) => field_id,
None => return Err(SortByAttrError::AttributeNotFound),
};

View File

@ -8,11 +8,12 @@ pub type MResult<T> = Result<T, Error>;
pub enum Error {
Io(io::Error),
IndexAlreadyExists,
SchemaDiffer,
MissingSchemaIdentifier,
SchemaMissing,
WordIndexMissing,
MissingDocumentId,
MaxFieldsLimitExceeded,
Schema(meilisearch_schema::Error),
Zlmdb(heed::Error),
Fst(fst::Error),
SerdeJson(SerdeJsonError),
@ -28,6 +29,12 @@ impl From<io::Error> for Error {
}
}
impl From<meilisearch_schema::Error> for Error {
fn from(error: meilisearch_schema::Error) -> Error {
Error::Schema(error)
}
}
impl From<heed::Error> for Error {
fn from(error: heed::Error) -> Error {
Error::Zlmdb(error)
@ -76,10 +83,12 @@ impl fmt::Display for Error {
match self {
Io(e) => write!(f, "{}", e),
IndexAlreadyExists => write!(f, "index already exists"),
SchemaDiffer => write!(f, "schemas differ"),
MissingSchemaIdentifier => write!(f, "schema cannot be build without identifier"),
SchemaMissing => write!(f, "this index does not have a schema"),
WordIndexMissing => write!(f, "this index does not have a word index"),
MissingDocumentId => write!(f, "document id is missing"),
MaxFieldsLimitExceeded => write!(f, "maximum field in a document is exceeded"),
Schema(e) => write!(f, "schemas error; {}", e),
Zlmdb(e) => write!(f, "heed error; {}", e),
Fst(e) => write!(f, "fst error; {}", e),
SerdeJson(e) => write!(f, "serde json error; {}", e),

View File

@ -136,7 +136,7 @@ mod tests {
use std::iter::FromIterator;
use fst::{IntoStreamer, Set};
use meilisearch_schema::SchemaAttr;
use meilisearch_schema::IndexedPos;
use sdset::SetBuf;
use tempfile::TempDir;
@ -295,14 +295,14 @@ mod tests {
for ((docid, attr, _), count) in fields_counts {
let prev = index
.documents_fields_counts
.document_field_count(&mut writer, docid, SchemaAttr(attr))
.document_field_count(&mut writer, docid, IndexedPos(attr))
.unwrap();
let prev = prev.unwrap_or(0);
index
.documents_fields_counts
.put_document_field_count(&mut writer, docid, SchemaAttr(attr), prev + count)
.put_document_field_count(&mut writer, docid, IndexedPos(attr), prev + count)
.unwrap();
}

View File

@ -180,16 +180,16 @@ fn token_to_docindex(id: DocumentId, indexed_pos: IndexedPos, token: Token) -> O
mod tests {
use super::*;
use meilisearch_schema::SchemaAttr;
use meilisearch_schema::IndexedPos;
#[test]
fn strange_apostrophe() {
let mut indexer = RawIndexer::new(fst::Set::default());
let docid = DocumentId(0);
let attr = SchemaAttr(0);
let indexed_pos = IndexedPos(0);
let text = "Zut, laspirateur, jai oublié de léteindre !";
indexer.index_text(docid, attr, text);
indexer.index_text(docid, indexed_pos, text);
let Indexed {
words_doc_indexes, ..
@ -209,9 +209,9 @@ mod tests {
let mut indexer = RawIndexer::new(fst::Set::default());
let docid = DocumentId(0);
let attr = SchemaAttr(0);
let indexed_pos = IndexedPos(0);
let text = vec!["Zut, laspirateur, jai oublié de léteindre !"];
indexer.index_text_seq(docid, attr, text);
indexer.index_text_seq(docid, indexed_pos, text);
let Indexed {
words_doc_indexes, ..
@ -234,9 +234,9 @@ mod tests {
let mut indexer = RawIndexer::new(stop_words);
let docid = DocumentId(0);
let attr = SchemaAttr(0);
let indexed_pos = IndexedPos(0);
let text = "Zut, laspirateur, jai oublié de léteindre !";
indexer.index_text(docid, attr, text);
indexer.index_text(docid, indexed_pos, text);
let Indexed {
words_doc_indexes, ..
@ -258,9 +258,9 @@ mod tests {
let mut indexer = RawIndexer::new(fst::Set::default());
let docid = DocumentId(0);
let attr = SchemaAttr(0);
let indexed_pos = IndexedPos(0);
let text = "🇯🇵";
indexer.index_text(docid, attr, text);
indexer.index_text(docid, indexed_pos, text);
let Indexed {
words_doc_indexes, ..

View File

@ -99,7 +99,7 @@ impl<'de, 'a, 'b> de::Deserializer<'de> for &'b mut Deserializer<'a> {
let ioread = SerdeJsonIoRead::new(cursor);
let value = Value(SerdeJsonDeserializer::new(ioread));
Some((*attribute_name, value))
Some((attribute_name, value))
} else {
None
}

View File

@ -20,7 +20,7 @@ pub use self::convert_to_string::ConvertToString;
pub use self::deserializer::{Deserializer, DeserializerError};
pub use self::extract_document_id::{compute_document_id, extract_document_id, value_to_string};
pub use self::indexer::Indexer;
pub use self::serializer::{serialize_value, Serializer};
pub use self::serializer::{serialize_value, serialize_value_with_id, Serializer};
use std::{error::Error, fmt};

View File

@ -1,4 +1,4 @@
use meilisearch_schema::{Schema, FieldsMap};
use meilisearch_schema::{Schema, FieldId};
use serde::ser;
use crate::database::MainT;
@ -10,12 +10,11 @@ use super::{ConvertToNumber, ConvertToString, Indexer, SerializerError};
pub struct Serializer<'a, 'b> {
pub txn: &'a mut heed::RwTxn<'b, MainT>,
pub schema: &'a Schema,
pub schema: &'a mut Schema,
pub document_store: DocumentsFields,
pub document_fields_counts: DocumentsFieldsCounts,
pub indexer: &'a mut RawIndexer,
pub ranked_map: &'a mut RankedMap,
pub fields_map: &'a mut FieldsMap,
pub document_id: DocumentId,
}
@ -159,7 +158,6 @@ impl<'a, 'b> ser::Serializer for Serializer<'a, 'b> {
document_fields_counts: self.document_fields_counts,
indexer: self.indexer,
ranked_map: self.ranked_map,
fields_map: self.fields_map,
current_key_name: None,
})
}
@ -177,7 +175,6 @@ impl<'a, 'b> ser::Serializer for Serializer<'a, 'b> {
document_fields_counts: self.document_fields_counts,
indexer: self.indexer,
ranked_map: self.ranked_map,
fields_map: self.fields_map,
})
}
@ -196,13 +193,12 @@ impl<'a, 'b> ser::Serializer for Serializer<'a, 'b> {
pub struct MapSerializer<'a, 'b> {
txn: &'a mut heed::RwTxn<'b, MainT>,
schema: &'a Schema,
schema: &'a mut Schema,
document_id: DocumentId,
document_store: DocumentsFields,
document_fields_counts: DocumentsFieldsCounts,
indexer: &'a mut RawIndexer,
ranked_map: &'a mut RankedMap,
fields_map: &'a mut FieldsMap,
current_key_name: Option<String>,
}
@ -237,21 +233,17 @@ impl<'a, 'b> ser::SerializeMap for MapSerializer<'a, 'b> {
V: ser::Serialize,
{
let key = key.serialize(ConvertToString)?;
match self.schema.attribute(&key) {
Some(attribute) => serialize_value(
self.txn,
attribute,
self.schema.props(attribute),
self.document_id,
self.document_store,
self.document_fields_counts,
self.indexer,
self.ranked_map,
self.fields_map,
value,
),
None => Ok(()),
}
serialize_value(
self.txn,
key,
self.schema,
self.document_id,
self.document_store,
self.document_fields_counts,
self.indexer,
self.ranked_map,
value,
)
}
fn end(self) -> Result<Self::Ok, Self::Error> {
@ -261,13 +253,12 @@ impl<'a, 'b> ser::SerializeMap for MapSerializer<'a, 'b> {
pub struct StructSerializer<'a, 'b> {
txn: &'a mut heed::RwTxn<'b, MainT>,
schema: &'a Schema,
schema: &'a mut Schema,
document_id: DocumentId,
document_store: DocumentsFields,
document_fields_counts: DocumentsFieldsCounts,
indexer: &'a mut RawIndexer,
ranked_map: &'a mut RankedMap,
fields_map: &'a mut FieldsMap,
}
impl<'a, 'b> ser::SerializeStruct for StructSerializer<'a, 'b> {
@ -282,19 +273,10 @@ impl<'a, 'b> ser::SerializeStruct for StructSerializer<'a, 'b> {
where
T: ser::Serialize,
{
// let id = fields_map.insert(key)?;
// let attribute = match self.schema.attribute(id) {
// Some(attribute) => attribute,
// None => {
// },
// }
serialize_value(
self.txn,
attribute,
self.schema.props(attribute),
key.to_string(),
self.schema,
self.document_id,
self.document_store,
self.document_fields_counts,
@ -311,7 +293,36 @@ impl<'a, 'b> ser::SerializeStruct for StructSerializer<'a, 'b> {
pub fn serialize_value<'a, T: ?Sized>(
txn: &mut heed::RwTxn<MainT>,
attribute: &'static str,
attribute: String,
schema: &'a mut Schema,
document_id: DocumentId,
document_store: DocumentsFields,
documents_fields_counts: DocumentsFieldsCounts,
indexer: &mut RawIndexer,
ranked_map: &mut RankedMap,
value: &T,
) -> Result<(), SerializerError>
where
T: ser::Serialize,
{
let field_id = schema.get_or_create(attribute)?;
serialize_value_with_id(
txn,
field_id,
schema,
document_id,
document_store,
documents_fields_counts,
indexer,
ranked_map,
value
)
}
pub fn serialize_value_with_id<'a, T: ?Sized>(
txn: &mut heed::RwTxn<MainT>,
field_id: FieldId,
schema: &'a Schema,
document_id: DocumentId,
document_store: DocumentsFields,
@ -324,12 +335,11 @@ where
T: ser::Serialize,
{
let serialized = serde_json::to_vec(value)?;
let field_id = schema.get_or_create(attribute)?;
document_store.put_document_field(txn, document_id, field_id, &serialized)?;
if let Some(indexed_pos) = schema.id_is_indexed(field_id) {
let indexer = Indexer {
field_id,
pos: *indexed_pos,
indexer,
document_id,
};
@ -337,13 +347,13 @@ where
documents_fields_counts.put_document_field_count(
txn,
document_id,
field_id,
*indexed_pos,
number_of_words as u16,
)?;
}
}
if let Some(field_id) = schema.id_is_ranked(field_id) {
if schema.id_is_ranked(field_id) {
let number = value.serialize(ConvertToNumber)?;
ranked_map.insert(document_id, field_id, number);
}

View File

@ -1,14 +1,14 @@
use heed::types::{ByteSlice, OwnedType};
use crate::database::MainT;
use heed::Result as ZResult;
use meilisearch_schema::SchemaAttr;
use meilisearch_schema::FieldId;
use super::DocumentAttrKey;
use super::DocumentFieldStoredKey;
use crate::DocumentId;
#[derive(Copy, Clone)]
pub struct DocumentsFields {
pub(crate) documents_fields: heed::Database<OwnedType<DocumentAttrKey>, ByteSlice>,
pub(crate) documents_fields: heed::Database<OwnedType<DocumentFieldStoredKey>, ByteSlice>,
}
impl DocumentsFields {
@ -16,10 +16,10 @@ impl DocumentsFields {
self,
writer: &mut heed::RwTxn<MainT>,
document_id: DocumentId,
attribute: SchemaAttr,
attribute: FieldId,
value: &[u8],
) -> ZResult<()> {
let key = DocumentAttrKey::new(document_id, attribute);
let key = DocumentFieldStoredKey::new(document_id, attribute);
self.documents_fields.put(writer, &key, value)
}
@ -28,8 +28,8 @@ impl DocumentsFields {
writer: &mut heed::RwTxn<MainT>,
document_id: DocumentId,
) -> ZResult<usize> {
let start = DocumentAttrKey::new(document_id, SchemaAttr::min());
let end = DocumentAttrKey::new(document_id, SchemaAttr::max());
let start = DocumentFieldStoredKey::new(document_id, FieldId::min());
let end = DocumentFieldStoredKey::new(document_id, FieldId::max());
self.documents_fields.delete_range(writer, &(start..=end))
}
@ -41,9 +41,9 @@ impl DocumentsFields {
self,
reader: &'txn heed::RoTxn<MainT>,
document_id: DocumentId,
attribute: SchemaAttr,
attribute: FieldId,
) -> ZResult<Option<&'txn [u8]>> {
let key = DocumentAttrKey::new(document_id, attribute);
let key = DocumentFieldStoredKey::new(document_id, attribute);
self.documents_fields.get(reader, &key)
}
@ -52,25 +52,25 @@ impl DocumentsFields {
reader: &'txn heed::RoTxn<MainT>,
document_id: DocumentId,
) -> ZResult<DocumentFieldsIter<'txn>> {
let start = DocumentAttrKey::new(document_id, SchemaAttr::min());
let end = DocumentAttrKey::new(document_id, SchemaAttr::max());
let start = DocumentFieldStoredKey::new(document_id, FieldId::min());
let end = DocumentFieldStoredKey::new(document_id, FieldId::max());
let iter = self.documents_fields.range(reader, &(start..=end))?;
Ok(DocumentFieldsIter { iter })
}
}
pub struct DocumentFieldsIter<'txn> {
iter: heed::RoRange<'txn, OwnedType<DocumentAttrKey>, ByteSlice>,
iter: heed::RoRange<'txn, OwnedType<DocumentFieldStoredKey>, ByteSlice>,
}
impl<'txn> Iterator for DocumentFieldsIter<'txn> {
type Item = ZResult<(SchemaAttr, &'txn [u8])>;
type Item = ZResult<(FieldId, &'txn [u8])>;
fn next(&mut self) -> Option<Self::Item> {
match self.iter.next() {
Some(Ok((key, bytes))) => {
let attr = SchemaAttr(key.attr.get());
Some(Ok((attr, bytes)))
let field_id = FieldId(key.field_id.get());
Some(Ok((field_id, bytes)))
}
Some(Err(e)) => Some(Err(e)),
None => None,

View File

@ -1,13 +1,13 @@
use super::DocumentAttrKey;
use super::DocumentFieldIndexedKey;
use crate::database::MainT;
use crate::DocumentId;
use heed::types::OwnedType;
use heed::Result as ZResult;
use meilisearch_schema::FieldId;
use meilisearch_schema::IndexedPos;
#[derive(Copy, Clone)]
pub struct DocumentsFieldsCounts {
pub(crate) documents_fields_counts: heed::Database<OwnedType<DocumentAttrKey>, OwnedType<u16>>,
pub(crate) documents_fields_counts: heed::Database<OwnedType<DocumentFieldIndexedKey>, OwnedType<u16>>,
}
impl DocumentsFieldsCounts {
@ -15,10 +15,10 @@ impl DocumentsFieldsCounts {
self,
writer: &mut heed::RwTxn<MainT>,
document_id: DocumentId,
attribute: FieldId,
attribute: IndexedPos,
value: u16,
) -> ZResult<()> {
let key = DocumentAttrKey::new(document_id, attribute);
let key = DocumentFieldIndexedKey::new(document_id, attribute);
self.documents_fields_counts.put(writer, &key, &value)
}
@ -27,10 +27,9 @@ impl DocumentsFieldsCounts {
writer: &mut heed::RwTxn<MainT>,
document_id: DocumentId,
) -> ZResult<usize> {
let start = DocumentAttrKey::new(document_id, FieldId::min());
let end = DocumentAttrKey::new(document_id, FieldId::max());
self.documents_fields_counts
.delete_range(writer, &(start..=end))
let start = DocumentFieldIndexedKey::new(document_id, IndexedPos::min());
let end = DocumentFieldIndexedKey::new(document_id, IndexedPos::max());
self.documents_fields_counts.delete_range(writer, &(start..=end))
}
pub fn clear(self, writer: &mut heed::RwTxn<MainT>) -> ZResult<()> {
@ -41,9 +40,9 @@ impl DocumentsFieldsCounts {
self,
reader: &heed::RoTxn<MainT>,
document_id: DocumentId,
attribute: FieldId,
attribute: IndexedPos,
) -> ZResult<Option<u16>> {
let key = DocumentAttrKey::new(document_id, attribute);
let key = DocumentFieldIndexedKey::new(document_id, attribute);
match self.documents_fields_counts.get(reader, &key)? {
Some(count) => Ok(Some(count)),
None => Ok(None),
@ -55,8 +54,8 @@ impl DocumentsFieldsCounts {
reader: &'txn heed::RoTxn<MainT>,
document_id: DocumentId,
) -> ZResult<DocumentFieldsCountsIter<'txn>> {
let start = DocumentAttrKey::new(document_id, FieldId::min());
let end = DocumentAttrKey::new(document_id, FieldId::max());
let start = DocumentFieldIndexedKey::new(document_id, IndexedPos::min());
let end = DocumentFieldIndexedKey::new(document_id, IndexedPos::max());
let iter = self.documents_fields_counts.range(reader, &(start..=end))?;
Ok(DocumentFieldsCountsIter { iter })
}
@ -79,17 +78,17 @@ impl DocumentsFieldsCounts {
}
pub struct DocumentFieldsCountsIter<'txn> {
iter: heed::RoRange<'txn, OwnedType<DocumentAttrKey>, OwnedType<u16>>,
iter: heed::RoRange<'txn, OwnedType<DocumentFieldIndexedKey>, OwnedType<u16>>,
}
impl Iterator for DocumentFieldsCountsIter<'_> {
type Item = ZResult<(FieldId, u16)>;
type Item = ZResult<(IndexedPos, u16)>;
fn next(&mut self) -> Option<Self::Item> {
match self.iter.next() {
Some(Ok((key, count))) => {
let attr = FieldId(key.attr.get());
Some(Ok((attr, count)))
let indexed_pos = IndexedPos(key.indexed_pos.get());
Some(Ok((indexed_pos, count)))
}
Some(Err(e)) => Some(Err(e)),
None => None,
@ -99,7 +98,7 @@ impl Iterator for DocumentFieldsCountsIter<'_> {
pub struct DocumentsIdsIter<'txn> {
last_seen_id: Option<DocumentId>,
iter: heed::RoIter<'txn, OwnedType<DocumentAttrKey>, OwnedType<u16>>,
iter: heed::RoIter<'txn, OwnedType<DocumentFieldIndexedKey>, OwnedType<u16>>,
}
impl Iterator for DocumentsIdsIter<'_> {
@ -123,18 +122,18 @@ impl Iterator for DocumentsIdsIter<'_> {
}
pub struct AllDocumentsFieldsCountsIter<'txn> {
iter: heed::RoIter<'txn, OwnedType<DocumentAttrKey>, OwnedType<u16>>,
iter: heed::RoIter<'txn, OwnedType<DocumentFieldIndexedKey>, OwnedType<u16>>,
}
impl Iterator for AllDocumentsFieldsCountsIter<'_> {
type Item = ZResult<(DocumentId, FieldId, u16)>;
type Item = ZResult<(DocumentId, IndexedPos, u16)>;
fn next(&mut self) -> Option<Self::Item> {
match self.iter.next() {
Some(Ok((key, count))) => {
let docid = DocumentId(key.docid.get());
let attr = FieldId(key.attr.get());
Some(Ok((docid, attr, count)))
let indexed_pos = IndexedPos(key.indexed_pos.get());
Some(Ok((docid, indexed_pos, count)))
}
Some(Err(e)) => Some(Err(e)),
None => None,

View File

@ -1,12 +1,13 @@
use crate::fields_map::FieldsMap;
use crate::database::MainT;
use crate::RankedMap;
use std::sync::Arc;
use std::collections::{HashMap, BTreeMap, BTreeSet};
use chrono::{DateTime, Utc};
use heed::types::{ByteSlice, OwnedType, SerdeBincode, Str};
use heed::Result as ZResult;
use meilisearch_schema::Schema;
use std::collections::{HashMap, BTreeMap, BTreeSet};
use std::sync::Arc;
use crate::database::MainT;
use crate::RankedMap;
const CREATED_AT_KEY: &str = "created-at";
const RANKING_RULES_KEY: &str = "ranking-rules-key";
@ -18,7 +19,6 @@ const FIELDS_FREQUENCY_KEY: &str = "fields-frequency";
const NAME_KEY: &str = "name";
const NUMBER_OF_DOCUMENTS_KEY: &str = "number-of-documents";
const RANKED_MAP_KEY: &str = "ranked-map";
const FIELDS_MAP_KEY: &str = "fields-map";
const SCHEMA_KEY: &str = "schema";
const UPDATED_AT_KEY: &str = "updated-at";
const WORDS_KEY: &str = "words";
@ -114,16 +114,6 @@ impl Main {
.get::<_, Str, SerdeBincode<RankedMap>>(reader, RANKED_MAP_KEY)
}
pub fn put_fields_map(self, writer: &mut heed::RwTxn<MainT>, fields_map: &FieldsMap) -> ZResult<()> {
self.main
.put::<_, Str, SerdeBincode<FieldsMap>>(writer, FIELDS_MAP_KEY, &fields_map)
}
pub fn fields_map(self, reader: &heed::RoTxn<MainT>) -> ZResult<Option<FieldsMap>> {
self.main
.get::<_, Str, SerdeBincode<FieldsMap>>(reader, FIELDS_MAP_KEY)
}
pub fn put_synonyms_fst(self, writer: &mut heed::RwTxn<MainT>, fst: &fst::Set) -> ZResult<()> {
let bytes = fst.as_fst().as_bytes();
self.main.put::<_, Str, ByteSlice>(writer, SYNONYMS_KEY, bytes)

View File

@ -43,18 +43,50 @@ use crate::{query_builder::QueryBuilder, update, DocIndex, DocumentId, Error, MR
type BEU64 = zerocopy::U64<byteorder::BigEndian>;
type BEU16 = zerocopy::U16<byteorder::BigEndian>;
// #[derive(Debug, Copy, Clone, AsBytes, FromBytes)]
// #[repr(C)]
// pub struct DocumentAttrKey {
// docid: BEU64,
// indexed_pos: BEU16,
// }
// impl DocumentAttrKey {
// fn new(docid: DocumentId, indexed_pos: IndexedPos) -> DocumentAttrKey {
// DocumentAttrKey {
// docid: BEU64::new(docid.0),
// indexed_pos: BEU16::new(indexed_pos.0),
// }
// }
// }
#[derive(Debug, Copy, Clone, AsBytes, FromBytes)]
#[repr(C)]
pub struct DocumentAttrKey {
pub struct DocumentFieldIndexedKey {
docid: BEU64,
attr: BEU16,
indexed_pos: BEU16,
}
impl DocumentAttrKey {
fn new(docid: DocumentId, attr: SchemaAttr) -> DocumentAttrKey {
DocumentAttrKey {
impl DocumentFieldIndexedKey {
fn new(docid: DocumentId, indexed_pos: IndexedPos) -> DocumentFieldIndexedKey {
DocumentFieldIndexedKey {
docid: BEU64::new(docid.0),
attr: BEU16::new(attr.0),
indexed_pos: BEU16::new(indexed_pos.0),
}
}
}
/// Key type of the stored-fields database: a document id followed by the id
/// of one field of that document.
///
/// Both components are held as big-endian integers (`BEU64` and `BEU16`).
// NOTE(review): presumably big-endian is chosen so that byte-wise key order
// matches numeric order, which the `min()..=max()` range scans over a single
// document rely on — confirm against the store's key comparator.
#[derive(Debug, Copy, Clone, AsBytes, FromBytes)]
#[repr(C)]
pub struct DocumentFieldStoredKey {
    docid: BEU64,
    field_id: BEU16,
}

impl DocumentFieldStoredKey {
    /// Builds the key addressing field `field_id` of document `docid`.
    fn new(docid: DocumentId, field_id: FieldId) -> Self {
        // Convert each raw integer into its big-endian wrapper first, then
        // assemble the key with field-init shorthand.
        let docid = BEU64::new(docid.0);
        let field_id = BEU16::new(field_id.0);
        Self { docid, field_id }
    }
}
@ -228,7 +260,7 @@ impl Index {
&self,
reader: &heed::RoTxn<MainT>,
document_id: DocumentId,
attribute: SchemaAttr,
attribute: FieldId,
) -> MResult<Option<T>> {
let bytes = self
.documents_fields

View File

@ -1,14 +1,13 @@
use std::collections::{HashMap, BTreeSet};
use std::collections::HashMap;
use fst::{set::OpBuilder, SetBuilder};
use sdset::{duo::Union, SetOperation};
use serde::{Deserialize, Serialize};
use meilisearch_schema::{Schema, DISPLAYED, INDEXED};
use crate::database::{MainT, UpdateT};
use crate::database::{UpdateEvent, UpdateEventsEmitter};
use crate::raw_indexer::RawIndexer;
use crate::serde::{extract_document_id, serialize_value, Deserializer, Serializer};
use crate::serde::{extract_document_id, serialize_value_with_id, Deserializer, Serializer};
use crate::store;
use crate::update::{apply_documents_deletion, compute_short_prefixes, next_update_id, Update};
use crate::{Error, MResult, RankedMap};
@ -115,16 +114,11 @@ pub fn apply_documents_addition<'a, 'b>(
None => return Err(Error::SchemaMissing),
};
if let Some(new_schema) = lazy_new_schema(&schema, &addition) {
main_store.put_schema(writer, &new_schema)?;
schema = new_schema;
}
let identifier = schema.identifier_name();
let identifier = schema.identifier();
// 1. store documents ids for future deletion
for document in addition {
let document_id = match extract_document_id(identifier, &document)? {
let document_id = match extract_document_id(&identifier, &document)? {
Some(id) => id,
None => return Err(Error::MissingDocumentId),
};
@ -147,8 +141,6 @@ pub fn apply_documents_addition<'a, 'b>(
None => fst::Set::default(),
};
let mut fields_map = main_store.fields_map(writer)?.unwrap_or_default();
// 3. index the documents fields in the stores
let mut indexer = RawIndexer::new(stop_words);
@ -160,7 +152,6 @@ pub fn apply_documents_addition<'a, 'b>(
document_fields_counts: index.documents_fields_counts,
indexer: &mut indexer,
ranked_map: &mut ranked_map,
fields_map: &mut fields_map,
document_id,
};
@ -192,16 +183,11 @@ pub fn apply_documents_partial_addition<'a, 'b>(
None => return Err(Error::SchemaMissing),
};
if let Some(new_schema) = lazy_new_schema(&schema, &addition) {
main_store.put_schema(writer, &new_schema)?;
schema = new_schema;
}
let identifier = schema.identifier_name();
let identifier = schema.identifier();
// 1. store documents ids for future deletion
for mut document in addition {
let document_id = match extract_document_id(identifier, &document)? {
let document_id = match extract_document_id(&identifier, &document)? {
Some(id) => id,
None => return Err(Error::MissingDocumentId),
};
@ -241,8 +227,6 @@ pub fn apply_documents_partial_addition<'a, 'b>(
None => fst::Set::default(),
};
let mut fields_map = main_store.fields_map(writer)?.unwrap_or_default();
// 3. index the documents fields in the stores
let mut indexer = RawIndexer::new(stop_words);
@ -254,7 +238,6 @@ pub fn apply_documents_partial_addition<'a, 'b>(
document_fields_counts: index.documents_fields_counts,
indexer: &mut indexer,
ranked_map: &mut ranked_map,
fields_map: &mut fields_map,
document_id,
};
@ -281,7 +264,6 @@ pub fn reindex_all_documents(writer: &mut heed::RwTxn<MainT>, index: &store::Ind
};
let mut ranked_map = RankedMap::default();
let mut fields_map = main_store.fields_map(writer)?.unwrap_or_default();
// 1. retrieve all documents ids
let mut documents_ids_to_reindex = Vec::new();
@ -312,21 +294,20 @@ pub fn reindex_all_documents(writer: &mut heed::RwTxn<MainT>, index: &store::Ind
for result in index.documents_fields.document_fields(writer, *document_id)? {
let (attr, bytes) = result?;
let value: serde_json::Value = serde_json::from_slice(bytes)?;
ram_store.insert((document_id, attr), value);
ram_store.insert((document_id, field_id), value);
}
for ((docid, attr), value) in ram_store.drain() {
serialize_value(
for ((docid, field_id), value) in ram_store.drain() {
serialize_value_with_id(
writer,
attr,
schema.props(attr),
field_id,
&schema,
*docid,
index.documents_fields,
index.documents_fields_counts,
&mut indexer,
&mut ranked_map,
&mut fields_map,
&value,
&value
)?;
}
}
@ -401,30 +382,3 @@ pub fn write_documents_addition_index(
Ok(())
}
pub fn lazy_new_schema(
schema: &Schema,
documents: &[HashMap<String, serde_json::Value>],
) -> Option<Schema> {
let mut attributes_to_add = BTreeSet::new();
for document in documents {
for (key, _) in document {
if schema.attribute(key).is_none() {
attributes_to_add.insert(key);
}
}
}
if attributes_to_add.is_empty() {
return None
}
let mut schema_builder = schema.to_builder();
for attribute in attributes_to_add {
schema_builder.new_attribute(attribute, DISPLAYED | INDEXED);
}
let schema = schema_builder.build();
Some(schema)
}

View File

@ -40,8 +40,8 @@ impl DocumentsDeletion {
where
D: serde::Serialize,
{
let identifier = schema.identifier_name();
let document_id = match extract_document_id(identifier, &document)? {
let identifier = schema.identifier();
let document_id = match extract_document_id(&identifier, &document)? {
Some(id) => id,
None => return Err(Error::MissingDocumentId),
};
@ -101,18 +101,7 @@ pub fn apply_documents_deletion(
};
// collect the ranked attributes according to the schema
let ranked_attrs: Vec<_> = schema
.iter()
.filter_map(
|(_, attr, prop)| {
if prop.is_ranked() {
Some(attr)
} else {
None
}
},
)
.collect();
let ranked_attrs = schema.get_ranked();
let mut words_document_ids = HashMap::new();
for id in idset {

View File

@ -1,16 +1,15 @@
use std::collections::{HashMap, BTreeMap, BTreeSet};
use std::collections::{BTreeMap, BTreeSet};
use heed::Result as ZResult;
use fst::{set::OpBuilder, SetBuilder};
use sdset::SetBuf;
use meilisearch_schema::{Schema, SchemaAttr, diff_transposition, generate_schema};
use meilisearch_schema::Schema;
use crate::database::{MainT, UpdateT};
use crate::settings::{UpdateState, SettingsUpdate};
use crate::update::documents_addition::reindex_all_documents;
use crate::update::{next_update_id, Update};
use crate::{store, MResult};
use crate::{store, MResult, Error};
pub fn push_settings_update(
writer: &mut heed::RwTxn<UpdateT>,
@ -35,7 +34,17 @@ pub fn apply_settings_update(
let mut must_reindex = false;
let old_schema = index.main.schema(writer)?;
let mut schema = match index.main.schema(writer)? {
Some(schema) => schema,
None => {
match settings.attribute_identifier.clone() {
UpdateState::Update(id) => Schema::with_identifier(id),
_ => return Err(Error::MissingSchemaIdentifier)
}
}
};
println!("settings: {:?}", settings);
match settings.ranking_rules {
UpdateState::Update(v) => {
@ -55,157 +64,69 @@ pub fn apply_settings_update(
},
_ => (),
}
let identifier = match settings.attribute_identifier.clone() {
UpdateState::Update(v) => v,
_ => {
old_schema.clone().unwrap().identifier_name().to_owned()
},
if let UpdateState::Update(id) = settings.attribute_identifier {
schema.set_identifier(id)?;
};
let attributes_searchable: Vec<String> = match settings.attributes_searchable.clone() {
UpdateState::Update(v) => v,
UpdateState::Clear => Vec::new(),
UpdateState::Nothing => {
match old_schema.clone() {
Some(schema) => {
schema.into_iter()
.filter(|(_, props)| props.is_indexed())
.map(|(name, _)| name)
.collect()
},
None => Vec::new(),
}
match settings.attributes_searchable.clone() {
UpdateState::Update(v) => schema.update_indexed(v)?,
UpdateState::Clear => {
let clear: Vec<String> = Vec::new();
schema.update_indexed(clear)?;
},
UpdateState::Nothing => (),
UpdateState::Add(attrs) => {
let mut old_attrs = match old_schema.clone() {
Some(schema) => {
schema.into_iter()
.filter(|(_, props)| props.is_indexed())
.map(|(name, _)| name)
.collect()
},
None => Vec::new(),
};
for attr in attrs {
if !old_attrs.contains(&attr) {
old_attrs.push(attr);
}
schema.set_indexed(attr)?;
}
old_attrs
},
UpdateState::Delete(attrs) => {
let mut old_attrs = match old_schema.clone() {
Some(schema) => {
schema.into_iter()
.filter(|(_, props)| props.is_indexed())
.map(|(name, _)| name)
.collect()
},
None => Vec::new(),
};
for attr in attrs {
old_attrs.retain(|x| *x == attr)
schema.remove_indexed(attr);
}
old_attrs
}
};
let attributes_displayed: Vec<String> = match settings.attributes_displayed.clone() {
UpdateState::Update(v) => v,
UpdateState::Clear => Vec::new(),
UpdateState::Nothing => {
match old_schema.clone() {
Some(schema) => {
schema.into_iter()
.filter(|(_, props)| props.is_displayed())
.map(|(name, _)| name)
.collect()
},
None => Vec::new(),
}
match settings.attributes_displayed.clone() {
UpdateState::Update(v) => schema.update_displayed(v)?,
UpdateState::Clear => {
let clear: Vec<String> = Vec::new();
schema.update_displayed(clear)?;
},
UpdateState::Nothing => (),
UpdateState::Add(attrs) => {
let mut old_attrs = match old_schema.clone() {
Some(schema) => {
schema.into_iter()
.filter(|(_, props)| props.is_displayed())
.map(|(name, _)| name)
.collect()
},
None => Vec::new(),
};
for attr in attrs {
if !old_attrs.contains(&attr) {
old_attrs.push(attr);
}
schema.set_displayed(attr)?;
}
old_attrs
},
UpdateState::Delete(attrs) => {
let mut old_attrs = match old_schema.clone() {
Some(schema) => {
schema.into_iter()
.filter(|(_, props)| props.is_displayed())
.map(|(name, _)| name)
.collect()
},
None => Vec::new(),
};
for attr in attrs {
old_attrs.retain(|x| *x == attr)
schema.remove_displayed(attr);
}
old_attrs
}
};
let attributes_ranked: Vec<String> = match settings.attributes_ranked.clone() {
UpdateState::Update(v) => v,
UpdateState::Clear => Vec::new(),
UpdateState::Nothing => {
match old_schema.clone() {
Some(schema) => {
schema.into_iter()
.filter(|(_, props)| props.is_ranked())
.map(|(name, _)| name)
.collect()
},
None => Vec::new(),
}
match settings.attributes_ranked.clone() {
UpdateState::Update(v) => schema.update_ranked(v)?,
UpdateState::Clear => {
let clear: Vec<String> = Vec::new();
schema.update_ranked(clear)?;
},
UpdateState::Nothing => (),
UpdateState::Add(attrs) => {
let mut old_attrs = match old_schema.clone() {
Some(schema) => {
schema.into_iter()
.filter(|(_, props)| props.is_ranked())
.map(|(name, _)| name)
.collect()
},
None => Vec::new(),
};
for attr in attrs {
if !old_attrs.contains(&attr) {
old_attrs.push(attr);
}
schema.set_ranked(attr)?;
}
old_attrs
},
UpdateState::Delete(attrs) => {
let mut old_attrs = match old_schema.clone() {
Some(schema) => {
schema.into_iter()
.filter(|(_, props)| props.is_ranked())
.map(|(name, _)| name)
.collect()
},
None => Vec::new(),
};
for attr in attrs {
old_attrs.retain(|x| *x == attr)
schema.remove_ranked(attr);
}
old_attrs
}
};
let new_schema = generate_schema(identifier, attributes_searchable, attributes_displayed, attributes_ranked);
index.main.put_schema(writer, &schema)?;
index.main.put_schema(writer, &new_schema)?;
println!("schema: {:?}", schema);
match settings.stop_words {
UpdateState::Update(stop_words) => {
@ -233,16 +154,6 @@ pub fn apply_settings_update(
let postings_lists_store = index.postings_lists;
let docs_words_store = index.docs_words;
if settings.attribute_identifier.is_changed() ||
settings.attributes_ranked.is_changed() ||
settings.attributes_searchable.is_changed() ||
settings.attributes_displayed.is_changed()
{
if let Some(old_schema) = old_schema {
rewrite_all_documents(writer, index, &old_schema, &new_schema)?;
must_reindex = true;
}
}
if must_reindex {
reindex_all_documents(
writer,
@ -438,46 +349,3 @@ pub fn apply_synonyms_update(
Ok(())
}
/// Rewrites every stored document so that its fields are keyed by the
/// attributes of `new_schema` instead of those of `old_schema`.
///
/// The old-to-new attribute mapping is obtained from `diff_transposition`.
/// A field whose old attribute maps to `None` in that table is dropped from
/// the rewritten document.
///
/// # Errors
///
/// Returns the first error raised while listing the document ids, reading the
/// fields of a document, deleting its fields or writing them back. A field
/// that cannot be read aborts the rewrite instead of being silently discarded,
/// because the document is deleted right after being read and the unread
/// field would otherwise be lost.
pub fn rewrite_all_documents(
    writer: &mut heed::RwTxn<MainT>,
    index: &store::Index,
    old_schema: &Schema,
    new_schema: &Schema,
) -> MResult<()> {
    // Collect the ids up front so the id iterator is finished before `writer`
    // is used for the deletes and writes below.
    let mut documents_ids_to_reindex = Vec::new();
    for result in index.documents_fields_counts.documents_ids(writer)? {
        documents_ids_to_reindex.push(result?);
    }

    let transposition = diff_transposition(old_schema, new_schema);

    // Rewrite all documents one by one
    for id in documents_ids_to_reindex {
        let mut document: HashMap<SchemaAttr, Vec<u8>> = HashMap::new();

        // Retrieve the old document
        for item in index.documents_fields.document_fields(writer, id)? {
            // Propagate read failures: skipping them would delete the field
            // for good once the document is removed below.
            let item = item?;

            // NOTE(review): this indexing panics if a stored attribute lies
            // outside the transposition table; confirm that
            // `diff_transposition` covers every attribute of `old_schema`.
            if let Some(pos) = transposition[(item.0).0 as usize] {
                // Save the field under its new SchemaAttr
                document.insert(SchemaAttr::new(pos), item.1.to_vec());
            }
        }

        // Remove the current document
        index.documents_fields.del_all_document_fields(writer, id)?;

        // Rewrite the new document
        // TODO: use cursor to not do memory jump at each call
        for (key, value) in document {
            index.documents_fields.put_document_field(writer, id, key, &value)?;
        }
    }

    Ok(())
}

View File

@ -5,6 +5,7 @@ pub type SResult<T> = Result<T, Error>;
/// Errors that schema operations can return.
#[derive(Debug)]
pub enum Error {
/// No field with the given name exists; the payload is the requested name.
FieldNameNotFound(String),
/// A field id or position could not be incremented or decremented because
/// the underlying `u16` would overflow or underflow.
MaxFieldsLimitExceeded,
}
@ -12,6 +13,7 @@ impl fmt::Display for Error {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
use self::Error::*;
match self {
FieldNameNotFound(field) => write!(f, "The field {} doesn't exist", field),
MaxFieldsLimitExceeded => write!(f, "The maximum of possible reatributed field id has been reached"),
}
}

View File

@ -3,9 +3,8 @@ use std::collections::HashMap;
use serde::{Deserialize, Serialize};
use crate::{SResult, SchemaAttr};
use crate::{SResult, FieldId};
pub type FieldId = SchemaAttr;
#[derive(Debug, Default, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct FieldsMap {
@ -43,13 +42,13 @@ impl FieldsMap {
self.name_map.remove(&name);
}
pub fn get_id<S: Into<String>>(&self, name: S) -> Option<&FieldId> {
pub fn get_id<S: Into<String>>(&self, name: S) -> Option<FieldId> {
let name = name.into();
self.name_map.get(&name)
self.name_map.get(&name).map(|s| *s)
}
pub fn get_name<I: Into<SchemaAttr>>(&self, id: I) -> Option<&String> {
self.id_map.get(&id.into())
pub fn get_name<I: Into<FieldId>>(&self, id: I) -> Option<String> {
self.id_map.get(&id.into()).map(|s| s.to_string())
}
pub fn read_from_bin<R: Read>(reader: R) -> bincode::Result<FieldsMap> {
@ -74,14 +73,14 @@ mod tests {
assert_eq!(fields_map.insert("id").unwrap(), 0.into());
assert_eq!(fields_map.insert("title").unwrap(), 1.into());
assert_eq!(fields_map.insert("descritpion").unwrap(), 2.into());
assert_eq!(fields_map.get_id("id"), Some(&0.into()));
assert_eq!(fields_map.get_id("title"), Some(&1.into()));
assert_eq!(fields_map.get_id("descritpion"), Some(&2.into()));
assert_eq!(fields_map.get_id("id"), Some(0.into()));
assert_eq!(fields_map.get_id("title"), Some(1.into()));
assert_eq!(fields_map.get_id("descritpion"), Some(2.into()));
assert_eq!(fields_map.get_id("date"), None);
assert_eq!(fields_map.len(), 3);
assert_eq!(fields_map.get_name(0), Some(&"id".to_owned()));
assert_eq!(fields_map.get_name(1), Some(&"title".to_owned()));
assert_eq!(fields_map.get_name(2), Some(&"descritpion".to_owned()));
assert_eq!(fields_map.get_name(0), Some("id".to_owned()));
assert_eq!(fields_map.get_name(1), Some("title".to_owned()));
assert_eq!(fields_map.get_name(2), Some("descritpion".to_owned()));
assert_eq!(fields_map.get_name(4), None);
fields_map.remove("title");
assert_eq!(fields_map.get_id("title"), None);

View File

@ -3,48 +3,88 @@ mod fields_map;
mod schema;
pub use error::{Error, SResult};
pub use fields_map::{FieldsMap, FieldId};
pub use schema::{Schema, IndexedPos};
pub use fields_map::FieldsMap;
pub use schema::Schema;
use serde::{Deserialize, Serialize};
#[derive(Serialize, Deserialize, Debug, Copy, Clone, Default, PartialOrd, Ord, PartialEq, Eq, Hash)]
pub struct SchemaAttr(pub u16);
pub struct IndexedPos(pub u16);
impl SchemaAttr {
pub const fn new(value: u16) -> SchemaAttr {
SchemaAttr(value)
impl IndexedPos {
pub const fn new(value: u16) -> IndexedPos {
IndexedPos(value)
}
pub const fn min() -> SchemaAttr {
SchemaAttr(u16::min_value())
pub const fn min() -> IndexedPos {
IndexedPos(u16::min_value())
}
pub const fn max() -> SchemaAttr {
SchemaAttr(u16::max_value())
pub const fn max() -> IndexedPos {
IndexedPos(u16::max_value())
}
pub fn next(self) -> SResult<SchemaAttr> {
self.0.checked_add(1).map(SchemaAttr).ok_or(Error::MaxFieldsLimitExceeded)
pub fn next(self) -> SResult<IndexedPos> {
self.0.checked_add(1).map(IndexedPos).ok_or(Error::MaxFieldsLimitExceeded)
}
pub fn prev(self) -> SResult<SchemaAttr> {
self.0.checked_sub(1).map(SchemaAttr).ok_or(Error::MaxFieldsLimitExceeded)
pub fn prev(self) -> SResult<IndexedPos> {
self.0.checked_sub(1).map(IndexedPos).ok_or(Error::MaxFieldsLimitExceeded)
}
}
impl From<u16> for SchemaAttr {
fn from(value: u16) -> SchemaAttr {
SchemaAttr(value)
impl From<u16> for IndexedPos {
fn from(value: u16) -> IndexedPos {
IndexedPos(value)
}
}
impl Into<u16> for SchemaAttr {
impl Into<u16> for IndexedPos {
fn into(self) -> u16 {
self.0
}
}
#[derive(Serialize, Deserialize, Debug, Copy, Clone, Default, PartialOrd, Ord, PartialEq, Eq, Hash)]
pub struct FieldId(pub u16);
impl FieldId {
pub const fn new(value: u16) -> FieldId {
FieldId(value)
}
pub const fn min() -> FieldId {
FieldId(u16::min_value())
}
pub const fn max() -> FieldId {
FieldId(u16::max_value())
}
pub fn next(self) -> SResult<FieldId> {
self.0.checked_add(1).map(FieldId).ok_or(Error::MaxFieldsLimitExceeded)
}
pub fn prev(self) -> SResult<FieldId> {
self.0.checked_sub(1).map(FieldId).ok_or(Error::MaxFieldsLimitExceeded)
}
}
impl From<u16> for FieldId {
fn from(value: u16) -> FieldId {
FieldId(value)
}
}
impl Into<u16> for FieldId {
fn into(self) -> u16 {
self.0
}
}
// use std::collections::{BTreeMap, HashMap};
// use std::ops::BitOr;

View File

@ -1,10 +1,10 @@
use std::collections::{HashMap, HashSet};
use crate::{FieldsMap, FieldId, SResult, SchemaAttr};
use serde::{Serialize, Deserialize};
pub type IndexedPos = SchemaAttr;
use crate::{FieldsMap, FieldId, SResult, Error, IndexedPos};
#[derive(Default)]
#[derive(Clone, Debug, Default, Serialize, Deserialize)]
pub struct Schema {
fields_map: FieldsMap,
@ -30,11 +30,21 @@ impl Schema {
self.fields_map.get_name(self.identifier).unwrap().to_string()
}
pub fn get_id<S: Into<String>>(&self, name: S) -> Option<&FieldId> {
pub fn set_identifier(&mut self, id: String) -> SResult<()> {
match self.get_id(id.clone()) {
Some(id) => {
self.identifier = id;
Ok(())
},
None => Err(Error::FieldNameNotFound(id))
}
}
/// Looks up the id of the field called `name` in the fields map.
pub fn get_id<S>(&self, name: S) -> Option<FieldId>
where
    S: Into<String>,
{
    let fields = &self.fields_map;
    fields.get_id(name)
}
pub fn get_name<I: Into<SchemaAttr>>(&self, id: I) -> Option<&String> {
/// Looks up the name of the field identified by `id` in the fields map.
pub fn get_name<I>(&self, id: I) -> Option<String>
where
    I: Into<FieldId>,
{
    let fields = &self.fields_map;
    fields.get_name(id)
}
@ -52,7 +62,7 @@ impl Schema {
pub fn get_or_create<S: Into<String> + std::clone::Clone>(&mut self, name: S) -> SResult<FieldId> {
match self.fields_map.get_id(name.clone()) {
Some(id) => {
Ok(*id)
Ok(id)
}
None => {
self.set_indexed(name.clone())?;
@ -61,6 +71,30 @@ impl Schema {
}
}
/// Returns a copy of the set of ranked field ids.
pub fn get_ranked(&self) -> HashSet<FieldId> {
    let ranked = &self.ranked;
    ranked.clone()
}
/// Returns the names of the ranked fields.
///
/// Ids for which `get_name` finds no name are skipped.
pub fn get_ranked_name(&self) -> HashSet<String> {
    let mut names = HashSet::new();
    for &id in self.ranked.iter() {
        if let Some(name) = self.get_name(id) {
            names.insert(name);
        }
    }
    names
}
/// Returns a copy of the set of displayed field ids.
pub fn get_displayed(&self) -> HashSet<FieldId> {
    let displayed = &self.displayed;
    displayed.clone()
}
/// Returns the names of the displayed fields.
///
/// Ids for which `get_name` finds no name are skipped.
pub fn get_displayed_name(&self) -> HashSet<String> {
    let mut names = HashSet::new();
    for &id in self.displayed.iter() {
        if let Some(name) = self.get_name(id) {
            names.insert(name);
        }
    }
    names
}
/// Returns a copy of the list of indexed field ids, in stored order.
pub fn get_indexed(&self) -> Vec<FieldId> {
    let indexed = &self.indexed;
    indexed.clone()
}
/// Returns the names of the indexed fields, in the order of the indexed list.
///
/// Ids for which `get_name` finds no name are skipped.
pub fn get_indexed_name(&self) -> Vec<String> {
    let mut names = Vec::new();
    for &id in self.indexed.iter() {
        if let Some(name) = self.get_name(id) {
            names.push(name);
        }
    }
    names
}
pub fn set_ranked<S: Into<String>>(&mut self, name: S) -> SResult<FieldId> {
let id = self.fields_map.insert(name.into())?;
self.ranked.insert(id);
@ -81,23 +115,42 @@ impl Schema {
Ok((id, pos.into()))
}
pub fn is_ranked<S: Into<String>>(&self, name: S) -> Option<&FieldId> {
/// Removes the field called `name` from the ranked set.
///
/// Does nothing when the fields map has no id for `name`.
pub fn remove_ranked<S: Into<String>>(&mut self, name: S) {
    let id = match self.fields_map.get_id(name.into()) {
        Some(id) => id,
        None => return,
    };

    self.ranked.remove(&id);
}
/// Removes the field called `name` from the displayed set.
///
/// Does nothing when the fields map has no id for `name`.
pub fn remove_displayed<S: Into<String>>(&mut self, name: S) {
    let id = match self.fields_map.get_id(name.into()) {
        Some(id) => id,
        None => return,
    };

    self.displayed.remove(&id);
}
/// Removes the field called `name` from both the indexed map and the
/// indexed list.
///
/// Does nothing when the fields map has no id for `name`.
pub fn remove_indexed<S: Into<String>>(&mut self, name: S) {
    let id = match self.fields_map.get_id(name.into()) {
        Some(id) => id,
        None => return,
    };

    self.indexed_map.remove(&id);
    self.indexed.retain(|candidate| *candidate != id);
}
pub fn is_ranked<S: Into<String>>(&self, name: S) -> Option<FieldId> {
match self.fields_map.get_id(name.into()) {
Some(id) => self.ranked.get(id),
Some(id) => self.ranked.get(&id).map(|s| *s),
None => None,
}
}
pub fn is_displayed<S: Into<String>>(&self, name: S) -> Option<&FieldId> {
pub fn is_displayed<S: Into<String>>(&self, name: S) -> Option<FieldId> {
match self.fields_map.get_id(name.into()) {
Some(id) => self.displayed.get(id),
Some(id) => self.displayed.get(&id).map(|s| *s),
None => None,
}
}
pub fn is_indexed<S: Into<String>>(&self, name: S) -> Option<&IndexedPos> {
pub fn is_indexed<S: Into<String>>(&self, name: S) -> Option<IndexedPos> {
match self.fields_map.get_id(name.into()) {
Some(id) => self.indexed_map.get(id),
Some(id) => self.indexed_map.get(&id).map(|s| *s),
None => None,
}
}

View File

@ -28,6 +28,7 @@ pub struct DocIndex {
/// The attribute in the document where the word was found
/// along with the index in it.
/// This is an IndexedPos, not a FieldId, and must be converted each time.
pub attribute: u16,
pub word_index: u16,