introduce a new schemaless way

This commit is contained in:
qdequele 2020-01-13 19:10:58 +01:00
parent bbe1845f66
commit 130fb74928
No known key found for this signature in database
GPG key ID: B3F0A000EBF11745
22 changed files with 365 additions and 418 deletions

View file

@ -1,14 +1,14 @@
use heed::types::{ByteSlice, OwnedType};
use crate::database::MainT;
use heed::Result as ZResult;
use meilisearch_schema::SchemaAttr;
use meilisearch_schema::FieldId;
use super::DocumentAttrKey;
use super::DocumentFieldStoredKey;
use crate::DocumentId;
/// Handle on the store holding the raw bytes of each document field.
///
/// Entries are keyed by a (document id, field) pair and the value is an opaque
/// byte slice.
#[derive(Copy, Clone)]
pub struct DocumentsFields {
pub(crate) documents_fields: heed::Database<OwnedType<DocumentAttrKey>, ByteSlice>,
pub(crate) documents_fields: heed::Database<OwnedType<DocumentFieldStoredKey>, ByteSlice>,
}
impl DocumentsFields {
@ -16,10 +16,10 @@ impl DocumentsFields {
self,
writer: &mut heed::RwTxn<MainT>,
document_id: DocumentId,
attribute: SchemaAttr,
attribute: FieldId,
value: &[u8],
) -> ZResult<()> {
let key = DocumentAttrKey::new(document_id, attribute);
let key = DocumentFieldStoredKey::new(document_id, attribute);
self.documents_fields.put(writer, &key, value)
}
@ -28,8 +28,8 @@ impl DocumentsFields {
writer: &mut heed::RwTxn<MainT>,
document_id: DocumentId,
) -> ZResult<usize> {
let start = DocumentAttrKey::new(document_id, SchemaAttr::min());
let end = DocumentAttrKey::new(document_id, SchemaAttr::max());
let start = DocumentFieldStoredKey::new(document_id, FieldId::min());
let end = DocumentFieldStoredKey::new(document_id, FieldId::max());
self.documents_fields.delete_range(writer, &(start..=end))
}
@ -41,9 +41,9 @@ impl DocumentsFields {
self,
reader: &'txn heed::RoTxn<MainT>,
document_id: DocumentId,
attribute: SchemaAttr,
attribute: FieldId,
) -> ZResult<Option<&'txn [u8]>> {
let key = DocumentAttrKey::new(document_id, attribute);
let key = DocumentFieldStoredKey::new(document_id, attribute);
self.documents_fields.get(reader, &key)
}
@ -52,25 +52,25 @@ impl DocumentsFields {
reader: &'txn heed::RoTxn<MainT>,
document_id: DocumentId,
) -> ZResult<DocumentFieldsIter<'txn>> {
let start = DocumentAttrKey::new(document_id, SchemaAttr::min());
let end = DocumentAttrKey::new(document_id, SchemaAttr::max());
let start = DocumentFieldStoredKey::new(document_id, FieldId::min());
let end = DocumentFieldStoredKey::new(document_id, FieldId::max());
let iter = self.documents_fields.range(reader, &(start..=end))?;
Ok(DocumentFieldsIter { iter })
}
}
/// Iterator over the stored fields of a single document.
///
/// Wraps a heed key range and yields each field identifier together with the
/// raw bytes stored for it, borrowed for the lifetime of the read transaction.
pub struct DocumentFieldsIter<'txn> {
iter: heed::RoRange<'txn, OwnedType<DocumentAttrKey>, ByteSlice>,
iter: heed::RoRange<'txn, OwnedType<DocumentFieldStoredKey>, ByteSlice>,
}
impl<'txn> Iterator for DocumentFieldsIter<'txn> {
type Item = ZResult<(SchemaAttr, &'txn [u8])>;
type Item = ZResult<(FieldId, &'txn [u8])>;
fn next(&mut self) -> Option<Self::Item> {
match self.iter.next() {
Some(Ok((key, bytes))) => {
let attr = SchemaAttr(key.attr.get());
Some(Ok((attr, bytes)))
let field_id = FieldId(key.field_id.get());
Some(Ok((field_id, bytes)))
}
Some(Err(e)) => Some(Err(e)),
None => None,

View file

@ -1,13 +1,13 @@
use super::DocumentAttrKey;
use super::DocumentFieldIndexedKey;
use crate::database::MainT;
use crate::DocumentId;
use heed::types::OwnedType;
use heed::Result as ZResult;
use meilisearch_schema::FieldId;
use meilisearch_schema::IndexedPos;
/// Handle on the store holding one `u16` count per (document id, field) pair.
// NOTE(review): what exactly is counted is not visible in this excerpt — confirm
// against the indexing code before relying on it.
#[derive(Copy, Clone)]
pub struct DocumentsFieldsCounts {
pub(crate) documents_fields_counts: heed::Database<OwnedType<DocumentAttrKey>, OwnedType<u16>>,
pub(crate) documents_fields_counts: heed::Database<OwnedType<DocumentFieldIndexedKey>, OwnedType<u16>>,
}
impl DocumentsFieldsCounts {
@ -15,10 +15,10 @@ impl DocumentsFieldsCounts {
self,
writer: &mut heed::RwTxn<MainT>,
document_id: DocumentId,
attribute: FieldId,
attribute: IndexedPos,
value: u16,
) -> ZResult<()> {
let key = DocumentAttrKey::new(document_id, attribute);
let key = DocumentFieldIndexedKey::new(document_id, attribute);
self.documents_fields_counts.put(writer, &key, &value)
}
@ -27,10 +27,9 @@ impl DocumentsFieldsCounts {
writer: &mut heed::RwTxn<MainT>,
document_id: DocumentId,
) -> ZResult<usize> {
let start = DocumentAttrKey::new(document_id, FieldId::min());
let end = DocumentAttrKey::new(document_id, FieldId::max());
self.documents_fields_counts
.delete_range(writer, &(start..=end))
let start = DocumentFieldIndexedKey::new(document_id, IndexedPos::min());
let end = DocumentFieldIndexedKey::new(document_id, IndexedPos::max());
self.documents_fields_counts.delete_range(writer, &(start..=end))
}
pub fn clear(self, writer: &mut heed::RwTxn<MainT>) -> ZResult<()> {
@ -41,9 +40,9 @@ impl DocumentsFieldsCounts {
self,
reader: &heed::RoTxn<MainT>,
document_id: DocumentId,
attribute: FieldId,
attribute: IndexedPos,
) -> ZResult<Option<u16>> {
let key = DocumentAttrKey::new(document_id, attribute);
let key = DocumentFieldIndexedKey::new(document_id, attribute);
match self.documents_fields_counts.get(reader, &key)? {
Some(count) => Ok(Some(count)),
None => Ok(None),
@ -55,8 +54,8 @@ impl DocumentsFieldsCounts {
reader: &'txn heed::RoTxn<MainT>,
document_id: DocumentId,
) -> ZResult<DocumentFieldsCountsIter<'txn>> {
let start = DocumentAttrKey::new(document_id, FieldId::min());
let end = DocumentAttrKey::new(document_id, FieldId::max());
let start = DocumentFieldIndexedKey::new(document_id, IndexedPos::min());
let end = DocumentFieldIndexedKey::new(document_id, IndexedPos::max());
let iter = self.documents_fields_counts.range(reader, &(start..=end))?;
Ok(DocumentFieldsCountsIter { iter })
}
@ -79,17 +78,17 @@ impl DocumentsFieldsCounts {
}
/// Iterator over the per-field counts of a single document.
///
/// Wraps a heed key range and yields each field identifier with its `u16` count.
pub struct DocumentFieldsCountsIter<'txn> {
iter: heed::RoRange<'txn, OwnedType<DocumentAttrKey>, OwnedType<u16>>,
iter: heed::RoRange<'txn, OwnedType<DocumentFieldIndexedKey>, OwnedType<u16>>,
}
impl Iterator for DocumentFieldsCountsIter<'_> {
type Item = ZResult<(FieldId, u16)>;
type Item = ZResult<(IndexedPos, u16)>;
fn next(&mut self) -> Option<Self::Item> {
match self.iter.next() {
Some(Ok((key, count))) => {
let attr = FieldId(key.attr.get());
Some(Ok((attr, count)))
let indexed_pos = IndexedPos(key.indexed_pos.get());
Some(Ok((indexed_pos, count)))
}
Some(Err(e)) => Some(Err(e)),
None => None,
@ -99,7 +98,7 @@ impl Iterator for DocumentFieldsCountsIter<'_> {
/// Iterator built on top of the entries of the fields-counts store.
// NOTE(review): presumably yields each document id once, using `last_seen_id`
// to skip consecutive keys of the same document — the `Iterator` impl is not
// visible in this excerpt, confirm there.
pub struct DocumentsIdsIter<'txn> {
last_seen_id: Option<DocumentId>,
iter: heed::RoIter<'txn, OwnedType<DocumentAttrKey>, OwnedType<u16>>,
iter: heed::RoIter<'txn, OwnedType<DocumentFieldIndexedKey>, OwnedType<u16>>,
}
impl Iterator for DocumentsIdsIter<'_> {
@ -123,18 +122,18 @@ impl Iterator for DocumentsIdsIter<'_> {
}
/// Iterator over entries of the fields-counts store that yields the document
/// id, the field identifier and the `u16` count of every entry it visits.
// NOTE(review): presumably spans the whole store (it wraps a `RoIter` rather
// than a `RoRange`); the constructor is not visible here — confirm.
pub struct AllDocumentsFieldsCountsIter<'txn> {
iter: heed::RoIter<'txn, OwnedType<DocumentAttrKey>, OwnedType<u16>>,
iter: heed::RoIter<'txn, OwnedType<DocumentFieldIndexedKey>, OwnedType<u16>>,
}
impl Iterator for AllDocumentsFieldsCountsIter<'_> {
type Item = ZResult<(DocumentId, FieldId, u16)>;
type Item = ZResult<(DocumentId, IndexedPos, u16)>;
fn next(&mut self) -> Option<Self::Item> {
match self.iter.next() {
Some(Ok((key, count))) => {
let docid = DocumentId(key.docid.get());
let attr = FieldId(key.attr.get());
Some(Ok((docid, attr, count)))
let indexed_pos = IndexedPos(key.indexed_pos.get());
Some(Ok((docid, indexed_pos, count)))
}
Some(Err(e)) => Some(Err(e)),
None => None,

View file

@ -1,12 +1,13 @@
use crate::fields_map::FieldsMap;
use crate::database::MainT;
use crate::RankedMap;
use std::sync::Arc;
use std::collections::{HashMap, BTreeMap, BTreeSet};
use chrono::{DateTime, Utc};
use heed::types::{ByteSlice, OwnedType, SerdeBincode, Str};
use heed::Result as ZResult;
use meilisearch_schema::Schema;
use std::collections::{HashMap, BTreeMap, BTreeSet};
use std::sync::Arc;
use crate::database::MainT;
use crate::RankedMap;
const CREATED_AT_KEY: &str = "created-at";
const RANKING_RULES_KEY: &str = "ranking-rules-key";
@ -18,7 +19,6 @@ const FIELDS_FREQUENCY_KEY: &str = "fields-frequency";
const NAME_KEY: &str = "name";
const NUMBER_OF_DOCUMENTS_KEY: &str = "number-of-documents";
const RANKED_MAP_KEY: &str = "ranked-map";
const FIELDS_MAP_KEY: &str = "fields-map";
const SCHEMA_KEY: &str = "schema";
const UPDATED_AT_KEY: &str = "updated-at";
const WORDS_KEY: &str = "words";
@ -114,16 +114,6 @@ impl Main {
.get::<_, Str, SerdeBincode<RankedMap>>(reader, RANKED_MAP_KEY)
}
/// Stores `fields_map` in the main store under `FIELDS_MAP_KEY`, encoded
/// through `SerdeBincode`.
///
/// Returns the heed error if the write fails.
pub fn put_fields_map(self, writer: &mut heed::RwTxn<MainT>, fields_map: &FieldsMap) -> ZResult<()> {
self.main
.put::<_, Str, SerdeBincode<FieldsMap>>(writer, FIELDS_MAP_KEY, &fields_map)
}
/// Reads the `FieldsMap` stored in the main store under `FIELDS_MAP_KEY`,
/// decoded through `SerdeBincode`.
///
/// Returns `Ok(None)` when the lookup finds no value, and the heed error if
/// the read or the decoding fails.
pub fn fields_map(self, reader: &heed::RoTxn<MainT>) -> ZResult<Option<FieldsMap>> {
self.main
.get::<_, Str, SerdeBincode<FieldsMap>>(reader, FIELDS_MAP_KEY)
}
pub fn put_synonyms_fst(self, writer: &mut heed::RwTxn<MainT>, fst: &fst::Set) -> ZResult<()> {
let bytes = fst.as_fst().as_bytes();
self.main.put::<_, Str, ByteSlice>(writer, SYNONYMS_KEY, bytes)

View file

@ -43,18 +43,50 @@ use crate::{query_builder::QueryBuilder, update, DocIndex, DocumentId, Error, MR
type BEU64 = zerocopy::U64<byteorder::BigEndian>;
type BEU16 = zerocopy::U16<byteorder::BigEndian>;
// #[derive(Debug, Copy, Clone, AsBytes, FromBytes)]
// #[repr(C)]
// pub struct DocumentAttrKey {
// docid: BEU64,
// indexed_pos: BEU16,
// }
// impl DocumentAttrKey {
// fn new(docid: DocumentId, indexed_pos: IndexedPos) -> DocumentAttrKey {
// DocumentAttrKey {
// docid: BEU64::new(docid.0),
// indexed_pos: BEU16::new(indexed_pos.0),
// }
// }
// }
/// `#[repr(C)]` database key made of a big-endian 64-bit document id followed
/// by a big-endian 16-bit per-document component.
///
/// The constructor below builds the key from the wrapped integers of its two
/// arguments.
// NOTE(review): big-endian encoding is presumably chosen so that byte-wise key
// order matches numeric order for the range scans — confirm.
#[derive(Debug, Copy, Clone, AsBytes, FromBytes)]
#[repr(C)]
pub struct DocumentAttrKey {
pub struct DocumentFieldIndexedKey {
docid: BEU64,
attr: BEU16,
indexed_pos: BEU16,
}
impl DocumentAttrKey {
fn new(docid: DocumentId, attr: SchemaAttr) -> DocumentAttrKey {
DocumentAttrKey {
impl DocumentFieldIndexedKey {
fn new(docid: DocumentId, indexed_pos: IndexedPos) -> DocumentFieldIndexedKey {
DocumentFieldIndexedKey {
docid: BEU64::new(docid.0),
attr: BEU16::new(attr.0),
indexed_pos: BEU16::new(indexed_pos.0),
}
}
}
/// `#[repr(C)]` database key identifying one stored field of one document.
///
/// Both components are kept as big-endian integers: the 64-bit document id
/// comes first, followed by the 16-bit field id.
#[derive(Debug, Copy, Clone, AsBytes, FromBytes)]
#[repr(C)]
pub struct DocumentFieldStoredKey {
    /// Big-endian copy of the document id.
    docid: BEU64,
    /// Big-endian copy of the field id.
    field_id: BEU16,
}

impl DocumentFieldStoredKey {
    /// Builds the key for the field `field_id` of the document `docid`.
    fn new(docid: DocumentId, field_id: FieldId) -> Self {
        let docid = BEU64::new(docid.0);
        let field_id = BEU16::new(field_id.0);
        Self { docid, field_id }
    }
}
@ -228,7 +260,7 @@ impl Index {
&self,
reader: &heed::RoTxn<MainT>,
document_id: DocumentId,
attribute: SchemaAttr,
attribute: FieldId,
) -> MResult<Option<T>> {
let bytes = self
.documents_fields