From a7e40a78c1f0f4b08306673abea5a425664997ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Mon, 14 Oct 2019 14:06:34 +0200 Subject: [PATCH] Introduce the DocumentsFieldsCounts store --- meilidb-core/src/store/documents_fields.rs | 62 +------- .../src/store/documents_fields_counts.rs | 139 ++++++++++++++++++ meilidb-core/src/store/main.rs | 1 - meilidb-core/src/store/mod.rs | 40 ++++- 4 files changed, 179 insertions(+), 63 deletions(-) create mode 100644 meilidb-core/src/store/documents_fields_counts.rs diff --git a/meilidb-core/src/store/documents_fields.rs b/meilidb-core/src/store/documents_fields.rs index e6acb87b6..d3c28f990 100644 --- a/meilidb-core/src/store/documents_fields.rs +++ b/meilidb-core/src/store/documents_fields.rs @@ -1,37 +1,13 @@ use std::convert::TryFrom; use meilidb_schema::SchemaAttr; use crate::DocumentId; +use super::{document_attribute_into_key, document_attribute_from_key}; #[derive(Copy, Clone)] pub struct DocumentsFields { pub(crate) documents_fields: rkv::SingleStore, } -fn document_attribute_into_key(document_id: DocumentId, attribute: SchemaAttr) -> [u8; 10] { - let document_id_bytes = document_id.0.to_be_bytes(); - let attr_bytes = attribute.0.to_be_bytes(); - - let mut key = [0u8; 10]; - key[0..8].copy_from_slice(&document_id_bytes); - key[8..10].copy_from_slice(&attr_bytes); - - key -} - -fn document_attribute_from_key(key: [u8; 10]) -> (DocumentId, SchemaAttr) { - let document_id = { - let array = TryFrom::try_from(&key[0..8]).unwrap(); - DocumentId(u64::from_be_bytes(array)) - }; - - let schema_attr = { - let array = TryFrom::try_from(&key[8..8+2]).unwrap(); - SchemaAttr(u16::from_be_bytes(array)) - }; - - (document_id, schema_attr) -} - impl DocumentsFields { pub fn put_document_field( &self, @@ -100,15 +76,6 @@ impl DocumentsFields { let iter = self.documents_fields.iter_from(reader, document_id_bytes)?; Ok(DocumentFieldsIter { document_id, iter }) } - - pub fn documents_ids<'r, T: rkv::Readable>( - &self, - reader: &'r T, - ) -> Result, rkv::StoreError> - { - let iter = self.documents_fields.iter_start(reader)?; - Ok(DocumentsIdsIter { last_seen_id: None, iter }) - } } pub struct DocumentFieldsIter<'r> { @@ -134,30 +101,3 @@ impl<'r> Iterator for DocumentFieldsIter<'r> { } } } - -pub struct DocumentsIdsIter<'r> { - last_seen_id: Option, - iter: rkv::store::single::Iter<'r>, -} - -impl<'r> Iterator for DocumentsIdsIter<'r> { - type Item = Result; - - fn next(&mut self) -> Option { - for result in &mut self.iter { - match result { - Ok((key, _)) => { - let array = TryFrom::try_from(key).unwrap(); - let (document_id, _) = document_attribute_from_key(array); - if Some(document_id) != self.last_seen_id { - self.last_seen_id = Some(document_id); - return Some(Ok(document_id)) - } - }, - Err(e) => return Some(Err(e)), - } - } - - None - } -} diff --git a/meilidb-core/src/store/documents_fields_counts.rs b/meilidb-core/src/store/documents_fields_counts.rs new file mode 100644 index 000000000..95aff986a --- /dev/null +++ b/meilidb-core/src/store/documents_fields_counts.rs @@ -0,0 +1,139 @@ +use std::convert::TryFrom; +use meilidb_schema::SchemaAttr; +use crate::DocumentId; +use super::{document_attribute_into_key, document_attribute_from_key}; + +#[derive(Copy, Clone)] +pub struct DocumentsFieldsCounts { + pub(crate) documents_fields_counts: rkv::SingleStore, +} + +impl DocumentsFieldsCounts { + pub fn put_document_field_count( + &self, + writer: &mut rkv::Writer, + document_id: DocumentId, + attribute: SchemaAttr, + value: u64, + ) -> Result<(), rkv::StoreError> + { + let key = document_attribute_into_key(document_id, attribute); + self.documents_fields_counts.put(writer, key, &rkv::Value::U64(value)) + } + + pub fn del_all_document_fields_counts( + &self, + writer: &mut rkv::Writer, + document_id: DocumentId, + ) -> Result + { + let document_id_bytes = document_id.0.to_be_bytes(); + let mut keys_to_delete = Vec::new(); + + // WARN we can not delete the keys using the iterator + // so we store them and delete them just after + let iter = self.documents_fields_counts.iter_from(writer, document_id_bytes)?; + for result in iter { + let (key, _) = result?; + let array = TryFrom::try_from(key).unwrap(); + let (current_document_id, _) = document_attribute_from_key(array); + if current_document_id != document_id { break } + + keys_to_delete.push(key.to_owned()); + } + + let count = keys_to_delete.len(); + for key in keys_to_delete { + self.documents_fields_counts.delete(writer, key)?; + } + + Ok(count) + } + + pub fn document_attribute_count<'a>( + &self, + reader: &'a impl rkv::Readable, + document_id: DocumentId, + attribute: SchemaAttr, + ) -> Result, rkv::StoreError> + { + let key = document_attribute_into_key(document_id, attribute); + + match self.documents_fields_counts.get(reader, key)? { + Some(rkv::Value::U64(count)) => Ok(Some(count)), + Some(value) => panic!("invalid type {:?}", value), + None => Ok(None), + } + } + + pub fn document_fields_counts<'r, T: rkv::Readable>( + &self, + reader: &'r T, + document_id: DocumentId, + ) -> Result, rkv::StoreError> + { + let document_id_bytes = document_id.0.to_be_bytes(); + let iter = self.documents_fields_counts.iter_from(reader, document_id_bytes)?; + Ok(DocumentFieldsCountsIter { document_id, iter }) + } + + pub fn documents_ids<'r, T: rkv::Readable>( + &self, + reader: &'r T, + ) -> Result, rkv::StoreError> + { + let iter = self.documents_fields_counts.iter_start(reader)?; + Ok(DocumentsIdsIter { last_seen_id: None, iter }) + } +} + +pub struct DocumentFieldsCountsIter<'r> { + document_id: DocumentId, + iter: rkv::store::single::Iter<'r>, +} + +impl<'r> Iterator for DocumentFieldsCountsIter<'r> { + type Item = Result<(SchemaAttr, u64), rkv::StoreError>; + + fn next(&mut self) -> Option { + match self.iter.next() { + Some(Ok((key, Some(rkv::Value::U64(count))))) => { + let array = TryFrom::try_from(key).unwrap(); + let (current_document_id, attr) = document_attribute_from_key(array); + if current_document_id != self.document_id { return None; } + + Some(Ok((attr, count))) + }, + Some(Ok((key, data))) => panic!("{:?}, {:?}", key, data), + Some(Err(e)) => Some(Err(e)), + None => None, + } + } +} + +pub struct DocumentsIdsIter<'r> { + last_seen_id: Option, + iter: rkv::store::single::Iter<'r>, +} + +impl<'r> Iterator for DocumentsIdsIter<'r> { + type Item = Result; + + fn next(&mut self) -> Option { + for result in &mut self.iter { + match result { + Ok((key, _)) => { + let array = TryFrom::try_from(key).unwrap(); + let (document_id, _) = document_attribute_from_key(array); + if Some(document_id) != self.last_seen_id { + self.last_seen_id = Some(document_id); + return Some(Ok(document_id)) + } + }, + Err(e) => return Some(Err(e)), + } + } + + None + } +} diff --git a/meilidb-core/src/store/main.rs b/meilidb-core/src/store/main.rs index cb5266187..5beecdc5f 100644 --- a/meilidb-core/src/store/main.rs +++ b/meilidb-core/src/store/main.rs @@ -3,7 +3,6 @@ use std::convert::TryInto; use meilidb_schema::Schema; use rkv::Value; -use serde::de; use crate::{RankedMap, MResult}; const CUSTOMS_KEY: &str = "customs-key"; diff --git a/meilidb-core/src/store/mod.rs b/meilidb-core/src/store/mod.rs index 183ec7b0b..136b10bab 100644 --- a/meilidb-core/src/store/mod.rs +++ b/meilidb-core/src/store/mod.rs @@ -1,5 +1,6 @@ mod docs_words; mod documents_fields; +mod documents_fields_counts; mod main; mod postings_lists; mod synonyms; @@ -8,6 +9,7 @@ mod updates_results; pub use self::docs_words::DocsWords; pub use self::documents_fields::{DocumentsFields, DocumentFieldsIter}; +pub use self::documents_fields_counts::{DocumentsFieldsCounts, DocumentFieldsCountsIter, DocumentsIdsIter}; pub use self::main::Main; pub use self::postings_lists::PostingsLists; pub use self::synonyms::Synonyms; @@ -15,8 +17,11 @@ pub use self::updates::Updates; pub use self::updates_results::UpdatesResults; use std::collections::HashSet; +use std::convert::TryFrom; + use meilidb_schema::{Schema, SchemaAttr}; -use serde::{ser, de}; +use serde::de; + use crate::criterion::Criteria; use crate::serde::Deserializer; use crate::{update, query_builder::QueryBuilder, DocumentId, MResult, Error}; @@ -25,6 +30,31 @@ fn aligned_to(bytes: &[u8], align: usize) -> bool { (bytes as *const _ as *const () as usize) % align == 0 } +fn document_attribute_into_key(document_id: DocumentId, attribute: SchemaAttr) -> [u8; 10] { + let document_id_bytes = document_id.0.to_be_bytes(); + let attr_bytes = attribute.0.to_be_bytes(); + + let mut key = [0u8; 10]; + key[0..8].copy_from_slice(&document_id_bytes); + key[8..10].copy_from_slice(&attr_bytes); + + key +} + +fn document_attribute_from_key(key: [u8; 10]) -> (DocumentId, SchemaAttr) { + let document_id = { + let array = TryFrom::try_from(&key[0..8]).unwrap(); + DocumentId(u64::from_be_bytes(array)) + }; + + let schema_attr = { + let array = TryFrom::try_from(&key[8..8+2]).unwrap(); + SchemaAttr(u16::from_be_bytes(array)) + }; + + (document_id, schema_attr) +} + fn main_name(name: &str) -> String { format!("store-{}", name) } @@ -37,6 +67,10 @@ fn documents_fields_name(name: &str) -> String { format!("store-{}-documents-fields", name) } +fn documents_fields_counts_name(name: &str) -> String { + format!("store-{}-documents-fields-counts", name) +} + fn synonyms_name(name: &str) -> String { format!("store-{}-synonyms", name) } @@ -58,6 +92,7 @@ pub struct Index { pub main: Main, pub postings_lists: PostingsLists, pub documents_fields: DocumentsFields, + pub documents_fields_counts: DocumentsFieldsCounts, pub synonyms: Synonyms, pub docs_words: DocsWords, @@ -205,6 +240,7 @@ fn open_options( let main_name = main_name(name); let postings_lists_name = postings_lists_name(name); let documents_fields_name = documents_fields_name(name); + let documents_fields_counts_name = documents_fields_counts_name(name); let synonyms_name = synonyms_name(name); let docs_words_name = docs_words_name(name); let updates_name = updates_name(name); @@ -214,6 +250,7 @@ fn open_options( let main = env.open_single(main_name.as_str(), options)?; let postings_lists = env.open_single(postings_lists_name.as_str(), options)?; let documents_fields = env.open_single(documents_fields_name.as_str(), options)?; + let documents_fields_counts = env.open_single(documents_fields_counts_name.as_str(), options)?; let synonyms = env.open_single(synonyms_name.as_str(), options)?; let docs_words = env.open_single(docs_words_name.as_str(), options)?; let updates = env.open_single(updates_name.as_str(), options)?; @@ -223,6 +260,7 @@ fn open_options( main: Main { main }, postings_lists: PostingsLists { postings_lists }, documents_fields: DocumentsFields { documents_fields }, + documents_fields_counts: DocumentsFieldsCounts { documents_fields_counts }, synonyms: Synonyms { synonyms }, docs_words: DocsWords { docs_words }, updates: Updates { updates },