Introduce the DocumentsFieldsCounts store

This commit is contained in:
Clément Renault 2019-10-14 14:06:34 +02:00
parent 9cdda8c46a
commit a7e40a78c1
No known key found for this signature in database
GPG Key ID: 92ADA4E935E71FA4
4 changed files with 179 additions and 63 deletions

View File

@ -1,37 +1,13 @@
use std::convert::TryFrom;
use meilidb_schema::SchemaAttr;
use crate::DocumentId;
use super::{document_attribute_into_key, document_attribute_from_key};
/// Cheap, copyable handle around the `rkv::SingleStore` that backs the
/// documents-fields store.
// NOTE(review): entries appear to be keyed by (document id, schema attribute),
// judging by the key helpers imported above — confirm against the impl below.
#[derive(Copy, Clone)]
pub struct DocumentsFields {
/// Underlying rkv store; visible crate-wide so the index can build this wrapper.
pub(crate) documents_fields: rkv::SingleStore,
}
/// Builds the 10-byte store key for a `(document_id, attribute)` pair.
///
/// Layout: bytes `0..8` hold the document id and bytes `8..10` hold the
/// attribute, both encoded big-endian.
fn document_attribute_into_key(document_id: DocumentId, attribute: SchemaAttr) -> [u8; 10] {
    let mut key = [0u8; 10];
    {
        // Split the buffer once so each half is filled independently.
        let (id_part, attr_part) = key.split_at_mut(8);
        id_part.copy_from_slice(&document_id.0.to_be_bytes());
        attr_part.copy_from_slice(&attribute.0.to_be_bytes());
    }
    key
}
fn document_attribute_from_key(key: [u8; 10]) -> (DocumentId, SchemaAttr) {
let document_id = {
let array = TryFrom::try_from(&key[0..8]).unwrap();
DocumentId(u64::from_be_bytes(array))
};
let schema_attr = {
let array = TryFrom::try_from(&key[8..8+2]).unwrap();
SchemaAttr(u16::from_be_bytes(array))
};
(document_id, schema_attr)
}
impl DocumentsFields {
pub fn put_document_field(
&self,
@ -100,15 +76,6 @@ impl DocumentsFields {
let iter = self.documents_fields.iter_from(reader, document_id_bytes)?;
Ok(DocumentFieldsIter { document_id, iter })
}
/// Returns an iterator over the document ids found in this store.
///
/// Iteration starts at the first key of the store; the returned
/// `DocumentsIdsIter` starts with no previously seen id.
pub fn documents_ids<'r, T: rkv::Readable>(
    &self,
    reader: &'r T,
) -> Result<DocumentsIdsIter<'r>, rkv::StoreError>
{
    self.documents_fields
        .iter_start(reader)
        .map(|iter| DocumentsIdsIter { last_seen_id: None, iter })
}
}
pub struct DocumentFieldsIter<'r> {
@ -134,30 +101,3 @@ impl<'r> Iterator for DocumentFieldsIter<'r> {
}
}
}
/// Iterator yielding document ids decoded from the keys of an rkv single store.
pub struct DocumentsIdsIter<'r> {
/// Id most recently yielded; the `Iterator` impl skips keys that decode to it again.
last_seen_id: Option<DocumentId>,
/// Underlying rkv iterator over the raw `(key, value)` entries.
iter: rkv::store::single::Iter<'r>,
}
impl<'r> Iterator for DocumentsIdsIter<'r> {
    type Item = Result<DocumentId, rkv::StoreError>;

    /// Advances to the next entry whose document id differs from the last
    /// one yielded and returns that id.
    ///
    /// Store errors are forwarded as `Some(Err(_))`.
    ///
    /// # Panics
    ///
    /// Panics if a key is not exactly 10 bytes long.
    fn next(&mut self) -> Option<Self::Item> {
        loop {
            let (raw_key, _) = match self.iter.next()? {
                Ok(entry) => entry,
                Err(error) => return Some(Err(error)),
            };

            let key_bytes = TryFrom::try_from(raw_key).unwrap();
            let (document_id, _) = document_attribute_from_key(key_bytes);

            // Same document as the previous entry: keep scanning.
            if self.last_seen_id == Some(document_id) {
                continue;
            }

            self.last_seen_id = Some(document_id);
            return Some(Ok(document_id));
        }
    }
}

View File

@ -0,0 +1,139 @@
use std::convert::TryFrom;
use meilidb_schema::SchemaAttr;
use crate::DocumentId;
use super::{document_attribute_into_key, document_attribute_from_key};
/// Cheap, copyable handle around the `rkv::SingleStore` that holds one `u64`
/// count per `(document id, schema attribute)` pair.
#[derive(Copy, Clone)]
pub struct DocumentsFieldsCounts {
/// Underlying rkv store; keys are the 10-byte keys built by
/// `document_attribute_into_key`, values are `rkv::Value::U64` counts.
pub(crate) documents_fields_counts: rkv::SingleStore,
}
impl DocumentsFieldsCounts {
    /// Records `value` as the count stored for the `(document_id, attribute)` pair.
    ///
    /// The entry key is produced by `document_attribute_into_key` and the count
    /// is written as an `rkv::Value::U64`.
    ///
    /// # Errors
    ///
    /// Returns the `rkv::StoreError` reported by the underlying store.
    pub fn put_document_field_count(
        &self,
        writer: &mut rkv::Writer,
        document_id: DocumentId,
        attribute: SchemaAttr,
        value: u64,
    ) -> Result<(), rkv::StoreError>
    {
        let key = document_attribute_into_key(document_id, attribute);
        self.documents_fields_counts.put(writer, key, &rkv::Value::U64(value))
    }

    /// Deletes every count entry belonging to `document_id` and returns the
    /// number of entries that were removed.
    ///
    /// Iteration starts from the big-endian bytes of `document_id` and stops at
    /// the first key that decodes to a different document id.
    ///
    /// # Errors
    ///
    /// Returns the first `rkv::StoreError` raised while iterating or deleting.
    ///
    /// # Panics
    ///
    /// Panics if an iterated key is not exactly 10 bytes long.
    pub fn del_all_document_fields_counts(
        &self,
        writer: &mut rkv::Writer,
        document_id: DocumentId,
    ) -> Result<usize, rkv::StoreError>
    {
        let document_id_bytes = document_id.0.to_be_bytes();
        let mut keys_to_delete = Vec::new();

        // WARN we can not delete the keys using the iterator
        //      so we store them and delete them just after
        let iter = self.documents_fields_counts.iter_from(writer, document_id_bytes)?;
        for result in iter {
            let (key, _) = result?;
            let array: [u8; 10] = TryFrom::try_from(key).unwrap();
            let (current_document_id, _) = document_attribute_from_key(array);
            if current_document_id != document_id { break }
            // Keys are fixed-size: keep the decoded array (it is `Copy`)
            // instead of allocating a `Vec<u8>` for every key.
            keys_to_delete.push(array);
        }

        let count = keys_to_delete.len();
        for key in keys_to_delete {
            self.documents_fields_counts.delete(writer, key)?;
        }

        Ok(count)
    }

    /// Returns the count stored for the `(document_id, attribute)` pair, or
    /// `None` when the store has no entry for it.
    ///
    /// # Errors
    ///
    /// Returns the `rkv::StoreError` reported by the underlying store.
    ///
    /// # Panics
    ///
    /// Panics if the stored value is not an `rkv::Value::U64`.
    pub fn document_attribute_count(
        &self,
        reader: &impl rkv::Readable,
        document_id: DocumentId,
        attribute: SchemaAttr,
    ) -> Result<Option<u64>, rkv::StoreError>
    {
        let key = document_attribute_into_key(document_id, attribute);

        match self.documents_fields_counts.get(reader, key)? {
            Some(rkv::Value::U64(count)) => Ok(Some(count)),
            Some(value) => panic!("invalid type {:?}", value),
            None => Ok(None),
        }
    }

    /// Returns an iterator over the `(attribute, count)` entries of `document_id`.
    ///
    /// Iteration starts from the big-endian bytes of `document_id`; the returned
    /// `DocumentFieldsCountsIter` ends at the first key of another document.
    ///
    /// # Errors
    ///
    /// Returns the `rkv::StoreError` raised while creating the store iterator.
    pub fn document_fields_counts<'r, T: rkv::Readable>(
        &self,
        reader: &'r T,
        document_id: DocumentId,
    ) -> Result<DocumentFieldsCountsIter<'r>, rkv::StoreError>
    {
        let document_id_bytes = document_id.0.to_be_bytes();
        let iter = self.documents_fields_counts.iter_from(reader, document_id_bytes)?;
        Ok(DocumentFieldsCountsIter { document_id, iter })
    }

    /// Returns an iterator over the document ids present in this store,
    /// starting from the first key.
    ///
    /// # Errors
    ///
    /// Returns the `rkv::StoreError` raised while creating the store iterator.
    pub fn documents_ids<'r, T: rkv::Readable>(
        &self,
        reader: &'r T,
    ) -> Result<DocumentsIdsIter<'r>, rkv::StoreError>
    {
        let iter = self.documents_fields_counts.iter_start(reader)?;
        Ok(DocumentsIdsIter { last_seen_id: None, iter })
    }
}
/// Iterator over the `(SchemaAttr, u64)` count entries of a single document.
pub struct DocumentFieldsCountsIter<'r> {
/// Document whose entries are yielded; iteration ends at the first key of another document.
document_id: DocumentId,
/// Underlying rkv iterator over the raw `(key, value)` entries.
iter: rkv::store::single::Iter<'r>,
}
impl<'r> Iterator for DocumentFieldsCountsIter<'r> {
    type Item = Result<(SchemaAttr, u64), rkv::StoreError>;

    /// Yields the next `(attribute, count)` entry of `self.document_id`.
    ///
    /// Returns `None` when the underlying iterator is exhausted or when the
    /// next key belongs to another document. Store errors are forwarded as
    /// `Some(Err(_))`.
    ///
    /// # Panics
    ///
    /// Panics if an entry's value is not an `rkv::Value::U64`, or if a key is
    /// not exactly 10 bytes long.
    fn next(&mut self) -> Option<Self::Item> {
        let (raw_key, raw_value) = match self.iter.next()? {
            Ok(entry) => entry,
            Err(error) => return Some(Err(error)),
        };

        // The value type is checked before the key is decoded.
        let count = match raw_value {
            Some(rkv::Value::U64(count)) => count,
            data => panic!("{:?}, {:?}", raw_key, data),
        };

        let key_bytes = TryFrom::try_from(raw_key).unwrap();
        let (owner_id, attr) = document_attribute_from_key(key_bytes);

        if owner_id == self.document_id {
            Some(Ok((attr, count)))
        } else {
            None
        }
    }
}
/// Iterator yielding document ids decoded from the keys of an rkv single store.
pub struct DocumentsIdsIter<'r> {
/// Id most recently yielded; the `Iterator` impl skips keys that decode to it again.
last_seen_id: Option<DocumentId>,
/// Underlying rkv iterator over the raw `(key, value)` entries.
iter: rkv::store::single::Iter<'r>,
}
impl<'r> Iterator for DocumentsIdsIter<'r> {
    type Item = Result<DocumentId, rkv::StoreError>;

    /// Advances to the next entry whose document id differs from the last
    /// one yielded and returns that id.
    ///
    /// Store errors are forwarded as `Some(Err(_))`.
    ///
    /// # Panics
    ///
    /// Panics if a key is not exactly 10 bytes long.
    fn next(&mut self) -> Option<Self::Item> {
        loop {
            let (raw_key, _) = match self.iter.next()? {
                Ok(entry) => entry,
                Err(error) => return Some(Err(error)),
            };

            let key_bytes = TryFrom::try_from(raw_key).unwrap();
            let (document_id, _) = document_attribute_from_key(key_bytes);

            // Same document as the previous entry: keep scanning.
            if self.last_seen_id == Some(document_id) {
                continue;
            }

            self.last_seen_id = Some(document_id);
            return Some(Ok(document_id));
        }
    }
}

View File

@ -3,7 +3,6 @@ use std::convert::TryInto;
use meilidb_schema::Schema;
use rkv::Value;
use serde::de;
use crate::{RankedMap, MResult};
// NOTE(review): presumably the key under which custom data is stored in the
// main store — confirm against the code that reads `CUSTOMS_KEY`.
const CUSTOMS_KEY: &str = "customs-key";

View File

@ -1,5 +1,6 @@
mod docs_words;
mod documents_fields;
mod documents_fields_counts;
mod main;
mod postings_lists;
mod synonyms;
@ -8,6 +9,7 @@ mod updates_results;
pub use self::docs_words::DocsWords;
pub use self::documents_fields::{DocumentsFields, DocumentFieldsIter};
pub use self::documents_fields_counts::{DocumentsFieldsCounts, DocumentFieldsCountsIter, DocumentsIdsIter};
pub use self::main::Main;
pub use self::postings_lists::PostingsLists;
pub use self::synonyms::Synonyms;
@ -15,8 +17,11 @@ pub use self::updates::Updates;
pub use self::updates_results::UpdatesResults;
use std::collections::HashSet;
use std::convert::TryFrom;
use meilidb_schema::{Schema, SchemaAttr};
use serde::{ser, de};
use serde::de;
use crate::criterion::Criteria;
use crate::serde::Deserializer;
use crate::{update, query_builder::QueryBuilder, DocumentId, MResult, Error};
@ -25,6 +30,31 @@ fn aligned_to(bytes: &[u8], align: usize) -> bool {
(bytes as *const _ as *const () as usize) % align == 0
}
/// Builds the 10-byte store key for a `(document_id, attribute)` pair.
///
/// Layout: bytes `0..8` hold the document id and bytes `8..10` hold the
/// attribute, both encoded big-endian.
fn document_attribute_into_key(document_id: DocumentId, attribute: SchemaAttr) -> [u8; 10] {
    let mut key = [0u8; 10];
    {
        // Split the buffer once so each half is filled independently.
        let (id_part, attr_part) = key.split_at_mut(8);
        id_part.copy_from_slice(&document_id.0.to_be_bytes());
        attr_part.copy_from_slice(&attribute.0.to_be_bytes());
    }
    key
}
fn document_attribute_from_key(key: [u8; 10]) -> (DocumentId, SchemaAttr) {
let document_id = {
let array = TryFrom::try_from(&key[0..8]).unwrap();
DocumentId(u64::from_be_bytes(array))
};
let schema_attr = {
let array = TryFrom::try_from(&key[8..8+2]).unwrap();
SchemaAttr(u16::from_be_bytes(array))
};
(document_id, schema_attr)
}
/// Returns the name of the main store for the index called `name`:
/// `store-<name>`.
fn main_name(name: &str) -> String {
    ["store-", name].concat()
}
@ -37,6 +67,10 @@ fn documents_fields_name(name: &str) -> String {
format!("store-{}-documents-fields", name)
}
/// Returns the name of the documents-fields-counts store for the index
/// called `name`: `store-<name>-documents-fields-counts`.
fn documents_fields_counts_name(name: &str) -> String {
    ["store-", name, "-documents-fields-counts"].concat()
}
/// Returns the name of the synonyms store for the index called `name`:
/// `store-<name>-synonyms`.
fn synonyms_name(name: &str) -> String {
    let prefix = "store-";
    let suffix = "-synonyms";

    let mut store_name = String::with_capacity(prefix.len() + name.len() + suffix.len());
    store_name.push_str(prefix);
    store_name.push_str(name);
    store_name.push_str(suffix);
    store_name
}
@ -58,6 +92,7 @@ pub struct Index {
pub main: Main,
pub postings_lists: PostingsLists,
pub documents_fields: DocumentsFields,
pub documents_fields_counts: DocumentsFieldsCounts,
pub synonyms: Synonyms,
pub docs_words: DocsWords,
@ -205,6 +240,7 @@ fn open_options(
let main_name = main_name(name);
let postings_lists_name = postings_lists_name(name);
let documents_fields_name = documents_fields_name(name);
let documents_fields_counts_name = documents_fields_counts_name(name);
let synonyms_name = synonyms_name(name);
let docs_words_name = docs_words_name(name);
let updates_name = updates_name(name);
@ -214,6 +250,7 @@ fn open_options(
let main = env.open_single(main_name.as_str(), options)?;
let postings_lists = env.open_single(postings_lists_name.as_str(), options)?;
let documents_fields = env.open_single(documents_fields_name.as_str(), options)?;
let documents_fields_counts = env.open_single(documents_fields_counts_name.as_str(), options)?;
let synonyms = env.open_single(synonyms_name.as_str(), options)?;
let docs_words = env.open_single(docs_words_name.as_str(), options)?;
let updates = env.open_single(updates_name.as_str(), options)?;
@ -223,6 +260,7 @@ fn open_options(
main: Main { main },
postings_lists: PostingsLists { postings_lists },
documents_fields: DocumentsFields { documents_fields },
documents_fields_counts: DocumentsFieldsCounts { documents_fields_counts },
synonyms: Synonyms { synonyms },
docs_words: DocsWords { docs_words },
updates: Updates { updates },