mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-01-11 05:54:30 +01:00
Introduce the DocumentsFieldsCounts store
This commit is contained in:
parent
9cdda8c46a
commit
a7e40a78c1
@ -1,37 +1,13 @@
|
||||
use std::convert::TryFrom;
|
||||
use meilidb_schema::SchemaAttr;
|
||||
use crate::DocumentId;
|
||||
use super::{document_attribute_into_key, document_attribute_from_key};
|
||||
|
||||
#[derive(Copy, Clone)]
|
||||
pub struct DocumentsFields {
|
||||
pub(crate) documents_fields: rkv::SingleStore,
|
||||
}
|
||||
|
||||
/// Builds the 10-byte store key for a `(document_id, attribute)` pair.
///
/// Bytes `0..8` receive the big-endian document id and bytes `8..10` the
/// big-endian attribute, so the key starts with the document id.
fn document_attribute_into_key(document_id: DocumentId, attribute: SchemaAttr) -> [u8; 10] {
    let mut key = [0u8; 10];

    {
        // Split the buffer once so each half is filled from its own source.
        let (id_part, attr_part) = key.split_at_mut(8);
        id_part.copy_from_slice(&document_id.0.to_be_bytes());
        attr_part.copy_from_slice(&attribute.0.to_be_bytes());
    }

    key
}
|
||||
|
||||
fn document_attribute_from_key(key: [u8; 10]) -> (DocumentId, SchemaAttr) {
|
||||
let document_id = {
|
||||
let array = TryFrom::try_from(&key[0..8]).unwrap();
|
||||
DocumentId(u64::from_be_bytes(array))
|
||||
};
|
||||
|
||||
let schema_attr = {
|
||||
let array = TryFrom::try_from(&key[8..8+2]).unwrap();
|
||||
SchemaAttr(u16::from_be_bytes(array))
|
||||
};
|
||||
|
||||
(document_id, schema_attr)
|
||||
}
|
||||
|
||||
impl DocumentsFields {
|
||||
pub fn put_document_field(
|
||||
&self,
|
||||
@ -100,15 +76,6 @@ impl DocumentsFields {
|
||||
let iter = self.documents_fields.iter_from(reader, document_id_bytes)?;
|
||||
Ok(DocumentFieldsIter { document_id, iter })
|
||||
}
|
||||
|
||||
pub fn documents_ids<'r, T: rkv::Readable>(
|
||||
&self,
|
||||
reader: &'r T,
|
||||
) -> Result<DocumentsIdsIter<'r>, rkv::StoreError>
|
||||
{
|
||||
let iter = self.documents_fields.iter_start(reader)?;
|
||||
Ok(DocumentsIdsIter { last_seen_id: None, iter })
|
||||
}
|
||||
}
|
||||
|
||||
pub struct DocumentFieldsIter<'r> {
|
||||
@ -134,30 +101,3 @@ impl<'r> Iterator for DocumentFieldsIter<'r> {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub struct DocumentsIdsIter<'r> {
|
||||
last_seen_id: Option<DocumentId>,
|
||||
iter: rkv::store::single::Iter<'r>,
|
||||
}
|
||||
|
||||
impl<'r> Iterator for DocumentsIdsIter<'r> {
|
||||
type Item = Result<DocumentId, rkv::StoreError>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
for result in &mut self.iter {
|
||||
match result {
|
||||
Ok((key, _)) => {
|
||||
let array = TryFrom::try_from(key).unwrap();
|
||||
let (document_id, _) = document_attribute_from_key(array);
|
||||
if Some(document_id) != self.last_seen_id {
|
||||
self.last_seen_id = Some(document_id);
|
||||
return Some(Ok(document_id))
|
||||
}
|
||||
},
|
||||
Err(e) => return Some(Err(e)),
|
||||
}
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
}
|
||||
|
139
meilidb-core/src/store/documents_fields_counts.rs
Normal file
139
meilidb-core/src/store/documents_fields_counts.rs
Normal file
@ -0,0 +1,139 @@
|
||||
use std::convert::TryFrom;
|
||||
use meilidb_schema::SchemaAttr;
|
||||
use crate::DocumentId;
|
||||
use super::{document_attribute_into_key, document_attribute_from_key};
|
||||
|
||||
#[derive(Copy, Clone)]
|
||||
pub struct DocumentsFieldsCounts {
|
||||
pub(crate) documents_fields_counts: rkv::SingleStore,
|
||||
}
|
||||
|
||||
impl DocumentsFieldsCounts {
|
||||
pub fn put_document_field_count(
|
||||
&self,
|
||||
writer: &mut rkv::Writer,
|
||||
document_id: DocumentId,
|
||||
attribute: SchemaAttr,
|
||||
value: u64,
|
||||
) -> Result<(), rkv::StoreError>
|
||||
{
|
||||
let key = document_attribute_into_key(document_id, attribute);
|
||||
self.documents_fields_counts.put(writer, key, &rkv::Value::U64(value))
|
||||
}
|
||||
|
||||
pub fn del_all_document_fields_counts(
|
||||
&self,
|
||||
writer: &mut rkv::Writer,
|
||||
document_id: DocumentId,
|
||||
) -> Result<usize, rkv::StoreError>
|
||||
{
|
||||
let document_id_bytes = document_id.0.to_be_bytes();
|
||||
let mut keys_to_delete = Vec::new();
|
||||
|
||||
// WARN we can not delete the keys using the iterator
|
||||
// so we store them and delete them just after
|
||||
let iter = self.documents_fields_counts.iter_from(writer, document_id_bytes)?;
|
||||
for result in iter {
|
||||
let (key, _) = result?;
|
||||
let array = TryFrom::try_from(key).unwrap();
|
||||
let (current_document_id, _) = document_attribute_from_key(array);
|
||||
if current_document_id != document_id { break }
|
||||
|
||||
keys_to_delete.push(key.to_owned());
|
||||
}
|
||||
|
||||
let count = keys_to_delete.len();
|
||||
for key in keys_to_delete {
|
||||
self.documents_fields_counts.delete(writer, key)?;
|
||||
}
|
||||
|
||||
Ok(count)
|
||||
}
|
||||
|
||||
pub fn document_attribute_count<'a>(
|
||||
&self,
|
||||
reader: &'a impl rkv::Readable,
|
||||
document_id: DocumentId,
|
||||
attribute: SchemaAttr,
|
||||
) -> Result<Option<u64>, rkv::StoreError>
|
||||
{
|
||||
let key = document_attribute_into_key(document_id, attribute);
|
||||
|
||||
match self.documents_fields_counts.get(reader, key)? {
|
||||
Some(rkv::Value::U64(count)) => Ok(Some(count)),
|
||||
Some(value) => panic!("invalid type {:?}", value),
|
||||
None => Ok(None),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn document_fields_counts<'r, T: rkv::Readable>(
|
||||
&self,
|
||||
reader: &'r T,
|
||||
document_id: DocumentId,
|
||||
) -> Result<DocumentFieldsCountsIter<'r>, rkv::StoreError>
|
||||
{
|
||||
let document_id_bytes = document_id.0.to_be_bytes();
|
||||
let iter = self.documents_fields_counts.iter_from(reader, document_id_bytes)?;
|
||||
Ok(DocumentFieldsCountsIter { document_id, iter })
|
||||
}
|
||||
|
||||
pub fn documents_ids<'r, T: rkv::Readable>(
|
||||
&self,
|
||||
reader: &'r T,
|
||||
) -> Result<DocumentsIdsIter<'r>, rkv::StoreError>
|
||||
{
|
||||
let iter = self.documents_fields_counts.iter_start(reader)?;
|
||||
Ok(DocumentsIdsIter { last_seen_id: None, iter })
|
||||
}
|
||||
}
|
||||
|
||||
pub struct DocumentFieldsCountsIter<'r> {
|
||||
document_id: DocumentId,
|
||||
iter: rkv::store::single::Iter<'r>,
|
||||
}
|
||||
|
||||
impl<'r> Iterator for DocumentFieldsCountsIter<'r> {
|
||||
type Item = Result<(SchemaAttr, u64), rkv::StoreError>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
match self.iter.next() {
|
||||
Some(Ok((key, Some(rkv::Value::U64(count))))) => {
|
||||
let array = TryFrom::try_from(key).unwrap();
|
||||
let (current_document_id, attr) = document_attribute_from_key(array);
|
||||
if current_document_id != self.document_id { return None; }
|
||||
|
||||
Some(Ok((attr, count)))
|
||||
},
|
||||
Some(Ok((key, data))) => panic!("{:?}, {:?}", key, data),
|
||||
Some(Err(e)) => Some(Err(e)),
|
||||
None => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub struct DocumentsIdsIter<'r> {
|
||||
last_seen_id: Option<DocumentId>,
|
||||
iter: rkv::store::single::Iter<'r>,
|
||||
}
|
||||
|
||||
impl<'r> Iterator for DocumentsIdsIter<'r> {
|
||||
type Item = Result<DocumentId, rkv::StoreError>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
for result in &mut self.iter {
|
||||
match result {
|
||||
Ok((key, _)) => {
|
||||
let array = TryFrom::try_from(key).unwrap();
|
||||
let (document_id, _) = document_attribute_from_key(array);
|
||||
if Some(document_id) != self.last_seen_id {
|
||||
self.last_seen_id = Some(document_id);
|
||||
return Some(Ok(document_id))
|
||||
}
|
||||
},
|
||||
Err(e) => return Some(Err(e)),
|
||||
}
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
}
|
@ -3,7 +3,6 @@ use std::convert::TryInto;
|
||||
|
||||
use meilidb_schema::Schema;
|
||||
use rkv::Value;
|
||||
use serde::de;
|
||||
use crate::{RankedMap, MResult};
|
||||
|
||||
// NOTE(review): presumably the key under which the "customs" entry is kept in
// this store — its users are not visible here, confirm before relying on it.
const CUSTOMS_KEY: &str = "customs-key";
|
||||
|
@ -1,5 +1,6 @@
|
||||
mod docs_words;
|
||||
mod documents_fields;
|
||||
mod documents_fields_counts;
|
||||
mod main;
|
||||
mod postings_lists;
|
||||
mod synonyms;
|
||||
@ -8,6 +9,7 @@ mod updates_results;
|
||||
|
||||
pub use self::docs_words::DocsWords;
|
||||
pub use self::documents_fields::{DocumentsFields, DocumentFieldsIter};
|
||||
pub use self::documents_fields_counts::{DocumentsFieldsCounts, DocumentFieldsCountsIter, DocumentsIdsIter};
|
||||
pub use self::main::Main;
|
||||
pub use self::postings_lists::PostingsLists;
|
||||
pub use self::synonyms::Synonyms;
|
||||
@ -15,8 +17,11 @@ pub use self::updates::Updates;
|
||||
pub use self::updates_results::UpdatesResults;
|
||||
|
||||
use std::collections::HashSet;
|
||||
use std::convert::TryFrom;
|
||||
|
||||
use meilidb_schema::{Schema, SchemaAttr};
|
||||
use serde::{ser, de};
|
||||
use serde::de;
|
||||
|
||||
use crate::criterion::Criteria;
|
||||
use crate::serde::Deserializer;
|
||||
use crate::{update, query_builder::QueryBuilder, DocumentId, MResult, Error};
|
||||
@ -25,6 +30,31 @@ fn aligned_to(bytes: &[u8], align: usize) -> bool {
|
||||
(bytes as *const _ as *const () as usize) % align == 0
|
||||
}
|
||||
|
||||
/// Builds the 10-byte store key for a `(document_id, attribute)` pair.
///
/// The key is the big-endian document id (8 bytes) immediately followed by
/// the big-endian attribute (2 bytes).
fn document_attribute_into_key(document_id: DocumentId, attribute: SchemaAttr) -> [u8; 10] {
    let id_bytes = document_id.0.to_be_bytes();
    let attr_bytes = attribute.0.to_be_bytes();

    let mut key = [0u8; 10];
    let source = id_bytes.iter().chain(attr_bytes.iter());
    for (slot, byte) in key.iter_mut().zip(source) {
        *slot = *byte;
    }

    key
}
|
||||
|
||||
fn document_attribute_from_key(key: [u8; 10]) -> (DocumentId, SchemaAttr) {
|
||||
let document_id = {
|
||||
let array = TryFrom::try_from(&key[0..8]).unwrap();
|
||||
DocumentId(u64::from_be_bytes(array))
|
||||
};
|
||||
|
||||
let schema_attr = {
|
||||
let array = TryFrom::try_from(&key[8..8+2]).unwrap();
|
||||
SchemaAttr(u16::from_be_bytes(array))
|
||||
};
|
||||
|
||||
(document_id, schema_attr)
|
||||
}
|
||||
|
||||
/// Returns the name of the main store for `name`: `store-<name>`.
fn main_name(name: &str) -> String {
    format!("store-{}", name)
}
|
||||
@ -37,6 +67,10 @@ fn documents_fields_name(name: &str) -> String {
|
||||
format!("store-{}-documents-fields", name)
|
||||
}
|
||||
|
||||
/// Returns the name of the documents-fields-counts store for `name`:
/// `store-<name>-documents-fields-counts`.
fn documents_fields_counts_name(name: &str) -> String {
    format!("store-{}-documents-fields-counts", name)
}
|
||||
|
||||
/// Returns the name of the synonyms store for `name`: `store-<name>-synonyms`.
fn synonyms_name(name: &str) -> String {
    format!("store-{}-synonyms", name)
}
|
||||
@ -58,6 +92,7 @@ pub struct Index {
|
||||
pub main: Main,
|
||||
pub postings_lists: PostingsLists,
|
||||
pub documents_fields: DocumentsFields,
|
||||
pub documents_fields_counts: DocumentsFieldsCounts,
|
||||
pub synonyms: Synonyms,
|
||||
pub docs_words: DocsWords,
|
||||
|
||||
@ -205,6 +240,7 @@ fn open_options(
|
||||
let main_name = main_name(name);
|
||||
let postings_lists_name = postings_lists_name(name);
|
||||
let documents_fields_name = documents_fields_name(name);
|
||||
let documents_fields_counts_name = documents_fields_counts_name(name);
|
||||
let synonyms_name = synonyms_name(name);
|
||||
let docs_words_name = docs_words_name(name);
|
||||
let updates_name = updates_name(name);
|
||||
@ -214,6 +250,7 @@ fn open_options(
|
||||
let main = env.open_single(main_name.as_str(), options)?;
|
||||
let postings_lists = env.open_single(postings_lists_name.as_str(), options)?;
|
||||
let documents_fields = env.open_single(documents_fields_name.as_str(), options)?;
|
||||
let documents_fields_counts = env.open_single(documents_fields_counts_name.as_str(), options)?;
|
||||
let synonyms = env.open_single(synonyms_name.as_str(), options)?;
|
||||
let docs_words = env.open_single(docs_words_name.as_str(), options)?;
|
||||
let updates = env.open_single(updates_name.as_str(), options)?;
|
||||
@ -223,6 +260,7 @@ fn open_options(
|
||||
main: Main { main },
|
||||
postings_lists: PostingsLists { postings_lists },
|
||||
documents_fields: DocumentsFields { documents_fields },
|
||||
documents_fields_counts: DocumentsFieldsCounts { documents_fields_counts },
|
||||
synonyms: Synonyms { synonyms },
|
||||
docs_words: DocsWords { docs_words },
|
||||
updates: Updates { updates },
|
||||
|
Loading…
x
Reference in New Issue
Block a user