mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-23 05:14:27 +01:00
Introduce the DocumentsFieldsCounts store
This commit is contained in:
parent
9cdda8c46a
commit
a7e40a78c1
@ -1,37 +1,13 @@
|
|||||||
use std::convert::TryFrom;
|
use std::convert::TryFrom;
|
||||||
use meilidb_schema::SchemaAttr;
|
use meilidb_schema::SchemaAttr;
|
||||||
use crate::DocumentId;
|
use crate::DocumentId;
|
||||||
|
use super::{document_attribute_into_key, document_attribute_from_key};
|
||||||
|
|
||||||
#[derive(Copy, Clone)]
|
#[derive(Copy, Clone)]
|
||||||
pub struct DocumentsFields {
|
pub struct DocumentsFields {
|
||||||
pub(crate) documents_fields: rkv::SingleStore,
|
pub(crate) documents_fields: rkv::SingleStore,
|
||||||
}
|
}
|
||||||
|
|
||||||
fn document_attribute_into_key(document_id: DocumentId, attribute: SchemaAttr) -> [u8; 10] {
|
|
||||||
let document_id_bytes = document_id.0.to_be_bytes();
|
|
||||||
let attr_bytes = attribute.0.to_be_bytes();
|
|
||||||
|
|
||||||
let mut key = [0u8; 10];
|
|
||||||
key[0..8].copy_from_slice(&document_id_bytes);
|
|
||||||
key[8..10].copy_from_slice(&attr_bytes);
|
|
||||||
|
|
||||||
key
|
|
||||||
}
|
|
||||||
|
|
||||||
fn document_attribute_from_key(key: [u8; 10]) -> (DocumentId, SchemaAttr) {
|
|
||||||
let document_id = {
|
|
||||||
let array = TryFrom::try_from(&key[0..8]).unwrap();
|
|
||||||
DocumentId(u64::from_be_bytes(array))
|
|
||||||
};
|
|
||||||
|
|
||||||
let schema_attr = {
|
|
||||||
let array = TryFrom::try_from(&key[8..8+2]).unwrap();
|
|
||||||
SchemaAttr(u16::from_be_bytes(array))
|
|
||||||
};
|
|
||||||
|
|
||||||
(document_id, schema_attr)
|
|
||||||
}
|
|
||||||
|
|
||||||
impl DocumentsFields {
|
impl DocumentsFields {
|
||||||
pub fn put_document_field(
|
pub fn put_document_field(
|
||||||
&self,
|
&self,
|
||||||
@ -100,15 +76,6 @@ impl DocumentsFields {
|
|||||||
let iter = self.documents_fields.iter_from(reader, document_id_bytes)?;
|
let iter = self.documents_fields.iter_from(reader, document_id_bytes)?;
|
||||||
Ok(DocumentFieldsIter { document_id, iter })
|
Ok(DocumentFieldsIter { document_id, iter })
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn documents_ids<'r, T: rkv::Readable>(
|
|
||||||
&self,
|
|
||||||
reader: &'r T,
|
|
||||||
) -> Result<DocumentsIdsIter<'r>, rkv::StoreError>
|
|
||||||
{
|
|
||||||
let iter = self.documents_fields.iter_start(reader)?;
|
|
||||||
Ok(DocumentsIdsIter { last_seen_id: None, iter })
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct DocumentFieldsIter<'r> {
|
pub struct DocumentFieldsIter<'r> {
|
||||||
@ -134,30 +101,3 @@ impl<'r> Iterator for DocumentFieldsIter<'r> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct DocumentsIdsIter<'r> {
|
|
||||||
last_seen_id: Option<DocumentId>,
|
|
||||||
iter: rkv::store::single::Iter<'r>,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<'r> Iterator for DocumentsIdsIter<'r> {
|
|
||||||
type Item = Result<DocumentId, rkv::StoreError>;
|
|
||||||
|
|
||||||
fn next(&mut self) -> Option<Self::Item> {
|
|
||||||
for result in &mut self.iter {
|
|
||||||
match result {
|
|
||||||
Ok((key, _)) => {
|
|
||||||
let array = TryFrom::try_from(key).unwrap();
|
|
||||||
let (document_id, _) = document_attribute_from_key(array);
|
|
||||||
if Some(document_id) != self.last_seen_id {
|
|
||||||
self.last_seen_id = Some(document_id);
|
|
||||||
return Some(Ok(document_id))
|
|
||||||
}
|
|
||||||
},
|
|
||||||
Err(e) => return Some(Err(e)),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
None
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
139
meilidb-core/src/store/documents_fields_counts.rs
Normal file
139
meilidb-core/src/store/documents_fields_counts.rs
Normal file
@ -0,0 +1,139 @@
|
|||||||
|
use std::convert::TryFrom;
|
||||||
|
use meilidb_schema::SchemaAttr;
|
||||||
|
use crate::DocumentId;
|
||||||
|
use super::{document_attribute_into_key, document_attribute_from_key};
|
||||||
|
|
||||||
|
#[derive(Copy, Clone)]
|
||||||
|
pub struct DocumentsFieldsCounts {
|
||||||
|
pub(crate) documents_fields_counts: rkv::SingleStore,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl DocumentsFieldsCounts {
|
||||||
|
pub fn put_document_field_count(
|
||||||
|
&self,
|
||||||
|
writer: &mut rkv::Writer,
|
||||||
|
document_id: DocumentId,
|
||||||
|
attribute: SchemaAttr,
|
||||||
|
value: u64,
|
||||||
|
) -> Result<(), rkv::StoreError>
|
||||||
|
{
|
||||||
|
let key = document_attribute_into_key(document_id, attribute);
|
||||||
|
self.documents_fields_counts.put(writer, key, &rkv::Value::U64(value))
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn del_all_document_fields_counts(
|
||||||
|
&self,
|
||||||
|
writer: &mut rkv::Writer,
|
||||||
|
document_id: DocumentId,
|
||||||
|
) -> Result<usize, rkv::StoreError>
|
||||||
|
{
|
||||||
|
let document_id_bytes = document_id.0.to_be_bytes();
|
||||||
|
let mut keys_to_delete = Vec::new();
|
||||||
|
|
||||||
|
// WARN we can not delete the keys using the iterator
|
||||||
|
// so we store them and delete them just after
|
||||||
|
let iter = self.documents_fields_counts.iter_from(writer, document_id_bytes)?;
|
||||||
|
for result in iter {
|
||||||
|
let (key, _) = result?;
|
||||||
|
let array = TryFrom::try_from(key).unwrap();
|
||||||
|
let (current_document_id, _) = document_attribute_from_key(array);
|
||||||
|
if current_document_id != document_id { break }
|
||||||
|
|
||||||
|
keys_to_delete.push(key.to_owned());
|
||||||
|
}
|
||||||
|
|
||||||
|
let count = keys_to_delete.len();
|
||||||
|
for key in keys_to_delete {
|
||||||
|
self.documents_fields_counts.delete(writer, key)?;
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(count)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn document_attribute_count<'a>(
|
||||||
|
&self,
|
||||||
|
reader: &'a impl rkv::Readable,
|
||||||
|
document_id: DocumentId,
|
||||||
|
attribute: SchemaAttr,
|
||||||
|
) -> Result<Option<u64>, rkv::StoreError>
|
||||||
|
{
|
||||||
|
let key = document_attribute_into_key(document_id, attribute);
|
||||||
|
|
||||||
|
match self.documents_fields_counts.get(reader, key)? {
|
||||||
|
Some(rkv::Value::U64(count)) => Ok(Some(count)),
|
||||||
|
Some(value) => panic!("invalid type {:?}", value),
|
||||||
|
None => Ok(None),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn document_fields_counts<'r, T: rkv::Readable>(
|
||||||
|
&self,
|
||||||
|
reader: &'r T,
|
||||||
|
document_id: DocumentId,
|
||||||
|
) -> Result<DocumentFieldsCountsIter<'r>, rkv::StoreError>
|
||||||
|
{
|
||||||
|
let document_id_bytes = document_id.0.to_be_bytes();
|
||||||
|
let iter = self.documents_fields_counts.iter_from(reader, document_id_bytes)?;
|
||||||
|
Ok(DocumentFieldsCountsIter { document_id, iter })
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn documents_ids<'r, T: rkv::Readable>(
|
||||||
|
&self,
|
||||||
|
reader: &'r T,
|
||||||
|
) -> Result<DocumentsIdsIter<'r>, rkv::StoreError>
|
||||||
|
{
|
||||||
|
let iter = self.documents_fields_counts.iter_start(reader)?;
|
||||||
|
Ok(DocumentsIdsIter { last_seen_id: None, iter })
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct DocumentFieldsCountsIter<'r> {
|
||||||
|
document_id: DocumentId,
|
||||||
|
iter: rkv::store::single::Iter<'r>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'r> Iterator for DocumentFieldsCountsIter<'r> {
|
||||||
|
type Item = Result<(SchemaAttr, u64), rkv::StoreError>;
|
||||||
|
|
||||||
|
fn next(&mut self) -> Option<Self::Item> {
|
||||||
|
match self.iter.next() {
|
||||||
|
Some(Ok((key, Some(rkv::Value::U64(count))))) => {
|
||||||
|
let array = TryFrom::try_from(key).unwrap();
|
||||||
|
let (current_document_id, attr) = document_attribute_from_key(array);
|
||||||
|
if current_document_id != self.document_id { return None; }
|
||||||
|
|
||||||
|
Some(Ok((attr, count)))
|
||||||
|
},
|
||||||
|
Some(Ok((key, data))) => panic!("{:?}, {:?}", key, data),
|
||||||
|
Some(Err(e)) => Some(Err(e)),
|
||||||
|
None => None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct DocumentsIdsIter<'r> {
|
||||||
|
last_seen_id: Option<DocumentId>,
|
||||||
|
iter: rkv::store::single::Iter<'r>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'r> Iterator for DocumentsIdsIter<'r> {
|
||||||
|
type Item = Result<DocumentId, rkv::StoreError>;
|
||||||
|
|
||||||
|
fn next(&mut self) -> Option<Self::Item> {
|
||||||
|
for result in &mut self.iter {
|
||||||
|
match result {
|
||||||
|
Ok((key, _)) => {
|
||||||
|
let array = TryFrom::try_from(key).unwrap();
|
||||||
|
let (document_id, _) = document_attribute_from_key(array);
|
||||||
|
if Some(document_id) != self.last_seen_id {
|
||||||
|
self.last_seen_id = Some(document_id);
|
||||||
|
return Some(Ok(document_id))
|
||||||
|
}
|
||||||
|
},
|
||||||
|
Err(e) => return Some(Err(e)),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
None
|
||||||
|
}
|
||||||
|
}
|
@ -3,7 +3,6 @@ use std::convert::TryInto;
|
|||||||
|
|
||||||
use meilidb_schema::Schema;
|
use meilidb_schema::Schema;
|
||||||
use rkv::Value;
|
use rkv::Value;
|
||||||
use serde::de;
|
|
||||||
use crate::{RankedMap, MResult};
|
use crate::{RankedMap, MResult};
|
||||||
|
|
||||||
const CUSTOMS_KEY: &str = "customs-key";
|
const CUSTOMS_KEY: &str = "customs-key";
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
mod docs_words;
|
mod docs_words;
|
||||||
mod documents_fields;
|
mod documents_fields;
|
||||||
|
mod documents_fields_counts;
|
||||||
mod main;
|
mod main;
|
||||||
mod postings_lists;
|
mod postings_lists;
|
||||||
mod synonyms;
|
mod synonyms;
|
||||||
@ -8,6 +9,7 @@ mod updates_results;
|
|||||||
|
|
||||||
pub use self::docs_words::DocsWords;
|
pub use self::docs_words::DocsWords;
|
||||||
pub use self::documents_fields::{DocumentsFields, DocumentFieldsIter};
|
pub use self::documents_fields::{DocumentsFields, DocumentFieldsIter};
|
||||||
|
pub use self::documents_fields_counts::{DocumentsFieldsCounts, DocumentFieldsCountsIter, DocumentsIdsIter};
|
||||||
pub use self::main::Main;
|
pub use self::main::Main;
|
||||||
pub use self::postings_lists::PostingsLists;
|
pub use self::postings_lists::PostingsLists;
|
||||||
pub use self::synonyms::Synonyms;
|
pub use self::synonyms::Synonyms;
|
||||||
@ -15,8 +17,11 @@ pub use self::updates::Updates;
|
|||||||
pub use self::updates_results::UpdatesResults;
|
pub use self::updates_results::UpdatesResults;
|
||||||
|
|
||||||
use std::collections::HashSet;
|
use std::collections::HashSet;
|
||||||
|
use std::convert::TryFrom;
|
||||||
|
|
||||||
use meilidb_schema::{Schema, SchemaAttr};
|
use meilidb_schema::{Schema, SchemaAttr};
|
||||||
use serde::{ser, de};
|
use serde::de;
|
||||||
|
|
||||||
use crate::criterion::Criteria;
|
use crate::criterion::Criteria;
|
||||||
use crate::serde::Deserializer;
|
use crate::serde::Deserializer;
|
||||||
use crate::{update, query_builder::QueryBuilder, DocumentId, MResult, Error};
|
use crate::{update, query_builder::QueryBuilder, DocumentId, MResult, Error};
|
||||||
@ -25,6 +30,31 @@ fn aligned_to(bytes: &[u8], align: usize) -> bool {
|
|||||||
(bytes as *const _ as *const () as usize) % align == 0
|
(bytes as *const _ as *const () as usize) % align == 0
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn document_attribute_into_key(document_id: DocumentId, attribute: SchemaAttr) -> [u8; 10] {
|
||||||
|
let document_id_bytes = document_id.0.to_be_bytes();
|
||||||
|
let attr_bytes = attribute.0.to_be_bytes();
|
||||||
|
|
||||||
|
let mut key = [0u8; 10];
|
||||||
|
key[0..8].copy_from_slice(&document_id_bytes);
|
||||||
|
key[8..10].copy_from_slice(&attr_bytes);
|
||||||
|
|
||||||
|
key
|
||||||
|
}
|
||||||
|
|
||||||
|
fn document_attribute_from_key(key: [u8; 10]) -> (DocumentId, SchemaAttr) {
|
||||||
|
let document_id = {
|
||||||
|
let array = TryFrom::try_from(&key[0..8]).unwrap();
|
||||||
|
DocumentId(u64::from_be_bytes(array))
|
||||||
|
};
|
||||||
|
|
||||||
|
let schema_attr = {
|
||||||
|
let array = TryFrom::try_from(&key[8..8+2]).unwrap();
|
||||||
|
SchemaAttr(u16::from_be_bytes(array))
|
||||||
|
};
|
||||||
|
|
||||||
|
(document_id, schema_attr)
|
||||||
|
}
|
||||||
|
|
||||||
fn main_name(name: &str) -> String {
|
fn main_name(name: &str) -> String {
|
||||||
format!("store-{}", name)
|
format!("store-{}", name)
|
||||||
}
|
}
|
||||||
@ -37,6 +67,10 @@ fn documents_fields_name(name: &str) -> String {
|
|||||||
format!("store-{}-documents-fields", name)
|
format!("store-{}-documents-fields", name)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn documents_fields_counts_name(name: &str) -> String {
|
||||||
|
format!("store-{}-documents-fields-counts", name)
|
||||||
|
}
|
||||||
|
|
||||||
fn synonyms_name(name: &str) -> String {
|
fn synonyms_name(name: &str) -> String {
|
||||||
format!("store-{}-synonyms", name)
|
format!("store-{}-synonyms", name)
|
||||||
}
|
}
|
||||||
@ -58,6 +92,7 @@ pub struct Index {
|
|||||||
pub main: Main,
|
pub main: Main,
|
||||||
pub postings_lists: PostingsLists,
|
pub postings_lists: PostingsLists,
|
||||||
pub documents_fields: DocumentsFields,
|
pub documents_fields: DocumentsFields,
|
||||||
|
pub documents_fields_counts: DocumentsFieldsCounts,
|
||||||
pub synonyms: Synonyms,
|
pub synonyms: Synonyms,
|
||||||
pub docs_words: DocsWords,
|
pub docs_words: DocsWords,
|
||||||
|
|
||||||
@ -205,6 +240,7 @@ fn open_options(
|
|||||||
let main_name = main_name(name);
|
let main_name = main_name(name);
|
||||||
let postings_lists_name = postings_lists_name(name);
|
let postings_lists_name = postings_lists_name(name);
|
||||||
let documents_fields_name = documents_fields_name(name);
|
let documents_fields_name = documents_fields_name(name);
|
||||||
|
let documents_fields_counts_name = documents_fields_counts_name(name);
|
||||||
let synonyms_name = synonyms_name(name);
|
let synonyms_name = synonyms_name(name);
|
||||||
let docs_words_name = docs_words_name(name);
|
let docs_words_name = docs_words_name(name);
|
||||||
let updates_name = updates_name(name);
|
let updates_name = updates_name(name);
|
||||||
@ -214,6 +250,7 @@ fn open_options(
|
|||||||
let main = env.open_single(main_name.as_str(), options)?;
|
let main = env.open_single(main_name.as_str(), options)?;
|
||||||
let postings_lists = env.open_single(postings_lists_name.as_str(), options)?;
|
let postings_lists = env.open_single(postings_lists_name.as_str(), options)?;
|
||||||
let documents_fields = env.open_single(documents_fields_name.as_str(), options)?;
|
let documents_fields = env.open_single(documents_fields_name.as_str(), options)?;
|
||||||
|
let documents_fields_counts = env.open_single(documents_fields_counts_name.as_str(), options)?;
|
||||||
let synonyms = env.open_single(synonyms_name.as_str(), options)?;
|
let synonyms = env.open_single(synonyms_name.as_str(), options)?;
|
||||||
let docs_words = env.open_single(docs_words_name.as_str(), options)?;
|
let docs_words = env.open_single(docs_words_name.as_str(), options)?;
|
||||||
let updates = env.open_single(updates_name.as_str(), options)?;
|
let updates = env.open_single(updates_name.as_str(), options)?;
|
||||||
@ -223,6 +260,7 @@ fn open_options(
|
|||||||
main: Main { main },
|
main: Main { main },
|
||||||
postings_lists: PostingsLists { postings_lists },
|
postings_lists: PostingsLists { postings_lists },
|
||||||
documents_fields: DocumentsFields { documents_fields },
|
documents_fields: DocumentsFields { documents_fields },
|
||||||
|
documents_fields_counts: DocumentsFieldsCounts { documents_fields_counts },
|
||||||
synonyms: Synonyms { synonyms },
|
synonyms: Synonyms { synonyms },
|
||||||
docs_words: DocsWords { docs_words },
|
docs_words: DocsWords { docs_words },
|
||||||
updates: Updates { updates },
|
updates: Updates { updates },
|
||||||
|
Loading…
Reference in New Issue
Block a user