mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-22 21:04:27 +01:00
feat: Introduce typed keys constructors
This commit is contained in:
parent
66dac923bf
commit
a43a772e9a
@ -16,6 +16,7 @@ use fst::Map;
|
||||
use uuid::Uuid;
|
||||
use rocksdb::rocksdb::{DB, Snapshot};
|
||||
|
||||
use crate::index::identifier::Identifier;
|
||||
use crate::data::DocIndexes;
|
||||
|
||||
pub enum Blob {
|
||||
@ -54,6 +55,10 @@ impl BlobName {
|
||||
/// Creates a `BlobName` wrapping a freshly generated UUID (`Uuid::new_v4`).
pub fn new() -> BlobName {
|
||||
BlobName(Uuid::new_v4())
|
||||
}
|
||||
|
||||
/// Returns the 16 raw bytes of the UUID wrapped by this name.
pub fn as_bytes(&self) -> &[u8; 16] {
|
||||
self.0.as_bytes()
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for BlobName {
|
||||
@ -113,21 +118,21 @@ pub fn blobs_from_blob_infos(infos: &[BlobInfo], snapshot: &Snapshot<&DB>) -> Re
|
||||
for info in infos {
|
||||
let blob = match info.sign {
|
||||
Sign::Positive => {
|
||||
let key_map = format!("blob-{}-fst", info.name);
|
||||
let map = match snapshot.get(key_map.as_bytes())? {
|
||||
let blob_key = Identifier::blob(info.name).fst_map().build();
|
||||
let map = match snapshot.get(&blob_key)? {
|
||||
Some(value) => value.to_vec(),
|
||||
None => return Err(format!("No fst entry found for blob {}", info.name).into()),
|
||||
};
|
||||
let key_doc_idx = format!("blob-{}-doc-idx", info.name);
|
||||
let doc_idx = match snapshot.get(key_doc_idx.as_bytes())? {
|
||||
let blob_key = Identifier::blob(info.name).document_indexes().build();
|
||||
let doc_idx = match snapshot.get(&blob_key)? {
|
||||
Some(value) => value.to_vec(),
|
||||
None => return Err(format!("No doc-idx entry found for blob {}", info.name).into()),
|
||||
};
|
||||
PositiveBlob::from_bytes(map, doc_idx).map(Blob::Positive)?
|
||||
},
|
||||
Sign::Negative => {
|
||||
let key_doc_ids = format!("blob-{}-doc-ids", info.name);
|
||||
let doc_ids = match snapshot.get(key_doc_ids.as_bytes())? {
|
||||
let blob_key = Identifier::blob(info.name).document_ids().build();
|
||||
let doc_ids = match snapshot.get(&blob_key)? {
|
||||
Some(value) => value.to_vec(),
|
||||
None => return Err(format!("No doc-ids entry found for blob {}", info.name).into()),
|
||||
};
|
||||
|
@ -23,7 +23,8 @@ impl DocIds {
|
||||
Ok(DocIds { doc_ids })
|
||||
}
|
||||
|
||||
pub fn from_bytes(vec: Vec<u8>) -> io::Result<Self> {
|
||||
pub fn from_bytes(vec: Vec<u8>) -> Result<Self, Box<Error>> {
|
||||
// FIXME check if modulo DocumentId
|
||||
let len = vec.len();
|
||||
let doc_ids = Data::Shared {
|
||||
vec: Arc::new(vec),
|
||||
|
100
src/index/identifier.rs
Normal file
100
src/index/identifier.rs
Normal file
@ -0,0 +1,100 @@
|
||||
use std::io::Write;
|
||||
|
||||
use byteorder::{NetworkEndian, WriteBytesExt};
|
||||
|
||||
use crate::index::schema::SchemaAttr;
|
||||
use crate::blob::BlobName;
|
||||
use crate::DocumentId;
|
||||
|
||||
pub struct Identifier {
|
||||
inner: Vec<u8>,
|
||||
}
|
||||
|
||||
impl Identifier {
|
||||
pub fn data() -> Data {
|
||||
let mut inner = Vec::new();
|
||||
let _ = inner.write(b"data");
|
||||
Data { inner }
|
||||
}
|
||||
|
||||
pub fn blob(name: BlobName) -> Blob {
|
||||
let mut inner = Vec::new();
|
||||
let _ = inner.write(b"blob");
|
||||
let _ = inner.write(name.as_bytes());
|
||||
Blob { inner }
|
||||
}
|
||||
|
||||
pub fn document(id: DocumentId) -> Document {
|
||||
let mut inner = Vec::new();
|
||||
let _ = inner.write(b"docu");
|
||||
let _ = inner.write(b"-");
|
||||
let _ = inner.write_u64::<NetworkEndian>(id);
|
||||
Document { inner }
|
||||
}
|
||||
}
|
||||
|
||||
/// Builder for keys of the `data` family, obtained from `Identifier::data`.
///
/// Every method appends a `-`-prefixed suffix to the bytes accumulated so far
/// and hands the builder back so calls can be chained.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Data {
    inner: Vec<u8>,
}

impl Data {
    /// Appends `-blobs-order` to the key.
    pub fn blobs_order(mut self) -> Self {
        // Appending to a `Vec<u8>` cannot fail, so no `io::Result` is involved.
        self.inner.extend_from_slice(b"-blobs-order");
        self
    }

    /// Appends `-schema` to the key.
    pub fn schema(mut self) -> Self {
        self.inner.extend_from_slice(b"-schema");
        self
    }

    /// Consumes the builder and returns the key bytes.
    pub fn build(self) -> Vec<u8> {
        self.inner
    }
}
|
||||
|
||||
/// Builder for the keys of one blob, obtained from `Identifier::blob`.
///
/// Every method appends a `-`-prefixed suffix to the bytes accumulated so far
/// and hands the builder back so calls can be chained.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Blob {
    inner: Vec<u8>,
}

impl Blob {
    /// Appends `-doc-idx` to the key.
    pub fn document_indexes(mut self) -> Self {
        // Appending to a `Vec<u8>` cannot fail, so no `io::Result` is involved.
        self.inner.extend_from_slice(b"-doc-idx");
        self
    }

    /// Appends `-doc-ids` to the key.
    pub fn document_ids(mut self) -> Self {
        self.inner.extend_from_slice(b"-doc-ids");
        self
    }

    /// Appends `-fst` to the key.
    pub fn fst_map(mut self) -> Self {
        self.inner.extend_from_slice(b"-fst");
        self
    }

    /// Consumes the builder and returns the key bytes.
    pub fn build(self) -> Vec<u8> {
        self.inner
    }
}
|
||||
|
||||
pub struct Document {
|
||||
inner: Vec<u8>,
|
||||
}
|
||||
|
||||
impl Document {
|
||||
pub fn attribute(mut self, attr: SchemaAttr) -> Self {
|
||||
let _ = self.inner.write(b"-");
|
||||
let _ = self.inner.write_u32::<NetworkEndian>(attr.as_u32());
|
||||
self
|
||||
}
|
||||
|
||||
pub fn build(self) -> Vec<u8> {
|
||||
self.inner
|
||||
}
|
||||
}
|
@ -1,3 +1,4 @@
|
||||
pub mod identifier;
|
||||
pub mod schema;
|
||||
pub mod update;
|
||||
|
||||
@ -20,18 +21,12 @@ use crate::data::DocIdsBuilder;
|
||||
use crate::{DocIndex, DocumentId};
|
||||
use crate::index::schema::Schema;
|
||||
use crate::index::update::Update;
|
||||
use crate::index::identifier::Identifier;
|
||||
use crate::blob::{PositiveBlobBuilder, BlobInfo, Sign, Blob, blobs_from_blob_infos};
|
||||
use crate::tokenizer::{TokenizerBuilder, DefaultBuilder, Tokenizer};
|
||||
use crate::rank::{criterion, Config, RankedStream};
|
||||
use crate::automaton;
|
||||
|
||||
const DATA_PREFIX: &str = "data";
|
||||
const BLOB_PREFIX: &str = "blob";
|
||||
const DOCU_PREFIX: &str = "docu";
|
||||
|
||||
const DATA_BLOBS_ORDER: &str = "data-blobs-order";
|
||||
const DATA_SCHEMA: &str = "data-schema";
|
||||
|
||||
fn simple_vec_append(key: &[u8], value: Option<&[u8]>, operands: &mut MergeOperands) -> Vec<u8> {
|
||||
let mut output = Vec::new();
|
||||
for bytes in operands.chain(value) {
|
||||
@ -67,7 +62,8 @@ impl Index {
|
||||
|
||||
let mut schema_bytes = Vec::new();
|
||||
schema.write_to(&mut schema_bytes)?;
|
||||
database.put(DATA_SCHEMA.as_bytes(), &schema_bytes)?;
|
||||
let data_key = Identifier::data().schema().build();
|
||||
database.put(&data_key, &schema_bytes)?;
|
||||
|
||||
Ok(Self { database })
|
||||
}
|
||||
@ -83,7 +79,8 @@ impl Index {
|
||||
|
||||
let database = rocksdb::DB::open_cf(opts, &path, vec![("default", cf_opts)])?;
|
||||
|
||||
let _schema = match database.get(DATA_SCHEMA.as_bytes())? {
|
||||
let data_key = Identifier::data().schema().build();
|
||||
let _schema = match database.get(&data_key)? {
|
||||
Some(value) => Schema::read_from(&*value)?,
|
||||
None => return Err(String::from("Database does not contain a schema").into()),
|
||||
};
|
||||
@ -105,7 +102,8 @@ impl Index {
|
||||
}
|
||||
|
||||
pub fn schema(&self) -> Result<Schema, Box<Error>> {
|
||||
let bytes = self.database.get(DATA_SCHEMA.as_bytes())?.expect("data-schema entry not found");
|
||||
let data_key = Identifier::data().schema().build();
|
||||
let bytes = self.database.get(&data_key)?.expect("data-schema entry not found");
|
||||
Ok(Schema::read_from(&*bytes).expect("Invalid schema"))
|
||||
}
|
||||
|
||||
@ -113,7 +111,8 @@ impl Index {
|
||||
// this snapshot will allow consistent reads for the whole search operation
|
||||
let snapshot = self.database.snapshot();
|
||||
|
||||
let blobs = match snapshot.get(DATA_BLOBS_ORDER.as_bytes())? {
|
||||
let data_key = Identifier::data().blobs_order().build();
|
||||
let blobs = match snapshot.get(&data_key)? {
|
||||
Some(value) => {
|
||||
let blob_infos = BlobInfo::read_from_slice(&value)?;
|
||||
blobs_from_blob_infos(&blob_infos, &snapshot)?
|
||||
|
@ -4,9 +4,9 @@ use std::error::Error;
|
||||
use ::rocksdb::rocksdb_options;
|
||||
|
||||
use crate::blob::BlobInfo;
|
||||
use crate::index::DATA_BLOBS_ORDER;
|
||||
use crate::index::update::Update;
|
||||
use crate::data::DocIdsBuilder;
|
||||
use crate::index::identifier::Identifier;
|
||||
use crate::data::{DocIds, DocIdsBuilder};
|
||||
use crate::DocumentId;
|
||||
|
||||
pub struct NegativeUpdateBuilder {
|
||||
@ -35,21 +35,23 @@ impl NegativeUpdateBuilder {
|
||||
file_writer.open(&self.path.to_string_lossy())?;
|
||||
|
||||
// write the doc ids
|
||||
let blob_key = format!("blob-{}-doc-ids", blob_info.name);
|
||||
let blob_key = Identifier::blob(blob_info.name).document_ids().build();
|
||||
let blob_doc_ids = self.doc_ids.into_inner()?;
|
||||
file_writer.put(blob_key.as_bytes(), &blob_doc_ids)?;
|
||||
file_writer.put(&blob_key, &blob_doc_ids)?;
|
||||
|
||||
{
|
||||
// write the blob name to be merged
|
||||
let mut buffer = Vec::new();
|
||||
blob_info.write_into(&mut buffer);
|
||||
file_writer.merge(DATA_BLOBS_ORDER.as_bytes(), &buffer)?;
|
||||
let data_key = Identifier::data().blobs_order().build();
|
||||
file_writer.merge(&data_key, &buffer)?;
|
||||
}
|
||||
|
||||
for id in blob_doc_ids {
|
||||
let start = format!("docu-{}", id);
|
||||
let end = format!("docu-{}", id + 1);
|
||||
file_writer.delete_range(start.as_bytes(), end.as_bytes())?;
|
||||
let blob_doc_ids = DocIds::from_bytes(blob_doc_ids)?;
|
||||
for id in blob_doc_ids.doc_ids().iter().cloned() {
|
||||
let start = Identifier::document(id).build();
|
||||
let end = Identifier::document(id + 1).build();
|
||||
file_writer.delete_range(&start, &end)?;
|
||||
}
|
||||
|
||||
file_writer.finish()?;
|
||||
|
@ -5,8 +5,8 @@ use std::fmt::Write;
|
||||
|
||||
use ::rocksdb::rocksdb_options;
|
||||
|
||||
use crate::index::DATA_BLOBS_ORDER;
|
||||
use crate::index::update::Update;
|
||||
use crate::index::identifier::Identifier;
|
||||
use crate::index::schema::{SchemaProps, Schema, SchemaAttr};
|
||||
use crate::tokenizer::TokenizerBuilder;
|
||||
use crate::blob::{BlobInfo, PositiveBlobBuilder};
|
||||
@ -88,34 +88,29 @@ where B: TokenizerBuilder
|
||||
let (blob_fst_map, blob_doc_idx) = builder.into_inner()?;
|
||||
|
||||
// write the doc-idx
|
||||
let blob_key = format!("blob-{}-doc-idx", blob_info.name);
|
||||
file_writer.put(blob_key.as_bytes(), &blob_doc_idx)?;
|
||||
let blob_key = Identifier::blob(blob_info.name).document_indexes().build();
|
||||
file_writer.put(&blob_key, &blob_doc_idx)?;
|
||||
|
||||
// write the fst
|
||||
let blob_key = format!("blob-{}-fst", blob_info.name);
|
||||
file_writer.put(blob_key.as_bytes(), &blob_fst_map)?;
|
||||
let blob_key = Identifier::blob(blob_info.name).fst_map().build();
|
||||
file_writer.put(&blob_key, &blob_fst_map)?;
|
||||
|
||||
{
|
||||
// write the blob name to be merged
|
||||
let mut buffer = Vec::new();
|
||||
blob_info.write_into(&mut buffer);
|
||||
file_writer.merge(DATA_BLOBS_ORDER.as_bytes(), &buffer)?;
|
||||
let data_key = Identifier::data().blobs_order().build();
|
||||
file_writer.merge(&data_key, &buffer)?;
|
||||
}
|
||||
|
||||
// write all the documents fields updates
|
||||
let mut key = String::from("docu-");
|
||||
let prefix_len = key.len();
|
||||
|
||||
// FIXME write numbers in bytes not decimal representation
|
||||
|
||||
for ((id, field), state) in self.new_states {
|
||||
key.truncate(prefix_len);
|
||||
write!(&mut key, "{}-{}", id, field)?;
|
||||
for ((id, attr), state) in self.new_states {
|
||||
let key = Identifier::document(id).attribute(attr).build();
|
||||
match state {
|
||||
NewState::Updated { value, props } => if props.is_stored() {
|
||||
file_writer.put(key.as_bytes(), value.as_bytes())?
|
||||
file_writer.put(&key, value.as_bytes())?
|
||||
},
|
||||
NewState::Removed => file_writer.delete(key.as_bytes())?,
|
||||
NewState::Removed => file_writer.delete(&key)?,
|
||||
}
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user