mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-01-12 06:24:29 +01:00
feat: Introduce the meilidb-data schema module
This commit is contained in:
parent
287d5dee4d
commit
95dfbd1fe0
@ -14,8 +14,7 @@ log = "0.4.6"
|
|||||||
meilidb-tokenizer = { path = "../meilidb-tokenizer", version = "0.1.0" }
|
meilidb-tokenizer = { path = "../meilidb-tokenizer", version = "0.1.0" }
|
||||||
rayon = "1.0.3"
|
rayon = "1.0.3"
|
||||||
sdset = "0.3.1"
|
sdset = "0.3.1"
|
||||||
serde = "1.0.88"
|
serde = { version = "1.0.88", features = ["derive"] }
|
||||||
serde_derive = "1.0.88"
|
|
||||||
slice-group-by = "0.2.4"
|
slice-group-by = "0.2.4"
|
||||||
|
|
||||||
[features]
|
[features]
|
||||||
|
@ -9,7 +9,7 @@ pub mod shared_data_cursor;
|
|||||||
pub mod write_to_bytes;
|
pub mod write_to_bytes;
|
||||||
|
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
use serde_derive::{Serialize, Deserialize};
|
use serde::{Serialize, Deserialize};
|
||||||
|
|
||||||
use slice_group_by::GroupBy;
|
use slice_group_by::GroupBy;
|
||||||
use rayon::slice::ParallelSliceMut;
|
use rayon::slice::ParallelSliceMut;
|
||||||
|
@ -5,4 +5,10 @@ authors = ["Kerollmops <renault.cle@gmail.com>"]
|
|||||||
edition = "2018"
|
edition = "2018"
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
|
bincode = "1.1.2"
|
||||||
|
linked-hash-map = { version = "0.5.2", features = ["serde_impl"] }
|
||||||
|
meilidb-core = { path = "../meilidb-core", version = "0.1.0" }
|
||||||
|
serde = { version = "1.0.88", features = ["derive"] }
|
||||||
|
serde_json = { version = "1.0.39", features = ["preserve_order"] }
|
||||||
sled = "0.20.0"
|
sled = "0.20.0"
|
||||||
|
toml = { version = "0.5.0", features = ["preserve_order"] }
|
||||||
|
@ -1,3 +1,5 @@
|
|||||||
mod database;
|
mod database;
|
||||||
|
mod schema;
|
||||||
|
|
||||||
pub use self::database::{Database, Index};
|
pub use self::database::{Database, Index};
|
||||||
|
pub use self::schema::{Schema, SchemaAttr, SchemaBuilder};
|
||||||
|
309
meilidb-data/src/schema.rs
Normal file
309
meilidb-data/src/schema.rs
Normal file
@ -0,0 +1,309 @@
|
|||||||
|
use std::collections::{HashMap, BTreeMap};
|
||||||
|
use std::io::{Read, Write};
|
||||||
|
use std::error::Error;
|
||||||
|
use std::{fmt, u16};
|
||||||
|
use std::ops::BitOr;
|
||||||
|
use std::sync::Arc;
|
||||||
|
|
||||||
|
use serde::{Serialize, Deserialize};
|
||||||
|
use linked_hash_map::LinkedHashMap;
|
||||||
|
|
||||||
|
use meilidb_core::DocumentId;
|
||||||
|
|
||||||
|
pub const STORED: SchemaProps = SchemaProps { stored: true, indexed: false, ranked: false };
|
||||||
|
pub const INDEXED: SchemaProps = SchemaProps { stored: false, indexed: true, ranked: false };
|
||||||
|
pub const RANKED: SchemaProps = SchemaProps { stored: false, indexed: false, ranked: true };
|
||||||
|
|
||||||
|
#[derive(Debug, Copy, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||||
|
pub struct SchemaProps {
|
||||||
|
#[serde(default)]
|
||||||
|
stored: bool,
|
||||||
|
|
||||||
|
#[serde(default)]
|
||||||
|
indexed: bool,
|
||||||
|
|
||||||
|
#[serde(default)]
|
||||||
|
ranked: bool,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl SchemaProps {
|
||||||
|
pub fn is_stored(self) -> bool {
|
||||||
|
self.stored
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn is_indexed(self) -> bool {
|
||||||
|
self.indexed
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn is_ranked(self) -> bool {
|
||||||
|
self.ranked
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl BitOr for SchemaProps {
|
||||||
|
type Output = Self;
|
||||||
|
|
||||||
|
fn bitor(self, other: Self) -> Self::Output {
|
||||||
|
SchemaProps {
|
||||||
|
stored: self.stored | other.stored,
|
||||||
|
indexed: self.indexed | other.indexed,
|
||||||
|
ranked: self.ranked | other.ranked,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Serialize, Deserialize)]
|
||||||
|
pub struct SchemaBuilder {
|
||||||
|
identifier: String,
|
||||||
|
attributes: LinkedHashMap<String, SchemaProps>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl SchemaBuilder {
|
||||||
|
pub fn with_identifier<S: Into<String>>(name: S) -> SchemaBuilder {
|
||||||
|
SchemaBuilder {
|
||||||
|
identifier: name.into(),
|
||||||
|
attributes: LinkedHashMap::new(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn new_attribute<S: Into<String>>(&mut self, name: S, props: SchemaProps) -> SchemaAttr {
|
||||||
|
let len = self.attributes.len();
|
||||||
|
if self.attributes.insert(name.into(), props).is_some() {
|
||||||
|
panic!("Field already inserted.")
|
||||||
|
}
|
||||||
|
SchemaAttr(len as u16)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn build(self) -> Schema {
|
||||||
|
let mut attrs = HashMap::new();
|
||||||
|
let mut props = Vec::new();
|
||||||
|
|
||||||
|
for (i, (name, prop)) in self.attributes.into_iter().enumerate() {
|
||||||
|
attrs.insert(name.clone(), SchemaAttr(i as u16));
|
||||||
|
props.push((name, prop));
|
||||||
|
}
|
||||||
|
|
||||||
|
let identifier = self.identifier;
|
||||||
|
Schema { inner: Arc::new(InnerSchema { identifier, attrs, props }) }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||||
|
pub struct Schema {
|
||||||
|
inner: Arc<InnerSchema>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||||
|
struct InnerSchema {
|
||||||
|
identifier: String,
|
||||||
|
attrs: HashMap<String, SchemaAttr>,
|
||||||
|
props: Vec<(String, SchemaProps)>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Schema {
|
||||||
|
pub fn from_toml<R: Read>(mut reader: R) -> Result<Schema, Box<Error>> {
|
||||||
|
let mut buffer = Vec::new();
|
||||||
|
reader.read_to_end(&mut buffer)?;
|
||||||
|
let builder: SchemaBuilder = toml::from_slice(&buffer)?;
|
||||||
|
Ok(builder.build())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn to_toml<W: Write>(&self, mut writer: W) -> Result<(), Box<Error>> {
|
||||||
|
let identifier = self.inner.identifier.clone();
|
||||||
|
let attributes = self.attributes_ordered();
|
||||||
|
let builder = SchemaBuilder { identifier, attributes };
|
||||||
|
|
||||||
|
let string = toml::to_string_pretty(&builder)?;
|
||||||
|
writer.write_all(string.as_bytes())?;
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn from_json<R: Read>(mut reader: R) -> Result<Schema, Box<Error>> {
|
||||||
|
let mut buffer = Vec::new();
|
||||||
|
reader.read_to_end(&mut buffer)?;
|
||||||
|
let builder: SchemaBuilder = serde_json::from_slice(&buffer)?;
|
||||||
|
Ok(builder.build())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn to_json<W: Write>(&self, mut writer: W) -> Result<(), Box<Error>> {
|
||||||
|
let identifier = self.inner.identifier.clone();
|
||||||
|
let attributes = self.attributes_ordered();
|
||||||
|
let builder = SchemaBuilder { identifier, attributes };
|
||||||
|
let string = serde_json::to_string_pretty(&builder)?;
|
||||||
|
writer.write_all(string.as_bytes())?;
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn read_from_bin<R: Read>(reader: R) -> bincode::Result<Schema> {
|
||||||
|
let builder: SchemaBuilder = bincode::deserialize_from(reader)?;
|
||||||
|
Ok(builder.build())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn write_to_bin<W: Write>(&self, writer: W) -> bincode::Result<()> {
|
||||||
|
let identifier = self.inner.identifier.clone();
|
||||||
|
let attributes = self.attributes_ordered();
|
||||||
|
let builder = SchemaBuilder { identifier, attributes };
|
||||||
|
|
||||||
|
bincode::serialize_into(writer, &builder)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn attributes_ordered(&self) -> LinkedHashMap<String, SchemaProps> {
|
||||||
|
let mut ordered = BTreeMap::new();
|
||||||
|
for (name, attr) in &self.inner.attrs {
|
||||||
|
let (_, props) = self.inner.props[attr.0 as usize];
|
||||||
|
ordered.insert(attr.0, (name, props));
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut attributes = LinkedHashMap::with_capacity(ordered.len());
|
||||||
|
for (_, (name, props)) in ordered {
|
||||||
|
attributes.insert(name.clone(), props);
|
||||||
|
}
|
||||||
|
|
||||||
|
attributes
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn props(&self, attr: SchemaAttr) -> SchemaProps {
|
||||||
|
let (_, props) = self.inner.props[attr.0 as usize];
|
||||||
|
props
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn identifier_name(&self) -> &str {
|
||||||
|
&self.inner.identifier
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn attribute<S: AsRef<str>>(&self, name: S) -> Option<SchemaAttr> {
|
||||||
|
self.inner.attrs.get(name.as_ref()).cloned()
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn attribute_name(&self, attr: SchemaAttr) -> &str {
|
||||||
|
let (name, _) = &self.inner.props[attr.0 as usize];
|
||||||
|
name
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Serialize, Deserialize)]
|
||||||
|
#[derive(Debug, Copy, Clone, PartialOrd, Ord, PartialEq, Eq, Hash)]
|
||||||
|
pub struct SchemaAttr(pub u16);
|
||||||
|
|
||||||
|
impl SchemaAttr {
|
||||||
|
pub fn new(value: u16) -> SchemaAttr {
|
||||||
|
SchemaAttr(value)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn min() -> SchemaAttr {
|
||||||
|
SchemaAttr(0)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn next(self) -> Option<SchemaAttr> {
|
||||||
|
self.0.checked_add(1).map(SchemaAttr)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn prev(self) -> Option<SchemaAttr> {
|
||||||
|
self.0.checked_sub(1).map(SchemaAttr)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn max() -> SchemaAttr {
|
||||||
|
SchemaAttr(u16::MAX)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl fmt::Display for SchemaAttr {
|
||||||
|
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||||
|
self.0.fmt(f)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::*;
    use std::error::Error;

    /// Builds the three-attribute schema shared by every round-trip test.
    fn sample_schema() -> Schema {
        let mut builder = SchemaBuilder::with_identifier("id");
        builder.new_attribute("alpha", STORED);
        builder.new_attribute("beta", STORED | INDEXED);
        builder.new_attribute("gamma", INDEXED);
        builder.build()
    }

    /// A schema written with bincode is read back unchanged.
    #[test]
    fn serialize_deserialize() -> bincode::Result<()> {
        let schema = sample_schema();

        let mut buffer = Vec::new();
        schema.write_to_bin(&mut buffer)?;
        let schema2 = Schema::read_from_bin(buffer.as_slice())?;

        assert_eq!(schema, schema2);

        Ok(())
    }

    /// A schema survives a TOML round trip, and a hand-written TOML document
    /// that omits the `false` flags parses to the same schema.
    #[test]
    fn serialize_deserialize_toml() -> Result<(), Box<dyn Error>> {
        let schema = sample_schema();

        let mut buffer = Vec::new();
        schema.to_toml(&mut buffer)?;

        let schema2 = Schema::from_toml(buffer.as_slice())?;
        assert_eq!(schema, schema2);

        let data = r#"
            identifier = "id"

            [attributes."alpha"]
            stored = true

            [attributes."beta"]
            stored = true
            indexed = true

            [attributes."gamma"]
            indexed = true
        "#;
        let schema2 = Schema::from_toml(data.as_bytes())?;
        assert_eq!(schema, schema2);

        Ok(())
    }

    /// A schema survives a JSON round trip, and a hand-written JSON document
    /// that omits the `false` flags parses to the same schema.
    #[test]
    fn serialize_deserialize_json() -> Result<(), Box<dyn Error>> {
        let schema = sample_schema();

        let mut buffer = Vec::new();
        schema.to_json(&mut buffer)?;

        let schema2 = Schema::from_json(buffer.as_slice())?;
        assert_eq!(schema, schema2);

        let data = r#"
            {
                "identifier": "id",
                "attributes": {
                    "alpha": {
                        "stored": true
                    },
                    "beta": {
                        "stored": true,
                        "indexed": true
                    },
                    "gamma": {
                        "indexed": true
                    }
                }
            }"#;
        let schema2 = Schema::from_json(data.as_bytes())?;
        assert_eq!(schema, schema2);

        Ok(())
    }
}
|
@ -6,6 +6,7 @@ authors = ["Kerollmops <renault.cle@gmail.com>"]
|
|||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
meilidb-core = { path = "../meilidb-core", version = "0.1.0" }
|
meilidb-core = { path = "../meilidb-core", version = "0.1.0" }
|
||||||
|
meilidb-data = { path = "../meilidb-data", version = "0.1.0" }
|
||||||
meilidb-tokenizer = { path = "../meilidb-tokenizer", version = "0.1.0" }
|
meilidb-tokenizer = { path = "../meilidb-tokenizer", version = "0.1.0" }
|
||||||
|
|
||||||
[features]
|
[features]
|
||||||
|
@ -5,8 +5,8 @@ use std::fmt;
|
|||||||
use meilidb_core::criterion::Criterion;
|
use meilidb_core::criterion::Criterion;
|
||||||
use meilidb_core::RawDocument;
|
use meilidb_core::RawDocument;
|
||||||
|
|
||||||
use crate::database::schema::{Schema, SchemaAttr};
|
use meilidb_data::{Schema, SchemaAttr};
|
||||||
use crate::database::RankedMap;
|
use meilidb_data::RankedMap;
|
||||||
|
|
||||||
/// A helper struct that permits sorting documents by
|
/// A helper struct that permits sorting documents by
|
||||||
/// some of their stored attributes.
|
/// some of their stored attributes.
|
||||||
|
Loading…
x
Reference in New Issue
Block a user