mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-07-04 12:27:13 +02:00
feat: Introduce the Schema
This commit is contained in:
parent
7c1a17520d
commit
8df068af3c
9 changed files with 85 additions and 571 deletions
|
@ -136,12 +136,12 @@ mod tests {
|
|||
let mut builder = PositiveUpdateBuilder::new("update-positive-0001.sst", schema.clone(), tokenizer_builder);
|
||||
|
||||
// you can insert documents in any order, it is sorted internally
|
||||
let title_field = schema.field("title").unwrap();
|
||||
let title_field = schema.attribute("title").unwrap();
|
||||
builder.update_field(1, title_field, "hallo!".to_owned());
|
||||
builder.update_field(5, title_field, "hello!".to_owned());
|
||||
builder.update_field(2, title_field, "hi!".to_owned());
|
||||
|
||||
let name_field = schema.field("name").unwrap();
|
||||
let name_field = schema.attribute("name").unwrap();
|
||||
builder.remove_field(4, name_field);
|
||||
|
||||
let update = builder.build()?;
|
||||
|
|
|
@ -1,14 +1,16 @@
|
|||
use std::collections::{HashMap, BTreeMap};
|
||||
use std::io::{Read, Write};
|
||||
use std::error::Error;
|
||||
use std::path::Path;
|
||||
use std::ops::BitOr;
|
||||
use std::fs::File;
|
||||
use std::fmt;
|
||||
|
||||
use linked_hash_map::LinkedHashMap;
|
||||
|
||||
pub const STORED: SchemaProps = SchemaProps { stored: true, indexed: false };
|
||||
pub const INDEXED: SchemaProps = SchemaProps { stored: false, indexed: true };
|
||||
|
||||
#[derive(Copy, Clone)]
|
||||
#[derive(Debug, Copy, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
pub struct SchemaProps {
|
||||
stored: bool,
|
||||
indexed: bool,
|
||||
|
@ -36,66 +38,110 @@ impl BitOr for SchemaProps {
|
|||
}
|
||||
|
||||
pub struct SchemaBuilder {
|
||||
fields: Vec<(String, SchemaProps)>,
|
||||
attrs: LinkedHashMap<String, SchemaProps>,
|
||||
}
|
||||
|
||||
impl SchemaBuilder {
|
||||
pub fn new() -> SchemaBuilder {
|
||||
SchemaBuilder { fields: Vec::new() }
|
||||
SchemaBuilder { attrs: LinkedHashMap::new() }
|
||||
}
|
||||
|
||||
pub fn field<N>(&mut self, name: N, props: SchemaProps) -> SchemaField
|
||||
where N: Into<String>,
|
||||
{
|
||||
let len = self.fields.len();
|
||||
let name = name.into();
|
||||
self.fields.push((name, props));
|
||||
|
||||
SchemaField(len as u32)
|
||||
pub fn new_field<S: Into<String>>(&mut self, name: S, props: SchemaProps) -> SchemaAttr {
|
||||
let len = self.attrs.len();
|
||||
self.attrs.insert(name.into(), props);
|
||||
SchemaAttr(len as u32)
|
||||
}
|
||||
|
||||
pub fn build(self) -> Schema {
|
||||
unimplemented!()
|
||||
let mut attrs = HashMap::new();
|
||||
let mut props = Vec::new();
|
||||
|
||||
for (i, (name, prop)) in self.attrs.into_iter().enumerate() {
|
||||
attrs.insert(name, SchemaAttr(i as u32));
|
||||
props.push(prop);
|
||||
}
|
||||
|
||||
Schema { attrs, props }
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct Schema;
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub struct Schema {
|
||||
attrs: HashMap<String, SchemaAttr>,
|
||||
props: Vec<SchemaProps>,
|
||||
}
|
||||
|
||||
impl Schema {
|
||||
pub fn open<P: AsRef<Path>>(path: P) -> Result<Schema, Box<Error>> {
|
||||
pub fn open<P: AsRef<Path>>(path: P) -> bincode::Result<Schema> {
|
||||
let file = File::open(path)?;
|
||||
Schema::read_from(file)
|
||||
}
|
||||
|
||||
pub fn read_from<R: Read>(reader: R) -> Result<Schema, Box<Error>> {
|
||||
unimplemented!()
|
||||
pub fn read_from<R: Read>(reader: R) -> bincode::Result<Schema> {
|
||||
let attrs = bincode::deserialize_from(reader)?;
|
||||
let builder = SchemaBuilder { attrs };
|
||||
Ok(builder.build())
|
||||
}
|
||||
|
||||
pub fn write_to<W: Write>(writer: W) -> Result<(), Box<Error>> {
|
||||
unimplemented!()
|
||||
pub fn write_to<W: Write>(&self, writer: W) -> bincode::Result<()> {
|
||||
let mut ordered = BTreeMap::new();
|
||||
for (name, field) in &self.attrs {
|
||||
let index = field.as_u32();
|
||||
let props = self.props[index as usize];
|
||||
ordered.insert(index, (name, props));
|
||||
}
|
||||
|
||||
let mut attrs = LinkedHashMap::with_capacity(ordered.len());
|
||||
for (_, (name, props)) in ordered {
|
||||
attrs.insert(name, props);
|
||||
}
|
||||
|
||||
bincode::serialize_into(writer, &attrs)
|
||||
}
|
||||
|
||||
pub fn props(&self, field: SchemaField) -> SchemaProps {
|
||||
unimplemented!()
|
||||
pub fn props(&self, attr: SchemaAttr) -> SchemaProps {
|
||||
self.props[attr.as_u32() as usize]
|
||||
}
|
||||
|
||||
pub fn field(&self, name: &str) -> Option<SchemaField> {
|
||||
unimplemented!()
|
||||
pub fn attribute<S: AsRef<str>>(&self, name: S) -> Option<SchemaAttr> {
|
||||
self.attrs.get(name.as_ref()).cloned()
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Copy, Clone, PartialOrd, Ord, PartialEq, Eq)]
|
||||
pub struct SchemaField(u32);
|
||||
#[derive(Debug, Copy, Clone, PartialOrd, Ord, PartialEq, Eq)]
|
||||
pub struct SchemaAttr(u32);
|
||||
|
||||
impl SchemaField {
|
||||
impl SchemaAttr {
|
||||
pub fn as_u32(&self) -> u32 {
|
||||
self.0
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for SchemaField {
|
||||
impl fmt::Display for SchemaAttr {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(f, "{}", self.0)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn serialize_deserialize() -> bincode::Result<()> {
|
||||
let mut builder = SchemaBuilder::new();
|
||||
builder.new_field("alphabet", STORED);
|
||||
builder.new_field("beta", STORED | INDEXED);
|
||||
builder.new_field("gamma", INDEXED);
|
||||
let schema = builder.build();
|
||||
|
||||
let mut buffer = Vec::new();
|
||||
|
||||
schema.write_to(&mut buffer)?;
|
||||
let schema2 = Schema::read_from(buffer.as_slice())?;
|
||||
|
||||
assert_eq!(schema, schema2);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
|
|
@ -5,7 +5,7 @@ use std::fmt::Write;
|
|||
|
||||
use ::rocksdb::rocksdb_options;
|
||||
|
||||
use crate::index::schema::{SchemaProps, Schema, SchemaField};
|
||||
use crate::index::schema::{SchemaProps, Schema, SchemaAttr};
|
||||
use crate::index::update::{FIELD_BLOBS_ORDER, Update};
|
||||
use crate::tokenizer::TokenizerBuilder;
|
||||
use crate::index::blob_name::BlobName;
|
||||
|
@ -24,7 +24,7 @@ pub struct PositiveUpdateBuilder<B> {
|
|||
path: PathBuf,
|
||||
schema: Schema,
|
||||
tokenizer_builder: B,
|
||||
new_states: BTreeMap<(DocumentId, SchemaField), NewState>,
|
||||
new_states: BTreeMap<(DocumentId, SchemaAttr), NewState>,
|
||||
}
|
||||
|
||||
impl<B> PositiveUpdateBuilder<B> {
|
||||
|
@ -38,12 +38,12 @@ impl<B> PositiveUpdateBuilder<B> {
|
|||
}
|
||||
|
||||
// TODO value must be a field that can be indexed
|
||||
pub fn update_field(&mut self, id: DocumentId, field: SchemaField, value: String) {
|
||||
pub fn update_field(&mut self, id: DocumentId, field: SchemaAttr, value: String) {
|
||||
let state = NewState::Updated { value, props: self.schema.props(field) };
|
||||
self.new_states.insert((id, field), state);
|
||||
}
|
||||
|
||||
pub fn remove_field(&mut self, id: DocumentId, field: SchemaField) {
|
||||
pub fn remove_field(&mut self, id: DocumentId, field: SchemaAttr) {
|
||||
self.new_states.insert((id, field), NewState::Removed);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
#![feature(range_contains)]
|
||||
|
||||
#[macro_use] extern crate lazy_static;
|
||||
#[macro_use] extern crate serde_derive;
|
||||
|
||||
pub mod index;
|
||||
pub mod blob;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue