feat: Introduce the index module

This commit is contained in:
Clément Renault 2018-11-20 11:37:19 +01:00
parent b3249d515d
commit 7c1a17520d
No known key found for this signature in database
GPG key ID: 0151CDAB43460DAE
9 changed files with 188 additions and 92 deletions

View file

@ -8,8 +8,8 @@ pub use self::merge::Merge;
pub use self::positive_blob::{PositiveBlob, PositiveBlobBuilder};
pub use self::negative_blob::{NegativeBlob, NegativeBlobBuilder};
use std::error::Error;
use fst::Map;
use crate::data::DocIndexes;
pub enum Blob {
@ -40,3 +40,7 @@ impl Sign {
}
}
}
/// Intended to turn the bytes of `slice` into a list of `Blob`s.
///
/// Not implemented yet: calling this function currently panics through
/// `unimplemented!()`, so neither the `Ok` nor the `Err` variant is ever
/// produced.
// NOTE(review): the name suggests the slice encodes the blobs in a defined
// order; confirm the expected byte format once this is implemented.
pub fn ordered_blobs_from_slice(slice: &[u8]) -> Result<Vec<Blob>, Box<Error>> {
unimplemented!()
}

View file

@ -1,6 +1,5 @@
pub mod blob_name;
pub mod schema;
pub mod search;
pub mod update;
use std::io;
@ -19,9 +18,12 @@ use ::rocksdb::merge_operator::MergeOperands;
use crate::rank::Document;
use crate::data::DocIdsBuilder;
use crate::{DocIndex, DocumentId};
use crate::index::{update::Update, search::Search};
use crate::index::update::Update;
use crate::blob::{PositiveBlobBuilder, Blob, Sign};
use crate::blob::ordered_blobs_from_slice;
use crate::tokenizer::{TokenizerBuilder, DefaultBuilder, Tokenizer};
use crate::rank::{criterion, Config, RankedStream};
use crate::automaton;
fn simple_vec_append(key: &[u8], value: Option<&[u8]>, operands: &mut MergeOperands) -> Vec<u8> {
let mut output = Vec::new();
@ -36,6 +38,12 @@ pub struct Index {
}
impl Index {
/// Intended to create a new index at `path`.
///
/// Not implemented yet: calling this currently panics through
/// `unimplemented!`. The panic message records the planned behavior, which
/// is to return a soft error when a database already exists at `path`.
pub fn create<P: AsRef<Path>>(path: P) -> Result<Index, Box<Error>> {
unimplemented!("return a soft error: the database already exist at the given path")
// Design note: either `Self::open` must not take a `create_if_missing`
// parameter, or we must introduce an `OpenOptions`-like type carrying the
// many parameters, see:
// https://doc.rust-lang.org/std/fs/struct.OpenOptions.html
}
pub fn open<P: AsRef<Path>>(path: P) -> Result<Index, Box<Error>> {
let path = path.as_ref().to_string_lossy();
@ -66,50 +74,47 @@ impl Index {
Ok(())
}
pub fn snapshot(&self) -> Snapshot<&rocksdb::DB> {
Snapshot::new(&self.database)
/// Loads the blobs recorded under the `00-blobs-order` key of the database.
///
/// Returns an empty list when the key is absent. Errors from the database
/// lookup and from `ordered_blobs_from_slice` are propagated to the caller.
fn blobs(&self) -> Result<Vec<Blob>, Box<Error>> {
    let stored = self.database.get(b"00-blobs-order")?;

    // A present value is handed to the decoder as-is; its result already has
    // the return type of this method.
    if let Some(bytes) = stored {
        return ordered_blobs_from_slice(&bytes);
    }

    Ok(Vec::new())
}
}
impl Search for Index {
fn search(&self, text: &str) -> Vec<Document> {
unimplemented!()
}
}
pub fn search(&self, query: &str) -> Result<Vec<Document>, Box<Error>> {
pub struct Snapshot<D>
where D: Deref<Target=rocksdb::DB>,
{
inner: rocksdb::Snapshot<D>,
}
// FIXME create a SNAPSHOT for the search !
let blobs = self.blobs()?;
impl<D> Snapshot<D>
where D: Deref<Target=rocksdb::DB>,
{
pub fn new(inner: D) -> Snapshot<D> {
Self { inner: rocksdb::Snapshot::new(inner) }
}
}
let mut automatons = Vec::new();
for query in query.split_whitespace().map(str::to_lowercase) {
let lev = automaton::build_prefix_dfa(&query);
automatons.push(lev);
}
impl<D> Search for Snapshot<D>
where D: Deref<Target=rocksdb::DB>,
{
fn search(&self, text: &str) -> Vec<Document> {
unimplemented!()
let config = Config {
blobs: &blobs,
automatons: automatons,
criteria: criterion::default(),
distinct: ((), 1),
};
Ok(RankedStream::new(config).retrieve_documents(0..20))
}
}
#[cfg(test)]
mod tests {
use tempfile::NamedTempFile;
use super::*;
use crate::index::schema::Schema;
use crate::index::update::{PositiveUpdateBuilder, NegativeUpdateBuilder};
#[test]
fn generate_negative_update() -> Result<(), Box<Error>> {
let schema = Schema::open("/meili/default.sch")?;
let mut builder = NegativeUpdateBuilder::new("update-delete-0001.sst");
let path = NamedTempFile::new()?.into_temp_path();
let mut builder = NegativeUpdateBuilder::new(&path);
// you can insert documents in any order; they are sorted internally
builder.remove(1);
@ -157,18 +162,18 @@ mod tests {
//////////////
let index = Index::open("/meili/data")?;
let update = Update::open("update-0001.sst")?;
// let index = Index::open("/meili/data")?;
// let update = Update::open("update-0001.sst")?;
// if you create a snapshot before an update
let snapshot = index.snapshot();
index.ingest_update(update)?;
// // if you create a snapshot before an update
// let snapshot = index.snapshot();
// index.ingest_update(update)?;
// the snapshot does not see the updates
let results = snapshot.search("helo");
// // the snapshot does not see the updates
// let results = snapshot.search("helo");
// the raw index itself see new results
let results = index.search("helo");
// // the raw index itself sees new results
// let results = index.search("helo");
Ok(())
}

View file

@ -1,6 +1,8 @@
use std::io::{Read, Write};
use std::error::Error;
use std::path::Path;
use std::ops::BitOr;
use std::fs::File;
use std::fmt;
pub const STORED: SchemaProps = SchemaProps { stored: true, indexed: false };
@ -33,15 +35,23 @@ impl BitOr for SchemaProps {
}
}
pub struct SchemaBuilder;
/// Builder that accumulates the fields of a schema.
pub struct SchemaBuilder {
/// Declared fields as `(name, props)` pairs, kept in insertion order.
fields: Vec<(String, SchemaProps)>,
}
impl SchemaBuilder {
pub fn new() -> SchemaBuilder {
unimplemented!()
SchemaBuilder { fields: Vec::new() }
}
pub fn field(&mut self, name: &str, props: SchemaProps) -> SchemaField {
unimplemented!()
/// Registers a new field called `name` with the given `props`.
///
/// The returned `SchemaField` wraps the position the field occupies in the
/// builder, i.e. the number of fields that were registered before it. The
/// position is converted with an `as u32` cast, which truncates if the
/// count does not fit in a `u32`.
pub fn field<N>(&mut self, name: N, props: SchemaProps) -> SchemaField
where
    N: Into<String>,
{
    // Take the position before pushing so it designates the new entry.
    let position = self.fields.len() as u32;
    self.fields.push((name.into(), props));
    SchemaField(position)
}
pub fn build(self) -> Schema {
@ -49,6 +59,32 @@ impl SchemaBuilder {
}
}
/// Schema handle. At this stage it is a unit struct that stores no state.
#[derive(Clone)]
pub struct Schema;
impl Schema {
/// Opens the file at `path` and reads a schema from it through
/// `Schema::read_from`.
///
/// # Errors
///
/// Returns an error if the file cannot be opened.
///
/// # Panics
///
/// `read_from` is not implemented yet, so a successful open currently
/// ends in an `unimplemented!()` panic.
pub fn open<P: AsRef<Path>>(path: P) -> Result<Schema, Box<Error>> {
let file = File::open(path)?;
Schema::read_from(file)
}
/// Intended to read a schema from `reader`.
///
/// Not implemented yet: currently panics through `unimplemented!()`.
pub fn read_from<R: Read>(reader: R) -> Result<Schema, Box<Error>> {
unimplemented!()
}
/// Intended to write a schema to `writer`.
///
/// Not implemented yet: currently panics through `unimplemented!()`.
// NOTE(review): this function takes no `self`, so as declared it has no
// schema value to serialize; confirm whether a `&self` receiver is intended.
pub fn write_to<W: Write>(writer: W) -> Result<(), Box<Error>> {
unimplemented!()
}
/// Intended to return the properties associated with `field`.
///
/// Not implemented yet: currently panics through `unimplemented!()`.
pub fn props(&self, field: SchemaField) -> SchemaProps {
unimplemented!()
}
/// Intended to look up a field by `name`, returning `None` when there is
/// no such field (presumably; confirm once implemented).
///
/// Not implemented yet: currently panics through `unimplemented!()`.
pub fn field(&self, name: &str) -> Option<SchemaField> {
unimplemented!()
}
}
#[derive(Copy, Clone, PartialOrd, Ord, PartialEq, Eq)]
pub struct SchemaField(u32);
@ -63,20 +99,3 @@ impl fmt::Display for SchemaField {
write!(f, "{}", self.0)
}
}
#[derive(Clone)]
pub struct Schema;
impl Schema {
pub fn open<P: AsRef<Path>>(path: P) -> Result<Schema, Box<Error>> {
unimplemented!()
}
pub fn props(&self, field: SchemaField) -> SchemaProps {
unimplemented!()
}
pub fn field(&self, name: &str) -> Option<SchemaField> {
unimplemented!()
}
}

View file

@ -1,5 +0,0 @@
use crate::rank::Document;
pub trait Search {
fn search(&self, text: &str) -> Vec<Document>;
}

View file

@ -22,8 +22,8 @@ fn clamp_range<T: Copy + Ord>(range: Range<T>, big: Range<T>) -> Range<T> {
}
}
pub struct Config<C, F> {
pub index: Index,
pub struct Config<'a, C, F> {
pub blobs: &'a [Blob],
pub automatons: Vec<DfaExt>,
pub criteria: Vec<C>,
pub distinct: (F, usize),
@ -37,11 +37,11 @@ pub struct RankedStream<'m, C, F> {
}
impl<'m, C, F> RankedStream<'m, C, F> {
pub fn new(config: Config<C, F>) -> Self {
pub fn new(config: Config<'m, C, F>) -> Self {
let automatons: Vec<_> = config.automatons.into_iter().map(Rc::new).collect();
RankedStream {
stream: Merge::with_automatons(automatons.clone(), unimplemented!()),
stream: Merge::with_automatons(automatons.clone(), config.blobs),
automatons: automatons,
criteria: config.criteria,
distinct: config.distinct,