diff --git a/README.md b/README.md index f64015bbd..cb98d2dea 100644 --- a/README.md +++ b/README.md @@ -59,15 +59,34 @@ We have seen much better performances when [using jemalloc as the global allocat ## Usage and examples -MeiliDB runs with an index like most search engines. -So to test the library you can create one by indexing a simple csv file. +You can test a little part of MeiliDB by using this command, it creates an index named _movies_ and initializes it with two great Tarantino movies. ```bash -cargo run --release --example create-database -- test.mdb examples/movies/movies.csv --schema examples/movies/schema-movies.toml +cargo run --release + +curl -XPOST 'http://127.0.0.1:8000/movies' \ + -d ' +identifier = "id" + +[attributes.id] +stored = true + +[attributes.title] +stored = true +indexed = true +' + +curl -H 'Content-Type: application/json' \ + -XPUT 'http://127.0.0.1:8000/movies' \ + -d '{ "id": 123, "title": "Inglorious Bastards" }' + +curl -H 'Content-Type: application/json' \ + -XPUT 'http://127.0.0.1:8000/movies' \ + -d '{ "id": 456, "title": "Django Unchained" }' ``` -Once the command is executed, the index should be in the `test.mdb` folder. You are now able to run the `query-database` example and play with MeiliDB. 
+Once the database is initialized you can query it by using the following command: ```bash -cargo run --release --example query-database -- test.mdb -n 10 id title overview release_date +curl -XGET 'http://127.0.0.1:8000/movies/search?q=inglo' ``` diff --git a/meilidb-data/Cargo.toml b/meilidb-data/Cargo.toml index adf618657..0c5ff9f5e 100644 --- a/meilidb-data/Cargo.toml +++ b/meilidb-data/Cargo.toml @@ -14,7 +14,7 @@ meilidb-core = { path = "../meilidb-core", version = "0.1.0" } meilidb-tokenizer = { path = "../meilidb-tokenizer", version = "0.1.0" } ordered-float = { version = "1.0.2", features = ["serde"] } sdset = "0.3.1" -serde = { version = "1.0.90", features = ["derive"] } +serde = { version = "1.0.91", features = ["derive"] } serde_json = { version = "1.0.39", features = ["preserve_order"] } sled = "0.23.0" toml = { version = "0.5.0", features = ["preserve_order"] } diff --git a/meilidb/Cargo.toml b/meilidb/Cargo.toml index c2f4ad0fc..8f9517419 100644 --- a/meilidb/Cargo.toml +++ b/meilidb/Cargo.toml @@ -5,23 +5,19 @@ version = "0.3.1" authors = ["Kerollmops "] [dependencies] -meilidb-core = { path = "../meilidb-core", version = "0.1.0" } meilidb-data = { path = "../meilidb-data", version = "0.1.0" } -meilidb-tokenizer = { path = "../meilidb-tokenizer", version = "0.1.0" } - -[features] -default = [] -i128 = ["meilidb-core/i128"] -nightly = ["meilidb-core/nightly"] +serde = { version = "1.0.91" , features = ["derive"] } +serde_json = "1.0.39" +tempfile = "3.0.7" +tide = "0.2.0" [dev-dependencies] +meilidb-core = { path = "../meilidb-core", version = "0.1.0" } csv = "1.0.7" env_logger = "0.6.1" jemallocator = "0.1.9" quickcheck = "0.8.2" rand = "0.6.5" rand_xorshift = "0.1.1" -serde = { version = "1.0.90", features = ["derive"] } structopt = "0.2.15" -tempfile = "3.0.7" termcolor = "1.0.4" diff --git a/meilidb/src/common_words.rs b/meilidb/src/common_words.rs deleted file mode 100644 index 32cd79336..000000000 --- a/meilidb/src/common_words.rs +++ /dev/null 
@@ -1,26 +0,0 @@ -use std::io::{self, BufReader, BufRead}; -use std::collections::HashSet; -use std::path::Path; -use std::fs::File; - -#[derive(Debug)] -pub struct CommonWords(HashSet); - -impl CommonWords { - pub fn from_file

(path: P) -> io::Result - where P: AsRef - { - let file = File::open(path)?; - let file = BufReader::new(file); - let mut set = HashSet::new(); - for line in file.lines().filter_map(|l| l.ok()) { - let word = line.trim().to_owned(); - set.insert(word); - } - Ok(CommonWords(set)) - } - - pub fn contains(&self, word: &str) -> bool { - self.0.contains(word) - } -} diff --git a/meilidb/src/lib.rs b/meilidb/src/lib.rs deleted file mode 100644 index aba7ab6a7..000000000 --- a/meilidb/src/lib.rs +++ /dev/null @@ -1,7 +0,0 @@ -#![cfg_attr(feature = "nightly", feature(test))] - -mod common_words; -mod sort_by_attr; - -pub use self::sort_by_attr::SortByAttr; -pub use self::common_words::CommonWords; diff --git a/meilidb/src/main.rs b/meilidb/src/main.rs new file mode 100644 index 000000000..822d941b5 --- /dev/null +++ b/meilidb/src/main.rs @@ -0,0 +1,74 @@ +#![feature(async_await)] + +use std::collections::HashMap; + +use serde::{Deserialize, Serialize}; +use tide::querystring::ExtractQuery; +use tide::http::status::StatusCode; +use tide::{error::ResultExt, response, App, Context, EndpointResult}; +use serde_json::Value; +use meilidb_data::{Database, Schema}; + +#[derive(Debug, Serialize, Deserialize, Clone)] +struct SearchQuery { + q: String, +} + +async fn create_index(mut cx: Context) -> EndpointResult<()> { + let index: String = cx.param("index").client_err()?; + let schema = cx.body_bytes().await.client_err()?; + let schema = Schema::from_toml(schema.as_slice()).unwrap(); + + let database = cx.app_data(); + database.create_index(&index, schema).unwrap(); + + Ok(()) +} + +async fn update_documents(mut cx: Context) -> EndpointResult<()> { + let index: String = cx.param("index").client_err()?; + let document: HashMap = cx.body_json().await.client_err()?; + + let database = cx.app_data(); + let index = match database.open_index(&index).unwrap() { + Some(index) => index, + None => Err(StatusCode::NOT_FOUND)?, + }; + + let mut addition = index.documents_addition(); + 
addition.update_document(document).unwrap(); + addition.finalize().unwrap(); + + Ok(()) +} + +async fn search_index(cx: Context) -> EndpointResult { + let index: String = cx.param("index").client_err()?; + let query: SearchQuery = cx.url_query()?; + + let database = cx.app_data(); + + let index = match database.open_index(&index).unwrap() { + Some(index) => index, + None => Err(StatusCode::NOT_FOUND)?, + }; + + let documents_ids = index.query_builder().query(&query.q, 0..100).unwrap(); + let documents: Vec = documents_ids + .into_iter() + .filter_map(|x| index.document(None, x.id).unwrap()) + .collect(); + + Ok(response::json(documents)) +} + +fn main() -> std::io::Result<()> { + let tmp_dir = tempfile::tempdir().unwrap(); + let database = Database::start_default(&tmp_dir).unwrap(); + let mut app = App::new(database); + + app.at("/:index").post(create_index).put(update_documents); + app.at("/:index/search").get(search_index); + + app.serve("127.0.0.1:8000") +} diff --git a/meilidb/src/sort_by_attr.rs b/meilidb/src/sort_by_attr.rs deleted file mode 100644 index 2cacaae13..000000000 --- a/meilidb/src/sort_by_attr.rs +++ /dev/null @@ -1,121 +0,0 @@ -use std::cmp::Ordering; -use std::error::Error; -use std::fmt; - -use meilidb_core::criterion::Criterion; -use meilidb_core::RawDocument; -use meilidb_data::{Schema, SchemaAttr, RankedMap}; - -/// An helper struct that permit to sort documents by -/// some of their stored attributes. -/// -/// # Note -/// -/// If a document cannot be deserialized it will be considered [`None`][]. -/// -/// Deserialized documents are compared like `Some(doc0).cmp(&Some(doc1))`, -/// so you must check the [`Ord`] of `Option` implementation. 
-/// -/// [`None`]: https://doc.rust-lang.org/std/option/enum.Option.html#variant.None -/// [`Ord`]: https://doc.rust-lang.org/std/option/enum.Option.html#impl-Ord -/// -/// # Example -/// -/// ```ignore -/// use serde_derive::Deserialize; -/// use meilidb::rank::criterion::*; -/// -/// let custom_ranking = SortByAttr::lower_is_better(&ranked_map, &schema, "published_at")?; -/// -/// let builder = CriteriaBuilder::with_capacity(8) -/// .add(SumOfTypos) -/// .add(NumberOfWords) -/// .add(WordsProximity) -/// .add(SumOfWordsAttribute) -/// .add(SumOfWordsPosition) -/// .add(Exact) -/// .add(custom_ranking) -/// .add(DocumentId); -/// -/// let criterion = builder.build(); -/// -/// ``` -pub struct SortByAttr<'a> { - ranked_map: &'a RankedMap, - attr: SchemaAttr, - reversed: bool, -} - -impl<'a> SortByAttr<'a> { - pub fn lower_is_better( - ranked_map: &'a RankedMap, - schema: &Schema, - attr_name: &str, - ) -> Result, SortByAttrError> - { - SortByAttr::new(ranked_map, schema, attr_name, false) - } - - pub fn higher_is_better( - ranked_map: &'a RankedMap, - schema: &Schema, - attr_name: &str, - ) -> Result, SortByAttrError> - { - SortByAttr::new(ranked_map, schema, attr_name, true) - } - - fn new( - ranked_map: &'a RankedMap, - schema: &Schema, - attr_name: &str, - reversed: bool, - ) -> Result, SortByAttrError> - { - let attr = match schema.attribute(attr_name) { - Some(attr) => attr, - None => return Err(SortByAttrError::AttributeNotFound), - }; - - if !schema.props(attr).is_ranked() { - return Err(SortByAttrError::AttributeNotRegisteredForRanking); - } - - Ok(SortByAttr { ranked_map, attr, reversed }) - } -} - -impl<'a> Criterion for SortByAttr<'a> { - fn evaluate(&self, lhs: &RawDocument, rhs: &RawDocument) -> Ordering { - let lhs = self.ranked_map.get(lhs.id, self.attr); - let rhs = self.ranked_map.get(rhs.id, self.attr); - - match (lhs, rhs) { - (Some(lhs), Some(rhs)) => { - let order = lhs.cmp(&rhs); - if self.reversed { order.reverse() } else { order } - }, - 
(None, Some(_)) => Ordering::Greater, - (Some(_), None) => Ordering::Less, - (None, None) => Ordering::Equal, - } - } -} - -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] -pub enum SortByAttrError { - AttributeNotFound, - AttributeNotRegisteredForRanking, -} - -impl fmt::Display for SortByAttrError { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - use SortByAttrError::*; - match self { - AttributeNotFound => f.write_str("attribute not found in the schema"), - AttributeNotRegisteredForRanking => f.write_str("attribute not registered for ranking"), - } - } -} - -impl Error for SortByAttrError { }