feat: Introduce a little simple http server for demo

This commit is contained in:
Clément Renault 2019-05-16 15:51:08 +02:00
parent 4b36fa0739
commit 08d89053da
No known key found for this signature in database
GPG Key ID: 0151CDAB43460DAE
7 changed files with 104 additions and 169 deletions

View File

@ -59,15 +59,34 @@ We have seen much better performances when [using jemalloc as the global allocat
## Usage and examples ## Usage and examples
MeiliDB runs with an index like most search engines. You can test a small part of MeiliDB by using the following commands: they create an index named _movies_ and initialize it with two great Tarantino movies.
So to test the library you can create one by indexing a simple csv file.
```bash ```bash
cargo run --release --example create-database -- test.mdb examples/movies/movies.csv --schema examples/movies/schema-movies.toml cargo run --release
curl -XPOST 'http://127.0.0.1:8000/movies' \
-d '
identifier = "id"
[attributes.id]
stored = true
[attributes.title]
stored = true
indexed = true
'
curl -H 'Content-Type: application/json' \
-XPUT 'http://127.0.0.1:8000/movies' \
-d '{ "id": 123, "title": "Inglorious Bastards" }'
curl -H 'Content-Type: application/json' \
-XPUT 'http://127.0.0.1:8000/movies' \
-d '{ "id": 456, "title": "Django Unchained" }'
``` ```
Once the command is executed, the index should be in the `test.mdb` folder. You are now able to run the `query-database` example and play with MeiliDB. Once the database is initialized you can query it by using the following command:
```bash ```bash
cargo run --release --example query-database -- test.mdb -n 10 id title overview release_date curl -XGET 'http://127.0.0.1:8000/movies/search?q=inglo'
``` ```

View File

@ -14,7 +14,7 @@ meilidb-core = { path = "../meilidb-core", version = "0.1.0" }
meilidb-tokenizer = { path = "../meilidb-tokenizer", version = "0.1.0" } meilidb-tokenizer = { path = "../meilidb-tokenizer", version = "0.1.0" }
ordered-float = { version = "1.0.2", features = ["serde"] } ordered-float = { version = "1.0.2", features = ["serde"] }
sdset = "0.3.1" sdset = "0.3.1"
serde = { version = "1.0.90", features = ["derive"] } serde = { version = "1.0.91", features = ["derive"] }
serde_json = { version = "1.0.39", features = ["preserve_order"] } serde_json = { version = "1.0.39", features = ["preserve_order"] }
sled = "0.23.0" sled = "0.23.0"
toml = { version = "0.5.0", features = ["preserve_order"] } toml = { version = "0.5.0", features = ["preserve_order"] }

View File

@ -5,23 +5,19 @@ version = "0.3.1"
authors = ["Kerollmops <renault.cle@gmail.com>"] authors = ["Kerollmops <renault.cle@gmail.com>"]
[dependencies] [dependencies]
meilidb-core = { path = "../meilidb-core", version = "0.1.0" }
meilidb-data = { path = "../meilidb-data", version = "0.1.0" } meilidb-data = { path = "../meilidb-data", version = "0.1.0" }
meilidb-tokenizer = { path = "../meilidb-tokenizer", version = "0.1.0" } serde = { version = "1.0.91" , features = ["derive"] }
serde_json = "1.0.39"
[features] tempfile = "3.0.7"
default = [] tide = "0.2.0"
i128 = ["meilidb-core/i128"]
nightly = ["meilidb-core/nightly"]
[dev-dependencies] [dev-dependencies]
meilidb-core = { path = "../meilidb-core", version = "0.1.0" }
csv = "1.0.7" csv = "1.0.7"
env_logger = "0.6.1" env_logger = "0.6.1"
jemallocator = "0.1.9" jemallocator = "0.1.9"
quickcheck = "0.8.2" quickcheck = "0.8.2"
rand = "0.6.5" rand = "0.6.5"
rand_xorshift = "0.1.1" rand_xorshift = "0.1.1"
serde = { version = "1.0.90", features = ["derive"] }
structopt = "0.2.15" structopt = "0.2.15"
tempfile = "3.0.7"
termcolor = "1.0.4" termcolor = "1.0.4"

View File

@ -1,26 +0,0 @@
use std::io::{self, BufReader, BufRead};
use std::collections::HashSet;
use std::path::Path;
use std::fs::File;
/// A set of common words loaded from a text file, one word per line.
#[derive(Debug)]
pub struct CommonWords(HashSet<String>);

impl CommonWords {
    /// Reads `path` line by line and collects every line, trimmed of
    /// surrounding whitespace, into the set.
    ///
    /// Lines that are not valid UTF-8 are skipped.
    ///
    /// # Errors
    ///
    /// Returns the underlying error when the file cannot be opened or when
    /// reading from it fails for any reason other than undecodable text.
    pub fn from_file<P>(path: P) -> io::Result<Self>
    where
        P: AsRef<Path>,
    {
        let reader = BufReader::new(File::open(path)?);
        let mut set = HashSet::new();

        for line in reader.lines() {
            match line {
                Ok(line) => {
                    set.insert(line.trim().to_owned());
                }
                // An undecodable line has already been consumed from the
                // reader, so skipping it makes progress.
                Err(ref error) if error.kind() == io::ErrorKind::InvalidData => continue,
                // Any other failure may repeat on every call; discarding it
                // could spin forever, so report it to the caller instead.
                Err(error) => return Err(error),
            }
        }

        Ok(CommonWords(set))
    }

    /// Returns `true` when `word` is one of the loaded words.
    pub fn contains(&self, word: &str) -> bool {
        self.0.contains(word)
    }
}

View File

@ -1,7 +0,0 @@
#![cfg_attr(feature = "nightly", feature(test))]
mod common_words;
mod sort_by_attr;
pub use self::sort_by_attr::SortByAttr;
pub use self::common_words::CommonWords;

74
meilidb/src/main.rs Normal file
View File

@ -0,0 +1,74 @@
#![feature(async_await)]
use std::collections::HashMap;
use serde::{Deserialize, Serialize};
use tide::querystring::ExtractQuery;
use tide::http::status::StatusCode;
use tide::{error::ResultExt, response, App, Context, EndpointResult};
use serde_json::Value;
use meilidb_data::{Database, Schema};
/// Query-string parameters extracted from the URL by the search endpoint.
#[derive(Debug, Serialize, Deserialize, Clone)]
struct SearchQuery {
// Text passed to the index's query builder (the `q` URL parameter).
q: String,
}
async fn create_index(mut cx: Context<Database>) -> EndpointResult<()> {
let index: String = cx.param("index").client_err()?;
let schema = cx.body_bytes().await.client_err()?;
let schema = Schema::from_toml(schema.as_slice()).unwrap();
let database = cx.app_data();
database.create_index(&index, schema).unwrap();
Ok(())
}
/// Adds or replaces one document, sent as a JSON object in the request body,
/// in the index named by the `index` path parameter.
///
/// Responds with a client error when the path parameter or the JSON body
/// cannot be read, with `404 Not Found` when the index does not exist, with
/// `400 Bad Request` when the document is rejected, and with
/// `500 Internal Server Error` when the database fails.
async fn update_documents(mut cx: Context<Database>) -> EndpointResult<()> {
    let index: String = cx.param("index").client_err()?;
    let document: HashMap<String, Value> = cx.body_json().await.client_err()?;

    let database = cx.app_data();
    let index = database
        .open_index(&index)
        .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?
        .ok_or(StatusCode::NOT_FOUND)?;

    let mut addition = index.documents_addition();

    // NOTE(review): a rejected document is presumably the client's fault
    // (e.g. it does not fit the schema), hence 400 -- confirm against the
    // error cases of `update_document`.
    addition
        .update_document(document)
        .map_err(|_| StatusCode::BAD_REQUEST)?;
    addition
        .finalize()
        .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;

    Ok(())
}
/// Runs the `q` query-string parameter against the index named by the `index`
/// path parameter and returns the first 100 matching documents as a JSON array.
///
/// Responds with a client error when the path parameter cannot be read, with
/// `404 Not Found` when the index does not exist, and with
/// `500 Internal Server Error` when the query or a document lookup fails.
/// Matches whose document can no longer be found are left out of the response.
async fn search_index(cx: Context<Database>) -> EndpointResult {
    let index: String = cx.param("index").client_err()?;
    let query: SearchQuery = cx.url_query()?;

    let database = cx.app_data();
    let index = database
        .open_index(&index)
        .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?
        .ok_or(StatusCode::NOT_FOUND)?;

    let matches = index
        .query_builder()
        .query(&query.q, 0..100)
        .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;

    // A plain loop rather than `filter_map`, so that a failed lookup can be
    // turned into an error response with `?` instead of a panic.
    let mut documents: Vec<Value> = Vec::new();
    for found in matches {
        let document: Option<Value> = index
            .document(None, found.id)
            .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
        if let Some(document) = document {
            documents.push(document);
        }
    }

    Ok(response::json(documents))
}
fn main() -> std::io::Result<()> {
let tmp_dir = tempfile::tempdir().unwrap();
let database = Database::start_default(&tmp_dir).unwrap();
let mut app = App::new(database);
app.at("/:index").post(create_index).put(update_documents);
app.at("/:index/search").get(search_index);
app.serve("127.0.0.1:8000")
}

View File

@ -1,121 +0,0 @@
use std::cmp::Ordering;
use std::error::Error;
use std::fmt;
use meilidb_core::criterion::Criterion;
use meilidb_core::RawDocument;
use meilidb_data::{Schema, SchemaAttr, RankedMap};
/// A helper struct that permits sorting documents by
/// one of their ranked attributes.
///
/// # Note
///
/// Values are looked up in the `RankedMap` by document id and attribute.
///
/// A document without a value compares as `Ordering::Greater` against a
/// document that has one, whichever constructor was used, and two documents
/// without a value compare as `Ordering::Equal`.
///
/// # Example
///
/// ```ignore
/// use serde_derive::Deserialize;
/// use meilidb::rank::criterion::*;
///
/// let custom_ranking = SortByAttr::lower_is_better(&ranked_map, &schema, "published_at")?;
///
/// let builder = CriteriaBuilder::with_capacity(8)
/// .add(SumOfTypos)
/// .add(NumberOfWords)
/// .add(WordsProximity)
/// .add(SumOfWordsAttribute)
/// .add(SumOfWordsPosition)
/// .add(Exact)
/// .add(custom_ranking)
/// .add(DocumentId);
///
/// let criterion = builder.build();
///
/// ```
pub struct SortByAttr<'a> {
// Source of the per-document values that get compared.
ranked_map: &'a RankedMap,
// Schema attribute whose values drive the ordering.
attr: SchemaAttr,
// When `true`, the ordering of two present values is reversed.
reversed: bool,
}
impl<'a> SortByAttr<'a> {
    /// Builds a criterion where, of two documents that both have a value for
    /// `attr_name`, the one with the smaller value compares as `Less`.
    ///
    /// Fails when `attr_name` is not in `schema` or is not ranked.
    pub fn lower_is_better(
        ranked_map: &'a RankedMap,
        schema: &Schema,
        attr_name: &str,
    ) -> Result<SortByAttr<'a>, SortByAttrError> {
        let reversed = false;
        Self::new(ranked_map, schema, attr_name, reversed)
    }

    /// Builds a criterion where, of two documents that both have a value for
    /// `attr_name`, the one with the greater value compares as `Less`.
    ///
    /// Fails when `attr_name` is not in `schema` or is not ranked.
    pub fn higher_is_better(
        ranked_map: &'a RankedMap,
        schema: &Schema,
        attr_name: &str,
    ) -> Result<SortByAttr<'a>, SortByAttrError> {
        let reversed = true;
        Self::new(ranked_map, schema, attr_name, reversed)
    }

    /// Shared constructor: resolves `attr_name` in `schema` and checks that
    /// the attribute is ranked before building the criterion.
    fn new(
        ranked_map: &'a RankedMap,
        schema: &Schema,
        attr_name: &str,
        reversed: bool,
    ) -> Result<SortByAttr<'a>, SortByAttrError> {
        let attr = schema
            .attribute(attr_name)
            .ok_or(SortByAttrError::AttributeNotFound)?;

        if schema.props(attr).is_ranked() {
            Ok(SortByAttr { ranked_map, attr, reversed })
        } else {
            Err(SortByAttrError::AttributeNotRegisteredForRanking)
        }
    }
}
impl<'a> Criterion for SortByAttr<'a> {
    /// Compares two documents by the value the ranked map holds for the
    /// configured attribute.
    ///
    /// When both documents have a value, the values are compared and the
    /// result is reversed if this criterion was built as reversed. A document
    /// without a value is `Greater` than one with a value, and two documents
    /// without a value are `Equal`.
    fn evaluate(&self, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
        let left = self.ranked_map.get(lhs.id, self.attr);
        let right = self.ranked_map.get(rhs.id, self.attr);

        match (left, right) {
            (None, None) => Ordering::Equal,
            (None, Some(_)) => Ordering::Greater,
            (Some(_), None) => Ordering::Less,
            (Some(left), Some(right)) => match (left.cmp(&right), self.reversed) {
                (ordering, true) => ordering.reverse(),
                (ordering, false) => ordering,
            },
        }
    }
}
/// Reasons why a `SortByAttr` criterion cannot be built.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum SortByAttrError {
    /// The requested attribute name is not present in the schema.
    AttributeNotFound,
    /// The attribute exists but is not marked as ranked in the schema.
    AttributeNotRegisteredForRanking,
}

impl fmt::Display for SortByAttrError {
    /// Writes a short, lowercase description of the error.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let message = match *self {
            SortByAttrError::AttributeNotFound => "attribute not found in the schema",
            SortByAttrError::AttributeNotRegisteredForRanking => {
                "attribute not registered for ranking"
            }
        };
        f.write_str(message)
    }
}

impl Error for SortByAttrError {}