implements parser

This commit is contained in:
mposmta 2020-04-06 19:30:00 +02:00 committed by marin
parent 66568a913c
commit dcf1096ac3
10 changed files with 111 additions and 330 deletions

241
Cargo.lock generated
View File

@ -54,8 +54,8 @@ version = "1.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "efd3d156917d94862e779f356c5acae312b08fd3121e792c857d7928c8088423"
dependencies = [
"quote 1.0.3",
"syn 1.0.17",
"quote",
"syn",
]
[[package]]
@ -627,9 +627,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9a5081aa3de1f7542a794a397cde100ed903b0630152d0973479018fd85423a7"
dependencies = [
"proc-macro-hack",
"proc-macro2 1.0.10",
"quote 1.0.3",
"syn 1.0.17",
"proc-macro2",
"quote",
"syn",
]
[[package]]
@ -738,24 +738,6 @@ dependencies = [
"unicode-segmentation",
]
[[package]]
name = "heed"
version = "0.6.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e2b71c3a156adccd29e4fce2fe873ec775a50c2a69e0cc811e9a79fe7eb9605"
dependencies = [
"bincode",
"byteorder",
"libc",
"lmdb-rkv-sys 0.10.0",
"once_cell",
"page_size",
"serde",
"serde_json",
"url 2.1.1",
"zerocopy 0.2.8",
]
[[package]]
name = "heed"
version = "0.7.0"
@ -765,13 +747,13 @@ dependencies = [
"bincode",
"byteorder",
"libc",
"lmdb-rkv-sys 0.11.0",
"lmdb-rkv-sys",
"once_cell",
"page_size",
"serde",
"serde_json",
"url 2.1.1",
"zerocopy 0.3.0",
"zerocopy",
]
[[package]]
@ -1030,17 +1012,6 @@ version = "0.2.68"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dea0c0405123bba743ee3f91f49b1c7cfb684eef0da0a50110f758ccf24cdff0"
[[package]]
name = "lmdb-rkv-sys"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "74168edb46923d92c3833b41a5a55b712e1b3fcf2050a142b3873902fd79aab4"
dependencies = [
"cc",
"libc",
"pkg-config",
]
[[package]]
name = "lmdb-rkv-sys"
version = "0.11.0"
@ -1111,11 +1082,12 @@ dependencies = [
"env_logger",
"fst",
"hashbrown",
"heed 0.7.0",
"heed",
"indexmap",
"intervaltree",
"itertools 0.9.0",
"jemallocator",
"lazy_static",
"levenshtein_automata",
"log",
"meilisearch-schema",
@ -1123,6 +1095,8 @@ dependencies = [
"meilisearch-types",
"once_cell",
"ordered-float",
"pest 2.1.3 (git+https://github.com/MarinPostma/pest.git)",
"pest_derive",
"regex",
"rustyline",
"sdset",
@ -1133,18 +1107,8 @@ dependencies = [
"structopt",
"tempfile",
"termcolor",
"zerocopy 0.3.0",
]
[[package]]
name = "meilisearch-filters"
version = "0.9.0"
dependencies = [
"heed 0.6.5",
"lazy_static",
"meilisearch-core",
"pest",
"pest_derive",
"unicase",
"zerocopy",
]
[[package]]
@ -1157,7 +1121,7 @@ dependencies = [
"crossbeam-channel",
"env_logger",
"futures 0.3.4",
"heed 0.7.0",
"heed",
"http",
"http-service",
"http-service-mock",
@ -1211,7 +1175,7 @@ name = "meilisearch-types"
version = "0.9.0"
dependencies = [
"serde",
"zerocopy 0.3.0",
"zerocopy",
]
[[package]]
@ -1439,6 +1403,14 @@ version = "2.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d4fd5641d01c8f18a23da7b6fe29298ff4b55afcccdf78973b24cf3175fee32e"
[[package]]
name = "pest"
version = "2.1.3"
source = "git+https://github.com/MarinPostma/pest.git#e1031ad0134d5e9893c470dbea50811b2b746926"
dependencies = [
"ucd-trie",
]
[[package]]
name = "pest"
version = "2.1.3"
@ -1454,7 +1426,7 @@ version = "2.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "833d1ae558dc601e9a60366421196a8d94bc0ac980476d0b67e1d0988d72b2d0"
dependencies = [
"pest",
"pest 2.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
"pest_generator",
]
@ -1464,11 +1436,11 @@ version = "2.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "99b8db626e31e5b81787b9783425769681b347011cc59471e33ea46d2ea0cf55"
dependencies = [
"pest",
"pest 2.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
"pest_meta",
"proc-macro2 1.0.10",
"quote 1.0.3",
"syn 1.0.17",
"proc-macro2",
"quote",
"syn",
]
[[package]]
@ -1478,7 +1450,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "54be6e404f5317079812fc8f9f5279de376d8856929e21c184ecf6bbd692a11d"
dependencies = [
"maplit",
"pest",
"pest 2.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
"sha-1",
]
@ -1530,26 +1502,26 @@ dependencies = [
[[package]]
name = "proc-macro-error"
version = "1.0.1"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8931031034aa65c73f3f1a05c3ec0fa51287fcd06557ecf4e88b2768bdca375e"
checksum = "98e9e4b82e0ef281812565ea4751049f1bdcdfccda7d3f459f2e138a40c08678"
dependencies = [
"proc-macro-error-attr",
"proc-macro2 1.0.10",
"quote 1.0.3",
"syn 1.0.17",
"proc-macro2",
"quote",
"syn",
"version_check",
]
[[package]]
name = "proc-macro-error-attr"
version = "1.0.1"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2147536f412ee7ae5529364ed50172ca0220fd64591e236296f45f36b38b2f98"
checksum = "4f5444ead4e9935abd7f27dc51f7e852a0569ac888096d5ec2499470794e2e53"
dependencies = [
"proc-macro2 1.0.10",
"quote 1.0.3",
"syn 1.0.17",
"proc-macro2",
"quote",
"syn",
"syn-mid",
"version_check",
]
@ -1566,22 +1538,13 @@ version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e946095f9d3ed29ec38de908c22f95d9ac008e424c7bcae54c75a79c527c694"
[[package]]
name = "proc-macro2"
version = "0.4.30"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cf3d2011ab5c909338f7887f4fc896d35932e29146c12c8d01da6b22a80ba759"
dependencies = [
"unicode-xid 0.1.0",
]
[[package]]
name = "proc-macro2"
version = "1.0.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "df246d292ff63439fea9bc8c0a270bed0e390d5ebd4db4ba15aba81111b5abe3"
dependencies = [
"unicode-xid 0.2.0",
"unicode-xid",
]
[[package]]
@ -1599,22 +1562,13 @@ version = "1.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0"
[[package]]
name = "quote"
version = "0.6.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6ce23b6b870e8f94f81fb0a363d65d86675884b34a09043c81e5562f11c1f8e1"
dependencies = [
"proc-macro2 0.4.30",
]
[[package]]
name = "quote"
version = "1.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2bdc6c187c65bca4260c9011c9e3132efe4909da44726bad24cf7572ae338d7f"
dependencies = [
"proc-macro2 1.0.10",
"proc-macro2",
]
[[package]]
@ -1894,9 +1848,9 @@ version = "1.0.106"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9e549e3abf4fb8621bd1609f11dfc9f5e50320802273b12f3811a67e6716ea6c"
dependencies = [
"proc-macro2 1.0.10",
"quote 1.0.3",
"syn 1.0.17",
"proc-macro2",
"quote",
"syn",
]
[[package]]
@ -2020,20 +1974,9 @@ checksum = "a489c87c08fbaf12e386665109dd13470dcc9c4583ea3e10dd2b4523e5ebd9ac"
dependencies = [
"heck",
"proc-macro-error",
"proc-macro2 1.0.10",
"quote 1.0.3",
"syn 1.0.17",
]
[[package]]
name = "syn"
version = "0.15.44"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9ca4b3b69a77cbe1ffc9e198781b7acb0c7365a883670e8f1c1bc66fba79a5c5"
dependencies = [
"proc-macro2 0.4.30",
"quote 0.6.13",
"unicode-xid 0.1.0",
"proc-macro2",
"quote",
"syn",
]
[[package]]
@ -2042,9 +1985,9 @@ version = "1.0.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0df0eb663f387145cab623dea85b09c2c5b4b0aef44e945d928e682fce71bb03"
dependencies = [
"proc-macro2 1.0.10",
"quote 1.0.3",
"unicode-xid 0.2.0",
"proc-macro2",
"quote",
"unicode-xid",
]
[[package]]
@ -2053,21 +1996,9 @@ version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7be3539f6c128a931cf19dcee741c1af532c7fd387baa739c03dd2e96479338a"
dependencies = [
"proc-macro2 1.0.10",
"quote 1.0.3",
"syn 1.0.17",
]
[[package]]
name = "synstructure"
version = "0.10.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "02353edf96d6e4dc81aea2d8490a7e9db177bf8acb0e951c24940bf866cb313f"
dependencies = [
"proc-macro2 0.4.30",
"quote 0.6.13",
"syn 0.15.44",
"unicode-xid 0.1.0",
"proc-macro2",
"quote",
"syn",
]
[[package]]
@ -2076,10 +2007,10 @@ version = "0.12.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "67656ea1dc1b41b1451851562ea232ec2e5a80242139f7e679ceccfb5d61f545"
dependencies = [
"proc-macro2 1.0.10",
"quote 1.0.3",
"syn 1.0.17",
"unicode-xid 0.2.0",
"proc-macro2",
"quote",
"syn",
"unicode-xid",
]
[[package]]
@ -2350,6 +2281,15 @@ version = "0.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "56dee185309b50d1f11bfedef0fe6d036842e3fb77413abef29f8f8d1c5d4c1c"
[[package]]
name = "unicase"
version = "2.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "50f37be617794602aabbeee0be4f259dc1778fabe05e2d67ee8f79326d5cb4f6"
dependencies = [
"version_check",
]
[[package]]
name = "unicode-bidi"
version = "0.3.4"
@ -2380,12 +2320,6 @@ version = "0.1.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "caaa9d531767d1ff2150b9332433f32a24622147e5ebb1f26409d5da67afd479"
[[package]]
name = "unicode-xid"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fc72304796d0818e357ead4e000d19c9c174ab23dc11093ac919054d20a6a7fc"
[[package]]
name = "unicode-xid"
version = "0.2.0"
@ -2517,9 +2451,9 @@ dependencies = [
"bumpalo",
"lazy_static",
"log",
"proc-macro2 1.0.10",
"quote 1.0.3",
"syn 1.0.17",
"proc-macro2",
"quote",
"syn",
"wasm-bindgen-shared",
]
@ -2529,7 +2463,7 @@ version = "0.2.60"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8bd151b63e1ea881bb742cd20e1d6127cef28399558f3b5d415289bc41eee3a4"
dependencies = [
"quote 1.0.3",
"quote",
"wasm-bindgen-macro-support",
]
@ -2539,9 +2473,9 @@ version = "0.2.60"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d68a5b36eef1be7868f668632863292e37739656a80fc4b9acec7b0bd35a4931"
dependencies = [
"proc-macro2 1.0.10",
"quote 1.0.3",
"syn 1.0.17",
"proc-macro2",
"quote",
"syn",
"wasm-bindgen-backend",
"wasm-bindgen-shared",
]
@ -2640,16 +2574,6 @@ dependencies = [
"winapi-build",
]
[[package]]
name = "zerocopy"
version = "0.2.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "992b9b31f80fd4a167f903f879b8ca43d6716cc368ea01df90538baa2dd34056"
dependencies = [
"byteorder",
"zerocopy-derive 0.1.4",
]
[[package]]
name = "zerocopy"
version = "0.3.0"
@ -2657,18 +2581,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6580539ad917b7c026220c4b3f2c08d52ce54d6ce0dc491e66002e35388fab46"
dependencies = [
"byteorder",
"zerocopy-derive 0.2.0",
]
[[package]]
name = "zerocopy-derive"
version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b090467ecd0624026e8a6405d343ac7382592530d54881330b3fc8e400280fa5"
dependencies = [
"proc-macro2 0.4.30",
"syn 0.15.44",
"synstructure 0.10.2",
"zerocopy-derive",
]
[[package]]
@ -2677,7 +2590,7 @@ version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d498dbd1fd7beb83c86709ae1c33ca50942889473473d287d56ce4770a18edfb"
dependencies = [
"proc-macro2 1.0.10",
"syn 1.0.17",
"synstructure 0.12.3",
"proc-macro2",
"syn",
"synstructure",
]

View File

@ -5,7 +5,6 @@ members = [
"meilisearch-schema",
"meilisearch-tokenizer",
"meilisearch-types",
"meilisearch-filters",
]
[profile.release]

View File

@ -20,6 +20,7 @@ heed = "0.7.0"
indexmap = { version = "1.3.2", features = ["serde-1"] }
intervaltree = "0.2.5"
itertools = "0.9.0"
lazy_static = "1.4.0"
levenshtein_automata = { version = "0.1.1", features = ["fst_automaton"] }
log = "0.4.8"
meilisearch-schema = { path = "../meilisearch-schema", version = "0.9.0" }
@ -27,12 +28,15 @@ meilisearch-tokenizer = { path = "../meilisearch-tokenizer", version = "0.9.0" }
meilisearch-types = { path = "../meilisearch-types", version = "0.9.0" }
once_cell = "1.3.1"
ordered-float = { version = "1.0.2", features = ["serde"] }
pest = {git = "https://github.com/MarinPostma/pest.git"}
pest_derive = "2.0"
regex = "1.3.6"
sdset = "0.4.0"
serde = { version = "1.0.105", features = ["derive"] }
serde_json = "1.0.50"
siphasher = "0.3.2"
slice-group-by = "0.2.6"
unicase = "2.6.0"
zerocopy = "0.3.0"
[dev-dependencies]

View File

@ -0,0 +1,28 @@
// Grammar for filter expressions: comparisons between a key and a value,
// combined with AND / OR / NOT and parentheses.

// A key or a value is either a quoted string or a bare word. Both rules are
// silent (`_`), so only the inner `string` / `word` pair shows up in the tree.
key = _{quoted | word}
value = _{quoted | word}
// A string wrapped in matching quotes: the opening quote (' or ") is pushed on
// the stack and POP requires that same quote to close the string.
quoted = _{ (PUSH("'") | PUSH("\"")) ~ string ~ POP }
// The contents of a quoted string, without the surrounding quotes.
string = {char*}
// An unquoted token made of letters, numbers, "_" and "-".
word = ${(LETTER | NUMBER | "_" | "-")+}
// One character of a quoted string: anything except the currently open quote
// (PEEK) or a backslash, or a backslash escape (the open quote, a backslash,
// "/", b, f, n, r, t, or "u" followed by four hex digits).
char = _{ !(PEEK | "\\") ~ ANY
| "\\" ~ (PEEK | "\\" | "/" | "b" | "f" | "n" | "r" | "t")
| "\\" ~ ("u" ~ ASCII_HEX_DIGIT{4})}
// A comparison between a key and a value. The alternatives are tried in the
// order listed (PEG ordered choice); each comparison produces its own pair.
condition = _{eq | greater | less | geq | leq | neq}
geq = {key ~ ">=" ~ value}
leq = {key ~ "<=" ~ value}
neq = {key ~ "!=" ~ value}
eq = {key ~ "=" ~ value}
greater = {key ~ ">" ~ value}
less = {key ~ "<" ~ value}
// Entry point: one expression spanning the whole input (SOI .. EOI).
prgm = {SOI ~ expr ~ EOI}
// A sequence of terms separated by binary operators.
expr = _{ ( term ~ (operation ~ term)* ) }
// A term is a parenthesized expression, a comparison, or a negation.
term = _{ ("(" ~ expr ~ ")") | condition | not }
// Binary operators joining terms.
operation = _{ and | or }
and = {"AND"}
or = {"OR"}
// Prefix negation of a single term.
not = {"NOT" ~ term}
// Only the plain space character is skipped as implicit whitespace.
WHITESPACE = _{ " " }

View File

@ -1,12 +1,8 @@
pub mod operation;
use lazy_static::lazy_static;
use pest::prec_climber::{Operator, Assoc, PrecClimber};
pub use operation::Operation;
lazy_static! {
static ref PREC_CLIMBER: PrecClimber<Rule> = {
pub static ref PREC_CLIMBER: PrecClimber<Rule> = {
use Assoc::*;
use Rule::*;
pest::prec_climber::PrecClimber::new(vec![Operator::new(or, Left), Operator::new(and, Left)])
@ -14,5 +10,5 @@ lazy_static! {
}
#[derive(Parser)]
#[grammar = "parser/grammar.pest"]
#[grammar = "filters/parser/grammar.pest"]
pub struct FilterParser;

View File

@ -1,15 +0,0 @@
[package]
name = "meilisearch-filters"
version = "0.9.0"
authors = ["mposmta <postma.marin@protonmail.com>"]
edition = "2018"
license = "MIT"
[dependencies]
lazy_static = "1.4.0"
meilisearch-core = { path = "../meilisearch-core", version = "0.9.0" }
pest = "2.0"
pest_derive = "2.0"
heed = "0.6.1"

View File

@ -1,23 +0,0 @@
use crate::parser::Operation;
use meilisearch_core::{DocumentId, Schema, MainT };
use heed::RoTxn;
/// A parsed filter expression together with the borrowed `RoTxn<MainT>` it was
/// created with.
pub struct Filter<'r> {
    reader: &'r RoTxn<MainT>,
    operation: Box<Operation>,
}

impl<'r> Filter<'r> {
    /// Builds a filter by parsing `expr` against `schema`.
    ///
    /// # Errors
    ///
    /// Returns whatever error `Operation::parse_with_schema` reports.
    pub fn new<T: AsRef<str>>(
        expr: T,
        schema: &Schema,
        reader: &'r RoTxn<MainT>,
    ) -> Result<Self, Box<dyn std::error::Error>> {
        let parsed = Operation::parse_with_schema(expr, schema)?;
        Ok(Self {
            reader,
            operation: Box::new(parsed),
        })
    }

    /// Intended to tell whether a document passes the filter.
    ///
    /// # Panics
    ///
    /// Always panics for now: the body is still `unimplemented!()`.
    pub fn test(&self, _document_id: &DocumentId) -> Result<bool, Box<dyn std::error::Error>> {
        unimplemented!()
    }
}

View File

@ -1,6 +0,0 @@
extern crate pest;
#[macro_use]
extern crate pest_derive;
mod parser;
pub mod filter;

View File

@ -1,15 +0,0 @@
// Grammar for `key:value` queries combined with AND / OR / NOT and parentheses.

// A key is one or more ASCII alphanumeric characters (atomic rule).
key = @{ASCII_ALPHANUMERIC+}
// A value is zero or more ASCII alphanumeric characters, so it may be empty.
value = @{ASCII_ALPHANUMERIC*}
// A single query: a key and a value separated by ":".
query = {key ~ ":" ~ value}
// Entry point: one expression spanning the whole input (SOI .. EOI).
prgm = {SOI ~ expr ~ EOI}
// A sequence of terms separated by binary operators.
expr = _{ term ~ (operation ~ term)* }
// A term is a query, a parenthesized expression, or a negation.
term = _{query | "(" ~ expr ~ ")" | not}
// Binary operators joining terms.
operation = _{ and | or }
and = {"AND"}
or = {"OR"}
// Prefix negation of a single term.
not = {"NOT" ~ term}
// Only the plain space character is skipped as implicit whitespace.
WHITESPACE = _{ " " }

View File

@ -1,100 +0,0 @@
use super::{FilterParser, Rule, PREC_CLIMBER};
use pest::{
iterators::{Pair, Pairs},
Parser,
};
use std::convert::From;
use std::fmt;
use meilisearch_core::Schema;
/// A single comparison between a field and a value; the leaves of a filter
/// expression.
pub enum Query {
    /// Debug-printed as `field:value`.
    Contains { field: String, value: String },
    /// Debug-printed as `field=value`.
    IsEqual { field: String, value: String },
    /// Debug-printed as `field<value`.
    IsLower { field: String, value: String },
}

impl fmt::Debug for Query {
    /// Writes the query in a compact `field<op>value` form.
    ///
    /// Every variant is handled explicitly: `Debug` output is used for
    /// diagnostics (and is derived on `Operation`, which embeds `Query`), so it
    /// must never panic. Listing all variants also makes the compiler flag this
    /// match when a new variant is added.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match self {
            Self::Contains { field, value } => write!(f, "{}:{}", field, value),
            Self::IsEqual { field, value } => write!(f, "{}={}", field, value),
            Self::IsLower { field, value } => write!(f, "{}<{}", field, value),
        }
    }
}
impl From<Pair<'_, Rule>> for Query {
    /// Builds a query from a pair whose first two inner pairs are the key and
    /// the value, in that order.
    ///
    /// # Panics
    ///
    /// Panics if the pair has fewer than two inner pairs.
    fn from(item: Pair<Rule>) -> Self {
        let mut inner = item.into_inner();
        let field = inner.next().unwrap().as_str().to_owned();
        // do additional parsing here and get the correct query type
        let value = inner.next().unwrap().as_str().to_owned();
        Self::Contains { field, value }
    }
}
/// A `(start, end)` pair of positions in the parsed input.
#[derive(Debug)]
pub struct Span(usize, usize);

impl Span {
    /// Returns the smallest span that covers both `self` and `other`: the
    /// lowest of the two starts paired with the highest of the two ends.
    pub fn merge(&self, other: &Span) -> Self {
        // The end has to be taken from the `.1` fields. Comparing the starts
        // (`.0`) a second time would yield a span that ends where the later
        // operand *begins*.
        Span(self.0.min(other.0), self.1.max(other.1))
    }
}
impl From<pest::Span<'_>> for Span {
    /// Copies the start and end positions out of a pest span.
    fn from(other: pest::Span<'_>) -> Self {
        let (start, end) = (other.start(), other.end());
        Span(start, end)
    }
}
/// A node of a filter expression tree. Every variant carries the `Span` of
/// the input it was built from as its last field.
#[derive(Debug)]
pub enum Operation {
/// A leaf holding a single query.
Query(Query, Span),
/// Disjunction of two sub-expressions.
Or(Box<Operation>, Box<Operation>, Span),
/// Conjunction of two sub-expressions.
And(Box<Operation>, Box<Operation>, Span),
/// Negation of a sub-expression.
Not(Box<Operation>, Span),
}
impl Operation {
pub fn as_span<'a>(&'a self) -> &'a Span {
use Operation::*;
match self {
Query(_, span) | Or(_, _, span) | And(_, _, span) | Not(_, span) => span,
}
}
}
fn eval(expression: Pairs<Rule>) -> Operation {
PREC_CLIMBER.climb(
expression,
|pair: Pair<Rule>| {
let span = Span::from(pair.as_span());
match pair.as_rule() {
Rule::query => Operation::Query(Query::from(pair), span),
Rule::prgm => eval(pair.into_inner()),
Rule::not => Operation::Not(Box::new(eval(pair.into_inner())), span),
_ => unreachable!(),
}
},
|lhs: Operation, op: Pair<Rule>, rhs: Operation| {
let span = lhs.as_span().merge(rhs.as_span());
match op.as_rule() {
Rule::or => Operation::Or(Box::new(lhs), Box::new(rhs), span),
Rule::and => Operation::And(Box::new(lhs), Box::new(rhs), span),
_ => unreachable!(),
}
},
)
}
impl Operation {
/// Placeholder for parsing `_expr` into an `Operation` with the help of
/// `_schema`. Both arguments are currently unused.
///
/// # Panics
///
/// Always panics: the body is still `unimplemented!()`.
pub fn parse_with_schema<T: AsRef<str>>(_expr: T, _schema: &Schema) -> Result<Self, Box<dyn std::error::Error>> {
unimplemented!()
}
}