mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-25 22:34:28 +01:00
feat(search): Add a StreamWithState Streamer
This commit is contained in:
parent
7fba62fc22
commit
6d57a8af05
22
Cargo.lock
generated
22
Cargo.lock
generated
@ -85,7 +85,7 @@ dependencies = [
|
||||
[[package]]
|
||||
name = "fst"
|
||||
version = "0.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
source = "git+https://github.com/Kerollmops/fst.git?branch=stream-with-state#a969462433944a22f1356a8bf2affb8e9bde6f67"
|
||||
dependencies = [
|
||||
"byteorder 1.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"memmap 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
@ -161,9 +161,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
[[package]]
|
||||
name = "levenshtein_automata"
|
||||
version = "0.1.0"
|
||||
source = "git+https://github.com/tantivy-search/levenshtein-automata.git#ba2b62e3631593c408e2b9b8bb95c430384a331e"
|
||||
source = "git+https://github.com/Kerollmops/levenshtein-automata.git?branch=custom-fst#13a685e087efcf253936342c055166fa5d5c9b9c"
|
||||
dependencies = [
|
||||
"fst 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"fst 0.3.0 (git+https://github.com/Kerollmops/fst.git?branch=stream-with-state)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@ -305,9 +305,9 @@ version = "0.1.0"
|
||||
dependencies = [
|
||||
"bincode 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"env_logger 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"fst 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"fst 0.3.0 (git+https://github.com/Kerollmops/fst.git?branch=stream-with-state)",
|
||||
"futures 0.1.21 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"levenshtein_automata 0.1.0 (git+https://github.com/tantivy-search/levenshtein-automata.git)",
|
||||
"levenshtein_automata 0.1.0 (git+https://github.com/Kerollmops/levenshtein-automata.git?branch=custom-fst)",
|
||||
"serde 1.0.45 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"serde_derive 1.0.45 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"serde_json 1.0.17 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
@ -345,7 +345,7 @@ dependencies = [
|
||||
"proc-macro2 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"quote 0.5.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"serde_derive_internals 0.23.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"syn 0.13.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"syn 0.13.7 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@ -354,7 +354,7 @@ version = "0.23.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"proc-macro2 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"syn 0.13.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"syn 0.13.7 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@ -384,7 +384,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "syn"
|
||||
version = "0.13.6"
|
||||
version = "0.13.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"proc-macro2 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
@ -651,7 +651,7 @@ dependencies = [
|
||||
"checksum crossbeam-utils 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "d636a8b3bcc1b409d7ffd3facef8f21dcb4009626adbd0c5e6c4305c07253c7b"
|
||||
"checksum dtoa 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)" = "09c3753c3db574d215cba4ea76018483895d7bff25a31b49ba45db21c48e50ab"
|
||||
"checksum env_logger 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "15abd780e45b3ea4f76b4e9a26ff4843258dd8a3eed2775a0e7368c2e7936c2f"
|
||||
"checksum fst 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "d94485a00b1827b861dd9d1a2cc9764f9044d4c535514c0760a5a2012ef3399f"
|
||||
"checksum fst 0.3.0 (git+https://github.com/Kerollmops/fst.git?branch=stream-with-state)" = "<none>"
|
||||
"checksum fuchsia-zircon 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "2e9763c69ebaae630ba35f74888db465e49e259ba1bc0eda7d06f4a067615d82"
|
||||
"checksum fuchsia-zircon-sys 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "3dcaa9ae7725d12cdb85b3ad99a434db70b468c09ded17e012d86b5c1010f7a7"
|
||||
"checksum futures 0.1.21 (registry+https://github.com/rust-lang/crates.io-index)" = "1a70b146671de62ec8c8ed572219ca5d594d9b06c0b364d5e67b722fc559b48c"
|
||||
@ -662,7 +662,7 @@ dependencies = [
|
||||
"checksum kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7507624b29483431c0ba2d82aece8ca6cdba9382bff4ddd0f7490560c056098d"
|
||||
"checksum lazy_static 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "c8f31047daa365f19be14b47c29df4f7c3b581832407daabe6ae77397619237d"
|
||||
"checksum lazycell 0.6.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a6f08839bc70ef4a3fe1d566d5350f519c5912ea86be0df1740a7d247c7fc0ef"
|
||||
"checksum levenshtein_automata 0.1.0 (git+https://github.com/tantivy-search/levenshtein-automata.git)" = "<none>"
|
||||
"checksum levenshtein_automata 0.1.0 (git+https://github.com/Kerollmops/levenshtein-automata.git?branch=custom-fst)" = "<none>"
|
||||
"checksum libc 0.2.40 (registry+https://github.com/rust-lang/crates.io-index)" = "6fd41f331ac7c5b8ac259b8bf82c75c0fb2e469bbf37d2becbba9a6a2221965b"
|
||||
"checksum log 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)" = "e19e8d5c34a3e0e2223db8e060f9e8264aeeb5c5fc64a4ee9965c062211c024b"
|
||||
"checksum log 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)" = "89f010e843f2b1a31dbd316b3b8d443758bc634bed37aabade59c686d644e0a2"
|
||||
@ -689,7 +689,7 @@ dependencies = [
|
||||
"checksum slab 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "17b4fcaed89ab08ef143da37bc52adbcc04d4a69014f4c1208d6b51f0c47bc23"
|
||||
"checksum slab 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "fdeff4cd9ecff59ec7e3744cbca73dfe5ac35c2aedb2cfba8a1c715a18912e9d"
|
||||
"checksum smallvec 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "4c8cbcd6df1e117c2210e13ab5109635ad68a929fcbb8964dc965b76cb5ee013"
|
||||
"checksum syn 0.13.6 (registry+https://github.com/rust-lang/crates.io-index)" = "cd06d020ab141832177869072dffb95d84e76c0cc0ab26d6eb38583e07d0403b"
|
||||
"checksum syn 0.13.7 (registry+https://github.com/rust-lang/crates.io-index)" = "61b8f1b737f929c6516ba46a3133fd6d5215ad8a62f66760f851f7048aebedfb"
|
||||
"checksum take 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b157868d8ac1f56b64604539990685fa7611d8fa9e5476cf0c02cf34d32917c5"
|
||||
"checksum time 0.1.39 (registry+https://github.com/rust-lang/crates.io-index)" = "a15375f1df02096fb3317256ce2cee6a1f42fc84ea5ad5fc8c421cfe40c73098"
|
||||
"checksum tokio 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "7d00555353b013e170ed8bc4e13f648a317d1fd12157dbcae13f7013f6cf29f5"
|
||||
|
11
Cargo.toml
11
Cargo.toml
@ -6,8 +6,6 @@ authors = ["Kerollmops <renault.cle@gmail.com>"]
|
||||
[dependencies]
|
||||
bincode = "1.0"
|
||||
env_logger = { version = "0.3", default-features = false }
|
||||
fst = "0.3"
|
||||
levenshtein_automata = { git = "https://github.com/tantivy-search/levenshtein-automata.git", features = ["fst_automaton"] }
|
||||
futures = "0.1"
|
||||
serde = "1.0"
|
||||
serde_derive = "1.0"
|
||||
@ -17,5 +15,14 @@ tokio-proto = "0.1"
|
||||
tokio-service = "0.1"
|
||||
url = "1.7"
|
||||
|
||||
[dependencies.fst]
|
||||
git = "https://github.com/Kerollmops/fst.git"
|
||||
branch = "stream-with-state"
|
||||
|
||||
[dependencies.levenshtein_automata]
|
||||
git = "https://github.com/Kerollmops/levenshtein-automata.git"
|
||||
branch = "custom-fst"
|
||||
features = ["fst_automaton"]
|
||||
|
||||
[profile.release]
|
||||
lto = true
|
||||
|
@ -14,8 +14,8 @@ use std::fs::File;
|
||||
use std::io::{Read, BufReader};
|
||||
|
||||
use fst::{IntoStreamer, Streamer};
|
||||
use levenshtein_automata::LevenshteinAutomatonBuilder;
|
||||
use futures::future;
|
||||
use levenshtein_automata::LevenshteinAutomatonBuilder as LevBuilder;
|
||||
use tokio_minihttp::{Request, Response, Http};
|
||||
use tokio_proto::TcpServer;
|
||||
use tokio_service::Service;
|
||||
@ -23,35 +23,18 @@ use tokio_service::Service;
|
||||
use raptor::FstMap;
|
||||
|
||||
static mut MAP: Option<FstMap<u64>> = None;
|
||||
static mut LEV_AUT_BLDR_0: Option<LevenshteinAutomatonBuilder> = None;
|
||||
static mut LEV_AUT_BLDR_1: Option<LevenshteinAutomatonBuilder> = None;
|
||||
static mut LEV_AUT_BLDR_2: Option<LevenshteinAutomatonBuilder> = None;
|
||||
static mut LEV_BUILDER_0: Option<LevBuilder> = None;
|
||||
static mut LEV_BUILDER_1: Option<LevBuilder> = None;
|
||||
static mut LEV_BUILDER_2: Option<LevBuilder> = None;
|
||||
|
||||
struct MainService {
|
||||
map: &'static FstMap<u64>,
|
||||
lev_aut_bldr_0: &'static LevenshteinAutomatonBuilder,
|
||||
lev_aut_bldr_1: &'static LevenshteinAutomatonBuilder,
|
||||
lev_aut_bldr_2: &'static LevenshteinAutomatonBuilder,
|
||||
struct MainService<'a> {
|
||||
map: &'a FstMap<u64>,
|
||||
lev_builder_0: &'a LevBuilder,
|
||||
lev_builder_1: &'a LevBuilder,
|
||||
lev_builder_2: &'a LevBuilder,
|
||||
}
|
||||
|
||||
fn construct_body<'f, S>(mut stream: S) -> String
|
||||
where
|
||||
S: 'f + for<'a> Streamer<'a, Item=(&'a str, &'a [u64])>
|
||||
{
|
||||
let mut body = String::new();
|
||||
body.push_str("<html><body>");
|
||||
|
||||
while let Some((key, values)) = stream.next() {
|
||||
let values = &values[..values.len().min(10)];
|
||||
body.push_str(&format!("{:?} {:?}</br>", key, values));
|
||||
}
|
||||
|
||||
body.push_str("</body></html>");
|
||||
|
||||
body
|
||||
}
|
||||
|
||||
impl Service for MainService {
|
||||
impl<'a> Service for MainService<'a> {
|
||||
type Request = Request;
|
||||
type Response = Response;
|
||||
type Error = io::Error;
|
||||
@ -66,19 +49,29 @@ impl Service for MainService {
|
||||
resp.header("Content-Type", "text/html");
|
||||
resp.header("charset", "utf-8");
|
||||
|
||||
if let Some((_, key)) = url.query_pairs().find(|&(ref k, _)| k == "q") {
|
||||
let key = key.to_lowercase();
|
||||
if let Some((_, query)) = url.query_pairs().find(|&(ref k, _)| k == "q") {
|
||||
let query = query.to_lowercase();
|
||||
|
||||
let lev = if key.len() <= 4 {
|
||||
self.lev_aut_bldr_0.build_dfa(&key)
|
||||
} else if key.len() <= 8 {
|
||||
self.lev_aut_bldr_1.build_dfa(&key)
|
||||
let lev = if query.len() <= 4 {
|
||||
self.lev_builder_0.build_dfa(&query)
|
||||
} else if query.len() <= 8 {
|
||||
self.lev_builder_1.build_dfa(&query)
|
||||
} else {
|
||||
self.lev_aut_bldr_2.build_dfa(&key)
|
||||
self.lev_builder_2.build_dfa(&query)
|
||||
};
|
||||
|
||||
let stream = self.map.search(lev).into_stream();
|
||||
let body = construct_body(stream);
|
||||
let mut stream = self.map.search(&lev).with_state().into_stream();
|
||||
|
||||
let mut body = String::new();
|
||||
body.push_str("<html><body>");
|
||||
|
||||
while let Some((key, values, state)) = stream.next() {
|
||||
let values = &values[..values.len().min(10)];
|
||||
let distance = lev.distance(state);
|
||||
body.push_str(&format!("<p>{:?} (dist: {:?}) {:?}</p>", key, distance, values));
|
||||
}
|
||||
|
||||
body.push_str("</body></html>");
|
||||
|
||||
resp.body_vec(body.into_bytes());
|
||||
}
|
||||
@ -108,9 +101,9 @@ fn main() {
|
||||
|
||||
Some(FstMap::from_bytes(map, &values).unwrap())
|
||||
};
|
||||
LEV_AUT_BLDR_0 = Some(LevenshteinAutomatonBuilder::new(0, false));
|
||||
LEV_AUT_BLDR_1 = Some(LevenshteinAutomatonBuilder::new(1, false));
|
||||
LEV_AUT_BLDR_2 = Some(LevenshteinAutomatonBuilder::new(2, false));
|
||||
LEV_BUILDER_0 = Some(LevBuilder::new(0, false));
|
||||
LEV_BUILDER_1 = Some(LevBuilder::new(1, false));
|
||||
LEV_BUILDER_2 = Some(LevBuilder::new(2, false));
|
||||
}
|
||||
|
||||
let addr = "0.0.0.0:8080".parse().unwrap();
|
||||
@ -118,9 +111,9 @@ fn main() {
|
||||
unsafe {
|
||||
TcpServer::new(Http, addr).serve(|| Ok(MainService {
|
||||
map: MAP.as_ref().unwrap(),
|
||||
lev_aut_bldr_0: LEV_AUT_BLDR_0.as_ref().unwrap(),
|
||||
lev_aut_bldr_1: LEV_AUT_BLDR_1.as_ref().unwrap(),
|
||||
lev_aut_bldr_2: LEV_AUT_BLDR_2.as_ref().unwrap(),
|
||||
lev_builder_0: LEV_BUILDER_0.as_ref().unwrap(),
|
||||
lev_builder_1: LEV_BUILDER_1.as_ref().unwrap(),
|
||||
lev_builder_2: LEV_BUILDER_2.as_ref().unwrap(),
|
||||
}))
|
||||
}
|
||||
}
|
||||
|
73
src/lib.rs
73
src/lib.rs
@ -15,15 +15,23 @@ use fst::Automaton;
|
||||
pub use self::fst_map::{FstMap, FstMapBuilder};
|
||||
use self::fst_map::Values;
|
||||
|
||||
pub struct StreamBuilder<'a, T: 'a, A: Automaton> {
|
||||
inner: fst::map::StreamBuilder<'a, A>,
|
||||
values: &'a Values<T>,
|
||||
pub struct StreamBuilder<'m, 'v, T: 'v, A> {
|
||||
inner: fst::map::StreamBuilder<'m, A>,
|
||||
values: &'v Values<T>,
|
||||
}
|
||||
|
||||
impl<'a, T: 'a, A: Automaton> fst::IntoStreamer<'a> for StreamBuilder<'a, T, A> {
|
||||
type Item = (&'a str, &'a [T]);
|
||||
impl<'m, 'v, T: 'v, A> StreamBuilder<'m, 'v, T, A> {
|
||||
pub fn with_state(self) -> StreamWithStateBuilder<'m, 'v, T, A> {
|
||||
StreamWithStateBuilder {
|
||||
inner: self.inner.with_state(),
|
||||
values: self.values,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
type Into = Stream<'a, T, A>;
|
||||
impl<'m, 'v, 'a, T: 'v + 'a, A: Automaton> fst::IntoStreamer<'a> for StreamBuilder<'m, 'v, T, A> {
|
||||
type Item = (&'a str, &'a [T]);
|
||||
type Into = Stream<'m, 'v, T, A>;
|
||||
|
||||
fn into_stream(self) -> Self::Into {
|
||||
Stream {
|
||||
@ -33,12 +41,12 @@ impl<'a, T: 'a, A: Automaton> fst::IntoStreamer<'a> for StreamBuilder<'a, T, A>
|
||||
}
|
||||
}
|
||||
|
||||
pub struct Stream<'a, T: 'a, A: Automaton = fst::automaton::AlwaysMatch> {
|
||||
inner: fst::map::Stream<'a, A>,
|
||||
values: &'a Values<T>,
|
||||
pub struct Stream<'m, 'v, T: 'v, A: Automaton = fst::automaton::AlwaysMatch> {
|
||||
inner: fst::map::Stream<'m, A>,
|
||||
values: &'v Values<T>,
|
||||
}
|
||||
|
||||
impl<'a, 'm, T: 'a, A: Automaton> fst::Streamer<'a> for Stream<'m, T, A> {
|
||||
impl<'m, 'v, 'a, T: 'v + 'a, A: Automaton> fst::Streamer<'a> for Stream<'m, 'v, T, A> {
|
||||
type Item = (&'a str, &'a [T]);
|
||||
|
||||
fn next(&'a mut self) -> Option<Self::Item> {
|
||||
@ -53,3 +61,48 @@ impl<'a, 'm, T: 'a, A: Automaton> fst::Streamer<'a> for Stream<'m, T, A> {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub struct StreamWithStateBuilder<'m, 'v, T: 'v, A> {
|
||||
inner: fst::map::StreamWithStateBuilder<'m, A>,
|
||||
values: &'v Values<T>,
|
||||
}
|
||||
|
||||
impl<'m, 'v, 'a, T: 'v + 'a, A: 'a> fst::IntoStreamer<'a> for StreamWithStateBuilder<'m, 'v, T, A>
|
||||
where
|
||||
A: Automaton,
|
||||
A::State: Clone,
|
||||
{
|
||||
type Item = (&'a str, &'a [T], A::State);
|
||||
type Into = StreamWithState<'m, 'v, T, A>;
|
||||
|
||||
fn into_stream(self) -> Self::Into {
|
||||
StreamWithState {
|
||||
inner: self.inner.into_stream(),
|
||||
values: self.values,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub struct StreamWithState<'m, 'v, T: 'v, A: Automaton = fst::automaton::AlwaysMatch> {
|
||||
inner: fst::map::StreamWithState<'m, A>,
|
||||
values: &'v Values<T>,
|
||||
}
|
||||
|
||||
impl<'m, 'v, 'a, T: 'v + 'a, A: 'a> fst::Streamer<'a> for StreamWithState<'m, 'v, T, A>
|
||||
where
|
||||
A: Automaton,
|
||||
A::State: Clone,
|
||||
{
|
||||
type Item = (&'a str, &'a [T], A::State);
|
||||
|
||||
fn next(&'a mut self) -> Option<Self::Item> {
|
||||
match self.inner.next() {
|
||||
Some((k, i, state)) => {
|
||||
let key = unsafe { from_utf8_unchecked(k) };
|
||||
let values = unsafe { self.values.get_unchecked(i as usize) };
|
||||
Some((key, values, state))
|
||||
},
|
||||
None => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user