mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-12-01 17:15:46 +01:00
map: Allow using the Levenshtein algorithm to search
This commit is contained in:
parent
0581b296bb
commit
96d2fbcd3d
17
Cargo.lock
generated
17
Cargo.lock
generated
@ -99,6 +99,15 @@ dependencies = [
|
|||||||
"memmap 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
"memmap 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "fst-levenshtein"
|
||||||
|
version = "0.2.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
dependencies = [
|
||||||
|
"fst 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
|
"utf8-ranges 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "fuchsia-zircon"
|
name = "fuchsia-zircon"
|
||||||
version = "0.3.3"
|
version = "0.3.3"
|
||||||
@ -306,6 +315,7 @@ dependencies = [
|
|||||||
"bincode 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
"bincode 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
"env_logger 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)",
|
"env_logger 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
"fst 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
"fst 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
|
"fst-levenshtein 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
"futures 0.1.21 (registry+https://github.com/rust-lang/crates.io-index)",
|
"futures 0.1.21 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
"serde 1.0.42 (registry+https://github.com/rust-lang/crates.io-index)",
|
"serde 1.0.42 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
"serde_derive 1.0.42 (registry+https://github.com/rust-lang/crates.io-index)",
|
"serde_derive 1.0.42 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
@ -598,6 +608,11 @@ dependencies = [
|
|||||||
"percent-encoding 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
"percent-encoding 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "utf8-ranges"
|
||||||
|
version = "0.1.3"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "winapi"
|
name = "winapi"
|
||||||
version = "0.2.8"
|
version = "0.2.8"
|
||||||
@ -650,6 +665,7 @@ dependencies = [
|
|||||||
"checksum dtoa 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)" = "09c3753c3db574d215cba4ea76018483895d7bff25a31b49ba45db21c48e50ab"
|
"checksum dtoa 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)" = "09c3753c3db574d215cba4ea76018483895d7bff25a31b49ba45db21c48e50ab"
|
||||||
"checksum env_logger 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "15abd780e45b3ea4f76b4e9a26ff4843258dd8a3eed2775a0e7368c2e7936c2f"
|
"checksum env_logger 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "15abd780e45b3ea4f76b4e9a26ff4843258dd8a3eed2775a0e7368c2e7936c2f"
|
||||||
"checksum fst 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "d94485a00b1827b861dd9d1a2cc9764f9044d4c535514c0760a5a2012ef3399f"
|
"checksum fst 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "d94485a00b1827b861dd9d1a2cc9764f9044d4c535514c0760a5a2012ef3399f"
|
||||||
|
"checksum fst-levenshtein 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "64f12af1569dd78afbefe476034bbdce0372d18e9dc75b634bde0e7b8bf994c8"
|
||||||
"checksum fuchsia-zircon 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "2e9763c69ebaae630ba35f74888db465e49e259ba1bc0eda7d06f4a067615d82"
|
"checksum fuchsia-zircon 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "2e9763c69ebaae630ba35f74888db465e49e259ba1bc0eda7d06f4a067615d82"
|
||||||
"checksum fuchsia-zircon-sys 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "3dcaa9ae7725d12cdb85b3ad99a434db70b468c09ded17e012d86b5c1010f7a7"
|
"checksum fuchsia-zircon-sys 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "3dcaa9ae7725d12cdb85b3ad99a434db70b468c09ded17e012d86b5c1010f7a7"
|
||||||
"checksum futures 0.1.21 (registry+https://github.com/rust-lang/crates.io-index)" = "1a70b146671de62ec8c8ed572219ca5d594d9b06c0b364d5e67b722fc559b48c"
|
"checksum futures 0.1.21 (registry+https://github.com/rust-lang/crates.io-index)" = "1a70b146671de62ec8c8ed572219ca5d594d9b06c0b364d5e67b722fc559b48c"
|
||||||
@ -706,6 +722,7 @@ dependencies = [
|
|||||||
"checksum unicode-normalization 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "51ccda9ef9efa3f7ef5d91e8f9b83bbe6955f9bf86aec89d5cce2c874625920f"
|
"checksum unicode-normalization 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "51ccda9ef9efa3f7ef5d91e8f9b83bbe6955f9bf86aec89d5cce2c874625920f"
|
||||||
"checksum unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "fc72304796d0818e357ead4e000d19c9c174ab23dc11093ac919054d20a6a7fc"
|
"checksum unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "fc72304796d0818e357ead4e000d19c9c174ab23dc11093ac919054d20a6a7fc"
|
||||||
"checksum url 1.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "f808aadd8cfec6ef90e4a14eb46f24511824d1ac596b9682703c87056c8678b7"
|
"checksum url 1.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "f808aadd8cfec6ef90e4a14eb46f24511824d1ac596b9682703c87056c8678b7"
|
||||||
|
"checksum utf8-ranges 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "a1ca13c08c41c9c3e04224ed9ff80461d97e121589ff27c753a16cb10830ae0f"
|
||||||
"checksum winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)" = "167dc9d6949a9b857f3451275e911c3f44255842c1f7a76f33c55103a909087a"
|
"checksum winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)" = "167dc9d6949a9b857f3451275e911c3f44255842c1f7a76f33c55103a909087a"
|
||||||
"checksum winapi 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)" = "04e3bd221fcbe8a271359c04f21a76db7d0c6028862d1bb5512d85e1e2eb5bb3"
|
"checksum winapi 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)" = "04e3bd221fcbe8a271359c04f21a76db7d0c6028862d1bb5512d85e1e2eb5bb3"
|
||||||
"checksum winapi-build 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "2d315eee3b34aca4797b2da6b13ed88266e6d612562a0c46390af8299fc699bc"
|
"checksum winapi-build 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "2d315eee3b34aca4797b2da6b13ed88266e6d612562a0c46390af8299fc699bc"
|
||||||
|
@ -7,12 +7,13 @@ authors = ["Kerollmops <renault.cle@gmail.com>"]
|
|||||||
bincode = "1.0"
|
bincode = "1.0"
|
||||||
env_logger = { version = "0.3", default-features = false }
|
env_logger = { version = "0.3", default-features = false }
|
||||||
fst = "0.3"
|
fst = "0.3"
|
||||||
|
fst-levenshtein = "0.2"
|
||||||
futures = "0.1"
|
futures = "0.1"
|
||||||
|
serde = "1.0"
|
||||||
|
serde_derive = "1.0"
|
||||||
|
serde_json = "1.0"
|
||||||
|
smallvec = { version = "0.6", features = ["serde"] }
|
||||||
tokio-minihttp = { git = "https://github.com/tokio-rs/tokio-minihttp.git" }
|
tokio-minihttp = { git = "https://github.com/tokio-rs/tokio-minihttp.git" }
|
||||||
tokio-proto = "0.1"
|
tokio-proto = "0.1"
|
||||||
tokio-service = "0.1"
|
tokio-service = "0.1"
|
||||||
serde = "1.0"
|
|
||||||
serde_json = "1.0"
|
|
||||||
serde_derive = "1.0"
|
|
||||||
smallvec = { version = "0.6", features = ["serde"] }
|
|
||||||
url = "1.7"
|
url = "1.7"
|
||||||
|
@ -1,4 +1,6 @@
|
|||||||
extern crate env_logger;
|
extern crate env_logger;
|
||||||
|
extern crate fst;
|
||||||
|
extern crate fst_levenshtein;
|
||||||
extern crate futures;
|
extern crate futures;
|
||||||
extern crate raptor;
|
extern crate raptor;
|
||||||
extern crate tokio_minihttp;
|
extern crate tokio_minihttp;
|
||||||
@ -8,6 +10,8 @@ extern crate url;
|
|||||||
|
|
||||||
use std::io;
|
use std::io;
|
||||||
|
|
||||||
|
use fst_levenshtein::Levenshtein;
|
||||||
|
use fst::{IntoStreamer, Streamer};
|
||||||
use futures::future;
|
use futures::future;
|
||||||
use tokio_minihttp::{Request, Response, Http};
|
use tokio_minihttp::{Request, Response, Http};
|
||||||
use tokio_proto::TcpServer;
|
use tokio_proto::TcpServer;
|
||||||
@ -34,8 +38,18 @@ impl Service for MainService {
|
|||||||
|
|
||||||
if let Some((_, key)) = url.query_pairs().find(|&(ref k, _)| k == "q") {
|
if let Some((_, key)) = url.query_pairs().find(|&(ref k, _)| k == "q") {
|
||||||
let key = key.to_lowercase();
|
let key = key.to_lowercase();
|
||||||
let values = self.map.get(&key).map(|a| &a[..10]);
|
|
||||||
resp.body(&format!("{:?}", values));
|
let lev = Levenshtein::new(&key, 2).unwrap();
|
||||||
|
|
||||||
|
let mut body = String::new();
|
||||||
|
|
||||||
|
let mut stream = self.map.search(lev).into_stream();
|
||||||
|
while let Some((key, values)) = stream.next() {
|
||||||
|
let values = &values[..values.len().min(10)];
|
||||||
|
body.push_str(&format!("{:?} {:?}\n", key, values));
|
||||||
|
}
|
||||||
|
|
||||||
|
resp.body(&body);
|
||||||
}
|
}
|
||||||
|
|
||||||
future::ok(resp)
|
future::ok(resp)
|
||||||
|
40
src/lib.rs
40
src/lib.rs
@ -5,6 +5,7 @@ extern crate serde_json;
|
|||||||
#[macro_use] extern crate serde_derive;
|
#[macro_use] extern crate serde_derive;
|
||||||
extern crate smallvec;
|
extern crate smallvec;
|
||||||
|
|
||||||
|
use std::ops::{Deref, DerefMut};
|
||||||
use std::io::Write;
|
use std::io::Write;
|
||||||
use std::fs::File;
|
use std::fs::File;
|
||||||
use std::path::Path;
|
use std::path::Path;
|
||||||
@ -50,6 +51,45 @@ impl MultiMap {
|
|||||||
pub fn get<K: AsRef<[u8]>>(&self, key: K) -> Option<&[u64]> {
|
pub fn get<K: AsRef<[u8]>>(&self, key: K) -> Option<&[u64]> {
|
||||||
self.map.get(key).map(|i| &*self.values[i as usize])
|
self.map.get(key).map(|i| &*self.values[i as usize])
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn search<A: fst::Automaton>(&self, aut: A) -> StreamBuilder<A> {
|
||||||
|
StreamBuilder {
|
||||||
|
inner: self.map.search(aut),
|
||||||
|
values: &self.values,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct StreamBuilder<'a, A: fst::Automaton> {
|
||||||
|
inner: fst::map::StreamBuilder<'a, A>,
|
||||||
|
values: &'a [SmallVec32<u64>],
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a, A: fst::Automaton> Deref for StreamBuilder<'a, A> {
|
||||||
|
type Target = fst::map::StreamBuilder<'a, A>;
|
||||||
|
|
||||||
|
fn deref(&self) -> &Self::Target {
|
||||||
|
&self.inner
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a, A: fst::Automaton> DerefMut for StreamBuilder<'a, A> {
|
||||||
|
fn deref_mut(&mut self) -> &mut Self::Target {
|
||||||
|
&mut self.inner
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a, A: fst::Automaton> fst::IntoStreamer<'a> for StreamBuilder<'a, A> {
|
||||||
|
type Item = (&'a str, &'a [u64]);
|
||||||
|
|
||||||
|
type Into = Stream<'a, A>;
|
||||||
|
|
||||||
|
fn into_stream(self) -> Self::Into {
|
||||||
|
Stream {
|
||||||
|
inner: self.inner.into_stream(),
|
||||||
|
values: self.values,
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct Stream<'a, A: fst::Automaton = fst::automaton::AlwaysMatch> {
|
pub struct Stream<'a, A: fst::Automaton = fst::automaton::AlwaysMatch> {
|
||||||
|
Loading…
Reference in New Issue
Block a user