mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-07-03 11:57:07 +02:00
feat(search): Accept multiple words and do a simple union
This commit is contained in:
parent
758baeb8e1
commit
1476aa3dba
5 changed files with 208 additions and 37 deletions
|
@ -13,14 +13,14 @@ use std::path::Path;
|
|||
use std::fs::File;
|
||||
use std::io::{Read, BufReader};
|
||||
|
||||
use fst::{IntoStreamer, Streamer};
|
||||
use fst::Streamer;
|
||||
use futures::future;
|
||||
use levenshtein_automata::LevenshteinAutomatonBuilder as LevBuilder;
|
||||
use tokio_minihttp::{Request, Response, Http};
|
||||
use tokio_proto::TcpServer;
|
||||
use tokio_service::Service;
|
||||
|
||||
use raptor::FstMap;
|
||||
use raptor::{FstMap, OpWithStateBuilder};
|
||||
|
||||
static mut MAP: Option<FstMap<u64>> = None;
|
||||
static mut LEV_BUILDER_0: Option<LevBuilder> = None;
|
||||
|
@ -52,25 +52,40 @@ impl<'a> Service for MainService<'a> {
|
|||
if let Some((_, query)) = url.query_pairs().find(|&(ref k, _)| k == "q") {
|
||||
let query = query.to_lowercase();
|
||||
|
||||
let lev = if query.len() <= 4 {
|
||||
self.lev_builder_0.build_dfa(&query)
|
||||
} else if query.len() <= 8 {
|
||||
self.lev_builder_1.build_dfa(&query)
|
||||
} else {
|
||||
self.lev_builder_2.build_dfa(&query)
|
||||
};
|
||||
let mut automatons = Vec::new();
|
||||
|
||||
let mut stream = self.map.search(&lev).with_state().into_stream();
|
||||
for query in query.split_whitespace() {
|
||||
let lev = if query.len() <= 4 {
|
||||
self.lev_builder_0.build_dfa(&query)
|
||||
} else if query.len() <= 8 {
|
||||
self.lev_builder_1.build_dfa(&query)
|
||||
} else {
|
||||
self.lev_builder_2.build_dfa(&query)
|
||||
};
|
||||
automatons.push(lev);
|
||||
}
|
||||
|
||||
let mut op = OpWithStateBuilder::new(self.map.values());
|
||||
|
||||
for automaton in automatons.iter().cloned() {
|
||||
let stream = self.map.as_map().search(automaton).with_state();
|
||||
op.push(stream);
|
||||
}
|
||||
|
||||
let mut stream = op.union();
|
||||
|
||||
let mut body = String::new();
|
||||
body.push_str("<html><body>");
|
||||
|
||||
while let Some((key, values, state)) = stream.next() {
|
||||
while let Some((key, ivalues)) = stream.next() {
|
||||
match std::str::from_utf8(key) {
|
||||
Ok(key) => {
|
||||
let values = &values[..values.len().min(10)];
|
||||
let distance = lev.distance(state);
|
||||
body.push_str(&format!("<p>{:?} (dist: {:?}) {:?}</p>", key, distance, values));
|
||||
for ivalue in ivalues {
|
||||
let i = ivalue.index;
|
||||
let state = ivalue.state;
|
||||
let distance = automatons[i].distance(state);
|
||||
body.push_str(&format!("<p>{:?} (dist: {:?}) {:?}</p>", key, distance, ivalue.values));
|
||||
}
|
||||
},
|
||||
Err(e) => eprintln!("{:?}", e),
|
||||
}
|
||||
|
|
170
src/fst_map.rs
170
src/fst_map.rs
|
@ -1,5 +1,5 @@
|
|||
use bincode;
|
||||
use fst::{self, Map, MapBuilder, Automaton};
|
||||
use fst::{self, Automaton};
|
||||
use serde::de::DeserializeOwned;
|
||||
use serde::ser::Serialize;
|
||||
use std::fs::File;
|
||||
|
@ -10,7 +10,7 @@ use {StreamBuilder, Stream};
|
|||
|
||||
#[derive(Debug)]
|
||||
pub struct FstMap<T> {
|
||||
inner: Map,
|
||||
inner: fst::Map,
|
||||
values: Values<T>,
|
||||
}
|
||||
|
||||
|
@ -21,7 +21,7 @@ impl<T> FstMap<T> {
|
|||
P: AsRef<Path>,
|
||||
Q: AsRef<Path>
|
||||
{
|
||||
let inner = Map::from_path(map)?;
|
||||
let inner = fst::Map::from_path(map)?;
|
||||
|
||||
// TODO handle errors !!!
|
||||
let values = File::open(values).unwrap();
|
||||
|
@ -35,7 +35,7 @@ impl<T> FstMap<T> {
|
|||
where
|
||||
T: DeserializeOwned
|
||||
{
|
||||
let inner = Map::from_bytes(map)?;
|
||||
let inner = fst::Map::from_bytes(map)?;
|
||||
let values = bincode::deserialize(values).unwrap();
|
||||
|
||||
Ok(Self { inner, values })
|
||||
|
@ -62,6 +62,19 @@ impl<T> FstMap<T> {
|
|||
values: &self.values,
|
||||
}
|
||||
}
|
||||
|
||||
/// Intended to return an `OpBuilder` for running set operations over this map.
///
/// # Panics
///
/// Always panics for now: the body is still `unimplemented!()`.
pub fn op(&self) -> OpBuilder<T> {
|
||||
// TODO: sketch of the intended body, not compiled yet:
// OpBuilder::new(&self.values).add(self.as_inner())
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
/// Returns a shared reference to the wrapped `fst::Map` (the `inner` field).
pub fn as_map(&self) -> &fst::Map {
|
||||
&self.inner
|
||||
}
|
||||
|
||||
/// Returns a shared reference to the `Values<T>` store held alongside the map.
pub fn values(&self) -> &Values<T> {
|
||||
&self.values
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
|
@ -137,7 +150,7 @@ impl<T> FstMapBuilder<T> {
|
|||
|
||||
pub fn build_memory(self) -> fst::Result<FstMap<T>> {
|
||||
Ok(FstMap {
|
||||
inner: Map::from_iter(self.map)?,
|
||||
inner: fst::Map::from_iter(self.map)?,
|
||||
values: Values::new(self.values),
|
||||
})
|
||||
}
|
||||
|
@ -148,7 +161,7 @@ impl<T> FstMapBuilder<T> {
|
|||
W: Write,
|
||||
X: Write
|
||||
{
|
||||
let mut builder = MapBuilder::new(map_wrt)?;
|
||||
let mut builder = fst::MapBuilder::new(map_wrt)?;
|
||||
builder.extend_iter(self.map)?;
|
||||
let map = builder.into_inner()?;
|
||||
let values = Values::new(self.values);
|
||||
|
@ -159,3 +172,148 @@ impl<T> FstMapBuilder<T> {
|
|||
Ok((map, values_wrt))
|
||||
}
|
||||
}
|
||||
|
||||
/// Wrapper around `fst::map::OpBuilder` that also carries a reference to a
/// `Values<T>` store, which is handed on to the `Union` produced by `union()`.
pub struct OpBuilder<'m, 'v, T: 'v> {
|
||||
// The underlying fst operation builder that receives every pushed stream.
inner: fst::map::OpBuilder<'m>,
|
||||
// Borrowed value store passed through to the resulting `Union`.
values: &'v Values<T>,
|
||||
}
|
||||
|
||||
impl<'m, 'v, T: 'v> OpBuilder<'m, 'v, T> {
|
||||
pub fn new(values: &'v Values<T>) -> Self {
|
||||
OpBuilder {
|
||||
inner: fst::map::OpBuilder::new(),
|
||||
values: values,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn add<I, S>(mut self, streamable: I) -> Self
|
||||
where
|
||||
I: for<'a> fst::IntoStreamer<'a, Into=S, Item=(&'a [u8], u64)>,
|
||||
S: 'm + for<'a> fst::Streamer<'a, Item=(&'a [u8], u64)>,
|
||||
{
|
||||
self.push(streamable);
|
||||
self
|
||||
}
|
||||
|
||||
pub fn push<I, S>(&mut self, streamable: I)
|
||||
where
|
||||
I: for<'a> fst::IntoStreamer<'a, Into=S, Item=(&'a [u8], u64)>,
|
||||
S: 'm + for<'a> fst::Streamer<'a, Item=(&'a [u8], u64)>,
|
||||
{
|
||||
self.inner.push(streamable);
|
||||
}
|
||||
|
||||
pub fn union(self) -> Union<'m, 'v, T> {
|
||||
Union {
|
||||
inner: self.inner.union(),
|
||||
outs: Vec::new(),
|
||||
values: self.values,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Stream built by `OpBuilder::union`: wraps an `fst::map::Union` and, for each
/// key, exposes slices looked up in `values` instead of raw `u64` outputs.
pub struct Union<'m, 'v, T: 'v> {
|
||||
// The underlying fst union stream being driven by `next`.
inner: fst::map::Union<'m>,
|
||||
// Scratch buffer cleared and refilled on every call to `next`.
outs: Vec<IndexedValues<'v, T>>,
|
||||
// Value store used to turn each fst output into a slice.
values: &'v Values<T>,
|
||||
}
|
||||
|
||||
impl<'a, 'm, 'v, T: 'v + 'a> fst::Streamer<'a> for Union<'m, 'v, T> {
|
||||
type Item = (&'a [u8], &'a [IndexedValues<'a, T>]);
|
||||
|
||||
fn next(&'a mut self) -> Option<Self::Item> {
|
||||
match self.inner.next() {
|
||||
Some((s, ivalues)) => {
|
||||
self.outs.clear();
|
||||
for ivalue in ivalues {
|
||||
let index = ivalue.index;
|
||||
let values = unsafe { self.values.get_unchecked(ivalue.value as usize) };
|
||||
self.outs.push(IndexedValues { index, values })
|
||||
}
|
||||
Some((s, &self.outs))
|
||||
},
|
||||
None => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// One entry of a `Union` result: an index paired with the slice resolved
/// for it.
#[derive(Debug)]
|
||||
pub struct IndexedValues<'a, T: 'a> {
|
||||
// Index copied from the inner fst union entry.
// NOTE(review): presumably the position of the originating stream in push order — confirm.
pub index: usize,
|
||||
// Slice returned by the `Values` store for the entry's fst output.
pub values: &'a [T],
|
||||
}
|
||||
|
||||
/// Wrapper around `fst::map::OpWithStateBuilder` that also carries a reference
/// to a `Values<T>` store, handed on to the `UnionWithState` built by `union()`.
///
/// `U` is the per-match state type yielded by the pushed streams.
pub struct OpWithStateBuilder<'m, 'v, T: 'v, U> {
|
||||
// The underlying fst builder that receives every pushed stream.
inner: fst::map::OpWithStateBuilder<'m, U>,
|
||||
// Borrowed value store passed through to the resulting `UnionWithState`.
values: &'v Values<T>,
|
||||
}
|
||||
|
||||
impl<'m, 'v, T: 'v, U: 'static> OpWithStateBuilder<'m, 'v, T, U> {
|
||||
pub fn new(values: &'v Values<T>) -> Self {
|
||||
Self {
|
||||
inner: fst::map::OpWithStateBuilder::new(),
|
||||
values: values,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn add<I, S>(mut self, streamable: I) -> Self
|
||||
where
|
||||
I: for<'a> fst::IntoStreamer<'a, Into=S, Item=(&'a [u8], u64, U)>,
|
||||
S: 'm + for<'a> fst::Streamer<'a, Item=(&'a [u8], u64, U)>,
|
||||
{
|
||||
self.push(streamable);
|
||||
self
|
||||
}
|
||||
|
||||
pub fn push<I, S>(&mut self, streamable: I)
|
||||
where
|
||||
I: for<'a> fst::IntoStreamer<'a, Into=S, Item=(&'a [u8], u64, U)>,
|
||||
S: 'm + for<'a> fst::Streamer<'a, Item=(&'a [u8], u64, U)>,
|
||||
{
|
||||
self.inner.push(streamable);
|
||||
}
|
||||
|
||||
pub fn union(self) -> UnionWithState<'m, 'v, T, U> {
|
||||
UnionWithState {
|
||||
inner: self.inner.union(),
|
||||
outs: Vec::new(),
|
||||
values: self.values,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Stream built by `OpWithStateBuilder::union`: wraps an
/// `fst::map::UnionWithState` and, for each key, exposes slices looked up in
/// `values` together with the state attached to each entry.
pub struct UnionWithState<'m, 'v, T: 'v, U> {
|
||||
// The underlying fst union stream being driven by `next`.
inner: fst::map::UnionWithState<'m, U>,
|
||||
// Scratch buffer cleared and refilled on every call to `next`.
outs: Vec<IndexedValuesWithState<'v, T, U>>,
|
||||
// Value store used to turn each fst output into a slice.
values: &'v Values<T>,
|
||||
}
|
||||
|
||||
impl<'a, 'm, 'v, T: 'v + 'a, U: 'a> fst::Streamer<'a> for UnionWithState<'m, 'v, T, U>
|
||||
where
|
||||
U: Clone,
|
||||
{
|
||||
type Item = (&'a [u8], &'a [IndexedValuesWithState<'a, T, U>]);
|
||||
|
||||
fn next(&'a mut self) -> Option<Self::Item> {
|
||||
match self.inner.next() {
|
||||
Some((s, ivalues)) => {
|
||||
self.outs.clear();
|
||||
for ivalue in ivalues {
|
||||
let index = ivalue.index;
|
||||
let values = unsafe { self.values.get_unchecked(ivalue.value as usize) };
|
||||
let state = ivalue.state.clone();
|
||||
self.outs.push(IndexedValuesWithState { index, values, state })
|
||||
}
|
||||
Some((s, &self.outs))
|
||||
},
|
||||
None => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// One entry of a `UnionWithState` result: an index, the slice resolved for
/// it, and the state cloned from the inner entry.
#[derive(Debug)]
|
||||
pub struct IndexedValuesWithState<'a, T: 'a, U> {
|
||||
// Index copied from the inner fst union entry.
// NOTE(review): presumably the position of the originating stream in push order — confirm.
pub index: usize,
|
||||
// Slice returned by the `Values` store for the entry's fst output.
pub values: &'a [T],
|
||||
// State value cloned from the inner entry.
pub state: U,
|
||||
}
|
||||
|
|
|
@ -5,13 +5,13 @@ extern crate serde;
|
|||
|
||||
mod fst_map;
|
||||
|
||||
use std::ops::Range;
|
||||
use std::io::{Write, BufReader};
|
||||
use std::fs::File;
|
||||
use std::path::Path;
|
||||
use fst::Automaton;
|
||||
|
||||
pub use self::fst_map::{FstMap, FstMapBuilder};
|
||||
pub use self::fst_map::{
|
||||
OpBuilder, IndexedValues,
|
||||
OpWithStateBuilder, IndexedValuesWithState,
|
||||
};
|
||||
use self::fst_map::Values;
|
||||
|
||||
pub struct StreamBuilder<'m, 'v, T: 'v, A> {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue