Merge pull request #170 from meilisearch/async-word-index-fetching-with-rayon-scope

Async word index fetching with rayon scope
2025-06-18 12:47:35 +02:00 · 2019-08-28 14:37:38 +02:00 · 2019-08-28 14:37:38 +02:00 · bae86e978e
commit bae86e978e
parent e0cadaa68d 8030a822ab
11 changed files with 1399 additions and 515 deletions
--- a/meilidb-core/Cargo.toml
+++ b/meilidb-core/Cargo.toml
@ -6,6 +6,7 @@ edition = "2018"
 [dependencies]
 byteorder = "1.3.1"
 crossbeam-channel = "0.3.9"
 deunicode = "1.0.0"
 hashbrown = "0.2.2"
 lazy_static = "1.2.0"
@ -14,7 +15,7 @@ meilidb-tokenizer = { path = "../meilidb-tokenizer", version = "0.1.0" }
 rayon = "1.0.3"
 sdset = "0.3.2"
 serde = { version = "1.0.88", features = ["derive"] }
-slice-group-by = "0.2.4"
+slice-group-by = "0.2.6"
 zerocopy = "0.2.2"
 [dependencies.fst]
--- a/meilidb-core/src/criterion/sum_of_typos.rs
+++ b/meilidb-core/src/criterion/sum_of_typos.rs
@ -21,7 +21,7 @@ fn custom_log10(n: u8) -> f32 {
 #[inline]
 fn sum_matches_typos(query_index: &[u32], distance: &[u8]) -> usize {
-    let mut number_words = 0;
+    let mut number_words: usize = 0;
    let mut sum_typos = 0.0;
    let mut index = 0;
--- a/meilidb-core/src/lib.rs
+++ b/meilidb-core/src/lib.rs
@ -1,22 +1,24 @@
 #![feature(checked_duration_since)]
 #[cfg(test)]
 #[macro_use] extern crate assert_matches;
 mod automaton;
 mod distinct_map;
 mod query_builder;
 mod query_enhancer;
 mod raw_document;
 mod reordered_attrs;
 mod store;
 pub mod criterion;
 use std::fmt;
 use std::sync::Arc;
 use sdset::SetBuf;
 use serde::{Serialize, Deserialize};
 use slice_group_by::GroupBy;
 use zerocopy::{AsBytes, FromBytes};
 use self::raw_document::raw_documents_from;
 pub use self::query_builder::{QueryBuilder, DistinctQueryBuilder, normalize_str};
 pub use self::raw_document::RawDocument;
 pub use self::store::Store;
 /// Represent an internally generated document unique identifier.
@ -130,132 +132,6 @@ impl Document {
    }
 }
 /// A document returned by a query: its identifier, its window
 /// into the match buffers shared between documents and its highlights.
 #[derive(Clone)]
 pub struct RawDocument {
    /// Identifier of the matched document.
    pub id: DocumentId,
    /// Range of this document inside the shared match columns.
    pub matches: SharedMatches,
    /// Highlights collected for this document.
    pub highlights: Vec<Highlight>,
 }
 impl RawDocument {
    fn new(id: DocumentId, matches: SharedMatches, highlights: Vec<Highlight>) -> RawDocument {
        RawDocument { id, matches, highlights }
    }
    pub fn query_index(&self) -> &[u32] {
        let r = self.matches.range;
        // it is safe because construction/modifications
        // can only be done in this module
        unsafe { &self.matches.matches.query_index.get_unchecked(r.start..r.end) }
    }
    pub fn distance(&self) -> &[u8] {
        let r = self.matches.range;
        // it is safe because construction/modifications
        // can only be done in this module
        unsafe { &self.matches.matches.distance.get_unchecked(r.start..r.end) }
    }
    pub fn attribute(&self) -> &[u16] {
        let r = self.matches.range;
        // it is safe because construction/modifications
        // can only be done in this module
        unsafe { &self.matches.matches.attribute.get_unchecked(r.start..r.end) }
    }
    pub fn word_index(&self) -> &[u16] {
        let r = self.matches.range;
        // it is safe because construction/modifications
        // can only be done in this module
        unsafe { &self.matches.matches.word_index.get_unchecked(r.start..r.end) }
    }
    pub fn is_exact(&self) -> &[bool] {
        let r = self.matches.range;
        // it is safe because construction/modifications
        // can only be done in this module
        unsafe { &self.matches.matches.is_exact.get_unchecked(r.start..r.end) }
    }
 }
 impl fmt::Debug for RawDocument {
    /// Prints the document id followed by each of its match columns.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let mut output = f.debug_struct("RawDocument");
        output.field("id", &self.id);
        output.field("query_index", &self.query_index());
        output.field("distance", &self.distance());
        output.field("attribute", &self.attribute());
        output.field("word_index", &self.word_index());
        output.field("is_exact", &self.is_exact());
        output.finish()
    }
 }
 /// Groups the sorted matches by document id and turns each group into a
 /// `RawDocument` pointing into one match buffer shared by all of them.
 fn raw_documents_from_matches(matches: SetBuf<(DocumentId, TmpMatch, Highlight)>) -> Vec<RawDocument> {
    let mut flattened = Matches::with_capacity(matches.len());
    let mut per_document: Vec<(_, Range, _)> = Vec::new();
    // index in the shared columns where the next group starts
    let mut cursor = 0;
    for group in matches.linear_group_by(|(a, _, _), (b, _, _)| a == b) {
        let range = Range { start: cursor, end: cursor + group.len() };
        cursor = range.end;
        let doc_highlights = group.iter().map(|(_, _, h)| *h).collect();
        per_document.push((group[0].0, range, doc_highlights));
        flattened.extend_from_slice(group);
    }
    // every document shares the same columns, only the range differs
    let shared = Arc::new(flattened);
    per_document.into_iter().map(|(id, range, doc_highlights)| {
        let matches = SharedMatches { range, matches: Arc::clone(&shared) };
        RawDocument::new(id, matches, doc_highlights)
    }).collect()
 }
 /// Half-open `start..end` window used to slice the shared match columns.
 #[derive(Debug, Copy, Clone)]
 struct Range {
    /// Index of the first match of the window.
    start: usize,
    /// Index one past the last match of the window.
    end: usize,
 }
 /// A window into match columns that are shared through an `Arc`.
 #[derive(Clone)]
 pub struct SharedMatches {
    /// Part of the shared columns covered by this value.
    range: Range,
    /// The shared columns themselves.
    matches: Arc<Matches>,
 }
 /// Matches stored column by column: the i-th entry of every
 /// vector describes the same match.
 #[derive(Clone)]
 struct Matches {
    query_index: Vec<u32>,
    distance: Vec<u8>,
    attribute: Vec<u16>,
    word_index: Vec<u16>,
    is_exact: Vec<bool>,
 }
 impl Matches {
    /// Creates empty columns, each able to hold `cap` matches without reallocating.
    fn with_capacity(cap: usize) -> Matches {
        let query_index = Vec::with_capacity(cap);
        let distance = Vec::with_capacity(cap);
        let attribute = Vec::with_capacity(cap);
        let word_index = Vec::with_capacity(cap);
        let is_exact = Vec::with_capacity(cap);
        Matches { query_index, distance, attribute, word_index, is_exact }
    }
    /// Appends the fields of every given match to the corresponding column.
    fn extend_from_slice(&mut self, matches: &[(DocumentId, TmpMatch, Highlight)]) {
        self.query_index.extend(matches.iter().map(|(_, m, _)| m.query_index));
        self.distance.extend(matches.iter().map(|(_, m, _)| m.distance));
        self.attribute.extend(matches.iter().map(|(_, m, _)| m.attribute));
        self.word_index.extend(matches.iter().map(|(_, m, _)| m.word_index));
        self.is_exact.extend(matches.iter().map(|(_, m, _)| m.is_exact));
    }
 }
 #[cfg(test)]
 mod tests {
    use super::*;
--- a/meilidb-core/src/query_builder.rs
+++ b/meilidb-core/src/query_builder.rs
--- a/meilidb-core/src/query_enhancer.rs
+++ b/meilidb-core/src/query_enhancer.rs
@ -0,0 +1,398 @@
 use std::ops::Range;
 use std::cmp::Ordering::{Less, Greater, Equal};
 /// Returns `true` if the given range of the query can be rewritten with the replacement words.
 ///
 /// Returns `false` when the replacement words are already spelled out in the
 /// original query at that position, or when there are not more replacement
 /// words than words in the range to replace.
 //
 //
 // ## Ignored because already present in original
 //
 //     new york city subway
 //     -------- ^^^^
 //   /          \
 //  [new york city]
 //
 //
 // ## Ignored because smaller than the original
 //
 //   new york city subway
 //   -------------
 //   \          /
 //    [new york]
 //
 //
 // ## Accepted because bigger than the original
 //
 //        NYC subway
 //        ---
 //       /   \
 //      /     \
 //     /       \
 //    /         \
 //   /           \
 //  [new york city]
 //
 fn rewrite_range_with<S, T>(query: &[S], range: Range<usize>, words: &[T]) -> bool
 where S: AsRef<str>,
       T: AsRef<str>,
 {
    // a replacement that does not add any word never grows the query
    let grows = words.len() > range.len();
    grows && {
        // the words of the original query starting at the replaced range,
        // limited to as many words as the replacement is made of
        let window = query.iter().skip(range.start).take(words.len()).map(<S as AsRef<str>>::as_ref);
        let candidate = words.iter().map(<T as AsRef<str>>::as_ref);
        // nothing to rewrite when the query already contains the replacement
        window.ne(candidate)
    }
 }
 /// Index of an original query word.
 type Origin = usize;
 /// Number of words of a declared replacement.
 type RealLength = usize;
 /// A sorted list of intervals that is searched by dichotomy,
 /// used in place of a real interval tree.
 struct FakeIntervalTree {
    intervals: Vec<(Range<usize>, (Origin, RealLength))>,
 }
 impl FakeIntervalTree {
    /// Stores the intervals ordered by their start then end bounds.
    fn new(mut intervals: Vec<(Range<usize>, (Origin, RealLength))>) -> FakeIntervalTree {
        intervals.sort_unstable_by_key(|entry| (entry.0.start, entry.0.end));
        FakeIntervalTree { intervals }
    }
    /// Returns an interval containing `point` along with its value, if one is found.
    fn query(&self, point: usize) -> Option<(Range<usize>, (Origin, RealLength))> {
        let position = self.intervals.binary_search_by(|(interval, _)| {
            if point < interval.start {
                Greater
            } else if point < interval.end {
                Equal
            } else {
                Less
            }
        });
        // whether found or not, the position is re-checked below
        let index = position.unwrap_or_else(|insertion| insertion);
        self.intervals
            .get(index)
            .filter(|entry| entry.0.contains(&point))
            .map(|(interval, value)| (interval.clone(), *value))
    }
 }
 /// Records the replacements (synonyms, alternatives...) declared for ranges
 /// of the original query words, in order to build a `QueryEnhancer`.
 pub struct QueryEnhancerBuilder<'a, S> {
    /// The original query words.
    query: &'a [S],
    /// For each original word (plus one trailing bound) the padded
    /// query index at which this word starts.
    origins: Vec<usize>,
    /// Declared real query index ranges along with the original
    /// word they start at and the length of the replacement.
    real_to_origin: Vec<(Range<usize>, (Origin, RealLength))>,
 }
 impl<S: AsRef<str>> QueryEnhancerBuilder<'_, S> {
    /// Creates a builder where every original word maps to its own position.
    pub fn new(query: &[S]) -> QueryEnhancerBuilder<S> {
        // we initialize origins query indices based on their positions,
        // the additional last entry is the end bound of the last word
        let origins: Vec<_> = (0..query.len() + 1).collect();
        // only the words of the query are real query indices: registering
        // the trailing bound too would overlap the first declared replacement
        // and make the lookup depend on which of the equal elements
        // the binary search returns, which is unspecified
        let real_to_origin = (0..query.len()).map(|o| (o..o + 1, (o, 1))).collect();
        QueryEnhancerBuilder { query, origins, real_to_origin }
    }
    /// Update the final real to origin query indices mapping.
    ///
    /// `range` is the original words range that this `replacement` words replace
    /// and `real` is the first real query index of these replacement words.
    ///
    /// # Panics
    ///
    /// Panics if `range` is not a valid, non-empty range of the
    /// original query when the replacement grows the query.
    pub fn declare<T>(&mut self, range: Range<usize>, real: usize, replacement: &[T])
    where T: AsRef<str>,
    {
        // check if the range of original words
        // can be rewritten with the replacement words
        if rewrite_range_with(self.query, range.clone(), replacement) {
            // this range can be replaced so we need to
            // modify the origins accordingly
            let offset = replacement.len() - range.len();
            let previous_padding = self.origins[range.end - 1];
            let current_offset = (self.origins[range.end] - 1) - previous_padding;
            // only pad by what previous declarations did not already add
            let diff = offset.saturating_sub(current_offset);
            // shift the end bound of the range and every following origin
            for r in &mut self.origins[range.end..] {
                *r += diff;
            }
        }
        // we need to store the real number and origins relations
        // this way it will be possible to know by how many
        // we need to pad real query indices
        let real_range = real..real + replacement.len().max(range.len());
        let real_length = replacement.len();
        self.real_to_origin.push((real_range, (range.start, real_length)));
    }
    /// Consumes the builder and returns the enhancer answering `replacement` queries.
    pub fn build(self) -> QueryEnhancer {
        QueryEnhancer {
            origins: self.origins,
            real_to_origin: FakeIntervalTree::new(self.real_to_origin),
        }
    }
 }
 /// Maps real query indices to the padded query indices they must be replaced with.
 pub struct QueryEnhancer {
    origins: Vec<usize>,
    real_to_origin: FakeIntervalTree,
 }
 impl QueryEnhancer {
    /// Returns the query indices to use to replace this real query index.
    ///
    /// # Panics
    ///
    /// Panics if `real` has never been declared.
    pub fn replacement(&self, real: u32) -> Range<u32> {
        let real = real as usize;
        // query the fake interval tree with the real query index
        let (range, (origin, real_length)) = self.real_to_origin
            .query(real)
            .expect("real has never been declared");
        // position of the word inside its declared range
        let n = real - range.start;
        let is_last_word = (range.start + real_length - 1) == real;
        if !is_last_word {
            // just return the origin along with
            // the real position of the word
            let base = self.origins[origin];
            return Range { start: (base + n) as u32, end: (base + n + 1) as u32 }
        }
        // `real` is the last word of the replacement: walk the original
        // words until the whole declared range has been consumed
        let mut count = range.len();
        let new_origin = self.origins[origin..]
            .windows(2)
            .position(|pair| {
                count = count.saturating_sub(pair[1] - pair[0]);
                count == 0
            })
            .map_or(origin, |i| origin + i);
        let start = self.origins[origin];
        let end = self.origins[new_origin + 1];
        // the last word takes all the space left by the previous ones
        let remaining = (end - start) - n;
        Range { start: (start + n) as u32, end: (start + n + remaining) as u32 }
    }
 }
 // Each test declares replacements on a small query and checks the padded
 // query indices returned for every real query index. The numbers written
 // under the words are their real query indices.
 #[cfg(test)]
 mod tests {
    use super::*;
    /// The replacement is already spelled out in the query: nothing is padded.
    #[test]
    fn original_unmodified() {
        let query = ["new", "york", "city", "subway"];
        //             0       1       2        3
        let mut builder = QueryEnhancerBuilder::new(&query);
        // new york = new york city
        builder.declare(0..2, 4, &["new", "york", "city"]);
        //                    ^      4       5       6
        let enhancer = builder.build();
        assert_eq!(enhancer.replacement(0), 0..1); // new
        assert_eq!(enhancer.replacement(1), 1..2); // york
        assert_eq!(enhancer.replacement(2), 2..3); // city
        assert_eq!(enhancer.replacement(3), 3..4); // subway
        assert_eq!(enhancer.replacement(4), 0..1); // new
        assert_eq!(enhancer.replacement(5), 1..2); // york
        assert_eq!(enhancer.replacement(6), 2..3); // city
    }
    /// Two words replaced by three: the last replaced word is padded.
    #[test]
    fn simple_growing() {
        let query = ["new", "york", "subway"];
        //             0       1        2
        let mut builder = QueryEnhancerBuilder::new(&query);
        // new york = new york city
        builder.declare(0..2, 3, &["new", "york", "city"]);
        //                    ^      3       4       5
        let enhancer = builder.build();
        assert_eq!(enhancer.replacement(0), 0..1); // new
        assert_eq!(enhancer.replacement(1), 1..3); // york
        assert_eq!(enhancer.replacement(2), 3..4); // subway
        assert_eq!(enhancer.replacement(3), 0..1); // new
        assert_eq!(enhancer.replacement(4), 1..2); // york
        assert_eq!(enhancer.replacement(5), 2..3); // city
    }
    /// Several replacements of different lengths declared on the same word.
    #[test]
    fn same_place_growings() {
        let query = ["NY", "subway"];
        //             0       1
        let mut builder = QueryEnhancerBuilder::new(&query);
        // NY = new york
        builder.declare(0..1, 2, &["new", "york"]);
        //                    ^      2       3
        // NY = new york city
        builder.declare(0..1, 4, &["new", "york", "city"]);
        //                    ^      4       5       6
        // NY = NYC
        builder.declare(0..1, 7, &["NYC"]);
        //                    ^      7
        // NY = new york city
        builder.declare(0..1, 8, &["new", "york", "city"]);
        //                    ^      8       9      10
        // subway = underground train
        builder.declare(1..2, 11, &["underground", "train"]);
        //                    ^          11          12
        let enhancer = builder.build();
        assert_eq!(enhancer.replacement(0), 0..3); // NY
        assert_eq!(enhancer.replacement(1), 3..5); // subway
        assert_eq!(enhancer.replacement(2), 0..1); // new
        assert_eq!(enhancer.replacement(3), 1..3); // york
        assert_eq!(enhancer.replacement(4), 0..1); // new
        assert_eq!(enhancer.replacement(5), 1..2); // york
        assert_eq!(enhancer.replacement(6), 2..3); // city
        assert_eq!(enhancer.replacement(7), 0..3); // NYC
        assert_eq!(enhancer.replacement(8), 0..1); // new
        assert_eq!(enhancer.replacement(9), 1..2); // york
        assert_eq!(enhancer.replacement(10), 2..3); // city
        assert_eq!(enhancer.replacement(11), 3..4); // underground
        assert_eq!(enhancer.replacement(12), 4..5); // train
    }
    /// One word replaced by three at the start of the query.
    #[test]
    fn bigger_growing() {
        let query = ["NYC", "subway"];
        //             0        1
        let mut builder = QueryEnhancerBuilder::new(&query);
        // NYC = new york city
        builder.declare(0..1, 2, &["new", "york", "city"]);
        //                    ^      2       3       4
        let enhancer = builder.build();
        assert_eq!(enhancer.replacement(0), 0..3); // NYC
        assert_eq!(enhancer.replacement(1), 3..4); // subway
        assert_eq!(enhancer.replacement(2), 0..1); // new
        assert_eq!(enhancer.replacement(3), 1..2); // york
        assert_eq!(enhancer.replacement(4), 2..3); // city
    }
    /// One word replaced by three in the middle of the query.
    #[test]
    fn middle_query_growing() {
        let query = ["great", "awesome", "NYC", "subway"];
        //              0         1        2        3
        let mut builder = QueryEnhancerBuilder::new(&query);
        // NYC = new york city
        builder.declare(2..3, 4, &["new", "york", "city"]);
        //                    ^      4       5       6
        let enhancer = builder.build();
        assert_eq!(enhancer.replacement(0), 0..1); // great
        assert_eq!(enhancer.replacement(1), 1..2); // awesome
        assert_eq!(enhancer.replacement(2), 2..5); // NYC
        assert_eq!(enhancer.replacement(3), 5..6); // subway
        assert_eq!(enhancer.replacement(4), 2..3); // new
        assert_eq!(enhancer.replacement(5), 3..4); // york
        assert_eq!(enhancer.replacement(6), 4..5); // city
    }
    /// The last word of the query is replaced by two words.
    #[test]
    fn end_query_growing() {
        let query = ["NYC", "subway"];
        //             0        1
        let mut builder = QueryEnhancerBuilder::new(&query);
        // subway = underground train
        builder.declare(1..2, 2, &["underground", "train"]);
        //                    ^         2            3
        let enhancer = builder.build();
        assert_eq!(enhancer.replacement(0), 0..1); // NYC
        assert_eq!(enhancer.replacement(1), 1..3); // subway
        assert_eq!(enhancer.replacement(2), 1..2); // underground
        assert_eq!(enhancer.replacement(3), 2..3); // train
    }
    /// Two consecutive words are each replaced by a longer replacement.
    #[test]
    fn multiple_growings() {
        let query = ["great", "awesome", "NYC", "subway"];
        //              0         1        2        3
        let mut builder = QueryEnhancerBuilder::new(&query);
        // NYC = new york city
        builder.declare(2..3, 4, &["new", "york", "city"]);
        //                    ^      4       5       6
        // subway = underground train
        builder.declare(3..4, 7, &["underground", "train"]);
        //                    ^          7           8
        let enhancer = builder.build();
        assert_eq!(enhancer.replacement(0), 0..1); // great
        assert_eq!(enhancer.replacement(1), 1..2); // awesome
        assert_eq!(enhancer.replacement(2), 2..5); // NYC
        assert_eq!(enhancer.replacement(3), 5..7); // subway
        assert_eq!(enhancer.replacement(4), 2..3); // new
        assert_eq!(enhancer.replacement(5), 3..4); // york
        assert_eq!(enhancer.replacement(6), 4..5); // city
        assert_eq!(enhancer.replacement(7), 5..6); // underground
        assert_eq!(enhancer.replacement(8), 6..7); // train
    }
    /// Growing replacements mixed with replacements shorter than their range.
    #[test]
    fn multiple_probable_growings() {
        let query = ["great", "awesome", "NYC", "subway"];
        //              0         1        2        3
        let mut builder = QueryEnhancerBuilder::new(&query);
        // NYC = new york city
        builder.declare(2..3, 4, &["new", "york", "city"]);
        //                    ^      4       5       6
        // subway = underground train
        builder.declare(3..4, 7, &["underground", "train"]);
        //                    ^          7           8
        // great awesome = good
        builder.declare(0..2, 9, &["good"]);
        //                    ^       9
        // awesome NYC = NY
        builder.declare(1..3, 10, &["NY"]);
        //                    ^^     10
        // NYC subway = metro
        builder.declare(2..4, 11, &["metro"]);
        //                    ^^      11
        let enhancer = builder.build();
        assert_eq!(enhancer.replacement(0),  0..1); // great
        assert_eq!(enhancer.replacement(1),  1..2); // awesome
        assert_eq!(enhancer.replacement(2),  2..5); // NYC
        assert_eq!(enhancer.replacement(3),  5..7); // subway
        assert_eq!(enhancer.replacement(4),  2..3); // new
        assert_eq!(enhancer.replacement(5),  3..4); // york
        assert_eq!(enhancer.replacement(6),  4..5); // city
        assert_eq!(enhancer.replacement(7),  5..6); // underground
        assert_eq!(enhancer.replacement(8),  6..7); // train
        assert_eq!(enhancer.replacement(9),  0..2); // good
        assert_eq!(enhancer.replacement(10), 1..5); // NY
        assert_eq!(enhancer.replacement(11), 2..5); // metro
    }
 }
--- a/meilidb-core/src/raw_document.rs
+++ b/meilidb-core/src/raw_document.rs
@ -0,0 +1,141 @@
 use std::sync::Arc;
 use std::fmt;
 use sdset::SetBuf;
 use slice_group_by::GroupBy;
 use crate::{TmpMatch, DocumentId, Highlight};
 /// A document returned by a query: its identifier, its window
 /// into the match buffers shared between documents and its highlights.
 #[derive(Clone)]
 pub struct RawDocument {
    /// Identifier of the matched document.
    pub id: DocumentId,
    /// Range of this document inside the shared match columns.
    pub matches: SharedMatches,
    /// Highlights collected for this document.
    pub highlights: Vec<Highlight>,
 }
 impl RawDocument {
    fn new(id: DocumentId, matches: SharedMatches, highlights: Vec<Highlight>) -> RawDocument {
        RawDocument { id, matches, highlights }
    }
    pub fn query_index(&self) -> &[u32] {
        let r = self.matches.range;
        // it is safe because construction/modifications
        // can only be done in this module
        unsafe { &self.matches.matches.query_index.get_unchecked(r.start..r.end) }
    }
    pub fn distance(&self) -> &[u8] {
        let r = self.matches.range;
        // it is safe because construction/modifications
        // can only be done in this module
        unsafe { &self.matches.matches.distance.get_unchecked(r.start..r.end) }
    }
    pub fn attribute(&self) -> &[u16] {
        let r = self.matches.range;
        // it is safe because construction/modifications
        // can only be done in this module
        unsafe { &self.matches.matches.attribute.get_unchecked(r.start..r.end) }
    }
    pub fn word_index(&self) -> &[u16] {
        let r = self.matches.range;
        // it is safe because construction/modifications
        // can only be done in this module
        unsafe { &self.matches.matches.word_index.get_unchecked(r.start..r.end) }
    }
    pub fn is_exact(&self) -> &[bool] {
        let r = self.matches.range;
        // it is safe because construction/modifications
        // can only be done in this module
        unsafe { &self.matches.matches.is_exact.get_unchecked(r.start..r.end) }
    }
 }
 impl fmt::Debug for RawDocument {
    /// Prints the id and every match column on its own line,
    /// with the field names right-aligned.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        f.write_str("RawDocument {\r\n")?;
        write!(f, "{:>15}: {:?},\r\n", "id", self.id)?;
        write!(f, "{:>15}: {:^5?},\r\n", "query_index", self.query_index())?;
        write!(f, "{:>15}: {:^5?},\r\n", "distance", self.distance())?;
        write!(f, "{:>15}: {:^5?},\r\n", "attribute", self.attribute())?;
        write!(f, "{:>15}: {:^5?},\r\n", "word_index", self.word_index())?;
        write!(f, "{:>15}: {:^5?},\r\n", "is_exact", self.is_exact())?;
        f.write_str("}")
    }
 }
 /// Builds one `RawDocument` per document id out of the sorted matches and highlights.
 ///
 /// Both sets are grouped by document id and the groups are walked in
 /// lockstep; all the documents point into one shared match buffer.
 pub fn raw_documents_from(
    matches: SetBuf<(DocumentId, TmpMatch)>,
    highlights: SetBuf<(DocumentId, Highlight)>,
 ) -> Vec<RawDocument>
 {
    let mut flattened = Matches::with_capacity(matches.len());
    let mut per_document: Vec<(_, Range, _)> = Vec::new();
    // index in the shared columns where the next group starts
    let mut cursor = 0;
    let match_groups = matches.linear_group_by_key(|(id, _)| *id);
    let highlight_groups = highlights.linear_group_by_key(|(id, _)| *id);
    for (mgroup, hgroup) in match_groups.zip(highlight_groups) {
        // both groups are expected to describe the same document
        debug_assert_eq!(mgroup[0].0, hgroup[0].0);
        let range = Range { start: cursor, end: cursor + mgroup.len() };
        cursor = range.end;
        let doc_highlights = hgroup.iter().map(|(_, h)| *h).collect();
        per_document.push((mgroup[0].0, range, doc_highlights));
        flattened.extend_from_slice(mgroup);
    }
    // every document shares the same columns, only the range differs
    let shared = Arc::new(flattened);
    per_document.into_iter().map(|(id, range, doc_highlights)| {
        let window = SharedMatches { range, matches: Arc::clone(&shared) };
        RawDocument::new(id, window, doc_highlights)
    }).collect()
 }
 /// Half-open `start..end` window used to slice the shared match columns.
 #[derive(Debug, Copy, Clone)]
 struct Range {
    /// Index of the first match of the window.
    start: usize,
    /// Index one past the last match of the window.
    end: usize,
 }
 /// A window into match columns that are shared through an `Arc`.
 #[derive(Clone)]
 pub struct SharedMatches {
    /// Part of the shared columns covered by this value.
    range: Range,
    /// The shared columns themselves.
    matches: Arc<Matches>,
 }
 /// Matches stored column by column: the i-th entry of every
 /// vector describes the same match.
 #[derive(Clone)]
 struct Matches {
    query_index: Vec<u32>,
    distance: Vec<u8>,
    attribute: Vec<u16>,
    word_index: Vec<u16>,
    is_exact: Vec<bool>,
 }
 impl Matches {
    /// Creates empty columns, each able to hold `cap` matches without reallocating.
    fn with_capacity(cap: usize) -> Matches {
        let query_index = Vec::with_capacity(cap);
        let distance = Vec::with_capacity(cap);
        let attribute = Vec::with_capacity(cap);
        let word_index = Vec::with_capacity(cap);
        let is_exact = Vec::with_capacity(cap);
        Matches { query_index, distance, attribute, word_index, is_exact }
    }
    /// Appends the fields of every given match to the corresponding column.
    fn extend_from_slice(&mut self, matches: &[(DocumentId, TmpMatch)]) {
        self.query_index.extend(matches.iter().map(|(_, m)| m.query_index));
        self.distance.extend(matches.iter().map(|(_, m)| m.distance));
        self.attribute.extend(matches.iter().map(|(_, m)| m.attribute));
        self.word_index.extend(matches.iter().map(|(_, m)| m.word_index));
        self.is_exact.extend(matches.iter().map(|(_, m)| m.is_exact));
    }
 }
--- a/meilidb-core/src/reordered_attrs.rs
+++ b/meilidb-core/src/reordered_attrs.rs
@ -1,4 +1,4 @@
-#[derive(Default)]
+#[derive(Default, Clone)]
 pub struct ReorderedAttrs {
    count: usize,
    reorders: Vec<Option<u16>>,
--- a/meilidb-data/src/database/synonyms_addition.rs
+++ b/meilidb-data/src/database/synonyms_addition.rs
@ -21,10 +21,10 @@ impl<'a> SynonymsAddition<'a> {
    pub fn add_synonym<S, T, I>(&mut self, synonym: S, alternatives: I)
    where S: AsRef<str>,
          T: AsRef<str>,
-          I: Iterator<Item=T>,
+          I: IntoIterator<Item=T>,
    {
        let synonym = normalize_str(synonym.as_ref());
-        let alternatives = alternatives.map(|s| s.as_ref().to_lowercase());
+        let alternatives = alternatives.into_iter().map(|s| s.as_ref().to_lowercase());
        self.synonyms.entry(synonym).or_insert_with(Vec::new).extend(alternatives);
    }
@ -73,7 +73,7 @@ impl<'a> SynonymsAddition<'a> {
        // update the "consistent" view of the Index
        let words = main.words_set()?.unwrap_or_default();
-        let ranked_map = lease_inner.ranked_map.clone();;
+        let ranked_map = lease_inner.ranked_map.clone();
        let schema = lease_inner.schema.clone();
        let raw = lease_inner.raw.clone();
        lease_inner.raw.compact();
--- a/meilidb/Cargo.toml
+++ b/meilidb/Cargo.toml
@ -14,10 +14,12 @@ csv = "1.0.7"
 diskus = "0.5.0"
 env_logger = "0.6.1"
 jemallocator = "0.1.9"
 linked-hash-map = "0.5.2"
 meilidb-core = { path = "../meilidb-core", version = "0.1.0" }
 quickcheck = "0.8.2"
 rand = "0.6.5"
 rand_xorshift = "0.1.1"
 rustyline = { version = "5.0.0", default-features = false }
 serde = { version = "1.0.91" , features = ["derive"] }
 serde_json = "1.0.39"
 structopt = "0.2.15"
--- a/meilidb/examples/create-database.rs
+++ b/meilidb/examples/create-database.rs
@ -31,9 +31,13 @@ pub struct Opt {
    #[structopt(long = "schema", parse(from_os_str))]
    pub schema_path: PathBuf,
    /// The file with the synonyms.
    #[structopt(long = "synonyms", parse(from_os_str))]
    pub synonyms: Option<PathBuf>,
    /// The path to the list of stop words (one by line).
    #[structopt(long = "stop-words", parse(from_os_str))]
-    pub stop_words_path: Option<PathBuf>,
+    pub stop_words: Option<PathBuf>,
    #[structopt(long = "update-group-size")]
    pub update_group_size: Option<usize>,
@ -45,12 +49,40 @@ struct Document<'a> (
    HashMap<Cow<'a, str>, Cow<'a, str>>
 );
 /// One entry of the synonyms file.
 ///
 /// The enum is untagged: the variant is chosen from the shape of the entry.
 #[derive(Debug, Clone, Serialize, Deserialize)]
 #[serde(untagged)]
 pub enum Synonym {
    /// The search terms get the listed alternatives, not the other way around.
    OneWay(SynonymOneWay),
    /// Every word of the list is registered as a synonym of all the others.
    MultiWay { synonyms: Vec<String> },
 }
 /// A one-way synonym: alternatives that apply to the given search terms.
 ///
 /// Field names use camelCase in the serialized form.
 #[derive(Debug, Clone, Serialize, Deserialize)]
 #[serde(rename_all = "camelCase")]
 pub struct SynonymOneWay {
    /// The terms for which the alternatives are registered.
    pub search_terms: String,
    /// The alternatives, either a single one or a list.
    pub synonyms: Synonyms,
 }
 /// The alternatives of a one-way synonym, written either as a list or as a single string.
 #[derive(Debug, Clone, Serialize, Deserialize)]
 #[serde(untagged)]
 pub enum Synonyms {
    /// Several alternatives.
    Multiple(Vec<String>),
    /// Exactly one alternative.
    Single(String),
 }
 /// Reads the synonym definitions from the JSON file located at `path`.
 ///
 /// # Errors
 ///
 /// Returns an error if the file cannot be opened or if its
 /// content cannot be deserialized into a list of `Synonym`.
 fn read_synomys(path: &Path) -> Result<Vec<Synonym>, Box<dyn Error>> {
    let file = File::open(path)?;
    // serde_json does not buffer the reader it is given,
    // reading straight from a `File` would issue many small reads
    let reader = std::io::BufReader::new(file);
    let synonyms = serde_json::from_reader(reader)?;
    Ok(synonyms)
 }
 fn index(
    schema: Schema,
    database_path: &Path,
    csv_data_path: &Path,
    update_group_size: Option<usize>,
    stop_words: &HashSet<String>,
    synonyms: Vec<Synonym>,
 ) -> Result<Database, Box<dyn Error>>
 {
    let database = Database::start_default(database_path)?;
@ -62,6 +94,28 @@ fn index(
    let index = database.create_index("test", schema.clone())?;
    let mut synonyms_adder = index.synonyms_addition();
    for synonym in synonyms {
        match synonym {
            Synonym::OneWay(SynonymOneWay { search_terms, synonyms }) => {
                let alternatives = match synonyms {
                    Synonyms::Multiple(alternatives) => alternatives,
                    Synonyms::Single(alternative) => vec![alternative],
                };
                synonyms_adder.add_synonym(search_terms, alternatives);
            },
            Synonym::MultiWay { mut synonyms } => {
                for _ in 0..synonyms.len() {
                    if let Some((synonym, alternatives)) = synonyms.split_first() {
                        synonyms_adder.add_synonym(synonym, alternatives);
                    }
                    synonyms.rotate_left(1);
                }
            },
        }
    }
    synonyms_adder.finalize()?;
    let mut rdr = csv::Reader::from_path(csv_data_path)?;
    let mut raw_record = csv::StringRecord::new();
    let headers = rdr.headers()?.clone();
@ -133,13 +187,25 @@ fn main() -> Result<(), Box<dyn Error>> {
        Schema::from_toml(file)?
    };
-    let stop_words = match opt.stop_words_path {
+    let stop_words = match opt.stop_words {
        Some(ref path) => retrieve_stop_words(path)?,
        None           => HashSet::new(),
    };
    let synonyms = match opt.synonyms {
        Some(ref path) => read_synomys(path)?,
        None           => Vec::new(),
    };
    let start = Instant::now();
-    let result = index(schema, &opt.database_path, &opt.csv_data_path, opt.update_group_size, &stop_words);
+    let result = index(
        schema,
        &opt.database_path,
        &opt.csv_data_path,
        opt.update_group_size,
        &stop_words,
        synonyms,
    );
    if let Err(e) = result {
        return Err(e.into())
--- a/meilidb/examples/query-database.rs
+++ b/meilidb/examples/query-database.rs
@ -2,17 +2,19 @@
 static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc;
 use std::collections::btree_map::{BTreeMap, Entry};
-use std::collections::{HashMap, HashSet};
+use std::collections::HashSet;
 use std::iter::FromIterator;
 use std::io::{self, Write};
 use std::time::{Instant, Duration};
 use std::path::PathBuf;
 use std::error::Error;
 use std::io::{self, Write};
 use std::iter::FromIterator;
 use std::path::PathBuf;
 use std::time::{Instant, Duration};
-use termcolor::{Color, ColorChoice, ColorSpec, StandardStream, WriteColor};
+use linked_hash_map::LinkedHashMap;
 use rustyline::{Editor, Config};
 use structopt::StructOpt;
-use meilidb_core::Highlight;
+use termcolor::{Color, ColorChoice, ColorSpec, StandardStream, WriteColor};
 use meilidb_core::Highlight;
 use meilidb_data::Database;
 use meilidb_schema::SchemaAttr;
@ -22,6 +24,9 @@ pub struct Opt {
    #[structopt(parse(from_os_str))]
    pub database_path: PathBuf,
    #[structopt(long = "fetch-timeout-ms")]
    pub fetch_timeout_ms: Option<u64>,
    /// Fields that must be displayed.
    pub displayed_fields: Vec<String>,
@ -34,7 +39,7 @@ pub struct Opt {
    pub char_context: usize,
 }
-type Document = HashMap<String, String>;
+type Document = LinkedHashMap<String, String>;
 fn display_highlights(text: &str, ranges: &[usize]) -> io::Result<()> {
    let mut stdout = StandardStream::stdout(ColorChoice::Always);
@ -140,9 +145,6 @@ fn main() -> Result<(), Box<dyn Error>> {
    let start = Instant::now();
    let database = Database::start_default(&opt.database_path)?;
    let mut buffer = String::new();
    let input = io::stdin();
    let index = database.open_index("test")?.unwrap();
    let schema = index.schema();
@ -151,17 +153,23 @@ fn main() -> Result<(), Box<dyn Error>> {
    let fields = opt.displayed_fields.iter().map(String::as_str);
    let fields = HashSet::from_iter(fields);
-    loop {
+    let config = Config::builder().auto_add_history(true).build();
-        print!("Searching for: ");
+    let mut readline = Editor::<()>::with_config(config);
-        io::stdout().flush()?;
+    let _ = readline.load_history("query-history.txt");
        if input.read_line(&mut buffer)? == 0 { break }
        let query = buffer.trim_end_matches('\n');
    for result in readline.iter("Searching for: ") {
        match result {
            Ok(query) => {
                let start_total = Instant::now();
-        let builder = index.query_builder();
+                let builder = match opt.fetch_timeout_ms {
-        let documents = builder.query(query, 0..opt.number_results)?;
+                    Some(timeout_ms) => {
                        let timeout = Duration::from_millis(timeout_ms);
                        index.query_builder().with_fetch_timeout(timeout)
                    },
                    None => index.query_builder(),
                };
                let documents = builder.query(&query, 0..opt.number_results)?;
                let mut retrieve_duration = Duration::default();
@ -208,8 +216,14 @@ fn main() -> Result<(), Box<dyn Error>> {
                eprintln!("document field retrieve took {:.2?}", retrieve_duration);
                eprintln!("===== Found {} results in {:.2?} =====", number_of_documents, start_total.elapsed());
-        buffer.clear();
+            },
            Err(err) => {
                println!("Error: {:?}", err);
                break
            }
        }
    }
    readline.save_history("query-history.txt").unwrap();
    Ok(())
 }