Merge pull request #170 from meilisearch/async-word-index-fetching-with-rayon-scope

Async word index fetching with rayon scope
2025-07-04 20:37:15 +02:00 · 2019-08-28 14:37:38 +02:00 · 2019-08-28 14:37:38 +02:00 · bae86e978e
commit bae86e978e
parent e0cadaa68d 8030a822ab
11 changed files with 1399 additions and 515 deletions
--- a/meilidb-core/Cargo.toml
+++ b/meilidb-core/Cargo.toml
@ -6,6 +6,7 @@ edition = "2018"

 [dependencies]
 byteorder = "1.3.1"
+crossbeam-channel = "0.3.9"
 deunicode = "1.0.0"
 hashbrown = "0.2.2"
 lazy_static = "1.2.0"
@ -14,7 +15,7 @@ meilidb-tokenizer = { path = "../meilidb-tokenizer", version = "0.1.0" }
 rayon = "1.0.3"
 sdset = "0.3.2"
 serde = { version = "1.0.88", features = ["derive"] }
-slice-group-by = "0.2.4"
+slice-group-by = "0.2.6"
 zerocopy = "0.2.2"

 [dependencies.fst]
--- a/meilidb-core/src/criterion/sum_of_typos.rs
+++ b/meilidb-core/src/criterion/sum_of_typos.rs
@ -21,7 +21,7 @@ fn custom_log10(n: u8) -> f32 {

 #[inline]
 fn sum_matches_typos(query_index: &[u32], distance: &[u8]) -> usize {
-    let mut number_words = 0;
+    let mut number_words: usize = 0;
    let mut sum_typos = 0.0;
    let mut index = 0;

--- a/meilidb-core/src/lib.rs
+++ b/meilidb-core/src/lib.rs
@ -1,22 +1,24 @@
+#![feature(checked_duration_since)]
+
 #[cfg(test)]
 #[macro_use] extern crate assert_matches;

 mod automaton;
 mod distinct_map;
 mod query_builder;
+mod query_enhancer;
+mod raw_document;
 mod reordered_attrs;
 mod store;
 pub mod criterion;

-use std::fmt;
-use std::sync::Arc;
-
-use sdset::SetBuf;
 use serde::{Serialize, Deserialize};
-use slice_group_by::GroupBy;
 use zerocopy::{AsBytes, FromBytes};

+use self::raw_document::raw_documents_from;
+
 pub use self::query_builder::{QueryBuilder, DistinctQueryBuilder, normalize_str};
+pub use self::raw_document::RawDocument;
 pub use self::store::Store;

 /// Represent an internally generated document unique identifier.
@ -130,132 +132,6 @@ impl Document {
    }
 }

-#[derive(Clone)]
-pub struct RawDocument {
-    pub id: DocumentId,
-    pub matches: SharedMatches,
-    pub highlights: Vec<Highlight>,
-}
-
-impl RawDocument {
-    fn new(id: DocumentId, matches: SharedMatches, highlights: Vec<Highlight>) -> RawDocument {
-        RawDocument { id, matches, highlights }
-    }
-
-    pub fn query_index(&self) -> &[u32] {
-        let r = self.matches.range;
-        // it is safe because construction/modifications
-        // can only be done in this module
-        unsafe { &self.matches.matches.query_index.get_unchecked(r.start..r.end) }
-    }
-
-    pub fn distance(&self) -> &[u8] {
-        let r = self.matches.range;
-        // it is safe because construction/modifications
-        // can only be done in this module
-        unsafe { &self.matches.matches.distance.get_unchecked(r.start..r.end) }
-    }
-
-    pub fn attribute(&self) -> &[u16] {
-        let r = self.matches.range;
-        // it is safe because construction/modifications
-        // can only be done in this module
-        unsafe { &self.matches.matches.attribute.get_unchecked(r.start..r.end) }
-    }
-
-    pub fn word_index(&self) -> &[u16] {
-        let r = self.matches.range;
-        // it is safe because construction/modifications
-        // can only be done in this module
-        unsafe { &self.matches.matches.word_index.get_unchecked(r.start..r.end) }
-    }
-
-    pub fn is_exact(&self) -> &[bool] {
-        let r = self.matches.range;
-        // it is safe because construction/modifications
-        // can only be done in this module
-        unsafe { &self.matches.matches.is_exact.get_unchecked(r.start..r.end) }
-    }
-}
-
-impl fmt::Debug for RawDocument {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        f.debug_struct("RawDocument")
-            .field("id", &self.id)
-            .field("query_index", &self.query_index())
-            .field("distance", &self.distance())
-            .field("attribute", &self.attribute())
-            .field("word_index", &self.word_index())
-            .field("is_exact", &self.is_exact())
-            .finish()
-    }
-}
-
-fn raw_documents_from_matches(matches: SetBuf<(DocumentId, TmpMatch, Highlight)>) -> Vec<RawDocument> {
-    let mut docs_ranges: Vec<(_, Range, _)> = Vec::new();
-    let mut matches2 = Matches::with_capacity(matches.len());
-
-    for group in matches.linear_group_by(|(a, _, _), (b, _, _)| a == b) {
-        let document_id = group[0].0;
-        let start = docs_ranges.last().map(|(_, r, _)| r.end).unwrap_or(0);
-        let end = start + group.len();
-
-        let highlights = group.iter().map(|(_, _, h)| *h).collect();
-        docs_ranges.push((document_id, Range { start, end }, highlights));
-
-        matches2.extend_from_slice(group);
-    }
-
-    let matches = Arc::new(matches2);
-    docs_ranges.into_iter().map(|(i, range, highlights)| {
-        let matches = SharedMatches { range, matches: matches.clone() };
-        RawDocument::new(i, matches, highlights)
-    }).collect()
-}
-
-#[derive(Debug, Copy, Clone)]
-struct Range {
-    start: usize,
-    end: usize,
-}
-
-#[derive(Clone)]
-pub struct SharedMatches {
-    range: Range,
-    matches: Arc<Matches>,
-}
-
-#[derive(Clone)]
-struct Matches {
-    query_index: Vec<u32>,
-    distance: Vec<u8>,
-    attribute: Vec<u16>,
-    word_index: Vec<u16>,
-    is_exact: Vec<bool>,
-}
-
-impl Matches {
-    fn with_capacity(cap: usize) -> Matches {
-        Matches {
-            query_index: Vec::with_capacity(cap),
-            distance: Vec::with_capacity(cap),
-            attribute: Vec::with_capacity(cap),
-            word_index: Vec::with_capacity(cap),
-            is_exact: Vec::with_capacity(cap),
-        }
-    }
-
-    fn extend_from_slice(&mut self, matches: &[(DocumentId, TmpMatch, Highlight)]) {
-        for (_, match_, _) in matches {
-            self.query_index.push(match_.query_index);
-            self.distance.push(match_.distance);
-            self.attribute.push(match_.attribute);
-            self.word_index.push(match_.word_index);
-            self.is_exact.push(match_.is_exact);
-        }
-    }
-}
-
 #[cfg(test)]
 mod tests {
    use super::*;
--- a/meilidb-core/src/query_builder.rs
+++ b/meilidb-core/src/query_builder.rs
--- a/meilidb-core/src/query_enhancer.rs
+++ b/meilidb-core/src/query_enhancer.rs
@ -0,0 +1,398 @@
+use std::ops::Range;
+use std::cmp::Ordering::{Less, Greater, Equal};
+
+/// Returns `true` if the specified range can accept the given replacement words.
+/// Returns `false` if the replacement words are already present in the original query
+/// or if there are not strictly more replacement words than words in the range to replace.
+//
+//
+// ## Ignored because already present in original
+//
+//     new york city subway
+//     -------- ^^^^
+//   /          \
+//  [new york city]
+//
+//
+// ## Ignored because smaller than the original
+//
+//   new york city subway
+//   -------------
+//   \          /
+//    [new york]
+//
+//
+// ## Accepted because bigger than the original
+//
+//        NYC subway
+//        ---
+//       /   \
+//      /     \
+//     /       \
+//    /         \
+//   /           \
+//  [new york city]
+//
+fn rewrite_range_with<S, T>(query: &[S], range: Range<usize>, words: &[T]) -> bool
+where S: AsRef<str>,
+      T: AsRef<str>,
+{
+    if words.len() <= range.len() {
+        // there are fewer replacement words than, or exactly as many as,
+        // the words already present in the replaced range
+        return false
+    }
+
+    // take as many original words as there are replacement words, starting
+    // at the beginning of the range (this may reach past `range.end`)
+    let original = query.iter().skip(range.start).take(words.len());
+
+    // accept only if that part of the original query does not
+    // already spell out the replacement words
+    !original.map(AsRef::as_ref).eq(words.iter().map(AsRef::as_ref))
+}
+
+type Origin = usize;
+type RealLength = usize;
+
+struct FakeIntervalTree { // not a real tree: a `Vec` of intervals kept sorted and binary-searched
+    intervals: Vec<(Range<usize>, (Origin, RealLength))>,
+}
+
+impl FakeIntervalTree {
+    fn new(mut intervals: Vec<(Range<usize>, (Origin, RealLength))>) -> FakeIntervalTree {
+        intervals.sort_unstable_by_key(|(r, _)| (r.start, r.end)); // order by start, then end, for `query`
+        FakeIntervalTree { intervals }
+    }
+
+    fn query(&self, point: usize) -> Option<(Range<usize>, (Origin, RealLength))> { // an interval containing `point`, if found
+        let element = self.intervals.binary_search_by(|(r, _)| { // NOTE(review): if intervals overlap, any containing one may be returned
+            if point >= r.start {
+                if point < r.end { Equal } else { Less } // inside => Equal; interval ends at or before point => Less
+            } else { Greater } // interval starts after point => Greater
+        });
+
+        let n = match element { Ok(n) => n, Err(n) => n }; // on a miss, still probe the insertion index
+
+        match self.intervals.get(n) {
+            Some((range, value)) if range.contains(&point) => Some((range.clone(), *value)), // guard re-checks containment
+            _otherwise => None,
+        }
+    }
+}
+
+pub struct QueryEnhancerBuilder<'a, S> {
+    query: &'a [S], // the original query words
+    origins: Vec<usize>, // origins[i]: query index of original word `i` once padding is applied
+    real_to_origin: Vec<(Range<usize>, (Origin, RealLength))>, // (real index range, (first original word, replacement length))
+}
+
+impl<S: AsRef<str>> QueryEnhancerBuilder<'_, S> {
+    pub fn new(query: &[S]) -> QueryEnhancerBuilder<S> {
+        // every original word starts at its own position; no padding yet
+        let origins: Vec<_> = (0..query.len() + 1).collect(); // one extra entry so `origins[range.end]` exists for the last word
+        let real_to_origin = origins.iter().map(|&o| (o..o+1, (o, 1))).collect(); // NOTE(review): also registers index `query.len()`, which the tests reuse as the first declared `real`
+
+        QueryEnhancerBuilder { query, origins, real_to_origin }
+    }
+
+    /// Update the final real to origin query indices mapping.
+    ///
+    /// `range` is the range of original words that the `replacement` words replace
+    /// and `real` is the first real query index of these replacement words.
+    pub fn declare<T>(&mut self, range: Range<usize>, real: usize, replacement: &[T])
+    where T: AsRef<str>,
+    {
+        // check if the range of original words
+        // can be rewritten with the replacement words
+        if rewrite_range_with(self.query, range.clone(), replacement) {
+
+            // the replacement is longer than the range, so the origins
+            // from the end of the range onwards must be pushed further
+            let offset = replacement.len() - range.len(); // > 0 here, guaranteed by `rewrite_range_with`
+
+            let previous_padding = self.origins[range.end - 1]; // NOTE(review): assumes a non-empty `range` inside the query, otherwise this underflows or panics
+            let current_offset = (self.origins[range.end] - 1) - previous_padding; // padding the last word of the range already has
+            let diff = offset.saturating_sub(current_offset); // only add what is still missing
+            self.origins[range.end] += diff;
+
+            for r in &mut self.origins[range.end + 1..] { // shift every following origin by the same amount
+                *r += diff;
+            }
+        }
+
+        // we need to store the relation between real indices and origins,
+        // this way it will be possible to know by how much
+        // we need to pad real query indices
+        let real_range = real..real + replacement.len().max(range.len());
+        let real_length = replacement.len();
+        self.real_to_origin.push((real_range, (range.start, real_length)));
+    }
+
+    pub fn build(self) -> QueryEnhancer {
+        QueryEnhancer {
+            origins: self.origins,
+            real_to_origin: FakeIntervalTree::new(self.real_to_origin), // sorts the declared intervals
+        }
+    }
+}
+
+pub struct QueryEnhancer {
+    origins: Vec<usize>, // padded query index of each original word, plus one trailing entry
+    real_to_origin: FakeIntervalTree, // maps a real query index to its declaration
+}
+
+impl QueryEnhancer {
+    /// Returns the query indices to use to replace this real query index; panics if `real` was never declared.
+    pub fn replacement(&self, real: u32) -> Range<u32> {
+        let real = real as usize;
+
+        // look up the declaration that covers this real query index
+        let (range, (origin, real_length)) =
+            self.real_to_origin
+                .query(real)
+                .expect("real has never been declared");
+
+        // if `real` is the last word of its replacement (`real_length` words long)
+        if (range.start + real_length - 1) == real {
+            let mut count = range.len();
+            let mut new_origin = origin;
+            for (i, slice) in self.origins[new_origin..].windows(2).enumerate() { // walk the original words from `origin`
+                let len = slice[1] - slice[0]; // padded width of that original word
+                count = count.saturating_sub(len);
+                if count == 0 { new_origin = origin + i; break } // the declared range is fully consumed here
+            }
+
+            let n = real - range.start; // position of `real` inside its replacement
+            let start = self.origins[origin];
+            let end = self.origins[new_origin + 1];
+            let remaining = (end - start) - n; // the last word takes all the width that is left
+
+            Range { start: (start + n) as u32, end: (start + n + remaining) as u32 }
+
+        } else {
+            // not the last word: it occupies exactly one query index,
+            // at its offset from the padded origin
+            let n = real as usize - range.start;
+            let origin = self.origins[origin];
+
+            Range { start: (origin + n) as u32, end: (origin + n + 1) as u32 }
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn original_unmodified() {
+        let query = ["new", "york", "city", "subway"];
+        //             0       1       2        3
+        let mut builder = QueryEnhancerBuilder::new(&query);
+
+        // new york = new york city
+        builder.declare(0..2, 4, &["new", "york", "city"]);
+        //                    ^      4       5       6
+
+        let enhancer = builder.build();
+
+        assert_eq!(enhancer.replacement(0), 0..1); // new
+        assert_eq!(enhancer.replacement(1), 1..2); // york
+        assert_eq!(enhancer.replacement(2), 2..3); // city
+        assert_eq!(enhancer.replacement(3), 3..4); // subway
+        assert_eq!(enhancer.replacement(4), 0..1); // new
+        assert_eq!(enhancer.replacement(5), 1..2); // york
+        assert_eq!(enhancer.replacement(6), 2..3); // city
+    }
+
+    #[test]
+    fn simple_growing() {
+        let query = ["new", "york", "subway"];
+        //             0       1        2
+        let mut builder = QueryEnhancerBuilder::new(&query);
+
+        // new york = new york city
+        builder.declare(0..2, 3, &["new", "york", "city"]);
+        //                    ^      3       4       5
+
+        let enhancer = builder.build();
+
+        assert_eq!(enhancer.replacement(0), 0..1); // new
+        assert_eq!(enhancer.replacement(1), 1..3); // york
+        assert_eq!(enhancer.replacement(2), 3..4); // subway
+        assert_eq!(enhancer.replacement(3), 0..1); // new
+        assert_eq!(enhancer.replacement(4), 1..2); // york
+        assert_eq!(enhancer.replacement(5), 2..3); // city
+    }
+
+    #[test]
+    fn same_place_growings() {
+        let query = ["NY", "subway"];
+        //             0       1
+        let mut builder = QueryEnhancerBuilder::new(&query);
+
+        // NY = new york
+        builder.declare(0..1, 2, &["new", "york"]);
+        //                    ^      2       3
+
+        // NY = new york city
+        builder.declare(0..1, 4, &["new", "york", "city"]);
+        //                    ^      4       5       6
+
+        // NY = NYC
+        builder.declare(0..1, 7, &["NYC"]);
+        //                    ^      7
+
+        // NY = new york city
+        builder.declare(0..1, 8, &["new", "york", "city"]);
+        //                    ^      8       9      10
+
+        // subway = underground train
+        builder.declare(1..2, 11, &["underground", "train"]);
+        //                    ^          11          12
+
+        let enhancer = builder.build();
+
+        assert_eq!(enhancer.replacement(0), 0..3); // NY
+        assert_eq!(enhancer.replacement(1), 3..5); // subway
+        assert_eq!(enhancer.replacement(2), 0..1); // new
+        assert_eq!(enhancer.replacement(3), 1..3); // york
+        assert_eq!(enhancer.replacement(4), 0..1); // new
+        assert_eq!(enhancer.replacement(5), 1..2); // york
+        assert_eq!(enhancer.replacement(6), 2..3); // city
+        assert_eq!(enhancer.replacement(7), 0..3); // NYC
+        assert_eq!(enhancer.replacement(8), 0..1); // new
+        assert_eq!(enhancer.replacement(9), 1..2); // york
+        assert_eq!(enhancer.replacement(10), 2..3); // city
+        assert_eq!(enhancer.replacement(11), 3..4); // underground
+        assert_eq!(enhancer.replacement(12), 4..5); // train
+    }
+
+    #[test]
+    fn bigger_growing() {
+        let query = ["NYC", "subway"];
+        //             0        1
+        let mut builder = QueryEnhancerBuilder::new(&query);
+
+        // NYC = new york city
+        builder.declare(0..1, 2, &["new", "york", "city"]);
+        //                    ^      2       3       4
+
+        let enhancer = builder.build();
+
+        assert_eq!(enhancer.replacement(0), 0..3); // NYC
+        assert_eq!(enhancer.replacement(1), 3..4); // subway
+        assert_eq!(enhancer.replacement(2), 0..1); // new
+        assert_eq!(enhancer.replacement(3), 1..2); // york
+        assert_eq!(enhancer.replacement(4), 2..3); // city
+    }
+
+    #[test]
+    fn middle_query_growing() {
+        let query = ["great", "awesome", "NYC", "subway"];
+        //              0         1        2        3
+        let mut builder = QueryEnhancerBuilder::new(&query);
+
+        // NYC = new york city
+        builder.declare(2..3, 4, &["new", "york", "city"]);
+        //                    ^      4       5       6
+
+        let enhancer = builder.build();
+
+        assert_eq!(enhancer.replacement(0), 0..1); // great
+        assert_eq!(enhancer.replacement(1), 1..2); // awesome
+        assert_eq!(enhancer.replacement(2), 2..5); // NYC
+        assert_eq!(enhancer.replacement(3), 5..6); // subway
+        assert_eq!(enhancer.replacement(4), 2..3); // new
+        assert_eq!(enhancer.replacement(5), 3..4); // york
+        assert_eq!(enhancer.replacement(6), 4..5); // city
+    }
+
+    #[test]
+    fn end_query_growing() {
+        let query = ["NYC", "subway"];
+        //             0        1
+        let mut builder = QueryEnhancerBuilder::new(&query);
+
+        // subway = underground train
+        builder.declare(1..2, 2, &["underground", "train"]);
+        //                    ^         2            3
+
+        let enhancer = builder.build();
+
+        assert_eq!(enhancer.replacement(0), 0..1); // NYC
+        assert_eq!(enhancer.replacement(1), 1..3); // subway
+        assert_eq!(enhancer.replacement(2), 1..2); // underground
+        assert_eq!(enhancer.replacement(3), 2..3); // train
+    }
+
+    #[test]
+    fn multiple_growings() {
+        let query = ["great", "awesome", "NYC", "subway"];
+        //              0         1        2        3
+        let mut builder = QueryEnhancerBuilder::new(&query);
+
+        // NYC = new york city
+        builder.declare(2..3, 4, &["new", "york", "city"]);
+        //                    ^      4       5       6
+
+        // subway = underground train
+        builder.declare(3..4, 7, &["underground", "train"]);
+        //                    ^          7           8
+
+        let enhancer = builder.build();
+
+        assert_eq!(enhancer.replacement(0), 0..1); // great
+        assert_eq!(enhancer.replacement(1), 1..2); // awesome
+        assert_eq!(enhancer.replacement(2), 2..5); // NYC
+        assert_eq!(enhancer.replacement(3), 5..7); // subway
+        assert_eq!(enhancer.replacement(4), 2..3); // new
+        assert_eq!(enhancer.replacement(5), 3..4); // york
+        assert_eq!(enhancer.replacement(6), 4..5); // city
+        assert_eq!(enhancer.replacement(7), 5..6); // underground
+        assert_eq!(enhancer.replacement(8), 6..7); // train
+    }
+
+    #[test]
+    fn multiple_probable_growings() {
+        let query = ["great", "awesome", "NYC", "subway"];
+        //              0         1        2        3
+        let mut builder = QueryEnhancerBuilder::new(&query);
+
+        // NYC = new york city
+        builder.declare(2..3, 4, &["new", "york", "city"]);
+        //                    ^      4       5       6
+
+        // subway = underground train
+        builder.declare(3..4, 7, &["underground", "train"]);
+        //                    ^          7           8
+
+        // great awesome = good
+        builder.declare(0..2, 9, &["good"]);
+        //                    ^       9
+
+        // awesome NYC = NY
+        builder.declare(1..3, 10, &["NY"]);
+        //                    ^^     10
+
+        // NYC subway = metro
+        builder.declare(2..4, 11, &["metro"]);
+        //                    ^^      11
+
+        let enhancer = builder.build();
+
+        assert_eq!(enhancer.replacement(0),  0..1); // great
+        assert_eq!(enhancer.replacement(1),  1..2); // awesome
+        assert_eq!(enhancer.replacement(2),  2..5); // NYC
+        assert_eq!(enhancer.replacement(3),  5..7); // subway
+        assert_eq!(enhancer.replacement(4),  2..3); // new
+        assert_eq!(enhancer.replacement(5),  3..4); // york
+        assert_eq!(enhancer.replacement(6),  4..5); // city
+        assert_eq!(enhancer.replacement(7),  5..6); // underground
+        assert_eq!(enhancer.replacement(8),  6..7); // train
+        assert_eq!(enhancer.replacement(9),  0..2); // good
+        assert_eq!(enhancer.replacement(10), 1..5); // NY
+        assert_eq!(enhancer.replacement(11), 2..5); // metro
+    }
+}
--- a/meilidb-core/src/raw_document.rs
+++ b/meilidb-core/src/raw_document.rs
@ -0,0 +1,141 @@
+use std::sync::Arc;
+use std::fmt;
+use sdset::SetBuf;
+use slice_group_by::GroupBy;
+use crate::{TmpMatch, DocumentId, Highlight};
+
+#[derive(Clone)]
+pub struct RawDocument {
+    pub id: DocumentId,
+    pub matches: SharedMatches, // this document's window into the shared match vectors
+    pub highlights: Vec<Highlight>,
+}
+
+impl RawDocument {
+    fn new(id: DocumentId, matches: SharedMatches, highlights: Vec<Highlight>) -> RawDocument {
+        RawDocument { id, matches, highlights }
+    }
+
+    pub fn query_index(&self) -> &[u32] {
+        let r = self.matches.range;
+        // SAFETY: `range` is private and only built by `raw_documents_from`,
+        // which keeps it inside the bounds of every `Matches` vector
+        unsafe { &self.matches.matches.query_index.get_unchecked(r.start..r.end) }
+    }
+
+    pub fn distance(&self) -> &[u8] {
+        let r = self.matches.range;
+        // SAFETY: `range` is private and only built by `raw_documents_from`,
+        // which keeps it inside the bounds of every `Matches` vector
+        unsafe { &self.matches.matches.distance.get_unchecked(r.start..r.end) }
+    }
+
+    pub fn attribute(&self) -> &[u16] {
+        let r = self.matches.range;
+        // SAFETY: `range` is private and only built by `raw_documents_from`,
+        // which keeps it inside the bounds of every `Matches` vector
+        unsafe { &self.matches.matches.attribute.get_unchecked(r.start..r.end) }
+    }
+
+    pub fn word_index(&self) -> &[u16] {
+        let r = self.matches.range;
+        // SAFETY: `range` is private and only built by `raw_documents_from`,
+        // which keeps it inside the bounds of every `Matches` vector
+        unsafe { &self.matches.matches.word_index.get_unchecked(r.start..r.end) }
+    }
+
+    pub fn is_exact(&self) -> &[bool] {
+        let r = self.matches.range;
+        // SAFETY: `range` is private and only built by `raw_documents_from`,
+        // which keeps it inside the bounds of every `Matches` vector
+        unsafe { &self.matches.matches.is_exact.get_unchecked(r.start..r.end) }
+    }
+}
+
+impl fmt::Debug for RawDocument {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { // hand-written multi-line layout, one field per line
+        f.write_str("RawDocument {\r\n")?;
+        f.write_fmt(format_args!("{:>15}: {:?},\r\n",    "id",          self.id))?; // `{:>15}` right-aligns the field name
+        f.write_fmt(format_args!("{:>15}: {:^5?},\r\n",  "query_index", self.query_index()))?;
+        f.write_fmt(format_args!("{:>15}: {:^5?},\r\n",  "distance",    self.distance()))?;
+        f.write_fmt(format_args!("{:>15}: {:^5?},\r\n",  "attribute",   self.attribute()))?;
+        f.write_fmt(format_args!("{:>15}: {:^5?},\r\n",  "word_index",  self.word_index()))?;
+        f.write_fmt(format_args!("{:>15}: {:^5?},\r\n", "is_exact",    self.is_exact()))?; // `highlights` is not printed
+        f.write_str("}")?;
+        Ok(())
+    }
+}
+
+pub fn raw_documents_from( // builds one `RawDocument` per group of matches sharing a document id
+    matches: SetBuf<(DocumentId, TmpMatch)>,
+    highlights: SetBuf<(DocumentId, Highlight)>,
+) -> Vec<RawDocument>
+{
+    let mut docs_ranges: Vec<(_, Range, _)> = Vec::new();
+    let mut matches2 = Matches::with_capacity(matches.len()); // sized for all matches up front
+
+    let matches = matches.linear_group_by_key(|(id, _)| *id); // consecutive entries with the same id form a group
+    let highlights = highlights.linear_group_by_key(|(id, _)| *id);
+
+    for (mgroup, hgroup) in matches.zip(highlights) { // NOTE(review): assumes both sets list the same ids in the same order
+        debug_assert_eq!(mgroup[0].0, hgroup[0].0); // only checked in debug builds
+
+        let document_id = mgroup[0].0;
+        let start = docs_ranges.last().map(|(_, r, _)| r.end).unwrap_or(0); // continue where the previous document ended
+        let end = start + mgroup.len();
+
+        let highlights = hgroup.iter().map(|(_, h)| *h).collect();
+        docs_ranges.push((document_id, Range { start, end }, highlights));
+
+        matches2.extend_from_slice(mgroup); // keeps the vectors in step with the ranges
+    }
+
+    let matches = Arc::new(matches2); // shared by every document built below
+    docs_ranges.into_iter().map(|(id, range, highlights)| {
+        let matches = SharedMatches { range, matches: matches.clone() };
+        RawDocument::new(id, matches, highlights)
+    }).collect()
+}
+
+#[derive(Debug, Copy, Clone)]
+struct Range { // `Copy` index window `start..end` into the `Matches` vectors
+    start: usize,
+    end: usize,
+}
+
+#[derive(Clone)]
+pub struct SharedMatches { // a window plus a shared handle to the vectors it indexes
+    range: Range,
+    matches: Arc<Matches>,
+}
+
+#[derive(Clone)]
+struct Matches { // one vector per match field; index `i` across all of them describes one match
+    query_index: Vec<u32>,
+    distance: Vec<u8>,
+    attribute: Vec<u16>,
+    word_index: Vec<u16>,
+    is_exact: Vec<bool>,
+}
+
+impl Matches {
+    fn with_capacity(cap: usize) -> Matches { // reserves room for `cap` matches in every vector
+        Matches {
+            query_index: Vec::with_capacity(cap),
+            distance: Vec::with_capacity(cap),
+            attribute: Vec::with_capacity(cap),
+            word_index: Vec::with_capacity(cap),
+            is_exact: Vec::with_capacity(cap),
+        }
+    }
+
+    fn extend_from_slice(&mut self, matches: &[(DocumentId, TmpMatch)]) { // appends each match field by field; the id is dropped
+        for (_, match_) in matches {
+            self.query_index.push(match_.query_index);
+            self.distance.push(match_.distance);
+            self.attribute.push(match_.attribute);
+            self.word_index.push(match_.word_index);
+            self.is_exact.push(match_.is_exact);
+        }
+    }
+}
--- a/meilidb-core/src/reordered_attrs.rs
+++ b/meilidb-core/src/reordered_attrs.rs
@ -1,4 +1,4 @@
-#[derive(Default)]
+#[derive(Default, Clone)]
 pub struct ReorderedAttrs {
    count: usize,
    reorders: Vec<Option<u16>>,
--- a/meilidb-data/src/database/synonyms_addition.rs
+++ b/meilidb-data/src/database/synonyms_addition.rs
@ -21,10 +21,10 @@ impl<'a> SynonymsAddition<'a> {
    pub fn add_synonym<S, T, I>(&mut self, synonym: S, alternatives: I)
    where S: AsRef<str>,
          T: AsRef<str>,
-          I: Iterator<Item=T>,
+          I: IntoIterator<Item=T>,
    {
        let synonym = normalize_str(synonym.as_ref());
-        let alternatives = alternatives.map(|s| s.as_ref().to_lowercase());
+        let alternatives = alternatives.into_iter().map(|s| s.as_ref().to_lowercase());
        self.synonyms.entry(synonym).or_insert_with(Vec::new).extend(alternatives);
    }

@ -73,7 +73,7 @@ impl<'a> SynonymsAddition<'a> {

        // update the "consistent" view of the Index
        let words = main.words_set()?.unwrap_or_default();
-        let ranked_map = lease_inner.ranked_map.clone();;
+        let ranked_map = lease_inner.ranked_map.clone();
        let schema = lease_inner.schema.clone();
        let raw = lease_inner.raw.clone();
        lease_inner.raw.compact();
--- a/meilidb/Cargo.toml
+++ b/meilidb/Cargo.toml
@ -14,10 +14,12 @@ csv = "1.0.7"
 diskus = "0.5.0"
 env_logger = "0.6.1"
 jemallocator = "0.1.9"
+linked-hash-map = "0.5.2"
 meilidb-core = { path = "../meilidb-core", version = "0.1.0" }
 quickcheck = "0.8.2"
 rand = "0.6.5"
 rand_xorshift = "0.1.1"
+rustyline = { version = "5.0.0", default-features = false }
 serde = { version = "1.0.91" , features = ["derive"] }
 serde_json = "1.0.39"
 structopt = "0.2.15"
--- a/meilidb/examples/create-database.rs
+++ b/meilidb/examples/create-database.rs
@ -31,9 +31,13 @@ pub struct Opt {
    #[structopt(long = "schema", parse(from_os_str))]
    pub schema_path: PathBuf,

+    /// The file with the synonyms.
+    #[structopt(long = "synonyms", parse(from_os_str))]
+    pub synonyms: Option<PathBuf>,
+
    /// The path to the list of stop words (one by line).
    #[structopt(long = "stop-words", parse(from_os_str))]
-    pub stop_words_path: Option<PathBuf>,
+    pub stop_words: Option<PathBuf>,

    #[structopt(long = "update-group-size")]
    pub update_group_size: Option<usize>,
@ -45,12 +49,40 @@ struct Document<'a> (
    HashMap<Cow<'a, str>, Cow<'a, str>>
 );

+#[derive(Debug, Clone, Serialize, Deserialize)]
+#[serde(untagged)]
+pub enum Synonym { // one entry of the synonyms file; untagged, so the shape of the value selects the variant
+    OneWay(SynonymOneWay), // `search_terms` gets `synonyms` as its alternatives
+    MultiWay { synonyms: Vec<String> }, // each listed word gets all the other ones as alternatives
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+#[serde(rename_all = "camelCase")]
+pub struct SynonymOneWay { // fields are (de)serialized with camelCase keys, e.g. `searchTerms`
+    pub search_terms: String,
+    pub synonyms: Synonyms,
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+#[serde(untagged)]
+pub enum Synonyms { // accepts either a list of alternatives or a single one
+    Multiple(Vec<String>),
+    Single(String),
+}
+
+fn read_synomys(path: &Path) -> Result<Vec<Synonym>, Box<dyn Error>> { // NOTE(review): `synomys` is a typo for `synonyms`; renaming also needs the caller updated
+    let file = File::open(path)?;
+    let synonyms = serde_json::from_reader(file)?; // the file must hold a JSON list of `Synonym` entries
+    Ok(synonyms)
+}
+
 fn index(
    schema: Schema,
    database_path: &Path,
    csv_data_path: &Path,
    update_group_size: Option<usize>,
    stop_words: &HashSet<String>,
+    synonyms: Vec<Synonym>,
 ) -> Result<Database, Box<dyn Error>>
 {
    let database = Database::start_default(database_path)?;
@ -62,6 +94,28 @@ fn index(

    let index = database.create_index("test", schema.clone())?;

+    let mut synonyms_adder = index.synonyms_addition();
+    for synonym in synonyms {
+        match synonym {
+            Synonym::OneWay(SynonymOneWay { search_terms, synonyms }) => {
+                let alternatives = match synonyms {
+                    Synonyms::Multiple(alternatives) => alternatives,
+                    Synonyms::Single(alternative) => vec![alternative],
+                };
+                synonyms_adder.add_synonym(search_terms, alternatives);
+            },
+            Synonym::MultiWay { mut synonyms } => {
+                for _ in 0..synonyms.len() {
+                    if let Some((synonym, alternatives)) = synonyms.split_first() {
+                        synonyms_adder.add_synonym(synonym, alternatives);
+                    }
+                    synonyms.rotate_left(1);
+                }
+            },
+        }
+    }
+    synonyms_adder.finalize()?;
+
    let mut rdr = csv::Reader::from_path(csv_data_path)?;
    let mut raw_record = csv::StringRecord::new();
    let headers = rdr.headers()?.clone();
@ -133,13 +187,25 @@ fn main() -> Result<(), Box<dyn Error>> {
        Schema::from_toml(file)?
    };

-    let stop_words = match opt.stop_words_path {
+    let stop_words = match opt.stop_words {
        Some(ref path) => retrieve_stop_words(path)?,
        None           => HashSet::new(),
    };

+    let synonyms = match opt.synonyms {
+        Some(ref path) => read_synomys(path)?,
+        None           => Vec::new(),
+    };
+
    let start = Instant::now();
-    let result = index(schema, &opt.database_path, &opt.csv_data_path, opt.update_group_size, &stop_words);
+    let result = index(
+        schema,
+        &opt.database_path,
+        &opt.csv_data_path,
+        opt.update_group_size,
+        &stop_words,
+        synonyms,
+    );

    if let Err(e) = result {
        return Err(e.into())
--- a/meilidb/examples/query-database.rs
+++ b/meilidb/examples/query-database.rs
@ -2,17 +2,19 @@
 static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc;

 use std::collections::btree_map::{BTreeMap, Entry};
-use std::collections::{HashMap, HashSet};
-use std::iter::FromIterator;
-use std::io::{self, Write};
-use std::time::{Instant, Duration};
-use std::path::PathBuf;
+use std::collections::HashSet;
 use std::error::Error;
+use std::io::{self, Write};
+use std::iter::FromIterator;
+use std::path::PathBuf;
+use std::time::{Instant, Duration};

-use termcolor::{Color, ColorChoice, ColorSpec, StandardStream, WriteColor};
+use linked_hash_map::LinkedHashMap;
+use rustyline::{Editor, Config};
 use structopt::StructOpt;
-use meilidb_core::Highlight;
+use termcolor::{Color, ColorChoice, ColorSpec, StandardStream, WriteColor};

+use meilidb_core::Highlight;
 use meilidb_data::Database;
 use meilidb_schema::SchemaAttr;

@ -22,6 +24,9 @@ pub struct Opt {
    #[structopt(parse(from_os_str))]
    pub database_path: PathBuf,

+    #[structopt(long = "fetch-timeout-ms")]
+    pub fetch_timeout_ms: Option<u64>,
+
    /// Fields that must be displayed.
    pub displayed_fields: Vec<String>,

@ -34,7 +39,7 @@ pub struct Opt {
    pub char_context: usize,
 }

-type Document = HashMap<String, String>;
+type Document = LinkedHashMap<String, String>;

 fn display_highlights(text: &str, ranges: &[usize]) -> io::Result<()> {
    let mut stdout = StandardStream::stdout(ColorChoice::Always);
@ -140,9 +145,6 @@ fn main() -> Result<(), Box<dyn Error>> {
    let start = Instant::now();
    let database = Database::start_default(&opt.database_path)?;

-    let mut buffer = String::new();
-    let input = io::stdin();
-
    let index = database.open_index("test")?.unwrap();
    let schema = index.schema();

@ -151,65 +153,77 @@ fn main() -> Result<(), Box<dyn Error>> {
    let fields = opt.displayed_fields.iter().map(String::as_str);
    let fields = HashSet::from_iter(fields);

-    loop {
-        print!("Searching for: ");
-        io::stdout().flush()?;
+    let config = Config::builder().auto_add_history(true).build();
+    let mut readline = Editor::<()>::with_config(config);
+    let _ = readline.load_history("query-history.txt");

-        if input.read_line(&mut buffer)? == 0 { break }
-        let query = buffer.trim_end_matches('\n');
+    for result in readline.iter("Searching for: ") {
+        match result {
+            Ok(query) => {
+                let start_total = Instant::now();

-        let start_total = Instant::now();
+                let builder = match opt.fetch_timeout_ms {
+                    Some(timeout_ms) => {
+                        let timeout = Duration::from_millis(timeout_ms);
+                        index.query_builder().with_fetch_timeout(timeout)
+                    },
+                    None => index.query_builder(),
+                };
+                let documents = builder.query(&query, 0..opt.number_results)?;

-        let builder = index.query_builder();
-        let documents = builder.query(query, 0..opt.number_results)?;
+                let mut retrieve_duration = Duration::default();

-        let mut retrieve_duration = Duration::default();
+                let number_of_documents = documents.len();
+                for mut doc in documents {

-        let number_of_documents = documents.len();
-        for mut doc in documents {
+                    doc.highlights.sort_unstable_by_key(|m| (m.char_index, m.char_length));

-            doc.highlights.sort_unstable_by_key(|m| (m.char_index, m.char_length));
+                    let start_retrieve = Instant::now();
+                    let result = index.document::<Document>(Some(&fields), doc.id);
+                    retrieve_duration += start_retrieve.elapsed();

-            let start_retrieve = Instant::now();
-            let result = index.document::<Document>(Some(&fields), doc.id);
-            retrieve_duration += start_retrieve.elapsed();
+                    match result {
+                        Ok(Some(document)) => {
+                            for (name, text) in document {
+                                print!("{}: ", name);

-            match result {
-                Ok(Some(document)) => {
-                    for (name, text) in document {
-                        print!("{}: ", name);
-
-                        let attr = schema.attribute(&name).unwrap();
-                        let highlights = doc.highlights.iter()
-                                        .filter(|m| SchemaAttr::new(m.attribute) == attr)
-                                        .cloned();
-                        let (text, highlights) = crop_text(&text, highlights, opt.char_context);
-                        let areas = create_highlight_areas(&text, &highlights);
-                        display_highlights(&text, &areas)?;
-                        println!();
+                                let attr = schema.attribute(&name).unwrap();
+                                let highlights = doc.highlights.iter()
+                                                .filter(|m| SchemaAttr::new(m.attribute) == attr)
+                                                .cloned();
+                                let (text, highlights) = crop_text(&text, highlights, opt.char_context);
+                                let areas = create_highlight_areas(&text, &highlights);
+                                display_highlights(&text, &areas)?;
+                                println!();
+                            }
+                        },
+                        Ok(None) => eprintln!("missing document"),
+                        Err(e) => eprintln!("{}", e),
                    }
-                },
-                Ok(None) => eprintln!("missing document"),
-                Err(e) => eprintln!("{}", e),
+
+                    let mut matching_attributes = HashSet::new();
+                    for highlight in doc.highlights {
+                        let attr = SchemaAttr::new(highlight.attribute);
+                        let name = schema.attribute_name(attr);
+                        matching_attributes.insert(name);
+                    }
+
+                    let matching_attributes = Vec::from_iter(matching_attributes);
+                    println!("matching in: {:?}", matching_attributes);
+
+                    println!();
+                }
+
+                eprintln!("document field retrieve took {:.2?}", retrieve_duration);
+                eprintln!("===== Found {} results in {:.2?} =====", number_of_documents, start_total.elapsed());
+            },
+            Err(err) => {
+                println!("Error: {:?}", err);
+                break
            }
-
-            let mut matching_attributes = HashSet::new();
-            for highlight in doc.highlights {
-                let attr = SchemaAttr::new(highlight.attribute);
-                let name = schema.attribute_name(attr);
-                matching_attributes.insert(name);
-            }
-
-            let matching_attributes = Vec::from_iter(matching_attributes);
-            println!("matching in: {:?}", matching_attributes);
-
-            println!();
        }
-
-        eprintln!("document field retrieve took {:.2?}", retrieve_duration);
-        eprintln!("===== Found {} results in {:.2?} =====", number_of_documents, start_total.elapsed());
-        buffer.clear();
    }

+    readline.save_history("query-history.txt").unwrap();
    Ok(())
 }