From a45cc4b6189c2eb40758b9749c3d1c8b19b88d40 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= <renault.cle@gmail.com>
Date: Mon, 4 Mar 2019 14:59:58 +0100
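Subject: [PATCH 1/3] fix: Reduce the size of the DocIndex type

Narrow `word_index` and `char_index` from u32 to u16 in `DocIndex` and
`Match`, which shrinks `DocIndex` from 24 to 16 bytes.

Note: the indexer still converts with `as u16`, so a `word_index` or
`char_index` value above 65535 wraps around silently instead of being
rejected; a checked conversion (`u16::try_from`) should follow.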

---
 examples/query-database.rs                  |  2 +-
 src/database/serde/indexer_serializer.rs    |  6 +++---
 src/lib.rs                                  | 14 +++++++-------
 src/rank/criterion/sum_of_words_position.rs |  2 +-
 src/rank/criterion/words_proximity.rs       | 14 +++++++-------
 src/rank/mod.rs                             |  8 ++++----
 6 files changed, 23 insertions(+), 23 deletions(-)

diff --git a/examples/query-database.rs b/examples/query-database.rs
index 64e035a8a..ca6733c30 100644
--- a/examples/query-database.rs
+++ b/examples/query-database.rs
@@ -126,7 +126,7 @@ fn crop_text(
             (m.char_index as usize) + (m.char_length as usize) <= start + (context * 2)
         })
         .map(|match_| {
-            Match { char_index: match_.char_index - start as u32, ..match_ }
+            Match { char_index: match_.char_index - start as u16, ..match_ }
         })
         .collect();
 
diff --git a/src/database/serde/indexer_serializer.rs b/src/database/serde/indexer_serializer.rs
index 75860d937..c25ffe98c 100644
--- a/src/database/serde/indexer_serializer.rs
+++ b/src/database/serde/indexer_serializer.rs
@@ -56,7 +56,7 @@ where B: TokenizerBuilder
 
             // FIXME must u32::try_from instead
             let attribute = self.attribute.0;
-            let word_index = word_index as u32;
+            let word_index = word_index as u16;
 
             // insert the exact representation
             let word_lower = word.to_lowercase();
@@ -69,7 +69,7 @@ where B: TokenizerBuilder
                 let word_unidecoded = unidecode::unidecode(word).to_lowercase();
                 let word_unidecoded = word_unidecoded.trim();
                 if word_lower != word_unidecoded {
-                    let char_index = char_index as u32;
+                    let char_index = char_index as u16;
                     let char_length = length;
 
                     let doc_index = DocIndex { document_id, attribute, word_index, char_index, char_length };
@@ -77,7 +77,7 @@ where B: TokenizerBuilder
                 }
             }
 
-            let char_index = char_index as u32;
+            let char_index = char_index as u16;
             let char_length = length;
 
             let doc_index = DocIndex { document_id, attribute, word_index, char_index, char_length };
diff --git a/src/lib.rs b/src/lib.rs
index e77e03ecb..964de8f75 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -50,14 +50,14 @@ pub struct DocIndex {
     /// The attribute in the document where the word was found
     /// along with the index in it.
     pub attribute: u16,
-    pub word_index: u32,
+    pub word_index: u16,
 
     /// The position in bytes where the word was found
     /// along with the length of it.
     ///
     /// It informs on the original word area in the text indexed
     /// without needing to run the tokenizer again.
-    pub char_index: u32,
+    pub char_index: u16,
     pub char_length: u16,
 }
 
@@ -84,7 +84,7 @@ pub struct Match {
     /// The attribute in the document where the word was found
     /// along with the index in it.
     pub attribute: u16,
-    pub word_index: u32,
+    pub word_index: u16,
 
     /// Whether the word that match is an exact match or a prefix.
     pub is_exact: bool,
@@ -94,7 +94,7 @@ pub struct Match {
     ///
     /// It informs on the original word area in the text indexed
     /// without needing to run the tokenizer again.
-    pub char_index: u32,
+    pub char_index: u16,
     pub char_length: u16,
 }
 
@@ -116,9 +116,9 @@ impl Match {
             query_index: u32::max_value(),
             distance: u8::max_value(),
             attribute: u16::max_value(),
-            word_index: u32::max_value(),
+            word_index: u16::max_value(),
             is_exact: true,
-            char_index: u32::max_value(),
+            char_index: u16::max_value(),
             char_length: u16::max_value(),
         }
     }
@@ -131,6 +131,6 @@ mod tests {
 
     #[test]
     fn docindex_mem_size() {
-        assert_eq!(mem::size_of::<DocIndex>(), 24);
+        assert_eq!(mem::size_of::<DocIndex>(), 16);
     }
 }
diff --git a/src/rank/criterion/sum_of_words_position.rs b/src/rank/criterion/sum_of_words_position.rs
index 73ea5978c..5938ce5ab 100644
--- a/src/rank/criterion/sum_of_words_position.rs
+++ b/src/rank/criterion/sum_of_words_position.rs
@@ -6,7 +6,7 @@ use crate::rank::criterion::Criterion;
 use crate::rank::RawDocument;
 
 #[inline]
-fn sum_matches_attribute_index(query_index: &[u32], word_index: &[u32]) -> usize {
+fn sum_matches_attribute_index(query_index: &[u32], word_index: &[u16]) -> usize {
     let mut sum_word_index = 0;
     let mut index = 0;
 
diff --git a/src/rank/criterion/words_proximity.rs b/src/rank/criterion/words_proximity.rs
index 42cc738ce..dbf26e21a 100644
--- a/src/rank/criterion/words_proximity.rs
+++ b/src/rank/criterion/words_proximity.rs
@@ -5,14 +5,14 @@ use slice_group_by::GroupBy;
 use crate::rank::criterion::Criterion;
 use crate::rank::RawDocument;
 
-const MAX_DISTANCE: u32 = 8;
+const MAX_DISTANCE: u16 = 8;
 
 #[inline]
 fn clone_tuple<T: Clone, U: Clone>((a, b): (&T, &U)) -> (T, U) {
     (a.clone(), b.clone())
 }
 
-fn index_proximity(lhs: u32, rhs: u32) -> u32 {
+fn index_proximity(lhs: u16, rhs: u16) -> u16 {
     if lhs < rhs {
         cmp::min(rhs - lhs, MAX_DISTANCE)
     } else {
@@ -20,13 +20,13 @@ fn index_proximity(lhs: u32, rhs: u32) -> u32 {
     }
 }
 
-fn attribute_proximity((lattr, lwi): (u16, u32), (rattr, rwi): (u16, u32)) -> u32 {
+fn attribute_proximity((lattr, lwi): (u16, u16), (rattr, rwi): (u16, u16)) -> u16 {
     if lattr != rattr { return MAX_DISTANCE }
     index_proximity(lwi, rwi)
 }
 
-fn min_proximity((lattr, lwi): (&[u16], &[u32]), (rattr, rwi): (&[u16], &[u32])) -> u32 {
-    let mut min_prox = u32::max_value();
+fn min_proximity((lattr, lwi): (&[u16], &[u16]), (rattr, rwi): (&[u16], &[u16])) -> u16 {
+    let mut min_prox = u16::max_value();
 
     for a in lattr.iter().zip(lwi) {
         for b in rattr.iter().zip(rwi) {
@@ -43,8 +43,8 @@ fn matches_proximity(
     query_index: &[u32],
     distance: &[u8],
     attribute: &[u16],
-    word_index: &[u32],
-) -> u32
+    word_index: &[u16],
+) -> u16
 {
     let mut query_index_groups = query_index.linear_group();
     let mut proximity = 0;
diff --git a/src/rank/mod.rs b/src/rank/mod.rs
index 3b31c0794..f5b07d27d 100644
--- a/src/rank/mod.rs
+++ b/src/rank/mod.rs
@@ -79,7 +79,7 @@ impl RawDocument {
         unsafe { &self.matches.matches.attribute.get_unchecked(r.start..r.end) }
     }
 
-    pub fn word_index(&self) -> &[u32] {
+    pub fn word_index(&self) -> &[u16] {
         let r = self.matches.range;
         // it is safe because construction/modifications
         // can only be done in this module
@@ -93,7 +93,7 @@ impl RawDocument {
         unsafe { &self.matches.matches.is_exact.get_unchecked(r.start..r.end) }
     }
 
-    pub fn char_index(&self) -> &[u32] {
+    pub fn char_index(&self) -> &[u16] {
         let r = self.matches.range;
         // it is safe because construction/modifications
         // can only be done in this module
@@ -150,9 +150,9 @@ struct Matches {
     query_index: Vec<u32>,
     distance: Vec<u8>,
     attribute: Vec<u16>,
-    word_index: Vec<u32>,
+    word_index: Vec<u16>,
     is_exact: Vec<bool>,
-    char_index: Vec<u32>,
+    char_index: Vec<u16>,
     char_length: Vec<u16>,
 }
 

From 383a49b44f943ade59ce92dd2d0578d930a9de9e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= <renault.cle@gmail.com>
Date: Mon, 4 Mar 2019 15:00:53 +0100
Subject: [PATCH 2/3] fix: Compact the whole database for each WriteBatch
 injected

---
 src/database/mod.rs | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/database/mod.rs b/src/database/mod.rs
index 2b7a87f45..d8b1bea79 100644
--- a/src/database/mod.rs
+++ b/src/database/mod.rs
@@ -266,6 +266,7 @@ impl DatabaseIndex {
     fn commit_update(&self, update: Update) -> Result<Arc<DatabaseView<Arc<DB>>>, Box<Error>> {
         let batch = update.build()?;
         self.db.write(batch)?;
+        self.db.compact_range(None, None);
 
         let snapshot = Snapshot::new(self.db.clone());
         let view = Arc::new(DatabaseView::new(snapshot)?);

From aae301878c6bf1dbb04e9535395147d0edd1afad Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= <renault.cle@gmail.com>
Date: Mon, 4 Mar 2019 15:01:38 +0100
Subject: [PATCH 3/3] fix: Flush the database after each WriteBatch injected

---
 src/database/mod.rs | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/database/mod.rs b/src/database/mod.rs
index d8b1bea79..70ca62d92 100644
--- a/src/database/mod.rs
+++ b/src/database/mod.rs
@@ -267,6 +267,7 @@ impl DatabaseIndex {
         let batch = update.build()?;
         self.db.write(batch)?;
         self.db.compact_range(None, None);
+        self.db.flush(true)?;
 
         let snapshot = Snapshot::new(self.db.clone());
         let view = Arc::new(DatabaseView::new(snapshot)?);