From 106b88687344f2b9d9db7b6057bc21f376d598b4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= <renault.cle@gmail.com>
Date: Mon, 30 Dec 2019 12:27:24 +0100
Subject: [PATCH] Cache the prefix postings lists

---
 .../src/update/documents_addition.rs          | 52 ++++++++++++++++--
 meilisearch-core/src/update/mod.rs            | 54 +------------------
 2 files changed, 50 insertions(+), 56 deletions(-)

diff --git a/meilisearch-core/src/update/documents_addition.rs b/meilisearch-core/src/update/documents_addition.rs
index d6f3ac00a..6a4733d01 100644
--- a/meilisearch-core/src/update/documents_addition.rs
+++ b/meilisearch-core/src/update/documents_addition.rs
@@ -1,8 +1,10 @@
 use std::collections::HashMap;
+use std::borrow::Cow;
 
-use fst::{set::OpBuilder, SetBuilder};
-use sdset::{duo::Union, SetOperation};
+use fst::{set::OpBuilder, SetBuilder, IntoStreamer, Streamer};
+use sdset::{duo::Union, SetOperation, SetBuf};
 use serde::{Deserialize, Serialize};
+use log::debug;
 
 use crate::database::{MainT, UpdateT};
 use crate::database::{UpdateEvent, UpdateEventsEmitter};
@@ -110,6 +112,7 @@ pub fn apply_documents_addition<'a, 'b>(
     postings_lists_store: store::PostingsLists,
     docs_words_store: store::DocsWords,
     prefix_documents_cache_store: store::PrefixDocumentsCache,
+    prefix_postings_lists_cache_store: store::PrefixPostingsListsCache,
     addition: Vec<HashMap<String, serde_json::Value>>,
 ) -> MResult<()> {
     let mut documents_additions = HashMap::new();
@@ -180,7 +183,50 @@ pub fn apply_documents_addition<'a, 'b>(
         &ranked_map,
         number_of_inserted_documents,
         indexer,
-    )
+    )?;
+
+
+    // retrieve the words fst used to derive the prefixes; nothing to cache when there is none
+    let words_fst = match main_store.words_fst(writer)? {
+        Some(fst) => fst,
+        None => return Ok(()),
+    };
+
+    // clear the whole prefix postings lists cache, it is rebuilt from scratch below
+    let pplc_store = prefix_postings_lists_cache_store;
+    pplc_store.clear(writer)?;
+
+    const MAX_PREFIX_LENGTH: usize = 1;
+
+    // for each word of the fst, merge the postings list found for each of its prefixes into the PrefixPostingsListsCache.
+    let mut stream = words_fst.into_stream();
+    while let Some(input) = stream.next() {
+        for i in 1..=MAX_PREFIX_LENGTH {
+            let prefix = &input[..i];
+            if let Some(postings_list) = postings_lists_store.postings_list(writer, prefix)? {
+                if let (Ok(input), Ok(prefix)) = (std::str::from_utf8(input), std::str::from_utf8(prefix)) {
+                    debug!("{:?} postings list (prefix {:?}) length {}", input, prefix, postings_list.len());
+                }
+
+                // build the zero-padded 4-byte cache key, then merge into the postings list already cached under it
+                let mut p = [0; 4];
+                let len = std::cmp::min(4, prefix.len());
+                p[..len].copy_from_slice(&prefix[..len]);
+
+                let previous = match pplc_store.prefix_postings_list(writer, p)? {
+                    Some(previous) => previous,
+                    None => Cow::Owned(SetBuf::default()),
+                };
+
+                let new_postings_list = Union::new(&postings_list, &previous).into_set_buf();
+                pplc_store.put_prefix_postings_list(writer, p, &new_postings_list)?;
+
+                debug!("new length {}", new_postings_list.len());
+            }
+        }
+    }
+
+    Ok(())
 }
 
 pub fn apply_documents_partial_addition<'a, 'b>(
diff --git a/meilisearch-core/src/update/mod.rs b/meilisearch-core/src/update/mod.rs
index 1c18ef5d8..265a6e193 100644
--- a/meilisearch-core/src/update/mod.rs
+++ b/meilisearch-core/src/update/mod.rs
@@ -309,62 +309,10 @@ pub fn update_task<'a, 'b>(
                 index.postings_lists,
                 index.docs_words,
                 index.prefix_documents_cache,
+                index.prefix_postings_lists_cache,
                 documents,
             );
 
-            let words_fst = index.main.words_fst(writer)?.unwrap();
-            let mut stream = words_fst.into_stream();
-            let mut previous_char = None;
-            while let Some(input) = stream.next() {
-                let (s, c) = match std::str::from_utf8(input) {
-                    Ok(s) => {
-                        let c = s.chars().next().unwrap();
-                        (&s[..c.len_utf8()], c)
-                    },
-                    Err(_) => continue,
-                };
-
-                match previous_char {
-                    Some(pc) if pc != c => {
-                        debug!("searching and caching {:?}", s);
-
-                        let documents = bucket_sort(
-                            writer,
-                            s,
-                            0..20,
-                            None as Option<fn(DocumentId) -> bool>,
-                            Criteria::default(),
-                            None,
-                            index.main,
-                            index.postings_lists,
-                            index.documents_fields_counts,
-                            index.synonyms,
-                            index.prefix_documents_cache,
-                        ).unwrap();
-
-                        let mut prefix = [0; 4];
-                        let len = cmp::min(4, s.len());
-                        prefix[..len].copy_from_slice(&s.as_bytes()[..len]);
-
-                        for (i, document) in documents.into_iter().enumerate() {
-                            index.prefix_documents_cache.put_prefix_document(
-                                writer,
-                                prefix,
-                                i,
-                                document.id,
-                                &document.highlights,
-                            ).unwrap();
-                        }
-
-                        previous_char = Some(c)
-                    },
-                    Some(_) => (),
-                    None => previous_char = Some(c),
-                }
-            }
-
-            // TODO we forget to do it for the last prefix char
-
             (update_type, result, start.elapsed())
         }
         UpdateData::DocumentsPartial(documents) => {