Merge #478

478: Disable typo on attribute r=Kerollmops a=MarinPostma disable typo on attributes Co-authored-by: ad hoc <postma.marin@protonmail.com>
2025-05-25 09:03:59 +02:00 · 2022-04-05 23:45:35 +00:00 · 2022-04-05 23:45:35 +00:00 · aadb0c58c9
commit aadb0c58c9
parent 900825bac0 86249e2ae4
17 changed files with 535 additions and 129 deletions
--- a/cli/src/main.rs
+++ b/cli/src/main.rs
@ -99,8 +99,10 @@ impl Settings {
            })
            .collect();
        let exact_attributes = index.exact_attributes(&txn)?;
        println!(
-            "displayed attributes:\n\t{}\nsearchable attributes:\n\t{}\nfilterable attributes:\n\t{}\nsortable attributes:\n\t{}\ncriterion:\n\t{}\nstop words:\n\t{}\ndistinct fields:\n\t{}\nsynonyms:\n\t{}\n",
+            "displayed attributes:\n\t{}\nsearchable attributes:\n\t{}\nfilterable attributes:\n\t{}\nsortable attributes:\n\t{}\ncriterion:\n\t{}\nstop words:\n\t{}\ndistinct fields:\n\t{}\nsynonyms:\n\t{}\nexact attributes:\n\t{}\n",
            displayed_attributes.unwrap_or(vec!["*".to_owned()]).join("\n\t"),
            searchable_attributes.unwrap_or(vec!["*".to_owned()]).join("\n\t"),
            filterable_attributes.join("\n\t"),
@ -109,6 +111,7 @@ impl Settings {
            stop_words.join("\n\t"),
            distinct_field.unwrap_or_default(),
            synonyms.into_iter().map(|(k, v)| format!("\n\t{}:\n{:?}", k, v)).collect::<String>(),
            exact_attributes.join("\n\t"),
        );
        Ok(())
    }
@ -463,6 +466,8 @@ struct SettingsUpdate {
    filterable_attributes: Option<Vec<String>>,
    #[structopt(long)]
    criteria: Option<Vec<String>>,
    #[structopt(long)]
    exact_attributes: Option<Vec<String>>,
 }
 impl Performer for SettingsUpdate {
@ -489,6 +494,14 @@ impl Performer for SettingsUpdate {
            }
        }
        if let Some(exact_attributes) = self.exact_attributes {
            if !exact_attributes.is_empty() {
                update.set_exact_attributes(exact_attributes.into_iter().collect());
            } else {
                update.reset_exact_attributes();
            }
        }
        let mut bars = Vec::new();
        let progesses = MultiProgress::new();
        for _ in 0..4 {
--- a/infos/src/main.rs
+++ b/infos/src/main.rs
@ -29,6 +29,8 @@ const ALL_DATABASE_NAMES: &[&str] = &[
    FACET_ID_STRING_DOCIDS,
    FIELD_ID_DOCID_FACET_F64S,
    FIELD_ID_DOCID_FACET_STRINGS,
    EXACT_WORD_DOCIDS,
    EXACT_WORD_PREFIX_DOCIDS,
    DOCUMENTS,
 ];
@ -384,6 +386,8 @@ fn biggest_value_sizes(index: &Index, rtxn: &heed::RoTxn, limit: usize) -> anyho
        field_id_word_count_docids,
        facet_id_f64_docids,
        facet_id_string_docids,
        exact_word_docids,
        exact_word_prefix_docids,
        field_id_docid_facet_f64s: _,
        field_id_docid_facet_strings: _,
        documents,
@ -436,6 +440,14 @@ fn biggest_value_sizes(index: &Index, rtxn: &heed::RoTxn, limit: usize) -> anyho
            }
        }
        for result in exact_word_docids.remap_data_type::<ByteSlice>().iter(rtxn)? {
            let (word, value) = result?;
            heap.push(Reverse((value.len(), word.to_string(), word_docids_name)));
            if heap.len() > limit {
                heap.pop();
            }
        }
        for result in word_prefix_docids.remap_data_type::<ByteSlice>().iter(rtxn)? {
            let (word, value) = result?;
            heap.push(Reverse((value.len(), word.to_string(), word_prefix_docids_name)));
@ -444,6 +456,14 @@ fn biggest_value_sizes(index: &Index, rtxn: &heed::RoTxn, limit: usize) -> anyho
            }
        }
        for result in exact_word_prefix_docids.remap_data_type::<ByteSlice>().iter(rtxn)? {
            let (word, value) = result?;
            heap.push(Reverse((value.len(), word.to_string(), word_prefix_docids_name)));
            if heap.len() > limit {
                heap.pop();
            }
        }
        for result in docid_word_positions.remap_data_type::<ByteSlice>().iter(rtxn)? {
            let ((docid, word), value) = result?;
            let key = format!("{} {}", docid, word);
@ -967,6 +987,8 @@ fn size_of_databases(index: &Index, rtxn: &heed::RoTxn, names: Vec<String>) -> a
        facet_id_string_docids,
        field_id_docid_facet_f64s,
        field_id_docid_facet_strings,
        exact_word_prefix_docids,
        exact_word_docids,
        documents,
    } = index;
@ -991,6 +1013,8 @@ fn size_of_databases(index: &Index, rtxn: &heed::RoTxn, names: Vec<String>) -> a
            FACET_ID_STRING_DOCIDS => facet_id_string_docids.as_polymorph(),
            FIELD_ID_DOCID_FACET_F64S => field_id_docid_facet_f64s.as_polymorph(),
            FIELD_ID_DOCID_FACET_STRINGS => field_id_docid_facet_strings.as_polymorph(),
            EXACT_WORD_DOCIDS => exact_word_docids.as_polymorph(),
            EXACT_WORD_PREFIX_DOCIDS => exact_word_prefix_docids.as_polymorph(),
            DOCUMENTS => documents.as_polymorph(),
            unknown => anyhow::bail!("unknown database {:?}", unknown),
--- a/milli/src/index.rs
+++ b/milli/src/index.rs
@ -53,12 +53,15 @@ pub mod main_key {
    pub const ONE_TYPO_WORD_LEN: &str = "one-typo-word-len";
    pub const TWO_TYPOS_WORD_LEN: &str = "two-typos-word-len";
    pub const EXACT_WORDS: &str = "exact-words";
    pub const EXACT_ATTRIBUTES: &str = "exact-attributes";
 }
 pub mod db_name {
    pub const MAIN: &str = "main";
    pub const WORD_DOCIDS: &str = "word-docids";
    pub const EXACT_WORD_DOCIDS: &str = "exact-word-docids";
    pub const WORD_PREFIX_DOCIDS: &str = "word-prefix-docids";
    pub const EXACT_WORD_PREFIX_DOCIDS: &str = "exact-word-prefix-docids";
    pub const DOCID_WORD_POSITIONS: &str = "docid-word-positions";
    pub const WORD_PAIR_PROXIMITY_DOCIDS: &str = "word-pair-proximity-docids";
    pub const WORD_PREFIX_PAIR_PROXIMITY_DOCIDS: &str = "word-prefix-pair-proximity-docids";
@ -82,9 +85,16 @@ pub struct Index {
    /// A word and all the documents ids containing the word.
    pub word_docids: Database<Str, RoaringBitmapCodec>,
    /// A word and all the documents ids containing the word, from attributes for which typos are not allowed.
    pub exact_word_docids: Database<Str, RoaringBitmapCodec>,
    /// A prefix of word and all the documents ids containing this prefix.
    pub word_prefix_docids: Database<Str, RoaringBitmapCodec>,
    /// A prefix of word and all the documents ids containing this prefix, from attributes for which typos are not allowed.
    pub exact_word_prefix_docids: Database<Str, RoaringBitmapCodec>,
    /// Maps a word and a document id (u32) to all the positions where the given word appears.
    pub docid_word_positions: Database<BEU32StrCodec, BoRoaringBitmapCodec>,
@ -118,13 +128,15 @@ impl Index {
    pub fn new<P: AsRef<Path>>(mut options: heed::EnvOpenOptions, path: P) -> Result<Index> {
        use db_name::*;
-        options.max_dbs(14);
+        options.max_dbs(16);
        unsafe { options.flag(Flags::MdbAlwaysFreePages) };
        let env = options.open(path)?;
        let main = env.create_poly_database(Some(MAIN))?;
        let word_docids = env.create_database(Some(WORD_DOCIDS))?;
        let exact_word_docids = env.create_database(Some(EXACT_WORD_DOCIDS))?;
        let word_prefix_docids = env.create_database(Some(WORD_PREFIX_DOCIDS))?;
        let exact_word_prefix_docids = env.create_database(Some(EXACT_WORD_PREFIX_DOCIDS))?;
        let docid_word_positions = env.create_database(Some(DOCID_WORD_POSITIONS))?;
        let word_pair_proximity_docids = env.create_database(Some(WORD_PAIR_PROXIMITY_DOCIDS))?;
        let word_prefix_pair_proximity_docids =
@ -145,7 +157,9 @@ impl Index {
            env,
            main,
            word_docids,
            exact_word_docids,
            word_prefix_docids,
            exact_word_prefix_docids,
            docid_word_positions,
            word_pair_proximity_docids,
            word_prefix_pair_proximity_docids,
@ -949,6 +963,33 @@ impl Index {
        )?;
        Ok(())
    }
    /// Reads the names of the exact attributes, i.e. the attributes on which typos are
    /// disallowed, from the `exact-attributes` entry of the main database.
    ///
    /// An empty list is returned when no entry is stored.
    pub fn exact_attributes<'t>(&self, txn: &'t RoTxn) -> Result<Vec<&'t str>> {
        let stored = self
            .main
            .get::<_, Str, SerdeBincode<Vec<&str>>>(txn, main_key::EXACT_ATTRIBUTES)?;
        Ok(stored.unwrap_or_default())
    }
    /// Resolves the exact attributes into their field ids.
    ///
    /// Attribute names for which the fields ids map yields no id are skipped.
    pub fn exact_attributes_ids(&self, txn: &RoTxn) -> Result<HashSet<FieldId>> {
        let names = self.exact_attributes(txn)?;
        let fields = self.fields_ids_map(txn)?;

        let mut ids = HashSet::new();
        for name in names {
            if let Some(id) = fields.id(name) {
                ids.insert(id);
            }
        }
        Ok(ids)
    }
    /// Stores `attrs` as the exact attributes under the `exact-attributes` entry of the
    /// main database, replacing whatever was stored before.
    pub(crate) fn put_exact_attributes(&self, txn: &mut RwTxn, attrs: &[&str]) -> Result<()> {
        let key = main_key::EXACT_ATTRIBUTES;
        self.main.put::<_, Str, SerdeBincode<&[&str]>>(txn, key, &attrs)?;
        Ok(())
    }
    /// Removes the `exact-attributes` entry from the main database.
    ///
    /// The value returned by the underlying delete is discarded; only errors are reported.
    pub(crate) fn delete_exact_attributes(&self, txn: &mut RwTxn) -> Result<()> {
        let key = main_key::EXACT_ATTRIBUTES;
        let _ = self.main.delete::<_, Str>(txn, key)?;
        Ok(())
    }
 }
 #[cfg(test)]
--- a/milli/src/search/criteria/mod.rs
+++ b/milli/src/search/criteria/mod.rs
@ -68,7 +68,9 @@ impl Default for Candidates {
 pub trait Context<'c> {
    fn documents_ids(&self) -> heed::Result<RoaringBitmap>;
    fn word_docids(&self, word: &str) -> heed::Result<Option<RoaringBitmap>>;
    fn exact_word_docids(&self, word: &str) -> heed::Result<Option<RoaringBitmap>>;
    fn word_prefix_docids(&self, word: &str) -> heed::Result<Option<RoaringBitmap>>;
    fn exact_word_prefix_docids(&self, word: &str) -> heed::Result<Option<RoaringBitmap>>;
    fn word_pair_proximity_docids(
        &self,
        left: &str,
@ -118,10 +120,18 @@ impl<'c> Context<'c> for CriteriaBuilder<'c> {
        self.index.word_docids.get(self.rtxn, &word)
    }
    /// Fetches the entry stored for `word` in the index's `exact_word_docids` database,
    /// reading through this builder's transaction.
    fn exact_word_docids(&self, word: &str) -> heed::Result<Option<RoaringBitmap>> {
        let database = &self.index.exact_word_docids;
        database.get(self.rtxn, word)
    }
    /// Fetches the entry stored for `word` in the index's `word_prefix_docids` database,
    /// reading through this builder's transaction.
    fn word_prefix_docids(&self, word: &str) -> heed::Result<Option<RoaringBitmap>> {
        let database = &self.index.word_prefix_docids;
        database.get(self.rtxn, word)
    }
    /// Fetches the entry stored for `word` in the index's `exact_word_prefix_docids`
    /// database, reading through this builder's transaction.
    fn exact_word_prefix_docids(&self, word: &str) -> heed::Result<Option<RoaringBitmap>> {
        let database = &self.index.exact_word_prefix_docids;
        database.get(self.rtxn, word)
    }
    fn word_pair_proximity_docids(
        &self,
        left: &str,
@ -392,26 +402,42 @@ fn query_docids(
    wdcache: &mut WordDerivationsCache,
 ) -> Result<RoaringBitmap> {
    match &query.kind {
-        QueryKind::Exact { word, .. } => {
+        QueryKind::Exact { word, original_typo } => {
            if query.prefix && ctx.in_prefix_cache(&word) {
-                Ok(ctx.word_prefix_docids(&word)?.unwrap_or_default())
+                let mut docids = ctx.word_prefix_docids(&word)?.unwrap_or_default();
                // only add the exact docids if the word hasn't been derived
                if *original_typo == 0 {
                    docids |= ctx.exact_word_prefix_docids(&word)?.unwrap_or_default();
                }
                Ok(docids)
            } else if query.prefix {
                let words = word_derivations(&word, true, 0, ctx.words_fst(), wdcache)?;
                let mut docids = RoaringBitmap::new();
                for (word, _typo) in words {
-                    let current_docids = ctx.word_docids(&word)?.unwrap_or_default();
+                    docids |= ctx.word_docids(&word)?.unwrap_or_default();
-                    docids |= current_docids;
+                    // only add the exact docids if the word hasn't been derived
                    if *original_typo == 0 {
                        docids |= ctx.exact_word_docids(&word)?.unwrap_or_default();
                    }
                }
                Ok(docids)
            } else {
-                Ok(ctx.word_docids(&word)?.unwrap_or_default())
+                let mut docids = ctx.word_docids(&word)?.unwrap_or_default();
                // only add the exact docids if the word hasn't been derived
                if *original_typo == 0 {
                    docids |= ctx.exact_word_docids(&word)?.unwrap_or_default();
                }
                Ok(docids)
            }
        }
        QueryKind::Tolerant { typo, word } => {
            let words = word_derivations(&word, query.prefix, *typo, ctx.words_fst(), wdcache)?;
            let mut docids = RoaringBitmap::new();
-            for (word, _typo) in words {
+            for (word, typo) in words {
-                let current_docids = ctx.word_docids(&word)?.unwrap_or_default();
+                let mut current_docids = ctx.word_docids(&word)?.unwrap_or_default();
                if *typo == 0 {
                    current_docids |= ctx.exact_word_docids(&word)?.unwrap_or_default()
                }
                docids |= current_docids;
            }
            Ok(docids)
@ -512,7 +538,9 @@ pub mod test {
    pub struct TestContext<'t> {
        words_fst: fst::Set<Cow<'t, [u8]>>,
        word_docids: HashMap<String, RoaringBitmap>,
        exact_word_docids: HashMap<String, RoaringBitmap>,
        word_prefix_docids: HashMap<String, RoaringBitmap>,
        exact_word_prefix_docids: HashMap<String, RoaringBitmap>,
        word_pair_proximity_docids: HashMap<(String, String, i32), RoaringBitmap>,
        word_prefix_pair_proximity_docids: HashMap<(String, String, i32), RoaringBitmap>,
        docid_words: HashMap<u32, Vec<String>>,
@ -527,10 +555,18 @@ pub mod test {
            Ok(self.word_docids.get(&word.to_string()).cloned())
        }
        /// Returns a copy of the bitmap registered for `word` in the in-memory
        /// `exact_word_docids` map, or `None` when the word is absent.
        fn exact_word_docids(&self, word: &str) -> heed::Result<Option<RoaringBitmap>> {
            // A `HashMap<String, _>` can be queried with a `&str` directly, so no temporary
            // `String` has to be allocated for the lookup.
            Ok(self.exact_word_docids.get(word).cloned())
        }
        /// Returns a copy of the bitmap registered for `word` in the in-memory
        /// `word_prefix_docids` map, or `None` when the prefix is absent.
        fn word_prefix_docids(&self, word: &str) -> heed::Result<Option<RoaringBitmap>> {
            // A `HashMap<String, _>` can be queried with a `&str` directly, so no temporary
            // `String` has to be allocated for the lookup.
            Ok(self.word_prefix_docids.get(word).cloned())
        }
        /// Returns a copy of the bitmap registered for `word` in the in-memory
        /// `exact_word_prefix_docids` map, or `None` when the prefix is absent.
        fn exact_word_prefix_docids(&self, word: &str) -> heed::Result<Option<RoaringBitmap>> {
            // A `HashMap<String, _>` can be queried with a `&str` directly, so no temporary
            // `String` has to be allocated for the lookup.
            Ok(self.exact_word_prefix_docids.get(word).cloned())
        }
        fn word_pair_proximity_docids(
            &self,
            left: &str,
@ -643,6 +679,8 @@ pub mod test {
                s("morning")    => random_postings(rng,    125),
            };
            let exact_word_docids = HashMap::new();
            let mut docid_words = HashMap::new();
            for (word, docids) in word_docids.iter() {
                for docid in docids {
@ -657,6 +695,8 @@ pub mod test {
                s("20")  => &word_docids[&s("2020")]  | &word_docids[&s("2021")],
            };
            let exact_word_prefix_docids = HashMap::new();
            let mut word_pair_proximity_docids = HashMap::new();
            let mut word_prefix_pair_proximity_docids = HashMap::new();
            for (lword, lcandidates) in &word_docids {
@ -712,7 +752,9 @@ pub mod test {
            TestContext {
                words_fst,
                word_docids,
                exact_word_docids,
                word_prefix_docids,
                exact_word_prefix_docids,
                word_pair_proximity_docids,
                word_prefix_pair_proximity_docids,
                docid_words,
--- a/milli/src/search/query_tree.rs
+++ b/milli/src/search/query_tree.rs
@ -1267,6 +1267,7 @@ mod test {
            QueryKind::Tolerant { typo: 2, word: "verylongword".to_string() }
        );
    }
    #[test]
    fn disable_typo_on_word() {
        let query = "goodbye";
--- a/milli/src/update/clear_documents.rs
+++ b/milli/src/update/clear_documents.rs
@ -19,7 +19,9 @@ impl<'t, 'u, 'i> ClearDocuments<'t, 'u, 'i> {
            env: _env,
            main: _main,
            word_docids,
            exact_word_docids,
            word_prefix_docids,
            exact_word_prefix_docids,
            docid_word_positions,
            word_pair_proximity_docids,
            word_prefix_pair_proximity_docids,
@ -55,7 +57,9 @@ impl<'t, 'u, 'i> ClearDocuments<'t, 'u, 'i> {
        // Clear the other databases.
        word_docids.clear(self.wtxn)?;
        exact_word_docids.clear(self.wtxn)?;
        word_prefix_docids.clear(self.wtxn)?;
        exact_word_prefix_docids.clear(self.wtxn)?;
        docid_word_positions.clear(self.wtxn)?;
        word_pair_proximity_docids.clear(self.wtxn)?;
        word_prefix_pair_proximity_docids.clear(self.wtxn)?;
--- a/milli/src/update/delete_documents.rs
+++ b/milli/src/update/delete_documents.rs
@ -2,8 +2,8 @@ use std::collections::btree_map::Entry;
 use std::collections::HashMap;
 use fst::IntoStreamer;
-use heed::types::ByteSlice;
+use heed::types::{ByteSlice, Str};
-use heed::{BytesDecode, BytesEncode};
+use heed::{BytesDecode, BytesEncode, Database};
 use roaring::RoaringBitmap;
 use serde::{Deserialize, Serialize};
 use serde_json::Value;
@ -16,7 +16,10 @@ use crate::heed_codec::facet::{
 };
 use crate::heed_codec::CboRoaringBitmapCodec;
 use crate::index::{db_name, main_key};
-use crate::{DocumentId, ExternalDocumentsIds, FieldId, Index, Result, SmallString32, BEU32};
+use crate::{
    DocumentId, ExternalDocumentsIds, FieldId, Index, Result, RoaringBitmapCodec, SmallString32,
    BEU32,
 };
 pub struct DeleteDocuments<'t, 'u, 'i> {
    wtxn: &'t mut heed::RwTxn<'i, 'u>,
@ -108,7 +111,9 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
            env: _env,
            main: _main,
            word_docids,
            exact_word_docids,
            word_prefix_docids,
            exact_word_prefix_docids,
            docid_word_positions,
            word_pair_proximity_docids,
            field_id_word_count_docids,
@ -204,25 +209,21 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
        // We iterate over the words and delete the documents ids
        // from the word docids database.
        for (word, must_remove) in &mut words {
-            // We create an iterator to be able to get the content and delete the word docids.
+            remove_from_word_docids(
-            // It's faster to acquire a cursor to get and delete or put, as we avoid traversing
+                self.wtxn,
-            // the LMDB B-Tree two times but only once.
+                word_docids,
-            let mut iter = word_docids.prefix_iter_mut(self.wtxn, &word)?;
+                word.as_str(),
-            if let Some((key, mut docids)) = iter.next().transpose()? {
+                must_remove,
-                if key == word.as_str() {
+                &self.documents_ids,
-                    let previous_len = docids.len();
+            )?;
-                    docids -= &self.documents_ids;
+
-                    if docids.is_empty() {
+            remove_from_word_docids(
-                        // safety: we don't keep references from inside the LMDB database.
+                self.wtxn,
-                        unsafe { iter.del_current()? };
+                exact_word_docids,
-                        *must_remove = true;
+                word.as_str(),
-                    } else if docids.len() != previous_len {
+                must_remove,
-                        let key = key.to_owned();
+                &self.documents_ids,
-                        // safety: we don't keep references from inside the LMDB database.
+            )?;
                        unsafe { iter.put_current(&key, &docids)? };
                    }
                }
            }
        }
        // We construct an FST set that contains the words to delete from the words FST.
@ -254,34 +255,24 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
        // We write the new words FST into the main database.
        self.index.put_words_fst(self.wtxn, &new_words_fst)?;
-        // We iterate over the word prefix docids database and remove the deleted documents ids
+        let prefixes_to_delete =
-        // from every docids lists. We register the empty prefixes in an fst Set for futur deletion.
+            remove_from_word_prefix_docids(self.wtxn, word_prefix_docids, &self.documents_ids)?;
        let mut prefixes_to_delete = fst::SetBuilder::memory();
        let mut iter = word_prefix_docids.iter_mut(self.wtxn)?;
        while let Some(result) = iter.next() {
            let (prefix, mut docids) = result?;
            let prefix = prefix.to_owned();
            let previous_len = docids.len();
            docids -= &self.documents_ids;
            if docids.is_empty() {
                // safety: we don't keep references from inside the LMDB database.
                unsafe { iter.del_current()? };
                prefixes_to_delete.insert(prefix)?;
            } else if docids.len() != previous_len {
                // safety: we don't keep references from inside the LMDB database.
                unsafe { iter.put_current(&prefix, &docids)? };
            }
        }
-        drop(iter);
+        let exact_prefix_to_delete = remove_from_word_prefix_docids(
            self.wtxn,
            exact_word_prefix_docids,
            &self.documents_ids,
        )?;
        let all_prefixes_to_delete = prefixes_to_delete.op().add(&exact_prefix_to_delete).union();
        // We compute the new prefix FST and write it only if there is a change.
-        let prefixes_to_delete = prefixes_to_delete.into_set();
+        if !prefixes_to_delete.is_empty() || !exact_prefix_to_delete.is_empty() {
        if !prefixes_to_delete.is_empty() {
            let new_words_prefixes_fst = {
                // We retrieve the current words prefixes FST from the database.
                let words_prefixes_fst = self.index.words_prefixes_fst(self.wtxn)?;
-                let difference = words_prefixes_fst.op().add(&prefixes_to_delete).difference();
+                let difference =
                    words_prefixes_fst.op().add(all_prefixes_to_delete.into_stream()).difference();
                // We stream the words prefixes that remain once the to-delete prefixes are removed.
                let mut new_words_prefixes_fst_builder = fst::SetBuilder::memory();
@ -457,6 +448,64 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
    }
 }
 /// Subtracts `to_remove` from every docids bitmap stored in `db`.
 ///
 /// Entries whose bitmap becomes empty are deleted from the database and their prefix is
 /// recorded; entries whose bitmap merely shrank are rewritten in place; untouched entries
 /// are left as they are.
 ///
 /// Returns the set of prefixes whose entry was deleted.
 ///
 /// NOTE(review): `fst::SetBuilder::insert` expects keys in lexicographic order; this
 /// presumably relies on the database cursor yielding keys in that order — confirm.
 fn remove_from_word_prefix_docids(
    txn: &mut heed::RwTxn,
    db: &Database<Str, RoaringBitmapCodec>,
    to_remove: &RoaringBitmap,
 ) -> Result<fst::Set<Vec<u8>>> {
    let mut prefixes_to_delete = fst::SetBuilder::memory();
    // We iterate over the word prefix docids database and remove the deleted documents ids
    // from every docids lists. We register the empty prefixes in an fst Set for future deletion.
    let mut iter = db.iter_mut(txn)?;
    while let Some(result) = iter.next() {
        let (prefix, mut docids) = result?;
        // The key is copied so that it no longer borrows from the cursor's current entry.
        let prefix = prefix.to_owned();
        // Remember the size to detect whether the subtraction changed anything.
        let previous_len = docids.len();
        docids -= to_remove;
        if docids.is_empty() {
            // safety: we don't keep references from inside the LMDB database.
            unsafe { iter.del_current()? };
            prefixes_to_delete.insert(prefix)?;
        } else if docids.len() != previous_len {
            // safety: we don't keep references from inside the LMDB database.
            unsafe { iter.put_current(&prefix, &docids)? };
        }
    }
    Ok(prefixes_to_delete.into_set())
 }
 /// Subtracts `to_remove` from the docids bitmap stored for `word` in `db`.
 ///
 /// Only the first entry yielded by a prefix iteration on `word` is inspected, and it is
 /// only modified when its key is exactly `word`. If the resulting bitmap is empty the
 /// entry is deleted and `*must_remove` is set to `true`; if it merely shrank, the entry
 /// is rewritten in place; otherwise nothing is written.
 ///
 /// Note that `must_remove` is only ever set to `true` here, never reset to `false`.
 /// NOTE(review): callers sharing one flag across several databases should confirm that
 /// an entry emptied in one database is meant to flag the word regardless of the others.
 fn remove_from_word_docids(
    txn: &mut heed::RwTxn,
    db: &heed::Database<Str, RoaringBitmapCodec>,
    word: &str,
    must_remove: &mut bool,
    to_remove: &RoaringBitmap,
 ) -> Result<()> {
    // We create an iterator to be able to get the content and delete the word docids.
    // It's faster to acquire a cursor to get and delete or put, as we avoid traversing
    // the LMDB B-Tree two times but only once.
    let mut iter = db.prefix_iter_mut(txn, &word)?;
    if let Some((key, mut docids)) = iter.next().transpose()? {
        // The prefix iteration may land on a longer key starting with `word`; skip it.
        if key == word {
            // Remember the size to detect whether the subtraction changed anything.
            let previous_len = docids.len();
            docids -= to_remove;
            if docids.is_empty() {
                // safety: we don't keep references from inside the LMDB database.
                unsafe { iter.del_current()? };
                *must_remove = true;
            } else if docids.len() != previous_len {
                let key = key.to_owned();
                // safety: we don't keep references from inside the LMDB database.
                unsafe { iter.put_current(&key, &docids)? };
            }
        }
    }
    Ok(())
 }
 fn remove_docids_from_field_id_docid_facet_value<'a, C, K, F, DC, V>(
    wtxn: &'a mut heed::RwTxn,
    db: &heed::Database<C, DC>,
--- a/milli/src/update/index_documents/extract/extract_word_docids.rs
+++ b/milli/src/update/index_documents/extract/extract_word_docids.rs
@ -1,3 +1,4 @@
 use std::collections::HashSet;
 use std::fs::File;
 use std::io;
 use std::iter::FromIterator;
@ -10,17 +11,22 @@ use super::helpers::{
 };
 use crate::error::SerializationError;
 use crate::index::db_name::DOCID_WORD_POSITIONS;
-use crate::Result;
+use crate::update::index_documents::helpers::read_u32_ne_bytes;
 use crate::{relative_from_absolute_position, FieldId, Result};
 /// Extracts the word and the documents ids where this word appear.
 ///
 /// Returns a grenad reader with the list of extracted words and
 /// documents ids from the given chunk of docid word positions.
 ///
 /// The first returned reader is the one for normal word_docids, and the second one is for
 /// exact_word_docids
 #[logging_timer::time]
 pub fn extract_word_docids<R: io::Read + io::Seek>(
    docid_word_positions: grenad::Reader<R>,
    indexer: GrenadParameters,
-) -> Result<grenad::Reader<File>> {
+    exact_attributes: &HashSet<FieldId>,
 ) -> Result<(grenad::Reader<File>, grenad::Reader<File>)> {
    let max_memory = indexer.max_memory_by_thread();
    let mut word_docids_sorter = create_sorter(
@ -28,20 +34,53 @@ pub fn extract_word_docids<R: io::Read + io::Seek>(
        indexer.chunk_compression_type,
        indexer.chunk_compression_level,
        indexer.max_nb_chunks,
-        max_memory,
+        max_memory.map(|x| x / 2),
    );
    let mut exact_word_docids_sorter = create_sorter(
        merge_roaring_bitmaps,
        indexer.chunk_compression_type,
        indexer.chunk_compression_level,
        indexer.max_nb_chunks,
        max_memory.map(|x| x / 2),
    );
    let mut value_buffer = Vec::new();
    let mut cursor = docid_word_positions.into_cursor()?;
-    while let Some((key, _value)) = cursor.move_on_next()? {
+    while let Some((key, positions)) = cursor.move_on_next()? {
        let (document_id_bytes, word_bytes) = try_split_array_at(key)
            .ok_or_else(|| SerializationError::Decoding { db_name: Some(DOCID_WORD_POSITIONS) })?;
        let document_id = u32::from_be_bytes(document_id_bytes);
        let bitmap = RoaringBitmap::from_iter(Some(document_id));
        serialize_roaring_bitmap(&bitmap, &mut value_buffer)?;
-        word_docids_sorter.insert(word_bytes, &value_buffer)?;
+
        // If there are no exact attributes, we do not need to iterate over positions.
        if exact_attributes.is_empty() {
            word_docids_sorter.insert(word_bytes, &value_buffer)?;
        } else {
            let mut added_to_exact = false;
            let mut added_to_word_docids = false;
            for position in read_u32_ne_bytes(positions) {
                // as soon as we know that this word has been added to both sorters, we don't
                // need to iterate over the remaining positions.
                if added_to_exact && added_to_word_docids {
                    break;
                }
                let (fid, _) = relative_from_absolute_position(position);
                if exact_attributes.contains(&fid) && !added_to_exact {
                    exact_word_docids_sorter.insert(word_bytes, &value_buffer)?;
                    added_to_exact = true;
                } else if !added_to_word_docids {
                    word_docids_sorter.insert(word_bytes, &value_buffer)?;
                    added_to_word_docids = true;
                }
            }
        }
    }
-    sorter_into_reader(word_docids_sorter, indexer)
+    Ok((
        sorter_into_reader(word_docids_sorter, indexer)?,
        sorter_into_reader(exact_word_docids_sorter, indexer)?,
    ))
 }
--- a/milli/src/update/index_documents/extract/mod.rs
+++ b/milli/src/update/index_documents/extract/mod.rs
@ -26,7 +26,7 @@ use self::extract_word_pair_proximity_docids::extract_word_pair_proximity_docids
 use self::extract_word_position_docids::extract_word_position_docids;
 use super::helpers::{
    as_cloneable_grenad, keep_first_prefix_value_merge_roaring_bitmaps, merge_cbo_roaring_bitmaps,
-    merge_readers, merge_roaring_bitmaps, CursorClonableMmap, GrenadParameters, MergeFn,
+    merge_roaring_bitmaps, CursorClonableMmap, GrenadParameters, MergeFn, MergeableReader,
 };
 use super::{helpers, TypedChunk};
 use crate::{FieldId, Result};
@ -43,6 +43,7 @@ pub(crate) fn data_from_obkv_documents(
    geo_field_id: Option<FieldId>,
    stop_words: Option<fst::Set<&[u8]>>,
    max_positions_per_attributes: Option<u32>,
    exact_attributes: HashSet<FieldId>,
 ) -> Result<()> {
    let result: Result<(Vec<_>, (Vec<_>, Vec<_>))> = obkv_chunks
        .par_bridge()
@ -66,7 +67,7 @@ pub(crate) fn data_from_obkv_documents(
        (docid_fid_facet_numbers_chunks, docid_fid_facet_strings_chunks),
    ) = result?;
-    spawn_extraction_task(
+    spawn_extraction_task::<_, _, Vec<grenad::Reader<File>>>(
        docid_word_positions_chunks.clone(),
        indexer.clone(),
        lmdb_writer_sx.clone(),
@ -76,7 +77,7 @@ pub(crate) fn data_from_obkv_documents(
        "word-pair-proximity-docids",
    );
-    spawn_extraction_task(
+    spawn_extraction_task::<_, _, Vec<grenad::Reader<File>>>(
        docid_word_positions_chunks.clone(),
        indexer.clone(),
        lmdb_writer_sx.clone(),
@ -86,17 +87,20 @@ pub(crate) fn data_from_obkv_documents(
        "field-id-wordcount-docids",
    );
-    spawn_extraction_task(
+    spawn_extraction_task::<_, _, Vec<(grenad::Reader<File>, grenad::Reader<File>)>>(
        docid_word_positions_chunks.clone(),
        indexer.clone(),
        lmdb_writer_sx.clone(),
-        extract_word_docids,
+        move |doc_word_pos, indexer| extract_word_docids(doc_word_pos, indexer, &exact_attributes),
        merge_roaring_bitmaps,
-        TypedChunk::WordDocids,
+        |(word_docids_reader, exact_word_docids_reader)| TypedChunk::WordDocids {
            word_docids_reader,
            exact_word_docids_reader,
        },
        "word-docids",
    );
-    spawn_extraction_task(
+    spawn_extraction_task::<_, _, Vec<grenad::Reader<File>>>(
        docid_word_positions_chunks.clone(),
        indexer.clone(),
        lmdb_writer_sx.clone(),
@ -106,7 +110,7 @@ pub(crate) fn data_from_obkv_documents(
        "word-position-docids",
    );
-    spawn_extraction_task(
+    spawn_extraction_task::<_, _, Vec<grenad::Reader<File>>>(
        docid_fid_facet_strings_chunks.clone(),
        indexer.clone(),
        lmdb_writer_sx.clone(),
@ -116,7 +120,7 @@ pub(crate) fn data_from_obkv_documents(
        "field-id-facet-string-docids",
    );
-    spawn_extraction_task(
+    spawn_extraction_task::<_, _, Vec<grenad::Reader<File>>>(
        docid_fid_facet_numbers_chunks.clone(),
        indexer.clone(),
        lmdb_writer_sx.clone(),
@ -133,7 +137,7 @@ pub(crate) fn data_from_obkv_documents(
 /// Generated grenad chunks are merged using the merge_fn.
 /// The result of merged chunks is serialized as TypedChunk using the serialize_fn
 /// and sent into lmdb_writer_sx.
-fn spawn_extraction_task<FE, FS>(
+fn spawn_extraction_task<FE, FS, M>(
    chunks: Vec<grenad::Reader<CursorClonableMmap>>,
    indexer: GrenadParameters,
    lmdb_writer_sx: Sender<Result<TypedChunk>>,
@ -142,19 +146,21 @@ fn spawn_extraction_task<FE, FS>(
    serialize_fn: FS,
    name: &'static str,
 ) where
-    FE: Fn(grenad::Reader<CursorClonableMmap>, GrenadParameters) -> Result<grenad::Reader<File>>
+    FE: Fn(grenad::Reader<CursorClonableMmap>, GrenadParameters) -> Result<M::Output>
        + Sync
        + Send
        + 'static,
-    FS: Fn(grenad::Reader<File>) -> TypedChunk + Sync + Send + 'static,
+    FS: Fn(M::Output) -> TypedChunk + Sync + Send + 'static,
    M: MergeableReader + FromParallelIterator<M::Output> + Send + 'static,
    M::Output: Send,
 {
    rayon::spawn(move || {
-        let chunks: Result<Vec<_>> =
+        let chunks: Result<M> =
            chunks.into_par_iter().map(|chunk| extract_fn(chunk, indexer.clone())).collect();
        rayon::spawn(move || match chunks {
            Ok(chunks) => {
                debug!("merge {} database", name);
-                let reader = merge_readers(chunks, merge_fn, indexer);
+                let reader = chunks.merge(merge_fn, &indexer);
                let _ = lmdb_writer_sx.send(reader.map(|r| serialize_fn(r)));
            }
            Err(e) => {
--- a/milli/src/update/index_documents/helpers/grenad_helpers.rs
+++ b/milli/src/update/index_documents/helpers/grenad_helpers.rs
@ -78,25 +78,62 @@ pub unsafe fn as_cloneable_grenad(
    Ok(reader)
 }
-pub fn merge_readers<R: io::Read + io::Seek>(
+pub trait MergeableReader
-    readers: Vec<grenad::Reader<R>>,
+where
-    merge_fn: MergeFn,
+    Self: Sized,
-    indexer: GrenadParameters,
+{
-) -> Result<grenad::Reader<File>> {
+    type Output;
-    let mut merger_builder = grenad::MergerBuilder::new(merge_fn);
+
-    for reader in readers {
+    fn merge(self, merge_fn: MergeFn, indexer: &GrenadParameters) -> Result<Self::Output>;
-        merger_builder.push(reader.into_cursor()?);
+}
 /// Merging a list of grenad readers yields one reader over the merged entries.
 impl MergeableReader for Vec<grenad::Reader<File>> {
    type Output = grenad::Reader<File>;

    /// Feeds every reader of the list into a single `MergerBuilder` and
    /// finishes it with the given grenad parameters.
    ///
    /// Stops at, and returns, the first error raised while pushing a reader.
    fn merge(self, merge_fn: MergeFn, params: &GrenadParameters) -> Result<Self::Output> {
        let mut builder = MergerBuilder::new(merge_fn);
        for reader in self {
            builder.push(reader)?;
        }
        builder.finish(params)
    }
 }
 /// Merging a list of reader pairs merges the first components together and
 /// the second components together, producing one pair of readers.
 impl MergeableReader for Vec<(grenad::Reader<File>, grenad::Reader<File>)> {
    type Output = (grenad::Reader<File>, grenad::Reader<File>);

    /// Uses the same `merge_fn` and parameters for both sides of the pair.
    ///
    /// Stops at, and returns, the first error raised while pushing a reader
    /// or while finishing either merger.
    fn merge(self, merge_fn: MergeFn, params: &GrenadParameters) -> Result<Self::Output> {
        let mut firsts = MergerBuilder::new(merge_fn);
        let mut seconds = MergerBuilder::new(merge_fn);

        self.into_iter().try_for_each(|(first, second)| -> Result<()> {
            firsts.push(first)?;
            seconds.push(second)?;
            Ok(())
        })?;

        // The first merger is finished before the second one, as before.
        let first = firsts.finish(params)?;
        let second = seconds.finish(params)?;
        Ok((first, second))
    }
 }
 struct MergerBuilder<R>(grenad::MergerBuilder<R, MergeFn>);
 impl<R: io::Read + io::Seek> MergerBuilder<R> {
    fn new(merge_fn: MergeFn) -> Self {
        Self(grenad::MergerBuilder::new(merge_fn))
    }
-    let merger = merger_builder.build();
+    fn push(&mut self, reader: grenad::Reader<R>) -> Result<()> {
-    let mut writer = create_writer(
+        self.0.push(reader.into_cursor()?);
-        indexer.chunk_compression_type,
+        Ok(())
-        indexer.chunk_compression_level,
+    }
        tempfile::tempfile()?,
    );
    merger.write_into_stream_writer(&mut writer)?;
-    Ok(writer_into_reader(writer)?)
+    fn finish(self, params: &GrenadParameters) -> Result<grenad::Reader<File>> {
        let merger = self.0.build();
        let mut writer = create_writer(
            params.chunk_compression_type,
            params.chunk_compression_level,
            tempfile::tempfile()?,
        );
        merger.write_into_stream_writer(&mut writer)?;
        Ok(writer_into_reader(writer)?)
    }
 }
 #[derive(Debug, Clone, Copy)]
@ -240,3 +277,8 @@ pub fn sorter_into_lmdb_database(
    debug!("MTBL sorter writen in {:.02?}!", before.elapsed());
    Ok(())
 }
 /// Merge function for callers that only care about the merged keys.
 ///
 /// Both the key and the conflicting values are ignored; the merged value is
 /// always an empty, owned byte buffer.
 pub fn merge_ignore_values<'a>(_key: &[u8], _values: &[Cow<'a, [u8]>]) -> Result<Cow<'a, [u8]>> {
    let empty: Vec<u8> = Vec::new();
    Ok(Cow::Owned(empty))
 }
--- a/milli/src/update/index_documents/helpers/mod.rs
+++ b/milli/src/update/index_documents/helpers/mod.rs
@ -8,9 +8,9 @@ use std::convert::{TryFrom, TryInto};
 pub use clonable_mmap::{ClonableMmap, CursorClonableMmap};
 use fst::{IntoStreamer, Streamer};
 pub use grenad_helpers::{
-    as_cloneable_grenad, create_sorter, create_writer, grenad_obkv_into_chunks, merge_readers,
+    as_cloneable_grenad, create_sorter, create_writer, grenad_obkv_into_chunks,
-    sorter_into_lmdb_database, sorter_into_reader, write_into_lmdb_database, writer_into_reader,
+    merge_ignore_values, sorter_into_lmdb_database, sorter_into_reader, write_into_lmdb_database,
-    GrenadParameters,
+    writer_into_reader, GrenadParameters, MergeableReader,
 };
 pub use merge_functions::{
    concat_u32s_array, keep_first, keep_first_prefix_value_merge_roaring_bitmaps, keep_latest_obkv,
--- a/milli/src/update/index_documents/mod.rs
+++ b/milli/src/update/index_documents/mod.rs
@ -4,11 +4,13 @@ mod transform;
 mod typed_chunk;
 use std::collections::HashSet;
-use std::io::{Read, Seek};
+use std::io::{Cursor, Read, Seek};
 use std::iter::FromIterator;
 use std::num::{NonZeroU32, NonZeroUsize};
 use crossbeam_channel::{Receiver, Sender};
 use heed::types::Str;
 use heed::Database;
 use log::debug;
 use roaring::RoaringBitmap;
 use serde::{Deserialize, Serialize};
@ -28,7 +30,7 @@ use crate::update::{
    self, Facets, IndexerConfig, UpdateIndexingStep, WordPrefixDocids,
    WordPrefixPairProximityDocids, WordPrefixPositionDocids, WordsPrefixesFst,
 };
-use crate::{Index, Result};
+use crate::{Index, Result, RoaringBitmapCodec};
 static MERGED_DATABASE_COUNT: usize = 7;
 static PREFIX_DATABASE_COUNT: usize = 5;
@ -226,6 +228,7 @@ where
        };
        let stop_words = self.index.stop_words(self.wtxn)?;
        let exact_attributes = self.index.exact_attributes_ids(self.wtxn)?;
        // Run extraction pipeline in parallel.
        pool.install(|| {
@ -255,6 +258,7 @@ where
                    geo_field_id,
                    stop_words,
                    self.indexer_config.max_positions_per_attributes,
                    exact_attributes,
                )
            });
@ -282,6 +286,7 @@ where
        let mut word_pair_proximity_docids = None;
        let mut word_position_docids = None;
        let mut word_docids = None;
        let mut exact_word_docids = None;
        let mut databases_seen = 0;
        (self.progress)(UpdateIndexingStep::MergeDataIntoFinalDatabase {
@ -291,10 +296,13 @@ where
        for result in lmdb_writer_rx {
            let typed_chunk = match result? {
-                TypedChunk::WordDocids(chunk) => {
+                TypedChunk::WordDocids { word_docids_reader, exact_word_docids_reader } => {
-                    let cloneable_chunk = unsafe { as_cloneable_grenad(&chunk)? };
+                    let cloneable_chunk = unsafe { as_cloneable_grenad(&word_docids_reader)? };
                    word_docids = Some(cloneable_chunk);
-                    TypedChunk::WordDocids(chunk)
+                    let cloneable_chunk =
                        unsafe { as_cloneable_grenad(&exact_word_docids_reader)? };
                    exact_word_docids = Some(cloneable_chunk);
                    TypedChunk::WordDocids { word_docids_reader, exact_word_docids_reader }
                }
                TypedChunk::WordPairProximityDocids(chunk) => {
                    let cloneable_chunk = unsafe { as_cloneable_grenad(&chunk)? };
@ -346,6 +354,7 @@ where
        self.execute_prefix_databases(
            word_docids,
            exact_word_docids,
            word_pair_proximity_docids,
            word_position_docids,
        )?;
@ -357,6 +366,7 @@ where
    pub fn execute_prefix_databases(
        self,
        word_docids: Option<grenad::Reader<CursorClonableMmap>>,
        exact_word_docids: Option<grenad::Reader<CursorClonableMmap>>,
        word_pair_proximity_docids: Option<grenad::Reader<CursorClonableMmap>>,
        word_position_docids: Option<grenad::Reader<CursorClonableMmap>>,
    ) -> Result<()>
@ -425,14 +435,25 @@ where
        });
        if let Some(word_docids) = word_docids {
-            // Run the word prefix docids update operation.
+            execute_word_prefix_docids(
-            let mut builder = WordPrefixDocids::new(self.wtxn, self.index);
+                self.wtxn,
            builder.chunk_compression_type = self.indexer_config.chunk_compression_type;
            builder.chunk_compression_level = self.indexer_config.chunk_compression_level;
            builder.max_nb_chunks = self.indexer_config.max_nb_chunks;
            builder.max_memory = self.indexer_config.max_memory;
            builder.execute(
                word_docids,
                self.index.word_docids.clone(),
                self.index.word_prefix_docids.clone(),
                &self.indexer_config,
                &new_prefix_fst_words,
                &common_prefix_fst_words,
                &del_prefix_fst_words,
            )?;
        }
        if let Some(exact_word_docids) = exact_word_docids {
            execute_word_prefix_docids(
                self.wtxn,
                exact_word_docids,
                self.index.exact_word_docids.clone(),
                self.index.exact_word_prefix_docids.clone(),
                &self.indexer_config,
                &new_prefix_fst_words,
                &common_prefix_fst_words,
                &del_prefix_fst_words,
@ -497,6 +518,32 @@ where
    }
 }
 /// Run the word prefix docids update operation.
 ///
 /// Turns `reader` into a cursor, configures a `WordPrefixDocids` builder over
 /// the given pair of databases with the compression and memory settings of
 /// `indexer_config`, and executes it with the three prefix word collections.
 ///
 /// Returns the first error raised while creating the cursor or while
 /// executing the builder.
 fn execute_word_prefix_docids(
    txn: &mut heed::RwTxn,
    reader: grenad::Reader<Cursor<ClonableMmap>>,
    word_docids_db: Database<Str, RoaringBitmapCodec>,
    word_prefix_docids_db: Database<Str, RoaringBitmapCodec>,
    indexer_config: &IndexerConfig,
    new_prefix_fst_words: &[String],
    common_prefix_fst_words: &[&[String]],
    del_prefix_fst_words: &HashSet<Vec<u8>>,
 ) -> Result<()> {
    let new_word_docids = reader.into_cursor()?;

    let mut prefix_docids = WordPrefixDocids::new(txn, word_docids_db, word_prefix_docids_db);
    prefix_docids.max_memory = indexer_config.max_memory;
    prefix_docids.max_nb_chunks = indexer_config.max_nb_chunks;
    prefix_docids.chunk_compression_level = indexer_config.chunk_compression_level;
    prefix_docids.chunk_compression_type = indexer_config.chunk_compression_type;

    // The prefix collections are already references: forward them as they are.
    prefix_docids.execute(
        new_word_docids,
        new_prefix_fst_words,
        common_prefix_fst_words,
        del_prefix_fst_words,
    )?;

    Ok(())
 }
 #[cfg(test)]
 mod tests {
    use std::io::Cursor;
--- a/milli/src/update/index_documents/typed_chunk.rs
+++ b/milli/src/update/index_documents/typed_chunk.rs
@ -3,14 +3,16 @@ use std::convert::TryInto;
 use std::fs::File;
 use std::io;
 use grenad::MergerBuilder;
 use heed::types::ByteSlice;
 use heed::{BytesDecode, RwTxn};
 use roaring::RoaringBitmap;
 use super::helpers::{
-    self, roaring_bitmap_from_u32s_array, serialize_roaring_bitmap, valid_lmdb_key,
+    self, merge_ignore_values, roaring_bitmap_from_u32s_array, serialize_roaring_bitmap,
-    CursorClonableMmap,
+    valid_lmdb_key, CursorClonableMmap,
 };
 use super::{ClonableMmap, MergeFn};
 use crate::heed_codec::facet::{decode_prefix_string, encode_prefix_string};
 use crate::update::index_documents::helpers::as_cloneable_grenad;
 use crate::{
@ -25,7 +27,10 @@ pub(crate) enum TypedChunk {
    Documents(grenad::Reader<CursorClonableMmap>),
    FieldIdWordcountDocids(grenad::Reader<File>),
    NewDocumentsIds(RoaringBitmap),
-    WordDocids(grenad::Reader<File>),
+    WordDocids {
        word_docids_reader: grenad::Reader<File>,
        exact_word_docids_reader: grenad::Reader<File>,
    },
    WordPositionDocids(grenad::Reader<File>),
    WordPairProximityDocids(grenad::Reader<File>),
    FieldIdFacetStringDocids(grenad::Reader<File>),
@ -86,8 +91,8 @@ pub(crate) fn write_typed_chunk_into_index(
        TypedChunk::NewDocumentsIds(documents_ids) => {
            return Ok((documents_ids, is_merged_database))
        }
-        TypedChunk::WordDocids(word_docids_iter) => {
+        TypedChunk::WordDocids { word_docids_reader, exact_word_docids_reader } => {
-            let word_docids_iter = unsafe { as_cloneable_grenad(&word_docids_iter) }?;
+            let word_docids_iter = unsafe { as_cloneable_grenad(&word_docids_reader) }?;
            append_entries_into_database(
                word_docids_iter.clone(),
                &index.word_docids,
@ -97,15 +102,18 @@ pub(crate) fn write_typed_chunk_into_index(
                merge_roaring_bitmaps,
            )?;
            let exact_word_docids_iter = unsafe { as_cloneable_grenad(&exact_word_docids_reader) }?;
            append_entries_into_database(
                exact_word_docids_iter.clone(),
                &index.exact_word_docids,
                wtxn,
                index_is_empty,
                |value, _buffer| Ok(value),
                merge_roaring_bitmaps,
            )?;
            // create fst from word docids
-            let mut builder = fst::SetBuilder::memory();
+            let fst = merge_word_docids_reader_into_fst(word_docids_iter, exact_word_docids_iter)?;
            let mut cursor = word_docids_iter.into_cursor()?;
            while let Some((word, _value)) = cursor.move_on_next()? {
                // This is a lexicographically ordered word position
                // we use the key to construct the words fst.
                builder.insert(word)?;
            }
            let fst = builder.into_set().map_data(std::borrow::Cow::Owned)?;
            let db_fst = index.words_fst(wtxn)?;
            // merge new fst with database fst
@ -214,6 +222,23 @@ pub(crate) fn write_typed_chunk_into_index(
    Ok((RoaringBitmap::new(), is_merged_database))
 }
 /// Builds an in-memory FST set out of the keys of both word docids readers.
 ///
 /// The two readers are merged with `merge_ignore_values`, so only the keys
 /// are read; every key yielded by the merged stream is inserted into the set.
 fn merge_word_docids_reader_into_fst(
    word_docids_iter: grenad::Reader<io::Cursor<ClonableMmap>>,
    exact_word_docids_iter: grenad::Reader<io::Cursor<ClonableMmap>>,
 ) -> Result<fst::Set<Vec<u8>>> {
    let word_cursor = word_docids_iter.into_cursor()?;
    let exact_word_cursor = exact_word_docids_iter.into_cursor()?;

    let mut sources = MergerBuilder::new(merge_ignore_values as MergeFn);
    sources.push(word_cursor);
    sources.push(exact_word_cursor);
    let mut merged_words = sources.build().into_stream_merger_iter()?;

    let mut words_fst = fst::SetBuilder::memory();
    loop {
        match merged_words.next()? {
            Some((word, _)) => words_fst.insert(word)?,
            None => break,
        }
    }

    Ok(words_fst.into_set())
 }
 fn merge_roaring_bitmaps(new_value: &[u8], db_value: &[u8], buffer: &mut Vec<u8>) -> Result<()> {
    let new_value = RoaringBitmap::deserialize_from(new_value)?;
    let db_value = RoaringBitmap::deserialize_from(db_value)?;
--- a/milli/src/update/settings.rs
+++ b/milli/src/update/settings.rs
@ -93,6 +93,8 @@ pub struct Settings<'a, 't, 'u, 'i> {
    min_word_len_two_typos: Setting<u8>,
    min_word_len_one_typo: Setting<u8>,
    exact_words: Setting<BTreeSet<String>>,
    /// Attributes on which typo tolerance is disabled.
    exact_attributes: Setting<HashSet<String>>,
 }
 impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
@ -117,6 +119,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
            exact_words: Setting::NotSet,
            min_word_len_two_typos: Setting::Reset,
            min_word_len_one_typo: Setting::Reset,
            exact_attributes: Setting::Reset,
            indexer_config,
        }
    }
@ -226,6 +229,14 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
        self.exact_words = Setting::Reset;
    }
    /// Requests that `attrs` become the exact attributes of the index, i.e. the
    /// attributes on which typo tolerance is disabled.
    ///
    /// This only records the request on the builder as `Setting::Set`.
    pub fn set_exact_attributes(&mut self, attrs: HashSet<String>) {
        self.exact_attributes = Setting::Set(attrs);
    }
    /// Requests that the exact attributes of the index be removed.
    ///
    /// This only records the request on the builder as `Setting::Reset`.
    pub fn reset_exact_attributes(&mut self) {
        self.exact_attributes = Setting::Reset;
    }
    fn reindex<F>(&mut self, cb: &F, old_fields_ids_map: FieldsIdsMap) -> Result<()>
    where
        F: Fn(UpdateIndexingStep) + Sync,
@ -411,6 +422,21 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
        }
    }
    /// Applies the pending exact attributes setting to the index.
    ///
    /// - `Setting::Set`: stores the given attribute names in the index.
    /// - `Setting::Reset`: deletes the stored exact attributes.
    /// - `Setting::NotSet`: leaves the index untouched.
    ///
    /// Returns `Ok(true)` when the index was written to (set or reset) and
    /// `Ok(false)` otherwise.
    fn update_exact_attributes(&mut self) -> Result<bool> {
        let updated = match &self.exact_attributes {
            Setting::Set(names) => {
                let attrs: Vec<&str> = names.iter().map(String::as_str).collect();
                self.index.put_exact_attributes(&mut self.wtxn, &attrs)?;
                true
            }
            Setting::Reset => {
                self.index.delete_exact_attributes(&mut self.wtxn)?;
                true
            }
            Setting::NotSet => false,
        };

        Ok(updated)
    }
    fn update_filterable(&mut self) -> Result<()> {
        match self.filterable_fields {
            Setting::Set(ref fields) => {
@ -579,8 +605,14 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
        let stop_words_updated = self.update_stop_words()?;
        let synonyms_updated = self.update_synonyms()?;
        let searchable_updated = self.update_searchable()?;
        let exact_attributes_updated = self.update_exact_attributes()?;
-        if stop_words_updated || faceted_updated || synonyms_updated || searchable_updated {
+        if stop_words_updated
            || faceted_updated
            || synonyms_updated
            || searchable_updated
            || exact_attributes_updated
        {
            self.reindex(&progress_callback, old_fields_ids_map)?;
        }
--- a/milli/src/update/word_prefix_docids.rs
+++ b/milli/src/update/word_prefix_docids.rs
@ -1,16 +1,18 @@
 use std::collections::{HashMap, HashSet};
 use grenad::CompressionType;
-use heed::types::ByteSlice;
+use heed::types::{ByteSlice, Str};
 use heed::Database;
 use crate::update::index_documents::{
    create_sorter, merge_roaring_bitmaps, sorter_into_lmdb_database, CursorClonableMmap, MergeFn,
 };
-use crate::{Index, Result};
+use crate::{Result, RoaringBitmapCodec};
 pub struct WordPrefixDocids<'t, 'u, 'i> {
    wtxn: &'t mut heed::RwTxn<'i, 'u>,
-    index: &'i Index,
+    word_docids: Database<Str, RoaringBitmapCodec>,
    word_prefix_docids: Database<Str, RoaringBitmapCodec>,
    pub(crate) chunk_compression_type: CompressionType,
    pub(crate) chunk_compression_level: Option<u32>,
    pub(crate) max_nb_chunks: Option<usize>,
@ -20,11 +22,13 @@ pub struct WordPrefixDocids<'t, 'u, 'i> {
 impl<'t, 'u, 'i> WordPrefixDocids<'t, 'u, 'i> {
    pub fn new(
        wtxn: &'t mut heed::RwTxn<'i, 'u>,
-        index: &'i Index,
+        word_docids: Database<Str, RoaringBitmapCodec>,
        word_prefix_docids: Database<Str, RoaringBitmapCodec>,
    ) -> WordPrefixDocids<'t, 'u, 'i> {
        WordPrefixDocids {
            wtxn,
-            index,
+            word_docids,
            word_prefix_docids,
            chunk_compression_type: CompressionType::None,
            chunk_compression_level: None,
            max_nb_chunks: None,
@ -35,7 +39,7 @@ impl<'t, 'u, 'i> WordPrefixDocids<'t, 'u, 'i> {
    #[logging_timer::time("WordPrefixDocids::{}")]
    pub fn execute(
        self,
-        new_word_docids: grenad::Reader<CursorClonableMmap>,
+        mut new_word_docids_iter: grenad::ReaderCursor<CursorClonableMmap>,
        new_prefix_fst_words: &[String],
        common_prefix_fst_words: &[&[String]],
        del_prefix_fst_words: &HashSet<Vec<u8>>,
@ -51,7 +55,6 @@ impl<'t, 'u, 'i> WordPrefixDocids<'t, 'u, 'i> {
        );
        if !common_prefix_fst_words.is_empty() {
            let mut new_word_docids_iter = new_word_docids.into_cursor()?;
            let mut current_prefixes: Option<&&[String]> = None;
            let mut prefixes_cache = HashMap::new();
            while let Some((word, data)) = new_word_docids_iter.move_on_next()? {
@ -84,7 +87,7 @@ impl<'t, 'u, 'i> WordPrefixDocids<'t, 'u, 'i> {
        }
        // We fetch the docids associated to the newly added word prefix fst only.
-        let db = self.index.word_docids.remap_data_type::<ByteSlice>();
+        let db = self.word_docids.remap_data_type::<ByteSlice>();
        for prefix in new_prefix_fst_words {
            let prefix = std::str::from_utf8(prefix.as_bytes())?;
            for result in db.prefix_iter(self.wtxn, prefix)? {
@ -94,7 +97,7 @@ impl<'t, 'u, 'i> WordPrefixDocids<'t, 'u, 'i> {
        }
        // We remove all the entries that are no more required in this word prefix docids database.
-        let mut iter = self.index.word_prefix_docids.iter_mut(self.wtxn)?.lazily_decode_data();
+        let mut iter = self.word_prefix_docids.iter_mut(self.wtxn)?.lazily_decode_data();
        while let Some((prefix, _)) = iter.next().transpose()? {
            if del_prefix_fst_words.contains(prefix.as_bytes()) {
                unsafe { iter.del_current()? };
@ -106,7 +109,7 @@ impl<'t, 'u, 'i> WordPrefixDocids<'t, 'u, 'i> {
        // We finally write the word prefix docids into the LMDB database.
        sorter_into_lmdb_database(
            self.wtxn,
-            *self.index.word_prefix_docids.as_polymorph(),
+            *self.word_prefix_docids.as_polymorph(),
            prefix_docids_sorter,
            merge_roaring_bitmaps,
        )?;
--- a/milli/tests/search/query_criteria.rs
+++ b/milli/tests/search/query_criteria.rs
@ -373,7 +373,7 @@ fn criteria_mixup() {
 fn criteria_ascdesc() {
    let path = tempfile::tempdir().unwrap();
    let mut options = EnvOpenOptions::new();
-    options.map_size(10 * 1024 * 1024); // 10 MB
+    options.map_size(12 * 1024 * 1024); // 12 MB
    let index = Index::new(options, &path).unwrap();
    let mut wtxn = index.write_txn().unwrap();
--- a/milli/tests/search/typo_tolerance.rs
+++ b/milli/tests/search/typo_tolerance.rs
@ -170,3 +170,41 @@ fn test_typo_disabled_on_word() {
    let result = search.execute().unwrap();
    assert_eq!(result.documents_ids.len(), 1);
 }
 /// A query containing a typo matches one document with the default
 /// settings, and none once `description` is declared an exact attribute.
 #[test]
 fn test_disable_typo_on_attribute() {
    let index = super::setup_search_index_with_criteria(&[Typo]);

    // Baseline: typo search with the default typo settings.
    {
        let rtxn = index.read_txn().unwrap();
        let mut baseline = Search::new(&rtxn, &index);
        // `antebelum` is missing one `l` of `antebellum`.
        baseline.query("antebelum");
        baseline.limit(10);
        baseline.authorize_typos(true);
        baseline.optional_words(true);

        let found = baseline.execute().unwrap();
        assert_eq!(found.documents_ids.len(), 1);
    }

    let mut wtxn = index.write_txn().unwrap();

    // Disable typos on `description`.
    let indexer_config = IndexerConfig::default();
    let mut settings = Settings::new(&mut wtxn, &index, &indexer_config);
    settings.set_exact_attributes(std::iter::once("description".to_string()).collect());
    settings.execute(|_| ()).unwrap();

    // Same query, run through the write transaction that holds the new settings.
    let mut exact = Search::new(&wtxn, &index);
    exact.query("antebelum");
    exact.limit(10);
    exact.authorize_typos(true);
    exact.optional_words(true);

    let found = exact.execute().unwrap();
    assert_eq!(found.documents_ids.len(), 0);
 }