From ff9414a6baca2411958d7c634c71fb8deaf52a65 Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Wed, 9 Jun 2021 10:57:32 +0200 Subject: [PATCH 01/22] Use the output of the compute_primary_key_pair function --- milli/src/update/index_documents/transform.rs | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/milli/src/update/index_documents/transform.rs b/milli/src/update/index_documents/transform.rs index fd508d6a4..cfc2530b4 100644 --- a/milli/src/update/index_documents/transform.rs +++ b/milli/src/update/index_documents/transform.rs @@ -246,7 +246,7 @@ impl Transform<'_, '_> { // Returns the field id in the fields ids map, create an "id" field // in case it is not in the current headers. let alternative_name = primary_key_pos.map(|pos| headers[pos].to_string()); - let (primary_key_id, _) = compute_primary_key_pair( + let (primary_key_id, primary_key_name) = compute_primary_key_pair( self.index.primary_key(self.rtxn)?, &mut fields_ids_map, alternative_name, @@ -330,10 +330,6 @@ impl Transform<'_, '_> { // Now that we have a valid sorter that contains the user id and the obkv we // give it to the last transforming function which returns the TransformOutput. - let primary_key_name = fields_ids_map - .name(primary_key_id) - .map(String::from) - .expect("Primary key must be present in fields id map"); self.output_from_sorter( sorter, primary_key_name, From 93978ec38a345cd0acbc068cde59f7e739ecf1bb Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Wed, 9 Jun 2021 11:28:38 +0200 Subject: [PATCH 02/22] Serializing a RoaringBitmap into a Vec cannot fail --- .../roaring_bitmap/cbo_roaring_bitmap_codec.rs | 9 ++++----- milli/src/update/index_documents/merge_function.rs | 2 +- milli/src/update/index_documents/store.rs | 7 +++---- 3 files changed, 8 insertions(+), 10 deletions(-) diff --git a/milli/src/heed_codec/roaring_bitmap/cbo_roaring_bitmap_codec.rs b/milli/src/heed_codec/roaring_bitmap/cbo_roaring_bitmap_codec.rs index 8ccf831e3..325effa73 100644 --- a/milli/src/heed_codec/roaring_bitmap/cbo_roaring_bitmap_codec.rs +++ b/milli/src/heed_codec/roaring_bitmap/cbo_roaring_bitmap_codec.rs @@ -23,18 +23,17 @@ impl CboRoaringBitmapCodec { } } - pub fn serialize_into(roaring: &RoaringBitmap, vec: &mut Vec) -> io::Result<()> { + pub fn serialize_into(roaring: &RoaringBitmap, vec: &mut Vec) { if roaring.len() <= THRESHOLD as u64 { // If the number of items (u32s) to encode is less than or equal to the threshold // it means that it would weigh the same or less than the RoaringBitmap // header, so we directly encode them using ByteOrder instead. for integer in roaring { - vec.write_u32::(integer)?; + vec.write_u32::(integer).unwrap(); } - Ok(()) } else { // Otherwise, we use the classic RoaringBitmapCodec that writes a header.
- roaring.serialize_into(vec) + roaring.serialize_into(vec).unwrap(); } } @@ -68,7 +67,7 @@ impl heed::BytesEncode<'_> for CboRoaringBitmapCodec { fn bytes_encode(item: &Self::EItem) -> Option> { let mut vec = Vec::with_capacity(Self::serialized_size(item)); - Self::serialize_into(item, &mut vec).ok()?; + Self::serialize_into(item, &mut vec); Some(Cow::Owned(vec)) } } diff --git a/milli/src/update/index_documents/merge_function.rs b/milli/src/update/index_documents/merge_function.rs index 230116e99..6da19bc84 100644 --- a/milli/src/update/index_documents/merge_function.rs +++ b/milli/src/update/index_documents/merge_function.rs @@ -112,6 +112,6 @@ fn cbo_roaring_bitmap_merge(values: &[Cow<[u8]>]) -> anyhow::Result> { } let mut vec = Vec::new(); - CboRoaringBitmapCodec::serialize_into(&head, &mut vec)?; + CboRoaringBitmapCodec::serialize_into(&head, &mut vec); Ok(vec) } diff --git a/milli/src/update/index_documents/store.rs b/milli/src/update/index_documents/store.rs index 08050092e..69263e5a0 100644 --- a/milli/src/update/index_documents/store.rs +++ b/milli/src/update/index_documents/store.rs @@ -407,7 +407,7 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> { // We serialize the document ids into a buffer buffer.clear(); buffer.reserve(CboRoaringBitmapCodec::serialized_size(&docids)); - CboRoaringBitmapCodec::serialize_into(&docids, &mut buffer)?; + CboRoaringBitmapCodec::serialize_into(&docids, &mut buffer); // that we write under the generated key into MTBL if lmdb_key_valid_size(&key) { sorter.insert(&key, &buffer)?; @@ -469,8 +469,7 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> { data_buffer.clear(); let positions = RoaringBitmap::from_iter(Some(document_id)); // We serialize the positions into a buffer. - CboRoaringBitmapCodec::serialize_into(&positions, &mut data_buffer) - .with_context(|| "could not serialize positions")?; + CboRoaringBitmapCodec::serialize_into(&positions, &mut data_buffer); // that we write under the generated key into MTBL if lmdb_key_valid_size(&key_buffer) { @@ -706,7 +705,7 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> { let mut docids_buffer = Vec::new(); for ((fid, count), docids) in self.field_id_word_count_docids { docids_buffer.clear(); - CboRoaringBitmapCodec::serialize_into(&docids, &mut docids_buffer)?; + CboRoaringBitmapCodec::serialize_into(&docids, &mut docids_buffer); self.field_id_word_count_docids_sorter.insert([fid, count], &docids_buffer)?; } From cfc7314bd16576b745a806ab125f023fa1afd5a2 Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Wed, 9 Jun 2021 12:17:11 +0200 Subject: [PATCH 03/22] Prefer using an explicit merge function name --- .../update/index_documents/merge_function.rs | 70 ++++--------------- milli/src/update/index_documents/mod.rs | 35 +++++----- milli/src/update/index_documents/store.rs | 24 +++---- milli/src/update/word_prefix_docids.rs | 8 ++- .../word_prefix_pair_proximity_docids.rs | 6 +- milli/src/update/words_level_positions.rs | 6 +- 6 files changed, 51 insertions(+), 98 deletions(-) diff --git a/milli/src/update/index_documents/merge_function.rs b/milli/src/update/index_documents/merge_function.rs index 6da19bc84..c8424dc8c 100644 --- a/milli/src/update/index_documents/merge_function.rs +++ b/milli/src/update/index_documents/merge_function.rs @@ -1,71 +1,29 @@ use std::borrow::Cow; -use anyhow::{bail, ensure, Context}; +use anyhow::bail; use bstr::ByteSlice as _; use fst::IntoStreamer; use roaring::RoaringBitmap; use crate::heed_codec::CboRoaringBitmapCodec; -const WORDS_FST_KEY: &[u8] = crate::index::WORDS_FST_KEY.as_bytes(); -const 
FIELDS_IDS_MAP_KEY: &[u8] = crate::index::FIELDS_IDS_MAP_KEY.as_bytes(); -const DOCUMENTS_IDS_KEY: &[u8] = crate::index::DOCUMENTS_IDS_KEY.as_bytes(); +// Union of multiple FSTs +pub fn fst_merge(_key: &[u8], values: &[Cow<[u8]>]) -> anyhow::Result> { + let fsts = values.iter().map(fst::Set::new).collect::, _>>()?; + let op_builder: fst::set::OpBuilder = fsts.iter().map(|fst| fst.into_stream()).collect(); + let op = op_builder.r#union(); -pub fn main_merge(key: &[u8], values: &[Cow<[u8]>]) -> anyhow::Result> { - match key { - WORDS_FST_KEY => { - let fsts: Vec<_> = values.iter().map(|v| fst::Set::new(v).unwrap()).collect(); - - // Union of the FSTs - let mut op = fst::set::OpBuilder::new(); - fsts.iter().for_each(|fst| op.push(fst.into_stream())); - let op = op.r#union(); - - let mut build = fst::SetBuilder::memory(); - build.extend_stream(op.into_stream()).unwrap(); - Ok(build.into_inner().unwrap()) - }, - FIELDS_IDS_MAP_KEY => { - ensure!(values.windows(2).all(|vs| vs[0] == vs[1]), "fields ids map doesn't match"); - Ok(values[0].to_vec()) - }, - DOCUMENTS_IDS_KEY => roaring_bitmap_merge(values), - otherwise => bail!("wut {:?}", otherwise), - } -} - -pub fn word_docids_merge(_key: &[u8], values: &[Cow<[u8]>]) -> anyhow::Result> { - roaring_bitmap_merge(values) + let mut build = fst::SetBuilder::memory(); + build.extend_stream(op.into_stream()).unwrap(); + Ok(build.into_inner().unwrap()) } pub fn docid_word_positions_merge(key: &[u8], _values: &[Cow<[u8]>]) -> anyhow::Result> { - bail!("merging docid word positions is an error ({:?})", key.as_bstr()) + panic!("merging docid word positions is an error ({:?})", key.as_bstr()) } -pub fn field_id_docid_facet_values_merge(_key: &[u8], values: &[Cow<[u8]>]) -> anyhow::Result> { - let first = values.first().context("no value to merge")?; - ensure!(values.iter().all(|v| v == first), "invalid field id docid facet value merging"); - Ok(first.to_vec()) -} - -pub fn words_pairs_proximities_docids_merge(_key: &[u8], values: &[Cow<[u8]>]) -> anyhow::Result> { - cbo_roaring_bitmap_merge(values) -} - -pub fn word_prefix_level_positions_docids_merge(_key: &[u8], values: &[Cow<[u8]>]) -> anyhow::Result> { - cbo_roaring_bitmap_merge(values) -} - -pub fn word_level_position_docids_merge(_key: &[u8], values: &[Cow<[u8]>]) -> anyhow::Result> { - cbo_roaring_bitmap_merge(values) -} - -pub fn field_id_word_count_docids_merge(_key: &[u8], values: &[Cow<[u8]>]) -> anyhow::Result> { - cbo_roaring_bitmap_merge(values) -} - -pub fn facet_field_value_docids_merge(_key: &[u8], values: &[Cow<[u8]>]) -> anyhow::Result> { - cbo_roaring_bitmap_merge(values) +pub fn keep_first(_key: &[u8], values: &[Cow<[u8]>]) -> anyhow::Result> { + Ok(values.first().unwrap().to_vec()) } pub fn documents_merge(key: &[u8], _values: &[Cow<[u8]>]) -> anyhow::Result> { @@ -88,7 +46,7 @@ pub fn merge_two_obkvs(base: obkv::KvReader, update: obkv::KvReader, buffer: &mu writer.finish().unwrap(); } -fn roaring_bitmap_merge(values: &[Cow<[u8]>]) -> anyhow::Result> { +pub fn roaring_bitmap_merge(_key: &[u8], values: &[Cow<[u8]>]) -> anyhow::Result> { let (head, tail) = values.split_first().unwrap(); let mut head = RoaringBitmap::deserialize_from(&head[..])?; @@ -102,7 +60,7 @@ fn roaring_bitmap_merge(values: &[Cow<[u8]>]) -> anyhow::Result> { Ok(vec) } -fn cbo_roaring_bitmap_merge(values: &[Cow<[u8]>]) -> anyhow::Result> { +pub fn cbo_roaring_bitmap_merge(_key: &[u8], values: &[Cow<[u8]>]) -> anyhow::Result> { let (head, tail) = values.split_first().unwrap(); let mut head = 
CboRoaringBitmapCodec::deserialize_from(&head[..])?; diff --git a/milli/src/update/index_documents/mod.rs b/milli/src/update/index_documents/mod.rs index 4cf56b563..2b790ce06 100644 --- a/milli/src/update/index_documents/mod.rs +++ b/milli/src/update/index_documents/mod.rs @@ -25,11 +25,8 @@ use crate::update::{ }; use self::store::{Store, Readers}; pub use self::merge_function::{ - main_merge, word_docids_merge, words_pairs_proximities_docids_merge, - docid_word_positions_merge, documents_merge, - word_level_position_docids_merge, word_prefix_level_positions_docids_merge, - facet_field_value_docids_merge, field_id_docid_facet_values_merge, - field_id_word_count_docids_merge, + fst_merge, cbo_roaring_bitmap_merge, roaring_bitmap_merge, + docid_word_positions_merge, documents_merge, keep_first }; pub use self::transform::{Transform, TransformOutput}; @@ -539,22 +536,22 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> { debug!("Merging the main, word docids and words pairs proximity docids in parallel..."); rayon::spawn(move || { vec![ - (DatabaseType::Main, main_readers, main_merge as MergeFn), - (DatabaseType::WordDocids, word_docids_readers, word_docids_merge), + (DatabaseType::Main, main_readers, fst_merge as MergeFn), + (DatabaseType::WordDocids, word_docids_readers, roaring_bitmap_merge), ( DatabaseType::FacetLevel0NumbersDocids, facet_field_numbers_docids_readers, - facet_field_value_docids_merge, + cbo_roaring_bitmap_merge, ), ( DatabaseType::WordLevel0PositionDocids, word_level_position_docids_readers, - word_level_position_docids_merge, + cbo_roaring_bitmap_merge, ), ( DatabaseType::FieldIdWordCountDocids, field_id_word_count_docids_readers, - field_id_word_count_docids_merge, + cbo_roaring_bitmap_merge, ), ] .into_par_iter() @@ -657,7 +654,7 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> { self.wtxn, *self.index.facet_id_string_docids.as_polymorph(), facet_field_strings_docids_readers, - facet_field_value_docids_merge, + cbo_roaring_bitmap_merge, write_method, )?; @@ -672,7 +669,7 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> { self.wtxn, *self.index.field_id_docid_facet_f64s.as_polymorph(), field_id_docid_facet_numbers_readers, - field_id_docid_facet_values_merge, + keep_first, write_method, )?; @@ -687,7 +684,7 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> { self.wtxn, *self.index.field_id_docid_facet_strings.as_polymorph(), field_id_docid_facet_strings_readers, - field_id_docid_facet_values_merge, + keep_first, write_method, )?; @@ -702,7 +699,7 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> { self.wtxn, *self.index.word_pair_proximity_docids.as_polymorph(), words_pairs_proximities_docids_readers, - words_pairs_proximities_docids_merge, + cbo_roaring_bitmap_merge, write_method, )?; @@ -721,7 +718,7 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> { self.wtxn, self.index.main, content, - main_merge, + fst_merge, WriteMethod::GetMergePut, )?; }, @@ -732,7 +729,7 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> { self.wtxn, db, content, - word_docids_merge, + roaring_bitmap_merge, write_method, )?; }, @@ -743,7 +740,7 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> { self.wtxn, db, content, - facet_field_value_docids_merge, + cbo_roaring_bitmap_merge, write_method, )?; }, @@ -754,7 +751,7 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> { self.wtxn, db, content, - field_id_word_count_docids_merge, + cbo_roaring_bitmap_merge, write_method, )?; }, @@ -765,7 +762,7 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> 
{ self.wtxn, db, content, - word_level_position_docids_merge, + cbo_roaring_bitmap_merge, write_method, )?; } diff --git a/milli/src/update/index_documents/store.rs b/milli/src/update/index_documents/store.rs index 69263e5a0..16837ca7b 100644 --- a/milli/src/update/index_documents/store.rs +++ b/milli/src/update/index_documents/store.rs @@ -26,11 +26,7 @@ use crate::update::UpdateIndexingStep; use crate::{json_to_string, SmallVec32, Position, DocumentId, FieldId}; use super::{MergeFn, create_writer, create_sorter, writer_into_reader}; -use super::merge_function::{ - main_merge, word_docids_merge, words_pairs_proximities_docids_merge, - word_level_position_docids_merge, facet_field_value_docids_merge, - field_id_docid_facet_values_merge, field_id_word_count_docids_merge, -}; +use super::merge_function::{fst_merge, keep_first, roaring_bitmap_merge, cbo_roaring_bitmap_merge}; const LMDB_MAX_KEY_LENGTH: usize = 511; const ONE_KILOBYTE: usize = 1024 * 1024; @@ -104,7 +100,7 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> { let linked_hash_map_size = linked_hash_map_size.unwrap_or(500); let main_sorter = create_sorter( - main_merge, + fst_merge, chunk_compression_type, chunk_compression_level, chunk_fusing_shrink_size, @@ -112,7 +108,7 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> { max_memory, ); let word_docids_sorter = create_sorter( - word_docids_merge, + roaring_bitmap_merge, chunk_compression_type, chunk_compression_level, chunk_fusing_shrink_size, @@ -120,7 +116,7 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> { max_memory, ); let words_pairs_proximities_docids_sorter = create_sorter( - words_pairs_proximities_docids_merge, + cbo_roaring_bitmap_merge, chunk_compression_type, chunk_compression_level, chunk_fusing_shrink_size, @@ -128,7 +124,7 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> { max_memory, ); let word_level_position_docids_sorter = create_sorter( - word_level_position_docids_merge, + cbo_roaring_bitmap_merge, chunk_compression_type, chunk_compression_level, chunk_fusing_shrink_size, @@ -136,7 +132,7 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> { max_memory, ); let field_id_word_count_docids_sorter = create_sorter( - field_id_word_count_docids_merge, + cbo_roaring_bitmap_merge, chunk_compression_type, chunk_compression_level, chunk_fusing_shrink_size, @@ -144,7 +140,7 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> { max_memory, ); let facet_field_numbers_docids_sorter = create_sorter( - facet_field_value_docids_merge, + cbo_roaring_bitmap_merge, chunk_compression_type, chunk_compression_level, chunk_fusing_shrink_size, @@ -152,7 +148,7 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> { max_memory, ); let facet_field_strings_docids_sorter = create_sorter( - facet_field_value_docids_merge, + cbo_roaring_bitmap_merge, chunk_compression_type, chunk_compression_level, chunk_fusing_shrink_size, @@ -160,7 +156,7 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> { max_memory, ); let field_id_docid_facet_numbers_sorter = create_sorter( - field_id_docid_facet_values_merge, + keep_first, chunk_compression_type, chunk_compression_level, chunk_fusing_shrink_size, @@ -168,7 +164,7 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> { Some(1024 * 1024 * 1024), // 1MB ); let field_id_docid_facet_strings_sorter = create_sorter( - field_id_docid_facet_values_merge, + keep_first, chunk_compression_type, chunk_compression_level, chunk_fusing_shrink_size, diff --git a/milli/src/update/word_prefix_docids.rs b/milli/src/update/word_prefix_docids.rs index 58c984212..0544f8789 100644 --- a/milli/src/update/word_prefix_docids.rs +++ 
b/milli/src/update/word_prefix_docids.rs @@ -6,7 +6,9 @@ use grenad::CompressionType; use heed::types::ByteSlice; use crate::update::index_documents::WriteMethod; -use crate::update::index_documents::{create_sorter, word_docids_merge, sorter_into_lmdb_database}; +use crate::update::index_documents::{ + create_sorter, roaring_bitmap_merge, sorter_into_lmdb_database, +}; pub struct WordPrefixDocids<'t, 'u, 'i> { wtxn: &'t mut heed::RwTxn<'i, 'u>, @@ -40,7 +42,7 @@ impl<'t, 'u, 'i> WordPrefixDocids<'t, 'u, 'i> { // It is forbidden to keep a mutable reference into the database // and write into it at the same time, therefore we write into another file. let mut prefix_docids_sorter = create_sorter( - word_docids_merge, + roaring_bitmap_merge, self.chunk_compression_type, self.chunk_compression_level, self.chunk_fusing_shrink_size, @@ -66,7 +68,7 @@ impl<'t, 'u, 'i> WordPrefixDocids<'t, 'u, 'i> { self.wtxn, *self.index.word_prefix_docids.as_polymorph(), prefix_docids_sorter, - word_docids_merge, + roaring_bitmap_merge, WriteMethod::Append, )?; diff --git a/milli/src/update/word_prefix_pair_proximity_docids.rs b/milli/src/update/word_prefix_pair_proximity_docids.rs index c972efc4f..c6b935e54 100644 --- a/milli/src/update/word_prefix_pair_proximity_docids.rs +++ b/milli/src/update/word_prefix_pair_proximity_docids.rs @@ -11,7 +11,7 @@ use crate::Index; use crate::heed_codec::StrStrU8Codec; use crate::update::index_documents::{ WriteMethod, create_sorter, sorter_into_lmdb_database, - words_pairs_proximities_docids_merge, + cbo_roaring_bitmap_merge, }; pub struct WordPrefixPairProximityDocids<'t, 'u, 'i> { @@ -50,7 +50,7 @@ impl<'t, 'u, 'i> WordPrefixPairProximityDocids<'t, 'u, 'i> { // Here we create a sorter akin to the previous one. let mut word_prefix_pair_proximity_docids_sorter = create_sorter( - words_pairs_proximities_docids_merge, + cbo_roaring_bitmap_merge, self.chunk_compression_type, self.chunk_compression_level, self.chunk_fusing_shrink_size, @@ -80,7 +80,7 @@ impl<'t, 'u, 'i> WordPrefixPairProximityDocids<'t, 'u, 'i> { self.wtxn, *self.index.word_prefix_pair_proximity_docids.as_polymorph(), word_prefix_pair_proximity_docids_sorter, - words_pairs_proximities_docids_merge, + cbo_roaring_bitmap_merge, WriteMethod::Append, )?; diff --git a/milli/src/update/words_level_positions.rs b/milli/src/update/words_level_positions.rs index 1b772c37d..f94507aab 100644 --- a/milli/src/update/words_level_positions.rs +++ b/milli/src/update/words_level_positions.rs @@ -15,7 +15,7 @@ use crate::heed_codec::{StrLevelPositionCodec, CboRoaringBitmapCodec}; use crate::update::index_documents::WriteMethod; use crate::update::index_documents::{ create_writer, create_sorter, writer_into_reader, write_into_lmdb_database, - word_prefix_level_positions_docids_merge, sorter_into_lmdb_database + cbo_roaring_bitmap_merge, sorter_into_lmdb_database }; use crate::{Index, TreeLevel}; @@ -86,7 +86,7 @@ impl<'t, 'u, 'i> WordsLevelPositions<'t, 'u, 'i> { self.index.word_prefix_level_position_docids.clear(self.wtxn)?; let mut word_prefix_level_positions_docids_sorter = create_sorter( - word_prefix_level_positions_docids_merge, + cbo_roaring_bitmap_merge, self.chunk_compression_type, self.chunk_compression_level, self.chunk_fusing_shrink_size, @@ -119,7 +119,7 @@ impl<'t, 'u, 'i> WordsLevelPositions<'t, 'u, 'i> { self.wtxn, *self.index.word_prefix_level_position_docids.as_polymorph(), word_prefix_level_positions_docids_sorter, - word_prefix_level_positions_docids_merge, + cbo_roaring_bitmap_merge, WriteMethod::Append, )?; 
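Note on patches 02 and 03: all of the renamed helpers share the same merge signature, a key plus the list of serialized values seen for that key, returning a single serialized value, and the unwraps introduced in patch 02 are safe because writing into a Vec<u8> cannot fail. The following is only a usage sketch, not part of any patch; it assumes the roaring and anyhow crates together with the roaring_bitmap_merge function exactly as defined in merge_function.rs above.

    use std::borrow::Cow;
    use roaring::RoaringBitmap;

    fn main() -> anyhow::Result<()> {
        // Serializing into a Vec<u8> cannot fail, which is what patch 02 relies on.
        let serialize = |bitmap: &RoaringBitmap| {
            let mut bytes = Vec::new();
            bitmap.serialize_into(&mut bytes).unwrap();
            bytes
        };

        let first: RoaringBitmap = (0..4).collect();
        let second: RoaringBitmap = (2..8).collect();
        let values = [Cow::Owned(serialize(&first)), Cow::Owned(serialize(&second))];

        // The key is ignored, which is what lets one generic function back
        // every sorter that stores plain RoaringBitmap values.
        let merged_bytes = roaring_bitmap_merge(b"some key", &values)?;
        let merged = RoaringBitmap::deserialize_from(&merged_bytes[..])?;

        let expected: RoaringBitmap = (0..8).collect();
        assert_eq!(merged, expected);
        Ok(())
    }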
From 93a8633f188150e2d13c7a2d5a3d6773f0c9d204 Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Wed, 9 Jun 2021 14:45:56 +0200 Subject: [PATCH 04/22] Remove the documents_merge method that must never be called --- milli/src/update/index_documents/merge_function.rs | 4 ---- milli/src/update/index_documents/mod.rs | 8 ++++---- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/milli/src/update/index_documents/merge_function.rs b/milli/src/update/index_documents/merge_function.rs index c8424dc8c..904368fb0 100644 --- a/milli/src/update/index_documents/merge_function.rs +++ b/milli/src/update/index_documents/merge_function.rs @@ -26,10 +26,6 @@ pub fn keep_first(_key: &[u8], values: &[Cow<[u8]>]) -> anyhow::Result> Ok(values.first().unwrap().to_vec()) } -pub fn documents_merge(key: &[u8], _values: &[Cow<[u8]>]) -> anyhow::Result> { - bail!("merging documents is an error ({:?})", key.as_bstr()) -} - pub fn merge_two_obkvs(base: obkv::KvReader, update: obkv::KvReader, buffer: &mut Vec) { use itertools::merge_join_by; use itertools::EitherOrBoth::{Both, Left, Right}; diff --git a/milli/src/update/index_documents/mod.rs b/milli/src/update/index_documents/mod.rs index 2b790ce06..7fb47fa4a 100644 --- a/milli/src/update/index_documents/mod.rs +++ b/milli/src/update/index_documents/mod.rs @@ -26,7 +26,7 @@ use crate::update::{ use self::store::{Store, Readers}; pub use self::merge_function::{ fst_merge, cbo_roaring_bitmap_merge, roaring_bitmap_merge, - docid_word_positions_merge, documents_merge, keep_first + docid_word_positions_merge, keep_first }; pub use self::transform::{Transform, TransformOutput}; @@ -149,7 +149,7 @@ pub fn write_into_lmdb_database( let mut iter = database.prefix_iter_mut::<_, ByteSlice, ByteSlice>(wtxn, k)?; match iter.next().transpose()? 
{ Some((key, old_val)) if key == k => { - let vals = vec![Cow::Borrowed(old_val), Cow::Borrowed(v)]; + let vals = &[Cow::Borrowed(old_val), Cow::Borrowed(v)][..]; let val = merge(k, &vals)?; iter.put_current(k, &val)?; }, @@ -634,12 +634,12 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> { total_databases, }); - debug!("Writing the documents into LMDB on disk..."); + debug!("Inserting the documents into LMDB on disk..."); merge_into_lmdb_database( self.wtxn, *self.index.documents.as_polymorph(), documents_readers, - documents_merge, + keep_first, write_method )?; From ab727e428bbacb0f1117ce9140f972d15833d990 Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Wed, 9 Jun 2021 14:49:06 +0200 Subject: [PATCH 05/22] Remove the docid_word_positions_merge method that must never be called --- milli/src/update/index_documents/merge_function.rs | 4 ---- milli/src/update/index_documents/mod.rs | 7 +++---- 2 files changed, 3 insertions(+), 8 deletions(-) diff --git a/milli/src/update/index_documents/merge_function.rs b/milli/src/update/index_documents/merge_function.rs index 904368fb0..0a32603b5 100644 --- a/milli/src/update/index_documents/merge_function.rs +++ b/milli/src/update/index_documents/merge_function.rs @@ -18,10 +18,6 @@ pub fn fst_merge(_key: &[u8], values: &[Cow<[u8]>]) -> anyhow::Result> { Ok(build.into_inner().unwrap()) } -pub fn docid_word_positions_merge(key: &[u8], _values: &[Cow<[u8]>]) -> anyhow::Result> { - panic!("merging docid word positions is an error ({:?})", key.as_bstr()) -} - pub fn keep_first(_key: &[u8], values: &[Cow<[u8]>]) -> anyhow::Result> { Ok(values.first().unwrap().to_vec()) } diff --git a/milli/src/update/index_documents/mod.rs b/milli/src/update/index_documents/mod.rs index 7fb47fa4a..1d31cba85 100644 --- a/milli/src/update/index_documents/mod.rs +++ b/milli/src/update/index_documents/mod.rs @@ -25,8 +25,7 @@ use crate::update::{ }; use self::store::{Store, Readers}; pub use self::merge_function::{ - fst_merge, cbo_roaring_bitmap_merge, roaring_bitmap_merge, - docid_word_positions_merge, keep_first + fst_merge, cbo_roaring_bitmap_merge, roaring_bitmap_merge, keep_first }; pub use self::transform::{Transform, TransformOutput}; @@ -619,12 +618,12 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> { total_databases, }); - debug!("Writing the docid word positions into LMDB on disk..."); + debug!("Inserting the docid word positions into LMDB on disk..."); merge_into_lmdb_database( self.wtxn, *self.index.docid_word_positions.as_polymorph(), docid_word_positions_readers, - docid_word_positions_merge, + keep_first, write_method )?; From 65b1d09d55400349b00219afaa62f92ab3233acb Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Wed, 9 Jun 2021 14:57:03 +0200 Subject: [PATCH 06/22] Move the obkv merging functions into the merge_function module --- .../update/index_documents/merge_function.rs | 20 ++++++++++++++++-- milli/src/update/index_documents/transform.rs | 21 ++----------------- 2 files changed, 20 insertions(+), 21 deletions(-) diff --git a/milli/src/update/index_documents/merge_function.rs b/milli/src/update/index_documents/merge_function.rs index 0a32603b5..8c93773ce 100644 --- a/milli/src/update/index_documents/merge_function.rs +++ b/milli/src/update/index_documents/merge_function.rs @@ -1,12 +1,28 @@ use std::borrow::Cow; -use anyhow::bail; -use bstr::ByteSlice as _; use fst::IntoStreamer; use roaring::RoaringBitmap; use crate::heed_codec::CboRoaringBitmapCodec; +/// Only the last value associated with an id is kept. 
+pub fn keep_latest_obkv(_key: &[u8], obkvs: &[Cow<[u8]>]) -> anyhow::Result> { + Ok(obkvs.last().unwrap().clone().into_owned()) +} + +/// Merge all the obks in the order we see them. +pub fn merge_obkvs(_key: &[u8], obkvs: &[Cow<[u8]>]) -> anyhow::Result> { + let mut iter = obkvs.iter(); + let first = iter.next().map(|b| b.clone().into_owned()).unwrap(); + Ok(iter.fold(first, |acc, current| { + let first = obkv::KvReader::new(&acc); + let second = obkv::KvReader::new(current); + let mut buffer = Vec::new(); + merge_two_obkvs(first, second, &mut buffer); + buffer + })) +} + // Union of multiple FSTs pub fn fst_merge(_key: &[u8], values: &[Cow<[u8]>]) -> anyhow::Result> { let fsts = values.iter().map(fst::Set::new).collect::, _>>()?; diff --git a/milli/src/update/index_documents/transform.rs b/milli/src/update/index_documents/transform.rs index cfc2530b4..5fbd24bb1 100644 --- a/milli/src/update/index_documents/transform.rs +++ b/milli/src/update/index_documents/transform.rs @@ -10,8 +10,9 @@ use log::info; use roaring::RoaringBitmap; use serde_json::{Map, Value}; -use crate::{Index, BEU32, MergeFn, FieldsIdsMap, ExternalDocumentsIds, FieldId, FieldsDistribution}; +use crate::update::index_documents::merge_function::{merge_obkvs, keep_latest_obkv}; use crate::update::{AvailableDocumentsIds, UpdateIndexingStep}; +use crate::{Index, BEU32, MergeFn, FieldsIdsMap, ExternalDocumentsIds, FieldId, FieldsDistribution}; use super::merge_function::merge_two_obkvs; use super::{create_writer, create_sorter, IndexDocumentsMethod}; @@ -552,24 +553,6 @@ fn compute_primary_key_pair( } } -/// Only the last value associated with an id is kept. -fn keep_latest_obkv(_key: &[u8], obkvs: &[Cow<[u8]>]) -> anyhow::Result> { - obkvs.last().context("no last value").map(|last| last.clone().into_owned()) -} - -/// Merge all the obks in the order we see them. 
-fn merge_obkvs(_key: &[u8], obkvs: &[Cow<[u8]>]) -> anyhow::Result> { - let mut iter = obkvs.iter(); - let first = iter.next().map(|b| b.clone().into_owned()).context("no first value")?; - Ok(iter.fold(first, |acc, current| { - let first = obkv::KvReader::new(&acc); - let second = obkv::KvReader::new(current); - let mut buffer = Vec::new(); - merge_two_obkvs(first, second, &mut buffer); - buffer - })) -} - fn validate_document_id(document_id: &str) -> Option<&str> { let document_id = document_id.trim(); Some(document_id).filter(|id| { From d2b1ecc88565c93fd66d3ab6d963a5213e86b30c Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Wed, 9 Jun 2021 15:26:40 +0200 Subject: [PATCH 07/22] Remove a lot of serialization unreachable errors --- .../facet/facet_value_string_codec.rs | 13 +++-- .../facet/field_doc_id_facet_string_codec.rs | 15 ++++-- .../roaring_bitmap/bo_roaring_bitmap_codec.rs | 15 +++--- milli/src/update/index_documents/store.rs | 52 ++++++++++++------- 4 files changed, 62 insertions(+), 33 deletions(-) diff --git a/milli/src/heed_codec/facet/facet_value_string_codec.rs b/milli/src/heed_codec/facet/facet_value_string_codec.rs index 350efc450..259dab972 100644 --- a/milli/src/heed_codec/facet/facet_value_string_codec.rs +++ b/milli/src/heed_codec/facet/facet_value_string_codec.rs @@ -5,6 +5,14 @@ use crate::FieldId; pub struct FacetValueStringCodec; +impl FacetValueStringCodec { + pub fn serialize_into(field_id: FieldId, value: &str, out: &mut Vec) { + out.reserve(value.len() + 1); + out.push(field_id); + out.extend_from_slice(value.as_bytes()); + } +} + impl<'a> heed::BytesDecode<'a> for FacetValueStringCodec { type DItem = (FieldId, &'a str); @@ -19,9 +27,8 @@ impl<'a> heed::BytesEncode<'a> for FacetValueStringCodec { type EItem = (FieldId, &'a str); fn bytes_encode((field_id, value): &Self::EItem) -> Option> { - let mut bytes = Vec::with_capacity(value.len() + 1); - bytes.push(*field_id); - bytes.extend_from_slice(value.as_bytes()); + let mut bytes = Vec::new(); + FacetValueStringCodec::serialize_into(*field_id, value, &mut bytes); Some(Cow::Owned(bytes)) } } diff --git a/milli/src/heed_codec/facet/field_doc_id_facet_string_codec.rs b/milli/src/heed_codec/facet/field_doc_id_facet_string_codec.rs index 2e282b2a0..b002346e9 100644 --- a/milli/src/heed_codec/facet/field_doc_id_facet_string_codec.rs +++ b/milli/src/heed_codec/facet/field_doc_id_facet_string_codec.rs @@ -6,6 +6,15 @@ use crate::{FieldId, DocumentId}; pub struct FieldDocIdFacetStringCodec; +impl FieldDocIdFacetStringCodec { + pub fn serialize_into(field_id: FieldId, document_id: DocumentId, value: &str, out: &mut Vec) { + out.reserve(1 + 4 + value.len()); + out.push(field_id); + out.extend_from_slice(&document_id.to_be_bytes()); + out.extend_from_slice(value.as_bytes()); + } +} + impl<'a> heed::BytesDecode<'a> for FieldDocIdFacetStringCodec { type DItem = (FieldId, DocumentId, &'a str); @@ -22,10 +31,8 @@ impl<'a> heed::BytesEncode<'a> for FieldDocIdFacetStringCodec { type EItem = (FieldId, DocumentId, &'a str); fn bytes_encode((field_id, document_id, value): &Self::EItem) -> Option> { - let mut bytes = Vec::with_capacity(1 + 4 + value.len()); - bytes.push(*field_id); - bytes.extend_from_slice(&document_id.to_be_bytes()); - bytes.extend_from_slice(value.as_bytes()); + let mut bytes = Vec::new(); + FieldDocIdFacetStringCodec::serialize_into(*field_id, *document_id, value, &mut bytes); Some(Cow::Owned(bytes)) } } diff --git a/milli/src/heed_codec/roaring_bitmap/bo_roaring_bitmap_codec.rs 
b/milli/src/heed_codec/roaring_bitmap/bo_roaring_bitmap_codec.rs index 8d1eb79dd..994e23b39 100644 --- a/milli/src/heed_codec/roaring_bitmap/bo_roaring_bitmap_codec.rs +++ b/milli/src/heed_codec/roaring_bitmap/bo_roaring_bitmap_codec.rs @@ -6,6 +6,13 @@ use roaring::RoaringBitmap; pub struct BoRoaringBitmapCodec; +impl BoRoaringBitmapCodec { + pub fn serialize_into(bitmap: &RoaringBitmap, out: &mut Vec) { + out.reserve(bitmap.len() as usize * size_of::()); + bitmap.iter().map(u32::to_ne_bytes).for_each(|bytes| out.extend_from_slice(&bytes)); + } +} + impl heed::BytesDecode<'_> for BoRoaringBitmapCodec { type DItem = RoaringBitmap; @@ -25,12 +32,8 @@ impl heed::BytesEncode<'_> for BoRoaringBitmapCodec { type EItem = RoaringBitmap; fn bytes_encode(item: &Self::EItem) -> Option> { - let mut out = Vec::with_capacity(item.len() as usize * size_of::()); - - item.iter() - .map(|i| i.to_ne_bytes()) - .for_each(|bytes| out.extend_from_slice(&bytes)); - + let mut out = Vec::new(); + BoRoaringBitmapCodec::serialize_into(item, &mut out); Some(Cow::Owned(out)) } } diff --git a/milli/src/update/index_documents/store.rs b/milli/src/update/index_documents/store.rs index 16837ca7b..4662cd609 100644 --- a/milli/src/update/index_documents/store.rs +++ b/milli/src/update/index_documents/store.rs @@ -421,6 +421,7 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> { { // We prefix the words by the document id. let mut key = id.to_be_bytes().to_vec(); + let mut buffer = Vec::new(); let base_size = key.len(); // We order the words lexicographically, this way we avoid passing by a sorter. @@ -429,13 +430,15 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> { for (word, positions) in words_positions { key.truncate(base_size); key.extend_from_slice(word.as_bytes()); + buffer.clear(); + // We serialize the positions into a buffer. 
let positions = RoaringBitmap::from_iter(positions.iter().cloned()); - let bytes = BoRoaringBitmapCodec::bytes_encode(&positions) - .with_context(|| "could not serialize positions")?; + BoRoaringBitmapCodec::serialize_into(&positions, &mut buffer); + // that we write under the generated key into MTBL if lmdb_key_valid_size(&key) { - writer.insert(&key, &bytes)?; + writer.insert(&key, &buffer)?; } } @@ -483,14 +486,18 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> { ) -> anyhow::Result<()> where I: IntoIterator { + let mut key_buffer = Vec::new(); + let mut data_buffer = Vec::new(); + for ((field_id, value), docids) in iter { - let key = FacetValueStringCodec::bytes_encode(&(field_id, &value)) - .map(Cow::into_owned) - .context("could not serialize facet key")?; - let bytes = CboRoaringBitmapCodec::bytes_encode(&docids) - .context("could not serialize docids")?; - if lmdb_key_valid_size(&key) { - sorter.insert(&key, &bytes)?; + key_buffer.clear(); + data_buffer.clear(); + + FacetValueStringCodec::serialize_into(field_id, &value, &mut key_buffer); + CboRoaringBitmapCodec::serialize_into(&docids, &mut data_buffer); + + if lmdb_key_valid_size(&key_buffer) { + sorter.insert(&key_buffer, &data_buffer)?; } } @@ -503,14 +510,19 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> { ) -> anyhow::Result<()> where I: IntoIterator), RoaringBitmap)> { + let mut data_buffer = Vec::new(); + for ((field_id, value), docids) in iter { + data_buffer.clear(); + let key = FacetLevelValueF64Codec::bytes_encode(&(field_id, 0, *value, *value)) .map(Cow::into_owned) - .context("could not serialize facet key")?; - let bytes = CboRoaringBitmapCodec::bytes_encode(&docids) - .context("could not serialize docids")?; + .context("could not serialize facet level value key")?; + + CboRoaringBitmapCodec::serialize_into(&docids, &mut data_buffer); + if lmdb_key_valid_size(&key) { - sorter.insert(&key, &bytes)?; + sorter.insert(&key, &data_buffer)?; } } @@ -526,7 +538,7 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> { { let key = FieldDocIdFacetF64Codec::bytes_encode(&(field_id, document_id, *value)) .map(Cow::into_owned) - .context("could not serialize facet key")?; + .context("could not serialize facet level value key")?; if lmdb_key_valid_size(&key) { sorter.insert(&key, &[])?; @@ -542,12 +554,12 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> { value: &str, ) -> anyhow::Result<()> { - let key = FieldDocIdFacetStringCodec::bytes_encode(&(field_id, document_id, value)) - .map(Cow::into_owned) - .context("could not serialize facet key")?; + let mut buffer = Vec::new(); - if lmdb_key_valid_size(&key) { - sorter.insert(&key, &[])?; + FieldDocIdFacetStringCodec::serialize_into(field_id, document_id, value, &mut buffer); + + if lmdb_key_valid_size(&buffer) { + sorter.insert(&buffer, &[])?; } Ok(()) From 23fcf7920ea67672f3c16e50d3ca4c797f851219 Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Wed, 9 Jun 2021 17:05:46 +0200 Subject: [PATCH 08/22] Introduce a basic version of the InternalError struct --- milli/src/error.rs | 42 ++++++++++++++++++++++++++++++++++++++++++ milli/src/lib.rs | 1 + 2 files changed, 43 insertions(+) create mode 100644 milli/src/error.rs diff --git a/milli/src/error.rs b/milli/src/error.rs new file mode 100644 index 000000000..02740377c --- /dev/null +++ b/milli/src/error.rs @@ -0,0 +1,42 @@ +use std::io; + +use crate::{DocumentId, FieldId}; + +pub enum Error { + InternalError(InternalError), + IoError(io::Error), + UserError(UserError), +} + +pub enum InternalError { + DatabaseMissingEntry(DatabaseMissingEntry), + 
FieldIdMapMissingEntry(FieldIdMapMissingEntry), + IndexingMergingKeys(IndexingMergingKeys), +} + +pub enum IndexingMergingKeys { + DocIdWordPosition, + Document, + MainFstDeserialization, + WordLevelPositionDocids, + WordPrefixLevelPositionDocids, +} + +pub enum FieldIdMapMissingEntry { + DisplayedFieldId { field_id: FieldId }, + DisplayedFieldName { field_name: String }, + FacetedFieldName { field_name: String }, + FilterableFieldName { field_name: String }, + SearchableFieldName { field_name: String }, +} + +pub enum DatabaseMissingEntry { + DocumentId { internal_id: DocumentId }, + FacetValuesDocids, + IndexCreationTime, + IndexUpdateTime, +} + +pub enum UserError { + +} diff --git a/milli/src/lib.rs b/milli/src/lib.rs index 8c1ed514c..b7401330a 100644 --- a/milli/src/lib.rs +++ b/milli/src/lib.rs @@ -1,6 +1,7 @@ #[macro_use] extern crate pest_derive; mod criterion; +mod error; mod external_documents_ids; mod fields_ids_map; mod search; From 44c353fafd97217e74a182b3bbb802e33d16f1a6 Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Thu, 10 Jun 2021 15:55:22 +0200 Subject: [PATCH 09/22] Introduce some way to construct an Error --- milli/src/error.rs | 50 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 49 insertions(+), 1 deletion(-) diff --git a/milli/src/error.rs b/milli/src/error.rs index 02740377c..c6dc85e9c 100644 --- a/milli/src/error.rs +++ b/milli/src/error.rs @@ -1,6 +1,9 @@ use std::io; use crate::{DocumentId, FieldId}; +use heed::{MdbError, Error as HeedError}; + +pub type Object = serde_json::Map; pub enum Error { InternalError(InternalError), @@ -12,6 +15,16 @@ pub enum InternalError { DatabaseMissingEntry(DatabaseMissingEntry), FieldIdMapMissingEntry(FieldIdMapMissingEntry), IndexingMergingKeys(IndexingMergingKeys), + SerializationError(SerializationError), + StoreError(MdbError), + InvalidDatabaseTyping, + DatabaseClosing, +} + +pub enum SerializationError { + Decoding { db_name: Option<&'static str> }, + Encoding { db_name: Option<&'static str> }, + InvalidNumberSerialization, } pub enum IndexingMergingKeys { @@ -38,5 +51,40 @@ pub enum DatabaseMissingEntry { } pub enum UserError { - + AttributeLimitReached, + DocumentLimitReached, + InvalidCriterionName { name: String }, + InvalidDocumentId { document_id: DocumentId }, + MissingDocumentId { document: Object }, + MissingPrimaryKey, + DatabaseSizeReached, + NoSpaceLeftOnDevice, + InvalidStoreFile, +} + +impl From for Error { + fn from(error: io::Error) -> Error { + // TODO must be improved and more precise + Error::IoError(error) + } +} + +impl From for Error { + fn from(error: HeedError) -> Error { + use self::Error::*; + use self::InternalError::*; + use self::SerializationError::*; + use self::UserError::*; + + match error { + HeedError::Io(error) => Error::from(error), + HeedError::Mdb(MdbError::MapFull) => UserError(DatabaseSizeReached), + HeedError::Mdb(MdbError::Invalid) => UserError(InvalidStoreFile), + HeedError::Mdb(error) => InternalError(StoreError(error)), + HeedError::Encoding => InternalError(SerializationError(Encoding { db_name: None })), + HeedError::Decoding => InternalError(SerializationError(Decoding { db_name: None })), + HeedError::InvalidDatabaseTyping => InternalError(InvalidDatabaseTyping), + HeedError::DatabaseClosing => InternalError(DatabaseClosing), + } + } } From 456541e921b9bebd5e6a8e66167da6cdee32aae6 Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Thu, 10 Jun 2021 17:31:08 +0200 Subject: [PATCH 10/22] Implement the Display trait on the Error type --- milli/src/error.rs | 132 
++++++++++++++++++++++++++++++++++++--------- 1 file changed, 106 insertions(+), 26 deletions(-) diff --git a/milli/src/error.rs b/milli/src/error.rs index c6dc85e9c..ce22a5512 100644 --- a/milli/src/error.rs +++ b/milli/src/error.rs @@ -1,60 +1,50 @@ -use std::io; +use std::error::Error as StdError; +use std::{fmt, io}; + +use heed::{MdbError, Error as HeedError}; +use serde_json::{Map, Value}; use crate::{DocumentId, FieldId}; -use heed::{MdbError, Error as HeedError}; -pub type Object = serde_json::Map; +pub type Object = Map; +#[derive(Debug)] pub enum Error { InternalError(InternalError), IoError(io::Error), UserError(UserError), } +#[derive(Debug)] pub enum InternalError { - DatabaseMissingEntry(DatabaseMissingEntry), + DatabaseMissingEntry { db_name: &'static str, key: Option<&'static str> }, FieldIdMapMissingEntry(FieldIdMapMissingEntry), - IndexingMergingKeys(IndexingMergingKeys), + IndexingMergingKeys { process: &'static str }, SerializationError(SerializationError), StoreError(MdbError), InvalidDatabaseTyping, DatabaseClosing, } +#[derive(Debug)] pub enum SerializationError { Decoding { db_name: Option<&'static str> }, Encoding { db_name: Option<&'static str> }, InvalidNumberSerialization, } -pub enum IndexingMergingKeys { - DocIdWordPosition, - Document, - MainFstDeserialization, - WordLevelPositionDocids, - WordPrefixLevelPositionDocids, -} - +#[derive(Debug)] pub enum FieldIdMapMissingEntry { - DisplayedFieldId { field_id: FieldId }, - DisplayedFieldName { field_name: String }, - FacetedFieldName { field_name: String }, - FilterableFieldName { field_name: String }, - SearchableFieldName { field_name: String }, -} - -pub enum DatabaseMissingEntry { - DocumentId { internal_id: DocumentId }, - FacetValuesDocids, - IndexCreationTime, - IndexUpdateTime, + FieldId { field_id: FieldId, from_db_name: &'static str }, + FieldName { field_name: String, from_db_name: &'static str }, } +#[derive(Debug)] pub enum UserError { AttributeLimitReached, DocumentLimitReached, InvalidCriterionName { name: String }, - InvalidDocumentId { document_id: DocumentId }, + InvalidDocumentId { document_id: Value }, MissingDocumentId { document: Object }, MissingPrimaryKey, DatabaseSizeReached, @@ -88,3 +78,93 @@ impl From for Error { } } } + +impl fmt::Display for Error { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Self::InternalError(error) => write!(f, "internal: {}", error), + Self::IoError(error) => error.fmt(f), + Self::UserError(error) => error.fmt(f), + } + } +} + +impl StdError for Error {} + +impl fmt::Display for InternalError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Self::DatabaseMissingEntry { db_name, key } => { + write!(f, "missing {} in the {} database", key.unwrap_or("key"), db_name) + }, + Self::FieldIdMapMissingEntry(error) => error.fmt(f), + Self::IndexingMergingKeys { process } => { + write!(f, "invalid merge while processing {}", process) + }, + Self::SerializationError(error) => error.fmt(f), + Self::StoreError(error) => error.fmt(f), + Self::InvalidDatabaseTyping => HeedError::InvalidDatabaseTyping.fmt(f), + Self::DatabaseClosing => HeedError::DatabaseClosing.fmt(f), + } + } +} + +impl StdError for InternalError {} + +impl fmt::Display for UserError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Self::AttributeLimitReached => f.write_str("maximum number of attributes reached"), + Self::DocumentLimitReached => f.write_str("maximum number of documents reached"), + Self::InvalidCriterionName { 
name } => write!(f, "invalid criterion {}", name), + Self::InvalidDocumentId { document_id } => { + let json = serde_json::to_string(document_id).unwrap(); + write!(f, "document identifier is invalid {}", json) + }, + Self::MissingDocumentId { document } => { + let json = serde_json::to_string(document).unwrap(); + write!(f, "document doesn't have an identifier {}", json) + }, + Self::MissingPrimaryKey => f.write_str("missing primary key"), + Self::DatabaseSizeReached => f.write_str("database size reached"), + // TODO where can we find it instead of writing the text ourselves? + Self::NoSpaceLeftOnDevice => f.write_str("no space left on device"), + Self::InvalidStoreFile => f.write_str("store file is not a valid database file"), + } + } +} + +impl StdError for UserError {} + +impl fmt::Display for FieldIdMapMissingEntry { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Self::FieldId { field_id, from_db_name } => { + write!(f, "unknown field id {} coming from {} database", field_id, from_db_name) + }, + Self::FieldName { field_name, from_db_name } => { + write!(f, "unknown field name {} coming from {} database", field_name, from_db_name) + }, + } + } +} + +impl StdError for FieldIdMapMissingEntry {} + +impl fmt::Display for SerializationError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Self::Decoding { db_name: Some(name) } => { + write!(f, "decoding from the {} database failed", name) + }, + Self::Decoding { db_name: None } => f.write_str("decoding failed"), + Self::Encoding { db_name: Some(name) } => { + write!(f, "encoding into the {} database failed", name) + }, + Self::Encoding { db_name: None } => f.write_str("encoding failed"), + Self::InvalidNumberSerialization => f.write_str("number is not a valid finite number"), + } + } +} + +impl StdError for SerializationError {} From ca78cb5aca8c91a6efdc9e4d1ba37080d46c5e00 Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Mon, 14 Jun 2021 16:58:38 +0200 Subject: [PATCH 11/22] Introduce more variants to the error module enums --- milli/src/error.rs | 107 ++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 96 insertions(+), 11 deletions(-) diff --git a/milli/src/error.rs b/milli/src/error.rs index ce22a5512..096851f09 100644 --- a/milli/src/error.rs +++ b/milli/src/error.rs @@ -1,9 +1,12 @@ +use std::convert::Infallible; use std::error::Error as StdError; -use std::{fmt, io}; +use std::{fmt, io, str}; use heed::{MdbError, Error as HeedError}; +use rayon::ThreadPoolBuildError; use serde_json::{Map, Value}; +use crate::search::ParserRule; use crate::{DocumentId, FieldId}; pub type Object = Map; @@ -17,13 +20,18 @@ pub enum Error { #[derive(Debug)] pub enum InternalError { + DatabaseClosing, DatabaseMissingEntry { db_name: &'static str, key: Option<&'static str> }, FieldIdMapMissingEntry(FieldIdMapMissingEntry), + Fst(fst::Error), + GrenadInvalidCompressionType, IndexingMergingKeys { process: &'static str }, - SerializationError(SerializationError), - StoreError(MdbError), InvalidDatabaseTyping, - DatabaseClosing, + RayonThreadPool(ThreadPoolBuildError), + SerdeJson(serde_json::Error), + Serialization(SerializationError), + Store(MdbError), + Utf8(str::Utf8Error), } #[derive(Debug)] @@ -42,14 +50,18 @@ pub enum FieldIdMapMissingEntry { #[derive(Debug)] pub enum UserError { AttributeLimitReached, + Csv(csv::Error), + DatabaseSizeReached, DocumentLimitReached, + FilterParsing(pest::error::Error), InvalidCriterionName { name: String }, InvalidDocumentId { document_id: Value }, + 
InvalidStoreFile, MissingDocumentId { document: Object }, MissingPrimaryKey, - DatabaseSizeReached, NoSpaceLeftOnDevice, - InvalidStoreFile, + SerdeJson(serde_json::Error), + UnknownInternalDocumentId { document_id: DocumentId }, } impl From for Error { @@ -59,6 +71,36 @@ impl From for Error { } } +impl From for Error { + fn from(error: fst::Error) -> Error { + Error::InternalError(InternalError::Fst(error)) + } +} + +impl From> for Error where Error: From { + fn from(error: grenad::Error) -> Error { + match error { + grenad::Error::Io(error) => Error::IoError(error), + grenad::Error::Merge(error) => Error::from(error), + grenad::Error::InvalidCompressionType => { + Error::InternalError(InternalError::GrenadInvalidCompressionType) + }, + } + } +} + +impl From for Error { + fn from(error: str::Utf8Error) -> Error { + Error::InternalError(InternalError::Utf8(error)) + } +} + +impl From for Error { + fn from(_error: Infallible) -> Error { + unreachable!() + } +} + impl From for Error { fn from(error: HeedError) -> Error { use self::Error::*; @@ -70,15 +112,45 @@ impl From for Error { HeedError::Io(error) => Error::from(error), HeedError::Mdb(MdbError::MapFull) => UserError(DatabaseSizeReached), HeedError::Mdb(MdbError::Invalid) => UserError(InvalidStoreFile), - HeedError::Mdb(error) => InternalError(StoreError(error)), - HeedError::Encoding => InternalError(SerializationError(Encoding { db_name: None })), - HeedError::Decoding => InternalError(SerializationError(Decoding { db_name: None })), + HeedError::Mdb(error) => InternalError(Store(error)), + HeedError::Encoding => InternalError(Serialization(Encoding { db_name: None })), + HeedError::Decoding => InternalError(Serialization(Decoding { db_name: None })), HeedError::InvalidDatabaseTyping => InternalError(InvalidDatabaseTyping), HeedError::DatabaseClosing => InternalError(DatabaseClosing), } } } +impl From for Error { + fn from(error: ThreadPoolBuildError) -> Error { + Error::InternalError(InternalError::RayonThreadPool(error)) + } +} + +impl From for Error { + fn from(error: FieldIdMapMissingEntry) -> Error { + Error::InternalError(InternalError::FieldIdMapMissingEntry(error)) + } +} + +impl From for Error { + fn from(error: InternalError) -> Error { + Error::InternalError(error) + } +} + +impl From for Error { + fn from(error: UserError) -> Error { + Error::UserError(error) + } +} + +impl From for Error { + fn from(error: SerializationError) -> Error { + Error::InternalError(InternalError::Serialization(error)) + } +} + impl fmt::Display for Error { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { @@ -98,13 +170,20 @@ impl fmt::Display for InternalError { write!(f, "missing {} in the {} database", key.unwrap_or("key"), db_name) }, Self::FieldIdMapMissingEntry(error) => error.fmt(f), + Self::Fst(error) => error.fmt(f), + Self::GrenadInvalidCompressionType => { + f.write_str("invalid compression type have been specified to grenad") + }, Self::IndexingMergingKeys { process } => { write!(f, "invalid merge while processing {}", process) }, - Self::SerializationError(error) => error.fmt(f), - Self::StoreError(error) => error.fmt(f), + Self::Serialization(error) => error.fmt(f), Self::InvalidDatabaseTyping => HeedError::InvalidDatabaseTyping.fmt(f), + Self::RayonThreadPool(error) => error.fmt(f), + Self::SerdeJson(error) => error.fmt(f), Self::DatabaseClosing => HeedError::DatabaseClosing.fmt(f), + Self::Store(error) => error.fmt(f), + Self::Utf8(error) => error.fmt(f), } } } @@ -115,7 +194,9 @@ impl fmt::Display for 
UserError { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { Self::AttributeLimitReached => f.write_str("maximum number of attributes reached"), + Self::Csv(error) => error.fmt(f), Self::DocumentLimitReached => f.write_str("maximum number of documents reached"), + Self::FilterParsing(error) => error.fmt(f), Self::InvalidCriterionName { name } => write!(f, "invalid criterion {}", name), Self::InvalidDocumentId { document_id } => { let json = serde_json::to_string(document_id).unwrap(); @@ -130,6 +211,10 @@ impl fmt::Display for UserError { // TODO where can we find it instead of writing the text ourselves? Self::NoSpaceLeftOnDevice => f.write_str("no space left on device"), Self::InvalidStoreFile => f.write_str("store file is not a valid database file"), + Self::SerdeJson(error) => error.fmt(f), + Self::UnknownInternalDocumentId { document_id } => { + write!(f, "an unknown internal document id have been used ({})", document_id) + }, } } } From 312c2d1d8ef12d3b7e50a39f909978f73fd0459e Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Mon, 14 Jun 2021 16:46:19 +0200 Subject: [PATCH 12/22] Use the Error enum everywhere in the project --- Cargo.lock | 1 - milli/Cargo.toml | 1 - milli/src/criterion.rs | 11 +- milli/src/index.rs | 22 ++-- milli/src/lib.rs | 16 ++- milli/src/search/criteria/asc_desc.rs | 21 ++-- milli/src/search/criteria/attribute.rs | 17 ++- milli/src/search/criteria/exactness.rs | 6 +- milli/src/search/criteria/final.rs | 3 +- milli/src/search/criteria/initial.rs | 4 +- milli/src/search/criteria/mod.rs | 27 ++--- milli/src/search/criteria/proximity.rs | 19 ++-- milli/src/search/criteria/typo.rs | 13 ++- milli/src/search/criteria/words.rs | 3 +- milli/src/search/distinct/facet_distinct.rs | 14 +-- milli/src/search/distinct/mod.rs | 6 +- milli/src/search/distinct/noop_distinct.rs | 4 +- milli/src/search/facet/facet_distribution.rs | 11 +- milli/src/search/facet/filter_condition.rs | 70 +++++++----- milli/src/search/facet/mod.rs | 3 +- milli/src/search/mod.rs | 10 +- milli/src/search/query_tree.rs | 14 +-- milli/src/update/clear_documents.rs | 5 +- milli/src/update/delete_documents.rs | 14 ++- milli/src/update/facets.rs | 13 ++- .../update/index_documents/merge_function.rs | 16 +-- milli/src/update/index_documents/mod.rs | 92 +++++++++------- milli/src/update/index_documents/store.rs | 104 ++++++++++-------- milli/src/update/index_documents/transform.rs | 77 +++++++------ milli/src/update/settings.rs | 41 +++---- milli/src/update/update_builder.rs | 4 +- milli/src/update/word_prefix_docids.rs | 3 +- .../word_prefix_pair_proximity_docids.rs | 4 +- milli/src/update/words_level_positions.rs | 12 +- milli/src/update/words_prefixes_fst.rs | 4 +- 35 files changed, 385 insertions(+), 300 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c719f6f03..8e6794fb6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1377,7 +1377,6 @@ dependencies = [ name = "milli" version = "0.3.1" dependencies = [ - "anyhow", "big_s", "bstr", "byteorder", diff --git a/milli/Cargo.toml b/milli/Cargo.toml index 9fe1ce3d3..ac7a977a2 100644 --- a/milli/Cargo.toml +++ b/milli/Cargo.toml @@ -5,7 +5,6 @@ authors = ["Kerollmops "] edition = "2018" [dependencies] -anyhow = "1.0.38" bstr = "0.2.15" byteorder = "1.4.2" chrono = { version = "0.4.19", features = ["serde"] } diff --git a/milli/src/criterion.rs b/milli/src/criterion.rs index c2205613d..931cf8588 100644 --- a/milli/src/criterion.rs +++ b/milli/src/criterion.rs @@ -1,11 +1,12 @@ use std::fmt; use std::str::FromStr; -use anyhow::{Context, bail}; 
use regex::Regex; use serde::{Serialize, Deserialize}; use once_cell::sync::Lazy; +use crate::error::{Error, UserError}; + static ASC_DESC_REGEX: Lazy = Lazy::new(|| { Regex::new(r#"(asc|desc)\(([\w_-]+)\)"#).unwrap() }); @@ -41,7 +42,7 @@ impl Criterion { } impl FromStr for Criterion { - type Err = anyhow::Error; + type Err = Error; fn from_str(txt: &str) -> Result { match txt { @@ -51,13 +52,15 @@ impl FromStr for Criterion { "attribute" => Ok(Criterion::Attribute), "exactness" => Ok(Criterion::Exactness), text => { - let caps = ASC_DESC_REGEX.captures(text).with_context(|| format!("unknown criterion name: {}", text))?; + let caps = ASC_DESC_REGEX.captures(text).ok_or_else(|| { + UserError::InvalidCriterionName { name: text.to_string() } + })?; let order = caps.get(1).unwrap().as_str(); let field_name = caps.get(2).unwrap().as_str(); match order { "asc" => Ok(Criterion::Asc(field_name.to_string())), "desc" => Ok(Criterion::Desc(field_name.to_string())), - otherwise => bail!("unknown criterion name: {}", otherwise), + text => return Err(UserError::InvalidCriterionName { name: text.to_string() }.into()), } }, } diff --git a/milli/src/index.rs b/milli/src/index.rs index 4e32f673a..9ebe34a2e 100644 --- a/milli/src/index.rs +++ b/milli/src/index.rs @@ -2,14 +2,14 @@ use std::borrow::Cow; use std::collections::{HashMap, HashSet}; use std::path::Path; -use anyhow::Context; use chrono::{DateTime, Utc}; use heed::{Database, PolyDatabase, RoTxn, RwTxn}; use heed::types::*; use roaring::RoaringBitmap; +use crate::error::UserError; use crate::{Criterion, default_criteria, FacetDistribution, FieldsDistribution, Search}; -use crate::{BEU32, DocumentId, ExternalDocumentsIds, FieldId}; +use crate::{BEU32, DocumentId, ExternalDocumentsIds, FieldId, Result}; use crate::{ BEU32StrCodec, BoRoaringBitmapCodec, CboRoaringBitmapCodec, ObkvCodec, RoaringBitmapCodec, RoaringBitmapLenCodec, StrLevelPositionCodec, StrStrU8Codec, @@ -84,7 +84,7 @@ pub struct Index { } impl Index { - pub fn new>(mut options: heed::EnvOpenOptions, path: P) -> anyhow::Result { + pub fn new>(mut options: heed::EnvOpenOptions, path: P) -> Result { options.max_dbs(14); let env = options.open(path)?; @@ -173,7 +173,7 @@ impl Index { } /// Returns the number of documents indexed in the database. - pub fn number_of_documents(&self, rtxn: &RoTxn) -> anyhow::Result { + pub fn number_of_documents(&self, rtxn: &RoTxn) -> Result { let count = self.main.get::<_, Str, RoaringBitmapLenCodec>(rtxn, DOCUMENTS_IDS_KEY)?; Ok(count.unwrap_or_default()) } @@ -215,7 +215,7 @@ impl Index { /// Returns the external documents ids map which associate the external ids /// with the internal ids (i.e. `u32`). - pub fn external_documents_ids<'t>(&self, rtxn: &'t RoTxn) -> anyhow::Result> { + pub fn external_documents_ids<'t>(&self, rtxn: &'t RoTxn) -> Result> { let hard = self.main.get::<_, Str, ByteSlice>(rtxn, HARD_EXTERNAL_DOCUMENTS_IDS_KEY)?; let soft = self.main.get::<_, Str, ByteSlice>(rtxn, SOFT_EXTERNAL_DOCUMENTS_IDS_KEY)?; let hard = match hard { @@ -504,7 +504,7 @@ impl Index { } /// Returns the FST which is the words dictionary of the engine. - pub fn words_fst<'t>(&self, rtxn: &'t RoTxn) -> anyhow::Result>> { + pub fn words_fst<'t>(&self, rtxn: &'t RoTxn) -> Result>> { match self.main.get::<_, Str, ByteSlice>(rtxn, WORDS_FST_KEY)? 
{ Some(bytes) => Ok(fst::Set::new(bytes)?.map_data(Cow::Borrowed)?), None => Ok(fst::Set::default().map_data(Cow::Owned)?), @@ -521,7 +521,7 @@ impl Index { self.main.delete::<_, Str>(wtxn, STOP_WORDS_KEY) } - pub fn stop_words<'t>(&self, rtxn: &'t RoTxn) -> anyhow::Result>> { + pub fn stop_words<'t>(&self, rtxn: &'t RoTxn) -> Result>> { match self.main.get::<_, Str, ByteSlice>(rtxn, STOP_WORDS_KEY)? { Some(bytes) => Ok(Some(fst::Set::new(bytes)?)), None => Ok(None), @@ -555,7 +555,7 @@ impl Index { } /// Returns the FST which is the words prefixes dictionnary of the engine. - pub fn words_prefixes_fst<'t>(&self, rtxn: &'t RoTxn) -> anyhow::Result>> { + pub fn words_prefixes_fst<'t>(&self, rtxn: &'t RoTxn) -> Result>> { match self.main.get::<_, Str, ByteSlice>(rtxn, WORDS_PREFIXES_FST_KEY)? { Some(bytes) => Ok(fst::Set::new(bytes)?.map_data(Cow::Borrowed)?), None => Ok(fst::Set::default().map_data(Cow::Owned)?), @@ -577,13 +577,13 @@ impl Index { &self, rtxn: &'t RoTxn, ids: impl IntoIterator, - ) -> anyhow::Result)>> + ) -> Result)>> { let mut documents = Vec::new(); for id in ids { let kv = self.documents.get(rtxn, &BEU32::new(id))? - .with_context(|| format!("Could not find document {}", id))?; + .ok_or_else(|| UserError::UnknownInternalDocumentId { document_id: id })?; documents.push((id, kv)); } @@ -594,7 +594,7 @@ impl Index { pub fn all_documents<'t>( &self, rtxn: &'t RoTxn, - ) -> anyhow::Result)>>> { + ) -> Result)>>> { Ok(self .documents .iter(rtxn)? diff --git a/milli/src/lib.rs b/milli/src/lib.rs index b7401330a..6fa88ad64 100644 --- a/milli/src/lib.rs +++ b/milli/src/lib.rs @@ -15,12 +15,13 @@ pub mod update; use std::borrow::Cow; use std::collections::HashMap; use std::hash::BuildHasherDefault; +use std::result::Result as StdResult; -use anyhow::Context; use fxhash::{FxHasher32, FxHasher64}; use serde_json::{Map, Value}; pub use self::criterion::{Criterion, default_criteria}; +pub use self::error::Error; pub use self::external_documents_ids::ExternalDocumentsIds; pub use self::fields_ids_map::FieldsIdsMap; pub use self::heed_codec::{BEU32StrCodec, StrStrU8Codec, StrLevelPositionCodec, ObkvCodec, FieldIdWordCountCodec}; @@ -30,6 +31,8 @@ pub use self::index::Index; pub use self::search::{Search, FacetDistribution, FilterCondition, SearchResult, MatchingWords}; pub use self::tree_level::TreeLevel; +pub type Result = std::result::Result; + pub type FastMap4 = HashMap>; pub type FastMap8 = HashMap>; pub type SmallString32 = smallstr::SmallString<[u8; 32]>; @@ -44,21 +47,24 @@ pub type FieldId = u8; pub type Position = u32; pub type FieldsDistribution = HashMap; -type MergeFn = for<'a> fn(&[u8], &[Cow<'a, [u8]>]) -> anyhow::Result>; +type MergeFn = for<'a> fn(&[u8], &[Cow<'a, [u8]>]) -> StdResult, E>; /// Transform a raw obkv store into a JSON Object. 
pub fn obkv_to_json( displayed_fields: &[FieldId], fields_ids_map: &FieldsIdsMap, obkv: obkv::KvReader, -) -> anyhow::Result> +) -> Result> { displayed_fields.iter() .copied() .flat_map(|id| obkv.get(id).map(|value| (id, value))) .map(|(id, value)| { - let name = fields_ids_map.name(id).context("unknown obkv field id")?; - let value = serde_json::from_slice(value)?; + let name = fields_ids_map.name(id).ok_or(error::FieldIdMapMissingEntry::FieldId { + field_id: id, + from_db_name: "documents", + })?; + let value = serde_json::from_slice(value).map_err(error::InternalError::SerdeJson)?; Ok((name.to_owned(), value)) }) .collect() diff --git a/milli/src/search/criteria/asc_desc.rs b/milli/src/search/criteria/asc_desc.rs index f90f3e421..c72781629 100644 --- a/milli/src/search/criteria/asc_desc.rs +++ b/milli/src/search/criteria/asc_desc.rs @@ -1,15 +1,15 @@ use std::mem::take; -use anyhow::Context; use itertools::Itertools; use log::debug; use ordered_float::OrderedFloat; use roaring::RoaringBitmap; +use crate::error::FieldIdMapMissingEntry; use crate::search::criteria::{resolve_query_tree, CriteriaBuilder}; use crate::search::facet::FacetIter; use crate::search::query_tree::Operation; -use crate::{FieldId, Index}; +use crate::{FieldId, Index, Result}; use super::{Criterion, CriterionParameters, CriterionResult}; /// Threshold on the number of candidates that will make @@ -36,7 +36,7 @@ impl<'t> AscDesc<'t> { rtxn: &'t heed::RoTxn, parent: Box, field_name: String, - ) -> anyhow::Result { + ) -> Result { Self::new(index, rtxn, parent, field_name, true) } @@ -45,7 +45,7 @@ impl<'t> AscDesc<'t> { rtxn: &'t heed::RoTxn, parent: Box, field_name: String, - ) -> anyhow::Result { + ) -> Result { Self::new(index, rtxn, parent, field_name, false) } @@ -55,11 +55,14 @@ impl<'t> AscDesc<'t> { parent: Box, field_name: String, ascending: bool, - ) -> anyhow::Result { + ) -> Result { let fields_ids_map = index.fields_ids_map(rtxn)?; let field_id = fields_ids_map .id(&field_name) - .with_context(|| format!("field {:?} isn't registered", field_name))?; + .ok_or_else(|| FieldIdMapMissingEntry::FieldName { + field_name: field_name.clone(), + from_db_name: "asc-desc", + })?; Ok(AscDesc { index, @@ -79,7 +82,7 @@ impl<'t> AscDesc<'t> { impl<'t> Criterion for AscDesc<'t> { #[logging_timer::time("AscDesc::{}")] - fn next(&mut self, params: &mut CriterionParameters) -> anyhow::Result> { + fn next(&mut self, params: &mut CriterionParameters) -> Result> { // remove excluded candidates when next is called, instead of doing it in the loop. 
self.allowed_candidates -= params.excluded_candidates; @@ -162,7 +165,7 @@ fn facet_ordered<'t>( field_id: FieldId, ascending: bool, candidates: RoaringBitmap, -) -> anyhow::Result> + 't>> { +) -> Result> + 't>> { if candidates.len() <= CANDIDATES_THRESHOLD { let iter = iterative_facet_ordered_iter(index, rtxn, field_id, ascending, candidates)?; Ok(Box::new(iter.map(Ok)) as Box>) @@ -186,7 +189,7 @@ fn iterative_facet_ordered_iter<'t>( field_id: FieldId, ascending: bool, candidates: RoaringBitmap, -) -> anyhow::Result + 't> { +) -> Result + 't> { let mut docids_values = Vec::with_capacity(candidates.len() as usize); for docid in candidates.iter() { let left = (field_id, docid, f64::MIN); diff --git a/milli/src/search/criteria/attribute.rs b/milli/src/search/criteria/attribute.rs index f825623f6..f191defe1 100644 --- a/milli/src/search/criteria/attribute.rs +++ b/milli/src/search/criteria/attribute.rs @@ -5,7 +5,7 @@ use std::mem::take; use roaring::RoaringBitmap; -use crate::{TreeLevel, search::build_dfa}; +use crate::{TreeLevel, Result, search::build_dfa}; use crate::search::criteria::Query; use crate::search::query_tree::{Operation, QueryKind}; use crate::search::{word_derivations, WordDerivationsCache}; @@ -48,7 +48,7 @@ impl<'t> Attribute<'t> { impl<'t> Criterion for Attribute<'t> { #[logging_timer::time("Attribute::{}")] - fn next(&mut self, params: &mut CriterionParameters) -> anyhow::Result> { + fn next(&mut self, params: &mut CriterionParameters) -> Result> { // remove excluded candidates when next is called, instead of doing it in the loop. if let Some((_, _, allowed_candidates)) = self.state.as_mut() { *allowed_candidates -= params.excluded_candidates; @@ -224,7 +224,12 @@ struct QueryLevelIterator<'t, 'q> { } impl<'t, 'q> QueryLevelIterator<'t, 'q> { - fn new(ctx: &'t dyn Context<'t>, queries: &'q [Query], wdcache: &mut WordDerivationsCache) -> anyhow::Result> { + fn new( + ctx: &'t dyn Context<'t>, + queries: &'q [Query], + wdcache: &mut WordDerivationsCache, + ) -> Result> + { let mut inner = Vec::with_capacity(queries.len()); for query in queries { match &query.kind { @@ -471,7 +476,7 @@ fn initialize_query_level_iterators<'t, 'q>( branches: &'q FlattenedQueryTree, allowed_candidates: &RoaringBitmap, wdcache: &mut WordDerivationsCache, -) -> anyhow::Result>> { +) -> Result>> { let mut positions = BinaryHeap::with_capacity(branches.len()); for branch in branches { @@ -521,7 +526,7 @@ fn set_compute_candidates<'t>( branches: &FlattenedQueryTree, allowed_candidates: &RoaringBitmap, wdcache: &mut WordDerivationsCache, -) -> anyhow::Result> +) -> Result> { let mut branches_heap = initialize_query_level_iterators(ctx, branches, allowed_candidates, wdcache)?; let lowest_level = TreeLevel::min_value(); @@ -573,7 +578,7 @@ fn linear_compute_candidates( ctx: &dyn Context, branches: &FlattenedQueryTree, allowed_candidates: &RoaringBitmap, -) -> anyhow::Result> +) -> Result> { fn compute_candidate_rank(branches: &FlattenedQueryTree, words_positions: HashMap) -> u64 { let mut min_rank = u64::max_value(); diff --git a/milli/src/search/criteria/exactness.rs b/milli/src/search/criteria/exactness.rs index 4d9e54f6e..eb44b7b8e 100644 --- a/milli/src/search/criteria/exactness.rs +++ b/milli/src/search/criteria/exactness.rs @@ -14,7 +14,7 @@ use crate::search::criteria::{ CriterionResult, resolve_query_tree, }; -use crate::TreeLevel; +use crate::{TreeLevel, Result}; pub struct Exactness<'t> { ctx: &'t dyn Context<'t>, @@ -45,7 +45,7 @@ impl<'t> Exactness<'t> { impl<'t> Criterion for 
Exactness<'t> { #[logging_timer::time("Exactness::{}")] - fn next(&mut self, params: &mut CriterionParameters) -> anyhow::Result> { + fn next(&mut self, params: &mut CriterionParameters) -> Result> { // remove excluded candidates when next is called, instead of doing it in the loop. if let Some(state) = self.state.as_mut() { state.difference_with(params.excluded_candidates); @@ -158,7 +158,7 @@ fn resolve_state( ctx: &dyn Context, state: State, query: &[ExactQueryPart], -) -> anyhow::Result<(RoaringBitmap, Option)> +) -> Result<(RoaringBitmap, Option)> { use State::*; match state { diff --git a/milli/src/search/criteria/final.rs b/milli/src/search/criteria/final.rs index 860362f51..645a3a5d7 100644 --- a/milli/src/search/criteria/final.rs +++ b/milli/src/search/criteria/final.rs @@ -1,6 +1,7 @@ use log::debug; use roaring::RoaringBitmap; +use crate::Result; use crate::search::query_tree::Operation; use crate::search::WordDerivationsCache; use super::{resolve_query_tree, Criterion, CriterionResult, CriterionParameters, Context}; @@ -29,7 +30,7 @@ impl<'t> Final<'t> { } #[logging_timer::time("Final::{}")] - pub fn next(&mut self, excluded_candidates: &RoaringBitmap) -> anyhow::Result> { + pub fn next(&mut self, excluded_candidates: &RoaringBitmap) -> Result> { debug!("Final iteration"); let excluded_candidates = &self.returned_candidates | excluded_candidates; let mut criterion_parameters = CriterionParameters { diff --git a/milli/src/search/criteria/initial.rs b/milli/src/search/criteria/initial.rs index 5d242a0eb..e6d0a17f7 100644 --- a/milli/src/search/criteria/initial.rs +++ b/milli/src/search/criteria/initial.rs @@ -1,7 +1,7 @@ use roaring::RoaringBitmap; +use crate::Result; use crate::search::query_tree::Operation; - use super::{Criterion, CriterionResult, CriterionParameters}; pub struct Initial { @@ -22,7 +22,7 @@ impl Initial { impl Criterion for Initial { #[logging_timer::time("Initial::{}")] - fn next(&mut self, _: &mut CriterionParameters) -> anyhow::Result> { + fn next(&mut self, _: &mut CriterionParameters) -> Result> { Ok(self.answer.take()) } } diff --git a/milli/src/search/criteria/mod.rs b/milli/src/search/criteria/mod.rs index b14d75ddb..981fc3ef2 100644 --- a/milli/src/search/criteria/mod.rs +++ b/milli/src/search/criteria/mod.rs @@ -4,7 +4,7 @@ use std::borrow::Cow; use roaring::RoaringBitmap; use crate::{FieldId, TreeLevel, search::{word_derivations, WordDerivationsCache}}; -use crate::{Index, DocumentId}; +use crate::{Index, DocumentId, Result}; use super::query_tree::{Operation, PrimitiveQueryPart, Query, QueryKind}; use self::asc_desc::AscDesc; @@ -26,7 +26,7 @@ mod words; pub mod r#final; pub trait Criterion { - fn next(&mut self, params: &mut CriterionParameters) -> anyhow::Result>; + fn next(&mut self, params: &mut CriterionParameters) -> Result>; } /// The result of a call to the parent criterion. 
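
The `Result` in the new `Criterion::next` signature is the crate-wide alias introduced by this series, not `anyhow::Result`. As a reading aid for the remaining hunks, here is a minimal sketch of the error module they rely on; the variant names are the ones these call sites use, while the payload types and the `From` conversions are inferred from those call sites rather than copied from the upstream file:

    // Sketch only: variants taken from the hunks in this patch, payloads inferred.
    // DocumentId and FieldId are the crate aliases defined in lib.rs.
    pub type Result<T> = std::result::Result<T, Error>;

    #[derive(Debug)]
    pub enum Error {
        InternalError(InternalError),
        IoError(std::io::Error),
        UserError(UserError),
        // plus From-based conversions for heed, fst, serde_json, ... (elided)
    }

    #[derive(Debug)]
    pub enum UserError {
        AttributeLimitReached,
        Csv(csv::Error),
        DocumentLimitReached,
        FilterParsing(pest::error::Error<ParserRule>), // ParserRule = search::facet::parser::Rule
        InvalidCriterionName { name: String },
        InvalidDocumentId { document_id: serde_json::Value },
        InvalidStoreFile,
        MissingPrimaryKey,
        NoSpaceLeftOnDevice,
        SerdeJson(serde_json::Error),
        UnknownInternalDocumentId { document_id: DocumentId },
    }

    #[derive(Debug)]
    pub enum InternalError {
        DatabaseMissingEntry { db_name: &'static str, key: Option<&'static str> },
        FieldIdMapMissingEntry(FieldIdMapMissingEntry),
        IndexingMergingKeys { process: &'static str },
        SerdeJson(serde_json::Error),
        Serialization(SerializationError),
    }

    #[derive(Debug)]
    pub enum FieldIdMapMissingEntry {
        FieldId { field_id: FieldId, from_db_name: &'static str },
        FieldName { field_name: String, from_db_name: &'static str },
    }

    #[derive(Debug)]
    pub enum SerializationError {
        Encoding { db_name: Option<&'static str> },
    }

    impl From<UserError> for Error {
        fn from(error: UserError) -> Error { Error::UserError(error) }
    }

    impl From<InternalError> for Error {
        fn from(error: InternalError) -> Error { Error::InternalError(error) }
    }
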
@@ -78,8 +78,9 @@ pub trait Context<'c> { fn synonyms(&self, word: &str) -> heed::Result>>>; fn searchable_fields_ids(&self) -> heed::Result>; fn field_id_word_count_docids(&self, field_id: FieldId, word_count: u8) -> heed::Result>; - fn word_level_position_docids(&self, word: &str, level: TreeLevel, left: u32, right: u32) -> Result, heed::Error>; + fn word_level_position_docids(&self, word: &str, level: TreeLevel, left: u32, right: u32) -> heed::Result>; } + pub struct CriteriaBuilder<'t> { rtxn: &'t heed::RoTxn<'t>, index: &'t Index, @@ -185,14 +186,14 @@ impl<'c> Context<'c> for CriteriaBuilder<'c> { self.index.field_id_word_count_docids.get(self.rtxn, &key) } - fn word_level_position_docids(&self, word: &str, level: TreeLevel, left: u32, right: u32) -> Result, heed::Error> { + fn word_level_position_docids(&self, word: &str, level: TreeLevel, left: u32, right: u32) -> heed::Result> { let key = (word, level, left, right); self.index.word_level_position_docids.get(self.rtxn, &key) } } impl<'t> CriteriaBuilder<'t> { - pub fn new(rtxn: &'t heed::RoTxn<'t>, index: &'t Index) -> anyhow::Result { + pub fn new(rtxn: &'t heed::RoTxn<'t>, index: &'t Index) -> Result { let words_fst = index.words_fst(rtxn)?; let words_prefixes_fst = index.words_prefixes_fst(rtxn)?; Ok(Self { rtxn, index, words_fst, words_prefixes_fst }) @@ -203,7 +204,7 @@ impl<'t> CriteriaBuilder<'t> { query_tree: Option, primitive_query: Option>, filtered_candidates: Option, - ) -> anyhow::Result> + ) -> Result> { use crate::criterion::Criterion as Name; @@ -230,13 +231,13 @@ pub fn resolve_query_tree<'t>( ctx: &'t dyn Context, query_tree: &Operation, wdcache: &mut WordDerivationsCache, -) -> anyhow::Result +) -> Result { fn resolve_operation<'t>( ctx: &'t dyn Context, query_tree: &Operation, wdcache: &mut WordDerivationsCache, - ) -> anyhow::Result + ) -> Result { use Operation::{And, Phrase, Or, Query}; @@ -244,7 +245,7 @@ pub fn resolve_query_tree<'t>( And(ops) => { let mut ops = ops.iter().map(|op| { resolve_operation(ctx, op, wdcache) - }).collect::>>()?; + }).collect::>>()?; ops.sort_unstable_by_key(|cds| cds.len()); @@ -302,7 +303,7 @@ fn all_word_pair_proximity_docids, U: AsRef>( left_words: &[(T, u8)], right_words: &[(U, u8)], proximity: u8 -) -> anyhow::Result +) -> Result { let mut docids = RoaringBitmap::new(); for (left, _l_typo) in left_words { @@ -318,7 +319,7 @@ fn query_docids( ctx: &dyn Context, query: &Query, wdcache: &mut WordDerivationsCache, -) -> anyhow::Result +) -> Result { match &query.kind { QueryKind::Exact { word, .. 
} => { @@ -354,7 +355,7 @@ fn query_pair_proximity_docids( right: &Query, proximity: u8, wdcache: &mut WordDerivationsCache, -) -> anyhow::Result +) -> Result { if proximity >= 8 { let mut candidates = query_docids(ctx, left, wdcache)?; @@ -481,7 +482,7 @@ pub mod test { todo!() } - fn word_level_position_docids(&self, _word: &str, _level: TreeLevel, _left: u32, _right: u32) -> Result, heed::Error> { + fn word_level_position_docids(&self, _word: &str, _level: TreeLevel, _left: u32, _right: u32) -> heed::Result> { todo!() } diff --git a/milli/src/search/criteria/proximity.rs b/milli/src/search/criteria/proximity.rs index 4da6fd1eb..c3c8027cb 100644 --- a/milli/src/search/criteria/proximity.rs +++ b/milli/src/search/criteria/proximity.rs @@ -5,9 +5,10 @@ use std::mem::take; use roaring::RoaringBitmap; use log::debug; -use crate::{DocumentId, Position, search::{query_tree::QueryKind}}; use crate::search::query_tree::{maximum_proximity, Operation, Query}; use crate::search::{build_dfa, WordDerivationsCache}; +use crate::search::{query_tree::QueryKind}; +use crate::{DocumentId, Position, Result}; use super::{ Context, Criterion, @@ -55,7 +56,7 @@ impl<'t> Proximity<'t> { impl<'t> Criterion for Proximity<'t> { #[logging_timer::time("Proximity::{}")] - fn next(&mut self, params: &mut CriterionParameters) -> anyhow::Result> { + fn next(&mut self, params: &mut CriterionParameters) -> Result> { // remove excluded candidates when next is called, instead of doing it in the loop. if let Some((_, _, allowed_candidates)) = self.state.as_mut() { *allowed_candidates -= params.excluded_candidates; @@ -161,7 +162,7 @@ fn resolve_candidates<'t>( proximity: u8, cache: &mut Cache, wdcache: &mut WordDerivationsCache, -) -> anyhow::Result +) -> Result { fn resolve_operation<'t>( ctx: &'t dyn Context, @@ -169,7 +170,7 @@ fn resolve_candidates<'t>( proximity: u8, cache: &mut Cache, wdcache: &mut WordDerivationsCache, - ) -> anyhow::Result> + ) -> Result> { use Operation::{And, Phrase, Or}; @@ -227,7 +228,7 @@ fn resolve_candidates<'t>( proximity: u8, cache: &mut Cache, wdcache: &mut WordDerivationsCache, - ) -> anyhow::Result> + ) -> Result> { fn pair_combinations(mana: u8, left_max: u8) -> impl Iterator { (0..=mana.min(left_max)).map(move |m| (m, mana - m)) @@ -281,7 +282,7 @@ fn resolve_candidates<'t>( proximity: u8, cache: &mut Cache, wdcache: &mut WordDerivationsCache, - ) -> anyhow::Result> + ) -> Result> { // Extract the first two elements but gives the tail // that is just after the first element. 
@@ -324,13 +325,13 @@ fn resolve_plane_sweep_candidates( query_tree: &Operation, allowed_candidates: &RoaringBitmap, wdcache: &mut WordDerivationsCache, -) -> anyhow::Result> +) -> Result> { /// FIXME may be buggy with query like "new new york" fn plane_sweep( groups_positions: Vec>, consecutive: bool, - ) -> anyhow::Result> + ) -> Result> { fn compute_groups_proximity( groups: &[(usize, (Position, u8, Position))], @@ -451,7 +452,7 @@ fn resolve_plane_sweep_candidates( rocache: &mut HashMap<&'a Operation, Vec<(Position, u8, Position)>>, words_positions: &HashMap, wdcache: &mut WordDerivationsCache, - ) -> anyhow::Result> + ) -> Result> { use Operation::{And, Phrase, Or}; diff --git a/milli/src/search/criteria/typo.rs b/milli/src/search/criteria/typo.rs index d075b6bca..436f4affd 100644 --- a/milli/src/search/criteria/typo.rs +++ b/milli/src/search/criteria/typo.rs @@ -5,6 +5,7 @@ use roaring::RoaringBitmap; use crate::search::query_tree::{maximum_typo, Operation, Query, QueryKind}; use crate::search::{word_derivations, WordDerivationsCache}; +use crate::Result; use super::{ Candidates, Context, @@ -43,7 +44,7 @@ impl<'t> Typo<'t> { impl<'t> Criterion for Typo<'t> { #[logging_timer::time("Typo::{}")] - fn next(&mut self, params: &mut CriterionParameters) -> anyhow::Result> { + fn next(&mut self, params: &mut CriterionParameters) -> Result> { use Candidates::{Allowed, Forbidden}; // remove excluded candidates when next is called, instead of doing it in the loop. match self.state.as_mut() { @@ -163,14 +164,14 @@ fn alterate_query_tree( mut query_tree: Operation, number_typos: u8, wdcache: &mut WordDerivationsCache, -) -> anyhow::Result +) -> Result { fn recurse( words_fst: &fst::Set>, operation: &mut Operation, number_typos: u8, wdcache: &mut WordDerivationsCache, - ) -> anyhow::Result<()> + ) -> Result<()> { use Operation::{And, Phrase, Or}; @@ -218,7 +219,7 @@ fn resolve_candidates<'t>( number_typos: u8, cache: &mut HashMap<(Operation, u8), RoaringBitmap>, wdcache: &mut WordDerivationsCache, -) -> anyhow::Result +) -> Result { fn resolve_operation<'t>( ctx: &'t dyn Context, @@ -226,7 +227,7 @@ fn resolve_candidates<'t>( number_typos: u8, cache: &mut HashMap<(Operation, u8), RoaringBitmap>, wdcache: &mut WordDerivationsCache, - ) -> anyhow::Result + ) -> Result { use Operation::{And, Phrase, Or, Query}; @@ -277,7 +278,7 @@ fn resolve_candidates<'t>( mana: u8, cache: &mut HashMap<(Operation, u8), RoaringBitmap>, wdcache: &mut WordDerivationsCache, - ) -> anyhow::Result + ) -> Result { match branches.split_first() { Some((head, [])) => { diff --git a/milli/src/search/criteria/words.rs b/milli/src/search/criteria/words.rs index 8730fa331..add90d80d 100644 --- a/milli/src/search/criteria/words.rs +++ b/milli/src/search/criteria/words.rs @@ -4,6 +4,7 @@ use log::debug; use roaring::RoaringBitmap; use crate::search::query_tree::Operation; +use crate::Result; use super::{Context, Criterion, CriterionParameters, CriterionResult, resolve_query_tree}; pub struct Words<'t> { @@ -30,7 +31,7 @@ impl<'t> Words<'t> { impl<'t> Criterion for Words<'t> { #[logging_timer::time("Words::{}")] - fn next(&mut self, params: &mut CriterionParameters) -> anyhow::Result> { + fn next(&mut self, params: &mut CriterionParameters) -> Result> { // remove excluded candidates when next is called, instead of doing it in the loop. 
if let Some(candidates) = self.candidates.as_mut() { *candidates -= params.excluded_candidates; diff --git a/milli/src/search/distinct/facet_distinct.rs b/milli/src/search/distinct/facet_distinct.rs index de7b28141..f86d6b8ed 100644 --- a/milli/src/search/distinct/facet_distinct.rs +++ b/milli/src/search/distinct/facet_distinct.rs @@ -5,7 +5,7 @@ use roaring::RoaringBitmap; use super::{Distinct, DocIter}; use crate::heed_codec::facet::*; -use crate::{DocumentId, FieldId, Index}; +use crate::{DocumentId, FieldId, Index, Result}; const FID_SIZE: usize = size_of::(); const DOCID_SIZE: usize = size_of::(); @@ -57,7 +57,7 @@ impl<'a> FacetDistinctIter<'a> { .get(self.txn, &(self.distinct, 0, key, key)) } - fn distinct_string(&mut self, id: DocumentId) -> anyhow::Result<()> { + fn distinct_string(&mut self, id: DocumentId) -> Result<()> { let iter = facet_string_values(id, self.distinct, self.index, self.txn)?; for item in iter { @@ -73,7 +73,7 @@ impl<'a> FacetDistinctIter<'a> { Ok(()) } - fn distinct_number(&mut self, id: DocumentId) -> anyhow::Result<()> { + fn distinct_number(&mut self, id: DocumentId) -> Result<()> { let iter = facet_number_values(id, self.distinct, self.index, self.txn)?; for item in iter { @@ -92,7 +92,7 @@ impl<'a> FacetDistinctIter<'a> { /// Performs the next iteration of the facet distinct. This is a convenience method that is /// called by the Iterator::next implementation that transposes the result. It makes error /// handling easier. - fn next_inner(&mut self) -> anyhow::Result> { + fn next_inner(&mut self) -> Result> { // The first step is to remove all the excluded documents from our candidates self.candidates.difference_with(&self.excluded); @@ -129,7 +129,7 @@ fn facet_number_values<'a>( distinct: FieldId, index: &Index, txn: &'a heed::RoTxn, -) -> anyhow::Result> { +) -> Result> { let key = facet_values_prefix_key(distinct, id); let iter = index @@ -146,7 +146,7 @@ fn facet_string_values<'a>( distinct: FieldId, index: &Index, txn: &'a heed::RoTxn, -) -> anyhow::Result> { +) -> Result> { let key = facet_values_prefix_key(distinct, id); let iter = index @@ -159,7 +159,7 @@ fn facet_string_values<'a>( } impl Iterator for FacetDistinctIter<'_> { - type Item = anyhow::Result; + type Item = Result; fn next(&mut self) -> Option { self.next_inner().transpose() diff --git a/milli/src/search/distinct/mod.rs b/milli/src/search/distinct/mod.rs index 1b7c69c7a..99bc74be0 100644 --- a/milli/src/search/distinct/mod.rs +++ b/milli/src/search/distinct/mod.rs @@ -3,13 +3,13 @@ mod noop_distinct; use roaring::RoaringBitmap; -use crate::DocumentId; +use crate::{DocumentId, Result}; pub use facet_distinct::FacetDistinct; pub use noop_distinct::NoopDistinct; /// A trait implemented by document interators that are returned by calls to `Distinct::distinct`. /// It provides a way to get back the ownership to the excluded set. -pub trait DocIter: Iterator> { +pub trait DocIter: Iterator> { /// Returns ownership on the internal exluded set. fn into_excluded(self) -> RoaringBitmap; } @@ -106,7 +106,7 @@ mod test { /// Checks that all the candidates are distinct, and returns the candidates number. 
pub(crate) fn validate_distinct_candidates( - candidates: impl Iterator>, + candidates: impl Iterator>, distinct: FieldId, index: &Index, ) -> usize { diff --git a/milli/src/search/distinct/noop_distinct.rs b/milli/src/search/distinct/noop_distinct.rs index bfaafed85..812701794 100644 --- a/milli/src/search/distinct/noop_distinct.rs +++ b/milli/src/search/distinct/noop_distinct.rs @@ -1,6 +1,6 @@ use roaring::{RoaringBitmap, bitmap::IntoIter}; -use crate::DocumentId; +use crate::{DocumentId, Result}; use super::{DocIter, Distinct}; /// A distinct implementer that does not perform any distinct, @@ -13,7 +13,7 @@ pub struct NoopDistinctIter { } impl Iterator for NoopDistinctIter { - type Item = anyhow::Result; + type Item = Result; fn next(&mut self) -> Option { self.candidates.next().map(Ok) diff --git a/milli/src/search/facet/facet_distribution.rs b/milli/src/search/facet/facet_distribution.rs index 565f4c6dd..917314b25 100644 --- a/milli/src/search/facet/facet_distribution.rs +++ b/milli/src/search/facet/facet_distribution.rs @@ -2,15 +2,15 @@ use std::collections::{HashSet, BTreeMap}; use std::ops::Bound::Unbounded; use std::{cmp, fmt}; -use anyhow::Context; use heed::{Database, BytesDecode}; use heed::types::{ByteSlice, Unit}; use roaring::RoaringBitmap; +use crate::error::FieldIdMapMissingEntry; use crate::facet::FacetType; use crate::heed_codec::facet::FacetValueStringCodec; use crate::search::facet::{FacetIter, FacetRange}; -use crate::{Index, FieldId, DocumentId}; +use crate::{Index, FieldId, DocumentId, Result}; /// The default number of values by facets that will /// be fetched from the key-value store. @@ -195,14 +195,15 @@ impl<'a> FacetDistribution<'a> { } } - pub fn execute(&self) -> anyhow::Result>> { + pub fn execute(&self) -> Result>> { let fields_ids_map = self.index.fields_ids_map(self.rtxn)?; let filterable_fields = self.index.filterable_fields(self.rtxn)?; let mut distribution = BTreeMap::new(); for name in filterable_fields { - let fid = fields_ids_map.id(&name).with_context(|| { - format!("missing field name {:?} from the fields id map", name) + let fid = fields_ids_map.id(&name).ok_or_else(|| FieldIdMapMissingEntry::FieldName { + field_name: name.clone(), + from_db_name: "filterable-fields", })?; let values = self.facet_values(fid)?; distribution.insert(name, values); diff --git a/milli/src/search/facet/filter_condition.rs b/milli/src/search/facet/filter_condition.rs index f58443b6f..98d638574 100644 --- a/milli/src/search/facet/filter_condition.rs +++ b/milli/src/search/facet/filter_condition.rs @@ -1,6 +1,7 @@ use std::collections::HashSet; use std::fmt::Debug; use std::ops::Bound::{self, Included, Excluded}; +use std::result::Result as StdResult; use std::str::FromStr; use either::Either; @@ -11,8 +12,9 @@ use pest::iterators::{Pair, Pairs}; use pest::Parser; use roaring::RoaringBitmap; +use crate::error::UserError; use crate::heed_codec::facet::{FacetValueStringCodec, FacetLevelValueF64Codec}; -use crate::{Index, FieldId, FieldsIdsMap, CboRoaringBitmapCodec}; +use crate::{Index, FieldId, FieldsIdsMap, CboRoaringBitmapCodec, Result}; use super::FacetRange; use super::parser::Rule; @@ -60,7 +62,7 @@ impl FilterCondition { rtxn: &heed::RoTxn, index: &Index, array: I, - ) -> anyhow::Result> + ) -> Result> where I: IntoIterator>, J: IntoIterator, A: AsRef, @@ -104,11 +106,11 @@ impl FilterCondition { rtxn: &heed::RoTxn, index: &Index, expression: &str, - ) -> anyhow::Result + ) -> Result { let fields_ids_map = index.fields_ids_map(rtxn)?; let filterable_fields = 
index.filterable_fields_ids(rtxn)?; - let lexed = FilterParser::parse(Rule::prgm, expression)?; + let lexed = FilterParser::parse(Rule::prgm, expression).map_err(UserError::FilterParsing)?; FilterCondition::from_pairs(&fields_ids_map, &filterable_fields, lexed) } @@ -116,7 +118,7 @@ impl FilterCondition { fim: &FieldsIdsMap, ff: &HashSet, expression: Pairs, - ) -> anyhow::Result + ) -> Result { PREC_CLIMBER.climb( expression, @@ -133,7 +135,7 @@ impl FilterCondition { Rule::term => Self::from_pairs(fim, ff, pair.into_inner()), _ => unreachable!(), }, - |lhs: anyhow::Result, op: Pair, rhs: anyhow::Result| { + |lhs: Result, op: Pair, rhs: Result| { match op.as_rule() { Rule::or => Ok(Or(Box::new(lhs?), Box::new(rhs?))), Rule::and => Ok(And(Box::new(lhs?), Box::new(rhs?))), @@ -158,16 +160,17 @@ impl FilterCondition { fields_ids_map: &FieldsIdsMap, filterable_fields: &HashSet, item: Pair, - ) -> anyhow::Result + ) -> Result { let mut items = item.into_inner(); - let fid = field_id(fields_ids_map, filterable_fields, &mut items)?; + let fid = field_id(fields_ids_map, filterable_fields, &mut items) + .map_err(UserError::FilterParsing)?; let (lresult, _) = pest_parse(items.next().unwrap()); let (rresult, _) = pest_parse(items.next().unwrap()); - let lvalue = lresult?; - let rvalue = rresult?; + let lvalue = lresult.map_err(UserError::FilterParsing)?; + let rvalue = rresult.map_err(UserError::FilterParsing)?; Ok(Operator(fid, Between(lvalue, rvalue))) } @@ -176,10 +179,11 @@ impl FilterCondition { fields_ids_map: &FieldsIdsMap, filterable_fields: &HashSet, item: Pair, - ) -> anyhow::Result + ) -> Result { let mut items = item.into_inner(); - let fid = field_id(fields_ids_map, filterable_fields, &mut items)?; + let fid = field_id(fields_ids_map, filterable_fields, &mut items) + .map_err(UserError::FilterParsing)?; let value = items.next().unwrap(); let (result, svalue) = pest_parse(value); @@ -192,60 +196,68 @@ impl FilterCondition { fields_ids_map: &FieldsIdsMap, filterable_fields: &HashSet, item: Pair, - ) -> anyhow::Result + ) -> Result { let mut items = item.into_inner(); - let fid = field_id(fields_ids_map, filterable_fields, &mut items)?; + let fid = field_id(fields_ids_map, filterable_fields, &mut items) + .map_err(UserError::FilterParsing)?; let value = items.next().unwrap(); let (result, _svalue) = pest_parse(value); + let value = result.map_err(UserError::FilterParsing)?; - Ok(Operator(fid, GreaterThan(result?))) + Ok(Operator(fid, GreaterThan(value))) } fn greater_than_or_equal( fields_ids_map: &FieldsIdsMap, filterable_fields: &HashSet, item: Pair, - ) -> anyhow::Result + ) -> Result { let mut items = item.into_inner(); - let fid = field_id(fields_ids_map, filterable_fields, &mut items)?; + let fid = field_id(fields_ids_map, filterable_fields, &mut items) + .map_err(UserError::FilterParsing)?; let value = items.next().unwrap(); let (result, _svalue) = pest_parse(value); + let value = result.map_err(UserError::FilterParsing)?; - Ok(Operator(fid, GreaterThanOrEqual(result?))) + Ok(Operator(fid, GreaterThanOrEqual(value))) } fn lower_than( fields_ids_map: &FieldsIdsMap, filterable_fields: &HashSet, item: Pair, - ) -> anyhow::Result + ) -> Result { let mut items = item.into_inner(); - let fid = field_id(fields_ids_map, filterable_fields, &mut items)?; + let fid = field_id(fields_ids_map, filterable_fields, &mut items) + .map_err(UserError::FilterParsing)?; let value = items.next().unwrap(); let (result, _svalue) = pest_parse(value); + let value = result.map_err(UserError::FilterParsing)?; - 
Ok(Operator(fid, LowerThan(result?))) + Ok(Operator(fid, LowerThan(value))) } fn lower_than_or_equal( fields_ids_map: &FieldsIdsMap, filterable_fields: &HashSet, item: Pair, - ) -> anyhow::Result + ) -> Result { let mut items = item.into_inner(); - let fid = field_id(fields_ids_map, filterable_fields, &mut items)?; + let fid = field_id(fields_ids_map, filterable_fields, &mut items) + .map_err(UserError::FilterParsing)?; let value = items.next().unwrap(); let (result, _svalue) = pest_parse(value); + let value = result.map_err(UserError::FilterParsing)?; - Ok(Operator(fid, LowerThanOrEqual(result?))) + Ok(Operator(fid, LowerThanOrEqual(value))) } } @@ -260,7 +272,7 @@ impl FilterCondition { left: Bound, right: Bound, output: &mut RoaringBitmap, - ) -> anyhow::Result<()> + ) -> Result<()> { match (left, right) { // If the request is an exact value we must go directly to the deepest level. @@ -332,7 +344,7 @@ impl FilterCondition { strings_db: heed::Database, field_id: FieldId, operator: &Operator, - ) -> anyhow::Result + ) -> Result { // Make sure we always bound the ranges with the field id and the level, // as the facets values are all in the same database and prefixed by the @@ -390,7 +402,7 @@ impl FilterCondition { &self, rtxn: &heed::RoTxn, index: &Index, - ) -> anyhow::Result + ) -> Result { let numbers_db = index.facet_id_f64_docids; let strings_db = index.facet_id_string_docids; @@ -422,7 +434,7 @@ fn field_id( fields_ids_map: &FieldsIdsMap, filterable_fields: &HashSet, items: &mut Pairs, -) -> Result> +) -> StdResult> { // lexing ensures that we at least have a key let key = items.next().unwrap(); @@ -463,7 +475,7 @@ fn field_id( /// the original string that we tried to parse. /// /// Returns the parsing error associated with the span if the conversion fails. -fn pest_parse(pair: Pair) -> (Result>, String) +fn pest_parse(pair: Pair) -> (StdResult>, String) where T: FromStr, T::Err: ToString, { diff --git a/milli/src/search/facet/mod.rs b/milli/src/search/facet/mod.rs index a5e02fc9f..a1a03dba3 100644 --- a/milli/src/search/facet/mod.rs +++ b/milli/src/search/facet/mod.rs @@ -9,8 +9,9 @@ use crate::heed_codec::CboRoaringBitmapCodec; use crate::heed_codec::facet::FacetLevelValueF64Codec; use crate::{Index, FieldId}; -pub use self::filter_condition::{FilterCondition, Operator}; pub use self::facet_distribution::FacetDistribution; +pub use self::filter_condition::{FilterCondition, Operator}; +pub(crate) use self::parser::Rule as ParserRule; mod filter_condition; mod facet_distribution; diff --git a/milli/src/search/mod.rs b/milli/src/search/mod.rs index 872ebfca6..f8c7b5d9b 100644 --- a/milli/src/search/mod.rs +++ b/milli/src/search/mod.rs @@ -2,6 +2,7 @@ use std::borrow::Cow; use std::collections::hash_map::{Entry, HashMap}; use std::fmt; use std::mem::take; +use std::result::Result as StdResult; use std::str::Utf8Error; use std::time::Instant; @@ -14,10 +15,11 @@ use roaring::bitmap::RoaringBitmap; use distinct::{Distinct, DocIter, FacetDistinct, NoopDistinct}; use crate::search::criteria::r#final::{Final, FinalResult}; -use crate::{Index, DocumentId}; +use crate::{Index, DocumentId, Result}; pub use self::facet::{FilterCondition, FacetDistribution, FacetIter, Operator}; pub use self::matching_words::MatchingWords; +pub(crate) use self::facet::ParserRule; use self::query_tree::QueryTreeBuilder; // Building these factories is not free. 
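
Throughout filter_condition.rs above, the same lifting pattern repeats: inner helpers such as `pest_parse` and `field_id` keep returning a plain `StdResult` with the precise parser error, and each constructor lifts that error into the crate `Result` with `map_err(UserError::FilterParsing)`, letting the `From<UserError> for Error` impl finish the conversion behind `?`. A self-contained illustration of that shape follows; the types and the parse step are made up for the example and are not milli's API:

    use std::result::Result as StdResult;

    #[derive(Debug)]
    enum UserError {
        FilterParsing(String),
    }

    #[derive(Debug)]
    enum Error {
        UserError(UserError),
    }

    impl From<UserError> for Error {
        fn from(error: UserError) -> Error {
            Error::UserError(error)
        }
    }

    type Result<T> = StdResult<T, Error>;

    // Inner helper: keeps the precise error type of the parser it wraps.
    fn parse_value(raw: &str) -> StdResult<f64, std::num::ParseFloatError> {
        raw.trim().parse()
    }

    // Boundary: lifts the precise error into the crate-wide Error through a
    // UserError variant, so `?` keeps working wherever the crate Result is used.
    fn lower_than(raw: &str) -> Result<f64> {
        let value = parse_value(raw).map_err(|e| UserError::FilterParsing(e.to_string()))?;
        Ok(value)
    }

    fn main() {
        assert!(lower_than(" 22.5 ").is_ok());
        assert!(matches!(
            lower_than("abc"),
            Err(Error::UserError(UserError::FilterParsing(_)))
        ));
    }
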
@@ -93,7 +95,7 @@ impl<'a> Search<'a> { self } - pub fn execute(&self) -> anyhow::Result { + pub fn execute(&self) -> Result { // We create the query tree by spliting the query into tokens. let before = Instant::now(); let (query_tree, primitive_query) = match self.query.as_ref() { @@ -152,7 +154,7 @@ impl<'a> Search<'a> { mut distinct: D, matching_words: MatchingWords, mut criteria: Final, - ) -> anyhow::Result + ) -> Result { let mut offset = self.offset; let mut initial_candidates = RoaringBitmap::new(); @@ -225,7 +227,7 @@ pub fn word_derivations<'c>( max_typo: u8, fst: &fst::Set>, cache: &'c mut WordDerivationsCache, -) -> Result<&'c [(String, u8)], Utf8Error> { +) -> StdResult<&'c [(String, u8)], Utf8Error> { match cache.entry((word.to_string(), is_prefix, max_typo)) { Entry::Occupied(entry) => Ok(entry.into_mut()), Entry::Vacant(entry) => { diff --git a/milli/src/search/query_tree.rs b/milli/src/search/query_tree.rs index 3c3420db4..c371b07d4 100644 --- a/milli/src/search/query_tree.rs +++ b/milli/src/search/query_tree.rs @@ -7,7 +7,7 @@ use meilisearch_tokenizer::TokenKind; use roaring::RoaringBitmap; use slice_group_by::GroupBy; -use crate::Index; +use crate::{Index, Result}; type IsOptionalWord = bool; type IsPrefix = bool; @@ -219,7 +219,7 @@ impl<'a> QueryTreeBuilder<'a> { /// - if `authorize_typos` is set to `false` the query tree will be generated /// forcing all query words to match documents without any typo /// (the criterion `typo` will be ignored) - pub fn build(&self, query: TokenStream) -> anyhow::Result> { + pub fn build(&self, query: TokenStream) -> Result> { let stop_words = self.index.stop_words(self.rtxn)?; let primitive_query = create_primitive_query(query, stop_words, self.words_limit); if !primitive_query.is_empty() { @@ -291,14 +291,14 @@ fn create_query_tree( optional_words: bool, authorize_typos: bool, query: &[PrimitiveQueryPart], -) -> anyhow::Result +) -> Result { /// Matches on the `PrimitiveQueryPart` and create an operation from it. fn resolve_primitive_part( ctx: &impl Context, authorize_typos: bool, part: PrimitiveQueryPart, - ) -> anyhow::Result + ) -> Result { match part { // 1. 
try to split word in 2 @@ -325,7 +325,7 @@ fn create_query_tree( ctx: &impl Context, authorize_typos: bool, query: &[PrimitiveQueryPart], - ) -> anyhow::Result + ) -> Result { const MAX_NGRAM: usize = 3; let mut op_children = Vec::new(); @@ -379,7 +379,7 @@ fn create_query_tree( ctx: &impl Context, authorize_typos: bool, query: PrimitiveQuery, - ) -> anyhow::Result + ) -> Result { let number_phrases = query.iter().filter(|p| p.is_phrase()).count(); let mut operation_children = Vec::new(); @@ -532,7 +532,7 @@ mod test { authorize_typos: bool, words_limit: Option, query: TokenStream, - ) -> anyhow::Result> + ) -> Result> { let primitive_query = create_primitive_query(query, None, words_limit); if !primitive_query.is_empty() { diff --git a/milli/src/update/clear_documents.rs b/milli/src/update/clear_documents.rs index f4c13e8f8..6e26bf027 100644 --- a/milli/src/update/clear_documents.rs +++ b/milli/src/update/clear_documents.rs @@ -1,6 +1,7 @@ use chrono::Utc; use roaring::RoaringBitmap; -use crate::{ExternalDocumentsIds, Index, FieldsDistribution}; + +use crate::{ExternalDocumentsIds, Index, FieldsDistribution, Result}; pub struct ClearDocuments<'t, 'u, 'i> { wtxn: &'t mut heed::RwTxn<'i, 'u>, @@ -18,7 +19,7 @@ impl<'t, 'u, 'i> ClearDocuments<'t, 'u, 'i> { ClearDocuments { wtxn, index, _update_id: update_id } } - pub fn execute(self) -> anyhow::Result { + pub fn execute(self) -> Result { self.index.set_updated_at(self.wtxn, &Utc::now())?; let Index { env: _env, diff --git a/milli/src/update/delete_documents.rs b/milli/src/update/delete_documents.rs index c4cf132bb..6792d6278 100644 --- a/milli/src/update/delete_documents.rs +++ b/milli/src/update/delete_documents.rs @@ -1,15 +1,15 @@ use std::collections::HashMap; use std::collections::hash_map::Entry; -use anyhow::{anyhow, Context}; use chrono::Utc; use fst::IntoStreamer; use heed::types::{ByteSlice, Unit}; use roaring::RoaringBitmap; use serde_json::Value; +use crate::error::{InternalError, UserError}; use crate::heed_codec::CboRoaringBitmapCodec; -use crate::{Index, DocumentId, FieldId, BEU32, SmallString32, ExternalDocumentsIds}; +use crate::{Index, DocumentId, FieldId, BEU32, SmallString32, ExternalDocumentsIds, Result}; use super::ClearDocuments; pub struct DeleteDocuments<'t, 'u, 'i> { @@ -25,7 +25,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { wtxn: &'t mut heed::RwTxn<'i, 'u>, index: &'i Index, update_id: u64, - ) -> anyhow::Result> + ) -> Result> { let external_documents_ids = index .external_documents_ids(wtxn)? @@ -54,7 +54,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { Some(docid) } - pub fn execute(self) -> anyhow::Result { + pub fn execute(self) -> Result { self.index.set_updated_at(self.wtxn, &Utc::now())?; // We retrieve the current documents ids that are in the database. 
let mut documents_ids = self.index.documents_ids(self.wtxn)?; @@ -77,7 +77,9 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { } let fields_ids_map = self.index.fields_ids_map(self.wtxn)?; - let primary_key = self.index.primary_key(self.wtxn)?.context("missing primary key")?; + let primary_key = self.index.primary_key(self.wtxn)?.ok_or_else(|| { + InternalError::DatabaseMissingEntry { db_name: "main", key: Some("primary-key") } + })?; let id_field = fields_ids_map.id(primary_key).expect(r#"the field "id" to be present"#); let Index { @@ -119,7 +121,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { let external_id = match serde_json::from_slice(content).unwrap() { Value::String(string) => SmallString32::from(string.as_str()), Value::Number(number) => SmallString32::from(number.to_string()), - _ => return Err(anyhow!("documents ids must be either strings or numbers")), + document_id => return Err(UserError::InvalidDocumentId { document_id }.into()), }; external_ids.push(external_id); } diff --git a/milli/src/update/facets.rs b/milli/src/update/facets.rs index f0eab6023..757cbe810 100644 --- a/milli/src/update/facets.rs +++ b/milli/src/update/facets.rs @@ -9,11 +9,12 @@ use heed::{BytesEncode, Error}; use log::debug; use roaring::RoaringBitmap; +use crate::error::InternalError; use crate::heed_codec::CboRoaringBitmapCodec; use crate::heed_codec::facet::FacetLevelValueF64Codec; -use crate::Index; use crate::update::index_documents::WriteMethod; use crate::update::index_documents::{create_writer, writer_into_reader, write_into_lmdb_database}; +use crate::{Index, Result}; pub struct Facets<'t, 'u, 'i> { wtxn: &'t mut heed::RwTxn<'i, 'u>, @@ -55,7 +56,7 @@ impl<'t, 'u, 'i> Facets<'t, 'u, 'i> { self } - pub fn execute(self) -> anyhow::Result<()> { + pub fn execute(self) -> Result<()> { self.index.set_updated_at(self.wtxn, &Utc::now())?; // We get the faceted fields to be able to create the facet levels. let faceted_fields = self.index.faceted_fields_ids(self.wtxn)?; @@ -102,7 +103,7 @@ impl<'t, 'u, 'i> Facets<'t, 'u, 'i> { self.wtxn, *self.index.facet_id_f64_docids.as_polymorph(), content, - |_, _| anyhow::bail!("invalid facet number level merging"), + |_, _| Err(InternalError::IndexingMergingKeys { process: "facet number level" }), WriteMethod::GetMergePut, )?; } @@ -132,7 +133,7 @@ fn compute_facet_number_levels<'t>( level_group_size: NonZeroUsize, min_level_size: NonZeroUsize, field_id: u8, -) -> anyhow::Result> +) -> Result> { let first_level_size = db .remap_key_type::() @@ -195,7 +196,7 @@ fn compute_faceted_documents_ids( rtxn: &heed::RoTxn, db: heed::Database, field_id: u8, -) -> anyhow::Result +) -> Result { let mut documents_ids = RoaringBitmap::new(); @@ -214,7 +215,7 @@ fn write_number_entry( left: f64, right: f64, ids: &RoaringBitmap, -) -> anyhow::Result<()> +) -> Result<()> { let key = (field_id, level, left, right); let key = FacetLevelValueF64Codec::bytes_encode(&key).ok_or(Error::Encoding)?; diff --git a/milli/src/update/index_documents/merge_function.rs b/milli/src/update/index_documents/merge_function.rs index 8c93773ce..3d9ffda6a 100644 --- a/milli/src/update/index_documents/merge_function.rs +++ b/milli/src/update/index_documents/merge_function.rs @@ -1,17 +1,19 @@ use std::borrow::Cow; +use std::result::Result as StdResult; use fst::IntoStreamer; use roaring::RoaringBitmap; use crate::heed_codec::CboRoaringBitmapCodec; +use crate::Result; /// Only the last value associated with an id is kept. 
-pub fn keep_latest_obkv(_key: &[u8], obkvs: &[Cow<[u8]>]) -> anyhow::Result> { +pub fn keep_latest_obkv(_key: &[u8], obkvs: &[Cow<[u8]>]) -> Result> { Ok(obkvs.last().unwrap().clone().into_owned()) } /// Merge all the obks in the order we see them. -pub fn merge_obkvs(_key: &[u8], obkvs: &[Cow<[u8]>]) -> anyhow::Result> { +pub fn merge_obkvs(_key: &[u8], obkvs: &[Cow<[u8]>]) -> Result> { let mut iter = obkvs.iter(); let first = iter.next().map(|b| b.clone().into_owned()).unwrap(); Ok(iter.fold(first, |acc, current| { @@ -24,8 +26,8 @@ pub fn merge_obkvs(_key: &[u8], obkvs: &[Cow<[u8]>]) -> anyhow::Result> } // Union of multiple FSTs -pub fn fst_merge(_key: &[u8], values: &[Cow<[u8]>]) -> anyhow::Result> { - let fsts = values.iter().map(fst::Set::new).collect::, _>>()?; +pub fn fst_merge(_key: &[u8], values: &[Cow<[u8]>]) -> Result> { + let fsts = values.iter().map(fst::Set::new).collect::, _>>()?; let op_builder: fst::set::OpBuilder = fsts.iter().map(|fst| fst.into_stream()).collect(); let op = op_builder.r#union(); @@ -34,7 +36,7 @@ pub fn fst_merge(_key: &[u8], values: &[Cow<[u8]>]) -> anyhow::Result> { Ok(build.into_inner().unwrap()) } -pub fn keep_first(_key: &[u8], values: &[Cow<[u8]>]) -> anyhow::Result> { +pub fn keep_first(_key: &[u8], values: &[Cow<[u8]>]) -> Result> { Ok(values.first().unwrap().to_vec()) } @@ -54,7 +56,7 @@ pub fn merge_two_obkvs(base: obkv::KvReader, update: obkv::KvReader, buffer: &mu writer.finish().unwrap(); } -pub fn roaring_bitmap_merge(_key: &[u8], values: &[Cow<[u8]>]) -> anyhow::Result> { +pub fn roaring_bitmap_merge(_key: &[u8], values: &[Cow<[u8]>]) -> Result> { let (head, tail) = values.split_first().unwrap(); let mut head = RoaringBitmap::deserialize_from(&head[..])?; @@ -68,7 +70,7 @@ pub fn roaring_bitmap_merge(_key: &[u8], values: &[Cow<[u8]>]) -> anyhow::Result Ok(vec) } -pub fn cbo_roaring_bitmap_merge(_key: &[u8], values: &[Cow<[u8]>]) -> anyhow::Result> { +pub fn cbo_roaring_bitmap_merge(_key: &[u8], values: &[Cow<[u8]>]) -> Result> { let (head, tail) = values.split_first().unwrap(); let mut head = CboRoaringBitmapCodec::deserialize_from(&head[..])?; diff --git a/milli/src/update/index_documents/mod.rs b/milli/src/update/index_documents/mod.rs index 1d31cba85..51c8b948a 100644 --- a/milli/src/update/index_documents/mod.rs +++ b/milli/src/update/index_documents/mod.rs @@ -3,11 +3,11 @@ use std::collections::HashSet; use std::fs::File; use std::io::{self, Seek, SeekFrom, BufReader, BufRead}; use std::num::{NonZeroU32, NonZeroUsize}; +use std::result::Result as StdResult; use std::str; use std::sync::mpsc::sync_channel; use std::time::Instant; -use anyhow::Context; use bstr::ByteSlice as _; use chrono::Utc; use grenad::{MergerIter, Writer, Sorter, Merger, Reader, FileFuse, CompressionType}; @@ -18,7 +18,8 @@ use rayon::prelude::*; use rayon::ThreadPool; use serde::{Serialize, Deserialize}; -use crate::index::Index; +use crate::error::{Error, InternalError}; +use crate::{Index, Result}; use crate::update::{ Facets, WordsLevelPositions, WordPrefixDocids, WordsPrefixesFst, UpdateIndexingStep, WordPrefixPairProximityDocids, @@ -56,14 +57,14 @@ pub fn create_writer(typ: CompressionType, level: Option, file: File) -> io builder.build(file) } -pub fn create_sorter( - merge: MergeFn, +pub fn create_sorter( + merge: MergeFn, chunk_compression_type: CompressionType, chunk_compression_level: Option, chunk_fusing_shrink_size: Option, max_nb_chunks: Option, max_memory: Option, -) -> Sorter +) -> Sorter> { let mut builder = Sorter::builder(merge); if let 
Some(shrink_size) = chunk_fusing_shrink_size { @@ -82,7 +83,7 @@ pub fn create_sorter( builder.build() } -pub fn writer_into_reader(writer: Writer, shrink_size: Option) -> anyhow::Result> { +pub fn writer_into_reader(writer: Writer, shrink_size: Option) -> Result> { let mut file = writer.into_inner()?; file.seek(SeekFrom::Start(0))?; let file = if let Some(shrink_size) = shrink_size { @@ -93,19 +94,25 @@ pub fn writer_into_reader(writer: Writer, shrink_size: Option) -> any Reader::new(file).map_err(Into::into) } -pub fn merge_readers(sources: Vec>, merge: MergeFn) -> Merger { +pub fn merge_readers( + sources: Vec>, + merge: MergeFn, +) -> Merger> +{ let mut builder = Merger::builder(merge); builder.extend(sources); builder.build() } -pub fn merge_into_lmdb_database( +pub fn merge_into_lmdb_database( wtxn: &mut heed::RwTxn, database: heed::PolyDatabase, sources: Vec>, - merge: MergeFn, + merge: MergeFn, method: WriteMethod, -) -> anyhow::Result<()> +) -> Result<()> +where + Error: From, { debug!("Merging {} MTBL stores...", sources.len()); let before = Instant::now(); @@ -123,13 +130,15 @@ pub fn merge_into_lmdb_database( Ok(()) } -pub fn write_into_lmdb_database( +pub fn write_into_lmdb_database( wtxn: &mut heed::RwTxn, database: heed::PolyDatabase, mut reader: Reader, - merge: MergeFn, + merge: MergeFn, method: WriteMethod, -) -> anyhow::Result<()> +) -> Result<()> +where + Error: From, { debug!("Writing MTBL stores..."); let before = Instant::now(); @@ -138,9 +147,7 @@ pub fn write_into_lmdb_database( WriteMethod::Append => { let mut out_iter = database.iter_mut::<_, ByteSlice, ByteSlice>(wtxn)?; while let Some((k, v)) = reader.next()? { - out_iter.append(k, v).with_context(|| { - format!("writing {:?} into LMDB", k.as_bstr()) - })?; + out_iter.append(k, v)?; } }, WriteMethod::GetMergePut => { @@ -165,13 +172,16 @@ pub fn write_into_lmdb_database( Ok(()) } -pub fn sorter_into_lmdb_database( +pub fn sorter_into_lmdb_database( wtxn: &mut heed::RwTxn, database: heed::PolyDatabase, - sorter: Sorter, - merge: MergeFn, + sorter: Sorter>, + merge: MergeFn, method: WriteMethod, -) -> anyhow::Result<()> +) -> Result<()> +where + Error: From, + Error: From> { debug!("Writing MTBL sorter..."); let before = Instant::now(); @@ -188,21 +198,21 @@ pub fn sorter_into_lmdb_database( Ok(()) } -fn merger_iter_into_lmdb_database( +fn merger_iter_into_lmdb_database( wtxn: &mut heed::RwTxn, database: heed::PolyDatabase, - mut sorter: MergerIter, - merge: MergeFn, + mut sorter: MergerIter>, + merge: MergeFn, method: WriteMethod, -) -> anyhow::Result<()> +) -> Result<()> +where + Error: From, { match method { WriteMethod::Append => { let mut out_iter = database.iter_mut::<_, ByteSlice, ByteSlice>(wtxn)?; while let Some((k, v)) = sorter.next()? { - out_iter.append(k, v).with_context(|| { - format!("writing {:?} into LMDB", k.as_bstr()) - })?; + out_iter.append(k, v)?; } }, WriteMethod::GetMergePut => { @@ -211,7 +221,10 @@ fn merger_iter_into_lmdb_database( match iter.next().transpose()? { Some((key, old_val)) if key == k => { let vals = vec![Cow::Borrowed(old_val), Cow::Borrowed(v)]; - let val = merge(k, &vals).expect("merge failed"); + let val = merge(k, &vals).map_err(|_| { + // TODO just wrap this error? 
+ InternalError::IndexingMergingKeys { process: "get-put-merge" } + })?; iter.put_current(k, &val)?; }, _ => { @@ -318,7 +331,7 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> { self.autogenerate_docids = false; } - pub fn execute(self, reader: R, progress_callback: F) -> anyhow::Result + pub fn execute(self, reader: R, progress_callback: F) -> Result where R: io::Read, F: Fn(UpdateIndexingStep, u64) + Sync, @@ -365,7 +378,7 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> { Ok(DocumentAdditionResult { nb_documents }) } - pub fn execute_raw(self, output: TransformOutput, progress_callback: F) -> anyhow::Result<()> + pub fn execute_raw(self, output: TransformOutput, progress_callback: F) -> Result<()> where F: Fn(UpdateIndexingStep) + Sync { @@ -403,15 +416,12 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> { debug!("{} documents actually deleted", deleted_documents_count); } - let mmap; - let bytes = if documents_count == 0 { - &[][..] - } else { - mmap = unsafe { Mmap::map(&documents_file).context("mmaping the transform documents file")? }; - &mmap - }; + if documents_count == 0 { + return Ok(()); + } - let documents = grenad::Reader::new(bytes).unwrap(); + let bytes = unsafe { Mmap::map(&documents_file)? }; + let documents = grenad::Reader::new(bytes.as_bytes()).unwrap(); // The enum which indicates the type of the readers // merges that are potentially done on different threads. @@ -477,7 +487,7 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> { &progress_callback, ) }) - .collect::, _>>()?; + .collect::, _>>()?; let mut main_readers = Vec::with_capacity(readers.len()); let mut word_docids_readers = Vec::with_capacity(readers.len()); @@ -535,7 +545,7 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> { debug!("Merging the main, word docids and words pairs proximity docids in parallel..."); rayon::spawn(move || { vec![ - (DatabaseType::Main, main_readers, fst_merge as MergeFn), + (DatabaseType::Main, main_readers, fst_merge as MergeFn<_>), (DatabaseType::WordDocids, word_docids_readers, roaring_bitmap_merge), ( DatabaseType::FacetLevel0NumbersDocids, @@ -570,7 +580,7 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> { facet_field_strings_docids_readers, field_id_docid_facet_numbers_readers, field_id_docid_facet_strings_readers, - )) as anyhow::Result<_> + )) as Result<_> })?; let ( diff --git a/milli/src/update/index_documents/store.rs b/milli/src/update/index_documents/store.rs index 4662cd609..e5e55682e 100644 --- a/milli/src/update/index_documents/store.rs +++ b/milli/src/update/index_documents/store.rs @@ -6,7 +6,6 @@ use std::iter::FromIterator; use std::time::Instant; use std::{cmp, iter}; -use anyhow::Context; use bstr::ByteSlice as _; use fst::Set; use grenad::{Reader, FileFuse, Writer, Sorter, CompressionType}; @@ -19,11 +18,12 @@ use roaring::RoaringBitmap; use serde_json::Value; use tempfile::tempfile; +use crate::error::{Error, InternalError, SerializationError}; use crate::heed_codec::facet::{FacetValueStringCodec, FacetLevelValueF64Codec}; use crate::heed_codec::facet::{FieldDocIdFacetStringCodec, FieldDocIdFacetF64Codec}; use crate::heed_codec::{BoRoaringBitmapCodec, CboRoaringBitmapCodec}; use crate::update::UpdateIndexingStep; -use crate::{json_to_string, SmallVec32, Position, DocumentId, FieldId}; +use crate::{json_to_string, SmallVec32, Position, DocumentId, FieldId, Result}; use super::{MergeFn, create_writer, create_sorter, writer_into_reader}; use super::merge_function::{fst_merge, keep_first, roaring_bitmap_merge, 
cbo_roaring_bitmap_merge}; @@ -66,15 +66,15 @@ pub struct Store<'s, A> { chunk_compression_level: Option, chunk_fusing_shrink_size: Option, // MTBL sorters - main_sorter: Sorter, - word_docids_sorter: Sorter, - words_pairs_proximities_docids_sorter: Sorter, - word_level_position_docids_sorter: Sorter, - field_id_word_count_docids_sorter: Sorter, - facet_field_numbers_docids_sorter: Sorter, - facet_field_strings_docids_sorter: Sorter, - field_id_docid_facet_numbers_sorter: Sorter, - field_id_docid_facet_strings_sorter: Sorter, + main_sorter: Sorter>, + word_docids_sorter: Sorter>, + words_pairs_proximities_docids_sorter: Sorter>, + word_level_position_docids_sorter: Sorter>, + field_id_word_count_docids_sorter: Sorter>, + facet_field_numbers_docids_sorter: Sorter>, + facet_field_strings_docids_sorter: Sorter>, + field_id_docid_facet_numbers_sorter: Sorter>, + field_id_docid_facet_strings_sorter: Sorter>, // MTBL writers docid_word_positions_writer: Writer, documents_writer: Writer, @@ -93,7 +93,7 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> { chunk_compression_level: Option, chunk_fusing_shrink_size: Option, stop_words: Option<&'s Set>, - ) -> anyhow::Result + ) -> Result { // We divide the max memory by the number of sorter the Store have. let max_memory = max_memory.map(|mm| cmp::max(ONE_KILOBYTE, mm / 5)); @@ -221,7 +221,7 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> { } // Save the documents ids under the position and word we have seen it. - fn insert_word_docid(&mut self, word: &str, id: DocumentId) -> anyhow::Result<()> { + fn insert_word_docid(&mut self, word: &str, id: DocumentId) -> Result<()> { // if get_refresh finds the element it is assured to be at the end of the linked hash map. match self.word_docids.get_refresh(word.as_bytes()) { Some(old) => { old.insert(id); }, @@ -246,7 +246,7 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> { field_id: FieldId, value: OrderedFloat, id: DocumentId, - ) -> anyhow::Result<()> + ) -> Result<()> { let sorter = &mut self.field_id_docid_facet_numbers_sorter; Self::write_field_id_docid_facet_number_value(sorter, field_id, id, value)?; @@ -279,7 +279,7 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> { field_id: FieldId, value: String, id: DocumentId, - ) -> anyhow::Result<()> + ) -> Result<()> { let sorter = &mut self.field_id_docid_facet_strings_sorter; Self::write_field_id_docid_facet_string_value(sorter, field_id, id, &value)?; @@ -311,7 +311,7 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> { &mut self, words_pairs_proximities: impl IntoIterator, id: DocumentId, - ) -> anyhow::Result<()> + ) -> Result<()> { for ((w1, w2), prox) in words_pairs_proximities { let w1 = SmallVec32::from(w1.as_bytes()); @@ -350,7 +350,7 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> { facet_numbers_values: &mut HashMap>, facet_strings_values: &mut HashMap>, record: &[u8], - ) -> anyhow::Result<()> + ) -> Result<()> { // We compute the list of words pairs proximities (self-join) and write it directly to disk. 
let words_pair_proximities = compute_words_pair_proximities(&words_positions); @@ -385,10 +385,12 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> { Ok(()) } - fn write_words_pairs_proximities( - sorter: &mut Sorter, + fn write_words_pairs_proximities( + sorter: &mut Sorter>, iter: impl IntoIterator, SmallVec32, u8), RoaringBitmap)>, - ) -> anyhow::Result<()> + ) -> Result<()> + where + Error: From, { let mut key = Vec::new(); let mut buffer = Vec::new(); @@ -417,7 +419,7 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> { writer: &mut Writer, id: DocumentId, words_positions: &HashMap>, - ) -> anyhow::Result<()> + ) -> Result<()> { // We prefix the words by the document id. let mut key = id.to_be_bytes().to_vec(); @@ -445,11 +447,13 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> { Ok(()) } - fn write_word_position_docids( - writer: &mut Sorter, + fn write_word_position_docids( + writer: &mut Sorter>, document_id: DocumentId, words_positions: &HashMap>, - ) -> anyhow::Result<()> + ) -> Result<()> + where + Error: From, { let mut key_buffer = Vec::new(); let mut data_buffer = Vec::new(); @@ -480,11 +484,13 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> { Ok(()) } - fn write_facet_field_string_docids( - sorter: &mut Sorter, + fn write_facet_field_string_docids( + sorter: &mut Sorter>, iter: I, - ) -> anyhow::Result<()> - where I: IntoIterator + ) -> Result<()> + where + I: IntoIterator, + Error: From, { let mut key_buffer = Vec::new(); let mut data_buffer = Vec::new(); @@ -504,11 +510,13 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> { Ok(()) } - fn write_facet_field_number_docids( - sorter: &mut Sorter, + fn write_facet_field_number_docids( + sorter: &mut Sorter>, iter: I, - ) -> anyhow::Result<()> - where I: IntoIterator), RoaringBitmap)> + ) -> Result<()> + where + I: IntoIterator), RoaringBitmap)>, + Error: From, { let mut data_buffer = Vec::new(); @@ -517,7 +525,7 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> { let key = FacetLevelValueF64Codec::bytes_encode(&(field_id, 0, *value, *value)) .map(Cow::into_owned) - .context("could not serialize facet level value key")?; + .ok_or(SerializationError::Encoding { db_name: Some("facet level value") })?; CboRoaringBitmapCodec::serialize_into(&docids, &mut data_buffer); @@ -529,16 +537,18 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> { Ok(()) } - fn write_field_id_docid_facet_number_value( - sorter: &mut Sorter, + fn write_field_id_docid_facet_number_value( + sorter: &mut Sorter>, field_id: FieldId, document_id: DocumentId, value: OrderedFloat, - ) -> anyhow::Result<()> + ) -> Result<()> + where + Error: From, { let key = FieldDocIdFacetF64Codec::bytes_encode(&(field_id, document_id, *value)) .map(Cow::into_owned) - .context("could not serialize facet level value key")?; + .ok_or(SerializationError::Encoding { db_name: Some("facet level value") })?; if lmdb_key_valid_size(&key) { sorter.insert(&key, &[])?; @@ -547,12 +557,14 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> { Ok(()) } - fn write_field_id_docid_facet_string_value( - sorter: &mut Sorter, + fn write_field_id_docid_facet_string_value( + sorter: &mut Sorter>, field_id: FieldId, document_id: DocumentId, value: &str, - ) -> anyhow::Result<()> + ) -> Result<()> + where + Error: From, { let mut buffer = Vec::new(); @@ -565,8 +577,10 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> { Ok(()) } - fn write_word_docids(sorter: &mut Sorter, iter: I) -> anyhow::Result<()> - where I: IntoIterator, RoaringBitmap)> + fn write_word_docids(sorter: &mut Sorter>, iter: I) -> Result<()> + where + I: IntoIterator, RoaringBitmap)>, + Error: From, { let mut key = 
Vec::new(); let mut buffer = Vec::new(); @@ -596,7 +610,7 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> { num_threads: usize, log_every_n: Option, mut progress_callback: F, - ) -> anyhow::Result + ) -> Result where F: FnMut(UpdateIndexingStep), { debug!("{:?}: Indexing in a Store...", thread_index); @@ -625,7 +639,7 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> { for (attr, content) in document.iter() { if self.faceted_fields.contains(&attr) || self.searchable_fields.contains(&attr) { - let value = serde_json::from_slice(content)?; + let value = serde_json::from_slice(content).map_err(InternalError::SerdeJson)?; let (facet_numbers, facet_strings) = extract_facet_values(&value); facet_numbers_values.entry(attr).or_insert_with(Vec::new).extend(facet_numbers); @@ -679,7 +693,7 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> { Ok(readers) } - fn finish(mut self) -> anyhow::Result { + fn finish(mut self) -> Result { let comp_type = self.chunk_compression_type; let comp_level = self.chunk_compression_level; let shrink_size = self.chunk_fusing_shrink_size; diff --git a/milli/src/update/index_documents/transform.rs b/milli/src/update/index_documents/transform.rs index 5fbd24bb1..82003eddc 100644 --- a/milli/src/update/index_documents/transform.rs +++ b/milli/src/update/index_documents/transform.rs @@ -2,17 +2,19 @@ use std::borrow::Cow; use std::fs::File; use std::io::{Read, Seek, SeekFrom}; use std::iter::Peekable; +use std::result::Result as StdResult; use std::time::Instant; -use anyhow::{anyhow, Context}; use grenad::CompressionType; use log::info; use roaring::RoaringBitmap; use serde_json::{Map, Value}; +use crate::error::{Error, UserError, InternalError}; use crate::update::index_documents::merge_function::{merge_obkvs, keep_latest_obkv}; use crate::update::{AvailableDocumentsIds, UpdateIndexingStep}; -use crate::{Index, BEU32, MergeFn, FieldsIdsMap, ExternalDocumentsIds, FieldId, FieldsDistribution}; +use crate::{BEU32, MergeFn, FieldsIdsMap, ExternalDocumentsIds, FieldId, FieldsDistribution}; +use crate::{Index, Result}; use super::merge_function::merge_two_obkvs; use super::{create_writer, create_sorter, IndexDocumentsMethod}; @@ -53,7 +55,7 @@ fn is_primary_key(field: impl AsRef) -> bool { } impl Transform<'_, '_> { - pub fn output_from_json(self, reader: R, progress_callback: F) -> anyhow::Result + pub fn output_from_json(self, reader: R, progress_callback: F) -> Result where R: Read, F: Fn(UpdateIndexingStep) + Sync, @@ -61,7 +63,7 @@ impl Transform<'_, '_> { self.output_from_generic_json(reader, false, progress_callback) } - pub fn output_from_json_stream(self, reader: R, progress_callback: F) -> anyhow::Result + pub fn output_from_json_stream(self, reader: R, progress_callback: F) -> Result where R: Read, F: Fn(UpdateIndexingStep) + Sync, @@ -74,7 +76,7 @@ impl Transform<'_, '_> { reader: R, is_stream: bool, progress_callback: F, - ) -> anyhow::Result + ) -> Result where R: Read, F: Fn(UpdateIndexingStep) + Sync, @@ -88,7 +90,7 @@ impl Transform<'_, '_> { let iter = Box::new(iter) as Box>; iter.peekable() } else { - let vec: Vec<_> = serde_json::from_reader(reader)?; + let vec: Vec<_> = serde_json::from_reader(reader).map_err(UserError::SerdeJson)?; let iter = vec.into_iter().map(Ok); let iter = Box::new(iter) as Box>; iter.peekable() @@ -96,9 +98,12 @@ impl Transform<'_, '_> { // We extract the primary key from the first document in // the batch if it hasn't already been defined in the index - let first = match documents.peek().map(Result::as_ref).transpose() { + let first = match 
documents.peek().map(StdResult::as_ref).transpose() { Ok(first) => first, - Err(_) => return Err(documents.next().unwrap().unwrap_err().into()), + Err(_) => { + let error = documents.next().unwrap().unwrap_err(); + return Err(UserError::SerdeJson(error).into()); + }, }; let alternative_name = first.and_then(|doc| doc.keys().find(|f| is_primary_key(f)).cloned()); @@ -145,7 +150,7 @@ impl Transform<'_, '_> { let mut documents_count = 0; for result in documents { - let document = result?; + let document = result.map_err(UserError::SerdeJson)?; if self.log_every_n.map_or(false, |len| documents_count % len == 0) { progress_callback(UpdateIndexingStep::TransformFromUserIntoGenericFormat { @@ -158,7 +163,7 @@ impl Transform<'_, '_> { // We prepare the fields ids map with the documents keys. for (key, _value) in &document { - fields_ids_map.insert(&key).context("field id limit reached")?; + fields_ids_map.insert(&key).ok_or(UserError::AttributeLimitReached)?; } // We retrieve the user id from the document based on the primary key name, @@ -167,11 +172,13 @@ impl Transform<'_, '_> { Some(value) => match value { Value::String(string) => Cow::Borrowed(string.as_str()), Value::Number(number) => Cow::Owned(number.to_string()), - _ => return Err(anyhow!("documents ids must be either strings or numbers")), + content => return Err(UserError::InvalidDocumentId { + document_id: content.clone(), + }.into()), }, None => { if !self.autogenerate_docids { - return Err(anyhow!("missing primary key")); + return Err(UserError::MissingPrimaryKey.into()); } let uuid = uuid::Uuid::new_v4().to_hyphenated().encode_lower(&mut uuid_buffer); Cow::Borrowed(uuid) @@ -186,13 +193,15 @@ impl Transform<'_, '_> { // and this should be the document id we return the one we generated. if let Some(value) = document.get(name) { // We serialize the attribute values. - serde_json::to_writer(&mut json_buffer, value)?; + serde_json::to_writer(&mut json_buffer, value).map_err(InternalError::SerdeJson)?; writer.insert(field_id, &json_buffer)?; } // We validate the document id [a-zA-Z0-9\-_]. if field_id == primary_key_id && validate_document_id(&external_id).is_none() { - return Err(anyhow!("invalid document id: {:?}", external_id)); + return Err(UserError::InvalidDocumentId { + document_id: Value::from(external_id), + }.into()); } } @@ -217,7 +226,7 @@ impl Transform<'_, '_> { ) } - pub fn output_from_csv(self, reader: R, progress_callback: F) -> anyhow::Result + pub fn output_from_csv(self, reader: R, progress_callback: F) -> Result where R: Read, F: Fn(UpdateIndexingStep) + Sync, @@ -226,12 +235,12 @@ impl Transform<'_, '_> { let external_documents_ids = self.index.external_documents_ids(self.rtxn).unwrap(); let mut csv = csv::Reader::from_reader(reader); - let headers = csv.headers()?; + let headers = csv.headers().map_err(UserError::Csv)?; let mut fields_ids = Vec::new(); // Generate the new fields ids based on the current fields ids and this CSV headers. for (i, header) in headers.iter().enumerate() { - let id = fields_ids_map.insert(header).context("field id limit reached)")?; + let id = fields_ids_map.insert(header).ok_or(UserError::AttributeLimitReached)?; fields_ids.push((id, i)); } @@ -281,7 +290,7 @@ impl Transform<'_, '_> { let mut documents_count = 0; let mut record = csv::StringRecord::new(); - while csv.read_record(&mut record)? { + while csv.read_record(&mut record).map_err(UserError::Csv)? 
{ obkv_buffer.clear(); let mut writer = obkv::KvWriter::new(&mut obkv_buffer); @@ -298,7 +307,9 @@ impl Transform<'_, '_> { // We validate the document id [a-zA-Z0-9\-_]. match validate_document_id(&external_id) { Some(valid) => valid, - None => return Err(anyhow!("invalid document id: {:?}", external_id)), + None => return Err(UserError::InvalidDocumentId { + document_id: Value::from(external_id), + }.into()), } }, None => uuid::Uuid::new_v4().to_hyphenated().encode_lower(&mut uuid_buffer), @@ -316,7 +327,7 @@ impl Transform<'_, '_> { for (field_id, field) in iter { // We serialize the attribute values as JSON strings. json_buffer.clear(); - serde_json::to_writer(&mut json_buffer, &field)?; + serde_json::to_writer(&mut json_buffer, &field).map_err(InternalError::SerdeJson)?; writer.insert(*field_id, &json_buffer)?; } @@ -344,17 +355,18 @@ impl Transform<'_, '_> { /// Generate the `TransformOutput` based on the given sorter that can be generated from any /// format like CSV, JSON or JSON stream. This sorter must contain a key that is the document /// id for the user side and the value must be an obkv where keys are valid fields ids. - fn output_from_sorter( + fn output_from_sorter( self, - sorter: grenad::Sorter, + sorter: grenad::Sorter>, primary_key: String, fields_ids_map: FieldsIdsMap, approximate_number_of_documents: usize, mut external_documents_ids: ExternalDocumentsIds<'_>, progress_callback: F, - ) -> anyhow::Result + ) -> Result where F: Fn(UpdateIndexingStep) + Sync, + Error: From, { let documents_ids = self.index.documents_ids(self.rtxn)?; let mut fields_distribution = self.index.fields_distribution(self.rtxn)?; @@ -362,7 +374,7 @@ impl Transform<'_, '_> { // Once we have sort and deduplicated the documents we write them into a final file. let mut final_sorter = create_sorter( - |_docid, _obkvs| Err(anyhow!("cannot merge two documents")), + |_id, _obkvs| Err(InternalError::IndexingMergingKeys { process: "merging documents" }), self.chunk_compression_type, self.chunk_compression_level, self.chunk_fusing_shrink_size, @@ -398,7 +410,10 @@ impl Transform<'_, '_> { IndexDocumentsMethod::UpdateDocuments => { let key = BEU32::new(docid); let base_obkv = self.index.documents.get(&self.rtxn, &key)? - .context("document not found")?; + .ok_or(InternalError::DatabaseMissingEntry { + db_name: "documents", + key: None, + })?; let update_obkv = obkv::KvReader::new(update_obkv); merge_two_obkvs(base_obkv, update_obkv, &mut obkv_buffer); (docid, obkv_buffer.as_slice()) @@ -409,7 +424,7 @@ impl Transform<'_, '_> { // If this user id is new we add it to the external documents ids map // for new ids and into the list of new documents. 
let new_docid = available_documents_ids.next() - .context("no more available documents ids")?; + .ok_or(UserError::DocumentLimitReached)?; new_external_documents_ids_builder.insert(external_id, new_docid as u64)?; new_documents_ids.insert(new_docid); (new_docid, update_obkv) @@ -469,7 +484,7 @@ impl Transform<'_, '_> { primary_key: String, old_fields_ids_map: FieldsIdsMap, new_fields_ids_map: FieldsIdsMap, - ) -> anyhow::Result + ) -> Result { let fields_distribution = self.index.fields_distribution(self.rtxn)?; let external_documents_ids = self.index.external_documents_ids(self.rtxn)?; @@ -529,10 +544,10 @@ fn compute_primary_key_pair( fields_ids_map: &mut FieldsIdsMap, alternative_name: Option, autogenerate_docids: bool, -) -> anyhow::Result<(FieldId, String)> { +) -> Result<(FieldId, String)> { match primary_key { Some(primary_key) => { - let id = fields_ids_map.insert(primary_key).ok_or(anyhow!("Maximum number of fields exceeded"))?; + let id = fields_ids_map.insert(primary_key).ok_or(UserError::AttributeLimitReached)?; Ok((id, primary_key.to_string())) } None => { @@ -542,12 +557,12 @@ fn compute_primary_key_pair( if !autogenerate_docids { // If there is no primary key in the current document batch, we must // return an error and not automatically generate any document id. - anyhow::bail!("missing primary key") + return Err(UserError::MissingPrimaryKey.into()); } DEFAULT_PRIMARY_KEY_NAME.to_string() }, }; - let id = fields_ids_map.insert(&name).context("field id limit reached")?; + let id = fields_ids_map.insert(&name).ok_or(UserError::AttributeLimitReached)?; Ok((id, name)) }, } diff --git a/milli/src/update/settings.rs b/milli/src/update/settings.rs index 1c687e089..1756a21c9 100644 --- a/milli/src/update/settings.rs +++ b/milli/src/update/settings.rs @@ -1,6 +1,6 @@ use std::collections::{BTreeSet, HashMap, HashSet}; +use std::result::Result as StdResult; -use anyhow::Context; use chrono::Utc; use grenad::CompressionType; use itertools::Itertools; @@ -9,9 +9,10 @@ use rayon::ThreadPool; use serde::{Deserialize, Deserializer, Serialize, Serializer}; use crate::criterion::Criterion; +use crate::error::UserError; use crate::update::index_documents::{IndexDocumentsMethod, Transform}; use crate::update::{ClearDocuments, IndexDocuments, UpdateIndexingStep}; -use crate::{FieldsIdsMap, Index}; +use crate::{FieldsIdsMap, Index, Result}; #[derive(Debug, Clone, PartialEq)] pub enum Setting { @@ -33,7 +34,7 @@ impl Setting { } impl Serialize for Setting { - fn serialize(&self, serializer: S) -> Result where S: Serializer { + fn serialize(&self, serializer: S) -> StdResult where S: Serializer { match self { Self::Set(value) => Some(value), // Usually not_set isn't serialized by setting skip_serializing_if field attribute @@ -43,7 +44,7 @@ impl Serialize for Setting { } impl<'de, T: Deserialize<'de>> Deserialize<'de> for Setting { - fn deserialize(deserializer: D) -> Result where D: Deserializer<'de> { + fn deserialize(deserializer: D) -> StdResult where D: Deserializer<'de> { Deserialize::deserialize(deserializer).map(|x| match x { Some(x) => Self::Set(x), None => Self::Reset, // Reset is forced by sending null value @@ -165,7 +166,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { } } - fn reindex(&mut self, cb: &F, old_fields_ids_map: FieldsIdsMap) -> anyhow::Result<()> + fn reindex(&mut self, cb: &F, old_fields_ids_map: FieldsIdsMap) -> Result<()> where F: Fn(UpdateIndexingStep, u64) + Sync { @@ -192,7 +193,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { }; // There already has been a 
document addition, the primary key should be set by now. - let primary_key = self.index.primary_key(&self.wtxn)?.context("Index must have a primary key")?; + let primary_key = self.index.primary_key(&self.wtxn)?.ok_or(UserError::MissingPrimaryKey)?; // We remap the documents fields based on the new `FieldsIdsMap`. let output = transform.remap_index_documents( @@ -220,7 +221,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { Ok(()) } - fn update_displayed(&mut self) -> anyhow::Result { + fn update_displayed(&mut self) -> Result { match self.displayed_fields { Setting::Set(ref fields) => { let mut fields_ids_map = self.index.fields_ids_map(self.wtxn)?; @@ -234,7 +235,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { for name in names.iter() { fields_ids_map .insert(name) - .context("field id limit exceeded")?; + .ok_or(UserError::AttributeLimitReached)?; } self.index.put_displayed_fields(self.wtxn, &names)?; self.index.put_fields_ids_map(self.wtxn, &fields_ids_map)?; @@ -245,13 +246,13 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { Ok(true) } - fn update_distinct_field(&mut self) -> anyhow::Result { + fn update_distinct_field(&mut self) -> Result { match self.distinct_field { Setting::Set(ref attr) => { let mut fields_ids_map = self.index.fields_ids_map(self.wtxn)?; fields_ids_map .insert(attr) - .context("field id limit exceeded")?; + .ok_or(UserError::AttributeLimitReached)?; self.index.put_distinct_field(self.wtxn, &attr)?; self.index.put_fields_ids_map(self.wtxn, &fields_ids_map)?; @@ -264,7 +265,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { /// Updates the index's searchable attributes. This causes the field map to be recomputed to /// reflect the order of the searchable attributes. - fn update_searchable(&mut self) -> anyhow::Result { + fn update_searchable(&mut self) -> Result { match self.searchable_fields { Setting::Set(ref fields) => { // every time the searchable attributes are updated, we need to update the @@ -285,13 +286,13 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { for name in names.iter() { new_fields_ids_map .insert(&name) - .context("field id limit exceeded")?; + .ok_or(UserError::AttributeLimitReached)?; } for (_, name) in old_fields_ids_map.iter() { new_fields_ids_map .insert(&name) - .context("field id limit exceeded")?; + .ok_or(UserError::AttributeLimitReached)?; } self.index.put_searchable_fields(self.wtxn, &names)?; @@ -303,7 +304,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { Ok(true) } - fn update_stop_words(&mut self) -> anyhow::Result { + fn update_stop_words(&mut self) -> Result { match self.stop_words { Setting::Set(ref stop_words) => { let current = self.index.stop_words(self.wtxn)?; @@ -325,7 +326,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { } } - fn update_synonyms(&mut self) -> anyhow::Result { + fn update_synonyms(&mut self) -> Result { match self.synonyms { Setting::Set(ref synonyms) => { fn normalize(analyzer: &Analyzer<&[u8]>, text: &str) -> Vec { @@ -383,13 +384,13 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { } } - fn update_filterable(&mut self) -> anyhow::Result<()> { + fn update_filterable(&mut self) -> Result<()> { match self.filterable_fields { Setting::Set(ref fields) => { let mut fields_ids_map = self.index.fields_ids_map(self.wtxn)?; let mut new_facets = HashSet::new(); for name in fields { - fields_ids_map.insert(name).context("field id limit exceeded")?; + fields_ids_map.insert(name).ok_or(UserError::AttributeLimitReached)?; new_facets.insert(name.clone()); } 
self.index.put_filterable_fields(self.wtxn, &new_facets)?; @@ -401,7 +402,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { Ok(()) } - fn update_criteria(&mut self) -> anyhow::Result<()> { + fn update_criteria(&mut self) -> Result<()> { match self.criteria { Setting::Set(ref fields) => { let mut fields_ids_map = self.index.fields_ids_map(self.wtxn)?; @@ -409,7 +410,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { for name in fields { let criterion: Criterion = name.parse()?; if let Some(name) = criterion.field_name() { - fields_ids_map.insert(name).context("field id limit exceeded")?; + fields_ids_map.insert(name).ok_or(UserError::AttributeLimitReached)?; } new_criteria.push(criterion); } @@ -422,7 +423,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { Ok(()) } - pub fn execute(mut self, progress_callback: F) -> anyhow::Result<()> + pub fn execute(mut self, progress_callback: F) -> Result<()> where F: Fn(UpdateIndexingStep, u64) + Sync { diff --git a/milli/src/update/update_builder.rs b/milli/src/update/update_builder.rs index 8d6eb034d..1d0e776b1 100644 --- a/milli/src/update/update_builder.rs +++ b/milli/src/update/update_builder.rs @@ -1,7 +1,7 @@ use grenad::CompressionType; use rayon::ThreadPool; -use crate::Index; +use crate::{Index, Result}; use super::{ClearDocuments, DeleteDocuments, IndexDocuments, Settings, Facets}; pub struct UpdateBuilder<'a> { @@ -76,7 +76,7 @@ impl<'a> UpdateBuilder<'a> { self, wtxn: &'t mut heed::RwTxn<'i, 'u>, index: &'i Index, - ) -> anyhow::Result> + ) -> Result> { DeleteDocuments::new(wtxn, index, self.update_id) } diff --git a/milli/src/update/word_prefix_docids.rs b/milli/src/update/word_prefix_docids.rs index 0544f8789..a2197b28c 100644 --- a/milli/src/update/word_prefix_docids.rs +++ b/milli/src/update/word_prefix_docids.rs @@ -5,6 +5,7 @@ use fst::Streamer; use grenad::CompressionType; use heed::types::ByteSlice; +use crate::Result; use crate::update::index_documents::WriteMethod; use crate::update::index_documents::{ create_sorter, roaring_bitmap_merge, sorter_into_lmdb_database, @@ -33,7 +34,7 @@ impl<'t, 'u, 'i> WordPrefixDocids<'t, 'u, 'i> { } } - pub fn execute(self) -> anyhow::Result<()> { + pub fn execute(self) -> Result<()> { // Clear the word prefix docids database. 
self.index.word_prefix_docids.clear(self.wtxn)?; diff --git a/milli/src/update/word_prefix_pair_proximity_docids.rs b/milli/src/update/word_prefix_pair_proximity_docids.rs index c6b935e54..9019b26e5 100644 --- a/milli/src/update/word_prefix_pair_proximity_docids.rs +++ b/milli/src/update/word_prefix_pair_proximity_docids.rs @@ -7,7 +7,7 @@ use heed::BytesEncode; use heed::types::ByteSlice; use log::debug; -use crate::Index; +use crate::{Index, Result}; use crate::heed_codec::StrStrU8Codec; use crate::update::index_documents::{ WriteMethod, create_sorter, sorter_into_lmdb_database, @@ -41,7 +41,7 @@ impl<'t, 'u, 'i> WordPrefixPairProximityDocids<'t, 'u, 'i> { } } - pub fn execute(self) -> anyhow::Result<()> { + pub fn execute(self) -> Result<()> { debug!("Computing and writing the word prefix pair proximity docids into LMDB on disk..."); self.index.word_prefix_pair_proximity_docids.clear(self.wtxn)?; diff --git a/milli/src/update/words_level_positions.rs b/milli/src/update/words_level_positions.rs index f94507aab..e2e3f7b4c 100644 --- a/milli/src/update/words_level_positions.rs +++ b/milli/src/update/words_level_positions.rs @@ -11,7 +11,9 @@ use heed::{BytesEncode, Error}; use log::debug; use roaring::RoaringBitmap; +use crate::error::InternalError; use crate::heed_codec::{StrLevelPositionCodec, CboRoaringBitmapCodec}; +use crate::Result; use crate::update::index_documents::WriteMethod; use crate::update::index_documents::{ create_writer, create_sorter, writer_into_reader, write_into_lmdb_database, @@ -56,7 +58,7 @@ impl<'t, 'u, 'i> WordsLevelPositions<'t, 'u, 'i> { self } - pub fn execute(self) -> anyhow::Result<()> { + pub fn execute(self) -> Result<()> { debug!("Computing and writing the word levels positions docids into LMDB on disk..."); let entries = compute_positions_levels( @@ -78,7 +80,7 @@ impl<'t, 'u, 'i> WordsLevelPositions<'t, 'u, 'i> { self.wtxn, *self.index.word_level_position_docids.as_polymorph(), entries, - |_, _| anyhow::bail!("invalid word level position merging"), + |_, _| Err(InternalError::IndexingMergingKeys { process: "word level position" }), WriteMethod::Append, )?; @@ -142,7 +144,7 @@ impl<'t, 'u, 'i> WordsLevelPositions<'t, 'u, 'i> { self.wtxn, *self.index.word_prefix_level_position_docids.as_polymorph(), entries, - |_, _| anyhow::bail!("invalid word prefix level position merging"), + |_, _| Err(InternalError::IndexingMergingKeys { process: "word prefix level position" }), WriteMethod::Append, )?; @@ -174,7 +176,7 @@ fn compute_positions_levels( shrink_size: Option, level_group_size: NonZeroU32, min_level_size: NonZeroU32, -) -> anyhow::Result> +) -> Result> { // It is forbidden to keep a cursor and write in a database at the same time with LMDB // therefore we write the facet levels entries into a grenad file before transfering them. 
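// These hunks replace `anyhow::Result` and `anyhow::bail!` with a crate-local `Result<T>`
// whose error type distinguishes user mistakes (`UserError`) from broken internal
// invariants (`InternalError`). The definitions below are a minimal illustrative sketch of
// that pattern, not milli's actual `error.rs`:
#[derive(Debug)]
pub enum UserError { AttributeLimitReached, MissingPrimaryKey }

#[derive(Debug)]
pub enum InternalError { IndexingMergingKeys { process: &'static str } }

#[derive(Debug)]
pub enum Error { UserError(UserError), InternalError(InternalError) }

impl From<UserError> for Error {
    fn from(error: UserError) -> Error { Error::UserError(error) }
}

impl From<InternalError> for Error {
    fn from(error: InternalError) -> Error { Error::InternalError(error) }
}

pub type Result<T> = std::result::Result<T, Error>;

// Fallible lookups that used to rely on `.context(...)` or `.expect(...)` now surface a
// typed error through `ok_or(...)` or `map_err(...)`, as in this hypothetical helper:
fn insert_field(ids: &mut std::collections::HashMap<String, u8>, name: &str) -> Result<u8> {
    if let Some(&id) = ids.get(name) { return Ok(id) }
    // Field ids are u8, so running out of ids becomes a typed user error.
    let next = u8::try_from(ids.len()).map_err(|_| UserError::AttributeLimitReached)?;
    ids.insert(name.to_string(), next);
    Ok(next)
}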
@@ -251,7 +253,7 @@ fn write_level_entry( left: u32, right: u32, ids: &RoaringBitmap, -) -> anyhow::Result<()> +) -> Result<()> { let key = (word, level, left, right); let key = StrLevelPositionCodec::bytes_encode(&key).ok_or(Error::Encoding)?; diff --git a/milli/src/update/words_prefixes_fst.rs b/milli/src/update/words_prefixes_fst.rs index f53b0ee00..d1aa267b8 100644 --- a/milli/src/update/words_prefixes_fst.rs +++ b/milli/src/update/words_prefixes_fst.rs @@ -2,7 +2,7 @@ use std::iter::FromIterator; use std::str; use fst::Streamer; -use crate::{Index, SmallString32}; +use crate::{Index, SmallString32, Result}; pub struct WordsPrefixesFst<'t, 'u, 'i> { wtxn: &'t mut heed::RwTxn<'i, 'u>, @@ -48,7 +48,7 @@ impl<'t, 'u, 'i> WordsPrefixesFst<'t, 'u, 'i> { self } - pub fn execute(self) -> anyhow::Result<()> { + pub fn execute(self) -> Result<()> { let words_fst = self.index.words_fst(&self.wtxn)?; let number_of_words = words_fst.len(); let min_number_of_words = (number_of_words as f64 * self.threshold) as usize; From 78fe4259a900afc2fdac78e5ae1068c28cd40d6b Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Mon, 14 Jun 2021 18:06:23 +0200 Subject: [PATCH 13/22] Fix the http-ui crate --- http-ui/src/main.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/http-ui/src/main.rs b/http-ui/src/main.rs index 1f91e6370..e23dddd4c 100644 --- a/http-ui/src/main.rs +++ b/http-ui/src/main.rs @@ -391,7 +391,7 @@ async fn main() -> anyhow::Result<()> { match result { Ok(_) => wtxn.commit().map_err(Into::into), - Err(e) => Err(e) + Err(e) => Err(e.into()), } } UpdateMeta::ClearDocuments => { @@ -401,7 +401,7 @@ async fn main() -> anyhow::Result<()> { match builder.execute() { Ok(_count) => wtxn.commit().map_err(Into::into), - Err(e) => Err(e) + Err(e) => Err(e.into()), } } UpdateMeta::Settings(settings) => { @@ -471,7 +471,7 @@ async fn main() -> anyhow::Result<()> { match result { Ok(_count) => wtxn.commit().map_err(Into::into), - Err(e) => Err(e) + Err(e) => Err(e.into()), } } UpdateMeta::Facets(levels) => { @@ -486,7 +486,7 @@ async fn main() -> anyhow::Result<()> { } match builder.execute() { Ok(()) => wtxn.commit().map_err(Into::into), - Err(e) => Err(e) + Err(e) => Err(e.into()), } } }; From 28c004aa2cd8cb16610aa322e449955c5cf523ce Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Tue, 15 Jun 2021 11:06:42 +0200 Subject: [PATCH 14/22] Prefer using constant for the database names --- infos/src/main.rs | 100 ++++---- milli/src/index.rs | 224 +++++++++++------- milli/src/update/delete_documents.rs | 6 +- milli/src/update/index_documents/store.rs | 2 +- milli/src/update/index_documents/transform.rs | 3 +- 5 files changed, 183 insertions(+), 152 deletions(-) diff --git a/infos/src/main.rs b/infos/src/main.rs index d6aa1f854..b0c304de0 100644 --- a/infos/src/main.rs +++ b/infos/src/main.rs @@ -5,55 +5,41 @@ use std::{str, io, fmt}; use anyhow::Context; use byte_unit::Byte; use heed::EnvOpenOptions; -use milli::facet::FacetType; -use milli::{Index, TreeLevel}; use structopt::StructOpt; +use milli::facet::FacetType; +use milli::index::db_name::*; +use milli::{Index, TreeLevel}; + use Command::*; #[cfg(target_os = "linux")] #[global_allocator] static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc; -const MAIN_DB_NAME: &str = "main"; -const WORD_DOCIDS_DB_NAME: &str = "word-docids"; -const WORD_PREFIX_DOCIDS_DB_NAME: &str = "word-prefix-docids"; -const DOCID_WORD_POSITIONS_DB_NAME: &str = "docid-word-positions"; -const WORD_PAIR_PROXIMITY_DOCIDS_DB_NAME: &str = 
"word-pair-proximity-docids"; -const WORD_PREFIX_PAIR_PROXIMITY_DOCIDS_DB_NAME: &str = "word-prefix-pair-proximity-docids"; -const WORD_LEVEL_POSITION_DOCIDS_DB_NAME: &str = "word-level-position-docids"; -const WORD_PREFIX_LEVEL_POSITION_DOCIDS_DB_NAME: &str = "word-prefix-level-position-docids"; -const FIELD_ID_WORD_COUNT_DOCIDS_DB_NAME: &str = "field-id-word-count-docids"; -const FACET_ID_F64_DOCIDS_DB_NAME: &str = "facet-id-f64-docids"; -const FACET_ID_STRING_DOCIDS_DB_NAME: &str = "facet-id-string-docids"; -const FIELD_ID_DOCID_FACET_F64S_DB_NAME: &str = "field-id-docid-facet-f64s"; -const FIELD_ID_DOCID_FACET_STRINGS_DB_NAME: &str = "field-id-docid-facet-strings"; - -const DOCUMENTS_DB_NAME: &str = "documents"; - const ALL_DATABASE_NAMES: &[&str] = &[ - MAIN_DB_NAME, - WORD_DOCIDS_DB_NAME, - WORD_PREFIX_DOCIDS_DB_NAME, - DOCID_WORD_POSITIONS_DB_NAME, - WORD_PAIR_PROXIMITY_DOCIDS_DB_NAME, - WORD_PREFIX_PAIR_PROXIMITY_DOCIDS_DB_NAME, - WORD_LEVEL_POSITION_DOCIDS_DB_NAME, - WORD_PREFIX_LEVEL_POSITION_DOCIDS_DB_NAME, - FIELD_ID_WORD_COUNT_DOCIDS_DB_NAME, - FACET_ID_F64_DOCIDS_DB_NAME, - FACET_ID_STRING_DOCIDS_DB_NAME, - FIELD_ID_DOCID_FACET_F64S_DB_NAME, - FIELD_ID_DOCID_FACET_STRINGS_DB_NAME, - DOCUMENTS_DB_NAME, + MAIN, + WORD_DOCIDS, + WORD_PREFIX_DOCIDS, + DOCID_WORD_POSITIONS, + WORD_PAIR_PROXIMITY_DOCIDS, + WORD_PREFIX_PAIR_PROXIMITY_DOCIDS, + WORD_LEVEL_POSITION_DOCIDS, + WORD_PREFIX_LEVEL_POSITION_DOCIDS, + FIELD_ID_WORD_COUNT_DOCIDS, + FACET_ID_F64_DOCIDS, + FACET_ID_STRING_DOCIDS, + FIELD_ID_DOCID_FACET_F64S, + FIELD_ID_DOCID_FACET_STRINGS, + DOCUMENTS, ]; const POSTINGS_DATABASE_NAMES: &[&str] = &[ - WORD_DOCIDS_DB_NAME, - WORD_PREFIX_DOCIDS_DB_NAME, - DOCID_WORD_POSITIONS_DB_NAME, - WORD_PAIR_PROXIMITY_DOCIDS_DB_NAME, - WORD_PREFIX_PAIR_PROXIMITY_DOCIDS_DB_NAME, + WORD_DOCIDS, + WORD_PREFIX_DOCIDS, + DOCID_WORD_POSITIONS, + WORD_PAIR_PROXIMITY_DOCIDS, + WORD_PREFIX_PAIR_PROXIMITY_DOCIDS, ]; #[derive(Debug, StructOpt)] @@ -944,21 +930,21 @@ fn size_of_databases(index: &Index, rtxn: &heed::RoTxn, names: Vec) -> a for name in names { let database = match name.as_str() { - MAIN_DB_NAME => &main, - WORD_PREFIX_DOCIDS_DB_NAME => word_prefix_docids.as_polymorph(), - WORD_DOCIDS_DB_NAME => word_docids.as_polymorph(), - DOCID_WORD_POSITIONS_DB_NAME => docid_word_positions.as_polymorph(), - WORD_PAIR_PROXIMITY_DOCIDS_DB_NAME => word_pair_proximity_docids.as_polymorph(), - WORD_PREFIX_PAIR_PROXIMITY_DOCIDS_DB_NAME => word_prefix_pair_proximity_docids.as_polymorph(), - WORD_LEVEL_POSITION_DOCIDS_DB_NAME => word_level_position_docids.as_polymorph(), - WORD_PREFIX_LEVEL_POSITION_DOCIDS_DB_NAME => word_prefix_level_position_docids.as_polymorph(), - FIELD_ID_WORD_COUNT_DOCIDS_DB_NAME => field_id_word_count_docids.as_polymorph(), - FACET_ID_F64_DOCIDS_DB_NAME => facet_id_f64_docids.as_polymorph(), - FACET_ID_STRING_DOCIDS_DB_NAME => facet_id_string_docids.as_polymorph(), - FIELD_ID_DOCID_FACET_F64S_DB_NAME => field_id_docid_facet_f64s.as_polymorph(), - FIELD_ID_DOCID_FACET_STRINGS_DB_NAME => field_id_docid_facet_strings.as_polymorph(), + MAIN => &main, + WORD_PREFIX_DOCIDS => word_prefix_docids.as_polymorph(), + WORD_DOCIDS => word_docids.as_polymorph(), + DOCID_WORD_POSITIONS => docid_word_positions.as_polymorph(), + WORD_PAIR_PROXIMITY_DOCIDS => word_pair_proximity_docids.as_polymorph(), + WORD_PREFIX_PAIR_PROXIMITY_DOCIDS => word_prefix_pair_proximity_docids.as_polymorph(), + WORD_LEVEL_POSITION_DOCIDS => word_level_position_docids.as_polymorph(), + WORD_PREFIX_LEVEL_POSITION_DOCIDS => 
word_prefix_level_position_docids.as_polymorph(), + FIELD_ID_WORD_COUNT_DOCIDS => field_id_word_count_docids.as_polymorph(), + FACET_ID_F64_DOCIDS => facet_id_f64_docids.as_polymorph(), + FACET_ID_STRING_DOCIDS => facet_id_string_docids.as_polymorph(), + FIELD_ID_DOCID_FACET_F64S => field_id_docid_facet_f64s.as_polymorph(), + FIELD_ID_DOCID_FACET_STRINGS => field_id_docid_facet_strings.as_polymorph(), - DOCUMENTS_DB_NAME => documents.as_polymorph(), + DOCUMENTS => documents.as_polymorph(), unknown => anyhow::bail!("unknown database {:?}", unknown), }; @@ -1039,27 +1025,27 @@ fn database_stats(index: &Index, rtxn: &heed::RoTxn, name: &str) -> anyhow::Resu } match name { - WORD_DOCIDS_DB_NAME => { + WORD_DOCIDS => { let db = index.word_docids.as_polymorph(); compute_stats::(*db, rtxn, name) }, - WORD_PREFIX_DOCIDS_DB_NAME => { + WORD_PREFIX_DOCIDS => { let db = index.word_prefix_docids.as_polymorph(); compute_stats::(*db, rtxn, name) }, - DOCID_WORD_POSITIONS_DB_NAME => { + DOCID_WORD_POSITIONS => { let db = index.docid_word_positions.as_polymorph(); compute_stats::(*db, rtxn, name) }, - WORD_PAIR_PROXIMITY_DOCIDS_DB_NAME => { + WORD_PAIR_PROXIMITY_DOCIDS => { let db = index.word_pair_proximity_docids.as_polymorph(); compute_stats::(*db, rtxn, name) }, - WORD_PREFIX_PAIR_PROXIMITY_DOCIDS_DB_NAME => { + WORD_PREFIX_PAIR_PROXIMITY_DOCIDS => { let db = index.word_prefix_pair_proximity_docids.as_polymorph(); compute_stats::(*db, rtxn, name) }, - FIELD_ID_WORD_COUNT_DOCIDS_DB_NAME => { + FIELD_ID_WORD_COUNT_DOCIDS => { let db = index.field_id_word_count_docids.as_polymorph(); compute_stats::(*db, rtxn, name) }, diff --git a/milli/src/index.rs b/milli/src/index.rs index 9ebe34a2e..f3411564b 100644 --- a/milli/src/index.rs +++ b/milli/src/index.rs @@ -21,25 +21,44 @@ use crate::heed_codec::facet::{ }; use crate::fields_ids_map::FieldsIdsMap; -pub const CRITERIA_KEY: &str = "criteria"; -pub const DISPLAYED_FIELDS_KEY: &str = "displayed-fields"; -pub const DISTINCT_FIELD_KEY: &str = "distinct-field-key"; -pub const DOCUMENTS_IDS_KEY: &str = "documents-ids"; -pub const FILTERABLE_FIELDS_KEY: &str = "filterable-fields"; -pub const FIELDS_DISTRIBUTION_KEY: &str = "fields-distribution"; -pub const FIELDS_IDS_MAP_KEY: &str = "fields-ids-map"; -pub const HARD_EXTERNAL_DOCUMENTS_IDS_KEY: &str = "hard-external-documents-ids"; -pub const NUMBER_FACETED_DOCUMENTS_IDS_PREFIX: &str = "number-faceted-documents-ids"; -pub const PRIMARY_KEY_KEY: &str = "primary-key"; -pub const SEARCHABLE_FIELDS_KEY: &str = "searchable-fields"; -pub const SOFT_EXTERNAL_DOCUMENTS_IDS_KEY: &str = "soft-external-documents-ids"; -pub const STOP_WORDS_KEY: &str = "stop-words"; -pub const STRING_FACETED_DOCUMENTS_IDS_PREFIX: &str = "string-faceted-documents-ids"; -pub const SYNONYMS_KEY: &str = "synonyms"; -pub const WORDS_FST_KEY: &str = "words-fst"; -pub const WORDS_PREFIXES_FST_KEY: &str = "words-prefixes-fst"; -const CREATED_AT_KEY: &str = "created-at"; -const UPDATED_AT_KEY: &str = "updated-at"; +pub mod main_key { + pub const CRITERIA_KEY: &str = "criteria"; + pub const DISPLAYED_FIELDS_KEY: &str = "displayed-fields"; + pub const DISTINCT_FIELD_KEY: &str = "distinct-field-key"; + pub const DOCUMENTS_IDS_KEY: &str = "documents-ids"; + pub const FILTERABLE_FIELDS_KEY: &str = "filterable-fields"; + pub const FIELDS_DISTRIBUTION_KEY: &str = "fields-distribution"; + pub const FIELDS_IDS_MAP_KEY: &str = "fields-ids-map"; + pub const HARD_EXTERNAL_DOCUMENTS_IDS_KEY: &str = "hard-external-documents-ids"; + pub const 
NUMBER_FACETED_DOCUMENTS_IDS_PREFIX: &str = "number-faceted-documents-ids"; + pub const PRIMARY_KEY_KEY: &str = "primary-key"; + pub const SEARCHABLE_FIELDS_KEY: &str = "searchable-fields"; + pub const SOFT_EXTERNAL_DOCUMENTS_IDS_KEY: &str = "soft-external-documents-ids"; + pub const STOP_WORDS_KEY: &str = "stop-words"; + pub const STRING_FACETED_DOCUMENTS_IDS_PREFIX: &str = "string-faceted-documents-ids"; + pub const SYNONYMS_KEY: &str = "synonyms"; + pub const WORDS_FST_KEY: &str = "words-fst"; + pub const WORDS_PREFIXES_FST_KEY: &str = "words-prefixes-fst"; + pub const CREATED_AT_KEY: &str = "created-at"; + pub const UPDATED_AT_KEY: &str = "updated-at"; +} + +pub mod db_name { + pub const MAIN: &str = "main"; + pub const WORD_DOCIDS: &str = "word-docids"; + pub const WORD_PREFIX_DOCIDS: &str = "word-prefix-docids"; + pub const DOCID_WORD_POSITIONS: &str = "docid-word-positions"; + pub const WORD_PAIR_PROXIMITY_DOCIDS: &str = "word-pair-proximity-docids"; + pub const WORD_PREFIX_PAIR_PROXIMITY_DOCIDS: &str = "word-prefix-pair-proximity-docids"; + pub const WORD_LEVEL_POSITION_DOCIDS: &str = "word-level-position-docids"; + pub const WORD_PREFIX_LEVEL_POSITION_DOCIDS: &str = "word-prefix-level-position-docids"; + pub const FIELD_ID_WORD_COUNT_DOCIDS: &str = "field-id-word-count-docids"; + pub const FACET_ID_F64_DOCIDS: &str = "facet-id-f64-docids"; + pub const FACET_ID_STRING_DOCIDS: &str = "facet-id-string-docids"; + pub const FIELD_ID_DOCID_FACET_F64S: &str = "field-id-docid-facet-f64s"; + pub const FIELD_ID_DOCID_FACET_STRINGS: &str = "field-id-docid-facet-strings"; + pub const DOCUMENTS: &str = "documents"; +} #[derive(Clone)] pub struct Index { @@ -85,23 +104,25 @@ pub struct Index { impl Index { pub fn new>(mut options: heed::EnvOpenOptions, path: P) -> Result { + use db_name::*; + options.max_dbs(14); let env = options.open(path)?; - let main = env.create_poly_database(Some("main"))?; - let word_docids = env.create_database(Some("word-docids"))?; - let word_prefix_docids = env.create_database(Some("word-prefix-docids"))?; - let docid_word_positions = env.create_database(Some("docid-word-positions"))?; - let word_pair_proximity_docids = env.create_database(Some("word-pair-proximity-docids"))?; - let word_prefix_pair_proximity_docids = env.create_database(Some("word-prefix-pair-proximity-docids"))?; - let word_level_position_docids = env.create_database(Some("word-level-position-docids"))?; - let field_id_word_count_docids = env.create_database(Some("field-id-word-count-docids"))?; - let word_prefix_level_position_docids = env.create_database(Some("word-prefix-level-position-docids"))?; - let facet_id_f64_docids = env.create_database(Some("facet-id-f64-docids"))?; - let facet_id_string_docids = env.create_database(Some("facet-id-string-docids"))?; - let field_id_docid_facet_f64s = env.create_database(Some("field-id-docid-facet-f64s"))?; - let field_id_docid_facet_strings = env.create_database(Some("field-id-docid-facet-strings"))?; - let documents = env.create_database(Some("documents"))?; + let main = env.create_poly_database(Some(MAIN))?; + let word_docids = env.create_database(Some(WORD_DOCIDS))?; + let word_prefix_docids = env.create_database(Some(WORD_PREFIX_DOCIDS))?; + let docid_word_positions = env.create_database(Some(DOCID_WORD_POSITIONS))?; + let word_pair_proximity_docids = env.create_database(Some(WORD_PAIR_PROXIMITY_DOCIDS))?; + let word_prefix_pair_proximity_docids = env.create_database(Some(WORD_PREFIX_PAIR_PROXIMITY_DOCIDS))?; + let word_level_position_docids = 
env.create_database(Some(WORD_LEVEL_POSITION_DOCIDS))?; + let field_id_word_count_docids = env.create_database(Some(FIELD_ID_WORD_COUNT_DOCIDS))?; + let word_prefix_level_position_docids = env.create_database(Some(WORD_PREFIX_LEVEL_POSITION_DOCIDS))?; + let facet_id_f64_docids = env.create_database(Some(FACET_ID_F64_DOCIDS))?; + let facet_id_string_docids = env.create_database(Some(FACET_ID_STRING_DOCIDS))?; + let field_id_docid_facet_f64s = env.create_database(Some(FIELD_ID_DOCID_FACET_F64S))?; + let field_id_docid_facet_strings = env.create_database(Some(FIELD_ID_DOCID_FACET_STRINGS))?; + let documents = env.create_database(Some(DOCUMENTS))?; Index::initialize_creation_dates(&env, main)?; @@ -127,10 +148,10 @@ impl Index { fn initialize_creation_dates(env: &heed::Env, main: PolyDatabase) -> heed::Result<()> { let mut txn = env.write_txn()?; // The db was just created, we update its metadata with the relevant information. - if main.get::<_, Str, SerdeJson>>(&txn, CREATED_AT_KEY)?.is_none() { + if main.get::<_, Str, SerdeJson>>(&txn, main_key::CREATED_AT_KEY)?.is_none() { let now = Utc::now(); - main.put::<_, Str, SerdeJson>>(&mut txn, UPDATED_AT_KEY, &now)?; - main.put::<_, Str, SerdeJson>>(&mut txn, CREATED_AT_KEY, &now)?; + main.put::<_, Str, SerdeJson>>(&mut txn, main_key::UPDATED_AT_KEY, &now)?; + main.put::<_, Str, SerdeJson>>(&mut txn, main_key::CREATED_AT_KEY, &now)?; txn.commit()?; } Ok(()) @@ -164,17 +185,17 @@ impl Index { /// Writes the documents ids that corresponds to the user-ids-documents-ids FST. pub fn put_documents_ids(&self, wtxn: &mut RwTxn, docids: &RoaringBitmap) -> heed::Result<()> { - self.main.put::<_, Str, RoaringBitmapCodec>(wtxn, DOCUMENTS_IDS_KEY, docids) + self.main.put::<_, Str, RoaringBitmapCodec>(wtxn, main_key::DOCUMENTS_IDS_KEY, docids) } /// Returns the internal documents ids. pub fn documents_ids(&self, rtxn: &RoTxn) -> heed::Result { - Ok(self.main.get::<_, Str, RoaringBitmapCodec>(rtxn, DOCUMENTS_IDS_KEY)?.unwrap_or_default()) + Ok(self.main.get::<_, Str, RoaringBitmapCodec>(rtxn, main_key::DOCUMENTS_IDS_KEY)?.unwrap_or_default()) } /// Returns the number of documents indexed in the database. pub fn number_of_documents(&self, rtxn: &RoTxn) -> Result { - let count = self.main.get::<_, Str, RoaringBitmapLenCodec>(rtxn, DOCUMENTS_IDS_KEY)?; + let count = self.main.get::<_, Str, RoaringBitmapLenCodec>(rtxn, main_key::DOCUMENTS_IDS_KEY)?; Ok(count.unwrap_or_default()) } @@ -183,17 +204,17 @@ impl Index { /// Writes the documents primary key, this is the field name that is used to store the id. pub fn put_primary_key(&self, wtxn: &mut RwTxn, primary_key: &str) -> heed::Result<()> { self.set_updated_at(wtxn, &Utc::now())?; - self.main.put::<_, Str, Str>(wtxn, PRIMARY_KEY_KEY, &primary_key) + self.main.put::<_, Str, Str>(wtxn, main_key::PRIMARY_KEY_KEY, &primary_key) } /// Deletes the primary key of the documents, this can be done to reset indexes settings. pub fn delete_primary_key(&self, wtxn: &mut RwTxn) -> heed::Result { - self.main.delete::<_, Str>(wtxn, PRIMARY_KEY_KEY) + self.main.delete::<_, Str>(wtxn, main_key::PRIMARY_KEY_KEY) } /// Returns the documents primary key, `None` if it hasn't been defined. 
pub fn primary_key<'t>(&self, rtxn: &'t RoTxn) -> heed::Result> { - self.main.get::<_, Str, Str>(rtxn, PRIMARY_KEY_KEY) + self.main.get::<_, Str, Str>(rtxn, main_key::PRIMARY_KEY_KEY) } /* external documents ids */ @@ -208,16 +229,16 @@ impl Index { let ExternalDocumentsIds { hard, soft } = external_documents_ids; let hard = hard.as_fst().as_bytes(); let soft = soft.as_fst().as_bytes(); - self.main.put::<_, Str, ByteSlice>(wtxn, HARD_EXTERNAL_DOCUMENTS_IDS_KEY, hard)?; - self.main.put::<_, Str, ByteSlice>(wtxn, SOFT_EXTERNAL_DOCUMENTS_IDS_KEY, soft)?; + self.main.put::<_, Str, ByteSlice>(wtxn, main_key::HARD_EXTERNAL_DOCUMENTS_IDS_KEY, hard)?; + self.main.put::<_, Str, ByteSlice>(wtxn, main_key::SOFT_EXTERNAL_DOCUMENTS_IDS_KEY, soft)?; Ok(()) } /// Returns the external documents ids map which associate the external ids /// with the internal ids (i.e. `u32`). pub fn external_documents_ids<'t>(&self, rtxn: &'t RoTxn) -> Result> { - let hard = self.main.get::<_, Str, ByteSlice>(rtxn, HARD_EXTERNAL_DOCUMENTS_IDS_KEY)?; - let soft = self.main.get::<_, Str, ByteSlice>(rtxn, SOFT_EXTERNAL_DOCUMENTS_IDS_KEY)?; + let hard = self.main.get::<_, Str, ByteSlice>(rtxn, main_key::HARD_EXTERNAL_DOCUMENTS_IDS_KEY)?; + let soft = self.main.get::<_, Str, ByteSlice>(rtxn, main_key::SOFT_EXTERNAL_DOCUMENTS_IDS_KEY)?; let hard = match hard { Some(hard) => fst::Map::new(hard)?.map_data(Cow::Borrowed)?, None => fst::Map::default().map_data(Cow::Owned)?, @@ -234,13 +255,16 @@ impl Index { /// Writes the fields ids map which associate the documents keys with an internal field id /// (i.e. `u8`), this field id is used to identify fields in the obkv documents. pub fn put_fields_ids_map(&self, wtxn: &mut RwTxn, map: &FieldsIdsMap) -> heed::Result<()> { - self.main.put::<_, Str, SerdeJson>(wtxn, FIELDS_IDS_MAP_KEY, map) + self.main.put::<_, Str, SerdeJson>(wtxn, main_key::FIELDS_IDS_MAP_KEY, map) } /// Returns the fields ids map which associate the documents keys with an internal field id /// (i.e. `u8`), this field id is used to identify fields in the obkv documents. pub fn fields_ids_map(&self, rtxn: &RoTxn) -> heed::Result { - Ok(self.main.get::<_, Str, SerdeJson>(rtxn, FIELDS_IDS_MAP_KEY)?.unwrap_or_default()) + Ok(self.main.get::<_, Str, SerdeJson>( + rtxn, + main_key::FIELDS_IDS_MAP_KEY, + )?.unwrap_or_default()) } /* fields distribution */ @@ -248,13 +272,16 @@ impl Index { /// Writes the fields distribution which associates every field name with /// the number of times it occurs in the documents. pub fn put_fields_distribution(&self, wtxn: &mut RwTxn, distribution: &FieldsDistribution) -> heed::Result<()> { - self.main.put::<_, Str, SerdeJson>(wtxn, FIELDS_DISTRIBUTION_KEY, distribution) + self.main.put::<_, Str, SerdeJson>(wtxn, main_key::FIELDS_DISTRIBUTION_KEY, distribution) } /// Returns the fields distribution which associates every field name with /// the number of times it occurs in the documents. pub fn fields_distribution(&self, rtxn: &RoTxn) -> heed::Result { - Ok(self.main.get::<_, Str, SerdeJson>(rtxn, FIELDS_DISTRIBUTION_KEY)?.unwrap_or_default()) + Ok(self.main.get::<_, Str, SerdeJson>( + rtxn, + main_key::FIELDS_DISTRIBUTION_KEY, + )?.unwrap_or_default()) } /* displayed fields */ @@ -262,19 +289,19 @@ impl Index { /// Writes the fields that must be displayed in the defined order. /// There must be not be any duplicate field id. 
pub fn put_displayed_fields(&self, wtxn: &mut RwTxn, fields: &[&str]) -> heed::Result<()> { - self.main.put::<_, Str, SerdeBincode<&[&str]>>(wtxn, DISPLAYED_FIELDS_KEY, &fields) + self.main.put::<_, Str, SerdeBincode<&[&str]>>(wtxn, main_key::DISPLAYED_FIELDS_KEY, &fields) } /// Deletes the displayed fields ids, this will make the engine to display /// all the documents attributes in the order of the `FieldsIdsMap`. pub fn delete_displayed_fields(&self, wtxn: &mut RwTxn) -> heed::Result { - self.main.delete::<_, Str>(wtxn, DISPLAYED_FIELDS_KEY) + self.main.delete::<_, Str>(wtxn, main_key::DISPLAYED_FIELDS_KEY) } /// Returns the displayed fields in the order they were set by the user. If it returns /// `None` it means that all the attributes are set as displayed in the order of the `FieldsIdsMap`. pub fn displayed_fields<'t>(&self, rtxn: &'t RoTxn) -> heed::Result>> { - self.main.get::<_, Str, SerdeBincode>>(rtxn, DISPLAYED_FIELDS_KEY) + self.main.get::<_, Str, SerdeBincode>>(rtxn, main_key::DISPLAYED_FIELDS_KEY) } pub fn displayed_fields_ids(&self, rtxn: &RoTxn) -> heed::Result>> { @@ -291,18 +318,18 @@ impl Index { /// Writes the searchable fields, when this list is specified, only these are indexed. pub fn put_searchable_fields(&self, wtxn: &mut RwTxn, fields: &[&str]) -> heed::Result<()> { - self.main.put::<_, Str, SerdeBincode<&[&str]>>(wtxn, SEARCHABLE_FIELDS_KEY, &fields) + self.main.put::<_, Str, SerdeBincode<&[&str]>>(wtxn, main_key::SEARCHABLE_FIELDS_KEY, &fields) } /// Deletes the searchable fields, when no fields are specified, all fields are indexed. pub fn delete_searchable_fields(&self, wtxn: &mut RwTxn) -> heed::Result { - self.main.delete::<_, Str>(wtxn, SEARCHABLE_FIELDS_KEY) + self.main.delete::<_, Str>(wtxn, main_key::SEARCHABLE_FIELDS_KEY) } /// Returns the searchable fields, those are the fields that are indexed, /// if the searchable fields aren't there it means that **all** the fields are indexed. pub fn searchable_fields<'t>(&self, rtxn: &'t RoTxn) -> heed::Result>> { - self.main.get::<_, Str, SerdeBincode>>(rtxn, SEARCHABLE_FIELDS_KEY) + self.main.get::<_, Str, SerdeBincode>>(rtxn, main_key::SEARCHABLE_FIELDS_KEY) } /// Identical to `searchable_fields`, but returns the ids instead. @@ -328,17 +355,20 @@ impl Index { /// Writes the filterable fields names in the database. pub fn put_filterable_fields(&self, wtxn: &mut RwTxn, fields: &HashSet) -> heed::Result<()> { - self.main.put::<_, Str, SerdeJson<_>>(wtxn, FILTERABLE_FIELDS_KEY, fields) + self.main.put::<_, Str, SerdeJson<_>>(wtxn, main_key::FILTERABLE_FIELDS_KEY, fields) } /// Deletes the filterable fields ids in the database. pub fn delete_filterable_fields(&self, wtxn: &mut RwTxn) -> heed::Result { - self.main.delete::<_, Str>(wtxn, FILTERABLE_FIELDS_KEY) + self.main.delete::<_, Str>(wtxn, main_key::FILTERABLE_FIELDS_KEY) } /// Returns the filterable fields names. pub fn filterable_fields(&self, rtxn: &RoTxn) -> heed::Result> { - Ok(self.main.get::<_, Str, SerdeJson<_>>(rtxn, FILTERABLE_FIELDS_KEY)?.unwrap_or_default()) + Ok(self.main.get::<_, Str, SerdeJson<_>>( + rtxn, + main_key::FILTERABLE_FIELDS_KEY, + )?.unwrap_or_default()) } /// Same as `filterable_fields`, but returns ids instead. 
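// The next hunk keys each field's faceted documents ids under a constant prefix followed
// by the field id as a trailing byte. A standalone sketch of that key layout (using a Vec
// here for brevity; the real code fills a fixed-size stack buffer):
fn faceted_documents_ids_key(prefix: &str, field_id: u8) -> Vec<u8> {
    let mut key = Vec::with_capacity(prefix.len() + 1);
    key.extend_from_slice(prefix.as_bytes());
    key.push(field_id);
    key
}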
@@ -409,9 +439,9 @@ impl Index { docids: &RoaringBitmap, ) -> heed::Result<()> { - let mut buffer = [0u8; STRING_FACETED_DOCUMENTS_IDS_PREFIX.len() + 1]; - buffer[..STRING_FACETED_DOCUMENTS_IDS_PREFIX.len()] - .copy_from_slice(STRING_FACETED_DOCUMENTS_IDS_PREFIX.as_bytes()); + let mut buffer = [0u8; main_key::STRING_FACETED_DOCUMENTS_IDS_PREFIX.len() + 1]; + buffer[..main_key::STRING_FACETED_DOCUMENTS_IDS_PREFIX.len()] + .copy_from_slice(main_key::STRING_FACETED_DOCUMENTS_IDS_PREFIX.as_bytes()); *buffer.last_mut().unwrap() = field_id; self.main.put::<_, ByteSlice, RoaringBitmapCodec>(wtxn, &buffer, docids) } @@ -423,9 +453,9 @@ impl Index { field_id: FieldId, ) -> heed::Result { - let mut buffer = [0u8; STRING_FACETED_DOCUMENTS_IDS_PREFIX.len() + 1]; - buffer[..STRING_FACETED_DOCUMENTS_IDS_PREFIX.len()] - .copy_from_slice(STRING_FACETED_DOCUMENTS_IDS_PREFIX.as_bytes()); + let mut buffer = [0u8; main_key::STRING_FACETED_DOCUMENTS_IDS_PREFIX.len() + 1]; + buffer[..main_key::STRING_FACETED_DOCUMENTS_IDS_PREFIX.len()] + .copy_from_slice(main_key::STRING_FACETED_DOCUMENTS_IDS_PREFIX.as_bytes()); *buffer.last_mut().unwrap() = field_id; match self.main.get::<_, ByteSlice, RoaringBitmapCodec>(rtxn, &buffer)? { Some(docids) => Ok(docids), @@ -441,9 +471,9 @@ impl Index { docids: &RoaringBitmap, ) -> heed::Result<()> { - let mut buffer = [0u8; NUMBER_FACETED_DOCUMENTS_IDS_PREFIX.len() + 1]; - buffer[..NUMBER_FACETED_DOCUMENTS_IDS_PREFIX.len()] - .copy_from_slice(NUMBER_FACETED_DOCUMENTS_IDS_PREFIX.as_bytes()); + let mut buffer = [0u8; main_key::NUMBER_FACETED_DOCUMENTS_IDS_PREFIX.len() + 1]; + buffer[..main_key::NUMBER_FACETED_DOCUMENTS_IDS_PREFIX.len()] + .copy_from_slice(main_key::NUMBER_FACETED_DOCUMENTS_IDS_PREFIX.as_bytes()); *buffer.last_mut().unwrap() = field_id; self.main.put::<_, ByteSlice, RoaringBitmapCodec>(wtxn, &buffer, docids) } @@ -455,9 +485,9 @@ impl Index { field_id: FieldId, ) -> heed::Result { - let mut buffer = [0u8; NUMBER_FACETED_DOCUMENTS_IDS_PREFIX.len() + 1]; - buffer[..NUMBER_FACETED_DOCUMENTS_IDS_PREFIX.len()] - .copy_from_slice(NUMBER_FACETED_DOCUMENTS_IDS_PREFIX.as_bytes()); + let mut buffer = [0u8; main_key::NUMBER_FACETED_DOCUMENTS_IDS_PREFIX.len() + 1]; + buffer[..main_key::NUMBER_FACETED_DOCUMENTS_IDS_PREFIX.len()] + .copy_from_slice(main_key::NUMBER_FACETED_DOCUMENTS_IDS_PREFIX.as_bytes()); *buffer.last_mut().unwrap() = field_id; match self.main.get::<_, ByteSlice, RoaringBitmapCodec>(rtxn, &buffer)? 
{ Some(docids) => Ok(docids), @@ -468,29 +498,29 @@ impl Index { /* distinct field */ pub(crate) fn put_distinct_field(&self, wtxn: &mut RwTxn, distinct_field: &str) -> heed::Result<()> { - self.main.put::<_, Str, Str>(wtxn, DISTINCT_FIELD_KEY, distinct_field) + self.main.put::<_, Str, Str>(wtxn, main_key::DISTINCT_FIELD_KEY, distinct_field) } pub fn distinct_field<'a>(&self, rtxn: &'a RoTxn) -> heed::Result> { - self.main.get::<_, Str, Str>(rtxn, DISTINCT_FIELD_KEY) + self.main.get::<_, Str, Str>(rtxn, main_key::DISTINCT_FIELD_KEY) } pub(crate) fn delete_distinct_field(&self, wtxn: &mut RwTxn) -> heed::Result { - self.main.delete::<_, Str>(wtxn, DISTINCT_FIELD_KEY) + self.main.delete::<_, Str>(wtxn, main_key::DISTINCT_FIELD_KEY) } /* criteria */ pub fn put_criteria(&self, wtxn: &mut RwTxn, criteria: &[Criterion]) -> heed::Result<()> { - self.main.put::<_, Str, SerdeJson<&[Criterion]>>(wtxn, CRITERIA_KEY, &criteria) + self.main.put::<_, Str, SerdeJson<&[Criterion]>>(wtxn, main_key::CRITERIA_KEY, &criteria) } pub fn delete_criteria(&self, wtxn: &mut RwTxn) -> heed::Result { - self.main.delete::<_, Str>(wtxn, CRITERIA_KEY) + self.main.delete::<_, Str>(wtxn, main_key::CRITERIA_KEY) } pub fn criteria(&self, rtxn: &RoTxn) -> heed::Result> { - match self.main.get::<_, Str, SerdeJson>>(rtxn, CRITERIA_KEY)? { + match self.main.get::<_, Str, SerdeJson>>(rtxn, main_key::CRITERIA_KEY)? { Some(criteria) => Ok(criteria), None => Ok(default_criteria()), } @@ -500,12 +530,12 @@ impl Index { /// Writes the FST which is the words dictionary of the engine. pub fn put_words_fst>(&self, wtxn: &mut RwTxn, fst: &fst::Set) -> heed::Result<()> { - self.main.put::<_, Str, ByteSlice>(wtxn, WORDS_FST_KEY, fst.as_fst().as_bytes()) + self.main.put::<_, Str, ByteSlice>(wtxn, main_key::WORDS_FST_KEY, fst.as_fst().as_bytes()) } /// Returns the FST which is the words dictionary of the engine. pub fn words_fst<'t>(&self, rtxn: &'t RoTxn) -> Result>> { - match self.main.get::<_, Str, ByteSlice>(rtxn, WORDS_FST_KEY)? { + match self.main.get::<_, Str, ByteSlice>(rtxn, main_key::WORDS_FST_KEY)? { Some(bytes) => Ok(fst::Set::new(bytes)?.map_data(Cow::Borrowed)?), None => Ok(fst::Set::default().map_data(Cow::Owned)?), } @@ -514,15 +544,15 @@ impl Index { /* stop words */ pub fn put_stop_words>(&self, wtxn: &mut RwTxn, fst: &fst::Set) -> heed::Result<()> { - self.main.put::<_, Str, ByteSlice>(wtxn, STOP_WORDS_KEY, fst.as_fst().as_bytes()) + self.main.put::<_, Str, ByteSlice>(wtxn, main_key::STOP_WORDS_KEY, fst.as_fst().as_bytes()) } pub fn delete_stop_words(&self, wtxn: &mut RwTxn) -> heed::Result { - self.main.delete::<_, Str>(wtxn, STOP_WORDS_KEY) + self.main.delete::<_, Str>(wtxn, main_key::STOP_WORDS_KEY) } pub fn stop_words<'t>(&self, rtxn: &'t RoTxn) -> Result>> { - match self.main.get::<_, Str, ByteSlice>(rtxn, STOP_WORDS_KEY)? { + match self.main.get::<_, Str, ByteSlice>(rtxn, main_key::STOP_WORDS_KEY)? 
{ Some(bytes) => Ok(Some(fst::Set::new(bytes)?)), None => Ok(None), } @@ -530,19 +560,29 @@ impl Index { /* synonyms */ - pub fn put_synonyms(&self, wtxn: &mut RwTxn, synonyms: &HashMap, Vec>>) -> heed::Result<()> { - self.main.put::<_, Str, SerdeBincode<_>>(wtxn, SYNONYMS_KEY, synonyms) + pub fn put_synonyms( + &self, + wtxn: &mut RwTxn, + synonyms: &HashMap, Vec>>, + ) -> heed::Result<()> + { + self.main.put::<_, Str, SerdeBincode<_>>(wtxn, main_key::SYNONYMS_KEY, synonyms) } pub fn delete_synonyms(&self, wtxn: &mut RwTxn) -> heed::Result { - self.main.delete::<_, Str>(wtxn, SYNONYMS_KEY) + self.main.delete::<_, Str>(wtxn, main_key::SYNONYMS_KEY) } pub fn synonyms(&self, rtxn: &RoTxn) -> heed::Result, Vec>>> { - Ok(self.main.get::<_, Str, SerdeBincode<_>>(rtxn, SYNONYMS_KEY)?.unwrap_or_default()) + Ok(self.main.get::<_, Str, SerdeBincode<_>>(rtxn, main_key::SYNONYMS_KEY)?.unwrap_or_default()) } - pub fn words_synonyms>(&self, rtxn: &RoTxn, words: &[S]) -> heed::Result>>> { + pub fn words_synonyms>( + &self, + rtxn: &RoTxn, + words: &[S], + ) -> heed::Result>>> + { let words: Vec<_> = words.iter().map(|s| s.as_ref().to_owned()).collect(); Ok(self.synonyms(rtxn)?.remove(&words)) } @@ -551,12 +591,12 @@ impl Index { /// Writes the FST which is the words prefixes dictionnary of the engine. pub fn put_words_prefixes_fst>(&self, wtxn: &mut RwTxn, fst: &fst::Set) -> heed::Result<()> { - self.main.put::<_, Str, ByteSlice>(wtxn, WORDS_PREFIXES_FST_KEY, fst.as_fst().as_bytes()) + self.main.put::<_, Str, ByteSlice>(wtxn, main_key::WORDS_PREFIXES_FST_KEY, fst.as_fst().as_bytes()) } /// Returns the FST which is the words prefixes dictionnary of the engine. pub fn words_prefixes_fst<'t>(&self, rtxn: &'t RoTxn) -> Result>> { - match self.main.get::<_, Str, ByteSlice>(rtxn, WORDS_PREFIXES_FST_KEY)? { + match self.main.get::<_, Str, ByteSlice>(rtxn, main_key::WORDS_PREFIXES_FST_KEY)? { Some(bytes) => Ok(fst::Set::new(bytes)?.map_data(Cow::Borrowed)?), None => Ok(fst::Set::default().map_data(Cow::Owned)?), } @@ -613,7 +653,7 @@ impl Index { /// Returns the index creation time. pub fn created_at(&self, rtxn: &RoTxn) -> heed::Result> { let time = self.main - .get::<_, Str, SerdeJson>>(rtxn, CREATED_AT_KEY)? + .get::<_, Str, SerdeJson>>(rtxn, main_key::CREATED_AT_KEY)? .expect("Index without creation time"); Ok(time) } @@ -621,13 +661,13 @@ impl Index { /// Returns the index last updated time. pub fn updated_at(&self, rtxn: &RoTxn) -> heed::Result> { let time = self.main - .get::<_, Str, SerdeJson>>(rtxn, UPDATED_AT_KEY)? + .get::<_, Str, SerdeJson>>(rtxn, main_key::UPDATED_AT_KEY)? 
.expect("Index without update time"); Ok(time) } pub(crate) fn set_updated_at(&self, wtxn: &mut RwTxn, time: &DateTime) -> heed::Result<()> { - self.main.put::<_, Str, SerdeJson>>(wtxn, UPDATED_AT_KEY, &time) + self.main.put::<_, Str, SerdeJson>>(wtxn, main_key::UPDATED_AT_KEY, &time) } } diff --git a/milli/src/update/delete_documents.rs b/milli/src/update/delete_documents.rs index 6792d6278..ceba7bf01 100644 --- a/milli/src/update/delete_documents.rs +++ b/milli/src/update/delete_documents.rs @@ -9,6 +9,7 @@ use serde_json::Value; use crate::error::{InternalError, UserError}; use crate::heed_codec::CboRoaringBitmapCodec; +use crate::index::{db_name, main_key}; use crate::{Index, DocumentId, FieldId, BEU32, SmallString32, ExternalDocumentsIds, Result}; use super::ClearDocuments; @@ -78,7 +79,10 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { let fields_ids_map = self.index.fields_ids_map(self.wtxn)?; let primary_key = self.index.primary_key(self.wtxn)?.ok_or_else(|| { - InternalError::DatabaseMissingEntry { db_name: "main", key: Some("primary-key") } + InternalError::DatabaseMissingEntry { + db_name: db_name::MAIN, + key: Some(main_key::PRIMARY_KEY_KEY), + } })?; let id_field = fields_ids_map.id(primary_key).expect(r#"the field "id" to be present"#); diff --git a/milli/src/update/index_documents/store.rs b/milli/src/update/index_documents/store.rs index e5e55682e..94ae12108 100644 --- a/milli/src/update/index_documents/store.rs +++ b/milli/src/update/index_documents/store.rs @@ -32,7 +32,7 @@ const LMDB_MAX_KEY_LENGTH: usize = 511; const ONE_KILOBYTE: usize = 1024 * 1024; const MAX_POSITION: usize = 1000; -const WORDS_FST_KEY: &[u8] = crate::index::WORDS_FST_KEY.as_bytes(); +const WORDS_FST_KEY: &[u8] = crate::index::main_key::WORDS_FST_KEY.as_bytes(); pub struct Readers { pub main: Reader, diff --git a/milli/src/update/index_documents/transform.rs b/milli/src/update/index_documents/transform.rs index 82003eddc..c44130d7e 100644 --- a/milli/src/update/index_documents/transform.rs +++ b/milli/src/update/index_documents/transform.rs @@ -11,6 +11,7 @@ use roaring::RoaringBitmap; use serde_json::{Map, Value}; use crate::error::{Error, UserError, InternalError}; +use crate::index::db_name; use crate::update::index_documents::merge_function::{merge_obkvs, keep_latest_obkv}; use crate::update::{AvailableDocumentsIds, UpdateIndexingStep}; use crate::{BEU32, MergeFn, FieldsIdsMap, ExternalDocumentsIds, FieldId, FieldsDistribution}; @@ -411,7 +412,7 @@ impl Transform<'_, '_> { let key = BEU32::new(docid); let base_obkv = self.index.documents.get(&self.rtxn, &key)? 
.ok_or(InternalError::DatabaseMissingEntry { - db_name: "documents", + db_name: db_name::DOCUMENTS, key: None, })?; let update_obkv = obkv::KvReader::new(update_obkv); From f0e804afd5687b2c074a83eb73c68d6eb674cd43 Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Tue, 15 Jun 2021 11:10:50 +0200 Subject: [PATCH 15/22] Rename the FieldIdMapMissingEntry from_db_name field into process --- milli/src/error.rs | 12 ++++++------ milli/src/lib.rs | 2 +- milli/src/search/criteria/asc_desc.rs | 2 +- milli/src/search/facet/facet_distribution.rs | 2 +- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/milli/src/error.rs b/milli/src/error.rs index 096851f09..5a8dfc90b 100644 --- a/milli/src/error.rs +++ b/milli/src/error.rs @@ -43,8 +43,8 @@ pub enum SerializationError { #[derive(Debug)] pub enum FieldIdMapMissingEntry { - FieldId { field_id: FieldId, from_db_name: &'static str }, - FieldName { field_name: String, from_db_name: &'static str }, + FieldId { field_id: FieldId, process: &'static str }, + FieldName { field_name: String, process: &'static str }, } #[derive(Debug)] @@ -224,11 +224,11 @@ impl StdError for UserError {} impl fmt::Display for FieldIdMapMissingEntry { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { - Self::FieldId { field_id, from_db_name } => { - write!(f, "unknown field id {} coming from {} database", field_id, from_db_name) + Self::FieldId { field_id, process } => { + write!(f, "unknown field id {} coming from the {} process", field_id, process) }, - Self::FieldName { field_name, from_db_name } => { - write!(f, "unknown field name {} coming from {} database", field_name, from_db_name) + Self::FieldName { field_name, process } => { + write!(f, "unknown field name {} coming from the {} process", field_name, process) }, } } diff --git a/milli/src/lib.rs b/milli/src/lib.rs index 6fa88ad64..f37244114 100644 --- a/milli/src/lib.rs +++ b/milli/src/lib.rs @@ -62,7 +62,7 @@ pub fn obkv_to_json( .map(|(id, value)| { let name = fields_ids_map.name(id).ok_or(error::FieldIdMapMissingEntry::FieldId { field_id: id, - from_db_name: "documents", + process: "obkv_to_json", })?; let value = serde_json::from_slice(value).map_err(error::InternalError::SerdeJson)?; Ok((name.to_owned(), value)) diff --git a/milli/src/search/criteria/asc_desc.rs b/milli/src/search/criteria/asc_desc.rs index c72781629..95f77fd78 100644 --- a/milli/src/search/criteria/asc_desc.rs +++ b/milli/src/search/criteria/asc_desc.rs @@ -61,7 +61,7 @@ impl<'t> AscDesc<'t> { .id(&field_name) .ok_or_else(|| FieldIdMapMissingEntry::FieldName { field_name: field_name.clone(), - from_db_name: "asc-desc", + process: "AscDesc::new", })?; Ok(AscDesc { diff --git a/milli/src/search/facet/facet_distribution.rs b/milli/src/search/facet/facet_distribution.rs index 917314b25..265a8ffeb 100644 --- a/milli/src/search/facet/facet_distribution.rs +++ b/milli/src/search/facet/facet_distribution.rs @@ -203,7 +203,7 @@ impl<'a> FacetDistribution<'a> { for name in filterable_fields { let fid = fields_ids_map.id(&name).ok_or_else(|| FieldIdMapMissingEntry::FieldName { field_name: name.clone(), - from_db_name: "filterable-fields", + process: "FacetDistribution::execute", })?; let values = self.facet_values(fid)?; distribution.insert(name, values); From a7d6930905d423d7a007abb610315f0e7ec65965 Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Tue, 15 Jun 2021 11:51:32 +0200 Subject: [PATCH 16/22] Replace the panicking expect by tracked Errors --- milli/src/index.rs | 137 ++++++++++++-------- milli/src/search/criteria/mod.rs | 
6 +- milli/src/search/distinct/facet_distinct.rs | 14 +- milli/src/search/mod.rs | 9 +- milli/src/update/delete_documents.rs | 9 +- 5 files changed, 109 insertions(+), 66 deletions(-) diff --git a/milli/src/index.rs b/milli/src/index.rs index f3411564b..02a1f9d58 100644 --- a/milli/src/index.rs +++ b/milli/src/index.rs @@ -7,7 +7,7 @@ use heed::{Database, PolyDatabase, RoTxn, RwTxn}; use heed::types::*; use roaring::RoaringBitmap; -use crate::error::UserError; +use crate::error::{UserError, FieldIdMapMissingEntry, InternalError}; use crate::{Criterion, default_criteria, FacetDistribution, FieldsDistribution, Search}; use crate::{BEU32, DocumentId, ExternalDocumentsIds, FieldId, Result}; use crate::{ @@ -304,14 +304,25 @@ impl Index { self.main.get::<_, Str, SerdeBincode>>(rtxn, main_key::DISPLAYED_FIELDS_KEY) } - pub fn displayed_fields_ids(&self, rtxn: &RoTxn) -> heed::Result>> { - let fields_ids_map = self.fields_ids_map(rtxn)?; - let ids = self.displayed_fields(rtxn)? - .map(|fields| fields - .into_iter() - .map(|name| fields_ids_map.id(name).expect("Field not found")) - .collect::>()); - Ok(ids) + /// Identical to `displayed_fields`, but returns the ids instead. + pub fn displayed_fields_ids(&self, rtxn: &RoTxn) -> Result>> { + match self.displayed_fields(rtxn)? { + Some(fields) => { + let fields_ids_map = self.fields_ids_map(rtxn)?; + let mut fields_ids = Vec::new(); + for name in fields.into_iter() { + match fields_ids_map.id(name) { + Some(field_id) => fields_ids.push(field_id), + None => return Err(FieldIdMapMissingEntry::FieldName { + field_name: name.to_string(), + process: "Index::displayed_fields_ids", + }.into()), + } + } + Ok(Some(fields_ids)) + }, + None => Ok(None), + } } /* searchable fields */ @@ -333,20 +344,22 @@ impl Index { } /// Identical to `searchable_fields`, but returns the ids instead. - pub fn searchable_fields_ids(&self, rtxn: &RoTxn) -> heed::Result>> { + pub fn searchable_fields_ids(&self, rtxn: &RoTxn) -> Result>> { match self.searchable_fields(rtxn)? { - Some(names) => { - let fields_map = self.fields_ids_map(rtxn)?; - let mut ids = Vec::new(); - for name in names { - let id = fields_map - .id(name) - .ok_or_else(|| format!("field id map must contain {:?}", name)) - .expect("corrupted data: "); - ids.push(id); + Some(fields) => { + let fields_ids_map = self.fields_ids_map(rtxn)?; + let mut fields_ids = Vec::new(); + for name in fields { + match fields_ids_map.id(name) { + Some(field_id) => fields_ids.push(field_id), + None => return Err(FieldIdMapMissingEntry::FieldName { + field_name: name.to_string(), + process: "Index::searchable_fields_ids", + }.into()), + } } - Ok(Some(ids)) - } + Ok(Some(fields_ids)) + }, None => Ok(None), } } @@ -371,21 +384,25 @@ impl Index { )?.unwrap_or_default()) } - /// Same as `filterable_fields`, but returns ids instead. - pub fn filterable_fields_ids(&self, rtxn: &RoTxn) -> heed::Result> { - let filterable_fields = self.filterable_fields(rtxn)?; + /// Identical to `filterable_fields`, but returns ids instead. 
+ pub fn filterable_fields_ids(&self, rtxn: &RoTxn) -> Result> { + let fields = self.filterable_fields(rtxn)?; let fields_ids_map = self.fields_ids_map(rtxn)?; - let filterable_fields = filterable_fields - .iter() - .map(|k| { - fields_ids_map - .id(k) - .ok_or_else(|| format!("{:?} should be present in the field id map", k)) - .expect("corrupted data: ") - }) - .collect(); - Ok(filterable_fields) + let mut fields_ids = HashSet::new(); + for name in fields { + match fields_ids_map.id(&name) { + Some(field_id) => { + fields_ids.insert(field_id); + }, + None => return Err(FieldIdMapMissingEntry::FieldName { + field_name: name, + process: "Index::filterable_fields_ids", + }.into()), + } + } + + Ok(fields_ids) } /* faceted documents ids */ @@ -393,7 +410,7 @@ impl Index { /// Returns the faceted fields names. /// /// Faceted fields are the union of all the filterable, distinct, and Asc/Desc fields. - pub fn faceted_fields(&self, rtxn: &RoTxn) -> heed::Result> { + pub fn faceted_fields(&self, rtxn: &RoTxn) -> Result> { let filterable_fields = self.filterable_fields(rtxn)?; let distinct_field = self.distinct_field(rtxn)?; let asc_desc_fields = self.criteria(rtxn)? @@ -412,21 +429,25 @@ impl Index { Ok(faceted_fields) } - /// Same as `faceted_fields`, but returns ids instead. - pub fn faceted_fields_ids(&self, rtxn: &RoTxn) -> heed::Result> { - let faceted_fields = self.faceted_fields(rtxn)?; + /// Identical to `faceted_fields`, but returns ids instead. + pub fn faceted_fields_ids(&self, rtxn: &RoTxn) -> Result> { + let fields = self.faceted_fields(rtxn)?; let fields_ids_map = self.fields_ids_map(rtxn)?; - let faceted_fields = faceted_fields - .iter() - .map(|k| { - fields_ids_map - .id(k) - .ok_or_else(|| format!("{:?} should be present in the field id map", k)) - .expect("corrupted data: ") - }) - .collect(); - Ok(faceted_fields) + let mut fields_ids = HashSet::new(); + for name in fields.into_iter() { + match fields_ids_map.id(&name) { + Some(field_id) => { + fields_ids.insert(field_id); + }, + None => return Err(FieldIdMapMissingEntry::FieldName { + field_name: name, + process: "Index::faceted_fields_ids", + }.into()), + } + } + + Ok(fields_ids) } /* faceted documents ids */ @@ -651,19 +672,23 @@ impl Index { } /// Returns the index creation time. - pub fn created_at(&self, rtxn: &RoTxn) -> heed::Result> { - let time = self.main + pub fn created_at(&self, rtxn: &RoTxn) -> Result> { + Ok(self.main .get::<_, Str, SerdeJson>>(rtxn, main_key::CREATED_AT_KEY)? - .expect("Index without creation time"); - Ok(time) + .ok_or(InternalError::DatabaseMissingEntry { + db_name: db_name::MAIN, + key: Some(main_key::CREATED_AT_KEY), + })?) } /// Returns the index last updated time. - pub fn updated_at(&self, rtxn: &RoTxn) -> heed::Result> { - let time = self.main + pub fn updated_at(&self, rtxn: &RoTxn) -> Result> { + Ok(self.main .get::<_, Str, SerdeJson>>(rtxn, main_key::UPDATED_AT_KEY)? - .expect("Index without update time"); - Ok(time) + .ok_or(InternalError::DatabaseMissingEntry { + db_name: db_name::MAIN, + key: Some(main_key::UPDATED_AT_KEY), + })?) 
} pub(crate) fn set_updated_at(&self, wtxn: &mut RwTxn, time: &DateTime) -> heed::Result<()> { diff --git a/milli/src/search/criteria/mod.rs b/milli/src/search/criteria/mod.rs index 981fc3ef2..48af0b8aa 100644 --- a/milli/src/search/criteria/mod.rs +++ b/milli/src/search/criteria/mod.rs @@ -76,7 +76,7 @@ pub trait Context<'c> { fn word_position_iterator(&self, word: &str, level: TreeLevel, in_prefix_cache: bool, left: Option, right: Option) -> heed::Result> + 'c>>; fn word_position_last_level(&self, word: &str, in_prefix_cache: bool) -> heed::Result>; fn synonyms(&self, word: &str) -> heed::Result>>>; - fn searchable_fields_ids(&self) -> heed::Result>; + fn searchable_fields_ids(&self) -> Result>; fn field_id_word_count_docids(&self, field_id: FieldId, word_count: u8) -> heed::Result>; fn word_level_position_docids(&self, word: &str, level: TreeLevel, left: u32, right: u32) -> heed::Result>; } @@ -174,7 +174,7 @@ impl<'c> Context<'c> for CriteriaBuilder<'c> { self.index.words_synonyms(self.rtxn, &[word]) } - fn searchable_fields_ids(&self) -> heed::Result> { + fn searchable_fields_ids(&self) -> Result> { match self.index.searchable_fields_ids(self.rtxn)? { Some(searchable_fields_ids) => Ok(searchable_fields_ids), None => Ok(self.index.fields_ids_map(self.rtxn)?.ids().collect()), @@ -478,7 +478,7 @@ pub mod test { todo!() } - fn searchable_fields_ids(&self) -> heed::Result> { + fn searchable_fields_ids(&self) -> Result> { todo!() } diff --git a/milli/src/search/distinct/facet_distinct.rs b/milli/src/search/distinct/facet_distinct.rs index f86d6b8ed..b9ffd9d90 100644 --- a/milli/src/search/distinct/facet_distinct.rs +++ b/milli/src/search/distinct/facet_distinct.rs @@ -3,9 +3,11 @@ use std::mem::size_of; use heed::types::ByteSlice; use roaring::RoaringBitmap; -use super::{Distinct, DocIter}; +use crate::error::InternalError; use crate::heed_codec::facet::*; +use crate::index::db_name; use crate::{DocumentId, FieldId, Index, Result}; +use super::{Distinct, DocIter}; const FID_SIZE: usize = size_of::(); const DOCID_SIZE: usize = size_of::(); @@ -64,7 +66,10 @@ impl<'a> FacetDistinctIter<'a> { let ((_, _, value), _) = item?; let facet_docids = self .facet_string_docids(value)? - .expect("Corrupted data: Facet values must exist"); + .ok_or(InternalError::DatabaseMissingEntry { + db_name: db_name::FACET_ID_STRING_DOCIDS, + key: None, + })?; self.excluded.union_with(&facet_docids); } @@ -80,7 +85,10 @@ impl<'a> FacetDistinctIter<'a> { let ((_, _, value), _) = item?; let facet_docids = self .facet_number_docids(value)? - .expect("Corrupted data: Facet values must exist"); + .ok_or(InternalError::DatabaseMissingEntry { + db_name: db_name::FACET_ID_F64_DOCIDS, + key: None, + })?; self.excluded.union_with(&facet_docids); } diff --git a/milli/src/search/mod.rs b/milli/src/search/mod.rs index f8c7b5d9b..9deb541e3 100644 --- a/milli/src/search/mod.rs +++ b/milli/src/search/mod.rs @@ -13,7 +13,7 @@ use meilisearch_tokenizer::{Analyzer, AnalyzerConfig}; use once_cell::sync::Lazy; use roaring::bitmap::RoaringBitmap; -use distinct::{Distinct, DocIter, FacetDistinct, NoopDistinct}; +use crate::error::FieldIdMapMissingEntry; use crate::search::criteria::r#final::{Final, FinalResult}; use crate::{Index, DocumentId, Result}; @@ -22,6 +22,8 @@ pub use self::matching_words::MatchingWords; pub(crate) use self::facet::ParserRule; use self::query_tree::QueryTreeBuilder; +use distinct::{Distinct, DocIter, FacetDistinct, NoopDistinct}; + // Building these factories is not free. 
static LEVDIST0: Lazy = Lazy::new(|| LevBuilder::new(0, true)); static LEVDIST1: Lazy = Lazy::new(|| LevBuilder::new(1, true)); @@ -142,7 +144,10 @@ impl<'a> Search<'a> { None => self.perform_sort(NoopDistinct, matching_words, criteria), Some(name) => { let field_ids_map = self.index.fields_ids_map(self.rtxn)?; - let id = field_ids_map.id(name).expect("distinct not present in field map"); + let id = field_ids_map.id(name).ok_or_else(|| FieldIdMapMissingEntry::FieldName { + field_name: name.to_string(), + process: "fetching distint attribute", + })?; let distinct = FacetDistinct::new(id, self.index, self.rtxn); self.perform_sort(distinct, matching_words, criteria) } diff --git a/milli/src/update/delete_documents.rs b/milli/src/update/delete_documents.rs index ceba7bf01..7fc7e5d77 100644 --- a/milli/src/update/delete_documents.rs +++ b/milli/src/update/delete_documents.rs @@ -7,7 +7,7 @@ use heed::types::{ByteSlice, Unit}; use roaring::RoaringBitmap; use serde_json::Value; -use crate::error::{InternalError, UserError}; +use crate::error::{InternalError, FieldIdMapMissingEntry, UserError}; use crate::heed_codec::CboRoaringBitmapCodec; use crate::index::{db_name, main_key}; use crate::{Index, DocumentId, FieldId, BEU32, SmallString32, ExternalDocumentsIds, Result}; @@ -84,7 +84,12 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { key: Some(main_key::PRIMARY_KEY_KEY), } })?; - let id_field = fields_ids_map.id(primary_key).expect(r#"the field "id" to be present"#); + let id_field = fields_ids_map.id(primary_key).ok_or_else(|| { + FieldIdMapMissingEntry::FieldName { + field_name: primary_key.to_string(), + process: "DeleteDocuments::execute", + } + })?; let Index { env: _env, From 713acc408be998c4d6aa33e5c2a2114bfbc90514 Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Tue, 15 Jun 2021 13:45:20 +0200 Subject: [PATCH 17/22] Introduce the primary key to the Settings builder structure --- benchmarks/benches/utils.rs | 8 +- milli/src/error.rs | 8 ++ milli/src/index.rs | 30 +++---- milli/src/update/index_documents/transform.rs | 2 +- milli/src/update/settings.rs | 89 ++++++++++++++++++- milli/tests/search/query_criteria.rs | 7 +- 6 files changed, 121 insertions(+), 23 deletions(-) diff --git a/benchmarks/benches/utils.rs b/benchmarks/benches/utils.rs index 5138de4d2..d5181849f 100644 --- a/benchmarks/benches/utils.rs +++ b/benchmarks/benches/utils.rs @@ -55,15 +55,15 @@ pub fn base_setup(conf: &Conf) -> Index { options.map_size(100 * 1024 * 1024 * 1024); // 100 GB options.max_readers(10); let index = Index::new(options, conf.database_name).unwrap(); - if let Some(primary_key) = conf.primary_key { - let mut wtxn = index.write_txn().unwrap(); - index.put_primary_key(&mut wtxn, primary_key).unwrap(); - } let update_builder = UpdateBuilder::new(0); let mut wtxn = index.write_txn().unwrap(); let mut builder = update_builder.settings(&mut wtxn, &index); + if let Some(primary_key) = conf.primary_key { + builder.set_primary_key(primary_key.to_string()); + } + if let Some(criterion) = conf.criterion { builder.reset_filterable_fields(); builder.reset_criteria(); diff --git a/milli/src/error.rs b/milli/src/error.rs index 5a8dfc90b..19f9c364c 100644 --- a/milli/src/error.rs +++ b/milli/src/error.rs @@ -60,6 +60,8 @@ pub enum UserError { MissingDocumentId { document: Object }, MissingPrimaryKey, NoSpaceLeftOnDevice, + PrimaryKeyCannotBeChanged, + PrimaryKeyCannotBeReset, SerdeJson(serde_json::Error), UnknownInternalDocumentId { document_id: DocumentId }, } @@ -211,6 +213,12 @@ impl fmt::Display for UserError { // 
TODO where can we find it instead of writing the text ourselves? Self::NoSpaceLeftOnDevice => f.write_str("no space left on device"), Self::InvalidStoreFile => f.write_str("store file is not a valid database file"), + Self::PrimaryKeyCannotBeChanged => { + f.write_str("primary key cannot be changed if the database contains documents") + }, + Self::PrimaryKeyCannotBeReset => { + f.write_str("primary key cannot be reset if the database contains documents") + }, Self::SerdeJson(error) => error.fmt(f), Self::UnknownInternalDocumentId { document_id } => { write!(f, "an unknown internal document id have been used ({})", document_id) diff --git a/milli/src/index.rs b/milli/src/index.rs index 02a1f9d58..bf4b3e023 100644 --- a/milli/src/index.rs +++ b/milli/src/index.rs @@ -184,7 +184,7 @@ impl Index { /* documents ids */ /// Writes the documents ids that corresponds to the user-ids-documents-ids FST. - pub fn put_documents_ids(&self, wtxn: &mut RwTxn, docids: &RoaringBitmap) -> heed::Result<()> { + pub(crate) fn put_documents_ids(&self, wtxn: &mut RwTxn, docids: &RoaringBitmap) -> heed::Result<()> { self.main.put::<_, Str, RoaringBitmapCodec>(wtxn, main_key::DOCUMENTS_IDS_KEY, docids) } @@ -202,7 +202,7 @@ impl Index { /* primary key */ /// Writes the documents primary key, this is the field name that is used to store the id. - pub fn put_primary_key(&self, wtxn: &mut RwTxn, primary_key: &str) -> heed::Result<()> { + pub(crate) fn put_primary_key(&self, wtxn: &mut RwTxn, primary_key: &str) -> heed::Result<()> { self.set_updated_at(wtxn, &Utc::now())?; self.main.put::<_, Str, Str>(wtxn, main_key::PRIMARY_KEY_KEY, &primary_key) } @@ -220,7 +220,7 @@ impl Index { /* external documents ids */ /// Writes the external documents ids and internal ids (i.e. `u32`). - pub fn put_external_documents_ids<'a>( + pub(crate) fn put_external_documents_ids<'a>( &self, wtxn: &mut RwTxn, external_documents_ids: &ExternalDocumentsIds<'a>, @@ -254,7 +254,7 @@ impl Index { /// Writes the fields ids map which associate the documents keys with an internal field id /// (i.e. `u8`), this field id is used to identify fields in the obkv documents. - pub fn put_fields_ids_map(&self, wtxn: &mut RwTxn, map: &FieldsIdsMap) -> heed::Result<()> { + pub(crate) fn put_fields_ids_map(&self, wtxn: &mut RwTxn, map: &FieldsIdsMap) -> heed::Result<()> { self.main.put::<_, Str, SerdeJson>(wtxn, main_key::FIELDS_IDS_MAP_KEY, map) } @@ -271,7 +271,7 @@ impl Index { /// Writes the fields distribution which associates every field name with /// the number of times it occurs in the documents. - pub fn put_fields_distribution(&self, wtxn: &mut RwTxn, distribution: &FieldsDistribution) -> heed::Result<()> { + pub(crate) fn put_fields_distribution(&self, wtxn: &mut RwTxn, distribution: &FieldsDistribution) -> heed::Result<()> { self.main.put::<_, Str, SerdeJson>(wtxn, main_key::FIELDS_DISTRIBUTION_KEY, distribution) } @@ -288,7 +288,7 @@ impl Index { /// Writes the fields that must be displayed in the defined order. /// There must be not be any duplicate field id. - pub fn put_displayed_fields(&self, wtxn: &mut RwTxn, fields: &[&str]) -> heed::Result<()> { + pub(crate) fn put_displayed_fields(&self, wtxn: &mut RwTxn, fields: &[&str]) -> heed::Result<()> { self.main.put::<_, Str, SerdeBincode<&[&str]>>(wtxn, main_key::DISPLAYED_FIELDS_KEY, &fields) } @@ -328,7 +328,7 @@ impl Index { /* searchable fields */ /// Writes the searchable fields, when this list is specified, only these are indexed. 
- pub fn put_searchable_fields(&self, wtxn: &mut RwTxn, fields: &[&str]) -> heed::Result<()> { + pub(crate) fn put_searchable_fields(&self, wtxn: &mut RwTxn, fields: &[&str]) -> heed::Result<()> { self.main.put::<_, Str, SerdeBincode<&[&str]>>(wtxn, main_key::SEARCHABLE_FIELDS_KEY, &fields) } @@ -367,7 +367,7 @@ impl Index { /* filterable fields */ /// Writes the filterable fields names in the database. - pub fn put_filterable_fields(&self, wtxn: &mut RwTxn, fields: &HashSet) -> heed::Result<()> { + pub(crate) fn put_filterable_fields(&self, wtxn: &mut RwTxn, fields: &HashSet) -> heed::Result<()> { self.main.put::<_, Str, SerdeJson<_>>(wtxn, main_key::FILTERABLE_FIELDS_KEY, fields) } @@ -453,7 +453,7 @@ impl Index { /* faceted documents ids */ /// Writes the documents ids that are faceted with numbers under this field id. - pub fn put_number_faceted_documents_ids( + pub(crate) fn put_number_faceted_documents_ids( &self, wtxn: &mut RwTxn, field_id: FieldId, @@ -485,7 +485,7 @@ impl Index { } /// Writes the documents ids that are faceted with strings under this field id. - pub fn put_string_faceted_documents_ids( + pub(crate) fn put_string_faceted_documents_ids( &self, wtxn: &mut RwTxn, field_id: FieldId, @@ -532,7 +532,7 @@ impl Index { /* criteria */ - pub fn put_criteria(&self, wtxn: &mut RwTxn, criteria: &[Criterion]) -> heed::Result<()> { + pub(crate) fn put_criteria(&self, wtxn: &mut RwTxn, criteria: &[Criterion]) -> heed::Result<()> { self.main.put::<_, Str, SerdeJson<&[Criterion]>>(wtxn, main_key::CRITERIA_KEY, &criteria) } @@ -550,7 +550,7 @@ impl Index { /* words fst */ /// Writes the FST which is the words dictionary of the engine. - pub fn put_words_fst>(&self, wtxn: &mut RwTxn, fst: &fst::Set) -> heed::Result<()> { + pub(crate) fn put_words_fst>(&self, wtxn: &mut RwTxn, fst: &fst::Set) -> heed::Result<()> { self.main.put::<_, Str, ByteSlice>(wtxn, main_key::WORDS_FST_KEY, fst.as_fst().as_bytes()) } @@ -564,7 +564,7 @@ impl Index { /* stop words */ - pub fn put_stop_words>(&self, wtxn: &mut RwTxn, fst: &fst::Set) -> heed::Result<()> { + pub(crate) fn put_stop_words>(&self, wtxn: &mut RwTxn, fst: &fst::Set) -> heed::Result<()> { self.main.put::<_, Str, ByteSlice>(wtxn, main_key::STOP_WORDS_KEY, fst.as_fst().as_bytes()) } @@ -581,7 +581,7 @@ impl Index { /* synonyms */ - pub fn put_synonyms( + pub(crate) fn put_synonyms( &self, wtxn: &mut RwTxn, synonyms: &HashMap, Vec>>, @@ -611,7 +611,7 @@ impl Index { /* words prefixes fst */ /// Writes the FST which is the words prefixes dictionnary of the engine. - pub fn put_words_prefixes_fst>(&self, wtxn: &mut RwTxn, fst: &fst::Set) -> heed::Result<()> { + pub(crate) fn put_words_prefixes_fst>(&self, wtxn: &mut RwTxn, fst: &fst::Set) -> heed::Result<()> { self.main.put::<_, Str, ByteSlice>(wtxn, main_key::WORDS_PREFIXES_FST_KEY, fst.as_fst().as_bytes()) } diff --git a/milli/src/update/index_documents/transform.rs b/milli/src/update/index_documents/transform.rs index c44130d7e..9e88559d0 100644 --- a/milli/src/update/index_documents/transform.rs +++ b/milli/src/update/index_documents/transform.rs @@ -375,7 +375,7 @@ impl Transform<'_, '_> { // Once we have sort and deduplicated the documents we write them into a final file. 
let mut final_sorter = create_sorter( - |_id, _obkvs| Err(InternalError::IndexingMergingKeys { process: "merging documents" }), + |_id, _obkvs| Err(InternalError::IndexingMergingKeys { process: "documents" }), self.chunk_compression_type, self.chunk_compression_level, self.chunk_fusing_shrink_size, diff --git a/milli/src/update/settings.rs b/milli/src/update/settings.rs index 1756a21c9..39cb27c00 100644 --- a/milli/src/update/settings.rs +++ b/milli/src/update/settings.rs @@ -72,6 +72,7 @@ pub struct Settings<'a, 't, 'u, 'i> { stop_words: Setting>, distinct_field: Setting, synonyms: Setting>>, + primary_key: Setting, } impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { @@ -98,6 +99,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { stop_words: Setting::NotSet, distinct_field: Setting::NotSet, synonyms: Setting::NotSet, + primary_key: Setting::NotSet, update_id, } } @@ -166,6 +168,14 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { } } + pub fn reset_primary_key(&mut self) { + self.primary_key = Setting::Reset; + } + + pub fn set_primary_key(&mut self, primary_key: String) { + self.primary_key = Setting::Set(primary_key); + } + fn reindex(&mut self, cb: &F, old_fields_ids_map: FieldsIdsMap) -> Result<()> where F: Fn(UpdateIndexingStep, u64) + Sync @@ -423,6 +433,31 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { Ok(()) } + fn update_primary_key(&mut self) -> Result<()> { + match self.primary_key { + Setting::Set(ref primary_key) => { + if self.index.number_of_documents(&self.wtxn)? == 0 { + let mut fields_ids_map = self.index.fields_ids_map(self.wtxn)?; + fields_ids_map.insert(primary_key).ok_or(UserError::AttributeLimitReached)?; + self.index.put_fields_ids_map(self.wtxn, &fields_ids_map)?; + self.index.put_primary_key(self.wtxn, primary_key)?; + Ok(()) + } else { + Err(UserError::PrimaryKeyCannotBeChanged.into()) + } + }, + Setting::Reset => { + if self.index.number_of_documents(&self.wtxn)? == 0 { + self.index.delete_primary_key(self.wtxn)?; + Ok(()) + } else { + Err(UserError::PrimaryKeyCannotBeReset.into()) + } + }, + Setting::NotSet => Ok(()), + } + } + pub fn execute(mut self, progress_callback: F) -> Result<()> where F: Fn(UpdateIndexingStep, u64) + Sync @@ -436,6 +471,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { self.update_filterable()?; self.update_distinct_field()?; self.update_criteria()?; + self.update_primary_key()?; // If there is new faceted fields we indicate that we must reindex as we must // index new fields as facets. It means that the distinct attribute, @@ -462,8 +498,9 @@ mod tests { use maplit::{btreeset, hashmap, hashset}; use big_s::S; - use crate::{Criterion, FilterCondition, SearchResult}; + use crate::error::Error; use crate::update::{IndexDocuments, UpdateFormat}; + use crate::{Criterion, FilterCondition, SearchResult}; use super::*; @@ -977,4 +1014,54 @@ mod tests { let rtxn = index.read_txn().unwrap(); FilterCondition::from_str(&rtxn, &index, "toto = 32").unwrap_err(); } + + #[test] + fn setting_primary_key() { + let path = tempfile::tempdir().unwrap(); + let mut options = EnvOpenOptions::new(); + options.map_size(10 * 1024 * 1024); // 10 MB + let index = Index::new(options, &path).unwrap(); + + // Set the primary key settings + let mut wtxn = index.write_txn().unwrap(); + let mut builder = Settings::new(&mut wtxn, &index, 0); + builder.set_primary_key(S("mykey")); + + builder.execute(|_, _| ()).unwrap(); + assert_eq!(index.primary_key(&wtxn).unwrap(), Some("mykey")); + + // Then index some documents with the "mykey" primary key. 
+ let content = &br#"[ + { "mykey": 1, "name": "kevin", "age": 23 }, + { "mykey": 2, "name": "kevina", "age": 21 }, + { "mykey": 3, "name": "benoit", "age": 34 }, + { "mykey": 4, "name": "bernard", "age": 34 }, + { "mykey": 5, "name": "bertrand", "age": 34 }, + { "mykey": 6, "name": "bernie", "age": 34 }, + { "mykey": 7, "name": "ben", "age": 34 } + ]"#[..]; + let mut builder = IndexDocuments::new(&mut wtxn, &index, 1); + builder.update_format(UpdateFormat::Json); + builder.disable_autogenerate_docids(); + builder.execute(content, |_, _| ()).unwrap(); + wtxn.commit().unwrap(); + + // We now try to reset the primary key + let mut wtxn = index.write_txn().unwrap(); + let mut builder = Settings::new(&mut wtxn, &index, 0); + builder.reset_primary_key(); + + let err = builder.execute(|_, _| ()).unwrap_err(); + assert!(matches!(err, Error::UserError(UserError::PrimaryKeyCannotBeReset))); + + // But if we clear the database... + let mut wtxn = index.write_txn().unwrap(); + let builder = ClearDocuments::new(&mut wtxn, &index, 0); + builder.execute().unwrap(); + + // ...we can change the primary key + let mut builder = Settings::new(&mut wtxn, &index, 0); + builder.set_primary_key(S("myid")); + builder.execute(|_, _| ()).unwrap(); + } } diff --git a/milli/tests/search/query_criteria.rs b/milli/tests/search/query_criteria.rs index f0eecfaba..2b9c5ae5e 100644 --- a/milli/tests/search/query_criteria.rs +++ b/milli/tests/search/query_criteria.rs @@ -1,5 +1,6 @@ -use milli::{Search, SearchResult, Criterion}; use big_s::S; +use milli::update::Settings; +use milli::{Search, SearchResult, Criterion}; use crate::search::{self, EXTERNAL_DOCUMENTS_IDS}; use Criterion::*; @@ -189,7 +190,9 @@ fn criteria_mixup() { eprintln!("Testing with criteria order: {:?}", &criteria); //update criteria let mut wtxn = index.write_txn().unwrap(); - index.put_criteria(&mut wtxn, &criteria).unwrap(); + let mut builder = Settings::new(&mut wtxn, &index, 0); + builder.set_criteria(criteria.iter().map(ToString::to_string).collect()); + builder.execute(|_, _| ()).unwrap(); wtxn.commit().unwrap(); let mut rtxn = index.read_txn().unwrap(); From 4eda438f6f22b113666210e3ce3797c2df61041b Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Tue, 15 Jun 2021 14:34:04 +0200 Subject: [PATCH 18/22] Add a new Error for when a user use a non-filtered attribute in a filter --- milli/src/error.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/milli/src/error.rs b/milli/src/error.rs index 19f9c364c..a9c2add24 100644 --- a/milli/src/error.rs +++ b/milli/src/error.rs @@ -56,6 +56,7 @@ pub enum UserError { FilterParsing(pest::error::Error), InvalidCriterionName { name: String }, InvalidDocumentId { document_id: Value }, + InvalidFilterAttribute(pest::error::Error), InvalidStoreFile, MissingDocumentId { document: Object }, MissingPrimaryKey, @@ -204,6 +205,7 @@ impl fmt::Display for UserError { let json = serde_json::to_string(document_id).unwrap(); write!(f, "document identifier is invalid {}", json) }, + Self::InvalidFilterAttribute(error) => error.fmt(f), Self::MissingDocumentId { document } => { let json = serde_json::to_string(document).unwrap(); write!(f, "document doesn't have an identifier {}", json) From 8cfe3e1ec0656d5b4d1f5dfd410d06b05e334e51 Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Tue, 15 Jun 2021 17:20:33 +0200 Subject: [PATCH 19/22] Rename DatabaseSizeReached into MaxDatabaseSizeReached --- milli/src/error.rs | 6 +++--- milli/src/search/facet/filter_condition.rs | 12 ++++++------ 2 files changed, 9 insertions(+), 9 deletions(-) 
diff --git a/milli/src/error.rs b/milli/src/error.rs index a9c2add24..294b2aa57 100644 --- a/milli/src/error.rs +++ b/milli/src/error.rs @@ -51,7 +51,7 @@ pub enum FieldIdMapMissingEntry { pub enum UserError { AttributeLimitReached, Csv(csv::Error), - DatabaseSizeReached, + MaxDatabaseSizeReached, DocumentLimitReached, FilterParsing(pest::error::Error), InvalidCriterionName { name: String }, @@ -113,7 +113,7 @@ impl From for Error { match error { HeedError::Io(error) => Error::from(error), - HeedError::Mdb(MdbError::MapFull) => UserError(DatabaseSizeReached), + HeedError::Mdb(MdbError::MapFull) => UserError(MaxDatabaseSizeReached), HeedError::Mdb(MdbError::Invalid) => UserError(InvalidStoreFile), HeedError::Mdb(error) => InternalError(Store(error)), HeedError::Encoding => InternalError(Serialization(Encoding { db_name: None })), @@ -211,7 +211,7 @@ impl fmt::Display for UserError { write!(f, "document doesn't have an identifier {}", json) }, Self::MissingPrimaryKey => f.write_str("missing primary key"), - Self::DatabaseSizeReached => f.write_str("database size reached"), + Self::MaxDatabaseSizeReached => f.write_str("maximum database size reached"), // TODO where can we find it instead of writing the text ourselves? Self::NoSpaceLeftOnDevice => f.write_str("no space left on device"), Self::InvalidStoreFile => f.write_str("store file is not a valid database file"), diff --git a/milli/src/search/facet/filter_condition.rs b/milli/src/search/facet/filter_condition.rs index 98d638574..6d99bb977 100644 --- a/milli/src/search/facet/filter_condition.rs +++ b/milli/src/search/facet/filter_condition.rs @@ -164,7 +164,7 @@ impl FilterCondition { { let mut items = item.into_inner(); let fid = field_id(fields_ids_map, filterable_fields, &mut items) - .map_err(UserError::FilterParsing)?; + .map_err(UserError::InvalidFilterAttribute)?; let (lresult, _) = pest_parse(items.next().unwrap()); let (rresult, _) = pest_parse(items.next().unwrap()); @@ -183,7 +183,7 @@ impl FilterCondition { { let mut items = item.into_inner(); let fid = field_id(fields_ids_map, filterable_fields, &mut items) - .map_err(UserError::FilterParsing)?; + .map_err(UserError::InvalidFilterAttribute)?; let value = items.next().unwrap(); let (result, svalue) = pest_parse(value); @@ -200,7 +200,7 @@ impl FilterCondition { { let mut items = item.into_inner(); let fid = field_id(fields_ids_map, filterable_fields, &mut items) - .map_err(UserError::FilterParsing)?; + .map_err(UserError::InvalidFilterAttribute)?; let value = items.next().unwrap(); let (result, _svalue) = pest_parse(value); @@ -217,7 +217,7 @@ impl FilterCondition { { let mut items = item.into_inner(); let fid = field_id(fields_ids_map, filterable_fields, &mut items) - .map_err(UserError::FilterParsing)?; + .map_err(UserError::InvalidFilterAttribute)?; let value = items.next().unwrap(); let (result, _svalue) = pest_parse(value); @@ -234,7 +234,7 @@ impl FilterCondition { { let mut items = item.into_inner(); let fid = field_id(fields_ids_map, filterable_fields, &mut items) - .map_err(UserError::FilterParsing)?; + .map_err(UserError::InvalidFilterAttribute)?; let value = items.next().unwrap(); let (result, _svalue) = pest_parse(value); @@ -251,7 +251,7 @@ impl FilterCondition { { let mut items = item.into_inner(); let fid = field_id(fields_ids_map, filterable_fields, &mut items) - .map_err(UserError::FilterParsing)?; + .map_err(UserError::InvalidFilterAttribute)?; let value = items.next().unwrap(); let (result, _svalue) = pest_parse(value); From 
adf0c389c5c284b12f3fae9e19870650f1b99b7a Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Tue, 15 Jun 2021 17:22:04 +0200 Subject: [PATCH 20/22] Rename FilterParsing into InvalidFilter --- milli/src/error.rs | 4 ++-- milli/src/search/facet/filter_condition.rs | 14 +++++++------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/milli/src/error.rs b/milli/src/error.rs index 294b2aa57..78a1b1c59 100644 --- a/milli/src/error.rs +++ b/milli/src/error.rs @@ -53,7 +53,7 @@ pub enum UserError { Csv(csv::Error), MaxDatabaseSizeReached, DocumentLimitReached, - FilterParsing(pest::error::Error), + InvalidFilter(pest::error::Error), InvalidCriterionName { name: String }, InvalidDocumentId { document_id: Value }, InvalidFilterAttribute(pest::error::Error), @@ -199,7 +199,7 @@ impl fmt::Display for UserError { Self::AttributeLimitReached => f.write_str("maximum number of attributes reached"), Self::Csv(error) => error.fmt(f), Self::DocumentLimitReached => f.write_str("maximum number of documents reached"), - Self::FilterParsing(error) => error.fmt(f), + Self::InvalidFilter(error) => error.fmt(f), Self::InvalidCriterionName { name } => write!(f, "invalid criterion {}", name), Self::InvalidDocumentId { document_id } => { let json = serde_json::to_string(document_id).unwrap(); diff --git a/milli/src/search/facet/filter_condition.rs b/milli/src/search/facet/filter_condition.rs index 6d99bb977..424118f77 100644 --- a/milli/src/search/facet/filter_condition.rs +++ b/milli/src/search/facet/filter_condition.rs @@ -110,7 +110,7 @@ impl FilterCondition { { let fields_ids_map = index.fields_ids_map(rtxn)?; let filterable_fields = index.filterable_fields_ids(rtxn)?; - let lexed = FilterParser::parse(Rule::prgm, expression).map_err(UserError::FilterParsing)?; + let lexed = FilterParser::parse(Rule::prgm, expression).map_err(UserError::InvalidFilter)?; FilterCondition::from_pairs(&fields_ids_map, &filterable_fields, lexed) } @@ -169,8 +169,8 @@ impl FilterCondition { let (lresult, _) = pest_parse(items.next().unwrap()); let (rresult, _) = pest_parse(items.next().unwrap()); - let lvalue = lresult.map_err(UserError::FilterParsing)?; - let rvalue = rresult.map_err(UserError::FilterParsing)?; + let lvalue = lresult.map_err(UserError::InvalidFilter)?; + let rvalue = rresult.map_err(UserError::InvalidFilter)?; Ok(Operator(fid, Between(lvalue, rvalue))) } @@ -204,7 +204,7 @@ impl FilterCondition { let value = items.next().unwrap(); let (result, _svalue) = pest_parse(value); - let value = result.map_err(UserError::FilterParsing)?; + let value = result.map_err(UserError::InvalidFilter)?; Ok(Operator(fid, GreaterThan(value))) } @@ -221,7 +221,7 @@ impl FilterCondition { let value = items.next().unwrap(); let (result, _svalue) = pest_parse(value); - let value = result.map_err(UserError::FilterParsing)?; + let value = result.map_err(UserError::InvalidFilter)?; Ok(Operator(fid, GreaterThanOrEqual(value))) } @@ -238,7 +238,7 @@ impl FilterCondition { let value = items.next().unwrap(); let (result, _svalue) = pest_parse(value); - let value = result.map_err(UserError::FilterParsing)?; + let value = result.map_err(UserError::InvalidFilter)?; Ok(Operator(fid, LowerThan(value))) } @@ -255,7 +255,7 @@ impl FilterCondition { let value = items.next().unwrap(); let (result, _svalue) = pest_parse(value); - let value = result.map_err(UserError::FilterParsing)?; + let value = result.map_err(UserError::InvalidFilter)?; Ok(Operator(fid, LowerThanOrEqual(value))) } From 7ac441e4739b62e8430dc513d8e77e023c1decf9 Mon Sep 17 00:00:00 
2001 From: Kerollmops Date: Tue, 15 Jun 2021 17:26:08 +0200 Subject: [PATCH 21/22] Fix small typos --- milli/src/search/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/milli/src/search/mod.rs b/milli/src/search/mod.rs index 9deb541e3..3c85796bc 100644 --- a/milli/src/search/mod.rs +++ b/milli/src/search/mod.rs @@ -146,7 +146,7 @@ impl<'a> Search<'a> { let field_ids_map = self.index.fields_ids_map(self.rtxn)?; let id = field_ids_map.id(name).ok_or_else(|| FieldIdMapMissingEntry::FieldName { field_name: name.to_string(), - process: "fetching distint attribute", + process: "distinct attribute", })?; let distinct = FacetDistinct::new(id, self.index, self.rtxn); self.perform_sort(distinct, matching_words, criteria) From ce0315a10f2052fd3c30cd89b89131836912f33d Mon Sep 17 00:00:00 2001 From: many Date: Tue, 15 Jun 2021 17:49:15 +0200 Subject: [PATCH 22/22] Close write transaction in test --- milli/src/update/settings.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/milli/src/update/settings.rs b/milli/src/update/settings.rs index 39cb27c00..8f4fe48c9 100644 --- a/milli/src/update/settings.rs +++ b/milli/src/update/settings.rs @@ -1053,6 +1053,7 @@ mod tests { let err = builder.execute(|_, _| ()).unwrap_err(); assert!(matches!(err, Error::UserError(UserError::PrimaryKeyCannotBeReset))); + wtxn.abort().unwrap(); // But if we clear the database... let mut wtxn = index.write_txn().unwrap(); @@ -1063,5 +1064,6 @@ mod tests { let mut builder = Settings::new(&mut wtxn, &index, 0); builder.set_primary_key(S("myid")); builder.execute(|_, _| ()).unwrap(); + wtxn.commit().unwrap(); } }
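
Usage sketch, not taken from the patches above: the primary-key handling introduced in the Settings builder (and the new PrimaryKeyCannotBeChanged / PrimaryKeyCannotBeReset user errors) can be exercised roughly the way the setting_primary_key test does. The snippet below only mirrors that test; the temporary directory, the 10 MB map size, and the "mykey" field name are placeholders rather than API requirements.

    use big_s::S;
    use heed::EnvOpenOptions;
    use milli::update::Settings;
    use milli::Index;

    fn primary_key_sketch() {
        let path = tempfile::tempdir().unwrap();
        let mut options = EnvOpenOptions::new();
        options.map_size(10 * 1024 * 1024); // 10 MB, placeholder size
        let index = Index::new(options, &path).unwrap();

        // The primary key is now defined through the Settings builder,
        // not through Index::put_primary_key (which became pub(crate)).
        let mut wtxn = index.write_txn().unwrap();
        let mut builder = Settings::new(&mut wtxn, &index, 0);
        builder.set_primary_key(S("mykey"));
        builder.execute(|_, _| ()).unwrap();
        wtxn.commit().unwrap();

        // Once documents exist in the index, set_primary_key/reset_primary_key
        // make execute() return UserError::PrimaryKeyCannotBeChanged or
        // UserError::PrimaryKeyCannotBeReset instead of panicking, as the
        // setting_primary_key test checks after indexing its documents.
    }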