diff --git a/infos/src/main.rs b/infos/src/main.rs
index d5d1ad0af..da15251b0 100644
--- a/infos/src/main.rs
+++ b/infos/src/main.rs
@@ -627,14 +627,14 @@ fn facet_values_docids(
         FacetType::String => {
             wtr.write_record(&["facet_string", "documents_count", "documents_ids"])?;
             for result in facet_values_iter(rtxn, index.facet_id_string_docids, field_id)? {
-                let ((_fid, value), docids) = result?;
+                let ((_fid, normalized), (_original, docids)) = result?;
                 let count = docids.len();
                 let docids = if debug {
                     format!("{:?}", docids)
                 } else {
                     format!("{:?}", docids.iter().collect::<Vec<_>>())
                 };
-                wtr.write_record(&[value.to_string(), count.to_string(), docids])?;
+                wtr.write_record(&[normalized.to_string(), count.to_string(), docids])?;
             }
         }
     }
diff --git a/milli/src/heed_codec/facet/facet_level_value_u32_codec.rs b/milli/src/heed_codec/facet/facet_level_value_u32_codec.rs
new file mode 100644
index 000000000..597335b6e
--- /dev/null
+++ b/milli/src/heed_codec/facet/facet_level_value_u32_codec.rs
@@ -0,0 +1,53 @@
+use std::borrow::Cow;
+use std::convert::TryInto;
+use std::num::NonZeroU8;
+
+use crate::{try_split_array_at, FieldId};
+
+/// A codec that stores the field id, level 1 and higher and the groups ids.
+///
+/// It can only be used to encode the facet string of the level 1 or higher.
+pub struct FacetLevelValueU32Codec;
+
+impl<'a> heed::BytesDecode<'a> for FacetLevelValueU32Codec {
+    type DItem = (FieldId, NonZeroU8, u32, u32);
+
+    fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
+        let (field_id_bytes, bytes) = try_split_array_at(bytes)?;
+        let field_id = u16::from_be_bytes(field_id_bytes);
+        let (level, bytes) = bytes.split_first()?;
+        let level = NonZeroU8::new(*level)?;
+        let left = bytes[8..12].try_into().ok().map(u32::from_be_bytes)?;
+        let right = bytes[12..].try_into().ok().map(u32::from_be_bytes)?;
+        Some((field_id, level, left, right))
+    }
+}
+
+impl heed::BytesEncode<'_> for FacetLevelValueU32Codec {
+    type EItem = (FieldId, NonZeroU8, u32, u32);
+
+    fn bytes_encode((field_id, level, left, right): &Self::EItem) -> Option<Cow<[u8]>> {
+        let mut buffer = [0u8; 16];
+
+        // Write the big-endian integers.
+        let bytes = left.to_be_bytes();
+        buffer[..4].copy_from_slice(&bytes[..]);
+
+        let bytes = right.to_be_bytes();
+        buffer[4..8].copy_from_slice(&bytes[..]);
+
+        // Then the u32 values just to be able to read them back.
+        let bytes = left.to_be_bytes();
+        buffer[8..12].copy_from_slice(&bytes[..]);
+
+        let bytes = right.to_be_bytes();
+        buffer[12..].copy_from_slice(&bytes[..]);
+
+        let mut bytes = Vec::with_capacity(buffer.len() + 2 + 1);
+        bytes.extend_from_slice(&field_id.to_be_bytes());
+        bytes.push(level.get());
+        bytes.extend_from_slice(&buffer);
+
+        Some(Cow::Owned(bytes))
+    }
+}
diff --git a/milli/src/heed_codec/facet/facet_string_level_zero_codec.rs b/milli/src/heed_codec/facet/facet_string_level_zero_codec.rs
new file mode 100644
index 000000000..009c6454a
--- /dev/null
+++ b/milli/src/heed_codec/facet/facet_string_level_zero_codec.rs
@@ -0,0 +1,50 @@
+use std::borrow::Cow;
+use std::str;
+
+use crate::{try_split_array_at, FieldId};
+
+/// A codec that stores the field id, level 0, and facet string.
+///
+/// It can only be used to encode the facet string of the level 0,
+/// as it hardcodes the level.
+///
+/// We encode the level 0 to not break the lexicographical ordering of the LMDB keys,
+/// and make sure that the levels are not mixed-up. The level 0 is special, the key
+/// are strings, other levels represent groups and keys are simply two integers.
+pub struct FacetStringLevelZeroCodec;
+
+impl FacetStringLevelZeroCodec {
+    pub fn serialize_into(field_id: FieldId, value: &str, out: &mut Vec<u8>) {
+        out.reserve(value.len() + 2);
+        out.extend_from_slice(&field_id.to_be_bytes());
+        out.push(0); // the level zero (for LMDB ordering only)
+        out.extend_from_slice(value.as_bytes());
+    }
+}
+
+impl<'a> heed::BytesDecode<'a> for FacetStringLevelZeroCodec {
+    type DItem = (FieldId, &'a str);
+
+    fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
+        let (field_id_bytes, bytes) = try_split_array_at(bytes)?;
+        let field_id = u16::from_be_bytes(field_id_bytes);
+        let (level, bytes) = bytes.split_first()?;
+
+        if *level != 0 {
+            return None;
+        }
+
+        let value = str::from_utf8(bytes).ok()?;
+        Some((field_id, value))
+    }
+}
+
+impl<'a> heed::BytesEncode<'a> for FacetStringLevelZeroCodec {
+    type EItem = (FieldId, &'a str);
+
+    fn bytes_encode((field_id, value): &Self::EItem) -> Option<Cow<[u8]>> {
+        let mut bytes = Vec::new();
+        FacetStringLevelZeroCodec::serialize_into(*field_id, value, &mut bytes);
+        Some(Cow::Owned(bytes))
+    }
+}
diff --git a/milli/src/heed_codec/facet/facet_string_level_zero_value_codec.rs b/milli/src/heed_codec/facet/facet_string_level_zero_value_codec.rs
new file mode 100644
index 000000000..b2434d453
--- /dev/null
+++ b/milli/src/heed_codec/facet/facet_string_level_zero_value_codec.rs
@@ -0,0 +1,80 @@
+use std::borrow::Cow;
+use std::convert::TryInto;
+use std::{marker, str};
+
+use super::try_split_at;
+
+/// A codec that encodes a string in front of the value.
+///
+/// The usecase is for the facet string levels algorithm where we must know the
+/// original string of a normalized facet value, the original values are stored
+/// in the value to not break the lexicographical ordering of the LMDB keys.
+pub struct FacetStringLevelZeroValueCodec<C>(marker::PhantomData<C>);
+
+impl<'a, C> heed::BytesDecode<'a> for FacetStringLevelZeroValueCodec<C>
+where
+    C: heed::BytesDecode<'a>,
+{
+    type DItem = (&'a str, C::DItem);
+
+    fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
+        let (string_len, bytes) = try_split_at(bytes, 2)?;
+        let string_len = string_len.try_into().ok().map(u16::from_be_bytes)?;
+
+        let (string, bytes) = try_split_at(bytes, string_len as usize)?;
+        let string = str::from_utf8(string).ok()?;
+
+        C::bytes_decode(bytes).map(|item| (string, item))
+    }
+}
+
+impl<'a, C> heed::BytesEncode<'a> for FacetStringLevelZeroValueCodec<C>
+where
+    C: heed::BytesEncode<'a>,
+{
+    type EItem = (&'a str, C::EItem);
+
+    fn bytes_encode((string, value): &'a Self::EItem) -> Option<Cow<[u8]>> {
+        let string_len: u16 = string.len().try_into().ok()?;
+        let value_bytes = C::bytes_encode(&value)?;
+
+        let mut bytes = Vec::with_capacity(2 + string.len() + value_bytes.len());
+        bytes.extend_from_slice(&string_len.to_be_bytes());
+        bytes.extend_from_slice(string.as_bytes());
+        bytes.extend_from_slice(&value_bytes[..]);
+
+        Some(Cow::Owned(bytes))
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use heed::types::Unit;
+    use heed::{BytesDecode, BytesEncode};
+    use roaring::RoaringBitmap;
+
+    use super::*;
+    use crate::CboRoaringBitmapCodec;
+
+    #[test]
+    fn deserialize_roaring_bitmaps() {
+        let string = "abc";
+        let docids: RoaringBitmap = (0..100).chain(3500..4398).collect();
+        let key = (string, docids.clone());
+        let bytes =
+            FacetStringLevelZeroValueCodec::<CboRoaringBitmapCodec>::bytes_encode(&key).unwrap();
+        let (out_string, out_docids) =
+            FacetStringLevelZeroValueCodec::<CboRoaringBitmapCodec>::bytes_decode(&bytes).unwrap();
+        assert_eq!((out_string, out_docids), (string, docids));
+    }
+
+    #[test]
+    fn deserialize_unit() {
+        let string = "def";
+        let key = (string, ());
+        let bytes = FacetStringLevelZeroValueCodec::<Unit>::bytes_encode(&key).unwrap();
+        let (out_string, out_unit) =
+            FacetStringLevelZeroValueCodec::<Unit>::bytes_decode(&bytes).unwrap();
+        assert_eq!((out_string, out_unit), (string, ()));
+    }
+}
diff --git a/milli/src/heed_codec/facet/facet_string_zero_bounds_value_codec.rs b/milli/src/heed_codec/facet/facet_string_zero_bounds_value_codec.rs
new file mode 100644
index 000000000..337433c2b
--- /dev/null
+++ b/milli/src/heed_codec/facet/facet_string_zero_bounds_value_codec.rs
@@ -0,0 +1,114 @@
+use std::borrow::Cow;
+use std::convert::TryInto;
+use std::{marker, str};
+
+use super::try_split_at;
+
+/// A codec that optionally encodes two strings in front of the value.
+///
+/// The usecase is for the facet string levels algorithm where we must
+/// know the origin of a group, the group left and right bounds are stored
+/// in the value to not break the lexicographical ordering of the LMDB keys.
+pub struct FacetStringZeroBoundsValueCodec<C>(marker::PhantomData<C>);
+
+impl<'a, C> heed::BytesDecode<'a> for FacetStringZeroBoundsValueCodec<C>
+where
+    C: heed::BytesDecode<'a>,
+{
+    type DItem = (Option<(&'a str, &'a str)>, C::DItem);
+
+    fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
+        let (contains_bounds, bytes) = bytes.split_first()?;
+
+        if *contains_bounds != 0 {
+            let (left_len, bytes) = try_split_at(bytes, 2)?;
+            let (right_len, bytes) = try_split_at(bytes, 2)?;
+
+            let left_len = left_len.try_into().ok().map(u16::from_be_bytes)?;
+            let right_len = right_len.try_into().ok().map(u16::from_be_bytes)?;
+
+            let (left, bytes) = try_split_at(bytes, left_len as usize)?;
+            let (right, bytes) = try_split_at(bytes, right_len as usize)?;
+
+            let left = str::from_utf8(left).ok()?;
+            let right = str::from_utf8(right).ok()?;
+
+            C::bytes_decode(bytes).map(|item| (Some((left, right)), item))
+        } else {
+            C::bytes_decode(bytes).map(|item| (None, item))
+        }
+    }
+}
+
+impl<'a, C> heed::BytesEncode<'a> for FacetStringZeroBoundsValueCodec<C>
+where
+    C: heed::BytesEncode<'a>,
+{
+    type EItem = (Option<(&'a str, &'a str)>, C::EItem);
+
+    fn bytes_encode((bounds, value): &'a Self::EItem) -> Option<Cow<[u8]>> {
+        let mut bytes = Vec::new();
+
+        match bounds {
+            Some((left, right)) => {
+                bytes.push(u8::max_value());
+
+                if left.is_empty() || right.is_empty() {
+                    return None;
+                }
+
+                let left_len: u16 = left.len().try_into().ok()?;
+                let right_len: u16 = right.len().try_into().ok()?;
+
+                bytes.extend_from_slice(&left_len.to_be_bytes());
+                bytes.extend_from_slice(&right_len.to_be_bytes());
+
+                bytes.extend_from_slice(left.as_bytes());
+                bytes.extend_from_slice(right.as_bytes());
+
+                let value_bytes = C::bytes_encode(&value)?;
+                bytes.extend_from_slice(&value_bytes[..]);
+
+                Some(Cow::Owned(bytes))
+            }
+            None => {
+                bytes.push(0);
+                let value_bytes = C::bytes_encode(&value)?;
+                bytes.extend_from_slice(&value_bytes[..]);
+                Some(Cow::Owned(bytes))
+            }
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use heed::types::Unit;
+    use heed::{BytesDecode, BytesEncode};
+    use roaring::RoaringBitmap;
+
+    use super::*;
+    use crate::CboRoaringBitmapCodec;
+
+    #[test]
+    fn deserialize_roaring_bitmaps() {
+        let bounds = Some(("abc", "def"));
+        let docids: RoaringBitmap = (0..100).chain(3500..4398).collect();
+        let key = (bounds, docids.clone());
+        let bytes =
+            FacetStringZeroBoundsValueCodec::<CboRoaringBitmapCodec>::bytes_encode(&key).unwrap();
+        let (out_bounds, out_docids) =
+            FacetStringZeroBoundsValueCodec::<CboRoaringBitmapCodec>::bytes_decode(&bytes).unwrap();
+        assert_eq!((out_bounds, out_docids), (bounds, docids));
+    }
+
+    #[test]
+    fn deserialize_unit() {
+        let bounds = Some(("abc", "def"));
+        let key = (bounds, ());
+        let bytes = FacetStringZeroBoundsValueCodec::<Unit>::bytes_encode(&key).unwrap();
+        let (out_bounds, out_unit) =
+            FacetStringZeroBoundsValueCodec::<Unit>::bytes_decode(&bytes).unwrap();
+        assert_eq!((out_bounds, out_unit), (bounds, ()));
+    }
+}
diff --git a/milli/src/heed_codec/facet/field_doc_id_facet_string_codec.rs b/milli/src/heed_codec/facet/field_doc_id_facet_string_codec.rs
index 36408f578..178bb21c1 100644
--- a/milli/src/heed_codec/facet/field_doc_id_facet_string_codec.rs
+++ b/milli/src/heed_codec/facet/field_doc_id_facet_string_codec.rs
@@ -9,13 +9,13 @@ impl FieldDocIdFacetStringCodec {
     pub fn serialize_into(
         field_id: FieldId,
         document_id: DocumentId,
-        value: &str,
+        normalized_value: &str,
         out: &mut Vec<u8>,
     ) {
-        out.reserve(2 + 4 + value.len());
+        out.reserve(2 + 4 + normalized_value.len());
         out.extend_from_slice(&field_id.to_be_bytes());
         out.extend_from_slice(&document_id.to_be_bytes());
-        out.extend_from_slice(value.as_bytes());
+        out.extend_from_slice(normalized_value.as_bytes());
     }
 }
 
@@ -29,17 +29,22 @@ impl<'a> heed::BytesDecode<'a> for FieldDocIdFacetStringCodec {
         let (document_id_bytes, bytes) = try_split_array_at(bytes)?;
         let document_id = u32::from_be_bytes(document_id_bytes);
 
-        let value = str::from_utf8(bytes).ok()?;
-        Some((field_id, document_id, value))
+        let normalized_value = str::from_utf8(bytes).ok()?;
+        Some((field_id, document_id, normalized_value))
     }
 }
 
 impl<'a> heed::BytesEncode<'a> for FieldDocIdFacetStringCodec {
     type EItem = (FieldId, DocumentId, &'a str);
 
-    fn bytes_encode((field_id, document_id, value): &Self::EItem) -> Option<Cow<[u8]>> {
+    fn bytes_encode((field_id, document_id, normalized_value): &Self::EItem) -> Option<Cow<[u8]>> {
         let mut bytes = Vec::new();
-        FieldDocIdFacetStringCodec::serialize_into(*field_id, *document_id, value, &mut bytes);
+        FieldDocIdFacetStringCodec::serialize_into(
+            *field_id,
+            *document_id,
+            normalized_value,
+            &mut bytes,
+        );
         Some(Cow::Owned(bytes))
     }
 }
diff --git a/milli/src/heed_codec/facet/mod.rs b/milli/src/heed_codec/facet/mod.rs
index 532da12fa..a6a805bf7 100644
--- a/milli/src/heed_codec/facet/mod.rs
+++ b/milli/src/heed_codec/facet/mod.rs
@@ -1,9 +1,25 @@
 mod facet_level_value_f64_codec;
-mod facet_value_string_codec;
+mod facet_level_value_u32_codec;
+mod facet_string_level_zero_codec;
+mod facet_string_level_zero_value_codec;
+mod facet_string_zero_bounds_value_codec;
 mod field_doc_id_facet_f64_codec;
 mod field_doc_id_facet_string_codec;
 
 pub use self::facet_level_value_f64_codec::FacetLevelValueF64Codec;
-pub use self::facet_value_string_codec::FacetValueStringCodec;
+pub use self::facet_level_value_u32_codec::FacetLevelValueU32Codec;
+pub use self::facet_string_level_zero_codec::FacetStringLevelZeroCodec;
+pub use self::facet_string_level_zero_value_codec::FacetStringLevelZeroValueCodec;
+pub use self::facet_string_zero_bounds_value_codec::FacetStringZeroBoundsValueCodec;
 pub use self::field_doc_id_facet_f64_codec::FieldDocIdFacetF64Codec;
 pub use self::field_doc_id_facet_string_codec::FieldDocIdFacetStringCodec;
+
+/// Tries to split a slice in half at the given middle point,
+/// `None` if the slice is too short.
+pub fn try_split_at(slice: &[u8], mid: usize) -> Option<(&[u8], &[u8])> {
+    if slice.len() >= mid {
+        Some(slice.split_at(mid))
+    } else {
+        None
+    }
+}
diff --git a/milli/src/index.rs b/milli/src/index.rs
index 099a5891d..f26643de7 100644
--- a/milli/src/index.rs
+++ b/milli/src/index.rs
@@ -11,8 +11,8 @@ use roaring::RoaringBitmap;
 use crate::error::{FieldIdMapMissingEntry, InternalError, UserError};
 use crate::fields_ids_map::FieldsIdsMap;
 use crate::heed_codec::facet::{
-    FacetLevelValueF64Codec, FacetValueStringCodec, FieldDocIdFacetF64Codec,
-    FieldDocIdFacetStringCodec,
+    FacetLevelValueF64Codec, FacetStringLevelZeroCodec, FacetStringLevelZeroValueCodec,
+    FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec,
 };
 use crate::{
     default_criteria, BEU32StrCodec, BoRoaringBitmapCodec, CboRoaringBitmapCodec, Criterion,
@@ -90,13 +90,14 @@ pub struct Index {
 
     /// Maps the facet field id, level and the number with the docids that corresponds to it.
     pub facet_id_f64_docids: Database<FacetLevelValueF64Codec, CboRoaringBitmapCodec>,
-    /// Maps the facet field id and the string with the docids that corresponds to it.
-    pub facet_id_string_docids: Database<FacetValueStringCodec, CboRoaringBitmapCodec>,
+    /// Maps the facet field id and the string with the original string and docids that corresponds to it.
+    pub facet_id_string_docids:
+        Database<FacetStringLevelZeroCodec, FacetStringLevelZeroValueCodec<CboRoaringBitmapCodec>>,
 
     /// Maps the document id, the facet field id and the numbers.
     pub field_id_docid_facet_f64s: Database<FieldDocIdFacetF64Codec, Unit>,
     /// Maps the document id, the facet field id and the strings.
-    pub field_id_docid_facet_strings: Database<FieldDocIdFacetStringCodec, Unit>,
+    pub field_id_docid_facet_strings: Database<FieldDocIdFacetStringCodec, Str>,
 
     /// Maps the document id to the document as an obkv store.
     pub documents: Database<OwnedType<BEU32>, ObkvCodec>,
diff --git a/milli/src/search/criteria/asc_desc.rs b/milli/src/search/criteria/asc_desc.rs
index ccee2c393..99d63c90d 100644
--- a/milli/src/search/criteria/asc_desc.rs
+++ b/milli/src/search/criteria/asc_desc.rs
@@ -8,7 +8,7 @@ use roaring::RoaringBitmap;
 use super::{Criterion, CriterionParameters, CriterionResult};
 use crate::error::FieldIdMapMissingEntry;
 use crate::search::criteria::{resolve_query_tree, CriteriaBuilder};
-use crate::search::facet::FacetIter;
+use crate::search::facet::FacetNumberIter;
 use crate::search::query_tree::Operation;
 use crate::{FieldId, Index, Result};
 
@@ -172,8 +172,11 @@ fn facet_ordered<'t>(
         let iter = iterative_facet_ordered_iter(index, rtxn, field_id, ascending, candidates)?;
         Ok(Box::new(iter.map(Ok)) as Box<dyn Iterator<Item = _>>)
     } else {
-        let facet_fn =
-            if ascending { FacetIter::new_reducing } else { FacetIter::new_reverse_reducing };
+        let facet_fn = if ascending {
+            FacetNumberIter::new_reducing
+        } else {
+            FacetNumberIter::new_reverse_reducing
+        };
         let iter = facet_fn(rtxn, index, field_id, candidates)?;
         Ok(Box::new(iter.map(|res| res.map(|(_, docids)| docids))))
     }
diff --git a/milli/src/search/distinct/facet_distinct.rs b/milli/src/search/distinct/facet_distinct.rs
index 91620da2a..4436d4cda 100644
--- a/milli/src/search/distinct/facet_distinct.rs
+++ b/milli/src/search/distinct/facet_distinct.rs
@@ -1,6 +1,7 @@
 use std::mem::size_of;
 
-use heed::types::ByteSlice;
+use concat_arrays::concat_arrays;
+use heed::types::{ByteSlice, Str, Unit};
 use roaring::RoaringBitmap;
 
 use super::{Distinct, DocIter};
@@ -43,7 +44,10 @@ pub struct FacetDistinctIter<'a> {
 
 impl<'a> FacetDistinctIter<'a> {
     fn facet_string_docids(&self, key: &str) -> heed::Result<Option<RoaringBitmap>> {
-        self.index.facet_id_string_docids.get(self.txn, &(self.distinct, key))
+        self.index
+            .facet_id_string_docids
+            .get(self.txn, &(self.distinct, key))
+            .map(|result| result.map(|(_original, docids)| docids))
     }
 
     fn facet_number_docids(&self, key: f64) -> heed::Result<Option<RoaringBitmap>> {
@@ -116,10 +120,7 @@ impl<'a> FacetDistinctIter<'a> {
 }
 
 fn facet_values_prefix_key(distinct: FieldId, id: DocumentId) -> [u8; FID_SIZE + DOCID_SIZE] {
-    let mut key = [0; FID_SIZE + DOCID_SIZE];
-    key[0..FID_SIZE].copy_from_slice(&distinct.to_be_bytes());
-    key[FID_SIZE..].copy_from_slice(&id.to_be_bytes());
-    key
+    concat_arrays!(distinct.to_be_bytes(), id.to_be_bytes())
 }
 
 fn facet_number_values<'a>(
@@ -127,7 +128,7 @@ fn facet_number_values<'a>(
     distinct: FieldId,
     index: &Index,
     txn: &'a heed::RoTxn,
-) -> Result<heed::RoPrefix<'a, FieldDocIdFacetF64Codec, heed::types::Unit>> {
+) -> Result<heed::RoPrefix<'a, FieldDocIdFacetF64Codec, Unit>> {
     let key = facet_values_prefix_key(distinct, id);
 
     let iter = index
@@ -144,14 +145,14 @@ fn facet_string_values<'a>(
     distinct: FieldId,
     index: &Index,
     txn: &'a heed::RoTxn,
-) -> Result<heed::RoPrefix<'a, FieldDocIdFacetStringCodec, heed::types::Unit>> {
+) -> Result<heed::RoPrefix<'a, FieldDocIdFacetStringCodec, Str>> {
     let key = facet_values_prefix_key(distinct, id);
 
     let iter = index
         .field_id_docid_facet_strings
         .remap_key_type::<ByteSlice>()
         .prefix_iter(txn, &key)?
-        .remap_key_type::<FieldDocIdFacetStringCodec>();
+        .remap_types::<FieldDocIdFacetStringCodec, Str>();
 
     Ok(iter)
 }
diff --git a/milli/src/search/distinct/mod.rs b/milli/src/search/distinct/mod.rs
index ae3fdb91e..e7dc52a82 100644
--- a/milli/src/search/distinct/mod.rs
+++ b/milli/src/search/distinct/mod.rs
@@ -47,7 +47,7 @@ mod test {
 
         let mut documents = Vec::new();
 
-        let txts = ["toto", "titi", "tata"];
+        let txts = ["Toto", "Titi", "Tata"];
         let cats = (1..10).map(|i| i.to_string()).collect::<Vec<_>>();
         let cat_ints = (1..10).collect::<Vec<_>>();
 
@@ -90,7 +90,6 @@ mod test {
 
         addition.index_documents_method(IndexDocumentsMethod::ReplaceDocuments);
         addition.update_format(UpdateFormat::Json);
-
         addition.execute(JSON.to_string().as_bytes(), |_, _| ()).unwrap();
 
         let fields_map = index.fields_ids_map(&txn).unwrap();
diff --git a/milli/src/search/facet/facet_distribution.rs b/milli/src/search/facet/facet_distribution.rs
index b0b22ac49..94f875dfc 100644
--- a/milli/src/search/facet/facet_distribution.rs
+++ b/milli/src/search/facet/facet_distribution.rs
@@ -1,16 +1,17 @@
 use std::collections::{BTreeMap, HashSet};
 use std::ops::Bound::Unbounded;
-use std::{cmp, fmt};
+use std::{cmp, fmt, mem};
 
-use heed::types::{ByteSlice, Unit};
-use heed::{BytesDecode, Database};
+use heed::types::ByteSlice;
 use roaring::RoaringBitmap;
 
 use crate::error::{FieldIdMapMissingEntry, UserError};
 use crate::facet::FacetType;
-use crate::heed_codec::facet::FacetValueStringCodec;
-use crate::search::facet::{FacetIter, FacetRange};
-use crate::{DocumentId, FieldId, Index, Result};
+use crate::heed_codec::facet::{
+    FacetStringLevelZeroCodec, FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec,
+};
+use crate::search::facet::{FacetNumberIter, FacetNumberRange, FacetStringIter};
+use crate::{FieldId, Index, Result};
 
 /// The default number of values by facets that will
 /// be fetched from the key-value store.
@@ -22,7 +23,7 @@ const MAX_VALUES_BY_FACET: usize = 1000;
 
 /// Threshold on the number of candidates that will make
 /// the system to choose between one algorithm or another.
-const CANDIDATES_THRESHOLD: u64 = 1000;
+const CANDIDATES_THRESHOLD: u64 = 3000;
 
 pub struct FacetDistribution<'a> {
     facets: Option<HashSet<String>>,
@@ -67,46 +68,63 @@ impl<'a> FacetDistribution<'a> {
         candidates: &RoaringBitmap,
         distribution: &mut BTreeMap<String, u64>,
     ) -> heed::Result<()> {
-        fn fetch_facet_values<'t, KC, K: 't>(
-            rtxn: &'t heed::RoTxn,
-            db: Database<KC, Unit>,
-            field_id: FieldId,
-            candidates: &RoaringBitmap,
-            distribution: &mut BTreeMap<String, u64>,
-        ) -> heed::Result<()>
-        where
-            K: fmt::Display,
-            KC: BytesDecode<'t, DItem = (FieldId, DocumentId, K)>,
-        {
-            let mut key_buffer: Vec<_> = field_id.to_be_bytes().iter().copied().collect();
-
-            for docid in candidates.into_iter().take(CANDIDATES_THRESHOLD as usize) {
-                key_buffer.truncate(1);
-                key_buffer.extend_from_slice(&docid.to_be_bytes());
-                let iter = db
-                    .remap_key_type::<ByteSlice>()
-                    .prefix_iter(rtxn, &key_buffer)?
-                    .remap_key_type::<KC>();
-
-                for result in iter {
-                    let ((_, _, value), ()) = result?;
-                    *distribution.entry(value.to_string()).or_insert(0) += 1;
-                }
-            }
-
-            Ok(())
-        }
-
         match facet_type {
             FacetType::Number => {
+                let mut key_buffer: Vec<_> = field_id.to_be_bytes().iter().copied().collect();
+
+                let distribution_prelength = distribution.len();
                 let db = self.index.field_id_docid_facet_f64s;
-                fetch_facet_values(self.rtxn, db, field_id, candidates, distribution)
+                for docid in candidates.into_iter() {
+                    key_buffer.truncate(mem::size_of::<FieldId>());
+                    key_buffer.extend_from_slice(&docid.to_be_bytes());
+                    let iter = db
+                        .remap_key_type::<ByteSlice>()
+                        .prefix_iter(self.rtxn, &key_buffer)?
+                        .remap_key_type::<FieldDocIdFacetF64Codec>();
+
+                    for result in iter {
+                        let ((_, _, value), ()) = result?;
+                        *distribution.entry(value.to_string()).or_insert(0) += 1;
+                        if distribution.len() - distribution_prelength == self.max_values_by_facet {
+                            break;
+                        }
+                    }
+                }
             }
             FacetType::String => {
+                let mut normalized_distribution = BTreeMap::new();
+                let mut key_buffer: Vec<_> = field_id.to_be_bytes().iter().copied().collect();
+
                 let db = self.index.field_id_docid_facet_strings;
-                fetch_facet_values(self.rtxn, db, field_id, candidates, distribution)
+                for docid in candidates.into_iter() {
+                    key_buffer.truncate(mem::size_of::<FieldId>());
+                    key_buffer.extend_from_slice(&docid.to_be_bytes());
+                    let iter = db
+                        .remap_key_type::<ByteSlice>()
+                        .prefix_iter(self.rtxn, &key_buffer)?
+                        .remap_key_type::<FieldDocIdFacetStringCodec>();
+
+                    for result in iter {
+                        let ((_, _, normalized_value), original_value) = result?;
+                        let (_, count) = normalized_distribution
+                            .entry(normalized_value)
+                            .or_insert_with(|| (original_value, 0));
+                        *count += 1;
+
+                        if normalized_distribution.len() == self.max_values_by_facet {
+                            break;
+                        }
+                    }
+                }
+
+                let iter = normalized_distribution
+                    .into_iter()
+                    .map(|(_normalized, (original, count))| (original.to_string(), count));
+                distribution.extend(iter);
             }
         }
+
+        Ok(())
     }
 
     /// There is too much documents, we use the facet levels to move throught
@@ -118,7 +136,7 @@ impl<'a> FacetDistribution<'a> {
         distribution: &mut BTreeMap<String, u64>,
     ) -> heed::Result<()> {
         let iter =
-            FacetIter::new_non_reducing(self.rtxn, self.index, field_id, candidates.clone())?;
+            FacetNumberIter::new_non_reducing(self.rtxn, self.index, field_id, candidates.clone())?;
 
         for result in iter {
             let (value, mut docids) = result?;
@@ -134,6 +152,29 @@ impl<'a> FacetDistribution<'a> {
         Ok(())
     }
 
+    fn facet_strings_distribution_from_facet_levels(
+        &self,
+        field_id: FieldId,
+        candidates: &RoaringBitmap,
+        distribution: &mut BTreeMap<String, u64>,
+    ) -> heed::Result<()> {
+        let iter =
+            FacetStringIter::new_non_reducing(self.rtxn, self.index, field_id, candidates.clone())?;
+
+        for result in iter {
+            let (_normalized, original, mut docids) = result?;
+            docids &= candidates;
+            if !docids.is_empty() {
+                distribution.insert(original.to_string(), docids.len());
+            }
+            if distribution.len() == self.max_values_by_facet {
+                break;
+            }
+        }
+
+        Ok(())
+    }
+
     /// Placeholder search, a.k.a. no candidates were specified. We iterate throught the
     /// facet values one by one and iterate on the facet level 0 for numbers.
     fn facet_values_from_raw_facet_database(
@@ -143,7 +184,7 @@ impl<'a> FacetDistribution<'a> {
         let mut distribution = BTreeMap::new();
 
         let db = self.index.facet_id_f64_docids;
-        let range = FacetRange::new(self.rtxn, db, field_id, 0, Unbounded, Unbounded)?;
+        let range = FacetNumberRange::new(self.rtxn, db, field_id, 0, Unbounded, Unbounded)?;
 
         for result in range {
             let ((_, _, value, _), docids) = result?;
@@ -158,16 +199,22 @@ impl<'a> FacetDistribution<'a> {
             .facet_id_string_docids
             .remap_key_type::<ByteSlice>()
             .prefix_iter(self.rtxn, &field_id.to_be_bytes())?
-            .remap_key_type::<FacetValueStringCodec>();
+            .remap_key_type::<FacetStringLevelZeroCodec>();
 
+        let mut normalized_distribution = BTreeMap::new();
         for result in iter {
-            let ((_, value), docids) = result?;
-            distribution.insert(value.to_string(), docids.len());
+            let ((_, normalized_value), (original_value, docids)) = result?;
+            normalized_distribution.insert(normalized_value, (original_value, docids.len()));
             if distribution.len() == self.max_values_by_facet {
                 break;
             }
         }
 
+        let iter = normalized_distribution
+            .into_iter()
+            .map(|(_normalized, (original, count))| (original.to_string(), count));
+        distribution.extend(iter);
+
         Ok(distribution)
     }
 
@@ -198,14 +245,12 @@ impl<'a> FacetDistribution<'a> {
                         candidates,
                         &mut distribution,
                     )?;
-                    self.facet_distribution_from_documents(
+                    self.facet_strings_distribution_from_facet_levels(
                         field_id,
-                        String,
                         candidates,
                         &mut distribution,
                     )?;
                 }
-
                 Ok(distribution)
             }
             None => self.facet_values_from_raw_facet_database(field_id),
diff --git a/milli/src/search/facet/facet_number.rs b/milli/src/search/facet/facet_number.rs
new file mode 100644
index 000000000..02390aac1
--- /dev/null
+++ b/milli/src/search/facet/facet_number.rs
@@ -0,0 +1,248 @@
+use std::ops::Bound::{self, Excluded, Included, Unbounded};
+
+use either::Either::{self, Left, Right};
+use heed::types::{ByteSlice, DecodeIgnore};
+use heed::{Database, LazyDecode, RoRange, RoRevRange};
+use roaring::RoaringBitmap;
+
+use crate::heed_codec::facet::FacetLevelValueF64Codec;
+use crate::heed_codec::CboRoaringBitmapCodec;
+use crate::{FieldId, Index};
+
+pub struct FacetNumberRange<'t> {
+    iter: RoRange<'t, FacetLevelValueF64Codec, LazyDecode<CboRoaringBitmapCodec>>,
+    end: Bound<f64>,
+}
+
+impl<'t> FacetNumberRange<'t> {
+    pub fn new(
+        rtxn: &'t heed::RoTxn,
+        db: Database<FacetLevelValueF64Codec, CboRoaringBitmapCodec>,
+        field_id: FieldId,
+        level: u8,
+        left: Bound<f64>,
+        right: Bound<f64>,
+    ) -> heed::Result<FacetNumberRange<'t>> {
+        let left_bound = match left {
+            Included(left) => Included((field_id, level, left, f64::MIN)),
+            Excluded(left) => Excluded((field_id, level, left, f64::MIN)),
+            Unbounded => Included((field_id, level, f64::MIN, f64::MIN)),
+        };
+        let right_bound = Included((field_id, level, f64::MAX, f64::MAX));
+        let iter = db.lazily_decode_data().range(rtxn, &(left_bound, right_bound))?;
+        Ok(FacetNumberRange { iter, end: right })
+    }
+}
+
+impl<'t> Iterator for FacetNumberRange<'t> {
+    type Item = heed::Result<((FieldId, u8, f64, f64), RoaringBitmap)>;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        match self.iter.next() {
+            Some(Ok(((fid, level, left, right), docids))) => {
+                let must_be_returned = match self.end {
+                    Included(end) => right <= end,
+                    Excluded(end) => right < end,
+                    Unbounded => true,
+                };
+                if must_be_returned {
+                    match docids.decode() {
+                        Ok(docids) => Some(Ok(((fid, level, left, right), docids))),
+                        Err(e) => Some(Err(e)),
+                    }
+                } else {
+                    None
+                }
+            }
+            Some(Err(e)) => Some(Err(e)),
+            None => None,
+        }
+    }
+}
+
+pub struct FacetNumberRevRange<'t> {
+    iter: RoRevRange<'t, FacetLevelValueF64Codec, LazyDecode<CboRoaringBitmapCodec>>,
+    end: Bound<f64>,
+}
+
+impl<'t> FacetNumberRevRange<'t> {
+    pub fn new(
+        rtxn: &'t heed::RoTxn,
+        db: Database<FacetLevelValueF64Codec, CboRoaringBitmapCodec>,
+        field_id: FieldId,
+        level: u8,
+        left: Bound<f64>,
+        right: Bound<f64>,
+    ) -> heed::Result<FacetNumberRevRange<'t>> {
+        let left_bound = match left {
+            Included(left) => Included((field_id, level, left, f64::MIN)),
+            Excluded(left) => Excluded((field_id, level, left, f64::MIN)),
+            Unbounded => Included((field_id, level, f64::MIN, f64::MIN)),
+        };
+        let right_bound = Included((field_id, level, f64::MAX, f64::MAX));
+        let iter = db.lazily_decode_data().rev_range(rtxn, &(left_bound, right_bound))?;
+        Ok(FacetNumberRevRange { iter, end: right })
+    }
+}
+
+impl<'t> Iterator for FacetNumberRevRange<'t> {
+    type Item = heed::Result<((FieldId, u8, f64, f64), RoaringBitmap)>;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        loop {
+            match self.iter.next() {
+                Some(Ok(((fid, level, left, right), docids))) => {
+                    let must_be_returned = match self.end {
+                        Included(end) => right <= end,
+                        Excluded(end) => right < end,
+                        Unbounded => true,
+                    };
+                    if must_be_returned {
+                        match docids.decode() {
+                            Ok(docids) => return Some(Ok(((fid, level, left, right), docids))),
+                            Err(e) => return Some(Err(e)),
+                        }
+                    }
+                    continue;
+                }
+                Some(Err(e)) => return Some(Err(e)),
+                None => return None,
+            }
+        }
+    }
+}
+
+pub struct FacetNumberIter<'t> {
+    rtxn: &'t heed::RoTxn<'t>,
+    db: Database<FacetLevelValueF64Codec, CboRoaringBitmapCodec>,
+    field_id: FieldId,
+    level_iters: Vec<(RoaringBitmap, Either<FacetNumberRange<'t>, FacetNumberRevRange<'t>>)>,
+    must_reduce: bool,
+}
+
+impl<'t> FacetNumberIter<'t> {
+    /// Create a `FacetNumberIter` that will iterate on the different facet entries
+    /// (facet value + documents ids) and that will reduce the given documents ids
+    /// while iterating on the different facet levels.
+    pub fn new_reducing(
+        rtxn: &'t heed::RoTxn,
+        index: &'t Index,
+        field_id: FieldId,
+        documents_ids: RoaringBitmap,
+    ) -> heed::Result<FacetNumberIter<'t>> {
+        let db = index.facet_id_f64_docids.remap_key_type::<FacetLevelValueF64Codec>();
+        let highest_level = Self::highest_level(rtxn, db, field_id)?.unwrap_or(0);
+        let highest_iter =
+            FacetNumberRange::new(rtxn, db, field_id, highest_level, Unbounded, Unbounded)?;
+        let level_iters = vec![(documents_ids, Left(highest_iter))];
+        Ok(FacetNumberIter { rtxn, db, field_id, level_iters, must_reduce: true })
+    }
+
+    /// Create a `FacetNumberIter` that will iterate on the different facet entries in reverse
+    /// (facet value + documents ids) and that will reduce the given documents ids
+    /// while iterating on the different facet levels.
+    pub fn new_reverse_reducing(
+        rtxn: &'t heed::RoTxn,
+        index: &'t Index,
+        field_id: FieldId,
+        documents_ids: RoaringBitmap,
+    ) -> heed::Result<FacetNumberIter<'t>> {
+        let db = index.facet_id_f64_docids;
+        let highest_level = Self::highest_level(rtxn, db, field_id)?.unwrap_or(0);
+        let highest_iter =
+            FacetNumberRevRange::new(rtxn, db, field_id, highest_level, Unbounded, Unbounded)?;
+        let level_iters = vec![(documents_ids, Right(highest_iter))];
+        Ok(FacetNumberIter { rtxn, db, field_id, level_iters, must_reduce: true })
+    }
+
+    /// Create a `FacetNumberIter` that will iterate on the different facet entries
+    /// (facet value + documents ids) and that will not reduce the given documents ids
+    /// while iterating on the different facet levels, possibly returning multiple times
+    /// a document id associated with multiple facet values.
+    pub fn new_non_reducing(
+        rtxn: &'t heed::RoTxn,
+        index: &'t Index,
+        field_id: FieldId,
+        documents_ids: RoaringBitmap,
+    ) -> heed::Result<FacetNumberIter<'t>> {
+        let db = index.facet_id_f64_docids.remap_key_type::<FacetLevelValueF64Codec>();
+        let highest_level = Self::highest_level(rtxn, db, field_id)?.unwrap_or(0);
+        let highest_iter =
+            FacetNumberRange::new(rtxn, db, field_id, highest_level, Unbounded, Unbounded)?;
+        let level_iters = vec![(documents_ids, Left(highest_iter))];
+        Ok(FacetNumberIter { rtxn, db, field_id, level_iters, must_reduce: false })
+    }
+
+    fn highest_level<X>(
+        rtxn: &'t heed::RoTxn,
+        db: Database<FacetLevelValueF64Codec, X>,
+        fid: FieldId,
+    ) -> heed::Result<Option<u8>> {
+        let level = db
+            .remap_types::<ByteSlice, DecodeIgnore>()
+            .prefix_iter(rtxn, &fid.to_be_bytes())?
+            .remap_key_type::<FacetLevelValueF64Codec>()
+            .last()
+            .transpose()?
+            .map(|((_, level, _, _), _)| level);
+        Ok(level)
+    }
+}
+
+impl<'t> Iterator for FacetNumberIter<'t> {
+    type Item = heed::Result<(f64, RoaringBitmap)>;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        'outer: loop {
+            let (documents_ids, last) = self.level_iters.last_mut()?;
+            let is_ascending = last.is_left();
+            for result in last {
+                // If the last iterator must find an empty set of documents it means
+                // that we found all the documents in the sub level iterations already,
+                // we can pop this level iterator.
+                if documents_ids.is_empty() {
+                    break;
+                }
+
+                match result {
+                    Ok(((_fid, level, left, right), mut docids)) => {
+                        docids &= &*documents_ids;
+                        if !docids.is_empty() {
+                            if self.must_reduce {
+                                *documents_ids -= &docids;
+                            }
+
+                            if level == 0 {
+                                return Some(Ok((left, docids)));
+                            }
+
+                            let rtxn = self.rtxn;
+                            let db = self.db;
+                            let fid = self.field_id;
+                            let left = Included(left);
+                            let right = Included(right);
+
+                            let result = if is_ascending {
+                                FacetNumberRange::new(rtxn, db, fid, level - 1, left, right)
+                                    .map(Left)
+                            } else {
+                                FacetNumberRevRange::new(rtxn, db, fid, level - 1, left, right)
+                                    .map(Right)
+                            };
+
+                            match result {
+                                Ok(iter) => {
+                                    self.level_iters.push((docids, iter));
+                                    continue 'outer;
+                                }
+                                Err(e) => return Some(Err(e)),
+                            }
+                        }
+                    }
+                    Err(e) => return Some(Err(e)),
+                }
+            }
+            self.level_iters.pop();
+        }
+    }
+}
diff --git a/milli/src/search/facet/facet_string.rs b/milli/src/search/facet/facet_string.rs
new file mode 100644
index 000000000..40ea8c04a
--- /dev/null
+++ b/milli/src/search/facet/facet_string.rs
@@ -0,0 +1,406 @@
+//! This module contains helpers iterators for facet strings.
+//!
+//! The purpose is to help iterate over the quite complex system of facets strings. A simple
+//! description of the system would be that every facet string value is stored into an LMDB database
+//! and that every value is associated with the document ids which are associated with this facet
+//! string value.
+//!
+//! In reality it is a little bit more complex as we have to create aggregations of runs of facet
+//! string values, those aggregations helps in choosing the right groups of facets to follow.
+//!
+//! ## A typical algorithm run
+//!
+//! If a group of aggregated facets values contains one of the documents ids, we must continue
+//! iterating over the sub-groups.
+//!
+//! If this group is the lowest level and contain at least one document id we yield the associated
+//! facet documents ids.
+//!
+//! If the group doesn't contain one of our documents ids, we continue to the next group at this
+//! same level.
+//!
+//! ## The complexity comes from the strings
+//!
+//! This algorithm is exactly the one that we use for facet numbers. It is quite easy to create
+//! aggregated facet number, groups of facets are easy to define in the LMDB key, we just put the
+//! two numbers bounds, the left and the right bound of the group, both inclusive.
+//!
+//! It is easy to make sure that the groups are ordered, LMDB sort its keys lexicographically and
+//! puting two numbers big-endian encoded one after the other gives us ordered groups. The values
+//! are simple unions of the documents ids coming from the groups below.
+//!
+//! ### Example of what a facet number LMDB database contain
+//!
+//! | level | left-bound | right-bound | documents ids    |
+//! |-------|------------|-------------|------------------|
+//! | 0     | 0          | _skipped_   | 1, 2             |
+//! | 0     | 1          | _skipped_   | 6, 7             |
+//! | 0     | 3          | _skipped_   | 4, 7             |
+//! | 0     | 5          | _skipped_   | 2, 3, 4          |
+//! | 1     | 0          | 1           | 1, 2, 6, 7       |
+//! | 1     | 3          | 5           | 2, 3, 4, 7       |
+//! | 2     | 0          | 5           | 1, 2, 3, 4, 6, 7 |
+//!
+//! As you can see the level 0 have two equal bounds, therefore we skip serializing the second
+//! bound, that's the base level where you can directly fetch the documents ids associated with an
+//! exact number.
+//!
+//! The next levels have two different bounds and the associated documents ids are simply the result
+//! of an union of all the documents ids associated with the aggregated groups above.
+//!
+//! ## The complexity of defining groups for facet strings
+//!
+//! As explained above, defining groups of facet numbers is easy, LMDB stores the keys in
+//! lexicographical order, it means that whatever the key represent the bytes are read in their raw
+//! form and a simple `strcmp` will define the order in which keys will be read from the store.
+//!
+//! That's easy for types with a known size, like floats or integers, they are 64 bytes long and
+//! appending one after the other in big-endian is consistent. LMDB will simply sort the keys by the
+//! first number then by the second if the the first number is equal on two keys.
+//!
+//! For strings it is a lot more complex as those types are unsized, it means that the size of facet
+//! strings is different for each facet value.
+//!
+//! ### Basic approach: padding the keys
+//!
+//! A first approach would be to simply define the maximum size of a facet string and pad the keys
+//! with zeroes. The big problem of this approach is that it:
+//!  1. reduces the maximum size of facet strings by half, as we need to put two keys one after the
+//!     other.
+//!  2. makes the keys of facet strings very big (approximately 250 bytes), impacting a lot LMDB
+//!     performances.
+//!
+//! ### Better approach: number the facet groups
+//!
+//! A better approach would be to number the groups, this way we don't have the downsides of the
+//! previously described approach but we need to be able to describe the groups by using a number.
+//!
+//! #### Example of facet strings with numbered groups
+//!
+//! | level | left-bound | right-bound | left-string | right-string | documents ids    |
+//! |-------|------------|-------------|-------------|--------------|------------------|
+//! | 0     | alpha      | _skipped_   | _skipped_   | _skipped_    | 1, 2             |
+//! | 0     | beta       | _skipped_   | _skipped_   | _skipped_    | 6, 7             |
+//! | 0     | gamma      | _skipped_   | _skipped_   | _skipped_    | 4, 7             |
+//! | 0     | omega      | _skipped_   | _skipped_   | _skipped_    | 2, 3, 4          |
+//! | 1     | 0          | 1           | alpha       | beta         | 1, 2, 6, 7       |
+//! | 1     | 2          | 3           | gamma       | omega        | 2, 3, 4, 7       |
+//! | 2     | 0          | 3           | _skipped_   | _skipped_    | 1, 2, 3, 4, 6, 7 |
+//!
+//! As you can see the level 0 doesn't actually change much, we skip nearly everything, we do not
+//! need to store the facet string value two times.
+//!
+//! The number in the left-bound and right-bound columns are incremental numbers representing the
+//! level 0 strings, .i.e. alpha is 0, beta is 1. Those numbers are just here to keep the ordering
+//! of the LMDB keys.
+//!
+//! In the value, not in the key, you can see that we added two new values: the left-string and the
+//! right-string, which defines the original facet strings associated with the given group.
+//!
+//! We put those two strings inside of the value, this way we do not limit the maximum size of the
+//! facet string values, and the impact on performances is not important as, IIRC, LMDB put big
+//! values on another page, this helps in iterating over keys fast enough and only fetch the page
+//! with the values when required.
+//!
+//! The other little advantage with this solution is that there is no a big overhead, compared with
+//! the facet number levels, we only duplicate the facet strings once for the level 1.
+//!
+//! #### A typical algorithm run
+//!
+//! Note that the algorithm is always moving from the highest level to the lowest one, one level
+//! by one level, this is why it is ok to only store the facets string on the level 1.
+//!
+//! If a group of aggregated facets values, a group with numbers contains one of the documents ids,
+//! we must continue iterating over the sub-groups. To do so:
+//!   - If we are at a level >= 2, we just do the same as with the facet numbers, get both bounds
+//!     and iterate over the facet groups defined by these numbers over the current level - 1.
+//!   - If we are at level 1, we retrieve both keys, the left-string and right-string, from the
+//!     value and just do the same as with the facet numbers but with strings: iterate over the
+//!     current level - 1 with both keys.
+//!
+//! If this group is the lowest level (level 0) and contain at least one document id we yield the
+//! associated facet documents ids.
+//!
+//! If the group doesn't contain one of our documents ids, we continue to the next group at this
+//! same level.
+//!
+
+use std::num::NonZeroU8;
+use std::ops::Bound;
+use std::ops::Bound::{Excluded, Included, Unbounded};
+
+use either::{Either, Left, Right};
+use heed::types::{ByteSlice, DecodeIgnore};
+use heed::{Database, LazyDecode, RoRange};
+use roaring::RoaringBitmap;
+
+use crate::heed_codec::facet::{
+    FacetLevelValueU32Codec, FacetStringLevelZeroCodec, FacetStringLevelZeroValueCodec,
+    FacetStringZeroBoundsValueCodec,
+};
+use crate::heed_codec::CboRoaringBitmapCodec;
+use crate::{FieldId, Index};
+
+/// An iterator that is used to explore the facets level strings
+/// from the level 1 to infinity.
+///
+/// It yields the level, group id that an entry covers, the optional group strings
+/// that it covers of the level 0 only if it is an entry from the level 1 and
+/// the roaring bitmap associated.
+pub struct FacetStringGroupRange<'t> {
+    iter: RoRange<
+        't,
+        FacetLevelValueU32Codec,
+        LazyDecode<FacetStringZeroBoundsValueCodec<CboRoaringBitmapCodec>>,
+    >,
+    end: Bound<u32>,
+}
+
+impl<'t> FacetStringGroupRange<'t> {
+    pub fn new<X, Y>(
+        rtxn: &'t heed::RoTxn,
+        db: Database<X, Y>,
+        field_id: FieldId,
+        level: NonZeroU8,
+        left: Bound<u32>,
+        right: Bound<u32>,
+    ) -> heed::Result<FacetStringGroupRange<'t>> {
+        let db = db.remap_types::<
+            FacetLevelValueU32Codec,
+            FacetStringZeroBoundsValueCodec<CboRoaringBitmapCodec>,
+        >();
+        let left_bound = match left {
+            Included(left) => Included((field_id, level, left, u32::MIN)),
+            Excluded(left) => Excluded((field_id, level, left, u32::MIN)),
+            Unbounded => Included((field_id, level, u32::MIN, u32::MIN)),
+        };
+        let right_bound = Included((field_id, level, u32::MAX, u32::MAX));
+        let iter = db.lazily_decode_data().range(rtxn, &(left_bound, right_bound))?;
+        Ok(FacetStringGroupRange { iter, end: right })
+    }
+}
+
+impl<'t> Iterator for FacetStringGroupRange<'t> {
+    type Item = heed::Result<((NonZeroU8, u32, u32), (Option<(&'t str, &'t str)>, RoaringBitmap))>;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        match self.iter.next() {
+            Some(Ok(((_fid, level, left, right), docids))) => {
+                let must_be_returned = match self.end {
+                    Included(end) => right <= end,
+                    Excluded(end) => right < end,
+                    Unbounded => true,
+                };
+                if must_be_returned {
+                    match docids.decode() {
+                        Ok((bounds, docids)) => Some(Ok(((level, left, right), (bounds, docids)))),
+                        Err(e) => Some(Err(e)),
+                    }
+                } else {
+                    None
+                }
+            }
+            Some(Err(e)) => Some(Err(e)),
+            None => None,
+        }
+    }
+}
+
+/// An iterator that is used to explore the level 0 of the facets string database.
+///
+/// It yields the facet string and the roaring bitmap associated with it.
+pub struct FacetStringLevelZeroRange<'t> {
+    iter: RoRange<
+        't,
+        FacetStringLevelZeroCodec,
+        FacetStringLevelZeroValueCodec<CboRoaringBitmapCodec>,
+    >,
+}
+
+impl<'t> FacetStringLevelZeroRange<'t> {
+    pub fn new<X, Y>(
+        rtxn: &'t heed::RoTxn,
+        db: Database<X, Y>,
+        field_id: FieldId,
+        left: Bound<&str>,
+        right: Bound<&str>,
+    ) -> heed::Result<FacetStringLevelZeroRange<'t>> {
+        fn encode_value<'a>(buffer: &'a mut Vec<u8>, field_id: FieldId, value: &str) -> &'a [u8] {
+            buffer.extend_from_slice(&field_id.to_be_bytes());
+            buffer.push(0);
+            buffer.extend_from_slice(value.as_bytes());
+            &buffer[..]
+        }
+
+        let mut left_buffer = Vec::new();
+        let left_bound = match left {
+            Included(value) => Included(encode_value(&mut left_buffer, field_id, value)),
+            Excluded(value) => Excluded(encode_value(&mut left_buffer, field_id, value)),
+            Unbounded => {
+                left_buffer.extend_from_slice(&field_id.to_be_bytes());
+                left_buffer.push(0);
+                Included(&left_buffer[..])
+            }
+        };
+
+        let mut right_buffer = Vec::new();
+        let right_bound = match right {
+            Included(value) => Included(encode_value(&mut right_buffer, field_id, value)),
+            Excluded(value) => Excluded(encode_value(&mut right_buffer, field_id, value)),
+            Unbounded => {
+                right_buffer.extend_from_slice(&field_id.to_be_bytes());
+                right_buffer.push(1); // we must only get the level 0
+                Excluded(&right_buffer[..])
+            }
+        };
+
+        let iter = db
+            .remap_key_type::<ByteSlice>()
+            .range(rtxn, &(left_bound, right_bound))?
+            .remap_types::<
+                FacetStringLevelZeroCodec,
+                FacetStringLevelZeroValueCodec<CboRoaringBitmapCodec>
+            >();
+
+        Ok(FacetStringLevelZeroRange { iter })
+    }
+}
+
+impl<'t> Iterator for FacetStringLevelZeroRange<'t> {
+    type Item = heed::Result<(&'t str, &'t str, RoaringBitmap)>;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        match self.iter.next() {
+            Some(Ok(((_fid, normalized), (original, docids)))) => {
+                Some(Ok((normalized, original, docids)))
+            }
+            Some(Err(e)) => Some(Err(e)),
+            None => None,
+        }
+    }
+}
+
+/// An iterator that is used to explore the facet strings level by level,
+/// it will only return facets strings that are associated with the
+/// candidates documents ids given.
+pub struct FacetStringIter<'t> {
+    rtxn: &'t heed::RoTxn<'t>,
+    db: Database<ByteSlice, ByteSlice>,
+    field_id: FieldId,
+    level_iters:
+        Vec<(RoaringBitmap, Either<FacetStringGroupRange<'t>, FacetStringLevelZeroRange<'t>>)>,
+}
+
+impl<'t> FacetStringIter<'t> {
+    pub fn new_non_reducing(
+        rtxn: &'t heed::RoTxn,
+        index: &'t Index,
+        field_id: FieldId,
+        documents_ids: RoaringBitmap,
+    ) -> heed::Result<FacetStringIter<'t>> {
+        let db = index.facet_id_string_docids.remap_types::<ByteSlice, ByteSlice>();
+        let highest_level = Self::highest_level(rtxn, db, field_id)?.unwrap_or(0);
+        let highest_iter = match NonZeroU8::new(highest_level) {
+            Some(highest_level) => Left(FacetStringGroupRange::new(
+                rtxn,
+                index.facet_id_string_docids,
+                field_id,
+                highest_level,
+                Unbounded,
+                Unbounded,
+            )?),
+            None => Right(FacetStringLevelZeroRange::new(
+                rtxn,
+                index.facet_id_string_docids,
+                field_id,
+                Unbounded,
+                Unbounded,
+            )?),
+        };
+
+        Ok(FacetStringIter { rtxn, db, field_id, level_iters: vec![(documents_ids, highest_iter)] })
+    }
+
+    fn highest_level<X, Y>(
+        rtxn: &'t heed::RoTxn,
+        db: Database<X, Y>,
+        fid: FieldId,
+    ) -> heed::Result<Option<u8>> {
+        Ok(db
+            .remap_types::<ByteSlice, DecodeIgnore>()
+            .prefix_iter(rtxn, &fid.to_be_bytes())? // the field id is the first two bits
+            .last()
+            .transpose()?
+            .map(|(key_bytes, _)| key_bytes[2])) // the level is the third bit
+    }
+}
+
+impl<'t> Iterator for FacetStringIter<'t> {
+    type Item = heed::Result<(&'t str, &'t str, RoaringBitmap)>;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        'outer: loop {
+            let (documents_ids, last) = self.level_iters.last_mut()?;
+            match last {
+                Left(last) => {
+                    for result in last {
+                        match result {
+                            Ok(((level, left, right), (string_bounds, mut docids))) => {
+                                docids &= &*documents_ids;
+                                if !docids.is_empty() {
+                                    *documents_ids -= &docids;
+
+                                    let result = match string_bounds {
+                                        Some((left, right)) => FacetStringLevelZeroRange::new(
+                                            self.rtxn,
+                                            self.db,
+                                            self.field_id,
+                                            Included(left),
+                                            Included(right),
+                                        )
+                                        .map(Right),
+                                        None => FacetStringGroupRange::new(
+                                            self.rtxn,
+                                            self.db,
+                                            self.field_id,
+                                            NonZeroU8::new(level.get() - 1).unwrap(),
+                                            Included(left),
+                                            Included(right),
+                                        )
+                                        .map(Left),
+                                    };
+
+                                    match result {
+                                        Ok(iter) => {
+                                            self.level_iters.push((docids, iter));
+                                            continue 'outer;
+                                        }
+                                        Err(e) => return Some(Err(e)),
+                                    }
+                                }
+                            }
+                            Err(e) => return Some(Err(e)),
+                        }
+                    }
+                }
+                Right(last) => {
+                    // level zero only
+                    for result in last {
+                        match result {
+                            Ok((normalized, original, mut docids)) => {
+                                docids &= &*documents_ids;
+                                if !docids.is_empty() {
+                                    *documents_ids -= &docids;
+                                    return Some(Ok((normalized, original, docids)));
+                                }
+                            }
+                            Err(e) => return Some(Err(e)),
+                        }
+                    }
+                }
+            }
+
+            self.level_iters.pop();
+        }
+    }
+}
diff --git a/milli/src/search/facet/filter_condition.rs b/milli/src/search/facet/filter_condition.rs
index 1b1eafcab..cc108f855 100644
--- a/milli/src/search/facet/filter_condition.rs
+++ b/milli/src/search/facet/filter_condition.rs
@@ -15,9 +15,11 @@ use roaring::RoaringBitmap;
 use self::FilterCondition::*;
 use self::Operator::*;
 use super::parser::{FilterParser, Rule, PREC_CLIMBER};
-use super::FacetRange;
+use super::FacetNumberRange;
 use crate::error::UserError;
-use crate::heed_codec::facet::{FacetLevelValueF64Codec, FacetValueStringCodec};
+use crate::heed_codec::facet::{
+    FacetLevelValueF64Codec, FacetStringLevelZeroCodec, FacetStringLevelZeroValueCodec,
+};
 use crate::{CboRoaringBitmapCodec, FieldId, FieldsIdsMap, Index, Result};
 
 #[derive(Debug, Clone, PartialEq)]
@@ -282,7 +284,7 @@ impl FilterCondition {
 
         // We must create a custom iterator to be able to iterate over the
         // requested range as the range iterator cannot express some conditions.
-        let iter = FacetRange::new(rtxn, db, field_id, level, left, right)?;
+        let iter = FacetNumberRange::new(rtxn, db, field_id, level, left, right)?;
 
         debug!("Iterating between {:?} and {:?} (level {})", left, right, level);
 
@@ -363,7 +365,10 @@ impl FilterCondition {
         rtxn: &heed::RoTxn,
         index: &Index,
         numbers_db: heed::Database<FacetLevelValueF64Codec, CboRoaringBitmapCodec>,
-        strings_db: heed::Database<FacetValueStringCodec, CboRoaringBitmapCodec>,
+        strings_db: heed::Database<
+            FacetStringLevelZeroCodec,
+            FacetStringLevelZeroValueCodec<CboRoaringBitmapCodec>,
+        >,
         field_id: FieldId,
         operator: &Operator,
     ) -> Result<RoaringBitmap> {
@@ -374,7 +379,8 @@ impl FilterCondition {
             GreaterThan(val) => (Excluded(*val), Included(f64::MAX)),
             GreaterThanOrEqual(val) => (Included(*val), Included(f64::MAX)),
             Equal(number, string) => {
-                let string_docids = strings_db.get(rtxn, &(field_id, &string))?.unwrap_or_default();
+                let (_original_value, string_docids) =
+                    strings_db.get(rtxn, &(field_id, &string))?.unwrap_or_default();
                 let number_docids = match number {
                     Some(n) => {
                         let n = Included(*n);
diff --git a/milli/src/search/facet/mod.rs b/milli/src/search/facet/mod.rs
index 9774bdd52..ddf710e32 100644
--- a/milli/src/search/facet/mod.rs
+++ b/milli/src/search/facet/mod.rs
@@ -1,253 +1,11 @@
-use std::ops::Bound::{self, Excluded, Included, Unbounded};
-
-use either::Either::{self, Left, Right};
-use heed::types::{ByteSlice, DecodeIgnore};
-use heed::{Database, LazyDecode, RoRange, RoRevRange};
-use roaring::RoaringBitmap;
-
 pub use self::facet_distribution::FacetDistribution;
+pub use self::facet_number::{FacetNumberIter, FacetNumberRange, FacetNumberRevRange};
+pub use self::facet_string::FacetStringIter;
 pub use self::filter_condition::{FilterCondition, Operator};
 pub(crate) use self::parser::Rule as ParserRule;
-use crate::heed_codec::facet::FacetLevelValueF64Codec;
-use crate::heed_codec::CboRoaringBitmapCodec;
-use crate::{FieldId, Index};
 
 mod facet_distribution;
+mod facet_number;
+mod facet_string;
 mod filter_condition;
 mod parser;
-
-pub struct FacetRange<'t> {
-    iter: RoRange<'t, FacetLevelValueF64Codec, LazyDecode<CboRoaringBitmapCodec>>,
-    end: Bound<f64>,
-}
-
-impl<'t> FacetRange<'t> {
-    pub fn new(
-        rtxn: &'t heed::RoTxn,
-        db: Database<FacetLevelValueF64Codec, CboRoaringBitmapCodec>,
-        field_id: FieldId,
-        level: u8,
-        left: Bound<f64>,
-        right: Bound<f64>,
-    ) -> heed::Result<FacetRange<'t>> {
-        let left_bound = match left {
-            Included(left) => Included((field_id, level, left, f64::MIN)),
-            Excluded(left) => Excluded((field_id, level, left, f64::MIN)),
-            Unbounded => Included((field_id, level, f64::MIN, f64::MIN)),
-        };
-        let right_bound = Included((field_id, level, f64::MAX, f64::MAX));
-        let iter = db.lazily_decode_data().range(rtxn, &(left_bound, right_bound))?;
-        Ok(FacetRange { iter, end: right })
-    }
-}
-
-impl<'t> Iterator for FacetRange<'t> {
-    type Item = heed::Result<((FieldId, u8, f64, f64), RoaringBitmap)>;
-
-    fn next(&mut self) -> Option<Self::Item> {
-        match self.iter.next() {
-            Some(Ok(((fid, level, left, right), docids))) => {
-                let must_be_returned = match self.end {
-                    Included(end) => right <= end,
-                    Excluded(end) => right < end,
-                    Unbounded => true,
-                };
-                if must_be_returned {
-                    match docids.decode() {
-                        Ok(docids) => Some(Ok(((fid, level, left, right), docids))),
-                        Err(e) => Some(Err(e)),
-                    }
-                } else {
-                    None
-                }
-            }
-            Some(Err(e)) => Some(Err(e)),
-            None => None,
-        }
-    }
-}
-
-pub struct FacetRevRange<'t> {
-    iter: RoRevRange<'t, FacetLevelValueF64Codec, LazyDecode<CboRoaringBitmapCodec>>,
-    end: Bound<f64>,
-}
-
-impl<'t> FacetRevRange<'t> {
-    pub fn new(
-        rtxn: &'t heed::RoTxn,
-        db: Database<FacetLevelValueF64Codec, CboRoaringBitmapCodec>,
-        field_id: FieldId,
-        level: u8,
-        left: Bound<f64>,
-        right: Bound<f64>,
-    ) -> heed::Result<FacetRevRange<'t>> {
-        let left_bound = match left {
-            Included(left) => Included((field_id, level, left, f64::MIN)),
-            Excluded(left) => Excluded((field_id, level, left, f64::MIN)),
-            Unbounded => Included((field_id, level, f64::MIN, f64::MIN)),
-        };
-        let right_bound = Included((field_id, level, f64::MAX, f64::MAX));
-        let iter = db.lazily_decode_data().rev_range(rtxn, &(left_bound, right_bound))?;
-        Ok(FacetRevRange { iter, end: right })
-    }
-}
-
-impl<'t> Iterator for FacetRevRange<'t> {
-    type Item = heed::Result<((FieldId, u8, f64, f64), RoaringBitmap)>;
-
-    fn next(&mut self) -> Option<Self::Item> {
-        loop {
-            match self.iter.next() {
-                Some(Ok(((fid, level, left, right), docids))) => {
-                    let must_be_returned = match self.end {
-                        Included(end) => right <= end,
-                        Excluded(end) => right < end,
-                        Unbounded => true,
-                    };
-                    if must_be_returned {
-                        match docids.decode() {
-                            Ok(docids) => return Some(Ok(((fid, level, left, right), docids))),
-                            Err(e) => return Some(Err(e)),
-                        }
-                    }
-                    continue;
-                }
-                Some(Err(e)) => return Some(Err(e)),
-                None => return None,
-            }
-        }
-    }
-}
-
-pub struct FacetIter<'t> {
-    rtxn: &'t heed::RoTxn<'t>,
-    db: Database<FacetLevelValueF64Codec, CboRoaringBitmapCodec>,
-    field_id: FieldId,
-    level_iters: Vec<(RoaringBitmap, Either<FacetRange<'t>, FacetRevRange<'t>>)>,
-    must_reduce: bool,
-}
-
-impl<'t> FacetIter<'t> {
-    /// Create a `FacetIter` that will iterate on the different facet entries
-    /// (facet value + documents ids) and that will reduce the given documents ids
-    /// while iterating on the different facet levels.
-    pub fn new_reducing(
-        rtxn: &'t heed::RoTxn,
-        index: &'t Index,
-        field_id: FieldId,
-        documents_ids: RoaringBitmap,
-    ) -> heed::Result<FacetIter<'t>> {
-        let db = index.facet_id_f64_docids.remap_key_type::<FacetLevelValueF64Codec>();
-        let highest_level = Self::highest_level(rtxn, db, field_id)?.unwrap_or(0);
-        let highest_iter =
-            FacetRange::new(rtxn, db, field_id, highest_level, Unbounded, Unbounded)?;
-        let level_iters = vec![(documents_ids, Left(highest_iter))];
-        Ok(FacetIter { rtxn, db, field_id, level_iters, must_reduce: true })
-    }
-
-    /// Create a `FacetIter` that will iterate on the different facet entries in reverse
-    /// (facet value + documents ids) and that will reduce the given documents ids
-    /// while iterating on the different facet levels.
-    pub fn new_reverse_reducing(
-        rtxn: &'t heed::RoTxn,
-        index: &'t Index,
-        field_id: FieldId,
-        documents_ids: RoaringBitmap,
-    ) -> heed::Result<FacetIter<'t>> {
-        let db = index.facet_id_f64_docids.remap_key_type::<FacetLevelValueF64Codec>();
-        let highest_level = Self::highest_level(rtxn, db, field_id)?.unwrap_or(0);
-        let highest_iter =
-            FacetRevRange::new(rtxn, db, field_id, highest_level, Unbounded, Unbounded)?;
-        let level_iters = vec![(documents_ids, Right(highest_iter))];
-        Ok(FacetIter { rtxn, db, field_id, level_iters, must_reduce: true })
-    }
-
-    /// Create a `FacetIter` that will iterate on the different facet entries
-    /// (facet value + documents ids) and that will not reduce the given documents ids
-    /// while iterating on the different facet levels, possibly returning multiple times
-    /// a document id associated with multiple facet values.
-    pub fn new_non_reducing(
-        rtxn: &'t heed::RoTxn,
-        index: &'t Index,
-        field_id: FieldId,
-        documents_ids: RoaringBitmap,
-    ) -> heed::Result<FacetIter<'t>> {
-        let db = index.facet_id_f64_docids.remap_key_type::<FacetLevelValueF64Codec>();
-        let highest_level = Self::highest_level(rtxn, db, field_id)?.unwrap_or(0);
-        let highest_iter =
-            FacetRange::new(rtxn, db, field_id, highest_level, Unbounded, Unbounded)?;
-        let level_iters = vec![(documents_ids, Left(highest_iter))];
-        Ok(FacetIter { rtxn, db, field_id, level_iters, must_reduce: false })
-    }
-
-    fn highest_level<X>(
-        rtxn: &'t heed::RoTxn,
-        db: Database<FacetLevelValueF64Codec, X>,
-        fid: FieldId,
-    ) -> heed::Result<Option<u8>> {
-        let level = db
-            .remap_types::<ByteSlice, DecodeIgnore>()
-            .prefix_iter(rtxn, &fid.to_be_bytes())?
-            .remap_key_type::<FacetLevelValueF64Codec>()
-            .last()
-            .transpose()?
-            .map(|((_, level, _, _), _)| level);
-        Ok(level)
-    }
-}
-
-impl<'t> Iterator for FacetIter<'t> {
-    type Item = heed::Result<(f64, RoaringBitmap)>;
-
-    fn next(&mut self) -> Option<Self::Item> {
-        'outer: loop {
-            let (documents_ids, last) = self.level_iters.last_mut()?;
-            let is_ascending = last.is_left();
-            for result in last {
-                // If the last iterator must find an empty set of documents it means
-                // that we found all the documents in the sub level iterations already,
-                // we can pop this level iterator.
-                if documents_ids.is_empty() {
-                    break;
-                }
-
-                match result {
-                    Ok(((_fid, level, left, right), mut docids)) => {
-                        docids &= &*documents_ids;
-                        if !docids.is_empty() {
-                            if self.must_reduce {
-                                *documents_ids -= &docids;
-                            }
-
-                            if level == 0 {
-                                return Some(Ok((left, docids)));
-                            }
-
-                            let rtxn = self.rtxn;
-                            let db = self.db;
-                            let fid = self.field_id;
-                            let left = Included(left);
-                            let right = Included(right);
-
-                            let result = if is_ascending {
-                                FacetRange::new(rtxn, db, fid, level - 1, left, right).map(Left)
-                            } else {
-                                FacetRevRange::new(rtxn, db, fid, level - 1, left, right).map(Right)
-                            };
-
-                            match result {
-                                Ok(iter) => {
-                                    self.level_iters.push((docids, iter));
-                                    continue 'outer;
-                                }
-                                Err(e) => return Some(Err(e)),
-                            }
-                        }
-                    }
-                    Err(e) => return Some(Err(e)),
-                }
-            }
-            self.level_iters.pop();
-        }
-    }
-}
diff --git a/milli/src/search/mod.rs b/milli/src/search/mod.rs
index f40a6aed6..574459547 100644
--- a/milli/src/search/mod.rs
+++ b/milli/src/search/mod.rs
@@ -15,7 +15,7 @@ use once_cell::sync::Lazy;
 use roaring::bitmap::RoaringBitmap;
 
 pub(crate) use self::facet::ParserRule;
-pub use self::facet::{FacetDistribution, FacetIter, FilterCondition, Operator};
+pub use self::facet::{FacetDistribution, FacetNumberIter, FilterCondition, Operator};
 pub use self::matching_words::MatchingWords;
 use self::query_tree::QueryTreeBuilder;
 use crate::error::FieldIdMapMissingEntry;
diff --git a/milli/src/update/delete_documents.rs b/milli/src/update/delete_documents.rs
index 222f3b2d3..bcb7d7580 100644
--- a/milli/src/update/delete_documents.rs
+++ b/milli/src/update/delete_documents.rs
@@ -3,12 +3,13 @@ use std::collections::HashMap;
 
 use chrono::Utc;
 use fst::IntoStreamer;
-use heed::types::{ByteSlice, Unit};
+use heed::types::ByteSlice;
 use roaring::RoaringBitmap;
 use serde_json::Value;
 
 use super::ClearDocuments;
 use crate::error::{FieldIdMapMissingEntry, InternalError, UserError};
+use crate::heed_codec::facet::FacetStringLevelZeroValueCodec;
 use crate::heed_codec::CboRoaringBitmapCodec;
 use crate::index::{db_name, main_key};
 use crate::{DocumentId, ExternalDocumentsIds, FieldId, Index, Result, SmallString32, BEU32};
@@ -374,13 +375,13 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
         drop(iter);
 
         // We delete the documents ids that are under the facet field id values.
-        remove_docids_from_facet_field_id_value_docids(
+        remove_docids_from_facet_field_id_number_docids(
             self.wtxn,
             facet_id_f64_docids,
             &self.documents_ids,
         )?;
 
-        remove_docids_from_facet_field_id_value_docids(
+        remove_docids_from_facet_field_id_string_docids(
             self.wtxn,
             facet_id_string_docids,
             &self.documents_ids,
@@ -419,15 +420,16 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
     }
 }
 
-fn remove_docids_from_field_id_docid_facet_value<'a, C, K, F>(
+fn remove_docids_from_field_id_docid_facet_value<'a, C, K, F, DC, V>(
     wtxn: &'a mut heed::RwTxn,
-    db: &heed::Database<C, Unit>,
+    db: &heed::Database<C, DC>,
     field_id: FieldId,
     to_remove: &RoaringBitmap,
     convert: F,
 ) -> heed::Result<()>
 where
-    C: heed::BytesDecode<'a, DItem = K> + heed::BytesEncode<'a, EItem = K>,
+    C: heed::BytesDecode<'a, DItem = K>,
+    DC: heed::BytesDecode<'a, DItem = V>,
     F: Fn(K) -> DocumentId,
 {
     let mut iter = db
@@ -436,7 +438,7 @@ where
         .remap_key_type::<C>();
 
     while let Some(result) = iter.next() {
-        let (key, ()) = result?;
+        let (key, _) = result?;
         if to_remove.contains(convert(key)) {
             // safety: we don't keep references from inside the LMDB database.
             unsafe { iter.del_current()? };
@@ -446,7 +448,33 @@ where
     Ok(())
 }
 
-fn remove_docids_from_facet_field_id_value_docids<'a, C>(
+fn remove_docids_from_facet_field_id_string_docids<'a, C>(
+    wtxn: &'a mut heed::RwTxn,
+    db: &heed::Database<C, FacetStringLevelZeroValueCodec<CboRoaringBitmapCodec>>,
+    to_remove: &RoaringBitmap,
+) -> heed::Result<()>
+where
+    C: heed::BytesDecode<'a> + heed::BytesEncode<'a>,
+{
+    let mut iter = db.remap_key_type::<ByteSlice>().iter_mut(wtxn)?;
+    while let Some(result) = iter.next() {
+        let (bytes, (original_value, mut docids)) = result?;
+        let previous_len = docids.len();
+        docids -= to_remove;
+        if docids.is_empty() {
+            // safety: we don't keep references from inside the LMDB database.
+            unsafe { iter.del_current()? };
+        } else if docids.len() != previous_len {
+            let bytes = bytes.to_owned();
+            // safety: we don't keep references from inside the LMDB database.
+            unsafe { iter.put_current(&bytes, &(original_value, docids))? };
+        }
+    }
+
+    Ok(())
+}
+
+fn remove_docids_from_facet_field_id_number_docids<'a, C>(
     wtxn: &'a mut heed::RwTxn,
     db: &heed::Database<C, CboRoaringBitmapCodec>,
     to_remove: &RoaringBitmap,
diff --git a/milli/src/update/facets.rs b/milli/src/update/facets.rs
index 5fabbc504..cb9a90f7e 100644
--- a/milli/src/update/facets.rs
+++ b/milli/src/update/facets.rs
@@ -1,6 +1,6 @@
-use std::cmp;
 use std::fs::File;
-use std::num::NonZeroUsize;
+use std::num::{NonZeroU8, NonZeroUsize};
+use std::{cmp, mem};
 
 use chrono::Utc;
 use grenad::{CompressionType, FileFuse, Reader, Writer};
@@ -10,7 +10,10 @@ use log::debug;
 use roaring::RoaringBitmap;
 
 use crate::error::InternalError;
-use crate::heed_codec::facet::FacetLevelValueF64Codec;
+use crate::heed_codec::facet::{
+    FacetLevelValueF64Codec, FacetLevelValueU32Codec, FacetStringLevelZeroCodec,
+    FacetStringLevelZeroValueCodec, FacetStringZeroBoundsValueCodec,
+};
 use crate::heed_codec::CboRoaringBitmapCodec;
 use crate::update::index_documents::{
     create_writer, write_into_lmdb_database, writer_into_reader, WriteMethod,
@@ -64,24 +67,42 @@ impl<'t, 'u, 'i> Facets<'t, 'u, 'i> {
         debug!("Computing and writing the facet values levels docids into LMDB on disk...");
 
         for field_id in faceted_fields {
+            // Clear the facet string levels.
+            clear_field_string_levels(
+                self.wtxn,
+                self.index.facet_id_string_docids.remap_types::<ByteSlice, DecodeIgnore>(),
+                field_id,
+            )?;
+
             // Compute and store the faceted strings documents ids.
-            let string_documents_ids = compute_faceted_documents_ids(
+            let string_documents_ids = compute_faceted_strings_documents_ids(
                 self.wtxn,
                 self.index.facet_id_string_docids.remap_key_type::<ByteSlice>(),
                 field_id,
             )?;
 
+            let facet_string_levels = compute_facet_string_levels(
+                self.wtxn,
+                self.index.facet_id_string_docids,
+                self.chunk_compression_type,
+                self.chunk_compression_level,
+                self.chunk_fusing_shrink_size,
+                self.level_group_size,
+                self.min_level_size,
+                field_id,
+            )?;
+
             // Clear the facet number levels.
             clear_field_number_levels(self.wtxn, self.index.facet_id_f64_docids, field_id)?;
 
             // Compute and store the faceted numbers documents ids.
-            let number_documents_ids = compute_faceted_documents_ids(
+            let number_documents_ids = compute_faceted_numbers_documents_ids(
                 self.wtxn,
                 self.index.facet_id_f64_docids.remap_key_type::<ByteSlice>(),
                 field_id,
             )?;
 
-            let content = compute_facet_number_levels(
+            let facet_number_levels = compute_facet_number_levels(
                 self.wtxn,
                 self.index.facet_id_f64_docids,
                 self.chunk_compression_type,
@@ -106,8 +127,16 @@ impl<'t, 'u, 'i> Facets<'t, 'u, 'i> {
             write_into_lmdb_database(
                 self.wtxn,
                 *self.index.facet_id_f64_docids.as_polymorph(),
-                content,
-                |_, _| Err(InternalError::IndexingMergingKeys { process: "facet number level" }),
+                facet_number_levels,
+                |_, _| Err(InternalError::IndexingMergingKeys { process: "facet number levels" }),
+                WriteMethod::GetMergePut,
+            )?;
+
+            write_into_lmdb_database(
+                self.wtxn,
+                *self.index.facet_id_string_docids.as_polymorph(),
+                facet_string_levels,
+                |_, _| Err(InternalError::IndexingMergingKeys { process: "facet string levels" }),
                 WriteMethod::GetMergePut,
             )?;
         }
@@ -193,21 +222,6 @@ fn compute_facet_number_levels<'t>(
     writer_into_reader(writer, shrink_size)
 }
 
-fn compute_faceted_documents_ids(
-    rtxn: &heed::RoTxn,
-    db: heed::Database<ByteSlice, CboRoaringBitmapCodec>,
-    field_id: FieldId,
-) -> Result<RoaringBitmap> {
-    let mut documents_ids = RoaringBitmap::new();
-
-    for result in db.prefix_iter(rtxn, &field_id.to_be_bytes())? {
-        let (_key, docids) = result?;
-        documents_ids |= docids;
-    }
-
-    Ok(documents_ids)
-}
-
 fn write_number_entry(
     writer: &mut Writer<File>,
     field_id: FieldId,
@@ -222,3 +236,129 @@ fn write_number_entry(
     writer.insert(&key, &data)?;
     Ok(())
 }
+
+fn compute_faceted_strings_documents_ids(
+    rtxn: &heed::RoTxn,
+    db: heed::Database<ByteSlice, FacetStringLevelZeroValueCodec<CboRoaringBitmapCodec>>,
+    field_id: FieldId,
+) -> Result<RoaringBitmap> {
+    let mut documents_ids = RoaringBitmap::new();
+    for result in db.prefix_iter(rtxn, &field_id.to_be_bytes())? {
+        let (_key, (_original_value, docids)) = result?;
+        documents_ids |= docids;
+    }
+
+    Ok(documents_ids)
+}
+
+fn compute_faceted_numbers_documents_ids(
+    rtxn: &heed::RoTxn,
+    db: heed::Database<ByteSlice, CboRoaringBitmapCodec>,
+    field_id: FieldId,
+) -> Result<RoaringBitmap> {
+    let mut documents_ids = RoaringBitmap::new();
+    for result in db.prefix_iter(rtxn, &field_id.to_be_bytes())? {
+        let (_key, docids) = result?;
+        documents_ids |= docids;
+    }
+
+    Ok(documents_ids)
+}
+
+fn clear_field_string_levels<'t>(
+    wtxn: &'t mut heed::RwTxn,
+    db: heed::Database<ByteSlice, DecodeIgnore>,
+    field_id: FieldId,
+) -> heed::Result<()> {
+    let left = (field_id, NonZeroU8::new(1).unwrap(), u32::MIN, u32::MIN);
+    let right = (field_id, NonZeroU8::new(u8::MAX).unwrap(), u32::MAX, u32::MAX);
+    let range = left..=right;
+    db.remap_key_type::<FacetLevelValueU32Codec>().delete_range(wtxn, &range).map(drop)
+}
+
+fn compute_facet_string_levels<'t>(
+    rtxn: &'t heed::RoTxn,
+    db: heed::Database<
+        FacetStringLevelZeroCodec,
+        FacetStringLevelZeroValueCodec<CboRoaringBitmapCodec>,
+    >,
+    compression_type: CompressionType,
+    compression_level: Option<u32>,
+    shrink_size: Option<u64>,
+    level_group_size: NonZeroUsize,
+    min_level_size: NonZeroUsize,
+    field_id: FieldId,
+) -> Result<Reader<FileFuse>> {
+    let first_level_size = db
+        .remap_key_type::<ByteSlice>()
+        .prefix_iter(rtxn, &field_id.to_be_bytes())?
+        .remap_types::<DecodeIgnore, DecodeIgnore>()
+        .fold(Ok(0usize), |count, result| result.and(count).map(|c| c + 1))?;
+
+    // It is forbidden to keep a cursor and write in a database at the same time with LMDB
+    // therefore we write the facet levels entries into a grenad file before transfering them.
+    let mut writer = tempfile::tempfile()
+        .and_then(|file| create_writer(compression_type, compression_level, file))?;
+
+    // Groups sizes are always a power of the original level_group_size and therefore a group
+    // always maps groups of the previous level and never splits previous levels groups in half.
+    let group_size_iter = (1u8..)
+        .map(|l| (l, level_group_size.get().pow(l as u32)))
+        .take_while(|(_, s)| first_level_size / *s >= min_level_size.get());
+
+    for (level, group_size) in group_size_iter {
+        let level = NonZeroU8::new(level).unwrap();
+        let mut left = (0, "");
+        let mut right = (0, "");
+        let mut group_docids = RoaringBitmap::new();
+
+        // Because we know the size of the level 0 we can use a range iterator that starts
+        // at the first value of the level and goes to the last by simply counting.
+        for (i, result) in db.range(rtxn, &((field_id, "")..))?.take(first_level_size).enumerate() {
+            let ((_field_id, value), (_original_value, docids)) = result?;
+
+            if i == 0 {
+                left = (i as u32, value);
+            } else if i % group_size == 0 {
+                // we found the first bound of the next group, we must store the left
+                // and right bounds associated with the docids. We also reset the docids.
+                let docids = mem::take(&mut group_docids);
+                write_string_entry(&mut writer, field_id, level, left, right, docids)?;
+
+                // We save the left bound for the new group.
+                left = (i as u32, value);
+            }
+
+            // The right bound is always the bound we run through.
+            group_docids |= docids;
+            right = (i as u32, value);
+        }
+
+        if !group_docids.is_empty() {
+            let docids = mem::take(&mut group_docids);
+            write_string_entry(&mut writer, field_id, level, left, right, docids)?;
+        }
+    }
+
+    writer_into_reader(writer, shrink_size)
+}
+
+fn write_string_entry(
+    writer: &mut Writer<File>,
+    field_id: FieldId,
+    level: NonZeroU8,
+    (left_id, left_value): (u32, &str),
+    (right_id, right_value): (u32, &str),
+    docids: RoaringBitmap,
+) -> Result<()> {
+    let key = (field_id, level, left_id, right_id);
+    let key = FacetLevelValueU32Codec::bytes_encode(&key).ok_or(Error::Encoding)?;
+    let data = match level.get() {
+        1 => (Some((left_value, right_value)), docids),
+        _ => (None, docids),
+    };
+    let data = FacetStringZeroBoundsValueCodec::<CboRoaringBitmapCodec>::bytes_encode(&data)
+        .ok_or(Error::Encoding)?;
+    writer.insert(&key, &data)?;
+    Ok(())
+}
diff --git a/milli/src/update/index_documents/merge_function.rs b/milli/src/update/index_documents/merge_function.rs
index 8613a8824..7e5d0b581 100644
--- a/milli/src/update/index_documents/merge_function.rs
+++ b/milli/src/update/index_documents/merge_function.rs
@@ -2,8 +2,11 @@ use std::borrow::Cow;
 use std::result::Result as StdResult;
 
 use fst::IntoStreamer;
+use heed::{BytesDecode, BytesEncode};
 use roaring::RoaringBitmap;
 
+use crate::error::SerializationError;
+use crate::heed_codec::facet::FacetStringLevelZeroValueCodec;
 use crate::heed_codec::CboRoaringBitmapCodec;
 use crate::Result;
 
@@ -69,6 +72,26 @@ pub fn roaring_bitmap_merge(_key: &[u8], values: &[Cow<[u8]>]) -> Result<Vec<u8>
     Ok(vec)
 }
 
+/// Uses the FacetStringLevelZeroValueCodec to merge the values.
+pub fn tuple_string_cbo_roaring_bitmap_merge(_key: &[u8], values: &[Cow<[u8]>]) -> Result<Vec<u8>> {
+    let (head, tail) = values.split_first().unwrap();
+    let (head_string, mut head_rb) =
+        FacetStringLevelZeroValueCodec::<CboRoaringBitmapCodec>::bytes_decode(&head[..])
+            .ok_or(SerializationError::Decoding { db_name: None })?;
+
+    for value in tail {
+        let (_string, rb) =
+            FacetStringLevelZeroValueCodec::<CboRoaringBitmapCodec>::bytes_decode(&value[..])
+                .ok_or(SerializationError::Decoding { db_name: None })?;
+        head_rb |= rb;
+    }
+
+    FacetStringLevelZeroValueCodec::<CboRoaringBitmapCodec>::bytes_encode(&(head_string, head_rb))
+        .map(|cow| cow.into_owned())
+        .ok_or(SerializationError::Encoding { db_name: None })
+        .map_err(Into::into)
+}
+
 pub fn cbo_roaring_bitmap_merge(_key: &[u8], values: &[Cow<[u8]>]) -> Result<Vec<u8>> {
     let (head, tail) = values.split_first().unwrap();
     let mut head = CboRoaringBitmapCodec::deserialize_from(&head[..])?;
diff --git a/milli/src/update/index_documents/mod.rs b/milli/src/update/index_documents/mod.rs
index 9ac05fe1a..efe16def7 100644
--- a/milli/src/update/index_documents/mod.rs
+++ b/milli/src/update/index_documents/mod.rs
@@ -20,6 +20,7 @@ use serde::{Deserialize, Serialize};
 
 pub use self::merge_function::{
     cbo_roaring_bitmap_merge, fst_merge, keep_first, roaring_bitmap_merge,
+    tuple_string_cbo_roaring_bitmap_merge,
 };
 use self::store::{Readers, Store};
 pub use self::transform::{Transform, TransformOutput};
@@ -655,7 +656,7 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
             self.wtxn,
             *self.index.facet_id_string_docids.as_polymorph(),
             facet_field_strings_docids_readers,
-            cbo_roaring_bitmap_merge,
+            tuple_string_cbo_roaring_bitmap_merge,
             write_method,
         )?;
 
diff --git a/milli/src/update/index_documents/store.rs b/milli/src/update/index_documents/store.rs
index ebf365f44..444b11e31 100644
--- a/milli/src/update/index_documents/store.rs
+++ b/milli/src/update/index_documents/store.rs
@@ -22,12 +22,13 @@ use tempfile::tempfile;
 
 use super::merge_function::{
     cbo_roaring_bitmap_merge, fst_merge, keep_first, roaring_bitmap_merge,
+    tuple_string_cbo_roaring_bitmap_merge,
 };
 use super::{create_sorter, create_writer, writer_into_reader, MergeFn};
 use crate::error::{Error, InternalError, SerializationError};
 use crate::heed_codec::facet::{
-    FacetLevelValueF64Codec, FacetValueStringCodec, FieldDocIdFacetF64Codec,
-    FieldDocIdFacetStringCodec,
+    FacetLevelValueF64Codec, FacetStringLevelZeroCodec, FacetStringLevelZeroValueCodec,
+    FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec,
 };
 use crate::heed_codec::{BoRoaringBitmapCodec, CboRoaringBitmapCodec};
 use crate::update::UpdateIndexingStep;
@@ -65,7 +66,7 @@ pub struct Store<'s, A> {
         LinkedHashMap<(SmallVec32<u8>, SmallVec32<u8>, u8), RoaringBitmap>,
     words_pairs_proximities_docids_limit: usize,
     facet_field_number_docids: LinkedHashMap<(FieldId, OrderedFloat<f64>), RoaringBitmap>,
-    facet_field_string_docids: LinkedHashMap<(FieldId, String), RoaringBitmap>,
+    facet_field_string_docids: LinkedHashMap<(FieldId, String), (String, RoaringBitmap)>,
     facet_field_value_docids_limit: usize,
     // MTBL parameters
     chunk_compression_type: CompressionType,
@@ -153,7 +154,7 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> {
             max_memory,
         );
         let facet_field_strings_docids_sorter = create_sorter(
-            cbo_roaring_bitmap_merge,
+            tuple_string_cbo_roaring_bitmap_merge,
             chunk_compression_type,
             chunk_compression_level,
             chunk_fusing_shrink_size,
@@ -283,21 +284,33 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> {
     fn insert_facet_string_values_docid(
         &mut self,
         field_id: FieldId,
-        value: String,
+        normalized_value: String,
+        original_value: String,
         id: DocumentId,
     ) -> Result<()> {
-        let sorter = &mut self.field_id_docid_facet_strings_sorter;
-        Self::write_field_id_docid_facet_string_value(sorter, field_id, id, &value)?;
+        if normalized_value.is_empty() {
+            return Ok(());
+        }
 
-        let key = (field_id, value);
+        let sorter = &mut self.field_id_docid_facet_strings_sorter;
+        Self::write_field_id_docid_facet_string_value(
+            sorter,
+            field_id,
+            id,
+            &normalized_value,
+            &original_value,
+        )?;
+
+        let key = (field_id, normalized_value);
         // if get_refresh finds the element it is assured to be at the end of the linked hash map.
         match self.facet_field_string_docids.get_refresh(&key) {
-            Some(old) => {
+            Some((_original_value, old)) => {
                 old.insert(id);
             }
             None => {
                 // A newly inserted element is append at the end of the linked hash map.
-                self.facet_field_string_docids.insert(key, RoaringBitmap::from_iter(Some(id)));
+                self.facet_field_string_docids
+                    .insert(key, (original_value, RoaringBitmap::from_iter(Some(id))));
                 // If the word docids just reached it's capacity we must make sure to remove
                 // one element, this way next time we insert we doesn't grow the capacity.
                 if self.facet_field_string_docids.len() == self.facet_field_value_docids_limit {
@@ -359,7 +372,7 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> {
         document_id: DocumentId,
         words_positions: &mut HashMap<String, SmallVec32<Position>>,
         facet_numbers_values: &mut HashMap<FieldId, Vec<f64>>,
-        facet_strings_values: &mut HashMap<FieldId, Vec<String>>,
+        facet_strings_values: &mut HashMap<FieldId, Vec<(String, String)>>,
         record: &[u8],
     ) -> Result<()> {
         // We compute the list of words pairs proximities (self-join) and write it directly to disk.
@@ -395,8 +408,8 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> {
 
         // We store document_id associated with all the facet strings fields ids and values.
         for (field, values) in facet_strings_values.drain() {
-            for value in values {
-                self.insert_facet_string_values_docid(field, value, document_id)?;
+            for (normalized, original) in values {
+                self.insert_facet_string_values_docid(field, normalized, original, document_id)?;
             }
         }
 
@@ -512,23 +525,24 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> {
 
     fn write_facet_field_string_docids<I, E>(sorter: &mut Sorter<MergeFn<E>>, iter: I) -> Result<()>
     where
-        I: IntoIterator<Item = ((FieldId, String), RoaringBitmap)>,
+        I: IntoIterator<Item = ((FieldId, String), (String, RoaringBitmap))>,
         Error: From<E>,
     {
         let mut key_buffer = Vec::new();
-        let mut data_buffer = Vec::new();
 
-        for ((field_id, value), docids) in iter {
+        for ((field_id, normalized_value), (original_value, docids)) in iter {
             key_buffer.clear();
-            data_buffer.clear();
 
-            FacetValueStringCodec::serialize_into(field_id, &value, &mut key_buffer);
-            CboRoaringBitmapCodec::serialize_into(&docids, &mut data_buffer);
+            FacetStringLevelZeroCodec::serialize_into(field_id, &normalized_value, &mut key_buffer);
+
+            let data = (original_value.as_str(), docids);
+            let data = FacetStringLevelZeroValueCodec::<CboRoaringBitmapCodec>::bytes_encode(&data)
+                .ok_or(SerializationError::Encoding { db_name: Some("facet-id-string-docids") })?;
 
             if lmdb_key_valid_size(&key_buffer) {
-                sorter.insert(&key_buffer, &data_buffer)?;
+                sorter.insert(&key_buffer, &data)?;
             } else {
-                warn!("facet value {:?} is too large to be saved", value);
+                warn!("facet value {:?} is too large to be saved", original_value);
             }
         }
 
@@ -583,19 +597,24 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> {
         sorter: &mut Sorter<MergeFn<E>>,
         field_id: FieldId,
         document_id: DocumentId,
-        value: &str,
+        normalized_value: &str,
+        original_value: &str,
     ) -> Result<()>
     where
         Error: From<E>,
     {
         let mut buffer = Vec::new();
-
-        FieldDocIdFacetStringCodec::serialize_into(field_id, document_id, value, &mut buffer);
+        FieldDocIdFacetStringCodec::serialize_into(
+            field_id,
+            document_id,
+            normalized_value,
+            &mut buffer,
+        );
 
         if lmdb_key_valid_size(&buffer) {
-            sorter.insert(&buffer, &[])?;
+            sorter.insert(&buffer, original_value.as_bytes())?;
         } else {
-            warn!("facet value {:?} is too large to be saved", value);
+            warn!("facet value {:?} is too large to be saved", original_value);
         }
 
         Ok(())
@@ -925,24 +944,24 @@ fn process_tokens<'a>(
         .filter(|(_, t)| t.is_word())
 }
 
-fn extract_facet_values(value: &Value) -> (Vec<f64>, Vec<String>) {
+fn extract_facet_values(value: &Value) -> (Vec<f64>, Vec<(String, String)>) {
     fn inner_extract_facet_values(
         value: &Value,
         can_recurse: bool,
         output_numbers: &mut Vec<f64>,
-        output_strings: &mut Vec<String>,
+        output_strings: &mut Vec<(String, String)>,
     ) {
         match value {
             Value::Null => (),
-            Value::Bool(b) => output_strings.push(b.to_string()),
+            Value::Bool(b) => output_strings.push((b.to_string(), b.to_string())),
             Value::Number(number) => {
                 if let Some(float) = number.as_f64() {
                     output_numbers.push(float);
                 }
             }
-            Value::String(string) => {
-                let string = string.trim().to_lowercase();
-                output_strings.push(string);
+            Value::String(original) => {
+                let normalized = original.trim().to_lowercase();
+                output_strings.push((normalized, original.clone()));
             }
             Value::Array(values) => {
                 if can_recurse {
diff --git a/milli/src/update/settings.rs b/milli/src/update/settings.rs
index c6540b33a..e4adbccb9 100644
--- a/milli/src/update/settings.rs
+++ b/milli/src/update/settings.rs
@@ -276,8 +276,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
         match self.searchable_fields {
             Setting::Set(ref fields) => {
                 // every time the searchable attributes are updated, we need to update the
-                // ids for any settings that uses the facets. (displayed_fields,
-                // filterable_fields)
+                // ids for any settings that uses the facets. (distinct_fields, filterable_fields).
                 let old_fields_ids_map = self.index.fields_ids_map(self.wtxn)?;
 
                 let mut new_fields_ids_map = FieldsIdsMap::new();