From 35758db9ece812d459cc5eda1034d0d839a1eb7b Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Tue, 8 Aug 2023 12:00:51 +0200 Subject: [PATCH] Truncate the normalized long facets used in search for facet value --- milli/src/lib.rs | 2 +- milli/src/update/facet/mod.rs | 13 +++++++++++-- .../extract/extract_facet_string_docids.rs | 2 +- 3 files changed, 13 insertions(+), 4 deletions(-) diff --git a/milli/src/lib.rs b/milli/src/lib.rs index 3e5f63fd5..cd97d6192 100644 --- a/milli/src/lib.rs +++ b/milli/src/lib.rs @@ -97,7 +97,7 @@ const MAX_LMDB_KEY_LENGTH: usize = 500; /// /// This number is determined by the keys of the different facet databases /// and adding a margin of safety. -pub const MAX_FACET_VALUE_LENGTH: usize = MAX_LMDB_KEY_LENGTH - 20; +pub const MAX_FACET_VALUE_LENGTH: usize = MAX_LMDB_KEY_LENGTH - 32; /// The maximum length a word can be pub const MAX_WORD_LENGTH: usize = MAX_LMDB_KEY_LENGTH / 2; diff --git a/milli/src/update/facet/mod.rs b/milli/src/update/facet/mod.rs index 16fc1cd2f..15776a709 100644 --- a/milli/src/update/facet/mod.rs +++ b/milli/src/update/facet/mod.rs @@ -94,7 +94,7 @@ use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, FacetGroupValu use crate::heed_codec::ByteSliceRefCodec; use crate::update::index_documents::create_sorter; use crate::update::merge_btreeset_string; -use crate::{BEU16StrCodec, Index, Result, BEU16}; +use crate::{BEU16StrCodec, Index, Result, BEU16, MAX_FACET_VALUE_LENGTH}; pub mod bulk; pub mod delete; @@ -191,7 +191,16 @@ impl<'i> FacetsUpdate<'i> { for result in database.iter(wtxn)? 
{ let (facet_group_key, ()) = result?; if let FacetGroupKey { field_id, level: 0, left_bound } = facet_group_key { - let normalized_facet = left_bound.normalize(&options); + let mut normalized_facet = left_bound.normalize(&options); + let normalized_truncated_facet: String; + if normalized_facet.len() > MAX_FACET_VALUE_LENGTH { + normalized_truncated_facet = normalized_facet + .char_indices() + .take_while(|(idx, _)| *idx < MAX_FACET_VALUE_LENGTH) + .map(|(_, c)| c) + .collect(); + normalized_facet = normalized_truncated_facet.into(); + } let set = BTreeSet::from_iter(std::iter::once(left_bound)); let key = (field_id, normalized_facet.as_ref()); let key = BEU16StrCodec::bytes_encode(&key).ok_or(heed::Error::Encoding)?; diff --git a/milli/src/update/index_documents/extract/extract_facet_string_docids.rs b/milli/src/update/index_documents/extract/extract_facet_string_docids.rs index 0d9c0981e..dbb3fcfe8 100644 --- a/milli/src/update/index_documents/extract/extract_facet_string_docids.rs +++ b/milli/src/update/index_documents/extract/extract_facet_string_docids.rs @@ -44,7 +44,7 @@ pub fn extract_facet_string_docids( if normalised_value.len() > MAX_FACET_VALUE_LENGTH { normalised_truncated_value = normalised_value .char_indices() - .take_while(|(idx, _)| idx + 4 < MAX_FACET_VALUE_LENGTH) + .take_while(|(idx, _)| *idx < MAX_FACET_VALUE_LENGTH) .map(|(_, c)| c) .collect(); normalised_value = normalised_truncated_value.as_str();