mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-01-25 20:57:35 +01:00
Merge #3981
3981: Truncate the normalized long facets used in the search for facet value r=irevoire a=ManyTheFish

# Pull Request

Truncate the normalized long facets used in the search for facet value

## targeted release

v1.3.1

## Related issue

Fixes #3978

Co-authored-by: ManyTheFish <many@meilisearch.com>
This commit is contained in:
commit
04671d0751
@@ -97,7 +97,7 @@ const MAX_LMDB_KEY_LENGTH: usize = 500;
|
|||||||
///
|
///
|
||||||
/// This number is determined by the keys of the different facet databases
|
/// This number is determined by the keys of the different facet databases
|
||||||
/// and adding a margin of safety.
|
/// and adding a margin of safety.
|
||||||
pub const MAX_FACET_VALUE_LENGTH: usize = MAX_LMDB_KEY_LENGTH - 20;
|
pub const MAX_FACET_VALUE_LENGTH: usize = MAX_LMDB_KEY_LENGTH - 32;
|
||||||
|
|
||||||
/// The maximum length a word can be
|
/// The maximum length a word can be
|
||||||
pub const MAX_WORD_LENGTH: usize = MAX_LMDB_KEY_LENGTH / 2;
|
pub const MAX_WORD_LENGTH: usize = MAX_LMDB_KEY_LENGTH / 2;
|
||||||
|
@@ -94,7 +94,7 @@ use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, FacetGroupValu
|
|||||||
use crate::heed_codec::ByteSliceRefCodec;
|
use crate::heed_codec::ByteSliceRefCodec;
|
||||||
use crate::update::index_documents::create_sorter;
|
use crate::update::index_documents::create_sorter;
|
||||||
use crate::update::merge_btreeset_string;
|
use crate::update::merge_btreeset_string;
|
||||||
use crate::{BEU16StrCodec, Index, Result, BEU16};
|
use crate::{BEU16StrCodec, Index, Result, BEU16, MAX_FACET_VALUE_LENGTH};
|
||||||
|
|
||||||
pub mod bulk;
|
pub mod bulk;
|
||||||
pub mod delete;
|
pub mod delete;
|
||||||
@@ -191,7 +191,16 @@ impl<'i> FacetsUpdate<'i> {
|
|||||||
for result in database.iter(wtxn)? {
|
for result in database.iter(wtxn)? {
|
||||||
let (facet_group_key, ()) = result?;
|
let (facet_group_key, ()) = result?;
|
||||||
if let FacetGroupKey { field_id, level: 0, left_bound } = facet_group_key {
|
if let FacetGroupKey { field_id, level: 0, left_bound } = facet_group_key {
|
||||||
let normalized_facet = left_bound.normalize(&options);
|
let mut normalized_facet = left_bound.normalize(&options);
|
||||||
|
let normalized_truncated_facet: String;
|
||||||
|
if normalized_facet.len() > MAX_FACET_VALUE_LENGTH {
|
||||||
|
normalized_truncated_facet = normalized_facet
|
||||||
|
.char_indices()
|
||||||
|
.take_while(|(idx, _)| *idx < MAX_FACET_VALUE_LENGTH)
|
||||||
|
.map(|(_, c)| c)
|
||||||
|
.collect();
|
||||||
|
normalized_facet = normalized_truncated_facet.into();
|
||||||
|
}
|
||||||
let set = BTreeSet::from_iter(std::iter::once(left_bound));
|
let set = BTreeSet::from_iter(std::iter::once(left_bound));
|
||||||
let key = (field_id, normalized_facet.as_ref());
|
let key = (field_id, normalized_facet.as_ref());
|
||||||
let key = BEU16StrCodec::bytes_encode(&key).ok_or(heed::Error::Encoding)?;
|
let key = BEU16StrCodec::bytes_encode(&key).ok_or(heed::Error::Encoding)?;
|
||||||
|
@@ -44,7 +44,7 @@ pub fn extract_facet_string_docids<R: io::Read + io::Seek>(
|
|||||||
if normalised_value.len() > MAX_FACET_VALUE_LENGTH {
|
if normalised_value.len() > MAX_FACET_VALUE_LENGTH {
|
||||||
normalised_truncated_value = normalised_value
|
normalised_truncated_value = normalised_value
|
||||||
.char_indices()
|
.char_indices()
|
||||||
.take_while(|(idx, _)| idx + 4 < MAX_FACET_VALUE_LENGTH)
|
.take_while(|(idx, _)| *idx < MAX_FACET_VALUE_LENGTH)
|
||||||
.map(|(_, c)| c)
|
.map(|(_, c)| c)
|
||||||
.collect();
|
.collect();
|
||||||
normalised_value = normalised_truncated_value.as_str();
|
normalised_value = normalised_truncated_value.as_str();
|
||||||
|
Loading…
x
Reference in New Issue
Block a user