From d1fc42b53a2bdfe06a50d313e41e7f643eda0f67 Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Wed, 18 Jan 2023 15:02:13 +0100 Subject: [PATCH] Use compatibility decomposition normalizer in facets --- .../extract/extract_fid_docid_facet_values.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs b/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs index 0a7dfbeb1..71ac330e2 100644 --- a/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs +++ b/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs @@ -4,6 +4,7 @@ use std::fs::File; use std::io; use std::mem::size_of; +use charabia::normalizer::{CharNormalizer, CompatibilityDecompositionNormalizer}; use heed::zerocopy::AsBytes; use heed::BytesEncode; use roaring::RoaringBitmap; @@ -135,7 +136,9 @@ fn extract_facet_values(value: &Value) -> (Vec<f64>, Vec<(String, String)>) { } } Value::String(original) => { - let normalized = original.trim().to_lowercase(); + let normalized = CompatibilityDecompositionNormalizer + .normalize_str(original.trim()) + .to_lowercase(); output_strings.push((normalized, original.clone())); } Value::Array(values) => {