3904: Sort by lexicographic order after normalization r=dureuill a=dureuill

# Pull Request

## Related issue
Fixes https://github.com/meilisearch/meilisearch/issues/3893

## What does this PR do?
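- Re-sort stop words after normalization so they're not sent out-of-order to the FST (the FST builder only accepts keys in lexicographic order, and normalization can change the relative order of words)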


Co-authored-by: Louis Dureuil <louis@meilisearch.com>
This commit is contained in:
meili-bors[bot] 2023-07-10 12:12:05 +00:00 committed by GitHub
commit eb7a1aa7af
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -425,13 +425,14 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
let current = self.index.stop_words(self.wtxn)?; let current = self.index.stop_words(self.wtxn)?;
// Apply a lossless normalization on stop_words // Apply a lossless normalization on stop_words
let stop_words = stop_words let stop_words: BTreeSet<String> = stop_words
.iter() .iter()
.map(|w| w.as_str().normalize(&Default::default()).into_owned()); .map(|w| w.as_str().normalize(&Default::default()).into_owned())
.collect();
// since we can't compare a BTreeSet with an FST we are going to convert the // since we can't compare a BTreeSet with an FST we are going to convert the
// BTreeSet to an FST and then compare the two FSTs byte by byte. // BTreeSet to an FST and then compare the two FSTs byte by byte.
let fst = fst::Set::from_iter(stop_words)?; let fst = fst::Set::from_iter(stop_words.into_iter())?;
// Does the new FST differ from the previous one? // Does the new FST differ from the previous one?
if current if current