From 161cb736ea501b1913c62917b622424c75a1f4a9 Mon Sep 17 00:00:00 2001
From: ManyTheFish <many@meilisearch.com>
Date: Tue, 3 Jun 2025 10:37:29 +0200
Subject: [PATCH] Adapt tests to the Chinese word segmenter changes
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The new Chinese segmenter splits words into smaller parts.
The word `小化妆包` was previously segmented as `小 / 化妆包` and is now segmented as `小 / 化妆 / 包`,
which changes the test results.
---
 crates/milli/src/update/index_documents/mod.rs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/crates/milli/src/update/index_documents/mod.rs b/crates/milli/src/update/index_documents/mod.rs
index 4acb78b9a..e0f85ca2d 100644
--- a/crates/milli/src/update/index_documents/mod.rs
+++ b/crates/milli/src/update/index_documents/mod.rs
@@ -1580,12 +1580,12 @@ mod tests {
         let rtxn = index.read_txn().unwrap();
 
         // Only the first document should match.
-        let count = index.word_docids.get(&rtxn, "huàzhuāngbāo").unwrap().unwrap().len();
+        let count = index.word_docids.get(&rtxn, "huàzhuāng").unwrap().unwrap().len();
         assert_eq!(count, 1);
 
         // Only the second document should match.
         let count = index.word_docids.get(&rtxn, "bāo").unwrap().unwrap().len();
-        assert_eq!(count, 1);
+        assert_eq!(count, 2);
 
         let mut search = crate::Search::new(&rtxn, &index);
         search.query("化妆包");