From 29d021ad4ddcb22f35b9c45d415ebbe7852ebd2c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Mon, 6 Apr 2020 18:53:02 +0200 Subject: [PATCH 1/3] Fixes the stop words and words fst generation --- .../src/update/settings_update.rs | 83 ++++++++++--------- 1 file changed, 42 insertions(+), 41 deletions(-) diff --git a/meilisearch-core/src/update/settings_update.rs b/meilisearch-core/src/update/settings_update.rs index 452d2f3f1..0ad2c904d 100644 --- a/meilisearch-core/src/update/settings_update.rs +++ b/meilisearch-core/src/update/settings_update.rs @@ -135,52 +135,61 @@ pub fn apply_stop_words_update( writer: &mut heed::RwTxn, index: &store::Index, stop_words: BTreeSet, -) -> MResult { +) -> MResult +{ + let mut must_reindex = false; let old_stop_words: BTreeSet = index.main .stop_words_fst(writer)? .unwrap_or_default() .stream() - .into_strs().unwrap().into_iter().collect(); + .into_strs()? + .into_iter() + .collect(); let deletion: BTreeSet = old_stop_words.difference(&stop_words).cloned().collect(); let addition: BTreeSet = stop_words.difference(&old_stop_words).cloned().collect(); if !addition.is_empty() { - apply_stop_words_addition( - writer, - index, - addition - )?; + apply_stop_words_addition(writer, index, addition)?; } if !deletion.is_empty() { - apply_stop_words_deletion( - writer, - index, - deletion - )?; - return Ok(true) + must_reindex = true; + apply_stop_words_deletion(writer, index, deletion)?; } - let stop_words_fst = fst::Set::from_iter(stop_words)?; - index.main.put_words_fst(writer, &stop_words_fst)?; - Ok(false) + if let Some(words_fst) = index.main.words_fst(writer)? 
{ + let stop_words = fst::Set::from_iter(stop_words)?; + let op = OpBuilder::new() + .add(&words_fst) + .add(&stop_words) + .difference(); + + let mut builder = fst::SetBuilder::memory(); + builder.extend_stream(op)?; + let words_fst = builder.into_inner().and_then(fst::Set::from_bytes)?; + + index.main.put_words_fst(writer, &words_fst)?; + index.main.put_stop_words_fst(writer, &stop_words)?; + } + + Ok(must_reindex) } fn apply_stop_words_addition( writer: &mut heed::RwTxn, index: &store::Index, addition: BTreeSet, -) -> MResult<()> { - +) -> MResult<()> +{ let main_store = index.main; let postings_lists_store = index.postings_lists; let mut stop_words_builder = SetBuilder::memory(); for word in addition { - stop_words_builder.insert(&word).unwrap(); + stop_words_builder.insert(&word)?; // we remove every posting list associated to a new stop word postings_lists_store.del_postings_list(writer, word.as_bytes())?; } @@ -188,8 +197,7 @@ fn apply_stop_words_addition( // create the new delta stop words fst let delta_stop_words = stop_words_builder .into_inner() - .and_then(fst::Set::from_bytes) - .unwrap(); + .and_then(fst::Set::from_bytes)?; // we also need to remove all the stop words from the main fst if let Some(word_fst) = main_store.words_fst(writer)? 
{ @@ -199,11 +207,10 @@ fn apply_stop_words_addition( .difference(); let mut word_fst_builder = SetBuilder::memory(); - word_fst_builder.extend_stream(op).unwrap(); + word_fst_builder.extend_stream(op)?; let word_fst = word_fst_builder .into_inner() - .and_then(fst::Set::from_bytes) - .unwrap(); + .and_then(fst::Set::from_bytes)?; main_store.put_words_fst(writer, &word_fst)?; } @@ -217,11 +224,10 @@ fn apply_stop_words_addition( .r#union(); let mut stop_words_builder = SetBuilder::memory(); - stop_words_builder.extend_stream(op).unwrap(); + stop_words_builder.extend_stream(op)?; let stop_words_fst = stop_words_builder .into_inner() - .and_then(fst::Set::from_bytes) - .unwrap(); + .and_then(fst::Set::from_bytes)?; main_store.put_stop_words_fst(writer, &stop_words_fst)?; @@ -237,14 +243,13 @@ fn apply_stop_words_deletion( let mut stop_words_builder = SetBuilder::memory(); for word in deletion { - stop_words_builder.insert(&word).unwrap(); + stop_words_builder.insert(&word)?; } // create the new delta stop words fst let delta_stop_words = stop_words_builder .into_inner() - .and_then(fst::Set::from_bytes) - .unwrap(); + .and_then(fst::Set::from_bytes)?; // now we delete all of these stop words from the main store let stop_words_fst = index.main.stop_words_fst(writer)?.unwrap_or_default(); @@ -255,11 +260,8 @@ fn apply_stop_words_deletion( .difference(); let mut stop_words_builder = SetBuilder::memory(); - stop_words_builder.extend_stream(op).unwrap(); - let stop_words_fst = stop_words_builder - .into_inner() - .and_then(fst::Set::from_bytes) - .unwrap(); + stop_words_builder.extend_stream(op)?; + let stop_words_fst = stop_words_builder.into_inner().and_then(fst::Set::from_bytes)?; Ok(index.main.put_stop_words_fst(writer, &stop_words_fst)?) 
} @@ -276,14 +278,14 @@ pub fn apply_synonyms_update( let mut synonyms_builder = SetBuilder::memory(); synonyms_store.clear(writer)?; for (word, alternatives) in synonyms.clone() { - synonyms_builder.insert(&word).unwrap(); + synonyms_builder.insert(&word)?; let alternatives = { let alternatives = SetBuf::from_dirty(alternatives); let mut alternatives_builder = SetBuilder::memory(); - alternatives_builder.extend_iter(alternatives).unwrap(); - let bytes = alternatives_builder.into_inner().unwrap(); - fst::Set::from_bytes(bytes).unwrap() + alternatives_builder.extend_iter(alternatives)?; + let bytes = alternatives_builder.into_inner()?; + fst::Set::from_bytes(bytes)? }; synonyms_store.put_synonyms(writer, word.as_bytes(), &alternatives)?; @@ -291,8 +293,7 @@ pub fn apply_synonyms_update( let synonyms_set = synonyms_builder .into_inner() - .and_then(fst::Set::from_bytes) - .unwrap(); + .and_then(fst::Set::from_bytes)?; main_store.put_synonyms_fst(writer, &synonyms_set)?; From d24209f5a7d3f3dd6e3e8e127ccb6e1d972d709e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Mon, 6 Apr 2020 19:25:56 +0200 Subject: [PATCH 2/3] Adds a test to check that stop words are correctly handled --- meilisearch-http/tests/settings_stop_words.rs | 35 ++++++++++++++++--- 1 file changed, 30 insertions(+), 5 deletions(-) diff --git a/meilisearch-http/tests/settings_stop_words.rs b/meilisearch-http/tests/settings_stop_words.rs index 0c9257150..1adaccb2c 100644 --- a/meilisearch-http/tests/settings_stop_words.rs +++ b/meilisearch-http/tests/settings_stop_words.rs @@ -6,11 +6,7 @@ mod common; #[test] fn update_stop_words() { let mut server = common::Server::with_uid("movies"); - let body = json!({ - "uid": "movies", - "primaryKey": "id", - }); - server.create_index(body); + server.populate_movies(); // 1 - Get stop words @@ -36,3 +32,32 @@ fn update_stop_words() { let (response, _status_code) = server.get_stop_words(); assert_eq!(response.as_array().unwrap().is_empty(), true); 
} + +#[test] +fn add_documents_and_stop_words() { + let mut server = common::Server::with_uid("movies"); + server.populate_movies(); + + // 2 - Update stop words + + let body = json!(["the", "of"]); + server.update_stop_words(body.clone()); + + // 3 - Search for a document with stop words + + let (response, _status_code) = server.search("q=the%20mask"); + assert!(!response["hits"].as_array().unwrap().is_empty()); + + // 4 - Search for documents with *only* stop words + + let (response, _status_code) = server.search("q=the%20of"); + assert!(response["hits"].as_array().unwrap().is_empty()); + + // 5 - Delete all stop words + + // server.delete_stop_words(); + + // // 6 - Search for a document with one stop word + + // assert!(!response["hits"].as_array().unwrap().is_empty()); +} From 780ac5cfd36ed283e2491501da8da0693e2b10d0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Mon, 6 Apr 2020 19:26:56 +0200 Subject: [PATCH 3/3] Update the CHANGELOG.md --- CHANGELOG.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f5af34c0d..4ac0be34e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,4 +6,5 @@ - Add support of nested null, boolean and seq values (#571 and #568, #574) - Fixed the core benchmark (#576) - Publish an ARMv7 and ARMv8 binaries on releases (#540 and #581) - - Fixing a bug where the result of the update status after the first update was empty (#542) + - Fixed a bug where the result of the update status after the first update was empty (#542) + - Fixed a bug where stop words were not handled correctly (#594)