mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-07-03 11:57:07 +02:00
implement a first version of the stop_words
The front must provide a BTreeSet containing the stop words. The stop_words are set to None if an empty set is provided. Add the stop-words in the http-ui interface. Use maplit in the tests and remove all the useless drop(rtxn) at the end of all tests.
This commit is contained in:
parent
62a8f1d707
commit
a2f46029c7
7 changed files with 203 additions and 56 deletions
|
@ -1,4 +1,4 @@
|
|||
use std::collections::{BTreeMap, HashMap, HashSet};
|
||||
use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};
|
||||
use std::fmt::Display;
|
||||
use std::fs::{File, create_dir_all};
|
||||
use std::net::SocketAddr;
|
||||
|
@ -128,7 +128,10 @@ struct Highlighter<'a, A> {
|
|||
|
||||
impl<'a, A: AsRef<[u8]>> Highlighter<'a, A> {
|
||||
fn new(stop_words: &'a fst::Set<A>) -> Self {
|
||||
let analyzer = Analyzer::new(AnalyzerConfig::default_with_stopwords(stop_words));
|
||||
let mut config = AnalyzerConfig::default();
|
||||
config.stop_words(stop_words);
|
||||
let analyzer = Analyzer::new(config);
|
||||
|
||||
Self { analyzer }
|
||||
}
|
||||
|
||||
|
@ -266,6 +269,13 @@ struct Settings {
|
|||
skip_serializing_if = "Option::is_none",
|
||||
)]
|
||||
criteria: Option<Option<Vec<String>>>,
|
||||
|
||||
#[serde(
|
||||
default,
|
||||
deserialize_with = "deserialize_some",
|
||||
skip_serializing_if = "Option::is_none",
|
||||
)]
|
||||
stop_words: Option<Option<BTreeSet<String>>>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
|
@ -439,6 +449,14 @@ async fn main() -> anyhow::Result<()> {
|
|||
}
|
||||
}
|
||||
|
||||
// We transpose the settings JSON struct into a real setting update.
|
||||
if let Some(stop_words) = settings.stop_words {
|
||||
match stop_words {
|
||||
Some(stop_words) => builder.set_stop_words(stop_words),
|
||||
None => builder.reset_stop_words(),
|
||||
}
|
||||
}
|
||||
|
||||
let result = builder.execute(|indexing_step, update_id| {
|
||||
let (current, total) = match indexing_step {
|
||||
TransformFromUserIntoGenericFormat { documents_seen } => (documents_seen, None),
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue