Implement a first version of the stop_words setting

The front must provide a BTreeSet containing the stop words
The stop_words are set to None if an empty set is provided
Add the stop words to the http-ui interface

Use maplit in the tests
and remove all the useless drop(rtxn) calls at the end of the tests
This commit is contained in:
tamo 2021-03-29 19:15:47 +02:00
parent 62a8f1d707
commit a2f46029c7
No known key found for this signature in database
GPG key ID: 20CD8020AFA88D69
7 changed files with 203 additions and 56 deletions

View file

@ -1,4 +1,4 @@
use std::collections::{BTreeMap, HashMap, HashSet};
use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};
use std::fmt::Display;
use std::fs::{File, create_dir_all};
use std::net::SocketAddr;
@ -128,7 +128,10 @@ struct Highlighter<'a, A> {
impl<'a, A: AsRef<[u8]>> Highlighter<'a, A> {
fn new(stop_words: &'a fst::Set<A>) -> Self {
let analyzer = Analyzer::new(AnalyzerConfig::default_with_stopwords(stop_words));
let mut config = AnalyzerConfig::default();
config.stop_words(stop_words);
let analyzer = Analyzer::new(config);
Self { analyzer }
}
@ -266,6 +269,13 @@ struct Settings {
skip_serializing_if = "Option::is_none",
)]
criteria: Option<Option<Vec<String>>>,
#[serde(
default,
deserialize_with = "deserialize_some",
skip_serializing_if = "Option::is_none",
)]
stop_words: Option<Option<BTreeSet<String>>>,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
@ -439,6 +449,14 @@ async fn main() -> anyhow::Result<()> {
}
}
// We transpose the settings JSON struct into a real setting update.
if let Some(stop_words) = settings.stop_words {
match stop_words {
Some(stop_words) => builder.set_stop_words(stop_words),
None => builder.reset_stop_words(),
}
}
let result = builder.execute(|indexing_step, update_id| {
let (current, total) = match indexing_step {
TransformFromUserIntoGenericFormat { documents_seen } => (documents_seen, None),