Take stop words into account

This commit is contained in:
many 2021-08-17 12:25:07 +02:00
parent 823da19745
commit 2d1727697d
No known key found for this signature in database
GPG key ID: 2CEF23B75189EACA
3 changed files with 11 additions and 0 deletions

View file

@ -21,6 +21,7 @@ pub fn extract_docid_word_positions<R: io::Read>(
mut obkv_documents: grenad::Reader<R>,
indexer: GrenadParameters,
searchable_fields: &Option<HashSet<FieldId>>,
stop_words: Option<&fst::Set<&[u8]>>,
) -> Result<(RoaringBitmap, grenad::Reader<File>)> {
let max_memory = indexer.max_memory_by_thread();
@ -35,6 +36,10 @@ pub fn extract_docid_word_positions<R: io::Read>(
let mut key_buffer = Vec::new();
let mut field_buffer = String::new();
let mut config = AnalyzerConfig::default();
if let Some(stop_words) = stop_words {
config.stop_words(stop_words);
}
let analyzer = Analyzer::<Vec<u8>>::new(AnalyzerConfig::default());
while let Some((key, value)) = obkv_documents.next()? {