mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-26 23:04:26 +01:00
Take stop words into account
This commit is contained in:
parent
823da19745
commit
2d1727697d
@ -21,6 +21,7 @@ pub fn extract_docid_word_positions<R: io::Read>(
|
||||
mut obkv_documents: grenad::Reader<R>,
|
||||
indexer: GrenadParameters,
|
||||
searchable_fields: &Option<HashSet<FieldId>>,
|
||||
stop_words: Option<&fst::Set<&[u8]>>,
|
||||
) -> Result<(RoaringBitmap, grenad::Reader<File>)> {
|
||||
let max_memory = indexer.max_memory_by_thread();
|
||||
|
||||
@ -35,6 +36,10 @@ pub fn extract_docid_word_positions<R: io::Read>(
|
||||
|
||||
let mut key_buffer = Vec::new();
|
||||
let mut field_buffer = String::new();
|
||||
let mut config = AnalyzerConfig::default();
|
||||
if let Some(stop_words) = stop_words {
|
||||
config.stop_words(stop_words);
|
||||
}
|
||||
let analyzer = Analyzer::<Vec<u8>>::new(AnalyzerConfig::default());
|
||||
|
||||
while let Some((key, value)) = obkv_documents.next()? {
|
||||
|
@ -37,6 +37,7 @@ pub(crate) fn data_from_obkv_documents(
|
||||
lmdb_writer_sx: Sender<TypedChunk>,
|
||||
searchable_fields: Option<HashSet<FieldId>>,
|
||||
faceted_fields: HashSet<FieldId>,
|
||||
stop_words: Option<fst::Set<&[u8]>>,
|
||||
) -> Result<()> {
|
||||
let result: Result<(Vec<_>, (Vec<_>, Vec<_>))> = obkv_chunks
|
||||
.par_bridge()
|
||||
@ -54,6 +55,7 @@ pub(crate) fn data_from_obkv_documents(
|
||||
documents_chunk.clone(),
|
||||
indexer.clone(),
|
||||
&searchable_fields,
|
||||
stop_words.as_ref(),
|
||||
)?;
|
||||
|
||||
// send documents_ids to DB writer
|
||||
|
@ -231,6 +231,9 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
|
||||
// get filterable fields for facet databases
|
||||
let faceted_fields = self.index.faceted_fields_ids(self.wtxn)?;
|
||||
|
||||
let stop_words = self.index.stop_words(self.wtxn)?;
|
||||
// let stop_words = stop_words.as_ref();
|
||||
|
||||
// Run extraction pipeline in parallel.
|
||||
pool.install(|| {
|
||||
let params = GrenadParameters {
|
||||
@ -255,6 +258,7 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
|
||||
lmdb_writer_sx,
|
||||
searchable_fields,
|
||||
faceted_fields,
|
||||
stop_words,
|
||||
)
|
||||
.unwrap();
|
||||
});
|
||||
|
Loading…
Reference in New Issue
Block a user