Use the word pair proximity and fid word count docids extractors

Co-authored-by: ManyTheFish <many@meilisearch.com>
This commit is contained in:
Clément Renault 2024-09-05 10:56:22 +02:00
parent 0fc02f7351
commit 73ce67862d
No known key found for this signature in database
GPG Key ID: F250A4C4E3AE5F5F
9 changed files with 205 additions and 157 deletions

View File

@ -112,23 +112,27 @@ pub struct WriterOperation {
} }
pub enum Database { pub enum Database {
WordDocids,
ExactWordDocids,
WordFidDocids,
WordPositionDocids,
Documents, Documents,
ExactWordDocids,
FidWordCountDocids,
Main, Main,
WordDocids,
WordFidDocids,
WordPairProximityDocids,
WordPositionDocids,
} }
impl WriterOperation { impl WriterOperation {
pub fn database(&self, index: &Index) -> heed::Database<Bytes, Bytes> { pub fn database(&self, index: &Index) -> heed::Database<Bytes, Bytes> {
match self.database { match self.database {
Database::Main => index.main.remap_types(),
Database::Documents => index.documents.remap_types(), Database::Documents => index.documents.remap_types(),
Database::WordDocids => index.word_docids.remap_types(),
Database::ExactWordDocids => index.exact_word_docids.remap_types(), Database::ExactWordDocids => index.exact_word_docids.remap_types(),
Database::Main => index.main.remap_types(),
Database::WordDocids => index.word_docids.remap_types(),
Database::WordFidDocids => index.word_fid_docids.remap_types(), Database::WordFidDocids => index.word_fid_docids.remap_types(),
Database::WordPositionDocids => index.word_position_docids.remap_types(), Database::WordPositionDocids => index.word_position_docids.remap_types(),
Database::FidWordCountDocids => index.field_id_word_count_docids.remap_types(),
Database::WordPairProximityDocids => index.word_pair_proximity_docids.remap_types(),
} }
} }
@ -198,9 +202,11 @@ impl MainSender<'_> {
} }
} }
pub enum WordDocids {}
pub enum ExactWordDocids {} pub enum ExactWordDocids {}
pub enum FidWordCountDocids {}
pub enum WordDocids {}
pub enum WordFidDocids {} pub enum WordFidDocids {}
pub enum WordPairProximityDocids {}
pub enum WordPositionDocids {} pub enum WordPositionDocids {}
pub trait DatabaseType { pub trait DatabaseType {
@ -209,14 +215,6 @@ pub trait DatabaseType {
fn new_merger_operation(merger: Merger<File, MergeDeladdCboRoaringBitmaps>) -> MergerOperation; fn new_merger_operation(merger: Merger<File, MergeDeladdCboRoaringBitmaps>) -> MergerOperation;
} }
impl DatabaseType for WordDocids {
const DATABASE: Database = Database::WordDocids;
fn new_merger_operation(merger: Merger<File, MergeDeladdCboRoaringBitmaps>) -> MergerOperation {
MergerOperation::WordDocidsMerger(merger)
}
}
impl DatabaseType for ExactWordDocids { impl DatabaseType for ExactWordDocids {
const DATABASE: Database = Database::ExactWordDocids; const DATABASE: Database = Database::ExactWordDocids;
@ -225,6 +223,22 @@ impl DatabaseType for ExactWordDocids {
} }
} }
impl DatabaseType for FidWordCountDocids {
const DATABASE: Database = Database::FidWordCountDocids;
fn new_merger_operation(merger: Merger<File, MergeDeladdCboRoaringBitmaps>) -> MergerOperation {
MergerOperation::FidWordCountDocidsMerger(merger)
}
}
impl DatabaseType for WordDocids {
const DATABASE: Database = Database::WordDocids;
fn new_merger_operation(merger: Merger<File, MergeDeladdCboRoaringBitmaps>) -> MergerOperation {
MergerOperation::WordDocidsMerger(merger)
}
}
impl DatabaseType for WordFidDocids { impl DatabaseType for WordFidDocids {
const DATABASE: Database = Database::WordFidDocids; const DATABASE: Database = Database::WordFidDocids;
@ -233,6 +247,14 @@ impl DatabaseType for WordFidDocids {
} }
} }
impl DatabaseType for WordPairProximityDocids {
const DATABASE: Database = Database::WordPairProximityDocids;
fn new_merger_operation(merger: Merger<File, MergeDeladdCboRoaringBitmaps>) -> MergerOperation {
MergerOperation::WordPairProximityDocidsMerger(merger)
}
}
impl DatabaseType for WordPositionDocids { impl DatabaseType for WordPositionDocids {
const DATABASE: Database = Database::WordPositionDocids; const DATABASE: Database = Database::WordPositionDocids;
@ -293,12 +315,14 @@ impl DocumentsSender<'_> {
} }
pub enum MergerOperation { pub enum MergerOperation {
WordDocidsMerger(Merger<File, MergeDeladdCboRoaringBitmaps>),
ExactWordDocidsMerger(Merger<File, MergeDeladdCboRoaringBitmaps>), ExactWordDocidsMerger(Merger<File, MergeDeladdCboRoaringBitmaps>),
FidWordCountDocidsMerger(Merger<File, MergeDeladdCboRoaringBitmaps>),
WordDocidsMerger(Merger<File, MergeDeladdCboRoaringBitmaps>),
WordFidDocidsMerger(Merger<File, MergeDeladdCboRoaringBitmaps>), WordFidDocidsMerger(Merger<File, MergeDeladdCboRoaringBitmaps>),
WordPairProximityDocidsMerger(Merger<File, MergeDeladdCboRoaringBitmaps>),
WordPositionDocidsMerger(Merger<File, MergeDeladdCboRoaringBitmaps>), WordPositionDocidsMerger(Merger<File, MergeDeladdCboRoaringBitmaps>),
InsertDocument { docid: DocumentId, document: Box<KvReaderFieldId> },
DeleteDocument { docid: DocumentId }, DeleteDocument { docid: DocumentId },
InsertDocument { docid: DocumentId, document: Box<KvReaderFieldId> },
} }
pub struct MergerReceiver(Receiver<MergerOperation>); pub struct MergerReceiver(Receiver<MergerOperation>);

View File

@ -1,5 +1,6 @@
use serde_json::Value; use serde_json::Value;
use crate::update::new::extract::perm_json_p;
use crate::update::new::KvReaderFieldId; use crate::update::new::KvReaderFieldId;
use crate::{FieldId, GlobalFieldsIdsMap, InternalError, Result, UserError}; use crate::{FieldId, GlobalFieldsIdsMap, InternalError, Result, UserError};

View File

@ -2,6 +2,7 @@ use std::collections::HashSet;
use std::fmt::Debug; use std::fmt::Debug;
use std::fs::File; use std::fs::File;
pub use extract_facets::*;
use grenad::{MergeFunction, Merger}; use grenad::{MergeFunction, Merger};
use heed::RoTxn; use heed::RoTxn;
use rayon::iter::{IntoParallelIterator, ParallelIterator}; use rayon::iter::{IntoParallelIterator, ParallelIterator};

View File

@ -2,16 +2,8 @@ mod cache;
mod faceted; mod faceted;
mod searchable; mod searchable;
pub use faceted::modname::{ pub use faceted::*;
FieldIdFacetExistsDocidsExtractor, FieldIdFacetIsEmptyDocidsExtractor, pub use searchable::*;
FieldIdFacetIsNullDocidsExtractor, FieldIdFacetNumberDocidsExtractor,
FieldIdFacetStringDocidsExtractor,
};
pub use faceted::FacetedExtractor;
pub use searchable::{
ExactWordDocidsExtractor, SearchableExtractor, WordDocidsExtractor, WordFidDocidsExtractor,
WordPositionDocidsExtractor,
};
/// TODO move in permissive json pointer /// TODO move in permissive json pointer
pub mod perm_json_p { pub mod perm_json_p {

View File

@ -1,15 +1,14 @@
use std::{borrow::Cow, collections::HashMap}; use std::borrow::Cow;
use std::collections::HashMap;
use heed::RoTxn; use heed::RoTxn;
use super::{tokenize_document::DocumentTokenizer, SearchableExtractor}; use super::tokenize_document::DocumentTokenizer;
use crate::{ use super::SearchableExtractor;
update::{ use crate::update::new::extract::cache::CboCachedSorter;
new::{extract::cache::CboCachedSorter, DocumentChange}, use crate::update::new::DocumentChange;
MergeDeladdCboRoaringBitmaps, use crate::update::MergeDeladdCboRoaringBitmaps;
}, use crate::{FieldId, GlobalFieldsIdsMap, Index, Result};
FieldId, GlobalFieldsIdsMap, Index, Result,
};
const MAX_COUNTED_WORDS: usize = 30; const MAX_COUNTED_WORDS: usize = 30;
@ -22,12 +21,13 @@ impl SearchableExtractor for FidWordCountDocidsExtractor {
index.user_defined_searchable_fields(rtxn).map_err(Into::into) index.user_defined_searchable_fields(rtxn).map_err(Into::into)
} }
fn attributes_to_skip<'a>(rtxn: &'a RoTxn, index: &'a Index) -> Result<Vec<&'a str>> { fn attributes_to_skip<'a>(_rtxn: &'a RoTxn, _index: &'a Index) -> Result<Vec<&'a str>> {
Ok(vec![]) Ok(vec![])
} }
/// This case is unreachable because extract_document_change has been reimplemented to not call this function. /// This case is unreachable because extract_document_change has been reimplemented to not call this function.
fn build_key<'a>(_field_id: FieldId, _position: u16, _word: &'a str) -> Cow<'a, [u8]> { fn build_key(_field_id: FieldId, _position: u16, _word: &str) -> Cow<[u8]> {
/// TODO remove this
unreachable!() unreachable!()
} }
@ -45,7 +45,7 @@ impl SearchableExtractor for FidWordCountDocidsExtractor {
match document_change { match document_change {
DocumentChange::Deletion(inner) => { DocumentChange::Deletion(inner) => {
let mut fid_word_count = HashMap::new(); let mut fid_word_count = HashMap::new();
let mut token_fn = |fid: FieldId, pos: u16, word: &str| { let mut token_fn = |fid: FieldId, _pos: u16, _word: &str| {
fid_word_count.entry(fid).and_modify(|count| *count += 1).or_insert(1); fid_word_count.entry(fid).and_modify(|count| *count += 1).or_insert(1);
Ok(()) Ok(())
}; };
@ -66,10 +66,10 @@ impl SearchableExtractor for FidWordCountDocidsExtractor {
} }
DocumentChange::Update(inner) => { DocumentChange::Update(inner) => {
let mut fid_word_count = HashMap::new(); let mut fid_word_count = HashMap::new();
let mut token_fn = |fid: FieldId, pos: u16, word: &str| { let mut token_fn = |fid: FieldId, _pos: u16, _word: &str| {
fid_word_count fid_word_count
.entry(fid) .entry(fid)
.and_modify(|(current_count, new_count)| *current_count += 1) .and_modify(|(current_count, _new_count)| *current_count += 1)
.or_insert((1, 0)); .or_insert((1, 0));
Ok(()) Ok(())
}; };
@ -79,10 +79,10 @@ impl SearchableExtractor for FidWordCountDocidsExtractor {
&mut token_fn, &mut token_fn,
)?; )?;
let mut token_fn = |fid: FieldId, pos: u16, word: &str| { let mut token_fn = |fid: FieldId, _pos: u16, _word: &str| {
fid_word_count fid_word_count
.entry(fid) .entry(fid)
.and_modify(|(current_count, new_count)| *new_count += 1) .and_modify(|(_current_count, new_count)| *new_count += 1)
.or_insert((0, 1)); .or_insert((0, 1));
Ok(()) Ok(())
}; };
@ -106,7 +106,7 @@ impl SearchableExtractor for FidWordCountDocidsExtractor {
} }
DocumentChange::Insertion(inner) => { DocumentChange::Insertion(inner) => {
let mut fid_word_count = HashMap::new(); let mut fid_word_count = HashMap::new();
let mut token_fn = |fid: FieldId, pos: u16, word: &str| { let mut token_fn = |fid: FieldId, _pos: u16, _word: &str| {
fid_word_count.entry(fid).and_modify(|count| *count += 1).or_insert(1); fid_word_count.entry(fid).and_modify(|count| *count += 1).or_insert(1);
Ok(()) Ok(())
}; };

View File

@ -20,7 +20,7 @@ impl SearchableExtractor for WordDocidsExtractor {
} }
/// TODO write in an external Vec buffer /// TODO write in an external Vec buffer
fn build_key<'a>(_field_id: FieldId, _position: u16, word: &'a str) -> Cow<'a, [u8]> { fn build_key(_field_id: FieldId, _position: u16, word: &str) -> Cow<[u8]> {
Cow::Borrowed(word.as_bytes()) Cow::Borrowed(word.as_bytes())
} }
} }
@ -49,7 +49,7 @@ impl SearchableExtractor for ExactWordDocidsExtractor {
Ok(vec![]) Ok(vec![])
} }
fn build_key<'a>(_field_id: FieldId, _position: u16, word: &'a str) -> Cow<'a, [u8]> { fn build_key(_field_id: FieldId, _position: u16, word: &str) -> Cow<[u8]> {
Cow::Borrowed(word.as_bytes()) Cow::Borrowed(word.as_bytes())
} }
} }
@ -67,7 +67,7 @@ impl SearchableExtractor for WordFidDocidsExtractor {
Ok(vec![]) Ok(vec![])
} }
fn build_key<'a>(field_id: FieldId, _position: u16, word: &'a str) -> Cow<'a, [u8]> { fn build_key(field_id: FieldId, _position: u16, word: &str) -> Cow<[u8]> {
let mut key = Vec::new(); let mut key = Vec::new();
key.extend_from_slice(word.as_bytes()); key.extend_from_slice(word.as_bytes());
key.push(0); key.push(0);
@ -89,7 +89,7 @@ impl SearchableExtractor for WordPositionDocidsExtractor {
Ok(vec![]) Ok(vec![])
} }
fn build_key<'a>(_field_id: FieldId, position: u16, word: &'a str) -> Cow<'a, [u8]> { fn build_key(_field_id: FieldId, position: u16, word: &str) -> Cow<[u8]> {
// position must be bucketed to reduce the number of keys in the DB. // position must be bucketed to reduce the number of keys in the DB.
let position = bucketed_position(position); let position = bucketed_position(position);
let mut key = Vec::new(); let mut key = Vec::new();

View File

@ -1,21 +1,17 @@
use std::{ use std::borrow::Cow;
borrow::Cow, use std::collections::{BTreeMap, VecDeque};
collections::{BTreeMap, VecDeque},
};
use heed::RoTxn; use heed::RoTxn;
use itertools::merge_join_by; use itertools::merge_join_by;
use obkv::KvReader; use obkv::KvReader;
use super::{tokenize_document::DocumentTokenizer, SearchableExtractor}; use super::tokenize_document::DocumentTokenizer;
use crate::{ use super::SearchableExtractor;
proximity::{index_proximity, MAX_DISTANCE}, use crate::proximity::{index_proximity, MAX_DISTANCE};
update::{ use crate::update::new::extract::cache::CboCachedSorter;
new::{extract::cache::CboCachedSorter, DocumentChange}, use crate::update::new::DocumentChange;
MergeDeladdCboRoaringBitmaps, use crate::update::MergeDeladdCboRoaringBitmaps;
}, use crate::{FieldId, GlobalFieldsIdsMap, Index, Result};
FieldId, GlobalFieldsIdsMap, Index, Result,
};
pub struct WordPairProximityDocidsExtractor; pub struct WordPairProximityDocidsExtractor;
impl SearchableExtractor for WordPairProximityDocidsExtractor { impl SearchableExtractor for WordPairProximityDocidsExtractor {
@ -26,12 +22,13 @@ impl SearchableExtractor for WordPairProximityDocidsExtractor {
index.user_defined_searchable_fields(rtxn).map_err(Into::into) index.user_defined_searchable_fields(rtxn).map_err(Into::into)
} }
fn attributes_to_skip<'a>(rtxn: &'a RoTxn, index: &'a Index) -> Result<Vec<&'a str>> { fn attributes_to_skip<'a>(_rtxn: &'a RoTxn, _index: &'a Index) -> Result<Vec<&'a str>> {
Ok(vec![]) Ok(vec![])
} }
/// This case is unreachable because extract_document_change has been reimplemented to not call this function. /// This case is unreachable because extract_document_change has been reimplemented to not call this function.
fn build_key<'a>(_field_id: FieldId, _position: u16, _word: &'a str) -> Cow<'a, [u8]> { fn build_key(_field_id: FieldId, _position: u16, _word: &str) -> Cow<[u8]> {
/// TODO remove this
unreachable!() unreachable!()
} }
@ -159,7 +156,7 @@ fn process_document_tokens(
word_positions: &mut VecDeque<(String, u16)>, word_positions: &mut VecDeque<(String, u16)>,
word_pair_proximity: &mut BTreeMap<(String, String), u8>, word_pair_proximity: &mut BTreeMap<(String, String), u8>,
) -> Result<()> { ) -> Result<()> {
let mut token_fn = |fid: FieldId, pos: u16, word: &str| { let mut token_fn = |_fid: FieldId, pos: u16, word: &str| {
// drain the proximity window until the head word is considered close to the word we are inserting. // drain the proximity window until the head word is considered close to the word we are inserting.
while word_positions while word_positions
.front() .front()

View File

@ -11,15 +11,9 @@ use rayon::iter::{IntoParallelIterator, ParallelIterator};
use rayon::ThreadPool; use rayon::ThreadPool;
pub use update_by_function::UpdateByFunction; pub use update_by_function::UpdateByFunction;
use super::channel::{ use super::channel::*;
extractors_merger_channels, merger_writer_channel, EntryOperation, ExactWordDocids, WordDocids,
WordFidDocids, WordPositionDocids,
};
use super::document_change::DocumentChange; use super::document_change::DocumentChange;
use super::extract::{ use super::extract::*;
ExactWordDocidsExtractor, SearchableExtractor, WordDocidsExtractor, WordFidDocidsExtractor,
WordPositionDocidsExtractor,
};
use super::merger::merge_grenad_entries; use super::merger::merge_grenad_entries;
use super::StdResult; use super::StdResult;
use crate::documents::{ use crate::documents::{
@ -71,79 +65,98 @@ where
// TODO manage the errors correctly // TODO manage the errors correctly
let handle = Builder::new().name(S("indexer-extractors")).spawn_scoped(s, move || { let handle = Builder::new().name(S("indexer-extractors")).spawn_scoped(s, move || {
pool.in_place_scope(|_s| { pool.in_place_scope(|_s| {
let document_changes = document_changes.into_par_iter(); let document_changes = document_changes.into_par_iter();
// document but we need to create a function that collects and compresses documents. // document but we need to create a function that collects and compresses documents.
document_changes.clone().into_par_iter().try_for_each(|result| { document_changes.clone().into_par_iter().try_for_each(|result| {
match result? { match result? {
DocumentChange::Deletion(deletion) => { DocumentChange::Deletion(deletion) => {
let docid = deletion.docid(); let docid = deletion.docid();
extractor_sender.document_delete(docid).unwrap(); extractor_sender.document_delete(docid).unwrap();
}
DocumentChange::Update(update) => {
let docid = update.docid();
let content = update.new();
extractor_sender.document_insert(docid, content.boxed()).unwrap();
}
DocumentChange::Insertion(insertion) => {
let docid = insertion.docid();
let content = insertion.new();
extractor_sender.document_insert(docid, content.boxed()).unwrap();
// extracted_dictionary_sender.send(self, dictionary: &[u8]);
}
} }
DocumentChange::Update(update) => { Ok(()) as Result<_>
let docid = update.docid(); })?;
let content = update.new();
extractor_sender.document_insert(docid, content.boxed()).unwrap(); extract_and_send_docids::<WordDocidsExtractor, WordDocids>(
} index,
DocumentChange::Insertion(insertion) => { &global_fields_ids_map,
let docid = insertion.docid(); GrenadParameters::default(),
let content = insertion.new(); document_changes.clone(),
extractor_sender.document_insert(docid, content.boxed()).unwrap(); &extractor_sender,
// extracted_dictionary_sender.send(self, dictionary: &[u8]); )?;
}
} extract_and_send_docids::<WordFidDocidsExtractor, WordFidDocids>(
index,
&global_fields_ids_map,
GrenadParameters::default(),
document_changes.clone(),
&extractor_sender,
)?;
extract_and_send_docids::<ExactWordDocidsExtractor, ExactWordDocids>(
index,
&global_fields_ids_map,
GrenadParameters::default(),
document_changes.clone(),
&extractor_sender,
)?;
extract_and_send_docids::<WordPositionDocidsExtractor, WordPositionDocids>(
index,
&global_fields_ids_map,
GrenadParameters::default(),
document_changes.clone(),
&extractor_sender,
)?;
extract_and_send_docids::<FidWordCountDocidsExtractor, FidWordCountDocids>(
index,
&global_fields_ids_map,
GrenadParameters::default(),
document_changes.clone(),
&extractor_sender,
)?;
extract_and_send_docids::<
WordPairProximityDocidsExtractor,
WordPairProximityDocids,
>(
index,
&global_fields_ids_map,
GrenadParameters::default(),
document_changes.clone(),
&extractor_sender,
)?;
// TODO THIS IS TOO MUCH
// Extract fieldid docid facet number
// Extract fieldid docid facet string
// Extract facetid string fst
// Extract facetid normalized string strings
// TODO Inverted Indexes again
// Extract fieldid facet isempty docids
// Extract fieldid facet isnull docids
// Extract fieldid facet exists docids
// TODO This is the normal system
// Extract fieldid facet number docids
// Extract fieldid facet string docids
Ok(()) as Result<_> Ok(()) as Result<_>
})?; })
extract_and_send_docids::<WordDocidsExtractor, WordDocids>(
index,
&global_fields_ids_map,
GrenadParameters::default(),
document_changes.clone(),
&extractor_sender,
)?;
extract_and_send_docids::<WordFidDocidsExtractor, WordFidDocids>(
index,
&global_fields_ids_map,
GrenadParameters::default(),
document_changes.clone(),
&extractor_sender,
)?;
extract_and_send_docids::<ExactWordDocidsExtractor, ExactWordDocids>(
index,
&global_fields_ids_map,
GrenadParameters::default(),
document_changes.clone(),
&extractor_sender,
)?;
extract_and_send_docids::<WordPositionDocidsExtractor, WordPositionDocids>(
index,
&global_fields_ids_map,
GrenadParameters::default(),
document_changes.clone(),
&extractor_sender,
)?;
// TODO THIS IS TOO MUCH
// Extract fieldid docid facet number
// Extract fieldid docid facet string
// Extract facetid string fst
// Extract facetid normalized string strings
// TODO Inverted Indexes again
// Extract fieldid facet isempty docids
// Extract fieldid facet isnull docids
// Extract fieldid facet exists docids
// TODO This is the normal system
// Extract fieldid facet number docids
// Extract fieldid facet string docids
Ok(()) as Result<_>
})
})?; })?;
// TODO manage the errors correctly // TODO manage the errors correctly

View File

@ -8,10 +8,7 @@ use memmap2::Mmap;
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use tempfile::tempfile; use tempfile::tempfile;
use super::channel::{ use super::channel::*;
DatabaseType, DocidsSender, ExactWordDocids, MergerReceiver, MergerSender, WordDocids,
WordFidDocids, WordPositionDocids,
};
use super::KvReaderDelAdd; use super::KvReaderDelAdd;
use crate::update::del_add::DelAdd; use crate::update::del_add::DelAdd;
use crate::update::new::channel::MergerOperation; use crate::update::new::channel::MergerOperation;
@ -30,6 +27,29 @@ pub fn merge_grenad_entries(
for merger_operation in receiver { for merger_operation in receiver {
match merger_operation { match merger_operation {
MergerOperation::ExactWordDocidsMerger(merger) => {
merge_and_send_docids(
merger,
/// TODO do a MergerOperation::database(&Index) -> Database<Bytes, Bytes>.
index.exact_word_docids.remap_types(),
rtxn,
&mut buffer,
sender.docids::<ExactWordDocids>(),
|_key| Ok(()),
|_key| Ok(()),
)?;
}
MergerOperation::FidWordCountDocidsMerger(merger) => {
merge_and_send_docids(
merger,
index.field_id_word_count_docids.remap_types(),
rtxn,
&mut buffer,
sender.docids::<FidWordCountDocids>(),
|_key| Ok(()),
|_key| Ok(()),
)?;
}
MergerOperation::WordDocidsMerger(merger) => { MergerOperation::WordDocidsMerger(merger) => {
let mut add_words_fst = SetBuilder::new(tempfile()?)?; let mut add_words_fst = SetBuilder::new(tempfile()?)?;
let mut del_words_fst = SetBuilder::new(tempfile()?)?; let mut del_words_fst = SetBuilder::new(tempfile()?)?;
@ -49,17 +69,6 @@ pub fn merge_grenad_entries(
let mmap = compute_new_words_fst(add_words_fst, del_words_fst, words_fst)?; let mmap = compute_new_words_fst(add_words_fst, del_words_fst, words_fst)?;
sender.main().write_words_fst(mmap).unwrap(); sender.main().write_words_fst(mmap).unwrap();
} }
MergerOperation::ExactWordDocidsMerger(merger) => {
merge_and_send_docids(
merger,
index.exact_word_docids.remap_types(),
rtxn,
&mut buffer,
sender.docids::<ExactWordDocids>(),
|_key| Ok(()),
|_key| Ok(()),
)?;
}
MergerOperation::WordFidDocidsMerger(merger) => { MergerOperation::WordFidDocidsMerger(merger) => {
merge_and_send_docids( merge_and_send_docids(
merger, merger,
@ -71,6 +80,17 @@ pub fn merge_grenad_entries(
|_key| Ok(()), |_key| Ok(()),
)?; )?;
} }
MergerOperation::WordPairProximityDocidsMerger(merger) => {
merge_and_send_docids(
merger,
index.word_pair_proximity_docids.remap_types(),
rtxn,
&mut buffer,
sender.docids::<WordPairProximityDocids>(),
|_key| Ok(()),
|_key| Ok(()),
)?;
}
MergerOperation::WordPositionDocidsMerger(merger) => { MergerOperation::WordPositionDocidsMerger(merger) => {
merge_and_send_docids( merge_and_send_docids(
merger, merger,