mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-09 22:48:54 +01:00
Use the word pair proximity and fid word count docids extractors
Co-authored-by: ManyTheFish <many@meilisearch.com>
This commit is contained in:
parent
0fc02f7351
commit
73ce67862d
@ -112,23 +112,27 @@ pub struct WriterOperation {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub enum Database {
|
pub enum Database {
|
||||||
WordDocids,
|
|
||||||
ExactWordDocids,
|
|
||||||
WordFidDocids,
|
|
||||||
WordPositionDocids,
|
|
||||||
Documents,
|
Documents,
|
||||||
|
ExactWordDocids,
|
||||||
|
FidWordCountDocids,
|
||||||
Main,
|
Main,
|
||||||
|
WordDocids,
|
||||||
|
WordFidDocids,
|
||||||
|
WordPairProximityDocids,
|
||||||
|
WordPositionDocids,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl WriterOperation {
|
impl WriterOperation {
|
||||||
pub fn database(&self, index: &Index) -> heed::Database<Bytes, Bytes> {
|
pub fn database(&self, index: &Index) -> heed::Database<Bytes, Bytes> {
|
||||||
match self.database {
|
match self.database {
|
||||||
Database::Main => index.main.remap_types(),
|
|
||||||
Database::Documents => index.documents.remap_types(),
|
Database::Documents => index.documents.remap_types(),
|
||||||
Database::WordDocids => index.word_docids.remap_types(),
|
|
||||||
Database::ExactWordDocids => index.exact_word_docids.remap_types(),
|
Database::ExactWordDocids => index.exact_word_docids.remap_types(),
|
||||||
|
Database::Main => index.main.remap_types(),
|
||||||
|
Database::WordDocids => index.word_docids.remap_types(),
|
||||||
Database::WordFidDocids => index.word_fid_docids.remap_types(),
|
Database::WordFidDocids => index.word_fid_docids.remap_types(),
|
||||||
Database::WordPositionDocids => index.word_position_docids.remap_types(),
|
Database::WordPositionDocids => index.word_position_docids.remap_types(),
|
||||||
|
Database::FidWordCountDocids => index.field_id_word_count_docids.remap_types(),
|
||||||
|
Database::WordPairProximityDocids => index.word_pair_proximity_docids.remap_types(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -198,9 +202,11 @@ impl MainSender<'_> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub enum WordDocids {}
|
|
||||||
pub enum ExactWordDocids {}
|
pub enum ExactWordDocids {}
|
||||||
|
pub enum FidWordCountDocids {}
|
||||||
|
pub enum WordDocids {}
|
||||||
pub enum WordFidDocids {}
|
pub enum WordFidDocids {}
|
||||||
|
pub enum WordPairProximityDocids {}
|
||||||
pub enum WordPositionDocids {}
|
pub enum WordPositionDocids {}
|
||||||
|
|
||||||
pub trait DatabaseType {
|
pub trait DatabaseType {
|
||||||
@ -209,14 +215,6 @@ pub trait DatabaseType {
|
|||||||
fn new_merger_operation(merger: Merger<File, MergeDeladdCboRoaringBitmaps>) -> MergerOperation;
|
fn new_merger_operation(merger: Merger<File, MergeDeladdCboRoaringBitmaps>) -> MergerOperation;
|
||||||
}
|
}
|
||||||
|
|
||||||
impl DatabaseType for WordDocids {
|
|
||||||
const DATABASE: Database = Database::WordDocids;
|
|
||||||
|
|
||||||
fn new_merger_operation(merger: Merger<File, MergeDeladdCboRoaringBitmaps>) -> MergerOperation {
|
|
||||||
MergerOperation::WordDocidsMerger(merger)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl DatabaseType for ExactWordDocids {
|
impl DatabaseType for ExactWordDocids {
|
||||||
const DATABASE: Database = Database::ExactWordDocids;
|
const DATABASE: Database = Database::ExactWordDocids;
|
||||||
|
|
||||||
@ -225,6 +223,22 @@ impl DatabaseType for ExactWordDocids {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl DatabaseType for FidWordCountDocids {
|
||||||
|
const DATABASE: Database = Database::FidWordCountDocids;
|
||||||
|
|
||||||
|
fn new_merger_operation(merger: Merger<File, MergeDeladdCboRoaringBitmaps>) -> MergerOperation {
|
||||||
|
MergerOperation::FidWordCountDocidsMerger(merger)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl DatabaseType for WordDocids {
|
||||||
|
const DATABASE: Database = Database::WordDocids;
|
||||||
|
|
||||||
|
fn new_merger_operation(merger: Merger<File, MergeDeladdCboRoaringBitmaps>) -> MergerOperation {
|
||||||
|
MergerOperation::WordDocidsMerger(merger)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
impl DatabaseType for WordFidDocids {
|
impl DatabaseType for WordFidDocids {
|
||||||
const DATABASE: Database = Database::WordFidDocids;
|
const DATABASE: Database = Database::WordFidDocids;
|
||||||
|
|
||||||
@ -233,6 +247,14 @@ impl DatabaseType for WordFidDocids {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl DatabaseType for WordPairProximityDocids {
|
||||||
|
const DATABASE: Database = Database::WordPairProximityDocids;
|
||||||
|
|
||||||
|
fn new_merger_operation(merger: Merger<File, MergeDeladdCboRoaringBitmaps>) -> MergerOperation {
|
||||||
|
MergerOperation::WordPairProximityDocidsMerger(merger)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
impl DatabaseType for WordPositionDocids {
|
impl DatabaseType for WordPositionDocids {
|
||||||
const DATABASE: Database = Database::WordPositionDocids;
|
const DATABASE: Database = Database::WordPositionDocids;
|
||||||
|
|
||||||
@ -293,12 +315,14 @@ impl DocumentsSender<'_> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub enum MergerOperation {
|
pub enum MergerOperation {
|
||||||
WordDocidsMerger(Merger<File, MergeDeladdCboRoaringBitmaps>),
|
|
||||||
ExactWordDocidsMerger(Merger<File, MergeDeladdCboRoaringBitmaps>),
|
ExactWordDocidsMerger(Merger<File, MergeDeladdCboRoaringBitmaps>),
|
||||||
|
FidWordCountDocidsMerger(Merger<File, MergeDeladdCboRoaringBitmaps>),
|
||||||
|
WordDocidsMerger(Merger<File, MergeDeladdCboRoaringBitmaps>),
|
||||||
WordFidDocidsMerger(Merger<File, MergeDeladdCboRoaringBitmaps>),
|
WordFidDocidsMerger(Merger<File, MergeDeladdCboRoaringBitmaps>),
|
||||||
|
WordPairProximityDocidsMerger(Merger<File, MergeDeladdCboRoaringBitmaps>),
|
||||||
WordPositionDocidsMerger(Merger<File, MergeDeladdCboRoaringBitmaps>),
|
WordPositionDocidsMerger(Merger<File, MergeDeladdCboRoaringBitmaps>),
|
||||||
InsertDocument { docid: DocumentId, document: Box<KvReaderFieldId> },
|
|
||||||
DeleteDocument { docid: DocumentId },
|
DeleteDocument { docid: DocumentId },
|
||||||
|
InsertDocument { docid: DocumentId, document: Box<KvReaderFieldId> },
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct MergerReceiver(Receiver<MergerOperation>);
|
pub struct MergerReceiver(Receiver<MergerOperation>);
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
use serde_json::Value;
|
use serde_json::Value;
|
||||||
|
|
||||||
|
use crate::update::new::extract::perm_json_p;
|
||||||
use crate::update::new::KvReaderFieldId;
|
use crate::update::new::KvReaderFieldId;
|
||||||
use crate::{FieldId, GlobalFieldsIdsMap, InternalError, Result, UserError};
|
use crate::{FieldId, GlobalFieldsIdsMap, InternalError, Result, UserError};
|
||||||
|
|
||||||
|
@ -2,6 +2,7 @@ use std::collections::HashSet;
|
|||||||
use std::fmt::Debug;
|
use std::fmt::Debug;
|
||||||
use std::fs::File;
|
use std::fs::File;
|
||||||
|
|
||||||
|
pub use extract_facets::*;
|
||||||
use grenad::{MergeFunction, Merger};
|
use grenad::{MergeFunction, Merger};
|
||||||
use heed::RoTxn;
|
use heed::RoTxn;
|
||||||
use rayon::iter::{IntoParallelIterator, ParallelIterator};
|
use rayon::iter::{IntoParallelIterator, ParallelIterator};
|
||||||
|
@ -2,16 +2,8 @@ mod cache;
|
|||||||
mod faceted;
|
mod faceted;
|
||||||
mod searchable;
|
mod searchable;
|
||||||
|
|
||||||
pub use faceted::modname::{
|
pub use faceted::*;
|
||||||
FieldIdFacetExistsDocidsExtractor, FieldIdFacetIsEmptyDocidsExtractor,
|
pub use searchable::*;
|
||||||
FieldIdFacetIsNullDocidsExtractor, FieldIdFacetNumberDocidsExtractor,
|
|
||||||
FieldIdFacetStringDocidsExtractor,
|
|
||||||
};
|
|
||||||
pub use faceted::FacetedExtractor;
|
|
||||||
pub use searchable::{
|
|
||||||
ExactWordDocidsExtractor, SearchableExtractor, WordDocidsExtractor, WordFidDocidsExtractor,
|
|
||||||
WordPositionDocidsExtractor,
|
|
||||||
};
|
|
||||||
|
|
||||||
/// TODO move in permissive json pointer
|
/// TODO move in permissive json pointer
|
||||||
pub mod perm_json_p {
|
pub mod perm_json_p {
|
||||||
|
@ -1,15 +1,14 @@
|
|||||||
use std::{borrow::Cow, collections::HashMap};
|
use std::borrow::Cow;
|
||||||
|
use std::collections::HashMap;
|
||||||
|
|
||||||
use heed::RoTxn;
|
use heed::RoTxn;
|
||||||
|
|
||||||
use super::{tokenize_document::DocumentTokenizer, SearchableExtractor};
|
use super::tokenize_document::DocumentTokenizer;
|
||||||
use crate::{
|
use super::SearchableExtractor;
|
||||||
update::{
|
use crate::update::new::extract::cache::CboCachedSorter;
|
||||||
new::{extract::cache::CboCachedSorter, DocumentChange},
|
use crate::update::new::DocumentChange;
|
||||||
MergeDeladdCboRoaringBitmaps,
|
use crate::update::MergeDeladdCboRoaringBitmaps;
|
||||||
},
|
use crate::{FieldId, GlobalFieldsIdsMap, Index, Result};
|
||||||
FieldId, GlobalFieldsIdsMap, Index, Result,
|
|
||||||
};
|
|
||||||
|
|
||||||
const MAX_COUNTED_WORDS: usize = 30;
|
const MAX_COUNTED_WORDS: usize = 30;
|
||||||
|
|
||||||
@ -22,12 +21,13 @@ impl SearchableExtractor for FidWordCountDocidsExtractor {
|
|||||||
index.user_defined_searchable_fields(rtxn).map_err(Into::into)
|
index.user_defined_searchable_fields(rtxn).map_err(Into::into)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn attributes_to_skip<'a>(rtxn: &'a RoTxn, index: &'a Index) -> Result<Vec<&'a str>> {
|
fn attributes_to_skip<'a>(_rtxn: &'a RoTxn, _index: &'a Index) -> Result<Vec<&'a str>> {
|
||||||
Ok(vec![])
|
Ok(vec![])
|
||||||
}
|
}
|
||||||
|
|
||||||
/// This case is unreachable because extract_document_change has been reimplemented to not call this function.
|
/// This case is unreachable because extract_document_change has been reimplemented to not call this function.
|
||||||
fn build_key<'a>(_field_id: FieldId, _position: u16, _word: &'a str) -> Cow<'a, [u8]> {
|
fn build_key(_field_id: FieldId, _position: u16, _word: &str) -> Cow<[u8]> {
|
||||||
|
/// TODO remove this
|
||||||
unreachable!()
|
unreachable!()
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -45,7 +45,7 @@ impl SearchableExtractor for FidWordCountDocidsExtractor {
|
|||||||
match document_change {
|
match document_change {
|
||||||
DocumentChange::Deletion(inner) => {
|
DocumentChange::Deletion(inner) => {
|
||||||
let mut fid_word_count = HashMap::new();
|
let mut fid_word_count = HashMap::new();
|
||||||
let mut token_fn = |fid: FieldId, pos: u16, word: &str| {
|
let mut token_fn = |fid: FieldId, _pos: u16, _word: &str| {
|
||||||
fid_word_count.entry(fid).and_modify(|count| *count += 1).or_insert(1);
|
fid_word_count.entry(fid).and_modify(|count| *count += 1).or_insert(1);
|
||||||
Ok(())
|
Ok(())
|
||||||
};
|
};
|
||||||
@ -66,10 +66,10 @@ impl SearchableExtractor for FidWordCountDocidsExtractor {
|
|||||||
}
|
}
|
||||||
DocumentChange::Update(inner) => {
|
DocumentChange::Update(inner) => {
|
||||||
let mut fid_word_count = HashMap::new();
|
let mut fid_word_count = HashMap::new();
|
||||||
let mut token_fn = |fid: FieldId, pos: u16, word: &str| {
|
let mut token_fn = |fid: FieldId, _pos: u16, _word: &str| {
|
||||||
fid_word_count
|
fid_word_count
|
||||||
.entry(fid)
|
.entry(fid)
|
||||||
.and_modify(|(current_count, new_count)| *current_count += 1)
|
.and_modify(|(current_count, _new_count)| *current_count += 1)
|
||||||
.or_insert((1, 0));
|
.or_insert((1, 0));
|
||||||
Ok(())
|
Ok(())
|
||||||
};
|
};
|
||||||
@ -79,10 +79,10 @@ impl SearchableExtractor for FidWordCountDocidsExtractor {
|
|||||||
&mut token_fn,
|
&mut token_fn,
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
let mut token_fn = |fid: FieldId, pos: u16, word: &str| {
|
let mut token_fn = |fid: FieldId, _pos: u16, _word: &str| {
|
||||||
fid_word_count
|
fid_word_count
|
||||||
.entry(fid)
|
.entry(fid)
|
||||||
.and_modify(|(current_count, new_count)| *new_count += 1)
|
.and_modify(|(_current_count, new_count)| *new_count += 1)
|
||||||
.or_insert((0, 1));
|
.or_insert((0, 1));
|
||||||
Ok(())
|
Ok(())
|
||||||
};
|
};
|
||||||
@ -106,7 +106,7 @@ impl SearchableExtractor for FidWordCountDocidsExtractor {
|
|||||||
}
|
}
|
||||||
DocumentChange::Insertion(inner) => {
|
DocumentChange::Insertion(inner) => {
|
||||||
let mut fid_word_count = HashMap::new();
|
let mut fid_word_count = HashMap::new();
|
||||||
let mut token_fn = |fid: FieldId, pos: u16, word: &str| {
|
let mut token_fn = |fid: FieldId, _pos: u16, _word: &str| {
|
||||||
fid_word_count.entry(fid).and_modify(|count| *count += 1).or_insert(1);
|
fid_word_count.entry(fid).and_modify(|count| *count += 1).or_insert(1);
|
||||||
Ok(())
|
Ok(())
|
||||||
};
|
};
|
||||||
|
@ -20,7 +20,7 @@ impl SearchableExtractor for WordDocidsExtractor {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// TODO write in an external Vec buffer
|
/// TODO write in an external Vec buffer
|
||||||
fn build_key<'a>(_field_id: FieldId, _position: u16, word: &'a str) -> Cow<'a, [u8]> {
|
fn build_key(_field_id: FieldId, _position: u16, word: &str) -> Cow<[u8]> {
|
||||||
Cow::Borrowed(word.as_bytes())
|
Cow::Borrowed(word.as_bytes())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -49,7 +49,7 @@ impl SearchableExtractor for ExactWordDocidsExtractor {
|
|||||||
Ok(vec![])
|
Ok(vec![])
|
||||||
}
|
}
|
||||||
|
|
||||||
fn build_key<'a>(_field_id: FieldId, _position: u16, word: &'a str) -> Cow<'a, [u8]> {
|
fn build_key(_field_id: FieldId, _position: u16, word: &str) -> Cow<[u8]> {
|
||||||
Cow::Borrowed(word.as_bytes())
|
Cow::Borrowed(word.as_bytes())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -67,7 +67,7 @@ impl SearchableExtractor for WordFidDocidsExtractor {
|
|||||||
Ok(vec![])
|
Ok(vec![])
|
||||||
}
|
}
|
||||||
|
|
||||||
fn build_key<'a>(field_id: FieldId, _position: u16, word: &'a str) -> Cow<'a, [u8]> {
|
fn build_key(field_id: FieldId, _position: u16, word: &str) -> Cow<[u8]> {
|
||||||
let mut key = Vec::new();
|
let mut key = Vec::new();
|
||||||
key.extend_from_slice(word.as_bytes());
|
key.extend_from_slice(word.as_bytes());
|
||||||
key.push(0);
|
key.push(0);
|
||||||
@ -89,7 +89,7 @@ impl SearchableExtractor for WordPositionDocidsExtractor {
|
|||||||
Ok(vec![])
|
Ok(vec![])
|
||||||
}
|
}
|
||||||
|
|
||||||
fn build_key<'a>(_field_id: FieldId, position: u16, word: &'a str) -> Cow<'a, [u8]> {
|
fn build_key(_field_id: FieldId, position: u16, word: &str) -> Cow<[u8]> {
|
||||||
// position must be bucketed to reduce the number of keys in the DB.
|
// position must be bucketed to reduce the number of keys in the DB.
|
||||||
let position = bucketed_position(position);
|
let position = bucketed_position(position);
|
||||||
let mut key = Vec::new();
|
let mut key = Vec::new();
|
||||||
|
@ -1,21 +1,17 @@
|
|||||||
use std::{
|
use std::borrow::Cow;
|
||||||
borrow::Cow,
|
use std::collections::{BTreeMap, VecDeque};
|
||||||
collections::{BTreeMap, VecDeque},
|
|
||||||
};
|
|
||||||
|
|
||||||
use heed::RoTxn;
|
use heed::RoTxn;
|
||||||
use itertools::merge_join_by;
|
use itertools::merge_join_by;
|
||||||
use obkv::KvReader;
|
use obkv::KvReader;
|
||||||
|
|
||||||
use super::{tokenize_document::DocumentTokenizer, SearchableExtractor};
|
use super::tokenize_document::DocumentTokenizer;
|
||||||
use crate::{
|
use super::SearchableExtractor;
|
||||||
proximity::{index_proximity, MAX_DISTANCE},
|
use crate::proximity::{index_proximity, MAX_DISTANCE};
|
||||||
update::{
|
use crate::update::new::extract::cache::CboCachedSorter;
|
||||||
new::{extract::cache::CboCachedSorter, DocumentChange},
|
use crate::update::new::DocumentChange;
|
||||||
MergeDeladdCboRoaringBitmaps,
|
use crate::update::MergeDeladdCboRoaringBitmaps;
|
||||||
},
|
use crate::{FieldId, GlobalFieldsIdsMap, Index, Result};
|
||||||
FieldId, GlobalFieldsIdsMap, Index, Result,
|
|
||||||
};
|
|
||||||
|
|
||||||
pub struct WordPairProximityDocidsExtractor;
|
pub struct WordPairProximityDocidsExtractor;
|
||||||
impl SearchableExtractor for WordPairProximityDocidsExtractor {
|
impl SearchableExtractor for WordPairProximityDocidsExtractor {
|
||||||
@ -26,12 +22,13 @@ impl SearchableExtractor for WordPairProximityDocidsExtractor {
|
|||||||
index.user_defined_searchable_fields(rtxn).map_err(Into::into)
|
index.user_defined_searchable_fields(rtxn).map_err(Into::into)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn attributes_to_skip<'a>(rtxn: &'a RoTxn, index: &'a Index) -> Result<Vec<&'a str>> {
|
fn attributes_to_skip<'a>(_rtxn: &'a RoTxn, _index: &'a Index) -> Result<Vec<&'a str>> {
|
||||||
Ok(vec![])
|
Ok(vec![])
|
||||||
}
|
}
|
||||||
|
|
||||||
/// This case is unreachable because extract_document_change has been reimplemented to not call this function.
|
/// This case is unreachable because extract_document_change has been reimplemented to not call this function.
|
||||||
fn build_key<'a>(_field_id: FieldId, _position: u16, _word: &'a str) -> Cow<'a, [u8]> {
|
fn build_key(_field_id: FieldId, _position: u16, _word: &str) -> Cow<[u8]> {
|
||||||
|
/// TODO remove this
|
||||||
unreachable!()
|
unreachable!()
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -159,7 +156,7 @@ fn process_document_tokens(
|
|||||||
word_positions: &mut VecDeque<(String, u16)>,
|
word_positions: &mut VecDeque<(String, u16)>,
|
||||||
word_pair_proximity: &mut BTreeMap<(String, String), u8>,
|
word_pair_proximity: &mut BTreeMap<(String, String), u8>,
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
let mut token_fn = |fid: FieldId, pos: u16, word: &str| {
|
let mut token_fn = |_fid: FieldId, pos: u16, word: &str| {
|
||||||
// drain the proximity window until the head word is considered close to the word we are inserting.
|
// drain the proximity window until the head word is considered close to the word we are inserting.
|
||||||
while word_positions
|
while word_positions
|
||||||
.front()
|
.front()
|
||||||
|
@ -11,15 +11,9 @@ use rayon::iter::{IntoParallelIterator, ParallelIterator};
|
|||||||
use rayon::ThreadPool;
|
use rayon::ThreadPool;
|
||||||
pub use update_by_function::UpdateByFunction;
|
pub use update_by_function::UpdateByFunction;
|
||||||
|
|
||||||
use super::channel::{
|
use super::channel::*;
|
||||||
extractors_merger_channels, merger_writer_channel, EntryOperation, ExactWordDocids, WordDocids,
|
|
||||||
WordFidDocids, WordPositionDocids,
|
|
||||||
};
|
|
||||||
use super::document_change::DocumentChange;
|
use super::document_change::DocumentChange;
|
||||||
use super::extract::{
|
use super::extract::*;
|
||||||
ExactWordDocidsExtractor, SearchableExtractor, WordDocidsExtractor, WordFidDocidsExtractor,
|
|
||||||
WordPositionDocidsExtractor,
|
|
||||||
};
|
|
||||||
use super::merger::merge_grenad_entries;
|
use super::merger::merge_grenad_entries;
|
||||||
use super::StdResult;
|
use super::StdResult;
|
||||||
use crate::documents::{
|
use crate::documents::{
|
||||||
@ -71,79 +65,98 @@ where
|
|||||||
// TODO manage the errors correctly
|
// TODO manage the errors correctly
|
||||||
let handle = Builder::new().name(S("indexer-extractors")).spawn_scoped(s, move || {
|
let handle = Builder::new().name(S("indexer-extractors")).spawn_scoped(s, move || {
|
||||||
pool.in_place_scope(|_s| {
|
pool.in_place_scope(|_s| {
|
||||||
let document_changes = document_changes.into_par_iter();
|
let document_changes = document_changes.into_par_iter();
|
||||||
|
|
||||||
// document but we need to create a function that collects and compresses documents.
|
// document but we need to create a function that collects and compresses documents.
|
||||||
document_changes.clone().into_par_iter().try_for_each(|result| {
|
document_changes.clone().into_par_iter().try_for_each(|result| {
|
||||||
match result? {
|
match result? {
|
||||||
DocumentChange::Deletion(deletion) => {
|
DocumentChange::Deletion(deletion) => {
|
||||||
let docid = deletion.docid();
|
let docid = deletion.docid();
|
||||||
extractor_sender.document_delete(docid).unwrap();
|
extractor_sender.document_delete(docid).unwrap();
|
||||||
|
}
|
||||||
|
DocumentChange::Update(update) => {
|
||||||
|
let docid = update.docid();
|
||||||
|
let content = update.new();
|
||||||
|
extractor_sender.document_insert(docid, content.boxed()).unwrap();
|
||||||
|
}
|
||||||
|
DocumentChange::Insertion(insertion) => {
|
||||||
|
let docid = insertion.docid();
|
||||||
|
let content = insertion.new();
|
||||||
|
extractor_sender.document_insert(docid, content.boxed()).unwrap();
|
||||||
|
// extracted_dictionary_sender.send(self, dictionary: &[u8]);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
DocumentChange::Update(update) => {
|
Ok(()) as Result<_>
|
||||||
let docid = update.docid();
|
})?;
|
||||||
let content = update.new();
|
|
||||||
extractor_sender.document_insert(docid, content.boxed()).unwrap();
|
extract_and_send_docids::<WordDocidsExtractor, WordDocids>(
|
||||||
}
|
index,
|
||||||
DocumentChange::Insertion(insertion) => {
|
&global_fields_ids_map,
|
||||||
let docid = insertion.docid();
|
GrenadParameters::default(),
|
||||||
let content = insertion.new();
|
document_changes.clone(),
|
||||||
extractor_sender.document_insert(docid, content.boxed()).unwrap();
|
&extractor_sender,
|
||||||
// extracted_dictionary_sender.send(self, dictionary: &[u8]);
|
)?;
|
||||||
}
|
|
||||||
}
|
extract_and_send_docids::<WordFidDocidsExtractor, WordFidDocids>(
|
||||||
|
index,
|
||||||
|
&global_fields_ids_map,
|
||||||
|
GrenadParameters::default(),
|
||||||
|
document_changes.clone(),
|
||||||
|
&extractor_sender,
|
||||||
|
)?;
|
||||||
|
|
||||||
|
extract_and_send_docids::<ExactWordDocidsExtractor, ExactWordDocids>(
|
||||||
|
index,
|
||||||
|
&global_fields_ids_map,
|
||||||
|
GrenadParameters::default(),
|
||||||
|
document_changes.clone(),
|
||||||
|
&extractor_sender,
|
||||||
|
)?;
|
||||||
|
|
||||||
|
extract_and_send_docids::<WordPositionDocidsExtractor, WordPositionDocids>(
|
||||||
|
index,
|
||||||
|
&global_fields_ids_map,
|
||||||
|
GrenadParameters::default(),
|
||||||
|
document_changes.clone(),
|
||||||
|
&extractor_sender,
|
||||||
|
)?;
|
||||||
|
|
||||||
|
extract_and_send_docids::<FidWordCountDocidsExtractor, FidWordCountDocids>(
|
||||||
|
index,
|
||||||
|
&global_fields_ids_map,
|
||||||
|
GrenadParameters::default(),
|
||||||
|
document_changes.clone(),
|
||||||
|
&extractor_sender,
|
||||||
|
)?;
|
||||||
|
|
||||||
|
extract_and_send_docids::<
|
||||||
|
WordPairProximityDocidsExtractor,
|
||||||
|
WordPairProximityDocids,
|
||||||
|
>(
|
||||||
|
index,
|
||||||
|
&global_fields_ids_map,
|
||||||
|
GrenadParameters::default(),
|
||||||
|
document_changes.clone(),
|
||||||
|
&extractor_sender,
|
||||||
|
)?;
|
||||||
|
|
||||||
|
// TODO THIS IS TOO MUCH
|
||||||
|
// Extract fieldid docid facet number
|
||||||
|
// Extract fieldid docid facet string
|
||||||
|
// Extract facetid string fst
|
||||||
|
// Extract facetid normalized string strings
|
||||||
|
|
||||||
|
// TODO Inverted Indexes again
|
||||||
|
// Extract fieldid facet isempty docids
|
||||||
|
// Extract fieldid facet isnull docids
|
||||||
|
// Extract fieldid facet exists docids
|
||||||
|
|
||||||
|
// TODO This is the normal system
|
||||||
|
// Extract fieldid facet number docids
|
||||||
|
// Extract fieldid facet string docids
|
||||||
|
|
||||||
Ok(()) as Result<_>
|
Ok(()) as Result<_>
|
||||||
})?;
|
})
|
||||||
|
|
||||||
extract_and_send_docids::<WordDocidsExtractor, WordDocids>(
|
|
||||||
index,
|
|
||||||
&global_fields_ids_map,
|
|
||||||
GrenadParameters::default(),
|
|
||||||
document_changes.clone(),
|
|
||||||
&extractor_sender,
|
|
||||||
)?;
|
|
||||||
|
|
||||||
extract_and_send_docids::<WordFidDocidsExtractor, WordFidDocids>(
|
|
||||||
index,
|
|
||||||
&global_fields_ids_map,
|
|
||||||
GrenadParameters::default(),
|
|
||||||
document_changes.clone(),
|
|
||||||
&extractor_sender,
|
|
||||||
)?;
|
|
||||||
|
|
||||||
extract_and_send_docids::<ExactWordDocidsExtractor, ExactWordDocids>(
|
|
||||||
index,
|
|
||||||
&global_fields_ids_map,
|
|
||||||
GrenadParameters::default(),
|
|
||||||
document_changes.clone(),
|
|
||||||
&extractor_sender,
|
|
||||||
)?;
|
|
||||||
|
|
||||||
extract_and_send_docids::<WordPositionDocidsExtractor, WordPositionDocids>(
|
|
||||||
index,
|
|
||||||
&global_fields_ids_map,
|
|
||||||
GrenadParameters::default(),
|
|
||||||
document_changes.clone(),
|
|
||||||
&extractor_sender,
|
|
||||||
)?;
|
|
||||||
|
|
||||||
// TODO THIS IS TOO MUCH
|
|
||||||
// Extract fieldid docid facet number
|
|
||||||
// Extract fieldid docid facet string
|
|
||||||
// Extract facetid string fst
|
|
||||||
// Extract facetid normalized string strings
|
|
||||||
|
|
||||||
// TODO Inverted Indexes again
|
|
||||||
// Extract fieldid facet isempty docids
|
|
||||||
// Extract fieldid facet isnull docids
|
|
||||||
// Extract fieldid facet exists docids
|
|
||||||
|
|
||||||
// TODO This is the normal system
|
|
||||||
// Extract fieldid facet number docids
|
|
||||||
// Extract fieldid facet string docids
|
|
||||||
|
|
||||||
Ok(()) as Result<_>
|
|
||||||
})
|
|
||||||
})?;
|
})?;
|
||||||
|
|
||||||
// TODO manage the errors correctly
|
// TODO manage the errors correctly
|
||||||
|
@ -8,10 +8,7 @@ use memmap2::Mmap;
|
|||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
use tempfile::tempfile;
|
use tempfile::tempfile;
|
||||||
|
|
||||||
use super::channel::{
|
use super::channel::*;
|
||||||
DatabaseType, DocidsSender, ExactWordDocids, MergerReceiver, MergerSender, WordDocids,
|
|
||||||
WordFidDocids, WordPositionDocids,
|
|
||||||
};
|
|
||||||
use super::KvReaderDelAdd;
|
use super::KvReaderDelAdd;
|
||||||
use crate::update::del_add::DelAdd;
|
use crate::update::del_add::DelAdd;
|
||||||
use crate::update::new::channel::MergerOperation;
|
use crate::update::new::channel::MergerOperation;
|
||||||
@ -30,6 +27,29 @@ pub fn merge_grenad_entries(
|
|||||||
|
|
||||||
for merger_operation in receiver {
|
for merger_operation in receiver {
|
||||||
match merger_operation {
|
match merger_operation {
|
||||||
|
MergerOperation::ExactWordDocidsMerger(merger) => {
|
||||||
|
merge_and_send_docids(
|
||||||
|
merger,
|
||||||
|
/// TODO do a MergerOperation::database(&Index) -> Database<Bytes, Bytes>.
|
||||||
|
index.exact_word_docids.remap_types(),
|
||||||
|
rtxn,
|
||||||
|
&mut buffer,
|
||||||
|
sender.docids::<ExactWordDocids>(),
|
||||||
|
|_key| Ok(()),
|
||||||
|
|_key| Ok(()),
|
||||||
|
)?;
|
||||||
|
}
|
||||||
|
MergerOperation::FidWordCountDocidsMerger(merger) => {
|
||||||
|
merge_and_send_docids(
|
||||||
|
merger,
|
||||||
|
index.field_id_word_count_docids.remap_types(),
|
||||||
|
rtxn,
|
||||||
|
&mut buffer,
|
||||||
|
sender.docids::<FidWordCountDocids>(),
|
||||||
|
|_key| Ok(()),
|
||||||
|
|_key| Ok(()),
|
||||||
|
)?;
|
||||||
|
}
|
||||||
MergerOperation::WordDocidsMerger(merger) => {
|
MergerOperation::WordDocidsMerger(merger) => {
|
||||||
let mut add_words_fst = SetBuilder::new(tempfile()?)?;
|
let mut add_words_fst = SetBuilder::new(tempfile()?)?;
|
||||||
let mut del_words_fst = SetBuilder::new(tempfile()?)?;
|
let mut del_words_fst = SetBuilder::new(tempfile()?)?;
|
||||||
@ -49,17 +69,6 @@ pub fn merge_grenad_entries(
|
|||||||
let mmap = compute_new_words_fst(add_words_fst, del_words_fst, words_fst)?;
|
let mmap = compute_new_words_fst(add_words_fst, del_words_fst, words_fst)?;
|
||||||
sender.main().write_words_fst(mmap).unwrap();
|
sender.main().write_words_fst(mmap).unwrap();
|
||||||
}
|
}
|
||||||
MergerOperation::ExactWordDocidsMerger(merger) => {
|
|
||||||
merge_and_send_docids(
|
|
||||||
merger,
|
|
||||||
index.exact_word_docids.remap_types(),
|
|
||||||
rtxn,
|
|
||||||
&mut buffer,
|
|
||||||
sender.docids::<ExactWordDocids>(),
|
|
||||||
|_key| Ok(()),
|
|
||||||
|_key| Ok(()),
|
|
||||||
)?;
|
|
||||||
}
|
|
||||||
MergerOperation::WordFidDocidsMerger(merger) => {
|
MergerOperation::WordFidDocidsMerger(merger) => {
|
||||||
merge_and_send_docids(
|
merge_and_send_docids(
|
||||||
merger,
|
merger,
|
||||||
@ -71,6 +80,17 @@ pub fn merge_grenad_entries(
|
|||||||
|_key| Ok(()),
|
|_key| Ok(()),
|
||||||
)?;
|
)?;
|
||||||
}
|
}
|
||||||
|
MergerOperation::WordPairProximityDocidsMerger(merger) => {
|
||||||
|
merge_and_send_docids(
|
||||||
|
merger,
|
||||||
|
index.word_pair_proximity_docids.remap_types(),
|
||||||
|
rtxn,
|
||||||
|
&mut buffer,
|
||||||
|
sender.docids::<WordPairProximityDocids>(),
|
||||||
|
|_key| Ok(()),
|
||||||
|
|_key| Ok(()),
|
||||||
|
)?;
|
||||||
|
}
|
||||||
MergerOperation::WordPositionDocidsMerger(merger) => {
|
MergerOperation::WordPositionDocidsMerger(merger) => {
|
||||||
merge_and_send_docids(
|
merge_and_send_docids(
|
||||||
merger,
|
merger,
|
||||||
|
Loading…
Reference in New Issue
Block a user