mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-23 13:24:27 +01:00
WIP using try_map_try_init
This commit is contained in:
parent
3843240940
commit
31de5c747e
@ -1,6 +1,7 @@
|
|||||||
use std::collections::HashSet;
|
use std::collections::HashSet;
|
||||||
use std::fmt::Debug;
|
use std::fmt::Debug;
|
||||||
use std::fs::File;
|
use std::fs::File;
|
||||||
|
use std::sync::Arc;
|
||||||
|
|
||||||
use grenad::{MergeFunction, Merger};
|
use grenad::{MergeFunction, Merger};
|
||||||
use heed::RoTxn;
|
use heed::RoTxn;
|
||||||
@ -14,7 +15,9 @@ use crate::facet::value_encoding::f64_into_bytes;
|
|||||||
use crate::update::new::extract::DocidsExtractor;
|
use crate::update::new::extract::DocidsExtractor;
|
||||||
use crate::update::new::{DocumentChange, ItemsPool};
|
use crate::update::new::{DocumentChange, ItemsPool};
|
||||||
use crate::update::{create_sorter, GrenadParameters, MergeDeladdCboRoaringBitmaps};
|
use crate::update::{create_sorter, GrenadParameters, MergeDeladdCboRoaringBitmaps};
|
||||||
use crate::{DocumentId, FieldId, GlobalFieldsIdsMap, Index, Result, MAX_FACET_VALUE_LENGTH};
|
use crate::{
|
||||||
|
DocumentId, Error, FieldId, GlobalFieldsIdsMap, Index, Result, MAX_FACET_VALUE_LENGTH,
|
||||||
|
};
|
||||||
pub struct FacetedDocidsExtractor;
|
pub struct FacetedDocidsExtractor;
|
||||||
|
|
||||||
impl FacetedDocidsExtractor {
|
impl FacetedDocidsExtractor {
|
||||||
@ -195,7 +198,9 @@ impl DocidsExtractor for FacetedDocidsExtractor {
|
|||||||
index: &Index,
|
index: &Index,
|
||||||
fields_ids_map: &GlobalFieldsIdsMap,
|
fields_ids_map: &GlobalFieldsIdsMap,
|
||||||
indexer: GrenadParameters,
|
indexer: GrenadParameters,
|
||||||
document_changes: impl IntoParallelIterator<Item = Result<DocumentChange>>,
|
document_changes: impl IntoParallelIterator<
|
||||||
|
Item = std::result::Result<DocumentChange, Arc<Error>>,
|
||||||
|
>,
|
||||||
) -> Result<Merger<File, MergeDeladdCboRoaringBitmaps>> {
|
) -> Result<Merger<File, MergeDeladdCboRoaringBitmaps>> {
|
||||||
let max_memory = indexer.max_memory_by_thread();
|
let max_memory = indexer.max_memory_by_thread();
|
||||||
|
|
||||||
|
@ -4,6 +4,7 @@ mod lru;
|
|||||||
mod searchable;
|
mod searchable;
|
||||||
|
|
||||||
use std::fs::File;
|
use std::fs::File;
|
||||||
|
use std::sync::Arc;
|
||||||
|
|
||||||
pub use faceted::*;
|
pub use faceted::*;
|
||||||
use grenad::Merger;
|
use grenad::Merger;
|
||||||
@ -12,14 +13,16 @@ pub use searchable::*;
|
|||||||
|
|
||||||
use super::DocumentChange;
|
use super::DocumentChange;
|
||||||
use crate::update::{GrenadParameters, MergeDeladdCboRoaringBitmaps};
|
use crate::update::{GrenadParameters, MergeDeladdCboRoaringBitmaps};
|
||||||
use crate::{GlobalFieldsIdsMap, Index, Result};
|
use crate::{Error, GlobalFieldsIdsMap, Index, Result};
|
||||||
|
|
||||||
pub trait DocidsExtractor {
|
pub trait DocidsExtractor {
|
||||||
fn run_extraction(
|
fn run_extraction(
|
||||||
index: &Index,
|
index: &Index,
|
||||||
fields_ids_map: &GlobalFieldsIdsMap,
|
fields_ids_map: &GlobalFieldsIdsMap,
|
||||||
indexer: GrenadParameters,
|
indexer: GrenadParameters,
|
||||||
document_changes: impl IntoParallelIterator<Item = Result<DocumentChange>>,
|
document_changes: impl IntoParallelIterator<
|
||||||
|
Item = std::result::Result<DocumentChange, Arc<Error>>,
|
||||||
|
>,
|
||||||
) -> Result<Merger<File, MergeDeladdCboRoaringBitmaps>>;
|
) -> Result<Merger<File, MergeDeladdCboRoaringBitmaps>>;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -3,6 +3,7 @@ mod extract_word_pair_proximity_docids;
|
|||||||
mod tokenize_document;
|
mod tokenize_document;
|
||||||
|
|
||||||
use std::fs::File;
|
use std::fs::File;
|
||||||
|
use std::sync::Arc;
|
||||||
|
|
||||||
pub use extract_word_docids::{WordDocidsExtractors, WordDocidsMergers};
|
pub use extract_word_docids::{WordDocidsExtractors, WordDocidsMergers};
|
||||||
pub use extract_word_pair_proximity_docids::WordPairProximityDocidsExtractor;
|
pub use extract_word_pair_proximity_docids::WordPairProximityDocidsExtractor;
|
||||||
@ -15,14 +16,16 @@ use super::cache::CboCachedSorter;
|
|||||||
use super::DocidsExtractor;
|
use super::DocidsExtractor;
|
||||||
use crate::update::new::{DocumentChange, ItemsPool};
|
use crate::update::new::{DocumentChange, ItemsPool};
|
||||||
use crate::update::{create_sorter, GrenadParameters, MergeDeladdCboRoaringBitmaps};
|
use crate::update::{create_sorter, GrenadParameters, MergeDeladdCboRoaringBitmaps};
|
||||||
use crate::{GlobalFieldsIdsMap, Index, Result, MAX_POSITION_PER_ATTRIBUTE};
|
use crate::{Error, GlobalFieldsIdsMap, Index, Result, MAX_POSITION_PER_ATTRIBUTE};
|
||||||
|
|
||||||
pub trait SearchableExtractor {
|
pub trait SearchableExtractor {
|
||||||
fn run_extraction(
|
fn run_extraction(
|
||||||
index: &Index,
|
index: &Index,
|
||||||
fields_ids_map: &GlobalFieldsIdsMap,
|
fields_ids_map: &GlobalFieldsIdsMap,
|
||||||
indexer: GrenadParameters,
|
indexer: GrenadParameters,
|
||||||
document_changes: impl IntoParallelIterator<Item = Result<DocumentChange>>,
|
document_changes: impl IntoParallelIterator<
|
||||||
|
Item = std::result::Result<DocumentChange, Arc<Error>>,
|
||||||
|
>,
|
||||||
) -> Result<Merger<File, MergeDeladdCboRoaringBitmaps>> {
|
) -> Result<Merger<File, MergeDeladdCboRoaringBitmaps>> {
|
||||||
let max_memory = indexer.max_memory_by_thread();
|
let max_memory = indexer.max_memory_by_thread();
|
||||||
|
|
||||||
@ -132,7 +135,9 @@ impl<T: SearchableExtractor> DocidsExtractor for T {
|
|||||||
index: &Index,
|
index: &Index,
|
||||||
fields_ids_map: &GlobalFieldsIdsMap,
|
fields_ids_map: &GlobalFieldsIdsMap,
|
||||||
indexer: GrenadParameters,
|
indexer: GrenadParameters,
|
||||||
document_changes: impl IntoParallelIterator<Item = Result<DocumentChange>>,
|
document_changes: impl IntoParallelIterator<
|
||||||
|
Item = std::result::Result<DocumentChange, Arc<Error>>,
|
||||||
|
>,
|
||||||
) -> Result<Merger<File, MergeDeladdCboRoaringBitmaps>> {
|
) -> Result<Merger<File, MergeDeladdCboRoaringBitmaps>> {
|
||||||
Self::run_extraction(index, fields_ids_map, indexer, document_changes)
|
Self::run_extraction(index, fields_ids_map, indexer, document_changes)
|
||||||
}
|
}
|
||||||
|
@ -1,11 +1,12 @@
|
|||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
use rayon::iter::{IndexedParallelIterator, IntoParallelIterator, ParallelIterator};
|
use rayon::iter::{IndexedParallelIterator, IntoParallelIterator};
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
use super::DocumentChanges;
|
use super::DocumentChanges;
|
||||||
use crate::update::new::{Deletion, DocumentChange, ItemsPool};
|
use crate::update::new::items_pool::ParallelIteratorExt as _;
|
||||||
use crate::{FieldsIdsMap, Index, Result};
|
use crate::update::new::{Deletion, DocumentChange};
|
||||||
|
use crate::{Error, FieldsIdsMap, Index, Result};
|
||||||
|
|
||||||
pub struct DocumentDeletion {
|
pub struct DocumentDeletion {
|
||||||
pub to_delete: RoaringBitmap,
|
pub to_delete: RoaringBitmap,
|
||||||
@ -28,15 +29,19 @@ impl<'p> DocumentChanges<'p> for DocumentDeletion {
|
|||||||
self,
|
self,
|
||||||
_fields_ids_map: &mut FieldsIdsMap,
|
_fields_ids_map: &mut FieldsIdsMap,
|
||||||
param: Self::Parameter,
|
param: Self::Parameter,
|
||||||
) -> Result<impl IndexedParallelIterator<Item = Result<DocumentChange>> + Clone + 'p> {
|
) -> Result<
|
||||||
|
impl IndexedParallelIterator<Item = std::result::Result<DocumentChange, Arc<Error>>>
|
||||||
|
+ Clone
|
||||||
|
+ 'p,
|
||||||
|
> {
|
||||||
let index = param;
|
let index = param;
|
||||||
let items = Arc::new(ItemsPool::new(|| index.read_txn().map_err(crate::Error::from)));
|
|
||||||
let to_delete: Vec<_> = self.to_delete.into_iter().collect();
|
let to_delete: Vec<_> = self.to_delete.into_iter().collect();
|
||||||
Ok(to_delete.into_par_iter().map_with(items, |items, docid| {
|
Ok(to_delete.into_par_iter().try_map_try_init(
|
||||||
items.with(|rtxn| {
|
|| index.read_txn().map_err(crate::Error::from),
|
||||||
|
|rtxn, docid| {
|
||||||
let current = index.document(rtxn, docid)?;
|
let current = index.document(rtxn, docid)?;
|
||||||
Ok(DocumentChange::Deletion(Deletion::create(docid, current.boxed())))
|
Ok(DocumentChange::Deletion(Deletion::create(docid, current.boxed())))
|
||||||
})
|
},
|
||||||
}))
|
))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -75,9 +75,8 @@ impl<'p, 'pl: 'p> DocumentChanges<'p> for DocumentOperation<'pl> {
|
|||||||
fields_ids_map: &mut FieldsIdsMap,
|
fields_ids_map: &mut FieldsIdsMap,
|
||||||
param: Self::Parameter,
|
param: Self::Parameter,
|
||||||
) -> Result<
|
) -> Result<
|
||||||
impl IndexedParallelIterator<
|
impl IndexedParallelIterator<Item = std::result::Result<DocumentChange, Arc<Error>>>
|
||||||
Item = std::result::Result<DocumentChange, Option<crate::Error>>,
|
+ Clone
|
||||||
> + Clone
|
|
||||||
+ 'p,
|
+ 'p,
|
||||||
> {
|
> {
|
||||||
let (index, rtxn, primary_key) = param;
|
let (index, rtxn, primary_key) = param;
|
||||||
@ -206,7 +205,7 @@ impl<'p, 'pl: 'p> DocumentChanges<'p> for DocumentOperation<'pl> {
|
|||||||
docids_version_offsets.sort_unstable_by_key(|(_, (_, docops))| sort_function_key(docops));
|
docids_version_offsets.sort_unstable_by_key(|(_, (_, docops))| sort_function_key(docops));
|
||||||
|
|
||||||
Ok(docids_version_offsets.into_par_iter().try_map_try_init(
|
Ok(docids_version_offsets.into_par_iter().try_map_try_init(
|
||||||
|| index.read_txn().map_err(crate::Error::from),
|
|| index.read_txn().map_err(Error::from),
|
||||||
move |rtxn, (external_docid, (internal_docid, operations))| {
|
move |rtxn, (external_docid, (internal_docid, operations))| {
|
||||||
let document_merge_function = match self.index_documents_method {
|
let document_merge_function = match self.index_documents_method {
|
||||||
Idm::ReplaceDocuments => MergeDocumentForReplacement::merge,
|
Idm::ReplaceDocuments => MergeDocumentForReplacement::merge,
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
use std::sync::RwLock;
|
use std::sync::{Arc, RwLock};
|
||||||
use std::thread::{self, Builder};
|
use std::thread::{self, Builder};
|
||||||
|
|
||||||
use big_s::S;
|
use big_s::S;
|
||||||
@ -38,9 +38,8 @@ pub trait DocumentChanges<'p> {
|
|||||||
fields_ids_map: &mut FieldsIdsMap,
|
fields_ids_map: &mut FieldsIdsMap,
|
||||||
param: Self::Parameter,
|
param: Self::Parameter,
|
||||||
) -> Result<
|
) -> Result<
|
||||||
impl IndexedParallelIterator<
|
impl IndexedParallelIterator<Item = std::result::Result<DocumentChange, Arc<Error>>>
|
||||||
Item = std::result::Result<DocumentChange, Option<crate::Error>>,
|
+ Clone
|
||||||
> + Clone
|
|
||||||
+ 'p,
|
+ 'p,
|
||||||
>;
|
>;
|
||||||
}
|
}
|
||||||
@ -58,7 +57,7 @@ pub fn index<PI>(
|
|||||||
document_changes: PI,
|
document_changes: PI,
|
||||||
) -> Result<()>
|
) -> Result<()>
|
||||||
where
|
where
|
||||||
PI: IndexedParallelIterator<Item = std::result::Result<DocumentChange, Option<Error>>>
|
PI: IndexedParallelIterator<Item = std::result::Result<DocumentChange, Arc<Error>>>
|
||||||
+ Send
|
+ Send
|
||||||
+ Clone,
|
+ Clone,
|
||||||
{
|
{
|
||||||
@ -249,7 +248,7 @@ fn extract_and_send_docids<E: DocidsExtractor, D: MergerOperationType>(
|
|||||||
index: &Index,
|
index: &Index,
|
||||||
fields_ids_map: &GlobalFieldsIdsMap,
|
fields_ids_map: &GlobalFieldsIdsMap,
|
||||||
indexer: GrenadParameters,
|
indexer: GrenadParameters,
|
||||||
document_changes: impl IntoParallelIterator<Item = Result<DocumentChange>>,
|
document_changes: impl IntoParallelIterator<Item = std::result::Result<DocumentChange, Arc<Error>>>,
|
||||||
sender: &ExtractorSender,
|
sender: &ExtractorSender,
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
let merger = E::run_extraction(index, fields_ids_map, indexer, document_changes)?;
|
let merger = E::run_extraction(index, fields_ids_map, indexer, document_changes)?;
|
||||||
|
@ -30,7 +30,11 @@ where
|
|||||||
self,
|
self,
|
||||||
_fields_ids_map: &mut FieldsIdsMap,
|
_fields_ids_map: &mut FieldsIdsMap,
|
||||||
param: Self::Parameter,
|
param: Self::Parameter,
|
||||||
) -> Result<impl IndexedParallelIterator<Item = Result<DocumentChange>> + Clone + 'p> {
|
) -> Result<
|
||||||
|
impl IndexedParallelIterator<Item = std::result::Result<DocumentChange, Arc<Error>>>
|
||||||
|
+ Clone
|
||||||
|
+ 'p,
|
||||||
|
> {
|
||||||
let (fields_ids_map, concurrent_available_ids, primary_key) = param;
|
let (fields_ids_map, concurrent_available_ids, primary_key) = param;
|
||||||
|
|
||||||
Ok(self.iter.map(|object| {
|
Ok(self.iter.map(|object| {
|
||||||
|
@ -1,8 +1,10 @@
|
|||||||
|
use std::sync::Arc;
|
||||||
|
|
||||||
use rayon::iter::{IndexedParallelIterator, IntoParallelIterator, ParallelIterator};
|
use rayon::iter::{IndexedParallelIterator, IntoParallelIterator, ParallelIterator};
|
||||||
|
|
||||||
use super::DocumentChanges;
|
use super::DocumentChanges;
|
||||||
use crate::update::new::DocumentChange;
|
use crate::update::new::DocumentChange;
|
||||||
use crate::{FieldsIdsMap, Result};
|
use crate::{Error, FieldsIdsMap, Result};
|
||||||
|
|
||||||
pub struct UpdateByFunction;
|
pub struct UpdateByFunction;
|
||||||
|
|
||||||
@ -13,7 +15,11 @@ impl<'p> DocumentChanges<'p> for UpdateByFunction {
|
|||||||
self,
|
self,
|
||||||
_fields_ids_map: &mut FieldsIdsMap,
|
_fields_ids_map: &mut FieldsIdsMap,
|
||||||
_param: Self::Parameter,
|
_param: Self::Parameter,
|
||||||
) -> Result<impl IndexedParallelIterator<Item = Result<DocumentChange>> + Clone + 'p> {
|
) -> Result<
|
||||||
|
impl IndexedParallelIterator<Item = std::result::Result<DocumentChange, Arc<Error>>>
|
||||||
|
+ Clone
|
||||||
|
+ 'p,
|
||||||
|
> {
|
||||||
Ok((0..100).into_par_iter().map(|_| todo!()))
|
Ok((0..100).into_par_iter().map(|_| todo!()))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user