mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-12 16:08:55 +01:00
Use the GlobalFieldsIdsMap everywhere and write it to disk
Co-authored-by: Dureuill <louis@meilisearch.com> Co-authored-by: ManyTheFish <many@meilisearch.com>
This commit is contained in:
parent
c50d3edc4a
commit
c1557734dc
@ -36,7 +36,7 @@ use meilisearch_types::milli::update::{
|
|||||||
use meilisearch_types::milli::vector::parsed_vectors::{
|
use meilisearch_types::milli::vector::parsed_vectors::{
|
||||||
ExplicitVectors, VectorOrArrayOfVectors, RESERVED_VECTORS_FIELD_NAME,
|
ExplicitVectors, VectorOrArrayOfVectors, RESERVED_VECTORS_FIELD_NAME,
|
||||||
};
|
};
|
||||||
use meilisearch_types::milli::{self, Filter, Object, UserError};
|
use meilisearch_types::milli::{self, Filter, GlobalFieldsIdsMap, Object, UserError};
|
||||||
use meilisearch_types::settings::{apply_settings_to_builder, Settings, Unchecked};
|
use meilisearch_types::settings::{apply_settings_to_builder, Settings, Unchecked};
|
||||||
use meilisearch_types::tasks::{Details, IndexSwap, Kind, KindWithContent, Status, Task};
|
use meilisearch_types::tasks::{Details, IndexSwap, Kind, KindWithContent, Status, Task};
|
||||||
use meilisearch_types::{compression, Index, VERSION_FILE_NAME};
|
use meilisearch_types::{compression, Index, VERSION_FILE_NAME};
|
||||||
@ -1302,49 +1302,6 @@ impl IndexScheduler {
|
|||||||
let primary_key =
|
let primary_key =
|
||||||
guess_primary_key(&rtxn, index, cursor, &documents_batch_index)?.unwrap();
|
guess_primary_key(&rtxn, index, cursor, &documents_batch_index)?.unwrap();
|
||||||
|
|
||||||
// if let Some(primary_key) = primary_key {
|
|
||||||
// match index.primary_key(index_wtxn)? {
|
|
||||||
// // if a primary key was set AND had already been defined in the index
|
|
||||||
// // but to a different value, we can make the whole batch fail.
|
|
||||||
// Some(pk) => {
|
|
||||||
// if primary_key != pk {
|
|
||||||
// return Err(milli::Error::from(
|
|
||||||
// milli::UserError::PrimaryKeyCannotBeChanged(pk.to_string()),
|
|
||||||
// )
|
|
||||||
// .into());
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// // if the primary key was set and there was no primary key set for this index
|
|
||||||
// // we set it to the received value before starting the indexing process.
|
|
||||||
// None => {
|
|
||||||
// todo!();
|
|
||||||
// let mut builder =
|
|
||||||
// milli::update::Settings::new(index_wtxn, index, indexer_config);
|
|
||||||
// builder.set_primary_key(primary_key);
|
|
||||||
// builder.execute(
|
|
||||||
// |indexing_step| tracing::debug!(update = ?indexing_step),
|
|
||||||
// || must_stop_processing.clone().get(),
|
|
||||||
// )?;
|
|
||||||
// primary_key_has_been_set = true;
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
|
|
||||||
// let config = IndexDocumentsConfig { update_method: method, ..Default::default() };
|
|
||||||
|
|
||||||
// let embedder_configs = index.embedding_configs(index_wtxn)?;
|
|
||||||
// // TODO: consider Arc'ing the map too (we only need read access + we'll be cloning it multiple times, so really makes sense)
|
|
||||||
// let embedders = self.embedders(embedder_configs)?;
|
|
||||||
|
|
||||||
// let mut builder = milli::update::IndexDocuments::new(
|
|
||||||
// index_wtxn,
|
|
||||||
// index,
|
|
||||||
// indexer_config,
|
|
||||||
// config,
|
|
||||||
// |indexing_step| tracing::trace!(?indexing_step, "Update"),
|
|
||||||
// || must_stop_processing.get(),
|
|
||||||
// )?;
|
|
||||||
|
|
||||||
let mut indexer = indexer::DocumentOperation::new(method);
|
let mut indexer = indexer::DocumentOperation::new(method);
|
||||||
for (operation, task) in operations.into_iter().zip(tasks.iter_mut()) {
|
for (operation, task) in operations.into_iter().zip(tasks.iter_mut()) {
|
||||||
match operation {
|
match operation {
|
||||||
@ -1401,12 +1358,10 @@ impl IndexScheduler {
|
|||||||
// let pool = indexer_config.thread_pool.unwrap();
|
// let pool = indexer_config.thread_pool.unwrap();
|
||||||
let pool = rayon::ThreadPoolBuilder::new().build().unwrap();
|
let pool = rayon::ThreadPoolBuilder::new().build().unwrap();
|
||||||
// let fields_ids_map = RwLock::new(fields_ids_map);
|
// let fields_ids_map = RwLock::new(fields_ids_map);
|
||||||
let param = (index, &rtxn, &mut fields_ids_map, &primary_key);
|
let param = (index, &rtxn, &primary_key);
|
||||||
let document_changes = indexer.document_changes(param)?;
|
let document_changes = indexer.document_changes(&mut fields_ids_map, param)?;
|
||||||
indexer::index(index_wtxn, index, &pool, document_changes)?;
|
/// TODO pass/write the FieldsIdsMap
|
||||||
|
indexer::index(index_wtxn, index, fields_ids_map, &pool, document_changes)?;
|
||||||
/// TODO we must store it or not?
|
|
||||||
let fields_ids_map = fields_ids_map;
|
|
||||||
|
|
||||||
// tracing::info!(indexing_result = ?addition, processed_in = ?started_processing_at.elapsed(), "document indexing done");
|
// tracing::info!(indexing_result = ?addition, processed_in = ?started_processing_at.elapsed(), "document indexing done");
|
||||||
} else if primary_key_has_been_set {
|
} else if primary_key_has_been_set {
|
||||||
|
@ -5,6 +5,7 @@ use serde::{Deserialize, Serialize};
|
|||||||
use crate::FieldId;
|
use crate::FieldId;
|
||||||
|
|
||||||
mod global;
|
mod global;
|
||||||
|
pub use global::GlobalFieldsIdsMap;
|
||||||
|
|
||||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
pub struct FieldsIdsMap {
|
pub struct FieldsIdsMap {
|
||||||
|
@ -4,11 +4,13 @@ use std::sync::RwLock;
|
|||||||
use crate::{FieldId, FieldsIdsMap};
|
use crate::{FieldId, FieldsIdsMap};
|
||||||
|
|
||||||
/// A fields ids map that can be globally updated to add fields
|
/// A fields ids map that can be globally updated to add fields
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
pub struct GlobalFieldsIdsMap<'indexing> {
|
pub struct GlobalFieldsIdsMap<'indexing> {
|
||||||
global: &'indexing RwLock<FieldsIdsMap>,
|
global: &'indexing RwLock<FieldsIdsMap>,
|
||||||
local: LocalFieldsIdsMap,
|
local: LocalFieldsIdsMap,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
struct LocalFieldsIdsMap {
|
struct LocalFieldsIdsMap {
|
||||||
names_ids: BTreeMap<String, FieldId>,
|
names_ids: BTreeMap<String, FieldId>,
|
||||||
ids_names: BTreeMap<FieldId, String>,
|
ids_names: BTreeMap<FieldId, String>,
|
||||||
|
@ -55,7 +55,7 @@ pub use self::error::{
|
|||||||
};
|
};
|
||||||
pub use self::external_documents_ids::ExternalDocumentsIds;
|
pub use self::external_documents_ids::ExternalDocumentsIds;
|
||||||
pub use self::fieldids_weights_map::FieldidsWeightsMap;
|
pub use self::fieldids_weights_map::FieldidsWeightsMap;
|
||||||
pub use self::fields_ids_map::FieldsIdsMap;
|
pub use self::fields_ids_map::{FieldsIdsMap, GlobalFieldsIdsMap};
|
||||||
pub use self::heed_codec::{
|
pub use self::heed_codec::{
|
||||||
BEU16StrCodec, BEU32StrCodec, BoRoaringBitmapCodec, BoRoaringBitmapLenCodec,
|
BEU16StrCodec, BEU32StrCodec, BoRoaringBitmapCodec, BoRoaringBitmapLenCodec,
|
||||||
CboRoaringBitmapCodec, CboRoaringBitmapLenCodec, FieldIdWordCountCodec, ObkvCodec,
|
CboRoaringBitmapCodec, CboRoaringBitmapLenCodec, FieldIdWordCountCodec, ObkvCodec,
|
||||||
|
@ -1,32 +1,20 @@
|
|||||||
use std::fs::File;
|
use std::fs::File;
|
||||||
|
|
||||||
use charabia::TokenizerBuilder;
|
use charabia::TokenizerBuilder;
|
||||||
use grenad::Merger;
|
use grenad::{Merger, ReaderCursor};
|
||||||
use grenad::ReaderCursor;
|
|
||||||
use heed::RoTxn;
|
use heed::RoTxn;
|
||||||
use rayon::iter::IntoParallelIterator;
|
use rayon::iter::{IntoParallelIterator, ParallelBridge, ParallelIterator};
|
||||||
use rayon::iter::ParallelBridge;
|
|
||||||
use rayon::iter::ParallelIterator;
|
|
||||||
|
|
||||||
use crate::update::MergeDeladdCboRoaringBitmaps;
|
use super::cache::CachedSorter;
|
||||||
use crate::{
|
use super::tokenize_document::DocumentTokenizer;
|
||||||
update::{
|
use crate::update::new::{DocumentChange, ItemsPool};
|
||||||
create_sorter,
|
use crate::update::{create_sorter, GrenadParameters, MergeDeladdCboRoaringBitmaps};
|
||||||
new::{DocumentChange, ItemsPool},
|
use crate::{FieldsIdsMap, GlobalFieldsIdsMap, Index, Result, MAX_POSITION_PER_ATTRIBUTE};
|
||||||
GrenadParameters,
|
|
||||||
},
|
|
||||||
FieldsIdsMap, Index, Result, MAX_POSITION_PER_ATTRIBUTE,
|
|
||||||
};
|
|
||||||
|
|
||||||
use super::{
|
|
||||||
cache::{CachedSorter, DelAddRoaringBitmapMerger},
|
|
||||||
tokenize_document::DocumentTokenizer,
|
|
||||||
};
|
|
||||||
|
|
||||||
pub trait SearchableExtractor {
|
pub trait SearchableExtractor {
|
||||||
fn run_extraction(
|
fn run_extraction(
|
||||||
index: &Index,
|
index: &Index,
|
||||||
fields_ids_map: &FieldsIdsMap,
|
fields_ids_map: &GlobalFieldsIdsMap,
|
||||||
indexer: GrenadParameters,
|
indexer: GrenadParameters,
|
||||||
document_changes: impl IntoParallelIterator<Item = Result<DocumentChange>>,
|
document_changes: impl IntoParallelIterator<Item = Result<DocumentChange>>,
|
||||||
) -> Result<Merger<File, MergeDeladdCboRoaringBitmaps>> {
|
) -> Result<Merger<File, MergeDeladdCboRoaringBitmaps>> {
|
||||||
@ -62,12 +50,13 @@ pub trait SearchableExtractor {
|
|||||||
Ok((
|
Ok((
|
||||||
index.read_txn()?,
|
index.read_txn()?,
|
||||||
&document_tokenizer,
|
&document_tokenizer,
|
||||||
|
fields_ids_map.clone(),
|
||||||
CachedSorter::new(
|
CachedSorter::new(
|
||||||
// TODO use a better value
|
// TODO use a better value
|
||||||
100.try_into().unwrap(),
|
100.try_into().unwrap(),
|
||||||
create_sorter(
|
create_sorter(
|
||||||
grenad::SortAlgorithm::Stable,
|
grenad::SortAlgorithm::Stable,
|
||||||
DelAddRoaringBitmapMerger,
|
MergeDeladdCboRoaringBitmaps,
|
||||||
indexer.chunk_compression_type,
|
indexer.chunk_compression_type,
|
||||||
indexer.chunk_compression_level,
|
indexer.chunk_compression_level,
|
||||||
indexer.max_nb_chunks,
|
indexer.max_nb_chunks,
|
||||||
@ -78,12 +67,12 @@ pub trait SearchableExtractor {
|
|||||||
});
|
});
|
||||||
|
|
||||||
document_changes.into_par_iter().try_for_each(|document_change| {
|
document_changes.into_par_iter().try_for_each(|document_change| {
|
||||||
context_pool.with(|(rtxn, document_tokenizer, cached_sorter)| {
|
context_pool.with(|(rtxn, document_tokenizer, fields_ids_map, cached_sorter)| {
|
||||||
Self::extract_document_change(
|
Self::extract_document_change(
|
||||||
&*rtxn,
|
&*rtxn,
|
||||||
index,
|
index,
|
||||||
document_tokenizer,
|
document_tokenizer,
|
||||||
&fields_ids_map,
|
fields_ids_map,
|
||||||
cached_sorter,
|
cached_sorter,
|
||||||
document_change?,
|
document_change?,
|
||||||
)
|
)
|
||||||
@ -91,7 +80,7 @@ pub trait SearchableExtractor {
|
|||||||
})?;
|
})?;
|
||||||
|
|
||||||
let mut builder = grenad::MergerBuilder::new(MergeDeladdCboRoaringBitmaps);
|
let mut builder = grenad::MergerBuilder::new(MergeDeladdCboRoaringBitmaps);
|
||||||
for (_rtxn, _tokenizer, cache) in context_pool.into_items() {
|
for (_rtxn, _tokenizer, _fields_ids_map, cache) in context_pool.into_items() {
|
||||||
let sorter = cache.into_sorter()?;
|
let sorter = cache.into_sorter()?;
|
||||||
let readers = sorter.into_reader_cursors()?;
|
let readers = sorter.into_reader_cursors()?;
|
||||||
builder.extend(readers);
|
builder.extend(readers);
|
||||||
@ -104,8 +93,8 @@ pub trait SearchableExtractor {
|
|||||||
rtxn: &RoTxn,
|
rtxn: &RoTxn,
|
||||||
index: &Index,
|
index: &Index,
|
||||||
document_tokenizer: &DocumentTokenizer,
|
document_tokenizer: &DocumentTokenizer,
|
||||||
fields_ids_map: &FieldsIdsMap,
|
fields_ids_map: &mut GlobalFieldsIdsMap,
|
||||||
cached_sorter: &mut CachedSorter<DelAddRoaringBitmapMerger>,
|
cached_sorter: &mut CachedSorter<MergeDeladdCboRoaringBitmaps>,
|
||||||
document_change: DocumentChange,
|
document_change: DocumentChange,
|
||||||
) -> Result<()>;
|
) -> Result<()>;
|
||||||
}
|
}
|
||||||
@ -116,9 +105,8 @@ impl SearchableExtractor for WordDocidsExtractor {
|
|||||||
rtxn: &RoTxn,
|
rtxn: &RoTxn,
|
||||||
index: &Index,
|
index: &Index,
|
||||||
document_tokenizer: &DocumentTokenizer,
|
document_tokenizer: &DocumentTokenizer,
|
||||||
fields_ids_map: &FieldsIdsMap,
|
fields_ids_map: &mut GlobalFieldsIdsMap,
|
||||||
// TODO: DelAddRoaringBitmapMerger should be CBO
|
cached_sorter: &mut CachedSorter<MergeDeladdCboRoaringBitmaps>,
|
||||||
cached_sorter: &mut CachedSorter<DelAddRoaringBitmapMerger>,
|
|
||||||
document_change: DocumentChange,
|
document_change: DocumentChange,
|
||||||
) -> crate::Result<()> {
|
) -> crate::Result<()> {
|
||||||
match document_change {
|
match document_change {
|
||||||
|
@ -1,11 +1,14 @@
|
|||||||
use crate::{
|
use std::collections::HashMap;
|
||||||
update::new::KvReaderFieldId, FieldId, FieldsIdsMap, Index, InternalError,
|
|
||||||
LocalizedAttributesRule, Result, MAX_POSITION_PER_ATTRIBUTE, MAX_WORD_LENGTH,
|
|
||||||
};
|
|
||||||
use charabia::{SeparatorKind, Token, TokenKind, Tokenizer, TokenizerBuilder};
|
use charabia::{SeparatorKind, Token, TokenKind, Tokenizer, TokenizerBuilder};
|
||||||
use heed::RoTxn;
|
use heed::RoTxn;
|
||||||
use serde_json::Value;
|
use serde_json::Value;
|
||||||
use std::collections::HashMap;
|
|
||||||
|
use crate::update::new::KvReaderFieldId;
|
||||||
|
use crate::{
|
||||||
|
FieldId, FieldsIdsMap, GlobalFieldsIdsMap, Index, InternalError, LocalizedAttributesRule,
|
||||||
|
Result, MAX_POSITION_PER_ATTRIBUTE, MAX_WORD_LENGTH,
|
||||||
|
};
|
||||||
|
|
||||||
pub struct DocumentTokenizer<'a> {
|
pub struct DocumentTokenizer<'a> {
|
||||||
pub tokenizer: &'a Tokenizer<'a>,
|
pub tokenizer: &'a Tokenizer<'a>,
|
||||||
@ -18,18 +21,24 @@ impl<'a> DocumentTokenizer<'a> {
|
|||||||
pub fn tokenize_document(
|
pub fn tokenize_document(
|
||||||
&self,
|
&self,
|
||||||
obkv: &KvReaderFieldId,
|
obkv: &KvReaderFieldId,
|
||||||
field_id_map: &FieldsIdsMap,
|
field_id_map: &mut GlobalFieldsIdsMap,
|
||||||
token_fn: &mut impl FnMut(FieldId, u16, &str),
|
token_fn: &mut impl FnMut(FieldId, u16, &str),
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
let mut field_position = HashMap::new();
|
let mut field_position = HashMap::new();
|
||||||
|
let mut field_name = String::new();
|
||||||
for (field_id, field_bytes) in obkv {
|
for (field_id, field_bytes) in obkv {
|
||||||
let Some(field_name) = field_id_map.name(field_id) else {
|
let Some(field_name) = field_id_map.name(field_id).map(|s| {
|
||||||
|
field_name.clear();
|
||||||
|
field_name.push_str(s);
|
||||||
|
&field_name
|
||||||
|
}) else {
|
||||||
unreachable!("field id not found in field id map");
|
unreachable!("field id not found in field id map");
|
||||||
};
|
};
|
||||||
|
|
||||||
let mut tokenize_field = |name: &str, value: &Value| {
|
let mut tokenize_field = |name: &str, value: &Value| {
|
||||||
let Some(field_id) = field_id_map.id(name) else {
|
let Some(field_id) = field_id_map.id_or_insert(name) else {
|
||||||
unreachable!("field name not found in field id map");
|
/// TODO: better error
|
||||||
|
panic!("it's over 9000");
|
||||||
};
|
};
|
||||||
|
|
||||||
let position =
|
let position =
|
||||||
@ -75,7 +84,7 @@ impl<'a> DocumentTokenizer<'a> {
|
|||||||
|
|
||||||
// if the current field is searchable or contains a searchable attribute
|
// if the current field is searchable or contains a searchable attribute
|
||||||
if self.searchable_attributes.map_or(true, |attributes| {
|
if self.searchable_attributes.map_or(true, |attributes| {
|
||||||
attributes.iter().any(|name| perm_json_p::contained_in(name, field_name))
|
attributes.iter().any(|name| perm_json_p::contained_in(name, &field_name))
|
||||||
}) {
|
}) {
|
||||||
// parse json.
|
// parse json.
|
||||||
match serde_json::from_slice(field_bytes).map_err(InternalError::SerdeJson)? {
|
match serde_json::from_slice(field_bytes).map_err(InternalError::SerdeJson)? {
|
||||||
@ -224,11 +233,12 @@ mod perm_json_p {
|
|||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod test {
|
mod test {
|
||||||
use super::*;
|
|
||||||
use charabia::TokenizerBuilder;
|
use charabia::TokenizerBuilder;
|
||||||
use meili_snap::snapshot;
|
use meili_snap::snapshot;
|
||||||
use obkv::KvReader;
|
use obkv::KvReader;
|
||||||
use serde_json::json;
|
use serde_json::json;
|
||||||
|
|
||||||
|
use super::*;
|
||||||
#[test]
|
#[test]
|
||||||
fn test_tokenize_document() {
|
fn test_tokenize_document() {
|
||||||
let mut fields_ids_map = FieldsIdsMap::new();
|
let mut fields_ids_map = FieldsIdsMap::new();
|
||||||
|
@ -27,6 +27,7 @@ impl<'p> DocumentChanges<'p> for DocumentDeletion {
|
|||||||
|
|
||||||
fn document_changes(
|
fn document_changes(
|
||||||
self,
|
self,
|
||||||
|
_fields_ids_map: &mut FieldsIdsMap,
|
||||||
param: Self::Parameter,
|
param: Self::Parameter,
|
||||||
) -> Result<impl ParallelIterator<Item = Result<DocumentChange>> + Clone + 'p> {
|
) -> Result<impl ParallelIterator<Item = Result<DocumentChange>> + Clone + 'p> {
|
||||||
let (index, fields, primary_key) = param;
|
let (index, fields, primary_key) = param;
|
||||||
|
@ -73,13 +73,14 @@ impl DocumentOperation {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl<'p> DocumentChanges<'p> for DocumentOperation {
|
impl<'p> DocumentChanges<'p> for DocumentOperation {
|
||||||
type Parameter = (&'p Index, &'p RoTxn<'p>, &'p mut FieldsIdsMap, &'p PrimaryKey<'p>);
|
type Parameter = (&'p Index, &'p RoTxn<'p>, &'p PrimaryKey<'p>);
|
||||||
|
|
||||||
fn document_changes(
|
fn document_changes(
|
||||||
self,
|
self,
|
||||||
|
fields_ids_map: &mut FieldsIdsMap,
|
||||||
param: Self::Parameter,
|
param: Self::Parameter,
|
||||||
) -> Result<impl ParallelIterator<Item = Result<DocumentChange>> + Clone + 'p> {
|
) -> Result<impl ParallelIterator<Item = Result<DocumentChange>> + Clone + 'p> {
|
||||||
let (index, rtxn, fields_ids_map, primary_key) = param;
|
let (index, rtxn, primary_key) = param;
|
||||||
|
|
||||||
let documents_ids = index.documents_ids(rtxn)?;
|
let documents_ids = index.documents_ids(rtxn)?;
|
||||||
let mut available_docids = AvailableIds::new(&documents_ids);
|
let mut available_docids = AvailableIds::new(&documents_ids);
|
||||||
@ -174,7 +175,7 @@ impl<'p> DocumentChanges<'p> for DocumentOperation {
|
|||||||
|
|
||||||
/// TODO is it the best way to provide FieldsIdsMap to the parallel iterator?
|
/// TODO is it the best way to provide FieldsIdsMap to the parallel iterator?
|
||||||
let fields_ids_map = fields_ids_map.clone();
|
let fields_ids_map = fields_ids_map.clone();
|
||||||
// We must drain the HashMap into a Vec because rayon::hash_map::IntoIter: !Clone
|
// TODO We must drain the HashMap into a Vec because rayon::hash_map::IntoIter: !Clone
|
||||||
let docids_version_offsets: Vec<_> = docids_version_offsets.drain().collect();
|
let docids_version_offsets: Vec<_> = docids_version_offsets.drain().collect();
|
||||||
|
|
||||||
Ok(docids_version_offsets
|
Ok(docids_version_offsets
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
use std::fs::File;
|
use std::fs::File;
|
||||||
|
use std::sync::RwLock;
|
||||||
use std::thread::{self, Builder};
|
use std::thread::{self, Builder};
|
||||||
|
|
||||||
use big_s::S;
|
use big_s::S;
|
||||||
@ -22,7 +23,7 @@ use crate::documents::{
|
|||||||
obkv_to_object, DocumentsBatchCursor, DocumentsBatchIndex, PrimaryKey, DEFAULT_PRIMARY_KEY,
|
obkv_to_object, DocumentsBatchCursor, DocumentsBatchIndex, PrimaryKey, DEFAULT_PRIMARY_KEY,
|
||||||
};
|
};
|
||||||
use crate::update::GrenadParameters;
|
use crate::update::GrenadParameters;
|
||||||
use crate::{Index, Result, UserError};
|
use crate::{FieldsIdsMap, GlobalFieldsIdsMap, Index, Result, UserError};
|
||||||
|
|
||||||
mod document_deletion;
|
mod document_deletion;
|
||||||
mod document_operation;
|
mod document_operation;
|
||||||
@ -34,6 +35,7 @@ pub trait DocumentChanges<'p> {
|
|||||||
|
|
||||||
fn document_changes(
|
fn document_changes(
|
||||||
self,
|
self,
|
||||||
|
fields_ids_map: &mut FieldsIdsMap,
|
||||||
param: Self::Parameter,
|
param: Self::Parameter,
|
||||||
) -> Result<impl ParallelIterator<Item = Result<DocumentChange>> + Clone + 'p>;
|
) -> Result<impl ParallelIterator<Item = Result<DocumentChange>> + Clone + 'p>;
|
||||||
}
|
}
|
||||||
@ -46,6 +48,7 @@ pub trait DocumentChanges<'p> {
|
|||||||
pub fn index<PI>(
|
pub fn index<PI>(
|
||||||
wtxn: &mut RwTxn,
|
wtxn: &mut RwTxn,
|
||||||
index: &Index,
|
index: &Index,
|
||||||
|
fields_ids_map: FieldsIdsMap,
|
||||||
pool: &ThreadPool,
|
pool: &ThreadPool,
|
||||||
document_changes: PI,
|
document_changes: PI,
|
||||||
) -> Result<()>
|
) -> Result<()>
|
||||||
@ -57,6 +60,9 @@ where
|
|||||||
let ExtractorsMergerChannels { merger_receiver, deladd_cbo_roaring_bitmap_sender } =
|
let ExtractorsMergerChannels { merger_receiver, deladd_cbo_roaring_bitmap_sender } =
|
||||||
extractors_merger_channels(100);
|
extractors_merger_channels(100);
|
||||||
|
|
||||||
|
let fields_ids_map_lock = RwLock::new(fields_ids_map);
|
||||||
|
let global_fields_ids_map = GlobalFieldsIdsMap::new(&fields_ids_map_lock);
|
||||||
|
|
||||||
thread::scope(|s| {
|
thread::scope(|s| {
|
||||||
// TODO manage the errors correctly
|
// TODO manage the errors correctly
|
||||||
let handle = Builder::new().name(S("indexer-extractors")).spawn_scoped(s, || {
|
let handle = Builder::new().name(S("indexer-extractors")).spawn_scoped(s, || {
|
||||||
@ -65,7 +71,7 @@ where
|
|||||||
// word docids
|
// word docids
|
||||||
let merger = WordDocidsExtractor::run_extraction(
|
let merger = WordDocidsExtractor::run_extraction(
|
||||||
index,
|
index,
|
||||||
todo!(),
|
&global_fields_ids_map,
|
||||||
/// TODO: GrenadParameters::default() should be removed in favor a passed parameter
|
/// TODO: GrenadParameters::default() should be removed in favor a passed parameter
|
||||||
GrenadParameters::default(),
|
GrenadParameters::default(),
|
||||||
document_changes.clone(),
|
document_changes.clone(),
|
||||||
@ -100,8 +106,13 @@ where
|
|||||||
handle.join().unwrap()?;
|
handle.join().unwrap()?;
|
||||||
handle2.join().unwrap()?;
|
handle2.join().unwrap()?;
|
||||||
|
|
||||||
Ok(())
|
Ok(()) as Result<_>
|
||||||
})
|
})?;
|
||||||
|
|
||||||
|
let fields_ids_map = fields_ids_map_lock.into_inner().unwrap();
|
||||||
|
index.put_fields_ids_map(wtxn, &fields_ids_map)?;
|
||||||
|
|
||||||
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
/// TODO move this elsewhere
|
/// TODO move this elsewhere
|
||||||
|
@ -30,6 +30,7 @@ where
|
|||||||
/// - We recommend sending chunks of documents in this `PartialDumpIndexer` we therefore need to create a custom take_while_size method (that doesn't drop items).
|
/// - We recommend sending chunks of documents in this `PartialDumpIndexer` we therefore need to create a custom take_while_size method (that doesn't drop items).
|
||||||
fn document_changes(
|
fn document_changes(
|
||||||
self,
|
self,
|
||||||
|
_fields_ids_map: &mut FieldsIdsMap,
|
||||||
param: Self::Parameter,
|
param: Self::Parameter,
|
||||||
) -> Result<impl ParallelIterator<Item = Result<DocumentChange>> + Clone + 'p> {
|
) -> Result<impl ParallelIterator<Item = Result<DocumentChange>> + Clone + 'p> {
|
||||||
let (fields_ids_map, concurrent_available_ids, primary_key) = param;
|
let (fields_ids_map, concurrent_available_ids, primary_key) = param;
|
||||||
|
@ -2,7 +2,7 @@ use rayon::iter::{IntoParallelIterator, ParallelIterator};
|
|||||||
|
|
||||||
use super::DocumentChanges;
|
use super::DocumentChanges;
|
||||||
use crate::update::new::DocumentChange;
|
use crate::update::new::DocumentChange;
|
||||||
use crate::Result;
|
use crate::{FieldsIdsMap, Result};
|
||||||
|
|
||||||
pub struct UpdateByFunction;
|
pub struct UpdateByFunction;
|
||||||
|
|
||||||
@ -11,6 +11,7 @@ impl<'p> DocumentChanges<'p> for UpdateByFunction {
|
|||||||
|
|
||||||
fn document_changes(
|
fn document_changes(
|
||||||
self,
|
self,
|
||||||
|
_fields_ids_map: &mut FieldsIdsMap,
|
||||||
_param: Self::Parameter,
|
_param: Self::Parameter,
|
||||||
) -> Result<impl ParallelIterator<Item = Result<DocumentChange>> + Clone + 'p> {
|
) -> Result<impl ParallelIterator<Item = Result<DocumentChange>> + Clone + 'p> {
|
||||||
Ok((0..100).into_par_iter().map(|_| todo!()))
|
Ok((0..100).into_par_iter().map(|_| todo!()))
|
||||||
|
Loading…
Reference in New Issue
Block a user