mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-07-04 20:37:15 +02:00
Finialize the GeoExtractor
This commit is contained in:
parent
a01bc7b454
commit
b17896d899
19 changed files with 497 additions and 93 deletions
|
@ -197,7 +197,7 @@ pub struct DocumentChangeContext<
|
|||
/// inside of the DB.
|
||||
pub db_fields_ids_map: &'indexer FieldsIdsMap,
|
||||
/// A transaction providing data from the DB before all indexing operations
|
||||
pub txn: RoTxn<'indexer>,
|
||||
pub rtxn: RoTxn<'indexer>,
|
||||
|
||||
/// Global field id map that is up to date with the current state of the indexing process.
|
||||
///
|
||||
|
@ -255,7 +255,7 @@ impl<
|
|||
let txn = index.read_txn()?;
|
||||
Ok(DocumentChangeContext {
|
||||
index,
|
||||
txn,
|
||||
rtxn: txn,
|
||||
db_fields_ids_map,
|
||||
new_fields_ids_map: fields_ids_map,
|
||||
doc_alloc,
|
||||
|
|
|
@ -63,7 +63,7 @@ impl<'pl> DocumentChanges<'pl> for DocumentDeletionChanges<'pl> {
|
|||
where
|
||||
'pl: 'doc, // the payload must survive the process calls
|
||||
{
|
||||
let current = context.index.document(&context.txn, *docid)?;
|
||||
let current = context.index.document(&context.rtxn, *docid)?;
|
||||
|
||||
let external_document_id = self.primary_key.extract_docid_from_db(
|
||||
current,
|
||||
|
|
|
@ -33,6 +33,7 @@ use crate::index::main_key::{WORDS_FST_KEY, WORDS_PREFIXES_FST_KEY};
|
|||
use crate::proximity::ProximityPrecision;
|
||||
use crate::update::del_add::DelAdd;
|
||||
use crate::update::new::extract::EmbeddingExtractor;
|
||||
use crate::update::new::merger::merge_and_send_rtree;
|
||||
use crate::update::new::words_prefix_docids::compute_exact_word_prefix_docids;
|
||||
use crate::update::new::{merge_and_send_docids, merge_and_send_facet_docids, FacetDatabases};
|
||||
use crate::update::settings::InnerIndexSettings;
|
||||
|
@ -57,6 +58,7 @@ mod steps {
|
|||
"extracting words",
|
||||
"extracting word proximity",
|
||||
"extracting embeddings",
|
||||
"writing geo points",
|
||||
"writing to database",
|
||||
"writing embeddings to database",
|
||||
"waiting for extractors",
|
||||
|
@ -93,29 +95,33 @@ mod steps {
|
|||
step(4)
|
||||
}
|
||||
|
||||
pub const fn write_db() -> (u16, &'static str) {
|
||||
pub const fn extract_geo_points() -> (u16, &'static str) {
|
||||
step(5)
|
||||
}
|
||||
|
||||
pub const fn write_embedding_db() -> (u16, &'static str) {
|
||||
pub const fn write_db() -> (u16, &'static str) {
|
||||
step(6)
|
||||
}
|
||||
|
||||
pub const fn waiting_extractors() -> (u16, &'static str) {
|
||||
pub const fn write_embedding_db() -> (u16, &'static str) {
|
||||
step(7)
|
||||
}
|
||||
|
||||
pub const fn post_processing_facets() -> (u16, &'static str) {
|
||||
pub const fn waiting_extractors() -> (u16, &'static str) {
|
||||
step(8)
|
||||
}
|
||||
|
||||
pub const fn post_processing_words() -> (u16, &'static str) {
|
||||
pub const fn post_processing_facets() -> (u16, &'static str) {
|
||||
step(9)
|
||||
}
|
||||
|
||||
pub const fn finalizing() -> (u16, &'static str) {
|
||||
pub const fn post_processing_words() -> (u16, &'static str) {
|
||||
step(10)
|
||||
}
|
||||
|
||||
pub const fn finalizing() -> (u16, &'static str) {
|
||||
step(11)
|
||||
}
|
||||
}
|
||||
|
||||
/// This is the main function of this crate.
|
||||
|
@ -144,11 +150,8 @@ where
|
|||
let (extractor_sender, writer_receiver) = extractor_writer_channel(10_000);
|
||||
|
||||
let metadata_builder = MetadataBuilder::from_index(index, wtxn)?;
|
||||
|
||||
let new_fields_ids_map = FieldIdMapWithMetadata::new(new_fields_ids_map, metadata_builder);
|
||||
|
||||
let new_fields_ids_map = RwLock::new(new_fields_ids_map);
|
||||
|
||||
let fields_ids_map_store = ThreadLocal::with_capacity(pool.current_num_threads());
|
||||
let mut extractor_allocs = ThreadLocal::with_capacity(pool.current_num_threads());
|
||||
let doc_allocs = ThreadLocal::with_capacity(pool.current_num_threads());
|
||||
|
@ -328,7 +331,15 @@ where
|
|||
|
||||
let (finished_steps, step_name) = steps::extract_word_proximity();
|
||||
|
||||
let caches = <WordPairProximityDocidsExtractor as DocidsExtractor>::run_extraction(grenad_parameters, document_changes, indexing_context, &mut extractor_allocs, finished_steps, total_steps, step_name)?;
|
||||
let caches = <WordPairProximityDocidsExtractor as DocidsExtractor>::run_extraction(grenad_parameters,
|
||||
document_changes,
|
||||
indexing_context,
|
||||
&mut extractor_allocs,
|
||||
finished_steps,
|
||||
total_steps,
|
||||
step_name,
|
||||
)?;
|
||||
|
||||
merge_and_send_docids(
|
||||
caches,
|
||||
index.word_pair_proximity_docids.remap_types(),
|
||||
|
@ -351,8 +362,6 @@ where
|
|||
let extractor = EmbeddingExtractor::new(embedders, &embedding_sender, field_distribution, request_threads());
|
||||
let mut datastore = ThreadLocal::with_capacity(pool.current_num_threads());
|
||||
let (finished_steps, step_name) = steps::extract_embeddings();
|
||||
|
||||
|
||||
extract(document_changes, &extractor, indexing_context, &mut extractor_allocs, &datastore, finished_steps, total_steps, step_name)?;
|
||||
|
||||
for config in &mut index_embeddings {
|
||||
|
@ -366,6 +375,35 @@ where
|
|||
embedding_sender.finish(index_embeddings).unwrap();
|
||||
}
|
||||
|
||||
'geo: {
|
||||
let span = tracing::trace_span!(target: "indexing::documents::extract", "geo");
|
||||
let _entered = span.enter();
|
||||
|
||||
// let geo_sender = extractor_sender.geo_points();
|
||||
let Some(extractor) = GeoExtractor::new(&rtxn, index, grenad_parameters)? else {
|
||||
break 'geo;
|
||||
};
|
||||
let datastore = ThreadLocal::with_capacity(pool.current_num_threads());
|
||||
let (finished_steps, step_name) = steps::extract_geo_points();
|
||||
extract(document_changes,
|
||||
&extractor,
|
||||
indexing_context,
|
||||
&mut extractor_allocs,
|
||||
&datastore,
|
||||
finished_steps,
|
||||
total_steps,
|
||||
step_name,
|
||||
)?;
|
||||
|
||||
merge_and_send_rtree(
|
||||
datastore,
|
||||
&rtxn,
|
||||
index,
|
||||
extractor_sender.geo(),
|
||||
&indexing_context.must_stop_processing,
|
||||
)?;
|
||||
}
|
||||
|
||||
// TODO THIS IS TOO MUCH
|
||||
// - [ ] Extract fieldid docid facet number
|
||||
// - [ ] Extract fieldid docid facet string
|
||||
|
|
|
@ -93,7 +93,7 @@ impl<'index> DocumentChanges<'index> for UpdateByFunctionChanges<'index> {
|
|||
let DocumentChangeContext {
|
||||
index,
|
||||
db_fields_ids_map,
|
||||
txn,
|
||||
rtxn: txn,
|
||||
new_fields_ids_map,
|
||||
doc_alloc,
|
||||
..
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue