mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-03-19 06:08:20 +01:00
Add support for the progress API of arroy
This commit is contained in:
parent
82912e191b
commit
009c36a4d0
7
Cargo.lock
generated
7
Cargo.lock
generated
@ -393,12 +393,13 @@ checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711"
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "arroy"
|
name = "arroy"
|
||||||
version = "0.6.0"
|
version = "0.6.1"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "4a885313dfac15b64fd61a39d1970a2befa076c69a763434117c5b6163f9fecb"
|
checksum = "08e6111f351d004bd13e95ab540721272136fd3218b39d3ec95a2ea1c4e6a0a6"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"bytemuck",
|
"bytemuck",
|
||||||
"byteorder",
|
"byteorder",
|
||||||
|
"enum-iterator",
|
||||||
"heed",
|
"heed",
|
||||||
"memmap2",
|
"memmap2",
|
||||||
"nohash",
|
"nohash",
|
||||||
@ -3017,7 +3018,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|||||||
checksum = "fc2f4eb4bc735547cfed7c0a4922cbd04a4655978c09b54f1f7b228750664c34"
|
checksum = "fc2f4eb4bc735547cfed7c0a4922cbd04a4655978c09b54f1f7b228750664c34"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"cfg-if",
|
"cfg-if",
|
||||||
"windows-targets 0.48.1",
|
"windows-targets 0.52.6",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
@ -87,7 +87,7 @@ rhai = { git = "https://github.com/rhaiscript/rhai", rev = "ef3df63121d27aacd838
|
|||||||
"no_time",
|
"no_time",
|
||||||
"sync",
|
"sync",
|
||||||
] }
|
] }
|
||||||
arroy = "0.6.0"
|
arroy = "0.6.1"
|
||||||
rand = "0.8.5"
|
rand = "0.8.5"
|
||||||
tracing = "0.1.41"
|
tracing = "0.1.41"
|
||||||
ureq = { version = "2.12.1", features = ["json"] }
|
ureq = { version = "2.12.1", features = ["json"] }
|
||||||
|
@ -1,3 +1,4 @@
|
|||||||
|
use enum_iterator::Sequence;
|
||||||
use std::any::TypeId;
|
use std::any::TypeId;
|
||||||
use std::borrow::Cow;
|
use std::borrow::Cow;
|
||||||
use std::marker::PhantomData;
|
use std::marker::PhantomData;
|
||||||
@ -76,6 +77,14 @@ impl Progress {
|
|||||||
|
|
||||||
durations.drain(..).map(|(name, duration)| (name, format!("{duration:.2?}"))).collect()
|
durations.drain(..).map(|(name, duration)| (name, format!("{duration:.2?}"))).collect()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TODO: ideally we should expose the progress in a way that let arroy use it directly
|
||||||
|
pub(crate) fn update_progress_from_arroy(&self, progress: arroy::WriterProgress) {
|
||||||
|
self.update_progress(progress.main);
|
||||||
|
if let Some(sub) = progress.sub {
|
||||||
|
self.update_progress(sub);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Generate the names associated with the durations and push them.
|
/// Generate the names associated with the durations and push them.
|
||||||
@ -238,3 +247,44 @@ impl<U: Send + Sync + 'static> Step for VariableNameStep<U> {
|
|||||||
self.total
|
self.total
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl Step for arroy::MainStep {
|
||||||
|
fn name(&self) -> Cow<'static, str> {
|
||||||
|
match self {
|
||||||
|
arroy::MainStep::PreProcessingTheItems => "pre processing the items",
|
||||||
|
arroy::MainStep::WritingTheDescendantsAndMetadata => {
|
||||||
|
"writing the descendants and metadata"
|
||||||
|
}
|
||||||
|
arroy::MainStep::RetrieveTheUpdatedItems => "retrieve the updated items",
|
||||||
|
arroy::MainStep::RetrievingTheTreeAndItemNodes => "retrieving the tree and item nodes",
|
||||||
|
arroy::MainStep::UpdatingTheTrees => "updating the trees",
|
||||||
|
arroy::MainStep::CreateNewTrees => "create new trees",
|
||||||
|
arroy::MainStep::WritingNodesToDatabase => "writing nodes to database",
|
||||||
|
arroy::MainStep::DeleteExtraneousTrees => "delete extraneous trees",
|
||||||
|
arroy::MainStep::WriteTheMetadata => "write the metadata",
|
||||||
|
}
|
||||||
|
.into()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn current(&self) -> u32 {
|
||||||
|
*self as u32
|
||||||
|
}
|
||||||
|
|
||||||
|
fn total(&self) -> u32 {
|
||||||
|
Self::CARDINALITY as u32
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Step for arroy::SubStep {
|
||||||
|
fn name(&self) -> Cow<'static, str> {
|
||||||
|
self.unit.into()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn current(&self) -> u32 {
|
||||||
|
self.current.load(Ordering::Relaxed)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn total(&self) -> u32 {
|
||||||
|
self.max
|
||||||
|
}
|
||||||
|
}
|
||||||
|
@ -31,6 +31,7 @@ use super::new::StdResult;
|
|||||||
use crate::documents::{obkv_to_object, DocumentsBatchReader};
|
use crate::documents::{obkv_to_object, DocumentsBatchReader};
|
||||||
use crate::error::{Error, InternalError};
|
use crate::error::{Error, InternalError};
|
||||||
use crate::index::{PrefixSearch, PrefixSettings};
|
use crate::index::{PrefixSearch, PrefixSettings};
|
||||||
|
use crate::progress::Progress;
|
||||||
use crate::thread_pool_no_abort::ThreadPoolNoAbortBuilder;
|
use crate::thread_pool_no_abort::ThreadPoolNoAbortBuilder;
|
||||||
pub use crate::update::index_documents::helpers::CursorClonableMmap;
|
pub use crate::update::index_documents::helpers::CursorClonableMmap;
|
||||||
use crate::update::{
|
use crate::update::{
|
||||||
@ -522,6 +523,8 @@ where
|
|||||||
let mut writer = ArroyWrapper::new(vector_arroy, embedder_index, was_quantized);
|
let mut writer = ArroyWrapper::new(vector_arroy, embedder_index, was_quantized);
|
||||||
writer.build_and_quantize(
|
writer.build_and_quantize(
|
||||||
wtxn,
|
wtxn,
|
||||||
|
// In the settings we don't have any progress to share
|
||||||
|
&Progress::default(),
|
||||||
&mut rng,
|
&mut rng,
|
||||||
dimension,
|
dimension,
|
||||||
is_quantizing,
|
is_quantizing,
|
||||||
|
@ -201,6 +201,7 @@ where
|
|||||||
build_vectors(
|
build_vectors(
|
||||||
index,
|
index,
|
||||||
wtxn,
|
wtxn,
|
||||||
|
indexing_context.progress,
|
||||||
index_embeddings,
|
index_embeddings,
|
||||||
arroy_memory,
|
arroy_memory,
|
||||||
&mut arroy_writers,
|
&mut arroy_writers,
|
||||||
|
@ -10,6 +10,7 @@ use super::super::channel::*;
|
|||||||
use crate::documents::PrimaryKey;
|
use crate::documents::PrimaryKey;
|
||||||
use crate::fields_ids_map::metadata::FieldIdMapWithMetadata;
|
use crate::fields_ids_map::metadata::FieldIdMapWithMetadata;
|
||||||
use crate::index::IndexEmbeddingConfig;
|
use crate::index::IndexEmbeddingConfig;
|
||||||
|
use crate::progress::Progress;
|
||||||
use crate::update::settings::InnerIndexSettings;
|
use crate::update::settings::InnerIndexSettings;
|
||||||
use crate::vector::{ArroyWrapper, Embedder, EmbeddingConfigs, Embeddings};
|
use crate::vector::{ArroyWrapper, Embedder, EmbeddingConfigs, Embeddings};
|
||||||
use crate::{Error, Index, InternalError, Result};
|
use crate::{Error, Index, InternalError, Result};
|
||||||
@ -100,6 +101,7 @@ impl ChannelCongestion {
|
|||||||
pub fn build_vectors<MSP>(
|
pub fn build_vectors<MSP>(
|
||||||
index: &Index,
|
index: &Index,
|
||||||
wtxn: &mut RwTxn<'_>,
|
wtxn: &mut RwTxn<'_>,
|
||||||
|
progress: &Progress,
|
||||||
index_embeddings: Vec<IndexEmbeddingConfig>,
|
index_embeddings: Vec<IndexEmbeddingConfig>,
|
||||||
arroy_memory: Option<usize>,
|
arroy_memory: Option<usize>,
|
||||||
arroy_writers: &mut HashMap<u8, (&str, &Embedder, ArroyWrapper, usize)>,
|
arroy_writers: &mut HashMap<u8, (&str, &Embedder, ArroyWrapper, usize)>,
|
||||||
@ -118,6 +120,7 @@ where
|
|||||||
let dimensions = *dimensions;
|
let dimensions = *dimensions;
|
||||||
writer.build_and_quantize(
|
writer.build_and_quantize(
|
||||||
wtxn,
|
wtxn,
|
||||||
|
progress,
|
||||||
&mut rng,
|
&mut rng,
|
||||||
dimensions,
|
dimensions,
|
||||||
false,
|
false,
|
||||||
|
@ -13,6 +13,7 @@ use serde::{Deserialize, Serialize};
|
|||||||
use utoipa::ToSchema;
|
use utoipa::ToSchema;
|
||||||
|
|
||||||
use self::error::{EmbedError, NewEmbedderError};
|
use self::error::{EmbedError, NewEmbedderError};
|
||||||
|
use crate::progress::Progress;
|
||||||
use crate::prompt::{Prompt, PromptData};
|
use crate::prompt::{Prompt, PromptData};
|
||||||
use crate::ThreadPoolNoAbort;
|
use crate::ThreadPoolNoAbort;
|
||||||
|
|
||||||
@ -81,9 +82,11 @@ impl ArroyWrapper {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[allow(clippy::too_many_arguments)]
|
||||||
pub fn build_and_quantize<R: rand::Rng + rand::SeedableRng>(
|
pub fn build_and_quantize<R: rand::Rng + rand::SeedableRng>(
|
||||||
&mut self,
|
&mut self,
|
||||||
wtxn: &mut RwTxn,
|
wtxn: &mut RwTxn,
|
||||||
|
progress: &Progress,
|
||||||
rng: &mut R,
|
rng: &mut R,
|
||||||
dimension: usize,
|
dimension: usize,
|
||||||
quantizing: bool,
|
quantizing: bool,
|
||||||
@ -110,12 +113,14 @@ impl ArroyWrapper {
|
|||||||
writer
|
writer
|
||||||
.builder(rng)
|
.builder(rng)
|
||||||
.available_memory(arroy_memory.unwrap_or(usize::MAX))
|
.available_memory(arroy_memory.unwrap_or(usize::MAX))
|
||||||
|
.progress(|step| progress.update_progress_from_arroy(step))
|
||||||
.cancel(cancel)
|
.cancel(cancel)
|
||||||
.build(wtxn)?;
|
.build(wtxn)?;
|
||||||
} else if writer.need_build(wtxn)? {
|
} else if writer.need_build(wtxn)? {
|
||||||
writer
|
writer
|
||||||
.builder(rng)
|
.builder(rng)
|
||||||
.available_memory(arroy_memory.unwrap_or(usize::MAX))
|
.available_memory(arroy_memory.unwrap_or(usize::MAX))
|
||||||
|
.progress(|step| progress.update_progress_from_arroy(step))
|
||||||
.cancel(cancel)
|
.cancel(cancel)
|
||||||
.build(wtxn)?;
|
.build(wtxn)?;
|
||||||
} else if writer.is_empty(wtxn)? {
|
} else if writer.is_empty(wtxn)? {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user