mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-29 16:24:26 +01:00
Merge #4327
4327: Bring back changes from `release-v1.6.0` to `main` r=dureuill a=curquiza Co-authored-by: Paul Sanders <psanders1@gmail.com> Co-authored-by: meili-bors[bot] <89034592+meili-bors[bot]@users.noreply.github.com> Co-authored-by: Louis Dureuil <louis.dureuil@xinra.net> Co-authored-by: Tamo <tamo@meilisearch.com> Co-authored-by: Clément Renault <clement@meilisearch.com> Co-authored-by: Louis Dureuil <louis@meilisearch.com> Co-authored-by: Morgane Dubus <30866152+mdubus@users.noreply.github.com>
This commit is contained in:
commit
34e814f400
31
Cargo.lock
generated
31
Cargo.lock
generated
@ -383,7 +383,7 @@ dependencies = [
|
|||||||
[[package]]
|
[[package]]
|
||||||
name = "arroy"
|
name = "arroy"
|
||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
source = "git+https://github.com/meilisearch/arroy.git#4f193fd534acd357b65bfe9eec4b3fed8ece2007"
|
source = "git+https://github.com/meilisearch/arroy.git#d372648212e561a4845077cdb9239423d78655a2"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"bytemuck",
|
"bytemuck",
|
||||||
"byteorder",
|
"byteorder",
|
||||||
@ -1592,9 +1592,6 @@ name = "esaxx-rs"
|
|||||||
version = "0.1.10"
|
version = "0.1.10"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "d817e038c30374a4bcb22f94d0a8a0e216958d4c3dcde369b1439fec4bdda6e6"
|
checksum = "d817e038c30374a4bcb22f94d0a8a0e216958d4c3dcde369b1439fec4bdda6e6"
|
||||||
dependencies = [
|
|
||||||
"cc",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "fancy-regex"
|
name = "fancy-regex"
|
||||||
@ -1680,9 +1677,9 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "flate2"
|
name = "flate2"
|
||||||
version = "1.0.26"
|
version = "1.0.28"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "3b9429470923de8e8cbd4d2dc513535400b4b3fef0319fb5c4e1f520a7bef743"
|
checksum = "46303f565772937ffe1d394a4fac6f411c6013172fadde9dcdb1e147a086940e"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"crc32fast",
|
"crc32fast",
|
||||||
"miniz_oxide",
|
"miniz_oxide",
|
||||||
@ -1704,9 +1701,9 @@ checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "form_urlencoded"
|
name = "form_urlencoded"
|
||||||
version = "1.2.0"
|
version = "1.2.1"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "a62bc1cf6f830c2ec14a513a9fb124d0a213a629668a4186f329db21fe045652"
|
checksum = "e13624c2627564efccf4934284bdd98cbaa14e79b0b5a141218e507b3a823456"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"percent-encoding",
|
"percent-encoding",
|
||||||
]
|
]
|
||||||
@ -2756,9 +2753,9 @@ checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39"
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "idna"
|
name = "idna"
|
||||||
version = "0.4.0"
|
version = "0.5.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "7d20d6b07bfbc108882d88ed8e37d39636dcc260e15e30c45e6ba089610b917c"
|
checksum = "634d9b1461af396cad843f47fdba5597a4f9e6ddd4bfb6ff5d85028c25cb12f6"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"unicode-bidi",
|
"unicode-bidi",
|
||||||
"unicode-normalization",
|
"unicode-normalization",
|
||||||
@ -2777,6 +2774,7 @@ dependencies = [
|
|||||||
"dump",
|
"dump",
|
||||||
"enum-iterator",
|
"enum-iterator",
|
||||||
"file-store",
|
"file-store",
|
||||||
|
"flate2",
|
||||||
"insta",
|
"insta",
|
||||||
"log",
|
"log",
|
||||||
"meili-snap",
|
"meili-snap",
|
||||||
@ -2792,6 +2790,7 @@ dependencies = [
|
|||||||
"tempfile",
|
"tempfile",
|
||||||
"thiserror",
|
"thiserror",
|
||||||
"time",
|
"time",
|
||||||
|
"ureq",
|
||||||
"uuid 1.5.0",
|
"uuid 1.5.0",
|
||||||
]
|
]
|
||||||
|
|
||||||
@ -3562,6 +3561,7 @@ dependencies = [
|
|||||||
"tokio",
|
"tokio",
|
||||||
"tokio-stream",
|
"tokio-stream",
|
||||||
"toml",
|
"toml",
|
||||||
|
"url",
|
||||||
"urlencoding",
|
"urlencoding",
|
||||||
"uuid 1.5.0",
|
"uuid 1.5.0",
|
||||||
"vergen",
|
"vergen",
|
||||||
@ -4070,9 +4070,9 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "percent-encoding"
|
name = "percent-encoding"
|
||||||
version = "2.3.0"
|
version = "2.3.1"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "9b2a4787296e9989611394c33f193f676704af1686e70b8f8033ab5ba9a35a94"
|
checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "permissive-json-pointer"
|
name = "permissive-json-pointer"
|
||||||
@ -5310,11 +5310,9 @@ version = "0.14.1"
|
|||||||
source = "git+https://github.com/huggingface/tokenizers.git?tag=v0.14.1#6357206cdcce4d78ffb1e0372feb456caea09375"
|
source = "git+https://github.com/huggingface/tokenizers.git?tag=v0.14.1#6357206cdcce4d78ffb1e0372feb456caea09375"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"aho-corasick",
|
"aho-corasick",
|
||||||
"clap",
|
|
||||||
"derive_builder",
|
"derive_builder",
|
||||||
"esaxx-rs",
|
"esaxx-rs",
|
||||||
"getrandom",
|
"getrandom",
|
||||||
"indicatif",
|
|
||||||
"itertools 0.11.0",
|
"itertools 0.11.0",
|
||||||
"lazy_static",
|
"lazy_static",
|
||||||
"log",
|
"log",
|
||||||
@ -5602,13 +5600,14 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "url"
|
name = "url"
|
||||||
version = "2.4.0"
|
version = "2.5.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "50bff7831e19200a85b17131d085c25d7811bc4e186efdaf54bbd132994a88cb"
|
checksum = "31e6302e3bb753d46e83516cae55ae196fc0c309407cf11ab35cc51a4c2a4633"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"form_urlencoded",
|
"form_urlencoded",
|
||||||
"idna",
|
"idna",
|
||||||
"percent-encoding",
|
"percent-encoding",
|
||||||
|
"serde",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
@ -18,6 +18,7 @@ derive_builder = "0.12.0"
|
|||||||
dump = { path = "../dump" }
|
dump = { path = "../dump" }
|
||||||
enum-iterator = "1.4.0"
|
enum-iterator = "1.4.0"
|
||||||
file-store = { path = "../file-store" }
|
file-store = { path = "../file-store" }
|
||||||
|
flate2 = "1.0.28"
|
||||||
log = "0.4.17"
|
log = "0.4.17"
|
||||||
meilisearch-auth = { path = "../meilisearch-auth" }
|
meilisearch-auth = { path = "../meilisearch-auth" }
|
||||||
meilisearch-types = { path = "../meilisearch-types" }
|
meilisearch-types = { path = "../meilisearch-types" }
|
||||||
@ -30,6 +31,7 @@ synchronoise = "1.0.1"
|
|||||||
tempfile = "3.5.0"
|
tempfile = "3.5.0"
|
||||||
thiserror = "1.0.40"
|
thiserror = "1.0.40"
|
||||||
time = { version = "0.3.20", features = ["serde-well-known", "formatting", "parsing", "macros"] }
|
time = { version = "0.3.20", features = ["serde-well-known", "formatting", "parsing", "macros"] }
|
||||||
|
ureq = "2.9.1"
|
||||||
uuid = { version = "1.3.1", features = ["serde", "v4"] }
|
uuid = { version = "1.3.1", features = ["serde", "v4"] }
|
||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
|
@ -936,8 +936,8 @@ impl IndexScheduler {
|
|||||||
};
|
};
|
||||||
|
|
||||||
// the index operation can take a long time, so save this handle to make it available to the search for the duration of the tick
|
// the index operation can take a long time, so save this handle to make it available to the search for the duration of the tick
|
||||||
*self.currently_updating_index.write().unwrap() =
|
self.index_mapper
|
||||||
Some((index_uid.clone(), index.clone()));
|
.set_currently_updating_index(Some((index_uid.clone(), index.clone())));
|
||||||
|
|
||||||
let mut index_wtxn = index.write_txn()?;
|
let mut index_wtxn = index.write_txn()?;
|
||||||
let tasks = self.apply_index_operation(&mut index_wtxn, &index, op)?;
|
let tasks = self.apply_index_operation(&mut index_wtxn, &index, op)?;
|
||||||
@ -1351,9 +1351,6 @@ impl IndexScheduler {
|
|||||||
|
|
||||||
for (task, (_, settings)) in tasks.iter_mut().zip(settings) {
|
for (task, (_, settings)) in tasks.iter_mut().zip(settings) {
|
||||||
let checked_settings = settings.clone().check();
|
let checked_settings = settings.clone().check();
|
||||||
if matches!(checked_settings.embedders, milli::update::Setting::Set(_)) {
|
|
||||||
self.features().check_vector("Passing `embedders` in settings")?
|
|
||||||
}
|
|
||||||
task.details = Some(Details::SettingsUpdate { settings: Box::new(settings) });
|
task.details = Some(Details::SettingsUpdate { settings: Box::new(settings) });
|
||||||
apply_settings_to_builder(&checked_settings, &mut builder);
|
apply_settings_to_builder(&checked_settings, &mut builder);
|
||||||
|
|
||||||
|
@ -69,6 +69,10 @@ pub struct IndexMapper {
|
|||||||
/// Whether we open a meilisearch index with the MDB_WRITEMAP option or not.
|
/// Whether we open a meilisearch index with the MDB_WRITEMAP option or not.
|
||||||
enable_mdb_writemap: bool,
|
enable_mdb_writemap: bool,
|
||||||
pub indexer_config: Arc<IndexerConfig>,
|
pub indexer_config: Arc<IndexerConfig>,
|
||||||
|
|
||||||
|
/// A few types of long running batches of tasks that act on a single index set this field
|
||||||
|
/// so that a handle to the index is available from other threads (search) in an optimized manner.
|
||||||
|
currently_updating_index: Arc<RwLock<Option<(String, Index)>>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Whether the index is available for use or is forbidden to be inserted back in the index map
|
/// Whether the index is available for use or is forbidden to be inserted back in the index map
|
||||||
@ -151,6 +155,7 @@ impl IndexMapper {
|
|||||||
index_growth_amount,
|
index_growth_amount,
|
||||||
enable_mdb_writemap,
|
enable_mdb_writemap,
|
||||||
indexer_config: Arc::new(indexer_config),
|
indexer_config: Arc::new(indexer_config),
|
||||||
|
currently_updating_index: Default::default(),
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -303,6 +308,14 @@ impl IndexMapper {
|
|||||||
|
|
||||||
/// Return an index, may open it if it wasn't already opened.
|
/// Return an index, may open it if it wasn't already opened.
|
||||||
pub fn index(&self, rtxn: &RoTxn, name: &str) -> Result<Index> {
|
pub fn index(&self, rtxn: &RoTxn, name: &str) -> Result<Index> {
|
||||||
|
if let Some((current_name, current_index)) =
|
||||||
|
self.currently_updating_index.read().unwrap().as_ref()
|
||||||
|
{
|
||||||
|
if current_name == name {
|
||||||
|
return Ok(current_index.clone());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
let uuid = self
|
let uuid = self
|
||||||
.index_mapping
|
.index_mapping
|
||||||
.get(rtxn, name)?
|
.get(rtxn, name)?
|
||||||
@ -474,4 +487,8 @@ impl IndexMapper {
|
|||||||
pub fn indexer_config(&self) -> &IndexerConfig {
|
pub fn indexer_config(&self) -> &IndexerConfig {
|
||||||
&self.indexer_config
|
&self.indexer_config
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn set_currently_updating_index(&self, index: Option<(String, Index)>) {
|
||||||
|
*self.currently_updating_index.write().unwrap() = index;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -37,10 +37,11 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String {
|
|||||||
snapshots_path: _,
|
snapshots_path: _,
|
||||||
auth_path: _,
|
auth_path: _,
|
||||||
version_file_path: _,
|
version_file_path: _,
|
||||||
|
webhook_url: _,
|
||||||
|
webhook_authorization_header: _,
|
||||||
test_breakpoint_sdr: _,
|
test_breakpoint_sdr: _,
|
||||||
planned_failures: _,
|
planned_failures: _,
|
||||||
run_loop_iteration: _,
|
run_loop_iteration: _,
|
||||||
currently_updating_index: _,
|
|
||||||
embedders: _,
|
embedders: _,
|
||||||
} = scheduler;
|
} = scheduler;
|
||||||
|
|
||||||
|
@ -34,6 +34,7 @@ pub type TaskId = u32;
|
|||||||
|
|
||||||
use std::collections::{BTreeMap, HashMap};
|
use std::collections::{BTreeMap, HashMap};
|
||||||
use std::fs::File;
|
use std::fs::File;
|
||||||
|
use std::io::{self, BufReader, Read};
|
||||||
use std::ops::{Bound, RangeBounds};
|
use std::ops::{Bound, RangeBounds};
|
||||||
use std::path::{Path, PathBuf};
|
use std::path::{Path, PathBuf};
|
||||||
use std::sync::atomic::AtomicBool;
|
use std::sync::atomic::AtomicBool;
|
||||||
@ -45,6 +46,8 @@ use dump::{KindDump, TaskDump, UpdateFile};
|
|||||||
pub use error::Error;
|
pub use error::Error;
|
||||||
pub use features::RoFeatures;
|
pub use features::RoFeatures;
|
||||||
use file_store::FileStore;
|
use file_store::FileStore;
|
||||||
|
use flate2::bufread::GzEncoder;
|
||||||
|
use flate2::Compression;
|
||||||
use meilisearch_types::error::ResponseError;
|
use meilisearch_types::error::ResponseError;
|
||||||
use meilisearch_types::features::{InstanceTogglableFeatures, RuntimeTogglableFeatures};
|
use meilisearch_types::features::{InstanceTogglableFeatures, RuntimeTogglableFeatures};
|
||||||
use meilisearch_types::heed::byteorder::BE;
|
use meilisearch_types::heed::byteorder::BE;
|
||||||
@ -54,6 +57,7 @@ use meilisearch_types::milli::documents::DocumentsBatchBuilder;
|
|||||||
use meilisearch_types::milli::update::IndexerConfig;
|
use meilisearch_types::milli::update::IndexerConfig;
|
||||||
use meilisearch_types::milli::vector::{Embedder, EmbedderOptions, EmbeddingConfigs};
|
use meilisearch_types::milli::vector::{Embedder, EmbedderOptions, EmbeddingConfigs};
|
||||||
use meilisearch_types::milli::{self, CboRoaringBitmapCodec, Index, RoaringBitmapCodec, BEU32};
|
use meilisearch_types::milli::{self, CboRoaringBitmapCodec, Index, RoaringBitmapCodec, BEU32};
|
||||||
|
use meilisearch_types::task_view::TaskView;
|
||||||
use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task};
|
use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task};
|
||||||
use puffin::FrameView;
|
use puffin::FrameView;
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
@ -170,8 +174,8 @@ impl ProcessingTasks {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Set the processing tasks to an empty list
|
/// Set the processing tasks to an empty list
|
||||||
fn stop_processing(&mut self) {
|
fn stop_processing(&mut self) -> RoaringBitmap {
|
||||||
self.processing = RoaringBitmap::new();
|
std::mem::take(&mut self.processing)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns `true` if there, at least, is one task that is currently processing that we must stop.
|
/// Returns `true` if there, at least, is one task that is currently processing that we must stop.
|
||||||
@ -241,6 +245,10 @@ pub struct IndexSchedulerOptions {
|
|||||||
pub snapshots_path: PathBuf,
|
pub snapshots_path: PathBuf,
|
||||||
/// The path to the folder containing the dumps.
|
/// The path to the folder containing the dumps.
|
||||||
pub dumps_path: PathBuf,
|
pub dumps_path: PathBuf,
|
||||||
|
/// The URL on which we must send the tasks statuses
|
||||||
|
pub webhook_url: Option<String>,
|
||||||
|
/// The value we will send into the Authorization HTTP header on the webhook URL
|
||||||
|
pub webhook_authorization_header: Option<String>,
|
||||||
/// The maximum size, in bytes, of the task index.
|
/// The maximum size, in bytes, of the task index.
|
||||||
pub task_db_size: usize,
|
pub task_db_size: usize,
|
||||||
/// The size, in bytes, with which a meilisearch index is opened the first time of each meilisearch index.
|
/// The size, in bytes, with which a meilisearch index is opened the first time of each meilisearch index.
|
||||||
@ -323,6 +331,11 @@ pub struct IndexScheduler {
|
|||||||
/// The maximum number of tasks that will be batched together.
|
/// The maximum number of tasks that will be batched together.
|
||||||
pub(crate) max_number_of_batched_tasks: usize,
|
pub(crate) max_number_of_batched_tasks: usize,
|
||||||
|
|
||||||
|
/// The webhook url we should send tasks to after processing every batches.
|
||||||
|
pub(crate) webhook_url: Option<String>,
|
||||||
|
/// The Authorization header to send to the webhook URL.
|
||||||
|
pub(crate) webhook_authorization_header: Option<String>,
|
||||||
|
|
||||||
/// A frame to output the indexation profiling files to disk.
|
/// A frame to output the indexation profiling files to disk.
|
||||||
pub(crate) puffin_frame: Arc<puffin::GlobalFrameView>,
|
pub(crate) puffin_frame: Arc<puffin::GlobalFrameView>,
|
||||||
|
|
||||||
@ -338,10 +351,6 @@ pub struct IndexScheduler {
|
|||||||
/// The path to the version file of Meilisearch.
|
/// The path to the version file of Meilisearch.
|
||||||
pub(crate) version_file_path: PathBuf,
|
pub(crate) version_file_path: PathBuf,
|
||||||
|
|
||||||
/// A few types of long running batches of tasks that act on a single index set this field
|
|
||||||
/// so that a handle to the index is available from other threads (search) in an optimized manner.
|
|
||||||
currently_updating_index: Arc<RwLock<Option<(String, Index)>>>,
|
|
||||||
|
|
||||||
embedders: Arc<RwLock<HashMap<EmbedderOptions, Arc<Embedder>>>>,
|
embedders: Arc<RwLock<HashMap<EmbedderOptions, Arc<Embedder>>>>,
|
||||||
|
|
||||||
// ================= test
|
// ================= test
|
||||||
@ -388,7 +397,8 @@ impl IndexScheduler {
|
|||||||
dumps_path: self.dumps_path.clone(),
|
dumps_path: self.dumps_path.clone(),
|
||||||
auth_path: self.auth_path.clone(),
|
auth_path: self.auth_path.clone(),
|
||||||
version_file_path: self.version_file_path.clone(),
|
version_file_path: self.version_file_path.clone(),
|
||||||
currently_updating_index: self.currently_updating_index.clone(),
|
webhook_url: self.webhook_url.clone(),
|
||||||
|
webhook_authorization_header: self.webhook_authorization_header.clone(),
|
||||||
embedders: self.embedders.clone(),
|
embedders: self.embedders.clone(),
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
test_breakpoint_sdr: self.test_breakpoint_sdr.clone(),
|
test_breakpoint_sdr: self.test_breakpoint_sdr.clone(),
|
||||||
@ -487,7 +497,8 @@ impl IndexScheduler {
|
|||||||
snapshots_path: options.snapshots_path,
|
snapshots_path: options.snapshots_path,
|
||||||
auth_path: options.auth_path,
|
auth_path: options.auth_path,
|
||||||
version_file_path: options.version_file_path,
|
version_file_path: options.version_file_path,
|
||||||
currently_updating_index: Arc::new(RwLock::new(None)),
|
webhook_url: options.webhook_url,
|
||||||
|
webhook_authorization_header: options.webhook_authorization_header,
|
||||||
embedders: Default::default(),
|
embedders: Default::default(),
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
@ -671,13 +682,6 @@ impl IndexScheduler {
|
|||||||
/// If you need to fetch information from or perform an action on all indexes,
|
/// If you need to fetch information from or perform an action on all indexes,
|
||||||
/// see the `try_for_each_index` function.
|
/// see the `try_for_each_index` function.
|
||||||
pub fn index(&self, name: &str) -> Result<Index> {
|
pub fn index(&self, name: &str) -> Result<Index> {
|
||||||
if let Some((current_name, current_index)) =
|
|
||||||
self.currently_updating_index.read().unwrap().as_ref()
|
|
||||||
{
|
|
||||||
if current_name == name {
|
|
||||||
return Ok(current_index.clone());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
let rtxn = self.env.read_txn()?;
|
let rtxn = self.env.read_txn()?;
|
||||||
self.index_mapper.index(&rtxn, name)
|
self.index_mapper.index(&rtxn, name)
|
||||||
}
|
}
|
||||||
@ -1158,7 +1162,7 @@ impl IndexScheduler {
|
|||||||
};
|
};
|
||||||
|
|
||||||
// Reset the currently updating index to relinquish the index handle
|
// Reset the currently updating index to relinquish the index handle
|
||||||
*self.currently_updating_index.write().unwrap() = None;
|
self.index_mapper.set_currently_updating_index(None);
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
self.maybe_fail(tests::FailureLocation::AcquiringWtxn)?;
|
self.maybe_fail(tests::FailureLocation::AcquiringWtxn)?;
|
||||||
@ -1251,19 +1255,99 @@ impl IndexScheduler {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
self.processing_tasks.write().unwrap().stop_processing();
|
let processed = self.processing_tasks.write().unwrap().stop_processing();
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
self.maybe_fail(tests::FailureLocation::CommittingWtxn)?;
|
self.maybe_fail(tests::FailureLocation::CommittingWtxn)?;
|
||||||
|
|
||||||
wtxn.commit().map_err(Error::HeedTransaction)?;
|
wtxn.commit().map_err(Error::HeedTransaction)?;
|
||||||
|
|
||||||
|
// We shouldn't crash the tick function if we can't send data to the webhook.
|
||||||
|
let _ = self.notify_webhook(&processed);
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
self.breakpoint(Breakpoint::AfterProcessing);
|
self.breakpoint(Breakpoint::AfterProcessing);
|
||||||
|
|
||||||
Ok(TickOutcome::TickAgain(processed_tasks))
|
Ok(TickOutcome::TickAgain(processed_tasks))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Once the tasks changes have been commited we must send all the tasks that were updated to our webhook if there is one.
|
||||||
|
fn notify_webhook(&self, updated: &RoaringBitmap) -> Result<()> {
|
||||||
|
if let Some(ref url) = self.webhook_url {
|
||||||
|
struct TaskReader<'a, 'b> {
|
||||||
|
rtxn: &'a RoTxn<'a>,
|
||||||
|
index_scheduler: &'a IndexScheduler,
|
||||||
|
tasks: &'b mut roaring::bitmap::Iter<'b>,
|
||||||
|
buffer: Vec<u8>,
|
||||||
|
written: usize,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a, 'b> Read for TaskReader<'a, 'b> {
|
||||||
|
fn read(&mut self, mut buf: &mut [u8]) -> std::io::Result<usize> {
|
||||||
|
if self.buffer.is_empty() {
|
||||||
|
match self.tasks.next() {
|
||||||
|
None => return Ok(0),
|
||||||
|
Some(task_id) => {
|
||||||
|
let task = self
|
||||||
|
.index_scheduler
|
||||||
|
.get_task(self.rtxn, task_id)
|
||||||
|
.map_err(|err| io::Error::new(io::ErrorKind::Other, err))?
|
||||||
|
.ok_or_else(|| {
|
||||||
|
io::Error::new(
|
||||||
|
io::ErrorKind::Other,
|
||||||
|
Error::CorruptedTaskQueue,
|
||||||
|
)
|
||||||
|
})?;
|
||||||
|
|
||||||
|
serde_json::to_writer(
|
||||||
|
&mut self.buffer,
|
||||||
|
&TaskView::from_task(&task),
|
||||||
|
)?;
|
||||||
|
self.buffer.push(b'\n');
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut to_write = &self.buffer[self.written..];
|
||||||
|
let wrote = io::copy(&mut to_write, &mut buf)?;
|
||||||
|
self.written += wrote as usize;
|
||||||
|
|
||||||
|
// we wrote everything and must refresh our buffer on the next call
|
||||||
|
if self.written == self.buffer.len() {
|
||||||
|
self.written = 0;
|
||||||
|
self.buffer.clear();
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(wrote as usize)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let rtxn = self.env.read_txn()?;
|
||||||
|
|
||||||
|
let task_reader = TaskReader {
|
||||||
|
rtxn: &rtxn,
|
||||||
|
index_scheduler: self,
|
||||||
|
tasks: &mut updated.into_iter(),
|
||||||
|
buffer: Vec::with_capacity(50), // on average a task is around ~100 bytes
|
||||||
|
written: 0,
|
||||||
|
};
|
||||||
|
|
||||||
|
// let reader = GzEncoder::new(BufReader::new(task_reader), Compression::default());
|
||||||
|
let reader = GzEncoder::new(BufReader::new(task_reader), Compression::default());
|
||||||
|
let request = ureq::post(url).set("Content-Encoding", "gzip");
|
||||||
|
let request = match &self.webhook_authorization_header {
|
||||||
|
Some(header) => request.set("Authorization", header),
|
||||||
|
None => request,
|
||||||
|
};
|
||||||
|
|
||||||
|
if let Err(e) = request.send(reader) {
|
||||||
|
log::error!("While sending data to the webhook: {e}");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
/// Register a task to cleanup the task queue if needed
|
/// Register a task to cleanup the task queue if needed
|
||||||
fn cleanup_task_queue(&self) -> Result<()> {
|
fn cleanup_task_queue(&self) -> Result<()> {
|
||||||
let rtxn = self.env.read_txn().map_err(Error::HeedTransaction)?;
|
let rtxn = self.env.read_txn().map_err(Error::HeedTransaction)?;
|
||||||
@ -1677,6 +1761,8 @@ mod tests {
|
|||||||
indexes_path: tempdir.path().join("indexes"),
|
indexes_path: tempdir.path().join("indexes"),
|
||||||
snapshots_path: tempdir.path().join("snapshots"),
|
snapshots_path: tempdir.path().join("snapshots"),
|
||||||
dumps_path: tempdir.path().join("dumps"),
|
dumps_path: tempdir.path().join("dumps"),
|
||||||
|
webhook_url: None,
|
||||||
|
webhook_authorization_header: None,
|
||||||
task_db_size: 1000 * 1000, // 1 MB, we don't use MiB on purpose.
|
task_db_size: 1000 * 1000, // 1 MB, we don't use MiB on purpose.
|
||||||
index_base_map_size: 1000 * 1000, // 1 MB, we don't use MiB on purpose.
|
index_base_map_size: 1000 * 1000, // 1 MB, we don't use MiB on purpose.
|
||||||
enable_mdb_writemap: false,
|
enable_mdb_writemap: false,
|
||||||
|
@ -344,7 +344,10 @@ impl ErrorCode for milli::Error {
|
|||||||
Code::InvalidDocumentId
|
Code::InvalidDocumentId
|
||||||
}
|
}
|
||||||
UserError::MissingDocumentField(_) => Code::InvalidDocumentFields,
|
UserError::MissingDocumentField(_) => Code::InvalidDocumentFields,
|
||||||
UserError::InvalidPrompt(_) => Code::InvalidSettingsEmbedders,
|
UserError::InvalidFieldForSource { .. }
|
||||||
|
| UserError::MissingFieldForSource { .. }
|
||||||
|
| UserError::InvalidOpenAiModel { .. }
|
||||||
|
| UserError::InvalidPrompt(_) => Code::InvalidSettingsEmbedders,
|
||||||
UserError::TooManyEmbedders(_) => Code::InvalidSettingsEmbedders,
|
UserError::TooManyEmbedders(_) => Code::InvalidSettingsEmbedders,
|
||||||
UserError::InvalidPromptForEmbeddings(..) => Code::InvalidSettingsEmbedders,
|
UserError::InvalidPromptForEmbeddings(..) => Code::InvalidSettingsEmbedders,
|
||||||
UserError::NoPrimaryKeyCandidateFound => Code::IndexPrimaryKeyNoCandidateFound,
|
UserError::NoPrimaryKeyCandidateFound => Code::IndexPrimaryKeyNoCandidateFound,
|
||||||
|
@ -9,6 +9,7 @@ pub mod index_uid_pattern;
|
|||||||
pub mod keys;
|
pub mod keys;
|
||||||
pub mod settings;
|
pub mod settings;
|
||||||
pub mod star_or;
|
pub mod star_or;
|
||||||
|
pub mod task_view;
|
||||||
pub mod tasks;
|
pub mod tasks;
|
||||||
pub mod versioning;
|
pub mod versioning;
|
||||||
pub use milli::{heed, Index};
|
pub use milli::{heed, Index};
|
||||||
|
@ -318,6 +318,21 @@ impl Settings<Unchecked> {
|
|||||||
_kind: PhantomData,
|
_kind: PhantomData,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn validate(self) -> Result<Self, milli::Error> {
|
||||||
|
self.validate_embedding_settings()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn validate_embedding_settings(mut self) -> Result<Self, milli::Error> {
|
||||||
|
let Setting::Set(mut configs) = self.embedders else { return Ok(self) };
|
||||||
|
for (name, config) in configs.iter_mut() {
|
||||||
|
let config_to_check = std::mem::take(config);
|
||||||
|
let checked_config = milli::update::validate_embedding_settings(config_to_check, name)?;
|
||||||
|
*config = checked_config
|
||||||
|
}
|
||||||
|
self.embedders = Setting::Set(configs);
|
||||||
|
Ok(self)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
@ -585,11 +600,12 @@ pub fn settings(
|
|||||||
),
|
),
|
||||||
};
|
};
|
||||||
|
|
||||||
let embedders = index
|
let embedders: BTreeMap<_, _> = index
|
||||||
.embedding_configs(rtxn)?
|
.embedding_configs(rtxn)?
|
||||||
.into_iter()
|
.into_iter()
|
||||||
.map(|(name, config)| (name, Setting::Set(config.into())))
|
.map(|(name, config)| (name, Setting::Set(config.into())))
|
||||||
.collect();
|
.collect();
|
||||||
|
let embedders = if embedders.is_empty() { Setting::NotSet } else { Setting::Set(embedders) };
|
||||||
|
|
||||||
Ok(Settings {
|
Ok(Settings {
|
||||||
displayed_attributes: match displayed_attributes {
|
displayed_attributes: match displayed_attributes {
|
||||||
@ -611,15 +627,12 @@ pub fn settings(
|
|||||||
Some(field) => Setting::Set(field),
|
Some(field) => Setting::Set(field),
|
||||||
None => Setting::Reset,
|
None => Setting::Reset,
|
||||||
},
|
},
|
||||||
proximity_precision: match proximity_precision {
|
proximity_precision: Setting::Set(proximity_precision.unwrap_or_default()),
|
||||||
Some(precision) => Setting::Set(precision),
|
|
||||||
None => Setting::Reset,
|
|
||||||
},
|
|
||||||
synonyms: Setting::Set(synonyms),
|
synonyms: Setting::Set(synonyms),
|
||||||
typo_tolerance: Setting::Set(typo_tolerance),
|
typo_tolerance: Setting::Set(typo_tolerance),
|
||||||
faceting: Setting::Set(faceting),
|
faceting: Setting::Set(faceting),
|
||||||
pagination: Setting::Set(pagination),
|
pagination: Setting::Set(pagination),
|
||||||
embedders: Setting::Set(embedders),
|
embedders,
|
||||||
_kind: PhantomData,
|
_kind: PhantomData,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
@ -720,10 +733,11 @@ impl From<RankingRuleView> for Criterion {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Deserr, Serialize, Deserialize)]
|
#[derive(Default, Debug, Clone, Copy, PartialEq, Eq, Deserr, Serialize, Deserialize)]
|
||||||
#[serde(deny_unknown_fields, rename_all = "camelCase")]
|
#[serde(deny_unknown_fields, rename_all = "camelCase")]
|
||||||
#[deserr(error = DeserrJsonError<InvalidSettingsProximityPrecision>, rename_all = camelCase, deny_unknown_fields)]
|
#[deserr(error = DeserrJsonError<InvalidSettingsProximityPrecision>, rename_all = camelCase, deny_unknown_fields)]
|
||||||
pub enum ProximityPrecisionView {
|
pub enum ProximityPrecisionView {
|
||||||
|
#[default]
|
||||||
ByWord,
|
ByWord,
|
||||||
ByAttribute,
|
ByAttribute,
|
||||||
}
|
}
|
||||||
|
139
meilisearch-types/src/task_view.rs
Normal file
139
meilisearch-types/src/task_view.rs
Normal file
@ -0,0 +1,139 @@
|
|||||||
|
use serde::Serialize;
|
||||||
|
use time::{Duration, OffsetDateTime};
|
||||||
|
|
||||||
|
use crate::error::ResponseError;
|
||||||
|
use crate::settings::{Settings, Unchecked};
|
||||||
|
use crate::tasks::{serialize_duration, Details, IndexSwap, Kind, Status, Task, TaskId};
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
|
||||||
|
#[serde(rename_all = "camelCase")]
|
||||||
|
pub struct TaskView {
|
||||||
|
pub uid: TaskId,
|
||||||
|
#[serde(default)]
|
||||||
|
pub index_uid: Option<String>,
|
||||||
|
pub status: Status,
|
||||||
|
#[serde(rename = "type")]
|
||||||
|
pub kind: Kind,
|
||||||
|
pub canceled_by: Option<TaskId>,
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
pub details: Option<DetailsView>,
|
||||||
|
pub error: Option<ResponseError>,
|
||||||
|
#[serde(serialize_with = "serialize_duration", default)]
|
||||||
|
pub duration: Option<Duration>,
|
||||||
|
#[serde(with = "time::serde::rfc3339")]
|
||||||
|
pub enqueued_at: OffsetDateTime,
|
||||||
|
#[serde(with = "time::serde::rfc3339::option", default)]
|
||||||
|
pub started_at: Option<OffsetDateTime>,
|
||||||
|
#[serde(with = "time::serde::rfc3339::option", default)]
|
||||||
|
pub finished_at: Option<OffsetDateTime>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl TaskView {
|
||||||
|
pub fn from_task(task: &Task) -> TaskView {
|
||||||
|
TaskView {
|
||||||
|
uid: task.uid,
|
||||||
|
index_uid: task.index_uid().map(ToOwned::to_owned),
|
||||||
|
status: task.status,
|
||||||
|
kind: task.kind.as_kind(),
|
||||||
|
canceled_by: task.canceled_by,
|
||||||
|
details: task.details.clone().map(DetailsView::from),
|
||||||
|
error: task.error.clone(),
|
||||||
|
duration: task.started_at.zip(task.finished_at).map(|(start, end)| end - start),
|
||||||
|
enqueued_at: task.enqueued_at,
|
||||||
|
started_at: task.started_at,
|
||||||
|
finished_at: task.finished_at,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Default, Debug, PartialEq, Eq, Clone, Serialize)]
|
||||||
|
#[serde(rename_all = "camelCase")]
|
||||||
|
pub struct DetailsView {
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
pub received_documents: Option<u64>,
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
pub indexed_documents: Option<Option<u64>>,
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
pub primary_key: Option<Option<String>>,
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
pub provided_ids: Option<usize>,
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
pub deleted_documents: Option<Option<u64>>,
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
pub matched_tasks: Option<u64>,
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
pub canceled_tasks: Option<Option<u64>>,
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
pub deleted_tasks: Option<Option<u64>>,
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
pub original_filter: Option<Option<String>>,
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
pub dump_uid: Option<Option<String>>,
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
#[serde(flatten)]
|
||||||
|
pub settings: Option<Box<Settings<Unchecked>>>,
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
pub swaps: Option<Vec<IndexSwap>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<Details> for DetailsView {
|
||||||
|
fn from(details: Details) -> Self {
|
||||||
|
match details {
|
||||||
|
Details::DocumentAdditionOrUpdate { received_documents, indexed_documents } => {
|
||||||
|
DetailsView {
|
||||||
|
received_documents: Some(received_documents),
|
||||||
|
indexed_documents: Some(indexed_documents),
|
||||||
|
..DetailsView::default()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Details::SettingsUpdate { settings } => {
|
||||||
|
DetailsView { settings: Some(settings), ..DetailsView::default() }
|
||||||
|
}
|
||||||
|
Details::IndexInfo { primary_key } => {
|
||||||
|
DetailsView { primary_key: Some(primary_key), ..DetailsView::default() }
|
||||||
|
}
|
||||||
|
Details::DocumentDeletion {
|
||||||
|
provided_ids: received_document_ids,
|
||||||
|
deleted_documents,
|
||||||
|
} => DetailsView {
|
||||||
|
provided_ids: Some(received_document_ids),
|
||||||
|
deleted_documents: Some(deleted_documents),
|
||||||
|
original_filter: Some(None),
|
||||||
|
..DetailsView::default()
|
||||||
|
},
|
||||||
|
Details::DocumentDeletionByFilter { original_filter, deleted_documents } => {
|
||||||
|
DetailsView {
|
||||||
|
provided_ids: Some(0),
|
||||||
|
original_filter: Some(Some(original_filter)),
|
||||||
|
deleted_documents: Some(deleted_documents),
|
||||||
|
..DetailsView::default()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Details::ClearAll { deleted_documents } => {
|
||||||
|
DetailsView { deleted_documents: Some(deleted_documents), ..DetailsView::default() }
|
||||||
|
}
|
||||||
|
Details::TaskCancelation { matched_tasks, canceled_tasks, original_filter } => {
|
||||||
|
DetailsView {
|
||||||
|
matched_tasks: Some(matched_tasks),
|
||||||
|
canceled_tasks: Some(canceled_tasks),
|
||||||
|
original_filter: Some(Some(original_filter)),
|
||||||
|
..DetailsView::default()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Details::TaskDeletion { matched_tasks, deleted_tasks, original_filter } => {
|
||||||
|
DetailsView {
|
||||||
|
matched_tasks: Some(matched_tasks),
|
||||||
|
deleted_tasks: Some(deleted_tasks),
|
||||||
|
original_filter: Some(Some(original_filter)),
|
||||||
|
..DetailsView::default()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Details::Dump { dump_uid } => {
|
||||||
|
DetailsView { dump_uid: Some(dump_uid), ..DetailsView::default() }
|
||||||
|
}
|
||||||
|
Details::IndexSwap { swaps } => {
|
||||||
|
DetailsView { swaps: Some(swaps), ..Default::default() }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
@ -104,6 +104,7 @@ walkdir = "2.3.3"
|
|||||||
yaup = "0.2.1"
|
yaup = "0.2.1"
|
||||||
serde_urlencoded = "0.7.1"
|
serde_urlencoded = "0.7.1"
|
||||||
termcolor = "1.2.0"
|
termcolor = "1.2.0"
|
||||||
|
url = { version = "2.5.0", features = ["serde"] }
|
||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
actix-rt = "2.8.0"
|
actix-rt = "2.8.0"
|
||||||
@ -153,5 +154,5 @@ greek = ["meilisearch-types/greek"]
|
|||||||
khmer = ["meilisearch-types/khmer"]
|
khmer = ["meilisearch-types/khmer"]
|
||||||
|
|
||||||
[package.metadata.mini-dashboard]
|
[package.metadata.mini-dashboard]
|
||||||
assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.11/build.zip"
|
assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.12/build.zip"
|
||||||
sha1 = "83cd44ed1e5f97ecb581dc9f958a63f4ccc982d9"
|
sha1 = "acfe9a018c93eb0604ea87ee87bff7df5474e18e"
|
||||||
|
@ -264,6 +264,8 @@ struct Infos {
|
|||||||
ignore_snapshot_if_db_exists: bool,
|
ignore_snapshot_if_db_exists: bool,
|
||||||
http_addr: bool,
|
http_addr: bool,
|
||||||
http_payload_size_limit: Byte,
|
http_payload_size_limit: Byte,
|
||||||
|
task_queue_webhook: bool,
|
||||||
|
task_webhook_authorization_header: bool,
|
||||||
log_level: String,
|
log_level: String,
|
||||||
max_indexing_memory: MaxMemory,
|
max_indexing_memory: MaxMemory,
|
||||||
max_indexing_threads: MaxThreads,
|
max_indexing_threads: MaxThreads,
|
||||||
@ -290,6 +292,8 @@ impl From<Opt> for Infos {
|
|||||||
http_addr,
|
http_addr,
|
||||||
master_key: _,
|
master_key: _,
|
||||||
env,
|
env,
|
||||||
|
task_webhook_url,
|
||||||
|
task_webhook_authorization_header,
|
||||||
max_index_size: _,
|
max_index_size: _,
|
||||||
max_task_db_size: _,
|
max_task_db_size: _,
|
||||||
http_payload_size_limit,
|
http_payload_size_limit,
|
||||||
@ -343,6 +347,8 @@ impl From<Opt> for Infos {
|
|||||||
http_addr: http_addr != default_http_addr(),
|
http_addr: http_addr != default_http_addr(),
|
||||||
http_payload_size_limit,
|
http_payload_size_limit,
|
||||||
experimental_max_number_of_batched_tasks,
|
experimental_max_number_of_batched_tasks,
|
||||||
|
task_queue_webhook: task_webhook_url.is_some(),
|
||||||
|
task_webhook_authorization_header: task_webhook_authorization_header.is_some(),
|
||||||
log_level: log_level.to_string(),
|
log_level: log_level.to_string(),
|
||||||
max_indexing_memory,
|
max_indexing_memory,
|
||||||
max_indexing_threads,
|
max_indexing_threads,
|
||||||
|
@ -228,6 +228,8 @@ fn open_or_create_database_unchecked(
|
|||||||
indexes_path: opt.db_path.join("indexes"),
|
indexes_path: opt.db_path.join("indexes"),
|
||||||
snapshots_path: opt.snapshot_dir.clone(),
|
snapshots_path: opt.snapshot_dir.clone(),
|
||||||
dumps_path: opt.dump_dir.clone(),
|
dumps_path: opt.dump_dir.clone(),
|
||||||
|
webhook_url: opt.task_webhook_url.as_ref().map(|url| url.to_string()),
|
||||||
|
webhook_authorization_header: opt.task_webhook_authorization_header.clone(),
|
||||||
task_db_size: opt.max_task_db_size.get_bytes() as usize,
|
task_db_size: opt.max_task_db_size.get_bytes() as usize,
|
||||||
index_base_map_size: opt.max_index_size.get_bytes() as usize,
|
index_base_map_size: opt.max_index_size.get_bytes() as usize,
|
||||||
enable_mdb_writemap: opt.experimental_reduce_indexing_memory_usage,
|
enable_mdb_writemap: opt.experimental_reduce_indexing_memory_usage,
|
||||||
|
@ -21,6 +21,7 @@ use rustls::RootCertStore;
|
|||||||
use rustls_pemfile::{certs, pkcs8_private_keys, rsa_private_keys};
|
use rustls_pemfile::{certs, pkcs8_private_keys, rsa_private_keys};
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
use sysinfo::{RefreshKind, System, SystemExt};
|
use sysinfo::{RefreshKind, System, SystemExt};
|
||||||
|
use url::Url;
|
||||||
|
|
||||||
const POSSIBLE_ENV: [&str; 2] = ["development", "production"];
|
const POSSIBLE_ENV: [&str; 2] = ["development", "production"];
|
||||||
|
|
||||||
@ -28,6 +29,8 @@ const MEILI_DB_PATH: &str = "MEILI_DB_PATH";
|
|||||||
const MEILI_HTTP_ADDR: &str = "MEILI_HTTP_ADDR";
|
const MEILI_HTTP_ADDR: &str = "MEILI_HTTP_ADDR";
|
||||||
const MEILI_MASTER_KEY: &str = "MEILI_MASTER_KEY";
|
const MEILI_MASTER_KEY: &str = "MEILI_MASTER_KEY";
|
||||||
const MEILI_ENV: &str = "MEILI_ENV";
|
const MEILI_ENV: &str = "MEILI_ENV";
|
||||||
|
const MEILI_TASK_WEBHOOK_URL: &str = "MEILI_TASK_WEBHOOK_URL";
|
||||||
|
const MEILI_TASK_WEBHOOK_AUTHORIZATION_HEADER: &str = "MEILI_TASK_WEBHOOK_AUTHORIZATION_HEADER";
|
||||||
#[cfg(feature = "analytics")]
|
#[cfg(feature = "analytics")]
|
||||||
const MEILI_NO_ANALYTICS: &str = "MEILI_NO_ANALYTICS";
|
const MEILI_NO_ANALYTICS: &str = "MEILI_NO_ANALYTICS";
|
||||||
const MEILI_HTTP_PAYLOAD_SIZE_LIMIT: &str = "MEILI_HTTP_PAYLOAD_SIZE_LIMIT";
|
const MEILI_HTTP_PAYLOAD_SIZE_LIMIT: &str = "MEILI_HTTP_PAYLOAD_SIZE_LIMIT";
|
||||||
@ -156,6 +159,14 @@ pub struct Opt {
|
|||||||
#[serde(default = "default_env")]
|
#[serde(default = "default_env")]
|
||||||
pub env: String,
|
pub env: String,
|
||||||
|
|
||||||
|
/// Called whenever a task finishes so a third party can be notified.
|
||||||
|
#[clap(long, env = MEILI_TASK_WEBHOOK_URL)]
|
||||||
|
pub task_webhook_url: Option<Url>,
|
||||||
|
|
||||||
|
/// The Authorization header to send on the webhook URL whenever a task finishes so a third party can be notified.
|
||||||
|
#[clap(long, env = MEILI_TASK_WEBHOOK_AUTHORIZATION_HEADER)]
|
||||||
|
pub task_webhook_authorization_header: Option<String>,
|
||||||
|
|
||||||
/// Deactivates Meilisearch's built-in telemetry when provided.
|
/// Deactivates Meilisearch's built-in telemetry when provided.
|
||||||
///
|
///
|
||||||
/// Meilisearch automatically collects data from all instances that do not opt out using this flag.
|
/// Meilisearch automatically collects data from all instances that do not opt out using this flag.
|
||||||
@ -375,6 +386,8 @@ impl Opt {
|
|||||||
http_addr,
|
http_addr,
|
||||||
master_key,
|
master_key,
|
||||||
env,
|
env,
|
||||||
|
task_webhook_url,
|
||||||
|
task_webhook_authorization_header,
|
||||||
max_index_size: _,
|
max_index_size: _,
|
||||||
max_task_db_size: _,
|
max_task_db_size: _,
|
||||||
http_payload_size_limit,
|
http_payload_size_limit,
|
||||||
@ -409,6 +422,16 @@ impl Opt {
|
|||||||
export_to_env_if_not_present(MEILI_MASTER_KEY, master_key);
|
export_to_env_if_not_present(MEILI_MASTER_KEY, master_key);
|
||||||
}
|
}
|
||||||
export_to_env_if_not_present(MEILI_ENV, env);
|
export_to_env_if_not_present(MEILI_ENV, env);
|
||||||
|
if let Some(task_webhook_url) = task_webhook_url {
|
||||||
|
export_to_env_if_not_present(MEILI_TASK_WEBHOOK_URL, task_webhook_url.to_string());
|
||||||
|
}
|
||||||
|
if let Some(task_webhook_authorization_header) = task_webhook_authorization_header {
|
||||||
|
export_to_env_if_not_present(
|
||||||
|
MEILI_TASK_WEBHOOK_AUTHORIZATION_HEADER,
|
||||||
|
task_webhook_authorization_header,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
#[cfg(feature = "analytics")]
|
#[cfg(feature = "analytics")]
|
||||||
{
|
{
|
||||||
export_to_env_if_not_present(MEILI_NO_ANALYTICS, no_analytics.to_string());
|
export_to_env_if_not_present(MEILI_NO_ANALYTICS, no_analytics.to_string());
|
||||||
|
@ -90,6 +90,11 @@ macro_rules! make_setting_route {
|
|||||||
..Default::default()
|
..Default::default()
|
||||||
};
|
};
|
||||||
|
|
||||||
|
let new_settings = $crate::routes::indexes::settings::validate_settings(
|
||||||
|
new_settings,
|
||||||
|
&index_scheduler,
|
||||||
|
)?;
|
||||||
|
|
||||||
let allow_index_creation =
|
let allow_index_creation =
|
||||||
index_scheduler.filters().allow_index_creation(&index_uid);
|
index_scheduler.filters().allow_index_creation(&index_uid);
|
||||||
|
|
||||||
@ -453,7 +458,7 @@ make_setting_route!(
|
|||||||
json!({
|
json!({
|
||||||
"proximity_precision": {
|
"proximity_precision": {
|
||||||
"set": precision.is_some(),
|
"set": precision.is_some(),
|
||||||
"value": precision,
|
"value": precision.unwrap_or_default(),
|
||||||
}
|
}
|
||||||
}),
|
}),
|
||||||
Some(req),
|
Some(req),
|
||||||
@ -582,13 +587,13 @@ fn embedder_analytics(
|
|||||||
for source in s
|
for source in s
|
||||||
.values()
|
.values()
|
||||||
.filter_map(|config| config.clone().set())
|
.filter_map(|config| config.clone().set())
|
||||||
.filter_map(|config| config.embedder_options.set())
|
.filter_map(|config| config.source.set())
|
||||||
{
|
{
|
||||||
use meilisearch_types::milli::vector::settings::EmbedderSettings;
|
use meilisearch_types::milli::vector::settings::EmbedderSource;
|
||||||
match source {
|
match source {
|
||||||
EmbedderSettings::OpenAi(_) => sources.insert("openAi"),
|
EmbedderSource::OpenAi => sources.insert("openAi"),
|
||||||
EmbedderSettings::HuggingFace(_) => sources.insert("huggingFace"),
|
EmbedderSource::HuggingFace => sources.insert("huggingFace"),
|
||||||
EmbedderSettings::UserProvided(_) => sources.insert("userProvided"),
|
EmbedderSource::UserProvided => sources.insert("userProvided"),
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
@ -651,6 +656,7 @@ pub async fn update_all(
|
|||||||
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
|
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
|
||||||
|
|
||||||
let new_settings = body.into_inner();
|
let new_settings = body.into_inner();
|
||||||
|
let new_settings = validate_settings(new_settings, &index_scheduler)?;
|
||||||
|
|
||||||
analytics.publish(
|
analytics.publish(
|
||||||
"Settings Updated".to_string(),
|
"Settings Updated".to_string(),
|
||||||
@ -684,7 +690,8 @@ pub async fn update_all(
|
|||||||
"set": new_settings.distinct_attribute.as_ref().set().is_some()
|
"set": new_settings.distinct_attribute.as_ref().set().is_some()
|
||||||
},
|
},
|
||||||
"proximity_precision": {
|
"proximity_precision": {
|
||||||
"set": new_settings.proximity_precision.as_ref().set().is_some()
|
"set": new_settings.proximity_precision.as_ref().set().is_some(),
|
||||||
|
"value": new_settings.proximity_precision.as_ref().set().copied().unwrap_or_default()
|
||||||
},
|
},
|
||||||
"typo_tolerance": {
|
"typo_tolerance": {
|
||||||
"enabled": new_settings.typo_tolerance
|
"enabled": new_settings.typo_tolerance
|
||||||
@ -800,3 +807,13 @@ pub async fn delete_all(
|
|||||||
debug!("returns: {:?}", task);
|
debug!("returns: {:?}", task);
|
||||||
Ok(HttpResponse::Accepted().json(task))
|
Ok(HttpResponse::Accepted().json(task))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn validate_settings(
|
||||||
|
settings: Settings<Unchecked>,
|
||||||
|
index_scheduler: &IndexScheduler,
|
||||||
|
) -> Result<Settings<Unchecked>, ResponseError> {
|
||||||
|
if matches!(settings.embedders, Setting::Set(_)) {
|
||||||
|
index_scheduler.features().check_vector("Passing `embedders` in settings")?
|
||||||
|
}
|
||||||
|
Ok(settings.validate()?)
|
||||||
|
}
|
||||||
|
@ -8,11 +8,9 @@ use meilisearch_types::deserr::DeserrQueryParamError;
|
|||||||
use meilisearch_types::error::deserr_codes::*;
|
use meilisearch_types::error::deserr_codes::*;
|
||||||
use meilisearch_types::error::{InvalidTaskDateError, ResponseError};
|
use meilisearch_types::error::{InvalidTaskDateError, ResponseError};
|
||||||
use meilisearch_types::index_uid::IndexUid;
|
use meilisearch_types::index_uid::IndexUid;
|
||||||
use meilisearch_types::settings::{Settings, Unchecked};
|
|
||||||
use meilisearch_types::star_or::{OptionStarOr, OptionStarOrList};
|
use meilisearch_types::star_or::{OptionStarOr, OptionStarOrList};
|
||||||
use meilisearch_types::tasks::{
|
use meilisearch_types::task_view::TaskView;
|
||||||
serialize_duration, Details, IndexSwap, Kind, KindWithContent, Status, Task,
|
use meilisearch_types::tasks::{Kind, KindWithContent, Status};
|
||||||
};
|
|
||||||
use serde::Serialize;
|
use serde::Serialize;
|
||||||
use serde_json::json;
|
use serde_json::json;
|
||||||
use time::format_description::well_known::Rfc3339;
|
use time::format_description::well_known::Rfc3339;
|
||||||
@ -37,140 +35,6 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
|
|||||||
.service(web::resource("/cancel").route(web::post().to(SeqHandler(cancel_tasks))))
|
.service(web::resource("/cancel").route(web::post().to(SeqHandler(cancel_tasks))))
|
||||||
.service(web::resource("/{task_id}").route(web::get().to(SeqHandler(get_task))));
|
.service(web::resource("/{task_id}").route(web::get().to(SeqHandler(get_task))));
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
|
|
||||||
#[serde(rename_all = "camelCase")]
|
|
||||||
pub struct TaskView {
|
|
||||||
pub uid: TaskId,
|
|
||||||
#[serde(default)]
|
|
||||||
pub index_uid: Option<String>,
|
|
||||||
pub status: Status,
|
|
||||||
#[serde(rename = "type")]
|
|
||||||
pub kind: Kind,
|
|
||||||
pub canceled_by: Option<TaskId>,
|
|
||||||
#[serde(skip_serializing_if = "Option::is_none")]
|
|
||||||
pub details: Option<DetailsView>,
|
|
||||||
pub error: Option<ResponseError>,
|
|
||||||
#[serde(serialize_with = "serialize_duration", default)]
|
|
||||||
pub duration: Option<Duration>,
|
|
||||||
#[serde(with = "time::serde::rfc3339")]
|
|
||||||
pub enqueued_at: OffsetDateTime,
|
|
||||||
#[serde(with = "time::serde::rfc3339::option", default)]
|
|
||||||
pub started_at: Option<OffsetDateTime>,
|
|
||||||
#[serde(with = "time::serde::rfc3339::option", default)]
|
|
||||||
pub finished_at: Option<OffsetDateTime>,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl TaskView {
|
|
||||||
pub fn from_task(task: &Task) -> TaskView {
|
|
||||||
TaskView {
|
|
||||||
uid: task.uid,
|
|
||||||
index_uid: task.index_uid().map(ToOwned::to_owned),
|
|
||||||
status: task.status,
|
|
||||||
kind: task.kind.as_kind(),
|
|
||||||
canceled_by: task.canceled_by,
|
|
||||||
details: task.details.clone().map(DetailsView::from),
|
|
||||||
error: task.error.clone(),
|
|
||||||
duration: task.started_at.zip(task.finished_at).map(|(start, end)| end - start),
|
|
||||||
enqueued_at: task.enqueued_at,
|
|
||||||
started_at: task.started_at,
|
|
||||||
finished_at: task.finished_at,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Default, Debug, PartialEq, Eq, Clone, Serialize)]
|
|
||||||
#[serde(rename_all = "camelCase")]
|
|
||||||
pub struct DetailsView {
|
|
||||||
#[serde(skip_serializing_if = "Option::is_none")]
|
|
||||||
pub received_documents: Option<u64>,
|
|
||||||
#[serde(skip_serializing_if = "Option::is_none")]
|
|
||||||
pub indexed_documents: Option<Option<u64>>,
|
|
||||||
#[serde(skip_serializing_if = "Option::is_none")]
|
|
||||||
pub primary_key: Option<Option<String>>,
|
|
||||||
#[serde(skip_serializing_if = "Option::is_none")]
|
|
||||||
pub provided_ids: Option<usize>,
|
|
||||||
#[serde(skip_serializing_if = "Option::is_none")]
|
|
||||||
pub deleted_documents: Option<Option<u64>>,
|
|
||||||
#[serde(skip_serializing_if = "Option::is_none")]
|
|
||||||
pub matched_tasks: Option<u64>,
|
|
||||||
#[serde(skip_serializing_if = "Option::is_none")]
|
|
||||||
pub canceled_tasks: Option<Option<u64>>,
|
|
||||||
#[serde(skip_serializing_if = "Option::is_none")]
|
|
||||||
pub deleted_tasks: Option<Option<u64>>,
|
|
||||||
#[serde(skip_serializing_if = "Option::is_none")]
|
|
||||||
pub original_filter: Option<Option<String>>,
|
|
||||||
#[serde(skip_serializing_if = "Option::is_none")]
|
|
||||||
pub dump_uid: Option<Option<String>>,
|
|
||||||
#[serde(skip_serializing_if = "Option::is_none")]
|
|
||||||
#[serde(flatten)]
|
|
||||||
pub settings: Option<Box<Settings<Unchecked>>>,
|
|
||||||
#[serde(skip_serializing_if = "Option::is_none")]
|
|
||||||
pub swaps: Option<Vec<IndexSwap>>,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl From<Details> for DetailsView {
|
|
||||||
fn from(details: Details) -> Self {
|
|
||||||
match details {
|
|
||||||
Details::DocumentAdditionOrUpdate { received_documents, indexed_documents } => {
|
|
||||||
DetailsView {
|
|
||||||
received_documents: Some(received_documents),
|
|
||||||
indexed_documents: Some(indexed_documents),
|
|
||||||
..DetailsView::default()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Details::SettingsUpdate { settings } => {
|
|
||||||
DetailsView { settings: Some(settings), ..DetailsView::default() }
|
|
||||||
}
|
|
||||||
Details::IndexInfo { primary_key } => {
|
|
||||||
DetailsView { primary_key: Some(primary_key), ..DetailsView::default() }
|
|
||||||
}
|
|
||||||
Details::DocumentDeletion {
|
|
||||||
provided_ids: received_document_ids,
|
|
||||||
deleted_documents,
|
|
||||||
} => DetailsView {
|
|
||||||
provided_ids: Some(received_document_ids),
|
|
||||||
deleted_documents: Some(deleted_documents),
|
|
||||||
original_filter: Some(None),
|
|
||||||
..DetailsView::default()
|
|
||||||
},
|
|
||||||
Details::DocumentDeletionByFilter { original_filter, deleted_documents } => {
|
|
||||||
DetailsView {
|
|
||||||
provided_ids: Some(0),
|
|
||||||
original_filter: Some(Some(original_filter)),
|
|
||||||
deleted_documents: Some(deleted_documents),
|
|
||||||
..DetailsView::default()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Details::ClearAll { deleted_documents } => {
|
|
||||||
DetailsView { deleted_documents: Some(deleted_documents), ..DetailsView::default() }
|
|
||||||
}
|
|
||||||
Details::TaskCancelation { matched_tasks, canceled_tasks, original_filter } => {
|
|
||||||
DetailsView {
|
|
||||||
matched_tasks: Some(matched_tasks),
|
|
||||||
canceled_tasks: Some(canceled_tasks),
|
|
||||||
original_filter: Some(Some(original_filter)),
|
|
||||||
..DetailsView::default()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Details::TaskDeletion { matched_tasks, deleted_tasks, original_filter } => {
|
|
||||||
DetailsView {
|
|
||||||
matched_tasks: Some(matched_tasks),
|
|
||||||
deleted_tasks: Some(deleted_tasks),
|
|
||||||
original_filter: Some(Some(original_filter)),
|
|
||||||
..DetailsView::default()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Details::Dump { dump_uid } => {
|
|
||||||
DetailsView { dump_uid: Some(dump_uid), ..DetailsView::default() }
|
|
||||||
}
|
|
||||||
Details::IndexSwap { swaps } => {
|
|
||||||
DetailsView { swaps: Some(swaps), ..Default::default() }
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug, Deserr)]
|
#[derive(Debug, Deserr)]
|
||||||
#[deserr(error = DeserrQueryParamError, rename_all = camelCase, deny_unknown_fields)]
|
#[deserr(error = DeserrQueryParamError, rename_all = camelCase, deny_unknown_fields)]
|
||||||
pub struct TasksFilterQuery {
|
pub struct TasksFilterQuery {
|
||||||
|
@ -735,6 +735,9 @@ pub fn perform_facet_search(
|
|||||||
if let Some(facet_query) = &facet_query {
|
if let Some(facet_query) = &facet_query {
|
||||||
facet_search.query(facet_query);
|
facet_search.query(facet_query);
|
||||||
}
|
}
|
||||||
|
if let Some(max_facets) = index.max_values_per_facet(&rtxn)? {
|
||||||
|
facet_search.max_values(max_facets as usize);
|
||||||
|
}
|
||||||
|
|
||||||
Ok(FacetSearchResult {
|
Ok(FacetSearchResult {
|
||||||
facet_hits: facet_search.execute()?,
|
facet_hits: facet_search.execute()?,
|
||||||
@ -897,6 +900,14 @@ fn format_fields<'a>(
|
|||||||
let mut matches_position = compute_matches.then(BTreeMap::new);
|
let mut matches_position = compute_matches.then(BTreeMap::new);
|
||||||
let mut document = document.clone();
|
let mut document = document.clone();
|
||||||
|
|
||||||
|
// reduce the formatted option list to the attributes that should be formatted,
|
||||||
|
// instead of all the attributes to display.
|
||||||
|
let formatting_fields_options: Vec<_> = formatted_options
|
||||||
|
.iter()
|
||||||
|
.filter(|(_, option)| option.should_format())
|
||||||
|
.map(|(fid, option)| (field_ids_map.name(*fid).unwrap(), option))
|
||||||
|
.collect();
|
||||||
|
|
||||||
// select the attributes to retrieve
|
// select the attributes to retrieve
|
||||||
let displayable_names =
|
let displayable_names =
|
||||||
displayable_ids.iter().map(|&fid| field_ids_map.name(fid).expect("Missing field name"));
|
displayable_ids.iter().map(|&fid| field_ids_map.name(fid).expect("Missing field name"));
|
||||||
@ -905,13 +916,15 @@ fn format_fields<'a>(
|
|||||||
// to the value and merge them together. eg. If a user said he wanted to highlight `doggo`
|
// to the value and merge them together. eg. If a user said he wanted to highlight `doggo`
|
||||||
// and crop `doggo.name`. `doggo.name` needs to be highlighted + cropped while `doggo.age` is only
|
// and crop `doggo.name`. `doggo.name` needs to be highlighted + cropped while `doggo.age` is only
|
||||||
// highlighted.
|
// highlighted.
|
||||||
let format = formatted_options
|
// Warn: The time to compute the format list scales with the number of fields to format;
|
||||||
|
// cumulated with map_leaf_values that iterates over all the nested fields, it gives a quadratic complexity:
|
||||||
|
// d*f where d is the total number of fields to display and f is the total number of fields to format.
|
||||||
|
let format = formatting_fields_options
|
||||||
.iter()
|
.iter()
|
||||||
.filter(|(field, _option)| {
|
.filter(|(name, _option)| {
|
||||||
let name = field_ids_map.name(**field).unwrap();
|
|
||||||
milli::is_faceted_by(name, key) || milli::is_faceted_by(key, name)
|
milli::is_faceted_by(name, key) || milli::is_faceted_by(key, name)
|
||||||
})
|
})
|
||||||
.map(|(_, option)| *option)
|
.map(|(_, option)| **option)
|
||||||
.reduce(|acc, option| acc.merge(option));
|
.reduce(|acc, option| acc.merge(option));
|
||||||
let mut infos = Vec::new();
|
let mut infos = Vec::new();
|
||||||
|
|
||||||
@ -1008,7 +1021,7 @@ fn format_value<'a>(
|
|||||||
let value = matcher.format(format_options);
|
let value = matcher.format(format_options);
|
||||||
Value::String(value.into_owned())
|
Value::String(value.into_owned())
|
||||||
}
|
}
|
||||||
None => Value::Number(number),
|
None => Value::String(s),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
value => value,
|
value => value,
|
||||||
|
@ -59,7 +59,7 @@ async fn import_dump_v1_movie_raw() {
|
|||||||
"dictionary": [],
|
"dictionary": [],
|
||||||
"synonyms": {},
|
"synonyms": {},
|
||||||
"distinctAttribute": null,
|
"distinctAttribute": null,
|
||||||
"proximityPrecision": null,
|
"proximityPrecision": "byWord",
|
||||||
"typoTolerance": {
|
"typoTolerance": {
|
||||||
"enabled": true,
|
"enabled": true,
|
||||||
"minWordSizeForTypos": {
|
"minWordSizeForTypos": {
|
||||||
@ -77,8 +77,7 @@ async fn import_dump_v1_movie_raw() {
|
|||||||
},
|
},
|
||||||
"pagination": {
|
"pagination": {
|
||||||
"maxTotalHits": 1000
|
"maxTotalHits": 1000
|
||||||
},
|
}
|
||||||
"embedders": {}
|
|
||||||
}
|
}
|
||||||
"###
|
"###
|
||||||
);
|
);
|
||||||
@ -221,7 +220,7 @@ async fn import_dump_v1_movie_with_settings() {
|
|||||||
"dictionary": [],
|
"dictionary": [],
|
||||||
"synonyms": {},
|
"synonyms": {},
|
||||||
"distinctAttribute": null,
|
"distinctAttribute": null,
|
||||||
"proximityPrecision": null,
|
"proximityPrecision": "byWord",
|
||||||
"typoTolerance": {
|
"typoTolerance": {
|
||||||
"enabled": true,
|
"enabled": true,
|
||||||
"minWordSizeForTypos": {
|
"minWordSizeForTypos": {
|
||||||
@ -239,8 +238,7 @@ async fn import_dump_v1_movie_with_settings() {
|
|||||||
},
|
},
|
||||||
"pagination": {
|
"pagination": {
|
||||||
"maxTotalHits": 1000
|
"maxTotalHits": 1000
|
||||||
},
|
}
|
||||||
"embedders": {}
|
|
||||||
}
|
}
|
||||||
"###
|
"###
|
||||||
);
|
);
|
||||||
@ -369,7 +367,7 @@ async fn import_dump_v1_rubygems_with_settings() {
|
|||||||
"dictionary": [],
|
"dictionary": [],
|
||||||
"synonyms": {},
|
"synonyms": {},
|
||||||
"distinctAttribute": null,
|
"distinctAttribute": null,
|
||||||
"proximityPrecision": null,
|
"proximityPrecision": "byWord",
|
||||||
"typoTolerance": {
|
"typoTolerance": {
|
||||||
"enabled": true,
|
"enabled": true,
|
||||||
"minWordSizeForTypos": {
|
"minWordSizeForTypos": {
|
||||||
@ -387,8 +385,7 @@ async fn import_dump_v1_rubygems_with_settings() {
|
|||||||
},
|
},
|
||||||
"pagination": {
|
"pagination": {
|
||||||
"maxTotalHits": 1000
|
"maxTotalHits": 1000
|
||||||
},
|
}
|
||||||
"embedders": {}
|
|
||||||
}
|
}
|
||||||
"###
|
"###
|
||||||
);
|
);
|
||||||
@ -503,7 +500,7 @@ async fn import_dump_v2_movie_raw() {
|
|||||||
"dictionary": [],
|
"dictionary": [],
|
||||||
"synonyms": {},
|
"synonyms": {},
|
||||||
"distinctAttribute": null,
|
"distinctAttribute": null,
|
||||||
"proximityPrecision": null,
|
"proximityPrecision": "byWord",
|
||||||
"typoTolerance": {
|
"typoTolerance": {
|
||||||
"enabled": true,
|
"enabled": true,
|
||||||
"minWordSizeForTypos": {
|
"minWordSizeForTypos": {
|
||||||
@ -521,8 +518,7 @@ async fn import_dump_v2_movie_raw() {
|
|||||||
},
|
},
|
||||||
"pagination": {
|
"pagination": {
|
||||||
"maxTotalHits": 1000
|
"maxTotalHits": 1000
|
||||||
},
|
}
|
||||||
"embedders": {}
|
|
||||||
}
|
}
|
||||||
"###
|
"###
|
||||||
);
|
);
|
||||||
@ -649,7 +645,7 @@ async fn import_dump_v2_movie_with_settings() {
|
|||||||
"dictionary": [],
|
"dictionary": [],
|
||||||
"synonyms": {},
|
"synonyms": {},
|
||||||
"distinctAttribute": null,
|
"distinctAttribute": null,
|
||||||
"proximityPrecision": null,
|
"proximityPrecision": "byWord",
|
||||||
"typoTolerance": {
|
"typoTolerance": {
|
||||||
"enabled": true,
|
"enabled": true,
|
||||||
"minWordSizeForTypos": {
|
"minWordSizeForTypos": {
|
||||||
@ -667,8 +663,7 @@ async fn import_dump_v2_movie_with_settings() {
|
|||||||
},
|
},
|
||||||
"pagination": {
|
"pagination": {
|
||||||
"maxTotalHits": 1000
|
"maxTotalHits": 1000
|
||||||
},
|
}
|
||||||
"embedders": {}
|
|
||||||
}
|
}
|
||||||
"###
|
"###
|
||||||
);
|
);
|
||||||
@ -794,7 +789,7 @@ async fn import_dump_v2_rubygems_with_settings() {
|
|||||||
"dictionary": [],
|
"dictionary": [],
|
||||||
"synonyms": {},
|
"synonyms": {},
|
||||||
"distinctAttribute": null,
|
"distinctAttribute": null,
|
||||||
"proximityPrecision": null,
|
"proximityPrecision": "byWord",
|
||||||
"typoTolerance": {
|
"typoTolerance": {
|
||||||
"enabled": true,
|
"enabled": true,
|
||||||
"minWordSizeForTypos": {
|
"minWordSizeForTypos": {
|
||||||
@ -812,8 +807,7 @@ async fn import_dump_v2_rubygems_with_settings() {
|
|||||||
},
|
},
|
||||||
"pagination": {
|
"pagination": {
|
||||||
"maxTotalHits": 1000
|
"maxTotalHits": 1000
|
||||||
},
|
}
|
||||||
"embedders": {}
|
|
||||||
}
|
}
|
||||||
"###
|
"###
|
||||||
);
|
);
|
||||||
@ -928,7 +922,7 @@ async fn import_dump_v3_movie_raw() {
|
|||||||
"dictionary": [],
|
"dictionary": [],
|
||||||
"synonyms": {},
|
"synonyms": {},
|
||||||
"distinctAttribute": null,
|
"distinctAttribute": null,
|
||||||
"proximityPrecision": null,
|
"proximityPrecision": "byWord",
|
||||||
"typoTolerance": {
|
"typoTolerance": {
|
||||||
"enabled": true,
|
"enabled": true,
|
||||||
"minWordSizeForTypos": {
|
"minWordSizeForTypos": {
|
||||||
@ -946,8 +940,7 @@ async fn import_dump_v3_movie_raw() {
|
|||||||
},
|
},
|
||||||
"pagination": {
|
"pagination": {
|
||||||
"maxTotalHits": 1000
|
"maxTotalHits": 1000
|
||||||
},
|
}
|
||||||
"embedders": {}
|
|
||||||
}
|
}
|
||||||
"###
|
"###
|
||||||
);
|
);
|
||||||
@ -1074,7 +1067,7 @@ async fn import_dump_v3_movie_with_settings() {
|
|||||||
"dictionary": [],
|
"dictionary": [],
|
||||||
"synonyms": {},
|
"synonyms": {},
|
||||||
"distinctAttribute": null,
|
"distinctAttribute": null,
|
||||||
"proximityPrecision": null,
|
"proximityPrecision": "byWord",
|
||||||
"typoTolerance": {
|
"typoTolerance": {
|
||||||
"enabled": true,
|
"enabled": true,
|
||||||
"minWordSizeForTypos": {
|
"minWordSizeForTypos": {
|
||||||
@ -1092,8 +1085,7 @@ async fn import_dump_v3_movie_with_settings() {
|
|||||||
},
|
},
|
||||||
"pagination": {
|
"pagination": {
|
||||||
"maxTotalHits": 1000
|
"maxTotalHits": 1000
|
||||||
},
|
}
|
||||||
"embedders": {}
|
|
||||||
}
|
}
|
||||||
"###
|
"###
|
||||||
);
|
);
|
||||||
@ -1219,7 +1211,7 @@ async fn import_dump_v3_rubygems_with_settings() {
|
|||||||
"dictionary": [],
|
"dictionary": [],
|
||||||
"synonyms": {},
|
"synonyms": {},
|
||||||
"distinctAttribute": null,
|
"distinctAttribute": null,
|
||||||
"proximityPrecision": null,
|
"proximityPrecision": "byWord",
|
||||||
"typoTolerance": {
|
"typoTolerance": {
|
||||||
"enabled": true,
|
"enabled": true,
|
||||||
"minWordSizeForTypos": {
|
"minWordSizeForTypos": {
|
||||||
@ -1237,8 +1229,7 @@ async fn import_dump_v3_rubygems_with_settings() {
|
|||||||
},
|
},
|
||||||
"pagination": {
|
"pagination": {
|
||||||
"maxTotalHits": 1000
|
"maxTotalHits": 1000
|
||||||
},
|
}
|
||||||
"embedders": {}
|
|
||||||
}
|
}
|
||||||
"###
|
"###
|
||||||
);
|
);
|
||||||
@ -1353,7 +1344,7 @@ async fn import_dump_v4_movie_raw() {
|
|||||||
"dictionary": [],
|
"dictionary": [],
|
||||||
"synonyms": {},
|
"synonyms": {},
|
||||||
"distinctAttribute": null,
|
"distinctAttribute": null,
|
||||||
"proximityPrecision": null,
|
"proximityPrecision": "byWord",
|
||||||
"typoTolerance": {
|
"typoTolerance": {
|
||||||
"enabled": true,
|
"enabled": true,
|
||||||
"minWordSizeForTypos": {
|
"minWordSizeForTypos": {
|
||||||
@ -1371,8 +1362,7 @@ async fn import_dump_v4_movie_raw() {
|
|||||||
},
|
},
|
||||||
"pagination": {
|
"pagination": {
|
||||||
"maxTotalHits": 1000
|
"maxTotalHits": 1000
|
||||||
},
|
}
|
||||||
"embedders": {}
|
|
||||||
}
|
}
|
||||||
"###
|
"###
|
||||||
);
|
);
|
||||||
@ -1499,7 +1489,7 @@ async fn import_dump_v4_movie_with_settings() {
|
|||||||
"dictionary": [],
|
"dictionary": [],
|
||||||
"synonyms": {},
|
"synonyms": {},
|
||||||
"distinctAttribute": null,
|
"distinctAttribute": null,
|
||||||
"proximityPrecision": null,
|
"proximityPrecision": "byWord",
|
||||||
"typoTolerance": {
|
"typoTolerance": {
|
||||||
"enabled": true,
|
"enabled": true,
|
||||||
"minWordSizeForTypos": {
|
"minWordSizeForTypos": {
|
||||||
@ -1517,8 +1507,7 @@ async fn import_dump_v4_movie_with_settings() {
|
|||||||
},
|
},
|
||||||
"pagination": {
|
"pagination": {
|
||||||
"maxTotalHits": 1000
|
"maxTotalHits": 1000
|
||||||
},
|
}
|
||||||
"embedders": {}
|
|
||||||
}
|
}
|
||||||
"###
|
"###
|
||||||
);
|
);
|
||||||
@ -1644,7 +1633,7 @@ async fn import_dump_v4_rubygems_with_settings() {
|
|||||||
"dictionary": [],
|
"dictionary": [],
|
||||||
"synonyms": {},
|
"synonyms": {},
|
||||||
"distinctAttribute": null,
|
"distinctAttribute": null,
|
||||||
"proximityPrecision": null,
|
"proximityPrecision": "byWord",
|
||||||
"typoTolerance": {
|
"typoTolerance": {
|
||||||
"enabled": true,
|
"enabled": true,
|
||||||
"minWordSizeForTypos": {
|
"minWordSizeForTypos": {
|
||||||
@ -1662,8 +1651,7 @@ async fn import_dump_v4_rubygems_with_settings() {
|
|||||||
},
|
},
|
||||||
"pagination": {
|
"pagination": {
|
||||||
"maxTotalHits": 1000
|
"maxTotalHits": 1000
|
||||||
},
|
}
|
||||||
"embedders": {}
|
|
||||||
}
|
}
|
||||||
"###
|
"###
|
||||||
);
|
);
|
||||||
@ -1907,8 +1895,7 @@ async fn import_dump_v6_containing_experimental_features() {
|
|||||||
},
|
},
|
||||||
"pagination": {
|
"pagination": {
|
||||||
"maxTotalHits": 1000
|
"maxTotalHits": 1000
|
||||||
},
|
}
|
||||||
"embedders": {}
|
|
||||||
}
|
}
|
||||||
"###);
|
"###);
|
||||||
|
|
||||||
|
@ -105,6 +105,24 @@ async fn more_advanced_facet_search() {
|
|||||||
snapshot!(response["facetHits"].as_array().unwrap().len(), @"1");
|
snapshot!(response["facetHits"].as_array().unwrap().len(), @"1");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[actix_rt::test]
|
||||||
|
async fn simple_facet_search_with_max_values() {
|
||||||
|
let server = Server::new().await;
|
||||||
|
let index = server.index("test");
|
||||||
|
|
||||||
|
let documents = DOCUMENTS.clone();
|
||||||
|
index.update_settings_faceting(json!({ "maxValuesPerFacet": 1 })).await;
|
||||||
|
index.update_settings_filterable_attributes(json!(["genres"])).await;
|
||||||
|
index.add_documents(documents, None).await;
|
||||||
|
index.wait_task(2).await;
|
||||||
|
|
||||||
|
let (response, code) =
|
||||||
|
index.facet_search(json!({"facetName": "genres", "facetQuery": "a"})).await;
|
||||||
|
|
||||||
|
assert_eq!(code, 200, "{}", response);
|
||||||
|
assert_eq!(dbg!(response)["facetHits"].as_array().unwrap().len(), 1);
|
||||||
|
}
|
||||||
|
|
||||||
#[actix_rt::test]
|
#[actix_rt::test]
|
||||||
async fn non_filterable_facet_search_error() {
|
async fn non_filterable_facet_search_error() {
|
||||||
let server = Server::new().await;
|
let server = Server::new().await;
|
||||||
|
@ -21,9 +21,9 @@ async fn index_with_documents<'a>(server: &'a Server, documents: &Value) -> Inde
|
|||||||
"###);
|
"###);
|
||||||
|
|
||||||
let (response, code) = index
|
let (response, code) = index
|
||||||
.update_settings(
|
.update_settings(json!({ "embedders": {"default": {
|
||||||
json!({ "embedders": {"default": {"source": {"userProvided": {"dimensions": 2}}}} }),
|
"source": "userProvided",
|
||||||
)
|
"dimensions": 2}}} ))
|
||||||
.await;
|
.await;
|
||||||
assert_eq!(202, code, "{:?}", response);
|
assert_eq!(202, code, "{:?}", response);
|
||||||
index.wait_task(response.uid()).await;
|
index.wait_task(response.uid()).await;
|
||||||
@ -56,6 +56,15 @@ static SIMPLE_SEARCH_DOCUMENTS: Lazy<Value> = Lazy::new(|| {
|
|||||||
}])
|
}])
|
||||||
});
|
});
|
||||||
|
|
||||||
|
static SINGLE_DOCUMENT: Lazy<Value> = Lazy::new(|| {
|
||||||
|
json!([{
|
||||||
|
"title": "Shazam!",
|
||||||
|
"desc": "a Captain Marvel ersatz",
|
||||||
|
"id": "1",
|
||||||
|
"_vectors": {"default": [1.0, 3.0]},
|
||||||
|
}])
|
||||||
|
});
|
||||||
|
|
||||||
#[actix_rt::test]
|
#[actix_rt::test]
|
||||||
async fn simple_search() {
|
async fn simple_search() {
|
||||||
let server = Server::new().await;
|
let server = Server::new().await;
|
||||||
@ -149,3 +158,18 @@ async fn invalid_semantic_ratio() {
|
|||||||
}
|
}
|
||||||
"###);
|
"###);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[actix_rt::test]
|
||||||
|
async fn single_document() {
|
||||||
|
let server = Server::new().await;
|
||||||
|
let index = index_with_documents(&server, &SINGLE_DOCUMENT).await;
|
||||||
|
|
||||||
|
let (response, code) = index
|
||||||
|
.search_post(
|
||||||
|
json!({"vector": [1.0, 3.0], "hybrid": {"semanticRatio": 1.0}, "showRankingScore": true}),
|
||||||
|
)
|
||||||
|
.await;
|
||||||
|
|
||||||
|
snapshot!(code, @"200 OK");
|
||||||
|
snapshot!(response["hits"][0], @r###"{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_rankingScore":1.0,"_semanticScore":1.0}"###);
|
||||||
|
}
|
||||||
|
@ -890,13 +890,21 @@ async fn experimental_feature_vector_store() {
|
|||||||
let (response, code) = index
|
let (response, code) = index
|
||||||
.update_settings(json!({"embedders": {
|
.update_settings(json!({"embedders": {
|
||||||
"manual": {
|
"manual": {
|
||||||
"source": {
|
"source": "userProvided",
|
||||||
"userProvided": {"dimensions": 3}
|
"dimensions": 3,
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}}))
|
}}))
|
||||||
.await;
|
.await;
|
||||||
|
|
||||||
|
meili_snap::snapshot!(response, @r###"
|
||||||
|
{
|
||||||
|
"taskUid": 1,
|
||||||
|
"indexUid": "test",
|
||||||
|
"status": "enqueued",
|
||||||
|
"type": "settingsUpdate",
|
||||||
|
"enqueuedAt": "[date]"
|
||||||
|
}
|
||||||
|
"###);
|
||||||
meili_snap::snapshot!(code, @"202 Accepted");
|
meili_snap::snapshot!(code, @"202 Accepted");
|
||||||
let response = index.wait_task(response.uid()).await;
|
let response = index.wait_task(response.uid()).await;
|
||||||
|
|
||||||
|
@ -54,7 +54,7 @@ async fn get_settings() {
|
|||||||
let (response, code) = index.settings().await;
|
let (response, code) = index.settings().await;
|
||||||
assert_eq!(code, 200);
|
assert_eq!(code, 200);
|
||||||
let settings = response.as_object().unwrap();
|
let settings = response.as_object().unwrap();
|
||||||
assert_eq!(settings.keys().len(), 16);
|
assert_eq!(settings.keys().len(), 15);
|
||||||
assert_eq!(settings["displayedAttributes"], json!(["*"]));
|
assert_eq!(settings["displayedAttributes"], json!(["*"]));
|
||||||
assert_eq!(settings["searchableAttributes"], json!(["*"]));
|
assert_eq!(settings["searchableAttributes"], json!(["*"]));
|
||||||
assert_eq!(settings["filterableAttributes"], json!([]));
|
assert_eq!(settings["filterableAttributes"], json!([]));
|
||||||
@ -83,7 +83,7 @@ async fn get_settings() {
|
|||||||
"maxTotalHits": 1000,
|
"maxTotalHits": 1000,
|
||||||
})
|
})
|
||||||
);
|
);
|
||||||
assert_eq!(settings["embedders"], json!({}));
|
assert_eq!(settings["proximityPrecision"], json!("byWord"));
|
||||||
}
|
}
|
||||||
|
|
||||||
#[actix_rt::test]
|
#[actix_rt::test]
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
mod errors;
|
mod errors;
|
||||||
|
mod webhook;
|
||||||
|
|
||||||
use meili_snap::insta::assert_json_snapshot;
|
use meili_snap::insta::assert_json_snapshot;
|
||||||
use time::format_description::well_known::Rfc3339;
|
use time::format_description::well_known::Rfc3339;
|
||||||
|
123
meilisearch/tests/tasks/webhook.rs
Normal file
123
meilisearch/tests/tasks/webhook.rs
Normal file
@ -0,0 +1,123 @@
|
|||||||
|
//! To test the webhook, we need to spawn a new server with a URL listening for
|
||||||
|
//! post requests. The webhook handle starts a server and forwards all the
|
||||||
|
//! received requests into a channel for you to handle.
|
||||||
|
|
||||||
|
use std::sync::Arc;
|
||||||
|
|
||||||
|
use actix_http::body::MessageBody;
|
||||||
|
use actix_web::dev::{ServiceFactory, ServiceResponse};
|
||||||
|
use actix_web::web::{Bytes, Data};
|
||||||
|
use actix_web::{post, App, HttpResponse, HttpServer};
|
||||||
|
use meili_snap::{json_string, snapshot};
|
||||||
|
use meilisearch::Opt;
|
||||||
|
use tokio::sync::mpsc;
|
||||||
|
use url::Url;
|
||||||
|
|
||||||
|
use crate::common::{default_settings, Server};
|
||||||
|
use crate::json;
|
||||||
|
|
||||||
|
#[post("/")]
|
||||||
|
async fn forward_body(sender: Data<mpsc::UnboundedSender<Vec<u8>>>, body: Bytes) -> HttpResponse {
|
||||||
|
let body = body.to_vec();
|
||||||
|
sender.send(body).unwrap();
|
||||||
|
HttpResponse::Ok().into()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn create_app(
|
||||||
|
sender: Arc<mpsc::UnboundedSender<Vec<u8>>>,
|
||||||
|
) -> actix_web::App<
|
||||||
|
impl ServiceFactory<
|
||||||
|
actix_web::dev::ServiceRequest,
|
||||||
|
Config = (),
|
||||||
|
Response = ServiceResponse<impl MessageBody>,
|
||||||
|
Error = actix_web::Error,
|
||||||
|
InitError = (),
|
||||||
|
>,
|
||||||
|
> {
|
||||||
|
App::new().service(forward_body).app_data(Data::from(sender))
|
||||||
|
}
|
||||||
|
|
||||||
|
struct WebhookHandle {
|
||||||
|
pub server_handle: tokio::task::JoinHandle<Result<(), std::io::Error>>,
|
||||||
|
pub url: String,
|
||||||
|
pub receiver: mpsc::UnboundedReceiver<Vec<u8>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn create_webhook_server() -> WebhookHandle {
|
||||||
|
let mut log_builder = env_logger::Builder::new();
|
||||||
|
log_builder.parse_filters("info");
|
||||||
|
log_builder.init();
|
||||||
|
|
||||||
|
let (sender, receiver) = mpsc::unbounded_channel();
|
||||||
|
let sender = Arc::new(sender);
|
||||||
|
|
||||||
|
// By listening on the port 0, the system will give us any available port.
|
||||||
|
let server =
|
||||||
|
HttpServer::new(move || create_app(sender.clone())).bind(("127.0.0.1", 0)).unwrap();
|
||||||
|
let (ip, scheme) = server.addrs_with_scheme()[0];
|
||||||
|
let url = format!("{scheme}://{ip}/");
|
||||||
|
|
||||||
|
let server_handle = tokio::spawn(server.run());
|
||||||
|
WebhookHandle { server_handle, url, receiver }
|
||||||
|
}
|
||||||
|
|
||||||
|
#[actix_web::test]
|
||||||
|
async fn test_basic_webhook() {
|
||||||
|
let WebhookHandle { server_handle, url, mut receiver } = create_webhook_server().await;
|
||||||
|
|
||||||
|
let db_path = tempfile::tempdir().unwrap();
|
||||||
|
let server = Server::new_with_options(Opt {
|
||||||
|
task_webhook_url: Some(Url::parse(&url).unwrap()),
|
||||||
|
..default_settings(db_path.path())
|
||||||
|
})
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
let index = server.index("tamo");
|
||||||
|
// May be flaky: we're relying on the fact that while the first document addition is processed, the other
|
||||||
|
// operations will be received and will be batched together. If it doesn't happen it's not a problem
|
||||||
|
// the rest of the test won't assume anything about the number of tasks per batch.
|
||||||
|
for i in 0..5 {
|
||||||
|
let (_, _status) = index.add_documents(json!({ "id": i, "doggo": "bone" }), None).await;
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut nb_tasks = 0;
|
||||||
|
while let Some(payload) = receiver.recv().await {
|
||||||
|
let payload = String::from_utf8(payload).unwrap();
|
||||||
|
let jsonl = payload.split('\n');
|
||||||
|
for json in jsonl {
|
||||||
|
if json.is_empty() {
|
||||||
|
break; // we reached EOF
|
||||||
|
}
|
||||||
|
nb_tasks += 1;
|
||||||
|
let json: serde_json::Value = serde_json::from_str(json).unwrap();
|
||||||
|
snapshot!(
|
||||||
|
json_string!(json, { ".uid" => "[uid]", ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }),
|
||||||
|
@r###"
|
||||||
|
{
|
||||||
|
"uid": "[uid]",
|
||||||
|
"indexUid": "tamo",
|
||||||
|
"status": "succeeded",
|
||||||
|
"type": "documentAdditionOrUpdate",
|
||||||
|
"canceledBy": null,
|
||||||
|
"details": {
|
||||||
|
"receivedDocuments": 1,
|
||||||
|
"indexedDocuments": 1
|
||||||
|
},
|
||||||
|
"error": null,
|
||||||
|
"duration": "[duration]",
|
||||||
|
"enqueuedAt": "[date]",
|
||||||
|
"startedAt": "[date]",
|
||||||
|
"finishedAt": "[date]"
|
||||||
|
}
|
||||||
|
"###);
|
||||||
|
}
|
||||||
|
if nb_tasks == 5 {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
assert!(nb_tasks == 5, "We should have received the 5 tasks but only received {nb_tasks}");
|
||||||
|
|
||||||
|
server_handle.abort();
|
||||||
|
}
|
@ -77,7 +77,7 @@ csv = "1.2.1"
|
|||||||
candle-core = { git = "https://github.com/huggingface/candle.git", version = "0.3.1" }
|
candle-core = { git = "https://github.com/huggingface/candle.git", version = "0.3.1" }
|
||||||
candle-transformers = { git = "https://github.com/huggingface/candle.git", version = "0.3.1" }
|
candle-transformers = { git = "https://github.com/huggingface/candle.git", version = "0.3.1" }
|
||||||
candle-nn = { git = "https://github.com/huggingface/candle.git", version = "0.3.1" }
|
candle-nn = { git = "https://github.com/huggingface/candle.git", version = "0.3.1" }
|
||||||
tokenizers = { git = "https://github.com/huggingface/tokenizers.git", tag = "v0.14.1", version = "0.14.1" }
|
tokenizers = { git = "https://github.com/huggingface/tokenizers.git", tag = "v0.14.1", version = "0.14.1", default_features = false, features = ["onig"] }
|
||||||
hf-hub = { git = "https://github.com/dureuill/hf-hub.git", branch = "rust_tls", default_features = false, features = [
|
hf-hub = { git = "https://github.com/dureuill/hf-hub.git", branch = "rust_tls", default_features = false, features = [
|
||||||
"online",
|
"online",
|
||||||
] }
|
] }
|
||||||
|
@ -192,7 +192,7 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco
|
|||||||
MissingDocumentField(#[from] crate::prompt::error::RenderPromptError),
|
MissingDocumentField(#[from] crate::prompt::error::RenderPromptError),
|
||||||
#[error(transparent)]
|
#[error(transparent)]
|
||||||
InvalidPrompt(#[from] crate::prompt::error::NewPromptError),
|
InvalidPrompt(#[from] crate::prompt::error::NewPromptError),
|
||||||
#[error("Invalid prompt in for embeddings with name '{0}': {1}.")]
|
#[error("`.embedders.{0}.documentTemplate`: Invalid template: {1}.")]
|
||||||
InvalidPromptForEmbeddings(String, crate::prompt::error::NewPromptError),
|
InvalidPromptForEmbeddings(String, crate::prompt::error::NewPromptError),
|
||||||
#[error("Too many embedders in the configuration. Found {0}, but limited to 256.")]
|
#[error("Too many embedders in the configuration. Found {0}, but limited to 256.")]
|
||||||
TooManyEmbedders(usize),
|
TooManyEmbedders(usize),
|
||||||
@ -200,6 +200,33 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco
|
|||||||
InvalidEmbedder(String),
|
InvalidEmbedder(String),
|
||||||
#[error("Too many vectors for document with id {0}: found {1}, but limited to 256.")]
|
#[error("Too many vectors for document with id {0}: found {1}, but limited to 256.")]
|
||||||
TooManyVectors(String, usize),
|
TooManyVectors(String, usize),
|
||||||
|
#[error("`.embedders.{embedder_name}`: Field `{field}` unavailable for source `{source_}` (only available for sources: {}). Available fields: {}",
|
||||||
|
allowed_sources_for_field
|
||||||
|
.iter()
|
||||||
|
.map(|accepted| format!("`{}`", accepted))
|
||||||
|
.collect::<Vec<String>>()
|
||||||
|
.join(", "),
|
||||||
|
allowed_fields_for_source
|
||||||
|
.iter()
|
||||||
|
.map(|accepted| format!("`{}`", accepted))
|
||||||
|
.collect::<Vec<String>>()
|
||||||
|
.join(", ")
|
||||||
|
)]
|
||||||
|
InvalidFieldForSource {
|
||||||
|
embedder_name: String,
|
||||||
|
source_: crate::vector::settings::EmbedderSource,
|
||||||
|
field: &'static str,
|
||||||
|
allowed_fields_for_source: &'static [&'static str],
|
||||||
|
allowed_sources_for_field: &'static [crate::vector::settings::EmbedderSource],
|
||||||
|
},
|
||||||
|
#[error("`.embedders.{embedder_name}.model`: Invalid model `{model}` for OpenAI. Supported models: {:?}", crate::vector::openai::EmbeddingModel::supported_models())]
|
||||||
|
InvalidOpenAiModel { embedder_name: String, model: String },
|
||||||
|
#[error("`.embedders.{embedder_name}`: Missing field `{field}` (note: this field is mandatory for source {source_})")]
|
||||||
|
MissingFieldForSource {
|
||||||
|
field: &'static str,
|
||||||
|
source_: crate::vector::settings::EmbedderSource,
|
||||||
|
embedder_name: String,
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
impl From<crate::vector::Error> for Error {
|
impl From<crate::vector::Error> for Error {
|
||||||
|
@ -27,8 +27,8 @@ static LEVDIST0: Lazy<LevBuilder> = Lazy::new(|| LevBuilder::new(0, true));
|
|||||||
static LEVDIST1: Lazy<LevBuilder> = Lazy::new(|| LevBuilder::new(1, true));
|
static LEVDIST1: Lazy<LevBuilder> = Lazy::new(|| LevBuilder::new(1, true));
|
||||||
static LEVDIST2: Lazy<LevBuilder> = Lazy::new(|| LevBuilder::new(2, true));
|
static LEVDIST2: Lazy<LevBuilder> = Lazy::new(|| LevBuilder::new(2, true));
|
||||||
|
|
||||||
/// The maximum number of facets returned by the facet search route.
|
/// The maximum number of values per facet returned by the facet search route.
|
||||||
const MAX_NUMBER_OF_FACETS: usize = 100;
|
const DEFAULT_MAX_NUMBER_OF_VALUES_PER_FACET: usize = 100;
|
||||||
|
|
||||||
pub mod facet;
|
pub mod facet;
|
||||||
mod fst_utils;
|
mod fst_utils;
|
||||||
@ -306,6 +306,7 @@ pub struct SearchForFacetValues<'a> {
|
|||||||
query: Option<String>,
|
query: Option<String>,
|
||||||
facet: String,
|
facet: String,
|
||||||
search_query: Search<'a>,
|
search_query: Search<'a>,
|
||||||
|
max_values: usize,
|
||||||
is_hybrid: bool,
|
is_hybrid: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -315,7 +316,13 @@ impl<'a> SearchForFacetValues<'a> {
|
|||||||
search_query: Search<'a>,
|
search_query: Search<'a>,
|
||||||
is_hybrid: bool,
|
is_hybrid: bool,
|
||||||
) -> SearchForFacetValues<'a> {
|
) -> SearchForFacetValues<'a> {
|
||||||
SearchForFacetValues { query: None, facet, search_query, is_hybrid }
|
SearchForFacetValues {
|
||||||
|
query: None,
|
||||||
|
facet,
|
||||||
|
search_query,
|
||||||
|
max_values: DEFAULT_MAX_NUMBER_OF_VALUES_PER_FACET,
|
||||||
|
is_hybrid,
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn query(&mut self, query: impl Into<String>) -> &mut Self {
|
pub fn query(&mut self, query: impl Into<String>) -> &mut Self {
|
||||||
@ -323,6 +330,11 @@ impl<'a> SearchForFacetValues<'a> {
|
|||||||
self
|
self
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn max_values(&mut self, max: usize) -> &mut Self {
|
||||||
|
self.max_values = max;
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
fn one_original_value_of(
|
fn one_original_value_of(
|
||||||
&self,
|
&self,
|
||||||
field_id: FieldId,
|
field_id: FieldId,
|
||||||
@ -462,7 +474,7 @@ impl<'a> SearchForFacetValues<'a> {
|
|||||||
.unwrap_or_else(|| left_bound.to_string());
|
.unwrap_or_else(|| left_bound.to_string());
|
||||||
results.push(FacetValueHit { value, count });
|
results.push(FacetValueHit { value, count });
|
||||||
}
|
}
|
||||||
if results.len() >= MAX_NUMBER_OF_FACETS {
|
if results.len() >= self.max_values {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -507,7 +519,7 @@ impl<'a> SearchForFacetValues<'a> {
|
|||||||
.unwrap_or_else(|| query.to_string());
|
.unwrap_or_else(|| query.to_string());
|
||||||
results.push(FacetValueHit { value, count });
|
results.push(FacetValueHit { value, count });
|
||||||
}
|
}
|
||||||
if results.len() >= MAX_NUMBER_OF_FACETS {
|
if results.len() >= self.max_values {
|
||||||
return Ok(ControlFlow::Break(()));
|
return Ok(ControlFlow::Break(()));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -15,6 +15,7 @@ pub struct BucketSortOutput {
|
|||||||
|
|
||||||
// TODO: would probably be good to regroup some of these inside of a struct?
|
// TODO: would probably be good to regroup some of these inside of a struct?
|
||||||
#[allow(clippy::too_many_arguments)]
|
#[allow(clippy::too_many_arguments)]
|
||||||
|
#[logging_timer::time]
|
||||||
pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
|
pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
|
||||||
ctx: &mut SearchContext<'ctx>,
|
ctx: &mut SearchContext<'ctx>,
|
||||||
mut ranking_rules: Vec<BoxRankingRule<'ctx, Q>>,
|
mut ranking_rules: Vec<BoxRankingRule<'ctx, Q>>,
|
||||||
|
@ -72,7 +72,7 @@ impl<'m> MatcherBuilder<'m> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Copy, Clone, Default)]
|
#[derive(Copy, Clone, Default, Debug)]
|
||||||
pub struct FormatOptions {
|
pub struct FormatOptions {
|
||||||
pub highlight: bool,
|
pub highlight: bool,
|
||||||
pub crop: Option<usize>,
|
pub crop: Option<usize>,
|
||||||
@ -82,6 +82,10 @@ impl FormatOptions {
|
|||||||
pub fn merge(self, other: Self) -> Self {
|
pub fn merge(self, other: Self) -> Self {
|
||||||
Self { highlight: self.highlight || other.highlight, crop: self.crop.or(other.crop) }
|
Self { highlight: self.highlight || other.highlight, crop: self.crop.or(other.crop) }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn should_format(&self) -> bool {
|
||||||
|
self.highlight || self.crop.is_some()
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone, Debug)]
|
#[derive(Clone, Debug)]
|
||||||
|
@ -191,6 +191,7 @@ fn resolve_maximally_reduced_query_graph(
|
|||||||
Ok(docids)
|
Ok(docids)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[logging_timer::time]
|
||||||
fn resolve_universe(
|
fn resolve_universe(
|
||||||
ctx: &mut SearchContext,
|
ctx: &mut SearchContext,
|
||||||
initial_universe: &RoaringBitmap,
|
initial_universe: &RoaringBitmap,
|
||||||
@ -556,6 +557,7 @@ pub fn execute_vector_search(
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[allow(clippy::too_many_arguments)]
|
#[allow(clippy::too_many_arguments)]
|
||||||
|
#[logging_timer::time]
|
||||||
pub fn execute_search(
|
pub fn execute_search(
|
||||||
ctx: &mut SearchContext,
|
ctx: &mut SearchContext,
|
||||||
query: Option<&str>,
|
query: Option<&str>,
|
||||||
|
@ -5,6 +5,7 @@ use super::*;
|
|||||||
use crate::{Result, SearchContext, MAX_WORD_LENGTH};
|
use crate::{Result, SearchContext, MAX_WORD_LENGTH};
|
||||||
|
|
||||||
/// Convert the tokenised search query into a list of located query terms.
|
/// Convert the tokenised search query into a list of located query terms.
|
||||||
|
#[logging_timer::time]
|
||||||
pub fn located_query_terms_from_tokens(
|
pub fn located_query_terms_from_tokens(
|
||||||
ctx: &mut SearchContext,
|
ctx: &mut SearchContext,
|
||||||
query: NormalizedTokenIter,
|
query: NormalizedTokenIter,
|
||||||
|
@ -2553,7 +2553,7 @@ mod tests {
|
|||||||
/// Vectors must be of the same length.
|
/// Vectors must be of the same length.
|
||||||
#[test]
|
#[test]
|
||||||
fn test_multiple_vectors() {
|
fn test_multiple_vectors() {
|
||||||
use crate::vector::settings::{EmbedderSettings, EmbeddingSettings};
|
use crate::vector::settings::EmbeddingSettings;
|
||||||
let index = TempIndex::new();
|
let index = TempIndex::new();
|
||||||
|
|
||||||
index
|
index
|
||||||
@ -2562,9 +2562,11 @@ mod tests {
|
|||||||
embedders.insert(
|
embedders.insert(
|
||||||
"manual".to_string(),
|
"manual".to_string(),
|
||||||
Setting::Set(EmbeddingSettings {
|
Setting::Set(EmbeddingSettings {
|
||||||
embedder_options: Setting::Set(EmbedderSettings::UserProvided(
|
source: Setting::Set(crate::vector::settings::EmbedderSource::UserProvided),
|
||||||
crate::vector::settings::UserProvidedSettings { dimensions: 3 },
|
model: Setting::NotSet,
|
||||||
)),
|
revision: Setting::NotSet,
|
||||||
|
api_key: Setting::NotSet,
|
||||||
|
dimensions: Setting::Set(3),
|
||||||
document_template: Setting::NotSet,
|
document_template: Setting::NotSet,
|
||||||
}),
|
}),
|
||||||
);
|
);
|
||||||
@ -2579,10 +2581,10 @@ mod tests {
|
|||||||
.unwrap();
|
.unwrap();
|
||||||
index.add_documents(documents!([{"id": 1, "_vectors": { "manual": [6, 7, 8] }}])).unwrap();
|
index.add_documents(documents!([{"id": 1, "_vectors": { "manual": [6, 7, 8] }}])).unwrap();
|
||||||
index
|
index
|
||||||
.add_documents(
|
.add_documents(
|
||||||
documents!([{"id": 2, "_vectors": { "manual": [[9, 10, 11], [12, 13, 14], [15, 16, 17]] }}]),
|
documents!([{"id": 2, "_vectors": { "manual": [[9, 10, 11], [12, 13, 14], [15, 16, 17]] }}]),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
let rtxn = index.read_txn().unwrap();
|
let rtxn = index.read_txn().unwrap();
|
||||||
let res = index.search(&rtxn).vector([0.0, 1.0, 2.0].to_vec()).execute().unwrap();
|
let res = index.search(&rtxn).vector([0.0, 1.0, 2.0].to_vec()).execute().unwrap();
|
||||||
|
@ -8,7 +8,7 @@ pub use self::index_documents::{
|
|||||||
MergeFn,
|
MergeFn,
|
||||||
};
|
};
|
||||||
pub use self::indexer_config::IndexerConfig;
|
pub use self::indexer_config::IndexerConfig;
|
||||||
pub use self::settings::{Setting, Settings};
|
pub use self::settings::{validate_embedding_settings, Setting, Settings};
|
||||||
pub use self::update_step::UpdateIndexingStep;
|
pub use self::update_step::UpdateIndexingStep;
|
||||||
pub use self::word_prefix_docids::WordPrefixDocids;
|
pub use self::word_prefix_docids::WordPrefixDocids;
|
||||||
pub use self::words_prefix_integer_docids::WordPrefixIntegerDocids;
|
pub use self::words_prefix_integer_docids::WordPrefixIntegerDocids;
|
||||||
|
@ -17,7 +17,7 @@ use crate::index::{DEFAULT_MIN_WORD_LEN_ONE_TYPO, DEFAULT_MIN_WORD_LEN_TWO_TYPOS
|
|||||||
use crate::proximity::ProximityPrecision;
|
use crate::proximity::ProximityPrecision;
|
||||||
use crate::update::index_documents::IndexDocumentsMethod;
|
use crate::update::index_documents::IndexDocumentsMethod;
|
||||||
use crate::update::{IndexDocuments, UpdateIndexingStep};
|
use crate::update::{IndexDocuments, UpdateIndexingStep};
|
||||||
use crate::vector::settings::{EmbeddingSettings, PromptSettings};
|
use crate::vector::settings::{check_set, check_unset, EmbedderSource, EmbeddingSettings};
|
||||||
use crate::vector::{Embedder, EmbeddingConfig, EmbeddingConfigs};
|
use crate::vector::{Embedder, EmbeddingConfig, EmbeddingConfigs};
|
||||||
use crate::{FieldsIdsMap, Index, OrderBy, Result};
|
use crate::{FieldsIdsMap, Index, OrderBy, Result};
|
||||||
|
|
||||||
@ -78,11 +78,19 @@ impl<T> Setting<T> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn apply(&mut self, new: Self) {
|
/// Returns `true` if applying the new setting changed this setting
|
||||||
|
pub fn apply(&mut self, new: Self) -> bool
|
||||||
|
where
|
||||||
|
T: PartialEq + Eq,
|
||||||
|
{
|
||||||
if let Setting::NotSet = new {
|
if let Setting::NotSet = new {
|
||||||
return;
|
return false;
|
||||||
|
}
|
||||||
|
if self == &new {
|
||||||
|
return false;
|
||||||
}
|
}
|
||||||
*self = new;
|
*self = new;
|
||||||
|
true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -950,17 +958,23 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
|
|||||||
.merge_join_by(configs.into_iter(), |(left, _), (right, _)| left.cmp(right))
|
.merge_join_by(configs.into_iter(), |(left, _), (right, _)| left.cmp(right))
|
||||||
{
|
{
|
||||||
match joined {
|
match joined {
|
||||||
|
// updated config
|
||||||
EitherOrBoth::Both((name, mut old), (_, new)) => {
|
EitherOrBoth::Both((name, mut old), (_, new)) => {
|
||||||
old.apply(new);
|
changed |= old.apply(new);
|
||||||
let new = validate_prompt(&name, old)?;
|
let new = validate_embedding_settings(old, &name)?;
|
||||||
changed = true;
|
|
||||||
new_configs.insert(name, new);
|
new_configs.insert(name, new);
|
||||||
}
|
}
|
||||||
|
// unchanged config
|
||||||
EitherOrBoth::Left((name, setting)) => {
|
EitherOrBoth::Left((name, setting)) => {
|
||||||
new_configs.insert(name, setting);
|
new_configs.insert(name, setting);
|
||||||
}
|
}
|
||||||
EitherOrBoth::Right((name, setting)) => {
|
// new config
|
||||||
let setting = validate_prompt(&name, setting)?;
|
EitherOrBoth::Right((name, mut setting)) => {
|
||||||
|
// apply the default source in case the source was not set so that it gets validated
|
||||||
|
crate::vector::settings::EmbeddingSettings::apply_default_source(
|
||||||
|
&mut setting,
|
||||||
|
);
|
||||||
|
let setting = validate_embedding_settings(setting, &name)?;
|
||||||
changed = true;
|
changed = true;
|
||||||
new_configs.insert(name, setting);
|
new_configs.insert(name, setting);
|
||||||
}
|
}
|
||||||
@ -1072,8 +1086,12 @@ fn validate_prompt(
|
|||||||
) -> Result<Setting<EmbeddingSettings>> {
|
) -> Result<Setting<EmbeddingSettings>> {
|
||||||
match new {
|
match new {
|
||||||
Setting::Set(EmbeddingSettings {
|
Setting::Set(EmbeddingSettings {
|
||||||
embedder_options,
|
source,
|
||||||
document_template: Setting::Set(PromptSettings { template: Setting::Set(template) }),
|
model,
|
||||||
|
revision,
|
||||||
|
api_key,
|
||||||
|
dimensions,
|
||||||
|
document_template: Setting::Set(template),
|
||||||
}) => {
|
}) => {
|
||||||
// validate
|
// validate
|
||||||
let template = crate::prompt::Prompt::new(template)
|
let template = crate::prompt::Prompt::new(template)
|
||||||
@ -1081,16 +1099,71 @@ fn validate_prompt(
|
|||||||
.map_err(|inner| UserError::InvalidPromptForEmbeddings(name.to_owned(), inner))?;
|
.map_err(|inner| UserError::InvalidPromptForEmbeddings(name.to_owned(), inner))?;
|
||||||
|
|
||||||
Ok(Setting::Set(EmbeddingSettings {
|
Ok(Setting::Set(EmbeddingSettings {
|
||||||
embedder_options,
|
source,
|
||||||
document_template: Setting::Set(PromptSettings {
|
model,
|
||||||
template: Setting::Set(template),
|
revision,
|
||||||
}),
|
api_key,
|
||||||
|
dimensions,
|
||||||
|
document_template: Setting::Set(template),
|
||||||
}))
|
}))
|
||||||
}
|
}
|
||||||
new => Ok(new),
|
new => Ok(new),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn validate_embedding_settings(
|
||||||
|
settings: Setting<EmbeddingSettings>,
|
||||||
|
name: &str,
|
||||||
|
) -> Result<Setting<EmbeddingSettings>> {
|
||||||
|
let settings = validate_prompt(name, settings)?;
|
||||||
|
let Setting::Set(settings) = settings else { return Ok(settings) };
|
||||||
|
let EmbeddingSettings { source, model, revision, api_key, dimensions, document_template } =
|
||||||
|
settings;
|
||||||
|
let Some(inferred_source) = source.set() else {
|
||||||
|
return Ok(Setting::Set(EmbeddingSettings {
|
||||||
|
source,
|
||||||
|
model,
|
||||||
|
revision,
|
||||||
|
api_key,
|
||||||
|
dimensions,
|
||||||
|
document_template,
|
||||||
|
}));
|
||||||
|
};
|
||||||
|
match inferred_source {
|
||||||
|
EmbedderSource::OpenAi => {
|
||||||
|
check_unset(&revision, "revision", inferred_source, name)?;
|
||||||
|
check_unset(&dimensions, "dimensions", inferred_source, name)?;
|
||||||
|
if let Setting::Set(model) = &model {
|
||||||
|
crate::vector::openai::EmbeddingModel::from_name(model.as_str()).ok_or(
|
||||||
|
crate::error::UserError::InvalidOpenAiModel {
|
||||||
|
embedder_name: name.to_owned(),
|
||||||
|
model: model.clone(),
|
||||||
|
},
|
||||||
|
)?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
EmbedderSource::HuggingFace => {
|
||||||
|
check_unset(&api_key, "apiKey", inferred_source, name)?;
|
||||||
|
check_unset(&dimensions, "dimensions", inferred_source, name)?;
|
||||||
|
}
|
||||||
|
EmbedderSource::UserProvided => {
|
||||||
|
check_unset(&model, "model", inferred_source, name)?;
|
||||||
|
check_unset(&revision, "revision", inferred_source, name)?;
|
||||||
|
check_unset(&api_key, "apiKey", inferred_source, name)?;
|
||||||
|
check_unset(&document_template, "documentTemplate", inferred_source, name)?;
|
||||||
|
check_set(&dimensions, "dimensions", inferred_source, name)?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(Setting::Set(EmbeddingSettings {
|
||||||
|
source,
|
||||||
|
model,
|
||||||
|
revision,
|
||||||
|
api_key,
|
||||||
|
dimensions,
|
||||||
|
document_template,
|
||||||
|
}))
|
||||||
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use big_s::S;
|
use big_s::S;
|
||||||
|
@ -34,6 +34,9 @@ pub struct EmbedderOptions {
|
|||||||
#[serde(deny_unknown_fields, rename_all = "camelCase")]
|
#[serde(deny_unknown_fields, rename_all = "camelCase")]
|
||||||
#[deserr(rename_all = camelCase, deny_unknown_fields)]
|
#[deserr(rename_all = camelCase, deny_unknown_fields)]
|
||||||
pub enum EmbeddingModel {
|
pub enum EmbeddingModel {
|
||||||
|
// # WARNING
|
||||||
|
//
|
||||||
|
// If ever adding a model, make sure to add it to the list of supported models below.
|
||||||
#[default]
|
#[default]
|
||||||
#[serde(rename = "text-embedding-ada-002")]
|
#[serde(rename = "text-embedding-ada-002")]
|
||||||
#[deserr(rename = "text-embedding-ada-002")]
|
#[deserr(rename = "text-embedding-ada-002")]
|
||||||
@ -41,6 +44,10 @@ pub enum EmbeddingModel {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl EmbeddingModel {
|
impl EmbeddingModel {
|
||||||
|
pub fn supported_models() -> &'static [&'static str] {
|
||||||
|
&["text-embedding-ada-002"]
|
||||||
|
}
|
||||||
|
|
||||||
pub fn max_token(&self) -> usize {
|
pub fn max_token(&self) -> usize {
|
||||||
match self {
|
match self {
|
||||||
EmbeddingModel::TextEmbeddingAda002 => 8191,
|
EmbeddingModel::TextEmbeddingAda002 => 8191,
|
||||||
@ -59,7 +66,7 @@ impl EmbeddingModel {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn from_name(name: &'static str) -> Option<Self> {
|
pub fn from_name(name: &str) -> Option<Self> {
|
||||||
match name {
|
match name {
|
||||||
"text-embedding-ada-002" => Some(EmbeddingModel::TextEmbeddingAda002),
|
"text-embedding-ada-002" => Some(EmbeddingModel::TextEmbeddingAda002),
|
||||||
_ => None,
|
_ => None,
|
||||||
|
@ -4,32 +4,189 @@ use serde::{Deserialize, Serialize};
|
|||||||
use crate::prompt::PromptData;
|
use crate::prompt::PromptData;
|
||||||
use crate::update::Setting;
|
use crate::update::Setting;
|
||||||
use crate::vector::EmbeddingConfig;
|
use crate::vector::EmbeddingConfig;
|
||||||
|
use crate::UserError;
|
||||||
|
|
||||||
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq, Deserr)]
|
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq, Deserr)]
|
||||||
#[serde(deny_unknown_fields, rename_all = "camelCase")]
|
#[serde(deny_unknown_fields, rename_all = "camelCase")]
|
||||||
#[deserr(rename_all = camelCase, deny_unknown_fields)]
|
#[deserr(rename_all = camelCase, deny_unknown_fields)]
|
||||||
pub struct EmbeddingSettings {
|
pub struct EmbeddingSettings {
|
||||||
#[serde(default, skip_serializing_if = "Setting::is_not_set", rename = "source")]
|
|
||||||
#[deserr(default, rename = "source")]
|
|
||||||
pub embedder_options: Setting<EmbedderSettings>,
|
|
||||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
||||||
#[deserr(default)]
|
#[deserr(default)]
|
||||||
pub document_template: Setting<PromptSettings>,
|
pub source: Setting<EmbedderSource>,
|
||||||
|
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
||||||
|
#[deserr(default)]
|
||||||
|
pub model: Setting<String>,
|
||||||
|
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
||||||
|
#[deserr(default)]
|
||||||
|
pub revision: Setting<String>,
|
||||||
|
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
||||||
|
#[deserr(default)]
|
||||||
|
pub api_key: Setting<String>,
|
||||||
|
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
||||||
|
#[deserr(default)]
|
||||||
|
pub dimensions: Setting<usize>,
|
||||||
|
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
||||||
|
#[deserr(default)]
|
||||||
|
pub document_template: Setting<String>,
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn check_unset<T>(
|
||||||
|
key: &Setting<T>,
|
||||||
|
field: &'static str,
|
||||||
|
source: EmbedderSource,
|
||||||
|
embedder_name: &str,
|
||||||
|
) -> Result<(), UserError> {
|
||||||
|
if matches!(key, Setting::NotSet) {
|
||||||
|
Ok(())
|
||||||
|
} else {
|
||||||
|
Err(UserError::InvalidFieldForSource {
|
||||||
|
embedder_name: embedder_name.to_owned(),
|
||||||
|
source_: source,
|
||||||
|
field,
|
||||||
|
allowed_fields_for_source: EmbeddingSettings::allowed_fields_for_source(source),
|
||||||
|
allowed_sources_for_field: EmbeddingSettings::allowed_sources_for_field(field),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn check_set<T>(
|
||||||
|
key: &Setting<T>,
|
||||||
|
field: &'static str,
|
||||||
|
source: EmbedderSource,
|
||||||
|
embedder_name: &str,
|
||||||
|
) -> Result<(), UserError> {
|
||||||
|
if matches!(key, Setting::Set(_)) {
|
||||||
|
Ok(())
|
||||||
|
} else {
|
||||||
|
Err(UserError::MissingFieldForSource {
|
||||||
|
field,
|
||||||
|
source_: source,
|
||||||
|
embedder_name: embedder_name.to_owned(),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl EmbeddingSettings {
|
||||||
|
pub const SOURCE: &'static str = "source";
|
||||||
|
pub const MODEL: &'static str = "model";
|
||||||
|
pub const REVISION: &'static str = "revision";
|
||||||
|
pub const API_KEY: &'static str = "apiKey";
|
||||||
|
pub const DIMENSIONS: &'static str = "dimensions";
|
||||||
|
pub const DOCUMENT_TEMPLATE: &'static str = "documentTemplate";
|
||||||
|
|
||||||
|
pub fn allowed_sources_for_field(field: &'static str) -> &'static [EmbedderSource] {
|
||||||
|
match field {
|
||||||
|
Self::SOURCE => {
|
||||||
|
&[EmbedderSource::HuggingFace, EmbedderSource::OpenAi, EmbedderSource::UserProvided]
|
||||||
|
}
|
||||||
|
Self::MODEL => &[EmbedderSource::HuggingFace, EmbedderSource::OpenAi],
|
||||||
|
Self::REVISION => &[EmbedderSource::HuggingFace],
|
||||||
|
Self::API_KEY => &[EmbedderSource::OpenAi],
|
||||||
|
Self::DIMENSIONS => &[EmbedderSource::UserProvided],
|
||||||
|
Self::DOCUMENT_TEMPLATE => &[EmbedderSource::HuggingFace, EmbedderSource::OpenAi],
|
||||||
|
_other => unreachable!("unknown field"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn allowed_fields_for_source(source: EmbedderSource) -> &'static [&'static str] {
|
||||||
|
match source {
|
||||||
|
EmbedderSource::OpenAi => {
|
||||||
|
&[Self::SOURCE, Self::MODEL, Self::API_KEY, Self::DOCUMENT_TEMPLATE]
|
||||||
|
}
|
||||||
|
EmbedderSource::HuggingFace => {
|
||||||
|
&[Self::SOURCE, Self::MODEL, Self::REVISION, Self::DOCUMENT_TEMPLATE]
|
||||||
|
}
|
||||||
|
EmbedderSource::UserProvided => &[Self::SOURCE, Self::DIMENSIONS],
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn apply_default_source(setting: &mut Setting<EmbeddingSettings>) {
|
||||||
|
if let Setting::Set(EmbeddingSettings {
|
||||||
|
source: source @ (Setting::NotSet | Setting::Reset),
|
||||||
|
..
|
||||||
|
}) = setting
|
||||||
|
{
|
||||||
|
*source = Setting::Set(EmbedderSource::default())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize, PartialEq, Eq, Deserr)]
|
||||||
|
#[serde(deny_unknown_fields, rename_all = "camelCase")]
|
||||||
|
#[deserr(rename_all = camelCase, deny_unknown_fields)]
|
||||||
|
pub enum EmbedderSource {
|
||||||
|
#[default]
|
||||||
|
OpenAi,
|
||||||
|
HuggingFace,
|
||||||
|
UserProvided,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl std::fmt::Display for EmbedderSource {
|
||||||
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||||
|
let s = match self {
|
||||||
|
EmbedderSource::OpenAi => "openAi",
|
||||||
|
EmbedderSource::HuggingFace => "huggingFace",
|
||||||
|
EmbedderSource::UserProvided => "userProvided",
|
||||||
|
};
|
||||||
|
f.write_str(s)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl EmbeddingSettings {
|
impl EmbeddingSettings {
|
||||||
pub fn apply(&mut self, new: Self) {
|
pub fn apply(&mut self, new: Self) {
|
||||||
let EmbeddingSettings { embedder_options, document_template: prompt } = new;
|
let EmbeddingSettings { source, model, revision, api_key, dimensions, document_template } =
|
||||||
self.embedder_options.apply(embedder_options);
|
new;
|
||||||
self.document_template.apply(prompt);
|
let old_source = self.source;
|
||||||
|
self.source.apply(source);
|
||||||
|
// Reinitialize the whole setting object on a source change
|
||||||
|
if old_source != self.source {
|
||||||
|
*self = EmbeddingSettings {
|
||||||
|
source,
|
||||||
|
model,
|
||||||
|
revision,
|
||||||
|
api_key,
|
||||||
|
dimensions,
|
||||||
|
document_template,
|
||||||
|
};
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
self.model.apply(model);
|
||||||
|
self.revision.apply(revision);
|
||||||
|
self.api_key.apply(api_key);
|
||||||
|
self.dimensions.apply(dimensions);
|
||||||
|
self.document_template.apply(document_template);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl From<EmbeddingConfig> for EmbeddingSettings {
|
impl From<EmbeddingConfig> for EmbeddingSettings {
|
||||||
fn from(value: EmbeddingConfig) -> Self {
|
fn from(value: EmbeddingConfig) -> Self {
|
||||||
Self {
|
let EmbeddingConfig { embedder_options, prompt } = value;
|
||||||
embedder_options: Setting::Set(value.embedder_options.into()),
|
match embedder_options {
|
||||||
document_template: Setting::Set(value.prompt.into()),
|
super::EmbedderOptions::HuggingFace(options) => Self {
|
||||||
|
source: Setting::Set(EmbedderSource::HuggingFace),
|
||||||
|
model: Setting::Set(options.model),
|
||||||
|
revision: options.revision.map(Setting::Set).unwrap_or_default(),
|
||||||
|
api_key: Setting::NotSet,
|
||||||
|
dimensions: Setting::NotSet,
|
||||||
|
document_template: Setting::Set(prompt.template),
|
||||||
|
},
|
||||||
|
super::EmbedderOptions::OpenAi(options) => Self {
|
||||||
|
source: Setting::Set(EmbedderSource::OpenAi),
|
||||||
|
model: Setting::Set(options.embedding_model.name().to_owned()),
|
||||||
|
revision: Setting::NotSet,
|
||||||
|
api_key: options.api_key.map(Setting::Set).unwrap_or_default(),
|
||||||
|
dimensions: Setting::NotSet,
|
||||||
|
document_template: Setting::Set(prompt.template),
|
||||||
|
},
|
||||||
|
super::EmbedderOptions::UserProvided(options) => Self {
|
||||||
|
source: Setting::Set(EmbedderSource::UserProvided),
|
||||||
|
model: Setting::NotSet,
|
||||||
|
revision: Setting::NotSet,
|
||||||
|
api_key: Setting::NotSet,
|
||||||
|
dimensions: Setting::Set(options.dimensions),
|
||||||
|
document_template: Setting::NotSet,
|
||||||
|
},
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -37,256 +194,51 @@ impl From<EmbeddingConfig> for EmbeddingSettings {
|
|||||||
impl From<EmbeddingSettings> for EmbeddingConfig {
|
impl From<EmbeddingSettings> for EmbeddingConfig {
|
||||||
fn from(value: EmbeddingSettings) -> Self {
|
fn from(value: EmbeddingSettings) -> Self {
|
||||||
let mut this = Self::default();
|
let mut this = Self::default();
|
||||||
let EmbeddingSettings { embedder_options, document_template: prompt } = value;
|
let EmbeddingSettings { source, model, revision, api_key, dimensions, document_template } =
|
||||||
if let Some(embedder_options) = embedder_options.set() {
|
value;
|
||||||
this.embedder_options = embedder_options.into();
|
if let Some(source) = source.set() {
|
||||||
}
|
match source {
|
||||||
if let Some(prompt) = prompt.set() {
|
EmbedderSource::OpenAi => {
|
||||||
this.prompt = prompt.into();
|
let mut options = super::openai::EmbedderOptions::with_default_model(None);
|
||||||
}
|
if let Some(model) = model.set() {
|
||||||
this
|
if let Some(model) = super::openai::EmbeddingModel::from_name(&model) {
|
||||||
}
|
options.embedding_model = model;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq, Deserr)]
|
if let Some(api_key) = api_key.set() {
|
||||||
#[serde(deny_unknown_fields, rename_all = "camelCase")]
|
options.api_key = Some(api_key);
|
||||||
#[deserr(rename_all = camelCase, deny_unknown_fields)]
|
}
|
||||||
pub struct PromptSettings {
|
this.embedder_options = super::EmbedderOptions::OpenAi(options);
|
||||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
|
||||||
#[deserr(default)]
|
|
||||||
pub template: Setting<String>,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl PromptSettings {
|
|
||||||
pub fn apply(&mut self, new: Self) {
|
|
||||||
let PromptSettings { template } = new;
|
|
||||||
self.template.apply(template);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl From<PromptData> for PromptSettings {
|
|
||||||
fn from(value: PromptData) -> Self {
|
|
||||||
Self { template: Setting::Set(value.template) }
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl From<PromptSettings> for PromptData {
|
|
||||||
fn from(value: PromptSettings) -> Self {
|
|
||||||
let mut this = PromptData::default();
|
|
||||||
let PromptSettings { template } = value;
|
|
||||||
if let Some(template) = template.set() {
|
|
||||||
this.template = template;
|
|
||||||
}
|
|
||||||
this
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
|
|
||||||
#[serde(deny_unknown_fields, rename_all = "camelCase")]
|
|
||||||
pub enum EmbedderSettings {
|
|
||||||
HuggingFace(Setting<HfEmbedderSettings>),
|
|
||||||
OpenAi(Setting<OpenAiEmbedderSettings>),
|
|
||||||
UserProvided(UserProvidedSettings),
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<E> Deserr<E> for EmbedderSettings
|
|
||||||
where
|
|
||||||
E: deserr::DeserializeError,
|
|
||||||
{
|
|
||||||
fn deserialize_from_value<V: deserr::IntoValue>(
|
|
||||||
value: deserr::Value<V>,
|
|
||||||
location: deserr::ValuePointerRef,
|
|
||||||
) -> Result<Self, E> {
|
|
||||||
match value {
|
|
||||||
deserr::Value::Map(map) => {
|
|
||||||
if deserr::Map::len(&map) != 1 {
|
|
||||||
return Err(deserr::take_cf_content(E::error::<V>(
|
|
||||||
None,
|
|
||||||
deserr::ErrorKind::Unexpected {
|
|
||||||
msg: format!(
|
|
||||||
"Expected a single field, got {} fields",
|
|
||||||
deserr::Map::len(&map)
|
|
||||||
),
|
|
||||||
},
|
|
||||||
location,
|
|
||||||
)));
|
|
||||||
}
|
}
|
||||||
let mut it = deserr::Map::into_iter(map);
|
EmbedderSource::HuggingFace => {
|
||||||
let (k, v) = it.next().unwrap();
|
let mut options = super::hf::EmbedderOptions::default();
|
||||||
|
if let Some(model) = model.set() {
|
||||||
match k.as_str() {
|
options.model = model;
|
||||||
"huggingFace" => Ok(EmbedderSettings::HuggingFace(Setting::Set(
|
// Reset the revision if we are setting the model.
|
||||||
HfEmbedderSettings::deserialize_from_value(
|
// This allows the following:
|
||||||
v.into_value(),
|
// "huggingFace": {} -> default model with default revision
|
||||||
location.push_key(&k),
|
// "huggingFace": { "model": "name-of-the-default-model" } -> default model without a revision
|
||||||
)?,
|
// "huggingFace": { "model": "some-other-model" } -> most importantly, other model without a revision
|
||||||
))),
|
options.revision = None;
|
||||||
"openAi" => Ok(EmbedderSettings::OpenAi(Setting::Set(
|
}
|
||||||
OpenAiEmbedderSettings::deserialize_from_value(
|
if let Some(revision) = revision.set() {
|
||||||
v.into_value(),
|
options.revision = Some(revision);
|
||||||
location.push_key(&k),
|
}
|
||||||
)?,
|
this.embedder_options = super::EmbedderOptions::HuggingFace(options);
|
||||||
))),
|
}
|
||||||
"userProvided" => Ok(EmbedderSettings::UserProvided(
|
EmbedderSource::UserProvided => {
|
||||||
UserProvidedSettings::deserialize_from_value(
|
this.embedder_options =
|
||||||
v.into_value(),
|
super::EmbedderOptions::UserProvided(super::manual::EmbedderOptions {
|
||||||
location.push_key(&k),
|
dimensions: dimensions.set().unwrap(),
|
||||||
)?,
|
});
|
||||||
)),
|
|
||||||
other => Err(deserr::take_cf_content(E::error::<V>(
|
|
||||||
None,
|
|
||||||
deserr::ErrorKind::UnknownKey {
|
|
||||||
key: other,
|
|
||||||
accepted: &["huggingFace", "openAi", "userProvided"],
|
|
||||||
},
|
|
||||||
location,
|
|
||||||
))),
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
_ => Err(deserr::take_cf_content(E::error::<V>(
|
|
||||||
None,
|
|
||||||
deserr::ErrorKind::IncorrectValueKind {
|
|
||||||
actual: value,
|
|
||||||
accepted: &[deserr::ValueKind::Map],
|
|
||||||
},
|
|
||||||
location,
|
|
||||||
))),
|
|
||||||
}
|
}
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Default for EmbedderSettings {
|
if let Setting::Set(template) = document_template {
|
||||||
fn default() -> Self {
|
this.prompt = PromptData { template }
|
||||||
Self::OpenAi(Default::default())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl From<crate::vector::EmbedderOptions> for EmbedderSettings {
|
|
||||||
fn from(value: crate::vector::EmbedderOptions) -> Self {
|
|
||||||
match value {
|
|
||||||
crate::vector::EmbedderOptions::HuggingFace(hf) => {
|
|
||||||
Self::HuggingFace(Setting::Set(hf.into()))
|
|
||||||
}
|
|
||||||
crate::vector::EmbedderOptions::OpenAi(openai) => {
|
|
||||||
Self::OpenAi(Setting::Set(openai.into()))
|
|
||||||
}
|
|
||||||
crate::vector::EmbedderOptions::UserProvided(user_provided) => {
|
|
||||||
Self::UserProvided(user_provided.into())
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl From<EmbedderSettings> for crate::vector::EmbedderOptions {
|
|
||||||
fn from(value: EmbedderSettings) -> Self {
|
|
||||||
match value {
|
|
||||||
EmbedderSettings::HuggingFace(Setting::Set(hf)) => Self::HuggingFace(hf.into()),
|
|
||||||
EmbedderSettings::HuggingFace(_setting) => Self::HuggingFace(Default::default()),
|
|
||||||
EmbedderSettings::OpenAi(Setting::Set(ai)) => Self::OpenAi(ai.into()),
|
|
||||||
EmbedderSettings::OpenAi(_setting) => {
|
|
||||||
Self::OpenAi(crate::vector::openai::EmbedderOptions::with_default_model(None))
|
|
||||||
}
|
|
||||||
EmbedderSettings::UserProvided(user_provided) => {
|
|
||||||
Self::UserProvided(user_provided.into())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq, Deserr)]
|
|
||||||
#[serde(deny_unknown_fields, rename_all = "camelCase")]
|
|
||||||
#[deserr(rename_all = camelCase, deny_unknown_fields)]
|
|
||||||
pub struct HfEmbedderSettings {
|
|
||||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
|
||||||
#[deserr(default)]
|
|
||||||
pub model: Setting<String>,
|
|
||||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
|
||||||
#[deserr(default)]
|
|
||||||
pub revision: Setting<String>,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl HfEmbedderSettings {
|
|
||||||
pub fn apply(&mut self, new: Self) {
|
|
||||||
let HfEmbedderSettings { model, revision } = new;
|
|
||||||
self.model.apply(model);
|
|
||||||
self.revision.apply(revision);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl From<crate::vector::hf::EmbedderOptions> for HfEmbedderSettings {
|
|
||||||
fn from(value: crate::vector::hf::EmbedderOptions) -> Self {
|
|
||||||
Self {
|
|
||||||
model: Setting::Set(value.model),
|
|
||||||
revision: value.revision.map(Setting::Set).unwrap_or(Setting::NotSet),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl From<HfEmbedderSettings> for crate::vector::hf::EmbedderOptions {
|
|
||||||
fn from(value: HfEmbedderSettings) -> Self {
|
|
||||||
let HfEmbedderSettings { model, revision } = value;
|
|
||||||
let mut this = Self::default();
|
|
||||||
if let Some(model) = model.set() {
|
|
||||||
this.model = model;
|
|
||||||
}
|
|
||||||
if let Some(revision) = revision.set() {
|
|
||||||
this.revision = Some(revision);
|
|
||||||
}
|
|
||||||
this
|
this
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq, Deserr)]
|
|
||||||
#[serde(deny_unknown_fields, rename_all = "camelCase")]
|
|
||||||
#[deserr(rename_all = camelCase, deny_unknown_fields)]
|
|
||||||
pub struct OpenAiEmbedderSettings {
|
|
||||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
|
||||||
#[deserr(default)]
|
|
||||||
pub api_key: Setting<String>,
|
|
||||||
#[serde(default, skip_serializing_if = "Setting::is_not_set", rename = "model")]
|
|
||||||
#[deserr(default, rename = "model")]
|
|
||||||
pub embedding_model: Setting<crate::vector::openai::EmbeddingModel>,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl OpenAiEmbedderSettings {
|
|
||||||
pub fn apply(&mut self, new: Self) {
|
|
||||||
let Self { api_key, embedding_model: embedding_mode } = new;
|
|
||||||
self.api_key.apply(api_key);
|
|
||||||
self.embedding_model.apply(embedding_mode);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl From<crate::vector::openai::EmbedderOptions> for OpenAiEmbedderSettings {
|
|
||||||
fn from(value: crate::vector::openai::EmbedderOptions) -> Self {
|
|
||||||
Self {
|
|
||||||
api_key: value.api_key.map(Setting::Set).unwrap_or(Setting::Reset),
|
|
||||||
embedding_model: Setting::Set(value.embedding_model),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl From<OpenAiEmbedderSettings> for crate::vector::openai::EmbedderOptions {
|
|
||||||
fn from(value: OpenAiEmbedderSettings) -> Self {
|
|
||||||
let OpenAiEmbedderSettings { api_key, embedding_model } = value;
|
|
||||||
Self { api_key: api_key.set(), embedding_model: embedding_model.set().unwrap_or_default() }
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq, Deserr)]
|
|
||||||
#[serde(deny_unknown_fields, rename_all = "camelCase")]
|
|
||||||
#[deserr(rename_all = camelCase, deny_unknown_fields)]
|
|
||||||
pub struct UserProvidedSettings {
|
|
||||||
pub dimensions: usize,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl From<UserProvidedSettings> for crate::vector::manual::EmbedderOptions {
|
|
||||||
fn from(value: UserProvidedSettings) -> Self {
|
|
||||||
Self { dimensions: value.dimensions }
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl From<crate::vector::manual::EmbedderOptions> for UserProvidedSettings {
|
|
||||||
fn from(value: crate::vector::manual::EmbedderOptions) -> Self {
|
|
||||||
Self { dimensions: value.dimensions }
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
Loading…
Reference in New Issue
Block a user