Merge branch 'main' into indexer-edition-2024

This commit is contained in:
Louis Dureuil 2024-11-20 16:59:58 +01:00
commit 6e6acfcf1b
No known key found for this signature in database
330 changed files with 10063 additions and 1499 deletions

View file

@ -0,0 +1,35 @@
use serde::Serialize;
use time::{Duration, OffsetDateTime};
use crate::{
batches::{Batch, BatchId, BatchStats},
task_view::DetailsView,
tasks::serialize_duration,
};
/// Serializable view of a [`Batch`], extended with a computed `duration`.
///
/// Instances are built with `BatchView::from_batch`. Field names are
/// serialized in camelCase.
#[derive(Debug, Clone, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct BatchView {
/// Identifier copied from the batch.
pub uid: BatchId,
/// Details copied from the batch.
pub details: DetailsView,
/// Statistics copied from the batch.
pub stats: BatchStats,
/// Time elapsed between `started_at` and `finished_at`; `None` while the
/// batch has no `finished_at`. Serialized through `serialize_duration`.
#[serde(serialize_with = "serialize_duration", default)]
pub duration: Option<Duration>,
/// Start time of the batch, serialized with `time::serde::rfc3339`.
#[serde(with = "time::serde::rfc3339", default)]
pub started_at: OffsetDateTime,
/// End time of the batch, if any, serialized with
/// `time::serde::rfc3339::option`.
#[serde(with = "time::serde::rfc3339::option", default)]
pub finished_at: Option<OffsetDateTime>,
}
impl BatchView {
    /// Builds the view of `batch`.
    ///
    /// `uid`, `details`, `stats` and both timestamps are copied (or cloned)
    /// from the batch. `duration` is `finished_at - started_at`, and stays
    /// `None` when the batch has no `finished_at`.
    pub fn from_batch(batch: &Batch) -> Self {
        let started_at = batch.started_at;
        let finished_at = batch.finished_at;
        // Without an end timestamp there is nothing to measure.
        let duration = finished_at.map(|end| end - started_at);

        Self {
            uid: batch.uid,
            details: batch.details.clone(),
            stats: batch.stats.clone(),
            duration,
            started_at,
            finished_at,
        }
    }
}

View file

@ -0,0 +1,34 @@
use std::collections::BTreeMap;
use serde::{Deserialize, Serialize};
use time::OffsetDateTime;
use crate::{
task_view::DetailsView,
tasks::{Kind, Status},
};
/// Identifier of a batch (the type of `Batch::uid`).
pub type BatchId = u32;
/// Record describing a batch: its identifier, accumulated details,
/// statistics and timestamps.
///
/// Field names are (de)serialized in camelCase.
#[derive(Clone, Debug, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct Batch {
/// Identifier of the batch.
pub uid: BatchId,
/// Details attached to the batch.
// NOTE(review): presumably the accumulation of the details of every task in
// the batch (see `DetailsView::accumulate`) — confirm against the scheduler.
pub details: DetailsView,
/// Statistics attached to the batch.
pub stats: BatchStats,
/// Start time of the batch, (de)serialized with `time::serde::rfc3339`.
#[serde(with = "time::serde::rfc3339")]
pub started_at: OffsetDateTime,
/// End time of the batch, if any, (de)serialized with
/// `time::serde::rfc3339::option`.
#[serde(with = "time::serde::rfc3339::option")]
pub finished_at: Option<OffsetDateTime>,
}
/// Counters describing the content of a batch.
///
/// Field names are (de)serialized in camelCase.
#[derive(Default, Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct BatchStats {
    /// Total number of tasks.
    ///
    /// This is a count, not an identifier, so it is typed as a plain `u32`
    /// rather than through the `BatchId` alias (which is also `u32`, so the
    /// change is source- and wire-compatible).
    pub total_nb_tasks: u32,
    /// Counter per task status.
    // NOTE(review): presumably the number of tasks of the batch in each
    // status — confirm against the code that fills it.
    pub status: BTreeMap<Status, u32>,
    /// Counter per task kind.
    // NOTE(review): presumably the number of tasks of the batch of each
    // kind — confirm against the code that fills it.
    pub types: BTreeMap<Kind, u32>,
    /// Counter per index uid.
    // NOTE(review): presumably the number of tasks of the batch targeting
    // each index — confirm against the code that fills it.
    pub index_uids: BTreeMap<String, u32>,
}

View file

@ -318,9 +318,11 @@ InvalidTaskBeforeStartedAt , InvalidRequest , BAD_REQUEST ;
InvalidTaskCanceledBy , InvalidRequest , BAD_REQUEST ;
InvalidTaskFrom , InvalidRequest , BAD_REQUEST ;
InvalidTaskLimit , InvalidRequest , BAD_REQUEST ;
InvalidTaskReverse , InvalidRequest , BAD_REQUEST ;
InvalidTaskStatuses , InvalidRequest , BAD_REQUEST ;
InvalidTaskTypes , InvalidRequest , BAD_REQUEST ;
InvalidTaskUids , InvalidRequest , BAD_REQUEST ;
InvalidBatchUids , InvalidRequest , BAD_REQUEST ;
IoError , System , UNPROCESSABLE_ENTITY;
FeatureNotEnabled , InvalidRequest , BAD_REQUEST ;
MalformedPayload , InvalidRequest , BAD_REQUEST ;
@ -342,6 +344,7 @@ NoSpaceLeftOnDevice , System , UNPROCESSABLE_ENT
PayloadTooLarge , InvalidRequest , PAYLOAD_TOO_LARGE ;
TooManySearchRequests , System , SERVICE_UNAVAILABLE ;
TaskNotFound , InvalidRequest , NOT_FOUND ;
BatchNotFound , InvalidRequest , NOT_FOUND ;
TooManyOpenFiles , System , UNPROCESSABLE_ENTITY ;
TooManyVectors , InvalidRequest , BAD_REQUEST ;
UnretrievableDocument , Internal , BAD_REQUEST ;

View file

@ -1,3 +1,5 @@
pub mod batch_view;
pub mod batches;
pub mod compression;
pub mod deserr;
pub mod document_formats;

View file

@ -378,6 +378,64 @@ impl Settings<Unchecked> {
self.embedders = Setting::Set(configs);
Ok(self)
}
/// Merges `other` into `self`, one setting at a time.
///
/// Every setting except `embedders` is resolved by calling `or` on a clone
/// of `other`'s value with `self`'s value as the argument, so that for most
/// settings only the latest version is kept.
///
/// `embedders` is resolved as follows:
/// - if one side is `NotSet`, the other side is kept as is;
/// - if `other` is `Reset`, the result is `Reset`;
/// - if `self` is `Reset` and `other` is `Set`, `other`'s embedders are kept;
/// - if both are `Set`, the two maps are merged and the entries of `other`
///   replace the entries of `self` that share the same key.
pub fn merge(&mut self, other: &Self) {
    let embedders = match (self.embedders.clone(), other.embedders.clone()) {
        // Only one side carries information: keep it untouched.
        (Setting::NotSet, known) | (known, Setting::NotSet) => known,
        (Setting::Set(_) | Setting::Reset, Setting::Reset) => Setting::Reset,
        (Setting::Reset, Setting::Set(incoming)) => Setting::Set(incoming),
        // Both sides define embedders: merge the maps, `other` wins on conflicts.
        (Setting::Set(mut current), Setting::Set(incoming)) => {
            current.extend(incoming);
            Setting::Set(current)
        }
    };

    *self = Self {
        displayed_attributes: other
            .displayed_attributes
            .clone()
            .or(self.displayed_attributes.clone()),
        searchable_attributes: other
            .searchable_attributes
            .clone()
            .or(self.searchable_attributes.clone()),
        filterable_attributes: other
            .filterable_attributes
            .clone()
            .or(self.filterable_attributes.clone()),
        sortable_attributes: other
            .sortable_attributes
            .clone()
            .or(self.sortable_attributes.clone()),
        ranking_rules: other.ranking_rules.clone().or(self.ranking_rules.clone()),
        stop_words: other.stop_words.clone().or(self.stop_words.clone()),
        non_separator_tokens: other
            .non_separator_tokens
            .clone()
            .or(self.non_separator_tokens.clone()),
        separator_tokens: other.separator_tokens.clone().or(self.separator_tokens.clone()),
        dictionary: other.dictionary.clone().or(self.dictionary.clone()),
        synonyms: other.synonyms.clone().or(self.synonyms.clone()),
        distinct_attribute: other
            .distinct_attribute
            .clone()
            .or(self.distinct_attribute.clone()),
        proximity_precision: other.proximity_precision.or(self.proximity_precision),
        typo_tolerance: other.typo_tolerance.clone().or(self.typo_tolerance.clone()),
        faceting: other.faceting.clone().or(self.faceting.clone()),
        pagination: other.pagination.clone().or(self.pagination.clone()),
        search_cutoff_ms: other.search_cutoff_ms.or(self.search_cutoff_ms),
        localized_attributes: other
            .localized_attributes
            .clone()
            .or(self.localized_attributes.clone()),
        embedders,
        _kind: PhantomData,
    };
}
}
#[derive(Debug, Clone, Serialize, Deserialize)]
@ -868,6 +926,12 @@ impl From<ProximityPrecisionView> for ProximityPrecision {
#[derive(Debug, Clone, Default, Deserialize, PartialEq, Eq)]
pub struct WildcardSetting(Setting<Vec<String>>);
impl WildcardSetting {
pub fn or(self, other: Self) -> Self {
Self(self.0.or(other.0))
}
}
impl From<Setting<Vec<String>>> for WildcardSetting {
fn from(setting: Setting<Vec<String>>) -> Self {
Self(setting)

View file

@ -1,7 +1,8 @@
use milli::Object;
use serde::Serialize;
use serde::{Deserialize, Serialize};
use time::{Duration, OffsetDateTime};
use crate::batches::BatchId;
use crate::error::ResponseError;
use crate::settings::{Settings, Unchecked};
use crate::tasks::{serialize_duration, Details, IndexSwap, Kind, Status, Task, TaskId};
@ -10,6 +11,7 @@ use crate::tasks::{serialize_duration, Details, IndexSwap, Kind, Status, Task, T
#[serde(rename_all = "camelCase")]
pub struct TaskView {
pub uid: TaskId,
pub batch_uid: Option<BatchId>,
#[serde(default)]
pub index_uid: Option<String>,
pub status: Status,
@ -33,6 +35,7 @@ impl TaskView {
pub fn from_task(task: &Task) -> TaskView {
TaskView {
uid: task.uid,
batch_uid: task.batch_uid,
index_uid: task.index_uid().map(ToOwned::to_owned),
status: task.status,
kind: task.kind.as_kind(),
@ -47,7 +50,7 @@ impl TaskView {
}
}
#[derive(Default, Debug, PartialEq, Eq, Clone, Serialize)]
#[derive(Default, Debug, PartialEq, Eq, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct DetailsView {
#[serde(skip_serializing_if = "Option::is_none")]
@ -83,6 +86,128 @@ pub struct DetailsView {
pub swaps: Option<Vec<IndexSwap>>,
}
impl DetailsView {
    /// Folds `other` into `self`, field by field.
    ///
    /// For every field, a value present on only one side is kept. When both
    /// sides carry a value:
    /// - counters (`received_documents`, `indexed_documents`,
    ///   `edited_documents`, `provided_ids`, `deleted_documents`,
    ///   `matched_tasks`, `canceled_tasks`, `deleted_tasks`) are added;
    /// - `primary_key`, `dump_uid`, `context` and `function` keep the value
    ///   of `self`;
    /// - `original_filter` becomes `"{self}&{other}"`;
    /// - `settings` are merged with `Settings::merge`;
    /// - `swaps` are concatenated, the swaps of `self` first.
    ///
    /// For doubly optional fields, `Some(None)` is kept when at least one
    /// side is `Some(None)` and neither side holds an actual value.
    pub fn accumulate(&mut self, other: &Self) {
        *self = Self {
            received_documents: Self::combine_flat(
                &self.received_documents,
                &other.received_documents,
                |left, right| *left + *right,
            ),
            indexed_documents: Self::combine_nested(
                &self.indexed_documents,
                &other.indexed_documents,
                |left, right| *left + *right,
            ),
            edited_documents: Self::combine_nested(
                &self.edited_documents,
                &other.edited_documents,
                |left, right| *left + *right,
            ),
            // Receiving several primary keys is not expected; should it
            // happen, only the first one encountered is returned.
            primary_key: Self::combine_nested(
                &self.primary_key,
                &other.primary_key,
                |first, _second| first.clone(),
            ),
            provided_ids: Self::combine_flat(
                &self.provided_ids,
                &other.provided_ids,
                |left, right| *left + *right,
            ),
            deleted_documents: Self::combine_nested(
                &self.deleted_documents,
                &other.deleted_documents,
                |left, right| *left + *right,
            ),
            matched_tasks: Self::combine_flat(
                &self.matched_tasks,
                &other.matched_tasks,
                |left, right| *left + *right,
            ),
            canceled_tasks: Self::combine_nested(
                &self.canceled_tasks,
                &other.canceled_tasks,
                |left, right| *left + *right,
            ),
            deleted_tasks: Self::combine_nested(
                &self.deleted_tasks,
                &other.deleted_tasks,
                |left, right| *left + *right,
            ),
            // Two filters can neither be truly merged nor returned as an
            // array, so the conditions are returned one after the other.
            original_filter: Self::combine_nested(
                &self.original_filter,
                &other.original_filter,
                |left, right| format!("{left}&{right}"),
            ),
            // Several dumps should never be batched together, so keeping the
            // first one encountered should not be an issue.
            dump_uid: Self::combine_nested(&self.dump_uid, &other.dump_uid, |first, _second| {
                first.clone()
            }),
            // Several document editions should never be batched together, so
            // keeping the first context encountered should not be an issue.
            context: Self::combine_nested(&self.context, &other.context, |first, _second| {
                first.clone()
            }),
            // Same reasoning as for `context`: the first function is kept.
            function: Self::combine_flat(&self.function, &other.function, |first, _second| {
                first.clone()
            }),
            settings: Self::combine_flat(&self.settings, &other.settings, |left, right| {
                let mut merged = left.clone();
                merged.merge(right);
                merged
            }),
            swaps: Self::combine_flat(&self.swaps, &other.swaps, |left, right| {
                [left.as_slice(), right.as_slice()].concat()
            }),
        }
    }

    /// Combines two optional values: clones the only present one, or calls
    /// `both` when the two sides are present.
    fn combine_flat<T: Clone>(
        left: &Option<T>,
        right: &Option<T>,
        both: impl FnOnce(&T, &T) -> T,
    ) -> Option<T> {
        match (left, right) {
            (Some(left), Some(right)) => Some(both(left, right)),
            (Some(only), None) | (None, Some(only)) => Some(only.clone()),
            (None, None) => None,
        }
    }

    /// Combines two doubly optional values.
    ///
    /// Returns `None` when both sides are `None`, the result of `both` when
    /// the two sides hold an actual value, a clone of the only actual value
    /// when a single side holds one, and `Some(None)` otherwise.
    fn combine_nested<T: Clone>(
        left: &Option<Option<T>>,
        right: &Option<Option<T>>,
        both: impl FnOnce(&T, &T) -> T,
    ) -> Option<Option<T>> {
        match (left, right) {
            (None, None) => None,
            (Some(Some(left)), Some(Some(right))) => Some(Some(both(left, right))),
            (Some(Some(only)), _) | (_, Some(Some(only))) => Some(Some(only.clone())),
            _ => Some(None),
        }
    }
}
impl From<Details> for DetailsView {
fn from(details: Details) -> Self {
match details {

View file

@ -12,6 +12,7 @@ use serde::{Deserialize, Serialize, Serializer};
use time::{Duration, OffsetDateTime};
use uuid::Uuid;
use crate::batches::BatchId;
use crate::error::ResponseError;
use crate::keys::Key;
use crate::settings::{Settings, Unchecked};
@ -23,6 +24,7 @@ pub type TaskId = u32;
#[serde(rename_all = "camelCase")]
pub struct Task {
pub uid: TaskId,
pub batch_uid: Option<BatchId>,
#[serde(with = "time::serde::rfc3339")]
pub enqueued_at: OffsetDateTime,
@ -418,7 +420,9 @@ impl From<&KindWithContent> for Option<Details> {
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, Sequence)]
#[derive(
Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, Sequence, PartialOrd, Ord,
)]
#[serde(rename_all = "camelCase")]
pub enum Status {
Enqueued,
@ -477,7 +481,9 @@ impl fmt::Display for ParseTaskStatusError {
}
impl std::error::Error for ParseTaskStatusError {}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, Sequence)]
#[derive(
Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, Sequence, PartialOrd, Ord,
)]
#[serde(rename_all = "camelCase")]
pub enum Kind {
DocumentAdditionOrUpdate,