143: Shared update store r=irevoire a=MarinPostma

This PR changes the updates process so that only one instance of an update store is shared among indexes.

This allows updates to always be processed sequentially without additional synchronization, and fixes the bug where all the first pending update for each index were reported as processing whereas only one was.

EDIT:

I ended having to rewrite the whole `UpdateStore` to allow updates being really queued and processed sequentially in the ordered they were added. For that purpose I created a `pending_queue` that orders the updates by a global update id.

To find the next `update_id` to use, both globally and for each index, I have created another database that contains the next id to use.

Finally, all updates that have been processed (with success or otherwise) are all stores in an `updates` database.

The layout for the keys of these databases are such that it is easy to iterate over the elements for a particular index, and greatly reduces the amount of code to do so, compared to the former implementation.

I have also simplified the locking mechanism for the update store, thanks to the StateLock data structure, that allow both an arbitrary number of readers and a single writer to concurrently access the state. The current state can be either Idle, Processing, or Snapshotting. When an update or snapshotting is ongoing, the process holds the state lock until it is done processing its task. When it is done, it sets bask the state to Idle.

I have made other small improvements here and there, and have let some other for work, such as:
- When creating an update file to hold a request's content, it would be preferable to first create a temporary file, and then atomically persist it when we have written to it. This would simplify the case when there is no data to be written to the file, since we wouldn't have to take care about cleaning after ourselves.
- The logic for content validation must be factored.
- Some more tests related to error handling in the process_pending_update function.
- The issue #159

close #114


Co-authored-by: Marin Postma <postma.marin@protonmail.com>
This commit is contained in:
bors[bot] 2021-04-27 18:41:55 +00:00 committed by GitHub
commit 8bc7dd8b03
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
33 changed files with 1139 additions and 1034 deletions

View file

@ -84,9 +84,9 @@ async fn delete_document(
.delete_documents(path.index_uid.clone(), vec![path.document_id.clone()])
.await
{
Ok(update_status) => {
Ok(HttpResponse::Accepted().json(serde_json::json!({ "updateId": update_status.id() })))
}
Ok(update_status) => Ok(
HttpResponse::Accepted().json(serde_json::json!({ "updateId": update_status.id() }))
),
Err(e) => {
Ok(HttpResponse::BadRequest().json(serde_json::json!({ "error": e.to_string() })))
}
@ -107,14 +107,11 @@ async fn get_all_documents(
path: web::Path<IndexParam>,
params: web::Query<BrowseQuery>,
) -> Result<HttpResponse, ResponseError> {
let attributes_to_retrieve = params
.attributes_to_retrieve
.as_ref()
.and_then(|attrs| {
let attributes_to_retrieve = params.attributes_to_retrieve.as_ref().and_then(|attrs| {
let mut names = Vec::new();
for name in attrs.split(',').map(String::from) {
if name == "*" {
return None
return None;
}
names.push(name);
}
@ -163,9 +160,9 @@ async fn add_documents(
.await;
match addition_result {
Ok(update_status) => {
Ok(HttpResponse::Accepted().json(serde_json::json!({ "updateId": update_status.id() })))
}
Ok(update_status) => Ok(
HttpResponse::Accepted().json(serde_json::json!({ "updateId": update_status.id() }))
),
Err(e) => {
Ok(HttpResponse::BadRequest().json(serde_json::json!({ "error": e.to_string() })))
}
@ -242,9 +239,9 @@ async fn delete_documents(
.collect();
match data.delete_documents(path.index_uid.clone(), ids).await {
Ok(update_status) => {
Ok(HttpResponse::Accepted().json(serde_json::json!({ "updateId": update_status.id() })))
}
Ok(update_status) => Ok(
HttpResponse::Accepted().json(serde_json::json!({ "updateId": update_status.id() }))
),
Err(e) => {
Ok(HttpResponse::BadRequest().json(serde_json::json!({ "error": e.to_string() })))
}
@ -258,9 +255,9 @@ async fn clear_all_documents(
path: web::Path<IndexParam>,
) -> Result<HttpResponse, ResponseError> {
match data.clear_documents(path.index_uid.clone()).await {
Ok(update_status) => {
Ok(HttpResponse::Accepted().json(serde_json::json!({ "updateId": update_status.id() })))
}
Ok(update_status) => Ok(
HttpResponse::Accepted().json(serde_json::json!({ "updateId": update_status.id() }))
),
Err(e) => {
Ok(HttpResponse::BadRequest().json(serde_json::json!({ "error": e.to_string() })))
}

View file

@ -144,9 +144,9 @@ async fn update_all(
.update_settings(index_uid.into_inner(), body.into_inner(), true)
.await
{
Ok(update_result) => {
Ok(HttpResponse::Accepted().json(serde_json::json!({ "updateId": update_result.id() })))
}
Ok(update_result) => Ok(
HttpResponse::Accepted().json(serde_json::json!({ "updateId": update_result.id() }))
),
Err(e) => {
Ok(HttpResponse::BadRequest().json(serde_json::json!({ "error": e.to_string() })))
}
@ -176,9 +176,9 @@ async fn delete_all(
.update_settings(index_uid.into_inner(), settings, false)
.await
{
Ok(update_result) => {
Ok(HttpResponse::Accepted().json(serde_json::json!({ "updateId": update_result.id() })))
}
Ok(update_result) => Ok(
HttpResponse::Accepted().json(serde_json::json!({ "updateId": update_result.id() }))
),
Err(e) => {
Ok(HttpResponse::BadRequest().json(serde_json::json!({ "error": e.to_string() })))
}

View file

@ -1,15 +1,10 @@
use std::collections::BTreeMap;
use actix_web::get;
use actix_web::web;
use actix_web::HttpResponse;
use chrono::{DateTime, Utc};
use serde::Serialize;
use crate::data::Stats;
use crate::error::ResponseError;
use crate::helpers::Authentication;
use crate::index_controller::IndexStats;
use crate::routes::IndexParam;
use crate::Data;
@ -19,59 +14,19 @@ pub fn services(cfg: &mut web::ServiceConfig) {
.service(get_version);
}
#[derive(Serialize)]
#[serde(rename_all = "camelCase")]
struct IndexStatsResponse {
number_of_documents: u64,
is_indexing: bool,
fields_distribution: BTreeMap<String, u64>,
}
impl From<IndexStats> for IndexStatsResponse {
fn from(stats: IndexStats) -> Self {
Self {
number_of_documents: stats.number_of_documents,
is_indexing: stats.is_indexing,
fields_distribution: stats.fields_distribution.into_iter().collect(),
}
}
}
#[get("/indexes/{index_uid}/stats", wrap = "Authentication::Private")]
async fn get_index_stats(
data: web::Data<Data>,
path: web::Path<IndexParam>,
) -> Result<HttpResponse, ResponseError> {
let response: IndexStatsResponse = data.get_index_stats(path.index_uid.clone()).await?.into();
let response = data.get_index_stats(path.index_uid.clone()).await?;
Ok(HttpResponse::Ok().json(response))
}
#[derive(Serialize)]
#[serde(rename_all = "camelCase")]
struct StatsResponse {
database_size: u64,
last_update: Option<DateTime<Utc>>,
indexes: BTreeMap<String, IndexStatsResponse>,
}
impl From<Stats> for StatsResponse {
fn from(stats: Stats) -> Self {
Self {
database_size: stats.database_size,
last_update: stats.last_update,
indexes: stats
.indexes
.into_iter()
.map(|(uid, index_stats)| (uid, index_stats.into()))
.collect(),
}
}
}
#[get("/stats", wrap = "Authentication::Private")]
async fn get_stats(data: web::Data<Data>) -> Result<HttpResponse, ResponseError> {
let response: StatsResponse = data.get_stats().await?.into();
let response = data.get_all_stats().await?;
Ok(HttpResponse::Ok().json(response))
}