211: fix index deletion race condition r=MarinPostma a=MarinPostma

Make the update store block if the currently processing update is from an index we are trying to delete. This ensures that no write to the index can occur after it has been deleted.

218: Update milli version to v0.5.0 r=MarinPostma a=curquiza



Co-authored-by: marin postma <postma.marin@protonmail.com>
Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
This commit is contained in:
bors[bot] 2021-06-22 14:36:34 +00:00 committed by GitHub
commit 4b37a4a415
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 92 additions and 56 deletions

94
Cargo.lock generated
View File

@ -67,7 +67,7 @@ dependencies = [
"percent-encoding",
"pin-project",
"pin-project-lite",
"rand 0.8.3",
"rand 0.8.4",
"regex",
"serde",
"sha-1 0.9.6",
@ -209,9 +209,9 @@ dependencies = [
[[package]]
name = "actix-web-codegen"
version = "0.5.0-beta.2"
version = "0.5.0-beta.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7f138ac357a674c3b480ddb7bbd894b13c1b6e8927d728bc9ea5e17eee2f8fc9"
checksum = "0d048c6986743105c1e8e9729fbc8d5d1667f2f62393a58be8d85a7d9a5a6c8d"
dependencies = [
"proc-macro2 1.0.27",
"quote 1.0.9",
@ -667,9 +667,9 @@ checksum = "79bb3adfaf5f75d24b01aee375f7555907840fa2800e5ec8fa3b9e2031830173"
[[package]]
name = "cpufeatures"
version = "0.1.4"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ed00c67cb5d0a7d64a44f6ad2668db7e7530311dd53ea79bcd4fb022c64911c8"
checksum = "66c99696f6c9dd7f35d486b9d04d7e6e202aa3e8c40d553f2fdf5e7e0c6a71ef"
dependencies = [
"libc",
]
@ -1375,9 +1375,9 @@ dependencies = [
[[package]]
name = "ipnet"
version = "2.3.0"
version = "2.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "47be2f14c678be2fdcab04ab1171db51b2762ce6f0a8ee87c8dd4a04ed216135"
checksum = "68f2d64f2edebec4ce84ad108148e67e1064789bee435edc5b60ad398714a3a9"
[[package]]
name = "itertools"
@ -1626,7 +1626,7 @@ dependencies = [
"log",
"main_error",
"meilisearch-error",
"meilisearch-tokenizer",
"meilisearch-tokenizer 0.2.3",
"memmap",
"milli",
"mime",
@ -1680,6 +1680,22 @@ dependencies = [
"whatlang",
]
[[package]]
name = "meilisearch-tokenizer"
version = "0.2.3"
source = "git+https://github.com/meilisearch/Tokenizer.git?tag=v0.2.3#c2399c3f879144ad92e20ae057e14984dfd22781"
dependencies = [
"character_converter",
"cow-utils",
"deunicode",
"fst",
"jieba-rs",
"once_cell",
"slice-group-by",
"unicode-segmentation",
"whatlang",
]
[[package]]
name = "memchr"
version = "2.4.0"
@ -1707,8 +1723,8 @@ dependencies = [
[[package]]
name = "milli"
version = "0.4.1"
source = "git+https://github.com/meilisearch/milli.git?tag=v0.4.1#a67ccfdf3ac093b51bdf5ada3621fd6663897497"
version = "0.5.0"
source = "git+https://github.com/meilisearch/milli.git?tag=v0.5.0#b073fd49ea04fbb8da940f6357c952b34af94c0e"
dependencies = [
"bstr",
"byteorder",
@ -1726,7 +1742,7 @@ dependencies = [
"linked-hash-map",
"log",
"logging_timer",
"meilisearch-tokenizer",
"meilisearch-tokenizer 0.2.2",
"memmap",
"obkv",
"once_cell",
@ -1774,9 +1790,9 @@ dependencies = [
[[package]]
name = "mio"
version = "0.7.11"
version = "0.7.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cf80d3e903b34e0bd7282b218398aec54e082c840d9baf8339e0080a0c542956"
checksum = "8c2bdb6314ec10835cd3293dd268473a835c02b7b352e788be788b3c6ca6bb16"
dependencies = [
"libc",
"log",
@ -1930,9 +1946,9 @@ checksum = "624a8340c38c1b80fd549087862da4ba43e08858af025b236e509b6649fc13d5"
[[package]]
name = "openssl"
version = "0.10.34"
version = "0.10.35"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6d7830286ad6a3973c0f1d9b73738f69c76b739301d0229c4b96501695cbe4c8"
checksum = "549430950c79ae24e6d02e0b7404534ecf311d94cc9f861e9e4020187d13d885"
dependencies = [
"bitflags",
"cfg-if 1.0.0",
@ -1950,9 +1966,9 @@ checksum = "28988d872ab76095a6e6ac88d99b54fd267702734fd7ffe610ca27f533ddb95a"
[[package]]
name = "openssl-sys"
version = "0.9.63"
version = "0.9.65"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b6b0d6fb7d80f877617dfcb014e605e2b5ab2fb0afdf27935219bb6bd984cb98"
checksum = "7a7907e3bfa08bb85105209cdfcb6c63d109f8f6c1ed6ca318fff5c1853fbc1d"
dependencies = [
"autocfg",
"cc",
@ -2301,14 +2317,14 @@ dependencies = [
[[package]]
name = "rand"
version = "0.8.3"
version = "0.8.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0ef9e7e66b4468674bfcb0c81af8b7fa0bb154fa9f28eb840da5c447baeb8d7e"
checksum = "2e7573632e6454cf6b99d7aac4ccca54be06da05aca2ef7423d22d27d4d4bcd8"
dependencies = [
"libc",
"rand_chacha 0.3.1",
"rand_core 0.6.2",
"rand_hc 0.3.0",
"rand_core 0.6.3",
"rand_hc 0.3.1",
]
[[package]]
@ -2328,7 +2344,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88"
dependencies = [
"ppv-lite86",
"rand_core 0.6.2",
"rand_core 0.6.3",
]
[[package]]
@ -2357,9 +2373,9 @@ dependencies = [
[[package]]
name = "rand_core"
version = "0.6.2"
version = "0.6.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "34cf66eb183df1c5876e2dcf6b13d57340741e8dc255b48e40a26de954d06ae7"
checksum = "d34f1408f55294453790c48b2f1ebbb1c5b4b7563eb1f418bcfcfdbb06ebb4e7"
dependencies = [
"getrandom 0.2.3",
]
@ -2375,11 +2391,11 @@ dependencies = [
[[package]]
name = "rand_hc"
version = "0.3.0"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3190ef7066a446f2e7f42e239d161e905420ccab01eb967c9eb27d21b2322a73"
checksum = "d51e9f596de227fda2ea6c84607f5558e196eeaf43c986b724ba4fb8fdf497e7"
dependencies = [
"rand_core 0.6.2",
"rand_core 0.6.3",
]
[[package]]
@ -2427,9 +2443,9 @@ dependencies = [
[[package]]
name = "redox_syscall"
version = "0.2.8"
version = "0.2.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "742739e41cd49414de871ea5e549afb7e2a3ac77b589bcbebe8c82fab37147fc"
checksum = "5ab49abadf3f9e1c4bc499e8845e152ad87d2ad2d30371841171169e9d75feee"
dependencies = [
"bitflags",
]
@ -2468,9 +2484,9 @@ dependencies = [
[[package]]
name = "reqwest"
version = "0.11.3"
version = "0.11.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2296f2fac53979e8ccbc4a1136b25dcefd37be9ed7e4a1f6b05a6029c84ff124"
checksum = "246e9f61b9bb77df069a947682be06e31ac43ea37862e244a69f177694ea6d22"
dependencies = [
"base64",
"bytes 1.0.1",
@ -2539,9 +2555,9 @@ dependencies = [
[[package]]
name = "rustc-demangle"
version = "0.1.19"
version = "0.1.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "410f7acf3cb3a44527c5d9546bad4bf4e6c460915d5f9f2fc524498bfe8f70ce"
checksum = "dead70b0b5e03e9c814bcb6b01e03e68f7c57a80aa48c72ec92152ab3e818d49"
[[package]]
name = "rustc_version"
@ -2735,7 +2751,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cbbe485e384cb5540940e65d729820ffcbedc0c902fcb27081e44dacfe6a0c34"
dependencies = [
"lazy_static",
"rand 0.8.3",
"rand 0.8.4",
"sentry-types",
"serde",
"serde_json",
@ -3099,7 +3115,7 @@ checksum = "dac1c663cfc93810f88aed9b8941d48cabf856a1b111c29a40439018d870eb22"
dependencies = [
"cfg-if 1.0.0",
"libc",
"rand 0.8.3",
"rand 0.8.4",
"redox_syscall",
"remove_dir_all",
"winapi",
@ -3219,9 +3235,9 @@ checksum = "cda74da7e1a664f795bb1f8a87ec406fb89a02522cf6e50620d016add6dbbf5c"
[[package]]
name = "tokio"
version = "1.6.1"
version = "1.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0a38d31d7831c6ed7aad00aa4c12d9375fd225a6dd77da1d25b707346319a975"
checksum = "5fb2ed024293bb19f7a5dc54fe83bf86532a44c12a2bb8ba40d64a4509395ca2"
dependencies = [
"autocfg",
"bytes 1.0.1",
@ -3445,9 +3461,9 @@ dependencies = [
[[package]]
name = "vcpkg"
version = "0.2.13"
version = "0.2.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "025ce40a007e1907e58d5bc1a594def78e5573bb0b1160bc389634e8f12e4faa"
checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426"
[[package]]
name = "vec_map"

View File

@ -49,9 +49,9 @@ itertools = "0.10.0"
log = "0.4.8"
main_error = "0.1.0"
meilisearch-error = { path = "../meilisearch-error" }
meilisearch-tokenizer = { git = "https://github.com/meilisearch/Tokenizer.git", tag = "v0.2.2" }
meilisearch-tokenizer = { git = "https://github.com/meilisearch/tokenizer.git", tag = "v0.2.3" }
memmap = "0.7.0"
milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.4.1" }
milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.5.0" }
mime = "0.3.16"
once_cell = "1.5.2"
oxidized-json-checker = "0.3.2"

View File

@ -340,7 +340,7 @@ impl<S: IndexStore + Sync + Send> IndexActor<S> {
size: index.size(),
number_of_documents: index.number_of_documents(&rtxn)?,
is_indexing: None,
fields_distribution: index.fields_distribution(&rtxn)?,
field_distribution: index.field_distribution(&rtxn)?,
})
})
.await?

View File

@ -6,8 +6,9 @@ use std::time::Duration;
use actix_web::web::{Bytes, Payload};
use chrono::{DateTime, Utc};
use futures::stream::StreamExt;
use log::error;
use log::info;
use milli::FieldsDistribution;
use milli::FieldDistribution;
use serde::{Deserialize, Serialize};
use tokio::sync::mpsc;
use tokio::time::sleep;
@ -63,7 +64,7 @@ pub struct IndexStats {
/// index returns it, since it is the `UpdateStore` that knows what index is currently indexing. It is
/// later set to either true or false, when we retrieve the information from the `UpdateStore`
pub is_indexing: Option<bool>,
pub fields_distribution: FieldsDistribution,
pub field_distribution: FieldDistribution,
}
#[derive(Clone)]
@ -256,8 +257,20 @@ impl IndexController {
pub async fn delete_index(&self, uid: String) -> Result<()> {
let uuid = self.uuid_resolver.delete(uid).await?;
self.update_handle.delete(uuid).await?;
self.index_handle.delete(uuid).await?;
// We remove the index from the resolver synchronously, and effectively perform the index
// deletion as a background task.
let update_handle = self.update_handle.clone();
let index_handle = self.index_handle.clone();
tokio::spawn(async move {
if let Err(e) = update_handle.delete(uuid).await {
error!("Error while deleting index: {}", e);
}
if let Err(e) = index_handle.delete(uuid).await {
error!("Error while deleting index: {}", e);
}
});
Ok(())
}

View File

@ -428,7 +428,8 @@ impl UpdateStore {
Ok(None)
}
/// Delete all updates for an index from the update store.
/// Delete all updates for an index from the update store. If the currently processing update
/// is for `index_uuid`, the call will block until the update is terminated.
pub fn delete_all(&self, index_uuid: Uuid) -> Result<()> {
let mut txn = self.env.write_txn()?;
// Contains all the content file paths that we need to be removed if the deletion was successful.
@ -469,8 +470,14 @@ impl UpdateStore {
let _ = remove_file(path);
});
// We don't care about the currently processing update, since it will be removed by itself
// once its done processing, and we can't abort a running update.
// If the currently processing update is from our index, we wait until it is
// finished before returning. This ensures that no write to the index occurs after we delete it.
if let State::Processing(uuid, _) = *self.state.read() {
if uuid == index_uuid {
// wait for a write lock, do nothing with it.
self.state.write();
}
}
Ok(())
}

View File

@ -15,7 +15,7 @@ async fn stats() {
assert_eq!(code, 200);
assert_eq!(response["numberOfDocuments"], 0);
assert!(response["isIndexing"] == false);
assert!(response["fieldsDistribution"]
assert!(response["fieldDistribution"]
.as_object()
.unwrap()
.is_empty());
@ -42,7 +42,7 @@ async fn stats() {
assert_eq!(code, 200);
assert_eq!(response["numberOfDocuments"], 2);
assert!(response["isIndexing"] == false);
assert_eq!(response["fieldsDistribution"]["id"], 2);
assert_eq!(response["fieldsDistribution"]["name"], 1);
assert_eq!(response["fieldsDistribution"]["age"], 1);
assert_eq!(response["fieldDistribution"]["id"], 2);
assert_eq!(response["fieldDistribution"]["name"], 1);
assert_eq!(response["fieldDistribution"]["age"], 1);
}

View File

@ -67,7 +67,7 @@ async fn stats() {
assert!(response.get("lastUpdate").is_some());
assert_eq!(response["indexes"]["test"]["numberOfDocuments"], 2);
assert!(response["indexes"]["test"]["isIndexing"] == false);
assert_eq!(response["indexes"]["test"]["fieldsDistribution"]["id"], 2);
assert_eq!(response["indexes"]["test"]["fieldsDistribution"]["name"], 1);
assert_eq!(response["indexes"]["test"]["fieldsDistribution"]["age"], 1);
assert_eq!(response["indexes"]["test"]["fieldDistribution"]["id"], 2);
assert_eq!(response["indexes"]["test"]["fieldDistribution"]["name"], 1);
assert_eq!(response["indexes"]["test"]["fieldDistribution"]["age"], 1);
}