rewrite update store

This commit is contained in:
Marin Postma 2021-04-22 10:14:29 +02:00
parent 51829ad85e
commit 4fe2a13c71
No known key found for this signature in database
GPG Key ID: D5241F0C0C865F30
28 changed files with 896 additions and 826 deletions

33
Cargo.lock generated
View File

@ -286,6 +286,12 @@ version = "1.0.40"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "28b2cd92db5cbd74e8e5028f7e27dd7aa3090e89e4f2a197cc7c8dfb69c7063b"
[[package]]
name = "arc-swap"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d4d7d63395147b81a9e570bcc6243aaf71c017bd666d4909cfef0085bdda8d73"
[[package]]
name = "assert-json-diff"
version = "1.0.1"
@ -295,19 +301,6 @@ dependencies = [
"serde_json",
]
[[package]]
name = "async-compression"
version = "0.3.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b72c1f1154e234325b50864a349b9c8e56939e266a4c307c0f159812df2f9537"
dependencies = [
"flate2",
"futures-core",
"memchr",
"pin-project-lite 0.2.6",
"tokio 0.2.25",
]
[[package]]
name = "async-stream"
version = "0.3.1"
@ -775,16 +768,6 @@ dependencies = [
"memchr",
]
[[package]]
name = "dashmap"
version = "4.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e77a43b28d0668df09411cb0bc9a8c2adc40f9a048afe863e05fd43251e8e39c"
dependencies = [
"cfg-if 1.0.0",
"num_cpus",
]
[[package]]
name = "debugid"
version = "0.7.2"
@ -1751,17 +1734,15 @@ dependencies = [
"actix-web",
"actix-web-static-files",
"anyhow",
"arc-swap",
"assert-json-diff",
"async-compression",
"async-stream",
"async-trait",
"byte-unit",
"bytemuck",
"bytes 0.6.0",
"cargo_toml",
"chrono",
"crossbeam-channel",
"dashmap",
"either",
"env_logger 0.8.3",
"flate2",

View File

@ -28,15 +28,13 @@ actix-service = "2.0.0"
actix-web = { version = "=4.0.0-beta.6", features = ["rustls"] }
actix-web-static-files = { git = "https://github.com/MarinPostma/actix-web-static-files.git", rev = "6db8c3e", optional = true }
anyhow = "1.0.36"
async-compression = { version = "0.3.6", features = ["gzip", "tokio-02"] }
async-stream = "0.3.0"
async-trait = "0.1.42"
arc-swap = "1.2.0"
byte-unit = { version = "4.0.9", default-features = false, features = ["std"] }
bytemuck = "1.5.1"
bytes = "0.6.0"
chrono = { version = "0.4.19", features = ["serde"] }
crossbeam-channel = "0.5.0"
dashmap = "4.0.2"
either = "1.6.1"
env_logger = "0.8.2"
flate2 = "1.0.19"

View File

@ -8,7 +8,7 @@ use serde_json::{Map, Value};
use crate::helpers::EnvSizer;
pub use search::{SearchQuery, SearchResult, DEFAULT_SEARCH_LIMIT};
pub use updates::{Facets, Settings, UpdateResult};
pub use updates::{Facets, Settings};
mod search;
mod updates;
@ -59,9 +59,7 @@ impl Index {
})
.transpose()?
.unwrap_or_else(BTreeSet::new);
let distinct_attribute = self
.distinct_attribute(&txn)?
.map(String::from);
let distinct_attribute = self.distinct_attribute(&txn)?.map(String::from);
Ok(Settings {
displayed_attributes: Some(Some(displayed_attributes)),

View File

@ -4,17 +4,11 @@ use std::num::NonZeroUsize;
use flate2::read::GzDecoder;
use log::info;
use milli::update::{DocumentAdditionResult, IndexDocumentsMethod, UpdateBuilder, UpdateFormat};
use milli::update::{IndexDocumentsMethod, UpdateBuilder, UpdateFormat};
use serde::{de::Deserializer, Deserialize, Serialize};
use super::Index;
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum UpdateResult {
DocumentsAddition(DocumentAdditionResult),
DocumentDeletion { deleted: u64 },
Other,
}
use crate::index_controller::UpdateResult;
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
#[serde(deny_unknown_fields)]
@ -91,7 +85,7 @@ impl Index {
&self,
format: UpdateFormat,
method: IndexDocumentsMethod,
content: impl io::Read,
content: Option<impl io::Read>,
update_builder: UpdateBuilder,
primary_key: Option<&str>,
) -> anyhow::Result<UpdateResult> {
@ -108,16 +102,15 @@ impl Index {
builder.update_format(format);
builder.index_documents_method(method);
let gzipped = false;
let reader = if gzipped {
Box::new(GzDecoder::new(content))
} else {
Box::new(content) as Box<dyn io::Read>
};
let indexing_callback =
|indexing_step, update_id| info!("update {}: {:?}", update_id, indexing_step);
let result = builder.execute(reader, |indexing_step, update_id| {
info!("update {}: {:?}", update_id, indexing_step)
});
let gzipped = false;
let result = match content {
Some(content) if gzipped => builder.execute(GzDecoder::new(content), indexing_callback),
Some(content) => builder.execute(content, indexing_callback),
None => builder.execute(std::io::empty(), indexing_callback),
};
info!("document addition done: {:?}", result);
@ -228,10 +221,13 @@ impl Index {
pub fn delete_documents(
&self,
document_ids: impl io::Read,
document_ids: Option<impl io::Read>,
update_builder: UpdateBuilder,
) -> anyhow::Result<UpdateResult> {
let ids: Vec<String> = serde_json::from_reader(document_ids)?;
let ids = match document_ids {
Some(reader) => serde_json::from_reader(reader)?,
None => Vec::<String>::new(),
};
let mut txn = self.write_txn()?;
let mut builder = update_builder.delete_documents(&mut txn, self)?;

View File

@ -11,13 +11,13 @@ use tokio::task::spawn_blocking;
use uuid::Uuid;
use crate::index::{Document, SearchQuery, SearchResult, Settings};
use crate::index_controller::update_handler::UpdateHandler;
use crate::index_controller::{
get_arc_ownership_blocking, updates::Processing, IndexStats, UpdateMeta,
get_arc_ownership_blocking, update_handler::UpdateHandler, Failed, IndexStats, Processed,
Processing,
};
use crate::option::IndexerOpts;
use super::{IndexError, IndexMeta, IndexMsg, IndexSettings, IndexStore, Result, UpdateResult};
use super::{IndexError, IndexMeta, IndexMsg, IndexResult, IndexSettings, IndexStore};
pub const CONCURRENT_INDEX_MSG: usize = 10;
@ -28,7 +28,7 @@ pub struct IndexActor<S> {
}
impl<S: IndexStore + Sync + Send> IndexActor<S> {
pub fn new(receiver: mpsc::Receiver<IndexMsg>, store: S) -> Result<Self> {
pub fn new(receiver: mpsc::Receiver<IndexMsg>, store: S) -> IndexResult<Self> {
let options = IndexerOpts::default();
let update_handler = UpdateHandler::new(&options).map_err(IndexError::Error)?;
let update_handler = Arc::new(update_handler);
@ -40,9 +40,6 @@ impl<S: IndexStore + Sync + Send> IndexActor<S> {
})
}
/// `run` poll the write_receiver and read_receiver concurrently, but while messages send
/// through the read channel are processed concurrently, the messages sent through the write
/// channel are processed one at a time.
pub async fn run(mut self) {
let mut receiver = self
.receiver
@ -145,7 +142,7 @@ impl<S: IndexStore + Sync + Send> IndexActor<S> {
&self,
uuid: Uuid,
primary_key: Option<String>,
) -> Result<IndexMeta> {
) -> IndexResult<IndexMeta> {
let index = self.store.create(uuid, primary_key).await?;
let meta = spawn_blocking(move || IndexMeta::new(&index))
.await
@ -156,9 +153,9 @@ impl<S: IndexStore + Sync + Send> IndexActor<S> {
async fn handle_update(
&self,
uuid: Uuid,
meta: Processing<UpdateMeta>,
data: File,
) -> Result<UpdateResult> {
meta: Processing,
data: Option<File>,
) -> IndexResult<Result<Processed, Failed>> {
debug!("Processing update {}", meta.id());
let update_handler = self.update_handler.clone();
let index = match self.store.get(uuid).await? {
@ -171,7 +168,7 @@ impl<S: IndexStore + Sync + Send> IndexActor<S> {
.map_err(|e| IndexError::Error(e.into()))
}
async fn handle_settings(&self, uuid: Uuid) -> Result<Settings> {
async fn handle_settings(&self, uuid: Uuid) -> IndexResult<Settings> {
let index = self
.store
.get(uuid)
@ -188,7 +185,7 @@ impl<S: IndexStore + Sync + Send> IndexActor<S> {
offset: usize,
limit: usize,
attributes_to_retrieve: Option<Vec<String>>,
) -> Result<Vec<Document>> {
) -> IndexResult<Vec<Document>> {
let index = self
.store
.get(uuid)
@ -208,7 +205,7 @@ impl<S: IndexStore + Sync + Send> IndexActor<S> {
uuid: Uuid,
doc_id: String,
attributes_to_retrieve: Option<Vec<String>>,
) -> Result<Document> {
) -> IndexResult<Document> {
let index = self
.store
.get(uuid)
@ -223,7 +220,7 @@ impl<S: IndexStore + Sync + Send> IndexActor<S> {
.map_err(|e| IndexError::Error(e.into()))?
}
async fn handle_delete(&self, uuid: Uuid) -> Result<()> {
async fn handle_delete(&self, uuid: Uuid) -> IndexResult<()> {
let index = self.store.delete(uuid).await?;
if let Some(index) = index {
@ -240,7 +237,7 @@ impl<S: IndexStore + Sync + Send> IndexActor<S> {
Ok(())
}
async fn handle_get_meta(&self, uuid: Uuid) -> Result<IndexMeta> {
async fn handle_get_meta(&self, uuid: Uuid) -> IndexResult<IndexMeta> {
match self.store.get(uuid).await? {
Some(index) => {
let meta = spawn_blocking(move || IndexMeta::new(&index))
@ -256,7 +253,7 @@ impl<S: IndexStore + Sync + Send> IndexActor<S> {
&self,
uuid: Uuid,
index_settings: IndexSettings,
) -> Result<IndexMeta> {
) -> IndexResult<IndexMeta> {
let index = self
.store
.get(uuid)
@ -283,7 +280,7 @@ impl<S: IndexStore + Sync + Send> IndexActor<S> {
.map_err(|e| IndexError::Error(e.into()))?
}
async fn handle_snapshot(&self, uuid: Uuid, mut path: PathBuf) -> Result<()> {
async fn handle_snapshot(&self, uuid: Uuid, mut path: PathBuf) -> IndexResult<()> {
use tokio::fs::create_dir_all;
path.push("indexes");
@ -313,7 +310,7 @@ impl<S: IndexStore + Sync + Send> IndexActor<S> {
Ok(())
}
async fn handle_get_stats(&self, uuid: Uuid) -> Result<IndexStats> {
async fn handle_get_stats(&self, uuid: Uuid) -> IndexResult<IndexStats> {
let index = self
.store
.get(uuid)

View File

@ -3,14 +3,14 @@ use std::path::{Path, PathBuf};
use tokio::sync::{mpsc, oneshot};
use uuid::Uuid;
use crate::index::{Document, SearchQuery, SearchResult, Settings};
use crate::index_controller::{updates::Processing, UpdateMeta};
use crate::index_controller::{IndexSettings, IndexStats};
use super::{
IndexActor, IndexActorHandle, IndexMeta, IndexMsg, MapIndexStore, Result, UpdateResult,
use crate::index_controller::{IndexSettings, IndexStats, Processing};
use crate::{
index::{Document, SearchQuery, SearchResult, Settings},
index_controller::{Failed, Processed},
};
use super::{IndexActor, IndexActorHandle, IndexMeta, IndexMsg, IndexResult, MapIndexStore};
#[derive(Clone)]
pub struct IndexActorHandleImpl {
sender: mpsc::Sender<IndexMsg>,
@ -18,7 +18,11 @@ pub struct IndexActorHandleImpl {
#[async_trait::async_trait]
impl IndexActorHandle for IndexActorHandleImpl {
async fn create_index(&self, uuid: Uuid, primary_key: Option<String>) -> Result<IndexMeta> {
async fn create_index(
&self,
uuid: Uuid,
primary_key: Option<String>,
) -> IndexResult<IndexMeta> {
let (ret, receiver) = oneshot::channel();
let msg = IndexMsg::CreateIndex {
ret,
@ -32,9 +36,9 @@ impl IndexActorHandle for IndexActorHandleImpl {
async fn update(
&self,
uuid: Uuid,
meta: Processing<UpdateMeta>,
data: std::fs::File,
) -> anyhow::Result<UpdateResult> {
meta: Processing,
data: Option<std::fs::File>,
) -> anyhow::Result<Result<Processed, Failed>> {
let (ret, receiver) = oneshot::channel();
let msg = IndexMsg::Update {
ret,
@ -46,14 +50,14 @@ impl IndexActorHandle for IndexActorHandleImpl {
Ok(receiver.await.expect("IndexActor has been killed")?)
}
async fn search(&self, uuid: Uuid, query: SearchQuery) -> Result<SearchResult> {
async fn search(&self, uuid: Uuid, query: SearchQuery) -> IndexResult<SearchResult> {
let (ret, receiver) = oneshot::channel();
let msg = IndexMsg::Search { uuid, query, ret };
let _ = self.sender.send(msg).await;
Ok(receiver.await.expect("IndexActor has been killed")?)
}
async fn settings(&self, uuid: Uuid) -> Result<Settings> {
async fn settings(&self, uuid: Uuid) -> IndexResult<Settings> {
let (ret, receiver) = oneshot::channel();
let msg = IndexMsg::Settings { uuid, ret };
let _ = self.sender.send(msg).await;
@ -66,7 +70,7 @@ impl IndexActorHandle for IndexActorHandleImpl {
offset: usize,
limit: usize,
attributes_to_retrieve: Option<Vec<String>>,
) -> Result<Vec<Document>> {
) -> IndexResult<Vec<Document>> {
let (ret, receiver) = oneshot::channel();
let msg = IndexMsg::Documents {
uuid,
@ -84,7 +88,7 @@ impl IndexActorHandle for IndexActorHandleImpl {
uuid: Uuid,
doc_id: String,
attributes_to_retrieve: Option<Vec<String>>,
) -> Result<Document> {
) -> IndexResult<Document> {
let (ret, receiver) = oneshot::channel();
let msg = IndexMsg::Document {
uuid,
@ -96,21 +100,25 @@ impl IndexActorHandle for IndexActorHandleImpl {
Ok(receiver.await.expect("IndexActor has been killed")?)
}
async fn delete(&self, uuid: Uuid) -> Result<()> {
async fn delete(&self, uuid: Uuid) -> IndexResult<()> {
let (ret, receiver) = oneshot::channel();
let msg = IndexMsg::Delete { uuid, ret };
let _ = self.sender.send(msg).await;
Ok(receiver.await.expect("IndexActor has been killed")?)
}
async fn get_index_meta(&self, uuid: Uuid) -> Result<IndexMeta> {
async fn get_index_meta(&self, uuid: Uuid) -> IndexResult<IndexMeta> {
let (ret, receiver) = oneshot::channel();
let msg = IndexMsg::GetMeta { uuid, ret };
let _ = self.sender.send(msg).await;
Ok(receiver.await.expect("IndexActor has been killed")?)
}
async fn update_index(&self, uuid: Uuid, index_settings: IndexSettings) -> Result<IndexMeta> {
async fn update_index(
&self,
uuid: Uuid,
index_settings: IndexSettings,
) -> IndexResult<IndexMeta> {
let (ret, receiver) = oneshot::channel();
let msg = IndexMsg::UpdateIndex {
uuid,
@ -121,14 +129,14 @@ impl IndexActorHandle for IndexActorHandleImpl {
Ok(receiver.await.expect("IndexActor has been killed")?)
}
async fn snapshot(&self, uuid: Uuid, path: PathBuf) -> Result<()> {
async fn snapshot(&self, uuid: Uuid, path: PathBuf) -> IndexResult<()> {
let (ret, receiver) = oneshot::channel();
let msg = IndexMsg::Snapshot { uuid, path, ret };
let _ = self.sender.send(msg).await;
Ok(receiver.await.expect("IndexActor has been killed")?)
}
async fn get_index_stats(&self, uuid: Uuid) -> Result<IndexStats> {
async fn get_index_stats(&self, uuid: Uuid) -> IndexResult<IndexStats> {
let (ret, receiver) = oneshot::channel();
let msg = IndexMsg::GetStats { uuid, ret };
let _ = self.sender.send(msg).await;

View File

@ -4,21 +4,21 @@ use tokio::sync::oneshot;
use uuid::Uuid;
use crate::index::{Document, SearchQuery, SearchResult, Settings};
use crate::index_controller::{updates::Processing, IndexStats, UpdateMeta};
use crate::index_controller::{Failed, IndexStats, Processed, Processing};
use super::{IndexMeta, IndexSettings, Result, UpdateResult};
use super::{IndexMeta, IndexResult, IndexSettings};
pub enum IndexMsg {
CreateIndex {
uuid: Uuid,
primary_key: Option<String>,
ret: oneshot::Sender<Result<IndexMeta>>,
ret: oneshot::Sender<IndexResult<IndexMeta>>,
},
Update {
uuid: Uuid,
meta: Processing<UpdateMeta>,
data: std::fs::File,
ret: oneshot::Sender<Result<UpdateResult>>,
meta: Processing,
data: Option<std::fs::File>,
ret: oneshot::Sender<IndexResult<Result<Processed, Failed>>>,
},
Search {
uuid: Uuid,
@ -27,41 +27,41 @@ pub enum IndexMsg {
},
Settings {
uuid: Uuid,
ret: oneshot::Sender<Result<Settings>>,
ret: oneshot::Sender<IndexResult<Settings>>,
},
Documents {
uuid: Uuid,
attributes_to_retrieve: Option<Vec<String>>,
offset: usize,
limit: usize,
ret: oneshot::Sender<Result<Vec<Document>>>,
ret: oneshot::Sender<IndexResult<Vec<Document>>>,
},
Document {
uuid: Uuid,
attributes_to_retrieve: Option<Vec<String>>,
doc_id: String,
ret: oneshot::Sender<Result<Document>>,
ret: oneshot::Sender<IndexResult<Document>>,
},
Delete {
uuid: Uuid,
ret: oneshot::Sender<Result<()>>,
ret: oneshot::Sender<IndexResult<()>>,
},
GetMeta {
uuid: Uuid,
ret: oneshot::Sender<Result<IndexMeta>>,
ret: oneshot::Sender<IndexResult<IndexMeta>>,
},
UpdateIndex {
uuid: Uuid,
index_settings: IndexSettings,
ret: oneshot::Sender<Result<IndexMeta>>,
ret: oneshot::Sender<IndexResult<IndexMeta>>,
},
Snapshot {
uuid: Uuid,
path: PathBuf,
ret: oneshot::Sender<Result<()>>,
ret: oneshot::Sender<IndexResult<()>>,
},
GetStats {
uuid: Uuid,
ret: oneshot::Sender<Result<IndexStats>>,
ret: oneshot::Sender<IndexResult<IndexStats>>,
},
}

View File

@ -1,5 +1,4 @@
#[cfg(test)]
use std::sync::Arc;
use std::fs::File;
use std::path::PathBuf;
use chrono::{DateTime, Utc};
@ -15,12 +14,8 @@ pub use handle_impl::IndexActorHandleImpl;
use message::IndexMsg;
use store::{IndexStore, MapIndexStore};
use crate::index::UpdateResult as UResult;
use crate::index::{Document, Index, SearchQuery, SearchResult, Settings};
use crate::index_controller::{
updates::{Failed, Processed, Processing},
IndexStats, UpdateMeta,
};
use crate::index_controller::{Failed, Processed, Processing, IndexStats};
use super::IndexSettings;
@ -29,8 +24,7 @@ mod handle_impl;
mod message;
mod store;
pub type Result<T> = std::result::Result<T, IndexError>;
type UpdateResult = std::result::Result<Processed<UpdateMeta, UResult>, Failed<UpdateMeta, String>>;
pub type IndexResult<T> = std::result::Result<T, IndexError>;
#[derive(Debug, Serialize, Deserialize, Clone)]
#[serde(rename_all = "camelCase")]
@ -41,12 +35,12 @@ pub struct IndexMeta {
}
impl IndexMeta {
fn new(index: &Index) -> Result<Self> {
fn new(index: &Index) -> IndexResult<Self> {
let txn = index.read_txn()?;
Self::new_txn(index, &txn)
}
fn new_txn(index: &Index, txn: &heed::RoTxn) -> Result<Self> {
fn new_txn(index: &Index, txn: &heed::RoTxn) -> IndexResult<Self> {
let created_at = index.created_at(&txn)?;
let updated_at = index.updated_at(&txn)?;
let primary_key = index.primary_key(&txn)?.map(String::from);
@ -72,82 +66,19 @@ pub enum IndexError {
ExistingPrimaryKey,
}
#[cfg(test)]
#[async_trait::async_trait]
impl IndexActorHandle for Arc<MockIndexActorHandle> {
async fn create_index(&self, uuid: Uuid, primary_key: Option<String>) -> Result<IndexMeta> {
self.as_ref().create_index(uuid, primary_key).await
}
async fn update(
&self,
uuid: Uuid,
meta: Processing<UpdateMeta>,
data: std::fs::File,
) -> anyhow::Result<UpdateResult> {
self.as_ref().update(uuid, meta, data).await
}
async fn search(&self, uuid: Uuid, query: SearchQuery) -> Result<SearchResult> {
self.as_ref().search(uuid, query).await
}
async fn settings(&self, uuid: Uuid) -> Result<Settings> {
self.as_ref().settings(uuid).await
}
async fn documents(
&self,
uuid: Uuid,
offset: usize,
limit: usize,
attributes_to_retrieve: Option<Vec<String>>,
) -> Result<Vec<Document>> {
self.as_ref().documents(uuid, offset, limit, attributes_to_retrieve).await
}
async fn document(
&self,
uuid: Uuid,
doc_id: String,
attributes_to_retrieve: Option<Vec<String>>,
) -> Result<Document> {
self.as_ref().document(uuid, doc_id, attributes_to_retrieve).await
}
async fn delete(&self, uuid: Uuid) -> Result<()> {
self.as_ref().delete(uuid).await
}
async fn get_index_meta(&self, uuid: Uuid) -> Result<IndexMeta> {
self.as_ref().get_index_meta(uuid).await
}
async fn update_index(&self, uuid: Uuid, index_settings: IndexSettings) -> Result<IndexMeta> {
self.as_ref().update_index(uuid, index_settings).await
}
async fn snapshot(&self, uuid: Uuid, path: PathBuf) -> Result<()> {
self.as_ref().snapshot(uuid, path).await
}
async fn get_index_stats(&self, uuid: Uuid) -> Result<IndexStats> {
self.as_ref().get_index_stats(uuid).await
}
}
#[async_trait::async_trait]
#[cfg_attr(test, automock)]
pub trait IndexActorHandle {
async fn create_index(&self, uuid: Uuid, primary_key: Option<String>) -> Result<IndexMeta>;
async fn create_index(&self, uuid: Uuid, primary_key: Option<String>)
-> IndexResult<IndexMeta>;
async fn update(
&self,
uuid: Uuid,
meta: Processing<UpdateMeta>,
data: std::fs::File,
) -> anyhow::Result<UpdateResult>;
async fn search(&self, uuid: Uuid, query: SearchQuery) -> Result<SearchResult>;
async fn settings(&self, uuid: Uuid) -> Result<Settings>;
meta: Processing,
data: Option<File>,
) -> anyhow::Result<Result<Processed, Failed>>;
async fn search(&self, uuid: Uuid, query: SearchQuery) -> IndexResult<SearchResult>;
async fn settings(&self, uuid: Uuid) -> IndexResult<Settings>;
async fn documents(
&self,
@ -155,16 +86,103 @@ pub trait IndexActorHandle {
offset: usize,
limit: usize,
attributes_to_retrieve: Option<Vec<String>>,
) -> Result<Vec<Document>>;
) -> IndexResult<Vec<Document>>;
async fn document(
&self,
uuid: Uuid,
doc_id: String,
attributes_to_retrieve: Option<Vec<String>>,
) -> Result<Document>;
async fn delete(&self, uuid: Uuid) -> Result<()>;
async fn get_index_meta(&self, uuid: Uuid) -> Result<IndexMeta>;
async fn update_index(&self, uuid: Uuid, index_settings: IndexSettings) -> Result<IndexMeta>;
async fn snapshot(&self, uuid: Uuid, path: PathBuf) -> Result<()>;
async fn get_index_stats(&self, uuid: Uuid) -> Result<IndexStats>;
) -> IndexResult<Document>;
async fn delete(&self, uuid: Uuid) -> IndexResult<()>;
async fn get_index_meta(&self, uuid: Uuid) -> IndexResult<IndexMeta>;
async fn update_index(
&self,
uuid: Uuid,
index_settings: IndexSettings,
) -> IndexResult<IndexMeta>;
async fn snapshot(&self, uuid: Uuid, path: PathBuf) -> IndexResult<()>;
async fn get_index_stats(&self, uuid: Uuid) -> IndexResult<IndexStats>;
}
// Test-only support code: lets a shared `Arc<MockIndexActorHandle>` be used wherever an
// `IndexActorHandle` is expected.
#[cfg(test)]
mod test {
use std::sync::Arc;
use super::*;
#[async_trait::async_trait]
/// Useful for passing around an `Arc<MockIndexActorHandle>` in tests.
///
/// Every method below is a thin forwarder: it borrows the inner mock with `as_ref()`,
/// calls the method of the same name with the same arguments, and awaits the result.
/// NOTE(review): `MockIndexActorHandle` is presumably the type generated by the
/// `automock` attribute on `IndexActorHandle` -- confirm against the trait definition.
impl IndexActorHandle for Arc<MockIndexActorHandle> {
// Forwards to the inner mock's `create_index`.
async fn create_index(
&self,
uuid: Uuid,
primary_key: Option<String>,
) -> IndexResult<IndexMeta> {
self.as_ref().create_index(uuid, primary_key).await
}
// Forwards to the inner mock's `update`. `data` is optional, and the return value is a
// nested result: the outer `anyhow::Result` wraps an inner `Result<Processed, Failed>`.
async fn update(
&self,
uuid: Uuid,
meta: Processing,
data: Option<std::fs::File>,
) -> anyhow::Result<Result<Processed, Failed>> {
self.as_ref().update(uuid, meta, data).await
}
// Forwards to the inner mock's `search`.
async fn search(&self, uuid: Uuid, query: SearchQuery) -> IndexResult<SearchResult> {
self.as_ref().search(uuid, query).await
}
// Forwards to the inner mock's `settings`.
async fn settings(&self, uuid: Uuid) -> IndexResult<Settings> {
self.as_ref().settings(uuid).await
}
// Forwards to the inner mock's `documents`, passing `offset`, `limit` and
// `attributes_to_retrieve` through unchanged.
async fn documents(
&self,
uuid: Uuid,
offset: usize,
limit: usize,
attributes_to_retrieve: Option<Vec<String>>,
) -> IndexResult<Vec<Document>> {
self.as_ref()
.documents(uuid, offset, limit, attributes_to_retrieve)
.await
}
// Forwards to the inner mock's `document`.
async fn document(
&self,
uuid: Uuid,
doc_id: String,
attributes_to_retrieve: Option<Vec<String>>,
) -> IndexResult<Document> {
self.as_ref()
.document(uuid, doc_id, attributes_to_retrieve)
.await
}
// Forwards to the inner mock's `delete`.
async fn delete(&self, uuid: Uuid) -> IndexResult<()> {
self.as_ref().delete(uuid).await
}
// Forwards to the inner mock's `get_index_meta`.
async fn get_index_meta(&self, uuid: Uuid) -> IndexResult<IndexMeta> {
self.as_ref().get_index_meta(uuid).await
}
// Forwards to the inner mock's `update_index`.
async fn update_index(
&self,
uuid: Uuid,
index_settings: IndexSettings,
) -> IndexResult<IndexMeta> {
self.as_ref().update_index(uuid, index_settings).await
}
// Forwards to the inner mock's `snapshot`.
async fn snapshot(&self, uuid: Uuid, path: PathBuf) -> IndexResult<()> {
self.as_ref().snapshot(uuid, path).await
}
// Forwards to the inner mock's `get_index_stats`.
async fn get_index_stats(&self, uuid: Uuid) -> IndexResult<IndexStats> {
self.as_ref().get_index_stats(uuid).await
}
}
}

View File

@ -8,16 +8,16 @@ use tokio::sync::RwLock;
use tokio::task::spawn_blocking;
use uuid::Uuid;
use super::{IndexError, Result};
use super::{IndexError, IndexResult};
use crate::index::Index;
type AsyncMap<K, V> = Arc<RwLock<HashMap<K, V>>>;
#[async_trait::async_trait]
pub trait IndexStore {
async fn create(&self, uuid: Uuid, primary_key: Option<String>) -> Result<Index>;
async fn get(&self, uuid: Uuid) -> Result<Option<Index>>;
async fn delete(&self, uuid: Uuid) -> Result<Option<Index>>;
async fn create(&self, uuid: Uuid, primary_key: Option<String>) -> IndexResult<Index>;
async fn get(&self, uuid: Uuid) -> IndexResult<Option<Index>>;
async fn delete(&self, uuid: Uuid) -> IndexResult<Option<Index>>;
}
pub struct MapIndexStore {
@ -40,14 +40,14 @@ impl MapIndexStore {
#[async_trait::async_trait]
impl IndexStore for MapIndexStore {
async fn create(&self, uuid: Uuid, primary_key: Option<String>) -> Result<Index> {
async fn create(&self, uuid: Uuid, primary_key: Option<String>) -> IndexResult<Index> {
let path = self.path.join(format!("index-{}", uuid));
if path.exists() {
return Err(IndexError::IndexAlreadyExists);
}
let index_size = self.index_size;
let index = spawn_blocking(move || -> Result<Index> {
let index = spawn_blocking(move || -> IndexResult<Index> {
let index = open_index(&path, index_size)?;
if let Some(primary_key) = primary_key {
let mut txn = index.write_txn()?;
@ -64,7 +64,7 @@ impl IndexStore for MapIndexStore {
Ok(index)
}
async fn get(&self, uuid: Uuid) -> Result<Option<Index>> {
async fn get(&self, uuid: Uuid) -> IndexResult<Option<Index>> {
let guard = self.index_store.read().await;
match guard.get(&uuid) {
Some(index) => Ok(Some(index.clone())),
@ -86,7 +86,7 @@ impl IndexStore for MapIndexStore {
}
}
async fn delete(&self, uuid: Uuid) -> Result<Option<Index>> {
async fn delete(&self, uuid: Uuid) -> IndexResult<Option<Index>> {
let db_path = self.path.join(format!("index-{}", uuid));
fs::remove_dir_all(db_path)
.await
@ -96,7 +96,7 @@ impl IndexStore for MapIndexStore {
}
}
fn open_index(path: impl AsRef<Path>, size: usize) -> Result<Index> {
fn open_index(path: impl AsRef<Path>, size: usize) -> IndexResult<Index> {
std::fs::create_dir_all(&path).map_err(|e| IndexError::Error(e.into()))?;
let mut options = EnvOpenOptions::new();
options.map_size(size);

View File

@ -8,23 +8,19 @@ use anyhow::bail;
use chrono::{DateTime, Utc};
use futures::stream::StreamExt;
use log::info;
use milli::update::{IndexDocumentsMethod, UpdateFormat};
use milli::FieldsDistribution;
use serde::{Deserialize, Serialize};
use tokio::sync::mpsc;
use tokio::time::sleep;
use uuid::Uuid;
pub use updates::*;
use index_actor::IndexActorHandle;
use snapshot::load_snapshot;
use snapshot::SnapshotService;
use snapshot::{SnapshotService, load_snapshot};
use update_actor::UpdateActorHandle;
pub use updates::{Failed, Processed, Processing};
use uuid_resolver::UuidError;
use uuid_resolver::UuidResolverHandle;
use uuid_resolver::{UuidError, UuidResolverHandle};
use crate::index::{Document, SearchQuery, SearchResult};
use crate::index::{Facets, Settings, UpdateResult};
use crate::index::{Settings, Document, SearchQuery, SearchResult};
use crate::option::Opt;
mod index_actor;
@ -34,8 +30,6 @@ mod update_handler;
mod updates;
mod uuid_resolver;
pub type UpdateStatus = updates::UpdateStatus<UpdateMeta, UpdateResult, String>;
#[derive(Debug, Serialize, Deserialize, Clone)]
#[serde(rename_all = "camelCase")]
pub struct IndexMetadata {
@ -47,20 +41,6 @@ pub struct IndexMetadata {
pub meta: index_actor::IndexMeta,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum UpdateMeta {
DocumentsAddition {
method: IndexDocumentsMethod,
format: UpdateFormat,
primary_key: Option<String>,
},
ClearDocuments,
DeleteDocuments,
Settings(Settings),
Facets(Facets),
}
#[derive(Clone, Debug)]
pub struct IndexSettings {
pub uid: Option<String>,
@ -73,6 +53,9 @@ pub struct IndexStats {
#[serde(skip)]
pub size: u64,
pub number_of_documents: u64,
/// Whether the current index is performing an update. It is initially `None` when the
/// index returns it, since it is the `UpdateStore` that knows what index is currently indexing. It is
/// later set to either true or false, when we retrieve the information from the `UpdateStore`.
pub is_indexing: Option<bool>,
pub fields_distribution: FieldsDistribution,
}
@ -180,7 +163,8 @@ impl IndexController {
Err(UuidError::UnexistingIndex(name)) => {
let uuid = Uuid::new_v4();
let status = perform_update(uuid).await?;
self.index_handle.create_index(uuid, None).await?;
// ignore if index creation fails now, since it may already have been created
let _ = self.index_handle.create_index(uuid, None).await;
self.uuid_resolver.insert(name, uuid).await?;
Ok(status)
}
@ -233,7 +217,8 @@ impl IndexController {
Err(UuidError::UnexistingIndex(name)) if create => {
let uuid = Uuid::new_v4();
let status = perform_udpate(uuid).await?;
self.index_handle.create_index(uuid, None).await?;
// ignore if index creation fails now, since it may already have been created
let _ = self.index_handle.create_index(uuid, None).await;
self.uuid_resolver.insert(name, uuid).await?;
Ok(status)
}
@ -378,7 +363,8 @@ impl IndexController {
let uuid = self.uuid_resolver.get(uid).await?;
let update_infos = self.update_handle.get_info().await?;
let mut stats = self.index_handle.get_index_stats(uuid).await?;
stats.is_indexing = (Some(uuid) == update_infos.processing).into();
// Check if the currently indexing update is from our index.
stats.is_indexing = Some(Some(uuid) == update_infos.processing);
Ok(stats)
}
@ -396,7 +382,7 @@ impl IndexController {
Some(last.max(index.meta.updated_at))
});
index_stats.is_indexing = (Some(index.uuid) == update_infos.processing).into();
index_stats.is_indexing = Some(Some(index.uuid) == update_infos.processing);
indexes.insert(index.uid, index_stats);
}

View File

@ -131,7 +131,8 @@ pub fn load_snapshot(
#[cfg(test)]
mod test {
use std::sync::Arc;
use std::iter::FromIterator;
use std::{collections::HashSet, sync::Arc};
use futures::future::{err, ok};
use rand::Rng;
@ -139,15 +140,19 @@ mod test {
use uuid::Uuid;
use super::*;
use crate::index_controller::update_actor::{UpdateError, MockUpdateActorHandle, UpdateActorHandleImpl};
use crate::index_controller::index_actor::MockIndexActorHandle;
use crate::index_controller::update_actor::{
MockUpdateActorHandle, UpdateActorHandleImpl, UpdateError,
};
use crate::index_controller::uuid_resolver::{MockUuidResolverHandle, UuidError};
#[actix_rt::test]
async fn test_normal() {
let mut rng = rand::thread_rng();
let uuids_num: usize = rng.gen_range(5, 10);
let uuids = (0..uuids_num).map(|_| Uuid::new_v4()).collect::<Vec<_>>();
let uuids = (0..uuids_num)
.map(|_| Uuid::new_v4())
.collect::<HashSet<_>>();
let mut uuid_resolver = MockUuidResolverHandle::new();
let uuids_clone = uuids.clone();
@ -162,13 +167,12 @@ mod test {
.expect_snapshot()
.withf(move |uuid, _path| uuids_clone.contains(uuid))
.times(uuids_num)
.returning(move |_, _| {
Box::pin(ok(()))
});
.returning(move |_, _| Box::pin(ok(())));
let dir = tempfile::tempdir_in(".").unwrap();
let handle = Arc::new(index_handle);
let update_handle = UpdateActorHandleImpl::<Vec<u8>>::new(handle.clone(), dir.path(), 4096 * 100).unwrap();
let update_handle =
UpdateActorHandleImpl::<Vec<u8>>::new(handle.clone(), dir.path(), 4096 * 100).unwrap();
let snapshot_path = tempfile::tempdir_in(".").unwrap();
let snapshot_service = SnapshotService::new(
@ -214,7 +218,7 @@ mod test {
uuid_resolver
.expect_snapshot()
.times(1)
.returning(move |_| Box::pin(ok(vec![uuid])));
.returning(move |_| Box::pin(ok(HashSet::from_iter(Some(uuid)))));
let mut update_handle = MockUpdateActorHandle::new();
update_handle

View File

@ -1,14 +1,16 @@
use std::collections::HashSet;
use std::io::SeekFrom;
use std::path::{Path, PathBuf};
use std::sync::Arc;
use futures::StreamExt;
use log::info;
use oxidized_json_checker::JsonChecker;
use tokio::fs;
use tokio::io::{AsyncSeekExt, AsyncWriteExt};
use tokio::io::AsyncWriteExt;
use tokio::runtime::Handle;
use tokio::sync::mpsc;
use uuid::Uuid;
use futures::StreamExt;
use super::{PayloadData, Result, UpdateError, UpdateMsg, UpdateStore, UpdateStoreInfo};
use crate::index_controller::index_actor::{IndexActorHandle, CONCURRENT_INDEX_MSG};
@ -32,18 +34,14 @@ where
path: impl AsRef<Path>,
index_handle: I,
) -> anyhow::Result<Self> {
let path = path.as_ref().to_owned().join("updates");
let path = path.as_ref().join("updates");
std::fs::create_dir_all(&path)?;
let mut options = heed::EnvOpenOptions::new();
options.map_size(update_db_size);
let handle = index_handle.clone();
let store = UpdateStore::open(options, &path, move |uuid, meta, file| {
futures::executor::block_on(handle.update(uuid, meta, file))
})
.map_err(|e| UpdateError::Error(e.into()))?;
let store = UpdateStore::open(options, &path, index_handle.clone())?;
std::fs::create_dir_all(path.join("update_files"))?;
assert!(path.exists());
Ok(Self {
@ -95,40 +93,54 @@ where
meta: UpdateMeta,
mut payload: mpsc::Receiver<PayloadData<D>>,
) -> Result<UpdateStatus> {
let update_file_id = uuid::Uuid::new_v4();
let path = self
.path
.join(format!("update_files/update_{}", update_file_id));
let mut file = fs::OpenOptions::new()
.read(true)
.write(true)
.create(true)
.open(&path)
.await
.map_err(|e| UpdateError::Error(Box::new(e)))?;
while let Some(bytes) = payload.recv().await {
match bytes {
Ok(bytes) => {
file.write_all(bytes.as_ref())
let file_path = match meta {
UpdateMeta::DocumentsAddition { .. }
| UpdateMeta::DeleteDocuments => {
let update_file_id = uuid::Uuid::new_v4();
let path = self
.path
.join(format!("update_files/update_{}", update_file_id));
let mut file = fs::OpenOptions::new()
.read(true)
.write(true)
.create(true)
.open(&path)
.await
.map_err(|e| UpdateError::Error(Box::new(e)))?;
let mut file_len = 0;
while let Some(bytes) = payload.recv().await {
match bytes {
Ok(bytes) => {
file_len += bytes.as_ref().len();
file.write_all(bytes.as_ref())
.await
.map_err(|e| UpdateError::Error(Box::new(e)))?;
}
Err(e) => {
return Err(UpdateError::Error(e));
}
}
}
if file_len != 0 {
file.flush()
.await
.map_err(|e| UpdateError::Error(Box::new(e)))?;
}
Err(e) => {
return Err(UpdateError::Error(e));
let file = file.into_std().await;
Some((file, path))
} else {
// empty update, delete the empty file.
fs::remove_file(&path)
.await
.map_err(|e| UpdateError::Error(Box::new(e)))?;
None
}
}
}
file.flush()
.await
.map_err(|e| UpdateError::Error(Box::new(e)))?;
file.seek(SeekFrom::Start(0))
.await
.map_err(|e| UpdateError::Error(Box::new(e)))?;
let mut file = file.into_std().await;
_ => None
};
let update_store = self.store.clone();
@ -136,12 +148,9 @@ where
use std::io::{copy, sink, BufReader, Seek};
// If the payload is empty, ignore the check.
if file
.metadata()
.map_err(|e| UpdateError::Error(Box::new(e)))?
.len()
> 0
{
let path = if let Some((mut file, path)) = file_path {
// set the file back to the beginning
file.seek(SeekFrom::Start(0)).map_err(|e| UpdateError::Error(Box::new(e)))?;
// Check that the json payload is valid:
let reader = BufReader::new(&mut file);
let mut checker = JsonChecker::new(reader);
@ -153,7 +162,10 @@ where
let _: serde_json::Value = serde_json::from_reader(file)
.map_err(|e| UpdateError::Error(Box::new(e)))?;
}
}
Some(path)
} else {
None
};
// The payload is valid, we can register it to the update store.
update_store
@ -197,17 +209,11 @@ where
Ok(())
}
async fn handle_snapshot(&self, uuids: Vec<Uuid>, path: PathBuf) -> Result<()> {
async fn handle_snapshot(&self, uuids: HashSet<Uuid>, path: PathBuf) -> Result<()> {
let index_handle = self.index_handle.clone();
let update_store = self.store.clone();
tokio::task::spawn_blocking(move || -> anyhow::Result<()> {
// acquire write lock to prevent further writes during snapshot
// the update lock must be acquired BEFORE the write lock to prevent a deadlock
let _lock = update_store.update_lock.lock();
let mut txn = update_store.env.write_txn()?;
// create db snapshot
update_store.snapshot(&mut txn, &path)?;
update_store.snapshot(&uuids, &path)?;
// Perform the snapshot of each index concurrently. Only use a third of the index actor's
// capacity at a time, so as not to put too much pressure on it.
@ -218,7 +224,7 @@ where
.map(|&uuid| handle.snapshot(uuid, path.clone()))
.buffer_unordered(CONCURRENT_INDEX_MSG / 3);
futures::executor::block_on(async {
Handle::current().block_on(async {
while let Some(res) = stream.next().await {
res?;
}
@ -234,25 +240,14 @@ where
async fn handle_get_info(&self) -> Result<UpdateStoreInfo> {
let update_store = self.store.clone();
let processing = self.store.processing.clone();
let info = tokio::task::spawn_blocking(move || -> anyhow::Result<UpdateStoreInfo> {
let txn = update_store.env.read_txn()?;
let size = update_store.get_size(&txn)?;
let processing = processing
.read()
.as_ref()
.map(|(uuid, _)| uuid)
.cloned();
let info = UpdateStoreInfo {
size, processing
};
let info = update_store.get_info()?;
Ok(info)
})
.await
.map_err(|e| UpdateError::Error(e.into()))?
.map_err(|e| UpdateError::Error(e.into()))?;
Ok(info)
}
}

View File

@ -1,12 +1,13 @@
use std::collections::HashSet;
use std::path::{Path, PathBuf};
use tokio::sync::{mpsc, oneshot};
use uuid::Uuid;
use crate::index_controller::IndexActorHandle;
use crate::index_controller::{IndexActorHandle, UpdateStatus};
use super::{
PayloadData, Result, UpdateActor, UpdateActorHandle, UpdateMeta, UpdateMsg, UpdateStatus, UpdateStoreInfo
PayloadData, Result, UpdateActor, UpdateActorHandle, UpdateMeta, UpdateMsg, UpdateStoreInfo,
};
#[derive(Clone)]
@ -63,7 +64,7 @@ where
receiver.await.expect("update actor killed.")
}
async fn snapshot(&self, uuids: Vec<Uuid>, path: PathBuf) -> Result<()> {
async fn snapshot(&self, uuids: HashSet<Uuid>, path: PathBuf) -> Result<()> {
let (ret, receiver) = oneshot::channel();
let msg = UpdateMsg::Snapshot { uuids, path, ret };
let _ = self.sender.send(msg).await;

View File

@ -1,3 +1,4 @@
use std::collections::HashSet;
use std::path::PathBuf;
use tokio::sync::{mpsc, oneshot};
@ -26,7 +27,7 @@ pub enum UpdateMsg<D> {
ret: oneshot::Sender<Result<()>>,
},
Snapshot {
uuids: Vec<Uuid>,
uuids: HashSet<Uuid>,
path: PathBuf,
ret: oneshot::Sender<Result<()>>,
},

View File

@ -3,22 +3,22 @@ mod handle_impl;
mod message;
mod update_store;
use std::path::PathBuf;
use std::{collections::HashSet, path::PathBuf};
use thiserror::Error;
use tokio::sync::mpsc;
use uuid::Uuid;
use crate::index::UpdateResult;
use crate::index_controller::{UpdateMeta, UpdateStatus};
use actor::UpdateActor;
use message::UpdateMsg;
use update_store::UpdateStore;
pub use update_store::UpdateStoreInfo;
pub use handle_impl::UpdateActorHandleImpl;
pub type Result<T> = std::result::Result<T, UpdateError>;
type UpdateStore = update_store::UpdateStore<UpdateMeta, UpdateResult, String>;
type PayloadData<D> = std::result::Result<D, Box<dyn std::error::Error + Sync + Send + 'static>>;
#[cfg(test)]
@ -32,13 +32,6 @@ pub enum UpdateError {
UnexistingUpdate(u64),
}
/// Summary information about the update store, as returned by `UpdateActorHandle::get_info`.
pub struct UpdateStoreInfo {
/// Size of the update store in bytes.
pub size: u64,
/// Uuid of the currently processing update if it exists
pub processing: Option<Uuid>,
}
#[async_trait::async_trait]
#[cfg_attr(test, automock(type Data=Vec<u8>;))]
pub trait UpdateActorHandle {
@ -47,7 +40,7 @@ pub trait UpdateActorHandle {
async fn get_all_updates_status(&self, uuid: Uuid) -> Result<Vec<UpdateStatus>>;
async fn update_status(&self, uuid: Uuid, id: u64) -> Result<UpdateStatus>;
async fn delete(&self, uuid: Uuid) -> Result<()>;
async fn snapshot(&self, uuids: Vec<Uuid>, path: PathBuf) -> Result<()>;
async fn snapshot(&self, uuids: HashSet<Uuid>, path: PathBuf) -> Result<()>;
async fn get_info(&self) -> Result<UpdateStoreInfo>;
async fn update(
&self,

View File

@ -6,9 +6,8 @@ use grenad::CompressionType;
use milli::update::UpdateBuilder;
use rayon::ThreadPool;
use crate::index::UpdateResult;
use crate::index_controller::updates::{Failed, Processed, Processing};
use crate::index_controller::UpdateMeta;
use crate::index_controller::{Failed, Processed, Processing};
use crate::option::IndexerOpts;
pub struct UpdateHandler {
@ -59,10 +58,10 @@ impl UpdateHandler {
pub fn handle_update(
&self,
meta: Processing<UpdateMeta>,
content: File,
meta: Processing,
content: Option<File>,
index: Index,
) -> Result<Processed<UpdateMeta, UpdateResult>, Failed<UpdateMeta, String>> {
) -> Result<Processed, Failed> {
use UpdateMeta::*;
let update_id = meta.id();

View File

@ -1,87 +1,121 @@
use std::path::{Path, PathBuf};
use chrono::{DateTime, Utc};
use milli::update::{DocumentAdditionResult, IndexDocumentsMethod, UpdateFormat};
use serde::{Deserialize, Serialize};
#[derive(Debug, PartialEq, Eq, Hash, Serialize, Deserialize, Clone)]
#[serde(rename_all = "camelCase")]
pub struct Enqueued<M> {
pub update_id: u64,
pub meta: M,
pub enqueued_at: DateTime<Utc>,
use crate::index::{Facets, Settings};
pub type UpdateError = String;
/// Outcome stored in the `success` field of a `Processed` update.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum UpdateResult {
/// Carries the `DocumentAdditionResult` type imported from `milli::update`.
DocumentsAddition(DocumentAdditionResult),
/// A document deletion; `deleted` is presumably the number of documents removed
/// (TODO confirm against the update handler).
DocumentDeletion { deleted: u64 },
/// Any other outcome; carries no payload.
Other,
}
impl<M> Enqueued<M> {
pub fn new(meta: M, update_id: u64) -> Self {
/// Describes which kind of update is requested, along with its parameters.
///
/// Serialized by serde as an internally tagged enum: the variant name is stored
/// under the `type` key.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum UpdateMeta {
/// A documents addition.
DocumentsAddition {
/// The milli `IndexDocumentsMethod` to use for this addition.
method: IndexDocumentsMethod,
/// The milli `UpdateFormat` of the payload.
format: UpdateFormat,
/// Optional primary key; presumably the name of the primary key field to set
/// (TODO confirm against the update handler).
primary_key: Option<String>,
},
/// Carries no parameters; presumably removes every document of the index.
ClearDocuments,
/// Carries no parameters here; presumably the documents to delete are supplied
/// through the update payload (TODO confirm).
DeleteDocuments,
/// A settings update carrying the `Settings` value.
Settings(Settings),
/// A facets update carrying the `Facets` value.
Facets(Facets),
}
/// An update that has been registered but whose processing has not started yet.
///
/// Field names are serialized in camelCase.
#[derive(Debug, Serialize, Deserialize, Clone)]
#[serde(rename_all = "camelCase")]
pub struct Enqueued {
/// Identifier of the update, as returned by `id()`.
pub update_id: u64,
/// Description of the requested update.
pub meta: UpdateMeta,
/// Time at which the update was enqueued; `Enqueued::new` sets it to `Utc::now()`.
pub enqueued_at: DateTime<Utc>,
/// Optional path exposed through `content_path()`; presumably the file holding
/// the update payload, `None` when the update has no payload (TODO confirm).
pub content: Option<PathBuf>,
}
impl Enqueued {
pub fn new(meta: UpdateMeta, update_id: u64, content: Option<PathBuf>) -> Self {
Self {
enqueued_at: Utc::now(),
meta,
update_id,
content,
}
}
pub fn processing(self) -> Processing<M> {
pub fn processing(self) -> Processing {
Processing {
from: self,
started_processing_at: Utc::now(),
}
}
pub fn abort(self) -> Aborted<M> {
pub fn abort(self) -> Aborted {
Aborted {
from: self,
aborted_at: Utc::now(),
}
}
pub fn meta(&self) -> &M {
pub fn meta(&self) -> &UpdateMeta {
&self.meta
}
pub fn id(&self) -> u64 {
self.update_id
}
pub fn content_path(&self) -> Option<&Path> {
self.content.as_deref()
}
}
#[derive(Debug, PartialEq, Eq, Hash, Serialize, Deserialize, Clone)]
#[derive(Debug, Serialize, Deserialize, Clone)]
#[serde(rename_all = "camelCase")]
pub struct Processed<M, N> {
pub success: N,
pub struct Processed {
pub success: UpdateResult,
pub processed_at: DateTime<Utc>,
#[serde(flatten)]
pub from: Processing<M>,
pub from: Processing,
}
impl<M, N> Processed<M, N> {
impl Processed {
pub fn id(&self) -> u64 {
self.from.id()
}
}
#[derive(Debug, PartialEq, Eq, Hash, Serialize, Deserialize, Clone)]
#[derive(Debug, Serialize, Deserialize, Clone)]
#[serde(rename_all = "camelCase")]
pub struct Processing<M> {
pub struct Processing {
#[serde(flatten)]
pub from: Enqueued<M>,
pub from: Enqueued,
pub started_processing_at: DateTime<Utc>,
}
impl<M> Processing<M> {
impl Processing {
pub fn id(&self) -> u64 {
self.from.id()
}
pub fn meta(&self) -> &M {
pub fn meta(&self) -> &UpdateMeta {
self.from.meta()
}
pub fn process<N>(self, meta: N) -> Processed<M, N> {
pub fn process(self, success: UpdateResult) -> Processed {
Processed {
success: meta,
success,
from: self,
processed_at: Utc::now(),
}
}
pub fn fail<E>(self, error: E) -> Failed<M, E> {
pub fn fail(self, error: UpdateError) -> Failed {
Failed {
from: self,
error,
@ -90,46 +124,46 @@ impl<M> Processing<M> {
}
}
#[derive(Debug, PartialEq, Eq, Hash, Serialize, Deserialize, Clone)]
#[derive(Debug, Serialize, Deserialize, Clone)]
#[serde(rename_all = "camelCase")]
pub struct Aborted<M> {
pub struct Aborted {
#[serde(flatten)]
from: Enqueued<M>,
from: Enqueued,
aborted_at: DateTime<Utc>,
}
impl<M> Aborted<M> {
impl Aborted {
pub fn id(&self) -> u64 {
self.from.id()
}
}
#[derive(Debug, PartialEq, Eq, Hash, Serialize, Deserialize, Clone)]
#[derive(Debug, Serialize, Deserialize, Clone)]
#[serde(rename_all = "camelCase")]
pub struct Failed<M, E> {
pub struct Failed {
#[serde(flatten)]
from: Processing<M>,
error: E,
from: Processing,
error: UpdateError,
failed_at: DateTime<Utc>,
}
impl<M, E> Failed<M, E> {
impl Failed {
pub fn id(&self) -> u64 {
self.from.id()
}
}
#[derive(Debug, PartialEq, Eq, Hash, Serialize)]
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "status", rename_all = "camelCase")]
pub enum UpdateStatus<M, N, E> {
Processing(Processing<M>),
Enqueued(Enqueued<M>),
Processed(Processed<M, N>),
Aborted(Aborted<M>),
Failed(Failed<M, E>),
pub enum UpdateStatus {
Processing(Processing),
Enqueued(Enqueued),
Processed(Processed),
Aborted(Aborted),
Failed(Failed),
}
impl<M, N, E> UpdateStatus<M, N, E> {
impl UpdateStatus {
pub fn id(&self) -> u64 {
match self {
UpdateStatus::Processing(u) => u.id(),
@ -140,7 +174,7 @@ impl<M, N, E> UpdateStatus<M, N, E> {
}
}
pub fn processed(&self) -> Option<&Processed<M, N>> {
pub fn processed(&self) -> Option<&Processed> {
match self {
UpdateStatus::Processed(p) => Some(p),
_ => None,
@ -148,32 +182,32 @@ impl<M, N, E> UpdateStatus<M, N, E> {
}
}
impl<M, N, E> From<Enqueued<M>> for UpdateStatus<M, N, E> {
fn from(other: Enqueued<M>) -> Self {
impl From<Enqueued> for UpdateStatus {
fn from(other: Enqueued) -> Self {
Self::Enqueued(other)
}
}
impl<M, N, E> From<Aborted<M>> for UpdateStatus<M, N, E> {
fn from(other: Aborted<M>) -> Self {
impl From<Aborted> for UpdateStatus {
fn from(other: Aborted) -> Self {
Self::Aborted(other)
}
}
impl<M, N, E> From<Processed<M, N>> for UpdateStatus<M, N, E> {
fn from(other: Processed<M, N>) -> Self {
impl From<Processed> for UpdateStatus {
fn from(other: Processed) -> Self {
Self::Processed(other)
}
}
impl<M, N, E> From<Processing<M>> for UpdateStatus<M, N, E> {
fn from(other: Processing<M>) -> Self {
impl From<Processing> for UpdateStatus {
fn from(other: Processing) -> Self {
Self::Processing(other)
}
}
impl<M, N, E> From<Failed<M, E>> for UpdateStatus<M, N, E> {
fn from(other: Failed<M, E>) -> Self {
impl From<Failed> for UpdateStatus {
fn from(other: Failed) -> Self {
Self::Failed(other)
}
}

View File

@ -1,4 +1,4 @@
use std::path::PathBuf;
use std::{collections::HashSet, path::PathBuf};
use log::{info, warn};
use tokio::sync::mpsc;
@ -78,7 +78,7 @@ impl<S: UuidStore> UuidResolverActor<S> {
Ok(result)
}
async fn handle_snapshot(&self, path: PathBuf) -> Result<Vec<Uuid>> {
async fn handle_snapshot(&self, path: PathBuf) -> Result<HashSet<Uuid>> {
self.store.snapshot(path).await
}

View File

@ -1,3 +1,4 @@
use std::collections::HashSet;
use std::path::{Path, PathBuf};
use tokio::sync::{mpsc, oneshot};
@ -67,7 +68,7 @@ impl UuidResolverHandle for UuidResolverHandleImpl {
.expect("Uuid resolver actor has been killed")?)
}
async fn snapshot(&self, path: PathBuf) -> Result<Vec<Uuid>> {
async fn snapshot(&self, path: PathBuf) -> Result<HashSet<Uuid>> {
let (ret, receiver) = oneshot::channel();
let msg = UuidResolveMsg::SnapshotRequest { path, ret };
let _ = self.sender.send(msg).await;

View File

@ -1,3 +1,4 @@
use std::collections::HashSet;
use std::path::PathBuf;
use tokio::sync::oneshot;
@ -28,7 +29,7 @@ pub enum UuidResolveMsg {
},
SnapshotRequest {
path: PathBuf,
ret: oneshot::Sender<Result<Vec<Uuid>>>,
ret: oneshot::Sender<Result<HashSet<Uuid>>>,
},
GetSize {
ret: oneshot::Sender<Result<u64>>,

View File

@ -3,6 +3,7 @@ mod handle_impl;
mod message;
mod store;
use std::collections::HashSet;
use std::path::PathBuf;
use thiserror::Error;
@ -29,7 +30,7 @@ pub trait UuidResolverHandle {
async fn create(&self, name: String) -> anyhow::Result<Uuid>;
async fn delete(&self, name: String) -> anyhow::Result<Uuid>;
async fn list(&self) -> anyhow::Result<Vec<(String, Uuid)>>;
async fn snapshot(&self, path: PathBuf) -> Result<Vec<Uuid>>;
async fn snapshot(&self, path: PathBuf) -> Result<HashSet<Uuid>>;
async fn get_size(&self) -> Result<u64>;
}

View File

@ -1,5 +1,6 @@
use std::fs::create_dir_all;
use std::path::{Path, PathBuf};
use std::collections::HashSet;
use std::fs::create_dir_all;
use heed::{
types::{ByteSlice, Str},
@ -19,7 +20,7 @@ pub trait UuidStore {
async fn delete(&self, uid: String) -> Result<Option<Uuid>>;
async fn list(&self) -> Result<Vec<(String, Uuid)>>;
async fn insert(&self, name: String, uuid: Uuid) -> Result<()>;
async fn snapshot(&self, path: PathBuf) -> Result<Vec<Uuid>>;
async fn snapshot(&self, path: PathBuf) -> Result<HashSet<Uuid>>;
async fn get_size(&self) -> Result<u64>;
}
@ -129,17 +130,17 @@ impl UuidStore for HeedUuidStore {
.await?
}
async fn snapshot(&self, mut path: PathBuf) -> Result<Vec<Uuid>> {
async fn snapshot(&self, mut path: PathBuf) -> Result<HashSet<Uuid>> {
let env = self.env.clone();
let db = self.db;
tokio::task::spawn_blocking(move || {
// Write transaction to acquire a lock on the database.
let txn = env.write_txn()?;
let mut entries = Vec::new();
let mut entries = HashSet::new();
for entry in db.iter(&txn)? {
let (_, uuid) = entry?;
let uuid = Uuid::from_slice(uuid)?;
entries.push(uuid)
entries.insert(uuid);
}
// only perform snapshot if there are indexes

View File

@ -107,14 +107,11 @@ async fn get_all_documents(
path: web::Path<IndexParam>,
params: web::Query<BrowseQuery>,
) -> Result<HttpResponse, ResponseError> {
let attributes_to_retrieve = params
.attributes_to_retrieve
.as_ref()
.and_then(|attrs| {
let attributes_to_retrieve = params.attributes_to_retrieve.as_ref().and_then(|attrs| {
let mut names = Vec::new();
for name in attrs.split(',').map(String::from) {
if name == "*" {
return None
return None;
}
names.push(name);
}

View File

@ -185,12 +185,9 @@ impl Index<'_> {
self.service.get(url).await
}
make_settings_test_routes!(
distinct_attribute
);
make_settings_test_routes!(distinct_attribute);
}
pub struct GetDocumentOptions;
#[derive(Debug, Default)]

View File

@ -77,8 +77,8 @@ async fn document_addition_with_primary_key() {
"content": "foo",
}
]);
let (_response, code) = index.add_documents(documents, Some("primary")).await;
assert_eq!(code, 202);
let (response, code) = index.add_documents(documents, Some("primary")).await;
assert_eq!(code, 202, "response: {}", response);
index.wait_update_id(0).await;
@ -189,8 +189,8 @@ async fn replace_document() {
}
]);
let (_response, code) = index.add_documents(documents, None).await;
assert_eq!(code, 202);
let (response, code) = index.add_documents(documents, None).await;
assert_eq!(code, 202, "response: {}", response);
index.wait_update_id(0).await;
@ -260,8 +260,8 @@ async fn update_document() {
}
]);
let (_response, code) = index.update_documents(documents, None).await;
assert_eq!(code, 202);
let (response, code) = index.update_documents(documents, None).await;
assert_eq!(code, 202, "response: {}", response);
index.wait_update_id(1).await;

View File

@ -6,14 +6,18 @@ async fn set_and_reset_distinct_attribute() {
let server = Server::new().await;
let index = server.index("test");
let (_response, _code) = index.update_settings(json!({ "distinctAttribute": "test"})).await;
let (_response, _code) = index
.update_settings(json!({ "distinctAttribute": "test"}))
.await;
index.wait_update_id(0).await;
let (response, _) = index.settings().await;
assert_eq!(response["distinctAttribute"], "test");
index.update_settings(json!({ "distinctAttribute": null })).await;
index
.update_settings(json!({ "distinctAttribute": null }))
.await;
index.wait_update_id(1).await;

View File

@ -1,2 +1,2 @@
mod get_settings;
mod distinct;
mod get_settings;