// Source: MeiliSearch/meilisearch-core/src/update/mod.rs (Rust; 421 lines, 13 KiB in the upstream view)

2019-10-23 16:32:11 +02:00
mod clear_all;
2019-10-11 15:33:35 +02:00
mod customs_update;
2019-10-03 15:04:11 +02:00
mod documents_addition;
mod documents_deletion;
mod schema_update;
mod stop_words_addition;
mod stop_words_deletion;
2019-10-08 17:06:56 +02:00
mod synonyms_addition;
2019-10-08 17:16:48 +02:00
mod synonyms_deletion;
2019-10-03 15:04:11 +02:00
2019-10-23 16:32:11 +02:00
pub use self::clear_all::{apply_clear_all, push_clear_all};
2019-10-11 15:33:35 +02:00
pub use self::customs_update::{apply_customs_update, push_customs_update};
pub use self::documents_addition::{
apply_documents_addition, apply_documents_partial_addition, DocumentsAddition,
};
2019-10-18 13:05:28 +02:00
pub use self::documents_deletion::{apply_documents_deletion, DocumentsDeletion};
pub use self::schema_update::{apply_schema_update, push_schema_update};
pub use self::stop_words_addition::{apply_stop_words_addition, StopWordsAddition};
pub use self::stop_words_deletion::{apply_stop_words_deletion, StopWordsDeletion};
2019-10-18 13:05:28 +02:00
pub use self::synonyms_addition::{apply_synonyms_addition, SynonymsAddition};
pub use self::synonyms_deletion::{apply_synonyms_deletion, SynonymsDeletion};
2019-10-03 15:04:11 +02:00
use std::cmp;
use std::collections::{BTreeMap, BTreeSet, HashMap};
2019-11-12 16:18:53 +01:00
use std::time::Instant;
2019-11-12 18:00:47 +01:00
use chrono::{DateTime, Utc};
2019-10-21 12:05:53 +02:00
use heed::Result as ZResult;
use log::debug;
2019-10-18 13:05:28 +02:00
use serde::{Deserialize, Serialize};
use crate::{store, DocumentId, MResult};
2019-11-26 11:06:55 +01:00
use meilisearch_schema::Schema;
2019-10-03 15:04:11 +02:00
2019-10-16 17:05:24 +02:00
#[derive(Debug, Clone, Serialize, Deserialize)]
2019-11-12 18:00:47 +01:00
pub struct Update {
data: UpdateData,
enqueued_at: DateTime<Utc>,
}
impl Update {
fn clear_all() -> Update {
Update {
data: UpdateData::ClearAll,
enqueued_at: Utc::now(),
}
}
fn schema(data: Schema) -> Update {
Update {
data: UpdateData::Schema(data),
enqueued_at: Utc::now(),
}
}
fn customs(data: Vec<u8>) -> Update {
Update {
data: UpdateData::Customs(data),
enqueued_at: Utc::now(),
}
}
fn documents_addition(data: Vec<HashMap<String, serde_json::Value>>) -> Update {
Update {
data: UpdateData::DocumentsAddition(data),
enqueued_at: Utc::now(),
}
}
fn documents_partial(data: Vec<HashMap<String, serde_json::Value>>) -> Update {
Update {
data: UpdateData::DocumentsPartial(data),
enqueued_at: Utc::now(),
}
}
fn documents_deletion(data: Vec<DocumentId>) -> Update {
Update {
data: UpdateData::DocumentsDeletion(data),
enqueued_at: Utc::now(),
}
}
fn synonyms_addition(data: BTreeMap<String, Vec<String>>) -> Update {
Update {
data: UpdateData::SynonymsAddition(data),
enqueued_at: Utc::now(),
}
}
fn synonyms_deletion(data: BTreeMap<String, Option<Vec<String>>>) -> Update {
Update {
data: UpdateData::SynonymsDeletion(data),
enqueued_at: Utc::now(),
}
}
fn stop_words_addition(data: BTreeSet<String>) -> Update {
Update {
data: UpdateData::StopWordsAddition(data),
enqueued_at: Utc::now(),
}
}
fn stop_words_deletion(data: BTreeSet<String>) -> Update {
Update {
data: UpdateData::StopWordsDeletion(data),
enqueued_at: Utc::now(),
}
}
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum UpdateData {
2019-10-23 16:32:11 +02:00
ClearAll,
Schema(Schema),
2019-10-11 15:33:35 +02:00
Customs(Vec<u8>),
DocumentsAddition(Vec<HashMap<String, serde_json::Value>>),
DocumentsPartial(Vec<HashMap<String, serde_json::Value>>),
2019-10-03 15:04:11 +02:00
DocumentsDeletion(Vec<DocumentId>),
2019-10-08 17:06:56 +02:00
SynonymsAddition(BTreeMap<String, Vec<String>>),
2019-10-08 17:16:48 +02:00
SynonymsDeletion(BTreeMap<String, Option<Vec<String>>>),
StopWordsAddition(BTreeSet<String>),
StopWordsDeletion(BTreeSet<String>),
2019-10-03 15:04:11 +02:00
}
2019-11-12 18:00:47 +01:00
impl UpdateData {
pub fn update_type(&self) -> UpdateType {
match self {
2019-11-12 18:00:47 +01:00
UpdateData::ClearAll => UpdateType::ClearAll,
UpdateData::Schema(_) => UpdateType::Schema,
UpdateData::Customs(_) => UpdateType::Customs,
UpdateData::DocumentsAddition(addition) => UpdateType::DocumentsAddition {
number: addition.len(),
},
2019-11-12 18:00:47 +01:00
UpdateData::DocumentsPartial(addition) => UpdateType::DocumentsPartial {
number: addition.len(),
},
2019-11-12 18:00:47 +01:00
UpdateData::DocumentsDeletion(deletion) => UpdateType::DocumentsDeletion {
number: deletion.len(),
},
2019-11-12 18:00:47 +01:00
UpdateData::SynonymsAddition(addition) => UpdateType::SynonymsAddition {
number: addition.len(),
},
2019-11-12 18:00:47 +01:00
UpdateData::SynonymsDeletion(deletion) => UpdateType::SynonymsDeletion {
number: deletion.len(),
},
2019-11-12 18:00:47 +01:00
UpdateData::StopWordsAddition(addition) => UpdateType::StopWordsAddition {
number: addition.len(),
},
2019-11-12 18:00:47 +01:00
UpdateData::StopWordsDeletion(deletion) => UpdateType::StopWordsDeletion {
number: deletion.len(),
},
}
}
}
#[derive(Debug, Clone, Serialize, Deserialize)]
2019-11-12 16:18:53 +01:00
#[serde(tag = "name")]
2019-10-03 16:13:09 +02:00
pub enum UpdateType {
2019-10-23 16:32:11 +02:00
ClearAll,
2019-11-12 16:18:53 +01:00
Schema,
2019-10-11 15:33:35 +02:00
Customs,
2019-10-03 16:13:09 +02:00
DocumentsAddition { number: usize },
DocumentsPartial { number: usize },
2019-10-03 16:13:09 +02:00
DocumentsDeletion { number: usize },
2019-10-08 17:06:56 +02:00
SynonymsAddition { number: usize },
2019-10-08 17:16:48 +02:00
SynonymsDeletion { number: usize },
StopWordsAddition { number: usize },
StopWordsDeletion { number: usize },
2019-10-03 16:13:09 +02:00
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ProcessedUpdateResult {
2019-10-03 16:13:09 +02:00
pub update_id: u64,
2019-11-12 16:18:53 +01:00
#[serde(rename = "type")]
2019-10-03 16:13:09 +02:00
pub update_type: UpdateType,
2019-11-12 16:18:53 +01:00
#[serde(skip_serializing_if = "Option::is_none")]
pub error: Option<String>,
pub duration: f64, // in seconds
2019-11-12 18:00:47 +01:00
pub enqueued_at: DateTime<Utc>,
pub processed_at: DateTime<Utc>,
2019-10-03 16:13:09 +02:00
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EnqueuedUpdateResult {
pub update_id: u64,
pub update_type: UpdateType,
2019-11-12 18:00:47 +01:00
pub enqueued_at: DateTime<Utc>,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
2019-11-12 18:00:47 +01:00
#[serde(rename_all = "camelCase", tag = "status")]
2019-10-03 16:54:37 +02:00
pub enum UpdateStatus {
2019-11-12 16:18:53 +01:00
Enqueued {
#[serde(flatten)]
2019-11-12 18:00:47 +01:00
content: EnqueuedUpdateResult,
2019-11-12 16:18:53 +01:00
},
Processed {
#[serde(flatten)]
2019-11-12 18:00:47 +01:00
content: ProcessedUpdateResult,
2019-11-12 16:18:53 +01:00
},
2019-10-03 16:54:37 +02:00
}
2019-10-16 17:05:24 +02:00
pub fn update_status(
2019-10-21 12:05:53 +02:00
reader: &heed::RoTxn,
2019-10-03 16:54:37 +02:00
updates_store: store::Updates,
updates_results_store: store::UpdatesResults,
update_id: u64,
2019-11-12 18:00:47 +01:00
) -> MResult<Option<UpdateStatus>> {
2019-10-03 16:54:37 +02:00
match updates_results_store.update_result(reader, update_id)? {
2019-11-12 18:00:47 +01:00
Some(result) => Ok(Some(UpdateStatus::Processed { content: result })),
None => match updates_store.get(reader, update_id)? {
Some(update) => Ok(Some(UpdateStatus::Enqueued {
content: EnqueuedUpdateResult {
update_id,
2019-11-12 18:00:47 +01:00
update_type: update.data.update_type(),
enqueued_at: update.enqueued_at,
},
})),
None => Ok(None),
},
2019-10-03 16:54:37 +02:00
}
}
pub fn next_update_id(
2019-10-21 12:05:53 +02:00
writer: &mut heed::RwTxn,
updates_store: store::Updates,
updates_results_store: store::UpdatesResults,
2019-10-18 13:05:28 +02:00
) -> ZResult<u64> {
let last_update_id = updates_store.last_update_id(writer)?;
let last_update_id = last_update_id.map(|(n, _)| n);
let last_update_results_id = updates_results_store.last_update_id(writer)?;
let last_update_results_id = last_update_results_id.map(|(n, _)| n);
let max_update_id = cmp::max(last_update_id, last_update_results_id);
let new_update_id = max_update_id.map_or(0, |n| n + 1);
2019-10-08 17:16:48 +02:00
Ok(new_update_id)
2019-10-08 17:16:48 +02:00
}
2019-11-04 10:49:27 +01:00
pub fn update_task<'a, 'b>(
writer: &'a mut heed::RwTxn<'b>,
index: store::Index,
update_id: u64,
update: Update,
) -> MResult<ProcessedUpdateResult> {
debug!("Processing update number {}", update_id);
2019-11-12 18:00:47 +01:00
let Update { enqueued_at, data } = update;
let (update_type, result, duration) = match data {
UpdateData::ClearAll => {
2019-10-23 16:32:11 +02:00
let start = Instant::now();
let update_type = UpdateType::ClearAll;
let result = apply_clear_all(
writer,
index.main,
index.documents_fields,
index.documents_fields_counts,
index.postings_lists,
index.docs_words,
);
(update_type, result, start.elapsed())
}
2019-11-12 18:00:47 +01:00
UpdateData::Schema(schema) => {
let start = Instant::now();
2019-10-09 11:45:19 +02:00
2019-11-12 16:18:53 +01:00
let update_type = UpdateType::Schema;
let result = apply_schema_update(
writer,
&schema,
index.main,
index.documents_fields,
index.documents_fields_counts,
index.postings_lists,
index.docs_words,
);
(update_type, result, start.elapsed())
2019-10-18 13:05:28 +02:00
}
2019-11-12 18:00:47 +01:00
UpdateData::Customs(customs) => {
2019-10-11 15:33:35 +02:00
let start = Instant::now();
let update_type = UpdateType::Customs;
2019-10-16 17:05:24 +02:00
let result = apply_customs_update(writer, index.main, &customs).map_err(Into::into);
2019-10-11 15:33:35 +02:00
(update_type, result, start.elapsed())
}
2019-11-12 18:00:47 +01:00
UpdateData::DocumentsAddition(documents) => {
let start = Instant::now();
2019-10-18 13:05:28 +02:00
let update_type = UpdateType::DocumentsAddition {
number: documents.len(),
};
let result = apply_documents_addition(
writer,
index.main,
index.documents_fields,
index.documents_fields_counts,
index.postings_lists,
index.docs_words,
documents,
);
(update_type, result, start.elapsed())
2019-10-18 13:05:28 +02:00
}
2019-11-12 18:00:47 +01:00
UpdateData::DocumentsPartial(documents) => {
let start = Instant::now();
let update_type = UpdateType::DocumentsPartial {
number: documents.len(),
};
let result = apply_documents_partial_addition(
writer,
index.main,
index.documents_fields,
index.documents_fields_counts,
index.postings_lists,
index.docs_words,
documents,
);
(update_type, result, start.elapsed())
}
2019-11-12 18:00:47 +01:00
UpdateData::DocumentsDeletion(documents) => {
let start = Instant::now();
2019-10-18 13:05:28 +02:00
let update_type = UpdateType::DocumentsDeletion {
number: documents.len(),
};
let result = apply_documents_deletion(
writer,
index.main,
index.documents_fields,
index.documents_fields_counts,
index.postings_lists,
index.docs_words,
documents,
);
2019-10-08 17:06:56 +02:00
(update_type, result, start.elapsed())
2019-10-18 13:05:28 +02:00
}
2019-11-12 18:00:47 +01:00
UpdateData::SynonymsAddition(synonyms) => {
2019-10-08 17:06:56 +02:00
let start = Instant::now();
2019-10-18 13:05:28 +02:00
let update_type = UpdateType::SynonymsAddition {
number: synonyms.len(),
};
2019-10-08 17:06:56 +02:00
2019-10-18 13:05:28 +02:00
let result = apply_synonyms_addition(writer, index.main, index.synonyms, synonyms);
2019-10-08 17:06:56 +02:00
2019-10-08 17:16:48 +02:00
(update_type, result, start.elapsed())
2019-10-18 13:05:28 +02:00
}
2019-11-12 18:00:47 +01:00
UpdateData::SynonymsDeletion(synonyms) => {
2019-10-08 17:16:48 +02:00
let start = Instant::now();
2019-10-18 13:05:28 +02:00
let update_type = UpdateType::SynonymsDeletion {
number: synonyms.len(),
};
2019-10-08 17:16:48 +02:00
2019-10-18 13:05:28 +02:00
let result = apply_synonyms_deletion(writer, index.main, index.synonyms, synonyms);
2019-10-08 17:16:48 +02:00
(update_type, result, start.elapsed())
}
2019-11-12 18:00:47 +01:00
UpdateData::StopWordsAddition(stop_words) => {
let start = Instant::now();
let update_type = UpdateType::StopWordsAddition {
number: stop_words.len(),
};
let result =
apply_stop_words_addition(writer, index.main, index.postings_lists, stop_words);
(update_type, result, start.elapsed())
}
2019-11-12 18:00:47 +01:00
UpdateData::StopWordsDeletion(stop_words) => {
let start = Instant::now();
let update_type = UpdateType::StopWordsDeletion {
number: stop_words.len(),
};
let result = apply_stop_words_deletion(
writer,
index.main,
index.documents_fields,
index.documents_fields_counts,
index.postings_lists,
index.docs_words,
stop_words,
);
(update_type, result, start.elapsed())
2019-10-18 13:05:28 +02:00
}
};
2019-10-18 13:05:28 +02:00
debug!(
"Processed update number {} {:?} {:?}",
update_id, update_type, result
);
let status = ProcessedUpdateResult {
update_id,
update_type,
2019-11-12 16:18:53 +01:00
error: result.map_err(|e| e.to_string()).err(),
duration: duration.as_secs_f64(),
2019-11-12 18:00:47 +01:00
enqueued_at,
processed_at: Utc::now(),
};
Ok(status)
2019-10-03 15:04:11 +02:00
}