restructure project

mpostma 2021-03-10 13:46:49 +01:00
parent 8061a04661
commit 5ecf514d28
No known key found for this signature in database
GPG key ID: CBC8A7C1D7A28C3A
75 changed files with 4377 additions and 323 deletions


@ -0,0 +1,137 @@
use std::hash::{Hash, Hasher};
use std::{error, thread};
use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH};
use log::error;
use serde::Serialize;
use serde_qs as qs;
use siphasher::sip::SipHasher;
use walkdir::WalkDir;
use crate::Data;
use crate::Opt;
const AMPLITUDE_API_KEY: &str = "f7fba398780e06d8fe6666a9be7e3d47";
#[derive(Debug, Serialize)]
struct EventProperties {
database_size: u64,
last_update_timestamp: Option<i64>, //timestamp
number_of_documents: Vec<u64>,
}
impl EventProperties {
fn from(data: Data) -> Result<EventProperties, Box<dyn error::Error>> {
let mut index_list = Vec::new();
let reader = data.db.main_read_txn()?;
for index_uid in data.db.indexes_uids() {
if let Some(index) = data.db.open_index(&index_uid) {
let number_of_documents = index.main.number_of_documents(&reader)?;
index_list.push(number_of_documents);
}
}
let database_size = WalkDir::new(&data.db_path)
.into_iter()
.filter_map(|entry| entry.ok())
.filter_map(|entry| entry.metadata().ok())
.filter(|metadata| metadata.is_file())
.fold(0, |acc, m| acc + m.len());
let last_update_timestamp = data.db.last_update(&reader)?.map(|u| u.timestamp());
Ok(EventProperties {
database_size,
last_update_timestamp,
number_of_documents: index_list,
})
}
}
#[derive(Debug, Serialize)]
struct UserProperties<'a> {
env: &'a str,
start_since_days: u64,
user_email: Option<String>,
server_provider: Option<String>,
}
#[derive(Debug, Serialize)]
struct Event<'a> {
user_id: &'a str,
event_type: &'a str,
device_id: &'a str,
time: u64,
app_version: &'a str,
user_properties: UserProperties<'a>,
event_properties: Option<EventProperties>,
}
#[derive(Debug, Serialize)]
struct AmplitudeRequest<'a> {
api_key: &'a str,
event: &'a str,
}
pub fn analytics_sender(data: Data, opt: Opt) {
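// Build an anonymous user id by hashing username + hostname + platform with SipHash.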
let username = whoami::username();
let hostname = whoami::hostname();
let platform = whoami::platform();
let uid = username + &hostname + &platform.to_string();
let mut hasher = SipHasher::new();
uid.hash(&mut hasher);
let hash = hasher.finish();
let uid = format!("{:X}", hash);
let platform = platform.to_string();
let first_start = Instant::now();
loop {
let n = SystemTime::now().duration_since(UNIX_EPOCH).unwrap();
let user_id = &uid;
let device_id = &platform;
let time = n.as_secs();
let event_type = "runtime_tick";
let elapsed_since_start = first_start.elapsed().as_secs() / 86_400; // One day
let event_properties = EventProperties::from(data.clone()).ok();
let app_version = env!("CARGO_PKG_VERSION").to_string();
let app_version = app_version.as_str();
let user_email = std::env::var("MEILI_USER_EMAIL").ok();
let server_provider = std::env::var("MEILI_SERVER_PROVIDER").ok();
let user_properties = UserProperties {
env: &opt.env,
start_since_days: elapsed_since_start,
user_email,
server_provider,
};
let event = Event {
user_id,
event_type,
device_id,
time,
app_version,
user_properties,
event_properties
};
let event = serde_json::to_string(&event).unwrap();
let request = AmplitudeRequest {
api_key: AMPLITUDE_API_KEY,
event: &event,
};
let body = qs::to_string(&request).unwrap();
let response = ureq::post("https://api.amplitude.com/httpapi").send_string(&body);
if !response.ok() {
let body = response.into_string().unwrap();
error!("Unsuccessful call to Amplitude: {}", body);
}
thread::sleep(Duration::from_secs(3600)) // one hour
}
}
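
The call site is not part of this excerpt, but since the function loops forever it has to run on its own thread. A minimal sketch, assuming analytics is enabled and that `Data` and `Opt` are both `Clone` (as they are in this diff):

let analytics_data = data.clone();
let analytics_opt = opt.clone();
std::thread::spawn(move || analytics_sender(analytics_data, analytics_opt));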


@ -0,0 +1,110 @@
pub mod search;
mod updates;
use std::fs::create_dir_all;
use std::ops::Deref;
use std::sync::Arc;
use sha2::Digest;
use crate::index_controller::{IndexMetadata, IndexSettings};
use crate::index_controller::IndexController;
use crate::index::Settings;
use crate::option::Opt;
#[derive(Clone)]
pub struct Data {
inner: Arc<DataInner>,
}
impl Deref for Data {
type Target = DataInner;
fn deref(&self) -> &Self::Target {
&self.inner
}
}
pub struct DataInner {
pub index_controller: IndexController,
pub api_keys: ApiKeys,
options: Opt,
}
#[derive(Clone)]
pub struct ApiKeys {
pub public: Option<String>,
pub private: Option<String>,
pub master: Option<String>,
}
impl ApiKeys {
pub fn generate_missing_api_keys(&mut self) {
if let Some(master_key) = &self.master {
if self.private.is_none() {
let key = format!("{}-private", master_key);
let sha = sha2::Sha256::digest(key.as_bytes());
self.private = Some(format!("{:x}", sha));
}
if self.public.is_none() {
let key = format!("{}-public", master_key);
let sha = sha2::Sha256::digest(key.as_bytes());
self.public = Some(format!("{:x}", sha));
}
}
}
}
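
The derived keys are deterministic: each is the hex-encoded SHA-256 digest of the master key with a `-private` or `-public` suffix. A small usage sketch:

let mut keys = ApiKeys {
    master: Some("mk".to_string()),
    private: None,
    public: None,
};
keys.generate_missing_api_keys();
// keys.private == Some(hex(sha256("mk-private")))
// keys.public  == Some(hex(sha256("mk-public")))
assert!(keys.private.is_some() && keys.public.is_some());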
impl Data {
pub fn new(options: Opt) -> anyhow::Result<Data> {
let path = options.db_path.clone();
create_dir_all(&path)?;
let index_controller = IndexController::new(&path)?;
let mut api_keys = ApiKeys {
master: options.clone().master_key,
private: None,
public: None,
};
api_keys.generate_missing_api_keys();
let inner = DataInner { index_controller, options, api_keys };
let inner = Arc::new(inner);
Ok(Data { inner })
}
pub async fn settings<S: AsRef<str>>(&self, index_uid: S) -> anyhow::Result<Settings> {
self.index_controller.settings(index_uid.as_ref().to_string()).await
}
pub async fn list_indexes(&self) -> anyhow::Result<Vec<IndexMetadata>> {
self.index_controller.list_indexes().await
}
pub async fn index(&self, name: impl AsRef<str>) -> anyhow::Result<Option<IndexMetadata>> {
self.index_controller.get_index(name.as_ref().to_string()).await
}
pub async fn create_index(&self, name: impl AsRef<str>, primary_key: Option<impl AsRef<str>>) -> anyhow::Result<IndexMetadata> {
let settings = IndexSettings {
name: Some(name.as_ref().to_string()),
primary_key: primary_key.map(|s| s.as_ref().to_string()),
};
let meta = self.index_controller.create_index(settings).await?;
Ok(meta)
}
#[inline]
pub fn http_payload_size_limit(&self) -> usize {
self.options.http_payload_size_limit.get_bytes() as usize
}
#[inline]
pub fn api_keys(&self) -> &ApiKeys {
&self.api_keys
}
}


@ -0,0 +1,34 @@
use serde_json::{Map, Value};
use crate::index::{SearchQuery, SearchResult};
use super::Data;
impl Data {
pub async fn search<S: AsRef<str>>(
&self,
index: S,
search_query: SearchQuery,
) -> anyhow::Result<SearchResult> {
self.index_controller.search(index.as_ref().to_string(), search_query).await
}
pub async fn retrieve_documents(
&self,
index: String,
offset: usize,
limit: usize,
attributes_to_retrieve: Option<Vec<String>>,
) -> anyhow::Result<Vec<Map<String, Value>>> {
self.index_controller.documents(index, offset, limit, attributes_to_retrieve).await
}
pub async fn retrieve_document(
&self,
index: impl AsRef<str> + Sync + Send + 'static,
document_id: impl AsRef<str> + Sync + Send + 'static,
attributes_to_retrieve: Option<Vec<String>>,
) -> anyhow::Result<Map<String, Value>>
{
self.index_controller.document(index.as_ref().to_string(), document_id.as_ref().to_string(), attributes_to_retrieve).await
}
}


@ -0,0 +1,82 @@
//use async_compression::tokio_02::write::GzipEncoder;
//use futures_util::stream::StreamExt;
use milli::update::{IndexDocumentsMethod, UpdateFormat};
//use tokio::io::AsyncWriteExt;
use actix_web::web::Payload;
use crate::index_controller::{UpdateStatus, IndexMetadata};
use crate::index::Settings;
use super::Data;
impl Data {
pub async fn add_documents(
&self,
index: impl AsRef<str> + Send + Sync + 'static,
method: IndexDocumentsMethod,
format: UpdateFormat,
stream: Payload,
primary_key: Option<String>,
) -> anyhow::Result<UpdateStatus>
{
let update_status = self.index_controller.add_documents(index.as_ref().to_string(), method, format, stream, primary_key).await?;
Ok(update_status)
}
pub async fn update_settings(
&self,
index: String,
settings: Settings
) -> anyhow::Result<UpdateStatus> {
let update = self.index_controller.update_settings(index, settings).await?;
Ok(update.into())
}
pub async fn clear_documents(
&self,
index: impl AsRef<str> + Sync + Send + 'static,
) -> anyhow::Result<UpdateStatus> {
let update = self.index_controller.clear_documents(index.as_ref().to_string()).await?;
Ok(update)
}
pub async fn delete_documents(
&self,
index: impl AsRef<str> + Sync + Send + 'static,
document_ids: Vec<String>,
) -> anyhow::Result<UpdateStatus> {
let update = self.index_controller.delete_documents(index.as_ref().to_string(), document_ids).await?;
Ok(update.into())
}
pub async fn delete_index(
&self,
index: impl AsRef<str> + Send + Sync + 'static,
) -> anyhow::Result<()> {
self.index_controller.delete_index(index.as_ref().to_owned()).await?;
Ok(())
}
pub async fn get_update_status(&self, index: impl AsRef<str>, uid: u64) -> anyhow::Result<Option<UpdateStatus>> {
self.index_controller.update_status(index.as_ref().to_string(), uid).await
}
pub async fn get_updates_status(&self, index: impl AsRef<str>) -> anyhow::Result<Vec<UpdateStatus>> {
self.index_controller.all_update_status(index.as_ref().to_string()).await
}
pub fn update_index(
&self,
name: impl AsRef<str>,
primary_key: Option<impl AsRef<str>>,
new_name: Option<impl AsRef<str>>
) -> anyhow::Result<IndexMetadata> {
todo!()
//let settings = IndexSettings {
//name: new_name.map(|s| s.as_ref().to_string()),
//primary_key: primary_key.map(|s| s.as_ref().to_string()),
//};
//self.index_controller.update_index(name, settings)
}
}


@ -0,0 +1,423 @@
use std::fs::{create_dir_all, File};
use std::io::prelude::*;
use std::path::{Path, PathBuf};
use std::sync::Mutex;
use std::thread;
use actix_web::web;
use chrono::offset::Utc;
use indexmap::IndexMap;
use log::{error, info};
use once_cell::sync::Lazy;
use serde::{Deserialize, Serialize};
use serde_json::json;
use tempfile::TempDir;
use crate::Data;
use crate::error::{Error, ResponseError};
use crate::helpers::compression;
use crate::routes::index;
use crate::routes::setting::Settings;
use crate::routes::index::IndexResponse;
// Mutex to share dump progress.
static DUMP_INFO: Lazy<Mutex<Option<DumpInfo>>> = Lazy::new(Mutex::default);
#[derive(Debug, Serialize, Deserialize, Copy, Clone)]
enum DumpVersion {
V1,
}
impl DumpVersion {
const CURRENT: Self = Self::V1;
}
#[derive(Debug, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct DumpMetadata {
indexes: Vec<crate::routes::index::IndexResponse>,
db_version: String,
dump_version: DumpVersion,
}
impl DumpMetadata {
/// Create a DumpMetadata with the current dump version of meilisearch.
pub fn new(indexes: Vec<crate::routes::index::IndexResponse>, db_version: String) -> Self {
DumpMetadata {
indexes,
db_version,
dump_version: DumpVersion::CURRENT,
}
}
/// Extract DumpMetadata from the `metadata.json` file at the provided `dir_path`
fn from_path(dir_path: &Path) -> Result<Self, Error> {
let path = dir_path.join("metadata.json");
let file = File::open(path)?;
let reader = std::io::BufReader::new(file);
let metadata = serde_json::from_reader(reader)?;
Ok(metadata)
}
/// Write DumpMetadata to the `metadata.json` file at the provided `dir_path`
fn to_path(&self, dir_path: &Path) -> Result<(), Error> {
let path = dir_path.join("metadata.json");
let file = File::create(path)?;
serde_json::to_writer(file, &self)?;
Ok(())
}
}
/// Extract Settings from the `settings.json` file at the provided `dir_path`
fn settings_from_path(dir_path: &Path) -> Result<Settings, Error> {
let path = dir_path.join("settings.json");
let file = File::open(path)?;
let reader = std::io::BufReader::new(file);
let metadata = serde_json::from_reader(reader)?;
Ok(metadata)
}
/// Write Settings to the `settings.json` file at the provided `dir_path`
fn settings_to_path(settings: &Settings, dir_path: &Path) -> Result<(), Error> {
let path = dir_path.join("settings.json");
let file = File::create(path)?;
serde_json::to_writer(file, settings)?;
Ok(())
}
/// Import the settings and documents of a `DumpVersion::V1` dump into the specified index.
fn import_index_v1(
data: &Data,
dumps_dir: &Path,
index_uid: &str,
document_batch_size: usize,
write_txn: &mut MainWriter,
) -> Result<(), Error> {
// open index
let index = data
.db
.open_index(index_uid)
.ok_or(Error::index_not_found(index_uid))?;
// index dir path in dump dir
let index_path = &dumps_dir.join(index_uid);
// extract `settings.json` file and import content
let settings = settings_from_path(&index_path)?;
let settings = settings.to_update().map_err(|e| Error::dump_failed(format!("importing settings for index {}; {}", index_uid, e)))?;
apply_settings_update(write_txn, &index, settings)?;
// create an iterator over the documents in `documents.jsonl` for batched import
let documents = {
let file = File::open(&index_path.join("documents.jsonl"))?;
let reader = std::io::BufReader::new(file);
let deserializer = serde_json::Deserializer::from_reader(reader);
deserializer.into_iter::<IndexMap<String, serde_json::Value>>()
};
// import documents in batches of `document_batch_size`:
// create a Vec to bufferize documents
let mut values = Vec::with_capacity(document_batch_size);
// iterate over documents
for document in documents {
// push document in buffer
values.push(document?);
// if the buffer is full, create and apply a batch, then clear the buffer
if values.len() == document_batch_size {
let batch = std::mem::replace(&mut values, Vec::with_capacity(document_batch_size));
apply_documents_addition(write_txn, &index, batch)?;
}
}
// apply documents remaining in the buffer
if !values.is_empty() {
apply_documents_addition(write_txn, &index, values)?;
}
// sync index information: stats, updated_at, last_update
if let Err(e) = crate::index_update_callback_txn(index, index_uid, data, write_txn) {
return Err(Error::Internal(e));
}
Ok(())
}
/// Import the dump at `dump_path` into the database.
pub fn import_dump(
data: &Data,
dump_path: &Path,
document_batch_size: usize,
) -> Result<(), Error> {
info!("Importing dump from {:?}...", dump_path);
// create a temporary directory
let tmp_dir = TempDir::new()?;
let tmp_dir_path = tmp_dir.path();
// extract dump in temporary directory
compression::from_tar_gz(dump_path, tmp_dir_path)?;
// read dump metadata
let metadata = DumpMetadata::from_path(&tmp_dir_path)?;
// choose the import function based on the dump version in the metadata
let import_index = match metadata.dump_version {
DumpVersion::V1 => import_index_v1,
};
// remove indexes with the same `uid` as an index to import, then create empty indexes
let existing_index_uids = data.db.indexes_uids();
for index in metadata.indexes.iter() {
if existing_index_uids.contains(&index.uid) {
data.db.delete_index(index.uid.clone())?;
}
index::create_index_sync(&data.db, index.uid.clone(), index.name.clone(), index.primary_key.clone())?;
}
// import the content of each index
data.db.main_write::<_, _, Error>(|mut writer| {
for index in metadata.indexes {
import_index(&data, tmp_dir_path, &index.uid, document_batch_size, &mut writer)?;
}
Ok(())
})?;
info!("Dump importation from {:?} succeed", dump_path);
Ok(())
}
#[derive(Debug, Serialize, Deserialize, PartialEq, Clone)]
#[serde(rename_all = "snake_case")]
pub enum DumpStatus {
Done,
InProgress,
Failed,
}
#[derive(Debug, Serialize, Clone)]
#[serde(rename_all = "camelCase")]
pub struct DumpInfo {
pub uid: String,
pub status: DumpStatus,
#[serde(skip_serializing_if = "Option::is_none", flatten)]
pub error: Option<serde_json::Value>,
}
impl DumpInfo {
pub fn new(uid: String, status: DumpStatus) -> Self {
Self { uid, status, error: None }
}
pub fn with_error(mut self, error: ResponseError) -> Self {
self.status = DumpStatus::Failed;
self.error = Some(json!(error));
self
}
pub fn dump_already_in_progress(&self) -> bool {
self.status == DumpStatus::InProgress
}
pub fn get_current() -> Option<Self> {
DUMP_INFO.lock().unwrap().clone()
}
pub fn set_current(&self) {
*DUMP_INFO.lock().unwrap() = Some(self.clone());
}
}
/// Generate uid from creation date
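/// (e.g. "20210310-134649123": UTC date and time followed by milliseconds)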
fn generate_uid() -> String {
Utc::now().format("%Y%m%d-%H%M%S%3f").to_string()
}
/// Build the path of the compressed dump file inside `dumps_dir` from `dump_uid`
pub fn compressed_dumps_dir(dumps_dir: &Path, dump_uid: &str) -> PathBuf {
dumps_dir.join(format!("{}.dump", dump_uid))
}
/// Write metadata in dump
fn dump_metadata(data: &web::Data<Data>, dir_path: &Path, indexes: Vec<IndexResponse>) -> Result<(), Error> {
let (db_major, db_minor, db_patch) = data.db.version();
let metadata = DumpMetadata::new(indexes, format!("{}.{}.{}", db_major, db_minor, db_patch));
metadata.to_path(dir_path)
}
/// Export settings of provided index in dump
fn dump_index_settings(data: &web::Data<Data>, reader: &MainReader, dir_path: &Path, index_uid: &str) -> Result<(), Error> {
let settings = crate::routes::setting::get_all_sync(data, reader, index_uid)?;
settings_to_path(&settings, dir_path)
}
/// Export updates of provided index in dump
fn dump_index_updates(data: &web::Data<Data>, reader: &UpdateReader, dir_path: &Path, index_uid: &str) -> Result<(), Error> {
let updates_path = dir_path.join("updates.jsonl");
let updates = crate::routes::index::get_all_updates_status_sync(data, reader, index_uid)?;
let file = File::create(updates_path)?;
for update in updates {
serde_json::to_writer(&file, &update)?;
writeln!(&file)?;
}
Ok(())
}
/// Export documents of provided index in dump
fn dump_index_documents(data: &web::Data<Data>, reader: &MainReader, dir_path: &Path, index_uid: &str) -> Result<(), Error> {
let documents_path = dir_path.join("documents.jsonl");
let file = File::create(documents_path)?;
let dump_batch_size = data.dump_batch_size;
let mut offset = 0;
loop {
let documents = crate::routes::document::get_all_documents_sync(data, reader, index_uid, offset, dump_batch_size, None)?;
if documents.is_empty() { break; } else { offset += dump_batch_size; }
for document in documents {
serde_json::to_writer(&file, &document)?;
writeln!(&file)?;
}
}
Ok(())
}
/// Log the error with its context and mark the dump as failed.
fn fail_dump_process<E: std::error::Error>(dump_info: DumpInfo, context: &str, error: E) {
let error_message = format!("{}; {}", context, error);
error!("Something went wrong during dump process: {}", &error_message);
dump_info.with_error(Error::dump_failed(error_message).into()).set_current();
}
/// Main function of dump.
fn dump_process(data: web::Data<Data>, dumps_dir: PathBuf, dump_info: DumpInfo) {
// open read transaction on Update
let update_reader = match data.db.update_read_txn() {
Ok(r) => r,
Err(e) => {
fail_dump_process(dump_info, "creating RO transaction on updates", e);
return;
}
};
// open read transaction on Main
let main_reader = match data.db.main_read_txn() {
Ok(r) => r,
Err(e) => {
fail_dump_process(dump_info, "creating RO transaction on main", e);
return;
}
};
// create a temporary directory
let tmp_dir = match TempDir::new() {
Ok(tmp_dir) => tmp_dir,
Err(e) => {
fail_dump_process(dump_info, "creating temporary directory", e);
return;
}
};
let tmp_dir_path = tmp_dir.path();
// fetch indexes
let indexes = match crate::routes::index::list_indexes_sync(&data, &main_reader) {
Ok(indexes) => indexes,
Err(e) => {
fail_dump_process(dump_info, "listing indexes", e);
return;
}
};
// create metadata
if let Err(e) = dump_metadata(&data, &tmp_dir_path, indexes.clone()) {
fail_dump_process(dump_info, "generating metadata", e);
return;
}
// export settings, updates and documents for each index
for index in indexes {
let index_path = tmp_dir_path.join(&index.uid);
// create the index sub-directory
if let Err(e) = create_dir_all(&index_path) {
fail_dump_process(dump_info, &format!("creating directory for index {}", &index.uid), e);
return;
}
// export settings
if let Err(e) = dump_index_settings(&data, &main_reader, &index_path, &index.uid) {
fail_dump_process(dump_info, &format!("generating settings for index {}", &index.uid), e);
return;
}
// export documents
if let Err(e) = dump_index_documents(&data, &main_reader, &index_path, &index.uid) {
fail_dump_process(dump_info, &format!("generating documents for index {}", &index.uid), e);
return;
}
// export updates
if let Err(e) = dump_index_updates(&data, &update_reader, &index_path, &index.uid) {
fail_dump_process(dump_info, &format!("generating updates for index {}", &index.uid), e);
return;
}
}
// compress the dump into a file named `{dump_uid}.dump` in `dumps_dir`
if let Err(e) = crate::helpers::compression::to_tar_gz(&tmp_dir_path, &compressed_dumps_dir(&dumps_dir, &dump_info.uid)) {
fail_dump_process(dump_info, "compressing dump", e);
return;
}
// update dump info to `done`
let resume = DumpInfo::new(
dump_info.uid,
DumpStatus::Done
);
resume.set_current();
}
pub fn init_dump_process(data: &web::Data<Data>, dumps_dir: &Path) -> Result<DumpInfo, Error> {
create_dir_all(dumps_dir).map_err(|e| Error::dump_failed(format!("creating dumps directory: {}", e)))?;
// check if a dump is already in progress
if let Some(resume) = DumpInfo::get_current() {
if resume.dump_already_in_progress() {
return Err(Error::dump_conflict())
}
}
// generate a new dump info
let info = DumpInfo::new(
generate_uid(),
DumpStatus::InProgress
);
info.set_current();
let data = data.clone();
let dumps_dir = dumps_dir.to_path_buf();
let info_cloned = info.clone();
// run dump process in a new thread
thread::spawn(move ||
dump_process(data, dumps_dir, info_cloned)
);
Ok(info)
}
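
A hedged sketch of a trigger site (the actual HTTP route handler is not in this excerpt, and the dumps directory path below is illustrative): the call registers the dump as in progress, spawns the worker thread, and returns immediately with the dump's info.

fn trigger_dump(data: &actix_web::web::Data<Data>) -> Result<DumpInfo, Error> {
    // path is illustrative; the real one comes from the configuration
    init_dump_process(data, std::path::Path::new("/tmp/dumps"))
}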


@ -0,0 +1,298 @@
use std::error;
use std::fmt;
use actix_web::dev::HttpResponseBuilder;
use actix_web::http::Error as HttpError;
use actix_web as aweb;
use actix_web::error::{JsonPayloadError, QueryPayloadError};
use actix_web::http::StatusCode;
use serde::ser::{Serialize, Serializer, SerializeStruct};
use meilisearch_error::{ErrorCode, Code};
#[derive(Debug)]
pub struct ResponseError {
inner: Box<dyn ErrorCode>,
}
impl error::Error for ResponseError {}
impl ErrorCode for ResponseError {
fn error_code(&self) -> Code {
self.inner.error_code()
}
}
impl fmt::Display for ResponseError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
self.inner.fmt(f)
}
}
// TODO: remove this when implementing actual error handling
impl From<anyhow::Error> for ResponseError {
fn from(other: anyhow::Error) -> ResponseError {
ResponseError { inner: Box::new(Error::NotFound(other.to_string())) }
}
}
impl From<Error> for ResponseError {
fn from(error: Error) -> ResponseError {
ResponseError { inner: Box::new(error) }
}
}
impl From<FacetCountError> for ResponseError {
fn from(err: FacetCountError) -> ResponseError {
ResponseError { inner: Box::new(err) }
}
}
impl Serialize for ResponseError {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
{
let struct_name = "ResponseError";
let field_count = 4;
let mut state = serializer.serialize_struct(struct_name, field_count)?;
state.serialize_field("message", &self.to_string())?;
state.serialize_field("errorCode", &self.error_name())?;
state.serialize_field("errorType", &self.error_type())?;
state.serialize_field("errorLink", &self.error_url())?;
state.end()
}
}
impl aweb::error::ResponseError for ResponseError {
fn error_response(&self) -> aweb::HttpResponse {
HttpResponseBuilder::new(self.status_code()).json(&self)
}
fn status_code(&self) -> StatusCode {
self.http_status()
}
}
#[derive(Debug)]
pub enum Error {
BadParameter(String, String),
BadRequest(String),
CreateIndex(String),
DocumentNotFound(String),
IndexNotFound(String),
IndexAlreadyExists(String),
Internal(String),
InvalidIndexUid,
InvalidToken(String),
MissingAuthorizationHeader,
NotFound(String),
OpenIndex(String),
RetrieveDocument(u32, String),
SearchDocuments(String),
PayloadTooLarge,
UnsupportedMediaType,
DumpAlreadyInProgress,
DumpProcessFailed(String),
}
impl error::Error for Error {}
impl ErrorCode for Error {
fn error_code(&self) -> Code {
use Error::*;
match self {
BadParameter(_, _) => Code::BadParameter,
BadRequest(_) => Code::BadRequest,
CreateIndex(_) => Code::CreateIndex,
DocumentNotFound(_) => Code::DocumentNotFound,
IndexNotFound(_) => Code::IndexNotFound,
IndexAlreadyExists(_) => Code::IndexAlreadyExists,
Internal(_) => Code::Internal,
InvalidIndexUid => Code::InvalidIndexUid,
InvalidToken(_) => Code::InvalidToken,
MissingAuthorizationHeader => Code::MissingAuthorizationHeader,
NotFound(_) => Code::NotFound,
OpenIndex(_) => Code::OpenIndex,
RetrieveDocument(_, _) => Code::RetrieveDocument,
SearchDocuments(_) => Code::SearchDocuments,
PayloadTooLarge => Code::PayloadTooLarge,
UnsupportedMediaType => Code::UnsupportedMediaType,
_ => unreachable!()
//DumpAlreadyInProgress => Code::DumpAlreadyInProgress,
//DumpProcessFailed(_) => Code::DumpProcessFailed,
}
}
}
#[derive(Debug)]
pub enum FacetCountError {
AttributeNotSet(String),
SyntaxError(String),
UnexpectedToken { found: String, expected: &'static [&'static str] },
NoFacetSet,
}
impl error::Error for FacetCountError {}
impl ErrorCode for FacetCountError {
fn error_code(&self) -> Code {
Code::BadRequest
}
}
impl FacetCountError {
pub fn unexpected_token(found: impl ToString, expected: &'static [&'static str]) -> FacetCountError {
let found = found.to_string();
FacetCountError::UnexpectedToken { expected, found }
}
}
impl From<serde_json::error::Error> for FacetCountError {
fn from(other: serde_json::error::Error) -> FacetCountError {
FacetCountError::SyntaxError(other.to_string())
}
}
impl fmt::Display for FacetCountError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
use FacetCountError::*;
match self {
AttributeNotSet(attr) => write!(f, "Attribute {} is not set as facet", attr),
SyntaxError(msg) => write!(f, "Syntax error: {}", msg),
UnexpectedToken { expected, found } => write!(f, "Unexpected {} found, expected {:?}", found, expected),
NoFacetSet => write!(f, "Can't perform facet count, as no facet is set"),
}
}
}
impl Error {
pub fn internal(err: impl fmt::Display) -> Error {
Error::Internal(err.to_string())
}
pub fn bad_request(err: impl fmt::Display) -> Error {
Error::BadRequest(err.to_string())
}
pub fn missing_authorization_header() -> Error {
Error::MissingAuthorizationHeader
}
pub fn invalid_token(err: impl fmt::Display) -> Error {
Error::InvalidToken(err.to_string())
}
pub fn not_found(err: impl fmt::Display) -> Error {
Error::NotFound(err.to_string())
}
pub fn index_not_found(err: impl fmt::Display) -> Error {
Error::IndexNotFound(err.to_string())
}
pub fn document_not_found(err: impl fmt::Display) -> Error {
Error::DocumentNotFound(err.to_string())
}
pub fn bad_parameter(param: impl fmt::Display, err: impl fmt::Display) -> Error {
Error::BadParameter(param.to_string(), err.to_string())
}
pub fn open_index(err: impl fmt::Display) -> Error {
Error::OpenIndex(err.to_string())
}
pub fn create_index(err: impl fmt::Display) -> Error {
Error::CreateIndex(err.to_string())
}
pub fn invalid_index_uid() -> Error {
Error::InvalidIndexUid
}
pub fn retrieve_document(doc_id: u32, err: impl fmt::Display) -> Error {
Error::RetrieveDocument(doc_id, err.to_string())
}
pub fn search_documents(err: impl fmt::Display) -> Error {
Error::SearchDocuments(err.to_string())
}
pub fn dump_conflict() -> Error {
Error::DumpAlreadyInProgress
}
pub fn dump_failed(message: String) -> Error {
Error::DumpProcessFailed(message)
}
}
impl fmt::Display for Error {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Self::BadParameter(param, err) => write!(f, "Url parameter {} error: {}", param, err),
Self::BadRequest(err) => f.write_str(err),
Self::CreateIndex(err) => write!(f, "Impossible to create index; {}", err),
Self::DocumentNotFound(document_id) => write!(f, "Document with id {} not found", document_id),
Self::IndexNotFound(index_uid) => write!(f, "Index {} not found", index_uid),
Self::IndexAlreadyExists(index_uid) => write!(f, "Index {} already exists", index_uid),
Self::Internal(err) => f.write_str(err),
Self::InvalidIndexUid => f.write_str("Index must have a valid uid; Index uid can be of type integer or string only composed of alphanumeric characters, hyphens (-) and underscores (_)."),
Self::InvalidToken(err) => write!(f, "Invalid API key: {}", err),
Self::MissingAuthorizationHeader => f.write_str("You must have an authorization token"),
Self::NotFound(err) => write!(f, "{} not found", err),
Self::OpenIndex(err) => write!(f, "Impossible to open index; {}", err),
Self::RetrieveDocument(id, err) => write!(f, "Impossible to retrieve the document with id: {}; {}", id, err),
Self::SearchDocuments(err) => write!(f, "Impossible to search documents; {}", err),
Self::PayloadTooLarge => f.write_str("Payload too large"),
Self::UnsupportedMediaType => f.write_str("Unsupported media type"),
Self::DumpAlreadyInProgress => f.write_str("Another dump is already in progress"),
Self::DumpProcessFailed(message) => write!(f, "Dump process failed: {}", message),
}
}
}
impl From<std::io::Error> for Error {
fn from(err: std::io::Error) -> Error {
Error::Internal(err.to_string())
}
}
impl From<HttpError> for Error {
fn from(err: HttpError) -> Error {
Error::Internal(err.to_string())
}
}
impl From<serde_json::error::Error> for Error {
fn from(err: serde_json::error::Error) -> Error {
Error::Internal(err.to_string())
}
}
impl From<JsonPayloadError> for Error {
fn from(err: JsonPayloadError) -> Error {
match err {
JsonPayloadError::Deserialize(err) => Error::BadRequest(format!("Invalid JSON: {}", err)),
JsonPayloadError::Overflow => Error::PayloadTooLarge,
JsonPayloadError::ContentType => Error::UnsupportedMediaType,
JsonPayloadError::Payload(err) => Error::BadRequest(format!("Problem while decoding the request: {}", err)),
}
}
}
impl From<QueryPayloadError> for Error {
fn from(err: QueryPayloadError) -> Error {
match err {
QueryPayloadError::Deserialize(err) => Error::BadRequest(format!("Invalid query parameters: {}", err)),
}
}
}
pub fn payload_error_handler<E: Into<Error>>(err: E) -> ResponseError {
let error: Error = err.into();
error.into()
}
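
This handler is meant to be plugged into actix-web's extractor configs, so payload errors are serialized with the same error body as every other `ResponseError`. A hedged sketch of the wiring (the actual app setup is outside this excerpt):

use actix_web::web;

let json_config = web::JsonConfig::default()
    .error_handler(|err, _req| payload_error_handler(err).into());
// then register it on the app: App::new().app_data(json_config)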


@ -0,0 +1,101 @@
use std::cell::RefCell;
use std::pin::Pin;
use std::rc::Rc;
use std::task::{Context, Poll};
use actix_web::dev::{Transform, Service, ServiceResponse, ServiceRequest};
use actix_web::web;
use futures::future::{err, ok, Future, Ready};
use crate::error::{Error, ResponseError};
use crate::Data;
#[derive(Clone, Copy)]
pub enum Authentication {
Public,
Private,
Admin,
}
impl<S: 'static, B> Transform<S, ServiceRequest> for Authentication
where
S: Service<ServiceRequest, Response = ServiceResponse<B>, Error = actix_web::Error>,
S::Future: 'static,
B: 'static,
{
type Response = ServiceResponse<B>;
type Error = actix_web::Error;
type InitError = ();
type Transform = LoggingMiddleware<S>;
type Future = Ready<Result<Self::Transform, Self::InitError>>;
fn new_transform(&self, service: S) -> Self::Future {
ok(LoggingMiddleware {
acl: *self,
service: Rc::new(RefCell::new(service)),
})
}
}
pub struct LoggingMiddleware<S> {
acl: Authentication,
service: Rc<RefCell<S>>,
}
#[allow(clippy::type_complexity)]
impl<S, B> Service<ServiceRequest> for LoggingMiddleware<S>
where
S: Service<ServiceRequest, Response = ServiceResponse<B>, Error = actix_web::Error> + 'static,
S::Future: 'static,
B: 'static,
{
type Response = ServiceResponse<B>;
type Error = actix_web::Error;
type Future = Pin<Box<dyn Future<Output = Result<Self::Response, Self::Error>>>>;
fn poll_ready(&self, cx: &mut Context) -> Poll<Result<(), Self::Error>> {
self.service.poll_ready(cx)
}
fn call(&self, req: ServiceRequest) -> Self::Future {
let svc = self.service.clone();
// This unwrap is kept because this error should never occur. If it does, it means
// that actix-web has an issue or someone changed the `Data` type.
let data = req.app_data::<web::Data<Data>>().unwrap();
if data.api_keys().master.is_none() {
return Box::pin(svc.call(req));
}
let auth_header = match req.headers().get("X-Meili-API-Key") {
Some(auth) => match auth.to_str() {
Ok(auth) => auth,
Err(_) => return Box::pin(err(ResponseError::from(Error::MissingAuthorizationHeader).into())),
},
None => {
return Box::pin(err(ResponseError::from(Error::MissingAuthorizationHeader).into()));
}
};
let authenticated = match self.acl {
Authentication::Admin => data.api_keys().master.as_deref() == Some(auth_header),
Authentication::Private => {
data.api_keys().master.as_deref() == Some(auth_header)
|| data.api_keys().private.as_deref() == Some(auth_header)
}
Authentication::Public => {
data.api_keys().master.as_deref() == Some(auth_header)
|| data.api_keys().private.as_deref() == Some(auth_header)
|| data.api_keys().public.as_deref() == Some(auth_header)
}
};
if authenticated {
Box::pin(svc.call(req))
} else {
Box::pin(err(
ResponseError::from(Error::InvalidToken(auth_header.to_string())).into()
))
}
}
}
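
A hedged sketch of how this transform can guard a group of routes (the route path and bind address are illustrative, not taken from this diff). The middleware reads the `web::Data<Data>` registered on the app, so that registration is required:

use actix_web::{web, App, HttpResponse, HttpServer};

async fn run(data: Data) -> std::io::Result<()> {
    HttpServer::new(move || {
        App::new()
            .app_data(web::Data::new(data.clone()))
            .service(
                // everything under /indexes requires at least the private key
                web::scope("/indexes")
                    .wrap(Authentication::Private)
                    .route("", web::get().to(|| async { HttpResponse::Ok().finish() })),
            )
    })
    .bind("127.0.0.1:7700")?
    .run()
    .await
}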


@ -0,0 +1,27 @@
use flate2::Compression;
use flate2::read::GzDecoder;
use flate2::write::GzEncoder;
use std::fs::{create_dir_all, File};
use std::path::Path;
use tar::{Builder, Archive};
use crate::error::Error;
pub fn to_tar_gz(src: &Path, dest: &Path) -> Result<(), Error> {
let f = File::create(dest)?;
let gz_encoder = GzEncoder::new(f, Compression::default());
let mut tar_encoder = Builder::new(gz_encoder);
tar_encoder.append_dir_all(".", src)?;
let gz_encoder = tar_encoder.into_inner()?;
gz_encoder.finish()?;
Ok(())
}
pub fn from_tar_gz(src: &Path, dest: &Path) -> Result<(), Error> {
let f = File::open(src)?;
let gz = GzDecoder::new(f);
let mut ar = Archive::new(gz);
create_dir_all(dest)?;
ar.unpack(dest)?;
Ok(())
}
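
A round-trip usage sketch, assuming the `tempfile` crate (already a dependency of this crate):

use tempfile::TempDir;

fn roundtrip() -> Result<(), Error> {
    let src = TempDir::new()?;
    std::fs::write(src.path().join("hello.txt"), b"hello")?;
    let archive_dir = TempDir::new()?;
    let archive = archive_dir.path().join("backup.tar.gz");
    to_tar_gz(src.path(), &archive)?;
    let out = TempDir::new()?;
    from_tar_gz(&archive, out.path())?;
    assert!(out.path().join("hello.txt").exists());
    Ok(())
}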


@ -0,0 +1,6 @@
pub mod authentication;
//pub mod normalize_path;
pub mod compression;
pub use authentication::Authentication;
//pub use normalize_path::NormalizePath;


@ -0,0 +1,86 @@
//! From https://docs.rs/actix-web/3.0.0-alpha.2/src/actix_web/middleware/normalize.rs.html#34
use actix_web::http::Error;
use actix_service::{Service, Transform};
use actix_web::{
dev::ServiceRequest,
dev::ServiceResponse,
http::uri::{PathAndQuery, Uri},
};
use futures::future::{ok, Ready};
use regex::Regex;
use std::task::{Context, Poll};
pub struct NormalizePath;
impl<S, B> Transform<S> for NormalizePath
where
S: Service<Request = ServiceRequest, Response = ServiceResponse<B>, Error = Error>,
S::Future: 'static,
{
type Request = ServiceRequest;
type Response = ServiceResponse<B>;
type Error = Error;
type InitError = ();
type Transform = NormalizePathNormalization<S>;
type Future = Ready<Result<Self::Transform, Self::InitError>>;
fn new_transform(&self, service: S) -> Self::Future {
ok(NormalizePathNormalization {
service,
merge_slash: Regex::new("//+").unwrap(),
})
}
}
pub struct NormalizePathNormalization<S> {
service: S,
merge_slash: Regex,
}
impl<S, B> Service for NormalizePathNormalization<S>
where
S: Service<Request = ServiceRequest, Response = ServiceResponse<B>, Error = Error>,
S::Future: 'static,
{
type Request = ServiceRequest;
type Response = ServiceResponse<B>;
type Error = Error;
type Future = S::Future;
fn poll_ready(&mut self, cx: &mut Context<'_>) -> Poll<Result<(), Self::Error>> {
self.service.poll_ready(cx)
}
fn call(&mut self, mut req: ServiceRequest) -> Self::Future {
let head = req.head_mut();
// always add a trailing slash; any extra slashes are merged below
let path = head.uri.path().to_string() + "/";
if self.merge_slash.find(&path).is_some() {
// normalize multiple /'s to one /
let path = self.merge_slash.replace_all(&path, "/");
let path = if path.len() > 1 {
path.trim_end_matches('/')
} else {
&path
};
let mut parts = head.uri.clone().into_parts();
let pq = parts.path_and_query.as_ref().unwrap();
let path = if let Some(q) = pq.query() {
bytes::Bytes::from(format!("{}?{}", path, q))
} else {
bytes::Bytes::copy_from_slice(path.as_bytes())
};
parts.path_and_query = Some(PathAndQuery::from_maybe_shared(path).unwrap());
let uri = Uri::from_parts(parts).unwrap();
req.match_info_mut().get_mut().update(&uri);
req.head_mut().uri = uri;
}
self.service.call(req)
}
}


@ -0,0 +1,126 @@
mod search;
mod updates;
use std::sync::Arc;
use std::ops::Deref;
use anyhow::{bail, Context};
use serde_json::{Value, Map};
use milli::obkv_to_json;
pub use search::{SearchQuery, SearchResult, DEFAULT_SEARCH_LIMIT};
pub use updates::{Settings, Facets, UpdateResult};
pub type Document = Map<String, Value>;
#[derive(Clone)]
pub struct Index(pub Arc<milli::Index>);
impl Deref for Index {
type Target = milli::Index;
fn deref(&self) -> &Self::Target {
self.0.as_ref()
}
}
impl Index {
pub fn settings(&self) -> anyhow::Result<Settings> {
let txn = self.read_txn()?;
let displayed_attributes = self
.displayed_fields(&txn)?
.map(|fields| fields.into_iter().map(String::from).collect())
.unwrap_or_else(|| vec!["*".to_string()]);
let searchable_attributes = self
.searchable_fields(&txn)?
.map(|fields| fields.into_iter().map(String::from).collect())
.unwrap_or_else(|| vec!["*".to_string()]);
let faceted_attributes = self
.faceted_fields(&txn)?
.into_iter()
.map(|(k, v)| (k, v.to_string()))
.collect();
Ok(Settings {
displayed_attributes: Some(Some(displayed_attributes)),
searchable_attributes: Some(Some(searchable_attributes)),
faceted_attributes: Some(Some(faceted_attributes)),
criteria: None,
})
}
pub fn retrieve_documents<S>(
&self,
offset: usize,
limit: usize,
attributes_to_retrieve: Option<Vec<S>>,
) -> anyhow::Result<Vec<Map<String, Value>>>
where
S: AsRef<str> + Send + Sync + 'static,
{
let txn = self.read_txn()?;
let fields_ids_map = self.fields_ids_map(&txn)?;
let attributes_to_retrieve_ids = match attributes_to_retrieve {
Some(attrs) => attrs
.iter()
.filter_map(|f| fields_ids_map.id(f.as_ref()))
.collect::<Vec<_>>(),
None => fields_ids_map.iter().map(|(id, _)| id).collect(),
};
let iter = self.documents.range(&txn, &(..))?.skip(offset).take(limit);
let mut documents = Vec::new();
for entry in iter {
let (_id, obkv) = entry?;
let object = obkv_to_json(&attributes_to_retrieve_ids, &fields_ids_map, obkv)?;
documents.push(object);
}
Ok(documents)
}
pub fn retrieve_document<S: AsRef<str>>(
&self,
doc_id: String,
attributes_to_retrieve: Option<Vec<S>>,
) -> anyhow::Result<Map<String, Value>> {
let txn = self.read_txn()?;
let fields_ids_map = self.fields_ids_map(&txn)?;
let attributes_to_retrieve_ids = match attributes_to_retrieve {
Some(attrs) => attrs
.iter()
.filter_map(|f| fields_ids_map.id(f.as_ref()))
.collect::<Vec<_>>(),
None => fields_ids_map.iter().map(|(id, _)| id).collect(),
};
let internal_id = self
.external_documents_ids(&txn)?
.get(doc_id.as_bytes())
.with_context(|| format!("Document with id {} not found", doc_id))?;
let document = self
.documents(&txn, std::iter::once(internal_id))?
.into_iter()
.next()
.map(|(_, d)| d);
match document {
Some(document) => Ok(obkv_to_json(
&attributes_to_retrieve_ids,
&fields_ids_map,
document,
)?),
None => bail!("Document with id {} not found", doc_id),
}
}
}


@ -0,0 +1,245 @@
use std::time::Instant;
use std::collections::{HashSet, BTreeMap};
use std::mem;
use either::Either;
use anyhow::bail;
use heed::RoTxn;
use meilisearch_tokenizer::{Analyzer, AnalyzerConfig};
use milli::{FacetCondition, facet::FacetValue};
use serde::{Serialize, Deserialize};
use serde_json::{Value, Map};
use super::Index;
pub const DEFAULT_SEARCH_LIMIT: usize = 20;
const fn default_search_limit() -> usize {
DEFAULT_SEARCH_LIMIT
}
#[derive(Deserialize)]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
#[allow(dead_code)]
pub struct SearchQuery {
pub q: Option<String>,
pub offset: Option<usize>,
#[serde(default = "default_search_limit")]
pub limit: usize,
pub attributes_to_retrieve: Option<Vec<String>>,
pub attributes_to_crop: Option<Vec<String>>,
pub crop_length: Option<usize>,
pub attributes_to_highlight: Option<HashSet<String>>,
pub filters: Option<String>,
pub matches: Option<bool>,
pub facet_filters: Option<Value>,
pub facet_distributions: Option<Vec<String>>,
}
#[derive(Serialize)]
#[serde(rename_all = "camelCase")]
pub struct SearchResult {
pub hits: Vec<Map<String, Value>>,
pub nb_hits: u64,
pub query: String,
pub limit: usize,
pub offset: usize,
pub processing_time_ms: u128,
#[serde(skip_serializing_if = "Option::is_none")]
pub facet_distributions: Option<BTreeMap<String, BTreeMap<FacetValue, u64>>>,
}
impl Index {
pub fn perform_search(&self, query: SearchQuery) -> anyhow::Result<SearchResult> {
let before_search = Instant::now();
let rtxn = self.read_txn()?;
let mut search = self.search(&rtxn);
if let Some(ref query) = query.q {
search.query(query);
}
search.limit(query.limit);
search.offset(query.offset.unwrap_or_default());
if let Some(ref facets) = query.facet_filters {
if let Some(facets) = parse_facets(facets, self, &rtxn)? {
search.facet_condition(facets);
}
}
let milli::SearchResult {
documents_ids,
found_words,
candidates,
..
} = search.execute()?;
let mut documents = Vec::new();
let fields_ids_map = self.fields_ids_map(&rtxn).unwrap();
let displayed_fields_ids = self.displayed_fields_ids(&rtxn).unwrap();
let attributes_to_retrieve_ids = match query.attributes_to_retrieve {
Some(ref attrs) if attrs.iter().any(|f| f == "*") => None,
Some(ref attrs) => attrs
.iter()
.filter_map(|f| fields_ids_map.id(f))
.collect::<Vec<_>>()
.into(),
None => None,
};
let displayed_fields_ids = match (displayed_fields_ids, attributes_to_retrieve_ids) {
(_, Some(ids)) => ids,
(Some(ids), None) => ids,
(None, None) => fields_ids_map.iter().map(|(id, _)| id).collect(),
};
let stop_words = fst::Set::default();
let highlighter = Highlighter::new(&stop_words);
for (_id, obkv) in self.documents(&rtxn, documents_ids)? {
let mut object = milli::obkv_to_json(&displayed_fields_ids, &fields_ids_map, obkv).unwrap();
if let Some(ref attributes_to_highlight) = query.attributes_to_highlight {
highlighter.highlight_record(&mut object, &found_words, attributes_to_highlight);
}
documents.push(object);
}
let nb_hits = candidates.len();
let facet_distributions = match query.facet_distributions {
Some(ref fields) => {
let mut facet_distribution = self.facets_distribution(&rtxn);
if fields.iter().all(|f| f != "*") {
facet_distribution.facets(fields);
}
Some(facet_distribution.candidates(candidates).execute()?)
}
None => None,
};
let result = SearchResult {
hits: documents,
nb_hits,
query: query.q.clone().unwrap_or_default(),
limit: query.limit,
offset: query.offset.unwrap_or_default(),
processing_time_ms: before_search.elapsed().as_millis(),
facet_distributions,
};
Ok(result)
}
}
fn parse_facets_array(
txn: &RoTxn,
index: &Index,
arr: &Vec<Value>,
) -> anyhow::Result<Option<FacetCondition>> {
let mut ands = Vec::new();
for value in arr {
match value {
Value::String(s) => ands.push(Either::Right(s.clone())),
Value::Array(arr) => {
let mut ors = Vec::new();
for value in arr {
match value {
Value::String(s) => ors.push(s.clone()),
v => bail!("Invalid facet expression, expected String, found: {:?}", v),
}
}
ands.push(Either::Left(ors));
}
v => bail!(
"Invalid facet expression, expected String or [String], found: {:?}",
v
),
}
}
FacetCondition::from_array(txn, &index.0, ands)
}
pub struct Highlighter<'a, A> {
analyzer: Analyzer<'a, A>,
}
impl<'a, A: AsRef<[u8]>> Highlighter<'a, A> {
pub fn new(stop_words: &'a fst::Set<A>) -> Self {
let analyzer = Analyzer::new(AnalyzerConfig::default_with_stopwords(stop_words));
Self { analyzer }
}
pub fn highlight_value(&self, value: Value, words_to_highlight: &HashSet<String>) -> Value {
match value {
Value::Null => Value::Null,
Value::Bool(boolean) => Value::Bool(boolean),
Value::Number(number) => Value::Number(number),
Value::String(old_string) => {
let mut string = String::new();
let analyzed = self.analyzer.analyze(&old_string);
for (word, token) in analyzed.reconstruct() {
if token.is_word() {
let to_highlight = words_to_highlight.contains(token.text());
if to_highlight {
string.push_str("<mark>")
}
string.push_str(word);
if to_highlight {
string.push_str("</mark>")
}
} else {
string.push_str(word);
}
}
Value::String(string)
}
Value::Array(values) => Value::Array(
values
.into_iter()
.map(|v| self.highlight_value(v, words_to_highlight))
.collect(),
),
Value::Object(object) => Value::Object(
object
.into_iter()
.map(|(k, v)| (k, self.highlight_value(v, words_to_highlight)))
.collect(),
),
}
}
pub fn highlight_record(
&self,
object: &mut Map<String, Value>,
words_to_highlight: &HashSet<String>,
attributes_to_highlight: &HashSet<String>,
) {
// TODO: do we need to rebuild the string for elements that are not highlighted?
for (key, value) in object.iter_mut() {
if attributes_to_highlight.contains(key) {
let old_value = mem::take(value);
*value = self.highlight_value(old_value, words_to_highlight);
}
}
}
}
fn parse_facets(
facets: &Value,
index: &Index,
txn: &RoTxn,
) -> anyhow::Result<Option<FacetCondition>> {
match facets {
// Disabled for now
//Value::String(expr) => Ok(Some(FacetCondition::from_str(txn, index, expr)?)),
Value::Array(arr) => parse_facets_array(txn, index, arr),
v => bail!(
"Invalid facet expression, expected Array, found: {:?}",
v
),
}
}
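
The accepted shape mirrors the public `facetFilters` search parameter: elements of the outer array are combined with AND, while elements of a nested array are combined with OR. For example:

// Reads as: (color:red OR color:blue) AND type:shoe
let facets = serde_json::json!([["color:red", "color:blue"], "type:shoe"]);
// Passed to parse_facets together with an open index and a read transaction:
// let condition = parse_facets(&facets, &index, &txn)?;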


@ -0,0 +1,229 @@
use std::collections::HashMap;
use std::io;
use std::num::NonZeroUsize;
use flate2::read::GzDecoder;
use log::info;
use milli::update::{UpdateFormat, IndexDocumentsMethod, UpdateBuilder, DocumentAdditionResult};
use serde::{Serialize, Deserialize, de::Deserializer};
use super::Index;
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum UpdateResult {
DocumentsAddition(DocumentAdditionResult),
DocumentDeletion { deleted: usize },
Other,
}
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
#[serde(deny_unknown_fields)]
#[serde(rename_all = "camelCase")]
pub struct Settings {
#[serde(
default,
deserialize_with = "deserialize_some",
skip_serializing_if = "Option::is_none",
)]
pub displayed_attributes: Option<Option<Vec<String>>>,
#[serde(
default,
deserialize_with = "deserialize_some",
skip_serializing_if = "Option::is_none",
)]
pub searchable_attributes: Option<Option<Vec<String>>>,
#[serde(default)]
pub faceted_attributes: Option<Option<HashMap<String, String>>>,
#[serde(
default,
deserialize_with = "deserialize_some",
skip_serializing_if = "Option::is_none",
)]
pub criteria: Option<Option<Vec<String>>>,
}
impl Settings {
pub fn cleared() -> Self {
Self {
displayed_attributes: Some(None),
searchable_attributes: Some(None),
faceted_attributes: Some(None),
criteria: Some(None),
}
}
}
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(deny_unknown_fields)]
#[serde(rename_all = "camelCase")]
pub struct Facets {
pub level_group_size: Option<NonZeroUsize>,
pub min_level_size: Option<NonZeroUsize>,
}
fn deserialize_some<'de, T, D>(deserializer: D) -> Result<Option<T>, D::Error>
where T: Deserialize<'de>,
D: Deserializer<'de>
{
Deserialize::deserialize(deserializer).map(Some)
}
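
The double `Option` produced by `deserialize_some` distinguishes three states in a settings payload: a missing field deserializes to `None` (leave the setting unchanged), an explicit `null` to `Some(None)` (reset it), and a value to `Some(Some(value))` (set it). A small sketch:

// {"displayedAttributes": ["title"]} -> Some(Some(vec!["title"])) : set
// {"displayedAttributes": null}      -> Some(None)                : reset
// {}                                 -> None                      : leave unchanged
let s: Settings = serde_json::from_str(r#"{ "displayedAttributes": null }"#).unwrap();
assert_eq!(s.displayed_attributes, Some(None));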
impl Index {
pub fn update_documents(
&self,
format: UpdateFormat,
method: IndexDocumentsMethod,
content: impl io::Read,
update_builder: UpdateBuilder,
primary_key: Option<&str>,
) -> anyhow::Result<UpdateResult> {
info!("performing document addition");
// We must use the write transaction of the update here.
let mut wtxn = self.write_txn()?;
// Set the primary key if not set already, ignore if already set.
match (self.primary_key(&wtxn)?, primary_key) {
(None, Some(ref primary_key)) => {
self.put_primary_key(&mut wtxn, primary_key)?;
}
_ => (),
}
let mut builder = update_builder.index_documents(&mut wtxn, self);
builder.update_format(format);
builder.index_documents_method(method);
let gzipped = false;
let reader = if gzipped {
Box::new(GzDecoder::new(content))
} else {
Box::new(content) as Box<dyn io::Read>
};
let result = builder.execute(reader, |indexing_step, update_id| {
info!("update {}: {:?}", update_id, indexing_step)
});
info!("document addition done: {:?}", result);
match result {
Ok(addition_result) => wtxn
.commit()
.and(Ok(UpdateResult::DocumentsAddition(addition_result)))
.map_err(Into::into),
Err(e) => Err(e.into()),
}
}
pub fn clear_documents(&self, update_builder: UpdateBuilder) -> anyhow::Result<UpdateResult> {
// We must use the write transaction of the update here.
let mut wtxn = self.write_txn()?;
let builder = update_builder.clear_documents(&mut wtxn, self);
match builder.execute() {
Ok(_count) => wtxn
.commit()
.and(Ok(UpdateResult::Other))
.map_err(Into::into),
Err(e) => Err(e.into()),
}
}
pub fn update_settings(
&self,
settings: &Settings,
update_builder: UpdateBuilder,
) -> anyhow::Result<UpdateResult> {
// We must use the write transaction of the update here.
let mut wtxn = self.write_txn()?;
let mut builder = update_builder.settings(&mut wtxn, self);
// We transpose the settings JSON struct into a real setting update.
if let Some(ref names) = settings.searchable_attributes {
match names {
Some(names) => builder.set_searchable_fields(names.clone()),
None => builder.reset_searchable_fields(),
}
}
if let Some(ref names) = settings.displayed_attributes {
match names {
Some(names) => builder.set_displayed_fields(names.clone()),
None => builder.reset_displayed_fields(),
}
}
if let Some(ref facet_types) = settings.faceted_attributes {
let facet_types = facet_types.clone().unwrap_or_default();
builder.set_faceted_fields(facet_types);
}
if let Some(ref criteria) = settings.criteria {
match criteria {
Some(criteria) => builder.set_criteria(criteria.clone()),
None => builder.reset_criteria(),
}
}
let result = builder
.execute(|indexing_step, update_id| info!("update {}: {:?}", update_id, indexing_step));
match result {
Ok(()) => wtxn
.commit()
.and(Ok(UpdateResult::Other))
.map_err(Into::into),
Err(e) => Err(e.into()),
}
}
pub fn update_facets(
&self,
levels: &Facets,
update_builder: UpdateBuilder,
) -> anyhow::Result<UpdateResult> {
// We must use the write transaction of the update here.
let mut wtxn = self.write_txn()?;
let mut builder = update_builder.facets(&mut wtxn, self);
if let Some(value) = levels.level_group_size {
builder.level_group_size(value);
}
if let Some(value) = levels.min_level_size {
builder.min_level_size(value);
}
match builder.execute() {
Ok(()) => wtxn
.commit()
.and(Ok(UpdateResult::Other))
.map_err(Into::into),
Err(e) => Err(e.into()),
}
}
pub fn delete_documents(
&self,
document_ids: impl io::Read,
update_builder: UpdateBuilder,
) -> anyhow::Result<UpdateResult> {
let ids: Vec<String> = serde_json::from_reader(document_ids)?;
let mut txn = self.write_txn()?;
let mut builder = update_builder.delete_documents(&mut txn, self)?;
// We ignore nonexistent document ids
ids.iter().for_each(|id| { builder.delete_external_id(id); });
match builder.execute() {
Ok(deleted) => txn
.commit()
.and(Ok(UpdateResult::DocumentDeletion { deleted }))
.map_err(Into::into),
Err(e) => Err(e.into())
}
}
}


@ -0,0 +1,127 @@
mod index_actor;
mod update_actor;
mod uuid_resolver;
mod update_store;
mod update_handler;
use std::path::Path;
use tokio::sync::{mpsc, oneshot};
use uuid::Uuid;
use super::IndexMetadata;
use futures::stream::StreamExt;
use actix_web::web::Payload;
use super::UpdateMeta;
use crate::index::{SearchResult, SearchQuery};
use actix_web::web::Bytes;
use crate::index::Settings;
use super::UpdateStatus;
pub struct IndexController {
uuid_resolver: uuid_resolver::UuidResolverHandle,
index_handle: index_actor::IndexActorHandle,
update_handle: update_actor::UpdateActorHandle<Bytes>,
}
enum IndexControllerMsg {
CreateIndex {
uuid: Uuid,
primary_key: Option<String>,
ret: oneshot::Sender<anyhow::Result<IndexMetadata>>,
},
Shutdown,
}
impl IndexController {
pub fn new(path: impl AsRef<Path>) -> Self {
let uuid_resolver = uuid_resolver::UuidResolverHandle::new();
let index_actor = index_actor::IndexActorHandle::new(&path);
let update_handle = update_actor::UpdateActorHandle::new(index_actor.clone(), &path);
Self { uuid_resolver, index_handle: index_actor, update_handle }
}
pub async fn add_documents(
&self,
index: String,
method: milli::update::IndexDocumentsMethod,
format: milli::update::UpdateFormat,
mut payload: Payload,
primary_key: Option<String>,
) -> anyhow::Result<super::UpdateStatus> {
let uuid = self.uuid_resolver.get_or_create(index).await?;
let meta = UpdateMeta::DocumentsAddition { method, format, primary_key };
let (sender, receiver) = mpsc::channel(10);
// It is necessary to spawn a local task to send the payload to the update handle, to
// prevent a deadlock between update_handle::update, which waits for the update to be
// registered, and the update actor, which waits for the payload to be sent to it.
tokio::task::spawn_local(async move {
while let Some(bytes) = payload.next().await {
match bytes {
Ok(bytes) => { sender.send(Ok(bytes)).await; },
Err(e) => {
let error: Box<dyn std::error::Error + Sync + Send + 'static> = Box::new(e);
sender.send(Err(error)).await; },
}
}
});
// This must be done *AFTER* spawning the task.
let status = self.update_handle.update(meta, receiver, uuid).await?;
Ok(status)
}
fn clear_documents(&self, index: String) -> anyhow::Result<UpdateStatus> {
todo!()
}
fn delete_documents(&self, index: String, document_ids: Vec<String>) -> anyhow::Result<super::UpdateStatus> {
todo!()
}
fn update_settings(&self, index_uid: String, settings: Settings) -> anyhow::Result<super::UpdateStatus> {
todo!()
}
pub async fn create_index(&self, index_settings: super::IndexSettings) -> anyhow::Result<super::IndexMetadata> {
let super::IndexSettings { name, primary_key } = index_settings;
let uuid = self.uuid_resolver.create(name.unwrap()).await?;
let index_meta = self.index_handle.create_index(uuid, primary_key).await?;
Ok(index_meta)
}
fn delete_index(&self, index_uid: String) -> anyhow::Result<()> {
todo!()
}
fn swap_indices(&self, index1_uid: String, index2_uid: String) -> anyhow::Result<()> {
todo!()
}
pub fn index(&self, name: String) -> anyhow::Result<Option<std::sync::Arc<milli::Index>>> {
todo!()
}
fn update_status(&self, index: String, id: u64) -> anyhow::Result<Option<UpdateStatus>> {
todo!()
}
fn all_update_status(&self, index: String) -> anyhow::Result<Vec<super::UpdateStatus>> {
todo!()
}
pub fn list_indexes(&self) -> anyhow::Result<Vec<super::IndexMetadata>> {
todo!()
}
fn update_index(&self, name: String, index_settings: super::IndexSettings) -> anyhow::Result<super::IndexMetadata> {
todo!()
}
pub async fn search(&self, name: String, query: SearchQuery) -> anyhow::Result<SearchResult> {
let uuid = self.uuid_resolver.resolve(name).await.unwrap().unwrap();
let result = self.index_handle.search(uuid, query).await?;
Ok(result)
}
}


@ -0,0 +1,590 @@
use std::collections::{hash_map::Entry, HashMap};
use std::fs::{create_dir_all, remove_dir_all, File};
use std::future::Future;
use std::path::{Path, PathBuf};
use std::sync::Arc;
use async_stream::stream;
use chrono::{DateTime, Utc};
use futures::pin_mut;
use futures::stream::StreamExt;
use heed::EnvOpenOptions;
use log::info;
use serde::{Deserialize, Serialize};
use thiserror::Error;
use tokio::sync::{mpsc, oneshot, RwLock};
use uuid::Uuid;
use super::get_arc_ownership_blocking;
use super::update_handler::UpdateHandler;
use crate::index::UpdateResult as UResult;
use crate::index::{Document, Index, SearchQuery, SearchResult, Settings};
use crate::index_controller::{
updates::{Failed, Processed, Processing},
UpdateMeta,
};
use crate::option::IndexerOpts;
pub type Result<T> = std::result::Result<T, IndexError>;
type AsyncMap<K, V> = Arc<RwLock<HashMap<K, V>>>;
type UpdateResult = std::result::Result<Processed<UpdateMeta, UResult>, Failed<UpdateMeta, String>>;
#[derive(Debug, Serialize, Deserialize, Clone)]
#[serde(rename_all = "camelCase")]
pub struct IndexMeta {
uuid: Uuid,
created_at: DateTime<Utc>,
updated_at: DateTime<Utc>,
primary_key: Option<String>,
}
enum IndexMsg {
CreateIndex {
uuid: Uuid,
primary_key: Option<String>,
ret: oneshot::Sender<Result<IndexMeta>>,
},
Update {
meta: Processing<UpdateMeta>,
data: std::fs::File,
ret: oneshot::Sender<Result<UpdateResult>>,
},
Search {
uuid: Uuid,
query: SearchQuery,
ret: oneshot::Sender<anyhow::Result<SearchResult>>,
},
Settings {
uuid: Uuid,
ret: oneshot::Sender<Result<Settings>>,
},
Documents {
uuid: Uuid,
attributes_to_retrieve: Option<Vec<String>>,
offset: usize,
limit: usize,
ret: oneshot::Sender<Result<Vec<Document>>>,
},
Document {
uuid: Uuid,
attributes_to_retrieve: Option<Vec<String>>,
doc_id: String,
ret: oneshot::Sender<Result<Document>>,
},
Delete {
uuid: Uuid,
ret: oneshot::Sender<Result<()>>,
},
GetMeta {
uuid: Uuid,
ret: oneshot::Sender<Result<Option<IndexMeta>>>,
},
}
struct IndexActor<S> {
read_receiver: Option<mpsc::Receiver<IndexMsg>>,
write_receiver: Option<mpsc::Receiver<IndexMsg>>,
update_handler: Arc<UpdateHandler>,
store: S,
}
#[derive(Error, Debug)]
pub enum IndexError {
#[error("error with index: {0}")]
Error(#[from] anyhow::Error),
#[error("index already exists")]
IndexAlreadyExists,
#[error("Index doesn't exists")]
UnexistingIndex,
}
#[async_trait::async_trait]
trait IndexStore {
async fn create_index(&self, uuid: Uuid, primary_key: Option<String>) -> Result<IndexMeta>;
async fn update_index<R, F>(&self, uuid: Uuid, f: F) -> Result<R>
where
F: FnOnce(Index) -> Result<R> + Send + Sync + 'static,
R: Sync + Send + 'static;
async fn get_or_create(&self, uuid: Uuid, primary_key: Option<String>) -> Result<Index>;
async fn get(&self, uuid: Uuid) -> Result<Option<Index>>;
async fn delete(&self, uuid: &Uuid) -> Result<Option<Index>>;
async fn get_meta(&self, uuid: &Uuid) -> Result<Option<IndexMeta>>;
}
impl<S: IndexStore + Sync + Send> IndexActor<S> {
fn new(
read_receiver: mpsc::Receiver<IndexMsg>,
write_receiver: mpsc::Receiver<IndexMsg>,
store: S,
) -> Result<Self> {
let options = IndexerOpts::default();
let update_handler = UpdateHandler::new(&options)
.map_err(|e| IndexError::Error(e.into()))?;
let update_handler = Arc::new(update_handler);
let read_receiver = Some(read_receiver);
let write_receiver = Some(write_receiver);
Ok(Self {
read_receiver,
write_receiver,
store,
update_handler,
})
}
    /// `run` polls the write_receiver and read_receiver concurrently: messages sent
    /// through the read channel are processed concurrently, while messages sent
    /// through the write channel are processed one at a time.
async fn run(mut self) {
let mut read_receiver = self
.read_receiver
.take()
.expect("Index Actor must have a inbox at this point.");
let read_stream = stream! {
loop {
match read_receiver.recv().await {
Some(msg) => yield msg,
None => break,
}
}
};
let mut write_receiver = self
.write_receiver
.take()
.expect("Index Actor must have a inbox at this point.");
let write_stream = stream! {
loop {
match write_receiver.recv().await {
Some(msg) => yield msg,
None => break,
}
}
};
pin_mut!(write_stream);
pin_mut!(read_stream);
let fut1 = read_stream.for_each_concurrent(Some(10), |msg| self.handle_message(msg));
let fut2 = write_stream.for_each_concurrent(Some(1), |msg| self.handle_message(msg));
let fut1: Box<dyn Future<Output = ()> + Unpin + Send> = Box::new(fut1);
let fut2: Box<dyn Future<Output = ()> + Unpin + Send> = Box::new(fut2);
tokio::join!(fut1, fut2);
}
async fn handle_message(&self, msg: IndexMsg) {
use IndexMsg::*;
match msg {
CreateIndex {
uuid,
primary_key,
ret,
} => {
let _ = ret.send(self.handle_create_index(uuid, primary_key).await);
}
Update { ret, meta, data } => {
let _ = ret.send(self.handle_update(meta, data).await);
}
Search { ret, query, uuid } => {
let _ = ret.send(self.handle_search(uuid, query).await);
}
Settings { ret, uuid } => {
let _ = ret.send(self.handle_settings(uuid).await);
}
Documents {
ret,
uuid,
attributes_to_retrieve,
offset,
limit,
} => {
let _ = ret.send(
self.handle_fetch_documents(uuid, offset, limit, attributes_to_retrieve)
.await,
);
}
Document {
uuid,
attributes_to_retrieve,
doc_id,
ret,
} => {
let _ = ret.send(
self.handle_fetch_document(uuid, doc_id, attributes_to_retrieve)
.await,
);
}
Delete { uuid, ret } => {
let _ = ret.send(self.handle_delete(uuid).await);
}
GetMeta { uuid, ret } => {
let _ = ret.send(self.handle_get_meta(uuid).await);
}
}
}
async fn handle_search(&self, uuid: Uuid, query: SearchQuery) -> anyhow::Result<SearchResult> {
let index = self.store
.get(uuid)
.await?
.ok_or(IndexError::UnexistingIndex)?;
tokio::task::spawn_blocking(move || index.perform_search(query)).await?
}
async fn handle_create_index(
&self,
uuid: Uuid,
primary_key: Option<String>,
) -> Result<IndexMeta> {
self.store.create_index(uuid, primary_key).await
}
async fn handle_update(
&self,
meta: Processing<UpdateMeta>,
data: File,
) -> Result<UpdateResult> {
info!("Processing update {}", meta.id());
let uuid = meta.index_uuid().clone();
let update_handler = self.update_handler.clone();
let handle = self
.store
.update_index(uuid, |index| {
let handle = tokio::task::spawn_blocking(move || {
update_handler.handle_update(meta, data, index)
});
Ok(handle)
})
.await?;
handle.await.map_err(|e| IndexError::Error(e.into()))
}
async fn handle_settings(&self, uuid: Uuid) -> Result<Settings> {
let index = self.store
.get(uuid)
.await?
.ok_or(IndexError::UnexistingIndex)?;
tokio::task::spawn_blocking(move || index.settings().map_err(|e| IndexError::Error(e)))
.await
.map_err(|e| IndexError::Error(e.into()))?
}
async fn handle_fetch_documents(
&self,
uuid: Uuid,
offset: usize,
limit: usize,
attributes_to_retrieve: Option<Vec<String>>,
) -> Result<Vec<Document>> {
let index = self.store.get(uuid)
.await?
.ok_or(IndexError::UnexistingIndex)?;
tokio::task::spawn_blocking(move || {
index
.retrieve_documents(offset, limit, attributes_to_retrieve)
.map_err(|e| IndexError::Error(e))
})
.await
.map_err(|e| IndexError::Error(e.into()))?
}
async fn handle_fetch_document(
&self,
uuid: Uuid,
doc_id: String,
attributes_to_retrieve: Option<Vec<String>>,
) -> Result<Document> {
let index = self
.store
.get(uuid)
.await?
.ok_or(IndexError::UnexistingIndex)?;
tokio::task::spawn_blocking(move || {
index
.retrieve_document(doc_id, attributes_to_retrieve)
.map_err(|e| IndexError::Error(e))
})
.await
.map_err(|e| IndexError::Error(e.into()))?
}
async fn handle_delete(&self, uuid: Uuid) -> Result<()> {
let index = self.store.delete(&uuid).await?;
if let Some(index) = index {
tokio::task::spawn(async move {
let index = index.0;
let store = get_arc_ownership_blocking(index).await;
tokio::task::spawn_blocking(move || {
store.prepare_for_closing().wait();
info!("Index {} was closed.", uuid);
});
});
}
Ok(())
}
async fn handle_get_meta(&self, uuid: Uuid) -> Result<Option<IndexMeta>> {
let result = self.store.get_meta(&uuid).await?;
Ok(result)
}
}
#[derive(Clone)]
pub struct IndexActorHandle {
read_sender: mpsc::Sender<IndexMsg>,
write_sender: mpsc::Sender<IndexMsg>,
}
impl IndexActorHandle {
pub fn new(path: impl AsRef<Path>) -> anyhow::Result<Self> {
let (read_sender, read_receiver) = mpsc::channel(100);
let (write_sender, write_receiver) = mpsc::channel(100);
let store = MapIndexStore::new(path);
let actor = IndexActor::new(read_receiver, write_receiver, store)?;
tokio::task::spawn(actor.run());
Ok(Self {
read_sender,
write_sender,
})
}
pub async fn create_index(&self, uuid: Uuid, primary_key: Option<String>) -> Result<IndexMeta> {
let (ret, receiver) = oneshot::channel();
let msg = IndexMsg::CreateIndex {
ret,
uuid,
primary_key,
};
let _ = self.read_sender.send(msg).await;
receiver.await.expect("IndexActor has been killed")
}
pub async fn update(
&self,
meta: Processing<UpdateMeta>,
data: std::fs::File,
) -> anyhow::Result<UpdateResult> {
let (ret, receiver) = oneshot::channel();
let msg = IndexMsg::Update { ret, meta, data };
        // Updates are writes: they must go through the write channel so they are
        // processed one at a time (see the `run` doc comment above).
        let _ = self.write_sender.send(msg).await;
Ok(receiver.await.expect("IndexActor has been killed")?)
}
pub async fn search(&self, uuid: Uuid, query: SearchQuery) -> Result<SearchResult> {
let (ret, receiver) = oneshot::channel();
let msg = IndexMsg::Search { uuid, query, ret };
let _ = self.read_sender.send(msg).await;
Ok(receiver.await.expect("IndexActor has been killed")?)
}
pub async fn settings(&self, uuid: Uuid) -> Result<Settings> {
let (ret, receiver) = oneshot::channel();
let msg = IndexMsg::Settings { uuid, ret };
let _ = self.read_sender.send(msg).await;
Ok(receiver.await.expect("IndexActor has been killed")?)
}
pub async fn documents(
&self,
uuid: Uuid,
offset: usize,
limit: usize,
attributes_to_retrieve: Option<Vec<String>>,
) -> Result<Vec<Document>> {
let (ret, receiver) = oneshot::channel();
let msg = IndexMsg::Documents {
uuid,
ret,
offset,
attributes_to_retrieve,
limit,
};
let _ = self.read_sender.send(msg).await;
Ok(receiver.await.expect("IndexActor has been killed")?)
}
pub async fn document(
&self,
uuid: Uuid,
doc_id: String,
attributes_to_retrieve: Option<Vec<String>>,
) -> Result<Document> {
let (ret, receiver) = oneshot::channel();
let msg = IndexMsg::Document {
uuid,
ret,
doc_id,
attributes_to_retrieve,
};
let _ = self.read_sender.send(msg).await;
Ok(receiver.await.expect("IndexActor has been killed")?)
}
pub async fn delete(&self, uuid: Uuid) -> Result<()> {
let (ret, receiver) = oneshot::channel();
let msg = IndexMsg::Delete { uuid, ret };
let _ = self.read_sender.send(msg).await;
Ok(receiver.await.expect("IndexActor has been killed")?)
}
pub async fn get_index_meta(&self, uuid: Uuid) -> Result<Option<IndexMeta>> {
let (ret, receiver) = oneshot::channel();
let msg = IndexMsg::GetMeta { uuid, ret };
let _ = self.read_sender.send(msg).await;
Ok(receiver.await.expect("IndexActor has been killed")?)
}
}
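// A minimal, self-contained sketch of the request/response ("ask") pattern that
// every handle method above relies on: a oneshot sender rides along with the
// message and the actor replies through it. `Msg`/`Ping` are illustrative
// stand-ins, and the test assumes tokio's test macro is available.
#[cfg(test)]
mod ask_pattern_example {
    use tokio::sync::{mpsc, oneshot};

    enum Msg {
        Ping { ret: oneshot::Sender<String> },
    }

    #[tokio::test]
    async fn oneshot_round_trip() {
        let (sender, mut inbox) = mpsc::channel::<Msg>(8);
        // The actor side: receive a message and answer through the embedded sender.
        tokio::spawn(async move {
            while let Some(Msg::Ping { ret }) = inbox.recv().await {
                let _ = ret.send("pong".to_string());
            }
        });
        // The handle side mirrors `IndexActorHandle`: send, then await the answer.
        let (ret, receiver) = oneshot::channel();
        let _ = sender.send(Msg::Ping { ret }).await;
        let answer = receiver.await.expect("actor has been killed");
        assert_eq!(answer, "pong");
    }
}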
struct MapIndexStore {
root: PathBuf,
meta_store: AsyncMap<Uuid, IndexMeta>,
index_store: AsyncMap<Uuid, Index>,
}
#[async_trait::async_trait]
impl IndexStore for MapIndexStore {
async fn create_index(&self, uuid: Uuid, primary_key: Option<String>) -> Result<IndexMeta> {
let meta = match self.meta_store.write().await.entry(uuid.clone()) {
Entry::Vacant(entry) => {
let now = Utc::now();
let meta = IndexMeta {
uuid,
created_at: now.clone(),
updated_at: now,
primary_key,
};
entry.insert(meta).clone()
}
Entry::Occupied(_) => return Err(IndexError::IndexAlreadyExists),
};
let db_path = self.root.join(format!("index-{}", meta.uuid));
let index: Result<Index> = tokio::task::spawn_blocking(move || {
create_dir_all(&db_path).expect("can't create db");
let mut options = EnvOpenOptions::new();
options.map_size(4096 * 100_000);
let index = milli::Index::new(options, &db_path).map_err(|e| IndexError::Error(e))?;
let index = Index(Arc::new(index));
Ok(index)
})
.await
.expect("thread died");
self.index_store
.write()
.await
.insert(meta.uuid.clone(), index?);
Ok(meta)
}
async fn get_or_create(&self, uuid: Uuid, primary_key: Option<String>) -> Result<Index> {
match self.index_store.write().await.entry(uuid.clone()) {
Entry::Vacant(index_entry) => match self.meta_store.write().await.entry(uuid.clone()) {
Entry::Vacant(meta_entry) => {
let now = Utc::now();
let meta = IndexMeta {
uuid,
created_at: now.clone(),
updated_at: now,
primary_key,
};
let meta = meta_entry.insert(meta);
let db_path = self.root.join(format!("index-{}", meta.uuid));
let index: Result<Index> = tokio::task::spawn_blocking(move || {
create_dir_all(&db_path).expect("can't create db");
let mut options = EnvOpenOptions::new();
options.map_size(4096 * 100_000);
let index = milli::Index::new(options, &db_path)
.map_err(|e| IndexError::Error(e))?;
let index = Index(Arc::new(index));
Ok(index)
})
.await
.expect("thread died");
Ok(index_entry.insert(index?).clone())
}
Entry::Occupied(entry) => {
let meta = entry.get();
let db_path = self.root.join(format!("index-{}", meta.uuid));
let index: Result<Index> = tokio::task::spawn_blocking(move || {
create_dir_all(&db_path).expect("can't create db");
let mut options = EnvOpenOptions::new();
options.map_size(4096 * 100_000);
let index = milli::Index::new(options, &db_path)
.map_err(|e| IndexError::Error(e))?;
let index = Index(Arc::new(index));
Ok(index)
})
.await
.expect("thread died");
Ok(index_entry.insert(index?).clone())
}
},
Entry::Occupied(entry) => Ok(entry.get().clone()),
}
}
async fn get(&self, uuid: Uuid) -> Result<Option<Index>> {
Ok(self.index_store.read().await.get(&uuid).cloned())
}
async fn delete(&self, uuid: &Uuid) -> Result<Option<Index>> {
let index = self.index_store.write().await.remove(&uuid);
if index.is_some() {
let db_path = self.root.join(format!("index-{}", uuid));
remove_dir_all(db_path).unwrap();
}
Ok(index)
}
async fn get_meta(&self, uuid: &Uuid) -> Result<Option<IndexMeta>> {
Ok(self.meta_store.read().await.get(uuid).cloned())
}
async fn update_index<R, F>(&self, uuid: Uuid, f: F) -> Result<R>
where
F: FnOnce(Index) -> Result<R> + Send + Sync + 'static,
R: Sync + Send + 'static,
{
let index = self.get_or_create(uuid.clone(), None).await?;
let mut meta = self
.get_meta(&uuid)
.await?
.ok_or(IndexError::UnexistingIndex)?;
match f(index) {
Ok(r) => {
meta.updated_at = Utc::now();
self.meta_store.write().await.insert(uuid, meta);
Ok(r)
}
Err(e) => Err(e),
}
}
}
impl MapIndexStore {
fn new(root: impl AsRef<Path>) -> Self {
let mut root = root.as_ref().to_owned();
root.push("indexes/");
let meta_store = Arc::new(RwLock::new(HashMap::new()));
let index_store = Arc::new(RwLock::new(HashMap::new()));
Self {
meta_store,
index_store,
root,
}
}
}
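
The read/write split in `IndexActor::run` above is the central concurrency decision of this file. Below is a minimal, self-contained sketch of the same pattern, assuming the `tokio`, `futures` and `async-stream` crates already used in the file; the `run` function and `u32` messages are illustrative stand-ins, not part of the codebase.

use futures::{pin_mut, stream::StreamExt};
use tokio::sync::mpsc;

async fn run(mut reads: mpsc::Receiver<u32>, mut writes: mpsc::Receiver<u32>) {
    let read_stream = async_stream::stream! {
        while let Some(msg) = reads.recv().await { yield msg; }
    };
    let write_stream = async_stream::stream! {
        while let Some(msg) = writes.recv().await { yield msg; }
    };
    pin_mut!(read_stream);
    pin_mut!(write_stream);
    // Reads: up to 10 in flight at once. Writes: strictly one at a time.
    let read_fut = read_stream.for_each_concurrent(Some(10), |msg| async move {
        println!("read handled: {}", msg);
    });
    let write_fut = write_stream.for_each_concurrent(Some(1), |msg| async move {
        println!("write handled: {}", msg);
    });
    futures::join!(read_fut, write_fut);
}

#[tokio::main]
async fn main() {
    let (read_tx, read_rx) = mpsc::channel(8);
    let (write_tx, write_rx) = mpsc::channel(8);
    read_tx.send(1).await.unwrap();
    write_tx.send(2).await.unwrap();
    // Dropping the senders closes both channels so `run` terminates.
    drop((read_tx, write_tx));
    run(read_rx, write_rx).await;
}

Because the write stream runs with a concurrency limit of 1, writes stay serialized even though both streams are polled together by `join!`.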

View file

@ -0,0 +1,607 @@
use std::fs::{create_dir_all, remove_dir_all};
use std::path::{Path, PathBuf};
use std::sync::Arc;
use std::time::Duration;
use anyhow::{bail, Context};
use chrono::{DateTime, Utc};
use dashmap::{mapref::entry::Entry, DashMap};
use heed::{
types::{ByteSlice, SerdeJson, Str},
Database, Env, EnvOpenOptions, RoTxn, RwTxn,
};
use log::{error, info};
use milli::Index;
use rayon::ThreadPool;
use serde::{Deserialize, Serialize};
use uuid::Uuid;
use super::update_handler::UpdateHandler;
use super::{UpdateMeta, UpdateResult};
use crate::option::IndexerOpts;
type UpdateStore = super::update_store::UpdateStore<UpdateMeta, UpdateResult, String>;
#[derive(Serialize, Deserialize, Debug, PartialEq)]
pub struct IndexMeta {
update_store_size: u64,
index_store_size: u64,
pub uuid: Uuid,
pub created_at: DateTime<Utc>,
pub updated_at: DateTime<Utc>,
}
impl IndexMeta {
fn open(
&self,
path: impl AsRef<Path>,
thread_pool: Arc<ThreadPool>,
indexer_options: &IndexerOpts,
) -> anyhow::Result<(Arc<Index>, Arc<UpdateStore>)> {
let update_path = make_update_db_path(&path, &self.uuid);
let index_path = make_index_db_path(&path, &self.uuid);
create_dir_all(&update_path)?;
create_dir_all(&index_path)?;
let mut options = EnvOpenOptions::new();
options.map_size(self.index_store_size as usize);
let index = Arc::new(Index::new(options, index_path)?);
let mut options = EnvOpenOptions::new();
options.map_size(self.update_store_size as usize);
let handler = UpdateHandler::new(indexer_options, index.clone(), thread_pool)?;
let update_store = UpdateStore::open(options, update_path, handler)?;
Ok((index, update_store))
}
}
pub struct IndexStore {
env: Env,
name_to_uuid: Database<Str, ByteSlice>,
uuid_to_index: DashMap<Uuid, (Arc<Index>, Arc<UpdateStore>)>,
uuid_to_index_meta: Database<ByteSlice, SerdeJson<IndexMeta>>,
thread_pool: Arc<ThreadPool>,
indexer_options: IndexerOpts,
}
impl IndexStore {
pub fn new(path: impl AsRef<Path>, indexer_options: IndexerOpts) -> anyhow::Result<Self> {
let env = EnvOpenOptions::new()
.map_size(4096 * 100)
.max_dbs(2)
.open(path)?;
let uuid_to_index = DashMap::new();
let name_to_uuid = open_or_create_database(&env, Some("name_to_uid"))?;
let uuid_to_index_meta = open_or_create_database(&env, Some("uid_to_index_db"))?;
let thread_pool = rayon::ThreadPoolBuilder::new()
.num_threads(indexer_options.indexing_jobs.unwrap_or(0))
.build()?;
let thread_pool = Arc::new(thread_pool);
Ok(Self {
env,
name_to_uuid,
uuid_to_index,
uuid_to_index_meta,
thread_pool,
indexer_options,
})
}
pub fn delete(&self, index_uid: impl AsRef<str>) -> anyhow::Result<()> {
// we remove the references to the index from the index map so it is not accessible anymore
let mut txn = self.env.write_txn()?;
let uuid = self
.index_uuid(&txn, &index_uid)?
.with_context(|| format!("Index {:?} doesn't exist", index_uid.as_ref()))?;
self.name_to_uuid.delete(&mut txn, index_uid.as_ref())?;
self.uuid_to_index_meta.delete(&mut txn, uuid.as_bytes())?;
txn.commit()?;
        // If the index was loaded (i.e., it is present in the uuid_to_index map), then we need
        // to close it. The process goes as follows:
        //
        // 1) We remove any pending updates from the store.
        // 2) We try to get ownership of the update store so we can close it. It may take a
        // couple of tries, but since the update store event loop only holds a weak reference
        // to itself, and we are otherwise the only other holder of a reference to it, we will
        // get ownership eventually.
        // 3) We request a closing of the update store.
        // 4) We take ownership of the index and close it.
        // 5) We remove all the files from the file system.
let index_uid = index_uid.as_ref().to_string();
let path = self.env.path().to_owned();
if let Some((_, (index, updates))) = self.uuid_to_index.remove(&uuid) {
std::thread::spawn(move || {
info!("Preparing for {:?} deletion.", index_uid);
                // This error is non-fatal, but may delay the deletion.
if let Err(e) = updates.abort_pendings() {
error!(
"error aborting pending updates when deleting index {:?}: {}",
index_uid, e
);
}
let updates = get_arc_ownership_blocking(updates);
let close_event = updates.prepare_for_closing();
close_event.wait();
info!("closed update store for {:?}", index_uid);
let index = get_arc_ownership_blocking(index);
let close_event = index.prepare_for_closing();
close_event.wait();
let update_path = make_update_db_path(&path, &uuid);
let index_path = make_index_db_path(&path, &uuid);
if let Err(e) = remove_dir_all(index_path) {
error!("error removing index {:?}: {}", index_uid, e);
}
if let Err(e) = remove_dir_all(update_path) {
error!("error removing index {:?}: {}", index_uid, e);
}
info!("index {:?} deleted.", index_uid);
});
}
Ok(())
}
fn index_uuid(&self, txn: &RoTxn, name: impl AsRef<str>) -> anyhow::Result<Option<Uuid>> {
match self.name_to_uuid.get(txn, name.as_ref())? {
Some(bytes) => {
let uuid = Uuid::from_slice(bytes)?;
Ok(Some(uuid))
}
None => Ok(None),
}
}
fn retrieve_index(
&self,
txn: &RoTxn,
uid: Uuid,
) -> anyhow::Result<Option<(Arc<Index>, Arc<UpdateStore>)>> {
match self.uuid_to_index.entry(uid.clone()) {
Entry::Vacant(entry) => match self.uuid_to_index_meta.get(txn, uid.as_bytes())? {
Some(meta) => {
let path = self.env.path();
let (index, updates) =
meta.open(path, self.thread_pool.clone(), &self.indexer_options)?;
entry.insert((index.clone(), updates.clone()));
Ok(Some((index, updates)))
}
None => Ok(None),
},
Entry::Occupied(entry) => {
let (index, updates) = entry.get();
Ok(Some((index.clone(), updates.clone())))
}
}
}
fn get_index_txn(
&self,
txn: &RoTxn,
name: impl AsRef<str>,
) -> anyhow::Result<Option<(Arc<Index>, Arc<UpdateStore>)>> {
match self.index_uuid(&txn, name)? {
Some(uid) => self.retrieve_index(&txn, uid),
None => Ok(None),
}
}
pub fn index(
&self,
name: impl AsRef<str>,
) -> anyhow::Result<Option<(Arc<Index>, Arc<UpdateStore>)>> {
let txn = self.env.read_txn()?;
self.get_index_txn(&txn, name)
}
    /// Use this function to perform an update on an index.
    /// It takes a write transaction on the environment, so updates are effectively
    /// serialized: only one index can perform an update at a time.
pub fn update_index<F, T>(&self, name: impl AsRef<str>, f: F) -> anyhow::Result<(T, IndexMeta)>
where
F: FnOnce(&Index) -> anyhow::Result<T>,
{
let mut txn = self.env.write_txn()?;
let (index, _) = self
.get_index_txn(&txn, &name)?
.with_context(|| format!("Index {:?} doesn't exist", name.as_ref()))?;
let result = f(index.as_ref());
match result {
Ok(ret) => {
let meta = self.update_meta(&mut txn, name, |meta| meta.updated_at = Utc::now())?;
txn.commit()?;
Ok((ret, meta))
}
Err(e) => Err(e),
}
}
pub fn index_with_meta(
&self,
name: impl AsRef<str>,
) -> anyhow::Result<Option<(Arc<Index>, IndexMeta)>> {
let txn = self.env.read_txn()?;
let uuid = self.index_uuid(&txn, &name)?;
match uuid {
Some(uuid) => {
let meta = self
.uuid_to_index_meta
.get(&txn, uuid.as_bytes())?
.with_context(|| {
format!("unable to retrieve metadata for index {:?}", name.as_ref())
})?;
let (index, _) = self
.retrieve_index(&txn, uuid)?
.with_context(|| format!("unable to retrieve index {:?}", name.as_ref()))?;
Ok(Some((index, meta)))
}
None => Ok(None),
}
}
fn update_meta<F>(
&self,
txn: &mut RwTxn,
name: impl AsRef<str>,
f: F,
) -> anyhow::Result<IndexMeta>
where
F: FnOnce(&mut IndexMeta),
{
let uuid = self
.index_uuid(txn, &name)?
.with_context(|| format!("Index {:?} doesn't exist", name.as_ref()))?;
let mut meta = self
.uuid_to_index_meta
.get(txn, uuid.as_bytes())?
.with_context(|| format!("couldn't retrieve metadata for index {:?}", name.as_ref()))?;
f(&mut meta);
self.uuid_to_index_meta.put(txn, uuid.as_bytes(), &meta)?;
Ok(meta)
}
pub fn get_or_create_index(
&self,
name: impl AsRef<str>,
update_size: u64,
index_size: u64,
) -> anyhow::Result<(Arc<Index>, Arc<UpdateStore>)> {
let mut txn = self.env.write_txn()?;
match self.get_index_txn(&txn, name.as_ref())? {
Some(res) => Ok(res),
None => {
let uuid = Uuid::new_v4();
let (index, updates, _) =
self.create_index_txn(&mut txn, uuid, name, update_size, index_size)?;
// If we fail to commit the transaction, we must delete the database from the
// file-system.
if let Err(e) = txn.commit() {
self.clean_db(uuid);
return Err(e)?;
}
Ok((index, updates))
}
}
}
// Remove all the files and data associated with a db uuid.
fn clean_db(&self, uuid: Uuid) {
let update_db_path = make_update_db_path(self.env.path(), &uuid);
let index_db_path = make_index_db_path(self.env.path(), &uuid);
remove_dir_all(update_db_path).expect("Failed to clean database");
remove_dir_all(index_db_path).expect("Failed to clean database");
self.uuid_to_index.remove(&uuid);
}
fn create_index_txn(
&self,
txn: &mut RwTxn,
uuid: Uuid,
name: impl AsRef<str>,
update_store_size: u64,
index_store_size: u64,
) -> anyhow::Result<(Arc<Index>, Arc<UpdateStore>, IndexMeta)> {
let created_at = Utc::now();
let updated_at = created_at;
let meta = IndexMeta {
update_store_size,
index_store_size,
uuid: uuid.clone(),
created_at,
updated_at,
};
self.name_to_uuid.put(txn, name.as_ref(), uuid.as_bytes())?;
self.uuid_to_index_meta.put(txn, uuid.as_bytes(), &meta)?;
let path = self.env.path();
let (index, update_store) =
match meta.open(path, self.thread_pool.clone(), &self.indexer_options) {
Ok(res) => res,
Err(e) => {
self.clean_db(uuid);
return Err(e);
}
};
self.uuid_to_index
.insert(uuid, (index.clone(), update_store.clone()));
Ok((index, update_store, meta))
}
/// Same as `get_or_create`, but returns an error if the index already exists.
pub fn create_index(
&self,
name: impl AsRef<str>,
update_size: u64,
index_size: u64,
) -> anyhow::Result<(Arc<Index>, Arc<UpdateStore>, IndexMeta)> {
let uuid = Uuid::new_v4();
let mut txn = self.env.write_txn()?;
if self.name_to_uuid.get(&txn, name.as_ref())?.is_some() {
bail!("index {:?} already exists", name.as_ref())
}
let result = self.create_index_txn(&mut txn, uuid, name, update_size, index_size)?;
// If we fail to commit the transaction, we must delete the database from the
// file-system.
if let Err(e) = txn.commit() {
self.clean_db(uuid);
return Err(e)?;
}
Ok(result)
}
/// Returns each index associated with its metadata:
/// (index_name, IndexMeta, primary_key)
/// This method will force all the indexes to be loaded.
pub fn list_indexes(&self) -> anyhow::Result<Vec<(String, IndexMeta, Option<String>)>> {
let txn = self.env.read_txn()?;
let metas = self.name_to_uuid.iter(&txn)?.filter_map(|entry| {
entry
.map_err(|e| {
error!("error decoding entry while listing indexes: {}", e);
e
})
.ok()
});
let mut indexes = Vec::new();
for (name, uuid) in metas {
// get index to retrieve primary key
let (index, _) = self
.get_index_txn(&txn, name)?
.with_context(|| format!("could not load index {:?}", name))?;
let primary_key = index.primary_key(&index.read_txn()?)?.map(String::from);
            // retrieve the meta
            let meta = self
                .uuid_to_index_meta
                .get(&txn, &uuid)?
                .with_context(|| format!("could not retrieve meta for index {:?}", name))?;
indexes.push((name.to_owned(), meta, primary_key));
}
Ok(indexes)
}
}
// Loops on an Arc to get ownership of the wrapped value, sleeping 100ms between retries.
fn get_arc_ownership_blocking<T>(mut item: Arc<T>) -> T {
loop {
match Arc::try_unwrap(item) {
Ok(item) => return item,
Err(item_arc) => {
item = item_arc;
std::thread::sleep(Duration::from_millis(100));
continue;
}
}
}
}
fn open_or_create_database<K: 'static, V: 'static>(
env: &Env,
name: Option<&str>,
) -> anyhow::Result<Database<K, V>> {
match env.open_database::<K, V>(name)? {
Some(db) => Ok(db),
None => Ok(env.create_database::<K, V>(name)?),
}
}
fn make_update_db_path(path: impl AsRef<Path>, uuid: &Uuid) -> PathBuf {
let mut path = path.as_ref().to_path_buf();
path.push(format!("update{}", uuid));
path
}
fn make_index_db_path(path: impl AsRef<Path>, uuid: &Uuid) -> PathBuf {
let mut path = path.as_ref().to_path_buf();
path.push(format!("index{}", uuid));
path
}
#[cfg(test)]
mod test {
use super::*;
use std::path::PathBuf;
#[test]
fn test_make_update_db_path() {
let uuid = Uuid::new_v4();
assert_eq!(
make_update_db_path("/home", &uuid),
PathBuf::from(format!("/home/update{}", uuid))
);
}
#[test]
fn test_make_index_db_path() {
let uuid = Uuid::new_v4();
assert_eq!(
make_index_db_path("/home", &uuid),
PathBuf::from(format!("/home/index{}", uuid))
);
}
mod index_store {
use super::*;
#[test]
fn test_index_uuid() {
let temp = tempfile::tempdir().unwrap();
let store = IndexStore::new(temp, IndexerOpts::default()).unwrap();
let name = "foobar";
let txn = store.env.read_txn().unwrap();
            // the name is not found if the uuid is not present in the db
assert!(store.index_uuid(&txn, &name).unwrap().is_none());
drop(txn);
            // insert a uuid into the name_to_uuid db:
let uuid = Uuid::new_v4();
let mut txn = store.env.write_txn().unwrap();
store
.name_to_uuid
.put(&mut txn, &name, uuid.as_bytes())
.unwrap();
txn.commit().unwrap();
// check that the uuid is there
let txn = store.env.read_txn().unwrap();
assert_eq!(store.index_uuid(&txn, &name).unwrap(), Some(uuid));
}
#[test]
fn test_retrieve_index() {
let temp = tempfile::tempdir().unwrap();
let store = IndexStore::new(temp, IndexerOpts::default()).unwrap();
let uuid = Uuid::new_v4();
let txn = store.env.read_txn().unwrap();
assert!(store.retrieve_index(&txn, uuid).unwrap().is_none());
let created_at = Utc::now();
let updated_at = created_at;
let meta = IndexMeta {
update_store_size: 4096 * 100,
index_store_size: 4096 * 100,
uuid: uuid.clone(),
created_at,
updated_at,
};
let mut txn = store.env.write_txn().unwrap();
store
.uuid_to_index_meta
.put(&mut txn, uuid.as_bytes(), &meta)
.unwrap();
txn.commit().unwrap();
// the index cache should be empty
assert!(store.uuid_to_index.is_empty());
let txn = store.env.read_txn().unwrap();
assert!(store.retrieve_index(&txn, uuid).unwrap().is_some());
assert_eq!(store.uuid_to_index.len(), 1);
}
#[test]
fn test_index() {
let temp = tempfile::tempdir().unwrap();
let store = IndexStore::new(temp, IndexerOpts::default()).unwrap();
let name = "foobar";
assert!(store.index(&name).unwrap().is_none());
let created_at = Utc::now();
let updated_at = created_at;
let uuid = Uuid::new_v4();
let meta = IndexMeta {
update_store_size: 4096 * 100,
index_store_size: 4096 * 100,
uuid: uuid.clone(),
created_at,
updated_at,
};
let mut txn = store.env.write_txn().unwrap();
store
.name_to_uuid
.put(&mut txn, &name, uuid.as_bytes())
.unwrap();
store
.uuid_to_index_meta
.put(&mut txn, uuid.as_bytes(), &meta)
.unwrap();
txn.commit().unwrap();
assert!(store.index(&name).unwrap().is_some());
}
#[test]
fn test_get_or_create_index() {
let temp = tempfile::tempdir().unwrap();
let store = IndexStore::new(temp, IndexerOpts::default()).unwrap();
let name = "foobar";
let update_store_size = 4096 * 100;
let index_store_size = 4096 * 100;
store
.get_or_create_index(&name, update_store_size, index_store_size)
.unwrap();
let txn = store.env.read_txn().unwrap();
let uuid = store.name_to_uuid.get(&txn, &name).unwrap();
assert_eq!(store.uuid_to_index.len(), 1);
assert!(uuid.is_some());
let uuid = Uuid::from_slice(uuid.unwrap()).unwrap();
let meta = store
.uuid_to_index_meta
.get(&txn, uuid.as_bytes())
.unwrap()
.unwrap();
assert_eq!(meta.update_store_size, update_store_size);
assert_eq!(meta.index_store_size, index_store_size);
assert_eq!(meta.uuid, uuid);
}
#[test]
fn test_create_index() {
let temp = tempfile::tempdir().unwrap();
let store = IndexStore::new(temp, IndexerOpts::default()).unwrap();
let name = "foobar";
let update_store_size = 4096 * 100;
let index_store_size = 4096 * 100;
let uuid = Uuid::new_v4();
let mut txn = store.env.write_txn().unwrap();
store
.create_index_txn(&mut txn, uuid, name, update_store_size, index_store_size)
.unwrap();
let uuid = store.name_to_uuid.get(&txn, &name).unwrap();
assert_eq!(store.uuid_to_index.len(), 1);
assert!(uuid.is_some());
let uuid = Uuid::from_slice(uuid.unwrap()).unwrap();
let meta = store
.uuid_to_index_meta
.get(&txn, uuid.as_bytes())
.unwrap()
.unwrap();
assert_eq!(meta.update_store_size, update_store_size);
assert_eq!(meta.index_store_size, index_store_size);
assert_eq!(meta.uuid, uuid);
}
}
}
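
The deletion sequence described above relies on `get_arc_ownership_blocking`, defined earlier in this file. Here is a minimal, self-contained sketch of how that retry loop behaves; the `String` payload and the timings are illustrative only.

use std::sync::Arc;
use std::thread;
use std::time::Duration;

fn get_arc_ownership_blocking<T>(mut item: Arc<T>) -> T {
    loop {
        match Arc::try_unwrap(item) {
            Ok(item) => return item,
            Err(arc) => {
                item = arc;
                thread::sleep(Duration::from_millis(100));
            }
        }
    }
}

fn main() {
    let shared = Arc::new(String::from("index environment"));
    let other_owner = Arc::clone(&shared);
    // Simulate another component releasing its handle a bit later.
    let handle = thread::spawn(move || {
        thread::sleep(Duration::from_millis(250));
        drop(other_owner);
    });
    // Blocks, retrying every 100ms, until we are the last owner.
    let owned: String = get_arc_ownership_blocking(shared);
    println!("got exclusive ownership of {:?}", owned);
    handle.join().unwrap();
}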

View file

@ -0,0 +1,228 @@
mod update_store;
mod index_store;
mod update_handler;
use std::path::Path;
use std::sync::Arc;
use anyhow::{bail, Context};
use itertools::Itertools;
use milli::Index;
use crate::option::IndexerOpts;
use index_store::IndexStore;
use super::IndexController;
use super::updates::UpdateStatus;
use super::{UpdateMeta, UpdateResult, IndexMetadata, IndexSettings};
pub struct LocalIndexController {
indexes: IndexStore,
update_db_size: u64,
index_db_size: u64,
}
impl LocalIndexController {
pub fn new(
path: impl AsRef<Path>,
opt: IndexerOpts,
index_db_size: u64,
update_db_size: u64,
) -> anyhow::Result<Self> {
let indexes = IndexStore::new(path, opt)?;
Ok(Self { indexes, index_db_size, update_db_size })
}
}
impl IndexController for LocalIndexController {
fn add_documents<S: AsRef<str>>(
&self,
index: S,
method: milli::update::IndexDocumentsMethod,
format: milli::update::UpdateFormat,
data: &[u8],
primary_key: Option<String>,
) -> anyhow::Result<UpdateStatus<UpdateMeta, UpdateResult, String>> {
let (_, update_store) = self.indexes.get_or_create_index(&index, self.update_db_size, self.index_db_size)?;
let meta = UpdateMeta::DocumentsAddition { method, format, primary_key };
let pending = update_store.register_update(meta, data)?;
Ok(pending.into())
}
fn update_settings<S: AsRef<str>>(
&self,
index: S,
settings: super::Settings
) -> anyhow::Result<UpdateStatus<UpdateMeta, UpdateResult, String>> {
let (_, update_store) = self.indexes.get_or_create_index(&index, self.update_db_size, self.index_db_size)?;
let meta = UpdateMeta::Settings(settings);
let pending = update_store.register_update(meta, &[])?;
Ok(pending.into())
}
fn create_index(&self, index_settings: IndexSettings) -> anyhow::Result<IndexMetadata> {
let index_name = index_settings.name.context("Missing name for index")?;
let (index, _, meta) = self.indexes.create_index(&index_name, self.update_db_size, self.index_db_size)?;
if let Some(ref primary_key) = index_settings.primary_key {
if let Err(e) = update_primary_key(index, primary_key).context("error creating index") {
                // TODO: index creation could not be completed; delete everything.
Err(e)?
}
}
let meta = IndexMetadata {
uid: index_name,
uuid: meta.uuid.clone(),
created_at: meta.created_at,
updated_at: meta.created_at,
primary_key: index_settings.primary_key,
};
Ok(meta)
}
fn delete_index<S: AsRef<str>>(&self, index_uid: S) -> anyhow::Result<()> {
self.indexes.delete(index_uid)
}
fn swap_indices<S1: AsRef<str>, S2: AsRef<str>>(&self, _index1_uid: S1, _index2_uid: S2) -> anyhow::Result<()> {
todo!()
}
fn index(&self, name: impl AsRef<str>) -> anyhow::Result<Option<Arc<Index>>> {
let index = self.indexes.index(name)?.map(|(i, _)| i);
Ok(index)
}
fn update_status(&self, index: impl AsRef<str>, id: u64) -> anyhow::Result<Option<UpdateStatus<UpdateMeta, UpdateResult, String>>> {
match self.indexes.index(&index)? {
Some((_, update_store)) => Ok(update_store.meta(id)?),
None => bail!("index {:?} doesn't exist", index.as_ref()),
}
}
fn all_update_status(&self, index: impl AsRef<str>) -> anyhow::Result<Vec<UpdateStatus<UpdateMeta, UpdateResult, String>>> {
match self.indexes.index(&index)? {
Some((_, update_store)) => {
                // `iter_metas` passes its arguments in the order: processing state,
                // processed, aborted, pending, failed.
                let updates = update_store.iter_metas(|processing, processed, aborted, pending, failed| {
                    Ok(processing
                        .map(UpdateStatus::from)
                        .into_iter()
                        .chain(pending.filter_map(|p| p.ok()).map(|(_, u)| UpdateStatus::from(u)))
                        .chain(aborted.filter_map(Result::ok).map(|(_, u)| UpdateStatus::from(u)))
                        .chain(processed.filter_map(Result::ok).map(|(_, u)| UpdateStatus::from(u)))
                        .chain(failed.filter_map(Result::ok).map(|(_, u)| UpdateStatus::from(u)))
                        .sorted_by(|a, b| a.id().cmp(&b.id()))
                        .collect())
})?;
Ok(updates)
}
None => bail!("index {} doesn't exist.", index.as_ref()),
}
}
fn list_indexes(&self) -> anyhow::Result<Vec<IndexMetadata>> {
let metas = self.indexes.list_indexes()?;
let mut output_meta = Vec::new();
for (uid, meta, primary_key) in metas {
let created_at = meta.created_at;
let uuid = meta.uuid;
let updated_at = self
.all_update_status(&uid)?
.iter()
.filter_map(|u| u.processed().map(|u| u.processed_at))
.max()
.unwrap_or(created_at);
let index_meta = IndexMetadata {
uid,
created_at,
updated_at,
uuid,
primary_key,
};
output_meta.push(index_meta);
}
Ok(output_meta)
}
fn update_index(&self, uid: impl AsRef<str>, index_settings: IndexSettings) -> anyhow::Result<IndexMetadata> {
if index_settings.name.is_some() {
bail!("can't udpate an index name.")
}
let (primary_key, meta) = match index_settings.primary_key {
Some(ref primary_key) => {
self.indexes
.update_index(&uid, |index| {
let mut txn = index.write_txn()?;
if index.primary_key(&txn)?.is_some() {
bail!("primary key already exists.")
}
index.put_primary_key(&mut txn, primary_key)?;
txn.commit()?;
Ok(Some(primary_key.clone()))
})?
},
None => {
let (index, meta) = self.indexes
.index_with_meta(&uid)?
.with_context(|| format!("index {:?} doesn't exist.", uid.as_ref()))?;
let primary_key = index
.primary_key(&index.read_txn()?)?
.map(String::from);
(primary_key, meta)
},
};
Ok(IndexMetadata {
uid: uid.as_ref().to_string(),
uuid: meta.uuid.clone(),
created_at: meta.created_at,
updated_at: meta.updated_at,
primary_key,
})
}
fn clear_documents(&self, index: impl AsRef<str>) -> anyhow::Result<super::UpdateStatus> {
let (_, update_store) = self.indexes.index(&index)?
.with_context(|| format!("Index {:?} doesn't exist", index.as_ref()))?;
let meta = UpdateMeta::ClearDocuments;
let pending = update_store.register_update(meta, &[])?;
Ok(pending.into())
}
fn delete_documents(&self, index: impl AsRef<str>, document_ids: Vec<String>) -> anyhow::Result<super::UpdateStatus> {
let (_, update_store) = self.indexes.index(&index)?
.with_context(|| format!("Index {:?} doesn't exist", index.as_ref()))?;
let meta = UpdateMeta::DeleteDocuments;
let content = serde_json::to_vec(&document_ids)?;
let pending = update_store.register_update(meta, &content)?;
Ok(pending.into())
}
}
fn update_primary_key(index: impl AsRef<Index>, primary_key: impl AsRef<str>) -> anyhow::Result<()> {
let index = index.as_ref();
let mut txn = index.write_txn()?;
if index.primary_key(&txn)?.is_some() {
bail!("primary key already set.")
}
index.put_primary_key(&mut txn, primary_key.as_ref())?;
txn.commit()?;
Ok(())
}
#[cfg(test)]
mod test {
use super::*;
use tempfile::tempdir;
use crate::make_index_controller_tests;
make_index_controller_tests!({
let options = IndexerOpts::default();
let path = tempdir().unwrap();
let size = 4096 * 100;
LocalIndexController::new(path, options, size, size).unwrap()
});
}
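
`all_update_status` above merges five per-state sources into a single id-ordered list. The following self-contained sketch shows the same merge with plain vectors, assuming the `itertools` crate already imported in this file; `Status` is an illustrative stand-in for `UpdateStatus`.

use itertools::Itertools;

#[derive(Debug)]
struct Status {
    id: u64,
    state: &'static str,
}

fn main() {
    let processing = Some(Status { id: 7, state: "processing" });
    let pending = vec![Status { id: 9, state: "pending" }];
    let processed = vec![Status { id: 3, state: "processed" }];
    let failed = vec![Status { id: 5, state: "failed" }];
    // Chain the per-state sources and sort the result by update id.
    let all: Vec<Status> = processing
        .into_iter()
        .chain(pending)
        .chain(processed)
        .chain(failed)
        .sorted_by(|a, b| a.id.cmp(&b.id))
        .collect();
    // Prints updates 3, 5, 7 and 9 in order, each tagged with its state.
    for status in &all {
        println!("update {} is {}", status.id, status.state);
    }
}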

View file

@ -0,0 +1,257 @@
mod index_actor;
mod update_actor;
mod update_handler;
mod update_store;
mod updates;
mod uuid_resolver;
use std::path::Path;
use std::sync::Arc;
use std::time::Duration;
use actix_web::web::{Bytes, Payload};
use futures::stream::StreamExt;
use milli::update::{IndexDocumentsMethod, UpdateFormat};
use serde::{Serialize, Deserialize};
use tokio::sync::{mpsc, oneshot};
use tokio::time::sleep;
use uuid::Uuid;
pub use updates::{Processed, Processing, Failed};
use crate::index::{SearchResult, SearchQuery, Document};
use crate::index::{UpdateResult, Settings, Facets};
pub type UpdateStatus = updates::UpdateStatus<UpdateMeta, UpdateResult, String>;
#[derive(Debug, Serialize, Deserialize, Clone)]
#[serde(rename_all = "camelCase")]
pub struct IndexMetadata {
name: String,
#[serde(flatten)]
meta: index_actor::IndexMeta,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum UpdateMeta {
DocumentsAddition {
method: IndexDocumentsMethod,
format: UpdateFormat,
primary_key: Option<String>,
},
ClearDocuments,
DeleteDocuments,
Settings(Settings),
Facets(Facets),
}
#[derive(Clone, Debug)]
pub struct IndexSettings {
pub name: Option<String>,
pub primary_key: Option<String>,
}
pub struct IndexController {
uuid_resolver: uuid_resolver::UuidResolverHandle,
index_handle: index_actor::IndexActorHandle,
update_handle: update_actor::UpdateActorHandle<Bytes>,
}
enum IndexControllerMsg {
CreateIndex {
uuid: Uuid,
primary_key: Option<String>,
ret: oneshot::Sender<anyhow::Result<IndexMetadata>>,
},
Shutdown,
}
impl IndexController {
pub fn new(path: impl AsRef<Path>) -> anyhow::Result<Self> {
let uuid_resolver = uuid_resolver::UuidResolverHandle::new();
let index_actor = index_actor::IndexActorHandle::new(&path)?;
let update_handle = update_actor::UpdateActorHandle::new(index_actor.clone(), &path);
Ok(Self { uuid_resolver, index_handle: index_actor, update_handle })
}
pub async fn add_documents(
&self,
index: String,
method: milli::update::IndexDocumentsMethod,
format: milli::update::UpdateFormat,
mut payload: Payload,
primary_key: Option<String>,
) -> anyhow::Result<UpdateStatus> {
let uuid = self.uuid_resolver.get_or_create(index).await?;
let meta = UpdateMeta::DocumentsAddition { method, format, primary_key };
let (sender, receiver) = mpsc::channel(10);
        // It is necessary to spawn a local task to send the payload to the update handle, to
        // prevent a deadlock between `update_handle::update`, which waits for the update to be
        // registered, and the update actor, which waits for the payload to be sent to it.
tokio::task::spawn_local(async move {
            while let Some(bytes) = payload.next().await {
                match bytes {
                    Ok(bytes) => {
                        let _ = sender.send(Ok(bytes)).await;
                    }
                    Err(e) => {
                        let error: Box<dyn std::error::Error + Sync + Send + 'static> = Box::new(e);
                        let _ = sender.send(Err(error)).await;
                    }
                }
}
});
// This must be done *AFTER* spawning the task.
let status = self.update_handle.update(meta, receiver, uuid).await?;
Ok(status)
}
pub async fn clear_documents(&self, index: String) -> anyhow::Result<UpdateStatus> {
let uuid = self.uuid_resolver.resolve(index).await?;
let meta = UpdateMeta::ClearDocuments;
let (_, receiver) = mpsc::channel(1);
let status = self.update_handle.update(meta, receiver, uuid).await?;
Ok(status)
}
pub async fn delete_documents(&self, index: String, document_ids: Vec<String>) -> anyhow::Result<UpdateStatus> {
let uuid = self.uuid_resolver.resolve(index).await?;
let meta = UpdateMeta::DeleteDocuments;
let (sender, receiver) = mpsc::channel(10);
tokio::task::spawn(async move {
let json = serde_json::to_vec(&document_ids).unwrap();
let bytes = Bytes::from(json);
let _ = sender.send(Ok(bytes)).await;
});
let status = self.update_handle.update(meta, receiver, uuid).await?;
Ok(status)
}
pub async fn update_settings(&self, index_uid: String, settings: Settings) -> anyhow::Result<UpdateStatus> {
let uuid = self.uuid_resolver.get_or_create(index_uid).await?;
let meta = UpdateMeta::Settings(settings);
        // Nothing to send; drop the sender right away so as not to block the update actor.
let (_, receiver) = mpsc::channel(1);
let status = self.update_handle.update(meta, receiver, uuid).await?;
Ok(status)
}
pub async fn create_index(&self, index_settings: IndexSettings) -> anyhow::Result<IndexMetadata> {
let IndexSettings { name, primary_key } = index_settings;
        let name = name.ok_or_else(|| anyhow::anyhow!("Missing name for index"))?;
let uuid = self.uuid_resolver.create(name.clone()).await?;
let meta = self.index_handle.create_index(uuid, primary_key).await?;
let meta = IndexMetadata { name, meta };
Ok(meta)
}
pub async fn delete_index(&self, index_uid: String) -> anyhow::Result<()> {
let uuid = self.uuid_resolver
.delete(index_uid)
.await?;
self.update_handle.delete(uuid.clone()).await?;
self.index_handle.delete(uuid).await?;
Ok(())
}
pub async fn update_status(&self, index: String, id: u64) -> anyhow::Result<Option<UpdateStatus>> {
let uuid = self.uuid_resolver
.resolve(index)
.await?;
let result = self.update_handle.update_status(uuid, id).await?;
Ok(result)
}
pub async fn all_update_status(&self, index: String) -> anyhow::Result<Vec<UpdateStatus>> {
let uuid = self.uuid_resolver
.resolve(index).await?;
let result = self.update_handle.get_all_updates_status(uuid).await?;
Ok(result)
}
pub async fn list_indexes(&self) -> anyhow::Result<Vec<IndexMetadata>> {
let uuids = self.uuid_resolver.list().await?;
let mut ret = Vec::new();
for (name, uuid) in uuids {
if let Some(meta) = self.index_handle.get_index_meta(uuid).await? {
let meta = IndexMetadata { name, meta };
ret.push(meta);
}
}
Ok(ret)
}
pub async fn settings(&self, index: String) -> anyhow::Result<Settings> {
let uuid = self.uuid_resolver
.resolve(index.clone())
.await?;
let settings = self.index_handle.settings(uuid).await?;
Ok(settings)
}
pub async fn documents(
&self,
index: String,
offset: usize,
limit: usize,
attributes_to_retrieve: Option<Vec<String>>,
) -> anyhow::Result<Vec<Document>> {
let uuid = self.uuid_resolver
.resolve(index.clone())
.await?;
let documents = self.index_handle.documents(uuid, offset, limit, attributes_to_retrieve).await?;
Ok(documents)
}
pub async fn document(
&self,
index: String,
doc_id: String,
attributes_to_retrieve: Option<Vec<String>>,
) -> anyhow::Result<Document> {
let uuid = self.uuid_resolver
.resolve(index.clone())
.await?;
let document = self.index_handle.document(uuid, doc_id, attributes_to_retrieve).await?;
Ok(document)
}
fn update_index(&self, name: String, index_settings: IndexSettings) -> anyhow::Result<IndexMetadata> {
todo!()
}
pub async fn search(&self, name: String, query: SearchQuery) -> anyhow::Result<SearchResult> {
let uuid = self.uuid_resolver.resolve(name).await?;
let result = self.index_handle.search(uuid, query).await?;
Ok(result)
}
pub async fn get_index(&self, name: String) -> anyhow::Result<Option<IndexMetadata>> {
let uuid = self.uuid_resolver.resolve(name.clone()).await?;
let result = self.index_handle
.get_index_meta(uuid)
.await?
.map(|meta| IndexMetadata { name, meta });
Ok(result)
}
}
pub async fn get_arc_ownership_blocking<T>(mut item: Arc<T>) -> T {
loop {
match Arc::try_unwrap(item) {
Ok(item) => return item,
Err(item_arc) => {
item = item_arc;
sleep(Duration::from_millis(100)).await;
continue;
}
}
}
}
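
The comment in `add_documents` above, about spawning the payload task before awaiting the update handle, is an instance of a general rule for bounded channels. Below is a minimal sketch of why the order matters; the capacity and chunk contents are illustrative.

use tokio::sync::mpsc;

#[tokio::main]
async fn main() {
    // Capacity 1: a producer that is not polled concurrently with the consumer
    // blocks forever on its second `send`.
    let (sender, mut receiver) = mpsc::channel::<&'static [u8]>(1);
    // 1) Spawn the producer first, so it runs concurrently with the consumer.
    tokio::spawn(async move {
        for chunk in [b"hello ".as_slice(), b"world".as_slice()] {
            let _ = sender.send(chunk).await;
        }
    });
    // 2) Only then drive the consumer to completion.
    let mut payload = Vec::new();
    while let Some(chunk) = receiver.recv().await {
        payload.extend_from_slice(chunk);
    }
    assert_eq!(payload, b"hello world");
}

Sending both chunks inline before running the receive loop would instead deadlock: the second `send` waits for capacity that only the never-reached consumer could free.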

View file

@ -0,0 +1,315 @@
use std::collections::{hash_map::Entry, HashMap};
use std::fs::{create_dir_all, remove_dir_all};
use std::path::{Path, PathBuf};
use std::sync::Arc;
use itertools::Itertools;
use log::info;
use super::index_actor::IndexActorHandle;
use thiserror::Error;
use tokio::fs::File;
use tokio::io::AsyncWriteExt;
use tokio::sync::{mpsc, oneshot, RwLock};
use uuid::Uuid;
use crate::index::UpdateResult;
use crate::index_controller::{UpdateMeta, UpdateStatus};
use super::get_arc_ownership_blocking;
pub type Result<T> = std::result::Result<T, UpdateError>;
type UpdateStore = super::update_store::UpdateStore<UpdateMeta, UpdateResult, String>;
type PayloadData<D> = std::result::Result<D, Box<dyn std::error::Error + Sync + Send + 'static>>;
#[derive(Debug, Error)]
pub enum UpdateError {
#[error("error with update: {0}")]
Error(Box<dyn std::error::Error + Sync + Send + 'static>),
#[error("Index {0} doesn't exist.")]
UnexistingIndex(Uuid),
}
enum UpdateMsg<D> {
Update {
uuid: Uuid,
meta: UpdateMeta,
data: mpsc::Receiver<PayloadData<D>>,
ret: oneshot::Sender<Result<UpdateStatus>>,
},
ListUpdates {
uuid: Uuid,
ret: oneshot::Sender<Result<Vec<UpdateStatus>>>,
},
GetUpdate {
uuid: Uuid,
ret: oneshot::Sender<Result<Option<UpdateStatus>>>,
id: u64,
},
Delete {
uuid: Uuid,
ret: oneshot::Sender<Result<()>>,
},
}
struct UpdateActor<D, S> {
path: PathBuf,
store: S,
inbox: mpsc::Receiver<UpdateMsg<D>>,
}
#[async_trait::async_trait]
trait UpdateStoreStore {
async fn get_or_create(&self, uuid: Uuid) -> Result<Arc<UpdateStore>>;
async fn delete(&self, uuid: &Uuid) -> Result<Option<Arc<UpdateStore>>>;
async fn get(&self, uuid: &Uuid) -> Result<Option<Arc<UpdateStore>>>;
}
impl<D, S> UpdateActor<D, S>
where
D: AsRef<[u8]> + Sized + 'static,
S: UpdateStoreStore,
{
fn new(store: S, inbox: mpsc::Receiver<UpdateMsg<D>>, path: impl AsRef<Path>) -> Self {
let path = path.as_ref().to_owned().join("update_files");
create_dir_all(&path).unwrap();
Self { store, inbox, path }
}
async fn run(mut self) {
use UpdateMsg::*;
info!("Started update actor.");
loop {
match self.inbox.recv().await {
Some(Update {
uuid,
meta,
data,
ret,
}) => {
let _ = ret.send(self.handle_update(uuid, meta, data).await);
}
Some(ListUpdates { uuid, ret }) => {
let _ = ret.send(self.handle_list_updates(uuid).await);
                }
Some(GetUpdate { uuid, ret, id }) => {
let _ = ret.send(self.handle_get_update(uuid, id).await);
}
Some(Delete { uuid, ret }) => {
let _ = ret.send(self.handle_delete(uuid).await);
}
                None => break,
}
}
}
async fn handle_update(
&self,
uuid: Uuid,
meta: UpdateMeta,
mut payload: mpsc::Receiver<PayloadData<D>>,
) -> Result<UpdateStatus> {
let update_store = self.store.get_or_create(uuid).await?;
let update_file_id = uuid::Uuid::new_v4();
let path = self.path.join(format!("update_{}", update_file_id));
let mut file = File::create(&path).await
.map_err(|e| UpdateError::Error(Box::new(e)))?;
while let Some(bytes) = payload.recv().await {
match bytes {
Ok(bytes) => {
file.write_all(bytes.as_ref()).await
.map_err(|e| UpdateError::Error(Box::new(e)))?;
}
Err(e) => {
return Err(UpdateError::Error(e));
}
}
}
file.flush().await
.map_err(|e| UpdateError::Error(Box::new(e)))?;
let file = file.into_std().await;
tokio::task::spawn_blocking(move || {
let result = update_store
.register_update(meta, path, uuid)
.map(|pending| UpdateStatus::Pending(pending))
.map_err(|e| UpdateError::Error(Box::new(e)));
result
})
.await
.map_err(|e| UpdateError::Error(Box::new(e)))?
}
async fn handle_list_updates(
&self,
uuid: Uuid,
) -> Result<Vec<UpdateStatus>> {
let store = self.store.get(&uuid).await?;
tokio::task::spawn_blocking(move || {
let result = match store {
Some(update_store) => {
                    // `iter_metas` passes its arguments in the order: processing state,
                    // processed, aborted, pending, failed.
                    let updates = update_store.iter_metas(|processing, processed, aborted, pending, failed| {
                        Ok(processing
                            .map(UpdateStatus::from)
                            .into_iter()
                            .chain(pending.filter_map(|p| p.ok()).map(|(_, u)| UpdateStatus::from(u)))
                            .chain(aborted.filter_map(std::result::Result::ok).map(|(_, u)| UpdateStatus::from(u)))
                            .chain(processed.filter_map(std::result::Result::ok).map(|(_, u)| UpdateStatus::from(u)))
                            .chain(failed.filter_map(std::result::Result::ok).map(|(_, u)| UpdateStatus::from(u)))
                            .sorted_by(|a, b| a.id().cmp(&b.id()))
                            .collect())
})
.map_err(|e| UpdateError::Error(Box::new(e)))?;
Ok(updates)
}
None => Err(UpdateError::UnexistingIndex(uuid)),
};
result
}).await
.map_err(|e| UpdateError::Error(Box::new(e)))?
}
async fn handle_get_update(&self, uuid: Uuid, id: u64) -> Result<Option<UpdateStatus>> {
let store = self.store
.get(&uuid)
.await?
.ok_or(UpdateError::UnexistingIndex(uuid))?;
let result = store.meta(id)
.map_err(|e| UpdateError::Error(Box::new(e)))?;
Ok(result)
}
async fn handle_delete(&self, uuid: Uuid) -> Result<()> {
let store = self.store
.delete(&uuid)
.await?;
if let Some(store) = store {
tokio::task::spawn(async move {
let store = get_arc_ownership_blocking(store).await;
tokio::task::spawn_blocking(move || {
store.prepare_for_closing().wait();
info!("Update store {} was closed.", uuid);
});
});
}
Ok(())
}
}
#[derive(Clone)]
pub struct UpdateActorHandle<D> {
sender: mpsc::Sender<UpdateMsg<D>>,
}
impl<D> UpdateActorHandle<D>
where
D: AsRef<[u8]> + Sized + 'static + Sync + Send,
{
pub fn new(index_handle: IndexActorHandle, path: impl AsRef<Path>) -> Self {
let path = path.as_ref().to_owned().join("updates");
let (sender, receiver) = mpsc::channel(100);
let store = MapUpdateStoreStore::new(index_handle, &path);
let actor = UpdateActor::new(store, receiver, path);
tokio::task::spawn(actor.run());
Self { sender }
}
pub async fn update(
&self,
meta: UpdateMeta,
data: mpsc::Receiver<PayloadData<D>>,
uuid: Uuid,
) -> Result<UpdateStatus> {
let (ret, receiver) = oneshot::channel();
let msg = UpdateMsg::Update {
uuid,
data,
meta,
ret,
};
let _ = self.sender.send(msg).await;
receiver.await.expect("update actor killed.")
}
pub async fn get_all_updates_status(&self, uuid: Uuid) -> Result<Vec<UpdateStatus>> {
let (ret, receiver) = oneshot::channel();
let msg = UpdateMsg::ListUpdates { uuid, ret };
let _ = self.sender.send(msg).await;
receiver.await.expect("update actor killed.")
}
pub async fn update_status(&self, uuid: Uuid, id: u64) -> Result<Option<UpdateStatus>> {
let (ret, receiver) = oneshot::channel();
let msg = UpdateMsg::GetUpdate { uuid, id, ret };
let _ = self.sender.send(msg).await;
receiver.await.expect("update actor killed.")
}
pub async fn delete(&self, uuid: Uuid) -> Result<()> {
let (ret, receiver) = oneshot::channel();
let msg = UpdateMsg::Delete { uuid, ret };
let _ = self.sender.send(msg).await;
receiver.await.expect("update actor killed.")
}
}
struct MapUpdateStoreStore {
db: Arc<RwLock<HashMap<Uuid, Arc<UpdateStore>>>>,
index_handle: IndexActorHandle,
path: PathBuf,
}
impl MapUpdateStoreStore {
fn new(index_handle: IndexActorHandle, path: impl AsRef<Path>) -> Self {
let db = Arc::new(RwLock::new(HashMap::new()));
let path = path.as_ref().to_owned();
Self {
db,
index_handle,
path,
}
}
}
#[async_trait::async_trait]
impl UpdateStoreStore for MapUpdateStoreStore {
async fn get_or_create(&self, uuid: Uuid) -> Result<Arc<UpdateStore>> {
match self.db.write().await.entry(uuid) {
Entry::Vacant(e) => {
let mut options = heed::EnvOpenOptions::new();
options.map_size(4096 * 100_000);
let path = self.path.clone().join(format!("updates-{}", e.key()));
create_dir_all(&path).unwrap();
let index_handle = self.index_handle.clone();
let store = UpdateStore::open(options, &path, move |meta, file| {
futures::executor::block_on(index_handle.update(meta, file))
})
.unwrap();
let store = e.insert(store);
Ok(store.clone())
}
Entry::Occupied(e) => Ok(e.get().clone()),
}
}
async fn get(&self, uuid: &Uuid) -> Result<Option<Arc<UpdateStore>>> {
Ok(self.db.read().await.get(uuid).cloned())
}
async fn delete(&self, uuid: &Uuid) -> Result<Option<Arc<UpdateStore>>> {
let store = self.db.write().await.remove(&uuid);
if store.is_some() {
let path = self.path.clone().join(format!("updates-{}", uuid));
remove_dir_all(path).unwrap();
}
Ok(store)
}
}
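
`handle_update` above streams the payload to a file, then hands the file to blocking code. Here is a minimal, self-contained sketch of that hand-off; the file name and payload are illustrative, and it assumes tokio with the `fs` feature, as used in this file.

use tokio::fs::File;
use tokio::io::AsyncWriteExt;
use tokio::sync::mpsc;

#[tokio::main]
async fn main() -> std::io::Result<()> {
    let (sender, mut receiver) = mpsc::channel::<Vec<u8>>(10);
    tokio::spawn(async move {
        let _ = sender.send(b"streamed update payload".to_vec()).await;
    });
    let path = std::env::temp_dir().join("update_example");
    let mut file = File::create(&path).await?;
    while let Some(chunk) = receiver.recv().await {
        file.write_all(&chunk).await?;
    }
    file.flush().await?;
    // Convert to a std::fs::File so blocking code (e.g. an LMDB write) can use it
    // off the async runtime.
    let file = file.into_std().await;
    tokio::task::spawn_blocking(move || {
        let len = file.metadata().map(|m| m.len()).unwrap_or(0);
        println!("registering update from {} bytes on disk", len);
    })
    .await
    .expect("blocking task panicked");
    Ok(())
}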

View file

@ -0,0 +1,98 @@
use std::fs::File;
use anyhow::Result;
use grenad::CompressionType;
use milli::update::UpdateBuilder;
use crate::index::Index;
use rayon::ThreadPool;
use crate::index_controller::updates::{Failed, Processed, Processing};
use crate::index_controller::UpdateMeta;
use crate::index::UpdateResult;
use crate::option::IndexerOpts;
pub struct UpdateHandler {
max_nb_chunks: Option<usize>,
chunk_compression_level: Option<u32>,
thread_pool: ThreadPool,
log_frequency: usize,
max_memory: usize,
linked_hash_map_size: usize,
chunk_compression_type: CompressionType,
chunk_fusing_shrink_size: u64,
}
impl UpdateHandler {
pub fn new(
opt: &IndexerOpts,
) -> anyhow::Result<Self> {
let thread_pool = rayon::ThreadPoolBuilder::new()
.num_threads(opt.indexing_jobs.unwrap_or(0))
.build()?;
Ok(Self {
max_nb_chunks: opt.max_nb_chunks,
chunk_compression_level: opt.chunk_compression_level,
thread_pool,
log_frequency: opt.log_every_n,
max_memory: opt.max_memory.get_bytes() as usize,
linked_hash_map_size: opt.linked_hash_map_size,
chunk_compression_type: opt.chunk_compression_type,
chunk_fusing_shrink_size: opt.chunk_fusing_shrink_size.get_bytes(),
})
}
    fn update_builder(&self, update_id: u64) -> UpdateBuilder {
// We prepare the update by using the update builder.
let mut update_builder = UpdateBuilder::new(update_id);
if let Some(max_nb_chunks) = self.max_nb_chunks {
update_builder.max_nb_chunks(max_nb_chunks);
}
if let Some(chunk_compression_level) = self.chunk_compression_level {
update_builder.chunk_compression_level(chunk_compression_level);
}
update_builder.thread_pool(&self.thread_pool);
update_builder.log_every_n(self.log_frequency);
update_builder.max_memory(self.max_memory);
update_builder.linked_hash_map_size(self.linked_hash_map_size);
update_builder.chunk_compression_type(self.chunk_compression_type);
update_builder.chunk_fusing_shrink_size(self.chunk_fusing_shrink_size);
update_builder
}
pub fn handle_update(
&self,
meta: Processing<UpdateMeta>,
content: File,
index: Index,
) -> Result<Processed<UpdateMeta, UpdateResult>, Failed<UpdateMeta, String>> {
use UpdateMeta::*;
let update_id = meta.id();
        let update_builder = self.update_builder(update_id);
let result = match meta.meta() {
DocumentsAddition {
method,
format,
primary_key,
} => index.update_documents(
*format,
*method,
content,
update_builder,
primary_key.as_deref(),
),
ClearDocuments => index.clear_documents(update_builder),
DeleteDocuments => index.delete_documents(content, update_builder),
Settings(settings) => index.update_settings(settings, update_builder),
Facets(levels) => index.update_facets(levels, update_builder),
};
match result {
Ok(result) => Ok(meta.process(result)),
Err(e) => Err(meta.fail(e.to_string())),
}
}
}
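
`handle_update` ends by turning a `Processing` update into either a processed or a failed record via `meta.process(..)` and `meta.fail(..)`. The real types live in the crate's `updates` module, which is not shown here; this is a hedged, self-contained sketch of the shape of that transition, with stand-in types.

#[derive(Debug)]
struct Processing<M> { id: u64, meta: M }
#[derive(Debug)]
struct Processed<M, N> { id: u64, meta: M, result: N }
#[derive(Debug)]
struct Failed<M, E> { id: u64, meta: M, error: E }

impl<M> Processing<M> {
    fn process<N>(self, result: N) -> Processed<M, N> {
        Processed { id: self.id, meta: self.meta, result }
    }
    fn fail<E>(self, error: E) -> Failed<M, E> {
        Failed { id: self.id, meta: self.meta, error }
    }
}

fn main() {
    let update = Processing { id: 1, meta: "clear documents" };
    let succeeded = true; // flip to exercise the failure path
    let outcome = if succeeded {
        Ok(update.process("done"))
    } else {
        Err(update.fail(String::from("index unavailable")))
    };
    match outcome {
        Ok(p) => println!("update {} ({}) processed: {}", p.id, p.meta, p.result),
        Err(f) => println!("update {} ({}) failed: {}", f.id, f.meta, f.error),
    }
}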

View file

@ -0,0 +1,444 @@
use std::fs::remove_file;
use std::path::{Path, PathBuf};
use std::sync::{Arc, RwLock};
use heed::types::{DecodeIgnore, OwnedType, SerdeJson};
use heed::{Database, Env, EnvOpenOptions};
use serde::{Deserialize, Serialize};
use std::fs::File;
use tokio::sync::mpsc;
use uuid::Uuid;
use crate::index_controller::updates::*;
type BEU64 = heed::zerocopy::U64<heed::byteorder::BE>;
#[derive(Clone)]
pub struct UpdateStore<M, N, E> {
env: Env,
pending_meta: Database<OwnedType<BEU64>, SerdeJson<Pending<M>>>,
pending: Database<OwnedType<BEU64>, SerdeJson<PathBuf>>,
processed_meta: Database<OwnedType<BEU64>, SerdeJson<Processed<M, N>>>,
failed_meta: Database<OwnedType<BEU64>, SerdeJson<Failed<M, E>>>,
aborted_meta: Database<OwnedType<BEU64>, SerdeJson<Aborted<M>>>,
processing: Arc<RwLock<Option<Processing<M>>>>,
notification_sender: mpsc::Sender<()>,
}
pub trait HandleUpdate<M, N, E> {
fn handle_update(
&mut self,
meta: Processing<M>,
content: File,
) -> anyhow::Result<Result<Processed<M, N>, Failed<M, E>>>;
}
impl<M, N, E, F> HandleUpdate<M, N, E> for F
where
F: FnMut(Processing<M>, File) -> anyhow::Result<Result<Processed<M, N>, Failed<M, E>>>,
{
fn handle_update(
&mut self,
meta: Processing<M>,
content: File,
) -> anyhow::Result<Result<Processed<M, N>, Failed<M, E>>> {
self(meta, content)
}
}
impl<M, N, E> UpdateStore<M, N, E>
where
M: for<'a> Deserialize<'a> + Serialize + 'static + Send + Sync + Clone,
N: for<'a> Deserialize<'a> + Serialize + 'static + Send + Sync,
E: for<'a> Deserialize<'a> + Serialize + 'static + Send + Sync,
{
pub fn open<P, U>(
mut options: EnvOpenOptions,
path: P,
update_handler: U,
) -> heed::Result<Arc<Self>>
where
P: AsRef<Path>,
U: HandleUpdate<M, N, E> + Sync + Clone + Send + 'static,
{
options.max_dbs(5);
let env = options.open(path)?;
let pending_meta = env.create_database(Some("pending-meta"))?;
let pending = env.create_database(Some("pending"))?;
let processed_meta = env.create_database(Some("processed-meta"))?;
let aborted_meta = env.create_database(Some("aborted-meta"))?;
let failed_meta = env.create_database(Some("failed-meta"))?;
let processing = Arc::new(RwLock::new(None));
let (notification_sender, mut notification_receiver) = mpsc::channel(10);
        // Send a first notification to trigger the processing loop. Note that `send`
        // is async: `try_send` is used here because merely creating the send future
        // and dropping it unawaited would never deliver the message.
        let _ = notification_sender.try_send(());
let update_store = Arc::new(UpdateStore {
env,
pending,
pending_meta,
processed_meta,
aborted_meta,
notification_sender,
failed_meta,
processing,
});
        // We need a weak reference so we can take ownership of the Arc later, when
        // we want to close the index.
let update_store_weak = Arc::downgrade(&update_store);
tokio::task::spawn(async move {
// Block and wait for something to process.
'outer: while let Some(_) = notification_receiver.recv().await {
loop {
match update_store_weak.upgrade() {
Some(update_store) => {
let handler = update_handler.clone();
let res = tokio::task::spawn_blocking(move || {
update_store.process_pending_update(handler)
})
.await
.expect("Fatal error processing update.");
match res {
Ok(Some(_)) => (),
Ok(None) => break,
Err(e) => eprintln!("error while processing update: {}", e),
}
}
// the ownership on the arc has been taken, we need to exit.
None => break 'outer,
}
}
}
});
Ok(update_store)
}
pub fn prepare_for_closing(self) -> heed::EnvClosingEvent {
self.env.prepare_for_closing()
}
/// Returns the new biggest id to use to store the new update.
fn new_update_id(&self, txn: &heed::RoTxn) -> heed::Result<u64> {
let last_pending = self
.pending_meta
.remap_data_type::<DecodeIgnore>()
.last(txn)?
.map(|(k, _)| k.get());
let last_processed = self
.processed_meta
.remap_data_type::<DecodeIgnore>()
.last(txn)?
.map(|(k, _)| k.get());
let last_aborted = self
.aborted_meta
.remap_data_type::<DecodeIgnore>()
.last(txn)?
.map(|(k, _)| k.get());
let last_update_id = [last_pending, last_processed, last_aborted]
.iter()
.copied()
.flatten()
.max();
match last_update_id {
Some(last_id) => Ok(last_id + 1),
None => Ok(0),
}
}
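    // Worked example: with pending ids {0, 1}, processed ids {4} and no aborted ids,
    // the next id is 5; with all three stores empty it is 0. Ids stay unique because
    // they are only handed out under the write txn taken in `register_update`.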
/// Registers the update content in the pending store and the meta
/// into the pending-meta store. Returns the new unique update id.
pub fn register_update(
&self,
meta: M,
content: impl AsRef<Path>,
index_uuid: Uuid,
) -> heed::Result<Pending<M>> {
let mut wtxn = self.env.write_txn()?;
        // We ask the update store to give us a new update id. This is safe:
        // no other update can have the same id, because we take a write txn before
        // asking for the id and registering the update, so any concurrent registration
        // is forced to wait for a new write txn.
let update_id = self.new_update_id(&wtxn)?;
let update_key = BEU64::new(update_id);
let meta = Pending::new(meta, update_id, index_uuid);
self.pending_meta.put(&mut wtxn, &update_key, &meta)?;
self.pending
.put(&mut wtxn, &update_key, &content.as_ref().to_owned())?;
wtxn.commit()?;
self.notification_sender
.blocking_send(())
.expect("Update store loop exited.");
Ok(meta)
}
/// Executes the user provided function on the next pending update (the one with the lowest id).
/// This is asynchronous as it lets the user process the update with a read-only txn,
/// writing the result meta to the processed-meta store only *after* it has been processed.
fn process_pending_update<U>(&self, mut handler: U) -> anyhow::Result<Option<()>>
where
U: HandleUpdate<M, N, E>,
{
// Create a read transaction to be able to retrieve the pending update in order.
let rtxn = self.env.read_txn()?;
let first_meta = self.pending_meta.first(&rtxn)?;
// If there is a pending update, we process it while holding only a reader,
// not a writer.
match first_meta {
Some((first_id, pending)) => {
let content_path = self
.pending
.get(&rtxn, &first_id)?
.expect("associated update content");
// We change the state of the update from pending to processing before passing
// it to the update handler. The processing store is not persistent, so we can
// recover from a failure.
let processing = pending.processing();
self.processing.write().unwrap().replace(processing.clone());
let file = File::open(&content_path)?;
// Process the pending update using the provided user function.
let result = handler.handle_update(processing, file)?;
drop(rtxn);
// Once the pending update has been successfully processed,
// we must remove the content from the pending and processing stores and
// write the *new* meta to the processed-meta store and commit.
let mut wtxn = self.env.write_txn()?;
self.processing.write().unwrap().take();
self.pending_meta.delete(&mut wtxn, &first_id)?;
remove_file(&content_path)?;
self.pending.delete(&mut wtxn, &first_id)?;
match result {
Ok(processed) => self.processed_meta.put(&mut wtxn, &first_id, &processed)?,
Err(failed) => self.failed_meta.put(&mut wtxn, &first_id, &failed)?,
}
wtxn.commit()?;
Ok(Some(()))
}
None => Ok(None),
}
}
/// Executes the user defined function with the meta-store iterators. After the
/// currently processing update (if any), the iterators are, in order: the
/// *processed*, *aborted*, *pending* and *failed* meta ones.
pub fn iter_metas<F, T>(&self, mut f: F) -> heed::Result<T>
where
F: for<'a> FnMut(
Option<Processing<M>>,
heed::RoIter<'a, OwnedType<BEU64>, SerdeJson<Processed<M, N>>>,
heed::RoIter<'a, OwnedType<BEU64>, SerdeJson<Aborted<M>>>,
heed::RoIter<'a, OwnedType<BEU64>, SerdeJson<Pending<M>>>,
heed::RoIter<'a, OwnedType<BEU64>, SerdeJson<Failed<M, E>>>,
) -> heed::Result<T>,
{
let rtxn = self.env.read_txn()?;
// We get the processed, aborted, pending and failed meta iterators.
let processed_iter = self.processed_meta.iter(&rtxn)?;
let aborted_iter = self.aborted_meta.iter(&rtxn)?;
let pending_iter = self.pending_meta.iter(&rtxn)?;
let processing = self.processing.read().unwrap().clone();
let failed_iter = self.failed_meta.iter(&rtxn)?;
// We execute the user defined function with all the iterators.
(f)(
processing,
processed_iter,
aborted_iter,
pending_iter,
failed_iter,
)
}
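// A usage sketch (hypothetical): collecting every update status through
// `iter_metas`, as a "list all updates" route could do. The `From` impls on
// `UpdateStatus` make the conversions trivial.
//
// let all: Vec<UpdateStatus<M, N, E>> = store.iter_metas(
//     |processing, processed, aborted, pending, failed| {
//         let mut all: Vec<UpdateStatus<M, N, E>> = Vec::new();
//         all.extend(processing.map(UpdateStatus::from));
//         for entry in processed {
//             all.push(entry?.1.into());
//         }
//         for entry in aborted {
//             all.push(entry?.1.into());
//         }
//         for entry in pending {
//             all.push(entry?.1.into());
//         }
//         for entry in failed {
//             all.push(entry?.1.into());
//         }
//         Ok(all)
//     },
// )?;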
/// Returns the update associated meta or `None` if the update doesn't exist.
pub fn meta(&self, update_id: u64) -> heed::Result<Option<UpdateStatus<M, N, E>>> {
let rtxn = self.env.read_txn()?;
let key = BEU64::new(update_id);
if let Some(ref meta) = *self.processing.read().unwrap() {
if meta.id() == update_id {
return Ok(Some(UpdateStatus::Processing(meta.clone())));
}
}
if let Some(meta) = self.pending_meta.get(&rtxn, &key)? {
return Ok(Some(UpdateStatus::Pending(meta)));
}
if let Some(meta) = self.processed_meta.get(&rtxn, &key)? {
return Ok(Some(UpdateStatus::Processed(meta)));
}
if let Some(meta) = self.aborted_meta.get(&rtxn, &key)? {
return Ok(Some(UpdateStatus::Aborted(meta)));
}
if let Some(meta) = self.failed_meta.get(&rtxn, &key)? {
return Ok(Some(UpdateStatus::Failed(meta)));
}
Ok(None)
}
/// Aborts an update: the aborted update's content is deleted and
/// its meta is moved into the aborted updates database.
///
/// Trying to abort an update that is currently being processed, that
/// has already been processed, or that doesn't actually exist, will
/// return `None`.
#[allow(dead_code)]
pub fn abort_update(&self, update_id: u64) -> heed::Result<Option<Aborted<M>>> {
let mut wtxn = self.env.write_txn()?;
let key = BEU64::new(update_id);
// We cannot abort an update that is currently being processed.
if self.pending_meta.first(&wtxn)?.map(|(key, _)| key.get()) == Some(update_id) {
return Ok(None);
}
let pending = match self.pending_meta.get(&wtxn, &key)? {
Some(meta) => meta,
None => return Ok(None),
};
let aborted = pending.abort();
self.aborted_meta.put(&mut wtxn, &key, &aborted)?;
self.pending_meta.delete(&mut wtxn, &key)?;
self.pending.delete(&mut wtxn, &key)?;
wtxn.commit()?;
Ok(Some(aborted))
}
/// Aborts all the pending updates except the one currently being processed.
/// Returns the update metas and ids that were successfully aborted.
#[allow(dead_code)]
pub fn abort_pendings(&self) -> heed::Result<Vec<(u64, Aborted<M>)>> {
let mut wtxn = self.env.write_txn()?;
let mut aborted_updates = Vec::new();
// We skip the first pending update as it is currently being processed.
for result in self.pending_meta.iter(&wtxn)?.skip(1) {
let (key, pending) = result?;
let id = key.get();
aborted_updates.push((id, pending.abort()));
}
for (id, aborted) in &aborted_updates {
let key = BEU64::new(*id);
self.aborted_meta.put(&mut wtxn, &key, &aborted)?;
self.pending_meta.delete(&mut wtxn, &key)?;
self.pending.delete(&mut wtxn, &key)?;
}
wtxn.commit()?;
Ok(aborted_updates)
}
}
//#[cfg(test)]
//mod tests {
//use super::*;
//use std::thread;
//use std::time::{Duration, Instant};
//#[test]
//fn simple() {
//let dir = tempfile::tempdir().unwrap();
//let mut options = EnvOpenOptions::new();
//options.map_size(4096 * 100);
//let update_store = UpdateStore::open(
//options,
//dir,
//|meta: Processing<String>, _content: &_| -> Result<_, Failed<_, ()>> {
//let new_meta = meta.meta().to_string() + " processed";
//let processed = meta.process(new_meta);
//Ok(processed)
//},
//)
//.unwrap();
//let meta = String::from("kiki");
//let update = update_store.register_update(meta, &[]).unwrap();
//thread::sleep(Duration::from_millis(100));
//let meta = update_store.meta(update.id()).unwrap().unwrap();
//if let UpdateStatus::Processed(Processed { success, .. }) = meta {
//assert_eq!(success, "kiki processed");
//} else {
//panic!()
//}
//}
//#[test]
//#[ignore]
//fn long_running_update() {
//let dir = tempfile::tempdir().unwrap();
//let mut options = EnvOpenOptions::new();
//options.map_size(4096 * 100);
//let update_store = UpdateStore::open(
//options,
//dir,
//|meta: Processing<String>, _content: &_| -> Result<_, Failed<_, ()>> {
//thread::sleep(Duration::from_millis(400));
//let new_meta = meta.meta().to_string() + "processed";
//let processed = meta.process(new_meta);
//Ok(processed)
//},
//)
//.unwrap();
//let before_register = Instant::now();
//let meta = String::from("kiki");
//let update_kiki = update_store.register_update(meta, &[]).unwrap();
//assert!(before_register.elapsed() < Duration::from_millis(200));
//let meta = String::from("coco");
//let update_coco = update_store.register_update(meta, &[]).unwrap();
//assert!(before_register.elapsed() < Duration::from_millis(200));
//let meta = String::from("cucu");
//let update_cucu = update_store.register_update(meta, &[]).unwrap();
//assert!(before_register.elapsed() < Duration::from_millis(200));
//thread::sleep(Duration::from_millis(400 * 3 + 100));
//let meta = update_store.meta(update_kiki.id()).unwrap().unwrap();
//if let UpdateStatus::Processed(Processed { success, .. }) = meta {
//assert_eq!(success, "kiki processed");
//} else {
//panic!()
//}
//let meta = update_store.meta(update_coco.id()).unwrap().unwrap();
//if let UpdateStatus::Processed(Processed { success, .. }) = meta {
//assert_eq!(success, "coco processed");
//} else {
//panic!()
//}
//let meta = update_store.meta(update_cucu.id()).unwrap().unwrap();
//if let UpdateStatus::Processed(Processed { success, .. }) = meta {
//assert_eq!(success, "cucu processed");
//} else {
//panic!()
//}
//}
//}


@@ -0,0 +1,186 @@
use chrono::{Utc, DateTime};
use serde::{Serialize, Deserialize};
use uuid::Uuid;
#[derive(Debug, PartialEq, Eq, Hash, Serialize, Deserialize, Clone)]
#[serde(rename_all = "camelCase")]
pub struct Pending<M> {
pub update_id: u64,
pub meta: M,
pub enqueued_at: DateTime<Utc>,
pub index_uuid: Uuid,
}
impl<M> Pending<M> {
pub fn new(meta: M, update_id: u64, index_uuid: Uuid) -> Self {
Self {
enqueued_at: Utc::now(),
meta,
update_id,
index_uuid,
}
}
pub fn processing(self) -> Processing<M> {
Processing {
from: self,
started_processing_at: Utc::now(),
}
}
pub fn abort(self) -> Aborted<M> {
Aborted {
from: self,
aborted_at: Utc::now(),
}
}
pub fn meta(&self) -> &M {
&self.meta
}
pub fn id(&self) -> u64 {
self.update_id
}
}
#[derive(Debug, PartialEq, Eq, Hash, Serialize, Deserialize, Clone)]
#[serde(rename_all = "camelCase")]
pub struct Processed<M, N> {
pub success: N,
pub processed_at: DateTime<Utc>,
#[serde(flatten)]
pub from: Processing<M>,
}
impl<M, N> Processed<M, N> {
pub fn id(&self) -> u64 {
self.from.id()
}
}
#[derive(Debug, PartialEq, Eq, Hash, Serialize, Deserialize, Clone)]
#[serde(rename_all = "camelCase")]
pub struct Processing<M> {
#[serde(flatten)]
pub from: Pending<M>,
pub started_processing_at: DateTime<Utc>,
}
impl<M> Processing<M> {
pub fn id(&self) -> u64 {
self.from.id()
}
pub fn meta(&self) -> &M {
self.from.meta()
}
pub fn index_uuid(&self) -> &Uuid {
&self.from.index_uuid
}
pub fn process<N>(self, meta: N) -> Processed<M, N> {
Processed {
success: meta,
from: self,
processed_at: Utc::now(),
}
}
pub fn fail<E>(self, error: E) -> Failed<M, E> {
Failed {
from: self,
error,
failed_at: Utc::now(),
}
}
}
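// A lifecycle sketch (hypothetical values): an update moves from `Pending`
// to `Processing`, then to either `Processed` or `Failed`, each transition
// stamping its own timestamp.
//
// let pending = Pending::new(String::from("add documents"), 0, Uuid::new_v4());
// let processing = pending.processing();
// let done: Processed<String, String> = processing.process(String::from("done"));
// assert_eq!(done.id(), 0);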
#[derive(Debug, PartialEq, Eq, Hash, Serialize, Deserialize, Clone)]
#[serde(rename_all = "camelCase")]
pub struct Aborted<M> {
#[serde(flatten)]
from: Pending<M>,
aborted_at: DateTime<Utc>,
}
impl<M> Aborted<M> {
pub fn id(&self) -> u64 {
self.from.id()
}
}
#[derive(Debug, PartialEq, Eq, Hash, Serialize, Deserialize, Clone)]
#[serde(rename_all = "camelCase")]
pub struct Failed<M, E> {
#[serde(flatten)]
from: Processing<M>,
error: E,
failed_at: DateTime<Utc>,
}
impl<M, E> Failed<M, E> {
pub fn id(&self) -> u64 {
self.from.id()
}
}
#[derive(Debug, PartialEq, Eq, Hash, Serialize)]
#[serde(tag = "status", rename_all = "camelCase")]
pub enum UpdateStatus<M, N, E> {
Processing(Processing<M>),
Pending(Pending<M>),
Processed(Processed<M, N>),
Aborted(Aborted<M>),
Failed(Failed<M, E>),
}
impl<M, N, E> UpdateStatus<M, N, E> {
pub fn id(&self) -> u64 {
match self {
UpdateStatus::Processing(u) => u.id(),
UpdateStatus::Pending(u) => u.id(),
UpdateStatus::Processed(u) => u.id(),
UpdateStatus::Aborted(u) => u.id(),
UpdateStatus::Failed(u) => u.id(),
}
}
pub fn processed(&self) -> Option<&Processed<M, N>> {
match self {
UpdateStatus::Processed(p) => Some(p),
_ => None,
}
}
}
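// With `#[serde(tag = "status", rename_all = "camelCase")]` and the flattened
// `from` fields, a serialized status is a single flat JSON object. A sketch of
// the shape (values are hypothetical):
//
// {
//   "status": "processing",
//   "updateId": 0,
//   "meta": {},
//   "enqueuedAt": "2021-03-10T13:46:49Z",
//   "indexUuid": "...",
//   "startedProcessingAt": "2021-03-10T13:46:50Z"
// }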
impl<M, N, E> From<Pending<M>> for UpdateStatus<M, N, E> {
fn from(other: Pending<M>) -> Self {
Self::Pending(other)
}
}
impl<M, N, E> From<Aborted<M>> for UpdateStatus<M, N, E> {
fn from(other: Aborted<M>) -> Self {
Self::Aborted(other)
}
}
impl<M, N, E> From<Processed<M, N>> for UpdateStatus<M, N, E> {
fn from(other: Processed<M, N>) -> Self {
Self::Processed(other)
}
}
impl<M, N, E> From<Processing<M>> for UpdateStatus<M, N, E> {
fn from(other: Processing<M>) -> Self {
Self::Processing(other)
}
}
impl<M, N, E> From<Failed<M, E>> for UpdateStatus<M, N, E> {
fn from(other: Failed<M, E>) -> Self {
Self::Failed(other)
}
}


@@ -0,0 +1,219 @@
use log::{info, warn};
use std::collections::hash_map::Entry;
use std::collections::HashMap;
use std::sync::Arc;
use thiserror::Error;
use tokio::sync::{mpsc, oneshot, RwLock};
use uuid::Uuid;
pub type Result<T> = std::result::Result<T, UuidError>;
#[derive(Debug)]
enum UuidResolveMsg {
Resolve {
name: String,
ret: oneshot::Sender<Result<Uuid>>,
},
GetOrCreate {
name: String,
ret: oneshot::Sender<Result<Uuid>>,
},
Create {
name: String,
ret: oneshot::Sender<Result<Uuid>>,
},
Delete {
name: String,
ret: oneshot::Sender<Result<Uuid>>,
},
List {
ret: oneshot::Sender<Result<Vec<(String, Uuid)>>>,
},
}
struct UuidResolverActor<S> {
inbox: mpsc::Receiver<UuidResolveMsg>,
store: S,
}
impl<S: UuidStore> UuidResolverActor<S> {
fn new(inbox: mpsc::Receiver<UuidResolveMsg>, store: S) -> Self {
Self { inbox, store }
}
async fn run(mut self) {
use UuidResolveMsg::*;
info!("uuid resolver started");
loop {
match self.inbox.recv().await {
Some(Create { name, ret }) => {
let _ = ret.send(self.handle_create(name).await);
}
Some(GetOrCreate { name, ret }) => {
let _ = ret.send(self.handle_get_or_create(name).await);
}
Some(Resolve { name, ret }) => {
let _ = ret.send(self.handle_resolve(name).await);
}
Some(Delete { name, ret }) => {
let _ = ret.send(self.handle_delete(name).await);
}
Some(List { ret }) => {
let _ = ret.send(self.handle_list().await);
}
// all senders have been dropped, need to quit.
None => break,
}
}
warn!("exiting uuid resolver loop");
}
async fn handle_create(&self, name: String) -> Result<Uuid> {
self.store.create_uuid(name, true).await
}
async fn handle_get_or_create(&self, name: String) -> Result<Uuid> {
self.store.create_uuid(name, false).await
}
async fn handle_resolve(&self, name: String) -> Result<Uuid> {
self.store
.get_uuid(&name)
.await?
.ok_or(UuidError::UnexistingIndex(name))
}
async fn handle_delete(&self, name: String) -> Result<Uuid> {
self.store
.delete(&name)
.await?
.ok_or(UuidError::UnexistingIndex(name))
}
async fn handle_list(&self) -> Result<Vec<(String, Uuid)>> {
let result = self.store.list().await?;
Ok(result)
}
}
#[derive(Clone)]
pub struct UuidResolverHandle {
sender: mpsc::Sender<UuidResolveMsg>,
}
impl UuidResolverHandle {
pub fn new() -> Self {
let (sender, receiver) = mpsc::channel(100);
let store = MapUuidStore(Arc::new(RwLock::new(HashMap::new())));
let actor = UuidResolverActor::new(receiver, store);
tokio::spawn(actor.run());
Self { sender }
}
pub async fn resolve(&self, name: String) -> anyhow::Result<Uuid> {
let (ret, receiver) = oneshot::channel();
let msg = UuidResolveMsg::Resolve { name, ret };
let _ = self.sender.send(msg).await;
Ok(receiver
.await
.expect("Uuid resolver actor has been killed")?)
}
pub async fn get_or_create(&self, name: String) -> Result<Uuid> {
let (ret, receiver) = oneshot::channel();
let msg = UuidResolveMsg::GetOrCreate { name, ret };
let _ = self.sender.send(msg).await;
Ok(receiver
.await
.expect("Uuid resolver actor has been killed")?)
}
pub async fn create(&self, name: String) -> anyhow::Result<Uuid> {
let (ret, receiver) = oneshot::channel();
let msg = UuidResolveMsg::Create { name, ret };
let _ = self.sender.send(msg).await;
Ok(receiver
.await
.expect("Uuid resolver actor has been killed")?)
}
pub async fn delete(&self, name: String) -> anyhow::Result<Uuid> {
let (ret, receiver) = oneshot::channel();
let msg = UuidResolveMsg::Delete { name, ret };
let _ = self.sender.send(msg).await;
Ok(receiver
.await
.expect("Uuid resolver actor has been killed")?)
}
pub async fn list(&self) -> anyhow::Result<Vec<(String, Uuid)>> {
let (ret, receiver) = oneshot::channel();
let msg = UuidResolveMsg::List { ret };
let _ = self.sender.send(msg).await;
Ok(receiver
.await
.expect("Uuid resolver actor has been killed")?)
}
}
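// A usage sketch (hypothetical, inside an async context): resolving an index
// name to its uuid, creating the entry on first use.
//
// let resolver = UuidResolverHandle::new();
// let uuid = resolver.get_or_create("movies".to_string()).await?;
// assert_eq!(resolver.resolve("movies".to_string()).await?, uuid);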
#[derive(Clone, Debug, Error)]
pub enum UuidError {
#[error("Name already exist.")]
NameAlreadyExist,
#[error("Index \"{0}\" doesn't exist.")]
UnexistingIndex(String),
}
#[async_trait::async_trait]
trait UuidStore {
// Create a new entry for `name`. Return an error if `err` is true and the entry
// already exists; return the uuid otherwise.
async fn create_uuid(&self, name: String, err: bool) -> Result<Uuid>;
async fn get_uuid(&self, name: &str) -> Result<Option<Uuid>>;
async fn delete(&self, name: &str) -> Result<Option<Uuid>>;
async fn list(&self) -> Result<Vec<(String, Uuid)>>;
}
struct MapUuidStore(Arc<RwLock<HashMap<String, Uuid>>>);
#[async_trait::async_trait]
impl UuidStore for MapUuidStore {
async fn create_uuid(&self, name: String, err: bool) -> Result<Uuid> {
match self.0.write().await.entry(name) {
Entry::Occupied(entry) => {
if err {
Err(UuidError::NameAlreadyExist)
} else {
Ok(entry.get().clone())
}
}
Entry::Vacant(entry) => {
let uuid = Uuid::new_v4();
let uuid = entry.insert(uuid);
Ok(uuid.clone())
}
}
}
async fn get_uuid(&self, name: &str) -> Result<Option<Uuid>> {
Ok(self.0.read().await.get(name).cloned())
}
async fn delete(&self, name: &str) -> Result<Option<Uuid>> {
Ok(self.0.write().await.remove(name))
}
async fn list(&self) -> Result<Vec<(String, Uuid)>> {
let list = self
.0
.read()
.await
.iter()
.map(|(name, uuid)| (name.to_owned(), uuid.clone()))
.collect();
Ok(list)
}
}


@@ -0,0 +1,68 @@
#![allow(clippy::or_fun_call)]
#![allow(unused_must_use)]
#![allow(unused_variables)]
#![allow(dead_code)]
pub mod data;
pub mod error;
pub mod helpers;
pub mod option;
pub mod routes;
mod index;
mod index_controller;
pub use option::Opt;
pub use self::data::Data;
#[macro_export]
macro_rules! create_app {
($data:expr, $enable_frontend:expr) => {
{
use actix_cors::Cors;
use actix_web::App;
use actix_web::middleware::TrailingSlash;
use actix_web::{web, middleware};
use meilisearch_http::error::payload_error_handler;
use meilisearch_http::routes::*;
let app = App::new()
.data($data.clone())
.app_data(
web::JsonConfig::default()
.limit($data.http_payload_size_limit())
.content_type(|_mime| true) // Accept all mime types
.error_handler(|err, _req| payload_error_handler(err).into()),
)
.app_data(
web::QueryConfig::default()
.error_handler(|err, _req| payload_error_handler(err).into())
)
.configure(document::services)
.configure(index::services)
.configure(search::services)
.configure(settings::services)
.configure(stop_words::services)
.configure(synonym::services)
.configure(health::services)
.configure(stats::services)
.configure(key::services);
//.configure(routes::dump::services);
let app = if $enable_frontend {
app
.service(load_html)
.service(load_css)
} else {
app
};
app.wrap(
Cors::default()
.send_wildcard()
.allowed_headers(vec!["content-type", "x-meili-api-key"])
.max_age(86_400) // 24h
)
.wrap(middleware::Logger::default())
.wrap(middleware::Compress::default())
.wrap(middleware::NormalizePath::new(TrailingSlash::Trim))
}
};
}


@@ -0,0 +1,159 @@
use std::env;
use actix_web::HttpServer;
use main_error::MainError;
use meilisearch_http::{Data, Opt, create_app};
use structopt::StructOpt;
//mod analytics;
#[cfg(target_os = "linux")]
#[global_allocator]
static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc;
#[actix_web::main]
async fn main() -> Result<(), MainError> {
let opt = Opt::from_args();
#[cfg(all(not(debug_assertions), feature = "sentry"))]
let _sentry = sentry::init((
if !opt.no_sentry {
Some(opt.sentry_dsn.clone())
} else {
None
},
sentry::ClientOptions {
release: sentry::release_name!(),
..Default::default()
},
));
match opt.env.as_ref() {
"production" => {
if opt.master_key.is_none() {
return Err(
"In production mode, the environment variable MEILI_MASTER_KEY is mandatory"
.into(),
);
}
#[cfg(all(not(debug_assertions), feature = "sentry"))]
if !opt.no_sentry && _sentry.is_enabled() {
sentry::integrations::panic::register_panic_handler(); // TODO: This shouldn't be needed when upgrading to sentry 0.19.0. These integrations are turned on by default when using `sentry::init`.
sentry::integrations::env_logger::init(None, Default::default());
}
}
"development" => {
env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info")).init();
}
_ => unreachable!(),
}
//if let Some(path) = &opt.import_snapshot {
//snapshot::load_snapshot(&opt.db_path, path, opt.ignore_snapshot_if_db_exists, opt.ignore_missing_snapshot)?;
//}
let data = Data::new(opt.clone())?;
//if !opt.no_analytics {
//let analytics_data = data.clone();
//let analytics_opt = opt.clone();
//thread::spawn(move || analytics::analytics_sender(analytics_data, analytics_opt));
//}
//if let Some(path) = &opt.import_dump {
//dump::import_dump(&data, path, opt.dump_batch_size)?;
//}
//if opt.schedule_snapshot {
//snapshot::schedule_snapshot(data.clone(), &opt.snapshot_dir, opt.snapshot_interval_sec.unwrap_or(86400))?;
//}
print_launch_resume(&opt, &data);
let enable_frontend = opt.env != "production";
run_http(data, opt, enable_frontend).await?;
Ok(())
}
async fn run_http(data: Data, opt: Opt, enable_frontend: bool) -> Result<(), Box<dyn std::error::Error>> {
let http_server = HttpServer::new(move || create_app!(&data, enable_frontend))
// Disabling signals allows the server to terminate immediately when a user enters CTRL-C
.disable_signals();
if let Some(config) = opt.get_ssl_config()? {
http_server
.bind_rustls(opt.http_addr, config)?
.run()
.await?;
} else {
http_server.bind(opt.http_addr)?.run().await?;
}
Ok(())
}
pub fn print_launch_resume(opt: &Opt, data: &Data) {
let ascii_name = r#"
888b d888 d8b 888 d8b .d8888b. 888
8888b d8888 Y8P 888 Y8P d88P Y88b 888
88888b.d88888 888 Y88b. 888
888Y88888P888 .d88b. 888 888 888 "Y888b. .d88b. 8888b. 888d888 .d8888b 88888b.
888 Y888P 888 d8P Y8b 888 888 888 "Y88b. d8P Y8b "88b 888P" d88P" 888 "88b
888 Y8P 888 88888888 888 888 888 "888 88888888 .d888888 888 888 888 888
888 " 888 Y8b. 888 888 888 Y88b d88P Y8b. 888 888 888 Y88b. 888 888
888 888 "Y8888 888 888 888 "Y8888P" "Y8888 "Y888888 888 "Y8888P 888 888
"#;
eprintln!("{}", ascii_name);
eprintln!("Database path:\t\t{:?}", opt.db_path);
eprintln!("Server listening on:\t{:?}", opt.http_addr);
eprintln!("Environment:\t\t{:?}", opt.env);
eprintln!("Commit SHA:\t\t{:?}", env!("VERGEN_SHA").to_string());
eprintln!(
"Build date:\t\t{:?}",
env!("VERGEN_BUILD_TIMESTAMP").to_string()
);
eprintln!(
"Package version:\t{:?}",
env!("CARGO_PKG_VERSION").to_string()
);
#[cfg(all(not(debug_assertions), feature = "sentry"))]
eprintln!(
"Sentry DSN:\t\t{:?}",
if !opt.no_sentry {
&opt.sentry_dsn
} else {
"Disabled"
}
);
eprintln!(
"Amplitude Analytics:\t{:?}",
if !opt.no_analytics {
"Enabled"
} else {
"Disabled"
}
);
eprintln!();
if data.api_keys().master.is_some() {
eprintln!("A Master Key has been set. Requests to MeiliSearch won't be authorized unless you provide an authentication key.");
} else {
eprintln!("No master key found; The server will accept unidentified requests. \
If you need some protection in development mode, please export a key: export MEILI_MASTER_KEY=xxx");
}
eprintln!();
eprintln!("Documentation:\t\thttps://docs.meilisearch.com");
eprintln!("Source code:\t\thttps://github.com/meilisearch/meilisearch");
eprintln!("Contact:\t\thttps://docs.meilisearch.com/resources/contact.html or bonjour@meilisearch.com");
eprintln!();
}


@@ -0,0 +1,294 @@
use std::{error, fs};
use std::io::{BufReader, Read};
use std::path::PathBuf;
use std::sync::Arc;
use byte_unit::Byte;
use rustls::internal::pemfile::{certs, pkcs8_private_keys, rsa_private_keys};
use rustls::{
AllowAnyAnonymousOrAuthenticatedClient, AllowAnyAuthenticatedClient, NoClientAuth,
RootCertStore,
};
use grenad::CompressionType;
use structopt::StructOpt;
#[derive(Debug, Clone, StructOpt)]
pub struct IndexerOpts {
/// The number of documents to skip before printing
/// a log regarding the indexing progress.
#[structopt(long, default_value = "100000")] // 100k
pub log_every_n: usize,
/// Grenad max number of chunks.
#[structopt(long)]
pub max_nb_chunks: Option<usize>,
/// The maximum amount of memory to use for the Grenad buffer. It is recommended
/// to use something like 80%-90% of the available memory.
///
/// It is automatically split by the number of jobs e.g. if you use 7 jobs
/// and 7 GB of max memory, each thread will use a maximum of 1 GB.
#[structopt(long, default_value = "7 GiB")]
pub max_memory: Byte,
/// Size of the linked hash map cache when indexing.
/// The bigger it is, the faster the indexing is but the more memory it takes.
#[structopt(long, default_value = "500")]
pub linked_hash_map_size: usize,
/// The name of the compression algorithm to use when compressing intermediate
/// Grenad chunks while indexing documents.
///
/// Choosing a fast algorithm will make the indexing faster but may consume more memory.
#[structopt(long, default_value = "snappy", possible_values = &["snappy", "zlib", "lz4", "lz4hc", "zstd"])]
pub chunk_compression_type: CompressionType,
/// The level of compression of the chosen algorithm.
#[structopt(long, requires = "chunk-compression-type")]
pub chunk_compression_level: Option<u32>,
/// The number of bytes to remove from the beginning of the chunks while reading/sorting
/// or merging them.
///
/// File fusing must only be enabled on file systems that support `FALLOC_FL_COLLAPSE_RANGE`
/// (i.e. ext4 and XFS). File fusing will only work if the `enable-chunk-fusing` flag is set.
#[structopt(long, default_value = "4 GiB")]
pub chunk_fusing_shrink_size: Byte,
/// Enable chunk fusing; this reduces the amount of disk space used.
#[structopt(long)]
pub enable_chunk_fusing: bool,
/// Number of parallel jobs for indexing, defaults to # of CPUs.
#[structopt(long)]
pub indexing_jobs: Option<usize>,
}
impl Default for IndexerOpts {
fn default() -> Self {
Self {
log_every_n: 100_000,
max_nb_chunks: None,
max_memory: Byte::from_str("1GiB").unwrap(),
linked_hash_map_size: 500,
chunk_compression_type: CompressionType::None,
chunk_compression_level: None,
chunk_fusing_shrink_size: Byte::from_str("4GiB").unwrap(),
enable_chunk_fusing: false,
indexing_jobs: None,
}
}
}
const POSSIBLE_ENV: [&str; 2] = ["development", "production"];
#[derive(Debug, Clone, StructOpt)]
pub struct Opt {
/// The destination where the database must be created.
#[structopt(long, env = "MEILI_DB_PATH", default_value = "./data.ms")]
pub db_path: PathBuf,
/// The address on which the http server will listen.
#[structopt(long, env = "MEILI_HTTP_ADDR", default_value = "127.0.0.1:7700")]
pub http_addr: String,
/// The master key allowing you to do everything on the server.
#[structopt(long, env = "MEILI_MASTER_KEY")]
pub master_key: Option<String>,
/// The Sentry DSN to use for error reporting. This defaults to the MeiliSearch Sentry project.
/// You can disable Sentry altogether using the `--no-sentry` flag or the `MEILI_NO_SENTRY` environment variable.
#[cfg(all(not(debug_assertions), feature = "sentry"))]
#[structopt(long, env = "SENTRY_DSN", default_value = "https://5ddfa22b95f241198be2271aaf028653@sentry.io/3060337")]
pub sentry_dsn: String,
/// Disable Sentry error reporting.
#[structopt(long, env = "MEILI_NO_SENTRY")]
#[cfg(all(not(debug_assertions), feature = "sentry"))]
pub no_sentry: bool,
/// This environment variable must be set to `production` if you are running in production.
/// If the server is running in development mode, more logs will be displayed
/// and the master key can be omitted, which means there is no security on the update routes.
/// This is useful for debugging when integrating the engine with another service.
#[structopt(long, env = "MEILI_ENV", default_value = "development", possible_values = &POSSIBLE_ENV)]
pub env: String,
/// Do not send analytics to Meili.
#[structopt(long, env = "MEILI_NO_ANALYTICS")]
pub no_analytics: bool,
/// The maximum size, in bytes, of the main lmdb database directory
#[structopt(long, env = "MEILI_MAX_MDB_SIZE", default_value = "100 GiB")]
pub max_mdb_size: Byte,
/// The maximum size, in bytes, of the update lmdb database directory
#[structopt(long, env = "MEILI_MAX_UDB_SIZE", default_value = "10 GiB")]
pub max_udb_size: Byte,
/// The maximum size, in bytes, of accepted JSON payloads
#[structopt(long, env = "MEILI_HTTP_PAYLOAD_SIZE_LIMIT", default_value = "10 MiB")]
pub http_payload_size_limit: Byte,
/// Read server certificates from CERTFILE.
/// This should contain PEM-format certificates
/// in the right order (the first certificate should
/// certify KEYFILE, the last should be a root CA).
#[structopt(long, env = "MEILI_SSL_CERT_PATH", parse(from_os_str))]
pub ssl_cert_path: Option<PathBuf>,
/// Read private key from KEYFILE. This should be a RSA
/// private key or PKCS8-encoded private key, in PEM format.
#[structopt(long, env = "MEILI_SSL_KEY_PATH", parse(from_os_str))]
pub ssl_key_path: Option<PathBuf>,
/// Enable client authentication, and accept certificates
/// signed by those roots provided in CERTFILE.
#[structopt(long, env = "MEILI_SSL_AUTH_PATH", parse(from_os_str))]
pub ssl_auth_path: Option<PathBuf>,
/// Read DER-encoded OCSP response from OCSPFILE and staple to certificate.
/// Optional
#[structopt(long, env = "MEILI_SSL_OCSP_PATH", parse(from_os_str))]
pub ssl_ocsp_path: Option<PathBuf>,
/// Send a fatal alert if the client does not complete client authentication.
#[structopt(long, env = "MEILI_SSL_REQUIRE_AUTH")]
pub ssl_require_auth: bool,
/// SSL support for session resumption.
#[structopt(long, env = "MEILI_SSL_RESUMPTION")]
pub ssl_resumption: bool,
/// SSL support for tickets.
#[structopt(long, env = "MEILI_SSL_TICKETS")]
pub ssl_tickets: bool,
/// Defines the path of the snapshot file to import.
/// This option will, by default, stop the process if a database already exists or if no snapshot exists at
/// the given path. If this option is not specified, no snapshot is imported.
#[structopt(long)]
pub import_snapshot: Option<PathBuf>,
/// The engine will ignore a missing snapshot and not return an error in such a case.
#[structopt(long, requires = "import-snapshot")]
pub ignore_missing_snapshot: bool,
/// The engine will skip snapshot importation and not return an error in such a case.
#[structopt(long, requires = "import-snapshot")]
pub ignore_snapshot_if_db_exists: bool,
/// Defines the directory path where MeiliSearch will create a snapshot at each snapshot interval.
#[structopt(long, env = "MEILI_SNAPSHOT_DIR", default_value = "snapshots/")]
pub snapshot_dir: PathBuf,
/// Activate snapshot scheduling.
#[structopt(long, env = "MEILI_SCHEDULE_SNAPSHOT")]
pub schedule_snapshot: bool,
/// Defines the time interval, in seconds, between each snapshot creation.
#[structopt(long, env = "MEILI_SNAPSHOT_INTERVAL_SEC")]
pub snapshot_interval_sec: Option<u64>,
/// Folder where dumps are created when the dump route is called.
#[structopt(long, env = "MEILI_DUMPS_DIR", default_value = "dumps/")]
pub dumps_dir: PathBuf,
/// Import a dump from the specified path, must be a `.tar.gz` file.
#[structopt(long, conflicts_with = "import-snapshot")]
pub import_dump: Option<PathBuf>,
/// The batch size used in the importation process; the bigger it is, the faster the dump is imported.
#[structopt(long, env = "MEILI_DUMP_BATCH_SIZE", default_value = "1024")]
pub dump_batch_size: usize,
#[structopt(flatten)]
pub indexer_options: IndexerOpts,
}
impl Opt {
pub fn get_ssl_config(&self) -> Result<Option<rustls::ServerConfig>, Box<dyn error::Error>> {
if let (Some(cert_path), Some(key_path)) = (&self.ssl_cert_path, &self.ssl_key_path) {
let client_auth = match &self.ssl_auth_path {
Some(auth_path) => {
let roots = load_certs(auth_path.to_path_buf())?;
let mut client_auth_roots = RootCertStore::empty();
for root in roots {
client_auth_roots.add(&root).unwrap();
}
if self.ssl_require_auth {
AllowAnyAuthenticatedClient::new(client_auth_roots)
} else {
AllowAnyAnonymousOrAuthenticatedClient::new(client_auth_roots)
}
}
None => NoClientAuth::new(),
};
let mut config = rustls::ServerConfig::new(client_auth);
config.key_log = Arc::new(rustls::KeyLogFile::new());
let certs = load_certs(cert_path.to_path_buf())?;
let privkey = load_private_key(key_path.to_path_buf())?;
let ocsp = load_ocsp(&self.ssl_ocsp_path)?;
config
.set_single_cert_with_ocsp_and_sct(certs, privkey, ocsp, vec![])
.map_err(|_| "bad certificates/private key")?;
if self.ssl_resumption {
config.set_persistence(rustls::ServerSessionMemoryCache::new(256));
}
if self.ssl_tickets {
config.ticketer = rustls::Ticketer::new();
}
Ok(Some(config))
} else {
Ok(None)
}
}
}
fn load_certs(filename: PathBuf) -> Result<Vec<rustls::Certificate>, Box<dyn error::Error>> {
let certfile = fs::File::open(filename).map_err(|_| "cannot open certificate file")?;
let mut reader = BufReader::new(certfile);
Ok(certs(&mut reader).map_err(|_| "cannot read certificate file")?)
}
fn load_private_key(filename: PathBuf) -> Result<rustls::PrivateKey, Box<dyn error::Error>> {
let rsa_keys = {
let keyfile =
fs::File::open(filename.clone()).map_err(|_| "cannot open private key file")?;
let mut reader = BufReader::new(keyfile);
rsa_private_keys(&mut reader).map_err(|_| "file contains invalid rsa private key")?
};
let pkcs8_keys = {
let keyfile = fs::File::open(filename).map_err(|_| "cannot open private key file")?;
let mut reader = BufReader::new(keyfile);
pkcs8_private_keys(&mut reader)
.map_err(|_| "file contains invalid pkcs8 private key (encrypted keys not supported)")?
};
// prefer to load pkcs8 keys
if !pkcs8_keys.is_empty() {
Ok(pkcs8_keys[0].clone())
} else {
assert!(!rsa_keys.is_empty());
Ok(rsa_keys[0].clone())
}
}
fn load_ocsp(filename: &Option<PathBuf>) -> Result<Vec<u8>, Box<dyn error::Error>> {
let mut ret = Vec::new();
if let Some(ref name) = filename {
fs::File::open(name)
.map_err(|_| "cannot open ocsp file")?
.read_to_end(&mut ret)
.map_err(|_| "cannot read oscp file")?;
}
Ok(ret)
}


@@ -0,0 +1,262 @@
use actix_web::web::Payload;
use actix_web::{delete, get, post, put};
use actix_web::{web, HttpResponse};
use indexmap::IndexMap;
use log::error;
use milli::update::{IndexDocumentsMethod, UpdateFormat};
use serde::Deserialize;
use serde_json::Value;
use crate::Data;
use crate::error::ResponseError;
use crate::helpers::Authentication;
use crate::routes::IndexParam;
const DEFAULT_RETRIEVE_DOCUMENTS_OFFSET: usize = 0;
const DEFAULT_RETRIEVE_DOCUMENTS_LIMIT: usize = 20;
macro_rules! guard_content_type {
($fn_name:ident, $guard_value:literal) => {
fn $fn_name(head: &actix_web::dev::RequestHead) -> bool {
if let Some(content_type) = head.headers.get("Content-Type") {
content_type.to_str().map(|v| v.contains($guard_value)).unwrap_or(false)
} else {
false
}
}
};
}
guard_content_type!(guard_json, "application/json");
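// The generated guard does a substring match on the Content-Type header, so
// both "application/json" and "application/json; charset=utf-8" are accepted,
// while a missing or different Content-Type makes the guard reject the route.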
type Document = IndexMap<String, Value>;
#[derive(Deserialize)]
struct DocumentParam {
index_uid: String,
document_id: String,
}
pub fn services(cfg: &mut web::ServiceConfig) {
cfg.service(get_document)
.service(delete_document)
.service(get_all_documents)
.service(add_documents_json)
.service(update_documents)
.service(delete_documents)
.service(clear_all_documents);
}
#[get(
"/indexes/{index_uid}/documents/{document_id}",
wrap = "Authentication::Public"
)]
async fn get_document(
data: web::Data<Data>,
path: web::Path<DocumentParam>,
) -> Result<HttpResponse, ResponseError> {
let index = path.index_uid.clone();
let id = path.document_id.clone();
match data.retrieve_document(index, id, None as Option<Vec<String>>).await {
Ok(document) => {
let json = serde_json::to_string(&document).unwrap();
Ok(HttpResponse::Ok().body(json))
}
Err(e) => {
Ok(HttpResponse::BadRequest().body(serde_json::json!({ "error": e.to_string() })))
}
}
}
#[delete(
"/indexes/{index_uid}/documents/{document_id}",
wrap = "Authentication::Private"
)]
async fn delete_document(
data: web::Data<Data>,
path: web::Path<DocumentParam>,
) -> Result<HttpResponse, ResponseError> {
match data.delete_documents(path.index_uid.clone(), vec![path.document_id.clone()]).await {
Ok(result) => {
let json = serde_json::to_string(&result).unwrap();
Ok(HttpResponse::Ok().body(json))
}
Err(e) => {
Ok(HttpResponse::BadRequest().body(serde_json::json!({ "error": e.to_string() })))
}
}
}
#[derive(Deserialize)]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
struct BrowseQuery {
offset: Option<usize>,
limit: Option<usize>,
attributes_to_retrieve: Option<String>,
}
#[get("/indexes/{index_uid}/documents", wrap = "Authentication::Public")]
async fn get_all_documents(
data: web::Data<Data>,
path: web::Path<IndexParam>,
params: web::Query<BrowseQuery>,
) -> Result<HttpResponse, ResponseError> {
let attributes_to_retrieve = params
.attributes_to_retrieve
.as_ref()
.map(|attrs| attrs
.split(",")
.map(String::from)
.collect::<Vec<_>>());
match data.retrieve_documents(
path.index_uid.clone(),
params.offset.unwrap_or(DEFAULT_RETRIEVE_DOCUMENTS_OFFSET),
params.limit.unwrap_or(DEFAULT_RETRIEVE_DOCUMENTS_LIMIT),
attributes_to_retrieve).await {
Ok(docs) => {
let json = serde_json::to_string(&docs).unwrap();
Ok(HttpResponse::Ok().body(json))
}
Err(e) => {
Ok(HttpResponse::BadRequest().body(serde_json::json!({ "error": e.to_string() })))
}
}
}
#[derive(Deserialize)]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
struct UpdateDocumentsQuery {
primary_key: Option<String>,
}
/// Route used when the payload type is "application/json"
#[post(
"/indexes/{index_uid}/documents",
wrap = "Authentication::Private",
guard = "guard_json"
)]
async fn add_documents_json(
data: web::Data<Data>,
path: web::Path<IndexParam>,
params: web::Query<UpdateDocumentsQuery>,
body: Payload,
) -> Result<HttpResponse, ResponseError> {
let addition_result = data
.add_documents(
path.into_inner().index_uid,
IndexDocumentsMethod::ReplaceDocuments,
UpdateFormat::Json,
body,
params.primary_key.clone(),
).await;
match addition_result {
Ok(update) => {
let value = serde_json::to_string(&update).unwrap();
let response = HttpResponse::Ok().body(value);
Ok(response)
}
Err(e) => {
Ok(HttpResponse::BadRequest().body(serde_json::json!({ "error": e.to_string() })))
}
}
}
/// Default route for adding documents; this should return an error and redirect to the documentation
#[post("/indexes/{index_uid}/documents", wrap = "Authentication::Private")]
async fn add_documents_default(
_data: web::Data<Data>,
_path: web::Path<IndexParam>,
_params: web::Query<UpdateDocumentsQuery>,
_body: web::Json<Vec<Document>>,
) -> Result<HttpResponse, ResponseError> {
error!("Unknown document type");
todo!()
}
/// Default route for updating documents; this should return an error and redirect to the documentation
#[put("/indexes/{index_uid}/documents", wrap = "Authentication::Private")]
async fn update_documents_default(
_data: web::Data<Data>,
_path: web::Path<IndexParam>,
_params: web::Query<UpdateDocumentsQuery>,
_body: web::Json<Vec<Document>>,
) -> Result<HttpResponse, ResponseError> {
error!("Unknown document type");
todo!()
}
#[put(
"/indexes/{index_uid}/documents",
wrap = "Authentication::Private",
guard = "guard_json",
)]
async fn update_documents(
data: web::Data<Data>,
path: web::Path<IndexParam>,
params: web::Query<UpdateDocumentsQuery>,
body: web::Payload,
) -> Result<HttpResponse, ResponseError> {
let addition_result = data
.add_documents(
path.into_inner().index_uid,
IndexDocumentsMethod::UpdateDocuments,
UpdateFormat::Json,
body,
params.primary_key.clone(),
).await;
match addition_result {
Ok(update) => {
let value = serde_json::to_string(&update).unwrap();
let response = HttpResponse::Ok().body(value);
Ok(response)
}
Err(e) => {
Ok(HttpResponse::BadRequest().body(serde_json::json!({ "error": e.to_string() })))
}
}
}
#[post(
"/indexes/{index_uid}/documents/delete-batch",
wrap = "Authentication::Private"
)]
async fn delete_documents(
data: web::Data<Data>,
path: web::Path<IndexParam>,
body: web::Json<Vec<Value>>,
) -> Result<HttpResponse, ResponseError> {
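// Ids may be sent either as JSON strings or as other JSON values; non-string
// values are stringified (e.g. the number 1 becomes "1") before deletion.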
let ids = body
.iter()
.map(|v| v.as_str().map(String::from).unwrap_or_else(|| v.to_string()))
.collect();
match data.delete_documents(path.index_uid.clone(), ids).await {
Ok(result) => {
let json = serde_json::to_string(&result).unwrap();
Ok(HttpResponse::Ok().body(json))
}
Err(e) => {
Ok(HttpResponse::BadRequest().body(serde_json::json!({ "error": e.to_string() })))
}
}
}
#[delete("/indexes/{index_uid}/documents", wrap = "Authentication::Private")]
async fn clear_all_documents(
data: web::Data<Data>,
path: web::Path<IndexParam>,
) -> Result<HttpResponse, ResponseError> {
match data.clear_documents(path.index_uid.clone()).await {
Ok(update) => {
let json = serde_json::to_string(&update).unwrap();
Ok(HttpResponse::Ok().body(json))
}
Err(e) => {
Ok(HttpResponse::BadRequest().body(serde_json::json!({ "error": e.to_string() })))
}
}
}


@@ -0,0 +1,42 @@
use std::fs::File;
use std::path::Path;
use actix_web::{get, post};
use actix_web::{HttpResponse, web};
use serde::{Deserialize, Serialize};
use crate::dump::{DumpInfo, DumpStatus, compressed_dumps_dir, init_dump_process};
use crate::Data;
use crate::error::{Error, ResponseError};
use crate::helpers::Authentication;
pub fn services(cfg: &mut web::ServiceConfig) {
cfg.service(trigger_dump)
.service(get_dump_status);
}
#[post("/dumps", wrap = "Authentication::Private")]
async fn trigger_dump(
data: web::Data<Data>,
) -> Result<HttpResponse, ResponseError> {
todo!()
}
#[derive(Debug, Serialize)]
#[serde(rename_all = "camelCase")]
struct DumpStatusResponse {
status: String,
}
#[derive(Deserialize)]
struct DumpParam {
dump_uid: String,
}
#[get("/dumps/{dump_uid}/status", wrap = "Authentication::Private")]
async fn get_dump_status(
data: web::Data<Data>,
path: web::Path<DumpParam>,
) -> Result<HttpResponse, ResponseError> {
todo!()
}


@@ -0,0 +1,13 @@
use actix_web::get;
use actix_web::{web, HttpResponse};
use crate::error::ResponseError;
pub fn services(cfg: &mut web::ServiceConfig) {
cfg.service(get_health);
}
#[get("/health")]
async fn get_health() -> Result<HttpResponse, ResponseError> {
Ok(HttpResponse::NoContent().finish())
}


@@ -0,0 +1,167 @@
use actix_web::{delete, get, post, put};
use actix_web::{web, HttpResponse};
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use crate::Data;
use crate::error::ResponseError;
use crate::helpers::Authentication;
use crate::routes::IndexParam;
pub fn services(cfg: &mut web::ServiceConfig) {
cfg.service(list_indexes)
.service(get_index)
.service(create_index)
.service(update_index)
.service(delete_index)
.service(get_update_status)
.service(get_all_updates_status);
}
#[get("/indexes", wrap = "Authentication::Private")]
async fn list_indexes(data: web::Data<Data>) -> Result<HttpResponse, ResponseError> {
match data.list_indexes().await {
Ok(indexes) => {
let json = serde_json::to_string(&indexes).unwrap();
Ok(HttpResponse::Ok().body(json))
}
Err(e) => {
Ok(HttpResponse::BadRequest().body(serde_json::json!({ "error": e.to_string() })))
}
}
}
#[get("/indexes/{index_uid}", wrap = "Authentication::Private")]
async fn get_index(
data: web::Data<Data>,
path: web::Path<IndexParam>,
) -> Result<HttpResponse, ResponseError> {
match data.index(&path.index_uid).await? {
Some(meta) => {
let json = serde_json::to_string(&meta).unwrap();
Ok(HttpResponse::Ok().body(json))
}
None => {
let e = format!("Index {:?} doesn't exist.", path.index_uid);
Ok(HttpResponse::BadRequest().body(serde_json::json!({ "error": e.to_string() })))
}
}
}
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
struct IndexCreateRequest {
uid: String,
primary_key: Option<String>,
}
#[post("/indexes", wrap = "Authentication::Private")]
async fn create_index(
data: web::Data<Data>,
body: web::Json<IndexCreateRequest>,
) -> Result<HttpResponse, ResponseError> {
match data.create_index(&body.uid, body.primary_key.clone()).await {
Ok(meta) => {
let json = serde_json::to_string(&meta).unwrap();
Ok(HttpResponse::Ok().body(json))
}
Err(e) => {
Ok(HttpResponse::BadRequest().body(serde_json::json!({ "error": e.to_string() })))
}
}
}
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
struct UpdateIndexRequest {
name: Option<String>,
primary_key: Option<String>,
}
#[derive(Debug, Serialize)]
#[serde(rename_all = "camelCase")]
struct UpdateIndexResponse {
name: String,
uid: String,
created_at: DateTime<Utc>,
updated_at: DateTime<Utc>,
primary_key: Option<String>,
}
#[put("/indexes/{index_uid}", wrap = "Authentication::Private")]
async fn update_index(
data: web::Data<Data>,
path: web::Path<IndexParam>,
body: web::Json<UpdateIndexRequest>,
) -> Result<HttpResponse, ResponseError> {
match data.update_index(&path.index_uid, body.primary_key.as_ref(), body.name.as_ref()) {
Ok(meta) => {
let json = serde_json::to_string(&meta).unwrap();
Ok(HttpResponse::Ok().body(json))
}
Err(e) => {
Ok(HttpResponse::BadRequest().body(serde_json::json!({ "error": e.to_string() })))
}
}
}
#[delete("/indexes/{index_uid}", wrap = "Authentication::Private")]
async fn delete_index(
data: web::Data<Data>,
path: web::Path<IndexParam>,
) -> Result<HttpResponse, ResponseError> {
match data.delete_index(path.index_uid.clone()).await {
Ok(_) => Ok(HttpResponse::Ok().finish()),
Err(e) => {
Ok(HttpResponse::BadRequest().body(serde_json::json!({ "error": e.to_string() })))
}
}
}
#[derive(Deserialize)]
struct UpdateParam {
index_uid: String,
update_id: u64,
}
#[get(
"/indexes/{index_uid}/updates/{update_id}",
wrap = "Authentication::Private"
)]
async fn get_update_status(
data: web::Data<Data>,
path: web::Path<UpdateParam>,
) -> Result<HttpResponse, ResponseError> {
let result = data.get_update_status(&path.index_uid, path.update_id).await;
match result {
Ok(Some(meta)) => {
let json = serde_json::to_string(&meta).unwrap();
Ok(HttpResponse::Ok().body(json))
}
Ok(None) => {
let e = format!("udpate {} for index {:?} doesn't exists.", path.update_id, path.index_uid);
Ok(HttpResponse::BadRequest().body(serde_json::json!({ "error": e.to_string() })))
}
Err(e) => {
Ok(HttpResponse::BadRequest().body(serde_json::json!({ "error": e.to_string() })))
}
}
}
#[get("/indexes/{index_uid}/updates", wrap = "Authentication::Private")]
async fn get_all_updates_status(
data: web::Data<Data>,
path: web::Path<IndexParam>,
) -> Result<HttpResponse, ResponseError> {
let result = data.get_updates_status(&path.index_uid).await;
match result {
Ok(metas) => {
let json = serde_json::to_string(&metas).unwrap();
Ok(HttpResponse::Ok().body(json))
}
Err(e) => {
Ok(HttpResponse::BadRequest().body(serde_json::json!({ "error": e.to_string() })))
}
}
}


@@ -0,0 +1,26 @@
use actix_web::web;
use actix_web::HttpResponse;
use actix_web::get;
use serde::Serialize;
use crate::helpers::Authentication;
use crate::Data;
pub fn services(cfg: &mut web::ServiceConfig) {
cfg.service(list);
}
#[derive(Serialize)]
struct KeysResponse {
private: Option<String>,
public: Option<String>,
}
#[get("/keys", wrap = "Authentication::Admin")]
async fn list(data: web::Data<Data>) -> HttpResponse {
let api_keys = data.api_keys.clone();
HttpResponse::Ok().json(&KeysResponse {
private: api_keys.private,
public: api_keys.public,
})
}


@@ -0,0 +1,44 @@
use actix_web::{get, HttpResponse};
use serde::{Deserialize, Serialize};
pub mod document;
pub mod health;
pub mod index;
pub mod key;
pub mod search;
pub mod settings;
pub mod stats;
pub mod stop_words;
pub mod synonym;
//pub mod dump;
#[derive(Deserialize)]
pub struct IndexParam {
index_uid: String,
}
#[derive(Serialize)]
#[serde(rename_all = "camelCase")]
pub struct IndexUpdateResponse {
pub update_id: u64,
}
impl IndexUpdateResponse {
pub fn with_id(update_id: u64) -> Self {
Self { update_id }
}
}
#[get("/")]
pub async fn load_html() -> HttpResponse {
HttpResponse::Ok()
.content_type("text/html; charset=utf-8")
.body(include_str!("../../public/interface.html").to_string())
}
#[get("/bulma.min.css")]
pub async fn load_css() -> HttpResponse {
HttpResponse::Ok()
.content_type("text/css; charset=utf-8")
.body(include_str!("../../public/bulma.min.css").to_string())
}


@@ -0,0 +1,114 @@
use std::collections::HashSet;
use std::convert::{TryFrom, TryInto};
use actix_web::{get, post, web, HttpResponse};
use serde::Deserialize;
use crate::error::ResponseError;
use crate::helpers::Authentication;
use crate::routes::IndexParam;
use crate::Data;
use crate::index::{SearchQuery, DEFAULT_SEARCH_LIMIT};
pub fn services(cfg: &mut web::ServiceConfig) {
cfg.service(search_with_post).service(search_with_url_query);
}
#[derive(Deserialize, Debug)]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
pub struct SearchQueryGet {
q: Option<String>,
offset: Option<usize>,
limit: Option<usize>,
attributes_to_retrieve: Option<String>,
attributes_to_crop: Option<String>,
crop_length: Option<usize>,
attributes_to_highlight: Option<String>,
filters: Option<String>,
matches: Option<bool>,
facet_filters: Option<String>,
facet_distributions: Option<String>,
}
impl TryFrom<SearchQueryGet> for SearchQuery {
type Error = anyhow::Error;
fn try_from(other: SearchQueryGet) -> anyhow::Result<Self> {
let attributes_to_retrieve = other
.attributes_to_retrieve
.map(|attrs| attrs.split(",").map(String::from).collect::<Vec<_>>());
let attributes_to_crop = other
.attributes_to_crop
.map(|attrs| attrs.split(",").map(String::from).collect::<Vec<_>>());
let attributes_to_highlight = other
.attributes_to_highlight
.map(|attrs| attrs.split(",").map(String::from).collect::<HashSet<_>>());
let facet_distributions = other
.facet_distributions
.map(|attrs| attrs.split(",").map(String::from).collect::<Vec<_>>());
let facet_filters = match other.facet_filters {
Some(ref f) => Some(serde_json::from_str(f)?),
None => None,
};
Ok(Self {
q: other.q,
offset: other.offset,
limit: other.limit.unwrap_or(DEFAULT_SEARCH_LIMIT),
attributes_to_retrieve,
attributes_to_crop,
crop_length: other.crop_length,
attributes_to_highlight,
filters: other.filters,
matches: other.matches,
facet_filters,
facet_distributions,
})
}
}
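// A conversion sketch (hypothetical values): comma-separated GET parameters
// are split into lists before building the `SearchQuery`, so
//
//   GET /indexes/movies/search?q=batman&limit=5&attributesToRetrieve=title,overview
//
// yields a query with `q: Some("batman")`, `limit: 5` and
// `attributes_to_retrieve: Some(vec!["title", "overview"])`.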
#[get("/indexes/{index_uid}/search", wrap = "Authentication::Public")]
async fn search_with_url_query(
data: web::Data<Data>,
path: web::Path<IndexParam>,
params: web::Query<SearchQueryGet>,
) -> Result<HttpResponse, ResponseError> {
let query: SearchQuery = match params.into_inner().try_into() {
Ok(q) => q,
Err(e) => {
return Ok(HttpResponse::BadRequest().body(serde_json::json!({ "error": e.to_string() })))
}
};
let search_result = data.search(&path.index_uid, query).await;
match search_result {
Ok(docs) => {
let docs = serde_json::to_string(&docs).unwrap();
Ok(HttpResponse::Ok().body(docs))
}
Err(e) => {
Ok(HttpResponse::BadRequest().body(serde_json::json!({ "error": e.to_string() })))
}
}
}
#[post("/indexes/{index_uid}/search", wrap = "Authentication::Public")]
async fn search_with_post(
data: web::Data<Data>,
path: web::Path<IndexParam>,
params: web::Json<SearchQuery>,
) -> Result<HttpResponse, ResponseError> {
let search_result = data.search(&path.index_uid, params.into_inner()).await;
match search_result {
Ok(docs) => {
let docs = serde_json::to_string(&docs).unwrap();
Ok(HttpResponse::Ok().body(docs))
}
Err(e) => {
Ok(HttpResponse::BadRequest().body(serde_json::json!({ "error": e.to_string() })))
}
}
}


@@ -0,0 +1,43 @@
use actix_web::{web, HttpResponse, get};
use crate::error::{Error, ResponseError};
use crate::helpers::Authentication;
use crate::make_update_delete_routes;
use crate::Data;
#[get(
"/indexes/{index_uid}/settings/attributes-for-faceting",
wrap = "Authentication::Private"
)]
async fn get(
data: web::Data<Data>,
index_uid: web::Path<String>,
) -> Result<HttpResponse, ResponseError> {
let index = data
.db
.load()
.open_index(&index_uid.as_ref())
.ok_or(Error::index_not_found(&index_uid.as_ref()))?;
let attributes_for_faceting = data.db.load().main_read::<_, _, ResponseError>(|reader| {
let schema = index.main.schema(reader)?;
let attrs = index.main.attributes_for_faceting(reader)?;
let attr_names = match (&schema, &attrs) {
(Some(schema), Some(attrs)) => attrs
.iter()
.filter_map(|&id| schema.name(id))
.map(str::to_string)
.collect(),
_ => vec![],
};
Ok(attr_names)
})?;
Ok(HttpResponse::Ok().json(attributes_for_faceting))
}
make_update_delete_routes!(
"/indexes/{index_uid}/settings/attributes-for-faceting",
Vec<String>,
attributes_for_faceting
);


@@ -0,0 +1,25 @@
use std::collections::HashSet;
use actix_web::{web, HttpResponse, get};
use crate::error::{Error, ResponseError};
use crate::helpers::Authentication;
use crate::make_update_delete_routes;
use crate::Data;
#[get(
"/indexes/{index_uid}/settings/displayed-attributes",
wrap = "Authentication::Private"
)]
async fn get(
data: web::Data<Data>,
index_uid: web::Path<String>,
) -> Result<HttpResponse, ResponseError> {
todo!()
}
make_update_delete_routes!(
"/indexes/{index_uid}/settings/displayed-attributes",
HashSet<String>,
displayed_attributes
);


@@ -0,0 +1,36 @@
use crate::make_update_delete_routes;
use actix_web::{web, HttpResponse, get};
use crate::error::{Error, ResponseError};
use crate::helpers::Authentication;
use crate::Data;
#[get(
"/indexes/{index_uid}/settings/distinct-attribute",
wrap = "Authentication::Private"
)]
async fn get(
data: web::Data<Data>,
index_uid: web::Path<String>,
) -> Result<HttpResponse, ResponseError> {
let index = data
.db
.load()
.open_index(&index_uid.as_ref())
.ok_or(Error::index_not_found(&index_uid.as_ref()))?;
let reader = data.db.load().main_read_txn()?;
let distinct_attribute_id = index.main.distinct_attribute(&reader)?;
let schema = index.main.schema(&reader)?;
let distinct_attribute = match (schema, distinct_attribute_id) {
(Some(schema), Some(id)) => schema.name(id).map(str::to_string),
_ => None,
};
Ok(HttpResponse::Ok().json(distinct_attribute))
}
make_update_delete_routes!(
"/indexes/{index_uid}/settings/distinct-attribute",
String,
distinct_attribute
);


@@ -0,0 +1,183 @@
use actix_web::{web, HttpResponse, delete, get, post};
use crate::Data;
use crate::error::ResponseError;
use crate::index::Settings;
use crate::helpers::Authentication;
#[macro_export]
macro_rules! make_setting_route {
($route:literal, $type:ty, $attr:ident) => {
mod $attr {
use actix_web::{web, HttpResponse};
use crate::data;
use crate::error::ResponseError;
use crate::helpers::Authentication;
use crate::index::Settings;
#[actix_web::delete($route, wrap = "Authentication::Private")]
pub async fn delete(
data: web::Data<data::Data>,
index_uid: web::Path<String>,
) -> Result<HttpResponse, ResponseError> {
use crate::index::Settings;
let settings = Settings {
$attr: Some(None),
..Default::default()
};
match data.update_settings(index_uid.into_inner(), settings).await {
Ok(update_status) => {
let json = serde_json::to_string(&update_status).unwrap();
Ok(HttpResponse::Ok().body(json))
}
Err(e) => {
Ok(HttpResponse::BadRequest().body(serde_json::json!({ "error": e.to_string() })))
}
}
}
#[actix_web::post($route, wrap = "Authentication::Private")]
pub async fn update(
data: actix_web::web::Data<data::Data>,
index_uid: actix_web::web::Path<String>,
body: actix_web::web::Json<Option<$type>>,
) -> std::result::Result<HttpResponse, ResponseError> {
let settings = Settings {
$attr: Some(body.into_inner()),
..Default::default()
};
match data.update_settings(index_uid.into_inner(), settings).await {
Ok(update_status) => {
let json = serde_json::to_string(&update_status).unwrap();
Ok(HttpResponse::Ok().body(json))
}
Err(e) => {
Ok(HttpResponse::BadRequest().body(serde_json::json!({ "error": e.to_string() })))
}
}
}
#[actix_web::get($route, wrap = "Authentication::Private")]
pub async fn get(
data: actix_web::web::Data<data::Data>,
index_uid: actix_web::web::Path<String>,
) -> std::result::Result<HttpResponse, ResponseError> {
match data.settings(index_uid.as_ref()).await {
Ok(settings) => {
let setting = settings.$attr;
let json = serde_json::to_string(&setting).unwrap();
Ok(HttpResponse::Ok().body(json))
}
Err(e) => {
Ok(HttpResponse::BadRequest().body(serde_json::json!({ "error": e.to_string() })))
}
}
}
}
};
}
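// Each invocation below expands to a module named after the setting attribute,
// containing `get`, `update` and `delete` handlers for the given route;
// `create_services!` further down registers those three handlers per module.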
make_setting_route!(
"/indexes/{index_uid}/settings/attributes-for-faceting",
std::collections::HashMap<String, String>,
faceted_attributes
);
make_setting_route!(
"/indexes/{index_uid}/settings/displayed-attributes",
Vec<String>,
displayed_attributes
);
make_setting_route!(
"/indexes/{index_uid}/settings/searchable-attributes",
Vec<String>,
searchable_attributes
);
//make_setting_route!(
//"/indexes/{index_uid}/settings/distinct-attribute",
//String,
//distinct_attribute
//);
//make_setting_route!(
//"/indexes/{index_uid}/settings/ranking-rules",
//Vec<String>,
//ranking_rules
//);
macro_rules! create_services {
($($mod:ident),*) => {
pub fn services(cfg: &mut web::ServiceConfig) {
cfg
.service(update_all)
.service(get_all)
.service(delete_all)
$(
.service($mod::get)
.service($mod::update)
.service($mod::delete)
)*;
}
};
}
create_services!(
faceted_attributes,
displayed_attributes,
searchable_attributes
);
#[post("/indexes/{index_uid}/settings", wrap = "Authentication::Private")]
async fn update_all(
data: web::Data<Data>,
index_uid: web::Path<String>,
body: web::Json<Settings>,
) -> Result<HttpResponse, ResponseError> {
match data.update_settings(index_uid.into_inner(), body.into_inner()).await {
Ok(update_result) => {
Ok(HttpResponse::Ok().json(&update_result))
}
Err(e) => {
Ok(HttpResponse::BadRequest().json(serde_json::json!({ "error": e.to_string() })))
}
}
}
#[get("/indexes/{index_uid}/settings", wrap = "Authentication::Private")]
async fn get_all(
data: web::Data<Data>,
index_uid: web::Path<String>,
) -> Result<HttpResponse, ResponseError> {
match data.settings(index_uid.as_ref()).await {
Ok(settings) => {
Ok(HttpResponse::Ok().json(&settings))
}
Err(e) => {
Ok(HttpResponse::BadRequest().json(serde_json::json!({ "error": e.to_string() })))
}
}
}
#[delete("/indexes/{index_uid}/settings", wrap = "Authentication::Private")]
async fn delete_all(
data: web::Data<Data>,
index_uid: web::Path<String>,
) -> Result<HttpResponse, ResponseError> {
let settings = Settings::cleared();
match data.update_settings(index_uid.into_inner(), settings).await {
Ok(update_result) => {
Ok(HttpResponse::Ok().json(&update_result))
}
Err(e) => {
Ok(HttpResponse::BadRequest().json(serde_json::json!({ "error": e.to_string() })))
}
}
}

View file

@ -0,0 +1,23 @@
use crate::make_update_delete_routes;
use actix_web::{web, HttpResponse, get};
use crate::error::{Error, ResponseError};
use crate::helpers::Authentication;
use crate::Data;
#[get(
"/indexes/{index_uid}/settings/ranking-rules",
wrap = "Authentication::Private"
)]
async fn get(
_data: web::Data<Data>,
_index_uid: web::Path<String>,
) -> Result<HttpResponse, ResponseError> {
todo!()
}
make_update_delete_routes!(
"/indexes/{index_uid}/settings/ranking-rules",
Vec<String>,
ranking_rules
);
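The GET handler above is still todo!(). A sketch of a possible body, mirroring the stop_words and synonyms getters later in this commit — the ranking_rules accessor on the main store is an assumption made by analogy, not confirmed by this diff:
async fn get(
data: web::Data<Data>,
index_uid: web::Path<String>,
) -> Result<HttpResponse, ResponseError> {
let index = data
.db
.load()
.open_index(index_uid.as_ref())
.ok_or_else(|| Error::index_not_found(index_uid.as_ref()))?;
let reader = data.db.load().main_read_txn()?;
// assumed accessor, by analogy with `index.main.stop_words(&reader)`
let ranking_rules = index.main.ranking_rules(&reader)?;
Ok(HttpResponse::Ok().json(ranking_rules))
}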

View file

@ -0,0 +1,34 @@
use actix_web::{web, HttpResponse, get};
use crate::data::get_indexed_attributes;
use crate::error::{Error, ResponseError};
use crate::helpers::Authentication;
use crate::make_update_delete_routes;
use crate::Data;
#[get(
"/indexes/{index_uid}/settings/searchable-attributes",
wrap = "Authentication::Private"
)]
async fn get(
data: web::Data<Data>,
index_uid: web::Path<String>,
) -> Result<HttpResponse, ResponseError> {
let index = data
.db
.load()
.open_index(index_uid.as_ref())
.ok_or_else(|| Error::index_not_found(index_uid.as_ref()))?;
let reader = data.db.load().main_read_txn()?;
let schema = index.main.schema(&reader)?;
let searchable_attributes: Option<Vec<String>> = schema.as_ref().map(get_indexed_attributes);
Ok(HttpResponse::Ok().json(searchable_attributes))
}
make_update_delete_routes!(
"/indexes/{index_uid}/settings/searchable-attributes",
Vec<String>,
searchable_attributes
);

View file

@ -0,0 +1,33 @@
use std::collections::BTreeSet;
use crate::make_update_delete_routes;
use actix_web::{web, HttpResponse, get};
use crate::error::{Error, ResponseError};
use crate::helpers::Authentication;
use crate::Data;
#[get(
"/indexes/{index_uid}/settings/stop-words",
wrap = "Authentication::Private"
)]
async fn get(
data: web::Data<Data>,
index_uid: web::Path<String>,
) -> Result<HttpResponse, ResponseError> {
let index = data
.db
.load()
.open_index(index_uid.as_ref())
.ok_or_else(|| Error::index_not_found(index_uid.as_ref()))?;
let reader = data.db.load().main_read_txn()?;
let stop_words = index.main.stop_words(&reader)?;
Ok(HttpResponse::Ok().json(stop_words))
}
make_update_delete_routes!(
"/indexes/{index_uid}/settings/stop-words",
BTreeSet<String>,
stop_words
);
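For reference, the update route generated for this setting accepts a plain JSON array, deserialized into the BTreeSet<String> above, e.g. ["a", "an", "the"]. Using a set rather than a list means duplicates are collapsed and the words come back sorted on GET.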

View file

@ -0,0 +1,43 @@
use std::collections::BTreeMap;
use actix_web::{web, HttpResponse, get};
use indexmap::IndexMap;
use crate::error::{Error, ResponseError};
use crate::helpers::Authentication;
use crate::make_update_delete_routes;
use crate::Data;
#[get(
"/indexes/{index_uid}/settings/synonyms",
wrap = "Authentication::Private"
)]
async fn get(
data: web::Data<Data>,
index_uid: web::Path<String>,
) -> Result<HttpResponse, ResponseError> {
let index = data
.db
.load()
.open_index(index_uid.as_ref())
.ok_or_else(|| Error::index_not_found(index_uid.as_ref()))?;
let reader = data.db.load().main_read_txn()?;
let synonyms_list = index.main.synonyms(&reader)?;
let mut synonyms = IndexMap::new();
let index_synonyms = &index.synonyms;
for synonym in synonyms_list {
let list = index_synonyms.synonyms(&reader, synonym.as_bytes())?;
synonyms.insert(synonym, list);
}
Ok(HttpResponse::Ok().json(synonyms))
}
make_update_delete_routes!(
"/indexes/{index_uid}/settings/synonyms",
BTreeMap<String, Vec<String>>,
synonyms
);
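The loop above denormalizes the synonyms store into a map keyed by each source word, so a GET on this route returns a JSON object of the following shape (values are illustrative):
{
"wolverine": ["logan", "xmen"],
"logan": ["wolverine", "xmen"]
}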

View file

@ -0,0 +1,60 @@
use std::collections::{HashMap, BTreeMap};
use actix_web::web;
use actix_web::HttpResponse;
use actix_web::get;
use chrono::{DateTime, Utc};
use serde::Serialize;
use crate::error::ResponseError;
use crate::helpers::Authentication;
use crate::routes::IndexParam;
use crate::Data;
pub fn services(cfg: &mut web::ServiceConfig) {
cfg.service(index_stats)
.service(get_stats)
.service(get_version);
}
#[derive(Serialize)]
#[serde(rename_all = "camelCase")]
struct IndexStatsResponse {
number_of_documents: u64,
is_indexing: bool,
fields_distribution: BTreeMap<String, usize>,
}
#[get("/indexes/{index_uid}/stats", wrap = "Authentication::Private")]
async fn index_stats(
_data: web::Data<Data>,
_path: web::Path<IndexParam>,
) -> Result<HttpResponse, ResponseError> {
todo!()
}
#[derive(Serialize)]
#[serde(rename_all = "camelCase")]
struct StatsResult {
database_size: u64,
last_update: Option<DateTime<Utc>>,
indexes: HashMap<String, IndexStatsResponse>,
}
#[get("/stats", wrap = "Authentication::Private")]
async fn get_stats(_data: web::Data<Data>) -> Result<HttpResponse, ResponseError> {
todo!()
}
#[derive(Serialize)]
#[serde(rename_all = "camelCase")]
struct VersionResponse {
commit_sha: String,
build_date: String,
pkg_version: String,
}
#[get("/version", wrap = "Authentication::Private")]
async fn get_version() -> HttpResponse {
todo!()
}
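get_version is still a stub here. A minimal sketch of a possible body, assuming build metadata comes from compile-time environment variables — CARGO_PKG_VERSION is guaranteed by Cargo, while the VERGEN_* variables are assumptions:
#[get("/version", wrap = "Authentication::Private")]
async fn get_version() -> HttpResponse {
HttpResponse::Ok().json(VersionResponse {
// hypothetical vergen-provided variables; fall back when absent
commit_sha: option_env!("VERGEN_GIT_SHA").unwrap_or("unknown").to_string(),
build_date: option_env!("VERGEN_BUILD_TIMESTAMP").unwrap_or("unknown").to_string(),
pkg_version: env!("CARGO_PKG_VERSION").to_string(),
})
}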

View file

@ -0,0 +1,46 @@
use actix_web::{web, HttpResponse};
use actix_web::{delete, get, post};
use std::collections::BTreeSet;
use crate::error::ResponseError;
use crate::helpers::Authentication;
use crate::routes::IndexParam;
use crate::Data;
pub fn services(cfg: &mut web::ServiceConfig) {
cfg.service(get).service(update).service(delete);
}
#[get(
"/indexes/{index_uid}/settings/stop-words",
wrap = "Authentication::Private"
)]
async fn get(
_data: web::Data<Data>,
_path: web::Path<IndexParam>,
) -> Result<HttpResponse, ResponseError> {
todo!()
}
#[post(
"/indexes/{index_uid}/settings/stop-words",
wrap = "Authentication::Private"
)]
async fn update(
_data: web::Data<Data>,
_path: web::Path<IndexParam>,
_body: web::Json<BTreeSet<String>>,
) -> Result<HttpResponse, ResponseError> {
todo!()
}
#[delete(
"/indexes/{index_uid}/settings/stop-words",
wrap = "Authentication::Private"
)]
async fn delete(
_data: web::Data<Data>,
_path: web::Path<IndexParam>,
) -> Result<HttpResponse, ResponseError> {
todo!()
}

View file

@ -0,0 +1,47 @@
use std::collections::BTreeMap;
use actix_web::{web, HttpResponse};
use actix_web::{delete, get, post};
use crate::error::ResponseError;
use crate::helpers::Authentication;
use crate::routes::IndexParam;
use crate::Data;
pub fn services(cfg: &mut web::ServiceConfig) {
cfg.service(get).service(update).service(delete);
}
#[get(
"/indexes/{index_uid}/settings/synonyms",
wrap = "Authentication::Private"
)]
async fn get(
_data: web::Data<Data>,
_path: web::Path<IndexParam>,
) -> Result<HttpResponse, ResponseError> {
todo!()
}
#[post(
"/indexes/{index_uid}/settings/synonyms",
wrap = "Authentication::Private"
)]
async fn update(
_data: web::Data<Data>,
_path: web::Path<IndexParam>,
_body: web::Json<BTreeMap<String, Vec<String>>>,
) -> Result<HttpResponse, ResponseError> {
todo!()
}
#[delete(
"/indexes/{index_uid}/settings/synonyms",
wrap = "Authentication::Private"
)]
async fn delete(
_data: web::Data<Data>,
_path: web::Path<IndexParam>,
) -> Result<HttpResponse, ResponseError> {
todo!()
}

View file

@ -0,0 +1,96 @@
use crate::Data;
use crate::error::Error;
use crate::helpers::compression;
use log::error;
use std::fs::create_dir_all;
use std::path::Path;
use std::thread;
use std::time::Duration;
use tempfile::TempDir;
pub fn load_snapshot(
db_path: &str,
snapshot_path: &Path,
ignore_snapshot_if_db_exists: bool,
ignore_missing_snapshot: bool
) -> Result<(), Error> {
let db_path = Path::new(db_path);
if !db_path.exists() && snapshot_path.exists() {
compression::from_tar_gz(snapshot_path, db_path)
} else if db_path.exists() && !ignore_snapshot_if_db_exists {
Err(Error::Internal(format!("database already exists at {:?}, try to delete it or rename it", db_path.canonicalize().unwrap_or(db_path.into()))))
} else if !snapshot_path.exists() && !ignore_missing_snapshot {
Err(Error::Internal(format!("snapshot doesn't exist at {:?}", snapshot_path.canonicalize().unwrap_or(snapshot_path.into()))))
} else {
Ok(())
}
}
pub fn create_snapshot(data: &Data, snapshot_path: &Path) -> Result<(), Error> {
let tmp_dir = TempDir::new()?;
data.db.copy_and_compact_to_path(tmp_dir.path())?;
compression::to_tar_gz(tmp_dir.path(), snapshot_path).map_err(|e| Error::Internal(format!("something went wrong during snapshot compression: {}", e)))
}
pub fn schedule_snapshot(data: Data, snapshot_dir: &Path, time_gap_s: u64) -> Result<(), Error> {
if snapshot_dir.file_name().is_none() {
return Err(Error::Internal("invalid snapshot file path".to_string()));
}
let db_name = Path::new(&data.db_path).file_name().ok_or_else(|| Error::Internal("invalid database name".to_string()))?;
create_dir_all(snapshot_dir)?;
let snapshot_path = snapshot_dir.join(format!("{}.snapshot", db_name.to_str().unwrap_or("data.ms")));
thread::spawn(move || loop {
if let Err(e) = create_snapshot(&data, &snapshot_path) {
error!("Unsuccessful snapshot creation: {}", e);
}
thread::sleep(Duration::from_secs(time_gap_s));
});
Ok(())
}
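A sketch of the expected boot-time wiring, assuming Opt exposes snapshot-related fields (the field names below are assumptions, not confirmed by this diff):
// hypothetical startup sequence:
// load_snapshot(&opt.db_path, &snapshot_path, opt.ignore_snapshot_if_db_exists, opt.ignore_missing_snapshot)?;
// let data = Data::new(opt.clone())?;
// if let Some(ref dir) = opt.snapshot_dir {
//     schedule_snapshot(data.clone(), dir, opt.snapshot_interval_sec)?;
// }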
#[cfg(test)]
mod tests {
use super::*;
use std::io::prelude::*;
use std::fs;
#[test]
fn test_pack_unpack() {
let tempdir = TempDir::new().unwrap();
let test_dir = tempdir.path();
let src_dir = test_dir.join("src");
let dest_dir = test_dir.join("complex/destination/path/");
let archive_path = test_dir.join("archive.snapshot");
let file_1_relative = Path::new("file1.txt");
let subdir_relative = Path::new("subdir/");
let file_2_relative = Path::new("subdir/file2.txt");
create_dir_all(src_dir.join(subdir_relative)).unwrap();
fs::File::create(src_dir.join(file_1_relative)).unwrap().write_all(b"Hello_file_1").unwrap();
fs::File::create(src_dir.join(file_2_relative)).unwrap().write_all(b"Hello_file_2").unwrap();
assert!(compression::to_tar_gz(&src_dir, &archive_path).is_ok());
assert!(archive_path.exists());
assert!(load_snapshot(dest_dir.to_str().unwrap(), &archive_path, false, false).is_ok());
assert!(dest_dir.exists());
assert!(dest_dir.join(file_1_relative).exists());
assert!(dest_dir.join(subdir_relative).exists());
assert!(dest_dir.join(file_2_relative).exists());
let contents = fs::read_to_string(dest_dir.join(file_1_relative)).unwrap();
assert_eq!(contents, "Hello_file_1");
let contents = fs::read_to_string(dest_dir.join(file_2_relative)).unwrap();
assert_eq!(contents, "Hello_file_2");
}
}