get rid of the index crate + the document_types crate

Tamo 2022-10-11 17:42:43 +02:00 committed by Clément Renault
parent f3ec39a769
commit f456fb5e0b
30 changed files with 316 additions and 2207 deletions


@@ -47,17 +47,15 @@ jsonwebtoken = "8.1.1"
log = "0.4.17"
meilisearch-auth = { path = "../meilisearch-auth" }
meilisearch-types = { path = "../meilisearch-types" }
meilisearch-lib = { path = "../meilisearch-lib", default-features = false }
index = { path = "../index" }
index-scheduler = { path = "../index-scheduler" }
file-store = { path = "../file-store" }
document-formats = { path = "../document-formats" }
mimalloc = { version = "0.1.29", default-features = false }
mime = "0.3.16"
num_cpus = "1.13.1"
obkv = "0.2.0"
once_cell = "1.15.0"
parking_lot = "0.12.1"
permissive-json-pointer = { path = "../permissive-json-pointer" }
pin-project-lite = "0.2.9"
platform-dirs = "0.3.0"
rand = "0.8.5"
@@ -98,7 +96,7 @@ yaup = "0.2.1"
temp-env = "0.3.1"
[features]
default = ["analytics", "meilisearch-lib/default", "mini-dashboard"]
default = ["analytics", "meilisearch-types/default", "mini-dashboard"]
metrics = ["prometheus"]
analytics = ["segment"]
mini-dashboard = [
@@ -112,10 +110,10 @@ mini-dashboard = [
"tempfile",
"zip",
]
chinese = ["meilisearch-lib/chinese"]
hebrew = ["meilisearch-lib/hebrew"]
japanese = ["meilisearch-lib/japanese"]
thai = ["meilisearch-lib/thai"]
chinese = ["meilisearch-types/chinese"]
hebrew = ["meilisearch-types/hebrew"]
japanese = ["meilisearch-types/japanese"]
thai = ["meilisearch-types/thai"]
[package.metadata.mini-dashboard]
assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.2/build.zip"


@@ -1,7 +1,8 @@
use actix_web as aweb;
use aweb::error::{JsonPayloadError, QueryPayloadError};
use document_formats::DocumentFormatError;
use meilisearch_types::document_formats::DocumentFormatError;
use meilisearch_types::error::{Code, ErrorCode, ResponseError};
use serde_json::Value;
use tokio::task::JoinError;
#[derive(Debug, thiserror::Error)]
@@ -14,9 +15,19 @@ pub enum MeilisearchHttpError {
.1.iter().map(|s| format!("`{}`", s)).collect::<Vec<_>>().join(", ")
)]
InvalidContentType(String, Vec<String>),
#[error("Document `{0}` not found.")]
DocumentNotFound(String),
#[error("Invalid syntax for the filter parameter: `expected {}, found: {1}`.", .0.join(", "))]
InvalidExpression(&'static [&'static str], Value),
#[error(transparent)]
SerdeJson(#[from] serde_json::Error),
#[error(transparent)]
HeedError(#[from] meilisearch_types::heed::Error),
#[error(transparent)]
IndexScheduler(#[from] index_scheduler::Error),
#[error(transparent)]
Milli(#[from] meilisearch_types::milli::Error),
#[error(transparent)]
Payload(#[from] PayloadError),
#[error(transparent)]
FileStore(#[from] file_store::Error),
@@ -31,7 +42,12 @@ impl ErrorCode for MeilisearchHttpError {
match self {
MeilisearchHttpError::MissingContentType(_) => Code::MissingContentType,
MeilisearchHttpError::InvalidContentType(_, _) => Code::InvalidContentType,
MeilisearchHttpError::DocumentNotFound(_) => Code::DocumentNotFound,
MeilisearchHttpError::InvalidExpression(_, _) => Code::Filter,
MeilisearchHttpError::SerdeJson(_) => Code::Internal,
MeilisearchHttpError::HeedError(_) => Code::Internal,
MeilisearchHttpError::IndexScheduler(e) => e.error_code(),
MeilisearchHttpError::Milli(e) => e.error_code(),
MeilisearchHttpError::Payload(e) => e.error_code(),
MeilisearchHttpError::FileStore(_) => Code::Internal,
MeilisearchHttpError::DocumentFormat(e) => e.error_code(),
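The added variants either map straight to an internal code or delegate to the wrapped error's own `ErrorCode` implementation. A minimal sketch of how the new `DocumentNotFound` variant is expected to surface (hypothetical usage, not part of this commit):

use meilisearch_types::error::{Code, ErrorCode};

fn check_document_not_found() {
    // The new variant reports `Code::DocumentNotFound` through the same
    // `ErrorCode` trait the other variants use above.
    let err = MeilisearchHttpError::DocumentNotFound("movie-42".to_string());
    assert!(matches!(err.error_code(), Code::DocumentNotFound));
}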


@@ -6,6 +6,7 @@ pub mod analytics;
pub mod extractors;
pub mod option;
pub mod routes;
pub mod search;
#[cfg(feature = "metrics")]
pub mod metrics;
@@ -38,6 +39,7 @@ pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<IndexScheduler> {
opt.db_path.join("indexes"),
opt.max_index_size.get_bytes() as usize,
(&opt.indexer_options).try_into()?,
true,
#[cfg(test)]
todo!("We'll see later"),
)?;
@@ -45,8 +47,6 @@
/*
TODO: We should start a thread to handle the snapshots.
meilisearch
.set_max_index_size(opt.max_index_size.get_bytes() as usize)
.set_max_task_store_size(opt.max_task_db_size.get_bytes() as usize)
// snapshot
.set_ignore_missing_snapshot(opt.ignore_missing_snapshot)
.set_ignore_snapshot_if_db_exists(opt.ignore_snapshot_if_db_exists)


@@ -11,11 +11,7 @@ use std::{fmt, fs};
use byte_unit::{Byte, ByteError};
use clap::Parser;
use meilisearch_lib::{
export_to_env_if_not_present,
options::{IndexerOpts, SchedulerConfig},
};
use index_scheduler::milli::update::IndexerConfig;
use meilisearch_types::milli::update::IndexerConfig;
use rustls::{
server::{
AllowAnyAnonymousOrAuthenticatedClient, AllowAnyAuthenticatedClient,


@@ -1,26 +1,24 @@
use std::io::Cursor;
use actix_web::error::PayloadError;
use actix_web::http::header::CONTENT_TYPE;
use actix_web::web::{Bytes, Data};
use actix_web::web::Data;
use actix_web::HttpMessage;
use actix_web::{web, HttpRequest, HttpResponse};
use bstr::ByteSlice;
use document_formats::{read_csv, read_json, read_ndjson, PayloadType};
use futures::{Stream, StreamExt};
use index::{retrieve_document, retrieve_documents};
use index_scheduler::milli::update::IndexDocumentsMethod;
use index_scheduler::IndexScheduler;
use index_scheduler::{KindWithContent, TaskView};
use futures::StreamExt;
use index_scheduler::{IndexScheduler, KindWithContent, TaskView};
use log::debug;
use meilisearch_types::document_formats::{read_csv, read_json, read_ndjson, PayloadType};
use meilisearch_types::error::ResponseError;
use meilisearch_types::heed::RoTxn;
use meilisearch_types::milli::update::IndexDocumentsMethod;
use meilisearch_types::star_or::StarOr;
use meilisearch_types::{milli, Document, Index};
use mime::Mime;
use once_cell::sync::Lazy;
use serde::Deserialize;
use serde_cs::vec::CS;
use serde_json::Value;
use tokio::sync::mpsc;
use crate::analytics::Analytics;
use crate::error::MeilisearchHttpError;
@@ -37,17 +35,6 @@ static ACCEPTED_CONTENT_TYPE: Lazy<Vec<String>> = Lazy::new(|| {
]
});
/// This is required because Payload is neither Sync nor Send
fn payload_to_stream(mut payload: Payload) -> impl Stream<Item = Result<Bytes, PayloadError>> {
let (snd, recv) = mpsc::channel(1);
tokio::task::spawn_local(async move {
while let Some(data) = payload.next().await {
let _ = snd.send(data).await;
}
});
tokio_stream::wrappers::ReceiverStream::new(recv)
}
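// Editor's note (not part of this diff): the helper above existed only to
// bridge actix's !Send `Payload` stream into a Send stream through an mpsc
// channel; the rewritten routes appear to buffer the body instead (note the
// new `std::io::Cursor` import), so the bridge can go. The general pattern it
// implemented, sketched generically under the same tokio assumptions:
//
//     fn bridge<T: Send + 'static>(
//         mut src: impl futures::Stream<Item = T> + Unpin + 'static,
//     ) -> impl futures::Stream<Item = T> {
//         let (tx, rx) = tokio::sync::mpsc::channel(1);
//         // spawn_local keeps the !Send stream on its original thread.
//         tokio::task::spawn_local(async move {
//             use futures::StreamExt;
//             while let Some(item) = src.next().await {
//                 let _ = tx.send(item).await;
//             }
//         });
//         tokio_stream::wrappers::ReceiverStream::new(rx)
//     }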
/// Extracts the mime type from the content type and returns
/// a meilisearch error if anything goes wrong.
fn extract_mime_type(req: &HttpRequest) -> Result<Option<Mime>, MeilisearchHttpError> {
@@ -344,3 +331,76 @@ pub async fn clear_all_documents(
debug!("returns: {:?}", task);
Ok(HttpResponse::Accepted().json(task))
}
fn all_documents<'a>(
index: &Index,
rtxn: &'a RoTxn,
) -> Result<impl Iterator<Item = Result<Document, ResponseError>> + 'a, ResponseError> {
let fields_ids_map = index.fields_ids_map(rtxn)?;
let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
Ok(index.all_documents(rtxn)?.map(move |ret| {
ret.map_err(ResponseError::from)
.and_then(|(_key, document)| -> Result<_, ResponseError> {
Ok(milli::obkv_to_json(&all_fields, &fields_ids_map, document)?)
})
}))
}
fn retrieve_documents<S: AsRef<str>>(
index: &Index,
offset: usize,
limit: usize,
attributes_to_retrieve: Option<Vec<S>>,
) -> Result<(u64, Vec<Document>), ResponseError> {
let rtxn = index.read_txn()?;
let mut documents = Vec::new();
for document in all_documents(index, &rtxn)?.skip(offset).take(limit) {
let document = match &attributes_to_retrieve {
Some(attributes_to_retrieve) => permissive_json_pointer::select_values(
&document?,
attributes_to_retrieve.iter().map(|s| s.as_ref()),
),
None => document?,
};
documents.push(document);
}
let number_of_documents = index.number_of_documents(&rtxn)?;
Ok((number_of_documents, documents))
}
fn retrieve_document<S: AsRef<str>>(
index: &Index,
doc_id: &str,
attributes_to_retrieve: Option<Vec<S>>,
) -> Result<Document, ResponseError> {
let txn = index.read_txn()?;
let fields_ids_map = index.fields_ids_map(&txn)?;
let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
let internal_id = index
.external_documents_ids(&txn)?
.get(doc_id.as_bytes())
.ok_or_else(|| MeilisearchHttpError::DocumentNotFound(doc_id.to_string()))?;
let document = index
.documents(&txn, std::iter::once(internal_id))?
.into_iter()
.next()
.map(|(_, d)| d)
.ok_or_else(|| MeilisearchHttpError::DocumentNotFound(doc_id.to_string()))?;
let document = meilisearch_types::milli::obkv_to_json(&all_fields, &fields_ids_map, document)?;
let document = match &attributes_to_retrieve {
Some(attributes_to_retrieve) => permissive_json_pointer::select_values(
&document,
attributes_to_retrieve.iter().map(|s| s.as_ref()),
),
None => document,
};
Ok(document)
}
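A hypothetical usage sketch of the relocated helpers (the index, document id, and field names are illustrative, not from this commit):

fn list_and_get(index: &Index) -> Result<(), ResponseError> {
    // Second page of 20 documents, keeping only two fields per document.
    let (total, docs) =
        retrieve_documents(index, 20, 20, Some(vec!["title", "overview"]))?;
    assert!((docs.len() as u64) <= total);

    // A single document fetched by its external id, with all of its fields.
    let doc = retrieve_document(index, "movie-42", None::<Vec<&str>>)?;
    debug!("{:?}", doc);
    Ok(())
}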


@@ -1,9 +1,9 @@
use actix_web::web::Data;
use actix_web::{web, HttpRequest, HttpResponse};
use index_scheduler::milli::{FieldDistribution, Index};
use index_scheduler::{IndexScheduler, KindWithContent, Query, Status};
use log::debug;
use meilisearch_types::error::ResponseError;
use meilisearch_types::milli::{self, FieldDistribution, Index};
use serde::{Deserialize, Serialize};
use serde_json::json;
use time::OffsetDateTime;
@@ -51,7 +51,7 @@
}
impl IndexView {
fn new(uid: String, index: &Index) -> Result<IndexView, index::error::IndexError> {
fn new(uid: String, index: &Index) -> Result<IndexView, milli::Error> {
let rtxn = index.read_txn()?;
Ok(IndexView {
uid,


@@ -1,9 +1,5 @@
use actix_web::web::Data;
use actix_web::{web, HttpRequest, HttpResponse};
use index::{
perform_search, MatchingStrategy, SearchQuery, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER,
DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT,
};
use index_scheduler::IndexScheduler;
use log::debug;
use meilisearch_auth::IndexSearchRules;
@@ -15,6 +11,10 @@ use serde_json::Value;
use crate::analytics::{Analytics, SearchAggregator};
use crate::extractors::authentication::{policies::*, GuardedData};
use crate::extractors::sequential_extractor::SeqHandler;
use crate::search::{
perform_search, MatchingStrategy, SearchQuery, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER,
DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT,
};
pub fn configure(cfg: &mut web::ServiceConfig) {
cfg.service(


@@ -1,14 +1,26 @@
use std::collections::BTreeSet;
use std::marker::PhantomData;
use actix_web::web::Data;
use fst::IntoStreamer;
use log::debug;
use actix_web::{web, HttpRequest, HttpResponse};
use index::{Settings, Unchecked};
use index_scheduler::{IndexScheduler, KindWithContent};
use meilisearch_types::error::ResponseError;
use meilisearch_types::heed::RoTxn;
use meilisearch_types::milli::update::Setting;
use meilisearch_types::milli::{self, DEFAULT_VALUES_PER_FACET};
use meilisearch_types::settings::{
Checked, FacetingSettings, MinWordSizeTyposSetting, PaginationSettings, Settings, TypoSettings,
Unchecked,
};
use meilisearch_types::Index;
use serde_json::json;
use crate::analytics::Analytics;
use crate::extractors::authentication::{policies::*, GuardedData};
use crate::search::DEFAULT_PAGINATION_MAX_TOTAL_HITS;
#[macro_export]
macro_rules! make_setting_route {
@@ -18,14 +30,15 @@ macro_rules! make_setting_route {
use actix_web::{web, HttpRequest, HttpResponse, Resource};
use log::debug;
use index::Settings;
use index_scheduler::milli::update::Setting;
use index_scheduler::{IndexScheduler, KindWithContent};
use meilisearch_types::milli::update::Setting;
use meilisearch_types::settings::Settings;
use meilisearch_types::error::ResponseError;
use $crate::analytics::Analytics;
use $crate::extractors::authentication::{policies::*, GuardedData};
use $crate::extractors::sequential_extractor::SeqHandler;
use $crate::routes::indexes::settings::settings;
pub async fn delete(
index_scheduler: GuardedData<
@@ -98,7 +111,7 @@ macro_rules! make_setting_route {
) -> std::result::Result<HttpResponse, ResponseError> {
let index = index_scheduler.index(&index_uid)?;
let rtxn = index.read_txn()?;
let settings = index::settings(&index, &rtxn)?;
let settings = settings(&index, &rtxn)?;
debug!("returns: {:?}", settings);
let mut json = serde_json::json!(&settings);
@@ -185,11 +198,11 @@ make_setting_route!(
make_setting_route!(
"/typo-tolerance",
patch,
index::updates::TypoSettings,
meilisearch_types::settings::TypoSettings,
typo_tolerance,
"typoTolerance",
analytics,
|setting: &Option<index::updates::TypoSettings>, req: &HttpRequest| {
|setting: &Option<meilisearch_types::settings::TypoSettings>, req: &HttpRequest| {
use serde_json::json;
analytics.publish(
@@ -295,11 +308,11 @@ make_setting_route!(
make_setting_route!(
"/faceting",
patch,
index::updates::FacetingSettings,
meilisearch_types::settings::FacetingSettings,
faceting,
"faceting",
analytics,
|setting: &Option<index::updates::FacetingSettings>, req: &HttpRequest| {
|setting: &Option<meilisearch_types::settings::FacetingSettings>, req: &HttpRequest| {
use serde_json::json;
analytics.publish(
@@ -317,11 +330,11 @@ make_setting_route!(
make_setting_route!(
"/pagination",
patch,
index::updates::PaginationSettings,
meilisearch_types::settings::PaginationSettings,
pagination,
"pagination",
analytics,
|setting: &Option<index::updates::PaginationSettings>, req: &HttpRequest| {
|setting: &Option<meilisearch_types::settings::PaginationSettings>, req: &HttpRequest| {
use serde_json::json;
analytics.publish(
@@ -456,7 +469,7 @@ pub async fn get_all(
) -> Result<HttpResponse, ResponseError> {
let index = index_scheduler.index(&index_uid)?;
let rtxn = index.read_txn()?;
let new_settings = index::settings(&index, &rtxn)?;
let new_settings = settings(&index, &rtxn)?;
debug!("returns: {:?}", new_settings);
Ok(HttpResponse::Ok().json(new_settings))
}
@@ -479,3 +492,108 @@ pub async fn delete_all(
debug!("returns: {:?}", task);
Ok(HttpResponse::Accepted().json(task))
}
pub fn settings(index: &Index, rtxn: &RoTxn) -> Result<Settings<Checked>, milli::Error> {
let displayed_attributes = index
.displayed_fields(rtxn)?
.map(|fields| fields.into_iter().map(String::from).collect());
let searchable_attributes = index
.user_defined_searchable_fields(rtxn)?
.map(|fields| fields.into_iter().map(String::from).collect());
let filterable_attributes = index.filterable_fields(rtxn)?.into_iter().collect();
let sortable_attributes = index.sortable_fields(rtxn)?.into_iter().collect();
let criteria = index
.criteria(rtxn)?
.into_iter()
.map(|c| c.to_string())
.collect();
let stop_words = index
.stop_words(rtxn)?
.map(|stop_words| -> Result<BTreeSet<_>, milli::Error> {
Ok(stop_words.stream().into_strs()?.into_iter().collect())
})
.transpose()?
.unwrap_or_default();
let distinct_field = index.distinct_field(rtxn)?.map(String::from);
// In milli, each word in the synonyms map was split on its separator. Since we lost
// this information, we put a space back between the words.
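// e.g. a synonym key stored as ["new", "york"] is rendered back as "new york".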
let synonyms = index
.synonyms(rtxn)?
.iter()
.map(|(key, values)| {
(
key.join(" "),
values.iter().map(|value| value.join(" ")).collect(),
)
})
.collect();
let min_typo_word_len = MinWordSizeTyposSetting {
one_typo: Setting::Set(index.min_word_len_one_typo(rtxn)?),
two_typos: Setting::Set(index.min_word_len_two_typos(rtxn)?),
};
let disabled_words = match index.exact_words(rtxn)? {
Some(fst) => fst.into_stream().into_strs()?.into_iter().collect(),
None => BTreeSet::new(),
};
let disabled_attributes = index
.exact_attributes(rtxn)?
.into_iter()
.map(String::from)
.collect();
let typo_tolerance = TypoSettings {
enabled: Setting::Set(index.authorize_typos(rtxn)?),
min_word_size_for_typos: Setting::Set(min_typo_word_len),
disable_on_words: Setting::Set(disabled_words),
disable_on_attributes: Setting::Set(disabled_attributes),
};
let faceting = FacetingSettings {
max_values_per_facet: Setting::Set(
index
.max_values_per_facet(rtxn)?
.unwrap_or(DEFAULT_VALUES_PER_FACET),
),
};
let pagination = PaginationSettings {
max_total_hits: Setting::Set(
index
.pagination_max_total_hits(rtxn)?
.unwrap_or(DEFAULT_PAGINATION_MAX_TOTAL_HITS),
),
};
Ok(Settings {
displayed_attributes: match displayed_attributes {
Some(attrs) => Setting::Set(attrs),
None => Setting::Reset,
},
searchable_attributes: match searchable_attributes {
Some(attrs) => Setting::Set(attrs),
None => Setting::Reset,
},
filterable_attributes: Setting::Set(filterable_attributes),
sortable_attributes: Setting::Set(sortable_attributes),
ranking_rules: Setting::Set(criteria),
stop_words: Setting::Set(stop_words),
distinct_attribute: match distinct_field {
Some(field) => Setting::Set(field),
None => Setting::Reset,
},
synonyms: Setting::Set(synonyms),
typo_tolerance: Setting::Set(typo_tolerance),
faceting: Setting::Set(faceting),
pagination: Setting::Set(pagination),
_kind: PhantomData,
})
}
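A hypothetical usage sketch of the relocated `settings` helper, mirroring what the GET route above does (the index uid is illustrative):

fn dump_settings(index_scheduler: &IndexScheduler) -> Result<(), ResponseError> {
    let uid = "movies".to_string();
    let index = index_scheduler.index(&uid)?;
    let rtxn = index.read_txn()?;
    // Every value comes back wrapped in Setting::Set, or Setting::Reset when unset.
    let all_settings: Settings<Checked> = settings(&index, &rtxn)?;
    debug!("returns: {:?}", all_settings);
    Ok(())
}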


@@ -2,10 +2,10 @@ use std::collections::BTreeMap;
use actix_web::web::Data;
use actix_web::{web, HttpRequest, HttpResponse};
use index::{Settings, Unchecked};
use index_scheduler::{IndexScheduler, Query, Status};
use log::debug;
use meilisearch_types::error::ResponseError;
use meilisearch_types::settings::{Settings, Unchecked};
use meilisearch_types::star_or::StarOr;
use serde::{Deserialize, Serialize};
use serde_json::json;