mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-07-04 04:17:10 +02:00
get rids of the index crate + the document_types crate
This commit is contained in:
parent
f3ec39a769
commit
f456fb5e0b
30 changed files with 316 additions and 2207 deletions
|
@ -1,26 +1,24 @@
|
|||
use std::io::Cursor;
|
||||
|
||||
use actix_web::error::PayloadError;
|
||||
use actix_web::http::header::CONTENT_TYPE;
|
||||
use actix_web::web::{Bytes, Data};
|
||||
use actix_web::web::Data;
|
||||
use actix_web::HttpMessage;
|
||||
use actix_web::{web, HttpRequest, HttpResponse};
|
||||
use bstr::ByteSlice;
|
||||
use document_formats::{read_csv, read_json, read_ndjson, PayloadType};
|
||||
use futures::{Stream, StreamExt};
|
||||
use index::{retrieve_document, retrieve_documents};
|
||||
use index_scheduler::milli::update::IndexDocumentsMethod;
|
||||
use index_scheduler::IndexScheduler;
|
||||
use index_scheduler::{KindWithContent, TaskView};
|
||||
use futures::StreamExt;
|
||||
use index_scheduler::{IndexScheduler, KindWithContent, TaskView};
|
||||
use log::debug;
|
||||
use meilisearch_types::document_formats::{read_csv, read_json, read_ndjson, PayloadType};
|
||||
use meilisearch_types::error::ResponseError;
|
||||
use meilisearch_types::heed::RoTxn;
|
||||
use meilisearch_types::milli::update::IndexDocumentsMethod;
|
||||
use meilisearch_types::star_or::StarOr;
|
||||
use meilisearch_types::{milli, Document, Index};
|
||||
use mime::Mime;
|
||||
use once_cell::sync::Lazy;
|
||||
use serde::Deserialize;
|
||||
use serde_cs::vec::CS;
|
||||
use serde_json::Value;
|
||||
use tokio::sync::mpsc;
|
||||
|
||||
use crate::analytics::Analytics;
|
||||
use crate::error::MeilisearchHttpError;
|
||||
|
@ -37,17 +35,6 @@ static ACCEPTED_CONTENT_TYPE: Lazy<Vec<String>> = Lazy::new(|| {
|
|||
]
|
||||
});
|
||||
|
||||
/// This is required because Payload is not Sync nor Send
|
||||
fn payload_to_stream(mut payload: Payload) -> impl Stream<Item = Result<Bytes, PayloadError>> {
|
||||
let (snd, recv) = mpsc::channel(1);
|
||||
tokio::task::spawn_local(async move {
|
||||
while let Some(data) = payload.next().await {
|
||||
let _ = snd.send(data).await;
|
||||
}
|
||||
});
|
||||
tokio_stream::wrappers::ReceiverStream::new(recv)
|
||||
}
|
||||
|
||||
/// Extracts the mime type from the content type and return
|
||||
/// a meilisearch error if anything bad happen.
|
||||
fn extract_mime_type(req: &HttpRequest) -> Result<Option<Mime>, MeilisearchHttpError> {
|
||||
|
@ -344,3 +331,76 @@ pub async fn clear_all_documents(
|
|||
debug!("returns: {:?}", task);
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
}
|
||||
|
||||
fn all_documents<'a>(
|
||||
index: &Index,
|
||||
rtxn: &'a RoTxn,
|
||||
) -> Result<impl Iterator<Item = Result<Document, ResponseError>> + 'a, ResponseError> {
|
||||
let fields_ids_map = index.fields_ids_map(rtxn)?;
|
||||
let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
|
||||
|
||||
Ok(index.all_documents(rtxn)?.map(move |ret| {
|
||||
ret.map_err(ResponseError::from)
|
||||
.and_then(|(_key, document)| -> Result<_, ResponseError> {
|
||||
Ok(milli::obkv_to_json(&all_fields, &fields_ids_map, document)?)
|
||||
})
|
||||
}))
|
||||
}
|
||||
|
||||
fn retrieve_documents<S: AsRef<str>>(
|
||||
index: &Index,
|
||||
offset: usize,
|
||||
limit: usize,
|
||||
attributes_to_retrieve: Option<Vec<S>>,
|
||||
) -> Result<(u64, Vec<Document>), ResponseError> {
|
||||
let rtxn = index.read_txn()?;
|
||||
|
||||
let mut documents = Vec::new();
|
||||
for document in all_documents(index, &rtxn)?.skip(offset).take(limit) {
|
||||
let document = match &attributes_to_retrieve {
|
||||
Some(attributes_to_retrieve) => permissive_json_pointer::select_values(
|
||||
&document?,
|
||||
attributes_to_retrieve.iter().map(|s| s.as_ref()),
|
||||
),
|
||||
None => document?,
|
||||
};
|
||||
documents.push(document);
|
||||
}
|
||||
|
||||
let number_of_documents = index.number_of_documents(&rtxn)?;
|
||||
Ok((number_of_documents, documents))
|
||||
}
|
||||
|
||||
fn retrieve_document<S: AsRef<str>>(
|
||||
index: &Index,
|
||||
doc_id: &str,
|
||||
attributes_to_retrieve: Option<Vec<S>>,
|
||||
) -> Result<Document, ResponseError> {
|
||||
let txn = index.read_txn()?;
|
||||
|
||||
let fields_ids_map = index.fields_ids_map(&txn)?;
|
||||
let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
|
||||
|
||||
let internal_id = index
|
||||
.external_documents_ids(&txn)?
|
||||
.get(doc_id.as_bytes())
|
||||
.ok_or_else(|| MeilisearchHttpError::DocumentNotFound(doc_id.to_string()))?;
|
||||
|
||||
let document = index
|
||||
.documents(&txn, std::iter::once(internal_id))?
|
||||
.into_iter()
|
||||
.next()
|
||||
.map(|(_, d)| d)
|
||||
.ok_or_else(|| MeilisearchHttpError::DocumentNotFound(doc_id.to_string()))?;
|
||||
|
||||
let document = meilisearch_types::milli::obkv_to_json(&all_fields, &fields_ids_map, document)?;
|
||||
let document = match &attributes_to_retrieve {
|
||||
Some(attributes_to_retrieve) => permissive_json_pointer::select_values(
|
||||
&document,
|
||||
attributes_to_retrieve.iter().map(|s| s.as_ref()),
|
||||
),
|
||||
None => document,
|
||||
};
|
||||
|
||||
Ok(document)
|
||||
}
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
use actix_web::web::Data;
|
||||
use actix_web::{web, HttpRequest, HttpResponse};
|
||||
use index_scheduler::milli::{FieldDistribution, Index};
|
||||
use index_scheduler::{IndexScheduler, KindWithContent, Query, Status};
|
||||
use log::debug;
|
||||
use meilisearch_types::error::ResponseError;
|
||||
use meilisearch_types::milli::{self, FieldDistribution, Index};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_json::json;
|
||||
use time::OffsetDateTime;
|
||||
|
@ -51,7 +51,7 @@ pub struct IndexView {
|
|||
}
|
||||
|
||||
impl IndexView {
|
||||
fn new(uid: String, index: &Index) -> Result<IndexView, index::error::IndexError> {
|
||||
fn new(uid: String, index: &Index) -> Result<IndexView, milli::Error> {
|
||||
let rtxn = index.read_txn()?;
|
||||
Ok(IndexView {
|
||||
uid,
|
||||
|
|
|
@ -1,9 +1,5 @@
|
|||
use actix_web::web::Data;
|
||||
use actix_web::{web, HttpRequest, HttpResponse};
|
||||
use index::{
|
||||
perform_search, MatchingStrategy, SearchQuery, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER,
|
||||
DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT,
|
||||
};
|
||||
use index_scheduler::IndexScheduler;
|
||||
use log::debug;
|
||||
use meilisearch_auth::IndexSearchRules;
|
||||
|
@ -15,6 +11,10 @@ use serde_json::Value;
|
|||
use crate::analytics::{Analytics, SearchAggregator};
|
||||
use crate::extractors::authentication::{policies::*, GuardedData};
|
||||
use crate::extractors::sequential_extractor::SeqHandler;
|
||||
use crate::search::{
|
||||
perform_search, MatchingStrategy, SearchQuery, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER,
|
||||
DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT,
|
||||
};
|
||||
|
||||
pub fn configure(cfg: &mut web::ServiceConfig) {
|
||||
cfg.service(
|
||||
|
|
|
@ -1,14 +1,26 @@
|
|||
use std::collections::BTreeSet;
|
||||
use std::marker::PhantomData;
|
||||
|
||||
use actix_web::web::Data;
|
||||
use fst::IntoStreamer;
|
||||
use log::debug;
|
||||
|
||||
use actix_web::{web, HttpRequest, HttpResponse};
|
||||
use index::{Settings, Unchecked};
|
||||
use index_scheduler::{IndexScheduler, KindWithContent};
|
||||
use meilisearch_types::error::ResponseError;
|
||||
use meilisearch_types::heed::RoTxn;
|
||||
use meilisearch_types::milli::update::Setting;
|
||||
use meilisearch_types::milli::{self, DEFAULT_VALUES_PER_FACET};
|
||||
use meilisearch_types::settings::{
|
||||
Checked, FacetingSettings, MinWordSizeTyposSetting, PaginationSettings, Settings, TypoSettings,
|
||||
Unchecked,
|
||||
};
|
||||
use meilisearch_types::Index;
|
||||
use serde_json::json;
|
||||
|
||||
use crate::analytics::Analytics;
|
||||
use crate::extractors::authentication::{policies::*, GuardedData};
|
||||
use crate::search::DEFAULT_PAGINATION_MAX_TOTAL_HITS;
|
||||
|
||||
#[macro_export]
|
||||
macro_rules! make_setting_route {
|
||||
|
@ -18,14 +30,15 @@ macro_rules! make_setting_route {
|
|||
use actix_web::{web, HttpRequest, HttpResponse, Resource};
|
||||
use log::debug;
|
||||
|
||||
use index::Settings;
|
||||
use index_scheduler::milli::update::Setting;
|
||||
use index_scheduler::{IndexScheduler, KindWithContent};
|
||||
use meilisearch_types::milli::update::Setting;
|
||||
use meilisearch_types::settings::Settings;
|
||||
|
||||
use meilisearch_types::error::ResponseError;
|
||||
use $crate::analytics::Analytics;
|
||||
use $crate::extractors::authentication::{policies::*, GuardedData};
|
||||
use $crate::extractors::sequential_extractor::SeqHandler;
|
||||
use $crate::routes::indexes::settings::settings;
|
||||
|
||||
pub async fn delete(
|
||||
index_scheduler: GuardedData<
|
||||
|
@ -98,7 +111,7 @@ macro_rules! make_setting_route {
|
|||
) -> std::result::Result<HttpResponse, ResponseError> {
|
||||
let index = index_scheduler.index(&index_uid)?;
|
||||
let rtxn = index.read_txn()?;
|
||||
let settings = index::settings(&index, &rtxn)?;
|
||||
let settings = settings(&index, &rtxn)?;
|
||||
|
||||
debug!("returns: {:?}", settings);
|
||||
let mut json = serde_json::json!(&settings);
|
||||
|
@ -185,11 +198,11 @@ make_setting_route!(
|
|||
make_setting_route!(
|
||||
"/typo-tolerance",
|
||||
patch,
|
||||
index::updates::TypoSettings,
|
||||
meilisearch_types::settings::TypoSettings,
|
||||
typo_tolerance,
|
||||
"typoTolerance",
|
||||
analytics,
|
||||
|setting: &Option<index::updates::TypoSettings>, req: &HttpRequest| {
|
||||
|setting: &Option<meilisearch_types::settings::TypoSettings>, req: &HttpRequest| {
|
||||
use serde_json::json;
|
||||
|
||||
analytics.publish(
|
||||
|
@ -295,11 +308,11 @@ make_setting_route!(
|
|||
make_setting_route!(
|
||||
"/faceting",
|
||||
patch,
|
||||
index::updates::FacetingSettings,
|
||||
meilisearch_types::settings::FacetingSettings,
|
||||
faceting,
|
||||
"faceting",
|
||||
analytics,
|
||||
|setting: &Option<index::updates::FacetingSettings>, req: &HttpRequest| {
|
||||
|setting: &Option<meilisearch_types::settings::FacetingSettings>, req: &HttpRequest| {
|
||||
use serde_json::json;
|
||||
|
||||
analytics.publish(
|
||||
|
@ -317,11 +330,11 @@ make_setting_route!(
|
|||
make_setting_route!(
|
||||
"/pagination",
|
||||
patch,
|
||||
index::updates::PaginationSettings,
|
||||
meilisearch_types::settings::PaginationSettings,
|
||||
pagination,
|
||||
"pagination",
|
||||
analytics,
|
||||
|setting: &Option<index::updates::PaginationSettings>, req: &HttpRequest| {
|
||||
|setting: &Option<meilisearch_types::settings::PaginationSettings>, req: &HttpRequest| {
|
||||
use serde_json::json;
|
||||
|
||||
analytics.publish(
|
||||
|
@ -456,7 +469,7 @@ pub async fn get_all(
|
|||
) -> Result<HttpResponse, ResponseError> {
|
||||
let index = index_scheduler.index(&index_uid)?;
|
||||
let rtxn = index.read_txn()?;
|
||||
let new_settings = index::settings(&index, &rtxn)?;
|
||||
let new_settings = settings(&index, &rtxn)?;
|
||||
debug!("returns: {:?}", new_settings);
|
||||
Ok(HttpResponse::Ok().json(new_settings))
|
||||
}
|
||||
|
@ -479,3 +492,108 @@ pub async fn delete_all(
|
|||
debug!("returns: {:?}", task);
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
}
|
||||
|
||||
pub fn settings(index: &Index, rtxn: &RoTxn) -> Result<Settings<Checked>, milli::Error> {
|
||||
let displayed_attributes = index
|
||||
.displayed_fields(rtxn)?
|
||||
.map(|fields| fields.into_iter().map(String::from).collect());
|
||||
|
||||
let searchable_attributes = index
|
||||
.user_defined_searchable_fields(rtxn)?
|
||||
.map(|fields| fields.into_iter().map(String::from).collect());
|
||||
|
||||
let filterable_attributes = index.filterable_fields(rtxn)?.into_iter().collect();
|
||||
|
||||
let sortable_attributes = index.sortable_fields(rtxn)?.into_iter().collect();
|
||||
|
||||
let criteria = index
|
||||
.criteria(rtxn)?
|
||||
.into_iter()
|
||||
.map(|c| c.to_string())
|
||||
.collect();
|
||||
|
||||
let stop_words = index
|
||||
.stop_words(rtxn)?
|
||||
.map(|stop_words| -> Result<BTreeSet<_>, milli::Error> {
|
||||
Ok(stop_words.stream().into_strs()?.into_iter().collect())
|
||||
})
|
||||
.transpose()?
|
||||
.unwrap_or_default();
|
||||
let distinct_field = index.distinct_field(rtxn)?.map(String::from);
|
||||
|
||||
// in milli each word in the synonyms map were split on their separator. Since we lost
|
||||
// this information we are going to put space between words.
|
||||
let synonyms = index
|
||||
.synonyms(rtxn)?
|
||||
.iter()
|
||||
.map(|(key, values)| {
|
||||
(
|
||||
key.join(" "),
|
||||
values.iter().map(|value| value.join(" ")).collect(),
|
||||
)
|
||||
})
|
||||
.collect();
|
||||
|
||||
let min_typo_word_len = MinWordSizeTyposSetting {
|
||||
one_typo: Setting::Set(index.min_word_len_one_typo(rtxn)?),
|
||||
two_typos: Setting::Set(index.min_word_len_two_typos(rtxn)?),
|
||||
};
|
||||
|
||||
let disabled_words = match index.exact_words(rtxn)? {
|
||||
Some(fst) => fst.into_stream().into_strs()?.into_iter().collect(),
|
||||
None => BTreeSet::new(),
|
||||
};
|
||||
|
||||
let disabled_attributes = index
|
||||
.exact_attributes(rtxn)?
|
||||
.into_iter()
|
||||
.map(String::from)
|
||||
.collect();
|
||||
|
||||
let typo_tolerance = TypoSettings {
|
||||
enabled: Setting::Set(index.authorize_typos(rtxn)?),
|
||||
min_word_size_for_typos: Setting::Set(min_typo_word_len),
|
||||
disable_on_words: Setting::Set(disabled_words),
|
||||
disable_on_attributes: Setting::Set(disabled_attributes),
|
||||
};
|
||||
|
||||
let faceting = FacetingSettings {
|
||||
max_values_per_facet: Setting::Set(
|
||||
index
|
||||
.max_values_per_facet(rtxn)?
|
||||
.unwrap_or(DEFAULT_VALUES_PER_FACET),
|
||||
),
|
||||
};
|
||||
|
||||
let pagination = PaginationSettings {
|
||||
max_total_hits: Setting::Set(
|
||||
index
|
||||
.pagination_max_total_hits(rtxn)?
|
||||
.unwrap_or(DEFAULT_PAGINATION_MAX_TOTAL_HITS),
|
||||
),
|
||||
};
|
||||
|
||||
Ok(Settings {
|
||||
displayed_attributes: match displayed_attributes {
|
||||
Some(attrs) => Setting::Set(attrs),
|
||||
None => Setting::Reset,
|
||||
},
|
||||
searchable_attributes: match searchable_attributes {
|
||||
Some(attrs) => Setting::Set(attrs),
|
||||
None => Setting::Reset,
|
||||
},
|
||||
filterable_attributes: Setting::Set(filterable_attributes),
|
||||
sortable_attributes: Setting::Set(sortable_attributes),
|
||||
ranking_rules: Setting::Set(criteria),
|
||||
stop_words: Setting::Set(stop_words),
|
||||
distinct_attribute: match distinct_field {
|
||||
Some(field) => Setting::Set(field),
|
||||
None => Setting::Reset,
|
||||
},
|
||||
synonyms: Setting::Set(synonyms),
|
||||
typo_tolerance: Setting::Set(typo_tolerance),
|
||||
faceting: Setting::Set(faceting),
|
||||
pagination: Setting::Set(pagination),
|
||||
_kind: PhantomData,
|
||||
})
|
||||
}
|
||||
|
|
|
@ -2,10 +2,10 @@ use std::collections::BTreeMap;
|
|||
|
||||
use actix_web::web::Data;
|
||||
use actix_web::{web, HttpRequest, HttpResponse};
|
||||
use index::{Settings, Unchecked};
|
||||
use index_scheduler::{IndexScheduler, Query, Status};
|
||||
use log::debug;
|
||||
use meilisearch_types::error::ResponseError;
|
||||
use meilisearch_types::settings::{Settings, Unchecked};
|
||||
use meilisearch_types::star_or::StarOr;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_json::json;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue