architecture rework

This commit is contained in:
mpostma 2021-01-28 14:12:34 +01:00
parent 6a3f625e11
commit 74410d8c6b
No known key found for this signature in database
GPG key ID: CBC8A7C1D7A28C3A
14 changed files with 1065 additions and 310 deletions

View file

@ -5,19 +5,20 @@ pub use search::{SearchQuery, SearchResult};
use std::ops::Deref;
use std::sync::Arc;
use std::fs::create_dir_all;
use sha2::Digest;
use crate::{option::Opt, index_controller::Settings};
use crate::index_controller::{IndexStore, UpdateStore};
use crate::index_controller::{IndexController, LocalIndexController};
/// Handle around a reference-counted [`DataInner`]; cloning it clones the `Arc`.
// NOTE(review): this listing looks like a diff with its +/- markers stripped, which
// would explain the two `inner` lines below — confirm against the real file.
#[derive(Clone)]
pub struct Data {
// Presumably the pre-change field, with `DataInner` generic over the update store — confirm.
inner: Arc<DataInner<UpdateStore>>,
// Presumably the post-change field, with `DataInner` no longer generic — confirm.
inner: Arc<DataInner>,
}
impl Deref for Data {
type Target = DataInner<UpdateStore>;
type Target = DataInner;
fn deref(&self) -> &Self::Target {
&self.inner
@ -25,8 +26,8 @@ impl Deref for Data {
}
/// State held behind the `Arc` of `Data`: the index handle, the API keys and the options.
// NOTE(review): this listing looks like a diff with its +/- markers stripped; the two
// struct headers and the two index fields below are presumably the removed and the
// added versions of the same lines — confirm against the real file.
#[derive(Clone)]
// Presumably the pre-change header and field (generic over the index store type `I`).
pub struct DataInner<I> {
pub indexes: Arc<I>,
// Presumably the post-change header and field (concrete `LocalIndexController`).
pub struct DataInner {
pub index_controller: Arc<LocalIndexController>,
// API keys; private to this module.
api_keys: ApiKeys,
// The `Opt` value this instance was built with; private to this module.
options: Opt,
}
@ -58,8 +59,9 @@ impl ApiKeys {
impl Data {
pub fn new(options: Opt) -> anyhow::Result<Data> {
let path = options.db_path.clone();
let index_store = IndexStore::new(&path)?;
let index_controller = UpdateStore::new(index_store);
let indexer_opts = options.indexer_options.clone();
create_dir_all(&path)?;
let index_controller = LocalIndexController::new(&path, indexer_opts)?;
let indexes = Arc::new(index_controller);
let mut api_keys = ApiKeys {
@ -70,28 +72,31 @@ impl Data {
api_keys.generate_missing_api_keys();
let inner = DataInner { indexes, options, api_keys };
let inner = DataInner { index_controller: indexes, options, api_keys };
let inner = Arc::new(inner);
Ok(Data { inner })
}
pub fn settings<S: AsRef<str>>(&self, index_uid: S) -> anyhow::Result<Settings> {
let index = self.indexes
.get(&index_uid)?
let index = self.index_controller
.index(&index_uid)?
.ok_or_else(|| anyhow::anyhow!("Index {} does not exist.", index_uid.as_ref()))?;
let txn = index.read_txn()?;
let displayed_attributes = index
.displayed_fields()?
.displayed_fields(&txn)?
.map(|fields| fields.into_iter().map(String::from).collect())
.unwrap_or_else(|| vec!["*".to_string()]);
let searchable_attributes = index
.searchable_fields()?
.searchable_fields(&txn)?
.map(|fields| fields.into_iter().map(String::from).collect())
.unwrap_or_else(|| vec!["*".to_string()]);
let faceted_attributes = index.faceted_fields()?
let faceted_attributes = index
.faceted_fields(&txn)?
.into_iter()
.map(|(k, v)| (k, v.to_string()))
.collect();

View file

@ -4,11 +4,11 @@ use std::time::Instant;
use serde_json::{Value, Map};
use serde::{Deserialize, Serialize};
use milli::{SearchResult as Results, obkv_to_json};
use milli::{Index, obkv_to_json, FacetCondition};
use meilisearch_tokenizer::{Analyzer, AnalyzerConfig};
use anyhow::bail;
use crate::error::Error;
use crate::index_controller::IndexController;
use super::Data;
const DEFAULT_SEARCH_LIMIT: usize = 20;
@ -26,11 +26,68 @@ pub struct SearchQuery {
pub attributes_to_retrieve: Option<Vec<String>>,
pub attributes_to_crop: Option<Vec<String>>,
pub crop_length: Option<usize>,
pub attributes_to_highlight: Option<Vec<String>>,
pub attributes_to_highlight: Option<HashSet<String>>,
pub filters: Option<String>,
pub matches: Option<bool>,
pub facet_filters: Option<Value>,
pub facets_distribution: Option<Vec<String>>,
pub facet_condition: Option<String>,
}
impl SearchQuery {
    /// Executes this query against `index` and assembles the response payload.
    ///
    /// Only the query string (`q`), the facet condition and the offset are forwarded
    /// to the search, each when present. Every matching document is converted to a
    /// JSON object restricted to the index's displayed fields (every known field when
    /// none are configured) and is highlighted when `attributes_to_highlight` is set.
    ///
    /// # Errors
    ///
    /// Returns an error, instead of panicking, when the read transaction cannot be
    /// opened, when the facet condition cannot be parsed, when the search itself
    /// fails, or when field metadata or documents cannot be read or converted to JSON.
    pub fn perform(&self, index: impl AsRef<Index>) -> anyhow::Result<SearchResult> {
        let index = index.as_ref();
        // Started before the transaction is opened so the reported time covers it too.
        let before_search = Instant::now();
        let rtxn = index.read_txn()?;

        let mut search = index.search(&rtxn);

        if let Some(ref query) = self.q {
            search.query(query);
        }

        if let Some(ref condition) = self.facet_condition {
            // A blank condition is treated the same as no condition at all.
            if !condition.trim().is_empty() {
                // The condition text comes from the request, so a parse failure must
                // surface as an error to the caller rather than abort the worker.
                let condition = FacetCondition::from_str(&rtxn, index, condition)?;
                search.facet_condition(condition);
            }
        }

        if let Some(offset) = self.offset {
            search.offset(offset);
        }

        let milli::SearchResult {
            documents_ids,
            found_words,
            nb_hits,
            limit,
        } = search.execute()?;

        let fields_ids_map = index.fields_ids_map(&rtxn)?;

        // Without an explicit displayed-fields setting, expose every known field.
        let displayed_fields = match index.displayed_fields_ids(&rtxn)? {
            Some(fields) => fields,
            None => fields_ids_map.iter().map(|(id, _)| id).collect(),
        };

        // The highlighter is built over an empty stop-word set.
        let stop_words = fst::Set::default();
        let highlighter = Highlighter::new(&stop_words);

        let mut documents = Vec::new();
        for (_id, obkv) in index.documents(&rtxn, documents_ids)? {
            let mut object = obkv_to_json(&displayed_fields, &fields_ids_map, obkv)?;
            if let Some(ref attributes_to_highlight) = self.attributes_to_highlight {
                highlighter.highlight_record(&mut object, &found_words, attributes_to_highlight);
            }
            documents.push(object);
        }

        Ok(SearchResult {
            hits: documents,
            nb_hits,
            query: self.q.clone().unwrap_or_default(),
            limit,
            offset: self.offset.unwrap_or_default(),
            processing_time_ms: before_search.elapsed().as_millis(),
        })
    }
}
#[derive(Serialize)]
@ -105,45 +162,9 @@ impl<'a, A: AsRef<[u8]>> Highlighter<'a, A> {
// NOTE(review): this listing looks like a diff with its +/- markers stripped, so two
// versions of the `search` body appear interleaved below and the block is not valid
// Rust as shown — confirm against the real file before relying on it.
impl Data {
/// Runs `search_query` against the index identified by `index`.
pub fn search<S: AsRef<str>>(&self, index: S, search_query: SearchQuery) -> anyhow::Result<SearchResult> {
// --- Presumably the removed implementation starts here (uses `self.indexes`). ---
let start = Instant::now();
let index = self.indexes
.get(&index)?
.ok_or_else(|| Error::OpenIndex(format!("Index {} doesn't exists.", index.as_ref())))?;
let Results { found_words, documents_ids, nb_hits, limit, .. } = index.search(&search_query)?;
let fields_ids_map = index.fields_ids_map()?;
// Falls back to every field id in the map when no displayed fields are returned.
let displayed_fields = match index.displayed_fields_ids()? {
Some(fields) => fields,
None => fields_ids_map.iter().map(|(id, _)| id).collect(),
};
// A missing highlight list becomes an empty set.
let attributes_to_highlight = match search_query.attributes_to_highlight {
Some(fields) => fields.iter().map(ToOwned::to_owned).collect(),
None => HashSet::new(),
};
let stop_words = fst::Set::default();
let highlighter = Highlighter::new(&stop_words);
let mut documents = Vec::new();
for (_id, obkv) in index.documents(&documents_ids)? {
let mut object = obkv_to_json(&displayed_fields, &fields_ids_map, obkv).unwrap();
highlighter.highlight_record(&mut object, &found_words, &attributes_to_highlight);
documents.push(object);
// --- Presumably the added implementation: look the index up through
// `self.index_controller`, run `SearchQuery::perform` on it when it is found,
// and bail with an error when it is not. ---
match self.index_controller.index(&index)? {
Some(index) => Ok(search_query.perform(index)?),
None => bail!("index {:?} doesn't exists", index.as_ref()),
}
// --- Presumably the remainder of the removed implementation. ---
let processing_time_ms = start.elapsed().as_millis();
let result = SearchResult {
hits: documents,
nb_hits,
query: search_query.q.unwrap_or_default(),
offset: search_query.offset.unwrap_or(0),
limit,
processing_time_ms,
};
Ok(result)
}
}

View file

@ -1,15 +1,12 @@
use std::ops::Deref;
use milli::update::{IndexDocumentsMethod, UpdateFormat};
//use milli::update_store::UpdateStatus;
use async_compression::tokio_02::write::GzipEncoder;
use futures_util::stream::StreamExt;
use tokio::io::AsyncWriteExt;
use super::Data;
use crate::index_controller::IndexController;
use crate::index_controller::{UpdateStatusResponse, Settings};
use crate::index_controller::{IndexController, UpdateStatusResponse, Settings};
impl Data {
pub async fn add_documents<B, E, S>(
@ -39,8 +36,8 @@ impl Data {
let file = file.into_std().await;
let mmap = unsafe { memmap::Mmap::map(&file)? };
let indexes = self.indexes.clone();
let update = tokio::task::spawn_blocking(move ||indexes.add_documents(index, method, format, &mmap[..])).await??;
let index_controller = self.index_controller.clone();
let update = tokio::task::spawn_blocking(move ||index_controller.add_documents(index, method, format, &mmap[..])).await??;
Ok(update.into())
}
@ -49,7 +46,7 @@ impl Data {
index: S,
settings: Settings
) -> anyhow::Result<UpdateStatusResponse> {
let indexes = self.indexes.clone();
let indexes = self.index_controller.clone();
let update = tokio::task::spawn_blocking(move || indexes.update_settings(index, settings)).await??;
Ok(update.into())
}