mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-07-04 20:37:15 +02:00
WIP: refactor IndexController
change the architecture of the index controller to allow it to own an index store.
This commit is contained in:
parent
686f987180
commit
6a3f625e11
15 changed files with 1197 additions and 287 deletions
|
@ -1,196 +1,145 @@
|
|||
use std::fs::File;
|
||||
use std::io::{Read, Write};
|
||||
use std::path::{Path, PathBuf};
|
||||
mod index_store;
|
||||
mod update_store;
|
||||
|
||||
pub use index_store::IndexStore;
|
||||
pub use update_store::UpdateStore;
|
||||
|
||||
use std::num::NonZeroUsize;
|
||||
use std::ops::Deref;
|
||||
use std::collections::HashMap;
|
||||
|
||||
use anyhow::Result;
|
||||
use chrono::{DateTime, Utc};
|
||||
use dashmap::DashMap;
|
||||
use dashmap::mapref::one::Ref;
|
||||
use heed::types::{Str, SerdeBincode};
|
||||
use heed::{EnvOpenOptions, Env, Database};
|
||||
use milli::{Index, FieldsIdsMap, SearchResult, FieldId};
|
||||
use serde::{Serialize, Deserialize};
|
||||
use milli::update::{IndexDocumentsMethod, UpdateFormat};
|
||||
use milli::update_store::{Processed, Processing, Failed, Pending, Aborted};
|
||||
use serde::{Serialize, Deserialize, de::Deserializer};
|
||||
|
||||
use crate::data::SearchQuery;
|
||||
pub type UpdateStatusResponse = UpdateStatus<UpdateMeta, UpdateResult, String>;
|
||||
|
||||
const CONTROLLER_META_FILENAME: &str = "index_controller_meta";
|
||||
const INDEXES_CONTROLLER_FILENAME: &str = "indexes_db";
|
||||
const INDEXES_DB_NAME: &str = "indexes_db";
|
||||
|
||||
pub trait UpdateStore {}
|
||||
|
||||
pub struct IndexController<U> {
|
||||
path: PathBuf,
|
||||
update_store: U,
|
||||
env: Env,
|
||||
indexes_db: Database<Str, SerdeBincode<IndexMetadata>>,
|
||||
indexes: DashMap<String, Index>,
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
#[serde(tag = "type")]
|
||||
pub enum UpdateMeta {
|
||||
DocumentsAddition { method: IndexDocumentsMethod, format: UpdateFormat },
|
||||
ClearDocuments,
|
||||
Settings(Settings),
|
||||
Facets(Facets),
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
struct IndexControllerMeta {
|
||||
open_options: EnvOpenOptions,
|
||||
created_at: DateTime<Utc>,
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
#[serde(deny_unknown_fields)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct Facets {
|
||||
pub level_group_size: Option<NonZeroUsize>,
|
||||
pub min_level_size: Option<NonZeroUsize>,
|
||||
}
|
||||
|
||||
impl IndexControllerMeta {
|
||||
fn from_path(path: impl AsRef<Path>) -> Result<Option<IndexControllerMeta>> {
|
||||
let mut path = path.as_ref().to_path_buf();
|
||||
path.push(CONTROLLER_META_FILENAME);
|
||||
if path.exists() {
|
||||
let mut file = File::open(path)?;
|
||||
let mut buffer = Vec::new();
|
||||
let n = file.read_to_end(&mut buffer)?;
|
||||
let meta: IndexControllerMeta = serde_json::from_slice(&buffer[..n])?;
|
||||
Ok(Some(meta))
|
||||
} else {
|
||||
Ok(None)
|
||||
}
|
||||
}
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
#[serde(tag = "type")]
|
||||
pub enum UpdateStatus<M, P, N> {
|
||||
Pending { update_id: u64, meta: Pending<M> },
|
||||
Progressing { update_id: u64, meta: P },
|
||||
Processed { update_id: u64, meta: Processed<M, N> },
|
||||
Aborted { update_id: u64, meta: Aborted<M> },
|
||||
}
|
||||
|
||||
fn to_path(self, path: impl AsRef<Path>) -> Result<()> {
|
||||
let mut path = path.as_ref().to_path_buf();
|
||||
path.push(CONTROLLER_META_FILENAME);
|
||||
if path.exists() {
|
||||
Err(anyhow::anyhow!("Index controller metadata already exists"))
|
||||
} else {
|
||||
let mut file = File::create(path)?;
|
||||
let json = serde_json::to_vec(&self)?;
|
||||
file.write_all(&json)?;
|
||||
Ok(())
|
||||
fn deserialize_some<'de, T, D>(deserializer: D) -> Result<Option<T>, D::Error>
|
||||
where T: Deserialize<'de>,
|
||||
D: Deserializer<'de>
|
||||
{
|
||||
Deserialize::deserialize(deserializer).map(Some)
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
|
||||
#[serde(deny_unknown_fields)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct Settings {
|
||||
#[serde(
|
||||
default,
|
||||
deserialize_with = "deserialize_some",
|
||||
skip_serializing_if = "Option::is_none",
|
||||
)]
|
||||
pub displayed_attributes: Option<Option<Vec<String>>>,
|
||||
|
||||
#[serde(
|
||||
default,
|
||||
deserialize_with = "deserialize_some",
|
||||
skip_serializing_if = "Option::is_none",
|
||||
)]
|
||||
pub searchable_attributes: Option<Option<Vec<String>>>,
|
||||
|
||||
#[serde(default)]
|
||||
pub faceted_attributes: Option<Option<HashMap<String, String>>>,
|
||||
|
||||
#[serde(
|
||||
default,
|
||||
deserialize_with = "deserialize_some",
|
||||
skip_serializing_if = "Option::is_none",
|
||||
)]
|
||||
pub criteria: Option<Option<Vec<String>>>,
|
||||
}
|
||||
|
||||
impl Settings {
|
||||
pub fn cleared() -> Self {
|
||||
Self {
|
||||
displayed_attributes: Some(None),
|
||||
searchable_attributes: Some(None),
|
||||
faceted_attributes: Some(None),
|
||||
criteria: Some(None),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
struct IndexMetadata {
|
||||
created_at: DateTime<Utc>,
|
||||
open_options: EnvOpenOptions,
|
||||
id: String,
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub enum UpdateResult {
|
||||
//DocumentsAddition(DocumentAdditionResult),
|
||||
Other,
|
||||
}
|
||||
|
||||
impl IndexMetadata {
|
||||
fn open_index(&self, path: impl AsRef<Path>) -> Result<Index> {
|
||||
// create a path in the form "db_path/indexes/index_id"
|
||||
let mut path = path.as_ref().to_path_buf();
|
||||
path.push("indexes");
|
||||
path.push(&self.id);
|
||||
Ok(Index::new(self.open_options, path)?)
|
||||
}
|
||||
}
|
||||
/// The `IndexController` is in charge of access to the underlying indices. It separates read
|
||||
/// access, which is provided, from write access, which the implementer must provide. This allows the
|
||||
/// implementer to define the behaviour of write accesses to the indices, and to abstract the
|
||||
/// scheduling of the updates. The implementer must be able to provide an instance of `IndexStore`.
|
||||
pub trait IndexController: Deref<Target = IndexStore> {
|
||||
|
||||
struct IndexView<'a, U> {
|
||||
txn: heed::RoTxn<'a>,
|
||||
index: Ref<'a, String, Index>,
|
||||
update_store: &'a U,
|
||||
}
|
||||
/*
|
||||
* Write operations
|
||||
*
|
||||
* Logic for the write operations needs to be provided by the implementer, since they can be made
|
||||
* asynchronous thanks to an update_store for example.
|
||||
*
|
||||
* */
|
||||
|
||||
impl<'a, U: UpdateStore> IndexView<'a, U> {
|
||||
pub fn search(&self, search_query: SearchQuery) -> Result<SearchResult> {
|
||||
let mut search = self.index.search(&self.txn);
|
||||
if let Some(query) = &search_query.q {
|
||||
search.query(query);
|
||||
}
|
||||
/// Perform document addition on the database. If the provided index does not exist, it will be
|
||||
/// created when the addition is applied to the index.
|
||||
fn add_documents<S: AsRef<str>>(
|
||||
&self,
|
||||
index: S,
|
||||
method: IndexDocumentsMethod,
|
||||
format: UpdateFormat,
|
||||
data: &[u8],
|
||||
) -> anyhow::Result<UpdateStatusResponse>;
|
||||
|
||||
if let Some(offset) = search_query.offset {
|
||||
search.offset(offset);
|
||||
}
|
||||
/// Updates an index settings. If the index does not exist, it will be created when the update
|
||||
/// is applied to the index.
|
||||
fn update_settings<S: AsRef<str>>(&self, index_uid: S, settings: Settings) -> anyhow::Result<UpdateStatusResponse>;
|
||||
|
||||
let limit = search_query.limit;
|
||||
search.limit(limit);
|
||||
/// Create an index with the given `index_uid`.
|
||||
fn create_index<S: AsRef<str>>(&self, index_uid: S) -> Result<()>;
|
||||
|
||||
Ok(search.execute()?)
|
||||
}
|
||||
/// Delete index with the given `index_uid`, attempting to close it beforehand.
|
||||
fn delete_index<S: AsRef<str>>(&self, index_uid: S) -> Result<()>;
|
||||
|
||||
pub fn fields_ids_map(&self) -> Result<FieldsIdsMap> {
|
||||
Ok(self.index.fields_ids_map(&self.txn)?)
|
||||
}
|
||||
/// Swap two indexes; concretely, it simply swaps the indexes the names point to.
|
||||
fn swap_indices<S1: AsRef<str>, S2: AsRef<str>>(&self, index1_uid: S1, index2_uid: S2) -> Result<()>;
|
||||
|
||||
pub fn fields_displayed_fields_ids(&self) -> Result<Option<Vec<FieldId>>> {
|
||||
Ok(self.index.displayed_fields_ids(&self.txn)?)
|
||||
}
|
||||
|
||||
pub fn documents(&self, ids: Vec<u32>) -> Result<Vec<(u32, obkv::KvReader<'_>)>> {
|
||||
Ok(self.index.documents(&self.txn, ids)?)
|
||||
}
|
||||
}
|
||||
|
||||
impl<U: UpdateStore> IndexController<U> {
|
||||
/// Open the index controller from meta found at path, and create a new one if no meta is
|
||||
/// found.
|
||||
pub fn new(path: impl AsRef<Path>, update_store: U) -> Result<Self> {
|
||||
// If index controller metadata is present, we return the env, otherwise, we create a new
|
||||
// metadata from scratch before returning a new env.
|
||||
let path = path.as_ref().to_path_buf();
|
||||
let env = match IndexControllerMeta::from_path(&path)? {
|
||||
Some(meta) => meta.open_options.open(INDEXES_CONTROLLER_FILENAME)?,
|
||||
None => {
|
||||
let open_options = EnvOpenOptions::new()
|
||||
.map_size(page_size::get() * 1000);
|
||||
let env = open_options.open(INDEXES_CONTROLLER_FILENAME)?;
|
||||
let created_at = Utc::now();
|
||||
let meta = IndexControllerMeta { open_options: open_options.clone(), created_at };
|
||||
meta.to_path(path)?;
|
||||
env
|
||||
}
|
||||
};
|
||||
let indexes = DashMap::new();
|
||||
let indexes_db = match env.open_database(Some(INDEXES_DB_NAME))? {
|
||||
Some(indexes_db) => indexes_db,
|
||||
None => env.create_database(Some(INDEXES_DB_NAME))?,
|
||||
};
|
||||
|
||||
Ok(Self { env, indexes, indexes_db, update_store, path })
|
||||
}
|
||||
|
||||
pub fn get_or_create<S: AsRef<str>>(&mut self, name: S) -> Result<IndexView<'_, U>> {
|
||||
todo!()
|
||||
}
|
||||
|
||||
/// Get an index with read access to the db. The index are lazily loaded, meaning that we first
|
||||
/// check for its exixtence in the indexes map, and if it doesn't exist, the index db is check
|
||||
/// for metadata to launch the index.
|
||||
pub fn get<S: AsRef<str>>(&self, name: S) -> Result<Option<IndexView<'_, U>>> {
|
||||
let update_store = &self.update_store;
|
||||
match self.indexes.get(name.as_ref()) {
|
||||
Some(index) => {
|
||||
let txn = index.read_txn()?;
|
||||
Ok(Some(IndexView { index, update_store, txn }))
|
||||
}
|
||||
None => {
|
||||
let txn = self.env.read_txn()?;
|
||||
match self.indexes_db.get(&txn, name.as_ref())? {
|
||||
Some(meta) => {
|
||||
let index = meta.open_index(self.path)?;
|
||||
self.indexes.insert(name.as_ref().to_owned(), index);
|
||||
// TODO: create index view
|
||||
match self.indexes.get(name.as_ref()) {
|
||||
Some(index) => {
|
||||
let txn = index.read_txn()?;
|
||||
Ok(Some(IndexView { index, txn, update_store }))
|
||||
}
|
||||
None => Ok(None)
|
||||
}
|
||||
}
|
||||
None => Ok(None)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn get_mut<S: AsRef<str>>(&self, name: S) -> Result<Option<IndexView<'_, U>>> {
|
||||
todo!()
|
||||
}
|
||||
|
||||
pub async fn delete_index<S: AsRef<str>>(&self, name:S) -> Result<()> {
|
||||
todo!()
|
||||
}
|
||||
|
||||
pub async fn list_indices(&self) -> Result<Vec<(String, IndexMetadata)>> {
|
||||
todo!()
|
||||
}
|
||||
|
||||
pub async fn rename_index(&self, old: &str, new: &str) -> Result<()> {
|
||||
/// Apply an update to the given index. This method can be called when an update is ready to be
|
||||
/// processed.
|
||||
fn handle_update<S: AsRef<str>>(
|
||||
&self,
|
||||
_index: S,
|
||||
_update_id: u64,
|
||||
_meta: Processing<UpdateMeta>,
|
||||
_content: &[u8]
|
||||
) -> Result<Processed<UpdateMeta, UpdateResult>, Failed<UpdateMeta, String>> {
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue