MeiliSearch/src/index_controller/mod.rs

197 lines
6.4 KiB
Rust
Raw Normal View History

2021-01-13 17:50:36 +01:00
use std::fs::File;
use std::io::{Read, Write};
2021-01-14 11:27:07 +01:00
use std::path::{Path, PathBuf};
2021-01-13 17:50:36 +01:00
use anyhow::Result;
use chrono::{DateTime, Utc};
use dashmap::DashMap;
2021-01-14 11:27:07 +01:00
use dashmap::mapref::one::Ref;
2021-01-13 17:50:36 +01:00
use heed::types::{Str, SerdeBincode};
use heed::{EnvOpenOptions, Env, Database};
2021-01-14 11:27:07 +01:00
use milli::{Index, FieldsIdsMap, SearchResult, FieldId};
2021-01-13 17:50:36 +01:00
use serde::{Serialize, Deserialize};
2021-01-14 11:27:07 +01:00
use crate::data::SearchQuery;
2021-01-13 17:50:36 +01:00
/// File name, appended to the controller path, under which `IndexControllerMeta` is stored as JSON.
const CONTROLLER_META_FILENAME: &str = "index_controller_meta";
/// Location name used by `IndexController::new` when opening the controller's heed environment.
const INDEXES_CONTROLLER_FILENAME: &str = "indexes_db";
/// Name of the heed database, inside the controller environment, that stores index metadata.
const INDEXES_DB_NAME: &str = "indexes_db";
2021-01-14 11:27:07 +01:00
/// Bound required of the `U` type parameter by the `IndexView` and `IndexController` impls.
///
/// The trait declares no methods yet.
pub trait UpdateStore {}
2021-01-13 17:50:36 +01:00
pub struct IndexController<U> {
2021-01-14 11:27:07 +01:00
path: PathBuf,
2021-01-13 17:50:36 +01:00
update_store: U,
env: Env,
indexes_db: Database<Str, SerdeBincode<IndexMetadata>>,
indexes: DashMap<String, Index>,
}
/// Metadata describing the controller's own heed environment.
///
/// It is read from and written to disk as JSON by `from_path` / `to_path`.
#[derive(Debug, Serialize, Deserialize)]
struct IndexControllerMeta {
    /// Options used to open the controller's heed environment.
    open_options: EnvOpenOptions,
    /// Instant at which this metadata was created.
    created_at: DateTime<Utc>,
}
impl IndexControllerMeta {
fn from_path(path: impl AsRef<Path>) -> Result<Option<IndexControllerMeta>> {
2021-01-14 11:27:07 +01:00
let mut path = path.as_ref().to_path_buf();
path.push(CONTROLLER_META_FILENAME);
2021-01-13 17:50:36 +01:00
if path.exists() {
let mut file = File::open(path)?;
let mut buffer = Vec::new();
let n = file.read_to_end(&mut buffer)?;
let meta: IndexControllerMeta = serde_json::from_slice(&buffer[..n])?;
Ok(Some(meta))
} else {
Ok(None)
}
}
fn to_path(self, path: impl AsRef<Path>) -> Result<()> {
2021-01-14 11:27:07 +01:00
let mut path = path.as_ref().to_path_buf();
path.push(CONTROLLER_META_FILENAME);
2021-01-13 17:50:36 +01:00
if path.exists() {
Err(anyhow::anyhow!("Index controller metadata already exists"))
} else {
let mut file = File::create(path)?;
let json = serde_json::to_vec(&self)?;
file.write_all(&json)?;
Ok(())
}
}
}
/// Metadata stored for each index in the controller's `indexes_db` database.
///
/// The type is `pub` because it appears in the signature of the public
/// `IndexController::list_indices`; its fields stay private.
#[derive(Debug, Serialize, Deserialize)]
pub struct IndexMetadata {
    /// Presumably the creation time of the index; nothing in this file sets it yet.
    created_at: DateTime<Utc>,
    /// Options handed to `Index::new` when the index is opened.
    open_options: EnvOpenOptions,
    /// Identifier of the index; used as the last component of its directory path.
    id: String,
}
impl IndexMetadata {
2021-01-14 11:27:07 +01:00
fn open_index(&self, path: impl AsRef<Path>) -> Result<Index> {
// create a path in the form "db_path/indexes/index_id"
let mut path = path.as_ref().to_path_buf();
path.push("indexes");
path.push(&self.id);
Ok(Index::new(self.open_options, path)?)
2021-01-13 17:50:36 +01:00
}
}
struct IndexView<'a, U> {
txn: heed::RoTxn<'a>,
2021-01-14 11:27:07 +01:00
index: Ref<'a, String, Index>,
2021-01-13 17:50:36 +01:00
update_store: &'a U,
}
impl<'a, U: UpdateStore> IndexView<'a, U> {
2021-01-13 18:18:52 +01:00
pub fn search(&self, search_query: SearchQuery) -> Result<SearchResult> {
2021-01-14 11:27:07 +01:00
let mut search = self.index.search(&self.txn);
2021-01-13 17:50:36 +01:00
if let Some(query) = &search_query.q {
search.query(query);
}
if let Some(offset) = search_query.offset {
search.offset(offset);
}
let limit = search_query.limit;
search.limit(limit);
Ok(search.execute()?)
}
2021-01-13 18:29:17 +01:00
pub fn fields_ids_map(&self) -> Result<FieldsIdsMap> {
2021-01-14 11:27:07 +01:00
Ok(self.index.fields_ids_map(&self.txn)?)
2021-01-13 18:29:17 +01:00
}
2021-01-14 11:27:07 +01:00
pub fn fields_displayed_fields_ids(&self) -> Result<Option<Vec<FieldId>>> {
Ok(self.index.displayed_fields_ids(&self.txn)?)
2021-01-13 18:29:17 +01:00
}
2021-01-14 11:27:07 +01:00
pub fn documents(&self, ids: Vec<u32>) -> Result<Vec<(u32, obkv::KvReader<'_>)>> {
Ok(self.index.documents(&self.txn, ids)?)
2021-01-13 18:29:17 +01:00
}
2021-01-13 17:50:36 +01:00
}
impl<U: UpdateStore> IndexController<U> {
/// Open the index controller from meta found at path, and create a new one if no meta is
/// found.
pub fn new(path: impl AsRef<Path>, update_store: U) -> Result<Self> {
// If index controller metadata is present, we return the env, otherwise, we create a new
// metadata from scratch before returning a new env.
2021-01-14 11:27:07 +01:00
let path = path.as_ref().to_path_buf();
let env = match IndexControllerMeta::from_path(&path)? {
2021-01-13 17:50:36 +01:00
Some(meta) => meta.open_options.open(INDEXES_CONTROLLER_FILENAME)?,
None => {
let open_options = EnvOpenOptions::new()
.map_size(page_size::get() * 1000);
let env = open_options.open(INDEXES_CONTROLLER_FILENAME)?;
let created_at = Utc::now();
2021-01-14 11:27:07 +01:00
let meta = IndexControllerMeta { open_options: open_options.clone(), created_at };
2021-01-13 17:50:36 +01:00
meta.to_path(path)?;
env
}
};
let indexes = DashMap::new();
2021-01-14 11:27:07 +01:00
let indexes_db = match env.open_database(Some(INDEXES_DB_NAME))? {
2021-01-13 17:50:36 +01:00
Some(indexes_db) => indexes_db,
2021-01-14 11:27:07 +01:00
None => env.create_database(Some(INDEXES_DB_NAME))?,
2021-01-13 17:50:36 +01:00
};
2021-01-14 11:27:07 +01:00
Ok(Self { env, indexes, indexes_db, update_store, path })
2021-01-13 17:50:36 +01:00
}
2021-01-14 11:27:07 +01:00
pub fn get_or_create<S: AsRef<str>>(&mut self, name: S) -> Result<IndexView<'_, U>> {
2021-01-13 17:50:36 +01:00
todo!()
}
/// Get an index with read access to the db. The index are lazily loaded, meaning that we first
/// check for its exixtence in the indexes map, and if it doesn't exist, the index db is check
/// for metadata to launch the index.
pub fn get<S: AsRef<str>>(&self, name: S) -> Result<Option<IndexView<'_, U>>> {
2021-01-14 11:27:07 +01:00
let update_store = &self.update_store;
2021-01-13 17:50:36 +01:00
match self.indexes.get(name.as_ref()) {
Some(index) => {
let txn = index.read_txn()?;
Ok(Some(IndexView { index, update_store, txn }))
}
None => {
let txn = self.env.read_txn()?;
match self.indexes_db.get(&txn, name.as_ref())? {
Some(meta) => {
2021-01-14 11:27:07 +01:00
let index = meta.open_index(self.path)?;
2021-01-13 17:50:36 +01:00
self.indexes.insert(name.as_ref().to_owned(), index);
2021-01-14 11:27:07 +01:00
// TODO: create index view
match self.indexes.get(name.as_ref()) {
Some(index) => {
let txn = index.read_txn()?;
Ok(Some(IndexView { index, txn, update_store }))
}
None => Ok(None)
}
2021-01-13 17:50:36 +01:00
}
None => Ok(None)
}
}
}
}
2021-01-14 11:27:07 +01:00
pub fn get_mut<S: AsRef<str>>(&self, name: S) -> Result<Option<IndexView<'_, U>>> {
2021-01-13 17:50:36 +01:00
todo!()
}
pub async fn delete_index<S: AsRef<str>>(&self, name:S) -> Result<()> {
todo!()
}
pub async fn list_indices(&self) -> Result<Vec<(String, IndexMetadata)>> {
todo!()
}
pub async fn rename_index(&self, old: &str, new: &str) -> Result<()> {
todo!()
}
}