introduce index resolver

This commit is contained in:
mpostma 2021-09-24 11:53:11 +02:00
parent 5353be74c3
commit 42a6260b65
23 changed files with 833 additions and 193 deletions

View file

@ -17,6 +17,8 @@ pub enum IndexError {
Facet(#[from] FacetError),
#[error("{0}")]
Milli(#[from] milli::Error),
#[error("A primary key is already present. It's impossible to update it")]
ExistingPrimaryKey,
}
internal_error!(
@ -33,6 +35,7 @@ impl ErrorCode for IndexError {
IndexError::DocumentNotFound(_) => Code::DocumentNotFound,
IndexError::Facet(e) => e.error_code(),
IndexError::Milli(e) => MilliError(e).error_code(),
IndexError::ExistingPrimaryKey => Code::PrimaryKeyAlreadyPresent,
}
}
}

View file

@ -5,19 +5,23 @@ use std::ops::Deref;
use std::path::Path;
use std::sync::Arc;
use chrono::{DateTime, Utc};
use heed::{EnvOpenOptions, RoTxn};
use milli::update::Setting;
use milli::{obkv_to_json, FieldId};
use milli::{FieldDistribution, FieldId, obkv_to_json};
use serde_json::{Map, Value};
use serde::{Serialize, Deserialize};
use error::Result;
pub use search::{default_crop_length, SearchQuery, SearchResult, DEFAULT_SEARCH_LIMIT};
pub use updates::{Checked, Facets, Settings, Unchecked};
use uuid::Uuid;
use crate::EnvSizer;
use crate::index_controller::update_file_store::UpdateFileStore;
use self::error::IndexError;
use self::update_handler::UpdateHandler;
pub mod error;
pub mod update_handler;
@ -28,10 +32,51 @@ mod updates;
pub type Document = Map<String, Value>;
/// Metadata describing an index: its creation and last-update timestamps and
/// its primary key, if any. Field names are (de)serialized in camelCase.
#[derive(Debug, Serialize, Deserialize, Clone)]
#[serde(rename_all = "camelCase")]
pub struct IndexMeta {
/// Creation timestamp, read from the index. Not a public field, unlike the others.
created_at: DateTime<Utc>,
/// Last-update timestamp, read from the index.
pub updated_at: DateTime<Utc>,
/// Primary key of the index; `None` when the index reports no primary key.
pub primary_key: Option<String>,
}
/// Statistics reported for an index. Field names are serialized in camelCase.
#[derive(Serialize, Debug)]
#[serde(rename_all = "camelCase")]
pub struct IndexStats {
/// Size of the index as reported by `Index::size`; excluded from the serialized output.
#[serde(skip)]
pub size: u64,
/// Number of documents currently in the index.
pub number_of_documents: u64,
/// Whether the current index is performing an update. It is initially `None` when the
/// index returns it, since it is the `UpdateStore` that knows which index is currently indexing. It is
/// later set to either true or false, when we retrieve the information from the `UpdateStore`.
pub is_indexing: Option<bool>,
/// Field distribution of the index, as returned by the index's `field_distribution`.
pub field_distribution: FieldDistribution,
}
impl IndexMeta {
    /// Builds the metadata of `index` using a fresh read transaction.
    ///
    /// # Errors
    ///
    /// Fails if the read transaction cannot be opened or if any of the
    /// metadata lookups fails.
    pub fn new(index: &Index) -> Result<Self> {
        let rtxn = index.read_txn()?;
        Self::new_txn(index, &rtxn)
    }

    /// Builds the metadata of `index` by reading through the caller-supplied
    /// transaction `txn`.
    ///
    /// # Errors
    ///
    /// Propagates the first failing lookup (creation date, update date,
    /// then primary key).
    fn new_txn(index: &Index, txn: &heed::RoTxn) -> Result<Self> {
        // Fields are evaluated top to bottom, so lookups happen in the
        // order: created_at, updated_at, primary_key.
        Ok(Self {
            created_at: index.created_at(txn)?,
            updated_at: index.updated_at(txn)?,
            primary_key: index.primary_key(txn)?.map(String::from),
        })
    }
}
/// Handle to a single index. Every non-`Uuid` field is held behind an `Arc`,
/// so cloning an `Index` shares the same underlying `milli::Index`, update
/// file store and update handler.
#[derive(Clone)]
pub struct Index {
/// Identifier of this index.
pub uuid: Uuid,
/// The underlying milli index.
pub inner: Arc<milli::Index>,
/// Shared store of update files.
update_file_store: Arc<UpdateFileStore>,
/// Handler that `handle_update` delegates to.
update_handler: Arc<UpdateHandler>,
}
impl Deref for Index {
@ -43,14 +88,28 @@ impl Deref for Index {
}
impl Index {
pub fn open(path: impl AsRef<Path>, size: usize, update_file_store: Arc<UpdateFileStore>) -> Result<Self> {
/// Opens the index stored at `path`, creating the directory (and any missing
/// parents) first. `size` is passed to the environment options as the map
/// size. The resulting `milli::Index` is wrapped in an `Arc` and bundled with
/// the given `uuid`, `update_file_store` and `update_handler`.
///
/// Returns an error if the directory cannot be created or if the milli index
/// cannot be opened.
pub fn open(path: impl AsRef<Path>, size: usize, update_file_store: Arc<UpdateFileStore>, uuid: Uuid, update_handler: Arc<UpdateHandler>) -> Result<Self> {
create_dir_all(&path)?;
let mut options = EnvOpenOptions::new();
options.map_size(size);
let inner = Arc::new(milli::Index::new(options, &path)?);
Ok(Index { inner, update_file_store })
Ok(Index { inner, update_file_store, uuid, update_handler })
}
/// Collects the statistics of this index under a single read transaction.
///
/// `is_indexing` is always returned as `None` from here; this method has no
/// knowledge of whether an update is in progress.
///
/// # Errors
///
/// Fails if the read transaction cannot be opened, or if the document count
/// or field distribution cannot be read.
pub fn stats(&self) -> Result<IndexStats> {
    let txn = self.read_txn()?;

    let size = self.size();
    let number_of_documents = self.number_of_documents(&txn)?;
    let field_distribution = self.field_distribution(&txn)?;

    Ok(IndexStats {
        size,
        number_of_documents,
        is_indexing: None,
        field_distribution,
    })
}
/// Returns the metadata of this index by delegating to `IndexMeta::new`,
/// which reads it through its own read transaction.
pub fn meta(&self) -> Result<IndexMeta> {
IndexMeta::new(self)
}
pub fn settings(&self) -> Result<Settings<Checked>> {
let txn = self.read_txn()?;
self.settings_txn(&txn)

View file

@ -52,7 +52,7 @@ impl UpdateHandler {
pub fn handle_update(
&self,
index: Index,
index: &Index,
meta: Processing,
) -> Result<Processed, Failed> {
let update_id = meta.id();

View file

@ -8,10 +8,10 @@ use milli::update::{IndexDocumentsMethod, Setting, UpdateBuilder};
use serde::{Deserialize, Serialize, Serializer};
use uuid::Uuid;
use crate::index_controller::updates::status::UpdateResult;
use crate::index_controller::updates::status::{Failed, Processed, Processing, UpdateResult};
use super::Index;
use super::error::Result;
use super::{Index, IndexMeta};
use super::error::{IndexError, Result};
fn serialize_with_wildcard<S>(
field: &Setting<Vec<String>>,
@ -163,6 +163,31 @@ pub struct Facets {
}
impl Index {
/// Processes `update` against this index by forwarding it, together with a
/// reference to `self`, to the index's `UpdateHandler`. Yields `Processed`
/// on success and `Failed` on error, as returned by the handler.
pub fn handle_update(&self, update: Processing) -> std::result::Result<Processed, Failed> {
self.update_handler.handle_update(self, update)
}
/// Sets the primary key of this index and returns the resulting metadata.
///
/// When `primary_key` is `None` nothing is written and the current metadata
/// is returned as is.
///
/// # Errors
///
/// Returns `IndexError::ExistingPrimaryKey` if the index already has a
/// primary key. Any failure while opening the transaction, applying the
/// setting, reading the metadata or committing is propagated.
pub fn update_primary_key(&self, primary_key: Option<String>) -> Result<IndexMeta> {
    // Nothing to change: just report the current metadata.
    let new_key = match primary_key {
        Some(key) => key,
        None => return IndexMeta::new(self),
    };

    let mut wtxn = self.write_txn()?;

    // A primary key can only be set once.
    if self.primary_key(&wtxn)?.is_some() {
        return Err(IndexError::ExistingPrimaryKey);
    }

    let mut settings = UpdateBuilder::new(0).settings(&mut wtxn, self);
    settings.set_primary_key(new_key);
    // The callback passed to `execute` is a no-op.
    settings.execute(|_, _| ())?;

    // Read the metadata through the write transaction so it reflects the new
    // key, then commit; a failed commit surfaces as an error instead.
    let meta = IndexMeta::new_txn(self, &wtxn)?;
    wtxn.commit()?;
    Ok(meta)
}
pub fn update_documents(
&self,
method: IndexDocumentsMethod,