mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-25 06:14:26 +01:00
extract the index abstraction out of the index-scheduler in its own module
This commit is contained in:
parent
48138c21a9
commit
94e29a9f5f
25
Cargo.lock
generated
25
Cargo.lock
generated
@ -1729,10 +1729,9 @@ dependencies = [
|
|||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "index-scheduler"
|
name = "index"
|
||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"actix-rt",
|
|
||||||
"anyhow",
|
"anyhow",
|
||||||
"bincode",
|
"bincode",
|
||||||
"csv",
|
"csv",
|
||||||
@ -1745,17 +1744,33 @@ dependencies = [
|
|||||||
"log",
|
"log",
|
||||||
"meilisearch-types",
|
"meilisearch-types",
|
||||||
"milli 0.33.0",
|
"milli 0.33.0",
|
||||||
"mockall",
|
|
||||||
"nelson",
|
"nelson",
|
||||||
"obkv",
|
"obkv",
|
||||||
"paste",
|
|
||||||
"permissive-json-pointer",
|
"permissive-json-pointer",
|
||||||
"proptest",
|
"proptest",
|
||||||
"proptest-derive",
|
"proptest-derive",
|
||||||
"regex",
|
"regex",
|
||||||
"roaring 0.9.0",
|
|
||||||
"serde",
|
"serde",
|
||||||
"serde_json",
|
"serde_json",
|
||||||
|
"thiserror",
|
||||||
|
"time",
|
||||||
|
"uuid 1.1.2",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "index-scheduler"
|
||||||
|
version = "0.1.0"
|
||||||
|
dependencies = [
|
||||||
|
"anyhow",
|
||||||
|
"bincode",
|
||||||
|
"csv",
|
||||||
|
"file-store",
|
||||||
|
"index",
|
||||||
|
"log",
|
||||||
|
"milli 0.33.0",
|
||||||
|
"nelson",
|
||||||
|
"roaring 0.9.0",
|
||||||
|
"serde",
|
||||||
"tempfile",
|
"tempfile",
|
||||||
"thiserror",
|
"thiserror",
|
||||||
"time",
|
"time",
|
||||||
|
@ -6,6 +6,7 @@ members = [
|
|||||||
"meilisearch-lib",
|
"meilisearch-lib",
|
||||||
"meilisearch-auth",
|
"meilisearch-auth",
|
||||||
"index-scheduler",
|
"index-scheduler",
|
||||||
|
"index",
|
||||||
"file-store",
|
"file-store",
|
||||||
"permissive-json-pointer",
|
"permissive-json-pointer",
|
||||||
]
|
]
|
||||||
|
@ -9,31 +9,16 @@ edition = "2021"
|
|||||||
anyhow = "1.0.64"
|
anyhow = "1.0.64"
|
||||||
bincode = "1.3.3"
|
bincode = "1.3.3"
|
||||||
csv = "1.1.6"
|
csv = "1.1.6"
|
||||||
derivative = "2.2.0"
|
|
||||||
either = { version = "1.6.1", features = ["serde"] }
|
|
||||||
file-store = { path = "../file-store" }
|
file-store = { path = "../file-store" }
|
||||||
fst = "0.4.7"
|
|
||||||
indexmap = { version = "1.8.0", features = ["serde-1"] }
|
|
||||||
lazy_static = "1.4.0"
|
|
||||||
log = "0.4.14"
|
log = "0.4.14"
|
||||||
meilisearch-types = { path = "../meilisearch-types" }
|
|
||||||
milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.33.0" }
|
milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.33.0" }
|
||||||
obkv = "0.2.0"
|
index = { path = "../index" }
|
||||||
permissive-json-pointer = { path = "../permissive-json-pointer" }
|
|
||||||
regex = "1.5.5"
|
|
||||||
roaring = "0.9.0"
|
roaring = "0.9.0"
|
||||||
serde = { version = "1.0.136", features = ["derive"] }
|
serde = { version = "1.0.136", features = ["derive"] }
|
||||||
serde_json = { version = "1.0.85", features = ["preserve_order"] }
|
|
||||||
tempfile = "3.3.0"
|
tempfile = "3.3.0"
|
||||||
thiserror = "1.0.30"
|
thiserror = "1.0.30"
|
||||||
time = { version = "0.3.7", features = ["serde-well-known", "formatting", "parsing", "macros"] }
|
time = { version = "0.3.7", features = ["serde-well-known", "formatting", "parsing", "macros"] }
|
||||||
uuid = { version = "1.1.2", features = ["serde", "v4"] }
|
uuid = { version = "1.1.2", features = ["serde", "v4"] }
|
||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
actix-rt = "2.7.0"
|
|
||||||
meilisearch-types = { path = "../meilisearch-types", features = ["test-traits"] }
|
|
||||||
mockall = "0.11.0"
|
|
||||||
nelson = { git = "https://github.com/meilisearch/nelson.git", rev = "675f13885548fb415ead8fbb447e9e6d9314000a"}
|
nelson = { git = "https://github.com/meilisearch/nelson.git", rev = "675f13885548fb415ead8fbb447e9e6d9314000a"}
|
||||||
paste = "1.0.6"
|
|
||||||
proptest = "1.0.0"
|
|
||||||
proptest-derive = "0.3.0"
|
|
||||||
|
@ -1,155 +0,0 @@
|
|||||||
use std::borrow::Borrow;
|
|
||||||
use std::fmt::{self, Debug, Display};
|
|
||||||
use std::io::{self, BufReader, Read, Seek, Write};
|
|
||||||
|
|
||||||
use either::Either;
|
|
||||||
use meilisearch_types::error::{Code, ErrorCode};
|
|
||||||
use meilisearch_types::internal_error;
|
|
||||||
use milli::documents::{DocumentsBatchBuilder, Error};
|
|
||||||
use milli::Object;
|
|
||||||
use serde::Deserialize;
|
|
||||||
|
|
||||||
type Result<T> = std::result::Result<T, DocumentFormatError>;
|
|
||||||
|
|
||||||
#[derive(Debug)]
|
|
||||||
pub enum PayloadType {
|
|
||||||
Ndjson,
|
|
||||||
Json,
|
|
||||||
Csv,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl fmt::Display for PayloadType {
|
|
||||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
|
||||||
match self {
|
|
||||||
PayloadType::Ndjson => f.write_str("ndjson"),
|
|
||||||
PayloadType::Json => f.write_str("json"),
|
|
||||||
PayloadType::Csv => f.write_str("csv"),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug)]
|
|
||||||
pub enum DocumentFormatError {
|
|
||||||
Internal(Box<dyn std::error::Error + Send + Sync + 'static>),
|
|
||||||
MalformedPayload(Error, PayloadType),
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Display for DocumentFormatError {
|
|
||||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
|
||||||
match self {
|
|
||||||
Self::Internal(e) => write!(f, "An internal error has occurred: `{}`.", e),
|
|
||||||
Self::MalformedPayload(me, b) => match me.borrow() {
|
|
||||||
Error::Json(se) => {
|
|
||||||
// https://github.com/meilisearch/meilisearch/issues/2107
|
|
||||||
// The user input maybe insanely long. We need to truncate it.
|
|
||||||
let mut serde_msg = se.to_string();
|
|
||||||
let ellipsis = "...";
|
|
||||||
if serde_msg.len() > 100 + ellipsis.len() {
|
|
||||||
serde_msg.replace_range(50..serde_msg.len() - 85, ellipsis);
|
|
||||||
}
|
|
||||||
|
|
||||||
write!(
|
|
||||||
f,
|
|
||||||
"The `{}` payload provided is malformed. `Couldn't serialize document value: {}`.",
|
|
||||||
b, serde_msg
|
|
||||||
)
|
|
||||||
}
|
|
||||||
_ => write!(f, "The `{}` payload provided is malformed: `{}`.", b, me),
|
|
||||||
},
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl std::error::Error for DocumentFormatError {}
|
|
||||||
|
|
||||||
impl From<(PayloadType, Error)> for DocumentFormatError {
|
|
||||||
fn from((ty, error): (PayloadType, Error)) -> Self {
|
|
||||||
match error {
|
|
||||||
Error::Io(e) => Self::Internal(Box::new(e)),
|
|
||||||
e => Self::MalformedPayload(e, ty),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl ErrorCode for DocumentFormatError {
|
|
||||||
fn error_code(&self) -> Code {
|
|
||||||
match self {
|
|
||||||
DocumentFormatError::Internal(_) => Code::Internal,
|
|
||||||
DocumentFormatError::MalformedPayload(_, _) => Code::MalformedPayload,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
internal_error!(DocumentFormatError: io::Error);
|
|
||||||
|
|
||||||
/// Reads CSV from input and write an obkv batch to writer.
|
|
||||||
pub fn read_csv(input: impl Read, writer: impl Write + Seek) -> Result<usize> {
|
|
||||||
let mut builder = DocumentsBatchBuilder::new(writer);
|
|
||||||
|
|
||||||
let csv = csv::Reader::from_reader(input);
|
|
||||||
builder.append_csv(csv).map_err(|e| (PayloadType::Csv, e))?;
|
|
||||||
|
|
||||||
let count = builder.documents_count();
|
|
||||||
let _ = builder
|
|
||||||
.into_inner()
|
|
||||||
.map_err(Into::into)
|
|
||||||
.map_err(DocumentFormatError::Internal)?;
|
|
||||||
|
|
||||||
Ok(count as usize)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Reads JSON Lines from input and write an obkv batch to writer.
|
|
||||||
pub fn read_ndjson(input: impl Read, writer: impl Write + Seek) -> Result<usize> {
|
|
||||||
let mut builder = DocumentsBatchBuilder::new(writer);
|
|
||||||
let reader = BufReader::new(input);
|
|
||||||
|
|
||||||
for result in serde_json::Deserializer::from_reader(reader).into_iter() {
|
|
||||||
let object = result
|
|
||||||
.map_err(Error::Json)
|
|
||||||
.map_err(|e| (PayloadType::Ndjson, e))?;
|
|
||||||
builder
|
|
||||||
.append_json_object(&object)
|
|
||||||
.map_err(Into::into)
|
|
||||||
.map_err(DocumentFormatError::Internal)?;
|
|
||||||
}
|
|
||||||
|
|
||||||
let count = builder.documents_count();
|
|
||||||
let _ = builder
|
|
||||||
.into_inner()
|
|
||||||
.map_err(Into::into)
|
|
||||||
.map_err(DocumentFormatError::Internal)?;
|
|
||||||
|
|
||||||
Ok(count as usize)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Reads JSON from input and write an obkv batch to writer.
|
|
||||||
pub fn read_json(input: impl Read, writer: impl Write + Seek) -> Result<usize> {
|
|
||||||
let mut builder = DocumentsBatchBuilder::new(writer);
|
|
||||||
let reader = BufReader::new(input);
|
|
||||||
|
|
||||||
#[derive(Deserialize, Debug)]
|
|
||||||
#[serde(transparent)]
|
|
||||||
struct ArrayOrSingleObject {
|
|
||||||
#[serde(with = "either::serde_untagged")]
|
|
||||||
inner: Either<Vec<Object>, Object>,
|
|
||||||
}
|
|
||||||
|
|
||||||
let content: ArrayOrSingleObject = serde_json::from_reader(reader)
|
|
||||||
.map_err(Error::Json)
|
|
||||||
.map_err(|e| (PayloadType::Json, e))?;
|
|
||||||
|
|
||||||
for object in content.inner.map_right(|o| vec![o]).into_inner() {
|
|
||||||
builder
|
|
||||||
.append_json_object(&object)
|
|
||||||
.map_err(Into::into)
|
|
||||||
.map_err(DocumentFormatError::Internal)?;
|
|
||||||
}
|
|
||||||
|
|
||||||
let count = builder.documents_count();
|
|
||||||
let _ = builder
|
|
||||||
.into_inner()
|
|
||||||
.map_err(Into::into)
|
|
||||||
.map_err(DocumentFormatError::Internal)?;
|
|
||||||
|
|
||||||
Ok(count as usize)
|
|
||||||
}
|
|
@ -1,8 +1,6 @@
|
|||||||
use milli::heed;
|
use milli::heed;
|
||||||
use thiserror::Error;
|
use thiserror::Error;
|
||||||
|
|
||||||
use crate::index;
|
|
||||||
|
|
||||||
#[derive(Error, Debug)]
|
#[derive(Error, Debug)]
|
||||||
pub enum Error {
|
pub enum Error {
|
||||||
#[error("Index `{0}` not found")]
|
#[error("Index `{0}` not found")]
|
||||||
|
@ -1,8 +1,6 @@
|
|||||||
mod autobatcher;
|
mod autobatcher;
|
||||||
mod batch;
|
mod batch;
|
||||||
mod document_formats;
|
|
||||||
pub mod error;
|
pub mod error;
|
||||||
pub mod index;
|
|
||||||
pub mod task;
|
pub mod task;
|
||||||
mod utils;
|
mod utils;
|
||||||
|
|
||||||
|
33
index/Cargo.toml
Normal file
33
index/Cargo.toml
Normal file
@ -0,0 +1,33 @@
|
|||||||
|
[package]
|
||||||
|
name = "index"
|
||||||
|
version = "0.1.0"
|
||||||
|
edition = "2021"
|
||||||
|
|
||||||
|
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
anyhow = "1.0.64"
|
||||||
|
bincode = "1.3.3"
|
||||||
|
csv = "1.1.6"
|
||||||
|
derivative = "2.2.0"
|
||||||
|
either = { version = "1.6.1", features = ["serde"] }
|
||||||
|
fst = "0.4.7"
|
||||||
|
indexmap = { version = "1.8.0", features = ["serde-1"] }
|
||||||
|
lazy_static = "1.4.0"
|
||||||
|
log = "0.4.14"
|
||||||
|
meilisearch-types = { path = "../meilisearch-types" }
|
||||||
|
milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.33.0" }
|
||||||
|
obkv = "0.2.0"
|
||||||
|
permissive-json-pointer = { path = "../permissive-json-pointer" }
|
||||||
|
regex = "1.5.5"
|
||||||
|
serde = { version = "1.0.136", features = ["derive"] }
|
||||||
|
serde_json = { version = "1.0.85", features = ["preserve_order"] }
|
||||||
|
thiserror = "1.0.30"
|
||||||
|
time = { version = "0.3.7", features = ["serde-well-known", "formatting", "parsing", "macros"] }
|
||||||
|
file-store = { path = "../file-store" }
|
||||||
|
uuid = { version = "1.1.2", features = ["serde", "v4"] }
|
||||||
|
|
||||||
|
[dev-dependencies]
|
||||||
|
nelson = { git = "https://github.com/meilisearch/nelson.git", rev = "675f13885548fb415ead8fbb447e9e6d9314000a"}
|
||||||
|
proptest = "1.0.0"
|
||||||
|
proptest-derive = "0.3.0"
|
@ -13,7 +13,7 @@ use serde::{Deserialize, Serialize};
|
|||||||
use serde_json::{Map, Value};
|
use serde_json::{Map, Value};
|
||||||
use time::OffsetDateTime;
|
use time::OffsetDateTime;
|
||||||
|
|
||||||
use crate::index::search::DEFAULT_PAGINATION_MAX_TOTAL_HITS;
|
use crate::search::DEFAULT_PAGINATION_MAX_TOTAL_HITS;
|
||||||
|
|
||||||
use super::error::IndexError;
|
use super::error::IndexError;
|
||||||
use super::error::Result;
|
use super::error::Result;
|
@ -12,10 +12,10 @@ pub mod updates;
|
|||||||
#[allow(clippy::module_inception)]
|
#[allow(clippy::module_inception)]
|
||||||
mod index;
|
mod index;
|
||||||
|
|
||||||
pub use index::{Document, IndexMeta, IndexStats};
|
pub use self::index::{Document, IndexMeta, IndexStats};
|
||||||
|
|
||||||
#[cfg(not(test))]
|
#[cfg(not(test))]
|
||||||
pub use index::Index;
|
pub use self::index::Index;
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
pub use test::MockIndex as Index;
|
pub use test::MockIndex as Index;
|
||||||
@ -37,7 +37,7 @@ pub mod test {
|
|||||||
use super::index::Index;
|
use super::index::Index;
|
||||||
use super::Document;
|
use super::Document;
|
||||||
use super::{Checked, IndexMeta, IndexStats, SearchQuery, SearchResult, Settings};
|
use super::{Checked, IndexMeta, IndexStats, SearchQuery, SearchResult, Settings};
|
||||||
use crate::update_file_store::UpdateFileStore;
|
use file_store::FileStore;
|
||||||
|
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
pub enum MockIndex {
|
pub enum MockIndex {
|
||||||
@ -164,7 +164,7 @@ pub mod test {
|
|||||||
&self,
|
&self,
|
||||||
method: IndexDocumentsMethod,
|
method: IndexDocumentsMethod,
|
||||||
primary_key: Option<String>,
|
primary_key: Option<String>,
|
||||||
file_store: UpdateFileStore,
|
file_store: FileStore,
|
||||||
contents: impl Iterator<Item = Uuid>,
|
contents: impl Iterator<Item = Uuid>,
|
||||||
) -> Result<Vec<Result<DocumentAdditionResult>>> {
|
) -> Result<Vec<Result<DocumentAdditionResult>>> {
|
||||||
match self {
|
match self {
|
3
index/src/main.rs
Normal file
3
index/src/main.rs
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
fn main() {
|
||||||
|
println!("Hello, world!");
|
||||||
|
}
|
@ -13,7 +13,7 @@ use regex::Regex;
|
|||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
use serde_json::{json, Value};
|
use serde_json::{json, Value};
|
||||||
|
|
||||||
use crate::index::error::FacetError;
|
use crate::error::FacetError;
|
||||||
|
|
||||||
use super::error::{IndexError, Result};
|
use super::error::{IndexError, Result};
|
||||||
use super::index::Index;
|
use super::index::Index;
|
Loading…
Reference in New Issue
Block a user