Merge branch 'main' into 2983-forward-date-to-milli

This commit is contained in:
amab8901 2022-12-21 14:31:19 +01:00
commit 0893b175dc
23 changed files with 288 additions and 211 deletions

View File

@ -6,10 +6,20 @@ on:
jobs:
flaky:
runs-on: ubuntu-18.04
runs-on: ubuntu-latest
container:
# Use ubuntu-18.04 to compile with glibc 2.27, which is what production expects
image: ubuntu:18.04
steps:
- uses: actions/checkout@v3
- name: Install needed dependencies
run: |
apt-get update && apt-get install -y curl
apt-get install build-essential -y
- uses: actions-rs/toolchain@v1
with:
toolchain: stable
override: true
- name: Install cargo-flaky
run: cargo install cargo-flaky
- name: Run cargo flaky 100 times

View File

@ -32,30 +32,56 @@ jobs:
if: github.event_name == 'release' && steps.check-tag-format.outputs.stable == 'true'
run: bash .github/scripts/check-release.sh
publish:
publish-linux:
name: Publish binary for Linux
runs-on: ubuntu-latest
needs: check-version
container:
# Use ubuntu-18.04 to compile with glibc 2.27
image: ubuntu:18.04
steps:
- uses: actions/checkout@v3
- name: Install needed dependencies
run: |
apt-get update && apt-get install -y curl
apt-get install build-essential -y
- uses: actions-rs/toolchain@v1
with:
toolchain: stable
override: true
- name: Build
run: cargo build --release --locked
# No need to upload binaries for dry run (cron)
- name: Upload binaries to release
if: github.event_name == 'release'
uses: svenstaro/upload-release-action@2.3.0
with:
repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
file: target/release/meilisearch
asset_name: meilisearch-linux-amd64
tag: ${{ github.ref }}
publish-macos-windows:
name: Publish binary for ${{ matrix.os }}
runs-on: ${{ matrix.os }}
needs: check-version
strategy:
fail-fast: false
matrix:
os: [ubuntu-18.04, macos-latest, windows-latest]
os: [macos-12, windows-2022]
include:
- os: ubuntu-18.04
artifact_name: meilisearch
asset_name: meilisearch-linux-amd64
- os: macos-latest
- os: macos-12
artifact_name: meilisearch
asset_name: meilisearch-macos-amd64
- os: windows-latest
- os: windows-2022
artifact_name: meilisearch.exe
asset_name: meilisearch-windows-amd64.exe
steps:
- uses: hecrj/setup-rust-action@master
with:
rust-version: stable
- uses: actions/checkout@v3
- uses: actions-rs/toolchain@v1
with:
toolchain: stable
override: true
- name: Build
run: cargo build --release --locked
# No need to upload binaries for dry run (cron)
@ -72,15 +98,13 @@ jobs:
name: Publish binary for macOS silicon
runs-on: ${{ matrix.os }}
needs: check-version
continue-on-error: false
strategy:
fail-fast: false
matrix:
include:
- os: macos-latest
- os: macos-12
target: aarch64-apple-darwin
asset_name: meilisearch-macos-apple-silicon
steps:
- name: Checkout repository
uses: actions/checkout@v3
@ -110,7 +134,6 @@ jobs:
name: Publish binary for aarch64
runs-on: ${{ matrix.os }}
needs: check-version
continue-on-error: false
strategy:
fail-fast: false
matrix:
@ -121,11 +144,9 @@ jobs:
linker: gcc-aarch64-linux-gnu
use-cross: true
asset_name: meilisearch-linux-aarch64
steps:
- name: Checkout repository
uses: actions/checkout@v3
- name: Installing Rust toolchain
uses: actions-rs/toolchain@v1
with:
@ -133,16 +154,13 @@ jobs:
profile: minimal
target: ${{ matrix.target }}
override: true
- name: APT update
run: |
sudo apt update
- name: Install target specific tools
if: matrix.use-cross
run: |
sudo apt-get install -y ${{ matrix.linker }}
- name: Configure target aarch64 GNU
if: matrix.target == 'aarch64-unknown-linux-gnu'
## Environment variable is not passed using env:
@ -154,17 +172,14 @@ jobs:
echo '[target.aarch64-unknown-linux-gnu]' >> ~/.cargo/config
echo 'linker = "aarch64-linux-gnu-gcc"' >> ~/.cargo/config
echo 'JEMALLOC_SYS_WITH_LG_PAGE=16' >> $GITHUB_ENV
- name: Cargo build
uses: actions-rs/cargo@v1
with:
command: build
use-cross: ${{ matrix.use-cross }}
args: --release --target ${{ matrix.target }}
- name: List target output files
run: ls -lR ./target
- name: Upload the binary to release
# No need to upload binaries for dry run (cron)
if: github.event_name == 'release'

View File

@ -15,12 +15,20 @@ jobs:
debian:
name: Publish debian packagge
runs-on: ubuntu-18.04
runs-on: ubuntu-latest
needs: check-version
container:
# Use ubuntu-18.04 to compile with glibc 2.27
image: ubuntu:18.04
steps:
- uses: hecrj/setup-rust-action@master
- name: Install needed dependencies
run: |
apt-get update && apt-get install -y curl
apt-get install build-essential -y
- uses: actions-rs/toolchain@v1
with:
rust-version: stable
toolchain: stable
override: true
- name: Install cargo-deb
run: cargo install cargo-deb
- uses: actions/checkout@v3

View File

@ -86,6 +86,7 @@ jobs:
- name: Build and push
uses: docker/build-push-action@v3
with:
push: true
platforms: linux/amd64,linux/arm64
tags: ${{ steps.meta.outputs.tags }}
build-args: |

View File

@ -15,13 +15,42 @@ env:
RUSTFLAGS: "-D warnings"
jobs:
tests:
test-linux:
name: Tests on ubuntu-18.04
runs-on: ubuntu-latest
container:
# Use ubuntu-18.04 to compile with glibc 2.27, which is what production expects
image: ubuntu:18.04
steps:
- uses: actions/checkout@v3
- name: Install needed dependencies
run: |
apt-get update && apt-get install -y curl
apt-get install build-essential -y
- uses: actions-rs/toolchain@v1
with:
toolchain: stable
override: true
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.2.0
- name: Run cargo check without any default features
uses: actions-rs/cargo@v1
with:
command: build
args: --locked --release --no-default-features
- name: Run cargo test
uses: actions-rs/cargo@v1
with:
command: test
args: --locked --release
test-others:
name: Tests on ${{ matrix.os }}
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
os: [ubuntu-18.04, macos-latest, windows-latest]
os: [macos-12, windows-2022]
steps:
- uses: actions/checkout@v3
- name: Cache dependencies
@ -40,12 +69,18 @@ jobs:
# We run tests in debug also, to make sure that the debug_assertions are hit
test-debug:
name: Run tests in debug
runs-on: ubuntu-18.04
runs-on: ubuntu-latest
container:
# Use ubuntu-18.04 to compile with glibc 2.27, which is what production expects
image: ubuntu:18.04
steps:
- uses: actions/checkout@v3
- name: Install needed dependencies
run: |
apt-get update && apt-get install -y curl
apt-get install build-essential -y
- uses: actions-rs/toolchain@v1
with:
profile: minimal
toolchain: stable
override: true
- name: Cache dependencies
@ -58,7 +93,7 @@ jobs:
clippy:
name: Run Clippy
runs-on: ubuntu-18.04
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: actions-rs/toolchain@v1
@ -77,7 +112,7 @@ jobs:
fmt:
name: Run Rustfmt
runs-on: ubuntu-18.04
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: actions-rs/toolchain@v1

View File

@ -16,7 +16,7 @@ jobs:
update-version-cargo-toml:
name: Update version in Cargo.toml files
runs-on: ubuntu-18.04
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: actions-rs/toolchain@v1

18
Cargo.lock generated
View File

@ -1332,8 +1332,8 @@ dependencies = [
[[package]]
name = "filter-parser"
version = "0.37.2"
source = "git+https://github.com/meilisearch/milli.git?tag=v0.37.2#1582b96119fedad39c726a6d4aeda0f53e868a3b"
version = "0.37.3"
source = "git+https://github.com/meilisearch/milli.git?tag=v0.37.3#2101e3c6d592f6ce6cc25b6e4585f3a8a6246457"
dependencies = [
"nom",
"nom_locate",
@ -1351,8 +1351,8 @@ dependencies = [
[[package]]
name = "flatten-serde-json"
version = "0.37.2"
source = "git+https://github.com/meilisearch/milli.git?tag=v0.37.2#1582b96119fedad39c726a6d4aeda0f53e868a3b"
version = "0.37.3"
source = "git+https://github.com/meilisearch/milli.git?tag=v0.37.3#2101e3c6d592f6ce6cc25b6e4585f3a8a6246457"
dependencies = [
"serde_json",
]
@ -1898,8 +1898,8 @@ dependencies = [
[[package]]
name = "json-depth-checker"
version = "0.37.2"
source = "git+https://github.com/meilisearch/milli.git?tag=v0.37.2#1582b96119fedad39c726a6d4aeda0f53e868a3b"
version = "0.37.3"
source = "git+https://github.com/meilisearch/milli.git?tag=v0.37.3#2101e3c6d592f6ce6cc25b6e4585f3a8a6246457"
dependencies = [
"serde_json",
]
@ -2374,6 +2374,7 @@ dependencies = [
"csv",
"either",
"enum-iterator",
"file-store",
"flate2",
"fst",
"insta",
@ -2386,6 +2387,7 @@ dependencies = [
"serde",
"serde_json",
"tar",
"tempfile",
"thiserror",
"time",
"tokio",
@ -2418,8 +2420,8 @@ dependencies = [
[[package]]
name = "milli"
version = "0.37.2"
source = "git+https://github.com/meilisearch/milli.git?tag=v0.37.2#1582b96119fedad39c726a6d4aeda0f53e868a3b"
version = "0.37.3"
source = "git+https://github.com/meilisearch/milli.git?tag=v0.37.3#2101e3c6d592f6ce6cc25b6e4585f3a8a6246457"
dependencies = [
"bimap",
"bincode",

View File

@ -1,7 +1,7 @@
status = [
'Tests on ubuntu-18.04',
'Tests on macos-latest',
'Tests on windows-latest',
'Tests on macos-12',
'Tests on windows-2022',
'Run Clippy',
'Run Rustfmt',
'Run tests in debug',

View File

@ -19,9 +19,10 @@ pub enum Error {
impl ErrorCode for Error {
fn error_code(&self) -> Code {
match self {
// Are these three really Internal errors?
// TODO look at that later.
Error::Io(_) => Code::Internal,
Error::Io(e) => e.error_code(),
// These errors either happen when creating a dump and don't need any error code,
// or come from an internal bad deserialization.
Error::Serde(_) => Code::Internal,
Error::Uuid(_) => Code::Internal,

View File

@ -1,42 +0,0 @@
use meilisearch_auth::error::AuthControllerError;
use meilisearch_types::error::{Code, ErrorCode};
use meilisearch_types::internal_error;
use crate::{index_resolver::error::IndexResolverError, tasks::error::TaskError};
pub type Result<T> = std::result::Result<T, DumpError>;
/// Error type carried by this module's `Result` alias.
#[derive(thiserror::Error, Debug)]
pub enum DumpError {
/// Catch-all wrapper around any boxed error, displayed with the generic
/// internal-error message.
#[error("An internal error has occurred. `{0}`.")]
Internal(Box<dyn std::error::Error + Send + Sync + 'static>),
/// A boxed `IndexResolverError`; its own `Display` output is forwarded unchanged.
#[error("{0}")]
IndexResolver(Box<IndexResolverError>),
}
internal_error!(
DumpError: milli::heed::Error,
std::io::Error,
tokio::task::JoinError,
tokio::sync::oneshot::error::RecvError,
serde_json::error::Error,
tempfile::PersistError,
fs_extra::error::Error,
AuthControllerError,
TaskError
);
/// Converts an `IndexResolverError` into a `DumpError` by boxing it into the
/// `IndexResolver` variant.
impl From<IndexResolverError> for DumpError {
fn from(e: IndexResolverError) -> Self {
Self::IndexResolver(Box::new(e))
}
}
impl ErrorCode for DumpError {
/// Returns the error `Code` to report for this dump error.
fn error_code(&self) -> Code {
match self {
// Wrapped opaque errors are always reported as internal.
DumpError::Internal(_) => Code::Internal,
// Index-resolver errors keep the code chosen by the wrapped error.
DumpError::IndexResolver(e) => e.error_code(),
}
}
}

View File

@ -102,7 +102,7 @@ impl V4Reader {
&self.dump.path().join("indexes").join(index.index_meta.uuid.to_string()),
&index.index_meta,
BufReader::new(
File::open(&self.dump.path().join("updates").join("data.jsonl")).unwrap(),
File::open(self.dump.path().join("updates").join("data.jsonl")).unwrap(),
),
)
}))

View File

@ -25,7 +25,7 @@ impl DumpWriter {
if let Some(instance_uuid) = instance_uuid {
fs::write(
dir.path().join("instance_uid.uuid"),
&instance_uuid.as_hyphenated().to_string(),
instance_uuid.as_hyphenated().to_string(),
)?;
}
@ -36,7 +36,7 @@ impl DumpWriter {
};
fs::write(dir.path().join("metadata.json"), serde_json::to_string(&metadata)?)?;
std::fs::create_dir(&dir.path().join("indexes"))?;
std::fs::create_dir(dir.path().join("indexes"))?;
Ok(DumpWriter { dir })
}

View File

@ -666,7 +666,7 @@ impl IndexScheduler {
let snapshot_path = self.snapshots_path.join(format!("{}.snapshot", db_name));
let temp_snapshot_file = tempfile::NamedTempFile::new_in(&self.snapshots_path)?;
compression::to_tar_gz(temp_snapshot_dir.path(), temp_snapshot_file.path())?;
let file = temp_snapshot_file.persist(&snapshot_path)?;
let file = temp_snapshot_file.persist(snapshot_path)?;
// 5.3 Change the permission to make the snapshot readonly
let mut permissions = file.metadata()?.permissions();

View File

@ -114,17 +114,17 @@ impl ErrorCode for Error {
Error::Dump(e) => e.error_code(),
Error::Milli(e) => e.error_code(),
Error::ProcessBatchPanicked => Code::Internal,
// TODO: TAMO: are all these errors really internal?
Error::Heed(_) => Code::Internal,
Error::FileStore(_) => Code::Internal,
Error::IoError(_) => Code::Internal,
Error::Persist(_) => Code::Internal,
Error::Heed(e) => e.error_code(),
Error::HeedTransaction(e) => e.error_code(),
Error::FileStore(e) => e.error_code(),
Error::IoError(e) => e.error_code(),
Error::Persist(e) => e.error_code(),
// Irrecoverable errors
Error::Anyhow(_) => Code::Internal,
Error::CorruptedTaskQueue => Code::Internal,
Error::CorruptedDump => Code::Internal,
Error::TaskDatabaseUpdate(_) => Code::Internal,
Error::CreateBatch(_) => Code::Internal,
Error::HeedTransaction(_) => Code::Internal,
}
}
}

View File

@ -1220,7 +1220,7 @@ mod tests {
);
let (_uuid, mut file) = index_scheduler.create_update_file_with_uuid(file_uuid).unwrap();
let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap() as u64;
let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap();
(file, documents_count)
}
@ -1600,7 +1600,7 @@ mod tests {
}"#;
let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(0).unwrap();
let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap() as u64;
let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap();
file.persist().unwrap();
index_scheduler
.register(KindWithContent::DocumentAdditionOrUpdate {
@ -1637,7 +1637,7 @@ mod tests {
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task");
let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(0).unwrap();
let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap() as u64;
let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap();
file.persist().unwrap();
index_scheduler
.register(KindWithContent::DocumentAdditionOrUpdate {
@ -1804,7 +1804,7 @@ mod tests {
}"#;
let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(0).unwrap();
let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap() as u64;
let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap();
file.persist().unwrap();
index_scheduler
.register(KindWithContent::DocumentAdditionOrUpdate {
@ -1962,7 +1962,7 @@ mod tests {
);
let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(i).unwrap();
let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap() as u64;
let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap();
file.persist().unwrap();
index_scheduler
.register(KindWithContent::DocumentAdditionOrUpdate {
@ -2009,7 +2009,7 @@ mod tests {
);
let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(i).unwrap();
let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap() as u64;
let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap();
file.persist().unwrap();
index_scheduler
.register(KindWithContent::DocumentAdditionOrUpdate {
@ -2058,7 +2058,7 @@ mod tests {
);
let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(i).unwrap();
let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap() as u64;
let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap();
file.persist().unwrap();
index_scheduler
.register(KindWithContent::DocumentAdditionOrUpdate {
@ -2108,7 +2108,7 @@ mod tests {
);
let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(i).unwrap();
let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap() as u64;
let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap();
file.persist().unwrap();
index_scheduler
.register(KindWithContent::DocumentAdditionOrUpdate {
@ -2159,7 +2159,7 @@ mod tests {
);
let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(i).unwrap();
let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap() as u64;
let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap();
file.persist().unwrap();
index_scheduler
.register(KindWithContent::DocumentAdditionOrUpdate {
@ -2606,7 +2606,7 @@ mod tests {
}"#;
let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(0).unwrap();
let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap() as u64;
let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap();
file.persist().unwrap();
index_scheduler
.register(KindWithContent::DocumentAdditionOrUpdate {
@ -2644,7 +2644,7 @@ mod tests {
}"#;
let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(0).unwrap();
let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap() as u64;
let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap();
file.persist().unwrap();
index_scheduler
.register(KindWithContent::DocumentAdditionOrUpdate {
@ -2700,7 +2700,7 @@ mod tests {
);
let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(i).unwrap();
let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap() as u64;
let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap();
file.persist().unwrap();
index_scheduler
.register(KindWithContent::DocumentAdditionOrUpdate {
@ -2748,7 +2748,7 @@ mod tests {
);
let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(i).unwrap();
let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap() as u64;
let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap();
file.persist().unwrap();
index_scheduler
.register(KindWithContent::DocumentAdditionOrUpdate {
@ -2802,7 +2802,7 @@ mod tests {
);
let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(i).unwrap();
let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap() as u64;
let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap();
file.persist().unwrap();
index_scheduler
.register(KindWithContent::DocumentAdditionOrUpdate {
@ -2861,7 +2861,7 @@ mod tests {
);
let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(i).unwrap();
let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap() as u64;
let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap();
file.persist().unwrap();
index_scheduler
.register(KindWithContent::DocumentAdditionOrUpdate {
@ -2925,7 +2925,7 @@ mod tests {
let allow_index_creation = i % 2 != 0;
let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(i).unwrap();
let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap() as u64;
let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap();
file.persist().unwrap();
index_scheduler
.register(KindWithContent::DocumentAdditionOrUpdate {
@ -2978,7 +2978,7 @@ mod tests {
let allow_index_creation = i % 2 != 0;
let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(i).unwrap();
let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap() as u64;
let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap();
file.persist().unwrap();
index_scheduler
.register(KindWithContent::DocumentAdditionOrUpdate {

View File

@ -29,7 +29,7 @@ pub fn default_snapshot_settings_for_test<'a>(
let test_name = test_name.strip_suffix("::{{closure}}").unwrap_or(test_name);
let test_name = test_name.rsplit("::").next().unwrap().to_owned();
let path = Path::new("snapshots").join(filename).join(&test_name);
let path = Path::new("snapshots").join(filename).join(test_name);
settings.set_snapshot_path(path.clone());
let snap_name = if let Some(name) = name {
Cow::Borrowed(name)

View File

@ -18,7 +18,7 @@ impl AuthController {
let keys_file_path = dst.as_ref().join(KEYS_PATH);
let keys = store.list_api_keys()?;
let mut keys_file = File::create(&keys_file_path)?;
let mut keys_file = File::create(keys_file_path)?;
for key in keys {
serde_json::to_writer(&mut keys_file, &key)?;
keys_file.write_all(b"\n")?;

View File

@ -10,16 +10,18 @@ anyhow = "1.0.65"
csv = "1.1.6"
either = { version = "1.6.1", features = ["serde"] }
enum-iterator = "1.1.3"
file-store = { path = "../file-store" }
flate2 = "1.0.24"
fst = "0.4.7"
memmap2 = "0.5.7"
milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.37.2", default-features = false }
milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.37.3", default-features = false }
proptest = { version = "1.0.0", optional = true }
proptest-derive = { version = "0.3.0", optional = true }
roaring = { version = "0.10.0", features = ["serde"] }
serde = { version = "1.0.145", features = ["derive"] }
serde_json = "1.0.85"
tar = "0.4.38"
tempfile = "3.3.0"
thiserror = "1.0.30"
time = { version = "0.3.7", features = ["serde-well-known", "formatting", "parsing", "macros"] }
tokio = "1.0"

View File

@ -13,7 +13,6 @@ use serde::{Deserialize, Deserializer};
use serde_json::error::Category;
use crate::error::{Code, ErrorCode};
use crate::internal_error;
type Result<T> = std::result::Result<T, DocumentFormatError>;
@ -36,14 +35,14 @@ impl fmt::Display for PayloadType {
#[derive(Debug)]
pub enum DocumentFormatError {
Internal(Box<dyn std::error::Error + Send + Sync + 'static>),
Io(io::Error),
MalformedPayload(Error, PayloadType),
}
impl Display for DocumentFormatError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Self::Internal(e) => write!(f, "An internal error has occurred: `{}`.", e),
Self::Io(e) => write!(f, "{e}"),
Self::MalformedPayload(me, b) => match me.borrow() {
Error::Json(se) => {
let mut message = match se.classify() {
@ -85,23 +84,27 @@ impl std::error::Error for DocumentFormatError {}
impl From<(PayloadType, Error)> for DocumentFormatError {
fn from((ty, error): (PayloadType, Error)) -> Self {
match error {
Error::Io(e) => Self::Internal(Box::new(e)),
Error::Io(e) => Self::Io(e),
e => Self::MalformedPayload(e, ty),
}
}
}
/// Wraps a raw `io::Error` into the `Io` variant so it can be propagated as a
/// `DocumentFormatError`.
impl From<io::Error> for DocumentFormatError {
fn from(error: io::Error) -> Self {
Self::Io(error)
}
}
impl ErrorCode for DocumentFormatError {
fn error_code(&self) -> Code {
match self {
DocumentFormatError::Internal(_) => Code::Internal,
DocumentFormatError::Io(e) => e.error_code(),
DocumentFormatError::MalformedPayload(_, _) => Code::MalformedPayload,
}
}
}
internal_error!(DocumentFormatError: io::Error);
/// Reads CSV from input and write an obkv batch to writer.
pub fn read_csv(file: &File, writer: impl Write + Seek) -> Result<u64> {
let mut builder = DocumentsBatchBuilder::new(writer);
@ -110,7 +113,7 @@ pub fn read_csv(file: &File, writer: impl Write + Seek) -> Result<u64> {
builder.append_csv(csv).map_err(|e| (PayloadType::Csv, e))?;
let count = builder.documents_count();
let _ = builder.into_inner().map_err(Into::into).map_err(DocumentFormatError::Internal)?;
let _ = builder.into_inner().map_err(DocumentFormatError::Io)?;
Ok(count as u64)
}
@ -139,7 +142,7 @@ fn read_json_inner(
// The json data has been deserialized and does not need to be processed again.
// The data has been transferred to the writer during the deserialization process.
Ok(Ok(_)) => (),
Ok(Err(e)) => return Err(DocumentFormatError::Internal(Box::new(e))),
Ok(Err(e)) => return Err(DocumentFormatError::Io(e)),
Err(_e) => {
// If we cannot deserialize the content as an array of object then we try
// to deserialize it with the original method to keep correct error messages.
@ -155,16 +158,13 @@ fn read_json_inner(
.map_err(|e| (payload_type, e))?;
for object in content.inner.map_right(|o| vec![o]).into_inner() {
builder
.append_json_object(&object)
.map_err(Into::into)
.map_err(DocumentFormatError::Internal)?;
builder.append_json_object(&object).map_err(DocumentFormatError::Io)?;
}
}
}
let count = builder.documents_count();
let _ = builder.into_inner().map_err(Into::into).map_err(DocumentFormatError::Internal)?;
let _ = builder.into_inner().map_err(DocumentFormatError::Io)?;
Ok(count as u64)
}

View File

@ -1,4 +1,4 @@
use std::fmt;
use std::{fmt, io};
use actix_web::http::StatusCode;
use actix_web::{self as aweb, HttpResponseBuilder};
@ -23,7 +23,10 @@ pub struct ResponseError {
}
impl ResponseError {
pub fn from_msg(message: String, code: Code) -> Self {
pub fn from_msg(mut message: String, code: Code) -> Self {
if code == Code::IoError {
message.push_str(". This error generally happens when you have no space left on device or when your database doesn't have read or write right.");
}
Self {
code: code.http(),
message,
@ -47,13 +50,7 @@ where
T: ErrorCode,
{
fn from(other: T) -> Self {
Self {
code: other.http_status(),
message: other.to_string(),
error_code: other.error_name(),
error_type: other.error_type(),
error_link: other.error_url(),
}
Self::from_msg(other.to_string(), other.error_code())
}
}
@ -111,8 +108,13 @@ impl fmt::Display for ErrorType {
}
}
#[derive(Serialize, Deserialize, Debug, Clone, Copy)]
#[derive(Serialize, Deserialize, Debug, Clone, Copy, PartialEq, Eq)]
pub enum Code {
// error related to your setup
IoError,
NoSpaceLeftOnDevice,
TooManyOpenFiles,
// index related error
CreateIndex,
IndexAlreadyExists,
@ -145,7 +147,6 @@ pub enum Code {
InvalidToken,
MissingAuthorizationHeader,
MissingMasterKey,
NoSpaceLeftOnDevice,
DumpNotFound,
InvalidTaskDateFilter,
InvalidTaskStatusesFilter,
@ -188,6 +189,15 @@ impl Code {
use Code::*;
match self {
// related to the setup
IoError => ErrCode::invalid("io_error", StatusCode::UNPROCESSABLE_ENTITY),
TooManyOpenFiles => {
ErrCode::invalid("too_many_open_files", StatusCode::UNPROCESSABLE_ENTITY)
}
NoSpaceLeftOnDevice => {
ErrCode::invalid("no_space_left_on_device", StatusCode::UNPROCESSABLE_ENTITY)
}
// index related errors
// create index is thrown on internal error while creating an index.
CreateIndex => {
@ -266,9 +276,6 @@ impl Code {
ErrCode::invalid("missing_task_filters", StatusCode::BAD_REQUEST)
}
DumpNotFound => ErrCode::invalid("dump_not_found", StatusCode::NOT_FOUND),
NoSpaceLeftOnDevice => {
ErrCode::internal("no_space_left_on_device", StatusCode::INTERNAL_SERVER_ERROR)
}
PayloadTooLarge => ErrCode::invalid("payload_too_large", StatusCode::PAYLOAD_TOO_LARGE),
RetrieveDocument => {
ErrCode::internal("unretrievable_document", StatusCode::BAD_REQUEST)
@ -380,7 +387,7 @@ impl ErrorCode for milli::Error {
match self {
Error::InternalError(_) => Code::Internal,
Error::IoError(_) => Code::Internal,
Error::IoError(e) => e.error_code(),
Error::UserError(ref error) => {
match error {
// TODO: wait for spec for new error codes.
@ -415,13 +422,28 @@ impl ErrorCode for milli::Error {
}
}
impl ErrorCode for file_store::Error {
/// Both variants delegate to the `error_code` of the error they wrap.
fn error_code(&self) -> Code {
match self {
Self::IoError(e) => e.error_code(),
Self::PersistError(e) => e.error_code(),
}
}
}
impl ErrorCode for tempfile::PersistError {
/// Delegates to the `error_code` of the `error` field carried by the persist error.
fn error_code(&self) -> Code {
self.error.error_code()
}
}
impl ErrorCode for HeedError {
fn error_code(&self) -> Code {
match self {
HeedError::Mdb(MdbError::MapFull) => Code::DatabaseSizeLimitReached,
HeedError::Mdb(MdbError::Invalid) => Code::InvalidStore,
HeedError::Io(_)
| HeedError::Mdb(_)
HeedError::Io(e) => e.error_code(),
HeedError::Mdb(_)
| HeedError::Encoding
| HeedError::Decoding
| HeedError::InvalidDatabaseTyping
@ -431,6 +453,17 @@ impl ErrorCode for HeedError {
}
}
impl ErrorCode for io::Error {
/// Maps the raw OS error number onto a dedicated `Code`.
///
/// Any other number, and any error that carries no OS error number at all,
/// is reported as `Code::Internal`.
fn error_code(&self) -> Code {
match self.raw_os_error() {
// NOTE(review): 5, 24 and 28 are EIO, EMFILE and ENOSPC on Linux; the same
// numbers mean different things on Windows — confirm the mapping is meant
// to be Unix-only.
Some(5) => Code::IoError,
Some(24) => Code::TooManyOpenFiles,
Some(28) => Code::NoSpaceLeftOnDevice,
_ => Code::Internal,
}
}
}
#[cfg(feature = "test-traits")]
mod strategy {
use proptest::strategy::Strategy;

View File

@ -19,7 +19,7 @@ pub fn create_version_file(db_path: &Path) -> io::Result<()> {
pub fn check_version_file(db_path: &Path) -> anyhow::Result<()> {
let version_path = db_path.join(VERSION_FILE_NAME);
match fs::read_to_string(&version_path) {
match fs::read_to_string(version_path) {
Ok(version) => {
let version_components = version.split('.').collect::<Vec<_>>();
let (major, minor, patch) = match &version_components[..] {

View File

@ -51,7 +51,7 @@ fn config_user_id_path(db_path: &Path) -> Option<PathBuf> {
fn find_user_id(db_path: &Path) -> Option<InstanceUid> {
fs::read_to_string(db_path.join("instance-uid"))
.ok()
.or_else(|| fs::read_to_string(&config_user_id_path(db_path)?).ok())
.or_else(|| fs::read_to_string(config_user_id_path(db_path)?).ok())
.and_then(|uid| InstanceUid::from_str(&uid).ok())
}

View File

@ -108,75 +108,43 @@ pub fn create_app(
.wrap(middleware::NormalizePath::new(middleware::TrailingSlash::Trim))
}
// TODO: TAMO: Finish setting up things
enum OnFailure {
RemoveDb,
KeepDb,
}
pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<(Arc<IndexScheduler>, AuthController)> {
// we don't want to create anything in the data.ms yet, thus we
// wrap our two builders in a closure that'll be executed later.
let auth_controller_builder = || AuthController::new(&opt.db_path, &opt.master_key);
let index_scheduler_builder = || {
IndexScheduler::new(IndexSchedulerOptions {
version_file_path: opt.db_path.join(VERSION_FILE_NAME),
auth_path: opt.db_path.join("auth"),
tasks_path: opt.db_path.join("tasks"),
update_file_path: opt.db_path.join("update_files"),
indexes_path: opt.db_path.join("indexes"),
snapshots_path: opt.snapshot_dir.clone(),
dumps_path: opt.dump_dir.clone(),
task_db_size: opt.max_task_db_size.get_bytes() as usize,
index_size: opt.max_index_size.get_bytes() as usize,
indexer_config: (&opt.indexer_options).try_into()?,
autobatching_enabled: !opt.scheduler_options.disable_auto_batching,
})
};
enum OnFailure {
RemoveDb,
KeepDb,
}
let meilisearch_builder = |on_failure: OnFailure| -> anyhow::Result<_> {
// if anything wrong happens we delete the `data.ms` entirely.
match (
index_scheduler_builder().map_err(anyhow::Error::from),
auth_controller_builder().map_err(anyhow::Error::from),
create_version_file(&opt.db_path).map_err(anyhow::Error::from),
) {
(Ok(i), Ok(a), Ok(())) => Ok((i, a)),
(Err(e), _, _) | (_, Err(e), _) | (_, _, Err(e)) => {
if matches!(on_failure, OnFailure::RemoveDb) {
std::fs::remove_dir_all(&opt.db_path)?;
}
Err(e)
}
}
};
let empty_db = is_empty_db(&opt.db_path);
let (index_scheduler, auth_controller) = if let Some(ref snapshot_path) = opt.import_snapshot {
let snapshot_path_exists = snapshot_path.exists();
// the db is empty and the snapshot exists, import it
if empty_db && snapshot_path_exists {
match compression::from_tar_gz(snapshot_path, &opt.db_path) {
Ok(()) => meilisearch_builder(OnFailure::RemoveDb)?,
Ok(()) => open_or_create_database_unchecked(opt, OnFailure::RemoveDb)?,
Err(e) => {
std::fs::remove_dir_all(&opt.db_path)?;
return Err(e);
}
}
// the db already exists and we should not ignore the snapshot => throw an error
} else if !empty_db && !opt.ignore_snapshot_if_db_exists {
bail!(
"database already exists at {:?}, try to delete it or rename it",
opt.db_path.canonicalize().unwrap_or_else(|_| opt.db_path.to_owned())
)
// the snapshot doesn't exist and we can't ignore it => throw an error
} else if !snapshot_path_exists && !opt.ignore_missing_snapshot {
bail!("snapshot doesn't exist at {}", snapshot_path.display())
// the snapshot and the db exist, and we can ignore the snapshot because of the ignore_snapshot_if_db_exists flag
} else {
meilisearch_builder(OnFailure::RemoveDb)?
open_or_create_database(opt, empty_db)?
}
} else if let Some(ref path) = opt.import_dump {
let src_path_exists = path.exists();
// the db is empty and the dump exists, import it
if empty_db && src_path_exists {
let (mut index_scheduler, mut auth_controller) =
meilisearch_builder(OnFailure::RemoveDb)?;
open_or_create_database_unchecked(opt, OnFailure::RemoveDb)?;
match import_dump(&opt.db_path, path, &mut index_scheduler, &mut auth_controller) {
Ok(()) => (index_scheduler, auth_controller),
Err(e) => {
@ -184,29 +152,22 @@ pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<(Arc<IndexScheduler>, Auth
return Err(e);
}
}
// the db already exists and we should not ignore the dump option => throw an error
} else if !empty_db && !opt.ignore_dump_if_db_exists {
bail!(
"database already exists at {:?}, try to delete it or rename it",
opt.db_path.canonicalize().unwrap_or_else(|_| opt.db_path.to_owned())
)
// the dump doesn't exist and we can't ignore it => throw an error
} else if !src_path_exists && !opt.ignore_missing_dump {
bail!("dump doesn't exist at {:?}", path)
// the dump and the db exist and we can ignore the dump because of the ignore_dump_if_db_exists flag
// or, the dump is missing but we can ignore that because of the ignore_missing_dump flag
} else {
let (mut index_scheduler, mut auth_controller) =
meilisearch_builder(OnFailure::RemoveDb)?;
match import_dump(&opt.db_path, path, &mut index_scheduler, &mut auth_controller) {
Ok(()) => (index_scheduler, auth_controller),
Err(e) => {
std::fs::remove_dir_all(&opt.db_path)?;
return Err(e);
}
}
open_or_create_database(opt, empty_db)?
}
} else {
if !empty_db {
check_version_file(&opt.db_path)?;
}
meilisearch_builder(OnFailure::KeepDb)?
open_or_create_database(opt, empty_db)?
};
// We create a loop in a thread that registers snapshotCreation tasks
@ -228,6 +189,57 @@ pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<(Arc<IndexScheduler>, Auth
Ok((index_scheduler, auth_controller))
}
/// Try to start the IndexScheduler and AuthController without checking the VERSION file or anything.
///
/// The VERSION file is written through `create_version_file` as part of the same call.
/// When any of the three steps fails and `on_failure` is `OnFailure::RemoveDb`, the whole
/// `opt.db_path` directory is removed before the error is returned. If that removal itself
/// fails, the removal error is returned instead of the original one.
fn open_or_create_database_unchecked(
opt: &Opt,
on_failure: OnFailure,
) -> anyhow::Result<(IndexScheduler, AuthController)> {
// The auth controller is opened right away; only the index scheduler is wrapped in a
// closure, which is invoked when the tuple matched below is built.
let auth_controller = AuthController::new(&opt.db_path, &opt.master_key);
let index_scheduler_builder = || -> anyhow::Result<_> {
Ok(IndexScheduler::new(IndexSchedulerOptions {
version_file_path: opt.db_path.join(VERSION_FILE_NAME),
auth_path: opt.db_path.join("auth"),
tasks_path: opt.db_path.join("tasks"),
update_file_path: opt.db_path.join("update_files"),
indexes_path: opt.db_path.join("indexes"),
snapshots_path: opt.snapshot_dir.clone(),
dumps_path: opt.dump_dir.clone(),
task_db_size: opt.max_task_db_size.get_bytes() as usize,
index_size: opt.max_index_size.get_bytes() as usize,
indexer_config: (&opt.indexer_options).try_into()?,
autobatching_enabled: !opt.scheduler_options.disable_auto_batching,
})?)
};
// All three results are computed before matching, so every step runs even if an
// earlier one failed.
match (
index_scheduler_builder(),
auth_controller.map_err(anyhow::Error::from),
create_version_file(&opt.db_path).map_err(anyhow::Error::from),
) {
(Ok(i), Ok(a), Ok(())) => Ok((i, a)),
// Only one error is reported: the scheduler's first, then the auth controller's,
// then the version file's.
(Err(e), _, _) | (_, Err(e), _) | (_, _, Err(e)) => {
if matches!(on_failure, OnFailure::RemoveDb) {
std::fs::remove_dir_all(&opt.db_path)?;
}
Err(e)
}
}
}
/// Ensure you're in a valid state and open the IndexScheduler + AuthController for you.
///
/// When `empty_db` is false the VERSION file is checked first and any mismatch is returned
/// as an error. The database is then opened with `OnFailure::KeepDb`, so a failure here
/// does not remove `opt.db_path`.
fn open_or_create_database(
opt: &Opt,
empty_db: bool,
) -> anyhow::Result<(IndexScheduler, AuthController)> {
if !empty_db {
check_version_file(&opt.db_path)?;
}
open_or_create_database_unchecked(opt, OnFailure::KeepDb)
}
fn import_dump(
db_path: &Path,
dump_path: &Path,