Merge pull request #58 from meilisearch/actor-index-controller

actor index controller
marin 2021-03-15 18:25:35 +01:00 committed by GitHub
commit 0c17b166df
52 changed files with 3934 additions and 2759 deletions
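For context before the diff: the "actor index controller" of the title replaces direct, lock-guarded access to the index controller with message passing, which is why the methods below all become async and the codebase moves to tokio 1.x. A minimal sketch of the pattern, assuming tokio's mpsc mailbox and oneshot replies; all names here are illustrative stand-ins, not the PR's actual types:

```rust
use tokio::sync::{mpsc, oneshot};

// Illustrative message type: each request carries a oneshot sender so
// the caller can await the actor's reply.
enum IndexMsg {
    ListIndexes {
        ret: oneshot::Sender<Vec<String>>,
    },
}

// The actor task owns its state exclusively; no locks are needed
// because all access is serialized through the channel.
async fn run_index_actor(mut inbox: mpsc::Receiver<IndexMsg>) {
    let indexes: Vec<String> = Vec::new();
    while let Some(msg) = inbox.recv().await {
        match msg {
            IndexMsg::ListIndexes { ret } => {
                let _ = ret.send(indexes.clone());
            }
        }
    }
}

// A cheap, cloneable handle, roughly the shape the new `IndexController`
// takes in this PR.
#[derive(Clone)]
struct IndexActorHandle {
    sender: mpsc::Sender<IndexMsg>,
}

impl IndexActorHandle {
    async fn list_indexes(&self) -> Vec<String> {
        let (ret, receiver) = oneshot::channel();
        let _ = self.sender.send(IndexMsg::ListIndexes { ret }).await;
        receiver.await.expect("index actor died")
    }
}
```

The actor task is spawned once (`tokio::spawn(run_index_actor(inbox))`) and every caller keeps only a cloned handle, so no `Mutex` is needed around the state.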



@ -81,7 +81,6 @@ pub enum Code {
}
impl Code {
/// associate a `Code` variant with the actual ErrCode
fn err_code(&self) -> ErrCode {
use Code::*;
@ -94,17 +93,23 @@ impl Code {
// thrown when requesting a nonexistent index
IndexNotFound => ErrCode::invalid("index_not_found", StatusCode::NOT_FOUND),
InvalidIndexUid => ErrCode::invalid("invalid_index_uid", StatusCode::BAD_REQUEST),
OpenIndex => ErrCode::internal("index_not_accessible", StatusCode::INTERNAL_SERVER_ERROR),
OpenIndex => {
ErrCode::internal("index_not_accessible", StatusCode::INTERNAL_SERVER_ERROR)
}
// invalid state error
InvalidState => ErrCode::internal("invalid_state", StatusCode::INTERNAL_SERVER_ERROR),
// thrown when no primary key has been set
MissingPrimaryKey => ErrCode::invalid("missing_primary_key", StatusCode::BAD_REQUEST),
// error thrown when trying to set an already existing primary key
PrimaryKeyAlreadyPresent => ErrCode::invalid("primary_key_already_present", StatusCode::BAD_REQUEST),
PrimaryKeyAlreadyPresent => {
ErrCode::invalid("primary_key_already_present", StatusCode::BAD_REQUEST)
}
// invalid document
MaxFieldsLimitExceeded => ErrCode::invalid("max_fields_limit_exceeded", StatusCode::BAD_REQUEST),
MaxFieldsLimitExceeded => {
ErrCode::invalid("max_fields_limit_exceeded", StatusCode::BAD_REQUEST)
}
MissingDocumentId => ErrCode::invalid("missing_document_id", StatusCode::BAD_REQUEST),
// error related to facets
@ -117,16 +122,26 @@ impl Code {
DocumentNotFound => ErrCode::invalid("document_not_found", StatusCode::NOT_FOUND),
Internal => ErrCode::internal("internal", StatusCode::INTERNAL_SERVER_ERROR),
InvalidToken => ErrCode::authentication("invalid_token", StatusCode::FORBIDDEN),
MissingAuthorizationHeader => ErrCode::authentication("missing_authorization_header", StatusCode::UNAUTHORIZED),
MissingAuthorizationHeader => {
ErrCode::authentication("missing_authorization_header", StatusCode::UNAUTHORIZED)
}
NotFound => ErrCode::invalid("not_found", StatusCode::NOT_FOUND),
PayloadTooLarge => ErrCode::invalid("payload_too_large", StatusCode::PAYLOAD_TOO_LARGE),
RetrieveDocument => ErrCode::internal("unretrievable_document", StatusCode::BAD_REQUEST),
RetrieveDocument => {
ErrCode::internal("unretrievable_document", StatusCode::BAD_REQUEST)
}
SearchDocuments => ErrCode::internal("search_error", StatusCode::BAD_REQUEST),
UnsupportedMediaType => ErrCode::invalid("unsupported_media_type", StatusCode::UNSUPPORTED_MEDIA_TYPE),
UnsupportedMediaType => {
ErrCode::invalid("unsupported_media_type", StatusCode::UNSUPPORTED_MEDIA_TYPE)
}
// error related to dump
DumpAlreadyInProgress => ErrCode::invalid("dump_already_in_progress", StatusCode::CONFLICT),
DumpProcessFailed => ErrCode::internal("dump_process_failed", StatusCode::INTERNAL_SERVER_ERROR),
DumpAlreadyInProgress => {
ErrCode::invalid("dump_already_in_progress", StatusCode::CONFLICT)
}
DumpProcessFailed => {
ErrCode::internal("dump_process_failed", StatusCode::INTERNAL_SERVER_ERROR)
}
}
}
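For readers without the surrounding file: each `Code` variant resolves to a machine-readable error name plus an HTTP status. A stand-in sketch of that pattern, using the `http` crate's `StatusCode`; the real `ErrCode` type lives in meilisearch-error and is not fully shown in this diff:

```rust
use http::StatusCode;

// Stand-in types mirroring the mapping above: a name for the API payload
// plus a status for the response (the real ErrCode constructors take the
// same shape).
struct ErrCode {
    name: &'static str,
    status: StatusCode,
}

impl ErrCode {
    fn invalid(name: &'static str, status: StatusCode) -> ErrCode {
        ErrCode { name, status }
    }
}

enum Code {
    IndexNotFound,
}

impl Code {
    fn err_code(&self) -> ErrCode {
        match self {
            Code::IndexNotFound => ErrCode::invalid("index_not_found", StatusCode::NOT_FOUND),
        }
    }
}
```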


@ -1,5 +1,7 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "actix-codec"
version = "0.3.0"
@ -12,8 +14,24 @@ dependencies = [
"futures-sink",
"log",
"pin-project 0.4.27",
"tokio",
"tokio-util",
"tokio 0.2.24",
"tokio-util 0.3.1",
]
[[package]]
name = "actix-codec"
version = "0.4.0-beta.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "90673465c6187bd0829116b02be465dc0195a74d7719f76ffff0effef934a92e"
dependencies = [
"bitflags",
"bytes 1.0.1",
"futures-core",
"futures-sink",
"log",
"pin-project-lite 0.2.0",
"tokio 1.2.0",
"tokio-util 0.6.3",
]
[[package]]
@ -22,27 +40,22 @@ version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "177837a10863f15ba8d3ae3ec12fac1099099529ed20083a27fdfe247381d0dc"
dependencies = [
"actix-codec",
"actix-rt",
"actix-service",
"actix-utils",
"actix-codec 0.3.0",
"actix-rt 1.1.1",
"actix-service 1.0.6",
"actix-utils 2.0.0",
"derive_more",
"either",
"futures-util",
"http",
"log",
"rustls",
"tokio-rustls",
"trust-dns-proto",
"trust-dns-resolver",
"webpki",
]
[[package]]
name = "actix-cors"
version = "0.5.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3f3a3d5493dbc9b8769fe88c030d057ef8d2edc5728e5e26267780e8fc5db0be"
version = "0.5.4"
dependencies = [
"actix-web",
"derive_more",
@ -58,28 +71,25 @@ version = "2.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "452299e87817ae5673910e53c243484ca38be3828db819b6011736fc6982e874"
dependencies = [
"actix-codec",
"actix-codec 0.3.0",
"actix-connect",
"actix-rt",
"actix-service",
"actix-rt 1.1.1",
"actix-service 1.0.6",
"actix-threadpool",
"actix-tls",
"actix-utils",
"actix-utils 2.0.0",
"base64 0.13.0",
"bitflags",
"brotli2",
"bytes 0.5.6",
"cookie",
"copyless",
"derive_more",
"either",
"encoding_rs",
"flate2",
"futures-channel",
"futures-core",
"futures-util",
"fxhash",
"h2",
"h2 0.2.7",
"http",
"httparse",
"indexmap",
@ -100,6 +110,51 @@ dependencies = [
"time 0.2.23",
]
[[package]]
name = "actix-http"
version = "3.0.0-beta.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8a01f9e0681608afa887d4269a0857ac4226f09ba5ceda25939e8391c9da610a"
dependencies = [
"actix-codec 0.4.0-beta.1",
"actix-rt 2.1.0",
"actix-service 2.0.0-beta.4",
"actix-tls",
"actix-utils 3.0.0-beta.2",
"ahash 0.7.0",
"base64 0.13.0",
"bitflags",
"brotli2",
"bytes 1.0.1",
"bytestring",
"cfg-if 1.0.0",
"cookie",
"derive_more",
"encoding_rs",
"flate2",
"futures-core",
"futures-util",
"h2 0.3.1",
"http",
"httparse",
"itoa",
"language-tags",
"log",
"mime",
"once_cell",
"percent-encoding",
"pin-project 1.0.2",
"rand 0.8.3",
"regex",
"serde",
"serde_json",
"serde_urlencoded",
"sha-1 0.9.2",
"smallvec",
"time 0.2.23",
"tokio 1.2.0",
]
[[package]]
name = "actix-macros"
version = "0.1.3"
@ -111,10 +166,20 @@ dependencies = [
]
[[package]]
name = "actix-router"
version = "0.2.5"
name = "actix-macros"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bbd1f7dbda1645bf7da33554db60891755f6c01c1b2169e2f4c492098d30c235"
checksum = "dbcb2b608f0accc2f5bcf3dd872194ce13d94ee45b571487035864cf966b04ef"
dependencies = [
"quote",
"syn",
]
[[package]]
name = "actix-router"
version = "0.2.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2ad299af73649e1fc893e333ccf86f377751eb95ff875d095131574c6f43452c"
dependencies = [
"bytestring",
"http",
@ -129,33 +194,42 @@ version = "1.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "143fcc2912e0d1de2bcf4e2f720d2a60c28652ab4179685a1ee159e0fb3db227"
dependencies = [
"actix-macros",
"actix-macros 0.1.3",
"actix-threadpool",
"copyless",
"futures-channel",
"futures-util",
"smallvec",
"tokio",
"tokio 0.2.24",
]
[[package]]
name = "actix-rt"
version = "2.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0b4e57bc1a3915e71526d128baf4323700bd1580bc676239e2298a4c5b001f18"
dependencies = [
"actix-macros 0.2.0",
"futures-core",
"tokio 1.2.0",
]
[[package]]
name = "actix-server"
version = "1.0.4"
version = "2.0.0-beta.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "45407e6e672ca24784baa667c5d32ef109ccdd8d5e0b5ebb9ef8a67f4dfb708e"
checksum = "a99198727204a48f82559c18e4b0ba3197b97d5f4576a32bdbef371f3b4599c1"
dependencies = [
"actix-codec",
"actix-rt",
"actix-service",
"actix-utils",
"futures-channel",
"futures-util",
"actix-codec 0.4.0-beta.1",
"actix-rt 2.1.0",
"actix-service 2.0.0-beta.4",
"actix-utils 3.0.0-beta.2",
"futures-core",
"log",
"mio",
"mio-uds",
"mio 0.7.9",
"num_cpus",
"slab",
"socket2",
"tokio 1.2.0",
]
[[package]]
@ -169,17 +243,13 @@ dependencies = [
]
[[package]]
name = "actix-testing"
version = "1.0.1"
name = "actix-service"
version = "2.0.0-beta.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "47239ca38799ab74ee6a8a94d1ce857014b2ac36f242f70f3f75a66f691e791c"
checksum = "ca9756f4d32984ac454ae3155a276f6be69b424197bd3f0ca3c87cde72f41d63"
dependencies = [
"actix-macros",
"actix-rt",
"actix-server",
"actix-service",
"log",
"socket2",
"futures-core",
"pin-project-lite 0.2.0",
]
[[package]]
@ -199,18 +269,21 @@ dependencies = [
[[package]]
name = "actix-tls"
version = "2.0.0"
version = "3.0.0-beta.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "24789b7d7361cf5503a504ebe1c10806896f61e96eca9a7350e23001aca715fb"
checksum = "d2b1455e3f7a26d40cfc1080b571f41e8165e5a88e937ed579f7a4b3d55b0370"
dependencies = [
"actix-codec",
"actix-service",
"actix-utils",
"futures-util",
"rustls",
"tokio-rustls",
"webpki",
"webpki-roots",
"actix-codec 0.4.0-beta.1",
"actix-rt 2.1.0",
"actix-service 2.0.0-beta.4",
"actix-utils 3.0.0-beta.2",
"derive_more",
"futures-core",
"http",
"log",
"tokio-rustls 0.22.0",
"tokio-util 0.6.3",
"webpki-roots 0.21.0",
]
[[package]]
@ -219,9 +292,9 @@ version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2e9022dec56632d1d7979e59af14f0597a28a830a9c1c7fec8b2327eb9f16b5a"
dependencies = [
"actix-codec",
"actix-rt",
"actix-service",
"actix-codec 0.3.0",
"actix-rt 1.1.1",
"actix-service 1.0.6",
"bitflags",
"bytes 0.5.6",
"either",
@ -234,50 +307,63 @@ dependencies = [
]
[[package]]
name = "actix-web"
version = "3.3.2"
name = "actix-utils"
version = "3.0.0-beta.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e641d4a172e7faa0862241a20ff4f1f5ab0ab7c279f00c2d4587b77483477b86"
checksum = "458795e09a29bc5557604f9ff6f32236fd0ee457d631672e4ec8f6a0103bb292"
dependencies = [
"actix-codec",
"actix-http",
"actix-macros",
"actix-codec 0.4.0-beta.1",
"actix-rt 2.1.0",
"actix-service 2.0.0-beta.4",
"futures-core",
"futures-sink",
"log",
"pin-project-lite 0.2.0",
]
[[package]]
name = "actix-web"
version = "4.0.0-beta.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1d95e50c9e32e8456220b5804867de76e97a86ab8c38b51c9edcccc0f0fddca7"
dependencies = [
"actix-codec 0.4.0-beta.1",
"actix-http 3.0.0-beta.4",
"actix-macros 0.2.0",
"actix-router",
"actix-rt",
"actix-rt 2.1.0",
"actix-server",
"actix-service",
"actix-testing",
"actix-threadpool",
"actix-service 2.0.0-beta.4",
"actix-tls",
"actix-utils",
"actix-utils 3.0.0-beta.2",
"actix-web-codegen",
"ahash 0.7.0",
"awc",
"bytes 0.5.6",
"bytes 1.0.1",
"derive_more",
"either",
"encoding_rs",
"futures-channel",
"futures-core",
"futures-util",
"fxhash",
"log",
"mime",
"pin-project 1.0.2",
"regex",
"rustls",
"rustls 0.19.0",
"serde",
"serde_json",
"serde_urlencoded",
"smallvec",
"socket2",
"time 0.2.23",
"tinyvec",
"url",
]
[[package]]
name = "actix-web-codegen"
version = "0.4.0"
version = "0.5.0-beta.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ad26f77093333e0e7c6ffe54ebe3582d908a104e448723eec6d43d08b07143fb"
checksum = "7f138ac357a674c3b480ddb7bbd894b13c1b6e8927d728bc9ea5e17eee2f8fc9"
dependencies = [
"proc-macro2",
"quote",
@ -305,6 +391,17 @@ version = "0.3.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e8fd72866655d1904d6b0997d0b07ba561047d070fbe29de039031c641b61217"
[[package]]
name = "ahash"
version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "efa60d2eadd8b12a996add391db32bd1153eac697ba4869660c0016353611426"
dependencies = [
"getrandom 0.2.2",
"once_cell",
"version_check",
]
[[package]]
name = "aho-corasick"
version = "0.7.15"
@ -332,7 +429,7 @@ checksum = "afddf7f520a80dbf76e6f50a35bca42a2331ef227a28b3b6dc5c2e2338d114b1"
[[package]]
name = "assert-json-diff"
version = "1.0.1"
source = "git+https://github.com/qdequele/assert-json-diff#9012a0c8866d0f2db0ef9a6242e4a19d1e8c67e4"
source = "git+https://github.com/qdequele/assert-json-diff?branch=master#9012a0c8866d0f2db0ef9a6242e4a19d1e8c67e4"
dependencies = [
"serde",
"serde_json",
@ -348,7 +445,28 @@ dependencies = [
"futures-core",
"memchr",
"pin-project-lite 0.1.11",
"tokio",
"tokio 0.2.24",
]
[[package]]
name = "async-stream"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3670df70cbc01729f901f94c887814b3c68db038aad1329a418bae178bc5295c"
dependencies = [
"async-stream-impl",
"futures-core",
]
[[package]]
name = "async-stream-impl"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a3548b8efc9f8e8a5a0a2808c5bd8451a9031b9e5b879a79590304ae928b0a70"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
@ -381,24 +499,26 @@ checksum = "cdb031dd78e28731d87d56cc8ffef4a8f36ca26c38fe2de700543e627f8a464a"
[[package]]
name = "awc"
version = "2.0.3"
version = "3.0.0-beta.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b381e490e7b0cfc37ebc54079b0413d8093ef43d14a4e4747083f7fa47a9e691"
checksum = "09aecd8728f6491a62b27454ea4b36fb7e50faf32928b0369b644e402c651f4e"
dependencies = [
"actix-codec",
"actix-http",
"actix-rt",
"actix-service",
"actix-codec 0.4.0-beta.1",
"actix-http 3.0.0-beta.4",
"actix-rt 2.1.0",
"actix-service 2.0.0-beta.4",
"base64 0.13.0",
"bytes 0.5.6",
"bytes 1.0.1",
"cfg-if 1.0.0",
"derive_more",
"futures-core",
"itoa",
"log",
"mime",
"percent-encoding",
"rand 0.7.3",
"rustls",
"pin-project-lite 0.2.0",
"rand 0.8.3",
"rustls 0.19.0",
"serde",
"serde_json",
"serde_urlencoded",
@ -569,12 +689,18 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e0dcbc35f504eb6fc275a6d20e4ebcda18cf50d40ba6fabff8c711fa16cb3b16"
[[package]]
name = "bytestring"
version = "0.1.5"
name = "bytes"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fc7c05fa5172da78a62d9949d662d2ac89d4cc7355d7b49adee5163f1fb3f363"
checksum = "b700ce4376041dcd0a327fd0097c41095743c4c8af8887265942faf1100bd040"
[[package]]
name = "bytestring"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "90706ba19e97b90786e19dc0d5e2abd80008d99d4c0c5d1ad0b5e72cec7c494d"
dependencies = [
"bytes 0.5.6",
"bytes 1.0.1",
]
[[package]]
@ -1177,19 +1303,38 @@ dependencies = [
"http",
"indexmap",
"slab",
"tokio",
"tokio-util",
"tokio 0.2.24",
"tokio-util 0.3.1",
"tracing",
"tracing-futures",
]
[[package]]
name = "h2"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d832b01df74254fe364568d6ddc294443f61cbec82816b60904303af87efae78"
dependencies = [
"bytes 1.0.1",
"fnv",
"futures-core",
"futures-sink",
"futures-util",
"http",
"indexmap",
"slab",
"tokio 1.2.0",
"tokio-util 0.6.3",
"tracing",
]
[[package]]
name = "hashbrown"
version = "0.7.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "96282e96bfcd3da0d3aa9938bedf1e50df3269b6db08b4876d2da0bb1a0841cf"
dependencies = [
"ahash",
"ahash 0.3.8",
"autocfg",
]
@ -1268,11 +1413,11 @@ dependencies = [
[[package]]
name = "http"
version = "0.2.2"
version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "84129d298a6d57d246960ff8eb831ca4af3f96d29e2e28848dae275408658e26"
checksum = "7245cd7449cc792608c3c8a9eaf69bd4eabbabf802713748fd739c98b82f0747"
dependencies = [
"bytes 0.5.6",
"bytes 1.0.1",
"fnv",
"itoa",
]
@ -1330,7 +1475,7 @@ dependencies = [
"futures-channel",
"futures-core",
"futures-util",
"h2",
"h2 0.2.7",
"http",
"http-body",
"httparse",
@ -1338,7 +1483,7 @@ dependencies = [
"itoa",
"pin-project 1.0.2",
"socket2",
"tokio",
"tokio 0.2.24",
"tower-service",
"tracing",
"want",
@ -1354,9 +1499,9 @@ dependencies = [
"futures-util",
"hyper",
"log",
"rustls",
"tokio",
"tokio-rustls",
"rustls 0.18.1",
"tokio 0.2.24",
"tokio-rustls 0.14.1",
"webpki",
]
@ -1543,9 +1688,9 @@ dependencies = [
[[package]]
name = "libc"
version = "0.2.81"
version = "0.2.86"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1482821306169ec4d07f6aca392a4681f66c75c9918aa49641a2595db64053cb"
checksum = "b7282d924be3275cec7f6756ff4121987bc6481325397dde6ba3e7802b1a8b1c"
[[package]]
name = "linked-hash-map"
@ -1619,7 +1764,7 @@ checksum = "7ffc5c5338469d4d3ea17d269fa8ea3512ad247247c30bd2df69e68309ed0a08"
name = "meilisearch-error"
version = "0.19.0"
dependencies = [
"actix-http",
"actix-http 2.2.0",
]
[[package]]
@ -1627,13 +1772,15 @@ name = "meilisearch-http"
version = "0.17.0"
dependencies = [
"actix-cors",
"actix-http",
"actix-rt",
"actix-service",
"actix-http 3.0.0-beta.4",
"actix-rt 2.1.0",
"actix-service 2.0.0-beta.4",
"actix-web",
"anyhow",
"assert-json-diff",
"async-compression",
"async-stream",
"async-trait",
"byte-unit",
"bytes 0.6.0",
"chrono",
@ -1662,7 +1809,7 @@ dependencies = [
"rand 0.7.3",
"rayon",
"regex",
"rustls",
"rustls 0.19.0",
"sentry",
"serde",
"serde_json",
@ -1674,7 +1821,8 @@ dependencies = [
"tar",
"tempdir",
"tempfile",
"tokio",
"thiserror",
"tokio 1.2.0",
"uuid",
"vergen",
]
@ -1723,7 +1871,7 @@ dependencies = [
[[package]]
name = "milli"
version = "0.1.0"
source = "git+https://github.com/meilisearch/milli.git?rev=8dcb3e0#8dcb3e0c41965c96ae718ae85c45004cf94c6e94"
source = "git+https://github.com/meilisearch/milli.git?rev=794fce7#794fce7bff3e3461a7f3954fd97f58f8232e5a8e"
dependencies = [
"anyhow",
"bstr",
@ -1806,14 +1954,15 @@ dependencies = [
]
[[package]]
name = "mio-named-pipes"
version = "0.1.7"
name = "mio"
version = "0.7.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0840c1c50fd55e521b247f949c241c9997709f23bd7f023b9762cd561e935656"
checksum = "a5dede4e2065b3842b8b0af444119f3aa331cc7cc2dd20388bfb0f5d5a38823a"
dependencies = [
"libc",
"log",
"mio",
"miow 0.3.6",
"ntapi",
"winapi 0.3.9",
]
@ -1825,7 +1974,7 @@ checksum = "afcb699eb26d4332647cc848492bbc15eafb26f08d0304550d5aa1f612e066f0"
dependencies = [
"iovec",
"libc",
"mio",
"mio 0.6.23",
]
[[package]]
@ -1873,6 +2022,15 @@ dependencies = [
"libc",
]
[[package]]
name = "ntapi"
version = "0.3.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3f6bb902e437b6d86e03cce10a7e2af662292c5dfef23b65899ea3ac9354ad44"
dependencies = [
"winapi 0.3.9",
]
[[package]]
name = "num-integer"
version = "0.1.44"
@ -2437,17 +2595,17 @@ dependencies = [
"mime_guess",
"percent-encoding",
"pin-project-lite 0.2.0",
"rustls",
"rustls 0.18.1",
"serde",
"serde_json",
"serde_urlencoded",
"tokio",
"tokio-rustls",
"tokio 0.2.24",
"tokio-rustls 0.14.1",
"url",
"wasm-bindgen",
"wasm-bindgen-futures",
"web-sys",
"webpki-roots",
"webpki-roots 0.20.0",
"winreg 0.7.0",
]
@ -2521,6 +2679,19 @@ dependencies = [
"webpki",
]
[[package]]
name = "rustls"
version = "0.19.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "064fd21ff87c6e87ed4506e68beb42459caa4a0e2eb144932e6776768556980b"
dependencies = [
"base64 0.13.0",
"log",
"ring",
"sct",
"webpki",
]
[[package]]
name = "ryu"
version = "1.0.5"
@ -2947,18 +3118,18 @@ dependencies = [
[[package]]
name = "thiserror"
version = "1.0.22"
version = "1.0.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0e9ae34b84616eedaaf1e9dd6026dbe00dcafa92aa0c8077cb69df1fcfe5e53e"
checksum = "e0f4a65597094d4483ddaed134f409b2cb7c1beccf25201a9f73c719254fa98e"
dependencies = [
"thiserror-impl",
]
[[package]]
name = "thiserror-impl"
version = "1.0.22"
version = "1.0.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9ba20f23e85b10754cd195504aebf6a27e2e6cbe28c17778a0c930724628dd56"
checksum = "7765189610d8241a44529806d6fd1f2e0a08734313a35d5b3a556f92b381f3c0"
dependencies = [
"proc-macro2",
"quote",
@ -3060,22 +3231,40 @@ dependencies = [
"lazy_static",
"libc",
"memchr",
"mio",
"mio-named-pipes",
"mio 0.6.23",
"mio-uds",
"num_cpus",
"pin-project-lite 0.1.11",
"signal-hook-registry",
"slab",
"winapi 0.3.9",
]
[[package]]
name = "tokio"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e8190d04c665ea9e6b6a0dc45523ade572c088d2e6566244c1122671dbf4ae3a"
dependencies = [
"autocfg",
"bytes 1.0.1",
"libc",
"memchr",
"mio 0.7.9",
"num_cpus",
"once_cell",
"parking_lot",
"pin-project-lite 0.2.0",
"signal-hook-registry",
"tokio-macros",
"winapi 0.3.9",
]
[[package]]
name = "tokio-macros"
version = "0.2.6"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e44da00bfc73a25f814cd8d7e57a68a5c31b74b3152a0a1d1f590c97ed06265a"
checksum = "caf7b11a536f46a809a8a9f0bb4237020f70ecbf115b842360afb127ea2fda57"
dependencies = [
"proc-macro2",
"quote",
@ -3089,8 +3278,19 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e12831b255bcfa39dc0436b01e19fea231a37db570686c06ee72c423479f889a"
dependencies = [
"futures-core",
"rustls",
"tokio",
"rustls 0.18.1",
"tokio 0.2.24",
"webpki",
]
[[package]]
name = "tokio-rustls"
version = "0.22.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bc6844de72e57df1980054b38be3a9f4702aba4858be64dd700181a8a6d0e1b6"
dependencies = [
"rustls 0.19.0",
"tokio 1.2.0",
"webpki",
]
@ -3105,7 +3305,21 @@ dependencies = [
"futures-sink",
"log",
"pin-project-lite 0.1.11",
"tokio",
"tokio 0.2.24",
]
[[package]]
name = "tokio-util"
version = "0.6.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ebb7cb2f00c5ae8df755b252306272cd1790d39728363936e01827e11f0b017b"
dependencies = [
"bytes 1.0.1",
"futures-core",
"futures-sink",
"log",
"pin-project-lite 0.2.0",
"tokio 1.2.0",
]
[[package]]
@ -3161,7 +3375,7 @@ dependencies = [
"rand 0.7.3",
"smallvec",
"thiserror",
"tokio",
"tokio 0.2.24",
"url",
]
@ -3181,7 +3395,7 @@ dependencies = [
"resolv-conf",
"smallvec",
"thiserror",
"tokio",
"tokio 0.2.24",
"trust-dns-proto",
]
@ -3433,6 +3647,15 @@ dependencies = [
"webpki",
]
[[package]]
name = "webpki-roots"
version = "0.21.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "82015b7e0b8bad8185994674a13a93306bea76cf5a16c5a181382fd3a5ec2376"
dependencies = [
"webpki",
]
[[package]]
name = "whatlang"
version = "0.9.0"


@ -13,17 +13,20 @@ path = "src/main.rs"
vergen = "3.1.0"
[dependencies]
actix-cors = "0.5.3"
actix-http = "2"
actix-rt = "1"
actix-service = "1.0.6"
actix-web = { version = "3.3.2", features = ["rustls"] }
anyhow = "1.0.38"
actix-cors = { git = "https://github.com/MarinPostma/actix-extras.git", rev = "8f7b1fd" }
actix-http = { version = "3.0.0-beta.4", features = ["cookies"] }
actix-service = "2.0.0-beta.4"
actix-web = { version = "4.0.0-beta.4", features = ["rustls", "cookies"] }
anyhow = "1.0.36"
async-compression = { version = "0.3.6", features = ["gzip", "tokio-02"] }
async-stream = "0.3.0"
async-trait = "0.1.42"
byte-unit = { version = "4.0.9", default-features = false, features = ["std"] }
bytes = "0.6.0"
chrono = { version = "0.4.19", features = ["serde"] }
crossbeam-channel = "0.5.0"
dashmap = "4.0.2"
either = "1.6.1"
env_logger = "0.8.2"
flate2 = "1.0.19"
fst = "0.4.5"
@ -33,18 +36,20 @@ grenad = { git = "https://github.com/Kerollmops/grenad.git", rev = "3adcb26" }
heed = "0.10.6"
http = "0.2.1"
indexmap = { version = "1.3.2", features = ["serde-1"] }
itertools = "0.10.0"
log = "0.4.8"
main_error = "0.1.0"
meilisearch-error = { path = "../meilisearch-error" }
meilisearch-tokenizer = { git = "https://github.com/meilisearch/Tokenizer.git", branch = "main" }
memmap = "0.7.0"
milli = { git = "https://github.com/meilisearch/milli.git", rev = "f190d5f" }
milli = { git = "https://github.com/meilisearch/milli.git", rev = "b7b23cd" }
mime = "0.3.16"
once_cell = "1.5.2"
parking_lot = "0.11.1"
rand = "0.7.3"
rayon = "1.5.0"
regex = "1.4.2"
rustls = "0.18"
rustls = "0.19"
serde = { version = "1.0", features = ["derive"] }
serde_json = { version = "1.0.59", features = ["preserve_order"] }
sha2 = "0.9.1"
@ -53,15 +58,22 @@ slice-group-by = "0.2.6"
structopt = "0.3.20"
tar = "0.4.29"
tempfile = "3.1.0"
tokio = { version = "0.2", features = ["full"] }
dashmap = "4.0.2"
thiserror = "1.0.24"
tokio = { version = "1", features = ["full"] }
uuid = "0.8.2"
itertools = "0.10.0"
either = "1.6.1"
[dependencies.sentry]
default-features = false
features = ["with_client_implementation", "with_panic", "with_failure", "with_device_info", "with_rust_info", "with_reqwest_transport", "with_rustls", "with_env_logger"]
features = [
"with_client_implementation",
"with_panic",
"with_failure",
"with_device_info",
"with_rust_info",
"with_reqwest_transport",
"with_rustls",
"with_env_logger"
]
optional = true
version = "0.18.1"
@ -70,7 +82,7 @@ version = "0.18.1"
serde_url_params = "0.2.0"
tempdir = "0.3.7"
assert-json-diff = { branch = "master", git = "https://github.com/qdequele/assert-json-diff" }
tokio = { version = "0.2", features = ["macros", "time"] }
actix-rt = "2.1.0"
urlencoding = "1.1.1"
[features]
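The bumps above (actix-web 3.3.2 to 4.0.0-beta.4, tokio 0.2 to 1, rustls 0.18 to 0.19, plus the new async-stream and thiserror entries) put the whole server on a tokio 1.x runtime. A hedged sketch of the entry point this implies; the real main.rs is not part of this excerpt and the bind address is illustrative:

```rust
use actix_web::{App, HttpServer};

// actix-web 4.0-beta runs on actix-rt 2 / tokio 1.x; the macro below
// sets that runtime up.
#[actix_web::main]
async fn main() -> std::io::Result<()> {
    HttpServer::new(|| App::new())
        .bind(("127.0.0.1", 7700))? // illustrative address/port
        .run()
        .await
}
```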


@ -1,16 +1,15 @@
mod search;
pub mod search;
mod updates;
pub use search::{SearchQuery, SearchResult, DEFAULT_SEARCH_LIMIT};
use std::fs::create_dir_all;
use std::ops::Deref;
use std::sync::Arc;
use sha2::Digest;
use anyhow::bail;
use crate::index_controller::{IndexController, LocalIndexController, IndexMetadata, Settings, IndexSettings};
use crate::index::Settings;
use crate::index_controller::IndexController;
use crate::index_controller::{IndexMetadata, IndexSettings};
use crate::option::Opt;
#[derive(Clone)]
@ -26,9 +25,8 @@ impl Deref for Data {
}
}
#[derive(Clone)]
pub struct DataInner {
pub index_controller: Arc<LocalIndexController>,
pub index_controller: IndexController,
pub api_keys: ApiKeys,
options: Opt,
}
@ -60,15 +58,11 @@ impl ApiKeys {
impl Data {
pub fn new(options: Opt) -> anyhow::Result<Data> {
let path = options.db_path.clone();
let indexer_opts = options.indexer_options.clone();
create_dir_all(&path)?;
let index_controller = LocalIndexController::new(
&path,
indexer_opts,
options.max_mdb_size.get_bytes(),
options.max_udb_size.get_bytes(),
)?;
let index_controller = Arc::new(index_controller);
let index_size = options.max_mdb_size.get_bytes() as usize;
let update_store_size = options.max_udb_size.get_bytes() as usize;
let index_controller = IndexController::new(&path, index_size, update_store_size)?;
let mut api_keys = ApiKeys {
master: options.clone().master_key,
@ -78,70 +72,39 @@ impl Data {
api_keys.generate_missing_api_keys();
let inner = DataInner { index_controller, options, api_keys };
let inner = DataInner {
index_controller,
options,
api_keys,
};
let inner = Arc::new(inner);
Ok(Data { inner })
}
pub fn settings<S: AsRef<str>>(&self, index_uid: S) -> anyhow::Result<Settings> {
let index = self.index_controller
.index(&index_uid)?
.ok_or_else(|| anyhow::anyhow!("Index {} does not exist.", index_uid.as_ref()))?;
let txn = index.read_txn()?;
let displayed_attributes = index
.displayed_fields(&txn)?
.map(|fields| fields.into_iter().map(String::from).collect())
.unwrap_or_else(|| vec!["*".to_string()]);
let searchable_attributes = index
.searchable_fields(&txn)?
.map(|fields| fields.into_iter().map(String::from).collect())
.unwrap_or_else(|| vec!["*".to_string()]);
let faceted_attributes = index
.faceted_fields(&txn)?
.into_iter()
.map(|(k, v)| (k, v.to_string()))
.collect();
let criteria = index
.criteria(&txn)?
.into_iter()
.map(|v| v.to_string())
.collect();
Ok(Settings {
displayed_attributes: Some(Some(displayed_attributes)),
searchable_attributes: Some(Some(searchable_attributes)),
faceted_attributes: Some(Some(faceted_attributes)),
ranking_rules: Some(Some(criteria)),
})
pub async fn settings(&self, uid: String) -> anyhow::Result<Settings> {
self.index_controller.settings(uid).await
}
pub fn list_indexes(&self) -> anyhow::Result<Vec<IndexMetadata>> {
self.index_controller.list_indexes()
pub async fn list_indexes(&self) -> anyhow::Result<Vec<IndexMetadata>> {
self.index_controller.list_indexes().await
}
pub fn index(&self, name: impl AsRef<str>) -> anyhow::Result<Option<IndexMetadata>> {
Ok(self
.list_indexes()?
.into_iter()
.find(|i| i.uid == name.as_ref()))
pub async fn index(&self, uid: String) -> anyhow::Result<IndexMetadata> {
self.index_controller.get_index(uid).await
}
pub fn create_index(&self, name: impl AsRef<str>, primary_key: Option<impl AsRef<str>>) -> anyhow::Result<IndexMetadata> {
if !is_index_uid_valid(name.as_ref()) {
bail!("invalid index uid: {:?}", name.as_ref())
}
pub async fn create_index(
&self,
uid: String,
primary_key: Option<String>,
) -> anyhow::Result<IndexMetadata> {
let settings = IndexSettings {
name: Some(name.as_ref().to_string()),
primary_key: primary_key.map(|s| s.as_ref().to_string()),
uid: Some(uid),
primary_key,
};
let meta = self.index_controller.create_index(settings)?;
let meta = self.index_controller.create_index(settings).await?;
Ok(meta)
}
@ -155,8 +118,3 @@ impl Data {
&self.api_keys
}
}
fn is_index_uid_valid(uid: &str) -> bool {
uid.chars().all(|x| x.is_ascii_alphanumeric() || x == '-' || x == '_')
}
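Since every `Data` method is now async and forwards to the controller handle, route handlers simply await them. A sketch of a caller under actix-web 4; the handler and route are hypothetical, and `IndexMetadata` is assumed to implement `Serialize`:

```rust
use actix_web::{web, HttpResponse};

use crate::error::ResponseError; // see the error module further down
use crate::Data;

// Hypothetical handler: `list_indexes` is the async method shown above,
// and anyhow errors convert through ResponseError's From impl.
async fn list_indexes(data: web::Data<Data>) -> Result<HttpResponse, ResponseError> {
    let indexes = data.list_indexes().await?;
    Ok(HttpResponse::Ok().json(indexes))
}
```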


@ -1,353 +1,37 @@
use std::collections::{HashSet, BTreeMap};
use std::mem;
use std::time::Instant;
use anyhow::{bail, Context};
use either::Either;
use heed::RoTxn;
use meilisearch_tokenizer::{Analyzer, AnalyzerConfig};
use milli::{FacetCondition, Index, MatchingWords, facet::FacetValue, obkv_to_json};
use serde::{Deserialize, Serialize};
use serde_json::{Map, Value};
use super::Data;
use crate::index_controller::IndexController;
pub const DEFAULT_SEARCH_LIMIT: usize = 20;
const fn default_search_limit() -> usize {
DEFAULT_SEARCH_LIMIT
}
#[derive(Deserialize)]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
#[allow(dead_code)]
pub struct SearchQuery {
pub q: Option<String>,
pub offset: Option<usize>,
#[serde(default = "default_search_limit")]
pub limit: usize,
pub attributes_to_retrieve: Option<Vec<String>>,
pub attributes_to_crop: Option<Vec<String>>,
pub crop_length: Option<usize>,
pub attributes_to_highlight: Option<HashSet<String>>,
pub filters: Option<String>,
pub matches: Option<bool>,
pub facet_filters: Option<Value>,
pub facet_distributions: Option<Vec<String>>,
}
impl SearchQuery {
pub fn perform(&self, index: impl AsRef<Index>) -> anyhow::Result<SearchResult> {
let index = index.as_ref();
let before_search = Instant::now();
let rtxn = index.read_txn()?;
let mut search = index.search(&rtxn);
if let Some(ref query) = self.q {
search.query(query);
}
search.limit(self.limit);
search.offset(self.offset.unwrap_or_default());
if let Some(ref facets) = self.facet_filters {
if let Some(facets) = parse_facets(facets, index, &rtxn)? {
search.facet_condition(facets);
}
}
let milli::SearchResult {
documents_ids,
matching_words,
candidates,
} = search.execute()?;
let mut documents = Vec::new();
let fields_ids_map = index.fields_ids_map(&rtxn)?;
let displayed_fields_ids = index.displayed_fields_ids(&rtxn)?;
let attributes_to_retrieve_ids = match self.attributes_to_retrieve {
Some(ref attrs) if attrs.iter().any(|f| f == "*") => None,
Some(ref attrs) => attrs
.iter()
.filter_map(|f| fields_ids_map.id(f))
.collect::<Vec<_>>()
.into(),
None => None,
};
let displayed_fields_ids = match (displayed_fields_ids, attributes_to_retrieve_ids) {
(_, Some(ids)) => ids,
(Some(ids), None) => ids,
(None, None) => fields_ids_map.iter().map(|(id, _)| id).collect(),
};
let stop_words = fst::Set::default();
let highlighter = Highlighter::new(&stop_words);
for (_id, obkv) in index.documents(&rtxn, documents_ids)? {
let mut object = obkv_to_json(&displayed_fields_ids, &fields_ids_map, obkv)?;
if let Some(ref attributes_to_highlight) = self.attributes_to_highlight {
highlighter.highlight_record(&mut object, &matching_words, attributes_to_highlight);
}
documents.push(object);
}
let nb_hits = candidates.len();
let facet_distributions = match self.facet_distributions {
Some(ref fields) => {
let mut facet_distribution = index.facets_distribution(&rtxn);
if fields.iter().all(|f| f != "*") {
facet_distribution.facets(fields);
}
Some(facet_distribution.candidates(candidates).execute()?)
}
None => None,
};
Ok(SearchResult {
hits: documents,
nb_hits,
query: self.q.clone().unwrap_or_default(),
limit: self.limit,
offset: self.offset.unwrap_or_default(),
processing_time_ms: before_search.elapsed().as_millis(),
facet_distributions,
})
}
}
#[derive(Serialize)]
#[serde(rename_all = "camelCase")]
pub struct SearchResult {
hits: Vec<Map<String, Value>>,
nb_hits: u64,
query: String,
limit: usize,
offset: usize,
processing_time_ms: u128,
#[serde(skip_serializing_if = "Option::is_none")]
facet_distributions: Option<BTreeMap<String, BTreeMap<FacetValue, u64>>>,
}
struct Highlighter<'a, A> {
analyzer: Analyzer<'a, A>,
}
impl<'a, A: AsRef<[u8]>> Highlighter<'a, A> {
fn new(stop_words: &'a fst::Set<A>) -> Self {
let analyzer = Analyzer::new(AnalyzerConfig::default_with_stopwords(stop_words));
Self { analyzer }
}
fn highlight_value(&self, value: Value, words_to_highlight: &MatchingWords) -> Value {
match value {
Value::Null => Value::Null,
Value::Bool(boolean) => Value::Bool(boolean),
Value::Number(number) => Value::Number(number),
Value::String(old_string) => {
let mut string = String::new();
let analyzed = self.analyzer.analyze(&old_string);
for (word, token) in analyzed.reconstruct() {
if token.is_word() {
let to_highlight = words_to_highlight.matches(token.text());
if to_highlight {
string.push_str("<mark>")
}
string.push_str(word);
if to_highlight {
string.push_str("</mark>")
}
} else {
string.push_str(word);
}
}
Value::String(string)
}
Value::Array(values) => Value::Array(
values
.into_iter()
.map(|v| self.highlight_value(v, words_to_highlight))
.collect(),
),
Value::Object(object) => Value::Object(
object
.into_iter()
.map(|(k, v)| (k, self.highlight_value(v, words_to_highlight)))
.collect(),
),
}
}
fn highlight_record(
&self,
object: &mut Map<String, Value>,
words_to_highlight: &MatchingWords,
attributes_to_highlight: &HashSet<String>,
) {
// TODO: do we need to create a new string for elements that do not need to be highlighted?
for (key, value) in object.iter_mut() {
if attributes_to_highlight.contains(key) {
let old_value = mem::take(value);
*value = self.highlight_value(old_value, words_to_highlight);
}
}
}
}
use crate::index::{SearchQuery, SearchResult};
impl Data {
pub fn search<S: AsRef<str>>(
pub async fn search(
&self,
index: S,
index: String,
search_query: SearchQuery,
) -> anyhow::Result<SearchResult> {
match self.index_controller.index(&index)? {
Some(index) => Ok(search_query.perform(index)?),
None => bail!("index {:?} doesn't exist", index.as_ref()),
}
self.index_controller.search(index, search_query).await
}
pub async fn retrieve_documents<S>(
pub async fn retrieve_documents(
&self,
index: impl AsRef<str> + Send + Sync + 'static,
index: String,
offset: usize,
limit: usize,
attributes_to_retrieve: Option<Vec<S>>,
) -> anyhow::Result<Vec<Map<String, Value>>>
where
S: AsRef<str> + Send + Sync + 'static,
{
let index_controller = self.index_controller.clone();
let documents: anyhow::Result<_> = tokio::task::spawn_blocking(move || {
let index = index_controller
.index(&index)?
.with_context(|| format!("Index {:?} doesn't exist", index.as_ref()))?;
let txn = index.read_txn()?;
let fields_ids_map = index.fields_ids_map(&txn)?;
let attributes_to_retrieve_ids = match attributes_to_retrieve {
Some(attrs) => attrs
.iter()
.filter_map(|f| fields_ids_map.id(f.as_ref()))
.collect::<Vec<_>>(),
None => fields_ids_map.iter().map(|(id, _)| id).collect(),
};
let iter = index.documents.range(&txn, &(..))?.skip(offset).take(limit);
let mut documents = Vec::new();
for entry in iter {
let (_id, obkv) = entry?;
let object = obkv_to_json(&attributes_to_retrieve_ids, &fields_ids_map, obkv)?;
documents.push(object);
}
Ok(documents)
})
.await?;
documents
attributes_to_retrieve: Option<Vec<String>>,
) -> anyhow::Result<Vec<Map<String, Value>>> {
self.index_controller
.documents(index, offset, limit, attributes_to_retrieve)
.await
}
pub async fn retrieve_document<S>(
pub async fn retrieve_document(
&self,
index: impl AsRef<str> + Sync + Send + 'static,
document_id: impl AsRef<str> + Sync + Send + 'static,
attributes_to_retrieve: Option<Vec<S>>,
) -> anyhow::Result<Map<String, Value>>
where
S: AsRef<str> + Sync + Send + 'static,
{
let index_controller = self.index_controller.clone();
let document: anyhow::Result<_> = tokio::task::spawn_blocking(move || {
let index = index_controller
.index(&index)?
.with_context(|| format!("Index {:?} doesn't exist", index.as_ref()))?;
let txn = index.read_txn()?;
let fields_ids_map = index.fields_ids_map(&txn)?;
let attributes_to_retrieve_ids = match attributes_to_retrieve {
Some(attrs) => attrs
.iter()
.filter_map(|f| fields_ids_map.id(f.as_ref()))
.collect::<Vec<_>>(),
None => fields_ids_map.iter().map(|(id, _)| id).collect(),
};
let internal_id = index
.external_documents_ids(&txn)?
.get(document_id.as_ref().as_bytes())
.with_context(|| format!("Document with id {} not found", document_id.as_ref()))?;
let document = index
.documents(&txn, std::iter::once(internal_id))?
.into_iter()
.next()
.map(|(_, d)| d);
match document {
Some(document) => Ok(obkv_to_json(
&attributes_to_retrieve_ids,
&fields_ids_map,
document,
)?),
None => bail!("Document with id {} not found", document_id.as_ref()),
}
})
.await?;
document
index: String,
document_id: String,
attributes_to_retrieve: Option<Vec<String>>,
) -> anyhow::Result<Map<String, Value>> {
self.index_controller
.document(index, document_id, attributes_to_retrieve)
.await
}
}
fn parse_facets_array(
txn: &RoTxn,
index: &Index,
arr: &Vec<Value>,
) -> anyhow::Result<Option<FacetCondition>> {
let mut ands = Vec::new();
for value in arr {
match value {
Value::String(s) => ands.push(Either::Right(s.clone())),
Value::Array(arr) => {
let mut ors = Vec::new();
for value in arr {
match value {
Value::String(s) => ors.push(s.clone()),
v => bail!("Invalid facet expression, expected String, found: {:?}", v),
}
}
ands.push(Either::Left(ors));
}
v => bail!(
"Invalid facet expression, expected String or [String], found: {:?}",
v
),
}
}
FacetCondition::from_array(txn, index, ands)
}
fn parse_facets(
facets: &Value,
index: &Index,
txn: &RoTxn,
) -> anyhow::Result<Option<FacetCondition>> {
match facets {
// Disabled for now
//Value::String(expr) => Ok(Some(FacetCondition::from_str(txn, index, expr)?)),
Value::Array(arr) => parse_facets_array(txn, index, arr),
v => bail!(
"Invalid facet expression, expected Array, found: {:?}",
v
),
}
}
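Before this parsing moves into the new index module (it reappears near the end of this diff), note the shape `parse_facets` accepts: the outer JSON array ANDs its clauses and any nested array ORs its members, per the `Either::Left` / `Either::Right` branches above. An illustrative filter; the attribute and value names are made up:

```rust
// (genres = horror OR genres = comedy) AND year = 2001
fn example_filters() -> serde_json::Value {
    serde_json::json!([
        ["genres:horror", "genres:comedy"],
        "year:2001",
    ])
}
```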


@ -1,123 +1,80 @@
use std::ops::Deref;
use anyhow::bail;
use async_compression::tokio_02::write::GzipEncoder;
use futures_util::stream::StreamExt;
use actix_web::web::Payload;
use milli::update::{IndexDocumentsMethod, UpdateFormat};
use tokio::io::AsyncWriteExt;
use super::{Data, is_index_uid_valid};
use crate::index_controller::{UpdateStatus, IndexController, Settings, IndexSettings, IndexMetadata};
use super::Data;
use crate::index::Settings;
use crate::index_controller::{IndexMetadata, IndexSettings, UpdateStatus};
impl Data {
pub async fn add_documents<B, E>(
pub async fn add_documents(
&self,
index: impl AsRef<str> + Send + Sync + 'static,
index: String,
method: IndexDocumentsMethod,
format: UpdateFormat,
mut stream: impl futures::Stream<Item=Result<B, E>> + Unpin,
stream: Payload,
primary_key: Option<String>,
) -> anyhow::Result<UpdateStatus>
where
B: Deref<Target = [u8]>,
E: std::error::Error + Send + Sync + 'static,
{
if !is_index_uid_valid(index.as_ref()) {
bail!("invalid index uid: {:?}", index.as_ref())
}
let file = tokio::task::spawn_blocking(tempfile::tempfile).await?;
let file = tokio::fs::File::from_std(file?);
let mut encoder = GzipEncoder::new(file);
let mut empty_update = true;
while let Some(result) = stream.next().await {
empty_update = false;
let bytes = &*result?;
encoder.write_all(&bytes[..]).await?;
}
encoder.shutdown().await?;
let mut file = encoder.into_inner();
file.sync_all().await?;
let file = file.into_std().await;
let index_controller = self.index_controller.clone();
let update = tokio::task::spawn_blocking(move ||{
let mmap;
let bytes = if empty_update {
&[][..]
} else {
mmap = unsafe { memmap::Mmap::map(&file)? };
&mmap
};
index_controller.add_documents(index, method, format, &bytes, primary_key)
}).await??;
Ok(update.into())
) -> anyhow::Result<UpdateStatus> {
let update_status = self
.index_controller
.add_documents(index, method, format, stream, primary_key)
.await?;
Ok(update_status)
}
pub async fn update_settings(
&self,
index: impl AsRef<str> + Send + Sync + 'static,
index: String,
settings: Settings,
create: bool,
) -> anyhow::Result<UpdateStatus> {
if !is_index_uid_valid(index.as_ref()) {
bail!("invalid index uid: {:?}", index.as_ref())
}
let index_controller = self.index_controller.clone();
let update = tokio::task::spawn_blocking(move || index_controller.update_settings(index, settings, create)).await??;
Ok(update.into())
let update = self
.index_controller
.update_settings(index, settings, create)
.await?;
Ok(update)
}
pub async fn clear_documents(
&self,
index: impl AsRef<str> + Sync + Send + 'static,
) -> anyhow::Result<UpdateStatus> {
let index_controller = self.index_controller.clone();
let update = tokio::task::spawn_blocking(move || index_controller.clear_documents(index)).await??;
Ok(update.into())
pub async fn clear_documents(&self, index: String) -> anyhow::Result<UpdateStatus> {
let update = self.index_controller.clear_documents(index).await?;
Ok(update)
}
pub async fn delete_documents(
&self,
index: impl AsRef<str> + Sync + Send + 'static,
index: String,
document_ids: Vec<String>,
) -> anyhow::Result<UpdateStatus> {
let index_controller = self.index_controller.clone();
let update = tokio::task::spawn_blocking(move || index_controller.delete_documents(index, document_ids)).await??;
Ok(update.into())
let update = self
.index_controller
.delete_documents(index, document_ids)
.await?;
Ok(update)
}
pub async fn delete_index(
&self,
index: impl AsRef<str> + Send + Sync + 'static,
) -> anyhow::Result<()> {
let index_controller = self.index_controller.clone();
tokio::task::spawn_blocking(move || { index_controller.delete_index(index) }).await??;
pub async fn delete_index(&self, index: String) -> anyhow::Result<()> {
self.index_controller.delete_index(index).await?;
Ok(())
}
#[inline]
pub fn get_update_status(&self, index: impl AsRef<str>, uid: u64) -> anyhow::Result<Option<UpdateStatus>> {
self.index_controller.update_status(index, uid)
pub async fn get_update_status(&self, index: String, uid: u64) -> anyhow::Result<UpdateStatus> {
self.index_controller.update_status(index, uid).await
}
pub fn get_updates_status(&self, index: impl AsRef<str>) -> anyhow::Result<Vec<UpdateStatus>> {
self.index_controller.all_update_status(index)
pub async fn get_updates_status(&self, index: String) -> anyhow::Result<Vec<UpdateStatus>> {
self.index_controller.all_update_status(index).await
}
pub fn update_index(
pub async fn update_index(
&self,
name: impl AsRef<str>,
primary_key: Option<impl AsRef<str>>,
new_name: Option<impl AsRef<str>>
uid: String,
primary_key: Option<String>,
new_uid: Option<String>,
) -> anyhow::Result<IndexMetadata> {
let settings = IndexSettings {
name: new_name.map(|s| s.as_ref().to_string()),
primary_key: primary_key.map(|s| s.as_ref().to_string()),
uid: new_uid,
primary_key,
};
self.index_controller.update_index(name, settings)
self.index_controller.update_index(uid, settings).await
}
}
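With the new signature, the raw actix `Payload` stream goes straight to the controller instead of being gzip-buffered into a temp file here. A hypothetical caller; the index uid, method, and format values are illustrative, and `UpdateStatus` is assumed to implement `Serialize`:

```rust
use actix_web::web::{self, Payload};
use actix_web::HttpResponse;
use milli::update::{IndexDocumentsMethod, UpdateFormat};

use crate::error::ResponseError;
use crate::Data;

// Hypothetical route handler forwarding the body stream to add_documents.
async fn add_docs(
    data: web::Data<Data>,
    body: Payload,
) -> Result<HttpResponse, ResponseError> {
    let status = data
        .add_documents(
            "movies".to_string(), // index uid (illustrative)
            IndexDocumentsMethod::ReplaceDocuments,
            UpdateFormat::Json,
            body,
            None, // no explicit primary key
        )
        .await?;
    Ok(HttpResponse::Accepted().json(status))
}
```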


@ -1,13 +1,13 @@
use std::error;
use std::fmt;
use actix_http::ResponseBuilder;
use actix_web as aweb;
use actix_web::dev::HttpResponseBuilder;
use actix_web::error::{JsonPayloadError, QueryPayloadError};
use actix_web::http::Error as HttpError;
use actix_web::http::StatusCode;
use serde::ser::{Serialize, Serializer, SerializeStruct};
use meilisearch_error::{ErrorCode, Code};
use meilisearch_error::{Code, ErrorCode};
use serde::ser::{Serialize, SerializeStruct, Serializer};
#[derive(Debug)]
pub struct ResponseError {
@ -31,19 +31,25 @@ impl fmt::Display for ResponseError {
// TODO: remove this when implementing actual error handling
impl From<anyhow::Error> for ResponseError {
fn from(other: anyhow::Error) -> ResponseError {
ResponseError { inner: Box::new(Error::NotFound(other.to_string())) }
ResponseError {
inner: Box::new(Error::NotFound(other.to_string())),
}
}
}
impl From<Error> for ResponseError {
fn from(error: Error) -> ResponseError {
ResponseError { inner: Box::new(error) }
ResponseError {
inner: Box::new(error),
}
}
}
impl From<FacetCountError> for ResponseError {
fn from(err: FacetCountError) -> ResponseError {
ResponseError { inner: Box::new(err) }
ResponseError {
inner: Box::new(err),
}
}
}
@ -66,7 +72,7 @@ impl Serialize for ResponseError {
impl aweb::error::ResponseError for ResponseError {
fn error_response(&self) -> aweb::HttpResponse {
ResponseBuilder::new(self.status_code()).json(&self)
HttpResponseBuilder::new(self.status_code()).json(&self)
}
fn status_code(&self) -> StatusCode {
@ -129,7 +135,10 @@ impl ErrorCode for Error {
pub enum FacetCountError {
AttributeNotSet(String),
SyntaxError(String),
UnexpectedToken { found: String, expected: &'static [&'static str] },
UnexpectedToken {
found: String,
expected: &'static [&'static str],
},
NoFacetSet,
}
@ -142,7 +151,10 @@ impl ErrorCode for FacetCountError {
}
impl FacetCountError {
pub fn unexpected_token(found: impl ToString, expected: &'static [&'static str]) -> FacetCountError {
pub fn unexpected_token(
found: impl ToString,
expected: &'static [&'static str],
) -> FacetCountError {
let found = found.to_string();
FacetCountError::UnexpectedToken { expected, found }
}
@ -161,7 +173,9 @@ impl fmt::Display for FacetCountError {
match self {
AttributeNotSet(attr) => write!(f, "Attribute {} is not set as a facet", attr),
SyntaxError(msg) => write!(f, "Syntax error: {}", msg),
UnexpectedToken { expected, found } => write!(f, "Unexpected {} found, expected {:?}", found, expected),
UnexpectedToken { expected, found } => {
write!(f, "Unexpected {} found, expected {:?}", found, expected)
}
NoFacetSet => write!(f, "Can't perform facet count, as no facet is set"),
}
}
@ -260,8 +274,8 @@ impl From<std::io::Error> for Error {
}
}
impl From<actix_http::Error> for Error {
fn from(err: actix_http::Error) -> Error {
impl From<HttpError> for Error {
fn from(err: HttpError) -> Error {
Error::Internal(err.to_string())
}
}
@ -275,10 +289,14 @@ impl From<serde_json::error::Error> for Error {
impl From<JsonPayloadError> for Error {
fn from(err: JsonPayloadError) -> Error {
match err {
JsonPayloadError::Deserialize(err) => Error::BadRequest(format!("Invalid JSON: {}", err)),
JsonPayloadError::Deserialize(err) => {
Error::BadRequest(format!("Invalid JSON: {}", err))
}
JsonPayloadError::Overflow => Error::PayloadTooLarge,
JsonPayloadError::ContentType => Error::UnsupportedMediaType,
JsonPayloadError::Payload(err) => Error::BadRequest(format!("Problem while decoding the request: {}", err)),
JsonPayloadError::Payload(err) => {
Error::BadRequest(format!("Problem while decoding the request: {}", err))
}
}
}
}
@ -286,7 +304,9 @@ impl From<JsonPayloadError> for Error {
impl From<QueryPayloadError> for Error {
fn from(err: QueryPayloadError) -> Error {
match err {
QueryPayloadError::Deserialize(err) => Error::BadRequest(format!("Invalid query parameters: {}", err)),
QueryPayloadError::Deserialize(err) => {
Error::BadRequest(format!("Invalid query parameters: {}", err))
}
}
}
}


@ -3,27 +3,26 @@ use std::pin::Pin;
use std::rc::Rc;
use std::task::{Context, Poll};
use actix_service::{Service, Transform};
use actix_web::{dev::ServiceRequest, dev::ServiceResponse, web};
use actix_web::dev::{Service, ServiceRequest, ServiceResponse, Transform};
use actix_web::web;
use futures::future::{err, ok, Future, Ready};
use crate::error::{Error, ResponseError};
use crate::Data;
#[derive(Clone)]
#[derive(Clone, Copy)]
pub enum Authentication {
Public,
Private,
Admin,
}
impl<S: 'static, B> Transform<S> for Authentication
impl<S: 'static, B> Transform<S, ServiceRequest> for Authentication
where
S: Service<Request = ServiceRequest, Response = ServiceResponse<B>, Error = actix_web::Error>,
S: Service<ServiceRequest, Response = ServiceResponse<B>, Error = actix_web::Error>,
S::Future: 'static,
B: 'static,
{
type Request = ServiceRequest;
type Response = ServiceResponse<B>;
type Error = actix_web::Error;
type InitError = ();
@ -32,7 +31,7 @@ where
fn new_transform(&self, service: S) -> Self::Future {
ok(LoggingMiddleware {
acl: self.clone(),
acl: *self,
service: Rc::new(RefCell::new(service)),
})
}
@ -44,23 +43,22 @@ pub struct LoggingMiddleware<S> {
}
#[allow(clippy::type_complexity)]
impl<S, B> Service for LoggingMiddleware<S>
impl<S, B> Service<ServiceRequest> for LoggingMiddleware<S>
where
S: Service<Request = ServiceRequest, Response = ServiceResponse<B>, Error = actix_web::Error> + 'static,
S: Service<ServiceRequest, Response = ServiceResponse<B>, Error = actix_web::Error> + 'static,
S::Future: 'static,
B: 'static,
{
type Request = ServiceRequest;
type Response = ServiceResponse<B>;
type Error = actix_web::Error;
type Future = Pin<Box<dyn Future<Output = Result<Self::Response, Self::Error>>>>;
fn poll_ready(&mut self, cx: &mut Context) -> Poll<Result<(), Self::Error>> {
fn poll_ready(&self, cx: &mut Context) -> Poll<Result<(), Self::Error>> {
self.service.poll_ready(cx)
}
fn call(&mut self, req: ServiceRequest) -> Self::Future {
let mut svc = self.service.clone();
fn call(&self, req: ServiceRequest) -> Self::Future {
let svc = self.service.clone();
// This unwrap is left in place because this error should never occur. If it does,
// it means that either actix-web has an issue or someone changed the `Data` type.
let data = req.app_data::<web::Data<Data>>().unwrap();
@ -72,10 +70,16 @@ where
let auth_header = match req.headers().get("X-Meili-API-Key") {
Some(auth) => match auth.to_str() {
Ok(auth) => auth,
Err(_) => return Box::pin(err(ResponseError::from(Error::MissingAuthorizationHeader).into())),
Err(_) => {
return Box::pin(err(
ResponseError::from(Error::MissingAuthorizationHeader).into()
))
}
},
None => {
return Box::pin(err(ResponseError::from(Error::MissingAuthorizationHeader).into()));
return Box::pin(err(
ResponseError::from(Error::MissingAuthorizationHeader).into()
));
}
};
@ -95,9 +99,10 @@ where
if authenticated {
Box::pin(svc.call(req))
} else {
Box::pin(err(
ResponseError::from(Error::InvalidToken(auth_header.to_string())).into()
Box::pin(err(ResponseError::from(Error::InvalidToken(
auth_header.to_string(),
))
.into()))
}
}
}
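The port to actix-web 4 above changes `Transform<S>` to `Transform<S, ServiceRequest>` and makes `poll_ready`/`call` take `&self`. Wiring stays the same in spirit; a hypothetical sketch, since the route tree itself is not part of this diff:

```rust
use actix_web::{web, HttpResponse};

use crate::helpers::Authentication;

// Hypothetical wiring: wrap() accepts any Transform, so a scope can be
// guarded with one of the ACL variants defined above.
fn configure(cfg: &mut web::ServiceConfig) {
    cfg.service(
        web::scope("/indexes")
            .wrap(Authentication::Private)
            .route("", web::get().to(|| async { HttpResponse::Ok().finish() })),
    );
}
```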


@ -1,9 +1,9 @@
use flate2::Compression;
use flate2::read::GzDecoder;
use flate2::write::GzEncoder;
use flate2::Compression;
use std::fs::{create_dir_all, File};
use std::path::Path;
use tar::{Builder, Archive};
use tar::{Archive, Builder};
use crate::error::Error;


@ -1,6 +1,4 @@
pub mod authentication;
pub mod normalize_path;
pub mod compression;
pub use authentication::Authentication;
pub use normalize_path::NormalizePath;


@ -1,86 +0,0 @@
/// From https://docs.rs/actix-web/3.0.0-alpha.2/src/actix_web/middleware/normalize.rs.html#34
use actix_http::Error;
use actix_service::{Service, Transform};
use actix_web::{
dev::ServiceRequest,
dev::ServiceResponse,
http::uri::{PathAndQuery, Uri},
};
use futures::future::{ok, Ready};
use regex::Regex;
use std::task::{Context, Poll};
pub struct NormalizePath;
impl<S, B> Transform<S> for NormalizePath
where
S: Service<Request = ServiceRequest, Response = ServiceResponse<B>, Error = Error>,
S::Future: 'static,
{
type Request = ServiceRequest;
type Response = ServiceResponse<B>;
type Error = Error;
type InitError = ();
type Transform = NormalizePathNormalization<S>;
type Future = Ready<Result<Self::Transform, Self::InitError>>;
fn new_transform(&self, service: S) -> Self::Future {
ok(NormalizePathNormalization {
service,
merge_slash: Regex::new("//+").unwrap(),
})
}
}
pub struct NormalizePathNormalization<S> {
service: S,
merge_slash: Regex,
}
impl<S, B> Service for NormalizePathNormalization<S>
where
S: Service<Request = ServiceRequest, Response = ServiceResponse<B>, Error = Error>,
S::Future: 'static,
{
type Request = ServiceRequest;
type Response = ServiceResponse<B>;
type Error = Error;
type Future = S::Future;
fn poll_ready(&mut self, cx: &mut Context<'_>) -> Poll<Result<(), Self::Error>> {
self.service.poll_ready(cx)
}
fn call(&mut self, mut req: ServiceRequest) -> Self::Future {
let head = req.head_mut();
// always add trailing slash, might be an extra one
let path = head.uri.path().to_string() + "/";
if self.merge_slash.find(&path).is_some() {
// normalize multiple /'s to one /
let path = self.merge_slash.replace_all(&path, "/");
let path = if path.len() > 1 {
path.trim_end_matches('/')
} else {
&path
};
let mut parts = head.uri.clone().into_parts();
let pq = parts.path_and_query.as_ref().unwrap();
let path = if let Some(q) = pq.query() {
bytes::Bytes::from(format!("{}?{}", path, q))
} else {
bytes::Bytes::copy_from_slice(path.as_bytes())
};
parts.path_and_query = Some(PathAndQuery::from_maybe_shared(path).unwrap());
let uri = Uri::from_parts(parts).unwrap();
req.match_info_mut().get_mut().update(&uri);
req.head_mut().uri = uri;
}
self.service.call(req)
}
}


@ -0,0 +1,138 @@
mod search;
mod updates;
use std::collections::HashSet;
use std::ops::Deref;
use std::sync::Arc;
use anyhow::{bail, Context};
use milli::obkv_to_json;
use serde_json::{Map, Value};
pub use search::{SearchQuery, SearchResult, DEFAULT_SEARCH_LIMIT};
pub use updates::{Facets, Settings, UpdateResult};
pub type Document = Map<String, Value>;
#[derive(Clone)]
pub struct Index(pub Arc<milli::Index>);
impl Deref for Index {
type Target = milli::Index;
fn deref(&self) -> &Self::Target {
self.0.as_ref()
}
}
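`Index` is a newtype over `Arc<milli::Index>`, so `Clone` is a cheap refcount bump and every `milli::Index` method is reachable through `Deref`. A small sketch; the helper name is illustrative:

```rust
// `clone()` bumps the Arc refcount instead of copying the index, so a
// handle can be moved into another task freely; Deref lets milli::Index
// methods (read_txn, fields_ids_map, ...) be called on the wrapper.
fn field_count(index: &Index) -> anyhow::Result<usize> {
    let handle = index.clone(); // cheap: clones the Arc only
    let txn = handle.read_txn()?;
    let fields = handle.fields_ids_map(&txn)?;
    Ok(fields.iter().count())
}
```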
impl Index {
pub fn settings(&self) -> anyhow::Result<Settings> {
let txn = self.read_txn()?;
let displayed_attributes = self
.displayed_fields(&txn)?
.map(|fields| fields.into_iter().map(String::from).collect())
.unwrap_or_else(|| vec!["*".to_string()]);
let searchable_attributes = self
.searchable_fields(&txn)?
.map(|fields| fields.into_iter().map(String::from).collect())
.unwrap_or_else(|| vec!["*".to_string()]);
let faceted_attributes = self
.faceted_fields(&txn)?
.into_iter()
.map(|(k, v)| (k, v.to_string()))
.collect();
let criteria = self
.criteria(&txn)?
.into_iter()
.map(|c| c.to_string())
.collect();
Ok(Settings {
displayed_attributes: Some(Some(displayed_attributes)),
searchable_attributes: Some(Some(searchable_attributes)),
attributes_for_faceting: Some(Some(faceted_attributes)),
ranking_rules: Some(Some(criteria)),
})
}
pub fn retrieve_documents<S: AsRef<str>>(
&self,
offset: usize,
limit: usize,
attributes_to_retrieve: Option<Vec<S>>,
) -> anyhow::Result<Vec<Map<String, Value>>> {
let txn = self.read_txn()?;
let fields_ids_map = self.fields_ids_map(&txn)?;
let fields_to_display =
self.fields_to_display(&txn, attributes_to_retrieve, &fields_ids_map)?;
let iter = self.documents.range(&txn, &(..))?.skip(offset).take(limit);
let mut documents = Vec::new();
for entry in iter {
let (_id, obkv) = entry?;
let object = obkv_to_json(&fields_to_display, &fields_ids_map, obkv)?;
documents.push(object);
}
Ok(documents)
}
pub fn retrieve_document<S: AsRef<str>>(
&self,
doc_id: String,
attributes_to_retrieve: Option<Vec<S>>,
) -> anyhow::Result<Map<String, Value>> {
let txn = self.read_txn()?;
let fields_ids_map = self.fields_ids_map(&txn)?;
let fields_to_display =
self.fields_to_display(&txn, attributes_to_retrieve, &fields_ids_map)?;
let internal_id = self
.external_documents_ids(&txn)?
.get(doc_id.as_bytes())
.with_context(|| format!("Document with id {} not found", doc_id))?;
let document = self
.documents(&txn, std::iter::once(internal_id))?
.into_iter()
.next()
.map(|(_, d)| d);
match document {
Some(document) => Ok(obkv_to_json(&fields_to_display, &fields_ids_map, document)?),
None => bail!("Document with id {} not found", doc_id),
}
}
fn fields_to_display<S: AsRef<str>>(
&self,
txn: &heed::RoTxn,
attributes_to_retrieve: Option<Vec<S>>,
fields_ids_map: &milli::FieldsIdsMap,
) -> anyhow::Result<Vec<u8>> {
let mut displayed_fields_ids = match self.displayed_fields_ids(&txn)? {
Some(ids) => ids.into_iter().collect::<Vec<_>>(),
None => fields_ids_map.iter().map(|(id, _)| id).collect(),
};
let attributes_to_retrieve_ids = match attributes_to_retrieve {
Some(attrs) => attrs
.iter()
.filter_map(|f| fields_ids_map.id(f.as_ref()))
.collect::<HashSet<_>>(),
None => fields_ids_map.iter().map(|(id, _)| id).collect(),
};
displayed_fields_ids.retain(|fid| attributes_to_retrieve_ids.contains(fid));
Ok(displayed_fields_ids)
}
}


@ -0,0 +1,228 @@
use std::collections::{BTreeMap, HashSet};
use std::mem;
use std::time::Instant;
use anyhow::bail;
use either::Either;
use heed::RoTxn;
use meilisearch_tokenizer::{Analyzer, AnalyzerConfig};
use milli::{facet::FacetValue, FacetCondition, MatchingWords};
use serde::{Deserialize, Serialize};
use serde_json::{Map, Value};
use super::Index;
pub const DEFAULT_SEARCH_LIMIT: usize = 20;
const fn default_search_limit() -> usize {
DEFAULT_SEARCH_LIMIT
}
#[derive(Deserialize)]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
#[allow(dead_code)]
pub struct SearchQuery {
pub q: Option<String>,
pub offset: Option<usize>,
#[serde(default = "default_search_limit")]
pub limit: usize,
pub attributes_to_retrieve: Option<Vec<String>>,
pub attributes_to_crop: Option<Vec<String>>,
pub crop_length: Option<usize>,
pub attributes_to_highlight: Option<HashSet<String>>,
pub filters: Option<String>,
pub matches: Option<bool>,
pub facet_filters: Option<Value>,
pub facet_distributions: Option<Vec<String>>,
}
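// Illustrative example (not part of the diff): a JSON payload this struct accepts,
// with hypothetical attribute names. Note the camelCase renaming, and that `limit`
// falls back to DEFAULT_SEARCH_LIMIT when omitted:
// {
//   "q": "prince",
//   "offset": 0,
//   "attributesToHighlight": ["title"],
//   "facetFilters": [["genre:novel", "genre:essay"]]
// }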
#[derive(Serialize)]
#[serde(rename_all = "camelCase")]
pub struct SearchResult {
pub hits: Vec<Map<String, Value>>,
pub nb_hits: u64,
pub query: String,
pub limit: usize,
pub offset: usize,
pub processing_time_ms: u128,
#[serde(skip_serializing_if = "Option::is_none")]
pub facet_distributions: Option<BTreeMap<String, BTreeMap<FacetValue, u64>>>,
}
impl Index {
pub fn perform_search(&self, query: SearchQuery) -> anyhow::Result<SearchResult> {
let before_search = Instant::now();
let rtxn = self.read_txn()?;
let mut search = self.search(&rtxn);
if let Some(ref query) = query.q {
search.query(query);
}
search.limit(query.limit);
search.offset(query.offset.unwrap_or_default());
if let Some(ref facets) = query.facet_filters {
if let Some(facets) = parse_facets(facets, self, &rtxn)? {
search.facet_condition(facets);
}
}
let milli::SearchResult {
documents_ids,
matching_words,
candidates,
..
} = search.execute()?;
let mut documents = Vec::new();
let fields_ids_map = self.fields_ids_map(&rtxn).unwrap();
let fields_to_display =
self.fields_to_display(&rtxn, query.attributes_to_retrieve, &fields_ids_map)?;
let stop_words = fst::Set::default();
let highlighter = Highlighter::new(&stop_words);
for (_id, obkv) in self.documents(&rtxn, documents_ids)? {
let mut object =
milli::obkv_to_json(&fields_to_display, &fields_ids_map, obkv).unwrap();
if let Some(ref attributes_to_highlight) = query.attributes_to_highlight {
highlighter.highlight_record(&mut object, &matching_words, attributes_to_highlight);
}
documents.push(object);
}
let nb_hits = candidates.len();
let facet_distributions = match query.facet_distributions {
Some(ref fields) => {
let mut facet_distribution = self.facets_distribution(&rtxn);
if fields.iter().all(|f| f != "*") {
facet_distribution.facets(fields);
}
Some(facet_distribution.candidates(candidates).execute()?)
}
None => None,
};
let result = SearchResult {
hits: documents,
nb_hits,
query: query.q.clone().unwrap_or_default(),
limit: query.limit,
offset: query.offset.unwrap_or_default(),
processing_time_ms: before_search.elapsed().as_millis(),
facet_distributions,
};
Ok(result)
}
}
fn parse_facets_array(
txn: &RoTxn,
index: &Index,
arr: &[Value],
) -> anyhow::Result<Option<FacetCondition>> {
let mut ands = Vec::new();
for value in arr {
match value {
Value::String(s) => ands.push(Either::Right(s.clone())),
Value::Array(arr) => {
let mut ors = Vec::new();
for value in arr {
match value {
Value::String(s) => ors.push(s.clone()),
v => bail!("Invalid facet expression, expected String, found: {:?}", v),
}
}
ands.push(Either::Left(ors));
}
v => bail!(
"Invalid facet expression, expected String or [String], found: {:?}",
v
),
}
}
FacetCondition::from_array(txn, &index.0, ands)
}
pub struct Highlighter<'a, A> {
analyzer: Analyzer<'a, A>,
}
impl<'a, A: AsRef<[u8]>> Highlighter<'a, A> {
pub fn new(stop_words: &'a fst::Set<A>) -> Self {
let analyzer = Analyzer::new(AnalyzerConfig::default_with_stopwords(stop_words));
Self { analyzer }
}
pub fn highlight_value(&self, value: Value, words_to_highlight: &MatchingWords) -> Value {
match value {
Value::Null => Value::Null,
Value::Bool(boolean) => Value::Bool(boolean),
Value::Number(number) => Value::Number(number),
Value::String(old_string) => {
let mut string = String::new();
let analyzed = self.analyzer.analyze(&old_string);
for (word, token) in analyzed.reconstruct() {
if token.is_word() {
let to_highlight = words_to_highlight.matches(token.text());
if to_highlight {
string.push_str("<mark>")
}
string.push_str(word);
if to_highlight {
string.push_str("</mark>")
}
} else {
string.push_str(word);
}
}
Value::String(string)
}
Value::Array(values) => Value::Array(
values
.into_iter()
.map(|v| self.highlight_value(v, words_to_highlight))
.collect(),
),
Value::Object(object) => Value::Object(
object
.into_iter()
.map(|(k, v)| (k, self.highlight_value(v, words_to_highlight)))
.collect(),
),
}
}
pub fn highlight_record(
&self,
object: &mut Map<String, Value>,
words_to_highlight: &MatchingWords,
attributes_to_highlight: &HashSet<String>,
) {
// TODO: do we need to create a new string for elements that do not need to be highlighted?
for (key, value) in object.iter_mut() {
if attributes_to_highlight.contains(key) {
let old_value = mem::take(value);
*value = self.highlight_value(old_value, words_to_highlight);
}
}
}
}
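// Illustrative note (not part of the diff): given a `MatchingWords` produced by a
// search for "world", `highlight_value` is expected to turn the JSON string
// "hello world" into "hello <mark>world</mark>"; non-string scalars are returned
// unchanged, and arrays and objects are traversed recursively.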
fn parse_facets(
facets: &Value,
index: &Index,
txn: &RoTxn,
) -> anyhow::Result<Option<FacetCondition>> {
match facets {
// Disabled for now
//Value::String(expr) => Ok(Some(FacetCondition::from_str(txn, index, expr)?)),
Value::Array(arr) => parse_facets_array(txn, index, arr),
v => bail!("Invalid facet expression, expected Array, found: {:?}", v),
}
}
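// Illustrative sketch (not part of the diff) of the shape `parse_facets` accepts:
// the outer array is an AND of conditions, and a nested array of strings is an OR.
// The facet values below are hypothetical.
#[cfg(test)]
mod facet_expression_sketch {
    #[test]
    fn facet_filter_shape() {
        let filters = serde_json::json!([
            "author:tamo",                  // this condition must match...
            ["genre:novel", "genre:essay"], // ...AND at least one of these
        ]);
        assert!(filters.as_array().is_some());
    }
}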

View File

@ -0,0 +1,227 @@
use std::collections::HashMap;
use std::io;
use std::num::NonZeroUsize;
use flate2::read::GzDecoder;
use log::info;
use milli::update::{DocumentAdditionResult, IndexDocumentsMethod, UpdateBuilder, UpdateFormat};
use serde::{de::Deserializer, Deserialize, Serialize};
use super::Index;
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum UpdateResult {
DocumentsAddition(DocumentAdditionResult),
DocumentDeletion { deleted: u64 },
Other,
}
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
#[serde(deny_unknown_fields)]
#[serde(rename_all = "camelCase")]
pub struct Settings {
#[serde(
default,
deserialize_with = "deserialize_some",
skip_serializing_if = "Option::is_none"
)]
pub displayed_attributes: Option<Option<Vec<String>>>,
#[serde(
default,
deserialize_with = "deserialize_some",
skip_serializing_if = "Option::is_none"
)]
pub searchable_attributes: Option<Option<Vec<String>>>,
#[serde(default)]
pub attributes_for_faceting: Option<Option<HashMap<String, String>>>,
#[serde(
default,
deserialize_with = "deserialize_some",
skip_serializing_if = "Option::is_none"
)]
pub ranking_rules: Option<Option<Vec<String>>>,
}
impl Settings {
pub fn cleared() -> Self {
Self {
displayed_attributes: Some(None),
searchable_attributes: Some(None),
attributes_for_faceting: Some(None),
ranking_rules: Some(None),
}
}
}
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(deny_unknown_fields)]
#[serde(rename_all = "camelCase")]
pub struct Facets {
pub level_group_size: Option<NonZeroUsize>,
pub min_level_size: Option<NonZeroUsize>,
}
fn deserialize_some<'de, T, D>(deserializer: D) -> Result<Option<T>, D::Error>
where
T: Deserialize<'de>,
D: Deserializer<'de>,
{
Deserialize::deserialize(deserializer).map(Some)
}
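// Illustrative sketch (not part of the diff): combined with `#[serde(default)]`,
// `deserialize_some` lets an `Option<Option<T>>` setting distinguish three states:
// key absent => None (leave unchanged), explicit `null` => Some(None) (reset),
// value => Some(Some(v)) (set).
#[cfg(test)]
mod settings_deserialization_sketch {
    use super::Settings;

    #[test]
    fn null_resets_while_a_missing_key_is_ignored() {
        // An explicit `null` asks for the setting to be reset.
        let s: Settings = serde_json::from_str(r#"{ "rankingRules": null }"#).unwrap();
        assert_eq!(s.ranking_rules, Some(None));
        // An absent key leaves the setting untouched.
        let s: Settings = serde_json::from_str("{}").unwrap();
        assert_eq!(s.ranking_rules, None);
    }
}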
impl Index {
pub fn update_documents(
&self,
format: UpdateFormat,
method: IndexDocumentsMethod,
content: impl io::Read,
update_builder: UpdateBuilder,
primary_key: Option<&str>,
) -> anyhow::Result<UpdateResult> {
info!("performing document addition");
// We must use the write transaction of the update here.
let mut wtxn = self.write_txn()?;
// Set the primary key if not set already, ignore if already set.
if let (None, Some(ref primary_key)) = (self.primary_key(&wtxn)?, primary_key) {
self.put_primary_key(&mut wtxn, primary_key)?;
}
let mut builder = update_builder.index_documents(&mut wtxn, self);
builder.update_format(format);
builder.index_documents_method(method);
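// The payload is currently assumed not to be gzipped; the decoder branch below is
// presumably kept for future compressed payloads.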
let gzipped = false;
let reader = if gzipped {
Box::new(GzDecoder::new(content))
} else {
Box::new(content) as Box<dyn io::Read>
};
let result = builder.execute(reader, |indexing_step, update_id| {
info!("update {}: {:?}", update_id, indexing_step)
});
info!("document addition done: {:?}", result);
result.and_then(|addition_result| {
wtxn.commit()
.and(Ok(UpdateResult::DocumentsAddition(addition_result)))
.map_err(Into::into)
})
}
pub fn clear_documents(&self, update_builder: UpdateBuilder) -> anyhow::Result<UpdateResult> {
// We must use the write transaction of the update here.
let mut wtxn = self.write_txn()?;
let builder = update_builder.clear_documents(&mut wtxn, self);
match builder.execute() {
Ok(_count) => wtxn
.commit()
.and(Ok(UpdateResult::Other))
.map_err(Into::into),
Err(e) => Err(e),
}
}
pub fn update_settings(
&self,
settings: &Settings,
update_builder: UpdateBuilder,
) -> anyhow::Result<UpdateResult> {
// We must use the write transaction of the update here.
let mut wtxn = self.write_txn()?;
let mut builder = update_builder.settings(&mut wtxn, self);
// We transpose the settings JSON struct into a real setting update.
if let Some(ref names) = settings.searchable_attributes {
match names {
Some(names) => builder.set_searchable_fields(names.clone()),
None => builder.reset_searchable_fields(),
}
}
// We transpose the settings JSON struct into a real setting update.
if let Some(ref names) = settings.displayed_attributes {
match names {
Some(names) => builder.set_displayed_fields(names.clone()),
None => builder.reset_displayed_fields(),
}
}
// We transpose the settings JSON struct into a real setting update.
if let Some(ref facet_types) = settings.attributes_for_faceting {
let facet_types = facet_types.clone().unwrap_or_else(HashMap::new);
builder.set_faceted_fields(facet_types);
}
// We transpose the settings JSON struct into a real setting update.
if let Some(ref criteria) = settings.ranking_rules {
match criteria {
Some(criteria) => builder.set_criteria(criteria.clone()),
None => builder.reset_criteria(),
}
}
let result = builder
.execute(|indexing_step, update_id| info!("update {}: {:?}", update_id, indexing_step));
match result {
Ok(()) => wtxn
.commit()
.and(Ok(UpdateResult::Other))
.map_err(Into::into),
Err(e) => Err(e),
}
}
pub fn update_facets(
&self,
levels: &Facets,
update_builder: UpdateBuilder,
) -> anyhow::Result<UpdateResult> {
// We must use the write transaction of the update here.
let mut wtxn = self.write_txn()?;
let mut builder = update_builder.facets(&mut wtxn, self);
if let Some(value) = levels.level_group_size {
builder.level_group_size(value);
}
if let Some(value) = levels.min_level_size {
builder.min_level_size(value);
}
match builder.execute() {
Ok(()) => wtxn
.commit()
.and(Ok(UpdateResult::Other))
.map_err(Into::into),
Err(e) => Err(e),
}
}
pub fn delete_documents(
&self,
document_ids: impl io::Read,
update_builder: UpdateBuilder,
) -> anyhow::Result<UpdateResult> {
let ids: Vec<String> = serde_json::from_reader(document_ids)?;
let mut txn = self.write_txn()?;
let mut builder = update_builder.delete_documents(&mut txn, self)?;
// We ignore nonexistent document ids
ids.iter().for_each(|id| {
builder.delete_external_id(id);
});
match builder.execute() {
Ok(deleted) => txn
.commit()
.and(Ok(UpdateResult::DocumentDeletion { deleted }))
.map_err(Into::into),
Err(e) => Err(e),
}
}
}

View File

@ -0,0 +1,612 @@
use std::collections::HashMap;
use std::fs::{create_dir_all, File};
use std::future::Future;
use std::path::{Path, PathBuf};
use std::sync::Arc;
use async_stream::stream;
use chrono::{DateTime, Utc};
use futures::pin_mut;
use futures::stream::StreamExt;
use heed::EnvOpenOptions;
use log::debug;
use serde::{Deserialize, Serialize};
use thiserror::Error;
use tokio::fs::remove_dir_all;
use tokio::sync::{mpsc, oneshot, RwLock};
use tokio::task::spawn_blocking;
use uuid::Uuid;
use super::update_handler::UpdateHandler;
use super::{get_arc_ownership_blocking, IndexSettings};
use crate::index::UpdateResult as UResult;
use crate::index::{Document, Index, SearchQuery, SearchResult, Settings};
use crate::index_controller::{
updates::{Failed, Processed, Processing},
UpdateMeta,
};
use crate::option::IndexerOpts;
pub type Result<T> = std::result::Result<T, IndexError>;
type AsyncMap<K, V> = Arc<RwLock<HashMap<K, V>>>;
type UpdateResult = std::result::Result<Processed<UpdateMeta, UResult>, Failed<UpdateMeta, String>>;
#[derive(Debug, Serialize, Deserialize, Clone)]
#[serde(rename_all = "camelCase")]
pub struct IndexMeta {
created_at: DateTime<Utc>,
updated_at: DateTime<Utc>,
primary_key: Option<String>,
}
impl IndexMeta {
fn new(index: &Index) -> Result<Self> {
let txn = index.read_txn()?;
Self::new_txn(index, &txn)
}
fn new_txn(index: &Index, txn: &heed::RoTxn) -> Result<Self> {
let created_at = index.created_at(&txn)?;
let updated_at = index.updated_at(&txn)?;
let primary_key = index.primary_key(&txn)?.map(String::from);
Ok(Self {
primary_key,
updated_at,
created_at,
})
}
}
enum IndexMsg {
CreateIndex {
uuid: Uuid,
primary_key: Option<String>,
ret: oneshot::Sender<Result<IndexMeta>>,
},
Update {
meta: Processing<UpdateMeta>,
data: std::fs::File,
ret: oneshot::Sender<Result<UpdateResult>>,
},
Search {
uuid: Uuid,
query: SearchQuery,
ret: oneshot::Sender<anyhow::Result<SearchResult>>,
},
Settings {
uuid: Uuid,
ret: oneshot::Sender<Result<Settings>>,
},
Documents {
uuid: Uuid,
attributes_to_retrieve: Option<Vec<String>>,
offset: usize,
limit: usize,
ret: oneshot::Sender<Result<Vec<Document>>>,
},
Document {
uuid: Uuid,
attributes_to_retrieve: Option<Vec<String>>,
doc_id: String,
ret: oneshot::Sender<Result<Document>>,
},
Delete {
uuid: Uuid,
ret: oneshot::Sender<Result<()>>,
},
GetMeta {
uuid: Uuid,
ret: oneshot::Sender<Result<IndexMeta>>,
},
UpdateIndex {
uuid: Uuid,
index_settings: IndexSettings,
ret: oneshot::Sender<Result<IndexMeta>>,
},
}
struct IndexActor<S> {
read_receiver: Option<mpsc::Receiver<IndexMsg>>,
write_receiver: Option<mpsc::Receiver<IndexMsg>>,
update_handler: Arc<UpdateHandler>,
store: S,
}
#[derive(Error, Debug)]
pub enum IndexError {
#[error("error with index: {0}")]
Error(#[from] anyhow::Error),
#[error("index already exists")]
IndexAlreadyExists,
#[error("Index doesn't exists")]
UnexistingIndex,
#[error("Heed error: {0}")]
HeedError(#[from] heed::Error),
#[error("Existing primary key")]
ExistingPrimaryKey,
}
#[async_trait::async_trait]
trait IndexStore {
async fn create(&self, uuid: Uuid, primary_key: Option<String>) -> Result<Index>;
async fn get(&self, uuid: Uuid) -> Result<Option<Index>>;
async fn delete(&self, uuid: Uuid) -> Result<Option<Index>>;
}
impl<S: IndexStore + Sync + Send> IndexActor<S> {
fn new(
read_receiver: mpsc::Receiver<IndexMsg>,
write_receiver: mpsc::Receiver<IndexMsg>,
store: S,
) -> Result<Self> {
let options = IndexerOpts::default();
let update_handler = UpdateHandler::new(&options).map_err(IndexError::Error)?;
let update_handler = Arc::new(update_handler);
let read_receiver = Some(read_receiver);
let write_receiver = Some(write_receiver);
Ok(Self {
read_receiver,
write_receiver,
store,
update_handler,
})
}
/// `run` polls the write_receiver and read_receiver concurrently: messages sent
/// through the read channel are processed concurrently, while messages sent through
/// the write channel are processed one at a time.
async fn run(mut self) {
let mut read_receiver = self
.read_receiver
.take()
.expect("Index Actor must have a inbox at this point.");
let read_stream = stream! {
loop {
match read_receiver.recv().await {
Some(msg) => yield msg,
None => break,
}
}
};
let mut write_receiver = self
.write_receiver
.take()
.expect("Index Actor must have a inbox at this point.");
let write_stream = stream! {
loop {
match write_receiver.recv().await {
Some(msg) => yield msg,
None => break,
}
}
};
pin_mut!(write_stream);
pin_mut!(read_stream);
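// Up to 10 read messages may be handled in flight at once, while write messages
// are strictly serialized (concurrency limit of 1).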
let fut1 = read_stream.for_each_concurrent(Some(10), |msg| self.handle_message(msg));
let fut2 = write_stream.for_each_concurrent(Some(1), |msg| self.handle_message(msg));
let fut1: Box<dyn Future<Output = ()> + Unpin + Send> = Box::new(fut1);
let fut2: Box<dyn Future<Output = ()> + Unpin + Send> = Box::new(fut2);
tokio::join!(fut1, fut2);
}
async fn handle_message(&self, msg: IndexMsg) {
use IndexMsg::*;
match msg {
CreateIndex {
uuid,
primary_key,
ret,
} => {
let _ = ret.send(self.handle_create_index(uuid, primary_key).await);
}
Update { ret, meta, data } => {
let _ = ret.send(self.handle_update(meta, data).await);
}
Search { ret, query, uuid } => {
let _ = ret.send(self.handle_search(uuid, query).await);
}
Settings { ret, uuid } => {
let _ = ret.send(self.handle_settings(uuid).await);
}
Documents {
ret,
uuid,
attributes_to_retrieve,
offset,
limit,
} => {
let _ = ret.send(
self.handle_fetch_documents(uuid, offset, limit, attributes_to_retrieve)
.await,
);
}
Document {
uuid,
attributes_to_retrieve,
doc_id,
ret,
} => {
let _ = ret.send(
self.handle_fetch_document(uuid, doc_id, attributes_to_retrieve)
.await,
);
}
Delete { uuid, ret } => {
let _ = ret.send(self.handle_delete(uuid).await);
}
GetMeta { uuid, ret } => {
let _ = ret.send(self.handle_get_meta(uuid).await);
}
UpdateIndex {
uuid,
index_settings,
ret,
} => {
let _ = ret.send(self.handle_update_index(uuid, index_settings).await);
}
}
}
async fn handle_search(&self, uuid: Uuid, query: SearchQuery) -> anyhow::Result<SearchResult> {
let index = self
.store
.get(uuid)
.await?
.ok_or(IndexError::UnexistingIndex)?;
spawn_blocking(move || index.perform_search(query)).await?
}
async fn handle_create_index(
&self,
uuid: Uuid,
primary_key: Option<String>,
) -> Result<IndexMeta> {
let index = self.store.create(uuid, primary_key).await?;
let meta = spawn_blocking(move || IndexMeta::new(&index))
.await
.map_err(|e| IndexError::Error(e.into()))??;
Ok(meta)
}
async fn handle_update(
&self,
meta: Processing<UpdateMeta>,
data: File,
) -> Result<UpdateResult> {
debug!("Processing update {}", meta.id());
let uuid = meta.index_uuid();
let update_handler = self.update_handler.clone();
let index = match self.store.get(*uuid).await? {
Some(index) => index,
None => self.store.create(*uuid, None).await?,
};
spawn_blocking(move || update_handler.handle_update(meta, data, index))
.await
.map_err(|e| IndexError::Error(e.into()))
}
async fn handle_settings(&self, uuid: Uuid) -> Result<Settings> {
let index = self
.store
.get(uuid)
.await?
.ok_or(IndexError::UnexistingIndex)?;
spawn_blocking(move || index.settings().map_err(IndexError::Error))
.await
.map_err(|e| IndexError::Error(e.into()))?
}
async fn handle_fetch_documents(
&self,
uuid: Uuid,
offset: usize,
limit: usize,
attributes_to_retrieve: Option<Vec<String>>,
) -> Result<Vec<Document>> {
let index = self
.store
.get(uuid)
.await?
.ok_or(IndexError::UnexistingIndex)?;
spawn_blocking(move || {
index
.retrieve_documents(offset, limit, attributes_to_retrieve)
.map_err(IndexError::Error)
})
.await
.map_err(|e| IndexError::Error(e.into()))?
}
async fn handle_fetch_document(
&self,
uuid: Uuid,
doc_id: String,
attributes_to_retrieve: Option<Vec<String>>,
) -> Result<Document> {
let index = self
.store
.get(uuid)
.await?
.ok_or(IndexError::UnexistingIndex)?;
spawn_blocking(move || {
index
.retrieve_document(doc_id, attributes_to_retrieve)
.map_err(IndexError::Error)
})
.await
.map_err(|e| IndexError::Error(e.into()))?
}
async fn handle_delete(&self, uuid: Uuid) -> Result<()> {
let index = self.store.delete(uuid).await?;
if let Some(index) = index {
tokio::task::spawn(async move {
let index = index.0;
let store = get_arc_ownership_blocking(index).await;
spawn_blocking(move || {
store.prepare_for_closing().wait();
debug!("Index closed");
});
});
}
Ok(())
}
async fn handle_get_meta(&self, uuid: Uuid) -> Result<IndexMeta> {
match self.store.get(uuid).await? {
Some(index) => {
let meta = spawn_blocking(move || IndexMeta::new(&index))
.await
.map_err(|e| IndexError::Error(e.into()))??;
Ok(meta)
}
None => Err(IndexError::UnexistingIndex),
}
}
async fn handle_update_index(
&self,
uuid: Uuid,
index_settings: IndexSettings,
) -> Result<IndexMeta> {
let index = self
.store
.get(uuid)
.await?
.ok_or(IndexError::UnexistingIndex)?;
spawn_blocking(move || match index_settings.primary_key {
Some(ref primary_key) => {
let mut txn = index.write_txn()?;
if index.primary_key(&txn)?.is_some() {
return Err(IndexError::ExistingPrimaryKey);
}
index.put_primary_key(&mut txn, primary_key)?;
let meta = IndexMeta::new_txn(&index, &txn)?;
txn.commit()?;
Ok(meta)
}
None => {
let meta = IndexMeta::new(&index)?;
Ok(meta)
}
})
.await
.map_err(|e| IndexError::Error(e.into()))?
}
}
#[derive(Clone)]
pub struct IndexActorHandle {
read_sender: mpsc::Sender<IndexMsg>,
write_sender: mpsc::Sender<IndexMsg>,
}
impl IndexActorHandle {
pub fn new(path: impl AsRef<Path>, index_size: usize) -> anyhow::Result<Self> {
let (read_sender, read_receiver) = mpsc::channel(100);
let (write_sender, write_receiver) = mpsc::channel(100);
let store = HeedIndexStore::new(path, index_size);
let actor = IndexActor::new(read_receiver, write_receiver, store)?;
tokio::task::spawn(actor.run());
Ok(Self {
read_sender,
write_sender,
})
}
pub async fn create_index(&self, uuid: Uuid, primary_key: Option<String>) -> Result<IndexMeta> {
let (ret, receiver) = oneshot::channel();
let msg = IndexMsg::CreateIndex {
ret,
uuid,
primary_key,
};
let _ = self.read_sender.send(msg).await;
receiver.await.expect("IndexActor has been killed")
}
pub async fn update(
&self,
meta: Processing<UpdateMeta>,
data: std::fs::File,
) -> anyhow::Result<UpdateResult> {
let (ret, receiver) = oneshot::channel();
let msg = IndexMsg::Update { ret, meta, data };
let _ = self.read_sender.send(msg).await;
Ok(receiver.await.expect("IndexActor has been killed")?)
}
pub async fn search(&self, uuid: Uuid, query: SearchQuery) -> Result<SearchResult> {
let (ret, receiver) = oneshot::channel();
let msg = IndexMsg::Search { uuid, query, ret };
let _ = self.read_sender.send(msg).await;
Ok(receiver.await.expect("IndexActor has been killed")?)
}
pub async fn settings(&self, uuid: Uuid) -> Result<Settings> {
let (ret, receiver) = oneshot::channel();
let msg = IndexMsg::Settings { uuid, ret };
let _ = self.read_sender.send(msg).await;
Ok(receiver.await.expect("IndexActor has been killed")?)
}
pub async fn documents(
&self,
uuid: Uuid,
offset: usize,
limit: usize,
attributes_to_retrieve: Option<Vec<String>>,
) -> Result<Vec<Document>> {
let (ret, receiver) = oneshot::channel();
let msg = IndexMsg::Documents {
uuid,
ret,
offset,
attributes_to_retrieve,
limit,
};
let _ = self.read_sender.send(msg).await;
Ok(receiver.await.expect("IndexActor has been killed")?)
}
pub async fn document(
&self,
uuid: Uuid,
doc_id: String,
attributes_to_retrieve: Option<Vec<String>>,
) -> Result<Document> {
let (ret, receiver) = oneshot::channel();
let msg = IndexMsg::Document {
uuid,
ret,
doc_id,
attributes_to_retrieve,
};
let _ = self.read_sender.send(msg).await;
Ok(receiver.await.expect("IndexActor has been killed")?)
}
pub async fn delete(&self, uuid: Uuid) -> Result<()> {
let (ret, receiver) = oneshot::channel();
let msg = IndexMsg::Delete { uuid, ret };
let _ = self.read_sender.send(msg).await;
Ok(receiver.await.expect("IndexActor has been killed")?)
}
pub async fn get_index_meta(&self, uuid: Uuid) -> Result<IndexMeta> {
let (ret, receiver) = oneshot::channel();
let msg = IndexMsg::GetMeta { uuid, ret };
let _ = self.read_sender.send(msg).await;
Ok(receiver.await.expect("IndexActor has been killed")?)
}
pub async fn update_index(
&self,
uuid: Uuid,
index_settings: IndexSettings,
) -> Result<IndexMeta> {
let (ret, receiver) = oneshot::channel();
let msg = IndexMsg::UpdateIndex {
uuid,
index_settings,
ret,
};
let _ = self.read_sender.send(msg).await;
Ok(receiver.await.expect("IndexActor has been killed")?)
}
}
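// Illustrative sketch (not part of the diff): how a caller is expected to use the
// handle from an async context. The path and index size below are hypothetical.
#[allow(dead_code)]
async fn index_actor_handle_sketch() -> anyhow::Result<()> {
    let handle = IndexActorHandle::new("/tmp/data.ms", 4096 * 100_000)?;
    let uuid = Uuid::new_v4();
    // Each call sends a message to the actor and awaits the reply on a oneshot channel.
    let meta = handle.create_index(uuid, Some("id".to_string())).await?;
    debug!("created index: {:?}", meta);
    Ok(())
}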
struct HeedIndexStore {
index_store: AsyncMap<Uuid, Index>,
path: PathBuf,
index_size: usize,
}
impl HeedIndexStore {
fn new(path: impl AsRef<Path>, index_size: usize) -> Self {
let path = path.as_ref().join("indexes/");
let index_store = Arc::new(RwLock::new(HashMap::new()));
Self {
index_store,
path,
index_size,
}
}
}
#[async_trait::async_trait]
impl IndexStore for HeedIndexStore {
async fn create(&self, uuid: Uuid, primary_key: Option<String>) -> Result<Index> {
let path = self.path.join(format!("index-{}", uuid));
if path.exists() {
return Err(IndexError::IndexAlreadyExists);
}
let index_size = self.index_size;
let index = spawn_blocking(move || -> Result<Index> {
let index = open_index(&path, index_size)?;
if let Some(primary_key) = primary_key {
let mut txn = index.write_txn()?;
index.put_primary_key(&mut txn, &primary_key)?;
txn.commit()?;
}
Ok(index)
})
.await
.map_err(|e| IndexError::Error(e.into()))??;
self.index_store.write().await.insert(uuid, index.clone());
Ok(index)
}
async fn get(&self, uuid: Uuid) -> Result<Option<Index>> {
let guard = self.index_store.read().await;
match guard.get(&uuid) {
Some(index) => Ok(Some(index.clone())),
None => {
// Drop the guard here so that the write below does not deadlock.
drop(guard);
let path = self.path.join(format!("index-{}", uuid));
if !path.exists() {
return Ok(None);
}
let index_size = self.index_size;
let index = spawn_blocking(move || open_index(path, index_size))
.await
.map_err(|e| IndexError::Error(e.into()))??;
self.index_store.write().await.insert(uuid, index.clone());
Ok(Some(index))
}
}
}
async fn delete(&self, uuid: Uuid) -> Result<Option<Index>> {
let db_path = self.path.join(format!("index-{}", uuid));
remove_dir_all(db_path)
.await
.map_err(|e| IndexError::Error(e.into()))?;
let index = self.index_store.write().await.remove(&uuid);
Ok(index)
}
}
fn open_index(path: impl AsRef<Path>, size: usize) -> Result<Index> {
create_dir_all(&path).map_err(|e| IndexError::Error(e.into()))?;
let mut options = EnvOpenOptions::new();
options.map_size(size);
let index = milli::Index::new(options, &path).map_err(IndexError::Error)?;
Ok(Index(Arc::new(index)))
}

View File

@ -1,607 +0,0 @@
use std::fs::{create_dir_all, remove_dir_all};
use std::path::{Path, PathBuf};
use std::sync::Arc;
use std::time::Duration;
use anyhow::{bail, Context};
use chrono::{DateTime, Utc};
use dashmap::{mapref::entry::Entry, DashMap};
use heed::{
types::{ByteSlice, SerdeJson, Str},
Database, Env, EnvOpenOptions, RoTxn, RwTxn,
};
use log::{error, info};
use milli::Index;
use rayon::ThreadPool;
use serde::{Deserialize, Serialize};
use uuid::Uuid;
use super::update_handler::UpdateHandler;
use super::{UpdateMeta, UpdateResult};
use crate::option::IndexerOpts;
type UpdateStore = super::update_store::UpdateStore<UpdateMeta, UpdateResult, String>;
#[derive(Serialize, Deserialize, Debug, PartialEq)]
pub struct IndexMeta {
update_store_size: u64,
index_store_size: u64,
pub uuid: Uuid,
pub created_at: DateTime<Utc>,
pub updated_at: DateTime<Utc>,
}
impl IndexMeta {
fn open(
&self,
path: impl AsRef<Path>,
thread_pool: Arc<ThreadPool>,
indexer_options: &IndexerOpts,
) -> anyhow::Result<(Arc<Index>, Arc<UpdateStore>)> {
let update_path = make_update_db_path(&path, &self.uuid);
let index_path = make_index_db_path(&path, &self.uuid);
create_dir_all(&update_path)?;
create_dir_all(&index_path)?;
let mut options = EnvOpenOptions::new();
options.map_size(self.index_store_size as usize);
let index = Arc::new(Index::new(options, index_path)?);
let mut options = EnvOpenOptions::new();
options.map_size(self.update_store_size as usize);
let handler = UpdateHandler::new(indexer_options, index.clone(), thread_pool)?;
let update_store = UpdateStore::open(options, update_path, handler)?;
Ok((index, update_store))
}
}
pub struct IndexStore {
env: Env,
name_to_uuid: Database<Str, ByteSlice>,
uuid_to_index: DashMap<Uuid, (Arc<Index>, Arc<UpdateStore>)>,
uuid_to_index_meta: Database<ByteSlice, SerdeJson<IndexMeta>>,
thread_pool: Arc<ThreadPool>,
indexer_options: IndexerOpts,
}
impl IndexStore {
pub fn new(path: impl AsRef<Path>, indexer_options: IndexerOpts) -> anyhow::Result<Self> {
let env = EnvOpenOptions::new()
.map_size(4096 * 100)
.max_dbs(2)
.open(path)?;
let uuid_to_index = DashMap::new();
let name_to_uuid = open_or_create_database(&env, Some("name_to_uid"))?;
let uuid_to_index_meta = open_or_create_database(&env, Some("uid_to_index_db"))?;
let thread_pool = rayon::ThreadPoolBuilder::new()
.num_threads(indexer_options.indexing_jobs.unwrap_or(0))
.build()?;
let thread_pool = Arc::new(thread_pool);
Ok(Self {
env,
name_to_uuid,
uuid_to_index,
uuid_to_index_meta,
thread_pool,
indexer_options,
})
}
pub fn delete(&self, index_uid: impl AsRef<str>) -> anyhow::Result<()> {
// we remove the references to the index from the index map so it is not accessible anymore
let mut txn = self.env.write_txn()?;
let uuid = self
.index_uuid(&txn, &index_uid)?
.with_context(|| format!("Index {:?} doesn't exist", index_uid.as_ref()))?;
self.name_to_uuid.delete(&mut txn, index_uid.as_ref())?;
self.uuid_to_index_meta.delete(&mut txn, uuid.as_bytes())?;
txn.commit()?;
// If the index was loaded (i.e. it is present in the uuid_to_index map), then we need to
// close it. The process goes as follows:
//
// 1) We want to remove any pending updates from the store.
// 2) We try to get ownership on the update store so we can close it. It may take a
// couple of tries, but since the update store event loop only has a weak reference to
// itself, and we are the only other function holding a reference to it otherwise, we will
// get it eventually.
// 3) We request a closing of the update store.
// 4) We can take ownership on the index, and close it.
// 5) We remove all the files from the file system.
let index_uid = index_uid.as_ref().to_string();
let path = self.env.path().to_owned();
if let Some((_, (index, updates))) = self.uuid_to_index.remove(&uuid) {
std::thread::spawn(move || {
info!("Preparing for {:?} deletion.", index_uid);
// This error is non-fatal, but may delay the deletion.
if let Err(e) = updates.abort_pendings() {
error!(
"error aborting pending updates when deleting index {:?}: {}",
index_uid, e
);
}
let updates = get_arc_ownership_blocking(updates);
let close_event = updates.prepare_for_closing();
close_event.wait();
info!("closed update store for {:?}", index_uid);
let index = get_arc_ownership_blocking(index);
let close_event = index.prepare_for_closing();
close_event.wait();
let update_path = make_update_db_path(&path, &uuid);
let index_path = make_index_db_path(&path, &uuid);
if let Err(e) = remove_dir_all(index_path) {
error!("error removing index {:?}: {}", index_uid, e);
}
if let Err(e) = remove_dir_all(update_path) {
error!("error removing index {:?}: {}", index_uid, e);
}
info!("index {:?} deleted.", index_uid);
});
}
Ok(())
}
fn index_uuid(&self, txn: &RoTxn, name: impl AsRef<str>) -> anyhow::Result<Option<Uuid>> {
match self.name_to_uuid.get(txn, name.as_ref())? {
Some(bytes) => {
let uuid = Uuid::from_slice(bytes)?;
Ok(Some(uuid))
}
None => Ok(None),
}
}
fn retrieve_index(
&self,
txn: &RoTxn,
uid: Uuid,
) -> anyhow::Result<Option<(Arc<Index>, Arc<UpdateStore>)>> {
match self.uuid_to_index.entry(uid.clone()) {
Entry::Vacant(entry) => match self.uuid_to_index_meta.get(txn, uid.as_bytes())? {
Some(meta) => {
let path = self.env.path();
let (index, updates) =
meta.open(path, self.thread_pool.clone(), &self.indexer_options)?;
entry.insert((index.clone(), updates.clone()));
Ok(Some((index, updates)))
}
None => Ok(None),
},
Entry::Occupied(entry) => {
let (index, updates) = entry.get();
Ok(Some((index.clone(), updates.clone())))
}
}
}
fn get_index_txn(
&self,
txn: &RoTxn,
name: impl AsRef<str>,
) -> anyhow::Result<Option<(Arc<Index>, Arc<UpdateStore>)>> {
match self.index_uuid(&txn, name)? {
Some(uid) => self.retrieve_index(&txn, uid),
None => Ok(None),
}
}
pub fn index(
&self,
name: impl AsRef<str>,
) -> anyhow::Result<Option<(Arc<Index>, Arc<UpdateStore>)>> {
let txn = self.env.read_txn()?;
self.get_index_txn(&txn, name)
}
/// Use this function to perform an update on an index.
/// It also takes a write lock, controlling which index is allowed to perform an update at a time.
pub fn update_index<F, T>(&self, name: impl AsRef<str>, f: F) -> anyhow::Result<(T, IndexMeta)>
where
F: FnOnce(&Index) -> anyhow::Result<T>,
{
let mut txn = self.env.write_txn()?;
let (index, _) = self
.get_index_txn(&txn, &name)?
.with_context(|| format!("Index {:?} doesn't exist", name.as_ref()))?;
let result = f(index.as_ref());
match result {
Ok(ret) => {
let meta = self.update_meta(&mut txn, name, |meta| meta.updated_at = Utc::now())?;
txn.commit()?;
Ok((ret, meta))
}
Err(e) => Err(e),
}
}
pub fn index_with_meta(
&self,
name: impl AsRef<str>,
) -> anyhow::Result<Option<(Arc<Index>, IndexMeta)>> {
let txn = self.env.read_txn()?;
let uuid = self.index_uuid(&txn, &name)?;
match uuid {
Some(uuid) => {
let meta = self
.uuid_to_index_meta
.get(&txn, uuid.as_bytes())?
.with_context(|| {
format!("unable to retrieve metadata for index {:?}", name.as_ref())
})?;
let (index, _) = self
.retrieve_index(&txn, uuid)?
.with_context(|| format!("unable to retrieve index {:?}", name.as_ref()))?;
Ok(Some((index, meta)))
}
None => Ok(None),
}
}
fn update_meta<F>(
&self,
txn: &mut RwTxn,
name: impl AsRef<str>,
f: F,
) -> anyhow::Result<IndexMeta>
where
F: FnOnce(&mut IndexMeta),
{
let uuid = self
.index_uuid(txn, &name)?
.with_context(|| format!("Index {:?} doesn't exist", name.as_ref()))?;
let mut meta = self
.uuid_to_index_meta
.get(txn, uuid.as_bytes())?
.with_context(|| format!("couldn't retrieve metadata for index {:?}", name.as_ref()))?;
f(&mut meta);
self.uuid_to_index_meta.put(txn, uuid.as_bytes(), &meta)?;
Ok(meta)
}
pub fn get_or_create_index(
&self,
name: impl AsRef<str>,
update_size: u64,
index_size: u64,
) -> anyhow::Result<(Arc<Index>, Arc<UpdateStore>)> {
let mut txn = self.env.write_txn()?;
match self.get_index_txn(&txn, name.as_ref())? {
Some(res) => Ok(res),
None => {
let uuid = Uuid::new_v4();
let (index, updates, _) =
self.create_index_txn(&mut txn, uuid, name, update_size, index_size)?;
// If we fail to commit the transaction, we must delete the database from the
// file-system.
if let Err(e) = txn.commit() {
self.clean_db(uuid);
return Err(e)?;
}
Ok((index, updates))
}
}
}
// Remove all the files and data associated with a db uuid.
fn clean_db(&self, uuid: Uuid) {
let update_db_path = make_update_db_path(self.env.path(), &uuid);
let index_db_path = make_index_db_path(self.env.path(), &uuid);
remove_dir_all(update_db_path).expect("Failed to clean database");
remove_dir_all(index_db_path).expect("Failed to clean database");
self.uuid_to_index.remove(&uuid);
}
fn create_index_txn(
&self,
txn: &mut RwTxn,
uuid: Uuid,
name: impl AsRef<str>,
update_store_size: u64,
index_store_size: u64,
) -> anyhow::Result<(Arc<Index>, Arc<UpdateStore>, IndexMeta)> {
let created_at = Utc::now();
let updated_at = created_at;
let meta = IndexMeta {
update_store_size,
index_store_size,
uuid: uuid.clone(),
created_at,
updated_at,
};
self.name_to_uuid.put(txn, name.as_ref(), uuid.as_bytes())?;
self.uuid_to_index_meta.put(txn, uuid.as_bytes(), &meta)?;
let path = self.env.path();
let (index, update_store) =
match meta.open(path, self.thread_pool.clone(), &self.indexer_options) {
Ok(res) => res,
Err(e) => {
self.clean_db(uuid);
return Err(e);
}
};
self.uuid_to_index
.insert(uuid, (index.clone(), update_store.clone()));
Ok((index, update_store, meta))
}
/// Same as `get_or_create_index`, but returns an error if the index already exists.
pub fn create_index(
&self,
name: impl AsRef<str>,
update_size: u64,
index_size: u64,
) -> anyhow::Result<(Arc<Index>, Arc<UpdateStore>, IndexMeta)> {
let uuid = Uuid::new_v4();
let mut txn = self.env.write_txn()?;
if self.name_to_uuid.get(&txn, name.as_ref())?.is_some() {
bail!("index {:?} already exists", name.as_ref())
}
let result = self.create_index_txn(&mut txn, uuid, name, update_size, index_size)?;
// If we fail to commit the transaction, we must delete the database from the
// file-system.
if let Err(e) = txn.commit() {
self.clean_db(uuid);
return Err(e)?;
}
Ok(result)
}
/// Returns each index associated with its metadata:
/// (index_name, IndexMeta, primary_key)
/// This method will force all the indexes to be loaded.
pub fn list_indexes(&self) -> anyhow::Result<Vec<(String, IndexMeta, Option<String>)>> {
let txn = self.env.read_txn()?;
let metas = self.name_to_uuid.iter(&txn)?.filter_map(|entry| {
entry
.map_err(|e| {
error!("error decoding entry while listing indexes: {}", e);
e
})
.ok()
});
let mut indexes = Vec::new();
for (name, uuid) in metas {
// get index to retrieve primary key
let (index, _) = self
.get_index_txn(&txn, name)?
.with_context(|| format!("could not load index {:?}", name))?;
let primary_key = index.primary_key(&index.read_txn()?)?.map(String::from);
// retrieve meta
let meta = self
.uuid_to_index_meta
.get(&txn, &uuid)?
.with_context(|| format!("could not retieve meta for index {:?}", name))?;
indexes.push((name.to_owned(), meta, primary_key));
}
Ok(indexes)
}
}
// Loops on an Arc to get ownership of the wrapped value. This method sleeps 100ms before retrying.
fn get_arc_ownership_blocking<T>(mut item: Arc<T>) -> T {
loop {
match Arc::try_unwrap(item) {
Ok(item) => return item,
Err(item_arc) => {
item = item_arc;
std::thread::sleep(Duration::from_millis(100));
continue;
}
}
}
}
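// Illustrative sketch (not part of the diff): the loop above returns only once
// every other clone of the Arc has been dropped.
#[cfg(test)]
mod arc_ownership_sketch {
    use super::get_arc_ownership_blocking;
    use std::sync::Arc;
    use std::time::Duration;

    #[test]
    fn unwraps_once_the_last_clone_is_dropped() {
        let item = Arc::new(42);
        let clone = Arc::clone(&item);
        std::thread::spawn(move || {
            std::thread::sleep(Duration::from_millis(200));
            drop(clone);
        });
        // Blocks, retrying every 100ms, until the spawned thread drops its clone.
        assert_eq!(get_arc_ownership_blocking(item), 42);
    }
}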
fn open_or_create_database<K: 'static, V: 'static>(
env: &Env,
name: Option<&str>,
) -> anyhow::Result<Database<K, V>> {
match env.open_database::<K, V>(name)? {
Some(db) => Ok(db),
None => Ok(env.create_database::<K, V>(name)?),
}
}
fn make_update_db_path(path: impl AsRef<Path>, uuid: &Uuid) -> PathBuf {
let mut path = path.as_ref().to_path_buf();
path.push(format!("update{}", uuid));
path
}
fn make_index_db_path(path: impl AsRef<Path>, uuid: &Uuid) -> PathBuf {
let mut path = path.as_ref().to_path_buf();
path.push(format!("index{}", uuid));
path
}
#[cfg(test)]
mod test {
use super::*;
use std::path::PathBuf;
#[test]
fn test_make_update_db_path() {
let uuid = Uuid::new_v4();
assert_eq!(
make_update_db_path("/home", &uuid),
PathBuf::from(format!("/home/update{}", uuid))
);
}
#[test]
fn test_make_index_db_path() {
let uuid = Uuid::new_v4();
assert_eq!(
make_index_db_path("/home", &uuid),
PathBuf::from(format!("/home/index{}", uuid))
);
}
mod index_store {
use super::*;
#[test]
fn test_index_uuid() {
let temp = tempfile::tempdir().unwrap();
let store = IndexStore::new(temp, IndexerOpts::default()).unwrap();
let name = "foobar";
let txn = store.env.read_txn().unwrap();
// the name is not found if the uuid is not present in the db
assert!(store.index_uuid(&txn, &name).unwrap().is_none());
drop(txn);
// insert a uuid in the name_to_uuid db:
let uuid = Uuid::new_v4();
let mut txn = store.env.write_txn().unwrap();
store
.name_to_uuid
.put(&mut txn, &name, uuid.as_bytes())
.unwrap();
txn.commit().unwrap();
// check that the uuid is there
let txn = store.env.read_txn().unwrap();
assert_eq!(store.index_uuid(&txn, &name).unwrap(), Some(uuid));
}
#[test]
fn test_retrieve_index() {
let temp = tempfile::tempdir().unwrap();
let store = IndexStore::new(temp, IndexerOpts::default()).unwrap();
let uuid = Uuid::new_v4();
let txn = store.env.read_txn().unwrap();
assert!(store.retrieve_index(&txn, uuid).unwrap().is_none());
let created_at = Utc::now();
let updated_at = created_at;
let meta = IndexMeta {
update_store_size: 4096 * 100,
index_store_size: 4096 * 100,
uuid: uuid.clone(),
created_at,
updated_at,
};
let mut txn = store.env.write_txn().unwrap();
store
.uuid_to_index_meta
.put(&mut txn, uuid.as_bytes(), &meta)
.unwrap();
txn.commit().unwrap();
// the index cache should be empty
assert!(store.uuid_to_index.is_empty());
let txn = store.env.read_txn().unwrap();
assert!(store.retrieve_index(&txn, uuid).unwrap().is_some());
assert_eq!(store.uuid_to_index.len(), 1);
}
#[test]
fn test_index() {
let temp = tempfile::tempdir().unwrap();
let store = IndexStore::new(temp, IndexerOpts::default()).unwrap();
let name = "foobar";
assert!(store.index(&name).unwrap().is_none());
let created_at = Utc::now();
let updated_at = created_at;
let uuid = Uuid::new_v4();
let meta = IndexMeta {
update_store_size: 4096 * 100,
index_store_size: 4096 * 100,
uuid: uuid.clone(),
created_at,
updated_at,
};
let mut txn = store.env.write_txn().unwrap();
store
.name_to_uuid
.put(&mut txn, &name, uuid.as_bytes())
.unwrap();
store
.uuid_to_index_meta
.put(&mut txn, uuid.as_bytes(), &meta)
.unwrap();
txn.commit().unwrap();
assert!(store.index(&name).unwrap().is_some());
}
#[test]
fn test_get_or_create_index() {
let temp = tempfile::tempdir().unwrap();
let store = IndexStore::new(temp, IndexerOpts::default()).unwrap();
let name = "foobar";
let update_store_size = 4096 * 100;
let index_store_size = 4096 * 100;
store
.get_or_create_index(&name, update_store_size, index_store_size)
.unwrap();
let txn = store.env.read_txn().unwrap();
let uuid = store.name_to_uuid.get(&txn, &name).unwrap();
assert_eq!(store.uuid_to_index.len(), 1);
assert!(uuid.is_some());
let uuid = Uuid::from_slice(uuid.unwrap()).unwrap();
let meta = store
.uuid_to_index_meta
.get(&txn, uuid.as_bytes())
.unwrap()
.unwrap();
assert_eq!(meta.update_store_size, update_store_size);
assert_eq!(meta.index_store_size, index_store_size);
assert_eq!(meta.uuid, uuid);
}
#[test]
fn test_create_index() {
let temp = tempfile::tempdir().unwrap();
let store = IndexStore::new(temp, IndexerOpts::default()).unwrap();
let name = "foobar";
let update_store_size = 4096 * 100;
let index_store_size = 4096 * 100;
let uuid = Uuid::new_v4();
let mut txn = store.env.write_txn().unwrap();
store
.create_index_txn(&mut txn, uuid, name, update_store_size, index_store_size)
.unwrap();
let uuid = store.name_to_uuid.get(&txn, &name).unwrap();
assert_eq!(store.uuid_to_index.len(), 1);
assert!(uuid.is_some());
let uuid = Uuid::from_slice(uuid.unwrap()).unwrap();
let meta = store
.uuid_to_index_meta
.get(&txn, uuid.as_bytes())
.unwrap()
.unwrap();
assert_eq!(meta.update_store_size, update_store_size);
assert_eq!(meta.index_store_size, index_store_size);
assert_eq!(meta.uuid, uuid);
}
}
}

View File

@ -1,243 +0,0 @@
mod update_store;
mod index_store;
mod update_handler;
use std::path::Path;
use std::sync::Arc;
use anyhow::{bail, Context, anyhow};
use itertools::Itertools;
use milli::Index;
use crate::option::IndexerOpts;
use index_store::IndexStore;
use super::IndexController;
use super::updates::UpdateStatus;
use super::{UpdateMeta, UpdateResult, IndexMetadata, IndexSettings};
pub struct LocalIndexController {
indexes: IndexStore,
update_db_size: u64,
index_db_size: u64,
}
impl LocalIndexController {
pub fn new(
path: impl AsRef<Path>,
opt: IndexerOpts,
index_db_size: u64,
update_db_size: u64,
) -> anyhow::Result<Self> {
let indexes = IndexStore::new(path, opt)?;
Ok(Self { indexes, index_db_size, update_db_size })
}
}
impl IndexController for LocalIndexController {
fn add_documents<S: AsRef<str>>(
&self,
index: S,
method: milli::update::IndexDocumentsMethod,
format: milli::update::UpdateFormat,
data: &[u8],
primary_key: Option<String>,
) -> anyhow::Result<UpdateStatus<UpdateMeta, UpdateResult, String>> {
let (_, update_store) = self.indexes.get_or_create_index(&index, self.update_db_size, self.index_db_size)?;
let meta = UpdateMeta::DocumentsAddition { method, format, primary_key };
let pending = update_store.register_update(meta, data)?;
Ok(pending.into())
}
fn update_settings<S: AsRef<str>>(
&self,
index: S,
settings: super::Settings,
create: bool,
) -> anyhow::Result<UpdateStatus<UpdateMeta, UpdateResult, String>> {
let (_, update_store) = if create {
self.indexes.get_or_create_index(&index, self.update_db_size, self.index_db_size)?
} else {
self.indexes.index(&index)?.ok_or_else(|| anyhow!("Index {:?} doesn't exist", index.as_ref()))?
};
let meta = UpdateMeta::Settings(settings);
let pending = update_store.register_update(meta, &[])?;
Ok(pending.into())
}
fn create_index(&self, index_settings: IndexSettings) -> anyhow::Result<IndexMetadata> {
let index_name = index_settings.name.context("Missing name for index")?;
let (index, _, meta) = self.indexes.create_index(&index_name, self.update_db_size, self.index_db_size)?;
if let Some(ref primary_key) = index_settings.primary_key {
if let Err(e) = update_primary_key(index, primary_key).context("error creating index") {
// TODO: creating index could not be completed, delete everything.
Err(e)?
}
}
let meta = IndexMetadata {
uid: index_name,
uuid: meta.uuid.clone(),
created_at: meta.created_at,
updated_at: meta.created_at,
primary_key: index_settings.primary_key,
};
Ok(meta)
}
fn delete_index<S: AsRef<str>>(&self, index_uid: S) -> anyhow::Result<()> {
self.indexes.delete(index_uid)
}
fn swap_indices<S1: AsRef<str>, S2: AsRef<str>>(&self, _index1_uid: S1, _index2_uid: S2) -> anyhow::Result<()> {
todo!()
}
fn index(&self, name: impl AsRef<str>) -> anyhow::Result<Option<Arc<Index>>> {
let index = self.indexes.index(name)?.map(|(i, _)| i);
Ok(index)
}
fn update_status(&self, index: impl AsRef<str>, id: u64) -> anyhow::Result<Option<UpdateStatus<UpdateMeta, UpdateResult, String>>> {
match self.indexes.index(&index)? {
Some((_, update_store)) => Ok(update_store.meta(id)?),
None => bail!("index {:?} doesn't exist", index.as_ref()),
}
}
fn all_update_status(&self, index: impl AsRef<str>) -> anyhow::Result<Vec<UpdateStatus<UpdateMeta, UpdateResult, String>>> {
match self.indexes.index(&index)? {
Some((_, update_store)) => {
let updates = update_store.iter_metas(|processing, processed, aborted, pending, failed| {
let processing_id = processing
.as_ref()
.map(|p| p.id());
Ok(processing
.map(UpdateStatus::from)
.into_iter()
.chain(pending
.filter_map(Result::ok)
// If an update is processing, filter out this update from the pending
// updates.
.filter(|(_, u)| processing_id
.map_or(true, |id| id != u.id()))
.map(|(_, u)| UpdateStatus::from(u)))
.chain(aborted.filter_map(Result::ok).map(|(_, u)| UpdateStatus::from(u)))
.chain(processed.filter_map(Result::ok).map(|(_, u)| UpdateStatus::from(u)))
.chain(failed.filter_map(Result::ok).map(|(_, u)| UpdateStatus::from(u)))
.sorted_by(|a, b| a.id().cmp(&b.id()))
.collect())
})?;
Ok(updates)
}
None => bail!("index {} doesn't exist.", index.as_ref()),
}
}
fn list_indexes(&self) -> anyhow::Result<Vec<IndexMetadata>> {
let metas = self.indexes.list_indexes()?;
let mut output_meta = Vec::new();
for (uid, meta, primary_key) in metas {
let created_at = meta.created_at;
let uuid = meta.uuid;
let updated_at = self
.all_update_status(&uid)?
.iter()
.filter_map(|u| u.processed().map(|u| u.processed_at))
.max()
.unwrap_or(created_at);
let index_meta = IndexMetadata {
uid,
created_at,
updated_at,
uuid,
primary_key,
};
output_meta.push(index_meta);
}
Ok(output_meta)
}
fn update_index(&self, uid: impl AsRef<str>, index_settings: IndexSettings) -> anyhow::Result<IndexMetadata> {
if index_settings.name.is_some() {
bail!("can't udpate an index name.")
}
let (primary_key, meta) = match index_settings.primary_key {
Some(ref primary_key) => {
self.indexes
.update_index(&uid, |index| {
let mut txn = index.write_txn()?;
if index.primary_key(&txn)?.is_some() {
bail!("primary key already exists.")
}
index.put_primary_key(&mut txn, primary_key)?;
txn.commit()?;
Ok(Some(primary_key.clone()))
})?
},
None => {
let (index, meta) = self.indexes
.index_with_meta(&uid)?
.with_context(|| format!("index {:?} doesn't exist.", uid.as_ref()))?;
let primary_key = index
.primary_key(&index.read_txn()?)?
.map(String::from);
(primary_key, meta)
},
};
Ok(IndexMetadata {
uid: uid.as_ref().to_string(),
uuid: meta.uuid.clone(),
created_at: meta.created_at,
updated_at: meta.updated_at,
primary_key,
})
}
fn clear_documents(&self, index: impl AsRef<str>) -> anyhow::Result<super::UpdateStatus> {
let (_, update_store) = self.indexes.index(&index)?
.with_context(|| format!("Index {:?} doesn't exist", index.as_ref()))?;
let meta = UpdateMeta::ClearDocuments;
let pending = update_store.register_update(meta, &[])?;
Ok(pending.into())
}
fn delete_documents(&self, index: impl AsRef<str>, document_ids: Vec<String>) -> anyhow::Result<super::UpdateStatus> {
let (_, update_store) = self.indexes.index(&index)?
.with_context(|| format!("Index {:?} doesn't exist", index.as_ref()))?;
let meta = UpdateMeta::DeleteDocuments;
let content = serde_json::to_vec(&document_ids)?;
let pending = update_store.register_update(meta, &content)?;
Ok(pending.into())
}
}
fn update_primary_key(index: impl AsRef<Index>, primary_key: impl AsRef<str>) -> anyhow::Result<()> {
let index = index.as_ref();
let mut txn = index.write_txn()?;
if index.primary_key(&txn)?.is_some() {
bail!("primary key already set.")
}
index.put_primary_key(&mut txn, primary_key.as_ref())?;
txn.commit()?;
Ok(())
}
#[cfg(test)]
mod test {
use super::*;
use tempfile::tempdir;
use crate::make_index_controller_tests;
make_index_controller_tests!({
let options = IndexerOpts::default();
let path = tempdir().unwrap();
let size = 4096 * 100;
LocalIndexController::new(path, options, size, size).unwrap()
});
}

View File

@ -1,255 +0,0 @@
use std::collections::HashMap;
use std::io;
use std::sync::Arc;
use anyhow::Result;
use flate2::read::GzDecoder;
use grenad::CompressionType;
use log::info;
use milli::update::{IndexDocumentsMethod, UpdateBuilder, UpdateFormat};
use milli::Index;
use rayon::ThreadPool;
use super::update_store::HandleUpdate;
use crate::index_controller::updates::{Failed, Processed, Processing};
use crate::index_controller::{Facets, Settings, UpdateMeta, UpdateResult};
use crate::option::IndexerOpts;
pub struct UpdateHandler {
index: Arc<Index>,
max_nb_chunks: Option<usize>,
chunk_compression_level: Option<u32>,
thread_pool: Arc<ThreadPool>,
log_frequency: usize,
max_memory: usize,
linked_hash_map_size: usize,
chunk_compression_type: CompressionType,
chunk_fusing_shrink_size: u64,
}
impl UpdateHandler {
pub fn new(
opt: &IndexerOpts,
index: Arc<Index>,
thread_pool: Arc<ThreadPool>,
) -> anyhow::Result<Self> {
Ok(Self {
index,
max_nb_chunks: opt.max_nb_chunks,
chunk_compression_level: opt.chunk_compression_level,
thread_pool,
log_frequency: opt.log_every_n,
max_memory: opt.max_memory.get_bytes() as usize,
linked_hash_map_size: opt.linked_hash_map_size,
chunk_compression_type: opt.chunk_compression_type,
chunk_fusing_shrink_size: opt.chunk_fusing_shrink_size.get_bytes(),
})
}
fn update_builder(&self, update_id: u64) -> UpdateBuilder {
// We prepare the update by using the update builder.
let mut update_builder = UpdateBuilder::new(update_id);
if let Some(max_nb_chunks) = self.max_nb_chunks {
update_builder.max_nb_chunks(max_nb_chunks);
}
if let Some(chunk_compression_level) = self.chunk_compression_level {
update_builder.chunk_compression_level(chunk_compression_level);
}
update_builder.thread_pool(&self.thread_pool);
update_builder.log_every_n(self.log_frequency);
update_builder.max_memory(self.max_memory);
update_builder.linked_hash_map_size(self.linked_hash_map_size);
update_builder.chunk_compression_type(self.chunk_compression_type);
update_builder.chunk_fusing_shrink_size(self.chunk_fusing_shrink_size);
update_builder
}
fn update_documents(
&self,
format: UpdateFormat,
method: IndexDocumentsMethod,
content: &[u8],
update_builder: UpdateBuilder,
primary_key: Option<&str>,
) -> anyhow::Result<UpdateResult> {
// We must use the write transaction of the update here.
let mut wtxn = self.index.write_txn()?;
// Set the primary key if not set already, ignore if already set.
match (self.index.primary_key(&wtxn)?, primary_key) {
(None, Some(ref primary_key)) => {
self.index.put_primary_key(&mut wtxn, primary_key)?;
}
_ => (),
}
let mut builder = update_builder.index_documents(&mut wtxn, &self.index);
builder.update_format(format);
builder.index_documents_method(method);
let gzipped = true;
let reader = if gzipped && !content.is_empty() {
Box::new(GzDecoder::new(content))
} else {
Box::new(content) as Box<dyn io::Read>
};
let result = builder.execute(reader, |indexing_step, update_id| {
info!("update {}: {:?}", update_id, indexing_step)
});
match result {
Ok(addition_result) => wtxn
.commit()
.and(Ok(UpdateResult::DocumentsAddition(addition_result)))
.map_err(Into::into),
Err(e) => Err(e.into()),
}
}
fn clear_documents(&self, update_builder: UpdateBuilder) -> anyhow::Result<UpdateResult> {
// We must use the write transaction of the update here.
let mut wtxn = self.index.write_txn()?;
let builder = update_builder.clear_documents(&mut wtxn, &self.index);
match builder.execute() {
Ok(_count) => wtxn
.commit()
.and(Ok(UpdateResult::Other))
.map_err(Into::into),
Err(e) => Err(e.into()),
}
}
fn update_settings(
&self,
settings: &Settings,
update_builder: UpdateBuilder,
) -> anyhow::Result<UpdateResult> {
// We must use the write transaction of the update here.
let mut wtxn = self.index.write_txn()?;
let mut builder = update_builder.settings(&mut wtxn, &self.index);
// We transpose the settings JSON struct into a real setting update.
if let Some(ref names) = settings.searchable_attributes {
match names {
Some(names) => builder.set_searchable_fields(names.clone()),
None => builder.reset_searchable_fields(),
}
}
// We transpose the settings JSON struct into a real setting update.
if let Some(ref names) = settings.displayed_attributes {
match names {
Some(names) => builder.set_displayed_fields(names.clone()),
None => builder.reset_displayed_fields(),
}
}
// We transpose the settings JSON struct into a real setting update.
if let Some(ref facet_types) = settings.faceted_attributes {
let facet_types = facet_types.clone().unwrap_or_else(|| HashMap::new());
builder.set_faceted_fields(facet_types);
}
// We transpose the settings JSON struct into a real setting update.
if let Some(ref criteria) = settings.ranking_rules {
match criteria {
Some(criteria) => builder.set_criteria(criteria.clone()),
None => builder.reset_criteria(),
}
}
let result = builder
.execute(|indexing_step, update_id| info!("update {}: {:?}", update_id, indexing_step));
match result {
Ok(()) => wtxn
.commit()
.and(Ok(UpdateResult::Other))
.map_err(Into::into),
Err(e) => Err(e.into()),
}
}
fn update_facets(
&self,
levels: &Facets,
update_builder: UpdateBuilder,
) -> anyhow::Result<UpdateResult> {
// We must use the write transaction of the update here.
let mut wtxn = self.index.write_txn()?;
let mut builder = update_builder.facets(&mut wtxn, &self.index);
if let Some(value) = levels.level_group_size {
builder.level_group_size(value);
}
if let Some(value) = levels.min_level_size {
builder.min_level_size(value);
}
match builder.execute() {
Ok(()) => wtxn
.commit()
.and(Ok(UpdateResult::Other))
.map_err(Into::into),
Err(e) => Err(e.into()),
}
}
fn delete_documents(
&self,
document_ids: &[u8],
update_builder: UpdateBuilder,
) -> anyhow::Result<UpdateResult> {
let ids: Vec<String> = serde_json::from_slice(document_ids)?;
let mut txn = self.index.write_txn()?;
let mut builder = update_builder.delete_documents(&mut txn, &self.index)?;
// We ignore nonexistent document ids
ids.iter().for_each(|id| { builder.delete_external_id(id); });
match builder.execute() {
Ok(deleted) => txn
.commit()
.and(Ok(UpdateResult::DocumentDeletion { deleted }))
.map_err(Into::into),
Err(e) => Err(e.into())
}
}
}
impl HandleUpdate<UpdateMeta, UpdateResult, String> for UpdateHandler {
fn handle_update(
&mut self,
meta: Processing<UpdateMeta>,
content: &[u8],
) -> Result<Processed<UpdateMeta, UpdateResult>, Failed<UpdateMeta, String>> {
use UpdateMeta::*;
let update_id = meta.id();
let update_builder = self.update_builder(update_id);
let result = match meta.meta() {
DocumentsAddition {
method,
format,
primary_key,
} => self.update_documents(
*format,
*method,
content,
update_builder,
primary_key.as_deref(),
),
ClearDocuments => self.clear_documents(update_builder),
DeleteDocuments => self.delete_documents(content, update_builder),
Settings(settings) => self.update_settings(settings, update_builder),
Facets(levels) => self.update_facets(levels, update_builder),
};
match result {
Ok(result) => Ok(meta.process(result)),
Err(e) => Err(meta.fail(e.to_string())),
}
}
}

View File

@ -1,31 +1,34 @@
mod local_index_controller;
mod index_actor;
mod update_actor;
mod update_handler;
mod update_store;
mod updates;
mod uuid_resolver;
pub use local_index_controller::LocalIndexController;
use std::collections::HashMap;
use std::num::NonZeroUsize;
use std::path::Path;
use std::sync::Arc;
use std::time::Duration;
use anyhow::Result;
use chrono::{DateTime, Utc};
use milli::Index;
use milli::update::{IndexDocumentsMethod, UpdateFormat, DocumentAdditionResult};
use serde::{Serialize, Deserialize, de::Deserializer};
use uuid::Uuid;
use actix_web::web::{Bytes, Payload};
use anyhow::bail;
use futures::stream::StreamExt;
use milli::update::{IndexDocumentsMethod, UpdateFormat};
use serde::{Deserialize, Serialize};
use tokio::sync::mpsc;
use tokio::time::sleep;
pub use updates::{Processed, Processing, Failed};
use crate::index::{Document, SearchQuery, SearchResult};
use crate::index::{Facets, Settings, UpdateResult};
pub use updates::{Failed, Processed, Processing};
pub type UpdateStatus = updates::UpdateStatus<UpdateMeta, UpdateResult, String>;
#[derive(Debug, Serialize, Deserialize, Clone)]
#[serde(rename_all = "camelCase")]
pub struct IndexMetadata {
pub uid: String,
uuid: Uuid,
created_at: DateTime<Utc>,
updated_at: DateTime<Utc>,
primary_key: Option<String>,
uid: String,
#[serde(flatten)]
meta: index_actor::IndexMeta,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
@ -42,241 +45,248 @@ pub enum UpdateMeta {
Facets(Facets),
}
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(deny_unknown_fields)]
#[serde(rename_all = "camelCase")]
pub struct Facets {
pub level_group_size: Option<NonZeroUsize>,
pub min_level_size: Option<NonZeroUsize>,
}
fn deserialize_some<'de, T, D>(deserializer: D) -> Result<Option<T>, D::Error>
where T: Deserialize<'de>,
D: Deserializer<'de>
{
Deserialize::deserialize(deserializer).map(Some)
}
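The `Option<Option<T>>` pattern below, combined with `deserialize_some`, is what lets a setting distinguish a field that is absent from one explicitly set to `null`. A minimal sketch of the three cases, assuming the `Settings` struct defined next (a hypothetical test, not part of this commit):
#[cfg(test)]
mod deserialize_some_sketch {
    use super::Settings;

    #[test]
    fn absent_null_and_set_are_distinct() {
        // Field absent: the outer Option stays None (setting untouched).
        let s: Settings = serde_json::from_str("{}").unwrap();
        assert!(s.ranking_rules.is_none());

        // Explicit null: Some(None), i.e. "reset this setting".
        let s: Settings = serde_json::from_str(r#"{"rankingRules":null}"#).unwrap();
        assert_eq!(s.ranking_rules, Some(None));

        // Explicit value: Some(Some(value)).
        let s: Settings = serde_json::from_str(r#"{"rankingRules":["typo"]}"#).unwrap();
        assert_eq!(s.ranking_rules, Some(Some(vec!["typo".to_string()])));
    }
}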
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
#[serde(deny_unknown_fields)]
#[serde(rename_all = "camelCase")]
pub struct Settings {
#[serde(
default,
deserialize_with = "deserialize_some",
skip_serializing_if = "Option::is_none",
)]
pub displayed_attributes: Option<Option<Vec<String>>>,
#[serde(
default,
deserialize_with = "deserialize_some",
skip_serializing_if = "Option::is_none",
)]
pub searchable_attributes: Option<Option<Vec<String>>>,
#[serde(default)]
pub faceted_attributes: Option<Option<HashMap<String, String>>>,
#[serde(
default,
deserialize_with = "deserialize_some",
skip_serializing_if = "Option::is_none",
)]
pub ranking_rules: Option<Option<Vec<String>>>,
}
impl Settings {
pub fn cleared() -> Self {
Self {
displayed_attributes: Some(None),
searchable_attributes: Some(None),
faceted_attributes: Some(None),
ranking_rules: Some(None),
}
}
}
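Since every field of `cleared()` is `Some(None)`, serializing it should produce explicit `null`s for all settings, which the receiving side reads as a reset. A hypothetical check of that round-trip:
// Sketch: all cleared settings serialize as explicit nulls.
fn cleared_settings_serialize_as_nulls() {
    let json = serde_json::to_string(&Settings::cleared()).unwrap();
    assert_eq!(
        json,
        r#"{"displayedAttributes":null,"searchableAttributes":null,"facetedAttributes":null,"rankingRules":null}"#
    );
}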
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum UpdateResult {
DocumentsAddition(DocumentAdditionResult),
DocumentDeletion { deleted: usize },
Other,
}
#[derive(Clone, Debug)]
pub struct IndexSettings {
pub name: Option<String>,
pub uid: Option<String>,
pub primary_key: Option<String>,
}
/// The `IndexController` is in charge of the access to the underlying indices. It splits the logic
/// for read access, which is provided through a handle to the index, and write access, which must
/// be provided. This allows the implementer to define the behaviour of write accesses to the
/// indices, and to abstract the scheduling of the updates. The implementer must be able to provide
/// an instance of `IndexStore`.
pub trait IndexController {
pub struct IndexController {
uuid_resolver: uuid_resolver::UuidResolverHandle,
index_handle: index_actor::IndexActorHandle,
update_handle: update_actor::UpdateActorHandle<Bytes>,
}
/*
* Write operations
*
* Logic for the write operations needs to be provided by the implementer, since they can be made
* asynchronous thanks to an update_store, for example.
*
* */
impl IndexController {
pub fn new(
path: impl AsRef<Path>,
index_size: usize,
update_store_size: usize,
) -> anyhow::Result<Self> {
let uuid_resolver = uuid_resolver::UuidResolverHandle::new(&path)?;
let index_actor = index_actor::IndexActorHandle::new(&path, index_size)?;
let update_handle =
update_actor::UpdateActorHandle::new(index_actor.clone(), &path, update_store_size)?;
Ok(Self {
uuid_resolver,
index_handle: index_actor,
update_handle,
})
}
/// Perform document addition on the database. If the provided index does not exist, it will be
/// created when the addition is applied to the index.
fn add_documents<S: AsRef<str>>(
pub async fn add_documents(
&self,
index: S,
method: IndexDocumentsMethod,
format: UpdateFormat,
data: &[u8],
uid: String,
method: milli::update::IndexDocumentsMethod,
format: milli::update::UpdateFormat,
mut payload: Payload,
primary_key: Option<String>,
) -> anyhow::Result<UpdateStatus>;
) -> anyhow::Result<UpdateStatus> {
let uuid = self.uuid_resolver.get_or_create(uid).await?;
let meta = UpdateMeta::DocumentsAddition {
method,
format,
primary_key,
};
let (sender, receiver) = mpsc::channel(10);
/// Clear all documents in the given index.
fn clear_documents(&self, index: impl AsRef<str>) -> anyhow::Result<UpdateStatus>;
// It is necessary to spawn a local task to send the payload to the update handle, to
// prevent a deadlock between update_handle::update, which waits for the update to be
// registered, and the update actor, which waits for the payload to be sent to it.
tokio::task::spawn_local(async move {
while let Some(bytes) = payload.next().await {
match bytes {
Ok(bytes) => {
let _ = sender.send(Ok(bytes)).await;
}
Err(e) => {
let error: Box<dyn std::error::Error + Sync + Send + 'static> = Box::new(e);
let _ = sender.send(Err(error)).await;
}
}
}
});
/// Delete all documents in `document_ids`.
fn delete_documents(&self, index: impl AsRef<str>, document_ids: Vec<String>) -> anyhow::Result<UpdateStatus>;
// This must be done *AFTER* spawning the task.
let status = self.update_handle.update(meta, receiver, uuid).await?;
Ok(status)
}
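The ordering matters because the payload channel is bounded: registering the update before the forwarding task exists could leave `update_handle.update` and the update actor waiting on each other. A standalone sketch of the same spawn-then-consume shape, with hypothetical names:
use tokio::sync::mpsc;

// Sketch: the producer task is spawned first, so the bounded channel can be
// drained without either side blocking forever.
async fn forward_then_consume(chunks: Vec<Vec<u8>>) {
    let (sender, mut receiver) = mpsc::channel::<Vec<u8>>(10);
    tokio::spawn(async move {
        for chunk in chunks {
            let _ = sender.send(chunk).await;
        }
    });
    // Only *after* the producer exists do we start consuming/registering.
    while let Some(chunk) = receiver.recv().await {
        println!("received {} bytes", chunk.len());
    }
}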
/// Updates an index's settings. If the index does not exist, it will be created when the update
/// is applied to the index. `create` specifies whether an index should be created if not
/// existing.
fn update_settings<S: AsRef<str>>(&self, index_uid: S, settings: Settings, create: bool) -> anyhow::Result<UpdateStatus>;
pub async fn clear_documents(&self, uid: String) -> anyhow::Result<UpdateStatus> {
let uuid = self.uuid_resolver.resolve(uid).await?;
let meta = UpdateMeta::ClearDocuments;
let (_, receiver) = mpsc::channel(1);
let status = self.update_handle.update(meta, receiver, uuid).await?;
Ok(status)
}
/// Create an index with the given `index_uid`.
fn create_index(&self, index_settings: IndexSettings) -> Result<IndexMetadata>;
/// Delete index with the given `index_uid`, attempting to close it beforehand.
fn delete_index<S: AsRef<str>>(&self, index_uid: S) -> Result<()>;
/// Swap two indexes; concretely, it simply swaps the indexes the names point to.
fn swap_indices<S1: AsRef<str>, S2: AsRef<str>>(&self, index1_uid: S1, index2_uid: S2) -> Result<()>;
/// Apply an update to the given index. This method can be called when an update is ready to be
/// processed
fn handle_update<S: AsRef<str>>(
pub async fn delete_documents(
&self,
_index: S,
_update_id: u64,
_meta: Processing<UpdateMeta>,
_content: &[u8]
) -> Result<Processed<UpdateMeta, UpdateResult>, Failed<UpdateMeta, String>> {
todo!()
uid: String,
document_ids: Vec<String>,
) -> anyhow::Result<UpdateStatus> {
let uuid = self.uuid_resolver.resolve(uid).await?;
let meta = UpdateMeta::DeleteDocuments;
let (sender, receiver) = mpsc::channel(10);
tokio::task::spawn(async move {
let json = serde_json::to_vec(&document_ids).unwrap();
let bytes = Bytes::from(json);
let _ = sender.send(Ok(bytes)).await;
});
let status = self.update_handle.update(meta, receiver, uuid).await?;
Ok(status)
}
/// Returns, if it exists, the `Index` with the provided name.
fn index(&self, name: impl AsRef<str>) -> anyhow::Result<Option<Arc<Index>>>;
pub async fn update_settings(
&self,
uid: String,
settings: Settings,
create: bool,
) -> anyhow::Result<UpdateStatus> {
let uuid = if create {
let uuid = self.uuid_resolver.get_or_create(uid).await?;
// We need to create the index upfront, since it would otherwise only be created when
// the update is processed. This would make calls to GET index fail until the update
// is complete. Since this is get or create, we ignore the error when the index already
// exists.
match self.index_handle.create_index(uuid, None).await {
Ok(_) | Err(index_actor::IndexError::IndexAlreadyExists) => (),
Err(e) => return Err(e.into()),
}
uuid
} else {
self.uuid_resolver.resolve(uid).await?
};
let meta = UpdateMeta::Settings(settings);
// Nothing to send; drop the sender right away so as not to block the update actor.
let (_, receiver) = mpsc::channel(1);
/// Returns the update status of an update
fn update_status(&self, index: impl AsRef<str>, id: u64) -> anyhow::Result<Option<UpdateStatus>>;
let status = self.update_handle.update(meta, receiver, uuid).await?;
Ok(status)
}
/// Returns all the update statuses for an index
fn all_update_status(&self, index: impl AsRef<str>) -> anyhow::Result<Vec<UpdateStatus>>;
pub async fn create_index(
&self,
index_settings: IndexSettings,
) -> anyhow::Result<IndexMetadata> {
let IndexSettings { uid, primary_key } = index_settings;
let uid = uid.ok_or_else(|| anyhow::anyhow!("Can't create an index without a uid."))?;
let uuid = self.uuid_resolver.create(uid.clone()).await?;
let meta = self.index_handle.create_index(uuid, primary_key).await?;
let _ = self.update_handle.create(uuid).await?;
let meta = IndexMetadata { uid, meta };
/// List all the indexes
fn list_indexes(&self) -> anyhow::Result<Vec<IndexMetadata>>;
Ok(meta)
}
fn update_index(&self, name: impl AsRef<str>, index_settings: IndexSettings) -> anyhow::Result<IndexMetadata>;
pub async fn delete_index(&self, uid: String) -> anyhow::Result<()> {
let uuid = self.uuid_resolver.delete(uid).await?;
self.update_handle.delete(uuid).await?;
self.index_handle.delete(uuid).await?;
Ok(())
}
pub async fn update_status(&self, uid: String, id: u64) -> anyhow::Result<UpdateStatus> {
let uuid = self.uuid_resolver.resolve(uid).await?;
let result = self.update_handle.update_status(uuid, id).await?;
Ok(result)
}
pub async fn all_update_status(&self, uid: String) -> anyhow::Result<Vec<UpdateStatus>> {
let uuid = self.uuid_resolver.resolve(uid).await?;
let result = self.update_handle.get_all_updates_status(uuid).await?;
Ok(result)
}
pub async fn list_indexes(&self) -> anyhow::Result<Vec<IndexMetadata>> {
let uuids = self.uuid_resolver.list().await?;
let mut ret = Vec::new();
for (uid, uuid) in uuids {
let meta = self.index_handle.get_index_meta(uuid).await?;
let meta = IndexMetadata { uid, meta };
ret.push(meta);
}
Ok(ret)
}
pub async fn settings(&self, uid: String) -> anyhow::Result<Settings> {
let uuid = self.uuid_resolver.resolve(uid.clone()).await?;
let settings = self.index_handle.settings(uuid).await?;
Ok(settings)
}
pub async fn documents(
&self,
uid: String,
offset: usize,
limit: usize,
attributes_to_retrieve: Option<Vec<String>>,
) -> anyhow::Result<Vec<Document>> {
let uuid = self.uuid_resolver.resolve(uid.clone()).await?;
let documents = self
.index_handle
.documents(uuid, offset, limit, attributes_to_retrieve)
.await?;
Ok(documents)
}
pub async fn document(
&self,
uid: String,
doc_id: String,
attributes_to_retrieve: Option<Vec<String>>,
) -> anyhow::Result<Document> {
let uuid = self.uuid_resolver.resolve(uid.clone()).await?;
let document = self
.index_handle
.document(uuid, doc_id, attributes_to_retrieve)
.await?;
Ok(document)
}
pub async fn update_index(
&self,
uid: String,
index_settings: IndexSettings,
) -> anyhow::Result<IndexMetadata> {
if index_settings.uid.is_some() {
bail!("Can't change the index uid.")
}
let uuid = self.uuid_resolver.resolve(uid.clone()).await?;
let meta = self.index_handle.update_index(uuid, index_settings).await?;
let meta = IndexMetadata { uid, meta };
Ok(meta)
}
pub async fn search(&self, uid: String, query: SearchQuery) -> anyhow::Result<SearchResult> {
let uuid = self.uuid_resolver.resolve(uid).await?;
let result = self.index_handle.search(uuid, query).await?;
Ok(result)
}
pub async fn get_index(&self, uid: String) -> anyhow::Result<IndexMetadata> {
let uuid = self.uuid_resolver.resolve(uid.clone()).await?;
let meta = self.index_handle.get_index_meta(uuid).await?;
let meta = IndexMetadata { uid, meta };
Ok(meta)
}
}
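A hypothetical start-up sketch of the controller's public surface, assuming the post-refactor `IndexSettings { uid, primary_key }` shape (sizes and names are made up):
// Sketch: construct the controller, create an index, then list it back.
async fn boot() -> anyhow::Result<()> {
    let controller = IndexController::new("./data.ms", 100 * 1024 * 1024, 100 * 1024 * 1024)?;
    let settings = IndexSettings {
        uid: Some("movies".to_string()),
        primary_key: Some("id".to_string()),
    };
    let meta = controller.create_index(settings).await?;
    println!("created index {}", meta.uid);
    assert_eq!(controller.list_indexes().await?.len(), 1);
    Ok(())
}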
#[cfg(test)]
#[macro_use]
pub(crate) mod test {
use super::*;
#[macro_export]
macro_rules! make_index_controller_tests {
($controller_builder:block) => {
#[test]
fn test_create_and_list_indexes() {
crate::index_controller::test::create_and_list_indexes($controller_builder);
pub async fn get_arc_ownership_blocking<T>(mut item: Arc<T>) -> T {
loop {
match Arc::try_unwrap(item) {
Ok(item) => return item,
Err(item_arc) => {
item = item_arc;
sleep(Duration::from_millis(100)).await;
continue;
}
#[test]
fn test_create_index_with_no_name_is_error() {
crate::index_controller::test::create_index_with_no_name_is_error($controller_builder);
}
#[test]
fn test_update_index() {
crate::index_controller::test::update_index($controller_builder);
}
};
}
pub(crate) fn create_and_list_indexes(controller: impl IndexController) {
let settings1 = IndexSettings {
name: Some(String::from("test_index")),
primary_key: None,
};
let settings2 = IndexSettings {
name: Some(String::from("test_index2")),
primary_key: Some(String::from("foo")),
};
controller.create_index(settings1).unwrap();
controller.create_index(settings2).unwrap();
let indexes = controller.list_indexes().unwrap();
assert_eq!(indexes.len(), 2);
assert_eq!(indexes[0].uid, "test_index");
assert_eq!(indexes[1].uid, "test_index2");
assert_eq!(indexes[1].primary_key.clone().unwrap(), "foo");
}
pub(crate) fn create_index_with_no_name_is_error(controller: impl IndexController) {
let settings = IndexSettings {
name: None,
primary_key: None,
};
assert!(controller.create_index(settings).is_err());
}
pub(crate) fn update_index(controller: impl IndexController) {
let settings = IndexSettings {
name: Some(String::from("test")),
primary_key: None,
};
assert!(controller.create_index(settings).is_ok());
// Performing an empty update returns the index meta unchanged.
let settings = IndexSettings {
name: None,
primary_key: None,
};
let result = controller.update_index("test", settings).unwrap();
assert_eq!(result.uid, "test");
assert_eq!(result.created_at, result.updated_at);
assert!(result.primary_key.is_none());
// Changing the name triggers an error.
let settings = IndexSettings {
name: Some(String::from("bar")),
primary_key: None,
};
assert!(controller.update_index("test", settings).is_err());
// Update primary key
let settings = IndexSettings {
name: None,
primary_key: Some(String::from("foo")),
};
let result = controller.update_index("test", settings.clone()).unwrap();
assert_eq!(result.uid, "test");
assert!(result.created_at < result.updated_at);
assert_eq!(result.primary_key.unwrap(), "foo");
// Setting the primary key again is an error.
assert!(controller.update_index("test", settings).is_err());
}
}
}
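For context, `get_arc_ownership_blocking` above polls until the caller holds the last strong reference; a hypothetical use when tearing a store down:
// Sketch: wait for all other clones of the Arc to be dropped, then take
// exclusive ownership so the value can be torn down cleanly.
async fn close_cleanly(shared: std::sync::Arc<String>) {
    let owned: String = get_arc_ownership_blocking(shared).await; // polls every 100ms
    drop(owned); // no other strong references remain
}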

View File

@ -0,0 +1,369 @@
use std::collections::{hash_map::Entry, HashMap};
use std::fs::{create_dir_all, remove_dir_all};
use std::path::{Path, PathBuf};
use std::sync::Arc;
use super::index_actor::IndexActorHandle;
use log::info;
use thiserror::Error;
use tokio::fs::File;
use tokio::io::AsyncWriteExt;
use tokio::sync::{mpsc, oneshot, RwLock};
use uuid::Uuid;
use super::get_arc_ownership_blocking;
use crate::index::UpdateResult;
use crate::index_controller::{UpdateMeta, UpdateStatus};
pub type Result<T> = std::result::Result<T, UpdateError>;
type UpdateStore = super::update_store::UpdateStore<UpdateMeta, UpdateResult, String>;
type PayloadData<D> = std::result::Result<D, Box<dyn std::error::Error + Sync + Send + 'static>>;
#[derive(Debug, Error)]
pub enum UpdateError {
#[error("error with update: {0}")]
Error(Box<dyn std::error::Error + Sync + Send + 'static>),
#[error("Index {0} doesn't exist.")]
UnexistingIndex(Uuid),
#[error("Update {0} doesn't exist.")]
UnexistingUpdate(u64),
}
enum UpdateMsg<D> {
Update {
uuid: Uuid,
meta: UpdateMeta,
data: mpsc::Receiver<PayloadData<D>>,
ret: oneshot::Sender<Result<UpdateStatus>>,
},
ListUpdates {
uuid: Uuid,
ret: oneshot::Sender<Result<Vec<UpdateStatus>>>,
},
GetUpdate {
uuid: Uuid,
ret: oneshot::Sender<Result<UpdateStatus>>,
id: u64,
},
Delete {
uuid: Uuid,
ret: oneshot::Sender<Result<()>>,
},
Create {
uuid: Uuid,
ret: oneshot::Sender<Result<()>>,
},
}
struct UpdateActor<D, S> {
path: PathBuf,
store: S,
inbox: mpsc::Receiver<UpdateMsg<D>>,
}
#[async_trait::async_trait]
trait UpdateStoreStore {
async fn get_or_create(&self, uuid: Uuid) -> Result<Arc<UpdateStore>>;
async fn delete(&self, uuid: Uuid) -> Result<Option<Arc<UpdateStore>>>;
async fn get(&self, uuid: Uuid) -> Result<Option<Arc<UpdateStore>>>;
}
impl<D, S> UpdateActor<D, S>
where
D: AsRef<[u8]> + Sized + 'static,
S: UpdateStoreStore,
{
fn new(
store: S,
inbox: mpsc::Receiver<UpdateMsg<D>>,
path: impl AsRef<Path>,
) -> anyhow::Result<Self> {
let path = path.as_ref().to_owned().join("update_files");
create_dir_all(&path)?;
assert!(path.exists());
Ok(Self { store, inbox, path })
}
async fn run(mut self) {
use UpdateMsg::*;
info!("Started update actor.");
loop {
match self.inbox.recv().await {
Some(Update {
uuid,
meta,
data,
ret,
}) => {
let _ = ret.send(self.handle_update(uuid, meta, data).await);
}
Some(ListUpdates { uuid, ret }) => {
let _ = ret.send(self.handle_list_updates(uuid).await);
}
Some(GetUpdate { uuid, ret, id }) => {
let _ = ret.send(self.handle_get_update(uuid, id).await);
}
Some(Delete { uuid, ret }) => {
let _ = ret.send(self.handle_delete(uuid).await);
}
Some(Create { uuid, ret }) => {
let _ = ret.send(self.handle_create(uuid).await);
}
None => break,
}
}
}
async fn handle_update(
&self,
uuid: Uuid,
meta: UpdateMeta,
mut payload: mpsc::Receiver<PayloadData<D>>,
) -> Result<UpdateStatus> {
let update_store = self.store.get_or_create(uuid).await?;
let update_file_id = uuid::Uuid::new_v4();
let path = self.path.join(format!("update_{}", update_file_id));
let mut file = File::create(&path)
.await
.map_err(|e| UpdateError::Error(Box::new(e)))?;
while let Some(bytes) = payload.recv().await {
match bytes {
Ok(bytes) => {
file.write_all(bytes.as_ref())
.await
.map_err(|e| UpdateError::Error(Box::new(e)))?;
}
Err(e) => {
return Err(UpdateError::Error(e));
}
}
}
file.flush()
.await
.map_err(|e| UpdateError::Error(Box::new(e)))?;
tokio::task::spawn_blocking(move || {
update_store
.register_update(meta, path, uuid)
.map(UpdateStatus::Pending)
.map_err(|e| UpdateError::Error(Box::new(e)))
})
.await
.map_err(|e| UpdateError::Error(Box::new(e)))?
}
async fn handle_list_updates(&self, uuid: Uuid) -> Result<Vec<UpdateStatus>> {
let update_store = self.store.get(uuid).await?;
tokio::task::spawn_blocking(move || {
let result = update_store
.ok_or(UpdateError::UnexistingIndex(uuid))?
.list()
.map_err(|e| UpdateError::Error(e.into()))?;
Ok(result)
})
.await
.map_err(|e| UpdateError::Error(Box::new(e)))?
}
async fn handle_get_update(&self, uuid: Uuid, id: u64) -> Result<UpdateStatus> {
let store = self
.store
.get(uuid)
.await?
.ok_or(UpdateError::UnexistingIndex(uuid))?;
let result = store
.meta(id)
.map_err(|e| UpdateError::Error(Box::new(e)))?
.ok_or(UpdateError::UnexistingUpdate(id))?;
Ok(result)
}
async fn handle_delete(&self, uuid: Uuid) -> Result<()> {
let store = self.store.delete(uuid).await?;
if let Some(store) = store {
tokio::task::spawn(async move {
let store = get_arc_ownership_blocking(store).await;
tokio::task::spawn_blocking(move || {
store.prepare_for_closing().wait();
info!("Update store {} was closed.", uuid);
});
});
}
Ok(())
}
async fn handle_create(&self, uuid: Uuid) -> Result<()> {
let _ = self.store.get_or_create(uuid).await?;
Ok(())
}
}
#[derive(Clone)]
pub struct UpdateActorHandle<D> {
sender: mpsc::Sender<UpdateMsg<D>>,
}
impl<D> UpdateActorHandle<D>
where
D: AsRef<[u8]> + Sized + 'static + Sync + Send,
{
pub fn new(
index_handle: IndexActorHandle,
path: impl AsRef<Path>,
update_store_size: usize,
) -> anyhow::Result<Self> {
let path = path.as_ref().to_owned().join("updates");
let (sender, receiver) = mpsc::channel(100);
let store = MapUpdateStoreStore::new(index_handle, &path, update_store_size);
let actor = UpdateActor::new(store, receiver, path)?;
tokio::task::spawn(actor.run());
Ok(Self { sender })
}
pub async fn update(
&self,
meta: UpdateMeta,
data: mpsc::Receiver<PayloadData<D>>,
uuid: Uuid,
) -> Result<UpdateStatus> {
let (ret, receiver) = oneshot::channel();
let msg = UpdateMsg::Update {
uuid,
data,
meta,
ret,
};
let _ = self.sender.send(msg).await;
receiver.await.expect("update actor killed.")
}
pub async fn get_all_updates_status(&self, uuid: Uuid) -> Result<Vec<UpdateStatus>> {
let (ret, receiver) = oneshot::channel();
let msg = UpdateMsg::ListUpdates { uuid, ret };
let _ = self.sender.send(msg).await;
receiver.await.expect("update actor killed.")
}
pub async fn update_status(&self, uuid: Uuid, id: u64) -> Result<UpdateStatus> {
let (ret, receiver) = oneshot::channel();
let msg = UpdateMsg::GetUpdate { uuid, id, ret };
let _ = self.sender.send(msg).await;
receiver.await.expect("update actor killed.")
}
pub async fn delete(&self, uuid: Uuid) -> Result<()> {
let (ret, receiver) = oneshot::channel();
let msg = UpdateMsg::Delete { uuid, ret };
let _ = self.sender.send(msg).await;
receiver.await.expect("update actor killed.")
}
pub async fn create(&self, uuid: Uuid) -> Result<()> {
let (ret, receiver) = oneshot::channel();
let msg = UpdateMsg::Create { uuid, ret };
let _ = self.sender.send(msg).await;
receiver.await.expect("update actor killed.")
}
}
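Every handle method above follows the same oneshot round-trip; distilled to a minimal, hypothetical form:
use tokio::sync::{mpsc, oneshot};

// Sketch: the message carries a oneshot sender; the caller awaits the
// matching receiver for the actor's reply.
enum Msg {
    Ping { ret: oneshot::Sender<&'static str> },
}

async fn ping(sender: &mpsc::Sender<Msg>) -> &'static str {
    let (ret, receiver) = oneshot::channel();
    let _ = sender.send(Msg::Ping { ret }).await;
    receiver.await.expect("actor killed")
}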
struct MapUpdateStoreStore {
db: Arc<RwLock<HashMap<Uuid, Arc<UpdateStore>>>>,
index_handle: IndexActorHandle,
path: PathBuf,
update_store_size: usize,
}
impl MapUpdateStoreStore {
fn new(
index_handle: IndexActorHandle,
path: impl AsRef<Path>,
update_store_size: usize,
) -> Self {
let db = Arc::new(RwLock::new(HashMap::new()));
let path = path.as_ref().to_owned();
Self {
db,
index_handle,
path,
update_store_size,
}
}
}
#[async_trait::async_trait]
impl UpdateStoreStore for MapUpdateStoreStore {
async fn get_or_create(&self, uuid: Uuid) -> Result<Arc<UpdateStore>> {
match self.db.write().await.entry(uuid) {
Entry::Vacant(e) => {
let mut options = heed::EnvOpenOptions::new();
let update_store_size = self.update_store_size;
options.map_size(update_store_size);
let path = self.path.clone().join(format!("updates-{}", e.key()));
create_dir_all(&path).unwrap();
let index_handle = self.index_handle.clone();
let store = UpdateStore::open(options, &path, move |meta, file| {
futures::executor::block_on(index_handle.update(meta, file))
})
.map_err(|e| UpdateError::Error(e.into()))?;
let store = e.insert(store);
Ok(store.clone())
}
Entry::Occupied(e) => Ok(e.get().clone()),
}
}
async fn get(&self, uuid: Uuid) -> Result<Option<Arc<UpdateStore>>> {
let guard = self.db.read().await;
match guard.get(&uuid) {
Some(store) => Ok(Some(store.clone())),
None => {
// The index was not found in the loaded indexes, so we attempt to load
// it from disk. We need to acquire a write lock **before** attempting to open the
// index, because someone could be trying to open it at the same time as us.
drop(guard);
let path = self.path.clone().join(format!("updates-{}", uuid));
if path.exists() {
let mut guard = self.db.write().await;
match guard.entry(uuid) {
Entry::Vacant(entry) => {
// We can safely load the index
let index_handle = self.index_handle.clone();
let mut options = heed::EnvOpenOptions::new();
let update_store_size = self.update_store_size;
options.map_size(update_store_size);
let store = UpdateStore::open(options, &path, move |meta, file| {
futures::executor::block_on(index_handle.update(meta, file))
})
.map_err(|e| UpdateError::Error(e.into()))?;
let store = entry.insert(store);
Ok(Some(store.clone()))
}
Entry::Occupied(entry) => {
// The index was loaded by another task while we were waiting for the write lock.
Ok(Some(entry.get().clone()))
}
}
} else {
Ok(None)
}
}
}
}
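The drop-the-read-lock-then-re-check dance above is a double-checked locking pattern; a distilled sketch with hypothetical types:
use std::{collections::HashMap, sync::Arc};
use tokio::sync::RwLock;

// Sketch: re-check under the write lock so two tasks racing on the same key
// load the resource only once.
async fn get_or_load(db: &RwLock<HashMap<u64, Arc<String>>>, key: u64) -> Arc<String> {
    if let Some(v) = db.read().await.get(&key) {
        return v.clone();
    }
    let mut guard = db.write().await; // the read guard is already dropped here
    guard
        .entry(key)
        .or_insert_with(|| Arc::new(format!("loaded-{}", key)))
        .clone()
}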
async fn delete(&self, uuid: Uuid) -> Result<Option<Arc<UpdateStore>>> {
let store = self.db.write().await.remove(&uuid);
let path = self.path.clone().join(format!("updates-{}", uuid));
if store.is_some() || path.exists() {
remove_dir_all(path).unwrap();
}
Ok(store)
}
}

View File

@ -0,0 +1,95 @@
use std::fs::File;
use crate::index::Index;
use anyhow::Result;
use grenad::CompressionType;
use milli::update::UpdateBuilder;
use rayon::ThreadPool;
use crate::index::UpdateResult;
use crate::index_controller::updates::{Failed, Processed, Processing};
use crate::index_controller::UpdateMeta;
use crate::option::IndexerOpts;
pub struct UpdateHandler {
max_nb_chunks: Option<usize>,
chunk_compression_level: Option<u32>,
thread_pool: ThreadPool,
log_frequency: usize,
max_memory: usize,
linked_hash_map_size: usize,
chunk_compression_type: CompressionType,
chunk_fusing_shrink_size: u64,
}
impl UpdateHandler {
pub fn new(opt: &IndexerOpts) -> anyhow::Result<Self> {
let thread_pool = rayon::ThreadPoolBuilder::new()
.num_threads(opt.indexing_jobs.unwrap_or(0))
.build()?;
Ok(Self {
max_nb_chunks: opt.max_nb_chunks,
chunk_compression_level: opt.chunk_compression_level,
thread_pool,
log_frequency: opt.log_every_n,
max_memory: opt.max_memory.get_bytes() as usize,
linked_hash_map_size: opt.linked_hash_map_size,
chunk_compression_type: opt.chunk_compression_type,
chunk_fusing_shrink_size: opt.chunk_fusing_shrink_size.get_bytes(),
})
}
fn update_builder(&self, update_id: u64) -> UpdateBuilder {
// We prepare the update by using the update builder.
let mut update_builder = UpdateBuilder::new(update_id);
if let Some(max_nb_chunks) = self.max_nb_chunks {
update_builder.max_nb_chunks(max_nb_chunks);
}
if let Some(chunk_compression_level) = self.chunk_compression_level {
update_builder.chunk_compression_level(chunk_compression_level);
}
update_builder.thread_pool(&self.thread_pool);
update_builder.log_every_n(self.log_frequency);
update_builder.max_memory(self.max_memory);
update_builder.linked_hash_map_size(self.linked_hash_map_size);
update_builder.chunk_compression_type(self.chunk_compression_type);
update_builder.chunk_fusing_shrink_size(self.chunk_fusing_shrink_size);
update_builder
}
pub fn handle_update(
&self,
meta: Processing<UpdateMeta>,
content: File,
index: Index,
) -> Result<Processed<UpdateMeta, UpdateResult>, Failed<UpdateMeta, String>> {
use UpdateMeta::*;
let update_id = meta.id();
let update_builder = self.update_builder(update_id);
let result = match meta.meta() {
DocumentsAddition {
method,
format,
primary_key,
} => index.update_documents(
*format,
*method,
content,
update_builder,
primary_key.as_deref(),
),
ClearDocuments => index.clear_documents(update_builder),
DeleteDocuments => index.delete_documents(content, update_builder),
Settings(settings) => index.update_settings(settings, update_builder),
Facets(levels) => index.update_facets(levels, update_builder),
};
match result {
Ok(result) => Ok(meta.process(result)),
Err(e) => Err(meta.fail(e.to_string())),
}
}
}

View File

@ -1,10 +1,14 @@
use std::path::Path;
use std::sync::{Arc, RwLock};
use std::fs::remove_file;
use std::path::{Path, PathBuf};
use std::sync::Arc;
use crossbeam_channel::Sender;
use heed::types::{OwnedType, DecodeIgnore, SerdeJson, ByteSlice};
use heed::{EnvOpenOptions, Env, Database};
use serde::{Serialize, Deserialize};
use heed::types::{DecodeIgnore, OwnedType, SerdeJson};
use heed::{Database, Env, EnvOpenOptions};
use parking_lot::RwLock;
use serde::{Deserialize, Serialize};
use std::fs::File;
use tokio::sync::mpsc;
use uuid::Uuid;
use crate::index_controller::updates::*;
@ -14,16 +18,33 @@ type BEU64 = heed::zerocopy::U64<heed::byteorder::BE>;
pub struct UpdateStore<M, N, E> {
env: Env,
pending_meta: Database<OwnedType<BEU64>, SerdeJson<Pending<M>>>,
pending: Database<OwnedType<BEU64>, ByteSlice>,
pending: Database<OwnedType<BEU64>, SerdeJson<PathBuf>>,
processed_meta: Database<OwnedType<BEU64>, SerdeJson<Processed<M, N>>>,
failed_meta: Database<OwnedType<BEU64>, SerdeJson<Failed<M, E>>>,
aborted_meta: Database<OwnedType<BEU64>, SerdeJson<Aborted<M>>>,
processing: Arc<RwLock<Option<Processing<M>>>>,
notification_sender: Sender<()>,
notification_sender: mpsc::Sender<()>,
}
pub trait HandleUpdate<M, N, E> {
fn handle_update(&mut self, meta: Processing<M>, content: &[u8]) -> Result<Processed<M, N>, Failed<M, E>>;
fn handle_update(
&mut self,
meta: Processing<M>,
content: File,
) -> anyhow::Result<Result<Processed<M, N>, Failed<M, E>>>;
}
impl<M, N, E, F> HandleUpdate<M, N, E> for F
where
F: FnMut(Processing<M>, File) -> anyhow::Result<Result<Processed<M, N>, Failed<M, E>>>,
{
fn handle_update(
&mut self,
meta: Processing<M>,
content: File,
) -> anyhow::Result<Result<Processed<M, N>, Failed<M, E>>> {
self(meta, content)
}
}
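That blanket impl is what allows `UpdateStore::open` to take a plain closure or function as its handler; a hypothetical conforming handler:
// Sketch: any FnMut(Processing<M>, File) -> anyhow::Result<Result<Processed<M, N>, Failed<M, E>>>
// is a HandleUpdate, so this function (or an equivalent closure) can be passed to open().
fn noop_handler(
    meta: Processing<String>,
    _content: File,
) -> anyhow::Result<Result<Processed<String, String>, Failed<String, String>>> {
    Ok(Ok(meta.process("done".to_string())))
}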
impl<M, N, E> UpdateStore<M, N, E>
@ -35,11 +56,11 @@ where
pub fn open<P, U>(
mut options: EnvOpenOptions,
path: P,
mut update_handler: U,
update_handler: U,
) -> heed::Result<Arc<Self>>
where
P: AsRef<Path>,
U: HandleUpdate<M, N, E> + Send + 'static,
U: HandleUpdate<M, N, E> + Sync + Clone + Send + 'static,
{
options.max_dbs(5);
@ -51,7 +72,7 @@ where
let failed_meta = env.create_database(Some("failed-meta"))?;
let processing = Arc::new(RwLock::new(None));
let (notification_sender, notification_receiver) = crossbeam_channel::bounded(1);
let (notification_sender, mut notification_receiver) = mpsc::channel(10);
// Send a first notification to trigger the process.
let _ = notification_sender.send(());
@ -69,13 +90,19 @@ where
// We need a weak reference so we can take ownership of the arc later when we
// want to close the index.
let update_store_weak = Arc::downgrade(&update_store);
std::thread::spawn(move || {
tokio::task::spawn(async move {
// Block and wait for something to process.
'outer: for _ in notification_receiver {
'outer: while notification_receiver.recv().await.is_some() {
loop {
match update_store_weak.upgrade() {
Some(update_store) => {
match update_store.process_pending_update(&mut update_handler) {
let handler = update_handler.clone();
let res = tokio::task::spawn_blocking(move || {
update_store.process_pending_update(handler)
})
.await
.expect("Fatal error processing update.");
match res {
Ok(Some(_)) => (),
Ok(None) => break,
Err(e) => eprintln!("error while processing update: {}", e),
@ -97,17 +124,20 @@ where
/// Returns the new biggest id to use to store the new update.
fn new_update_id(&self, txn: &heed::RoTxn) -> heed::Result<u64> {
let last_pending = self.pending_meta
let last_pending = self
.pending_meta
.remap_data_type::<DecodeIgnore>()
.last(txn)?
.map(|(k, _)| k.get());
let last_processed = self.processed_meta
let last_processed = self
.processed_meta
.remap_data_type::<DecodeIgnore>()
.last(txn)?
.map(|(k, _)| k.get());
let last_aborted = self.aborted_meta
let last_aborted = self
.aborted_meta
.remap_data_type::<DecodeIgnore>()
.last(txn)?
.map(|(k, _)| k.get());
@ -129,7 +159,8 @@ where
pub fn register_update(
&self,
meta: M,
content: &[u8]
content: impl AsRef<Path>,
index_uuid: Uuid,
) -> heed::Result<Pending<M>> {
let mut wtxn = self.env.write_txn()?;
@ -140,23 +171,24 @@ where
let update_id = self.new_update_id(&wtxn)?;
let update_key = BEU64::new(update_id);
let meta = Pending::new(meta, update_id);
let meta = Pending::new(meta, update_id, index_uuid);
self.pending_meta.put(&mut wtxn, &update_key, &meta)?;
self.pending.put(&mut wtxn, &update_key, content)?;
self.pending
.put(&mut wtxn, &update_key, &content.as_ref().to_owned())?;
wtxn.commit()?;
if let Err(e) = self.notification_sender.try_send(()) {
assert!(!e.is_disconnected(), "update notification channel is disconnected");
}
self.notification_sender
.blocking_send(())
.expect("Update store loop exited.");
Ok(meta)
}
/// Executes the user-provided function on the next pending update (the one with the lowest id).
/// This is asynchronous as it lets the user process the update with a read-only txn,
/// writing the result meta to the processed-meta store only *after* it has been processed.
fn process_pending_update<U>(&self, handler: &mut U) -> heed::Result<Option<()>>
fn process_pending_update<U>(&self, mut handler: U) -> anyhow::Result<Option<()>>
where
U: HandleUpdate<M, N, E> + Send + 'static,
U: HandleUpdate<M, N, E>,
{
// Create a read transaction to be able to retrieve the pending update in order.
let rtxn = self.env.read_txn()?;
@ -166,7 +198,8 @@ where
// a reader while processing it, not a writer.
match first_meta {
Some((first_id, pending)) => {
let first_content = self.pending
let content_path = self
.pending
.get(&rtxn, &first_id)?
.expect("associated update content");
@ -174,23 +207,19 @@ where
// to the update handler. The processing store is non-persistent, so we can recover
// from a failure.
let processing = pending.processing();
self.processing
.write()
.unwrap()
.replace(processing.clone());
self.processing.write().replace(processing.clone());
let file = File::open(&content_path)?;
// Process the pending update using the provided user function.
let result = handler.handle_update(processing, first_content);
let result = handler.handle_update(processing, file)?;
drop(rtxn);
// Once the pending update has been successfully processed,
// we must remove the content from the pending and processing stores and
// write the *new* meta to the processed-meta store and commit.
let mut wtxn = self.env.write_txn()?;
self.processing
.write()
.unwrap()
.take();
self.processing.write().take();
self.pending_meta.delete(&mut wtxn, &first_id)?;
remove_file(&content_path)?;
self.pending.delete(&mut wtxn, &first_id)?;
match result {
Ok(processed) => self.processed_meta.put(&mut wtxn, &first_id, &processed)?,
@ -199,35 +228,60 @@ where
wtxn.commit()?;
Ok(Some(()))
},
None => Ok(None)
}
None => Ok(None),
}
}
/// Execute the user defined function with the meta-store iterators, the first
/// iterator is the *processed* meta one, the second the *aborted* meta one
/// and, the last is the *pending* meta one.
pub fn iter_metas<F, T>(&self, mut f: F) -> heed::Result<T>
where
F: for<'a> FnMut(
Option<Processing<M>>,
heed::RoIter<'a, OwnedType<BEU64>, SerdeJson<Processed<M, N>>>,
heed::RoIter<'a, OwnedType<BEU64>, SerdeJson<Aborted<M>>>,
heed::RoIter<'a, OwnedType<BEU64>, SerdeJson<Pending<M>>>,
heed::RoIter<'a, OwnedType<BEU64>, SerdeJson<Failed<M, E>>>,
) -> heed::Result<T>,
{
pub fn list(&self) -> anyhow::Result<Vec<UpdateStatus<M, N, E>>> {
let rtxn = self.env.read_txn()?;
let mut updates = Vec::new();
// We get the pending, processed and aborted meta iterators.
let processed_iter = self.processed_meta.iter(&rtxn)?;
let aborted_iter = self.aborted_meta.iter(&rtxn)?;
let pending_iter = self.pending_meta.iter(&rtxn)?;
let processing = self.processing.read().unwrap().clone();
let failed_iter = self.failed_meta.iter(&rtxn)?;
let processing = self.processing.read();
if let Some(ref processing) = *processing {
let update = UpdateStatus::from(processing.clone());
updates.push(update);
}
// We execute the user defined function with both iterators.
(f)(processing, processed_iter, aborted_iter, pending_iter, failed_iter)
let pending = self
.pending_meta
.iter(&rtxn)?
.filter_map(Result::ok)
.filter_map(|(_, p)| (Some(p.id()) != processing.as_ref().map(|p| p.id())).then(|| p))
.map(UpdateStatus::from);
updates.extend(pending);
let aborted = self
.aborted_meta
.iter(&rtxn)?
.filter_map(Result::ok)
.map(|(_, p)| p)
.map(UpdateStatus::from);
updates.extend(aborted);
let processed = self
.processed_meta
.iter(&rtxn)?
.filter_map(Result::ok)
.map(|(_, p)| p)
.map(UpdateStatus::from);
updates.extend(processed);
let failed = self
.failed_meta
.iter(&rtxn)?
.filter_map(Result::ok)
.map(|(_, p)| p)
.map(UpdateStatus::from);
updates.extend(failed);
updates.sort_by_key(|u| u.id());
Ok(updates)
}
/// Returns the update associated meta or `None` if the update doesn't exist.
@ -235,7 +289,7 @@ where
let rtxn = self.env.read_txn()?;
let key = BEU64::new(update_id);
if let Some(ref meta) = *self.processing.read().unwrap() {
if let Some(ref meta) = *self.processing.read() {
if meta.id() == update_id {
return Ok(Some(UpdateStatus::Processing(meta.clone())));
}
@ -319,89 +373,92 @@ where
}
}
#[cfg(test)]
mod tests {
use super::*;
use std::thread;
use std::time::{Duration, Instant};
//#[cfg(test)]
//mod tests {
//use super::*;
//use std::thread;
//use std::time::{Duration, Instant};
impl<M, N, F, E> HandleUpdate<M, N, E> for F
where F: FnMut(Processing<M>, &[u8]) -> Result<Processed<M, N>, Failed<M, E>> + Send + 'static {
fn handle_update(&mut self, meta: Processing<M>, content: &[u8]) -> Result<Processed<M, N>, Failed<M, E>> {
self(meta, content)
}
}
//#[test]
//fn simple() {
//let dir = tempfile::tempdir().unwrap();
//let mut options = EnvOpenOptions::new();
//options.map_size(4096 * 100);
//let update_store = UpdateStore::open(
//options,
//dir,
//|meta: Processing<String>, _content: &_| -> Result<_, Failed<_, ()>> {
//let new_meta = meta.meta().to_string() + " processed";
//let processed = meta.process(new_meta);
//Ok(processed)
//},
//)
//.unwrap();
#[test]
fn simple() {
let dir = tempfile::tempdir().unwrap();
let mut options = EnvOpenOptions::new();
options.map_size(4096 * 100);
let update_store = UpdateStore::open(options, dir, |meta: Processing<String>, _content: &_| -> Result<_, Failed<_, ()>> {
let new_meta = meta.meta().to_string() + " processed";
let processed = meta.process(new_meta);
Ok(processed)
}).unwrap();
//let meta = String::from("kiki");
//let update = update_store.register_update(meta, &[]).unwrap();
//thread::sleep(Duration::from_millis(100));
//let meta = update_store.meta(update.id()).unwrap().unwrap();
//if let UpdateStatus::Processed(Processed { success, .. }) = meta {
//assert_eq!(success, "kiki processed");
//} else {
//panic!()
//}
//}
let meta = String::from("kiki");
let update = update_store.register_update(meta, &[]).unwrap();
thread::sleep(Duration::from_millis(100));
let meta = update_store.meta(update.id()).unwrap().unwrap();
if let UpdateStatus::Processed(Processed { success, .. }) = meta {
assert_eq!(success, "kiki processed");
} else {
panic!()
}
}
//#[test]
//#[ignore]
//fn long_running_update() {
//let dir = tempfile::tempdir().unwrap();
//let mut options = EnvOpenOptions::new();
//options.map_size(4096 * 100);
//let update_store = UpdateStore::open(
//options,
//dir,
//|meta: Processing<String>, _content: &_| -> Result<_, Failed<_, ()>> {
//thread::sleep(Duration::from_millis(400));
//let new_meta = meta.meta().to_string() + "processed";
//let processed = meta.process(new_meta);
//Ok(processed)
//},
//)
//.unwrap();
#[test]
#[ignore]
fn long_running_update() {
let dir = tempfile::tempdir().unwrap();
let mut options = EnvOpenOptions::new();
options.map_size(4096 * 100);
let update_store = UpdateStore::open(options, dir, |meta: Processing<String>, _content:&_| -> Result<_, Failed<_, ()>> {
thread::sleep(Duration::from_millis(400));
let new_meta = meta.meta().to_string() + "processed";
let processed = meta.process(new_meta);
Ok(processed)
}).unwrap();
//let before_register = Instant::now();
let before_register = Instant::now();
//let meta = String::from("kiki");
//let update_kiki = update_store.register_update(meta, &[]).unwrap();
//assert!(before_register.elapsed() < Duration::from_millis(200));
let meta = String::from("kiki");
let update_kiki = update_store.register_update(meta, &[]).unwrap();
assert!(before_register.elapsed() < Duration::from_millis(200));
//let meta = String::from("coco");
//let update_coco = update_store.register_update(meta, &[]).unwrap();
//assert!(before_register.elapsed() < Duration::from_millis(200));
let meta = String::from("coco");
let update_coco = update_store.register_update(meta, &[]).unwrap();
assert!(before_register.elapsed() < Duration::from_millis(200));
//let meta = String::from("cucu");
//let update_cucu = update_store.register_update(meta, &[]).unwrap();
//assert!(before_register.elapsed() < Duration::from_millis(200));
let meta = String::from("cucu");
let update_cucu = update_store.register_update(meta, &[]).unwrap();
assert!(before_register.elapsed() < Duration::from_millis(200));
//thread::sleep(Duration::from_millis(400 * 3 + 100));
thread::sleep(Duration::from_millis(400 * 3 + 100));
//let meta = update_store.meta(update_kiki.id()).unwrap().unwrap();
//if let UpdateStatus::Processed(Processed { success, .. }) = meta {
//assert_eq!(success, "kiki processed");
//} else {
//panic!()
//}
let meta = update_store.meta(update_kiki.id()).unwrap().unwrap();
if let UpdateStatus::Processed(Processed { success, .. }) = meta {
assert_eq!(success, "kiki processed");
} else {
panic!()
}
//let meta = update_store.meta(update_coco.id()).unwrap().unwrap();
//if let UpdateStatus::Processed(Processed { success, .. }) = meta {
//assert_eq!(success, "coco processed");
//} else {
//panic!()
//}
let meta = update_store.meta(update_coco.id()).unwrap().unwrap();
if let UpdateStatus::Processed(Processed { success, .. }) = meta {
assert_eq!(success, "coco processed");
} else {
panic!()
}
let meta = update_store.meta(update_cucu.id()).unwrap().unwrap();
if let UpdateStatus::Processed(Processed { success, .. }) = meta {
assert_eq!(success, "cucu processed");
} else {
panic!()
}
}
}
//let meta = update_store.meta(update_cucu.id()).unwrap().unwrap();
//if let UpdateStatus::Processed(Processed { success, .. }) = meta {
//assert_eq!(success, "cucu processed");
//} else {
//panic!()
//}
//}
//}

View File

@ -1,5 +1,6 @@
use chrono::{Utc, DateTime};
use serde::{Serialize, Deserialize};
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use uuid::Uuid;
#[derive(Debug, PartialEq, Eq, Hash, Serialize, Deserialize, Clone)]
#[serde(rename_all = "camelCase")]
@ -7,14 +8,16 @@ pub struct Pending<M> {
pub update_id: u64,
pub meta: M,
pub enqueued_at: DateTime<Utc>,
pub index_uuid: Uuid,
}
impl<M> Pending<M> {
pub fn new(meta: M, update_id: u64) -> Self {
pub fn new(meta: M, update_id: u64, index_uuid: Uuid) -> Self {
Self {
enqueued_at: Utc::now(),
meta,
update_id,
index_uuid,
}
}
@ -73,6 +76,10 @@ impl<M> Processing<M> {
self.from.meta()
}
pub fn index_uuid(&self) -> &Uuid {
&self.from.index_uuid
}
pub fn process<N>(self, meta: N) -> Processed<M, N> {
Processed {
success: meta,

View File

@ -0,0 +1,295 @@
use std::{fs::create_dir_all, path::Path};
use heed::{
types::{ByteSlice, Str},
Database, Env, EnvOpenOptions,
};
use log::{info, warn};
use thiserror::Error;
use tokio::sync::{mpsc, oneshot};
use uuid::Uuid;
pub type Result<T> = std::result::Result<T, UuidError>;
#[derive(Debug)]
enum UuidResolveMsg {
Resolve {
uid: String,
ret: oneshot::Sender<Result<Uuid>>,
},
GetOrCreate {
uid: String,
ret: oneshot::Sender<Result<Uuid>>,
},
Create {
uid: String,
ret: oneshot::Sender<Result<Uuid>>,
},
Delete {
uid: String,
ret: oneshot::Sender<Result<Uuid>>,
},
List {
ret: oneshot::Sender<Result<Vec<(String, Uuid)>>>,
},
}
struct UuidResolverActor<S> {
inbox: mpsc::Receiver<UuidResolveMsg>,
store: S,
}
impl<S: UuidStore> UuidResolverActor<S> {
fn new(inbox: mpsc::Receiver<UuidResolveMsg>, store: S) -> Self {
Self { inbox, store }
}
async fn run(mut self) {
use UuidResolveMsg::*;
info!("uuid resolver started");
loop {
match self.inbox.recv().await {
Some(Create { uid: name, ret }) => {
let _ = ret.send(self.handle_create(name).await);
}
Some(GetOrCreate { uid: name, ret }) => {
let _ = ret.send(self.handle_get_or_create(name).await);
}
Some(Resolve { uid: name, ret }) => {
let _ = ret.send(self.handle_resolve(name).await);
}
Some(Delete { uid: name, ret }) => {
let _ = ret.send(self.handle_delete(name).await);
}
Some(List { ret }) => {
let _ = ret.send(self.handle_list().await);
}
// All senders have been dropped; we need to quit.
None => break,
}
}
warn!("exiting uuid resolver loop");
}
async fn handle_create(&self, uid: String) -> Result<Uuid> {
if !is_index_uid_valid(&uid) {
return Err(UuidError::BadlyFormatted(uid));
}
self.store.create_uuid(uid, true).await
}
async fn handle_get_or_create(&self, uid: String) -> Result<Uuid> {
if !is_index_uid_valid(&uid) {
return Err(UuidError::BadlyFormatted(uid));
}
self.store.create_uuid(uid, false).await
}
async fn handle_resolve(&self, uid: String) -> Result<Uuid> {
self.store
.get_uuid(uid.clone())
.await?
.ok_or(UuidError::UnexistingIndex(uid))
}
async fn handle_delete(&self, uid: String) -> Result<Uuid> {
self.store
.delete(uid.clone())
.await?
.ok_or(UuidError::UnexistingIndex(uid))
}
async fn handle_list(&self) -> Result<Vec<(String, Uuid)>> {
let result = self.store.list().await?;
Ok(result)
}
}
fn is_index_uid_valid(uid: &str) -> bool {
uid.chars()
.all(|x| x.is_ascii_alphanumeric() || x == '-' || x == '_')
}
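A few hypothetical inputs showing what the check accepts and rejects:
// Sketch: only ASCII alphanumerics, '-' and '_' pass.
fn uid_validation_examples() {
    assert!(is_index_uid_valid("movies-2021_test"));
    assert!(!is_index_uid_valid("movies/2021")); // '/' is rejected
    assert!(!is_index_uid_valid("café")); // non-ASCII is rejected
}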
#[derive(Clone)]
pub struct UuidResolverHandle {
sender: mpsc::Sender<UuidResolveMsg>,
}
impl UuidResolverHandle {
pub fn new(path: impl AsRef<Path>) -> anyhow::Result<Self> {
let (sender, receiver) = mpsc::channel(100);
let store = HeedUuidStore::new(path)?;
let actor = UuidResolverActor::new(receiver, store);
tokio::spawn(actor.run());
Ok(Self { sender })
}
pub async fn resolve(&self, name: String) -> anyhow::Result<Uuid> {
let (ret, receiver) = oneshot::channel();
let msg = UuidResolveMsg::Resolve { uid: name, ret };
let _ = self.sender.send(msg).await;
Ok(receiver
.await
.expect("Uuid resolver actor has been killed")?)
}
pub async fn get_or_create(&self, name: String) -> Result<Uuid> {
let (ret, receiver) = oneshot::channel();
let msg = UuidResolveMsg::GetOrCreate { uid: name, ret };
let _ = self.sender.send(msg).await;
Ok(receiver
.await
.expect("Uuid resolver actor has been killed")?)
}
pub async fn create(&self, name: String) -> anyhow::Result<Uuid> {
let (ret, receiver) = oneshot::channel();
let msg = UuidResolveMsg::Create { uid: name, ret };
let _ = self.sender.send(msg).await;
Ok(receiver
.await
.expect("Uuid resolver actor has been killed")?)
}
pub async fn delete(&self, name: String) -> anyhow::Result<Uuid> {
let (ret, receiver) = oneshot::channel();
let msg = UuidResolveMsg::Delete { uid: name, ret };
let _ = self.sender.send(msg).await;
Ok(receiver
.await
.expect("Uuid resolver actor has been killed")?)
}
pub async fn list(&self) -> anyhow::Result<Vec<(String, Uuid)>> {
let (ret, receiver) = oneshot::channel();
let msg = UuidResolveMsg::List { ret };
let _ = self.sender.send(msg).await;
Ok(receiver
.await
.expect("Uuid resolver actor has been killed")?)
}
}
#[derive(Debug, Error)]
pub enum UuidError {
#[error("Name already exist.")]
NameAlreadyExist,
#[error("Index \"{0}\" doesn't exist.")]
UnexistingIndex(String),
#[error("Error performing task: {0}")]
TokioTask(#[from] tokio::task::JoinError),
#[error("Database error: {0}")]
Heed(#[from] heed::Error),
#[error("Uuid error: {0}")]
Uuid(#[from] uuid::Error),
#[error("Badly formatted index uid: {0}")]
BadlyFormatted(String),
}
#[async_trait::async_trait]
trait UuidStore {
// Create a new entry for `uid`. Return an error if `err` is true and the entry already
// exists; otherwise return the uuid.
async fn create_uuid(&self, uid: String, err: bool) -> Result<Uuid>;
async fn get_uuid(&self, uid: String) -> Result<Option<Uuid>>;
async fn delete(&self, uid: String) -> Result<Option<Uuid>>;
async fn list(&self) -> Result<Vec<(String, Uuid)>>;
}
struct HeedUuidStore {
env: Env,
db: Database<Str, ByteSlice>,
}
impl HeedUuidStore {
fn new(path: impl AsRef<Path>) -> anyhow::Result<Self> {
let path = path.as_ref().join("index_uuids");
create_dir_all(&path)?;
let mut options = EnvOpenOptions::new();
options.map_size(1_073_741_824); // 1GB
let env = options.open(path)?;
let db = env.create_database(None)?;
Ok(Self { env, db })
}
}
#[async_trait::async_trait]
impl UuidStore for HeedUuidStore {
async fn create_uuid(&self, name: String, err: bool) -> Result<Uuid> {
let env = self.env.clone();
let db = self.db;
tokio::task::spawn_blocking(move || {
let mut txn = env.write_txn()?;
match db.get(&txn, &name)? {
Some(uuid) => {
if err {
Err(UuidError::NameAlreadyExist)
} else {
let uuid = Uuid::from_slice(uuid)?;
Ok(uuid)
}
}
None => {
let uuid = Uuid::new_v4();
db.put(&mut txn, &name, uuid.as_bytes())?;
txn.commit()?;
Ok(uuid)
}
}
})
.await?
}
async fn get_uuid(&self, name: String) -> Result<Option<Uuid>> {
let env = self.env.clone();
let db = self.db;
tokio::task::spawn_blocking(move || {
let txn = env.read_txn()?;
match db.get(&txn, &name)? {
Some(uuid) => {
let uuid = Uuid::from_slice(uuid)?;
Ok(Some(uuid))
}
None => Ok(None),
}
})
.await?
}
async fn delete(&self, uid: String) -> Result<Option<Uuid>> {
let env = self.env.clone();
let db = self.db;
tokio::task::spawn_blocking(move || {
let mut txn = env.write_txn()?;
match db.get(&txn, &uid)? {
Some(uuid) => {
let uuid = Uuid::from_slice(uuid)?;
db.delete(&mut txn, &uid)?;
txn.commit()?;
Ok(Some(uuid))
}
None => Ok(None),
}
})
.await?
}
async fn list(&self) -> Result<Vec<(String, Uuid)>> {
let env = self.env.clone();
let db = self.db;
tokio::task::spawn_blocking(move || {
let txn = env.read_txn()?;
let mut entries = Vec::new();
for entry in db.iter(&txn)? {
let (name, uuid) = entry?;
let uuid = Uuid::from_slice(uuid)?;
entries.push((name.to_owned(), uuid))
}
Ok(entries)
})
.await?
}
}
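Each store method wraps its heed transaction in `spawn_blocking` so LMDB I/O never stalls the tokio runtime; the shared shape, as a hypothetical helper:
// Sketch: move the cheap-to-clone Env into a blocking task; the final
// `.await?` surfaces a JoinError as UuidError::TokioTask.
async fn count_entries(env: Env, db: Database<Str, ByteSlice>) -> Result<usize> {
    tokio::task::spawn_blocking(move || {
        let txn = env.read_txn()?;
        Ok(db.iter(&txn)?.count())
    })
    .await?
}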

View File

@ -1,60 +1,59 @@
#![allow(clippy::or_fun_call)]
pub mod data;
pub mod error;
pub mod helpers;
mod index;
mod index_controller;
pub mod option;
pub mod routes;
use actix_http::Error;
use actix_service::ServiceFactory;
use actix_web::{dev, web, App};
pub use self::data::Data;
use self::error::payload_error_handler;
pub use option::Opt;
pub fn create_app(
data: &Data,
enable_frontend: bool,
) -> App<
impl ServiceFactory<
Config = (),
Request = dev::ServiceRequest,
Response = dev::ServiceResponse<actix_http::body::Body>,
Error = Error,
InitError = (),
>,
actix_http::body::Body,
> {
let app = App::new()
.data(data.clone())
.app_data(
web::JsonConfig::default()
.limit(data.http_payload_size_limit())
.content_type(|_mime| true) // Accept all mime types
.error_handler(|err, _req| payload_error_handler(err).into()),
)
.app_data(
web::QueryConfig::default()
.error_handler(|err, _req| payload_error_handler(err).into())
)
.configure(routes::document::services)
.configure(routes::index::services)
.configure(routes::search::services)
.configure(routes::settings::services)
.configure(routes::stop_words::services)
.configure(routes::synonym::services)
.configure(routes::health::services)
.configure(routes::stats::services)
.configure(routes::key::services);
#[macro_export]
macro_rules! create_app {
($data:expr, $enable_frontend:expr) => {{
use actix_cors::Cors;
use actix_web::middleware::TrailingSlash;
use actix_web::App;
use actix_web::{middleware, web};
use meilisearch_http::error::payload_error_handler;
use meilisearch_http::routes::*;
let app = App::new()
.data($data.clone())
.app_data(
web::JsonConfig::default()
.limit($data.http_payload_size_limit())
.content_type(|_mime| true) // Accept all mime types
.error_handler(|err, _req| payload_error_handler(err).into()),
)
.app_data(
web::QueryConfig::default()
.error_handler(|err, _req| payload_error_handler(err).into()),
)
.configure(document::services)
.configure(index::services)
.configure(search::services)
.configure(settings::services)
.configure(stop_words::services)
.configure(synonym::services)
.configure(health::services)
.configure(stats::services)
.configure(key::services);
//.configure(routes::dump::services);
if enable_frontend {
app
.service(routes::load_html)
.service(routes::load_css)
} else {
app
}
let app = if $enable_frontend {
app.service(load_html).service(load_css)
} else {
app
};
app.wrap(
Cors::default()
.send_wildcard()
.allowed_headers(vec!["content-type", "x-meili-api-key"])
.max_age(86_400), // 24h
)
.wrap(middleware::Logger::default())
.wrap(middleware::Compress::default())
.wrap(middleware::NormalizePath::new(TrailingSlash::Trim))
}};
}

View File

@ -1,9 +1,7 @@
use std::env;
use actix_cors::Cors;
use actix_web::{middleware, HttpServer};
use actix_web::HttpServer;
use main_error::MainError;
use meilisearch_http::helpers::NormalizePath;
use meilisearch_http::{create_app, Data, Opt};
use structopt::StructOpt;
@ -46,46 +44,49 @@ async fn main() -> Result<(), MainError> {
}
}
"development" => {
env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info")).init();
env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info"))
.init();
}
_ => unreachable!(),
}
//if let Some(path) = &opt.import_snapshot {
//snapshot::load_snapshot(&opt.db_path, path, opt.ignore_snapshot_if_db_exists, opt.ignore_missing_snapshot)?;
//}
let data = Data::new(opt.clone())?;
//if !opt.no_analytics {
//let analytics_data = data.clone();
//let analytics_opt = opt.clone();
//thread::spawn(move || analytics::analytics_sender(analytics_data, analytics_opt));
//}
//if let Some(path) = &opt.import_dump {
//dump::import_dump(&data, path, opt.dump_batch_size)?;
//}
//if opt.schedule_snapshot {
//snapshot::schedule_snapshot(data.clone(), &opt.snapshot_dir, opt.snapshot_interval_sec.unwrap_or(86400))?;
//}
print_launch_resume(&opt, &data);
let enable_frontend = opt.env != "production";
let http_server = HttpServer::new(move || {
create_app(&data, enable_frontend)
.wrap(
Cors::default()
.send_wildcard()
.allowed_headers(vec!["content-type", "x-meili-api-key"])
.max_age(86_400) // 24h
)
.wrap(middleware::Logger::default())
.wrap(middleware::Compress::default())
.wrap(NormalizePath)
});
run_http(data, opt, enable_frontend).await?;
Ok(())
}
async fn run_http(
data: Data,
opt: Opt,
enable_frontend: bool,
) -> Result<(), Box<dyn std::error::Error>> {
let http_server = HttpServer::new(move || create_app!(&data, enable_frontend))
// Disabling signals allows the server to terminate immediately when a user enters CTRL-C
.disable_signals();
if let Some(config) = opt.get_ssl_config()? {
http_server
@ -95,7 +96,6 @@ async fn main() -> Result<(), MainError> {
} else {
http_server.bind(opt.http_addr)?.run().await?;
}
Ok(())
}

View File

@ -1,15 +1,15 @@
use std::io::{BufReader, Read};
use std::path::PathBuf;
use std::sync::Arc;
use std::{error, fs};
use byte_unit::Byte;
use grenad::CompressionType;
use rustls::internal::pemfile::{certs, pkcs8_private_keys, rsa_private_keys};
use rustls::{
AllowAnyAnonymousOrAuthenticatedClient, AllowAnyAuthenticatedClient, NoClientAuth,
RootCertStore,
};
use structopt::StructOpt;
#[derive(Debug, Clone, StructOpt)]
@ -99,7 +99,11 @@ pub struct Opt {
/// The Sentry DSN to use for error reporting. This defaults to the MeiliSearch Sentry project.
/// You can disable sentry all together using the `--no-sentry` flag or `MEILI_NO_SENTRY` environment variable.
#[cfg(all(not(debug_assertions), feature = "sentry"))]
#[structopt(long, env = "SENTRY_DSN", default_value = "https://5ddfa22b95f241198be2271aaf028653@sentry.io/3060337")]
#[structopt(
long,
env = "SENTRY_DSN",
default_value = "https://5ddfa22b95f241198be2271aaf028653@sentry.io/3060337"
)]
pub sentry_dsn: String,
/// Disable Sentry error reporting.

View File

@ -7,10 +7,10 @@ use milli::update::{IndexDocumentsMethod, UpdateFormat};
use serde::Deserialize;
use serde_json::Value;
use crate::error::ResponseError;
use crate::helpers::Authentication;
use crate::routes::IndexParam;
use crate::Data;
const DEFAULT_RETRIEVE_DOCUMENTS_OFFSET: usize = 0;
const DEFAULT_RETRIEVE_DOCUMENTS_LIMIT: usize = 20;
@ -19,7 +19,10 @@ macro_rules! guard_content_type {
($fn_name:ident, $guard_value:literal) => {
fn $fn_name(head: &actix_web::dev::RequestHead) -> bool {
if let Some(content_type) = head.headers.get("Content-Type") {
content_type.to_str().map(|v| v.contains($guard_value)).unwrap_or(false)
content_type
.to_str()
.map(|v| v.contains($guard_value))
.unwrap_or(false)
} else {
false
}
@ -57,7 +60,10 @@ async fn get_document(
) -> Result<HttpResponse, ResponseError> {
let index = path.index_uid.clone();
let id = path.document_id.clone();
match data.retrieve_document(index, id, None as Option<Vec<String>>).await {
match data
.retrieve_document(index, id, None as Option<Vec<String>>)
.await
{
Ok(document) => {
let json = serde_json::to_string(&document).unwrap();
Ok(HttpResponse::Ok().body(json))
@ -76,7 +82,10 @@ async fn delete_document(
data: web::Data<Data>,
path: web::Path<DocumentParam>,
) -> Result<HttpResponse, ResponseError> {
match data.delete_documents(path.index_uid.clone(), vec![path.document_id.clone()]).await {
match data
.delete_documents(path.index_uid.clone(), vec![path.document_id.clone()])
.await
{
Ok(result) => {
let json = serde_json::to_string(&result).unwrap();
Ok(HttpResponse::Ok().body(json))
@ -104,16 +113,17 @@ async fn get_all_documents(
let attributes_to_retrieve = params
.attributes_to_retrieve
.as_ref()
.map(|attrs| attrs
.split(",")
.map(String::from)
.collect::<Vec<_>>());
.map(|attrs| attrs.split(',').map(String::from).collect::<Vec<_>>());
match data.retrieve_documents(
path.index_uid.clone(),
params.offset.unwrap_or(DEFAULT_RETRIEVE_DOCUMENTS_OFFSET),
params.limit.unwrap_or(DEFAULT_RETRIEVE_DOCUMENTS_LIMIT),
attributes_to_retrieve).await {
match data
.retrieve_documents(
path.index_uid.clone(),
params.offset.unwrap_or(DEFAULT_RETRIEVE_DOCUMENTS_OFFSET),
params.limit.unwrap_or(DEFAULT_RETRIEVE_DOCUMENTS_LIMIT),
attributes_to_retrieve,
)
.await
{
Ok(docs) => {
let json = serde_json::to_string(&docs).unwrap();
Ok(HttpResponse::Ok().body(json))
@ -149,7 +159,8 @@ async fn add_documents_json(
UpdateFormat::Json,
body,
params.primary_key.clone(),
).await;
)
.await;
match addition_result {
Ok(update) => {
@ -163,7 +174,6 @@ async fn add_documents_json(
}
}
/// Default route for adding documents; this should return an error and redirect to the documentation
#[post("/indexes/{index_uid}/documents", wrap = "Authentication::Private")]
async fn add_documents_default(
@ -191,7 +201,7 @@ async fn update_documents_default(
#[put(
"/indexes/{index_uid}/documents",
wrap = "Authentication::Private",
guard = "guard_json",
guard = "guard_json"
)]
async fn update_documents(
data: web::Data<Data>,
@ -206,7 +216,8 @@ async fn update_documents(
UpdateFormat::Json,
body,
params.primary_key.clone(),
).await;
)
.await;
match addition_result {
Ok(update) => {
@ -231,7 +242,11 @@ async fn delete_documents(
) -> Result<HttpResponse, ResponseError> {
let ids = body
.iter()
.map(|v| v.as_str().map(String::from).unwrap_or_else(|| v.to_string()))
.map(|v| {
v.as_str()
.map(String::from)
.unwrap_or_else(|| v.to_string())
})
.collect();
match data.delete_documents(path.index_uid.clone(), ids).await {

View File

@ -3,10 +3,10 @@ use actix_web::{web, HttpResponse};
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use crate::Data;
use crate::error::ResponseError;
use crate::helpers::Authentication;
use crate::routes::IndexParam;
use crate::Data;
pub fn services(cfg: &mut web::ServiceConfig) {
cfg.service(list_indexes)
@ -18,10 +18,9 @@ pub fn services(cfg: &mut web::ServiceConfig) {
.service(get_all_updates_status);
}
#[get("/indexes", wrap = "Authentication::Private")]
async fn list_indexes(data: web::Data<Data>) -> Result<HttpResponse, ResponseError> {
match data.list_indexes() {
match data.list_indexes().await {
Ok(indexes) => {
let json = serde_json::to_string(&indexes).unwrap();
Ok(HttpResponse::Ok().body(&json))
@ -37,13 +36,12 @@ async fn get_index(
data: web::Data<Data>,
path: web::Path<IndexParam>,
) -> Result<HttpResponse, ResponseError> {
match data.index(&path.index_uid)? {
Some(meta) => {
match data.index(path.index_uid.clone()).await {
Ok(meta) => {
let json = serde_json::to_string(&meta).unwrap();
Ok(HttpResponse::Ok().body(json))
}
None => {
let e = format!("Index {:?} doesn't exist.", path.index_uid);
Err(e) => {
Ok(HttpResponse::BadRequest().body(serde_json::json!({ "error": e.to_string() })))
}
}
@ -61,7 +59,8 @@ async fn create_index(
data: web::Data<Data>,
body: web::Json<IndexCreateRequest>,
) -> Result<HttpResponse, ResponseError> {
match data.create_index(&body.uid, body.primary_key.clone()) {
let body = body.into_inner();
match data.create_index(body.uid, body.primary_key).await {
Ok(meta) => {
let json = serde_json::to_string(&meta).unwrap();
Ok(HttpResponse::Ok().body(json))
@ -75,7 +74,7 @@ async fn create_index(
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
struct UpdateIndexRequest {
name: Option<String>,
uid: Option<String>,
primary_key: Option<String>,
}
@ -95,7 +94,11 @@ async fn update_index(
path: web::Path<IndexParam>,
body: web::Json<UpdateIndexRequest>,
) -> Result<HttpResponse, ResponseError> {
match data.update_index(&path.index_uid, body.primary_key.as_ref(), body.name.as_ref()) {
let body = body.into_inner();
match data
.update_index(path.into_inner().index_uid, body.primary_key, body.uid)
.await
{
Ok(meta) => {
let json = serde_json::to_string(&meta).unwrap();
Ok(HttpResponse::Ok().body(json))
@ -133,16 +136,15 @@ async fn get_update_status(
data: web::Data<Data>,
path: web::Path<UpdateParam>,
) -> Result<HttpResponse, ResponseError> {
let result = data.get_update_status(&path.index_uid, path.update_id);
let params = path.into_inner();
let result = data
.get_update_status(params.index_uid, params.update_id)
.await;
match result {
Ok(Some(meta)) => {
Ok(meta) => {
let json = serde_json::to_string(&meta).unwrap();
Ok(HttpResponse::Ok().body(json))
}
Ok(None) => {
let e = format!("udpate {} for index {:?} doesn't exists.", path.update_id, path.index_uid);
Ok(HttpResponse::BadRequest().body(serde_json::json!({ "error": e.to_string() })))
}
Err(e) => {
Ok(HttpResponse::BadRequest().body(serde_json::json!({ "error": e.to_string() })))
}
@ -154,7 +156,7 @@ async fn get_all_updates_status(
data: web::Data<Data>,
path: web::Path<IndexParam>,
) -> Result<HttpResponse, ResponseError> {
let result = data.get_updates_status(&path.index_uid);
let result = data.get_updates_status(path.into_inner().index_uid).await;
match result {
Ok(metas) => {
let json = serde_json::to_string(&metas).unwrap();

View File

@ -1,6 +1,6 @@
use actix_web::get;
use actix_web::web;
use actix_web::HttpResponse;
use actix_web::get;
use serde::Serialize;
use crate::helpers::Authentication;
@ -19,7 +19,7 @@ struct KeysResponse {
#[get("/keys", wrap = "Authentication::Admin")]
async fn list(data: web::Data<Data>) -> HttpResponse {
let api_keys = data.api_keys.clone();
HttpResponse::Ok().json(KeysResponse {
HttpResponse::Ok().json(&KeysResponse {
private: api_keys.private,
public: api_keys.public,
})

View File

@ -4,9 +4,9 @@ use std::convert::{TryFrom, TryInto};
use actix_web::{get, post, web, HttpResponse};
use serde::Deserialize;
use crate::data::{SearchQuery, DEFAULT_SEARCH_LIMIT};
use crate::error::ResponseError;
use crate::helpers::Authentication;
use crate::index::{SearchQuery, DEFAULT_SEARCH_LIMIT};
use crate::routes::IndexParam;
use crate::Data;
@ -36,19 +36,19 @@ impl TryFrom<SearchQueryGet> for SearchQuery {
fn try_from(other: SearchQueryGet) -> anyhow::Result<Self> {
let attributes_to_retrieve = other
.attributes_to_retrieve
.map(|attrs| attrs.split(",").map(String::from).collect::<Vec<_>>());
.map(|attrs| attrs.split(',').map(String::from).collect::<Vec<_>>());
let attributes_to_crop = other
.attributes_to_crop
.map(|attrs| attrs.split(",").map(String::from).collect::<Vec<_>>());
.map(|attrs| attrs.split(',').map(String::from).collect::<Vec<_>>());
let attributes_to_highlight = other
.attributes_to_highlight
.map(|attrs| attrs.split(",").map(String::from).collect::<HashSet<_>>());
.map(|attrs| attrs.split(',').map(String::from).collect::<HashSet<_>>());
let facet_distributions = other
.facet_distributions
.map(|attrs| attrs.split(",").map(String::from).collect::<Vec<_>>());
.map(|attrs| attrs.split(',').map(String::from).collect::<Vec<_>>());
let facet_filters = match other.facet_filters {
Some(ref f) => Some(serde_json::from_str(f)?),
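
The repeated attrs.split(',') calls above use a char pattern, which clippy prefers over the one-character string ","; the behaviour is identical. A standalone sketch with a hypothetical helper name:

fn split_attrs(attrs: &str) -> Vec<String> {
    // "title,description" becomes ["title", "description"].
    attrs.split(',').map(String::from).collect()
}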
@ -80,10 +80,12 @@ async fn search_with_url_query(
let query: SearchQuery = match params.into_inner().try_into() {
Ok(q) => q,
Err(e) => {
return Ok(HttpResponse::BadRequest().body(serde_json::json!({ "error": e.to_string() })))
return Ok(
HttpResponse::BadRequest().body(serde_json::json!({ "error": e.to_string() }))
)
}
};
let search_result = data.search(&path.index_uid, query);
let search_result = data.search(path.into_inner().index_uid, query).await;
match search_result {
Ok(docs) => {
let docs = serde_json::to_string(&docs).unwrap();
@ -101,7 +103,9 @@ async fn search_with_post(
path: web::Path<IndexParam>,
params: web::Json<SearchQuery>,
) -> Result<HttpResponse, ResponseError> {
let search_result = data.search(&path.index_uid, params.into_inner());
let search_result = data
.search(path.into_inner().index_uid, params.into_inner())
.await;
match search_result {
Ok(docs) => {
let docs = serde_json::to_string(&docs).unwrap();

View File

@ -1,9 +1,9 @@
use actix_web::{web, HttpResponse, delete, get, post};
use actix_web::{delete, get, post, web, HttpResponse};
use crate::Data;
use crate::error::ResponseError;
use crate::index_controller::Settings;
use crate::helpers::Authentication;
use crate::index::Settings;
use crate::Data;
#[macro_export]
macro_rules! make_setting_route {
@ -14,14 +14,14 @@ macro_rules! make_setting_route {
use crate::data;
use crate::error::ResponseError;
use crate::helpers::Authentication;
use crate::index_controller::Settings;
use crate::index::Settings;
#[actix_web::delete($route, wrap = "Authentication::Private")]
pub async fn delete(
data: web::Data<data::Data>,
index_uid: web::Path<String>,
) -> Result<HttpResponse, ResponseError> {
use crate::index_controller::Settings;
use crate::index::Settings;
let settings = Settings {
$attr: Some(None),
..Default::default()
@ -64,7 +64,7 @@ macro_rules! make_setting_route {
data: actix_web::web::Data<data::Data>,
index_uid: actix_web::web::Path<String>,
) -> std::result::Result<HttpResponse, ResponseError> {
match data.settings(index_uid.as_ref()) {
match data.settings(index_uid.into_inner()).await {
Ok(settings) => {
let setting = settings.$attr;
let json = serde_json::to_string(&setting).unwrap();
@ -82,7 +82,7 @@ macro_rules! make_setting_route {
make_setting_route!(
"/indexes/{index_uid}/settings/attributes-for-faceting",
std::collections::HashMap<String, String>,
faceted_attributes
attributes_for_faceting
);
make_setting_route!(
@ -98,16 +98,16 @@ make_setting_route!(
);
//make_setting_route!(
//"/indexes/{index_uid}/settings/distinct-attribute",
//String,
//distinct_attribute
//"/indexes/{index_uid}/settings/distinct-attribute",
//String,
//distinct_attribute
//);
make_setting_route!(
"/indexes/{index_uid}/settings/ranking-rules",
Vec<String>,
ranking_rules
);
//make_setting_route!(
//"/indexes/{index_uid}/settings/ranking-rules",
//Vec<String>,
//ranking_rules
//);
macro_rules! create_services {
($($mod:ident),*) => {
@ -126,10 +126,9 @@ macro_rules! create_services {
}
create_services!(
faceted_attributes,
attributes_for_faceting,
displayed_attributes,
searchable_attributes,
ranking_rules
searchable_attributes
);
#[post("/indexes/{index_uid}/settings", wrap = "Authentication::Private")]
@ -138,7 +137,10 @@ async fn update_all(
index_uid: web::Path<String>,
body: web::Json<Settings>,
) -> Result<HttpResponse, ResponseError> {
match data.update_settings(index_uid.into_inner(), body.into_inner(), true).await {
match data
.update_settings(index_uid.into_inner(), body.into_inner(), true)
.await
{
Ok(update_result) => {
let json = serde_json::to_string(&update_result).unwrap();
Ok(HttpResponse::Ok().body(json))
@ -154,7 +156,7 @@ async fn get_all(
data: web::Data<Data>,
index_uid: web::Path<String>,
) -> Result<HttpResponse, ResponseError> {
match data.settings(index_uid.as_ref()) {
match data.settings(index_uid.into_inner()).await {
Ok(settings) => {
let json = serde_json::to_string(&settings).unwrap();
Ok(HttpResponse::Ok().body(json))
@ -171,7 +173,10 @@ async fn delete_all(
index_uid: web::Path<String>,
) -> Result<HttpResponse, ResponseError> {
let settings = Settings::cleared();
match data.update_settings(index_uid.into_inner(), settings, false).await {
match data
.update_settings(index_uid.into_inner(), settings, false)
.await
{
Ok(update_result) => {
let json = serde_json::to_string(&update_result).unwrap();
Ok(HttpResponse::Ok().body(json))
@ -181,4 +186,3 @@ async fn delete_all(
}
}
}

View File

@ -1,8 +1,8 @@
use std::collections::{HashMap, BTreeMap};
use std::collections::{BTreeMap, HashMap};
use actix_web::get;
use actix_web::web;
use actix_web::HttpResponse;
use actix_web::get;
use chrono::{DateTime, Utc};
use serde::Serialize;

View File

@ -1,5 +1,5 @@
use actix_web::{web, HttpResponse};
use actix_web::{delete, get, post};
use actix_web::{web, HttpResponse};
use std::collections::BTreeSet;
use crate::error::ResponseError;

View File

@ -1,7 +1,7 @@
use std::collections::BTreeMap;
use actix_web::{web, HttpResponse};
use actix_web::{delete, get, post};
use actix_web::{web, HttpResponse};
use crate::error::ResponseError;
use crate::helpers::Authentication;

View File

@ -2,7 +2,7 @@ use std::time::Duration;
use actix_web::http::StatusCode;
use serde_json::{json, Value};
use tokio::time::delay_for;
use tokio::time::sleep;
use super::service::Service;
@ -19,7 +19,10 @@ impl Index<'_> {
pub async fn load_test_set(&self) -> u64 {
let url = format!("/indexes/{}/documents", self.uid);
let (response, code) = self.service.post_str(url, include_str!("../assets/test_set.json")).await;
let (response, code) = self
.service
.post_str(url, include_str!("../assets/test_set.json"))
.await;
assert_eq!(code, 200);
let update_id = response["updateId"].as_i64().unwrap();
self.wait_update_id(update_id as u64).await;
@ -60,7 +63,11 @@ impl Index<'_> {
self.service.post(url, documents).await
}
pub async fn update_documents(&self, documents: Value, primary_key: Option<&str>) -> (Value, StatusCode) {
pub async fn update_documents(
&self,
documents: Value,
primary_key: Option<&str>,
) -> (Value, StatusCode) {
let url = match primary_key {
Some(key) => format!("/indexes/{}/documents?primaryKey={}", self.uid, key),
None => format!("/indexes/{}/documents", self.uid),
@ -73,19 +80,19 @@ impl Index<'_> {
let url = format!("/indexes/{}/updates/{}", self.uid, update_id);
for _ in 0..10 {
let (response, status_code) = self.service.get(&url).await;
assert_eq!(status_code, 200);
assert_eq!(status_code, 200, "response: {}", response);
if response["status"] == "processed" || response["status"] == "failed" {
return response;
}
delay_for(Duration::from_secs(1)).await;
sleep(Duration::from_secs(1)).await;
}
panic!("Timeout waiting for update id");
}
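
wait_update_id above polls the update status once per second, using tokio::time::sleep (which replaced delay_for in tokio 1.0). A minimal generic sketch of that bounded-polling pattern, with hypothetical names:

use std::time::Duration;
use tokio::time::sleep;

// Retry a readiness check a bounded number of times, sleeping between
// attempts; returns false if the condition never became true.
async fn wait_until<F: FnMut() -> bool>(mut is_done: F, attempts: usize) -> bool {
    for _ in 0..attempts {
        if is_done() {
            return true;
        }
        sleep(Duration::from_secs(1)).await;
    }
    false
}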
pub async fn get_update(&self, udpate_id: u64) -> (Value, StatusCode) {
let url = format!("/indexes/{}/updates/{}", self.uid, udpate_id);
pub async fn get_update(&self, update_id: u64) -> (Value, StatusCode) {
let url = format!("/indexes/{}/updates/{}", self.uid, update_id);
self.service.get(url).await
}
@ -95,7 +102,11 @@ impl Index<'_> {
self.service.get(url).await
}
pub async fn get_document(&self, id: u64, _options: Option<GetDocumentOptions>) -> (Value, StatusCode) {
pub async fn get_document(
&self,
id: u64,
_options: Option<GetDocumentOptions>,
) -> (Value, StatusCode) {
let url = format!("/indexes/{}/documents/{}", self.uid, id);
self.service.get(url).await
}
@ -111,7 +122,10 @@ impl Index<'_> {
}
if let Some(attributes_to_retrieve) = options.attributes_to_retrieve {
url.push_str(&format!("attributesToRetrieve={}&", attributes_to_retrieve.join(",")));
url.push_str(&format!(
"attributesToRetrieve={}&",
attributes_to_retrieve.join(",")
));
}
self.service.get(url).await
@ -129,7 +143,9 @@ impl Index<'_> {
pub async fn delete_batch(&self, ids: Vec<u64>) -> (Value, StatusCode) {
let url = format!("/indexes/{}/documents/delete-batch", self.uid);
self.service.post(url, serde_json::to_value(&ids).unwrap()).await
self.service
.post(url, serde_json::to_value(&ids).unwrap())
.await
}
pub async fn settings(&self) -> (Value, StatusCode) {

View File

@ -2,8 +2,8 @@ mod index;
mod server;
mod service;
pub use index::{GetAllDocumentsOptions, GetDocumentOptions};
pub use server::Server;
pub use index::{GetDocumentOptions, GetAllDocumentsOptions};
/// Performs a search test on both post and get routes
#[macro_export]

View File

@ -5,22 +5,24 @@ use tempdir::TempDir;
use urlencoding::encode;
use meilisearch_http::data::Data;
use meilisearch_http::option::{Opt, IndexerOpts};
use meilisearch_http::option::{IndexerOpts, Opt};
use super::index::Index;
use super::service::Service;
pub struct Server {
pub service: Service,
// hold ownership of the tempdir while we use the server instance.
_dir: tempdir::TempDir,
}
impl Server {
pub async fn new() -> Self {
let tmp_dir = TempDir::new("meilisearch").unwrap();
let dir = TempDir::new("meilisearch").unwrap();
let opt = Opt {
db_path: tmp_dir.path().join("db"),
dumps_dir: tmp_dir.path().join("dump"),
db_path: dir.path().join("db"),
dumps_dir: dir.path().join("dump"),
dump_batch_size: 16,
http_addr: "127.0.0.1:7700".to_owned(),
master_key: None,
@ -53,9 +55,7 @@ impl Server {
let data = Data::new(opt).unwrap();
let service = Service(data);
Server {
service,
}
Server { service, _dir: dir }
}
/// Returns a view to an index. There is no guarantee that the index exists.

View File

@ -1,15 +1,14 @@
use actix_web::{http::StatusCode, test};
use serde_json::Value;
use meilisearch_http::create_app;
use meilisearch_http::data::Data;
use meilisearch_http::helpers::NormalizePath;
pub struct Service(pub Data);
impl Service {
pub async fn post(&self, url: impl AsRef<str>, body: Value) -> (Value, StatusCode) {
let mut app =
test::init_service(meilisearch_http::create_app(&self.0, true).wrap(NormalizePath)).await;
let mut app = test::init_service(create_app!(&self.0, true)).await;
let req = test::TestRequest::post()
.uri(url.as_ref())
@ -24,14 +23,17 @@ impl Service {
}
/// Send a test post request from a text body, with a `content-type:application/json` header.
pub async fn post_str(&self, url: impl AsRef<str>, body: impl AsRef<str>) -> (Value, StatusCode) {
let mut app =
test::init_service(meilisearch_http::create_app(&self.0, true).wrap(NormalizePath)).await;
pub async fn post_str(
&self,
url: impl AsRef<str>,
body: impl AsRef<str>,
) -> (Value, StatusCode) {
let mut app = test::init_service(create_app!(&self.0, true)).await;
let req = test::TestRequest::post()
.uri(url.as_ref())
.set_payload(body.as_ref().to_string())
.header("content-type", "application/json")
.insert_header(("content-type", "application/json"))
.to_request();
let res = test::call_service(&mut app, req).await;
let status_code = res.status();
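
The test requests above now use insert_header, which takes a (name, value) tuple and replaces the older header builder method. A hypothetical helper mirroring that construction, assuming the actix-web 4 test API:

use actix_web::test;

// Build a JSON POST request for the test service; the names are illustrative.
fn json_post_request(url: &str, body: &str) -> actix_http::Request {
    test::TestRequest::post()
        .uri(url)
        .set_payload(body.to_string())
        .insert_header(("content-type", "application/json"))
        .to_request()
}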
@ -42,8 +44,7 @@ impl Service {
}
pub async fn get(&self, url: impl AsRef<str>) -> (Value, StatusCode) {
let mut app =
test::init_service(meilisearch_http::create_app(&self.0, true).wrap(NormalizePath)).await;
let mut app = test::init_service(create_app!(&self.0, true)).await;
let req = test::TestRequest::get().uri(url.as_ref()).to_request();
let res = test::call_service(&mut app, req).await;
@ -55,8 +56,7 @@ impl Service {
}
pub async fn put(&self, url: impl AsRef<str>, body: Value) -> (Value, StatusCode) {
let mut app =
test::init_service(meilisearch_http::create_app(&self.0, true).wrap(NormalizePath)).await;
let mut app = test::init_service(create_app!(&self.0, true)).await;
let req = test::TestRequest::put()
.uri(url.as_ref())
@ -71,8 +71,7 @@ impl Service {
}
pub async fn delete(&self, url: impl AsRef<str>) -> (Value, StatusCode) {
let mut app =
test::init_service(meilisearch_http::create_app(&self.0, true).wrap(NormalizePath)).await;
let mut app = test::init_service(create_app!(&self.0, true)).await;
let req = test::TestRequest::delete().uri(url.as_ref()).to_request();
let res = test::call_service(&mut app, req).await;

View File

@ -1,7 +1,7 @@
use serde_json::{json, Value};
use chrono::DateTime;
use serde_json::{json, Value};
use crate::common::{Server, GetAllDocumentsOptions};
use crate::common::{GetAllDocumentsOptions, Server};
#[actix_rt::test]
async fn add_documents_no_index_creation() {
@ -28,14 +28,16 @@ async fn add_documents_no_index_creation() {
let (response, code) = index.get_update(0).await;
assert_eq!(code, 200);
println!("response: {}", response);
assert_eq!(response["status"], "processed");
assert_eq!(response["updateId"], 0);
assert_eq!(response["success"]["DocumentsAddition"]["nb_documents"], 1);
let processed_at = DateTime::parse_from_rfc3339(response["processedAt"].as_str().unwrap()).unwrap();
let enqueued_at = DateTime::parse_from_rfc3339(response["enqueuedAt"].as_str().unwrap()).unwrap();
let started_processing_at = DateTime::parse_from_rfc3339(response["startedProcessingAt"].as_str().unwrap()).unwrap();
let processed_at =
DateTime::parse_from_rfc3339(response["processedAt"].as_str().unwrap()).unwrap();
let enqueued_at =
DateTime::parse_from_rfc3339(response["enqueuedAt"].as_str().unwrap()).unwrap();
let started_processing_at =
DateTime::parse_from_rfc3339(response["startedProcessingAt"].as_str().unwrap()).unwrap();
assert!(processed_at > started_processing_at);
assert!(started_processing_at > enqueued_at);
@ -72,7 +74,8 @@ async fn document_addition_with_primary_key() {
"content": "foo",
}
]);
let (_response, code) = index.add_documents(documents, Some("primary")).await; assert_eq!(code, 200);
let (_response, code) = index.add_documents(documents, Some("primary")).await;
assert_eq!(code, 200);
index.wait_update_id(0).await;
@ -98,7 +101,8 @@ async fn document_update_with_primary_key() {
"content": "foo",
}
]);
let (_response, code) = index.update_documents(documents, Some("primary")).await; assert_eq!(code, 200);
let (_response, code) = index.update_documents(documents, Some("primary")).await;
assert_eq!(code, 200);
index.wait_update_id(0).await;
@ -159,7 +163,7 @@ async fn update_documents_with_primary_key_and_primary_key_already_exists() {
assert_eq!(code, 200);
index.wait_update_id(0).await;
let (response, code) = index.get_update(0).await;
let (response, code) = index.get_update(0).await;
assert_eq!(code, 200);
assert_eq!(response["status"], "processed");
assert_eq!(response["updateId"], 0);
@ -264,7 +268,10 @@ async fn update_document() {
let (response, code) = index.get_document(1, None).await;
assert_eq!(code, 200);
assert_eq!(response.to_string(), r##"{"doc_id":1,"content":"foo","other":"bar"}"##);
assert_eq!(
response.to_string(),
r##"{"doc_id":1,"content":"foo","other":"bar"}"##
);
}
#[actix_rt::test]
@ -276,7 +283,12 @@ async fn add_larger_dataset() {
assert_eq!(code, 200);
assert_eq!(response["status"], "processed");
assert_eq!(response["success"]["DocumentsAddition"]["nb_documents"], 77);
let (response, code) = index.get_all_documents(GetAllDocumentsOptions { limit: Some(1000), ..Default::default() }).await;
let (response, code) = index
.get_all_documents(GetAllDocumentsOptions {
limit: Some(1000),
..Default::default()
})
.await;
assert_eq!(code, 200);
assert_eq!(response.as_array().unwrap().len(), 77);
}
@ -292,7 +304,12 @@ async fn update_larger_dataset() {
assert_eq!(code, 200);
assert_eq!(response["status"], "processed");
assert_eq!(response["success"]["DocumentsAddition"]["nb_documents"], 77);
let (response, code) = index.get_all_documents(GetAllDocumentsOptions { limit: Some(1000), ..Default::default() }).await;
let (response, code) = index
.get_all_documents(GetAllDocumentsOptions {
limit: Some(1000),
..Default::default()
})
.await;
assert_eq!(code, 200);
assert_eq!(response.as_array().unwrap().len(), 77);
}

View File

@ -1,6 +1,6 @@
use serde_json::json;
use crate::common::{Server, GetAllDocumentsOptions};
use crate::common::{GetAllDocumentsOptions, Server};
#[actix_rt::test]
async fn delete_one_document_unexisting_index() {
@ -24,7 +24,9 @@ async fn delete_one_unexisting_document() {
async fn delete_one_document() {
let server = Server::new().await;
let index = server.index("test");
index.add_documents(json!([{ "id": 0, "content": "foobar" }]), None).await;
index
.add_documents(json!([{ "id": 0, "content": "foobar" }]), None)
.await;
index.wait_update_id(0).await;
let (_response, code) = server.index("test").delete_document(0).await;
assert_eq!(code, 200);
@ -39,20 +41,26 @@ async fn clear_all_documents_unexisting_index() {
let server = Server::new().await;
let (_response, code) = server.index("test").clear_all_documents().await;
assert_eq!(code, 400);
}
#[actix_rt::test]
async fn clear_all_documents() {
let server = Server::new().await;
let index = server.index("test");
index.add_documents(json!([{ "id": 1, "content": "foobar" }, { "id": 0, "content": "foobar" }]), None).await;
index
.add_documents(
json!([{ "id": 1, "content": "foobar" }, { "id": 0, "content": "foobar" }]),
None,
)
.await;
index.wait_update_id(0).await;
let (_response, code) = index.clear_all_documents().await;
assert_eq!(code, 200);
let _update = index.wait_update_id(1).await;
let (response, code) = index.get_all_documents(GetAllDocumentsOptions::default()).await;
let (response, code) = index
.get_all_documents(GetAllDocumentsOptions::default())
.await;
assert_eq!(code, 200);
assert!(response.as_array().unwrap().is_empty());
}
@ -67,7 +75,9 @@ async fn clear_all_documents_empty_index() {
assert_eq!(code, 200);
let _update = index.wait_update_id(0).await;
let (response, code) = index.get_all_documents(GetAllDocumentsOptions::default()).await;
let (response, code) = index
.get_all_documents(GetAllDocumentsOptions::default())
.await;
assert_eq!(code, 200);
assert!(response.as_array().unwrap().is_empty());
}
@ -89,13 +99,14 @@ async fn delete_batch() {
assert_eq!(code, 200);
let _update = index.wait_update_id(1).await;
let (response, code) = index.get_all_documents(GetAllDocumentsOptions::default()).await;
let (response, code) = index
.get_all_documents(GetAllDocumentsOptions::default())
.await;
assert_eq!(code, 200);
assert_eq!(response.as_array().unwrap().len(), 1);
assert_eq!(response.as_array().unwrap()[0]["id"], 3);
}
#[actix_rt::test]
async fn delete_no_document_batch() {
let server = Server::new().await;
@ -106,7 +117,9 @@ async fn delete_no_document_batch() {
assert_eq!(code, 200);
let _update = index.wait_update_id(1).await;
let (response, code) = index.get_all_documents(GetAllDocumentsOptions::default()).await;
let (response, code) = index
.get_all_documents(GetAllDocumentsOptions::default())
.await;
assert_eq!(code, 200);
assert_eq!(response.as_array().unwrap().len(), 3);
}

View File

@ -1,15 +1,14 @@
use crate::common::Server;
use crate::common::GetAllDocumentsOptions;
use crate::common::Server;
use serde_json::json;
// TODO: partial test since we are testing error, and error is not yet fully implemented in
// transplant
#[actix_rt::test]
async fn get_unexisting_index_single_document() {
let server = Server::new().await;
let (_response, code) = server
.index("test")
.get_document(1, None)
.await;
let (_response, code) = server.index("test").get_document(1, None).await;
assert_eq!(code, 400);
}
@ -18,9 +17,7 @@ async fn get_unexisting_document() {
let server = Server::new().await;
let index = server.index("test");
index.create(None).await;
let (_response, code) = index
.get_document(1, None)
.await;
let (_response, code) = index.get_document(1, None).await;
assert_eq!(code, 400);
}
@ -38,14 +35,15 @@ async fn get_document() {
let (_, code) = index.add_documents(documents, None).await;
assert_eq!(code, 200);
index.wait_update_id(0).await;
let (response, code) = index
.get_document(0, None)
.await;
let (response, code) = index.get_document(0, None).await;
assert_eq!(code, 200);
assert_eq!(response, serde_json::json!( {
"id": 0,
"content": "foobar",
}));
assert_eq!(
response,
serde_json::json!( {
"id": 0,
"content": "foobar",
})
);
}
#[actix_rt::test]
@ -65,7 +63,9 @@ async fn get_no_documents() {
let (_, code) = index.create(None).await;
assert_eq!(code, 200);
let (response, code) = index.get_all_documents(GetAllDocumentsOptions::default()).await;
let (response, code) = index
.get_all_documents(GetAllDocumentsOptions::default())
.await;
assert_eq!(code, 200);
assert!(response.as_array().unwrap().is_empty());
}
@ -76,7 +76,9 @@ async fn get_all_documents_no_options() {
let index = server.index("test");
index.load_test_set().await;
let (response, code) = index.get_all_documents(GetAllDocumentsOptions::default()).await;
let (response, code) = index
.get_all_documents(GetAllDocumentsOptions::default())
.await;
assert_eq!(code, 200);
let arr = response.as_array().unwrap();
assert_eq!(arr.len(), 20);
@ -107,7 +109,12 @@ async fn test_get_all_documents_limit() {
let index = server.index("test");
index.load_test_set().await;
let (response, code) = index.get_all_documents(GetAllDocumentsOptions { limit: Some(5), ..Default::default() }).await;
let (response, code) = index
.get_all_documents(GetAllDocumentsOptions {
limit: Some(5),
..Default::default()
})
.await;
assert_eq!(code, 200);
assert_eq!(response.as_array().unwrap().len(), 5);
assert_eq!(response.as_array().unwrap()[0]["id"], 0);
@ -119,7 +126,12 @@ async fn test_get_all_documents_offset() {
let index = server.index("test");
index.load_test_set().await;
let (response, code) = index.get_all_documents(GetAllDocumentsOptions { offset: Some(5), ..Default::default() }).await;
let (response, code) = index
.get_all_documents(GetAllDocumentsOptions {
offset: Some(5),
..Default::default()
})
.await;
assert_eq!(code, 200);
assert_eq!(response.as_array().unwrap().len(), 20);
assert_eq!(response.as_array().unwrap()[0]["id"], 13);
@ -131,19 +143,93 @@ async fn test_get_all_documents_attributes_to_retrieve() {
let index = server.index("test");
index.load_test_set().await;
let (response, code) = index.get_all_documents(GetAllDocumentsOptions { attributes_to_retrieve: Some(vec!["name"]), ..Default::default() }).await;
let (response, code) = index
.get_all_documents(GetAllDocumentsOptions {
attributes_to_retrieve: Some(vec!["name"]),
..Default::default()
})
.await;
assert_eq!(code, 200);
assert_eq!(response.as_array().unwrap().len(), 20);
assert_eq!(response.as_array().unwrap()[0].as_object().unwrap().keys().count(), 1);
assert!(response.as_array().unwrap()[0].as_object().unwrap().get("name").is_some());
assert_eq!(
response.as_array().unwrap()[0]
.as_object()
.unwrap()
.keys()
.count(),
1
);
assert!(response.as_array().unwrap()[0]
.as_object()
.unwrap()
.get("name")
.is_some());
let (response, code) = index.get_all_documents(GetAllDocumentsOptions { attributes_to_retrieve: Some(vec![]), ..Default::default() }).await;
let (response, code) = index
.get_all_documents(GetAllDocumentsOptions {
attributes_to_retrieve: Some(vec![]),
..Default::default()
})
.await;
assert_eq!(code, 200);
assert_eq!(response.as_array().unwrap().len(), 20);
assert_eq!(response.as_array().unwrap()[0].as_object().unwrap().keys().count(), 0);
assert_eq!(
response.as_array().unwrap()[0]
.as_object()
.unwrap()
.keys()
.count(),
0
);
let (response, code) = index.get_all_documents(GetAllDocumentsOptions { attributes_to_retrieve: Some(vec!["name", "tags"]), ..Default::default() }).await;
let (response, code) = index
.get_all_documents(GetAllDocumentsOptions {
attributes_to_retrieve: Some(vec!["name", "tags"]),
..Default::default()
})
.await;
assert_eq!(code, 200);
assert_eq!(response.as_array().unwrap().len(), 20);
assert_eq!(response.as_array().unwrap()[0].as_object().unwrap().keys().count(), 2);
assert_eq!(
response.as_array().unwrap()[0]
.as_object()
.unwrap()
.keys()
.count(),
2
);
}
#[actix_rt::test]
async fn get_documents_displayed_attributes() {
let server = Server::new().await;
let index = server.index("test");
index
.update_settings(json!({"displayedAttributes": ["gender"]}))
.await;
index.load_test_set().await;
let (response, code) = index
.get_all_documents(GetAllDocumentsOptions::default())
.await;
assert_eq!(code, 200);
assert_eq!(response.as_array().unwrap().len(), 20);
assert_eq!(
response.as_array().unwrap()[0]
.as_object()
.unwrap()
.keys()
.count(),
1
);
assert!(response.as_array().unwrap()[0]
.as_object()
.unwrap()
.get("gender")
.is_some());
let (response, code) = index.get_document(0, None).await;
assert_eq!(code, 200);
assert_eq!(response.as_object().unwrap().keys().count(), 1);
assert!(response.as_object().unwrap().get("gender").is_some());
}

View File

@ -1,3 +1,3 @@
mod add_documents;
mod get_documents;
mod delete_documents;
mod get_documents;

View File

@ -9,12 +9,11 @@ async fn create_index_no_primary_key() {
assert_eq!(code, 200);
assert_eq!(response["uid"], "test");
assert!(response.get("uuid").is_some());
assert!(response.get("createdAt").is_some());
assert!(response.get("updatedAt").is_some());
assert_eq!(response["createdAt"], response["updatedAt"]);
assert_eq!(response["primaryKey"], Value::Null);
assert_eq!(response.as_object().unwrap().len(), 5);
assert_eq!(response.as_object().unwrap().len(), 4);
}
#[actix_rt::test]
@ -25,12 +24,11 @@ async fn create_index_with_primary_key() {
assert_eq!(code, 200);
assert_eq!(response["uid"], "test");
assert!(response.get("uuid").is_some());
assert!(response.get("createdAt").is_some());
assert!(response.get("updatedAt").is_some());
assert_eq!(response["createdAt"], response["updatedAt"]);
//assert_eq!(response["createdAt"], response["updatedAt"]);
assert_eq!(response["primaryKey"], "primary");
assert_eq!(response.as_object().unwrap().len(), 5);
assert_eq!(response.as_object().unwrap().len(), 4);
}
// TODO: partial test since we are testing error, and error is not yet fully implemented in

View File

@ -13,12 +13,11 @@ async fn create_and_get_index() {
assert_eq!(code, 200);
assert_eq!(response["uid"], "test");
assert!(response.get("uuid").is_some());
assert!(response.get("createdAt").is_some());
assert!(response.get("updatedAt").is_some());
assert_eq!(response["createdAt"], response["updatedAt"]);
assert_eq!(response["primaryKey"], Value::Null);
assert_eq!(response.as_object().unwrap().len(), 5);
assert_eq!(response.as_object().unwrap().len(), 4);
}
// TODO: partial test since we are testing error, and error is not yet fully implemented in
@ -53,7 +52,12 @@ async fn list_multiple_indexes() {
assert!(response.is_array());
let arr = response.as_array().unwrap();
assert_eq!(arr.len(), 2);
assert!(arr.iter().find(|entry| entry["uid"] == "test" && entry["primaryKey"] == Value::Null).is_some());
assert!(arr.iter().find(|entry| entry["uid"] == "test1" && entry["primaryKey"] == "key").is_some());
assert!(arr
.iter()
.find(|entry| entry["uid"] == "test" && entry["primaryKey"] == Value::Null)
.is_some());
assert!(arr
.iter()
.find(|entry| entry["uid"] == "test1" && entry["primaryKey"] == "key")
.is_some());
}
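
The find(...).is_some() chains above could equally be written with Iterator::any; a small equivalent sketch (hypothetical helper name):

fn has_index(arr: &[serde_json::Value], uid: &str) -> bool {
    // serde_json::Value compares directly against string literals.
    arr.iter().any(|entry| entry["uid"] == uid)
}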

View File

@ -1,4 +1,4 @@
mod create_index;
mod delete_index;
mod get_index;
mod update_index;
mod delete_index;

View File

@ -13,7 +13,6 @@ async fn update_primary_key() {
assert_eq!(code, 200);
assert_eq!(response["uid"], "test");
assert!(response.get("uuid").is_some());
assert!(response.get("createdAt").is_some());
assert!(response.get("updatedAt").is_some());
@ -22,7 +21,7 @@ async fn update_primary_key() {
assert!(created_at < updated_at);
assert_eq!(response["primaryKey"], "primary");
assert_eq!(response.as_object().unwrap().len(), 5);
assert_eq!(response.as_object().unwrap().len(), 4);
}
#[actix_rt::test]

View File

@ -1,8 +1,8 @@
mod common;
mod documents;
mod index;
mod search;
mod settings;
mod documents;
mod updates;
// Tests are isolated by features in different modules to allow better readability, test

View File

@ -1,3 +1,2 @@
// This module contains all the tests concerning search. Each particular feature of the search
// should be tested in its own module to isolate tests and keep the tests readable.

View File

@ -19,8 +19,19 @@ async fn get_settings() {
assert_eq!(settings.keys().len(), 4);
assert_eq!(settings["displayedAttributes"], json!(["*"]));
assert_eq!(settings["searchableAttributes"], json!(["*"]));
assert_eq!(settings["facetedAttributes"], json!({}));
assert_eq!(settings["rankingRules"], json!(["typo", "words", "proximity", "attribute", "wordsPosition", "exactness"]));
println!("{:?}", settings);
assert_eq!(settings["attributesForFaceting"], json!({}));
assert_eq!(
settings["rankingRules"],
json!([
"typo",
"words",
"proximity",
"attribute",
"wordsPosition",
"exactness"
])
);
}
#[actix_rt::test]
@ -35,20 +46,24 @@ async fn update_settings_unknown_field() {
async fn test_partial_update() {
let server = Server::new().await;
let index = server.index("test");
index.update_settings(json!({"displayedAttributes": ["foo"]})).await;
let (_response, _code) = index
.update_settings(json!({"displayedAttributes": ["foo"]}))
.await;
index.wait_update_id(0).await;
let (response, code) = index.settings().await;
assert_eq!(code, 200);
assert_eq!(response["displayedAttributes"],json!(["foo"]));
assert_eq!(response["searchableAttributes"],json!(["*"]));
assert_eq!(response["displayedAttributes"], json!(["foo"]));
assert_eq!(response["searchableAttributes"], json!(["*"]));
index.update_settings(json!({"searchableAttributes": ["bar"]})).await;
let (_response, _) = index
.update_settings(json!({"searchableAttributes": ["bar"]}))
.await;
index.wait_update_id(1).await;
let (response, code) = index.settings().await;
assert_eq!(code, 200);
assert_eq!(response["displayedAttributes"],json!(["foo"]));
assert_eq!(response["searchableAttributes"],json!(["bar"]));
assert_eq!(response["displayedAttributes"], json!(["foo"]));
assert_eq!(response["searchableAttributes"], json!(["bar"]));
}
#[actix_rt::test]
@ -63,20 +78,22 @@ async fn delete_settings_unexisting_index() {
async fn reset_all_settings() {
let server = Server::new().await;
let index = server.index("test");
index.update_settings(json!({"displayedAttributes": ["foo"], "searchableAttributes": ["bar"]})).await;
index
.update_settings(json!({"displayedAttributes": ["foo"], "searchableAttributes": ["bar"]}))
.await;
index.wait_update_id(0).await;
let (response, code) = index.settings().await;
assert_eq!(code, 200);
assert_eq!(response["displayedAttributes"],json!(["foo"]));
assert_eq!(response["searchableAttributes"],json!(["bar"]));
assert_eq!(response["displayedAttributes"], json!(["foo"]));
assert_eq!(response["searchableAttributes"], json!(["bar"]));
index.delete_settings().await;
index.wait_update_id(1).await;
let (response, code) = index.settings().await;
assert_eq!(code, 200);
assert_eq!(response["displayedAttributes"],json!(["*"]));
assert_eq!(response["searchableAttributes"],json!(["*"]));
assert_eq!(response["displayedAttributes"], json!(["*"]));
assert_eq!(response["searchableAttributes"], json!(["*"]));
}
#[actix_rt::test]
@ -94,7 +111,6 @@ async fn update_setting_unexisting_index_invalid_uid() {
let server = Server::new().await;
let index = server.index("test##! ");
let (_response, code) = index.update_settings(json!({})).await;
println!("response: {}", _response);
assert_eq!(code, 400);
}
@ -124,10 +140,10 @@ macro_rules! test_setting_routes {
.chars()
.map(|c| if c == '_' { '-' } else { c })
.collect::<String>());
let (_response, code) = server.service.post(url, serde_json::Value::Null).await;
assert_eq!(code, 200);
let (_response, code) = server.index("test").get().await;
assert_eq!(code, 200);
let (response, code) = server.service.post(url, serde_json::Value::Null).await;
assert_eq!(code, 200, "{}", response);
let (response, code) = server.index("test").get().await;
assert_eq!(code, 200, "{}", response);
}
#[actix_rt::test]
@ -149,4 +165,5 @@ macro_rules! test_setting_routes {
test_setting_routes!(
attributes_for_faceting,
displayed_attributes,
searchable_attributes);
searchable_attributes
);
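
test_setting_routes! derives each URL from the setting identifier by swapping underscores for dashes, as shown in the macro body above. A standalone sketch of that derivation (hypothetical function name):

fn setting_route(attr: &str) -> String {
    // attributes_for_faceting -> attributes-for-faceting
    let segment: String = attr
        .chars()
        .map(|c| if c == '_' { '-' } else { c })
        .collect();
    format!("/indexes/test/settings/{}", segment)
}

fn main() {
    assert_eq!(
        setting_route("attributes_for_faceting"),
        "/indexes/test/settings/attributes-for-faceting"
    );
}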

View File

@ -8,7 +8,7 @@ async fn get_update_unexisting_index() {
}
#[actix_rt::test]
async fn get_unexisting_udpate_status() {
async fn get_unexisting_update_status() {
let server = Server::new().await;
let index = server.index("test");
index.create(None).await;
@ -21,13 +21,15 @@ async fn get_update_status() {
let server = Server::new().await;
let index = server.index("test");
index.create(None).await;
index.add_documents(
serde_json::json!([{
"id": 1,
"content": "foobar",
}]),
None
).await;
index
.add_documents(
serde_json::json!([{
"id": 1,
"content": "foobar",
}]),
None,
)
.await;
let (_response, code) = index.get_update(0).await;
assert_eq!(code, 200);
// TODO: check response format, as per #48
@ -55,10 +57,12 @@ async fn list_updates() {
let server = Server::new().await;
let index = server.index("test");
index.create(None).await;
index.add_documents(
serde_json::from_str(include_str!("../assets/test_set.json")).unwrap(),
None
).await;
index
.add_documents(
serde_json::from_str(include_str!("../assets/test_set.json")).unwrap(),
None,
)
.await;
let (response, code) = index.list_updates().await;
assert_eq!(code, 200);
assert_eq!(response.as_array().unwrap().len(), 1);