diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml index 4997861fb..0a5835b86 100644 --- a/.github/ISSUE_TEMPLATE/config.yml +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -1,7 +1,7 @@ contact_links: - - name: Feature request + - name: Feature request & feedback url: https://github.com/meilisearch/product/discussions/categories/feedback-feature-proposal - about: The feature requests are not managed in this repository, please open a discussion in our dedicated product repository + about: The feature requests and feedback regarding the already existing features are not managed in this repository. Please open a discussion in our dedicated product repository - name: Documentation issue url: https://github.com/meilisearch/documentation/issues/new about: For documentation issues, open an issue or a PR in the documentation repository diff --git a/.github/workflows/publish-binaries.yml b/.github/workflows/publish-binaries.yml index 64af0e2c0..304798d75 100644 --- a/.github/workflows/publish-binaries.yml +++ b/.github/workflows/publish-binaries.yml @@ -6,7 +6,7 @@ name: Publish binaries to release jobs: publish: - name: Publish for ${{ matrix.os }} + name: Publish binary for ${{ matrix.os }} runs-on: ${{ matrix.os }} strategy: fail-fast: false @@ -39,7 +39,7 @@ jobs: tag: ${{ github.ref }} publish-aarch64: - name: Publish to GitHub + name: Publish binary for aarch64 runs-on: ${{ matrix.os }} continue-on-error: false strategy: diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index a98e29f44..ff28f82ca 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -36,6 +36,25 @@ jobs: command: test args: --locked --release + # We run tests in debug also, to make sure that the debug_assertions are hit + test-debug: + name: Run tests in debug + runs-on: ubuntu-18.04 + steps: + - uses: actions/checkout@v2 + - uses: actions-rs/toolchain@v1 + with: + profile: minimal + toolchain: stable + override: true + - name: Cache 
dependencies + uses: Swatinem/rust-cache@v1.3.0 + - name: Run tests in debug + uses: actions-rs/cargo@v1 + with: + command: test + args: --locked + clippy: name: Run Clippy runs-on: ubuntu-18.04 diff --git a/Cargo.lock b/Cargo.lock index 3022dc517..173162bc8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -16,16 +16,15 @@ dependencies = [ "memchr", "pin-project-lite", "tokio", - "tokio-util 0.7.0", + "tokio-util", ] [[package]] name = "actix-cors" -version = "0.6.0" +version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30dbd116ef7532f56e2f6d7c511736ea0b124d914ee8820a5271247bf89f06aa" +checksum = "414360eed71ba2d5435b185ba43ecbe281dfab5df3898286d6b7be8074372c92" dependencies = [ - "actix-service", "actix-utils", "actix-web", "derive_more", @@ -37,9 +36,9 @@ dependencies = [ [[package]] name = "actix-http" -version = "3.0.0" +version = "3.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0f3fdd63b9cfeaf92eeeece719dabbddddb420a57d3fd171ce1490ecfb7086b1" +checksum = "a5885cb81a0d4d0d322864bea1bb6c2a8144626b4fdc625d4c51eba197e7797a" dependencies = [ "actix-codec", "actix-rt", @@ -47,7 +46,7 @@ dependencies = [ "actix-tls", "actix-utils", "ahash 0.7.6", - "base64 0.13.0", + "base64", "bitflags", "brotli", "bytes", @@ -68,9 +67,8 @@ dependencies = [ "percent-encoding", "pin-project-lite", "rand", - "sha-1 0.10.0", + "sha-1", "smallvec", - "zstd", ] [[package]] @@ -79,8 +77,8 @@ version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "465a6172cf69b960917811022d8f29bc0b7fa1398bc4f78b3c466673db1213b6" dependencies = [ - "quote 1.0.15", - "syn 1.0.86", + "quote 1.0.17", + "syn 1.0.91", ] [[package]] @@ -99,9 +97,9 @@ dependencies = [ [[package]] name = "actix-rt" -version = "2.6.0" +version = "2.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cdf3f2183be1241ed4dd22611850b85d38de0b08a09f1f7bcccbd0809084b359" +checksum = 
"7ea16c295198e958ef31930a6ef37d0fb64e9ca3b6116e6b93a8bdae96ee1000" dependencies = [ "actix-macros", "futures-core", @@ -110,20 +108,20 @@ dependencies = [ [[package]] name = "actix-server" -version = "2.0.0" +version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d9e7472ac180abb0a8e592b653744345983a7a14f44691c8394a799d0df4dbbf" +checksum = "0da34f8e659ea1b077bb4637948b815cd3768ad5a188fdcd74ff4d84240cd824" dependencies = [ "actix-rt", "actix-service", "actix-utils", "futures-core", "futures-util", - "log", "mio", "num_cpus", "socket2", "tokio", + "tracing", ] [[package]] @@ -151,7 +149,7 @@ dependencies = [ "log", "pin-project-lite", "tokio-rustls", - "tokio-util 0.7.0", + "tokio-util", "webpki-roots", ] @@ -202,7 +200,7 @@ dependencies = [ "serde_urlencoded", "smallvec", "socket2", - "time 0.3.7", + "time 0.3.9", "url", ] @@ -213,9 +211,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7525bedf54704abb1d469e88d7e7e9226df73778798a69cea5022d53b2ae91bc" dependencies = [ "actix-router", - "proc-macro2 1.0.36", - "quote 1.0.15", - "syn 1.0.86", + "proc-macro2 1.0.37", + "quote 1.0.17", + "syn 1.0.91", ] [[package]] @@ -287,37 +285,13 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.55" +version = "1.0.56" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "159bb86af3a200e19a068f4224eae4c8bb2d0fa054c7e5d1cacd5cef95e684cd" +checksum = "4361135be9122e0870de935d7c439aef945b9f9ddd4199a553b5270b49c82a27" dependencies = [ "backtrace", ] -[[package]] -name = "arc-swap" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c5d78ce20460b82d3fa150275ed9d55e21064fc7951177baacf86a145c4a4b1f" - -[[package]] -name = "arrayvec" -version = "0.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "23b62fc65de8e4e7f52534fb52b0f3ed04746ae267519eef2a83941e8085068b" - -[[package]] -name = "as-slice" -version 
= "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "45403b49e3954a4b8428a0ac21a4b7afadccf92bfd96273f1a58cd4812496ae0" -dependencies = [ - "generic-array 0.12.4", - "generic-array 0.13.3", - "generic-array 0.14.5", - "stable_deref_trait", -] - [[package]] name = "assert-json-diff" version = "2.0.1" @@ -330,9 +304,9 @@ dependencies = [ [[package]] name = "async-stream" -version = "0.3.2" +version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "171374e7e3b2504e0e5236e3b59260560f9fe94bfe9ac39ba5e4e929c5590625" +checksum = "dad5c83079eae9969be7fadefe640a1c566901f05ff91ab221de4b6f68d9507e" dependencies = [ "async-stream-impl", "futures-core", @@ -340,24 +314,33 @@ dependencies = [ [[package]] name = "async-stream-impl" -version = "0.3.2" +version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "648ed8c8d2ce5409ccd57453d9d1b214b342a0d69376a6feda1fd6cae3299308" +checksum = "10f203db73a71dfa2fb6dd22763990fa26f3d2625a6da2da900d23b87d26be27" dependencies = [ - "proc-macro2 1.0.36", - "quote 1.0.15", - "syn 1.0.86", + "proc-macro2 1.0.37", + "quote 1.0.17", + "syn 1.0.91", ] [[package]] name = "async-trait" -version = "0.1.52" +version = "0.1.53" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "061a7acccaa286c011ddc30970520b98fa40e00c9d644633fb26b5fc63a265e3" +checksum = "ed6aa3524a2dfcf9fe180c51eae2b58738348d819517ceadf95789c51fff7600" dependencies = [ - "proc-macro2 1.0.36", - "quote 1.0.15", - "syn 1.0.86", + "proc-macro2 1.0.37", + "quote 1.0.17", + "syn 1.0.91", +] + +[[package]] +name = "atomic-polyfill" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4d862f14e042f75b95236d4ef1bb3d5c170964082d1e1e9c3ce689a2cbee217c" +dependencies = [ + "critical-section", ] [[package]] @@ -399,10 +382,19 @@ dependencies = [ ] [[package]] -name = "base64" -version = "0.12.3" +name = "bare-metal" 
+version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3441f0f7b02788e948e47f457ca01f1d7e6d92c693bc132c22b087d3141c03ff" +checksum = "5deb64efa5bd81e31fcd1938615a6d98c82eafcbcd787162b6f63b91d6bac5b3" +dependencies = [ + "rustc_version 0.2.3", +] + +[[package]] +name = "bare-metal" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8fe8f5a8a398345e52358e18ff07cc17a568fbca5c6f73873d3a62056309603" [[package]] name = "base64" @@ -410,6 +402,12 @@ version = "0.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "904dfeac50f3cdaba28fc6f57fdcddb75f49ed61346676a78c4ffe55877802fd" +[[package]] +name = "big_s" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "199edb7b90631283b10c2422e6a0bc8b7d987bf732995ba1de53b576c97e51a8" + [[package]] name = "bimap" version = "0.6.2" @@ -443,35 +441,38 @@ version = "0.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb" +[[package]] +name = "bit_field" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dcb6dd1c2376d2e096796e234a70e17e94cc2d5d54ff8ce42b28cef1d0d359a4" + +[[package]] +name = "bitfield" +version = "0.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46afbd2983a5d5a7bd740ccb198caf5b82f45c40c09c0eed36052d91cb92e719" + [[package]] name = "bitflags" version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" -[[package]] -name = "block-buffer" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4152116fd6e9dadb291ae18fc1ec3575ed6d84c29642d97890f4b4a3417297e4" -dependencies = [ - "generic-array 0.14.5", -] - [[package]] name = "block-buffer" 
version = "0.10.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0bf7fe51849ea569fd452f37822f606a5cabb684dc918707a0193fd4664ff324" dependencies = [ - "generic-array 0.14.5", + "generic-array", ] [[package]] name = "brotli" -version = "3.3.3" +version = "3.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f838e47a451d5a8fa552371f80024dd6ace9b7acdf25c4c3d0f9bc6816fb1c39" +checksum = "a1a0b1dbcc8ae29329621f8d4f0d835787c1c38bb1401979b49d13b0b305ff68" dependencies = [ "alloc-no-stdlib", "alloc-stdlib", @@ -500,6 +501,12 @@ dependencies = [ "serde", ] +[[package]] +name = "build_const" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4ae4235e6dac0694637c763029ecea1a2ec9e4e06ec2729bd21ba4d9c863eb7" + [[package]] name = "bumpalo" version = "3.9.1" @@ -508,9 +515,9 @@ checksum = "a4a45a46ab1f2412e53d3a0ade76ffad2025804294569aae387231a0cd6e0899" [[package]] name = "byte-unit" -version = "4.0.13" +version = "4.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "956ffc5b0ec7d7a6949e3f21fd63ba5af4cffdc2ba1e0b7bf62b481458c4ae7f" +checksum = "95ebf10dda65f19ff0f42ea15572a359ed60d7fc74fdc984d90310937be0014b" dependencies = [ "serde", "utf8-width", @@ -524,22 +531,22 @@ checksum = "72feb31ffc86498dacdbd0fcebb56138e7177a8cc5cea4516031d15ae85a742e" [[package]] name = "bytemuck" -version = "1.7.3" +version = "1.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "439989e6b8c38d1b6570a384ef1e49c8848128f5a97f3914baef02920842712f" +checksum = "cdead85bdec19c194affaeeb670c0e41fe23de31459efd1c174d049269cf02cc" dependencies = [ "bytemuck_derive", ] [[package]] name = "bytemuck_derive" -version = "1.0.1" +version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e215f8c2f9f79cb53c8335e687ffd07d5bfcb6fe5fc80723762d0be46e7cc54" +checksum = 
"562e382481975bc61d11275ac5e62a19abd00b0547d99516a415336f183dcd0e" dependencies = [ - "proc-macro2 1.0.36", - "quote 1.0.15", - "syn 1.0.86", + "proc-macro2 1.0.37", + "quote 1.0.17", + "syn 1.0.91", ] [[package]] @@ -586,9 +593,9 @@ dependencies = [ [[package]] name = "cargo_toml" -version = "0.9.2" +version = "0.11.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c3596addfb02dcdc06f5252ddda9f3785f9230f5827fb4284645240fa05ad92" +checksum = "5809dd3e6444651fd1cdd3dbec71eca438c439a0fcc8081674a14da0afe50185" dependencies = [ "serde", "serde_derive", @@ -645,23 +652,16 @@ dependencies = [ ] [[package]] -name = "chrono" -version = "0.4.19" +name = "chunked_transfer" +version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "670ad68c9088c2a963aaa298cb369688cf3f9465ce5e2d4ca10e6e0098a1ce73" -dependencies = [ - "libc", - "num-integer", - "num-traits", - "time 0.1.44", - "winapi", -] +checksum = "fff857943da45f546682664a79488be82e69e43c1a7a2307679ab9afb3a66d2e" [[package]] name = "clap" -version = "3.1.2" +version = "3.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5177fac1ab67102d8989464efd043c6ff44191b1557ec1ddd489b4f7e1447e77" +checksum = "71c47df61d9e16dc010b55dba1952a57d8c215dbb533fd13cdd13369aac73b1c" dependencies = [ "atty", "bitflags", @@ -676,15 +676,15 @@ dependencies = [ [[package]] name = "clap_derive" -version = "3.1.2" +version = "3.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "01d42c94ce7c2252681b5fed4d3627cc807b13dfc033246bd05d5b252399000e" +checksum = "a3aab4734e083b809aaf5794e14e756d1c798d2c69c7f7de7a09a2f5214993c1" dependencies = [ "heck", "proc-macro-error", - "proc-macro2 1.0.36", - "quote 1.0.15", - "syn 1.0.86", + "proc-macro2 1.0.37", + "quote 1.0.17", + "syn 1.0.91", ] [[package]] @@ -693,9 +693,9 @@ version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"1df715824eb382e34b7afb7463b0247bf41538aeba731fba05241ecdb5dc3747" dependencies = [ - "proc-macro2 1.0.36", - "quote 1.0.15", - "syn 1.0.86", + "proc-macro2 1.0.37", + "quote 1.0.17", + "syn 1.0.91", ] [[package]] @@ -711,7 +711,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "94d4706de1b0fa5b132270cddffa8585166037822e260a944fe161acd137ca05" dependencies = [ "percent-encoding", - "time 0.3.7", + "time 0.3.9", "version_check", ] @@ -721,6 +721,18 @@ version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5827cebf4670468b8772dd191856768aedcb1b0278a04f989f7766351917b9dc" +[[package]] +name = "cortex-m" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37ff967e867ca14eba0c34ac25cd71ea98c678e741e3915d923999bb2fe7c826" +dependencies = [ + "bare-metal 0.2.5", + "bitfield", + "embedded-hal", + "volatile-register", +] + [[package]] name = "cow-utils" version = "0.1.2" @@ -729,13 +741,22 @@ checksum = "79bb3adfaf5f75d24b01aee375f7555907840fa2800e5ec8fa3b9e2031830173" [[package]] name = "cpufeatures" -version = "0.2.1" +version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "95059428f66df56b63431fdb4e1947ed2190586af5c5a8a8b71122bdf5a7f469" +checksum = "59a6001667ab124aebae2a495118e11d30984c3a653e99d86d58971708cf5e4b" dependencies = [ "libc", ] +[[package]] +name = "crc" +version = "1.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d663548de7f5cca343f1e0a48d14dcfb0e9eb4e079ec58883b7251539fa10aeb" +dependencies = [ + "build_const", +] + [[package]] name = "crc32fast" version = "1.3.2" @@ -746,13 +767,25 @@ dependencies = [ ] [[package]] -name = "crossbeam-channel" -version = "0.5.2" +name = "critical-section" +version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e54ea8bc3fb1ee042f5aace6e3c6e025d3874866da222930f70ce62aceba0bfa" +checksum = 
"bc1e89b93912c97878305b70ef6b011bfc74622e7b79a9d4a0676c7663496bcd" +dependencies = [ + "bare-metal 1.0.0", + "cfg-if 1.0.0", + "cortex-m", + "riscv", +] + +[[package]] +name = "crossbeam-channel" +version = "0.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5aaa7bd5fb665c6864b5f963dd9097905c54125909c7aa94c9e18507cdbe6c53" dependencies = [ "cfg-if 1.0.0", - "crossbeam-utils 0.8.7", + "crossbeam-utils 0.8.8", ] [[package]] @@ -763,17 +796,18 @@ checksum = "6455c0ca19f0d2fbf751b908d5c55c1f5cbc65e03c4225427254b46890bdde1e" dependencies = [ "cfg-if 1.0.0", "crossbeam-epoch", - "crossbeam-utils 0.8.7", + "crossbeam-utils 0.8.8", ] [[package]] name = "crossbeam-epoch" -version = "0.9.7" +version = "0.9.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c00d6d2ea26e8b151d99093005cb442fb9a37aeaca582a03ec70946f49ab5ed9" +checksum = "1145cf131a2c6ba0615079ab6a638f7e1973ac9c2634fcbeaaad6114246efe8c" dependencies = [ + "autocfg", "cfg-if 1.0.0", - "crossbeam-utils 0.8.7", + "crossbeam-utils 0.8.8", "lazy_static", "memoffset", "scopeguard", @@ -800,9 +834,9 @@ dependencies = [ [[package]] name = "crossbeam-utils" -version = "0.8.7" +version = "0.8.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b5e5bed1f1c269533fa816a0a5492b3545209a205ca1a54842be180eb63a16a6" +checksum = "0bf124c720b7686e3c2663cf54062ab0f68a88af2fb6a030e87e30bf721fcb38" dependencies = [ "cfg-if 1.0.0", "lazy_static", @@ -814,7 +848,7 @@ version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "57952ca27b5e3606ff4dd79b0020231aaf9d6aa76dc05fd30137538c50bd3ce8" dependencies = [ - "generic-array 0.14.5", + "generic-array", "typenum", ] @@ -846,9 +880,9 @@ version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fcc3dd5e9e9c0b295d6e1e4d811fb6f157d5ffd784b8d202fc62eac8035a770b" dependencies = [ - "proc-macro2 1.0.36", - "quote 1.0.15", - "syn 
1.0.86", + "proc-macro2 1.0.37", + "quote 1.0.17", + "syn 1.0.91", ] [[package]] @@ -858,10 +892,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4fb810d30a7c1953f91334de7244731fc3f3c10d7fe163338a35b9f640960321" dependencies = [ "convert_case", - "proc-macro2 1.0.36", - "quote 1.0.15", - "rustc_version", - "syn 1.0.86", + "proc-macro2 1.0.37", + "quote 1.0.17", + "rustc_version 0.4.0", + "syn 1.0.91", ] [[package]] @@ -871,19 +905,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f2c9736e15e7df1638a7f6eee92a6511615c738246a052af5ba86f039b65aede" [[package]] -name = "difference" -version = "2.0.0" +name = "difflib" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "524cbf6897b527295dff137cec09ecf3a05f4fddffd7dfcd1585403449e74198" - -[[package]] -name = "digest" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d3dd60d1080a57a05ab032377049e0591415d2b31afd7028356dbf3cc6dcb066" -dependencies = [ - "generic-array 0.14.5", -] +checksum = "6184e33543162437515c2e2b48714794e37845ec9851711914eec9d308f6ebe8" [[package]] name = "digest" @@ -891,7 +916,7 @@ version = "0.10.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f2fb860ca6fafa5552fb6d0e816a69c8e49f0908bf524e30a90d97c85892d506" dependencies = [ - "block-buffer 0.10.2", + "block-buffer", "crypto-common", ] @@ -918,9 +943,9 @@ dependencies = [ [[package]] name = "downcast" -version = "0.10.0" +version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4bb454f0228b18c7f4c3b0ebbee346ed9c52e7443b0999cd543ff3571205701d" +checksum = "1435fa1053d8b2fbbe9be7e97eca7f33d37b28409959813daefc1446a14247f1" [[package]] name = "either" @@ -929,10 +954,84 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e78d4f1cc4ae33bbfc157ed5d5a5ef3bc29227303d595861deb238fcec4e9457" [[package]] 
-name = "encoding_rs" -version = "0.8.30" +name = "embedded-hal" +version = "0.2.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7896dc8abb250ffdda33912550faa54c88ec8b998dec0b2c55ab224921ce11df" +checksum = "35949884794ad573cf46071e41c9b60efb0cb311e3ca01f7af807af1debc66ff" +dependencies = [ + "nb 0.1.3", + "void", +] + +[[package]] +name = "encoding" +version = "0.2.33" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6b0d943856b990d12d3b55b359144ff341533e516d94098b1d3fc1ac666d36ec" +dependencies = [ + "encoding-index-japanese", + "encoding-index-korean", + "encoding-index-simpchinese", + "encoding-index-singlebyte", + "encoding-index-tradchinese", +] + +[[package]] +name = "encoding-index-japanese" +version = "1.20141219.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "04e8b2ff42e9a05335dbf8b5c6f7567e5591d0d916ccef4e0b1710d32a0d0c91" +dependencies = [ + "encoding_index_tests", +] + +[[package]] +name = "encoding-index-korean" +version = "1.20141219.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4dc33fb8e6bcba213fe2f14275f0963fd16f0a02c878e3095ecfdf5bee529d81" +dependencies = [ + "encoding_index_tests", +] + +[[package]] +name = "encoding-index-simpchinese" +version = "1.20141219.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d87a7194909b9118fc707194baa434a4e3b0fb6a5a757c73c3adb07aa25031f7" +dependencies = [ + "encoding_index_tests", +] + +[[package]] +name = "encoding-index-singlebyte" +version = "1.20141219.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3351d5acffb224af9ca265f435b859c7c01537c0849754d3db3fdf2bfe2ae84a" +dependencies = [ + "encoding_index_tests", +] + +[[package]] +name = "encoding-index-tradchinese" +version = "1.20141219.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"fd0e20d5688ce3cab59eb3ef3a2083a5c77bf496cb798dc6fcdb75f323890c18" +dependencies = [ + "encoding_index_tests", +] + +[[package]] +name = "encoding_index_tests" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a246d82be1c9d791c5dfde9a2bd045fc3cbba3fa2b11ad558f27d01712f00569" + +[[package]] +name = "encoding_rs" +version = "0.8.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9852635589dc9f9ea1b6fe9f05b50ef208c85c834a562f0c6abb1c475736ec2b" dependencies = [ "cfg-if 1.0.0", ] @@ -952,9 +1051,9 @@ version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c134c37760b27a871ba422106eedbb8247da973a09e82558bf26d619c882b159" dependencies = [ - "proc-macro2 1.0.36", - "quote 1.0.15", - "syn 1.0.86", + "proc-macro2 1.0.37", + "quote 1.0.17", + "syn 1.0.91", ] [[package]] @@ -993,10 +1092,10 @@ dependencies = [ [[package]] name = "filter-parser" -version = "0.1.0" -source = "git+https://github.com/meilisearch/milli.git?tag=v0.22.2#f2984f66e64838d51f5cce412693fa411ee3f2d4" +version = "0.26.4" +source = "git+https://github.com/meilisearch/milli.git?tag=v0.26.4#6ee67df128b63f40eedfe1cb2edfa1a59492e310" dependencies = [ - "nom 7.1.0", + "nom", "nom_locate", ] @@ -1018,11 +1117,19 @@ dependencies = [ "miniz_oxide", ] +[[package]] +name = "flatten-serde-json" +version = "0.26.4" +source = "git+https://github.com/meilisearch/milli.git?tag=v0.26.4#6ee67df128b63f40eedfe1cb2edfa1a59492e310" +dependencies = [ + "serde_json", +] + [[package]] name = "float-cmp" -version = "0.8.0" +version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1267f4ac4f343772758f7b1bdcbe767c218bbab93bb432acbf5162bbf85a6c4" +checksum = "98de4bbd547a563b716d8dfa9aad1cb19bfab00f4fa09a6a4ed21dbcf44ce9c4" dependencies = [ "num-traits", ] @@ -1045,9 +1152,9 @@ dependencies = [ [[package]] name = "fragile" -version = "1.1.0" +version = "1.2.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "8da1b8f89c5b5a5b7e59405cfcf0bb9588e5ed19f0b57a4cd542bbba3f164a6d" +checksum = "e9d758e60b45e8d749c89c1b389ad8aee550f86aa12e2b9298b546dda7a82ab1" [[package]] name = "fs_extra" @@ -1115,9 +1222,9 @@ version = "0.3.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "33c1e13800337f4d4d7a316bf45a567dbcb6ffe087f16424852d97e97a91f512" dependencies = [ - "proc-macro2 1.0.36", - "quote 1.0.15", - "syn 1.0.86", + "proc-macro2 1.0.37", + "quote 1.0.17", + "syn 1.0.91", ] [[package]] @@ -1159,24 +1266,6 @@ dependencies = [ "byteorder", ] -[[package]] -name = "generic-array" -version = "0.12.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ffdf9f34f1447443d37393cc6c2b8313aebddcd96906caf34e54c68d8e57d7bd" -dependencies = [ - "typenum", -] - -[[package]] -name = "generic-array" -version = "0.13.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f797e67af32588215eaaab8327027ee8e71b9dd0b2b26996aedf20c030fce309" -dependencies = [ - "typenum", -] - [[package]] name = "generic-array" version = "0.14.5" @@ -1195,13 +1284,13 @@ checksum = "9e006f616a407d396ace1d2ebb3f43ed73189db8b098079bd129928d7645dd1e" [[package]] name = "getrandom" -version = "0.2.5" +version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d39cd93900197114fa1fcb7ae84ca742095eed9442088988ae74fa744e930e77" +checksum = "9be70c98951c83b8d2f8f60d7065fa6d5146873094452a1008da8c2f1e4205ad" dependencies = [ "cfg-if 1.0.0", "libc", - "wasi", + "wasi 0.10.2+wasi-snapshot-preview1", ] [[package]] @@ -1211,9 +1300,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e45727250e75cc04ff2846a66397da8ef2b3db8e40e0cef4df67950a07621eb9" dependencies = [ "proc-macro-error", - "proc-macro2 1.0.36", - "quote 1.0.15", - "syn 1.0.86", + "proc-macro2 1.0.37", + "quote 1.0.17", + "syn 1.0.91", ] [[package]] @@ 
-1224,9 +1313,9 @@ checksum = "78cc372d058dcf6d5ecd98510e7fbc9e5aec4d21de70f65fea8fecebcd881bd4" [[package]] name = "git2" -version = "0.14.0" +version = "0.14.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94781080dd1a6b55dea7c46540d5bac87742a22f6dc2d84e54a5071ad6f0e387" +checksum = "3826a6e0e2215d7a41c2bfc7c9244123969273f3476b939a226aac0ab56e9e3c" dependencies = [ "bitflags", "libc", @@ -1243,9 +1332,9 @@ checksum = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574" [[package]] name = "grenad" -version = "0.3.1" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a7a9cc43b28a20f791b17863f34a36654fdfa50be6d0a67bb18c1e34d145f18" +checksum = "1d69e46e7b225459e2e0272707d167d7dcaaac89307a848326df6b30ec432151" dependencies = [ "bytemuck", "byteorder", @@ -1254,9 +1343,9 @@ dependencies = [ [[package]] name = "h2" -version = "0.3.11" +version = "0.3.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d9f1f717ddc7b2ba36df7e871fd88db79326551d3d6f1fc406fbfd28b582ff8e" +checksum = "37a82c6d637fc9515a4694bbf1cb2457b79d81ce52b3108bdeea58b07dd34a57" dependencies = [ "bytes", "fnv", @@ -1267,15 +1356,15 @@ dependencies = [ "indexmap", "slab", "tokio", - "tokio-util 0.6.9", + "tokio-util", "tracing", ] [[package]] name = "hash32" -version = "0.1.1" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d4041af86e63ac4298ce40e5cca669066e75b6f1aa3390fe2561ffa5e1d9f4cc" +checksum = "b0c35f58762feb77d74ebe43bdbc3210f09be9fe6742234d573bacc26ed92b67" dependencies = [ "byteorder", ] @@ -1298,13 +1387,13 @@ checksum = "ab5ef0d4909ef3724cc8cce6ccc8572c5c817592e9285f5464f8e86f8bd3726e" [[package]] name = "heapless" -version = "0.6.1" +version = "0.7.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "634bd4d29cbf24424d0a4bfcbf80c6960129dc24424752a7d1d1390607023422" +checksum = 
"d076121838e03f862871315477528debffdb7462fb229216ecef91b1a3eb31eb" dependencies = [ - "as-slice", - "generic-array 0.14.5", + "atomic-polyfill", "hash32", + "spin 0.9.2", "stable_deref_trait", ] @@ -1317,7 +1406,7 @@ checksum = "2540771e65fc8cb83cd6e8a237f70c319bd5c29f78ed1084ba5d50eeac86f7f9" [[package]] name = "heed" version = "0.12.1" -source = "git+https://github.com/Kerollmops/heed?tag=v0.12.1#fc017cf3394af737f92fd71e16f0499a78b79d65" +source = "git+https://github.com/meilisearch/heed?tag=v0.12.1#fc017cf3394af737f92fd71e16f0499a78b79d65" dependencies = [ "byteorder", "heed-traits", @@ -1326,7 +1415,6 @@ dependencies = [ "lmdb-rkv-sys", "once_cell", "page_size", - "serde", "synchronoise", "url", "zerocopy", @@ -1335,12 +1423,12 @@ dependencies = [ [[package]] name = "heed-traits" version = "0.7.0" -source = "git+https://github.com/Kerollmops/heed?tag=v0.12.1#fc017cf3394af737f92fd71e16f0499a78b79d65" +source = "git+https://github.com/meilisearch/heed?tag=v0.12.1#fc017cf3394af737f92fd71e16f0499a78b79d65" [[package]] name = "heed-types" version = "0.7.2" -source = "git+https://github.com/Kerollmops/heed?tag=v0.12.1#fc017cf3394af737f92fd71e16f0499a78b79d65" +source = "git+https://github.com/meilisearch/heed?tag=v0.12.1#fc017cf3394af737f92fd71e16f0499a78b79d65" dependencies = [ "bincode", "heed-traits", @@ -1398,12 +1486,6 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c4a1e36c821dbe04574f602848a19f742f4fb3c98d40449f11bcad18d6b17421" -[[package]] -name = "human_format" -version = "1.0.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86cce260d758a9aa3d7c4b99d55c815a540f8a37514ba6046ab6be402a157cb0" - [[package]] name = "humantime" version = "2.1.0" @@ -1412,9 +1494,9 @@ checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" [[package]] name = "hyper" -version = "0.14.17" +version = "0.14.18" source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "043f0e083e9901b6cc658a77d1eb86f4fc650bbb977a4337dd63192826aa85dd" +checksum = "b26ae0a80afebe130861d90abf98e3814a4f28a4c6ffeb5ab8ebb2be311e0ef2" dependencies = [ "bytes", "futures-channel", @@ -1442,7 +1524,7 @@ checksum = "d87c48c02e0dc5e3b849a2041db3029fd066650f8f717c07bf8ed78ccb895cac" dependencies = [ "http", "hyper", - "rustls 0.20.4", + "rustls", "tokio", "tokio-rustls", ] @@ -1460,9 +1542,9 @@ dependencies = [ [[package]] name = "indexmap" -version = "1.8.0" +version = "1.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "282a6247722caba404c065016bbfa522806e51714c34f5dfc3e4a3a46fcb4223" +checksum = "0f647032dfaa1f8b6dc29bd3edb7bbef4861b8b8007ebb118d6db284fd59f6ee" dependencies = [ "autocfg", "hashbrown 0.11.2", @@ -1480,18 +1562,9 @@ dependencies = [ [[package]] name = "ipnet" -version = "2.3.1" +version = "2.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68f2d64f2edebec4ce84ad108148e67e1064789bee435edc5b60ad398714a3a9" - -[[package]] -name = "iso8601-duration" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "60b51dd97fa24074214b9eb14da518957573f4dec3189112610ae1ccec9ac464" -dependencies = [ - "nom 5.1.2", -] +checksum = "35e70ee094dc02fd9c13fdad4940090f22dbd6ac7c9e7094a46cf0232a50bc7c" [[package]] name = "itertools" @@ -1548,12 +1621,20 @@ dependencies = [ ] [[package]] -name = "jsonwebtoken" -version = "7.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "afabcc15e437a6484fc4f12d0fd63068fe457bf93f1c148d3d9649c60b103f32" +name = "json-depth-checker" +version = "0.26.4" +source = "git+https://github.com/meilisearch/milli.git?tag=v0.26.4#6ee67df128b63f40eedfe1cb2edfa1a59492e310" dependencies = [ - "base64 0.12.3", + "serde_json", +] + +[[package]] +name = "jsonwebtoken" +version = "8.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"012bb02250fdd38faa5feee63235f7a459974440b9b57593822414c31f92839e" +dependencies = [ + "base64", "pem", "ring", "serde", @@ -1582,30 +1663,17 @@ dependencies = [ "fst", ] -[[package]] -name = "lexical-core" -version = "0.7.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6607c62aa161d23d17a9072cc5da0be67cdfc89d3afb1e8d9c842bebc2525ffe" -dependencies = [ - "arrayvec", - "bitflags", - "cfg-if 1.0.0", - "ryu", - "static_assertions", -] - [[package]] name = "libc" -version = "0.2.119" +version = "0.2.122" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1bf2e165bb3457c8e098ea76f3e3bc9db55f87aa90d52d0e6be741470916aaa4" +checksum = "ec647867e2bf0772e28c8bcde4f0d19a9216916e890543b5a03ed8ef27b8f259" [[package]] name = "libgit2-sys" -version = "0.13.0+1.4.1" +version = "0.13.2+1.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "864e22fc06cae62860398cd854c93d5867f11c02ec916aa1417b440f170df23a" +checksum = "3a42de9a51a5c12e00fc0e4ca6bc2ea43582fc6418488e8f615e905d886f258b" dependencies = [ "cc", "libc", @@ -1614,10 +1682,16 @@ dependencies = [ ] [[package]] -name = "libz-sys" -version = "1.1.3" +name = "libm" +version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "de5435b8549c16d423ed0c03dbaafe57cf6c3344744f1242520d59c9d8ecec66" +checksum = "33a33a362ce288760ec6a508b94caaec573ae7d3bbbd91b87aa0bad4456839db" + +[[package]] +name = "libz-sys" +version = "1.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f35facd4a5673cb5a48822be2be1d4236c1c99cb4113cab7061ac720d5bf859" dependencies = [ "cc", "libc", @@ -1626,10 +1700,162 @@ dependencies = [ ] [[package]] -name = "linked-hash-map" -version = "0.5.4" +name = "lindera" +version = "0.12.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7fb9b38af92608140b86b693604b9ffcc5824240a484d1ecd4795bacb2fe88f3" +checksum = 
"3dea10df226936ff54f16d3922500e08ef4be2ba7c0070bec9ad4a1474316111" +dependencies = [ + "anyhow", + "bincode", + "byteorder", + "encoding", + "lindera-cc-cedict-builder", + "lindera-core", + "lindera-dictionary", + "lindera-ipadic", + "lindera-ipadic-builder", + "lindera-ko-dic-builder", + "lindera-unidic-builder", + "serde", + "serde_json", + "thiserror", +] + +[[package]] +name = "lindera-cc-cedict-builder" +version = "0.12.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4392785248c3d8755c6fae9d0086d27ad7a1d6810155a2494fe5206e2021f471" +dependencies = [ + "anyhow", + "bincode", + "byteorder", + "clap", + "csv", + "encoding", + "env_logger", + "glob", + "lindera-core", + "lindera-decompress", + "log", + "yada", +] + +[[package]] +name = "lindera-core" +version = "0.12.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af63a4484334d4b83277621f1ba62fb83472858cc37fb4ab2181a4c19eebcb38" +dependencies = [ + "anyhow", + "bincode", + "byteorder", + "encoding", + "log", + "serde", + "thiserror", + "yada", +] + +[[package]] +name = "lindera-decompress" +version = "0.12.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "817ee62bc8973ec2457805df83796c59f074e49a4a0ee9baffe2663fe157f54a" +dependencies = [ + "anyhow", + "lzma-rs", + "serde", +] + +[[package]] +name = "lindera-dictionary" +version = "0.12.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fd57501ee44a6aba0431d043c7926347e29883a79d8fc3955b8837e4ad1fee3c" +dependencies = [ + "anyhow", + "bincode", + "byteorder", + "lindera-core", +] + +[[package]] +name = "lindera-ipadic" +version = "0.12.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ade3bd3faa5f0db629c26264663e901dee5f46221eb04c2c7b592bd7485d44f9" +dependencies = [ + "bincode", + "byteorder", + "encoding", + "flate2", + "lindera-core", + "lindera-ipadic-builder", + "once_cell", + "tar", + "ureq", +] + 
+[[package]] +name = "lindera-ipadic-builder" +version = "0.12.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee61f8dd6566738c5fd0ee9b1c11212ffc2d1f97af69c08a02cbb5c49995250a" +dependencies = [ + "anyhow", + "bincode", + "byteorder", + "clap", + "encoding", + "env_logger", + "glob", + "lindera-core", + "lindera-decompress", + "log", + "serde", + "yada", +] + +[[package]] +name = "lindera-ko-dic-builder" +version = "0.12.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "01f05950d9adc7aa42aa8b16be1616f9625576c867179ac29372714eaed6993d" +dependencies = [ + "anyhow", + "bincode", + "byteorder", + "clap", + "csv", + "encoding", + "env_logger", + "glob", + "lindera-core", + "lindera-decompress", + "log", + "yada", +] + +[[package]] +name = "lindera-unidic-builder" +version = "0.12.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3836c1278b8309ebf209c67bc7a935f4ce7c9246a578b250540398806a40b81d" +dependencies = [ + "anyhow", + "bincode", + "byteorder", + "clap", + "csv", + "encoding", + "env_logger", + "glob", + "lindera-core", + "lindera-decompress", + "log", + "yada", +] [[package]] name = "lmdb-rkv-sys" @@ -1661,18 +1887,19 @@ checksum = "902eb695eb0591864543cbfbf6d742510642a605a61fc5e97fe6ceb5a30ac4fb" [[package]] name = "lock_api" -version = "0.4.6" +version = "0.4.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "88943dd7ef4a2e5a4bfa2753aaab3013e34ce2533d1996fb18ef591e315e2b3b" +checksum = "327fa5b6a6940e4699ec49a9beae1ea4845c6bab9314e4f84ac68742139d8c53" dependencies = [ + "autocfg", "scopeguard", ] [[package]] name = "log" -version = "0.4.14" +version = "0.4.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "51b9bbe6c47d51fc3e1a9b945965946b4c44142ab8792c50835a980d362c2710" +checksum = "6389c490849ff5bc16be905ae24bc913a9c8892e19b2341dbc175e14c341c2b8" dependencies = [ "cfg-if 1.0.0", ] @@ -1694,9 +1921,19 
@@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "10a9062912d7952c5588cc474795e0b9ee008e7e6781127945b85413d4b99d81" dependencies = [ "log", - "proc-macro2 1.0.36", - "quote 1.0.15", - "syn 1.0.86", + "proc-macro2 1.0.37", + "quote 1.0.17", + "syn 1.0.91", +] + +[[package]] +name = "lzma-rs" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aba8ecb0450dfabce4ad72085eed0a75dffe8f21f7ada05638564ea9db2d7fb1" +dependencies = [ + "byteorder", + "crc", ] [[package]] @@ -1713,22 +1950,22 @@ checksum = "a3e378b66a060d48947b590737b30a1be76706c8dd7b8ba0f2fe3989c68a853f" [[package]] name = "meilisearch-auth" -version = "0.26.1" +version = "0.27.0" dependencies = [ "enum-iterator", - "heed", "meilisearch-error", + "milli", "rand", "serde", "serde_json", "sha2", "thiserror", - "time 0.3.7", + "time 0.3.9", ] [[package]] name = "meilisearch-error" -version = "0.26.1" +version = "0.27.0" dependencies = [ "actix-web", "proptest", @@ -1739,14 +1976,13 @@ dependencies = [ [[package]] name = "meilisearch-http" -version = "0.26.1" +version = "0.27.0" dependencies = [ "actix-cors", "actix-rt", "actix-web", "actix-web-static-files", "anyhow", - "arc-swap", "assert-json-diff", "async-stream", "async-trait", @@ -1762,11 +1998,9 @@ dependencies = [ "fst", "futures", "futures-util", - "heed", "hex", "http", "indexmap", - "iso8601-duration", "itertools", "jsonwebtoken", "log", @@ -1778,7 +2012,7 @@ dependencies = [ "num_cpus", "obkv", "once_cell", - "parking_lot 0.11.2", + "parking_lot", "paste", "pin-project-lite", "platform-dirs", @@ -1786,13 +2020,13 @@ dependencies = [ "rayon", "regex", "reqwest", - "rustls 0.20.4", + "rustls", "rustls-pemfile", "segment", "serde", "serde_json", "serde_url_params", - "sha-1 0.9.8", + "sha-1", "sha2", "siphasher", "slice-group-by", @@ -1802,7 +2036,7 @@ dependencies = [ "tempfile", "thiserror", "tikv-jemallocator", - "time 0.3.7", + "time 0.3.9", "tokio", "tokio-stream", 
"urlencoding", @@ -1814,7 +2048,7 @@ dependencies = [ [[package]] name = "meilisearch-lib" -version = "0.26.1" +version = "0.27.0" dependencies = [ "actix-rt", "actix-web", @@ -1834,7 +2068,6 @@ dependencies = [ "fst", "futures", "futures-util", - "heed", "http", "indexmap", "itertools", @@ -1849,16 +2082,16 @@ dependencies = [ "num_cpus", "obkv", "once_cell", - "parking_lot 0.11.2", + "parking_lot", "paste", - "pin-project", + "permissive-json-pointer", "proptest", "proptest-derive", "rand", "rayon", "regex", "reqwest", - "rustls 0.19.1", + "rustls", "serde", "serde_json", "siphasher", @@ -1867,7 +2100,7 @@ dependencies = [ "tar", "tempfile", "thiserror", - "time 0.3.7", + "time 0.3.9", "tokio", "uuid", "walkdir", @@ -1876,14 +2109,16 @@ dependencies = [ [[package]] name = "meilisearch-tokenizer" -version = "0.2.7" -source = "git+https://github.com/meilisearch/tokenizer.git?tag=v0.2.7#e14f64f2482d8f57e9aae8dc37dcb1469099f6f3" +version = "0.2.9" +source = "git+https://github.com/meilisearch/tokenizer.git?tag=v0.2.9#1dfc8ad9f5b338c39c3bc5fd5b2d0c1328314ddc" dependencies = [ "character_converter", "cow-utils", "deunicode", "fst", "jieba-rs", + "lindera", + "lindera-core", "once_cell", "slice-group-by", "unicode-segmentation", @@ -1916,8 +2151,8 @@ dependencies = [ [[package]] name = "milli" -version = "0.23.0" -source = "git+https://github.com/meilisearch/milli.git?tag=v0.22.2#f2984f66e64838d51f5cce412693fa411ee3f2d4" +version = "0.26.4" +source = "git+https://github.com/meilisearch/milli.git?tag=v0.26.4#6ee67df128b63f40eedfe1cb2edfa1a59492e310" dependencies = [ "bimap", "bincode", @@ -1928,16 +2163,15 @@ dependencies = [ "csv", "either", "filter-parser", - "flate2", + "flatten-serde-json", "fst", "fxhash", "geoutils", "grenad", "heed", - "human_format", "itertools", + "json-depth-checker", "levenshtein_automata", - "linked-hash-map", "log", "logging_timer", "meilisearch-tokenizer", @@ -1953,8 +2187,9 @@ dependencies = [ "slice-group-by", "smallstr", "smallvec", + 
"smartstring", "tempfile", - "time 0.3.7", + "time 0.3.9", "uuid", ] @@ -1992,14 +2227,15 @@ dependencies = [ [[package]] name = "mio" -version = "0.8.0" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba272f85fa0b41fc91872be579b3bbe0f56b792aa361a380eb669469f68dafb2" +checksum = "52da4364ffb0e4fe33a9841a98a3f3014fb964045ce4f7a45a398243c8d6b0c9" dependencies = [ "libc", "log", "miow", "ntapi", + "wasi 0.11.0+wasi-snapshot-preview1", "winapi", ] @@ -2014,9 +2250,9 @@ dependencies = [ [[package]] name = "mockall" -version = "0.10.2" +version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ab571328afa78ae322493cacca3efac6a0f2e0a67305b4df31fd439ef129ac0" +checksum = "3d4d70639a72f972725db16350db56da68266ca368b2a1fe26724a903ad3d6b8" dependencies = [ "cfg-if 1.0.0", "downcast", @@ -2029,16 +2265,31 @@ dependencies = [ [[package]] name = "mockall_derive" -version = "0.10.2" +version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e7e25b214433f669161f414959594216d8e6ba83b6679d3db96899c0b4639033" +checksum = "79ef208208a0dea3f72221e26e904cdc6db2e481d9ade89081ddd494f1dbaa6b" dependencies = [ "cfg-if 1.0.0", - "proc-macro2 1.0.36", - "quote 1.0.15", - "syn 1.0.86", + "proc-macro2 1.0.37", + "quote 1.0.17", + "syn 1.0.91", ] +[[package]] +name = "nb" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "801d31da0513b6ec5214e9bf433a77966320625a37860f910be265be6e18d06f" +dependencies = [ + "nb 1.0.0", +] + +[[package]] +name = "nb" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "546c37ac5d9e56f55e73b677106873d9d9f5190605e41a856503623648488cae" + [[package]] name = "nelson" version = "0.1.0" @@ -2046,24 +2297,12 @@ source = "git+https://github.com/MarinPostma/nelson.git?rev=675f13885548fb415ead [[package]] name = "nom" -version = "5.1.2" +version = "7.1.1" 
source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ffb4262d26ed83a1c0a33a38fe2bb15797329c85770da05e6b828ddb782627af" -dependencies = [ - "lexical-core", - "memchr", - "version_check", -] - -[[package]] -name = "nom" -version = "7.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b1d11e1ef389c76fe5b81bcaf2ea32cf88b62bc494e19f493d0b30e7a930109" +checksum = "a8903e5a29a317527874d0402f867152a3d21c908bb0b933e416c65e301d4c36" dependencies = [ "memchr", "minimal-lexical", - "version_check", ] [[package]] @@ -2074,7 +2313,7 @@ checksum = "37794436ca3029a3089e0b95d42da1f0b565ad271e4d3bb4bad0c7bb70b10605" dependencies = [ "bytecount", "memchr", - "nom 7.1.0", + "nom", ] [[package]] @@ -2094,9 +2333,9 @@ dependencies = [ [[package]] name = "num-bigint" -version = "0.2.6" +version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "090c7f9998ee0ff65aa5b723e4009f7b217707f1fb5ea551329cc4d6231fb304" +checksum = "f93ab6289c7b344a8a9f60f88d80aa20032336fe78da341afc91c8a2341fc75f" dependencies = [ "autocfg", "num-integer", @@ -2120,6 +2359,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9a64b1ec5cda2586e284722486d802acf1f7dbdc623e2bfc57e65ca1cd099290" dependencies = [ "autocfg", + "libm", ] [[package]] @@ -2134,9 +2374,9 @@ dependencies = [ [[package]] name = "num_threads" -version = "0.1.3" +version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97ba99ba6393e2c3734791401b66902d981cb03bf190af674ca69949b6d5fb15" +checksum = "aba1801fb138d8e85e11d0fc70baf4fe1cdfffda7c6cd34a854905df588e5ed0" dependencies = [ "libc", ] @@ -2158,15 +2398,9 @@ checksum = "f69e48cd7c8e5bb52a1da1287fdbfd877c32673176583ce664cd63b201aba385" [[package]] name = "once_cell" -version = "1.9.0" +version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"da32515d9f6e6e489d7bc9d84c71b060db7247dc035bbe44eac88cf87486d8d5" - -[[package]] -name = "opaque-debug" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "624a8340c38c1b80fd549087862da4ba43e08858af025b236e509b6649fc13d5" +checksum = "87f3e037eac156d1775da914196f0f37741a274155e34a0b7e427c35d2a2ecb9" [[package]] name = "ordered-float" @@ -2196,17 +2430,6 @@ dependencies = [ "winapi", ] -[[package]] -name = "parking_lot" -version = "0.11.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d17b78036a60663b797adeaee46f5c9dfebb86948d1255007a1d6be0271ff99" -dependencies = [ - "instant", - "lock_api", - "parking_lot_core 0.8.5", -] - [[package]] name = "parking_lot" version = "0.12.0" @@ -2214,28 +2437,14 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "87f5ec2493a61ac0506c0f4199f99070cbe83857b0337006a30f3e6719b8ef58" dependencies = [ "lock_api", - "parking_lot_core 0.9.1", + "parking_lot_core", ] [[package]] name = "parking_lot_core" -version = "0.8.5" +version = "0.9.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d76e8e1493bcac0d2766c42737f34458f1c8c50c0d23bcb24ea953affb273216" -dependencies = [ - "cfg-if 1.0.0", - "instant", - "libc", - "redox_syscall", - "smallvec", - "winapi", -] - -[[package]] -name = "parking_lot_core" -version = "0.9.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "28141e0cc4143da2443301914478dc976a61ffdb3f043058310c70df2fed8954" +checksum = "995f667a6c822200b0433ac218e05582f0e2efa1b922a3fd2fbaadc5f87bab37" dependencies = [ "cfg-if 1.0.0", "libc", @@ -2246,9 +2455,9 @@ dependencies = [ [[package]] name = "paste" -version = "1.0.6" +version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0744126afe1a6dd7f394cb50a716dbe086cb06e255e53d8d0185d82828358fb5" +checksum = "0c520e05135d6e763148b6426a837e239041653ba7becd2e538c076c738025fc" 
[[package]] name = "path-matchers" @@ -2265,21 +2474,13 @@ version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3cacbb3c4ff353b534a67fb8d7524d00229da4cb1dc8c79f4db96e375ab5b619" -[[package]] -name = "pdqselect" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4ec91767ecc0a0bbe558ce8c9da33c068066c57ecc8bb8477ef8c1ad3ef77c27" - [[package]] name = "pem" -version = "0.8.3" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd56cbd21fea48d0c440b41cd69c589faacade08c992d9a54e471b79d0fd13eb" +checksum = "e9a3b09a20e374558580a4914d3b7d89bd61b954a5a5e1dcbea98753addb1947" dependencies = [ - "base64 0.13.0", - "once_cell", - "regex", + "base64", ] [[package]] @@ -2288,6 +2489,14 @@ version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d4fd5641d01c8f18a23da7b6fe29298ff4b55afcccdf78973b24cf3175fee32e" +[[package]] +name = "permissive-json-pointer" +version = "0.2.0" +dependencies = [ + "big_s", + "serde_json", +] + [[package]] name = "phf" version = "0.10.1" @@ -2326,26 +2535,6 @@ dependencies = [ "siphasher", ] -[[package]] -name = "pin-project" -version = "1.0.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "58ad3879ad3baf4e44784bc6a718a8698867bb991f8ce24d1bcbe2cfb4c3a75e" -dependencies = [ - "pin-project-internal", -] - -[[package]] -name = "pin-project-internal" -version = "1.0.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "744b6f092ba29c3650faf274db506afd39944f48420f6c86b17cfe0ee1cb36bb" -dependencies = [ - "proc-macro2 1.0.36", - "quote 1.0.15", - "syn 1.0.86", -] - [[package]] name = "pin-project-lite" version = "0.2.8" @@ -2360,9 +2549,9 @@ checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" [[package]] name = "pkg-config" -version = "0.3.24" +version = "0.3.25" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "58893f751c9b0412871a09abd62ecd2a00298c6c83befa223ef98c52aef40cbe" +checksum = "1df8c4ec4b0627e53bdf214615ad287367e482558cf84b109250b37464dc03ae" [[package]] name = "platform-dirs" @@ -2381,12 +2570,13 @@ checksum = "eb9f9e6e233e5c4a35559a617bf40a4ec447db2e84c20b55a6f83167b7e57872" [[package]] name = "predicates" -version = "1.0.8" +version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f49cfaf7fdaa3bfacc6fa3e7054e65148878354a5cfddcf661df4c851f8021df" +checksum = "a5aab5be6e4732b473071984b3164dbbfb7a3674d30ea5ff44410b6bcd960c3c" dependencies = [ - "difference", + "difflib", "float-cmp", + "itertools", "normalize-line-endings", "predicates-core", "regex", @@ -2415,9 +2605,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c" dependencies = [ "proc-macro-error-attr", - "proc-macro2 1.0.36", - "quote 1.0.15", - "syn 1.0.86", + "proc-macro2 1.0.37", + "quote 1.0.17", + "syn 1.0.91", "version_check", ] @@ -2427,8 +2617,8 @@ version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869" dependencies = [ - "proc-macro2 1.0.36", - "quote 1.0.15", + "proc-macro2 1.0.37", + "quote 1.0.17", "version_check", ] @@ -2443,9 +2633,9 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.36" +version = "1.0.37" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c7342d5883fbccae1cc37a2353b09c87c9b0f3afd73f5fb9bba687a1f733b029" +checksum = "ec757218438d5fda206afc041538b2f6d889286160d649a86a24d37e1235afd1" dependencies = [ "unicode-xid 0.2.2", ] @@ -2493,6 +2683,15 @@ version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a993555f31e5a609f617c12db6250dedcac1b0a85076912c436e6fc9b2c8e6a3" +[[package]] +name = 
"quickcheck" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "588f6378e4dd99458b60ec275b4477add41ce4fa9f64dcba6f15adccb19b50d6" +dependencies = [ + "rand", +] + [[package]] name = "quote" version = "0.6.13" @@ -2504,11 +2703,11 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.15" +version = "1.0.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "864d3e96a899863136fc6e99f3d7cae289dafe43bf2c5ac19b70df7210c0a145" +checksum = "632d02bff7f874a36f33ea8bb416cd484b90cc66c1194b1a1110d067a7013f58" dependencies = [ - "proc-macro2 1.0.36", + "proc-macro2 1.0.37", ] [[package]] @@ -2570,35 +2769,36 @@ checksum = "d78120e2c850279833f1dd3582f730c4ab53ed95aeaaaa862a2a5c71b1656d8e" dependencies = [ "crossbeam-channel", "crossbeam-deque", - "crossbeam-utils 0.8.7", + "crossbeam-utils 0.8.8", "lazy_static", "num_cpus", ] [[package]] name = "redox_syscall" -version = "0.2.10" +version = "0.2.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8383f39639269cde97d255a32bdb68c047337295414940c68bdd30c2e13203ff" +checksum = "62f25bc4c7e55e0b0b7a1d43fb893f4fa1361d0abe38b9ce4f323c2adfe6ef42" dependencies = [ "bitflags", ] [[package]] name = "redox_users" -version = "0.4.0" +version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "528532f3d801c87aec9def2add9ca802fe569e44a544afe633765267840abe64" +checksum = "b033d837a7cf162d7993aded9304e30a83213c648b6e389db233191f891e5c2b" dependencies = [ "getrandom", "redox_syscall", + "thiserror", ] [[package]] name = "regex" -version = "1.5.4" +version = "1.5.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d07a8629359eb56f1e2fb1652bb04212c072a87ba68546a04065d525673ac461" +checksum = "1a11647b6b25ff05a515cb92c365cec08801e83423a235b51e231e1808747286" dependencies = [ "aho-corasick", "memchr", @@ -2628,11 +2828,11 @@ dependencies = [ [[package]] name = "reqwest" 
-version = "0.11.9" +version = "0.11.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87f242f1488a539a79bac6dbe7c8609ae43b7914b7736210f239a37cccb32525" +checksum = "46a1f7aa4f35e5e8b4160449f51afc758f0ce6454315a9fa7d0d113e958c41eb" dependencies = [ - "base64 0.13.0", + "base64", "bytes", "encoding_rs", "futures-core", @@ -2649,7 +2849,7 @@ dependencies = [ "mime", "percent-encoding", "pin-project-lite", - "rustls 0.20.4", + "rustls", "rustls-pemfile", "serde", "serde_json", @@ -2679,17 +2879,38 @@ dependencies = [ "cc", "libc", "once_cell", - "spin", + "spin 0.5.2", "untrusted", "web-sys", "winapi", ] [[package]] -name = "roaring" -version = "0.6.7" +name = "riscv" +version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "536cfa885fc388b8ae69edf96d7970849b7d9c1395da1b8330f17715babf8a09" +checksum = "6907ccdd7a31012b70faf2af85cd9e5ba97657cc3987c4f13f8e4d2c2a088aba" +dependencies = [ + "bare-metal 1.0.0", + "bit_field", + "riscv-target", +] + +[[package]] +name = "riscv-target" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88aa938cda42a0cf62a20cfe8d139ff1af20c2e681212b5b34adb5a58333f222" +dependencies = [ + "lazy_static", + "regex", +] + +[[package]] +name = "roaring" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd539cab4e32019956fe7e0cf160bb6d4802f4be2b52c4253d76d3bb0f85a5f7" dependencies = [ "bytemuck", "byteorder", @@ -2698,13 +2919,12 @@ dependencies = [ [[package]] name = "rstar" -version = "0.9.2" +version = "0.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fc6fc513b8c3853e43a0c3f909ded14ffa82e5170c9c5f6fb175f9c85c8a433" +checksum = "b40f1bfe5acdab44bc63e6699c28b74f75ec43afb59f3eda01e145aff86a25fa" dependencies = [ "heapless", "num-traits", - "pdqselect", "serde", "smallvec", ] @@ -2715,26 +2935,22 @@ version = "0.1.21" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "7ef03e0a2b150c7a90d01faf6254c9c48a41e95fb2a8c2ac1c6f0d2b9aefc342" +[[package]] +name = "rustc_version" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "138e3e0acb6c9fb258b19b67cb8abd63c00679d2851805ea151465464fe9030a" +dependencies = [ + "semver 0.9.0", +] + [[package]] name = "rustc_version" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bfa0f585226d2e68097d4f95d113b15b83a82e819ab25717ec0590d9584ef366" dependencies = [ - "semver", -] - -[[package]] -name = "rustls" -version = "0.19.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "35edb675feee39aec9c99fa5ff985081995a06d594114ae14cbe797ad7b7a6d7" -dependencies = [ - "base64 0.13.0", - "log", - "ring", - "sct 0.6.1", - "webpki 0.21.4", + "semver 1.0.7", ] [[package]] @@ -2745,17 +2961,17 @@ checksum = "4fbfeb8d0ddb84706bc597a5574ab8912817c52a397f819e5b614e2265206921" dependencies = [ "log", "ring", - "sct 0.7.0", - "webpki 0.22.0", + "sct", + "webpki", ] [[package]] name = "rustls-pemfile" -version = "0.2.1" +version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5eebeaeb360c87bfb72e84abdb3447159c0eaececf1bef2aecd65a8be949d1c9" +checksum = "1ee86d63972a7c661d1536fefe8c3c8407321c3df668891286de28abcd087360" dependencies = [ - "base64 0.13.0", + "base64", ] [[package]] @@ -2797,16 +3013,6 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" -[[package]] -name = "sct" -version = "0.6.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b362b83898e0e69f38515b82ee15aa80636befe47c3b6d3d89a911e78fc228ce" -dependencies = [ - "ring", - "untrusted", -] - [[package]] name = "sct" version = "0.7.0" @@ -2828,14 +3034,29 @@ dependencies = [ "serde", 
"serde_json", "thiserror", - "time 0.3.7", + "time 0.3.9", ] [[package]] name = "semver" -version = "1.0.6" +version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4a3381e03edd24287172047536f20cabde766e2cd3e65e6b00fb3af51c4f38d" +checksum = "1d7eb9ef2c18661902cc47e535f9bc51b78acd254da71d375c2f6720d9a40403" +dependencies = [ + "semver-parser", +] + +[[package]] +name = "semver" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d65bd28f48be7196d222d95b9243287f48d27aca604e08497513019ff0502cc4" + +[[package]] +name = "semver-parser" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3" [[package]] name = "serde" @@ -2852,9 +3073,9 @@ version = "1.0.136" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "08597e7152fcd306f41838ed3e37be9eaeed2b61c42e2117266a554fab4662f9" dependencies = [ - "proc-macro2 1.0.36", - "quote 1.0.15", - "syn 1.0.86", + "proc-macro2 1.0.37", + "quote 1.0.17", + "syn 1.0.91", ] [[package]] @@ -2891,19 +3112,6 @@ dependencies = [ "serde", ] -[[package]] -name = "sha-1" -version = "0.9.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "99cd6713db3cf16b6c84e06321e049a9b9f699826e16096d23bbcc44d15d51a6" -dependencies = [ - "block-buffer 0.9.0", - "cfg-if 1.0.0", - "cpufeatures", - "digest 0.9.0", - "opaque-debug", -] - [[package]] name = "sha-1" version = "0.10.0" @@ -2912,20 +3120,18 @@ checksum = "028f48d513f9678cda28f6e4064755b3fbb2af6acd672f2c209b62323f7aea0f" dependencies = [ "cfg-if 1.0.0", "cpufeatures", - "digest 0.10.3", + "digest", ] [[package]] name = "sha2" -version = "0.9.9" +version = "0.10.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4d58a1e1bf39749807d89cf2d98ac2dfa0ff1cb3faa38fbb64dd88ac8013d800" +checksum = 
"55deaec60f81eefe3cce0dc50bda92d6d8e88f2a27df7c5033b42afeb1ed2676" dependencies = [ - "block-buffer 0.9.0", "cfg-if 1.0.0", "cpufeatures", - "digest 0.9.0", - "opaque-debug", + "digest", ] [[package]] @@ -2939,38 +3145,39 @@ dependencies = [ [[package]] name = "simple_asn1" -version = "0.4.1" +version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "692ca13de57ce0613a363c8c2f1de925adebc81b04c923ac60c5488bb44abe4b" +checksum = "4a762b1c38b9b990c694b9c2f8abe3372ce6a9ceaae6bca39cfc46e054f45745" dependencies = [ - "chrono", "num-bigint", "num-traits", + "thiserror", + "time 0.3.9", ] [[package]] name = "siphasher" -version = "0.3.9" +version = "0.3.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a86232ab60fa71287d7f2ddae4a7073f6b7aac33631c3015abb556f08c6d0a3e" +checksum = "7bd3e3206899af3f8b12af284fafc038cc1dc2b41d1b89dd17297221c5d225de" [[package]] name = "slab" -version = "0.4.5" +version = "0.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9def91fd1e018fe007022791f865d0ccc9b3a0d5001e01aabb8b40e46000afb5" +checksum = "eb703cfe953bccee95685111adeedb76fabe4e97549a58d16f03ea7b9367bb32" [[package]] name = "slice-group-by" -version = "0.2.6" +version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1f7474f0b646d228360ab62ed974744617bc869d959eac8403bfa3665931a7fb" +checksum = "03b634d87b960ab1a38c4fe143b508576f075e7c978bfad18217645ebfdfa2ec" [[package]] name = "smallstr" -version = "0.2.0" +version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e922794d168678729ffc7e07182721a14219c65814e66e91b839a272fe5ae4f" +checksum = "63b1aefdf380735ff8ded0b15f31aab05daf1f70216c01c02a12926badd1df9d" dependencies = [ "serde", "smallvec", @@ -2982,6 +3189,17 @@ version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"f2dd574626839106c320a323308629dcb1acfc96e32a8cba364ddc61ac23ee83" +[[package]] +name = "smartstring" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fb72c633efbaa2dd666986505016c32c3044395ceaf881518399d2f4127ee29" +dependencies = [ + "autocfg", + "static_assertions", + "version_check", +] + [[package]] name = "socket2" version = "0.4.4" @@ -2998,6 +3216,15 @@ version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d" +[[package]] +name = "spin" +version = "0.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "511254be0c5bcf062b019a6c89c01a664aa359ded62f78aa72c6fc137c0590e5" +dependencies = [ + "lock_api", +] + [[package]] name = "stable_deref_trait" version = "1.2.0" @@ -3040,12 +3267,12 @@ dependencies = [ [[package]] name = "syn" -version = "1.0.86" +version = "1.0.91" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a65b3f4ffa0092e9887669db0eae07941f023991ab58ea44da8fe8e2d511c6b" +checksum = "b683b2b825c8eef438b77c36a06dc262294da3d5a5813fac20da149241dcd44d" dependencies = [ - "proc-macro2 1.0.36", - "quote 1.0.15", + "proc-macro2 1.0.37", + "quote 1.0.17", "unicode-xid 0.2.2", ] @@ -3064,17 +3291,17 @@ version = "0.12.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f36bdaa60a83aca3921b5259d5400cbf5e90fc51931376a9bd4a0eb79aa7210f" dependencies = [ - "proc-macro2 1.0.36", - "quote 1.0.15", - "syn 1.0.86", + "proc-macro2 1.0.37", + "quote 1.0.17", + "syn 1.0.91", "unicode-xid 0.2.2", ] [[package]] name = "sysinfo" -version = "0.20.5" +version = "0.23.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e223c65cd36b485a34c2ce6e38efa40777d31c4166d9076030c74cdcf971679f" +checksum = "ad04c584871b8dceb769a20b94e26a357a870c999b7246dcd4cb233d927547e3" dependencies = [ "cfg-if 1.0.0", 
"core-foundation-sys", @@ -3112,9 +3339,9 @@ dependencies = [ [[package]] name = "termcolor" -version = "1.1.2" +version = "1.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2dfed899f0eb03f32ee8c6a0aabdb8a7949659e3466561fc0adf54e26d88c5f4" +checksum = "bab24d30b911b2376f3a13cc2cd443142f0c81dda04c118693e35b3835757755" dependencies = [ "winapi-util", ] @@ -3127,9 +3354,9 @@ checksum = "507e9898683b6c43a9aa55b64259b721b52ba226e0f3779137e50ad114a4c90b" [[package]] name = "textwrap" -version = "0.14.2" +version = "0.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0066c8d12af8b5acd21e00547c3797fde4e8677254a7ee429176ccebbe93dd80" +checksum = "b1141d4d61095b28419e22cb0bbf02755f5e54e0526f97f1e3d1d160e60885fb" [[package]] name = "thiserror" @@ -3146,9 +3373,9 @@ version = "1.0.30" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "aa32fd3f627f367fe16f893e2597ae3c05020f8bba2666a4e6ea73d377e5714b" dependencies = [ - "proc-macro2 1.0.36", - "quote 1.0.15", - "syn 1.0.86", + "proc-macro2 1.0.37", + "quote 1.0.17", + "syn 1.0.91", ] [[package]] @@ -3174,33 +3401,33 @@ dependencies = [ [[package]] name = "time" -version = "0.1.44" +version = "0.1.43" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6db9e6914ab8b1ae1c260a4ae7a49b6c5611b40328a735b21862567685e73255" +checksum = "ca8a50ef2360fbd1eeb0ecd46795a87a19024eb4b53c5dc916ca1fd95fe62438" dependencies = [ "libc", - "wasi", "winapi", ] [[package]] name = "time" -version = "0.3.7" +version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "004cbc98f30fa233c61a38bc77e96a9106e65c88f2d3bef182ae952027e5753d" +checksum = "c2702e08a7a860f005826c6815dcac101b19b5eb330c27fe4a5928fec1d20ddd" dependencies = [ "itoa 1.0.1", "libc", "num_threads", + "quickcheck", "serde", "time-macros", ] [[package]] name = "time-macros" -version = "0.2.3" +version = "0.2.4" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "25eb0ca3468fc0acc11828786797f6ef9aa1555e4a211a60d64cc8e4d1be47d6" +checksum = "42657b1a6f4d817cda8e7a0ace261fe0cc946cf3a80314390b22cc61ae080792" [[package]] name = "tinyvec" @@ -3229,7 +3456,7 @@ dependencies = [ "mio", "num_cpus", "once_cell", - "parking_lot 0.12.0", + "parking_lot", "pin-project-lite", "signal-hook-registry", "socket2", @@ -3243,20 +3470,20 @@ version = "1.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b557f72f448c511a979e2564e55d74e6c4432fc96ff4f6241bc6bded342643b7" dependencies = [ - "proc-macro2 1.0.36", - "quote 1.0.15", - "syn 1.0.86", + "proc-macro2 1.0.37", + "quote 1.0.17", + "syn 1.0.91", ] [[package]] name = "tokio-rustls" -version = "0.23.2" +version = "0.23.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a27d5f2b839802bd8267fa19b0530f5a08b9c08cd417976be2a65d130fe1c11b" +checksum = "4151fda0cf2798550ad0b34bcfc9b9dcc2a9d2471c895c68f3a8818e54f2389e" dependencies = [ - "rustls 0.20.4", + "rustls", "tokio", - "webpki 0.22.0", + "webpki", ] [[package]] @@ -3272,30 +3499,16 @@ dependencies = [ [[package]] name = "tokio-util" -version = "0.6.9" +version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e99e1983e5d376cd8eb4b66604d2e99e79f5bd988c3055891dcd8c9e2604cc0" +checksum = "0edfdeb067411dba2044da6d1cb2df793dd35add7888d73c16e3381ded401764" dependencies = [ "bytes", "futures-core", "futures-sink", - "log", - "pin-project-lite", - "tokio", -] - -[[package]] -name = "tokio-util" -version = "0.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64910e1b9c1901aaf5375561e35b9c057d95ff41a44ede043a03e09279eabaf1" -dependencies = [ - "bytes", - "futures-core", - "futures-sink", - "log", "pin-project-lite", "tokio", + "tracing", ] [[package]] @@ -3315,20 +3528,33 @@ checksum = "360dfd1d6d30e05fda32ace2c8c70e9c0a9da713275777f5a4dbb8a1893930c6" 
[[package]] name = "tracing" -version = "0.1.31" +version = "0.1.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f6c650a8ef0cd2dd93736f033d21cbd1224c5a967aa0c258d00fcf7dafef9b9f" +checksum = "4a1bdf54a7c28a2bbf701e1d2233f6c77f473486b94bee4f9678da5a148dca7f" dependencies = [ "cfg-if 1.0.0", + "log", "pin-project-lite", + "tracing-attributes", "tracing-core", ] [[package]] -name = "tracing-core" -version = "0.1.22" +name = "tracing-attributes" +version = "0.1.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03cfcb51380632a72d3111cb8d3447a8d908e577d31beeac006f836383d29a23" +checksum = "2e65ce065b4b5c53e73bb28912318cb8c9e9ad3921f1d669eb0e68b4c8143a2b" +dependencies = [ + "proc-macro2 1.0.37", + "quote 1.0.17", + "syn 1.0.91", +] + +[[package]] +name = "tracing-core" +version = "0.1.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90442985ee2f57c9e1b548ee72ae842f4a9a20e3f417cc38dbc5dc684d9bb4ee" dependencies = [ "lazy_static", ] @@ -3393,6 +3619,22 @@ version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a156c684c91ea7d62626509bce3cb4e1d9ed5c4d978f7b4352658f96a4c26b4a" +[[package]] +name = "ureq" +version = "2.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9399fa2f927a3d327187cbd201480cee55bee6ac5d3c77dd27f0c6814cff16d5" +dependencies = [ + "base64", + "chunked_transfer", + "log", + "once_cell", + "rustls", + "url", + "webpki", + "webpki-roots", +] + [[package]] name = "url" version = "2.2.2" @@ -3413,9 +3655,9 @@ checksum = "68b90931029ab9b034b300b797048cf23723400aa757e8a2bfb9d748102f9821" [[package]] name = "utf8-width" -version = "0.1.5" +version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7cf7d77f457ef8dfa11e4cd5933c5ddb5dc52a94664071951219a97710f0a32b" +checksum = "5190c9442dcdaf0ddd50f37420417d219ae5261bbf5db120d0f9bab996c9cba1" [[package]] name 
= "uuid" @@ -3427,6 +3669,12 @@ dependencies = [ "serde", ] +[[package]] +name = "vcell" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77439c1b53d2303b20d9459b1ade71a83c716e3f9c34f3228c00e6f185d6c002" + [[package]] name = "vcpkg" version = "0.2.15" @@ -3435,18 +3683,18 @@ checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" [[package]] name = "vergen" -version = "5.1.17" +version = "7.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6cf88d94e969e7956d924ba70741316796177fa0c79a2c9f4ab04998d96e966e" +checksum = "4db743914c971db162f35bf46601c5a63ec4452e61461937b4c1ab817a60c12e" dependencies = [ "anyhow", "cfg-if 1.0.0", - "chrono", "enum-iterator", "getset", "git2", "rustversion", "thiserror", + "time 0.3.9", ] [[package]] @@ -3455,6 +3703,21 @@ version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" +[[package]] +name = "void" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a02e4885ed3bc0f2de90ea6dd45ebcbb66dacffe03547fadbb0eeae2770887d" + +[[package]] +name = "volatile-register" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ee8f19f9d74293faf70901bc20ad067dc1ad390d2cbf1e3f75f721ffee908b6" +dependencies = [ + "vcell", +] + [[package]] name = "wait-timeout" version = "0.2.0" @@ -3487,9 +3750,15 @@ dependencies = [ [[package]] name = "wasi" -version = "0.10.0+wasi-snapshot-preview1" +version = "0.10.2+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a143597ca7c7793eff794def352d41792a93c481eb1042423ff7ff72ba2c31f" +checksum = "fd6fbd9a79829dd1ad0cc20627bf1ed606756a7f77edff7b66b7064f9cb327c6" + +[[package]] +name = "wasi" +version = "0.11.0+wasi-snapshot-preview1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" [[package]] name = "wasm-bindgen" @@ -3510,9 +3779,9 @@ dependencies = [ "bumpalo", "lazy_static", "log", - "proc-macro2 1.0.36", - "quote 1.0.15", - "syn 1.0.86", + "proc-macro2 1.0.37", + "quote 1.0.17", + "syn 1.0.91", "wasm-bindgen-shared", ] @@ -3534,7 +3803,7 @@ version = "0.2.79" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2f4203d69e40a52ee523b2529a773d5ffc1dc0071801c87b3d270b471b80ed01" dependencies = [ - "quote 1.0.15", + "quote 1.0.17", "wasm-bindgen-macro-support", ] @@ -3544,9 +3813,9 @@ version = "0.2.79" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bfa8a30d46208db204854cadbb5d4baf5fcf8071ba5bf48190c3e59937962ebc" dependencies = [ - "proc-macro2 1.0.36", - "quote 1.0.15", - "syn 1.0.86", + "proc-macro2 1.0.37", + "quote 1.0.17", + "syn 1.0.91", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -3567,16 +3836,6 @@ dependencies = [ "wasm-bindgen", ] -[[package]] -name = "webpki" -version = "0.21.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8e38c0608262c46d4a56202ebabdeb094cef7e560ca7a226c6bf055188aa4ea" -dependencies = [ - "ring", - "untrusted", -] - [[package]] name = "webpki" version = "0.22.0" @@ -3589,18 +3848,18 @@ dependencies = [ [[package]] name = "webpki-roots" -version = "0.22.2" +version = "0.22.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "552ceb903e957524388c4d3475725ff2c8b7960922063af6ce53c9a43da07449" +checksum = "44d8de8415c823c8abd270ad483c6feeac771fad964890779f9a8cb24fbbc1bf" dependencies = [ - "webpki 0.22.0", + "webpki", ] [[package]] name = "whatlang" -version = "0.12.0" +version = "0.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a346d2eb29c03618693ed24a29d1acd0c3f2cb08ae58b9669d7461e033cf703" +checksum = 
"349357fdf0f049dcb402da4a4c5a5aae80a7f6b3e5976b38475ce4ac18e5cd2f" dependencies = [ "hashbrown 0.7.2", ] @@ -3648,9 +3907,9 @@ checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" [[package]] name = "windows-sys" -version = "0.32.0" +version = "0.34.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3df6e476185f92a12c072be4a189a0210dcdcf512a1891d6dff9edb874deadc6" +checksum = "5acdd78cb4ba54c0045ac14f62d8f94a03d10047904ae2a40afa1e99d8f70825" dependencies = [ "windows_aarch64_msvc", "windows_i686_gnu", @@ -3661,39 +3920,39 @@ dependencies = [ [[package]] name = "windows_aarch64_msvc" -version = "0.32.0" +version = "0.34.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d8e92753b1c443191654ec532f14c199742964a061be25d77d7a96f09db20bf5" +checksum = "17cffbe740121affb56fad0fc0e421804adf0ae00891205213b5cecd30db881d" [[package]] name = "windows_i686_gnu" -version = "0.32.0" +version = "0.34.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a711c68811799e017b6038e0922cb27a5e2f43a2ddb609fe0b6f3eeda9de615" +checksum = "2564fde759adb79129d9b4f54be42b32c89970c18ebf93124ca8870a498688ed" [[package]] name = "windows_i686_msvc" -version = "0.32.0" +version = "0.34.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "146c11bb1a02615db74680b32a68e2d61f553cc24c4eb5b4ca10311740e44172" +checksum = "9cd9d32ba70453522332c14d38814bceeb747d80b3958676007acadd7e166956" [[package]] name = "windows_x86_64_gnu" -version = "0.32.0" +version = "0.34.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c912b12f7454c6620635bbff3450962753834be2a594819bd5e945af18ec64bc" +checksum = "cfce6deae227ee8d356d19effc141a509cc503dfd1f850622ec4b0f84428e1f4" [[package]] name = "windows_x86_64_msvc" -version = "0.32.0" +version = "0.34.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"504a2476202769977a040c6364301a3f65d0cc9e3fb08600b2bda150a0488316" +checksum = "d19538ccc21819d01deaf88d6a17eae6596a12e9aafdbb97916fb49896d89de9" [[package]] name = "winreg" -version = "0.7.0" +version = "0.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0120db82e8a1e0b9fb3345a539c478767c0048d842860994d96113d5b667bd69" +checksum = "80d0f4e272c85def139476380b12f9ac60926689dd2e01d4923222f40580869d" dependencies = [ "winapi", ] @@ -3707,6 +3966,12 @@ dependencies = [ "libc", ] +[[package]] +name = "yada" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6d12cb7a57bbf2ab670ed9545bae3648048547f9039279a89ce000208e585c1" + [[package]] name = "zerocopy" version = "0.3.0" @@ -3723,8 +3988,8 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d498dbd1fd7beb83c86709ae1c33ca50942889473473d287d56ce4770a18edfb" dependencies = [ - "proc-macro2 1.0.36", - "syn 1.0.86", + "proc-macro2 1.0.37", + "syn 1.0.91", "synstructure", ] @@ -3739,34 +4004,5 @@ dependencies = [ "crc32fast", "flate2", "thiserror", - "time 0.1.44", -] - -[[package]] -name = "zstd" -version = "0.10.0+zstd.1.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b1365becbe415f3f0fcd024e2f7b45bacfb5bdd055f0dc113571394114e7bdd" -dependencies = [ - "zstd-safe", -] - -[[package]] -name = "zstd-safe" -version = "4.1.4+zstd.1.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f7cd17c9af1a4d6c24beb1cc54b17e2ef7b593dc92f19e9d9acad8b182bbaee" -dependencies = [ - "libc", - "zstd-sys", -] - -[[package]] -name = "zstd-sys" -version = "1.6.3+zstd.1.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fc49afa5c8d634e75761feda8c592051e7eeb4683ba827211eb0d731d3402ea8" -dependencies = [ - "cc", - "libc", + "time 0.1.43", ] diff --git a/Cargo.toml b/Cargo.toml index 65fa6fe13..03f4f5597 100644 --- a/Cargo.toml 
+++ b/Cargo.toml @@ -1,7 +1,9 @@ [workspace] +resolver = "2" members = [ "meilisearch-http", "meilisearch-error", "meilisearch-lib", "meilisearch-auth", + "permissive-json-pointer", ] diff --git a/Dockerfile b/Dockerfile index 075dede67..ad21329fc 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,42 +1,22 @@ # Compile -FROM alpine:3.14 AS compiler +FROM rust:alpine3.14 AS compiler -RUN apk update --quiet \ - && apk add -q --no-cache curl build-base - -RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y +RUN apk add -q --update-cache --no-cache build-base openssl-dev WORKDIR /meilisearch -COPY Cargo.lock . -COPY Cargo.toml . - -COPY meilisearch-auth/Cargo.toml meilisearch-auth/ -COPY meilisearch-error/Cargo.toml meilisearch-error/ -COPY meilisearch-http/Cargo.toml meilisearch-http/ -COPY meilisearch-lib/Cargo.toml meilisearch-lib/ - -ENV RUSTFLAGS="-C target-feature=-crt-static" - -# Create dummy main.rs files for each workspace member to be able to compile all the dependencies -RUN find . -type d -name "meilisearch-*" | xargs -I{} sh -c 'mkdir {}/src; echo "fn main() { }" > {}/src/main.rs;' -# Use `cargo build` instead of `cargo vendor` because we need to not only download but compile dependencies too -RUN if [ "$TARGETPLATFORM" = "linux/arm64" ]; then \ - export JEMALLOC_SYS_WITH_LG_PAGE=16; \ - fi && \ - $HOME/.cargo/bin/cargo build --release -# Cleanup dummy main.rs files -RUN find . -path "*/src/main.rs" -delete - ARG COMMIT_SHA ARG COMMIT_DATE ENV COMMIT_SHA=${COMMIT_SHA} COMMIT_DATE=${COMMIT_DATE} +ENV RUSTFLAGS="-C target-feature=-crt-static" COPY . . 
-RUN if [ "$TARGETPLATFORM" = "linux/arm64" ]; then \ +RUN set -eux; \ + apkArch="$(apk --print-arch)"; \ + if [ "$apkArch" = "aarch64" ]; then \ export JEMALLOC_SYS_WITH_LG_PAGE=16; \ fi && \ - $HOME/.cargo/bin/cargo build --release + cargo build --release # Run FROM alpine:3.14 @@ -47,9 +27,20 @@ ENV MEILI_SERVER_PROVIDER docker RUN apk update --quiet \ && apk add -q --no-cache libgcc tini curl -COPY --from=compiler /meilisearch/target/release/meilisearch . +# add meilisearch to the `/bin` so you can run it from anywhere and it's easy +# to find. +COPY --from=compiler /meilisearch/target/release/meilisearch /bin/meilisearch +# To stay compatible with the older version of the container (pre v0.27.0) we're +# going to symlink the meilisearch binary in the path to `/meilisearch` +RUN ln -s /bin/meilisearch /meilisearch + +# This directory should hold all the data related to meilisearch so we're going +# to move our PWD in there. +# We don't want to put the meilisearch binary +WORKDIR /meili_data + EXPOSE 7700/tcp ENTRYPOINT ["tini", "--"] -CMD ./meilisearch +CMD /bin/meilisearch diff --git a/LICENSE b/LICENSE index d1355c36e..744b04960 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2019-2022 Meilisearch +Copyright (c) 2019-2022 Meili SAS Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/bors.toml b/bors.toml index ba97aaf43..d24e6c09b 100644 --- a/bors.toml +++ b/bors.toml @@ -2,8 +2,9 @@ status = [ 'Tests on ubuntu-18.04', 'Tests on macos-latest', 'Tests on windows-latest', - 'Run Clippy', - 'Run Rustfmt' + # 'Run Clippy', + 'Run Rustfmt', + 'Run tests in debug', ] pr_status = ['Milestone Check'] # 3 hours timeout diff --git a/meilisearch-auth/Cargo.toml b/meilisearch-auth/Cargo.toml index e29f8cec3..193959b05 100644 --- a/meilisearch-auth/Cargo.toml +++ b/meilisearch-auth/Cargo.toml @@ -1,15 +1,15 @@ [package] name = 
"meilisearch-auth" -version = "0.26.1" +version = "0.27.0" edition = "2021" [dependencies] enum-iterator = "0.7.0" -heed = { git = "https://github.com/Kerollmops/heed", tag = "v0.12.1" } -sha2 = "0.9.6" meilisearch-error = { path = "../meilisearch-error" } -serde_json = { version = "1.0.67", features = ["preserve_order"] } -time = { version = "0.3.7", features = ["serde-well-known", "formatting", "parsing", "macros"] } +milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.26.4" } rand = "0.8.4" -serde = { version = "1.0.130", features = ["derive"] } -thiserror = "1.0.28" +serde = { version = "1.0.136", features = ["derive"] } +serde_json = { version = "1.0.79", features = ["preserve_order"] } +sha2 = "0.10.2" +thiserror = "1.0.30" +time = { version = "0.3.7", features = ["serde-well-known", "formatting", "parsing", "macros"] } diff --git a/meilisearch-auth/src/error.rs b/meilisearch-auth/src/error.rs index 70cfd3905..8a87eda27 100644 --- a/meilisearch-auth/src/error.rs +++ b/meilisearch-auth/src/error.rs @@ -25,7 +25,7 @@ pub enum AuthControllerError { } internal_error!( - AuthControllerError: heed::Error, + AuthControllerError: milli::heed::Error, std::io::Error, serde_json::Error, std::str::Utf8Error diff --git a/meilisearch-auth/src/store.rs b/meilisearch-auth/src/store.rs index dab75cc5f..4bd3cdded 100644 --- a/meilisearch-auth/src/store.rs +++ b/meilisearch-auth/src/store.rs @@ -8,8 +8,8 @@ use std::path::Path; use std::str; use std::sync::Arc; -use heed::types::{ByteSlice, DecodeIgnore, SerdeJson}; -use heed::{Database, Env, EnvOpenOptions, RwTxn}; +use milli::heed::types::{ByteSlice, DecodeIgnore, SerdeJson}; +use milli::heed::{Database, Env, EnvOpenOptions, RwTxn}; use time::OffsetDateTime; use super::error::Result; @@ -39,7 +39,7 @@ impl Drop for HeedAuthStore { } } -pub fn open_auth_store_env(path: &Path) -> heed::Result { +pub fn open_auth_store_env(path: &Path) -> milli::heed::Result { let mut options = EnvOpenOptions::new(); 
options.map_size(AUTH_STORE_SIZE); // 1GB options.max_dbs(2); @@ -203,7 +203,7 @@ impl HeedAuthStore { /// optionnally on a spcific index, for a given key. pub struct KeyIdActionCodec; -impl<'a> heed::BytesDecode<'a> for KeyIdActionCodec { +impl<'a> milli::heed::BytesDecode<'a> for KeyIdActionCodec { type DItem = (KeyId, Action, Option<&'a [u8]>); fn bytes_decode(bytes: &'a [u8]) -> Option { @@ -218,7 +218,7 @@ impl<'a> heed::BytesDecode<'a> for KeyIdActionCodec { } } -impl<'a> heed::BytesEncode<'a> for KeyIdActionCodec { +impl<'a> milli::heed::BytesEncode<'a> for KeyIdActionCodec { type EItem = (&'a KeyId, &'a Action, Option<&'a [u8]>); fn bytes_encode((key_id, action, index): &Self::EItem) -> Option> { diff --git a/meilisearch-error/Cargo.toml b/meilisearch-error/Cargo.toml index 2bda89a3c..77e24fe9a 100644 --- a/meilisearch-error/Cargo.toml +++ b/meilisearch-error/Cargo.toml @@ -1,15 +1,15 @@ [package] name = "meilisearch-error" -version = "0.26.1" +version = "0.27.0" authors = ["marin "] edition = "2021" [dependencies] -actix-web = { version = "4", default-features = false } +actix-web = { version = "4.0.1", default-features = false } proptest = { version = "1.0.0", optional = true } proptest-derive = { version = "0.3.0", optional = true } -serde = { version = "1.0.130", features = ["derive"] } -serde_json = "1.0.69" +serde = { version = "1.0.136", features = ["derive"] } +serde_json = "1.0.79" [features] test-traits = ["proptest", "proptest-derive"] diff --git a/meilisearch-error/src/lib.rs b/meilisearch-error/src/lib.rs index 81371eb6d..11613497c 100644 --- a/meilisearch-error/src/lib.rs +++ b/meilisearch-error/src/lib.rs @@ -120,6 +120,7 @@ pub enum Code { IndexAlreadyExists, IndexNotFound, InvalidIndexUid, + InvalidMinWordLengthForTypo, // invalid state error InvalidState, @@ -271,6 +272,9 @@ impl Code { InvalidApiKeyDescription => { ErrCode::invalid("invalid_api_key_description", StatusCode::BAD_REQUEST) } + InvalidMinWordLengthForTypo => { + 
ErrCode::invalid("invalid_min_word_length_for_typo", StatusCode::BAD_REQUEST) + } } } diff --git a/meilisearch-http/Cargo.toml b/meilisearch-http/Cargo.toml index aeb13275b..86068e7a5 100644 --- a/meilisearch-http/Cargo.toml +++ b/meilisearch-http/Cargo.toml @@ -4,90 +4,89 @@ description = "Meilisearch HTTP server" edition = "2021" license = "MIT" name = "meilisearch-http" -version = "0.26.1" +version = "0.27.0" [[bin]] name = "meilisearch" path = "src/main.rs" [build-dependencies] -static-files = { version = "0.2.1", optional = true } -anyhow = { version = "1.0.43", optional = true } -cargo_toml = { version = "0.9", optional = true } +anyhow = { version = "1.0.56", optional = true } +cargo_toml = { version = "0.11.4", optional = true } hex = { version = "0.4.3", optional = true } -reqwest = { version = "0.11.4", features = ["blocking", "rustls-tls"], default-features = false, optional = true } -sha-1 = { version = "0.9.8", optional = true } -tempfile = { version = "3.2.0", optional = true } -vergen = { version = "5.1.15", default-features = false, features = ["git"] } +reqwest = { version = "0.11.9", features = ["blocking", "rustls-tls"], default-features = false, optional = true } +sha-1 = { version = "0.10.0", optional = true } +static-files = { version = "0.2.3", optional = true } +tempfile = { version = "3.3.0", optional = true } +vergen = { version = "7.0.0", default-features = false, features = ["git"] } zip = { version = "0.5.13", optional = true } [dependencies] -actix-cors = "0.6" -actix-web = { version = "4", features = ["rustls"] } +actix-cors = "0.6.1" +actix-web = { version = "4.0.1", default-features = false, features = ["macros", "compress-brotli", "compress-gzip", "cookies", "rustls"] } actix-web-static-files = { git = "https://github.com/kilork/actix-web-static-files.git", rev = "2d3b6160", optional = true } -anyhow = { version = "1.0.43", features = ["backtrace"] } -arc-swap = "1.3.2" -async-stream = "0.3.2" -async-trait = "0.1.51" +anyhow = { 
version = "1.0.56", features = ["backtrace"] } +async-stream = "0.3.3" +async-trait = "0.1.52" bstr = "0.2.17" -byte-unit = { version = "4.0.12", default-features = false, features = ["std", "serde"] } +byte-unit = { version = "4.0.14", default-features = false, features = ["std", "serde"] } bytes = "1.1.0" -crossbeam-channel = "0.5.1" +clap = { version = "3.1.6", features = ["derive", "env"] } +crossbeam-channel = "0.5.2" either = "1.6.1" env_logger = "0.9.0" -flate2 = "1.0.21" +flate2 = "1.0.22" fst = "0.4.7" -futures = "0.3.17" -futures-util = "0.3.17" -heed = { git = "https://github.com/Kerollmops/heed", tag = "v0.12.1" } -http = "0.2.4" -indexmap = { version = "1.7.0", features = ["serde-1"] } -iso8601-duration = "0.1.0" -itertools = "0.10.1" -jsonwebtoken = "7" +futures = "0.3.21" +futures-util = "0.3.21" +http = "0.2.6" +indexmap = { version = "1.8.0", features = ["serde-1"] } +itertools = "0.10.3" +jsonwebtoken = "8.0.1" log = "0.4.14" meilisearch-auth = { path = "../meilisearch-auth" } meilisearch-error = { path = "../meilisearch-error" } meilisearch-lib = { path = "../meilisearch-lib" } mime = "0.3.16" -num_cpus = "1.13.0" +num_cpus = "1.13.1" obkv = "0.2.0" -once_cell = "1.8.0" -parking_lot = "0.11.2" +once_cell = "1.10.0" +parking_lot = "0.12.0" +pin-project-lite = "0.2.8" platform-dirs = "0.3.0" -rand = "0.8.4" +rand = "0.8.5" rayon = "1.5.1" -regex = "1.5.4" -rustls = "0.20.2" -rustls-pemfile = "0.2" +regex = "1.5.5" +rustls = "0.20.4" +rustls-pemfile = "0.3.0" segment = { version = "0.2.0", optional = true } -serde = { version = "1.0.130", features = ["derive"] } -serde_json = { version = "1.0.67", features = ["preserve_order"] } -sha2 = "0.9.6" -siphasher = "0.3.7" -slice-group-by = "0.2.6" -static-files = { version = "0.2.1", optional = true } -clap = { version = "3.0", features = ["derive", "env"] } -sysinfo = "0.20.2" -tar = "0.4.37" -tempfile = "3.2.0" -thiserror = "1.0.28" +serde = { version = "1.0.136", features = ["derive"] } +serde_json = { 
version = "1.0.79", features = ["preserve_order"] } +sha2 = "0.10.2" +siphasher = "0.3.10" +slice-group-by = "0.3.0" +static-files = { version = "0.2.3", optional = true } +sysinfo = "0.23.5" +tar = "0.4.38" +tempfile = "3.3.0" +thiserror = "1.0.30" time = { version = "0.3.7", features = ["serde-well-known", "formatting", "parsing", "macros"] } -tokio = { version = "1.11.0", features = ["full"] } -tokio-stream = "0.1.7" +tokio = { version = "1.17.0", features = ["full"] } +tokio-stream = "0.1.8" uuid = { version = "0.8.2", features = ["serde"] } walkdir = "2.3.2" -pin-project-lite = "0.2.8" [dev-dependencies] -actix-rt = "2.2.0" +actix-rt = "2.7.0" assert-json-diff = "2.0.1" maplit = "1.0.2" -paste = "1.0.5" +paste = "1.0.6" serde_url_params = "0.2.1" urlencoding = "2.1.0" [features] +default = ["analytics", "mini-dashboard"] +analytics = ["segment"] mini-dashboard = [ "actix-web-static-files", "static-files", @@ -99,12 +98,10 @@ mini-dashboard = [ "tempfile", "zip", ] -analytics = ["segment"] -default = ["analytics", "mini-dashboard"] [target.'cfg(target_os = "linux")'.dependencies] -tikv-jemallocator = "0.4.1" +tikv-jemallocator = "0.4.3" [package.metadata.mini-dashboard] -assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.1.9/build.zip" -sha1 = "b1833c3e5dc6b5d9d519ae4834935ae6c8a47024" +assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.1.10/build.zip" +sha1 = "1adf96592c267425c110bfefc36b7fc6bfb0f93d" diff --git a/meilisearch-http/src/analytics/segment_analytics.rs b/meilisearch-http/src/analytics/segment_analytics.rs index 905d55281..3d3b23d70 100644 --- a/meilisearch-http/src/analytics/segment_analytics.rs +++ b/meilisearch-http/src/analytics/segment_analytics.rs @@ -8,7 +8,10 @@ use actix_web::http::header::USER_AGENT; use actix_web::HttpRequest; use http::header::CONTENT_TYPE; use meilisearch_auth::SearchRules; -use meilisearch_lib::index::{SearchQuery, SearchResult}; +use 
meilisearch_lib::index::{ + SearchQuery, SearchResult, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, + DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, +}; use meilisearch_lib::index_controller::Stats; use meilisearch_lib::MeiliSearch; use once_cell::sync::Lazy; @@ -355,6 +358,13 @@ pub struct SearchAggregator { // pagination max_limit: usize, max_offset: usize, + + // formatting + highlight_pre_tag: bool, + highlight_post_tag: bool, + crop_marker: bool, + matches: bool, + crop_length: bool, } impl SearchAggregator { @@ -405,6 +415,12 @@ impl SearchAggregator { ret.max_limit = query.limit; ret.max_offset = query.offset.unwrap_or_default(); + ret.highlight_pre_tag = query.highlight_pre_tag != DEFAULT_HIGHLIGHT_PRE_TAG; + ret.highlight_post_tag = query.highlight_post_tag != DEFAULT_HIGHLIGHT_POST_TAG; + ret.crop_marker = query.crop_marker != DEFAULT_CROP_MARKER; + ret.crop_length = query.crop_length != DEFAULT_CROP_LENGTH; + ret.matches = query.matches; + ret } @@ -452,6 +468,12 @@ impl SearchAggregator { // pagination self.max_limit = self.max_limit.max(other.max_limit); self.max_offset = self.max_offset.max(other.max_offset); + + self.highlight_pre_tag |= other.highlight_pre_tag; + self.highlight_post_tag |= other.highlight_post_tag; + self.crop_marker |= other.crop_marker; + self.matches |= other.matches; + self.crop_length |= other.crop_length; } pub fn into_event(self, user: &User, event_name: &str) -> Option { @@ -489,6 +511,13 @@ impl SearchAggregator { "max_limit": self.max_limit, "max_offset": self.max_offset, }, + "formatting": { + "highlight_pre_tag": self.highlight_pre_tag, + "highlight_post_tag": self.highlight_post_tag, + "crop_marker": self.crop_marker, + "matches": self.matches, + "crop_length": self.crop_length, + }, }); Some(Track { @@ -535,7 +564,7 @@ impl DocumentsAggregator { .headers() .get(CONTENT_TYPE) .map(|s| s.to_str().unwrap_or("unkown")) - .unwrap() + .unwrap_or("unkown") .to_string(); ret.content_types.insert(content_type); 
ret.index_creation = index_creation; diff --git a/meilisearch-http/src/extractors/authentication/mod.rs b/meilisearch-http/src/extractors/authentication/mod.rs index 0a0d9ecfe..c4cd9ef14 100644 --- a/meilisearch-http/src/extractors/authentication/mod.rs +++ b/meilisearch-http/src/extractors/authentication/mod.rs @@ -70,11 +70,9 @@ impl GuardedData { where P: Policy + 'static, { - Ok(tokio::task::spawn_blocking(move || { - P::authenticate(auth, token.as_ref(), index.as_deref()) - }) - .await - .map_err(|e| ResponseError::from_msg(e.to_string(), Code::Internal))?) + tokio::task::spawn_blocking(move || P::authenticate(auth, token.as_ref(), index.as_deref())) + .await + .map_err(|e| ResponseError::from_msg(e.to_string(), Code::Internal)) } } @@ -131,8 +129,7 @@ pub trait Policy { } pub mod policies { - use jsonwebtoken::{dangerous_insecure_decode, decode, Algorithm, DecodingKey, Validation}; - use once_cell::sync::Lazy; + use jsonwebtoken::{decode, Algorithm, DecodingKey, Validation}; use serde::{Deserialize, Serialize}; use time::OffsetDateTime; @@ -141,11 +138,25 @@ pub mod policies { // reexport actions in policies in order to be used in routes configuration. pub use meilisearch_auth::actions; - pub static TENANT_TOKEN_VALIDATION: Lazy = Lazy::new(|| Validation { - validate_exp: false, - algorithms: vec![Algorithm::HS256, Algorithm::HS384, Algorithm::HS512], - ..Default::default() - }); + fn tenant_token_validation() -> Validation { + let mut validation = Validation::default(); + validation.validate_exp = false; + validation.required_spec_claims.remove("exp"); + validation.algorithms = vec![Algorithm::HS256, Algorithm::HS384, Algorithm::HS512]; + validation + } + + /// Extracts the key prefix used to sign the payload from the payload, without performing any validation. 
+ fn extract_key_prefix(token: &str) -> Option { + let mut validation = tenant_token_validation(); + validation.insecure_disable_signature_validation(); + let dummy_key = DecodingKey::from_secret(b"secret"); + let token_data = decode::(token, &dummy_key, &validation).ok()?; + + // get token fields without validating it. + let Claims { api_key_prefix, .. } = token_data.claims; + Some(api_key_prefix) + } pub struct MasterPolicy; @@ -204,27 +215,7 @@ pub mod policies { return None; } - // get token fields without validating it. - let Claims { - search_rules, - exp, - api_key_prefix, - } = dangerous_insecure_decode::(token).ok()?.claims; - - // Check index access if an index restriction is provided. - if let Some(index) = index { - if !search_rules.is_index_authorized(index) { - return None; - } - } - - // Check if token is expired. - if let Some(exp) = exp { - if OffsetDateTime::now_utc().unix_timestamp() > exp { - return None; - } - } - + let api_key_prefix = extract_key_prefix(token)?; // check if parent key is authorized to do the action. if auth .is_key_authorized(api_key_prefix.as_bytes(), Action::Search, index) @@ -232,15 +223,29 @@ pub mod policies { { // Check if tenant token is valid. let key = auth.generate_key(&api_key_prefix)?; - decode::( + let data = decode::( token, &DecodingKey::from_secret(key.as_bytes()), - &TENANT_TOKEN_VALIDATION, + &tenant_token_validation(), ) .ok()?; + // Check index access if an index restriction is provided. + if let Some(index) = index { + if !data.claims.search_rules.is_index_authorized(index) { + return None; + } + } + + // Check if token is expired. 
+ if let Some(exp) = data.claims.exp { + if OffsetDateTime::now_utc().unix_timestamp() > exp { + return None; + } + } + return auth - .get_key_filters(api_key_prefix, Some(search_rules)) + .get_key_filters(api_key_prefix, Some(data.claims.search_rules)) .ok(); } diff --git a/meilisearch-http/src/helpers/env.rs b/meilisearch-http/src/helpers/env.rs index 9bc81bc69..b76c9c8a7 100644 --- a/meilisearch-http/src/helpers/env.rs +++ b/meilisearch-http/src/helpers/env.rs @@ -1,10 +1,11 @@ +use meilisearch_lib::heed::Env; use walkdir::WalkDir; pub trait EnvSizer { fn size(&self) -> u64; } -impl EnvSizer for heed::Env { +impl EnvSizer for Env { fn size(&self) -> u64 { WalkDir::new(self.path()) .into_iter() diff --git a/meilisearch-http/src/option.rs b/meilisearch-http/src/option.rs index 0f1ff5970..04b61f74e 100644 --- a/meilisearch-http/src/option.rs +++ b/meilisearch-http/src/option.rs @@ -19,6 +19,7 @@ use serde::Serialize; const POSSIBLE_ENV: [&str; 2] = ["development", "production"]; #[derive(Debug, Clone, Parser, Serialize)] +#[clap(version)] pub struct Opt { /// The destination where the database must be created. 
#[clap(long, env = "MEILI_DB_PATH", default_value = "./data.ms")] @@ -145,8 +146,8 @@ pub struct Opt { #[clap(long, env = "MEILI_LOG_LEVEL", default_value = "info")] pub log_level: String, - #[serde(skip)] - #[clap(skip)] + #[serde(flatten)] + #[clap(flatten)] pub indexer_options: IndexerOpts, #[serde(flatten)] @@ -258,3 +259,13 @@ fn load_ocsp(filename: &Option) -> anyhow::Result> { Ok(ret) } + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn test_valid_opt() { + assert!(Opt::try_parse_from(Some("")).is_ok()); + } +} diff --git a/meilisearch-http/src/routes/indexes/search.rs b/meilisearch-http/src/routes/indexes/search.rs index 14b3f74f5..14d36c1b3 100644 --- a/meilisearch-http/src/routes/indexes/search.rs +++ b/meilisearch-http/src/routes/indexes/search.rs @@ -2,7 +2,10 @@ use actix_web::{web, HttpRequest, HttpResponse}; use log::debug; use meilisearch_auth::IndexSearchRules; use meilisearch_error::ResponseError; -use meilisearch_lib::index::{default_crop_length, SearchQuery, DEFAULT_SEARCH_LIMIT}; +use meilisearch_lib::index::{ + default_crop_length, default_crop_marker, default_highlight_post_tag, + default_highlight_pre_tag, SearchQuery, DEFAULT_SEARCH_LIMIT, +}; use meilisearch_lib::MeiliSearch; use serde::Deserialize; use serde_json::Value; @@ -35,6 +38,12 @@ pub struct SearchQueryGet { #[serde(default = "Default::default")] matches: bool, facets_distribution: Option, + #[serde(default = "default_highlight_pre_tag")] + highlight_pre_tag: String, + #[serde(default = "default_highlight_post_tag")] + highlight_post_tag: String, + #[serde(default = "default_crop_marker")] + crop_marker: String, } impl From for SearchQuery { @@ -77,6 +86,9 @@ impl From for SearchQuery { sort, matches: other.matches, facets_distribution, + highlight_pre_tag: other.highlight_pre_tag, + highlight_post_tag: other.highlight_post_tag, + crop_marker: other.crop_marker, } } } diff --git a/meilisearch-http/src/routes/indexes/settings.rs 
b/meilisearch-http/src/routes/indexes/settings.rs index eeb3e71b3..222aca580 100644 --- a/meilisearch-http/src/routes/indexes/settings.rs +++ b/meilisearch-http/src/routes/indexes/settings.rs @@ -21,11 +21,11 @@ macro_rules! make_setting_route { use meilisearch_lib::milli::update::Setting; use meilisearch_lib::{index::Settings, index_controller::Update, MeiliSearch}; - use crate::analytics::Analytics; - use crate::extractors::authentication::{policies::*, GuardedData}; - use crate::extractors::sequential_extractor::SeqHandler; - use crate::task::SummarizedTaskView; use meilisearch_error::ResponseError; + use $crate::analytics::Analytics; + use $crate::extractors::authentication::{policies::*, GuardedData}; + use $crate::extractors::sequential_extractor::SeqHandler; + use $crate::task::SummarizedTaskView; pub async fn delete( meilisearch: GuardedData, MeiliSearch>, @@ -145,8 +145,8 @@ make_setting_route!( "SortableAttributes Updated".to_string(), json!({ "sortable_attributes": { - "total": setting.as_ref().map(|sort| sort.len()).unwrap_or(0), - "has_geo": setting.as_ref().map(|sort| sort.contains("_geo")).unwrap_or(false), + "total": setting.as_ref().map(|sort| sort.len()), + "has_geo": setting.as_ref().map(|sort| sort.contains("_geo")), }, }), Some(req), @@ -161,6 +161,47 @@ make_setting_route!( "displayedAttributes" ); +make_setting_route!( + "/typo-tolerance", + meilisearch_lib::index::updates::TypoSettings, + typo_tolerance, + "typoTolerance", + analytics, + |setting: &Option, req: &HttpRequest| { + use serde_json::json; + + analytics.publish( + "TypoTolerance Updated".to_string(), + json!({ + "typo_tolerance": { + "enabled": setting.as_ref().map(|s| !matches!(s.enabled, Setting::Set(false))), + "disable_on_attributes": setting + .as_ref() + .and_then(|s| s.disable_on_attributes.as_ref().set().map(|m| !m.is_empty())), + "disable_on_words": setting + .as_ref() + .and_then(|s| s.disable_on_words.as_ref().set().map(|m| !m.is_empty())), + 
"min_word_size_for_one_typo": setting + .as_ref() + .and_then(|s| s.min_word_size_for_typos + .as_ref() + .set() + .map(|s| s.one_typo.set())) + .flatten(), + "min_word_size_for_two_typos": setting + .as_ref() + .and_then(|s| s.min_word_size_for_typos + .as_ref() + .set() + .map(|s| s.two_typos.set())) + .flatten(), + }, + }), + Some(req), + ); + } +); + make_setting_route!( "/searchable-attributes", Vec, @@ -174,7 +215,7 @@ make_setting_route!( "SearchableAttributes Updated".to_string(), json!({ "searchable_attributes": { - "total": setting.as_ref().map(|searchable| searchable.len()).unwrap_or(0), + "total": setting.as_ref().map(|searchable| searchable.len()), }, }), Some(req), @@ -246,7 +287,8 @@ generate_configure!( distinct_attribute, stop_words, synonyms, - ranking_rules + ranking_rules, + typo_tolerance ); pub async fn update_all( @@ -265,15 +307,46 @@ pub async fn update_all( "sort_position": settings.ranking_rules.as_ref().set().map(|sort| sort.iter().position(|s| s == "sort")), }, "searchable_attributes": { - "total": settings.searchable_attributes.as_ref().set().map(|searchable| searchable.len()).unwrap_or(0), + "total": settings.searchable_attributes.as_ref().set().map(|searchable| searchable.len()), }, "sortable_attributes": { - "total": settings.sortable_attributes.as_ref().set().map(|sort| sort.len()).unwrap_or(0), - "has_geo": settings.sortable_attributes.as_ref().set().map(|sort| sort.iter().any(|s| s == "_geo")).unwrap_or(false), + "total": settings.sortable_attributes.as_ref().set().map(|sort| sort.len()), + "has_geo": settings.sortable_attributes.as_ref().set().map(|sort| sort.iter().any(|s| s == "_geo")), }, "filterable_attributes": { - "total": settings.filterable_attributes.as_ref().set().map(|filter| filter.len()).unwrap_or(0), - "has_geo": settings.filterable_attributes.as_ref().set().map(|filter| filter.iter().any(|s| s == "_geo")).unwrap_or(false), + "total": settings.filterable_attributes.as_ref().set().map(|filter| filter.len()), + 
"has_geo": settings.filterable_attributes.as_ref().set().map(|filter| filter.iter().any(|s| s == "_geo")), + }, + "typo_tolerance": { + "enabled": settings.typo_tolerance + .as_ref() + .set() + .and_then(|s| s.enabled.as_ref().set()) + .copied(), + "disable_on_attributes": settings.typo_tolerance + .as_ref() + .set() + .and_then(|s| s.disable_on_attributes.as_ref().set().map(|m| !m.is_empty())), + "disable_on_words": settings.typo_tolerance + .as_ref() + .set() + .and_then(|s| s.disable_on_words.as_ref().set().map(|m| !m.is_empty())), + "min_word_size_for_one_typo": settings.typo_tolerance + .as_ref() + .set() + .and_then(|s| s.min_word_size_for_typos + .as_ref() + .set() + .map(|s| s.one_typo.set())) + .flatten(), + "min_word_size_for_two_typos": settings.typo_tolerance + .as_ref() + .set() + .and_then(|s| s.min_word_size_for_typos + .as_ref() + .set() + .map(|s| s.two_typos.set())) + .flatten(), }, }), Some(&req), diff --git a/meilisearch-http/tests/common/server.rs b/meilisearch-http/tests/common/server.rs index dcb4b6266..b439ec52e 100644 --- a/meilisearch-http/tests/common/server.rs +++ b/meilisearch-http/tests/common/server.rs @@ -1,4 +1,6 @@ #![allow(dead_code)] + +use clap::Parser; use std::path::Path; use actix_web::http::StatusCode; @@ -126,36 +128,18 @@ pub fn default_settings(dir: impl AsRef) -> Opt { Opt { db_path: dir.as_ref().join("db"), dumps_dir: dir.as_ref().join("dump"), - http_addr: "127.0.0.1:7700".to_owned(), - master_key: None, env: "development".to_owned(), #[cfg(all(not(debug_assertions), feature = "analytics"))] no_analytics: true, max_index_size: Byte::from_unit(4.0, ByteUnit::GiB).unwrap(), max_task_db_size: Byte::from_unit(4.0, ByteUnit::GiB).unwrap(), http_payload_size_limit: Byte::from_unit(10.0, ByteUnit::MiB).unwrap(), - ssl_cert_path: None, - ssl_key_path: None, - ssl_auth_path: None, - ssl_ocsp_path: None, - ssl_require_auth: false, - ssl_resumption: false, - ssl_tickets: false, - import_snapshot: None, - ignore_missing_snapshot: 
false, - ignore_snapshot_if_db_exists: false, snapshot_dir: ".".into(), - schedule_snapshot: false, - snapshot_interval_sec: 0, - import_dump: None, - ignore_missing_dump: false, - ignore_dump_if_db_exists: false, indexer_options: IndexerOpts { // memory has to be unlimited because several meilisearch are running in test context. - max_memory: MaxMemory::unlimited(), - ..Default::default() + max_indexing_memory: MaxMemory::unlimited(), + ..Parser::parse_from(None as Option<&str>) }, - log_level: "off".into(), - scheduler_options: meilisearch_lib::options::SchedulerConfig::default(), + ..Parser::parse_from(None as Option<&str>) } } diff --git a/meilisearch-http/tests/documents/add_documents.rs b/meilisearch-http/tests/documents/add_documents.rs index c684458c5..652651e7f 100644 --- a/meilisearch-http/tests/documents/add_documents.rs +++ b/meilisearch-http/tests/documents/add_documents.rs @@ -212,7 +212,7 @@ async fn error_add_malformed_csv_documents() { assert_eq!( response["message"], json!( - r#"The `csv` payload provided is malformed. `CSV error: record 1 (line: 2, byte: 12): found record with 3 fields, but the previous record has 2 fields`."# + r#"The `csv` payload provided is malformed: `CSV error: record 1 (line: 2, byte: 12): found record with 3 fields, but the previous record has 2 fields`."# ) ); assert_eq!(response["code"], json!("malformed_payload")); @@ -236,7 +236,7 @@ async fn error_add_malformed_csv_documents() { assert_eq!( response["message"], json!( - r#"The `csv` payload provided is malformed. 
`CSV error: record 1 (line: 2, byte: 12): found record with 3 fields, but the previous record has 2 fields`."# + r#"The `csv` payload provided is malformed: `CSV error: record 1 (line: 2, byte: 12): found record with 3 fields, but the previous record has 2 fields`."# ) ); assert_eq!(response["code"], json!("malformed_payload")); @@ -307,6 +307,58 @@ async fn error_add_malformed_json_documents() { response["link"], json!("https://docs.meilisearch.com/errors#malformed_payload") ); + + // truncate + + // length = 100 + let long = "0123456789".repeat(10); + + let document = format!("\"{}\"", long); + let req = test::TestRequest::put() + .uri("/indexes/dog/documents") + .set_payload(document) + .insert_header(("content-type", "application/json")) + .to_request(); + let res = test::call_service(&app, req).await; + let body = test::read_body(res).await; + let response: Value = serde_json::from_slice(&body).unwrap_or_default(); + assert_eq!(status_code, 400); + assert_eq!( + response["message"], + json!( + r#"The `json` payload provided is malformed. `Couldn't serialize document value: invalid type: string "0123456789012345678901234567...890123456789", expected a documents, or a sequence of documents. at line 1 column 102`."# + ) + ); + assert_eq!(response["code"], json!("malformed_payload")); + assert_eq!(response["type"], json!("invalid_request")); + assert_eq!( + response["link"], + json!("https://docs.meilisearch.com/errors#malformed_payload") + ); + + // add one more char to the long string to test if the truncating works. 
+ let document = format!("\"{}m\"", long); + let req = test::TestRequest::put() + .uri("/indexes/dog/documents") + .set_payload(document) + .insert_header(("content-type", "application/json")) + .to_request(); + let res = test::call_service(&app, req).await; + let body = test::read_body(res).await; + let response: Value = serde_json::from_slice(&body).unwrap_or_default(); + assert_eq!(status_code, 400); + assert_eq!( + response["message"], + json!( + r#"The `json` payload provided is malformed. `Couldn't serialize document value: invalid type: string "0123456789012345678901234567...90123456789m", expected a documents, or a sequence of documents. at line 1 column 103`."# + ) + ); + assert_eq!(response["code"], json!("malformed_payload")); + assert_eq!(response["type"], json!("invalid_request")); + assert_eq!( + response["link"], + json!("https://docs.meilisearch.com/errors#malformed_payload") + ); } #[actix_rt::test] @@ -961,7 +1013,7 @@ async fn error_add_documents_invalid_geo_field() { assert_eq!(response["status"], "failed"); let expected_error = json!({ - "message": r#"The document with the id: `11` contains an invalid _geo field: `foobar`."#, + "message": r#"The document with the id: `11` contains an invalid `_geo` field."#, "code": "invalid_geo_field", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_geo_field" diff --git a/meilisearch-http/tests/documents/get_documents.rs b/meilisearch-http/tests/documents/get_documents.rs index 4ab479efb..6c93b9c13 100644 --- a/meilisearch-http/tests/documents/get_documents.rs +++ b/meilisearch-http/tests/documents/get_documents.rs @@ -155,7 +155,7 @@ async fn test_get_all_documents_offset() { .await; assert_eq!(code, 200); assert_eq!(response.as_array().unwrap().len(), 20); - assert_eq!(response.as_array().unwrap()[0]["id"], 13); + assert_eq!(response.as_array().unwrap()[0]["id"], 5); } #[actix_rt::test] diff --git a/meilisearch-http/tests/search/errors.rs 
b/meilisearch-http/tests/search/errors.rs index 44a6e79bb..500825364 100644 --- a/meilisearch-http/tests/search/errors.rs +++ b/meilisearch-http/tests/search/errors.rs @@ -36,6 +36,38 @@ async fn search_unexisting_parameter() { .await; } +#[actix_rt::test] +async fn search_invalid_highlight_and_crop_tags() { + let server = Server::new().await; + let index = server.index("test"); + + let fields = &["cropMarker", "highlightPreTag", "highlightPostTag"]; + + for field in fields { + // object + index + .search( + json!({field.to_string(): {"marker": ""}}), + |response, code| { + assert_eq!(code, 400, "field {} passing object: {}", &field, response); + assert_eq!(response["code"], "bad_request"); + }, + ) + .await; + + // array + index + .search( + json!({field.to_string(): ["marker", ""]}), + |response, code| { + assert_eq!(code, 400, "field {} passing array: {}", &field, response); + assert_eq!(response["code"], "bad_request"); + }, + ) + .await; + } +} + #[actix_rt::test] async fn filter_invalid_syntax_object() { let server = Server::new().await; diff --git a/meilisearch-http/tests/search/formatted.rs b/meilisearch-http/tests/search/formatted.rs new file mode 100644 index 000000000..13b8a07d8 --- /dev/null +++ b/meilisearch-http/tests/search/formatted.rs @@ -0,0 +1,376 @@ +use super::*; +use crate::common::Server; +use serde_json::json; + +#[actix_rt::test] +async fn formatted_contain_wildcard() { + let server = Server::new().await; + let index = server.index("test"); + + index + .update_settings(json!({ "displayedAttributes": ["id", "cattos"] })) + .await; + + let documents = NESTED_DOCUMENTS.clone(); + index.add_documents(documents, None).await; + index.wait_task(1).await; + + let (response, code) = index + .search_post(json!({ "q": "pesti", "attributesToRetrieve": ["father", "mother"], "attributesToHighlight": ["father", "mother", "*"], "attributesToCrop": ["doggos"] })) + .await; + assert_eq!(code, 200, "{}", response); + assert_eq!( + response["hits"][0], + 
json!({ + "_formatted": { + "id": "852", + "cattos": "pesti", + } + }) + ); + + let (response, code) = index + .search_post(json!({ "q": "pesti", "attributesToRetrieve": ["*"] })) + .await; + assert_eq!(code, 200, "{}", response); + assert_eq!( + response["hits"][0], + json!({ + "id": 852, + "cattos": "pesti", + }) + ); + + let (response, code) = index + .search_post( + json!({ "q": "pesti", "attributesToRetrieve": ["*"], "attributesToHighlight": ["id"] }), + ) + .await; + assert_eq!(code, 200, "{}", response); + assert_eq!( + response["hits"][0], + json!({ + "id": 852, + "cattos": "pesti", + "_formatted": { + "id": "852", + "cattos": "pesti", + } + }) + ); + + let (response, code) = index + .search_post( + json!({ "q": "pesti", "attributesToRetrieve": ["*"], "attributesToCrop": ["*"] }), + ) + .await; + assert_eq!(code, 200, "{}", response); + assert_eq!( + response["hits"][0], + json!({ + "id": 852, + "cattos": "pesti", + "_formatted": { + "id": "852", + "cattos": "pesti", + } + }) + ); + + let (response, code) = index + .search_post(json!({ "q": "pesti", "attributesToCrop": ["*"] })) + .await; + assert_eq!(code, 200, "{}", response); + assert_eq!( + response["hits"][0], + json!({ + "id": 852, + "cattos": "pesti", + "_formatted": { + "id": "852", + "cattos": "pesti", + } + }) + ); +} + +#[actix_rt::test] +async fn format_nested() { + let server = Server::new().await; + let index = server.index("test"); + + let documents = NESTED_DOCUMENTS.clone(); + index.add_documents(documents, None).await; + index.wait_task(0).await; + + let (response, code) = index + .search_post(json!({ "q": "pesti", "attributesToRetrieve": ["doggos"] })) + .await; + assert_eq!(code, 200, "{}", response); + assert_eq!( + response["hits"][0], + json!({ + "doggos": [ + { + "name": "bobby", + "age": 2, + }, + { + "name": "buddy", + "age": 4, + }, + ], + }) + ); + + let (response, code) = index + .search_post(json!({ "q": "pesti", "attributesToRetrieve": ["doggos.name"] })) + .await; + 
assert_eq!(code, 200, "{}", response); + assert_eq!( + response["hits"][0], + json!({ + "doggos": [ + { + "name": "bobby", + }, + { + "name": "buddy", + }, + ], + }) + ); + + let (response, code) = index + .search_post(json!({ "q": "pesti", "attributesToRetrieve": [], "attributesToHighlight": ["doggos.name"] })) + .await; + assert_eq!(code, 200, "{}", response); + assert_eq!( + response["hits"][0], + json!({ + "_formatted": { + "doggos": [ + { + "name": "bobby", + }, + { + "name": "buddy", + }, + ], + }, + }) + ); + + let (response, code) = index + .search_post(json!({ "q": "pesti", "attributesToRetrieve": [], "attributesToCrop": ["doggos.name"] })) + .await; + assert_eq!(code, 200, "{}", response); + assert_eq!( + response["hits"][0], + json!({ + "_formatted": { + "doggos": [ + { + "name": "bobby", + }, + { + "name": "buddy", + }, + ], + }, + }) + ); + + let (response, code) = index + .search_post(json!({ "q": "pesti", "attributesToRetrieve": ["doggos.name"], "attributesToHighlight": ["doggos.age"] })) + .await; + assert_eq!(code, 200, "{}", response); + assert_eq!( + response["hits"][0], + json!({ + "doggos": [ + { + "name": "bobby", + }, + { + "name": "buddy", + }, + ], + "_formatted": { + "doggos": [ + { + "name": "bobby", + "age": "2", + }, + { + "name": "buddy", + "age": "4", + }, + ], + }, + }) + ); + + let (response, code) = index + .search_post(json!({ "q": "pesti", "attributesToRetrieve": [], "attributesToHighlight": ["doggos.age"], "attributesToCrop": ["doggos.name"] })) + .await; + assert_eq!(code, 200, "{}", response); + assert_eq!( + response["hits"][0], + json!({ + "_formatted": { + "doggos": [ + { + "name": "bobby", + "age": "2", + }, + { + "name": "buddy", + "age": "4", + }, + ], + }, + }) + ); +} + +#[actix_rt::test] +async fn displayedattr_2_smol() { + let server = Server::new().await; + let index = server.index("test"); + + // not enough displayed for the other settings + index + .update_settings(json!({ "displayedAttributes": ["id"] })) + 
.await; + + let documents = NESTED_DOCUMENTS.clone(); + index.add_documents(documents, None).await; + index.wait_task(1).await; + + let (response, code) = index + .search_post(json!({ "attributesToRetrieve": ["father", "id"], "attributesToHighlight": ["mother"], "attributesToCrop": ["cattos"] })) + .await; + assert_eq!(code, 200, "{}", response); + assert_eq!( + response["hits"][0], + json!({ + "id": 852, + }) + ); + + let (response, code) = index + .search_post(json!({ "attributesToRetrieve": ["id"] })) + .await; + assert_eq!(code, 200, "{}", response); + assert_eq!( + response["hits"][0], + json!({ + "id": 852, + }) + ); + + let (response, code) = index + .search_post(json!({ "attributesToHighlight": ["id"] })) + .await; + assert_eq!(code, 200, "{}", response); + assert_eq!( + response["hits"][0], + json!({ + "id": 852, + "_formatted": { + "id": "852", + } + }) + ); + + let (response, code) = index + .search_post(json!({ "attributesToCrop": ["id"] })) + .await; + assert_eq!(code, 200, "{}", response); + assert_eq!( + response["hits"][0], + json!({ + "id": 852, + "_formatted": { + "id": "852", + } + }) + ); + + let (response, code) = index + .search_post(json!({ "attributesToHighlight": ["id"], "attributesToCrop": ["id"] })) + .await; + assert_eq!(code, 200, "{}", response); + assert_eq!( + response["hits"][0], + json!({ + "id": 852, + "_formatted": { + "id": "852", + } + }) + ); + + let (response, code) = index + .search_post(json!({ "attributesToHighlight": ["cattos"] })) + .await; + assert_eq!(code, 200, "{}", response); + assert_eq!( + response["hits"][0], + json!({ + "id": 852, + }) + ); + + let (response, code) = index + .search_post(json!({ "attributesToCrop": ["cattos"] })) + .await; + assert_eq!(code, 200, "{}", response); + assert_eq!( + response["hits"][0], + json!({ + "id": 852, + }) + ); + + let (response, code) = index + .search_post(json!({ "attributesToRetrieve": ["cattos"] })) + .await; + assert_eq!(code, 200, "{}", response); + 
assert_eq!(response["hits"][0], json!({})); + + let (response, code) = index + .search_post( + json!({ "attributesToRetrieve": ["cattos"], "attributesToHighlight": ["cattos"], "attributesToCrop": ["cattos"] }), + ) + .await; + assert_eq!(code, 200, "{}", response); + assert_eq!(response["hits"][0], json!({})); + + let (response, code) = index + .search_post(json!({ "attributesToRetrieve": ["cattos"], "attributesToHighlight": ["id"] })) + .await; + assert_eq!(code, 200, "{}", response); + assert_eq!( + response["hits"][0], + json!({ + "_formatted": { + "id": "852", + } + }) + ); + + let (response, code) = index + .search_post(json!({ "attributesToRetrieve": ["cattos"], "attributesToCrop": ["id"] })) + .await; + assert_eq!(code, 200, "{}", response); + assert_eq!( + response["hits"][0], + json!({ + "_formatted": { + "id": "852", + } + }) + ); +} diff --git a/meilisearch-http/tests/search/mod.rs b/meilisearch-http/tests/search/mod.rs index 7c7924c34..d9b36e85d 100644 --- a/meilisearch-http/tests/search/mod.rs +++ b/meilisearch-http/tests/search/mod.rs @@ -2,38 +2,96 @@ // should be tested in its own module to isolate tests and keep the tests readable. 
mod errors; +mod formatted; use crate::common::Server; use once_cell::sync::Lazy; use serde_json::{json, Value}; -static DOCUMENTS: Lazy = Lazy::new(|| { +pub(self) static DOCUMENTS: Lazy = Lazy::new(|| { json!([ { "title": "Shazam!", - "id": "287947" + "id": "287947", }, { "title": "Captain Marvel", - "id": "299537" + "id": "299537", }, { "title": "Escape Room", - "id": "522681" + "id": "522681", }, - { "title": "How to Train Your Dragon: The Hidden World", "id": "166428" + { + "title": "How to Train Your Dragon: The Hidden World", + "id": "166428", }, { "title": "Glass", - "id": "450465" + "id": "450465", } ]) }); +pub(self) static NESTED_DOCUMENTS: Lazy = Lazy::new(|| { + json!([ + { + "id": 852, + "father": "jean", + "mother": "michelle", + "doggos": [ + { + "name": "bobby", + "age": 2, + }, + { + "name": "buddy", + "age": 4, + }, + ], + "cattos": "pesti", + }, + { + "id": 654, + "father": "pierre", + "mother": "sabine", + "doggos": [ + { + "name": "gros bill", + "age": 8, + }, + ], + "cattos": ["simba", "pestiféré"], + }, + { + "id": 750, + "father": "romain", + "mother": "michelle", + "cattos": ["enigma"], + }, + { + "id": 951, + "father": "jean-baptiste", + "mother": "sophie", + "doggos": [ + { + "name": "turbo", + "age": 5, + }, + { + "name": "fast", + "age": 6, + }, + ], + "cattos": ["moumoute", "gomez"], + }, + ]) +}); + #[actix_rt::test] async fn simple_placeholder_search() { let server = Server::new().await; - let index = server.index("test"); + let index = server.index("basic"); let documents = DOCUMENTS.clone(); index.add_documents(documents, None).await; @@ -45,6 +103,18 @@ async fn simple_placeholder_search() { assert_eq!(response["hits"].as_array().unwrap().len(), 5); }) .await; + + let index = server.index("nested"); + let documents = NESTED_DOCUMENTS.clone(); + index.add_documents(documents, None).await; + index.wait_task(1).await; + + index + .search(json!({}), |response, code| { + assert_eq!(code, 200, "{}", response); + 
assert_eq!(response["hits"].as_array().unwrap().len(), 4); + }) + .await; } #[actix_rt::test] @@ -62,6 +132,18 @@ async fn simple_search() { assert_eq!(response["hits"].as_array().unwrap().len(), 1); }) .await; + + let index = server.index("nested"); + let documents = NESTED_DOCUMENTS.clone(); + index.add_documents(documents, None).await; + index.wait_task(1).await; + + index + .search(json!({"q": "pesti"}), |response, code| { + assert_eq!(code, 200, "{}", response); + assert_eq!(response["hits"].as_array().unwrap().len(), 2); + }) + .await; } #[actix_rt::test] @@ -88,6 +170,27 @@ async fn search_multiple_params() { }, ) .await; + + let index = server.index("nested"); + let documents = NESTED_DOCUMENTS.clone(); + index.add_documents(documents, None).await; + index.wait_task(1).await; + + index + .search( + json!({ + "q": "pesti", + "attributesToCrop": ["catto:2"], + "attributesToHighlight": ["catto"], + "limit": 2, + "offset": 0, + }), + |response, code| { + assert_eq!(code, 200, "{}", response); + assert_eq!(response["hits"].as_array().unwrap().len(), 2); + }, + ) + .await; } #[actix_rt::test] @@ -114,6 +217,43 @@ async fn search_with_filter_string_notation() { }, ) .await; + + let index = server.index("nested"); + + index + .update_settings(json!({"filterableAttributes": ["cattos", "doggos.age"]})) + .await; + + let documents = NESTED_DOCUMENTS.clone(); + index.add_documents(documents, None).await; + index.wait_task(3).await; + + index + .search( + json!({ + "filter": "cattos = pesti" + }), + |response, code| { + assert_eq!(code, 200, "{}", response); + assert_eq!(response["hits"].as_array().unwrap().len(), 1); + assert_eq!(response["hits"][0]["id"], json!(852)); + }, + ) + .await; + + index + .search( + json!({ + "filter": "doggos.age > 5" + }), + |response, code| { + assert_eq!(code, 200, "{}", response); + assert_eq!(response["hits"].as_array().unwrap().len(), 2); + assert_eq!(response["hits"][0]["id"], json!(654)); + assert_eq!(response["hits"][1]["id"], 
json!(951)); + }, + ) + .await; } #[actix_rt::test] @@ -170,6 +310,28 @@ async fn search_with_sort_on_numbers() { }, ) .await; + + let index = server.index("nested"); + + index + .update_settings(json!({"sortableAttributes": ["doggos.age"]})) + .await; + + let documents = NESTED_DOCUMENTS.clone(); + index.add_documents(documents, None).await; + index.wait_task(3).await; + + index + .search( + json!({ + "sort": ["doggos.age:asc"] + }), + |response, code| { + assert_eq!(code, 200, "{}", response); + assert_eq!(response["hits"].as_array().unwrap().len(), 4); + }, + ) + .await; } #[actix_rt::test] @@ -196,6 +358,28 @@ async fn search_with_sort_on_strings() { }, ) .await; + + let index = server.index("nested"); + + index + .update_settings(json!({"sortableAttributes": ["doggos.name"]})) + .await; + + let documents = NESTED_DOCUMENTS.clone(); + index.add_documents(documents, None).await; + index.wait_task(3).await; + + index + .search( + json!({ + "sort": ["doggos.name:asc"] + }), + |response, code| { + assert_eq!(code, 200, "{}", response); + assert_eq!(response["hits"].as_array().unwrap().len(), 4); + }, + ) + .await; } #[actix_rt::test] @@ -246,6 +430,85 @@ async fn search_facet_distribution() { }, ) .await; + + let index = server.index("nested"); + + index + .update_settings(json!({"filterableAttributes": ["father", "doggos.name"]})) + .await; + + let documents = NESTED_DOCUMENTS.clone(); + index.add_documents(documents, None).await; + index.wait_task(3).await; + + // TODO: TAMO: fix the test + index + .search( + json!({ + // "facetsDistribution": ["father", "doggos.name"] + "facetsDistribution": ["father"] + }), + |response, code| { + assert_eq!(code, 200, "{}", response); + let dist = response["facetsDistribution"].as_object().unwrap(); + assert_eq!(dist.len(), 1); + assert_eq!( + dist["father"], + json!({ "jean": 1, "pierre": 1, "romain": 1, "jean-baptiste": 1}) + ); + /* + assert_eq!( + dist["doggos.name"], + json!({ "bobby": 1, "buddy": 1, "gros bill": 1, 
"turbo": 1, "fast": 1}) + ); + */ + }, + ) + .await; + + index + .update_settings(json!({"filterableAttributes": ["doggos"]})) + .await; + index.wait_task(4).await; + + index + .search( + json!({ + "facetsDistribution": ["doggos.name"] + }), + |response, code| { + assert_eq!(code, 200, "{}", response); + let dist = response["facetsDistribution"].as_object().unwrap(); + assert_eq!(dist.len(), 1); + assert_eq!( + dist["doggos.name"], + json!({ "bobby": 1, "buddy": 1, "gros bill": 1, "turbo": 1, "fast": 1}) + ); + }, + ) + .await; + + index + .search( + json!({ + "facetsDistribution": ["doggos"] + }), + |response, code| { + assert_eq!(code, 200, "{}", response); + let dist = response["facetsDistribution"].as_object().unwrap(); + dbg!(&dist); + assert_eq!(dist.len(), 3); + assert_eq!( + dist["doggos.name"], + json!({ "bobby": 1, "buddy": 1, "gros bill": 1, "turbo": 1, "fast": 1}) + ); + assert_eq!( + dist["doggos.age"], + json!({ "2": 1, "4": 1, "5": 1, "6": 1, "8": 1}) + ); + }, + ) + .await; } #[actix_rt::test] @@ -265,5 +528,81 @@ async fn displayed_attributes() { .search_post(json!({ "attributesToRetrieve": ["title", "id"] })) .await; assert_eq!(code, 200, "{}", response); - assert!(response["hits"].get("title").is_none()); + assert!(response["hits"][0].get("title").is_some()); +} + +#[actix_rt::test] +async fn placeholder_search_is_hard_limited() { + let server = Server::new().await; + let index = server.index("test"); + + let documents: Vec<_> = (0..1200) + .map(|i| json!({ "id": i, "text": "I am unique!" 
})) + .collect(); + index.add_documents(documents.into(), None).await; + index.wait_task(0).await; + + index + .search( + json!({ + "limit": 1500, + }), + |response, code| { + assert_eq!(code, 200, "{}", response); + assert_eq!(response["hits"].as_array().unwrap().len(), 1000); + }, + ) + .await; + + index + .search( + json!({ + "offset": 800, + "limit": 400, + }), + |response, code| { + assert_eq!(code, 200, "{}", response); + assert_eq!(response["hits"].as_array().unwrap().len(), 200); + }, + ) + .await; +} + +#[actix_rt::test] +async fn search_is_hard_limited() { + let server = Server::new().await; + let index = server.index("test"); + + let documents: Vec<_> = (0..1200) + .map(|i| json!({ "id": i, "text": "I am unique!" })) + .collect(); + index.add_documents(documents.into(), None).await; + index.wait_task(0).await; + + index + .search( + json!({ + "q": "unique", + "limit": 1500, + }), + |response, code| { + assert_eq!(code, 200, "{}", response); + assert_eq!(response["hits"].as_array().unwrap().len(), 1000); + }, + ) + .await; + + index + .search( + json!({ + "q": "unique", + "offset": 800, + "limit": 400, + }), + |response, code| { + assert_eq!(code, 200, "{}", response); + assert_eq!(response["hits"].as_array().unwrap().len(), 200); + }, + ) + .await; } diff --git a/meilisearch-http/tests/settings/get_settings.rs b/meilisearch-http/tests/settings/get_settings.rs index 6d9cc1ea5..98b4f9558 100644 --- a/meilisearch-http/tests/settings/get_settings.rs +++ b/meilisearch-http/tests/settings/get_settings.rs @@ -43,7 +43,7 @@ async fn get_settings() { let (response, code) = index.settings().await; assert_eq!(code, 200); let settings = response.as_object().unwrap(); - assert_eq!(settings.keys().len(), 8); + assert_eq!(settings.keys().len(), 9); assert_eq!(settings["displayedAttributes"], json!(["*"])); assert_eq!(settings["searchableAttributes"], json!(["*"])); assert_eq!(settings["filterableAttributes"], json!([])); diff --git a/meilisearch-lib/Cargo.toml 
b/meilisearch-lib/Cargo.toml index 7af40257d..41ef7169c 100644 --- a/meilisearch-lib/Cargo.toml +++ b/meilisearch-lib/Cargo.toml @@ -1,67 +1,66 @@ [package] name = "meilisearch-lib" -version = "0.26.1" +version = "0.27.0" edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -actix-web = { version = "4", default-features = false } -anyhow = { version = "1.0.43", features = ["backtrace"] } -async-stream = "0.3.2" -async-trait = "0.1.51" -byte-unit = { version = "4.0.12", default-features = false, features = ["std"] } +actix-web = { version = "4.0.1", default-features = false } +anyhow = { version = "1.0.56", features = ["backtrace"] } +async-stream = "0.3.3" +async-trait = "0.1.52" +atomic_refcell = "0.1.8" +byte-unit = { version = "4.0.14", default-features = false, features = ["std"] } bytes = "1.1.0" +clap = { version = "3.1.6", features = ["derive", "env"] } +crossbeam-channel = "0.5.2" csv = "1.1.6" -crossbeam-channel = "0.5.1" +derivative = "2.2.0" either = "1.6.1" -flate2 = "1.0.21" +flate2 = "1.0.22" +fs_extra = "1.2.0" fst = "0.4.7" -futures = "0.3.17" -futures-util = "0.3.17" -heed = { git = "https://github.com/Kerollmops/heed", tag = "v0.12.1" } -http = "0.2.4" -indexmap = { version = "1.7.0", features = ["serde-1"] } -itertools = "0.10.1" +futures = "0.3.21" +futures-util = "0.3.21" +http = "0.2.6" +indexmap = { version = "1.8.0", features = ["serde-1"] } +itertools = "0.10.3" lazy_static = "1.4.0" log = "0.4.14" -meilisearch-error = { path = "../meilisearch-error" } meilisearch-auth = { path = "../meilisearch-auth" } -milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.22.2" } +meilisearch-error = { path = "../meilisearch-error" } +milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.26.4" } mime = "0.3.16" -num_cpus = "1.13.0" -once_cell = "1.8.0" -parking_lot = "0.11.2" -rand = "0.8.4" +num_cpus = "1.13.1" +obkv = "0.2.0" +once_cell = "1.10.0" 
+parking_lot = "0.12.0" +permissive-json-pointer = { path = "../permissive-json-pointer" } +rand = "0.8.5" rayon = "1.5.1" -regex = "1.5.4" -rustls = "0.19.1" -serde = { version = "1.0.130", features = ["derive"] } -serde_json = { version = "1.0.67", features = ["preserve_order"] } -siphasher = "0.3.7" -slice-group-by = "0.2.6" -clap = { version = "3.0", features = ["derive", "env"] } -tar = "0.4.37" -tempfile = "3.2.0" -thiserror = "1.0.28" +regex = "1.5.5" +reqwest = { version = "0.11.9", features = ["json", "rustls-tls"], default-features = false, optional = true } +rustls = "0.20.4" +serde = { version = "1.0.136", features = ["derive"] } +serde_json = { version = "1.0.79", features = ["preserve_order"] } +siphasher = "0.3.10" +slice-group-by = "0.3.0" +sysinfo = "0.23.5" +tar = "0.4.38" +tempfile = "3.3.0" +thiserror = "1.0.30" time = { version = "0.3.7", features = ["serde-well-known", "formatting", "parsing", "macros"] } -tokio = { version = "1.11.0", features = ["full"] } +tokio = { version = "1.17.0", features = ["full"] } uuid = { version = "0.8.2", features = ["serde"] } walkdir = "2.3.2" -obkv = "0.2.0" -pin-project = "1.0.8" -whoami = { version = "1.1.3", optional = true } -reqwest = { version = "0.11.4", features = ["json", "rustls-tls"], default-features = false, optional = true } -sysinfo = "0.20.2" -derivative = "2.2.0" -fs_extra = "1.2.0" -atomic_refcell = "0.1.8" +whoami = { version = "1.2.1", optional = true } [dev-dependencies] -actix-rt = "2.2.0" -mockall = "0.10.2" -paste = "1.0.5" -nelson = { git = "https://github.com/MarinPostma/nelson.git", rev = "675f13885548fb415ead8fbb447e9e6d9314000a"} +actix-rt = "2.7.0" meilisearch-error = { path = "../meilisearch-error", features = ["test-traits"] } +mockall = "0.11.0" +nelson = { git = "https://github.com/MarinPostma/nelson.git", rev = "675f13885548fb415ead8fbb447e9e6d9314000a"} +paste = "1.0.6" proptest = "1.0.0" proptest-derive = "0.3.0" diff --git a/meilisearch-lib/src/document_formats.rs 
b/meilisearch-lib/src/document_formats.rs index cfd73038e..93c47afe8 100644 --- a/meilisearch-lib/src/document_formats.rs +++ b/meilisearch-lib/src/document_formats.rs @@ -1,4 +1,5 @@ -use std::fmt; +use std::borrow::Borrow; +use std::fmt::{self, Debug, Display}; use std::io::{self, BufRead, BufReader, BufWriter, Cursor, Read, Seek, Write}; use meilisearch_error::{internal_error, Code, ErrorCode}; @@ -23,17 +24,40 @@ impl fmt::Display for PayloadType { } } -#[derive(thiserror::Error, Debug)] +#[derive(Debug)] pub enum DocumentFormatError { - #[error("An internal error has occurred. `{0}`.")] Internal(Box), - #[error("The `{1}` payload provided is malformed. `{0}`.")] - MalformedPayload( - Box, - PayloadType, - ), + MalformedPayload(Box, PayloadType), } +impl Display for DocumentFormatError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::Internal(e) => write!(f, "An internal error has occurred: `{}`.", e), + Self::MalformedPayload(me, b) => match me.borrow() { + milli::documents::Error::JsonError(se) => { + // https://github.com/meilisearch/meilisearch/issues/2107 + // The user input maybe insanely long. We need to truncate it. + let mut serde_msg = se.to_string(); + let ellipsis = "..."; + if serde_msg.len() > 100 + ellipsis.len() { + serde_msg.replace_range(50..serde_msg.len() - 85, ellipsis); + } + + write!( + f, + "The `{}` payload provided is malformed. 
`Couldn't serialize document value: {}`.", + b, serde_msg + ) + } + _ => write!(f, "The `{}` payload provided is malformed: `{}`.", b, me), + }, + } + } +} + +impl std::error::Error for DocumentFormatError {} + impl From<(PayloadType, milli::documents::Error)> for DocumentFormatError { fn from((ty, error): (PayloadType, milli::documents::Error)) -> Self { match error { diff --git a/meilisearch-lib/src/error.rs b/meilisearch-lib/src/error.rs index 6da65192a..c3e7b8313 100644 --- a/meilisearch-lib/src/error.rs +++ b/meilisearch-lib/src/error.rs @@ -41,6 +41,9 @@ impl ErrorCode for MilliError<'_> { UserError::CriterionError(_) => Code::InvalidRankingRule, UserError::InvalidGeoField { .. } => Code::InvalidGeoField, UserError::SortError(_) => Code::Sort, + UserError::InvalidMinTypoWordLenSetting(_, _) => { + Code::InvalidMinWordLengthForTypo + } } } } diff --git a/meilisearch-lib/src/index/dump.rs b/meilisearch-lib/src/index/dump.rs index 153b724b9..e201e738b 100644 --- a/meilisearch-lib/src/index/dump.rs +++ b/meilisearch-lib/src/index/dump.rs @@ -3,9 +3,9 @@ use std::io::{BufReader, Seek, SeekFrom, Write}; use std::path::Path; use anyhow::Context; -use heed::{EnvOpenOptions, RoTxn}; use indexmap::IndexMap; use milli::documents::DocumentBatchReader; +use milli::heed::{EnvOpenOptions, RoTxn}; use milli::update::{IndexDocumentsConfig, IndexerConfig}; use serde::{Deserialize, Serialize}; @@ -146,7 +146,7 @@ impl Index { indexer_config, config, |_| (), - ); + )?; builder.add_documents(documents_reader)?; builder.execute()?; } diff --git a/meilisearch-lib/src/index/error.rs b/meilisearch-lib/src/index/error.rs index f8dcc0dc8..89a12a41f 100644 --- a/meilisearch-lib/src/index/error.rs +++ b/meilisearch-lib/src/index/error.rs @@ -21,7 +21,7 @@ pub enum IndexError { internal_error!( IndexError: std::io::Error, - heed::Error, + milli::heed::Error, fst::Error, serde_json::Error, update_file_store::UpdateFileStoreError, diff --git a/meilisearch-lib/src/index/index.rs 
b/meilisearch-lib/src/index/index.rs index a17ed8504..03b4ca7dd 100644 --- a/meilisearch-lib/src/index/index.rs +++ b/meilisearch-lib/src/index/index.rs @@ -5,7 +5,8 @@ use std::ops::Deref; use std::path::Path; use std::sync::Arc; -use heed::{EnvOpenOptions, RoTxn}; +use fst::IntoStreamer; +use milli::heed::{EnvOpenOptions, RoTxn}; use milli::update::{IndexerConfig, Setting}; use milli::{obkv_to_json, FieldDistribution, FieldId}; use serde::{Deserialize, Serialize}; @@ -17,6 +18,7 @@ use crate::EnvSizer; use super::error::IndexError; use super::error::Result; +use super::updates::{MinWordSizeTyposSetting, TypoSettings}; use super::{Checked, Settings}; pub type Document = Map; @@ -37,7 +39,7 @@ impl IndexMeta { Self::new_txn(index, &txn) } - pub fn new_txn(index: &Index, txn: &heed::RoTxn) -> Result { + pub fn new_txn(index: &Index, txn: &milli::heed::RoTxn) -> Result { let created_at = index.created_at(txn)?; let updated_at = index.updated_at(txn)?; let primary_key = index.primary_key(txn)?.map(String::from); @@ -168,6 +170,31 @@ impl Index { }) .collect(); + let min_typo_word_len = MinWordSizeTyposSetting { + one_typo: Setting::Set(self.min_word_len_one_typo(txn)?), + two_typos: Setting::Set(self.min_word_len_two_typos(txn)?), + }; + + let disabled_words = self + .exact_words(txn)? + .into_stream() + .into_strs()? + .into_iter() + .collect(); + + let disabled_attributes = self + .exact_attributes(txn)? 
+ .into_iter() + .map(String::from) + .collect(); + + let typo_tolerance = TypoSettings { + enabled: Setting::Set(self.authorize_typos(txn)?), + min_word_size_for_typos: Setting::Set(min_typo_word_len), + disable_on_words: Setting::Set(disabled_words), + disable_on_attributes: Setting::Set(disabled_attributes), + }; + Ok(Settings { displayed_attributes: match displayed_attributes { Some(attrs) => Setting::Set(attrs), @@ -186,6 +213,7 @@ impl Index { None => Setting::Reset, }, synonyms: Setting::Set(synonyms), + typo_tolerance: Setting::Set(typo_tolerance), _kind: PhantomData, }) } @@ -250,7 +278,7 @@ impl Index { fn fields_to_display>( &self, - txn: &heed::RoTxn, + txn: &milli::heed::RoTxn, attributes_to_retrieve: &Option>, fields_ids_map: &milli::FieldsIdsMap, ) -> Result> { @@ -278,7 +306,7 @@ impl Index { let _txn = self.write_txn()?; self.inner .env - .copy_to_path(dst, heed::CompactionOption::Enabled)?; + .copy_to_path(dst, milli::heed::CompactionOption::Enabled)?; Ok(()) } } diff --git a/meilisearch-lib/src/index/mod.rs b/meilisearch-lib/src/index/mod.rs index 3e6be739b..3a42b2617 100644 --- a/meilisearch-lib/src/index/mod.rs +++ b/meilisearch-lib/src/index/mod.rs @@ -1,4 +1,8 @@ -pub use search::{default_crop_length, SearchQuery, SearchResult, DEFAULT_SEARCH_LIMIT}; +pub use search::{ + default_crop_length, default_crop_marker, default_highlight_post_tag, + default_highlight_pre_tag, SearchQuery, SearchResult, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, + DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, +}; pub use updates::{apply_settings_to_builder, Checked, Facets, Settings, Unchecked}; mod dump; diff --git a/meilisearch-lib/src/index/search.rs b/meilisearch-lib/src/index/search.rs index 644b75468..7c12f985e 100644 --- a/meilisearch-lib/src/index/search.rs +++ b/meilisearch-lib/src/index/search.rs @@ -1,9 +1,9 @@ +use std::cmp::min; use std::collections::{BTreeMap, BTreeSet, HashSet}; use std::str::FromStr; use 
std::time::Instant; use either::Either; -use indexmap::IndexMap; use milli::tokenizer::{Analyzer, AnalyzerConfig, Token}; use milli::{AscDesc, FieldId, FieldsIdsMap, Filter, MatchingWords, SortError}; use regex::Regex; @@ -15,7 +15,7 @@ use crate::index::error::FacetError; use super::error::{IndexError, Result}; use super::index::Index; -pub type Document = IndexMap; +pub type Document = serde_json::Map; type MatchesInfo = BTreeMap>; #[derive(Serialize, Debug, Clone, PartialEq)] @@ -29,11 +29,30 @@ const fn default_search_limit() -> usize { DEFAULT_SEARCH_LIMIT } -pub const DEFAULT_CROP_LENGTH: usize = 200; +pub const DEFAULT_CROP_LENGTH: usize = 10; pub const fn default_crop_length() -> usize { DEFAULT_CROP_LENGTH } +pub const DEFAULT_CROP_MARKER: &str = "…"; +pub fn default_crop_marker() -> String { + DEFAULT_CROP_MARKER.to_string() +} + +pub const DEFAULT_HIGHLIGHT_PRE_TAG: &str = ""; +pub fn default_highlight_pre_tag() -> String { + DEFAULT_HIGHLIGHT_PRE_TAG.to_string() +} + +pub const DEFAULT_HIGHLIGHT_POST_TAG: &str = ""; +pub fn default_highlight_post_tag() -> String { + DEFAULT_HIGHLIGHT_POST_TAG.to_string() +} + +/// The maximimum number of results that the engine +/// will be able to return in one search call. 
+pub const HARD_RESULT_LIMIT: usize = 1000; + #[derive(Deserialize, Debug, Clone, PartialEq)] #[serde(rename_all = "camelCase", deny_unknown_fields)] pub struct SearchQuery { @@ -52,6 +71,12 @@ pub struct SearchQuery { pub filter: Option, pub sort: Option>, pub facets_distribution: Option>, + #[serde(default = "default_highlight_pre_tag")] + pub highlight_pre_tag: String, + #[serde(default = "default_highlight_post_tag")] + pub highlight_post_tag: String, + #[serde(default = "default_crop_marker")] + pub crop_marker: String, } #[derive(Debug, Clone, Serialize, PartialEq)] @@ -80,12 +105,21 @@ pub struct SearchResult { pub exhaustive_facets_count: Option, } -#[derive(Copy, Clone)] +#[derive(Copy, Clone, Default)] struct FormatOptions { highlight: bool, crop: Option, } +impl FormatOptions { + pub fn merge(self, other: Self) -> Self { + Self { + highlight: self.highlight || other.highlight, + crop: self.crop.or(other.crop), + } + } +} + impl Index { pub fn perform_search(&self, query: SearchQuery) -> Result { let before_search = Instant::now(); @@ -97,8 +131,13 @@ impl Index { search.query(query); } - search.limit(query.limit); - search.offset(query.offset.unwrap_or_default()); + // Make sure that a user can't get more documents than the hard limit, + // we align that on the offset too. + let offset = min(query.offset.unwrap_or(0), HARD_RESULT_LIMIT); + let limit = min(query.limit, HARD_RESULT_LIMIT.saturating_sub(offset)); + + search.offset(offset); + search.limit(limit); if let Some(ref filter) = query.filter { if let Some(facets) = parse_filter(filter)? 
{ @@ -182,22 +221,34 @@ impl Index { config.stop_words(&stop_words); let analyzer = Analyzer::new(config); - let formatter = Formatter::new(&analyzer, (String::from(""), String::from(""))); + let formatter = Formatter::new( + &analyzer, + (query.highlight_pre_tag, query.highlight_post_tag), + query.crop_marker, + ); let mut documents = Vec::new(); let documents_iter = self.documents(&rtxn, documents_ids)?; for (_id, obkv) in documents_iter { - let mut document = make_document(&to_retrieve_ids, &fields_ids_map, obkv)?; + // First generate a document with all the displayed fields + let displayed_document = make_document(&displayed_ids, &fields_ids_map, obkv)?; + + // select the attributes to retrieve + let attributes_to_retrieve = to_retrieve_ids + .iter() + .map(|&fid| fields_ids_map.name(fid).expect("Missing field name")); + let mut document = + permissive_json_pointer::select_values(&displayed_document, attributes_to_retrieve); let matches_info = query .matches .then(|| compute_matches(&matching_words, &document, &analyzer)); let formatted = format_fields( + &displayed_document, &fields_ids_map, - obkv, &formatter, &matching_words, &formatted_options, @@ -432,54 +483,63 @@ fn add_non_formatted_ids_to_formatted_options( } fn make_document( - attributes_to_retrieve: &BTreeSet, + displayed_attributes: &BTreeSet, field_ids_map: &FieldsIdsMap, obkv: obkv::KvReaderU16, ) -> Result { - let mut document = Document::new(); + let mut document = serde_json::Map::new(); - for attr in attributes_to_retrieve { - if let Some(value) = obkv.get(*attr) { - let value = serde_json::from_slice(value)?; + // recreate the original json + for (key, value) in obkv.iter() { + let value = serde_json::from_slice(value)?; + let key = field_ids_map + .name(key) + .expect("Missing field name") + .to_string(); - // This unwrap must be safe since we got the ids from the fields_ids_map just - // before. 
- let key = field_ids_map - .name(*attr) - .expect("Missing field name") - .to_string(); - - document.insert(key, value); - } + document.insert(key, value); } + + // select the attributes to retrieve + let displayed_attributes = displayed_attributes + .iter() + .map(|&fid| field_ids_map.name(fid).expect("Missing field name")); + + let document = permissive_json_pointer::select_values(&document, displayed_attributes); Ok(document) } fn format_fields>( + document: &Document, field_ids_map: &FieldsIdsMap, - obkv: obkv::KvReaderU16, formatter: &Formatter, matching_words: &impl Matcher, formatted_options: &BTreeMap, ) -> Result { - let mut document = Document::new(); + let selectors: Vec<_> = formatted_options + .keys() + // This unwrap must be safe since we got the ids from the fields_ids_map just + // before. + .map(|&fid| field_ids_map.name(fid).unwrap()) + .collect(); + let mut document = permissive_json_pointer::select_values(document, selectors.iter().copied()); - for (id, format) in formatted_options { - if let Some(value) = obkv.get(*id) { - let mut value: Value = serde_json::from_slice(value)?; - - value = formatter.format_value(value, matching_words, *format); - - // This unwrap must be safe since we got the ids from the fields_ids_map just - // before. - let key = field_ids_map - .name(*id) - .expect("Missing field name") - .to_string(); - - document.insert(key, value); - } - } + permissive_json_pointer::map_leaf_values(&mut document, selectors, |key, value| { + // To get the formatting option of each key we need to see all the rules that applies + // to the value and merge them together. eg. If a user said he wanted to highlight `doggo` + // and crop `doggo.name`. `doggo.name` needs to be highlighted + cropped while `doggo.age` is only + // highlighted. 
+ let format = formatted_options + .iter() + .filter(|(field, _option)| { + let name = field_ids_map.name(**field).unwrap(); + milli::is_faceted_by(name, key) || milli::is_faceted_by(key, name) + }) + .fold(FormatOptions::default(), |acc, (_, option)| { + acc.merge(*option) + }); + *value = formatter.format_value(std::mem::take(value), matching_words, format); + }); Ok(document) } @@ -504,12 +564,21 @@ impl Matcher for MatchingWords { struct Formatter<'a, A> { analyzer: &'a Analyzer<'a, A>, - marks: (String, String), + highlight_tags: (String, String), + crop_marker: String, } impl<'a, A: AsRef<[u8]>> Formatter<'a, A> { - pub fn new(analyzer: &'a Analyzer<'a, A>, marks: (String, String)) -> Self { - Self { analyzer, marks } + pub fn new( + analyzer: &'a Analyzer<'a, A>, + highlight_tags: (String, String), + crop_marker: String, + ) -> Self { + Self { + analyzer, + highlight_tags, + crop_marker, + } } fn format_value( @@ -573,10 +642,13 @@ impl<'a, A: AsRef<[u8]>> Formatter<'a, A> { ) -> String { let analyzed = self.analyzer.analyze(&s); - let tokens: Box> = match format_options.crop { - Some(crop_len) => { + let mut tokens = analyzed.reconstruct(); + let mut crop_marker_before = false; + + let tokens_interval: Box> = match format_options.crop { + Some(crop_len) if crop_len > 0 => { let mut buffer = Vec::new(); - let mut tokens = analyzed.reconstruct().peekable(); + let mut tokens = tokens.by_ref().peekable(); while let Some((word, token)) = tokens.next_if(|(_, token)| matcher.matches(token).is_none()) @@ -586,16 +658,35 @@ impl<'a, A: AsRef<[u8]>> Formatter<'a, A> { match tokens.next() { Some(token) => { - let mut total_len: usize = buffer.iter().map(|(word, _)| word.len()).sum(); - let before_iter = buffer.into_iter().skip_while(move |(word, _)| { - total_len -= word.len(); - total_len >= crop_len + let mut total_count: usize = buffer + .iter() + .filter(|(_, token)| token.is_separator().is_none()) + .count(); + + let crop_len_before = crop_len / 2; + // check if 
start will be cropped. + crop_marker_before = total_count > crop_len_before; + + let before_iter = buffer.into_iter().skip_while(move |(_, token)| { + if token.is_separator().is_none() { + total_count -= 1; + } + total_count >= crop_len_before }); + // rebalance remaining word count after the match. + let crop_len_after = if crop_marker_before { + crop_len.saturating_sub(crop_len_before + 1) + } else { + crop_len.saturating_sub(total_count + 1) + }; + let mut taken_after = 0; - let after_iter = tokens.take_while(move |(word, _)| { - let take = taken_after < crop_len; - taken_after += word.chars().count(); + let after_iter = tokens.take_while(move |(_, token)| { + let take = taken_after < crop_len_after; + if token.is_separator().is_none() { + taken_after += 1; + } take }); @@ -606,38 +697,57 @@ impl<'a, A: AsRef<[u8]>> Formatter<'a, A> { // If no word matches in the attribute None => { let mut count = 0; - let iter = buffer.into_iter().take_while(move |(word, _)| { - let take = count < crop_len; - count += word.len(); - take - }); + let mut tokens = buffer.into_iter(); + let mut out: String = tokens + .by_ref() + .take_while(move |(_, token)| { + let take = count < crop_len; + if token.is_separator().is_none() { + count += 1; + } + take + }) + .map(|(word, _)| word) + .collect(); - Box::new(iter) + // if there are remaining tokens after formatted interval, + // put a crop marker at the end. + if tokens.next().is_some() { + out.push_str(&self.crop_marker); + } + + return out; } } } - None => Box::new(analyzed.reconstruct()), + _ => Box::new(tokens.by_ref()), }; - tokens.fold(String::new(), |mut out, (word, token)| { + let out = if crop_marker_before { + self.crop_marker.clone() + } else { + String::new() + }; + + let mut out = tokens_interval.fold(out, |mut out, (word, token)| { // Check if we need to do highlighting or computed matches before calling // Matcher::match since the call is expensive. 
if format_options.highlight && token.is_word() { if let Some(length) = matcher.matches(&token) { match word.get(..length).zip(word.get(length..)) { Some((head, tail)) => { - out.push_str(&self.marks.0); + out.push_str(&self.highlight_tags.0); out.push_str(head); - out.push_str(&self.marks.1); + out.push_str(&self.highlight_tags.1); out.push_str(tail); } // if we are in the middle of a character // or if all the word should be highlighted, // we highlight the complete word. None => { - out.push_str(&self.marks.0); + out.push_str(&self.highlight_tags.0); out.push_str(word); - out.push_str(&self.marks.1); + out.push_str(&self.highlight_tags.1); } } return out; @@ -645,7 +755,15 @@ impl<'a, A: AsRef<[u8]>> Formatter<'a, A> { } out.push_str(word); out - }) + }); + + // if there are remaining tokens after formatted interval, + // put a crop marker at the end. + if tokens.next().is_some() { + out.push_str(&self.crop_marker); + } + + out } } @@ -698,26 +816,34 @@ mod test { let mut config = AnalyzerConfig::default(); config.stop_words(&stop_words); let analyzer = Analyzer::new(config); - let formatter = Formatter::new(&analyzer, (String::from(""), String::from(""))); + let formatter = Formatter::new( + &analyzer, + (String::from(""), String::from("")), + String::from("…"), + ); let mut fields = FieldsIdsMap::new(); - let id = fields.insert("test").unwrap(); + fields.insert("test").unwrap(); - let mut buf = Vec::new(); - let mut obkv = obkv::KvWriter::new(&mut buf); - obkv.insert(id, Value::String("hello".into()).to_string().as_bytes()) - .unwrap(); - obkv.finish().unwrap(); + let document: serde_json::Value = json!({ + "test": "hello", + }); - let obkv = obkv::KvReader::new(&buf); + // we need to convert the `serde_json::Map` into an `IndexMap`. 
+ let document = document + .as_object() + .unwrap() + .into_iter() + .map(|(k, v)| (k.clone(), v.clone())) + .collect(); let formatted_options = BTreeMap::new(); let matching_words = MatchingWords::default(); let value = format_fields( + &document, &fields, - obkv, &formatter, &matching_words, &formatted_options, @@ -733,31 +859,28 @@ mod test { let mut config = AnalyzerConfig::default(); config.stop_words(&stop_words); let analyzer = Analyzer::new(config); - let formatter = Formatter::new(&analyzer, (String::from(""), String::from(""))); + let formatter = Formatter::new( + &analyzer, + (String::from(""), String::from("")), + String::from("…"), + ); let mut fields = FieldsIdsMap::new(); let title = fields.insert("title").unwrap(); let author = fields.insert("author").unwrap(); - let mut buf = Vec::new(); - let mut obkv = obkv::KvWriter::new(&mut buf); - obkv.insert( - title, - Value::String("The Hobbit".into()).to_string().as_bytes(), - ) - .unwrap(); - obkv.finish().unwrap(); - obkv = obkv::KvWriter::new(&mut buf); - obkv.insert( - author, - Value::String("J. R. R. Tolkien".into()) - .to_string() - .as_bytes(), - ) - .unwrap(); - obkv.finish().unwrap(); + let document: serde_json::Value = json!({ + "title": "The Hobbit", + "author": "J. R. R. Tolkien", + }); - let obkv = obkv::KvReader::new(&buf); + // we need to convert the `serde_json::Map` into an `IndexMap`. 
+ let document = document + .as_object() + .unwrap() + .into_iter() + .map(|(k, v)| (k.clone(), v.clone())) + .collect(); let mut formatted_options = BTreeMap::new(); formatted_options.insert( @@ -779,8 +902,8 @@ mod test { matching_words.insert("hobbit", Some(3)); let value = format_fields( + &document, &fields, - obkv, &formatter, &matching_words, &formatted_options, @@ -797,45 +920,30 @@ mod test { let mut config = AnalyzerConfig::default(); config.stop_words(&stop_words); let analyzer = Analyzer::new(config); - let formatter = Formatter::new(&analyzer, (String::from(""), String::from(""))); + let formatter = Formatter::new( + &analyzer, + (String::from(""), String::from("")), + String::from("…"), + ); let mut fields = FieldsIdsMap::new(); let title = fields.insert("title").unwrap(); let author = fields.insert("author").unwrap(); let publication_year = fields.insert("publication_year").unwrap(); - let mut buf = Vec::new(); - let mut obkv = obkv::KvWriter::new(&mut buf); + let document: serde_json::Value = json!({ + "title": "The Hobbit", + "author": "J. R. R. Tolkien", + "publication_year": 1937, + }); - obkv.insert( - title, - Value::String("The Hobbit".into()).to_string().as_bytes(), - ) - .unwrap(); - - obkv.finish().unwrap(); - obkv = obkv::KvWriter::new(&mut buf); - - obkv.insert( - author, - Value::String("J. R. R. Tolkien".into()) - .to_string() - .as_bytes(), - ) - .unwrap(); - obkv.finish().unwrap(); - - obkv = obkv::KvWriter::new(&mut buf); - - obkv.insert( - publication_year, - Value::Number(1937.into()).to_string().as_bytes(), - ) - .unwrap(); - - obkv.finish().unwrap(); - - let obkv = obkv::KvReader::new(&buf); + // we need to convert the `serde_json::Map` into an `IndexMap`. 
+ let document = document + .as_object() + .unwrap() + .into_iter() + .map(|(k, v)| (k.clone(), v.clone())) + .collect(); let mut formatted_options = BTreeMap::new(); formatted_options.insert( @@ -864,8 +972,8 @@ mod test { matching_words.insert("1937", Some(4)); let value = format_fields( + &document, &fields, - obkv, &formatter, &matching_words, &formatted_options, @@ -884,29 +992,28 @@ mod test { let mut config = AnalyzerConfig::default(); config.stop_words(&stop_words); let analyzer = Analyzer::new(config); - let formatter = Formatter::new(&analyzer, (String::from(""), String::from(""))); + let formatter = Formatter::new( + &analyzer, + (String::from(""), String::from("")), + String::from("…"), + ); let mut fields = FieldsIdsMap::new(); let title = fields.insert("title").unwrap(); let author = fields.insert("author").unwrap(); - let mut buf = Vec::new(); - let mut obkv = obkv::KvWriter::new(&mut buf); - obkv.insert( - title, - Value::String("Go💼od luck.".into()).to_string().as_bytes(), - ) - .unwrap(); - obkv.finish().unwrap(); - obkv = obkv::KvWriter::new(&mut buf); - obkv.insert( - author, - Value::String("JacobLey".into()).to_string().as_bytes(), - ) - .unwrap(); - obkv.finish().unwrap(); + let document: serde_json::Value = json!({ + "title": "Go💼od luck.", + "author": "JacobLey", + }); - let obkv = obkv::KvReader::new(&buf); + // we need to convert the `serde_json::Map` into an `IndexMap`. 
+ let document = document + .as_object() + .unwrap() + .into_iter() + .map(|(k, v)| (k.clone(), v.clone())) + .collect(); let mut formatted_options = BTreeMap::new(); formatted_options.insert( @@ -930,8 +1037,8 @@ mod test { matching_words.insert("gobriefcase od", Some(11)); let value = format_fields( + &document, &fields, - obkv, &formatter, &matching_words, &formatted_options, @@ -948,28 +1055,28 @@ mod test { let mut config = AnalyzerConfig::default(); config.stop_words(&stop_words); let analyzer = Analyzer::new(config); - let formatter = Formatter::new(&analyzer, (String::from(""), String::from(""))); + let formatter = Formatter::new( + &analyzer, + (String::from(""), String::from("")), + String::from("…"), + ); let mut fields = FieldsIdsMap::new(); let title = fields.insert("title").unwrap(); let author = fields.insert("author").unwrap(); - let mut buf = Vec::new(); - let mut obkv = obkv::KvWriter::new(&mut buf); - obkv.insert(title, Value::String("étoile".into()).to_string().as_bytes()) - .unwrap(); - obkv.finish().unwrap(); - obkv = obkv::KvWriter::new(&mut buf); - obkv.insert( - author, - Value::String("J. R. R. Tolkien".into()) - .to_string() - .as_bytes(), - ) - .unwrap(); - obkv.finish().unwrap(); + let document: serde_json::Value = json!({ + "title": "étoile", + "author": "J. R. R. Tolkien", + }); - let obkv = obkv::KvReader::new(&buf); + // we need to convert the `serde_json::Map` into an `IndexMap`. 
+ let document = document + .as_object() + .unwrap() + .into_iter() + .map(|(k, v)| (k.clone(), v.clone())) + .collect(); let mut formatted_options = BTreeMap::new(); formatted_options.insert( @@ -991,8 +1098,8 @@ mod test { matching_words.insert("etoile", Some(1)); let value = format_fields( + &document, &fields, - obkv, &formatter, &matching_words, &formatted_options, @@ -1009,31 +1116,28 @@ mod test { let mut config = AnalyzerConfig::default(); config.stop_words(&stop_words); let analyzer = Analyzer::new(config); - let formatter = Formatter::new(&analyzer, (String::from(""), String::from(""))); + let formatter = Formatter::new( + &analyzer, + (String::from(""), String::from("")), + String::from("…"), + ); let mut fields = FieldsIdsMap::new(); let title = fields.insert("title").unwrap(); let author = fields.insert("author").unwrap(); - let mut buf = Vec::new(); - let mut obkv = obkv::KvWriter::new(&mut buf); - obkv.insert( - title, - Value::String("Harry Potter and the Half-Blood Prince".into()) - .to_string() - .as_bytes(), - ) - .unwrap(); - obkv.finish().unwrap(); - obkv = obkv::KvWriter::new(&mut buf); - obkv.insert( - author, - Value::String("J. K. Rowling".into()).to_string().as_bytes(), - ) - .unwrap(); - obkv.finish().unwrap(); + let document: serde_json::Value = json!({ + "title": "Harry Potter and the Half-Blood Prince", + "author": "J. K. Rowling", + }); - let obkv = obkv::KvReader::new(&buf); + // we need to convert the `serde_json::Map` into an `IndexMap`. 
+ let document = document + .as_object() + .unwrap() + .into_iter() + .map(|(k, v)| (k.clone(), v.clone())) + .collect(); let mut formatted_options = BTreeMap::new(); formatted_options.insert( @@ -1052,59 +1156,56 @@ mod test { ); let mut matching_words = BTreeMap::new(); - matching_words.insert("potter", Some(6)); + matching_words.insert("potter", Some(3)); let value = format_fields( + &document, &fields, - obkv, &formatter, &matching_words, &formatted_options, ) .unwrap(); - assert_eq!(value["title"], "Harry Potter and"); + assert_eq!(value["title"], "Harry Potter…"); assert_eq!(value["author"], "J. K. Rowling"); } #[test] - fn formatted_with_crop_10() { + fn formatted_with_crop_5() { let stop_words = fst::Set::default(); let mut config = AnalyzerConfig::default(); config.stop_words(&stop_words); let analyzer = Analyzer::new(config); - let formatter = Formatter::new(&analyzer, (String::from(""), String::from(""))); + let formatter = Formatter::new( + &analyzer, + (String::from(""), String::from("")), + String::from("…"), + ); let mut fields = FieldsIdsMap::new(); let title = fields.insert("title").unwrap(); let author = fields.insert("author").unwrap(); - let mut buf = Vec::new(); - let mut obkv = obkv::KvWriter::new(&mut buf); - obkv.insert( - title, - Value::String("Harry Potter and the Half-Blood Prince".into()) - .to_string() - .as_bytes(), - ) - .unwrap(); - obkv.finish().unwrap(); - obkv = obkv::KvWriter::new(&mut buf); - obkv.insert( - author, - Value::String("J. K. Rowling".into()).to_string().as_bytes(), - ) - .unwrap(); - obkv.finish().unwrap(); + let document: serde_json::Value = json!({ + "title": "Harry Potter and the Half-Blood Prince", + "author": "J. K. Rowling", + }); - let obkv = obkv::KvReader::new(&buf); + // we need to convert the `serde_json::Map` into an `IndexMap`. 
+ let document = document + .as_object() + .unwrap() + .into_iter() + .map(|(k, v)| (k.clone(), v.clone())) + .collect(); let mut formatted_options = BTreeMap::new(); formatted_options.insert( title, FormatOptions { highlight: false, - crop: Some(10), + crop: Some(5), }, ); formatted_options.insert( @@ -1116,18 +1217,18 @@ mod test { ); let mut matching_words = BTreeMap::new(); - matching_words.insert("potter", Some(6)); + matching_words.insert("potter", Some(5)); let value = format_fields( + &document, &fields, - obkv, &formatter, &matching_words, &formatted_options, ) .unwrap(); - assert_eq!(value["title"], "Harry Potter and the Half"); + assert_eq!(value["title"], "Harry Potter and the Half…"); assert_eq!(value["author"], "J. K. Rowling"); } @@ -1137,31 +1238,28 @@ mod test { let mut config = AnalyzerConfig::default(); config.stop_words(&stop_words); let analyzer = Analyzer::new(config); - let formatter = Formatter::new(&analyzer, (String::from(""), String::from(""))); + let formatter = Formatter::new( + &analyzer, + (String::from(""), String::from("")), + String::from("…"), + ); let mut fields = FieldsIdsMap::new(); let title = fields.insert("title").unwrap(); let author = fields.insert("author").unwrap(); - let mut buf = Vec::new(); - let mut obkv = obkv::KvWriter::new(&mut buf); - obkv.insert( - title, - Value::String("Harry Potter and the Half-Blood Prince".into()) - .to_string() - .as_bytes(), - ) - .unwrap(); - obkv.finish().unwrap(); - obkv = obkv::KvWriter::new(&mut buf); - obkv.insert( - author, - Value::String("J. K. Rowling".into()).to_string().as_bytes(), - ) - .unwrap(); - obkv.finish().unwrap(); + let document: serde_json::Value = json!({ + "title": "Harry Potter and the Half-Blood Prince", + "author": "J. K. Rowling", + }); - let obkv = obkv::KvReader::new(&buf); + // we need to convert the `serde_json::Map` into an `IndexMap`. 
+ let document = document + .as_object() + .unwrap() + .into_iter() + .map(|(k, v)| (k.clone(), v.clone())) + .collect(); let mut formatted_options = BTreeMap::new(); formatted_options.insert( @@ -1183,15 +1281,15 @@ mod test { matching_words.insert("potter", Some(6)); let value = format_fields( + &document, &fields, - obkv, &formatter, &matching_words, &formatted_options, ) .unwrap(); - assert_eq!(value["title"], "Potter"); + assert_eq!(value["title"], "Harry Potter and the Half-Blood Prince"); assert_eq!(value["author"], "J. K. Rowling"); } @@ -1201,38 +1299,35 @@ mod test { let mut config = AnalyzerConfig::default(); config.stop_words(&stop_words); let analyzer = Analyzer::new(config); - let formatter = Formatter::new(&analyzer, (String::from(""), String::from(""))); + let formatter = Formatter::new( + &analyzer, + (String::from(""), String::from("")), + String::from("…"), + ); let mut fields = FieldsIdsMap::new(); let title = fields.insert("title").unwrap(); let author = fields.insert("author").unwrap(); - let mut buf = Vec::new(); - let mut obkv = obkv::KvWriter::new(&mut buf); - obkv.insert( - title, - Value::String("Harry Potter and the Half-Blood Prince".into()) - .to_string() - .as_bytes(), - ) - .unwrap(); - obkv.finish().unwrap(); - obkv = obkv::KvWriter::new(&mut buf); - obkv.insert( - author, - Value::String("J. K. Rowling".into()).to_string().as_bytes(), - ) - .unwrap(); - obkv.finish().unwrap(); + let document: serde_json::Value = json!({ + "title": "Harry Potter and the Half-Blood Prince", + "author": "J. K. Rowling", + }); - let obkv = obkv::KvReader::new(&buf); + // we need to convert the `serde_json::Map` into an `IndexMap`. 
+ let document = document + .as_object() + .unwrap() + .into_iter() + .map(|(k, v)| (k.clone(), v.clone())) + .collect(); let mut formatted_options = BTreeMap::new(); formatted_options.insert( title, FormatOptions { highlight: false, - crop: Some(6), + crop: Some(1), }, ); formatted_options.insert( @@ -1247,15 +1342,15 @@ mod test { matching_words.insert("rowling", Some(3)); let value = format_fields( + &document, &fields, - obkv, &formatter, &matching_words, &formatted_options, ) .unwrap(); - assert_eq!(value["title"], "Harry "); + assert_eq!(value["title"], "Harry…"); assert_eq!(value["author"], "J. K. Rowling"); } @@ -1265,31 +1360,28 @@ mod test { let mut config = AnalyzerConfig::default(); config.stop_words(&stop_words); let analyzer = Analyzer::new(config); - let formatter = Formatter::new(&analyzer, (String::from(""), String::from(""))); + let formatter = Formatter::new( + &analyzer, + (String::from(""), String::from("")), + String::from("…"), + ); let mut fields = FieldsIdsMap::new(); let title = fields.insert("title").unwrap(); let author = fields.insert("author").unwrap(); - let mut buf = Vec::new(); - let mut obkv = obkv::KvWriter::new(&mut buf); - obkv.insert( - title, - Value::String("Harry Potter and the Half-Blood Prince".into()) - .to_string() - .as_bytes(), - ) - .unwrap(); - obkv.finish().unwrap(); - obkv = obkv::KvWriter::new(&mut buf); - obkv.insert( - author, - Value::String("J. K. Rowling".into()).to_string().as_bytes(), - ) - .unwrap(); - obkv.finish().unwrap(); + let document: serde_json::Value = json!({ + "title": "Harry Potter and the Half-Blood Prince", + "author": "J. K. Rowling", + }); - let obkv = obkv::KvReader::new(&buf); + // we need to convert the `serde_json::Map` into an `IndexMap`. 
+ let document = document + .as_object() + .unwrap() + .into_iter() + .map(|(k, v)| (k.clone(), v.clone())) + .collect(); let mut formatted_options = BTreeMap::new(); formatted_options.insert( @@ -1311,15 +1403,15 @@ mod test { matching_words.insert("and", Some(3)); let value = format_fields( + &document, &fields, - obkv, &formatter, &matching_words, &formatted_options, ) .unwrap(); - assert_eq!(value["title"], " and "); + assert_eq!(value["title"], "…and…"); assert_eq!(value["author"], "J. K. Rowling"); } @@ -1329,38 +1421,35 @@ mod test { let mut config = AnalyzerConfig::default(); config.stop_words(&stop_words); let analyzer = Analyzer::new(config); - let formatter = Formatter::new(&analyzer, (String::from(""), String::from(""))); + let formatter = Formatter::new( + &analyzer, + (String::from(""), String::from("")), + String::from("…"), + ); let mut fields = FieldsIdsMap::new(); let title = fields.insert("title").unwrap(); let author = fields.insert("author").unwrap(); - let mut buf = Vec::new(); - let mut obkv = obkv::KvWriter::new(&mut buf); - obkv.insert( - title, - Value::String("Harry Potter and the Half-Blood Prince".into()) - .to_string() - .as_bytes(), - ) - .unwrap(); - obkv.finish().unwrap(); - obkv = obkv::KvWriter::new(&mut buf); - obkv.insert( - author, - Value::String("J. K. Rowling".into()).to_string().as_bytes(), - ) - .unwrap(); - obkv.finish().unwrap(); + let document: serde_json::Value = json!({ + "title": "Harry Potter and the Half-Blood Prince", + "author": "J. K. Rowling", + }); - let obkv = obkv::KvReader::new(&buf); + // we need to convert the `serde_json::Map` into an `IndexMap`. 
+ let document = document + .as_object() + .unwrap() + .into_iter() + .map(|(k, v)| (k.clone(), v.clone())) + .collect(); let mut formatted_options = BTreeMap::new(); formatted_options.insert( title, FormatOptions { highlight: true, - crop: Some(9), + crop: Some(4), }, ); formatted_options.insert( @@ -1375,15 +1464,15 @@ mod test { matching_words.insert("blood", Some(3)); let value = format_fields( + &document, &fields, - obkv, &formatter, &matching_words, &formatted_options, ) .unwrap(); - assert_eq!(value["title"], "the Half-Blood Prince"); + assert_eq!(value["title"], "…the Half-Blood Prince"); assert_eq!(value["author"], "J. K. Rowling"); } diff --git a/meilisearch-lib/src/index/updates.rs b/meilisearch-lib/src/index/updates.rs index 07bb0da0e..3aefa1f5e 100644 --- a/meilisearch-lib/src/index/updates.rs +++ b/meilisearch-lib/src/index/updates.rs @@ -37,6 +37,37 @@ pub struct Checked; #[derive(Clone, Default, Debug, Serialize, Deserialize, PartialEq)] pub struct Unchecked; +#[cfg_attr(test, derive(proptest_derive::Arbitrary))] +#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq)] +#[serde(deny_unknown_fields)] +#[serde(rename_all = "camelCase")] +pub struct MinWordSizeTyposSetting { + #[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))] + #[serde(default, skip_serializing_if = "Setting::is_not_set")] + pub one_typo: Setting, + #[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))] + #[serde(default, skip_serializing_if = "Setting::is_not_set")] + pub two_typos: Setting, +} + +#[cfg_attr(test, derive(proptest_derive::Arbitrary))] +#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq)] +#[serde(deny_unknown_fields)] +#[serde(rename_all = "camelCase")] +pub struct TypoSettings { + #[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))] + #[serde(default, skip_serializing_if = "Setting::is_not_set")] + pub enabled: Setting, + #[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))] + 
#[serde(default, skip_serializing_if = "Setting::is_not_set")] + pub min_word_size_for_typos: Setting, + #[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))] + #[serde(default, skip_serializing_if = "Setting::is_not_set")] + pub disable_on_words: Setting>, + #[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))] + #[serde(default, skip_serializing_if = "Setting::is_not_set")] + pub disable_on_attributes: Setting>, +} /// Holds all the settings for an index. `T` can either be `Checked` if they represents settings /// whose validity is guaranteed, or `Unchecked` if they need to be validated. In the later case, a /// call to `check` will return a `Settings` from a `Settings`. @@ -80,6 +111,9 @@ pub struct Settings { #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))] pub distinct_attribute: Setting, + #[serde(default, skip_serializing_if = "Setting::is_not_set")] + #[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))] + pub typo_tolerance: Setting, #[serde(skip)] pub _kind: PhantomData, @@ -96,6 +130,7 @@ impl Settings { stop_words: Setting::Reset, synonyms: Setting::Reset, distinct_attribute: Setting::Reset, + typo_tolerance: Setting::Reset, _kind: PhantomData, } } @@ -110,6 +145,7 @@ impl Settings { stop_words, synonyms, distinct_attribute, + typo_tolerance, .. 
} = self; @@ -122,6 +158,7 @@ impl Settings { stop_words, synonyms, distinct_attribute, + typo_tolerance, _kind: PhantomData, } } @@ -160,6 +197,7 @@ impl Settings { stop_words: self.stop_words, synonyms: self.synonyms, distinct_attribute: self.distinct_attribute, + typo_tolerance: self.typo_tolerance, _kind: PhantomData, } } @@ -176,7 +214,7 @@ pub struct Facets { impl Index { fn update_primary_key_txn<'a, 'b>( &'a self, - txn: &mut heed::RwTxn<'a, 'b>, + txn: &mut milli::heed::RwTxn<'a, 'b>, primary_key: String, ) -> Result { let mut builder = milli::update::Settings::new(txn, self, self.indexer_config.as_ref()); @@ -248,7 +286,7 @@ impl Index { self.indexer_config.as_ref(), config, indexing_callback, - ); + )?; for content_uuid in contents.into_iter() { let content_file = file_store.get_update(content_uuid)?; @@ -334,6 +372,61 @@ pub fn apply_settings_to_builder( Setting::Reset => builder.reset_distinct_field(), Setting::NotSet => (), } + + match settings.typo_tolerance { + Setting::Set(ref value) => { + match value.enabled { + Setting::Set(val) => builder.set_autorize_typos(val), + Setting::Reset => builder.reset_authorize_typos(), + Setting::NotSet => (), + } + + match value.min_word_size_for_typos { + Setting::Set(ref setting) => { + match setting.one_typo { + Setting::Set(val) => builder.set_min_word_len_one_typo(val), + Setting::Reset => builder.reset_min_word_len_one_typo(), + Setting::NotSet => (), + } + match setting.two_typos { + Setting::Set(val) => builder.set_min_word_len_two_typos(val), + Setting::Reset => builder.reset_min_word_len_two_typos(), + Setting::NotSet => (), + } + } + Setting::Reset => { + builder.reset_min_word_len_one_typo(); + builder.reset_min_word_len_two_typos(); + } + Setting::NotSet => (), + } + + match value.disable_on_words { + Setting::Set(ref words) => { + builder.set_exact_words(words.clone()); + } + Setting::Reset => builder.reset_exact_words(), + Setting::NotSet => (), + } + + match value.disable_on_attributes { + 
Setting::Set(ref words) => { + builder.set_exact_attributes(words.iter().cloned().collect()) + } + Setting::Reset => builder.reset_exact_attributes(), + Setting::NotSet => (), + } + } + Setting::Reset => { + // all typo settings need to be reset here. + builder.reset_authorize_typos(); + builder.reset_min_word_len_one_typo(); + builder.reset_min_word_len_two_typos(); + builder.reset_exact_words(); + builder.reset_exact_attributes(); + } + Setting::NotSet => (), + } } #[cfg(test)] @@ -362,6 +455,7 @@ pub(crate) mod test { stop_words: Setting::NotSet, synonyms: Setting::NotSet, distinct_attribute: Setting::NotSet, + typo_tolerance: Setting::NotSet, _kind: PhantomData::, }; @@ -383,6 +477,7 @@ pub(crate) mod test { stop_words: Setting::NotSet, synonyms: Setting::NotSet, distinct_attribute: Setting::NotSet, + typo_tolerance: Setting::NotSet, _kind: PhantomData::, }; diff --git a/meilisearch-lib/src/index_controller/dump_actor/error.rs b/meilisearch-lib/src/index_controller/dump_actor/error.rs index 73faf1bbb..f72b6d1dd 100644 --- a/meilisearch-lib/src/index_controller/dump_actor/error.rs +++ b/meilisearch-lib/src/index_controller/dump_actor/error.rs @@ -18,7 +18,7 @@ pub enum DumpActorError { } internal_error!( - DumpActorError: heed::Error, + DumpActorError: milli::heed::Error, std::io::Error, tokio::task::JoinError, tokio::sync::oneshot::error::RecvError, diff --git a/meilisearch-lib/src/index_controller/dump_actor/loaders/v4.rs b/meilisearch-lib/src/index_controller/dump_actor/loaders/v4.rs index d342f010f..38d61f146 100644 --- a/meilisearch-lib/src/index_controller/dump_actor/loaders/v4.rs +++ b/meilisearch-lib/src/index_controller/dump_actor/loaders/v4.rs @@ -1,9 +1,9 @@ use std::path::Path; use std::sync::Arc; -use heed::EnvOpenOptions; use log::info; use meilisearch_auth::AuthController; +use milli::heed::EnvOpenOptions; use crate::analytics; use crate::index_controller::dump_actor::Metadata; diff --git a/meilisearch-lib/src/index_controller/mod.rs 
b/meilisearch-lib/src/index_controller/mod.rs index 108084c57..4cbba1e42 100644 --- a/meilisearch-lib/src/index_controller/mod.rs +++ b/meilisearch-lib/src/index_controller/mod.rs @@ -48,8 +48,8 @@ pub type Payload = Box< dyn Stream> + Send + Sync + 'static + Unpin, >; -pub fn open_meta_env(path: &Path, size: usize) -> heed::Result { - let mut options = heed::EnvOpenOptions::new(); +pub fn open_meta_env(path: &Path, size: usize) -> milli::heed::Result { + let mut options = milli::heed::EnvOpenOptions::new(); options.map_size(size); options.max_dbs(20); options.open(path) @@ -178,15 +178,6 @@ impl IndexControllerBuilder { .max_task_store_size .ok_or_else(|| anyhow::anyhow!("Missing update database size"))?; - let db_exists = db_path.as_ref().exists(); - if db_exists { - // Directory could be pre-created without any database in. - let db_is_empty = db_path.as_ref().read_dir()?.next().is_none(); - if !db_is_empty { - versioning::check_version_file(db_path.as_ref())?; - } - } - if let Some(ref path) = self.import_snapshot { log::info!("Loading from snapshot {:?}", path); load_snapshot( @@ -207,6 +198,15 @@ impl IndexControllerBuilder { )?; } + let db_exists = db_path.as_ref().exists(); + if db_exists { + // Directory could be pre-created without any database in. 
+ let db_is_empty = db_path.as_ref().read_dir()?.next().is_none(); + if !db_is_empty { + versioning::check_version_file(db_path.as_ref())?; + } + } + std::fs::create_dir_all(db_path.as_ref())?; let meta_env = Arc::new(open_meta_env(db_path.as_ref(), task_store_size)?); @@ -651,6 +651,9 @@ mod test { use crate::index::error::Result as IndexResult; use crate::index::Index; + use crate::index::{ + default_crop_marker, default_highlight_post_tag, default_highlight_pre_tag, + }; use crate::index_resolver::index_store::MockIndexStore; use crate::index_resolver::meta_store::MockIndexMetaStore; use crate::index_resolver::IndexResolver; @@ -691,6 +694,9 @@ mod test { filter: None, sort: None, facets_distribution: None, + highlight_pre_tag: default_highlight_pre_tag(), + highlight_post_tag: default_highlight_post_tag(), + crop_marker: default_crop_marker(), }; let result = SearchResult { diff --git a/meilisearch-lib/src/index_resolver/error.rs b/meilisearch-lib/src/index_resolver/error.rs index 9ac607a58..6c86aa6b8 100644 --- a/meilisearch-lib/src/index_resolver/error.rs +++ b/meilisearch-lib/src/index_resolver/error.rs @@ -45,7 +45,7 @@ impl From for IndexResolverError { } internal_error!( - IndexResolverError: heed::Error, + IndexResolverError: milli::heed::Error, uuid::Error, std::io::Error, tokio::task::JoinError, diff --git a/meilisearch-lib/src/index_resolver/meta_store.rs b/meilisearch-lib/src/index_resolver/meta_store.rs index 6ca615dbf..f53f9cae9 100644 --- a/meilisearch-lib/src/index_resolver/meta_store.rs +++ b/meilisearch-lib/src/index_resolver/meta_store.rs @@ -4,8 +4,8 @@ use std::io::{BufRead, BufReader, Write}; use std::path::{Path, PathBuf}; use std::sync::Arc; -use heed::types::{SerdeBincode, Str}; -use heed::{CompactionOption, Database, Env}; +use milli::heed::types::{SerdeBincode, Str}; +use milli::heed::{CompactionOption, Database, Env}; use serde::{Deserialize, Serialize}; use uuid::Uuid; @@ -56,7 +56,7 @@ impl Drop for HeedMetaStore { } impl 
HeedMetaStore { - pub fn new(env: Arc) -> Result { + pub fn new(env: Arc) -> Result { let db = env.create_database(Some("uuids"))?; Ok(Self { env, db }) } @@ -153,7 +153,7 @@ impl HeedMetaStore { Ok(()) } - pub fn load_dump(src: impl AsRef, env: Arc) -> Result<()> { + pub fn load_dump(src: impl AsRef, env: Arc) -> Result<()> { let src_indexes = src.as_ref().join(UUIDS_DB_PATH).join("data.jsonl"); let indexes = File::open(&src_indexes)?; let mut indexes = BufReader::new(indexes); diff --git a/meilisearch-lib/src/index_resolver/mod.rs b/meilisearch-lib/src/index_resolver/mod.rs index 9428d4a78..8ca3efdc6 100644 --- a/meilisearch-lib/src/index_resolver/mod.rs +++ b/meilisearch-lib/src/index_resolver/mod.rs @@ -7,10 +7,10 @@ use std::path::Path; use std::sync::Arc; use error::{IndexResolverError, Result}; -use heed::Env; use index_store::{IndexStore, MapIndexStore}; use meilisearch_error::ResponseError; use meta_store::{HeedMetaStore, IndexMetaStore}; +use milli::heed::Env; use milli::update::{DocumentDeletionResult, IndexerConfig}; use serde::{Deserialize, Serialize}; use time::OffsetDateTime; @@ -39,7 +39,7 @@ pub fn create_index_resolver( path: impl AsRef, index_size: usize, indexer_opts: &IndexerOpts, - meta_env: Arc, + meta_env: Arc, file_store: UpdateFileStore, ) -> anyhow::Result { let uuid_store = HeedMetaStore::new(meta_env)?; diff --git a/meilisearch-lib/src/lib.rs b/meilisearch-lib/src/lib.rs index 21b8eb8c3..1161340ba 100644 --- a/meilisearch-lib/src/lib.rs +++ b/meilisearch-lib/src/lib.rs @@ -13,8 +13,8 @@ mod update_file_store; use std::path::Path; pub use index_controller::MeiliSearch; - pub use milli; +pub use milli::heed; mod compression; pub mod document_formats; @@ -25,7 +25,7 @@ pub trait EnvSizer { fn size(&self) -> u64; } -impl EnvSizer for heed::Env { +impl EnvSizer for milli::heed::Env { fn size(&self) -> u64 { WalkDir::new(self.path()) .into_iter() diff --git a/meilisearch-lib/src/options.rs b/meilisearch-lib/src/options.rs index 
195576799..c71f1cba6 100644 --- a/meilisearch-lib/src/options.rs +++ b/meilisearch-lib/src/options.rs @@ -1,21 +1,23 @@ use core::fmt; -use std::{convert::TryFrom, ops::Deref, str::FromStr}; +use std::{convert::TryFrom, num::ParseIntError, ops::Deref, str::FromStr}; use byte_unit::{Byte, ByteError}; use clap::Parser; -use milli::{update::IndexerConfig, CompressionType}; +use milli::update::IndexerConfig; use serde::Serialize; use sysinfo::{RefreshKind, System, SystemExt}; -#[derive(Debug, Clone, Parser)] +#[derive(Debug, Clone, Parser, Serialize)] pub struct IndexerOpts { /// The amount of documents to skip before printing /// a log regarding the indexing advancement. - #[clap(long, default_value = "100000")] // 100k + #[serde(skip)] + #[clap(long, default_value = "100000", hide = true)] // 100k pub log_every_n: usize, /// Grenad max number of chunks in bytes. - #[clap(long)] + #[serde(skip)] + #[clap(long, hide = true)] pub max_nb_chunks: Option, /// The maximum amount of memory the indexer will use. It defaults to 2/3 @@ -25,23 +27,16 @@ pub struct IndexerOpts { /// In case the engine is unable to retrieve the available memory the engine will /// try to use the memory it needs but without real limit, this can lead to /// Out-Of-Memory issues and it is recommended to specify the amount of memory to use. - #[clap(long, default_value_t)] - pub max_memory: MaxMemory, + #[clap(long, env = "MEILI_MAX_INDEXING_MEMORY", default_value_t)] + pub max_indexing_memory: MaxMemory, - /// The name of the compression algorithm to use when compressing intermediate - /// Grenad chunks while indexing documents. + /// The maximum number of threads the indexer will use. + /// If the number set is higher than the real number of cores available in the machine, + /// it will use the maximum number of available cores. /// - /// Choosing a fast algorithm will make the indexing faster but may consume more memory. 
- #[clap(long, default_value = "snappy", possible_values = &["snappy", "zlib", "lz4", "lz4hc", "zstd"])] - pub chunk_compression_type: CompressionType, - - /// The level of compression of the chosen algorithm. - #[clap(long, requires = "chunk-compression-type")] - pub chunk_compression_level: Option, - - /// Number of parallel jobs for indexing, defaults to # of CPUs. - #[clap(long)] - pub indexing_jobs: Option, + /// It defaults to half of the available threads. + #[clap(long, env = "MEILI_MAX_INDEXING_THREADS", default_value_t)] + pub max_indexing_threads: MaxThreads, } #[derive(Debug, Clone, Parser, Default, Serialize)] @@ -74,15 +69,13 @@ impl TryFrom<&IndexerOpts> for IndexerConfig { fn try_from(other: &IndexerOpts) -> Result { let thread_pool = rayon::ThreadPoolBuilder::new() - .num_threads(other.indexing_jobs.unwrap_or(num_cpus::get() / 2)) + .num_threads(*other.max_indexing_threads) .build()?; Ok(Self { log_every_n: Some(other.log_every_n), max_nb_chunks: other.max_nb_chunks, - max_memory: (*other.max_memory).map(|b| b.get_bytes() as usize), - chunk_compression_type: other.chunk_compression_type, - chunk_compression_level: other.chunk_compression_level, + max_memory: other.max_indexing_memory.map(|b| b.get_bytes() as usize), thread_pool: Some(thread_pool), max_positions_per_attributes: None, ..Default::default() @@ -95,16 +88,14 @@ impl Default for IndexerOpts { Self { log_every_n: 100_000, max_nb_chunks: None, - max_memory: MaxMemory::default(), - chunk_compression_type: CompressionType::None, - chunk_compression_level: None, - indexing_jobs: None, + max_indexing_memory: MaxMemory::default(), + max_indexing_threads: MaxThreads::default(), } } } /// A type used to detect the max memory available and use 2/3 of it. 
-#[derive(Debug, Clone, Copy)] +#[derive(Debug, Clone, Copy, Serialize)] pub struct MaxMemory(Option); impl FromStr for MaxMemory { @@ -159,3 +150,34 @@ fn total_memory_bytes() -> Option { None } } + +#[derive(Debug, Clone, Copy, Serialize)] +pub struct MaxThreads(usize); + +impl FromStr for MaxThreads { + type Err = ParseIntError; + + fn from_str(s: &str) -> Result { + usize::from_str(s).map(Self) + } +} + +impl Default for MaxThreads { + fn default() -> Self { + MaxThreads(num_cpus::get() / 2) + } +} + +impl fmt::Display for MaxThreads { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{}", self.0) + } +} + +impl Deref for MaxThreads { + type Target = usize; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} diff --git a/meilisearch-lib/src/snapshot.rs b/meilisearch-lib/src/snapshot.rs index 3667d1021..6c27ad2f0 100644 --- a/meilisearch-lib/src/snapshot.rs +++ b/meilisearch-lib/src/snapshot.rs @@ -149,7 +149,7 @@ impl SnapshotJob { let env = open_meta_env(&self.src_path, self.meta_env_size)?; let dst = path.join("data.mdb"); - env.copy_to_path(dst, heed::CompactionOption::Enabled)?; + env.copy_to_path(dst, milli::heed::CompactionOption::Enabled)?; Ok(()) } @@ -180,12 +180,12 @@ impl SnapshotJob { let dst = dst.join("data.mdb"); - let mut options = heed::EnvOpenOptions::new(); + let mut options = milli::heed::EnvOpenOptions::new(); options.map_size(self.index_size); let index = milli::Index::new(options, entry.path())?; index .env - .copy_to_path(dst, heed::CompactionOption::Enabled)?; + .copy_to_path(dst, milli::heed::CompactionOption::Enabled)?; } Ok(()) @@ -198,7 +198,7 @@ impl SnapshotJob { let dst = dst.join("data.mdb"); let env = open_auth_store_env(&auth_path)?; - env.copy_to_path(dst, heed::CompactionOption::Enabled)?; + env.copy_to_path(dst, milli::heed::CompactionOption::Enabled)?; Ok(()) } diff --git a/meilisearch-lib/src/tasks/error.rs b/meilisearch-lib/src/tasks/error.rs index 38561314f..d849b4c10 100644 --- 
a/meilisearch-lib/src/tasks/error.rs +++ b/meilisearch-lib/src/tasks/error.rs @@ -16,7 +16,7 @@ pub enum TaskError { } internal_error!( - TaskError: heed::Error, + TaskError: milli::heed::Error, JoinError, std::io::Error, serde_json::Error, diff --git a/meilisearch-lib/src/tasks/task_store/mod.rs b/meilisearch-lib/src/tasks/task_store/mod.rs index 695981d25..bdcd13f37 100644 --- a/meilisearch-lib/src/tasks/task_store/mod.rs +++ b/meilisearch-lib/src/tasks/task_store/mod.rs @@ -5,8 +5,8 @@ use std::io::{BufWriter, Write}; use std::path::Path; use std::sync::Arc; -use heed::{Env, RwTxn}; use log::debug; +use milli::heed::{Env, RwTxn}; use time::OffsetDateTime; use super::error::TaskError; @@ -61,7 +61,7 @@ impl Clone for TaskStore { } impl TaskStore { - pub fn new(env: Arc) -> Result { + pub fn new(env: Arc) -> Result { let store = Arc::new(Store::new(env)?); Ok(Self { store }) } @@ -248,7 +248,7 @@ pub mod test { } impl MockTaskStore { - pub fn new(env: Arc) -> Result { + pub fn new(env: Arc) -> Result { Ok(Self::Real(TaskStore::new(env)?)) } diff --git a/meilisearch-lib/src/tasks/task_store/store.rs b/meilisearch-lib/src/tasks/task_store/store.rs index 582cfe27c..4ff986d8b 100644 --- a/meilisearch-lib/src/tasks/task_store/store.rs +++ b/meilisearch-lib/src/tasks/task_store/store.rs @@ -1,5 +1,5 @@ #[allow(clippy::upper_case_acronyms)] -type BEU64 = heed::zerocopy::U64; +type BEU64 = milli::heed::zerocopy::U64; const UID_TASK_IDS: &str = "uid_task_id"; const TASKS: &str = "tasks"; @@ -12,8 +12,8 @@ use std::ops::Range; use std::result::Result as StdResult; use std::sync::Arc; -use heed::types::{ByteSlice, OwnedType, SerdeJson, Unit}; -use heed::{BytesDecode, BytesEncode, Database, Env, RoTxn, RwTxn}; +use milli::heed::types::{ByteSlice, OwnedType, SerdeJson, Unit}; +use milli::heed::{BytesDecode, BytesEncode, Database, Env, RoTxn, RwTxn}; use crate::tasks::task::{Task, TaskId}; @@ -73,7 +73,7 @@ impl Store { /// be in an invalid state, with dangling processing 
tasks. /// You want to patch all un-finished tasks and put them in your pending /// queue with the `reset_and_return_unfinished_update` method. - pub fn new(env: Arc) -> Result { + pub fn new(env: Arc) -> Result { let uids_task_ids = env.create_database(Some(UID_TASK_IDS))?; let tasks = env.create_database(Some(TASKS))?; @@ -130,7 +130,7 @@ impl Store { let range = from..limit .map(|limit| (limit as u64).saturating_add(from)) .unwrap_or(u64::MAX); - let iter: Box>> = match filter { + let iter: Box>> = match filter { Some( ref filter @ TaskFilter { indexes: Some(_), .. @@ -150,7 +150,7 @@ impl Store { ), }; - let apply_fitler = |task: &StdResult<_, heed::Error>| match task { + let apply_fitler = |task: &StdResult<_, milli::heed::Error>| match task { Ok(ref t) => filter .as_ref() .and_then(|filter| filter.filter_fn.as_ref()) @@ -162,7 +162,7 @@ impl Store { let tasks = iter .filter(apply_fitler) .take(limit.unwrap_or(usize::MAX)) - .try_fold::<_, _, StdResult<_, heed::Error>>(Vec::new(), |mut v, task| { + .try_fold::<_, _, StdResult<_, milli::heed::Error>>(Vec::new(), |mut v, task| { v.push(task?); Ok(v) })?; @@ -172,7 +172,7 @@ impl Store { fn compute_candidates( &self, - txn: &heed::RoTxn, + txn: &milli::heed::RoTxn, filter: &TaskFilter, range: Range, ) -> Result> { @@ -188,10 +188,10 @@ impl Store { self.uids_task_ids .remap_key_type::() .rev_prefix_iter(txn, &index_uid)? 
- .map(|entry| -> StdResult<_, heed::Error> { + .map(|entry| -> StdResult<_, milli::heed::Error> { let (key, _) = entry?; - let (_, id) = - IndexUidTaskIdCodec::bytes_decode(key).ok_or(heed::Error::Decoding)?; + let (_, id) = IndexUidTaskIdCodec::bytes_decode(key) + .ok_or(milli::heed::Error::Decoding)?; Ok(id) }) .skip_while(|entry| { @@ -212,7 +212,7 @@ impl Store { // if we encounter an error we returns true to collect it later .unwrap_or(true) }) - .try_for_each::<_, StdResult<(), heed::Error>>(|id| { + .try_for_each::<_, StdResult<(), milli::heed::Error>>(|id| { candidates.push(id?); Ok(()) })?; @@ -225,8 +225,8 @@ impl Store { #[cfg(test)] pub mod test { - use heed::EnvOpenOptions; use itertools::Itertools; + use milli::heed::EnvOpenOptions; use nelson::Mocker; use proptest::collection::vec; use proptest::prelude::*; @@ -244,10 +244,10 @@ pub mod test { Fake(Mocker), } - pub struct TmpEnv(TempDir, Arc); + pub struct TmpEnv(TempDir, Arc); impl TmpEnv { - pub fn env(&self) -> Arc { + pub fn env(&self) -> Arc { self.1.clone() } } @@ -264,7 +264,7 @@ pub mod test { } impl MockStore { - pub fn new(env: Arc) -> Result { + pub fn new(env: Arc) -> Result { Ok(Self::Real(Store::new(env)?)) } diff --git a/permissive-json-pointer/Cargo.toml b/permissive-json-pointer/Cargo.toml new file mode 100644 index 000000000..b50f30f19 --- /dev/null +++ b/permissive-json-pointer/Cargo.toml @@ -0,0 +1,12 @@ +[package] +name = "permissive-json-pointer" +version = "0.2.0" +edition = "2021" +description = "A permissive json pointer" +readme = "README.md" + +[dependencies] +serde_json = "1.0" + +[dev-dependencies] +big_s = "1.0" diff --git a/permissive-json-pointer/README.md b/permissive-json-pointer/README.md new file mode 100644 index 000000000..6a94cf00d --- /dev/null +++ b/permissive-json-pointer/README.md @@ -0,0 +1,134 @@ +# Permissive json pointer + +This crate provide an interface a little bit similar to what you know as “json pointer”. 
+But it’s actually doing something quite different. + +## The API + +The main function provided by this crate is [`select_values`]. +It takes an object and a list of selectors as parameters. +It then returns a new object containing only the fields you selected. + +## The selectors + +The syntax of the selectors is simpler than in other APIs. +There is only ONE special symbol, it’s the `.`. + +If you write `dog` and provide the following object; +```json +{ + "dog": "bob", + "cat": "michel" +} +``` +You’ll get back; +```json +{ + "dog": "bob" +} +``` + +Easy right? + +Now the dot can either be part of a field name, or denote a nested object. + +For example, if you have the following json; +```json +{ + "dog.name": "jean", + "dog": { + "name": "bob", + "age": 6 + } +} +``` + +What a crappy json! But never underestimate your users, they [_WILL_](https://xkcd.com/1172/) +somehow base their entire workflow on this kind of json. +Here with the `dog.name` selector both fields will be +selected and the following json will be returned; +```json +{ + "dog.name": "jean", + "dog": { + "name": "bob" + } +} +``` + +And as you can guess, this crate is as permissive as possible. +It’ll match everything it can! +Consider this even more crappy json; +```json +{ + "pet.dog.name": "jean", + "pet.dog": { + "name": "bob" + }, + "pet": { + "dog.name": "michel" + }, + "pet": { + "dog": { + "name": "milan" + } + } +} +``` +If you write `pet.dog.name` everything will be selected. + +## Matching arrays + +With this kind of selector you can’t match a specific element in an array. +Your selector will be applied to all the elements _in_ the array.
+ +Consider the following json; +```json +{ + "pets": [ + { + "animal": "dog", + "race": "bernese mountain" + }, + { + "animal": "dog", + "race": "golden retriever" + }, + { + "animal": "cat", + "age": 8 + } + ] +} +``` + +With the filter `pets.animal` you’ll get; +```json +{ + "pets": [ + { + "animal": "dog" + }, + { + "animal": "dog" + }, + { + "animal": "cat" + } + ] +} +``` + +Empty elements in an array get removed. So if you were to look +for `pets.age` you would only get; +```json +{ + "pets": [ + { + "age": 8 + } + ] +} +``` + +And I think that’s all you need to know 🎉 \ No newline at end of file diff --git a/permissive-json-pointer/src/lib.rs b/permissive-json-pointer/src/lib.rs new file mode 100644 index 000000000..56382beae --- /dev/null +++ b/permissive-json-pointer/src/lib.rs @@ -0,0 +1,786 @@ +#![doc = include_str!("../README.md")] + +use std::collections::HashSet; + +use serde_json::*; + +type Document = Map; + +const SPLIT_SYMBOL: char = '.'; + +/// Returns `true` if the `selector` matches the `key`. +/// +/// ```text +/// Example: +/// `animaux` match `animaux` +/// `animaux.chien` match `animaux` +/// `animaux.chien` match `animaux` +/// `animaux.chien.nom` match `animaux` +/// `animaux.chien.nom` match `animaux.chien` +/// ----------------------------------------- +/// `animaux` doesn't match `animaux.chien` +/// `animaux.chien` doesn't match `animaux.` +/// `animaux.ch` doesn't match `animaux.chien` +/// `animau` doesn't match `animaux` +/// ``` +fn contained_in(selector: &str, key: &str) -> bool { + selector.starts_with(key) + && selector[key.len()..] + .chars() + .next() + .map(|c| c == SPLIT_SYMBOL) + .unwrap_or(true) +} + +/// Map the selected leaf values of a json allowing you to update only the fields that were selected.
+/// ```
+/// use serde_json::{Value, json};
+/// use permissive_json_pointer::map_leaf_values;
+///
+/// let mut value: Value = json!({
+///     "jean": {
+///         "age": 8,
+///         "race": {
+///             "name": "bernese mountain",
+///             "size": "80cm",
+///         }
+///     }
+/// });
+/// map_leaf_values(
+///     value.as_object_mut().unwrap(),
+///     ["jean.race.name"],
+///     |key, value| match (value, key) {
+///         (Value::String(name), "jean.race.name") => *name = "patou".to_string(),
+///         _ => unreachable!(),
+///     },
+/// );
+/// assert_eq!(
+///     value,
+///     json!({
+///         "jean": {
+///             "age": 8,
+///             "race": {
+///                 "name": "patou",
+///                 "size": "80cm",
+///             }
+///         }
+///     })
+/// );
+/// ```
+///
+/// `mapper` receives the full path of each visited leaf (the keys joined with
+/// `SPLIT_SYMBOL`) together with a mutable reference to its value. Leaves stored
+/// inside an array are reported with the path of the array itself.
+pub fn map_leaf_values<'a>(
+    value: &mut Map<String, Value>,
+    selectors: impl IntoIterator<Item = &'a str>,
+    mut mapper: impl FnMut(&str, &mut Value),
+) {
+    // The selectors are scanned once per visited field, so materialize them first.
+    let selectors: Vec<_> = selectors.into_iter().collect();
+    map_leaf_values_in_object(value, &selectors, "", &mut mapper);
+}
+
+/// Recursively walks `value` and calls `mapper` on every leaf reached through a
+/// field matching one of the `selectors`.
+///
+/// `base_key` is the path of `value` itself and must be empty for the root object.
+/// The path of each field is `base_key` joined to the field name with `SPLIT_SYMBOL`.
+pub fn map_leaf_values_in_object<'a>(
+    value: &mut Map<String, Value>,
+    selectors: &[&'a str],
+    base_key: &str,
+    mapper: &mut impl FnMut(&str, &mut Value),
+) {
+    for (key, value) in value.iter_mut() {
+        let base_key = if base_key.is_empty() {
+            key.to_string()
+        } else {
+            format!("{}{}{}", base_key, SPLIT_SYMBOL, key)
+        };
+
+        // here if the user only specified `doggo` we need to iterate in all the fields of `doggo`
+        // so we check the contained_in on both side
+        let should_continue = selectors
+            .iter()
+            .any(|selector| contained_in(selector, &base_key) || contained_in(&base_key, selector));
+
+        if should_continue {
+            match value {
+                Value::Object(object) => {
+                    map_leaf_values_in_object(object, selectors, &base_key, mapper)
+                }
+                Value::Array(array) => {
+                    map_leaf_values_in_array(array, selectors, &base_key, mapper)
+                }
+                value => mapper(&base_key, value),
+            }
+        }
+    }
+}
+
+/// Applies `mapper` to every leaf found in `values`, descending into nested arrays
+/// and objects.
+///
+/// Array elements do not extend the path: every element is visited with the
+/// `base_key` of the array that contains it.
+pub fn map_leaf_values_in_array(
+    values: &mut [Value],
+    selectors: &[&str],
+    base_key: &str,
+    mapper: &mut impl FnMut(&str, &mut Value),
+) {
+    for value in values.iter_mut() {
+        match value {
+            Value::Object(object) => map_leaf_values_in_object(object, selectors, base_key, mapper),
+            Value::Array(array) => map_leaf_values_in_array(array, selectors, base_key, mapper),
+            value => mapper(base_key, value),
+        }
+    }
+}
+
+/// Permissively selects values in a json with a list of selectors.
+/// Returns a new json containing all the selected fields.
+/// ```
+/// use serde_json::*;
+/// use permissive_json_pointer::select_values;
+///
+/// let value: Value = json!({
+///     "name": "peanut",
+///     "age": 8,
+///     "race": {
+///         "name": "bernese mountain",
+///         "avg_age": 12,
+///         "size": "80cm",
+///     },
+/// });
+/// let value: &Map<String, Value> = value.as_object().unwrap();
+///
+/// let res: Value = select_values(value, vec!["name", "race.name"]).into();
+/// assert_eq!(
+///     res,
+///     json!({
+///         "name": "peanut",
+///         "race": {
+///             "name": "bernese mountain",
+///         },
+///     })
+/// );
+/// ```
+pub fn select_values<'a>(
+    value: &Map<String, Value>,
+    selectors: impl IntoIterator<Item = &'a str>,
+) -> Map<String, Value> {
+    // Collecting into a set removes duplicated selectors.
+    let selectors = selectors.into_iter().collect();
+    create_value(value, selectors)
+}
+
+/// Builds a new object holding only the parts of `value` matched by `selectors`.
+///
+/// A field whose name is itself a selector is copied entirely. Otherwise the
+/// selectors that go through the field are shortened by the field name and applied
+/// to its content; the field is kept only when that sub-selection is not empty.
+fn create_value(value: &Document, mut selectors: HashSet<&str>) -> Document {
+    let mut new_value: Document = Map::new();
+
+    for (key, value) in value.iter() {
+        // first we insert all the key at the root level
+        if selectors.contains(key as &str) {
+            new_value.insert(key.to_string(), value.clone());
+            // if the key was simple no other field can match this selector,
+            // so we can delete it
+            if is_simple(key) {
+                selectors.remove(key as &str);
+            }
+            // The whole field has been copied: a more specific selector must not
+            // replace it with a narrower sub-selection, so we move to the next key.
+            continue;
+        }
+
+        // we extract all the sub selectors matching the current field
+        // if there was [person.name, person.age] and if we are on the field
+        // `person`. Then we generate the following sub selectors: [name, age].
+        //
+        // `strip_prefix` removes the key exactly once; `trim_start_matches` would
+        // also eat the beginning of the sub selector when it repeats the key.
+        let sub_selectors: HashSet<&str> = selectors
+            .iter()
+            .filter(|s| contained_in(s, key))
+            .filter_map(|s| s.strip_prefix(key.as_str())?.get(SPLIT_SYMBOL.len_utf8()..))
+            .collect();
+
+        if !sub_selectors.is_empty() {
+            match value {
+                Value::Array(array) => {
+                    let array = create_array(array, &sub_selectors);
+                    if !array.is_empty() {
+                        new_value.insert(key.to_string(), array.into());
+                    }
+                }
+                Value::Object(object) => {
+                    let object = create_value(object, sub_selectors);
+                    if !object.is_empty() {
+                        new_value.insert(key.to_string(), object.into());
+                    }
+                }
+                // a leaf cannot be traversed any further by a sub selector
+                _ => (),
+            }
+        }
+    }
+
+    new_value
+}
+
+/// Applies `selectors` to every element of `array`.
+///
+/// Nested arrays and objects are filtered recursively and kept only when something
+/// remains in them; any other kind of element is dropped.
+fn create_array(array: &[Value], selectors: &HashSet<&str>) -> Vec<Value> {
+    let mut res = Vec::new();
+
+    for value in array {
+        match value {
+            Value::Array(array) => {
+                let array = create_array(array, selectors);
+                if !array.is_empty() {
+                    res.push(array.into());
+                }
+            }
+            Value::Object(object) => {
+                // `create_value` consumes its selectors, each object needs its own copy
+                let object = create_value(object, selectors.clone());
+                if !object.is_empty() {
+                    res.push(object.into());
+                }
+            }
+            _ => (),
+        }
+    }
+
+    res
+}
+
+/// Returns `true` when `key` does not contain `SPLIT_SYMBOL`.
+fn is_simple(key: impl AsRef<str>) -> bool {
+    !key.as_ref().contains(SPLIT_SYMBOL)
+}
+
+#[cfg(test)]
+mod tests {
+    use big_s::S;
+
+    use super::*;
+
+    #[test]
+    fn test_contained_in() {
+        assert!(contained_in("animaux", "animaux"));
+        assert!(contained_in("animaux.chien", "animaux"));
+        assert!(contained_in(
+            "animaux.chien.race.bouvier bernois.fourrure.couleur",
+            "animaux"
+        ));
+        assert!(contained_in(
+            "animaux.chien.race.bouvier bernois.fourrure.couleur",
+            "animaux.chien"
+        ));
+        assert!(contained_in(
+            "animaux.chien.race.bouvier bernois.fourrure.couleur",
+            "animaux.chien.race.bouvier bernois"
+        ));
+        assert!(contained_in(
+            "animaux.chien.race.bouvier bernois.fourrure.couleur",
+            "animaux.chien.race.bouvier bernois.fourrure"
+        ));
+        assert!(contained_in(
+            "animaux.chien.race.bouvier bernois.fourrure.couleur",
+            "animaux.chien.race.bouvier bernois.fourrure.couleur"
+        ));
+
+        // -- the wrongs
+        assert!(!contained_in("chien", "chat"));
+        assert!(!contained_in("animaux", "animaux.chien"));
+        assert!(!contained_in("animaux.chien", "animaux.chat"));
+
+        // -- the strange edge cases
+        assert!(!contained_in("animaux.chien", "anima"));
+        assert!(!contained_in("animaux.chien", "animau"));
+        assert!(!contained_in("animaux.chien", "animaux."));
+        assert!(!contained_in("animaux.chien", "animaux.c"));
+        assert!(!contained_in("animaux.chien", "animaux.ch"));
+        assert!(!contained_in("animaux.chien", "animaux.chi"));
+        assert!(!contained_in("animaux.chien", "animaux.chie"));
+    }
+
+    #[test]
+    fn simple_key() {
+        let value: Value = json!({
+            "name": "peanut",
+            "age": 8,
+            "race": {
+                "name": "bernese mountain",
+                "avg_age": 12,
+                "size": "80cm",
+            }
+        });
+        let value: &Document = value.as_object().unwrap();
+
+        let res: Value = select_values(value, vec!["name"]).into();
+        assert_eq!(
+            res,
+            json!({
+                "name": "peanut",
+            })
+        );
+
+        let res: Value = select_values(value, vec!["age"]).into();
+        assert_eq!(
+            res,
+            json!({
+                "age": 8,
+            })
+        );
+
+        let res: Value = select_values(value, vec!["name", "age"]).into();
+        assert_eq!(
+            res,
+            json!({
+                "name": "peanut",
+                "age": 8,
+            })
+        );
+
+        let res: Value = select_values(value, vec!["race"]).into();
+        assert_eq!(
+            res,
+            json!({
+                "race": {
+                    "name": "bernese mountain",
+                    "avg_age": 12,
+                    "size": "80cm",
+                }
+            })
+        );
+
+        let res: Value = select_values(value, vec!["name", "age", "race"]).into();
+        assert_eq!(
+            res,
+            json!({
+                "name": "peanut",
+                "age": 8,
+                "race": {
+                    "name": "bernese mountain",
+                    "avg_age": 12,
+                    "size": "80cm",
+                }
+            })
+        );
+    }
+
+    #[test]
+    fn complex_key() {
+        let value: Value = json!({
+            "name": "peanut",
+            "age": 8,
+            "race": {
+                "name": "bernese mountain",
+                "avg_age": 12,
+                "size": "80cm",
+            }
+        });
+        let value: &Document = value.as_object().unwrap();
+
+        let res: Value = select_values(value, vec!["race"]).into();
+        assert_eq!(
+            res,
+            json!({
+                "race": {
+                    "name": "bernese mountain",
+                    "avg_age": 12,
+                    "size": "80cm",
+                }
+            })
+        );
+
+        let res: Value = select_values(value, vec!["race.name"]).into();
+        assert_eq!(
+            res,
+            json!({
+                "race": {
+                    "name": "bernese mountain",
+                }
+            })
+        );
+
+        let res: Value = select_values(value, vec!["race.name", "race.size"]).into();
+        assert_eq!(
+            res,
+            json!({
+                "race": {
+                    "name": "bernese mountain",
+                    "size": "80cm",
+                }
+            })
+        );
+
+        let res: Value = select_values(
+            value,
+            vec!["race.name", "race.size", "race.avg_age", "race.size", "age"],
+        )
+        .into();
+        assert_eq!(
+            res,
+            json!({
+                "age": 8,
+                "race": {
+                    "name": "bernese mountain",
+                    "avg_age": 12,
+                    "size": "80cm",
+                }
+            })
+        );
+
+        let res: Value = select_values(value, vec!["race.name", "race"]).into();
+        assert_eq!(
+            res,
+            json!({
+                "race": {
+                    "name": "bernese mountain",
+                    "avg_age": 12,
+                    "size": "80cm",
+                }
+            })
+        );
+
+        let res: Value = select_values(value, vec!["race", "race.name"]).into();
+        assert_eq!(
+            res,
+            json!({
+                "race": {
+                    "name": "bernese mountain",
+                    "avg_age": 12,
+                    "size": "80cm",
+                }
+            })
+        );
+    }
+
+    #[test]
+    fn multi_level_nested() {
+        let value: Value = json!({
+            "jean": {
+                "age": 8,
+                "race": {
+                    "name": "bernese mountain",
+                    "size": "80cm",
+                }
+            }
+        });
+        let value: &Document = value.as_object().unwrap();
+
+        let res: Value = select_values(value, vec!["jean"]).into();
+        assert_eq!(
+            res,
+            json!({
+                "jean": {
+                    "age": 8,
+                    "race": {
+                        "name": "bernese mountain",
+                        "size": "80cm",
+                    }
+                }
+            })
+        );
+
+        let res: Value = select_values(value, vec!["jean.age"]).into();
+        assert_eq!(
+            res,
+            json!({
+                "jean": {
+                    "age": 8,
+                }
+            })
+        );
+
+        let res: Value = select_values(value, vec!["jean.race.size"]).into();
+        assert_eq!(
+            res,
+            json!({
+                "jean": {
+                    "race": {
+                        "size": "80cm",
+                    }
+                }
+            })
+        );
+
+        let res: Value = select_values(value, vec!["jean.race.name", "jean.age"]).into();
+        assert_eq!(
+            res,
+            json!({
+                "jean": {
+                    "age": 8,
+                    "race": {
+                        "name": "bernese mountain",
+                    }
+                }
+            })
+        );
+
+        let res: Value = select_values(value, vec!["jean.race"]).into();
+        assert_eq!(
+            res,
+            json!({
+                "jean": {
+                    "race": {
+                        "name": "bernese mountain",
+                        "size": "80cm",
+                    }
+                }
+            })
+        );
+    }
+
+    #[test]
+    fn array_and_deep_nested() {
+        let value: Value = json!({
+            "doggos": [
+                {
+                    "jean": {
+                        "age": 8,
+                        "race": {
+                            "name": "bernese mountain",
+                            "size": "80cm",
+                        }
+                    }
+                },
+                {
+                    "marc": {
+                        "age": 4,
+                        "race": {
+                            "name": "golden retriever",
+                            "size": "60cm",
+                        }
+                    }
+                },
+            ]
+        });
+        let value: &Document = value.as_object().unwrap();
+
+        let res: Value = select_values(value, vec!["doggos.jean"]).into();
+        assert_eq!(
+            res,
+            json!({
+                "doggos": [
+                    {
+                        "jean": {
+                            "age": 8,
+                            "race": {
+                                "name": "bernese mountain",
+                                "size": "80cm",
+                            }
+                        }
+                    }
+                ]
+            })
+        );
+
+        let res: Value = select_values(value, vec!["doggos.marc"]).into();
+        assert_eq!(
+            res,
+            json!({
+                "doggos": [
+                    {
+                        "marc": {
+                            "age": 4,
+                            "race": {
+                                "name": "golden retriever",
+                                "size": "60cm",
+                            }
+                        }
+                    }
+                ]
+            })
+        );
+
+        let res: Value = select_values(value, vec!["doggos.marc.race"]).into();
+        assert_eq!(
+            res,
+            json!({
+                "doggos": [
+                    {
+                        "marc": {
+                            "race": {
+                                "name": "golden retriever",
+                                "size": "60cm",
+                            }
+                        }
+                    }
+                ]
+            })
+        );
+
+        let res: Value =
+            select_values(value, vec!["doggos.marc.race.name", "doggos.marc.age"]).into();
+
+        assert_eq!(
+            res,
+            json!({
+                "doggos": [
+                    {
+                        "marc": {
+                            "age": 4,
+                            "race": {
+                                "name": "golden retriever",
+                            }
+                        }
+                    }
+                ]
+            })
+        );
+
+        let res: Value = select_values(
+            value,
+            vec![
+                "doggos.marc.race.name",
+                "doggos.marc.age",
+                "doggos.jean.race.name",
+                "other.field",
+            ],
+        )
+        .into();
+
+        assert_eq!(
+            res,
+            json!({
+                "doggos": [
+                    {
+                        "jean": {
+                            "race": {
+                                "name": "bernese mountain",
+                            }
+                        }
+                    },
+                    {
+                        "marc": {
+                            "age": 4,
+                            "race": {
+                                "name": "golden retriever",
+                            }
+                        }
+                    }
+                ]
+            })
+        );
+    }
+
+    #[test]
+    fn all_conflict_variation() {
+        // The literal below repeats the `pet` key. The expected value is built from
+        // the very same literal, so both sides agree on which entry is kept.
+        let value: Value = json!({
+            "pet.dog.name": "jean",
+            "pet.dog": {
+                "name": "bob"
+            },
+            "pet": {
+                "dog.name": "michel"
+            },
+            "pet": {
+                "dog": {
+                    "name": "milan"
+                }
+            }
+        });
+        let value: &Document = value.as_object().unwrap();
+
+        let res: Value = select_values(value, vec!["pet.dog.name"]).into();
+        assert_eq!(
+            res,
+            json!({
+                "pet.dog.name": "jean",
+                "pet.dog": {
+                    "name": "bob"
+                },
+                "pet": {
+                    "dog.name": "michel"
+                },
+                "pet": {
+                    "dog": {
+                        "name": "milan"
+                    }
+                }
+            })
+        );
+
+        let value: Value = json!({
+            "pet.dog.name": "jean",
+            "pet.dog": {
+                "name": "bob",
+            },
+            "pet": {
+                "dog.name": "michel",
+                "dog": {
+                    "name": "milan",
+                }
+            }
+        });
+        let value: &Document = value.as_object().unwrap();
+
+        let res: Value = select_values(value, vec!["pet.dog.name", "pet.dog", "pet"]).into();
+
+        assert_eq!(
+            res,
+            json!({
+                "pet.dog.name": "jean",
+                "pet.dog": {
+                    "name": "bob",
+                },
+                "pet": {
+                    "dog.name": "michel",
+                    "dog": {
+                        "name": "milan",
+                    }
+                }
+            })
+        );
+    }
+
+    #[test]
+    fn map_object() {
+        let mut value: Value = json!({
+            "jean": {
+                "age": 8,
+                "race": {
+                    "name": "bernese mountain",
+                    "size": "80cm",
+                }
+            }
+        });
+
+        map_leaf_values(
+            value.as_object_mut().unwrap(),
+            ["jean.race.name"],
+            |key, value| match (value, key) {
+                (Value::String(name), "jean.race.name") => *name = S("patou"),
+                _ => unreachable!(),
+            },
+        );
+
+        assert_eq!(
+            value,
+            json!({
+                "jean": {
+                    "age": 8,
+                    "race": {
+                        "name": "patou",
+                        "size": "80cm",
+                    }
+                }
+            })
+        );
+
+        let mut value: Value = json!({
+            "jean": {
+                "age": 8,
+                "race": {
+                    "name": "bernese mountain",
+                    "size": "80cm",
+                }
+            },
+            "bob": "lolpied",
+        });
+
+        let mut calls = 0;
+        map_leaf_values(value.as_object_mut().unwrap(), ["jean"], |key, value| {
+            calls += 1;
+            match (value, key) {
+                (Value::String(name), "jean.race.name") => *name = S("patou"),
+                _ => println!("Called with {key}"),
+            }
+        });
+
+        // `jean.age`, `jean.race.name` and `jean.race.size`; `bob` is not selected.
+        assert_eq!(calls, 3);
+        assert_eq!(
+            value,
+            json!({
+                "jean": {
+                    "age": 8,
+                    "race": {
+                        "name": "patou",
+                        "size": "80cm",
+                    }
+                },
+                "bob": "lolpied",
+            })
+        );
+    }
+
+    #[test]
+    fn fully_selected_complex_key_is_not_narrowed() {
+        let value: Value = json!({
+            "pet.dog": {
+                "name": "bob",
+                "age": 3,
+            }
+        });
+        let value: &Document = value.as_object().unwrap();
+
+        // `pet.dog` selects the whole field, `pet.dog.name` must not shrink it.
+        let res: Value = select_values(value, vec!["pet.dog", "pet.dog.name"]).into();
+        assert_eq!(
+            res,
+            json!({
+                "pet.dog": {
+                    "name": "bob",
+                    "age": 3,
+                }
+            })
+        );
+    }
+
+    #[test]
+    fn key_starting_with_split_symbol() {
+        let value: Value = json!({
+            ".a": {
+                "a": {
+                    "b": 1,
+                    "c": 2,
+                }
+            }
+        });
+        let value: &Document = value.as_object().unwrap();
+
+        // The key `.a` must be removed from the selector only once.
+        let res: Value = select_values(value, vec![".a.a.b"]).into();
+        assert_eq!(
+            res,
+            json!({
+                ".a": {
+                    "a": {
+                        "b": 1,
+                    }
+                }
+            })
+        );
+    }
+}