diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 000000000..601f711ff --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,3410 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "addr2line" +version = "0.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3e61f2b7f93d2c7d2b08263acaa4a363b3e276806c68af6134c44f523bf1aacd" +dependencies = [ + "gimli", +] + +[[package]] +name = "adler" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" + +[[package]] +name = "ahash" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8fd72866655d1904d6b0997d0b07ba561047d070fbe29de039031c641b61217" + +[[package]] +name = "aho-corasick" +version = "0.7.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e37cfd5e7657ada45f742d6e99ca5788580b5c529dc78faf11ece6dc702656f" +dependencies = [ + "memchr", +] + +[[package]] +name = "ansi_term" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee49baf6cb617b853aa8d93bf420db2383fab46d314482ca2803b40d5fde979b" +dependencies = [ + "winapi 0.3.9", +] + +[[package]] +name = "anyhow" +version = "1.0.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61604a8f862e1d5c3229fdd78f8b02c68dcf73a4c4b05fd636d12240aaa242c1" + +[[package]] +name = "arrayvec" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23b62fc65de8e4e7f52534fb52b0f3ed04746ae267519eef2a83941e8085068b" + +[[package]] +name = "as-slice" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "45403b49e3954a4b8428a0ac21a4b7afadccf92bfd96273f1a58cd4812496ae0" +dependencies = [ + "generic-array 0.12.4", + "generic-array 0.13.3", + "generic-array 0.14.4", + "stable_deref_trait", +] + +[[package]] +name = "askama" +version = "0.10.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d298738b6e47e1034e560e5afe63aa488fea34e25ec11b855a76f0d7b8e73134" +dependencies = [ + "askama_derive", + "askama_escape", + "askama_shared", + "mime", + "mime_guess", +] + +[[package]] +name = "askama_derive" +version = "0.10.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca2925c4c290382f9d2fa3d1c1b6a63fa1427099721ecca4749b154cc9c25522" +dependencies = [ + "askama_shared", + "proc-macro2 1.0.29", + "syn 1.0.76", +] + +[[package]] +name = "askama_escape" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90c108c1a94380c89d2215d0ac54ce09796823cca0fd91b299cfff3b33e346fb" + +[[package]] +name = "askama_shared" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2582b77e0f3c506ec4838a25fa8a5f97b9bed72bb6d3d272ea1c031d8bd373bc" +dependencies = [ + "askama_escape", + "humansize", + "nom", + "num-traits", + "percent-encoding", + "proc-macro2 1.0.29", + "quote 1.0.9", + "serde", + "syn 1.0.76", + "toml", +] + +[[package]] +name = "askama_warp" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c96f410ab17fa08f70b5fda07ce1112418642c914864961630808979343ea226" +dependencies = [ + "askama", + "warp", +] + +[[package]] +name = "atty" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" +dependencies = [ + "hermit-abi", + "libc", + "winapi 0.3.9", +] + +[[package]] +name = "autocfg" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cdb031dd78e28731d87d56cc8ffef4a8f36ca26c38fe2de700543e627f8a464a" + +[[package]] +name = "backtrace" +version = "0.3.61" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7a905d892734eea339e896738c14b9afce22b5318f64b951e70bf3844419b01" +dependencies = [ + "addr2line", + "cc", + "cfg-if 1.0.0", + "libc", + "miniz_oxide", + "object", + "rustc-demangle", +] + +[[package]] +name = "base64" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3441f0f7b02788e948e47f457ca01f1d7e6d92c693bc132c22b087d3141c03ff" + +[[package]] +name = "base64" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "904dfeac50f3cdaba28fc6f57fdcddb75f49ed61346676a78c4ffe55877802fd" + +[[package]] +name = "benchmarks" +version = "0.1.0" +dependencies = [ + "anyhow", + "bytes 1.1.0", + "convert_case", + "criterion", + "flate2", + "heed", + "jemallocator", + "milli", + "reqwest", +] + +[[package]] +name = "big_s" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "199edb7b90631283b10c2422e6a0bc8b7d987bf732995ba1de53b576c97e51a8" + +[[package]] +name = "bimap" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "50ae17cabbc8a38a1e3e4c1a6a664e9a09672dc14d0896fa8d865d3a5a446b07" +dependencies = [ + "serde", +] + +[[package]] +name = "bincode" +version = "1.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1f45e9417d87227c7a56d22e471c6206462cba514c7590c09aff4cf6d1ddcad" +dependencies = [ + "serde", +] + +[[package]] +name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + +[[package]] +name = "bitvec" +version = "0.19.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8942c8d352ae1838c9dda0b0ca2ab657696ef2232a20147cf1b30ae1a9cb4321" +dependencies = [ + "funty", + "radium", + "tap", + "wyz", +] + +[[package]] +name = "block-buffer" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c0940dc441f31689269e10ac70eb1002a3a1d3ad1390e030043662eb7fe4688b" +dependencies = [ + "block-padding", + "byte-tools", + "byteorder", + "generic-array 0.12.4", +] + +[[package]] +name = "block-buffer" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4152116fd6e9dadb291ae18fc1ec3575ed6d84c29642d97890f4b4a3417297e4" +dependencies = [ + "generic-array 0.14.4", +] + +[[package]] +name = "block-padding" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa79dedbb091f449f1f39e53edf88d5dbe95f895dae6135a8d7b881fb5af73f5" +dependencies = [ + "byte-tools", +] + +[[package]] +name = "bstr" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba3569f383e8f1598449f1a423e72e99569137b47740b1da11ef19af3d5c3223" +dependencies = [ + "lazy_static", + "memchr", + "regex-automata", + "serde", +] + +[[package]] +name = "buf_redux" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b953a6887648bb07a535631f2bc00fbdb2a2216f135552cb3f534ed136b9c07f" +dependencies = [ + "memchr", + "safemem", +] + +[[package]] +name = "bumpalo" +version = "3.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9df67f7bf9ef8498769f994239c45613ef0c5899415fb58e9add412d2c1a538" + +[[package]] +name = "byte-tools" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3b5ca7a04898ad4bcd41c90c5285445ff5b791899bb1b0abdd2a2aa791211d7" + +[[package]] +name = "byte-unit" +version = "4.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "063197e6eb4b775b64160dedde7a0986bb2836cce140e9492e9e96f28e18bcd8" +dependencies = [ + "serde", + "utf8-width", +] + +[[package]] +name = "bytemuck" +version = "1.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72957246c41db82b8ef88a5486143830adeb8227ef9837740bdec67724cf2c5b" +dependencies = [ + "bytemuck_derive", +] + +[[package]] +name = "bytemuck_derive" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e215f8c2f9f79cb53c8335e687ffd07d5bfcb6fe5fc80723762d0be46e7cc54" +dependencies = [ + "proc-macro2 1.0.29", + "quote 1.0.9", + "syn 1.0.76", +] + +[[package]] +name = "byteorder" +version = "1.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610" + +[[package]] +name = "bytes" +version = "0.5.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0e4cec68f03f32e44924783795810fa50a7035d8c8ebe78580ad7e6c703fba38" + +[[package]] +name = "bytes" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4872d67bab6358e59559027aa3b9157c53d9358c51423c17554809a8858e0f8" + +[[package]] +name = "cast" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c24dab4283a142afa2fdca129b80ad2c6284e073930f964c3a1293c225ee39a" +dependencies = [ + "rustc_version", +] + +[[package]] +name = "cc" +version = "1.0.70" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d26a6ce4b6a484fa3edb70f7efa6fc430fd2b87285fe8b84304fd0936faa0dc0" + +[[package]] +name = "cedarwood" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "963e82c7b94163808ca3a452608d260b64ba5bc7b5653b4af1af59887899f48d" +dependencies = [ + "smallvec", +] + +[[package]] +name = "cfg-if" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822" + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "character_converter" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e48477ece09d6a21c033cb604968524a37782532727055d6f6faafac1781e5c" +dependencies = [ + "bincode", +] + +[[package]] +name = "chrono" +version = "0.4.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "670ad68c9088c2a963aaa298cb369688cf3f9465ce5e2d4ca10e6e0098a1ce73" +dependencies = [ + "libc", + "num-integer", + "num-traits", + "serde", + "time", + "winapi 0.3.9", +] + +[[package]] +name = "clap" +version = "2.33.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37e58ac78573c40708d45522f0d80fa2f01cc4f9b4e2bf749807255454312002" +dependencies = [ + "ansi_term", + "atty", + "bitflags", + "strsim", + "term_size", + "textwrap", + "unicode-width", + "vec_map", +] + +[[package]] +name = "cli" +version = "0.1.0" +dependencies = [ + "bimap", + "byte-unit", + "color-eyre", + "csv", + "eyre", + "heed", + "indicatif", + "jemallocator", + "milli", + "serde", + "serde_json", + "stderrlog", + "structopt", +] + +[[package]] +name = "color-eyre" +version = "0.5.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f1885697ee8a177096d42f158922251a41973117f6d8a234cee94b9509157b7" +dependencies = [ + "backtrace", + "color-spantrace", + "eyre", + "indenter", + "once_cell", + "owo-colors", + "tracing-error", +] + +[[package]] +name = "color-spantrace" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6eee477a4a8a72f4addd4de416eb56d54bc307b284d6601bafdee1f4ea462d1" +dependencies = [ + "once_cell", + "owo-colors", + "tracing-core", + "tracing-error", +] + +[[package]] +name = "concat-arrays" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1df715824eb382e34b7afb7463b0247bf41538aeba731fba05241ecdb5dc3747" +dependencies = [ + "proc-macro2 1.0.29", + "quote 1.0.9", + "syn 1.0.76", +] + +[[package]] +name = "console" +version = "0.14.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3993e6445baa160675931ec041a5e03ca84b9c6e32a056150d3aa2bdda0a1f45" +dependencies = [ + "encode_unicode", + "lazy_static", + "libc", + "terminal_size", + "winapi 0.3.9", +] + +[[package]] +name = "convert_case" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6245d59a3e82a7fc217c5828a6692dbc6dfb63a0c8c90495621f7b9d79704a0e" + +[[package]] +name = "cow-utils" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "79bb3adfaf5f75d24b01aee375f7555907840fa2800e5ec8fa3b9e2031830173" + +[[package]] +name = "cpufeatures" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95059428f66df56b63431fdb4e1947ed2190586af5c5a8a8b71122bdf5a7f469" +dependencies = [ + "libc", +] + +[[package]] +name = "crc32fast" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "81156fece84ab6a9f2afdb109ce3ae577e42b1228441eded99bd77f627953b1a" +dependencies = [ + "cfg-if 1.0.0", +] + +[[package]] +name = "criterion" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1604dafd25fba2fe2d5895a9da139f8dc9b319a5fe5354ca137cbbce4e178d10" +dependencies = [ + "atty", + "cast", + "clap", + "criterion-plot", + "csv", + "itertools", + "lazy_static", + "num-traits", + "oorandom", + "plotters", + "rayon", + "regex", + "serde", + "serde_cbor", + "serde_derive", + "serde_json", + "tinytemplate", + "walkdir", +] + +[[package]] +name = "criterion-plot" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d00996de9f2f7559f7f4dc286073197f83e92256a59ed395f9aac01fe717da57" +dependencies = [ + "cast", + "itertools", +] + +[[package]] +name = "crossbeam-channel" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06ed27e177f16d65f0f0c22a213e17c696ace5dd64b14258b52f9417ccb52db4" +dependencies = [ + "cfg-if 1.0.0", + "crossbeam-utils 0.8.5", +] + +[[package]] +name = "crossbeam-deque" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6455c0ca19f0d2fbf751b908d5c55c1f5cbc65e03c4225427254b46890bdde1e" +dependencies = [ + "cfg-if 1.0.0", + "crossbeam-epoch", + "crossbeam-utils 0.8.5", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ec02e091aa634e2c3ada4a392989e7c3116673ef0ac5b72232439094d73b7fd" +dependencies = [ + "cfg-if 1.0.0", + "crossbeam-utils 0.8.5", + "lazy_static", + "memoffset", + "scopeguard", +] + +[[package]] +name = "crossbeam-queue" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c979cd6cfe72335896575c6b5688da489e420d36a27a0b9eb0c73db574b4a4b" +dependencies = [ + "crossbeam-utils 0.6.6", +] + +[[package]] +name = "crossbeam-utils" +version = "0.6.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "04973fa96e96579258a5091af6003abde64af786b860f18622b82e026cca60e6" +dependencies = [ + "cfg-if 0.1.10", + "lazy_static", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d82cfc11ce7f2c3faef78d8a684447b40d503d9681acebed6cb728d45940c4db" +dependencies = [ + "cfg-if 1.0.0", + "lazy_static", +] + +[[package]] +name = "csv" +version = "1.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22813a6dc45b335f9bade10bf7271dc477e81113e89eb251a0bc2a8a81c536e1" +dependencies = [ + "bstr", + "csv-core", + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "csv-core" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b2466559f260f48ad25fe6317b3c8dac77b5bdb5763ac7d9d6103530663bc90" +dependencies = [ + "memchr", +] + +[[package]] +name = "deunicode" +version = "1.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2c9736e15e7df1638a7f6eee92a6511615c738246a052af5ba86f039b65aede" + +[[package]] +name = "digest" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3d0c8c8752312f9713efd397ff63acb9f85585afbf179282e720e7704954dd5" +dependencies = [ + "generic-array 0.12.4", +] + +[[package]] +name = "digest" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3dd60d1080a57a05ab032377049e0591415d2b31afd7028356dbf3cc6dcb066" +dependencies = [ + "generic-array 0.14.4", +] + +[[package]] +name = "dtoa" +version = "0.4.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56899898ce76aaf4a0f24d914c97ea6ed976d42fec6ad33fcbb0a1103e07b2b0" + +[[package]] +name = "either" +version = "1.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e78d4f1cc4ae33bbfc157ed5d5a5ef3bc29227303d595861deb238fcec4e9457" + +[[package]] +name = "encode_unicode" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f" + +[[package]] +name = "encoding_rs" +version = "0.8.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "80df024fbc5ac80f87dfef0d9f5209a252f2a497f7f42944cff24d8253cac065" +dependencies = [ + "cfg-if 1.0.0", +] + +[[package]] +name = "eyre" +version = "0.6.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "221239d1d5ea86bf5d6f91c9d6bc3646ffe471b08ff9b0f91c44f115ac969d2b" +dependencies = [ + "indenter", + "once_cell", +] + +[[package]] +name = "fake-simd" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e88a8acf291dafb59c2d96e8f59828f3838bb1a70398823ade51a84de6a6deed" + +[[package]] +name = "flate2" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e6988e897c1c9c485f43b47a529cef42fde0547f9d8d41a7062518f1d8fc53f" +dependencies = [ + "cfg-if 1.0.0", + "crc32fast", + "libc", + "miniz_oxide", +] + +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + +[[package]] +name = "form_urlencoded" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5fc25a87fa4fd2094bffb06925852034d90a17f0d1e05197d4956d3555752191" +dependencies = [ + "matches", + "percent-encoding", +] + +[[package]] +name = "fs_extra" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2022715d62ab30faffd124d40b76f4134a550a87792276512b18d63272333394" + +[[package]] +name = "fst" +version = "0.4.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ab85b9b05e3978cc9a9cf8fea7f01b494e1a09ed3037e16ba39edc7a29eb61a" + +[[package]] +name = "fuchsia-zircon" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e9763c69ebaae630ba35f74888db465e49e259ba1bc0eda7d06f4a067615d82" +dependencies = [ + "bitflags", + "fuchsia-zircon-sys", +] + +[[package]] +name = "fuchsia-zircon-sys" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3dcaa9ae7725d12cdb85b3ad99a434db70b468c09ded17e012d86b5c1010f7a7" + +[[package]] +name = "funty" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fed34cd105917e91daa4da6b3728c47b068749d6a62c59811f06ed2ac71d9da7" + +[[package]] +name = "futures" +version = "0.3.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a12aa0eb539080d55c3f2d45a67c3b58b6b0773c1a3ca2dfec66d58c97fd66ca" +dependencies = [ + "futures-channel", + "futures-core", + "futures-executor", + "futures-io", + "futures-sink", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-channel" +version = "0.3.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5da6ba8c3bb3c165d3c7319fc1cc8304facf1fb8db99c5de877183c08a273888" +dependencies = [ + "futures-core", + "futures-sink", +] + +[[package]] +name = "futures-core" +version = "0.3.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88d1c26957f23603395cd326b0ffe64124b818f4449552f960d815cfba83a53d" + +[[package]] +name = "futures-executor" +version = "0.3.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "45025be030969d763025784f7f355043dc6bc74093e4ecc5000ca4dc50d8745c" +dependencies = [ + "futures-core", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-io" +version = "0.3.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "522de2a0fe3e380f1bc577ba0474108faf3f6b18321dbf60b3b9c39a75073377" + +[[package]] +name = "futures-macro" +version = "0.3.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "18e4a4b95cea4b4ccbcf1c5675ca7c4ee4e9e75eb79944d07defde18068f79bb" +dependencies = [ + "autocfg", + "proc-macro-hack", + "proc-macro2 1.0.29", + "quote 1.0.9", + "syn 1.0.76", +] + +[[package]] +name = "futures-sink" +version = "0.3.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "36ea153c13024fe480590b3e3d4cad89a0cfacecc24577b68f86c6ced9c2bc11" + +[[package]] +name = "futures-task" +version = "0.3.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d3d00f4eddb73e498a54394f228cd55853bdf059259e8e7bc6e69d408892e99" + +[[package]] +name = "futures-util" +version = "0.3.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "36568465210a3a6ee45e1f165136d68671471a501e632e9a98d96872222b5481" +dependencies = [ + "autocfg", + "futures-channel", + "futures-core", + "futures-io", + "futures-macro", + "futures-sink", + "futures-task", + "memchr", + "pin-project-lite 0.2.7", + "pin-utils", + "proc-macro-hack", + "proc-macro-nested", + "slab", +] + +[[package]] +name = "fxhash" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c" +dependencies = [ + "byteorder", +] + +[[package]] +name = "generic-array" +version = "0.12.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ffdf9f34f1447443d37393cc6c2b8313aebddcd96906caf34e54c68d8e57d7bd" +dependencies = [ + "typenum", +] + +[[package]] +name = "generic-array" +version = "0.13.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f797e67af32588215eaaab8327027ee8e71b9dd0b2b26996aedf20c030fce309" +dependencies = [ + "typenum", +] + +[[package]] +name = "generic-array" +version = "0.14.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "501466ecc8a30d1d3b7fc9229b122b2ce8ed6e9d9223f1138d4babb253e51817" +dependencies = [ + "typenum", + "version_check", +] + +[[package]] +name = "geoutils" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e006f616a407d396ace1d2ebb3f43ed73189db8b098079bd129928d7645dd1e" + +[[package]] +name = "getrandom" +version = "0.1.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fc3cb4d91f53b50155bdcfd23f6a4c39ae1969c2ae85982b135750cccaf5fce" +dependencies = [ + "cfg-if 1.0.0", + "libc", + "wasi 0.9.0+wasi-snapshot-preview1", +] + +[[package]] +name = "getrandom" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7fcd999463524c52659517fe2cea98493cfe485d10565e7b0fb07dbba7ad2753" +dependencies = [ + "cfg-if 1.0.0", + "libc", + "wasi 0.10.0+wasi-snapshot-preview1", +] + +[[package]] +name = "gimli" +version = "0.25.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0a01e0497841a3b2db4f8afa483cce65f7e96a3498bd6c541734792aeac8fe7" + +[[package]] +name = "grenad" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a7a9cc43b28a20f791b17863f34a36654fdfa50be6d0a67bb18c1e34d145f18" +dependencies = [ + "bytemuck", + "byteorder", + "tempfile", +] + +[[package]] +name = "h2" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e4728fd124914ad25e99e3d15a9361a879f6620f63cb56bbb08f95abb97a535" +dependencies = [ + "bytes 0.5.6", + "fnv", + "futures-core", + "futures-sink", + "futures-util", + "http", + "indexmap", + "slab", + "tokio 0.2.25", + "tokio-util 0.3.1", + "tracing", + "tracing-futures", +] + +[[package]] +name = "h2" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7f3675cfef6a30c8031cf9e6493ebdc3bb3272a3fea3923c4210d1830e6a472" +dependencies = [ + "bytes 1.1.0", + "fnv", + "futures-core", + "futures-sink", + "futures-util", + "http", + "indexmap", + "slab", + "tokio 1.11.0", + "tokio-util 0.6.8", + "tracing", +] + +[[package]] +name = "half" +version = "1.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62aca2aba2d62b4a7f5b33f3712cb1b0692779a56fb510499d5c0aa594daeaf3" + +[[package]] +name = "hash32" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4041af86e63ac4298ce40e5cca669066e75b6f1aa3390fe2561ffa5e1d9f4cc" +dependencies = [ + "byteorder", +] + +[[package]] +name = "hashbrown" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96282e96bfcd3da0d3aa9938bedf1e50df3269b6db08b4876d2da0bb1a0841cf" +dependencies = [ + "ahash", + "autocfg", +] + +[[package]] +name = "hashbrown" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab5ef0d4909ef3724cc8cce6ccc8572c5c817592e9285f5464f8e86f8bd3726e" + +[[package]] +name = "headers" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0b7591fb62902706ae8e7aaff416b1b0fa2c0fd0878b46dc13baa3712d8a855" +dependencies = [ + "base64 0.13.0", + "bitflags", + "bytes 1.1.0", + "headers-core", + "http", + "mime", + "sha-1 0.9.8", + "time", +] + +[[package]] +name = "headers-core" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7f66481bfee273957b1f20485a4ff3362987f85b2c236580d81b4eb7a326429" +dependencies = [ + "http", +] + +[[package]] +name = "heapless" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "634bd4d29cbf24424d0a4bfcbf80c6960129dc24424752a7d1d1390607023422" +dependencies = [ + "as-slice", + "generic-array 0.14.4", + "hash32", + "stable_deref_trait", +] + +[[package]] +name = "heck" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d621efb26863f0e9924c6ac577e8275e5e6b77455db64ffa6c65c904e9e132c" +dependencies = [ + "unicode-segmentation", +] + +[[package]] +name = "heed" +version = "0.12.1" +source = "git+https://github.com/Kerollmops/heed?tag=v0.12.1#fc017cf3394af737f92fd71e16f0499a78b79d65" +dependencies = [ + "byteorder", + "heed-traits", + "heed-types", + "libc", + "lmdb-rkv-sys", + "once_cell", + "page_size", + "serde", + "synchronoise", + "url", + "zerocopy", +] + +[[package]] +name = "heed-traits" +version = "0.7.0" +source = "git+https://github.com/Kerollmops/heed?tag=v0.12.1#fc017cf3394af737f92fd71e16f0499a78b79d65" + +[[package]] +name = "heed-types" +version = "0.7.2" +source = "git+https://github.com/Kerollmops/heed?tag=v0.12.1#fc017cf3394af737f92fd71e16f0499a78b79d65" +dependencies = [ + "bincode", + "heed-traits", + "serde", + "serde_json", + "zerocopy", +] + +[[package]] +name = "helpers" +version = "0.14.0" +dependencies = [ + "anyhow", + "byte-unit", + "heed", + "jemallocator", + "milli", + "stderrlog", + "structopt", +] + +[[package]] +name = "hermit-abi" +version = "0.1.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" +dependencies = [ + "libc", +] + +[[package]] +name = "http" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "527e8c9ac747e28542699a951517aa9a6945af506cd1f2e1b53a576c17b6cc11" +dependencies = [ + "bytes 1.1.0", + "fnv", + "itoa", +] + +[[package]] +name = "http-body" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13d5ff830006f7646652e057693569bfe0d51760c0085a071769d142a205111b" +dependencies = [ + "bytes 0.5.6", + "http", +] + +[[package]] +name = "http-body" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "399c583b2979440c60be0821a6199eca73bc3c8dcd9d070d75ac726e2c6186e5" +dependencies = [ + "bytes 1.1.0", + "http", + "pin-project-lite 0.2.7", +] + +[[package]] +name = "http-ui" +version = "0.14.0" +dependencies = [ + "anyhow", + "askama", + "askama_warp", + "bimap", + "byte-unit", + "bytes 0.5.6", + "crossbeam-channel", + "csv", + "either", + "flate2", + "fst", + "funty", + "futures", + "heed", + "jemallocator", + "log", + "maplit", + "meilisearch-tokenizer", + "memmap", + "milli", + "once_cell", + "rayon", + "serde", + "serde_json", + "serde_test", + "stderrlog", + "structopt", + "tempfile", + "tokio 0.2.25", + "warp", +] + +[[package]] +name = "httparse" +version = "1.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "acd94fdbe1d4ff688b67b04eee2e17bd50995534a61539e45adfefb45e5e5503" + +[[package]] +name = "httpdate" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "494b4d60369511e7dea41cf646832512a94e542f68bb9c49e54518e0f468eb47" + +[[package]] +name = "httpdate" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6456b8a6c8f33fee7d958fcd1b60d55b11940a79e63ae87013e6d22e26034440" + +[[package]] +name = "human_format" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "86cce260d758a9aa3d7c4b99d55c815a540f8a37514ba6046ab6be402a157cb0" + +[[package]] +name = "humansize" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "02296996cb8796d7c6e3bc2d9211b7802812d36999a51bb754123ead7d37d026" + +[[package]] +name = "hyper" +version = "0.13.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a6f157065790a3ed2f88679250419b5cdd96e714a0d65f7797fd337186e96bb" +dependencies = [ + "bytes 0.5.6", + "futures-channel", + "futures-core", + "futures-util", + "h2 0.2.7", + "http", + "http-body 0.3.1", + "httparse", + "httpdate 0.3.2", + "itoa", + "pin-project 1.0.8", + "socket2 0.3.19", + "tokio 0.2.25", + "tower-service", + "tracing", + "want", +] + +[[package]] +name = "hyper" +version = "0.14.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "15d1cfb9e4f68655fa04c01f59edb405b6074a0f7118ea881e5026e4a1cd8593" +dependencies = [ + "bytes 1.1.0", + "futures-channel", + "futures-core", + "futures-util", + "h2 0.3.4", + "http", + "http-body 0.4.3", + "httparse", + "httpdate 1.0.1", + "itoa", + "pin-project-lite 0.2.7", + "socket2 0.4.2", + "tokio 1.11.0", + "tower-service", + "tracing", + "want", +] + +[[package]] +name = "hyper-rustls" +version = "0.22.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f9f7a97316d44c0af9b0301e65010573a853a9fc97046d7331d7f6bc0fd5a64" +dependencies = [ + "futures-util", + "hyper 0.14.13", + "log", + "rustls", + "tokio 1.11.0", + "tokio-rustls", + "webpki", +] + +[[package]] +name = "idna" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "418a0a6fab821475f634efe3ccc45c013f742efe03d853e8d3355d5cb850ecf8" +dependencies = [ + "matches", + "unicode-bidi", + "unicode-normalization", +] + +[[package]] +name = "indenter" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ce23b50ad8242c51a442f3ff322d56b02f08852c77e4c0b4d3fd684abc89c683" + +[[package]] +name = "indexmap" +version = "1.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc633605454125dec4b66843673f01c7df2b89479b32e0ed634e43a91cff62a5" +dependencies = [ + "autocfg", + "hashbrown 0.11.2", +] + +[[package]] +name = "indicatif" +version = "0.16.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2d207dc617c7a380ab07ff572a6e52fa202a2a8f355860ac9c38e23f8196be1b" +dependencies = [ + "console", + "lazy_static", + "number_prefix", + "regex", +] + +[[package]] +name = "infos" +version = "0.14.0" +dependencies = [ + "anyhow", + "byte-unit", + "csv", + "heed", + "jemallocator", + "milli", + "roaring", + "serde_json", + "stderrlog", + "structopt", +] + +[[package]] +name = "input_buffer" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19a8a95243d5a0398cae618ec29477c6e3cb631152be5c19481f80bc71559754" +dependencies = [ + "bytes 0.5.6", +] + +[[package]] +name = "iovec" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2b3ea6ff95e175473f8ffe6a7eb7c00d054240321b84c57051175fe3c1e075e" +dependencies = [ + "libc", +] + +[[package]] +name = "ipnet" +version = "2.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68f2d64f2edebec4ce84ad108148e67e1064789bee435edc5b60ad398714a3a9" + +[[package]] +name = "itertools" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69ddb889f9d0d08a67338271fa9b62996bc788c7796a5c18cf057420aaed5eaf" +dependencies = [ + "either", +] + +[[package]] +name = "itoa" +version = "0.4.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b71991ff56294aa922b450139ee08b3bfc70982c6b2c7562771375cf73542dd4" + +[[package]] +name = "jemalloc-sys" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d3b9f3f5c9b31aa0f5ed3260385ac205db665baa41d49bb8338008ae94ede45" +dependencies = [ + "cc", + "fs_extra", + "libc", +] + +[[package]] +name = "jemallocator" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43ae63fcfc45e99ab3d1b29a46782ad679e98436c3169d15a167a1108a724b69" +dependencies = [ + "jemalloc-sys", + "libc", +] + +[[package]] +name = "jieba-rs" +version = "0.6.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94287627d13ab7b943787ab20b54b37f8af11179ce85de4734071c88f9eab354" +dependencies = [ + "cedarwood", + "fxhash", + "hashbrown 0.11.2", + "lazy_static", + "phf", + "phf_codegen", + "regex", +] + +[[package]] +name = "js-sys" +version = "0.3.55" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7cc9ffccd38c451a86bf13657df244e9c3f37493cce8e5e21e940963777acc84" +dependencies = [ + "wasm-bindgen", +] + +[[package]] +name = "kernel32-sys" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7507624b29483431c0ba2d82aece8ca6cdba9382bff4ddd0f7490560c056098d" +dependencies = [ + "winapi 0.2.8", + "winapi-build", +] + +[[package]] +name = "lazy_static" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" + +[[package]] +name = "levenshtein_automata" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c2cdeb66e45e9f36bfad5bbdb4d2384e70936afbee843c6f6543f0c551ebb25" +dependencies = [ + "fst", +] + +[[package]] +name = "lexical-core" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6607c62aa161d23d17a9072cc5da0be67cdfc89d3afb1e8d9c842bebc2525ffe" +dependencies = [ + "arrayvec", + "bitflags", + "cfg-if 1.0.0", + "ryu", + "static_assertions", +] + +[[package]] +name = "libc" +version = "0.2.102" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2a5ac8f984bfcf3a823267e5fde638acc3325f6496633a5da6bb6eb2171e103" + +[[package]] +name = "linked-hash-map" +version = "0.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7fb9b38af92608140b86b693604b9ffcc5824240a484d1ecd4795bacb2fe88f3" + +[[package]] +name = "lmdb-rkv-sys" +version = "0.15.0" +source = "git+https://github.com/meilisearch/lmdb-rs#d0b50d02938ee84e4e4372697ea991fe2a4cae3b" +dependencies = [ + "cc", + "libc", + "pkg-config", +] + +[[package]] +name = "log" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51b9bbe6c47d51fc3e1a9b945965946b4c44142ab8792c50835a980d362c2710" +dependencies = [ + "cfg-if 1.0.0", +] + +[[package]] +name = "logging_timer" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40d0c249955c17c2f8f86b5f501b16d2509ebbe775f7b1d1d2b1ba85ade2a793" +dependencies = [ + "log", + "logging_timer_proc_macros", +] + +[[package]] +name = "logging_timer_proc_macros" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "482c2c28e6bcfe7c4274f82f701774d755e6aa873edfd619460fcd0966e0eb07" +dependencies = [ + "log", + "proc-macro2 0.4.30", + "quote 0.6.13", + "syn 0.15.44", +] + +[[package]] +name = "maplit" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3e2e65a1a2e43cfcb47a895c4c8b10d1f4a61097f9f254f183aee60cad9c651d" + +[[package]] +name = "matches" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a3e378b66a060d48947b590737b30a1be76706c8dd7b8ba0f2fe3989c68a853f" + +[[package]] +name = "meilisearch-tokenizer" +version = "0.2.5" +source = "git+https://github.com/meilisearch/tokenizer.git?tag=v0.2.5#c0b5cf741ed9485147f2cbe523f2214d4fa4c395" +dependencies = [ + "character_converter", + "cow-utils", + "deunicode", + "fst", + "jieba-rs", + "once_cell", + "slice-group-by", + "unicode-segmentation", + "whatlang", +] + +[[package]] +name = "memchr" +version = "2.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "308cc39be01b73d0d18f82a0e7b2a3df85245f84af96fdddc5d202d27e47b86a" + +[[package]] +name = "memmap" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6585fd95e7bb50d6cc31e20d4cf9afb4e2ba16c5846fc76793f11218da9c475b" +dependencies = [ + "libc", + "winapi 0.3.9", +] + +[[package]] +name = "memoffset" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59accc507f1338036a0477ef61afdae33cde60840f4dfe481319ce3ad116ddf9" +dependencies = [ + "autocfg", +] + +[[package]] +name = "milli" +version = "0.14.0" +dependencies = [ + "big_s", + "bimap", + "bincode", + "bstr", + "byteorder", + "chrono", + "concat-arrays", + "crossbeam-channel", + "either", + "flate2", + "fst", + "fxhash", + "geoutils", + "grenad", + "heed", + "human_format", + "itertools", + "levenshtein_automata", + "linked-hash-map", + "log", + "logging_timer", + "maplit", + "meilisearch-tokenizer", + "memmap", + "obkv", + "once_cell", + "ordered-float", + "pest 2.1.3 (git+https://github.com/pest-parser/pest.git?rev=51fd1d49f1041f7839975664ef71fe15c7dcaf67)", + "pest_derive", + "rand 0.8.4", + "rayon", + "roaring", + "rstar", + "serde", + "serde_json", + "slice-group-by", + "smallstr", + "smallvec", + "tempfile", + "uuid", +] + +[[package]] +name = "mime" +version = "0.3.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a60c7ce501c71e03a9c9c0d35b861413ae925bd979cc7a4e30d060069aaac8d" + +[[package]] +name = "mime_guess" +version = "2.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2684d4c2e97d99848d30b324b00c8fcc7e5c897b7cbb5819b09e7c90e8baf212" +dependencies = [ + "mime", + "unicase", +] + +[[package]] +name = "miniz_oxide" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a92518e98c078586bc6c934028adcca4c92a53d6a958196de835170a01d84e4b" +dependencies = [ + "adler", + "autocfg", +] + +[[package]] +name = "mio" +version = "0.6.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4afd66f5b91bf2a3bc13fad0e21caedac168ca4c707504e75585648ae80e4cc4" +dependencies = [ + "cfg-if 0.1.10", + "fuchsia-zircon", + "fuchsia-zircon-sys", + "iovec", + "kernel32-sys", + "libc", + "log", + "miow 0.2.2", + "net2", + "slab", + "winapi 0.2.8", +] + +[[package]] +name = "mio" +version = "0.7.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8c2bdb6314ec10835cd3293dd268473a835c02b7b352e788be788b3c6ca6bb16" +dependencies = [ + "libc", + "log", + "miow 0.3.7", + "ntapi", + "winapi 0.3.9", +] + +[[package]] +name = "mio-named-pipes" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0840c1c50fd55e521b247f949c241c9997709f23bd7f023b9762cd561e935656" +dependencies = [ + "log", + "mio 0.6.23", + "miow 0.3.7", + "winapi 0.3.9", +] + +[[package]] +name = "mio-uds" +version = "0.6.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "afcb699eb26d4332647cc848492bbc15eafb26f08d0304550d5aa1f612e066f0" +dependencies = [ + "iovec", + "libc", + "mio 0.6.23", +] + +[[package]] +name = "miow" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebd808424166322d4a38da87083bfddd3ac4c131334ed55856112eb06d46944d" +dependencies = [ + "kernel32-sys", + "net2", + "winapi 0.2.8", + "ws2_32-sys", +] + +[[package]] +name = "miow" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9f1c5b025cda876f66ef43a113f91ebc9f4ccef34843000e0adf6ebbab84e21" +dependencies = [ + "winapi 0.3.9", +] + +[[package]] +name = "multipart" +version = "0.17.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d050aeedc89243f5347c3e237e3e13dc76fbe4ae3742a57b94dc14f69acf76d4" +dependencies = [ + "buf_redux", + "httparse", + "log", + "mime", + "mime_guess", + "quick-error", + "rand 0.7.3", + "safemem", + "tempfile", + "twoway", +] + +[[package]] +name = "net2" +version = "0.2.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "391630d12b68002ae1e25e8f974306474966550ad82dac6886fb8910c19568ae" +dependencies = [ + "cfg-if 0.1.10", + "libc", + "winapi 0.3.9", +] + +[[package]] +name = "nom" +version = "6.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7413f999671bd4745a7b624bd370a569fb6bc574b23c83a3c5ed2e453f3d5e2" +dependencies = [ + "bitvec", + "funty", + "lexical-core", + "memchr", + "version_check", +] + +[[package]] +name = "ntapi" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f6bb902e437b6d86e03cce10a7e2af662292c5dfef23b65899ea3ac9354ad44" +dependencies = [ + "winapi 0.3.9", +] + +[[package]] +name = "num-integer" +version = "0.1.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2cc698a63b549a70bc047073d2949cce27cd1c7b0a4a862d08a8031bc2801db" +dependencies = [ + "autocfg", + "num-traits", +] + +[[package]] +name = "num-traits" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a64b1ec5cda2586e284722486d802acf1f7dbdc623e2bfc57e65ca1cd099290" +dependencies = [ + "autocfg", +] + +[[package]] +name = "num_cpus" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05499f3756671c15885fee9034446956fff3f243d6077b91e5767df161f766b3" +dependencies = [ + "hermit-abi", + "libc", +] + +[[package]] +name = "number_prefix" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3" + +[[package]] +name = "object" +version = "0.26.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39f37e50073ccad23b6d09bcb5b263f4e76d3bb6038e4a3c08e52162ffa8abc2" +dependencies = [ + "memchr", +] + +[[package]] +name = "obkv" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f69e48cd7c8e5bb52a1da1287fdbfd877c32673176583ce664cd63b201aba385" + +[[package]] +name = "once_cell" +version = "1.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "692fcb63b64b1758029e0a96ee63e049ce8c5948587f2f7208df04625e5f6b56" + +[[package]] +name = "oorandom" +version = "11.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ab1bc2a289d34bd04a330323ac98a1b4bc82c9d9fcb1e66b63caa84da26b575" + +[[package]] +name = "opaque-debug" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2839e79665f131bdb5782e51f2c6c9599c133c6098982a54c794358bf432529c" + +[[package]] +name = "opaque-debug" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "624a8340c38c1b80fd549087862da4ba43e08858af025b236e509b6649fc13d5" + +[[package]] +name = "ordered-float" +version = "2.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97c9d06878b3a851e8026ef94bf7fef9ba93062cd412601da4d9cf369b1cc62d" +dependencies = [ + "num-traits", +] + +[[package]] +name = "owo-colors" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2386b4ebe91c2f7f51082d4cefa145d030e33a1842a96b12e4885cc3c01f7a55" + +[[package]] +name = "page_size" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eebde548fbbf1ea81a99b128872779c437752fb99f217c45245e1a61dcd9edcd" +dependencies = [ + "libc", + "winapi 0.3.9", +] + +[[package]] +name = "pdqselect" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ec91767ecc0a0bbe558ce8c9da33c068066c57ecc8bb8477ef8c1ad3ef77c27" + +[[package]] +name = "percent-encoding" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4fd5641d01c8f18a23da7b6fe29298ff4b55afcccdf78973b24cf3175fee32e" + +[[package]] +name = "pest" +version = "2.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "10f4872ae94d7b90ae48754df22fd42ad52ce740b8f370b03da4835417403e53" +dependencies = [ + "ucd-trie", +] + +[[package]] +name = "pest" +version = "2.1.3" +source = "git+https://github.com/pest-parser/pest.git?rev=51fd1d49f1041f7839975664ef71fe15c7dcaf67#51fd1d49f1041f7839975664ef71fe15c7dcaf67" +dependencies = [ + "ucd-trie", +] + +[[package]] +name = "pest_derive" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "833d1ae558dc601e9a60366421196a8d94bc0ac980476d0b67e1d0988d72b2d0" +dependencies = [ + "pest 2.1.3 (registry+https://github.com/rust-lang/crates.io-index)", + "pest_generator", +] + +[[package]] +name = "pest_generator" +version = "2.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "99b8db626e31e5b81787b9783425769681b347011cc59471e33ea46d2ea0cf55" +dependencies = [ + "pest 2.1.3 (registry+https://github.com/rust-lang/crates.io-index)", + "pest_meta", + "proc-macro2 1.0.29", + "quote 1.0.9", + "syn 1.0.76", +] + +[[package]] +name = "pest_meta" +version = "2.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "54be6e404f5317079812fc8f9f5279de376d8856929e21c184ecf6bbd692a11d" +dependencies = [ + "maplit", + "pest 2.1.3 (registry+https://github.com/rust-lang/crates.io-index)", + "sha-1 0.8.2", +] + +[[package]] +name = "phf" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2ac8b67553a7ca9457ce0e526948cad581819238f4a9d1ea74545851fa24f37" +dependencies = [ + "phf_shared", +] + +[[package]] +name = "phf_codegen" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "963adb11cf22ee65dfd401cf75577c1aa0eca58c0b97f9337d2da61d3e640503" +dependencies = [ + "phf_generator", + "phf_shared", +] + +[[package]] +name = "phf_generator" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d43f3220d96e0080cc9ea234978ccd80d904eafb17be31bb0f76daaea6493082" +dependencies = [ + "phf_shared", + "rand 0.8.4", +] + +[[package]] +name = "phf_shared" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a68318426de33640f02be62b4ae8eb1261be2efbc337b60c54d845bf4484e0d9" +dependencies = [ + "siphasher", +] + +[[package]] +name = "pin-project" +version = "0.4.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "918192b5c59119d51e0cd221f4d49dde9112824ba717369e903c97d076083d0f" +dependencies = [ + "pin-project-internal 0.4.28", +] + +[[package]] +name = "pin-project" +version = "1.0.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "576bc800220cc65dac09e99e97b08b358cfab6e17078de8dc5fee223bd2d0c08" +dependencies = [ + "pin-project-internal 1.0.8", +] + +[[package]] +name = "pin-project-internal" +version = "0.4.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3be26700300be6d9d23264c73211d8190e755b6b5ca7a1b28230025511b52a5e" +dependencies = [ + "proc-macro2 1.0.29", + "quote 1.0.9", + "syn 1.0.76", +] + +[[package]] +name = "pin-project-internal" +version = "1.0.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e8fe8163d14ce7f0cdac2e040116f22eac817edabff0be91e8aff7e9accf389" +dependencies = [ + "proc-macro2 1.0.29", + "quote 1.0.9", + "syn 1.0.76", +] + +[[package]] +name = "pin-project-lite" +version = "0.1.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "257b64915a082f7811703966789728173279bdebb956b143dbcd23f6f970a777" + +[[package]] +name = "pin-project-lite" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8d31d11c69a6b52a174b42bdc0c30e5e11670f90788b2c471c31c1d17d449443" + +[[package]] +name = "pin-utils" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" + +[[package]] +name = "pkg-config" +version = "0.3.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3831453b3449ceb48b6d9c7ad7c96d5ea673e9b470a1dc578c2ce6521230884c" + +[[package]] +name = "plotters" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a3fd9ec30b9749ce28cd91f255d569591cdf937fe280c312143e3c4bad6f2a" +dependencies = [ + "num-traits", + "plotters-backend", + "plotters-svg", + "wasm-bindgen", + "web-sys", +] + +[[package]] +name = "plotters-backend" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d88417318da0eaf0fdcdb51a0ee6c3bed624333bff8f946733049380be67ac1c" + +[[package]] +name = "plotters-svg" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "521fa9638fa597e1dc53e9412a4f9cefb01187ee1f7413076f9e6749e2885ba9" +dependencies = [ + "plotters-backend", +] + +[[package]] +name = "ppv-lite86" +version = "0.2.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac74c624d6b2d21f425f752262f42188365d7b8ff1aff74c82e45136510a4857" + +[[package]] +name = "proc-macro-error" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c" +dependencies = [ + "proc-macro-error-attr", + "proc-macro2 1.0.29", + "quote 1.0.9", + "syn 1.0.76", + "version_check", +] + +[[package]] +name = "proc-macro-error-attr" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869" +dependencies = [ + "proc-macro2 1.0.29", + "quote 1.0.9", + "version_check", +] + +[[package]] +name = "proc-macro-hack" +version = "0.5.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dbf0c48bc1d91375ae5c3cd81e3722dff1abcf81a30960240640d223f59fe0e5" + +[[package]] +name = "proc-macro-nested" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc881b2c22681370c6a780e47af9840ef841837bc98118431d4e1868bd0c1086" + +[[package]] +name = "proc-macro2" +version = "0.4.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf3d2011ab5c909338f7887f4fc896d35932e29146c12c8d01da6b22a80ba759" +dependencies = [ + "unicode-xid 0.1.0", +] + +[[package]] +name = "proc-macro2" +version = "1.0.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9f5105d4fdaab20335ca9565e106a5d9b82b6219b5ba735731124ac6711d23d" +dependencies = [ + "unicode-xid 0.2.2", +] + +[[package]] +name = "quick-error" +version = "1.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0" + +[[package]] +name = "quote" +version = "0.6.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ce23b6b870e8f94f81fb0a363d65d86675884b34a09043c81e5562f11c1f8e1" +dependencies = [ + "proc-macro2 0.4.30", +] + +[[package]] +name = "quote" +version = "1.0.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3d0b9745dc2debf507c8422de05d7226cc1f0644216dfdfead988f9b1ab32a7" +dependencies = [ + "proc-macro2 1.0.29", +] + +[[package]] +name = "radium" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "941ba9d78d8e2f7ce474c015eea4d9c6d25b6a3327f9832ee29a4de27f91bbb8" + +[[package]] +name = "rand" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a6b1679d49b24bbfe0c803429aa1874472f50d9b363131f0e89fc356b544d03" +dependencies = [ + "getrandom 0.1.16", + "libc", + "rand_chacha 0.2.2", + "rand_core 0.5.1", + "rand_hc 0.2.0", +] + +[[package]] +name = "rand" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e7573632e6454cf6b99d7aac4ccca54be06da05aca2ef7423d22d27d4d4bcd8" +dependencies = [ + "libc", + "rand_chacha 0.3.1", + "rand_core 0.6.3", + "rand_hc 0.3.1", +] + +[[package]] +name = "rand_chacha" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f4c8ed856279c9737206bf725bf36935d8666ead7aa69b52be55af369d193402" +dependencies = [ + "ppv-lite86", + "rand_core 0.5.1", +] + +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core 0.6.3", +] + +[[package]] +name = "rand_core" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90bde5296fc891b0cef12a6d03ddccc162ce7b2aff54160af9338f8d40df6d19" +dependencies = [ + "getrandom 0.1.16", +] + +[[package]] +name = "rand_core" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d34f1408f55294453790c48b2f1ebbb1c5b4b7563eb1f418bcfcfdbb06ebb4e7" +dependencies = [ + "getrandom 0.2.3", +] + +[[package]] +name = "rand_hc" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca3129af7b92a17112d59ad498c6f81eaf463253766b90396d39ea7a39d6613c" +dependencies = [ + "rand_core 0.5.1", +] + +[[package]] +name = "rand_hc" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d51e9f596de227fda2ea6c84607f5558e196eeaf43c986b724ba4fb8fdf497e7" +dependencies = [ + "rand_core 0.6.3", +] + +[[package]] +name = "rayon" +version = "1.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c06aca804d41dbc8ba42dfd964f0d01334eceb64314b9ecf7c5fad5188a06d90" +dependencies = [ + "autocfg", + "crossbeam-deque", + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d78120e2c850279833f1dd3582f730c4ab53ed95aeaaaa862a2a5c71b1656d8e" +dependencies = [ + "crossbeam-channel", + "crossbeam-deque", + "crossbeam-utils 0.8.5", + "lazy_static", + "num_cpus", +] + +[[package]] +name = "redox_syscall" +version = "0.2.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8383f39639269cde97d255a32bdb68c047337295414940c68bdd30c2e13203ff" +dependencies = [ + "bitflags", +] + +[[package]] +name = "regex" +version = "1.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d07a8629359eb56f1e2fb1652bb04212c072a87ba68546a04065d525673ac461" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" + +[[package]] +name = "regex-syntax" +version = "0.6.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f497285884f3fcff424ffc933e56d7cbca511def0c9831a7f9b5f6153e3cc89b" + +[[package]] +name = "remove_dir_all" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3acd125665422973a33ac9d3dd2df85edad0f4ae9b00dafb1a05e43a9f5ef8e7" +dependencies = [ + "winapi 0.3.9", +] + +[[package]] +name = "reqwest" +version = "0.11.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "246e9f61b9bb77df069a947682be06e31ac43ea37862e244a69f177694ea6d22" +dependencies = [ + "base64 0.13.0", + "bytes 1.1.0", + "encoding_rs", + "futures-core", + "futures-util", + "http", + "http-body 0.4.3", + "hyper 0.14.13", + "hyper-rustls", + "ipnet", + "js-sys", + "lazy_static", + "log", + "mime", + "percent-encoding", + "pin-project-lite 0.2.7", + "rustls", + "serde", + "serde_urlencoded 0.7.0", + "tokio 1.11.0", + "tokio-rustls", + "url", + "wasm-bindgen", + "wasm-bindgen-futures", + "web-sys", + "webpki-roots", + "winreg", +] + +[[package]] +name = "retain_mut" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e9c17925a9027d298a4603d286befe3f9dc0e8ed02523141914eb628798d6e5b" + +[[package]] +name = "ring" +version = "0.16.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3053cf52e236a3ed746dfc745aa9cacf1b791d846bdaf412f60a8d7d6e17c8fc" +dependencies = [ + "cc", + "libc", + "once_cell", + "spin", + "untrusted", + "web-sys", + "winapi 0.3.9", +] + +[[package]] +name = "roaring" +version = "0.6.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "536cfa885fc388b8ae69edf96d7970849b7d9c1395da1b8330f17715babf8a09" +dependencies = [ + "bytemuck", + "byteorder", + "retain_mut", +] + +[[package]] +name = "rstar" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d535e658ada8c1987a113e5261f8b907f721b2854d666e72820671481b7ee125" +dependencies = [ + "heapless", + "num-traits", + "pdqselect", + "serde", + "smallvec", +] + +[[package]] +name = "rustc-demangle" +version = "0.1.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ef03e0a2b150c7a90d01faf6254c9c48a41e95fb2a8c2ac1c6f0d2b9aefc342" + +[[package]] +name = "rustc_version" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfa0f585226d2e68097d4f95d113b15b83a82e819ab25717ec0590d9584ef366" +dependencies = [ + "semver", +] + +[[package]] +name = "rustls" +version = "0.19.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "35edb675feee39aec9c99fa5ff985081995a06d594114ae14cbe797ad7b7a6d7" +dependencies = [ + "base64 0.13.0", + "log", + "ring", + "sct", + "webpki", +] + +[[package]] +name = "ryu" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "71d301d4193d031abdd79ff7e3dd721168a9572ef3fe51a1517aba235bd8f86e" + +[[package]] +name = "safemem" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef703b7cb59335eae2eb93ceb664c0eb7ea6bf567079d843e09420219668e072" + +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + +[[package]] +name = "scoped-tls" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ea6a9290e3c9cf0f18145ef7ffa62d68ee0bf5fcd651017e586dc7fd5da448c2" + +[[package]] +name = "scopeguard" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" + +[[package]] +name = "sct" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b362b83898e0e69f38515b82ee15aa80636befe47c3b6d3d89a911e78fc228ce" +dependencies = [ + "ring", + "untrusted", +] + +[[package]] +name = "semver" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "568a8e6258aa33c13358f81fd834adb854c6f7c9468520910a9b1e8fac068012" + +[[package]] +name = "serde" +version = "1.0.130" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f12d06de37cf59146fbdecab66aa99f9fe4f78722e3607577a5375d66bd0c913" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_cbor" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2bef2ebfde456fb76bbcf9f59315333decc4fda0b2b44b420243c11e0f5ec1f5" +dependencies = [ + "half", + "serde", +] + +[[package]] +name = "serde_derive" +version = "1.0.130" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7bc1a1ab1961464eae040d96713baa5a724a8152c1222492465b54322ec508b" +dependencies = [ + "proc-macro2 1.0.29", + "quote 1.0.9", + "syn 1.0.76", +] + +[[package]] +name = "serde_json" +version = "1.0.68" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f690853975602e1bfe1ccbf50504d67174e3bcf340f23b5ea9992e0587a52d8" +dependencies = [ + "indexmap", + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "serde_test" +version = "1.0.130" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d82178225dbdeae2d5d190e8649287db6a3a32c6d24da22ae3146325aa353e4c" +dependencies = [ + "serde", +] + +[[package]] +name = "serde_urlencoded" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ec5d77e2d4c73717816afac02670d5c4f534ea95ed430442cad02e7a6e32c97" +dependencies = [ + "dtoa", + "itoa", + "serde", + "url", +] + +[[package]] +name = "serde_urlencoded" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "edfa57a7f8d9c1d260a549e7224100f6c43d43f9103e06dd8b4095a9b2b43ce9" +dependencies = [ + "form_urlencoded", + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "sha-1" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f7d94d0bede923b3cea61f3f1ff57ff8cdfd77b400fb8f9998949e0cf04163df" +dependencies = [ + "block-buffer 0.7.3", + "digest 0.8.1", + "fake-simd", + "opaque-debug 0.2.3", +] + +[[package]] +name = "sha-1" +version = "0.9.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "99cd6713db3cf16b6c84e06321e049a9b9f699826e16096d23bbcc44d15d51a6" +dependencies = [ + "block-buffer 0.9.0", + "cfg-if 1.0.0", + "cpufeatures", + "digest 0.9.0", + "opaque-debug 0.3.0", +] + +[[package]] +name = "sharded-slab" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "740223c51853f3145fe7c90360d2d4232f2b62e3449489c207eccde818979982" +dependencies = [ + "lazy_static", +] + +[[package]] +name = "signal-hook-registry" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e51e73328dc4ac0c7ccbda3a494dfa03df1de2f46018127f60c693f2648455b0" +dependencies = [ + "libc", +] + +[[package]] +name = "siphasher" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "533494a8f9b724d33625ab53c6c4800f7cc445895924a8ef649222dcb76e938b" + +[[package]] +name = "slab" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c307a32c1c5c437f38c7fd45d753050587732ba8628319fbdf12a7e289ccc590" + +[[package]] +name = "slice-group-by" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f7474f0b646d228360ab62ed974744617bc869d959eac8403bfa3665931a7fb" + +[[package]] +name = "smallstr" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e922794d168678729ffc7e07182721a14219c65814e66e91b839a272fe5ae4f" +dependencies = [ + "serde", + "smallvec", +] + +[[package]] +name = "smallvec" +version = "1.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fe0f37c9e8f3c5a4a66ad655a93c74daac4ad00c441533bf5c6e7990bb42604e" + +[[package]] +name = "socket2" +version = "0.3.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "122e570113d28d773067fab24266b66753f6ea915758651696b6e35e49f88d6e" +dependencies = [ + "cfg-if 1.0.0", + "libc", + "winapi 0.3.9", +] + +[[package]] +name = "socket2" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5dc90fe6c7be1a323296982db1836d1ea9e47b6839496dde9a541bc496df3516" +dependencies = [ + "libc", + "winapi 0.3.9", +] + +[[package]] +name = "spin" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d" + +[[package]] +name = "stable_deref_trait" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" + +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + +[[package]] +name = "stderrlog" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "45a53e2eff3e94a019afa6265e8ee04cb05b9d33fe9f5078b14e4e391d155a38" +dependencies = [ + "atty", + "chrono", + "log", + "termcolor", + "thread_local", +] + +[[package]] +name = "strsim" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a" + +[[package]] +name = "structopt" +version = "0.3.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf9d950ef167e25e0bdb073cf1d68e9ad2795ac826f2f3f59647817cf23c0bfa" +dependencies = [ + "clap", + "lazy_static", + "structopt-derive", +] + +[[package]] +name = "structopt-derive" +version = "0.4.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "134d838a2c9943ac3125cf6df165eda53493451b719f3255b2a26b85f772d0ba" +dependencies = [ + "heck", + "proc-macro-error", + "proc-macro2 1.0.29", + "quote 1.0.9", + "syn 1.0.76", +] + +[[package]] +name = "syn" +version = "0.15.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ca4b3b69a77cbe1ffc9e198781b7acb0c7365a883670e8f1c1bc66fba79a5c5" +dependencies = [ + "proc-macro2 0.4.30", + "quote 0.6.13", + "unicode-xid 0.1.0", +] + +[[package]] +name = "syn" +version = "1.0.76" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c6f107db402c2c2055242dbf4d2af0e69197202e9faacbef9571bbe47f5a1b84" +dependencies = [ + "proc-macro2 1.0.29", + "quote 1.0.9", + "unicode-xid 0.2.2", +] + +[[package]] +name = "synchronoise" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d717ed0efc9d39ab3b642a096bc369a3e02a38a51c41845d7fe31bdad1d6eaeb" +dependencies = [ + "crossbeam-queue", +] + +[[package]] +name = "synstructure" +version = "0.12.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "474aaa926faa1603c40b7885a9eaea29b444d1cb2850cb7c0e37bb1a4182f4fa" +dependencies = [ + "proc-macro2 1.0.29", + "quote 1.0.9", + "syn 1.0.76", + "unicode-xid 0.2.2", +] + +[[package]] +name = "tap" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369" + +[[package]] +name = "tempfile" +version = "3.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dac1c663cfc93810f88aed9b8941d48cabf856a1b111c29a40439018d870eb22" +dependencies = [ + "cfg-if 1.0.0", + "libc", + "rand 0.8.4", + "redox_syscall", + "remove_dir_all", + "winapi 0.3.9", +] + +[[package]] +name = "term_size" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e4129646ca0ed8f45d09b929036bafad5377103edd06e50bf574b353d2b08d9" +dependencies = [ + "libc", + "winapi 0.3.9", +] + +[[package]] +name = "termcolor" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2dfed899f0eb03f32ee8c6a0aabdb8a7949659e3466561fc0adf54e26d88c5f4" +dependencies = [ + "winapi-util", +] + +[[package]] +name = "terminal_size" +version = "0.1.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "633c1a546cee861a1a6d0dc69ebeca693bf4296661ba7852b9d21d159e0506df" +dependencies = [ + "libc", + "winapi 0.3.9", +] + +[[package]] +name = "textwrap" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060" +dependencies = [ + "term_size", + "unicode-width", +] + +[[package]] +name = "thread_local" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d40c6d1b69745a6ec6fb1ca717914848da4b44ae29d9b3080cbee91d72a69b14" +dependencies = [ + "lazy_static", +] + +[[package]] +name = "time" +version = "0.1.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6db9e6914ab8b1ae1c260a4ae7a49b6c5611b40328a735b21862567685e73255" +dependencies = [ + "libc", + "wasi 0.10.0+wasi-snapshot-preview1", + "winapi 0.3.9", +] + +[[package]] +name = "tinytemplate" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc" +dependencies = [ + "serde", + "serde_json", +] + +[[package]] +name = "tinyvec" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5241dd6f21443a3606b432718b166d3cedc962fd4b8bea54a8bc7f514ebda986" +dependencies = [ + "tinyvec_macros", +] + +[[package]] +name = "tinyvec_macros" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cda74da7e1a664f795bb1f8a87ec406fb89a02522cf6e50620d016add6dbbf5c" + +[[package]] +name = "tokio" +version = "0.2.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6703a273949a90131b290be1fe7b039d0fc884aa1935860dfcbe056f28cd8092" +dependencies = [ + "bytes 0.5.6", + "fnv", + "futures-core", + "iovec", + "lazy_static", + "libc", + "memchr", + "mio 0.6.23", + "mio-named-pipes", + "mio-uds", + "num_cpus", + "pin-project-lite 0.1.12", + "signal-hook-registry", + "slab", + "tokio-macros", + "winapi 0.3.9", +] + +[[package]] +name = "tokio" +version = "1.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4efe6fc2395938c8155973d7be49fe8d03a843726e285e100a8a383cc0154ce" +dependencies = [ + "autocfg", + "bytes 1.1.0", + "libc", + "memchr", + "mio 0.7.13", + "num_cpus", + "pin-project-lite 0.2.7", + "winapi 0.3.9", +] + +[[package]] +name = "tokio-macros" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e44da00bfc73a25f814cd8d7e57a68a5c31b74b3152a0a1d1f590c97ed06265a" +dependencies = [ + "proc-macro2 1.0.29", + "quote 1.0.9", + "syn 1.0.76", +] + +[[package]] +name = "tokio-rustls" +version = "0.22.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc6844de72e57df1980054b38be3a9f4702aba4858be64dd700181a8a6d0e1b6" +dependencies = [ + "rustls", + "tokio 1.11.0", + "webpki", +] + +[[package]] +name = "tokio-tungstenite" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d9e878ad426ca286e4dcae09cbd4e1973a7f8987d97570e2469703dd7f5720c" +dependencies = [ + "futures-util", + "log", + "pin-project 0.4.28", + "tokio 0.2.25", + "tungstenite", +] + +[[package]] +name = "tokio-util" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be8242891f2b6cbef26a2d7e8605133c2c554cd35b3e4948ea892d6d68436499" +dependencies = [ + "bytes 0.5.6", + "futures-core", + "futures-sink", + "log", + "pin-project-lite 0.1.12", + "tokio 0.2.25", +] + +[[package]] +name = "tokio-util" +version = "0.6.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08d3725d3efa29485e87311c5b699de63cde14b00ed4d256b8318aa30ca452cd" +dependencies = [ + "bytes 1.1.0", + "futures-core", + "futures-sink", + "log", + "pin-project-lite 0.2.7", + "tokio 1.11.0", +] + +[[package]] +name = "toml" +version = "0.5.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a31142970826733df8241ef35dc040ef98c679ab14d7c3e54d827099b3acecaa" +dependencies = [ + "serde", +] + +[[package]] +name = "tower-service" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "360dfd1d6d30e05fda32ace2c8c70e9c0a9da713275777f5a4dbb8a1893930c6" + +[[package]] +name = "tracing" +version = "0.1.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "84f96e095c0c82419687c20ddf5cb3eadb61f4e1405923c9dc8e53a1adacbda8" +dependencies = [ + "cfg-if 1.0.0", + "log", + "pin-project-lite 0.2.7", + "tracing-attributes", + "tracing-core", +] + +[[package]] +name = "tracing-attributes" +version = "0.1.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "98863d0dd09fa59a1b79c6750ad80dbda6b75f4e71c437a6a1a8cb91a8bcbd77" +dependencies = [ + "proc-macro2 1.0.29", + "quote 1.0.9", + "syn 1.0.76", +] + +[[package]] +name = "tracing-core" +version = "0.1.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46125608c26121c81b0c6d693eab5a420e416da7e43c426d2e8f7df8da8a3acf" +dependencies = [ + "lazy_static", +] + +[[package]] +name = "tracing-error" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4d7c0b83d4a500748fa5879461652b361edf5c9d51ede2a2ac03875ca185e24" +dependencies = [ + "tracing", + "tracing-subscriber", +] + +[[package]] +name = "tracing-futures" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97d095ae15e245a057c8e8451bab9b3ee1e1f68e9ba2b4fbc18d0ac5237835f2" +dependencies = [ + "pin-project 1.0.8", + "tracing", +] + +[[package]] +name = "tracing-subscriber" +version = "0.2.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fdd0568dbfe3baf7048b7908d2b32bca0d81cd56bec6d2a8f894b01d74f86be3" +dependencies = [ + "sharded-slab", + "thread_local", + "tracing-core", +] + +[[package]] +name = "try-lock" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59547bce71d9c38b83d9c0e92b6066c4253371f15005def0c30d9657f50c7642" + +[[package]] +name = "tungstenite" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0308d80d86700c5878b9ef6321f020f29b1bb9d5ff3cab25e75e23f3a492a23" +dependencies = [ + "base64 0.12.3", + "byteorder", + "bytes 0.5.6", + "http", + "httparse", + "input_buffer", + "log", + "rand 0.7.3", + "sha-1 0.9.8", + "url", + "utf-8", +] + +[[package]] +name = "twoway" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59b11b2b5241ba34be09c3cc85a36e56e48f9888862e19cedf23336d35316ed1" +dependencies = [ + "memchr", +] + +[[package]] +name = "typenum" +version = "1.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b63708a265f51345575b27fe43f9500ad611579e764c79edbc2037b1121959ec" + +[[package]] +name = "ucd-trie" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56dee185309b50d1f11bfedef0fe6d036842e3fb77413abef29f8f8d1c5d4c1c" + +[[package]] +name = "unicase" +version = "2.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "50f37be617794602aabbeee0be4f259dc1778fabe05e2d67ee8f79326d5cb4f6" +dependencies = [ + "version_check", +] + +[[package]] +name = "unicode-bidi" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "246f4c42e67e7a4e3c6106ff716a5d067d4132a642840b242e357e468a2a0085" + +[[package]] +name = "unicode-normalization" +version = "0.1.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d54590932941a9e9266f0832deed84ebe1bf2e4c9e4a3554d393d18f5e854bf9" +dependencies = [ + "tinyvec", +] + +[[package]] +name = "unicode-segmentation" +version = "1.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8895849a949e7845e06bd6dc1aa51731a103c42707010a5b591c0038fb73385b" + +[[package]] +name = "unicode-width" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3ed742d4ea2bd1176e236172c8429aaf54486e7ac098db29ffe6529e0ce50973" + +[[package]] +name = "unicode-xid" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc72304796d0818e357ead4e000d19c9c174ab23dc11093ac919054d20a6a7fc" + +[[package]] +name = "unicode-xid" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ccb82d61f80a663efe1f787a51b16b5a51e3314d6ac365b08639f52387b33f3" + +[[package]] +name = "untrusted" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a156c684c91ea7d62626509bce3cb4e1d9ed5c4d978f7b4352658f96a4c26b4a" + +[[package]] +name = "url" +version = "2.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a507c383b2d33b5fc35d1861e77e6b383d158b2da5e14fe51b83dfedf6fd578c" +dependencies = [ + "form_urlencoded", + "idna", + "matches", + "percent-encoding", +] + +[[package]] +name = "urlencoding" +version = "1.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a1f0175e03a0973cf4afd476bef05c26e228520400eb1fd473ad417b1c00ffb" + +[[package]] +name = "utf-8" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9" + +[[package]] +name = "utf8-width" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7cf7d77f457ef8dfa11e4cd5933c5ddb5dc52a94664071951219a97710f0a32b" + +[[package]] +name = "uuid" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc5cf98d8186244414c848017f0e2676b3fcb46807f6668a97dfe67359a3c4b7" +dependencies = [ + "getrandom 0.2.3", +] + +[[package]] +name = "vec_map" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1bddf1187be692e79c5ffeab891132dfb0f236ed36a43c7ed39f1165ee20191" + +[[package]] +name = "version_check" +version = "0.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5fecdca9a5291cc2b8dcf7dc02453fee791a280f3743cb0905f8822ae463b3fe" + +[[package]] +name = "walkdir" +version = "2.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "808cf2735cd4b6866113f648b791c6adc5714537bc222d9347bb203386ffda56" +dependencies = [ + "same-file", + "winapi 0.3.9", + "winapi-util", +] + +[[package]] +name = "want" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ce8a968cb1cd110d136ff8b819a556d6fb6d919363c61534f6860c7eb172ba0" +dependencies = [ + "log", + "try-lock", +] + +[[package]] +name = "warp" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f41be6df54c97904af01aa23e613d4521eed7ab23537cede692d4058f6449407" +dependencies = [ + "bytes 0.5.6", + "futures", + "headers", + "http", + "hyper 0.13.10", + "log", + "mime", + "mime_guess", + "multipart", + "pin-project 0.4.28", + "scoped-tls", + "serde", + "serde_json", + "serde_urlencoded 0.6.1", + "tokio 0.2.25", + "tokio-tungstenite", + "tower-service", + "tracing", + "tracing-futures", + "urlencoding", +] + +[[package]] +name = "wasi" +version = "0.9.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cccddf32554fecc6acb585f82a32a72e28b48f8c4c1883ddfeeeaa96f7d8e519" + +[[package]] +name = "wasi" +version = "0.10.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a143597ca7c7793eff794def352d41792a93c481eb1042423ff7ff72ba2c31f" + +[[package]] +name = "wasm-bindgen" +version = "0.2.78" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "632f73e236b219150ea279196e54e610f5dbafa5d61786303d4da54f84e47fce" +dependencies = [ + "cfg-if 1.0.0", + "serde", + "serde_json", + "wasm-bindgen-macro", +] + +[[package]] +name = "wasm-bindgen-backend" +version = "0.2.78" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a317bf8f9fba2476b4b2c85ef4c4af8ff39c3c7f0cdfeed4f82c34a880aa837b" +dependencies = [ + "bumpalo", + "lazy_static", + "log", + "proc-macro2 1.0.29", + "quote 1.0.9", + "syn 1.0.76", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-futures" +version = "0.4.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e8d7523cb1f2a4c96c1317ca690031b714a51cc14e05f712446691f413f5d39" +dependencies = [ + "cfg-if 1.0.0", + "js-sys", + "wasm-bindgen", + "web-sys", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.78" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d56146e7c495528bf6587663bea13a8eb588d39b36b679d83972e1a2dbbdacf9" +dependencies = [ + "quote 1.0.9", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.78" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7803e0eea25835f8abdc585cd3021b3deb11543c6fe226dcd30b228857c5c5ab" +dependencies = [ + "proc-macro2 1.0.29", + "quote 1.0.9", + "syn 1.0.76", + "wasm-bindgen-backend", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.78" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0237232789cf037d5480773fe568aac745bfe2afbc11a863e97901780a6b47cc" + +[[package]] +name = "web-sys" +version = "0.3.55" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38eb105f1c59d9eaa6b5cdc92b859d85b926e82cb2e0945cd0c9259faa6fe9fb" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "webpki" +version = "0.21.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8e38c0608262c46d4a56202ebabdeb094cef7e560ca7a226c6bf055188aa4ea" +dependencies = [ + "ring", + "untrusted", +] + +[[package]] +name = "webpki-roots" +version = "0.21.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aabe153544e473b775453675851ecc86863d2a81d786d741f6b76778f2a48940" +dependencies = [ + "webpki", +] + +[[package]] +name = "whatlang" +version = "0.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a346d2eb29c03618693ed24a29d1acd0c3f2cb08ae58b9669d7461e033cf703" +dependencies = [ + "hashbrown 0.7.2", +] + +[[package]] +name = "winapi" +version = "0.2.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "167dc9d6949a9b857f3451275e911c3f44255842c1f7a76f33c55103a909087a" + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-build" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2d315eee3b34aca4797b2da6b13ed88266e6d612562a0c46390af8299fc699bc" + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-util" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178" +dependencies = [ + "winapi 0.3.9", +] + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "winreg" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0120db82e8a1e0b9fb3345a539c478767c0048d842860994d96113d5b667bd69" +dependencies = [ + "winapi 0.3.9", +] + +[[package]] +name = "ws2_32-sys" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d59cefebd0c892fa2dd6de581e937301d8552cb44489cdff035c6187cb63fa5e" +dependencies = [ + "winapi 0.2.8", + "winapi-build", +] + +[[package]] +name = "wyz" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85e60b0d1b5f99db2556934e21937020776a5d31520bf169e851ac44e6420214" + +[[package]] +name = "zerocopy" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6580539ad917b7c026220c4b3f2c08d52ce54d6ce0dc491e66002e35388fab46" +dependencies = [ + "byteorder", + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d498dbd1fd7beb83c86709ae1c33ca50942889473473d287d56ce4770a18edfb" +dependencies = [ + "proc-macro2 1.0.29", + "syn 1.0.76", + "synstructure", +] diff --git a/Cargo.toml b/Cargo.toml index 822907ca8..b78989f50 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,5 +1,5 @@ [workspace] -members = ["milli", "http-ui", "benchmarks", "infos", "helpers", "search"] +members = ["milli", "http-ui", "benchmarks", "infos", "helpers", "cli"] default-members = ["milli"] [profile.dev] diff --git a/cli/Cargo.toml b/cli/Cargo.toml new file mode 100644 index 000000000..24fb214b9 --- /dev/null +++ b/cli/Cargo.toml @@ -0,0 +1,24 @@ +[package] +name = "cli" +version = "0.1.0" +edition = "2018" +description = "A CLI to interact with a milli index" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +indicatif = "0.16.2" +serde = "1.0.129" +serde_json = "1.0.66" +structopt = "0.3.22" +milli = { path = "../milli" } +eyre = "0.6.5" +color-eyre = "0.5.11" +heed = { git = "https://github.com/Kerollmops/heed", tag = "v0.12.1", default-features = false, features = ["lmdb", "sync-read-txn"] } +byte-unit = { version = "4.0.12", features = ["serde"] } +bimap = "0.6.1" +csv = "1.1.6" +stderrlog = "0.5.1" + +[target.'cfg(target_os = "linux")'.dependencies] +jemallocator = "0.3.2" diff --git a/cli/src/main.rs b/cli/src/main.rs new file mode 100644 index 000000000..b84ff3243 --- /dev/null +++ b/cli/src/main.rs @@ -0,0 +1,335 @@ +use std::fs::File; +use std::io::{stdin, Cursor, Read}; +use std::path::PathBuf; +use std::str::FromStr; + +use byte_unit::Byte; +use eyre::Result; +use indicatif::{MultiProgress, ProgressBar, ProgressStyle}; +use milli::update::UpdateIndexingStep::{ + ComputeIdsAndMergeDocuments, IndexDocuments, MergeDataIntoFinalDatabase, RemapDocumentAddition, +}; +use serde_json::{Map, Value}; +use structopt::StructOpt; + +#[cfg(target_os = "linux")] +#[global_allocator] +static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc; + +#[derive(Debug, StructOpt)] +#[structopt(name = "Milli CLI", about = "A simple CLI to manipulate a milli index.")] +struct Cli { + #[structopt(short, long)] + index_path: PathBuf, + #[structopt(short = "s", long, default_value = "100GiB")] + index_size: Byte, + /// Verbose mode (-v, -vv, -vvv, etc.) + #[structopt(short, long, parse(from_occurrences))] + verbose: usize, + #[structopt(subcommand)] + subcommand: Command, +} + +#[derive(Debug, StructOpt)] +enum Command { + DocumentAddition(DocumentAddition), + Search(Search), + SettingsUpdate(SettingsUpdate), +} + +fn setup(opt: &Cli) -> Result<()> { + color_eyre::install()?; + stderrlog::new() + .verbosity(opt.verbose) + .show_level(false) + .timestamp(stderrlog::Timestamp::Off) + .init()?; + Ok(()) +} + +fn main() -> Result<()> { + let command = Cli::from_args(); + + setup(&command)?; + + let mut options = heed::EnvOpenOptions::new(); + options.map_size(command.index_size.get_bytes() as usize); + let index = milli::Index::new(options, command.index_path)?; + + match command.subcommand { + Command::DocumentAddition(addition) => addition.perform(index)?, + Command::Search(search) => search.perform(index)?, + Command::SettingsUpdate(update) => update.perform(index)?, + } + + Ok(()) +} + +#[derive(Debug)] +enum DocumentAdditionFormat { + Csv, + Json, + Jsonl, +} + +impl FromStr for DocumentAdditionFormat { + type Err = eyre::Error; + + fn from_str(s: &str) -> Result { + match s { + "csv" => Ok(Self::Csv), + "jsonl" => Ok(Self::Jsonl), + "json" => Ok(Self::Json), + other => eyre::bail!("invalid format: {}", other), + } + } +} + +#[derive(Debug, StructOpt)] +struct DocumentAddition { + #[structopt(short, long, default_value = "json", possible_values = &["csv", "jsonl", "json"])] + format: DocumentAdditionFormat, + /// Path to the update file, if not present, will read from stdin. + #[structopt(short, long)] + path: Option, + /// Whether to generate missing document ids. + #[structopt(short, long)] + autogen_docids: bool, + /// Whether to update or replace the documents if they already exist. + #[structopt(short, long)] + update_documents: bool, +} + +impl DocumentAddition { + fn perform(&self, index: milli::Index) -> Result<()> { + let reader: Box = match self.path { + Some(ref path) => { + let file = File::open(path)?; + Box::new(file) + } + None => Box::new(stdin()), + }; + + println!("parsing documents..."); + + let documents = match self.format { + DocumentAdditionFormat::Csv => documents_from_csv(reader)?, + DocumentAdditionFormat::Json => documents_from_json(reader)?, + DocumentAdditionFormat::Jsonl => documents_from_jsonl(reader)?, + }; + + let reader = milli::documents::DocumentBatchReader::from_reader(Cursor::new(documents))?; + + println!("Adding {} documents to the index.", reader.len()); + + let mut txn = index.env.write_txn()?; + let mut addition = milli::update::IndexDocuments::new(&mut txn, &index, 0); + + if self.update_documents { + addition.index_documents_method(milli::update::IndexDocumentsMethod::UpdateDocuments); + } + + addition.log_every_n(100); + + if self.autogen_docids { + addition.enable_autogenerate_docids() + } + + let mut bars = Vec::new(); + let progesses = MultiProgress::new(); + for _ in 0..4 { + let bar = ProgressBar::hidden(); + let bar = progesses.add(bar); + bars.push(bar); + } + + std::thread::spawn(move || { + progesses.join().unwrap(); + }); + + let result = addition.execute(reader, |step, _| indexing_callback(step, &bars))?; + + txn.commit()?; + + println!("{:?}", result); + Ok(()) + } +} + +fn indexing_callback(step: milli::update::UpdateIndexingStep, bars: &[ProgressBar]) { + let step_index = step.step(); + let bar = &bars[step_index]; + if step_index > 0 { + let prev = &bars[step_index - 1]; + if !prev.is_finished() { + prev.disable_steady_tick(); + prev.finish_at_current_pos(); + } + } + + let style = ProgressStyle::default_bar() + .template("[eta: {eta_precise}] {bar:40.cyan/blue} {pos:>7}/{len:7} {msg}") + .progress_chars("##-"); + + match step { + RemapDocumentAddition { documents_seen } => { + bar.set_style(ProgressStyle::default_spinner()); + bar.set_message(format!("remaped {} documents so far.", documents_seen)); + } + ComputeIdsAndMergeDocuments { documents_seen, total_documents } => { + bar.set_style(style); + bar.set_length(total_documents as u64); + bar.set_message("Merging documents..."); + bar.set_position(documents_seen as u64); + } + IndexDocuments { documents_seen, total_documents } => { + bar.set_style(style); + bar.set_length(total_documents as u64); + bar.set_message("Indexing documents..."); + bar.set_position(documents_seen as u64); + } + MergeDataIntoFinalDatabase { databases_seen, total_databases } => { + bar.set_style(style); + bar.set_length(total_databases as u64); + bar.set_message("Merging databases..."); + bar.set_position(databases_seen as u64); + } + } + bar.enable_steady_tick(200); +} + +fn documents_from_jsonl(reader: impl Read) -> Result> { + let mut writer = Cursor::new(Vec::new()); + let mut documents = milli::documents::DocumentBatchBuilder::new(&mut writer)?; + + let values = serde_json::Deserializer::from_reader(reader) + .into_iter::>(); + for document in values { + let document = document?; + documents.add_documents(document)?; + } + documents.finish()?; + + Ok(writer.into_inner()) +} + +fn documents_from_json(reader: impl Read) -> Result> { + let mut writer = Cursor::new(Vec::new()); + let mut documents = milli::documents::DocumentBatchBuilder::new(&mut writer)?; + + let json: serde_json::Value = serde_json::from_reader(reader)?; + documents.add_documents(json)?; + documents.finish()?; + + Ok(writer.into_inner()) +} + +fn documents_from_csv(reader: impl Read) -> Result> { + let mut writer = Cursor::new(Vec::new()); + let mut documents = milli::documents::DocumentBatchBuilder::new(&mut writer)?; + + let mut records = csv::Reader::from_reader(reader); + let iter = records.deserialize::>(); + + for doc in iter { + let doc = doc?; + documents.add_documents(doc)?; + } + + documents.finish()?; + + Ok(writer.into_inner()) +} + +#[derive(Debug, StructOpt)] +struct Search { + query: Option, + #[structopt(short, long)] + filter: Option, + #[structopt(short, long)] + offset: Option, + #[structopt(short, long)] + limit: Option, +} + +impl Search { + fn perform(&self, index: milli::Index) -> Result<()> { + let txn = index.env.read_txn()?; + let mut search = index.search(&txn); + + if let Some(ref query) = self.query { + search.query(query); + } + + if let Some(ref filter) = self.filter { + let condition = milli::FilterCondition::from_str(&txn, &index, filter)?; + search.filter(condition); + } + + if let Some(offset) = self.offset { + search.offset(offset); + } + + if let Some(limit) = self.limit { + search.limit(limit); + } + + let result = search.execute()?; + + let fields_ids_map = index.fields_ids_map(&txn)?; + let displayed_fields = + index.displayed_fields_ids(&txn)?.unwrap_or_else(|| fields_ids_map.ids().collect()); + let documents = index.documents(&txn, result.documents_ids)?; + let mut jsons = Vec::new(); + for (_, obkv) in documents { + let json = milli::obkv_to_json(&displayed_fields, &fields_ids_map, obkv)?; + jsons.push(json); + } + + let hits = serde_json::to_string_pretty(&jsons)?; + + println!("{}", hits); + + Ok(()) + } +} + +#[derive(Debug, StructOpt)] +struct SettingsUpdate { + #[structopt(short, long)] + filterable_attributes: Option>, +} + +impl SettingsUpdate { + fn perform(&self, index: milli::Index) -> Result<()> { + let mut txn = index.env.write_txn()?; + + let mut update = milli::update::Settings::new(&mut txn, &index, 0); + update.log_every_n(100); + + if let Some(ref filterable_attributes) = self.filterable_attributes { + if !filterable_attributes.is_empty() { + update.set_filterable_fields(filterable_attributes.iter().cloned().collect()); + } else { + update.reset_filterable_fields(); + } + } + + let mut bars = Vec::new(); + let progesses = MultiProgress::new(); + for _ in 0..4 { + let bar = ProgressBar::hidden(); + let bar = progesses.add(bar); + bars.push(bar); + } + + std::thread::spawn(move || { + progesses.join().unwrap(); + }); + + update.execute(|step, _| indexing_callback(step, &bars))?; + + txn.commit()?; + Ok(()) + } +} diff --git a/http-ui/Cargo.toml b/http-ui/Cargo.toml index 44183eaa2..1f897e820 100644 --- a/http-ui/Cargo.toml +++ b/http-ui/Cargo.toml @@ -37,6 +37,8 @@ fst = "0.4.5" # Temporary fix for bitvec, remove once fixed. (https://github.com/bitvecto-rs/bitvec/issues/105) funty = "=1.1" +bimap = "0.6.1" +csv = "1.1.6" [dev-dependencies] maplit = "1.0.2" diff --git a/http-ui/src/main.rs b/http-ui/src/main.rs index 5dbb0c326..1bacdfbed 100644 --- a/http-ui/src/main.rs +++ b/http-ui/src/main.rs @@ -3,6 +3,7 @@ mod update_store; use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet}; use std::fmt::Display; use std::fs::{create_dir_all, File}; +use std::io::Cursor; use std::net::SocketAddr; use std::num::{NonZeroU32, NonZeroUsize}; use std::path::PathBuf; @@ -18,8 +19,9 @@ use flate2::read::GzDecoder; use futures::{stream, FutureExt, StreamExt}; use heed::EnvOpenOptions; use meilisearch_tokenizer::{Analyzer, AnalyzerConfig}; +use milli::documents::DocumentBatchReader; use milli::update::UpdateIndexingStep::*; -use milli::update::{IndexDocumentsMethod, Setting, UpdateBuilder, UpdateFormat}; +use milli::update::{IndexDocumentsMethod, Setting, UpdateBuilder}; use milli::{obkv_to_json, CompressionType, FilterCondition, Index, MatchingWords, SearchResult}; use once_cell::sync::OnceCell; use rayon::ThreadPool; @@ -350,19 +352,12 @@ async fn main() -> anyhow::Result<()> { let before_update = Instant::now(); // we extract the update type and execute the update itself. let result: anyhow::Result<()> = - match meta { + (|| match meta { UpdateMeta::DocumentsAddition { method, format, encoding } => { // We must use the write transaction of the update here. let mut wtxn = index_cloned.write_txn()?; let mut builder = update_builder.index_documents(&mut wtxn, &index_cloned); - match format.as_str() { - "csv" => builder.update_format(UpdateFormat::Csv), - "json" => builder.update_format(UpdateFormat::Json), - "json-stream" => builder.update_format(UpdateFormat::JsonStream), - otherwise => panic!("invalid update format {:?}", otherwise), - }; - match method.as_str() { "replace" => builder .index_documents_method(IndexDocumentsMethod::ReplaceDocuments), @@ -377,11 +372,18 @@ async fn main() -> anyhow::Result<()> { otherwise => panic!("invalid encoding format {:?}", otherwise), }; - let result = builder.execute(reader, |indexing_step, update_id| { + let documents = match format.as_str() { + "csv" => documents_from_csv(reader)?, + "json" => documents_from_json(reader)?, + "jsonl" => documents_from_jsonl(reader)?, + otherwise => panic!("invalid update format {:?}", otherwise), + }; + + let documents = DocumentBatchReader::from_reader(Cursor::new(documents))?; + + let result = builder.execute(documents, |indexing_step, update_id| { let (current, total) = match indexing_step { - TransformFromUserIntoGenericFormat { documents_seen } => { - (documents_seen, None) - } + RemapDocumentAddition { documents_seen } => (documents_seen, None), ComputeIdsAndMergeDocuments { documents_seen, total_documents } => { (documents_seen, Some(total_documents)) } @@ -482,9 +484,7 @@ async fn main() -> anyhow::Result<()> { let result = builder.execute(|indexing_step, update_id| { let (current, total) = match indexing_step { - TransformFromUserIntoGenericFormat { documents_seen } => { - (documents_seen, None) - } + RemapDocumentAddition { documents_seen } => (documents_seen, None), ComputeIdsAndMergeDocuments { documents_seen, total_documents } => { (documents_seen, Some(total_documents)) } @@ -526,7 +526,7 @@ async fn main() -> anyhow::Result<()> { Err(e) => Err(e.into()), } } - }; + })(); let meta = match result { Ok(()) => { @@ -842,7 +842,7 @@ async fn main() -> anyhow::Result<()> { UpdateStatus, >, update_method: Option, - update_format: UpdateFormat, + format: String, encoding: Option, mut stream: impl futures::Stream> + Unpin, ) -> Result { @@ -863,13 +863,6 @@ async fn main() -> anyhow::Result<()> { _ => String::from("replace"), }; - let format = match update_format { - UpdateFormat::Csv => String::from("csv"), - UpdateFormat::Json => String::from("json"), - UpdateFormat::JsonStream => String::from("json-stream"), - _ => panic!("Unknown update format"), - }; - let meta = UpdateMeta::DocumentsAddition { method, format, encoding }; let update_id = update_store.register_update(&meta, &mmap[..]).unwrap(); let _ = update_status_sender.send(UpdateStatus::Pending { update_id, meta }); @@ -893,9 +886,9 @@ async fn main() -> anyhow::Result<()> { .and(warp::body::stream()) .and_then(move |content_type: String, content_encoding, params: QueryUpdate, stream| { let format = match content_type.as_str() { - "text/csv" => UpdateFormat::Csv, - "application/json" => UpdateFormat::Json, - "application/x-ndjson" => UpdateFormat::JsonStream, + "text/csv" => "csv", + "application/json" => "json", + "application/x-ndjson" => "jsonl", otherwise => panic!("invalid update format: {}", otherwise), }; @@ -903,7 +896,7 @@ async fn main() -> anyhow::Result<()> { update_store_cloned.clone(), update_status_sender_cloned.clone(), params.method, - format, + format.to_string(), content_encoding, stream, ) @@ -1031,6 +1024,49 @@ async fn main() -> anyhow::Result<()> { Ok(()) } +fn documents_from_jsonl(reader: impl io::Read) -> anyhow::Result> { + let mut writer = Cursor::new(Vec::new()); + let mut documents = milli::documents::DocumentBatchBuilder::new(&mut writer)?; + + let values = serde_json::Deserializer::from_reader(reader) + .into_iter::>(); + for document in values { + let document = document?; + documents.add_documents(document)?; + } + documents.finish()?; + + Ok(writer.into_inner()) +} + +fn documents_from_json(reader: impl io::Read) -> anyhow::Result> { + let mut writer = Cursor::new(Vec::new()); + let mut documents = milli::documents::DocumentBatchBuilder::new(&mut writer)?; + + let json: serde_json::Value = serde_json::from_reader(reader)?; + documents.add_documents(json)?; + documents.finish()?; + + Ok(writer.into_inner()) +} + +fn documents_from_csv(reader: impl io::Read) -> anyhow::Result> { + let mut writer = Cursor::new(Vec::new()); + let mut documents = milli::documents::DocumentBatchBuilder::new(&mut writer)?; + + let mut records = csv::Reader::from_reader(reader); + let iter = records.deserialize::>(); + + for doc in iter { + let doc = doc?; + documents.add_documents(doc)?; + } + + documents.finish()?; + + Ok(writer.into_inner()) +} + #[cfg(test)] mod tests { use maplit::{btreeset, hashmap, hashset}; diff --git a/milli/Cargo.toml b/milli/Cargo.toml index 35e4644fa..37a524197 100644 --- a/milli/Cargo.toml +++ b/milli/Cargo.toml @@ -5,12 +5,13 @@ authors = ["Kerollmops "] edition = "2018" [dependencies] +bimap = { version = "0.6.1", features = ["serde"] } +bincode = "1.3.3" bstr = "0.2.15" byteorder = "1.4.2" chrono = { version = "0.4.19", features = ["serde"] } concat-arrays = "0.1.2" crossbeam-channel = "0.5.1" -csv = "1.1.5" either = "1.6.1" flate2 = "1.0.20" fst = "0.4.5" diff --git a/milli/src/documents/builder.rs b/milli/src/documents/builder.rs new file mode 100644 index 000000000..ba1319eff --- /dev/null +++ b/milli/src/documents/builder.rs @@ -0,0 +1,80 @@ +use std::io; + +use byteorder::{BigEndian, WriteBytesExt}; +use serde::ser::Serialize; + +use super::serde::DocumentSerializer; +use super::{ByteCounter, DocumentsBatchIndex, DocumentsMetadata, Error}; + +/// The `DocumentsBatchBuilder` provides a way to build a documents batch in the intermediary +/// format used by milli. +/// +/// The writer used by the DocumentBatchBuilder can be read using a `DocumentBatchReader` to +/// iterate other the documents. +/// +/// ## example: +/// ``` +/// use milli::documents::DocumentBatchBuilder; +/// use serde_json::json; +/// use std::io::Cursor; +/// +/// let mut writer = Cursor::new(Vec::new()); +/// let mut builder = DocumentBatchBuilder::new(&mut writer).unwrap(); +/// builder.add_documents(json!({"id": 1, "name": "foo"})).unwrap(); +/// builder.finish().unwrap(); +/// ``` +pub struct DocumentBatchBuilder { + serializer: DocumentSerializer, +} + +impl DocumentBatchBuilder { + pub fn new(writer: W) -> Result { + let index = DocumentsBatchIndex::new(); + let mut writer = ByteCounter::new(writer); + // add space to write the offset of the metadata at the end of the writer + writer.write_u64::(0)?; + + let serializer = + DocumentSerializer { writer, buffer: Vec::new(), index, count: 0, allow_seq: true }; + + Ok(Self { serializer }) + } + + /// Returns the number of documents that have been written to the builder. + pub fn len(&self) -> usize { + self.serializer.count + } + + /// This method must be called after the document addition is terminated. It will put the + /// metadata at the end of the file, and write the metadata offset at the beginning on the + /// file. + pub fn finish(self) -> Result<(), Error> { + let DocumentSerializer { + writer: ByteCounter { mut writer, count: offset }, + index, + count, + .. + } = self.serializer; + + let meta = DocumentsMetadata { count, index }; + + bincode::serialize_into(&mut writer, &meta)?; + + writer.seek(io::SeekFrom::Start(0))?; + writer.write_u64::(offset as u64)?; + + writer.flush()?; + + Ok(()) + } + + /// Adds documents to the builder. + /// + /// The internal index is updated with the fields found + /// in the documents. Document must either be a map or a sequences of map, anything else will + /// fail. + pub fn add_documents(&mut self, document: T) -> Result<(), Error> { + document.serialize(&mut self.serializer)?; + Ok(()) + } +} diff --git a/milli/src/documents/mod.rs b/milli/src/documents/mod.rs new file mode 100644 index 000000000..da4227e6b --- /dev/null +++ b/milli/src/documents/mod.rs @@ -0,0 +1,233 @@ +mod builder; +/// The documents module defines an intermediary document format that milli uses for indexation, and +/// provides an API to easily build and read such documents. +/// +/// The `DocumentBatchBuilder` interface allows to write batches of documents to a writer, that can +/// later be read by milli using the `DocumentBatchReader` interface. +mod reader; +mod serde; + +use std::{fmt, io}; + +use ::serde::{Deserialize, Serialize}; +use bimap::BiHashMap; +pub use builder::DocumentBatchBuilder; +pub use reader::DocumentBatchReader; + +use crate::FieldId; + +/// A bidirectional map that links field ids to their name in a document batch. +pub type DocumentsBatchIndex = BiHashMap; + +#[derive(Debug, Serialize, Deserialize)] +struct DocumentsMetadata { + count: usize, + index: DocumentsBatchIndex, +} + +pub struct ByteCounter { + count: usize, + writer: W, +} + +impl ByteCounter { + fn new(writer: W) -> Self { + Self { count: 0, writer } + } +} + +impl io::Write for ByteCounter { + fn write(&mut self, buf: &[u8]) -> io::Result { + let count = self.writer.write(buf)?; + self.count += count; + Ok(count) + } + + fn flush(&mut self) -> io::Result<()> { + self.writer.flush() + } +} + +#[derive(Debug)] +pub enum Error { + InvalidDocumentFormat, + Custom(String), + JsonError(serde_json::Error), + Serialize(bincode::Error), + Io(io::Error), + DocumentTooLarge, +} + +impl From for Error { + fn from(other: io::Error) -> Self { + Self::Io(other) + } +} + +impl From for Error { + fn from(other: bincode::Error) -> Self { + Self::Serialize(other) + } +} + +impl fmt::Display for Error { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Error::Custom(s) => write!(f, "Unexpected serialization error: {}", s), + Error::InvalidDocumentFormat => f.write_str("Invalid document addition format."), + Error::JsonError(err) => write!(f, "Couldn't serialize document value: {}", err), + Error::Io(e) => e.fmt(f), + Error::DocumentTooLarge => f.write_str("Provided document is too large (>2Gib)"), + Error::Serialize(e) => e.fmt(f), + } + } +} + +impl std::error::Error for Error {} + +/// Macro used to generate documents, with the same syntax as `serde_json::json` +#[cfg(test)] +macro_rules! documents { + ($data:tt) => {{ + let documents = serde_json::json!($data); + let mut writer = std::io::Cursor::new(Vec::new()); + let mut builder = crate::documents::DocumentBatchBuilder::new(&mut writer).unwrap(); + builder.add_documents(documents).unwrap(); + builder.finish().unwrap(); + + writer.set_position(0); + + crate::documents::DocumentBatchReader::from_reader(writer).unwrap() + }}; +} + +#[cfg(test)] +mod test { + use serde_json::{json, Value}; + + use super::*; + + #[test] + fn create_documents_no_errors() { + let json = json!({ + "number": 1, + "string": "this is a field", + "array": ["an", "array"], + "object": { + "key": "value", + }, + "bool": true + }); + + let mut v = Vec::new(); + let mut cursor = io::Cursor::new(&mut v); + + let mut builder = DocumentBatchBuilder::new(&mut cursor).unwrap(); + + builder.add_documents(json).unwrap(); + + builder.finish().unwrap(); + + let mut documents = + DocumentBatchReader::from_reader(io::Cursor::new(cursor.into_inner())).unwrap(); + + assert_eq!(documents.index().iter().count(), 5); + + let reader = documents.next_document_with_index().unwrap().unwrap(); + + assert_eq!(reader.1.iter().count(), 5); + assert!(documents.next_document_with_index().unwrap().is_none()); + } + + #[test] + fn test_add_multiple_documents() { + let doc1 = json!({ + "bool": true, + }); + let doc2 = json!({ + "toto": false, + }); + + let mut v = Vec::new(); + let mut cursor = io::Cursor::new(&mut v); + + let mut builder = DocumentBatchBuilder::new(&mut cursor).unwrap(); + + builder.add_documents(doc1).unwrap(); + builder.add_documents(doc2).unwrap(); + + builder.finish().unwrap(); + + let mut documents = + DocumentBatchReader::from_reader(io::Cursor::new(cursor.into_inner())).unwrap(); + + assert_eq!(documents.index().iter().count(), 2); + + let reader = documents.next_document_with_index().unwrap().unwrap(); + + assert_eq!(reader.1.iter().count(), 1); + assert!(documents.next_document_with_index().unwrap().is_some()); + assert!(documents.next_document_with_index().unwrap().is_none()); + } + + #[test] + fn add_documents_array() { + let docs = json!([ + { "toto": false }, + { "tata": "hello" }, + ]); + + let mut v = Vec::new(); + let mut cursor = io::Cursor::new(&mut v); + + let mut builder = DocumentBatchBuilder::new(&mut cursor).unwrap(); + + builder.add_documents(docs).unwrap(); + + builder.finish().unwrap(); + + let mut documents = + DocumentBatchReader::from_reader(io::Cursor::new(cursor.into_inner())).unwrap(); + + assert_eq!(documents.index().iter().count(), 2); + + let reader = documents.next_document_with_index().unwrap().unwrap(); + + assert_eq!(reader.1.iter().count(), 1); + assert!(documents.next_document_with_index().unwrap().is_some()); + assert!(documents.next_document_with_index().unwrap().is_none()); + } + + #[test] + fn add_invalid_document_format() { + let mut v = Vec::new(); + let mut cursor = io::Cursor::new(&mut v); + + let mut builder = DocumentBatchBuilder::new(&mut cursor).unwrap(); + + let docs = json!([[ + { "toto": false }, + { "tata": "hello" }, + ]]); + + assert!(builder.add_documents(docs).is_err()); + + let docs = json!("hello"); + + assert!(builder.add_documents(docs).is_err()); + } + + #[test] + fn test_nested() { + let mut docs = documents!([{ + "hello": { + "toto": ["hello"] + } + }]); + + let (_index, doc) = docs.next_document_with_index().unwrap().unwrap(); + + let nested: Value = serde_json::from_slice(doc.get(0).unwrap()).unwrap(); + assert_eq!(nested, json!({ "toto": ["hello"] })); + } +} diff --git a/milli/src/documents/reader.rs b/milli/src/documents/reader.rs new file mode 100644 index 000000000..14d7c8ceb --- /dev/null +++ b/milli/src/documents/reader.rs @@ -0,0 +1,75 @@ +use std::io; +use std::io::{BufReader, Read}; +use std::mem::size_of; + +use byteorder::{BigEndian, ReadBytesExt}; +use obkv::KvReader; + +use super::{DocumentsBatchIndex, DocumentsMetadata, Error}; +use crate::FieldId; + +/// The `DocumentsBatchReader` provides a way to iterate over documents that have been created with +/// a `DocumentsBatchWriter`. +/// +/// The documents are returned in the form of `obkv::Reader` where each field is identified with a +/// `FieldId`. The mapping between the field ids and the field names is done thanks to the index. +pub struct DocumentBatchReader { + reader: BufReader, + metadata: DocumentsMetadata, + buffer: Vec, + seen_documents: usize, +} + +impl DocumentBatchReader { + /// Construct a `DocumentsReader` from a reader. + /// + /// It first retrieves the index, then moves to the first document. Subsequent calls to + /// `next_document` advance the document reader until all the documents have been read. + pub fn from_reader(mut reader: R) -> Result { + let mut buffer = Vec::new(); + + let meta_offset = reader.read_u64::()?; + reader.seek(io::SeekFrom::Start(meta_offset))?; + reader.read_to_end(&mut buffer)?; + let metadata: DocumentsMetadata = bincode::deserialize(&buffer)?; + + reader.seek(io::SeekFrom::Start(size_of::() as u64))?; + buffer.clear(); + + let reader = BufReader::new(reader); + + Ok(Self { reader, metadata, buffer, seen_documents: 0 }) + } + + /// Returns the next document in the reader, and wraps it in an `obkv::KvReader`, along with a + /// reference to the addition index. + pub fn next_document_with_index<'a>( + &'a mut self, + ) -> io::Result)>> { + if self.seen_documents < self.metadata.count { + let doc_len = self.reader.read_u32::()?; + self.buffer.resize(doc_len as usize, 0); + self.reader.read_exact(&mut self.buffer)?; + self.seen_documents += 1; + + let reader = KvReader::new(&self.buffer); + Ok(Some((&self.metadata.index, reader))) + } else { + Ok(None) + } + } + + /// Return the fields index for the documents batch. + pub fn index(&self) -> &DocumentsBatchIndex { + &self.metadata.index + } + + /// Returns the number of documents in the reader. + pub fn len(&self) -> usize { + self.metadata.count + } + + pub fn is_empty(&self) -> bool { + self.len() == 0 + } +} diff --git a/milli/src/documents/serde.rs b/milli/src/documents/serde.rs new file mode 100644 index 000000000..76dc8915c --- /dev/null +++ b/milli/src/documents/serde.rs @@ -0,0 +1,465 @@ +use std::convert::TryInto; +use std::{fmt, io}; + +use byteorder::{BigEndian, WriteBytesExt}; +use obkv::KvWriter; +use serde::ser::{Impossible, Serialize, SerializeMap, SerializeSeq, Serializer}; + +use super::{ByteCounter, DocumentsBatchIndex, Error}; +use crate::FieldId; + +pub struct DocumentSerializer { + pub writer: ByteCounter, + pub buffer: Vec, + pub index: DocumentsBatchIndex, + pub count: usize, + pub allow_seq: bool, +} + +impl<'a, W: io::Write> Serializer for &'a mut DocumentSerializer { + type Ok = (); + + type Error = Error; + + type SerializeSeq = SeqSerializer<'a, W>; + type SerializeTuple = Impossible<(), Self::Error>; + type SerializeTupleStruct = Impossible<(), Self::Error>; + type SerializeTupleVariant = Impossible<(), Self::Error>; + type SerializeMap = MapSerializer<'a, &'a mut ByteCounter>; + type SerializeStruct = Impossible<(), Self::Error>; + type SerializeStructVariant = Impossible<(), Self::Error>; + fn serialize_map(self, _len: Option) -> Result { + self.buffer.clear(); + let cursor = io::Cursor::new(&mut self.buffer); + self.count += 1; + let map_serializer = MapSerializer { + map: KvWriter::new(cursor), + index: &mut self.index, + writer: &mut self.writer, + buffer: Vec::new(), + }; + + Ok(map_serializer) + } + + fn serialize_seq(self, _len: Option) -> Result { + if self.allow_seq { + // Only allow sequence of documents of depth 1. + self.allow_seq = false; + Ok(SeqSerializer { serializer: self }) + } else { + Err(Error::InvalidDocumentFormat) + } + } + + fn serialize_bool(self, _v: bool) -> Result { + Err(Error::InvalidDocumentFormat) + } + + fn serialize_i8(self, _v: i8) -> Result { + Err(Error::InvalidDocumentFormat) + } + + fn serialize_i16(self, _v: i16) -> Result { + Err(Error::InvalidDocumentFormat) + } + + fn serialize_i32(self, _v: i32) -> Result { + Err(Error::InvalidDocumentFormat) + } + + fn serialize_i64(self, _v: i64) -> Result { + Err(Error::InvalidDocumentFormat) + } + + fn serialize_u8(self, _v: u8) -> Result { + Err(Error::InvalidDocumentFormat) + } + + fn serialize_u16(self, _v: u16) -> Result { + Err(Error::InvalidDocumentFormat) + } + + fn serialize_u32(self, _v: u32) -> Result { + Err(Error::InvalidDocumentFormat) + } + + fn serialize_u64(self, _v: u64) -> Result { + Err(Error::InvalidDocumentFormat) + } + + fn serialize_f32(self, _v: f32) -> Result { + Err(Error::InvalidDocumentFormat) + } + + fn serialize_f64(self, _v: f64) -> Result { + Err(Error::InvalidDocumentFormat) + } + + fn serialize_char(self, _v: char) -> Result { + Err(Error::InvalidDocumentFormat) + } + + fn serialize_str(self, _v: &str) -> Result { + Err(Error::InvalidDocumentFormat) + } + + fn serialize_bytes(self, _v: &[u8]) -> Result { + Err(Error::InvalidDocumentFormat) + } + + fn serialize_none(self) -> Result { + Err(Error::InvalidDocumentFormat) + } + + fn serialize_some(self, _value: &T) -> Result + where + T: Serialize, + { + Err(Error::InvalidDocumentFormat) + } + + fn serialize_unit(self) -> Result { + Err(Error::InvalidDocumentFormat) + } + + fn serialize_unit_struct(self, _name: &'static str) -> Result { + Err(Error::InvalidDocumentFormat) + } + + fn serialize_unit_variant( + self, + _name: &'static str, + _variant_index: u32, + _variant: &'static str, + ) -> Result { + Err(Error::InvalidDocumentFormat) + } + + fn serialize_newtype_struct( + self, + _name: &'static str, + _value: &T, + ) -> Result + where + T: Serialize, + { + Err(Error::InvalidDocumentFormat) + } + + fn serialize_newtype_variant( + self, + _name: &'static str, + _variant_index: u32, + _variant: &'static str, + _value: &T, + ) -> Result + where + T: Serialize, + { + Err(Error::InvalidDocumentFormat) + } + + fn serialize_tuple(self, _len: usize) -> Result { + Err(Error::InvalidDocumentFormat) + } + + fn serialize_tuple_struct( + self, + _name: &'static str, + _len: usize, + ) -> Result { + Err(Error::InvalidDocumentFormat) + } + + fn serialize_tuple_variant( + self, + _name: &'static str, + _variant_index: u32, + _variant: &'static str, + _len: usize, + ) -> Result { + Err(Error::InvalidDocumentFormat) + } + + fn serialize_struct( + self, + _name: &'static str, + _len: usize, + ) -> Result { + Err(Error::InvalidDocumentFormat) + } + + fn serialize_struct_variant( + self, + _name: &'static str, + _variant_index: u32, + _variant: &'static str, + _len: usize, + ) -> Result { + Err(Error::InvalidDocumentFormat) + } +} + +pub struct SeqSerializer<'a, W> { + serializer: &'a mut DocumentSerializer, +} + +impl<'a, W: io::Write> SerializeSeq for SeqSerializer<'a, W> { + type Ok = (); + type Error = Error; + + fn serialize_element(&mut self, value: &T) -> Result<(), Self::Error> + where + T: Serialize, + { + value.serialize(&mut *self.serializer)?; + Ok(()) + } + + fn end(self) -> Result { + Ok(()) + } +} + +pub struct MapSerializer<'a, W> { + map: KvWriter>, FieldId>, + index: &'a mut DocumentsBatchIndex, + writer: W, + buffer: Vec, +} + +/// This implementation of SerializeMap uses serilialize_entry instead of seriliaze_key and +/// serialize_value, therefore these to methods remain unimplemented. +impl<'a, W: io::Write> SerializeMap for MapSerializer<'a, W> { + type Ok = (); + type Error = Error; + + fn serialize_key(&mut self, _key: &T) -> Result<(), Self::Error> { + unreachable!() + } + + fn serialize_value(&mut self, _value: &T) -> Result<(), Self::Error> { + unreachable!() + } + + fn end(mut self) -> Result { + let data = self.map.into_inner().map_err(Error::Io)?.into_inner(); + let data_len: u32 = data.len().try_into().map_err(|_| Error::DocumentTooLarge)?; + + self.writer.write_u32::(data_len).map_err(Error::Io)?; + self.writer.write_all(&data).map_err(Error::Io)?; + + Ok(()) + } + + fn serialize_entry( + &mut self, + key: &K, + value: &V, + ) -> Result<(), Self::Error> + where + K: Serialize, + V: Serialize, + { + let field_serializer = FieldSerializer { index: &mut self.index }; + let field_id: FieldId = key.serialize(field_serializer)?; + + self.buffer.clear(); + let mut cursor = io::Cursor::new(&mut self.buffer); + serde_json::to_writer(&mut cursor, value).map_err(Error::JsonError)?; + + self.map.insert(field_id, cursor.into_inner()).map_err(Error::Io)?; + + Ok(()) + } +} + +struct FieldSerializer<'a> { + index: &'a mut DocumentsBatchIndex, +} + +impl<'a> serde::Serializer for FieldSerializer<'a> { + type Ok = FieldId; + + type Error = Error; + + type SerializeSeq = Impossible; + type SerializeTuple = Impossible; + type SerializeTupleStruct = Impossible; + type SerializeTupleVariant = Impossible; + type SerializeMap = Impossible; + type SerializeStruct = Impossible; + type SerializeStructVariant = Impossible; + + fn serialize_str(self, ws: &str) -> Result { + let field_id = match self.index.get_by_right(ws) { + Some(field_id) => *field_id, + None => { + let field_id = self.index.len() as FieldId; + self.index.insert(field_id, ws.to_string()); + field_id + } + }; + + Ok(field_id) + } + + fn serialize_bool(self, _v: bool) -> Result { + Err(Error::InvalidDocumentFormat) + } + + fn serialize_i8(self, _v: i8) -> Result { + Err(Error::InvalidDocumentFormat) + } + + fn serialize_i16(self, _v: i16) -> Result { + Err(Error::InvalidDocumentFormat) + } + + fn serialize_i32(self, _v: i32) -> Result { + Err(Error::InvalidDocumentFormat) + } + + fn serialize_i64(self, _v: i64) -> Result { + Err(Error::InvalidDocumentFormat) + } + + fn serialize_u8(self, _v: u8) -> Result { + Err(Error::InvalidDocumentFormat) + } + + fn serialize_u16(self, _v: u16) -> Result { + Err(Error::InvalidDocumentFormat) + } + + fn serialize_u32(self, _v: u32) -> Result { + Err(Error::InvalidDocumentFormat) + } + + fn serialize_u64(self, _v: u64) -> Result { + Err(Error::InvalidDocumentFormat) + } + + fn serialize_f32(self, _v: f32) -> Result { + Err(Error::InvalidDocumentFormat) + } + + fn serialize_f64(self, _v: f64) -> Result { + Err(Error::InvalidDocumentFormat) + } + + fn serialize_char(self, _v: char) -> Result { + Err(Error::InvalidDocumentFormat) + } + + fn serialize_bytes(self, _v: &[u8]) -> Result { + Err(Error::InvalidDocumentFormat) + } + + fn serialize_none(self) -> Result { + Err(Error::InvalidDocumentFormat) + } + + fn serialize_some(self, _value: &T) -> Result + where + T: Serialize, + { + Err(Error::InvalidDocumentFormat) + } + + fn serialize_unit(self) -> Result { + Err(Error::InvalidDocumentFormat) + } + + fn serialize_unit_struct(self, _name: &'static str) -> Result { + Err(Error::InvalidDocumentFormat) + } + + fn serialize_unit_variant( + self, + _name: &'static str, + _variant_index: u32, + _variant: &'static str, + ) -> Result { + Err(Error::InvalidDocumentFormat) + } + + fn serialize_newtype_struct( + self, + _name: &'static str, + _value: &T, + ) -> Result + where + T: Serialize, + { + Err(Error::InvalidDocumentFormat) + } + + fn serialize_newtype_variant( + self, + _name: &'static str, + _variant_index: u32, + _variant: &'static str, + _value: &T, + ) -> Result + where + T: Serialize, + { + Err(Error::InvalidDocumentFormat) + } + + fn serialize_seq(self, _len: Option) -> Result { + Err(Error::InvalidDocumentFormat) + } + + fn serialize_tuple(self, _len: usize) -> Result { + Err(Error::InvalidDocumentFormat) + } + + fn serialize_tuple_struct( + self, + _name: &'static str, + _len: usize, + ) -> Result { + Err(Error::InvalidDocumentFormat) + } + + fn serialize_tuple_variant( + self, + _name: &'static str, + _variant_index: u32, + _variant: &'static str, + _len: usize, + ) -> Result { + Err(Error::InvalidDocumentFormat) + } + + fn serialize_map(self, _len: Option) -> Result { + Err(Error::InvalidDocumentFormat) + } + + fn serialize_struct( + self, + _name: &'static str, + _len: usize, + ) -> Result { + Err(Error::InvalidDocumentFormat) + } + + fn serialize_struct_variant( + self, + _name: &'static str, + _variant_index: u32, + _variant: &'static str, + _len: usize, + ) -> Result { + Err(Error::InvalidDocumentFormat) + } +} + +impl serde::ser::Error for Error { + fn custom(msg: T) -> Self { + Error::Custom(msg.to_string()) + } +} diff --git a/milli/src/error.rs b/milli/src/error.rs index e6bd3fd62..fe0ac2cf7 100644 --- a/milli/src/error.rs +++ b/milli/src/error.rs @@ -55,7 +55,6 @@ pub enum FieldIdMapMissingEntry { #[derive(Debug)] pub enum UserError { AttributeLimitReached, - Csv(csv::Error), DocumentLimitReached, InvalidAscDescSyntax { name: String }, InvalidDocumentId { document_id: Value }, @@ -212,7 +211,6 @@ impl fmt::Display for UserError { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { Self::AttributeLimitReached => f.write_str("maximum number of attributes reached"), - Self::Csv(error) => error.fmt(f), Self::DocumentLimitReached => f.write_str("maximum number of documents reached"), Self::InvalidFacetsDistribution { invalid_facets_name } => { let name_list = diff --git a/milli/src/index.rs b/milli/src/index.rs index f7603148d..dd5851ccc 100644 --- a/milli/src/index.rs +++ b/milli/src/index.rs @@ -868,7 +868,7 @@ pub(crate) mod tests { use maplit::btreemap; use tempfile::TempDir; - use crate::update::{IndexDocuments, UpdateFormat}; + use crate::update::IndexDocuments; use crate::Index; pub(crate) struct TempIndex { @@ -904,13 +904,12 @@ pub(crate) mod tests { let index = Index::new(options, &path).unwrap(); let mut wtxn = index.write_txn().unwrap(); - let content = &br#"[ + let content = documents!([ { "id": 1, "name": "kevin" }, { "id": 2, "name": "bob", "age": 20 }, { "id": 2, "name": "bob", "age": 20 } - ]"#[..]; - let mut builder = IndexDocuments::new(&mut wtxn, &index, 0); - builder.update_format(UpdateFormat::Json); + ]); + let builder = IndexDocuments::new(&mut wtxn, &index, 0); builder.execute(content, |_, _| ()).unwrap(); wtxn.commit().unwrap(); @@ -929,8 +928,12 @@ pub(crate) mod tests { // we add all the documents a second time. we are supposed to get the same // field_distribution in the end let mut wtxn = index.write_txn().unwrap(); - let mut builder = IndexDocuments::new(&mut wtxn, &index, 0); - builder.update_format(UpdateFormat::Json); + let builder = IndexDocuments::new(&mut wtxn, &index, 0); + let content = documents!([ + { "id": 1, "name": "kevin" }, + { "id": 2, "name": "bob", "age": 20 }, + { "id": 2, "name": "bob", "age": 20 } + ]); builder.execute(content, |_, _| ()).unwrap(); wtxn.commit().unwrap(); @@ -947,13 +950,12 @@ pub(crate) mod tests { ); // then we update a document by removing one field and another by adding one field - let content = &br#"[ + let content = documents!([ { "id": 1, "name": "kevin", "has_dog": true }, { "id": 2, "name": "bob" } - ]"#[..]; + ]); let mut wtxn = index.write_txn().unwrap(); - let mut builder = IndexDocuments::new(&mut wtxn, &index, 0); - builder.update_format(UpdateFormat::Json); + let builder = IndexDocuments::new(&mut wtxn, &index, 0); builder.execute(content, |_, _| ()).unwrap(); wtxn.commit().unwrap(); diff --git a/milli/src/lib.rs b/milli/src/lib.rs index 7c9f56665..550e7f13d 100644 --- a/milli/src/lib.rs +++ b/milli/src/lib.rs @@ -1,6 +1,9 @@ #[macro_use] extern crate pest_derive; +#[macro_use] +pub mod documents; + mod criterion; mod error; mod external_documents_ids; diff --git a/milli/src/search/distinct/mod.rs b/milli/src/search/distinct/mod.rs index e7dc52a82..deb51a053 100644 --- a/milli/src/search/distinct/mod.rs +++ b/milli/src/search/distinct/mod.rs @@ -27,6 +27,7 @@ pub trait Distinct { #[cfg(test)] mod test { use std::collections::HashSet; + use std::io::Cursor; use once_cell::sync::Lazy; use rand::seq::SliceRandom; @@ -34,19 +35,20 @@ mod test { use roaring::RoaringBitmap; use serde_json::{json, Value}; + use crate::documents::{DocumentBatchBuilder, DocumentBatchReader}; use crate::index::tests::TempIndex; use crate::index::Index; - use crate::update::{IndexDocumentsMethod, UpdateBuilder, UpdateFormat}; + use crate::update::{IndexDocumentsMethod, UpdateBuilder}; use crate::{DocumentId, FieldId, BEU32}; - static JSON: Lazy = Lazy::new(generate_json); + static JSON: Lazy> = Lazy::new(generate_documents); - fn generate_json() -> Value { + fn generate_documents() -> Vec { let mut rng = rand::thread_rng(); let num_docs = rng.gen_range(10..30); - let mut documents = Vec::new(); - + let mut cursor = Cursor::new(Vec::new()); + let mut builder = DocumentBatchBuilder::new(&mut cursor).unwrap(); let txts = ["Toto", "Titi", "Tata"]; let cats = (1..10).map(|i| i.to_string()).collect::>(); let cat_ints = (1..10).collect::>(); @@ -66,10 +68,11 @@ mod test { "txts": sample_txts[..(rng.gen_range(0..3))], "cat-ints": sample_ints[..(rng.gen_range(0..3))], }); - documents.push(doc); + builder.add_documents(doc).unwrap(); } - Value::Array(documents) + builder.finish().unwrap(); + cursor.into_inner() } /// Returns a temporary index populated with random test documents, the FieldId for the @@ -89,13 +92,15 @@ mod test { let mut addition = builder.index_documents(&mut txn, &index); addition.index_documents_method(IndexDocumentsMethod::ReplaceDocuments); - addition.update_format(UpdateFormat::Json); - addition.execute(JSON.to_string().as_bytes(), |_, _| ()).unwrap(); + let reader = + crate::documents::DocumentBatchReader::from_reader(Cursor::new(&*JSON)).unwrap(); + addition.execute(reader, |_, _| ()).unwrap(); let fields_map = index.fields_ids_map(&txn).unwrap(); let fid = fields_map.id(&distinct).unwrap(); - let map = (0..JSON.as_array().unwrap().len() as u32).collect(); + let documents = DocumentBatchReader::from_reader(Cursor::new(&*JSON)).unwrap(); + let map = (0..documents.len() as u32).collect(); txn.commit().unwrap(); diff --git a/milli/src/update/clear_documents.rs b/milli/src/update/clear_documents.rs index e937cb65f..ea4193eaf 100644 --- a/milli/src/update/clear_documents.rs +++ b/milli/src/update/clear_documents.rs @@ -82,7 +82,7 @@ mod tests { use heed::EnvOpenOptions; use super::*; - use crate::update::{IndexDocuments, UpdateFormat}; + use crate::update::IndexDocuments; #[test] fn clear_documents() { @@ -92,14 +92,12 @@ mod tests { let index = Index::new(options, &path).unwrap(); let mut wtxn = index.write_txn().unwrap(); - let content = &br#"[ + let content = documents!([ { "id": 0, "name": "kevin", "age": 20 }, { "id": 1, "name": "kevina" }, { "id": 2, "name": "benoit", "country": "France", "_geo": { "lng": 42, "lat": 35 } } - ]"#[..]; - let mut builder = IndexDocuments::new(&mut wtxn, &index, 0); - builder.update_format(UpdateFormat::Json); - builder.execute(content, |_, _| ()).unwrap(); + ]); + IndexDocuments::new(&mut wtxn, &index, 0).execute(content, |_, _| ()).unwrap(); // Clear all documents from the database. let builder = ClearDocuments::new(&mut wtxn, &index, 1); diff --git a/milli/src/update/delete_documents.rs b/milli/src/update/delete_documents.rs index b49cdc3cd..1b16ba9bf 100644 --- a/milli/src/update/delete_documents.rs +++ b/milli/src/update/delete_documents.rs @@ -567,7 +567,7 @@ mod tests { use maplit::hashset; use super::*; - use crate::update::{IndexDocuments, Settings, UpdateFormat}; + use crate::update::{IndexDocuments, Settings}; use crate::FilterCondition; #[test] @@ -578,13 +578,12 @@ mod tests { let index = Index::new(options, &path).unwrap(); let mut wtxn = index.write_txn().unwrap(); - let content = &br#"[ + let content = documents!([ { "id": 0, "name": "kevin", "object": { "key1": "value1", "key2": "value2" } }, { "id": 1, "name": "kevina", "array": ["I", "am", "fine"] }, { "id": 2, "name": "benoit", "array_of_object": [{ "wow": "amazing" }] } - ]"#[..]; - let mut builder = IndexDocuments::new(&mut wtxn, &index, 0); - builder.update_format(UpdateFormat::Json); + ]); + let builder = IndexDocuments::new(&mut wtxn, &index, 0); builder.execute(content, |_, _| ()).unwrap(); // delete those documents, ids are synchronous therefore 0, 1, and 2. @@ -609,13 +608,12 @@ mod tests { let index = Index::new(options, &path).unwrap(); let mut wtxn = index.write_txn().unwrap(); - let content = &br#"[ + let content = documents!([ { "mysuperid": 0, "name": "kevin" }, { "mysuperid": 1, "name": "kevina" }, { "mysuperid": 2, "name": "benoit" } - ]"#[..]; - let mut builder = IndexDocuments::new(&mut wtxn, &index, 0); - builder.update_format(UpdateFormat::Json); + ]); + let builder = IndexDocuments::new(&mut wtxn, &index, 0); builder.execute(content, |_, _| ()).unwrap(); // Delete not all of the documents but some of them. @@ -640,7 +638,7 @@ mod tests { builder.set_filterable_fields(hashset! { S("label") }); builder.execute(|_, _| ()).unwrap(); - let content = &br#"[ + let content = documents!([ {"docid":"1_4","label":"sign"}, {"docid":"1_5","label":"letter"}, {"docid":"1_7","label":"abstract,cartoon,design,pattern"}, @@ -661,9 +659,8 @@ mod tests { {"docid":"1_58","label":"abstract,art,cartoon"}, {"docid":"1_68","label":"design"}, {"docid":"1_69","label":"geometry"} - ]"#[..]; - let mut builder = IndexDocuments::new(&mut wtxn, &index, 0); - builder.update_format(UpdateFormat::Json); + ]); + let builder = IndexDocuments::new(&mut wtxn, &index, 0); builder.execute(content, |_, _| ()).unwrap(); // Delete not all of the documents but some of them. @@ -692,7 +689,7 @@ mod tests { builder.set_sortable_fields(hashset!(S("_geo"))); builder.execute(|_, _| ()).unwrap(); - let content = &r#"[ + let content = documents!([ {"id":"1","city":"Lille", "_geo": { "lat": 50.629973371633746, "lng": 3.0569447399419570 } }, {"id":"2","city":"Mons-en-Barœul", "_geo": { "lat": 50.641586120121050, "lng": 3.1106593480348670 } }, {"id":"3","city":"Hellemmes", "_geo": { "lat": 50.631220965518080, "lng": 3.1106399673339933 } }, @@ -713,12 +710,10 @@ mod tests { {"id":"18","city":"Amiens", "_geo": { "lat": 49.931472529669996, "lng": 2.2710499758317080 } }, {"id":"19","city":"Compiègne", "_geo": { "lat": 49.444980887725656, "lng": 2.7913841281529015 } }, {"id":"20","city":"Paris", "_geo": { "lat": 48.902100060895480, "lng": 2.3708400867406930 } } - ]"#[..]; + ]); let external_ids_to_delete = ["5", "6", "7", "12", "17", "19"]; - let mut builder = IndexDocuments::new(&mut wtxn, &index, 0); - builder.update_format(UpdateFormat::Json); - builder.execute(content.as_bytes(), |_, _| ()).unwrap(); + IndexDocuments::new(&mut wtxn, &index, 0).execute(content, |_, _| ()).unwrap(); let external_document_ids = index.external_documents_ids(&wtxn).unwrap(); let ids_to_delete: Vec = external_ids_to_delete diff --git a/milli/src/update/index_documents/mod.rs b/milli/src/update/index_documents/mod.rs index bdd00dc56..f9577243f 100644 --- a/milli/src/update/index_documents/mod.rs +++ b/milli/src/update/index_documents/mod.rs @@ -4,7 +4,7 @@ mod transform; mod typed_chunk; use std::collections::HashSet; -use std::io::{self, BufRead, BufReader}; +use std::io::{Read, Seek}; use std::iter::FromIterator; use std::num::{NonZeroU32, NonZeroUsize}; use std::time::Instant; @@ -24,6 +24,7 @@ pub use self::helpers::{ }; use self::helpers::{grenad_obkv_into_chunks, GrenadParameters}; pub use self::transform::{Transform, TransformOutput}; +use crate::documents::DocumentBatchReader; use crate::update::{ Facets, UpdateBuilder, UpdateIndexingStep, WordPrefixDocids, WordPrefixPairProximityDocids, WordsLevelPositions, WordsPrefixesFst, @@ -57,17 +58,6 @@ pub enum WriteMethod { GetMergePut, } -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)] -#[non_exhaustive] -pub enum UpdateFormat { - /// The given update is a real **comma seperated** CSV with headers on the first line. - Csv, - /// The given update is a JSON array with documents inside. - Json, - /// The given update is a JSON stream with a document on each line. - JsonStream, -} - pub struct IndexDocuments<'t, 'u, 'i, 'a> { wtxn: &'t mut heed::RwTxn<'i, 'u>, index: &'i Index, @@ -85,7 +75,6 @@ pub struct IndexDocuments<'t, 'u, 'i, 'a> { words_positions_level_group_size: Option, words_positions_min_level_size: Option, update_method: IndexDocumentsMethod, - update_format: UpdateFormat, autogenerate_docids: bool, update_id: u64, } @@ -113,18 +102,17 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> { words_positions_level_group_size: None, words_positions_min_level_size: None, update_method: IndexDocumentsMethod::ReplaceDocuments, - update_format: UpdateFormat::Json, autogenerate_docids: false, update_id, } } - pub fn index_documents_method(&mut self, method: IndexDocumentsMethod) { - self.update_method = method; + pub fn log_every_n(&mut self, n: usize) { + self.log_every_n = Some(n); } - pub fn update_format(&mut self, format: UpdateFormat) { - self.update_format = format; + pub fn index_documents_method(&mut self, method: IndexDocumentsMethod) { + self.update_method = method; } pub fn enable_autogenerate_docids(&mut self) { @@ -136,16 +124,17 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> { } #[logging_timer::time("IndexDocuments::{}")] - pub fn execute(self, reader: R, progress_callback: F) -> Result + pub fn execute( + self, + reader: DocumentBatchReader, + progress_callback: F, + ) -> Result where - R: io::Read, + R: Read + Seek, F: Fn(UpdateIndexingStep, u64) + Sync, { - let mut reader = BufReader::new(reader); - reader.fill_buf()?; - // Early return when there is no document to add - if reader.buffer().is_empty() { + if reader.is_empty() { return Ok(DocumentAdditionResult { nb_documents: 0 }); } @@ -165,14 +154,7 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> { autogenerate_docids: self.autogenerate_docids, }; - let output = match self.update_format { - UpdateFormat::Csv => transform.output_from_csv(reader, &progress_callback)?, - UpdateFormat::Json => transform.output_from_json(reader, &progress_callback)?, - UpdateFormat::JsonStream => { - transform.output_from_json_stream(reader, &progress_callback)? - } - }; - + let output = transform.read_documents(reader, progress_callback)?; let nb_documents = output.documents_count; info!("Update transformed in {:.02?}", before_transform.elapsed()); @@ -462,6 +444,7 @@ mod tests { use heed::EnvOpenOptions; use super::*; + use crate::documents::DocumentBatchBuilder; use crate::update::DeleteDocuments; use crate::HashMap; @@ -474,9 +457,12 @@ mod tests { // First we send 3 documents with ids from 1 to 3. let mut wtxn = index.write_txn().unwrap(); - let content = &b"id,name\n1,kevin\n2,kevina\n3,benoit\n"[..]; - let mut builder = IndexDocuments::new(&mut wtxn, &index, 0); - builder.update_format(UpdateFormat::Csv); + let content = documents!([ + { "id": 1, "name": "kevin" }, + { "id": 2, "name": "kevina" }, + { "id": 3, "name": "benoit" } + ]); + let builder = IndexDocuments::new(&mut wtxn, &index, 0); builder.execute(content, |_, _| ()).unwrap(); wtxn.commit().unwrap(); @@ -488,9 +474,8 @@ mod tests { // Second we send 1 document with id 1, to erase the previous ones. let mut wtxn = index.write_txn().unwrap(); - let content = &b"id,name\n1,updated kevin\n"[..]; - let mut builder = IndexDocuments::new(&mut wtxn, &index, 1); - builder.update_format(UpdateFormat::Csv); + let content = documents!([ { "id": 1, "name": "updated kevin" } ]); + let builder = IndexDocuments::new(&mut wtxn, &index, 1); builder.execute(content, |_, _| ()).unwrap(); wtxn.commit().unwrap(); @@ -502,9 +487,12 @@ mod tests { // Third we send 3 documents again to replace the existing ones. let mut wtxn = index.write_txn().unwrap(); - let content = &b"id,name\n1,updated second kevin\n2,updated kevina\n3,updated benoit\n"[..]; - let mut builder = IndexDocuments::new(&mut wtxn, &index, 2); - builder.update_format(UpdateFormat::Csv); + let content = documents!([ + { "id": 1, "name": "updated second kevin" }, + { "id": 2, "name": "updated kevina" }, + { "id": 3, "name": "updated benoit" } + ]); + let builder = IndexDocuments::new(&mut wtxn, &index, 2); builder.execute(content, |_, _| ()).unwrap(); wtxn.commit().unwrap(); @@ -525,9 +513,12 @@ mod tests { // First we send 3 documents with duplicate ids and // change the index method to merge documents. let mut wtxn = index.write_txn().unwrap(); - let content = &b"id,name\n1,kevin\n1,kevina\n1,benoit\n"[..]; + let content = documents!([ + { "id": 1, "name": "kevin" }, + { "id": 1, "name": "kevina" }, + { "id": 1, "name": "benoit" } + ]); let mut builder = IndexDocuments::new(&mut wtxn, &index, 0); - builder.update_format(UpdateFormat::Csv); builder.index_documents_method(IndexDocumentsMethod::UpdateDocuments); builder.execute(content, |_, _| ()).unwrap(); wtxn.commit().unwrap(); @@ -552,9 +543,8 @@ mod tests { // Second we send 1 document with id 1, to force it to be merged with the previous one. let mut wtxn = index.write_txn().unwrap(); - let content = &b"id,age\n1,25\n"[..]; + let content = documents!([ { "id": 1, "age": 25 } ]); let mut builder = IndexDocuments::new(&mut wtxn, &index, 1); - builder.update_format(UpdateFormat::Csv); builder.index_documents_method(IndexDocumentsMethod::UpdateDocuments); builder.execute(content, |_, _| ()).unwrap(); wtxn.commit().unwrap(); @@ -574,13 +564,13 @@ mod tests { let mut doc_iter = doc.iter(); assert_eq!(doc_iter.next(), Some((0, &br#""1""#[..]))); assert_eq!(doc_iter.next(), Some((1, &br#""benoit""#[..]))); - assert_eq!(doc_iter.next(), Some((2, &br#""25""#[..]))); + assert_eq!(doc_iter.next(), Some((2, &br#"25"#[..]))); assert_eq!(doc_iter.next(), None); drop(rtxn); } #[test] - fn not_auto_generated_csv_documents_ids() { + fn not_auto_generated_documents_ids() { let path = tempfile::tempdir().unwrap(); let mut options = EnvOpenOptions::new(); options.map_size(10 * 1024 * 1024); // 10 MB @@ -588,35 +578,12 @@ mod tests { // First we send 3 documents with ids from 1 to 3. let mut wtxn = index.write_txn().unwrap(); - let content = &b"name\nkevin\nkevina\nbenoit\n"[..]; - let mut builder = IndexDocuments::new(&mut wtxn, &index, 0); - builder.update_format(UpdateFormat::Csv); - assert!(builder.execute(content, |_, _| ()).is_err()); - wtxn.commit().unwrap(); - - // Check that there is no document. - let rtxn = index.read_txn().unwrap(); - let count = index.number_of_documents(&rtxn).unwrap(); - assert_eq!(count, 0); - drop(rtxn); - } - - #[test] - fn not_auto_generated_json_documents_ids() { - let path = tempfile::tempdir().unwrap(); - let mut options = EnvOpenOptions::new(); - options.map_size(10 * 1024 * 1024); // 10 MB - let index = Index::new(options, &path).unwrap(); - - // First we send 3 documents and 2 without ids. - let mut wtxn = index.write_txn().unwrap(); - let content = &br#"[ - { "name": "kevina", "id": 21 }, + let content = documents!([ { "name": "kevin" }, + { "name": "kevina" }, { "name": "benoit" } - ]"#[..]; - let mut builder = IndexDocuments::new(&mut wtxn, &index, 0); - builder.update_format(UpdateFormat::Json); + ]); + let builder = IndexDocuments::new(&mut wtxn, &index, 0); assert!(builder.execute(content, |_, _| ()).is_err()); wtxn.commit().unwrap(); @@ -636,10 +603,13 @@ mod tests { // First we send 3 documents with ids from 1 to 3. let mut wtxn = index.write_txn().unwrap(); - let content = &b"name\nkevin\nkevina\nbenoit\n"[..]; + let content = documents!([ + { "name": "kevin" }, + { "name": "kevina" }, + { "name": "benoit" } + ]); let mut builder = IndexDocuments::new(&mut wtxn, &index, 0); builder.enable_autogenerate_docids(); - builder.update_format(UpdateFormat::Csv); builder.execute(content, |_, _| ()).unwrap(); wtxn.commit().unwrap(); @@ -655,10 +625,9 @@ mod tests { // Second we send 1 document with the generated uuid, to erase the previous ones. let mut wtxn = index.write_txn().unwrap(); - let content = format!("id,name\n{},updated kevin", kevin_uuid); - let mut builder = IndexDocuments::new(&mut wtxn, &index, 1); - builder.update_format(UpdateFormat::Csv); - builder.execute(content.as_bytes(), |_, _| ()).unwrap(); + let content = documents!([ { "name": "updated kevin", "id": kevin_uuid } ]); + let builder = IndexDocuments::new(&mut wtxn, &index, 1); + builder.execute(content, |_, _| ()).unwrap(); wtxn.commit().unwrap(); // Check that there is **always** 3 documents. @@ -689,9 +658,12 @@ mod tests { // First we send 3 documents with ids from 1 to 3. let mut wtxn = index.write_txn().unwrap(); - let content = &b"id,name\n1,kevin\n2,kevina\n3,benoit\n"[..]; - let mut builder = IndexDocuments::new(&mut wtxn, &index, 0); - builder.update_format(UpdateFormat::Csv); + let content = documents!([ + { "id": 1, "name": "kevin" }, + { "id": 2, "name": "kevina" }, + { "id": 3, "name": "benoit" } + ]); + let builder = IndexDocuments::new(&mut wtxn, &index, 0); builder.execute(content, |_, _| ()).unwrap(); wtxn.commit().unwrap(); @@ -703,9 +675,9 @@ mod tests { // Second we send 1 document without specifying the id. let mut wtxn = index.write_txn().unwrap(); - let content = &b"name\nnew kevin"[..]; + let content = documents!([ { "name": "new kevin" } ]); let mut builder = IndexDocuments::new(&mut wtxn, &index, 1); - builder.update_format(UpdateFormat::Csv); + builder.enable_autogenerate_docids(); builder.execute(content, |_, _| ()).unwrap(); wtxn.commit().unwrap(); @@ -717,7 +689,7 @@ mod tests { } #[test] - fn empty_csv_update() { + fn empty_update() { let path = tempfile::tempdir().unwrap(); let mut options = EnvOpenOptions::new(); options.map_size(10 * 1024 * 1024); // 10 MB @@ -725,9 +697,8 @@ mod tests { // First we send 0 documents and only headers. let mut wtxn = index.write_txn().unwrap(); - let content = &b"id,name\n"[..]; - let mut builder = IndexDocuments::new(&mut wtxn, &index, 0); - builder.update_format(UpdateFormat::Csv); + let content = documents!([]); + let builder = IndexDocuments::new(&mut wtxn, &index, 0); builder.execute(content, |_, _| ()).unwrap(); wtxn.commit().unwrap(); @@ -738,83 +709,6 @@ mod tests { drop(rtxn); } - #[test] - fn json_documents() { - let path = tempfile::tempdir().unwrap(); - let mut options = EnvOpenOptions::new(); - options.map_size(10 * 1024 * 1024); // 10 MB - let index = Index::new(options, &path).unwrap(); - - // First we send 3 documents with an id for only one of them. - let mut wtxn = index.write_txn().unwrap(); - let content = &br#"[ - { "name": "kevin" }, - { "name": "kevina", "id": 21 }, - { "name": "benoit" } - ]"#[..]; - let mut builder = IndexDocuments::new(&mut wtxn, &index, 0); - builder.enable_autogenerate_docids(); - builder.update_format(UpdateFormat::Json); - builder.execute(content, |_, _| ()).unwrap(); - wtxn.commit().unwrap(); - - // Check that there is 3 documents now. - let rtxn = index.read_txn().unwrap(); - let count = index.number_of_documents(&rtxn).unwrap(); - assert_eq!(count, 3); - drop(rtxn); - } - - #[test] - fn empty_json_update() { - let path = tempfile::tempdir().unwrap(); - let mut options = EnvOpenOptions::new(); - options.map_size(10 * 1024 * 1024); // 10 MB - let index = Index::new(options, &path).unwrap(); - - // First we send 0 documents. - let mut wtxn = index.write_txn().unwrap(); - let content = &b"[]"[..]; - let mut builder = IndexDocuments::new(&mut wtxn, &index, 0); - builder.enable_autogenerate_docids(); - builder.update_format(UpdateFormat::Json); - builder.execute(content, |_, _| ()).unwrap(); - wtxn.commit().unwrap(); - - // Check that there is no documents. - let rtxn = index.read_txn().unwrap(); - let count = index.number_of_documents(&rtxn).unwrap(); - assert_eq!(count, 0); - drop(rtxn); - } - - #[test] - fn json_stream_documents() { - let path = tempfile::tempdir().unwrap(); - let mut options = EnvOpenOptions::new(); - options.map_size(10 * 1024 * 1024); // 10 MB - let index = Index::new(options, &path).unwrap(); - - // First we send 3 documents with an id for only one of them. - let mut wtxn = index.write_txn().unwrap(); - let content = &br#" - { "name": "kevin" } - { "name": "kevina", "id": 21 } - { "name": "benoit" } - "#[..]; - let mut builder = IndexDocuments::new(&mut wtxn, &index, 0); - builder.enable_autogenerate_docids(); - builder.update_format(UpdateFormat::JsonStream); - builder.execute(content, |_, _| ()).unwrap(); - wtxn.commit().unwrap(); - - // Check that there is 3 documents now. - let rtxn = index.read_txn().unwrap(); - let count = index.number_of_documents(&rtxn).unwrap(); - assert_eq!(count, 3); - drop(rtxn); - } - #[test] fn invalid_documents_ids() { let path = tempfile::tempdir().unwrap(); @@ -825,18 +719,16 @@ mod tests { // First we send 1 document with an invalid id. let mut wtxn = index.write_txn().unwrap(); // There is a space in the document id. - let content = &b"id,name\nbrume bleue,kevin\n"[..]; - let mut builder = IndexDocuments::new(&mut wtxn, &index, 0); - builder.update_format(UpdateFormat::Csv); + let content = documents!([ { "id": "brume bleue", "name": "kevin" } ]); + let builder = IndexDocuments::new(&mut wtxn, &index, 0); assert!(builder.execute(content, |_, _| ()).is_err()); wtxn.commit().unwrap(); // First we send 1 document with a valid id. let mut wtxn = index.write_txn().unwrap(); // There is a space in the document id. - let content = &b"id,name\n32,kevin\n"[..]; - let mut builder = IndexDocuments::new(&mut wtxn, &index, 1); - builder.update_format(UpdateFormat::Csv); + let content = documents!([ { "id": 32, "name": "kevin" } ]); + let builder = IndexDocuments::new(&mut wtxn, &index, 1); builder.execute(content, |_, _| ()).unwrap(); wtxn.commit().unwrap(); @@ -848,7 +740,7 @@ mod tests { } #[test] - fn complex_json_documents() { + fn complex_documents() { let path = tempfile::tempdir().unwrap(); let mut options = EnvOpenOptions::new(); options.map_size(10 * 1024 * 1024); // 10 MB @@ -856,13 +748,12 @@ mod tests { // First we send 3 documents with an id for only one of them. let mut wtxn = index.write_txn().unwrap(); - let content = &br#"[ + let content = documents!([ { "id": 0, "name": "kevin", "object": { "key1": "value1", "key2": "value2" } }, { "id": 1, "name": "kevina", "array": ["I", "am", "fine"] }, { "id": 2, "name": "benoit", "array_of_object": [{ "wow": "amazing" }] } - ]"#[..]; - let mut builder = IndexDocuments::new(&mut wtxn, &index, 0); - builder.update_format(UpdateFormat::Json); + ]); + let builder = IndexDocuments::new(&mut wtxn, &index, 0); builder.execute(content, |_, _| ()).unwrap(); wtxn.commit().unwrap(); @@ -893,33 +784,31 @@ mod tests { // First we send 3 documents with an id for only one of them. let mut wtxn = index.write_txn().unwrap(); - let documents = &r#"[ + let documents = documents!([ { "id": 2, "title": "Pride and Prejudice", "author": "Jane Austin", "genre": "romance", "price": 3.5, "_geo": { "lat": 12, "lng": 42 } }, { "id": 456, "title": "Le Petit Prince", "author": "Antoine de Saint-Exupéry", "genre": "adventure" , "price": 10.0 }, { "id": 1, "title": "Alice In Wonderland", "author": "Lewis Carroll", "genre": "fantasy", "price": 25.99 }, { "id": 1344, "title": "The Hobbit", "author": "J. R. R. Tolkien", "genre": "fantasy" }, { "id": 4, "title": "Harry Potter and the Half-Blood Prince", "author": "J. K. Rowling", "genre": "fantasy" }, { "id": 42, "title": "The Hitchhiker's Guide to the Galaxy", "author": "Douglas Adams", "_geo": { "lat": 35, "lng": 23 } } - ]"#[..]; + ]); let mut builder = IndexDocuments::new(&mut wtxn, &index, 0); - builder.update_format(UpdateFormat::Json); builder.index_documents_method(IndexDocumentsMethod::ReplaceDocuments); - builder.execute(Cursor::new(documents), |_, _| ()).unwrap(); + builder.execute(documents, |_, _| ()).unwrap(); wtxn.commit().unwrap(); let mut wtxn = index.write_txn().unwrap(); let mut builder = IndexDocuments::new(&mut wtxn, &index, 1); - builder.update_format(UpdateFormat::Json); builder.index_documents_method(IndexDocumentsMethod::UpdateDocuments); - let documents = &r#"[ + let documents = documents!([ { "id": 2, "author": "J. Austen", "date": "1813" } - ]"#[..]; + ]); - builder.execute(Cursor::new(documents), |_, _| ()).unwrap(); + builder.execute(documents, |_, _| ()).unwrap(); wtxn.commit().unwrap(); } @@ -931,15 +820,13 @@ mod tests { let index = Index::new(options, &path).unwrap(); let mut wtxn = index.write_txn().unwrap(); - let content = &br#"[ + let content = documents!([ { "objectId": 123, "title": "Pride and Prejudice", "comment": "A great book" }, { "objectId": 456, "title": "Le Petit Prince", "comment": "A french book" }, { "objectId": 1, "title": "Alice In Wonderland", "comment": "A weird book" }, { "objectId": 30, "title": "Hamlet", "_geo": { "lat": 12, "lng": 89 } } - ]"#[..]; - let mut builder = IndexDocuments::new(&mut wtxn, &index, 0); - builder.update_format(UpdateFormat::Json); - builder.execute(content, |_, _| ()).unwrap(); + ]); + IndexDocuments::new(&mut wtxn, &index, 0).execute(content, |_, _| ()).unwrap(); assert_eq!(index.primary_key(&wtxn).unwrap(), Some("objectId")); @@ -951,22 +838,18 @@ mod tests { let external_documents_ids = index.external_documents_ids(&wtxn).unwrap(); assert!(external_documents_ids.get("30").is_none()); - let content = &br#"[ + let content = documents!([ { "objectId": 30, "title": "Hamlet", "_geo": { "lat": 12, "lng": 89 } } - ]"#[..]; - let mut builder = IndexDocuments::new(&mut wtxn, &index, 0); - builder.update_format(UpdateFormat::Json); - builder.execute(content, |_, _| ()).unwrap(); + ]); + IndexDocuments::new(&mut wtxn, &index, 0).execute(content, |_, _| ()).unwrap(); let external_documents_ids = index.external_documents_ids(&wtxn).unwrap(); assert!(external_documents_ids.get("30").is_some()); - let content = &br#"[ + let content = documents!([ { "objectId": 30, "title": "Hamlet", "_geo": { "lat": 12, "lng": 89 } } - ]"#[..]; - let mut builder = IndexDocuments::new(&mut wtxn, &index, 0); - builder.update_format(UpdateFormat::Json); - builder.execute(content, |_, _| ()).unwrap(); + ]); + IndexDocuments::new(&mut wtxn, &index, 0).execute(content, |_, _| ()).unwrap(); wtxn.commit().unwrap(); } @@ -987,12 +870,16 @@ mod tests { big_object.insert(key, "I am a text!"); } - let content = vec![big_object]; - let content = serde_json::to_string(&content).unwrap(); + let mut cursor = Cursor::new(Vec::new()); - let mut builder = IndexDocuments::new(&mut wtxn, &index, 0); - builder.update_format(UpdateFormat::Json); - builder.execute(Cursor::new(content), |_, _| ()).unwrap(); + let mut builder = DocumentBatchBuilder::new(&mut cursor).unwrap(); + builder.add_documents(big_object).unwrap(); + builder.finish().unwrap(); + cursor.set_position(0); + let content = DocumentBatchReader::from_reader(cursor).unwrap(); + + let builder = IndexDocuments::new(&mut wtxn, &index, 0); + builder.execute(content, |_, _| ()).unwrap(); wtxn.commit().unwrap(); } @@ -1005,16 +892,38 @@ mod tests { let index = Index::new(options, &path).unwrap(); let mut wtxn = index.write_txn().unwrap(); - let content = r#"#id,title,au{hor,genre,price$ -2,"Prideand Prejudice","Jane Austin","romance",3.5$ -456,"Le Petit Prince","Antoine de Saint-Exupéry","adventure",10.0$ -1,Wonderland","Lewis Carroll","fantasy",25.99$ -4,"Harry Potter ing","fantasy\0lood Prince","J. K. Rowling","fantasy\0, -"#; + let content = documents!([ + { + "id": 2, + "title": "Prideand Prejudice", + "au{hor": "Jane Austin", + "genre": "romance", + "price$": "3.5$", + }, + { + "id": 456, + "title": "Le Petit Prince", + "au{hor": "Antoine de Saint-Exupéry", + "genre": "adventure", + "price$": "10.0$", + }, + { + "id": 1, + "title": "Wonderland", + "au{hor": "Lewis Carroll", + "genre": "fantasy", + "price$": "25.99$", + }, + { + "id": 4, + "title": "Harry Potter ing fantasy\0lood Prince", + "au{hor": "J. K. Rowling", + "genre": "fantasy\0", + }, + ]); - let mut builder = IndexDocuments::new(&mut wtxn, &index, 0); - builder.update_format(UpdateFormat::Csv); - builder.execute(content.as_bytes(), |_, _| ()).unwrap(); + let builder = IndexDocuments::new(&mut wtxn, &index, 0); + builder.execute(content, |_, _| ()).unwrap(); wtxn.commit().unwrap(); } diff --git a/milli/src/update/index_documents/transform.rs b/milli/src/update/index_documents/transform.rs index e8b61ef14..fc5eb2c84 100644 --- a/milli/src/update/index_documents/transform.rs +++ b/milli/src/update/index_documents/transform.rs @@ -1,12 +1,12 @@ use std::borrow::Cow; use std::collections::btree_map::Entry; +use std::collections::HashMap; use std::fs::File; use std::io::{Read, Seek, SeekFrom}; -use std::iter::Peekable; -use std::result::Result as StdResult; use std::time::Instant; use grenad::CompressionType; +use itertools::Itertools; use log::info; use roaring::RoaringBitmap; use serde_json::{Map, Value}; @@ -15,7 +15,8 @@ use super::helpers::{ create_sorter, create_writer, keep_latest_obkv, merge_obkvs, merge_two_obkvs, MergeFn, }; use super::IndexDocumentsMethod; -use crate::error::{InternalError, UserError}; +use crate::documents::{DocumentBatchReader, DocumentsBatchIndex}; +use crate::error::{Error, InternalError, UserError}; use crate::index::db_name; use crate::update::{AvailableDocumentsIds, UpdateIndexingStep}; use crate::{ExternalDocumentsIds, FieldDistribution, FieldId, FieldsIdsMap, Index, Result, BEU32}; @@ -51,90 +52,63 @@ pub struct Transform<'t, 'i> { pub autogenerate_docids: bool, } -fn is_primary_key(field: impl AsRef) -> bool { - field.as_ref().to_lowercase().contains(DEFAULT_PRIMARY_KEY_NAME) +/// Create a mapping between the field ids found in the document batch and the one that were +/// already present in the index. +/// +/// If new fields are present in the addition, they are added to the index field ids map. +fn create_fields_mapping( + index_field_map: &mut FieldsIdsMap, + batch_field_map: &DocumentsBatchIndex, +) -> Result> { + batch_field_map + .iter() + // we sort by id here to ensure a deterministic mapping of the fields, that preserves + // the original ordering. + .sorted_by_key(|(&id, _)| id) + .map(|(field, name)| match index_field_map.id(&name) { + Some(id) => Ok((*field, id)), + None => index_field_map + .insert(&name) + .ok_or(Error::UserError(UserError::AttributeLimitReached)) + .map(|id| (*field, id)), + }) + .collect() +} + +fn find_primary_key(index: &bimap::BiHashMap) -> Option<&str> { + index + .right_values() + .find(|v| v.to_lowercase().contains(DEFAULT_PRIMARY_KEY_NAME)) + .map(String::as_str) } impl Transform<'_, '_> { - pub fn output_from_json(self, reader: R, progress_callback: F) -> Result - where - R: Read, - F: Fn(UpdateIndexingStep) + Sync, - { - self.output_from_generic_json(reader, false, progress_callback) - } - - pub fn output_from_json_stream( + pub fn read_documents( self, - reader: R, + mut reader: DocumentBatchReader, progress_callback: F, ) -> Result where - R: Read, - F: Fn(UpdateIndexingStep) + Sync, - { - self.output_from_generic_json(reader, true, progress_callback) - } - - fn output_from_generic_json( - self, - reader: R, - is_stream: bool, - progress_callback: F, - ) -> Result - where - R: Read, + R: Read + Seek, F: Fn(UpdateIndexingStep) + Sync, { + let fields_index = reader.index(); let mut fields_ids_map = self.index.fields_ids_map(self.rtxn)?; - let external_documents_ids = self.index.external_documents_ids(self.rtxn).unwrap(); + let mapping = create_fields_mapping(&mut fields_ids_map, fields_index)?; - // Deserialize the whole batch of documents in memory. - let mut documents: Peekable< - Box>>>, - > = if is_stream { - let iter = serde_json::Deserializer::from_reader(reader).into_iter(); - let iter = Box::new(iter) as Box>; - iter.peekable() - } else { - let vec: Vec<_> = serde_json::from_reader(reader).map_err(UserError::SerdeJson)?; - let iter = vec.into_iter().map(Ok); - let iter = Box::new(iter) as Box>; - iter.peekable() - }; + let alternative_name = self + .index + .primary_key(self.rtxn)? + .or_else(|| find_primary_key(fields_index)) + .map(String::from); - // We extract the primary key from the first document in - // the batch if it hasn't already been defined in the index - let first = match documents.peek().map(StdResult::as_ref).transpose() { - Ok(first) => first, - Err(_) => { - let error = documents.next().unwrap().unwrap_err(); - return Err(UserError::SerdeJson(error).into()); - } - }; - - let alternative_name = - first.and_then(|doc| doc.keys().find(|f| is_primary_key(f)).cloned()); - let (primary_key_id, primary_key) = compute_primary_key_pair( + let (primary_key_id, primary_key_name) = compute_primary_key_pair( self.index.primary_key(self.rtxn)?, &mut fields_ids_map, alternative_name, self.autogenerate_docids, )?; - if documents.peek().is_none() { - return Ok(TransformOutput { - primary_key, - fields_ids_map, - field_distribution: self.index.field_distribution(self.rtxn)?, - external_documents_ids: ExternalDocumentsIds::default(), - new_documents_ids: RoaringBitmap::new(), - replaced_documents_ids: RoaringBitmap::new(), - documents_count: 0, - documents_file: tempfile::tempfile()?, - }); - } - // We must choose the appropriate merge function for when two or more documents // with the same user id must be merged or fully replaced in the same batch. let merge_function = match self.index_documents_method { @@ -151,204 +125,103 @@ impl Transform<'_, '_> { self.max_memory, ); - let mut json_buffer = Vec::new(); let mut obkv_buffer = Vec::new(); - let mut uuid_buffer = [0; uuid::adapter::Hyphenated::LENGTH]; let mut documents_count = 0; - - for result in documents { - let document = result.map_err(UserError::SerdeJson)?; - + let mut external_id_buffer = Vec::new(); + let mut field_buffer: Vec<(u16, &[u8])> = Vec::new(); + while let Some((addition_index, document)) = reader.next_document_with_index()? { + let mut field_buffer_cache = drop_and_reuse(field_buffer); if self.log_every_n.map_or(false, |len| documents_count % len == 0) { - progress_callback(UpdateIndexingStep::TransformFromUserIntoGenericFormat { + progress_callback(UpdateIndexingStep::RemapDocumentAddition { documents_seen: documents_count, }); } - obkv_buffer.clear(); - let mut writer = obkv::KvWriter::<_, FieldId>::new(&mut obkv_buffer); - - // We prepare the fields ids map with the documents keys. - for (key, _value) in &document { - fields_ids_map.insert(&key).ok_or(UserError::AttributeLimitReached)?; + for (k, v) in document.iter() { + let mapped_id = *mapping.get(&k).unwrap(); + field_buffer_cache.push((mapped_id, v)); } - // We retrieve the user id from the document based on the primary key name, - // if the document id isn't present we generate a uuid. - let external_id = match document.get(&primary_key) { - Some(value) => match value { - Value::String(string) => Cow::Borrowed(string.as_str()), - Value::Number(number) => Cow::Owned(number.to_string()), - content => { - return Err( - UserError::InvalidDocumentId { document_id: content.clone() }.into() - ) + // We need to make sure that every document has a primary key. After we have remapped + // all the fields in the document, we try to find the primary key value. If we can find + // it, transform it into a string and validate it, and then update it in the + // document. If none is found, and we were told to generate missing document ids, then + // we create the missing field, and update the new document. + let mut uuid_buffer = [0; uuid::adapter::Hyphenated::LENGTH]; + let external_id = + match field_buffer_cache.iter_mut().find(|(id, _)| *id == primary_key_id) { + Some((_, bytes)) => { + let value = match serde_json::from_slice(bytes).unwrap() { + Value::String(string) => match validate_document_id(&string) { + Some(s) if s.len() == string.len() => string, + Some(s) => s.to_string(), + None => { + return Err(UserError::InvalidDocumentId { + document_id: Value::String(string), + } + .into()) + } + }, + Value::Number(number) => number.to_string(), + content => { + return Err(UserError::InvalidDocumentId { + document_id: content.clone(), + } + .into()) + } + }; + serde_json::to_writer(&mut external_id_buffer, &value).unwrap(); + *bytes = &external_id_buffer; + Cow::Owned(value) } - }, - None => { - if !self.autogenerate_docids { - return Err(UserError::MissingDocumentId { document }.into()); - } - let uuid = uuid::Uuid::new_v4().to_hyphenated().encode_lower(&mut uuid_buffer); - Cow::Borrowed(uuid) - } - }; + None => { + if !self.autogenerate_docids { + let mut json = Map::new(); + for (key, value) in document.iter() { + let key = addition_index.get_by_left(&key).cloned(); + let value = serde_json::from_slice::(&value).ok(); - // We iterate in the fields ids ordered. - for (field_id, name) in fields_ids_map.iter() { - json_buffer.clear(); + if let Some((k, v)) = key.zip(value) { + json.insert(k, v); + } + } - // We try to extract the value from the document and if we don't find anything - // and this should be the document id we return the one we generated. - if let Some(value) = document.get(name) { - // We serialize the attribute values. - serde_json::to_writer(&mut json_buffer, value) - .map_err(InternalError::SerdeJson)?; - writer.insert(field_id, &json_buffer)?; - } - // We validate the document id [a-zA-Z0-9\-_]. - if field_id == primary_key_id && validate_document_id(&external_id).is_none() { - return Err(UserError::InvalidDocumentId { - document_id: Value::from(external_id), + return Err(UserError::MissingDocumentId { document: json }.into()); + } + + let uuid = + uuid::Uuid::new_v4().to_hyphenated().encode_lower(&mut uuid_buffer); + serde_json::to_writer(&mut external_id_buffer, &uuid).unwrap(); + field_buffer_cache.push((primary_key_id, &external_id_buffer)); + Cow::Borrowed(&*uuid) } - .into()); - } + }; + + // Insertion in a obkv need to be done with keys ordered. For now they are ordered + // according to the document addition key order, so we sort it according to the + // fieldids map keys order. + field_buffer_cache.sort_unstable_by(|(f1, _), (f2, _)| f1.cmp(&f2)); + + // The last step is to build the new obkv document, and insert it in the sorter. + let mut writer = obkv::KvWriter::new(&mut obkv_buffer); + for (k, v) in field_buffer_cache.iter() { + writer.insert(*k, v)?; } // We use the extracted/generated user id as the key for this document. - sorter.insert(external_id.as_bytes(), &obkv_buffer)?; + sorter.insert(&external_id.as_ref().as_bytes(), &obkv_buffer)?; documents_count += 1; - } - progress_callback(UpdateIndexingStep::TransformFromUserIntoGenericFormat { - documents_seen: documents_count, - }); - - // Now that we have a valid sorter that contains the user id and the obkv we - // give it to the last transforming function which returns the TransformOutput. - self.output_from_sorter( - sorter, - primary_key, - fields_ids_map, - documents_count, - external_documents_ids, - progress_callback, - ) - } - - pub fn output_from_csv(self, reader: R, progress_callback: F) -> Result - where - R: Read, - F: Fn(UpdateIndexingStep) + Sync, - { - let mut fields_ids_map = self.index.fields_ids_map(self.rtxn)?; - let external_documents_ids = self.index.external_documents_ids(self.rtxn).unwrap(); - - let mut csv = csv::Reader::from_reader(reader); - let headers = csv.headers().map_err(UserError::Csv)?; - - let mut fields_ids = Vec::new(); - // Generate the new fields ids based on the current fields ids and this CSV headers. - for (i, header) in headers.iter().enumerate() { - let id = fields_ids_map.insert(header).ok_or(UserError::AttributeLimitReached)?; - fields_ids.push((id, i)); - } - - // Extract the position of the primary key in the current headers, None if not found. - let primary_key_pos = match self.index.primary_key(self.rtxn)? { - Some(primary_key) => { - // The primary key is known so we must find the position in the CSV headers. - headers.iter().position(|h| h == primary_key) - } - None => headers.iter().position(is_primary_key), - }; - - // Returns the field id in the fields ids map, create an "id" field - // in case it is not in the current headers. - let alternative_name = primary_key_pos.map(|pos| headers[pos].to_string()); - let (primary_key_id, primary_key_name) = compute_primary_key_pair( - self.index.primary_key(self.rtxn)?, - &mut fields_ids_map, - alternative_name, - self.autogenerate_docids, - )?; - - // The primary key field is not present in the header, so we need to create it. - if primary_key_pos.is_none() { - fields_ids.push((primary_key_id, usize::max_value())); - } - - // We sort the fields ids by the fields ids map id, this way we are sure to iterate over - // the records fields in the fields ids map order and correctly generate the obkv. - fields_ids.sort_unstable_by_key(|(field_id, _)| *field_id); - - // We initialize the sorter with the user indexing settings. - let mut sorter = create_sorter( - keep_latest_obkv, - self.chunk_compression_type, - self.chunk_compression_level, - self.max_nb_chunks, - self.max_memory, - ); - - // We write into the sorter to merge and deduplicate the documents - // based on the external ids. - let mut json_buffer = Vec::new(); - let mut obkv_buffer = Vec::new(); - let mut uuid_buffer = [0; uuid::adapter::Hyphenated::LENGTH]; - let mut documents_count = 0; - - let mut record = csv::StringRecord::new(); - while csv.read_record(&mut record).map_err(UserError::Csv)? { - obkv_buffer.clear(); - let mut writer = obkv::KvWriter::<_, FieldId>::new(&mut obkv_buffer); - - if self.log_every_n.map_or(false, |len| documents_count % len == 0) { - progress_callback(UpdateIndexingStep::TransformFromUserIntoGenericFormat { - documents_seen: documents_count, - }); - } - - // We extract the user id if we know where it is or generate an UUID V4 otherwise. - let external_id = match primary_key_pos { - Some(pos) => { - let external_id = &record[pos]; - // We validate the document id [a-zA-Z0-9\-_]. - match validate_document_id(&external_id) { - Some(valid) => valid, - None => { - return Err(UserError::InvalidDocumentId { - document_id: Value::from(external_id), - } - .into()) - } - } - } - None => uuid::Uuid::new_v4().to_hyphenated().encode_lower(&mut uuid_buffer), - }; - - // When the primary_key_field_id is found in the fields ids list - // we return the generated document id instead of the record field. - let iter = fields_ids.iter().map(|(fi, i)| { - let field = if *fi == primary_key_id { external_id } else { &record[*i] }; - (fi, field) + progress_callback(UpdateIndexingStep::RemapDocumentAddition { + documents_seen: documents_count, }); - // We retrieve the field id based on the fields ids map fields ids order. - for (field_id, field) in iter { - // We serialize the attribute values as JSON strings. - json_buffer.clear(); - serde_json::to_writer(&mut json_buffer, &field) - .map_err(InternalError::SerdeJson)?; - writer.insert(*field_id, &json_buffer)?; - } - - // We use the extracted/generated user id as the key for this document. - sorter.insert(external_id, &obkv_buffer)?; - documents_count += 1; + obkv_buffer.clear(); + field_buffer = drop_and_reuse(field_buffer_cache); + external_id_buffer.clear(); } - progress_callback(UpdateIndexingStep::TransformFromUserIntoGenericFormat { + progress_callback(UpdateIndexingStep::RemapDocumentAddition { documents_seen: documents_count, }); @@ -359,7 +232,6 @@ impl Transform<'_, '_> { primary_key_name, fields_ids_map, documents_count, - external_documents_ids, progress_callback, ) } @@ -373,12 +245,12 @@ impl Transform<'_, '_> { primary_key: String, fields_ids_map: FieldsIdsMap, approximate_number_of_documents: usize, - mut external_documents_ids: ExternalDocumentsIds<'_>, progress_callback: F, ) -> Result where F: Fn(UpdateIndexingStep) + Sync, { + let mut external_documents_ids = self.index.external_documents_ids(self.rtxn).unwrap(); let documents_ids = self.index.documents_ids(self.rtxn)?; let mut field_distribution = self.index.field_distribution(self.rtxn)?; let mut available_documents_ids = AvailableDocumentsIds::from_documents_ids(&documents_ids); @@ -610,6 +482,17 @@ fn validate_document_id(document_id: &str) -> Option<&str> { }) } +/// Drops all the value of type `U` in vec, and reuses the allocation to create a `Vec`. +/// +/// The size and alignment of T and U must match. +fn drop_and_reuse(mut vec: Vec) -> Vec { + debug_assert_eq!(std::mem::align_of::(), std::mem::align_of::()); + debug_assert_eq!(std::mem::size_of::(), std::mem::size_of::()); + vec.clear(); + debug_assert!(vec.is_empty()); + vec.into_iter().map(|_| unreachable!()).collect() +} + #[cfg(test)] mod test { use super::*; diff --git a/milli/src/update/mod.rs b/milli/src/update/mod.rs index 36ed7d8fa..d80437ec7 100644 --- a/milli/src/update/mod.rs +++ b/milli/src/update/mod.rs @@ -2,9 +2,7 @@ pub use self::available_documents_ids::AvailableDocumentsIds; pub use self::clear_documents::ClearDocuments; pub use self::delete_documents::DeleteDocuments; pub use self::facets::Facets; -pub use self::index_documents::{ - DocumentAdditionResult, IndexDocuments, IndexDocumentsMethod, UpdateFormat, -}; +pub use self::index_documents::{DocumentAdditionResult, IndexDocuments, IndexDocumentsMethod}; pub use self::settings::{Setting, Settings}; pub use self::update_builder::UpdateBuilder; pub use self::update_step::UpdateIndexingStep; diff --git a/milli/src/update/settings.rs b/milli/src/update/settings.rs index f1b3e2628..4aa79f6e3 100644 --- a/milli/src/update/settings.rs +++ b/milli/src/update/settings.rs @@ -111,6 +111,10 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { } } + pub fn log_every_n(&mut self, n: usize) { + self.log_every_n = Some(n); + } + pub fn reset_searchable_fields(&mut self) { self.searchable_fields = Setting::Reset; } @@ -501,7 +505,7 @@ mod tests { use super::*; use crate::error::Error; - use crate::update::{IndexDocuments, UpdateFormat}; + use crate::update::IndexDocuments; use crate::{Criterion, FilterCondition, SearchResult}; #[test] @@ -513,9 +517,13 @@ mod tests { // First we send 3 documents with ids from 1 to 3. let mut wtxn = index.write_txn().unwrap(); - let content = &b"id,name,age\n0,kevin,23\n1,kevina,21\n2,benoit,34\n"[..]; - let mut builder = IndexDocuments::new(&mut wtxn, &index, 0); - builder.update_format(UpdateFormat::Csv); + + let content = documents!([ + { "id": 1, "name": "kevin", "age": 23 }, + { "id": 2, "name": "kevina", "age": 21}, + { "id": 3, "name": "benoit", "age": 34 } + ]); + let builder = IndexDocuments::new(&mut wtxn, &index, 0); builder.execute(content, |_, _| ()).unwrap(); wtxn.commit().unwrap(); @@ -567,10 +575,13 @@ mod tests { // First we send 3 documents with ids from 1 to 3. let mut wtxn = index.write_txn().unwrap(); - let content = &b"name,age\nkevin,23\nkevina,21\nbenoit,34\n"[..]; + let content = documents!([ + { "name": "kevin", "age": 23}, + { "name": "kevina", "age": 21 }, + { "name": "benoit", "age": 34 } + ]); let mut builder = IndexDocuments::new(&mut wtxn, &index, 0); builder.enable_autogenerate_docids(); - builder.update_format(UpdateFormat::Csv); builder.execute(content, |_, _| ()).unwrap(); wtxn.commit().unwrap(); @@ -611,10 +622,13 @@ mod tests { // First we send 3 documents with ids from 1 to 3. let mut wtxn = index.write_txn().unwrap(); - let content = &b"name,age\nkevin,23\nkevina,21\nbenoit,34\n"[..]; + let content = documents!([ + { "name": "kevin", "age": 23}, + { "name": "kevina", "age": 21 }, + { "name": "benoit", "age": 34 } + ]); let mut builder = IndexDocuments::new(&mut wtxn, &index, 0); builder.enable_autogenerate_docids(); - builder.update_format(UpdateFormat::Csv); builder.execute(content, |_, _| ()).unwrap(); wtxn.commit().unwrap(); @@ -633,10 +647,13 @@ mod tests { // First we send 3 documents with ids from 1 to 3. let mut wtxn = index.write_txn().unwrap(); - let content = &b"name,age\nkevin,23\nkevina,21\nbenoit,34\n"[..]; + let content = documents!([ + { "name": "kevin", "age": 23}, + { "name": "kevina", "age": 21 }, + { "name": "benoit", "age": 34 } + ]); let mut builder = IndexDocuments::new(&mut wtxn, &index, 0); builder.enable_autogenerate_docids(); - builder.update_format(UpdateFormat::Csv); builder.execute(content, |_, _| ()).unwrap(); // In the same transaction we change the displayed fields to be only the age. @@ -678,13 +695,12 @@ mod tests { builder.execute(|_, _| ()).unwrap(); // Then index some documents. - let content = &br#"[ - { "name": "kevin", "age": 23 }, + let content = documents!([ + { "name": "kevin", "age": 23}, { "name": "kevina", "age": 21 }, { "name": "benoit", "age": 34 } - ]"#[..]; + ]); let mut builder = IndexDocuments::new(&mut wtxn, &index, 1); - builder.update_format(UpdateFormat::Json); builder.enable_autogenerate_docids(); builder.execute(content, |_, _| ()).unwrap(); wtxn.commit().unwrap(); @@ -695,11 +711,19 @@ mod tests { assert_eq!(fields_ids, hashset! { S("age") }); // Only count the field_id 0 and level 0 facet values. // TODO we must support typed CSVs for numbers to be understood. + let fidmap = index.fields_ids_map(&rtxn).unwrap(); + println!("fidmap: {:?}", fidmap); + for document in index.all_documents(&rtxn).unwrap() { + let document = document.unwrap(); + let json = crate::obkv_to_json(&fidmap.ids().collect::>(), &fidmap, document.1) + .unwrap(); + println!("json: {:?}", json); + } let count = index .facet_id_f64_docids .remap_key_type::() - // The faceted field id is 2u16 - .prefix_iter(&rtxn, &[0, 2, 0]) + // The faceted field id is 1u16 + .prefix_iter(&rtxn, &[0, 1, 0]) .unwrap() .count(); assert_eq!(count, 3); @@ -707,25 +731,23 @@ mod tests { // Index a little more documents with new and current facets values. let mut wtxn = index.write_txn().unwrap(); - let content = &br#"[ - { "name": "kevin2", "age": 23 }, + let content = documents!([ + { "name": "kevin2", "age": 23}, { "name": "kevina2", "age": 21 }, - { "name": "benoit", "age": 35 } - ]"#[..]; + { "name": "benoit", "age": 35 } + ]); let mut builder = IndexDocuments::new(&mut wtxn, &index, 2); builder.enable_autogenerate_docids(); - builder.update_format(UpdateFormat::Json); builder.execute(content, |_, _| ()).unwrap(); wtxn.commit().unwrap(); let rtxn = index.read_txn().unwrap(); // Only count the field_id 0 and level 0 facet values. - // TODO we must support typed CSVs for numbers to be understood. let count = index .facet_id_f64_docids .remap_key_type::() - .prefix_iter(&rtxn, &[0, 2, 0]) + .prefix_iter(&rtxn, &[0, 1, 0]) .unwrap() .count(); assert_eq!(count, 4); @@ -747,13 +769,12 @@ mod tests { builder.execute(|_, _| ()).unwrap(); // Then index some documents. - let content = &br#"[ - { "name": "kevin", "age": 23 }, + let content = documents!([ + { "name": "kevin", "age": 23}, { "name": "kevina", "age": 21 }, { "name": "benoit", "age": 34 } - ]"#[..]; + ]); let mut builder = IndexDocuments::new(&mut wtxn, &index, 1); - builder.update_format(UpdateFormat::Json); builder.enable_autogenerate_docids(); builder.execute(content, |_, _| ()).unwrap(); wtxn.commit().unwrap(); @@ -790,7 +811,7 @@ mod tests { builder.execute(|_, _| ()).unwrap(); // Then index some documents. - let content = &br#"[ + let content = documents!([ { "name": "kevin", "age": 23 }, { "name": "kevina", "age": 21 }, { "name": "benoit", "age": 34 }, @@ -798,9 +819,8 @@ mod tests { { "name": "bertrand", "age": 34 }, { "name": "bernie", "age": 34 }, { "name": "ben", "age": 34 } - ]"#[..]; + ]); let mut builder = IndexDocuments::new(&mut wtxn, &index, 1); - builder.update_format(UpdateFormat::Json); builder.enable_autogenerate_docids(); builder.execute(content, |_, _| ()).unwrap(); wtxn.commit().unwrap(); @@ -822,10 +842,13 @@ mod tests { // First we send 3 documents with ids from 1 to 3. let mut wtxn = index.write_txn().unwrap(); - let content = &b"name,age\nkevin,23\nkevina,21\nbenoit,34\n"[..]; + let content = documents!([ + { "name": "kevin", "age": 23}, + { "name": "kevina", "age": 21 }, + { "name": "benoit", "age": 34 } + ]); let mut builder = IndexDocuments::new(&mut wtxn, &index, 0); builder.enable_autogenerate_docids(); - builder.update_format(UpdateFormat::Csv); builder.execute(content, |_, _| ()).unwrap(); wtxn.commit().unwrap(); @@ -844,10 +867,13 @@ mod tests { // First we send 3 documents with ids from 1 to 3. let mut wtxn = index.write_txn().unwrap(); - let content = &b"name,age,maxim\nkevin,23,I love dogs\nkevina,21,Doggos are the best\nbenoit,34,The crepes are really good\n"[..]; + let content = documents!([ + { "name": "kevin", "age": 23, "maxim": "I love dogs" }, + { "name": "kevina", "age": 21, "maxim": "Doggos are the best" }, + { "name": "benoit", "age": 34, "maxim": "The crepes are really good" }, + ]); let mut builder = IndexDocuments::new(&mut wtxn, &index, 0); builder.enable_autogenerate_docids(); - builder.update_format(UpdateFormat::Csv); builder.execute(content, |_, _| ()).unwrap(); // In the same transaction we provide some stop_words @@ -915,10 +941,13 @@ mod tests { // Send 3 documents with ids from 1 to 3. let mut wtxn = index.write_txn().unwrap(); - let content = &b"name,age,maxim\nkevin,23,I love dogs\nkevina,21,Doggos are the best\nbenoit,34,The crepes are really good\n"[..]; + let content = documents!([ + { "name": "kevin", "age": 23, "maxim": "I love dogs"}, + { "name": "kevina", "age": 21, "maxim": "Doggos are the best"}, + { "name": "benoit", "age": 34, "maxim": "The crepes are really good"}, + ]); let mut builder = IndexDocuments::new(&mut wtxn, &index, 0); builder.enable_autogenerate_docids(); - builder.update_format(UpdateFormat::Csv); builder.execute(content, |_, _| ()).unwrap(); // In the same transaction provide some synonyms @@ -1038,7 +1067,7 @@ mod tests { assert_eq!(index.primary_key(&wtxn).unwrap(), Some("mykey")); // Then index some documents with the "mykey" primary key. - let content = &br#"[ + let content = documents!([ { "mykey": 1, "name": "kevin", "age": 23 }, { "mykey": 2, "name": "kevina", "age": 21 }, { "mykey": 3, "name": "benoit", "age": 34 }, @@ -1046,9 +1075,8 @@ mod tests { { "mykey": 5, "name": "bertrand", "age": 34 }, { "mykey": 6, "name": "bernie", "age": 34 }, { "mykey": 7, "name": "ben", "age": 34 } - ]"#[..]; + ]); let mut builder = IndexDocuments::new(&mut wtxn, &index, 1); - builder.update_format(UpdateFormat::Json); builder.disable_autogenerate_docids(); builder.execute(content, |_, _| ()).unwrap(); wtxn.commit().unwrap(); @@ -1087,7 +1115,7 @@ mod tests { builder.set_filterable_fields(hashset! { S("genres") }); builder.execute(|_, _| ()).unwrap(); - let content = &br#"[ + let content = documents!([ { "id": 11, "title": "Star Wars", @@ -1105,9 +1133,8 @@ mod tests { "poster": "https://image.tmdb.org/t/p/w500/gSuHDeWemA1menrwfMRChnSmMVN.jpg", "release_date": 819676800 } - ]"#[..]; - let mut builder = IndexDocuments::new(&mut wtxn, &index, 1); - builder.update_format(UpdateFormat::Json); + ]); + let builder = IndexDocuments::new(&mut wtxn, &index, 1); builder.execute(content, |_, _| ()).unwrap(); wtxn.commit().unwrap(); diff --git a/milli/src/update/update_step.rs b/milli/src/update/update_step.rs index 68a32bb67..fd5739caf 100644 --- a/milli/src/update/update_step.rs +++ b/milli/src/update/update_step.rs @@ -2,10 +2,9 @@ use UpdateIndexingStep::*; #[derive(Debug, Clone, Copy)] pub enum UpdateIndexingStep { - /// Transform from the original user given format (CSV, JSON, JSON lines) - /// into a generic format based on the obkv and grenad crates. This step also - /// deduplicate potential documents in this batch update by merging or replacing them. - TransformFromUserIntoGenericFormat { documents_seen: usize }, + /// Remap document addition fields the one present in the database, adding new fields in to the + /// schema on the go. + RemapDocumentAddition { documents_seen: usize }, /// This step check the external document id, computes the internal ids and merge /// the documents that are already present in the database. @@ -23,7 +22,7 @@ pub enum UpdateIndexingStep { impl UpdateIndexingStep { pub const fn step(&self) -> usize { match self { - TransformFromUserIntoGenericFormat { .. } => 0, + RemapDocumentAddition { .. } => 0, ComputeIdsAndMergeDocuments { .. } => 1, IndexDocuments { .. } => 2, MergeDataIntoFinalDatabase { .. } => 3, diff --git a/milli/tests/search/mod.rs b/milli/tests/search/mod.rs index e3f6c5b09..cda0da617 100644 --- a/milli/tests/search/mod.rs +++ b/milli/tests/search/mod.rs @@ -1,11 +1,13 @@ use std::cmp::Reverse; use std::collections::HashSet; +use std::io::Cursor; use big_s::S; use either::{Either, Left, Right}; use heed::EnvOpenOptions; use maplit::{hashmap, hashset}; -use milli::update::{Settings, UpdateBuilder, UpdateFormat}; +use milli::documents::{DocumentBatchBuilder, DocumentBatchReader}; +use milli::update::{Settings, UpdateBuilder}; use milli::{AscDesc, Criterion, DocumentId, Index, Member}; use serde::Deserialize; use slice_group_by::GroupBy; @@ -55,9 +57,20 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index { let mut builder = UpdateBuilder::new(0); builder.max_memory(10 * 1024 * 1024); // 10MiB let mut builder = builder.index_documents(&mut wtxn, &index); - builder.update_format(UpdateFormat::JsonStream); builder.enable_autogenerate_docids(); - builder.execute(CONTENT.as_bytes(), |_, _| ()).unwrap(); + let mut cursor = Cursor::new(Vec::new()); + let mut documents_builder = DocumentBatchBuilder::new(&mut cursor).unwrap(); + let reader = Cursor::new(CONTENT.as_bytes()); + for doc in serde_json::Deserializer::from_reader(reader).into_iter::() { + documents_builder.add_documents(doc.unwrap()).unwrap(); + } + documents_builder.finish().unwrap(); + + cursor.set_position(0); + + // index documents + let content = DocumentBatchReader::from_reader(cursor).unwrap(); + builder.execute(content, |_, _| ()).unwrap(); wtxn.commit().unwrap(); diff --git a/milli/tests/search/query_criteria.rs b/milli/tests/search/query_criteria.rs index f6a937f67..f3b04c4fa 100644 --- a/milli/tests/search/query_criteria.rs +++ b/milli/tests/search/query_criteria.rs @@ -1,10 +1,12 @@ use std::cmp::Reverse; +use std::io::Cursor; use big_s::S; use heed::EnvOpenOptions; use itertools::Itertools; use maplit::hashset; -use milli::update::{Settings, UpdateBuilder, UpdateFormat}; +use milli::documents::{DocumentBatchBuilder, DocumentBatchReader}; +use milli::update::{Settings, UpdateBuilder}; use milli::{AscDesc, Criterion, Index, Member, Search, SearchResult}; use rand::Rng; use Criterion::*; @@ -386,31 +388,37 @@ fn criteria_ascdesc() { let mut builder = UpdateBuilder::new(0); builder.max_memory(10 * 1024 * 1024); // 10MiB let mut builder = builder.index_documents(&mut wtxn, &index); - builder.update_format(UpdateFormat::Csv); builder.enable_autogenerate_docids(); - let content = [ - vec![S("name,age")], - (0..ASC_DESC_CANDIDATES_THRESHOLD + 1) - .map(|_| { - let mut rng = rand::thread_rng(); + let mut cursor = Cursor::new(Vec::new()); + let mut batch_builder = DocumentBatchBuilder::new(&mut cursor).unwrap(); - let age = rng.gen::().to_string(); - let name = rng - .sample_iter(&rand::distributions::Alphanumeric) - .map(char::from) - .filter(|c| *c >= 'a' && *c <= 'z') - .take(10) - .collect::(); + (0..ASC_DESC_CANDIDATES_THRESHOLD + 1).for_each(|_| { + let mut rng = rand::thread_rng(); - format!("{},{}", name, age) - }) - .collect::>(), - ] - .iter() - .flatten() - .join("\n"); - builder.execute(content.as_bytes(), |_, _| ()).unwrap(); + let age = rng.gen::().to_string(); + let name = rng + .sample_iter(&rand::distributions::Alphanumeric) + .map(char::from) + .filter(|c| *c >= 'a' && *c <= 'z') + .take(10) + .collect::(); + + let json = serde_json::json!({ + "name": name, + "age": age, + }); + + batch_builder.add_documents(json).unwrap(); + }); + + batch_builder.finish().unwrap(); + + cursor.set_position(0); + + let reader = DocumentBatchReader::from_reader(cursor).unwrap(); + + builder.execute(reader, |_, _| ()).unwrap(); wtxn.commit().unwrap(); diff --git a/search/src/main.rs b/search/src/main.rs deleted file mode 100644 index fba714dab..000000000 --- a/search/src/main.rs +++ /dev/null @@ -1,98 +0,0 @@ -use std::io::{self, BufRead, Write}; -use std::iter::once; -use std::path::PathBuf; -use std::time::Instant; - -use byte_unit::Byte; -use heed::EnvOpenOptions; -use log::debug; -use milli::{obkv_to_json, Index}; -use structopt::StructOpt; - -#[cfg(target_os = "linux")] -#[global_allocator] -static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc; - -#[derive(Debug, StructOpt)] -/// A simple search helper binary for the milli project. -pub struct Opt { - /// The database path where the database is located. - /// It is created if it doesn't already exist. - #[structopt(long = "db", parse(from_os_str))] - database: PathBuf, - - /// The maximum size the database can take on disk. It is recommended to specify - /// the whole disk space (value must be a multiple of a page size). - #[structopt(long = "db-size", default_value = "100 GiB")] - database_size: Byte, - - /// Verbose mode (-v, -vv, -vvv, etc.) - #[structopt(short, long, parse(from_occurrences))] - verbose: usize, - - /// The query string to search for (doesn't support prefix search yet). - query: Option, - - /// Compute and print the facet distribution of all the faceted fields. - #[structopt(long)] - print_facet_distribution: bool, -} - -fn main() -> anyhow::Result<()> { - let opt = Opt::from_args(); - - stderrlog::new() - .verbosity(opt.verbose) - .show_level(false) - .timestamp(stderrlog::Timestamp::Off) - .init()?; - - // Return an error if the database does not exist. - if !opt.database.exists() { - anyhow::bail!("The database ({}) does not exist.", opt.database.display()); - } - - let mut options = EnvOpenOptions::new(); - options.map_size(opt.database_size.get_bytes() as usize); - - // Open the LMDB database. - let index = Index::new(options, &opt.database)?; - let rtxn = index.read_txn()?; - let fields_ids_map = index.fields_ids_map(&rtxn)?; - let displayed_fields = match index.displayed_fields_ids(&rtxn)? { - Some(fields) => fields, - None => fields_ids_map.iter().map(|(id, _)| id).collect(), - }; - - let stdin = io::stdin(); - let lines = match opt.query { - Some(query) => Box::new(once(Ok(query))), - None => Box::new(stdin.lock().lines()) as Box>, - }; - - let mut stdout = io::stdout(); - for result in lines { - let before = Instant::now(); - - let query = result?; - let result = index.search(&rtxn).query(query).execute()?; - let documents = index.documents(&rtxn, result.documents_ids.iter().cloned())?; - - for (_id, record) in documents { - let val = obkv_to_json(&displayed_fields, &fields_ids_map, record)?; - serde_json::to_writer(&mut stdout, &val)?; - let _ = writeln!(&mut stdout); - } - - if opt.print_facet_distribution { - let facets = - index.facets_distribution(&rtxn).candidates(result.candidates).execute()?; - serde_json::to_writer(&mut stdout, &facets)?; - let _ = writeln!(&mut stdout); - } - - debug!("Took {:.02?} to find {} documents", before.elapsed(), result.documents_ids.len()); - } - - Ok(()) -}