From 4536dfccd05c522b3fc720e3d3dc51a2c68a6d65 Mon Sep 17 00:00:00 2001 From: tamo Date: Tue, 25 May 2021 17:55:45 +0200 Subject: [PATCH] add a way to provide primary_key or autogenerate documents ids --- Cargo.lock | 346 ++++++++++++++++++++++++++++++++++-- benchmarks/benches/songs.rs | 1 + benchmarks/benches/utils.rs | 7 + 3 files changed, 338 insertions(+), 16 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 0b1da2b3f..04fd284c6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -122,6 +122,20 @@ version = "0.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "904dfeac50f3cdaba28fc6f57fdcddb75f49ed61346676a78c4ffe55877802fd" +[[package]] +name = "benchmarks" +version = "0.1.0" +dependencies = [ + "anyhow", + "bytes 1.0.1", + "convert_case", + "criterion", + "flate2", + "heed", + "milli", + "reqwest", +] + [[package]] name = "big_s" version = "1.0.2" @@ -327,6 +341,12 @@ dependencies = [ "unicode-width", ] +[[package]] +name = "convert_case" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6245d59a3e82a7fc217c5828a6692dbc6dfb63a0c8c90495621f7b9d79704a0e" + [[package]] name = "cow-utils" version = "0.1.2" @@ -506,6 +526,15 @@ version = "1.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e78d4f1cc4ae33bbfc157ed5d5a5ef3bc29227303d595861deb238fcec4e9457" +[[package]] +name = "encoding_rs" +version = "0.8.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "80df024fbc5ac80f87dfef0d9f5209a252f2a497f7f42944cff24d8253cac065" +dependencies = [ + "cfg-if 1.0.0", +] + [[package]] name = "fake-simd" version = "0.1.2" @@ -750,12 +779,31 @@ dependencies = [ "http", "indexmap", "slab", - "tokio", - "tokio-util", + "tokio 0.2.25", + "tokio-util 0.3.1", "tracing", "tracing-futures", ] +[[package]] +name = "h2" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "825343c4eef0b63f541f8903f395dc5beb362a979b5799a84062527ef1e37726" +dependencies = [ + "bytes 1.0.1", + "fnv", + "futures-core", + "futures-sink", + "futures-util", + "http", + "indexmap", + "slab", + "tokio 1.6.0", + "tokio-util 0.6.7", + "tracing", +] + [[package]] name = "half" version = "1.7.1" @@ -893,6 +941,17 @@ dependencies = [ "http", ] +[[package]] +name = "http-body" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60daa14be0e0786db0f03a9e57cb404c9d756eed2b6c62b9ea98ec5743ec75a9" +dependencies = [ + "bytes 1.0.1", + "http", + "pin-project-lite 0.2.6", +] + [[package]] name = "http-ui" version = "0.2.1" @@ -922,7 +981,7 @@ dependencies = [ "stderrlog", "structopt", "tempfile", - "tokio", + "tokio 0.2.25", "warp", ] @@ -960,20 +1019,59 @@ dependencies = [ "futures-channel", "futures-core", "futures-util", - "h2", + "h2 0.2.7", "http", - "http-body", + "http-body 0.3.1", "httparse", "httpdate", "itoa", "pin-project 1.0.5", - "socket2", - "tokio", + "socket2 0.3.19", + "tokio 0.2.25", "tower-service", "tracing", "want", ] +[[package]] +name = "hyper" +version = "0.14.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8bf09f61b52cfcf4c00de50df88ae423d6c02354e385a86341133b5338630ad1" +dependencies = [ + "bytes 1.0.1", + "futures-channel", + "futures-core", + "futures-util", + "h2 0.3.3", + "http", + "http-body 0.4.2", + "httparse", + "httpdate", + "itoa", + "pin-project 1.0.5", + "socket2 0.4.0", + "tokio 1.6.0", + "tower-service", + "tracing", + "want", +] + +[[package]] +name = "hyper-rustls" +version = "0.22.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f9f7a97316d44c0af9b0301e65010573a853a9fc97046d7331d7f6bc0fd5a64" +dependencies = [ + "futures-util", + "hyper 0.14.5", + "log", + "rustls", + "tokio 1.6.0", + "tokio-rustls", + "webpki", +] + [[package]] name = "idna" version = "0.2.2" @@ -1029,6 +1127,12 @@ dependencies = [ "libc", ] +[[package]] +name = "ipnet" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47be2f14c678be2fdcab04ab1171db51b2762ce6f0a8ee87c8dd4a04ed216135" + [[package]] name = "itertools" version = "0.9.0" @@ -1261,7 +1365,6 @@ dependencies = [ "bstr", "byteorder", "chrono", - "criterion", "crossbeam-channel", "csv", "either", @@ -1343,6 +1446,19 @@ dependencies = [ "winapi 0.2.8", ] +[[package]] +name = "mio" +version = "0.7.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf80d3e903b34e0bd7282b218398aec54e082c840d9baf8339e0080a0c542956" +dependencies = [ + "libc", + "log", + "miow 0.3.7", + "ntapi", + "winapi 0.3.9", +] + [[package]] name = "mio-named-pipes" version = "0.1.7" @@ -1350,7 +1466,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0840c1c50fd55e521b247f949c241c9997709f23bd7f023b9762cd561e935656" dependencies = [ "log", - "mio", + "mio 0.6.23", "miow 0.3.7", "winapi 0.3.9", ] @@ -1363,7 +1479,7 @@ checksum = "afcb699eb26d4332647cc848492bbc15eafb26f08d0304550d5aa1f612e066f0" dependencies = [ "iovec", "libc", - "mio", + "mio 0.6.23", ] [[package]] @@ -1441,6 +1557,15 @@ dependencies = [ "version_check", ] +[[package]] +name = "ntapi" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f6bb902e437b6d86e03cce10a7e2af662292c5dfef23b65899ea3ac9354ad44" +dependencies = [ + "winapi 0.3.9", +] + [[package]] name = "num-integer" version = "0.1.44" @@ -1956,12 +2081,62 @@ dependencies = [ "winapi 0.3.9", ] +[[package]] +name = "reqwest" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2296f2fac53979e8ccbc4a1136b25dcefd37be9ed7e4a1f6b05a6029c84ff124" +dependencies = [ + "base64 0.13.0", + "bytes 1.0.1", + "encoding_rs", + "futures-core", + "futures-util", + "http", + "http-body 0.4.2", + "hyper 0.14.5", + "hyper-rustls", + "ipnet", + "js-sys", + "lazy_static", + "log", + "mime", + "percent-encoding", + "pin-project-lite 0.2.6", + "rustls", + "serde", + "serde_urlencoded 0.7.0", + "tokio 1.6.0", + "tokio-rustls", + "url", + "wasm-bindgen", + "wasm-bindgen-futures", + "web-sys", + "webpki-roots", + "winreg", +] + [[package]] name = "retain_mut" version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "53552c6c49e1e13f1a203ef0080ab3bbef0beb570a528993e83df057a9d9bba1" +[[package]] +name = "ring" +version = "0.16.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3053cf52e236a3ed746dfc745aa9cacf1b791d846bdaf412f60a8d7d6e17c8fc" +dependencies = [ + "cc", + "libc", + "once_cell", + "spin", + "untrusted", + "web-sys", + "winapi 0.3.9", +] + [[package]] name = "roaring" version = "0.6.6" @@ -1982,6 +2157,19 @@ dependencies = [ "semver", ] +[[package]] +name = "rustls" +version = "0.19.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "35edb675feee39aec9c99fa5ff985081995a06d594114ae14cbe797ad7b7a6d7" +dependencies = [ + "base64 0.13.0", + "log", + "ring", + "sct", + "webpki", +] + [[package]] name = "ryu" version = "1.0.5" @@ -2015,6 +2203,16 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" +[[package]] +name = "sct" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b362b83898e0e69f38515b82ee15aa80636befe47c3b6d3d89a911e78fc228ce" +dependencies = [ + "ring", + "untrusted", +] + [[package]] name = "search" version = "0.2.1" @@ -2108,6 +2306,18 @@ dependencies = [ "url", ] +[[package]] +name = "serde_urlencoded" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "edfa57a7f8d9c1d260a549e7224100f6c43d43f9103e06dd8b4095a9b2b43ce9" +dependencies = [ + "form_urlencoded", + "itoa", + "ryu", + "serde", +] + [[package]] name = "sha-1" version = "0.8.2" @@ -2193,6 +2403,22 @@ dependencies = [ "winapi 0.3.9", ] +[[package]] +name = "socket2" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e3dfc207c526015c632472a77be09cf1b6e46866581aecae5cc38fb4235dea2" +dependencies = [ + "libc", + "winapi 0.3.9", +] + +[[package]] +name = "spin" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d" + [[package]] name = "static_assertions" version = "1.1.0" @@ -2386,7 +2612,7 @@ dependencies = [ "lazy_static", "libc", "memchr", - "mio", + "mio 0.6.23", "mio-named-pipes", "mio-uds", "num_cpus", @@ -2397,6 +2623,21 @@ dependencies = [ "winapi 0.3.9", ] +[[package]] +name = "tokio" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd3076b5c8cc18138b8f8814895c11eb4de37114a5d127bafdc5e55798ceef37" +dependencies = [ + "autocfg", + "bytes 1.0.1", + "libc", + "memchr", + "mio 0.7.11", + "num_cpus", + "pin-project-lite 0.2.6", +] + [[package]] name = "tokio-macros" version = "0.2.6" @@ -2408,6 +2649,17 @@ dependencies = [ "syn 1.0.64", ] +[[package]] +name = "tokio-rustls" +version = "0.22.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc6844de72e57df1980054b38be3a9f4702aba4858be64dd700181a8a6d0e1b6" +dependencies = [ + "rustls", + "tokio 1.6.0", + "webpki", +] + [[package]] name = "tokio-tungstenite" version = "0.11.0" @@ -2417,7 +2669,7 @@ dependencies = [ "futures-util", "log", "pin-project 0.4.27", - "tokio", + "tokio 0.2.25", "tungstenite", ] @@ -2432,7 +2684,21 @@ dependencies = [ "futures-sink", "log", "pin-project-lite 0.1.12", - "tokio", + "tokio 0.2.25", +] + +[[package]] +name = "tokio-util" +version = "0.6.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1caa0b0c8d94a049db56b5acf8cba99dc0623aab1b26d5b5f5e2d945846b3592" +dependencies = [ + "bytes 1.0.1", + "futures-core", + "futures-sink", + "log", + "pin-project-lite 0.2.6", + "tokio 1.6.0", ] [[package]] @@ -2578,6 +2844,12 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f7fe0bb3479651439c9112f72b6c505038574c9fbb575ed1bf3b797fa39dd564" +[[package]] +name = "untrusted" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a156c684c91ea7d62626509bce3cb4e1d9ed5c4d978f7b4352658f96a4c26b4a" + [[package]] name = "url" version = "2.2.1" @@ -2654,7 +2926,7 @@ dependencies = [ "futures", "headers", "http", - "hyper", + "hyper 0.13.10", "log", "mime", "mime_guess", @@ -2663,8 +2935,8 @@ dependencies = [ "scoped-tls", "serde", "serde_json", - "serde_urlencoded", - "tokio", + "serde_urlencoded 0.6.1", + "tokio 0.2.25", "tokio-tungstenite", "tower-service", "tracing", @@ -2691,6 +2963,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8fe8f61dba8e5d645a4d8132dc7a0a66861ed5e1045d2c0ed940fab33bac0fbe" dependencies = [ "cfg-if 1.0.0", + "serde", + "serde_json", "wasm-bindgen-macro", ] @@ -2709,6 +2983,18 @@ dependencies = [ "wasm-bindgen-shared", ] +[[package]] +name = "wasm-bindgen-futures" +version = "0.4.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73157efb9af26fb564bb59a009afd1c7c334a44db171d280690d0c3faaec3468" +dependencies = [ + "cfg-if 1.0.0", + "js-sys", + "wasm-bindgen", + "web-sys", +] + [[package]] name = "wasm-bindgen-macro" version = "0.2.72" @@ -2748,6 +3034,25 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "webpki" +version = "0.21.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8e38c0608262c46d4a56202ebabdeb094cef7e560ca7a226c6bf055188aa4ea" +dependencies = [ + "ring", + "untrusted", +] + +[[package]] +name = "webpki-roots" +version = "0.21.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aabe153544e473b775453675851ecc86863d2a81d786d741f6b76778f2a48940" +dependencies = [ + "webpki", +] + [[package]] name = "whatlang" version = "0.9.0" @@ -2800,6 +3105,15 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" +[[package]] +name = "winreg" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0120db82e8a1e0b9fb3345a539c478767c0048d842860994d96113d5b667bd69" +dependencies = [ + "winapi 0.3.9", +] + [[package]] name = "ws2_32-sys" version = "0.2.1" diff --git a/benchmarks/benches/songs.rs b/benchmarks/benches/songs.rs index dd52a0afc..dea8cd605 100644 --- a/benchmarks/benches/songs.rs +++ b/benchmarks/benches/songs.rs @@ -48,6 +48,7 @@ const BASE_CONF: Conf = Conf { "Notstandskomitee ", // 4 ], configure: base_conf, + primary_key: Some("id"), ..Conf::BASE }; diff --git a/benchmarks/benches/utils.rs b/benchmarks/benches/utils.rs index e0feb9b0e..6fa5f2d19 100644 --- a/benchmarks/benches/utils.rs +++ b/benchmarks/benches/utils.rs @@ -56,6 +56,10 @@ pub fn base_setup(conf: &Conf) -> Index { options.map_size(100 * 1024 * 1024 * 1024); // 100 GB options.max_readers(10); let index = Index::new(options, conf.database_name).unwrap(); + if let Some(primary_key) = conf.primary_key { + let mut wtxn = index.write_txn().unwrap(); + index.put_primary_key(&mut wtxn, primary_key).unwrap(); + } let update_builder = UpdateBuilder::new(0); let mut wtxn = index.write_txn().unwrap(); @@ -78,6 +82,9 @@ pub fn base_setup(conf: &Conf) -> Index { let update_builder = UpdateBuilder::new(0); let mut wtxn = index.write_txn().unwrap(); let mut builder = update_builder.index_documents(&mut wtxn, &index); + if let None = conf.primary_key { + builder.enable_autogenerate_docids(); + } builder.update_format(UpdateFormat::Csv); builder.index_documents_method(IndexDocumentsMethod::ReplaceDocuments); let reader = File::open(conf.dataset)