diff --git a/Cargo.lock b/Cargo.lock index f1beff04f..17c8a6b6c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1,11 +1,3 @@ -[[package]] -name = "arrayvec" -version = "0.4.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "nodrop 0.1.12 (registry+https://github.com/rust-lang/crates.io-index)", -] - [[package]] name = "base64" version = "0.5.2" @@ -14,27 +6,13 @@ dependencies = [ "byteorder 1.2.3 (registry+https://github.com/rust-lang/crates.io-index)", ] -[[package]] -name = "bincode" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "byteorder 1.2.3 (registry+https://github.com/rust-lang/crates.io-index)", - "serde 1.0.69 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "bitflags" -version = "1.0.3" -source = "registry+https://github.com/rust-lang/crates.io-index" - [[package]] name = "blob" version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ "base64 0.5.2 (registry+https://github.com/rust-lang/crates.io-index)", - "serde 1.0.69 (registry+https://github.com/rust-lang/crates.io-index)", + "serde 1.0.70 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] @@ -47,15 +25,6 @@ name = "byteorder" version = "1.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" -[[package]] -name = "bytes" -version = "0.4.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "byteorder 1.2.3 (registry+https://github.com/rust-lang/crates.io-index)", - "iovec 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", -] - [[package]] name = "bzip2-sys" version = "0.1.6" @@ -70,11 +39,6 @@ name = "cc" version = "1.0.17" source = "registry+https://github.com/rust-lang/crates.io-index" -[[package]] -name = "cfg-if" -version = "0.1.4" -source = "registry+https://github.com/rust-lang/crates.io-index" - [[package]] name = "cmake" version = "0.1.31" @@ -91,36 +55,6 @@ dependencies = [ "build_const 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)", ] -[[package]] -name = "crossbeam-deque" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "crossbeam-epoch 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)", - "crossbeam-utils 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "crossbeam-epoch" -version = "0.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "arrayvec 0.4.7 (registry+https://github.com/rust-lang/crates.io-index)", - "cfg-if 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)", - "crossbeam-utils 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)", - "lazy_static 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "memoffset 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)", - "scopeguard 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "crossbeam-utils" -version = "0.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "cfg-if 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)", -] - [[package]] name = "dtoa" version = "0.4.3" @@ -131,14 +65,6 @@ name = "elapsed" version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -[[package]] -name = "env_logger" -version = "0.3.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "log 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)", -] - [[package]] name = "fst" version = "0.3.0" @@ -148,25 +74,6 @@ dependencies = [ "memmap 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)", ] -[[package]] -name = "fuchsia-zircon" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "bitflags 1.0.3 (registry+https://github.com/rust-lang/crates.io-index)", - "fuchsia-zircon-sys 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "fuchsia-zircon-sys" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" - -[[package]] -name = "futures" -version = "0.1.21" -source = "registry+https://github.com/rust-lang/crates.io-index" - [[package]] name = "gcc" version = "0.3.54" @@ -182,54 +89,11 @@ name = "group-by" version = "0.1.0" source = "git+https://github.com/Kerollmops/group-by.git#034fadc462dc511ed53f44f6091f8707a27ca392" -[[package]] -name = "httparse" -version = "1.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" - -[[package]] -name = "idna" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "matches 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", - "unicode-bidi 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)", - "unicode-normalization 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "iovec" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "libc 0.2.42 (registry+https://github.com/rust-lang/crates.io-index)", - "winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)", -] - [[package]] name = "itoa" version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -[[package]] -name = "kernel32-sys" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)", - "winapi-build 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "lazy_static" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" - -[[package]] -name = "lazycell" -version = "0.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" - [[package]] name = "levenshtein_automata" version = "0.1.1" @@ -269,22 +133,6 @@ dependencies = [ "vcpkg 0.2.4 (registry+https://github.com/rust-lang/crates.io-index)", ] -[[package]] -name = "log" -version = "0.3.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "log 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "log" -version = "0.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "cfg-if 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)", -] - [[package]] name = "lz4-sys" version = "1.8.0" @@ -294,11 +142,6 @@ dependencies = [ "libc 0.2.42 (registry+https://github.com/rust-lang/crates.io-index)", ] -[[package]] -name = "matches" -version = "0.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" - [[package]] name = "memmap" version = "0.6.2" @@ -308,68 +151,6 @@ dependencies = [ "winapi 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)", ] -[[package]] -name = "memoffset" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" - -[[package]] -name = "mio" -version = "0.6.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "fuchsia-zircon 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", - "fuchsia-zircon-sys 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", - "iovec 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", - "kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", - "lazycell 0.6.0 (registry+https://github.com/rust-lang/crates.io-index)", - "libc 0.2.42 (registry+https://github.com/rust-lang/crates.io-index)", - "log 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)", - "miow 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)", - "net2 0.2.33 (registry+https://github.com/rust-lang/crates.io-index)", - "slab 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", - "winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "miow" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", - "net2 0.2.33 (registry+https://github.com/rust-lang/crates.io-index)", - "winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)", - "ws2_32-sys 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "net2" -version = "0.2.33" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "cfg-if 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)", - "libc 0.2.42 (registry+https://github.com/rust-lang/crates.io-index)", - "winapi 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "nodrop" -version = "0.1.12" -source = "registry+https://github.com/rust-lang/crates.io-index" - -[[package]] -name = "num_cpus" -version = "1.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "libc 0.2.42 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "percent-encoding" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" - [[package]] name = "pkg-config" version = "0.3.11" @@ -391,53 +172,15 @@ dependencies = [ "proc-macro2 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)", ] -[[package]] -name = "rand" -version = "0.3.22" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "fuchsia-zircon 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", - "libc 0.2.42 (registry+https://github.com/rust-lang/crates.io-index)", - "rand 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "rand" -version = "0.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "fuchsia-zircon 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", - "libc 0.2.42 (registry+https://github.com/rust-lang/crates.io-index)", - "winapi 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)", -] - [[package]] name = "raptor" version = "0.1.0" dependencies = [ - "bincode 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "byteorder 1.2.3 (registry+https://github.com/rust-lang/crates.io-index)", "fst 0.3.0 (git+https://github.com/Kerollmops/fst.git?branch=op-builder-with-state)", "group-by 0.1.0 (git+https://github.com/Kerollmops/group-by.git)", "levenshtein_automata 0.1.1 (git+https://github.com/Kerollmops/levenshtein-automata.git?branch=custom-fst)", - "serde 1.0.69 (registry+https://github.com/rust-lang/crates.io-index)", - "serde_derive 1.0.69 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "raptor-http" -version = "0.1.0" -dependencies = [ - "env_logger 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)", - "fst 0.3.0 (git+https://github.com/Kerollmops/fst.git?branch=op-builder-with-state)", - "futures 0.1.21 (registry+https://github.com/rust-lang/crates.io-index)", - "raptor 0.1.0", "rocksdb 0.3.0 (git+https://github.com/pingcap/rust-rocksdb.git)", - "serde 1.0.69 (registry+https://github.com/rust-lang/crates.io-index)", - "serde_derive 1.0.69 (registry+https://github.com/rust-lang/crates.io-index)", - "tokio-minihttp 0.1.0 (git+https://github.com/Kerollmops/tokio-minihttp.git)", - "tokio-proto 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", - "tokio-service 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", - "url 1.7.1 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] @@ -446,8 +189,8 @@ version = "0.1.0" dependencies = [ "raptor 0.1.0", "rocksdb 0.3.0 (git+https://github.com/pingcap/rust-rocksdb.git)", - "serde 1.0.69 (registry+https://github.com/rust-lang/crates.io-index)", - "serde_derive 1.0.69 (registry+https://github.com/rust-lang/crates.io-index)", + "serde 1.0.70 (registry+https://github.com/rust-lang/crates.io-index)", + "serde_derive 1.0.70 (registry+https://github.com/rust-lang/crates.io-index)", "serde_json 1.0.22 (registry+https://github.com/rust-lang/crates.io-index)", "unidecode 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", ] @@ -457,19 +200,11 @@ name = "raptor-search" version = "0.1.0" dependencies = [ "elapsed 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", - "env_logger 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)", "fst 0.3.0 (git+https://github.com/Kerollmops/fst.git?branch=op-builder-with-state)", "raptor 0.1.0", "rocksdb 0.3.0 (git+https://github.com/pingcap/rust-rocksdb.git)", - "serde 1.0.69 (registry+https://github.com/rust-lang/crates.io-index)", - "serde_derive 1.0.69 (registry+https://github.com/rust-lang/crates.io-index)", ] -[[package]] -name = "redox_syscall" -version = "0.1.40" -source = "registry+https://github.com/rust-lang/crates.io-index" - [[package]] name = "rocksdb" version = "0.3.0" @@ -480,24 +215,14 @@ dependencies = [ "librocksdb_sys 0.1.0 (git+https://github.com/pingcap/rust-rocksdb.git)", ] -[[package]] -name = "scoped-tls" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" - -[[package]] -name = "scopeguard" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" - [[package]] name = "serde" -version = "1.0.69" +version = "1.0.70" source = "registry+https://github.com/rust-lang/crates.io-index" [[package]] name = "serde_derive" -version = "1.0.69" +version = "1.0.70" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ "proc-macro2 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)", @@ -512,24 +237,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ "dtoa 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)", "itoa 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)", - "serde 1.0.69 (registry+https://github.com/rust-lang/crates.io-index)", + "serde 1.0.70 (registry+https://github.com/rust-lang/crates.io-index)", ] -[[package]] -name = "slab" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" - -[[package]] -name = "slab" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" - -[[package]] -name = "smallvec" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" - [[package]] name = "snappy-sys" version = "0.1.0" @@ -550,211 +260,6 @@ dependencies = [ "unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", ] -[[package]] -name = "take" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" - -[[package]] -name = "time" -version = "0.1.40" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "libc 0.2.42 (registry+https://github.com/rust-lang/crates.io-index)", - "redox_syscall 0.1.40 (registry+https://github.com/rust-lang/crates.io-index)", - "winapi 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "tokio" -version = "0.1.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "futures 0.1.21 (registry+https://github.com/rust-lang/crates.io-index)", - "mio 0.6.15 (registry+https://github.com/rust-lang/crates.io-index)", - "tokio-executor 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", - "tokio-fs 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", - "tokio-io 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)", - "tokio-reactor 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", - "tokio-tcp 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", - "tokio-threadpool 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", - "tokio-timer 0.2.4 (registry+https://github.com/rust-lang/crates.io-index)", - "tokio-udp 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "tokio-codec" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "bytes 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)", - "futures 0.1.21 (registry+https://github.com/rust-lang/crates.io-index)", - "tokio-io 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "tokio-core" -version = "0.1.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "bytes 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)", - "futures 0.1.21 (registry+https://github.com/rust-lang/crates.io-index)", - "iovec 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", - "log 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)", - "mio 0.6.15 (registry+https://github.com/rust-lang/crates.io-index)", - "scoped-tls 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", - "tokio 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)", - "tokio-executor 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", - "tokio-io 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)", - "tokio-reactor 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", - "tokio-timer 0.2.4 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "tokio-executor" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "futures 0.1.21 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "tokio-fs" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "futures 0.1.21 (registry+https://github.com/rust-lang/crates.io-index)", - "tokio-io 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)", - "tokio-threadpool 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "tokio-io" -version = "0.1.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "bytes 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)", - "futures 0.1.21 (registry+https://github.com/rust-lang/crates.io-index)", - "log 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "tokio-minihttp" -version = "0.1.0" -source = "git+https://github.com/Kerollmops/tokio-minihttp.git#1e3f5655e4e64171b87e80d5f872db1345eab54a" -dependencies = [ - "bytes 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)", - "futures 0.1.21 (registry+https://github.com/rust-lang/crates.io-index)", - "httparse 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)", - "log 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)", - "net2 0.2.33 (registry+https://github.com/rust-lang/crates.io-index)", - "time 0.1.40 (registry+https://github.com/rust-lang/crates.io-index)", - "tokio-core 0.1.17 (registry+https://github.com/rust-lang/crates.io-index)", - "tokio-io 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)", - "tokio-proto 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", - "tokio-service 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "tokio-proto" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "futures 0.1.21 (registry+https://github.com/rust-lang/crates.io-index)", - "log 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)", - "net2 0.2.33 (registry+https://github.com/rust-lang/crates.io-index)", - "rand 0.3.22 (registry+https://github.com/rust-lang/crates.io-index)", - "slab 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", - "smallvec 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)", - "take 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", - "tokio-core 0.1.17 (registry+https://github.com/rust-lang/crates.io-index)", - "tokio-io 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)", - "tokio-service 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "tokio-reactor" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "futures 0.1.21 (registry+https://github.com/rust-lang/crates.io-index)", - "log 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)", - "mio 0.6.15 (registry+https://github.com/rust-lang/crates.io-index)", - "slab 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", - "tokio-executor 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", - "tokio-io 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "tokio-service" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "futures 0.1.21 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "tokio-tcp" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "bytes 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)", - "futures 0.1.21 (registry+https://github.com/rust-lang/crates.io-index)", - "iovec 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", - "mio 0.6.15 (registry+https://github.com/rust-lang/crates.io-index)", - "tokio-io 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)", - "tokio-reactor 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "tokio-threadpool" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "crossbeam-deque 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)", - "futures 0.1.21 (registry+https://github.com/rust-lang/crates.io-index)", - "log 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)", - "num_cpus 1.8.0 (registry+https://github.com/rust-lang/crates.io-index)", - "rand 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)", - "tokio-executor 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "tokio-timer" -version = "0.2.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "futures 0.1.21 (registry+https://github.com/rust-lang/crates.io-index)", - "tokio-executor 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "tokio-udp" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "bytes 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)", - "futures 0.1.21 (registry+https://github.com/rust-lang/crates.io-index)", - "log 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)", - "mio 0.6.15 (registry+https://github.com/rust-lang/crates.io-index)", - "tokio-codec 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", - "tokio-io 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)", - "tokio-reactor 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "unicode-bidi" -version = "0.3.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "matches 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "unicode-normalization" -version = "0.1.7" -source = "registry+https://github.com/rust-lang/crates.io-index" - [[package]] name = "unicode-xid" version = "0.1.0" @@ -765,26 +270,11 @@ name = "unidecode" version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -[[package]] -name = "url" -version = "1.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "idna 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", - "matches 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", - "percent-encoding 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)", -] - [[package]] name = "vcpkg" version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" -[[package]] -name = "winapi" -version = "0.2.8" -source = "registry+https://github.com/rust-lang/crates.io-index" - [[package]] name = "winapi" version = "0.3.5" @@ -794,11 +284,6 @@ dependencies = [ "winapi-x86_64-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", ] -[[package]] -name = "winapi-build" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" - [[package]] name = "winapi-i686-pc-windows-gnu" version = "0.4.0" @@ -809,19 +294,10 @@ name = "winapi-x86_64-pc-windows-gnu" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -[[package]] -name = "ws2_32-sys" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)", - "winapi-build 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", -] - [[package]] name = "zstd-sys" version = "1.4.4+zstd.1.3.5" -source = "git+https://github.com/gyscos/zstd-rs.git#9ff4442c1977fad400f90d9c48e4f114c474117c" +source = "git+https://github.com/gyscos/zstd-rs.git#0100c8483ce88c9ab359d1aecdd199641dfb8b5e" dependencies = [ "blob 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", "gcc 0.3.54 (registry+https://github.com/rust-lang/crates.io-index)", @@ -830,98 +306,40 @@ dependencies = [ ] [metadata] -"checksum arrayvec 0.4.7 (registry+https://github.com/rust-lang/crates.io-index)" = "a1e964f9e24d588183fcb43503abda40d288c8657dfc27311516ce2f05675aef" "checksum base64 0.5.2 (registry+https://github.com/rust-lang/crates.io-index)" = "30e93c03064e7590d0466209155251b90c22e37fab1daf2771582598b5827557" -"checksum bincode 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "9f2fb9e29e72fd6bc12071533d5dc7664cb01480c59406f656d7ac25c7bd8ff7" -"checksum bitflags 1.0.3 (registry+https://github.com/rust-lang/crates.io-index)" = "d0c54bb8f454c567f21197eefcdbf5679d0bd99f2ddbe52e84c77061952e6789" "checksum blob 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "122c3fa3949d822d2a51c648db9e8105d6e75b89dc628cc366901d3d396fa4f4" "checksum build_const 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "39092a32794787acd8525ee150305ff051b0aa6cc2abaf193924f5ab05425f39" "checksum byteorder 1.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "74c0b906e9446b0a2e4f760cdb3fa4b2c48cdc6db8766a845c54b6ff063fd2e9" -"checksum bytes 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)" = "7dd32989a66957d3f0cba6588f15d4281a733f4e9ffc43fcd2385f57d3bf99ff" "checksum bzip2-sys 0.1.6 (git+https://github.com/alexcrichton/bzip2-rs.git)" = "" "checksum cc 1.0.17 (registry+https://github.com/rust-lang/crates.io-index)" = "49ec142f5768efb5b7622aebc3fdbdbb8950a4b9ba996393cb76ef7466e8747d" -"checksum cfg-if 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "efe5c877e17a9c717a0bf3613b2709f723202c4e4675cc8f12926ded29bcb17e" "checksum cmake 0.1.31 (registry+https://github.com/rust-lang/crates.io-index)" = "95470235c31c726d72bf2e1f421adc1e65b9d561bf5529612cbe1a72da1467b3" "checksum crc 1.8.1 (registry+https://github.com/rust-lang/crates.io-index)" = "d663548de7f5cca343f1e0a48d14dcfb0e9eb4e079ec58883b7251539fa10aeb" -"checksum crossbeam-deque 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "fe8153ef04a7594ded05b427ffad46ddeaf22e63fd48d42b3e1e3bb4db07cae7" -"checksum crossbeam-epoch 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)" = "2af0e75710d6181e234c8ecc79f14a97907850a541b13b0be1dd10992f2e4620" -"checksum crossbeam-utils 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "d636a8b3bcc1b409d7ffd3facef8f21dcb4009626adbd0c5e6c4305c07253c7b" "checksum dtoa 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)" = "6d301140eb411af13d3115f9a562c85cc6b541ade9dfa314132244aaee7489dd" "checksum elapsed 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "6f4e5af126dafd0741c2ad62d47f68b28602550102e5f0dd45c8a97fc8b49c29" -"checksum env_logger 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "15abd780e45b3ea4f76b4e9a26ff4843258dd8a3eed2775a0e7368c2e7936c2f" "checksum fst 0.3.0 (git+https://github.com/Kerollmops/fst.git?branch=op-builder-with-state)" = "" -"checksum fuchsia-zircon 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "2e9763c69ebaae630ba35f74888db465e49e259ba1bc0eda7d06f4a067615d82" -"checksum fuchsia-zircon-sys 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "3dcaa9ae7725d12cdb85b3ad99a434db70b468c09ded17e012d86b5c1010f7a7" -"checksum futures 0.1.21 (registry+https://github.com/rust-lang/crates.io-index)" = "1a70b146671de62ec8c8ed572219ca5d594d9b06c0b364d5e67b722fc559b48c" "checksum gcc 0.3.54 (registry+https://github.com/rust-lang/crates.io-index)" = "5e33ec290da0d127825013597dbdfc28bee4964690c7ce1166cbc2a7bd08b1bb" "checksum glob 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)" = "8be18de09a56b60ed0edf84bc9df007e30040691af7acd1c41874faac5895bfb" "checksum group-by 0.1.0 (git+https://github.com/Kerollmops/group-by.git)" = "" -"checksum httparse 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7b6288d7db100340ca12873fd4d08ad1b8f206a9457798dfb17c018a33fee540" -"checksum idna 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "38f09e0f0b1fb55fdee1f17470ad800da77af5186a1a76c026b679358b7e844e" -"checksum iovec 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "dbe6e417e7d0975db6512b90796e8ce223145ac4e33c377e4a42882a0e88bb08" "checksum itoa 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)" = "5adb58558dcd1d786b5f0bd15f3226ee23486e24b7b58304b60f64dc68e62606" -"checksum kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7507624b29483431c0ba2d82aece8ca6cdba9382bff4ddd0f7490560c056098d" -"checksum lazy_static 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "e6412c5e2ad9584b0b8e979393122026cdd6d2a80b933f890dcd694ddbe73739" -"checksum lazycell 0.6.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a6f08839bc70ef4a3fe1d566d5350f519c5912ea86be0df1740a7d247c7fc0ef" "checksum levenshtein_automata 0.1.1 (git+https://github.com/Kerollmops/levenshtein-automata.git?branch=custom-fst)" = "" "checksum libc 0.2.42 (registry+https://github.com/rust-lang/crates.io-index)" = "b685088df2b950fccadf07a7187c8ef846a959c142338a48f9dc0b94517eb5f1" "checksum librocksdb_sys 0.1.0 (git+https://github.com/pingcap/rust-rocksdb.git)" = "" "checksum libz-sys 1.0.18 (git+https://github.com/busyjay/libz-sys.git?branch=static-link)" = "" -"checksum log 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)" = "e19e8d5c34a3e0e2223db8e060f9e8264aeeb5c5fc64a4ee9965c062211c024b" -"checksum log 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)" = "61bd98ae7f7b754bc53dca7d44b604f733c6bba044ea6f41bc8d89272d8161d2" "checksum lz4-sys 1.8.0 (git+https://github.com/busyjay/lz4-rs.git?branch=adjust-build)" = "" -"checksum matches 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "100aabe6b8ff4e4a7e32c1c13523379802df0772b82466207ac25b013f193376" "checksum memmap 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)" = "e2ffa2c986de11a9df78620c01eeaaf27d94d3ff02bf81bfcca953102dd0c6ff" -"checksum memoffset 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "0f9dc261e2b62d7a622bf416ea3c5245cdd5d9a7fcc428c0d06804dfce1775b3" -"checksum mio 0.6.15 (registry+https://github.com/rust-lang/crates.io-index)" = "4fcfcb32d63961fb6f367bfd5d21e4600b92cd310f71f9dca25acae196eb1560" -"checksum miow 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "8c1f2f3b1cf331de6896aabf6e9d55dca90356cc9960cca7eaaf408a355ae919" -"checksum net2 0.2.33 (registry+https://github.com/rust-lang/crates.io-index)" = "42550d9fb7b6684a6d404d9fa7250c2eb2646df731d1c06afc06dcee9e1bcf88" -"checksum nodrop 0.1.12 (registry+https://github.com/rust-lang/crates.io-index)" = "9a2228dca57108069a5262f2ed8bd2e82496d2e074a06d1ccc7ce1687b6ae0a2" -"checksum num_cpus 1.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = "c51a3322e4bca9d212ad9a158a02abc6934d005490c054a2778df73a70aa0a30" -"checksum percent-encoding 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "31010dd2e1ac33d5b46a5b413495239882813e0369f8ed8a5e266f173602f831" "checksum pkg-config 0.3.11 (registry+https://github.com/rust-lang/crates.io-index)" = "110d5ee3593dbb73f56294327fe5668bcc997897097cbc76b51e7aed3f52452f" "checksum proc-macro2 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)" = "effdb53b25cdad54f8f48843d67398f7ef2e14f12c1b4cb4effc549a6462a4d6" "checksum quote 0.6.3 (registry+https://github.com/rust-lang/crates.io-index)" = "e44651a0dc4cdd99f71c83b561e221f714912d11af1a4dff0631f923d53af035" -"checksum rand 0.3.22 (registry+https://github.com/rust-lang/crates.io-index)" = "15a732abf9d20f0ad8eeb6f909bf6868722d9a06e1e50802b6a70351f40b4eb1" -"checksum rand 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)" = "eba5f8cb59cc50ed56be8880a5c7b496bfd9bd26394e176bc67884094145c2c5" -"checksum redox_syscall 0.1.40 (registry+https://github.com/rust-lang/crates.io-index)" = "c214e91d3ecf43e9a4e41e578973adeb14b474f2bee858742d127af75a0112b1" "checksum rocksdb 0.3.0 (git+https://github.com/pingcap/rust-rocksdb.git)" = "" -"checksum scoped-tls 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "332ffa32bf586782a3efaeb58f127980944bbc8c4d6913a86107ac2a5ab24b28" -"checksum scopeguard 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "94258f53601af11e6a49f722422f6e3425c52b06245a5cf9bc09908b174f5e27" -"checksum serde 1.0.69 (registry+https://github.com/rust-lang/crates.io-index)" = "210e5a3b159c566d7527e9b22e44be73f2e0fcc330bb78fef4dbccb56d2e74c8" -"checksum serde_derive 1.0.69 (registry+https://github.com/rust-lang/crates.io-index)" = "dd724d68017ae3a7e63600ee4b2fdb3cad2158ffd1821d44aff4580f63e2b593" +"checksum serde 1.0.70 (registry+https://github.com/rust-lang/crates.io-index)" = "0c3adf19c07af6d186d91dae8927b83b0553d07ca56cbf7f2f32560455c91920" +"checksum serde_derive 1.0.70 (registry+https://github.com/rust-lang/crates.io-index)" = "3525a779832b08693031b8ecfb0de81cd71cfd3812088fafe9a7496789572124" "checksum serde_json 1.0.22 (registry+https://github.com/rust-lang/crates.io-index)" = "84b8035cabe9b35878adec8ac5fe03d5f6bc97ff6edd7ccb96b44c1276ba390e" -"checksum slab 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "17b4fcaed89ab08ef143da37bc52adbcc04d4a69014f4c1208d6b51f0c47bc23" -"checksum slab 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "fdeff4cd9ecff59ec7e3744cbca73dfe5ac35c2aedb2cfba8a1c715a18912e9d" -"checksum smallvec 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "4c8cbcd6df1e117c2210e13ab5109635ad68a929fcbb8964dc965b76cb5ee013" "checksum snappy-sys 0.1.0 (git+https://github.com/busyjay/rust-snappy.git?branch=static-link)" = "" "checksum syn 0.14.4 (registry+https://github.com/rust-lang/crates.io-index)" = "2beff8ebc3658f07512a413866875adddd20f4fd47b2a4e6c9da65cd281baaea" -"checksum take 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b157868d8ac1f56b64604539990685fa7611d8fa9e5476cf0c02cf34d32917c5" -"checksum time 0.1.40 (registry+https://github.com/rust-lang/crates.io-index)" = "d825be0eb33fda1a7e68012d51e9c7f451dc1a69391e7fdc197060bb8c56667b" -"checksum tokio 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)" = "8ee337e5f4e501fc32966fec6fe0ca0cc1c237b0b1b14a335f8bfe3c5f06e286" -"checksum tokio-codec 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "881e9645b81c2ce95fcb799ded2c29ffb9f25ef5bef909089a420e5961dd8ccb" -"checksum tokio-core 0.1.17 (registry+https://github.com/rust-lang/crates.io-index)" = "aeeffbbb94209023feaef3c196a41cbcdafa06b4a6f893f68779bb5e53796f71" -"checksum tokio-executor 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "8cac2a7883ff3567e9d66bb09100d09b33d90311feca0206c7ca034bc0c55113" -"checksum tokio-fs 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "fc42bae2f6e33865b99069d95bcddfc85c9f0849b4e7e7399eeee71956ef34d7" -"checksum tokio-io 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)" = "a5c9635ee806f26d302b8baa1e145689a280d8f5aa8d0552e7344808da54cc21" -"checksum tokio-minihttp 0.1.0 (git+https://github.com/Kerollmops/tokio-minihttp.git)" = "" -"checksum tokio-proto 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "8fbb47ae81353c63c487030659494b295f6cb6576242f907f203473b191b0389" -"checksum tokio-reactor 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "e00ec63bbec2c97ce1178cb0587b2c438b2f6b09d3ee54a33c45a9cf0d530810" -"checksum tokio-service 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "24da22d077e0f15f55162bdbdc661228c1581892f52074fb242678d015b45162" -"checksum tokio-tcp 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ec9b094851aadd2caf83ba3ad8e8c4ce65a42104f7b94d9e6550023f0407853f" -"checksum tokio-threadpool 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "24ab84f574027b0e875378f31575cf175360891919e93a3490f07e76e00e4efb" -"checksum tokio-timer 0.2.4 (registry+https://github.com/rust-lang/crates.io-index)" = "028b94314065b90f026a21826cffd62a4e40a92cda3e5c069cc7b02e5945f5e9" -"checksum tokio-udp 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "43eb534af6e8f37d43ab1b612660df14755c42bd003c5f8d2475ee78cc4600c0" -"checksum unicode-bidi 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)" = "49f2bd0c6468a8230e1db229cff8029217cf623c767ea5d60bfbd42729ea54d5" -"checksum unicode-normalization 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)" = "6a0180bc61fc5a987082bfa111f4cc95c4caff7f9799f3e46df09163a937aa25" "checksum unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "fc72304796d0818e357ead4e000d19c9c174ab23dc11093ac919054d20a6a7fc" "checksum unidecode 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "402bb19d8e03f1d1a7450e2bd613980869438e0666331be3e073089124aa1adc" -"checksum url 1.7.1 (registry+https://github.com/rust-lang/crates.io-index)" = "2a321979c09843d272956e73700d12c4e7d3d92b2ee112b31548aef0d4efc5a6" "checksum vcpkg 0.2.4 (registry+https://github.com/rust-lang/crates.io-index)" = "cbe533e138811704c0e3cbde65a818b35d3240409b4346256c5ede403e082474" -"checksum winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)" = "167dc9d6949a9b857f3451275e911c3f44255842c1f7a76f33c55103a909087a" "checksum winapi 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "773ef9dcc5f24b7d850d0ff101e542ff24c3b090a9768e03ff889fdef41f00fd" -"checksum winapi-build 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "2d315eee3b34aca4797b2da6b13ed88266e6d612562a0c46390af8299fc699bc" "checksum winapi-i686-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" "checksum winapi-x86_64-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" -"checksum ws2_32-sys 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "d59cefebd0c892fa2dd6de581e937301d8552cb44489cdff035c6187cb63fa5e" "checksum zstd-sys 1.4.4+zstd.1.3.5 (git+https://github.com/gyscos/zstd-rs.git)" = "" diff --git a/Cargo.toml b/Cargo.toml index fcfd400e4..a74947be5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -3,7 +3,6 @@ members = [ "raptor", "raptor-indexer", "raptor-search", - "raptor-http", ] [profile.release] diff --git a/raptor-http/.gitignore b/raptor-http/.gitignore deleted file mode 100644 index 70e3cae73..000000000 --- a/raptor-http/.gitignore +++ /dev/null @@ -1,3 +0,0 @@ - -/target -**/*.rs.bk diff --git a/raptor-http/Cargo.toml b/raptor-http/Cargo.toml deleted file mode 100644 index da2c71276..000000000 --- a/raptor-http/Cargo.toml +++ /dev/null @@ -1,22 +0,0 @@ -[package] -name = "raptor-http" -version = "0.1.0" -authors = ["Kerollmops "] - -[dependencies] -env_logger = { version = "0.3", default-features = false } -raptor = { path = "../raptor" } -futures = "0.1" -serde = "1.0" -serde_derive = "1.0" -tokio-minihttp = { git = "https://github.com/Kerollmops/tokio-minihttp.git" } -tokio-proto = "0.1" -tokio-service = "0.1" -url = "1.7" - -[dependencies.fst] -git = "https://github.com/Kerollmops/fst.git" -branch = "op-builder-with-state" - -[dependencies.rocksdb] -git = "https://github.com/pingcap/rust-rocksdb.git" diff --git a/raptor-http/src/main.rs b/raptor-http/src/main.rs deleted file mode 100644 index aef21ba66..000000000 --- a/raptor-http/src/main.rs +++ /dev/null @@ -1,102 +0,0 @@ -extern crate env_logger; -extern crate rocksdb; -extern crate fst; -extern crate futures; -extern crate raptor; -extern crate tokio_minihttp; -extern crate tokio_proto; -extern crate tokio_service; -extern crate url; - -use std::{io, fs}; -use std::sync::Arc; - -use fst::Streamer; -use futures::future; -use rocksdb::{DB, DBOptions}; -use tokio_minihttp::{Request, Response, Http}; -use tokio_proto::TcpServer; -use tokio_service::Service; - -use raptor::{DocIndexMap, RankedStream, LevBuilder}; - -struct MainService { - map: Arc, - lev_builder: Arc, - db: Arc, -} - -impl Service for MainService { - type Request = Request; - type Response = Response; - type Error = io::Error; - type Future = future::Ok; - - fn call(&self, request: Request) -> Self::Future { - - let url = format!("http://raptor.net{}", request.path()); - let url = url::Url::parse(&url).unwrap(); - - let mut resp = Response::new(); - resp.header("Content-Type", "text/html"); - resp.header("charset", "utf-8"); - - if let Some((_, query)) = url.query_pairs().find(|&(ref k, _)| k == "q") { - let query = query.to_lowercase(); - - let mut automatons = Vec::new(); - - for query in query.split_whitespace() { - let lev = self.lev_builder.get_automaton(query); - automatons.push(lev); - } - - let mut limit = 20; - let mut stream = RankedStream::new(&self.map, self.map.values(), automatons.clone(), 20); - - let mut body = String::new(); - body.push_str(""); - - while let Some(document_id) = stream.next() { - if limit == 0 { break } - - body.push_str(&format!("

{:?}

", document_id)); - - limit -= 1; - } - - body.push_str(""); - - resp.body_vec(body.into_bytes()); - } - - future::ok(resp) - } -} - -fn main() { - drop(env_logger::init()); - - let addr = "0.0.0.0:8080".parse().unwrap(); - - let lev_builder = Arc::new(LevBuilder::new()); - let map = { - let fst = fs::read("map.fst").unwrap(); - let values = fs::read("values.vecs").unwrap(); - let map = DocIndexMap::from_bytes(fst, &values).unwrap(); - Arc::new(map) - }; - - let db = { - let opts = DBOptions::new(); - let error_if_log_file_exist = false; - let db = DB::open_for_read_only(opts, "rocksdb/storage", error_if_log_file_exist).unwrap(); - Arc::new(db) - }; - - TcpServer::new(Http, addr).serve(move || Ok(MainService { - map: map.clone(), - lev_builder: lev_builder.clone(), - db: db.clone(), - })) -} diff --git a/raptor-indexer/src/main.rs b/raptor-indexer/src/main.rs index 26e1338a6..332765bb7 100644 --- a/raptor-indexer/src/main.rs +++ b/raptor-indexer/src/main.rs @@ -13,7 +13,7 @@ use std::fs::{self, File}; use std::io::{self, BufReader, BufRead}; use std::iter; -use raptor::{DocIndexMapBuilder, DocIndexMap, DocIndex}; +use raptor::{MetadataBuilder, Metadata, DocIndex}; use rocksdb::{DB, WriteBatch, Writable}; use serde_json::from_str; use unidecode::unidecode; @@ -62,11 +62,11 @@ fn main() { } }; - let map_file = "map.fst"; - let values_file = "values.vecs"; + let map_file = "map.meta"; + let indexes_file = "indexes.meta"; let rocksdb_file = "rocksdb/storage"; - for file in &[map_file, values_file, rocksdb_file] { + for file in &[map_file, indexes_file, rocksdb_file] { match is_readonly(file) { Ok(true) => panic!("the {:?} file is readonly, please make it writeable", file), Err(ref e) if e.kind() == io::ErrorKind::NotFound => (), @@ -75,10 +75,12 @@ fn main() { } } - fs::remove_file(rocksdb_file); let db = DB::open_default(rocksdb_file).unwrap(); - let mut builder = DocIndexMapBuilder::new(); + let map = File::create(map_file).unwrap(); + let indexes = File::create(indexes_file).unwrap(); + let mut builder = MetadataBuilder::new(map, indexes); + for line in data.lines() { let line = line.unwrap(); @@ -117,15 +119,12 @@ fn main() { } } - let map = File::create(map_file).unwrap(); - let values = File::create(values_file).unwrap(); - - let (map, values) = builder.build(map, values).unwrap(); + builder.finish().unwrap(); set_readonly(map_file, true).unwrap(); - set_readonly(values_file, true).unwrap(); + set_readonly(indexes_file, true).unwrap(); set_readonly(rocksdb_file, true).unwrap(); println!("Checking the dump consistency..."); - unsafe { DocIndexMap::from_paths("map.fst", "values.vecs").unwrap() }; + unsafe { Metadata::from_paths(map_file, indexes_file).unwrap() }; } diff --git a/raptor-search/Cargo.toml b/raptor-search/Cargo.toml index 85fb9c202..6fc6ecd5c 100644 --- a/raptor-search/Cargo.toml +++ b/raptor-search/Cargo.toml @@ -4,11 +4,8 @@ version = "0.1.0" authors = ["Kerollmops "] [dependencies] -env_logger = { version = "0.3", default-features = false } raptor = { path = "../raptor" } elapsed = "0.1" -serde = "1.0" -serde_derive = "1.0" [dependencies.fst] git = "https://github.com/Kerollmops/fst.git" diff --git a/raptor-search/src/main.rs b/raptor-search/src/main.rs index 7f2a431a9..33e20670a 100644 --- a/raptor-search/src/main.rs +++ b/raptor-search/src/main.rs @@ -1,4 +1,3 @@ -extern crate env_logger; extern crate rocksdb; extern crate fst; extern crate raptor; @@ -10,49 +9,55 @@ use std::io::{self, Write}; use elapsed::measure_time; use fst::Streamer; use rocksdb::{DB, DBOptions}; -use raptor::{load_map, DocIndexMap, RankedStream, LevBuilder}; +use raptor::{Metadata, RankedStream, LevBuilder}; -fn search(map: &DocIndexMap, lev_builder: &LevBuilder, db: &DB, query: &str) { +fn search(metadata: &Metadata, database: &DB, lev_builder: &LevBuilder, query: &str) { let mut automatons = Vec::new(); for query in query.split_whitespace() { let lev = lev_builder.get_automaton(query); automatons.push(lev); } - let mut stream = RankedStream::new(&map, map.values(), automatons, 20); + let map = metadata.as_map(); + let indexes = metadata.as_indexes(); + + let mut stream = RankedStream::new(&map, &indexes, automatons, 20); while let Some(document) = stream.next() { - print!("{:?} ", document.document_id); + print!("{:?}", document.document_id); let title_key = format!("{}-title", document.document_id); - let title = db.get(title_key.as_bytes()).unwrap().unwrap(); + let title = database.get(title_key.as_bytes()).unwrap().unwrap(); let title = unsafe { from_utf8_unchecked(&title) }; - print!("{:?}", title); + print!(" {:?}", title); println!(); } } fn main() { - drop(env_logger::init()); + let map_file = "map.meta"; + let indexes_file = "indexes.meta"; + let rocksdb_file = "rocksdb/storage"; - let (elapsed, map) = measure_time(|| load_map("map.fst", "values.vecs").unwrap()); - println!("{} to load the map", elapsed); + let (elapsed, meta) = measure_time(|| unsafe { + Metadata::from_paths(map_file, indexes_file).unwrap() + }); + println!("{} to load metadata", elapsed); + + let (elapsed, db) = measure_time(|| { + let options = DBOptions::new(); + DB::open_for_read_only(options, rocksdb_file, false).unwrap() + }); + println!("{} to load the RocksDB database", elapsed); let (elapsed, lev_builder) = measure_time(|| LevBuilder::new()); println!("{} to load the levenshtein automaton", elapsed); - let (elapsed, db) = measure_time(|| { - let opts = DBOptions::new(); - let error_if_log_file_exist = false; - DB::open_for_read_only(opts, "rocksdb/storage", error_if_log_file_exist).unwrap() - }); - println!("{} to load the rocksdb DB", elapsed); - match env::args().nth(1) { Some(query) => { println!("Searching for: {:?}", query); let query = query.to_lowercase(); - let (elapsed, _) = measure_time(|| search(&map, &lev_builder, &db, &query)); + let (elapsed, _) = measure_time(|| search(&meta, &db, &lev_builder, &query)); println!("Finished in {}", elapsed); }, None => loop { @@ -65,7 +70,7 @@ fn main() { if query.is_empty() { break } - let (elapsed, _) = measure_time(|| search(&map, &lev_builder, &db, &query)); + let (elapsed, _) = measure_time(|| search(&meta, &db, &lev_builder, &query)); println!("Finished in {}", elapsed); }, } diff --git a/raptor/Cargo.toml b/raptor/Cargo.toml index 5fd342f88..666a6b125 100644 --- a/raptor/Cargo.toml +++ b/raptor/Cargo.toml @@ -4,9 +4,7 @@ version = "0.1.0" authors = ["Kerollmops "] [dependencies] -bincode = "1.0" -serde = "1.0" -serde_derive = "1.0" +byteorder = "1.2" [dependencies.fst] git = "https://github.com/Kerollmops/fst.git" @@ -17,5 +15,8 @@ git = "https://github.com/Kerollmops/levenshtein-automata.git" branch = "custom-fst" features = ["fst_automaton"] +[dependencies.rocksdb] +git = "https://github.com/pingcap/rust-rocksdb.git" + [dependencies.group-by] git = "https://github.com/Kerollmops/group-by.git" diff --git a/raptor/src/lib.rs b/raptor/src/lib.rs index 46c109d69..ed37b5464 100644 --- a/raptor/src/lib.rs +++ b/raptor/src/lib.rs @@ -1,28 +1,22 @@ -#[macro_use] extern crate serde_derive; -extern crate bincode; extern crate fst; extern crate group_by; extern crate levenshtein_automata; -extern crate serde; +extern crate byteorder; +extern crate rocksdb; -pub mod map; pub mod rank; -mod levenshtein; +pub mod metadata; +pub mod levenshtein; -use std::path::Path; -use std::fs; - -pub use self::map::{Map, MapBuilder, Values}; -pub use self::map::{ - OpBuilder, IndexedValues, - OpWithStateBuilder, IndexedValuesWithState, +pub use self::metadata::{ + Metadata, MetadataBuilder, + StreamWithState, StreamWithStateBuilder, + UnionWithState, OpWithStateBuilder, + IndexedValuesWithState, }; pub use self::rank::{RankedStream}; pub use self::levenshtein::LevBuilder; -pub type DocIndexMap = Map; -pub type DocIndexMapBuilder = MapBuilder; - pub type DocumentId = u64; /// This structure represent the position of a word @@ -30,7 +24,8 @@ pub type DocumentId = u64; /// /// This is stored in the map, generated at index time, /// extracted and interpreted at search time. -#[derive(Debug, Copy, Clone, Eq, PartialEq, PartialOrd, Ord, Hash, Serialize, Deserialize)] +#[derive(Debug, Copy, Clone, Eq, PartialEq, PartialOrd, Ord, Hash)] +#[repr(C)] pub struct DocIndex { /// The document identifier where the word was found. @@ -109,12 +104,3 @@ impl Match { } } } - - -pub fn load_map(map: P, values: Q) -> fst::Result -where P: AsRef, Q: AsRef, -{ - let fst = fs::read(map)?; - let values = fs::read(values)?; - DocIndexMap::from_bytes(fst, &values) -} diff --git a/raptor/src/map.rs b/raptor/src/map.rs deleted file mode 100644 index bd27ede7e..000000000 --- a/raptor/src/map.rs +++ /dev/null @@ -1,404 +0,0 @@ -use bincode; -use fst::{self, Automaton}; -use serde::de::DeserializeOwned; -use serde::ser::Serialize; -use std::collections::BTreeMap; -use std::collections::btree_map::Entry; -use std::fs::File; -use std::io::{Write, BufReader}; -use std::ops::Range; -use std::path::Path; - -#[derive(Debug)] -pub struct Map { - inner: fst::Map, - values: Values, -} - -impl Map { - pub unsafe fn from_paths(map: P, values: Q) -> fst::Result - where - T: DeserializeOwned, - P: AsRef, - Q: AsRef - { - let inner = fst::Map::from_path(map)?; - - // TODO handle errors !!! - let values = File::open(values).unwrap(); - let values = BufReader::new(values); - let values = bincode::deserialize_from(values).unwrap(); - - Ok(Self { inner, values }) - } - - pub fn from_bytes(map: Vec, values: &[u8]) -> fst::Result - where - T: DeserializeOwned - { - let inner = fst::Map::from_bytes(map)?; - let values = bincode::deserialize(values).unwrap(); - - Ok(Self { inner, values }) - } - - pub fn stream(&self) -> Stream { - Stream { - inner: self.inner.stream(), - values: &self.values, - } - } - - pub fn contains_key>(&self, key: K) -> bool { - self.inner.contains_key(key) - } - - pub fn get>(&self, key: K) -> Option<&[T]> { - self.inner.get(key).map(|i| unsafe { self.values.get_unchecked(i as usize) }) - } - - pub fn search(&self, aut: A) -> StreamBuilder { - StreamBuilder { - inner: self.inner.search(aut), - values: &self.values, - } - } - - pub fn as_map(&self) -> &fst::Map { - &self.inner - } - - pub fn values(&self) -> &Values { - &self.values - } -} - -#[derive(Debug, Serialize, Deserialize)] -pub struct Values { - ranges: Box<[Range]>, - values: Box<[T]>, -} - -impl Values { - fn new(raw: Vec>) -> Self { - let cap = raw.len(); - let mut ranges = Vec::with_capacity(cap); - let cap = raw.iter().map(Vec::len).sum(); - let mut values = Vec::with_capacity(cap); - - for mut v in &raw { - let len = v.len() as u64; - let start = ranges.last().map(|&Range { end, .. }| end).unwrap_or(0); - - let range = Range { start, end: start + len }; - ranges.push(range); - } - - values.extend(raw.into_iter().flat_map(IntoIterator::into_iter)); - - let ranges = ranges.into_boxed_slice(); - let values = values.into_boxed_slice(); - - Self { ranges, values } - } - - pub unsafe fn get_unchecked(&self, index: usize) -> &[T] { - let range = self.ranges.get_unchecked(index); - let range = Range { start: range.start as usize, end: range.end as usize }; - self.values.get_unchecked(range) - } -} - -#[derive(Debug)] -pub struct MapBuilder { - map: BTreeMap, - // This makes many memory indirections but it is only used - // at index time, not kept for query time. - values: Vec>, -} - -impl MapBuilder { - pub fn new() -> Self { - Self { - map: BTreeMap::new(), - values: Vec::new(), - } - } - - pub fn insert>(&mut self, key: S, value: T) { - let key = key.into(); - match self.map.entry(key) { - Entry::Vacant(e) => { - self.values.push(vec![value]); - let index = (self.values.len() - 1) as u64; - e.insert(index); - }, - Entry::Occupied(e) => { - let index = *e.get(); - let values = &mut self.values[index as usize]; - values.push(value); - }, - } - } - - pub fn build_in_memory(self) -> fst::Result> { - Ok(Map { - inner: fst::Map::from_iter(self.map)?, - values: Values::new(self.values), - }) - } - - pub fn build(self, map_wrt: W, mut values_wrt: X) -> fst::Result<(W, X)> - where - T: Serialize, - W: Write, - X: Write - { - let mut builder = fst::MapBuilder::new(map_wrt)?; - builder.extend_iter(self.map)?; - let map = builder.into_inner()?; - let values = Values::new(self.values); - - // TODO handle that error !!! - bincode::serialize_into(&mut values_wrt, &values).unwrap(); - - Ok((map, values_wrt)) - } -} - -pub struct OpBuilder<'m, 'v, T: 'v> { - inner: fst::map::OpBuilder<'m>, - values: &'v Values, -} - -impl<'m, 'v, T: 'v> OpBuilder<'m, 'v, T> { - pub fn new(values: &'v Values) -> Self { - OpBuilder { - inner: fst::map::OpBuilder::new(), - values: values, - } - } - - pub fn add(mut self, streamable: I) -> Self - where - I: for<'a> fst::IntoStreamer<'a, Into=S, Item=(&'a [u8], u64)>, - S: 'm + for<'a> fst::Streamer<'a, Item=(&'a [u8], u64)>, - { - self.push(streamable); - self - } - - pub fn push(&mut self, streamable: I) - where - I: for<'a> fst::IntoStreamer<'a, Into=S, Item=(&'a [u8], u64)>, - S: 'm + for<'a> fst::Streamer<'a, Item=(&'a [u8], u64)>, - { - self.inner.push(streamable); - } - - pub fn union(self) -> Union<'m, 'v, T> { - Union { - inner: self.inner.union(), - outs: Vec::new(), - values: self.values, - } - } -} - -pub struct Union<'m, 'v, T: 'v> { - inner: fst::map::Union<'m>, - outs: Vec>, - values: &'v Values, -} - -impl<'a, 'm, 'v, T: 'v + 'a> fst::Streamer<'a> for Union<'m, 'v, T> { - type Item = (&'a [u8], &'a [IndexedValues<'a, T>]); - - fn next(&'a mut self) -> Option { - match self.inner.next() { - Some((s, ivalues)) => { - self.outs.clear(); - for ivalue in ivalues { - let index = ivalue.index; - let values = unsafe { self.values.get_unchecked(ivalue.value as usize) }; - self.outs.push(IndexedValues { index, values }) - } - Some((s, &self.outs)) - }, - None => None, - } - } -} - -#[derive(Copy, Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] -pub struct IndexedValues<'a, T: 'a> { - pub index: usize, - pub values: &'a [T], -} - -pub struct OpWithStateBuilder<'m, 'v, T: 'v, U> { - inner: fst::map::OpWithStateBuilder<'m, U>, - values: &'v Values, -} - -impl<'m, 'v, T: 'v, U: 'static> OpWithStateBuilder<'m, 'v, T, U> { - pub fn new(values: &'v Values) -> Self { - Self { - inner: fst::map::OpWithStateBuilder::new(), - values: values, - } - } - - pub fn add(mut self, streamable: I) -> Self - where - I: for<'a> fst::IntoStreamer<'a, Into=S, Item=(&'a [u8], u64, U)>, - S: 'm + for<'a> fst::Streamer<'a, Item=(&'a [u8], u64, U)>, - { - self.push(streamable); - self - } - - pub fn push(&mut self, streamable: I) - where - I: for<'a> fst::IntoStreamer<'a, Into=S, Item=(&'a [u8], u64, U)>, - S: 'm + for<'a> fst::Streamer<'a, Item=(&'a [u8], u64, U)>, - { - self.inner.push(streamable); - } - - pub fn union(self) -> UnionWithState<'m, 'v, T, U> { - UnionWithState { - inner: self.inner.union(), - outs: Vec::new(), - values: self.values, - } - } -} - -pub struct UnionWithState<'m, 'v, T: 'v, U> { - inner: fst::map::UnionWithState<'m, U>, - outs: Vec>, - values: &'v Values, -} - -impl<'a, 'm, 'v, T: 'v + 'a, U: 'a> fst::Streamer<'a> for UnionWithState<'m, 'v, T, U> -where - U: Clone, -{ - // TODO prefer returning (&[u8], index, value T, state) one by one - type Item = (&'a [u8], &'a [IndexedValuesWithState<'a, T, U>]); - - fn next(&'a mut self) -> Option { - match self.inner.next() { - Some((s, ivalues)) => { - self.outs.clear(); - self.outs.reserve(ivalues.len()); - for ivalue in ivalues { - let index = ivalue.index; - let values = unsafe { self.values.get_unchecked(ivalue.value as usize) }; - let state = ivalue.state.clone(); - self.outs.push(IndexedValuesWithState { index, values, state }) - } - Some((s, &self.outs)) - }, - None => None, - } - } -} - -#[derive(Copy, Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] -pub struct IndexedValuesWithState<'a, T: 'a, U> { - pub index: usize, - pub values: &'a [T], - pub state: U, -} - -pub struct StreamBuilder<'m, 'v, T: 'v, A> { - inner: fst::map::StreamBuilder<'m, A>, - values: &'v Values, -} - -impl<'m, 'v, T: 'v, A> StreamBuilder<'m, 'v, T, A> { - pub fn with_state(self) -> StreamWithStateBuilder<'m, 'v, T, A> { - StreamWithStateBuilder { - inner: self.inner.with_state(), - values: self.values, - } - } -} - -impl<'m, 'v, 'a, T: 'v + 'a, A: Automaton> fst::IntoStreamer<'a> for StreamBuilder<'m, 'v, T, A> { - type Item = >::Item; - type Into = Stream<'m, 'v, T, A>; - - fn into_stream(self) -> Self::Into { - Stream { - inner: self.inner.into_stream(), - values: self.values, - } - } -} - -pub struct Stream<'m, 'v, T: 'v, A: Automaton = fst::automaton::AlwaysMatch> { - inner: fst::map::Stream<'m, A>, - values: &'v Values, -} - -impl<'m, 'v, 'a, T: 'v + 'a, A: Automaton> fst::Streamer<'a> for Stream<'m, 'v, T, A> { - type Item = (&'a [u8], &'a [T]); - - fn next(&'a mut self) -> Option { - // Here we can't just `map` because of some borrow rules - match self.inner.next() { - Some((key, i)) => { - let values = unsafe { self.values.get_unchecked(i as usize) }; - Some((key, values)) - }, - None => None, - } - } -} - -pub struct StreamWithStateBuilder<'m, 'v, T: 'v, A> { - inner: fst::map::StreamWithStateBuilder<'m, A>, - values: &'v Values, -} - -impl<'m, 'v, 'a, T: 'v + 'a, A: 'a> fst::IntoStreamer<'a> for StreamWithStateBuilder<'m, 'v, T, A> -where - A: Automaton, - A::State: Clone, -{ - type Item = >::Item; - type Into = StreamWithState<'m, 'v, T, A>; - - fn into_stream(self) -> Self::Into { - StreamWithState { - inner: self.inner.into_stream(), - values: self.values, - } - } -} - -pub struct StreamWithState<'m, 'v, T: 'v, A: Automaton = fst::automaton::AlwaysMatch> { - inner: fst::map::StreamWithState<'m, A>, - values: &'v Values, -} - -impl<'m, 'v, 'a, T: 'v + 'a, A: 'a> fst::Streamer<'a> for StreamWithState<'m, 'v, T, A> -where - A: Automaton, - A::State: Clone, -{ - type Item = (&'a [u8], &'a [T], A::State); - - fn next(&'a mut self) -> Option { - match self.inner.next() { - Some((key, i, state)) => { - let values = unsafe { self.values.get_unchecked(i as usize) }; - Some((key, values, state)) - }, - None => None, - } - } -} diff --git a/raptor/src/metadata.rs b/raptor/src/metadata.rs new file mode 100644 index 000000000..7f34eebf0 --- /dev/null +++ b/raptor/src/metadata.rs @@ -0,0 +1,427 @@ +use std::sync::Arc; +use std::ops::Deref; +use std::error::Error; +use std::path::Path; +use std::collections::btree_map::{Entry, BTreeMap}; +use std::slice::from_raw_parts; +use std::io::{self, Write}; +use std::mem; +use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt}; +use fst::{self, Map, MapBuilder, Automaton}; +use fst::raw::MmapReadOnly; +use DocIndex; + +#[repr(C)] +struct Range { + start: u64, + end: u64, +} + +#[derive(Clone)] +enum DocIndexesData { + Shared { + vec: Arc>, + offset: usize, + len: usize, + }, + Mmap(MmapReadOnly), +} + +impl Deref for DocIndexesData { + type Target = [u8]; + + fn deref(&self) -> &Self::Target { + match self { + DocIndexesData::Shared { vec, offset, len } => { + &vec[*offset..offset + len] + }, + DocIndexesData::Mmap(m) => m.as_slice(), + } + } +} + +#[derive(Clone)] +pub struct DocIndexes { + ranges: DocIndexesData, + indexes: DocIndexesData, +} + +impl DocIndexes { + pub unsafe fn from_path>(path: P) -> io::Result { + let mmap = MmapReadOnly::open_path(path)?; + + let range_len = mmap.as_slice().read_u64::()?; + let range_len = range_len as usize * mem::size_of::(); + + let offset = mem::size_of::() as usize; + let ranges = DocIndexesData::Mmap(mmap.range(offset, range_len)); + + let len = mmap.len() - range_len - offset; + let offset = offset + range_len; + let indexes = DocIndexesData::Mmap(mmap.range(offset, len)); + + Ok(DocIndexes { ranges, indexes }) + } + + pub fn from_bytes(vec: Vec) -> io::Result { + let vec = Arc::new(vec); + + let range_len = vec.as_slice().read_u64::()?; + let range_len = range_len as usize * mem::size_of::(); + + let offset = mem::size_of::() as usize; + let ranges = DocIndexesData::Shared { + vec: vec.clone(), + offset, + len: range_len + }; + + let len = vec.len() - range_len - offset; + let offset = offset + range_len; + let indexes = DocIndexesData::Shared { vec, offset, len }; + + Ok(DocIndexes { ranges, indexes }) + } + + pub fn get(&self, index: u64) -> Option<&[DocIndex]> { + self.ranges().get(index as usize).map(|Range { start, end }| { + let start = *start as usize; + let end = *end as usize; + &self.indexes()[start..end] + }) + } + + fn ranges(&self) -> &[Range] { + let slice = &self.ranges; + let ptr = slice.as_ptr() as *const Range; + let len = slice.len() / mem::size_of::(); + unsafe { from_raw_parts(ptr, len) } + } + + fn indexes(&self) -> &[DocIndex] { + let slice = &self.indexes; + let ptr = slice.as_ptr() as *const DocIndex; + let len = slice.len() / mem::size_of::(); + unsafe { from_raw_parts(ptr, len) } + } +} + +pub struct Metadata { + map: Map, + indexes: DocIndexes, +} + +impl Metadata { + pub unsafe fn from_paths(map: P, indexes: Q) -> Result> + where P: AsRef, + Q: AsRef, + { + let map = Map::from_path(map)?; + let indexes = DocIndexes::from_path(indexes)?; + Ok(Metadata { map, indexes }) + } + + pub fn from_bytes(map: Vec, indexes: Vec) -> Result> { + let map = Map::from_bytes(map)?; + let indexes = DocIndexes::from_bytes(indexes)?; + Ok(Metadata { map, indexes }) + } + + pub fn get>(&self, key: K) -> Option<&[DocIndex]> { + self.map.get(key).and_then(|index| self.indexes.get(index)) + } + + pub fn as_map(&self) -> &Map { + &self.map + } + + pub fn as_indexes(&self) -> &DocIndexes { + &self.indexes + } + + pub fn explode(self) -> (Map, DocIndexes) { + (self.map, self.indexes) + } +} + +pub struct Inner { + keys: BTreeMap, + indexes: Vec>, + number_docs: usize, +} + +impl Inner { + pub fn new() -> Self { + Inner { + keys: BTreeMap::new(), + indexes: Vec::new(), + number_docs: 0, + } + } + + pub fn number_doc_indexes(&self) -> usize { + self.number_docs + } + + pub fn insert(&mut self, key: String, value: DocIndex) { + match self.keys.entry(key) { + Entry::Vacant(e) => { + let index = self.indexes.len() as u64; + self.indexes.push(vec![value]); + e.insert(index); + }, + Entry::Occupied(e) => { + let index = *e.get(); + let vec = &mut self.indexes[index as usize]; + vec.push(value); + }, + } + self.number_docs += 1; + } +} + +pub struct MetadataBuilder { + inner: Inner, + map: W, + indexes: X, +} + +impl MetadataBuilder { + + pub fn new(map: W, indexes: X) -> Self { + Self { inner: Inner::new(), map, indexes } + } + + pub fn insert(&mut self, key: String, index: DocIndex) { + self.inner.insert(key, index) + } + + pub fn finish(self) -> Result<(), Box> { + self.into_inner().map(|_| ()) + } + + pub fn into_inner(mut self) -> Result<(W, X), Box> { + let number_docs = self.inner.number_doc_indexes(); + + let mut keys_builder = MapBuilder::new(self.map)?; + keys_builder.extend_iter(self.inner.keys)?; + let map = keys_builder.into_inner()?; + + // write down doc_indexes into the indexes Writer + let (ranges, values) = into_sliced_ranges(self.inner.indexes, number_docs); + let len = ranges.len() as u64; + + // TODO check if this is correct + self.indexes.write_u64::(len)?; + unsafe { + // write Ranges first + let slice = into_u8_slice(ranges.as_slice()); + self.indexes.write_all(slice)?; + + // write Values after + let slice = into_u8_slice(values.as_slice()); + self.indexes.write_all(slice)?; + } + self.indexes.flush()?; + + Ok((map, self.indexes)) + } +} + +fn into_sliced_ranges(vecs: Vec>, number_docs: usize) -> (Vec, Vec) { + let cap = vecs.len(); + let mut ranges = Vec::with_capacity(cap); + let mut values = Vec::with_capacity(number_docs); + + for mut v in &vecs { + let len = v.len() as u64; + let start = ranges.last().map(|&Range { end, .. }| end).unwrap_or(0); + + let range = Range { start, end: start + len }; + ranges.push(range); + } + + values.extend(vecs.into_iter().flatten()); + + (ranges, values) +} + +unsafe fn into_u8_slice(slice: &[T]) -> &[u8] { + let ptr = slice.as_ptr() as *const u8; + let len = slice.len() * mem::size_of::(); + from_raw_parts(ptr, len) +} + +pub struct OpWithStateBuilder<'m, 'v, U> { + inner: fst::map::OpWithStateBuilder<'m, U>, + indexes: &'v DocIndexes, +} + +impl<'m, 'v, U: 'static> OpWithStateBuilder<'m, 'v, U> { + pub fn new(indexes: &'v DocIndexes) -> Self { + Self { + inner: fst::map::OpWithStateBuilder::new(), + indexes: indexes, + } + } + + pub fn add(mut self, streamable: I) -> Self + where + I: for<'a> fst::IntoStreamer<'a, Into=S, Item=(&'a [u8], u64, U)>, + S: 'm + for<'a> fst::Streamer<'a, Item=(&'a [u8], u64, U)>, + { + self.push(streamable); + self + } + + pub fn push(&mut self, streamable: I) + where + I: for<'a> fst::IntoStreamer<'a, Into=S, Item=(&'a [u8], u64, U)>, + S: 'm + for<'a> fst::Streamer<'a, Item=(&'a [u8], u64, U)>, + { + self.inner.push(streamable); + } + + pub fn union(self) -> UnionWithState<'m, 'v, U> { + UnionWithState { + inner: self.inner.union(), + outs: Vec::new(), + indexes: self.indexes, + } + } +} + +#[derive(Copy, Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] +pub struct IndexedValuesWithState<'a, U> { + pub index: usize, + pub values: &'a [DocIndex], + pub state: U, +} + +pub struct UnionWithState<'m, 'v, U> { + inner: fst::map::UnionWithState<'m, U>, + outs: Vec>, + indexes: &'v DocIndexes, +} + +impl<'a, 'm, 'v, U: 'a> fst::Streamer<'a> for UnionWithState<'m, 'v, U> +where + U: Clone, +{ + type Item = (&'a [u8], &'a [IndexedValuesWithState<'a, U>]); + + fn next(&'a mut self) -> Option { + match self.inner.next() { + Some((s, ivalues)) => { + self.outs.clear(); + self.outs.reserve(ivalues.len()); + for ivalue in ivalues { + if let Some(values) = self.indexes.get(ivalue.value) { + let index = ivalue.index; + let state = ivalue.state.clone(); + self.outs.push(IndexedValuesWithState { index, values, state }) + } + } + Some((s, &self.outs)) + }, + None => None, + } + } +} + +pub struct StreamWithStateBuilder<'m, 'v, A> { + inner: fst::map::StreamWithStateBuilder<'m, A>, + indexes: &'v DocIndexes, +} + +impl<'m, 'v, 'a, A: 'a> fst::IntoStreamer<'a> for StreamWithStateBuilder<'m, 'v, A> +where + A: Automaton, + A::State: Clone, +{ + type Item = >::Item; + type Into = StreamWithState<'m, 'v, A>; + + fn into_stream(self) -> Self::Into { + StreamWithState { + inner: self.inner.into_stream(), + indexes: self.indexes, + } + } +} + +pub struct StreamWithState<'m, 'v, A: Automaton = fst::automaton::AlwaysMatch> { + inner: fst::map::StreamWithState<'m, A>, + indexes: &'v DocIndexes, +} + +impl<'m, 'v, 'a, A: 'a> fst::Streamer<'a> for StreamWithState<'m, 'v, A> +where + A: Automaton, + A::State: Clone, +{ + type Item = (&'a [u8], &'a [DocIndex], A::State); + + fn next(&'a mut self) -> Option { + match self.inner.next() { + Some((key, i, state)) => { + match self.indexes.get(i) { + Some(values) => Some((key, values, state)), + None => None, + } + }, + None => None, + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn empty_serialize_deserialize() { + let mapw = Vec::new(); + let indexesw = Vec::new(); + + let builder = MetadataBuilder::new(mapw, indexesw); + let (map, indexes) = builder.into_inner().unwrap(); + + let metas = Metadata::from_bytes(map, indexes).unwrap(); + assert_eq!(metas.get("chameau"), None); + } + + #[test] + fn one_doc_serialize_deserialize() { + let mapw = Vec::new(); + let indexesw = Vec::new(); + + let mut builder = MetadataBuilder::new(mapw, indexesw); + + let doc = DocIndex { document: 12, attribute: 1, attribute_index: 22 }; + builder.insert("chameau".into(), doc); + + let (map, indexes) = builder.into_inner().unwrap(); + + let metas = Metadata::from_bytes(map, indexes).unwrap(); + assert_eq!(metas.get("chameau"), Some(&[doc][..])); + } + + #[test] + fn multiple_docs_serialize_deserialize() { + let mapw = Vec::new(); + let indexesw = Vec::new(); + + let mut builder = MetadataBuilder::new(mapw, indexesw); + + let doc1 = DocIndex { document: 12, attribute: 1, attribute_index: 22 }; + let doc2 = DocIndex { document: 31, attribute: 0, attribute_index: 1 }; + builder.insert("chameau".into(), doc1); + builder.insert("chameau".into(), doc2); + + let (map, indexes) = builder.into_inner().unwrap(); + + let metas = Metadata::from_bytes(map, indexes).unwrap(); + assert_eq!(metas.get("chameau"), Some(&[doc1, doc2][..])); + } +} diff --git a/raptor/src/rank/mod.rs b/raptor/src/rank/mod.rs index 0937fc4f5..c6490abce 100644 --- a/raptor/src/rank/mod.rs +++ b/raptor/src/rank/mod.rs @@ -8,11 +8,10 @@ mod exact; use std::cmp::Ordering; use std::collections::HashMap; use std::{mem, vec}; -use DocIndexMap; use fst; use levenshtein::Levenshtein; -use map::{OpWithStateBuilder, UnionWithState, Values}; -use {Match, DocIndex, DocumentId}; +use metadata::{DocIndexes, OpWithStateBuilder, UnionWithState}; +use {Match, DocumentId}; use group_by::GroupByMut; use self::sum_of_typos::sum_of_typos; @@ -117,7 +116,7 @@ impl IntoIterator for Pool { pub enum RankedStream<'m, 'v> { Fed { - inner: UnionWithState<'m, 'v, DocIndex, u32>, + inner: UnionWithState<'m, 'v, u32>, automatons: Vec, pool: Pool, }, @@ -127,11 +126,11 @@ pub enum RankedStream<'m, 'v> { } impl<'m, 'v> RankedStream<'m, 'v> { - pub fn new(map: &'m DocIndexMap, values: &'v Values, automatons: Vec, limit: usize) -> Self { - let mut op = OpWithStateBuilder::new(values); + pub fn new(map: &'m fst::Map, indexes: &'v DocIndexes, automatons: Vec, limit: usize) -> Self { + let mut op = OpWithStateBuilder::new(indexes); for automaton in automatons.iter().map(|l| l.dfa.clone()) { - let stream = map.as_map().search(automaton).with_state(); + let stream = map.search(automaton).with_state(); op.push(stream); } diff --git a/raptor/src/rank/sum_of_words_position.rs b/raptor/src/rank/sum_of_words_position.rs index 7e32e4bff..dc523bb8f 100644 --- a/raptor/src/rank/sum_of_words_position.rs +++ b/raptor/src/rank/sum_of_words_position.rs @@ -3,10 +3,10 @@ use Match; use rank::{match_query_index, Document}; use group_by::GroupBy; -pub fn sum_of_words_position(lhs: &Document, rhs: &Document) -> Ordering { - let key = |matches: &[Match]| -> u32 { - GroupBy::new(matches, match_query_index).map(|m| m[0].attribute_index).sum() - }; +fn key(matches: &[Match]) -> u32 { + GroupBy::new(matches, match_query_index).map(|m| m[0].attribute_index).sum() +} +pub fn sum_of_words_position(lhs: &Document, rhs: &Document) -> Ordering { key(&lhs.matches).cmp(&key(&rhs.matches)) }