mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-22 21:04:27 +01:00
feat: Improve the indexing time a little bit
...by a factor of 17.6x.
This commit is contained in:
parent
a2152a4064
commit
e5c54c4399
602
Cargo.lock
generated
602
Cargo.lock
generated
@ -1,11 +1,3 @@
|
||||
[[package]]
|
||||
name = "arrayvec"
|
||||
version = "0.4.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"nodrop 0.1.12 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "base64"
|
||||
version = "0.5.2"
|
||||
@ -14,27 +6,13 @@ dependencies = [
|
||||
"byteorder 1.2.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "bincode"
|
||||
version = "1.0.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"byteorder 1.2.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"serde 1.0.69 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "bitflags"
|
||||
version = "1.0.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "blob"
|
||||
version = "0.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"base64 0.5.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"serde 1.0.69 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"serde 1.0.70 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@ -47,15 +25,6 @@ name = "byteorder"
|
||||
version = "1.2.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "bytes"
|
||||
version = "0.4.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"byteorder 1.2.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"iovec 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "bzip2-sys"
|
||||
version = "0.1.6"
|
||||
@ -70,11 +39,6 @@ name = "cc"
|
||||
version = "1.0.17"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "cfg-if"
|
||||
version = "0.1.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "cmake"
|
||||
version = "0.1.31"
|
||||
@ -91,36 +55,6 @@ dependencies = [
|
||||
"build_const 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "crossbeam-deque"
|
||||
version = "0.3.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"crossbeam-epoch 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"crossbeam-utils 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "crossbeam-epoch"
|
||||
version = "0.4.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"arrayvec 0.4.7 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"cfg-if 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"crossbeam-utils 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"lazy_static 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"memoffset 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"scopeguard 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "crossbeam-utils"
|
||||
version = "0.3.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"cfg-if 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "dtoa"
|
||||
version = "0.4.3"
|
||||
@ -131,14 +65,6 @@ name = "elapsed"
|
||||
version = "0.1.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "env_logger"
|
||||
version = "0.3.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"log 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "fst"
|
||||
version = "0.3.0"
|
||||
@ -148,25 +74,6 @@ dependencies = [
|
||||
"memmap 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "fuchsia-zircon"
|
||||
version = "0.3.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"bitflags 1.0.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"fuchsia-zircon-sys 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "fuchsia-zircon-sys"
|
||||
version = "0.3.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "futures"
|
||||
version = "0.1.21"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "gcc"
|
||||
version = "0.3.54"
|
||||
@ -182,54 +89,11 @@ name = "group-by"
|
||||
version = "0.1.0"
|
||||
source = "git+https://github.com/Kerollmops/group-by.git#034fadc462dc511ed53f44f6091f8707a27ca392"
|
||||
|
||||
[[package]]
|
||||
name = "httparse"
|
||||
version = "1.3.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "idna"
|
||||
version = "0.1.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"matches 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"unicode-bidi 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"unicode-normalization 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "iovec"
|
||||
version = "0.1.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"libc 0.2.42 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "itoa"
|
||||
version = "0.4.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "kernel32-sys"
|
||||
version = "0.2.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"winapi-build 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "lazy_static"
|
||||
version = "1.0.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "lazycell"
|
||||
version = "0.6.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "levenshtein_automata"
|
||||
version = "0.1.1"
|
||||
@ -269,22 +133,6 @@ dependencies = [
|
||||
"vcpkg 0.2.4 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "log"
|
||||
version = "0.3.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"log 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "log"
|
||||
version = "0.4.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"cfg-if 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "lz4-sys"
|
||||
version = "1.8.0"
|
||||
@ -294,11 +142,6 @@ dependencies = [
|
||||
"libc 0.2.42 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "matches"
|
||||
version = "0.1.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "memmap"
|
||||
version = "0.6.2"
|
||||
@ -308,68 +151,6 @@ dependencies = [
|
||||
"winapi 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "memoffset"
|
||||
version = "0.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "mio"
|
||||
version = "0.6.15"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"fuchsia-zircon 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"fuchsia-zircon-sys 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"iovec 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"lazycell 0.6.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"libc 0.2.42 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"log 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"miow 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"net2 0.2.33 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"slab 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "miow"
|
||||
version = "0.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"net2 0.2.33 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"ws2_32-sys 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "net2"
|
||||
version = "0.2.33"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"cfg-if 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"libc 0.2.42 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"winapi 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "nodrop"
|
||||
version = "0.1.12"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "num_cpus"
|
||||
version = "1.8.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"libc 0.2.42 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "percent-encoding"
|
||||
version = "1.0.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "pkg-config"
|
||||
version = "0.3.11"
|
||||
@ -391,53 +172,15 @@ dependencies = [
|
||||
"proc-macro2 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rand"
|
||||
version = "0.3.22"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"fuchsia-zircon 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"libc 0.2.42 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"rand 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rand"
|
||||
version = "0.4.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"fuchsia-zircon 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"libc 0.2.42 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"winapi 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "raptor"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"bincode 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"byteorder 1.2.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"fst 0.3.0 (git+https://github.com/Kerollmops/fst.git?branch=op-builder-with-state)",
|
||||
"group-by 0.1.0 (git+https://github.com/Kerollmops/group-by.git)",
|
||||
"levenshtein_automata 0.1.1 (git+https://github.com/Kerollmops/levenshtein-automata.git?branch=custom-fst)",
|
||||
"serde 1.0.69 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"serde_derive 1.0.69 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "raptor-http"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"env_logger 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"fst 0.3.0 (git+https://github.com/Kerollmops/fst.git?branch=op-builder-with-state)",
|
||||
"futures 0.1.21 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"raptor 0.1.0",
|
||||
"rocksdb 0.3.0 (git+https://github.com/pingcap/rust-rocksdb.git)",
|
||||
"serde 1.0.69 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"serde_derive 1.0.69 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"tokio-minihttp 0.1.0 (git+https://github.com/Kerollmops/tokio-minihttp.git)",
|
||||
"tokio-proto 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"tokio-service 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"url 1.7.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@ -446,8 +189,8 @@ version = "0.1.0"
|
||||
dependencies = [
|
||||
"raptor 0.1.0",
|
||||
"rocksdb 0.3.0 (git+https://github.com/pingcap/rust-rocksdb.git)",
|
||||
"serde 1.0.69 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"serde_derive 1.0.69 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"serde 1.0.70 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"serde_derive 1.0.70 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"serde_json 1.0.22 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"unidecode 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
@ -457,19 +200,11 @@ name = "raptor-search"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"elapsed 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"env_logger 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"fst 0.3.0 (git+https://github.com/Kerollmops/fst.git?branch=op-builder-with-state)",
|
||||
"raptor 0.1.0",
|
||||
"rocksdb 0.3.0 (git+https://github.com/pingcap/rust-rocksdb.git)",
|
||||
"serde 1.0.69 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"serde_derive 1.0.69 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "redox_syscall"
|
||||
version = "0.1.40"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "rocksdb"
|
||||
version = "0.3.0"
|
||||
@ -480,24 +215,14 @@ dependencies = [
|
||||
"librocksdb_sys 0.1.0 (git+https://github.com/pingcap/rust-rocksdb.git)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "scoped-tls"
|
||||
version = "0.1.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "scopeguard"
|
||||
version = "0.3.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "serde"
|
||||
version = "1.0.69"
|
||||
version = "1.0.70"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "serde_derive"
|
||||
version = "1.0.69"
|
||||
version = "1.0.70"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"proc-macro2 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
@ -512,24 +237,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"dtoa 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"itoa 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"serde 1.0.69 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"serde 1.0.70 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "slab"
|
||||
version = "0.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "slab"
|
||||
version = "0.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "smallvec"
|
||||
version = "0.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "snappy-sys"
|
||||
version = "0.1.0"
|
||||
@ -550,211 +260,6 @@ dependencies = [
|
||||
"unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "take"
|
||||
version = "0.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "time"
|
||||
version = "0.1.40"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"libc 0.2.42 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"redox_syscall 0.1.40 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"winapi 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tokio"
|
||||
version = "0.1.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"futures 0.1.21 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"mio 0.6.15 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"tokio-executor 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"tokio-fs 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"tokio-io 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"tokio-reactor 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"tokio-tcp 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"tokio-threadpool 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"tokio-timer 0.2.4 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"tokio-udp 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tokio-codec"
|
||||
version = "0.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"bytes 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"futures 0.1.21 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"tokio-io 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tokio-core"
|
||||
version = "0.1.17"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"bytes 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"futures 0.1.21 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"iovec 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"log 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"mio 0.6.15 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"scoped-tls 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"tokio 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"tokio-executor 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"tokio-io 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"tokio-reactor 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"tokio-timer 0.2.4 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tokio-executor"
|
||||
version = "0.1.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"futures 0.1.21 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tokio-fs"
|
||||
version = "0.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"futures 0.1.21 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"tokio-io 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"tokio-threadpool 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tokio-io"
|
||||
version = "0.1.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"bytes 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"futures 0.1.21 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"log 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tokio-minihttp"
|
||||
version = "0.1.0"
|
||||
source = "git+https://github.com/Kerollmops/tokio-minihttp.git#1e3f5655e4e64171b87e80d5f872db1345eab54a"
|
||||
dependencies = [
|
||||
"bytes 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"futures 0.1.21 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"httparse 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"log 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"net2 0.2.33 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"time 0.1.40 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"tokio-core 0.1.17 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"tokio-io 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"tokio-proto 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"tokio-service 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tokio-proto"
|
||||
version = "0.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"futures 0.1.21 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"log 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"net2 0.2.33 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"rand 0.3.22 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"slab 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"smallvec 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"take 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"tokio-core 0.1.17 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"tokio-io 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"tokio-service 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tokio-reactor"
|
||||
version = "0.1.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"futures 0.1.21 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"log 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"mio 0.6.15 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"slab 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"tokio-executor 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"tokio-io 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tokio-service"
|
||||
version = "0.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"futures 0.1.21 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tokio-tcp"
|
||||
version = "0.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"bytes 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"futures 0.1.21 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"iovec 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"mio 0.6.15 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"tokio-io 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"tokio-reactor 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tokio-threadpool"
|
||||
version = "0.1.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"crossbeam-deque 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"futures 0.1.21 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"log 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"num_cpus 1.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"rand 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"tokio-executor 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tokio-timer"
|
||||
version = "0.2.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"futures 0.1.21 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"tokio-executor 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tokio-udp"
|
||||
version = "0.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"bytes 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"futures 0.1.21 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"log 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"mio 0.6.15 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"tokio-codec 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"tokio-io 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"tokio-reactor 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "unicode-bidi"
|
||||
version = "0.3.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"matches 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "unicode-normalization"
|
||||
version = "0.1.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "unicode-xid"
|
||||
version = "0.1.0"
|
||||
@ -765,26 +270,11 @@ name = "unidecode"
|
||||
version = "0.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "url"
|
||||
version = "1.7.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"idna 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"matches 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"percent-encoding 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "vcpkg"
|
||||
version = "0.2.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "winapi"
|
||||
version = "0.2.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "winapi"
|
||||
version = "0.3.5"
|
||||
@ -794,11 +284,6 @@ dependencies = [
|
||||
"winapi-x86_64-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "winapi-build"
|
||||
version = "0.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "winapi-i686-pc-windows-gnu"
|
||||
version = "0.4.0"
|
||||
@ -809,19 +294,10 @@ name = "winapi-x86_64-pc-windows-gnu"
|
||||
version = "0.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "ws2_32-sys"
|
||||
version = "0.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"winapi-build 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "zstd-sys"
|
||||
version = "1.4.4+zstd.1.3.5"
|
||||
source = "git+https://github.com/gyscos/zstd-rs.git#9ff4442c1977fad400f90d9c48e4f114c474117c"
|
||||
source = "git+https://github.com/gyscos/zstd-rs.git#0100c8483ce88c9ab359d1aecdd199641dfb8b5e"
|
||||
dependencies = [
|
||||
"blob 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"gcc 0.3.54 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
@ -830,98 +306,40 @@ dependencies = [
|
||||
]
|
||||
|
||||
[metadata]
|
||||
"checksum arrayvec 0.4.7 (registry+https://github.com/rust-lang/crates.io-index)" = "a1e964f9e24d588183fcb43503abda40d288c8657dfc27311516ce2f05675aef"
|
||||
"checksum base64 0.5.2 (registry+https://github.com/rust-lang/crates.io-index)" = "30e93c03064e7590d0466209155251b90c22e37fab1daf2771582598b5827557"
|
||||
"checksum bincode 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "9f2fb9e29e72fd6bc12071533d5dc7664cb01480c59406f656d7ac25c7bd8ff7"
|
||||
"checksum bitflags 1.0.3 (registry+https://github.com/rust-lang/crates.io-index)" = "d0c54bb8f454c567f21197eefcdbf5679d0bd99f2ddbe52e84c77061952e6789"
|
||||
"checksum blob 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "122c3fa3949d822d2a51c648db9e8105d6e75b89dc628cc366901d3d396fa4f4"
|
||||
"checksum build_const 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "39092a32794787acd8525ee150305ff051b0aa6cc2abaf193924f5ab05425f39"
|
||||
"checksum byteorder 1.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "74c0b906e9446b0a2e4f760cdb3fa4b2c48cdc6db8766a845c54b6ff063fd2e9"
|
||||
"checksum bytes 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)" = "7dd32989a66957d3f0cba6588f15d4281a733f4e9ffc43fcd2385f57d3bf99ff"
|
||||
"checksum bzip2-sys 0.1.6 (git+https://github.com/alexcrichton/bzip2-rs.git)" = "<none>"
|
||||
"checksum cc 1.0.17 (registry+https://github.com/rust-lang/crates.io-index)" = "49ec142f5768efb5b7622aebc3fdbdbb8950a4b9ba996393cb76ef7466e8747d"
|
||||
"checksum cfg-if 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "efe5c877e17a9c717a0bf3613b2709f723202c4e4675cc8f12926ded29bcb17e"
|
||||
"checksum cmake 0.1.31 (registry+https://github.com/rust-lang/crates.io-index)" = "95470235c31c726d72bf2e1f421adc1e65b9d561bf5529612cbe1a72da1467b3"
|
||||
"checksum crc 1.8.1 (registry+https://github.com/rust-lang/crates.io-index)" = "d663548de7f5cca343f1e0a48d14dcfb0e9eb4e079ec58883b7251539fa10aeb"
|
||||
"checksum crossbeam-deque 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "fe8153ef04a7594ded05b427ffad46ddeaf22e63fd48d42b3e1e3bb4db07cae7"
|
||||
"checksum crossbeam-epoch 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)" = "2af0e75710d6181e234c8ecc79f14a97907850a541b13b0be1dd10992f2e4620"
|
||||
"checksum crossbeam-utils 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "d636a8b3bcc1b409d7ffd3facef8f21dcb4009626adbd0c5e6c4305c07253c7b"
|
||||
"checksum dtoa 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)" = "6d301140eb411af13d3115f9a562c85cc6b541ade9dfa314132244aaee7489dd"
|
||||
"checksum elapsed 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "6f4e5af126dafd0741c2ad62d47f68b28602550102e5f0dd45c8a97fc8b49c29"
|
||||
"checksum env_logger 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "15abd780e45b3ea4f76b4e9a26ff4843258dd8a3eed2775a0e7368c2e7936c2f"
|
||||
"checksum fst 0.3.0 (git+https://github.com/Kerollmops/fst.git?branch=op-builder-with-state)" = "<none>"
|
||||
"checksum fuchsia-zircon 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "2e9763c69ebaae630ba35f74888db465e49e259ba1bc0eda7d06f4a067615d82"
|
||||
"checksum fuchsia-zircon-sys 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "3dcaa9ae7725d12cdb85b3ad99a434db70b468c09ded17e012d86b5c1010f7a7"
|
||||
"checksum futures 0.1.21 (registry+https://github.com/rust-lang/crates.io-index)" = "1a70b146671de62ec8c8ed572219ca5d594d9b06c0b364d5e67b722fc559b48c"
|
||||
"checksum gcc 0.3.54 (registry+https://github.com/rust-lang/crates.io-index)" = "5e33ec290da0d127825013597dbdfc28bee4964690c7ce1166cbc2a7bd08b1bb"
|
||||
"checksum glob 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)" = "8be18de09a56b60ed0edf84bc9df007e30040691af7acd1c41874faac5895bfb"
|
||||
"checksum group-by 0.1.0 (git+https://github.com/Kerollmops/group-by.git)" = "<none>"
|
||||
"checksum httparse 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7b6288d7db100340ca12873fd4d08ad1b8f206a9457798dfb17c018a33fee540"
|
||||
"checksum idna 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "38f09e0f0b1fb55fdee1f17470ad800da77af5186a1a76c026b679358b7e844e"
|
||||
"checksum iovec 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "dbe6e417e7d0975db6512b90796e8ce223145ac4e33c377e4a42882a0e88bb08"
|
||||
"checksum itoa 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)" = "5adb58558dcd1d786b5f0bd15f3226ee23486e24b7b58304b60f64dc68e62606"
|
||||
"checksum kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7507624b29483431c0ba2d82aece8ca6cdba9382bff4ddd0f7490560c056098d"
|
||||
"checksum lazy_static 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "e6412c5e2ad9584b0b8e979393122026cdd6d2a80b933f890dcd694ddbe73739"
|
||||
"checksum lazycell 0.6.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a6f08839bc70ef4a3fe1d566d5350f519c5912ea86be0df1740a7d247c7fc0ef"
|
||||
"checksum levenshtein_automata 0.1.1 (git+https://github.com/Kerollmops/levenshtein-automata.git?branch=custom-fst)" = "<none>"
|
||||
"checksum libc 0.2.42 (registry+https://github.com/rust-lang/crates.io-index)" = "b685088df2b950fccadf07a7187c8ef846a959c142338a48f9dc0b94517eb5f1"
|
||||
"checksum librocksdb_sys 0.1.0 (git+https://github.com/pingcap/rust-rocksdb.git)" = "<none>"
|
||||
"checksum libz-sys 1.0.18 (git+https://github.com/busyjay/libz-sys.git?branch=static-link)" = "<none>"
|
||||
"checksum log 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)" = "e19e8d5c34a3e0e2223db8e060f9e8264aeeb5c5fc64a4ee9965c062211c024b"
|
||||
"checksum log 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)" = "61bd98ae7f7b754bc53dca7d44b604f733c6bba044ea6f41bc8d89272d8161d2"
|
||||
"checksum lz4-sys 1.8.0 (git+https://github.com/busyjay/lz4-rs.git?branch=adjust-build)" = "<none>"
|
||||
"checksum matches 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "100aabe6b8ff4e4a7e32c1c13523379802df0772b82466207ac25b013f193376"
|
||||
"checksum memmap 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)" = "e2ffa2c986de11a9df78620c01eeaaf27d94d3ff02bf81bfcca953102dd0c6ff"
|
||||
"checksum memoffset 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "0f9dc261e2b62d7a622bf416ea3c5245cdd5d9a7fcc428c0d06804dfce1775b3"
|
||||
"checksum mio 0.6.15 (registry+https://github.com/rust-lang/crates.io-index)" = "4fcfcb32d63961fb6f367bfd5d21e4600b92cd310f71f9dca25acae196eb1560"
|
||||
"checksum miow 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "8c1f2f3b1cf331de6896aabf6e9d55dca90356cc9960cca7eaaf408a355ae919"
|
||||
"checksum net2 0.2.33 (registry+https://github.com/rust-lang/crates.io-index)" = "42550d9fb7b6684a6d404d9fa7250c2eb2646df731d1c06afc06dcee9e1bcf88"
|
||||
"checksum nodrop 0.1.12 (registry+https://github.com/rust-lang/crates.io-index)" = "9a2228dca57108069a5262f2ed8bd2e82496d2e074a06d1ccc7ce1687b6ae0a2"
|
||||
"checksum num_cpus 1.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = "c51a3322e4bca9d212ad9a158a02abc6934d005490c054a2778df73a70aa0a30"
|
||||
"checksum percent-encoding 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "31010dd2e1ac33d5b46a5b413495239882813e0369f8ed8a5e266f173602f831"
|
||||
"checksum pkg-config 0.3.11 (registry+https://github.com/rust-lang/crates.io-index)" = "110d5ee3593dbb73f56294327fe5668bcc997897097cbc76b51e7aed3f52452f"
|
||||
"checksum proc-macro2 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)" = "effdb53b25cdad54f8f48843d67398f7ef2e14f12c1b4cb4effc549a6462a4d6"
|
||||
"checksum quote 0.6.3 (registry+https://github.com/rust-lang/crates.io-index)" = "e44651a0dc4cdd99f71c83b561e221f714912d11af1a4dff0631f923d53af035"
|
||||
"checksum rand 0.3.22 (registry+https://github.com/rust-lang/crates.io-index)" = "15a732abf9d20f0ad8eeb6f909bf6868722d9a06e1e50802b6a70351f40b4eb1"
|
||||
"checksum rand 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)" = "eba5f8cb59cc50ed56be8880a5c7b496bfd9bd26394e176bc67884094145c2c5"
|
||||
"checksum redox_syscall 0.1.40 (registry+https://github.com/rust-lang/crates.io-index)" = "c214e91d3ecf43e9a4e41e578973adeb14b474f2bee858742d127af75a0112b1"
|
||||
"checksum rocksdb 0.3.0 (git+https://github.com/pingcap/rust-rocksdb.git)" = "<none>"
|
||||
"checksum scoped-tls 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "332ffa32bf586782a3efaeb58f127980944bbc8c4d6913a86107ac2a5ab24b28"
|
||||
"checksum scopeguard 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "94258f53601af11e6a49f722422f6e3425c52b06245a5cf9bc09908b174f5e27"
|
||||
"checksum serde 1.0.69 (registry+https://github.com/rust-lang/crates.io-index)" = "210e5a3b159c566d7527e9b22e44be73f2e0fcc330bb78fef4dbccb56d2e74c8"
|
||||
"checksum serde_derive 1.0.69 (registry+https://github.com/rust-lang/crates.io-index)" = "dd724d68017ae3a7e63600ee4b2fdb3cad2158ffd1821d44aff4580f63e2b593"
|
||||
"checksum serde 1.0.70 (registry+https://github.com/rust-lang/crates.io-index)" = "0c3adf19c07af6d186d91dae8927b83b0553d07ca56cbf7f2f32560455c91920"
|
||||
"checksum serde_derive 1.0.70 (registry+https://github.com/rust-lang/crates.io-index)" = "3525a779832b08693031b8ecfb0de81cd71cfd3812088fafe9a7496789572124"
|
||||
"checksum serde_json 1.0.22 (registry+https://github.com/rust-lang/crates.io-index)" = "84b8035cabe9b35878adec8ac5fe03d5f6bc97ff6edd7ccb96b44c1276ba390e"
|
||||
"checksum slab 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "17b4fcaed89ab08ef143da37bc52adbcc04d4a69014f4c1208d6b51f0c47bc23"
|
||||
"checksum slab 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "fdeff4cd9ecff59ec7e3744cbca73dfe5ac35c2aedb2cfba8a1c715a18912e9d"
|
||||
"checksum smallvec 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "4c8cbcd6df1e117c2210e13ab5109635ad68a929fcbb8964dc965b76cb5ee013"
|
||||
"checksum snappy-sys 0.1.0 (git+https://github.com/busyjay/rust-snappy.git?branch=static-link)" = "<none>"
|
||||
"checksum syn 0.14.4 (registry+https://github.com/rust-lang/crates.io-index)" = "2beff8ebc3658f07512a413866875adddd20f4fd47b2a4e6c9da65cd281baaea"
|
||||
"checksum take 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b157868d8ac1f56b64604539990685fa7611d8fa9e5476cf0c02cf34d32917c5"
|
||||
"checksum time 0.1.40 (registry+https://github.com/rust-lang/crates.io-index)" = "d825be0eb33fda1a7e68012d51e9c7f451dc1a69391e7fdc197060bb8c56667b"
|
||||
"checksum tokio 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)" = "8ee337e5f4e501fc32966fec6fe0ca0cc1c237b0b1b14a335f8bfe3c5f06e286"
|
||||
"checksum tokio-codec 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "881e9645b81c2ce95fcb799ded2c29ffb9f25ef5bef909089a420e5961dd8ccb"
|
||||
"checksum tokio-core 0.1.17 (registry+https://github.com/rust-lang/crates.io-index)" = "aeeffbbb94209023feaef3c196a41cbcdafa06b4a6f893f68779bb5e53796f71"
|
||||
"checksum tokio-executor 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "8cac2a7883ff3567e9d66bb09100d09b33d90311feca0206c7ca034bc0c55113"
|
||||
"checksum tokio-fs 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "fc42bae2f6e33865b99069d95bcddfc85c9f0849b4e7e7399eeee71956ef34d7"
|
||||
"checksum tokio-io 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)" = "a5c9635ee806f26d302b8baa1e145689a280d8f5aa8d0552e7344808da54cc21"
|
||||
"checksum tokio-minihttp 0.1.0 (git+https://github.com/Kerollmops/tokio-minihttp.git)" = "<none>"
|
||||
"checksum tokio-proto 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "8fbb47ae81353c63c487030659494b295f6cb6576242f907f203473b191b0389"
|
||||
"checksum tokio-reactor 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "e00ec63bbec2c97ce1178cb0587b2c438b2f6b09d3ee54a33c45a9cf0d530810"
|
||||
"checksum tokio-service 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "24da22d077e0f15f55162bdbdc661228c1581892f52074fb242678d015b45162"
|
||||
"checksum tokio-tcp 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ec9b094851aadd2caf83ba3ad8e8c4ce65a42104f7b94d9e6550023f0407853f"
|
||||
"checksum tokio-threadpool 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "24ab84f574027b0e875378f31575cf175360891919e93a3490f07e76e00e4efb"
|
||||
"checksum tokio-timer 0.2.4 (registry+https://github.com/rust-lang/crates.io-index)" = "028b94314065b90f026a21826cffd62a4e40a92cda3e5c069cc7b02e5945f5e9"
|
||||
"checksum tokio-udp 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "43eb534af6e8f37d43ab1b612660df14755c42bd003c5f8d2475ee78cc4600c0"
|
||||
"checksum unicode-bidi 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)" = "49f2bd0c6468a8230e1db229cff8029217cf623c767ea5d60bfbd42729ea54d5"
|
||||
"checksum unicode-normalization 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)" = "6a0180bc61fc5a987082bfa111f4cc95c4caff7f9799f3e46df09163a937aa25"
|
||||
"checksum unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "fc72304796d0818e357ead4e000d19c9c174ab23dc11093ac919054d20a6a7fc"
|
||||
"checksum unidecode 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "402bb19d8e03f1d1a7450e2bd613980869438e0666331be3e073089124aa1adc"
|
||||
"checksum url 1.7.1 (registry+https://github.com/rust-lang/crates.io-index)" = "2a321979c09843d272956e73700d12c4e7d3d92b2ee112b31548aef0d4efc5a6"
|
||||
"checksum vcpkg 0.2.4 (registry+https://github.com/rust-lang/crates.io-index)" = "cbe533e138811704c0e3cbde65a818b35d3240409b4346256c5ede403e082474"
|
||||
"checksum winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)" = "167dc9d6949a9b857f3451275e911c3f44255842c1f7a76f33c55103a909087a"
|
||||
"checksum winapi 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "773ef9dcc5f24b7d850d0ff101e542ff24c3b090a9768e03ff889fdef41f00fd"
|
||||
"checksum winapi-build 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "2d315eee3b34aca4797b2da6b13ed88266e6d612562a0c46390af8299fc699bc"
|
||||
"checksum winapi-i686-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
|
||||
"checksum winapi-x86_64-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
|
||||
"checksum ws2_32-sys 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "d59cefebd0c892fa2dd6de581e937301d8552cb44489cdff035c6187cb63fa5e"
|
||||
"checksum zstd-sys 1.4.4+zstd.1.3.5 (git+https://github.com/gyscos/zstd-rs.git)" = "<none>"
|
||||
|
@ -3,7 +3,6 @@ members = [
|
||||
"raptor",
|
||||
"raptor-indexer",
|
||||
"raptor-search",
|
||||
"raptor-http",
|
||||
]
|
||||
|
||||
[profile.release]
|
||||
|
3
raptor-http/.gitignore
vendored
3
raptor-http/.gitignore
vendored
@ -1,3 +0,0 @@
|
||||
|
||||
/target
|
||||
**/*.rs.bk
|
@ -1,22 +0,0 @@
|
||||
[package]
|
||||
name = "raptor-http"
|
||||
version = "0.1.0"
|
||||
authors = ["Kerollmops <renault.cle@gmail.com>"]
|
||||
|
||||
[dependencies]
|
||||
env_logger = { version = "0.3", default-features = false }
|
||||
raptor = { path = "../raptor" }
|
||||
futures = "0.1"
|
||||
serde = "1.0"
|
||||
serde_derive = "1.0"
|
||||
tokio-minihttp = { git = "https://github.com/Kerollmops/tokio-minihttp.git" }
|
||||
tokio-proto = "0.1"
|
||||
tokio-service = "0.1"
|
||||
url = "1.7"
|
||||
|
||||
[dependencies.fst]
|
||||
git = "https://github.com/Kerollmops/fst.git"
|
||||
branch = "op-builder-with-state"
|
||||
|
||||
[dependencies.rocksdb]
|
||||
git = "https://github.com/pingcap/rust-rocksdb.git"
|
@ -1,102 +0,0 @@
|
||||
extern crate env_logger;
|
||||
extern crate rocksdb;
|
||||
extern crate fst;
|
||||
extern crate futures;
|
||||
extern crate raptor;
|
||||
extern crate tokio_minihttp;
|
||||
extern crate tokio_proto;
|
||||
extern crate tokio_service;
|
||||
extern crate url;
|
||||
|
||||
use std::{io, fs};
|
||||
use std::sync::Arc;
|
||||
|
||||
use fst::Streamer;
|
||||
use futures::future;
|
||||
use rocksdb::{DB, DBOptions};
|
||||
use tokio_minihttp::{Request, Response, Http};
|
||||
use tokio_proto::TcpServer;
|
||||
use tokio_service::Service;
|
||||
|
||||
use raptor::{DocIndexMap, RankedStream, LevBuilder};
|
||||
|
||||
struct MainService {
|
||||
map: Arc<DocIndexMap>,
|
||||
lev_builder: Arc<LevBuilder>,
|
||||
db: Arc<DB>,
|
||||
}
|
||||
|
||||
impl Service for MainService {
|
||||
type Request = Request;
|
||||
type Response = Response;
|
||||
type Error = io::Error;
|
||||
type Future = future::Ok<Response, io::Error>;
|
||||
|
||||
fn call(&self, request: Request) -> Self::Future {
|
||||
|
||||
let url = format!("http://raptor.net{}", request.path());
|
||||
let url = url::Url::parse(&url).unwrap();
|
||||
|
||||
let mut resp = Response::new();
|
||||
resp.header("Content-Type", "text/html");
|
||||
resp.header("charset", "utf-8");
|
||||
|
||||
if let Some((_, query)) = url.query_pairs().find(|&(ref k, _)| k == "q") {
|
||||
let query = query.to_lowercase();
|
||||
|
||||
let mut automatons = Vec::new();
|
||||
|
||||
for query in query.split_whitespace() {
|
||||
let lev = self.lev_builder.get_automaton(query);
|
||||
automatons.push(lev);
|
||||
}
|
||||
|
||||
let mut limit = 20;
|
||||
let mut stream = RankedStream::new(&self.map, self.map.values(), automatons.clone(), 20);
|
||||
|
||||
let mut body = String::new();
|
||||
body.push_str("<html><body>");
|
||||
|
||||
while let Some(document_id) = stream.next() {
|
||||
if limit == 0 { break }
|
||||
|
||||
body.push_str(&format!("<p>{:?}</p>", document_id));
|
||||
|
||||
limit -= 1;
|
||||
}
|
||||
|
||||
body.push_str("</body></html>");
|
||||
|
||||
resp.body_vec(body.into_bytes());
|
||||
}
|
||||
|
||||
future::ok(resp)
|
||||
}
|
||||
}
|
||||
|
||||
fn main() {
|
||||
drop(env_logger::init());
|
||||
|
||||
let addr = "0.0.0.0:8080".parse().unwrap();
|
||||
|
||||
let lev_builder = Arc::new(LevBuilder::new());
|
||||
let map = {
|
||||
let fst = fs::read("map.fst").unwrap();
|
||||
let values = fs::read("values.vecs").unwrap();
|
||||
let map = DocIndexMap::from_bytes(fst, &values).unwrap();
|
||||
Arc::new(map)
|
||||
};
|
||||
|
||||
let db = {
|
||||
let opts = DBOptions::new();
|
||||
let error_if_log_file_exist = false;
|
||||
let db = DB::open_for_read_only(opts, "rocksdb/storage", error_if_log_file_exist).unwrap();
|
||||
Arc::new(db)
|
||||
};
|
||||
|
||||
TcpServer::new(Http, addr).serve(move || Ok(MainService {
|
||||
map: map.clone(),
|
||||
lev_builder: lev_builder.clone(),
|
||||
db: db.clone(),
|
||||
}))
|
||||
}
|
@ -13,7 +13,7 @@ use std::fs::{self, File};
|
||||
use std::io::{self, BufReader, BufRead};
|
||||
use std::iter;
|
||||
|
||||
use raptor::{DocIndexMapBuilder, DocIndexMap, DocIndex};
|
||||
use raptor::{MetadataBuilder, Metadata, DocIndex};
|
||||
use rocksdb::{DB, WriteBatch, Writable};
|
||||
use serde_json::from_str;
|
||||
use unidecode::unidecode;
|
||||
@ -62,11 +62,11 @@ fn main() {
|
||||
}
|
||||
};
|
||||
|
||||
let map_file = "map.fst";
|
||||
let values_file = "values.vecs";
|
||||
let map_file = "map.meta";
|
||||
let indexes_file = "indexes.meta";
|
||||
let rocksdb_file = "rocksdb/storage";
|
||||
|
||||
for file in &[map_file, values_file, rocksdb_file] {
|
||||
for file in &[map_file, indexes_file, rocksdb_file] {
|
||||
match is_readonly(file) {
|
||||
Ok(true) => panic!("the {:?} file is readonly, please make it writeable", file),
|
||||
Err(ref e) if e.kind() == io::ErrorKind::NotFound => (),
|
||||
@ -75,10 +75,12 @@ fn main() {
|
||||
}
|
||||
}
|
||||
|
||||
fs::remove_file(rocksdb_file);
|
||||
let db = DB::open_default(rocksdb_file).unwrap();
|
||||
|
||||
let mut builder = DocIndexMapBuilder::new();
|
||||
let map = File::create(map_file).unwrap();
|
||||
let indexes = File::create(indexes_file).unwrap();
|
||||
let mut builder = MetadataBuilder::new(map, indexes);
|
||||
|
||||
for line in data.lines() {
|
||||
let line = line.unwrap();
|
||||
|
||||
@ -117,15 +119,12 @@ fn main() {
|
||||
}
|
||||
}
|
||||
|
||||
let map = File::create(map_file).unwrap();
|
||||
let values = File::create(values_file).unwrap();
|
||||
|
||||
let (map, values) = builder.build(map, values).unwrap();
|
||||
builder.finish().unwrap();
|
||||
|
||||
set_readonly(map_file, true).unwrap();
|
||||
set_readonly(values_file, true).unwrap();
|
||||
set_readonly(indexes_file, true).unwrap();
|
||||
set_readonly(rocksdb_file, true).unwrap();
|
||||
|
||||
println!("Checking the dump consistency...");
|
||||
unsafe { DocIndexMap::from_paths("map.fst", "values.vecs").unwrap() };
|
||||
unsafe { Metadata::from_paths(map_file, indexes_file).unwrap() };
|
||||
}
|
||||
|
@ -4,11 +4,8 @@ version = "0.1.0"
|
||||
authors = ["Kerollmops <renault.cle@gmail.com>"]
|
||||
|
||||
[dependencies]
|
||||
env_logger = { version = "0.3", default-features = false }
|
||||
raptor = { path = "../raptor" }
|
||||
elapsed = "0.1"
|
||||
serde = "1.0"
|
||||
serde_derive = "1.0"
|
||||
|
||||
[dependencies.fst]
|
||||
git = "https://github.com/Kerollmops/fst.git"
|
||||
|
@ -1,4 +1,3 @@
|
||||
extern crate env_logger;
|
||||
extern crate rocksdb;
|
||||
extern crate fst;
|
||||
extern crate raptor;
|
||||
@ -10,49 +9,55 @@ use std::io::{self, Write};
|
||||
use elapsed::measure_time;
|
||||
use fst::Streamer;
|
||||
use rocksdb::{DB, DBOptions};
|
||||
use raptor::{load_map, DocIndexMap, RankedStream, LevBuilder};
|
||||
use raptor::{Metadata, RankedStream, LevBuilder};
|
||||
|
||||
fn search(map: &DocIndexMap, lev_builder: &LevBuilder, db: &DB, query: &str) {
|
||||
fn search(metadata: &Metadata, database: &DB, lev_builder: &LevBuilder, query: &str) {
|
||||
let mut automatons = Vec::new();
|
||||
for query in query.split_whitespace() {
|
||||
let lev = lev_builder.get_automaton(query);
|
||||
automatons.push(lev);
|
||||
}
|
||||
|
||||
let mut stream = RankedStream::new(&map, map.values(), automatons, 20);
|
||||
let map = metadata.as_map();
|
||||
let indexes = metadata.as_indexes();
|
||||
|
||||
let mut stream = RankedStream::new(&map, &indexes, automatons, 20);
|
||||
while let Some(document) = stream.next() {
|
||||
print!("{:?} ", document.document_id);
|
||||
print!("{:?}", document.document_id);
|
||||
|
||||
let title_key = format!("{}-title", document.document_id);
|
||||
let title = db.get(title_key.as_bytes()).unwrap().unwrap();
|
||||
let title = database.get(title_key.as_bytes()).unwrap().unwrap();
|
||||
let title = unsafe { from_utf8_unchecked(&title) };
|
||||
print!("{:?}", title);
|
||||
print!(" {:?}", title);
|
||||
|
||||
println!();
|
||||
}
|
||||
}
|
||||
|
||||
fn main() {
|
||||
drop(env_logger::init());
|
||||
let map_file = "map.meta";
|
||||
let indexes_file = "indexes.meta";
|
||||
let rocksdb_file = "rocksdb/storage";
|
||||
|
||||
let (elapsed, map) = measure_time(|| load_map("map.fst", "values.vecs").unwrap());
|
||||
println!("{} to load the map", elapsed);
|
||||
let (elapsed, meta) = measure_time(|| unsafe {
|
||||
Metadata::from_paths(map_file, indexes_file).unwrap()
|
||||
});
|
||||
println!("{} to load metadata", elapsed);
|
||||
|
||||
let (elapsed, db) = measure_time(|| {
|
||||
let options = DBOptions::new();
|
||||
DB::open_for_read_only(options, rocksdb_file, false).unwrap()
|
||||
});
|
||||
println!("{} to load the RocksDB database", elapsed);
|
||||
|
||||
let (elapsed, lev_builder) = measure_time(|| LevBuilder::new());
|
||||
println!("{} to load the levenshtein automaton", elapsed);
|
||||
|
||||
let (elapsed, db) = measure_time(|| {
|
||||
let opts = DBOptions::new();
|
||||
let error_if_log_file_exist = false;
|
||||
DB::open_for_read_only(opts, "rocksdb/storage", error_if_log_file_exist).unwrap()
|
||||
});
|
||||
println!("{} to load the rocksdb DB", elapsed);
|
||||
|
||||
match env::args().nth(1) {
|
||||
Some(query) => {
|
||||
println!("Searching for: {:?}", query);
|
||||
let query = query.to_lowercase();
|
||||
let (elapsed, _) = measure_time(|| search(&map, &lev_builder, &db, &query));
|
||||
let (elapsed, _) = measure_time(|| search(&meta, &db, &lev_builder, &query));
|
||||
println!("Finished in {}", elapsed);
|
||||
},
|
||||
None => loop {
|
||||
@ -65,7 +70,7 @@ fn main() {
|
||||
|
||||
if query.is_empty() { break }
|
||||
|
||||
let (elapsed, _) = measure_time(|| search(&map, &lev_builder, &db, &query));
|
||||
let (elapsed, _) = measure_time(|| search(&meta, &db, &lev_builder, &query));
|
||||
println!("Finished in {}", elapsed);
|
||||
},
|
||||
}
|
||||
|
@ -4,9 +4,7 @@ version = "0.1.0"
|
||||
authors = ["Kerollmops <renault.cle@gmail.com>"]
|
||||
|
||||
[dependencies]
|
||||
bincode = "1.0"
|
||||
serde = "1.0"
|
||||
serde_derive = "1.0"
|
||||
byteorder = "1.2"
|
||||
|
||||
[dependencies.fst]
|
||||
git = "https://github.com/Kerollmops/fst.git"
|
||||
@ -17,5 +15,8 @@ git = "https://github.com/Kerollmops/levenshtein-automata.git"
|
||||
branch = "custom-fst"
|
||||
features = ["fst_automaton"]
|
||||
|
||||
[dependencies.rocksdb]
|
||||
git = "https://github.com/pingcap/rust-rocksdb.git"
|
||||
|
||||
[dependencies.group-by]
|
||||
git = "https://github.com/Kerollmops/group-by.git"
|
||||
|
@ -1,28 +1,22 @@
|
||||
#[macro_use] extern crate serde_derive;
|
||||
extern crate bincode;
|
||||
extern crate fst;
|
||||
extern crate group_by;
|
||||
extern crate levenshtein_automata;
|
||||
extern crate serde;
|
||||
extern crate byteorder;
|
||||
extern crate rocksdb;
|
||||
|
||||
pub mod map;
|
||||
pub mod rank;
|
||||
mod levenshtein;
|
||||
pub mod metadata;
|
||||
pub mod levenshtein;
|
||||
|
||||
use std::path::Path;
|
||||
use std::fs;
|
||||
|
||||
pub use self::map::{Map, MapBuilder, Values};
|
||||
pub use self::map::{
|
||||
OpBuilder, IndexedValues,
|
||||
OpWithStateBuilder, IndexedValuesWithState,
|
||||
pub use self::metadata::{
|
||||
Metadata, MetadataBuilder,
|
||||
StreamWithState, StreamWithStateBuilder,
|
||||
UnionWithState, OpWithStateBuilder,
|
||||
IndexedValuesWithState,
|
||||
};
|
||||
pub use self::rank::{RankedStream};
|
||||
pub use self::levenshtein::LevBuilder;
|
||||
|
||||
pub type DocIndexMap = Map<DocIndex>;
|
||||
pub type DocIndexMapBuilder = MapBuilder<DocIndex>;
|
||||
|
||||
pub type DocumentId = u64;
|
||||
|
||||
/// This structure represent the position of a word
|
||||
@ -30,7 +24,8 @@ pub type DocumentId = u64;
|
||||
///
|
||||
/// This is stored in the map, generated at index time,
|
||||
/// extracted and interpreted at search time.
|
||||
#[derive(Debug, Copy, Clone, Eq, PartialEq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
|
||||
#[derive(Debug, Copy, Clone, Eq, PartialEq, PartialOrd, Ord, Hash)]
|
||||
#[repr(C)]
|
||||
pub struct DocIndex {
|
||||
|
||||
/// The document identifier where the word was found.
|
||||
@ -109,12 +104,3 @@ impl Match {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
pub fn load_map<P, Q>(map: P, values: Q) -> fst::Result<DocIndexMap>
|
||||
where P: AsRef<Path>, Q: AsRef<Path>,
|
||||
{
|
||||
let fst = fs::read(map)?;
|
||||
let values = fs::read(values)?;
|
||||
DocIndexMap::from_bytes(fst, &values)
|
||||
}
|
||||
|
@ -1,404 +0,0 @@
|
||||
use bincode;
|
||||
use fst::{self, Automaton};
|
||||
use serde::de::DeserializeOwned;
|
||||
use serde::ser::Serialize;
|
||||
use std::collections::BTreeMap;
|
||||
use std::collections::btree_map::Entry;
|
||||
use std::fs::File;
|
||||
use std::io::{Write, BufReader};
|
||||
use std::ops::Range;
|
||||
use std::path::Path;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct Map<T> {
|
||||
inner: fst::Map,
|
||||
values: Values<T>,
|
||||
}
|
||||
|
||||
impl<T> Map<T> {
|
||||
pub unsafe fn from_paths<P, Q>(map: P, values: Q) -> fst::Result<Self>
|
||||
where
|
||||
T: DeserializeOwned,
|
||||
P: AsRef<Path>,
|
||||
Q: AsRef<Path>
|
||||
{
|
||||
let inner = fst::Map::from_path(map)?;
|
||||
|
||||
// TODO handle errors !!!
|
||||
let values = File::open(values).unwrap();
|
||||
let values = BufReader::new(values);
|
||||
let values = bincode::deserialize_from(values).unwrap();
|
||||
|
||||
Ok(Self { inner, values })
|
||||
}
|
||||
|
||||
pub fn from_bytes(map: Vec<u8>, values: &[u8]) -> fst::Result<Self>
|
||||
where
|
||||
T: DeserializeOwned
|
||||
{
|
||||
let inner = fst::Map::from_bytes(map)?;
|
||||
let values = bincode::deserialize(values).unwrap();
|
||||
|
||||
Ok(Self { inner, values })
|
||||
}
|
||||
|
||||
pub fn stream(&self) -> Stream<T> {
|
||||
Stream {
|
||||
inner: self.inner.stream(),
|
||||
values: &self.values,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn contains_key<K: AsRef<[u8]>>(&self, key: K) -> bool {
|
||||
self.inner.contains_key(key)
|
||||
}
|
||||
|
||||
pub fn get<K: AsRef<[u8]>>(&self, key: K) -> Option<&[T]> {
|
||||
self.inner.get(key).map(|i| unsafe { self.values.get_unchecked(i as usize) })
|
||||
}
|
||||
|
||||
pub fn search<A: Automaton>(&self, aut: A) -> StreamBuilder<T, A> {
|
||||
StreamBuilder {
|
||||
inner: self.inner.search(aut),
|
||||
values: &self.values,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn as_map(&self) -> &fst::Map {
|
||||
&self.inner
|
||||
}
|
||||
|
||||
pub fn values(&self) -> &Values<T> {
|
||||
&self.values
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub struct Values<T> {
|
||||
ranges: Box<[Range<u64>]>,
|
||||
values: Box<[T]>,
|
||||
}
|
||||
|
||||
impl<T> Values<T> {
|
||||
fn new(raw: Vec<Vec<T>>) -> Self {
|
||||
let cap = raw.len();
|
||||
let mut ranges = Vec::with_capacity(cap);
|
||||
let cap = raw.iter().map(Vec::len).sum();
|
||||
let mut values = Vec::with_capacity(cap);
|
||||
|
||||
for mut v in &raw {
|
||||
let len = v.len() as u64;
|
||||
let start = ranges.last().map(|&Range { end, .. }| end).unwrap_or(0);
|
||||
|
||||
let range = Range { start, end: start + len };
|
||||
ranges.push(range);
|
||||
}
|
||||
|
||||
values.extend(raw.into_iter().flat_map(IntoIterator::into_iter));
|
||||
|
||||
let ranges = ranges.into_boxed_slice();
|
||||
let values = values.into_boxed_slice();
|
||||
|
||||
Self { ranges, values }
|
||||
}
|
||||
|
||||
pub unsafe fn get_unchecked(&self, index: usize) -> &[T] {
|
||||
let range = self.ranges.get_unchecked(index);
|
||||
let range = Range { start: range.start as usize, end: range.end as usize };
|
||||
self.values.get_unchecked(range)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct MapBuilder<T> {
|
||||
map: BTreeMap<String, u64>,
|
||||
// This makes many memory indirections but it is only used
|
||||
// at index time, not kept for query time.
|
||||
values: Vec<Vec<T>>,
|
||||
}
|
||||
|
||||
impl<T> MapBuilder<T> {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
map: BTreeMap::new(),
|
||||
values: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn insert<S: Into<String>>(&mut self, key: S, value: T) {
|
||||
let key = key.into();
|
||||
match self.map.entry(key) {
|
||||
Entry::Vacant(e) => {
|
||||
self.values.push(vec![value]);
|
||||
let index = (self.values.len() - 1) as u64;
|
||||
e.insert(index);
|
||||
},
|
||||
Entry::Occupied(e) => {
|
||||
let index = *e.get();
|
||||
let values = &mut self.values[index as usize];
|
||||
values.push(value);
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
pub fn build_in_memory(self) -> fst::Result<Map<T>> {
|
||||
Ok(Map {
|
||||
inner: fst::Map::from_iter(self.map)?,
|
||||
values: Values::new(self.values),
|
||||
})
|
||||
}
|
||||
|
||||
pub fn build<W, X>(self, map_wrt: W, mut values_wrt: X) -> fst::Result<(W, X)>
|
||||
where
|
||||
T: Serialize,
|
||||
W: Write,
|
||||
X: Write
|
||||
{
|
||||
let mut builder = fst::MapBuilder::new(map_wrt)?;
|
||||
builder.extend_iter(self.map)?;
|
||||
let map = builder.into_inner()?;
|
||||
let values = Values::new(self.values);
|
||||
|
||||
// TODO handle that error !!!
|
||||
bincode::serialize_into(&mut values_wrt, &values).unwrap();
|
||||
|
||||
Ok((map, values_wrt))
|
||||
}
|
||||
}
|
||||
|
||||
pub struct OpBuilder<'m, 'v, T: 'v> {
|
||||
inner: fst::map::OpBuilder<'m>,
|
||||
values: &'v Values<T>,
|
||||
}
|
||||
|
||||
impl<'m, 'v, T: 'v> OpBuilder<'m, 'v, T> {
|
||||
pub fn new(values: &'v Values<T>) -> Self {
|
||||
OpBuilder {
|
||||
inner: fst::map::OpBuilder::new(),
|
||||
values: values,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn add<I, S>(mut self, streamable: I) -> Self
|
||||
where
|
||||
I: for<'a> fst::IntoStreamer<'a, Into=S, Item=(&'a [u8], u64)>,
|
||||
S: 'm + for<'a> fst::Streamer<'a, Item=(&'a [u8], u64)>,
|
||||
{
|
||||
self.push(streamable);
|
||||
self
|
||||
}
|
||||
|
||||
pub fn push<I, S>(&mut self, streamable: I)
|
||||
where
|
||||
I: for<'a> fst::IntoStreamer<'a, Into=S, Item=(&'a [u8], u64)>,
|
||||
S: 'm + for<'a> fst::Streamer<'a, Item=(&'a [u8], u64)>,
|
||||
{
|
||||
self.inner.push(streamable);
|
||||
}
|
||||
|
||||
pub fn union(self) -> Union<'m, 'v, T> {
|
||||
Union {
|
||||
inner: self.inner.union(),
|
||||
outs: Vec::new(),
|
||||
values: self.values,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub struct Union<'m, 'v, T: 'v> {
|
||||
inner: fst::map::Union<'m>,
|
||||
outs: Vec<IndexedValues<'v, T>>,
|
||||
values: &'v Values<T>,
|
||||
}
|
||||
|
||||
impl<'a, 'm, 'v, T: 'v + 'a> fst::Streamer<'a> for Union<'m, 'v, T> {
|
||||
type Item = (&'a [u8], &'a [IndexedValues<'a, T>]);
|
||||
|
||||
fn next(&'a mut self) -> Option<Self::Item> {
|
||||
match self.inner.next() {
|
||||
Some((s, ivalues)) => {
|
||||
self.outs.clear();
|
||||
for ivalue in ivalues {
|
||||
let index = ivalue.index;
|
||||
let values = unsafe { self.values.get_unchecked(ivalue.value as usize) };
|
||||
self.outs.push(IndexedValues { index, values })
|
||||
}
|
||||
Some((s, &self.outs))
|
||||
},
|
||||
None => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Copy, Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
|
||||
pub struct IndexedValues<'a, T: 'a> {
|
||||
pub index: usize,
|
||||
pub values: &'a [T],
|
||||
}
|
||||
|
||||
pub struct OpWithStateBuilder<'m, 'v, T: 'v, U> {
|
||||
inner: fst::map::OpWithStateBuilder<'m, U>,
|
||||
values: &'v Values<T>,
|
||||
}
|
||||
|
||||
impl<'m, 'v, T: 'v, U: 'static> OpWithStateBuilder<'m, 'v, T, U> {
|
||||
pub fn new(values: &'v Values<T>) -> Self {
|
||||
Self {
|
||||
inner: fst::map::OpWithStateBuilder::new(),
|
||||
values: values,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn add<I, S>(mut self, streamable: I) -> Self
|
||||
where
|
||||
I: for<'a> fst::IntoStreamer<'a, Into=S, Item=(&'a [u8], u64, U)>,
|
||||
S: 'm + for<'a> fst::Streamer<'a, Item=(&'a [u8], u64, U)>,
|
||||
{
|
||||
self.push(streamable);
|
||||
self
|
||||
}
|
||||
|
||||
pub fn push<I, S>(&mut self, streamable: I)
|
||||
where
|
||||
I: for<'a> fst::IntoStreamer<'a, Into=S, Item=(&'a [u8], u64, U)>,
|
||||
S: 'm + for<'a> fst::Streamer<'a, Item=(&'a [u8], u64, U)>,
|
||||
{
|
||||
self.inner.push(streamable);
|
||||
}
|
||||
|
||||
pub fn union(self) -> UnionWithState<'m, 'v, T, U> {
|
||||
UnionWithState {
|
||||
inner: self.inner.union(),
|
||||
outs: Vec::new(),
|
||||
values: self.values,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub struct UnionWithState<'m, 'v, T: 'v, U> {
|
||||
inner: fst::map::UnionWithState<'m, U>,
|
||||
outs: Vec<IndexedValuesWithState<'v, T, U>>,
|
||||
values: &'v Values<T>,
|
||||
}
|
||||
|
||||
impl<'a, 'm, 'v, T: 'v + 'a, U: 'a> fst::Streamer<'a> for UnionWithState<'m, 'v, T, U>
|
||||
where
|
||||
U: Clone,
|
||||
{
|
||||
// TODO prefer returning (&[u8], index, value T, state) one by one
|
||||
type Item = (&'a [u8], &'a [IndexedValuesWithState<'a, T, U>]);
|
||||
|
||||
fn next(&'a mut self) -> Option<Self::Item> {
|
||||
match self.inner.next() {
|
||||
Some((s, ivalues)) => {
|
||||
self.outs.clear();
|
||||
self.outs.reserve(ivalues.len());
|
||||
for ivalue in ivalues {
|
||||
let index = ivalue.index;
|
||||
let values = unsafe { self.values.get_unchecked(ivalue.value as usize) };
|
||||
let state = ivalue.state.clone();
|
||||
self.outs.push(IndexedValuesWithState { index, values, state })
|
||||
}
|
||||
Some((s, &self.outs))
|
||||
},
|
||||
None => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Copy, Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
|
||||
pub struct IndexedValuesWithState<'a, T: 'a, U> {
|
||||
pub index: usize,
|
||||
pub values: &'a [T],
|
||||
pub state: U,
|
||||
}
|
||||
|
||||
pub struct StreamBuilder<'m, 'v, T: 'v, A> {
|
||||
inner: fst::map::StreamBuilder<'m, A>,
|
||||
values: &'v Values<T>,
|
||||
}
|
||||
|
||||
impl<'m, 'v, T: 'v, A> StreamBuilder<'m, 'v, T, A> {
|
||||
pub fn with_state(self) -> StreamWithStateBuilder<'m, 'v, T, A> {
|
||||
StreamWithStateBuilder {
|
||||
inner: self.inner.with_state(),
|
||||
values: self.values,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'m, 'v, 'a, T: 'v + 'a, A: Automaton> fst::IntoStreamer<'a> for StreamBuilder<'m, 'v, T, A> {
|
||||
type Item = <Self::Into as fst::Streamer<'a>>::Item;
|
||||
type Into = Stream<'m, 'v, T, A>;
|
||||
|
||||
fn into_stream(self) -> Self::Into {
|
||||
Stream {
|
||||
inner: self.inner.into_stream(),
|
||||
values: self.values,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub struct Stream<'m, 'v, T: 'v, A: Automaton = fst::automaton::AlwaysMatch> {
|
||||
inner: fst::map::Stream<'m, A>,
|
||||
values: &'v Values<T>,
|
||||
}
|
||||
|
||||
impl<'m, 'v, 'a, T: 'v + 'a, A: Automaton> fst::Streamer<'a> for Stream<'m, 'v, T, A> {
|
||||
type Item = (&'a [u8], &'a [T]);
|
||||
|
||||
fn next(&'a mut self) -> Option<Self::Item> {
|
||||
// Here we can't just `map` because of some borrow rules
|
||||
match self.inner.next() {
|
||||
Some((key, i)) => {
|
||||
let values = unsafe { self.values.get_unchecked(i as usize) };
|
||||
Some((key, values))
|
||||
},
|
||||
None => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub struct StreamWithStateBuilder<'m, 'v, T: 'v, A> {
|
||||
inner: fst::map::StreamWithStateBuilder<'m, A>,
|
||||
values: &'v Values<T>,
|
||||
}
|
||||
|
||||
impl<'m, 'v, 'a, T: 'v + 'a, A: 'a> fst::IntoStreamer<'a> for StreamWithStateBuilder<'m, 'v, T, A>
|
||||
where
|
||||
A: Automaton,
|
||||
A::State: Clone,
|
||||
{
|
||||
type Item = <Self::Into as fst::Streamer<'a>>::Item;
|
||||
type Into = StreamWithState<'m, 'v, T, A>;
|
||||
|
||||
fn into_stream(self) -> Self::Into {
|
||||
StreamWithState {
|
||||
inner: self.inner.into_stream(),
|
||||
values: self.values,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub struct StreamWithState<'m, 'v, T: 'v, A: Automaton = fst::automaton::AlwaysMatch> {
|
||||
inner: fst::map::StreamWithState<'m, A>,
|
||||
values: &'v Values<T>,
|
||||
}
|
||||
|
||||
impl<'m, 'v, 'a, T: 'v + 'a, A: 'a> fst::Streamer<'a> for StreamWithState<'m, 'v, T, A>
|
||||
where
|
||||
A: Automaton,
|
||||
A::State: Clone,
|
||||
{
|
||||
type Item = (&'a [u8], &'a [T], A::State);
|
||||
|
||||
fn next(&'a mut self) -> Option<Self::Item> {
|
||||
match self.inner.next() {
|
||||
Some((key, i, state)) => {
|
||||
let values = unsafe { self.values.get_unchecked(i as usize) };
|
||||
Some((key, values, state))
|
||||
},
|
||||
None => None,
|
||||
}
|
||||
}
|
||||
}
|
427
raptor/src/metadata.rs
Normal file
427
raptor/src/metadata.rs
Normal file
@ -0,0 +1,427 @@
|
||||
use std::sync::Arc;
|
||||
use std::ops::Deref;
|
||||
use std::error::Error;
|
||||
use std::path::Path;
|
||||
use std::collections::btree_map::{Entry, BTreeMap};
|
||||
use std::slice::from_raw_parts;
|
||||
use std::io::{self, Write};
|
||||
use std::mem;
|
||||
use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt};
|
||||
use fst::{self, Map, MapBuilder, Automaton};
|
||||
use fst::raw::MmapReadOnly;
|
||||
use DocIndex;
|
||||
|
||||
#[repr(C)]
|
||||
struct Range {
|
||||
start: u64,
|
||||
end: u64,
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
enum DocIndexesData {
|
||||
Shared {
|
||||
vec: Arc<Vec<u8>>,
|
||||
offset: usize,
|
||||
len: usize,
|
||||
},
|
||||
Mmap(MmapReadOnly),
|
||||
}
|
||||
|
||||
impl Deref for DocIndexesData {
|
||||
type Target = [u8];
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
match self {
|
||||
DocIndexesData::Shared { vec, offset, len } => {
|
||||
&vec[*offset..offset + len]
|
||||
},
|
||||
DocIndexesData::Mmap(m) => m.as_slice(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct DocIndexes {
|
||||
ranges: DocIndexesData,
|
||||
indexes: DocIndexesData,
|
||||
}
|
||||
|
||||
impl DocIndexes {
|
||||
pub unsafe fn from_path<P: AsRef<Path>>(path: P) -> io::Result<Self> {
|
||||
let mmap = MmapReadOnly::open_path(path)?;
|
||||
|
||||
let range_len = mmap.as_slice().read_u64::<LittleEndian>()?;
|
||||
let range_len = range_len as usize * mem::size_of::<Range>();
|
||||
|
||||
let offset = mem::size_of::<u64>() as usize;
|
||||
let ranges = DocIndexesData::Mmap(mmap.range(offset, range_len));
|
||||
|
||||
let len = mmap.len() - range_len - offset;
|
||||
let offset = offset + range_len;
|
||||
let indexes = DocIndexesData::Mmap(mmap.range(offset, len));
|
||||
|
||||
Ok(DocIndexes { ranges, indexes })
|
||||
}
|
||||
|
||||
pub fn from_bytes(vec: Vec<u8>) -> io::Result<Self> {
|
||||
let vec = Arc::new(vec);
|
||||
|
||||
let range_len = vec.as_slice().read_u64::<LittleEndian>()?;
|
||||
let range_len = range_len as usize * mem::size_of::<Range>();
|
||||
|
||||
let offset = mem::size_of::<u64>() as usize;
|
||||
let ranges = DocIndexesData::Shared {
|
||||
vec: vec.clone(),
|
||||
offset,
|
||||
len: range_len
|
||||
};
|
||||
|
||||
let len = vec.len() - range_len - offset;
|
||||
let offset = offset + range_len;
|
||||
let indexes = DocIndexesData::Shared { vec, offset, len };
|
||||
|
||||
Ok(DocIndexes { ranges, indexes })
|
||||
}
|
||||
|
||||
pub fn get(&self, index: u64) -> Option<&[DocIndex]> {
|
||||
self.ranges().get(index as usize).map(|Range { start, end }| {
|
||||
let start = *start as usize;
|
||||
let end = *end as usize;
|
||||
&self.indexes()[start..end]
|
||||
})
|
||||
}
|
||||
|
||||
fn ranges(&self) -> &[Range] {
|
||||
let slice = &self.ranges;
|
||||
let ptr = slice.as_ptr() as *const Range;
|
||||
let len = slice.len() / mem::size_of::<Range>();
|
||||
unsafe { from_raw_parts(ptr, len) }
|
||||
}
|
||||
|
||||
fn indexes(&self) -> &[DocIndex] {
|
||||
let slice = &self.indexes;
|
||||
let ptr = slice.as_ptr() as *const DocIndex;
|
||||
let len = slice.len() / mem::size_of::<DocIndex>();
|
||||
unsafe { from_raw_parts(ptr, len) }
|
||||
}
|
||||
}
|
||||
|
||||
pub struct Metadata {
|
||||
map: Map,
|
||||
indexes: DocIndexes,
|
||||
}
|
||||
|
||||
impl Metadata {
|
||||
pub unsafe fn from_paths<P, Q>(map: P, indexes: Q) -> Result<Self, Box<Error>>
|
||||
where P: AsRef<Path>,
|
||||
Q: AsRef<Path>,
|
||||
{
|
||||
let map = Map::from_path(map)?;
|
||||
let indexes = DocIndexes::from_path(indexes)?;
|
||||
Ok(Metadata { map, indexes })
|
||||
}
|
||||
|
||||
pub fn from_bytes(map: Vec<u8>, indexes: Vec<u8>) -> Result<Self, Box<Error>> {
|
||||
let map = Map::from_bytes(map)?;
|
||||
let indexes = DocIndexes::from_bytes(indexes)?;
|
||||
Ok(Metadata { map, indexes })
|
||||
}
|
||||
|
||||
pub fn get<K: AsRef<[u8]>>(&self, key: K) -> Option<&[DocIndex]> {
|
||||
self.map.get(key).and_then(|index| self.indexes.get(index))
|
||||
}
|
||||
|
||||
pub fn as_map(&self) -> &Map {
|
||||
&self.map
|
||||
}
|
||||
|
||||
pub fn as_indexes(&self) -> &DocIndexes {
|
||||
&self.indexes
|
||||
}
|
||||
|
||||
pub fn explode(self) -> (Map, DocIndexes) {
|
||||
(self.map, self.indexes)
|
||||
}
|
||||
}
|
||||
|
||||
pub struct Inner {
|
||||
keys: BTreeMap<String, u64>,
|
||||
indexes: Vec<Vec<DocIndex>>,
|
||||
number_docs: usize,
|
||||
}
|
||||
|
||||
impl Inner {
|
||||
pub fn new() -> Self {
|
||||
Inner {
|
||||
keys: BTreeMap::new(),
|
||||
indexes: Vec::new(),
|
||||
number_docs: 0,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn number_doc_indexes(&self) -> usize {
|
||||
self.number_docs
|
||||
}
|
||||
|
||||
pub fn insert(&mut self, key: String, value: DocIndex) {
|
||||
match self.keys.entry(key) {
|
||||
Entry::Vacant(e) => {
|
||||
let index = self.indexes.len() as u64;
|
||||
self.indexes.push(vec![value]);
|
||||
e.insert(index);
|
||||
},
|
||||
Entry::Occupied(e) => {
|
||||
let index = *e.get();
|
||||
let vec = &mut self.indexes[index as usize];
|
||||
vec.push(value);
|
||||
},
|
||||
}
|
||||
self.number_docs += 1;
|
||||
}
|
||||
}
|
||||
|
||||
pub struct MetadataBuilder<W, X> {
|
||||
inner: Inner,
|
||||
map: W,
|
||||
indexes: X,
|
||||
}
|
||||
|
||||
impl<W: Write, X: Write> MetadataBuilder<W, X> {
|
||||
|
||||
pub fn new(map: W, indexes: X) -> Self {
|
||||
Self { inner: Inner::new(), map, indexes }
|
||||
}
|
||||
|
||||
pub fn insert(&mut self, key: String, index: DocIndex) {
|
||||
self.inner.insert(key, index)
|
||||
}
|
||||
|
||||
pub fn finish(self) -> Result<(), Box<Error>> {
|
||||
self.into_inner().map(|_| ())
|
||||
}
|
||||
|
||||
pub fn into_inner(mut self) -> Result<(W, X), Box<Error>> {
|
||||
let number_docs = self.inner.number_doc_indexes();
|
||||
|
||||
let mut keys_builder = MapBuilder::new(self.map)?;
|
||||
keys_builder.extend_iter(self.inner.keys)?;
|
||||
let map = keys_builder.into_inner()?;
|
||||
|
||||
// write down doc_indexes into the indexes Writer
|
||||
let (ranges, values) = into_sliced_ranges(self.inner.indexes, number_docs);
|
||||
let len = ranges.len() as u64;
|
||||
|
||||
// TODO check if this is correct
|
||||
self.indexes.write_u64::<LittleEndian>(len)?;
|
||||
unsafe {
|
||||
// write Ranges first
|
||||
let slice = into_u8_slice(ranges.as_slice());
|
||||
self.indexes.write_all(slice)?;
|
||||
|
||||
// write Values after
|
||||
let slice = into_u8_slice(values.as_slice());
|
||||
self.indexes.write_all(slice)?;
|
||||
}
|
||||
self.indexes.flush()?;
|
||||
|
||||
Ok((map, self.indexes))
|
||||
}
|
||||
}
|
||||
|
||||
fn into_sliced_ranges<T>(vecs: Vec<Vec<T>>, number_docs: usize) -> (Vec<Range>, Vec<T>) {
|
||||
let cap = vecs.len();
|
||||
let mut ranges = Vec::with_capacity(cap);
|
||||
let mut values = Vec::with_capacity(number_docs);
|
||||
|
||||
for mut v in &vecs {
|
||||
let len = v.len() as u64;
|
||||
let start = ranges.last().map(|&Range { end, .. }| end).unwrap_or(0);
|
||||
|
||||
let range = Range { start, end: start + len };
|
||||
ranges.push(range);
|
||||
}
|
||||
|
||||
values.extend(vecs.into_iter().flatten());
|
||||
|
||||
(ranges, values)
|
||||
}
|
||||
|
||||
unsafe fn into_u8_slice<T>(slice: &[T]) -> &[u8] {
|
||||
let ptr = slice.as_ptr() as *const u8;
|
||||
let len = slice.len() * mem::size_of::<T>();
|
||||
from_raw_parts(ptr, len)
|
||||
}
|
||||
|
||||
pub struct OpWithStateBuilder<'m, 'v, U> {
|
||||
inner: fst::map::OpWithStateBuilder<'m, U>,
|
||||
indexes: &'v DocIndexes,
|
||||
}
|
||||
|
||||
impl<'m, 'v, U: 'static> OpWithStateBuilder<'m, 'v, U> {
|
||||
pub fn new(indexes: &'v DocIndexes) -> Self {
|
||||
Self {
|
||||
inner: fst::map::OpWithStateBuilder::new(),
|
||||
indexes: indexes,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn add<I, S>(mut self, streamable: I) -> Self
|
||||
where
|
||||
I: for<'a> fst::IntoStreamer<'a, Into=S, Item=(&'a [u8], u64, U)>,
|
||||
S: 'm + for<'a> fst::Streamer<'a, Item=(&'a [u8], u64, U)>,
|
||||
{
|
||||
self.push(streamable);
|
||||
self
|
||||
}
|
||||
|
||||
pub fn push<I, S>(&mut self, streamable: I)
|
||||
where
|
||||
I: for<'a> fst::IntoStreamer<'a, Into=S, Item=(&'a [u8], u64, U)>,
|
||||
S: 'm + for<'a> fst::Streamer<'a, Item=(&'a [u8], u64, U)>,
|
||||
{
|
||||
self.inner.push(streamable);
|
||||
}
|
||||
|
||||
pub fn union(self) -> UnionWithState<'m, 'v, U> {
|
||||
UnionWithState {
|
||||
inner: self.inner.union(),
|
||||
outs: Vec::new(),
|
||||
indexes: self.indexes,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Copy, Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
|
||||
pub struct IndexedValuesWithState<'a, U> {
|
||||
pub index: usize,
|
||||
pub values: &'a [DocIndex],
|
||||
pub state: U,
|
||||
}
|
||||
|
||||
pub struct UnionWithState<'m, 'v, U> {
|
||||
inner: fst::map::UnionWithState<'m, U>,
|
||||
outs: Vec<IndexedValuesWithState<'v, U>>,
|
||||
indexes: &'v DocIndexes,
|
||||
}
|
||||
|
||||
impl<'a, 'm, 'v, U: 'a> fst::Streamer<'a> for UnionWithState<'m, 'v, U>
|
||||
where
|
||||
U: Clone,
|
||||
{
|
||||
type Item = (&'a [u8], &'a [IndexedValuesWithState<'a, U>]);
|
||||
|
||||
fn next(&'a mut self) -> Option<Self::Item> {
|
||||
match self.inner.next() {
|
||||
Some((s, ivalues)) => {
|
||||
self.outs.clear();
|
||||
self.outs.reserve(ivalues.len());
|
||||
for ivalue in ivalues {
|
||||
if let Some(values) = self.indexes.get(ivalue.value) {
|
||||
let index = ivalue.index;
|
||||
let state = ivalue.state.clone();
|
||||
self.outs.push(IndexedValuesWithState { index, values, state })
|
||||
}
|
||||
}
|
||||
Some((s, &self.outs))
|
||||
},
|
||||
None => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub struct StreamWithStateBuilder<'m, 'v, A> {
|
||||
inner: fst::map::StreamWithStateBuilder<'m, A>,
|
||||
indexes: &'v DocIndexes,
|
||||
}
|
||||
|
||||
impl<'m, 'v, 'a, A: 'a> fst::IntoStreamer<'a> for StreamWithStateBuilder<'m, 'v, A>
|
||||
where
|
||||
A: Automaton,
|
||||
A::State: Clone,
|
||||
{
|
||||
type Item = <Self::Into as fst::Streamer<'a>>::Item;
|
||||
type Into = StreamWithState<'m, 'v, A>;
|
||||
|
||||
fn into_stream(self) -> Self::Into {
|
||||
StreamWithState {
|
||||
inner: self.inner.into_stream(),
|
||||
indexes: self.indexes,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub struct StreamWithState<'m, 'v, A: Automaton = fst::automaton::AlwaysMatch> {
|
||||
inner: fst::map::StreamWithState<'m, A>,
|
||||
indexes: &'v DocIndexes,
|
||||
}
|
||||
|
||||
impl<'m, 'v, 'a, A: 'a> fst::Streamer<'a> for StreamWithState<'m, 'v, A>
|
||||
where
|
||||
A: Automaton,
|
||||
A::State: Clone,
|
||||
{
|
||||
type Item = (&'a [u8], &'a [DocIndex], A::State);
|
||||
|
||||
fn next(&'a mut self) -> Option<Self::Item> {
|
||||
match self.inner.next() {
|
||||
Some((key, i, state)) => {
|
||||
match self.indexes.get(i) {
|
||||
Some(values) => Some((key, values, state)),
|
||||
None => None,
|
||||
}
|
||||
},
|
||||
None => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn empty_serialize_deserialize() {
|
||||
let mapw = Vec::new();
|
||||
let indexesw = Vec::new();
|
||||
|
||||
let builder = MetadataBuilder::new(mapw, indexesw);
|
||||
let (map, indexes) = builder.into_inner().unwrap();
|
||||
|
||||
let metas = Metadata::from_bytes(map, indexes).unwrap();
|
||||
assert_eq!(metas.get("chameau"), None);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn one_doc_serialize_deserialize() {
|
||||
let mapw = Vec::new();
|
||||
let indexesw = Vec::new();
|
||||
|
||||
let mut builder = MetadataBuilder::new(mapw, indexesw);
|
||||
|
||||
let doc = DocIndex { document: 12, attribute: 1, attribute_index: 22 };
|
||||
builder.insert("chameau".into(), doc);
|
||||
|
||||
let (map, indexes) = builder.into_inner().unwrap();
|
||||
|
||||
let metas = Metadata::from_bytes(map, indexes).unwrap();
|
||||
assert_eq!(metas.get("chameau"), Some(&[doc][..]));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn multiple_docs_serialize_deserialize() {
|
||||
let mapw = Vec::new();
|
||||
let indexesw = Vec::new();
|
||||
|
||||
let mut builder = MetadataBuilder::new(mapw, indexesw);
|
||||
|
||||
let doc1 = DocIndex { document: 12, attribute: 1, attribute_index: 22 };
|
||||
let doc2 = DocIndex { document: 31, attribute: 0, attribute_index: 1 };
|
||||
builder.insert("chameau".into(), doc1);
|
||||
builder.insert("chameau".into(), doc2);
|
||||
|
||||
let (map, indexes) = builder.into_inner().unwrap();
|
||||
|
||||
let metas = Metadata::from_bytes(map, indexes).unwrap();
|
||||
assert_eq!(metas.get("chameau"), Some(&[doc1, doc2][..]));
|
||||
}
|
||||
}
|
@ -8,11 +8,10 @@ mod exact;
|
||||
use std::cmp::Ordering;
|
||||
use std::collections::HashMap;
|
||||
use std::{mem, vec};
|
||||
use DocIndexMap;
|
||||
use fst;
|
||||
use levenshtein::Levenshtein;
|
||||
use map::{OpWithStateBuilder, UnionWithState, Values};
|
||||
use {Match, DocIndex, DocumentId};
|
||||
use metadata::{DocIndexes, OpWithStateBuilder, UnionWithState};
|
||||
use {Match, DocumentId};
|
||||
use group_by::GroupByMut;
|
||||
|
||||
use self::sum_of_typos::sum_of_typos;
|
||||
@ -117,7 +116,7 @@ impl IntoIterator for Pool {
|
||||
|
||||
pub enum RankedStream<'m, 'v> {
|
||||
Fed {
|
||||
inner: UnionWithState<'m, 'v, DocIndex, u32>,
|
||||
inner: UnionWithState<'m, 'v, u32>,
|
||||
automatons: Vec<Levenshtein>,
|
||||
pool: Pool,
|
||||
},
|
||||
@ -127,11 +126,11 @@ pub enum RankedStream<'m, 'v> {
|
||||
}
|
||||
|
||||
impl<'m, 'v> RankedStream<'m, 'v> {
|
||||
pub fn new(map: &'m DocIndexMap, values: &'v Values<DocIndex>, automatons: Vec<Levenshtein>, limit: usize) -> Self {
|
||||
let mut op = OpWithStateBuilder::new(values);
|
||||
pub fn new(map: &'m fst::Map, indexes: &'v DocIndexes, automatons: Vec<Levenshtein>, limit: usize) -> Self {
|
||||
let mut op = OpWithStateBuilder::new(indexes);
|
||||
|
||||
for automaton in automatons.iter().map(|l| l.dfa.clone()) {
|
||||
let stream = map.as_map().search(automaton).with_state();
|
||||
let stream = map.search(automaton).with_state();
|
||||
op.push(stream);
|
||||
}
|
||||
|
||||
|
@ -3,10 +3,10 @@ use Match;
|
||||
use rank::{match_query_index, Document};
|
||||
use group_by::GroupBy;
|
||||
|
||||
pub fn sum_of_words_position(lhs: &Document, rhs: &Document) -> Ordering {
|
||||
let key = |matches: &[Match]| -> u32 {
|
||||
GroupBy::new(matches, match_query_index).map(|m| m[0].attribute_index).sum()
|
||||
};
|
||||
fn key(matches: &[Match]) -> u32 {
|
||||
GroupBy::new(matches, match_query_index).map(|m| m[0].attribute_index).sum()
|
||||
}
|
||||
|
||||
pub fn sum_of_words_position(lhs: &Document, rhs: &Document) -> Ordering {
|
||||
key(&lhs.matches).cmp(&key(&rhs.matches))
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user