mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-22 21:04:27 +01:00
Use the log crate instead of stderr
This commit is contained in:
parent
2c62eeea3c
commit
12358476da
125
Cargo.lock
generated
125
Cargo.lock
generated
@ -79,12 +79,12 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "atty"
|
name = "atty"
|
||||||
version = "0.2.14"
|
version = "0.2.11"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8"
|
checksum = "9a7d5b8723950951411ee34d271d99dddcc2035a16ab25310ea2c8cfd4369652"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"hermit-abi",
|
|
||||||
"libc",
|
"libc",
|
||||||
|
"termion",
|
||||||
"winapi 0.3.8",
|
"winapi 0.3.8",
|
||||||
]
|
]
|
||||||
|
|
||||||
@ -164,7 +164,7 @@ version = "0.2.13"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "31accafdb70df7871592c058eca3985b71104e15ac32f64706022c58867da931"
|
checksum = "31accafdb70df7871592c058eca3985b71104e15ac32f64706022c58867da931"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"lazy_static",
|
"lazy_static 1.4.0",
|
||||||
"memchr",
|
"memchr",
|
||||||
"regex-automata",
|
"regex-automata",
|
||||||
"serde",
|
"serde",
|
||||||
@ -228,6 +228,17 @@ version = "0.1.10"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822"
|
checksum = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "chrono"
|
||||||
|
version = "0.4.13"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "c74d84029116787153e02106bf53e66828452a4b325cc8652b788b5967c0a0b6"
|
||||||
|
dependencies = [
|
||||||
|
"num-integer",
|
||||||
|
"num-traits",
|
||||||
|
"time",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "clap"
|
name = "clap"
|
||||||
version = "2.33.1"
|
version = "2.33.1"
|
||||||
@ -281,7 +292,7 @@ dependencies = [
|
|||||||
"criterion-plot",
|
"criterion-plot",
|
||||||
"csv",
|
"csv",
|
||||||
"itertools",
|
"itertools",
|
||||||
"lazy_static",
|
"lazy_static 1.4.0",
|
||||||
"num-traits",
|
"num-traits",
|
||||||
"oorandom",
|
"oorandom",
|
||||||
"plotters",
|
"plotters",
|
||||||
@ -324,7 +335,7 @@ dependencies = [
|
|||||||
"autocfg 1.0.0",
|
"autocfg 1.0.0",
|
||||||
"cfg-if",
|
"cfg-if",
|
||||||
"crossbeam-utils",
|
"crossbeam-utils",
|
||||||
"lazy_static",
|
"lazy_static 1.4.0",
|
||||||
"maybe-uninit",
|
"maybe-uninit",
|
||||||
"memoffset",
|
"memoffset",
|
||||||
"scopeguard",
|
"scopeguard",
|
||||||
@ -348,7 +359,7 @@ checksum = "c3c7c73a2d1e9fc0886a08b93e98eb643461230d5f1925e4036204d5f2e261a8"
|
|||||||
dependencies = [
|
dependencies = [
|
||||||
"autocfg 1.0.0",
|
"autocfg 1.0.0",
|
||||||
"cfg-if",
|
"cfg-if",
|
||||||
"lazy_static",
|
"lazy_static 1.4.0",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@ -845,6 +856,12 @@ dependencies = [
|
|||||||
"winapi-build",
|
"winapi-build",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "lazy_static"
|
||||||
|
version = "0.2.11"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "76f033c7ad61445c5b347c7382dd1237847eb1bce590fe50365dcb33d546be73"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "lazy_static"
|
name = "lazy_static"
|
||||||
version = "1.4.0"
|
version = "1.4.0"
|
||||||
@ -951,6 +968,7 @@ dependencies = [
|
|||||||
"itertools",
|
"itertools",
|
||||||
"jemallocator",
|
"jemallocator",
|
||||||
"levenshtein_automata",
|
"levenshtein_automata",
|
||||||
|
"log 0.4.8",
|
||||||
"memmap",
|
"memmap",
|
||||||
"once_cell",
|
"once_cell",
|
||||||
"oxidized-mtbl",
|
"oxidized-mtbl",
|
||||||
@ -960,6 +978,7 @@ dependencies = [
|
|||||||
"slice-group-by",
|
"slice-group-by",
|
||||||
"smallstr",
|
"smallstr",
|
||||||
"smallvec",
|
"smallvec",
|
||||||
|
"stderrlog",
|
||||||
"structopt",
|
"structopt",
|
||||||
"tempfile",
|
"tempfile",
|
||||||
"tokio",
|
"tokio",
|
||||||
@ -1115,6 +1134,16 @@ dependencies = [
|
|||||||
"version_check 0.9.2",
|
"version_check 0.9.2",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "num-integer"
|
||||||
|
version = "0.1.43"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "8d59457e662d541ba17869cf51cf177c0b5f0cbf476c66bdc90bf1edac4f875b"
|
||||||
|
dependencies = [
|
||||||
|
"autocfg 1.0.0",
|
||||||
|
"num-traits",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "num-traits"
|
name = "num-traits"
|
||||||
version = "0.2.12"
|
version = "0.2.12"
|
||||||
@ -1134,6 +1163,12 @@ dependencies = [
|
|||||||
"libc",
|
"libc",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "numtoa"
|
||||||
|
version = "0.1.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "b8f8bdf33df195859076e54ab11ee78a1b208382d3a26ec40d142ffc1ecc49ef"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "once_cell"
|
name = "once_cell"
|
||||||
version = "1.4.0"
|
version = "1.4.0"
|
||||||
@ -1505,7 +1540,7 @@ dependencies = [
|
|||||||
"crossbeam-deque",
|
"crossbeam-deque",
|
||||||
"crossbeam-queue",
|
"crossbeam-queue",
|
||||||
"crossbeam-utils",
|
"crossbeam-utils",
|
||||||
"lazy_static",
|
"lazy_static 1.4.0",
|
||||||
"num_cpus",
|
"num_cpus",
|
||||||
]
|
]
|
||||||
|
|
||||||
@ -1524,6 +1559,15 @@ version = "0.1.56"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "2439c63f3f6139d1b57529d16bc3b8bb855230c8efcc5d3a896c8bea7c3b1e84"
|
checksum = "2439c63f3f6139d1b57529d16bc3b8bb855230c8efcc5d3a896c8bea7c3b1e84"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "redox_termios"
|
||||||
|
version = "0.1.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "7e891cfe48e9100a70a3b6eb652fef28920c117d366339687bd5576160db0f76"
|
||||||
|
dependencies = [
|
||||||
|
"redox_syscall",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "regex"
|
name = "regex"
|
||||||
version = "1.3.9"
|
version = "1.3.9"
|
||||||
@ -1738,6 +1782,19 @@ dependencies = [
|
|||||||
"winapi 0.3.8",
|
"winapi 0.3.8",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "stderrlog"
|
||||||
|
version = "0.4.3"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "32e5ee9b90a5452c570a0b0ac1c99ae9498db7e56e33d74366de7f2a7add7f25"
|
||||||
|
dependencies = [
|
||||||
|
"atty",
|
||||||
|
"chrono",
|
||||||
|
"log 0.4.8",
|
||||||
|
"termcolor",
|
||||||
|
"thread_local",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "structopt"
|
name = "structopt"
|
||||||
version = "0.3.14"
|
version = "0.3.14"
|
||||||
@ -1745,7 +1802,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|||||||
checksum = "863246aaf5ddd0d6928dfeb1a9ca65f505599e4e1b399935ef7e75107516b4ef"
|
checksum = "863246aaf5ddd0d6928dfeb1a9ca65f505599e4e1b399935ef7e75107516b4ef"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"clap",
|
"clap",
|
||||||
"lazy_static",
|
"lazy_static 1.4.0",
|
||||||
"structopt-derive",
|
"structopt-derive",
|
||||||
]
|
]
|
||||||
|
|
||||||
@ -1810,6 +1867,27 @@ dependencies = [
|
|||||||
"winapi 0.3.8",
|
"winapi 0.3.8",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "termcolor"
|
||||||
|
version = "1.1.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "bb6bfa289a4d7c5766392812c0a1f4c1ba45afa1ad47803c11e1f407d846d75f"
|
||||||
|
dependencies = [
|
||||||
|
"winapi-util",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "termion"
|
||||||
|
version = "1.5.5"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "c22cec9d8978d906be5ac94bceb5a010d885c626c4c8855721a4dbd20e3ac905"
|
||||||
|
dependencies = [
|
||||||
|
"libc",
|
||||||
|
"numtoa",
|
||||||
|
"redox_syscall",
|
||||||
|
"redox_termios",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "textwrap"
|
name = "textwrap"
|
||||||
version = "0.11.0"
|
version = "0.11.0"
|
||||||
@ -1819,6 +1897,16 @@ dependencies = [
|
|||||||
"unicode-width",
|
"unicode-width",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "thread_local"
|
||||||
|
version = "0.3.4"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "1697c4b57aeeb7a536b647165a2825faddffb1d3bad386d507709bd51a90bb14"
|
||||||
|
dependencies = [
|
||||||
|
"lazy_static 0.2.11",
|
||||||
|
"unreachable",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "time"
|
name = "time"
|
||||||
version = "0.1.43"
|
version = "0.1.43"
|
||||||
@ -1849,7 +1937,7 @@ dependencies = [
|
|||||||
"fnv",
|
"fnv",
|
||||||
"futures-core",
|
"futures-core",
|
||||||
"iovec",
|
"iovec",
|
||||||
"lazy_static",
|
"lazy_static 1.4.0",
|
||||||
"libc",
|
"libc",
|
||||||
"memchr",
|
"memchr",
|
||||||
"mio",
|
"mio",
|
||||||
@ -2010,6 +2098,15 @@ version = "0.2.0"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "826e7639553986605ec5979c7dd957c7895e93eabed50ab2ffa7f6128a75097c"
|
checksum = "826e7639553986605ec5979c7dd957c7895e93eabed50ab2ffa7f6128a75097c"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "unreachable"
|
||||||
|
version = "1.0.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "382810877fe448991dfc7f0dd6e3ae5d58088fd0ea5e35189655f84e6814fa56"
|
||||||
|
dependencies = [
|
||||||
|
"void",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "url"
|
name = "url"
|
||||||
version = "2.1.1"
|
version = "2.1.1"
|
||||||
@ -2045,6 +2142,12 @@ version = "0.9.2"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "b5a972e5669d67ba988ce3dc826706fb0a8b01471c088cb0b6110b805cc36aed"
|
checksum = "b5a972e5669d67ba988ce3dc826706fb0a8b01471c088cb0b6110b805cc36aed"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "void"
|
||||||
|
version = "1.0.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "6a02e4885ed3bc0f2de90ea6dd45ebcbb66dacffe03547fadbb0eeae2770887d"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "walkdir"
|
name = "walkdir"
|
||||||
version = "2.3.1"
|
version = "2.3.1"
|
||||||
@ -2115,7 +2218,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|||||||
checksum = "ded84f06e0ed21499f6184df0e0cb3494727b0c5da89534e0fcc55c51d812101"
|
checksum = "ded84f06e0ed21499f6184df0e0cb3494727b0c5da89534e0fcc55c51d812101"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"bumpalo",
|
"bumpalo",
|
||||||
"lazy_static",
|
"lazy_static 1.4.0",
|
||||||
"log 0.4.8",
|
"log 0.4.8",
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
"quote",
|
"quote",
|
||||||
|
@ -27,6 +27,10 @@ smallvec = "1.4.0"
|
|||||||
structopt = { version = "0.3.14", default-features = false }
|
structopt = { version = "0.3.14", default-features = false }
|
||||||
tempfile = "3.1.0"
|
tempfile = "3.1.0"
|
||||||
|
|
||||||
|
# logging
|
||||||
|
log = "0.4.8"
|
||||||
|
stderrlog = "0.4.3"
|
||||||
|
|
||||||
# best proximity
|
# best proximity
|
||||||
indexmap = "1.4.0"
|
indexmap = "1.4.0"
|
||||||
|
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
use std::cmp;
|
use std::cmp;
|
||||||
use std::time::Instant;
|
use std::time::Instant;
|
||||||
|
|
||||||
|
use log::debug;
|
||||||
use crate::iter_shortest_paths::astar_bag;
|
use crate::iter_shortest_paths::astar_bag;
|
||||||
|
|
||||||
const ONE_ATTRIBUTE: u32 = 1000;
|
const ONE_ATTRIBUTE: u32 = 1000;
|
||||||
@ -153,18 +154,18 @@ impl BestProximity {
|
|||||||
},
|
},
|
||||||
);
|
);
|
||||||
|
|
||||||
eprintln!("BestProximity::next() took {:.02?}", before.elapsed());
|
debug!("BestProximity::next() took {:.02?}", before.elapsed());
|
||||||
|
|
||||||
match result {
|
match result {
|
||||||
Some((paths, proximity)) => {
|
Some((paths, proximity)) => {
|
||||||
self.best_proximity = proximity + 1;
|
self.best_proximity = proximity + 1;
|
||||||
// We retrieve the last path that we convert into a Vec
|
// We retrieve the last path that we convert into a Vec
|
||||||
let paths: Vec<_> = paths.map(|p| p.iter().filter_map(Node::position).collect()).collect();
|
let paths: Vec<_> = paths.map(|p| p.iter().filter_map(Node::position).collect()).collect();
|
||||||
eprintln!("result: {} {:?}", proximity, paths);
|
debug!("result: {} {:?}", proximity, paths);
|
||||||
Some((proximity, paths))
|
Some((proximity, paths))
|
||||||
},
|
},
|
||||||
None => {
|
None => {
|
||||||
eprintln!("result: {:?}", None as Option<()>);
|
debug!("result: {:?}", None as Option<()>);
|
||||||
self.best_proximity += 1;
|
self.best_proximity += 1;
|
||||||
None
|
None
|
||||||
},
|
},
|
||||||
|
@ -11,6 +11,7 @@ use cow_utils::CowUtils;
|
|||||||
use fst::{Streamer, IntoStreamer};
|
use fst::{Streamer, IntoStreamer};
|
||||||
use heed::EnvOpenOptions;
|
use heed::EnvOpenOptions;
|
||||||
use heed::types::*;
|
use heed::types::*;
|
||||||
|
use log::debug;
|
||||||
use oxidized_mtbl::{Reader, ReaderOptions, Writer, Merger, MergerOptions};
|
use oxidized_mtbl::{Reader, ReaderOptions, Writer, Merger, MergerOptions};
|
||||||
use rayon::prelude::*;
|
use rayon::prelude::*;
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
@ -86,7 +87,7 @@ struct MtblKvStore(Option<File>);
|
|||||||
|
|
||||||
impl MtblKvStore {
|
impl MtblKvStore {
|
||||||
fn from_indexed(mut indexed: Indexed) -> anyhow::Result<MtblKvStore> {
|
fn from_indexed(mut indexed: Indexed) -> anyhow::Result<MtblKvStore> {
|
||||||
eprintln!("Creating an MTBL store from an Indexed...");
|
debug!("Creating an MTBL store from an Indexed...");
|
||||||
|
|
||||||
let outfile = tempfile::tempfile()?;
|
let outfile = tempfile::tempfile()?;
|
||||||
let mut out = Writer::new(outfile, None)?;
|
let mut out = Writer::new(outfile, None)?;
|
||||||
@ -152,7 +153,7 @@ impl MtblKvStore {
|
|||||||
|
|
||||||
let out = out.into_inner()?;
|
let out = out.into_inner()?;
|
||||||
|
|
||||||
eprintln!("MTBL store created!");
|
debug!("MTBL store created!");
|
||||||
Ok(MtblKvStore(Some(out)))
|
Ok(MtblKvStore(Some(out)))
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -198,7 +199,7 @@ impl MtblKvStore {
|
|||||||
fn from_many<F>(stores: Vec<MtblKvStore>, mut f: F) -> anyhow::Result<()>
|
fn from_many<F>(stores: Vec<MtblKvStore>, mut f: F) -> anyhow::Result<()>
|
||||||
where F: FnMut(&[u8], &[u8]) -> anyhow::Result<()>
|
where F: FnMut(&[u8], &[u8]) -> anyhow::Result<()>
|
||||||
{
|
{
|
||||||
eprintln!("Merging {} MTBL stores...", stores.len());
|
debug!("Merging {} MTBL stores...", stores.len());
|
||||||
let before = Instant::now();
|
let before = Instant::now();
|
||||||
|
|
||||||
let mmaps: Vec<_> = stores.iter().flat_map(|m| {
|
let mmaps: Vec<_> = stores.iter().flat_map(|m| {
|
||||||
@ -217,7 +218,7 @@ impl MtblKvStore {
|
|||||||
(f)(k, v)?;
|
(f)(k, v)?;
|
||||||
}
|
}
|
||||||
|
|
||||||
eprintln!("MTBL stores merged in {:.02?}!", before.elapsed());
|
debug!("MTBL stores merged in {:.02?}!", before.elapsed());
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -256,7 +257,7 @@ fn index_csv(
|
|||||||
max_mem_usage: usize,
|
max_mem_usage: usize,
|
||||||
) -> anyhow::Result<Vec<MtblKvStore>>
|
) -> anyhow::Result<Vec<MtblKvStore>>
|
||||||
{
|
{
|
||||||
eprintln!("{:?}: Indexing into an Indexed...", thread_index);
|
debug!("{:?}: Indexing into an Indexed...", thread_index);
|
||||||
|
|
||||||
let mut stores = Vec::new();
|
let mut stores = Vec::new();
|
||||||
|
|
||||||
@ -281,7 +282,7 @@ fn index_csv(
|
|||||||
let document_id = DocumentId::try_from(document_id).context("generated id is too big")?;
|
let document_id = DocumentId::try_from(document_id).context("generated id is too big")?;
|
||||||
|
|
||||||
if document_id % (ONE_MILLION as u32) == 0 {
|
if document_id % (ONE_MILLION as u32) == 0 {
|
||||||
eprintln!("We have seen {}m documents so far.", document_id / ONE_MILLION as u32);
|
debug!("We have seen {}m documents so far.", document_id / ONE_MILLION as u32);
|
||||||
}
|
}
|
||||||
|
|
||||||
for (attr, content) in document.iter().enumerate().take(MAX_ATTRIBUTES) {
|
for (attr, content) in document.iter().enumerate().take(MAX_ATTRIBUTES) {
|
||||||
@ -310,21 +311,21 @@ fn index_csv(
|
|||||||
if documents.len() % 100_000 == 0 {
|
if documents.len() % 100_000 == 0 {
|
||||||
let usage = mem_usage(&word_positions, &word_position_docids, &documents);
|
let usage = mem_usage(&word_positions, &word_position_docids, &documents);
|
||||||
if usage > max_mem_usage {
|
if usage > max_mem_usage {
|
||||||
eprintln!("Whoops too much memory used ({}B).", usage);
|
debug!("Whoops too much memory used ({}B).", usage);
|
||||||
|
|
||||||
let word_positions = mem::take(&mut word_positions);
|
let word_positions = mem::take(&mut word_positions);
|
||||||
let word_position_docids = mem::take(&mut word_position_docids);
|
let word_position_docids = mem::take(&mut word_position_docids);
|
||||||
let documents = mem::take(&mut documents);
|
let documents = mem::take(&mut documents);
|
||||||
|
|
||||||
let indexed = Indexed::new(word_positions, word_position_docids, headers.clone(), documents)?;
|
let indexed = Indexed::new(word_positions, word_position_docids, headers.clone(), documents)?;
|
||||||
eprintln!("{:?}: Indexed created!", thread_index);
|
debug!("{:?}: Indexed created!", thread_index);
|
||||||
stores.push(MtblKvStore::from_indexed(indexed)?);
|
stores.push(MtblKvStore::from_indexed(indexed)?);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
let indexed = Indexed::new(word_positions, word_position_docids, headers, documents)?;
|
let indexed = Indexed::new(word_positions, word_position_docids, headers, documents)?;
|
||||||
eprintln!("{:?}: Indexed created!", thread_index);
|
debug!("{:?}: Indexed created!", thread_index);
|
||||||
stores.push(MtblKvStore::from_indexed(indexed)?);
|
stores.push(MtblKvStore::from_indexed(indexed)?);
|
||||||
|
|
||||||
Ok(stores)
|
Ok(stores)
|
||||||
@ -372,7 +373,7 @@ fn writer(wtxn: &mut heed::RwTxn, index: &Index, key: &[u8], val: &[u8]) -> anyh
|
|||||||
fn compute_words_attributes_docids(wtxn: &mut heed::RwTxn, index: &Index) -> anyhow::Result<()> {
|
fn compute_words_attributes_docids(wtxn: &mut heed::RwTxn, index: &Index) -> anyhow::Result<()> {
|
||||||
let before = Instant::now();
|
let before = Instant::now();
|
||||||
|
|
||||||
eprintln!("Computing the attributes documents ids...");
|
debug!("Computing the attributes documents ids...");
|
||||||
|
|
||||||
let fst = match index.fst(&wtxn)? {
|
let fst = match index.fst(&wtxn)? {
|
||||||
Some(fst) => fst.map_data(|s| s.to_vec())?,
|
Some(fst) => fst.map_data(|s| s.to_vec())?,
|
||||||
@ -408,7 +409,7 @@ fn compute_words_attributes_docids(wtxn: &mut heed::RwTxn, index: &Index) -> any
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
eprintln!("Computing the attributes documents ids took {:.02?}.", before.elapsed());
|
debug!("Computing the attributes documents ids took {:.02?}.", before.elapsed());
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
@ -444,7 +445,7 @@ fn main() -> anyhow::Result<()> {
|
|||||||
|
|
||||||
let stores: Vec<_> = stores.into_iter().flatten().collect();
|
let stores: Vec<_> = stores.into_iter().flatten().collect();
|
||||||
|
|
||||||
eprintln!("We are writing into LMDB...");
|
debug!("We are writing into LMDB...");
|
||||||
let mut wtxn = env.write_txn()?;
|
let mut wtxn = env.write_txn()?;
|
||||||
|
|
||||||
MtblKvStore::from_many(stores, |k, v| writer(&mut wtxn, &index, k, v))?;
|
MtblKvStore::from_many(stores, |k, v| writer(&mut wtxn, &index, k, v))?;
|
||||||
@ -452,7 +453,7 @@ fn main() -> anyhow::Result<()> {
|
|||||||
let count = index.documents.len(&wtxn)?;
|
let count = index.documents.len(&wtxn)?;
|
||||||
|
|
||||||
wtxn.commit()?;
|
wtxn.commit()?;
|
||||||
eprintln!("Wrote {} documents into LMDB", count);
|
debug!("Wrote {} documents into LMDB", count);
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
@ -4,8 +4,9 @@ use std::path::PathBuf;
|
|||||||
use std::time::Instant;
|
use std::time::Instant;
|
||||||
|
|
||||||
use heed::EnvOpenOptions;
|
use heed::EnvOpenOptions;
|
||||||
use structopt::StructOpt;
|
use log::debug;
|
||||||
use milli::{Index, BEU32};
|
use milli::{Index, BEU32};
|
||||||
|
use structopt::StructOpt;
|
||||||
|
|
||||||
#[cfg(target_os = "linux")]
|
#[cfg(target_os = "linux")]
|
||||||
#[global_allocator]
|
#[global_allocator]
|
||||||
@ -62,7 +63,7 @@ fn main() -> anyhow::Result<()> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
eprintln!("Took {:.02?} to find {} documents", before.elapsed(), documents_ids.len());
|
debug!("Took {:.02?} to find {} documents", before.elapsed(), documents_ids.len());
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
|
@ -158,7 +158,7 @@ async fn main() -> anyhow::Result<()> {
|
|||||||
.or(query_route);
|
.or(query_route);
|
||||||
|
|
||||||
let addr = SocketAddr::from_str(&opt.http_listen_addr).unwrap();
|
let addr = SocketAddr::from_str(&opt.http_listen_addr).unwrap();
|
||||||
eprintln!("listening on http://{}", addr);
|
println!("listening on http://{}", addr);
|
||||||
warp::serve(routes).run(addr).await;
|
warp::serve(routes).run(addr).await;
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
|
19
src/lib.rs
19
src/lib.rs
@ -14,6 +14,7 @@ use fxhash::{FxHasher32, FxHasher64};
|
|||||||
use heed::types::*;
|
use heed::types::*;
|
||||||
use heed::{PolyDatabase, Database};
|
use heed::{PolyDatabase, Database};
|
||||||
use levenshtein_automata::LevenshteinAutomatonBuilder as LevBuilder;
|
use levenshtein_automata::LevenshteinAutomatonBuilder as LevBuilder;
|
||||||
|
use log::debug;
|
||||||
use once_cell::sync::Lazy;
|
use once_cell::sync::Lazy;
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
@ -138,7 +139,7 @@ impl Index {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
eprintln!("{} words for {:?} we have found positions {:?} in {:.02?}",
|
debug!("{} words for {:?} we have found positions {:?} in {:.02?}",
|
||||||
count, word, union_positions, before.elapsed());
|
count, word, union_positions, before.elapsed());
|
||||||
words.push(derived_words);
|
words.push(derived_words);
|
||||||
positions.push(union_positions.iter().collect());
|
positions.push(union_positions.iter().collect());
|
||||||
@ -168,9 +169,9 @@ impl Index {
|
|||||||
words_attributes_docids.push(intersect_docids);
|
words_attributes_docids.push(intersect_docids);
|
||||||
}
|
}
|
||||||
|
|
||||||
eprintln!("The documents you must find for each attribute: {:?}", words_attributes_docids);
|
debug!("The documents you must find for each attribute: {:?}", words_attributes_docids);
|
||||||
|
|
||||||
eprintln!("Retrieving words positions took {:.02?}", before.elapsed());
|
debug!("Retrieving words positions took {:.02?}", before.elapsed());
|
||||||
|
|
||||||
// Returns the union of the same position for all the derived words.
|
// Returns the union of the same position for all the derived words.
|
||||||
let unions_word_pos = |word: usize, pos: u32| {
|
let unions_word_pos = |word: usize, pos: u32| {
|
||||||
@ -259,10 +260,10 @@ impl Index {
|
|||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
eprintln!("retrieving words took {:.02?} and took {:.02?} to intersect",
|
debug!("retrieving words took {:.02?} and took {:.02?} to intersect",
|
||||||
elapsed_retrieving, before_intersect.elapsed());
|
elapsed_retrieving, before_intersect.elapsed());
|
||||||
|
|
||||||
eprintln!("for proximity {:?} {:?} we took {:.02?} to find {} documents",
|
debug!("for proximity {:?} {:?} we took {:.02?} to find {} documents",
|
||||||
proximity, positions, before.elapsed(),
|
proximity, positions, before.elapsed(),
|
||||||
intersect_docids.as_ref().map_or(0, |rb| rb.len()));
|
intersect_docids.as_ref().map_or(0, |rb| rb.len()));
|
||||||
|
|
||||||
@ -272,7 +273,7 @@ impl Index {
|
|||||||
|
|
||||||
// We found enough documents we can stop here
|
// We found enough documents we can stop here
|
||||||
if documents.iter().map(RoaringBitmap::len).sum::<u64>() + same_proximity_union.len() >= 20 {
|
if documents.iter().map(RoaringBitmap::len).sum::<u64>() + same_proximity_union.len() >= 20 {
|
||||||
eprintln!("proximity {} took a total of {:.02?}", proximity, same_prox_before.elapsed());
|
debug!("proximity {} took a total of {:.02?}", proximity, same_prox_before.elapsed());
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -294,8 +295,8 @@ impl Index {
|
|||||||
}
|
}
|
||||||
documents.retain(|rb| !rb.is_empty());
|
documents.retain(|rb| !rb.is_empty());
|
||||||
|
|
||||||
eprintln!("documents: {:?}", documents);
|
debug!("documents: {:?}", documents);
|
||||||
eprintln!("proximity {} took a total of {:.02?}", proximity, same_prox_before.elapsed());
|
debug!("proximity {} took a total of {:.02?}", proximity, same_prox_before.elapsed());
|
||||||
|
|
||||||
// We found enough documents we can stop here.
|
// We found enough documents we can stop here.
|
||||||
if documents.iter().map(RoaringBitmap::len).sum::<u64>() >= 20 {
|
if documents.iter().map(RoaringBitmap::len).sum::<u64>() >= 20 {
|
||||||
@ -303,7 +304,7 @@ impl Index {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
eprintln!("{} candidates", documents.iter().map(RoaringBitmap::len).sum::<u64>());
|
debug!("{} candidates", documents.iter().map(RoaringBitmap::len).sum::<u64>());
|
||||||
Ok(documents.iter().flatten().take(20).collect())
|
Ok(documents.iter().flatten().take(20).collect())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user