Move the command-line helpers into different crates

This commit is contained in:
Clément Renault 2021-02-14 18:55:15 +01:00
parent d8f3421608
commit fecf3d6fc1
No known key found for this signature in database
GPG Key ID: 92ADA4E935E71FA4
11 changed files with 142 additions and 48 deletions

66
Cargo.lock generated
View File

@ -532,6 +532,12 @@ dependencies = [
"percent-encoding",
]
[[package]]
name = "fs_extra"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2022715d62ab30faffd124d40b76f4134a550a87792276512b18d63272333394"
[[package]]
name = "fst"
version = "0.4.5"
@ -556,9 +562,9 @@ checksum = "3dcaa9ae7725d12cdb85b3ad99a434db70b468c09ded17e012d86b5c1010f7a7"
[[package]]
name = "funty"
version = "1.2.0"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1847abb9cb65d566acd5942e94aea9c8f547ad02c98e1649326fc0e8910b8b1e"
checksum = "fed34cd105917e91daa4da6b3728c47b068749d6a62c59811f06ed2ac71d9da7"
[[package]]
name = "futures"
@ -881,6 +887,7 @@ dependencies = [
"either",
"flate2",
"fst",
"funty",
"futures",
"grenad",
"heed",
@ -968,6 +975,22 @@ dependencies = [
"hashbrown 0.9.1",
]
[[package]]
name = "infos"
version = "0.1.0"
dependencies = [
"anyhow",
"byte-unit",
"csv",
"heed",
"jemallocator",
"milli",
"roaring",
"serde_json",
"stderrlog",
"structopt",
]
[[package]]
name = "input_buffer"
version = "0.3.1"
@ -1010,6 +1033,27 @@ version = "0.4.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dd25036021b0de88a0aff6b850051563c6516d0bf53f8638938edbb9de732736"
[[package]]
name = "jemalloc-sys"
version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0d3b9f3f5c9b31aa0f5ed3260385ac205db665baa41d49bb8338008ae94ede45"
dependencies = [
"cc",
"fs_extra",
"libc",
]
[[package]]
name = "jemallocator"
version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "43ae63fcfc45e99ab3d1b29a46782ad679e98436c3169d15a167a1108a724b69"
dependencies = [
"jemalloc-sys",
"libc",
]
[[package]]
name = "jieba-rs"
version = "0.6.2"
@ -1172,7 +1216,6 @@ version = "0.1.0"
dependencies = [
"anyhow",
"bstr",
"byte-unit",
"byteorder",
"criterion",
"crossbeam-channel",
@ -1204,8 +1247,6 @@ dependencies = [
"serde_json",
"smallstr",
"smallvec",
"stderrlog",
"structopt",
"tempfile",
"uuid",
]
@ -1902,6 +1943,21 @@ version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd"
[[package]]
name = "search"
version = "0.1.0"
dependencies = [
"anyhow",
"byte-unit",
"heed",
"jemallocator",
"log",
"milli",
"serde_json",
"stderrlog",
"structopt",
]
[[package]]
name = "semver"
version = "0.9.0"

View File

@ -1,5 +1,5 @@
[workspace]
members = ["milli", "http-ui"]
members = ["milli", "http-ui", "infos", "search"]
default-members = ["milli"]
[profile.release]

View File

@ -34,3 +34,6 @@ warp = "0.2.2"
log = "0.4.11"
stderrlog = "0.5.0"
fst = "0.4.5"
# Temporary fix for bitvec, remove once fixed. (https://github.com/bitvecto-rs/bitvec/issues/105)
funty = "=1.1.0"

17
infos/Cargo.toml Normal file
View File

@ -0,0 +1,17 @@
[package]
name = "infos"
version = "0.1.0"
authors = ["Clément Renault <clement@meilisearch.com>"]
edition = "2018"
[dependencies]
anyhow = "1.0.28"
byte-unit = { version = "4.0.9", default-features = false, features = ["std"] }
csv = "1.1.3"
heed = "0.10.5"
jemallocator = "0.3.2"
milli = { path = "../milli" }
roaring = "0.6.4"
serde_json = "1.0.59"
stderrlog = "0.5.0"
structopt = { version = "0.3.14", default-features = false }

View File

@ -4,12 +4,16 @@ use std::{str, io, fmt};
use anyhow::Context;
use byte_unit::Byte;
use crate::Index;
use heed::EnvOpenOptions;
use milli::Index;
use structopt::StructOpt;
use Command::*;
// Register jemalloc as the global allocator of this binary. The `cfg` gate
// restricts the override to Linux targets; on any other target this item is
// compiled out entirely.
#[cfg(target_os = "linux")]
#[global_allocator]
static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc;
const MAIN_DB_NAME: &str = "main";
const WORD_DOCIDS_DB_NAME: &str = "word-docids";
const DOCID_WORD_POSITIONS_DB_NAME: &str = "docid-word-positions";
@ -153,7 +157,18 @@ enum Command {
PatchToNewExternalIds,
}
pub fn run(opt: Opt) -> anyhow::Result<()> {
/// Entry point of the binary: parses the command-line options and hands them
/// over to `run`.
///
/// When `run` fails, the error is written to stderr together with its whole
/// cause chain and the process terminates with exit status 1.
fn main() -> Result<(), ()> {
    let opt = Opt::from_args();
    if let Err(e) = run(opt) {
        // `{:#}` renders every context layer attached to the `anyhow` error
        // ("outer: inner: root cause"); a plain `{}` would only show the
        // outermost message and hide the actual cause.
        eprintln!("{:#}", e);
        // Exit explicitly instead of returning `Err(())`: returning the unit
        // error would make the runtime print an extra, meaningless
        // `Error: ()` line after our own message.
        std::process::exit(1);
    }
    Ok(())
}
fn run(opt: Opt) -> anyhow::Result<()> {
stderrlog::new()
.verbosity(opt.verbose)
.show_level(false)
@ -204,7 +219,7 @@ fn patch_to_new_external_ids(index: &Index, wtxn: &mut heed::RwTxn) -> anyhow::R
let documents_ids = documents_ids.to_owned();
index.main.put::<_, ByteSlice, ByteSlice>(
wtxn,
crate::index::HARD_EXTERNAL_DOCUMENTS_IDS_KEY.as_bytes(),
milli::index::HARD_EXTERNAL_DOCUMENTS_IDS_KEY.as_bytes(),
&documents_ids,
)?;
index.main.delete::<_, ByteSlice>(wtxn, USERS_IDS_DOCUMENTS_IDS)?;
@ -242,7 +257,7 @@ fn facet_values_iter<'txn, DC: 'txn, T>(
rtxn: &'txn heed::RoTxn,
db: heed::Database<heed::types::ByteSlice, DC>,
field_id: u8,
facet_type: crate::facet::FacetType,
facet_type: milli::facet::FacetType,
string_fn: impl Fn(&str) -> T + 'txn,
float_fn: impl Fn(u8, f64, f64) -> T + 'txn,
integer_fn: impl Fn(u8, i64, i64) -> T + 'txn,
@ -250,8 +265,8 @@ fn facet_values_iter<'txn, DC: 'txn, T>(
where
DC: heed::BytesDecode<'txn>,
{
use crate::facet::FacetType;
use crate::heed_codec::facet::{
use milli::facet::FacetType;
use milli::heed_codec::facet::{
FacetValueStringCodec, FacetLevelValueF64Codec, FacetLevelValueI64Codec,
};
@ -504,7 +519,7 @@ fn export_words_fst(index: &Index, rtxn: &heed::RoTxn) -> anyhow::Result<()> {
fn export_documents(index: &Index, rtxn: &heed::RoTxn) -> anyhow::Result<()> {
use std::io::{BufWriter, Write as _};
use crate::obkv_to_json;
use milli::obkv_to_json;
let stdout = io::stdout();
let mut out = BufWriter::new(stdout);
@ -548,7 +563,7 @@ fn total_docid_word_positions_size(index: &Index, rtxn: &heed::RoTxn) -> anyhow:
fn average_number_of_words_by_doc(index: &Index, rtxn: &heed::RoTxn) -> anyhow::Result<()> {
use heed::types::DecodeIgnore;
use crate::{DocumentId, BEU32StrCodec};
use milli::{DocumentId, BEU32StrCodec};
let mut words_counts = Vec::new();
let mut count = 0;
@ -587,7 +602,7 @@ fn average_number_of_words_by_doc(index: &Index, rtxn: &heed::RoTxn) -> anyhow::
fn average_number_of_positions_by_word(index: &Index, rtxn: &heed::RoTxn) -> anyhow::Result<()> {
use heed::types::DecodeIgnore;
use crate::BoRoaringBitmapCodec;
use milli::BoRoaringBitmapCodec;
let mut values_length = Vec::new();
let mut count = 0;
@ -639,7 +654,7 @@ fn database_stats(index: &Index, rtxn: &heed::RoTxn, name: &str) -> anyhow::Resu
use heed::types::ByteSlice;
use heed::{Error, BytesDecode};
use roaring::RoaringBitmap;
use crate::{BoRoaringBitmapCodec, CboRoaringBitmapCodec, RoaringBitmapCodec};
use milli::{BoRoaringBitmapCodec, CboRoaringBitmapCodec, RoaringBitmapCodec};
fn compute_stats<'a, DC: BytesDecode<'a, DItem = RoaringBitmap>>(
db: heed::PolyDatabase,
@ -720,7 +735,7 @@ fn word_pair_proximities_docids(
) -> anyhow::Result<()>
{
use heed::types::ByteSlice;
use crate::RoaringBitmapCodec;
use milli::RoaringBitmapCodec;
let stdout = io::stdout();
let mut wtr = csv::Writer::from_writer(stdout.lock());

View File

@ -7,7 +7,6 @@ edition = "2018"
[dependencies]
anyhow = "1.0.28"
bstr = "0.2.13"
byte-unit = { version = "4.0.9", default-features = false, features = ["std"] }
byteorder = "1.3.4"
crossbeam-channel = "0.5.0"
csv = "1.1.3"
@ -33,7 +32,6 @@ serde = { version = "1.0", features = ["derive"] }
serde_json = { version = "1.0.59", features = ["preserve_order"] }
smallstr = { version = "0.2.0", features = ["serde"] }
smallvec = "1.4.0"
structopt = { version = "0.3.14", default-features = false, features = ["wrap_help"] }
tempfile = "3.1.0"
uuid = { version = "0.8.1", features = ["v4"] }
@ -46,7 +44,6 @@ itertools = "0.9.0"
# logging
log = "0.4.11"
stderrlog = "0.5.0"
[dev-dependencies]
criterion = "0.3.3"

View File

@ -3,15 +3,14 @@
mod criterion;
mod external_documents_ids;
mod fields_ids_map;
mod index;
mod mdfs;
mod query_tokens;
mod search;
mod update_store;
pub mod facet;
pub mod heed_codec;
pub mod index;
pub mod proximity;
pub mod subcommand;
pub mod update;
use std::borrow::Cow;

View File

@ -1,22 +0,0 @@
use structopt::StructOpt;
use milli::subcommand::infos::{self, Opt as InfosOpt};
use milli::subcommand::search::{self, Opt as SearchOpt};
// Register jemalloc as the global allocator of this binary. The `cfg` gate
// restricts the override to Linux targets; on any other target this item is
// compiled out entirely.
#[cfg(target_os = "linux")]
#[global_allocator]
static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc;
// Subcommands accepted on the command line; the argument parser is generated
// by the `StructOpt` derive.
// NOTE: plain `//` comments are used here on purpose, structopt turns `///`
// doc comments into help text, which would alter the CLI output.
#[derive(Debug, StructOpt)]
#[structopt(name = "milli", about = "The milli project.")]
enum Command {
// Carries the options that `main` passes to `infos::run`.
Infos(InfosOpt),
// Carries the options that `main` passes to `search::run`.
Search(SearchOpt),
}
/// Parses the command line and dispatches to the selected subcommand,
/// returning that subcommand's result unchanged.
fn main() -> anyhow::Result<()> {
    let command = Command::from_args();
    match command {
        Command::Infos(infos_opt) => infos::run(infos_opt),
        Command::Search(search_opt) => search::run(search_opt),
    }
}

View File

@ -1,2 +0,0 @@
pub mod infos;
pub mod search;

16
search/Cargo.toml Normal file
View File

@ -0,0 +1,16 @@
[package]
name = "search"
version = "0.1.0"
authors = ["Clément Renault <clement@meilisearch.com>"]
edition = "2018"
[dependencies]
anyhow = "1.0.28"
byte-unit = { version = "4.0.9", default-features = false, features = ["std"] }
heed = "0.10.5"
jemallocator = "0.3.2"
log = "0.4.11"
milli = { path = "../milli" }
serde_json = "1.0.59"
stderrlog = "0.5.0"
structopt = { version = "0.3.14", default-features = false }

View File

@ -8,7 +8,11 @@ use heed::EnvOpenOptions;
use log::debug;
use structopt::StructOpt;
use crate::{Index, obkv_to_json};
use milli::{Index, obkv_to_json};
// Register jemalloc as the global allocator of this binary. The `cfg` gate
// restricts the override to Linux targets; on any other target this item is
// compiled out entirely.
#[cfg(target_os = "linux")]
#[global_allocator]
static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc;
#[derive(Debug, StructOpt)]
/// A simple search helper binary for the milli project.
@ -35,7 +39,18 @@ pub struct Opt {
print_facet_distribution: bool,
}
pub fn run(opt: Opt) -> anyhow::Result<()> {
/// Entry point of the binary: parses the command-line options and hands them
/// over to `run`.
///
/// When `run` fails, the error is written to stderr together with its whole
/// cause chain and the process terminates with exit status 1.
fn main() -> Result<(), ()> {
    let opt = Opt::from_args();
    if let Err(e) = run(opt) {
        // `{:#}` renders every context layer attached to the `anyhow` error
        // ("outer: inner: root cause"); a plain `{}` would only show the
        // outermost message and hide the actual cause.
        eprintln!("{:#}", e);
        // Exit explicitly instead of returning `Err(())`: returning the unit
        // error would make the runtime print an extra, meaningless
        // `Error: ()` line after our own message.
        std::process::exit(1);
    }
    Ok(())
}
fn run(opt: Opt) -> anyhow::Result<()> {
stderrlog::new()
.verbosity(opt.verbose)
.show_level(false)