Move the binaries into one with subcommands

This commit is contained in:
Clément Renault 2020-10-19 13:44:17 +02:00
parent ff389f1270
commit 65e32fecb1
No known key found for this signature in database
GPG Key ID: 92ADA4E935E71FA4
9 changed files with 168 additions and 88 deletions

125
Cargo.lock generated
View File

@ -257,6 +257,15 @@ dependencies = [
"bitflags",
]
[[package]]
name = "cloudabi"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4344512281c643ae7638bbabc3af17a11307803ec8f0fcad9fae512a8bf36467"
dependencies = [
"bitflags",
]
[[package]]
name = "const_fn"
version = "0.4.2"
@ -637,7 +646,7 @@ dependencies = [
"indexmap",
"log 0.4.11",
"slab",
"tokio",
"tokio 0.2.21",
"tokio-util",
]
@ -793,7 +802,7 @@ dependencies = [
"pin-project",
"socket2",
"time",
"tokio",
"tokio 0.2.21",
"tower-service",
"want",
]
@ -828,6 +837,15 @@ dependencies = [
"bytes",
]
[[package]]
name = "instant"
version = "0.1.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "63312a18f7ea8760cdd0a7c5aac1a619752a246b833545e3e36d1f81f7cd9e66"
dependencies = [
"cfg-if 0.1.10",
]
[[package]]
name = "iovec"
version = "0.1.4"
@ -939,6 +957,15 @@ dependencies = [
"pkg-config",
]
[[package]]
name = "lock_api"
version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "28247cc5a5be2f05fbcd76dd0cf2c7d3b5400cb978a28042abcd4fa0b3f8261c"
dependencies = [
"scopeguard",
]
[[package]]
name = "log"
version = "0.3.9"
@ -1020,7 +1047,7 @@ dependencies = [
"stderrlog",
"structopt",
"tempfile",
"tokio",
"tokio 0.3.0",
"warp",
]
@ -1090,26 +1117,16 @@ dependencies = [
]
[[package]]
name = "mio-named-pipes"
version = "0.1.6"
name = "mio"
version = "0.7.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f5e374eff525ce1c5b7687c4cef63943e7686524a387933ad27ca7ec43779cb3"
checksum = "e53a6ea5f38c0a48ca42159868c6d8e1bd56c0451238856cc08d58563643bdc3"
dependencies = [
"log 0.4.11",
"mio",
"miow 0.3.4",
"winapi 0.3.8",
]
[[package]]
name = "mio-uds"
version = "0.6.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "afcb699eb26d4332647cc848492bbc15eafb26f08d0304550d5aa1f612e066f0"
dependencies = [
"iovec",
"libc",
"mio",
"log 0.4.11",
"miow 0.3.4",
"ntapi",
"winapi 0.3.8",
]
[[package]]
@ -1193,6 +1210,15 @@ dependencies = [
"version_check 0.9.2",
]
[[package]]
name = "ntapi"
version = "0.3.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7a31937dea023539c72ddae0e3571deadc1414b300483fa7aaec176168cfa9d2"
dependencies = [
"winapi 0.3.8",
]
[[package]]
name = "num-integer"
version = "0.1.43"
@ -1256,6 +1282,32 @@ dependencies = [
"winapi 0.3.8",
]
[[package]]
name = "parking_lot"
version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a4893845fa2ca272e647da5d0e46660a314ead9c2fdd9a883aabc32e481a8733"
dependencies = [
"instant",
"lock_api",
"parking_lot_core",
]
[[package]]
name = "parking_lot_core"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c361aa727dd08437f2f1447be8b59a33b0edd15e0fcee698f935613d9efbca9b"
dependencies = [
"cfg-if 0.1.10",
"cloudabi 0.1.0",
"instant",
"libc",
"redox_syscall",
"smallvec",
"winapi 0.3.8",
]
[[package]]
name = "percent-encoding"
version = "2.1.0"
@ -1539,7 +1591,7 @@ version = "0.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7b75f676a1e053fc562eafbb47838d67c84801e38fc1ba459e8f180deabd5071"
dependencies = [
"cloudabi",
"cloudabi 0.0.3",
"fuchsia-cprng",
"libc",
"rand_core 0.4.2",
@ -2027,12 +2079,27 @@ dependencies = [
"futures-core",
"iovec",
"lazy_static",
"memchr",
"mio 0.6.22",
"pin-project-lite",
"slab",
]
[[package]]
name = "tokio"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7137dbb0abee577362ccdc7df21605cfcbb949243aeab47dac9ea6ef7d830e21"
dependencies = [
"bytes",
"fnv",
"futures-core",
"lazy_static",
"libc",
"memchr",
"mio",
"mio-named-pipes",
"mio-uds",
"mio 0.7.3",
"num_cpus",
"parking_lot",
"pin-project-lite",
"signal-hook-registry",
"slab",
@ -2042,9 +2109,9 @@ dependencies = [
[[package]]
name = "tokio-macros"
version = "0.2.5"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f0c3acc6aa564495a0f2e1d59fab677cd7f81a19994cfc7f3ad0e64301560389"
checksum = "d48caa7b66c7a6ec943edf78d21a594fbeb24e536c781da67d5c32edec54103f"
dependencies = [
"proc-macro2",
"quote",
@ -2060,7 +2127,7 @@ dependencies = [
"futures",
"log 0.4.11",
"pin-project",
"tokio",
"tokio 0.2.21",
"tungstenite",
]
@ -2075,7 +2142,7 @@ dependencies = [
"futures-sink",
"log 0.4.11",
"pin-project-lite",
"tokio",
"tokio 0.2.21",
]
[[package]]
@ -2263,7 +2330,7 @@ dependencies = [
"serde",
"serde_json",
"serde_urlencoded",
"tokio",
"tokio 0.2.21",
"tokio-tungstenite",
"tower-service",
"urlencoding",

View File

@ -3,7 +3,6 @@ name = "milli"
version = "0.1.0"
authors = ["Kerollmops <clement@meilisearch.com>"]
edition = "2018"
default-run = "indexer"
[dependencies]
anyhow = "1.0.28"
@ -43,7 +42,7 @@ stderrlog = "0.5.0"
askama = "0.10.1"
askama_warp = "0.10.0"
serde = { version = "1.0", features = ["derive"] }
tokio = { version = "0.2.15", features = ["full"] }
tokio = { version = "0.3.0", features = ["full"] }
warp = "0.2.2"
[dev-dependencies]

View File

@ -5,6 +5,7 @@ mod search;
mod update_store;
pub mod heed_codec;
pub mod proximity;
pub mod subcommand;
pub mod tokenizer;
use std::collections::HashMap;

28
src/main.rs Normal file
View File

@ -0,0 +1,28 @@
use structopt::StructOpt;
use milli::subcommand::indexer::{self, Opt as IndexerOpt};
use milli::subcommand::infos::{self, Opt as InfosOpt};
use milli::subcommand::serve::{self, Opt as ServeOpt};
use milli::subcommand::search::{self, Opt as SearchOpt};
// On Linux targets only, register jemalloc as the global allocator of the binary.
// A Rust program may declare at most one `#[global_allocator]`, so it lives in
// this binary crate rather than in the individual subcommand modules.
#[cfg(target_os = "linux")]
#[global_allocator]
static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc;
// Top-level command-line interface of the `milli` binary: each variant is one
// subcommand and wraps the `Opt` options struct imported from the matching
// `milli::subcommand` module.
//
// Plain `//` comments are used here on purpose: structopt turns `///` doc
// comments into help/about text, so adding them would change the CLI output.
#[derive(Debug, StructOpt)]
#[structopt(name = "milli", about = "The milli project.")]
enum Command {
Serve(ServeOpt),
Indexer(IndexerOpt),
Infos(InfosOpt),
Search(SearchOpt),
}
/// Program entry point.
///
/// Parses the process arguments into a [`Command`] and forwards the parsed
/// options to the `run` function of the selected subcommand module. The
/// `anyhow::Result` returned by that function is returned unchanged.
fn main() -> anyhow::Result<()> {
    let command = Command::from_args();

    // Every arm is a plain hand-off: the subcommand module owns all the logic.
    match command {
        Command::Indexer(options) => indexer::run(options),
        Command::Infos(options) => infos::run(options),
        Command::Search(options) => search::run(options),
        Command::Serve(options) => serve::run(options),
    }
}

View File

@ -22,9 +22,9 @@ use roaring::RoaringBitmap;
use structopt::StructOpt;
use tempfile::tempfile;
use milli::heed_codec::{CsvStringRecordCodec, BoRoaringBitmapCodec, CboRoaringBitmapCodec};
use milli::tokenizer::{simple_tokenizer, only_token};
use milli::{SmallVec32, Index, Position, DocumentId};
use crate::heed_codec::{CsvStringRecordCodec, BoRoaringBitmapCodec, CboRoaringBitmapCodec};
use crate::tokenizer::{simple_tokenizer, only_token};
use crate::{SmallVec32, Index, Position, DocumentId};
const LMDB_MAX_KEY_LENGTH: usize = 511;
// NOTE(review): 1024 * 1024 is 1_048_576 (one mebibyte), which does not match
// the name `ONE_KILOBYTE`; confirm whether the name or the value is the
// intended one before relying on it.
const ONE_KILOBYTE: usize = 1024 * 1024;
@ -32,18 +32,14 @@ const ONE_KILOBYTE: usize = 1024 * 1024;
const MAX_POSITION: usize = 1000;
const MAX_ATTRIBUTES: usize = u32::max_value() as usize / MAX_POSITION;
const WORDS_FST_KEY: &[u8] = milli::WORDS_FST_KEY.as_bytes();
const HEADERS_KEY: &[u8] = milli::HEADERS_KEY.as_bytes();
const DOCUMENTS_IDS_KEY: &[u8] = milli::DOCUMENTS_IDS_KEY.as_bytes();
#[cfg(target_os = "linux")]
#[global_allocator]
static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc;
const WORDS_FST_KEY: &[u8] = crate::WORDS_FST_KEY.as_bytes();
const HEADERS_KEY: &[u8] = crate::HEADERS_KEY.as_bytes();
const DOCUMENTS_IDS_KEY: &[u8] = crate::DOCUMENTS_IDS_KEY.as_bytes();
#[derive(Debug, StructOpt)]
#[structopt(name = "milli-indexer")]
/// The indexer binary of the milli project.
struct Opt {
pub struct Opt {
/// The database path where the database is located.
/// It is created if it doesn't already exist.
#[structopt(long = "db", parse(from_os_str))]
@ -191,7 +187,7 @@ fn compute_words_pair_proximities(
for ((w1, ps1), (w2, ps2)) in word_positions.iter().cartesian_product(word_positions) {
let mut min_prox = None;
for (ps1, ps2) in ps1.iter().cartesian_product(ps2) {
let prox = milli::proximity::positions_proximity(*ps1, *ps2);
let prox = crate::proximity::positions_proximity(*ps1, *ps2);
let prox = u8::try_from(prox).unwrap();
// We don't care about a word that appear at the
// same position or too far from the other.
@ -736,9 +732,7 @@ fn csv_readers(
}
}
fn main() -> anyhow::Result<()> {
let opt = Opt::from_args();
pub fn run(opt: Opt) -> anyhow::Result<()> {
stderrlog::new()
.verbosity(opt.verbose)
.show_level(false)

View File

@ -2,16 +2,12 @@ use std::path::PathBuf;
use std::{str, io};
use anyhow::Context;
use crate::Index;
use heed::EnvOpenOptions;
use milli::Index;
use structopt::StructOpt;
use Command::*;
#[cfg(target_os = "linux")]
#[global_allocator]
static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc;
const MAIN_DB_NAME: &str = "main";
const WORD_DOCIDS_DB_NAME: &str = "word-docids";
const DOCID_WORD_POSITIONS_DB_NAME: &str = "docid-word-positions";
@ -33,8 +29,8 @@ const POSTINGS_DATABASE_NAMES: &[&str] = &[
];
#[derive(Debug, StructOpt)]
#[structopt(name = "milli-info", about = "A stats crawler for milli.")]
struct Opt {
/// A stats fetcher for milli.
pub struct Opt {
/// The database path where the database is located.
/// It is created if it doesn't already exist.
#[structopt(long = "db", parse(from_os_str))]
@ -133,8 +129,11 @@ enum Command {
},
}
fn main() -> anyhow::Result<()> {
let opt = Opt::from_args();
pub fn run(opt: Opt) -> anyhow::Result<()> {
let env = EnvOpenOptions::new()
.map_size(opt.database_size)
.max_dbs(10)
.open(&opt.database)?;
stderrlog::new()
.verbosity(opt.verbose)
@ -142,11 +141,6 @@ fn main() -> anyhow::Result<()> {
.timestamp(stderrlog::Timestamp::Off)
.init()?;
let env = EnvOpenOptions::new()
.map_size(opt.database_size)
.max_dbs(10)
.open(&opt.database)?;
// Open the LMDB database.
let index = Index::new(&env)?;
let rtxn = env.read_txn()?;
@ -196,7 +190,7 @@ fn biggest_value_sizes(index: &Index, rtxn: &heed::RoTxn, limit: usize) -> anyho
use std::cmp::Reverse;
use std::collections::BinaryHeap;
use heed::types::{Str, ByteSlice};
use milli::heed_codec::BEU32StrCodec;
use crate::heed_codec::BEU32StrCodec;
let main_name = "main";
let word_docids_name = "word_docids";
@ -306,7 +300,7 @@ fn total_docid_word_positions_size(index: &Index, rtxn: &heed::RoTxn) -> anyhow:
fn average_number_of_words_by_doc(index: &Index, rtxn: &heed::RoTxn) -> anyhow::Result<()> {
use heed::types::DecodeIgnore;
use milli::{DocumentId, BEU32StrCodec};
use crate::{DocumentId, BEU32StrCodec};
let mut words_counts = Vec::new();
let mut count = 0;
@ -345,7 +339,7 @@ fn average_number_of_words_by_doc(index: &Index, rtxn: &heed::RoTxn) -> anyhow::
fn average_number_of_positions_by_word(index: &Index, rtxn: &heed::RoTxn) -> anyhow::Result<()> {
use heed::types::DecodeIgnore;
use milli::BoRoaringBitmapCodec;
use crate::BoRoaringBitmapCodec;
let mut values_length = Vec::new();
let mut count = 0;
@ -397,7 +391,7 @@ fn database_stats(index: &Index, rtxn: &heed::RoTxn, name: &str) -> anyhow::Resu
use heed::types::ByteSlice;
use heed::{Error, BytesDecode};
use roaring::RoaringBitmap;
use milli::{BoRoaringBitmapCodec, CboRoaringBitmapCodec, RoaringBitmapCodec};
use crate::{BoRoaringBitmapCodec, CboRoaringBitmapCodec, RoaringBitmapCodec};
fn compute_stats<'a, DC: BytesDecode<'a, DItem = RoaringBitmap>>(
db: heed::PolyDatabase,
@ -478,7 +472,7 @@ fn word_pair_proximities_docids(
) -> anyhow::Result<()>
{
use heed::types::ByteSlice;
use milli::RoaringBitmapCodec;
use crate::RoaringBitmapCodec;
let stdout = io::stdout();
let mut wtr = csv::Writer::from_writer(stdout.lock());

4
src/subcommand/mod.rs Normal file
View File

@ -0,0 +1,4 @@
// Subcommands of the single `milli` binary, one module per subcommand.
// `src/main.rs` imports an `Opt` options type and a `run` function from each
// of these modules and dispatches to them.
pub mod indexer;
pub mod infos;
pub mod search;
pub mod serve;

View File

@ -5,16 +5,13 @@ use std::time::Instant;
use heed::EnvOpenOptions;
use log::debug;
use milli::Index;
use structopt::StructOpt;
#[cfg(target_os = "linux")]
#[global_allocator]
static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc;
use crate::Index;
#[derive(Debug, StructOpt)]
#[structopt(name = "milli-search", about = "A simple search binary for milli project.")]
struct Opt {
/// A simple search helper binary for the milli project.
pub struct Opt {
/// The database path where the database is located.
/// It is created if it doesn't already exist.
#[structopt(long = "db", parse(from_os_str))]
@ -33,9 +30,7 @@ struct Opt {
query: Option<String>,
}
fn main() -> anyhow::Result<()> {
let opt = Opt::from_args();
pub fn run(opt: Opt) -> anyhow::Result<()> {
stderrlog::new()
.verbosity(opt.verbose)
.show_level(false)

View File

@ -11,16 +11,12 @@ use serde::Deserialize;
use structopt::StructOpt;
use warp::{Filter, http::Response};
use milli::tokenizer::{simple_tokenizer, TokenType};
use milli::{Index, SearchResult};
#[cfg(target_os = "linux")]
#[global_allocator]
static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc;
use crate::tokenizer::{simple_tokenizer, TokenType};
use crate::{Index, SearchResult};
#[derive(Debug, StructOpt)]
#[structopt(name = "milli", about = "The server binary of the milli project.")]
struct Opt {
/// The HTTP main server of the milli project.
pub struct Opt {
/// The database path where the LMDB database is located.
/// It is created if it doesn't already exist.
#[structopt(long = "db", parse(from_os_str))]
@ -73,10 +69,7 @@ struct IndexTemplate {
docs_count: usize,
}
#[tokio::main]
async fn main() -> anyhow::Result<()> {
let opt = Opt::from_args();
pub fn run(opt: Opt) -> anyhow::Result<()> {
stderrlog::new()
.verbosity(opt.verbose)
.show_level(false)
@ -231,8 +224,13 @@ async fn main() -> anyhow::Result<()> {
.or(dash_logo_black_route)
.or(query_route);
let addr = SocketAddr::from_str(&opt.http_listen_addr).unwrap();
warp::serve(routes).run(addr).await;
let addr = SocketAddr::from_str(&opt.http_listen_addr)?;
tokio::runtime::Builder::new_multi_thread()
.enable_all()
.build()?
.block_on(async {
warp::serve(routes).run(addr).await
});
Ok(())
}