Move the binaries into one with subcommands

This commit is contained in:
Clément Renault 2020-10-19 13:44:17 +02:00
parent ff389f1270
commit 65e32fecb1
No known key found for this signature in database
GPG key ID: 92ADA4E935E71FA4
9 changed files with 168 additions and 88 deletions

View file

@ -5,6 +5,7 @@ mod search;
mod update_store;
pub mod heed_codec;
pub mod proximity;
pub mod subcommand;
pub mod tokenizer;
use std::collections::HashMap;

28
src/main.rs Normal file
View file

@ -0,0 +1,28 @@
use structopt::StructOpt;
use milli::subcommand::indexer::{self, Opt as IndexerOpt};
use milli::subcommand::infos::{self, Opt as InfosOpt};
use milli::subcommand::serve::{self, Opt as ServeOpt};
use milli::subcommand::search::{self, Opt as SearchOpt};
// Registers jemalloc as the process-wide allocator. The `cfg` gate restricts
// this to Linux targets; on any other target this item is compiled out and
// no custom global allocator is declared here.
#[cfg(target_os = "linux")]
#[global_allocator]
static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc;
// Top-level command line interface of the unified `milli` binary: each
// variant is one subcommand and carries the option struct of the matching
// `milli::subcommand::*` module.
//
// Plain `//` comments are used on purpose: structopt derives help text from
// `///` doc comments, so adding those here could change the CLI output.
#[derive(Debug, StructOpt)]
#[structopt(name = "milli", about = "The milli project.")]
enum Command {
// Options for the `serve` subcommand (see `subcommand::serve::Opt`).
Serve(ServeOpt),
// Options for the `indexer` subcommand (see `subcommand::indexer::Opt`).
Indexer(IndexerOpt),
// Options for the `infos` subcommand (see `subcommand::infos::Opt`).
Infos(InfosOpt),
// Options for the `search` subcommand (see `subcommand::search::Opt`).
Search(SearchOpt),
}
/// Entry point of the unified `milli` binary.
///
/// Parses the process arguments into a [`Command`], then forwards the parsed
/// options to the `run` function of the selected subcommand module and
/// returns that function's result unchanged.
fn main() -> anyhow::Result<()> {
    let command = Command::from_args();

    // One arm per subcommand; the match is exhaustive so adding a variant
    // to `Command` forces a new arm here.
    match command {
        Command::Serve(serve_opt) => serve::run(serve_opt),
        Command::Indexer(indexer_opt) => indexer::run(indexer_opt),
        Command::Infos(infos_opt) => infos::run(infos_opt),
        Command::Search(search_opt) => search::run(search_opt),
    }
}

View file

@ -22,9 +22,9 @@ use roaring::RoaringBitmap;
use structopt::StructOpt;
use tempfile::tempfile;
use milli::heed_codec::{CsvStringRecordCodec, BoRoaringBitmapCodec, CboRoaringBitmapCodec};
use milli::tokenizer::{simple_tokenizer, only_token};
use milli::{SmallVec32, Index, Position, DocumentId};
use crate::heed_codec::{CsvStringRecordCodec, BoRoaringBitmapCodec, CboRoaringBitmapCodec};
use crate::tokenizer::{simple_tokenizer, only_token};
use crate::{SmallVec32, Index, Position, DocumentId};
// Key-length limit applied to LMDB keys.
// NOTE(review): 511 presumably mirrors LMDB's default maximum key size in
// bytes — confirm against the LMDB build actually linked.
const LMDB_MAX_KEY_LENGTH: usize = 511;
// NOTE(review): despite its name, this constant evaluates to
// 1024 * 1024 = 1_048_576, i.e. one mebibyte rather than one kilobyte.
// Call sites are not visible here, so check how it is used before renaming
// it or changing its value.
const ONE_KILOBYTE: usize = 1024 * 1024;
@ -32,18 +32,14 @@ const ONE_KILOBYTE: usize = 1024 * 1024;
/// Per-attribute position bound.
/// NOTE(review): presumably the number of word positions kept for a single
/// attribute — confirm at the use sites.
const MAX_POSITION: usize = 1000;
/// Largest attribute count for which `MAX_ATTRIBUTES * MAX_POSITION` still
/// fits in a `u32` (integer division of `u32::MAX` by `MAX_POSITION`).
const MAX_ATTRIBUTES: usize = u32::MAX as usize / MAX_POSITION;
const WORDS_FST_KEY: &[u8] = milli::WORDS_FST_KEY.as_bytes();
const HEADERS_KEY: &[u8] = milli::HEADERS_KEY.as_bytes();
const DOCUMENTS_IDS_KEY: &[u8] = milli::DOCUMENTS_IDS_KEY.as_bytes();
#[cfg(target_os = "linux")]
#[global_allocator]
static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc;
const WORDS_FST_KEY: &[u8] = crate::WORDS_FST_KEY.as_bytes();
const HEADERS_KEY: &[u8] = crate::HEADERS_KEY.as_bytes();
const DOCUMENTS_IDS_KEY: &[u8] = crate::DOCUMENTS_IDS_KEY.as_bytes();
#[derive(Debug, StructOpt)]
#[structopt(name = "milli-indexer")]
/// The indexer binary of the milli project.
struct Opt {
pub struct Opt {
/// The database path where the database is located.
/// It is created if it doesn't already exist.
#[structopt(long = "db", parse(from_os_str))]
@ -191,7 +187,7 @@ fn compute_words_pair_proximities(
for ((w1, ps1), (w2, ps2)) in word_positions.iter().cartesian_product(word_positions) {
let mut min_prox = None;
for (ps1, ps2) in ps1.iter().cartesian_product(ps2) {
let prox = milli::proximity::positions_proximity(*ps1, *ps2);
let prox = crate::proximity::positions_proximity(*ps1, *ps2);
let prox = u8::try_from(prox).unwrap();
// We don't care about a word that appear at the
// same position or too far from the other.
@ -736,9 +732,7 @@ fn csv_readers(
}
}
fn main() -> anyhow::Result<()> {
let opt = Opt::from_args();
pub fn run(opt: Opt) -> anyhow::Result<()> {
stderrlog::new()
.verbosity(opt.verbose)
.show_level(false)

View file

@ -2,16 +2,12 @@ use std::path::PathBuf;
use std::{str, io};
use anyhow::Context;
use crate::Index;
use heed::EnvOpenOptions;
use milli::Index;
use structopt::StructOpt;
use Command::*;
#[cfg(target_os = "linux")]
#[global_allocator]
static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc;
const MAIN_DB_NAME: &str = "main";
const WORD_DOCIDS_DB_NAME: &str = "word-docids";
const DOCID_WORD_POSITIONS_DB_NAME: &str = "docid-word-positions";
@ -33,8 +29,8 @@ const POSTINGS_DATABASE_NAMES: &[&str] = &[
];
#[derive(Debug, StructOpt)]
#[structopt(name = "milli-info", about = "A stats crawler for milli.")]
struct Opt {
/// A stats fetcher for milli.
pub struct Opt {
/// The database path where the database is located.
/// It is created if it doesn't already exist.
#[structopt(long = "db", parse(from_os_str))]
@ -133,8 +129,11 @@ enum Command {
},
}
fn main() -> anyhow::Result<()> {
let opt = Opt::from_args();
pub fn run(opt: Opt) -> anyhow::Result<()> {
let env = EnvOpenOptions::new()
.map_size(opt.database_size)
.max_dbs(10)
.open(&opt.database)?;
stderrlog::new()
.verbosity(opt.verbose)
@ -142,11 +141,6 @@ fn main() -> anyhow::Result<()> {
.timestamp(stderrlog::Timestamp::Off)
.init()?;
let env = EnvOpenOptions::new()
.map_size(opt.database_size)
.max_dbs(10)
.open(&opt.database)?;
// Open the LMDB database.
let index = Index::new(&env)?;
let rtxn = env.read_txn()?;
@ -196,7 +190,7 @@ fn biggest_value_sizes(index: &Index, rtxn: &heed::RoTxn, limit: usize) -> anyho
use std::cmp::Reverse;
use std::collections::BinaryHeap;
use heed::types::{Str, ByteSlice};
use milli::heed_codec::BEU32StrCodec;
use crate::heed_codec::BEU32StrCodec;
let main_name = "main";
let word_docids_name = "word_docids";
@ -306,7 +300,7 @@ fn total_docid_word_positions_size(index: &Index, rtxn: &heed::RoTxn) -> anyhow:
fn average_number_of_words_by_doc(index: &Index, rtxn: &heed::RoTxn) -> anyhow::Result<()> {
use heed::types::DecodeIgnore;
use milli::{DocumentId, BEU32StrCodec};
use crate::{DocumentId, BEU32StrCodec};
let mut words_counts = Vec::new();
let mut count = 0;
@ -345,7 +339,7 @@ fn average_number_of_words_by_doc(index: &Index, rtxn: &heed::RoTxn) -> anyhow::
fn average_number_of_positions_by_word(index: &Index, rtxn: &heed::RoTxn) -> anyhow::Result<()> {
use heed::types::DecodeIgnore;
use milli::BoRoaringBitmapCodec;
use crate::BoRoaringBitmapCodec;
let mut values_length = Vec::new();
let mut count = 0;
@ -397,7 +391,7 @@ fn database_stats(index: &Index, rtxn: &heed::RoTxn, name: &str) -> anyhow::Resu
use heed::types::ByteSlice;
use heed::{Error, BytesDecode};
use roaring::RoaringBitmap;
use milli::{BoRoaringBitmapCodec, CboRoaringBitmapCodec, RoaringBitmapCodec};
use crate::{BoRoaringBitmapCodec, CboRoaringBitmapCodec, RoaringBitmapCodec};
fn compute_stats<'a, DC: BytesDecode<'a, DItem = RoaringBitmap>>(
db: heed::PolyDatabase,
@ -478,7 +472,7 @@ fn word_pair_proximities_docids(
) -> anyhow::Result<()>
{
use heed::types::ByteSlice;
use milli::RoaringBitmapCodec;
use crate::RoaringBitmapCodec;
let stdout = io::stdout();
let mut wtr = csv::Writer::from_writer(stdout.lock());

4
src/subcommand/mod.rs Normal file
View file

@ -0,0 +1,4 @@
// Subcommands of the unified `milli` binary. Each module exposes a public
// `Opt` struct and a `run(opt)` function that `src/main.rs` dispatches to.

// The indexer of the milli project.
pub mod indexer;
// A stats fetcher for milli.
pub mod infos;
// A simple search helper for the milli project.
pub mod search;
// The HTTP main server of the milli project.
pub mod serve;

View file

@ -5,16 +5,13 @@ use std::time::Instant;
use heed::EnvOpenOptions;
use log::debug;
use milli::Index;
use structopt::StructOpt;
#[cfg(target_os = "linux")]
#[global_allocator]
static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc;
use crate::Index;
#[derive(Debug, StructOpt)]
#[structopt(name = "milli-search", about = "A simple search binary for milli project.")]
struct Opt {
/// A simple search helper binary for the milli project.
pub struct Opt {
/// The database path where the database is located.
/// It is created if it doesn't already exist.
#[structopt(long = "db", parse(from_os_str))]
@ -33,9 +30,7 @@ struct Opt {
query: Option<String>,
}
fn main() -> anyhow::Result<()> {
let opt = Opt::from_args();
pub fn run(opt: Opt) -> anyhow::Result<()> {
stderrlog::new()
.verbosity(opt.verbose)
.show_level(false)

View file

@ -11,16 +11,12 @@ use serde::Deserialize;
use structopt::StructOpt;
use warp::{Filter, http::Response};
use milli::tokenizer::{simple_tokenizer, TokenType};
use milli::{Index, SearchResult};
#[cfg(target_os = "linux")]
#[global_allocator]
static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc;
use crate::tokenizer::{simple_tokenizer, TokenType};
use crate::{Index, SearchResult};
#[derive(Debug, StructOpt)]
#[structopt(name = "milli", about = "The server binary of the milli project.")]
struct Opt {
/// The HTTP main server of the milli project.
pub struct Opt {
/// The database path where the LMDB database is located.
/// It is created if it doesn't already exist.
#[structopt(long = "db", parse(from_os_str))]
@ -73,10 +69,7 @@ struct IndexTemplate {
docs_count: usize,
}
#[tokio::main]
async fn main() -> anyhow::Result<()> {
let opt = Opt::from_args();
pub fn run(opt: Opt) -> anyhow::Result<()> {
stderrlog::new()
.verbosity(opt.verbose)
.show_level(false)
@ -231,8 +224,13 @@ async fn main() -> anyhow::Result<()> {
.or(dash_logo_black_route)
.or(query_route);
let addr = SocketAddr::from_str(&opt.http_listen_addr).unwrap();
warp::serve(routes).run(addr).await;
let addr = SocketAddr::from_str(&opt.http_listen_addr)?;
tokio::runtime::Builder::new_multi_thread()
.enable_all()
.build()?
.block_on(async {
warp::serve(routes).run(addr).await
});
Ok(())
}