mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-07-04 20:37:15 +02:00
Move the binaries into one with subcommands
This commit is contained in:
parent
ff389f1270
commit
65e32fecb1
9 changed files with 168 additions and 88 deletions
|
@ -5,6 +5,7 @@ mod search;
|
|||
mod update_store;
|
||||
pub mod heed_codec;
|
||||
pub mod proximity;
|
||||
pub mod subcommand;
|
||||
pub mod tokenizer;
|
||||
|
||||
use std::collections::HashMap;
|
||||
|
|
28
src/main.rs
Normal file
28
src/main.rs
Normal file
|
@ -0,0 +1,28 @@
|
|||
use structopt::StructOpt;
|
||||
|
||||
use milli::subcommand::indexer::{self, Opt as IndexerOpt};
|
||||
use milli::subcommand::infos::{self, Opt as InfosOpt};
|
||||
use milli::subcommand::serve::{self, Opt as ServeOpt};
|
||||
use milli::subcommand::search::{self, Opt as SearchOpt};
|
||||
|
||||
// Linux-only: register jemalloc as the global allocator of this binary.
// The `cfg` attribute removes the static entirely on other target OSes.
#[cfg(target_os = "linux")]
|
||||
#[global_allocator]
|
||||
static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc;
|
||||
|
||||
// Command line interface of the single `milli` binary.
// Each variant is one subcommand and carries the `Opt` struct imported from the
// matching `milli::subcommand` module; `main` passes it to that module's `run`.
// NOTE(review): plain `//` comments are used here on purpose; presumably `///`
// doc comments would be picked up by structopt as help text — confirm before
// converting them.
#[derive(Debug, StructOpt)]
|
||||
#[structopt(name = "milli", about = "The milli project.")]
|
||||
enum Command {
|
||||
// Options declared in `milli::subcommand::serve`.
Serve(ServeOpt),
|
||||
// Options declared in `milli::subcommand::indexer`.
Indexer(IndexerOpt),
|
||||
// Options declared in `milli::subcommand::infos`.
Infos(InfosOpt),
|
||||
// Options declared in `milli::subcommand::search`.
Search(SearchOpt),
|
||||
}
|
||||
|
||||
/// Entry point of the binary.
///
/// Parses the command line into a [`Command`] and forwards the parsed options
/// to the `run` function of the selected subcommand module.
///
/// # Errors
///
/// Returns whatever `anyhow::Result` the selected `run` function returns.
fn main() -> anyhow::Result<()> {
|
||||
// `from_args` is provided by the `StructOpt` trait imported at the top of the file.
match Command::from_args() {
|
||||
Command::Serve(opt) => serve::run(opt),
|
||||
Command::Indexer(opt) => indexer::run(opt),
|
||||
Command::Infos(opt) => infos::run(opt),
|
||||
Command::Search(opt) => search::run(opt),
|
||||
}
|
||||
}
|
|
@ -22,9 +22,9 @@ use roaring::RoaringBitmap;
|
|||
use structopt::StructOpt;
|
||||
use tempfile::tempfile;
|
||||
|
||||
use milli::heed_codec::{CsvStringRecordCodec, BoRoaringBitmapCodec, CboRoaringBitmapCodec};
|
||||
use milli::tokenizer::{simple_tokenizer, only_token};
|
||||
use milli::{SmallVec32, Index, Position, DocumentId};
|
||||
use crate::heed_codec::{CsvStringRecordCodec, BoRoaringBitmapCodec, CboRoaringBitmapCodec};
|
||||
use crate::tokenizer::{simple_tokenizer, only_token};
|
||||
use crate::{SmallVec32, Index, Position, DocumentId};
|
||||
|
||||
const LMDB_MAX_KEY_LENGTH: usize = 511;
|
||||
// NOTE(review): the value is 1024 * 1024 = 1_048_576, which is a mebibyte's
// worth of bytes rather than the kilobyte the name suggests — confirm the
// intended unit at the call sites before renaming or changing it.
const ONE_KILOBYTE: usize = 1024 * 1024;
|
||||
|
@ -32,18 +32,14 @@ const ONE_KILOBYTE: usize = 1024 * 1024;
|
|||
// Upper bound used for positions; MAX_ATTRIBUTES below is derived from it.
const MAX_POSITION: usize = 1000;
|
||||
// `u32::max_value()` divided (integer division) by `MAX_POSITION`.
// Presumably the number of attributes whose positions can be packed into a
// `u32` — TODO confirm against the position encoding.
const MAX_ATTRIBUTES: usize = u32::max_value() as usize / MAX_POSITION;
|
||||
|
||||
const WORDS_FST_KEY: &[u8] = milli::WORDS_FST_KEY.as_bytes();
|
||||
const HEADERS_KEY: &[u8] = milli::HEADERS_KEY.as_bytes();
|
||||
const DOCUMENTS_IDS_KEY: &[u8] = milli::DOCUMENTS_IDS_KEY.as_bytes();
|
||||
|
||||
#[cfg(target_os = "linux")]
|
||||
#[global_allocator]
|
||||
static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc;
|
||||
const WORDS_FST_KEY: &[u8] = crate::WORDS_FST_KEY.as_bytes();
|
||||
const HEADERS_KEY: &[u8] = crate::HEADERS_KEY.as_bytes();
|
||||
const DOCUMENTS_IDS_KEY: &[u8] = crate::DOCUMENTS_IDS_KEY.as_bytes();
|
||||
|
||||
#[derive(Debug, StructOpt)]
|
||||
#[structopt(name = "milli-indexer")]
|
||||
/// The indexer binary of the milli project.
|
||||
struct Opt {
|
||||
pub struct Opt {
|
||||
/// The database path where the database is located.
|
||||
/// It is created if it doesn't already exist.
|
||||
#[structopt(long = "db", parse(from_os_str))]
|
||||
|
@ -191,7 +187,7 @@ fn compute_words_pair_proximities(
|
|||
for ((w1, ps1), (w2, ps2)) in word_positions.iter().cartesian_product(word_positions) {
|
||||
let mut min_prox = None;
|
||||
for (ps1, ps2) in ps1.iter().cartesian_product(ps2) {
|
||||
let prox = milli::proximity::positions_proximity(*ps1, *ps2);
|
||||
let prox = crate::proximity::positions_proximity(*ps1, *ps2);
|
||||
let prox = u8::try_from(prox).unwrap();
|
||||
// We don't care about a word that appears at the
|
||||
// same position or too far from the other.
|
||||
|
@ -736,9 +732,7 @@ fn csv_readers(
|
|||
}
|
||||
}
|
||||
|
||||
fn main() -> anyhow::Result<()> {
|
||||
let opt = Opt::from_args();
|
||||
|
||||
pub fn run(opt: Opt) -> anyhow::Result<()> {
|
||||
stderrlog::new()
|
||||
.verbosity(opt.verbose)
|
||||
.show_level(false)
|
|
@ -2,16 +2,12 @@ use std::path::PathBuf;
|
|||
use std::{str, io};
|
||||
|
||||
use anyhow::Context;
|
||||
use crate::Index;
|
||||
use heed::EnvOpenOptions;
|
||||
use milli::Index;
|
||||
use structopt::StructOpt;
|
||||
|
||||
use Command::*;
|
||||
|
||||
#[cfg(target_os = "linux")]
|
||||
#[global_allocator]
|
||||
static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc;
|
||||
|
||||
const MAIN_DB_NAME: &str = "main";
|
||||
const WORD_DOCIDS_DB_NAME: &str = "word-docids";
|
||||
const DOCID_WORD_POSITIONS_DB_NAME: &str = "docid-word-positions";
|
||||
|
@ -33,8 +29,8 @@ const POSTINGS_DATABASE_NAMES: &[&str] = &[
|
|||
];
|
||||
|
||||
#[derive(Debug, StructOpt)]
|
||||
#[structopt(name = "milli-info", about = "A stats crawler for milli.")]
|
||||
struct Opt {
|
||||
/// A stats fetcher for milli.
|
||||
pub struct Opt {
|
||||
/// The database path where the database is located.
|
||||
/// It is created if it doesn't already exist.
|
||||
#[structopt(long = "db", parse(from_os_str))]
|
||||
|
@ -133,8 +129,11 @@ enum Command {
|
|||
},
|
||||
}
|
||||
|
||||
fn main() -> anyhow::Result<()> {
|
||||
let opt = Opt::from_args();
|
||||
pub fn run(opt: Opt) -> anyhow::Result<()> {
|
||||
let env = EnvOpenOptions::new()
|
||||
.map_size(opt.database_size)
|
||||
.max_dbs(10)
|
||||
.open(&opt.database)?;
|
||||
|
||||
stderrlog::new()
|
||||
.verbosity(opt.verbose)
|
||||
|
@ -142,11 +141,6 @@ fn main() -> anyhow::Result<()> {
|
|||
.timestamp(stderrlog::Timestamp::Off)
|
||||
.init()?;
|
||||
|
||||
let env = EnvOpenOptions::new()
|
||||
.map_size(opt.database_size)
|
||||
.max_dbs(10)
|
||||
.open(&opt.database)?;
|
||||
|
||||
// Open the LMDB database.
|
||||
let index = Index::new(&env)?;
|
||||
let rtxn = env.read_txn()?;
|
||||
|
@ -196,7 +190,7 @@ fn biggest_value_sizes(index: &Index, rtxn: &heed::RoTxn, limit: usize) -> anyho
|
|||
use std::cmp::Reverse;
|
||||
use std::collections::BinaryHeap;
|
||||
use heed::types::{Str, ByteSlice};
|
||||
use milli::heed_codec::BEU32StrCodec;
|
||||
use crate::heed_codec::BEU32StrCodec;
|
||||
|
||||
let main_name = "main";
|
||||
let word_docids_name = "word_docids";
|
||||
|
@ -306,7 +300,7 @@ fn total_docid_word_positions_size(index: &Index, rtxn: &heed::RoTxn) -> anyhow:
|
|||
|
||||
fn average_number_of_words_by_doc(index: &Index, rtxn: &heed::RoTxn) -> anyhow::Result<()> {
|
||||
use heed::types::DecodeIgnore;
|
||||
use milli::{DocumentId, BEU32StrCodec};
|
||||
use crate::{DocumentId, BEU32StrCodec};
|
||||
|
||||
let mut words_counts = Vec::new();
|
||||
let mut count = 0;
|
||||
|
@ -345,7 +339,7 @@ fn average_number_of_words_by_doc(index: &Index, rtxn: &heed::RoTxn) -> anyhow::
|
|||
|
||||
fn average_number_of_positions_by_word(index: &Index, rtxn: &heed::RoTxn) -> anyhow::Result<()> {
|
||||
use heed::types::DecodeIgnore;
|
||||
use milli::BoRoaringBitmapCodec;
|
||||
use crate::BoRoaringBitmapCodec;
|
||||
|
||||
let mut values_length = Vec::new();
|
||||
let mut count = 0;
|
||||
|
@ -397,7 +391,7 @@ fn database_stats(index: &Index, rtxn: &heed::RoTxn, name: &str) -> anyhow::Resu
|
|||
use heed::types::ByteSlice;
|
||||
use heed::{Error, BytesDecode};
|
||||
use roaring::RoaringBitmap;
|
||||
use milli::{BoRoaringBitmapCodec, CboRoaringBitmapCodec, RoaringBitmapCodec};
|
||||
use crate::{BoRoaringBitmapCodec, CboRoaringBitmapCodec, RoaringBitmapCodec};
|
||||
|
||||
fn compute_stats<'a, DC: BytesDecode<'a, DItem = RoaringBitmap>>(
|
||||
db: heed::PolyDatabase,
|
||||
|
@ -478,7 +472,7 @@ fn word_pair_proximities_docids(
|
|||
) -> anyhow::Result<()>
|
||||
{
|
||||
use heed::types::ByteSlice;
|
||||
use milli::RoaringBitmapCodec;
|
||||
use crate::RoaringBitmapCodec;
|
||||
|
||||
let stdout = io::stdout();
|
||||
let mut wtr = csv::Writer::from_writer(stdout.lock());
|
4
src/subcommand/mod.rs
Normal file
4
src/subcommand/mod.rs
Normal file
|
@ -0,0 +1,4 @@
|
|||
pub mod indexer;
|
||||
pub mod infos;
|
||||
pub mod search;
|
||||
pub mod serve;
|
|
@ -5,16 +5,13 @@ use std::time::Instant;
|
|||
|
||||
use heed::EnvOpenOptions;
|
||||
use log::debug;
|
||||
use milli::Index;
|
||||
use structopt::StructOpt;
|
||||
|
||||
#[cfg(target_os = "linux")]
|
||||
#[global_allocator]
|
||||
static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc;
|
||||
use crate::Index;
|
||||
|
||||
#[derive(Debug, StructOpt)]
|
||||
#[structopt(name = "milli-search", about = "A simple search binary for milli project.")]
|
||||
struct Opt {
|
||||
/// A simple search helper binary for the milli project.
|
||||
pub struct Opt {
|
||||
/// The database path where the database is located.
|
||||
/// It is created if it doesn't already exist.
|
||||
#[structopt(long = "db", parse(from_os_str))]
|
||||
|
@ -33,9 +30,7 @@ struct Opt {
|
|||
query: Option<String>,
|
||||
}
|
||||
|
||||
fn main() -> anyhow::Result<()> {
|
||||
let opt = Opt::from_args();
|
||||
|
||||
pub fn run(opt: Opt) -> anyhow::Result<()> {
|
||||
stderrlog::new()
|
||||
.verbosity(opt.verbose)
|
||||
.show_level(false)
|
|
@ -11,16 +11,12 @@ use serde::Deserialize;
|
|||
use structopt::StructOpt;
|
||||
use warp::{Filter, http::Response};
|
||||
|
||||
use milli::tokenizer::{simple_tokenizer, TokenType};
|
||||
use milli::{Index, SearchResult};
|
||||
|
||||
#[cfg(target_os = "linux")]
|
||||
#[global_allocator]
|
||||
static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc;
|
||||
use crate::tokenizer::{simple_tokenizer, TokenType};
|
||||
use crate::{Index, SearchResult};
|
||||
|
||||
#[derive(Debug, StructOpt)]
|
||||
#[structopt(name = "milli", about = "The server binary of the milli project.")]
|
||||
struct Opt {
|
||||
/// The HTTP main server of the milli project.
|
||||
pub struct Opt {
|
||||
/// The database path where the LMDB database is located.
|
||||
/// It is created if it doesn't already exist.
|
||||
#[structopt(long = "db", parse(from_os_str))]
|
||||
|
@ -73,10 +69,7 @@ struct IndexTemplate {
|
|||
docs_count: usize,
|
||||
}
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> anyhow::Result<()> {
|
||||
let opt = Opt::from_args();
|
||||
|
||||
pub fn run(opt: Opt) -> anyhow::Result<()> {
|
||||
stderrlog::new()
|
||||
.verbosity(opt.verbose)
|
||||
.show_level(false)
|
||||
|
@ -231,8 +224,13 @@ async fn main() -> anyhow::Result<()> {
|
|||
.or(dash_logo_black_route)
|
||||
.or(query_route);
|
||||
|
||||
let addr = SocketAddr::from_str(&opt.http_listen_addr).unwrap();
|
||||
warp::serve(routes).run(addr).await;
|
||||
let addr = SocketAddr::from_str(&opt.http_listen_addr)?;
|
||||
tokio::runtime::Builder::new_multi_thread()
|
||||
.enable_all()
|
||||
.build()?
|
||||
.block_on(async {
|
||||
warp::serve(routes).run(addr).await
|
||||
});
|
||||
|
||||
Ok(())
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue