From 806ed2cc333382f9bed4681749d87f1a0bf48e99 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Mon, 24 Sep 2018 17:25:24 +0200 Subject: [PATCH] feat: Allow raptor-search to accept stop-words by argument --- Cargo.lock | 1 + raptor-search/Cargo.toml | 1 + raptor-search/src/main.rs | 25 +++++++++++++++++-------- 3 files changed, 19 insertions(+), 8 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 04f194895..9061bc73c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -772,6 +772,7 @@ dependencies = [ "fst 0.3.2 (git+https://github.com/Kerollmops/fst.git?branch=automaton-for-deref)", "raptor 0.1.0", "rocksdb 0.3.0 (git+https://github.com/pingcap/rust-rocksdb.git)", + "structopt 0.2.10 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] diff --git a/raptor-search/Cargo.toml b/raptor-search/Cargo.toml index b18105b72..e9eaf136d 100644 --- a/raptor-search/Cargo.toml +++ b/raptor-search/Cargo.toml @@ -6,6 +6,7 @@ authors = ["Kerollmops "] [dependencies] raptor = { path = "../raptor" } +structopt = "0.2" elapsed = "0.1" [dependencies.fst] diff --git a/raptor-search/src/main.rs b/raptor-search/src/main.rs index 8cd8b29ce..f3f0891a5 100644 --- a/raptor-search/src/main.rs +++ b/raptor-search/src/main.rs @@ -1,14 +1,25 @@ -use std::env; use std::fs::File; -use std::path::Path; +use std::path::{Path, PathBuf}; use std::collections::HashSet; use std::str::from_utf8_unchecked; use std::io::{self, BufReader, BufRead, Write}; +use structopt::StructOpt; use elapsed::measure_time; use fst::Streamer; use rocksdb::{DB, DBOptions, IngestExternalFileOptions}; use raptor::{automaton, Metadata, RankedStream}; +#[derive(Debug, StructOpt)] +#[structopt(name = "raptor-search", about = "A Raptor binary to search in a dump.")] +struct Opt { + /// The stop word file, each word must be separated by a newline. + #[structopt(long = "stop-words", parse(from_os_str))] + stop_words: PathBuf, + + /// Meta file name (e.g. relaxed-colden). 
+ meta_name: String, +} + type CommonWords = HashSet; fn common_words

(path: P) -> io::Result @@ -46,7 +57,9 @@ fn search(metadata: &Metadata, database: &DB, common_words: &CommonWords, query: } fn main() { - let name = env::args().nth(1).expect("Missing meta file name (e.g. relaxed-colden)"); + let opt = Opt::from_args(); + + let name = opt.meta_name; let map_file = format!("{}.map", name); let idx_file = format!("{}.idx", name); let sst_file = format!("{}.sst", name); @@ -66,11 +79,7 @@ fn main() { }); println!("{} to load the SST file in RocksDB and reopen it for read-only", elapsed); - let common_path = "fr.stopwords.txt"; - let common_words = common_words(common_path).unwrap_or_else(|e| { - println!("{:?}: {:?}", common_path, e); - HashSet::new() - }); + let common_words = common_words(opt.stop_words).expect("reading stop words"); loop { print!("Searching for: ");