feat: Allow raptor-search to accept stop-words by argument

This commit is contained in:
Clément Renault 2018-09-24 17:25:24 +02:00
parent 33ea956c7b
commit 806ed2cc33
3 changed files with 19 additions and 8 deletions

1
Cargo.lock generated
View File

@ -772,6 +772,7 @@ dependencies = [
"fst 0.3.2 (git+https://github.com/Kerollmops/fst.git?branch=automaton-for-deref)",
"raptor 0.1.0",
"rocksdb 0.3.0 (git+https://github.com/pingcap/rust-rocksdb.git)",
"structopt 0.2.10 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]

View File

@ -6,6 +6,7 @@ authors = ["Kerollmops <renault.cle@gmail.com>"]
[dependencies]
raptor = { path = "../raptor" }
structopt = "0.2"
elapsed = "0.1"
[dependencies.fst]

View File

@ -1,14 +1,25 @@
use std::env;
use std::fs::File;
use std::path::Path;
use std::path::{Path, PathBuf};
use std::collections::HashSet;
use std::str::from_utf8_unchecked;
use std::io::{self, BufReader, BufRead, Write};
use structopt::StructOpt;
use elapsed::measure_time;
use fst::Streamer;
use rocksdb::{DB, DBOptions, IngestExternalFileOptions};
use raptor::{automaton, Metadata, RankedStream};
// Command-line options for the `raptor-search` binary, filled in by the
// `StructOpt` derive when `main` calls `Opt::from_args()`.
//
// These maintainer notes deliberately use plain `//` comments: structopt
// turns `///` doc comments on fields into the generated `--help` text, so
// only the user-facing descriptions below are written as doc comments.
#[derive(Debug, StructOpt)]
#[structopt(name = "raptor-search", about = "A Raptor binary to search in a dump.")]
struct Opt {
/// The stop word file, each word must be separated by a newline.
// Exposed as the `--stop-words <path>` flag; `parse(from_os_str)` builds the
// `PathBuf` from the raw OS string. `main` passes it to `common_words`.
#[structopt(long = "stop-words", parse(from_os_str))]
stop_words: PathBuf,
/// Meta file name (e.g. relaxed-colden).
// No `long`/`short` attribute is given, so this is taken as a positional
// argument. `main` appends `.map`, `.idx` and `.sst` to it to build the
// names of the files it opens.
meta_name: String,
}
type CommonWords = HashSet<String>;
fn common_words<P>(path: P) -> io::Result<CommonWords>
@ -46,7 +57,9 @@ fn search(metadata: &Metadata, database: &DB, common_words: &CommonWords, query:
}
fn main() {
let name = env::args().nth(1).expect("Missing meta file name (e.g. relaxed-colden)");
let opt = Opt::from_args();
let name = opt.meta_name;
let map_file = format!("{}.map", name);
let idx_file = format!("{}.idx", name);
let sst_file = format!("{}.sst", name);
@ -66,11 +79,7 @@ fn main() {
});
println!("{} to load the SST file in RocksDB and reopen it for read-only", elapsed);
let common_path = "fr.stopwords.txt";
let common_words = common_words(common_path).unwrap_or_else(|e| {
println!("{:?}: {:?}", common_path, e);
HashSet::new()
});
let common_words = common_words(opt.stop_words).expect("reading stop words");
loop {
print!("Searching for: ");