mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-22 21:04:27 +01:00
feat: Allow raptor-search to accept stop-words by argument
This commit is contained in:
parent
33ea956c7b
commit
806ed2cc33
1
Cargo.lock
generated
1
Cargo.lock
generated
@ -772,6 +772,7 @@ dependencies = [
|
||||
"fst 0.3.2 (git+https://github.com/Kerollmops/fst.git?branch=automaton-for-deref)",
|
||||
"raptor 0.1.0",
|
||||
"rocksdb 0.3.0 (git+https://github.com/pingcap/rust-rocksdb.git)",
|
||||
"structopt 0.2.10 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -6,6 +6,7 @@ authors = ["Kerollmops <renault.cle@gmail.com>"]
|
||||
|
||||
[dependencies]
|
||||
raptor = { path = "../raptor" }
|
||||
structopt = "0.2"
|
||||
elapsed = "0.1"
|
||||
|
||||
[dependencies.fst]
|
||||
|
@ -1,14 +1,25 @@
|
||||
use std::env;
|
||||
use std::fs::File;
|
||||
use std::path::Path;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::collections::HashSet;
|
||||
use std::str::from_utf8_unchecked;
|
||||
use std::io::{self, BufReader, BufRead, Write};
|
||||
use structopt::StructOpt;
|
||||
use elapsed::measure_time;
|
||||
use fst::Streamer;
|
||||
use rocksdb::{DB, DBOptions, IngestExternalFileOptions};
|
||||
use raptor::{automaton, Metadata, RankedStream};
|
||||
|
||||
#[derive(Debug, StructOpt)]
|
||||
#[structopt(name = "raptor-search", about = "A Raptor binary to search in a dump.")]
|
||||
struct Opt {
|
||||
/// The stop word file, each word must be separated by a newline.
|
||||
#[structopt(long = "stop-words", parse(from_os_str))]
|
||||
stop_words: PathBuf,
|
||||
|
||||
/// Meta file name (e.g. relaxed-colden).
|
||||
meta_name: String,
|
||||
}
|
||||
|
||||
type CommonWords = HashSet<String>;
|
||||
|
||||
fn common_words<P>(path: P) -> io::Result<CommonWords>
|
||||
@ -46,7 +57,9 @@ fn search(metadata: &Metadata, database: &DB, common_words: &CommonWords, query:
|
||||
}
|
||||
|
||||
fn main() {
|
||||
let name = env::args().nth(1).expect("Missing meta file name (e.g. relaxed-colden)");
|
||||
let opt = Opt::from_args();
|
||||
|
||||
let name = opt.meta_name;
|
||||
let map_file = format!("{}.map", name);
|
||||
let idx_file = format!("{}.idx", name);
|
||||
let sst_file = format!("{}.sst", name);
|
||||
@ -66,11 +79,7 @@ fn main() {
|
||||
});
|
||||
println!("{} to load the SST file in RocksDB and reopen it for read-only", elapsed);
|
||||
|
||||
let common_path = "fr.stopwords.txt";
|
||||
let common_words = common_words(common_path).unwrap_or_else(|e| {
|
||||
println!("{:?}: {:?}", common_path, e);
|
||||
HashSet::new()
|
||||
});
|
||||
let common_words = common_words(opt.stop_words).expect("reading stop words");
|
||||
|
||||
loop {
|
||||
print!("Searching for: ");
|
||||
|
Loading…
Reference in New Issue
Block a user