mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-26 23:04:26 +01:00
feat: Allow raptor-search to accept stop-words by argument
This commit is contained in:
parent
33ea956c7b
commit
806ed2cc33
1
Cargo.lock
generated
1
Cargo.lock
generated
@ -772,6 +772,7 @@ dependencies = [
|
|||||||
"fst 0.3.2 (git+https://github.com/Kerollmops/fst.git?branch=automaton-for-deref)",
|
"fst 0.3.2 (git+https://github.com/Kerollmops/fst.git?branch=automaton-for-deref)",
|
||||||
"raptor 0.1.0",
|
"raptor 0.1.0",
|
||||||
"rocksdb 0.3.0 (git+https://github.com/pingcap/rust-rocksdb.git)",
|
"rocksdb 0.3.0 (git+https://github.com/pingcap/rust-rocksdb.git)",
|
||||||
|
"structopt 0.2.10 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
@ -6,6 +6,7 @@ authors = ["Kerollmops <renault.cle@gmail.com>"]
|
|||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
raptor = { path = "../raptor" }
|
raptor = { path = "../raptor" }
|
||||||
|
structopt = "0.2"
|
||||||
elapsed = "0.1"
|
elapsed = "0.1"
|
||||||
|
|
||||||
[dependencies.fst]
|
[dependencies.fst]
|
||||||
|
@ -1,14 +1,25 @@
|
|||||||
use std::env;
|
|
||||||
use std::fs::File;
|
use std::fs::File;
|
||||||
use std::path::Path;
|
use std::path::{Path, PathBuf};
|
||||||
use std::collections::HashSet;
|
use std::collections::HashSet;
|
||||||
use std::str::from_utf8_unchecked;
|
use std::str::from_utf8_unchecked;
|
||||||
use std::io::{self, BufReader, BufRead, Write};
|
use std::io::{self, BufReader, BufRead, Write};
|
||||||
|
use structopt::StructOpt;
|
||||||
use elapsed::measure_time;
|
use elapsed::measure_time;
|
||||||
use fst::Streamer;
|
use fst::Streamer;
|
||||||
use rocksdb::{DB, DBOptions, IngestExternalFileOptions};
|
use rocksdb::{DB, DBOptions, IngestExternalFileOptions};
|
||||||
use raptor::{automaton, Metadata, RankedStream};
|
use raptor::{automaton, Metadata, RankedStream};
|
||||||
|
|
||||||
|
#[derive(Debug, StructOpt)]
|
||||||
|
#[structopt(name = "raptor-search", about = "A Raptor binary to search in a dump.")]
|
||||||
|
struct Opt {
|
||||||
|
/// The stop word file, each word must be separated by a newline.
|
||||||
|
#[structopt(long = "stop-words", parse(from_os_str))]
|
||||||
|
stop_words: PathBuf,
|
||||||
|
|
||||||
|
/// Meta file name (e.g. relaxed-colden).
|
||||||
|
meta_name: String,
|
||||||
|
}
|
||||||
|
|
||||||
type CommonWords = HashSet<String>;
|
type CommonWords = HashSet<String>;
|
||||||
|
|
||||||
fn common_words<P>(path: P) -> io::Result<CommonWords>
|
fn common_words<P>(path: P) -> io::Result<CommonWords>
|
||||||
@ -46,7 +57,9 @@ fn search(metadata: &Metadata, database: &DB, common_words: &CommonWords, query:
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn main() {
|
fn main() {
|
||||||
let name = env::args().nth(1).expect("Missing meta file name (e.g. relaxed-colden)");
|
let opt = Opt::from_args();
|
||||||
|
|
||||||
|
let name = opt.meta_name;
|
||||||
let map_file = format!("{}.map", name);
|
let map_file = format!("{}.map", name);
|
||||||
let idx_file = format!("{}.idx", name);
|
let idx_file = format!("{}.idx", name);
|
||||||
let sst_file = format!("{}.sst", name);
|
let sst_file = format!("{}.sst", name);
|
||||||
@ -66,11 +79,7 @@ fn main() {
|
|||||||
});
|
});
|
||||||
println!("{} to load the SST file in RocksDB and reopen it for read-only", elapsed);
|
println!("{} to load the SST file in RocksDB and reopen it for read-only", elapsed);
|
||||||
|
|
||||||
let common_path = "fr.stopwords.txt";
|
let common_words = common_words(opt.stop_words).expect("reading stop words");
|
||||||
let common_words = common_words(common_path).unwrap_or_else(|e| {
|
|
||||||
println!("{:?}: {:?}", common_path, e);
|
|
||||||
HashSet::new()
|
|
||||||
});
|
|
||||||
|
|
||||||
loop {
|
loop {
|
||||||
print!("Searching for: ");
|
print!("Searching for: ");
|
||||||
|
Loading…
Reference in New Issue
Block a user