2018-07-06 21:26:07 +02:00
|
|
|
extern crate rocksdb;
|
2018-05-27 15:23:43 +02:00
|
|
|
extern crate fst;
|
|
|
|
extern crate raptor;
|
2018-06-24 19:27:24 +02:00
|
|
|
extern crate elapsed;
|
2018-05-27 15:23:43 +02:00
|
|
|
|
2018-06-25 22:26:49 +02:00
|
|
|
use std::env;
|
2018-07-06 21:26:07 +02:00
|
|
|
use std::str::from_utf8_unchecked;
|
2018-06-24 19:27:24 +02:00
|
|
|
use std::io::{self, Write};
|
|
|
|
use elapsed::measure_time;
|
2018-05-27 15:23:43 +02:00
|
|
|
use fst::Streamer;
|
2018-07-06 21:26:07 +02:00
|
|
|
use rocksdb::{DB, DBOptions};
|
2018-07-10 21:29:17 +02:00
|
|
|
use raptor::{Metadata, RankedStream, LevBuilder};
|
2018-05-27 15:23:43 +02:00
|
|
|
|
2018-07-10 21:29:17 +02:00
|
|
|
fn search(metadata: &Metadata, database: &DB, lev_builder: &LevBuilder, query: &str) {
|
2018-05-27 15:23:43 +02:00
|
|
|
let mut automatons = Vec::new();
|
|
|
|
for query in query.split_whitespace() {
|
2018-07-06 20:58:06 +02:00
|
|
|
let lev = lev_builder.get_automaton(query);
|
2018-05-27 15:23:43 +02:00
|
|
|
automatons.push(lev);
|
|
|
|
}
|
|
|
|
|
2018-07-10 21:29:17 +02:00
|
|
|
let map = metadata.as_map();
|
|
|
|
let indexes = metadata.as_indexes();
|
|
|
|
|
|
|
|
let mut stream = RankedStream::new(&map, &indexes, automatons, 20);
|
2018-07-06 22:05:51 +02:00
|
|
|
while let Some(document) = stream.next() {
|
2018-07-10 21:29:17 +02:00
|
|
|
print!("{:?}", document.document_id);
|
2018-06-24 19:27:24 +02:00
|
|
|
|
2018-07-06 22:05:51 +02:00
|
|
|
let title_key = format!("{}-title", document.document_id);
|
2018-07-10 21:29:17 +02:00
|
|
|
let title = database.get(title_key.as_bytes()).unwrap().unwrap();
|
2018-07-06 21:26:07 +02:00
|
|
|
let title = unsafe { from_utf8_unchecked(&title) };
|
2018-07-10 21:29:17 +02:00
|
|
|
print!(" {:?}", title);
|
2018-06-24 19:27:24 +02:00
|
|
|
|
|
|
|
println!();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
fn main() {
|
2018-07-10 21:29:17 +02:00
|
|
|
let map_file = "map.meta";
|
|
|
|
let indexes_file = "indexes.meta";
|
|
|
|
let rocksdb_file = "rocksdb/storage";
|
2018-07-06 20:58:06 +02:00
|
|
|
|
2018-07-10 21:29:17 +02:00
|
|
|
let (elapsed, meta) = measure_time(|| unsafe {
|
|
|
|
Metadata::from_paths(map_file, indexes_file).unwrap()
|
|
|
|
});
|
|
|
|
println!("{} to load metadata", elapsed);
|
2018-06-24 19:27:24 +02:00
|
|
|
|
2018-07-06 21:26:07 +02:00
|
|
|
let (elapsed, db) = measure_time(|| {
|
2018-07-10 21:29:17 +02:00
|
|
|
let options = DBOptions::new();
|
|
|
|
DB::open_for_read_only(options, rocksdb_file, false).unwrap()
|
2018-07-06 21:26:07 +02:00
|
|
|
});
|
2018-07-10 21:29:17 +02:00
|
|
|
println!("{} to load the RocksDB database", elapsed);
|
|
|
|
|
|
|
|
let (elapsed, lev_builder) = measure_time(|| LevBuilder::new());
|
|
|
|
println!("{} to load the levenshtein automaton", elapsed);
|
2018-07-06 21:26:07 +02:00
|
|
|
|
2018-06-24 19:27:24 +02:00
|
|
|
match env::args().nth(1) {
|
|
|
|
Some(query) => {
|
|
|
|
println!("Searching for: {:?}", query);
|
|
|
|
let query = query.to_lowercase();
|
2018-07-10 21:29:17 +02:00
|
|
|
let (elapsed, _) = measure_time(|| search(&meta, &db, &lev_builder, &query));
|
2018-06-24 19:27:24 +02:00
|
|
|
println!("Finished in {}", elapsed);
|
|
|
|
},
|
|
|
|
None => loop {
|
|
|
|
print!("Searching for: ");
|
|
|
|
io::stdout().flush().unwrap();
|
|
|
|
|
|
|
|
let mut query = String::new();
|
|
|
|
io::stdin().read_line(&mut query).unwrap();
|
|
|
|
let query = query.trim().to_lowercase();
|
|
|
|
|
|
|
|
if query.is_empty() { break }
|
|
|
|
|
2018-07-10 21:29:17 +02:00
|
|
|
let (elapsed, _) = measure_time(|| search(&meta, &db, &lev_builder, &query));
|
2018-06-24 19:27:24 +02:00
|
|
|
println!("Finished in {}", elapsed);
|
|
|
|
},
|
2018-05-27 15:23:43 +02:00
|
|
|
}
|
|
|
|
}
|